TensorFlow Notes

TF API reference

1. Linear Regression

Given a set of data points, find the regression equation y = wx + b.

This reduces to minimizing loss = sum((w*x_i + b - y_i)**2).

Gradient descent:
w' = w - learning_rate * d(loss)/dw
b' = b - learning_rate * d(loss)/db

so that w'x + b' -> y

import numpy as np


# y = wx + b
def compute_error_for_line_given_points(b, w, points):
    totalError = 0
    for i in range(0, len(points)):
        x = points[i, 0]
        y = points[i, 1]
        # compute mean-squared-error
        totalError += (y - (w * x + b)) ** 2
    # average loss for each point
    return totalError / float(len(points))

def step_gradient(b_current, w_current, points, learningRate):
    b_gradient = 0
    w_gradient = 0
    N = float(len(points))
    for i in range(0, len(points)):
        x = points[i, 0]
        y = points[i, 1]
        # grad_b = 2(wx+b-y)
        b_gradient += (2/N) * ((w_current * x + b_current) - y)
        # grad_w = 2(wx+b-y)*x
        w_gradient += (2/N) * x * ((w_current * x + b_current) - y)
    # update b' and w'
    new_b = b_current - (learningRate * b_gradient)
    new_w = w_current - (learningRate * w_gradient)
    return [new_b, new_w]

def gradient_descent_runner(points, starting_b, starting_w, learning_rate, num_iterations):
    b = starting_b
    w = starting_w
    # update for several times
    for i in range(num_iterations):
        b, w = step_gradient(b, w, np.array(points), learning_rate)
    return [b, w]


def run():
    points = np.genfromtxt("data.csv", delimiter=",")
    learning_rate = 0.0001
    initial_b = 0 # initial y-intercept guess
    initial_w = 0 # initial slope guess
    num_iterations = 1000
    print("Starting gradient descent at b = {0}, w = {1}, error = {2}"
          .format(initial_b, initial_w,
                  compute_error_for_line_given_points(initial_b, initial_w, points))
          )
    print("Running...")
    [b, w] = gradient_descent_runner(points, initial_b, initial_w, learning_rate, num_iterations)
    print("After {0} iterations b = {1}, w = {2}, error = {3}".
          format(num_iterations, b, w,
                 compute_error_for_line_given_points(b, w, points))
          )

if __name__ == '__main__':
    run()

2. Image Classification: MNIST Handwritten Digits

iteration: one iteration (also called a training step); each iteration updates the network parameters once;
batch size: the number of samples used in one iteration;
epoch: one epoch means one full pass over all samples in the training set (see the quick example below).
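As a quick sanity check of how these terms relate, using the same numbers as the MNIST code below:

num_samples = 60000      # MNIST training images
batch_size = 200         # samples used per iteration
steps_per_epoch = num_samples // batch_size
print(steps_per_epoch)   # 300 iterations make up one epoch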

Image [28,28,1] -> [784]

X: [b,784]  # b = number of images
W: [784,10]  # 10 image classes
b: [10]
out = X@W + b  # out: [b,10]

out=relu(X@W + b)
ReLU (Rectified Linear Unit) is used as the neurons' activation function.
Without an activation function, each layer's output would just be a linear function of the previous layer's input; it is easy to verify that no matter how many layers the network has, the output is then a linear combination of the input, which is equivalent to having no hidden layers at all.

Vanishing gradients: the sigmoid function saturates as its input goes to positive or negative infinity, so its gradient approaches zero there.
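A small sketch (my own, not part of the original notes) that uses tf.GradientTape to show why sigmoid causes vanishing gradients while ReLU does not:

import tensorflow as tf

x = tf.constant([-10., 0., 10.])
with tf.GradientTape(persistent=True) as tape:
    tape.watch(x)
    y_sigmoid = tf.sigmoid(x)
    y_relu = tf.nn.relu(x)
print(tape.gradient(y_sigmoid, x))  # ~[0., 0.25, 0.] -- gradient vanishes at both ends
print(tape.gradient(y_relu, x))     # [0., 0., 1.]   -- gradient stays 1 for positive inputs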

import  os
import  tensorflow as tf
from    tensorflow import keras
from    tensorflow.keras import layers, optimizers, datasets


os.environ['TF_CPP_MIN_LOG_LEVEL']='2'

(x, y), (x_val, y_val) = datasets.mnist.load_data() 
x = tf.convert_to_tensor(x, dtype=tf.float32) / 255.
y = tf.convert_to_tensor(y, dtype=tf.int32)
y = tf.one_hot(y, depth=10)
print(x.shape, y.shape)
train_dataset = tf.data.Dataset.from_tensor_slices((x, y))
train_dataset = train_dataset.batch(200)


model = keras.Sequential([ 
    layers.Dense(512, activation='relu'),
    layers.Dense(256, activation='relu'),
    layers.Dense(10)])

optimizer = optimizers.SGD(learning_rate=0.001)


def train_epoch(epoch):

    # Step4.loop
    for step, (x, y) in enumerate(train_dataset):


        with tf.GradientTape() as tape:
            # [b, 28, 28] => [b, 784]
            x = tf.reshape(x, (-1, 28*28))
            # Step1. compute output
            # [b, 784] => [b, 10]
            out = model(x)
            # Step2. compute loss
            loss = tf.reduce_sum(tf.square(out - y)) / x.shape[0]

        # Step3. optimize and update w1, w2, w3, b1, b2, b3
        grads = tape.gradient(loss, model.trainable_variables)
        # w' = w - lr * grad
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

        if step % 100 == 0:
            print(epoch, step, 'loss:', loss.numpy())

def train():

    for epoch in range(30):

        train_epoch(epoch)


if __name__ == '__main__':
    train()

3. TensorFlow Data Types

list -> np.array -> tf.Tensor

  • int,float,double
  • bool
  • string
# data types
tf.constant(1) #int32

tf.constant(1.) #float32

tf.constant(1.1,dtype=tf.int32) # error: 1.1 cannot be converted to int32

tf.constant(1.,dtype=tf.double) #float64

tf.constant([True,False]) #bool

tf.constant('hello world') #string

# attributes
with tf.device('cpu'):
     a = tf.constant([1])
a.device  # device string, e.g. '/device:CPU:0'

aa=a.gpu()  # copy of a on the GPU

a.numpy()  # returns a numpy array

a.ndim  # number of dimensions

a.shape  # shape of the tensor

# type checks
a=tf.constant([1.])

isinstance(a, tf.Tensor)  # True
tf.is_tensor(a) #true

a.dtype #tf.float32

# dtype conversion
a=np.arange(5)#([0,1,2,3,4])

a.dtype #int64

aa=tf.convert_to_tensor(a) #int64

aa=tf.convert_to_tensor(a,dtype=tf.int32)

tf.cast(aa,dtype=tf.float32)  # tf.cast converts the dtype

b=tf.constant([0,1])
tf.cast(b,dtype=tf.bool)  # [False, True]

c=tf.Variable(a)
c.trainable  # True

int(tensor)  # direct conversion to a Python int; the tensor must be a scalar

4. Creating Tensors

tf.convert_to_tensor(np.ones([2,3]))  # 2x3 tensor of ones, dtype=float64

tf.convert_to_tensor([1,2])  # dtype=int32, 1-D tensor of length 2

tf.convert_to_tensor([[1],[2.]])  # 2 rows, 1 column, dtype=float32


tf.zeros([2,2])  # 2x2 tensor of zeros

tf.zeros_like(a)
tf.zeros(a.shape)  # equivalent


tf.ones(1) #shape=(1,)
tf.ones([]) #shape=()
tf.ones([2]) #shape=(2,)
tf.ones([2,3])
tf.ones_like(a) == tf.ones(a.shape)


tf.fill([2,2],0)  # 2x2 tensor filled with 0

tf.random.normal([2,2],mean=1,stddev=1)  # mean: mean of the normal distribution (default 0); stddev: standard deviation (default 1.0)

tf.random.normal([2,2])

tf.random.truncated_normal([2,2],mean=0,stddev=1)  # samples from a truncated normal distribution: unlike the ordinary normal distribution, values more than two standard deviations from the mean are discarded and re-drawn

tf.random.uniform([2,2],minval=0,maxval=1)  # uniform distribution over [0, 1)

idx=tf.range(10)
idx=tf.random.shuffle(idx)  # shuffle the order

a=tf.gather(a,idx)
b=tf.gather(b,idx)  # useful when a and b are paired and must stay aligned

Typical dimensions:
NLP: [b,seq_len,word_dim]  # b sentences, sequence length, word-embedding dimension

Image: [b,h,w,c]  # 4-D: b images, height, width, RGB channels

5. Indexing and Slicing

a=tf.ones([1,5,5,3])
a[0][0] # shape (5,3), all elements 1
a[0][0][0] #shape(3,)
a[0][0][0][0] #shape()


a=tf.random.normal([4,28,28,3])#image
a[1].shape #[28,28,3]
a[1,2].shape #[28,3]
a[1,2,3].shape # [3]
a[1,2,3,2].shape # []

a=tf.range(10)
a[-1:] #[9]
a[:2] #[0,1]

a.shape #([4,28,28,3])
a[:,:,:,0] # ([4,28,28]) the first RGB channel of every image

# start:end:step
# ::step
a=tf.range(4)
a[::-1]  # reversed: 3,2,1,0
a[::-2]  # 3,1
a[2::-2]  # 2,0

a=tf.random.normal([2,4,28,28,3])  # [task, b, h, w, c]
a[0,...].shape == a[0,:,:,:,:].shape  # ... fills in the remaining dimensions


# scores a: [4,35,8] -- 4 classes, 35 students, 8 subjects
tf.gather(a,axis=0,indices=[2,1,3,0])  # reorder/select classes

tf.gather_nd(a,[0,1,2]).shape  # score of class 0, student 1, subject 2; scalar, shape []
tf.gather_nd(a,[[0,1,2]]).shape # [1]

tf.boolean_mask(a,mask=[True,True,False,False])

6. Dimension Transformations

View:
[b,28,28]
-> [b,28*28]
-> [b,2,14*28]
-> [b,28,28,1]

content:[b,h,w,c] [batch,height,width,channel]

image: [4,28,28,3], where the (h,w) part is the content view

reshape to:[4,784,3]

[4,784,3] --(height=28, width=28)--> [4,28,28,3]

tf.reshape

a=tf.random.normal([4,28,28,3]) #view1
a.shape,a.ndim #[4,28,28,3] , 4
tf.reshape(a,[4,784,3]) == tf.reshape(a,[4,-1,3]) # view2
tf.reshape(a,[4,784*3]) == tf.reshape(a,[4,-1]) # view1

tf.transpose

a=tf.random.normal((4,3,2,1))
tf.transpose(a).shape #([1,2,3,4])
tf.transpose(a,perm=[0,1,3,2]) #([4,3,1,2])

Squeeze and Expand dim

a=tf.random.normal([4,35,8])
tf.expand_dims(a,axis=0).shape #([1,4,35,8])
tf.expand_dims(a,axis=3).shape #([4,35,8,1])
tf.expand_dims(a,axis=-1).shape #([4,35,8,1])
tf.expand_dims(a,axis=-4).shape #([1,4,35,8])

tf.squeeze(tf.zeros([1,2,1,1,3])).shape #([2,3])
a=tf.zeros([1,2,1,3])
tf.squeeze(a,axis=0).shape #([2,1,3])

7.Broadcasting

KEY IDEA
-Insert 1 dim ahead if needed
-Expand dims with size 1 to same size
Tensors are expanded to a common shape for the operation, but no data is actually stored for the expanded part.
Shapes are aligned from the right.
This saves memory.

x=tf.random.normal([4,32,32,3])
(x+tf.random.normal([3])).shape #([4,32,32,3])
(x+tf.random.normal([32,32,1])).shape #([4,32,32,3])
(x+tf.random.normal([4,1,1,1])).shape #([4,32,32,3])
(x+tf.random.normal([1,4,1,1])).shape #incompatible shape
b=tf.broadcast_to(tf.random.normal([4,1,1,1]),[4,32,32,3])

8. Math Operations

element-wise

b=tf.fill([2,2],2.)
a=tf.ones([2,2])
a+b,a-b,a*b,a/b 
b//a,b%a  # floor division, modulo

a=[[1.,1.,1.,1.]]
tf.math.log(a)
tf.exp(a)
tf.math.log(8.)/tf.math.log(2.) #3

b=[[2.,2.,2.,2.]]
tf.pow(b,3)
b**3
tf.sqrt(b)

matrix-wise

a=tf.ones([2,2])
b=tf.fill([2,2],2.)
a@b  # [[4.,4.],[4.,4.]]
tf.matmul(a,b)


a=tf.ones([4,2,3])
b=tf.fill([4,3,5],2.)
a@b #shape =[4,2,5]


a.shape #[4,2,3]
b.shape #[3,5]
bb=tf.broadcast_to(b,[4,3,5])
a@bb  # shape=[4,2,5]

9. Forward Propagation

import  tensorflow as tf
from    tensorflow import keras
from    tensorflow.keras import datasets
import  os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # suppress TensorFlow startup log messages

# x: [60k, 28, 28],
# y: [60k]
(x, y), _ = datasets.mnist.load_data()
# x: [0~255] => [0~1.]
x = tf.convert_to_tensor(x, dtype=tf.float32) / 255.
y = tf.convert_to_tensor(y, dtype=tf.int32)

print(x.shape, y.shape, x.dtype, y.dtype)
print(tf.reduce_min(x), tf.reduce_max(x))
print(tf.reduce_min(y), tf.reduce_max(y))


train_db = tf.data.Dataset.from_tensor_slices((x,y)).batch(128)
train_iter = iter(train_db)
sample = next(train_iter)
print('batch:', sample[0].shape, sample[1].shape)


# [b, 784] => [b, 256] => [b, 128] => [b, 10]
# [dim_in, dim_out], [dim_out]
w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=0.1))
b1 = tf.Variable(tf.zeros([256]))
w2 = tf.Variable(tf.random.truncated_normal([256, 128], stddev=0.1))
b2 = tf.Variable(tf.zeros([128]))
w3 = tf.Variable(tf.random.truncated_normal([128, 10], stddev=0.1))
b3 = tf.Variable(tf.zeros([10]))

lr = 1e-3

for epoch in range(10): # iterate db for 10
    for step, (x, y) in enumerate(train_db): # for every batch
        # x:[128, 28, 28]
        # y: [128]

        # [b, 28, 28] => [b, 28*28]
        x = tf.reshape(x, [-1, 28*28])

        with tf.GradientTape() as tape: # tf.Variable
            # x: [b, 28*28]
            # h1 = x@w1 + b1
            # [b, 784]@[784, 256] + [256] => [b, 256] + [256] => [b, 256] + [b, 256]
            h1 = x@w1 + tf.broadcast_to(b1, [x.shape[0], 256])
            h1 = tf.nn.relu(h1)
            # [b, 256] => [b, 128]
            h2 = h1@w2 + b2
            h2 = tf.nn.relu(h2)
            # [b, 128] => [b, 10]
            out = h2@w3 + b3

            # compute loss
            # out: [b, 10]
            # y: [b] => [b, 10]
            y_onehot = tf.one_hot(y, depth=10)

            # mse = mean(sum(y-out)^2)
            # [b, 10]
            loss = tf.square(y_onehot - out)
            # mean: scalar
            loss = tf.reduce_mean(loss)

        # compute gradients
        grads = tape.gradient(loss, [w1, b1, w2, b2, w3, b3])
        # print(grads)
        # w1 = w1 - lr * w1_grad
        w1.assign_sub(lr * grads[0])
        b1.assign_sub(lr * grads[1])
        w2.assign_sub(lr * grads[2])
        b2.assign_sub(lr * grads[3])
        w3.assign_sub(lr * grads[4])
        b3.assign_sub(lr * grads[5])


        if step % 100 == 0:
            print(epoch, step, 'loss:', float(loss))

10. Merging and Splitting

tf.concat / tf.stack

a=tf.ones([4,32,8])
b=tf.ones([4,3,8])
c=tf.concat([a,b],axis=1)
c.shape # ([4,35,8])

tf.stack([a,b],axis=0) # error: stack requires all dimensions to match

tf.unstack / tf.split

# c: [2,4,35,8]
res=tf.unstack(c,axis=3) # unpack along axis 3 into 8 tensors
len(res) #8

res=tf.split(c,axis=3,num_or_size_splits=2)
len(res) #2
res[0].shape #[2,4,35,4]

res=tf.split(c,axis=3,num_or_size_splits=[2,2,4])
res[0].shape,res[2].shape #[2,4,35,2] [2,4,35,4]

11. Statistics

Vector norms:
- Euclidean norm = sqrt(sum(x_i**2))  # L2 norm: square root of the sum of squares
- Max norm = max(abs(x_i))  # infinity norm: largest absolute value among the elements
- L1 norm = sum(abs(x_i))  # sum of absolute values

a=tf.ones([2,2])
tf.norm(a)  # L2 norm = 2
==tf.sqrt(tf.reduce_sum(tf.square(a)))

a=tf.ones([4,28,28,3])
tf.norm(a)  # 96.99484 = sqrt(4*28*28*3)

b=tf.ones([2,2])
tf.norm(b) #2
tf.norm(b,ord=2,axis=1)  # L2 norm of each row: [sqrt(2), sqrt(2)]
tf.norm(b,ord=1)  # L1 norm = 4
tf.norm(b,ord=1,axis=0)  # L1 norm of each column: [2, 2]
tf.norm(b,ord=1,axis=1)  # L1 norm of each row: [2, 2]

reduce_min/max/mean

a=tf.random.normal([4,10])
tf.reduce_min(a)
tf.reduce_max(a,axis=1)

argmax/argmin # positions of the max/min values

a.shape #([4,10])
tf.argmax(a).shape # axis defaults to 0, so the result has shape [10]
tf.argmax(a)

tf.equal

a=tf.constant([1,2,3,3,5])
b=tf.range(5)
tf.equal(a,b) #[False,False,False,True,False]
res=tf.equal(a,b)
tf.reduce_sum(tf.cast(res,dtype=tf.int32)) #numpy=1

#accuracy
a=[[0.1,0.2,0.7],[0.9,0.05,0.05]]
pred=tf.cast(tf.argmax(a,axis=1),dtype=tf.int32) #[2,0]
y=[2,1]
tf.equal(y,pred) #[True,False]
correct=tf.reduce_sum(tf.cast(tf.equal(y,pred),dtype=tf.int32)) #1
correct/2 #0.5
tf.unique

a=tf.constant([4,2,2,4,3])
tf.unique(a) # unique values [4,2,3], index mapping [0,1,1,0,2]

12. Tensor Sorting

Sort/argsort

a=tf.random.shuffle(tf.range(5)) # [2,0,3,4,1]
tf.sort(a,direction='DESCENDING') # [4,3,2,1,0]
tf.argsort(a,direction='DESCENDING') # [3,2,0,4,1]
idx=tf.argsort(a,direction='DESCENDING')
tf.gather(a,idx)

a=tf.random.uniform([3,3],maxval=10,dtype=tf.int32) # e.g. [[4,6,8],[9,4,7],[4,5,1]]
tf.sort(a) # ascending by default: [[4,6,8],[4,7,9],[1,4,5]]
tf.sort(a,direction='DESCENDING')
idx=tf.argsort(a)

Top_k

a=[[4,6,8],[9,4,7],[4,5,1]]
res=tf.math.top_k(a,2)  # two largest values per row
res.indices # [[2,1],[0,2],[1,0]]
res.values # [[8,6],[9,7],[5,4]]

#accuracy
prob=tf.constant([[0.1,0.2,0.7],[0.2,0.7,0.1]])
k_b=tf.math.top_k(prob,3).indices # [[2,1,0],[1,0,2]]
k_b=tf.transpose(k_b,[1,0]) # [[2,1],[1,0],[0,2]]
target=tf.broadcast_to(target,[3,2]) # target: the ground-truth labels, broadcast to match k_b

#top_k accuracy
def accuracy(output, target, topk=(1,)):
    maxk = max(topk)
    batch_size = target.shape[0]

    pred = tf.math.top_k(output, maxk).indices
    pred = tf.transpose(pred, perm=[1, 0])
    target_ = tf.broadcast_to(target, pred.shape)
    correct = tf.equal(pred, target_)

    res = []
    for k in topk:
        correct_k = tf.cast(tf.reshape(correct[:k], [-1]), dtype=tf.float32)
        correct_k = tf.reduce_sum(correct_k)
        acc = float(correct_k* (100.0 / batch_size) )
        res.append(acc)

    return res

13. Padding and Tiling

pad

a=tf.reshape(tf.range(9),[3,3]) #[[0,1,2],[3,4,5],[6,7,8]]
tf.pad(a,[[0,0],[1,0]]) # [[top,bottom],[left,right]]: pad one column of zeros on the left

#image padding
a=tf.random.normal([4,28,28,3])
b=tf.pad(a,[[0,0],[2,2],[2,2],[0,0]])
b.shape #[4,32,32,3]

tile (copying)

a.shape  # [3,3]
b=tf.tile(a,[1,2])  # [rows, cols]: 1 keeps the row dimension, 2 doubles the column dimension
b.shape  # [3,6]

tile VS broadcast_to
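A small comparison sketch (my own example): tile physically copies the data, while broadcast_to only expands the shape logically, yet both produce the same values.

a = tf.reshape(tf.range(3), [1, 3])
t = tf.tile(a, [2, 1])           # data is actually copied -> shape [2,3]
b = tf.broadcast_to(a, [2, 3])   # no real copy, only a logical expansion -> shape [2,3]
print(tf.reduce_all(tf.equal(t, b)).numpy())  # True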

14. Tensor Clipping

clip_by_value Relu

a=[0,1,2,3,4,5,6,7,8,9]
tf.maximum(a,2) #[2,2,2,3,4,5,6,7,8,9]
tf.minimum(a,8) #[0,1,2,3,4,5,6,7,8,8]
tf.clip_by_value(a,2,8) #[2,2,2,3,4,5,6,7,8,8]

a=a-5 #[-5,-4,-3,-2,-1,0,1,2,3,4]
tf.nn.relu(a) #[0,0,0,0,0,0,1,2,3,4]
== tf.maximum(a,0)

clip_by_norm: scale the tensor down proportionally based on its norm

a=tf.random.normal([2,2],mean=10)
tf.norm(a)
aa=tf.clip_by_norm(a,15)
tf.norm(aa) # norm = 15

Gradient exploding: a single step is too large, causing the loss to oscillate.
Gradient vanishing: a single step is too small, so the loss stops decreasing.
Clipping by the global norm keeps the gradient direction unchanged: [w1,w2,w3] are scaled together.
new_grads,total_norm=tf.clip_by_global_norm(grads,25) # rescale so the overall norm is at most 25
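A minimal, self-contained sketch of clip_by_global_norm (the clip value 15 and the toy loss are my own choices):

import tensorflow as tf

w = tf.Variable(tf.random.normal([3, 3], mean=10.))
with tf.GradientTape() as tape:
    loss = tf.reduce_sum(tf.square(w))
grads = tape.gradient(loss, [w])
print('global norm before:', tf.linalg.global_norm(grads).numpy())
clipped, total_norm = tf.clip_by_global_norm(grads, 15.)
print('global norm after :', tf.linalg.global_norm(clipped).numpy())  # at most 15
tf.keras.optimizers.SGD(learning_rate=1e-2).apply_gradients(zip(clipped, [w]))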

15. Advanced Operations

Where(tensor)

a=tf.random.normal([3,3])
mask=a>0 # 3x3 boolean mask
tf.boolean_mask(a,mask) # returns the values where the mask is True, same as tf.gather_nd(a,indices)
indices=tf.where(mask) # returns the coordinates where the mask is True


mask #[[T,T,F],[T,F,F],[T,T,F]]
A=tf.ones([3,3])
B=tf.zeros([3,3])
tf.where(mask,A,B) # [[1,1,0],[1,0,0],[1,1,0]]

scatter_nd
Scatters updates onto an all-zero base tensor.

# 1-D update
indices=tf.constant([[4],[3],[1],[7]])
updates=tf.constant([9,10,11,12])
shape=tf.constant([8]) # target shape: 8 zeros

tf.scatter_nd(indices,updates,shape) # [0,11,0,10,9,0,0,12]

# multi-dimensional updates work the same way

meshgrid

#numpy
points=[]
for y in np.linspace(-2,2,5):
  for x in np.linspace(-2,2,5):
    points.append([x,y])
points=np.array(points)

#tf
y=tf.linspace(-2.,2.,5)
x=tf.linspace(-2.,2.,5)
points_x,points_y=tf.meshgrid(x,y)
points_x.shape # [5,5]
points_x # the x-coordinates of all grid points
points_y # the y-coordinates of all grid points
points=tf.stack([points_x,points_y],axis=2)
# plot the function over the grid (see the sketch below)
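A sketch of that plotting step (my own example; it assumes matplotlib is installed and uses z = sin(x) + sin(y) as the function):

import tensorflow as tf
from matplotlib import pyplot as plt

x = tf.linspace(-2., 2., 100)
y = tf.linspace(-2., 2., 100)
px, py = tf.meshgrid(x, y)
z = tf.math.sin(px) + tf.math.sin(py)   # any scalar function of the grid

plt.contourf(px, py, z)                 # filled contour plot of z over the grid
plt.colorbar()
plt.show()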

16. Data Loading

keras.datasets

  • boston housing
  • mnist/fashion mnist
  • cifar10/100
  • imdb #nlp
    (x,y),(x_test,y_test)=keras.datasets.mnist.load_data() #numpy
    x.shape #(60000,28,28)
    y.shape #(60000,)  0-9
    x.min(),x.max() #0 ,255
    

y[:2] #[5,0]
y_onehot=tf.one_hot(y,depth=10)
y_onehot[:2] #[[0,0,0,0,0,1,0,0,0,0],[1,0,0,0,0,0,0,0,0,0]]

tf.data.Dataset # converts numpy data into tensors
(x,y),(x_test,y_test)=keras.datasets.cifar10.load_data()
#50k   10k
db=tf.data.Dataset.from_tensor_slices((x_test,y_test))
next(iter(db))[0].shape #tensor [32,32,3]

#shuffle: randomize the order
db=db.shuffle(10000)

# data preprocessing
def preprocess(x,y):
    x=tf.cast(x,dtype=tf.float32)/255  # scale to [0,1]
    y=tf.cast(y,dtype=tf.int32)
    y=tf.one_hot(y,depth=10)
    return x,y

db2=db.map(preprocess)
res=next(iter(db2))
res[0].shape,res[1].shape #tensor [32,32,3]  [1,10]


db3=db2.batch(32)
res=next(iter(db3))
res[0].shape,res[1].shape #tensor [32,32,32,3]  [32,1,10]

db4=db3.repeat(2)

Standard preprocessing pipeline

def preprocess(x,y):
    x=tf.cast(x,dtype=tf.float32)/255  # scale to [0,1]
    y=tf.cast(y,dtype=tf.int32)
    return x,y

def mnist_dataset():
    (x,y),(x_test,y_test)=keras.datasets.fashion_mnist.load_data()
    y=tf.one_hot(y,depth=10)
    y_test =tf.one_hot(y_test,depth=10)

    ds=tf.data.Dataset.from_tensor_slices((x,y))
    ds=ds.map(preprocess)
    ds=ds.shuffle(60000).batch(100)
    ds_test=tf.data.Dataset.from_tensor_slices((x_test,y_test))
    ds_test=ds_test.map(preprocess)
    ds_test=ds_test.shuffle(60000).batch(100)
    return ds,ds_test

17. Fully Connected Layers

Matrix operations are realized as fully connected layers, which include input, hidden, and output layers:
neurons x, connection weights w.

x=tf.random.normal([4,784])

net=tf.keras.layers.Dense(512)
out=net(x)

out.shape # ([4,512])
net.kernel.shape,net.bias.shape #[784,512],[512]

#Multi-layers
model=keras.Sequential()


import tensorflow as tf 
from     tensorflow import keras

x = tf.random.normal([2, 3])

model = keras.Sequential([
        keras.layers.Dense(2, activation='relu'),
        keras.layers.Dense(2, activation='relu'),
        keras.layers.Dense(2)
    ])
model.build(input_shape=[None, 3])
model.summary()

for p in model.trainable_variables:
    print(p.name, p.shape)

18. Output Activations

tf.sigmoid(): squashes each output into (0, 1)
tf.nn.softmax(): exp(y_i) / sum_j exp(y_j); squashes the outputs so the probabilities sum to 1
tf.tanh(): squashes each output into (-1, 1)
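A quick demo (my own example) of the three transforms applied to the same logits:

import tensorflow as tf

logits = tf.constant([-2., 0., 2.])
print(tf.sigmoid(logits))     # each value squashed into (0, 1) independently
print(tf.nn.softmax(logits))  # values are non-negative and sum to 1
print(tf.tanh(logits))        # each value squashed into (-1, 1)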

19. Loss Functions

MSE (Mean Squared Error): loss = 1/N * sum((y - out)**2)

y=tf.constant([1,2,3,0,2])
y=tf.one_hot(y,depth=4)
y=tf.cast(y,dtype=tf.float32)

out=tf.random.normal([5,4])
loss1=tf.reduce_mean(tf.square(y-out))
loss2=tf.square(tf.norm(y-out))/(5*4)
loss3=tf.reduce_mean(tf.losses.MSE(y,out))

Entropy: a large value means the distribution is uniform (stable); a small value means it is uneven.

a=tf.fill([4],0.25)
-tf.reduce_sum(a*tf.math.log(a)/tf.math.log(2.))  # entropy = -sum(P*log2(P)) = 2

a=tf.constant([0.1,0.1,0.1,0.7])
-tf.reduce_sum(a*tf.math.log(a)/tf.math.log(2.))  # 1.35677

Cross Entropy

tf.losses.categorical_crossentropy([0,1,0,0],[0.25,0.25,0.25,0.25])
criteon=tf.losses.CategoricalCrossentropy()
criteon([0,1,0,0],[0.1,0.7,0.1,0.1])
tf.losses.BinaryCrossentropy()([1],[0.1])

tf.losses.categorical_crossentropy([0,1],logits,from_logits=True)  # pass raw logits with from_logits=True

20. Gradient Descent

The gradient points in the direction in which the function increases.

w=tf.constant(1.)
x=tf.constant(2.)
with tf.GradientTape() as tape:
    tape.watch([w])
    y2=x*w
grad2=tape.gradient(y2,[w]) # dy2/dw = x = 2

# tf.GradientTape(persistent=True) creates a persistent tape that can be queried for gradients more than once (see the sketch below).
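A minimal sketch (my own example) of the persistent tape:

import tensorflow as tf

w = tf.constant(1.)
x = tf.constant(2.)
with tf.GradientTape(persistent=True) as tape:
    tape.watch([w, x])
    y2 = x * w
print(tape.gradient(y2, [w]))  # dy2/dw = x = 2
print(tape.gradient(y2, [x]))  # dy2/dx = w = 1
del tape                       # release the tape's resources when done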

21. Function Optimization

Himmelblau function

import  numpy as np
from    mpl_toolkits.mplot3d import Axes3D
from    matplotlib import pyplot as plt
import  tensorflow as tf



def himmelblau(x):
    return (x[0] ** 2 + x[1] - 11) ** 2 + (x[0] + x[1] ** 2 - 7) ** 2


x = np.arange(-6, 6, 0.1)
y = np.arange(-6, 6, 0.1)
print('x,y range:', x.shape, y.shape)
X, Y = np.meshgrid(x, y)
print('X,Y maps:', X.shape, Y.shape)
Z = himmelblau([X, Y])

fig = plt.figure('himmelblau')
ax = fig.add_subplot(projection='3d')
ax.plot_surface(X, Y, Z)
ax.view_init(60, -30)
ax.set_xlabel('x')
ax.set_ylabel('y')
plt.show()


# different starting points converge to different minima
x = tf.constant([4., 0.])

for step in range(200):

    with tf.GradientTape() as tape:
        tape.watch([x])
        y = himmelblau(x)

    grads = tape.gradient(y, [x])[0] 
    x -= 0.01*grads



    if step % 20 == 0:
        print ('step {}: x = {}, f(x) = {}'
               .format(step, x.numpy(), y.numpy()))

22. Fashion-MNIST in Practice
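The original notes leave this section empty, so the following is only a minimal sketch of a Fashion-MNIST run built from the pieces covered elsewhere in these notes (layer sizes and hyperparameters are my own choices):

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, optimizers, datasets

def preprocess(x, y):
    x = tf.cast(x, dtype=tf.float32) / 255.
    x = tf.reshape(x, [-1, 28 * 28])
    y = tf.cast(y, dtype=tf.int32)
    y = tf.one_hot(y, depth=10)
    return x, y

(x, y), (x_test, y_test) = datasets.fashion_mnist.load_data()
db = tf.data.Dataset.from_tensor_slices((x, y)).shuffle(10000).batch(128).map(preprocess)
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(128).map(preprocess)

model = keras.Sequential([
    layers.Dense(256, activation='relu'),
    layers.Dense(128, activation='relu'),
    layers.Dense(10)])
model.compile(optimizer=optimizers.Adam(learning_rate=1e-3),
              loss=tf.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
model.fit(db, epochs=5, validation_data=db_test, validation_freq=1)
model.evaluate(db_test)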

23. TensorBoard Data Visualization

(The PyTorch equivalent is visdom.)

# feed data
import datetime
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = 'logs/' + current_time
summary_writer = tf.summary.create_file_writer(log_dir)

# run from the directory that contains logs/
tensorboard --logdir logs
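A short, self-contained sketch of actually writing values that TensorBoard can display (it repeats the writer setup above; the loop and loss values are placeholders standing in for a real training loop):

import datetime
import tensorflow as tf

current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
summary_writer = tf.summary.create_file_writer('logs/' + current_time)

for step in range(100):
    loss = 1.0 / (step + 1)   # placeholder standing in for a real training loss
    with summary_writer.as_default():
        tf.summary.scalar('train-loss', loss, step=step)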

24. Keras High-Level API

Main modules: datasets / layers / losses / metrics / optimizers

metrics

acc_meter = metrics.Accuracy()
loss_meter = metrics.Mean()

loss_meter.update_state(loss)
acc_meter.update_state(y, pred)

print(step, 'loss:', loss_meter.result().numpy()) 
loss_meter.reset_states()

compile / fit / evaluate / predict

network.compile(optimizer=optimizers.Adam(learning_rate=0.01),
        loss=tf.losses.CategoricalCrossentropy(from_logits=True),
        metrics=['accuracy']
    )

network.fit(db, epochs=5, validation_data=ds_val, validation_freq=2) # validate every 2 epochs during training
network.evaluate(ds_val) # final evaluation on the validation set

25. Custom Layers and Models

  • keras.Sequential

  • keras.layers.Layer

  • keras.Model

    class MyDense(layers.Layer):
    
      def __init__(self, inp_dim, outp_dim):
          super(MyDense, self).__init__()
    
          self.kernel = self.add_weight('w', [inp_dim, outp_dim])
          self.bias = self.add_weight('b', [outp_dim])
    
      def call(self, inputs, training=None):
    
          out = inputs @ self.kernel + self.bias
    
          return out 
    

class MyModel(keras.Model):

    def __init__(self):
        super(MyModel, self).__init__()

        self.fc1 = MyDense(28*28, 256)
        self.fc2 = MyDense(256, 128)
        self.fc3 = MyDense(128, 64)
        self.fc4 = MyDense(64, 32)
        self.fc5 = MyDense(32, 10)

    def call(self, inputs, training=None):

        x = self.fc1(inputs)
        x = tf.nn.relu(x)
        x = self.fc2(x)
        x = tf.nn.relu(x)
        x = self.fc3(x)
        x = tf.nn.relu(x)
        x = self.fc4(x)
        x = tf.nn.relu(x)
        x = self.fc5(x)

        return x
26. Saving and Loading Models

  • save/load weights

    network.save_weights('weights.ckpt')  # save only the parameters
    del network
    # recreate a network with the same architecture first
    network.load_weights('weights.ckpt')

  • save/load entire model

    network.save('model.h5')
    del network
    # no need to recreate the network
    network = tf.keras.models.load_model('model.h5')

  • saved_model  # for production deployment

    tf.saved_model.save(m, '/path/to/dir')
    imported = tf.saved_model.load('/path/to/dir')
    f = imported.signatures["serving_default"]
    print(f(x=tf.ones([1,28,28,3])))

27. Overfitting and Underfitting

Under-fitting: estimated model capacity < ground-truth complexity.
Training accuracy is bad, and test accuracy is bad as well.

Over-fitting: ground-truth complexity < estimated model capacity.
Training accuracy is much better, but test accuracy is worse.
==> Generalization performance degrades.

28. Dataset Splits and Cross-Validation

train / val / test
If the test set is used for validation and hyperparameter tuning, information leaks from it and generalization suffers.

k-fold cross-validation
Split the training data into train and val partitions, train, then re-split into a different train/val partition and train again (see the sketch below).
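A minimal sketch of the k-fold idea using numpy index splits (the fold count and sample count are my own choices):

import numpy as np

num_samples = 1000                     # stand-in for the training-set size
k = 5
folds = np.array_split(np.random.permutation(num_samples), k)
for i in range(k):
    val_idx = folds[i]
    train_idx = np.concatenate([folds[j] for j in range(k) if j != i])
    # train on train_idx, validate on val_idx, then average the k validation scores
    print(i, len(train_idx), len(val_idx))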

29. Ways to Reduce Overfitting

  • more data
  • constrain model complexity
    reduce model complexity: shallower networks / regularization
  • Dropout
  • Data augmentation
  • Early Stopping

Regularization
Pushes the parameters of high-order / high-dimensional feature terms in the loss function toward 0.

L1 regularization: add the L1 norm of all network weights to the loss.
L2 regularization: add the L2 norm of all network weights to the loss (weight decay).
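A sketch of adding an L2 penalty per layer with Keras (the 0.001 weight is an arbitrary choice); the penalty terms are added to the loss automatically during compile/fit:

from tensorflow import keras
from tensorflow.keras import layers

model = keras.Sequential([
    layers.Dense(256, activation='relu',
                 kernel_regularizer=keras.regularizers.l2(0.001)),
    layers.Dense(10, kernel_regularizer=keras.regularizers.l2(0.001))])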

Momentum and learning rate decay
momentum
Adding momentum to the current gradient direction helps find a better optimum.

learning rate decay
Too small a learning rate makes training slow; too large a rate keeps the loss from converging.
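A sketch of both tricks with the Keras optimizers (the specific values are illustrative only):

from tensorflow.keras import optimizers

# momentum: blend the current gradient direction with the previous update direction
optimizer = optimizers.SGD(learning_rate=0.01, momentum=0.9)

# learning rate decay: shrink the step size as training progresses
lr_schedule = optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.01, decay_steps=1000, decay_rate=0.96)
optimizer = optimizers.SGD(learning_rate=lr_schedule, momentum=0.9)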

30. Training Tricks

  • Early Stopping
    Stop training early to prevent overfitting:
    use the validation set to select parameters,
    monitor validation performance,
    and stop at the highest validation performance.
  • Dropout
    Learning less to learn better: randomly drop connections between fully connected layers (see the sketch after this list).
  • Stochastic Gradient Descent
    Compute the gradient on one batch at a time.
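A sketch of Dropout in a Keras model (the 0.5 rate is an arbitrary choice); note that dropout is only active when training=True:

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

model = keras.Sequential([
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.5),                     # randomly zeroes 50% of the activations during training
    layers.Dense(10)])
out_train = model(tf.ones([4, 784]), training=True)   # dropout active
out_test = model(tf.ones([4, 784]), training=False)   # dropout disabled at inference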

31. Convolutional Neural Networks

Receptive field  # like the human eye: at first glance only a region of interest is seen; scanning with a sliding window gathers the complete information

Convolution kernel: weight sharing and a sliding window greatly reduce the number of parameters.

[b,5,5,c] => [c,3,3] kernel => [b,3,3,1]  # c input channels; multiply element-wise and accumulate
Use padding & stride (step size) to keep the input and output shapes consistent.

[b,5,5,c] => [n,c,3,3] (n kernels) => [b,3,3,n]  # multi-channel input, multi-channel output

layer.kernel  # [3,3,c,n]

import tensorflow as tf
from tensorflow.keras import layers

layer=layers.Conv2D(4,kernel_size=5,strides=1,padding='same')

layer.kernel  # [5,5,c,4] once the layer has been built
layer.bias  # (4,)

32. Pooling and Sampling

  • pooling: reduces the spatial dimensions
    max/avg pooling: take the maximum/average of each window
    stride
    x  # [1,14,14,4]
    pool=layers.MaxPool2D(2,strides=2)
    out=pool(x)
    # TensorShape [1,7,7,4]

out=tf.nn.max_pool2d(x,2,strides=2,padding='VALID')

  • upsample: increases the spatial dimensions
    x=tf.random.normal([1,7,7,4])
    layer=layers.UpSampling2D(size=3)
    out=layer(x)
    # TensorShape [1,21,21,4]
  • ReLU: the same as the relu activation function