TensorFlow Notes
1. Linear Regression
Given a set of points, fit the regression line y = wx + b.
This reduces to minimizing loss = sum((w*xi + b - yi)**2).
Gradient descent: w' = w - lr * d(loss)/dw
b' = b - lr * d(loss)/db
so that w'x + b' approaches y.
import numpy as np

# y = wx + b
def compute_error_for_line_given_points(b, w, points):
    totalError = 0
    for i in range(0, len(points)):
        x = points[i, 0]
        y = points[i, 1]
        # compute mean-squared-error
        totalError += (y - (w * x + b)) ** 2
    # average loss over all points
    return totalError / float(len(points))

def step_gradient(b_current, w_current, points, learningRate):
    b_gradient = 0
    w_gradient = 0
    N = float(len(points))
    for i in range(0, len(points)):
        x = points[i, 0]
        y = points[i, 1]
        # grad_b = 2(wx+b-y)
        b_gradient += (2/N) * ((w_current * x + b_current) - y)
        # grad_w = 2(wx+b-y)*x
        w_gradient += (2/N) * x * ((w_current * x + b_current) - y)
    # update w', b'
    new_b = b_current - (learningRate * b_gradient)
    new_w = w_current - (learningRate * w_gradient)
    return [new_b, new_w]

def gradient_descent_runner(points, starting_b, starting_w, learning_rate, num_iterations):
    b = starting_b
    w = starting_w
    # update for several iterations
    for i in range(num_iterations):
        b, w = step_gradient(b, w, np.array(points), learning_rate)
    return [b, w]

def run():
    points = np.genfromtxt("data.csv", delimiter=",")
    learning_rate = 0.0001
    initial_b = 0  # initial y-intercept guess
    initial_w = 0  # initial slope guess
    num_iterations = 1000
    print("Starting gradient descent at b = {0}, w = {1}, error = {2}"
          .format(initial_b, initial_w,
                  compute_error_for_line_given_points(initial_b, initial_w, points)))
    print("Running...")
    [b, w] = gradient_descent_runner(points, initial_b, initial_w, learning_rate, num_iterations)
    print("After {0} iterations b = {1}, w = {2}, error = {3}"
          .format(num_iterations, b, w,
                  compute_error_for_line_given_points(b, w, points)))

if __name__ == '__main__':
    run()
2. Image Classification: MNIST Handwritten Digits
iteration: one iteration (also called a training step) updates the network parameters once;
batch size: the number of samples used in one iteration;
epoch: one epoch is one full pass over all samples in the training set.
Image [28,28,1] -> [784]
X: [b, 784], b images
W: [784, 10], 10 classes
b: [10]
out = X@W + b    # out: [b,10]
out = relu(X@W + b)
Rectified Linear Unit (ReLU): the neuron activation function.
Without an activation function, each layer's output is a linear function of the previous layer's input; no matter how many layers the network has, the output is still a linear combination of the input, which is equivalent to having no hidden layers at all.
Gradient vanishing: the sigmoid function saturates as its input goes to positive or negative infinity, so its gradient approaches zero there.
import os
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, optimizers, datasets

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

(x, y), (x_val, y_val) = datasets.mnist.load_data()
x = tf.convert_to_tensor(x, dtype=tf.float32) / 255.
y = tf.convert_to_tensor(y, dtype=tf.int32)
y = tf.one_hot(y, depth=10)
print(x.shape, y.shape)
train_dataset = tf.data.Dataset.from_tensor_slices((x, y))
train_dataset = train_dataset.batch(200)

model = keras.Sequential([
    layers.Dense(512, activation='relu'),
    layers.Dense(256, activation='relu'),
    layers.Dense(10)])

optimizer = optimizers.SGD(learning_rate=0.001)

def train_epoch(epoch):
    # Step 4. loop over batches
    for step, (x, y) in enumerate(train_dataset):
        with tf.GradientTape() as tape:
            # [b, 28, 28] => [b, 784]
            x = tf.reshape(x, (-1, 28*28))
            # Step 1. compute output
            # [b, 784] => [b, 10]
            out = model(x)
            # Step 2. compute loss
            loss = tf.reduce_sum(tf.square(out - y)) / x.shape[0]
        # Step 3. optimize and update w1, w2, w3, b1, b2, b3
        grads = tape.gradient(loss, model.trainable_variables)
        # w' = w - lr * grad
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        if step % 100 == 0:
            print(epoch, step, 'loss:', loss.numpy())

def train():
    for epoch in range(30):
        train_epoch(epoch)

if __name__ == '__main__':
    train()
3. TensorFlow Data Types
list -> np.array -> tf.Tensor
- int, float, double
- bool
- string
# data types
tf.constant(1)    # int32
tf.constant(1.)   # float32
tf.constant(1.1, dtype=tf.int32)    # error: a float literal cannot be stored as int
tf.constant(1., dtype=tf.double)    # float64
tf.constant([True, False])          # bool
tf.constant('hello world')          # string
# attributes
with tf.device('cpu'):
    a = tf.constant([1])
a.device      # cpu
aa = a.gpu()  # aa is a copy placed on the GPU
a.numpy()     # return the numpy array
a.ndim        # number of dimensions
a.shape       # shape
# type checks
a = tf.constant([1.])
isinstance(a, tf.Tensor)
tf.is_tensor(a)  # True
a.dtype          # tf.float32
# dtype conversion
a = np.arange(5)  # [0,1,2,3,4]
a.dtype           # int64
aa = tf.convert_to_tensor(a)                  # int64
aa = tf.convert_to_tensor(a, dtype=tf.int32)
tf.cast(aa, dtype=tf.float32)  # tf.cast converts the dtype
b = tf.constant([0, 1])
tf.cast(b, dtype=bool)  # [False, True]
c = tf.Variable(a)
c.trainable  # True
int(tensor)  # direct conversion, only for scalar tensors
4. Creating Tensors
tf.convert_to_tensor(np.ones([2,3]))  # 2x3, all ones, dtype=float64
tf.convert_to_tensor([1,2])           # dtype=int32, 1-D tensor of length 2
tf.convert_to_tensor([[1],[2.]])      # 2 rows, 1 column, dtype=float32
tf.zeros([2,2])       # shape=(2,2), all zeros
tf.zeros_like(a)
tf.zeros(a.shape)     # equivalent
tf.ones(1)            # shape=(1,)
tf.ones([])           # shape=(), scalar
tf.ones([2])          # shape=(2,)
tf.ones([2,3])
tf.ones_like(a) == tf.ones(a.shape)
tf.fill([2,2],0)      # fill with 0
tf.random.normal([2,2],mean=1,stddev=1)  # mean: mean of the normal distribution (default 0); stddev: standard deviation (default 1.0)
tf.random.normal([2,2])
tf.random.truncated_normal([2,2],mean=0,stddev=1)  # samples from a truncated normal: values farther than two standard deviations from the mean are re-drawn, unlike the ordinary normal
tf.random.uniform([2,2],minval=0,maxval=1)  # uniform over [0,1)
idx=tf.range(10)
idx=tf.random.shuffle(idx)  # shuffle the order
a=tf.gather(a,idx)
b=tf.gather(b,idx)  # use the same idx so paired data (a, b) stay aligned
Dimension conventions:
NLP: [b, seq_len, word_dim]  # b sentences, sequence length, word-embedding dimension
Image: [b, h, w, c]          # 4-D: b images, height, width, RGB channels
5. Indexing and Slicing
a=tf.ones([1,5,5,3])
a[0][0]           # shape (5,3), all values 1
a[0][0][0]        # shape (3,)
a[0][0][0][0]     # shape ()
a=tf.random.normal([4,28,28,3])  # image batch
a[1].shape        # [28,28,3]
a[1,2].shape      # [28,3]
a[1,2,3].shape    # [3]
a[1,2,3,2].shape  # []
a=tf.range(10)
a[-1:]  # [9]
a[:2]   # [0,1]
a=tf.random.normal([4,28,28,3])  # back to the image batch, a.shape == (4,28,28,3)
a[:,:,:,0]  # shape (4,28,28), the first (R) channel of every image
# start:end:step
# ::step
a=tf.range(4)
a[::-1]   # reversed: 3,2,1,0
a[::-2]   # 3,1
a[2::-2]  # 2,0
a=tf.random.normal([2,4,28,28,3])  # e.g. 2 tasks of image batches
a[0,...].shape == a[0,:,:,:,:].shape
# scores [4,35,8]: 4 classes, 35 students, 8 subjects
tf.gather(a,axis=0,indices=[2,1,3,0])
tf.gather_nd(a,[0,1,2]).shape    # score of class 0, student 1, subject 2 -> shape []
tf.gather_nd(a,[[0,1,2]]).shape  # [1]
tf.boolean_mask(a,mask=[True,True,False,False])  # mask along axis 0
6. Dimension Transformations
view
[b,28,28]
-> [b,28*28]
-> [b,2,14*28]
-> [b,28,28,1]
content: [b,h,w,c]  [batch, height, width, channel]
image: [4,28,28,3], the view keeps content (h, w)
reshape to: [4,784,3]
[4,784,3] -- with height=28, width=28 --> [4,28,28,3]
tf.reshape
a=tf.random.normal([4,28,28,3])  # view 1
a.shape, a.ndim  # [4,28,28,3], 4
tf.reshape(a,[4,784,3]) == tf.reshape(a,[4,-1,3])   # view 2
tf.reshape(a,[4,784*3]) == tf.reshape(a,[4,-1])     # view 3
tf.transpose
a=tf.random.normal((4,3,2,1))
tf.transpose(a).shape           # ([1,2,3,4])
tf.transpose(a,perm=[0,1,3,2])  # ([4,3,1,2])
Squeeze and expand dims
a=tf.random.normal([4,35,8])
tf.expand_dims(a,axis=0).shape   # ([1,4,35,8])
tf.expand_dims(a,axis=3).shape   # ([4,35,8,1])
tf.expand_dims(a,axis=-1).shape  # ([4,35,8,1])
tf.expand_dims(a,axis=-4).shape  # ([1,4,35,8])
tf.squeeze(tf.zeros([1,2,1,1,3])).shape  # ([2,3])
a=tf.zeros([1,2,1,3])
tf.squeeze(a,axis=0).shape  # ([2,1,3])
7. Broadcasting
Key idea:
- insert a dimension of size 1 in front if needed
- expand dimensions of size 1 to the target size
The tensors are logically expanded to the same shape for the operation; the expanded part holds no actual data.
Shapes are aligned from the right.
Saves memory.
x=tf.random.normal([4,32,32,3])
(x+tf.random.normal([3])).shape        # ([4,32,32,3])
(x+tf.random.normal([32,32,1])).shape  # ([4,32,32,3])
(x+tf.random.normal([4,1,1,1])).shape  # ([4,32,32,3])
(x+tf.random.normal([1,4,1,1])).shape  # incompatible shapes
b=tf.broadcast_to(tf.random.normal([4,1,1,1]),[4,32,32,3])
8. Math Operations
element-wise
b=tf.fill([2,2],2.)
a=tf.ones([2,2])
a+b, a-b, a*b, a/b
b//a, b%a  # floor division, modulo
a=tf.constant([[1.,1.,1.,1.]])
tf.math.log(a)
tf.exp(a)
tf.math.log(8.)/tf.math.log(2.)  # 3, i.e. log base 2 of 8
b=tf.constant([[2.,2.,2.,2.]])
tf.pow(b,3)
b**3
tf.sqrt(b)
matrix-wise
a=tf.ones([2,2])
b=tf.fill([2,2],2.)
a@b  # [[4.,4.],[4.,4.]]
tf.matmul(a,b)
a=tf.ones([4,2,3])
b=tf.fill([4,3,5],2.)
a@b  # batched matmul, shape=[4,2,5]
a.shape  # [4,2,3]
b=tf.fill([3,5],2.)  # b.shape = [3,5]
bb=tf.broadcast_to(b,[4,3,5])
a@bb  # shape=[4,2,5]
9. Forward Propagation
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # suppress some TensorFlow startup messages

# x: [60k, 28, 28]
# y: [60k]
(x, y), _ = datasets.mnist.load_data()
# x: [0~255] => [0~1.]
x = tf.convert_to_tensor(x, dtype=tf.float32) / 255.
y = tf.convert_to_tensor(y, dtype=tf.int32)
print(x.shape, y.shape, x.dtype, y.dtype)
print(tf.reduce_min(x), tf.reduce_max(x))
print(tf.reduce_min(y), tf.reduce_max(y))

train_db = tf.data.Dataset.from_tensor_slices((x, y)).batch(128)
train_iter = iter(train_db)
sample = next(train_iter)
print('batch:', sample[0].shape, sample[1].shape)

# [b, 784] => [b, 256] => [b, 128] => [b, 10]
# [dim_in, dim_out], [dim_out]
w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=0.1))
b1 = tf.Variable(tf.zeros([256]))
w2 = tf.Variable(tf.random.truncated_normal([256, 128], stddev=0.1))
b2 = tf.Variable(tf.zeros([128]))
w3 = tf.Variable(tf.random.truncated_normal([128, 10], stddev=0.1))
b3 = tf.Variable(tf.zeros([10]))

lr = 1e-3

for epoch in range(10):  # iterate over the dataset 10 times
    for step, (x, y) in enumerate(train_db):  # for every batch
        # x: [128, 28, 28]
        # y: [128]
        # [b, 28, 28] => [b, 28*28]
        x = tf.reshape(x, [-1, 28*28])
        with tf.GradientTape() as tape:  # records ops on tf.Variable by default
            # x: [b, 28*28]
            # h1 = x@w1 + b1
            # [b, 784]@[784, 256] + [256] => [b, 256] + [256] => [b, 256] + [b, 256]
            h1 = x@w1 + tf.broadcast_to(b1, [x.shape[0], 256])
            h1 = tf.nn.relu(h1)
            # [b, 256] => [b, 128]
            h2 = h1@w2 + b2
            h2 = tf.nn.relu(h2)
            # [b, 128] => [b, 10]
            out = h2@w3 + b3

            # compute loss
            # out: [b, 10]
            # y: [b] => [b, 10]
            y_onehot = tf.one_hot(y, depth=10)
            # mse = mean(sum(y-out)^2)
            # [b, 10]
            loss = tf.square(y_onehot - out)
            # mean: scalar
            loss = tf.reduce_mean(loss)

        # compute gradients
        grads = tape.gradient(loss, [w1, b1, w2, b2, w3, b3])
        # w1 = w1 - lr * w1_grad
        w1.assign_sub(lr * grads[0])
        b1.assign_sub(lr * grads[1])
        w2.assign_sub(lr * grads[2])
        b2.assign_sub(lr * grads[3])
        w3.assign_sub(lr * grads[4])
        b3.assign_sub(lr * grads[5])

        if step % 100 == 0:
            print(epoch, step, 'loss:', float(loss))
10. Merging and Splitting
tf.concat / tf.stack
a=tf.ones([4,32,8])
b=tf.ones([4,3,8])
c=tf.concat([a,b],axis=1)
c.shape  # ([4,35,8])
tf.stack([a,b],axis=0)  # error: stack requires all dimensions to be equal
tf.unstack / tf.split
# c: [2,4,35,8], e.g. two stacked [4,35,8] tensors
res=tf.unstack(c,axis=3)  # split axis 3 into 8 separate tensors
len(res)  # 8
res=tf.split(c,axis=3,num_or_size_splits=2)
len(res)  # 2
res[0].shape  # [2,4,35,4]
res=tf.split(c,axis=3,num_or_size_splits=[2,2,4])
res[0].shape, res[2].shape  # [2,4,35,2] [2,4,35,4]
11. Statistics
Vector norms
- Euclidean norm = sqrt(sum(xi**2))  # L2 norm: square root of the sum of squares
- Max norm = max(abs(xi))            # infinity norm: largest absolute value among the elements
- L1 norm = sum(abs(xi))             # sum of absolute values
a=tf.ones([2,2])
tf.norm(a)  # L2 norm = 2
== tf.sqrt(tf.reduce_sum(tf.square(a)))
a=tf.ones([4,28,28,3])
tf.norm(a)  # 96.99484 = sqrt(4*28*28*3)
b=tf.ones([2,2])
tf.norm(b)  # 2
tf.norm(b,ord=2,axis=1)  # L2 norm per row: [sqrt(2), sqrt(2)]
tf.norm(b,ord=1)         # L1 norm: 4
tf.norm(b,ord=1,axis=0)  # L1 norm per column: [2,2]
tf.norm(b,ord=1,axis=1)  # L1 norm per row: [2,2]
reduce_min / reduce_max / reduce_mean
a=tf.random.normal([4,10])
tf.reduce_min(a)
tf.reduce_max(a,axis=1)
argmax / argmin  # position of the max / min value
a.shape  # ([4,10])
tf.argmax(a).shape  # axis defaults to 0, so the result has shape [10]
tf.argmax(a)
tf.equal
a=tf.constant([1,2,3,3,5])
b=tf.range(5)
tf.equal(a,b)  # [False,False,False,True,False]
res=tf.equal(a,b)
tf.reduce_sum(tf.cast(res,dtype=tf.int32))  # 1 matching position
# accuracy
a=[[0.1,0.2,0.7],[0.9,0.05,0.05]]
pred=tf.cast(tf.argmax(a,axis=1),dtype=tf.int32)  # [2,0]
y=[2,1]
tf.equal(y,pred)  # [True,False]
correct=tf.reduce_sum(tf.cast(tf.equal(y,pred),dtype=tf.int32))  # 1
correct/2  # 0.5
**tf.unique**
a=tf.constant([4,2,2,4,3])
tf.unique(a)  # unique values [4,2,3] plus, for each original element, its index into them: [0,1,1,0,2]
12. Tensor Sorting
Sort / argsort
a=tf.random.shuffle(tf.range(5))  # [2,0,3,4,1]
tf.sort(a,direction='DESCENDING')     # [4,3,2,1,0]
tf.argsort(a,direction='DESCENDING')  # [3,2,0,4,1]
idx=tf.argsort(a,direction='DESCENDING')
tf.gather(a,idx)
a=tf.random.uniform([3,3],maxval=10,dtype=tf.int32)  # [[4,6,8],[9,4,7],[4,5,1]]
tf.sort(a)  # ascending along the last axis by default: [[4,6,8],[4,7,9],[1,4,5]]
tf.sort(a,direction='DESCENDING')
idx=tf.argsort(a)
Top_k
a=[[4,6,8],[9,4,7],[4,5,1]]
res=tf.math.top_k(a,2)  # the two largest values per row
res.indices  # [[2,1],[0,2],[1,0]]
res.values   # [[8,6],[9,7],[5,4]]
# accuracy
prob=tf.constant([[0.1,0.2,0.7],[0.2,0.7,0.1]])
k_b=tf.math.top_k(prob,3).indices  # [[2,1,0],[1,0,2]]
k_b=tf.transpose(k_b,[1,0])        # [[2,1],[1,0],[0,2]]
target=tf.broadcast_to(target,[3,2])  # target: the true labels, shape [2], e.g. [2,0]
#top_k accuracy
def accuracy(output, target, topk=(1,)):
    maxk = max(topk)
    batch_size = target.shape[0]

    # indices of the top-maxk predictions: [b, maxk] -> [maxk, b]
    pred = tf.math.top_k(output, maxk).indices
    pred = tf.transpose(pred, perm=[1, 0])
    target_ = tf.broadcast_to(target, pred.shape)
    correct = tf.equal(pred, target_)

    res = []
    for k in topk:
        correct_k = tf.cast(tf.reshape(correct[:k], [-1]), dtype=tf.float32)
        correct_k = tf.reduce_sum(correct_k)
        acc = float(correct_k * (100.0 / batch_size))
        res.append(acc)
    return res
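A minimal usage sketch for the accuracy() helper above; the random logits and the class count 6 are my own example data, not from the notes.
```python
import tensorflow as tf

output = tf.nn.softmax(tf.random.normal([10, 6]), axis=1)   # [batch, num_classes] probabilities
target = tf.random.uniform([10], maxval=6, dtype=tf.int32)  # true labels in [0, 6)
print(accuracy(output, target, topk=(1, 2, 3)))  # three accuracies in percent (values vary per run)
```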
13. Padding and Tiling
pad
a=tf.reshape(tf.range(9),[3,3])  # [[0,1,2],[3,4,5],[6,7,8]]
tf.pad(a,[[0,0],[1,0]])  # [[top,bottom],[left,right]]: pad one column on the left, filled with 0 by default
# image padding
a=tf.random.normal([4,28,28,3])
b=tf.pad(a,[[0,0],[2,2],[2,2],[0,0]])
b.shape  # [4,32,32,3]
tile (copying)
# a.shape = [3,3]
b=tf.tile(a,[1,2])  # [row factor, column factor]: 1 keeps the row dim, 2 doubles the column dim
# b.shape = [3,6]
tile vs broadcast_to: tile physically copies the data, broadcast_to only expands it logically (see the sketch below)
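A minimal sketch contrasting the two calls on the same input.
```python
import tensorflow as tf

a = tf.reshape(tf.range(3), [1, 3])  # shape [1, 3]
t = tf.tile(a, [2, 1])               # shape [2, 3], the row is actually duplicated in memory
b = tf.broadcast_to(a, [2, 3])       # shape [2, 3], virtual expansion, no copy of the data
print(t.shape, b.shape)              # (2, 3) (2, 3) -- both read back the same values
```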
14. Tensor Clipping
clip_by_value / ReLU
a=tf.range(10)  # [0,1,2,3,4,5,6,7,8,9]
tf.maximum(a,2)  # [2,2,2,3,4,5,6,7,8,9]
tf.minimum(a,8)  # [0,1,2,3,4,5,6,7,8,8]
tf.clip_by_value(a,2,8)  # [2,2,2,3,4,5,6,7,8,8]
a=a-5  # [-5,-4,-3,-2,-1,0,1,2,3,4]
tf.nn.relu(a)  # [0,0,0,0,0,0,1,2,3,4]
== tf.maximum(a,0)
clip_by_norm: rescales the tensor proportionally so its norm does not exceed the limit
a=tf.random.normal([2,2],mean=10)
tf.norm(a)
aa=tf.clip_by_norm(a,15)
tf.norm(aa)  # norm = 15
Gradient exploding: a single update step is too large, so the loss oscillates.
Gradient vanishing: the gradients are too small, so the loss stops decreasing.
To keep the gradient direction unchanged, scale the gradients of [w1,w2,w3] together:
new_grads, total_norm = tf.clip_by_global_norm(grads, 25)  # keep the overall norm at 25 (see the training-step sketch below)
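A minimal sketch of clip_by_global_norm inside a training step, assuming a Keras model and optimizer already exist; the clip value 15 and the MSE loss are my own placeholders.
```python
import tensorflow as tf

def train_step(model, optimizer, x, y):
    with tf.GradientTape() as tape:
        out = model(x)
        loss = tf.reduce_mean(tf.losses.MSE(y, out))
    grads = tape.gradient(loss, model.trainable_variables)
    # rescale all gradients together so their overall norm is at most 15,
    # which keeps the gradient direction unchanged
    grads, total_norm = tf.clip_by_global_norm(grads, 15)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return loss
```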
15. Advanced Operations
where(tensor)
a=tf.random.normal([3,3])
mask=a>0  # shape 3x3, values True or False
tf.boolean_mask(a,mask)  # returns the values where mask is True, == tf.gather_nd(a,indices)
indices=tf.where(mask)   # returns the coordinates where mask is True
mask  # [[T,T,F],[T,F,F],[T,T,F]]
A=tf.ones([3,3])
B=tf.zeros([3,3])
tf.where(mask,A,B)  # take from A where True, from B where False: [[1,1,0],[1,0,0],[1,1,0]]
scatter_nd
Updates are scattered onto an all-zeros base tensor of the given shape.
# 1-D update
indices=tf.constant([[4],[3],[1],[7]])
updates=tf.constant([9,10,11,12])
shape=tf.constant([8])  # base: length-8 vector of zeros
tf.scatter_nd(indices,updates,shape)  # [0,11,0,10,9,0,0,12]
# multi-dimensional updates work the same way
meshgrid
# numpy
def get_points():
    points=[]
    for y in np.linspace(-2,2,5):
        for x in np.linspace(-2,2,5):
            points.append([x,y])
    return np.array(points)
# tf
y=tf.linspace(-2.,2.,5)
x=tf.linspace(-2.,2.,5)
points_x,points_y=tf.meshgrid(x,y)
points_x.shape  # [5,5]
points_x  # x-coordinates of all grid points
points_y  # y-coordinates of all grid points
points=tf.stack([points_x,points_y],axis=2)
# plot the function over the grid (see the sketch below)
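A minimal plotting sketch over a meshgrid; the function z = sin(x) + sin(y) is my own example, chosen only to have something to draw.
```python
import tensorflow as tf
import matplotlib.pyplot as plt

x = tf.linspace(0., 2. * 3.1415926, 500)
y = tf.linspace(0., 2. * 3.1415926, 500)
point_x, point_y = tf.meshgrid(x, y)
z = tf.sin(point_x) + tf.sin(point_y)  # example function evaluated on the grid

plt.figure('2d function value')
plt.imshow(z, origin='lower', interpolation='none')
plt.colorbar()

plt.figure('2d function contour')
plt.contour(point_x, point_y, z)
plt.colorbar()
plt.show()
```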
16. Data Loading
keras.datasets
- boston housing
- mnist / fashion mnist
- cifar10/100
- imdb  # NLP
(x,y),(x_test,y_test)=keras.datasets.mnist.load_data()  # numpy arrays
x.shape  # (60000,28,28)
y.shape  # (60000,), labels 0-9
x.min(),x.max()  # 0, 255
y[:2]  # [5,0]
y_onehot=tf.one_hot(y,depth=10)
y_onehot[:2]  # [[0,0,0,0,0,1,0,0,0,0],[1,0,0,0,0,0,0,0,0,0]]
**tf.data.Dataset**  # wrap the data as tensors
```python
(x,y),(x_test,y_test)=keras.datasets.cifar10.load_data()
# 50k train, 10k test
db=tf.data.Dataset.from_tensor_slices((x_test,y_test))
next(iter(db))[0].shape  # tensor [32,32,3]
# shuffle: randomize the order
db=db.shuffle(10000)
# preprocessing
def preprocess(x,y):
    x=tf.cast(x,dtype=tf.float32)/255.  # scale to 0-1
    y=tf.cast(y,dtype=tf.int32)
    y=tf.one_hot(y,depth=10)
    return x,y
db2=db.map(preprocess)
res=next(iter(db2))
res[0].shape,res[1].shape  # tensor [32,32,3] [1,10]
db3=db2.batch(32)
res=next(iter(db3))
res[0].shape,res[1].shape  # tensor [32,32,32,3] [32,1,10]
db4=db3.repeat(2)
```
Standard preprocessing pipeline
def preprocess(x,y):
    x=tf.cast(x,dtype=tf.float32)/255.  # scale to 0-1
    y=tf.cast(y,dtype=tf.int32)
    return x,y

def mnist_dataset():
    (x,y),(x_test,y_test)=keras.datasets.fashion_mnist.load_data()
    y=tf.one_hot(y,depth=10)
    y_test=tf.one_hot(y_test,depth=10)

    ds=tf.data.Dataset.from_tensor_slices((x,y))
    ds=ds.map(preprocess)
    ds=ds.shuffle(60000).batch(100)
    ds_test=tf.data.Dataset.from_tensor_slices((x_test,y_test))
    ds_test=ds_test.map(preprocess)
    ds_test=ds_test.shuffle(10000).batch(100)
    return ds,ds_test
17. Fully Connected Layers
A fully connected network implements the matrix operations above as layers: input, hidden, and output layers.
Neurons x, connections (weights) w.
x=tf.random.normal([4,784])
net=tf.keras.layers.Dense(512)
out=net(x)
out.shape  # ([4,512])
net.kernel.shape, net.bias.shape  # [784,512], [512]
# Multi-layer network with keras.Sequential
import tensorflow as tf
from tensorflow import keras

x = tf.random.normal([2, 3])
model = keras.Sequential([
    keras.layers.Dense(2, activation='relu'),
    keras.layers.Dense(2, activation='relu'),
    keras.layers.Dense(2)
])
model.build(input_shape=[None, 3])
model.summary()
for p in model.trainable_variables:
    print(p.name, p.shape)
18. Output Activations
tf.sigmoid(): squashes each value into (0, 1)
tf.nn.softmax(): exp(y_i) / sum_j exp(y_j), squashes the outputs into probabilities that sum to 1
tf.tanh(): squashes each value into (-1, 1)
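A minimal sketch comparing the three output functions on the same logits (the logit values are my own example).
```python
import tensorflow as tf

logits = tf.constant([2.0, 1.0, 0.1])
print(tf.sigmoid(logits))     # each element independently mapped into (0, 1)
print(tf.nn.softmax(logits))  # elements are non-negative and sum to 1
print(tf.tanh(logits))        # each element mapped into (-1, 1)
```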
19. Loss Functions
MSE (Mean Squared Error): loss = 1/N * sum((y - out)**2)
y=tf.constant([1,2,3,0,2])
y=tf.one_hot(y,depth=4)
y=tf.cast(y,dtype=tf.float32)
out=tf.random.normal([5,4])
loss1=tf.reduce_mean(tf.square(y-out))
loss2=tf.square(tf.norm(y-out))/(5*4)
loss3=tf.reduce_mean(tf.losses.MSE(y,out))
Entropy: a large value means a uniform (uncertain) distribution, a small value means a peaked (unequal) one.
a=tf.fill([4],0.25)
a*tf.math.log(a)/tf.math.log(2.)  # p*log2(p)
-tf.reduce_sum(a*tf.math.log(a)/tf.math.log(2.))  # 2
a=tf.constant([0.1,0.1,0.1,0.7])
-tf.reduce_sum(a*tf.math.log(a)/tf.math.log(2.))  # 1.35677
Cross Entropy
tf.losses.categorical_crossentropy([0,1,0,0],[0.25,0.25,0.25,0.25])
criteon([0,1,0,0],[0.1,0.7,0.1,0.1])  # criteon = tf.losses.CategoricalCrossentropy()
tf.losses.BinaryCrossentropy()([1],[0.1])
tf.losses.categorical_crossentropy([0,1],logits,from_logits=True)
20. Gradient Descent
The gradient points in the direction in which the function increases.
w=tf.constant(1.)
x=tf.constant(2.)
with tf.GradientTape() as tape:
    tape.watch([w])
    y2=x*w
grad2=tape.gradient(y2,[w])  # [2.0]
with tf.GradientTape(persistent=True) as tape:  # persistent=True lets tape.gradient be called more than once (see the sketch below)
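A minimal sketch of persistent=True, reusing the same w and x as above plus a second output y3 of my own; the tape can then be queried more than once.
```python
import tensorflow as tf

w = tf.constant(1.)
x = tf.constant(2.)
with tf.GradientTape(persistent=True) as tape:
    tape.watch([w, x])   # constants must be watched explicitly
    y2 = x * w
    y3 = x * w ** 2
print(tape.gradient(y2, [w]))  # [2.0]
print(tape.gradient(y3, [w]))  # [4.0] = 2*x*w; a second call is only allowed with persistent=True
del tape                       # release the tape's resources when done
```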
21. Function Optimization
Himmelblau function
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import pyplot as plt
import tensorflow as tf

def himmelblau(x):
    return (x[0] ** 2 + x[1] - 11) ** 2 + (x[0] + x[1] ** 2 - 7) ** 2

x = np.arange(-6, 6, 0.1)
y = np.arange(-6, 6, 0.1)
print('x,y range:', x.shape, y.shape)
X, Y = np.meshgrid(x, y)
print('X,Y maps:', X.shape, Y.shape)
Z = himmelblau([X, Y])

fig = plt.figure('himmelblau')
ax = fig.gca(projection='3d')
ax.plot_surface(X, Y, Z)
ax.view_init(60, -30)
ax.set_xlabel('x')
ax.set_ylabel('y')
plt.show()

# different starting points can converge to different minima
x = tf.constant([4., 0.])
for step in range(200):
    with tf.GradientTape() as tape:
        tape.watch([x])
        y = himmelblau(x)
    grads = tape.gradient(y, [x])[0]
    x -= 0.01 * grads
    if step % 20 == 0:
        print('step {}: x = {}, f(x) = {}'
              .format(step, x.numpy(), y.numpy()))
22. Fashion-MNIST hands-on practice
23. TensorBoard Visualization
(Visdom, from the PyTorch ecosystem, is an alternative.)
# feed data to TensorBoard
import datetime
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = 'logs/' + current_time
summary_writer = tf.summary.create_file_writer(log_dir)
# then launch from the directory containing logs/:
tensorboard --logdir logs
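A minimal sketch of writing a scalar through the summary writer created above; the tag 'train-loss' and the loss value are placeholders of my own.
```python
import datetime
import tensorflow as tf

current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
summary_writer = tf.summary.create_file_writer('logs/' + current_time)

loss = 0.35  # placeholder; during training this would be the current batch loss
step = 100
with summary_writer.as_default():
    tf.summary.scalar('train-loss', float(loss), step=step)  # shows up under the Scalars tab
```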
24. Keras High-Level API
Main modules: datasets / layers / losses / metrics / optimizers
metrics
acc_meter = metrics.Accuracy()
loss_meter = metrics.Mean()
loss_meter.update_state(loss)
acc_meter.update_state(y, pred)
print(step, 'loss:', loss_meter.result().numpy())
loss_meter.reset_states()
compile / fit / evaluate / predict
network.compile(optimizer=optimizers.Adam(lr=0.01),
loss=tf.losses.CategoricalCrossentropy(from_logits=True),
metrics=['accuracy']
)
network.fit(db, epochs=5, validation_data=ds_val, validation_freq=2)  # run validation every 2 epochs
network.evaluate(ds_val)  # evaluate on the validation set after training
25. Custom Layers and Models
keras.Sequential
keras.layers.Layer
keras.Model
class MyDense(layers.Layer):
    def __init__(self, inp_dim, outp_dim):
        super(MyDense, self).__init__()
        self.kernel = self.add_variable('w', [inp_dim, outp_dim])
        self.bias = self.add_variable('b', [outp_dim])

    def call(self, inputs, training=None):
        out = inputs @ self.kernel + self.bias
        return out
class MyModel(keras.Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.fc1 = MyDense(28*28, 256)
        self.fc2 = MyDense(256, 128)
        self.fc3 = MyDense(128, 64)
        self.fc4 = MyDense(64, 32)
        self.fc5 = MyDense(32, 10)

    def call(self, inputs, training=None):
        x = self.fc1(inputs)
        x = tf.nn.relu(x)
        x = self.fc2(x)
        x = tf.nn.relu(x)
        x = self.fc3(x)
        x = tf.nn.relu(x)
        x = self.fc4(x)
        x = tf.nn.relu(x)
        x = self.fc5(x)
        return x
## 26. Saving and Loading Models
- save/load weights
```python
network.save_weights('weights.ckpt')  # save only the parameters
del network
# rebuild the network first, then:
network.load_weights('weights.ckpt')
```
- save/load entire model
```python
network.save('model.h5')
del network
# no need to rebuild the network
network = tf.keras.models.load_model('model.h5')
```
- saved_model  # for production deployment
```python
tf.saved_model.save(m, '/path/to/dir')
imported = tf.saved_model.load(path)
f = imported.signatures["serving_default"]
print(f(x=tf.ones([1, 28, 28, 3])))
```
27. Overfitting and Underfitting
Under-fitting: estimated model capacity < complexity of the ground truth.
Train accuracy is bad, and test accuracy is bad as well.
Over-fitting: complexity of the ground truth < estimated model capacity.
Train accuracy is much better, but test accuracy is worse.
==> Generalization performance degrades.
28. Dataset Splits and Cross-Validation
train / val / test
Using the test set for validation or hyper-parameter tuning leaks test information, so the measured generalization performance is no longer trustworthy.
k-fold cross-validation
Split train into train and val and train on it, then re-split into a different train/val pair and train again, rotating through the folds (see the sketch below).
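A minimal sketch of generating k-fold train/val index splits with numpy; the fold count, seed, and function name are my own choices.
```python
import numpy as np

def k_fold_indices(num_samples, k=5, seed=0):
    rng = np.random.default_rng(seed)
    idx = rng.permutation(num_samples)      # shuffle once
    folds = np.array_split(idx, k)          # k roughly equal folds
    for i in range(k):
        val_idx = folds[i]
        train_idx = np.concatenate([folds[j] for j in range(k) if j != i])
        yield train_idx, val_idx

# each round trains on x[train_idx], y[train_idx] and validates on x[val_idx], y[val_idx]
for train_idx, val_idx in k_fold_indices(num_samples=60000, k=5):
    pass
```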
29. Ways to Reduce Overfitting
- more data
- constrain model complexity
  (use a shallower model / regularization / Dropout)
- data augmentation
- early stopping
Regularization
Add a penalty to the loss function so that the weights of high-order / high-dimensional features are pushed toward 0.
L1 regularization: add the L1 norm of all network weights to the loss.
L2 regularization: add the squared L2 norm of all network weights to the loss (also called weight decay; see the sketch below).
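A minimal sketch of adding an L2 penalty on all trainable weights to the loss inside a training step; the coefficient 1e-4 is a placeholder of my own.
```python
import tensorflow as tf

def l2_regularized_loss(base_loss, model, lam=1e-4):
    # sum of squared weights over every trainable variable of the model
    reg = tf.add_n([tf.reduce_sum(tf.square(w)) for w in model.trainable_variables])
    return base_loss + lam * reg
```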
Momentum and Learning Rate Decay
momentum
Add momentum (a moving average of past gradients) to the current gradient direction; this helps training escape poor local minima and find a better solution.
learning rate decay
A learning rate that is too small makes training slow; one that is too large keeps it from converging. Start larger and decay it over time (see the sketch below).
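A minimal sketch of SGD with momentum plus an exponential learning-rate schedule; the concrete numbers (0.01, 0.9, 1000 steps) are my own placeholders.
```python
from tensorflow.keras import optimizers

lr_schedule = optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.01,  # start relatively large
    decay_steps=1000,            # decay every 1000 optimizer steps
    decay_rate=0.9)              # multiply the rate by 0.9 each time
optimizer = optimizers.SGD(learning_rate=lr_schedule, momentum=0.9)
```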
30. Training Tricks
- Early stopping
  Stop training early to prevent overfitting:
  use the validation set to select parameters,
  monitor validation performance,
  and stop at the highest validation performance.
- Dropout
  Learning less to learn better: randomly drop connections between fully connected layers during training (see the sketch after this list).
- Stochastic Gradient Descent
  Compute the gradient on one batch at a time instead of on the whole dataset.
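A minimal sketch of Dropout between Dense layers; the drop rate 0.5 and the layer sizes are my own choices. The training flag controls whether units are actually dropped.
```python
import tensorflow as tf
from tensorflow.keras import layers, Sequential

model = Sequential([
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.5),   # randomly zero out 50% of the activations during training
    layers.Dense(10)])
model.build(input_shape=[None, 784])

x = tf.random.normal([4, 784])
out_train = model(x, training=True)   # dropout active
out_eval = model(x, training=False)   # dropout disabled at evaluation time
```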
31. Convolutional Neural Networks
Receptive field: like the human eye, the network first looks at a small region of interest and covers the whole image by scanning with a sliding window.
Convolution kernel: weight sharing plus a sliding window, which greatly reduces the number of parameters.
[b,5,5,c] => [c,3,3] kernel => [b,3,3,1]  # c-channel input; multiply element-wise with the kernel and sum over channels
padding & stride (step size) control the output shape; with suitable padding the input and output spatial sizes stay the same
[b,5,5,c] => [n,c,3,3] (n kernels) => [b,3,3,n]  # multi-channel input, multi-channel output
layer.kernel  # [3,3,c,n]
import tensorflow as tf
from tensorflow.keras import layers
layer=layers.Conv2D(4,kernel_size=5,strides=1,padding='same')
layer.kernel  # [5,5,c,4], created once the layer has been built on an input
layer.bias    # (4,)
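A minimal sketch running the Conv2D layer above on a random image batch; the input size [4, 28, 28, 3] is my own example.
```python
import tensorflow as tf
from tensorflow.keras import layers

x = tf.random.normal([4, 28, 28, 3])
layer = layers.Conv2D(4, kernel_size=5, strides=1, padding='same')
out = layer(x)             # calling the layer builds it, creating kernel and bias
print(out.shape)           # (4, 28, 28, 4): 'same' padding keeps the 28x28 spatial size
print(layer.kernel.shape)  # (5, 5, 3, 4)
print(layer.bias.shape)    # (4,)
```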
32. Pooling and Sampling
- pooling: downsampling
  max / avg pooling: take the maximum / average within each window
x.shape  # [1,14,14,4]
pool=layers.MaxPool2D(2,strides=2)
out=pool(x)  # TensorShape [1,7,7,4]
out=tf.nn.max_pool2d(x,2,strides=2,padding='VALID')
- upsample: increase the spatial size
```python
x=tf.random.normal([1,7,7,4])
layer=layers.UpSampling2D(size=3)
out=layer(x)
# TensorShape [1,21,21,4]
```
- ReLU: the same relu activation function, applied element-wise to the feature map (negative values become 0)