TensorFlow Notes
1. Linear Regression
Given a set of points, fit the regression line y = wx + b.
This reduces to minimizing loss = sum((w*xi + b - yi)**2).
Gradient descent: w' = w - lr * d(loss)/dw
b' = b - lr * d(loss)/db
so that w'x + b' approaches y.
import numpy as np

# y = wx + b
def compute_error_for_line_given_points(b, w, points):
    totalError = 0
    for i in range(0, len(points)):
        x = points[i, 0]
        y = points[i, 1]
        # compute mean-squared-error
        totalError += (y - (w * x + b)) ** 2
    # average loss over all points
    return totalError / float(len(points))

def step_gradient(b_current, w_current, points, learningRate):
    b_gradient = 0
    w_gradient = 0
    N = float(len(points))
    for i in range(0, len(points)):
        x = points[i, 0]
        y = points[i, 1]
        # grad_b = 2(wx+b-y)
        b_gradient += (2/N) * ((w_current * x + b_current) - y)
        # grad_w = 2(wx+b-y)*x
        w_gradient += (2/N) * x * ((w_current * x + b_current) - y)
    # update w', b'
    new_b = b_current - (learningRate * b_gradient)
    new_w = w_current - (learningRate * w_gradient)
    return [new_b, new_w]

def gradient_descent_runner(points, starting_b, starting_w, learning_rate, num_iterations):
    b = starting_b
    w = starting_w
    # update for several iterations
    for i in range(num_iterations):
        b, w = step_gradient(b, w, np.array(points), learning_rate)
    return [b, w]

def run():
    points = np.genfromtxt("data.csv", delimiter=",")
    learning_rate = 0.0001
    initial_b = 0  # initial y-intercept guess
    initial_w = 0  # initial slope guess
    num_iterations = 1000
    print("Starting gradient descent at b = {0}, w = {1}, error = {2}"
          .format(initial_b, initial_w,
                  compute_error_for_line_given_points(initial_b, initial_w, points)))
    print("Running...")
    [b, w] = gradient_descent_runner(points, initial_b, initial_w, learning_rate, num_iterations)
    print("After {0} iterations b = {1}, w = {2}, error = {3}"
          .format(num_iterations, b, w,
                  compute_error_for_line_given_points(b, w, points)))

if __name__ == '__main__':
    run()
2. Image Classification: MNIST Handwritten Digits
iteration: one iteration (also called a training step) updates the network parameters once;
batch size: the number of samples used in one iteration;
epoch: one epoch is one full pass over all samples in the training set.
Image [28,28,1] -> [784]
X: [b, 784], b images
W: [784, 10], 10 classes
b: [10]
out = X@W + b    # out: [b,10]
out = relu(X@W + b)
Rectified Linear Unit (ReLU): the neuron activation function.
Without an activation function, each layer's output is a linear function of the previous layer's input; no matter how many layers the network has, the output is still a linear combination of the input, which is equivalent to having no hidden layers at all.
Gradient vanishing: the sigmoid function saturates as its input goes to positive or negative infinity, so its gradient approaches zero there.
import os
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, optimizers, datasets

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

(x, y), (x_val, y_val) = datasets.mnist.load_data()
x = tf.convert_to_tensor(x, dtype=tf.float32) / 255.
y = tf.convert_to_tensor(y, dtype=tf.int32)
y = tf.one_hot(y, depth=10)
print(x.shape, y.shape)
train_dataset = tf.data.Dataset.from_tensor_slices((x, y))
train_dataset = train_dataset.batch(200)

model = keras.Sequential([
    layers.Dense(512, activation='relu'),
    layers.Dense(256, activation='relu'),
    layers.Dense(10)])

optimizer = optimizers.SGD(learning_rate=0.001)

def train_epoch(epoch):
    # Step 4. loop over batches
    for step, (x, y) in enumerate(train_dataset):
        with tf.GradientTape() as tape:
            # [b, 28, 28] => [b, 784]
            x = tf.reshape(x, (-1, 28*28))
            # Step 1. compute output
            # [b, 784] => [b, 10]
            out = model(x)
            # Step 2. compute loss
            loss = tf.reduce_sum(tf.square(out - y)) / x.shape[0]
        # Step 3. optimize and update w1, w2, w3, b1, b2, b3
        grads = tape.gradient(loss, model.trainable_variables)
        # w' = w - lr * grad
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        if step % 100 == 0:
            print(epoch, step, 'loss:', loss.numpy())

def train():
    for epoch in range(30):
        train_epoch(epoch)

if __name__ == '__main__':
    train()
3. TensorFlow Data Types
list -> np.array -> tf.Tensor
- int, float, double
- bool
- string
# data types
tf.constant(1)    # int32
tf.constant(1.)   # float32
tf.constant(1.1, dtype=tf.int32)    # error: a float literal cannot be stored as int
tf.constant(1., dtype=tf.double)    # float64
tf.constant([True, False])          # bool
tf.constant('hello world')          # string
# attributes
with tf.device('cpu'):
    a = tf.constant([1])
a.device      # cpu
aa = a.gpu()  # aa is a copy placed on the GPU
a.numpy()     # return the numpy array
a.ndim        # number of dimensions
a.shape       # shape
# type checks
a = tf.constant([1.])
isinstance(a, tf.Tensor)
tf.is_tensor(a)  # True
a.dtype          # tf.float32
# dtype conversion
a = np.arange(5)  # [0,1,2,3,4]
a.dtype           # int64
aa = tf.convert_to_tensor(a)                  # int64
aa = tf.convert_to_tensor(a, dtype=tf.int32)
tf.cast(aa, dtype=tf.float32)  # tf.cast converts the dtype
b = tf.constant([0, 1])
tf.cast(b, dtype=bool)  # [False, True]
c = tf.Variable(a)
c.trainable  # True
int(tensor)  # direct conversion, only for scalar tensors
4. Creating Tensors
tf.convert_to_tensor(np.ones([2,3]))  # 2x3, all ones, dtype=float64
tf.convert_to_tensor([1,2])           # dtype=int32, 1-D tensor of length 2
tf.convert_to_tensor([[1],[2.]])      # 2 rows, 1 column, dtype=float32
tf.zeros([2,2])       # shape=(2,2), all zeros
tf.zeros_like(a)
tf.zeros(a.shape)     # equivalent
tf.ones(1)            # shape=(1,)
tf.ones([])           # shape=(), scalar
tf.ones([2])          # shape=(2,)
tf.ones([2,3])
tf.ones_like(a) == tf.ones(a.shape)
tf.fill([2,2],0)      # fill with 0
tf.random.normal([2,2],mean=1,stddev=1)  # mean: mean of the normal distribution (default 0); stddev: standard deviation (default 1.0)
tf.random.normal([2,2])
tf.random.truncated_normal([2,2],mean=0,stddev=1)  # samples from a truncated normal: values farther than two standard deviations from the mean are re-drawn, unlike the ordinary normal
tf.random.uniform([2,2],minval=0,maxval=1)  # uniform over [0,1)
idx=tf.range(10)
idx=tf.random.shuffle(idx)  # shuffle the order
a=tf.gather(a,idx)
b=tf.gather(b,idx)  # use the same idx so paired data (a, b) stay aligned
Dimension conventions:
NLP: [b, seq_len, word_dim]  # b sentences, sequence length, word-embedding dimension
Image: [b, h, w, c]          # 4-D: b images, height, width, RGB channels
5. Indexing and Slicing
a=tf.ones([1,5,5,3])
a[0][0]           # shape (5,3), all values 1
a[0][0][0]        # shape (3,)
a[0][0][0][0]     # shape ()
a=tf.random.normal([4,28,28,3])  # image batch
a[1].shape        # [28,28,3]
a[1,2].shape      # [28,3]
a[1,2,3].shape    # [3]
a[1,2,3,2].shape  # []
a=tf.range(10)
a[-1:]  # [9]
a[:2]   # [0,1]
a=tf.random.normal([4,28,28,3])  # back to the image batch, a.shape == (4,28,28,3)
a[:,:,:,0]  # shape (4,28,28), the first (R) channel of every image
# start:end:step
# ::step
a=tf.range(4)
a[::-1]   # reversed: 3,2,1,0
a[::-2]   # 3,1
a[2::-2]  # 2,0
a=tf.random.normal([2,4,28,28,3])  # e.g. 2 tasks of image batches
a[0,...].shape == a[0,:,:,:,:].shape
# scores [4,35,8]: 4 classes, 35 students, 8 subjects
tf.gather(a,axis=0,indices=[2,1,3,0])
tf.gather_nd(a,[0,1,2]).shape    # score of class 0, student 1, subject 2 -> shape []
tf.gather_nd(a,[[0,1,2]]).shape  # [1]
tf.boolean_mask(a,mask=[True,True,False,False])  # mask along axis 0
6. Dimension Transformations
view
[b,28,28]
-> [b,28*28]
-> [b,2,14*28]
-> [b,28,28,1]
content: [b,h,w,c]  [batch, height, width, channel]
image: [4,28,28,3], the view keeps content (h, w)
reshape to: [4,784,3]
[4,784,3] -- with height=28, width=28 --> [4,28,28,3]
tf.reshape
a=tf.random.normal([4,28,28,3])  # view 1
a.shape, a.ndim  # [4,28,28,3], 4
tf.reshape(a,[4,784,3]) == tf.reshape(a,[4,-1,3])   # view 2
tf.reshape(a,[4,784*3]) == tf.reshape(a,[4,-1])     # view 3
tf.transpose
a=tf.random.normal((4,3,2,1))
tf.transpose(a).shape           # ([1,2,3,4])
tf.transpose(a,perm=[0,1,3,2])  # ([4,3,1,2])
Squeeze and expand dims
a=tf.random.normal([4,35,8])
tf.expand_dims(a,axis=0).shape   # ([1,4,35,8])
tf.expand_dims(a,axis=3).shape   # ([4,35,8,1])
tf.expand_dims(a,axis=-1).shape  # ([4,35,8,1])
tf.expand_dims(a,axis=-4).shape  # ([1,4,35,8])
tf.squeeze(tf.zeros([1,2,1,1,3])).shape  # ([2,3])
a=tf.zeros([1,2,1,3])
tf.squeeze(a,axis=0).shape  # ([2,1,3])
7. Broadcasting
Key idea:
- insert a dimension of size 1 in front if needed
- expand dimensions of size 1 to the target size
The tensors are logically expanded to the same shape for the operation; the expanded part holds no actual data.
Shapes are aligned from the right.
Saves memory.
x=tf.random.normal([4,32,32,3])
(x+tf.random.normal([3])).shape        # ([4,32,32,3])
(x+tf.random.normal([32,32,1])).shape  # ([4,32,32,3])
(x+tf.random.normal([4,1,1,1])).shape  # ([4,32,32,3])
(x+tf.random.normal([1,4,1,1])).shape  # incompatible shapes
b=tf.broadcast_to(tf.random.normal([4,1,1,1]),[4,32,32,3])
8. Math Operations
element-wise
b=tf.fill([2,2],2.)
a=tf.ones([2,2])
a+b, a-b, a*b, a/b
b//a, b%a  # floor division, modulo
a=tf.constant([[1.,1.,1.,1.]])
tf.math.log(a)
tf.exp(a)
tf.math.log(8.)/tf.math.log(2.)  # 3, i.e. log base 2 of 8
b=tf.constant([[2.,2.,2.,2.]])
tf.pow(b,3)
b**3
tf.sqrt(b)
matrix-wise
a=tf.ones([2,2])
b=tf.fill([2,2],2.)
a@b  # [[4.,4.],[4.,4.]]
tf.matmul(a,b)
a=tf.ones([4,2,3])
b=tf.fill([4,3,5],2.)
a@b  # batched matmul, shape=[4,2,5]
a.shape  # [4,2,3]
b=tf.fill([3,5],2.)  # b.shape = [3,5]
bb=tf.broadcast_to(b,[4,3,5])
a@bb  # shape=[4,2,5]
9. Forward Propagation
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # suppress some TensorFlow startup messages

# x: [60k, 28, 28]
# y: [60k]
(x, y), _ = datasets.mnist.load_data()
# x: [0~255] => [0~1.]
x = tf.convert_to_tensor(x, dtype=tf.float32) / 255.
y = tf.convert_to_tensor(y, dtype=tf.int32)
print(x.shape, y.shape, x.dtype, y.dtype)
print(tf.reduce_min(x), tf.reduce_max(x))
print(tf.reduce_min(y), tf.reduce_max(y))

train_db = tf.data.Dataset.from_tensor_slices((x, y)).batch(128)
train_iter = iter(train_db)
sample = next(train_iter)
print('batch:', sample[0].shape, sample[1].shape)

# [b, 784] => [b, 256] => [b, 128] => [b, 10]
# [dim_in, dim_out], [dim_out]
w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=0.1))
b1 = tf.Variable(tf.zeros([256]))
w2 = tf.Variable(tf.random.truncated_normal([256, 128], stddev=0.1))
b2 = tf.Variable(tf.zeros([128]))
w3 = tf.Variable(tf.random.truncated_normal([128, 10], stddev=0.1))
b3 = tf.Variable(tf.zeros([10]))

lr = 1e-3

for epoch in range(10):  # iterate over the dataset 10 times
    for step, (x, y) in enumerate(train_db):  # for every batch
        # x: [128, 28, 28]
        # y: [128]
        # [b, 28, 28] => [b, 28*28]
        x = tf.reshape(x, [-1, 28*28])
        with tf.GradientTape() as tape:  # records ops on tf.Variable by default
            # x: [b, 28*28]
            # h1 = x@w1 + b1
            # [b, 784]@[784, 256] + [256] => [b, 256] + [256] => [b, 256] + [b, 256]
            h1 = x@w1 + tf.broadcast_to(b1, [x.shape[0], 256])
            h1 = tf.nn.relu(h1)
            # [b, 256] => [b, 128]
            h2 = h1@w2 + b2
            h2 = tf.nn.relu(h2)
            # [b, 128] => [b, 10]
            out = h2@w3 + b3

            # compute loss
            # out: [b, 10]
            # y: [b] => [b, 10]
            y_onehot = tf.one_hot(y, depth=10)
            # mse = mean(sum(y-out)^2)
            # [b, 10]
            loss = tf.square(y_onehot - out)
            # mean: scalar
            loss = tf.reduce_mean(loss)

        # compute gradients
        grads = tape.gradient(loss, [w1, b1, w2, b2, w3, b3])
        # w1 = w1 - lr * w1_grad
        w1.assign_sub(lr * grads[0])
        b1.assign_sub(lr * grads[1])
        w2.assign_sub(lr * grads[2])
        b2.assign_sub(lr * grads[3])
        w3.assign_sub(lr * grads[4])
        b3.assign_sub(lr * grads[5])

        if step % 100 == 0:
            print(epoch, step, 'loss:', float(loss))
10. Merging and Splitting
tf.concat / tf.stack
a=tf.ones([4,32,8])
b=tf.ones([4,3,8])
c=tf.concat([a,b],axis=1)
c.shape  # ([4,35,8])
tf.stack([a,b],axis=0)  # error: stack requires all dimensions to be equal
tf.unstack / tf.split
# c: [2,4,35,8], e.g. two stacked [4,35,8] tensors
res=tf.unstack(c,axis=3)  # split axis 3 into 8 separate tensors
len(res)  # 8
res=tf.split(c,axis=3,num_or_size_splits=2)
len(res)  # 2
res[0].shape  # [2,4,35,4]
res=tf.split(c,axis=3,num_or_size_splits=[2,2,4])
res[0].shape, res[2].shape  # [2,4,35,2] [2,4,35,4]
11. Statistics
Vector norms
- Euclidean norm = sqrt(sum(xi**2))  # L2 norm: square root of the sum of squares
- Max norm = max(abs(xi))            # infinity norm: largest absolute value among the elements
- L1 norm = sum(abs(xi))             # sum of absolute values
a=tf.ones([2,2])
tf.norm(a)  # L2 norm = 2
== tf.sqrt(tf.reduce_sum(tf.square(a)))
a=tf.ones([4,28,28,3])
tf.norm(a)  # 96.99484 = sqrt(4*28*28*3)
b=tf.ones([2,2])
tf.norm(b)  # 2
tf.norm(b,ord=2,axis=1)  # L2 norm per row: [sqrt(2), sqrt(2)]
tf.norm(b,ord=1)         # L1 norm: 4
tf.norm(b,ord=1,axis=0)  # L1 norm per column: [2,2]
tf.norm(b,ord=1,axis=1)  # L1 norm per row: [2,2]
reduce_min / reduce_max / reduce_mean
a=tf.random.normal([4,10])
tf.reduce_min(a)
tf.reduce_max(a,axis=1)
argmax / argmin  # position of the max / min value
a.shape  # ([4,10])
tf.argmax(a).shape  # axis defaults to 0, so the result has shape [10]
tf.argmax(a)
tf.equal
a=tf.constant([1,2,3,3,5])
b=tf.range(5)
tf.equal(a,b)  # [False,False,False,True,False]
res=tf.equal(a,b)
tf.reduce_sum(tf.cast(res,dtype=tf.int32))  # 1 matching position
# accuracy
a=[[0.1,0.2,0.7],[0.9,0.05,0.05]]
pred=tf.cast(tf.argmax(a,axis=1),dtype=tf.int32)  # [2,0]
y=[2,1]
tf.equal(y,pred)  # [True,False]
correct=tf.reduce_sum(tf.cast(tf.equal(y,pred),dtype=tf.int32))  # 1
correct/2  # 0.5
**tf.unique**
a=tf.constant([4,2,2,4,3])
tf.unique(a)  # unique values [4,2,3] plus, for each original element, its index into them: [0,1,1,0,2]
12. Tensor Sorting
Sort / argsort
a=tf.random.shuffle(tf.range(5))  # [2,0,3,4,1]
tf.sort(a,direction='DESCENDING')     # [4,3,2,1,0]
tf.argsort(a,direction='DESCENDING')  # [3,2,0,4,1]
idx=tf.argsort(a,direction='DESCENDING')
tf.gather(a,idx)
a=tf.random.uniform([3,3],maxval=10,dtype=tf.int32)  # [[4,6,8],[9,4,7],[4,5,1]]
tf.sort(a)  # ascending along the last axis by default: [[4,6,8],[4,7,9],[1,4,5]]
tf.sort(a,direction='DESCENDING')
idx=tf.argsort(a)
Top_k
a=[[4,6,8],[9,4,7],[4,5,1]]
res=tf.math.top_k(a,2)  # the two largest values per row
res.indices  # [[2,1],[0,2],[1,0]]
res.values   # [[8,6],[9,7],[5,4]]
# accuracy
prob=tf.constant([[0.1,0.2,0.7],[0.2,0.7,0.1]])
k_b=tf.math.top_k(prob,3).indices  # [[2,1,0],[1,0,2]]
k_b=tf.transpose(k_b,[1,0])        # [[2,1],[1,0],[0,2]]
target=tf.broadcast_to(target,[3,2])  # target: the true labels, shape [2], e.g. [2,0]
#top_k accuracy
def accuracy(output, target, topk=(1,)):
    maxk = max(topk)
    batch_size = target.shape[0]

    # indices of the top-maxk predictions: [b, maxk] -> [maxk, b]
    pred = tf.math.top_k(output, maxk).indices
    pred = tf.transpose(pred, perm=[1, 0])
    target_ = tf.broadcast_to(target, pred.shape)
    correct = tf.equal(pred, target_)

    res = []
    for k in topk:
        correct_k = tf.cast(tf.reshape(correct[:k], [-1]), dtype=tf.float32)
        correct_k = tf.reduce_sum(correct_k)
        acc = float(correct_k * (100.0 / batch_size))
        res.append(acc)
    return res
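A minimal usage sketch for the accuracy() helper above; the random logits and the class count 6 are my own example data, not from the notes.
```python
import tensorflow as tf

output = tf.nn.softmax(tf.random.normal([10, 6]), axis=1)   # [batch, num_classes] probabilities
target = tf.random.uniform([10], maxval=6, dtype=tf.int32)  # true labels in [0, 6)
print(accuracy(output, target, topk=(1, 2, 3)))  # three accuracies in percent (values vary per run)
```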
13. Padding and Tiling
pad
a=tf.reshape(tf.range(9),[3,3])  # [[0,1,2],[3,4,5],[6,7,8]]
tf.pad(a,[[0,0],[1,0]])  # [[top,bottom],[left,right]]: pad one column on the left, filled with 0 by default
# image padding
a=tf.random.normal([4,28,28,3])
b=tf.pad(a,[[0,0],[2,2],[2,2],[0,0]])
b.shape  # [4,32,32,3]
tile (copying)
# a.shape = [3,3]
b=tf.tile(a,[1,2])  # [row factor, column factor]: 1 keeps the row dim, 2 doubles the column dim
# b.shape = [3,6]
tile vs broadcast_to: tile physically copies the data, broadcast_to only expands it logically (see the sketch below)
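A minimal sketch contrasting the two calls on the same input.
```python
import tensorflow as tf

a = tf.reshape(tf.range(3), [1, 3])  # shape [1, 3]
t = tf.tile(a, [2, 1])               # shape [2, 3], the row is actually duplicated in memory
b = tf.broadcast_to(a, [2, 3])       # shape [2, 3], virtual expansion, no copy of the data
print(t.shape, b.shape)              # (2, 3) (2, 3) -- both read back the same values
```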
14. Tensor Clipping
clip_by_value / ReLU
a=tf.range(10)  # [0,1,2,3,4,5,6,7,8,9]
tf.maximum(a,2)  # [2,2,2,3,4,5,6,7,8,9]
tf.minimum(a,8)  # [0,1,2,3,4,5,6,7,8,8]
tf.clip_by_value(a,2,8)  # [2,2,2,3,4,5,6,7,8,8]
a=a-5  # [-5,-4,-3,-2,-1,0,1,2,3,4]
tf.nn.relu(a)  # [0,0,0,0,0,0,1,2,3,4]
== tf.maximum(a,0)
clip_by_norm: rescales the tensor proportionally so its norm does not exceed the limit
a=tf.random.normal([2,2],mean=10)
tf.norm(a)
aa=tf.clip_by_norm(a,15)
tf.norm(aa)  # norm = 15
Gradient exploding: a single update step is too large, so the loss oscillates.
Gradient vanishing: the gradients are too small, so the loss stops decreasing.
To keep the gradient direction unchanged, scale the gradients of [w1,w2,w3] together:
new_grads, total_norm = tf.clip_by_global_norm(grads, 25)  # keep the overall norm at 25 (see the training-step sketch below)
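A minimal sketch of clip_by_global_norm inside a training step, assuming a Keras model and optimizer already exist; the clip value 15 and the MSE loss are my own placeholders.
```python
import tensorflow as tf

def train_step(model, optimizer, x, y):
    with tf.GradientTape() as tape:
        out = model(x)
        loss = tf.reduce_mean(tf.losses.MSE(y, out))
    grads = tape.gradient(loss, model.trainable_variables)
    # rescale all gradients together so their overall norm is at most 15,
    # which keeps the gradient direction unchanged
    grads, total_norm = tf.clip_by_global_norm(grads, 15)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return loss
```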
15. Advanced Operations
where(tensor)
a=tf.random.normal([3,3])
mask=a>0  # shape 3x3, values True or False
tf.boolean_mask(a,mask)  # returns the values where mask is True, == tf.gather_nd(a,indices)
indices=tf.where(mask)   # returns the coordinates where mask is True
mask  # [[T,T,F],[T,F,F],[T,T,F]]
A=tf.ones([3,3])
B=tf.zeros([3,3])
tf.where(mask,A,B)  # take from A where True, from B where False: [[1,1,0],[1,0,0],[1,1,0]]
scatter_nd
Updates are scattered onto an all-zeros base tensor of the given shape.
# 1-D update
indices=tf.constant([[4],[3],[1],[7]])
updates=tf.constant([9,10,11,12])
shape=tf.constant([8])  # base: length-8 vector of zeros
tf.scatter_nd(indices,updates,shape)  # [0,11,0,10,9,0,0,12]
# multi-dimensional updates work the same way
meshgrid
# numpy
def get_points():
    points=[]
    for y in np.linspace(-2,2,5):
        for x in np.linspace(-2,2,5):
            points.append([x,y])
    return np.array(points)
# tf
y=tf.linspace(-2.,2.,5)
x=tf.linspace(-2.,2.,5)
points_x,points_y=tf.meshgrid(x,y)
points_x.shape  # [5,5]
points_x  # x-coordinates of all grid points
points_y  # y-coordinates of all grid points
points=tf.stack([points_x,points_y],axis=2)
# plot the function over the grid (see the sketch below)
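A minimal plotting sketch over a meshgrid; the function z = sin(x) + sin(y) is my own example, chosen only to have something to draw.
```python
import tensorflow as tf
import matplotlib.pyplot as plt

x = tf.linspace(0., 2. * 3.1415926, 500)
y = tf.linspace(0., 2. * 3.1415926, 500)
point_x, point_y = tf.meshgrid(x, y)
z = tf.sin(point_x) + tf.sin(point_y)  # example function evaluated on the grid

plt.figure('2d function value')
plt.imshow(z, origin='lower', interpolation='none')
plt.colorbar()

plt.figure('2d function contour')
plt.contour(point_x, point_y, z)
plt.colorbar()
plt.show()
```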
16. Data Loading
keras.datasets
- boston housing
- mnist / fashion mnist
- cifar10/100
- imdb  # NLP
(x,y),(x_test,y_test)=keras.datasets.mnist.load_data()  # numpy arrays
x.shape  # (60000,28,28)
y.shape  # (60000,), labels 0-9
x.min(),x.max()  # 0, 255
y[:2]  # [5,0]
y_onehot=tf.one_hot(y,depth=10)
y_onehot[:2]  # [[0,0,0,0,0,1,0,0,0,0],[1,0,0,0,0,0,0,0,0,0]]
**tf.data.Dataset**  # wrap the data as tensors
```python
(x,y),(x_test,y_test)=keras.datasets.cifar10.load_data()
# 50k train, 10k test
db=tf.data.Dataset.from_tensor_slices((x_test,y_test))
next(iter(db))[0].shape  # tensor [32,32,3]
# shuffle: randomize the order
db=db.shuffle(10000)
# preprocessing
def preprocess(x,y):
    x=tf.cast(x,dtype=tf.float32)/255.  # scale to 0-1
    y=tf.cast(y,dtype=tf.int32)
    y=tf.one_hot(y,depth=10)
    return x,y
db2=db.map(preprocess)
res=next(iter(db2))
res[0].shape,res[1].shape  # tensor [32,32,3] [1,10]
db3=db2.batch(32)
res=next(iter(db3))
res[0].shape,res[1].shape  # tensor [32,32,32,3] [32,1,10]
db4=db3.repeat(2)
```
Standard preprocessing pipeline
def preprocess(x,y):
    x=tf.cast(x,dtype=tf.float32)/255.  # scale to 0-1
    y=tf.cast(y,dtype=tf.int32)
    return x,y

def mnist_dataset():
    (x,y),(x_test,y_test)=keras.datasets.fashion_mnist.load_data()
    y=tf.one_hot(y,depth=10)
    y_test=tf.one_hot(y_test,depth=10)

    ds=tf.data.Dataset.from_tensor_slices((x,y))
    ds=ds.map(preprocess)
    ds=ds.shuffle(60000).batch(100)
    ds_test=tf.data.Dataset.from_tensor_slices((x_test,y_test))
    ds_test=ds_test.map(preprocess)
    ds_test=ds_test.shuffle(10000).batch(100)
    return ds,ds_test
17. Fully Connected Layers
A fully connected network implements the matrix operations above as layers: input, hidden, and output layers.
Neurons x, connections (weights) w.
x=tf.random.normal([4,784])
net=tf.keras.layers.Dense(512)
out=net(x)
out.shape  # ([4,512])
net.kernel.shape, net.bias.shape  # [784,512], [512]
# Multi-layer network with keras.Sequential
import tensorflow as tf
from tensorflow import keras

x = tf.random.normal([2, 3])
model = keras.Sequential([
    keras.layers.Dense(2, activation='relu'),
    keras.layers.Dense(2, activation='relu'),
    keras.layers.Dense(2)
])
model.build(input_shape=[None, 3])
model.summary()
for p in model.trainable_variables:
    print(p.name, p.shape)
18. Output Activations
tf.sigmoid(): squashes each value into (0, 1)
tf.nn.softmax(): exp(y_i) / sum_j exp(y_j), squashes the outputs into probabilities that sum to 1
tf.tanh(): squashes each value into (-1, 1)
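A minimal sketch comparing the three output functions on the same logits (the logit values are my own example).
```python
import tensorflow as tf

logits = tf.constant([2.0, 1.0, 0.1])
print(tf.sigmoid(logits))     # each element independently mapped into (0, 1)
print(tf.nn.softmax(logits))  # elements are non-negative and sum to 1
print(tf.tanh(logits))        # each element mapped into (-1, 1)
```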
19. Loss Functions
MSE (Mean Squared Error): loss = 1/N * sum((y - out)**2)
y=tf.constant([1,2,3,0,2])
y=tf.one_hot(y,depth=4)
y=tf.cast(y,dtype=tf.float32)
out=tf.random.normal([5,4])
loss1=tf.reduce_mean(tf.square(y-out))
loss2=tf.square(tf.norm(y-out))/(5*4)
loss3=tf.reduce_mean(tf.losses.MSE(y,out))
Entropy: a large value means a uniform (uncertain) distribution, a small value means a peaked (unequal) one.
a=tf.fill([4],0.25)
a*tf.math.log(a)/tf.math.log(2.)  # p*log2(p)
-tf.reduce_sum(a*tf.math.log(a)/tf.math.log(2.))  # 2
a=tf.constant([0.1,0.1,0.1,0.7])
-tf.reduce_sum(a*tf.math.log(a)/tf.math.log(2.))  # 1.35677
Cross Entropy
tf.losses.categorical_crossentropy([0,1,0,0],[0.25,0.25,0.25,0.25])
criteon([0,1,0,0],[0.1,0.7,0.1,0.1])  # criteon = tf.losses.CategoricalCrossentropy()
tf.losses.BinaryCrossentropy()([1],[0.1])
tf.losses.categorical_crossentropy([0,1],logits,from_logits=True)
20. Gradient Descent
The gradient points in the direction in which the function increases.
w=tf.constant(1.)
x=tf.constant(2.)
with tf.GradientTape() as tape:
    tape.watch([w])
    y2=x*w
grad2=tape.gradient(y2,[w])  # [2.0]
with tf.GradientTape(persistent=True) as tape:  # persistent=True lets tape.gradient be called more than once (see the sketch below)
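A minimal sketch of persistent=True, reusing the same w and x as above plus a second output y3 of my own; the tape can then be queried more than once.
```python
import tensorflow as tf

w = tf.constant(1.)
x = tf.constant(2.)
with tf.GradientTape(persistent=True) as tape:
    tape.watch([w, x])   # constants must be watched explicitly
    y2 = x * w
    y3 = x * w ** 2
print(tape.gradient(y2, [w]))  # [2.0]
print(tape.gradient(y3, [w]))  # [4.0] = 2*x*w; a second call is only allowed with persistent=True
del tape                       # release the tape's resources when done
```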
21. Function Optimization
Himmelblau function
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import pyplot as plt
import tensorflow as tf

def himmelblau(x):
    return (x[0] ** 2 + x[1] - 11) ** 2 + (x[0] + x[1] ** 2 - 7) ** 2

x = np.arange(-6, 6, 0.1)
y = np.arange(-6, 6, 0.1)
print('x,y range:', x.shape, y.shape)
X, Y = np.meshgrid(x, y)
print('X,Y maps:', X.shape, Y.shape)
Z = himmelblau([X, Y])

fig = plt.figure('himmelblau')
ax = fig.gca(projection='3d')
ax.plot_surface(X, Y, Z)
ax.view_init(60, -30)
ax.set_xlabel('x')
ax.set_ylabel('y')
plt.show()

# different starting points can converge to different minima
x = tf.constant([4., 0.])
for step in range(200):
    with tf.GradientTape() as tape:
        tape.watch([x])
        y = himmelblau(x)
    grads = tape.gradient(y, [x])[0]
    x -= 0.01 * grads
    if step % 20 == 0:
        print('step {}: x = {}, f(x) = {}'
              .format(step, x.numpy(), y.numpy()))
22. Fashion-MNIST hands-on practice
23. TensorBoard Visualization
(Visdom, from the PyTorch ecosystem, is an alternative.)
# feed data to TensorBoard
import datetime
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = 'logs/' + current_time
summary_writer = tf.summary.create_file_writer(log_dir)
# then launch from the directory containing logs/:
tensorboard --logdir logs
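A minimal sketch of writing a scalar through the summary writer created above; the tag 'train-loss' and the loss value are placeholders of my own.
```python
import datetime
import tensorflow as tf

current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
summary_writer = tf.summary.create_file_writer('logs/' + current_time)

loss = 0.35  # placeholder; during training this would be the current batch loss
step = 100
with summary_writer.as_default():
    tf.summary.scalar('train-loss', float(loss), step=step)  # shows up under the Scalars tab
```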
24. Keras High-Level API
Main modules: datasets / layers / losses / metrics / optimizers
metrics
acc_meter = metrics.Accuracy()
loss_meter = metrics.Mean()
loss_meter.update_state(loss)
acc_meter.update_state(y, pred)
print(step, 'loss:', loss_meter.result().numpy())
loss_meter.reset_states()
compile / fit / evaluate / predict
network.compile(optimizer=optimizers.Adam(lr=0.01),
loss=tf.losses.CategoricalCrossentropy(from_logits=True),
metrics=['accuracy']
)
network.fit(db, epochs=5, validation_data=ds_val, validation_freq=2)  # run validation every 2 epochs
network.evaluate(ds_val)  # evaluate on the validation set after training
25. Custom Layers and Models
keras.Sequential
keras.layers.Layer
keras.Model
class MyDense(layers.Layer):
    def __init__(self, inp_dim, outp_dim):
        super(MyDense, self).__init__()
        self.kernel = self.add_variable('w', [inp_dim, outp_dim])
        self.bias = self.add_variable('b', [outp_dim])

    def call(self, inputs, training=None):
        out = inputs @ self.kernel + self.bias
        return out
class MyModel(keras.Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.fc1 = MyDense(28*28, 256)
        self.fc2 = MyDense(256, 128)
        self.fc3 = MyDense(128, 64)
        self.fc4 = MyDense(64, 32)
        self.fc5 = MyDense(32, 10)

    def call(self, inputs, training=None):
        x = self.fc1(inputs)
        x = tf.nn.relu(x)
        x = self.fc2(x)
        x = tf.nn.relu(x)
        x = self.fc3(x)
        x = tf.nn.relu(x)
        x = self.fc4(x)
        x = tf.nn.relu(x)
        x = self.fc5(x)
        return x
## 26. Saving and Loading Models
- save/load weights
```python
network.save_weights('weights.ckpt')  # save only the parameters
del network
# rebuild the network first, then:
network.load_weights('weights.ckpt')
```
- save/load entire model
```python
network.save('model.h5')
del network
# no need to rebuild the network
network = tf.keras.models.load_model('model.h5')
```
- saved_model  # for production deployment
```python
tf.saved_model.save(m, '/path/to/dir')
imported = tf.saved_model.load(path)
f = imported.signatures["serving_default"]
print(f(x=tf.ones([1, 28, 28, 3])))
```
27. Overfitting and Underfitting
Under-fitting: estimated model capacity < complexity of the ground truth.
Train accuracy is bad, and test accuracy is bad as well.
Over-fitting: complexity of the ground truth < estimated model capacity.
Train accuracy is much better, but test accuracy is worse.
==> Generalization performance degrades.
28. Dataset Splits and Cross-Validation
train / val / test
Using the test set for validation or hyper-parameter tuning leaks test information, so the measured generalization performance is no longer trustworthy.
k-fold cross-validation
Split train into train and val and train on it, then re-split into a different train/val pair and train again, rotating through the folds (see the sketch below).
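A minimal sketch of generating k-fold train/val index splits with numpy; the fold count, seed, and function name are my own choices.
```python
import numpy as np

def k_fold_indices(num_samples, k=5, seed=0):
    rng = np.random.default_rng(seed)
    idx = rng.permutation(num_samples)      # shuffle once
    folds = np.array_split(idx, k)          # k roughly equal folds
    for i in range(k):
        val_idx = folds[i]
        train_idx = np.concatenate([folds[j] for j in range(k) if j != i])
        yield train_idx, val_idx

# each round trains on x[train_idx], y[train_idx] and validates on x[val_idx], y[val_idx]
for train_idx, val_idx in k_fold_indices(num_samples=60000, k=5):
    pass
```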
29. Ways to Reduce Overfitting
- more data
- constrain model complexity
  (use a shallower model / regularization / Dropout)
- data augmentation
- early stopping
Regularization
Add a penalty to the loss function so that the weights of high-order / high-dimensional features are pushed toward 0.
L1 regularization: add the L1 norm of all network weights to the loss.
L2 regularization: add the squared L2 norm of all network weights to the loss (also called weight decay; see the sketch below).
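A minimal sketch of adding an L2 penalty on all trainable weights to the loss inside a training step; the coefficient 1e-4 is a placeholder of my own.
```python
import tensorflow as tf

def l2_regularized_loss(base_loss, model, lam=1e-4):
    # sum of squared weights over every trainable variable of the model
    reg = tf.add_n([tf.reduce_sum(tf.square(w)) for w in model.trainable_variables])
    return base_loss + lam * reg
```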
Momentum and Learning Rate Decay
momentum
Add momentum (a moving average of past gradients) to the current gradient direction; this helps training escape poor local minima and find a better solution.
learning rate decay
A learning rate that is too small makes training slow; one that is too large keeps it from converging. Start larger and decay it over time (see the sketch below).
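A minimal sketch of SGD with momentum plus an exponential learning-rate schedule; the concrete numbers (0.01, 0.9, 1000 steps) are my own placeholders.
```python
from tensorflow.keras import optimizers

lr_schedule = optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.01,  # start relatively large
    decay_steps=1000,            # decay every 1000 optimizer steps
    decay_rate=0.9)              # multiply the rate by 0.9 each time
optimizer = optimizers.SGD(learning_rate=lr_schedule, momentum=0.9)
```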
30. Training Tricks
- Early stopping
  Stop training early to prevent overfitting:
  use the validation set to select parameters,
  monitor validation performance,
  and stop at the highest validation performance.
- Dropout
  Learning less to learn better: randomly drop connections between fully connected layers during training (see the sketch after this list).
- Stochastic Gradient Descent
  Compute the gradient on one batch at a time instead of on the whole dataset.
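A minimal sketch of Dropout between Dense layers; the drop rate 0.5 and the layer sizes are my own choices. The training flag controls whether units are actually dropped.
```python
import tensorflow as tf
from tensorflow.keras import layers, Sequential

model = Sequential([
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.5),   # randomly zero out 50% of the activations during training
    layers.Dense(10)])
model.build(input_shape=[None, 784])

x = tf.random.normal([4, 784])
out_train = model(x, training=True)   # dropout active
out_eval = model(x, training=False)   # dropout disabled at evaluation time
```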
31. Convolutional Neural Networks
Receptive field: like the human eye, the network first looks at a small region of interest and covers the whole image by scanning with a sliding window.
Convolution kernel: weight sharing plus a sliding window, which greatly reduces the number of parameters.
[b,5,5,c] => [c,3,3] kernel => [b,3,3,1]  # c-channel input; multiply element-wise with the kernel and sum over channels
padding & stride (step size) control the output shape; with suitable padding the input and output spatial sizes stay the same
[b,5,5,c] => [n,c,3,3] (n kernels) => [b,3,3,n]  # multi-channel input, multi-channel output
layer.kernel  # [3,3,c,n]
import tensorflow as tf
from tensorflow.keras import layers
layer=layers.Conv2D(4,kernel_size=5,strides=1,padding='same')
layer.kernel  # [5,5,c,4], created once the layer has been built on an input
layer.bias    # (4,)
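A minimal sketch running the Conv2D layer above on a random image batch; the input size [4, 28, 28, 3] is my own example.
```python
import tensorflow as tf
from tensorflow.keras import layers

x = tf.random.normal([4, 28, 28, 3])
layer = layers.Conv2D(4, kernel_size=5, strides=1, padding='same')
out = layer(x)             # calling the layer builds it, creating kernel and bias
print(out.shape)           # (4, 28, 28, 4): 'same' padding keeps the 28x28 spatial size
print(layer.kernel.shape)  # (5, 5, 3, 4)
print(layer.bias.shape)    # (4,)
```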
32. Pooling and Sampling
- pooling: downsampling
  max / avg pooling: take the maximum / average within each window
x.shape  # [1,14,14,4]
pool=layers.MaxPool2D(2,strides=2)
out=pool(x)  # TensorShape [1,7,7,4]
out=tf.nn.max_pool2d(x,2,strides=2,padding='VALID')
- upsample: increase the spatial size
```python
x=tf.random.normal([1,7,7,4])
layer=layers.UpSampling2D(size=3)
out=layer(x)
# TensorShape [1,21,21,4]
```
- ReLU: the same relu activation function, applied element-wise to the feature map (negative values become 0)