TensorFlow Study Notes

[TOC]

TensorFlow

Preparation (Day 1)

Installation

  1. Install guide:
    https://www.tensorflow.org/install/
    Installing TensorFlow inside a Virtualenv is recommended:
    TensorFlow is then only installed for that one Python environment.
    Every time you use TensorFlow in a new shell, you must activate the Virtualenv first.

Stanford CS20SI: http://web.stanford.edu/class/cs20si/
Bilibili videos (2017):
https://www.bilibili.com/video/av9156347/?from=search&seid=6905181275544516403
YouTube: https://www.youtube.com/watch?v=g-EvyKpZjmQ&list=PLQ0sVbIj3URf94DQtGPJV629ctn2c1zN-

Dataset collections (places to find datasets):
https://zhuanlan.zhihu.com/p/35399323
https://deeplearning4j.org/cn/opendata

Higher-level wrappers and APIs:
TF Learn
TF-Slim
High-level API: Keras

To do: learn to use Docker containers.
To do: learn Python.

Concepts: import tensorflow as tf

Core idea: TensorFlow builds a computation graph.
Visualization: TensorBoard.

Tensor ranks:
0-d: a number (scalar)
1-d: a vector
2-d: a matrix

Neural network structure:
input layer – hidden layer – output layer (fits the data)
How to work with this structure:

  1. Build the structure (the graph)
  2. Feed the data into the structure
  3. Learn the weights and biases

GradientDescentOptimizer (gradient-descent optimizer)

Example:

import tensorflow as tf
import numpy as np

# create data
x_data = np.random.rand(100).astype(np.float32)  # TensorFlow usually works with float32
y_data = x_data*0.1 + 0.3

### create tensorflow structure start ###
Weights = tf.Variable(tf.random_uniform([1], -1.0, 1.0))  # capitalized because it may be a matrix; [1] means 1-D, uniform in [-1, 1)
biases = tf.Variable(tf.zeros([1]))  # initialized to 0

y = Weights*x_data + biases

loss = tf.reduce_mean(tf.square(y-y_data))
optimizer = tf.train.GradientDescentOptimizer(0.5)  # one of many optimizers; 0.5 is the learning rate
train = optimizer.minimize(loss)
### create tensorflow structure end ###

sess = tf.Session()  # the session is what actually runs the graph
# tf.initialize_all_variables() no longer valid from
# 2017-03-02 if using tensorflow >= 0.12
if int((tf.__version__).split('.')[1]) < 12 and int((tf.__version__).split('.')[0]) < 1:
    init = tf.initialize_all_variables()
else:
    init = tf.global_variables_initializer()
sess.run(init)

for step in range(201):
    sess.run(train)
    if step % 20 == 0:
        print(step, sess.run(Weights), sess.run(biases))

Note: nothing is actually computed until sess.run() is called.

Matrix multiplication

from __future__ import print_function
import tensorflow as tf

matrix1 = tf.constant([[3, 3]])
matrix2 = tf.constant([[2],
                       [2]])
product = tf.matmul(matrix1, matrix2)  # matrix multiply, like np.dot(m1, m2)

# method 1
sess = tf.Session()  # note: Session is capitalized
result = sess.run(product)
print(result)
sess.close()

# method 2
with tf.Session() as sess:  # the session is closed automatically when the with block ends
    result2 = sess.run(product)
    print(result2)

Variables

from __future__ import print_function
import tensorflow as tf

state = tf.Variable(0, name='counter')  # define a variable with an initial value and a name
# print(state.name)
one = tf.constant(1)  # define a constant 1

new_value = tf.add(state, one)  # addition op
update = tf.assign(state, new_value)

# tf.initialize_all_variables() no longer valid from
# 2017-03-02 if using tensorflow >= 0.12 -- this initialization step is essential
if int((tf.__version__).split('.')[1]) < 12 and int((tf.__version__).split('.')[0]) < 1:
    init = tf.initialize_all_variables()
else:
    init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    for _ in range(3):  # run the update three times
        sess.run(update)
        print(sess.run(state))

Feeds and placeholders (passing values in)

from __future__ import print_function
import tensorflow as tf

input1 = tf.placeholder(tf.float32)
input2 = tf.placeholder(tf.float32)
output = tf.multiply(input1, input2)  # multiplication

with tf.Session() as sess:
    print(sess.run(output, feed_dict={input1: [7.], input2: [2.]}))  # placeholders are bound to values via the feed_dict dictionary

Activation functions

They let the network solve problems a purely linear model cannot.
Linear model: y = Wx
With an activation: y = AF(Wx), where AF must be differentiable.
AF() is a nonlinear function such as sigmoid, tanh, or relu.
With only 2-3 hidden layers almost any activation works; with many layers you need to watch out for exploding/vanishing gradients.
Common defaults: relu for convolutional networks; relu or tanh for recurrent networks.

  1. Linear / step activations: a step function outputs -1 (not activated) or 1 (activated).
  2. Nonlinear activations: squash values towards 0 or 1 (useful for classification).
    In TF the activation is applied per layer; search "tensorflow activation" to see the built-in ones (a small example follows below).
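
A minimal sketch (my own toy example, assuming the TF 1.x API): the common built-in activations applied to the same values, to see how each one transforms them.

import tensorflow as tf

x = tf.constant([[-2.0, -0.5, 0.0, 0.5, 2.0]])

with tf.Session() as sess:
    print(sess.run(tf.nn.relu(x)))     # negatives clipped to 0
    print(sess.run(tf.nn.sigmoid(x)))  # squashed into (0, 1)
    print(sess.run(tf.nn.tanh(x)))     # squashed into (-1, 1)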

Defining a layer-adding function (def add_layer)

from __future__ import print_function
import tensorflow as tf


def add_layer(inputs, in_size, out_size, activation_function=None):  # add one layer and return its output
    Weights = tf.Variable(tf.random_normal([in_size, out_size]))  # capitalized because it is a matrix: random in_size x out_size
    biases = tf.Variable(tf.zeros([1, out_size]) + 0.1)  # 1 row, out_size columns; a non-zero initial value is recommended
    Wx_plus_b = tf.matmul(inputs, Weights) + biases
    if activation_function is None:  # no activation: the layer stays linear
        outputs = Wx_plus_b
    else:
        outputs = activation_function(Wx_plus_b)
    return outputs

Building a neural network

from __future__ import print_function
import tensorflow as tf
import numpy as np


def add_layer(inputs, in_size, out_size, activation_function=None):
    # add one more layer and return the output of this layer
    Weights = tf.Variable(tf.random_normal([in_size, out_size]))
    biases = tf.Variable(tf.zeros([1, out_size]) + 0.1)
    Wx_plus_b = tf.matmul(inputs, Weights) + biases
    if activation_function is None:
        outputs = Wx_plus_b
    else:
        outputs = activation_function(Wx_plus_b)
    return outputs

# Make up some real data
x_data = np.linspace(-1, 1, 300)[:, np.newaxis]  # 300 points in [-1, 1], reshaped to 300 rows x 1 column
noise = np.random.normal(0, 0.05, x_data.shape)  # noise (std 0.05, same shape as x_data) so the fit is not perfect
y_data = np.square(x_data) - 0.5 + noise  # a nonlinear (quadratic) target

# define placeholder for inputs to network
xs = tf.placeholder(tf.float32, [None, 1])
ys = tf.placeholder(tf.float32, [None, 1])
# add hidden layer: in_size=1 (one feature), out_size=10 (ten neurons), relu activation
l1 = add_layer(xs, 1, 10, activation_function=tf.nn.relu)
# add output layer: takes the 10 hidden outputs, produces 1 output, no activation
prediction = add_layer(l1, 10, 1, activation_function=None)

# the error between prediction and real data: square the difference (tf.square),
# sum per sample (tf.reduce_sum along reduction_indices=[1]), then average over the batch (tf.reduce_mean)
loss = tf.reduce_mean(tf.reduce_sum(tf.square(ys - prediction),
                                    reduction_indices=[1]))
train_step = tf.train.GradientDescentOptimizer(0.1).minimize(loss)  # learning rate 0.1

# important step
# tf.initialize_all_variables() no longer valid from
# 2017-03-02 if using tensorflow >= 0.12
if int((tf.__version__).split('.')[1]) < 12 and int((tf.__version__).split('.')[0]) < 1:
    init = tf.initialize_all_variables()
else:
    init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

for i in range(1000):
    # training
    sess.run(train_step, feed_dict={xs: x_data, ys: y_data})
    if i % 50 == 0:
        # to see the step improvement
        print(sess.run(loss, feed_dict={xs: x_data, ys: y_data}))

Visualizing the result (matplotlib)

import matplotlib.pyplot as plt

# plot the real data
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.scatter(x_data, y_data)
plt.ion()
plt.show()

for i in range(1000):
    # training
    sess.run(train_step, feed_dict={xs: x_data, ys: y_data})
    if i % 50 == 0:
        # to visualize the result and improvement
        try:
            ax.lines.remove(lines[0])
        except Exception:
            pass
        prediction_value = sess.run(prediction, feed_dict={xs: x_data})
        # plot the prediction
        lines = ax.plot(x_data, prediction_value, 'r-', lw=5)
        plt.pause(0.1)

SGD (stochastic gradient descent)

Trains on a small batch of data at each step. Common variants:
Momentum
Adagrad
RMSProp (roughly a combination of the two above)
Adam (usually works best)

Optimizers

https://www.tensorflow.org/api_guides/python/train
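
A minimal sketch (my own example, TF 1.x API): the optimizers listed above are drop-in replacements for one another in the tf.train module; only the constructor changes.

import tensorflow as tf

w = tf.Variable(3.0)
loss = tf.square(w - 1.0)   # toy loss

train_sgd      = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
train_momentum = tf.train.MomentumOptimizer(0.01, momentum=0.9).minimize(loss)
train_adagrad  = tf.train.AdagradOptimizer(0.01).minimize(loss)
train_rmsprop  = tf.train.RMSPropOptimizer(0.01).minimize(loss)
train_adam     = tf.train.AdamOptimizer(1e-3).minimize(loss)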

TensorBoard, a great visualization helper

TensorBoard groups the graph by name scopes, e.g.:
with tf.name_scope('layer'):

from __future__ import print_function
import tensorflow as tf


def add_layer(inputs, in_size, out_size, activation_function=None):
    # add one more layer and return the output of this layer
    with tf.name_scope('layer'):
        with tf.name_scope('weights'):
            Weights = tf.Variable(tf.random_normal([in_size, out_size]), name='W')
        with tf.name_scope('biases'):
            biases = tf.Variable(tf.zeros([1, out_size]) + 0.1, name='b')
        with tf.name_scope('Wx_plus_b'):
            Wx_plus_b = tf.add(tf.matmul(inputs, Weights), biases)
        if activation_function is None:
            outputs = Wx_plus_b
        else:
            outputs = activation_function(Wx_plus_b, )
        return outputs


# define placeholder for inputs to network
with tf.name_scope('inputs'):
    xs = tf.placeholder(tf.float32, [None, 1], name='x_input')
    ys = tf.placeholder(tf.float32, [None, 1], name='y_input')

# add hidden layer
l1 = add_layer(xs, 1, 10, activation_function=tf.nn.relu)
# add output layer
prediction = add_layer(l1, 10, 1, activation_function=None)

# the error between prediction and real data
with tf.name_scope('loss'):
    loss = tf.reduce_mean(tf.reduce_sum(tf.square(ys - prediction),
                                        reduction_indices=[1]))

with tf.name_scope('train'):
    train_step = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

sess = tf.Session()

# tf.train.SummaryWriter soon be deprecated, use following
if int((tf.__version__).split('.')[1]) < 12 and int((tf.__version__).split('.')[0]) < 1:  # tensorflow version < 0.12
    writer = tf.train.SummaryWriter('logs/', sess.graph)
else:  # tensorflow version >= 0.12
    writer = tf.summary.FileWriter("logs/", sess.graph)

# tf.initialize_all_variables() no longer valid from
# 2017-03-02 if using tensorflow >= 0.12
if int((tf.__version__).split('.')[1]) < 12 and int((tf.__version__).split('.')[0]) < 1:
    init = tf.initialize_all_variables()
else:
    init = tf.global_variables_initializer()
sess.run(init)

# direct to the local dir and run this in terminal:
# $ tensorboard --logdir=logs

Classification: MNIST


from __future__ import print_function
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# number 1 to 10 data
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

def add_layer(inputs, in_size, out_size, activation_function=None,):
    # add one more layer and return the output of this layer
    Weights = tf.Variable(tf.random_normal([in_size, out_size]))
    biases = tf.Variable(tf.zeros([1, out_size]) + 0.1,)
    Wx_plus_b = tf.matmul(inputs, Weights) + biases
    if activation_function is None:
        outputs = Wx_plus_b
    else:
        outputs = activation_function(Wx_plus_b,)
    return outputs

def compute_accuracy(v_xs, v_ys):
    global prediction
    y_pre = sess.run(prediction, feed_dict={xs: v_xs})
    correct_prediction = tf.equal(tf.argmax(y_pre, 1), tf.argmax(v_ys, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    result = sess.run(accuracy, feed_dict={xs: v_xs, ys: v_ys})
    return result

# define placeholder for inputs to network
xs = tf.placeholder(tf.float32, [None, 784])  # 28x28
ys = tf.placeholder(tf.float32, [None, 10])

# add output layer
prediction = add_layer(xs, 784, 10, activation_function=tf.nn.softmax)

# the error between prediction and real data
cross_entropy = tf.reduce_mean(-tf.reduce_sum(ys * tf.log(prediction),
                                              reduction_indices=[1]))  # loss
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

sess = tf.Session()
# important step
# tf.initialize_all_variables() no longer valid from
# 2017-03-02 if using tensorflow >= 0.12
if int((tf.__version__).split('.')[1]) < 12 and int((tf.__version__).split('.')[0]) < 1:
    init = tf.initialize_all_variables()
else:
    init = tf.global_variables_initializer()
sess.run(init)

for i in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={xs: batch_xs, ys: batch_ys})
    if i % 50 == 0:
        print(compute_accuracy(
            mnist.test.images, mnist.test.labels))

Overfitting

  1. Get more training data
  2. L1/L2 regularization (a small sketch follows after this list)
  3. Dropout
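
A minimal sketch of L2 regularization (my own example, not from the notes): an extra penalty on the size of the weights is added to the loss, with l2_lambda as a hyperparameter.

import tensorflow as tf

W = tf.Variable(tf.random_normal([10, 1]))
data_loss = tf.constant(1.0)   # stand-in for the usual data-fitting loss
l2_lambda = 0.01               # regularization strength
loss = data_loss + l2_lambda * tf.nn.l2_loss(W)   # tf.nn.l2_loss(W) = sum(W**2) / 2
train_step = tf.train.GradientDescentOptimizer(0.1).minimize(loss)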

Dropout against overfitting

Ref: the Sklearn tutorial series:
https://morvanzhou.github.io/tutorials/machine-learning/sklearn/1-1-A-ML/

# excerpt from the dropout example (not a complete script):
# inside add_layer, apply dropout here; with keep_prob = 0.5, 50% of the activations are discarded
Wx_plus_b = tf.nn.dropout(Wx_plus_b, keep_prob)
if activation_function is None:
    outputs = Wx_plus_b
else:
    outputs = activation_function(Wx_plus_b, )
tf.summary.histogram(layer_name + '/outputs', outputs)
return outputs

# define placeholder for inputs to network
keep_prob = tf.placeholder(tf.float32)
xs = tf.placeholder(tf.float32, [None, 64])  # 8x8
ys = tf.placeholder(tf.float32, [None, 10])


# here to determine the keeping probability
sess.run(train_step, feed_dict={xs: X_train, ys: y_train, keep_prob: 0.5})

# remember to record the histogram summary so the effect shows up in TensorBoard

CNN (convolutional neural networks)

Basic idea

AlphaGo is a well-known application of CNNs.
A CNN has a filter that keeps sliding over the image, each time collecting information from only a small patch of pixels; the collected information is then summarized into a more meaningful representation.
How an image gets convolved: an image has width, height, and depth. The depth is the color information: a black-and-white image has depth 1, while a color image has red, green, and blue channels, so depth 3.
As the width and height are compressed further and the depth grows, the network builds a deeper understanding of the input image.

Pooling: each convolution may unintentionally lose some information; pooling is a good way to mitigate that.

A popular CNN structure (see figure):
2ec0f3d8b495841eec9eaea8e9bf1de4.png

CNN, continued

See Google's own CNN introduction.
The image is compressed step by step, and the depth (thickness) information is finally fed into a classifier.
Key parameters: the stride (how many pixels the filter moves each step) within each patch (kernel).
Padding comes in two flavors (SAME and VALID); see the sketch below.
Pooling also comes in two flavors (max and average).
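
A minimal sketch (my own example): how the patch size, stride, padding, and pooling choices change the output shape for a 28x28 single-channel image.

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 28, 28, 1])
W = tf.Variable(tf.truncated_normal([5, 5, 1, 32], stddev=0.1))  # 5x5 patch, 1 -> 32 channels

same  = tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')    # (?, 28, 28, 32): zero-padded, size kept
valid = tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='VALID')   # (?, 24, 24, 32): no padding, 28-5+1
pool  = tf.nn.max_pool(same, ksize=[1, 2, 2, 1],
                       strides=[1, 2, 2, 1], padding='SAME')        # (?, 14, 14, 32): 2x2 max pooling halves H and W

print(same.get_shape(), valid.get_shape(), pool.get_shape())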

CNN code

"""
Please note, this code is only for python 3+. If you are using python 2+, please modify the code accordingly.
"""
from __future__ import print_function
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# number 1 to 10 data
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

def compute_accuracy(v_xs, v_ys):
global prediction
y_pre = sess.run(prediction, feed_dict={xs: v_xs, keep_prob: 1})
correct_prediction = tf.equal(tf.argmax(y_pre,1), tf.argmax(v_ys,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
result = sess.run(accuracy, feed_dict={xs: v_xs, ys: v_ys, keep_prob: 1})
return result

def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev=0.1) #tf.truncted_normal产生随机变量来进行初始化
return tf.Variable(initial)

def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)#tf.constant常量函数来进行初始化,初始值是0.1,正值比较好,然后传参。
return tf.Variable(initial)

def conv2d(x, W):#定义卷积,x是输入值(图片),W是上面的weight
# stride [1, x_movement, y_movement, 1]
# Must have strides[0] = strides[3] = 1,步长第一和第四个都是1,x是1,y也是1;
# 二维的tf.nn.conv2d函数是tensoflow里面的二维的卷积函数,padding,一种是valid(抽取是全部图片里面的),SAME有部分抽取
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
# stride [1, x_movement, y_movement, 1],两种方法:max,average,相当于压缩了,因为把图片压缩了,不用传入参数,其他和conv2d类似
return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')

# define placeholder for inputs to network
xs = tf.placeholder(tf.float32, [None, 784])/255. # 28x28
ys = tf.placeholder(tf.float32, [None, 10])
keep_prob = tf.placeholder(tf.float32)
x_image = tf.reshape(xs, [-1, 28, 28, 1]) #传入层之前需要改下形状,-1代表先不考虑输入的图片例子多少这个维度,后面的1是channel的数量,因为我们输入的图片是黑白的,因此channel是1,例如如果是RGB图像,那么channel就是3。
# print(x_image.shape) # [n_samples, 28,28,1]

## conv1 layer ## 定义卷积层1
W_conv1 = weight_variable([5,5, 1,32]) # patch 5x5, in size 1, out size 32 ,提取5*5像素的图片,输入1个像素的单位,输出32个像素的色彩高度
b_conv1 = bias_variable([32])#32个长度
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1) # output size 28x28x32 same方式长款不变还是28,高度变成了32
h_pool1 = max_pool_2x2(h_conv1) # output size 14x14x32 ,就是28/2,因为pooling的时候步长多了1倍,图片小了1倍

## conv2 layer ##
W_conv2 = weight_variable([5,5, 32, 64]) # patch 5x5, in size 32, out size 64 ,传入32,传出变成64
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2) # output size 14x14x64
h_pool2 = max_pool_2x2(h_conv2) # output size 7x7x64

## fc1 layer ## 建立全联接层
W_fc1 = weight_variable([7*7*64, 1024]) #输入的是,输出1024的高度,变得更高
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])#变平,先不管多少个样品,由立方体变成扁平,[n_samples, 7, 7, 64] ->> [n_samples, 7*7*64],改形状
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)#做矩阵的乘法
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)#做一个dropout的处理,防止过度拟合的情况

## fc2 layer ##
W_fc2 = weight_variable([1024, 10])#输出结果是10位的
b_fc2 = bias_variable([10])
prediction = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)#用softmax做分类处理,算概率


# the error between prediction and real data
cross_entropy = tf.reduce_mean(-tf.reduce_sum(ys * tf.log(prediction),
reduction_indices=[1])) # loss
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
#庞大系统,不用grient的优化器,选一个更小的学习参数
sess = tf.Session()
# important step
# tf.initialize_all_variables() no long valid from
# 2017-03-02 if using tensorflow >= 0.12
if int((tf.__version__).split('.')[1]) < 12 and int((tf.__version__).split('.')[0]) < 1:
init = tf.initialize_all_variables()
else:
init = tf.global_variables_initializer()
sess.run(init)

for i in range(1000):
batch_xs, batch_ys = mnist.train.next_batch(100)
sess.run(train_step, feed_dict={xs: batch_xs, ys: batch_ys, keep_prob: 0.5})
if i % 50 == 0:
print(compute_accuracy(
mnist.test.images[:1000], mnist.test.labels[:1000]))

Saver: saving and restoring variables

When restoring, define each variable again with the same dtype (float32 here), shape, and name as when it was saved.

from __future__ import print_function
import tensorflow as tf
import numpy as np

# Save to file
# remember to define the same dtype and shape when restore
# W = tf.Variable([[1,2,3],[3,4,5]], dtype=tf.float32, name='weights')
# b = tf.Variable([[1,2,3]], dtype=tf.float32, name='biases')

# tf.initialize_all_variables() no longer valid from
# 2017-03-02 if using tensorflow >= 0.12
# if int((tf.__version__).split('.')[1]) < 12 and int((tf.__version__).split('.')[0]) < 1:
#     init = tf.initialize_all_variables()
# else:
#     init = tf.global_variables_initializer()
#
# saver = tf.train.Saver()
#
# with tf.Session() as sess:
#     sess.run(init)
#     save_path = saver.save(sess, "my_net/save_net.ckpt")  # path to the checkpoint file (.ckpt format)
#     print("Save to path: ", save_path)


################################################
# restore variables
# redefine them with the same shape and dtype as the saved ones:
# the six values saved above come back as a (2, 3) matrix and a (1, 3) vector
W = tf.Variable(np.arange(6).reshape((2, 3)), dtype=tf.float32, name="weights")
b = tf.Variable(np.arange(3).reshape((1, 3)), dtype=tf.float32, name="biases")

# no init step is needed when restoring

saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, "my_net/save_net.ckpt")  # path to the checkpoint
    print("weights:", sess.run(W))
    print("biases:", sess.run(b))

RNN (recurrent neural networks)

Concepts

The state at one step influences the next step and helps determine the next output; any data with a sequential order can use an RNN. Like a CNN, an RNN reuses the same "filter" everywhere, except that in an RNN the reuse happens along the time order.
ef4488c42d2e3e1fa24556f5aa24654b.png
e.g.:
https://morvanzhou.github.io/tutorials/machine-learning/tensorflow/5-07-B-LSTM/
Frameworks: TensorFlow, PyTorch, Keras.

Cell: the recurring unit in an RNN is called a cell; unlike a CNN filter it keeps some memory. Its output y2 takes into account not only x2 but also y1.
State: the result of the previous step is called the state; feeding in x2 produces a new state.

Going further: in a plain RNN a factor like 1.1 multiplied n times can cause exploding (or vanishing) gradients. An LSTM RNN fixes this with three extra gates: whether to write the current input into memory, whether to read it when producing the output, and whether to forget the stored state, i.e. whether the state enters the main memory line. LSTM = long short-term memory:
2349eede69755bfd139099f0292dfa53.png

Code: a classification example (MNIST)

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# set random seed for comparing the two result calculations
tf.set_random_seed(1)

# this is data
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

# hyperparameters
lr = 0.001                # learning rate
training_iters = 100000   # total number of training samples to consume
batch_size = 128          # samples drawn from the pool per training step

n_inputs = 28         # MNIST data input (img shape: 28*28): one row of pixels per time step
n_steps = 28          # time steps: 28 rows, so 28 steps
n_hidden_units = 128  # neurons in hidden layer
n_classes = 10        # MNIST classes (0-9 digits)

# tf Graph input
x = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_classes])

# Define weights
weights = {
    # (28, 128)
    'in': tf.Variable(tf.random_normal([n_inputs, n_hidden_units])),
    # (128, 10)
    'out': tf.Variable(tf.random_normal([n_hidden_units, n_classes]))
}
biases = {
    # (128, )
    'in': tf.Variable(tf.constant(0.1, shape=[n_hidden_units, ])),
    # (10, )
    'out': tf.Variable(tf.constant(0.1, shape=[n_classes, ]))
}


def RNN(X, weights, biases):
    # hidden layer for input to cell
    ########################################

    # the incoming X is 3-D (128 batch, 28 steps, 28 inputs); flatten it to 2-D
    # (128 batch * 28 steps, 28 inputs) so it can be multiplied by the 'in' weights
    X = tf.reshape(X, [-1, n_inputs])

    # into hidden
    # X_in = (128 batch * 28 steps, 128 hidden)
    X_in = tf.matmul(X, weights['in']) + biases['in']
    # X_in ==> back to 3-D: (128 batch, 28 steps, 128 hidden)
    X_in = tf.reshape(X_in, [-1, n_steps, n_hidden_units])

    # cell
    ##########################################

    # basic LSTM Cell. There are many cell types; BasicLSTMCell is used here.
    # forget_bias=1.0 means "do not forget" initially. state_is_tuple=True splits the state
    # into the main line c_state and the branch h_state.
    if int((tf.__version__).split('.')[1]) < 12 and int((tf.__version__).split('.')[0]) < 1:
        cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden_units, forget_bias=1.0, state_is_tuple=True)
    else:
        cell = tf.contrib.rnn.BasicLSTMCell(n_hidden_units)
    # lstm cell is divided into two parts (c_state, h_state)
    init_state = cell.zero_state(batch_size, dtype=tf.float32)

    # You have 2 options for following step.
    # 1: tf.nn.rnn(cell, inputs);
    # 2: tf.nn.dynamic_rnn(cell, inputs).
    # If use option 1, you have to modified the shape of X_in, go and check out this:
    # https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3_NeuralNetworks/recurrent_network.py
    # In here, we go for option 2.
    # dynamic_rnn receive Tensor (batch, steps, inputs) or (steps, batch, inputs) as X_in.
    # Make sure the time_major is changed accordingly. `outputs` collects the output of every step;
    # time_major=False because the steps dimension is the second one here, not the first.
    outputs, final_state = tf.nn.dynamic_rnn(cell, X_in, initial_state=init_state, time_major=False)

    # hidden layer for output as the final results
    #############################################
    # results = tf.matmul(final_state[1], weights['out']) + biases['out']

    # # or
    # unpack to list [(batch, outputs)..] * steps
    if int((tf.__version__).split('.')[1]) < 12 and int((tf.__version__).split('.')[0]) < 1:
        outputs = tf.unpack(tf.transpose(outputs, [1, 0, 2]))    # states is the last outputs
    else:
        outputs = tf.unstack(tf.transpose(outputs, [1, 0, 2]))
    results = tf.matmul(outputs[-1], weights['out']) + biases['out']    # shape = (128, 10)

    return results


pred = RNN(x, weights, biases)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
train_op = tf.train.AdamOptimizer(lr).minimize(cost)

correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

with tf.Session() as sess:
    # tf.initialize_all_variables() no longer valid from
    # 2017-03-02 if using tensorflow >= 0.12
    if int((tf.__version__).split('.')[1]) < 12 and int((tf.__version__).split('.')[0]) < 1:
        init = tf.initialize_all_variables()
    else:
        init = tf.global_variables_initializer()
    sess.run(init)
    step = 0
    while step * batch_size < training_iters:
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        batch_xs = batch_xs.reshape([batch_size, n_steps, n_inputs])
        sess.run([train_op], feed_dict={
            x: batch_xs,
            y: batch_ys,
        })
        if step % 20 == 0:
            print(sess.run(accuracy, feed_dict={
                x: batch_xs,
                y: batch_ys,
            }))
        step += 1

RNN regression example

Based on TensorFlow r0.10 (see the version note in the code).

# View more python learning tutorial on my Youtube and Youku channel!!!

# Youtube video tutorial: https://www.youtube.com/channel/UCdyjiB5H8Pu7aDTNVXTTpcg
# Youku video tutorial: http://i.youku.com/pythontutorial

"""
Please note, this code is only for python 3+. If you are using python 2+, please modify the code accordingly.
Run this script on tensorflow r0.10. Errors appear when using lower versions.
"""
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt


BATCH_START = 0
TIME_STEPS = 20
BATCH_SIZE = 50
INPUT_SIZE = 1
OUTPUT_SIZE = 1
CELL_SIZE = 10
LR = 0.006


def get_batch():
    # function that generates a batch of training data
    global BATCH_START, TIME_STEPS
    # xs shape (50batch, 20steps)
    xs = np.arange(BATCH_START, BATCH_START+TIME_STEPS*BATCH_SIZE).reshape((BATCH_SIZE, TIME_STEPS)) / (10*np.pi)
    seq = np.sin(xs)
    res = np.cos(xs)
    BATCH_START += TIME_STEPS
    # plt.plot(xs[0, :], res[0, :], 'r', xs[0, :], seq[0, :], 'b--')
    # plt.show()
    # returned seq, res and xs: shape (batch, step, input)
    return [seq[:, :, np.newaxis], res[:, :, np.newaxis], xs]


class LSTMRNN(object):
    def __init__(self, n_steps, input_size, output_size, cell_size, batch_size):
        self.n_steps = n_steps
        self.input_size = input_size
        self.output_size = output_size
        self.cell_size = cell_size
        self.batch_size = batch_size
        with tf.name_scope('inputs'):
            self.xs = tf.placeholder(tf.float32, [None, n_steps, input_size], name='xs')
            self.ys = tf.placeholder(tf.float32, [None, n_steps, output_size], name='ys')
        with tf.variable_scope('in_hidden'):
            self.add_input_layer()
        with tf.variable_scope('LSTM_cell'):
            self.add_cell()
        with tf.variable_scope('out_hidden'):
            self.add_output_layer()
        with tf.name_scope('cost'):
            self.compute_cost()
        with tf.name_scope('train'):
            self.train_op = tf.train.AdamOptimizer(LR).minimize(self.cost)

    def add_input_layer(self,):
        # flatten the 3-D input to 2-D: (batch*n_steps, in_size)
        l_in_x = tf.reshape(self.xs, [-1, self.input_size], name='2_2D')
        # Ws (in_size, cell_size)
        Ws_in = self._weight_variable([self.input_size, self.cell_size])
        # bs (cell_size, )
        bs_in = self._bias_variable([self.cell_size, ])
        # l_in_y = (batch * n_steps, cell_size)
        with tf.name_scope('Wx_plus_b'):
            l_in_y = tf.matmul(l_in_x, Ws_in) + bs_in
        # reshape l_in_y back to 3-D ==> (batch, n_steps, cell_size)
        self.l_in_y = tf.reshape(l_in_y, [-1, self.n_steps, self.cell_size], name='2_3D')

    def add_cell(self):
        lstm_cell = tf.contrib.rnn.BasicLSTMCell(self.cell_size, forget_bias=1.0, state_is_tuple=True)
        with tf.name_scope('initial_state'):
            self.cell_init_state = lstm_cell.zero_state(self.batch_size, dtype=tf.float32)
        self.cell_outputs, self.cell_final_state = tf.nn.dynamic_rnn(
            lstm_cell, self.l_in_y, initial_state=self.cell_init_state, time_major=False)

    def add_output_layer(self):
        # shape = (batch * steps, cell_size)
        l_out_x = tf.reshape(self.cell_outputs, [-1, self.cell_size], name='2_2D')
        Ws_out = self._weight_variable([self.cell_size, self.output_size])
        bs_out = self._bias_variable([self.output_size, ])
        # shape = (batch * steps, output_size)
        with tf.name_scope('Wx_plus_b'):
            self.pred = tf.matmul(l_out_x, Ws_out) + bs_out

    def compute_cost(self):
        losses = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
            [tf.reshape(self.pred, [-1], name='reshape_pred')],
            [tf.reshape(self.ys, [-1], name='reshape_target')],
            [tf.ones([self.batch_size * self.n_steps], dtype=tf.float32)],
            average_across_timesteps=True,
            softmax_loss_function=self.ms_error,
            name='losses'
        )
        with tf.name_scope('average_cost'):
            self.cost = tf.div(
                tf.reduce_sum(losses, name='losses_sum'),
                self.batch_size,
                name='average_cost')
            tf.summary.scalar('cost', self.cost)

    @staticmethod
    def ms_error(labels, logits):
        return tf.square(tf.subtract(labels, logits))

    def _weight_variable(self, shape, name='weights'):
        initializer = tf.random_normal_initializer(mean=0., stddev=1.,)
        return tf.get_variable(shape=shape, initializer=initializer, name=name)

    def _bias_variable(self, shape, name='biases'):
        initializer = tf.constant_initializer(0.1)
        return tf.get_variable(name=name, shape=shape, initializer=initializer)


if __name__ == '__main__':
    model = LSTMRNN(TIME_STEPS, INPUT_SIZE, OUTPUT_SIZE, CELL_SIZE, BATCH_SIZE)
    sess = tf.Session()
    merged = tf.summary.merge_all()
    writer = tf.summary.FileWriter("logs", sess.graph)
    # tf.initialize_all_variables() no longer valid from
    # 2017-03-02 if using tensorflow >= 0.12
    if int((tf.__version__).split('.')[1]) < 12 and int((tf.__version__).split('.')[0]) < 1:
        init = tf.initialize_all_variables()
    else:
        init = tf.global_variables_initializer()
    sess.run(init)
    # relocate to the local dir and run this line to view it on Chrome (http://0.0.0.0:6006/):
    # $ tensorboard --logdir='logs'

    plt.ion()
    plt.show()
    for i in range(200):
        seq, res, xs = get_batch()
        if i == 0:
            feed_dict = {
                model.xs: seq,
                model.ys: res,
                # create initial state
            }
        else:
            feed_dict = {
                model.xs: seq,
                model.ys: res,
                model.cell_init_state: state    # use last state as the initial state for this run
            }

        _, cost, state, pred = sess.run(
            [model.train_op, model.cost, model.cell_final_state, model.pred],
            feed_dict=feed_dict)

        # plotting
        plt.plot(xs[0, :], res[0].flatten(), 'r', xs[0, :], pred.flatten()[:TIME_STEPS], 'b--')
        plt.ylim((-1.2, 1.2))
        plt.draw()
        plt.pause(0.3)

        if i % 20 == 0:
            print('cost: ', round(cost, 4))
            result = sess.run(merged, feed_dict)
            writer.add_summary(result, i)


Autoencoder (unsupervised learning)

X -> compressed "essence" of the data -> reconstructed X
The encoder extracts the essence of the original data; a small network can then learn from this compressed representation, which reduces the network's workload while still giving good results.
Related: PCA (to look into).

https://github.com/MorvanZhou/tutorials/blob/master/tensorflowTUT/tf21_autoencoder/full_code.py
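
A minimal sketch of the idea only (the link above has Morvan's full code; this assumes a TF version where tf.layers.dense is available): compress the input to a short code, reconstruct it, and train on the reconstruction error alone, with no labels needed.

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 784])

code = tf.layers.dense(x, 64, activation=tf.nn.sigmoid)                 # encoder: 784 -> 64, the "essence"
reconstruction = tf.layers.dense(code, 784, activation=tf.nn.sigmoid)   # decoder: 64 -> 784

loss = tf.reduce_mean(tf.square(reconstruction - x))   # unsupervised: compare with the input itself
train_op = tf.train.AdamOptimizer(0.01).minimize(loss)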

Variable/name scopes: to look into.

Batch Normalization
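
A minimal sketch (my own example, assuming the TF 1.x layers API, i.e. tf.layers.batch_normalization): normalize each layer's inputs per batch, and pass a training flag so the moving statistics are used correctly at inference time.

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 100])
is_training = tf.placeholder(tf.bool)

h = tf.layers.dense(x, 50)
h = tf.layers.batch_normalization(h, training=is_training)
h = tf.nn.relu(h)

# the moving-average update ops live in UPDATE_OPS and must be run together with the train step
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)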

Transfer learning

VGG for computer vision: 16 layers.
Reuse part of a CNN that someone else has already trained; its parameters can be kept fixed (frozen). A minimal sketch of the freezing idea follows after the link below.
https://morvanzhou.github.io/tutorials/machine-learning/tensorflow/5-16-transfer-learning/
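
A minimal sketch of the "freeze the pre-trained part" idea (my own example, not the VGG16 code from the link above): variables created with trainable=False are left out of the optimizer's updates.

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 256])
y = tf.placeholder(tf.float32, [None, 10])

W_frozen = tf.Variable(tf.random_normal([256, 64]), trainable=False)  # imagine this was restored from a pre-trained net
feature = tf.nn.relu(tf.matmul(x, W_frozen))

W_new = tf.Variable(tf.random_normal([64, 10]))   # only the newly added output layer is trained
logits = tf.matmul(feature, W_new)

loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))
train_op = tf.train.AdamOptimizer(1e-4).minimize(loss)   # updates W_new only; W_frozen stays fixed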