用tensorflow迁移学习猫狗分类

2024年3月3日 118次阅读来源: tensorflow

笔者这几天在跟着莫烦学习TensorFlow，正好学到迁移学习（至于什么是迁移学习，看这篇），莫烦老师做的是预测猫和老虎尺寸大小的学习。作为一个有为的学生，笔者当然不能再预测猫啊狗啊的大小啦，正好之前做过猫狗大战数据集的图像分类，做好的数据都还在，二话不说，开撸。

既然是VGG16模型，当然首先上模型代码了：

  1 def conv_layers_simple_api(net_in):
  2     with tf.name_scope('preprocess'):
  3         # Notice that we include a preprocessing layer that takes the RGB image
  4         # with pixels values in the range of 0-255 and subtracts the mean image
  5         # values (calculated over the entire ImageNet training set).
  6         mean = tf.constant([123.68, 116.779, 103.939], dtype=tf.float32, shape=[1, 1, 1, 3], name='img_mean')
  7         net_in.outputs = net_in.outputs - mean
  8 
  9     # conv1
 10     network = Conv2d(net_in, n_filter=64, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME',
 11                      name='conv1_1')
 12     network = Conv2d(network, n_filter=64, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME',
 13                      name='conv1_2')
 14     network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool1')
 15 
 16     # conv2
 17     network = Conv2d(network, n_filter=128, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME',
 18                      name='conv2_1')
 19     network = Conv2d(network, n_filter=128, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME',
 20                      name='conv2_2')
 21     network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool2')
 22 
 23     # conv3
 24     network = Conv2d(network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME',
 25                      name='conv3_1')
 26     network = Conv2d(network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME',
 27                      name='conv3_2')
 28     network = Conv2d(network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME',
 29                      name='conv3_3')
 30     network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool3')
 31 
 32     # conv4
 33     network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME',
 34                      name='conv4_1')
 35     network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME',
 36                      name='conv4_2')
 37     network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME',
 38                      name='conv4_3')
 39     network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool4')
 40 
 41     # conv5
 42     network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME',
 43                      name='conv5_1')
 44     network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME',
 45                      name='conv5_2')
 46     network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME',
 47                      name='conv5_3')
 48     network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool5')
 49     return network``
 50 def conv_layers_simple_api(net_in):
 51     with tf.name_scope('preprocess'):
 52         # Notice that we include a preprocessing layer that takes the RGB image
 53         # with pixels values in the range of 0-255 and subtracts the mean image
 54         # values (calculated over the entire ImageNet training set).
 55         mean = tf.constant([123.68, 116.779, 103.939], dtype=tf.float32, shape=[1, 1, 1, 3], name='img_mean')
 56         net_in.outputs = net_in.outputs - mean
 57 
 58     # conv1
 59     network = Conv2d(net_in, n_filter=64, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME',
 60                      name='conv1_1')
 61     network = Conv2d(network, n_filter=64, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME',
 62                      name='conv1_2')
 63     network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool1')
 64 
 65     # conv2
 66     network = Conv2d(network, n_filter=128, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME',
 67                      name='conv2_1')
 68     network = Conv2d(network, n_filter=128, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME',
 69                      name='conv2_2')
 70     network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool2')
 71 
 72     # conv3
 73     network = Conv2d(network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME',
 74                      name='conv3_1')
 75     network = Conv2d(network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME',
 76                      name='conv3_2')
 77     network = Conv2d(network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME',
 78                      name='conv3_3')
 79     network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool3')
 80 
 81     # conv4
 82     network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME',
 83                      name='conv4_1')
 84     network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME',
 85                      name='conv4_2')
 86     network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME',
 87                      name='conv4_3')
 88     network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool4')
 89 
 90     # conv5
 91     network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME',
 92                      name='conv5_1')
 93     network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME',
 94                      name='conv5_2')
 95     network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME',
 96                      name='conv5_3')
 97     network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool5')
 98     return network``
 99 def conv_layers_simple_api(net_in):
100     with tf.name_scope('preprocess'):
101         # Notice that we include a preprocessing layer that takes the RGB image
102         # with pixels values in the range of 0-255 and subtracts the mean image
103         # values (calculated over the entire ImageNet training set).
104         mean = tf.constant([123.68, 116.779, 103.939], dtype=tf.float32, shape=[1, 1, 1, 3], name='img_mean')
105         net_in.outputs = net_in.outputs - mean
106 
107     # conv1
108     network = Conv2d(net_in, n_filter=64, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME',
109                      name='conv1_1')
110     network = Conv2d(network, n_filter=64, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME',
111                      name='conv1_2')
112     network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool1')
113 
114     # conv2
115     network = Conv2d(network, n_filter=128, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME',
116                      name='conv2_1')
117     network = Conv2d(network, n_filter=128, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME',
118                      name='conv2_2')
119     network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool2')
120 
121     # conv3
122     network = Conv2d(network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME',
123                      name='conv3_1')
124     network = Conv2d(network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME',
125                      name='conv3_2')
126     network = Conv2d(network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME',
127                      name='conv3_3')
128     network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool3')
129 
130     # conv4
131     network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME',
132                      name='conv4_1')
133     network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME',
134                      name='conv4_2')
135     network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME',
136                      name='conv4_3')
137     network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool4')
138 
139     # conv5
140     network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME',
141                      name='conv5_1')
142     network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME',
143                      name='conv5_2')
144     network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME',
145                      name='conv5_3')
146     network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool5')
147     return network

笔者偷懒直接用的是TensorLayer库中的Vgg16模型，至于什么是tensorlayer请移步这里

按照莫烦老师的教程，改写最后的全连接层做二分类学习：

def fc_layers(net):
    """Attach the two-class classification head to ``net``.

    The head flattens ``net``, applies a 256-unit ReLU dense layer and ends
    with a 2-unit dense layer using the identity activation.

    Args:
        net: the layer to flatten (handed directly to ``FlattenLayer``).

    Returns:
        The final 2-unit ``DenseLayer``.
    """
    # Flatten before the fully connected layers.
    flat = FlattenLayer(net, name='flatten')
    hidden = DenseLayer(flat, n_units=256, act=tf.nn.relu, name='fc1_relu')
    # The layer is named 'fc3_relu' although its activation is the identity.
    logits = DenseLayer(hidden, n_units=2, act=tf.identity, name='fc3_relu')
    return logits

定义输入、输出、损失函数以及训练步骤：

 1 # 输入
 2 x = tf.placeholder(tf.float32, [None, 224, 224, 3])
 3 # 输出
 4 y_ = tf.placeholder(tf.int32, shape=[None, ], name='y_')
 5 net_in = InputLayer(x, name='input')
 6 # net_cnn = conv_layers(net_in)               # professional CNN APIs
 7 net_cnn = conv_layers_simple_api(net_in)  # simplified CNN APIs
 8 network = fc_layers(net_cnn)
 9 y = network.outputs
10 # probs = tf.nn.softmax(y)
11 y_op = tf.argmax(tf.nn.softmax(y), 1)
12 cost = tl.cost.cross_entropy(y, y_, name='cost')
13 correct_prediction = tf.equal(tf.cast(tf.argmax(y, 1), tf.float32), tf.cast(y_, tf.float32))
14 acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
15 # 定义 optimizer
16 train_params = network.all_params[26:]
17 # print(train_params)
18 global_step = tf.Variable(0)
19 # --------------学习速率的设置（学习速率呈指数下降）--------------------- #将 global_step/decay_steps 强制转换为整数
20 # learning_rate = tf.train.exponential_decay(1e-2, global_step, decay_steps=1000, decay_rate=0.98, staircase=True)
21 train_op = tf.train.AdamOptimizer(learning_rate=0.0001, beta1=0.9, beta2=0.999,
22                                   epsilon=1e-08, use_locking=False).minimize(cost, var_list=train_params)

读取训练、验证数据，加载模型参数：

 1 img, label = read_and_decode("F:\\001-python\\train.tfrecords")
 2 img_v, label_v = read_and_decode("F:\\001-python\\val.tfrecords")
 3 # 使用shuffle_batch可以随机打乱输入
 4 X_train, y_train = tf.train.shuffle_batch([img, label],
 5                                           batch_size=30, capacity=400,
 6                                           min_after_dequeue=300)
 7 X_Val, y_val = tf.train.shuffle_batch([img_v, label_v],
 8                                       batch_size=30, capacity=400,
 9                                       min_after_dequeue=300)
10 tl.layers.initialize_global_variables(sess)
11 network.print_params()
12 network.print_layers()
13 npz = np.load('vgg16_weights.npz')
14 params = []
15 for val in sorted(npz.items())[0:25]:
16     # print("  Loading %s" % str(val[1].shape))
17     params.append(val[1])
18 加载预训练的参数
19 tl.files.assign_params(sess, params, network)

加载好之后，开始训练，200个epoch：

 1 for epoch in range(n_epoch):
 2     start_time = time.time()
 3     val, l = sess.run([X_train, y_train])
 4     for X_train_a, y_train_a in tl.iterate.minibatches(val, l, batch_size, shuffle=True):
 5         sess.run(train_op, feed_dict={x: X_train_a, y_: y_train_a})
 6     if epoch + 1 == 1 or (epoch + 1) % 5 == 0:
 7         print("Epoch %d of %d took %fs" % (epoch + 1, n_epoch, time.time() - start_time))
 8         train_loss, train_acc, n_batch = 0, 0, 0
 9         for X_train_a, y_train_a in tl.iterate.minibatches(val, l, batch_size, shuffle=True):
10             err, ac = sess.run([cost, acc], feed_dict={x: X_train_a, y_: y_train_a})
11             train_loss += err
12             train_acc += ac
13             n_batch += 1
14         print("   train loss: %f" % (train_loss / n_batch))
15         print("   train acc: %f" % (train_acc / n_batch))

保存训练的参数：

1 tl.files.save_npz(network.all_params, name='model.npz', sess=sess)

下面就是开始训练啦，笔者很高兴的拿着自己的笔记本显卡呼呼的跑了一遍:

~~~~~~~~~~~~~~~~~~~~~~~~下面是漫长的等待

.......
[TL] Epoch 138 of 150 took 0.999402s
[TL]    val loss: 0.687194
[TL]    val acc: 0.562500
[TL] Epoch 140 of 150 took 3.782207s
[TL]    val loss: 0.619966
[TL]    val acc: 0.750000
[TL] Epoch 142 of 150 took 0.983802s
[TL]    val loss: 0.685686
[TL]    val acc: 0.562500
[TL] Epoch 144 of 150 took 0.986604s
[TL]    val loss: 0.661224
[TL]    val acc: 0.687500
[TL] Epoch 146 of 150 took 1.022403s
[TL]    val loss: 0.675885
[TL]    val acc: 0.687500
[TL] Epoch 148 of 150 took 0.991802s
[TL]    val loss: 0.682124
[TL]    val acc: 0.625000
[TL] Epoch 150 of 150 took 3.487811s
[TL]    val loss: 0.674932
[TL]    val acc: 0.687500
[TL] Total training time: 319.859640s
[TL] [*] model.npz saved

额~~~~~~~~~~~~~~~~~

0.68的正确率，群里一位朋友看了之后说：跟猜差不多了(一脸黑线)。问题出哪儿呢？难道是笔者训练的次数不够多？莫烦老师可是100次就能出很好的结果啊

不管怎么样，要试试，笔者于是加载刚刚保存的model.npz参数继续跑100个epoch

~~~~~~~~~~~~~~~~~~~~~~~~又是漫长的等待

[TL] Epoch 1 of 100 took 8.477617s
[TL]    val loss: 0.685957
[TL]    val acc: 0.562500
[TL] Epoch 2 of 100 took 0.999402s
[TL]    val loss: 0.661529
[TL]    val acc: 0.625000
......
[TL] Epoch 94 of 100 took 0.992208s
[TL]    val loss: 0.708815
[TL]    val acc: 0.562500
[TL] Epoch 96 of 100 took 0.998406s
[TL]    val loss: 0.710636
[TL]    val acc: 0.562500
[TL] Epoch 98 of 100 took 0.992807s
[TL]    val loss: 0.621505
[TL]    val acc: 0.687500
[TL] Epoch 100 of 100 took 0.986405s
[TL]    val loss: 0.670647
[TL]    val acc: 0.625000
[TL] Total training time: 156.734633s
[TL] [*] model.npz saved

坑爹啊这是，还不如之前的结果。

笔者陷入深深的沉思中，难道是改了全连接层导致的？于是笔者又把之前去掉的全连接层加上：

def fc_layers(net):
    """Attach a three-layer fully connected head for two-class output.

    The head flattens ``net``, applies two 256-unit ReLU dense layers and
    ends with a 2-unit dense layer using the identity activation.

    Args:
        net: the layer to flatten (handed directly to ``FlattenLayer``).

    Returns:
        The final 2-unit ``DenseLayer``.
    """
    # Flatten before the fully connected layers.
    network = FlattenLayer(net, name='flatten')
    network = DenseLayer(network, n_units=256, act=tf.nn.relu, name='fc1_relu')
    network = DenseLayer(network, n_units=256, act=tf.nn.relu, name='fc2_relu')
    # The layer is named 'fc3_relu' although its activation is the identity.
    network = DenseLayer(network, n_units=2, act=tf.identity, name='fc3_relu')
    return network

接着训练

~~~~~~~~~~~~~~~~~~~~~~~~下面又是漫长的等待

1 [TL] Epoch 1 of 100 took 8.477229s
2 [TL]    val loss: 2.370650
3 [TL]    val acc: 0.562500
4 ...
5 [TL] Epoch 100 of 100 took 1.016002s
6 [TL]    val loss: 0.762171
7 [TL]    val acc: 0.437500
8 [TL] Total training time: 156.836465s
9 [TL] [*] model.npz saved

还是一样，笔者已崩溃了，一定是哪儿不对啊啊啊….于是笔者去翻莫烦老师的代码，一点点对下来，每一层参数肯定不会有错，那就是在训练设置的参数有问题。

1 self.train_op = tf.train.RMSPropOptimizer(0.001).minimize(self.loss) #莫烦的代码
2 train_op = tf.train.AdamOptimizer(learning_rate=0.0001, beta1=0.9, beta2=0.999,
3                                   epsilon=1e-08, use_locking=False).minimize(cost, var_list=train_params)#笔者的

看到train_params难道是这个train_params？笔者只优化了最后的全连接层参数而莫烦老师优化的是全部参数

已经深夜了，笔者表示即使不睡觉也要跑一遍试试，于是改成

 1 # 定义 optimizer
 2 train_params = network.all_params
 3 ~~~~~~~~~~~~~~~~~~~~~~~~于是又是漫长的等待
 4 
 5 [TL] Epoch 1 of 100 took 20.286640s
 6 [TL]    val loss: 11.938850
 7 [TL]    val acc: 0.312500
 8 [TL] Epoch 2 of 100 took 3.091806s
 9 [TL]    val loss: 2.890055
10 [TL]    val acc: 0.625000
11 [TL] Epoch 4 of 100 took 3.074205s
12 [TL]    val loss: 24.055895
13 [TL]    val acc: 0.687500
14 [TL] ....
15 [TL]    val loss: 0.699907
16 [TL]    val acc: 0.500000
17 [TL] Epoch 98 of 100 took 3.089206s
18 [TL]    val loss: 0.683627
19 [TL]    val acc: 0.562500
20 [TL] Epoch 100 of 100 took 3.091806s
21 [TL]    val loss: 0.708496
22 [TL]    val acc: 0.562500
23 [TL] Total training time: 375.727307s
24 [TL] [*] model.npz saved

效果变得更差了….

排除参数的问题，已经深夜1点了，明天还要上班，不得不睡啦。

继续崩溃第三天~~~

第四天~~~

第五天，今天供应商过来公司调试机器，正好是一个学图像处理的小伙子，我提到这个说：我训练了这么多代，为啥还是像猜一样的概率….？小伙儿说：莫不是过拟合了吧？我说:不可能啊，现成的数据、现成的模型和参数，不应该的啊！

不过我还是得检查一下数据处理的代码

 1 # 生成是数据文件
 2 def create_record(filelist):
 3     random.shuffle(filelist)
 4     i = 0
 5     writer = tf.python_io.TFRecordWriter(recordpath)
 6     for file in filelist:
 7         name = file.split(sep='.')
 8         lable_val = 0
 9         if name[0] == 'cat':
10             lable_val = 0
11         else:
12             lable_val = 1
13         img_path = file_dir + file
14         img = Image.open(img_path)
15         img = img.resize((240, 240))
16         img_raw = img.tobytes()  # 将图片转化为原生bytes
17         example = tf.train.Example(features=tf.train.Features(feature={
18             "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[lable_val])),
19             'img_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_raw]))
20        })) #example对象对label和image进行封装
21         writer.write(example.SerializeToString())
22         i=i+1
23         print(name[1])
24         print(lable_val)
25         print(i)
26     writer.close()
# Read the record file through an input queue.
def read_and_decode(filename, rescale=False):
    """Build the ops that read one (image, label) pair from a TFRecord file.

    Args:
        filename: path of the TFRecord file to read.
        rescale: when True, map pixel values with ``* (1. / 255) - 0.5``.
            Leave it False when feeding the VGG16 stack, whose preprocessing
            step subtracts ImageNet means expressed on the 0-255 scale.

    Returns:
        ``(img, label)``: ``img`` is a float32 tensor reshaped to
        [224, 224, 3], ``label`` is an int32 scalar tensor.
    """
    # Create a queue from the file name.
    filename_queue = tf.train.string_input_producer([filename])
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)  # returns (key, serialized record)
    features = tf.parse_single_example(serialized_example,
                                       features={
                                           'label': tf.FixedLenFeature([], tf.int64),
                                           'img_raw': tf.FixedLenFeature([], tf.string),
                                       })
    img = tf.decode_raw(features['img_raw'], tf.uint8)
    img = tf.reshape(img, [224, 224, 3])
    img = tf.cast(img, tf.float32)
    if rescale:
        img = img * (1. / 255) - 0.5
    label = tf.cast(features['label'], tf.int32)
    return img, label

img = tf.cast(img, tf.float32) * (1. / 255) - 0.5 难道是这一步处理多余？注释掉之后，重新训练模型

 1 Epoch 85 of 200 took 1.234071s
 2    train loss: 14.689816
 3    train acc: 0.900000
 4 [TL] [*] model3.npz saved
 5 Epoch 90 of 200 took 1.241071s
 6    train loss: 17.104382
 7    train acc: 0.800000
 8 [TL] [*] model3.npz saved
 9 Epoch 95 of 200 took 1.236071s
10    train loss: 11.190630
11    train acc: 0.850000
12 [TL] [*] model3.npz saved
13 Epoch 100 of 200 took 1.238071s
14    train loss: 0.000000
15    train acc: 1.000000
16 [TL] [*] model3.npz saved
17 Epoch 105 of 200 took 1.236071s
18    train loss: 7.622324
19    train acc: 0.900000
20 [TL] [*] model3.npz saved
21 Epoch 110 of 200 took 1.234071s
22    train loss: 2.164670
23    train acc: 0.950000
24 [TL] [*] model3.npz saved
25 Epoch 115 of 200 took 1.237071s
26    train loss: 0.000000
27    train acc: 1.000000
28 [TL] [*] model3.npz saved

准确度1，停停停…不用跑完了，Perfect!

原来如此，必须要真实的像素值（VGG16的预处理层会减去按0-255像素范围统计的ImageNet均值，输入如果先被缩放到[-0.5, 0.5]，就和预训练权重对不上了）…….心好累……，笔者已经不记得哪儿抄来的这一行了。

嗯，VGG16模型的迁移学习到此结束，代码见github

    原文作者：tensorflow
    原文地址: https://www.cnblogs.com/zengfanlin/p/8886701.html
    本文转自网络文章，转载此文章仅为分享知识，如有侵权，请联系博主进行删除。