《TensorFlow:实战Google深度学习框架》 Notes, Code, and Errata - Chapter 6: Image Recognition and Convolutional Neural Networks - Part 1

This article is divided into three parts:

1) Define the LeNet-5 forward-propagation process, saved as LeNet5inference.py

2) Train the LeNet-5 network, saved as LeNet5train.py; this part is almost identical to the Chapter 5 MNIST best-practices code

3) Evaluate the LeNet-5 model, saved as LeNet5eval.py; this part is also almost identical to the Chapter 5 MNIST best-practices code

  • LeNet-5 forward propagation

Save the following code as LeNet5inference.py:

# file name: LeNet5inference.py

import tensorflow as tf

# Configure the network parameters
INPUT_NODE = 784
OUTPUT_NODE = 10

IMAGE_SIZE = 28
NUM_CHANNELS = 1
NUM_LABELS = 10

# Size and depth of the first convolutional layer's filter
CONV1_DEEP = 32
CONV1_SIZE = 5

# Size and depth of the second convolutional layer's filter
CONV2_DEEP = 64
CONV2_SIZE = 5

# Number of nodes in the fully connected layer
FC_SIZE = 512

# Define the forward-propagation process. A new parameter, train, distinguishes
# the training pass from the test pass: this program uses dropout, which helps
# prevent overfitting and makes the model more robust, and dropout is applied
# only during training.
def inference(input_tensor, train, regularizer):
    # Declare the variables of the first convolutional layer and implement its
    # forward pass. Using a separate variable scope per layer isolates the
    # variables, so names only have to be unique within the current layer.
    # Unlike the standard LeNet-5 model, the input defined here is the raw
    # 28*28*1 MNIST image; with all-zero (SAME) padding the convolution
    # outputs a 28*28*32 tensor.
    with tf.variable_scope('layer1-conv1'):
        conv1_weights = tf.get_variable(
            "weight", [CONV1_SIZE, CONV1_SIZE, NUM_CHANNELS, CONV1_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv1_biases = tf.get_variable("bias", [CONV1_DEEP], initializer=tf.constant_initializer(0.0))
        # Apply a 5*5 filter of depth 32 with stride 1 and all-zero (SAME) padding
        conv1 = tf.nn.conv2d(input_tensor, conv1_weights, strides=[1, 1, 1, 1], padding='SAME')
        relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_biases))

    # Forward pass of the second layer, a max-pooling layer with a 2*2 filter,
    # all-zero (SAME) padding, and stride 2. Its input is the previous layer's
    # 28*28*32 output; its output is a 14*14*32 tensor.
    with tf.name_scope("layer2-pool1"):
        pool1 = tf.nn.max_pool(relu1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

    # Declare the variables of the third layer (convolution) and implement its
    # forward pass. Input: a 14*14*32 tensor; output: a 14*14*64 tensor.
    with tf.variable_scope("layer3-conv2"):
        conv2_weights = tf.get_variable(
            "weight", [CONV2_SIZE, CONV2_SIZE, CONV1_DEEP, CONV2_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv2_biases = tf.get_variable("bias", [CONV2_DEEP], initializer=tf.constant_initializer(0.0))
        conv2 = tf.nn.conv2d(pool1, conv2_weights, strides=[1, 1, 1, 1], padding='SAME')
        relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases))

    # Forward pass of the fourth layer (max pooling). Input: a 14*14*64 tensor; output: a 7*7*64 tensor.
    with tf.name_scope("layer4-pool2"):
        pool2 = tf.nn.max_pool(relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        # Convert the fourth layer's output into the input format of the fifth,
        # fully connected layer. The fourth layer outputs a 7*7*64 tensor, but
        # the fully connected layer expects vectors, so the 7*7*64 tensor must
        # be flattened. pool2.get_shape yields the dimensions of the fourth
        # layer's output without manual calculation.
        # [Erratum] In the original book, pool_shape is indented to align with
        # `with`, which is wrong; it should align with pool2.
        pool_shape = pool2.get_shape().as_list()
        # Length of the flattened vector. Note that pool_shape[0] is the number of examples in a batch.
        nodes = pool_shape[1] * pool_shape[2] * pool_shape[3]
        # tf.reshape turns the fourth layer's output into a batch of vectors
        reshaped = tf.reshape(pool2, [pool_shape[0], nodes])

    # Declare the variables of the fifth, fully connected layer and implement
    # its forward pass. The input is the batch of flattened vectors of length
    # 3136; the output is a batch of vectors of length 512. This layer matches
    # Chapter 5 except for the addition of dropout, which randomly sets part of
    # the nodes' outputs to 0 during training. Dropout mitigates overfitting,
    # improving the model's performance on the test set, and is generally used
    # only on fully connected layers, not convolutional or pooling layers.
    with tf.variable_scope('layer5-fc1'):
        fc1_weights = tf.get_variable("weight", [nodes, FC_SIZE],
                                      initializer=tf.truncated_normal_initializer(stddev=0.1))
        # Only the fully connected weights are regularized
        if regularizer is not None:
            tf.add_to_collection('losses', regularizer(fc1_weights))
        fc1_biases = tf.get_variable("bias", [FC_SIZE], initializer=tf.constant_initializer(0.1))

        fc1 = tf.nn.relu(tf.matmul(reshaped, fc1_weights) + fc1_biases)
        if train:  # apply dropout only during the training pass
            fc1 = tf.nn.dropout(fc1, 0.5)

    # Declare the variables of the sixth, fully connected layer and implement
    # its forward pass. Input: a batch of length-512 vectors; output: a batch
    # of length-10 vectors, which give the final classification after softmax.
    with tf.variable_scope('layer6-fc2'):
        fc2_weights = tf.get_variable("weight", [FC_SIZE, NUM_LABELS],
                                      initializer=tf.truncated_normal_initializer(stddev=0.1))
        if regularizer is not None:
            tf.add_to_collection('losses', regularizer(fc2_weights))
        fc2_biases = tf.get_variable("bias", [NUM_LABELS], initializer=tf.constant_initializer(0.1))
        logit = tf.matmul(fc1, fc2_weights) + fc2_biases

    return logit
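
A quick sanity check on the shape arithmetic quoted in the comments above (28*28 -> 14*14 -> 7*7, flattened length 3136): with SAME padding, a layer of stride s maps a side length of n to ceil(n/s). The following standalone snippet is my own sketch, not code from the book:

# shape_check.py -- my own sketch, not part of the book's code
import math

def same_out(size, stride):
    # With SAME padding, output side length = ceil(size / stride)
    return int(math.ceil(size / float(stride)))

size = 28
size = same_out(size, 1)   # conv1, stride 1 -> 28
size = same_out(size, 2)   # pool1, stride 2 -> 14
size = same_out(size, 1)   # conv2, stride 1 -> 14
size = same_out(size, 2)   # pool2, stride 2 -> 7
nodes = size * size * 64   # flattened input length of fc1
print(size, nodes)         # prints: 7 3136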

  • LeNet-5 network training

Save the following code as LeNet5train.py:

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import LeNet5inference  # load the LeNet5inference module defined above
import os
import numpy as np

# Configure the training parameters
BATCH_SIZE = 100
LEARNING_RATE_BASE = 0.01
LEARNING_RATE_DECAY = 0.99
REGULARIZATION_RATE = 0.0001
TRAINING_STEPS = 55000
MOVING_AVERAGE_DECAY = 0.99
MODEL_SAVE_PATH = "LeNet5_model/"  # the LeNet5_model subdirectory must already exist in the current directory
MODEL_NAME = "LeNet5_model"

# The training process
def train(mnist):
    # Define the input placeholder as a 4-D tensor
    x = tf.placeholder(tf.float32, [
        BATCH_SIZE,
        LeNet5inference.IMAGE_SIZE,
        LeNet5inference.IMAGE_SIZE,
        LeNet5inference.NUM_CHANNELS], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, LeNet5inference.OUTPUT_NODE], name='y-input')

    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    # This is the training pass, so the second argument (train) is True
    y = LeNet5inference.inference(x, True, regularizer)
    global_step = tf.Variable(0, trainable=False)

    # Define the loss function, learning rate, moving-average op, and training step.
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples / BATCH_SIZE, LEARNING_RATE_DECAY,
        staircase=True)

    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
    with tf.control_dependencies([train_step, variables_averages_op]):
        train_op = tf.no_op(name='train')

    # Initialize the TensorFlow Saver for model persistence.
    saver = tf.train.Saver()
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        for i in range(TRAINING_STEPS):
            xs, ys = mnist.train.next_batch(BATCH_SIZE)

            reshaped_xs = np.reshape(xs, (
                BATCH_SIZE,
                LeNet5inference.IMAGE_SIZE,
                LeNet5inference.IMAGE_SIZE,
                LeNet5inference.NUM_CHANNELS))
            _, loss_value, step = sess.run([train_op, loss, global_step], feed_dict={x: reshaped_xs, y_: ys})

            # Print the loss and save a checkpoint every 1000 steps
            if i % 1000 == 0:
                print("After %d training step(s), loss on training batch is %g." % (step, loss_value))
                saver.save(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME), global_step=global_step)

# Main entry point
def main(argv=None):
    mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
    train(mnist)

if __name__ == '__main__':
    main()
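
A note on the learning-rate schedule before looking at the results: with staircase=True, tf.train.exponential_decay multiplies the rate by LEARNING_RATE_DECAY once per epoch, i.e. every mnist.train.num_examples / BATCH_SIZE = 55000 / 100 = 550 steps. The following standalone snippet is my own sketch of the resulting schedule, not code from the book:

# lr_schedule.py -- my own sketch, not part of the book's code
LEARNING_RATE_BASE = 0.01
LEARNING_RATE_DECAY = 0.99
DECAY_STEPS = 55000 // 100  # steps per epoch

for step in (0, 550, 5500, 55000):
    # staircase=True: the exponent is the integer number of completed epochs
    lr = LEARNING_RATE_BASE * LEARNING_RATE_DECAY ** (step // DECAY_STEPS)
    print(step, lr)
# step 0     -> 0.01
# step 550   -> 0.0099
# step 5500  -> ~0.00904
# step 55000 -> ~0.00366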

Run results:

After 1 training step(s), loss on training batch is 6.18424.
After 1001 training step(s), loss on training batch is 0.80934.
After 2001 training step(s), loss on training batch is 0.814512.
After 3001 training step(s), loss on training batch is 0.736529.
After 4001 training step(s), loss on training batch is 0.670304.
After 5001 training step(s), loss on training batch is 0.831034.
After 6001 training step(s), loss on training batch is 0.689095.
......
After 50001 training step(s), loss on training batch is 0.589476.
After 51001 training step(s), loss on training batch is 0.623979.
After 52001 training step(s), loss on training batch is 0.586515.
After 53001 training step(s), loss on training batch is 0.593741.
After 54001 training step(s), loss on training batch is 0.592667.

  • LeNet-5 model evaluation

Save the following code as LeNet5eval.py:

import time
import math
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
import LeNet5inference  # [Erratum] the original misspells this module as LeNet5infernece
import LeNet5train

def evaluate(mnist):
    with tf.Graph().as_default() as g:
        # Define the input placeholder as a 4-D tensor
        x = tf.placeholder(tf.float32, [
            mnist.test.num_examples,     # evaluate the whole test set in one batch
            # LeNet5train.BATCH_SIZE,    # alternative: evaluate batch by batch
            LeNet5inference.IMAGE_SIZE,
            LeNet5inference.IMAGE_SIZE,
            LeNet5inference.NUM_CHANNELS],
                           name='x-input')
        y_ = tf.placeholder(tf.float32, [None, LeNet5inference.OUTPUT_NODE], name='y-input')
        validate_feed = {x: mnist.test.images, y_: mnist.test.labels}
        global_step = tf.Variable(0, trainable=False)

        regularizer = tf.contrib.layers.l2_regularizer(LeNet5train.REGULARIZATION_RATE)
        y = LeNet5inference.inference(x, False, regularizer)
        correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        variable_averages = tf.train.ExponentialMovingAverage(LeNet5train.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        # n = math.ceil(mnist.test.num_examples / LeNet5train.BATCH_SIZE)  # for batch-by-batch evaluation
        n = 1  # the whole test set is fed as a single batch, so one iteration suffices
        for i in range(n):
            with tf.Session() as sess:
                # tf.train.get_checkpoint_state finds the latest model file name via the checkpoint file
                ckpt = tf.train.get_checkpoint_state(LeNet5train.MODEL_SAVE_PATH)
                if ckpt and ckpt.model_checkpoint_path:
                    # Load the model
                    saver.restore(sess, ckpt.model_checkpoint_path)
                    # Recover the training iteration count from the checkpoint file name
                    global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
                    xs, ys = mnist.test.next_batch(mnist.test.num_examples)
                    #xs, ys = mnist.test.next_batch(LeNet5train.BATCH_SIZE)
                    reshaped_xs = np.reshape(xs, (
                        mnist.test.num_examples,
                        # LeNet5train.BATCH_SIZE,  # alternative: batch by batch
                        LeNet5inference.IMAGE_SIZE,
                        LeNet5inference.IMAGE_SIZE,
                        LeNet5inference.NUM_CHANNELS))
                    accuracy_score = sess.run(accuracy, feed_dict={x:reshaped_xs, y_:ys})
                    print("After %s training step(s), test accuracy = %g" % (global_step, accuracy_score))
                else:
                    print('No checkpoint file found')
                    return

# Main entry point
def main(argv=None):
    mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
    evaluate(mnist)

if __name__ == '__main__':
    main()

Run results:

After 54001 training step(s), test accuracy = 0.9919
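
Why does evaluation build its Saver from variable_averages.variables_to_restore() instead of a plain Saver? Training maintained a shadow (moving-average) copy of every trainable variable, and restoring those shadow values in place of the raw weights typically gives slightly better test accuracy. The following standalone snippet is my own sketch (assuming TensorFlow 1.x) of the name mapping this produces, not code from the book:

# ema_restore_demo.py -- my own sketch, not part of the book's code
import tensorflow as tf

v = tf.Variable(0.0, name="v")
ema = tf.train.ExponentialMovingAverage(0.99)
maintain_op = ema.apply([v])  # creates the shadow variable v/ExponentialMovingAverage

# variables_to_restore() maps shadow-variable names back to the graph's
# variables, so saver.restore() loads the moving-averaged values into them.
print(ema.variables_to_restore())
# -> {'v/ExponentialMovingAverage': <tf.Variable 'v:0' ...>}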

    Original author: runze Zheng
    Original article: https://zhuanlan.zhihu.com/p/31534286
    This article is reposted from the web to share knowledge. If it infringes any rights, please contact the blogger for removal.