Built on the deep learning framework provided by TensorFlow and run on Alibaba Cloud's Shujia (数加) platform, it runs very efficiently, but the frequent data fetching currently makes the requests cost too much money. On a local quad-core PC it also runs faster than the C++ implementation.
The code:
# -*- coding: utf-8 -*-
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
import sys
import argparse
import numpy as np
import tensorflow as tf
FLAGS = None
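# AlexNet written directly with tf.nn ops; all weights and biases are created once
# with tf.get_variable in __init__ and shared between the training and test graphs.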
class slqAlexNet():
    def __init__(self):
        self.accuracy = 0
        # tf.set_random_seed(2017)
        self.weights = {
            # kernel map: 11*11, input map: 3, output map: 96
            'c1conv':tf.get_variable('c1conv', [11,11,3,96], initializer=tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None, dtype=tf.float32)),
            # output map: 96
            's1pool':tf.get_variable('s1pool', [96,], initializer=tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None, dtype=tf.float32)),
            # kernel map: 5*5, input map: 96, output map: 256
            'c2conv':tf.get_variable('c2conv', [5,5,96,256], initializer=tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None, dtype=tf.float32)),
            # output map: 256
            's2pool':tf.get_variable('s2pool', [256,], initializer=tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None, dtype=tf.float32)),
            # kernel map: 3*3, input map: 256, output map: 384
            'c3conv':tf.get_variable('c3conv', [3,3,256,384], initializer=tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None, dtype=tf.float32)),
            # kernel map: 3*3, input map: 384, output map: 384
            'c4conv':tf.get_variable('c4conv', [3,3,384,384], initializer=tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None, dtype=tf.float32)),
            # kernel map: 3*3, input map: 384, output map: 256
            'c5conv':tf.get_variable('c5conv', [3,3,384,256], initializer=tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None, dtype=tf.float32)),
            # output map: 256
            's5pool':tf.get_variable('s5pool', [256,], initializer=tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None, dtype=tf.float32)),
            # input map: 9216, output map: 4096
            'f1conn':tf.get_variable('f1conn', [9216,4096], initializer=tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None, dtype=tf.float32)),
            # input map: 4096, output map: 4096
            'f2conn':tf.get_variable('f2conn', [4096, 4096], initializer=tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None, dtype=tf.float32)),
            # input map: 4096, output map: 1000
            'f3conn':tf.get_variable('f3conn', [4096, 1000], initializer=tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None, dtype=tf.float32)),
        }
        self.bias={
            'c1bias':tf.get_variable('c1bias', [96,], initializer=tf.zeros_initializer(dtype=tf.float32)),
            's1bias':tf.get_variable('s1bias', [96,], initializer=tf.zeros_initializer(dtype=tf.float32)),
            'c2bias':tf.get_variable('c2bias', [256,], initializer=tf.ones_initializer(dtype=tf.float32)),
            's2bias':tf.get_variable('s2bias', [256,], initializer=tf.zeros_initializer(dtype=tf.float32)),
            'c3bias':tf.get_variable('c3bias', [384,], initializer=tf.zeros_initializer(dtype=tf.float32)),
            'c4bias':tf.get_variable('c4bias', [384,], initializer=tf.ones_initializer(dtype=tf.float32)),
            'c5bias':tf.get_variable('c5bias', [256,], initializer=tf.ones_initializer(dtype=tf.float32)),
            's5bias':tf.get_variable('s5bias', [256,], initializer=tf.zeros_initializer(dtype=tf.float32)),
            'f1bias':tf.get_variable('f1bias', [4096,], initializer=tf.ones_initializer(dtype=tf.float32)),
            'f2bias':tf.get_variable('f2bias', [4096,], initializer=tf.ones_initializer(dtype=tf.float32)),
            'f3bias':tf.get_variable('f3bias', [1000,], initializer=tf.zeros_initializer(dtype=tf.float32)),
        }
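    # Forward pass: conv1/pool1 -> conv2/pool2 -> conv3 -> conv4 -> conv5/pool5,
    # then three fully connected layers; the last layer returns raw logits over 1000 classes.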
    def __model__(self, data):
        c1map = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(data, self.weights['c1conv'], strides=[1,4,4,1], padding='VALID'), self.bias['c1bias']))
        # s1map = tf.nn.tanh(tf.nn.bias_add(tf.nn.max_pool(c1map, ksize=[1,3,3,1], strides=[1,2,2,1], padding='VALID'), self.bias['s1bias']))
        s1map = tf.nn.bias_add(tf.nn.max_pool(c1map, ksize=[1,3,3,1], strides=[1,2,2,1], padding='VALID'), self.bias['s1bias'])
        drops1 = tf.nn.dropout(s1map, 0.5)
        c2map = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(drops1, self.weights['c2conv'], strides=[1,1,1,1], padding='SAME'), self.bias['c2bias']))
        # c2map = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(s1map, self.weights['c2conv'], strides=[1,1,1,1], padding='SAME'), self.bias['c2bias']))
        # s2map = tf.nn.tanh(tf.nn.bias_add(tf.nn.max_pool(c2map, ksize=[1,3,3,1], strides=[1,2,2,1], padding='VALID'), self.bias['s2bias']))
        s2map = tf.nn.bias_add(tf.nn.max_pool(c2map, ksize=[1,3,3,1], strides=[1,2,2,1], padding='VALID'), self.bias['s2bias'])
        c3map = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(s2map, self.weights['c3conv'], strides=[1,1,1,1], padding='SAME'), self.bias['c3bias']))
        dropc3 = tf.nn.dropout(c3map, 0.5)
        c4map = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(dropc3, self.weights['c4conv'], strides=[1,1,1,1], padding='SAME'), self.bias['c4bias']))
        # c4map = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(c3map, self.weights['c4conv'], strides=[1,1,1,1], padding='SAME'), self.bias['c4bias']))
        dropc4 = tf.nn.dropout(c4map, 0.5)
        c5map = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(dropc4, self.weights['c5conv'], strides=[1,1,1,1], padding='SAME'), self.bias['c5bias']))
        # c5map = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(c4map, self.weights['c5conv'], strides=[1,1,1,1], padding='SAME'), self.bias['c5bias']))
        # s5map = tf.nn.tanh(tf.nn.bias_add(tf.nn.max_pool(c5map, ksize=[1,3,3,1], strides=[1,2,2,1], padding='VALID'), self.bias['s5bias']))
        s5map = tf.nn.bias_add(tf.nn.max_pool(c5map, ksize=[1,3,3,1], strides=[1,2,2,1], padding='VALID'), self.bias['s5bias'])
        flatten = tf.reshape(s5map, [-1, self.weights['f1conn'].get_shape().as_list()[0]])
        drop1 = tf.nn.dropout(flatten, 0.5)
        f1map = tf.nn.relu(tf.matmul(drop1, self.weights['f1conn'])+self.bias['f1bias'])
        drop2 = tf.nn.dropout(f1map, 0.5)
        f2map = tf.nn.relu(tf.matmul(drop2, self.weights['f2conn'])+self.bias['f2bias'])
        # f3map = tf.nn.tanh(tf.matmul(f2map, self.weights['f3conn'])+self.bias['f3bias'])
        f3map = tf.matmul(f2map, self.weights['f3conn'])+self.bias['f3bias']
        return f3map
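    # Images are stored as fixed-length raw records of 3*227*227 uint8 bytes (channel-first),
    # labels as one byte per record; both are read from the path given by --buckets.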
    def train(self):
        widthBytes = 227
        heightBytes = 227
        depthBytes = 3
        recBytes = 3*227*227
        print("create file directory list ...")
        trainbatch1 = os.path.join(FLAGS.buckets, "imgTrainBatch01")
        trainbatch2 = os.path.join(FLAGS.buckets, "imgTrainBatch02")
        trainbatch3 = os.path.join(FLAGS.buckets, "imgTrainBatch03")
        trainbatch4 = os.path.join(FLAGS.buckets, "imgTrainBatch04")
        trainbatch5 = os.path.join(FLAGS.buckets, "imgTrainBatch05")
        filename_queue = tf.train.string_input_producer([trainbatch1, trainbatch2, trainbatch3, trainbatch4, trainbatch5], shuffle=False)
        filename_queue_t = tf.train.string_input_producer([os.path.join(FLAGS.buckets, "imgTestArray")])
        filename_out_module = os.path.join(FLAGS.buckets, "alexnet.module")
        reader = tf.FixedLengthRecordReader(record_bytes=recBytes)
        # separate reader for the test queue, so train and test records are not interleaved in one reader's state
        reader_t = tf.FixedLengthRecordReader(record_bytes=recBytes)
        print("create graph read train ...")
        key,value = reader.read(filename_queue)
        bytes = tf.decode_raw(value,out_type=tf.uint8)
        originalImg = tf.reshape(bytes, [depthBytes,heightBytes,widthBytes])
        print(originalImg)
        imgtrain = tf.transpose(originalImg,[1,2,0])
        print(imgtrain)
        imgtrain = tf.cast(imgtrain, tf.float32) * 2.0/255 - 1.0
        print(imgtrain)
        imgtrain = tf.reshape(imgtrain, shape=[-1, 227, 227, 3])
        print(imgtrain)
        print("create graph read test ...")
        key_t,value_t = reader_t.read(filename_queue_t)
        bytes_t = tf.decode_raw(value_t,out_type=tf.uint8)
        originalImg_t = tf.reshape(tf.strided_slice(bytes_t,[0],[recBytes]),[depthBytes,heightBytes,widthBytes])
        img_t = tf.transpose(originalImg_t,[1,2,0])
        img_t = tf.cast(img_t, tf.float32) * 2.0/255 - 1.0
        img_t = tf.reshape(img_t, shape=[-1, 227, 227, 3])
        label_reader = tf.FixedLengthRecordReader(record_bytes=1)
        label_reader_t = tf.FixedLengthRecordReader(record_bytes=1)
        label_handle = tf.train.string_input_producer([os.path.join(FLAGS.buckets, "imgTrainLabel")])
        label_t = tf.train.string_input_producer([os.path.join(FLAGS.buckets, "imgTestLabel")])
        print("create graph read train label ...")
        key_tr,value_tr = label_reader.read(label_handle)
        bytes_tr = tf.decode_raw(value_tr,out_type=tf.uint8)
        label_train = tf.strided_slice(bytes_tr,[0],[1])
        one_hot_label_train = tf.one_hot(label_train, 1000, 1.0, 0.0, dtype=tf.float32)
        print("create graph read test label ...")
        key_tl,value_tl = label_reader_t.read(label_t)
        bytes_tl = tf.decode_raw(value_tl,out_type=tf.uint8)
        label_test = tf.strided_slice(bytes_tl,[0],[1])
        one_hot_label_test = tf.one_hot(label_test, 1000, 1.0, 0.0, dtype=tf.float32)
        print("create step ...")
        train_tep = self.__model__(imgtrain)
        test_tep = self.__model__(img_t)
        print("create train graph ...")
        train_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=one_hot_label_train, logits=train_tep))
        train_loop = tf.train.GradientDescentOptimizer(0.0001).minimize(train_loss)
        # print("create test graph ...")
        # test_loop_one = tf.equal(tf.argmax(test_tep, 1), tf.argmax(one_hot_label_test, 1))
        init=tf.global_variables_initializer()
        saver = tf.train.Saver()
        self.savePara()
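        # One outer iteration: 52500 training steps, then a top-5 evaluation over 5000 test images.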
        with tf.Session() as sess:
            print("start session ...")
            sess.run(init)
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)
            max_iter = 100
            iter=0
            while iter < max_iter:
                self.accuracy = np.array([0,0,0,0,0])
                for i in range(52500):
                    if (0 == (i%5000)):
                        print("train tep ", i)
                    sess.run(train_loop)
                for i in range(5000):
                    if (0 == (i%500)):
                        print("test tep ", i)
                    test_v = sess.run(test_tep)
                    v_a = np.array([0.,1.,2.,3.,4.])
                    i_a = np.array([test_v[0,0],test_v[0,1],test_v[0,2],test_v[0,3],test_v[0,4]])
                    for j in range(5,1000):
                        if (test_v[0,j] > np.min(i_a)):
                            # replace the current smallest of the five kept scores and remember its class index
                            minidx = int(np.argmin(i_a))
                            i_a[minidx] = test_v[0,j]
                            v_a[minidx] = j
                    test_l = sess.run(label_test)
                    # sort the five kept scores in descending order, carrying the class indices along
                    for j in range(4):
                        for k in range(j+1, 5):
                            if (i_a[j] < i_a[k]):
                                ta = i_a[j]
                                i_a[j] = i_a[k]
                                i_a[k] = ta
                                ta = v_a[j]
                                v_a[j] = v_a[k]
                                v_a[k] = ta
                    for j in range(5):
                        if (v_a[j] == test_l):
                            self.accuracy[j] += 1
                    # valid_v = sess.run(test_loop_one)
                    # if (False != valid_v[0]):
                    #     self.accuracy += 1.
                iter+=1
                print("epoch loop ", iter, " acc ", self.accuracy)
                if (np.sum(self.accuracy)/5000.0 > 0.98):
                    saver.save(sess, filename_out_module)
                    break
            if (iter == max_iter):
                saver.save(sess, filename_out_module)
            coord.request_stop()  # the input queues need to be closed before joining the threads
            coord.join(threads)
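    # Register every variable in a named collection so it can be retrieved again via tf.get_collection
    # after importing the saved meta graph.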
    def savePara(self):
        tf.add_to_collection('alexparam', self.weights['c1conv'])
        tf.add_to_collection('alexparam', self.weights['s1pool'])
        tf.add_to_collection('alexparam', self.weights['c2conv'])
        tf.add_to_collection('alexparam', self.weights['s2pool'])
        tf.add_to_collection('alexparam', self.weights['c3conv'])
        tf.add_to_collection('alexparam', self.weights['c4conv'])
        tf.add_to_collection('alexparam', self.weights['c5conv'])
        tf.add_to_collection('alexparam', self.weights['s5pool'])
        tf.add_to_collection('alexparam', self.weights['f1conn'])
        tf.add_to_collection('alexparam', self.weights['f2conn'])
        tf.add_to_collection('alexparam', self.weights['f3conn'])
        tf.add_to_collection('alexparam', self.bias['c1bias'])
        tf.add_to_collection('alexparam', self.bias['s1bias'])
        tf.add_to_collection('alexparam', self.bias['c2bias'])
        tf.add_to_collection('alexparam', self.bias['s2bias'])
        tf.add_to_collection('alexparam', self.bias['c3bias'])
        tf.add_to_collection('alexparam', self.bias['c4bias'])
        tf.add_to_collection('alexparam', self.bias['c5bias'])
        tf.add_to_collection('alexparam', self.bias['s5bias'])
        tf.add_to_collection('alexparam', self.bias['f1bias'])
        tf.add_to_collection('alexparam', self.bias['f2bias'])
        tf.add_to_collection('alexparam', self.bias['f3bias'])
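# Entry point: the script expects the platform-style flags --buckets (input data path) and --checkpointDir (output model path).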
def main(_):
    print("create alexnet obj ...")
    alexNet = slqAlexNet()
    print("start train ...")
    alexNet.train()
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # buckets
    parser.add_argument('--buckets', type=str, default='',
                        help='input data path')
    # checkpoint
    parser.add_argument('--checkpointDir', type=str, default='',
                        help='output model path')
    FLAGS, _ = parser.parse_known_args()
    print(FLAGS.buckets)
    tf.app.run(main=main)
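For reference, a minimal sketch of how the saved checkpoint could be loaded back elsewhere. Only the 'alexparam' collection name and the alexnet.module filename come from the code above; the local ./model directory and the script name restore_alexnet.py are assumptions for illustration.

# restore_alexnet.py -- hypothetical helper, not part of the training script above
import tensorflow as tf
# The training script saves the checkpoint under the prefix "alexnet.module";
# here we assume it has been copied into a local directory called ./model.
saver = tf.train.import_meta_graph('./model/alexnet.module.meta')
with tf.Session() as sess:
    saver.restore(sess, './model/alexnet.module')
    # savePara() registered every variable in the 'alexparam' collection.
    params = tf.get_collection('alexparam')
    for p in params:
        print(p.name, p.get_shape().as_list())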