· 训练鸢尾花分类模型
import pandas as pd
import tensorflow as tf
import iris_data
1. 准备数据
将数据集分为训练集和测试集:
变量名以 _x 结尾的是特征值,以 _y 结尾的是标签。
# Dataset locations and shared constants.
TRAIN_URL = "http://download.tensorflow.org/data/iris_training.csv"
TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"

# Column names applied to both CSV files; 'Species' is the column that the
# loading code pops off as the label.
CSV_COLUMN_NAMES = [
    'SepalLength',
    'SepalWidth',
    'PetalLength',
    'PetalWidth',
    'Species',
]

# Class names; the prediction loop indexes this list with the predicted class id.
SPECIES = [
    'Setosa',
    'Versicolor',
    'Virginica',
]
#定义数据集路径
def maybe_download():
    """Fetch the training and test CSV files and return their local paths.

    Each file is requested through ``tf.keras.utils.get_file`` under the last
    path component of its URL.

    Returns:
        A ``(train_path, test_path)`` tuple holding the value ``get_file``
        returned for ``TRAIN_URL`` and ``TEST_URL`` respectively.
    """
    return tuple(
        tf.keras.utils.get_file(url.split('/')[-1], url)
        for url in (TRAIN_URL, TEST_URL)
    )
train_path, test_path = maybe_download()
# '/root/.keras/datasets/iris_training.csv','/root/.keras/datasets/iris_test.csv'

# Build the feature and label sets. ``pop`` removes the 'Species' column from
# the frame in place, so the frame left behind holds only the feature columns.
train = pd.read_csv(train_path, names=CSV_COLUMN_NAMES, header=0)
train_y = train.pop('Species')
train_x = train

test = pd.read_csv(test_path, names=CSV_COLUMN_NAMES, header=0)
test_y = test.pop('Species')
test_x = test
# test_x.size
2. 构造Dataset
#构造输入函数
def train_input_fn(features, labels, batch_size):
    """Build the input dataset used for training.

    Args:
        features: Feature collection; it is converted with ``dict()`` because
            the feature set is passed to the dataset as a Python dict.
        labels: Labels sliced alongside the features.
        batch_size: Number of examples per batch.

    Returns:
        The dataset after ``shuffle(1000)``, ``repeat()`` and
        ``batch(batch_size)`` have been applied, in that order.
    """
    feature_dict = dict(features)
    dataset = tf.data.Dataset.from_tensor_slices((feature_dict, labels))
    dataset = dataset.shuffle(1000)
    dataset = dataset.repeat()
    return dataset.batch(batch_size)
# dataset = tf.data.Dataset.from_tensor_slices((dict(train_x), train_y))
# input_tem = dataset.shuffle(1000).repeat().batch(train_y.size)
def eval_input_fn(features, labels, batch_size):
    """Build the input dataset used for evaluation or prediction.

    Args:
        features: Feature collection; converted with ``dict()``.
        labels: Labels sliced alongside the features, or ``None`` to build a
            features-only dataset.
        batch_size: Number of examples per batch; must not be ``None``.

    Returns:
        The sliced dataset batched by ``batch_size``.

    Raises:
        AssertionError: If ``batch_size`` is ``None``.
    """
    feature_dict = dict(features)

    # Without labels, the features alone form the input.
    inputs = feature_dict if labels is None else (feature_dict, labels)

    # Convert the inputs into a dataset.
    dataset = tf.data.Dataset.from_tensor_slices(inputs)

    # Group the examples into batches.
    assert batch_size is not None, "batch_size must not be None"
    return dataset.batch(batch_size)
# Stand-alone demo of the training pipeline: slice the training frame, then
# shuffle, repeat and batch by 100.
dataset = (
    tf.data.Dataset.from_tensor_slices((dict(train_x), train_y))
    .shuffle(1000)
    .repeat()
    .batch(100)
)
# print dataset
3. 定义特征列
# Define the feature columns: one numeric column per key of train_x.
my_feature_columns = [
    tf.feature_column.numeric_column(key=key) for key in train_x.keys()
]
# my_feature_columns
# Checkpoint settings: save every 20 * 60 seconds and keep at most 10 checkpoints.
my_checkpointing_config = tf.estimator.RunConfig(
    keep_checkpoint_max=10,
    save_checkpoints_secs=20 * 60,
)

# Instantiate the classifier; its files are written under the 'iris' directory.
classifier = tf.estimator.DNNClassifier(
    model_dir='iris',
    config=my_checkpointing_config,
    feature_columns=my_feature_columns,
    hidden_units=[10, 10],
    n_classes=3,
)
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 1200, '_session_config': None, '_keep_checkpoint_max': 10, '_task_type': 'worker', '_global_id_in_cluster': 0, '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f59dbdd6c90>, '_evaluation_master': '', '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_service': None, '_num_ps_replicas': 0, '_tf_random_seed': None, '_master': '', '_device_fn': None, '_num_worker_replicas': 1, '_task_id': 0, '_log_step_count_steps': 100, '_model_dir': 'iris', '_train_distribute': None, '_save_summary_steps': 100}
# Train the model for 1000 steps; the batch size equals the number of training labels.
classifier.train(
    steps=1000,
    input_fn=lambda: train_input_fn(train_x, train_y, batch_size=train_y.size),
)
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from iris/model.ckpt-7050
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 7050 into iris/model.ckpt.
INFO:tensorflow:loss = 3.1232438, step = 7050
INFO:tensorflow:global_step/sec: 747.765
INFO:tensorflow:loss = 3.1091948, step = 7150 (0.135 sec)
INFO:tensorflow:global_step/sec: 983.205
INFO:tensorflow:loss = 3.0955226, step = 7250 (0.102 sec)
INFO:tensorflow:global_step/sec: 974.592
INFO:tensorflow:loss = 3.082192, step = 7350 (0.102 sec)
INFO:tensorflow:global_step/sec: 1007.09
INFO:tensorflow:loss = 3.0691144, step = 7450 (0.099 sec)
INFO:tensorflow:global_step/sec: 961.002
INFO:tensorflow:loss = 3.0563426, step = 7550 (0.104 sec)
INFO:tensorflow:global_step/sec: 956.992
INFO:tensorflow:loss = 3.0438962, step = 7650 (0.104 sec)
INFO:tensorflow:global_step/sec: 968.035
INFO:tensorflow:loss = 3.031734, step = 7750 (0.104 sec)
INFO:tensorflow:global_step/sec: 926.863
INFO:tensorflow:loss = 3.0198064, step = 7850 (0.108 sec)
INFO:tensorflow:global_step/sec: 949.937
INFO:tensorflow:loss = 3.008165, step = 7950 (0.105 sec)
INFO:tensorflow:Saving checkpoints for 8050 into iris/model.ckpt.
INFO:tensorflow:Loss for final step: 2.997815.
<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x7f59dbdd6a90>
# Evaluate the trained model on the test set as a single batch of test_y.size examples.
eval_result = classifier.evaluate(
    input_fn=lambda: eval_input_fn(test_x, test_y, batch_size=test_y.size),
)

# Only the 'accuracy' entry of the evaluation result is reported.
print('\n Test set accuracy:{accuracy:0.3f}\n'.format(accuracy=eval_result['accuracy']))
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-08-29-08:28:37
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from iris/model.ckpt-8050
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-08-29-08:28:37
INFO:tensorflow:Saving dict for global step 8050: accuracy = 0.96666664, average_loss = 0.1475552, global_step = 8050, loss = 4.4266562
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 8050: iris/model.ckpt-8050
Test set accuracy:0.967
# Prediction experiment: three hand-written samples and the species expected for each.
expected = ['Setosa', 'Versicolor', 'Virginica']
predict_x = {
    'SepalLength': [5.1, 5.9, 6.9],
    'SepalWidth': [3.3, 3.0, 3.1],
    'PetalLength': [1.7, 4.2, 5.4],
    'PetalWidth': [0.5, 1.5, 2.1],
}

predictions = classifier.predict(
    input_fn=lambda: eval_input_fn(predict_x, expected, batch_size=len(expected)),
)

template = '\nPrediction is "{}" ({:.1f}%), expected "{}"'

# Pair each prediction with the species expected for that sample and report
# the predicted class together with its probability as a percentage.
for prediction, expected_species in zip(predictions, expected):
    class_id = prediction['class_ids'][0]
    percent = 100 * prediction['probabilities'][class_id]
    print(template.format(SPECIES[class_id], percent, expected_species))
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from iris/model.ckpt-8050
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
Prediction is "Setosa" (99.9%), expected "Setosa"
Prediction is "Versicolor" (100.0%), expected "Versicolor"
Prediction is "Virginica" (100.0%), expected "Virginica"
# !tensorboard --logdir /notebooks/deeplearn/tensorflow_samples/iris/iris_train_ckpt
[33mW0830 13:31:07.809473 Reloader plugin_event_accumulator.py:286] Found more than one graph event per run, or there was a metagraph containing a graph_def, as well as one or more graph events. Overwriting the graph with the newest event.
[0mW0830 13:31:07.809473 140491457595136 plugin_event_accumulator.py:286] Found more than one graph event per run, or there was a metagraph containing a graph_def, as well as one or more graph events. Overwriting the graph with the newest event.
[33mW0830 13:31:07.809792 Reloader plugin_event_accumulator.py:294] Found more than one metagraph event per run. Overwriting the metagraph with the newest event.
[0mW0830 13:31:07.809792 140491457595136 plugin_event_accumulator.py:294] Found more than one metagraph event per run. Overwriting the metagraph with the newest event.
TensorBoard 1.10.0 at http://8402702cc16f:6006 (Press CTRL+C to quit)
· 构造持久化服务
1. 导出模型
serving_input_fn 示例1
build_raw_serving_input_receiver_fn(…): 接受原始特征张量(特征名到Tensor的字典)
由于该函数只接受Tensor,所以在定义serving_input_receiver_fn时,需要为每个特征传入一个tf.placeholder。
#导出SavedModel
# #定义cli输入属性名,并映射为模型的输入Tensor名
def serving_input_receiver_fn1():
    """Build a raw-tensor serving input receiver function for the iris features.

    One float32 placeholder is created for every entry of
    ``iris_data.CSV_COLUMN_NAMES`` except the last, each named after its
    column, and the resulting mapping is handed to
    ``tf.estimator.export.build_raw_serving_input_receiver_fn``.

    Returns:
        The function returned by ``build_raw_serving_input_receiver_fn``.
    """
    # The last column name is skipped -- presumably the label column
    # ('Species'), as in this file's own CSV_COLUMN_NAMES; confirm against
    # the iris_data module.
    feature_names = iris_data.CSV_COLUMN_NAMES[:-1]

    # NOTE(review): the placeholders are declared with shape=[3], yet the
    # export log reports the receiver tensors with shape (?,); the builder
    # appears to relax the leading dimension -- confirm against the TF docs.
    feature_map = {
        name: tf.placeholder(tf.float32, shape=[3], name=name)
        for name in feature_names
    }
    return tf.estimator.export.build_raw_serving_input_receiver_fn(feature_map)
# Export the trained model beneath the ./iris/ directory, using the raw-tensor
# receiver function built by serving_input_receiver_fn1().
classifier.export_savedmodel(
    export_dir_base='./iris/',
    serving_input_receiver_fn=serving_input_receiver_fn1(),
    strip_default_attrs=True,
)
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Signatures INCLUDED in export for Eval: None
INFO:tensorflow:Signatures INCLUDED in export for Classify: None
INFO:tensorflow:Signatures INCLUDED in export for Regress: None
INFO:tensorflow:Signatures INCLUDED in export for Predict: ['predict']
INFO:tensorflow:Signatures INCLUDED in export for Train: None
INFO:tensorflow:Signatures EXCLUDED from export because they cannot be be served via TensorFlow Serving APIs:
INFO:tensorflow:'serving_default' : Classification input must be a single string Tensor; got {'SepalLength': <tf.Tensor 'SepalLength:0' shape=(?,) dtype=float32>, 'PetalWidth': <tf.Tensor 'PetalWidth:0' shape=(?,) dtype=float32>, 'PetalLength': <tf.Tensor 'PetalLength:0' shape=(?,) dtype=float32>, 'SepalWidth': <tf.Tensor 'SepalWidth:0' shape=(?,) dtype=float32>}
INFO:tensorflow:'classification' : Classification input must be a single string Tensor; got {'SepalLength': <tf.Tensor 'SepalLength:0' shape=(?,) dtype=float32>, 'PetalWidth': <tf.Tensor 'PetalWidth:0' shape=(?,) dtype=float32>, 'PetalLength': <tf.Tensor 'PetalLength:0' shape=(?,) dtype=float32>, 'SepalWidth': <tf.Tensor 'SepalWidth:0' shape=(?,) dtype=float32>}
WARNING:tensorflow:Export includes no default signature!
INFO:tensorflow:Restoring parameters from iris/model.ckpt-8050
INFO:tensorflow:Assets added to graph.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: ./iris/temp-1535531324/saved_model.pb
'./iris/1535531324'
serving_input_fn 示例2
build_parsing_serving_input_receiver_fn(…): 接受一个tf.Example
由于该函数只接受tf.Example,所以在定义serving_input_receiver_fn时,需要对每个特征作预处理。
生成feature_spec时需要传入特征列,前面定义的my_feature_columns可以用在这里。
# Derive the parsing spec from the feature columns defined earlier, wrap it in
# a parsing receiver function, and export the model with it.
feature_spec = tf.feature_column.make_parse_example_spec(my_feature_columns)
serving_input_receiver_fn = (
    tf.estimator.export.build_parsing_serving_input_receiver_fn(feature_spec)
)
export_model = classifier.export_savedmodel(
    export_dir_base='./iris/',
    serving_input_receiver_fn=serving_input_receiver_fn,
)
serving_input_fn 示例3
具体地,可将示例2拆解为示例3
# Spell out the four numeric feature columns explicitly, then derive the
# parsing spec from them.
feature_columns = [
    tf.feature_column.numeric_column(key='SepalLength'),
    tf.feature_column.numeric_column(key='SepalWidth'),
    tf.feature_column.numeric_column(key='PetalLength'),
    tf.feature_column.numeric_column(key='PetalWidth'),
]
feature_spec = tf.feature_column.make_parse_example_spec(feature_columns)

# Parenthesised call form: prints the same text under Python 2 and, unlike the
# bare print statement, also parses under Python 3 (consistent with the other
# print calls in this file).
print(feature_spec)
def serving_input_receiver_fn2():
    """Return an input receiver that expects a serialized tf.Example.

    A string placeholder named ``input_example_tensor`` is exposed to callers
    under the ``'examples'`` key, and its contents are parsed into features
    with the module-level ``feature_spec``.

    Returns:
        A ``tf.estimator.export.ServingInputReceiver`` built from the parsed
        features and the receiver-tensor mapping.
    """
    serialized_tf_example = tf.placeholder(
        dtype=tf.string,
        name='input_example_tensor',
    )
    parsed_features = tf.parse_example(serialized_tf_example, feature_spec)
    return tf.estimator.export.ServingInputReceiver(
        parsed_features,
        {'examples': serialized_tf_example},
    )
# Export again beneath ./iris/, this time passing the hand-written tf.Example
# receiver function itself (not its result).
classifier.export_savedmodel(
    export_dir_base='./iris/',
    serving_input_receiver_fn=serving_input_receiver_fn2,
    strip_default_attrs=True,
)
{'SepalLength': FixedLenFeature(shape=(1,), dtype=tf.float32, default_value=None), 'PetalLength': FixedLenFeature(shape=(1,), dtype=tf.float32, default_value=None), 'PetalWidth': FixedLenFeature(shape=(1,), dtype=tf.float32, default_value=None), 'SepalWidth': FixedLenFeature(shape=(1,), dtype=tf.float32, default_value=None)}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Signatures INCLUDED in export for Eval: None
INFO:tensorflow:Signatures INCLUDED in export for Classify: ['serving_default', 'classification']
INFO:tensorflow:Signatures INCLUDED in export for Regress: None
INFO:tensorflow:Signatures INCLUDED in export for Predict: ['predict']
INFO:tensorflow:Signatures INCLUDED in export for Train: None
INFO:tensorflow:Restoring parameters from iris/model.ckpt-7050
INFO:tensorflow:Assets added to graph.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: ./iris/temp-1535094252/saved_model.pb
'./iris/1535094252'
TensorServingInputReceiver
由上,ServingInputReceiver可以接受一个包含特征列的字典,这个字典可以只有一个特征列,但必须是字典。
但是model既可以接受字典,也可以接受一个单一的原始特征。
为了解决这个bug,提供了TensorServingInputReceiver类,它可以接受一个单一的原始特征。
quote: https://github.com/tensorflow/tensorflow/issues/11674