Recently I needed the second-order partial derivatives (the Hessian matrix) of a loss with respect to the model parameters. The natural move is tf.gradients or tf.hessians, but that produced the error in the title (or errors much like it).
Searching turned up a good answer on Stack Overflow: https://stackoverflow.com/questions/45396699/tf-hessians-valueerror-none-values-not-supported#
> This is because in your graph, the node `loss` does not depend on the node `tf.concat([W,b], axis=0)`. There is no backpropagation of one onto the other, and therefore no derivative. TensorFlow is not a formal calculus engine; it can only estimate derivatives of a node with respect to another node if the former is downstream of the latter. So, for example, even `tf.hessians(loss, 2*W)` will fail for the same reason (`2*W` is a new node and `loss` does not depend on it), even though the relationship to `tf.hessians(loss, W)` is straightforward.
>
> Note that the situation is the same with `tf.gradients`, even though it fails differently: it returns `None`s rather than throwing an exception.
In other words, tf.gradients and tf.hessians are not that smart. If the ys argument does not actually depend on the xs argument in the graph, they return None, which then blows up downstream. And "depend" means a direct graph connection: once I built new nodes out of the variables with reshape and concat and tried to differentiate with respect to those, the connection was gone.
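The quoted `2*W` point is easy to reproduce in a couple of lines; a minimal sketch (TF 1.x assumed):

```python
import tensorflow as tf

W = tf.Variable([1., 2.])
loss = tf.reduce_sum(W * W)

print(tf.gradients(loss, W))      # [<tf.Tensor ...>]: W is upstream of loss
print(tf.gradients(loss, 2 * W))  # [None]: 2*W is a new node the loss was not built from
```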
I ran a few groups of experiments to verify this.
```python
import tensorflow as tf
import numpy as np

# Scalar
# W = tf.Variable(10)
# b = tf.Variable(3)
# X = tf.placeholder(dtype=tf.int32, shape=[])
# y = tf.add(tf.multiply(W, X), b)
# gradients = tf.gradients(y, [W, b])
# sess = tf.Session()
# _gra = sess.run(gradients, feed_dict={X: 7})
# print(_gra)
'''[7, 1]
'''

# 1-D vector
# W = tf.Variable([1, 2, 3])
# X = tf.placeholder(dtype=tf.int32, shape=[3])
# y = tf.reduce_sum(tf.multiply(X, W))
# gradients = tf.gradients(y, W)
# sess = tf.Session()
# sess.run(tf.global_variables_initializer())
# _gra, _y = sess.run((gradients, y), feed_dict={X: [5, 7, 11]})
# print(_gra)
# print(_y)

# 2-D (matrix) case
# W = tf.Variable([[1, 2, 3], [4, 5, 6]])
# b = tf.Variable([5, 7, 11])
# X = tf.placeholder(dtype=tf.int32, shape=[4, 2])
# mat = tf.matmul(X, W)
# y = tf.add(mat, b)
# gradients = tf.gradients(y, [W, b])
# sess = tf.Session()
# sess.run(tf.global_variables_initializer())
# _gra, _mat, _y = sess.run((gradients, mat, y), feed_dict={X: np.reshape(list(range(8)), (4, 2))})
# print(_gra)
# print(_mat)
# print(_y)
# '''[array([[12, 12, 12],
#            [16, 16, 16]], dtype=int32), array([4, 4, 4], dtype=int32)]
# [[ 4  5  6]
#  [14 19 24]
#  [24 33 42]
#  [34 47 60]]
# [[ 9 12 17]
#  [19 26 35]
#  [29 40 53]
#  [39 54 71]]
# '''

# 1-D vector, second derivative (broken version)
# W = tf.Variable([1, 2, 3])
# X = tf.placeholder(dtype=tf.int32, shape=[3])
# y = tf.reduce_sum(tf.multiply(X, W))
# gradients = tf.gradients(y, W)
# print(gradients)
# gradients2 = tf.gradients(gradients, W)
# sess = tf.Session()
# sess.run(tf.global_variables_initializer())
# _gra, _y, _gra2 = sess.run((gradients, y, gradients2), feed_dict={X: [5, 7, 11]})
# print(_gra)
# print(_y)
# print(_gra2)
'''
Raises TypeError: Fetch argument None has invalid type <class 'NoneType'>
Explanation: https://stackoverrun.com/cn/q/12004593
'''

# 1-D vector, second derivative (still broken)
# W = tf.Variable([1, 2, 3])
# W1 = tf.reshape(W, shape=[3, 1])
# X = tf.placeholder(dtype=tf.int32, shape=[3])
# X1 = tf.reshape(X, shape=[3, 1])
# y = tf.matmul(W1, X1, transpose_a=True)
# gradients = tf.gradients(y, W)
# print(gradients)
# # gradients2 = tf.gradients(gradients, W)
# sess = tf.Session()
# sess.run(tf.global_variables_initializer())
# # _gra, _y, _gra2 = sess.run((gradients, y, gradients2), feed_dict={X: [5, 7, 11]})
# _gra, _y = sess.run((gradients, y), feed_dict={X: [5, 7, 11]})
# print(_gra)
# print(_y)
# # print(_gra2)

# Now try tf.hessians
# W = tf.Variable([1, 2, 3])
# W1 = tf.reshape(W, shape=[3, 1])
# X = tf.placeholder(dtype=tf.int32, shape=[3])
# X1 = tf.reshape(X, shape=[3, 1])
# y = tf.matmul(W1, X1, transpose_a=True)
# gradients = tf.hessians(y, W)
# sess = tf.Session()
# sess.run(tf.global_variables_initializer())
# _gra, _y = sess.run((gradients, y), feed_dict={X: [5, 7, 11]})
# print(_gra)
# print(_y)
'''
ValueError: None values not supported.
'''

# 1-D vector, new version: differentiate w.r.t. a concat of W and b
# W = tf.Variable([1, 2, 3])
# X = tf.placeholder(dtype=tf.int32, shape=[3])
# b = tf.Variable(13)
# y = tf.add(tf.reduce_sum(tf.multiply(X, W)), b)
# weights = tf.concat((W, tf.reshape(b, shape=[1])), axis=0)
# gradients = tf.gradients(y, weights)
# sess = tf.Session()
# sess.run(tf.global_variables_initializer())
# _gra, _y = sess.run((gradients, y), feed_dict={X: [5, 7, 11]})
# print(_gra)
# print(_y)
'''TypeError: Fetch argument None has invalid type <class 'NoneType'>
'''

# 2-D (matrix) case, new version: differentiate w.r.t. a flattened copy of W
W = tf.Variable([[1, 2, 3], [4, 5, 6]])
b = tf.Variable([5, 7, 11])
X = tf.placeholder(dtype=tf.int32, shape=[4, 2])
mat = tf.matmul(X, W)
y = tf.add(mat, b)
W_flatten = tf.reshape(W, shape=[-1])
gradients = tf.gradients(y, W_flatten)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
_gra = sess.run(gradients, feed_dict={X: np.reshape(list(range(8)), (4, 2))})
print(_gra)
'''TypeError: Fetch argument None has invalid type <class 'NoneType'>
'''
```
Takeaway: backpropagation does trace through a tf.reshape that lies on the forward path (the gradient with respect to W was found in the reshape experiments above), but differentiating with respect to a freshly built node, whether a reshape like W_flatten or a concat of the variables, comes back as None.
So what can we do?
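For first-order gradients there is an easy way out: differentiate with respect to the original variables and only then concatenate the results, instead of differentiating with respect to a concat node. A minimal sketch, reusing y, W, and b from the experiments above:

```python
# Differentiate w.r.t. the real variables, then flatten and concat the RESULTS.
grads = tf.gradients(y, [W, b])  # works: both variables are upstream of y
flat_grad = tf.concat([tf.reshape(g, [-1]) for g in grads], axis=0)
```

The second-order case needs more probing, as the attempts below show.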
Attempt:
```python
W = tf.Variable([[1, 2, 3], [4, 5, 6]])
b = tf.Variable([5, 7, 11])
X = tf.placeholder(dtype=tf.int32, shape=[4, 2])
mat = tf.matmul(X, W)
y = tf.add(mat, b)
gradients = tf.gradients(y, W)
# gradients = tf.gradients(gradients, W)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
_gra = sess.run(gradients, feed_dict={X: np.reshape(list(range(8)), (4, 2))})
print(_gra)
```
Success. Now add the second-order partial derivative:
```python
W = tf.Variable([[1, 2, 3], [4, 5, 6]])
b = tf.Variable([5, 7, 11])
X = tf.placeholder(dtype=tf.int32, shape=[4, 2])
mat = tf.matmul(X, W)
y = tf.add(mat, b)
gradients = tf.gradients(y, W)
gradients = tf.gradients(gradients, W)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
_gra = sess.run(gradients, feed_dict={X: np.reshape(list(range(8)), (4, 2))})
print(_gra)
```
Failure: `TypeError: Fetch argument None has invalid type <class 'NoneType'>`. In hindsight this is expected: y is affine in W, so the first-order gradient is a constant that depends only on X; that gradient node is not downstream of W, and differentiating it again yields None.
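As an aside, assuming TensorFlow 1.12 or newer (the version used here may predate it), tf.gradients takes an unconnected_gradients argument that turns exactly this disconnected case into zeros instead of None; a hypothetical variation:

```python
# Requires TF >= 1.12: return zeros rather than None when the
# target is not connected to the variable in the graph.
gradients2 = tf.gradients(gradients, W,
                          unconnected_gradients=tf.UnconnectedGradients.ZERO)
```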
Another attempt, this time making y quadratic in W:
```python
W = tf.Variable([[1, 2], [4, 5]])
b = tf.Variable([5, 7])
X = tf.placeholder(dtype=tf.int32, shape=[2, 2])
mat = tf.matmul(X, W)
# y = tf.add(mat, b)
y = tf.matmul(mat, W)
gradients = tf.gradients(y, W)
# gradients = tf.gradients(gradients, W)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
_gra = sess.run(gradients, feed_dict={X: np.reshape(list(range(4)), (2, 2))})
print(_gra)
```
Success. Next, with the second-order gradient switched on:
```python
W = tf.Variable([[1, 2], [4, 5]])
b = tf.Variable([5, 7])
X = tf.placeholder(dtype=tf.int32, shape=[2, 2])
mat = tf.matmul(X, W)
# y = tf.add(mat, b)
y = tf.matmul(mat, W)
gradients = tf.gradients(y, W)
gradients = tf.gradients(gradients, W)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
_gra = sess.run(gradients, feed_dict={X: np.reshape(list(range(4)), (2, 2))})
print(_gra)
```
Also a success. So derivatives come out whenever the node being differentiated genuinely depends on the variable: here y = XWW is quadratic in W, so the first-order gradient still contains W and can itself be differentiated, unlike the affine case above.
Then, if the second derivative is computable like this, can we just call tf.hessians directly?
```python
# 2-D (matrix) case, new version
W = tf.Variable([[1, 2], [4, 5]])
b = tf.Variable([5, 7])
X = tf.placeholder(dtype=tf.int32, shape=[2, 2])
mat = tf.matmul(X, W)
# y = tf.add(mat, b)
y = tf.matmul(mat, W)
gradients = tf.gradients(y, W)
gradients = tf.gradients(gradients, W)
hessians = tf.hessians(y, W)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
_gra, _hs = sess.run((gradients, hessians), feed_dict={X: np.reshape(list(range(4)), (2, 2))})
print(_gra)
print(_hs)
```
No, it cannot:

```
ValueError: Cannot compute Hessian because element 0 of `xs` does not have rank one.. Tensor Variable/read:0 must have rank 1. Received rank 2, shape (2, 2)
```

In this version of TensorFlow, tf.hessians insists that every element of xs be a rank-1 tensor, and W here is a 2x2 matrix. Flattening W first does not help either, since the flattened tensor would again be a new node that y does not depend on.
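If the parameter can be declared as a flat rank-1 variable from the start and reshaped inside the graph, tf.hessians does go through. A minimal sketch under that assumption (float dtype and a scalar loss, with values mirroring the experiment above):

```python
import tensorflow as tf
import numpy as np

# Declare W with rank 1 and reshape INSIDE the graph, so the matrix
# form stays downstream of the rank-1 variable.
W = tf.Variable([1., 2., 4., 5.])
W_mat = tf.reshape(W, [2, 2])
X = tf.placeholder(dtype=tf.float32, shape=[2, 2])
y = tf.reduce_sum(tf.matmul(tf.matmul(X, W_mat), W_mat))  # scalar, quadratic in W

hessian = tf.hessians(y, W)  # ok: W has rank 1 and y is downstream of it

sess = tf.Session()
sess.run(tf.global_variables_initializer())
print(sess.run(hessian, feed_dict={X: np.arange(4, dtype=np.float32).reshape(2, 2)}))
```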
But for an existing matrix-shaped variable that option is off the table, so I still have to compute the Hessian myself via tf.gradients.
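A sketch of that fallback, under the assumption of a scalar loss that depends on the variable at second order; the helper name hessian_via_gradients is mine, not a TensorFlow API. Differentiate once, flatten the resulting gradient (flattening the gradient is harmless, since we still differentiate with respect to the original variable), then differentiate each component and stack the rows:

```python
import tensorflow as tf
import numpy as np

def hessian_via_gradients(loss, var):
    """Hypothetical helper: (n, n) Hessian of a scalar loss w.r.t. a
    variable with n elements, built from tf.gradients alone."""
    grad = tf.gradients(loss, var)[0]        # first order, same shape as var
    flat_grad = tf.reshape(grad, [-1])       # reshaping the RESULT is fine
    n = int(np.prod(var.get_shape().as_list()))
    rows = [tf.reshape(tf.gradients(flat_grad[i], var)[0], [-1])
            for i in range(n)]               # row i: d(grad_i)/d(var)
    return tf.stack(rows)

W = tf.Variable([[1., 2.], [4., 5.]])
X = tf.placeholder(dtype=tf.float32, shape=[2, 2])
loss = tf.reduce_sum(tf.matmul(tf.matmul(X, W), W))  # quadratic in W

hess = hessian_via_gradients(loss, W)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
print(sess.run(hess, feed_dict={X: np.arange(4, dtype=np.float32).reshape(2, 2)}))
```

Each per-component tf.gradients call adds nodes to the graph, so this only scales to small parameter counts, which is exactly the regime where a dense Hessian is useful anyway.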