Recently I needed the second-order partial derivatives (the Hessian matrix) of a loss with respect to the model parameters. The natural move is tf.gradients or tf.hessians, but that produced the error in the title (or errors much like it).
Searching turned up a good answer on Stack Overflow: https://stackoverflow.com/questions/45396699/tf-hessians-valueerror-none-values-not-supported#
> This is because in your graph, the node `loss` does not depend on the node `tf.concat([W,b], axis=0)`. There is no backpropagation of one onto the other, and therefore no derivative. TensorFlow is not a formal calculus engine; it can only estimate derivatives of a node with respect to another node if the former is downstream of the latter. So, for example, even `tf.hessians(loss, 2*W)` will fail for the same reason (`2*W` is a new node and `loss` does not depend on it), even though the relationship to `tf.hessians(loss, W)` is straightforward.
>
> Note that the situation is the same with `tf.gradients`, even though it fails differently: it returns `None`s rather than throwing an exception.
In other words, tf.gradients and tf.hessians are not that smart. If the ys argument does not actually depend on the xs argument in the graph, they return None, which then blows up downstream. And "depend" means a direct graph connection: once I built new nodes out of the variables with reshape and concat and tried to differentiate with respect to those, the connection was gone.
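The quoted `2*W` point is easy to reproduce in a couple of lines; a minimal sketch (TF 1.x assumed):

```python
import tensorflow as tf

W = tf.Variable([1., 2.])
loss = tf.reduce_sum(W * W)

print(tf.gradients(loss, W))      # [<tf.Tensor ...>]: W is upstream of loss
print(tf.gradients(loss, 2 * W))  # [None]: 2*W is a new node the loss was not built from
```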
I ran a few groups of experiments to verify this.
```python
import tensorflow as tf
import numpy as np

# Scalar
# W = tf.Variable(10)
# b = tf.Variable(3)
# X = tf.placeholder(dtype=tf.int32, shape=[])
# y = tf.add(tf.multiply(W, X), b)
# gradients = tf.gradients(y, [W, b])
# sess = tf.Session()
# _gra = sess.run(gradients, feed_dict={X: 7})
# print(_gra)
'''[7, 1]
'''

# 1-D vector
# W = tf.Variable([1, 2, 3])
# X = tf.placeholder(dtype=tf.int32, shape=[3])
# y = tf.reduce_sum(tf.multiply(X, W))
# gradients = tf.gradients(y, W)
# sess = tf.Session()
# sess.run(tf.global_variables_initializer())
# _gra, _y = sess.run((gradients, y), feed_dict={X: [5, 7, 11]})
# print(_gra)
# print(_y)

# 2-D (matrix) case
# W = tf.Variable([[1, 2, 3], [4, 5, 6]])
# b = tf.Variable([5, 7, 11])
# X = tf.placeholder(dtype=tf.int32, shape=[4, 2])
# mat = tf.matmul(X, W)
# y = tf.add(mat, b)
# gradients = tf.gradients(y, [W, b])
# sess = tf.Session()
# sess.run(tf.global_variables_initializer())
# _gra, _mat, _y = sess.run((gradients, mat, y), feed_dict={X: np.reshape(list(range(8)), (4, 2))})
# print(_gra)
# print(_mat)
# print(_y)
# '''[array([[12, 12, 12],
#            [16, 16, 16]], dtype=int32), array([4, 4, 4], dtype=int32)]
# [[ 4  5  6]
#  [14 19 24]
#  [24 33 42]
#  [34 47 60]]
# [[ 9 12 17]
#  [19 26 35]
#  [29 40 53]
#  [39 54 71]]
# '''

# 1-D vector, second derivative (broken version)
# W = tf.Variable([1, 2, 3])
# X = tf.placeholder(dtype=tf.int32, shape=[3])
# y = tf.reduce_sum(tf.multiply(X, W))
# gradients = tf.gradients(y, W)
# print(gradients)
# gradients2 = tf.gradients(gradients, W)
# sess = tf.Session()
# sess.run(tf.global_variables_initializer())
# _gra, _y, _gra2 = sess.run((gradients, y, gradients2), feed_dict={X: [5, 7, 11]})
# print(_gra)
# print(_y)
# print(_gra2)
'''
Raises TypeError: Fetch argument None has invalid type <class 'NoneType'>
Explanation: https://stackoverrun.com/cn/q/12004593
'''

# 1-D vector, second derivative (still broken)
# W = tf.Variable([1, 2, 3])
# W1 = tf.reshape(W, shape=[3, 1])
# X = tf.placeholder(dtype=tf.int32, shape=[3])
# X1 = tf.reshape(X, shape=[3, 1])
# y = tf.matmul(W1, X1, transpose_a=True)
# gradients = tf.gradients(y, W)
# print(gradients)
# # gradients2 = tf.gradients(gradients, W)
# sess = tf.Session()
# sess.run(tf.global_variables_initializer())
# # _gra, _y, _gra2 = sess.run((gradients, y, gradients2), feed_dict={X: [5, 7, 11]})
# _gra, _y = sess.run((gradients, y), feed_dict={X: [5, 7, 11]})
# print(_gra)
# print(_y)
# # print(_gra2)

# Now try tf.hessians
# W = tf.Variable([1, 2, 3])
# W1 = tf.reshape(W, shape=[3, 1])
# X = tf.placeholder(dtype=tf.int32, shape=[3])
# X1 = tf.reshape(X, shape=[3, 1])
# y = tf.matmul(W1, X1, transpose_a=True)
# gradients = tf.hessians(y, W)
# sess = tf.Session()
# sess.run(tf.global_variables_initializer())
# _gra, _y = sess.run((gradients, y), feed_dict={X: [5, 7, 11]})
# print(_gra)
# print(_y)
'''
ValueError: None values not supported.
'''

# 1-D vector, new version: differentiate w.r.t. a concat of W and b
# W = tf.Variable([1, 2, 3])
# X = tf.placeholder(dtype=tf.int32, shape=[3])
# b = tf.Variable(13)
# y = tf.add(tf.reduce_sum(tf.multiply(X, W)), b)
# weights = tf.concat((W, tf.reshape(b, shape=[1])), axis=0)
# gradients = tf.gradients(y, weights)
# sess = tf.Session()
# sess.run(tf.global_variables_initializer())
# _gra, _y = sess.run((gradients, y), feed_dict={X: [5, 7, 11]})
# print(_gra)
# print(_y)
'''TypeError: Fetch argument None has invalid type <class 'NoneType'>
'''

# 2-D (matrix) case, new version: differentiate w.r.t. a flattened copy of W
W = tf.Variable([[1, 2, 3], [4, 5, 6]])
b = tf.Variable([5, 7, 11])
X = tf.placeholder(dtype=tf.int32, shape=[4, 2])
mat = tf.matmul(X, W)
y = tf.add(mat, b)
W_flatten = tf.reshape(W, shape=[-1])
gradients = tf.gradients(y, W_flatten)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
_gra = sess.run(gradients, feed_dict={X: np.reshape(list(range(8)), (4, 2))})
print(_gra)
'''TypeError: Fetch argument None has invalid type <class 'NoneType'>
'''
```
Takeaway: backpropagation does trace through a tf.reshape that lies on the forward path (the gradient with respect to W was found in the reshape experiments above), but differentiating with respect to a freshly built node, whether a reshape like W_flatten or a concat of the variables, comes back as None.
So what can we do?
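For first-order gradients there is an easy way out: differentiate with respect to the original variables and only then concatenate the results, instead of differentiating with respect to a concat node. A minimal sketch, reusing y, W, and b from the experiments above:

```python
# Differentiate w.r.t. the real variables, then flatten and concat the RESULTS.
grads = tf.gradients(y, [W, b])  # works: both variables are upstream of y
flat_grad = tf.concat([tf.reshape(g, [-1]) for g in grads], axis=0)
```

The second-order case needs more probing, as the attempts below show.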
Attempt:
```python
W = tf.Variable([[1, 2, 3], [4, 5, 6]])
b = tf.Variable([5, 7, 11])
X = tf.placeholder(dtype=tf.int32, shape=[4, 2])
mat = tf.matmul(X, W)
y = tf.add(mat, b)
gradients = tf.gradients(y, W)
# gradients = tf.gradients(gradients, W)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
_gra = sess.run(gradients, feed_dict={X: np.reshape(list(range(8)), (4, 2))})
print(_gra)
```
Success. Now add the second-order partial derivative:
```python
W = tf.Variable([[1, 2, 3], [4, 5, 6]])
b = tf.Variable([5, 7, 11])
X = tf.placeholder(dtype=tf.int32, shape=[4, 2])
mat = tf.matmul(X, W)
y = tf.add(mat, b)
gradients = tf.gradients(y, W)
gradients = tf.gradients(gradients, W)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
_gra = sess.run(gradients, feed_dict={X: np.reshape(list(range(8)), (4, 2))})
print(_gra)
```
Failure: `TypeError: Fetch argument None has invalid type <class 'NoneType'>`. In hindsight this is expected: y is affine in W, so the first-order gradient is a constant that depends only on X; that gradient node is not downstream of W, and differentiating it again yields None.
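As an aside, assuming TensorFlow 1.12 or newer (the version used here may predate it), tf.gradients takes an unconnected_gradients argument that turns exactly this disconnected case into zeros instead of None; a hypothetical variation:

```python
# Requires TF >= 1.12: return zeros rather than None when the
# target is not connected to the variable in the graph.
gradients2 = tf.gradients(gradients, W,
                          unconnected_gradients=tf.UnconnectedGradients.ZERO)
```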
Another attempt, this time making y quadratic in W:
```python
W = tf.Variable([[1, 2], [4, 5]])
b = tf.Variable([5, 7])
X = tf.placeholder(dtype=tf.int32, shape=[2, 2])
mat = tf.matmul(X, W)
# y = tf.add(mat, b)
y = tf.matmul(mat, W)
gradients = tf.gradients(y, W)
# gradients = tf.gradients(gradients, W)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
_gra = sess.run(gradients, feed_dict={X: np.reshape(list(range(4)), (2, 2))})
print(_gra)
```
Success. Next, with the second-order gradient switched on:
```python
W = tf.Variable([[1, 2], [4, 5]])
b = tf.Variable([5, 7])
X = tf.placeholder(dtype=tf.int32, shape=[2, 2])
mat = tf.matmul(X, W)
# y = tf.add(mat, b)
y = tf.matmul(mat, W)
gradients = tf.gradients(y, W)
gradients = tf.gradients(gradients, W)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
_gra = sess.run(gradients, feed_dict={X: np.reshape(list(range(4)), (2, 2))})
print(_gra)
```
Also a success. So derivatives come out whenever the node being differentiated genuinely depends on the variable: here y = XWW is quadratic in W, so the first-order gradient still contains W and can itself be differentiated, unlike the affine case above.
Then, if the second derivative is computable like this, can we just call tf.hessians directly?
```python
# 2-D (matrix) case, new version
W = tf.Variable([[1, 2], [4, 5]])
b = tf.Variable([5, 7])
X = tf.placeholder(dtype=tf.int32, shape=[2, 2])
mat = tf.matmul(X, W)
# y = tf.add(mat, b)
y = tf.matmul(mat, W)
gradients = tf.gradients(y, W)
gradients = tf.gradients(gradients, W)
hessians = tf.hessians(y, W)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
_gra, _hs = sess.run((gradients, hessians), feed_dict={X: np.reshape(list(range(4)), (2, 2))})
print(_gra)
print(_hs)
```
No, it cannot:

```
ValueError: Cannot compute Hessian because element 0 of `xs` does not have rank one.. Tensor Variable/read:0 must have rank 1. Received rank 2, shape (2, 2)
```

In this version of TensorFlow, tf.hessians insists that every element of xs be a rank-1 tensor, and W here is a 2x2 matrix. Flattening W first does not help either, since the flattened tensor would again be a new node that y does not depend on.
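If the parameter can be declared as a flat rank-1 variable from the start and reshaped inside the graph, tf.hessians does go through. A minimal sketch under that assumption (float dtype and a scalar loss, with values mirroring the experiment above):

```python
import tensorflow as tf
import numpy as np

# Declare W with rank 1 and reshape INSIDE the graph, so the matrix
# form stays downstream of the rank-1 variable.
W = tf.Variable([1., 2., 4., 5.])
W_mat = tf.reshape(W, [2, 2])
X = tf.placeholder(dtype=tf.float32, shape=[2, 2])
y = tf.reduce_sum(tf.matmul(tf.matmul(X, W_mat), W_mat))  # scalar, quadratic in W

hessian = tf.hessians(y, W)  # ok: W has rank 1 and y is downstream of it

sess = tf.Session()
sess.run(tf.global_variables_initializer())
print(sess.run(hessian, feed_dict={X: np.arange(4, dtype=np.float32).reshape(2, 2)}))
```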
But for an existing matrix-shaped variable that option is off the table, so I still have to compute the Hessian myself via tf.gradients.
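A sketch of that fallback, under the assumption of a scalar loss that depends on the variable at second order; the helper name hessian_via_gradients is mine, not a TensorFlow API. Differentiate once, flatten the resulting gradient (flattening the gradient is harmless, since we still differentiate with respect to the original variable), then differentiate each component and stack the rows:

```python
import tensorflow as tf
import numpy as np

def hessian_via_gradients(loss, var):
    """Hypothetical helper: (n, n) Hessian of a scalar loss w.r.t. a
    variable with n elements, built from tf.gradients alone."""
    grad = tf.gradients(loss, var)[0]        # first order, same shape as var
    flat_grad = tf.reshape(grad, [-1])       # reshaping the RESULT is fine
    n = int(np.prod(var.get_shape().as_list()))
    rows = [tf.reshape(tf.gradients(flat_grad[i], var)[0], [-1])
            for i in range(n)]               # row i: d(grad_i)/d(var)
    return tf.stack(rows)

W = tf.Variable([[1., 2.], [4., 5.]])
X = tf.placeholder(dtype=tf.float32, shape=[2, 2])
loss = tf.reduce_sum(tf.matmul(tf.matmul(X, W), W))  # quadratic in W

hess = hessian_via_gradients(loss, W)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
print(sess.run(hess, feed_dict={X: np.arange(4, dtype=np.float32).reshape(2, 2)}))
```

Each per-component tf.gradients call adds nodes to the graph, so this only scales to small parameter counts, which is exactly the regime where a dense Hessian is useful anyway.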