TensorFlow Learning Notes (11): A Brief Look at tf.nn.dropout

Purpose: mitigate overfitting.
The nn in tf.nn stands for Neural Net (NN), i.e. the neural-network module.
keep_prob: the probability that each element of the input is kept.

The source code is hard to follow at first, so let's learn how to use the op before reading it.

Test code

  from __future__ import print_function
  import tensorflow as tf
  import numpy as np

  with tf.Session() as sess:
      x = np.asarray([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype=np.float32)

      #  [ 1.  2.  3.  4.  5.  6.  7.  8.  9. 10.]
      print(x)

      # keep_prob = 0.1: we expect about 9 of the 10 values to be zeroed out,
      # but the exact count is random, so it is not guaranteed.
      out = tf.nn.dropout(x, 0.1)

      # Possible outputs (the surviving values are 10x the originals
      # because dropout computes x / keep_prob):
      #  [ 0. 20. 30.  0.  0.  0.  0.  0.  0.  0.]
      #  [ 0.  0. 30.  0.  0.  0.  0.  0.  0.  0.]
      print(out.eval())
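
In real models, keep_prob is usually fed through a placeholder so the same graph can run with dropout during training and without it at evaluation time. A minimal sketch of that pattern (TF 1.x style, as in the test code above; the variable names are just illustrative):

  import tensorflow as tf
  import numpy as np

  x = np.arange(1, 11, dtype=np.float32)

  # Feed keep_prob at run time instead of hard-coding it.
  keep_prob = tf.placeholder(tf.float32)
  dropped = tf.nn.dropout(x, keep_prob)

  with tf.Session() as sess:
      # Training: roughly half the elements are zeroed, survivors scaled by 1 / 0.5 = 2.
      print(sess.run(dropped, feed_dict={keep_prob: 0.5}))
      # Evaluation: keep_prob = 1.0 leaves the input unchanged.
      print(sess.run(dropped, feed_dict={keep_prob: 1.0}))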

OK, now let's look at the source code.

The TensorFlow dropout source code:

def _get_noise_shape(x, noise_shape):
  # If noise_shape is none return immediately.
  # Return the shape of x
  if noise_shape is None:
    return array_ops.shape(x)

  try:
    # Best effort to figure out the intended shape.
    # If not possible, let the op to handle it.
    # In eager mode exception will show up.
    noise_shape_ = tensor_shape.as_shape(noise_shape)
  except (TypeError, ValueError):
    return noise_shape

  if x.shape.dims is not None and len(x.shape.dims) == len(noise_shape_.dims):
    new_dims = []
    for i, dim in enumerate(x.shape.dims):
      if noise_shape_.dims[i].value is None and dim.value is not None:
        new_dims.append(dim.value)
      else:
        new_dims.append(noise_shape_.dims[i].value)
    return tensor_shape.TensorShape(new_dims)

  return noise_shape
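
_get_noise_shape is what makes the public noise_shape argument work: any dimension set to 1 shares one keep/drop decision that is broadcast across that axis, and dimensions left as None are filled in from the shape of x. A small usage sketch, assuming TF 1.x, that drops whole rows of a [3, 4] matrix together (the shapes are just for illustration):

  import tensorflow as tf

  with tf.Session() as sess:
      x = tf.ones([3, 4])
      # One keep/drop decision per row, broadcast across the 4 columns.
      out = tf.nn.dropout(x, keep_prob=0.5, noise_shape=[3, 1])
      # Each row is either all zeros or all 2.0 (= 1 / 0.5).
      print(sess.run(out))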

@tf_export("nn.dropout")
def dropout(x, keep_prob, noise_shape=None, seed=None, name=None):  # pylint: disable=invalid-name
  """Computes dropout.
  With probability `keep_prob`, outputs the input element scaled up by
  `1 / keep_prob`, otherwise outputs `0`.  The scaling is so that the expected
  sum is unchanged.
  By default, each element is kept or dropped independently.  If `noise_shape`
  is specified, it must be
  [broadcastable](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
  to the shape of `x`, and only dimensions with `noise_shape[i] == shape(x)[i]`
  will make independent decisions.  For example, if `shape(x) = [k, l, m, n]`
  and `noise_shape = [k, 1, 1, n]`, each batch and channel component will be
  kept independently and each row and column will be kept or not kept together.
  Args:
    x: A floating point tensor.
    keep_prob: A scalar `Tensor` with the same type as x. The probability
      that each element is kept.
    noise_shape: A 1-D `Tensor` of type `int32`, representing the
      shape for randomly generated keep/drop flags.
    seed: A Python integer. Used to create random seeds. See
      `tf.set_random_seed`
      for behavior.
    name: A name for this operation (optional).
  Returns:
    A Tensor of the same shape of `x`.
  Raises:
    ValueError: If `keep_prob` is not in `(0, 1]` or if `x` is not a floating
      point tensor.
  """
  with ops.name_scope(name, "dropout", [x]) as name:
    # Validate the arguments
    x = ops.convert_to_tensor(x, name="x")
    if not x.dtype.is_floating:
      raise ValueError("x has to be a floating point tensor since it's going to"
                       " be scaled. Got a %s tensor instead." % x.dtype)
    if isinstance(keep_prob, numbers.Real) and not 0 < keep_prob <= 1:
      raise ValueError("keep_prob must be a scalar tensor or a float in the "
                       "range (0, 1], got %g" % keep_prob)

    # Early return if nothing needs to be dropped.
    # If keep_prob is 1, no element needs to be zeroed, so return x as is
    if isinstance(keep_prob, float) and keep_prob == 1:
      return x
    if context.executing_eagerly():
      if isinstance(keep_prob, ops.EagerTensor):
        if keep_prob.numpy() == 1:
          return x
    else:
      # Convert keep_prob to a tensor
      keep_prob = ops.convert_to_tensor(
          keep_prob, dtype=x.dtype, name="keep_prob")
      keep_prob.get_shape().assert_is_compatible_with(tensor_shape.scalar())

      # Do nothing if we know keep_prob == 1
      if tensor_util.constant_value(keep_prob) == 1:
        return x

    noise_shape = _get_noise_shape(x, noise_shape)

    # uniform [keep_prob, 1.0 + keep_prob)
    # Generate a random tensor with values in [keep_prob, 1.0 + keep_prob)
    random_tensor = keep_prob
    random_tensor += random_ops.random_uniform(
        noise_shape, seed=seed, dtype=x.dtype)
    # 0. if [keep_prob, 1.0) and 1. if [1.0, 1.0 + keep_prob)
    # Values in [keep_prob, 1.0) floor to 0.
    # Values in [1.0, 1.0 + keep_prob) floor to 1.
    binary_tensor = math_ops.floor(random_tensor)
    ret = math_ops.div(x, keep_prob) * binary_tensor
    if not context.executing_eagerly():
      ret.set_shape(x.get_shape())
    return ret
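
Stripped of the validation and shape handling, the core of the body above is just three steps: draw uniform noise in [keep_prob, 1.0 + keep_prob), floor it into a 0/1 mask, and scale the kept values by 1 / keep_prob. A NumPy sketch of the same math (not the real TensorFlow implementation, just the idea):

  import numpy as np

  def numpy_dropout(x, keep_prob, seed=None):
      rng = np.random.RandomState(seed)
      # uniform [keep_prob, 1.0 + keep_prob)
      random_tensor = keep_prob + rng.uniform(size=x.shape)
      # floor: [keep_prob, 1.0) -> 0.,  [1.0, 1.0 + keep_prob) -> 1.
      binary_tensor = np.floor(random_tensor)
      # scale the survivors so the expected sum is unchanged
      return x / keep_prob * binary_tensor

  x = np.arange(1, 11, dtype=np.float32)
  print(numpy_dropout(x, keep_prob=0.5, seed=0))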

To summarize:

dropout does the following:

  1. Convert x to a tensor, e.g. x = [1, 2, 3].
  2. Generate a binary_tensor with the same shape as x, e.g. binary_tensor = [0, 1, 0].
  3. Compute x / keep_prob * binary_tensor (see the check below).
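
The division in step 3 is what keeps the expected value of each element unchanged: an element survives with probability keep_prob and is then scaled by 1 / keep_prob, so E[output] = keep_prob * x / keep_prob = x. A quick empirical check (TF 1.x; the numbers are chosen arbitrarily):

  import numpy as np
  import tensorflow as tf

  with tf.Session() as sess:
      x = np.full(10000, 3.0, dtype=np.float32)
      out = tf.nn.dropout(x, keep_prob=0.4)
      # Each element is 0 with probability 0.6 and 3.0 / 0.4 = 7.5 with probability 0.4,
      # so the mean over many elements stays close to 3.0.
      print(sess.run(tf.reduce_mean(out)))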
    Original author: 谢昆明
    Original article: https://www.jianshu.com/p/0519885f15cd