该实现进一步涉及原生 C++ 代码。以下是 ApplyGradientDescent 的 GPU 实现(core/kernels/training_ops_gpu.cu.cc):
template <typename T>struct ApplyGradientDescent<GPUDevice, T> { void operator()(const GPUDevice& d, typename TTypes<T>::Flat var, typename TTypes<T>::ConstScalar lr, typename TTypes<T>::ConstFlat grad) { Eigen::array<typename TTypes<T>::Tensor::Index, 1> bcast; bcast[0] = grad.dimension(0); Eigen::Sizes<1> single; var.device(d) -= lr.reshape(single).broadcast(bcast) * grad; }};CPU实现在此处(
core/kernels/training_ops.cc):
// CPUDevice specialization of the gradient-descent update functor.
// Performs the in-place update `var -= grad * lr`, evaluated through
// `var.device(d)`.
template <typename T>
struct ApplyGradientDescent<CPUDevice, T> {
  // d:    device the expression is assigned through.
  // var:  flat tensor updated in place.
  // lr:   scalar tensor holding the learning rate.
  // grad: flat tensor scaled by the learning rate.
  void operator()(const CPUDevice& d, typename TTypes<T>::Flat var,
                  typename TTypes<T>::ConstScalar lr,
                  typename TTypes<T>::ConstFlat grad) {
    // lr() reads the scalar tensor's single value, which then scales `grad`.
    const auto& step_size = lr();
    var.device(d) -= grad * step_size;
  }
};


