在Tensorflow中实现单应性

由于您只关心对平移量的优化，我编写了下面这个函数：它执行图像平移，并借助 `@tf.custom_gradient` 为平移向量定义梯度：

import tensorflow as tf


@tf.custom_gradient
def my_img_translate(imgs, translates):
    """Translate a batch of images and supply a gradient for the offsets.

    The forward pass is ``tf.contrib.image.translate`` with nearest-neighbour
    sampling. The backward pass approximates the derivative of the output
    with respect to each offset using finite differences of the input.

    Args:
        imgs: Rank-4 image tensor (the gradient reduces over axes 1, 2, 3);
            presumably (batch, height, width, channels) -- confirm against
            the ``tf.contrib.image.translate`` documentation.
        translates: Per-image ``[dx, dy]`` offsets; the returned gradient is
            stacked along axis 1, i.e. shaped (batch, 2).

    Returns:
        A pair ``(imgs_translated, grad)`` as required by
        ``tf.custom_gradient``. ``grad`` returns ``None`` for ``imgs`` (no
        gradient is propagated to the images) and the offset gradient for
        ``translates``.
    """
    # Interpolation mode has to be fixed due to limitations of
    # tf.custom_gradient.
    interpolation = 'NEAREST'
    imgs_translated = tf.contrib.image.translate(
        imgs, translates, interpolation=interpolation)

    def grad(img_translated_grads):
        # The output is imgs sampled at (x - dx, y - dy), so its derivative
        # with respect to dx (dy) is the *negated* spatial derivative of
        # imgs, evaluated at that same shifted position. The differences
        # below are already negated (earlier pixel minus later pixel).

        # X gradients: central differences inside, one-sided at the borders,
        # so the result keeps the width of imgs.
        imgs_x_grad = (imgs[:, :, :-2] - imgs[:, :, 2:]) / 2
        imgs_x_grad = tf.concat(
            [(imgs[:, :, :1] - imgs[:, :, 1:2]),
             imgs_x_grad,
             (imgs[:, :, -2:-1] - imgs[:, :, -1:])],
            axis=2)
        # Shift by the full (dx, dy) offset so every pixel lines up with the
        # upstream gradient of the translated image. Shifting along x only
        # would misalign the two whenever dy != 0.
        imgs_x_grad_translated = tf.contrib.image.translate(
            imgs_x_grad, translates, interpolation=interpolation)
        translates_x_grad = tf.reduce_sum(
            img_translated_grads * imgs_x_grad_translated, axis=(1, 2, 3))

        # Y gradients: same scheme along the height axis.
        imgs_y_grad = (imgs[:, :-2] - imgs[:, 2:]) / 2
        imgs_y_grad = tf.concat(
            [(imgs[:, :1] - imgs[:, 1:2]),
             imgs_y_grad,
             (imgs[:, -2:-1] - imgs[:, -1:])],
            axis=1)
        imgs_y_grad_translated = tf.contrib.image.translate(
            imgs_y_grad, translates, interpolation=interpolation)
        translates_y_grad = tf.reduce_sum(
            img_translated_grads * imgs_y_grad_translated, axis=(1, 2, 3))

        # Complete gradient: one [d/dx, d/dy] row per image.
        translates_grad = tf.stack(
            [translates_x_grad, translates_y_grad], axis=1)
        return None, translates_grad

    return imgs_translated, grad

请注意，在这种情况下，我没有为图像返回任何梯度，因为图像本身不参与优化（不过如果您需要，原则上可以对平移操作使用其内置的梯度）。

我用一个简单的用例做了测试：平移图像，使其中心位置的像素值最大：

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf


@tf.custom_gradient
def my_img_translate(imgs, translates):
    """Translate a batch of images and supply a gradient for the offsets.

    The forward pass is ``tf.contrib.image.translate`` with nearest-neighbour
    sampling. The backward pass approximates the derivative of the output
    with respect to each offset using finite differences of the input.

    Args:
        imgs: Rank-4 image tensor (the gradient reduces over axes 1, 2, 3);
            presumably (batch, height, width, channels) -- confirm against
            the ``tf.contrib.image.translate`` documentation.
        translates: Per-image ``[dx, dy]`` offsets; the returned gradient is
            stacked along axis 1, i.e. shaped (batch, 2).

    Returns:
        A pair ``(imgs_translated, grad)`` as required by
        ``tf.custom_gradient``. ``grad`` returns ``None`` for ``imgs`` (no
        gradient is propagated to the images) and the offset gradient for
        ``translates``.
    """
    # Interpolation mode has to be fixed due to limitations of
    # tf.custom_gradient.
    interpolation = 'NEAREST'
    imgs_translated = tf.contrib.image.translate(
        imgs, translates, interpolation=interpolation)

    def grad(img_translated_grads):
        # The output is imgs sampled at (x - dx, y - dy), so its derivative
        # with respect to dx (dy) is the *negated* spatial derivative of
        # imgs, evaluated at that same shifted position. The differences
        # below are already negated (earlier pixel minus later pixel).

        # X gradients: central differences inside, one-sided at the borders,
        # so the result keeps the width of imgs.
        imgs_x_grad = (imgs[:, :, :-2] - imgs[:, :, 2:]) / 2
        imgs_x_grad = tf.concat(
            [(imgs[:, :, :1] - imgs[:, :, 1:2]),
             imgs_x_grad,
             (imgs[:, :, -2:-1] - imgs[:, :, -1:])],
            axis=2)
        # Shift by the full (dx, dy) offset so every pixel lines up with the
        # upstream gradient of the translated image. Shifting along x only
        # would misalign the two whenever dy != 0.
        imgs_x_grad_translated = tf.contrib.image.translate(
            imgs_x_grad, translates, interpolation=interpolation)
        translates_x_grad = tf.reduce_sum(
            img_translated_grads * imgs_x_grad_translated, axis=(1, 2, 3))

        # Y gradients: same scheme along the height axis.
        imgs_y_grad = (imgs[:, :-2] - imgs[:, 2:]) / 2
        imgs_y_grad = tf.concat(
            [(imgs[:, :1] - imgs[:, 1:2]),
             imgs_y_grad,
             (imgs[:, -2:-1] - imgs[:, -1:])],
            axis=1)
        imgs_y_grad_translated = tf.contrib.image.translate(
            imgs_y_grad, translates, interpolation=interpolation)
        translates_y_grad = tf.reduce_sum(
            img_translated_grads * imgs_y_grad_translated, axis=(1, 2, 3))

        # Complete gradient: one [d/dx, d/dy] row per image.
        translates_grad = tf.stack(
            [translates_x_grad, translates_y_grad], axis=1)
        return None, translates_grad

    return imgs_translated, grad


# Test operations
imgs = tf.placeholder(tf.float32, [None, None, None, None])
# A single [dx, dy] offset is learned and shared by every image in the batch.
translates = tf.Variable([0, 0], dtype=tf.float32)
translates_tiled = tf.tile(translates[tf.newaxis], (tf.shape(imgs)[0], 1))
imgs_translated = my_img_translate(imgs, translates_tiled)
imgs_midpoint = imgs_translated[:,
                                tf.shape(imgs_translated)[1] // 2,
                                tf.shape(imgs_translated)[2] // 2]
# Minimising the negated squared centre value drives the offset towards
# placing the largest image value at the centre.
loss = -tf.reduce_sum(tf.square(imgs_midpoint))
train_op = tf.train.GradientDescentOptimizer(10).minimize(loss)
init_op = tf.global_variables_initializer()

with tf.Session() as sess:
    # Make test image: a single smooth peak, normalised to a maximum of 1.
    # The asymmetric linspace ranges put the peak away from the image centre.
    xs, ys = np.meshgrid(np.linspace(-2, 1, 100), np.linspace(-1, 2, 75))
    test_img = (1 / (1 + np.square(xs))) * (1 / (1 + np.square(ys)))
    test_img /= np.max(test_img)
    # Add batch and channel axes to match the rank-4 placeholder.
    test_img_batch = test_img[np.newaxis, :, :, np.newaxis]
    # Train
    sess.run(init_op)
    for _ in range(100):
        sess.run(train_op, feed_dict={imgs: test_img_batch})
    # Show result
    test_img_result = sess.run(
        imgs_translated, feed_dict={imgs: test_img_batch})[0, :, :, 0]

# Original on the left, translated result on the right.
plt.figure()
plt.subplot(121)
plt.imshow(test_img)
plt.subplot(122)
plt.imshow(test_img_result)
# Without an explicit show() a plain script run exits without displaying
# the figure.
plt.show()

结果：

在Tensorflow中实现单应性

面试问答相关栏目本月热门文章