Use tf.layers instead of tf.contrib.layers

Aurélien Geron
2017-04-30 10:21:27 +02:00
parent 14101abcf9
commit 326d32cae0
7 changed files with 531 additions and 258 deletions


@@ -225,7 +225,23 @@
"editable": true
},
"source": [
"Now let's build the Autoencoder:"
"Now let's build the Autoencoder..."
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"Note: instead of using the `fully_connected()` function from the `tensorflow.contrib.layers` module (as in the book), we now use the `dense()` function from the `tf.layers` module, which did not exist when this chapter was written. This is preferable because anything in contrib may change or be deleted without notice, while `tf.layers` is part of the official API. As you will see, the code is mostly the same.\n",
"\n",
"The main differences relevant to this chapter are:\n",
"* the `scope` parameter was renamed to `name`, and the `_fn` suffix was removed in all the parameters that had it (for example the `activation_fn` parameter was renamed to `activation`).\n",
"* the `weights` parameter was renamed to `kernel` and the weights variable is now named `\"kernel\"` rather than `\"weights\"`,\n",
"* the bias variable is now named `\"bias\"` rather than `\"biases\"`,\n",
"* the default activation is `None` instead of `tf.nn.relu`"
]
},
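A minimal before/after sketch of this renaming, assuming TensorFlow 1.x with the `contrib` module still available; the layer names (`hidden_old`, `hidden_new`) and sizes are placeholders chosen for illustration:

```python
import tensorflow as tf
from tensorflow.contrib.layers import fully_connected

tf.reset_default_graph()
X = tf.placeholder(tf.float32, shape=[None, 3])

# Book version (tf.contrib.layers): *_fn parameters, weights_* arguments, `scope`
hidden_old = fully_connected(X, 2, activation_fn=None, scope="hidden_old")

# tf.layers version: `activation` instead of `activation_fn`, `name` instead of `scope`
hidden_new = tf.layers.dense(X, 2, activation=None, name="hidden_new")

# The variable names change accordingly: "weights"/"biases" become "kernel"/"bias"
print([v.name for v in tf.global_variables()])
# expected to include: hidden_old/weights:0, hidden_old/biases:0,
#                      hidden_new/kernel:0,  hidden_new/bias:0
```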
{
@@ -240,8 +256,6 @@
"source": [
"tf.reset_default_graph()\n",
"\n",
"from tensorflow.contrib.layers import fully_connected\n",
"\n",
"n_inputs = 3\n",
"n_hidden = 2 # codings\n",
"n_outputs = n_inputs\n",
@@ -249,8 +263,8 @@
"learning_rate = 0.01\n",
"\n",
"X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n",
"hidden = fully_connected(X, n_hidden, activation_fn=None)\n",
"outputs = fully_connected(hidden, n_outputs, activation_fn=None)\n",
"hidden = tf.layers.dense(X, n_hidden)\n",
"outputs = tf.layers.dense(hidden, n_outputs)\n",
"\n",
"mse = tf.reduce_mean(tf.square(outputs - X))\n",
"\n",
@@ -352,6 +366,16 @@
"Let's build a stacked Autoencoder with 3 hidden layers and 1 output layer (ie. 2 stacked Autoencoders). We will use ELU activation, He initialization and L2 regularization."
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"Note: since the `tf.layers.dense()` function is incompatible with `tf.contrib.layers.arg_scope()` (which is used in the book), we now use python's `functools.partial()` function instead. It makes it easy to create a `my_dense_layer()` function that just calls `tf.layers.dense()` with the desired parameters automatically set (unless they are overridden when calling `my_dense_layer()`)."
]
},
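As a plain-Python illustration of the override behavior described above (no TensorFlow required), here is a tiny sketch; the `dense()` stub and its arguments are hypothetical stand-ins for `tf.layers.dense()`:

```python
from functools import partial

def dense(inputs, units, activation="relu", kernel_initializer="glorot"):
    """Stand-in for tf.layers.dense(): just report which arguments it received."""
    return inputs, units, activation, kernel_initializer

# Freeze the defaults to share across layers (the role arg_scope played in the book)
my_dense_layer = partial(dense, activation="elu", kernel_initializer="he")

print(my_dense_layer("X", 300))                  # -> ('X', 300, 'elu', 'he')
print(my_dense_layer("X", 10, activation=None))  # -> ('X', 10, None, 'he'); override wins
```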
{
"cell_type": "code",
"execution_count": 11,
@@ -364,7 +388,7 @@
"source": [
"tf.reset_default_graph()\n",
"\n",
"from tensorflow.contrib.layers import fully_connected\n",
"from functools import partial\n",
"\n",
"n_inputs = 28*28\n",
"n_hidden1 = 300\n",
@@ -380,15 +404,17 @@
"#initializer = lambda shape, dtype=tf.float32: tf.truncated_normal(shape, 0., stddev=np.sqrt(2/shape[0]))\n",
"\n",
"X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n",
"with tf.contrib.framework.arg_scope(\n",
" [fully_connected],\n",
" activation_fn=tf.nn.elu,\n",
" weights_initializer=initializer,\n",
" weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg)):\n",
" hidden1 = fully_connected(X, n_hidden1)\n",
" hidden2 = fully_connected(hidden1, n_hidden2)\n",
" hidden3 = fully_connected(hidden2, n_hidden3)\n",
" outputs = fully_connected(hidden3, n_outputs, activation_fn=None)\n",
"\n",
"my_dense_layer = partial(\n",
" tf.layers.dense,\n",
" activation=tf.nn.elu,\n",
" kernel_initializer=initializer,\n",
" kernel_regularizer=tf.contrib.layers.l2_regularizer(l2_reg))\n",
"\n",
"hidden1 = my_dense_layer(X, n_hidden1)\n",
"hidden2 = my_dense_layer(hidden1, n_hidden2)\n",
"hidden3 = my_dense_layer(hidden2, n_hidden3)\n",
"outputs = my_dense_layer(hidden3, n_outputs, activation=None)\n",
"\n",
"mse = tf.reduce_mean(tf.square(outputs - X))\n",
"\n",
@@ -528,19 +554,23 @@
},
"outputs": [],
"source": [
"def train_autoencoder(X_train, n_neurons, n_epochs, batch_size, learning_rate = 0.01, l2_reg = 0.0005, activation_fn=tf.nn.elu):\n",
"from functools import partial\n",
"\n",
"def train_autoencoder(X_train, n_neurons, n_epochs, batch_size, learning_rate = 0.01, l2_reg = 0.0005, activation=tf.nn.elu):\n",
" graph = tf.Graph()\n",
" with graph.as_default():\n",
" n_inputs = X_train.shape[1]\n",
"\n",
" X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n",
" with tf.contrib.framework.arg_scope(\n",
" [fully_connected],\n",
" activation_fn=activation_fn,\n",
" weights_initializer=tf.contrib.layers.variance_scaling_initializer(),\n",
" weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg)):\n",
" hidden = fully_connected(X, n_neurons, scope=\"hidden\")\n",
" outputs = fully_connected(hidden, n_inputs, activation_fn=None, scope=\"outputs\")\n",
" \n",
" my_dense_layer = partial(\n",
" tf.layers.dense,\n",
" activation=activation,\n",
" kernel_initializer=tf.contrib.layers.variance_scaling_initializer(),\n",
" kernel_regularizer=tf.contrib.layers.l2_regularizer(l2_reg))\n",
"\n",
" hidden = my_dense_layer(X, n_neurons, name=\"hidden\")\n",
" outputs = my_dense_layer(hidden, n_inputs, activation=None, name=\"outputs\")\n",
"\n",
" mse = tf.reduce_mean(tf.square(outputs - X))\n",
"\n",
@@ -566,7 +596,7 @@
" print(\"\\r{}\".format(epoch), \"Train MSE:\", mse_train)\n",
" params = dict([(var.name, var.eval()) for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)])\n",
" hidden_val = hidden.eval(feed_dict={X: X_train})\n",
" return hidden_val, params[\"hidden/weights:0\"], params[\"hidden/biases:0\"], params[\"outputs/weights:0\"], params[\"outputs/biases:0\"]"
" return hidden_val, params[\"hidden/kernel:0\"], params[\"hidden/bias:0\"], params[\"outputs/kernel:0\"], params[\"outputs/bias:0\"]"
]
},
{
@@ -853,7 +883,7 @@
"editable": true
},
"source": [
"It is common to tie the weights of the encoder and the decoder (`weights_decoder = tf.transpose(weights_encoder)`). Unfortunately this makes it impossible (or very tricky) to use the `fully_connected()` function, so we need to build the Autoencoder manually:"
"It is common to tie the weights of the encoder and the decoder (`weights_decoder = tf.transpose(weights_encoder)`). Unfortunately this makes it impossible (or very tricky) to use the `tf.layers.dense()` function, so we need to build the Autoencoder manually:"
]
},
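For a rough idea of what building it manually looks like, here is a minimal single-layer sketch with tied weights, assuming TensorFlow 1.x; the sizes and the choice to leave the biases untied are illustrative:

```python
import tensorflow as tf

n_inputs = 28 * 28
n_hidden = 150  # codings

initializer = tf.contrib.layers.variance_scaling_initializer()  # He initialization

X = tf.placeholder(tf.float32, shape=[None, n_inputs])

weights1 = tf.Variable(initializer([n_inputs, n_hidden]), name="weights1")
weights2 = tf.transpose(weights1, name="weights2")  # tied: decoder reuses the encoder's weights

biases1 = tf.Variable(tf.zeros(n_hidden), name="biases1")
biases2 = tf.Variable(tf.zeros(n_inputs), name="biases2")  # biases are not tied

hidden = tf.nn.elu(tf.matmul(X, weights1) + biases1)
outputs = tf.matmul(hidden, weights2) + biases2

reconstruction_loss = tf.reduce_mean(tf.square(outputs - X))
```

Only `weights1` is a trainable variable here; `weights2` is just its transpose, so the encoder and decoder share the same parameters.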
{
@@ -1114,11 +1144,20 @@
"# Stacked denoising Autoencoder"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note: the book uses `tf.contrib.layers.dropout()` rather than `tf.layers.dropout()` (which did not exist when this chapter was written). It is now preferable to use `tf.layers.dropout()`, because anything in the contrib module may change or be deleted without notice. The `tf.layers.dropout()` function is almost identical to the `tf.contrib.layers.dropout()` function, except for a few minor differences. Most importantly:\n",
"* you must specify the dropout rate (`rate`) rather than the keep probability (`keep_prob`), where `rate` is simply equal to `1 - keep_prob`,\n",
"* the `is_training` parameter is renamed to `training`."
]
},
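A minimal before/after pair for those two differences, assuming TensorFlow 1.x with the `contrib` module available; the 0.7/0.3 values match the ones used in this notebook:

```python
import tensorflow as tf

dropout_rate = 0.3            # rate = 1 - keep_prob
keep_prob = 1 - dropout_rate  # 0.7, the value the book passes to contrib's dropout

X = tf.placeholder(tf.float32, shape=[None, 28 * 28])
is_training = tf.placeholder_with_default(False, shape=(), name="is_training")

# Book version (tf.contrib.layers): keep probability + is_training switch
X_drop_old = tf.contrib.layers.dropout(X, keep_prob, is_training=is_training)

# tf.layers version: dropout *rate* + training switch, same behavior
X_drop_new = tf.layers.dropout(X, dropout_rate, training=is_training)
```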
{
"cell_type": "code",
"execution_count": 30,
"execution_count": 31,
"metadata": {
"collapsed": true,
"collapsed": false,
"deletable": true,
"editable": true
},
@@ -1126,8 +1165,6 @@
"source": [
"tf.reset_default_graph()\n",
"\n",
"from tensorflow.contrib.layers import dropout\n",
"\n",
"n_inputs = 28 * 28\n",
"n_hidden1 = 300\n",
"n_hidden2 = 150 # codings\n",
@@ -1136,7 +1173,7 @@
"\n",
"learning_rate = 0.01\n",
"l2_reg = 0.00001\n",
"keep_prob = 0.7\n",
"dropout_rate = 0.3\n",
"\n",
"activation = tf.nn.elu\n",
"regularizer = tf.contrib.layers.l2_regularizer(l2_reg)\n",
@@ -1145,7 +1182,7 @@
"X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n",
"is_training = tf.placeholder_with_default(False, shape=(), name='is_training')\n",
"\n",
"X_drop = dropout(X, keep_prob, is_training=is_training)\n",
"X_drop = tf.layers.dropout(X, dropout_rate, training=is_training)\n",
"\n",
"weights1_init = initializer([n_inputs, n_hidden1])\n",
"weights2_init = initializer([n_hidden1, n_hidden2])\n",
@@ -1177,7 +1214,7 @@
},
{
"cell_type": "code",
"execution_count": 31,
"execution_count": 32,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -1204,7 +1241,7 @@
},
{
"cell_type": "code",
"execution_count": 32,
"execution_count": 33,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -1227,7 +1264,7 @@
},
{
"cell_type": "code",
"execution_count": 33,
"execution_count": 34,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -1242,7 +1279,7 @@
},
{
"cell_type": "code",
"execution_count": 34,
"execution_count": 35,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -1270,7 +1307,7 @@
},
{
"cell_type": "code",
"execution_count": 35,
"execution_count": 36,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -1295,7 +1332,7 @@
},
{
"cell_type": "code",
"execution_count": 36,
"execution_count": 37,
"metadata": {
"collapsed": true,
"deletable": true,
@@ -1310,7 +1347,7 @@
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": 38,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -1360,7 +1397,7 @@
},
{
"cell_type": "code",
"execution_count": 38,
"execution_count": 39,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -1387,7 +1424,7 @@
},
{
"cell_type": "code",
"execution_count": 39,
"execution_count": 40,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -1410,7 +1447,7 @@
},
{
"cell_type": "code",
"execution_count": 40,
"execution_count": 43,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -1478,7 +1515,7 @@
},
{
"cell_type": "code",
"execution_count": 41,
"execution_count": 44,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -1488,6 +1525,8 @@
"source": [
"tf.reset_default_graph()\n",
"\n",
"from functools import partial\n",
"\n",
"n_inputs = 28*28\n",
"n_hidden1 = 500\n",
"n_hidden2 = 500\n",
@@ -1500,20 +1539,22 @@
"\n",
"initializer = tf.contrib.layers.variance_scaling_initializer()\n",
"\n",
"with tf.contrib.framework.arg_scope([fully_connected],\n",
" activation_fn=tf.nn.elu,\n",
" weights_initializer=initializer):\n",
" X = tf.placeholder(tf.float32, [None, n_inputs])\n",
" hidden1 = fully_connected(X, n_hidden1)\n",
" hidden2 = fully_connected(hidden1, n_hidden2)\n",
" hidden3_mean = fully_connected(hidden2, n_hidden3, activation_fn=None)\n",
" hidden3_gamma = fully_connected(hidden2, n_hidden3, activation_fn=None)\n",
" noise = tf.random_normal(tf.shape(hidden3_gamma), dtype=tf.float32)\n",
" hidden3 = hidden3_mean + tf.exp(0.5 * hidden3_gamma) * noise\n",
" hidden4 = fully_connected(hidden3, n_hidden4)\n",
" hidden5 = fully_connected(hidden4, n_hidden5)\n",
" logits = fully_connected(hidden5, n_outputs, activation_fn=None)\n",
" outputs = tf.sigmoid(logits)\n",
"my_dense_layer = partial(\n",
" tf.layers.dense,\n",
" activation=tf.nn.elu,\n",
" kernel_initializer=initializer)\n",
"\n",
"X = tf.placeholder(tf.float32, [None, n_inputs])\n",
"hidden1 = my_dense_layer(X, n_hidden1)\n",
"hidden2 = my_dense_layer(hidden1, n_hidden2)\n",
"hidden3_mean = my_dense_layer(hidden2, n_hidden3, activation=None)\n",
"hidden3_gamma = my_dense_layer(hidden2, n_hidden3, activation=None)\n",
"noise = tf.random_normal(tf.shape(hidden3_gamma), dtype=tf.float32)\n",
"hidden3 = hidden3_mean + tf.exp(0.5 * hidden3_gamma) * noise\n",
"hidden4 = my_dense_layer(hidden3, n_hidden4)\n",
"hidden5 = my_dense_layer(hidden4, n_hidden5)\n",
"logits = my_dense_layer(hidden5, n_outputs, activation=None)\n",
"outputs = tf.sigmoid(logits)\n",
"\n",
"reconstruction_loss = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(labels=X, logits=logits))\n",
"latent_loss = 0.5 * tf.reduce_sum(tf.exp(hidden3_gamma) + tf.square(hidden3_mean) - 1 - hidden3_gamma)\n",
@@ -1528,7 +1569,7 @@
},
{
"cell_type": "code",
"execution_count": 42,
"execution_count": 45,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -1565,7 +1606,7 @@
},
{
"cell_type": "code",
"execution_count": 43,
"execution_count": 46,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -1594,7 +1635,7 @@
},
{
"cell_type": "code",
"execution_count": 44,
"execution_count": 47,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -1619,7 +1660,7 @@
},
{
"cell_type": "code",
"execution_count": 45,
"execution_count": 48,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -1647,7 +1688,7 @@
},
{
"cell_type": "code",
"execution_count": 46,
"execution_count": 49,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -1667,7 +1708,7 @@
},
{
"cell_type": "code",
"execution_count": 47,
"execution_count": 50,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -1682,7 +1723,7 @@
},
{
"cell_type": "code",
"execution_count": 48,
"execution_count": 51,
"metadata": {
"collapsed": true,
"deletable": true,
@@ -1712,7 +1753,7 @@
},
{
"cell_type": "code",
"execution_count": 49,
"execution_count": 52,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -1787,7 +1828,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2+"
"version": "3.5.3"
},
"nav_menu": {
"height": "381px",