From 326d32cae09d41f2a2593f036fbcb464bd1ac4ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Geron?= Date: Sun, 30 Apr 2017 10:21:27 +0200 Subject: [PATCH] Use tf.layers instead of tf.contrib.layers --- ...uction_to_artificial_neural_networks.ipynb | 22 +- 11_deep_learning.ipynb | 214 ++++++++++++------ 12_distributed_tensorflow.ipynb | 2 +- 13_convolutional_neural_networks.ipynb | 208 +++++++++++++---- 14_recurrent_neural_networks.ipynb | 78 +++---- 15_autoencoders.ipynb | 171 ++++++++------ 16_reinforcement_learning.ipynb | 94 +++++--- 7 files changed, 531 insertions(+), 258 deletions(-) diff --git a/10_introduction_to_artificial_neural_networks.ipynb b/10_introduction_to_artificial_neural_networks.ipynb index 9aef566..9441684 100644 --- a/10_introduction_to_artificial_neural_networks.ipynb +++ b/10_introduction_to_artificial_neural_networks.ipynb @@ -584,7 +584,17 @@ "editable": true }, "source": [ - "## Using `fully_connected` instead of `neuron_layer()`" + "## Using `dense()` instead of `neuron_layer()`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note: the book uses `tensorflow.contrib.layers.fully_connected()` rather than `tf.layers.dense()` (which did not exist when this chapter was written). It is now preferable to use `tf.layers.dense()`, because anything in the contrib module may change or be deleted without notice. The `dense()` function is almost identical to the `fully_connected()` function, except for a few minor differences:\n", + "* several parameters are renamed: `scope` becomes `name`, `activation_fn` becomes `activation` (and similarly the `_fn` suffix is removed from other parameters such as `normalizer_fn`), `weights_initializer` becomes `kernel_initializer`, etc.\n", + "* the default `activation` is now `None` rather than `tf.nn.relu`.\n", + "* a few more differences are presented in chapter 11." ] }, { @@ -599,8 +609,6 @@ "source": [ "tf.reset_default_graph()\n", "\n", - "from tensorflow.contrib.layers import fully_connected\n", - "\n", "n_inputs = 28*28 # MNIST\n", "n_hidden1 = 300\n", "n_hidden2 = 100\n", @@ -611,9 +619,9 @@ "y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n", "\n", "with tf.name_scope(\"dnn\"):\n", - " hidden1 = fully_connected(X, n_hidden1, scope=\"hidden1\")\n", - " hidden2 = fully_connected(hidden1, n_hidden2, scope=\"hidden2\")\n", - " logits = fully_connected(hidden2, n_outputs, activation_fn=None, scope=\"outputs\")\n", + " hidden1 = tf.layers.dense(X, n_hidden1, name=\"hidden1\", activation=tf.nn.relu)\n", + " hidden2 = tf.layers.dense(hidden1, n_hidden2, name=\"hidden2\", activation=tf.nn.relu)\n", + " logits = tf.layers.dense(hidden2, n_outputs, name=\"outputs\")\n", "\n", "with tf.name_scope(\"loss\"):\n", " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n", @@ -719,7 +727,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.2+" + "version": "3.5.3" }, "nav_menu": { "height": "264px", diff --git a/11_deep_learning.ipynb b/11_deep_learning.ipynb index 5c440ec..74f0d00 100644 --- a/11_deep_learning.ipynb +++ b/11_deep_learning.ipynb @@ -297,6 +297,20 @@ " display(HTML(iframe))" ] }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "Note: the book uses `tensorflow.contrib.layers.fully_connected()` rather than `tf.layers.dense()` (which did not exist when this chapter was written). 
It is now preferable to use `tf.layers.dense()`, because anything in the contrib module may change or be deleted without notice. The `dense()` function is almost identical to the `fully_connected()` function. The main differences relevant to this chapter are:\n", + "* several parameters are renamed: `scope` becomes `name`, `activation_fn` becomes `activation` (and similarly the `_fn` suffix is removed from other parameters such as `normalizer_fn`), `weights_initializer` becomes `kernel_initializer`, etc.\n", + "* the default `activation` is now `None` rather than `tf.nn.relu`.\n", + "* it does not support `tensorflow.contrib.framework.arg_scope()` (introduced later in chapter 11).\n", + "* it does not support regularizer params (introduced later in chapter 11)." + ] + }, { "cell_type": "code", "execution_count": 12, @@ -307,8 +321,6 @@ }, "outputs": [], "source": [ - "from tensorflow.contrib.layers import fully_connected\n", - "\n", "tf.reset_default_graph()\n", "\n", "n_inputs = 28*28 # MNIST\n", @@ -321,9 +333,9 @@ "y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n", "\n", "with tf.name_scope(\"dnn\"):\n", - " hidden1 = fully_connected(X, n_hidden1, activation_fn=leaky_relu, scope=\"hidden1\")\n", - " hidden2 = fully_connected(hidden1, n_hidden2, activation_fn=leaky_relu, scope=\"hidden2\")\n", - " logits = fully_connected(hidden2, n_outputs, activation_fn=None, scope=\"outputs\")\n", + " hidden1 = tf.layers.dense(X, n_hidden1, activation=leaky_relu, name=\"hidden1\")\n", + " hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=leaky_relu, name=\"hidden2\")\n", + " logits = tf.layers.dense(hidden2, n_outputs, name=\"outputs\")\n", "\n", "with tf.name_scope(\"loss\"):\n", " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n", @@ -377,6 +389,24 @@ "# Batch Normalization" ] }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "Note: the book uses `tensorflow.contrib.layers.batch_norm()` rather than `tf.layers.batch_normalization()` (which did not exist when this chapter was written). It is now preferable to use `tf.layers.batch_normalization()`, because anything in the contrib module may change or be deleted without notice. Instead of using the `batch_norm()` function as a regularizer parameter to the `fully_connected()` function, we now use `batch_normalization()` and we explicitly create a distinct layer. The parameters are a bit different, in particular:\n", + "* `decay` is renamed to `momentum`,\n", + "* `is_training` is renamed to `training`,\n", + "* `updates_collections` is removed: the update operations needed by batch normalization are added to the `UPDATE_OPS` collection and you need to explicity run these operations during training (see the execution phase below),\n", + "* we don't need to specify `scale=True`, as that is the default.\n", + "\n", + "Also note that in order to run batch norm just _before_ each hidden layer's activation function, we apply the ELU activation function manually, right after the batch norm layer.\n", + "\n", + "Note: since the `tf.layers.dense()` function is incompatible with `tf.contrib.layers.arg_scope()` (which is used in the book), we now use python's `functools.partial()` function instead. It makes it easy to create a `my_dense_layer()` function that just calls `tf.layers.dense()` with the desired parameters automatically set (unless they are overridden when calling `my_dense_layer()`). As you can see, the code remains very similar." 
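,
    "\n",
    "To make the renaming concrete, here is a minimal sketch (with arbitrary layer sizes, not taken from the book) of a dense layer followed by the new-style batch norm layer. It also shows an equivalent way to handle the `UPDATE_OPS` collection: attaching the update operations to the training op with `tf.control_dependencies()`, so that a plain `sess.run(training_op, ...)` is enough (the code below uses the explicit `sess.run([training_op, extra_update_ops], ...)` form instead):\n",
    "\n",
    "```python\n",
    "import tensorflow as tf\n",
    "\n",
    "X = tf.placeholder(tf.float32, shape=[None, 28 * 28], name=\"X\")\n",
    "y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n",
    "is_training = tf.placeholder(tf.bool, shape=(), name=\"is_training\")\n",
    "\n",
    "hidden1 = tf.layers.dense(X, 300, name=\"hidden1\")\n",
    "# the book's call was: batch_norm(hidden1, is_training=is_training, decay=0.9,\n",
    "#                                 updates_collections=None, scale=True)\n",
    "bn1 = tf.nn.elu(tf.layers.batch_normalization(hidden1, training=is_training, momentum=0.9))\n",
    "logits = tf.layers.dense(bn1, 10, name=\"outputs\")\n",
    "\n",
    "xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
    "loss = tf.reduce_mean(xentropy)\n",
    "optimizer = tf.train.GradientDescentOptimizer(0.01)\n",
    "\n",
    "# alternative to running extra_update_ops by hand at each training iteration:\n",
    "update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)\n",
    "with tf.control_dependencies(update_ops):\n",
    "    training_op = optimizer.minimize(loss)\n",
    "```"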
+ ] + }, { "cell_type": "code", "execution_count": 14, @@ -387,11 +417,10 @@ }, "outputs": [], "source": [ - "from tensorflow.contrib.layers import fully_connected, batch_norm\n", - "from tensorflow.contrib.framework import arg_scope\n", - "\n", "tf.reset_default_graph()\n", "\n", + "from functools import partial\n", + "\n", "n_inputs = 28 * 28 # MNIST\n", "n_hidden1 = 300\n", "n_hidden2 = 100\n", @@ -405,22 +434,23 @@ "\n", "with tf.name_scope(\"dnn\"):\n", " he_init = tf.contrib.layers.variance_scaling_initializer()\n", - " batch_norm_params = {\n", - " 'is_training': is_training,\n", - " 'decay': 0.9,\n", - " 'updates_collections': None,\n", - " 'scale': True,\n", - " }\n", "\n", - " with arg_scope(\n", - " [fully_connected],\n", - " activation_fn=tf.nn.elu,\n", - " weights_initializer=he_init,\n", - " normalizer_fn=batch_norm,\n", - " normalizer_params=batch_norm_params):\n", - " hidden1 = fully_connected(X, n_hidden1, scope=\"hidden1\")\n", - " hidden2 = fully_connected(hidden1, n_hidden2, scope=\"hidden2\")\n", - " logits = fully_connected(hidden2, n_outputs, activation_fn=None, scope=\"outputs\")\n", + " my_batch_norm_layer = partial(\n", + " tf.layers.batch_normalization,\n", + " training=is_training,\n", + " momentum=0.9)\n", + "\n", + " my_dense_layer = partial(\n", + " tf.layers.dense,\n", + " kernel_initializer=he_init)\n", + "\n", + " hidden1 = my_dense_layer(X, n_hidden1, name=\"hidden1\")\n", + " bn1 = tf.nn.elu(my_batch_norm_layer(hidden1))\n", + " hidden2 = my_dense_layer(bn1, n_hidden2, name=\"hidden2\")\n", + " bn2 = tf.nn.elu(my_batch_norm_layer(hidden2))\n", + " logits_before_bn = my_dense_layer(bn2, n_outputs, activation=None, name=\"outputs\")\n", + " logits = my_batch_norm_layer(logits_before_bn)\n", + " extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)\n", "\n", "with tf.name_scope(\"loss\"):\n", " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n", @@ -438,6 +468,16 @@ "saver = tf.train.Saver()" ] }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "Note: since we are using `tf.layers.batch_normalization()` rather than `tf.contrib.layers.batch_norm()` (as in the book), we need to explicitly run the extra update operations needed by batch normalization (`sess.run([training_op, extra_update_ops],...`)." 
+ ] + }, { "cell_type": "code", "execution_count": 15, @@ -449,14 +489,14 @@ "outputs": [], "source": [ "n_epochs = 20\n", - "batch_size = 50\n", + "batch_size = 200\n", "\n", "with tf.Session() as sess:\n", " init.run()\n", " for epoch in range(n_epochs):\n", " for iteration in range(len(mnist.test.labels)//batch_size):\n", " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", - " sess.run(training_op, feed_dict={is_training: True, X: X_batch, y: y_batch})\n", + " sess.run([training_op, extra_update_ops], feed_dict={is_training: True, X: X_batch, y: y_batch})\n", " acc_train = accuracy.eval(feed_dict={is_training: False, X: X_batch, y: y_batch})\n", " acc_test = accuracy.eval(feed_dict={is_training: False, X: mnist.test.images, y: mnist.test.labels})\n", " print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n", @@ -464,11 +504,21 @@ " save_path = saver.save(sess, \"my_model_final.ckpt\")" ] }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "Now the same model with $\\ell_1$ regularization:" + ] + }, { "cell_type": "code", "execution_count": 16, "metadata": { - "collapsed": false, + "collapsed": true, "deletable": true, "editable": true }, @@ -476,29 +526,32 @@ "source": [ "tf.reset_default_graph()\n", "\n", + "from functools import partial\n", + "\n", "X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n", "y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n", "is_training = tf.placeholder(tf.bool, shape=(), name='is_training')\n", "\n", "with tf.name_scope(\"dnn\"):\n", " he_init = tf.contrib.layers.variance_scaling_initializer()\n", - " batch_norm_params = {\n", - " 'is_training': is_training,\n", - " 'decay': 0.9,\n", - " 'updates_collections': None,\n", - " 'scale': True,\n", - " }\n", "\n", - " with arg_scope(\n", - " [fully_connected],\n", - " activation_fn=tf.nn.elu,\n", - " weights_initializer=he_init,\n", - " normalizer_fn=batch_norm,\n", - " normalizer_params=batch_norm_params,\n", - " weights_regularizer=tf.contrib.layers.l1_regularizer(0.01)):\n", - " hidden1 = fully_connected(X, n_hidden1, scope=\"hidden1\")\n", - " hidden2 = fully_connected(hidden1, n_hidden2, scope=\"hidden2\")\n", - " logits = fully_connected(hidden2, n_outputs, activation_fn=None, scope=\"outputs\")\n", + " my_batch_norm_layer = partial(\n", + " tf.layers.batch_normalization,\n", + " training=is_training,\n", + " momentum=0.9)\n", + "\n", + " my_dense_layer = partial(\n", + " tf.layers.dense,\n", + " kernel_initializer=he_init,\n", + " kernel_regularizer=tf.contrib.layers.l1_regularizer(0.01))\n", + "\n", + " hidden1 = my_dense_layer(X, n_hidden1, name=\"hidden1\")\n", + " bn1 = tf.nn.elu(my_batch_norm_layer(hidden1))\n", + " hidden2 = my_dense_layer(bn1, n_hidden2, name=\"hidden2\")\n", + " bn2 = tf.nn.elu(my_batch_norm_layer(hidden2))\n", + " logits_before_bn = my_dense_layer(bn2, n_outputs, activation=None, name=\"outputs\")\n", + " logits = my_batch_norm_layer(logits_before_bn)\n", + " extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)\n", "\n", "with tf.name_scope(\"loss\"):\n", " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n", @@ -513,7 +566,7 @@ "with tf.name_scope(\"eval\"):\n", " correct = tf.nn.in_top_k(logits, y, 1)\n", " accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", - " \n", + "\n", "init = tf.global_variables_initializer()\n", "saver = tf.train.Saver()" ] @@ -529,14 +582,14 @@ "outputs": [], "source": [ "n_epochs = 
20\n", - "batch_size = 50\n", + "batch_size = 200\n", "\n", "with tf.Session() as sess:\n", " init.run()\n", " for epoch in range(n_epochs):\n", " for iteration in range(len(mnist.test.labels)//batch_size):\n", " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", - " sess.run(training_op, feed_dict={is_training: True, X: X_batch, y: y_batch})\n", + " sess.run([training_op, extra_update_ops], feed_dict={is_training: True, X: X_batch, y: y_batch})\n", " acc_train = accuracy.eval(feed_dict={is_training: False, X: X_batch, y: y_batch})\n", " acc_test = accuracy.eval(feed_dict={is_training: False, X: mnist.test.images, y: mnist.test.labels})\n", " print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n", @@ -557,6 +610,16 @@ "[v.name for v in tf.global_variables()]" ] }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "Note: the weights variable created by the `tf.layers.dense()` function is called `\"kernel\"` (instead of `\"weights\"` when using the `tf.contrib.layers.fully_connected()`, as in the book):" + ] + }, { "cell_type": "code", "execution_count": 19, @@ -568,8 +631,8 @@ "outputs": [], "source": [ "with tf.variable_scope(\"\", default_name=\"\", reuse=True): # root scope\n", - " weights1 = tf.get_variable(\"hidden1/weights\")\n", - " weights2 = tf.get_variable(\"hidden2/weights\")\n", + " weights1 = tf.get_variable(\"hidden1/kernel\")\n", + " weights2 = tf.get_variable(\"hidden2/kernel\")\n", " " ] }, @@ -689,6 +752,8 @@ "source": [ "tf.reset_default_graph()\n", "\n", + "from functools import partial\n", + "\n", "X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n", "y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n", "is_training = tf.placeholder(tf.bool, shape=(), name='is_training')\n", @@ -701,12 +766,15 @@ " return max_norm\n", "\n", "with tf.name_scope(\"dnn\"):\n", - " with arg_scope(\n", - " [fully_connected],\n", - " weights_regularizer=max_norm_regularizer(1.5)):\n", - " hidden1 = fully_connected(X, n_hidden1, scope=\"hidden1\")\n", - " hidden2 = fully_connected(hidden1, n_hidden2, scope=\"hidden2\")\n", - " logits = fully_connected(hidden2, n_outputs, activation_fn=None, scope=\"outputs\")\n", + " \n", + " my_dense_layer = partial(\n", + " tf.layers.dense,\n", + " activation=tf.nn.relu,\n", + " kernel_regularizer=max_norm_regularizer(1.5))\n", + "\n", + " hidden1 = my_dense_layer(X, n_hidden1, name=\"hidden1\")\n", + " hidden2 = my_dense_layer(hidden1, n_hidden2, name=\"hidden2\")\n", + " logits = my_dense_layer(hidden2, n_outputs, activation=None, name=\"outputs\")\n", "\n", "clip_all_weights = tf.get_collection(\"max_norm\")\n", " \n", @@ -770,6 +838,18 @@ "show_graph(tf.get_default_graph())" ] }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "Note: the book uses `tf.contrib.layers.dropout()` rather than `tf.layers.dropout()` (which did not exist when this chapter was written). It is now preferable to use `tf.layers.dropout()`, because anything in the contrib module may change or be deleted without notice. The `tf.layers.dropout()` function is almost identical to the `tf.contrib.layers.dropout()` function, except for a few minor differences. Most importantly:\n", + "* you must specify the dropout rate (`rate`) rather than the keep probability (`keep_prob`), where `rate` is simply equal to `1 - keep_prob`,\n", + "* the `is_training` parameter is renamed to `training`." 
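,
    "\n",
    "For example (a minimal sketch with an arbitrary rate, not taken from the book), the mapping between the two APIs looks like this:\n",
    "\n",
    "```python\n",
    "import tensorflow as tf\n",
    "\n",
    "X = tf.placeholder(tf.float32, shape=[None, 28 * 28], name=\"X\")\n",
    "is_training = tf.placeholder(tf.bool, shape=(), name=\"is_training\")\n",
    "\n",
    "# the book's call was: dropout(X, keep_prob=0.5, is_training=is_training)\n",
    "X_drop = tf.layers.dropout(X, rate=0.5, training=is_training)\n",
    "```"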
+ ] + }, { "cell_type": "code", "execution_count": 30, @@ -780,7 +860,7 @@ }, "outputs": [], "source": [ - "from tensorflow.contrib.layers import dropout\n", + "from functools import partial\n", "\n", "tf.reset_default_graph()\n", "\n", @@ -795,20 +875,22 @@ "learning_rate = tf.train.exponential_decay(initial_learning_rate, global_step,\n", " decay_steps, decay_rate)\n", "\n", - "keep_prob = 0.5\n", + "dropout_rate = 0.5\n", "\n", "with tf.name_scope(\"dnn\"):\n", " he_init = tf.contrib.layers.variance_scaling_initializer()\n", - " with arg_scope(\n", - " [fully_connected],\n", - " activation_fn=tf.nn.elu,\n", - " weights_initializer=he_init):\n", - " X_drop = dropout(X, keep_prob, is_training=is_training)\n", - " hidden1 = fully_connected(X_drop, n_hidden1, scope=\"hidden1\")\n", - " hidden1_drop = dropout(hidden1, keep_prob, is_training=is_training)\n", - " hidden2 = fully_connected(hidden1_drop, n_hidden2, scope=\"hidden2\")\n", - " hidden2_drop = dropout(hidden2, keep_prob, is_training=is_training)\n", - " logits = fully_connected(hidden2_drop, n_outputs, activation_fn=None, scope=\"outputs\")\n", + "\n", + " my_dense_layer = partial(\n", + " tf.layers.dense,\n", + " activation=tf.nn.elu,\n", + " kernel_initializer=he_init)\n", + "\n", + " X_drop = tf.layers.dropout(X, dropout_rate, training=is_training)\n", + " hidden1 = my_dense_layer(X_drop, n_hidden1, name=\"hidden1\")\n", + " hidden1_drop = tf.layers.dropout(hidden1, dropout_rate, training=is_training)\n", + " hidden2 = my_dense_layer(hidden1_drop, n_hidden2, name=\"hidden2\")\n", + " hidden2_drop = tf.layers.dropout(hidden2, dropout_rate, training=is_training)\n", + " logits = my_dense_layer(hidden2_drop, n_outputs, activation=None, name=\"outputs\")\n", "\n", "with tf.name_scope(\"loss\"):\n", " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n", @@ -970,7 +1052,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.2+" + "version": "3.5.3" }, "nav_menu": { "height": "360px", diff --git a/12_distributed_tensorflow.ipynb b/12_distributed_tensorflow.ipynb index b91be4e..b95ee70 100644 --- a/12_distributed_tensorflow.ipynb +++ b/12_distributed_tensorflow.ipynb @@ -541,7 +541,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.2+" + "version": "3.5.3" }, "nav_menu": {}, "toc": { diff --git a/13_convolutional_neural_networks.ipynb b/13_convolutional_neural_networks.ipynb index b6bbe01..504968b 100644 --- a/13_convolutional_neural_networks.ipynb +++ b/13_convolutional_neural_networks.ipynb @@ -402,50 +402,101 @@ ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": { - "collapsed": true + "deletable": true, + "editable": true }, - "outputs": [], "source": [ - "from six.moves import urllib\n", - "from sklearn.datasets import fetch_mldata\n", - "try:\n", - " mnist = fetch_mldata('MNIST original')\n", - "except urllib.error.HTTPError as ex:\n", - " print(\"Could not download MNIST data from mldata.org, trying alternative...\")\n", + "Note: instead of using the `fully_connected()`, `conv2d()` and `dropout()` functions from the `tensorflow.contrib.layers` module (as in the book), we now use the `dense()`, `conv2d()` and `dropout()` functions (respectively) from the `tf.layers` module, which did not exist when this chapter was written. This is preferable because anything in contrib may change or be deleted without notice, while `tf.layers` is part of the official API. 
As you will see, the code is mostly the same.\n", "\n", - " # Alternative method to load MNIST, if mldata.org is down\n", - " from scipy.io import loadmat\n", - " mnist_alternative_url = \"https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat\"\n", - " mnist_path = \"./mnist-original.mat\"\n", - " response = urllib.request.urlopen(mnist_alternative_url)\n", - " with open(mnist_path, \"wb\") as f:\n", - " content = response.read()\n", - " f.write(content)\n", - " mnist_raw = loadmat(mnist_path)\n", - " mnist = {\n", - " \"data\": mnist_raw[\"data\"].T,\n", - " \"target\": mnist_raw[\"label\"][0],\n", - " \"COL_NAMES\": [\"label\", \"data\"],\n", - " \"DESCR\": \"mldata.org dataset: mnist-original\",\n", - " }\n", - " print(\"Success!\")" + "For all these functions:\n", + "* the `scope` parameter was renamed to `name`, and the `_fn` suffix was removed in all the parameters that had it (for example the `activation_fn` parameter was renamed to `activation`).\n", + "\n", + "The other main differences in `tf.layers.dense()` are:\n", + "* the `weights` parameter was renamed to `kernel` (and the weights variable is now named `\"kernel\"` rather than `\"weights\"`),\n", + "* the default activation is `None` instead of `tf.nn.relu`\n", + "\n", + "The other main differences in `tf.layers.conv2d()` are:\n", + "* the `num_outputs` parameter was renamed to `filters`,\n", + "* the `stride` parameter was renamed to `strides`,\n", + "* the default `activation` is now `None` instead of `tf.nn.relu`.\n", + "\n", + "The other main differences in `tf.layers.dropout()` are:\n", + "* it takes the dropout rate (`rate`) rather than the keep probability (`keep_prob`). Of course, `rate == 1 - keep_prob`,\n", + "* the `is_training` parameters was renamed to `training`." 
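,
    "\n",
    "For example, here is what a single convolutional layer looks like with the new API (a minimal sketch with arbitrary hyperparameters, not taken from the book):\n",
    "\n",
    "```python\n",
    "import tensorflow as tf\n",
    "\n",
    "X = tf.placeholder(tf.float32, shape=[None, 28, 28, 1], name=\"X\")\n",
    "\n",
    "# the book's call was: conv2d(X, num_outputs=32, kernel_size=3, stride=1,\n",
    "#                             padding=\"SAME\", activation_fn=tf.nn.relu, scope=\"conv1\")\n",
    "conv1 = tf.layers.conv2d(X, filters=32, kernel_size=3, strides=1,\n",
    "                         padding=\"SAME\", activation=tf.nn.relu, name=\"conv1\")\n",
    "```"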
] }, { "cell_type": "code", "execution_count": 15, "metadata": { - "collapsed": true, + "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ - "X_train, X_test = mnist[\"data\"][:60000].astype(np.float64), mnist[\"data\"][60000:].astype(np.float64)\n", - "y_train, y_test = mnist[\"target\"][:60000].astype(np.int64), mnist[\"target\"][60000:].astype(np.int64)" + "height = 28\n", + "width = 28\n", + "channels = 1\n", + "n_inputs = height * width\n", + "\n", + "conv1_fmaps = 32\n", + "conv1_ksize = 3\n", + "conv1_stride = 1\n", + "conv1_pad = \"SAME\"\n", + "\n", + "conv2_fmaps = 64\n", + "conv2_ksize = 3\n", + "conv2_stride = 1\n", + "conv2_pad = \"SAME\"\n", + "conv2_dropout_rate = 0.25\n", + "\n", + "pool3_fmaps = conv2_fmaps\n", + "\n", + "n_fc1 = 128\n", + "fc1_dropout_rate = 0.5\n", + "\n", + "n_outputs = 10\n", + "\n", + "graph = tf.Graph()\n", + "with graph.as_default():\n", + " with tf.name_scope(\"inputs\"):\n", + " X = tf.placeholder(tf.float32, shape=[None, n_inputs], name=\"X\")\n", + " X_reshaped = tf.reshape(X, shape=[-1, height, width, channels])\n", + " y = tf.placeholder(tf.int32, shape=[None], name=\"y\")\n", + " is_training = tf.placeholder_with_default(False, shape=[], name='is_training')\n", + "\n", + " conv1 = tf.layers.conv2d(X_reshaped, filters=conv1_fmaps, kernel_size=conv1_ksize, strides=conv1_stride, padding=conv1_pad, activation=tf.nn.relu, name=\"conv1\")\n", + " conv2 = tf.layers.conv2d(conv1, filters=conv2_fmaps, kernel_size=conv2_ksize, strides=conv2_stride, padding=conv2_pad, activation=tf.nn.relu, name=\"conv2\")\n", + "\n", + " with tf.name_scope(\"pool3\"):\n", + " pool3 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding=\"VALID\")\n", + " pool3_flat = tf.reshape(pool3, shape=[-1, pool3_fmaps * 14 * 14])\n", + " pool3_flat_drop = tf.layers.dropout(pool3_flat, conv2_dropout_rate, training=is_training)\n", + "\n", + " with tf.name_scope(\"fc1\"):\n", + " fc1 = tf.layers.dense(pool3_flat_drop, n_fc1, activation=tf.nn.relu, name=\"fc1\")\n", + " fc1_drop = tf.layers.dropout(fc1, fc1_dropout_rate, training=is_training)\n", + "\n", + " with tf.name_scope(\"output\"):\n", + " logits = tf.layers.dense(fc1, n_outputs, name=\"output\")\n", + " Y_proba = tf.nn.softmax(logits, name=\"Y_proba\")\n", + "\n", + " with tf.name_scope(\"train\"):\n", + " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)\n", + " loss = tf.reduce_mean(xentropy)\n", + " optimizer = tf.train.AdamOptimizer()\n", + " training_op = optimizer.minimize(loss)\n", + "\n", + " with tf.name_scope(\"eval\"):\n", + " correct = tf.nn.in_top_k(logits, y, 1)\n", + " accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", + "\n", + " with tf.name_scope(\"init_and_save\"):\n", + " init = tf.global_variables_initializer()\n", + " saver = tf.train.Saver()" ] }, { @@ -458,9 +509,78 @@ }, "outputs": [], "source": [ - "height, width = 28, 28\n", - "images = X_test[5000].reshape(1, height, width, 1)\n", - "plot_image(images[0, :, :, 0])" + "from tensorflow.examples.tutorials.mnist import input_data\n", + "mnist = input_data.read_data_sets(\"/tmp/data/\")" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "def get_model_params():\n", + " gvars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)\n", + " return {gvar.op.name: value for gvar, value in zip(gvars, tf.get_default_session().run(gvars))}\n", + 
"\n", + "def restore_model_params(model_params):\n", + " gvar_names = list(model_params.keys())\n", + " assign_ops = {gvar_name: tf.get_default_graph().get_operation_by_name(gvar_name + \"/Assign\")\n", + " for gvar_name in gvar_names}\n", + " init_values = {gvar_name: assign_op.inputs[1] for gvar_name, assign_op in assign_ops.items()}\n", + " feed_dict = {init_values[gvar_name]: model_params[gvar_name] for gvar_name in gvar_names}\n", + " tf.get_default_session().run(assign_ops, feed_dict=feed_dict)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "n_epochs = 1000\n", + "batch_size = 50\n", + "\n", + "best_acc_val = 0\n", + "check_interval = 100\n", + "checks_since_last_progress = 0\n", + "max_checks_without_progress = 100\n", + "best_model_params = None \n", + "\n", + "with tf.Session(graph=graph) as sess:\n", + " init.run()\n", + " for epoch in range(n_epochs):\n", + " for iteration in range(mnist.train.num_examples // batch_size):\n", + " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", + " sess.run(training_op, feed_dict={X: X_batch, y: y_batch, is_training: True})\n", + " if iteration % check_interval == 0:\n", + " acc_val = accuracy.eval(feed_dict={X: mnist.test.images[:2000], y: mnist.test.labels[:2000]})\n", + " if acc_val > best_acc_val:\n", + " best_acc_val = acc_val\n", + " checks_since_last_progress = 0\n", + " best_model_params = get_model_params()\n", + " else:\n", + " checks_since_last_progress += 1\n", + " acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", + " acc_test = accuracy.eval(feed_dict={X: mnist.test.images[2000:], y: mnist.test.labels[2000:]})\n", + " print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test, \"Best validation accuracy:\", best_acc_val)\n", + " if checks_since_last_progress > max_checks_without_progress:\n", + " print(\"Early stopping!\")\n", + " break\n", + "\n", + " if best_model_params:\n", + " restore_model_params(best_model_params)\n", + " acc_test = accuracy.eval(feed_dict={X: mnist.test.images[2000:], y: mnist.test.labels[2000:]})\n", + " print(\"Final accuracy on test set:\", acc_test)\n", + " save_path = saver.save(sess, \"./my_mnist_model\")" ] }, { @@ -475,7 +595,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 21, "metadata": { "collapsed": true, "deletable": true, @@ -511,7 +631,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 22, "metadata": { "collapsed": false, "deletable": true, @@ -524,7 +644,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 23, "metadata": { "collapsed": true, "deletable": true, @@ -544,7 +664,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 24, "metadata": { "collapsed": false, "deletable": true, @@ -557,7 +677,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 25, "metadata": { "collapsed": true, "deletable": true, @@ -572,7 +692,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 26, "metadata": { "collapsed": false, "deletable": true, @@ -589,7 +709,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 27, "metadata": { "collapsed": false, "deletable": true, @@ -611,7 +731,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 28, "metadata": { "collapsed": false, "deletable": true, @@ -628,7 +748,7 @@ }, { "cell_type": "code", - 
"execution_count": 25, + "execution_count": 29, "metadata": { "collapsed": false, "deletable": true, @@ -641,7 +761,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 30, "metadata": { "collapsed": false, "deletable": true, @@ -654,7 +774,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 31, "metadata": { "collapsed": false, "deletable": true, @@ -717,7 +837,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.2+" + "version": "3.5.3" }, "nav_menu": {}, "toc": { diff --git a/14_recurrent_neural_networks.ipynb b/14_recurrent_neural_networks.ipynb index 4a1b2ad..5d26a58 100644 --- a/14_recurrent_neural_networks.ipynb +++ b/14_recurrent_neural_networks.ipynb @@ -573,6 +573,18 @@ "## Training a sequence classifier" ] }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "Note: the book uses `tensorflow.contrib.layers.fully_connected()` rather than `tf.layers.dense()` (which did not exist when this chapter was written). It is now preferable to use `tf.layers.dense()`, because anything in the contrib module may change or be deleted without notice. The `dense()` function is almost identical to the `fully_connected()` function. The main differences relevant to this chapter are:\n", + "* several parameters are renamed: `scope` becomes `name`, `activation_fn` becomes `activation` (and similarly the `_fn` suffix is removed from other parameters such as `normalizer_fn`), `weights_initializer` becomes `kernel_initializer`, etc.\n", + "* the default `activation` is now `None` rather than `tf.nn.relu`." + ] + }, { "cell_type": "code", "execution_count": 23, @@ -585,8 +597,6 @@ "source": [ "tf.reset_default_graph()\n", "\n", - "from tensorflow.contrib.layers import fully_connected\n", - "\n", "n_steps = 28\n", "n_inputs = 28\n", "n_neurons = 150\n", @@ -601,7 +611,7 @@ " basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)\n", " outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)\n", "\n", - "logits = fully_connected(states, n_outputs, activation_fn=None)\n", + "logits = tf.layers.dense(states, n_outputs)\n", "xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n", "loss = tf.reduce_mean(xentropy)\n", "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", @@ -675,8 +685,6 @@ "source": [ "tf.reset_default_graph()\n", "\n", - "from tensorflow.contrib.layers import fully_connected\n", - "\n", "n_steps = 28\n", "n_inputs = 28\n", "n_neurons1 = 150\n", @@ -693,7 +701,7 @@ "multi_layer_cell = tf.contrib.rnn.MultiRNNCell([hidden1, hidden2])\n", "outputs, states_tuple = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n", "states = tf.concat(axis=1, values=states_tuple)\n", - "logits = fully_connected(states, n_outputs, activation_fn=None)\n", + "logits = tf.layers.dense(states, n_outputs)\n", "xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n", "loss = tf.reduce_mean(xentropy)\n", "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", @@ -847,8 +855,6 @@ "source": [ "tf.reset_default_graph()\n", "\n", - "from tensorflow.contrib.layers import fully_connected\n", - "\n", "n_steps = 20\n", "n_inputs = 1\n", "n_neurons = 100\n", @@ -942,8 +948,6 @@ "source": [ "tf.reset_default_graph()\n", "\n", - "from tensorflow.contrib.layers import fully_connected\n", - "\n", "n_steps = 20\n", "n_inputs = 1\n", "n_neurons = 
100\n", @@ -958,7 +962,7 @@ "learning_rate = 0.001\n", "\n", "stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n", - "stacked_outputs = fully_connected(stacked_rnn_outputs, n_outputs, activation_fn=None)\n", + "stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)\n", "outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])\n", "\n", "loss = tf.reduce_sum(tf.square(outputs - y))\n", @@ -1181,7 +1185,6 @@ "outputs": [], "source": [ "tf.reset_default_graph()\n", - "from tensorflow.contrib.layers import fully_connected\n", "\n", "n_inputs = 1\n", "n_neurons = 100\n", @@ -1202,7 +1205,7 @@ " rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n", "\n", " stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n", - " stacked_outputs = fully_connected(stacked_rnn_outputs, n_outputs, activation_fn=None)\n", + " stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)\n", " outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])\n", "\n", " loss = tf.reduce_sum(tf.square(outputs - y))\n", @@ -1277,8 +1280,6 @@ "source": [ "tf.reset_default_graph()\n", "\n", - "from tensorflow.contrib.layers import fully_connected\n", - "\n", "n_steps = 28\n", "n_inputs = 28\n", "n_neurons = 150\n", @@ -1293,7 +1294,7 @@ "multi_cell = tf.contrib.rnn.MultiRNNCell([lstm_cell]*3)\n", "outputs, states = tf.nn.dynamic_rnn(multi_cell, X, dtype=tf.float32)\n", "top_layer_h_state = states[-1][1]\n", - "logits = fully_connected(top_layer_h_state, n_outputs, activation_fn=None, scope=\"softmax\")\n", + "logits = tf.layers.dense(top_layer_h_state, n_outputs, name=\"softmax\")\n", "xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n", "loss = tf.reduce_mean(xentropy, name=\"loss\")\n", "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", @@ -1336,7 +1337,8 @@ "metadata": { "collapsed": false, "deletable": true, - "editable": true + "editable": true, + "scrolled": true }, "outputs": [], "source": [ @@ -1466,7 +1468,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 52, "metadata": { "collapsed": true, "deletable": true, @@ -1509,7 +1511,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 53, "metadata": { "collapsed": false, "deletable": true, @@ -1522,7 +1524,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 54, "metadata": { "collapsed": false, "deletable": true, @@ -1545,7 +1547,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 55, "metadata": { "collapsed": false, "deletable": true, @@ -1565,7 +1567,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 56, "metadata": { "collapsed": false, "deletable": true, @@ -1578,7 +1580,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 57, "metadata": { "collapsed": false, "deletable": true, @@ -1591,7 +1593,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 58, "metadata": { "collapsed": false, "deletable": true, @@ -1614,7 +1616,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 59, "metadata": { "collapsed": true, "deletable": true, @@ -1652,7 +1654,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 60, "metadata": { "collapsed": false, "deletable": true, @@ -1666,7 +1668,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 61, "metadata": { "collapsed": false, "deletable": true, @@ -1679,7 +1681,7 @@ 
}, { "cell_type": "code", - "execution_count": 17, + "execution_count": 62, "metadata": { "collapsed": false, "deletable": true, @@ -1702,7 +1704,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 63, "metadata": { "collapsed": true, "deletable": true, @@ -1728,7 +1730,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 64, "metadata": { "collapsed": false, "deletable": true, @@ -1787,7 +1789,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 65, "metadata": { "collapsed": false, "deletable": true, @@ -1795,7 +1797,7 @@ }, "outputs": [], "source": [ - "num_steps = 100001\n", + "num_steps = 10001\n", "\n", "with tf.Session() as session:\n", " init.run()\n", @@ -1846,7 +1848,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 66, "metadata": { "collapsed": false, "deletable": true, @@ -1869,7 +1871,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 67, "metadata": { "collapsed": true, "deletable": true, @@ -1893,7 +1895,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 68, "metadata": { "collapsed": false, "deletable": true, @@ -1932,7 +1934,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 69, "metadata": { "collapsed": false, "deletable": true, @@ -1976,7 +1978,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 70, "metadata": { "collapsed": false, "deletable": true, @@ -2033,7 +2035,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.2+" + "version": "3.5.3" }, "nav_menu": {}, "toc": { diff --git a/15_autoencoders.ipynb b/15_autoencoders.ipynb index e1e20f5..4f7156e 100644 --- a/15_autoencoders.ipynb +++ b/15_autoencoders.ipynb @@ -225,7 +225,23 @@ "editable": true }, "source": [ - "Now let's build the Autoencoder:" + "Now let's build the Autoencoder..." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "Note: instead of using the `fully_connected()` function from the `tensorflow.contrib.layers` module (as in the book), we now use the `dense()` function from the `tf.layers` module, which did not exist when this chapter was written. This is preferable because anything in contrib may change or be deleted without notice, while `tf.layers` is part of the official API. 
As you will see, the code is mostly the same.\n", + "\n", + "The main differences relevant to this chapter are:\n", + "* the `scope` parameter was renamed to `name`, and the `_fn` suffix was removed in all the parameters that had it (for example the `activation_fn` parameter was renamed to `activation`).\n", + "* the `weights` parameter was renamed to `kernel` and the weights variable is now named `\"kernel\"` rather than `\"weights\"`,\n", + "* the bias variable is now named `\"bias\"` rather than `\"biases\"`,\n", + "* the default activation is `None` instead of `tf.nn.relu`" ] }, { @@ -240,8 +256,6 @@ "source": [ "tf.reset_default_graph()\n", "\n", - "from tensorflow.contrib.layers import fully_connected\n", - "\n", "n_inputs = 3\n", "n_hidden = 2 # codings\n", "n_outputs = n_inputs\n", @@ -249,8 +263,8 @@ "learning_rate = 0.01\n", "\n", "X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", - "hidden = fully_connected(X, n_hidden, activation_fn=None)\n", - "outputs = fully_connected(hidden, n_outputs, activation_fn=None)\n", + "hidden = tf.layers.dense(X, n_hidden)\n", + "outputs = tf.layers.dense(hidden, n_outputs)\n", "\n", "mse = tf.reduce_mean(tf.square(outputs - X))\n", "\n", @@ -352,6 +366,16 @@ "Let's build a stacked Autoencoder with 3 hidden layers and 1 output layer (ie. 2 stacked Autoencoders). We will use ELU activation, He initialization and L2 regularization." ] }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "Note: since the `tf.layers.dense()` function is incompatible with `tf.contrib.layers.arg_scope()` (which is used in the book), we now use python's `functools.partial()` function instead. It makes it easy to create a `my_dense_layer()` function that just calls `tf.layers.dense()` with the desired parameters automatically set (unless they are overridden when calling `my_dense_layer()`)." 
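,
    "\n",
    "Here is a minimal sketch of this `partial()` pattern (with arbitrary sizes and regularization scale, not taken from the book). Note that the losses created by the `kernel_regularizer` argument are stored in the `tf.GraphKeys.REGULARIZATION_LOSSES` collection, so they have to be added to the reconstruction loss explicitly:\n",
    "\n",
    "```python\n",
    "import tensorflow as tf\n",
    "from functools import partial\n",
    "\n",
    "X = tf.placeholder(tf.float32, shape=[None, 28 * 28])\n",
    "\n",
    "my_dense_layer = partial(\n",
    "    tf.layers.dense,\n",
    "    activation=tf.nn.elu,\n",
    "    kernel_initializer=tf.contrib.layers.variance_scaling_initializer(),\n",
    "    kernel_regularizer=tf.contrib.layers.l2_regularizer(0.0001))\n",
    "\n",
    "hidden = my_dense_layer(X, 300)\n",
    "outputs = my_dense_layer(hidden, 28 * 28, activation=None)\n",
    "\n",
    "# regularization losses are collected separately and added to the reconstruction loss\n",
    "reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)\n",
    "loss = tf.add_n([tf.reduce_mean(tf.square(outputs - X))] + reg_losses)\n",
    "```"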
+ ] + }, { "cell_type": "code", "execution_count": 11, @@ -364,7 +388,7 @@ "source": [ "tf.reset_default_graph()\n", "\n", - "from tensorflow.contrib.layers import fully_connected\n", + "from functools import partial\n", "\n", "n_inputs = 28*28\n", "n_hidden1 = 300\n", @@ -380,15 +404,17 @@ "#initializer = lambda shape, dtype=tf.float32: tf.truncated_normal(shape, 0., stddev=np.sqrt(2/shape[0]))\n", "\n", "X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", - "with tf.contrib.framework.arg_scope(\n", - " [fully_connected],\n", - " activation_fn=tf.nn.elu,\n", - " weights_initializer=initializer,\n", - " weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg)):\n", - " hidden1 = fully_connected(X, n_hidden1)\n", - " hidden2 = fully_connected(hidden1, n_hidden2)\n", - " hidden3 = fully_connected(hidden2, n_hidden3)\n", - " outputs = fully_connected(hidden3, n_outputs, activation_fn=None)\n", + "\n", + "my_dense_layer = partial(\n", + " tf.layers.dense,\n", + " activation=tf.nn.elu,\n", + " kernel_initializer=initializer,\n", + " kernel_regularizer=tf.contrib.layers.l2_regularizer(l2_reg))\n", + "\n", + "hidden1 = my_dense_layer(X, n_hidden1)\n", + "hidden2 = my_dense_layer(hidden1, n_hidden2)\n", + "hidden3 = my_dense_layer(hidden2, n_hidden3)\n", + "outputs = my_dense_layer(hidden3, n_outputs, activation=None)\n", "\n", "mse = tf.reduce_mean(tf.square(outputs - X))\n", "\n", @@ -528,19 +554,23 @@ }, "outputs": [], "source": [ - "def train_autoencoder(X_train, n_neurons, n_epochs, batch_size, learning_rate = 0.01, l2_reg = 0.0005, activation_fn=tf.nn.elu):\n", + "from functools import partial\n", + "\n", + "def train_autoencoder(X_train, n_neurons, n_epochs, batch_size, learning_rate = 0.01, l2_reg = 0.0005, activation=tf.nn.elu):\n", " graph = tf.Graph()\n", " with graph.as_default():\n", " n_inputs = X_train.shape[1]\n", "\n", " X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", - " with tf.contrib.framework.arg_scope(\n", - " [fully_connected],\n", - " activation_fn=activation_fn,\n", - " weights_initializer=tf.contrib.layers.variance_scaling_initializer(),\n", - " weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg)):\n", - " hidden = fully_connected(X, n_neurons, scope=\"hidden\")\n", - " outputs = fully_connected(hidden, n_inputs, activation_fn=None, scope=\"outputs\")\n", + " \n", + " my_dense_layer = partial(\n", + " tf.layers.dense,\n", + " activation=activation,\n", + " kernel_initializer=tf.contrib.layers.variance_scaling_initializer(),\n", + " kernel_regularizer=tf.contrib.layers.l2_regularizer(l2_reg))\n", + "\n", + " hidden = my_dense_layer(X, n_neurons, name=\"hidden\")\n", + " outputs = my_dense_layer(hidden, n_inputs, activation=None, name=\"outputs\")\n", "\n", " mse = tf.reduce_mean(tf.square(outputs - X))\n", "\n", @@ -566,7 +596,7 @@ " print(\"\\r{}\".format(epoch), \"Train MSE:\", mse_train)\n", " params = dict([(var.name, var.eval()) for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)])\n", " hidden_val = hidden.eval(feed_dict={X: X_train})\n", - " return hidden_val, params[\"hidden/weights:0\"], params[\"hidden/biases:0\"], params[\"outputs/weights:0\"], params[\"outputs/biases:0\"]" + " return hidden_val, params[\"hidden/kernel:0\"], params[\"hidden/bias:0\"], params[\"outputs/kernel:0\"], params[\"outputs/bias:0\"]" ] }, { @@ -853,7 +883,7 @@ "editable": true }, "source": [ - "It is common to tie the weights of the encoder and the decoder (`weights_decoder = tf.transpose(weights_encoder)`). 
Unfortunately this makes it impossible (or very tricky) to use the `fully_connected()` function, so we need to build the Autoencoder manually:" + "It is common to tie the weights of the encoder and the decoder (`weights_decoder = tf.transpose(weights_encoder)`). Unfortunately this makes it impossible (or very tricky) to use the `tf.layers.dense()` function, so we need to build the Autoencoder manually:" ] }, { @@ -1114,11 +1144,20 @@ "# Stacked denoising Autoencoder" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note: the book uses `tf.contrib.layers.dropout()` rather than `tf.layers.dropout()` (which did not exist when this chapter was written). It is now preferable to use `tf.layers.dropout()`, because anything in the contrib module may change or be deleted without notice. The `tf.layers.dropout()` function is almost identical to the `tf.contrib.layers.dropout()` function, except for a few minor differences. Most importantly:\n", + "* you must specify the dropout rate (`rate`) rather than the keep probability (`keep_prob`), where `rate` is simply equal to `1 - keep_prob`,\n", + "* the `is_training` parameter is renamed to `training`." + ] + }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 31, "metadata": { - "collapsed": true, + "collapsed": false, "deletable": true, "editable": true }, @@ -1126,8 +1165,6 @@ "source": [ "tf.reset_default_graph()\n", "\n", - "from tensorflow.contrib.layers import dropout\n", - "\n", "n_inputs = 28 * 28\n", "n_hidden1 = 300\n", "n_hidden2 = 150 # codings\n", @@ -1136,7 +1173,7 @@ "\n", "learning_rate = 0.01\n", "l2_reg = 0.00001\n", - "keep_prob = 0.7\n", + "dropout_rate = 0.3\n", "\n", "activation = tf.nn.elu\n", "regularizer = tf.contrib.layers.l2_regularizer(l2_reg)\n", @@ -1145,7 +1182,7 @@ "X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", "is_training = tf.placeholder_with_default(False, shape=(), name='is_training')\n", "\n", - "X_drop = dropout(X, keep_prob, is_training=is_training)\n", + "X_drop = tf.layers.dropout(X, dropout_rate, training=is_training)\n", "\n", "weights1_init = initializer([n_inputs, n_hidden1])\n", "weights2_init = initializer([n_hidden1, n_hidden2])\n", @@ -1177,7 +1214,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 32, "metadata": { "collapsed": false, "deletable": true, @@ -1204,7 +1241,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 33, "metadata": { "collapsed": false, "deletable": true, @@ -1227,7 +1264,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 34, "metadata": { "collapsed": false, "deletable": true, @@ -1242,7 +1279,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 35, "metadata": { "collapsed": false, "deletable": true, @@ -1270,7 +1307,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 36, "metadata": { "collapsed": false, "deletable": true, @@ -1295,7 +1332,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 37, "metadata": { "collapsed": true, "deletable": true, @@ -1310,7 +1347,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 38, "metadata": { "collapsed": false, "deletable": true, @@ -1360,7 +1397,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 39, "metadata": { "collapsed": false, "deletable": true, @@ -1387,7 +1424,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 40, "metadata": { "collapsed": false, 
"deletable": true, @@ -1410,7 +1447,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 43, "metadata": { "collapsed": false, "deletable": true, @@ -1478,7 +1515,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 44, "metadata": { "collapsed": false, "deletable": true, @@ -1488,6 +1525,8 @@ "source": [ "tf.reset_default_graph()\n", "\n", + "from functools import partial\n", + "\n", "n_inputs = 28*28\n", "n_hidden1 = 500\n", "n_hidden2 = 500\n", @@ -1500,20 +1539,22 @@ "\n", "initializer = tf.contrib.layers.variance_scaling_initializer()\n", "\n", - "with tf.contrib.framework.arg_scope([fully_connected],\n", - " activation_fn=tf.nn.elu,\n", - " weights_initializer=initializer):\n", - " X = tf.placeholder(tf.float32, [None, n_inputs])\n", - " hidden1 = fully_connected(X, n_hidden1)\n", - " hidden2 = fully_connected(hidden1, n_hidden2)\n", - " hidden3_mean = fully_connected(hidden2, n_hidden3, activation_fn=None)\n", - " hidden3_gamma = fully_connected(hidden2, n_hidden3, activation_fn=None)\n", - " noise = tf.random_normal(tf.shape(hidden3_gamma), dtype=tf.float32)\n", - " hidden3 = hidden3_mean + tf.exp(0.5 * hidden3_gamma) * noise\n", - " hidden4 = fully_connected(hidden3, n_hidden4)\n", - " hidden5 = fully_connected(hidden4, n_hidden5)\n", - " logits = fully_connected(hidden5, n_outputs, activation_fn=None)\n", - " outputs = tf.sigmoid(logits)\n", + "my_dense_layer = partial(\n", + " tf.layers.dense,\n", + " activation=tf.nn.elu,\n", + " kernel_initializer=initializer)\n", + "\n", + "X = tf.placeholder(tf.float32, [None, n_inputs])\n", + "hidden1 = my_dense_layer(X, n_hidden1)\n", + "hidden2 = my_dense_layer(hidden1, n_hidden2)\n", + "hidden3_mean = my_dense_layer(hidden2, n_hidden3, activation=None)\n", + "hidden3_gamma = my_dense_layer(hidden2, n_hidden3, activation=None)\n", + "noise = tf.random_normal(tf.shape(hidden3_gamma), dtype=tf.float32)\n", + "hidden3 = hidden3_mean + tf.exp(0.5 * hidden3_gamma) * noise\n", + "hidden4 = my_dense_layer(hidden3, n_hidden4)\n", + "hidden5 = my_dense_layer(hidden4, n_hidden5)\n", + "logits = my_dense_layer(hidden5, n_outputs, activation=None)\n", + "outputs = tf.sigmoid(logits)\n", "\n", "reconstruction_loss = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(labels=X, logits=logits))\n", "latent_loss = 0.5 * tf.reduce_sum(tf.exp(hidden3_gamma) + tf.square(hidden3_mean) - 1 - hidden3_gamma)\n", @@ -1528,7 +1569,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 45, "metadata": { "collapsed": false, "deletable": true, @@ -1565,7 +1606,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 46, "metadata": { "collapsed": false, "deletable": true, @@ -1594,7 +1635,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 47, "metadata": { "collapsed": false, "deletable": true, @@ -1619,7 +1660,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 48, "metadata": { "collapsed": false, "deletable": true, @@ -1647,7 +1688,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 49, "metadata": { "collapsed": false, "deletable": true, @@ -1667,7 +1708,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 50, "metadata": { "collapsed": false, "deletable": true, @@ -1682,7 +1723,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 51, "metadata": { "collapsed": true, "deletable": true, @@ -1712,7 +1753,7 @@ }, { "cell_type": "code", - 
"execution_count": 49, + "execution_count": 52, "metadata": { "collapsed": false, "deletable": true, @@ -1787,7 +1828,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.2+" + "version": "3.5.3" }, "nav_menu": { "height": "381px", diff --git a/16_reinforcement_learning.ipynb b/16_reinforcement_learning.ipynb index 3b6ce4a..7061f4e 100644 --- a/16_reinforcement_learning.ipynb +++ b/16_reinforcement_learning.ipynb @@ -986,6 +986,18 @@ "Let's create a neural network that will take observations as inputs, and output the action to take for each observation. To choose an action, the network will first estimate a probability for each action, then select an action randomly according to the estimated probabilities. In the case of the Cart-Pole environment, there are just two possible actions (left or right), so we only need one output neuron: it will output the probability `p` of the action 0 (left), and of course the probability of action 1 (right) will be `1 - p`." ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note: instead of using the `fully_connected()` function from the `tensorflow.contrib.layers` module (as in the book), we now use the `dense()` function from the `tf.layers` module, which did not exist when this chapter was written. This is preferable because anything in contrib may change or be deleted without notice, while `tf.layers` is part of the official API. As you will see, the code is mostly the same.\n", + "\n", + "The main differences relevant to this chapter are:\n", + "* the `_fn` suffix was removed in all the parameters that had it (for example the `activation_fn` parameter was renamed to `activation`).\n", + "* the `weights` parameter was renamed to `kernel`,\n", + "* the default activation is `None` instead of `tf.nn.relu`" + ] + }, { "cell_type": "code", "execution_count": 34, @@ -997,7 +1009,6 @@ "outputs": [], "source": [ "import tensorflow as tf\n", - "from tensorflow.contrib.layers import fully_connected\n", "\n", "# 1. Specify the network architecture\n", "n_inputs = 4 # == env.observation_space.shape[0]\n", @@ -1007,10 +1018,10 @@ "\n", "# 2. Build the neural network\n", "X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", - "hidden = fully_connected(X, n_hidden, activation_fn=tf.nn.elu,\n", - " weights_initializer=initializer)\n", - "outputs = fully_connected(hidden, n_outputs, activation_fn=tf.nn.sigmoid,\n", - " weights_initializer=initializer)\n", + "hidden = tf.layers.dense(X, n_hidden, activation=tf.nn.elu,\n", + " kernel_initializer=initializer)\n", + "outputs = tf.layers.dense(hidden, n_outputs, activation=tf.nn.sigmoid,\n", + " kernel_initializer=initializer)\n", "\n", "# 3. 
Select a random action based on the estimated probabilities\n", "p_left_and_right = tf.concat(axis=1, values=[outputs, 1 - outputs])\n", @@ -1121,7 +1132,6 @@ "outputs": [], "source": [ "import tensorflow as tf\n", - "from tensorflow.contrib.layers import fully_connected\n", "\n", "tf.reset_default_graph()\n", "\n", @@ -1136,8 +1146,8 @@ "X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", "y = tf.placeholder(tf.float32, shape=[None, n_outputs])\n", "\n", - "hidden = fully_connected(X, n_hidden, activation_fn=tf.nn.elu, weights_initializer=initializer)\n", - "logits = fully_connected(hidden, n_outputs, activation_fn=None)\n", + "hidden = tf.layers.dense(X, n_hidden, activation=tf.nn.elu, kernel_initializer=initializer)\n", + "logits = tf.layers.dense(hidden, n_outputs)\n", "outputs = tf.nn.sigmoid(logits) # probability of action 0 (left)\n", "p_left_and_right = tf.concat(axis=1, values=[outputs, 1 - outputs])\n", "action = tf.multinomial(tf.log(p_left_and_right), num_samples=1)\n", @@ -1275,7 +1285,6 @@ "outputs": [], "source": [ "import tensorflow as tf\n", - "from tensorflow.contrib.layers import fully_connected\n", "\n", "tf.reset_default_graph()\n", "\n", @@ -1289,8 +1298,8 @@ "\n", "X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", "\n", - "hidden = fully_connected(X, n_hidden, activation_fn=tf.nn.elu, weights_initializer=initializer)\n", - "logits = fully_connected(hidden, n_outputs, activation_fn=None)\n", + "hidden = tf.layers.dense(X, n_hidden, activation=tf.nn.elu, kernel_initializer=initializer)\n", + "logits = tf.layers.dense(hidden, n_outputs)\n", "outputs = tf.nn.sigmoid(logits) # probability of action 0 (left)\n", "p_left_and_right = tf.concat(axis=1, values=[outputs, 1 - outputs])\n", "action = tf.multinomial(tf.log(p_left_and_right), num_samples=1)\n", @@ -1366,7 +1375,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 45, "metadata": { "collapsed": false, "deletable": true, @@ -1416,7 +1425,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 46, "metadata": { "collapsed": true, "deletable": true, @@ -1429,7 +1438,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 47, "metadata": { "collapsed": false, "deletable": true, @@ -1454,7 +1463,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 48, "metadata": { "collapsed": false, "deletable": true, @@ -1499,7 +1508,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 49, "metadata": { "collapsed": false, "deletable": true, @@ -1594,7 +1603,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 50, "metadata": { "collapsed": false, "deletable": true, @@ -1623,7 +1632,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 51, "metadata": { "collapsed": false, "deletable": true, @@ -1637,7 +1646,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 52, "metadata": { "collapsed": false, "deletable": true, @@ -1650,7 +1659,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 53, "metadata": { "collapsed": false, "deletable": true, @@ -1677,7 +1686,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 54, "metadata": { "collapsed": false, "deletable": true, @@ -1691,7 +1700,7 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 55, "metadata": { "collapsed": false, "deletable": true, @@ -1704,7 +1713,7 @@ }, { "cell_type": "code", - "execution_count": 59, + 
"execution_count": 56, "metadata": { "collapsed": false, "deletable": true, @@ -1737,7 +1746,7 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 57, "metadata": { "collapsed": false, "deletable": true, @@ -1759,7 +1768,7 @@ }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 58, "metadata": { "collapsed": false, "deletable": true, @@ -1790,9 +1799,22 @@ "## Build DQN" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note: instead of using `tf.contrib.layers.convolution2d()` or `tf.contrib.layers.conv2d()` (as in the book), we now use the `tf.layers.conv2d()`, which did not exist when this chapter was written. This is preferable because anything in contrib may change or be deleted without notice, while `tf.layers` is part of the official API. As you will see, the code is mostly the same, except that the parameter names have changed slightly:\n", + "* the `num_outputs` parameter was renamed to `filters`,\n", + "* the `stride` parameter was renamed to `strides`,\n", + "* the `_fn` suffix was removed from parameter names that had it (e.g., `activation_fn` was renamed to `activation`),\n", + "* the `weights_initializer` parameter was renamed to `kernel_initializer`,\n", + "* the weights variable was renamed to `\"kernel\"` (instead of `\"weights\"`), and the biases variable was renamed from `\"biases\"` to `\"bias\"`,\n", + "* and the default `activation` is now `None` instead of `tf.nn.relu`." + ] + }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 59, "metadata": { "collapsed": false, "deletable": true, @@ -1802,8 +1824,6 @@ "source": [ "tf.reset_default_graph()\n", "\n", - "from tensorflow.contrib.layers import convolution2d, fully_connected\n", - "\n", "input_height = 88\n", "input_width = 80\n", "input_channels = 1\n", @@ -1824,12 +1844,12 @@ " prev_layer = X_state\n", " conv_layers = []\n", " with tf.variable_scope(scope) as scope:\n", - " for n_maps, kernel_size, stride, padding, activation in zip(conv_n_maps, conv_kernel_sizes, conv_strides, conv_paddings, conv_activation):\n", - " prev_layer = convolution2d(prev_layer, num_outputs=n_maps, kernel_size=kernel_size, stride=stride, padding=padding, activation_fn=activation, weights_initializer=initializer)\n", + " for n_maps, kernel_size, strides, padding, activation in zip(conv_n_maps, conv_kernel_sizes, conv_strides, conv_paddings, conv_activation):\n", + " prev_layer = tf.layers.conv2d(prev_layer, filters=n_maps, kernel_size=kernel_size, strides=strides, padding=padding, activation=activation, kernel_initializer=initializer)\n", " conv_layers.append(prev_layer)\n", " last_conv_layer_flat = tf.reshape(prev_layer, shape=[-1, n_hidden_inputs])\n", - " hidden = fully_connected(last_conv_layer_flat, n_hidden, activation_fn=hidden_activation, weights_initializer=initializer)\n", - " outputs = fully_connected(hidden, n_outputs, activation_fn=None)\n", + " hidden = tf.layers.dense(last_conv_layer_flat, n_hidden, activation=hidden_activation, kernel_initializer=initializer)\n", + " outputs = tf.layers.dense(hidden, n_outputs)\n", " trainable_vars = {var.name[len(scope.name):]: var for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope.name)}\n", " return outputs, trainable_vars\n", "\n", @@ -1857,7 +1877,7 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 60, "metadata": { "collapsed": false, "deletable": true, @@ -1870,7 +1890,7 @@ }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 61, 
"metadata": { "collapsed": false, "deletable": true, @@ -1896,7 +1916,7 @@ }, { "cell_type": "code", - "execution_count": 65, + "execution_count": 62, "metadata": { "collapsed": true, "deletable": true, @@ -1919,7 +1939,7 @@ }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 63, "metadata": { "collapsed": false, "deletable": true, @@ -2023,7 +2043,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.2+" + "version": "3.5.3" }, "nav_menu": {}, "toc": {