From 326d32cae09d41f2a2593f036fbcb464bd1ac4ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Geron?= Date: Sun, 30 Apr 2017 10:21:27 +0200 Subject: [PATCH] Use tf.layers instead of tf.contrib.layers --- ...uction_to_artificial_neural_networks.ipynb | 22 +- 11_deep_learning.ipynb | 214 ++++++++++++------ 12_distributed_tensorflow.ipynb | 2 +- 13_convolutional_neural_networks.ipynb | 208 +++++++++++++---- 14_recurrent_neural_networks.ipynb | 78 +++---- 15_autoencoders.ipynb | 171 ++++++++------ 16_reinforcement_learning.ipynb | 94 +++++--- 7 files changed, 531 insertions(+), 258 deletions(-) diff --git a/10_introduction_to_artificial_neural_networks.ipynb b/10_introduction_to_artificial_neural_networks.ipynb index 9aef566..9441684 100644 --- a/10_introduction_to_artificial_neural_networks.ipynb +++ b/10_introduction_to_artificial_neural_networks.ipynb @@ -584,7 +584,17 @@ "editable": true }, "source": [ - "## Using `fully_connected` instead of `neuron_layer()`" + "## Using `dense()` instead of `neuron_layer()`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note: the book uses `tensorflow.contrib.layers.fully_connected()` rather than `tf.layers.dense()` (which did not exist when this chapter was written). It is now preferable to use `tf.layers.dense()`, because anything in the contrib module may change or be deleted without notice. The `dense()` function is almost identical to the `fully_connected()` function, except for a few minor differences:\n", + "* several parameters are renamed: `scope` becomes `name`, `activation_fn` becomes `activation` (and similarly the `_fn` suffix is removed from other parameters such as `normalizer_fn`), `weights_initializer` becomes `kernel_initializer`, etc.\n", + "* the default `activation` is now `None` rather than `tf.nn.relu`.\n", + "* a few more differences are presented in chapter 11." ] }, { @@ -599,8 +609,6 @@ "source": [ "tf.reset_default_graph()\n", "\n", - "from tensorflow.contrib.layers import fully_connected\n", - "\n", "n_inputs = 28*28 # MNIST\n", "n_hidden1 = 300\n", "n_hidden2 = 100\n", @@ -611,9 +619,9 @@ "y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n", "\n", "with tf.name_scope(\"dnn\"):\n", - " hidden1 = fully_connected(X, n_hidden1, scope=\"hidden1\")\n", - " hidden2 = fully_connected(hidden1, n_hidden2, scope=\"hidden2\")\n", - " logits = fully_connected(hidden2, n_outputs, activation_fn=None, scope=\"outputs\")\n", + " hidden1 = tf.layers.dense(X, n_hidden1, name=\"hidden1\", activation=tf.nn.relu)\n", + " hidden2 = tf.layers.dense(hidden1, n_hidden2, name=\"hidden2\", activation=tf.nn.relu)\n", + " logits = tf.layers.dense(hidden2, n_outputs, name=\"outputs\")\n", "\n", "with tf.name_scope(\"loss\"):\n", " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n", @@ -719,7 +727,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.2+" + "version": "3.5.3" }, "nav_menu": { "height": "264px", diff --git a/11_deep_learning.ipynb b/11_deep_learning.ipynb index 5c440ec..74f0d00 100644 --- a/11_deep_learning.ipynb +++ b/11_deep_learning.ipynb @@ -297,6 +297,20 @@ " display(HTML(iframe))" ] }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "Note: the book uses `tensorflow.contrib.layers.fully_connected()` rather than `tf.layers.dense()` (which did not exist when this chapter was written). 
It is now preferable to use `tf.layers.dense()`, because anything in the contrib module may change or be deleted without notice. The `dense()` function is almost identical to the `fully_connected()` function. The main differences relevant to this chapter are:\n", + "* several parameters are renamed: `scope` becomes `name`, `activation_fn` becomes `activation` (and similarly the `_fn` suffix is removed from other parameters such as `normalizer_fn`), `weights_initializer` becomes `kernel_initializer`, etc.\n", + "* the default `activation` is now `None` rather than `tf.nn.relu`.\n", + "* it does not support `tensorflow.contrib.framework.arg_scope()` (introduced later in chapter 11).\n", + "* it does not support regularizer params (introduced later in chapter 11)." + ] + }, { "cell_type": "code", "execution_count": 12, @@ -307,8 +321,6 @@ }, "outputs": [], "source": [ - "from tensorflow.contrib.layers import fully_connected\n", - "\n", "tf.reset_default_graph()\n", "\n", "n_inputs = 28*28 # MNIST\n", @@ -321,9 +333,9 @@ "y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n", "\n", "with tf.name_scope(\"dnn\"):\n", - " hidden1 = fully_connected(X, n_hidden1, activation_fn=leaky_relu, scope=\"hidden1\")\n", - " hidden2 = fully_connected(hidden1, n_hidden2, activation_fn=leaky_relu, scope=\"hidden2\")\n", - " logits = fully_connected(hidden2, n_outputs, activation_fn=None, scope=\"outputs\")\n", + " hidden1 = tf.layers.dense(X, n_hidden1, activation=leaky_relu, name=\"hidden1\")\n", + " hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=leaky_relu, name=\"hidden2\")\n", + " logits = tf.layers.dense(hidden2, n_outputs, name=\"outputs\")\n", "\n", "with tf.name_scope(\"loss\"):\n", " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n", @@ -377,6 +389,24 @@ "# Batch Normalization" ] }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "Note: the book uses `tensorflow.contrib.layers.batch_norm()` rather than `tf.layers.batch_normalization()` (which did not exist when this chapter was written). It is now preferable to use `tf.layers.batch_normalization()`, because anything in the contrib module may change or be deleted without notice. Instead of using the `batch_norm()` function as a regularizer parameter to the `fully_connected()` function, we now use `batch_normalization()` and we explicitly create a distinct layer. The parameters are a bit different, in particular:\n", + "* `decay` is renamed to `momentum`,\n", + "* `is_training` is renamed to `training`,\n", + "* `updates_collections` is removed: the update operations needed by batch normalization are added to the `UPDATE_OPS` collection and you need to explicity run these operations during training (see the execution phase below),\n", + "* we don't need to specify `scale=True`, as that is the default.\n", + "\n", + "Also note that in order to run batch norm just _before_ each hidden layer's activation function, we apply the ELU activation function manually, right after the batch norm layer.\n", + "\n", + "Note: since the `tf.layers.dense()` function is incompatible with `tf.contrib.layers.arg_scope()` (which is used in the book), we now use python's `functools.partial()` function instead. It makes it easy to create a `my_dense_layer()` function that just calls `tf.layers.dense()` with the desired parameters automatically set (unless they are overridden when calling `my_dense_layer()`). As you can see, the code remains very similar." 
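,
    "\n",
    "To make the renaming concrete, here is a minimal sketch (with arbitrary layer sizes, not taken from the book) of a dense layer followed by the new-style batch norm layer. It also shows an equivalent way to handle the `UPDATE_OPS` collection: attaching the update operations to the training op with `tf.control_dependencies()`, so that a plain `sess.run(training_op, ...)` is enough (the code below uses the explicit `sess.run([training_op, extra_update_ops], ...)` form instead):\n",
    "\n",
    "```python\n",
    "import tensorflow as tf\n",
    "\n",
    "X = tf.placeholder(tf.float32, shape=[None, 28 * 28], name=\"X\")\n",
    "y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n",
    "is_training = tf.placeholder(tf.bool, shape=(), name=\"is_training\")\n",
    "\n",
    "hidden1 = tf.layers.dense(X, 300, name=\"hidden1\")\n",
    "# the book's call was: batch_norm(hidden1, is_training=is_training, decay=0.9,\n",
    "#                                 updates_collections=None, scale=True)\n",
    "bn1 = tf.nn.elu(tf.layers.batch_normalization(hidden1, training=is_training, momentum=0.9))\n",
    "logits = tf.layers.dense(bn1, 10, name=\"outputs\")\n",
    "\n",
    "xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
    "loss = tf.reduce_mean(xentropy)\n",
    "optimizer = tf.train.GradientDescentOptimizer(0.01)\n",
    "\n",
    "# alternative to running extra_update_ops by hand at each training iteration:\n",
    "update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)\n",
    "with tf.control_dependencies(update_ops):\n",
    "    training_op = optimizer.minimize(loss)\n",
    "```"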
+ ] + }, { "cell_type": "code", "execution_count": 14, @@ -387,11 +417,10 @@ }, "outputs": [], "source": [ - "from tensorflow.contrib.layers import fully_connected, batch_norm\n", - "from tensorflow.contrib.framework import arg_scope\n", - "\n", "tf.reset_default_graph()\n", "\n", + "from functools import partial\n", + "\n", "n_inputs = 28 * 28 # MNIST\n", "n_hidden1 = 300\n", "n_hidden2 = 100\n", @@ -405,22 +434,23 @@ "\n", "with tf.name_scope(\"dnn\"):\n", " he_init = tf.contrib.layers.variance_scaling_initializer()\n", - " batch_norm_params = {\n", - " 'is_training': is_training,\n", - " 'decay': 0.9,\n", - " 'updates_collections': None,\n", - " 'scale': True,\n", - " }\n", "\n", - " with arg_scope(\n", - " [fully_connected],\n", - " activation_fn=tf.nn.elu,\n", - " weights_initializer=he_init,\n", - " normalizer_fn=batch_norm,\n", - " normalizer_params=batch_norm_params):\n", - " hidden1 = fully_connected(X, n_hidden1, scope=\"hidden1\")\n", - " hidden2 = fully_connected(hidden1, n_hidden2, scope=\"hidden2\")\n", - " logits = fully_connected(hidden2, n_outputs, activation_fn=None, scope=\"outputs\")\n", + " my_batch_norm_layer = partial(\n", + " tf.layers.batch_normalization,\n", + " training=is_training,\n", + " momentum=0.9)\n", + "\n", + " my_dense_layer = partial(\n", + " tf.layers.dense,\n", + " kernel_initializer=he_init)\n", + "\n", + " hidden1 = my_dense_layer(X, n_hidden1, name=\"hidden1\")\n", + " bn1 = tf.nn.elu(my_batch_norm_layer(hidden1))\n", + " hidden2 = my_dense_layer(bn1, n_hidden2, name=\"hidden2\")\n", + " bn2 = tf.nn.elu(my_batch_norm_layer(hidden2))\n", + " logits_before_bn = my_dense_layer(bn2, n_outputs, activation=None, name=\"outputs\")\n", + " logits = my_batch_norm_layer(logits_before_bn)\n", + " extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)\n", "\n", "with tf.name_scope(\"loss\"):\n", " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n", @@ -438,6 +468,16 @@ "saver = tf.train.Saver()" ] }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "Note: since we are using `tf.layers.batch_normalization()` rather than `tf.contrib.layers.batch_norm()` (as in the book), we need to explicitly run the extra update operations needed by batch normalization (`sess.run([training_op, extra_update_ops],...`)." 
+ ] + }, { "cell_type": "code", "execution_count": 15, @@ -449,14 +489,14 @@ "outputs": [], "source": [ "n_epochs = 20\n", - "batch_size = 50\n", + "batch_size = 200\n", "\n", "with tf.Session() as sess:\n", " init.run()\n", " for epoch in range(n_epochs):\n", " for iteration in range(len(mnist.test.labels)//batch_size):\n", " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", - " sess.run(training_op, feed_dict={is_training: True, X: X_batch, y: y_batch})\n", + " sess.run([training_op, extra_update_ops], feed_dict={is_training: True, X: X_batch, y: y_batch})\n", " acc_train = accuracy.eval(feed_dict={is_training: False, X: X_batch, y: y_batch})\n", " acc_test = accuracy.eval(feed_dict={is_training: False, X: mnist.test.images, y: mnist.test.labels})\n", " print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n", @@ -464,11 +504,21 @@ " save_path = saver.save(sess, \"my_model_final.ckpt\")" ] }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "Now the same model with $\\ell_1$ regularization:" + ] + }, { "cell_type": "code", "execution_count": 16, "metadata": { - "collapsed": false, + "collapsed": true, "deletable": true, "editable": true }, @@ -476,29 +526,32 @@ "source": [ "tf.reset_default_graph()\n", "\n", + "from functools import partial\n", + "\n", "X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n", "y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n", "is_training = tf.placeholder(tf.bool, shape=(), name='is_training')\n", "\n", "with tf.name_scope(\"dnn\"):\n", " he_init = tf.contrib.layers.variance_scaling_initializer()\n", - " batch_norm_params = {\n", - " 'is_training': is_training,\n", - " 'decay': 0.9,\n", - " 'updates_collections': None,\n", - " 'scale': True,\n", - " }\n", "\n", - " with arg_scope(\n", - " [fully_connected],\n", - " activation_fn=tf.nn.elu,\n", - " weights_initializer=he_init,\n", - " normalizer_fn=batch_norm,\n", - " normalizer_params=batch_norm_params,\n", - " weights_regularizer=tf.contrib.layers.l1_regularizer(0.01)):\n", - " hidden1 = fully_connected(X, n_hidden1, scope=\"hidden1\")\n", - " hidden2 = fully_connected(hidden1, n_hidden2, scope=\"hidden2\")\n", - " logits = fully_connected(hidden2, n_outputs, activation_fn=None, scope=\"outputs\")\n", + " my_batch_norm_layer = partial(\n", + " tf.layers.batch_normalization,\n", + " training=is_training,\n", + " momentum=0.9)\n", + "\n", + " my_dense_layer = partial(\n", + " tf.layers.dense,\n", + " kernel_initializer=he_init,\n", + " kernel_regularizer=tf.contrib.layers.l1_regularizer(0.01))\n", + "\n", + " hidden1 = my_dense_layer(X, n_hidden1, name=\"hidden1\")\n", + " bn1 = tf.nn.elu(my_batch_norm_layer(hidden1))\n", + " hidden2 = my_dense_layer(bn1, n_hidden2, name=\"hidden2\")\n", + " bn2 = tf.nn.elu(my_batch_norm_layer(hidden2))\n", + " logits_before_bn = my_dense_layer(bn2, n_outputs, activation=None, name=\"outputs\")\n", + " logits = my_batch_norm_layer(logits_before_bn)\n", + " extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)\n", "\n", "with tf.name_scope(\"loss\"):\n", " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n", @@ -513,7 +566,7 @@ "with tf.name_scope(\"eval\"):\n", " correct = tf.nn.in_top_k(logits, y, 1)\n", " accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", - " \n", + "\n", "init = tf.global_variables_initializer()\n", "saver = tf.train.Saver()" ] @@ -529,14 +582,14 @@ "outputs": [], "source": [ "n_epochs = 
20\n", - "batch_size = 50\n", + "batch_size = 200\n", "\n", "with tf.Session() as sess:\n", " init.run()\n", " for epoch in range(n_epochs):\n", " for iteration in range(len(mnist.test.labels)//batch_size):\n", " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", - " sess.run(training_op, feed_dict={is_training: True, X: X_batch, y: y_batch})\n", + " sess.run([training_op, extra_update_ops], feed_dict={is_training: True, X: X_batch, y: y_batch})\n", " acc_train = accuracy.eval(feed_dict={is_training: False, X: X_batch, y: y_batch})\n", " acc_test = accuracy.eval(feed_dict={is_training: False, X: mnist.test.images, y: mnist.test.labels})\n", " print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n", @@ -557,6 +610,16 @@ "[v.name for v in tf.global_variables()]" ] }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "Note: the weights variable created by the `tf.layers.dense()` function is called `\"kernel\"` (instead of `\"weights\"` when using the `tf.contrib.layers.fully_connected()`, as in the book):" + ] + }, { "cell_type": "code", "execution_count": 19, @@ -568,8 +631,8 @@ "outputs": [], "source": [ "with tf.variable_scope(\"\", default_name=\"\", reuse=True): # root scope\n", - " weights1 = tf.get_variable(\"hidden1/weights\")\n", - " weights2 = tf.get_variable(\"hidden2/weights\")\n", + " weights1 = tf.get_variable(\"hidden1/kernel\")\n", + " weights2 = tf.get_variable(\"hidden2/kernel\")\n", " " ] }, @@ -689,6 +752,8 @@ "source": [ "tf.reset_default_graph()\n", "\n", + "from functools import partial\n", + "\n", "X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n", "y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n", "is_training = tf.placeholder(tf.bool, shape=(), name='is_training')\n", @@ -701,12 +766,15 @@ " return max_norm\n", "\n", "with tf.name_scope(\"dnn\"):\n", - " with arg_scope(\n", - " [fully_connected],\n", - " weights_regularizer=max_norm_regularizer(1.5)):\n", - " hidden1 = fully_connected(X, n_hidden1, scope=\"hidden1\")\n", - " hidden2 = fully_connected(hidden1, n_hidden2, scope=\"hidden2\")\n", - " logits = fully_connected(hidden2, n_outputs, activation_fn=None, scope=\"outputs\")\n", + " \n", + " my_dense_layer = partial(\n", + " tf.layers.dense,\n", + " activation=tf.nn.relu,\n", + " kernel_regularizer=max_norm_regularizer(1.5))\n", + "\n", + " hidden1 = my_dense_layer(X, n_hidden1, name=\"hidden1\")\n", + " hidden2 = my_dense_layer(hidden1, n_hidden2, name=\"hidden2\")\n", + " logits = my_dense_layer(hidden2, n_outputs, activation=None, name=\"outputs\")\n", "\n", "clip_all_weights = tf.get_collection(\"max_norm\")\n", " \n", @@ -770,6 +838,18 @@ "show_graph(tf.get_default_graph())" ] }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "Note: the book uses `tf.contrib.layers.dropout()` rather than `tf.layers.dropout()` (which did not exist when this chapter was written). It is now preferable to use `tf.layers.dropout()`, because anything in the contrib module may change or be deleted without notice. The `tf.layers.dropout()` function is almost identical to the `tf.contrib.layers.dropout()` function, except for a few minor differences. Most importantly:\n", + "* you must specify the dropout rate (`rate`) rather than the keep probability (`keep_prob`), where `rate` is simply equal to `1 - keep_prob`,\n", + "* the `is_training` parameter is renamed to `training`." 
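,
    "\n",
    "For example (a minimal sketch with an arbitrary rate, not taken from the book), the mapping between the two APIs looks like this:\n",
    "\n",
    "```python\n",
    "import tensorflow as tf\n",
    "\n",
    "X = tf.placeholder(tf.float32, shape=[None, 28 * 28], name=\"X\")\n",
    "is_training = tf.placeholder(tf.bool, shape=(), name=\"is_training\")\n",
    "\n",
    "# the book's call was: dropout(X, keep_prob=0.5, is_training=is_training)\n",
    "X_drop = tf.layers.dropout(X, rate=0.5, training=is_training)\n",
    "```"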
+ ] + }, { "cell_type": "code", "execution_count": 30, @@ -780,7 +860,7 @@ }, "outputs": [], "source": [ - "from tensorflow.contrib.layers import dropout\n", + "from functools import partial\n", "\n", "tf.reset_default_graph()\n", "\n", @@ -795,20 +875,22 @@ "learning_rate = tf.train.exponential_decay(initial_learning_rate, global_step,\n", " decay_steps, decay_rate)\n", "\n", - "keep_prob = 0.5\n", + "dropout_rate = 0.5\n", "\n", "with tf.name_scope(\"dnn\"):\n", " he_init = tf.contrib.layers.variance_scaling_initializer()\n", - " with arg_scope(\n", - " [fully_connected],\n", - " activation_fn=tf.nn.elu,\n", - " weights_initializer=he_init):\n", - " X_drop = dropout(X, keep_prob, is_training=is_training)\n", - " hidden1 = fully_connected(X_drop, n_hidden1, scope=\"hidden1\")\n", - " hidden1_drop = dropout(hidden1, keep_prob, is_training=is_training)\n", - " hidden2 = fully_connected(hidden1_drop, n_hidden2, scope=\"hidden2\")\n", - " hidden2_drop = dropout(hidden2, keep_prob, is_training=is_training)\n", - " logits = fully_connected(hidden2_drop, n_outputs, activation_fn=None, scope=\"outputs\")\n", + "\n", + " my_dense_layer = partial(\n", + " tf.layers.dense,\n", + " activation=tf.nn.elu,\n", + " kernel_initializer=he_init)\n", + "\n", + " X_drop = tf.layers.dropout(X, dropout_rate, training=is_training)\n", + " hidden1 = my_dense_layer(X_drop, n_hidden1, name=\"hidden1\")\n", + " hidden1_drop = tf.layers.dropout(hidden1, dropout_rate, training=is_training)\n", + " hidden2 = my_dense_layer(hidden1_drop, n_hidden2, name=\"hidden2\")\n", + " hidden2_drop = tf.layers.dropout(hidden2, dropout_rate, training=is_training)\n", + " logits = my_dense_layer(hidden2_drop, n_outputs, activation=None, name=\"outputs\")\n", "\n", "with tf.name_scope(\"loss\"):\n", " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n", @@ -970,7 +1052,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.2+" + "version": "3.5.3" }, "nav_menu": { "height": "360px", diff --git a/12_distributed_tensorflow.ipynb b/12_distributed_tensorflow.ipynb index b91be4e..b95ee70 100644 --- a/12_distributed_tensorflow.ipynb +++ b/12_distributed_tensorflow.ipynb @@ -541,7 +541,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.2+" + "version": "3.5.3" }, "nav_menu": {}, "toc": { diff --git a/13_convolutional_neural_networks.ipynb b/13_convolutional_neural_networks.ipynb index b6bbe01..504968b 100644 --- a/13_convolutional_neural_networks.ipynb +++ b/13_convolutional_neural_networks.ipynb @@ -402,50 +402,101 @@ ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": { - "collapsed": true + "deletable": true, + "editable": true }, - "outputs": [], "source": [ - "from six.moves import urllib\n", - "from sklearn.datasets import fetch_mldata\n", - "try:\n", - " mnist = fetch_mldata('MNIST original')\n", - "except urllib.error.HTTPError as ex:\n", - " print(\"Could not download MNIST data from mldata.org, trying alternative...\")\n", + "Note: instead of using the `fully_connected()`, `conv2d()` and `dropout()` functions from the `tensorflow.contrib.layers` module (as in the book), we now use the `dense()`, `conv2d()` and `dropout()` functions (respectively) from the `tf.layers` module, which did not exist when this chapter was written. This is preferable because anything in contrib may change or be deleted without notice, while `tf.layers` is part of the official API. 
As you will see, the code is mostly the same.\n", "\n", - " # Alternative method to load MNIST, if mldata.org is down\n", - " from scipy.io import loadmat\n", - " mnist_alternative_url = \"https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat\"\n", - " mnist_path = \"./mnist-original.mat\"\n", - " response = urllib.request.urlopen(mnist_alternative_url)\n", - " with open(mnist_path, \"wb\") as f:\n", - " content = response.read()\n", - " f.write(content)\n", - " mnist_raw = loadmat(mnist_path)\n", - " mnist = {\n", - " \"data\": mnist_raw[\"data\"].T,\n", - " \"target\": mnist_raw[\"label\"][0],\n", - " \"COL_NAMES\": [\"label\", \"data\"],\n", - " \"DESCR\": \"mldata.org dataset: mnist-original\",\n", - " }\n", - " print(\"Success!\")" + "For all these functions:\n", + "* the `scope` parameter was renamed to `name`, and the `_fn` suffix was removed in all the parameters that had it (for example the `activation_fn` parameter was renamed to `activation`).\n", + "\n", + "The other main differences in `tf.layers.dense()` are:\n", + "* the `weights` parameter was renamed to `kernel` (and the weights variable is now named `\"kernel\"` rather than `\"weights\"`),\n", + "* the default activation is `None` instead of `tf.nn.relu`\n", + "\n", + "The other main differences in `tf.layers.conv2d()` are:\n", + "* the `num_outputs` parameter was renamed to `filters`,\n", + "* the `stride` parameter was renamed to `strides`,\n", + "* the default `activation` is now `None` instead of `tf.nn.relu`.\n", + "\n", + "The other main differences in `tf.layers.dropout()` are:\n", + "* it takes the dropout rate (`rate`) rather than the keep probability (`keep_prob`). Of course, `rate == 1 - keep_prob`,\n", + "* the `is_training` parameters was renamed to `training`." 
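,
    "\n",
    "For example, here is what a single convolutional layer looks like with the new API (a minimal sketch with arbitrary hyperparameters, not taken from the book):\n",
    "\n",
    "```python\n",
    "import tensorflow as tf\n",
    "\n",
    "X = tf.placeholder(tf.float32, shape=[None, 28, 28, 1], name=\"X\")\n",
    "\n",
    "# the book's call was: conv2d(X, num_outputs=32, kernel_size=3, stride=1,\n",
    "#                             padding=\"SAME\", activation_fn=tf.nn.relu, scope=\"conv1\")\n",
    "conv1 = tf.layers.conv2d(X, filters=32, kernel_size=3, strides=1,\n",
    "                         padding=\"SAME\", activation=tf.nn.relu, name=\"conv1\")\n",
    "```"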
] }, { "cell_type": "code", "execution_count": 15, "metadata": { - "collapsed": true, + "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ - "X_train, X_test = mnist[\"data\"][:60000].astype(np.float64), mnist[\"data\"][60000:].astype(np.float64)\n", - "y_train, y_test = mnist[\"target\"][:60000].astype(np.int64), mnist[\"target\"][60000:].astype(np.int64)" + "height = 28\n", + "width = 28\n", + "channels = 1\n", + "n_inputs = height * width\n", + "\n", + "conv1_fmaps = 32\n", + "conv1_ksize = 3\n", + "conv1_stride = 1\n", + "conv1_pad = \"SAME\"\n", + "\n", + "conv2_fmaps = 64\n", + "conv2_ksize = 3\n", + "conv2_stride = 1\n", + "conv2_pad = \"SAME\"\n", + "conv2_dropout_rate = 0.25\n", + "\n", + "pool3_fmaps = conv2_fmaps\n", + "\n", + "n_fc1 = 128\n", + "fc1_dropout_rate = 0.5\n", + "\n", + "n_outputs = 10\n", + "\n", + "graph = tf.Graph()\n", + "with graph.as_default():\n", + " with tf.name_scope(\"inputs\"):\n", + " X = tf.placeholder(tf.float32, shape=[None, n_inputs], name=\"X\")\n", + " X_reshaped = tf.reshape(X, shape=[-1, height, width, channels])\n", + " y = tf.placeholder(tf.int32, shape=[None], name=\"y\")\n", + " is_training = tf.placeholder_with_default(False, shape=[], name='is_training')\n", + "\n", + " conv1 = tf.layers.conv2d(X_reshaped, filters=conv1_fmaps, kernel_size=conv1_ksize, strides=conv1_stride, padding=conv1_pad, activation=tf.nn.relu, name=\"conv1\")\n", + " conv2 = tf.layers.conv2d(conv1, filters=conv2_fmaps, kernel_size=conv2_ksize, strides=conv2_stride, padding=conv2_pad, activation=tf.nn.relu, name=\"conv2\")\n", + "\n", + " with tf.name_scope(\"pool3\"):\n", + " pool3 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding=\"VALID\")\n", + " pool3_flat = tf.reshape(pool3, shape=[-1, pool3_fmaps * 14 * 14])\n", + " pool3_flat_drop = tf.layers.dropout(pool3_flat, conv2_dropout_rate, training=is_training)\n", + "\n", + " with tf.name_scope(\"fc1\"):\n", + " fc1 = tf.layers.dense(pool3_flat_drop, n_fc1, activation=tf.nn.relu, name=\"fc1\")\n", + " fc1_drop = tf.layers.dropout(fc1, fc1_dropout_rate, training=is_training)\n", + "\n", + " with tf.name_scope(\"output\"):\n", + " logits = tf.layers.dense(fc1, n_outputs, name=\"output\")\n", + " Y_proba = tf.nn.softmax(logits, name=\"Y_proba\")\n", + "\n", + " with tf.name_scope(\"train\"):\n", + " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)\n", + " loss = tf.reduce_mean(xentropy)\n", + " optimizer = tf.train.AdamOptimizer()\n", + " training_op = optimizer.minimize(loss)\n", + "\n", + " with tf.name_scope(\"eval\"):\n", + " correct = tf.nn.in_top_k(logits, y, 1)\n", + " accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", + "\n", + " with tf.name_scope(\"init_and_save\"):\n", + " init = tf.global_variables_initializer()\n", + " saver = tf.train.Saver()" ] }, { @@ -458,9 +509,78 @@ }, "outputs": [], "source": [ - "height, width = 28, 28\n", - "images = X_test[5000].reshape(1, height, width, 1)\n", - "plot_image(images[0, :, :, 0])" + "from tensorflow.examples.tutorials.mnist import input_data\n", + "mnist = input_data.read_data_sets(\"/tmp/data/\")" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "def get_model_params():\n", + " gvars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)\n", + " return {gvar.op.name: value for gvar, value in zip(gvars, tf.get_default_session().run(gvars))}\n", + 
"\n", + "def restore_model_params(model_params):\n", + " gvar_names = list(model_params.keys())\n", + " assign_ops = {gvar_name: tf.get_default_graph().get_operation_by_name(gvar_name + \"/Assign\")\n", + " for gvar_name in gvar_names}\n", + " init_values = {gvar_name: assign_op.inputs[1] for gvar_name, assign_op in assign_ops.items()}\n", + " feed_dict = {init_values[gvar_name]: model_params[gvar_name] for gvar_name in gvar_names}\n", + " tf.get_default_session().run(assign_ops, feed_dict=feed_dict)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "n_epochs = 1000\n", + "batch_size = 50\n", + "\n", + "best_acc_val = 0\n", + "check_interval = 100\n", + "checks_since_last_progress = 0\n", + "max_checks_without_progress = 100\n", + "best_model_params = None \n", + "\n", + "with tf.Session(graph=graph) as sess:\n", + " init.run()\n", + " for epoch in range(n_epochs):\n", + " for iteration in range(mnist.train.num_examples // batch_size):\n", + " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", + " sess.run(training_op, feed_dict={X: X_batch, y: y_batch, is_training: True})\n", + " if iteration % check_interval == 0:\n", + " acc_val = accuracy.eval(feed_dict={X: mnist.test.images[:2000], y: mnist.test.labels[:2000]})\n", + " if acc_val > best_acc_val:\n", + " best_acc_val = acc_val\n", + " checks_since_last_progress = 0\n", + " best_model_params = get_model_params()\n", + " else:\n", + " checks_since_last_progress += 1\n", + " acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", + " acc_test = accuracy.eval(feed_dict={X: mnist.test.images[2000:], y: mnist.test.labels[2000:]})\n", + " print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test, \"Best validation accuracy:\", best_acc_val)\n", + " if checks_since_last_progress > max_checks_without_progress:\n", + " print(\"Early stopping!\")\n", + " break\n", + "\n", + " if best_model_params:\n", + " restore_model_params(best_model_params)\n", + " acc_test = accuracy.eval(feed_dict={X: mnist.test.images[2000:], y: mnist.test.labels[2000:]})\n", + " print(\"Final accuracy on test set:\", acc_test)\n", + " save_path = saver.save(sess, \"./my_mnist_model\")" ] }, { @@ -475,7 +595,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 21, "metadata": { "collapsed": true, "deletable": true, @@ -511,7 +631,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 22, "metadata": { "collapsed": false, "deletable": true, @@ -524,7 +644,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 23, "metadata": { "collapsed": true, "deletable": true, @@ -544,7 +664,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 24, "metadata": { "collapsed": false, "deletable": true, @@ -557,7 +677,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 25, "metadata": { "collapsed": true, "deletable": true, @@ -572,7 +692,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 26, "metadata": { "collapsed": false, "deletable": true, @@ -589,7 +709,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 27, "metadata": { "collapsed": false, "deletable": true, @@ -611,7 +731,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 28, "metadata": { "collapsed": false, "deletable": true, @@ -628,7 +748,7 @@ }, { "cell_type": "code", - 
"execution_count": 25, + "execution_count": 29, "metadata": { "collapsed": false, "deletable": true, @@ -641,7 +761,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 30, "metadata": { "collapsed": false, "deletable": true, @@ -654,7 +774,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 31, "metadata": { "collapsed": false, "deletable": true, @@ -717,7 +837,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.2+" + "version": "3.5.3" }, "nav_menu": {}, "toc": { diff --git a/14_recurrent_neural_networks.ipynb b/14_recurrent_neural_networks.ipynb index 4a1b2ad..5d26a58 100644 --- a/14_recurrent_neural_networks.ipynb +++ b/14_recurrent_neural_networks.ipynb @@ -573,6 +573,18 @@ "## Training a sequence classifier" ] }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "Note: the book uses `tensorflow.contrib.layers.fully_connected()` rather than `tf.layers.dense()` (which did not exist when this chapter was written). It is now preferable to use `tf.layers.dense()`, because anything in the contrib module may change or be deleted without notice. The `dense()` function is almost identical to the `fully_connected()` function. The main differences relevant to this chapter are:\n", + "* several parameters are renamed: `scope` becomes `name`, `activation_fn` becomes `activation` (and similarly the `_fn` suffix is removed from other parameters such as `normalizer_fn`), `weights_initializer` becomes `kernel_initializer`, etc.\n", + "* the default `activation` is now `None` rather than `tf.nn.relu`." + ] + }, { "cell_type": "code", "execution_count": 23, @@ -585,8 +597,6 @@ "source": [ "tf.reset_default_graph()\n", "\n", - "from tensorflow.contrib.layers import fully_connected\n", - "\n", "n_steps = 28\n", "n_inputs = 28\n", "n_neurons = 150\n", @@ -601,7 +611,7 @@ " basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)\n", " outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)\n", "\n", - "logits = fully_connected(states, n_outputs, activation_fn=None)\n", + "logits = tf.layers.dense(states, n_outputs)\n", "xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n", "loss = tf.reduce_mean(xentropy)\n", "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", @@ -675,8 +685,6 @@ "source": [ "tf.reset_default_graph()\n", "\n", - "from tensorflow.contrib.layers import fully_connected\n", - "\n", "n_steps = 28\n", "n_inputs = 28\n", "n_neurons1 = 150\n", @@ -693,7 +701,7 @@ "multi_layer_cell = tf.contrib.rnn.MultiRNNCell([hidden1, hidden2])\n", "outputs, states_tuple = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n", "states = tf.concat(axis=1, values=states_tuple)\n", - "logits = fully_connected(states, n_outputs, activation_fn=None)\n", + "logits = tf.layers.dense(states, n_outputs)\n", "xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n", "loss = tf.reduce_mean(xentropy)\n", "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", @@ -847,8 +855,6 @@ "source": [ "tf.reset_default_graph()\n", "\n", - "from tensorflow.contrib.layers import fully_connected\n", - "\n", "n_steps = 20\n", "n_inputs = 1\n", "n_neurons = 100\n", @@ -942,8 +948,6 @@ "source": [ "tf.reset_default_graph()\n", "\n", - "from tensorflow.contrib.layers import fully_connected\n", - "\n", "n_steps = 20\n", "n_inputs = 1\n", "n_neurons = 
100\n", @@ -958,7 +962,7 @@ "learning_rate = 0.001\n", "\n", "stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n", - "stacked_outputs = fully_connected(stacked_rnn_outputs, n_outputs, activation_fn=None)\n", + "stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)\n", "outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])\n", "\n", "loss = tf.reduce_sum(tf.square(outputs - y))\n", @@ -1181,7 +1185,6 @@ "outputs": [], "source": [ "tf.reset_default_graph()\n", - "from tensorflow.contrib.layers import fully_connected\n", "\n", "n_inputs = 1\n", "n_neurons = 100\n", @@ -1202,7 +1205,7 @@ " rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n", "\n", " stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n", - " stacked_outputs = fully_connected(stacked_rnn_outputs, n_outputs, activation_fn=None)\n", + " stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)\n", " outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])\n", "\n", " loss = tf.reduce_sum(tf.square(outputs - y))\n", @@ -1277,8 +1280,6 @@ "source": [ "tf.reset_default_graph()\n", "\n", - "from tensorflow.contrib.layers import fully_connected\n", - "\n", "n_steps = 28\n", "n_inputs = 28\n", "n_neurons = 150\n", @@ -1293,7 +1294,7 @@ "multi_cell = tf.contrib.rnn.MultiRNNCell([lstm_cell]*3)\n", "outputs, states = tf.nn.dynamic_rnn(multi_cell, X, dtype=tf.float32)\n", "top_layer_h_state = states[-1][1]\n", - "logits = fully_connected(top_layer_h_state, n_outputs, activation_fn=None, scope=\"softmax\")\n", + "logits = tf.layers.dense(top_layer_h_state, n_outputs, name=\"softmax\")\n", "xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n", "loss = tf.reduce_mean(xentropy, name=\"loss\")\n", "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", @@ -1336,7 +1337,8 @@ "metadata": { "collapsed": false, "deletable": true, - "editable": true + "editable": true, + "scrolled": true }, "outputs": [], "source": [ @@ -1466,7 +1468,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 52, "metadata": { "collapsed": true, "deletable": true, @@ -1509,7 +1511,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 53, "metadata": { "collapsed": false, "deletable": true, @@ -1522,7 +1524,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 54, "metadata": { "collapsed": false, "deletable": true, @@ -1545,7 +1547,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 55, "metadata": { "collapsed": false, "deletable": true, @@ -1565,7 +1567,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 56, "metadata": { "collapsed": false, "deletable": true, @@ -1578,7 +1580,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 57, "metadata": { "collapsed": false, "deletable": true, @@ -1591,7 +1593,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 58, "metadata": { "collapsed": false, "deletable": true, @@ -1614,7 +1616,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 59, "metadata": { "collapsed": true, "deletable": true, @@ -1652,7 +1654,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 60, "metadata": { "collapsed": false, "deletable": true, @@ -1666,7 +1668,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 61, "metadata": { "collapsed": false, "deletable": true, @@ -1679,7 +1681,7 @@ 
}, { "cell_type": "code", - "execution_count": 17, + "execution_count": 62, "metadata": { "collapsed": false, "deletable": true, @@ -1702,7 +1704,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 63, "metadata": { "collapsed": true, "deletable": true, @@ -1728,7 +1730,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 64, "metadata": { "collapsed": false, "deletable": true, @@ -1787,7 +1789,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 65, "metadata": { "collapsed": false, "deletable": true, @@ -1795,7 +1797,7 @@ }, "outputs": [], "source": [ - "num_steps = 100001\n", + "num_steps = 10001\n", "\n", "with tf.Session() as session:\n", " init.run()\n", @@ -1846,7 +1848,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 66, "metadata": { "collapsed": false, "deletable": true, @@ -1869,7 +1871,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 67, "metadata": { "collapsed": true, "deletable": true, @@ -1893,7 +1895,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 68, "metadata": { "collapsed": false, "deletable": true, @@ -1932,7 +1934,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 69, "metadata": { "collapsed": false, "deletable": true, @@ -1976,7 +1978,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 70, "metadata": { "collapsed": false, "deletable": true, @@ -2033,7 +2035,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.2+" + "version": "3.5.3" }, "nav_menu": {}, "toc": { diff --git a/15_autoencoders.ipynb b/15_autoencoders.ipynb index e1e20f5..4f7156e 100644 --- a/15_autoencoders.ipynb +++ b/15_autoencoders.ipynb @@ -225,7 +225,23 @@ "editable": true }, "source": [ - "Now let's build the Autoencoder:" + "Now let's build the Autoencoder..." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "Note: instead of using the `fully_connected()` function from the `tensorflow.contrib.layers` module (as in the book), we now use the `dense()` function from the `tf.layers` module, which did not exist when this chapter was written. This is preferable because anything in contrib may change or be deleted without notice, while `tf.layers` is part of the official API. 
As you will see, the code is mostly the same.\n", + "\n", + "The main differences relevant to this chapter are:\n", + "* the `scope` parameter was renamed to `name`, and the `_fn` suffix was removed in all the parameters that had it (for example the `activation_fn` parameter was renamed to `activation`).\n", + "* the `weights` parameter was renamed to `kernel` and the weights variable is now named `\"kernel\"` rather than `\"weights\"`,\n", + "* the bias variable is now named `\"bias\"` rather than `\"biases\"`,\n", + "* the default activation is `None` instead of `tf.nn.relu`" ] }, { @@ -240,8 +256,6 @@ "source": [ "tf.reset_default_graph()\n", "\n", - "from tensorflow.contrib.layers import fully_connected\n", - "\n", "n_inputs = 3\n", "n_hidden = 2 # codings\n", "n_outputs = n_inputs\n", @@ -249,8 +263,8 @@ "learning_rate = 0.01\n", "\n", "X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", - "hidden = fully_connected(X, n_hidden, activation_fn=None)\n", - "outputs = fully_connected(hidden, n_outputs, activation_fn=None)\n", + "hidden = tf.layers.dense(X, n_hidden)\n", + "outputs = tf.layers.dense(hidden, n_outputs)\n", "\n", "mse = tf.reduce_mean(tf.square(outputs - X))\n", "\n", @@ -352,6 +366,16 @@ "Let's build a stacked Autoencoder with 3 hidden layers and 1 output layer (ie. 2 stacked Autoencoders). We will use ELU activation, He initialization and L2 regularization." ] }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "Note: since the `tf.layers.dense()` function is incompatible with `tf.contrib.layers.arg_scope()` (which is used in the book), we now use python's `functools.partial()` function instead. It makes it easy to create a `my_dense_layer()` function that just calls `tf.layers.dense()` with the desired parameters automatically set (unless they are overridden when calling `my_dense_layer()`)." 
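,
    "\n",
    "Here is a minimal sketch of this `partial()` pattern (with arbitrary sizes and regularization scale, not taken from the book). Note that the losses created by the `kernel_regularizer` argument are stored in the `tf.GraphKeys.REGULARIZATION_LOSSES` collection, so they have to be added to the reconstruction loss explicitly:\n",
    "\n",
    "```python\n",
    "import tensorflow as tf\n",
    "from functools import partial\n",
    "\n",
    "X = tf.placeholder(tf.float32, shape=[None, 28 * 28])\n",
    "\n",
    "my_dense_layer = partial(\n",
    "    tf.layers.dense,\n",
    "    activation=tf.nn.elu,\n",
    "    kernel_initializer=tf.contrib.layers.variance_scaling_initializer(),\n",
    "    kernel_regularizer=tf.contrib.layers.l2_regularizer(0.0001))\n",
    "\n",
    "hidden = my_dense_layer(X, 300)\n",
    "outputs = my_dense_layer(hidden, 28 * 28, activation=None)\n",
    "\n",
    "# regularization losses are collected separately and added to the reconstruction loss\n",
    "reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)\n",
    "loss = tf.add_n([tf.reduce_mean(tf.square(outputs - X))] + reg_losses)\n",
    "```"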
+ ] + }, { "cell_type": "code", "execution_count": 11, @@ -364,7 +388,7 @@ "source": [ "tf.reset_default_graph()\n", "\n", - "from tensorflow.contrib.layers import fully_connected\n", + "from functools import partial\n", "\n", "n_inputs = 28*28\n", "n_hidden1 = 300\n", @@ -380,15 +404,17 @@ "#initializer = lambda shape, dtype=tf.float32: tf.truncated_normal(shape, 0., stddev=np.sqrt(2/shape[0]))\n", "\n", "X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", - "with tf.contrib.framework.arg_scope(\n", - " [fully_connected],\n", - " activation_fn=tf.nn.elu,\n", - " weights_initializer=initializer,\n", - " weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg)):\n", - " hidden1 = fully_connected(X, n_hidden1)\n", - " hidden2 = fully_connected(hidden1, n_hidden2)\n", - " hidden3 = fully_connected(hidden2, n_hidden3)\n", - " outputs = fully_connected(hidden3, n_outputs, activation_fn=None)\n", + "\n", + "my_dense_layer = partial(\n", + " tf.layers.dense,\n", + " activation=tf.nn.elu,\n", + " kernel_initializer=initializer,\n", + " kernel_regularizer=tf.contrib.layers.l2_regularizer(l2_reg))\n", + "\n", + "hidden1 = my_dense_layer(X, n_hidden1)\n", + "hidden2 = my_dense_layer(hidden1, n_hidden2)\n", + "hidden3 = my_dense_layer(hidden2, n_hidden3)\n", + "outputs = my_dense_layer(hidden3, n_outputs, activation=None)\n", "\n", "mse = tf.reduce_mean(tf.square(outputs - X))\n", "\n", @@ -528,19 +554,23 @@ }, "outputs": [], "source": [ - "def train_autoencoder(X_train, n_neurons, n_epochs, batch_size, learning_rate = 0.01, l2_reg = 0.0005, activation_fn=tf.nn.elu):\n", + "from functools import partial\n", + "\n", + "def train_autoencoder(X_train, n_neurons, n_epochs, batch_size, learning_rate = 0.01, l2_reg = 0.0005, activation=tf.nn.elu):\n", " graph = tf.Graph()\n", " with graph.as_default():\n", " n_inputs = X_train.shape[1]\n", "\n", " X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", - " with tf.contrib.framework.arg_scope(\n", - " [fully_connected],\n", - " activation_fn=activation_fn,\n", - " weights_initializer=tf.contrib.layers.variance_scaling_initializer(),\n", - " weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg)):\n", - " hidden = fully_connected(X, n_neurons, scope=\"hidden\")\n", - " outputs = fully_connected(hidden, n_inputs, activation_fn=None, scope=\"outputs\")\n", + " \n", + " my_dense_layer = partial(\n", + " tf.layers.dense,\n", + " activation=activation,\n", + " kernel_initializer=tf.contrib.layers.variance_scaling_initializer(),\n", + " kernel_regularizer=tf.contrib.layers.l2_regularizer(l2_reg))\n", + "\n", + " hidden = my_dense_layer(X, n_neurons, name=\"hidden\")\n", + " outputs = my_dense_layer(hidden, n_inputs, activation=None, name=\"outputs\")\n", "\n", " mse = tf.reduce_mean(tf.square(outputs - X))\n", "\n", @@ -566,7 +596,7 @@ " print(\"\\r{}\".format(epoch), \"Train MSE:\", mse_train)\n", " params = dict([(var.name, var.eval()) for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)])\n", " hidden_val = hidden.eval(feed_dict={X: X_train})\n", - " return hidden_val, params[\"hidden/weights:0\"], params[\"hidden/biases:0\"], params[\"outputs/weights:0\"], params[\"outputs/biases:0\"]" + " return hidden_val, params[\"hidden/kernel:0\"], params[\"hidden/bias:0\"], params[\"outputs/kernel:0\"], params[\"outputs/bias:0\"]" ] }, { @@ -853,7 +883,7 @@ "editable": true }, "source": [ - "It is common to tie the weights of the encoder and the decoder (`weights_decoder = tf.transpose(weights_encoder)`). 
Unfortunately this makes it impossible (or very tricky) to use the `fully_connected()` function, so we need to build the Autoencoder manually:" + "It is common to tie the weights of the encoder and the decoder (`weights_decoder = tf.transpose(weights_encoder)`). Unfortunately this makes it impossible (or very tricky) to use the `tf.layers.dense()` function, so we need to build the Autoencoder manually:" ] }, { @@ -1114,11 +1144,20 @@ "# Stacked denoising Autoencoder" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note: the book uses `tf.contrib.layers.dropout()` rather than `tf.layers.dropout()` (which did not exist when this chapter was written). It is now preferable to use `tf.layers.dropout()`, because anything in the contrib module may change or be deleted without notice. The `tf.layers.dropout()` function is almost identical to the `tf.contrib.layers.dropout()` function, except for a few minor differences. Most importantly:\n", + "* you must specify the dropout rate (`rate`) rather than the keep probability (`keep_prob`), where `rate` is simply equal to `1 - keep_prob`,\n", + "* the `is_training` parameter is renamed to `training`." + ] + }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 31, "metadata": { - "collapsed": true, + "collapsed": false, "deletable": true, "editable": true }, @@ -1126,8 +1165,6 @@ "source": [ "tf.reset_default_graph()\n", "\n", - "from tensorflow.contrib.layers import dropout\n", - "\n", "n_inputs = 28 * 28\n", "n_hidden1 = 300\n", "n_hidden2 = 150 # codings\n", @@ -1136,7 +1173,7 @@ "\n", "learning_rate = 0.01\n", "l2_reg = 0.00001\n", - "keep_prob = 0.7\n", + "dropout_rate = 0.3\n", "\n", "activation = tf.nn.elu\n", "regularizer = tf.contrib.layers.l2_regularizer(l2_reg)\n", @@ -1145,7 +1182,7 @@ "X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", "is_training = tf.placeholder_with_default(False, shape=(), name='is_training')\n", "\n", - "X_drop = dropout(X, keep_prob, is_training=is_training)\n", + "X_drop = tf.layers.dropout(X, dropout_rate, training=is_training)\n", "\n", "weights1_init = initializer([n_inputs, n_hidden1])\n", "weights2_init = initializer([n_hidden1, n_hidden2])\n", @@ -1177,7 +1214,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 32, "metadata": { "collapsed": false, "deletable": true, @@ -1204,7 +1241,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 33, "metadata": { "collapsed": false, "deletable": true, @@ -1227,7 +1264,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 34, "metadata": { "collapsed": false, "deletable": true, @@ -1242,7 +1279,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 35, "metadata": { "collapsed": false, "deletable": true, @@ -1270,7 +1307,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 36, "metadata": { "collapsed": false, "deletable": true, @@ -1295,7 +1332,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 37, "metadata": { "collapsed": true, "deletable": true, @@ -1310,7 +1347,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 38, "metadata": { "collapsed": false, "deletable": true, @@ -1360,7 +1397,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 39, "metadata": { "collapsed": false, "deletable": true, @@ -1387,7 +1424,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 40, "metadata": { "collapsed": false, 
"deletable": true, @@ -1410,7 +1447,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 43, "metadata": { "collapsed": false, "deletable": true, @@ -1478,7 +1515,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 44, "metadata": { "collapsed": false, "deletable": true, @@ -1488,6 +1525,8 @@ "source": [ "tf.reset_default_graph()\n", "\n", + "from functools import partial\n", + "\n", "n_inputs = 28*28\n", "n_hidden1 = 500\n", "n_hidden2 = 500\n", @@ -1500,20 +1539,22 @@ "\n", "initializer = tf.contrib.layers.variance_scaling_initializer()\n", "\n", - "with tf.contrib.framework.arg_scope([fully_connected],\n", - " activation_fn=tf.nn.elu,\n", - " weights_initializer=initializer):\n", - " X = tf.placeholder(tf.float32, [None, n_inputs])\n", - " hidden1 = fully_connected(X, n_hidden1)\n", - " hidden2 = fully_connected(hidden1, n_hidden2)\n", - " hidden3_mean = fully_connected(hidden2, n_hidden3, activation_fn=None)\n", - " hidden3_gamma = fully_connected(hidden2, n_hidden3, activation_fn=None)\n", - " noise = tf.random_normal(tf.shape(hidden3_gamma), dtype=tf.float32)\n", - " hidden3 = hidden3_mean + tf.exp(0.5 * hidden3_gamma) * noise\n", - " hidden4 = fully_connected(hidden3, n_hidden4)\n", - " hidden5 = fully_connected(hidden4, n_hidden5)\n", - " logits = fully_connected(hidden5, n_outputs, activation_fn=None)\n", - " outputs = tf.sigmoid(logits)\n", + "my_dense_layer = partial(\n", + " tf.layers.dense,\n", + " activation=tf.nn.elu,\n", + " kernel_initializer=initializer)\n", + "\n", + "X = tf.placeholder(tf.float32, [None, n_inputs])\n", + "hidden1 = my_dense_layer(X, n_hidden1)\n", + "hidden2 = my_dense_layer(hidden1, n_hidden2)\n", + "hidden3_mean = my_dense_layer(hidden2, n_hidden3, activation=None)\n", + "hidden3_gamma = my_dense_layer(hidden2, n_hidden3, activation=None)\n", + "noise = tf.random_normal(tf.shape(hidden3_gamma), dtype=tf.float32)\n", + "hidden3 = hidden3_mean + tf.exp(0.5 * hidden3_gamma) * noise\n", + "hidden4 = my_dense_layer(hidden3, n_hidden4)\n", + "hidden5 = my_dense_layer(hidden4, n_hidden5)\n", + "logits = my_dense_layer(hidden5, n_outputs, activation=None)\n", + "outputs = tf.sigmoid(logits)\n", "\n", "reconstruction_loss = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(labels=X, logits=logits))\n", "latent_loss = 0.5 * tf.reduce_sum(tf.exp(hidden3_gamma) + tf.square(hidden3_mean) - 1 - hidden3_gamma)\n", @@ -1528,7 +1569,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 45, "metadata": { "collapsed": false, "deletable": true, @@ -1565,7 +1606,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 46, "metadata": { "collapsed": false, "deletable": true, @@ -1594,7 +1635,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 47, "metadata": { "collapsed": false, "deletable": true, @@ -1619,7 +1660,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 48, "metadata": { "collapsed": false, "deletable": true, @@ -1647,7 +1688,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 49, "metadata": { "collapsed": false, "deletable": true, @@ -1667,7 +1708,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 50, "metadata": { "collapsed": false, "deletable": true, @@ -1682,7 +1723,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 51, "metadata": { "collapsed": true, "deletable": true, @@ -1712,7 +1753,7 @@ }, { "cell_type": "code", - 
"execution_count": 49, + "execution_count": 52, "metadata": { "collapsed": false, "deletable": true, @@ -1787,7 +1828,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.2+" + "version": "3.5.3" }, "nav_menu": { "height": "381px", diff --git a/16_reinforcement_learning.ipynb b/16_reinforcement_learning.ipynb index 3b6ce4a..7061f4e 100644 --- a/16_reinforcement_learning.ipynb +++ b/16_reinforcement_learning.ipynb @@ -986,6 +986,18 @@ "Let's create a neural network that will take observations as inputs, and output the action to take for each observation. To choose an action, the network will first estimate a probability for each action, then select an action randomly according to the estimated probabilities. In the case of the Cart-Pole environment, there are just two possible actions (left or right), so we only need one output neuron: it will output the probability `p` of the action 0 (left), and of course the probability of action 1 (right) will be `1 - p`." ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note: instead of using the `fully_connected()` function from the `tensorflow.contrib.layers` module (as in the book), we now use the `dense()` function from the `tf.layers` module, which did not exist when this chapter was written. This is preferable because anything in contrib may change or be deleted without notice, while `tf.layers` is part of the official API. As you will see, the code is mostly the same.\n", + "\n", + "The main differences relevant to this chapter are:\n", + "* the `_fn` suffix was removed in all the parameters that had it (for example the `activation_fn` parameter was renamed to `activation`).\n", + "* the `weights` parameter was renamed to `kernel`,\n", + "* the default activation is `None` instead of `tf.nn.relu`" + ] + }, { "cell_type": "code", "execution_count": 34, @@ -997,7 +1009,6 @@ "outputs": [], "source": [ "import tensorflow as tf\n", - "from tensorflow.contrib.layers import fully_connected\n", "\n", "# 1. Specify the network architecture\n", "n_inputs = 4 # == env.observation_space.shape[0]\n", @@ -1007,10 +1018,10 @@ "\n", "# 2. Build the neural network\n", "X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", - "hidden = fully_connected(X, n_hidden, activation_fn=tf.nn.elu,\n", - " weights_initializer=initializer)\n", - "outputs = fully_connected(hidden, n_outputs, activation_fn=tf.nn.sigmoid,\n", - " weights_initializer=initializer)\n", + "hidden = tf.layers.dense(X, n_hidden, activation=tf.nn.elu,\n", + " kernel_initializer=initializer)\n", + "outputs = tf.layers.dense(hidden, n_outputs, activation=tf.nn.sigmoid,\n", + " kernel_initializer=initializer)\n", "\n", "# 3. 
Select a random action based on the estimated probabilities\n", "p_left_and_right = tf.concat(axis=1, values=[outputs, 1 - outputs])\n", @@ -1121,7 +1132,6 @@ "outputs": [], "source": [ "import tensorflow as tf\n", - "from tensorflow.contrib.layers import fully_connected\n", "\n", "tf.reset_default_graph()\n", "\n", @@ -1136,8 +1146,8 @@ "X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", "y = tf.placeholder(tf.float32, shape=[None, n_outputs])\n", "\n", - "hidden = fully_connected(X, n_hidden, activation_fn=tf.nn.elu, weights_initializer=initializer)\n", - "logits = fully_connected(hidden, n_outputs, activation_fn=None)\n", + "hidden = tf.layers.dense(X, n_hidden, activation=tf.nn.elu, kernel_initializer=initializer)\n", + "logits = tf.layers.dense(hidden, n_outputs)\n", "outputs = tf.nn.sigmoid(logits) # probability of action 0 (left)\n", "p_left_and_right = tf.concat(axis=1, values=[outputs, 1 - outputs])\n", "action = tf.multinomial(tf.log(p_left_and_right), num_samples=1)\n", @@ -1275,7 +1285,6 @@ "outputs": [], "source": [ "import tensorflow as tf\n", - "from tensorflow.contrib.layers import fully_connected\n", "\n", "tf.reset_default_graph()\n", "\n", @@ -1289,8 +1298,8 @@ "\n", "X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", "\n", - "hidden = fully_connected(X, n_hidden, activation_fn=tf.nn.elu, weights_initializer=initializer)\n", - "logits = fully_connected(hidden, n_outputs, activation_fn=None)\n", + "hidden = tf.layers.dense(X, n_hidden, activation=tf.nn.elu, kernel_initializer=initializer)\n", + "logits = tf.layers.dense(hidden, n_outputs)\n", "outputs = tf.nn.sigmoid(logits) # probability of action 0 (left)\n", "p_left_and_right = tf.concat(axis=1, values=[outputs, 1 - outputs])\n", "action = tf.multinomial(tf.log(p_left_and_right), num_samples=1)\n", @@ -1366,7 +1375,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 45, "metadata": { "collapsed": false, "deletable": true, @@ -1416,7 +1425,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 46, "metadata": { "collapsed": true, "deletable": true, @@ -1429,7 +1438,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 47, "metadata": { "collapsed": false, "deletable": true, @@ -1454,7 +1463,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 48, "metadata": { "collapsed": false, "deletable": true, @@ -1499,7 +1508,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 49, "metadata": { "collapsed": false, "deletable": true, @@ -1594,7 +1603,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 50, "metadata": { "collapsed": false, "deletable": true, @@ -1623,7 +1632,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 51, "metadata": { "collapsed": false, "deletable": true, @@ -1637,7 +1646,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 52, "metadata": { "collapsed": false, "deletable": true, @@ -1650,7 +1659,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 53, "metadata": { "collapsed": false, "deletable": true, @@ -1677,7 +1686,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 54, "metadata": { "collapsed": false, "deletable": true, @@ -1691,7 +1700,7 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 55, "metadata": { "collapsed": false, "deletable": true, @@ -1704,7 +1713,7 @@ }, { "cell_type": "code", - "execution_count": 59, + 
"execution_count": 56, "metadata": { "collapsed": false, "deletable": true, @@ -1737,7 +1746,7 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 57, "metadata": { "collapsed": false, "deletable": true, @@ -1759,7 +1768,7 @@ }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 58, "metadata": { "collapsed": false, "deletable": true, @@ -1790,9 +1799,22 @@ "## Build DQN" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note: instead of using `tf.contrib.layers.convolution2d()` or `tf.contrib.layers.conv2d()` (as in the book), we now use the `tf.layers.conv2d()`, which did not exist when this chapter was written. This is preferable because anything in contrib may change or be deleted without notice, while `tf.layers` is part of the official API. As you will see, the code is mostly the same, except that the parameter names have changed slightly:\n", + "* the `num_outputs` parameter was renamed to `filters`,\n", + "* the `stride` parameter was renamed to `strides`,\n", + "* the `_fn` suffix was removed from parameter names that had it (e.g., `activation_fn` was renamed to `activation`),\n", + "* the `weights_initializer` parameter was renamed to `kernel_initializer`,\n", + "* the weights variable was renamed to `\"kernel\"` (instead of `\"weights\"`), and the biases variable was renamed from `\"biases\"` to `\"bias\"`,\n", + "* and the default `activation` is now `None` instead of `tf.nn.relu`." + ] + }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 59, "metadata": { "collapsed": false, "deletable": true, @@ -1802,8 +1824,6 @@ "source": [ "tf.reset_default_graph()\n", "\n", - "from tensorflow.contrib.layers import convolution2d, fully_connected\n", - "\n", "input_height = 88\n", "input_width = 80\n", "input_channels = 1\n", @@ -1824,12 +1844,12 @@ " prev_layer = X_state\n", " conv_layers = []\n", " with tf.variable_scope(scope) as scope:\n", - " for n_maps, kernel_size, stride, padding, activation in zip(conv_n_maps, conv_kernel_sizes, conv_strides, conv_paddings, conv_activation):\n", - " prev_layer = convolution2d(prev_layer, num_outputs=n_maps, kernel_size=kernel_size, stride=stride, padding=padding, activation_fn=activation, weights_initializer=initializer)\n", + " for n_maps, kernel_size, strides, padding, activation in zip(conv_n_maps, conv_kernel_sizes, conv_strides, conv_paddings, conv_activation):\n", + " prev_layer = tf.layers.conv2d(prev_layer, filters=n_maps, kernel_size=kernel_size, strides=strides, padding=padding, activation=activation, kernel_initializer=initializer)\n", " conv_layers.append(prev_layer)\n", " last_conv_layer_flat = tf.reshape(prev_layer, shape=[-1, n_hidden_inputs])\n", - " hidden = fully_connected(last_conv_layer_flat, n_hidden, activation_fn=hidden_activation, weights_initializer=initializer)\n", - " outputs = fully_connected(hidden, n_outputs, activation_fn=None)\n", + " hidden = tf.layers.dense(last_conv_layer_flat, n_hidden, activation=hidden_activation, kernel_initializer=initializer)\n", + " outputs = tf.layers.dense(hidden, n_outputs)\n", " trainable_vars = {var.name[len(scope.name):]: var for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope.name)}\n", " return outputs, trainable_vars\n", "\n", @@ -1857,7 +1877,7 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 60, "metadata": { "collapsed": false, "deletable": true, @@ -1870,7 +1890,7 @@ }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 61, 
"metadata": { "collapsed": false, "deletable": true, @@ -1896,7 +1916,7 @@ }, { "cell_type": "code", - "execution_count": 65, + "execution_count": 62, "metadata": { "collapsed": true, "deletable": true, @@ -1919,7 +1939,7 @@ }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 63, "metadata": { "collapsed": false, "deletable": true, @@ -2023,7 +2043,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.2+" + "version": "3.5.3" }, "nav_menu": {}, "toc": {