Use tf.layers instead of tf.contrib.layers

Aurélien Geron
2017-04-30 10:21:27 +02:00
parent 14101abcf9
commit 326d32cae0
7 changed files with 531 additions and 258 deletions


@@ -225,7 +225,23 @@
"editable": true
},
"source": [
"Now let's build the Autoencoder:"
"Now let's build the Autoencoder..."
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"Note: instead of using the `fully_connected()` function from the `tensorflow.contrib.layers` module (as in the book), we now use the `dense()` function from the `tf.layers` module, which did not exist when this chapter was written. This is preferable because anything in contrib may change or be deleted without notice, while `tf.layers` is part of the official API. As you will see, the code is mostly the same.\n",
"\n",
"The main differences relevant to this chapter are:\n",
"* the `scope` parameter was renamed to `name`, and the `_fn` suffix was removed in all the parameters that had it (for example the `activation_fn` parameter was renamed to `activation`).\n",
"* the `weights` parameter was renamed to `kernel` and the weights variable is now named `\"kernel\"` rather than `\"weights\"`,\n",
"* the bias variable is now named `\"bias\"` rather than `\"biases\"`,\n",
"* the default activation is `None` instead of `tf.nn.relu`"
]
},
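A minimal before/after sketch of this renaming, assuming TensorFlow 1.x with the `contrib` module still available; the layer names (`hidden_old`, `hidden_new`) and sizes are placeholders chosen for illustration:

```python
import tensorflow as tf
from tensorflow.contrib.layers import fully_connected

tf.reset_default_graph()
X = tf.placeholder(tf.float32, shape=[None, 3])

# Book version (tf.contrib.layers): *_fn parameters, weights_* arguments, `scope`
hidden_old = fully_connected(X, 2, activation_fn=None, scope="hidden_old")

# tf.layers version: `activation` instead of `activation_fn`, `name` instead of `scope`
hidden_new = tf.layers.dense(X, 2, activation=None, name="hidden_new")

# The variable names change accordingly: "weights"/"biases" become "kernel"/"bias"
print([v.name for v in tf.global_variables()])
# expected to include: hidden_old/weights:0, hidden_old/biases:0,
#                      hidden_new/kernel:0,  hidden_new/bias:0
```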
{
@@ -240,8 +256,6 @@
"source": [
"tf.reset_default_graph()\n",
"\n",
"from tensorflow.contrib.layers import fully_connected\n",
"\n",
"n_inputs = 3\n",
"n_hidden = 2 # codings\n",
"n_outputs = n_inputs\n",
@@ -249,8 +263,8 @@
"learning_rate = 0.01\n",
"\n",
"X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n",
"hidden = fully_connected(X, n_hidden, activation_fn=None)\n",
"outputs = fully_connected(hidden, n_outputs, activation_fn=None)\n",
"hidden = tf.layers.dense(X, n_hidden)\n",
"outputs = tf.layers.dense(hidden, n_outputs)\n",
"\n",
"mse = tf.reduce_mean(tf.square(outputs - X))\n",
"\n",
@@ -352,6 +366,16 @@
"Let's build a stacked Autoencoder with 3 hidden layers and 1 output layer (ie. 2 stacked Autoencoders). We will use ELU activation, He initialization and L2 regularization."
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"Note: since the `tf.layers.dense()` function is incompatible with `tf.contrib.layers.arg_scope()` (which is used in the book), we now use python's `functools.partial()` function instead. It makes it easy to create a `my_dense_layer()` function that just calls `tf.layers.dense()` with the desired parameters automatically set (unless they are overridden when calling `my_dense_layer()`)."
]
},
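As a plain-Python illustration of the override behavior described above (no TensorFlow required), here is a tiny sketch; the `dense()` stub and its arguments are hypothetical stand-ins for `tf.layers.dense()`:

```python
from functools import partial

def dense(inputs, units, activation="relu", kernel_initializer="glorot"):
    """Stand-in for tf.layers.dense(): just report which arguments it received."""
    return inputs, units, activation, kernel_initializer

# Freeze the defaults to share across layers (the role arg_scope played in the book)
my_dense_layer = partial(dense, activation="elu", kernel_initializer="he")

print(my_dense_layer("X", 300))                  # -> ('X', 300, 'elu', 'he')
print(my_dense_layer("X", 10, activation=None))  # -> ('X', 10, None, 'he'); override wins
```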
{
"cell_type": "code",
"execution_count": 11,
@@ -364,7 +388,7 @@
"source": [
"tf.reset_default_graph()\n",
"\n",
"from tensorflow.contrib.layers import fully_connected\n",
"from functools import partial\n",
"\n",
"n_inputs = 28*28\n",
"n_hidden1 = 300\n",
@@ -380,15 +404,17 @@
"#initializer = lambda shape, dtype=tf.float32: tf.truncated_normal(shape, 0., stddev=np.sqrt(2/shape[0]))\n",
"\n",
"X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n",
"with tf.contrib.framework.arg_scope(\n",
" [fully_connected],\n",
" activation_fn=tf.nn.elu,\n",
" weights_initializer=initializer,\n",
" weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg)):\n",
" hidden1 = fully_connected(X, n_hidden1)\n",
" hidden2 = fully_connected(hidden1, n_hidden2)\n",
" hidden3 = fully_connected(hidden2, n_hidden3)\n",
" outputs = fully_connected(hidden3, n_outputs, activation_fn=None)\n",
"\n",
"my_dense_layer = partial(\n",
" tf.layers.dense,\n",
" activation=tf.nn.elu,\n",
" kernel_initializer=initializer,\n",
" kernel_regularizer=tf.contrib.layers.l2_regularizer(l2_reg))\n",
"\n",
"hidden1 = my_dense_layer(X, n_hidden1)\n",
"hidden2 = my_dense_layer(hidden1, n_hidden2)\n",
"hidden3 = my_dense_layer(hidden2, n_hidden3)\n",
"outputs = my_dense_layer(hidden3, n_outputs, activation=None)\n",
"\n",
"mse = tf.reduce_mean(tf.square(outputs - X))\n",
"\n",
@@ -528,19 +554,23 @@
},
"outputs": [],
"source": [
"def train_autoencoder(X_train, n_neurons, n_epochs, batch_size, learning_rate = 0.01, l2_reg = 0.0005, activation_fn=tf.nn.elu):\n",
"from functools import partial\n",
"\n",
"def train_autoencoder(X_train, n_neurons, n_epochs, batch_size, learning_rate = 0.01, l2_reg = 0.0005, activation=tf.nn.elu):\n",
" graph = tf.Graph()\n",
" with graph.as_default():\n",
" n_inputs = X_train.shape[1]\n",
"\n",
" X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n",
" with tf.contrib.framework.arg_scope(\n",
" [fully_connected],\n",
" activation_fn=activation_fn,\n",
" weights_initializer=tf.contrib.layers.variance_scaling_initializer(),\n",
" weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg)):\n",
" hidden = fully_connected(X, n_neurons, scope=\"hidden\")\n",
" outputs = fully_connected(hidden, n_inputs, activation_fn=None, scope=\"outputs\")\n",
" \n",
" my_dense_layer = partial(\n",
" tf.layers.dense,\n",
" activation=activation,\n",
" kernel_initializer=tf.contrib.layers.variance_scaling_initializer(),\n",
" kernel_regularizer=tf.contrib.layers.l2_regularizer(l2_reg))\n",
"\n",
" hidden = my_dense_layer(X, n_neurons, name=\"hidden\")\n",
" outputs = my_dense_layer(hidden, n_inputs, activation=None, name=\"outputs\")\n",
"\n",
" mse = tf.reduce_mean(tf.square(outputs - X))\n",
"\n",
@@ -566,7 +596,7 @@
" print(\"\\r{}\".format(epoch), \"Train MSE:\", mse_train)\n",
" params = dict([(var.name, var.eval()) for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)])\n",
" hidden_val = hidden.eval(feed_dict={X: X_train})\n",
" return hidden_val, params[\"hidden/weights:0\"], params[\"hidden/biases:0\"], params[\"outputs/weights:0\"], params[\"outputs/biases:0\"]"
" return hidden_val, params[\"hidden/kernel:0\"], params[\"hidden/bias:0\"], params[\"outputs/kernel:0\"], params[\"outputs/bias:0\"]"
]
},
{
@@ -853,7 +883,7 @@
"editable": true
},
"source": [
"It is common to tie the weights of the encoder and the decoder (`weights_decoder = tf.transpose(weights_encoder)`). Unfortunately this makes it impossible (or very tricky) to use the `fully_connected()` function, so we need to build the Autoencoder manually:"
"It is common to tie the weights of the encoder and the decoder (`weights_decoder = tf.transpose(weights_encoder)`). Unfortunately this makes it impossible (or very tricky) to use the `tf.layers.dense()` function, so we need to build the Autoencoder manually:"
]
},
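For a rough idea of what building it manually looks like, here is a minimal single-layer sketch with tied weights, assuming TensorFlow 1.x; the sizes and the choice to leave the biases untied are illustrative:

```python
import tensorflow as tf

n_inputs = 28 * 28
n_hidden = 150  # codings

initializer = tf.contrib.layers.variance_scaling_initializer()  # He initialization

X = tf.placeholder(tf.float32, shape=[None, n_inputs])

weights1 = tf.Variable(initializer([n_inputs, n_hidden]), name="weights1")
weights2 = tf.transpose(weights1, name="weights2")  # tied: decoder reuses the encoder's weights

biases1 = tf.Variable(tf.zeros(n_hidden), name="biases1")
biases2 = tf.Variable(tf.zeros(n_inputs), name="biases2")  # biases are not tied

hidden = tf.nn.elu(tf.matmul(X, weights1) + biases1)
outputs = tf.matmul(hidden, weights2) + biases2

reconstruction_loss = tf.reduce_mean(tf.square(outputs - X))
```

Only `weights1` is a trainable variable here; `weights2` is just its transpose, so the encoder and decoder share the same parameters.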
{
@@ -1114,11 +1144,20 @@
"# Stacked denoising Autoencoder"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note: the book uses `tf.contrib.layers.dropout()` rather than `tf.layers.dropout()` (which did not exist when this chapter was written). It is now preferable to use `tf.layers.dropout()`, because anything in the contrib module may change or be deleted without notice. The `tf.layers.dropout()` function is almost identical to the `tf.contrib.layers.dropout()` function, except for a few minor differences. Most importantly:\n",
"* you must specify the dropout rate (`rate`) rather than the keep probability (`keep_prob`), where `rate` is simply equal to `1 - keep_prob`,\n",
"* the `is_training` parameter is renamed to `training`."
]
},
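A minimal before/after pair for those two differences, assuming TensorFlow 1.x with the `contrib` module available; the 0.7/0.3 values match the ones used in this notebook:

```python
import tensorflow as tf

dropout_rate = 0.3            # rate = 1 - keep_prob
keep_prob = 1 - dropout_rate  # 0.7, the value the book passes to contrib's dropout

X = tf.placeholder(tf.float32, shape=[None, 28 * 28])
is_training = tf.placeholder_with_default(False, shape=(), name="is_training")

# Book version (tf.contrib.layers): keep probability + is_training switch
X_drop_old = tf.contrib.layers.dropout(X, keep_prob, is_training=is_training)

# tf.layers version: dropout *rate* + training switch, same behavior
X_drop_new = tf.layers.dropout(X, dropout_rate, training=is_training)
```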
{
"cell_type": "code",
"execution_count": 30,
"execution_count": 31,
"metadata": {
"collapsed": true,
"collapsed": false,
"deletable": true,
"editable": true
},
@@ -1126,8 +1165,6 @@
"source": [
"tf.reset_default_graph()\n",
"\n",
"from tensorflow.contrib.layers import dropout\n",
"\n",
"n_inputs = 28 * 28\n",
"n_hidden1 = 300\n",
"n_hidden2 = 150 # codings\n",
@@ -1136,7 +1173,7 @@
"\n",
"learning_rate = 0.01\n",
"l2_reg = 0.00001\n",
"keep_prob = 0.7\n",
"dropout_rate = 0.3\n",
"\n",
"activation = tf.nn.elu\n",
"regularizer = tf.contrib.layers.l2_regularizer(l2_reg)\n",
@@ -1145,7 +1182,7 @@
"X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n",
"is_training = tf.placeholder_with_default(False, shape=(), name='is_training')\n",
"\n",
"X_drop = dropout(X, keep_prob, is_training=is_training)\n",
"X_drop = tf.layers.dropout(X, dropout_rate, training=is_training)\n",
"\n",
"weights1_init = initializer([n_inputs, n_hidden1])\n",
"weights2_init = initializer([n_hidden1, n_hidden2])\n",
@@ -1177,7 +1214,7 @@
},
{
"cell_type": "code",
"execution_count": 31,
"execution_count": 32,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -1204,7 +1241,7 @@
},
{
"cell_type": "code",
"execution_count": 32,
"execution_count": 33,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -1227,7 +1264,7 @@
},
{
"cell_type": "code",
"execution_count": 33,
"execution_count": 34,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -1242,7 +1279,7 @@
},
{
"cell_type": "code",
"execution_count": 34,
"execution_count": 35,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -1270,7 +1307,7 @@
},
{
"cell_type": "code",
"execution_count": 35,
"execution_count": 36,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -1295,7 +1332,7 @@
},
{
"cell_type": "code",
"execution_count": 36,
"execution_count": 37,
"metadata": {
"collapsed": true,
"deletable": true,
@@ -1310,7 +1347,7 @@
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": 38,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -1360,7 +1397,7 @@
},
{
"cell_type": "code",
"execution_count": 38,
"execution_count": 39,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -1387,7 +1424,7 @@
},
{
"cell_type": "code",
"execution_count": 39,
"execution_count": 40,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -1410,7 +1447,7 @@
},
{
"cell_type": "code",
"execution_count": 40,
"execution_count": 43,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -1478,7 +1515,7 @@
},
{
"cell_type": "code",
"execution_count": 41,
"execution_count": 44,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -1488,6 +1525,8 @@
"source": [
"tf.reset_default_graph()\n",
"\n",
"from functools import partial\n",
"\n",
"n_inputs = 28*28\n",
"n_hidden1 = 500\n",
"n_hidden2 = 500\n",
@@ -1500,20 +1539,22 @@
"\n",
"initializer = tf.contrib.layers.variance_scaling_initializer()\n",
"\n",
"with tf.contrib.framework.arg_scope([fully_connected],\n",
" activation_fn=tf.nn.elu,\n",
" weights_initializer=initializer):\n",
" X = tf.placeholder(tf.float32, [None, n_inputs])\n",
" hidden1 = fully_connected(X, n_hidden1)\n",
" hidden2 = fully_connected(hidden1, n_hidden2)\n",
" hidden3_mean = fully_connected(hidden2, n_hidden3, activation_fn=None)\n",
" hidden3_gamma = fully_connected(hidden2, n_hidden3, activation_fn=None)\n",
" noise = tf.random_normal(tf.shape(hidden3_gamma), dtype=tf.float32)\n",
" hidden3 = hidden3_mean + tf.exp(0.5 * hidden3_gamma) * noise\n",
" hidden4 = fully_connected(hidden3, n_hidden4)\n",
" hidden5 = fully_connected(hidden4, n_hidden5)\n",
" logits = fully_connected(hidden5, n_outputs, activation_fn=None)\n",
" outputs = tf.sigmoid(logits)\n",
"my_dense_layer = partial(\n",
" tf.layers.dense,\n",
" activation=tf.nn.elu,\n",
" kernel_initializer=initializer)\n",
"\n",
"X = tf.placeholder(tf.float32, [None, n_inputs])\n",
"hidden1 = my_dense_layer(X, n_hidden1)\n",
"hidden2 = my_dense_layer(hidden1, n_hidden2)\n",
"hidden3_mean = my_dense_layer(hidden2, n_hidden3, activation=None)\n",
"hidden3_gamma = my_dense_layer(hidden2, n_hidden3, activation=None)\n",
"noise = tf.random_normal(tf.shape(hidden3_gamma), dtype=tf.float32)\n",
"hidden3 = hidden3_mean + tf.exp(0.5 * hidden3_gamma) * noise\n",
"hidden4 = my_dense_layer(hidden3, n_hidden4)\n",
"hidden5 = my_dense_layer(hidden4, n_hidden5)\n",
"logits = my_dense_layer(hidden5, n_outputs, activation=None)\n",
"outputs = tf.sigmoid(logits)\n",
"\n",
"reconstruction_loss = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(labels=X, logits=logits))\n",
"latent_loss = 0.5 * tf.reduce_sum(tf.exp(hidden3_gamma) + tf.square(hidden3_mean) - 1 - hidden3_gamma)\n",
@@ -1528,7 +1569,7 @@
},
{
"cell_type": "code",
"execution_count": 42,
"execution_count": 45,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -1565,7 +1606,7 @@
},
{
"cell_type": "code",
"execution_count": 43,
"execution_count": 46,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -1594,7 +1635,7 @@
},
{
"cell_type": "code",
"execution_count": 44,
"execution_count": 47,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -1619,7 +1660,7 @@
},
{
"cell_type": "code",
"execution_count": 45,
"execution_count": 48,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -1647,7 +1688,7 @@
},
{
"cell_type": "code",
"execution_count": 46,
"execution_count": 49,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -1667,7 +1708,7 @@
},
{
"cell_type": "code",
"execution_count": 47,
"execution_count": 50,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -1682,7 +1723,7 @@
},
{
"cell_type": "code",
"execution_count": 48,
"execution_count": 51,
"metadata": {
"collapsed": true,
"deletable": true,
@@ -1712,7 +1753,7 @@
},
{
"cell_type": "code",
"execution_count": 49,
"execution_count": 52,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -1787,7 +1828,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2+"
"version": "3.5.3"
},
"nav_menu": {
"height": "381px",