Use tf.layers instead of tf.contrib.layers
@@ -225,7 +225,23 @@
"editable": true
},
"source": [
"Now let's build the Autoencoder:"
"Now let's build the Autoencoder..."
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"Note: instead of using the `fully_connected()` function from the `tensorflow.contrib.layers` module (as in the book), we now use the `dense()` function from the `tf.layers` module, which did not exist when this chapter was written. This is preferable because anything in contrib may change or be deleted without notice, while `tf.layers` is part of the official API. As you will see, the code is mostly the same.\n",
"\n",
"The main differences relevant to this chapter are:\n",
"* the `scope` parameter was renamed to `name`, and the `_fn` suffix was removed in all the parameters that had it (for example the `activation_fn` parameter was renamed to `activation`).\n",
"* the `weights` parameter was renamed to `kernel` and the weights variable is now named `\"kernel\"` rather than `\"weights\"`,\n",
"* the bias variable is now named `\"bias\"` rather than `\"biases\"`,\n",
"* the default activation is `None` instead of `tf.nn.relu`"
]
},
{
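To make the parameter renames above concrete, here is a minimal sketch (not part of the diff) of the same layer written both ways, assuming a TensorFlow 1.x environment; the variable names are illustrative only:

import tensorflow as tf

X = tf.placeholder(tf.float32, shape=[None, 3])

# Book / contrib style (scope, activation_fn, variables named "weights"/"biases"):
#   hidden = tf.contrib.layers.fully_connected(X, 2, activation_fn=tf.nn.relu, scope="hidden")

# tf.layers style used in this commit (name, activation, variables named "kernel"/"bias");
# note that activation defaults to None here, not tf.nn.relu:
hidden = tf.layers.dense(X, 2, activation=tf.nn.relu, name="hidden")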
@@ -240,8 +256,6 @@
"source": [
"tf.reset_default_graph()\n",
"\n",
"from tensorflow.contrib.layers import fully_connected\n",
"\n",
"n_inputs = 3\n",
"n_hidden = 2 # codings\n",
"n_outputs = n_inputs\n",

@@ -249,8 +263,8 @@
"learning_rate = 0.01\n",
"\n",
"X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n",
"hidden = fully_connected(X, n_hidden, activation_fn=None)\n",
"outputs = fully_connected(hidden, n_outputs, activation_fn=None)\n",
"hidden = tf.layers.dense(X, n_hidden)\n",
"outputs = tf.layers.dense(hidden, n_outputs)\n",
"\n",
"mse = tf.reduce_mean(tf.square(outputs - X))\n",
"\n",

@@ -352,6 +366,16 @@
"Let's build a stacked Autoencoder with 3 hidden layers and 1 output layer (ie. 2 stacked Autoencoders). We will use ELU activation, He initialization and L2 regularization."
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"Note: since the `tf.layers.dense()` function is incompatible with `tf.contrib.layers.arg_scope()` (which is used in the book), we now use python's `functools.partial()` function instead. It makes it easy to create a `my_dense_layer()` function that just calls `tf.layers.dense()` with the desired parameters automatically set (unless they are overridden when calling `my_dense_layer()`)."
]
},
{
"cell_type": "code",
"execution_count": 11,
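As a minimal sketch (not part of the diff) of the `functools.partial()` pattern described in the note above, assuming TensorFlow 1.x and illustrative hyperparameter values:

import tensorflow as tf
from functools import partial

l2_reg = 0.0001
he_init = tf.contrib.layers.variance_scaling_initializer()  # He initialization

# my_dense_layer() behaves like tf.layers.dense() with these defaults pre-filled:
my_dense_layer = partial(tf.layers.dense,
                         activation=tf.nn.elu,
                         kernel_initializer=he_init,
                         kernel_regularizer=tf.contrib.layers.l2_regularizer(l2_reg))

X = tf.placeholder(tf.float32, shape=[None, 28 * 28])
hidden1 = my_dense_layer(X, 300)                             # uses the defaults above
outputs = my_dense_layer(hidden1, 28 * 28, activation=None)  # overrides activation per call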
@@ -364,7 +388,7 @@
"source": [
"tf.reset_default_graph()\n",
"\n",
"from tensorflow.contrib.layers import fully_connected\n",
"from functools import partial\n",
"\n",
"n_inputs = 28*28\n",
"n_hidden1 = 300\n",

@@ -380,15 +404,17 @@
"#initializer = lambda shape, dtype=tf.float32: tf.truncated_normal(shape, 0., stddev=np.sqrt(2/shape[0]))\n",
"\n",
"X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n",
"with tf.contrib.framework.arg_scope(\n",
" [fully_connected],\n",
" activation_fn=tf.nn.elu,\n",
" weights_initializer=initializer,\n",
" weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg)):\n",
" hidden1 = fully_connected(X, n_hidden1)\n",
" hidden2 = fully_connected(hidden1, n_hidden2)\n",
" hidden3 = fully_connected(hidden2, n_hidden3)\n",
" outputs = fully_connected(hidden3, n_outputs, activation_fn=None)\n",
"\n",
"my_dense_layer = partial(\n",
" tf.layers.dense,\n",
" activation=tf.nn.elu,\n",
" kernel_initializer=initializer,\n",
" kernel_regularizer=tf.contrib.layers.l2_regularizer(l2_reg))\n",
"\n",
"hidden1 = my_dense_layer(X, n_hidden1)\n",
"hidden2 = my_dense_layer(hidden1, n_hidden2)\n",
"hidden3 = my_dense_layer(hidden2, n_hidden3)\n",
"outputs = my_dense_layer(hidden3, n_outputs, activation=None)\n",
"\n",
"mse = tf.reduce_mean(tf.square(outputs - X))\n",
"\n",
@@ -528,19 +554,23 @@
},
"outputs": [],
"source": [
"def train_autoencoder(X_train, n_neurons, n_epochs, batch_size, learning_rate = 0.01, l2_reg = 0.0005, activation_fn=tf.nn.elu):\n",
"from functools import partial\n",
"\n",
"def train_autoencoder(X_train, n_neurons, n_epochs, batch_size, learning_rate = 0.01, l2_reg = 0.0005, activation=tf.nn.elu):\n",
" graph = tf.Graph()\n",
" with graph.as_default():\n",
" n_inputs = X_train.shape[1]\n",
"\n",
" X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n",
" with tf.contrib.framework.arg_scope(\n",
" [fully_connected],\n",
" activation_fn=activation_fn,\n",
" weights_initializer=tf.contrib.layers.variance_scaling_initializer(),\n",
" weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg)):\n",
" hidden = fully_connected(X, n_neurons, scope=\"hidden\")\n",
" outputs = fully_connected(hidden, n_inputs, activation_fn=None, scope=\"outputs\")\n",
" \n",
" my_dense_layer = partial(\n",
" tf.layers.dense,\n",
" activation=activation,\n",
" kernel_initializer=tf.contrib.layers.variance_scaling_initializer(),\n",
" kernel_regularizer=tf.contrib.layers.l2_regularizer(l2_reg))\n",
"\n",
" hidden = my_dense_layer(X, n_neurons, name=\"hidden\")\n",
" outputs = my_dense_layer(hidden, n_inputs, activation=None, name=\"outputs\")\n",
"\n",
" mse = tf.reduce_mean(tf.square(outputs - X))\n",
"\n",

@@ -566,7 +596,7 @@
" print(\"\\r{}\".format(epoch), \"Train MSE:\", mse_train)\n",
" params = dict([(var.name, var.eval()) for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)])\n",
" hidden_val = hidden.eval(feed_dict={X: X_train})\n",
" return hidden_val, params[\"hidden/weights:0\"], params[\"hidden/biases:0\"], params[\"outputs/weights:0\"], params[\"outputs/biases:0\"]"
" return hidden_val, params[\"hidden/kernel:0\"], params[\"hidden/bias:0\"], params[\"outputs/kernel:0\"], params[\"outputs/bias:0\"]"
]
},
{

@@ -853,7 +883,7 @@
"editable": true
},
"source": [
"It is common to tie the weights of the encoder and the decoder (`weights_decoder = tf.transpose(weights_encoder)`). Unfortunately this makes it impossible (or very tricky) to use the `fully_connected()` function, so we need to build the Autoencoder manually:"
"It is common to tie the weights of the encoder and the decoder (`weights_decoder = tf.transpose(weights_encoder)`). Unfortunately this makes it impossible (or very tricky) to use the `tf.layers.dense()` function, so we need to build the Autoencoder manually:"
]
},
{
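A rough sketch (not taken from the commit) of the tied-weights construction the note above refers to, assuming TensorFlow 1.x; sizes and names are illustrative only:

import tensorflow as tf

n_inputs, n_hidden = 28 * 28, 150
X = tf.placeholder(tf.float32, shape=[None, n_inputs])

weights1 = tf.Variable(tf.truncated_normal([n_inputs, n_hidden], stddev=0.1), name="weights1")
weights2 = tf.transpose(weights1, name="weights2")         # decoder weights tied to the encoder's
biases1 = tf.Variable(tf.zeros(n_hidden), name="biases1")
biases2 = tf.Variable(tf.zeros(n_inputs), name="biases2")  # biases are not tied

hidden = tf.nn.elu(tf.matmul(X, weights1) + biases1)
outputs = tf.matmul(hidden, weights2) + biases2            # layers built manually, not with tf.layers.dense()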
@@ -1114,11 +1144,20 @@
"# Stacked denoising Autoencoder"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note: the book uses `tf.contrib.layers.dropout()` rather than `tf.layers.dropout()` (which did not exist when this chapter was written). It is now preferable to use `tf.layers.dropout()`, because anything in the contrib module may change or be deleted without notice. The `tf.layers.dropout()` function is almost identical to the `tf.contrib.layers.dropout()` function, except for a few minor differences. Most importantly:\n",
"* you must specify the dropout rate (`rate`) rather than the keep probability (`keep_prob`), where `rate` is simply equal to `1 - keep_prob`,\n",
"* the `is_training` parameter is renamed to `training`."
]
},
{
"cell_type": "code",
"execution_count": 30,
"execution_count": 31,
"metadata": {
"collapsed": true,
"collapsed": false,
"deletable": true,
"editable": true
},
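A minimal sketch (not part of the diff) contrasting the two dropout signatures the note above compares, assuming TensorFlow 1.x:

import tensorflow as tf

X = tf.placeholder(tf.float32, shape=[None, 28 * 28])
training = tf.placeholder_with_default(False, shape=(), name="training")

# Book / contrib version: keep probability of 0.7
#   X_drop = tf.contrib.layers.dropout(X, keep_prob=0.7, is_training=training)

# tf.layers version used in this commit: dropout *rate* of 0.3 (i.e. 1 - keep_prob)
X_drop = tf.layers.dropout(X, rate=0.3, training=training)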
@@ -1126,8 +1165,6 @@
"source": [
"tf.reset_default_graph()\n",
"\n",
"from tensorflow.contrib.layers import dropout\n",
"\n",
"n_inputs = 28 * 28\n",
"n_hidden1 = 300\n",
"n_hidden2 = 150 # codings\n",

@@ -1136,7 +1173,7 @@
"\n",
"learning_rate = 0.01\n",
"l2_reg = 0.00001\n",
"keep_prob = 0.7\n",
"dropout_rate = 0.3\n",
"\n",
"activation = tf.nn.elu\n",
"regularizer = tf.contrib.layers.l2_regularizer(l2_reg)\n",

@@ -1145,7 +1182,7 @@
"X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n",
"is_training = tf.placeholder_with_default(False, shape=(), name='is_training')\n",
"\n",
"X_drop = dropout(X, keep_prob, is_training=is_training)\n",
"X_drop = tf.layers.dropout(X, dropout_rate, training=is_training)\n",
"\n",
"weights1_init = initializer([n_inputs, n_hidden1])\n",
"weights2_init = initializer([n_hidden1, n_hidden2])\n",
@@ -1177,7 +1214,7 @@
},
{
"cell_type": "code",
"execution_count": 31,
"execution_count": 32,
"metadata": {
"collapsed": false,
"deletable": true,

@@ -1204,7 +1241,7 @@
},
{
"cell_type": "code",
"execution_count": 32,
"execution_count": 33,
"metadata": {
"collapsed": false,
"deletable": true,

@@ -1227,7 +1264,7 @@
},
{
"cell_type": "code",
"execution_count": 33,
"execution_count": 34,
"metadata": {
"collapsed": false,
"deletable": true,

@@ -1242,7 +1279,7 @@
},
{
"cell_type": "code",
"execution_count": 34,
"execution_count": 35,
"metadata": {
"collapsed": false,
"deletable": true,

@@ -1270,7 +1307,7 @@
},
{
"cell_type": "code",
"execution_count": 35,
"execution_count": 36,
"metadata": {
"collapsed": false,
"deletable": true,

@@ -1295,7 +1332,7 @@
},
{
"cell_type": "code",
"execution_count": 36,
"execution_count": 37,
"metadata": {
"collapsed": true,
"deletable": true,

@@ -1310,7 +1347,7 @@
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": 38,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -1360,7 +1397,7 @@
},
{
"cell_type": "code",
"execution_count": 38,
"execution_count": 39,
"metadata": {
"collapsed": false,
"deletable": true,

@@ -1387,7 +1424,7 @@
},
{
"cell_type": "code",
"execution_count": 39,
"execution_count": 40,
"metadata": {
"collapsed": false,
"deletable": true,

@@ -1410,7 +1447,7 @@
},
{
"cell_type": "code",
"execution_count": 40,
"execution_count": 43,
"metadata": {
"collapsed": false,
"deletable": true,

@@ -1478,7 +1515,7 @@
},
{
"cell_type": "code",
"execution_count": 41,
"execution_count": 44,
"metadata": {
"collapsed": false,
"deletable": true,

@@ -1488,6 +1525,8 @@
"source": [
"tf.reset_default_graph()\n",
"\n",
"from functools import partial\n",
"\n",
"n_inputs = 28*28\n",
"n_hidden1 = 500\n",
"n_hidden2 = 500\n",
@@ -1500,20 +1539,22 @@
"\n",
"initializer = tf.contrib.layers.variance_scaling_initializer()\n",
"\n",
"with tf.contrib.framework.arg_scope([fully_connected],\n",
" activation_fn=tf.nn.elu,\n",
" weights_initializer=initializer):\n",
" X = tf.placeholder(tf.float32, [None, n_inputs])\n",
" hidden1 = fully_connected(X, n_hidden1)\n",
" hidden2 = fully_connected(hidden1, n_hidden2)\n",
" hidden3_mean = fully_connected(hidden2, n_hidden3, activation_fn=None)\n",
" hidden3_gamma = fully_connected(hidden2, n_hidden3, activation_fn=None)\n",
" noise = tf.random_normal(tf.shape(hidden3_gamma), dtype=tf.float32)\n",
" hidden3 = hidden3_mean + tf.exp(0.5 * hidden3_gamma) * noise\n",
" hidden4 = fully_connected(hidden3, n_hidden4)\n",
" hidden5 = fully_connected(hidden4, n_hidden5)\n",
" logits = fully_connected(hidden5, n_outputs, activation_fn=None)\n",
" outputs = tf.sigmoid(logits)\n",
"my_dense_layer = partial(\n",
" tf.layers.dense,\n",
" activation=tf.nn.elu,\n",
" kernel_initializer=initializer)\n",
"\n",
"X = tf.placeholder(tf.float32, [None, n_inputs])\n",
"hidden1 = my_dense_layer(X, n_hidden1)\n",
"hidden2 = my_dense_layer(hidden1, n_hidden2)\n",
"hidden3_mean = my_dense_layer(hidden2, n_hidden3, activation=None)\n",
"hidden3_gamma = my_dense_layer(hidden2, n_hidden3, activation=None)\n",
"noise = tf.random_normal(tf.shape(hidden3_gamma), dtype=tf.float32)\n",
"hidden3 = hidden3_mean + tf.exp(0.5 * hidden3_gamma) * noise\n",
"hidden4 = my_dense_layer(hidden3, n_hidden4)\n",
"hidden5 = my_dense_layer(hidden4, n_hidden5)\n",
"logits = my_dense_layer(hidden5, n_outputs, activation=None)\n",
"outputs = tf.sigmoid(logits)\n",
"\n",
"reconstruction_loss = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(labels=X, logits=logits))\n",
"latent_loss = 0.5 * tf.reduce_sum(tf.exp(hidden3_gamma) + tf.square(hidden3_mean) - 1 - hidden3_gamma)\n",
@@ -1528,7 +1569,7 @@
},
{
"cell_type": "code",
"execution_count": 42,
"execution_count": 45,
"metadata": {
"collapsed": false,
"deletable": true,

@@ -1565,7 +1606,7 @@
},
{
"cell_type": "code",
"execution_count": 43,
"execution_count": 46,
"metadata": {
"collapsed": false,
"deletable": true,

@@ -1594,7 +1635,7 @@
},
{
"cell_type": "code",
"execution_count": 44,
"execution_count": 47,
"metadata": {
"collapsed": false,
"deletable": true,

@@ -1619,7 +1660,7 @@
},
{
"cell_type": "code",
"execution_count": 45,
"execution_count": 48,
"metadata": {
"collapsed": false,
"deletable": true,

@@ -1647,7 +1688,7 @@
},
{
"cell_type": "code",
"execution_count": 46,
"execution_count": 49,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -1667,7 +1708,7 @@
},
{
"cell_type": "code",
"execution_count": 47,
"execution_count": 50,
"metadata": {
"collapsed": false,
"deletable": true,

@@ -1682,7 +1723,7 @@
},
{
"cell_type": "code",
"execution_count": 48,
"execution_count": 51,
"metadata": {
"collapsed": true,
"deletable": true,

@@ -1712,7 +1753,7 @@
},
{
"cell_type": "code",
"execution_count": 49,
"execution_count": 52,
"metadata": {
"collapsed": false,
"deletable": true,

@@ -1787,7 +1828,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2+"
"version": "3.5.3"
},
"nav_menu": {
"height": "381px",