diff --git a/11_training_deep_neural_networks.ipynb b/11_training_deep_neural_networks.ipynb index a9c7f98..2c84527 100644 --- a/11_training_deep_neural_networks.ipynb +++ b/11_training_deep_neural_networks.ipynb @@ -791,7 +791,7 @@ "source": [ "# extra code – just show that the model works! 😊\n", "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"sgd\",\n", - " metrics=\"accuracy\")\n", + " metrics=[\"accuracy\"])\n", "model.fit(X_train, y_train, epochs=2, validation_data=(X_valid, y_valid))" ] }, @@ -860,7 +860,7 @@ "source": [ "# extra code – just show that the model works! 😊\n", "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"sgd\",\n", - " metrics=\"accuracy\")\n", + " metrics=[\"accuracy\"])\n", "model.fit(X_train, y_train, epochs=2, validation_data=(X_valid, y_valid))" ] }, @@ -1031,7 +1031,7 @@ " metrics=[\"accuracy\"])\n", "history = model_A.fit(X_train_A, y_train_A, epochs=20,\n", " validation_data=(X_valid_A, y_valid_A))\n", - "model_A.save(\"my_model_A\")" + "model_A.save(\"my_model_A.keras\")" ] }, { @@ -1133,7 +1133,7 @@ "metadata": {}, "outputs": [], "source": [ - "model_A = tf.keras.models.load_model(\"my_model_A\")\n", + "model_A = tf.keras.models.load_model(\"my_model_A.keras\")\n", "model_B_on_A = tf.keras.Sequential(model_A.layers[:-1])\n", "model_B_on_A.add(tf.keras.layers.Dense(1, activation=\"sigmoid\"))" ] @@ -1910,7 +1910,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "**Note**: The `decay` argument in optimizers is deprecated. The old optimizers which implement the `decay` argument are still available in `tf.keras.optimizers.legacy`, but you should use the schedulers in `tf.keras.optimizers.schedules` instead." + "**Note**: Optimizers used to have a `decay` argument for this, but it was deprecated. You must use the schedulers in `tf.keras.optimizers.schedules` instead." 
] }, { @@ -1919,17 +1919,6 @@ "metadata": {}, "outputs": [], "source": [ - "# DEPRECATED:\n", - "optimizer = tf.keras.optimizers.legacy.SGD(learning_rate=0.01, decay=1e-4)" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "metadata": {}, - "outputs": [], - "source": [ - "# RECOMMENDED:\n", "lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(\n", " initial_learning_rate=0.01,\n", " decay_steps=10_000,\n", @@ -1948,7 +1937,7 @@ }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 66, "metadata": {}, "outputs": [ { @@ -1984,7 +1973,7 @@ }, { "cell_type": "code", - "execution_count": 68, + "execution_count": 67, "metadata": {}, "outputs": [ { @@ -2038,7 +2027,7 @@ }, { "cell_type": "code", - "execution_count": 69, + "execution_count": 68, "metadata": {}, "outputs": [], "source": [ @@ -2053,7 +2042,7 @@ }, { "cell_type": "code", - "execution_count": 70, + "execution_count": 69, "metadata": {}, "outputs": [ { @@ -2089,7 +2078,7 @@ }, { "cell_type": "code", - "execution_count": 71, + "execution_count": 70, "metadata": {}, "outputs": [ { @@ -2134,7 +2123,7 @@ }, { "cell_type": "code", - "execution_count": 72, + "execution_count": 71, "metadata": {}, "outputs": [], "source": [ @@ -2144,7 +2133,7 @@ }, { "cell_type": "code", - "execution_count": 73, + "execution_count": 72, "metadata": {}, "outputs": [], "source": [ @@ -2158,7 +2147,7 @@ }, { "cell_type": "code", - "execution_count": 74, + "execution_count": 73, "metadata": {}, "outputs": [], "source": [ @@ -2173,7 +2162,7 @@ }, { "cell_type": "code", - "execution_count": 75, + "execution_count": 74, "metadata": { "scrolled": true }, @@ -2253,7 +2242,7 @@ }, { "cell_type": "code", - "execution_count": 76, + "execution_count": 75, "metadata": {}, "outputs": [], "source": [ @@ -2270,7 +2259,7 @@ }, { "cell_type": "code", - "execution_count": 77, + "execution_count": 76, "metadata": {}, "outputs": [], "source": [ @@ -2283,18 +2272,18 @@ "\n", " def on_batch_begin(self, batch, 
logs=None):\n", " # Note: the `batch` argument is reset at each epoch\n", - " lr = K.get_value(self.model.optimizer.learning_rate)\n", + " lr = self.model.optimizer.learning_rate.numpy()\n", " new_learning_rate = lr * 0.1 ** (1 / self.n_steps)\n", - " K.set_value(self.model.optimizer.learning_rate, new_learning_rate)\n", + " self.model.optimizer.learning_rate = new_learning_rate\n", "\n", " def on_epoch_end(self, epoch, logs=None):\n", " logs = logs or {}\n", - " logs['lr'] = K.get_value(self.model.optimizer.learning_rate)" + " logs['lr'] = self.model.optimizer.learning_rate.numpy()" ] }, { "cell_type": "code", - "execution_count": 78, + "execution_count": 77, "metadata": {}, "outputs": [], "source": [ @@ -2307,7 +2296,7 @@ }, { "cell_type": "code", - "execution_count": 79, + "execution_count": 78, "metadata": {}, "outputs": [ { @@ -2387,7 +2376,7 @@ }, { "cell_type": "code", - "execution_count": 80, + "execution_count": 79, "metadata": {}, "outputs": [], "source": [ @@ -2400,7 +2389,7 @@ }, { "cell_type": "code", - "execution_count": 81, + "execution_count": 80, "metadata": {}, "outputs": [ { @@ -2436,7 +2425,7 @@ }, { "cell_type": "code", - "execution_count": 82, + "execution_count": 81, "metadata": { "scrolled": true }, @@ -2482,7 +2471,7 @@ }, { "cell_type": "code", - "execution_count": 83, + "execution_count": 82, "metadata": {}, "outputs": [], "source": [ @@ -2497,7 +2486,7 @@ }, { "cell_type": "code", - "execution_count": 84, + "execution_count": 83, "metadata": {}, "outputs": [], "source": [ @@ -2516,7 +2505,7 @@ }, { "cell_type": "code", - "execution_count": 85, + "execution_count": 84, "metadata": {}, "outputs": [ { @@ -2601,7 +2590,7 @@ }, { "cell_type": "code", - "execution_count": 86, + "execution_count": 85, "metadata": {}, "outputs": [ { @@ -2635,7 +2624,7 @@ }, { "cell_type": "code", - "execution_count": 87, + "execution_count": 86, "metadata": {}, "outputs": [], "source": [ @@ -2649,7 +2638,7 @@ }, { "cell_type": "code", - "execution_count": 88, + 
"execution_count": 87, "metadata": {}, "outputs": [ { @@ -2718,7 +2707,7 @@ }, { "cell_type": "code", - "execution_count": 89, + "execution_count": 88, "metadata": {}, "outputs": [ { @@ -2737,7 +2726,7 @@ "source": [ "# extra code – this cell plots performance scheduling\n", "\n", - "plt.plot(history.epoch, history.history[\"lr\"], \"bo-\")\n", + "plt.plot(history.epoch, history.history[\"learning_rate\"], \"bo-\")\n", "plt.xlabel(\"Epoch\")\n", "plt.ylabel(\"Learning Rate\", color='b')\n", "plt.tick_params('y', colors='b')\n", @@ -2769,7 +2758,7 @@ }, { "cell_type": "code", - "execution_count": 90, + "execution_count": 89, "metadata": {}, "outputs": [], "source": [ @@ -2789,10 +2778,10 @@ " new_sum_of_epoch_losses = mean_epoch_loss * (batch + 1)\n", " batch_loss = new_sum_of_epoch_losses - self.sum_of_epoch_losses\n", " self.sum_of_epoch_losses = new_sum_of_epoch_losses\n", - " self.rates.append(K.get_value(self.model.optimizer.learning_rate))\n", + " lr = self.model.optimizer.learning_rate.numpy()\n", + " self.rates.append(lr)\n", " self.losses.append(batch_loss)\n", - " K.set_value(self.model.optimizer.learning_rate,\n", - " self.model.optimizer.learning_rate * self.factor)" + " self.model.optimizer.learning_rate = lr * self.factor" ] }, { @@ -2804,7 +2793,7 @@ }, { "cell_type": "code", - "execution_count": 91, + "execution_count": 90, "metadata": {}, "outputs": [], "source": [ @@ -2814,11 +2803,11 @@ " iterations = math.ceil(len(X) / batch_size) * epochs\n", " factor = (max_rate / min_rate) ** (1 / iterations)\n", " init_lr = K.get_value(model.optimizer.learning_rate)\n", - " K.set_value(model.optimizer.learning_rate, min_rate)\n", + " model.optimizer.learning_rate = min_rate\n", " exp_lr = ExponentialLearningRate(factor)\n", " history = model.fit(X, y, epochs=epochs, batch_size=batch_size,\n", " callbacks=[exp_lr])\n", - " K.set_value(model.optimizer.learning_rate, init_lr)\n", + " model.optimizer.learning_rate = init_lr\n", " 
model.set_weights(init_weights)\n", " return exp_lr.rates, exp_lr.losses" ] @@ -2832,7 +2821,7 @@ }, { "cell_type": "code", - "execution_count": 92, + "execution_count": 91, "metadata": {}, "outputs": [], "source": [ @@ -2856,7 +2845,7 @@ }, { "cell_type": "code", - "execution_count": 93, + "execution_count": 92, "metadata": {}, "outputs": [], "source": [ @@ -2875,7 +2864,7 @@ }, { "cell_type": "code", - "execution_count": 94, + "execution_count": 93, "metadata": {}, "outputs": [ { @@ -2921,7 +2910,7 @@ }, { "cell_type": "code", - "execution_count": 95, + "execution_count": 94, "metadata": {}, "outputs": [], "source": [ @@ -2950,7 +2939,7 @@ " lr = self._interpolate(2 * self.half_iteration, self.iterations,\n", " self.start_lr, self.last_lr)\n", " self.iteration += 1\n", - " K.set_value(self.model.optimizer.learning_rate, lr)" + " self.model.optimizer.learning_rate = lr" ] }, { @@ -2962,7 +2951,7 @@ }, { "cell_type": "code", - "execution_count": 96, + "execution_count": 95, "metadata": {}, "outputs": [ { @@ -3051,7 +3040,7 @@ }, { "cell_type": "code", - "execution_count": 97, + "execution_count": 96, "metadata": {}, "outputs": [], "source": [ @@ -3069,7 +3058,7 @@ }, { "cell_type": "code", - "execution_count": 98, + "execution_count": 97, "metadata": {}, "outputs": [], "source": [ @@ -3078,7 +3067,7 @@ }, { "cell_type": "code", - "execution_count": 99, + "execution_count": 98, "metadata": {}, "outputs": [], "source": [ @@ -3099,7 +3088,7 @@ }, { "cell_type": "code", - "execution_count": 100, + "execution_count": 99, "metadata": {}, "outputs": [ { @@ -3131,7 +3120,7 @@ }, { "cell_type": "code", - "execution_count": 101, + "execution_count": 100, "metadata": {}, "outputs": [], "source": [ @@ -3140,7 +3129,7 @@ }, { "cell_type": "code", - "execution_count": 102, + "execution_count": 101, "metadata": {}, "outputs": [], "source": [ @@ -3159,7 +3148,7 @@ }, { "cell_type": "code", - "execution_count": 103, + "execution_count": 102, "metadata": {}, "outputs": [ { @@ 
-3207,7 +3196,7 @@ }, { "cell_type": "code", - "execution_count": 104, + "execution_count": 103, "metadata": {}, "outputs": [ { @@ -3223,7 +3212,7 @@ "[0.30816400051116943, 0.8849090933799744]" ] }, - "execution_count": 104, + "execution_count": 103, "metadata": {}, "output_type": "execute_result" } @@ -3234,7 +3223,7 @@ }, { "cell_type": "code", - "execution_count": 105, + "execution_count": 104, "metadata": {}, "outputs": [ { @@ -3250,7 +3239,7 @@ "[0.3628920316696167, 0.8700000047683716]" ] }, - "execution_count": 105, + "execution_count": 104, "metadata": {}, "output_type": "execute_result" } @@ -3275,7 +3264,7 @@ }, { "cell_type": "code", - "execution_count": 106, + "execution_count": 105, "metadata": {}, "outputs": [], "source": [ @@ -3284,7 +3273,7 @@ }, { "cell_type": "code", - "execution_count": 107, + "execution_count": 106, "metadata": {}, "outputs": [], "source": [ @@ -3295,7 +3284,7 @@ }, { "cell_type": "code", - "execution_count": 108, + "execution_count": 107, "metadata": {}, "outputs": [ { @@ -3305,7 +3294,7 @@ " 0.844]], dtype=float32)" ] }, - "execution_count": 108, + "execution_count": 107, "metadata": {}, "output_type": "execute_result" } @@ -3316,7 +3305,7 @@ }, { "cell_type": "code", - "execution_count": 109, + "execution_count": 108, "metadata": {}, "outputs": [ { @@ -3326,7 +3315,7 @@ " 0.723], dtype=float32)" ] }, - "execution_count": 109, + "execution_count": 108, "metadata": {}, "output_type": "execute_result" } @@ -3337,7 +3326,7 @@ }, { "cell_type": "code", - "execution_count": 110, + "execution_count": 109, "metadata": {}, "outputs": [ { @@ -3347,7 +3336,7 @@ " 0.183], dtype=float32)" ] }, - "execution_count": 110, + "execution_count": 109, "metadata": {}, "output_type": "execute_result" } @@ -3359,7 +3348,7 @@ }, { "cell_type": "code", - "execution_count": 111, + "execution_count": 110, "metadata": {}, "outputs": [ { @@ -3368,7 +3357,7 @@ "0.8717" ] }, - "execution_count": 111, + "execution_count": 110, "metadata": {}, "output_type": 
"execute_result" } @@ -3381,7 +3370,7 @@ }, { "cell_type": "code", - "execution_count": 112, + "execution_count": 111, "metadata": {}, "outputs": [], "source": [ @@ -3392,7 +3381,7 @@ }, { "cell_type": "code", - "execution_count": 113, + "execution_count": 112, "metadata": {}, "outputs": [], "source": [ @@ -3407,7 +3396,7 @@ }, { "cell_type": "code", - "execution_count": 114, + "execution_count": 113, "metadata": {}, "outputs": [ { @@ -3452,7 +3441,7 @@ }, { "cell_type": "code", - "execution_count": 115, + "execution_count": 114, "metadata": {}, "outputs": [ { @@ -3462,7 +3451,7 @@ " dtype=float32)" ] }, - "execution_count": 115, + "execution_count": 114, "metadata": {}, "output_type": "execute_result" } @@ -3483,7 +3472,7 @@ }, { "cell_type": "code", - "execution_count": 116, + "execution_count": 115, "metadata": {}, "outputs": [], "source": [ @@ -3494,7 +3483,7 @@ }, { "cell_type": "code", - "execution_count": 117, + "execution_count": 116, "metadata": {}, "outputs": [ { @@ -3589,7 +3578,7 @@ }, { "cell_type": "code", - "execution_count": 118, + "execution_count": 117, "metadata": {}, "outputs": [], "source": [ @@ -3620,7 +3609,7 @@ }, { "cell_type": "code", - "execution_count": 119, + "execution_count": 118, "metadata": {}, "outputs": [], "source": [ @@ -3636,7 +3625,7 @@ }, { "cell_type": "code", - "execution_count": 120, + "execution_count": 119, "metadata": {}, "outputs": [], "source": [ @@ -3655,7 +3644,7 @@ }, { "cell_type": "code", - "execution_count": 121, + "execution_count": 120, "metadata": {}, "outputs": [], "source": [ @@ -3677,13 +3666,13 @@ }, { "cell_type": "code", - "execution_count": 122, + "execution_count": 121, "metadata": {}, "outputs": [], "source": [ "early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=20,\n", " restore_best_weights=True)\n", - "model_checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(\"my_cifar10_model\",\n", + "model_checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(\"my_cifar10_model.keras\",\n", " 
save_best_only=True)\n", "run_index = 1 # increment every time you train the model\n", "run_logdir = Path() / \"my_cifar10_logs\" / f\"run_{run_index:03d}\"\n", @@ -3693,7 +3682,7 @@ }, { "cell_type": "code", - "execution_count": 123, + "execution_count": 122, "metadata": {}, "outputs": [ { @@ -3730,7 +3719,7 @@ }, { "cell_type": "code", - "execution_count": 124, + "execution_count": 123, "metadata": {}, "outputs": [ { @@ -3816,7 +3805,7 @@ "" ] }, - "execution_count": 124, + "execution_count": 123, "metadata": {}, "output_type": "execute_result" } @@ -3829,7 +3818,7 @@ }, { "cell_type": "code", - "execution_count": 125, + "execution_count": 124, "metadata": {}, "outputs": [ { @@ -3845,7 +3834,7 @@ "[1.5061508417129517, 0.4675999879837036]" ] }, - "execution_count": 125, + "execution_count": 124, "metadata": {}, "output_type": "execute_result" } @@ -3882,7 +3871,7 @@ }, { "cell_type": "code", - "execution_count": 126, + "execution_count": 125, "metadata": {}, "outputs": [ { @@ -3968,7 +3957,7 @@ "[1.4236289262771606, 0.5073999762535095]" ] }, - "execution_count": 126, + "execution_count": 125, "metadata": {}, "output_type": "execute_result" } @@ -3992,7 +3981,7 @@ "\n", "early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=20,\n", " restore_best_weights=True)\n", - "model_checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(\"my_cifar10_bn_model\",\n", + "model_checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(\"my_cifar10_bn_model.keras\",\n", " save_best_only=True)\n", "run_index = 1 # increment every time you train the model\n", "run_logdir = Path() / \"my_cifar10_logs\" / f\"run_bn_{run_index:03d}\"\n", @@ -4025,7 +4014,7 @@ }, { "cell_type": "code", - "execution_count": 127, + "execution_count": 126, "metadata": { "scrolled": true }, @@ -4119,7 +4108,7 @@ "[1.4607702493667603, 0.5026000142097473]" ] }, - "execution_count": 127, + "execution_count": 126, "metadata": {}, "output_type": "execute_result" } @@ -4144,7 +4133,7 @@ "early_stopping_cb = 
tf.keras.callbacks.EarlyStopping(\n", " patience=20, restore_best_weights=True)\n", "model_checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(\n", - " \"my_cifar10_selu_model\", save_best_only=True)\n", + " \"my_cifar10_selu_model.keras\", save_best_only=True)\n", "run_index = 1 # increment every time you train the model\n", "run_logdir = Path() / \"my_cifar10_logs\" / f\"run_selu_{run_index:03d}\"\n", "tensorboard_cb = tf.keras.callbacks.TensorBoard(run_logdir)\n", @@ -4178,6 +4167,22 @@ "*Exercise: Try regularizing the model with alpha dropout. Then, without retraining your model, see if you can achieve better accuracy using MC Dropout.*" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Warning**: there are now two versions of `AlphaDropout`. One is deprecated and also broken in some recent versions of TF, and unfortunately that's the version in the `tensorflow` library. Luckily, there's a perfectly fine version in the `keras` library (i.e., `keras`, not `tf.keras`). 
It's neither deprecated nor broken, so let's import and use that one:" + ] + }, + { + "cell_type": "code", + "execution_count": 127, + "metadata": {}, + "outputs": [], + "source": [ + "import keras" + ] + }, { "cell_type": "code", "execution_count": 128, @@ -4275,7 +4280,7 @@ " kernel_initializer=\"lecun_normal\",\n", " activation=\"selu\"))\n", "\n", - "model.add(tf.keras.layers.AlphaDropout(rate=0.1))\n", + "model.add(keras.layers.AlphaDropout(rate=0.1))\n", "model.add(tf.keras.layers.Dense(10, activation=\"softmax\"))\n", "\n", "optimizer = tf.keras.optimizers.Nadam(learning_rate=5e-4)\n", @@ -4286,7 +4291,7 @@ "early_stopping_cb = tf.keras.callbacks.EarlyStopping(\n", " patience=20, restore_best_weights=True)\n", "model_checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(\n", - " \"my_cifar10_alpha_dropout_model\", save_best_only=True)\n", + " \"my_cifar10_alpha_dropout_model.keras\", save_best_only=True)\n", "run_index = 1 # increment every time you train the model\n", "run_logdir = Path() / \"my_cifar10_logs\" / f\"run_alpha_dropout_{run_index:03d}\"\n", "tensorboard_cb = tf.keras.callbacks.TensorBoard(run_logdir)\n", @@ -4325,7 +4330,7 @@ "metadata": {}, "outputs": [], "source": [ - "class MCAlphaDropout(tf.keras.layers.AlphaDropout):\n", + "class MCAlphaDropout(keras.layers.AlphaDropout):\n", " def call(self, inputs):\n", " return super().call(inputs, training=True)" ] @@ -4346,7 +4351,7 @@ "mc_model = tf.keras.Sequential([\n", " (\n", " MCAlphaDropout(layer.rate)\n", - " if isinstance(layer, tf.keras.layers.AlphaDropout)\n", + " if isinstance(layer, keras.layers.AlphaDropout)\n", " else layer\n", " )\n", " for layer in model.layers\n", @@ -4438,7 +4443,7 @@ " kernel_initializer=\"lecun_normal\",\n", " activation=\"selu\"))\n", "\n", - "model.add(tf.keras.layers.AlphaDropout(rate=0.1))\n", + "model.add(keras.layers.AlphaDropout(rate=0.1))\n", "model.add(tf.keras.layers.Dense(10, activation=\"softmax\"))\n", "\n", "optimizer = 
tf.keras.optimizers.SGD()\n", @@ -4494,7 +4499,7 @@ " kernel_initializer=\"lecun_normal\",\n", " activation=\"selu\"))\n", "\n", - "model.add(tf.keras.layers.AlphaDropout(rate=0.1))\n", + "model.add(keras.layers.AlphaDropout(rate=0.1))\n", "model.add(tf.keras.layers.Dense(10, activation=\"softmax\"))\n", "\n", "optimizer = tf.keras.optimizers.SGD(learning_rate=2e-2)\n", @@ -4564,7 +4569,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -4578,7 +4583,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.9.10" }, "nav_menu": { "height": "360px",