Update notebook to latest Colab library versions

Aurélien Geron
2024-10-05 16:58:19 +13:00
parent bdad82649e
commit c2826cbd0b


@@ -791,7 +791,7 @@
"source": [ "source": [
"# extra code just show that the model works! 😊\n", "# extra code just show that the model works! 😊\n",
"model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"sgd\",\n", "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"sgd\",\n",
" metrics=\"accuracy\")\n", " metrics=[\"accuracy\"])\n",
"model.fit(X_train, y_train, epochs=2, validation_data=(X_valid, y_valid))" "model.fit(X_train, y_train, epochs=2, validation_data=(X_valid, y_valid))"
] ]
}, },
@@ -860,7 +860,7 @@
"source": [ "source": [
"# extra code just show that the model works! 😊\n", "# extra code just show that the model works! 😊\n",
"model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"sgd\",\n", "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"sgd\",\n",
" metrics=\"accuracy\")\n", " metrics=[\"accuracy\"])\n",
"model.fit(X_train, y_train, epochs=2, validation_data=(X_valid, y_valid))" "model.fit(X_train, y_train, epochs=2, validation_data=(X_valid, y_valid))"
] ]
}, },
@@ -1031,7 +1031,7 @@
" metrics=[\"accuracy\"])\n", " metrics=[\"accuracy\"])\n",
"history = model_A.fit(X_train_A, y_train_A, epochs=20,\n", "history = model_A.fit(X_train_A, y_train_A, epochs=20,\n",
" validation_data=(X_valid_A, y_valid_A))\n", " validation_data=(X_valid_A, y_valid_A))\n",
"model_A.save(\"my_model_A\")" "model_A.save(\"my_model_A.keras\")"
] ]
}, },
{ {
@@ -1133,7 +1133,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"model_A = tf.keras.models.load_model(\"my_model_A\")\n", "model_A = tf.keras.models.load_model(\"my_model_A.keras\")\n",
"model_B_on_A = tf.keras.Sequential(model_A.layers[:-1])\n", "model_B_on_A = tf.keras.Sequential(model_A.layers[:-1])\n",
"model_B_on_A.add(tf.keras.layers.Dense(1, activation=\"sigmoid\"))" "model_B_on_A.add(tf.keras.layers.Dense(1, activation=\"sigmoid\"))"
] ]
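
The two hunks above switch model saving and loading to the native Keras format, which recent Keras versions infer from the `.keras` file extension. A minimal sketch of the round trip, assuming a small stand-in model (the architecture here is illustrative, not the notebook's):

```python
import tensorflow as tf

# Hypothetical tiny model, just to illustrate the .keras save/load round trip.
model_A = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(4,)),
    tf.keras.layers.Dense(8, activation="relu"),
    tf.keras.layers.Dense(1, activation="sigmoid"),
])

model_A.save("my_model_A.keras")                        # native Keras format, single file
model_A = tf.keras.models.load_model("my_model_A.keras")
```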
@@ -1910,7 +1910,7 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"**Note**: The `decay` argument in optimizers is deprecated. The old optimizers which implement the `decay` argument are still available in `tf.keras.optimizers.legacy`, but you should use the schedulers in `tf.keras.optimizers.schedules` instead." "**Note**: Optimizers used to have a `decay` argument for this, but it was deprecated. You must use the schedulers in `tf.keras.optimizers.schedules` instead."
] ]
}, },
{ {
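
The note above replaces the legacy `decay` argument with a schedule object passed directly as the optimizer's learning rate. A minimal sketch of that pattern, assuming hyperparameters similar to the schedule shown in the next hunk (the `decay_rate` value here is illustrative):

```python
import tensorflow as tf

# A schedule object replaces the old `decay` argument: pass it as the
# optimizer's learning rate. Values below are illustrative.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.01,
    decay_steps=10_000,
    decay_rate=1.0,
)
optimizer = tf.keras.optimizers.SGD(learning_rate=lr_schedule)
```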
@@ -1919,17 +1919,6 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# DEPRECATED:\n",
"optimizer = tf.keras.optimizers.legacy.SGD(learning_rate=0.01, decay=1e-4)"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [],
"source": [
"# RECOMMENDED:\n",
"lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(\n", "lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(\n",
" initial_learning_rate=0.01,\n", " initial_learning_rate=0.01,\n",
" decay_steps=10_000,\n", " decay_steps=10_000,\n",
@@ -1948,7 +1937,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 67, "execution_count": 66,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@@ -1984,7 +1973,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 68, "execution_count": 67,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@@ -2038,7 +2027,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 69, "execution_count": 68,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -2053,7 +2042,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 70, "execution_count": 69,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@@ -2089,7 +2078,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 71, "execution_count": 70,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@@ -2134,7 +2123,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 72, "execution_count": 71,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -2144,7 +2133,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 73, "execution_count": 72,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -2158,7 +2147,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 74, "execution_count": 73,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -2173,7 +2162,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 75, "execution_count": 74,
"metadata": { "metadata": {
"scrolled": true "scrolled": true
}, },
@@ -2253,7 +2242,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 76, "execution_count": 75,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -2270,7 +2259,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 77, "execution_count": 76,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -2283,18 +2272,18 @@
"\n", "\n",
" def on_batch_begin(self, batch, logs=None):\n", " def on_batch_begin(self, batch, logs=None):\n",
" # Note: the `batch` argument is reset at each epoch\n", " # Note: the `batch` argument is reset at each epoch\n",
" lr = K.get_value(self.model.optimizer.learning_rate)\n", " lr = self.model.optimizer.learning_rate.numpy()\n",
" new_learning_rate = lr * 0.1 ** (1 / self.n_steps)\n", " new_learning_rate = lr * 0.1 ** (1 / self.n_steps)\n",
" K.set_value(self.model.optimizer.learning_rate, new_learning_rate)\n", " self.model.optimizer.learning_rate = new_learning_rate\n",
"\n", "\n",
" def on_epoch_end(self, epoch, logs=None):\n", " def on_epoch_end(self, epoch, logs=None):\n",
" logs = logs or {}\n", " logs = logs or {}\n",
" logs['lr'] = K.get_value(self.model.optimizer.learning_rate)" " logs['lr'] = self.model.optimizer.learning_rate.numpy()"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 78, "execution_count": 77,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -2307,7 +2296,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 79, "execution_count": 78,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@@ -2387,7 +2376,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 80, "execution_count": 79,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -2400,7 +2389,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 81, "execution_count": 80,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@@ -2436,7 +2425,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 82, "execution_count": 81,
"metadata": { "metadata": {
"scrolled": true "scrolled": true
}, },
@@ -2482,7 +2471,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 83, "execution_count": 82,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -2497,7 +2486,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 84, "execution_count": 83,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -2516,7 +2505,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 85, "execution_count": 84,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@@ -2601,7 +2590,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 86, "execution_count": 85,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@@ -2635,7 +2624,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 87, "execution_count": 86,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -2649,7 +2638,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 88, "execution_count": 87,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@@ -2718,7 +2707,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 89, "execution_count": 88,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@@ -2737,7 +2726,7 @@
"source": [ "source": [
"# extra code this cell plots performance scheduling\n", "# extra code this cell plots performance scheduling\n",
"\n", "\n",
"plt.plot(history.epoch, history.history[\"lr\"], \"bo-\")\n", "plt.plot(history.epoch, history.history[\"learning_rate\"], \"bo-\")\n",
"plt.xlabel(\"Epoch\")\n", "plt.xlabel(\"Epoch\")\n",
"plt.ylabel(\"Learning Rate\", color='b')\n", "plt.ylabel(\"Learning Rate\", color='b')\n",
"plt.tick_params('y', colors='b')\n", "plt.tick_params('y', colors='b')\n",
@@ -2769,7 +2758,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 90, "execution_count": 89,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -2789,10 +2778,10 @@
" new_sum_of_epoch_losses = mean_epoch_loss * (batch + 1)\n", " new_sum_of_epoch_losses = mean_epoch_loss * (batch + 1)\n",
" batch_loss = new_sum_of_epoch_losses - self.sum_of_epoch_losses\n", " batch_loss = new_sum_of_epoch_losses - self.sum_of_epoch_losses\n",
" self.sum_of_epoch_losses = new_sum_of_epoch_losses\n", " self.sum_of_epoch_losses = new_sum_of_epoch_losses\n",
" self.rates.append(K.get_value(self.model.optimizer.learning_rate))\n", " lr = self.model.optimizer.learning_rate.numpy()\n",
" self.rates.append(lr)\n",
" self.losses.append(batch_loss)\n", " self.losses.append(batch_loss)\n",
" K.set_value(self.model.optimizer.learning_rate,\n", " self.model.optimizer.learning_rate = lr * self.factor"
" self.model.optimizer.learning_rate * self.factor)"
] ]
}, },
{ {
@@ -2804,7 +2793,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 91, "execution_count": 90,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -2814,11 +2803,11 @@
" iterations = math.ceil(len(X) / batch_size) * epochs\n", " iterations = math.ceil(len(X) / batch_size) * epochs\n",
" factor = (max_rate / min_rate) ** (1 / iterations)\n", " factor = (max_rate / min_rate) ** (1 / iterations)\n",
" init_lr = K.get_value(model.optimizer.learning_rate)\n", " init_lr = K.get_value(model.optimizer.learning_rate)\n",
" K.set_value(model.optimizer.learning_rate, min_rate)\n", " model.optimizer.learning_rate = min_rate\n",
" exp_lr = ExponentialLearningRate(factor)\n", " exp_lr = ExponentialLearningRate(factor)\n",
" history = model.fit(X, y, epochs=epochs, batch_size=batch_size,\n", " history = model.fit(X, y, epochs=epochs, batch_size=batch_size,\n",
" callbacks=[exp_lr])\n", " callbacks=[exp_lr])\n",
" K.set_value(model.optimizer.learning_rate, init_lr)\n", " model.optimizer.learning_rate = init_lr\n",
" model.set_weights(init_weights)\n", " model.set_weights(init_weights)\n",
" return exp_lr.rates, exp_lr.losses" " return exp_lr.rates, exp_lr.losses"
] ]
@@ -2832,7 +2821,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 92, "execution_count": 91,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -2856,7 +2845,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 93, "execution_count": 92,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -2875,7 +2864,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 94, "execution_count": 93,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@@ -2921,7 +2910,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 95, "execution_count": 94,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -2950,7 +2939,7 @@
" lr = self._interpolate(2 * self.half_iteration, self.iterations,\n", " lr = self._interpolate(2 * self.half_iteration, self.iterations,\n",
" self.start_lr, self.last_lr)\n", " self.start_lr, self.last_lr)\n",
" self.iteration += 1\n", " self.iteration += 1\n",
" K.set_value(self.model.optimizer.learning_rate, lr)" " self.model.optimizer.learning_rate = lr"
] ]
}, },
{ {
@@ -2962,7 +2951,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 96, "execution_count": 95,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@@ -3051,7 +3040,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 97, "execution_count": 96,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -3069,7 +3058,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 98, "execution_count": 97,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -3078,7 +3067,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 99, "execution_count": 98,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -3099,7 +3088,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 100, "execution_count": 99,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@@ -3131,7 +3120,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 101, "execution_count": 100,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -3140,7 +3129,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 102, "execution_count": 101,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -3159,7 +3148,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 103, "execution_count": 102,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@@ -3207,7 +3196,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 104, "execution_count": 103,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@@ -3223,7 +3212,7 @@
"[0.30816400051116943, 0.8849090933799744]" "[0.30816400051116943, 0.8849090933799744]"
] ]
}, },
"execution_count": 104, "execution_count": 103,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@@ -3234,7 +3223,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 105, "execution_count": 104,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@@ -3250,7 +3239,7 @@
"[0.3628920316696167, 0.8700000047683716]" "[0.3628920316696167, 0.8700000047683716]"
] ]
}, },
"execution_count": 105, "execution_count": 104,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@@ -3275,7 +3264,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 106, "execution_count": 105,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -3284,7 +3273,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 107, "execution_count": 106,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -3295,7 +3284,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 108, "execution_count": 107,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@@ -3305,7 +3294,7 @@
" 0.844]], dtype=float32)" " 0.844]], dtype=float32)"
] ]
}, },
"execution_count": 108, "execution_count": 107,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@@ -3316,7 +3305,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 109, "execution_count": 108,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@@ -3326,7 +3315,7 @@
" 0.723], dtype=float32)" " 0.723], dtype=float32)"
] ]
}, },
"execution_count": 109, "execution_count": 108,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@@ -3337,7 +3326,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 110, "execution_count": 109,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@@ -3347,7 +3336,7 @@
" 0.183], dtype=float32)" " 0.183], dtype=float32)"
] ]
}, },
"execution_count": 110, "execution_count": 109,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@@ -3359,7 +3348,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 111, "execution_count": 110,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@@ -3368,7 +3357,7 @@
"0.8717" "0.8717"
] ]
}, },
"execution_count": 111, "execution_count": 110,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@@ -3381,7 +3370,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 112, "execution_count": 111,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -3392,7 +3381,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 113, "execution_count": 112,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -3407,7 +3396,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 114, "execution_count": 113,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@@ -3452,7 +3441,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 115, "execution_count": 114,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@@ -3462,7 +3451,7 @@
" dtype=float32)" " dtype=float32)"
] ]
}, },
"execution_count": 115, "execution_count": 114,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@@ -3483,7 +3472,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 116, "execution_count": 115,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -3494,7 +3483,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 117, "execution_count": 116,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@@ -3589,7 +3578,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 118, "execution_count": 117,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -3620,7 +3609,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 119, "execution_count": 118,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -3636,7 +3625,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 120, "execution_count": 119,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -3655,7 +3644,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 121, "execution_count": 120,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -3677,13 +3666,13 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 122, "execution_count": 121,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=20,\n", "early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=20,\n",
" restore_best_weights=True)\n", " restore_best_weights=True)\n",
"model_checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(\"my_cifar10_model\",\n", "model_checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(\"my_cifar10_model.keras\",\n",
" save_best_only=True)\n", " save_best_only=True)\n",
"run_index = 1 # increment every time you train the model\n", "run_index = 1 # increment every time you train the model\n",
"run_logdir = Path() / \"my_cifar10_logs\" / f\"run_{run_index:03d}\"\n", "run_logdir = Path() / \"my_cifar10_logs\" / f\"run_{run_index:03d}\"\n",
@@ -3693,7 +3682,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 123, "execution_count": 122,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@@ -3730,7 +3719,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 124, "execution_count": 123,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@@ -3816,7 +3805,7 @@
"<keras.callbacks.History at 0x7fb9f02fc070>" "<keras.callbacks.History at 0x7fb9f02fc070>"
] ]
}, },
"execution_count": 124, "execution_count": 123,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@@ -3829,7 +3818,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 125, "execution_count": 124,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@@ -3845,7 +3834,7 @@
"[1.5061508417129517, 0.4675999879837036]" "[1.5061508417129517, 0.4675999879837036]"
] ]
}, },
"execution_count": 125, "execution_count": 124,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@@ -3882,7 +3871,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 126, "execution_count": 125,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@@ -3968,7 +3957,7 @@
"[1.4236289262771606, 0.5073999762535095]" "[1.4236289262771606, 0.5073999762535095]"
] ]
}, },
"execution_count": 126, "execution_count": 125,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@@ -3992,7 +3981,7 @@
"\n", "\n",
"early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=20,\n", "early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=20,\n",
" restore_best_weights=True)\n", " restore_best_weights=True)\n",
"model_checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(\"my_cifar10_bn_model\",\n", "model_checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(\"my_cifar10_bn_model.keras\",\n",
" save_best_only=True)\n", " save_best_only=True)\n",
"run_index = 1 # increment every time you train the model\n", "run_index = 1 # increment every time you train the model\n",
"run_logdir = Path() / \"my_cifar10_logs\" / f\"run_bn_{run_index:03d}\"\n", "run_logdir = Path() / \"my_cifar10_logs\" / f\"run_bn_{run_index:03d}\"\n",
@@ -4025,7 +4014,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 127, "execution_count": 126,
"metadata": { "metadata": {
"scrolled": true "scrolled": true
}, },
@@ -4119,7 +4108,7 @@
"[1.4607702493667603, 0.5026000142097473]" "[1.4607702493667603, 0.5026000142097473]"
] ]
}, },
"execution_count": 127, "execution_count": 126,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@@ -4144,7 +4133,7 @@
"early_stopping_cb = tf.keras.callbacks.EarlyStopping(\n", "early_stopping_cb = tf.keras.callbacks.EarlyStopping(\n",
" patience=20, restore_best_weights=True)\n", " patience=20, restore_best_weights=True)\n",
"model_checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(\n", "model_checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(\n",
" \"my_cifar10_selu_model\", save_best_only=True)\n", " \"my_cifar10_selu_model.keras\", save_best_only=True)\n",
"run_index = 1 # increment every time you train the model\n", "run_index = 1 # increment every time you train the model\n",
"run_logdir = Path() / \"my_cifar10_logs\" / f\"run_selu_{run_index:03d}\"\n", "run_logdir = Path() / \"my_cifar10_logs\" / f\"run_selu_{run_index:03d}\"\n",
"tensorboard_cb = tf.keras.callbacks.TensorBoard(run_logdir)\n", "tensorboard_cb = tf.keras.callbacks.TensorBoard(run_logdir)\n",
@@ -4178,6 +4167,22 @@
"*Exercise: Try regularizing the model with alpha dropout. Then, without retraining your model, see if you can achieve better accuracy using MC Dropout.*" "*Exercise: Try regularizing the model with alpha dropout. Then, without retraining your model, see if you can achieve better accuracy using MC Dropout.*"
] ]
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Warning**: there are now two versions of `AlphaDropout`. One is deprecated and also broken in some recent versions of TF, and unfortunately that's the version in the `tensorflow` library. Luckily, there's a perfectly fine version in the `keras` library (i.e., `keras`, not `tf.keras`). It's neither deprecated nor broken, so let's import and use that one:"
]
},
{
"cell_type": "code",
"execution_count": 127,
"metadata": {},
"outputs": [],
"source": [
"import keras.layers.AlphaDropout"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 128, "execution_count": 128,
@@ -4275,7 +4280,7 @@
" kernel_initializer=\"lecun_normal\",\n", " kernel_initializer=\"lecun_normal\",\n",
" activation=\"selu\"))\n", " activation=\"selu\"))\n",
"\n", "\n",
"model.add(tf.keras.layers.AlphaDropout(rate=0.1))\n", "model.add(keras.layers.AlphaDropout(rate=0.1))\n",
"model.add(tf.keras.layers.Dense(10, activation=\"softmax\"))\n", "model.add(tf.keras.layers.Dense(10, activation=\"softmax\"))\n",
"\n", "\n",
"optimizer = tf.keras.optimizers.Nadam(learning_rate=5e-4)\n", "optimizer = tf.keras.optimizers.Nadam(learning_rate=5e-4)\n",
@@ -4286,7 +4291,7 @@
"early_stopping_cb = tf.keras.callbacks.EarlyStopping(\n", "early_stopping_cb = tf.keras.callbacks.EarlyStopping(\n",
" patience=20, restore_best_weights=True)\n", " patience=20, restore_best_weights=True)\n",
"model_checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(\n", "model_checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(\n",
" \"my_cifar10_alpha_dropout_model\", save_best_only=True)\n", " \"my_cifar10_alpha_dropout_model.keras\", save_best_only=True)\n",
"run_index = 1 # increment every time you train the model\n", "run_index = 1 # increment every time you train the model\n",
"run_logdir = Path() / \"my_cifar10_logs\" / f\"run_alpha_dropout_{run_index:03d}\"\n", "run_logdir = Path() / \"my_cifar10_logs\" / f\"run_alpha_dropout_{run_index:03d}\"\n",
"tensorboard_cb = tf.keras.callbacks.TensorBoard(run_logdir)\n", "tensorboard_cb = tf.keras.callbacks.TensorBoard(run_logdir)\n",
@@ -4325,7 +4330,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"class MCAlphaDropout(tf.keras.layers.AlphaDropout):\n", "class MCAlphaDropout(keras.layers.AlphaDropout):\n",
" def call(self, inputs):\n", " def call(self, inputs):\n",
" return super().call(inputs, training=True)" " return super().call(inputs, training=True)"
] ]
@@ -4346,7 +4351,7 @@
"mc_model = tf.keras.Sequential([\n", "mc_model = tf.keras.Sequential([\n",
" (\n", " (\n",
" MCAlphaDropout(layer.rate)\n", " MCAlphaDropout(layer.rate)\n",
" if isinstance(layer, tf.keras.layers.AlphaDropout)\n", " if isinstance(layer, keras.layers.AlphaDropout)\n",
" else layer\n", " else layer\n",
" )\n", " )\n",
" for layer in model.layers\n", " for layer in model.layers\n",
@@ -4438,7 +4443,7 @@
" kernel_initializer=\"lecun_normal\",\n", " kernel_initializer=\"lecun_normal\",\n",
" activation=\"selu\"))\n", " activation=\"selu\"))\n",
"\n", "\n",
"model.add(tf.keras.layers.AlphaDropout(rate=0.1))\n", "model.add(keras.layers.AlphaDropout(rate=0.1))\n",
"model.add(tf.keras.layers.Dense(10, activation=\"softmax\"))\n", "model.add(tf.keras.layers.Dense(10, activation=\"softmax\"))\n",
"\n", "\n",
"optimizer = tf.keras.optimizers.SGD()\n", "optimizer = tf.keras.optimizers.SGD()\n",
@@ -4494,7 +4499,7 @@
" kernel_initializer=\"lecun_normal\",\n", " kernel_initializer=\"lecun_normal\",\n",
" activation=\"selu\"))\n", " activation=\"selu\"))\n",
"\n", "\n",
"model.add(tf.keras.layers.AlphaDropout(rate=0.1))\n", "model.add(keras.layers.AlphaDropout(rate=0.1))\n",
"model.add(tf.keras.layers.Dense(10, activation=\"softmax\"))\n", "model.add(tf.keras.layers.Dense(10, activation=\"softmax\"))\n",
"\n", "\n",
"optimizer = tf.keras.optimizers.SGD(learning_rate=2e-2)\n", "optimizer = tf.keras.optimizers.SGD(learning_rate=2e-2)\n",
@@ -4564,7 +4569,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3 (ipykernel)", "display_name": "Python 3",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },
@@ -4578,7 +4583,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.10.13" "version": "3.9.10"
}, },
"nav_menu": { "nav_menu": {
"height": "360px", "height": "360px",