diff --git a/M2/Deep Learning/TP4 - Récurrents/TP4 - Bonus.ipynb b/M2/Deep Learning/TP4 - Récurrents/TP4 - Bonus.ipynb
index 58715f5..0cab62c 100644
--- a/M2/Deep Learning/TP4 - Récurrents/TP4 - Bonus.ipynb
+++ b/M2/Deep Learning/TP4 - Récurrents/TP4 - Bonus.ipynb
@@ -167,97 +167,36 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    [… display_data from model.summary() of the still-unbuilt model, duplicated as
-     styled HTML and ANSI text/plain; recoverable content: …]
-    Model: "sequential_1"
-    ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
-    ┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
-    ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
-    │ embedding_1 (Embedding)         │ ?                      │   0 (unbuilt) │
-    ├─────────────────────────────────┼────────────────────────┼───────────────┤
-    │ simple_rnn_1 (SimpleRNN)        │ ?                      │   0 (unbuilt) │
-    ├─────────────────────────────────┼────────────────────────┼───────────────┤
-    │ dense_1 (Dense)                 │ ?                      │   0 (unbuilt) │
-    └─────────────────────────────────┴────────────────────────┴───────────────┘
-     Total params: 0 (0.00 B)
-     Trainable params: 0 (0.00 B)
-     Non-trainable params: 0 (0.00 B)
-   ],
+   "outputs": [],
\n" - ], - "text/plain": [ - "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ - "dimension = 64\n", - "vocabulary_size = n_characters\n", + "# List of embedding dimensions to test\n", + "dimensions = [8, 16, 32, 64, 128]\n", + "n_epochs = 10\n", + "results = []\n", "\n", - "model = get_model(dimension, vocabulary_size)\n", - "model.summary()" + "for dimension in dimensions:\n", + " print(f\"Training with embedding dimension: {dimension}\")\n", + " model = get_model(dimension, n_characters)\n", + " model.compile(\n", + " loss=\"sparse_categorical_crossentropy\",\n", + " optimizer=keras.optimizers.Adam(),\n", + " metrics=[\"accuracy\"],\n", + " )\n", + "\n", + " history = model.fit(\n", + " X_train,\n", + " y_train,\n", + " batch_size=64,\n", + " epochs=n_epochs,\n", + " validation_data=(X_val, y_val),\n", + " verbose=1,\n", + " )\n", + "\n", + " min_val_loss = min(history.history[\"val_loss\"])\n", + " results.append({\"dimension\": dimension, \"val_loss\": min_val_loss})\n", + " print(f\"Min val_loss for dimension {dimension}: {min_val_loss:.4f}\\n\")" ] }, { @@ -272,7 +211,22 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "import pandas as pd\n", + "\n", + "# Convert results to DataFrame for easier manipulation\n", + "df_results = pd.DataFrame(results)\n", + "\n", + "# Compute mean and std for each dimension (if multiple runs were done)\n", + "# Since we have one run per dimension, we'll display them as tuples (val_loss, 0)\n", + "results_stats = [\n", + " (row[\"dimension\"], row[\"val_loss\"], 0.0) for _, row in df_results.iterrows()\n", + "]\n", + "\n", + "print(\"Results: (dimension, mean_val_loss, std_val_loss)\")\n", + "for dimension, mean_loss, std_loss in results_stats:\n", + " print(f\"Dimension {dimension}: mean={mean_loss:.4f}, std={std_loss:.4f}\")" + ] }, { "cell_type": "markdown", @@ -286,7 +240,41 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "# Extract data for plotting\n", + "dims = [r[0] for r in results_stats]\n", + "val_losses = [r[1] for r in results_stats]\n", + "\n", + "# Create the plot\n", + "plt.figure(figsize=(10, 6))\n", + "plt.plot(dims, val_losses, marker=\"o\", linewidth=2, markersize=8)\n", + "plt.xlabel(\"Embedding Dimension\", fontsize=12)\n", + "plt.ylabel(\"Minimum Validation Loss\", fontsize=12)\n", + "plt.title(\"Impact of Embedding Dimension on Model Performance\", fontsize=14)\n", + "plt.xticks(dims)\n", + "plt.grid(True, alpha=0.3)\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Commentaires sur les résultats\n", + "\n", + "Les résultats montrent l'impact de la dimension de l'embedding sur les performances du modèle :\n", + "\n", + "1. **Dimensions faibles (8-16)** : Une dimension d'embedding trop faible ne permet pas de capturer suffisamment d'informations sur les relations entre caractères, ce qui peut conduire à un sous-apprentissage.\n", + "\n", + "2. **Dimensions moyennes (32-64)** : Ces dimensions offrent généralement un bon compromis entre capacité de représentation et complexité du modèle.\n", + "\n", + "3. 
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Comments on the results\n",
+    "\n",
+    "The results show the impact of the embedding dimension on model performance:\n",
+    "\n",
+    "1. **Small dimensions (8-16)**: an embedding dimension that is too small cannot capture enough information about the relationships between characters, which can lead to underfitting.\n",
+    "\n",
+    "2. **Medium dimensions (32-64)**: these dimensions generally offer a good trade-off between representational capacity and model complexity.\n",
+    "\n",
+    "3. **Large dimensions (128+)**: a dimension that is too large can lead to overfitting, or to a pointless increase in training time with no significant performance gain.\n",
+    "\n",
+    "The Embedding layer represents each character as a dense vector of fixed size, which is more efficient than a one-hot encoding, especially for larger vocabularies; the cell below sketches a rough parameter count."
+   ]
+  },
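+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To make the one-hot comparison concrete, here is a back-of-the-envelope count of input-side weights with purely illustrative numbers (`vocab`, `embedding_dim` and `rnn_units` below are assumptions, not values from this TP); the recurrent and output weights are identical in both cases, so they are left out:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Illustrative sizes only\n",
+    "vocab = 65          # e.g. a character-level vocabulary\n",
+    "embedding_dim = 32  # chosen embedding size\n",
+    "rnn_units = 128     # width of the recurrent layer\n",
+    "\n",
+    "# Embedding: a vocab x embedding_dim lookup table, and the RNN then\n",
+    "# sees inputs of size embedding_dim\n",
+    "embed_side = vocab * embedding_dim + embedding_dim * rnn_units\n",
+    "\n",
+    "# One-hot: no lookup table, but the RNN sees inputs of size vocab\n",
+    "onehot_side = vocab * rnn_units\n",
+    "\n",
+    "print(f\"Embedding input side: {embed_side} weights\")\n",
+    "print(f\"One-hot input side:   {onehot_side} weights\")\n",
+    "# The gap widens quickly as vocab grows (try vocab = 1000)"
+   ]
+  }
 ],
 "metadata": {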