diff --git a/M2/Machine Learning/TP_3/2025_TP_3_M2_ISF.ipynb b/M2/Machine Learning/TP_3/2025_TP_3_M2_ISF.ipynb
index d594d08..7e51dd0 100644
--- a/M2/Machine Learning/TP_3/2025_TP_3_M2_ISF.ipynb
+++ b/M2/Machine Learning/TP_3/2025_TP_3_M2_ISF.ipynb
@@ -46,7 +46,7 @@
},
{
"cell_type": "code",
- "execution_count": 157,
+ "execution_count": 56,
"id": "97d58527",
"metadata": {},
"outputs": [],
@@ -61,18 +61,17 @@
"sns.set()\n",
"import plotly.express as px\n",
"import plotly.graph_objects as gp\n",
+ "import sklearn.metrics as metrics\n",
"import sklearn.preprocessing as preproc\n",
"\n",
"#Statistiques\n",
"from scipy.stats import chi2_contingency\n",
- "from sklearn import metrics\n",
"\n",
"# Machine Learning\n",
"from sklearn.cluster import KMeans\n",
- "import sklearn.metrics as metrics\n",
"from sklearn.ensemble import RandomForestRegressor\n",
"from sklearn.model_selection import KFold, train_test_split\n",
- "from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor"
+ "from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor\n"
]
},
{
@@ -101,7 +100,7 @@
},
{
"cell_type": "code",
- "execution_count": 158,
+ "execution_count": 57,
"id": "c9597b48",
"metadata": {},
"outputs": [],
@@ -120,7 +119,7 @@
},
{
"cell_type": "code",
- "execution_count": 159,
+ "execution_count": 58,
"id": "8051b5f4",
"metadata": {},
"outputs": [],
@@ -164,7 +163,7 @@
},
{
"cell_type": "code",
- "execution_count": 160,
+ "execution_count": 59,
"id": "c427a4b8",
"metadata": {},
"outputs": [
@@ -174,7 +173,7 @@
"(824, 14)"
]
},
- "execution_count": 160,
+ "execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
@@ -201,7 +200,7 @@
},
{
"cell_type": "code",
- "execution_count": 161,
+ "execution_count": 60,
"id": "c8fd3ee1",
"metadata": {},
"outputs": [
@@ -285,7 +284,7 @@
"type": "float"
}
],
- "ref": "e80a8f38-8160-41fb-bbfa-ae1f7b39de11",
+ "ref": "8fcd0abc-8334-4a0d-96b7-b6d7e17b3fb7",
"rows": [
[
"count",
@@ -761,7 +760,7 @@
"max NaN 83421.850000 "
]
},
- "execution_count": 161,
+ "execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
@@ -796,7 +795,7 @@
},
{
"cell_type": "code",
- "execution_count": 162,
+ "execution_count": 61,
"id": "1b156435",
"metadata": {},
"outputs": [
@@ -806,7 +805,7 @@
"(824, 13)"
]
},
- "execution_count": 162,
+ "execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
@@ -818,7 +817,7 @@
},
{
"cell_type": "code",
- "execution_count": 163,
+ "execution_count": 62,
"id": "0ef0fcc0",
"metadata": {},
"outputs": [],
@@ -854,7 +853,7 @@
},
{
"cell_type": "code",
- "execution_count": 164,
+ "execution_count": 63,
"id": "e130aae5",
"metadata": {},
"outputs": [],
@@ -864,7 +863,7 @@
},
{
"cell_type": "code",
- "execution_count": 165,
+ "execution_count": 64,
"id": "c39e2ad0",
"metadata": {},
"outputs": [
@@ -1812,7 +1811,7 @@
},
{
"cell_type": "code",
- "execution_count": 166,
+ "execution_count": 65,
"id": "a16215ab",
"metadata": {},
"outputs": [],
@@ -1822,7 +1821,7 @@
},
{
"cell_type": "code",
- "execution_count": 167,
+ "execution_count": 66,
"id": "532ca6c4",
"metadata": {},
"outputs": [
@@ -2765,7 +2764,7 @@
},
{
"cell_type": "code",
- "execution_count": 168,
+ "execution_count": 67,
"id": "b8530717",
"metadata": {},
"outputs": [],
@@ -2786,7 +2785,7 @@
},
{
"cell_type": "code",
- "execution_count": 169,
+ "execution_count": 68,
"id": "4ff3847d",
"metadata": {},
"outputs": [],
@@ -2815,7 +2814,7 @@
},
{
"cell_type": "code",
- "execution_count": 170,
+ "execution_count": 69,
"id": "6a1c7907",
"metadata": {},
"outputs": [],
@@ -2823,7 +2822,9 @@
"X = data_model_preprocessed = vars_numeriques_scaled.merge(vars_categorielles_enc, left_index=True, right_index=True) # type: ignore\n",
"Y = data_model[\"CM\"]\n",
"\n",
- "X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)"
+ "X_train, X_test, y_train, y_test = train_test_split(\n",
+ " X, Y, test_size=0.2, random_state=42\n",
+ ")"
]
},
{
@@ -2844,14 +2845,14 @@
},
{
"cell_type": "code",
- "execution_count": 171,
+ "execution_count": 70,
"id": "053e013c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
- "
DecisionTreeRegressor()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
+ "DecisionTreeRegressor()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
],
"text/plain": [
"DecisionTreeRegressor()"
]
},
- "execution_count": 171,
+ "execution_count": 70,
"metadata": {},
"output_type": "execute_result"
}
@@ -3292,7 +3293,7 @@
},
{
"cell_type": "code",
- "execution_count": 172,
+ "execution_count": 71,
"id": "c4ca2cf9",
"metadata": {},
"outputs": [
@@ -3321,7 +3322,7 @@
},
{
"cell_type": "code",
- "execution_count": 173,
+ "execution_count": 72,
"id": "4b739d5b",
"metadata": {},
"outputs": [
@@ -3329,9 +3330,9 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "MAE: 5950.05\n",
- "MSE: 160067768.70\n",
- "RMSE: 12651.79\n"
+ "MAE: 5124.14\n",
+ "MSE: 84535204.52\n",
+ "RMSE: 9194.30\n"
]
}
],
@@ -3355,6 +3356,20 @@
"**Question :** que pensez-vous des performances de ce modèle ?"
]
},
+ {
+ "cell_type": "markdown",
+ "id": "bdd7ccd6",
+ "metadata": {},
+ "source": [
+ "*Réponse*: \n",
+ "\n",
+ "**Erreur Absolue Moyenne (MAE)**  \n",
+ "La MAE représente l'écart absolu moyen entre les prédictions du modèle et les valeurs réelles. Une MAE de 5124.14 signifie qu'en moyenne, notre modèle se trompe de ce montant, exprimé dans l'unité de la variable cible. C'est l'indicateur le plus direct de l'erreur de prédiction moyenne.\n",
+ "\n",
+ "**Racine de l'Erreur Quadratique Moyenne (RMSE)**  \n",
+ "La RMSE est la racine carrée de la moyenne des erreurs au carré ($RMSE = \\sqrt{MSE}$). En raison de l'opération de mise au carré, cette métrique est particulièrement sensible aux grandes erreurs. La valeur obtenue est de 9194.30, nettement supérieure à la MAE, ce qui indique que quelques prédictions s'écartent fortement des valeurs réelles."
+ ]
+ },
{
"cell_type": "markdown",
"id": "7ecba832",
@@ -3393,11 +3408,18 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 73,
"id": "ab1e1367",
"metadata": {},
"outputs": [],
- "source": []
+ "source": [
+ "X = data_model_preprocessed\n",
+ "Y = data_model[\"CM\"]\n",
+ "\n",
+ "X_train, X_test, y_train, y_test = train_test_split(\n",
+ " X, Y, test_size=0.2, random_state=42\n",
+ ")"
+ ]
},
{
"cell_type": "markdown",
@@ -3417,12 +3439,12 @@
},
{
"cell_type": "code",
- "execution_count": 174,
+ "execution_count": 74,
"id": "b515460e",
"metadata": {},
"outputs": [],
"source": [
- "#Initialisation\n",
+ "# Initialisation\n",
"# Nombre de sous-échantillons pour la cross-validation\n",
"num_splits = 5\n",
"\n",
@@ -3440,20 +3462,56 @@
},
{
"cell_type": "code",
- "execution_count": 175,
+ "execution_count": 75,
"id": "eebb394f",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Validation croisée terminée avec 5 folds\n"
+ ]
+ }
+ ],
"source": [
- "# Entrainement avec cross-validation\n"
+ "# Entrainement avec cross-validation\n",
+ "for train_index, val_index in kf.split(X_train):\n",
+ " X_train_fold, X_val_fold = X_train.iloc[train_index], X_train.iloc[val_index]\n",
+ " y_train_fold, y_val_fold = y_train.iloc[train_index], y_train.iloc[val_index]\n",
+ "\n",
+ " rf_regressor.fit(X_train_fold, y_train_fold)\n",
+ " y_pred_fold = rf_regressor.predict(X_val_fold)\n",
+ "\n",
+ " mae = metrics.mean_absolute_error(y_val_fold, y_pred_fold)\n",
+ " mse = metrics.mean_squared_error(y_val_fold, y_pred_fold)\n",
+ " rmse = metrics.root_mean_squared_error(y_val_fold, y_pred_fold)\n",
+ "\n",
+ " MAE_scores.append(mae)\n",
+ " MSE_scores.append(mse)\n",
+ " RMSE_scores.append(rmse)\n",
+ "\n",
+ "print(f\"Validation croisée terminée avec {len(MAE_scores)} folds\")"
]
},
{
"cell_type": "code",
- "execution_count": 176,
+ "execution_count": 76,
"id": "b067126c",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Fold 1 MAE: 4472.5486946969695\n",
+ "Fold 2 MAE: 3859.4743234848484\n",
+ "Fold 3 MAE: 3633.0231541666662\n",
+ "Fold 4 MAE: 3888.3910715909087\n",
+ "Fold 5 MAE: 4808.59621832061\n"
+ ]
+ }
+ ],
"source": [
"# Métriques sur tous les folds\n",
"\n",
@@ -3464,10 +3522,22 @@
},
{
"cell_type": "code",
- "execution_count": 177,
+ "execution_count": 77,
"id": "6597152c",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Fold 1 MSE: 85464414.44080053\n",
+ "Fold 2 MSE: 34396997.21755034\n",
+ "Fold 3 MSE: 55184512.50786593\n",
+ "Fold 4 MSE: 33191300.80751679\n",
+ "Fold 5 MSE: 68739370.63588645\n"
+ ]
+ }
+ ],
"source": [
"#MSE\n",
"for fold, mse in enumerate(MSE_scores, start=1):\n",
@@ -3476,10 +3546,22 @@
},
{
"cell_type": "code",
- "execution_count": 178,
+ "execution_count": 78,
"id": "63ff1c9d",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Fold 1 RMSE: 9244.696557529649\n",
+ "Fold 2 RMSE: 5864.895328780415\n",
+ "Fold 3 RMSE: 7428.62790210049\n",
+ "Fold 4 RMSE: 5761.189183451346\n",
+ "Fold 5 RMSE: 8290.9209763383\n"
+ ]
+ }
+ ],
"source": [
"#RMSE\n",
"for fold, rmse in enumerate(RMSE_scores, start=1):\n",
@@ -3536,7 +3618,7 @@
},
{
"cell_type": "code",
- "execution_count": 179,
+ "execution_count": 79,
"id": "6d58dbc2",
"metadata": {},
"outputs": [],
@@ -3569,7 +3651,7 @@
},
{
"cell_type": "code",
- "execution_count": 180,
+ "execution_count": 80,
"id": "47da5172",
"metadata": {},
"outputs": [],
@@ -3579,7 +3661,7 @@
},
{
"cell_type": "code",
- "execution_count": 181,
+ "execution_count": 81,
"id": "d4936c46",
"metadata": {},
"outputs": [
@@ -3600,7 +3682,7 @@
},
{
"cell_type": "code",
- "execution_count": 182,
+ "execution_count": 82,
"id": "3215c463",
"metadata": {},
"outputs": [],
@@ -3614,7 +3696,7 @@
},
{
"cell_type": "code",
- "execution_count": 183,
+ "execution_count": 83,
"id": "bb9a5c9b",
"metadata": {},
"outputs": [],
@@ -3626,7 +3708,7 @@
},
{
"cell_type": "code",
- "execution_count": 184,
+ "execution_count": 84,
"id": "0f0768ad",
"metadata": {},
"outputs": [],