Refactor code formatting and improve readability in Jupyter notebooks for TP_4 and TP_5

- Adjusted indentation and line breaks for better clarity in function definitions and import statements.
- Standardized string quotes for consistency across the codebase.
- Enhanced readability of DataFrame creation and manipulation by breaking long lines into multiple lines.
- Cleaned up print statements and comments for improved understanding.
- Ensured consistent use of whitespace around operators and after commas.
This commit is contained in:
2025-11-25 10:46:16 +01:00
parent 751412c1cd
commit e57995ba85
17 changed files with 11975 additions and 11713 deletions

View File

@@ -69,11 +69,11 @@
"from sklearn import metrics\n",
"from sklearn.ensemble import GradientBoostingClassifier\n",
"from sklearn.model_selection import (\n",
" GridSearchCV,\n",
" StratifiedKFold,\n",
" cross_val_score,\n",
" train_test_split,\n",
")\n"
" GridSearchCV,\n",
" StratifiedKFold,\n",
" cross_val_score,\n",
" train_test_split,\n",
")"
]
},
{
@@ -91,12 +91,18 @@
"metadata": {},
"outputs": [],
"source": [
"def cramers_V(var1,var2) :\n",
" crosstab = np.array(pd.crosstab(var1,var2, rownames=None, colnames=None)) # Cross table building\n",
" stat = chi2_contingency(crosstab)[0] # Keeping of the test statistic of the Chi2 test\n",
" obs = np.sum(crosstab) # Number of observations\n",
" mini = min(crosstab.shape)-1 # Take the minimum value between the colmns and the rows of the cross table\n",
" return (stat/(obs*mini))"
"def cramers_V(var1, var2):\n",
" crosstab = np.array(\n",
" pd.crosstab(var1, var2, rownames=None, colnames=None)\n",
" ) # Cross table building\n",
" stat = chi2_contingency(crosstab)[\n",
" 0\n",
" ] # Keeping of the test statistic of the Chi2 test\n",
" obs = np.sum(crosstab) # Number of observations\n",
" mini = (\n",
" min(crosstab.shape) - 1\n",
"    )  # Take the minimum value between the columns and the rows of the cross table\n",
" return stat / (obs * mini)"
]
},
{
@@ -133,7 +139,7 @@
"metadata": {},
"outputs": [],
"source": [
"path = input_path + '/base_retraitee.csv'\n",
"path = input_path + \"/base_retraitee.csv\"\n",
"data_retraitee = pd.read_csv(path, sep=\",\", decimal=\".\")"
]
},
@@ -16225,7 +16231,7 @@
" if len(data_model[col].unique()) == 2:\n",
" variables_categorielles.append(data_model[col])\n",
" else:\n",
" variables_categorielles.append(data_model[col])\n"
" variables_categorielles.append(data_model[col])"
]
},
{
@@ -16653,7 +16659,7 @@
" if v_cramer_resultats.iloc[i, j] > 0.7:\n",
" print(\n",
"            f\"{v_cramer_resultats.index.to_numpy()[i]} et {v_cramer_resultats.columns[j]} sont trop dépendantes, V-CRAMER = {v_cramer_resultats.iloc[i, j]}\"\n",
" )\n"
" )"
]
},
{
@@ -16851,7 +16857,7 @@
" if abs(correlations_num.iloc[i, j]) > 0.7:\n",
" print(\n",
" f\"{correlations_num.index.to_numpy()[i]} et {correlations_num.columns[j]} sont trop dépendantes, corr = {correlations_num.iloc[i, j]}\"\n",
" )\n"
" )"
]
},
{
@@ -17820,7 +17826,7 @@
" cv=StratifiedKFold(\n",
" n_splits=num_folds, shuffle=True, random_state=42\n",
" ), # Validation croisée avec 5 folds\n",
" scoring='recall', # Métrique d'évaluation (moins c'est mieux)\n",
"    scoring=\"recall\",  # Métrique d'évaluation (plus c'est mieux)\n",
" n_jobs=-1, # Utiliser tous les cœurs du processeur\n",
")\n",
"\n",
@@ -17877,14 +17883,18 @@
],
"source": [
"# Recall de chaque fold\n",
"recall_scores = cross_val_score(best_gbc, X_train, y_train, cv=num_folds, scoring='recall')\n",
"recall_scores = cross_val_score(\n",
" best_gbc, X_train, y_train, cv=num_folds, scoring=\"recall\"\n",
")\n",
"\n",
"# Afficher les scores pour chaque fold\n",
"for i, score in enumerate(recall_scores):\n",
" print(f\"Recall pour le fold {i + 1}: {score}\")\n",
"\n",
"# Accuracy de chaque fold\n",
"accuracy_scores = cross_val_score(best_gbc, X_train, y_train, cv=num_folds, scoring='accuracy')\n",
"accuracy_scores = cross_val_score(\n",
" best_gbc, X_train, y_train, cv=num_folds, scoring=\"accuracy\"\n",
")\n",
"\n",
"# Afficher les scores pour chaque fold\n",
"print(\"\\n\")\n",
@@ -17892,12 +17902,14 @@
" print(f\"Accuracy pour le fold {i + 1}: {score}\")\n",
"\n",
"# Precision de chaque fold\n",
"precision_scores = cross_val_score(best_gbc, X_train, y_train, cv=num_folds, scoring='precision')\n",
"precision_scores = cross_val_score(\n",
" best_gbc, X_train, y_train, cv=num_folds, scoring=\"precision\"\n",
")\n",
"\n",
"# Afficher les scores pour chaque fold\n",
"print(\"\\n\")\n",
"for i, score in enumerate(precision_scores):\n",
" print(f\"Precision pour le fold {i + 1}: {score}\")\n"
" print(f\"Precision pour le fold {i + 1}: {score}\")"
]
},
{
@@ -30178,7 +30190,7 @@
"# Observation de la distribution sur Y_train\n",
"df = pd.DataFrame(y_train, columns=[\"SINISTRE\"])\n",
"fig = px.histogram(df, x=\"SINISTRE\", title=\"Distribution de la variable Y_train\")\n",
"fig.show()\n"
"fig.show()"
]
},
{
@@ -52502,7 +52514,7 @@
"fig = px.histogram(\n",
" df, x=\"SINISTRE\", title=\"Distribution de la variable Y_train_resampled\"\n",
")\n",
"fig.show()\n"
"fig.show()"
]
},
{
@@ -52530,7 +52542,7 @@
"num_folds = 5\n",
"\n",
"# Initialisation du modèle GradientBoostingClassifier\n",
"gb = GradientBoostingClassifier(random_state=42)\n"
"gb = GradientBoostingClassifier(random_state=42)"
]
},
{
@@ -52567,7 +52579,7 @@
"print(\"Meilleurs hyperparamètres : \", best_params)\n",
"\n",
"# Initialiser un modèle avec les meilleurs hyperparamètres\n",
"best_gbc = GradientBoostingClassifier(random_state=42, **best_params)\n"
"best_gbc = GradientBoostingClassifier(random_state=42, **best_params)"
]
},
{