Refactor code for improved readability and consistency across notebooks

- Standardized spacing around operators and function arguments in TP7_Kmeans.ipynb and neural_network.ipynb.
- Enhanced the formatting of model building and training code in neural_network.ipynb for better clarity.
- Updated pyproject.toml to remove the pinned TensorFlow version and added linting configuration for Ruff.
- Improved comments and code organization to make the notebooks easier to understand and maintain.
2026-02-02 13:31:32 +01:00 · 2025-07-01 20:46:08 +02:00
parent e273cf90f7
commit f94ff07cab
34 changed files with 5713 additions and 5047 deletions
--- a/Learning/TP4_Ridge_Lasso_and_CV.ipynb
+++ b/Learning/TP4_Ridge_Lasso_and_CV.ipynb
@@ -43,7 +43,7 @@
   "source": [
    "import warnings\n",
    "\n",
-    "warnings.filterwarnings('ignore')"
+    "warnings.filterwarnings(\"ignore\")"
   ]
  },
  {
@@ -434,7 +434,7 @@
   ],
   "source": [
    "import numpy as np\n",
-    "import pandas as pd  # dataframes are in pandas \n",
+    "import pandas as pd  # dataframes are in pandas\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "hitters = pd.read_csv(\"data/Hitters.csv\", index_col=\"Name\")\n",
@@ -895,9 +895,13 @@
   ],
   "source": [
    "# Hint for Question (4) :\n",
-    "ex = pd.DataFrame(dict(nom=['Alice', 'Nicolas', 'Jean'],\n",
-    "                       age=[19, np.NaN, np.NaN],\n",
-    "                       exam=[15, 14, np.NaN]))\n",
+    "ex = pd.DataFrame(\n",
+    "    dict(\n",
+    "        nom=[\"Alice\", \"Nicolas\", \"Jean\"],\n",
+    "        age=[19, np.nan, np.nan],\n",
+    "        exam=[15, 14, np.nan],\n",
+    "    )\n",
+    ")\n",
    "\n",
    "print(\"data : \\n\", ex)\n",
    "print(\"First result : \\n\", ex.isnull())\n",
@@ -1080,10 +1084,10 @@
   ],
   "source": [
    "# We remove the players for whom Salary is missing\n",
-    "hitters.dropna(subset=['Salary'], inplace=True)\n",
+    "hitters.dropna(subset=[\"Salary\"], inplace=True)\n",
    "\n",
    "X = hitters.select_dtypes(include=int)\n",
-    "Y = hitters['Salary']\n",
+    "Y = hitters[\"Salary\"]\n",
    "\n",
    "# check-point\n",
    "print(Y.isnull().sum())  # should be 0\n",
@@ -1109,7 +1113,7 @@
   },
   "outputs": [],
   "source": [
-    "#Answer for Exercise 4\n",
+    "# Answer for Exercise 4\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.3, random_state=42)"
@@ -1697,8 +1701,8 @@
    }
   ],
   "source": [
-    "#the values of alphas chosen by defaults are also on a logarithmic scale\n",
-    "plt.plot(np.log10(alphas_lasso), '.')"
+    "# the values of alphas chosen by defaults are also on a logarithmic scale\n",
+    "plt.plot(np.log10(alphas_lasso), \".\")"
   ]
  },
  {
@@ -1735,8 +1739,8 @@
   "source": [
    "fig, ax = plt.subplots(figsize=(8, 6))\n",
    "ax.plot(np.log10(alphas_lasso), coefs_lasso)\n",
-    "ax.set_xlabel('log10(alpha)')\n",
-    "ax.set_ylabel('Lasso coefficients')"
+    "ax.set_xlabel(\"log10(alpha)\")\n",
+    "ax.set_ylabel(\"Lasso coefficients\")"
   ]
  },
  {
@@ -1792,7 +1796,7 @@
    "print(\"1.\\n\", ind)\n",
    "print(\"2.\\n\", ind == 0)\n",
    "print(\"3. Le nombre de 0 de chaque colonne est :\\n \", (ind == 0).sum(axis=0))\n",
-    "print(\"4. Le nombre de 0 de chaque ligne est : \\n\", (ind == 0).sum(axis=1))\n"
+    "print(\"4. Le nombre de 0 de chaque ligne est : \\n\", (ind == 0).sum(axis=1))"
   ]
  },
  {
@@ -2294,18 +2298,19 @@
    "from sklearn.linear_model import LinearRegression\n",
    "\n",
    "linReg = LinearRegression()\n",
-    "linReg.fit(Xtrain,\n",
-    "           Ytrain)  # no need to scale for OLS if you just want to predict (unless the solver works best with scaled data)\n",
+    "linReg.fit(\n",
+    "    Xtrain, Ytrain\n",
+    ")  # no need to scale for OLS if you just want to predict (unless the solver works best with scaled data)\n",
    "# the predictions should not be different with or without standardization (could differ only owing to numerical problems)\n",
    "hatY_LinReg = linReg.predict(Xtest)\n",
    "\n",
    "fig, ax = plt.subplots()\n",
    "ax.scatter(Ytest, hatY_LinReg, s=5)\n",
-    "ax.plot([0, 1], [0, 1], transform=ax.transAxes, ls='--', c='gray')\n",
-    "ax.set_xlabel('Ytest')\n",
-    "ax.set_ylabel('hatY')\n",
-    "ax.set_title('Predicted vs true salaries for OLS estimator')\n",
-    "ax.axis('square')"
+    "ax.plot([0, 1], [0, 1], transform=ax.transAxes, ls=\"--\", c=\"gray\")\n",
+    "ax.set_xlabel(\"Ytest\")\n",
+    "ax.set_ylabel(\"hatY\")\n",
+    "ax.set_title(\"Predicted vs true salaries for OLS estimator\")\n",
+    "ax.axis(\"square\")"
   ]
  },
  {
@@ -2355,11 +2360,11 @@
    "\n",
    "fig, ax = plt.subplots()\n",
    "ax.scatter(Ytest, hatY_ridge, s=5)\n",
-    "ax.plot([0, 1], [0, 1], transform=ax.transAxes, ls='--', c='gray')\n",
-    "ax.set_xlabel('Ytest')\n",
-    "ax.set_ylabel('hatY')\n",
-    "ax.set_title('Predicted vs true salaries for Ridge estimator')\n",
-    "ax.axis('square')"
+    "ax.plot([0, 1], [0, 1], transform=ax.transAxes, ls=\"--\", c=\"gray\")\n",
+    "ax.set_xlabel(\"Ytest\")\n",
+    "ax.set_ylabel(\"hatY\")\n",
+    "ax.set_title(\"Predicted vs true salaries for Ridge estimator\")\n",
+    "ax.axis(\"square\")"
   ]
  },
  {
@@ -2408,11 +2413,11 @@
    "\n",
    "fig, ax = plt.subplots()\n",
    "ax.scatter(Ytest, hatY_lasso, s=5)\n",
-    "ax.plot([0, 1], [0, 1], transform=ax.transAxes, ls='--', c='gray')\n",
-    "ax.set_xlabel('Ytest')\n",
-    "ax.set_ylabel('hatY')\n",
-    "ax.set_title('Predicted vs true salaries for Ridge estimator')\n",
-    "ax.axis('square')"
+    "ax.plot([0, 1], [0, 1], transform=ax.transAxes, ls=\"--\", c=\"gray\")\n",
+    "ax.set_xlabel(\"Ytest\")\n",
+    "ax.set_ylabel(\"hatY\")\n",
+    "ax.set_title(\"Predicted vs true salaries for Lasso estimator\")\n",
+    "ax.axis(\"square\")"
   ]
  },
  {
@@ -2445,7 +2450,7 @@
   "source": [
    "from sklearn.linear_model import LassoLarsIC\n",
    "\n",
-    "lassoBIC = LassoLarsIC(criterion='bic')\n",
+    "lassoBIC = LassoLarsIC(criterion=\"bic\")\n",
    "lassoBIC.fit(XtrainScaled, Ytrain)\n",
    "print(\"best alpha chosen by BIC criterion :\", lassoBIC.alpha_)\n",
    "print(\"best alpha chosen by CV :\", lassoCV.alpha_)\n",
@@ -2499,11 +2504,11 @@
    "\n",
    "fig, ax = plt.subplots()\n",
    "ax.scatter(Ytest, hatY_BIC, s=5)\n",
-    "ax.plot([0, 1], [0, 1], transform=ax.transAxes, ls='--', c='gray')\n",
-    "ax.set_xlabel('Ytest')\n",
-    "ax.set_ylabel('hatY')\n",
-    "ax.set_title('Predicted vs true salaries for LassoBIC estimator')\n",
-    "ax.axis('square')"
+    "ax.plot([0, 1], [0, 1], transform=ax.transAxes, ls=\"--\", c=\"gray\")\n",
+    "ax.set_xlabel(\"Ytest\")\n",
+    "ax.set_ylabel(\"hatY\")\n",
+    "ax.set_title(\"Predicted vs true salaries for LassoBIC estimator\")\n",
+    "ax.axis(\"square\")"
   ]
  },
  {
@@ -2539,7 +2544,9 @@
    "from sklearn.metrics import mean_squared_error\n",
    "\n",
    "MSEs = []\n",
-    "for name, estimator in zip([\"LassoCV\", \"LassoBIC\", \"RidgeCV\", \"OLS\"], [lassoCV, lassoBIC, ridgeCV, linReg]):\n",
+    "for name, estimator in zip(\n",
+    "    [\"LassoCV\", \"LassoBIC\", \"RidgeCV\", \"OLS\"], [lassoCV, lassoBIC, ridgeCV, linReg]\n",
+    "):\n",
    "    y_pred = estimator.predict(Xtest)\n",
    "    MSE = mean_squared_error(Ytest, y_pred)\n",
    "    print(f\"MSE for {name} : {MSE}\")\n",
@@ -2584,10 +2591,12 @@
    "ols_errors = np.abs(Ytest - linReg.predict(Xtest))\n",
    "\n",
    "fig, ax = plt.subplots(figsize=(10, 6))\n",
-    "ax.boxplot([ridge_cv_errors, lasso_cv_errors, lasso_bic_errors, ols_errors],\n",
-    "           labels=['RidgeCV', 'LassoCV', 'LassoBIC', 'OLS'])\n",
-    "ax.set_title('Boxplot of Absolute Errors')\n",
-    "ax.set_ylabel('Absolute Error')\n",
+    "ax.boxplot(\n",
+    "    [ridge_cv_errors, lasso_cv_errors, lasso_bic_errors, ols_errors],\n",
+    "    labels=[\"RidgeCV\", \"LassoCV\", \"LassoBIC\", \"OLS\"],\n",
+    ")\n",
+    "ax.set_title(\"Boxplot of Absolute Errors\")\n",
+    "ax.set_ylabel(\"Absolute Error\")\n",
    "plt.show()"
   ]
  },