mirror of
https://github.com/ArthurDanjou/ArtStudies.git
synced 2026-02-02 13:31:32 +01:00
Refactor code for improved readability and consistency across notebooks
- Standardized spacing around operators and function arguments in TP7_Kmeans.ipynb and neural_network.ipynb.
- Enhanced the formatting of model building and training code in neural_network.ipynb for better clarity.
- Updated pyproject.toml: removed a specific TensorFlow version and added linting configuration for Ruff.
- Improved comments and organization in the code to facilitate easier understanding and maintenance.
This commit is contained in:
@@ -43,7 +43,7 @@
|
||||
"source": [
|
||||
"import warnings\n",
|
||||
"\n",
|
||||
"warnings.filterwarnings('ignore')"
|
||||
"warnings.filterwarnings(\"ignore\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -434,7 +434,7 @@
|
||||
],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd # dataframes are in pandas \n",
|
||||
"import pandas as pd # dataframes are in pandas\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"\n",
|
||||
"hitters = pd.read_csv(\"data/Hitters.csv\", index_col=\"Name\")\n",
|
||||
@@ -895,9 +895,13 @@
|
||||
],
|
||||
"source": [
|
||||
"# Hint for Question (4) :\n",
|
||||
"ex = pd.DataFrame(dict(nom=['Alice', 'Nicolas', 'Jean'],\n",
|
||||
" age=[19, np.NaN, np.NaN],\n",
|
||||
" exam=[15, 14, np.NaN]))\n",
|
||||
"ex = pd.DataFrame(\n",
|
||||
" dict(\n",
|
||||
" nom=[\"Alice\", \"Nicolas\", \"Jean\"],\n",
|
||||
" age=[19, np.NaN, np.NaN],\n",
|
||||
" exam=[15, 14, np.NaN],\n",
|
||||
" )\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(\"data : \\n\", ex)\n",
|
||||
"print(\"First result : \\n\", ex.isnull())\n",
|
||||
@@ -1080,10 +1084,10 @@
|
||||
],
|
||||
"source": [
|
||||
"# We remove the players for whom Salary is missing\n",
|
||||
"hitters.dropna(subset=['Salary'], inplace=True)\n",
|
||||
"hitters.dropna(subset=[\"Salary\"], inplace=True)\n",
|
||||
"\n",
|
||||
"X = hitters.select_dtypes(include=int)\n",
|
||||
"Y = hitters['Salary']\n",
|
||||
"Y = hitters[\"Salary\"]\n",
|
||||
"\n",
|
||||
"# check-point\n",
|
||||
"print(Y.isnull().sum()) # should be 0\n",
|
||||
@@ -1109,7 +1113,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#Answer for Exercise 4\n",
|
||||
"# Answer for Exercise 4\n",
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"\n",
|
||||
"Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.3, random_state=42)"
|
||||
@@ -1697,8 +1701,8 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"#the values of alphas chosen by defaults are also on a logarithmic scale\n",
|
||||
"plt.plot(np.log10(alphas_lasso), '.')"
|
||||
"# the values of alphas chosen by defaults are also on a logarithmic scale\n",
|
||||
"plt.plot(np.log10(alphas_lasso), \".\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1735,8 +1739,8 @@
|
||||
"source": [
|
||||
"fig, ax = plt.subplots(figsize=(8, 6))\n",
|
||||
"ax.plot(np.log10(alphas_lasso), coefs_lasso)\n",
|
||||
"ax.set_xlabel('log10(alpha)')\n",
|
||||
"ax.set_ylabel('Lasso coefficients')"
|
||||
"ax.set_xlabel(\"log10(alpha)\")\n",
|
||||
"ax.set_ylabel(\"Lasso coefficients\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1792,7 +1796,7 @@
|
||||
"print(\"1.\\n\", ind)\n",
|
||||
"print(\"2.\\n\", ind == 0)\n",
|
||||
"print(\"3. Le nombre de 0 de chaque colonne est :\\n \", (ind == 0).sum(axis=0))\n",
|
||||
"print(\"4. Le nombre de 0 de chaque ligne est : \\n\", (ind == 0).sum(axis=1))\n"
|
||||
"print(\"4. Le nombre de 0 de chaque ligne est : \\n\", (ind == 0).sum(axis=1))"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -2294,18 +2298,19 @@
|
||||
"from sklearn.linear_model import LinearRegression\n",
|
||||
"\n",
|
||||
"linReg = LinearRegression()\n",
|
||||
"linReg.fit(Xtrain,\n",
|
||||
" Ytrain) # no need to scale for OLS if you just want to predict (unless the solver works best with scaled data)\n",
|
||||
"linReg.fit(\n",
|
||||
" Xtrain, Ytrain\n",
|
||||
") # no need to scale for OLS if you just want to predict (unless the solver works best with scaled data)\n",
|
||||
"# the predictions should not be different with or without standardization (could differ only owing to numerical problems)\n",
|
||||
"hatY_LinReg = linReg.predict(Xtest)\n",
|
||||
"\n",
|
||||
"fig, ax = plt.subplots()\n",
|
||||
"ax.scatter(Ytest, hatY_LinReg, s=5)\n",
|
||||
"ax.plot([0, 1], [0, 1], transform=ax.transAxes, ls='--', c='gray')\n",
|
||||
"ax.set_xlabel('Ytest')\n",
|
||||
"ax.set_ylabel('hatY')\n",
|
||||
"ax.set_title('Predicted vs true salaries for OLS estimator')\n",
|
||||
"ax.axis('square')"
|
||||
"ax.plot([0, 1], [0, 1], transform=ax.transAxes, ls=\"--\", c=\"gray\")\n",
|
||||
"ax.set_xlabel(\"Ytest\")\n",
|
||||
"ax.set_ylabel(\"hatY\")\n",
|
||||
"ax.set_title(\"Predicted vs true salaries for OLS estimator\")\n",
|
||||
"ax.axis(\"square\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -2355,11 +2360,11 @@
|
||||
"\n",
|
||||
"fig, ax = plt.subplots()\n",
|
||||
"ax.scatter(Ytest, hatY_ridge, s=5)\n",
|
||||
"ax.plot([0, 1], [0, 1], transform=ax.transAxes, ls='--', c='gray')\n",
|
||||
"ax.set_xlabel('Ytest')\n",
|
||||
"ax.set_ylabel('hatY')\n",
|
||||
"ax.set_title('Predicted vs true salaries for Ridge estimator')\n",
|
||||
"ax.axis('square')"
|
||||
"ax.plot([0, 1], [0, 1], transform=ax.transAxes, ls=\"--\", c=\"gray\")\n",
|
||||
"ax.set_xlabel(\"Ytest\")\n",
|
||||
"ax.set_ylabel(\"hatY\")\n",
|
||||
"ax.set_title(\"Predicted vs true salaries for Ridge estimator\")\n",
|
||||
"ax.axis(\"square\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -2408,11 +2413,11 @@
|
||||
"\n",
|
||||
"fig, ax = plt.subplots()\n",
|
||||
"ax.scatter(Ytest, hatY_lasso, s=5)\n",
|
||||
"ax.plot([0, 1], [0, 1], transform=ax.transAxes, ls='--', c='gray')\n",
|
||||
"ax.set_xlabel('Ytest')\n",
|
||||
"ax.set_ylabel('hatY')\n",
|
||||
"ax.set_title('Predicted vs true salaries for Ridge estimator')\n",
|
||||
"ax.axis('square')"
|
||||
"ax.plot([0, 1], [0, 1], transform=ax.transAxes, ls=\"--\", c=\"gray\")\n",
|
||||
"ax.set_xlabel(\"Ytest\")\n",
|
||||
"ax.set_ylabel(\"hatY\")\n",
|
||||
"ax.set_title(\"Predicted vs true salaries for Ridge estimator\")\n",
|
||||
"ax.axis(\"square\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -2445,7 +2450,7 @@
|
||||
"source": [
|
||||
"from sklearn.linear_model import LassoLarsIC\n",
|
||||
"\n",
|
||||
"lassoBIC = LassoLarsIC(criterion='bic')\n",
|
||||
"lassoBIC = LassoLarsIC(criterion=\"bic\")\n",
|
||||
"lassoBIC.fit(XtrainScaled, Ytrain)\n",
|
||||
"print(\"best alpha chosen by BIC criterion :\", lassoBIC.alpha_)\n",
|
||||
"print(\"best alpha chosen by CV :\", lassoCV.alpha_)\n",
|
||||
@@ -2499,11 +2504,11 @@
|
||||
"\n",
|
||||
"fig, ax = plt.subplots()\n",
|
||||
"ax.scatter(Ytest, hatY_BIC, s=5)\n",
|
||||
"ax.plot([0, 1], [0, 1], transform=ax.transAxes, ls='--', c='gray')\n",
|
||||
"ax.set_xlabel('Ytest')\n",
|
||||
"ax.set_ylabel('hatY')\n",
|
||||
"ax.set_title('Predicted vs true salaries for LassoBIC estimator')\n",
|
||||
"ax.axis('square')"
|
||||
"ax.plot([0, 1], [0, 1], transform=ax.transAxes, ls=\"--\", c=\"gray\")\n",
|
||||
"ax.set_xlabel(\"Ytest\")\n",
|
||||
"ax.set_ylabel(\"hatY\")\n",
|
||||
"ax.set_title(\"Predicted vs true salaries for LassoBIC estimator\")\n",
|
||||
"ax.axis(\"square\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -2539,7 +2544,9 @@
|
||||
"from sklearn.metrics import mean_squared_error\n",
|
||||
"\n",
|
||||
"MSEs = []\n",
|
||||
"for name, estimator in zip([\"LassoCV\", \"LassoBIC\", \"RidgeCV\", \"OLS\"], [lassoCV, lassoBIC, ridgeCV, linReg]):\n",
|
||||
"for name, estimator in zip(\n",
|
||||
" [\"LassoCV\", \"LassoBIC\", \"RidgeCV\", \"OLS\"], [lassoCV, lassoBIC, ridgeCV, linReg]\n",
|
||||
"):\n",
|
||||
" y_pred = estimator.predict(Xtest)\n",
|
||||
" MSE = mean_squared_error(Ytest, y_pred)\n",
|
||||
" print(f\"MSE for {name} : {MSE}\")\n",
|
||||
@@ -2584,10 +2591,12 @@
|
||||
"ols_errors = np.abs(Ytest - linReg.predict(Xtest))\n",
|
||||
"\n",
|
||||
"fig, ax = plt.subplots(figsize=(10, 6))\n",
|
||||
"ax.boxplot([ridge_cv_errors, lasso_cv_errors, lasso_bic_errors, ols_errors],\n",
|
||||
" labels=['RidgeCV', 'LassoCV', 'LassoBIC', 'OLS'])\n",
|
||||
"ax.set_title('Boxplot of Absolute Errors')\n",
|
||||
"ax.set_ylabel('Absolute Error')\n",
|
||||
"ax.boxplot(\n",
|
||||
" [ridge_cv_errors, lasso_cv_errors, lasso_bic_errors, ols_errors],\n",
|
||||
" labels=[\"RidgeCV\", \"LassoCV\", \"LassoBIC\", \"OLS\"],\n",
|
||||
")\n",
|
||||
"ax.set_title(\"Boxplot of Absolute Errors\")\n",
|
||||
"ax.set_ylabel(\"Absolute Error\")\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user