Mirror of https://github.com/ArthurDanjou/ArtStudies.git (synced 2026-01-23 01:59:32 +01:00)
Refactor code for improved readability and consistency across notebooks
- Standardized spacing around operators and function arguments in TP7_Kmeans.ipynb and neural_network.ipynb.
- Enhanced the formatting of model building and training code in neural_network.ipynb for better clarity.
- Updated pyproject.toml to remove the pinned TensorFlow version and add a linting configuration for Ruff.
- Improved comments and organization in the code to facilitate easier understanding and maintenance.
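The pyproject.toml hunk itself is suppressed below, so as a rough, hedged sketch, a minimal Ruff section of the kind this message describes might look like the following (the exact rule selection and line length are assumptions, not the repository's actual settings):

    # pyproject.toml (hypothetical excerpt, not the repository's actual settings)
    [tool.ruff]
    line-length = 88

    [tool.ruff.lint]
    # E/W: pycodestyle, F: Pyflakes, I: import sorting
    select = ["E", "W", "F", "I"]

A configuration like this is typically applied by running "ruff check --fix ." followed by "ruff format ." at the repository root, which produces exactly the kind of quote normalization, operator spacing, and argument re-wrapping visible in the hunks below.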
File diff suppressed because one or more lines are too long (5 files)
@@ -43,7 +43,7 @@
 "source": [
 "import warnings\n",
 "\n",
-"warnings.filterwarnings('ignore')"
+"warnings.filterwarnings(\"ignore\")"
 ]
 },
 {
@@ -434,7 +434,7 @@
 ],
 "source": [
 "import numpy as np\n",
-"import pandas as pd # dataframes are in pandas \n",
+"import pandas as pd # dataframes are in pandas\n",
 "import matplotlib.pyplot as plt\n",
 "\n",
 "hitters = pd.read_csv(\"data/Hitters.csv\", index_col=\"Name\")\n",
@@ -895,9 +895,13 @@
 ],
 "source": [
 "# Hint for Question (4) :\n",
-"ex = pd.DataFrame(dict(nom=['Alice', 'Nicolas', 'Jean'],\n",
-"                       age=[19, np.NaN, np.NaN],\n",
-"                       exam=[15, 14, np.NaN]))\n",
+"ex = pd.DataFrame(\n",
+"    dict(\n",
+"        nom=[\"Alice\", \"Nicolas\", \"Jean\"],\n",
+"        age=[19, np.NaN, np.NaN],\n",
+"        exam=[15, 14, np.NaN],\n",
+"    )\n",
+")\n",
 "\n",
 "print(\"data : \\n\", ex)\n",
 "print(\"First result : \\n\", ex.isnull())\n",
@@ -1080,10 +1084,10 @@
 ],
 "source": [
 "# We remove the players for whom Salary is missing\n",
-"hitters.dropna(subset=['Salary'], inplace=True)\n",
+"hitters.dropna(subset=[\"Salary\"], inplace=True)\n",
 "\n",
 "X = hitters.select_dtypes(include=int)\n",
-"Y = hitters['Salary']\n",
+"Y = hitters[\"Salary\"]\n",
 "\n",
 "# check-point\n",
 "print(Y.isnull().sum()) # should be 0\n",
@@ -1109,7 +1113,7 @@
 },
 "outputs": [],
 "source": [
-"#Answer for Exercise 4\n",
+"# Answer for Exercise 4\n",
 "from sklearn.model_selection import train_test_split\n",
 "\n",
 "Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.3, random_state=42)"
@@ -1697,8 +1701,8 @@
 }
 ],
 "source": [
-"#the values of alphas chosen by defaults are also on a logarithmic scale\n",
-"plt.plot(np.log10(alphas_lasso), '.')"
+"# the values of alphas chosen by defaults are also on a logarithmic scale\n",
+"plt.plot(np.log10(alphas_lasso), \".\")"
 ]
 },
 {
@@ -1735,8 +1739,8 @@
 "source": [
 "fig, ax = plt.subplots(figsize=(8, 6))\n",
 "ax.plot(np.log10(alphas_lasso), coefs_lasso)\n",
-"ax.set_xlabel('log10(alpha)')\n",
-"ax.set_ylabel('Lasso coefficients')"
+"ax.set_xlabel(\"log10(alpha)\")\n",
+"ax.set_ylabel(\"Lasso coefficients\")"
 ]
 },
 {
@@ -1792,7 +1796,7 @@
 "print(\"1.\\n\", ind)\n",
 "print(\"2.\\n\", ind == 0)\n",
 "print(\"3. Le nombre de 0 de chaque colonne est :\\n \", (ind == 0).sum(axis=0))\n",
-"print(\"4. Le nombre de 0 de chaque ligne est : \\n\", (ind == 0).sum(axis=1))\n"
+"print(\"4. Le nombre de 0 de chaque ligne est : \\n\", (ind == 0).sum(axis=1))"
 ]
 },
 {
@@ -2294,18 +2298,19 @@
 "from sklearn.linear_model import LinearRegression\n",
 "\n",
 "linReg = LinearRegression()\n",
-"linReg.fit(Xtrain,\n",
-"           Ytrain) # no need to scale for OLS if you just want to predict (unless the solver works best with scaled data)\n",
+"linReg.fit(\n",
+"    Xtrain, Ytrain\n",
+") # no need to scale for OLS if you just want to predict (unless the solver works best with scaled data)\n",
 "# the predictions should not be different with or without standardization (could differ only owing to numerical problems)\n",
 "hatY_LinReg = linReg.predict(Xtest)\n",
 "\n",
 "fig, ax = plt.subplots()\n",
 "ax.scatter(Ytest, hatY_LinReg, s=5)\n",
-"ax.plot([0, 1], [0, 1], transform=ax.transAxes, ls='--', c='gray')\n",
-"ax.set_xlabel('Ytest')\n",
-"ax.set_ylabel('hatY')\n",
-"ax.set_title('Predicted vs true salaries for OLS estimator')\n",
-"ax.axis('square')"
+"ax.plot([0, 1], [0, 1], transform=ax.transAxes, ls=\"--\", c=\"gray\")\n",
+"ax.set_xlabel(\"Ytest\")\n",
+"ax.set_ylabel(\"hatY\")\n",
+"ax.set_title(\"Predicted vs true salaries for OLS estimator\")\n",
+"ax.axis(\"square\")"
 ]
 },
 {
@@ -2355,11 +2360,11 @@
 "\n",
 "fig, ax = plt.subplots()\n",
 "ax.scatter(Ytest, hatY_ridge, s=5)\n",
-"ax.plot([0, 1], [0, 1], transform=ax.transAxes, ls='--', c='gray')\n",
-"ax.set_xlabel('Ytest')\n",
-"ax.set_ylabel('hatY')\n",
-"ax.set_title('Predicted vs true salaries for Ridge estimator')\n",
-"ax.axis('square')"
+"ax.plot([0, 1], [0, 1], transform=ax.transAxes, ls=\"--\", c=\"gray\")\n",
+"ax.set_xlabel(\"Ytest\")\n",
+"ax.set_ylabel(\"hatY\")\n",
+"ax.set_title(\"Predicted vs true salaries for Ridge estimator\")\n",
+"ax.axis(\"square\")"
 ]
 },
 {
@@ -2408,11 +2413,11 @@
 "\n",
 "fig, ax = plt.subplots()\n",
 "ax.scatter(Ytest, hatY_lasso, s=5)\n",
-"ax.plot([0, 1], [0, 1], transform=ax.transAxes, ls='--', c='gray')\n",
-"ax.set_xlabel('Ytest')\n",
-"ax.set_ylabel('hatY')\n",
-"ax.set_title('Predicted vs true salaries for Ridge estimator')\n",
-"ax.axis('square')"
+"ax.plot([0, 1], [0, 1], transform=ax.transAxes, ls=\"--\", c=\"gray\")\n",
+"ax.set_xlabel(\"Ytest\")\n",
+"ax.set_ylabel(\"hatY\")\n",
+"ax.set_title(\"Predicted vs true salaries for Ridge estimator\")\n",
+"ax.axis(\"square\")"
 ]
 },
 {
@@ -2445,7 +2450,7 @@
 "source": [
 "from sklearn.linear_model import LassoLarsIC\n",
 "\n",
-"lassoBIC = LassoLarsIC(criterion='bic')\n",
+"lassoBIC = LassoLarsIC(criterion=\"bic\")\n",
 "lassoBIC.fit(XtrainScaled, Ytrain)\n",
 "print(\"best alpha chosen by BIC criterion :\", lassoBIC.alpha_)\n",
 "print(\"best alpha chosen by CV :\", lassoCV.alpha_)\n",
@@ -2499,11 +2504,11 @@
 "\n",
 "fig, ax = plt.subplots()\n",
 "ax.scatter(Ytest, hatY_BIC, s=5)\n",
-"ax.plot([0, 1], [0, 1], transform=ax.transAxes, ls='--', c='gray')\n",
-"ax.set_xlabel('Ytest')\n",
-"ax.set_ylabel('hatY')\n",
-"ax.set_title('Predicted vs true salaries for LassoBIC estimator')\n",
-"ax.axis('square')"
+"ax.plot([0, 1], [0, 1], transform=ax.transAxes, ls=\"--\", c=\"gray\")\n",
+"ax.set_xlabel(\"Ytest\")\n",
+"ax.set_ylabel(\"hatY\")\n",
+"ax.set_title(\"Predicted vs true salaries for LassoBIC estimator\")\n",
+"ax.axis(\"square\")"
 ]
 },
 {
@@ -2539,7 +2544,9 @@
 "from sklearn.metrics import mean_squared_error\n",
 "\n",
 "MSEs = []\n",
-"for name, estimator in zip([\"LassoCV\", \"LassoBIC\", \"RidgeCV\", \"OLS\"], [lassoCV, lassoBIC, ridgeCV, linReg]):\n",
+"for name, estimator in zip(\n",
+"    [\"LassoCV\", \"LassoBIC\", \"RidgeCV\", \"OLS\"], [lassoCV, lassoBIC, ridgeCV, linReg]\n",
+"):\n",
 "    y_pred = estimator.predict(Xtest)\n",
 "    MSE = mean_squared_error(Ytest, y_pred)\n",
 "    print(f\"MSE for {name} : {MSE}\")\n",
@@ -2584,10 +2591,12 @@
 "ols_errors = np.abs(Ytest - linReg.predict(Xtest))\n",
 "\n",
 "fig, ax = plt.subplots(figsize=(10, 6))\n",
-"ax.boxplot([ridge_cv_errors, lasso_cv_errors, lasso_bic_errors, ols_errors],\n",
-"           labels=['RidgeCV', 'LassoCV', 'LassoBIC', 'OLS'])\n",
-"ax.set_title('Boxplot of Absolute Errors')\n",
-"ax.set_ylabel('Absolute Error')\n",
+"ax.boxplot(\n",
+"    [ridge_cv_errors, lasso_cv_errors, lasso_bic_errors, ols_errors],\n",
+"    labels=[\"RidgeCV\", \"LassoCV\", \"LassoBIC\", \"OLS\"],\n",
+")\n",
+"ax.set_title(\"Boxplot of Absolute Errors\")\n",
+"ax.set_ylabel(\"Absolute Error\")\n",
 "plt.show()"
 ]
 },
@@ -32,7 +32,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"import pandas as pd \n",
+"import pandas as pd\n",
 "import numpy as np\n",
 "import matplotlib.pyplot as plt"
 ]
@@ -226,7 +226,7 @@
 }
 ],
 "source": [
-"sms = pd.read_csv(\"data/spam.csv\", encoding='latin')\n",
+"sms = pd.read_csv(\"data/spam.csv\", encoding=\"latin\")\n",
 "\n",
 "sms.head()"
 ]
@@ -244,7 +244,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"sms.rename(columns={'v1':'Label', 'v2':'Text'}, inplace=True)"
+"sms.rename(columns={\"v1\": \"Label\", \"v2\": \"Text\"}, inplace=True)"
 ]
 },
 {
@@ -644,7 +644,7 @@
 }
 ],
 "source": [
-"sms['Labelnum']=sms['Label'].map({'ham':0,'spam':1})\n",
+"sms[\"Labelnum\"] = sms[\"Label\"].map({\"ham\": 0, \"spam\": 1})\n",
 "\n",
 "sms.head()"
 ]
@@ -674,13 +674,13 @@
 }
 ],
 "source": [
-"# Hint 1 for Exercise 1 \n",
-"a=np.array([0,1,1,1,0])\n",
-"print (len(a))\n",
-"print (a[a==0])\n",
-"print (len(a[a==0]))\n",
-"print (a[a==1])\n",
-"print (len(a[a==1]))"
+"# Hint 1 for Exercise 1\n",
+"a = np.array([0, 1, 1, 1, 0])\n",
+"print(len(a))\n",
+"print(a[a == 0])\n",
+"print(len(a[a == 0]))\n",
+"print(a[a == 1])\n",
+"print(len(a[a == 1]))"
 ]
 },
 {
@@ -881,8 +881,8 @@
 }
 ],
 "source": [
-"# Hint 2 for Exercise 1 \n",
-"sms[sms.Labelnum==0].head()"
+"# Hint 2 for Exercise 1\n",
+"sms[sms.Labelnum == 0].head()"
 ]
 },
 {
@@ -1083,8 +1083,8 @@
 }
 ],
 "source": [
-"# Hint 3 for Exercise 1 \n",
-"sms[sms.Labelnum==1].head()"
+"# Hint 3 for Exercise 1\n",
+"sms[sms.Labelnum == 1].head()"
 ]
 },
 {
@@ -1104,8 +1104,8 @@
 ],
 "source": [
 "print(len(sms))\n",
-"print(sms[sms.Label == 'ham'].shape)\n",
-"print(sms[sms.Label == 'spam'].shape)"
+"print(sms[sms.Label == \"ham\"].shape)\n",
+"print(sms[sms.Label == \"spam\"].shape)"
 ]
 },
 {
@@ -1136,8 +1136,8 @@
 ],
 "source": [
 "# Hint 1 for Exercise 2\n",
-"print (sms.loc[0, 'Text']) \n",
-"print (\"--> The length of the first sms is\", len(sms.loc[0, 'Text']))"
+"print(sms.loc[0, \"Text\"])\n",
+"print(\"--> The length of the first sms is\", len(sms.loc[0, \"Text\"]))"
 ]
 },
 {
@@ -1160,10 +1160,13 @@
 ],
 "source": [
 "plt.figure(figsize=(10, 6))\n",
-"plt.hist(sms.loc[:, 'Text'].apply(len), bins='stone',)\n",
-"plt.title('Histogram of SMS Lengths')\n",
-"plt.xlabel('Length')\t\n",
-"plt.ylabel('Frequency')\n",
+"plt.hist(\n",
+"    sms.loc[:, \"Text\"].apply(len),\n",
+"    bins=\"stone\",\n",
+")\n",
+"plt.title(\"Histogram of SMS Lengths\")\n",
+"plt.xlabel(\"Length\")\n",
+"plt.ylabel(\"Frequency\")\n",
 "plt.show()"
 ]
 },
@@ -1222,30 +1225,41 @@
 }
 ],
 "source": [
-"Example = pd.DataFrame([['iphone gratuit iphone gratuit',1],['mille vert gratuit',0],\n",
-"                        ['iphone mille euro',0],['argent gratuit euro gratuit',1]],\n",
-"                       columns=['sms', 'label'])\n",
+"Example = pd.DataFrame(\n",
+"    [\n",
+"        [\"iphone gratuit iphone gratuit\", 1],\n",
+"        [\"mille vert gratuit\", 0],\n",
+"        [\"iphone mille euro\", 0],\n",
+"        [\"argent gratuit euro gratuit\", 1],\n",
+"    ],\n",
+"    columns=[\"sms\", \"label\"],\n",
+")\n",
 "vec = CountVectorizer()\n",
 "X = vec.fit_transform(Example.sms)\n",
 "\n",
 "# 1. Displaying the vocabulary\n",
 "\n",
-"print (\"1. The vocabulary of Example is \", vec.vocabulary_)\n",
+"print(\"1. The vocabulary of Example is \", vec.vocabulary_)\n",
 "\n",
 "# 1 bis :\n",
 "\n",
-"print('The vocabulary arranged in alphabetical order : ', sorted(list(vec.vocabulary_.keys())))\n",
+"print(\n",
+"    \"The vocabulary arranged in alphabetical order : \",\n",
+"    sorted(list(vec.vocabulary_.keys())),\n",
+")\n",
 "\n",
-"# 2. Displaying the vectors : \n",
+"# 2. Displaying the vectors :\n",
 "\n",
-"print (\"2. The vectors corresponding to the sms are : \\n\", X.toarray())# X.toarray because \n",
-"# X is a \"sparse\" matrix. \n",
+"print(\n",
+"    \"2. The vectors corresponding to the sms are : \\n\", X.toarray()\n",
+") # X.toarray because\n",
+"# X is a \"sparse\" matrix.\n",
 "\n",
-"# 3. For a new data x_0=\"iphone gratuit\", \n",
-"# you must also transform x_0 into a numerical vector before predicting. \n",
+"# 3. For a new data x_0=\"iphone gratuit\",\n",
+"# you must also transform x_0 into a numerical vector before predicting.\n",
 "\n",
-"vec_x_0=vec.transform(['iphone gratuit']).toarray() # \n",
-"print (\"3. The numerical vector corresponding to (x_0=iphone gratuit) is \\n\", vec_x_0 )"
+"vec_x_0 = vec.transform([\"iphone gratuit\"]).toarray() #\n",
+"print(\"3. The numerical vector corresponding to (x_0=iphone gratuit) is \\n\", vec_x_0)"
 ]
 },
 {
@@ -1267,7 +1281,7 @@
 ],
 "source": [
 "#'sparse' version (without \"to_array\")\n",
-"v = vec.transform(['iphone iphone gratuit'])\n",
+"v = vec.transform([\"iphone iphone gratuit\"])\n",
 "v"
 ]
 },
@@ -1309,8 +1323,8 @@
 }
 ],
 "source": [
-"# \"(0,2) 1\" means : the element in row 0 and column 2 is equal to 1. \n",
-"# \"(0,3) 2\" means : the element in row 0 and column 3 is equal to 2. \n",
+"# \"(0,2) 1\" means : the element in row 0 and column 2 is equal to 1.\n",
+"# \"(0,3) 2\" means : the element in row 0 and column 3 is equal to 2.\n",
 "print(v)"
 ]
 },
@@ -1340,8 +1354,8 @@
 }
 ],
 "source": [
-"vec_x_1 = vec.transform(['iphone vert gratuit']).toarray()\n",
-"vec_x_2 = vec.transform(['iphone rouge gratuit']).toarray()\n",
+"vec_x_1 = vec.transform([\"iphone vert gratuit\"]).toarray()\n",
+"vec_x_2 = vec.transform([\"iphone rouge gratuit\"]).toarray()\n",
 "print(vec_x_1)\n",
 "print(vec_x_2)"
 ]
@@ -1372,8 +1386,8 @@
 "outputs": [],
 "source": [
 "vectorizer = CountVectorizer()\n",
-"X = vectorizer.fit_transform(sms['Text'])\n",
-"y = sms['Labelnum']"
+"X = vectorizer.fit_transform(sms[\"Text\"])\n",
+"y = sms[\"Labelnum\"]"
 ]
 },
 {
@@ -1400,10 +1414,12 @@
 "source": [
 "from sklearn.model_selection import train_test_split\n",
 "\n",
-"X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.30,random_state=50)\n",
+"X_train, X_test, y_train, y_test = train_test_split(\n",
+"    X, y, test_size=0.30, random_state=50\n",
+")\n",
 "\n",
-"print (\"size of the training set: \", X_train.shape[0])\n",
-"print (\"size of the test set :\", X_test.shape[0])"
+"print(\"size of the training set: \", X_train.shape[0])\n",
+"print(\"size of the test set :\", X_test.shape[0])"
 ]
 },
 {
@@ -1906,7 +1922,7 @@
 "from sklearn.metrics import accuracy_score\n",
 "\n",
 "y_pred = sms_bayes.predict(X_test)\n",
-"print (\"The accuracy score on the test set is \", accuracy_score(y_test, y_pred))"
+"print(\"The accuracy score on the test set is \", accuracy_score(y_test, y_pred))"
 ]
 },
 {
@@ -1969,10 +1985,17 @@
 }
 ],
 "source": [
-"my_sms = vectorizer.transform(['free trial!', 'Iphone 15 is now free', 'I want coffee', 'I want to buy a new iphone'])\n",
+"my_sms = vectorizer.transform(\n",
+"    [\n",
+"        \"free trial!\",\n",
+"        \"Iphone 15 is now free\",\n",
+"        \"I want coffee\",\n",
+"        \"I want to buy a new iphone\",\n",
+"    ]\n",
+")\n",
 "\n",
 "pred_my_sms = sms_bayes.predict(my_sms)\n",
-"print (pred_my_sms)"
+"print(pred_my_sms)"
 ]
 },
 {
@@ -1999,7 +2022,7 @@
 "from sklearn.naive_bayes import BernoulliNB\n",
 "\n",
 "# Load the MNIST dataset\n",
-"mnist = fetch_openml('mnist_784', version=1, parser='auto')\n",
+"mnist = fetch_openml(\"mnist_784\", version=1, parser=\"auto\")\n",
 "X, y = mnist.data, mnist.target"
 ]
 },
@@ -2036,7 +2059,9 @@
 "source": [
 "X_copy = (X.copy() >= 127).astype(int)\n",
 "\n",
-"X_train, X_test, y_train, y_test = train_test_split(X_copy, y, test_size=0.25, random_state=42)\n",
+"X_train, X_test, y_train, y_test = train_test_split(\n",
+"    X_copy, y, test_size=0.25, random_state=42\n",
+")\n",
 "\n",
 "ber_bayes = BernoulliNB()\n",
 "ber_bayes.fit(X_train, y_train)\n",
@@ -2059,6 +2084,7 @@
 "outputs": [],
 "source": [
 "from keras.datasets import cifar10\n",
+"\n",
 "(x_train, y_train), (x_test, y_test) = cifar10.load_data()"
 ]
 },
@@ -2077,7 +2103,7 @@
 }
 ],
 "source": [
-"# reminder : the output is an RGB image 32 x 32 \n",
+"# reminder : the output is an RGB image 32 x 32\n",
 "print(x_train.shape)\n",
 "print(y_train.shape)"
 ]
File diff suppressed because one or more lines are too long
@@ -46,23 +46,23 @@
 "\n",
 "\n",
 "np.random.seed(12)\n",
-"num_observations=400\n",
+"num_observations = 400\n",
 "\n",
-"center1=[0,0]\n",
-"center2=[1,4]\n",
-"center3=[-3,2]\n",
+"center1 = [0, 0]\n",
+"center2 = [1, 4]\n",
+"center3 = [-3, 2]\n",
 "\n",
-"x1=np.random.multivariate_normal(center1,[[1,0],[0,1]], num_observations)\n",
-"x2=np.random.multivariate_normal(center2,[[1,0],[0,1]], num_observations)\n",
-"x3=np.random.multivariate_normal(center3,[[1,0],[0,1]], num_observations)\n",
+"x1 = np.random.multivariate_normal(center1, [[1, 0], [0, 1]], num_observations)\n",
+"x2 = np.random.multivariate_normal(center2, [[1, 0], [0, 1]], num_observations)\n",
+"x3 = np.random.multivariate_normal(center3, [[1, 0], [0, 1]], num_observations)\n",
 "\n",
-"X= np.vstack((x1, x2, x3)).astype(np.float32)\n",
+"X = np.vstack((x1, x2, x3)).astype(np.float32)\n",
 "\n",
-"plt.figure(figsize=(8,6))\n",
-"plt.plot(X[:,0], X[:,1],\".b\",alpha=0.2)\n",
-"plt.plot(center1[0], center1[1], '.', color='red', markersize=10)\n",
-"plt.plot(center2[0], center2[1], '.', color='red', markersize=10)\n",
-"plt.plot(center3[0], center3[1], '.', color='red', markersize=10)\n",
+"plt.figure(figsize=(8, 6))\n",
+"plt.plot(X[:, 0], X[:, 1], \".b\", alpha=0.2)\n",
+"plt.plot(center1[0], center1[1], \".\", color=\"red\", markersize=10)\n",
+"plt.plot(center2[0], center2[1], \".\", color=\"red\", markersize=10)\n",
+"plt.plot(center3[0], center3[1], \".\", color=\"red\", markersize=10)\n",
 "plt.show()"
 ]
 },
@@ -540,10 +540,12 @@
 }
 ],
 "source": [
-"plt.figure(figsize=(8,6))\n",
-"plt.plot(X[:,0], X[:,1],\".b\",alpha=0.2)\n",
+"plt.figure(figsize=(8, 6))\n",
+"plt.plot(X[:, 0], X[:, 1], \".b\", alpha=0.2)\n",
 "for center in kmeans1.cluster_centers_:\n",
-"    plt.plot(center[0], center[1], '.', color='red', markersize=10, label='Cluster center')\n",
+"    plt.plot(\n",
+"        center[0], center[1], \".\", color=\"red\", markersize=10, label=\"Cluster center\"\n",
+"    )\n",
 "plt.legend()\n",
 "plt.show()"
 ]
@@ -585,11 +587,11 @@
 "# Hint: An example for plotting the Voronoi partition\n",
 "from scipy.spatial import Voronoi, voronoi_plot_2d\n",
 "\n",
-"points_generer_voronoi = np.array([[0,0],[1,4],[-3,2]])\n",
+"points_generer_voronoi = np.array([[0, 0], [1, 4], [-3, 2]])\n",
 "\n",
 "vor = Voronoi(points_generer_voronoi)\n",
 "\n",
-"fig, ax = plt.subplots(1,1,figsize=(4,4)) \n",
+"fig, ax = plt.subplots(1, 1, figsize=(4, 4))\n",
 "\n",
 "fig = voronoi_plot_2d(vor, ax=ax, show_vertices=False)"
 ]
@@ -614,14 +616,16 @@
 "# Answer for Exercise 3\n",
 "\n",
 "\n",
-"fig, ax = plt.subplots(1,1,figsize=(8,6)) \n",
-"plt.plot(X[:,0], X[:,1], \".b\", alpha=0.2)\n",
+"fig, ax = plt.subplots(1, 1, figsize=(8, 6))\n",
+"plt.plot(X[:, 0], X[:, 1], \".b\", alpha=0.2)\n",
 "\n",
 "vor = Voronoi(kmeans1.cluster_centers_)\n",
 "fig = voronoi_plot_2d(vor, ax=ax, show_vertices=False)\n",
 "\n",
 "for center in kmeans1.cluster_centers_:\n",
-"    plt.plot(center[0], center[1], '.', color='red', markersize=10, label='Cluster center')\n",
+"    plt.plot(\n",
+"        center[0], center[1], \".\", color=\"red\", markersize=10, label=\"Cluster center\"\n",
+"    )\n",
 "plt.legend()\n",
 "plt.show()"
 ]
@@ -1233,10 +1237,10 @@
 }
 ],
 "source": [
-"print (\"1:\", compress_model.labels_)\n",
-"print (\"2:\", compress_model.labels_.shape)\n",
-"print (\"3:\", compress_model.cluster_centers_)\n",
-"print (\"4:\", compress_model.cluster_centers_.shape)"
+"print(\"1:\", compress_model.labels_)\n",
+"print(\"2:\", compress_model.labels_.shape)\n",
+"print(\"3:\", compress_model.cluster_centers_)\n",
+"print(\"4:\", compress_model.cluster_centers_.shape)"
 ]
 },
 {
@@ -1275,13 +1279,13 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"color_new=np.zeros_like(colors)\n",
+"color_new = np.zeros_like(colors)\n",
 "\n",
-"labels=compress_model.labels_\n",
-"centers=compress_model.cluster_centers_\n",
+"labels = compress_model.labels_\n",
+"centers = compress_model.cluster_centers_\n",
 "\n",
 "for i in range(len(colors)):\n",
-"    color_new[i]= centers[labels[i]]"
+"    color_new[i] = centers[labels[i]]"
 ]
 },
 {
@@ -1336,11 +1340,12 @@
 ],
 "source": [
 "import matplotlib.image as mpimg\n",
+"\n",
 "mpimg.imsave(\"assets/zelda_new.png\", zelda_new)\n",
 "\n",
 "plt.figure(figsize=(8, 6))\n",
 "plt.imshow(zelda_new)\n",
-"plt.show()\n"
+"plt.show()"
 ]
 },
 {
@@ -1363,13 +1368,13 @@
 "source": [
 "import os\n",
 "\n",
-"size_new=os.path.getsize('assets/zelda_new.png')\n",
-"size_old=os.path.getsize('assets/zelda.png')\n",
+"size_new = os.path.getsize(\"assets/zelda_new.png\")\n",
+"size_old = os.path.getsize(\"assets/zelda.png\")\n",
 "\n",
-"print (\"The original size is \", size_old, \"bytes.\")\n",
-"print (\"The compressed size is \", size_new, \"bytes.\")\n",
+"print(\"The original size is \", size_old, \"bytes.\")\n",
+"print(\"The compressed size is \", size_new, \"bytes.\")\n",
 "\n",
-"print (f\"The compression factor is {size_old/size_new : .3f}\")"
+"print(f\"The compression factor is {size_old / size_new: .3f}\")"
 ]
 },
 {
@@ -1407,8 +1412,8 @@
 }
 ],
 "source": [
-"partiel=plt.imread(\"assets/partiel.png\")\n",
-"plt.figure(figsize = (20,10))\n",
+"partiel = plt.imread(\"assets/partiel.png\")\n",
+"plt.figure(figsize=(20, 10))\n",
 "plt.imshow(partiel)"
 ]
 },
@@ -1426,7 +1431,7 @@
 }
 ],
 "source": [
-"print (partiel.shape)"
+"print(partiel.shape)"
 ]
 },
 {
@@ -1472,23 +1477,23 @@
 }
 ],
 "source": [
-"partiel_new=np.zeros_like(partiel)\n",
+"partiel_new = np.zeros_like(partiel)\n",
 "\n",
-"noir_rgb=np.array([0,0,0])\n",
-"blanc_rgb=np.array([1,1,1])\n",
+"noir_rgb = np.array([0, 0, 0])\n",
+"blanc_rgb = np.array([1, 1, 1])\n",
 "\n",
-"epsilon=0.5 # threshold\n",
-" \n",
+"epsilon = 0.5 # threshold\n",
+"\n",
 "distances = np.linalg.norm(partiel - noir_rgb, axis=2)\n",
 "partiel_new = np.zeros_like(partiel)\n",
 "partiel_new[distances <= epsilon] = noir_rgb\n",
 "partiel_new[distances > epsilon] = blanc_rgb\n",
-" \n",
+"\n",
 "mpimg.imsave(\"assets/partiel_new.png\", partiel_new)\n",
 "\n",
-"plt.figure(figsize=(20,10))\n",
+"plt.figure(figsize=(20, 10))\n",
 "plt.imshow(partiel_new)\n",
-"plt.show()\n"
+"plt.show()"
 ]
 },
 {
@@ -1531,16 +1536,20 @@
 "mnist = tf.keras.datasets.mnist\n",
 "(X_train, y_train), (X_test, y_test) = mnist.load_data()\n",
 "\n",
-"X_train = X_train.reshape(-1, 28*28)\n",
+"X_train = X_train.reshape(-1, 28 * 28)\n",
 "\n",
 "kmeans2 = KMeans(n_clusters=10)\n",
 "clusters = kmeans2.fit_predict(X_train)\n",
 "\n",
+"\n",
 "def map_clusters_to_labels(clusters, true_labels):\n",
-"    return np.array([mode(true_labels[clusters == i], keepdims=True).mode[0] for i in range(10)])\n",
+"    return np.array(\n",
+"        [mode(true_labels[clusters == i], keepdims=True).mode[0] for i in range(10)]\n",
+"    )\n",
 "\n",
+"\n",
 "cluster_to_label = map_clusters_to_labels(clusters, y_train)\n",
-"print(\"Cluster to label mapping:\", cluster_to_label)\n"
+"print(\"Cluster to label mapping:\", cluster_to_label)"
 ]
 },
 {
@@ -6,7 +6,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"import numpy as np \n",
+"import numpy as np\n",
 "import pandas as pd\n",
 "import tensorflow as tf\n",
 "import matplotlib.pyplot as plt"
 ]
@@ -178,16 +178,21 @@
 "outputs": [],
 "source": [
 "def build_model():\n",
-"    model = tf.keras.models.Sequential([\n",
-"        tf.keras.layers.Dense(16, activation='relu', input_shape=(X.shape[1],), kernel_regularizer=tf.keras.regularizers.l2(0.01)),\n",
-"        tf.keras.layers.Dense(8, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01)),\n",
-"        tf.keras.layers.Dense(1, activation='sigmoid')\n",
-"    ])\n",
-"    model.compile(\n",
-"        optimizer='adam',\n",
-"        loss='binary_crossentropy',\n",
-"        metrics=['accuracy']\n",
-"    )\n",
+"    model = tf.keras.models.Sequential(\n",
+"        [\n",
+"            tf.keras.layers.Dense(\n",
+"                16,\n",
+"                activation=\"relu\",\n",
+"                input_shape=(X.shape[1],),\n",
+"                kernel_regularizer=tf.keras.regularizers.l2(0.01),\n",
+"            ),\n",
+"            tf.keras.layers.Dense(\n",
+"                8, activation=\"relu\", kernel_regularizer=tf.keras.regularizers.l2(0.01)\n",
+"            ),\n",
+"            tf.keras.layers.Dense(1, activation=\"sigmoid\"),\n",
+"        ]\n",
+"    )\n",
+"    model.compile(optimizer=\"adam\", loss=\"binary_crossentropy\", metrics=[\"accuracy\"])\n",
 "    return model"
 ]
 },
@@ -291,10 +296,7 @@
 "histories = []\n",
 "\n",
 "early_stopping = EarlyStopping(\n",
-"    monitor='val_loss',\n",
-"    patience=10,\n",
-"    restore_best_weights=True,\n",
-"    verbose=1\n",
+"    monitor=\"val_loss\", patience=10, restore_best_weights=True, verbose=1\n",
 ")\n",
 "\n",
 "for fold, (train_idx, val_idx) in enumerate(skf.split(X, y), 1):\n",
@@ -305,29 +307,28 @@
 "    scaler = StandardScaler()\n",
 "    X_train_scaled = scaler.fit_transform(X_train)\n",
 "    X_val_scaled = scaler.transform(X_val)\n",
-"    \n",
+"\n",
 "    model = build_model()\n",
 "\n",
-"    model.compile(\n",
-"        optimizer='adam',\n",
-"        loss='binary_crossentropy',\n",
-"        metrics=[\"f1_score\"]\n",
-"    )\n",
+"    model.compile(optimizer=\"adam\", loss=\"binary_crossentropy\", metrics=[\"f1_score\"])\n",
 "\n",
 "    # EarlyStopping\n",
-"    callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)\n",
+"    callback = tf.keras.callbacks.EarlyStopping(\n",
+"        monitor=\"val_loss\", patience=10, restore_best_weights=True\n",
+"    )\n",
 "\n",
 "    # Entraînement\n",
 "    history = model.fit(\n",
-"        X_train_scaled, y_train,\n",
+"        X_train_scaled,\n",
+"        y_train,\n",
 "        epochs=50,\n",
 "        batch_size=8,\n",
 "        validation_data=(X_val_scaled, y_val),\n",
 "        callbacks=[callback],\n",
 "        verbose=0,\n",
-"        class_weight={0: 1.0, 1: 2.0}\n",
+"        class_weight={0: 1.0, 1: 2.0},\n",
 "    )\n",
-"    \n",
+"\n",
 "    histories.append(history.history)\n",
 "\n",
 "    # Prédiction & F1\n",
@@ -360,9 +361,9 @@
 "axes = axes.flatten() # Flatten to easily iterate\n",
 "\n",
 "for i, (hist, ax) in enumerate(zip(histories, axes)):\n",
-"    ax.plot(hist['loss'], label='Train loss', alpha=0.6)\n",
-"    ax.plot(hist['val_loss'], label='Val loss', linestyle='--', alpha=0.6)\n",
-"    ax.set_title(f\"Fold {i+1}\")\n",
+"    ax.plot(hist[\"loss\"], label=\"Train loss\", alpha=0.6)\n",
+"    ax.plot(hist[\"val_loss\"], label=\"Val loss\", linestyle=\"--\", alpha=0.6)\n",
+"    ax.set_title(f\"Fold {i + 1}\")\n",
 "    ax.set_xlabel(\"Epochs\")\n",
 "    if i % 2 == 0:\n",
 "        ax.set_ylabel(\"Binary Crossentropy\")\n",
@@ -436,7 +437,9 @@
 "import tensorflow as tf\n",
 "import numpy as np\n",
 "\n",
-"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)\n",
+"X_train, X_test, y_train, y_test = train_test_split(\n",
+"    X, y, test_size=0.2, random_state=42, stratify=y\n",
+")\n",
 "\n",
 "scaler = StandardScaler()\n",
 "X_train_scaled = scaler.fit_transform(X_train)\n",
@@ -444,21 +447,21 @@
 "\n",
 "model = build_model()\n",
 "\n",
-"model.compile(\n",
-"    optimizer='adam',\n",
-"    loss='binary_crossentropy'\n",
-")\n",
-"\n",
-"callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)\n",
+"model.compile(optimizer=\"adam\", loss=\"binary_crossentropy\")\n",
+"\n",
+"callback = tf.keras.callbacks.EarlyStopping(\n",
+"    monitor=\"val_loss\", patience=10, restore_best_weights=True\n",
+")\n",
 "\n",
 "history = model.fit(\n",
-"    X_train_scaled, y_train,\n",
+"    X_train_scaled,\n",
+"    y_train,\n",
 "    epochs=50,\n",
 "    batch_size=8,\n",
 "    validation_split=0.2,\n",
 "    callbacks=[callback],\n",
 "    verbose=0,\n",
-"    class_weight={0: 1.0, 1: 2.0}\n",
+"    class_weight={0: 1.0, 1: 2.0},\n",
 ")\n",
 "\n",
 "\n",
@@ -486,11 +489,11 @@
 ],
 "source": [
 "plt.figure(figsize=(8, 5))\n",
-"plt.plot(history.history['loss'], label='Loss (train)')\n",
-"plt.plot(history.history['val_loss'], label='Loss (val)', linestyle='--')\n",
-"plt.xlabel('Epochs')\n",
-"plt.ylabel('Binary Cross-Entropy Loss')\n",
-"plt.title('Courbe d\\'apprentissage')\n",
+"plt.plot(history.history[\"loss\"], label=\"Loss (train)\")\n",
+"plt.plot(history.history[\"val_loss\"], label=\"Loss (val)\", linestyle=\"--\")\n",
+"plt.xlabel(\"Epochs\")\n",
+"plt.ylabel(\"Binary Cross-Entropy Loss\")\n",
+"plt.title(\"Courbe d'apprentissage\")\n",
 "plt.legend()\n",
 "plt.grid(True)\n",
 "plt.tight_layout()\n",