Mirror of https://github.com/ArthurDanjou/ArtStudies.git (synced 2026-01-30 09:28:42 +01:00)
Refactor code for improved readability and consistency across notebooks
- Standardized spacing around operators and function arguments in TP7_Kmeans.ipynb and neural_network.ipynb.
- Enhanced the formatting of model building and training code in neural_network.ipynb for better clarity.
- Updated the pyproject.toml to remove a specific TensorFlow version and added linting configuration for Ruff.
- Improved comments and organization in the code to facilitate easier understanding and maintenance.
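The Ruff configuration mentioned above lives in pyproject.toml and is not part of the notebook diff shown below. As a rough, hypothetical sketch (the rule selection and line length are assumptions, not taken from this commit), such a section typically looks like:

    [tool.ruff]
    line-length = 88            # assumed value; Ruff's default, not confirmed by this diff

    [tool.ruff.lint]
    select = ["E", "F", "I"]    # hypothetical rule set: pycodestyle, Pyflakes, isort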
@@ -32,7 +32,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"import pandas as pd \n",
+"import pandas as pd\n",
 "import numpy as np\n",
 "import matplotlib.pyplot as plt"
 ]
@@ -226,7 +226,7 @@
 }
 ],
 "source": [
-"sms = pd.read_csv(\"data/spam.csv\", encoding='latin')\n",
+"sms = pd.read_csv(\"data/spam.csv\", encoding=\"latin\")\n",
 "\n",
 "sms.head()"
 ]
@@ -244,7 +244,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"sms.rename(columns={'v1':'Label', 'v2':'Text'}, inplace=True)"
+"sms.rename(columns={\"v1\": \"Label\", \"v2\": \"Text\"}, inplace=True)"
 ]
 },
 {
@@ -644,7 +644,7 @@
 }
 ],
 "source": [
-"sms['Labelnum']=sms['Label'].map({'ham':0,'spam':1})\n",
+"sms[\"Labelnum\"] = sms[\"Label\"].map({\"ham\": 0, \"spam\": 1})\n",
 "\n",
 "sms.head()"
 ]
@@ -674,13 +674,13 @@
 }
 ],
 "source": [
-"# Hint 1 for Exercise 1 \n",
-"a=np.array([0,1,1,1,0])\n",
-"print (len(a))\n",
-"print (a[a==0])\n",
-"print (len(a[a==0]))\n",
-"print (a[a==1])\n",
-"print (len(a[a==1]))"
+"# Hint 1 for Exercise 1\n",
+"a = np.array([0, 1, 1, 1, 0])\n",
+"print(len(a))\n",
+"print(a[a == 0])\n",
+"print(len(a[a == 0]))\n",
+"print(a[a == 1])\n",
+"print(len(a[a == 1]))"
 ]
 },
 {
@@ -881,8 +881,8 @@
 }
 ],
 "source": [
-"# Hint 2 for Exercise 1 \n",
-"sms[sms.Labelnum==0].head()"
+"# Hint 2 for Exercise 1\n",
+"sms[sms.Labelnum == 0].head()"
 ]
 },
 {
@@ -1083,8 +1083,8 @@
 }
 ],
 "source": [
-"# Hint 3 for Exercise 1 \n",
-"sms[sms.Labelnum==1].head()"
+"# Hint 3 for Exercise 1\n",
+"sms[sms.Labelnum == 1].head()"
 ]
 },
 {
@@ -1104,8 +1104,8 @@
 ],
 "source": [
 "print(len(sms))\n",
-"print(sms[sms.Label == 'ham'].shape)\n",
-"print(sms[sms.Label == 'spam'].shape)"
+"print(sms[sms.Label == \"ham\"].shape)\n",
+"print(sms[sms.Label == \"spam\"].shape)"
 ]
 },
 {
@@ -1136,8 +1136,8 @@
 ],
 "source": [
 "# Hint 1 for Exercise 2\n",
-"print (sms.loc[0, 'Text']) \n",
-"print (\"--> The length of the first sms is\", len(sms.loc[0, 'Text']))"
+"print(sms.loc[0, \"Text\"])\n",
+"print(\"--> The length of the first sms is\", len(sms.loc[0, \"Text\"]))"
 ]
 },
 {
@@ -1160,10 +1160,13 @@
 ],
 "source": [
 "plt.figure(figsize=(10, 6))\n",
-"plt.hist(sms.loc[:, 'Text'].apply(len), bins='stone',)\n",
-"plt.title('Histogram of SMS Lengths')\n",
-"plt.xlabel('Length')\t\n",
-"plt.ylabel('Frequency')\n",
+"plt.hist(\n",
+" sms.loc[:, \"Text\"].apply(len),\n",
+" bins=\"stone\",\n",
+")\n",
+"plt.title(\"Histogram of SMS Lengths\")\n",
+"plt.xlabel(\"Length\")\n",
+"plt.ylabel(\"Frequency\")\n",
 "plt.show()"
 ]
 },
@@ -1222,30 +1225,41 @@
 }
 ],
 "source": [
-"Example = pd.DataFrame([['iphone gratuit iphone gratuit',1],['mille vert gratuit',0],\n",
-" ['iphone mille euro',0],['argent gratuit euro gratuit',1]],\n",
-" columns=['sms', 'label'])\n",
+"Example = pd.DataFrame(\n",
+" [\n",
+" [\"iphone gratuit iphone gratuit\", 1],\n",
+" [\"mille vert gratuit\", 0],\n",
+" [\"iphone mille euro\", 0],\n",
+" [\"argent gratuit euro gratuit\", 1],\n",
+" ],\n",
+" columns=[\"sms\", \"label\"],\n",
+")\n",
 "vec = CountVectorizer()\n",
 "X = vec.fit_transform(Example.sms)\n",
 "\n",
 "# 1. Displaying the vocabulary\n",
 "\n",
-"print (\"1. The vocabulary of Example is \", vec.vocabulary_)\n",
+"print(\"1. The vocabulary of Example is \", vec.vocabulary_)\n",
 "\n",
 "# 1 bis :\n",
 "\n",
-"print('The vocabulary arranged in alphabetical order : ', sorted(list(vec.vocabulary_.keys())))\n",
+"print(\n",
+" \"The vocabulary arranged in alphabetical order : \",\n",
+" sorted(list(vec.vocabulary_.keys())),\n",
+")\n",
 "\n",
-"# 2. Displaying the vectors : \n",
+"# 2. Displaying the vectors :\n",
 "\n",
-"print (\"2. The vectors corresponding to the sms are : \\n\", X.toarray())# X.toarray because \n",
-"# X is a \"sparse\" matrix. \n",
+"print(\n",
+" \"2. The vectors corresponding to the sms are : \\n\", X.toarray()\n",
+") # X.toarray because\n",
+"# X is a \"sparse\" matrix.\n",
 "\n",
-"# 3. For a new data x_0=\"iphone gratuit\", \n",
-"# you must also transform x_0 into a numerical vector before predicting. \n",
+"# 3. For a new data x_0=\"iphone gratuit\",\n",
+"# you must also transform x_0 into a numerical vector before predicting.\n",
 "\n",
-"vec_x_0=vec.transform(['iphone gratuit']).toarray() # \n",
-"print (\"3. The numerical vector corresponding to (x_0=iphone gratuit) is \\n\", vec_x_0 )"
+"vec_x_0 = vec.transform([\"iphone gratuit\"]).toarray() #\n",
+"print(\"3. The numerical vector corresponding to (x_0=iphone gratuit) is \\n\", vec_x_0)"
 ]
 },
 {
@@ -1267,7 +1281,7 @@
 ],
 "source": [
 "#'sparse' version (without \"to_array\")\n",
-"v = vec.transform(['iphone iphone gratuit'])\n",
+"v = vec.transform([\"iphone iphone gratuit\"])\n",
 "v"
 ]
 },
@@ -1309,8 +1323,8 @@
 }
 ],
 "source": [
-"# \"(0,2) 1\" means : the element in row 0 and column 2 is equal to 1. \n",
-"# \"(0,3) 2\" means : the element in row 0 and column 3 is equal to 2. \n",
+"# \"(0,2) 1\" means : the element in row 0 and column 2 is equal to 1.\n",
+"# \"(0,3) 2\" means : the element in row 0 and column 3 is equal to 2.\n",
 "print(v)"
 ]
 },
@@ -1340,8 +1354,8 @@
 }
 ],
 "source": [
-"vec_x_1 = vec.transform(['iphone vert gratuit']).toarray()\n",
-"vec_x_2 = vec.transform(['iphone rouge gratuit']).toarray()\n",
+"vec_x_1 = vec.transform([\"iphone vert gratuit\"]).toarray()\n",
+"vec_x_2 = vec.transform([\"iphone rouge gratuit\"]).toarray()\n",
 "print(vec_x_1)\n",
 "print(vec_x_2)"
 ]
@@ -1372,8 +1386,8 @@
 "outputs": [],
 "source": [
 "vectorizer = CountVectorizer()\n",
-"X = vectorizer.fit_transform(sms['Text'])\n",
-"y = sms['Labelnum']"
+"X = vectorizer.fit_transform(sms[\"Text\"])\n",
+"y = sms[\"Labelnum\"]"
 ]
 },
 {
@@ -1400,10 +1414,12 @@
 "source": [
 "from sklearn.model_selection import train_test_split\n",
 "\n",
-"X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.30,random_state=50)\n",
+"X_train, X_test, y_train, y_test = train_test_split(\n",
+" X, y, test_size=0.30, random_state=50\n",
+")\n",
 "\n",
-"print (\"size of the training set: \", X_train.shape[0])\n",
-"print (\"size of the test set :\", X_test.shape[0])"
+"print(\"size of the training set: \", X_train.shape[0])\n",
+"print(\"size of the test set :\", X_test.shape[0])"
 ]
 },
 {
@@ -1906,7 +1922,7 @@
 "from sklearn.metrics import accuracy_score\n",
 "\n",
 "y_pred = sms_bayes.predict(X_test)\n",
-"print (\"The accuracy score on the test set is \", accuracy_score(y_test, y_pred))"
+"print(\"The accuracy score on the test set is \", accuracy_score(y_test, y_pred))"
 ]
 },
 {
@@ -1969,10 +1985,17 @@
 }
 ],
 "source": [
-"my_sms = vectorizer.transform(['free trial!', 'Iphone 15 is now free', 'I want coffee', 'I want to buy a new iphone'])\n",
+"my_sms = vectorizer.transform(\n",
+" [\n",
+" \"free trial!\",\n",
+" \"Iphone 15 is now free\",\n",
+" \"I want coffee\",\n",
+" \"I want to buy a new iphone\",\n",
+" ]\n",
+")\n",
 "\n",
 "pred_my_sms = sms_bayes.predict(my_sms)\n",
-"print (pred_my_sms)"
+"print(pred_my_sms)"
 ]
 },
 {
@@ -1999,7 +2022,7 @@
 "from sklearn.naive_bayes import BernoulliNB\n",
 "\n",
 "# Load the MNIST dataset\n",
-"mnist = fetch_openml('mnist_784', version=1, parser='auto')\n",
+"mnist = fetch_openml(\"mnist_784\", version=1, parser=\"auto\")\n",
 "X, y = mnist.data, mnist.target"
 ]
 },
@@ -2036,7 +2059,9 @@
 "source": [
 "X_copy = (X.copy() >= 127).astype(int)\n",
 "\n",
-"X_train, X_test, y_train, y_test = train_test_split(X_copy, y, test_size=0.25, random_state=42)\n",
+"X_train, X_test, y_train, y_test = train_test_split(\n",
+" X_copy, y, test_size=0.25, random_state=42\n",
+")\n",
 "\n",
 "ber_bayes = BernoulliNB()\n",
 "ber_bayes.fit(X_train, y_train)\n",
@@ -2059,6 +2084,7 @@
 "outputs": [],
 "source": [
 "from keras.datasets import cifar10\n",
+"\n",
 "(x_train, y_train), (x_test, y_test) = cifar10.load_data()"
 ]
 },
@@ -2077,7 +2103,7 @@
 }
 ],
 "source": [
-"# reminder : the output is an RGB image 32 x 32 \n",
+"# reminder : the output is an RGB image 32 x 32\n",
 "print(x_train.shape)\n",
 "print(y_train.shape)"
 ]