Mirror of https://github.com/ArthurDanjou/ArtStudies.git (synced 2026-01-30 09:28:42 +01:00)
Refactor code for improved readability and consistency across notebooks
- Standardized spacing around operators and function arguments in TP7_Kmeans.ipynb and neural_network.ipynb.
- Enhanced the formatting of model building and training code in neural_network.ipynb for better clarity.
- Updated the pyproject.toml to remove a specific TensorFlow version and added linting configuration for Ruff.
- Improved comments and organization in the code to facilitate easier understanding and maintenance.
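The Ruff configuration mentioned above lives in pyproject.toml and is not part of the notebook diff shown below. As a rough, hypothetical sketch (the rule selection and line length are assumptions, not taken from this commit), such a section typically looks like:

    [tool.ruff]
    line-length = 88            # assumed value; Ruff's default, not confirmed by this diff

    [tool.ruff.lint]
    select = ["E", "F", "I"]    # hypothetical rule set: pycodestyle, Pyflakes, isort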
@@ -32,7 +32,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"import pandas as pd \n",
+"import pandas as pd\n",
 "import numpy as np\n",
 "import matplotlib.pyplot as plt"
 ]
@@ -226,7 +226,7 @@
 }
 ],
 "source": [
-"sms = pd.read_csv(\"data/spam.csv\", encoding='latin')\n",
+"sms = pd.read_csv(\"data/spam.csv\", encoding=\"latin\")\n",
 "\n",
 "sms.head()"
 ]
@@ -244,7 +244,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"sms.rename(columns={'v1':'Label', 'v2':'Text'}, inplace=True)"
+"sms.rename(columns={\"v1\": \"Label\", \"v2\": \"Text\"}, inplace=True)"
 ]
 },
 {
@@ -644,7 +644,7 @@
 }
 ],
 "source": [
-"sms['Labelnum']=sms['Label'].map({'ham':0,'spam':1})\n",
+"sms[\"Labelnum\"] = sms[\"Label\"].map({\"ham\": 0, \"spam\": 1})\n",
 "\n",
 "sms.head()"
 ]
@@ -674,13 +674,13 @@
 }
 ],
 "source": [
-"# Hint 1 for Exercise 1 \n",
-"a=np.array([0,1,1,1,0])\n",
-"print (len(a))\n",
-"print (a[a==0])\n",
-"print (len(a[a==0]))\n",
-"print (a[a==1])\n",
-"print (len(a[a==1]))"
+"# Hint 1 for Exercise 1\n",
+"a = np.array([0, 1, 1, 1, 0])\n",
+"print(len(a))\n",
+"print(a[a == 0])\n",
+"print(len(a[a == 0]))\n",
+"print(a[a == 1])\n",
+"print(len(a[a == 1]))"
 ]
 },
 {
@@ -881,8 +881,8 @@
 }
 ],
 "source": [
-"# Hint 2 for Exercise 1 \n",
-"sms[sms.Labelnum==0].head()"
+"# Hint 2 for Exercise 1\n",
+"sms[sms.Labelnum == 0].head()"
 ]
 },
 {
@@ -1083,8 +1083,8 @@
 }
 ],
 "source": [
-"# Hint 3 for Exercise 1 \n",
-"sms[sms.Labelnum==1].head()"
+"# Hint 3 for Exercise 1\n",
+"sms[sms.Labelnum == 1].head()"
 ]
 },
 {
@@ -1104,8 +1104,8 @@
 ],
 "source": [
 "print(len(sms))\n",
-"print(sms[sms.Label == 'ham'].shape)\n",
-"print(sms[sms.Label == 'spam'].shape)"
+"print(sms[sms.Label == \"ham\"].shape)\n",
+"print(sms[sms.Label == \"spam\"].shape)"
 ]
 },
 {
@@ -1136,8 +1136,8 @@
 ],
 "source": [
 "# Hint 1 for Exercise 2\n",
-"print (sms.loc[0, 'Text']) \n",
-"print (\"--> The length of the first sms is\", len(sms.loc[0, 'Text']))"
+"print(sms.loc[0, \"Text\"])\n",
+"print(\"--> The length of the first sms is\", len(sms.loc[0, \"Text\"]))"
 ]
 },
 {
@@ -1160,10 +1160,13 @@
 ],
 "source": [
 "plt.figure(figsize=(10, 6))\n",
-"plt.hist(sms.loc[:, 'Text'].apply(len), bins='stone',)\n",
-"plt.title('Histogram of SMS Lengths')\n",
-"plt.xlabel('Length')\t\n",
-"plt.ylabel('Frequency')\n",
+"plt.hist(\n",
+" sms.loc[:, \"Text\"].apply(len),\n",
+" bins=\"stone\",\n",
+")\n",
+"plt.title(\"Histogram of SMS Lengths\")\n",
+"plt.xlabel(\"Length\")\n",
+"plt.ylabel(\"Frequency\")\n",
 "plt.show()"
 ]
 },
@@ -1222,30 +1225,41 @@
 }
 ],
 "source": [
-"Example = pd.DataFrame([['iphone gratuit iphone gratuit',1],['mille vert gratuit',0],\n",
-" ['iphone mille euro',0],['argent gratuit euro gratuit',1]],\n",
-" columns=['sms', 'label'])\n",
+"Example = pd.DataFrame(\n",
+" [\n",
+" [\"iphone gratuit iphone gratuit\", 1],\n",
+" [\"mille vert gratuit\", 0],\n",
+" [\"iphone mille euro\", 0],\n",
+" [\"argent gratuit euro gratuit\", 1],\n",
+" ],\n",
+" columns=[\"sms\", \"label\"],\n",
+")\n",
 "vec = CountVectorizer()\n",
 "X = vec.fit_transform(Example.sms)\n",
 "\n",
 "# 1. Displaying the vocabulary\n",
 "\n",
-"print (\"1. The vocabulary of Example is \", vec.vocabulary_)\n",
+"print(\"1. The vocabulary of Example is \", vec.vocabulary_)\n",
 "\n",
 "# 1 bis :\n",
 "\n",
-"print('The vocabulary arranged in alphabetical order : ', sorted(list(vec.vocabulary_.keys())))\n",
+"print(\n",
+" \"The vocabulary arranged in alphabetical order : \",\n",
+" sorted(list(vec.vocabulary_.keys())),\n",
+")\n",
 "\n",
-"# 2. Displaying the vectors : \n",
+"# 2. Displaying the vectors :\n",
 "\n",
-"print (\"2. The vectors corresponding to the sms are : \\n\", X.toarray())# X.toarray because \n",
-"# X is a \"sparse\" matrix. \n",
+"print(\n",
+" \"2. The vectors corresponding to the sms are : \\n\", X.toarray()\n",
+") # X.toarray because\n",
+"# X is a \"sparse\" matrix.\n",
 "\n",
-"# 3. For a new data x_0=\"iphone gratuit\", \n",
-"# you must also transform x_0 into a numerical vector before predicting. \n",
+"# 3. For a new data x_0=\"iphone gratuit\",\n",
+"# you must also transform x_0 into a numerical vector before predicting.\n",
 "\n",
-"vec_x_0=vec.transform(['iphone gratuit']).toarray() # \n",
-"print (\"3. The numerical vector corresponding to (x_0=iphone gratuit) is \\n\", vec_x_0 )"
+"vec_x_0 = vec.transform([\"iphone gratuit\"]).toarray() #\n",
+"print(\"3. The numerical vector corresponding to (x_0=iphone gratuit) is \\n\", vec_x_0)"
 ]
 },
 {
@@ -1267,7 +1281,7 @@
 ],
 "source": [
 "#'sparse' version (without \"to_array\")\n",
-"v = vec.transform(['iphone iphone gratuit'])\n",
+"v = vec.transform([\"iphone iphone gratuit\"])\n",
 "v"
 ]
 },
@@ -1309,8 +1323,8 @@
 }
 ],
 "source": [
-"# \"(0,2) 1\" means : the element in row 0 and column 2 is equal to 1. \n",
-"# \"(0,3) 2\" means : the element in row 0 and column 3 is equal to 2. \n",
+"# \"(0,2) 1\" means : the element in row 0 and column 2 is equal to 1.\n",
+"# \"(0,3) 2\" means : the element in row 0 and column 3 is equal to 2.\n",
 "print(v)"
 ]
 },
@@ -1340,8 +1354,8 @@
 }
 ],
 "source": [
-"vec_x_1 = vec.transform(['iphone vert gratuit']).toarray()\n",
-"vec_x_2 = vec.transform(['iphone rouge gratuit']).toarray()\n",
+"vec_x_1 = vec.transform([\"iphone vert gratuit\"]).toarray()\n",
+"vec_x_2 = vec.transform([\"iphone rouge gratuit\"]).toarray()\n",
 "print(vec_x_1)\n",
 "print(vec_x_2)"
 ]
@@ -1372,8 +1386,8 @@
 "outputs": [],
 "source": [
 "vectorizer = CountVectorizer()\n",
-"X = vectorizer.fit_transform(sms['Text'])\n",
-"y = sms['Labelnum']"
+"X = vectorizer.fit_transform(sms[\"Text\"])\n",
+"y = sms[\"Labelnum\"]"
 ]
 },
 {
@@ -1400,10 +1414,12 @@
 "source": [
 "from sklearn.model_selection import train_test_split\n",
 "\n",
-"X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.30,random_state=50)\n",
+"X_train, X_test, y_train, y_test = train_test_split(\n",
+" X, y, test_size=0.30, random_state=50\n",
+")\n",
 "\n",
-"print (\"size of the training set: \", X_train.shape[0])\n",
-"print (\"size of the test set :\", X_test.shape[0])"
+"print(\"size of the training set: \", X_train.shape[0])\n",
+"print(\"size of the test set :\", X_test.shape[0])"
 ]
 },
 {
@@ -1906,7 +1922,7 @@
 "from sklearn.metrics import accuracy_score\n",
 "\n",
 "y_pred = sms_bayes.predict(X_test)\n",
-"print (\"The accuracy score on the test set is \", accuracy_score(y_test, y_pred))"
+"print(\"The accuracy score on the test set is \", accuracy_score(y_test, y_pred))"
 ]
 },
 {
@@ -1969,10 +1985,17 @@
 }
 ],
 "source": [
-"my_sms = vectorizer.transform(['free trial!', 'Iphone 15 is now free', 'I want coffee', 'I want to buy a new iphone'])\n",
+"my_sms = vectorizer.transform(\n",
+" [\n",
+" \"free trial!\",\n",
+" \"Iphone 15 is now free\",\n",
+" \"I want coffee\",\n",
+" \"I want to buy a new iphone\",\n",
+" ]\n",
+")\n",
 "\n",
 "pred_my_sms = sms_bayes.predict(my_sms)\n",
-"print (pred_my_sms)"
+"print(pred_my_sms)"
 ]
 },
 {
@@ -1999,7 +2022,7 @@
 "from sklearn.naive_bayes import BernoulliNB\n",
 "\n",
 "# Load the MNIST dataset\n",
-"mnist = fetch_openml('mnist_784', version=1, parser='auto')\n",
+"mnist = fetch_openml(\"mnist_784\", version=1, parser=\"auto\")\n",
 "X, y = mnist.data, mnist.target"
 ]
 },
@@ -2036,7 +2059,9 @@
 "source": [
 "X_copy = (X.copy() >= 127).astype(int)\n",
 "\n",
-"X_train, X_test, y_train, y_test = train_test_split(X_copy, y, test_size=0.25, random_state=42)\n",
+"X_train, X_test, y_train, y_test = train_test_split(\n",
+" X_copy, y, test_size=0.25, random_state=42\n",
+")\n",
 "\n",
 "ber_bayes = BernoulliNB()\n",
 "ber_bayes.fit(X_train, y_train)\n",
@@ -2059,6 +2084,7 @@
 "outputs": [],
 "source": [
 "from keras.datasets import cifar10\n",
+"\n",
 "(x_train, y_train), (x_test, y_test) = cifar10.load_data()"
 ]
 },
@@ -2077,7 +2103,7 @@
 }
 ],
 "source": [
-"# reminder : the output is an RGB image 32 x 32 \n",
+"# reminder : the output is an RGB image 32 x 32\n",
 "print(x_train.shape)\n",
 "print(y_train.shape)"
 ]