mirror of
https://github.com/ArthurDanjou/ArtStudies.git
synced 2026-01-31 16:29:35 +01:00
Refactor code formatting and improve readability in Jupyter notebooks for TP_4 and TP_5
- Adjusted indentation and line breaks for better clarity in function definitions and import statements.
- Standardized string quotes for consistency across the codebase.
- Enhanced readability of DataFrame creation and manipulation by breaking long lines into multiple lines.
- Cleaned up print statements and comments for improved understanding.
- Ensured consistent use of whitespace around operators and after commas.
This commit is contained in:
@@ -69,11 +69,11 @@
|
||||
"from sklearn import metrics\n",
|
||||
"from sklearn.ensemble import GradientBoostingClassifier\n",
|
||||
"from sklearn.model_selection import (\n",
|
||||
" GridSearchCV,\n",
|
||||
" StratifiedKFold,\n",
|
||||
" cross_val_score,\n",
|
||||
" train_test_split,\n",
|
||||
")\n"
|
||||
" GridSearchCV,\n",
|
||||
" StratifiedKFold,\n",
|
||||
" cross_val_score,\n",
|
||||
" train_test_split,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -91,12 +91,18 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def cramers_V(var1,var2) :\n",
|
||||
" crosstab = np.array(pd.crosstab(var1,var2, rownames=None, colnames=None)) # Cross table building\n",
|
||||
" stat = chi2_contingency(crosstab)[0] # Keeping of the test statistic of the Chi2 test\n",
|
||||
" obs = np.sum(crosstab) # Number of observations\n",
|
||||
" mini = min(crosstab.shape)-1 # Take the minimum value between the columns and the rows of the cross table\n",
|
||||
" return (stat/(obs*mini))"
|
||||
"def cramers_V(var1, var2):\n",
|
||||
" crosstab = np.array(\n",
|
||||
" pd.crosstab(var1, var2, rownames=None, colnames=None)\n",
|
||||
" ) # Cross table building\n",
|
||||
" stat = chi2_contingency(crosstab)[\n",
|
||||
" 0\n",
|
||||
" ] # Keeping of the test statistic of the Chi2 test\n",
|
||||
" obs = np.sum(crosstab) # Number of observations\n",
|
||||
" mini = (\n",
|
||||
" min(crosstab.shape) - 1\n",
|
||||
" ) # Take the minimum value between the columns and the rows of the cross table\n",
|
||||
" return stat / (obs * mini)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -133,7 +139,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"path = input_path + '/base_retraitee.csv'\n",
|
||||
"path = input_path + \"/base_retraitee.csv\"\n",
|
||||
"data_retraitee = pd.read_csv(path, sep=\",\", decimal=\".\")"
|
||||
]
|
||||
},
|
||||
@@ -16225,7 +16231,7 @@
|
||||
" if len(data_model[col].unique()) == 2:\n",
|
||||
" variables_categorielles.append(data_model[col])\n",
|
||||
" else:\n",
|
||||
" variables_categorielles.append(data_model[col])\n"
|
||||
" variables_categorielles.append(data_model[col])"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -16653,7 +16659,7 @@
|
||||
" if v_cramer_resultats.iloc[i, j] > 0.7:\n",
|
||||
" print(\n",
|
||||
" f\"{v_cramer_resultats.index.to_numpy()[i]} et {v_cramer_resultats.columns[j]} sont trop dépendantes, V-CRAMER = {v_cramer_resultats.iloc[i, j]}\"\n",
|
||||
" )\n"
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -16851,7 +16857,7 @@
|
||||
" if abs(correlations_num.iloc[i, j]) > 0.7:\n",
|
||||
" print(\n",
|
||||
" f\"{correlations_num.index.to_numpy()[i]} et {correlations_num.columns[j]} sont trop dépendantes, corr = {correlations_num.iloc[i, j]}\"\n",
|
||||
" )\n"
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -17820,7 +17826,7 @@
|
||||
" cv=StratifiedKFold(\n",
|
||||
" n_splits=num_folds, shuffle=True, random_state=42\n",
|
||||
" ), # Validation croisée avec 5 folds\n",
|
||||
" scoring='recall', # Métrique d'évaluation (moins c'est mieux)\n",
|
||||
" scoring=\"recall\", # Métrique d'évaluation (moins c'est mieux)\n",
|
||||
" n_jobs=-1, # Utiliser tous les cœurs du processeur\n",
|
||||
")\n",
|
||||
"\n",
|
||||
@@ -17877,14 +17883,18 @@
|
||||
],
|
||||
"source": [
|
||||
"# Recall de chaque fold\n",
|
||||
"recall_scores = cross_val_score(best_gbc, X_train, y_train, cv=num_folds, scoring='recall')\n",
|
||||
"recall_scores = cross_val_score(\n",
|
||||
" best_gbc, X_train, y_train, cv=num_folds, scoring=\"recall\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Afficher les scores pour chaque fold\n",
|
||||
"for i, score in enumerate(recall_scores):\n",
|
||||
" print(f\"Recall pour le fold {i + 1}: {score}\")\n",
|
||||
"\n",
|
||||
"# Accuracy de chaque fold\n",
|
||||
"accuracy_scores = cross_val_score(best_gbc, X_train, y_train, cv=num_folds, scoring='accuracy')\n",
|
||||
"accuracy_scores = cross_val_score(\n",
|
||||
" best_gbc, X_train, y_train, cv=num_folds, scoring=\"accuracy\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Afficher les scores pour chaque fold\n",
|
||||
"print(\"\\n\")\n",
|
||||
@@ -17892,12 +17902,14 @@
|
||||
" print(f\"Accuracy pour le fold {i + 1}: {score}\")\n",
|
||||
"\n",
|
||||
"# Precision de chaque fold\n",
|
||||
"precision_scores = cross_val_score(best_gbc, X_train, y_train, cv=num_folds, scoring='precision')\n",
|
||||
"precision_scores = cross_val_score(\n",
|
||||
" best_gbc, X_train, y_train, cv=num_folds, scoring=\"precision\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Afficher les scores pour chaque fold\n",
|
||||
"print(\"\\n\")\n",
|
||||
"for i, score in enumerate(precision_scores):\n",
|
||||
" print(f\"Precision pour le fold {i + 1}: {score}\")\n"
|
||||
" print(f\"Precision pour le fold {i + 1}: {score}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -30178,7 +30190,7 @@
|
||||
"# Observation de la distribution sur Y_train\n",
|
||||
"df = pd.DataFrame(y_train, columns=[\"SINISTRE\"])\n",
|
||||
"fig = px.histogram(df, x=\"SINISTRE\", title=\"Distribution de la variable Y_train\")\n",
|
||||
"fig.show()\n"
|
||||
"fig.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -52502,7 +52514,7 @@
|
||||
"fig = px.histogram(\n",
|
||||
" df, x=\"SINISTRE\", title=\"Distribution de la variable Y_train_resampled\"\n",
|
||||
")\n",
|
||||
"fig.show()\n"
|
||||
"fig.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -52530,7 +52542,7 @@
|
||||
"num_folds = 5\n",
|
||||
"\n",
|
||||
"# Initialisation du modèle GradientBoostingClassifier\n",
|
||||
"gb = GradientBoostingClassifier(random_state=42)\n"
|
||||
"gb = GradientBoostingClassifier(random_state=42)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -52567,7 +52579,7 @@
|
||||
"print(\"Meilleurs hyperparamètres : \", best_params)\n",
|
||||
"\n",
|
||||
"# Initialiser un modèle avec les meilleurs hyperparamètres\n",
|
||||
"best_gbc = GradientBoostingClassifier(random_state=42, **best_params)\n"
|
||||
"best_gbc = GradientBoostingClassifier(random_state=42, **best_params)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user