Ajout de l'exécution de cellules pour le One Hot Encoding, la normalisation des variables numériques et la séparation des données en ensembles d'apprentissage et de test.

This commit is contained in:
2025-10-13 18:24:13 +02:00
parent d5ac1ff381
commit af98a95bea

View File

@@ -2104,7 +2104,7 @@
"\n",
"# Visualiser avec Plotly\n",
"fig = px.imshow(correlation_cat,\n",
" text_auto='.2f',\n",
" text_auto='.2f', # type: ignore\n",
" aspect=\"auto\",\n",
" color_continuous_scale='RdBu_r',\n",
" title='Matrice de corrélation des variables catégorielles (V de Cramér)')\n",
@@ -3066,11 +3066,16 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 38,
"id": "b8530717",
"metadata": {},
"outputs": [],
"source": []
"source": [
"# One-hot encode the categorical variables (dense output, drop='first').\n",
"# Fitting and transforming happen in one call; the fitted encoder is kept so its\n",
"# generated feature names can label the resulting columns.\n",
"encoder = preproc.OneHotEncoder(drop='first', sparse_output=False)\n",
"vars_categorielles_enc = pd.DataFrame(encoder.fit_transform(vars_categorielles), columns=encoder.get_feature_names_out())  # type: ignore"
]
},
{
"cell_type": "markdown",
@@ -3082,11 +3087,16 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 39,
"id": "4ff3847d",
"metadata": {},
"outputs": [],
"source": []
"source": [
"# Fit the scaler on the numeric variables and transform them in one call,\n",
"# then rebuild a DataFrame that reuses the original column names.\n",
"scaler = preproc.StandardScaler()\n",
"vars_numeriques_scaled = pd.DataFrame(\n",
"    scaler.fit_transform(vars_numeriques),\n",
"    columns=vars_numeriques.columns,\n",
")"
]
},
{
"cell_type": "markdown",
@@ -3106,11 +3116,13 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 40,
"id": "6a1c7907",
"metadata": {},
"outputs": [],
"source": []
"source": [
"# Split data_model into train and test parts: test_size=0.2 reserves 20% for testing,\n",
"# and the fixed random_state makes the split repeatable across runs.\n",
"train, test = train_test_split(\n",
"    data_model,\n",
"    random_state=42,\n",
"    test_size=0.2,\n",
")"
]
},
{
"cell_type": "markdown",