Ajout du code et de l'exécution des cellules pour le one-hot encoding, la normalisation des variables numériques et la séparation des données en ensembles d'apprentissage et de test.

This commit is contained in:
2025-10-13 18:24:13 +02:00
parent a63b1bf94c
commit 1ccdcb3803

View File

@@ -2104,7 +2104,7 @@
"\n", "\n",
"# Visualiser avec Plotly\n", "# Visualiser avec Plotly\n",
"fig = px.imshow(correlation_cat,\n", "fig = px.imshow(correlation_cat,\n",
" text_auto='.2f',\n", " text_auto='.2f', # type: ignore\n",
" aspect=\"auto\",\n", " aspect=\"auto\",\n",
" color_continuous_scale='RdBu_r',\n", " color_continuous_scale='RdBu_r',\n",
" title='Matrice de corrélation des variables catégorielles (V de Cramér)')\n", " title='Matrice de corrélation des variables catégorielles (V de Cramér)')\n",
@@ -3066,11 +3066,16 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 38,
"id": "b8530717", "id": "b8530717",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [] "source": [
"encoder = preproc.OneHotEncoder(sparse_output=False, drop='first')\n",
"encoder.fit(vars_categorielles)\n",
"vars_categorielles_enc = encoder.transform(vars_categorielles)\n",
"vars_categorielles_enc = pd.DataFrame(vars_categorielles_enc, columns=encoder.get_feature_names_out()) # type: ignore"
]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
@@ -3082,11 +3087,16 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 39,
"id": "4ff3847d", "id": "4ff3847d",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [] "source": [
"scaler = preproc.StandardScaler()\n",
"scaler.fit(vars_numeriques)\n",
"vars_numeriques_scaled = scaler.transform(vars_numeriques)\n",
"vars_numeriques_scaled = pd.DataFrame(vars_numeriques_scaled, columns=vars_numeriques.columns)"
]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
@@ -3106,11 +3116,13 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 40,
"id": "6a1c7907", "id": "6a1c7907",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [] "source": [
"train, test = train_test_split(data_model, test_size=0.2, random_state=42)"
]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",