From af98a95beabc2411a44d9c74ff4b0eada8bb0b47 Mon Sep 17 00:00:00 2001 From: Arthur DANJOU Date: Mon, 13 Oct 2025 18:24:13 +0200 Subject: [PATCH] =?UTF-8?q?Ajout=20de=20l'ex=C3=A9cution=20de=20cellules?= =?UTF-8?q?=20pour=20le=20One=20Hot=20Encoding,=20la=20normalisation=20des?= =?UTF-8?q?=20variables=20num=C3=A9riques=20et=20la=20s=C3=A9paration=20de?= =?UTF-8?q?s=20donn=C3=A9es=20en=20ensembles=20d'apprentissage=20et=20de?= =?UTF-8?q?=20test.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../TP_3/2025_TP_3_M2_ISF.ipynb | 26 ++++++++++++++----- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/M2/Machine Learning/TP_3/2025_TP_3_M2_ISF.ipynb b/M2/Machine Learning/TP_3/2025_TP_3_M2_ISF.ipynb index e894875..ea17293 100644 --- a/M2/Machine Learning/TP_3/2025_TP_3_M2_ISF.ipynb +++ b/M2/Machine Learning/TP_3/2025_TP_3_M2_ISF.ipynb @@ -2104,7 +2104,7 @@ "\n", "# Visualiser avec Plotly\n", "fig = px.imshow(correlation_cat,\n", - " text_auto='.2f',\n", + " text_auto='.2f', # type: ignore\n", " aspect=\"auto\",\n", " color_continuous_scale='RdBu_r',\n", " title='Matrice de corrélation des variables catégorielles (V de Cramér)')\n", @@ -3066,11 +3066,16 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 38, "id": "b8530717", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "encoder = preproc.OneHotEncoder(sparse_output=False, drop='first')\n", + "encoder.fit(vars_categorielles)\n", + "vars_categorielles_enc = encoder.transform(vars_categorielles)\n", + "vars_categorielles_enc = pd.DataFrame(vars_categorielles_enc, columns=encoder.get_feature_names_out()) # type: ignore" + ] }, { "cell_type": "markdown", @@ -3082,11 +3087,16 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 39, "id": "4ff3847d", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "scaler = preproc.StandardScaler()\n", + "scaler.fit(vars_numeriques)\n", + "vars_numeriques_scaled = scaler.transform(vars_numeriques)\n", + "vars_numeriques_scaled = pd.DataFrame(vars_numeriques_scaled, columns=vars_numeriques.columns)" + ] }, { "cell_type": "markdown", @@ -3106,11 +3116,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 40, "id": "6a1c7907", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "train, test = train_test_split(data_model, test_size=0.2, random_state=42)" + ] }, { "cell_type": "markdown",