# Patch for M2/Machine Learning/TP_3/2025_TP_3_M2_ISF.ipynb (four hunks).
# Text before the "diff --git" header is commentary and is skipped by patch tools.
#
# Hunk 1: appends a "# type: ignore" marker to the text_auto argument of the
#         px.imshow call that plots the categorical correlation matrix.
# Hunk 2: fills cell b8530717 (execution_count 38) - one-hot encodes
#         vars_categorielles with OneHotEncoder(sparse_output=False, drop='first')
#         and wraps the result in a DataFrame named after
#         encoder.get_feature_names_out().
# Hunk 3: fills cell 4ff3847d (execution_count 39) - standardises
#         vars_numeriques with StandardScaler and wraps the result in a
#         DataFrame that reuses the original column names.
# Hunk 4: fills cell 6a1c7907 (execution_count 40) - splits data_model into
#         train/test with test_size=0.2 and random_state=42.
#
# NOTE(review): both rebuilt DataFrames are created without index=, so they get
#   a fresh default index instead of the input's row index - confirm this
#   matches how data_model is assembled (not visible in this patch).
# NOTE(review): the encoder and scaler are fitted in cells that precede the
#   train/test split; if data_model is derived from their outputs, test rows
#   contribute to the fitted statistics - TODO confirm this is intended.
#
diff --git a/M2/Machine Learning/TP_3/2025_TP_3_M2_ISF.ipynb b/M2/Machine Learning/TP_3/2025_TP_3_M2_ISF.ipynb
index e894875..ea17293 100644
--- a/M2/Machine Learning/TP_3/2025_TP_3_M2_ISF.ipynb
+++ b/M2/Machine Learning/TP_3/2025_TP_3_M2_ISF.ipynb
@@ -2104,7 +2104,7 @@
     "\n",
     "# Visualiser avec Plotly\n",
     "fig = px.imshow(correlation_cat,\n",
-    " text_auto='.2f',\n",
+    " text_auto='.2f', # type: ignore\n",
     " aspect=\"auto\",\n",
     " color_continuous_scale='RdBu_r',\n",
     " title='Matrice de corrélation des variables catégorielles (V de Cramér)')\n",
@@ -3066,11 +3066,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 38,
    "id": "b8530717",
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "encoder = preproc.OneHotEncoder(sparse_output=False, drop='first')\n",
+    "encoder.fit(vars_categorielles)\n",
+    "vars_categorielles_enc = encoder.transform(vars_categorielles)\n",
+    "vars_categorielles_enc = pd.DataFrame(vars_categorielles_enc, columns=encoder.get_feature_names_out()) # type: ignore"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -3082,11 +3087,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 39,
    "id": "4ff3847d",
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "scaler = preproc.StandardScaler()\n",
+    "scaler.fit(vars_numeriques)\n",
+    "vars_numeriques_scaled = scaler.transform(vars_numeriques)\n",
+    "vars_numeriques_scaled = pd.DataFrame(vars_numeriques_scaled, columns=vars_numeriques.columns)"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -3106,11 +3116,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 40,
    "id": "6a1c7907",
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "train, test = train_test_split(data_model, test_size=0.2, random_state=42)"
+   ]
   },
   {
    "cell_type": "markdown",