mirror of
https://github.com/ArthurDanjou/ArtStudies.git
synced 2026-01-14 15:54:13 +01:00
Ajout de l'exécution de cellules pour le One Hot Encoding, la normalisation des variables numériques et la séparation des données en ensembles d'apprentissage et de test.
This commit is contained in:
@@ -2104,7 +2104,7 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"# Visualiser avec Plotly\n",
|
"# Visualiser avec Plotly\n",
|
||||||
"fig = px.imshow(correlation_cat,\n",
|
"fig = px.imshow(correlation_cat,\n",
|
||||||
" text_auto='.2f',\n",
|
" text_auto='.2f', # type: ignore\n",
|
||||||
" aspect=\"auto\",\n",
|
" aspect=\"auto\",\n",
|
||||||
" color_continuous_scale='RdBu_r',\n",
|
" color_continuous_scale='RdBu_r',\n",
|
||||||
" title='Matrice de corrélation des variables catégorielles (V de Cramér)')\n",
|
" title='Matrice de corrélation des variables catégorielles (V de Cramér)')\n",
|
||||||
@@ -3066,11 +3066,16 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 38,
|
||||||
"id": "b8530717",
|
"id": "b8530717",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": []
|
"source": [
|
||||||
|
"encoder = preproc.OneHotEncoder(sparse_output=False, drop='first')\n",
|
||||||
|
"encoder.fit(vars_categorielles)\n",
|
||||||
|
"vars_categorielles_enc = encoder.transform(vars_categorielles)\n",
|
||||||
|
"vars_categorielles_enc = pd.DataFrame(vars_categorielles_enc, columns=encoder.get_feature_names_out()) # type: ignore"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
@@ -3082,11 +3087,16 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 39,
|
||||||
"id": "4ff3847d",
|
"id": "4ff3847d",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": []
|
"source": [
|
||||||
|
"scaler = preproc.StandardScaler()\n",
|
||||||
|
"scaler.fit(vars_numeriques)\n",
|
||||||
|
"vars_numeriques_scaled = scaler.transform(vars_numeriques)\n",
|
||||||
|
"vars_numeriques_scaled = pd.DataFrame(vars_numeriques_scaled, columns=vars_numeriques.columns)"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
@@ -3106,11 +3116,13 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 40,
|
||||||
"id": "6a1c7907",
|
"id": "6a1c7907",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": []
|
"source": [
|
||||||
|
"train, test = train_test_split(data_model, test_size=0.2, random_state=42)"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
|||||||
Reference in New Issue
Block a user