Arthur and Moritz changing on cross val

This commit is contained in:
AntoninDurousseau
2025-06-06 12:28:25 +02:00
parent fa0b16b9d5
commit 6b90a4b6d5
2 changed files with 115 additions and 110 deletions

File diff suppressed because one or more lines are too long

View File

@@ -191,9 +191,45 @@
" return model"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"train test split and scaling of the features "
]
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.metrics import f1_score, classification_report\n",
"import tensorflow as tf\n",
"import numpy as np\n",
"\n",
"# Splitting the dataset into training and testing sets\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)\n",
"\n",
"# Scaling the features\n",
"scaler = StandardScaler()\n",
"X_train_scaled = scaler.fit_transform(X_train)\n",
"X_test_scaled = scaler.transform(X_test)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Cross validation"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -297,14 +333,9 @@
" verbose=1\n",
")\n",
"\n",
"for fold, (train_idx, val_idx) in enumerate(skf.split(X, y), 1):\n",
" X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]\n",
" y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]\n",
"\n",
" # Standardisation\n",
" scaler = StandardScaler()\n",
" X_train_scaled = scaler.fit_transform(X_train)\n",
" X_val_scaled = scaler.transform(X_val)\n",
"for fold, (train_idx, val_idx) in enumerate(skf.split(X_train_scaled, y_train), 1):\n",
" X_cv_train, X_cv_val = X_train.iloc[train_idx], X_train.iloc[val_idx]\n",
" y_cv_train, y_cv_val = y_train.iloc[train_idx], y_train.iloc[val_idx]\n",
" \n",
" model = build_model()\n",
"\n",
@@ -319,10 +350,10 @@
"\n",
" # Entraînement\n",
" history = model.fit(\n",
" X_train_scaled, y_train,\n",
" X_cv_train, y_cv_train,\n",
" epochs=50,\n",
" batch_size=8,\n",
" validation_data=(X_val_scaled, y_val),\n",
" validation_data=(X_cv_val, y_cv_val),\n",
" callbacks=[callback],\n",
" verbose=0,\n",
" class_weight={0: 1.0, 1: 2.0}\n",
@@ -331,8 +362,8 @@
" histories.append(history.history)\n",
"\n",
" # Prédiction & F1\n",
" y_pred_val = (model.predict(X_val_scaled) > 0.5).astype(int)\n",
" score = f1_score(y_val, y_pred_val)\n",
" y_pred_val = (model.predict(X_cv_val) > 0.5).astype(int)\n",
" score = f1_score(y_cv_val, y_pred_val)\n",
" f1_scores.append(score)\n",
" print(f\"Fold {fold} - F1-score : {score:.4f}\")\n",
"\n",
@@ -400,7 +431,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -430,18 +461,6 @@
}
],
"source": [
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.metrics import f1_score, classification_report\n",
"import tensorflow as tf\n",
"import numpy as np\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)\n",
"\n",
"scaler = StandardScaler()\n",
"X_train_scaled = scaler.fit_transform(X_train)\n",
"X_test_scaled = scaler.transform(X_test)\n",
"\n",
"model = build_model()\n",
"\n",
"model.compile(\n",