Refactor code formatting and improve readability in Jupyter notebooks for TP_4 and TP_5

- Adjusted indentation and line breaks for better clarity in function definitions and import statements.
- Standardized string quotes for consistency across the codebase.
- Enhanced readability of DataFrame creation and manipulation by breaking long lines into multiple lines.
- Cleaned up print statements and comments for improved understanding.
- Ensured consistent use of whitespace around operators and after commas.
2026-02-02 21:31:32 +01:00 · 2025-11-25 10:46:16 +01:00
parent 751412c1cd
commit e57995ba85
17 changed files with 11975 additions and 11713 deletions
--- a/Learning/TP_1/2025_TP_1_M2_ISF.ipynb
+++ b/Learning/TP_1/2025_TP_1_M2_ISF.ipynb
@@ -60,11 +60,11 @@
    }
   ],
   "source": [
-    "print(3+4)\n",
-    "print(5*2.5)\n",
+    "print(3 + 4)\n",
+    "print(5 * 2.5)\n",
    "print(int)\n",
    "print(float)\n",
-    "a=2**90\n",
+    "a = 2**90\n",
    "print(a)\n",
    "print(type(a))"
   ]
@@ -90,11 +90,11 @@
    "\n",
    "import pandas as pd\n",
    "\n",
-    "#from math import *\n",
-    "#help(math)\n",
-    "print (math.sqrt(3))\n",
-    "print (math.floor(3.2))\n",
-    "print (math.cos(math.pi/3.0))"
+    "# from math import *\n",
+    "# help(math)\n",
+    "print(math.sqrt(3))\n",
+    "print(math.floor(3.2))\n",
+    "print(math.cos(math.pi / 3.0))"
   ]
  },
  {
@@ -121,11 +121,11 @@
    }
   ],
   "source": [
-    "print('33' + \"42\")\n",
-    "a=\"toto est toto\"\n",
+    "print(\"33\" + \"42\")\n",
+    "a = \"toto est toto\"\n",
    "a[4:]\n",
-    "#Tout est objet\n",
-    "print(a.split(' '))"
+    "# Tout est objet\n",
+    "print(a.split(\" \"))"
   ]
  },
  {
@@ -143,7 +143,9 @@
    }
   ],
   "source": [
-    "salutation = \"Bonjour, monsieur {}. Comment allez vous en ce {}?\".format(\"XX\", \"Mardi 19 septembre\")\n",
+    "salutation = \"Bonjour, monsieur {}. Comment allez vous en ce {}?\".format(\n",
+    "    \"XX\", \"Mardi 19 septembre\"\n",
+    ")\n",
    "print(salutation)"
   ]
  },
@@ -195,12 +197,12 @@
    }
   ],
   "source": [
-    "a = [1,'q',2,3,5,8,'TOTO']\n",
-    "print (a[1])\n",
-    "print (a[-1])\n",
-    "print (a[1:3])\n",
-    "print (a[-4:-2])\n",
-    "print([5]*4)"
+    "a = [1, \"q\", 2, 3, 5, 8, \"TOTO\"]\n",
+    "print(a[1])\n",
+    "print(a[-1])\n",
+    "print(a[1:3])\n",
+    "print(a[-4:-2])\n",
+    "print([5] * 4)"
   ]
  },
  {
@@ -221,10 +223,10 @@
    }
   ],
   "source": [
-    "print (range(4, 10))\n",
-    "print (range(5, 50, 3))\n",
-    "print ([3,1,4] + [1,5,9])\n",
-    "print (len(range(4, 10)))"
+    "print(range(4, 10))\n",
+    "print(range(5, 50, 3))\n",
+    "print([3, 1, 4] + [1, 5, 9])\n",
+    "print(len(range(4, 10)))"
   ]
  },
  {
@@ -258,28 +260,28 @@
   "source": [
    "a = 2\n",
    "if 5 > a:\n",
-    "    print (\"Cinq!\")\n",
+    "    print(\"Cinq!\")\n",
    "else:\n",
-    "    print (\"a!\")\n",
+    "    print(\"a!\")\n",
    "\n",
-    "#Zoom sur indentation\n",
-    "num=3\n",
+    "# Zoom sur indentation\n",
+    "num = 3\n",
    "if num == 1:\n",
-    "    print (\"C'est 1\")\n",
+    "    print(\"C'est 1\")\n",
    "elif num == 2:\n",
-    "    print (\"C'est 2\")\n",
+    "    print(\"C'est 2\")\n",
    "elif num == 3:\n",
-    "    print (\"C'est 3\")\n",
-    "else :\n",
-    "    print (\"Autre que 1, 2 et 3\")\n",
+    "    print(\"C'est 3\")\n",
+    "else:\n",
+    "    print(\"Autre que 1, 2 et 3\")\n",
    "\n",
-    "print(num in [1,2,3])\n",
-    "print(num not in [1,2,3])\n",
+    "print(num in [1, 2, 3])\n",
+    "print(num not in [1, 2, 3])\n",
    "print(num != 5)\n",
    "\n",
-    "print(num in [1,2,3] and num >0)\n",
+    "print(num in [1, 2, 3] and num > 0)\n",
    "\n",
-    "print(not 5==3)"
+    "print(not 5 == 3)"
   ]
  },
  {
@@ -313,21 +315,21 @@
    }
   ],
   "source": [
-    "a=[3,4,5]\n",
-    "#Boucle for\n",
+    "a = [3, 4, 5]\n",
+    "# Boucle for\n",
    "for i in range(len(a)):\n",
    "    a[i] += 3\n",
-    "print (a)\n",
+    "print(a)\n",
    "%timeit for i in range(len(a)):a[i] += 3\n",
-    "print (a)\n",
+    "print(a)\n",
    "\n",
-    "b=[3,4,5]\n",
-    "print([i+3 for i in b])\n",
+    "b = [3, 4, 5]\n",
+    "print([i + 3 for i in b])\n",
    "%timeit [i+3 for i in b]\n",
-    "print (b)\n",
+    "print(b)\n",
    "\n",
-    "for i,item in enumerate(b):\n",
-    "    print(i,item)"
+    "for i, item in enumerate(b):\n",
+    "    print(i, item)"
   ]
  },
  {
@@ -369,18 +371,18 @@
    }
   ],
   "source": [
-    "#Boucle While , break et continue\n",
+    "# Boucle While , break et continue\n",
    "print(\"démarrage boucle\")\n",
-    "i=0\n",
-    "while i<100:\n",
+    "i = 0\n",
+    "while i < 100:\n",
    "    print(i)\n",
-    "    i+=1\n",
-    "    if i==52:\n",
-    "        i+=10\n",
+    "    i += 1\n",
+    "    if i == 52:\n",
+    "        i += 10\n",
    "        print(\"increment de 10\")\n",
    "        continue\n",
-    "    i+=2\n",
-    "    if i ==77:\n",
+    "    i += 2\n",
+    "    if i == 77:\n",
    "        break"
   ]
  },
@@ -408,8 +410,10 @@
   ],
   "source": [
    "def square(x):\n",
-    "    return x*x\n",
-    "print (square(3))"
+    "    return x * x\n",
+    "\n",
+    "\n",
+    "print(square(3))"
   ]
  },
  {
@@ -486,14 +490,14 @@
    }
   ],
   "source": [
-    "#Ecrivez votre code ici\n",
+    "# Ecrivez votre code ici\n",
    "\n",
-    "serie = pd.Series({'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5})\n",
+    "serie = pd.Series({\"a\": 1, \"b\": 2, \"c\": 3, \"d\": 4, \"e\": 5})\n",
    "\n",
    "print(serie)\n",
    "print(serie.index)\n",
    "print(serie.mean())\n",
-    "print(serie['b'])\n",
+    "print(serie[\"b\"])\n",
    "print(serie.b)"
   ]
  },
@@ -594,11 +598,8 @@
    }
   ],
   "source": [
-    "#Ecrivez votre code ici\n",
-    "df = pd.DataFrame({\n",
-    "    's': s,\n",
-    "    't': t\n",
-    "})\n",
+    "# Ecrivez votre code ici\n",
+    "df = pd.DataFrame({\"s\": s, \"t\": t})\n",
    "\n",
    "print(df)"
   ]
@@ -633,8 +634,8 @@
    }
   ],
   "source": [
-    "#Ecrivez votre code ici\n",
-    "df['SUM'] = df['s'] + df['t']\n",
+    "# Ecrivez votre code ici\n",
+    "df[\"SUM\"] = df[\"s\"] + df[\"t\"]\n",
    "\n",
    "print(df)"
   ]
@@ -662,8 +663,8 @@
    }
   ],
   "source": [
-    "#Ecrivez votre code ici\n",
-    "print(df['SUM'].mean())"
+    "# Ecrivez votre code ici\n",
+    "print(df[\"SUM\"].mean())"
   ]
  },
  {
@@ -774,7 +775,7 @@
    }
   ],
   "source": [
-    "#Ecrivez votre code ici\n",
+    "# Ecrivez votre code ici\n",
    "import numpy as np\n",
    "\n",
    "X = np.random.standard_normal(size=250)\n",
@@ -809,7 +810,7 @@
    }
   ],
   "source": [
-    "index = pd.date_range('2012-01-01', periods=250, freq='D')\n",
+    "index = pd.date_range(\"2012-01-01\", periods=250, freq=\"D\")\n",
    "\n",
    "serie = pd.Series(X, index=index)\n",
    "print(serie)"
@@ -872,7 +873,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "#Data frame\n",
+    "# Data frame\n",
    "import pandas as pd\n",
    "import plotly.express as px\n",
    "import plotly.graph_objects as gp\n",
@@ -923,8 +924,8 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "path = input_path + '/base_modelisation.csv'\n",
-    "data_set = pd.read_csv(path,sep=\";\",decimal=\",\")"
+    "path = input_path + \"/base_modelisation.csv\"\n",
+    "data_set = pd.read_csv(path, sep=\";\", decimal=\",\")"
   ]
  },
  {
@@ -2413,7 +2414,7 @@
    }
   ],
   "source": [
-    "#Dimensions\n",
+    "# Dimensions\n",
    "data_set.shape"
   ]
  },
@@ -2568,7 +2569,7 @@
    }
   ],
   "source": [
-    "#Liste des colonnes selon leur type\n",
+    "# Liste des colonnes selon leur type\n",
    "data_set.dtypes"
   ]
  },
@@ -2600,13 +2601,13 @@
    }
   ],
   "source": [
-    "#Ecrivez votre code ici\n",
+    "# Ecrivez votre code ici\n",
    "quantitatives = []\n",
    "categorielles = []\n",
    "binaires = []\n",
    "\n",
    "for col in data_set.columns:\n",
-    "    if data_set[col].dtype in ['int64', 'float64']:\n",
+    "    if data_set[col].dtype in [\"int64\", \"float64\"]:\n",
    "        if len(data_set[col].dropna().unique()) == 2:\n",
    "            binaires.append(col)\n",
    "        else:\n",
@@ -2619,7 +2620,7 @@
    "\n",
    "print(\"Variables quantitatives :\", quantitatives)\n",
    "print(\"\\nVariables catégorielles :\", categorielles)\n",
-    "print(\"\\nVariables binaires :\", binaires)\n"
+    "print(\"\\nVariables binaires :\", binaires)"
   ]
  },
  {
@@ -2638,7 +2639,7 @@
    }
   ],
   "source": [
-    "#Ecrivez votre code ici\n",
+    "# Ecrivez votre code ici\n",
    "variables_avec_na = []\n",
    "\n",
    "for col in data_set.columns:\n",
@@ -3524,8 +3525,10 @@
    }
   ],
   "source": [
-    "fig = px.histogram(data_set.sort_values('ANNEE_CTR'), x=\"ANNEE_CTR\")\n",
-    "fig.update_xaxes(type='category') #Cette ligne permet de forcer la variable comme variable catégorielle et non numérique\n",
+    "fig = px.histogram(data_set.sort_values(\"ANNEE_CTR\"), x=\"ANNEE_CTR\")\n",
+    "fig.update_xaxes(\n",
+    "    type=\"category\"\n",
+    ")  # Cette ligne permet de forcer la variable comme variable catégorielle et non numérique\n",
    "\n",
    "fig.show()"
   ]
@@ -18655,8 +18658,13 @@
    }
   ],
   "source": [
-    "fig = px.histogram(data_set, x=\"CONTRAT_ANCIENNETE\",\n",
-    "                category_orders={'CONTRAT_ANCIENNETE': ['(-1,0]','(0,1]',\"(1,2]\",\"(2,5]\",\"(5,10]\"]})\n",
+    "fig = px.histogram(\n",
+    "    data_set,\n",
+    "    x=\"CONTRAT_ANCIENNETE\",\n",
+    "    category_orders={\n",
+    "        \"CONTRAT_ANCIENNETE\": [\"(-1,0]\", \"(0,1]\", \"(1,2]\", \"(2,5]\", \"(5,10]\"]\n",
+    "    },\n",
+    ")\n",
    "fig.show()"
   ]
  },
@@ -48890,8 +48898,13 @@
    }
   ],
   "source": [
-    "fig = px.histogram(data_set, x=\"GROUPE_KM\",\n",
-    "                category_orders={'GROUPE_KM': [\"[0;20000[\",\"[20000;40000[\",\"[40000;60000[\",\"[60000;99999[\"]})\n",
+    "fig = px.histogram(\n",
+    "    data_set,\n",
+    "    x=\"GROUPE_KM\",\n",
+    "    category_orders={\n",
+    "        \"GROUPE_KM\": [\"[0;20000[\", \"[20000;40000[\", \"[40000;60000[\", \"[60000;99999[\"]\n",
+    "    },\n",
+    ")\n",
    "fig.show()"
   ]
  },
@@ -64005,9 +64018,11 @@
    }
   ],
   "source": [
-    "#Ecrivez votre code ici\n",
-    "fig = px.histogram(data_set.sort_values('ZONE_RISQUE'), x=\"ZONE_RISQUE\")\n",
-    "fig.update_xaxes(type='category') #Cette ligne permet de forcer la variable comme variable catégorielle et non numérique\n",
+    "# Ecrivez votre code ici\n",
+    "fig = px.histogram(data_set.sort_values(\"ZONE_RISQUE\"), x=\"ZONE_RISQUE\")\n",
+    "fig.update_xaxes(\n",
+    "    type=\"category\"\n",
+    ")  # Cette ligne permet de forcer la variable comme variable catégorielle et non numérique\n",
    "\n",
    "fig.show()"
   ]
@@ -64860,9 +64875,13 @@
    }
   ],
   "source": [
-    "#Ecrivez votre code ici\n",
-    "fig = px.histogram(data_set.sort_values('AGE_ASSURE_PRINCIPAL'), x=\"AGE_ASSURE_PRINCIPAL\")\n",
-    "fig.update_xaxes(type='category') #Cette ligne permet de forcer la variable comme variable catégorielle et non numérique\n",
+    "# Ecrivez votre code ici\n",
+    "fig = px.histogram(\n",
+    "    data_set.sort_values(\"AGE_ASSURE_PRINCIPAL\"), x=\"AGE_ASSURE_PRINCIPAL\"\n",
+    ")\n",
+    "fig.update_xaxes(\n",
+    "    type=\"category\"\n",
+    ")  # Cette ligne permet de forcer la variable comme variable catégorielle et non numérique\n",
    "\n",
    "fig.show()"
   ]
@@ -79977,9 +79996,11 @@
    }
   ],
   "source": [
-    "#Ecrivez votre code ici\n",
-    "fig = px.histogram(data_set.sort_values('GENRE'), x=\"GENRE\")\n",
-    "fig.update_xaxes(type='category') #Cette ligne permet de forcer la variable comme variable catégorielle et non numérique\n",
+    "# Ecrivez votre code ici\n",
+    "fig = px.histogram(data_set.sort_values(\"GENRE\"), x=\"GENRE\")\n",
+    "fig.update_xaxes(\n",
+    "    type=\"category\"\n",
+    ")  # Cette ligne permet de forcer la variable comme variable catégorielle et non numérique\n",
    "\n",
    "fig.show()"
   ]
@@ -80010,14 +80031,19 @@
    }
   ],
   "source": [
-    "#Préparation des données : compter le nombre de femmes et hommes par âge\n",
-    "tmp = data_set[[\"AGE_ASSURE_PRINCIPAL\",\"GENRE\"]].value_counts().to_frame('counts').reset_index()\n",
+    "# Préparation des données : compter le nombre de femmes et hommes par âge\n",
+    "tmp = (\n",
+    "    data_set[[\"AGE_ASSURE_PRINCIPAL\", \"GENRE\"]]\n",
+    "    .value_counts()\n",
+    "    .to_frame(\"counts\")\n",
+    "    .reset_index()\n",
+    ")\n",
    "data_f = tmp[tmp[\"GENRE\"] == \"F\"]\n",
    "data_h = tmp[tmp[\"GENRE\"] == \"M\"]\n",
    "\n",
-    "#Comparaison des âges\n",
-    "list_1=sorted(data_f[\"AGE_ASSURE_PRINCIPAL\"].unique())\n",
-    "list_2=sorted(data_h[\"AGE_ASSURE_PRINCIPAL\"].unique())\n",
+    "# Comparaison des âges\n",
+    "list_1 = sorted(data_f[\"AGE_ASSURE_PRINCIPAL\"].unique())\n",
+    "list_2 = sorted(data_h[\"AGE_ASSURE_PRINCIPAL\"].unique())\n",
    "\n",
    "print(\"Eléments dans la liste 2 mais pas dans la liste 1 \")\n",
    "print(list(set(list_2) - set(list_1)))\n",
@@ -80034,11 +80060,19 @@
   "outputs": [],
   "source": [
    "# Il faut ajouter l'âge 13 dans la liste 2 (data_h)\n",
-    "data_h = pd.concat([data_h, pd.DataFrame([[13, \"M\",0]],columns=['AGE_ASSURE_PRINCIPAL', 'GENRE','counts'])], ignore_index=True)\n",
+    "data_h = pd.concat(\n",
+    "    [\n",
+    "        data_h,\n",
+    "        pd.DataFrame(\n",
+    "            [[13, \"M\", 0]], columns=[\"AGE_ASSURE_PRINCIPAL\", \"GENRE\", \"counts\"]\n",
+    "        ),\n",
+    "    ],\n",
+    "    ignore_index=True,\n",
+    ")\n",
    "\n",
-    "#On ordonne les dataframes\n",
-    "data_h = data_h.sort_values('AGE_ASSURE_PRINCIPAL', ascending = True)\n",
-    "data_f = data_f.sort_values('AGE_ASSURE_PRINCIPAL', ascending = True)"
+    "# On ordonne les dataframes\n",
+    "data_h = data_h.sort_values(\"AGE_ASSURE_PRINCIPAL\", ascending=True)\n",
+    "data_f = data_f.sort_values(\"AGE_ASSURE_PRINCIPAL\", ascending=True)"
   ]
  },
  {
@@ -80049,9 +80083,9 @@
   "outputs": [],
   "source": [
    "# Graphique\n",
-    "y_age = data_h['AGE_ASSURE_PRINCIPAL']\n",
-    "x_M = data_h['counts']\n",
-    "x_F = data_f['counts'] * -1"
+    "y_age = data_h[\"AGE_ASSURE_PRINCIPAL\"]\n",
+    "x_M = data_h[\"counts\"]\n",
+    "x_F = data_f[\"counts\"] * -1"
   ]
  },
  {
@@ -80907,23 +80941,21 @@
    "fig = gp.Figure()\n",
    "\n",
    "# Ajout des données H\n",
-    "fig.add_trace(gp.Bar(y= y_age, x = x_M,\n",
-    "                     name = 'Hommes',\n",
-    "                     orientation = 'h'))\n",
+    "fig.add_trace(gp.Bar(y=y_age, x=x_M, name=\"Hommes\", orientation=\"h\"))\n",
    "\n",
    "# Ajout des données F\n",
-    "fig.add_trace(gp.Bar(y = y_age, x = x_F,\n",
-    "                     name = 'Femmes', orientation = 'h'))\n",
+    "fig.add_trace(gp.Bar(y=y_age, x=x_F, name=\"Femmes\", orientation=\"h\"))\n",
    "\n",
    "# layout du graphique\n",
-    "fig.update_layout(title = 'Population du portefeuille',\n",
-    "                 title_font_size = 22, barmode = 'relative',\n",
-    "                 bargap = 0.0, bargroupgap = 0,\n",
-    "                 xaxis = {'title': 'Count',\n",
-    "                              'title_font_size': 14},\n",
-    "                  yaxis = {'title': 'Age',\n",
-    "                              'title_font_size': 14}\n",
-    "                 )\n",
+    "fig.update_layout(\n",
+    "    title=\"Population du portefeuille\",\n",
+    "    title_font_size=22,\n",
+    "    barmode=\"relative\",\n",
+    "    bargap=0.0,\n",
+    "    bargroupgap=0,\n",
+    "    xaxis={\"title\": \"Count\", \"title_font_size\": 14},\n",
+    "    yaxis={\"title\": \"Age\", \"title_font_size\": 14},\n",
+    ")\n",
    "\n",
    "fig.show()"
   ]
@@ -81795,7 +81827,7 @@
    }
   ],
   "source": [
-    "fig = px.histogram(data_set[data_set['CHARGE'] >= 0], x=\"CHARGE\", nbins=50)\n",
+    "fig = px.histogram(data_set[data_set[\"CHARGE\"] >= 0], x=\"CHARGE\", nbins=50)\n",
    "fig.update_layout(title=\"Distribution des coûts des sinistres\")\n",
    "fig.show()"
   ]
@@ -81859,8 +81891,8 @@
    "# Nombre de NA par variable\n",
    "nan_count = pd.DataFrame(data_set.isna().sum(), columns=[\"Nombre_NA\"])\n",
    "\n",
-    "#Ajout du % par rapport à la taille de la BD\n",
-    "nan_count[\"Pourcentage\"]= nan_count.divide(data_set.shape[0])*100\n",
+    "# Ajout du % par rapport à la taille de la BD\n",
+    "nan_count[\"Pourcentage\"] = nan_count.divide(data_set.shape[0]) * 100\n",
    "\n",
    "print(nan_count)"
   ]
@@ -82271,7 +82303,7 @@
    "data_retraitee = data_set\n",
    "\n",
    "# Option 1 : Suppression des variables avec trop de NA (PUISSANCE_VEHICULE)\n",
-    "data_retraitee = data_retraitee.drop(\"PUISSANCE_VEHICULE\", axis='columns')\n",
+    "data_retraitee = data_retraitee.drop(\"PUISSANCE_VEHICULE\", axis=\"columns\")\n",
    "data_retraitee.head()"
   ]
  },
@@ -82293,14 +82325,22 @@
    }
   ],
   "source": [
-    "#Option 2 : Remplacer par la classe la plus représentée/valeur moyenne (GROUPE_KM,GENRE,\n",
+    "# Option 2 : Remplacer par la classe la plus représentée/valeur moyenne (GROUPE_KM,GENRE,\n",
    "# ANNEE_CONSTRUCTION,VALEUR_DU_BIEN,DEUXIEME_CONDUCTEUR)\n",
    "\n",
-    "data_retraitee[\"GROUPE_KM\"] = data_retraitee[\"GROUPE_KM\"].fillna(data_retraitee[\"GROUPE_KM\"].mode()[0])\n",
+    "data_retraitee[\"GROUPE_KM\"] = data_retraitee[\"GROUPE_KM\"].fillna(\n",
+    "    data_retraitee[\"GROUPE_KM\"].mode()[0]\n",
+    ")\n",
    "data_retraitee[\"GENRE\"] = data_retraitee[\"GENRE\"].fillna(\"M\")\n",
-    "data_retraitee[\"ANNEE_CONSTRUCTION\"] = data_retraitee[\"ANNEE_CONSTRUCTION\"].fillna(data_retraitee[\"ANNEE_CONSTRUCTION\"].median())\n",
-    "data_retraitee[\"VALEUR_DU_BIEN\"] = data_retraitee[\"VALEUR_DU_BIEN\"].fillna(data_retraitee[\"VALEUR_DU_BIEN\"].mode()[0])\n",
-    "data_retraitee[\"DEUXIEME_CONDUCTEUR\"] = data_retraitee[\"DEUXIEME_CONDUCTEUR\"].fillna(False)"
+    "data_retraitee[\"ANNEE_CONSTRUCTION\"] = data_retraitee[\"ANNEE_CONSTRUCTION\"].fillna(\n",
+    "    data_retraitee[\"ANNEE_CONSTRUCTION\"].median()\n",
+    ")\n",
+    "data_retraitee[\"VALEUR_DU_BIEN\"] = data_retraitee[\"VALEUR_DU_BIEN\"].fillna(\n",
+    "    data_retraitee[\"VALEUR_DU_BIEN\"].mode()[0]\n",
+    ")\n",
+    "data_retraitee[\"DEUXIEME_CONDUCTEUR\"] = data_retraitee[\"DEUXIEME_CONDUCTEUR\"].fillna(\n",
+    "    False\n",
+    ")"
   ]
  },
  {
@@ -82310,11 +82350,13 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "#Option 3 : Remplacer par une valeur prudente (ZONE_RISQUE,DEUXIEME_CONDUCTEUR)\n",
+    "# Option 3 : Remplacer par une valeur prudente (ZONE_RISQUE,DEUXIEME_CONDUCTEUR)\n",
    "\n",
-    "#Remplacer par la zone avec le plus de sinistres\n",
-    "zone_plus_sinsitree = data_retraitee[[\"ZONE_RISQUE\", \"NB\"]].groupby([\"ZONE_RISQUE\"]).sum()\n",
-    "zone_plus_sinsitree.sort_values(\"NB\",ascending = False)\n",
+    "# Remplacer par la zone avec le plus de sinistres\n",
+    "zone_plus_sinsitree = (\n",
+    "    data_retraitee[[\"ZONE_RISQUE\", \"NB\"]].groupby([\"ZONE_RISQUE\"]).sum()\n",
+    ")\n",
+    "zone_plus_sinsitree.sort_values(\"NB\", ascending=False)\n",
    "\n",
    "data_retraitee[\"ZONE_RISQUE\"] = data_retraitee[\"ZONE_RISQUE\"].fillna(\"C\")"
   ]
@@ -82781,7 +82823,7 @@
    }
   ],
   "source": [
-    "data_retraitee.to_csv(\"./2_outputs/base_retraitee.csv\", index =  False)"
+    "data_retraitee.to_csv(\"./2_outputs/base_retraitee.csv\", index=False)"
   ]
  },
  {
@@ -82809,11 +82851,11 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "#Calcul de la fréquence\n",
+    "# Calcul de la fréquence\n",
    "data_retraitee[\"FREQ\"] = data_retraitee[\"NB\"] / data_retraitee[\"EXPO\"]\n",
    "data_retraitee[\"FREQ\"] = data_retraitee[\"FREQ\"].fillna(0)\n",
    "\n",
-    "#Calcul du coût moyen\n",
+    "# Calcul du coût moyen\n",
    "data_retraitee[\"CM\"] = data_retraitee[\"CHARGE\"] / data_retraitee[\"NB\"]\n",
    "data_retraitee[\"CM\"] = data_retraitee[\"CM\"].fillna(0)"
   ]
@@ -82846,11 +82888,11 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "#Agrégation selon la variable d'intérêt\n",
-    "plot_data = data_retraitee[[\"AGE_ASSURE_PRINCIPAL\", \"NB\",\"EXPO\"]]\n",
+    "# Agrégation selon la variable d'intérêt\n",
+    "plot_data = data_retraitee[[\"AGE_ASSURE_PRINCIPAL\", \"NB\", \"EXPO\"]]\n",
    "plot_data = plot_data.groupby([\"AGE_ASSURE_PRINCIPAL\"], as_index=False).sum()\n",
    "\n",
-    "#Calcul de la fréquence\n",
+    "# Calcul de la fréquence\n",
    "plot_data[\"FREQ\"] = plot_data[\"NB\"] / plot_data[\"EXPO\"]\n",
    "plot_data[\"FREQ\"] = plot_data[\"FREQ\"].fillna(0)"
   ]
@@ -83706,8 +83748,10 @@
    }
   ],
   "source": [
-    "#Représentation graphique\n",
-    "fig = px.line(plot_data, x=\"AGE_ASSURE_PRINCIPAL\", y=\"FREQ\", title=\"Sinistralité selon l'âge\")\n",
+    "# Représentation graphique\n",
+    "fig = px.line(\n",
+    "    plot_data, x=\"AGE_ASSURE_PRINCIPAL\", y=\"FREQ\", title=\"Sinistralité selon l'âge\"\n",
+    ")\n",
    "fig.show()"
   ]
  },
@@ -84567,16 +84611,16 @@
    }
   ],
   "source": [
-    "#Agrégation selon la variable d'intérêt\n",
-    "plot_data = data_retraitee[[\"GENRE\", \"NB\",\"EXPO\"]]\n",
+    "# Agrégation selon la variable d'intérêt\n",
+    "plot_data = data_retraitee[[\"GENRE\", \"NB\", \"EXPO\"]]\n",
    "plot_data = plot_data.groupby([\"GENRE\"], as_index=False).sum()\n",
    "\n",
-    "#Calcul de la fréquence\n",
+    "# Calcul de la fréquence\n",
    "plot_data[\"FREQ\"] = plot_data[\"NB\"] / plot_data[\"EXPO\"]\n",
    "plot_data[\"FREQ\"] = plot_data[\"FREQ\"].fillna(0)\n",
    "print(plot_data)\n",
    "\n",
-    "#Représentation graphique\n",
+    "# Représentation graphique\n",
    "fig = px.scatter(plot_data, x=\"GENRE\", y=\"FREQ\", title=\"Sinistralité selon le genre\")\n",
    "fig.show()"
   ]
@@ -84608,17 +84652,22 @@
    }
   ],
   "source": [
-    "#Agrégation selon la variable d'intérêt\n",
-    "plot_data = data_retraitee[[\"ZONE_RISQUE\", \"NB\",\"EXPO\"]]\n",
+    "# Agrégation selon la variable d'intérêt\n",
+    "plot_data = data_retraitee[[\"ZONE_RISQUE\", \"NB\", \"EXPO\"]]\n",
    "plot_data = plot_data.groupby([\"ZONE_RISQUE\"], as_index=False).sum()\n",
    "\n",
-    "#Calcul de la fréquence\n",
+    "# Calcul de la fréquence\n",
    "plot_data[\"FREQ\"] = plot_data[\"NB\"] / plot_data[\"EXPO\"]\n",
    "plot_data[\"FREQ\"] = plot_data[\"FREQ\"].fillna(0)\n",
    "print(plot_data)\n",
    "\n",
-    "#Représentation graphique\n",
-    "fig = px.scatter(plot_data, x=\"ZONE_RISQUE\", y=\"FREQ\", title=\"Sinistralité selon la zone géographique\")\n",
+    "# Représentation graphique\n",
+    "fig = px.scatter(\n",
+    "    plot_data,\n",
+    "    x=\"ZONE_RISQUE\",\n",
+    "    y=\"FREQ\",\n",
+    "    title=\"Sinistralité selon la zone géographique\",\n",
+    ")\n",
    "fig.show()"
   ]
  },
@@ -85479,17 +85528,19 @@
    }
   ],
   "source": [
-    "#Agrégation selon la variable d'intérêt\n",
-    "plot_data = data_retraitee[[\"ENERGIE\", \"NB\",\"EXPO\"]]\n",
+    "# Agrégation selon la variable d'intérêt\n",
+    "plot_data = data_retraitee[[\"ENERGIE\", \"NB\", \"EXPO\"]]\n",
    "plot_data = plot_data.groupby([\"ENERGIE\"], as_index=False).sum()\n",
    "\n",
-    "#Calcul de la fréquence\n",
+    "# Calcul de la fréquence\n",
    "plot_data[\"FREQ\"] = plot_data[\"NB\"] / plot_data[\"EXPO\"]\n",
    "plot_data[\"FREQ\"] = plot_data[\"FREQ\"].fillna(0)\n",
    "print(plot_data)\n",
    "\n",
-    "#Représentation graphique\n",
-    "fig = px.scatter(plot_data, x=\"ENERGIE\", y=\"FREQ\", title=\"Sinistralité selon le carburant\")\n",
+    "# Représentation graphique\n",
+    "fig = px.scatter(\n",
+    "    plot_data, x=\"ENERGIE\", y=\"FREQ\", title=\"Sinistralité selon le carburant\"\n",
+    ")\n",
    "fig.show()"
   ]
  },
@@ -86353,18 +86404,20 @@
    }
   ],
   "source": [
-    "#Agrégation selon la variable d'intérêt\n",
-    "plot_data = data_retraitee[[\"VALEUR_DU_BIEN\", \"CHARGE\",\"NB\"]]\n",
-    "plot_data= plot_data[plot_data['CHARGE'] > 0]\n",
+    "# Agrégation selon la variable d'intérêt\n",
+    "plot_data = data_retraitee[[\"VALEUR_DU_BIEN\", \"CHARGE\", \"NB\"]]\n",
+    "plot_data = plot_data[plot_data[\"CHARGE\"] > 0]\n",
    "plot_data = plot_data.groupby([\"VALEUR_DU_BIEN\"], as_index=False).sum()\n",
    "\n",
-    "#Calcul du CM\n",
+    "# Calcul du CM\n",
    "plot_data[\"CM\"] = plot_data[\"CHARGE\"] / plot_data[\"NB\"]\n",
    "plot_data[\"CM\"] = plot_data[\"CM\"].fillna(0)\n",
    "print(plot_data)\n",
    "\n",
-    "#Représentation graphique\n",
-    "fig = px.scatter(plot_data, x=\"VALEUR_DU_BIEN\", y=\"CM\", title=\"Coût moyen selon le prix\")\n",
+    "# Représentation graphique\n",
+    "fig = px.scatter(\n",
+    "    plot_data, x=\"VALEUR_DU_BIEN\", y=\"CM\", title=\"Coût moyen selon le prix\"\n",
+    ")\n",
    "fig.show()"
   ]
  },
@@ -87227,18 +87280,23 @@
    }
   ],
   "source": [
-    "#Agrégation selon la variable d'intérêt\n",
-    "plot_data = data_retraitee[[\"ANNEE_CONSTRUCTION\", \"CHARGE\",\"NB\"]]\n",
-    "plot_data= plot_data[plot_data['CHARGE'] > 0]\n",
+    "# Agrégation selon la variable d'intérêt\n",
+    "plot_data = data_retraitee[[\"ANNEE_CONSTRUCTION\", \"CHARGE\", \"NB\"]]\n",
+    "plot_data = plot_data[plot_data[\"CHARGE\"] > 0]\n",
    "plot_data = plot_data.groupby([\"ANNEE_CONSTRUCTION\"], as_index=False).sum()\n",
    "\n",
-    "#Calcul du CM\n",
+    "# Calcul du CM\n",
    "plot_data[\"CM\"] = plot_data[\"CHARGE\"] / plot_data[\"NB\"]\n",
    "plot_data[\"CM\"] = plot_data[\"CM\"].fillna(0)\n",
    "print(plot_data)\n",
    "\n",
-    "#Représentation graphique\n",
-    "fig = px.line(plot_data, x=\"ANNEE_CONSTRUCTION\", y=\"CM\", title=\"Coût moyen selon l'ancienneté du bien\")\n",
+    "# Représentation graphique\n",
+    "fig = px.line(\n",
+    "    plot_data,\n",
+    "    x=\"ANNEE_CONSTRUCTION\",\n",
+    "    y=\"CM\",\n",
+    "    title=\"Coût moyen selon l'ancienneté du bien\",\n",
+    ")\n",
    "fig.show()"
   ]
  },
@@ -88098,18 +88156,23 @@
    }
   ],
   "source": [
-    "#Agrégation selon la variable d'intérêt\n",
-    "plot_data = data_retraitee[[\"AGE_ASSURE_PRINCIPAL\", \"CHARGE\",\"NB\"]]\n",
-    "plot_data= plot_data[plot_data['CHARGE'] > 0]\n",
+    "# Agrégation selon la variable d'intérêt\n",
+    "plot_data = data_retraitee[[\"AGE_ASSURE_PRINCIPAL\", \"CHARGE\", \"NB\"]]\n",
+    "plot_data = plot_data[plot_data[\"CHARGE\"] > 0]\n",
    "plot_data = plot_data.groupby([\"AGE_ASSURE_PRINCIPAL\"], as_index=False).sum()\n",
    "\n",
-    "#Calcul du CM\n",
+    "# Calcul du CM\n",
    "plot_data[\"CM\"] = plot_data[\"CHARGE\"] / plot_data[\"NB\"]\n",
    "plot_data[\"CM\"] = plot_data[\"CM\"].fillna(0)\n",
    "print(plot_data)\n",
    "\n",
-    "#Représentation graphique\n",
-    "fig = px.scatter(plot_data, x=\"AGE_ASSURE_PRINCIPAL\", y=\"CM\", title=\"Coût moyen selon l'âge de l'assuré\")\n",
+    "# Représentation graphique\n",
+    "fig = px.scatter(\n",
+    "    plot_data,\n",
+    "    x=\"AGE_ASSURE_PRINCIPAL\",\n",
+    "    y=\"CM\",\n",
+    "    title=\"Coût moyen selon l'âge de l'assuré\",\n",
+    ")\n",
    "fig.show()"
   ]
  },
@@ -88969,18 +89032,20 @@
    }
   ],
   "source": [
-    "#Agrégation selon la variable d'intérêt\n",
-    "plot_data = data_retraitee[[\"GENRE\", \"CHARGE\",\"NB\"]]\n",
-    "plot_data= plot_data[plot_data['CHARGE'] > 0]\n",
+    "# Agrégation selon la variable d'intérêt\n",
+    "plot_data = data_retraitee[[\"GENRE\", \"CHARGE\", \"NB\"]]\n",
+    "plot_data = plot_data[plot_data[\"CHARGE\"] > 0]\n",
    "plot_data = plot_data.groupby([\"GENRE\"], as_index=False).sum()\n",
    "\n",
-    "#Calcul du CM\n",
+    "# Calcul du CM\n",
    "plot_data[\"CM\"] = plot_data[\"CHARGE\"] / plot_data[\"NB\"]\n",
    "plot_data[\"CM\"] = plot_data[\"CM\"].fillna(0)\n",
    "print(plot_data)\n",
    "\n",
-    "#Représentation graphique\n",
-    "fig = px.scatter(plot_data, x=\"GENRE\", y=\"CM\", title=\"Coût moyen selon l'âge de l'assuré\")\n",
+    "# Représentation graphique\n",
+    "fig = px.scatter(\n",
+    "    plot_data, x=\"GENRE\", y=\"CM\", title=\"Coût moyen selon l'âge de l'assuré\"\n",
+    ")\n",
    "fig.show()"
   ]
  },