Refactor code formatting and improve readability in Jupyter notebooks for TP_4 and TP_5

- Adjusted indentation and line breaks for better clarity in function definitions and import statements.
- Standardized string quotes for consistency across the codebase.
- Re-wrapped DataFrame creation/manipulation and plotting calls to a consistent line length: long statements are split across multiple lines, and short multi-line ones are collapsed onto a single line.
- Normalized `print (...)` calls to `print(...)` and added a space after `#` in comments.
- Ensured consistent use of whitespace around operators and after commas.
This commit is contained in:
2025-11-25 10:46:16 +01:00
parent 751412c1cd
commit e57995ba85
17 changed files with 11975 additions and 11713 deletions

View File

@@ -60,11 +60,11 @@
}
],
"source": [
"print(3+4)\n",
"print(5*2.5)\n",
"print(3 + 4)\n",
"print(5 * 2.5)\n",
"print(int)\n",
"print(float)\n",
"a=2**90\n",
"a = 2**90\n",
"print(a)\n",
"print(type(a))"
]
@@ -90,11 +90,11 @@
"\n",
"import pandas as pd\n",
"\n",
"#from math import *\n",
"#help(math)\n",
"print (math.sqrt(3))\n",
"print (math.floor(3.2))\n",
"print (math.cos(math.pi/3.0))"
"# from math import *\n",
"# help(math)\n",
"print(math.sqrt(3))\n",
"print(math.floor(3.2))\n",
"print(math.cos(math.pi / 3.0))"
]
},
{
@@ -121,11 +121,11 @@
}
],
"source": [
"print('33' + \"42\")\n",
"a=\"toto est toto\"\n",
"print(\"33\" + \"42\")\n",
"a = \"toto est toto\"\n",
"a[4:]\n",
"#Tout est objet\n",
"print(a.split(' '))"
"# Tout est objet\n",
"print(a.split(\" \"))"
]
},
{
@@ -143,7 +143,9 @@
}
],
"source": [
"salutation = \"Bonjour, monsieur {}. Comment allez vous en ce {}?\".format(\"XX\", \"Mardi 19 septembre\")\n",
"salutation = \"Bonjour, monsieur {}. Comment allez vous en ce {}?\".format(\n",
" \"XX\", \"Mardi 19 septembre\"\n",
")\n",
"print(salutation)"
]
},
@@ -195,12 +197,12 @@
}
],
"source": [
"a = [1,'q',2,3,5,8,'TOTO']\n",
"print (a[1])\n",
"print (a[-1])\n",
"print (a[1:3])\n",
"print (a[-4:-2])\n",
"print([5]*4)"
"a = [1, \"q\", 2, 3, 5, 8, \"TOTO\"]\n",
"print(a[1])\n",
"print(a[-1])\n",
"print(a[1:3])\n",
"print(a[-4:-2])\n",
"print([5] * 4)"
]
},
{
@@ -221,10 +223,10 @@
}
],
"source": [
"print (range(4, 10))\n",
"print (range(5, 50, 3))\n",
"print ([3,1,4] + [1,5,9])\n",
"print (len(range(4, 10)))"
"print(range(4, 10))\n",
"print(range(5, 50, 3))\n",
"print([3, 1, 4] + [1, 5, 9])\n",
"print(len(range(4, 10)))"
]
},
{
@@ -258,28 +260,28 @@
"source": [
"a = 2\n",
"if 5 > a:\n",
" print (\"Cinq!\")\n",
" print(\"Cinq!\")\n",
"else:\n",
" print (\"a!\")\n",
" print(\"a!\")\n",
"\n",
"#Zoom sur indentation\n",
"num=3\n",
"# Zoom sur indentation\n",
"num = 3\n",
"if num == 1:\n",
" print (\"C'est 1\")\n",
" print(\"C'est 1\")\n",
"elif num == 2:\n",
" print (\"C'est 2\")\n",
" print(\"C'est 2\")\n",
"elif num == 3:\n",
" print (\"C'est 3\")\n",
"else :\n",
" print (\"Autre que 1, 2 et 3\")\n",
" print(\"C'est 3\")\n",
"else:\n",
" print(\"Autre que 1, 2 et 3\")\n",
"\n",
"print(num in [1,2,3])\n",
"print(num not in [1,2,3])\n",
"print(num in [1, 2, 3])\n",
"print(num not in [1, 2, 3])\n",
"print(num != 5)\n",
"\n",
"print(num in [1,2,3] and num >0)\n",
"print(num in [1, 2, 3] and num > 0)\n",
"\n",
"print(not 5==3)"
"print(not 5 == 3)"
]
},
{
@@ -313,21 +315,21 @@
}
],
"source": [
"a=[3,4,5]\n",
"#Boucle for\n",
"a = [3, 4, 5]\n",
"# Boucle for\n",
"for i in range(len(a)):\n",
" a[i] += 3\n",
"print (a)\n",
"print(a)\n",
"%timeit for i in range(len(a)):a[i] += 3\n",
"print (a)\n",
"print(a)\n",
"\n",
"b=[3,4,5]\n",
"print([i+3 for i in b])\n",
"b = [3, 4, 5]\n",
"print([i + 3 for i in b])\n",
"%timeit [i+3 for i in b]\n",
"print (b)\n",
"print(b)\n",
"\n",
"for i,item in enumerate(b):\n",
" print(i,item)"
"for i, item in enumerate(b):\n",
" print(i, item)"
]
},
{
@@ -369,18 +371,18 @@
}
],
"source": [
"#Boucle While , break et continue\n",
"# Boucle While , break et continue\n",
"print(\"démarrage boucle\")\n",
"i=0\n",
"while i<100:\n",
"i = 0\n",
"while i < 100:\n",
" print(i)\n",
" i+=1\n",
" if i==52:\n",
" i+=10\n",
" i += 1\n",
" if i == 52:\n",
" i += 10\n",
" print(\"increment de 10\")\n",
" continue\n",
" i+=2\n",
" if i ==77:\n",
" i += 2\n",
" if i == 77:\n",
" break"
]
},
@@ -408,8 +410,10 @@
],
"source": [
"def square(x):\n",
" return x*x\n",
"print (square(3))"
" return x * x\n",
"\n",
"\n",
"print(square(3))"
]
},
{
@@ -486,14 +490,14 @@
}
],
"source": [
"#Ecrivez votre code ici\n",
"# Ecrivez votre code ici\n",
"\n",
"serie = pd.Series({'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5})\n",
"serie = pd.Series({\"a\": 1, \"b\": 2, \"c\": 3, \"d\": 4, \"e\": 5})\n",
"\n",
"print(serie)\n",
"print(serie.index)\n",
"print(serie.mean())\n",
"print(serie['b'])\n",
"print(serie[\"b\"])\n",
"print(serie.b)"
]
},
@@ -594,11 +598,8 @@
}
],
"source": [
"#Ecrivez votre code ici\n",
"df = pd.DataFrame({\n",
" 's': s,\n",
" 't': t\n",
"})\n",
"# Ecrivez votre code ici\n",
"df = pd.DataFrame({\"s\": s, \"t\": t})\n",
"\n",
"print(df)"
]
@@ -633,8 +634,8 @@
}
],
"source": [
"#Ecrivez votre code ici\n",
"df['SUM'] = df['s'] + df['t']\n",
"# Ecrivez votre code ici\n",
"df[\"SUM\"] = df[\"s\"] + df[\"t\"]\n",
"\n",
"print(df)"
]
@@ -662,8 +663,8 @@
}
],
"source": [
"#Ecrivez votre code ici\n",
"print(df['SUM'].mean())"
"# Ecrivez votre code ici\n",
"print(df[\"SUM\"].mean())"
]
},
{
@@ -774,7 +775,7 @@
}
],
"source": [
"#Ecrivez votre code ici\n",
"# Ecrivez votre code ici\n",
"import numpy as np\n",
"\n",
"X = np.random.standard_normal(size=250)\n",
@@ -809,7 +810,7 @@
}
],
"source": [
"index = pd.date_range('2012-01-01', periods=250, freq='D')\n",
"index = pd.date_range(\"2012-01-01\", periods=250, freq=\"D\")\n",
"\n",
"serie = pd.Series(X, index=index)\n",
"print(serie)"
@@ -872,7 +873,7 @@
"metadata": {},
"outputs": [],
"source": [
"#Data frame\n",
"# Data frame\n",
"import pandas as pd\n",
"import plotly.express as px\n",
"import plotly.graph_objects as gp\n",
@@ -923,8 +924,8 @@
"metadata": {},
"outputs": [],
"source": [
"path = input_path + '/base_modelisation.csv'\n",
"data_set = pd.read_csv(path,sep=\";\",decimal=\",\")"
"path = input_path + \"/base_modelisation.csv\"\n",
"data_set = pd.read_csv(path, sep=\";\", decimal=\",\")"
]
},
{
@@ -2413,7 +2414,7 @@
}
],
"source": [
"#Dimensions\n",
"# Dimensions\n",
"data_set.shape"
]
},
@@ -2568,7 +2569,7 @@
}
],
"source": [
"#Liste des colonnes selon leur type\n",
"# Liste des colonnes selon leur type\n",
"data_set.dtypes"
]
},
@@ -2600,13 +2601,13 @@
}
],
"source": [
"#Ecrivez votre code ici\n",
"# Ecrivez votre code ici\n",
"quantitatives = []\n",
"categorielles = []\n",
"binaires = []\n",
"\n",
"for col in data_set.columns:\n",
" if data_set[col].dtype in ['int64', 'float64']:\n",
" if data_set[col].dtype in [\"int64\", \"float64\"]:\n",
" if len(data_set[col].dropna().unique()) == 2:\n",
" binaires.append(col)\n",
" else:\n",
@@ -2619,7 +2620,7 @@
"\n",
"print(\"Variables quantitatives :\", quantitatives)\n",
"print(\"\\nVariables catégorielles :\", categorielles)\n",
"print(\"\\nVariables binaires :\", binaires)\n"
"print(\"\\nVariables binaires :\", binaires)"
]
},
{
@@ -2638,7 +2639,7 @@
}
],
"source": [
"#Ecrivez votre code ici\n",
"# Ecrivez votre code ici\n",
"variables_avec_na = []\n",
"\n",
"for col in data_set.columns:\n",
@@ -3524,8 +3525,10 @@
}
],
"source": [
"fig = px.histogram(data_set.sort_values('ANNEE_CTR'), x=\"ANNEE_CTR\")\n",
"fig.update_xaxes(type='category') #Cette ligne permet de forcer la variable comme variable catégorielle et non numérique\n",
"fig = px.histogram(data_set.sort_values(\"ANNEE_CTR\"), x=\"ANNEE_CTR\")\n",
"fig.update_xaxes(\n",
" type=\"category\"\n",
") # Cette ligne permet de forcer la variable comme variable catégorielle et non numérique\n",
"\n",
"fig.show()"
]
@@ -18655,8 +18658,13 @@
}
],
"source": [
"fig = px.histogram(data_set, x=\"CONTRAT_ANCIENNETE\",\n",
" category_orders={'CONTRAT_ANCIENNETE': ['(-1,0]','(0,1]',\"(1,2]\",\"(2,5]\",\"(5,10]\"]})\n",
"fig = px.histogram(\n",
" data_set,\n",
" x=\"CONTRAT_ANCIENNETE\",\n",
" category_orders={\n",
" \"CONTRAT_ANCIENNETE\": [\"(-1,0]\", \"(0,1]\", \"(1,2]\", \"(2,5]\", \"(5,10]\"]\n",
" },\n",
")\n",
"fig.show()"
]
},
@@ -48890,8 +48898,13 @@
}
],
"source": [
"fig = px.histogram(data_set, x=\"GROUPE_KM\",\n",
" category_orders={'GROUPE_KM': [\"[0;20000[\",\"[20000;40000[\",\"[40000;60000[\",\"[60000;99999[\"]})\n",
"fig = px.histogram(\n",
" data_set,\n",
" x=\"GROUPE_KM\",\n",
" category_orders={\n",
" \"GROUPE_KM\": [\"[0;20000[\", \"[20000;40000[\", \"[40000;60000[\", \"[60000;99999[\"]\n",
" },\n",
")\n",
"fig.show()"
]
},
@@ -64005,9 +64018,11 @@
}
],
"source": [
"#Ecrivez votre code ici\n",
"fig = px.histogram(data_set.sort_values('ZONE_RISQUE'), x=\"ZONE_RISQUE\")\n",
"fig.update_xaxes(type='category') #Cette ligne permet de forcer la variable comme variable catégorielle et non numérique\n",
"# Ecrivez votre code ici\n",
"fig = px.histogram(data_set.sort_values(\"ZONE_RISQUE\"), x=\"ZONE_RISQUE\")\n",
"fig.update_xaxes(\n",
" type=\"category\"\n",
") # Cette ligne permet de forcer la variable comme variable catégorielle et non numérique\n",
"\n",
"fig.show()"
]
@@ -64860,9 +64875,13 @@
}
],
"source": [
"#Ecrivez votre code ici\n",
"fig = px.histogram(data_set.sort_values('AGE_ASSURE_PRINCIPAL'), x=\"AGE_ASSURE_PRINCIPAL\")\n",
"fig.update_xaxes(type='category') #Cette ligne permet de forcer la variable comme variable catégorielle et non numérique\n",
"# Ecrivez votre code ici\n",
"fig = px.histogram(\n",
" data_set.sort_values(\"AGE_ASSURE_PRINCIPAL\"), x=\"AGE_ASSURE_PRINCIPAL\"\n",
")\n",
"fig.update_xaxes(\n",
" type=\"category\"\n",
") # Cette ligne permet de forcer la variable comme variable catégorielle et non numérique\n",
"\n",
"fig.show()"
]
@@ -79977,9 +79996,11 @@
}
],
"source": [
"#Ecrivez votre code ici\n",
"fig = px.histogram(data_set.sort_values('GENRE'), x=\"GENRE\")\n",
"fig.update_xaxes(type='category') #Cette ligne permet de forcer la variable comme variable catégorielle et non numérique\n",
"# Ecrivez votre code ici\n",
"fig = px.histogram(data_set.sort_values(\"GENRE\"), x=\"GENRE\")\n",
"fig.update_xaxes(\n",
" type=\"category\"\n",
") # Cette ligne permet de forcer la variable comme variable catégorielle et non numérique\n",
"\n",
"fig.show()"
]
@@ -80010,14 +80031,19 @@
}
],
"source": [
"#Préparation des données : compter le nombre de femmes et hommes par âge\n",
"tmp = data_set[[\"AGE_ASSURE_PRINCIPAL\",\"GENRE\"]].value_counts().to_frame('counts').reset_index()\n",
"# Préparation des données : compter le nombre de femmes et hommes par âge\n",
"tmp = (\n",
" data_set[[\"AGE_ASSURE_PRINCIPAL\", \"GENRE\"]]\n",
" .value_counts()\n",
" .to_frame(\"counts\")\n",
" .reset_index()\n",
")\n",
"data_f = tmp[tmp[\"GENRE\"] == \"F\"]\n",
"data_h = tmp[tmp[\"GENRE\"] == \"M\"]\n",
"\n",
"#Comparaison des âges\n",
"list_1=sorted(data_f[\"AGE_ASSURE_PRINCIPAL\"].unique())\n",
"list_2=sorted(data_h[\"AGE_ASSURE_PRINCIPAL\"].unique())\n",
"# Comparaison des âges\n",
"list_1 = sorted(data_f[\"AGE_ASSURE_PRINCIPAL\"].unique())\n",
"list_2 = sorted(data_h[\"AGE_ASSURE_PRINCIPAL\"].unique())\n",
"\n",
"print(\"Eléments dans la liste 2 mais pas dans la liste 1 \")\n",
"print(list(set(list_2) - set(list_1)))\n",
@@ -80034,11 +80060,19 @@
"outputs": [],
"source": [
"# Il faut ajouter l'âge 13 dans la liste 2 (data_h)\n",
"data_h = pd.concat([data_h, pd.DataFrame([[13, \"M\",0]],columns=['AGE_ASSURE_PRINCIPAL', 'GENRE','counts'])], ignore_index=True)\n",
"data_h = pd.concat(\n",
" [\n",
" data_h,\n",
" pd.DataFrame(\n",
" [[13, \"M\", 0]], columns=[\"AGE_ASSURE_PRINCIPAL\", \"GENRE\", \"counts\"]\n",
" ),\n",
" ],\n",
" ignore_index=True,\n",
")\n",
"\n",
"#On ordonne les dataframes\n",
"data_h = data_h.sort_values('AGE_ASSURE_PRINCIPAL', ascending = True)\n",
"data_f = data_f.sort_values('AGE_ASSURE_PRINCIPAL', ascending = True)"
"# On ordonne les dataframes\n",
"data_h = data_h.sort_values(\"AGE_ASSURE_PRINCIPAL\", ascending=True)\n",
"data_f = data_f.sort_values(\"AGE_ASSURE_PRINCIPAL\", ascending=True)"
]
},
{
@@ -80049,9 +80083,9 @@
"outputs": [],
"source": [
"# Graphique\n",
"y_age = data_h['AGE_ASSURE_PRINCIPAL']\n",
"x_M = data_h['counts']\n",
"x_F = data_f['counts'] * -1"
"y_age = data_h[\"AGE_ASSURE_PRINCIPAL\"]\n",
"x_M = data_h[\"counts\"]\n",
"x_F = data_f[\"counts\"] * -1"
]
},
{
@@ -80907,23 +80941,21 @@
"fig = gp.Figure()\n",
"\n",
"# Ajout des données H\n",
"fig.add_trace(gp.Bar(y= y_age, x = x_M,\n",
" name = 'Hommes',\n",
" orientation = 'h'))\n",
"fig.add_trace(gp.Bar(y=y_age, x=x_M, name=\"Hommes\", orientation=\"h\"))\n",
"\n",
"# Ajout des données F\n",
"fig.add_trace(gp.Bar(y = y_age, x = x_F,\n",
" name = 'Femmes', orientation = 'h'))\n",
"fig.add_trace(gp.Bar(y=y_age, x=x_F, name=\"Femmes\", orientation=\"h\"))\n",
"\n",
"# layout du graphique\n",
"fig.update_layout(title = 'Population du portefeuille',\n",
" title_font_size = 22, barmode = 'relative',\n",
" bargap = 0.0, bargroupgap = 0,\n",
" xaxis = {'title': 'Count',\n",
" 'title_font_size': 14},\n",
" yaxis = {'title': 'Age',\n",
" 'title_font_size': 14}\n",
" )\n",
"fig.update_layout(\n",
" title=\"Population du portefeuille\",\n",
" title_font_size=22,\n",
" barmode=\"relative\",\n",
" bargap=0.0,\n",
" bargroupgap=0,\n",
" xaxis={\"title\": \"Count\", \"title_font_size\": 14},\n",
" yaxis={\"title\": \"Age\", \"title_font_size\": 14},\n",
")\n",
"\n",
"fig.show()"
]
@@ -81795,7 +81827,7 @@
}
],
"source": [
"fig = px.histogram(data_set[data_set['CHARGE'] >= 0], x=\"CHARGE\", nbins=50)\n",
"fig = px.histogram(data_set[data_set[\"CHARGE\"] >= 0], x=\"CHARGE\", nbins=50)\n",
"fig.update_layout(title=\"Distribution des coûts des sinistres\")\n",
"fig.show()"
]
@@ -81859,8 +81891,8 @@
"# Nombre de NA par variable\n",
"nan_count = pd.DataFrame(data_set.isna().sum(), columns=[\"Nombre_NA\"])\n",
"\n",
"#Ajout du % par rapport à la taille de la BD\n",
"nan_count[\"Pourcentage\"]= nan_count.divide(data_set.shape[0])*100\n",
"# Ajout du % par rapport à la taille de la BD\n",
"nan_count[\"Pourcentage\"] = nan_count.divide(data_set.shape[0]) * 100\n",
"\n",
"print(nan_count)"
]
@@ -82271,7 +82303,7 @@
"data_retraitee = data_set\n",
"\n",
"# Option 1 : Suppression des variables avec trop de NA (PUISSANCE_VEHICULE)\n",
"data_retraitee = data_retraitee.drop(\"PUISSANCE_VEHICULE\", axis='columns')\n",
"data_retraitee = data_retraitee.drop(\"PUISSANCE_VEHICULE\", axis=\"columns\")\n",
"data_retraitee.head()"
]
},
@@ -82293,14 +82325,22 @@
}
],
"source": [
"#Option 2 : Remplacer par la classe la plus représentée/valeur moyenne (GROUPE_KM,GENRE,\n",
"# Option 2 : Remplacer par la classe la plus représentée/valeur moyenne (GROUPE_KM,GENRE,\n",
"# ANNEE_CONSTRUCTION,VALEUR_DU_BIEN,DEUXIEME_CONDUCTEUR)\n",
"\n",
"data_retraitee[\"GROUPE_KM\"] = data_retraitee[\"GROUPE_KM\"].fillna(data_retraitee[\"GROUPE_KM\"].mode()[0])\n",
"data_retraitee[\"GROUPE_KM\"] = data_retraitee[\"GROUPE_KM\"].fillna(\n",
" data_retraitee[\"GROUPE_KM\"].mode()[0]\n",
")\n",
"data_retraitee[\"GENRE\"] = data_retraitee[\"GENRE\"].fillna(\"M\")\n",
"data_retraitee[\"ANNEE_CONSTRUCTION\"] = data_retraitee[\"ANNEE_CONSTRUCTION\"].fillna(data_retraitee[\"ANNEE_CONSTRUCTION\"].median())\n",
"data_retraitee[\"VALEUR_DU_BIEN\"] = data_retraitee[\"VALEUR_DU_BIEN\"].fillna(data_retraitee[\"VALEUR_DU_BIEN\"].mode()[0])\n",
"data_retraitee[\"DEUXIEME_CONDUCTEUR\"] = data_retraitee[\"DEUXIEME_CONDUCTEUR\"].fillna(False)"
"data_retraitee[\"ANNEE_CONSTRUCTION\"] = data_retraitee[\"ANNEE_CONSTRUCTION\"].fillna(\n",
" data_retraitee[\"ANNEE_CONSTRUCTION\"].median()\n",
")\n",
"data_retraitee[\"VALEUR_DU_BIEN\"] = data_retraitee[\"VALEUR_DU_BIEN\"].fillna(\n",
" data_retraitee[\"VALEUR_DU_BIEN\"].mode()[0]\n",
")\n",
"data_retraitee[\"DEUXIEME_CONDUCTEUR\"] = data_retraitee[\"DEUXIEME_CONDUCTEUR\"].fillna(\n",
" False\n",
")"
]
},
{
@@ -82310,11 +82350,13 @@
"metadata": {},
"outputs": [],
"source": [
"#Option 3 : Remplacer par une valeur prudente (ZONE_RISQUE,DEUXIEME_CONDUCTEUR)\n",
"# Option 3 : Remplacer par une valeur prudente (ZONE_RISQUE,DEUXIEME_CONDUCTEUR)\n",
"\n",
"#Remplacer par la zone avec le plus de sinistres\n",
"zone_plus_sinsitree = data_retraitee[[\"ZONE_RISQUE\", \"NB\"]].groupby([\"ZONE_RISQUE\"]).sum()\n",
"zone_plus_sinsitree.sort_values(\"NB\",ascending = False)\n",
"# Remplacer par la zone avec le plus de sinistres\n",
"zone_plus_sinsitree = (\n",
" data_retraitee[[\"ZONE_RISQUE\", \"NB\"]].groupby([\"ZONE_RISQUE\"]).sum()\n",
")\n",
"zone_plus_sinsitree.sort_values(\"NB\", ascending=False)\n",
"\n",
"data_retraitee[\"ZONE_RISQUE\"] = data_retraitee[\"ZONE_RISQUE\"].fillna(\"C\")"
]
@@ -82781,7 +82823,7 @@
}
],
"source": [
"data_retraitee.to_csv(\"./2_outputs/base_retraitee.csv\", index = False)"
"data_retraitee.to_csv(\"./2_outputs/base_retraitee.csv\", index=False)"
]
},
{
@@ -82809,11 +82851,11 @@
"metadata": {},
"outputs": [],
"source": [
"#Calcul de la fréquence\n",
"# Calcul de la fréquence\n",
"data_retraitee[\"FREQ\"] = data_retraitee[\"NB\"] / data_retraitee[\"EXPO\"]\n",
"data_retraitee[\"FREQ\"] = data_retraitee[\"FREQ\"].fillna(0)\n",
"\n",
"#Calcul du coût moyen\n",
"# Calcul du coût moyen\n",
"data_retraitee[\"CM\"] = data_retraitee[\"CHARGE\"] / data_retraitee[\"NB\"]\n",
"data_retraitee[\"CM\"] = data_retraitee[\"CM\"].fillna(0)"
]
@@ -82846,11 +82888,11 @@
"metadata": {},
"outputs": [],
"source": [
"#Agrégation selon la variable d'intérêt\n",
"plot_data = data_retraitee[[\"AGE_ASSURE_PRINCIPAL\", \"NB\",\"EXPO\"]]\n",
"# Agrégation selon la variable d'intérêt\n",
"plot_data = data_retraitee[[\"AGE_ASSURE_PRINCIPAL\", \"NB\", \"EXPO\"]]\n",
"plot_data = plot_data.groupby([\"AGE_ASSURE_PRINCIPAL\"], as_index=False).sum()\n",
"\n",
"#Calcul de la fréquence\n",
"# Calcul de la fréquence\n",
"plot_data[\"FREQ\"] = plot_data[\"NB\"] / plot_data[\"EXPO\"]\n",
"plot_data[\"FREQ\"] = plot_data[\"FREQ\"].fillna(0)"
]
@@ -83706,8 +83748,10 @@
}
],
"source": [
"#Représentation graphique\n",
"fig = px.line(plot_data, x=\"AGE_ASSURE_PRINCIPAL\", y=\"FREQ\", title=\"Sinistralité selon l'âge\")\n",
"# Représentation graphique\n",
"fig = px.line(\n",
" plot_data, x=\"AGE_ASSURE_PRINCIPAL\", y=\"FREQ\", title=\"Sinistralité selon l'âge\"\n",
")\n",
"fig.show()"
]
},
@@ -84567,16 +84611,16 @@
}
],
"source": [
"#Agrégation selon la variable d'intérêt\n",
"plot_data = data_retraitee[[\"GENRE\", \"NB\",\"EXPO\"]]\n",
"# Agrégation selon la variable d'intérêt\n",
"plot_data = data_retraitee[[\"GENRE\", \"NB\", \"EXPO\"]]\n",
"plot_data = plot_data.groupby([\"GENRE\"], as_index=False).sum()\n",
"\n",
"#Calcul de la fréquence\n",
"# Calcul de la fréquence\n",
"plot_data[\"FREQ\"] = plot_data[\"NB\"] / plot_data[\"EXPO\"]\n",
"plot_data[\"FREQ\"] = plot_data[\"FREQ\"].fillna(0)\n",
"print(plot_data)\n",
"\n",
"#Représentation graphique\n",
"# Représentation graphique\n",
"fig = px.scatter(plot_data, x=\"GENRE\", y=\"FREQ\", title=\"Sinistralité selon le genre\")\n",
"fig.show()"
]
@@ -84608,17 +84652,22 @@
}
],
"source": [
"#Agrégation selon la variable d'intérêt\n",
"plot_data = data_retraitee[[\"ZONE_RISQUE\", \"NB\",\"EXPO\"]]\n",
"# Agrégation selon la variable d'intérêt\n",
"plot_data = data_retraitee[[\"ZONE_RISQUE\", \"NB\", \"EXPO\"]]\n",
"plot_data = plot_data.groupby([\"ZONE_RISQUE\"], as_index=False).sum()\n",
"\n",
"#Calcul de la fréquence\n",
"# Calcul de la fréquence\n",
"plot_data[\"FREQ\"] = plot_data[\"NB\"] / plot_data[\"EXPO\"]\n",
"plot_data[\"FREQ\"] = plot_data[\"FREQ\"].fillna(0)\n",
"print(plot_data)\n",
"\n",
"#Représentation graphique\n",
"fig = px.scatter(plot_data, x=\"ZONE_RISQUE\", y=\"FREQ\", title=\"Sinistralité selon la zone géographique\")\n",
"# Représentation graphique\n",
"fig = px.scatter(\n",
" plot_data,\n",
" x=\"ZONE_RISQUE\",\n",
" y=\"FREQ\",\n",
" title=\"Sinistralité selon la zone géographique\",\n",
")\n",
"fig.show()"
]
},
@@ -85479,17 +85528,19 @@
}
],
"source": [
"#Agrégation selon la variable d'intérêt\n",
"plot_data = data_retraitee[[\"ENERGIE\", \"NB\",\"EXPO\"]]\n",
"# Agrégation selon la variable d'intérêt\n",
"plot_data = data_retraitee[[\"ENERGIE\", \"NB\", \"EXPO\"]]\n",
"plot_data = plot_data.groupby([\"ENERGIE\"], as_index=False).sum()\n",
"\n",
"#Calcul de la fréquence\n",
"# Calcul de la fréquence\n",
"plot_data[\"FREQ\"] = plot_data[\"NB\"] / plot_data[\"EXPO\"]\n",
"plot_data[\"FREQ\"] = plot_data[\"FREQ\"].fillna(0)\n",
"print(plot_data)\n",
"\n",
"#Représentation graphique\n",
"fig = px.scatter(plot_data, x=\"ENERGIE\", y=\"FREQ\", title=\"Sinistralité selon le carburant\")\n",
"# Représentation graphique\n",
"fig = px.scatter(\n",
" plot_data, x=\"ENERGIE\", y=\"FREQ\", title=\"Sinistralité selon le carburant\"\n",
")\n",
"fig.show()"
]
},
@@ -86353,18 +86404,20 @@
}
],
"source": [
"#Agrégation selon la variable d'intérêt\n",
"plot_data = data_retraitee[[\"VALEUR_DU_BIEN\", \"CHARGE\",\"NB\"]]\n",
"plot_data= plot_data[plot_data['CHARGE'] > 0]\n",
"# Agrégation selon la variable d'intérêt\n",
"plot_data = data_retraitee[[\"VALEUR_DU_BIEN\", \"CHARGE\", \"NB\"]]\n",
"plot_data = plot_data[plot_data[\"CHARGE\"] > 0]\n",
"plot_data = plot_data.groupby([\"VALEUR_DU_BIEN\"], as_index=False).sum()\n",
"\n",
"#Calcul du CM\n",
"# Calcul du CM\n",
"plot_data[\"CM\"] = plot_data[\"CHARGE\"] / plot_data[\"NB\"]\n",
"plot_data[\"CM\"] = plot_data[\"CM\"].fillna(0)\n",
"print(plot_data)\n",
"\n",
"#Représentation graphique\n",
"fig = px.scatter(plot_data, x=\"VALEUR_DU_BIEN\", y=\"CM\", title=\"Coût moyen selon le prix\")\n",
"# Représentation graphique\n",
"fig = px.scatter(\n",
" plot_data, x=\"VALEUR_DU_BIEN\", y=\"CM\", title=\"Coût moyen selon le prix\"\n",
")\n",
"fig.show()"
]
},
@@ -87227,18 +87280,23 @@
}
],
"source": [
"#Agrégation selon la variable d'intérêt\n",
"plot_data = data_retraitee[[\"ANNEE_CONSTRUCTION\", \"CHARGE\",\"NB\"]]\n",
"plot_data= plot_data[plot_data['CHARGE'] > 0]\n",
"# Agrégation selon la variable d'intérêt\n",
"plot_data = data_retraitee[[\"ANNEE_CONSTRUCTION\", \"CHARGE\", \"NB\"]]\n",
"plot_data = plot_data[plot_data[\"CHARGE\"] > 0]\n",
"plot_data = plot_data.groupby([\"ANNEE_CONSTRUCTION\"], as_index=False).sum()\n",
"\n",
"#Calcul du CM\n",
"# Calcul du CM\n",
"plot_data[\"CM\"] = plot_data[\"CHARGE\"] / plot_data[\"NB\"]\n",
"plot_data[\"CM\"] = plot_data[\"CM\"].fillna(0)\n",
"print(plot_data)\n",
"\n",
"#Représentation graphique\n",
"fig = px.line(plot_data, x=\"ANNEE_CONSTRUCTION\", y=\"CM\", title=\"Coût moyen selon l'ancienneté du bien\")\n",
"# Représentation graphique\n",
"fig = px.line(\n",
" plot_data,\n",
" x=\"ANNEE_CONSTRUCTION\",\n",
" y=\"CM\",\n",
" title=\"Coût moyen selon l'ancienneté du bien\",\n",
")\n",
"fig.show()"
]
},
@@ -88098,18 +88156,23 @@
}
],
"source": [
"#Agrégation selon la variable d'intérêt\n",
"plot_data = data_retraitee[[\"AGE_ASSURE_PRINCIPAL\", \"CHARGE\",\"NB\"]]\n",
"plot_data= plot_data[plot_data['CHARGE'] > 0]\n",
"# Agrégation selon la variable d'intérêt\n",
"plot_data = data_retraitee[[\"AGE_ASSURE_PRINCIPAL\", \"CHARGE\", \"NB\"]]\n",
"plot_data = plot_data[plot_data[\"CHARGE\"] > 0]\n",
"plot_data = plot_data.groupby([\"AGE_ASSURE_PRINCIPAL\"], as_index=False).sum()\n",
"\n",
"#Calcul du CM\n",
"# Calcul du CM\n",
"plot_data[\"CM\"] = plot_data[\"CHARGE\"] / plot_data[\"NB\"]\n",
"plot_data[\"CM\"] = plot_data[\"CM\"].fillna(0)\n",
"print(plot_data)\n",
"\n",
"#Représentation graphique\n",
"fig = px.scatter(plot_data, x=\"AGE_ASSURE_PRINCIPAL\", y=\"CM\", title=\"Coût moyen selon l'âge de l'assuré\")\n",
"# Représentation graphique\n",
"fig = px.scatter(\n",
" plot_data,\n",
" x=\"AGE_ASSURE_PRINCIPAL\",\n",
" y=\"CM\",\n",
" title=\"Coût moyen selon l'âge de l'assuré\",\n",
")\n",
"fig.show()"
]
},
@@ -88969,18 +89032,20 @@
}
],
"source": [
"#Agrégation selon la variable d'intérêt\n",
"plot_data = data_retraitee[[\"GENRE\", \"CHARGE\",\"NB\"]]\n",
"plot_data= plot_data[plot_data['CHARGE'] > 0]\n",
"# Agrégation selon la variable d'intérêt\n",
"plot_data = data_retraitee[[\"GENRE\", \"CHARGE\", \"NB\"]]\n",
"plot_data = plot_data[plot_data[\"CHARGE\"] > 0]\n",
"plot_data = plot_data.groupby([\"GENRE\"], as_index=False).sum()\n",
"\n",
"#Calcul du CM\n",
"# Calcul du CM\n",
"plot_data[\"CM\"] = plot_data[\"CHARGE\"] / plot_data[\"NB\"]\n",
"plot_data[\"CM\"] = plot_data[\"CM\"].fillna(0)\n",
"print(plot_data)\n",
"\n",
"#Représentation graphique\n",
"fig = px.scatter(plot_data, x=\"GENRE\", y=\"CM\", title=\"Coût moyen selon l'âge de l'assuré\")\n",
"# Représentation graphique\n",
"fig = px.scatter(\n",
"    plot_data, x=\"GENRE\", y=\"CM\", title=\"Coût moyen selon le genre\"\n",
")\n",
"fig.show()"
]
},