diff --git a/M2/Machine Learning/TP_4/2025_M2_ISF_TP_4.ipynb b/M2/Machine Learning/TP_4/2025_M2_ISF_TP_4.ipynb
index 167171a..6897d8e 100644
--- a/M2/Machine Learning/TP_4/2025_M2_ISF_TP_4.ipynb
+++ b/M2/Machine Learning/TP_4/2025_M2_ISF_TP_4.ipynb
@@ -45,17 +45,18 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 15,
"id": "97d58527",
"metadata": {},
"outputs": [],
"source": [
"# Données\n",
- "import pandas as pd\n",
"import numpy as np\n",
+ "import pandas as pd\n",
"\n",
- "# Graphiques \n",
+ "# Graphiques\n",
"import seaborn as sns\n",
+ "\n",
"sns.set()\n",
"import plotly.express as px\n",
"import plotly.graph_objects as gp\n",
@@ -87,13 +88,13 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"id": "c67db932",
"metadata": {},
"outputs": [],
"source": [
"def cramers_V(var1,var2) :\n",
- " crosstab =np.array(pd.crosstab(var1,var2, rownames=None, colnames=None)) # Cross table building\n",
+ " crosstab = np.array(pd.crosstab(var1,var2, rownames=None, colnames=None)) # Cross table building\n",
" stat = chi2_contingency(crosstab)[0] # Keeping of the test statistic of the Chi2 test\n",
" obs = np.sum(crosstab) # Number of observations\n",
" mini = min(crosstab.shape)-1 # Take the minimum value between the columns and the rows of the cross table\n",
@@ -110,7 +111,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 17,
"id": "c9597b48",
"metadata": {},
"outputs": [],
@@ -129,30 +130,13 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 18,
"id": "8051b5f4",
"metadata": {},
- "outputs": [
- {
- "ename": "FileNotFoundError",
- "evalue": "[Errno 2] No such file or directory: './1_inputs/base_retraitee.csv'",
- "output_type": "error",
- "traceback": [
- "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
- "\u001b[31mFileNotFoundError\u001b[39m Traceback (most recent call last)",
- "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[9]\u001b[39m\u001b[32m, line 2\u001b[39m\n\u001b[32m 1\u001b[39m path =input_path + \u001b[33m'\u001b[39m\u001b[33m/base_retraitee.csv\u001b[39m\u001b[33m'\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m2\u001b[39m data_retraitee = \u001b[43mpd\u001b[49m\u001b[43m.\u001b[49m\u001b[43mread_csv\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\u001b[43msep\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43m,\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43mdecimal\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43m.\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
- "\u001b[36mFile \u001b[39m\u001b[32m~/Workspace/studies/.venv/lib/python3.13/site-packages/pandas/io/parsers/readers.py:1026\u001b[39m, in \u001b[36mread_csv\u001b[39m\u001b[34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[39m\n\u001b[32m 1013\u001b[39m kwds_defaults = _refine_defaults_read(\n\u001b[32m 1014\u001b[39m dialect,\n\u001b[32m 1015\u001b[39m delimiter,\n\u001b[32m (...)\u001b[39m\u001b[32m 1022\u001b[39m dtype_backend=dtype_backend,\n\u001b[32m 1023\u001b[39m )\n\u001b[32m 1024\u001b[39m kwds.update(kwds_defaults)\n\u001b[32m-> \u001b[39m\u001b[32m1026\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n",
- "\u001b[36mFile \u001b[39m\u001b[32m~/Workspace/studies/.venv/lib/python3.13/site-packages/pandas/io/parsers/readers.py:620\u001b[39m, in \u001b[36m_read\u001b[39m\u001b[34m(filepath_or_buffer, kwds)\u001b[39m\n\u001b[32m 617\u001b[39m _validate_names(kwds.get(\u001b[33m\"\u001b[39m\u001b[33mnames\u001b[39m\u001b[33m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[32m 619\u001b[39m \u001b[38;5;66;03m# Create the parser.\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m620\u001b[39m parser = \u001b[43mTextFileReader\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 622\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m chunksize \u001b[38;5;129;01mor\u001b[39;00m iterator:\n\u001b[32m 623\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m parser\n",
- "\u001b[36mFile \u001b[39m\u001b[32m~/Workspace/studies/.venv/lib/python3.13/site-packages/pandas/io/parsers/readers.py:1620\u001b[39m, in \u001b[36mTextFileReader.__init__\u001b[39m\u001b[34m(self, f, engine, **kwds)\u001b[39m\n\u001b[32m 1617\u001b[39m \u001b[38;5;28mself\u001b[39m.options[\u001b[33m\"\u001b[39m\u001b[33mhas_index_names\u001b[39m\u001b[33m\"\u001b[39m] = kwds[\u001b[33m\"\u001b[39m\u001b[33mhas_index_names\u001b[39m\u001b[33m\"\u001b[39m]\n\u001b[32m 1619\u001b[39m \u001b[38;5;28mself\u001b[39m.handles: IOHandles | \u001b[38;5;28;01mNone\u001b[39;00m = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1620\u001b[39m \u001b[38;5;28mself\u001b[39m._engine = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_make_engine\u001b[49m\u001b[43m(\u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mengine\u001b[49m\u001b[43m)\u001b[49m\n",
- "\u001b[36mFile \u001b[39m\u001b[32m~/Workspace/studies/.venv/lib/python3.13/site-packages/pandas/io/parsers/readers.py:1880\u001b[39m, in \u001b[36mTextFileReader._make_engine\u001b[39m\u001b[34m(self, f, engine)\u001b[39m\n\u001b[32m 1878\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33mb\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m mode:\n\u001b[32m 1879\u001b[39m mode += \u001b[33m\"\u001b[39m\u001b[33mb\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m-> \u001b[39m\u001b[32m1880\u001b[39m \u001b[38;5;28mself\u001b[39m.handles = \u001b[43mget_handle\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1881\u001b[39m \u001b[43m \u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1882\u001b[39m \u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1883\u001b[39m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mencoding\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1884\u001b[39m \u001b[43m \u001b[49m\u001b[43mcompression\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mcompression\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1885\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mmemory_map\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmemory_map\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1886\u001b[39m \u001b[43m \u001b[49m\u001b[43mis_text\u001b[49m\u001b[43m=\u001b[49m\u001b[43mis_text\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1887\u001b[39m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mencoding_errors\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mstrict\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1888\u001b[39m \u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mstorage_options\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1889\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1890\u001b[39m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m.handles \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 1891\u001b[39m f = \u001b[38;5;28mself\u001b[39m.handles.handle\n",
- "\u001b[36mFile \u001b[39m\u001b[32m~/Workspace/studies/.venv/lib/python3.13/site-packages/pandas/io/common.py:873\u001b[39m, in \u001b[36mget_handle\u001b[39m\u001b[34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[39m\n\u001b[32m 868\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(handle, \u001b[38;5;28mstr\u001b[39m):\n\u001b[32m 869\u001b[39m \u001b[38;5;66;03m# Check whether the filename is to be opened in binary mode.\u001b[39;00m\n\u001b[32m 870\u001b[39m \u001b[38;5;66;03m# Binary mode does not support 'encoding' and 'newline'.\u001b[39;00m\n\u001b[32m 871\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m ioargs.encoding \u001b[38;5;129;01mand\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33mb\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m ioargs.mode:\n\u001b[32m 872\u001b[39m \u001b[38;5;66;03m# Encoding\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m873\u001b[39m handle = \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[32m 874\u001b[39m \u001b[43m \u001b[49m\u001b[43mhandle\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 875\u001b[39m \u001b[43m \u001b[49m\u001b[43mioargs\u001b[49m\u001b[43m.\u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 876\u001b[39m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[43m=\u001b[49m\u001b[43mioargs\u001b[49m\u001b[43m.\u001b[49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 877\u001b[39m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[43m=\u001b[49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 878\u001b[39m \u001b[43m \u001b[49m\u001b[43mnewline\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 879\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 880\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 881\u001b[39m 
\u001b[38;5;66;03m# Binary mode\u001b[39;00m\n\u001b[32m 882\u001b[39m handle = \u001b[38;5;28mopen\u001b[39m(handle, ioargs.mode)\n",
- "\u001b[31mFileNotFoundError\u001b[39m: [Errno 2] No such file or directory: './1_inputs/base_retraitee.csv'"
- ]
- }
- ],
+ "outputs": [],
"source": [
- "path =input_path + '/base_retraitee.csv'\n",
- "data_retraitee = pd.read_csv(path,sep=\",\",decimal=\".\")"
+ "path = input_path + '/base_retraitee.csv'  # CSV file expected directly under input_path\n",
+ "data_retraitee = pd.read_csv(filepath_or_buffer=path, decimal=\".\", sep=\",\")  # comma-delimited fields, dot as decimal mark"
]
},
{
@@ -175,12 +159,1443 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 23,
"id": "b9b98d36",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.microsoft.datawrangler.viewer.v0+json": {
+ "columns": [
+ {
+ "name": "index",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "ANNEE_CTR",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "CONTRAT_ANCIENNETE",
+ "rawType": "object",
+ "type": "string"
+ },
+ {
+ "name": "FREQUENCE_PAIEMENT_COTISATION",
+ "rawType": "object",
+ "type": "string"
+ },
+ {
+ "name": "GROUPE_KM",
+ "rawType": "object",
+ "type": "string"
+ },
+ {
+ "name": "ZONE_RISQUE",
+ "rawType": "object",
+ "type": "string"
+ },
+ {
+ "name": "AGE_ASSURE_PRINCIPAL",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "GENRE",
+ "rawType": "object",
+ "type": "string"
+ },
+ {
+ "name": "DEUXIEME_CONDUCTEUR",
+ "rawType": "bool",
+ "type": "boolean"
+ },
+ {
+ "name": "ANCIENNETE_PERMIS",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "ANNEE_CONSTRUCTION",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ENERGIE",
+ "rawType": "object",
+ "type": "string"
+ },
+ {
+ "name": "EQUIPEMENT_SECURITE",
+ "rawType": "object",
+ "type": "string"
+ },
+ {
+ "name": "VALEUR_DU_BIEN",
+ "rawType": "object",
+ "type": "string"
+ },
+ {
+ "name": "NB",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "CHARGE",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "EXPO",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "sinistré",
+ "rawType": "int64",
+ "type": "integer"
+ }
+ ],
+ "ref": "3a5c9b57-04ea-45e3-9475-dee04d53694d",
+ "rows": [
+ [
+ "0",
+ "2019",
+ "(-1,0]",
+ "ANNUEL",
+ "[20000;40000[",
+ "B",
+ "54",
+ "M",
+ "False",
+ "47",
+ "2016.0",
+ "ESSENCE",
+ "FAUX",
+ "[10000;15000[",
+ "0",
+ "0.0",
+ "245.3278688524592",
+ "0"
+ ],
+ [
+ "1",
+ "2019",
+ "(-1,0]",
+ "ANNUEL",
+ "[20000;40000[",
+ "B",
+ "88",
+ "F",
+ "True",
+ "55",
+ "2018.0",
+ "DIESEL",
+ "VRAI",
+ "[20000;25000[",
+ "0",
+ "0.0",
+ "230.36885245901655",
+ "0"
+ ],
+ [
+ "2",
+ "2021",
+ "(1,2]",
+ "ANNUEL",
+ "[0;20000[",
+ "D",
+ "35",
+ "F",
+ "True",
+ "16",
+ "2017.0",
+ "ESSENCE",
+ "FAUX",
+ "[15000;20000[",
+ "0",
+ "0.0",
+ "300.0",
+ "0"
+ ],
+ [
+ "3",
+ "2021",
+ "(2,5]",
+ "ANNUEL",
+ "[0;20000[",
+ "C",
+ "46",
+ "M",
+ "False",
+ "44",
+ "2018.0",
+ "ESSENCE",
+ "VRAI",
+ "[35000;99999[",
+ "0",
+ "0.0",
+ "303.99999999999994",
+ "0"
+ ],
+ [
+ "4",
+ "2018",
+ "(2,5]",
+ "MENSUEL",
+ "[20000;40000[",
+ "A",
+ "46",
+ "F",
+ "False",
+ "31",
+ "2009.0",
+ "DIESEL",
+ "FAUX",
+ "[10000;15000[",
+ "0",
+ "0.0",
+ "365.0",
+ "0"
+ ],
+ [
+ "5",
+ "2019",
+ "(2,5]",
+ "MENSUEL",
+ "[0;20000[",
+ "A",
+ "67",
+ "M",
+ "False",
+ "22",
+ "2015.0",
+ "ESSENCE",
+ "VRAI",
+ "[10000;15000[",
+ "0",
+ "0.0",
+ "364.5874316939892",
+ "0"
+ ],
+ [
+ "6",
+ "2016",
+ "(0,1]",
+ "MENSUEL",
+ "[0;20000[",
+ "C",
+ "37",
+ "F",
+ "False",
+ "15",
+ "2016.0",
+ "ESSENCE",
+ "VRAI",
+ "[10000;15000[",
+ "0",
+ "868.11",
+ "365.0",
+ "0"
+ ],
+ [
+ "7",
+ "2017",
+ "(1,2]",
+ "MENSUEL",
+ "[0;20000[",
+ "A",
+ "46",
+ "F",
+ "False",
+ "37",
+ "2015.0",
+ "ESSENCE",
+ "FAUX",
+ "[10000;15000[",
+ "0",
+ "0.0",
+ "300.0",
+ "0"
+ ],
+ [
+ "8",
+ "2016",
+ "(0,1]",
+ "MENSUEL",
+ "[0;20000[",
+ "A",
+ "44",
+ "F",
+ "False",
+ "63",
+ "2014.0",
+ "ESSENCE",
+ "FAUX",
+ "[0;10000[",
+ "0",
+ "0.0",
+ "56.84426229508204",
+ "0"
+ ],
+ [
+ "9",
+ "2019",
+ "(2,5]",
+ "MENSUEL",
+ "[0;20000[",
+ "B",
+ "59",
+ "F",
+ "False",
+ "68",
+ "2014.0",
+ "ESSENCE",
+ "FAUX",
+ "[10000;15000[",
+ "0",
+ "2794.96",
+ "364.00000000000006",
+ "0"
+ ],
+ [
+ "10",
+ "2019",
+ "(0,1]",
+ "MENSUEL",
+ "[0;20000[",
+ "C",
+ "40",
+ "M",
+ "False",
+ "37",
+ "2017.0",
+ "ESSENCE",
+ "VRAI",
+ "[15000;20000[",
+ "1",
+ "1072.98",
+ "364.8415300546447",
+ "1"
+ ],
+ [
+ "11",
+ "2018",
+ "(-1,0]",
+ "MENSUEL",
+ "[0;20000[",
+ "C",
+ "30",
+ "M",
+ "False",
+ "12",
+ "2017.0",
+ "DIESEL",
+ "FAUX",
+ "[20000;25000[",
+ "0",
+ "0.0",
+ "272.00000000000006",
+ "0"
+ ],
+ [
+ "12",
+ "2020",
+ "(0,1]",
+ "MENSUEL",
+ "[20000;40000[",
+ "D",
+ "30",
+ "M",
+ "True",
+ "15",
+ "2020.0",
+ "ESSENCE",
+ "FAUX",
+ "[20000;25000[",
+ "0",
+ "0.0",
+ "365.0",
+ "0"
+ ],
+ [
+ "13",
+ "2021",
+ "(0,1]",
+ "MENSUEL",
+ "[20000;40000[",
+ "B",
+ "58",
+ "M",
+ "False",
+ "39",
+ "2017.0",
+ "DIESEL",
+ "FAUX",
+ "[10000;15000[",
+ "0",
+ "0.0",
+ "303.99999999999994",
+ "0"
+ ],
+ [
+ "14",
+ "2019",
+ "(-1,0]",
+ "MENSUEL",
+ "[20000;40000[",
+ "C",
+ "39",
+ "M",
+ "False",
+ "36",
+ "2014.0",
+ "DIESEL",
+ "FAUX",
+ "[10000;15000[",
+ "0",
+ "0.0",
+ "203.44262295081973",
+ "0"
+ ],
+ [
+ "15",
+ "2019",
+ "(0,1]",
+ "ANNUEL",
+ "[0;20000[",
+ "A",
+ "26",
+ "F",
+ "False",
+ "14",
+ "2016.0",
+ "DIESEL",
+ "FAUX",
+ "[15000;20000[",
+ "0",
+ "0.0",
+ "364.2049180327869",
+ "0"
+ ],
+ [
+ "16",
+ "2017",
+ "(-1,0]",
+ "ANNUEL",
+ "[0;20000[",
+ "D",
+ "26",
+ "M",
+ "False",
+ "17",
+ "2018.0",
+ "ESSENCE",
+ "FAUX",
+ "[35000;99999[",
+ "0",
+ "0.0",
+ "268.00000000000006",
+ "0"
+ ],
+ [
+ "17",
+ "2016",
+ "(0,1]",
+ "TRIMESTRIEL",
+ "[0;20000[",
+ "A",
+ "57",
+ "F",
+ "False",
+ "61",
+ "2011.0",
+ "ESSENCE",
+ "VRAI",
+ "[10000;15000[",
+ "0",
+ "287.73",
+ "365.0",
+ "0"
+ ],
+ [
+ "18",
+ "2018",
+ "(-1,0]",
+ "TRIMESTRIEL",
+ "[0;20000[",
+ "B",
+ "25",
+ "M",
+ "False",
+ "17",
+ "2017.0",
+ "DIESEL",
+ "VRAI",
+ "[35000;99999[",
+ "0",
+ "0.0",
+ "350.99999999999983",
+ "0"
+ ],
+ [
+ "19",
+ "2018",
+ "(2,5]",
+ "ANNUEL",
+ "[20000;40000[",
+ "D",
+ "61",
+ "M",
+ "True",
+ "28",
+ "2014.0",
+ "DIESEL",
+ "FAUX",
+ "[20000;25000[",
+ "0",
+ "0.0",
+ "365.0",
+ "0"
+ ],
+ [
+ "20",
+ "2020",
+ "(1,2]",
+ "MENSUEL",
+ "[20000;40000[",
+ "F",
+ "37",
+ "F",
+ "False",
+ "20",
+ "2018.0",
+ "DIESEL",
+ "FAUX",
+ "[25000;35000[",
+ "0",
+ "0.0",
+ "365.0",
+ "0"
+ ],
+ [
+ "21",
+ "2020",
+ "(2,5]",
+ "TRIMESTRIEL",
+ "[0;20000[",
+ "D",
+ "25",
+ "M",
+ "True",
+ "18",
+ "2014.0",
+ "DIESEL",
+ "VRAI",
+ "[15000;20000[",
+ "0",
+ "0.0",
+ "102.71857923497252",
+ "0"
+ ],
+ [
+ "22",
+ "2021",
+ "(2,5]",
+ "MENSUEL",
+ "[20000;40000[",
+ "C",
+ "30",
+ "F",
+ "True",
+ "14",
+ "2018.0",
+ "ESSENCE",
+ "FAUX",
+ "[15000;20000[",
+ "0",
+ "0.0",
+ "303.99999999999994",
+ "0"
+ ],
+ [
+ "23",
+ "2017",
+ "(-1,0]",
+ "MENSUEL",
+ "[0;20000[",
+ "B",
+ "26",
+ "F",
+ "False",
+ "15",
+ "2016.0",
+ "DIESEL",
+ "FAUX",
+ "[15000;20000[",
+ "0",
+ "0.0",
+ "158.99999999999986",
+ "0"
+ ],
+ [
+ "24",
+ "2016",
+ "(0,1]",
+ "TRIMESTRIEL",
+ "[0;20000[",
+ "A",
+ "62",
+ "M",
+ "False",
+ "64",
+ "2013.0",
+ "DIESEL",
+ "FAUX",
+ "[10000;15000[",
+ "0",
+ "0.0",
+ "365.0",
+ "0"
+ ],
+ [
+ "25",
+ "2020",
+ "(-1,0]",
+ "MENSUEL",
+ "[20000;40000[",
+ "C",
+ "45",
+ "F",
+ "False",
+ "44",
+ "2020.0",
+ "ESSENCE",
+ "FAUX",
+ "[15000;20000[",
+ "0",
+ "0.0",
+ "330.42349726775944",
+ "0"
+ ],
+ [
+ "26",
+ "2020",
+ "(0,1]",
+ "MENSUEL",
+ "[20000;40000[",
+ "E",
+ "60",
+ "M",
+ "False",
+ "66",
+ "2018.0",
+ "DIESEL",
+ "FAUX",
+ "[35000;99999[",
+ "0",
+ "0.0",
+ "365.0",
+ "0"
+ ],
+ [
+ "27",
+ "2020",
+ "(0,1]",
+ "TRIMESTRIEL",
+ "[0;20000[",
+ "C",
+ "42",
+ "F",
+ "False",
+ "18",
+ "2018.0",
+ "ESSENCE",
+ "FAUX",
+ "[10000;15000[",
+ "0",
+ "0.0",
+ "365.0",
+ "0"
+ ],
+ [
+ "28",
+ "2021",
+ "(2,5]",
+ "MENSUEL",
+ "[0;20000[",
+ "C",
+ "60",
+ "M",
+ "False",
+ "52",
+ "2016.0",
+ "DIESEL",
+ "VRAI",
+ "[15000;20000[",
+ "0",
+ "0.0",
+ "277.9999999999999",
+ "0"
+ ],
+ [
+ "29",
+ "2021",
+ "(2,5]",
+ "MENSUEL",
+ "[20000;40000[",
+ "C",
+ "44",
+ "M",
+ "False",
+ "27",
+ "2017.0",
+ "ESSENCE",
+ "FAUX",
+ "[10000;15000[",
+ "0",
+ "0.0",
+ "234.99999999999991",
+ "0"
+ ],
+ [
+ "30",
+ "2021",
+ "(-1,0]",
+ "MENSUEL",
+ "[20000;40000[",
+ "D",
+ "44",
+ "F",
+ "False",
+ "40",
+ "2020.0",
+ "ESSENCE",
+ "FAUX",
+ "[15000;20000[",
+ "0",
+ "0.0",
+ "180.99999999999997",
+ "0"
+ ],
+ [
+ "31",
+ "2017",
+ "(1,2]",
+ "ANNUEL",
+ "[20000;40000[",
+ "A",
+ "37",
+ "M",
+ "False",
+ "56",
+ "2013.0",
+ "DIESEL",
+ "VRAI",
+ "[35000;99999[",
+ "0",
+ "0.0",
+ "93.99999999999984",
+ "0"
+ ],
+ [
+ "32",
+ "2017",
+ "(0,1]",
+ "ANNUEL",
+ "[20000;40000[",
+ "A",
+ "25",
+ "F",
+ "True",
+ "12",
+ "2016.0",
+ "DIESEL",
+ "FAUX",
+ "[20000;25000[",
+ "0",
+ "0.0",
+ "365.0",
+ "0"
+ ],
+ [
+ "33",
+ "2021",
+ "(1,2]",
+ "ANNUEL",
+ "[0;20000[",
+ "B",
+ "62",
+ "M",
+ "False",
+ "50",
+ "2014.0",
+ "DIESEL",
+ "FAUX",
+ "[20000;25000[",
+ "0",
+ "0.0",
+ "238.99999999999991",
+ "0"
+ ],
+ [
+ "34",
+ "2020",
+ "(-1,0]",
+ "MENSUEL",
+ "[20000;40000[",
+ "C",
+ "27",
+ "M",
+ "True",
+ "13",
+ "2018.0",
+ "AUTRE",
+ "FAUX",
+ "[35000;99999[",
+ "1",
+ "3750.0",
+ "306.9945355191256",
+ "1"
+ ],
+ [
+ "35",
+ "2021",
+ "(1,2]",
+ "ANNUEL",
+ "[0;20000[",
+ "C",
+ "60",
+ "F",
+ "False",
+ "61",
+ "2020.0",
+ "ESSENCE",
+ "FAUX",
+ "[15000;20000[",
+ "0",
+ "0.0",
+ "303.99999999999994",
+ "0"
+ ],
+ [
+ "36",
+ "2019",
+ "(-1,0]",
+ "MENSUEL",
+ "[20000;40000[",
+ "L",
+ "19",
+ "M",
+ "False",
+ "2",
+ "2017.0",
+ "ESSENCE",
+ "VRAI",
+ "[0;10000[",
+ "1",
+ "1838.49",
+ "344.80327868852464",
+ "1"
+ ],
+ [
+ "37",
+ "2016",
+ "(-1,0]",
+ "ANNUEL",
+ "[0;20000[",
+ "C",
+ "56",
+ "F",
+ "False",
+ "65",
+ "2010.0",
+ "ESSENCE",
+ "FAUX",
+ "[0;10000[",
+ "0",
+ "0.0",
+ "280.0",
+ "0"
+ ],
+ [
+ "38",
+ "2019",
+ "(0,1]",
+ "MENSUEL",
+ "[0;20000[",
+ "C",
+ "57",
+ "F",
+ "False",
+ "36",
+ "2021.0",
+ "ESSENCE",
+ "FAUX",
+ "[10000;15000[",
+ "0",
+ "0.0",
+ "364.2677595628415",
+ "0"
+ ],
+ [
+ "39",
+ "2017",
+ "(-1,0]",
+ "MENSUEL",
+ "[0;20000[",
+ "A",
+ "24",
+ "F",
+ "False",
+ "12",
+ "2017.0",
+ "ESSENCE",
+ "FAUX",
+ "[15000;20000[",
+ "0",
+ "2637.39",
+ "195.00000000000009",
+ "0"
+ ],
+ [
+ "40",
+ "2018",
+ "(0,1]",
+ "ANNUEL",
+ "[20000;40000[",
+ "C",
+ "49",
+ "M",
+ "True",
+ "20",
+ "2017.0",
+ "DIESEL",
+ "FAUX",
+ "[20000;25000[",
+ "0",
+ "0.0",
+ "365.0",
+ "0"
+ ],
+ [
+ "41",
+ "2018",
+ "(0,1]",
+ "ANNUEL",
+ "[0;20000[",
+ "B",
+ "51",
+ "M",
+ "True",
+ "42",
+ "2017.0",
+ "ESSENCE",
+ "FAUX",
+ "[15000;20000[",
+ "0",
+ "0.0",
+ "365.0",
+ "0"
+ ],
+ [
+ "42",
+ "2020",
+ "(1,2]",
+ "MENSUEL",
+ "[20000;40000[",
+ "C",
+ "57",
+ "M",
+ "False",
+ "63",
+ "2018.0",
+ "ESSENCE",
+ "FAUX",
+ "[15000;20000[",
+ "0",
+ "0.0",
+ "365.0",
+ "0"
+ ],
+ [
+ "43",
+ "2019",
+ "(1,2]",
+ "MENSUEL",
+ "[20000;40000[",
+ "C",
+ "40",
+ "M",
+ "False",
+ "69",
+ "2013.0",
+ "ESSENCE",
+ "FAUX",
+ "[10000;15000[",
+ "0",
+ "0.0",
+ "364.2240437158468",
+ "0"
+ ],
+ [
+ "44",
+ "2021",
+ "(1,2]",
+ "MENSUEL",
+ "[20000;40000[",
+ "B",
+ "60",
+ "M",
+ "False",
+ "28",
+ "2018.0",
+ "DIESEL",
+ "FAUX",
+ "[35000;99999[",
+ "0",
+ "0.0",
+ "303.99999999999994",
+ "0"
+ ],
+ [
+ "45",
+ "2020",
+ "(2,5]",
+ "ANNUEL",
+ "[0;20000[",
+ "B",
+ "52",
+ "F",
+ "False",
+ "55",
+ "2017.0",
+ "DIESEL",
+ "VRAI",
+ "[35000;99999[",
+ "0",
+ "0.0",
+ "365.0",
+ "0"
+ ],
+ [
+ "46",
+ "2020",
+ "(2,5]",
+ "ANNUEL",
+ "[0;20000[",
+ "C",
+ "41",
+ "M",
+ "False",
+ "47",
+ "2018.0",
+ "ESSENCE",
+ "FAUX",
+ "[15000;20000[",
+ "0",
+ "0.0",
+ "365.0",
+ "0"
+ ],
+ [
+ "47",
+ "2020",
+ "(0,1]",
+ "MENSUEL",
+ "[0;20000[",
+ "B",
+ "51",
+ "F",
+ "False",
+ "59",
+ "2016.0",
+ "ESSENCE",
+ "FAUX",
+ "[10000;15000[",
+ "0",
+ "0.0",
+ "118.67486338797818",
+ "0"
+ ],
+ [
+ "48",
+ "2019",
+ "(-1,0]",
+ "MENSUEL",
+ "[20000;40000[",
+ "C",
+ "49",
+ "M",
+ "False",
+ "21",
+ "2020.0",
+ "ESSENCE",
+ "FAUX",
+ "[25000;35000[",
+ "0",
+ "0.0",
+ "267.26775956284155",
+ "0"
+ ],
+ [
+ "49",
+ "2020",
+ "(2,5]",
+ "ANNUEL",
+ "[0;20000[",
+ "B",
+ "73",
+ "M",
+ "True",
+ "24",
+ "2018.0",
+ "DIESEL",
+ "FAUX",
+ "[20000;25000[",
+ "0",
+ "0.0",
+ "193.4699453551912",
+ "0"
+ ]
+ ],
+ "shape": {
+ "columns": 17,
+ "rows": 14236
+ }
+ },
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ANNEE_CTR | \n",
+ " CONTRAT_ANCIENNETE | \n",
+ " FREQUENCE_PAIEMENT_COTISATION | \n",
+ " GROUPE_KM | \n",
+ " ZONE_RISQUE | \n",
+ " AGE_ASSURE_PRINCIPAL | \n",
+ " GENRE | \n",
+ " DEUXIEME_CONDUCTEUR | \n",
+ " ANCIENNETE_PERMIS | \n",
+ " ANNEE_CONSTRUCTION | \n",
+ " ENERGIE | \n",
+ " EQUIPEMENT_SECURITE | \n",
+ " VALEUR_DU_BIEN | \n",
+ " NB | \n",
+ " CHARGE | \n",
+ " EXPO | \n",
+ " sinistré | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 2019 | \n",
+ " (-1,0] | \n",
+ " ANNUEL | \n",
+ " [20000;40000[ | \n",
+ " B | \n",
+ " 54 | \n",
+ " M | \n",
+ " False | \n",
+ " 47 | \n",
+ " 2016.0 | \n",
+ " ESSENCE | \n",
+ " FAUX | \n",
+ " [10000;15000[ | \n",
+ " 0 | \n",
+ " 0.0 | \n",
+ " 245.327869 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2019 | \n",
+ " (-1,0] | \n",
+ " ANNUEL | \n",
+ " [20000;40000[ | \n",
+ " B | \n",
+ " 88 | \n",
+ " F | \n",
+ " True | \n",
+ " 55 | \n",
+ " 2018.0 | \n",
+ " DIESEL | \n",
+ " VRAI | \n",
+ " [20000;25000[ | \n",
+ " 0 | \n",
+ " 0.0 | \n",
+ " 230.368852 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 2021 | \n",
+ " (1,2] | \n",
+ " ANNUEL | \n",
+ " [0;20000[ | \n",
+ " D | \n",
+ " 35 | \n",
+ " F | \n",
+ " True | \n",
+ " 16 | \n",
+ " 2017.0 | \n",
+ " ESSENCE | \n",
+ " FAUX | \n",
+ " [15000;20000[ | \n",
+ " 0 | \n",
+ " 0.0 | \n",
+ " 300.000000 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 2021 | \n",
+ " (2,5] | \n",
+ " ANNUEL | \n",
+ " [0;20000[ | \n",
+ " C | \n",
+ " 46 | \n",
+ " M | \n",
+ " False | \n",
+ " 44 | \n",
+ " 2018.0 | \n",
+ " ESSENCE | \n",
+ " VRAI | \n",
+ " [35000;99999[ | \n",
+ " 0 | \n",
+ " 0.0 | \n",
+ " 304.000000 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 2018 | \n",
+ " (2,5] | \n",
+ " MENSUEL | \n",
+ " [20000;40000[ | \n",
+ " A | \n",
+ " 46 | \n",
+ " F | \n",
+ " False | \n",
+ " 31 | \n",
+ " 2009.0 | \n",
+ " DIESEL | \n",
+ " FAUX | \n",
+ " [10000;15000[ | \n",
+ " 0 | \n",
+ " 0.0 | \n",
+ " 365.000000 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 14231 | \n",
+ " 2021 | \n",
+ " (2,5] | \n",
+ " MENSUEL | \n",
+ " [0;20000[ | \n",
+ " D | \n",
+ " 55 | \n",
+ " M | \n",
+ " False | \n",
+ " 49 | \n",
+ " 2017.0 | \n",
+ " ESSENCE | \n",
+ " FAUX | \n",
+ " [20000;25000[ | \n",
+ " 0 | \n",
+ " 0.0 | \n",
+ " 181.000000 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 14232 | \n",
+ " 2019 | \n",
+ " (2,5] | \n",
+ " MENSUEL | \n",
+ " [20000;40000[ | \n",
+ " A | \n",
+ " 33 | \n",
+ " M | \n",
+ " False | \n",
+ " 14 | \n",
+ " 2017.0 | \n",
+ " ESSENCE | \n",
+ " FAUX | \n",
+ " [10000;15000[ | \n",
+ " 0 | \n",
+ " 0.0 | \n",
+ " 364.669399 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 14233 | \n",
+ " 2017 | \n",
+ " (-1,0] | \n",
+ " ANNUEL | \n",
+ " [0;20000[ | \n",
+ " A | \n",
+ " 62 | \n",
+ " M | \n",
+ " False | \n",
+ " 58 | \n",
+ " 2017.0 | \n",
+ " ESSENCE | \n",
+ " VRAI | \n",
+ " [10000;15000[ | \n",
+ " 0 | \n",
+ " 0.0 | \n",
+ " 182.000000 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 14234 | \n",
+ " 2018 | \n",
+ " (-1,0] | \n",
+ " TRIMESTRIEL | \n",
+ " [20000;40000[ | \n",
+ " D | \n",
+ " 20 | \n",
+ " M | \n",
+ " False | \n",
+ " 7 | \n",
+ " 2016.0 | \n",
+ " DIESEL | \n",
+ " FAUX | \n",
+ " [25000;35000[ | \n",
+ " 0 | \n",
+ " 0.0 | \n",
+ " 9.000000 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 14235 | \n",
+ " 2017 | \n",
+ " (-1,0] | \n",
+ " ANNUEL | \n",
+ " [0;20000[ | \n",
+ " C | \n",
+ " 73 | \n",
+ " F | \n",
+ " False | \n",
+ " 41 | \n",
+ " 2017.0 | \n",
+ " ESSENCE | \n",
+ " FAUX | \n",
+ " [10000;15000[ | \n",
+ " 0 | \n",
+ " 0.0 | \n",
+ " 52.000000 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
14236 rows × 17 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ANNEE_CTR CONTRAT_ANCIENNETE FREQUENCE_PAIEMENT_COTISATION \\\n",
+ "0 2019 (-1,0] ANNUEL \n",
+ "1 2019 (-1,0] ANNUEL \n",
+ "2 2021 (1,2] ANNUEL \n",
+ "3 2021 (2,5] ANNUEL \n",
+ "4 2018 (2,5] MENSUEL \n",
+ "... ... ... ... \n",
+ "14231 2021 (2,5] MENSUEL \n",
+ "14232 2019 (2,5] MENSUEL \n",
+ "14233 2017 (-1,0] ANNUEL \n",
+ "14234 2018 (-1,0] TRIMESTRIEL \n",
+ "14235 2017 (-1,0] ANNUEL \n",
+ "\n",
+ " GROUPE_KM ZONE_RISQUE AGE_ASSURE_PRINCIPAL GENRE \\\n",
+ "0 [20000;40000[ B 54 M \n",
+ "1 [20000;40000[ B 88 F \n",
+ "2 [0;20000[ D 35 F \n",
+ "3 [0;20000[ C 46 M \n",
+ "4 [20000;40000[ A 46 F \n",
+ "... ... ... ... ... \n",
+ "14231 [0;20000[ D 55 M \n",
+ "14232 [20000;40000[ A 33 M \n",
+ "14233 [0;20000[ A 62 M \n",
+ "14234 [20000;40000[ D 20 M \n",
+ "14235 [0;20000[ C 73 F \n",
+ "\n",
+ " DEUXIEME_CONDUCTEUR ANCIENNETE_PERMIS ANNEE_CONSTRUCTION ENERGIE \\\n",
+ "0 False 47 2016.0 ESSENCE \n",
+ "1 True 55 2018.0 DIESEL \n",
+ "2 True 16 2017.0 ESSENCE \n",
+ "3 False 44 2018.0 ESSENCE \n",
+ "4 False 31 2009.0 DIESEL \n",
+ "... ... ... ... ... \n",
+ "14231 False 49 2017.0 ESSENCE \n",
+ "14232 False 14 2017.0 ESSENCE \n",
+ "14233 False 58 2017.0 ESSENCE \n",
+ "14234 False 7 2016.0 DIESEL \n",
+ "14235 False 41 2017.0 ESSENCE \n",
+ "\n",
+ " EQUIPEMENT_SECURITE VALEUR_DU_BIEN NB CHARGE EXPO sinistré \n",
+ "0 FAUX [10000;15000[ 0 0.0 245.327869 0 \n",
+ "1 VRAI [20000;25000[ 0 0.0 230.368852 0 \n",
+ "2 FAUX [15000;20000[ 0 0.0 300.000000 0 \n",
+ "3 VRAI [35000;99999[ 0 0.0 304.000000 0 \n",
+ "4 FAUX [10000;15000[ 0 0.0 365.000000 0 \n",
+ "... ... ... .. ... ... ... \n",
+ "14231 FAUX [20000;25000[ 0 0.0 181.000000 0 \n",
+ "14232 FAUX [10000;15000[ 0 0.0 364.669399 0 \n",
+ "14233 VRAI [10000;15000[ 0 0.0 182.000000 0 \n",
+ "14234 FAUX [25000;35000[ 0 0.0 9.000000 0 \n",
+ "14235 FAUX [10000;15000[ 0 0.0 52.000000 0 \n",
+ "\n",
+ "[14236 rows x 17 columns]"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#Calculez la variable \"sinistré\" qui est vraie si la personne a eu un ou plusieurs sinistres ou faux le cas échéant \n"
+ "# Compute the \"sinistré\" flag: 1 if the person had one or more claims (NB > 0), 0 otherwise.\n",
+ "data_retraitee[\"sinistré\"] = data_retraitee[\"NB\"].gt(0).astype(int)\n",
+ "# Preview the first rows only; rendering the whole frame bloats the saved notebook output.\n",
+ "data_retraitee.head()"
]
},
{
@@ -193,11 +1608,855 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 25,
"id": "47cf4b69",
"metadata": {},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.plotly.v1+json": {
+ "config": {
+ "plotlyServerURL": "https://plot.ly"
+ },
+ "data": [
+ {
+ "bingroup": "x",
+ "hovertemplate": "sinistré=%{x}
count=%{y}",
+ "legendgroup": "",
+ "marker": {
+ "color": "#636efa",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "",
+ "orientation": "v",
+ "showlegend": false,
+ "type": "histogram",
+ "x": {
+ "bdata": "                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        
                                                                                                                                                                                                                                           ",
+ "dtype": "i1"
+ },
+ "xaxis": "x",
+ "yaxis": "y"
+ }
+ ],
+ "layout": {
+ "barmode": "relative",
+ "legend": {
+ "tracegroupgap": 0
+ },
+ "template": {
+ "data": {
+ "bar": [
+ {
+ "error_x": {
+ "color": "#2a3f5f"
+ },
+ "error_y": {
+ "color": "#2a3f5f"
+ },
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "bar"
+ }
+ ],
+ "barpolar": [
+ {
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "barpolar"
+ }
+ ],
+ "carpet": [
+ {
+ "aaxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "baxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "type": "carpet"
+ }
+ ],
+ "choropleth": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "choropleth"
+ }
+ ],
+ "contour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "contour"
+ }
+ ],
+ "contourcarpet": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "contourcarpet"
+ }
+ ],
+ "heatmap": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmap"
+ }
+ ],
+ "histogram": [
+ {
+ "marker": {
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "histogram"
+ }
+ ],
+ "histogram2d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2d"
+ }
+ ],
+ "histogram2dcontour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2dcontour"
+ }
+ ],
+ "mesh3d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "mesh3d"
+ }
+ ],
+ "parcoords": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "parcoords"
+ }
+ ],
+ "pie": [
+ {
+ "automargin": true,
+ "type": "pie"
+ }
+ ],
+ "scatter": [
+ {
+ "fillpattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ },
+ "type": "scatter"
+ }
+ ],
+ "scatter3d": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatter3d"
+ }
+ ],
+ "scattercarpet": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattercarpet"
+ }
+ ],
+ "scattergeo": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergeo"
+ }
+ ],
+ "scattergl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergl"
+ }
+ ],
+ "scattermap": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattermap"
+ }
+ ],
+ "scattermapbox": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattermapbox"
+ }
+ ],
+ "scatterpolar": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolar"
+ }
+ ],
+ "scatterpolargl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolargl"
+ }
+ ],
+ "scatterternary": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterternary"
+ }
+ ],
+ "surface": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "surface"
+ }
+ ],
+ "table": [
+ {
+ "cells": {
+ "fill": {
+ "color": "#EBF0F8"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "header": {
+ "fill": {
+ "color": "#C8D4E3"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "type": "table"
+ }
+ ]
+ },
+ "layout": {
+ "annotationdefaults": {
+ "arrowcolor": "#2a3f5f",
+ "arrowhead": 0,
+ "arrowwidth": 1
+ },
+ "autotypenumbers": "strict",
+ "coloraxis": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "colorscale": {
+ "diverging": [
+ [
+ 0,
+ "#8e0152"
+ ],
+ [
+ 0.1,
+ "#c51b7d"
+ ],
+ [
+ 0.2,
+ "#de77ae"
+ ],
+ [
+ 0.3,
+ "#f1b6da"
+ ],
+ [
+ 0.4,
+ "#fde0ef"
+ ],
+ [
+ 0.5,
+ "#f7f7f7"
+ ],
+ [
+ 0.6,
+ "#e6f5d0"
+ ],
+ [
+ 0.7,
+ "#b8e186"
+ ],
+ [
+ 0.8,
+ "#7fbc41"
+ ],
+ [
+ 0.9,
+ "#4d9221"
+ ],
+ [
+ 1,
+ "#276419"
+ ]
+ ],
+ "sequential": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "sequentialminus": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ]
+ },
+ "colorway": [
+ "#636efa",
+ "#EF553B",
+ "#00cc96",
+ "#ab63fa",
+ "#FFA15A",
+ "#19d3f3",
+ "#FF6692",
+ "#B6E880",
+ "#FF97FF",
+ "#FECB52"
+ ],
+ "font": {
+ "color": "#2a3f5f"
+ },
+ "geo": {
+ "bgcolor": "white",
+ "lakecolor": "white",
+ "landcolor": "#E5ECF6",
+ "showlakes": true,
+ "showland": true,
+ "subunitcolor": "white"
+ },
+ "hoverlabel": {
+ "align": "left"
+ },
+ "hovermode": "closest",
+ "mapbox": {
+ "style": "light"
+ },
+ "paper_bgcolor": "white",
+ "plot_bgcolor": "#E5ECF6",
+ "polar": {
+ "angularaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "radialaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "scene": {
+ "xaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "yaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "zaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ }
+ },
+ "shapedefaults": {
+ "line": {
+ "color": "#2a3f5f"
+ }
+ },
+ "ternary": {
+ "aaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "baxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "caxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "title": {
+ "x": 0.05
+ },
+ "xaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ },
+ "yaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ }
+ }
+ },
+ "title": {
+ "text": "Distribution de la variable 'sinistré'"
+ },
+ "xaxis": {
+ "anchor": "y",
+ "domain": [
+ 0,
+ 1
+ ],
+ "title": {
+ "text": "sinistré"
+ }
+ },
+ "yaxis": {
+ "anchor": "x",
+ "domain": [
+ 0,
+ 1
+ ],
+ "title": {
+ "text": "count"
+ }
+ }
+ }
+ }
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Histogram of the 0/1 \"sinistré\" column to look at its distribution.\n",
+ "fig = px.histogram(\n",
+ "    data_retraitee,\n",
+ "    x=\"sinistré\",\n",
+ "    title=\"Distribution de la variable 'sinistré'\",\n",
+ ")\n",
+ "fig.show()"
+ ]
},
{
"cell_type": "markdown",
@@ -209,11 +2468,53 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 27,
"id": "a0bc6278",
"metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(14236, 16)"
+ ]
+ },
+ "execution_count": 27,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# New frame without the \"sinistré\" column; display its shape as a quick check.\n",
+ "data_set = data_retraitee.drop(columns=\"sinistré\")\n",
+ "data_set.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "id": "73d31ea4",
+ "metadata": {},
"outputs": [],
- "source": []
+ "source": [
+ "# Split the columns of data_set into numeric and categorical variables.\n",
+ "# Each list collects the column Series themselves, not just their names.\n",
+ "variables_na = []  # columns holding at least one missing value\n",
+ "variables_numeriques = []\n",
+ "variables_01 = []  # initialised here but never filled by this cell\n",
+ "variables_categorielles = []\n",
+ "for colu in data_set.columns:\n",
+ "    colonne = data_set[colu]\n",
+ "    if colonne.isna().any():\n",
+ "        variables_na.append(colonne)\n",
+ "        continue\n",
+ "    est_numerique = str(colonne.dtypes) in [\"int32\", \"int64\", \"float64\"]\n",
+ "    est_binaire = len(colonne.unique()) == 2\n",
+ "    # Numeric columns stay numeric unless they have exactly two distinct values;\n",
+ "    # two-valued numeric columns and every non-numeric column are categorical.\n",
+ "    if est_numerique and not est_binaire:\n",
+ "        variables_numeriques.append(colonne)\n",
+ "    else:\n",
+ "        variables_categorielles.append(colonne)\n"
+ ]
},
{
"cell_type": "markdown",
@@ -229,7 +2530,423 @@
"id": "30df8bd5",
"metadata": {},
"outputs": [],
- "source": []
+ "source": [
+ "# The list of Series becomes one row per variable; transpose so each variable is a column.\n",
+ "vars_categorielles = pd.DataFrame(variables_categorielles).T"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "id": "be7a7d00",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.microsoft.datawrangler.viewer.v0+json": {
+ "columns": [
+ {
+ "name": "index",
+ "rawType": "object",
+ "type": "string"
+ },
+ {
+ "name": "CONTRAT_ANCIENNETE",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "FREQUENCE_PAIEMENT_COTISATION",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "GROUPE_KM",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ZONE_RISQUE",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "GENRE",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "DEUXIEME_CONDUCTEUR",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ENERGIE",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "EQUIPEMENT_SECURITE",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "VALEUR_DU_BIEN",
+ "rawType": "float64",
+ "type": "float"
+ }
+ ],
+ "ref": "cdaf33f1-78b7-4df1-9a7c-93b778e94756",
+ "rows": [
+ [
+ "CONTRAT_ANCIENNETE",
+ "1.0",
+ "0.0",
+ "0.01",
+ "0.02",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.01",
+ "0.0"
+ ],
+ [
+ "FREQUENCE_PAIEMENT_COTISATION",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "0.01",
+ "0.0",
+ "0.0",
+ "0.01",
+ "0.02"
+ ],
+ [
+ "GROUPE_KM",
+ "0.01",
+ "0.0",
+ "1.0",
+ "0.01",
+ "0.01",
+ "0.0",
+ "0.04",
+ "0.01",
+ "0.02"
+ ],
+ [
+ "ZONE_RISQUE",
+ "0.02",
+ "0.0",
+ "0.01",
+ "1.0",
+ "0.0",
+ "0.0",
+ "0.01",
+ "0.03",
+ "0.0"
+ ],
+ [
+ "GENRE",
+ "0.0",
+ "0.01",
+ "0.01",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.02",
+ "0.01",
+ "0.07"
+ ],
+ [
+ "DEUXIEME_CONDUCTEUR",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "0.0"
+ ],
+ [
+ "ENERGIE",
+ "0.0",
+ "0.0",
+ "0.04",
+ "0.01",
+ "0.02",
+ "0.0",
+ "1.0",
+ "0.02",
+ "0.08"
+ ],
+ [
+ "EQUIPEMENT_SECURITE",
+ "0.01",
+ "0.01",
+ "0.01",
+ "0.03",
+ "0.01",
+ "0.0",
+ "0.02",
+ "1.0",
+ "0.07"
+ ],
+ [
+ "VALEUR_DU_BIEN",
+ "0.0",
+ "0.02",
+ "0.02",
+ "0.0",
+ "0.07",
+ "0.0",
+ "0.08",
+ "0.07",
+ "1.0"
+ ]
+ ],
+ "shape": {
+ "columns": 9,
+ "rows": 9
+ }
+ },
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " CONTRAT_ANCIENNETE | \n",
+ " FREQUENCE_PAIEMENT_COTISATION | \n",
+ " GROUPE_KM | \n",
+ " ZONE_RISQUE | \n",
+ " GENRE | \n",
+ " DEUXIEME_CONDUCTEUR | \n",
+ " ENERGIE | \n",
+ " EQUIPEMENT_SECURITE | \n",
+ " VALEUR_DU_BIEN | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | CONTRAT_ANCIENNETE | \n",
+ " 1.00 | \n",
+ " 0.00 | \n",
+ " 0.01 | \n",
+ " 0.02 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 0.00 | \n",
+ " 0.01 | \n",
+ " 0.00 | \n",
+ "
\n",
+ " \n",
+ " | FREQUENCE_PAIEMENT_COTISATION | \n",
+ " 0.00 | \n",
+ " 1.00 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.01 | \n",
+ " 0.0 | \n",
+ " 0.00 | \n",
+ " 0.01 | \n",
+ " 0.02 | \n",
+ "
\n",
+ " \n",
+ " | GROUPE_KM | \n",
+ " 0.01 | \n",
+ " 0.00 | \n",
+ " 1.00 | \n",
+ " 0.01 | \n",
+ " 0.01 | \n",
+ " 0.0 | \n",
+ " 0.04 | \n",
+ " 0.01 | \n",
+ " 0.02 | \n",
+ "
\n",
+ " \n",
+ " | ZONE_RISQUE | \n",
+ " 0.02 | \n",
+ " 0.00 | \n",
+ " 0.01 | \n",
+ " 1.00 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 0.01 | \n",
+ " 0.03 | \n",
+ " 0.00 | \n",
+ "
\n",
+ " \n",
+ " | GENRE | \n",
+ " 0.00 | \n",
+ " 0.01 | \n",
+ " 0.01 | \n",
+ " 0.00 | \n",
+ " 1.00 | \n",
+ " 0.0 | \n",
+ " 0.02 | \n",
+ " 0.01 | \n",
+ " 0.07 | \n",
+ "
\n",
+ " \n",
+ " | DEUXIEME_CONDUCTEUR | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 1.0 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ "
\n",
+ " \n",
+ " | ENERGIE | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.04 | \n",
+ " 0.01 | \n",
+ " 0.02 | \n",
+ " 0.0 | \n",
+ " 1.00 | \n",
+ " 0.02 | \n",
+ " 0.08 | \n",
+ "
\n",
+ " \n",
+ " | EQUIPEMENT_SECURITE | \n",
+ " 0.01 | \n",
+ " 0.01 | \n",
+ " 0.01 | \n",
+ " 0.03 | \n",
+ " 0.01 | \n",
+ " 0.0 | \n",
+ " 0.02 | \n",
+ " 1.00 | \n",
+ " 0.07 | \n",
+ "
\n",
+ " \n",
+ " | VALEUR_DU_BIEN | \n",
+ " 0.00 | \n",
+ " 0.02 | \n",
+ " 0.02 | \n",
+ " 0.00 | \n",
+ " 0.07 | \n",
+ " 0.0 | \n",
+ " 0.08 | \n",
+ " 0.07 | \n",
+ " 1.00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " CONTRAT_ANCIENNETE \\\n",
+ "CONTRAT_ANCIENNETE 1.00 \n",
+ "FREQUENCE_PAIEMENT_COTISATION 0.00 \n",
+ "GROUPE_KM 0.01 \n",
+ "ZONE_RISQUE 0.02 \n",
+ "GENRE 0.00 \n",
+ "DEUXIEME_CONDUCTEUR 0.00 \n",
+ "ENERGIE 0.00 \n",
+ "EQUIPEMENT_SECURITE 0.01 \n",
+ "VALEUR_DU_BIEN 0.00 \n",
+ "\n",
+ " FREQUENCE_PAIEMENT_COTISATION GROUPE_KM \\\n",
+ "CONTRAT_ANCIENNETE 0.00 0.01 \n",
+ "FREQUENCE_PAIEMENT_COTISATION 1.00 0.00 \n",
+ "GROUPE_KM 0.00 1.00 \n",
+ "ZONE_RISQUE 0.00 0.01 \n",
+ "GENRE 0.01 0.01 \n",
+ "DEUXIEME_CONDUCTEUR 0.00 0.00 \n",
+ "ENERGIE 0.00 0.04 \n",
+ "EQUIPEMENT_SECURITE 0.01 0.01 \n",
+ "VALEUR_DU_BIEN 0.02 0.02 \n",
+ "\n",
+ " ZONE_RISQUE GENRE DEUXIEME_CONDUCTEUR \\\n",
+ "CONTRAT_ANCIENNETE 0.02 0.00 0.0 \n",
+ "FREQUENCE_PAIEMENT_COTISATION 0.00 0.01 0.0 \n",
+ "GROUPE_KM 0.01 0.01 0.0 \n",
+ "ZONE_RISQUE 1.00 0.00 0.0 \n",
+ "GENRE 0.00 1.00 0.0 \n",
+ "DEUXIEME_CONDUCTEUR 0.00 0.00 1.0 \n",
+ "ENERGIE 0.01 0.02 0.0 \n",
+ "EQUIPEMENT_SECURITE 0.03 0.01 0.0 \n",
+ "VALEUR_DU_BIEN 0.00 0.07 0.0 \n",
+ "\n",
+ " ENERGIE EQUIPEMENT_SECURITE VALEUR_DU_BIEN \n",
+ "CONTRAT_ANCIENNETE 0.00 0.01 0.00 \n",
+ "FREQUENCE_PAIEMENT_COTISATION 0.00 0.01 0.02 \n",
+ "GROUPE_KM 0.04 0.01 0.02 \n",
+ "ZONE_RISQUE 0.01 0.03 0.00 \n",
+ "GENRE 0.02 0.01 0.07 \n",
+ "DEUXIEME_CONDUCTEUR 0.00 0.00 0.00 \n",
+ "ENERGIE 1.00 0.02 0.08 \n",
+ "EQUIPEMENT_SECURITE 0.02 1.00 0.07 \n",
+ "VALEUR_DU_BIEN 0.08 0.07 1.00 "
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Pairwise Cramér's V between every pair of categorical variables, rounded to 2 decimals.\n",
+ "noms_cat = vars_categorielles.columns\n",
+ "rows = [\n",
+ "    [\n",
+ "        round(cramers_V(vars_categorielles[nom_a], vars_categorielles[nom_b]), 2)\n",
+ "        for nom_b in noms_cat\n",
+ "    ]\n",
+ "    for nom_a in noms_cat\n",
+ "]\n",
+ "\n",
+ "# Wrap the matrix in a DataFrame labelled by variable name on both axes.\n",
+ "cramers_results = np.array(rows)\n",
+ "v_cramer_resultats = pd.DataFrame(cramers_results, columns=noms_cat, index=noms_cat)\n",
+ "\n",
+ "v_cramer_resultats\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "id": "b3297dca",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Print every pair of variables whose Cramér's V is above 0.7.\n",
+ "# Only the upper triangle (j > i) is scanned, so each pair is checked once.\n",
+ "nb_vars = v_cramer_resultats.shape[0]\n",
+ "for i in range(nb_vars):\n",
+ "    for j in range(i + 1, nb_vars):\n",
+ "        v_ij = v_cramer_resultats.iloc[i, j]\n",
+ "        if v_ij > 0.7:\n",
+ "            nom_i = v_cramer_resultats.index[i]\n",
+ "            nom_j = v_cramer_resultats.columns[j]\n",
+ "            print(f\"{nom_i} et {nom_j} sont trop dépendantes, V-CRAMER = {v_ij!s}\")\n"
+ ]
},
{
"cell_type": "markdown",
@@ -241,11 +2958,13 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 32,
"id": "d1fa12fc",
"metadata": {},
"outputs": [],
- "source": []
+ "source": [
+ "# Transpose so that each numeric variable becomes a column\n",
+ "vars_numeriques = pd.DataFrame(variables_numeriques).T"
+ ]
},
{
"cell_type": "markdown",
@@ -255,6 +2974,290 @@
"**Question :** quels sont vos commentaires ?"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "id": "c70946b4",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.microsoft.datawrangler.viewer.v0+json": {
+ "columns": [
+ {
+ "name": "index",
+ "rawType": "object",
+ "type": "string"
+ },
+ {
+ "name": "ANNEE_CTR",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "AGE_ASSURE_PRINCIPAL",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ANCIENNETE_PERMIS",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ANNEE_CONSTRUCTION",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "NB",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "CHARGE",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "EXPO",
+ "rawType": "float64",
+ "type": "float"
+ }
+ ],
+ "ref": "5ae1d96a-bfa4-47eb-bc85-b1de1b32bf1e",
+ "rows": [
+ [
+ "ANNEE_CTR",
+ "1.0",
+ "0.048023234802924315",
+ "0.043983174120495815",
+ "0.3615499864845018",
+ "-0.05775190894636334",
+ "-0.028901069139582642",
+ "-0.04770515515535773"
+ ],
+ [
+ "AGE_ASSURE_PRINCIPAL",
+ "0.048023234802924315",
+ "1.0",
+ "0.4987430846753776",
+ "-0.0591835157827114",
+ "-0.012425345899111317",
+ "-0.020907992524227155",
+ "0.06096340138959582"
+ ],
+ [
+ "ANCIENNETE_PERMIS",
+ "0.043983174120495815",
+ "0.4987430846753776",
+ "1.0",
+ "-0.0298138263902136",
+ "-0.008703999957333864",
+ "-0.011347002839350888",
+ "0.0324606537737922"
+ ],
+ [
+ "ANNEE_CONSTRUCTION",
+ "0.3615499864845018",
+ "-0.0591835157827114",
+ "-0.0298138263902136",
+ "1.0",
+ "-0.01437673371578632",
+ "-0.0012301736578250726",
+ "-0.07395284013392618"
+ ],
+ [
+ "NB",
+ "-0.05775190894636334",
+ "-0.012425345899111317",
+ "-0.008703999957333864",
+ "-0.01437673371578632",
+ "1.0",
+ "0.5071071150738479",
+ "0.0507022890091039"
+ ],
+ [
+ "CHARGE",
+ "-0.028901069139582642",
+ "-0.020907992524227155",
+ "-0.011347002839350888",
+ "-0.0012301736578250726",
+ "0.5071071150738479",
+ "1.0",
+ "-0.021418687122216843"
+ ],
+ [
+ "EXPO",
+ "-0.04770515515535773",
+ "0.06096340138959582",
+ "0.0324606537737922",
+ "-0.07395284013392618",
+ "0.0507022890091039",
+ "-0.021418687122216843",
+ "1.0"
+ ]
+ ],
+ "shape": {
+ "columns": 7,
+ "rows": 7
+ }
+ },
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ANNEE_CTR | \n",
+ " AGE_ASSURE_PRINCIPAL | \n",
+ " ANCIENNETE_PERMIS | \n",
+ " ANNEE_CONSTRUCTION | \n",
+ " NB | \n",
+ " CHARGE | \n",
+ " EXPO | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | ANNEE_CTR | \n",
+ " 1.000000 | \n",
+ " 0.048023 | \n",
+ " 0.043983 | \n",
+ " 0.361550 | \n",
+ " -0.057752 | \n",
+ " -0.028901 | \n",
+ " -0.047705 | \n",
+ "
\n",
+ " \n",
+ " | AGE_ASSURE_PRINCIPAL | \n",
+ " 0.048023 | \n",
+ " 1.000000 | \n",
+ " 0.498743 | \n",
+ " -0.059184 | \n",
+ " -0.012425 | \n",
+ " -0.020908 | \n",
+ " 0.060963 | \n",
+ "
\n",
+ " \n",
+ " | ANCIENNETE_PERMIS | \n",
+ " 0.043983 | \n",
+ " 0.498743 | \n",
+ " 1.000000 | \n",
+ " -0.029814 | \n",
+ " -0.008704 | \n",
+ " -0.011347 | \n",
+ " 0.032461 | \n",
+ "
\n",
+ " \n",
+ " | ANNEE_CONSTRUCTION | \n",
+ " 0.361550 | \n",
+ " -0.059184 | \n",
+ " -0.029814 | \n",
+ " 1.000000 | \n",
+ " -0.014377 | \n",
+ " -0.001230 | \n",
+ " -0.073953 | \n",
+ "
\n",
+ " \n",
+ " | NB | \n",
+ " -0.057752 | \n",
+ " -0.012425 | \n",
+ " -0.008704 | \n",
+ " -0.014377 | \n",
+ " 1.000000 | \n",
+ " 0.507107 | \n",
+ " 0.050702 | \n",
+ "
\n",
+ " \n",
+ " | CHARGE | \n",
+ " -0.028901 | \n",
+ " -0.020908 | \n",
+ " -0.011347 | \n",
+ " -0.001230 | \n",
+ " 0.507107 | \n",
+ " 1.000000 | \n",
+ " -0.021419 | \n",
+ "
\n",
+ " \n",
+ " | EXPO | \n",
+ " -0.047705 | \n",
+ " 0.060963 | \n",
+ " 0.032461 | \n",
+ " -0.073953 | \n",
+ " 0.050702 | \n",
+ " -0.021419 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ANNEE_CTR AGE_ASSURE_PRINCIPAL ANCIENNETE_PERMIS \\\n",
+ "ANNEE_CTR 1.000000 0.048023 0.043983 \n",
+ "AGE_ASSURE_PRINCIPAL 0.048023 1.000000 0.498743 \n",
+ "ANCIENNETE_PERMIS 0.043983 0.498743 1.000000 \n",
+ "ANNEE_CONSTRUCTION 0.361550 -0.059184 -0.029814 \n",
+ "NB -0.057752 -0.012425 -0.008704 \n",
+ "CHARGE -0.028901 -0.020908 -0.011347 \n",
+ "EXPO -0.047705 0.060963 0.032461 \n",
+ "\n",
+ " ANNEE_CONSTRUCTION NB CHARGE EXPO \n",
+ "ANNEE_CTR 0.361550 -0.057752 -0.028901 -0.047705 \n",
+ "AGE_ASSURE_PRINCIPAL -0.059184 -0.012425 -0.020908 0.060963 \n",
+ "ANCIENNETE_PERMIS -0.029814 -0.008704 -0.011347 0.032461 \n",
+ "ANNEE_CONSTRUCTION 1.000000 -0.014377 -0.001230 -0.073953 \n",
+ "NB -0.014377 1.000000 0.507107 0.050702 \n",
+ "CHARGE -0.001230 0.507107 1.000000 -0.021419 \n",
+ "EXPO -0.073953 0.050702 -0.021419 1.000000 "
+ ]
+ },
+ "execution_count": 33,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Pairwise Pearson correlation between the numeric variables\n",
+ "# (\"pearson\" is the default method of DataFrame.corr, so no argument is needed)\n",
+ "correlations_num = vars_numeriques.corr()\n",
+ "correlations_num"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "id": "4c29f1f0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Report every pair of distinct numeric variables whose absolute Pearson\n",
+ "# correlation exceeds 0.7. Only cells above the diagonal (j > i) are scanned.\n",
+ "nb_variables = correlations_num.shape[0]\n",
+ "for i in range(nb_variables):\n",
+ "    for j in range(i + 1, nb_variables):\n",
+ "        if abs(correlations_num.iloc[i, j]) > 0.7:\n",
+ "            # sep=\"\" makes print() apply str() to each piece and join them without spaces;\n",
+ "            # the signed (not absolute) correlation is what gets printed\n",
+ "            print(\n",
+ "                correlations_num.index[i],\n",
+ "                \" et \",\n",
+ "                correlations_num.columns[j],\n",
+ "                \" sont trop dépendantes, corr = \",\n",
+ "                correlations_num.iloc[i, j],\n",
+ "                sep=\"\",\n",
+ "            )"
+ ]
+ },
{
"cell_type": "markdown",
"id": "212209ec",
@@ -284,11 +3287,647 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 35,
"id": "b8530717",
"metadata": {},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.microsoft.datawrangler.viewer.v0+json": {
+ "columns": [
+ {
+ "name": "index",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "CONTRAT_ANCIENNETE_(0,1]",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "CONTRAT_ANCIENNETE_(1,2]",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "CONTRAT_ANCIENNETE_(2,5]",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "CONTRAT_ANCIENNETE_(5,10]",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "FREQUENCE_PAIEMENT_COTISATION_MENSUEL",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "FREQUENCE_PAIEMENT_COTISATION_TRIMESTRIEL",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "GROUPE_KM_[20000;40000[",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "GROUPE_KM_[40000;60000[",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "GROUPE_KM_[60000;99999[",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ZONE_RISQUE_B",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ZONE_RISQUE_C",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ZONE_RISQUE_D",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ZONE_RISQUE_E",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ZONE_RISQUE_F",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ZONE_RISQUE_G",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ZONE_RISQUE_H",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ZONE_RISQUE_I",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ZONE_RISQUE_J",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ZONE_RISQUE_K",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ZONE_RISQUE_L",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ZONE_RISQUE_M",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ZONE_RISQUE_R",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ZONE_RISQUE_S",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ZONE_RISQUE_T",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ZONE_RISQUE_X",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "GENRE_M",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "DEUXIEME_CONDUCTEUR_True",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ENERGIE_DIESEL",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ENERGIE_ESSENCE",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "EQUIPEMENT_SECURITE_VRAI",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "VALEUR_DU_BIEN_[10000;15000[",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "VALEUR_DU_BIEN_[15000;20000[",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "VALEUR_DU_BIEN_[20000;25000[",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "VALEUR_DU_BIEN_[25000;35000[",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "VALEUR_DU_BIEN_[35000;99999[",
+ "rawType": "float64",
+ "type": "float"
+ }
+ ],
+ "ref": "a0294dee-6844-4af1-9ee3-1bdc53a57dfa",
+ "rows": [
+ [
+ "0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0"
+ ],
+ [
+ "1",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "1.0",
+ "1.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.0"
+ ],
+ [
+ "2",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "0.0"
+ ],
+ [
+ "3",
+ "0.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "1.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "1.0"
+ ],
+ [
+ "4",
+ "0.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0"
+ ]
+ ],
+ "shape": {
+ "columns": 35,
+ "rows": 5
+ }
+ },
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " CONTRAT_ANCIENNETE_(0,1] | \n",
+ " CONTRAT_ANCIENNETE_(1,2] | \n",
+ " CONTRAT_ANCIENNETE_(2,5] | \n",
+ " CONTRAT_ANCIENNETE_(5,10] | \n",
+ " FREQUENCE_PAIEMENT_COTISATION_MENSUEL | \n",
+ " FREQUENCE_PAIEMENT_COTISATION_TRIMESTRIEL | \n",
+ " GROUPE_KM_[20000;40000[ | \n",
+ " GROUPE_KM_[40000;60000[ | \n",
+ " GROUPE_KM_[60000;99999[ | \n",
+ " ZONE_RISQUE_B | \n",
+ " ... | \n",
+ " GENRE_M | \n",
+ " DEUXIEME_CONDUCTEUR_True | \n",
+ " ENERGIE_DIESEL | \n",
+ " ENERGIE_ESSENCE | \n",
+ " EQUIPEMENT_SECURITE_VRAI | \n",
+ " VALEUR_DU_BIEN_[10000;15000[ | \n",
+ " VALEUR_DU_BIEN_[15000;20000[ | \n",
+ " VALEUR_DU_BIEN_[20000;25000[ | \n",
+ " VALEUR_DU_BIEN_[25000;35000[ | \n",
+ " VALEUR_DU_BIEN_[35000;99999[ | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " ... | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 35 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " CONTRAT_ANCIENNETE_(0,1] CONTRAT_ANCIENNETE_(1,2] \\\n",
+ "0 0.0 0.0 \n",
+ "1 0.0 0.0 \n",
+ "2 0.0 1.0 \n",
+ "3 0.0 0.0 \n",
+ "4 0.0 0.0 \n",
+ "\n",
+ " CONTRAT_ANCIENNETE_(2,5] CONTRAT_ANCIENNETE_(5,10] \\\n",
+ "0 0.0 0.0 \n",
+ "1 0.0 0.0 \n",
+ "2 0.0 0.0 \n",
+ "3 1.0 0.0 \n",
+ "4 1.0 0.0 \n",
+ "\n",
+ " FREQUENCE_PAIEMENT_COTISATION_MENSUEL \\\n",
+ "0 0.0 \n",
+ "1 0.0 \n",
+ "2 0.0 \n",
+ "3 0.0 \n",
+ "4 1.0 \n",
+ "\n",
+ " FREQUENCE_PAIEMENT_COTISATION_TRIMESTRIEL GROUPE_KM_[20000;40000[ \\\n",
+ "0 0.0 1.0 \n",
+ "1 0.0 1.0 \n",
+ "2 0.0 0.0 \n",
+ "3 0.0 0.0 \n",
+ "4 0.0 1.0 \n",
+ "\n",
+ " GROUPE_KM_[40000;60000[ GROUPE_KM_[60000;99999[ ZONE_RISQUE_B ... \\\n",
+ "0 0.0 0.0 1.0 ... \n",
+ "1 0.0 0.0 1.0 ... \n",
+ "2 0.0 0.0 0.0 ... \n",
+ "3 0.0 0.0 0.0 ... \n",
+ "4 0.0 0.0 0.0 ... \n",
+ "\n",
+ " GENRE_M DEUXIEME_CONDUCTEUR_True ENERGIE_DIESEL ENERGIE_ESSENCE \\\n",
+ "0 1.0 0.0 0.0 1.0 \n",
+ "1 0.0 1.0 1.0 0.0 \n",
+ "2 0.0 1.0 0.0 1.0 \n",
+ "3 1.0 0.0 0.0 1.0 \n",
+ "4 0.0 0.0 1.0 0.0 \n",
+ "\n",
+ " EQUIPEMENT_SECURITE_VRAI VALEUR_DU_BIEN_[10000;15000[ \\\n",
+ "0 0.0 1.0 \n",
+ "1 1.0 0.0 \n",
+ "2 0.0 0.0 \n",
+ "3 1.0 0.0 \n",
+ "4 0.0 1.0 \n",
+ "\n",
+ " VALEUR_DU_BIEN_[15000;20000[ VALEUR_DU_BIEN_[20000;25000[ \\\n",
+ "0 0.0 0.0 \n",
+ "1 0.0 1.0 \n",
+ "2 1.0 0.0 \n",
+ "3 0.0 0.0 \n",
+ "4 0.0 0.0 \n",
+ "\n",
+ " VALEUR_DU_BIEN_[25000;35000[ VALEUR_DU_BIEN_[35000;99999[ \n",
+ "0 0.0 0.0 \n",
+ "1 0.0 0.0 \n",
+ "2 0.0 0.0 \n",
+ "3 0.0 1.0 \n",
+ "4 0.0 0.0 \n",
+ "\n",
+ "[5 rows x 35 columns]"
+ ]
+ },
+ "execution_count": 35,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# One-hot encode the categorical variables.\n",
+ "# drop=\"first\" omits the first category of each variable, and sparse_output=False\n",
+ "# makes transform() return a dense array that can be wrapped in a DataFrame.\n",
+ "# handle_unknown keeps its default, so categories unseen during fit raise at\n",
+ "# transform time instead of being silently ignored.\n",
+ "preproc_ohe = preproc.OneHotEncoder(drop=\"first\", sparse_output=False).fit(\n",
+ "    vars_categorielles\n",
+ ")\n",
+ "\n",
+ "# Re-attach readable column names of the form <variable>_<category>\n",
+ "variables_categorielles_ohe = pd.DataFrame(\n",
+ "    preproc_ohe.transform(vars_categorielles),\n",
+ "    columns=preproc_ohe.get_feature_names_out(vars_categorielles.columns),\n",
+ ")\n",
+ "variables_categorielles_ohe.head()"
+ ]
},
{
"cell_type": "markdown",
@@ -300,11 +3939,228 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 36,
"id": "4ff3847d",
"metadata": {},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.microsoft.datawrangler.viewer.v0+json": {
+ "columns": [
+ {
+ "name": "index",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "ANNEE_CTR",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "AGE_ASSURE_PRINCIPAL",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ANCIENNETE_PERMIS",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ANNEE_CONSTRUCTION",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "NB",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "CHARGE",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "EXPO",
+ "rawType": "float64",
+ "type": "float"
+ }
+ ],
+ "ref": "72afd0da-ac68-4aee-87ae-5e375d6d237d",
+ "rows": [
+ [
+ "0",
+ "0.1393559608666301",
+ "0.6582867283271144",
+ "0.5635879287137437",
+ "0.1740107784615837",
+ "-0.24202868219585674",
+ "-0.181253980627111",
+ "-0.289146035458737"
+ ],
+ [
+ "1",
+ "0.1393559608666301",
+ "3.1516280073827847",
+ "0.9874335016275682",
+ "0.7442069902648635",
+ "-0.24202868219585674",
+ "-0.181253980627111",
+ "-0.42709265252699025"
+ ],
+ [
+ "2",
+ "1.3471924655222902",
+ "-0.7350510452628191",
+ "-1.078813666327326",
+ "0.45910888436322356",
+ "-0.24202868219585674",
+ "-0.181253980627111",
+ "0.215020504730438"
+ ],
+ [
+ "3",
+ "1.3471924655222902",
+ "0.0716181920787214",
+ "0.40464583887105954",
+ "0.7442069902648635",
+ "-0.24202868219585674",
+ "-0.181253980627111",
+ "0.25190705219855114"
+ ],
+ [
+ "4",
+ "-0.4645622914611999",
+ "0.0716181920787214",
+ "-0.28410321711390524",
+ "-1.8216759628498953",
+ "-0.24202868219585674",
+ "-0.181253980627111",
+ "0.8144269010872852"
+ ]
+ ],
+ "shape": {
+ "columns": 7,
+ "rows": 5
+ }
+ },
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ANNEE_CTR | \n",
+ " AGE_ASSURE_PRINCIPAL | \n",
+ " ANCIENNETE_PERMIS | \n",
+ " ANNEE_CONSTRUCTION | \n",
+ " NB | \n",
+ " CHARGE | \n",
+ " EXPO | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 0.139356 | \n",
+ " 0.658287 | \n",
+ " 0.563588 | \n",
+ " 0.174011 | \n",
+ " -0.242029 | \n",
+ " -0.181254 | \n",
+ " -0.289146 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 0.139356 | \n",
+ " 3.151628 | \n",
+ " 0.987434 | \n",
+ " 0.744207 | \n",
+ " -0.242029 | \n",
+ " -0.181254 | \n",
+ " -0.427093 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 1.347192 | \n",
+ " -0.735051 | \n",
+ " -1.078814 | \n",
+ " 0.459109 | \n",
+ " -0.242029 | \n",
+ " -0.181254 | \n",
+ " 0.215021 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 1.347192 | \n",
+ " 0.071618 | \n",
+ " 0.404646 | \n",
+ " 0.744207 | \n",
+ " -0.242029 | \n",
+ " -0.181254 | \n",
+ " 0.251907 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " -0.464562 | \n",
+ " 0.071618 | \n",
+ " -0.284103 | \n",
+ " -1.821676 | \n",
+ " -0.242029 | \n",
+ " -0.181254 | \n",
+ " 0.814427 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ANNEE_CTR AGE_ASSURE_PRINCIPAL ANCIENNETE_PERMIS ANNEE_CONSTRUCTION \\\n",
+ "0 0.139356 0.658287 0.563588 0.174011 \n",
+ "1 0.139356 3.151628 0.987434 0.744207 \n",
+ "2 1.347192 -0.735051 -1.078814 0.459109 \n",
+ "3 1.347192 0.071618 0.404646 0.744207 \n",
+ "4 -0.464562 0.071618 -0.284103 -1.821676 \n",
+ "\n",
+ " NB CHARGE EXPO \n",
+ "0 -0.242029 -0.181254 -0.289146 \n",
+ "1 -0.242029 -0.181254 -0.427093 \n",
+ "2 -0.242029 -0.181254 0.215021 \n",
+ "3 -0.242029 -0.181254 0.251907 \n",
+ "4 -0.242029 -0.181254 0.814427 "
+ ]
+ },
+ "execution_count": 36,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Standardize the numeric variables: the scaler is fitted on vars_numeriques with\n",
+ "# centering (with_mean=True) and scaling (with_std=True) both enabled.\n",
+ "preproc_scale = preproc.StandardScaler(with_mean=True, with_std=True).fit(\n",
+ "    vars_numeriques\n",
+ ")\n",
+ "\n",
+ "# Re-attach the original column names to the transformed values\n",
+ "vars_numeriques_scaled = pd.DataFrame(\n",
+ "    preproc_scale.transform(vars_numeriques),\n",
+ "    columns=vars_numeriques.columns,\n",
+ ")\n",
+ "vars_numeriques_scaled.head()"
+ ]
},
{
"cell_type": "markdown",
@@ -347,11 +4203,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 38,
"id": "d9342ad6",
"metadata": {},
"outputs": [],
- "source": []
+ "source": [
+ "# Feature table: scaled numeric columns and one-hot columns, matched on the row index\n",
+ "X_global = pd.merge(\n",
+ "    vars_numeriques_scaled,\n",
+ "    variables_categorielles_ohe,\n",
+ "    left_index=True,\n",
+ "    right_index=True,\n",
+ ")\n",
+ "\n",
+ "# NOTE(review): the scaled numeric columns include NB and CHARGE; if \"sinistré\" is\n",
+ "# derived from either of them, the features leak the target — confirm before modelling.\n",
+ "X = X_global.to_numpy()\n",
+ "Y = data_retraitee[\"sinistré\"]\n",
+ "\n",
+ "# Hold out 20% of the rows as a test set; random_state pins the shuffle\n",
+ "X_train, X_test, y_train, y_test = train_test_split(\n",
+ "    X, Y, random_state=42, test_size=0.2\n",
+ ")"
+ ]
},
{
"cell_type": "markdown",
@@ -367,7 +4236,135 @@
"id": "cb60fe19",
"metadata": {},
"outputs": [],
- "source": []
+ "source": [
+ "# Hyperparameter values explored by the grid search\n",
+ "param_grid = dict(\n",
+ "    n_estimators=[60, 65, 70, 75],\n",
+ "    max_depth=[None, 1, 2, 3],\n",
+ "    min_samples_split=[5, 8, 10, 11, 13, 14, 15],\n",
+ ")\n",
+ "\n",
+ "# Number of folds used for cross-validation\n",
+ "num_folds = 5"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "id": "b976720e",
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "InvalidParameterError",
+ "evalue": "The 'scoring' parameter of GridSearchCV must be a str among {'average_precision', 'adjusted_rand_score', 'roc_auc', 'top_k_accuracy', 'recall', 'neg_negative_likelihood_ratio', 'neg_mean_squared_error', 'positive_likelihood_ratio', 'precision', 'neg_mean_squared_log_error', 'precision_micro', 'neg_mean_poisson_deviance', 'completeness_score', 'accuracy', 'adjusted_mutual_info_score', 'precision_macro', 'neg_max_error', 'mutual_info_score', 'jaccard_samples', 'recall_samples', 'neg_mean_absolute_percentage_error', 'fowlkes_mallows_score', 'neg_brier_score', 'f1_samples', 'jaccard_weighted', 'recall_micro', 'd2_absolute_error_score', 'homogeneity_score', 'matthews_corrcoef', 'f1_micro', 'f1_macro', 'neg_root_mean_squared_error', 'precision_samples', 'neg_root_mean_squared_log_error', 'neg_mean_gamma_deviance', 'jaccard', 'neg_median_absolute_error', 'neg_mean_absolute_error', 'roc_auc_ovr', 'jaccard_micro', 'jaccard_macro', 'roc_auc_ovo', 'neg_log_loss', 'normalized_mutual_info_score', 'balanced_accuracy', 'f1_weighted', 'r2', 'recall_macro', 'rand_score', 'v_measure_score', 'explained_variance', 'roc_auc_ovo_weighted', 'precision_weighted', 'roc_auc_ovr_weighted', 'f1', 'recall_weighted'}, a callable, an instance of 'list', an instance of 'tuple', an instance of 'dict' or None. Got '' instead.",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
+ "\u001b[31mInvalidParameterError\u001b[39m Traceback (most recent call last)",
+ "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[47]\u001b[39m\u001b[32m, line 16\u001b[39m\n\u001b[32m 5\u001b[39m grid_search = GridSearchCV(\n\u001b[32m 6\u001b[39m estimator = rf,\n\u001b[32m 7\u001b[39m param_grid = param_grid,\n\u001b[32m (...)\u001b[39m\u001b[32m 12\u001b[39m n_jobs = -\u001b[32m1\u001b[39m, \u001b[38;5;66;03m# Utiliser tous les cœurs du processeur\u001b[39;00m\n\u001b[32m 13\u001b[39m )\n\u001b[32m 15\u001b[39m \u001b[38;5;66;03m# Exécution de la recherche sur grille\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m16\u001b[39m \u001b[43mgrid_search\u001b[49m\u001b[43m.\u001b[49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX_train\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_train\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 18\u001b[39m \u001b[38;5;66;03m# Afficher les meilleurs hyperparamètres\u001b[39;00m\n\u001b[32m 19\u001b[39m best_params = grid_search.best_params_\n",
+ "\u001b[36mFile \u001b[39m\u001b[32m~/Workspace/studies/.venv/lib/python3.13/site-packages/sklearn/base.py:1382\u001b[39m, in \u001b[36m_fit_context..decorator..wrapper\u001b[39m\u001b[34m(estimator, *args, **kwargs)\u001b[39m\n\u001b[32m 1377\u001b[39m partial_fit_and_fitted = (\n\u001b[32m 1378\u001b[39m fit_method.\u001b[34m__name__\u001b[39m == \u001b[33m\"\u001b[39m\u001b[33mpartial_fit\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m _is_fitted(estimator)\n\u001b[32m 1379\u001b[39m )\n\u001b[32m 1381\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m global_skip_validation \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m partial_fit_and_fitted:\n\u001b[32m-> \u001b[39m\u001b[32m1382\u001b[39m \u001b[43mestimator\u001b[49m\u001b[43m.\u001b[49m\u001b[43m_validate_params\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1384\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m config_context(\n\u001b[32m 1385\u001b[39m skip_parameter_validation=(\n\u001b[32m 1386\u001b[39m prefer_skip_nested_validation \u001b[38;5;129;01mor\u001b[39;00m global_skip_validation\n\u001b[32m 1387\u001b[39m )\n\u001b[32m 1388\u001b[39m ):\n\u001b[32m 1389\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m fit_method(estimator, *args, **kwargs)\n",
+ "\u001b[36mFile \u001b[39m\u001b[32m~/Workspace/studies/.venv/lib/python3.13/site-packages/sklearn/base.py:436\u001b[39m, in \u001b[36mBaseEstimator._validate_params\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 428\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m_validate_params\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[32m 429\u001b[39m \u001b[38;5;250m \u001b[39m\u001b[33;03m\"\"\"Validate types and values of constructor parameters\u001b[39;00m\n\u001b[32m 430\u001b[39m \n\u001b[32m 431\u001b[39m \u001b[33;03m The expected type and values must be defined in the `_parameter_constraints`\u001b[39;00m\n\u001b[32m (...)\u001b[39m\u001b[32m 434\u001b[39m \u001b[33;03m accepted constraints.\u001b[39;00m\n\u001b[32m 435\u001b[39m \u001b[33;03m \"\"\"\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m436\u001b[39m \u001b[43mvalidate_parameter_constraints\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 437\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_parameter_constraints\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 438\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mget_params\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdeep\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 439\u001b[39m \u001b[43m \u001b[49m\u001b[43mcaller_name\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[34;43m__class__\u001b[39;49m\u001b[43m.\u001b[49m\u001b[34;43m__name__\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 440\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
+ "\u001b[36mFile \u001b[39m\u001b[32m~/Workspace/studies/.venv/lib/python3.13/site-packages/sklearn/utils/_param_validation.py:98\u001b[39m, in \u001b[36mvalidate_parameter_constraints\u001b[39m\u001b[34m(parameter_constraints, params, caller_name)\u001b[39m\n\u001b[32m 92\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 93\u001b[39m constraints_str = (\n\u001b[32m 94\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m, \u001b[39m\u001b[33m'\u001b[39m.join([\u001b[38;5;28mstr\u001b[39m(c)\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mfor\u001b[39;00m\u001b[38;5;250m \u001b[39mc\u001b[38;5;250m \u001b[39m\u001b[38;5;129;01min\u001b[39;00m\u001b[38;5;250m \u001b[39mconstraints[:-\u001b[32m1\u001b[39m]])\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m or\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 95\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33m \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconstraints[-\u001b[32m1\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m\n\u001b[32m 96\u001b[39m )\n\u001b[32m---> \u001b[39m\u001b[32m98\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m InvalidParameterError(\n\u001b[32m 99\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mThe \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mparam_name\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[33m parameter of \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcaller_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m must be\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 100\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33m \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconstraints_str\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m. Got \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mparam_val\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[33m instead.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 101\u001b[39m )\n",
+ "\u001b[31mInvalidParameterError\u001b[39m: The 'scoring' parameter of GridSearchCV must be a str among {'average_precision', 'adjusted_rand_score', 'roc_auc', 'top_k_accuracy', 'recall', 'neg_negative_likelihood_ratio', 'neg_mean_squared_error', 'positive_likelihood_ratio', 'precision', 'neg_mean_squared_log_error', 'precision_micro', 'neg_mean_poisson_deviance', 'completeness_score', 'accuracy', 'adjusted_mutual_info_score', 'precision_macro', 'neg_max_error', 'mutual_info_score', 'jaccard_samples', 'recall_samples', 'neg_mean_absolute_percentage_error', 'fowlkes_mallows_score', 'neg_brier_score', 'f1_samples', 'jaccard_weighted', 'recall_micro', 'd2_absolute_error_score', 'homogeneity_score', 'matthews_corrcoef', 'f1_micro', 'f1_macro', 'neg_root_mean_squared_error', 'precision_samples', 'neg_root_mean_squared_log_error', 'neg_mean_gamma_deviance', 'jaccard', 'neg_median_absolute_error', 'neg_mean_absolute_error', 'roc_auc_ovr', 'jaccard_micro', 'jaccard_macro', 'roc_auc_ovo', 'neg_log_loss', 'normalized_mutual_info_score', 'balanced_accuracy', 'f1_weighted', 'r2', 'recall_macro', 'rand_score', 'v_measure_score', 'explained_variance', 'roc_auc_ovo_weighted', 'precision_weighted', 'roc_auc_ovr_weighted', 'f1', 'recall_weighted'}, a callable, an instance of 'list', an instance of 'tuple', an instance of 'dict' or None. Got '' instead."
+ ]
+ }
+ ],
+ "source": [
+ "# Base estimator for the search.\n",
+ "# NOTE: despite its name, `rf` is a gradient boosting classifier, not a random forest.\n",
+ "rf = GradientBoostingClassifier(random_state=42)\n",
+ "\n",
+ "# Exhaustive search over param_grid, scored with stratified and shuffled cross-validation\n",
+ "grid_search = GridSearchCV(\n",
+ "    estimator=rf,\n",
+ "    param_grid=param_grid,\n",
+ "    cv=StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=42),\n",
+ "    # An empty string is not a valid scorer: it made fit() raise InvalidParameterError.\n",
+ "    # Log loss is a lower-is-better metric; scikit-learn exposes it negated\n",
+ "    # (\"neg_log_loss\") so that the search can maximise the score.\n",
+ "    scoring=\"neg_log_loss\",\n",
+ "    n_jobs=-1,  # use every available CPU core\n",
+ ")\n",
+ "\n",
+ "# Run the search on the training set\n",
+ "grid_search.fit(X_train, y_train)\n",
+ "\n",
+ "# Best hyperparameter combination found by the search\n",
+ "best_params = grid_search.best_params_\n",
+ "print(\"Meilleurs hyperparamètres : \", best_params)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "id": "0a35a4bf",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Final (not yet fitted) model: best hyperparameters from the grid search, fixed seed\n",
+ "best_rf = GradientBoostingClassifier(**best_params, random_state=42)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "id": "a7f59ea7",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "RMSE pour le fold 1: -0.0\n",
+ "RMSE pour le fold 2: -0.0\n",
+ "RMSE pour le fold 3: -0.0\n",
+ "RMSE pour le fold 4: -0.0\n",
+ "RMSE pour le fold 5: -0.0\n",
+ "\n",
+ "\n",
+ "MSE pour le fold 1: -0.0\n",
+ "MSE pour le fold 2: -0.0\n",
+ "MSE pour le fold 3: -0.0\n",
+ "MSE pour le fold 4: -0.0\n",
+ "MSE pour le fold 5: -0.0\n",
+ "\n",
+ "\n",
+ "MAE pour le fold 1: -0.0\n",
+ "MAE pour le fold 2: -0.0\n",
+ "MAE pour le fold 3: -0.0\n",
+ "MAE pour le fold 4: -0.0\n",
+ "MAE pour le fold 5: -0.0\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Per-fold cross-validated errors of the tuned model on the training set.\n",
+ "# scikit-learn's \"neg_*\" scorers return the error multiplied by -1 (so that a higher\n",
+ "# score is always better); the sign is flipped back when printing so that the values\n",
+ "# shown under the RMSE / MSE / MAE labels really are errors.\n",
+ "# NOTE(review): these are regression metrics computed on a classifier's predictions,\n",
+ "# and the previously recorded scores were all exactly 0 - worth checking the features\n",
+ "# (e.g. NB / CHARGE) for target leakage - TODO confirm.\n",
+ "cv_scorers = {\n",
+ "    \"RMSE\": \"neg_root_mean_squared_error\",\n",
+ "    \"MSE\": \"neg_mean_squared_error\",\n",
+ "    \"MAE\": \"neg_mean_absolute_error\",\n",
+ "}\n",
+ "\n",
+ "# One cross_val_score run per metric, same estimator / data / number of folds\n",
+ "cv_scores = {\n",
+ "    label: cross_val_score(best_rf, X_train, y_train, cv=num_folds, scoring=scorer)\n",
+ "    for label, scorer in cv_scorers.items()\n",
+ "}\n",
+ "\n",
+ "# Keep the original variable names; they still hold the raw (negated) scores\n",
+ "rmse_scores = cv_scores[\"RMSE\"]\n",
+ "mse_scores = cv_scores[\"MSE\"]\n",
+ "mae_scores = cv_scores[\"MAE\"]\n",
+ "\n",
+ "# Print the error of every fold, metric by metric, with a blank gap between metrics\n",
+ "for position, (label, fold_scores) in enumerate(cv_scores.items()):\n",
+ "    if position > 0:\n",
+ "        print(\"\\n\")\n",
+ "    for fold, score in enumerate(fold_scores, start=1):\n",
+ "        print(f\"{label} pour le fold {fold}: {-score}\")"
+ ]
},
{
"cell_type": "markdown",