From 3f2cd3a308418857e4ce19f158093f7944f4d515 Mon Sep 17 00:00:00 2001 From: Arthur DANJOU Date: Mon, 20 Oct 2025 18:44:22 +0200 Subject: [PATCH] Implement feature X to enhance user experience and fix bug Y in module Z --- .../TP_4/2025_M2_ISF_TP_4.ipynb | 4091 ++++++++++++++++- 1 file changed, 4044 insertions(+), 47 deletions(-) diff --git a/M2/Machine Learning/TP_4/2025_M2_ISF_TP_4.ipynb b/M2/Machine Learning/TP_4/2025_M2_ISF_TP_4.ipynb index 167171a..6897d8e 100644 --- a/M2/Machine Learning/TP_4/2025_M2_ISF_TP_4.ipynb +++ b/M2/Machine Learning/TP_4/2025_M2_ISF_TP_4.ipynb @@ -45,17 +45,18 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 15, "id": "97d58527", "metadata": {}, "outputs": [], "source": [ "# Données\n", - "import pandas as pd\n", "import numpy as np\n", + "import pandas as pd\n", "\n", - "# Graphiques \n", + "# Graphiques\n", "import seaborn as sns\n", + "\n", "sns.set()\n", "import plotly.express as px\n", "import plotly.graph_objects as gp\n", @@ -87,13 +88,13 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "c67db932", "metadata": {}, "outputs": [], "source": [ "def cramers_V(var1,var2) :\n", - " crosstab =np.array(pd.crosstab(var1,var2, rownames=None, colnames=None)) # Cross table building\n", + " crosstab = np.array(pd.crosstab(var1,var2, rownames=None, colnames=None)) # Cross table building\n", " stat = chi2_contingency(crosstab)[0] # Keeping of the test statistic of the Chi2 test\n", " obs = np.sum(crosstab) # Number of observations\n", " mini = min(crosstab.shape)-1 # Take the minimum value between the columns and the rows of the cross table\n", @@ -110,7 +111,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 17, "id": "c9597b48", "metadata": {}, "outputs": [], @@ -129,30 +130,13 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 18, "id": "8051b5f4", "metadata": {}, - "outputs": [ - { - "ename": "FileNotFoundError", - "evalue": 
"[Errno 2] No such file or directory: './1_inputs/base_retraitee.csv'", - "output_type": "error", - "traceback": [ - "\u001b[31m---------------------------------------------------------------------------\u001b[39m", - "\u001b[31mFileNotFoundError\u001b[39m Traceback (most recent call last)", - "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[9]\u001b[39m\u001b[32m, line 2\u001b[39m\n\u001b[32m 1\u001b[39m path =input_path + \u001b[33m'\u001b[39m\u001b[33m/base_retraitee.csv\u001b[39m\u001b[33m'\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m2\u001b[39m data_retraitee = \u001b[43mpd\u001b[49m\u001b[43m.\u001b[49m\u001b[43mread_csv\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\u001b[43msep\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43m,\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43mdecimal\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43m.\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Workspace/studies/.venv/lib/python3.13/site-packages/pandas/io/parsers/readers.py:1026\u001b[39m, in \u001b[36mread_csv\u001b[39m\u001b[34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[39m\n\u001b[32m 1013\u001b[39m kwds_defaults = _refine_defaults_read(\n\u001b[32m 1014\u001b[39m dialect,\n\u001b[32m 1015\u001b[39m delimiter,\n\u001b[32m (...)\u001b[39m\u001b[32m 
1022\u001b[39m dtype_backend=dtype_backend,\n\u001b[32m 1023\u001b[39m )\n\u001b[32m 1024\u001b[39m kwds.update(kwds_defaults)\n\u001b[32m-> \u001b[39m\u001b[32m1026\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Workspace/studies/.venv/lib/python3.13/site-packages/pandas/io/parsers/readers.py:620\u001b[39m, in \u001b[36m_read\u001b[39m\u001b[34m(filepath_or_buffer, kwds)\u001b[39m\n\u001b[32m 617\u001b[39m _validate_names(kwds.get(\u001b[33m\"\u001b[39m\u001b[33mnames\u001b[39m\u001b[33m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[32m 619\u001b[39m \u001b[38;5;66;03m# Create the parser.\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m620\u001b[39m parser = \u001b[43mTextFileReader\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 622\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m chunksize \u001b[38;5;129;01mor\u001b[39;00m iterator:\n\u001b[32m 623\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m parser\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Workspace/studies/.venv/lib/python3.13/site-packages/pandas/io/parsers/readers.py:1620\u001b[39m, in \u001b[36mTextFileReader.__init__\u001b[39m\u001b[34m(self, f, engine, **kwds)\u001b[39m\n\u001b[32m 1617\u001b[39m \u001b[38;5;28mself\u001b[39m.options[\u001b[33m\"\u001b[39m\u001b[33mhas_index_names\u001b[39m\u001b[33m\"\u001b[39m] = kwds[\u001b[33m\"\u001b[39m\u001b[33mhas_index_names\u001b[39m\u001b[33m\"\u001b[39m]\n\u001b[32m 1619\u001b[39m \u001b[38;5;28mself\u001b[39m.handles: IOHandles | \u001b[38;5;28;01mNone\u001b[39;00m = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1620\u001b[39m 
\u001b[38;5;28mself\u001b[39m._engine = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_make_engine\u001b[49m\u001b[43m(\u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mengine\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Workspace/studies/.venv/lib/python3.13/site-packages/pandas/io/parsers/readers.py:1880\u001b[39m, in \u001b[36mTextFileReader._make_engine\u001b[39m\u001b[34m(self, f, engine)\u001b[39m\n\u001b[32m 1878\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33mb\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m mode:\n\u001b[32m 1879\u001b[39m mode += \u001b[33m\"\u001b[39m\u001b[33mb\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m-> \u001b[39m\u001b[32m1880\u001b[39m \u001b[38;5;28mself\u001b[39m.handles = \u001b[43mget_handle\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1881\u001b[39m \u001b[43m \u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1882\u001b[39m \u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1883\u001b[39m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mencoding\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1884\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mcompression\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mcompression\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1885\u001b[39m \u001b[43m \u001b[49m\u001b[43mmemory_map\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmemory_map\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1886\u001b[39m \u001b[43m \u001b[49m\u001b[43mis_text\u001b[49m\u001b[43m=\u001b[49m\u001b[43mis_text\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1887\u001b[39m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mencoding_errors\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mstrict\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1888\u001b[39m \u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mstorage_options\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1889\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1890\u001b[39m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m.handles \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 1891\u001b[39m f = \u001b[38;5;28mself\u001b[39m.handles.handle\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Workspace/studies/.venv/lib/python3.13/site-packages/pandas/io/common.py:873\u001b[39m, in \u001b[36mget_handle\u001b[39m\u001b[34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[39m\n\u001b[32m 868\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(handle, \u001b[38;5;28mstr\u001b[39m):\n\u001b[32m 869\u001b[39m \u001b[38;5;66;03m# Check whether the filename is to be opened in binary mode.\u001b[39;00m\n\u001b[32m 870\u001b[39m \u001b[38;5;66;03m# Binary mode does not support 'encoding' and 'newline'.\u001b[39;00m\n\u001b[32m 871\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m ioargs.encoding \u001b[38;5;129;01mand\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33mb\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m ioargs.mode:\n\u001b[32m 872\u001b[39m \u001b[38;5;66;03m# Encoding\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m873\u001b[39m handle = \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[32m 874\u001b[39m \u001b[43m \u001b[49m\u001b[43mhandle\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 875\u001b[39m \u001b[43m \u001b[49m\u001b[43mioargs\u001b[49m\u001b[43m.\u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 876\u001b[39m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[43m=\u001b[49m\u001b[43mioargs\u001b[49m\u001b[43m.\u001b[49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 877\u001b[39m 
\u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[43m=\u001b[49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 878\u001b[39m \u001b[43m \u001b[49m\u001b[43mnewline\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 879\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 880\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 881\u001b[39m \u001b[38;5;66;03m# Binary mode\u001b[39;00m\n\u001b[32m 882\u001b[39m handle = \u001b[38;5;28mopen\u001b[39m(handle, ioargs.mode)\n", - "\u001b[31mFileNotFoundError\u001b[39m: [Errno 2] No such file or directory: './1_inputs/base_retraitee.csv'" - ] - } - ], + "outputs": [], "source": [ - "path =input_path + '/base_retraitee.csv'\n", - "data_retraitee = pd.read_csv(path,sep=\",\",decimal=\".\")" + "path = input_path + '/base_retraitee.csv'\n", + "data_retraitee = pd.read_csv(path, sep=\",\", decimal=\".\")" ] }, { @@ -175,12 +159,1443 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "id": "b9b98d36", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "index", + "rawType": "int64", + "type": "integer" + }, + { + "name": "ANNEE_CTR", + "rawType": "int64", + "type": "integer" + }, + { + "name": "CONTRAT_ANCIENNETE", + "rawType": "object", + "type": "string" + }, + { + "name": "FREQUENCE_PAIEMENT_COTISATION", + "rawType": "object", + "type": "string" + }, + { + "name": "GROUPE_KM", + "rawType": "object", + "type": "string" + }, + { + "name": "ZONE_RISQUE", + "rawType": "object", + "type": "string" + }, + { + "name": "AGE_ASSURE_PRINCIPAL", + "rawType": "int64", + "type": "integer" + }, + { + "name": "GENRE", + "rawType": "object", + "type": "string" + }, + { + "name": "DEUXIEME_CONDUCTEUR", + "rawType": "bool", + "type": "boolean" + }, + { + "name": "ANCIENNETE_PERMIS", + "rawType": "int64", + 
"type": "integer" + }, + { + "name": "ANNEE_CONSTRUCTION", + "rawType": "float64", + "type": "float" + }, + { + "name": "ENERGIE", + "rawType": "object", + "type": "string" + }, + { + "name": "EQUIPEMENT_SECURITE", + "rawType": "object", + "type": "string" + }, + { + "name": "VALEUR_DU_BIEN", + "rawType": "object", + "type": "string" + }, + { + "name": "NB", + "rawType": "int64", + "type": "integer" + }, + { + "name": "CHARGE", + "rawType": "float64", + "type": "float" + }, + { + "name": "EXPO", + "rawType": "float64", + "type": "float" + }, + { + "name": "sinistré", + "rawType": "int64", + "type": "integer" + } + ], + "ref": "3a5c9b57-04ea-45e3-9475-dee04d53694d", + "rows": [ + [ + "0", + "2019", + "(-1,0]", + "ANNUEL", + "[20000;40000[", + "B", + "54", + "M", + "False", + "47", + "2016.0", + "ESSENCE", + "FAUX", + "[10000;15000[", + "0", + "0.0", + "245.3278688524592", + "0" + ], + [ + "1", + "2019", + "(-1,0]", + "ANNUEL", + "[20000;40000[", + "B", + "88", + "F", + "True", + "55", + "2018.0", + "DIESEL", + "VRAI", + "[20000;25000[", + "0", + "0.0", + "230.36885245901655", + "0" + ], + [ + "2", + "2021", + "(1,2]", + "ANNUEL", + "[0;20000[", + "D", + "35", + "F", + "True", + "16", + "2017.0", + "ESSENCE", + "FAUX", + "[15000;20000[", + "0", + "0.0", + "300.0", + "0" + ], + [ + "3", + "2021", + "(2,5]", + "ANNUEL", + "[0;20000[", + "C", + "46", + "M", + "False", + "44", + "2018.0", + "ESSENCE", + "VRAI", + "[35000;99999[", + "0", + "0.0", + "303.99999999999994", + "0" + ], + [ + "4", + "2018", + "(2,5]", + "MENSUEL", + "[20000;40000[", + "A", + "46", + "F", + "False", + "31", + "2009.0", + "DIESEL", + "FAUX", + "[10000;15000[", + "0", + "0.0", + "365.0", + "0" + ], + [ + "5", + "2019", + "(2,5]", + "MENSUEL", + "[0;20000[", + "A", + "67", + "M", + "False", + "22", + "2015.0", + "ESSENCE", + "VRAI", + "[10000;15000[", + "0", + "0.0", + "364.5874316939892", + "0" + ], + [ + "6", + "2016", + "(0,1]", + "MENSUEL", + "[0;20000[", + "C", + "37", + "F", + "False", + 
"15", + "2016.0", + "ESSENCE", + "VRAI", + "[10000;15000[", + "0", + "868.11", + "365.0", + "0" + ], + [ + "7", + "2017", + "(1,2]", + "MENSUEL", + "[0;20000[", + "A", + "46", + "F", + "False", + "37", + "2015.0", + "ESSENCE", + "FAUX", + "[10000;15000[", + "0", + "0.0", + "300.0", + "0" + ], + [ + "8", + "2016", + "(0,1]", + "MENSUEL", + "[0;20000[", + "A", + "44", + "F", + "False", + "63", + "2014.0", + "ESSENCE", + "FAUX", + "[0;10000[", + "0", + "0.0", + "56.84426229508204", + "0" + ], + [ + "9", + "2019", + "(2,5]", + "MENSUEL", + "[0;20000[", + "B", + "59", + "F", + "False", + "68", + "2014.0", + "ESSENCE", + "FAUX", + "[10000;15000[", + "0", + "2794.96", + "364.00000000000006", + "0" + ], + [ + "10", + "2019", + "(0,1]", + "MENSUEL", + "[0;20000[", + "C", + "40", + "M", + "False", + "37", + "2017.0", + "ESSENCE", + "VRAI", + "[15000;20000[", + "1", + "1072.98", + "364.8415300546447", + "1" + ], + [ + "11", + "2018", + "(-1,0]", + "MENSUEL", + "[0;20000[", + "C", + "30", + "M", + "False", + "12", + "2017.0", + "DIESEL", + "FAUX", + "[20000;25000[", + "0", + "0.0", + "272.00000000000006", + "0" + ], + [ + "12", + "2020", + "(0,1]", + "MENSUEL", + "[20000;40000[", + "D", + "30", + "M", + "True", + "15", + "2020.0", + "ESSENCE", + "FAUX", + "[20000;25000[", + "0", + "0.0", + "365.0", + "0" + ], + [ + "13", + "2021", + "(0,1]", + "MENSUEL", + "[20000;40000[", + "B", + "58", + "M", + "False", + "39", + "2017.0", + "DIESEL", + "FAUX", + "[10000;15000[", + "0", + "0.0", + "303.99999999999994", + "0" + ], + [ + "14", + "2019", + "(-1,0]", + "MENSUEL", + "[20000;40000[", + "C", + "39", + "M", + "False", + "36", + "2014.0", + "DIESEL", + "FAUX", + "[10000;15000[", + "0", + "0.0", + "203.44262295081973", + "0" + ], + [ + "15", + "2019", + "(0,1]", + "ANNUEL", + "[0;20000[", + "A", + "26", + "F", + "False", + "14", + "2016.0", + "DIESEL", + "FAUX", + "[15000;20000[", + "0", + "0.0", + "364.2049180327869", + "0" + ], + [ + "16", + "2017", + "(-1,0]", + "ANNUEL", + 
"[0;20000[", + "D", + "26", + "M", + "False", + "17", + "2018.0", + "ESSENCE", + "FAUX", + "[35000;99999[", + "0", + "0.0", + "268.00000000000006", + "0" + ], + [ + "17", + "2016", + "(0,1]", + "TRIMESTRIEL", + "[0;20000[", + "A", + "57", + "F", + "False", + "61", + "2011.0", + "ESSENCE", + "VRAI", + "[10000;15000[", + "0", + "287.73", + "365.0", + "0" + ], + [ + "18", + "2018", + "(-1,0]", + "TRIMESTRIEL", + "[0;20000[", + "B", + "25", + "M", + "False", + "17", + "2017.0", + "DIESEL", + "VRAI", + "[35000;99999[", + "0", + "0.0", + "350.99999999999983", + "0" + ], + [ + "19", + "2018", + "(2,5]", + "ANNUEL", + "[20000;40000[", + "D", + "61", + "M", + "True", + "28", + "2014.0", + "DIESEL", + "FAUX", + "[20000;25000[", + "0", + "0.0", + "365.0", + "0" + ], + [ + "20", + "2020", + "(1,2]", + "MENSUEL", + "[20000;40000[", + "F", + "37", + "F", + "False", + "20", + "2018.0", + "DIESEL", + "FAUX", + "[25000;35000[", + "0", + "0.0", + "365.0", + "0" + ], + [ + "21", + "2020", + "(2,5]", + "TRIMESTRIEL", + "[0;20000[", + "D", + "25", + "M", + "True", + "18", + "2014.0", + "DIESEL", + "VRAI", + "[15000;20000[", + "0", + "0.0", + "102.71857923497252", + "0" + ], + [ + "22", + "2021", + "(2,5]", + "MENSUEL", + "[20000;40000[", + "C", + "30", + "F", + "True", + "14", + "2018.0", + "ESSENCE", + "FAUX", + "[15000;20000[", + "0", + "0.0", + "303.99999999999994", + "0" + ], + [ + "23", + "2017", + "(-1,0]", + "MENSUEL", + "[0;20000[", + "B", + "26", + "F", + "False", + "15", + "2016.0", + "DIESEL", + "FAUX", + "[15000;20000[", + "0", + "0.0", + "158.99999999999986", + "0" + ], + [ + "24", + "2016", + "(0,1]", + "TRIMESTRIEL", + "[0;20000[", + "A", + "62", + "M", + "False", + "64", + "2013.0", + "DIESEL", + "FAUX", + "[10000;15000[", + "0", + "0.0", + "365.0", + "0" + ], + [ + "25", + "2020", + "(-1,0]", + "MENSUEL", + "[20000;40000[", + "C", + "45", + "F", + "False", + "44", + "2020.0", + "ESSENCE", + "FAUX", + "[15000;20000[", + "0", + "0.0", + "330.42349726775944", + "0" + ], + 
[ + "26", + "2020", + "(0,1]", + "MENSUEL", + "[20000;40000[", + "E", + "60", + "M", + "False", + "66", + "2018.0", + "DIESEL", + "FAUX", + "[35000;99999[", + "0", + "0.0", + "365.0", + "0" + ], + [ + "27", + "2020", + "(0,1]", + "TRIMESTRIEL", + "[0;20000[", + "C", + "42", + "F", + "False", + "18", + "2018.0", + "ESSENCE", + "FAUX", + "[10000;15000[", + "0", + "0.0", + "365.0", + "0" + ], + [ + "28", + "2021", + "(2,5]", + "MENSUEL", + "[0;20000[", + "C", + "60", + "M", + "False", + "52", + "2016.0", + "DIESEL", + "VRAI", + "[15000;20000[", + "0", + "0.0", + "277.9999999999999", + "0" + ], + [ + "29", + "2021", + "(2,5]", + "MENSUEL", + "[20000;40000[", + "C", + "44", + "M", + "False", + "27", + "2017.0", + "ESSENCE", + "FAUX", + "[10000;15000[", + "0", + "0.0", + "234.99999999999991", + "0" + ], + [ + "30", + "2021", + "(-1,0]", + "MENSUEL", + "[20000;40000[", + "D", + "44", + "F", + "False", + "40", + "2020.0", + "ESSENCE", + "FAUX", + "[15000;20000[", + "0", + "0.0", + "180.99999999999997", + "0" + ], + [ + "31", + "2017", + "(1,2]", + "ANNUEL", + "[20000;40000[", + "A", + "37", + "M", + "False", + "56", + "2013.0", + "DIESEL", + "VRAI", + "[35000;99999[", + "0", + "0.0", + "93.99999999999984", + "0" + ], + [ + "32", + "2017", + "(0,1]", + "ANNUEL", + "[20000;40000[", + "A", + "25", + "F", + "True", + "12", + "2016.0", + "DIESEL", + "FAUX", + "[20000;25000[", + "0", + "0.0", + "365.0", + "0" + ], + [ + "33", + "2021", + "(1,2]", + "ANNUEL", + "[0;20000[", + "B", + "62", + "M", + "False", + "50", + "2014.0", + "DIESEL", + "FAUX", + "[20000;25000[", + "0", + "0.0", + "238.99999999999991", + "0" + ], + [ + "34", + "2020", + "(-1,0]", + "MENSUEL", + "[20000;40000[", + "C", + "27", + "M", + "True", + "13", + "2018.0", + "AUTRE", + "FAUX", + "[35000;99999[", + "1", + "3750.0", + "306.9945355191256", + "1" + ], + [ + "35", + "2021", + "(1,2]", + "ANNUEL", + "[0;20000[", + "C", + "60", + "F", + "False", + "61", + "2020.0", + "ESSENCE", + "FAUX", + "[15000;20000[", + 
"0", + "0.0", + "303.99999999999994", + "0" + ], + [ + "36", + "2019", + "(-1,0]", + "MENSUEL", + "[20000;40000[", + "L", + "19", + "M", + "False", + "2", + "2017.0", + "ESSENCE", + "VRAI", + "[0;10000[", + "1", + "1838.49", + "344.80327868852464", + "1" + ], + [ + "37", + "2016", + "(-1,0]", + "ANNUEL", + "[0;20000[", + "C", + "56", + "F", + "False", + "65", + "2010.0", + "ESSENCE", + "FAUX", + "[0;10000[", + "0", + "0.0", + "280.0", + "0" + ], + [ + "38", + "2019", + "(0,1]", + "MENSUEL", + "[0;20000[", + "C", + "57", + "F", + "False", + "36", + "2021.0", + "ESSENCE", + "FAUX", + "[10000;15000[", + "0", + "0.0", + "364.2677595628415", + "0" + ], + [ + "39", + "2017", + "(-1,0]", + "MENSUEL", + "[0;20000[", + "A", + "24", + "F", + "False", + "12", + "2017.0", + "ESSENCE", + "FAUX", + "[15000;20000[", + "0", + "2637.39", + "195.00000000000009", + "0" + ], + [ + "40", + "2018", + "(0,1]", + "ANNUEL", + "[20000;40000[", + "C", + "49", + "M", + "True", + "20", + "2017.0", + "DIESEL", + "FAUX", + "[20000;25000[", + "0", + "0.0", + "365.0", + "0" + ], + [ + "41", + "2018", + "(0,1]", + "ANNUEL", + "[0;20000[", + "B", + "51", + "M", + "True", + "42", + "2017.0", + "ESSENCE", + "FAUX", + "[15000;20000[", + "0", + "0.0", + "365.0", + "0" + ], + [ + "42", + "2020", + "(1,2]", + "MENSUEL", + "[20000;40000[", + "C", + "57", + "M", + "False", + "63", + "2018.0", + "ESSENCE", + "FAUX", + "[15000;20000[", + "0", + "0.0", + "365.0", + "0" + ], + [ + "43", + "2019", + "(1,2]", + "MENSUEL", + "[20000;40000[", + "C", + "40", + "M", + "False", + "69", + "2013.0", + "ESSENCE", + "FAUX", + "[10000;15000[", + "0", + "0.0", + "364.2240437158468", + "0" + ], + [ + "44", + "2021", + "(1,2]", + "MENSUEL", + "[20000;40000[", + "B", + "60", + "M", + "False", + "28", + "2018.0", + "DIESEL", + "FAUX", + "[35000;99999[", + "0", + "0.0", + "303.99999999999994", + "0" + ], + [ + "45", + "2020", + "(2,5]", + "ANNUEL", + "[0;20000[", + "B", + "52", + "F", + "False", + "55", + "2017.0", + "DIESEL", + 
"VRAI", + "[35000;99999[", + "0", + "0.0", + "365.0", + "0" + ], + [ + "46", + "2020", + "(2,5]", + "ANNUEL", + "[0;20000[", + "C", + "41", + "M", + "False", + "47", + "2018.0", + "ESSENCE", + "FAUX", + "[15000;20000[", + "0", + "0.0", + "365.0", + "0" + ], + [ + "47", + "2020", + "(0,1]", + "MENSUEL", + "[0;20000[", + "B", + "51", + "F", + "False", + "59", + "2016.0", + "ESSENCE", + "FAUX", + "[10000;15000[", + "0", + "0.0", + "118.67486338797818", + "0" + ], + [ + "48", + "2019", + "(-1,0]", + "MENSUEL", + "[20000;40000[", + "C", + "49", + "M", + "False", + "21", + "2020.0", + "ESSENCE", + "FAUX", + "[25000;35000[", + "0", + "0.0", + "267.26775956284155", + "0" + ], + [ + "49", + "2020", + "(2,5]", + "ANNUEL", + "[0;20000[", + "B", + "73", + "M", + "True", + "24", + "2018.0", + "DIESEL", + "FAUX", + "[20000;25000[", + "0", + "0.0", + "193.4699453551912", + "0" + ] + ], + "shape": { + "columns": 17, + "rows": 14236 + } + }, + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ANNEE_CTRCONTRAT_ANCIENNETEFREQUENCE_PAIEMENT_COTISATIONGROUPE_KMZONE_RISQUEAGE_ASSURE_PRINCIPALGENREDEUXIEME_CONDUCTEURANCIENNETE_PERMISANNEE_CONSTRUCTIONENERGIEEQUIPEMENT_SECURITEVALEUR_DU_BIENNBCHARGEEXPOsinistré
02019(-1,0]ANNUEL[20000;40000[B54MFalse472016.0ESSENCEFAUX[10000;15000[00.0245.3278690
12019(-1,0]ANNUEL[20000;40000[B88FTrue552018.0DIESELVRAI[20000;25000[00.0230.3688520
22021(1,2]ANNUEL[0;20000[D35FTrue162017.0ESSENCEFAUX[15000;20000[00.0300.0000000
32021(2,5]ANNUEL[0;20000[C46MFalse442018.0ESSENCEVRAI[35000;99999[00.0304.0000000
42018(2,5]MENSUEL[20000;40000[A46FFalse312009.0DIESELFAUX[10000;15000[00.0365.0000000
......................................................
142312021(2,5]MENSUEL[0;20000[D55MFalse492017.0ESSENCEFAUX[20000;25000[00.0181.0000000
142322019(2,5]MENSUEL[20000;40000[A33MFalse142017.0ESSENCEFAUX[10000;15000[00.0364.6693990
142332017(-1,0]ANNUEL[0;20000[A62MFalse582017.0ESSENCEVRAI[10000;15000[00.0182.0000000
142342018(-1,0]TRIMESTRIEL[20000;40000[D20MFalse72016.0DIESELFAUX[25000;35000[00.09.0000000
142352017(-1,0]ANNUEL[0;20000[C73FFalse412017.0ESSENCEFAUX[10000;15000[00.052.0000000
\n", + "

14236 rows × 17 columns

\n", + "
" + ], + "text/plain": [ + " ANNEE_CTR CONTRAT_ANCIENNETE FREQUENCE_PAIEMENT_COTISATION \\\n", + "0 2019 (-1,0] ANNUEL \n", + "1 2019 (-1,0] ANNUEL \n", + "2 2021 (1,2] ANNUEL \n", + "3 2021 (2,5] ANNUEL \n", + "4 2018 (2,5] MENSUEL \n", + "... ... ... ... \n", + "14231 2021 (2,5] MENSUEL \n", + "14232 2019 (2,5] MENSUEL \n", + "14233 2017 (-1,0] ANNUEL \n", + "14234 2018 (-1,0] TRIMESTRIEL \n", + "14235 2017 (-1,0] ANNUEL \n", + "\n", + " GROUPE_KM ZONE_RISQUE AGE_ASSURE_PRINCIPAL GENRE \\\n", + "0 [20000;40000[ B 54 M \n", + "1 [20000;40000[ B 88 F \n", + "2 [0;20000[ D 35 F \n", + "3 [0;20000[ C 46 M \n", + "4 [20000;40000[ A 46 F \n", + "... ... ... ... ... \n", + "14231 [0;20000[ D 55 M \n", + "14232 [20000;40000[ A 33 M \n", + "14233 [0;20000[ A 62 M \n", + "14234 [20000;40000[ D 20 M \n", + "14235 [0;20000[ C 73 F \n", + "\n", + " DEUXIEME_CONDUCTEUR ANCIENNETE_PERMIS ANNEE_CONSTRUCTION ENERGIE \\\n", + "0 False 47 2016.0 ESSENCE \n", + "1 True 55 2018.0 DIESEL \n", + "2 True 16 2017.0 ESSENCE \n", + "3 False 44 2018.0 ESSENCE \n", + "4 False 31 2009.0 DIESEL \n", + "... ... ... ... ... \n", + "14231 False 49 2017.0 ESSENCE \n", + "14232 False 14 2017.0 ESSENCE \n", + "14233 False 58 2017.0 ESSENCE \n", + "14234 False 7 2016.0 DIESEL \n", + "14235 False 41 2017.0 ESSENCE \n", + "\n", + " EQUIPEMENT_SECURITE VALEUR_DU_BIEN NB CHARGE EXPO sinistré \n", + "0 FAUX [10000;15000[ 0 0.0 245.327869 0 \n", + "1 VRAI [20000;25000[ 0 0.0 230.368852 0 \n", + "2 FAUX [15000;20000[ 0 0.0 300.000000 0 \n", + "3 VRAI [35000;99999[ 0 0.0 304.000000 0 \n", + "4 FAUX [10000;15000[ 0 0.0 365.000000 0 \n", + "... ... ... .. ... ... ... 
\n", + "14231 FAUX [20000;25000[ 0 0.0 181.000000 0 \n", + "14232 FAUX [10000;15000[ 0 0.0 364.669399 0 \n", + "14233 VRAI [10000;15000[ 0 0.0 182.000000 0 \n", + "14234 FAUX [25000;35000[ 0 0.0 9.000000 0 \n", + "14235 FAUX [10000;15000[ 0 0.0 52.000000 0 \n", + "\n", + "[14236 rows x 17 columns]" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#Calculez la variable \"sinistré\" qui est vraie si la personne a eu un ou plusieurs sinistres ou faux le cas échéant \n" + "# Calculez la variable \"sinistré\" qui est 1 si la personne a eu un ou plusieurs sinistres, 0 sinon\n", + "data_retraitee[\"sinistré\"] = data_retraitee[\"NB\"] > 0\n", + "data_retraitee[\"sinistré\"] = data_retraitee[\"sinistré\"].astype(int)\n", + "data_retraitee" ] }, { @@ -193,11 +1608,855 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "id": "47cf4b69", "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "bingroup": "x", + "hovertemplate": "sinistré=%{x}
count=%{y}", + "legendgroup": "", + "marker": { + "color": "#636efa", + "pattern": { + "shape": "" + } + }, + "name": "", + "orientation": "v", + "showlegend": false, + "type": "histogram", + "x": { + "bdata": "                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         
                                                                                                                                                                                                                                                                                                                                                                                                                                     ", + "dtype": "i1" + }, + "xaxis": "x", + "yaxis": "y" + } + ], + "layout": { + "barmode": "relative", + "legend": { + "tracegroupgap": 0 + }, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + 
"#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": 
"" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "fillpattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermap": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermap" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 
0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + 
"lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Distribution de la variable 'sinistré'" + }, + "xaxis": { + "anchor": "y", + "domain": [ + 0, + 1 + ], + "title": { + "text": "sinistré" + } + }, + 
"yaxis": { + "anchor": "x", + "domain": [ + 0, + 1 + ], + "title": { + "text": "count" + } + } + } + } + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Observation de la distribution\n", + "fig = px.histogram(data_retraitee, x=\"sinistré\", title=\"Distribution de la variable 'sinistré'\")\n", + "fig.show()" + ] }, { "cell_type": "markdown", @@ -209,11 +2468,53 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "id": "a0bc6278", "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(14236, 16)" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data_set = data_retraitee.drop(\"sinistré\", axis=1)\n", + "data_set.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "73d31ea4", + "metadata": {}, "outputs": [], - "source": [] + "source": [ + "# Séparation en variables qualitatives ou catégorielles\n", + "variables_na = []\n", + "variables_numeriques = []\n", + "variables_01 = []\n", + "variables_categorielles = []\n", + "for colu in data_set.columns:\n", + " if True in data_set[colu].isna().unique():\n", + " variables_na.append(data_set[colu])\n", + " else:\n", + " if str(data_set[colu].dtypes) in [\"int32\", \"int64\", \"float64\"]:\n", + " if len(data_set[colu].unique()) == 2:\n", + " variables_categorielles.append(data_set[colu])\n", + " else:\n", + " variables_numeriques.append(data_set[colu])\n", + " else:\n", + " if len(data_set[colu].unique()) == 2:\n", + " variables_categorielles.append(data_set[colu])\n", + " else:\n", + " variables_categorielles.append(data_set[colu])\n" + ] }, { "cell_type": "markdown", @@ -229,7 +2530,423 @@ "id": "30df8bd5", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "vars_categorielles = pd.DataFrame(variables_categorielles).transpose()" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "be7a7d00", + "metadata": {}, + "outputs": [ + { + 
"data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "index", + "rawType": "object", + "type": "string" + }, + { + "name": "CONTRAT_ANCIENNETE", + "rawType": "float64", + "type": "float" + }, + { + "name": "FREQUENCE_PAIEMENT_COTISATION", + "rawType": "float64", + "type": "float" + }, + { + "name": "GROUPE_KM", + "rawType": "float64", + "type": "float" + }, + { + "name": "ZONE_RISQUE", + "rawType": "float64", + "type": "float" + }, + { + "name": "GENRE", + "rawType": "float64", + "type": "float" + }, + { + "name": "DEUXIEME_CONDUCTEUR", + "rawType": "float64", + "type": "float" + }, + { + "name": "ENERGIE", + "rawType": "float64", + "type": "float" + }, + { + "name": "EQUIPEMENT_SECURITE", + "rawType": "float64", + "type": "float" + }, + { + "name": "VALEUR_DU_BIEN", + "rawType": "float64", + "type": "float" + } + ], + "ref": "cdaf33f1-78b7-4df1-9a7c-93b778e94756", + "rows": [ + [ + "CONTRAT_ANCIENNETE", + "1.0", + "0.0", + "0.01", + "0.02", + "0.0", + "0.0", + "0.0", + "0.01", + "0.0" + ], + [ + "FREQUENCE_PAIEMENT_COTISATION", + "0.0", + "1.0", + "0.0", + "0.0", + "0.01", + "0.0", + "0.0", + "0.01", + "0.02" + ], + [ + "GROUPE_KM", + "0.01", + "0.0", + "1.0", + "0.01", + "0.01", + "0.0", + "0.04", + "0.01", + "0.02" + ], + [ + "ZONE_RISQUE", + "0.02", + "0.0", + "0.01", + "1.0", + "0.0", + "0.0", + "0.01", + "0.03", + "0.0" + ], + [ + "GENRE", + "0.0", + "0.01", + "0.01", + "0.0", + "1.0", + "0.0", + "0.02", + "0.01", + "0.07" + ], + [ + "DEUXIEME_CONDUCTEUR", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "1.0", + "0.0", + "0.0", + "0.0" + ], + [ + "ENERGIE", + "0.0", + "0.0", + "0.04", + "0.01", + "0.02", + "0.0", + "1.0", + "0.02", + "0.08" + ], + [ + "EQUIPEMENT_SECURITE", + "0.01", + "0.01", + "0.01", + "0.03", + "0.01", + "0.0", + "0.02", + "1.0", + "0.07" + ], + [ + "VALEUR_DU_BIEN", + "0.0", + "0.02", + "0.02", + "0.0", + "0.07", + "0.0", + "0.08", + "0.07", + "1.0" + ] + ], + "shape": { + "columns": 9, + 
"rows": 9 + } + }, + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CONTRAT_ANCIENNETEFREQUENCE_PAIEMENT_COTISATIONGROUPE_KMZONE_RISQUEGENREDEUXIEME_CONDUCTEURENERGIEEQUIPEMENT_SECURITEVALEUR_DU_BIEN
CONTRAT_ANCIENNETE1.000.000.010.020.000.00.000.010.00
FREQUENCE_PAIEMENT_COTISATION0.001.000.000.000.010.00.000.010.02
GROUPE_KM0.010.001.000.010.010.00.040.010.02
ZONE_RISQUE0.020.000.011.000.000.00.010.030.00
GENRE0.000.010.010.001.000.00.020.010.07
DEUXIEME_CONDUCTEUR0.000.000.000.000.001.00.000.000.00
ENERGIE0.000.000.040.010.020.01.000.020.08
EQUIPEMENT_SECURITE0.010.010.010.030.010.00.021.000.07
VALEUR_DU_BIEN0.000.020.020.000.070.00.080.071.00
\n", + "
" + ], + "text/plain": [ + " CONTRAT_ANCIENNETE \\\n", + "CONTRAT_ANCIENNETE 1.00 \n", + "FREQUENCE_PAIEMENT_COTISATION 0.00 \n", + "GROUPE_KM 0.01 \n", + "ZONE_RISQUE 0.02 \n", + "GENRE 0.00 \n", + "DEUXIEME_CONDUCTEUR 0.00 \n", + "ENERGIE 0.00 \n", + "EQUIPEMENT_SECURITE 0.01 \n", + "VALEUR_DU_BIEN 0.00 \n", + "\n", + " FREQUENCE_PAIEMENT_COTISATION GROUPE_KM \\\n", + "CONTRAT_ANCIENNETE 0.00 0.01 \n", + "FREQUENCE_PAIEMENT_COTISATION 1.00 0.00 \n", + "GROUPE_KM 0.00 1.00 \n", + "ZONE_RISQUE 0.00 0.01 \n", + "GENRE 0.01 0.01 \n", + "DEUXIEME_CONDUCTEUR 0.00 0.00 \n", + "ENERGIE 0.00 0.04 \n", + "EQUIPEMENT_SECURITE 0.01 0.01 \n", + "VALEUR_DU_BIEN 0.02 0.02 \n", + "\n", + " ZONE_RISQUE GENRE DEUXIEME_CONDUCTEUR \\\n", + "CONTRAT_ANCIENNETE 0.02 0.00 0.0 \n", + "FREQUENCE_PAIEMENT_COTISATION 0.00 0.01 0.0 \n", + "GROUPE_KM 0.01 0.01 0.0 \n", + "ZONE_RISQUE 1.00 0.00 0.0 \n", + "GENRE 0.00 1.00 0.0 \n", + "DEUXIEME_CONDUCTEUR 0.00 0.00 1.0 \n", + "ENERGIE 0.01 0.02 0.0 \n", + "EQUIPEMENT_SECURITE 0.03 0.01 0.0 \n", + "VALEUR_DU_BIEN 0.00 0.07 0.0 \n", + "\n", + " ENERGIE EQUIPEMENT_SECURITE VALEUR_DU_BIEN \n", + "CONTRAT_ANCIENNETE 0.00 0.01 0.00 \n", + "FREQUENCE_PAIEMENT_COTISATION 0.00 0.01 0.02 \n", + "GROUPE_KM 0.04 0.01 0.02 \n", + "ZONE_RISQUE 0.01 0.03 0.00 \n", + "GENRE 0.02 0.01 0.07 \n", + "DEUXIEME_CONDUCTEUR 0.00 0.00 0.00 \n", + "ENERGIE 1.00 0.02 0.08 \n", + "EQUIPEMENT_SECURITE 0.02 1.00 0.07 \n", + "VALEUR_DU_BIEN 0.08 0.07 1.00 " + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Test du V de Cramer\n", + "rows = []\n", + "\n", + "for var1 in vars_categorielles:\n", + " col = []\n", + " for var2 in vars_categorielles:\n", + " cramers = cramers_V(\n", + " vars_categorielles[var1], vars_categorielles[var2]\n", + " ) # V de Cramer\n", + " col.append(round(cramers, 2)) # arrondi du résultat\n", + " rows.append(col)\n", + "\n", + "cramers_results = np.array(rows)\n", + "v_cramer_resultats = 
pd.DataFrame(\n", + " cramers_results,\n", + " columns=vars_categorielles.columns,\n", + " index=vars_categorielles.columns,\n", + ")\n", + "\n", + "v_cramer_resultats\n" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "b3297dca", + "metadata": {}, + "outputs": [], + "source": [ + "# On repère les variables trop corrélées\n", + "for i in range(v_cramer_resultats.shape[0]):\n", + " for j in range(i + 1, v_cramer_resultats.shape[0]):\n", + " if v_cramer_resultats.iloc[i, j] > 0.7:\n", + " print(\n", + " v_cramer_resultats.index.to_numpy()[i]\n", + " + \" et \"\n", + " + v_cramer_resultats.columns[j]\n", + " + \" sont trop dépendantes, V-CRAMER = \"\n", + " + str(v_cramer_resultats.iloc[i, j])\n", + " )\n" + ] }, { "cell_type": "markdown", @@ -241,11 +2958,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 32, "id": "d1fa12fc", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "vars_numeriques = pd.DataFrame(variables_numeriques).transpose()" + ] }, { "cell_type": "markdown", @@ -255,6 +2974,290 @@ "**Question :** quels sont vos commentaires ?" 
] }, + { + "cell_type": "code", + "execution_count": 33, + "id": "c70946b4", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "index", + "rawType": "object", + "type": "string" + }, + { + "name": "ANNEE_CTR", + "rawType": "float64", + "type": "float" + }, + { + "name": "AGE_ASSURE_PRINCIPAL", + "rawType": "float64", + "type": "float" + }, + { + "name": "ANCIENNETE_PERMIS", + "rawType": "float64", + "type": "float" + }, + { + "name": "ANNEE_CONSTRUCTION", + "rawType": "float64", + "type": "float" + }, + { + "name": "NB", + "rawType": "float64", + "type": "float" + }, + { + "name": "CHARGE", + "rawType": "float64", + "type": "float" + }, + { + "name": "EXPO", + "rawType": "float64", + "type": "float" + } + ], + "ref": "5ae1d96a-bfa4-47eb-bc85-b1de1b32bf1e", + "rows": [ + [ + "ANNEE_CTR", + "1.0", + "0.048023234802924315", + "0.043983174120495815", + "0.3615499864845018", + "-0.05775190894636334", + "-0.028901069139582642", + "-0.04770515515535773" + ], + [ + "AGE_ASSURE_PRINCIPAL", + "0.048023234802924315", + "1.0", + "0.4987430846753776", + "-0.0591835157827114", + "-0.012425345899111317", + "-0.020907992524227155", + "0.06096340138959582" + ], + [ + "ANCIENNETE_PERMIS", + "0.043983174120495815", + "0.4987430846753776", + "1.0", + "-0.0298138263902136", + "-0.008703999957333864", + "-0.011347002839350888", + "0.0324606537737922" + ], + [ + "ANNEE_CONSTRUCTION", + "0.3615499864845018", + "-0.0591835157827114", + "-0.0298138263902136", + "1.0", + "-0.01437673371578632", + "-0.0012301736578250726", + "-0.07395284013392618" + ], + [ + "NB", + "-0.05775190894636334", + "-0.012425345899111317", + "-0.008703999957333864", + "-0.01437673371578632", + "1.0", + "0.5071071150738479", + "0.0507022890091039" + ], + [ + "CHARGE", + "-0.028901069139582642", + "-0.020907992524227155", + "-0.011347002839350888", + "-0.0012301736578250726", + "0.5071071150738479", + "1.0", + 
"-0.021418687122216843" + ], + [ + "EXPO", + "-0.04770515515535773", + "0.06096340138959582", + "0.0324606537737922", + "-0.07395284013392618", + "0.0507022890091039", + "-0.021418687122216843", + "1.0" + ] + ], + "shape": { + "columns": 7, + "rows": 7 + } + }, + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ANNEE_CTRAGE_ASSURE_PRINCIPALANCIENNETE_PERMISANNEE_CONSTRUCTIONNBCHARGEEXPO
ANNEE_CTR1.0000000.0480230.0439830.361550-0.057752-0.028901-0.047705
AGE_ASSURE_PRINCIPAL0.0480231.0000000.498743-0.059184-0.012425-0.0209080.060963
ANCIENNETE_PERMIS0.0439830.4987431.000000-0.029814-0.008704-0.0113470.032461
ANNEE_CONSTRUCTION0.361550-0.059184-0.0298141.000000-0.014377-0.001230-0.073953
NB-0.057752-0.012425-0.008704-0.0143771.0000000.5071070.050702
CHARGE-0.028901-0.020908-0.011347-0.0012300.5071071.000000-0.021419
EXPO-0.0477050.0609630.032461-0.0739530.050702-0.0214191.000000
\n", + "
" + ], + "text/plain": [ + " ANNEE_CTR AGE_ASSURE_PRINCIPAL ANCIENNETE_PERMIS \\\n", + "ANNEE_CTR 1.000000 0.048023 0.043983 \n", + "AGE_ASSURE_PRINCIPAL 0.048023 1.000000 0.498743 \n", + "ANCIENNETE_PERMIS 0.043983 0.498743 1.000000 \n", + "ANNEE_CONSTRUCTION 0.361550 -0.059184 -0.029814 \n", + "NB -0.057752 -0.012425 -0.008704 \n", + "CHARGE -0.028901 -0.020908 -0.011347 \n", + "EXPO -0.047705 0.060963 0.032461 \n", + "\n", + " ANNEE_CONSTRUCTION NB CHARGE EXPO \n", + "ANNEE_CTR 0.361550 -0.057752 -0.028901 -0.047705 \n", + "AGE_ASSURE_PRINCIPAL -0.059184 -0.012425 -0.020908 0.060963 \n", + "ANCIENNETE_PERMIS -0.029814 -0.008704 -0.011347 0.032461 \n", + "ANNEE_CONSTRUCTION 1.000000 -0.014377 -0.001230 -0.073953 \n", + "NB -0.014377 1.000000 0.507107 0.050702 \n", + "CHARGE -0.001230 0.507107 1.000000 -0.021419 \n", + "EXPO -0.073953 0.050702 -0.021419 1.000000 " + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Corrélation de Pearson\n", + "correlations_num = vars_numeriques.corr(method=\"pearson\")\n", + "correlations_num" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "4c29f1f0", + "metadata": {}, + "outputs": [], + "source": [ + "# On repère les variables trop corrélées\n", + "nb_variables = correlations_num.shape[0]\n", + "for i in range(nb_variables):\n", + " for j in range(i + 1, nb_variables):\n", + " if abs(correlations_num.iloc[i, j]) > 0.7:\n", + " print(\n", + " correlations_num.index.to_numpy()[i]\n", + " + \" et \"\n", + " + correlations_num.columns[j]\n", + " + \" sont trop dépendantes, corr = \"\n", + " + str(correlations_num.iloc[i, j])\n", + " )" + ] + }, { "cell_type": "markdown", "id": "212209ec", @@ -284,11 +3287,647 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 35, "id": "b8530717", "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + 
"columns": [ + { + "name": "index", + "rawType": "int64", + "type": "integer" + }, + { + "name": "CONTRAT_ANCIENNETE_(0,1]", + "rawType": "float64", + "type": "float" + }, + { + "name": "CONTRAT_ANCIENNETE_(1,2]", + "rawType": "float64", + "type": "float" + }, + { + "name": "CONTRAT_ANCIENNETE_(2,5]", + "rawType": "float64", + "type": "float" + }, + { + "name": "CONTRAT_ANCIENNETE_(5,10]", + "rawType": "float64", + "type": "float" + }, + { + "name": "FREQUENCE_PAIEMENT_COTISATION_MENSUEL", + "rawType": "float64", + "type": "float" + }, + { + "name": "FREQUENCE_PAIEMENT_COTISATION_TRIMESTRIEL", + "rawType": "float64", + "type": "float" + }, + { + "name": "GROUPE_KM_[20000;40000[", + "rawType": "float64", + "type": "float" + }, + { + "name": "GROUPE_KM_[40000;60000[", + "rawType": "float64", + "type": "float" + }, + { + "name": "GROUPE_KM_[60000;99999[", + "rawType": "float64", + "type": "float" + }, + { + "name": "ZONE_RISQUE_B", + "rawType": "float64", + "type": "float" + }, + { + "name": "ZONE_RISQUE_C", + "rawType": "float64", + "type": "float" + }, + { + "name": "ZONE_RISQUE_D", + "rawType": "float64", + "type": "float" + }, + { + "name": "ZONE_RISQUE_E", + "rawType": "float64", + "type": "float" + }, + { + "name": "ZONE_RISQUE_F", + "rawType": "float64", + "type": "float" + }, + { + "name": "ZONE_RISQUE_G", + "rawType": "float64", + "type": "float" + }, + { + "name": "ZONE_RISQUE_H", + "rawType": "float64", + "type": "float" + }, + { + "name": "ZONE_RISQUE_I", + "rawType": "float64", + "type": "float" + }, + { + "name": "ZONE_RISQUE_J", + "rawType": "float64", + "type": "float" + }, + { + "name": "ZONE_RISQUE_K", + "rawType": "float64", + "type": "float" + }, + { + "name": "ZONE_RISQUE_L", + "rawType": "float64", + "type": "float" + }, + { + "name": "ZONE_RISQUE_M", + "rawType": "float64", + "type": "float" + }, + { + "name": "ZONE_RISQUE_R", + "rawType": "float64", + "type": "float" + }, + { + "name": "ZONE_RISQUE_S", + "rawType": "float64", + "type": "float" 
+ }, + { + "name": "ZONE_RISQUE_T", + "rawType": "float64", + "type": "float" + }, + { + "name": "ZONE_RISQUE_X", + "rawType": "float64", + "type": "float" + }, + { + "name": "GENRE_M", + "rawType": "float64", + "type": "float" + }, + { + "name": "DEUXIEME_CONDUCTEUR_True", + "rawType": "float64", + "type": "float" + }, + { + "name": "ENERGIE_DIESEL", + "rawType": "float64", + "type": "float" + }, + { + "name": "ENERGIE_ESSENCE", + "rawType": "float64", + "type": "float" + }, + { + "name": "EQUIPEMENT_SECURITE_VRAI", + "rawType": "float64", + "type": "float" + }, + { + "name": "VALEUR_DU_BIEN_[10000;15000[", + "rawType": "float64", + "type": "float" + }, + { + "name": "VALEUR_DU_BIEN_[15000;20000[", + "rawType": "float64", + "type": "float" + }, + { + "name": "VALEUR_DU_BIEN_[20000;25000[", + "rawType": "float64", + "type": "float" + }, + { + "name": "VALEUR_DU_BIEN_[25000;35000[", + "rawType": "float64", + "type": "float" + }, + { + "name": "VALEUR_DU_BIEN_[35000;99999[", + "rawType": "float64", + "type": "float" + } + ], + "ref": "a0294dee-6844-4af1-9ee3-1bdc53a57dfa", + "rows": [ + [ + "0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "1.0", + "0.0", + "0.0", + "1.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "1.0", + "0.0", + "0.0", + "1.0", + "0.0", + "1.0", + "0.0", + "0.0", + "0.0", + "0.0" + ], + [ + "1", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "1.0", + "0.0", + "0.0", + "1.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "1.0", + "1.0", + "0.0", + "1.0", + "0.0", + "0.0", + "1.0", + "0.0", + "0.0" + ], + [ + "2", + "0.0", + "1.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "1.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + 
"0.0", + "1.0", + "0.0", + "1.0", + "0.0", + "0.0", + "1.0", + "0.0", + "0.0", + "0.0" + ], + [ + "3", + "0.0", + "0.0", + "1.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "1.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "1.0", + "0.0", + "0.0", + "1.0", + "1.0", + "0.0", + "0.0", + "0.0", + "0.0", + "1.0" + ], + [ + "4", + "0.0", + "0.0", + "1.0", + "0.0", + "1.0", + "0.0", + "1.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "1.0", + "0.0", + "0.0", + "1.0", + "0.0", + "0.0", + "0.0", + "0.0" + ] + ], + "shape": { + "columns": 35, + "rows": 5 + } + }, + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CONTRAT_ANCIENNETE_(0,1]CONTRAT_ANCIENNETE_(1,2]CONTRAT_ANCIENNETE_(2,5]CONTRAT_ANCIENNETE_(5,10]FREQUENCE_PAIEMENT_COTISATION_MENSUELFREQUENCE_PAIEMENT_COTISATION_TRIMESTRIELGROUPE_KM_[20000;40000[GROUPE_KM_[40000;60000[GROUPE_KM_[60000;99999[ZONE_RISQUE_B...GENRE_MDEUXIEME_CONDUCTEUR_TrueENERGIE_DIESELENERGIE_ESSENCEEQUIPEMENT_SECURITE_VRAIVALEUR_DU_BIEN_[10000;15000[VALEUR_DU_BIEN_[15000;20000[VALEUR_DU_BIEN_[20000;25000[VALEUR_DU_BIEN_[25000;35000[VALEUR_DU_BIEN_[35000;99999[
00.00.00.00.00.00.01.00.00.01.0...1.00.00.01.00.01.00.00.00.00.0
10.00.00.00.00.00.01.00.00.01.0...0.01.01.00.01.00.00.01.00.00.0
20.01.00.00.00.00.00.00.00.00.0...0.01.00.01.00.00.01.00.00.00.0
30.00.01.00.00.00.00.00.00.00.0...1.00.00.01.01.00.00.00.00.01.0
40.00.01.00.01.00.01.00.00.00.0...0.00.01.00.00.01.00.00.00.00.0
\n", + "

5 rows × 35 columns

\n", + "
" + ], + "text/plain": [ + " CONTRAT_ANCIENNETE_(0,1] CONTRAT_ANCIENNETE_(1,2] \\\n", + "0 0.0 0.0 \n", + "1 0.0 0.0 \n", + "2 0.0 1.0 \n", + "3 0.0 0.0 \n", + "4 0.0 0.0 \n", + "\n", + " CONTRAT_ANCIENNETE_(2,5] CONTRAT_ANCIENNETE_(5,10] \\\n", + "0 0.0 0.0 \n", + "1 0.0 0.0 \n", + "2 0.0 0.0 \n", + "3 1.0 0.0 \n", + "4 1.0 0.0 \n", + "\n", + " FREQUENCE_PAIEMENT_COTISATION_MENSUEL \\\n", + "0 0.0 \n", + "1 0.0 \n", + "2 0.0 \n", + "3 0.0 \n", + "4 1.0 \n", + "\n", + " FREQUENCE_PAIEMENT_COTISATION_TRIMESTRIEL GROUPE_KM_[20000;40000[ \\\n", + "0 0.0 1.0 \n", + "1 0.0 1.0 \n", + "2 0.0 0.0 \n", + "3 0.0 0.0 \n", + "4 0.0 1.0 \n", + "\n", + " GROUPE_KM_[40000;60000[ GROUPE_KM_[60000;99999[ ZONE_RISQUE_B ... \\\n", + "0 0.0 0.0 1.0 ... \n", + "1 0.0 0.0 1.0 ... \n", + "2 0.0 0.0 0.0 ... \n", + "3 0.0 0.0 0.0 ... \n", + "4 0.0 0.0 0.0 ... \n", + "\n", + " GENRE_M DEUXIEME_CONDUCTEUR_True ENERGIE_DIESEL ENERGIE_ESSENCE \\\n", + "0 1.0 0.0 0.0 1.0 \n", + "1 0.0 1.0 1.0 0.0 \n", + "2 0.0 1.0 0.0 1.0 \n", + "3 1.0 0.0 0.0 1.0 \n", + "4 0.0 0.0 1.0 0.0 \n", + "\n", + " EQUIPEMENT_SECURITE_VRAI VALEUR_DU_BIEN_[10000;15000[ \\\n", + "0 0.0 1.0 \n", + "1 1.0 0.0 \n", + "2 0.0 0.0 \n", + "3 1.0 0.0 \n", + "4 0.0 1.0 \n", + "\n", + " VALEUR_DU_BIEN_[15000;20000[ VALEUR_DU_BIEN_[20000;25000[ \\\n", + "0 0.0 0.0 \n", + "1 0.0 1.0 \n", + "2 1.0 0.0 \n", + "3 0.0 0.0 \n", + "4 0.0 0.0 \n", + "\n", + " VALEUR_DU_BIEN_[25000;35000[ VALEUR_DU_BIEN_[35000;99999[ \n", + "0 0.0 0.0 \n", + "1 0.0 0.0 \n", + "2 0.0 0.0 \n", + "3 0.0 1.0 \n", + "4 0.0 0.0 \n", + "\n", + "[5 rows x 35 columns]" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# One hot encoding des variables catégorielles\n", + "preproc_ohe = preproc.OneHotEncoder(handle_unknown=\"ignore\")\n", + "preproc_ohe = preproc.OneHotEncoder(drop=\"first\", sparse_output=False).fit(\n", + " vars_categorielles\n", + ")\n", + "\n", + "variables_categorielles_ohe = 
preproc_ohe.transform(vars_categorielles)\n", + "variables_categorielles_ohe = pd.DataFrame(\n", + " variables_categorielles_ohe,\n", + " columns=preproc_ohe.get_feature_names_out(vars_categorielles.columns),\n", + ")\n", + "variables_categorielles_ohe.head()" + ] }, { "cell_type": "markdown", @@ -300,11 +3939,228 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 36, "id": "4ff3847d", "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "index", + "rawType": "int64", + "type": "integer" + }, + { + "name": "ANNEE_CTR", + "rawType": "float64", + "type": "float" + }, + { + "name": "AGE_ASSURE_PRINCIPAL", + "rawType": "float64", + "type": "float" + }, + { + "name": "ANCIENNETE_PERMIS", + "rawType": "float64", + "type": "float" + }, + { + "name": "ANNEE_CONSTRUCTION", + "rawType": "float64", + "type": "float" + }, + { + "name": "NB", + "rawType": "float64", + "type": "float" + }, + { + "name": "CHARGE", + "rawType": "float64", + "type": "float" + }, + { + "name": "EXPO", + "rawType": "float64", + "type": "float" + } + ], + "ref": "72afd0da-ac68-4aee-87ae-5e375d6d237d", + "rows": [ + [ + "0", + "0.1393559608666301", + "0.6582867283271144", + "0.5635879287137437", + "0.1740107784615837", + "-0.24202868219585674", + "-0.181253980627111", + "-0.289146035458737" + ], + [ + "1", + "0.1393559608666301", + "3.1516280073827847", + "0.9874335016275682", + "0.7442069902648635", + "-0.24202868219585674", + "-0.181253980627111", + "-0.42709265252699025" + ], + [ + "2", + "1.3471924655222902", + "-0.7350510452628191", + "-1.078813666327326", + "0.45910888436322356", + "-0.24202868219585674", + "-0.181253980627111", + "0.215020504730438" + ], + [ + "3", + "1.3471924655222902", + "0.0716181920787214", + "0.40464583887105954", + "0.7442069902648635", + "-0.24202868219585674", + "-0.181253980627111", + "0.25190705219855114" + ], + [ + "4", + 
"-0.4645622914611999", + "0.0716181920787214", + "-0.28410321711390524", + "-1.8216759628498953", + "-0.24202868219585674", + "-0.181253980627111", + "0.8144269010872852" + ] + ], + "shape": { + "columns": 7, + "rows": 5 + } + }, + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ANNEE_CTRAGE_ASSURE_PRINCIPALANCIENNETE_PERMISANNEE_CONSTRUCTIONNBCHARGEEXPO
00.1393560.6582870.5635880.174011-0.242029-0.181254-0.289146
10.1393563.1516280.9874340.744207-0.242029-0.181254-0.427093
21.347192-0.735051-1.0788140.459109-0.242029-0.1812540.215021
31.3471920.0716180.4046460.744207-0.242029-0.1812540.251907
4-0.4645620.071618-0.284103-1.821676-0.242029-0.1812540.814427
\n", + "
" + ], + "text/plain": [ + " ANNEE_CTR AGE_ASSURE_PRINCIPAL ANCIENNETE_PERMIS ANNEE_CONSTRUCTION \\\n", + "0 0.139356 0.658287 0.563588 0.174011 \n", + "1 0.139356 3.151628 0.987434 0.744207 \n", + "2 1.347192 -0.735051 -1.078814 0.459109 \n", + "3 1.347192 0.071618 0.404646 0.744207 \n", + "4 -0.464562 0.071618 -0.284103 -1.821676 \n", + "\n", + " NB CHARGE EXPO \n", + "0 -0.242029 -0.181254 -0.289146 \n", + "1 -0.242029 -0.181254 -0.427093 \n", + "2 -0.242029 -0.181254 0.215021 \n", + "3 -0.242029 -0.181254 0.251907 \n", + "4 -0.242029 -0.181254 0.814427 " + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Normalisation des variables numériques\n", + "preproc_scale = preproc.StandardScaler(with_mean=True, with_std=True)\n", + "preproc_scale.fit(vars_numeriques)\n", + "\n", + "vars_numeriques_scaled = preproc_scale.transform(vars_numeriques)\n", + "vars_numeriques_scaled = pd.DataFrame(\n", + " vars_numeriques_scaled, columns=vars_numeriques.columns\n", + ")\n", + "vars_numeriques_scaled.head()" + ] }, { "cell_type": "markdown", @@ -347,11 +4203,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 38, "id": "d9342ad6", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "X_global = vars_numeriques_scaled.merge(\n", + " variables_categorielles_ohe, left_index=True, right_index=True\n", + ")\n", + "\n", + "# Réorganisation des données\n", + "X = X_global.to_numpy()\n", + "Y = data_retraitee[\"sinistré\"]\n", + "\n", + "# Sampling en 80% train et 20% test\n", + "X_train, X_test, y_train, y_test = train_test_split(\n", + " X, Y, test_size=0.2, random_state=42\n", + ")" + ] }, { "cell_type": "markdown", @@ -367,7 +4236,135 @@ "id": "cb60fe19", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "# Définir la grille d'hyperparamètres à rechercher\n", + "param_grid = {\n", + " \"n_estimators\": [60, 65, 70, 75],\n", + " \"max_depth\": [None, 1, 2, 3],\n", + " 
\"min_samples_split\": [5, 8, 10, 11, 13, 14, 15],\n", + "}\n", + "# Nombre de folds pour la validation croisée\n", + "num_folds = 5" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "b976720e", + "metadata": {}, + "outputs": [ + { + "ename": "InvalidParameterError", + "evalue": "The 'scoring' parameter of GridSearchCV must be a str among {'average_precision', 'adjusted_rand_score', 'roc_auc', 'top_k_accuracy', 'recall', 'neg_negative_likelihood_ratio', 'neg_mean_squared_error', 'positive_likelihood_ratio', 'precision', 'neg_mean_squared_log_error', 'precision_micro', 'neg_mean_poisson_deviance', 'completeness_score', 'accuracy', 'adjusted_mutual_info_score', 'precision_macro', 'neg_max_error', 'mutual_info_score', 'jaccard_samples', 'recall_samples', 'neg_mean_absolute_percentage_error', 'fowlkes_mallows_score', 'neg_brier_score', 'f1_samples', 'jaccard_weighted', 'recall_micro', 'd2_absolute_error_score', 'homogeneity_score', 'matthews_corrcoef', 'f1_micro', 'f1_macro', 'neg_root_mean_squared_error', 'precision_samples', 'neg_root_mean_squared_log_error', 'neg_mean_gamma_deviance', 'jaccard', 'neg_median_absolute_error', 'neg_mean_absolute_error', 'roc_auc_ovr', 'jaccard_micro', 'jaccard_macro', 'roc_auc_ovo', 'neg_log_loss', 'normalized_mutual_info_score', 'balanced_accuracy', 'f1_weighted', 'r2', 'recall_macro', 'rand_score', 'v_measure_score', 'explained_variance', 'roc_auc_ovo_weighted', 'precision_weighted', 'roc_auc_ovr_weighted', 'f1', 'recall_weighted'}, a callable, an instance of 'list', an instance of 'tuple', an instance of 'dict' or None. 
Got '' instead.", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mInvalidParameterError\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[47]\u001b[39m\u001b[32m, line 16\u001b[39m\n\u001b[32m 5\u001b[39m grid_search = GridSearchCV(\n\u001b[32m 6\u001b[39m estimator = rf,\n\u001b[32m 7\u001b[39m param_grid = param_grid,\n\u001b[32m (...)\u001b[39m\u001b[32m 12\u001b[39m n_jobs = -\u001b[32m1\u001b[39m, \u001b[38;5;66;03m# Utiliser tous les cœurs du processeur\u001b[39;00m\n\u001b[32m 13\u001b[39m )\n\u001b[32m 15\u001b[39m \u001b[38;5;66;03m# Exécution de la recherche sur grille\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m16\u001b[39m \u001b[43mgrid_search\u001b[49m\u001b[43m.\u001b[49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX_train\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_train\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 18\u001b[39m \u001b[38;5;66;03m# Afficher les meilleurs hyperparamètres\u001b[39;00m\n\u001b[32m 19\u001b[39m best_params = grid_search.best_params_\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Workspace/studies/.venv/lib/python3.13/site-packages/sklearn/base.py:1382\u001b[39m, in \u001b[36m_fit_context..decorator..wrapper\u001b[39m\u001b[34m(estimator, *args, **kwargs)\u001b[39m\n\u001b[32m 1377\u001b[39m partial_fit_and_fitted = (\n\u001b[32m 1378\u001b[39m fit_method.\u001b[34m__name__\u001b[39m == \u001b[33m\"\u001b[39m\u001b[33mpartial_fit\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m _is_fitted(estimator)\n\u001b[32m 1379\u001b[39m )\n\u001b[32m 1381\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m global_skip_validation \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m partial_fit_and_fitted:\n\u001b[32m-> \u001b[39m\u001b[32m1382\u001b[39m 
\u001b[43mestimator\u001b[49m\u001b[43m.\u001b[49m\u001b[43m_validate_params\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1384\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m config_context(\n\u001b[32m 1385\u001b[39m skip_parameter_validation=(\n\u001b[32m 1386\u001b[39m prefer_skip_nested_validation \u001b[38;5;129;01mor\u001b[39;00m global_skip_validation\n\u001b[32m 1387\u001b[39m )\n\u001b[32m 1388\u001b[39m ):\n\u001b[32m 1389\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m fit_method(estimator, *args, **kwargs)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Workspace/studies/.venv/lib/python3.13/site-packages/sklearn/base.py:436\u001b[39m, in \u001b[36mBaseEstimator._validate_params\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 428\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m_validate_params\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[32m 429\u001b[39m \u001b[38;5;250m \u001b[39m\u001b[33;03m\"\"\"Validate types and values of constructor parameters\u001b[39;00m\n\u001b[32m 430\u001b[39m \n\u001b[32m 431\u001b[39m \u001b[33;03m The expected type and values must be defined in the `_parameter_constraints`\u001b[39;00m\n\u001b[32m (...)\u001b[39m\u001b[32m 434\u001b[39m \u001b[33;03m accepted constraints.\u001b[39;00m\n\u001b[32m 435\u001b[39m \u001b[33;03m \"\"\"\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m436\u001b[39m \u001b[43mvalidate_parameter_constraints\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 437\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_parameter_constraints\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 438\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mget_params\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdeep\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 439\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mcaller_name\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[34;43m__class__\u001b[39;49m\u001b[43m.\u001b[49m\u001b[34;43m__name__\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 440\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Workspace/studies/.venv/lib/python3.13/site-packages/sklearn/utils/_param_validation.py:98\u001b[39m, in \u001b[36mvalidate_parameter_constraints\u001b[39m\u001b[34m(parameter_constraints, params, caller_name)\u001b[39m\n\u001b[32m 92\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 93\u001b[39m constraints_str = (\n\u001b[32m 94\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m, \u001b[39m\u001b[33m'\u001b[39m.join([\u001b[38;5;28mstr\u001b[39m(c)\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mfor\u001b[39;00m\u001b[38;5;250m \u001b[39mc\u001b[38;5;250m \u001b[39m\u001b[38;5;129;01min\u001b[39;00m\u001b[38;5;250m \u001b[39mconstraints[:-\u001b[32m1\u001b[39m]])\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m or\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 95\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33m \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconstraints[-\u001b[32m1\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m\n\u001b[32m 96\u001b[39m )\n\u001b[32m---> \u001b[39m\u001b[32m98\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m InvalidParameterError(\n\u001b[32m 99\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mThe \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mparam_name\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[33m parameter of \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcaller_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m must be\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 100\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33m 
\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconstraints_str\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m. Got \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mparam_val\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[33m instead.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 101\u001b[39m )\n", + "\u001b[31mInvalidParameterError\u001b[39m: The 'scoring' parameter of GridSearchCV must be a str among {'average_precision', 'adjusted_rand_score', 'roc_auc', 'top_k_accuracy', 'recall', 'neg_negative_likelihood_ratio', 'neg_mean_squared_error', 'positive_likelihood_ratio', 'precision', 'neg_mean_squared_log_error', 'precision_micro', 'neg_mean_poisson_deviance', 'completeness_score', 'accuracy', 'adjusted_mutual_info_score', 'precision_macro', 'neg_max_error', 'mutual_info_score', 'jaccard_samples', 'recall_samples', 'neg_mean_absolute_percentage_error', 'fowlkes_mallows_score', 'neg_brier_score', 'f1_samples', 'jaccard_weighted', 'recall_micro', 'd2_absolute_error_score', 'homogeneity_score', 'matthews_corrcoef', 'f1_micro', 'f1_macro', 'neg_root_mean_squared_error', 'precision_samples', 'neg_root_mean_squared_log_error', 'neg_mean_gamma_deviance', 'jaccard', 'neg_median_absolute_error', 'neg_mean_absolute_error', 'roc_auc_ovr', 'jaccard_micro', 'jaccard_macro', 'roc_auc_ovo', 'neg_log_loss', 'normalized_mutual_info_score', 'balanced_accuracy', 'f1_weighted', 'r2', 'recall_macro', 'rand_score', 'v_measure_score', 'explained_variance', 'roc_auc_ovo_weighted', 'precision_weighted', 'roc_auc_ovr_weighted', 'f1', 'recall_weighted'}, a callable, an instance of 'list', an instance of 'tuple', an instance of 'dict' or None. Got '' instead." 
+ ] + } + ], + "source": [ + "# Initialisation du modèle GradientBoostingClassifier\n", + "rf = GradientBoostingClassifier(random_state=42)\n", + "\n", + "# Création de l'objet GridSearchCV pour la recherche sur grille avec validation croisée\n", + "grid_search = GridSearchCV(\n", + " estimator = rf,\n", + " param_grid = param_grid,\n", + " cv = StratifiedKFold(\n", + " n_splits = num_folds, shuffle = True, random_state = 42\n", + " ), # Validation croisée avec 5 folds\n", + " scoring = \"\", # Métrique d'évaluation à renseigner (chaîne vide refusée) ; GridSearchCV retient le score le plus élevé\n", + " n_jobs = -1, # Utiliser tous les cœurs du processeur\n", + ")\n", + "\n", + "# Exécution de la recherche sur grille\n", + "grid_search.fit(X_train, y_train)\n", + "\n", + "# Afficher les meilleurs hyperparamètres\n", + "best_params = grid_search.best_params_\n", + "print(\"Meilleurs hyperparamètres : \", best_params)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "0a35a4bf", + "metadata": {}, + "outputs": [], + "source": [ + "# Initialiser le modèle final avec les meilleurs hyperparamètres\n", + "best_rf = GradientBoostingClassifier(random_state=42, **best_params)" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "a7f59ea7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "RMSE pour le fold 1: -0.0\n", + "RMSE pour le fold 2: -0.0\n", + "RMSE pour le fold 3: -0.0\n", + "RMSE pour le fold 4: -0.0\n", + "RMSE pour le fold 5: -0.0\n", + "\n", + "\n", + "MSE pour le fold 1: -0.0\n", + "MSE pour le fold 2: -0.0\n", + "MSE pour le fold 3: -0.0\n", + "MSE pour le fold 4: -0.0\n", + "MSE pour le fold 5: -0.0\n", + "\n", + "\n", + "MAE pour le fold 1: -0.0\n", + "MAE pour le fold 2: -0.0\n", + "MAE pour le fold 3: -0.0\n", + "MAE pour le fold 4: -0.0\n", + "MAE pour le fold 5: -0.0\n" + ] + } + ], + "source": [ + "# Cross validation\n", + "# RMSE de chaque fold\n", + "rmse_scores = cross_val_score(\n", + " best_rf, X_train, y_train, 
cv=num_folds, scoring=\"neg_root_mean_squared_error\"\n", + ")\n", + "\n", + "# Afficher les scores pour chaque fold\n", + "for i, score in enumerate(rmse_scores):\n", + " print(f\"RMSE pour le fold {i + 1}: {score}\")\n", + "\n", + "# MSE de chaque fold\n", + "mse_scores = cross_val_score(\n", + " best_rf, X_train, y_train, cv=num_folds, scoring=\"neg_mean_squared_error\"\n", + ")\n", + "\n", + "# Afficher les scores pour chaque fold\n", + "print(\"\\n\")\n", + "for i, score in enumerate(mse_scores):\n", + " print(f\"MSE pour le fold {i + 1}: {score}\")\n", + "\n", + "# MAE de chaque fold\n", + "mae_scores = cross_val_score(\n", + " best_rf, X_train, y_train, cv=num_folds, scoring=\"neg_mean_absolute_error\"\n", + ")\n", + "\n", + "# Afficher les scores pour chaque fold\n", + "print(\"\\n\")\n", + "for i, score in enumerate(mae_scores):\n", + " print(f\"MAE pour le fold {i + 1}: {score}\")" + ] }, { "cell_type": "markdown",