diff --git a/M2/Machine Learning/TP_3/2025_TP_3_M2_ISF.ipynb b/M2/Machine Learning/TP_3/2025_TP_3_M2_ISF.ipynb
index d298183..a3d72a0 100644
--- a/M2/Machine Learning/TP_3/2025_TP_3_M2_ISF.ipynb
+++ b/M2/Machine Learning/TP_3/2025_TP_3_M2_ISF.ipynb
@@ -46,7 +46,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 191,
"id": "97d58527",
"metadata": {},
"outputs": [],
@@ -60,7 +60,6 @@
"\n",
"sns.set()\n",
"import plotly.express as px\n",
- "import plotly.graph_objects as gp\n",
"import sklearn.metrics as metrics\n",
"import sklearn.preprocessing as preproc\n",
"\n",
@@ -68,10 +67,9 @@
"from scipy.stats import chi2_contingency\n",
"\n",
"# Machine Learning\n",
- "from sklearn.cluster import KMeans\n",
"from sklearn.ensemble import RandomForestRegressor\n",
- "from sklearn.model_selection import KFold, train_test_split\n",
- "from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor\n"
+ "from sklearn.model_selection import KFold, cross_val_score, train_test_split\n",
+ "from sklearn.tree import DecisionTreeRegressor\n"
]
},
{
@@ -84,11 +82,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 192,
"id": "c67db932",
"metadata": {},
"outputs": [],
- "source": []
+ "source": [
+ "def cramers_V(var1, var2):\n",
+ "    \"\"\"Return Cramer's V association measure between two categorical series.\"\"\"\n",
+ "    # Contingency table of the two variables, as a plain 2-D array of counts\n",
+ "    crosstab = np.array(pd.crosstab(var1, var2, rownames=None, colnames=None))\n",
+ "    # First element of the chi2_contingency result is the chi-squared statistic\n",
+ "    stat = chi2_contingency(crosstab)[0]\n",
+ "    obs = np.sum(crosstab)  # Total number of observations in the table\n",
+ "    # Smallest table dimension minus one: normalises the statistic\n",
+ "    mini = min(crosstab.shape) - 1\n",
+ "    # V = sqrt(chi2 / (n * min(rows - 1, cols - 1))); the previous version\n",
+ "    # returned the ratio without the square root, i.e. V squared.\n",
+ "    return np.sqrt(stat / (obs * mini))\n"
+ ]
},
{
"cell_type": "markdown",
@@ -100,7 +111,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 193,
"id": "c9597b48",
"metadata": {},
"outputs": [],
@@ -119,7 +130,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 194,
"id": "8051b5f4",
"metadata": {},
"outputs": [],
@@ -163,31 +174,367 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 195,
"id": "c427a4b8",
"metadata": {},
"outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/var/folders/tp/_ld5_pzs6nx6mv1pbjhq1l740000gn/T/ipykernel_41302/358057511.py:7: SettingWithCopyWarning:\n",
+ "\n",
+ "\n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ "\n"
+ ]
+ },
{
"data": {
+ "application/vnd.microsoft.datawrangler.viewer.v0+json": {
+ "columns": [
+ {
+ "name": "index",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "ANNEE_CTR",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "CONTRAT_ANCIENNETE",
+ "rawType": "object",
+ "type": "string"
+ },
+ {
+ "name": "FREQUENCE_PAIEMENT_COTISATION",
+ "rawType": "object",
+ "type": "string"
+ },
+ {
+ "name": "GROUPE_KM",
+ "rawType": "object",
+ "type": "string"
+ },
+ {
+ "name": "ZONE_RISQUE",
+ "rawType": "object",
+ "type": "string"
+ },
+ {
+ "name": "AGE_ASSURE_PRINCIPAL",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "GENRE",
+ "rawType": "object",
+ "type": "string"
+ },
+ {
+ "name": "DEUXIEME_CONDUCTEUR",
+ "rawType": "bool",
+ "type": "boolean"
+ },
+ {
+ "name": "ANCIENNETE_PERMIS",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "ANNEE_CONSTRUCTION",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ENERGIE",
+ "rawType": "object",
+ "type": "string"
+ },
+ {
+ "name": "EQUIPEMENT_SECURITE",
+ "rawType": "object",
+ "type": "string"
+ },
+ {
+ "name": "VALEUR_DU_BIEN",
+ "rawType": "object",
+ "type": "string"
+ },
+ {
+ "name": "CM",
+ "rawType": "float64",
+ "type": "float"
+ }
+ ],
+ "ref": "9e024176-1fe1-4a76-bb33-627401a1ea24",
+ "rows": [
+ [
+ "10",
+ "2019",
+ "(0,1]",
+ "MENSUEL",
+ "[0;20000[",
+ "C",
+ "40",
+ "M",
+ "False",
+ "37",
+ "2017.0",
+ "ESSENCE",
+ "VRAI",
+ "[15000;20000[",
+ "1072.98"
+ ],
+ [
+ "34",
+ "2020",
+ "(-1,0]",
+ "MENSUEL",
+ "[20000;40000[",
+ "C",
+ "27",
+ "M",
+ "True",
+ "13",
+ "2018.0",
+ "AUTRE",
+ "FAUX",
+ "[35000;99999[",
+ "3750.0"
+ ],
+ [
+ "36",
+ "2019",
+ "(-1,0]",
+ "MENSUEL",
+ "[20000;40000[",
+ "L",
+ "19",
+ "M",
+ "False",
+ "2",
+ "2017.0",
+ "ESSENCE",
+ "VRAI",
+ "[0;10000[",
+ "1838.49"
+ ],
+ [
+ "78",
+ "2019",
+ "(-1,0]",
+ "MENSUEL",
+ "[20000;40000[",
+ "B",
+ "40",
+ "M",
+ "False",
+ "45",
+ "2018.0",
+ "DIESEL",
+ "FAUX",
+ "[15000;20000[",
+ "4892.74"
+ ],
+ [
+ "89",
+ "2018",
+ "(1,2]",
+ "MENSUEL",
+ "[20000;40000[",
+ "C",
+ "20",
+ "M",
+ "False",
+ "11",
+ "2014.0",
+ "ESSENCE",
+ "FAUX",
+ "[25000;35000[",
+ "166.73"
+ ]
+ ],
+ "shape": {
+ "columns": 14,
+ "rows": 5
+ }
+ },
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ANNEE_CTR | \n",
+ " CONTRAT_ANCIENNETE | \n",
+ " FREQUENCE_PAIEMENT_COTISATION | \n",
+ " GROUPE_KM | \n",
+ " ZONE_RISQUE | \n",
+ " AGE_ASSURE_PRINCIPAL | \n",
+ " GENRE | \n",
+ " DEUXIEME_CONDUCTEUR | \n",
+ " ANCIENNETE_PERMIS | \n",
+ " ANNEE_CONSTRUCTION | \n",
+ " ENERGIE | \n",
+ " EQUIPEMENT_SECURITE | \n",
+ " VALEUR_DU_BIEN | \n",
+ " CM | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 10 | \n",
+ " 2019 | \n",
+ " (0,1] | \n",
+ " MENSUEL | \n",
+ " [0;20000[ | \n",
+ " C | \n",
+ " 40 | \n",
+ " M | \n",
+ " False | \n",
+ " 37 | \n",
+ " 2017.0 | \n",
+ " ESSENCE | \n",
+ " VRAI | \n",
+ " [15000;20000[ | \n",
+ " 1072.98 | \n",
+ "
\n",
+ " \n",
+ " | 34 | \n",
+ " 2020 | \n",
+ " (-1,0] | \n",
+ " MENSUEL | \n",
+ " [20000;40000[ | \n",
+ " C | \n",
+ " 27 | \n",
+ " M | \n",
+ " True | \n",
+ " 13 | \n",
+ " 2018.0 | \n",
+ " AUTRE | \n",
+ " FAUX | \n",
+ " [35000;99999[ | \n",
+ " 3750.00 | \n",
+ "
\n",
+ " \n",
+ " | 36 | \n",
+ " 2019 | \n",
+ " (-1,0] | \n",
+ " MENSUEL | \n",
+ " [20000;40000[ | \n",
+ " L | \n",
+ " 19 | \n",
+ " M | \n",
+ " False | \n",
+ " 2 | \n",
+ " 2017.0 | \n",
+ " ESSENCE | \n",
+ " VRAI | \n",
+ " [0;10000[ | \n",
+ " 1838.49 | \n",
+ "
\n",
+ " \n",
+ " | 78 | \n",
+ " 2019 | \n",
+ " (-1,0] | \n",
+ " MENSUEL | \n",
+ " [20000;40000[ | \n",
+ " B | \n",
+ " 40 | \n",
+ " M | \n",
+ " False | \n",
+ " 45 | \n",
+ " 2018.0 | \n",
+ " DIESEL | \n",
+ " FAUX | \n",
+ " [15000;20000[ | \n",
+ " 4892.74 | \n",
+ "
\n",
+ " \n",
+ " | 89 | \n",
+ " 2018 | \n",
+ " (1,2] | \n",
+ " MENSUEL | \n",
+ " [20000;40000[ | \n",
+ " C | \n",
+ " 20 | \n",
+ " M | \n",
+ " False | \n",
+ " 11 | \n",
+ " 2014.0 | \n",
+ " ESSENCE | \n",
+ " FAUX | \n",
+ " [25000;35000[ | \n",
+ " 166.73 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
"text/plain": [
- "(824, 14)"
+ " ANNEE_CTR CONTRAT_ANCIENNETE FREQUENCE_PAIEMENT_COTISATION GROUPE_KM \\\n",
+ "10 2019 (0,1] MENSUEL [0;20000[ \n",
+ "34 2020 (-1,0] MENSUEL [20000;40000[ \n",
+ "36 2019 (-1,0] MENSUEL [20000;40000[ \n",
+ "78 2019 (-1,0] MENSUEL [20000;40000[ \n",
+ "89 2018 (1,2] MENSUEL [20000;40000[ \n",
+ "\n",
+ " ZONE_RISQUE AGE_ASSURE_PRINCIPAL GENRE DEUXIEME_CONDUCTEUR \\\n",
+ "10 C 40 M False \n",
+ "34 C 27 M True \n",
+ "36 L 19 M False \n",
+ "78 B 40 M False \n",
+ "89 C 20 M False \n",
+ "\n",
+ " ANCIENNETE_PERMIS ANNEE_CONSTRUCTION ENERGIE EQUIPEMENT_SECURITE \\\n",
+ "10 37 2017.0 ESSENCE VRAI \n",
+ "34 13 2018.0 AUTRE FAUX \n",
+ "36 2 2017.0 ESSENCE VRAI \n",
+ "78 45 2018.0 DIESEL FAUX \n",
+ "89 11 2014.0 ESSENCE FAUX \n",
+ "\n",
+ " VALEUR_DU_BIEN CM \n",
+ "10 [15000;20000[ 1072.98 \n",
+ "34 [35000;99999[ 3750.00 \n",
+ "36 [0;10000[ 1838.49 \n",
+ "78 [15000;20000[ 4892.74 \n",
+ "89 [25000;35000[ 166.73 "
]
},
- "execution_count": 4,
+ "execution_count": 195,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "data_model = data_retraitee.copy()\n",
+ "data_model = data_retraitee\n",
"\n",
"# Filtre pour ne garder que les lignes qui ont un sinistre (NB > 0)\n",
- "data_model = data_model[data_model['NB'] > 0]\n",
+ "data_model = data_model[data_model[\"NB\"] > 0].copy()  # independent copy: the CM assignment below must not target a slice of data_retraitee (SettingWithCopyWarning)\n",
"\n",
"# Calcul du cout moyen \"théorique\" des sinistres\n",
- "data_model[\"CM\"] = (data_model[\"CHARGE\"] / data_model[\"NB\"])\n",
- "data_model = data_model.drop(['CHARGE', 'NB', \"EXPO\"], axis=1)\n",
- "data_model.shape"
+ "data_model[\"CM\"] = data_model[\"CHARGE\"] / data_model[\"NB\"]\n",
+ "data_model = data_model.drop([\"CHARGE\", \"NB\", \"EXPO\"], axis=1)\n",
+ "data_model.head()"
]
},
{
@@ -200,7 +547,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 196,
"id": "c8fd3ee1",
"metadata": {},
"outputs": [
@@ -284,7 +631,7 @@
"type": "float"
}
],
- "ref": "e29190e7-d62c-4ab7-ab0a-43dd875c8b98",
+ "ref": "59cf3e53-3de4-4283-9dac-6a29f574b6fe",
"rows": [
[
"count",
@@ -760,7 +1107,7 @@
"max NaN 83421.850000 "
]
},
- "execution_count": 5,
+ "execution_count": 196,
"metadata": {},
"output_type": "execute_result"
}
@@ -769,6 +1116,858 @@
"data_model.describe(include='all')"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 197,
+ "id": "2d32ae2b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.plotly.v1+json": {
+ "config": {
+ "plotlyServerURL": "https://plot.ly"
+ },
+ "data": [
+ {
+ "bingroup": "x",
+ "hovertemplate": "CM=%{x}
count=%{y}",
+ "legendgroup": "",
+ "marker": {
+ "color": "#636efa",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "",
+ "orientation": "v",
+ "showlegend": false,
+ "type": "histogram",
+ "x": {
+ "bdata": "UrgehevDkEAAAAAAAEytQClcj8L1uZxACtejcL0cs0CPwvUoXNdkQK5H4XqU+7JAKVyPwvXhoECuR+F6VBiiQClcj8L1D5lApHA9CtfUmUBxPQrXo6KrQGZmZmZm7ZdAAAAAACCV2UCkcD0KV8K/QPYoXI9CcbxAj8L1KFwZikAzMzMzs669QK5H4XpUdrlAXI/C9SiMi0BmZmZmpueuQOF6FK5H2qlAZmZmZma+Y0AK16NwPQOoQArXo3D9b7JAmpmZmZmjwEAAAAAAAEytQAAAAAAAmIlAXI/C9agVtECamZmZGZe/QClcj8L1BJ9Aj8L1KFzXZEDsUbgehUKVQEjhehSuV5BAPQrXo/DAqEBI4XoULvGnQOxRuB6FFJxAzczMzAwtuUDhehSuR/a3QKRwPQq3FNBAAAAAAABMrUB7FK5H4SbAQKRwPQrXmrNAAAAAAABMrUCPwvUoXF2SQPYoXI/CuYdAzczMzMypwUAAAAAAAEytQK5H4XoUPJpAUrgehauDuEAAAAAAAAAeQGZmZmZm+KhAAAAAAABMrUB7FK5H4TnDQEjhehSuX25A16NwPcqW1EA9CtejcMqRQAAAAAAATK1A16NwPQqfZEAzMzMz86O1QB+F61E4pp9ACtejcL1+pEBI4XoUrtOFQAAAAAAAoGRAAAAAAABMrUDNzMzMzK6KQHsUrkfhQJdAzczMzMyEmkDXo3A9Cp9kQFyPwvUoRHNAAAAAAADSqkBmZmZmZr5jQLgehetRCGdAMzMzMzP4kECamZmZmWmNQIXrUbieDbBAFK5H4XrSg0CkcD0KN4/IQOF6FK5H6JZAcT0K16Nse0D2KFyPwu2MQFK4HoXr66tACtejcL0OskCPwvUoXNdkQI/C9Shc12RAH4XrUbjLlkB7FK5HodrMQNejcD0KxqhAw/UoXI/qcEAAAAAAAPS6QPYoXI8iXNNAH4XrUbhjmECuR+F69L3IQAAAAAAATK1AFK5H4TposEBmZmZmZr5jQOF6FK7Hs6ZAZmZmZma+Y0AAAAAAAAWiQI/C9SjcY7RAAAAAAACQlkDsUbgehUacQOxRuB4FeKlAZmZmZma+Y0CkcD0KV+iYQMP1KFyPhZ9ASOF6FO5gsEDNzMzMTJilQM3MzMzMAptAAAAAAAAqlEC4HoXrURiLQGZmZmZmbpRAmpmZmZnKmkCPwvUoXNdkQI/C9Shc12RAAAAAAADwZUB7FK5H4WCVQBSuR+F6joRAAAAAAABMrUC4HoXrURKGQGZmZmZmZLBAKVyPwvXIm0A9CtejcKWWQBSuR+F6u5ZAexSuR+GMq0AAAAAAkMvZQM3MzMxMy6tACtejcD0xnUBxPQrXox+tQDMzMzOz2bVAcT0K16PUrEC4HoXr0QWoQOxRuB6FkYhA9ihcj2K7z0CPwvUoXNvIQPYoXI/ChWRAuB6F69Fzo0AAAAAAAEytQPYoXI/CU6dAj8L1KFzXZEDsUbgetd/XQIXrUbgeb5hAXI/C9RjN0EAzMzMzMziiQPYoXI9C7a9AhetRuB5xf0AAAAAAAPBlQIXrUbh+xMNAAAAAADCG1kDXo3A9CpdQQGZmZmZmvmNAmpmZmZmroECPwvUoXHePQM3MzMxMlqFASOF6FH46wEA9CtejcJahQAAAAACAXr5AKVyPwvX3l0DD9ShcjzycQJqZmZmZGqpA4XoUruffwEBcj8L1KDyhQAAAAAAATK1AAAAAAACrlUBmZmZmZgSlQAAAAAAA8GVAAAAAAADwZUC4HoXrUbg/QPYoXI/CxKRAPQrXo3CsqEBcj8L1KE+wQAAAAACAX6hAmpmZmZlxpEBxPQrX43W3QAAAAAAATK1ArkfhetSUskAfhetROIGuQOxRuB6FcahAMzMzMzOffEC4HoXrUfuqQMP1KFyPV7ZAXI/C9ajxs0CamZmZGRqSQMP1KFyPxMFA16NwPQpUs0CuR+F6FPu1QAAAAAAATK1AAAAA
AADanEBxPQrXozSMQDMzMzMzl5xAcT0K12MNskAAAAAAgKGkQFK4HoWL5uBAAAAAAADwZUBcj8L1KLB7QIXrUbgWz/BA16NwPQqfZEAzMzMzs8yiQD0K16NwU7JACtejcL2frEDXo3A9irK2QK5H4Xr0qdRA16NwPQpInkDhehSuR6WVQBSuR+F6q7xAAAAAAADwZUA9CtejcPC2QClcj8L1dalA9ihcj8KFZEAK16Nw/SHIQHsUrkfh37ZAmZmZmdlrtkA9CtejcLOKQKRwPQrXK7NA9ihcj4ITsUDhehSuR8V1QHE9Ctejo6JAmpmZmZmZnEDD9Shcj6yTQDMzMzOz16lAj8L1KNyXq0DD9Shcj8bQQOF6FK6nb8FAAAAAAADUhEDhehSuRxSkQJqZmZkZ5KBAUrgehevMn0CamZmZ2Yu5QOxRuB6Fms9AzczMzMy9nUAUrkfheiulQAAAAAAAEolAUrgehesFgkDD9Shcj3CSQPYoXI9CKKRA16NwPYruq0BI4XoU7vy8QMP1KFyPinlA4XoUrkdkm0Bcj8L1KOuuQKRwPQpXGaZAXI/C9ShwlUBSuB6F61WYQDMzMzOzZaJAAAAAAACsgEBxPQrXo7yYQI/C9SjcdbNAXI/C9Sjkq0AUrkfhepOSQFK4HoXre6dA4XoUrkeroUCamZmZGRapQAAAAAAATK1Aj8L1KFzXZECPwvUoXEdmQM3MzMzMVptAZmZmZma/pkAAAAAAAEytQJqZmZmZHpZAH4XrUTh7rUBSuB6F62GEQNejcD0KtaJA16NwPYpuqkDsUbgedZfUQB+F61G4wJ9A16NwPQqfZEAUrkfhepagQGZmZmZmybRAAAAAAADwZUBI4XoUro2MQHE9CtejMpZA4XoUrke2m0AAAAAAANuTQAAAAAAATK1AexSuR+GirkB7FK5HYX+kQHE9CtfjGMNAAAAAAABMrUApXI/C1RbAQHsUrkfhm5NAUrgehetzoEAAAAAAAABoQAAAAAAATK1AmpmZmRmpoEAAAAAAAPWwQOF6FK5H2YZAhetRuJ4hsEAzMzMzMxOvQEjhehSus4dAFK5H4Xq5k0CPwvUo3I20QDMzMzOzpbZAAAAAAACgZEAAAAAAAPBlQArXo3B9uLZAMzMzMzMTskD2KFyPMijaQI/C9Shc12RAKVyPwnVep0BmZmZmZr5jQAAAAAAAIIxASOF6FK7poUDhehSux6GmQOF6FK4n4sJA7FG4HgWUoUC4HoXr0QaxQIXrUbgesYBAAAAAAABMrUDD9Shcj4SUQKRwPQrXN6hA7FG4HoVhlkBSuB6Fq2DMQNejcD0Kd51A16NwPQqYpEDsUbgehcqdQB+F61F4B7JA4XoUrkfZqkCPwvUoXNdkQFyPwvWovahAPQrXo3CKsEC4HoXrUb+mQLgehetRCq5A16NwPQodrkCPwvUonEy5QNejcD0Kh7VAj8L1KFzXZECPwvUo3JKkQPYoXI/CDYZAAAAAAABMrUAAAAAAAABoQI/C9ShcHZxAAAAAAAAAaEAAAAAAAABoQEjhehSuYKRASOF6FK6ymECPwvUo3IqgQNejcD0qfc9ACtejcD0In0Bcj8L1uCrQQAAAAAAA8GVAzczMzNRy10BmZmZmZr+dQLgehetR869ASOF6FO57tkD2KFyPwp6QQMP1KFyPbqRAMzMzMzOXgEBcj8L1KGeQQAAAAAAATK1AcT0K16OMvUCkcD0K1ymgQAAAAAAAv7VA16NwPQp3UkC4HoXrEVa0QD0K16NweJ5AuB6F61Gju0AAAAAAAEytQI/C9Shcb3pAZmZmZma8jkAAAAAAAPGkQK5H4XoUwbhACtejcD1TlkCPwvUoXM6cQAAAAAAA8GVA9ihcj0L7rUAAAAAAAPBlQMP1KFyPm8lAj8L1KFzXZEA9Ctej8KiiQNejcD0Kn2RAAAAAAADwZUD2KFyPwqViQAAAAAAA8GVAH4XrUbg5nkAK16NwPZmh
QAAAAAAATK1AmpmZmZnYokAAAAAAAImgQGZmZma6K/BAZmZmZmaeokCuR+F6NDrNQD0K16OAdddAj8L1KFzXZECPwvUoXLCfQI/C9Shc8qVAPQrXo3AMlkA9CtejcGWOQM3MzMwc1tpAhetRuF6xtEDhehSuR1S4QBSuR+F6hoJAAAAAAAAYpUDXo3A9Cp9kQPYoXI/Cm4pASOF6FK4rjkCPwvUoHCLDQHE9CtcjH6VA16NwPQqnnkDD9Shcj+6bQArXo3A94JVAAAAAAADwZUDNzMzMzBSnQJqZmZnJFNNAPQrXo/AqwkCF61G4HgmBQD0K16PwYqBAMzMzMzMakUC4HoXrUWCWQFK4HoXrUZ9AXI/C9Sg4m0BmZmZm5l+qQBSuR+HaGdNAAAAAAADwZUCF61G4HlODQKRwPQrXALFAhetRuF6DskBmZmZmZr5jQAAAAAAATK1ArkfhepQ8oUAK16NwPS6iQPYoXI/CyqBAzczMzBw110ApXI/C9eCYQB+F61G4H5ZAj8L1KFzXZECF61G4HpK3QM3MzMzMwINAKVyPwvUVl0AzMzMzM4qZQFyPwvUoJItASOF6FC78rkAAAAAAAPBlQGZmZmZmba5AmpmZmd1d9EAzMzMzMySnQBSuR+F6RZFA4XoUrkdnn0AUrkfheqGnQOxRuB7FmbNAZmZmZma+Y0DsUbgeRZqxQD0K16PwlaJA16NwPQqfZECPwvUoXOahQNejcD0KJqFAXI/C9Sh8qECamZmZWYDaQAAAAAAAAGhAzczMzEwBoEB7FK5H4Y2iQPYoXI/CK6dAUrgehettgkCamZmZmdOEQKRwPQrXMKhA4XoUrscpsECkcD0K16CaQK5H4XoUEJ9AMzMzMzMBpkAAAAAAAPBlQArXo3C90KhASOF6FK6bg0C4HoXrUTSbQLgehetRVoFArkfhevzm5EAK16NwPbqkQJqZmZmZyJdAXI/C9ei+oEAK16NwvQOgQEjhehSuJ4FAAAAAAABMrUDNzMzMzMKeQAAAAAAATK1AzczMzMwEpUB7FK5HYW6qQIXrUbieTrdArkfhetTgsUAAAAAAAPBlQI/C9ShckK5AAAAAAACIdkC4HoXrUdeUQLgehetRCIRAj8L1KFwmyUCPwvUo3Pa5QK5H4XqU4rBApHA9CneNtEBcj8L1KFeeQArXo3C97qNAMzMzM7OntkCPwvUoXNmUQHsUrkfhooRAj8L1KFxrikBmZmZmZtqmQOxRuB6FY5hA7FG4HoUHl0B7FK5HoQ3BQKRwPQrXd9JAAAAAAABMrUDNzMzMTPynQBSuR+F6vpZAj8L1KNzuqEDXo3A9ismiQB+F61G45nlA16NwPQowoEAUrkfhOkK8QNejcD0KzJtAAAAAAADwZUBSuB6Fi3vQQEjhehQuralAMzMzM7PwqEAAAAAAAABoQEjhehR+qdBAAAAAAABwh0AK16NwPWymQArXo3A9uJNAH4XrUbi9ukAAAAAAAEytQAAAAAAATK1APQrXo3ARgkCkcD0KVzSwQI/C9ShcqapAPQrXo3A9nkDNzMzMzDqmQBSuR+F6xpVAPQrXo7AOuUDhehSuRzqxQI/C9Shcj5ZApHA9ChdZzEBI4XoUTnC0QD0K16PwEKdAAAAAAABMrUAK16NwPYuqQB+F61G4VatAXI/C9SgStUDhehSuR/WvQHE9CtcjBbBAAAAAAADwZUBmZmZmZr5jQB+F61G4T5hAuB6F6/GrxUB7FK5HQdHBQAAAAAAATK1AzczMzEzHuEBcj8L1KHWaQM3MzMxMkKpAj8L1KDxWskAAAAAAgOLbQI/C9Shc12RACtejcD2EgkAAAAAAAPBlQB+F61G4zapAj8L1KFy3jkAfhetRuH6dQPYoXI/ChWRArkfhehRFlkC4HoXrUdujQOF6FK5H76RAUrgeheuYzUCPwvUoPCDBQJqZmZmZarFA4XoUrseIoUBmZmZmZhqLQGZmZmYGodVAMzMz
MzMnoUCkcD0Kl5S8QI/C9SjcL8VAcT0K16Pjw0DD9Shcj9qqQAAAAAAATK1AzczMzEx1skAAAAAAABBXQM3MzMzM7KZAuB6F69E4okDD9Shcjz2iQB+F61G4ZJZAUrgehesTnUAAAAAAAJehQB+F61G4DZRAMzMzMzNzv0DNzMzMTJ6lQIXrUbgeu5ZAUrgehSukskAK16NwPSCJQDMzMzMz44NAmpmZmdmSokAAAAAAAGivQAAAAAAA0FpAmpmZmRnxqUCPwvUoXNdkQAAAAAAATK1AhetRuD5/wkDsUbgexam7QGZmZmZmvmNApHA9Ctdtl0DNzMzMzGGsQOF6FK5H1YtAH4XrUThwskBI4XoULn2vQClcj8L1iJxAXI/C9ejL2EAAAAAAAEytQGZmZmZm7rZA9ihcj6JuwUAzMzMzs+WkQI/C9Shc12RA9ihcj8JckkA9CtejcGCeQMP1KFyPhZZAMzMzMzO6o0AUrkfhOpC1QOxRuB6FFaZAcT0K16Nfk0AAAAAAAEytQNejcD3aMMlAZmZmZma+Y0A9CtejcKl7QEjhehSu0p9ACtejcL3JpEBmZmZmZjykQKRwPQrXw55A9ihcj8KdZEAzMzMzM/GXQI/C9Shc12RAexSuR6HVykDhehSuR9ujQHsUrkdh27FAPQrXo/B9vkAAAAAAAABoQPYoXI/CMKNAXI/C9ShUnUB7FK5H4VuQQNejcD0KuZ1AZmZmZma+Y0AK16Nwvdm3QOF6FK5HHq1AAAAAAAAAZUCPwvUoXCKQQNejcD0KY6pAAAAAAABMrUCPwvUo3GmkQNejcD0Kn2RA9ihcj0Kuo0Bcj8L1qOOjQD0K16MwJ7FA16NwPQqfZEC4HoXrUayeQDMzMzNDHNBA4XoUrkeZpEApXI/CdTasQAAAAAAA8GVAw/UoXI+znUAUrkfhej2oQMP1KFyPYJBAAAAAAADwZUAAAAAAACmwQArXo3A9gINA7FG4HgWxoEBSuB6Fawe2QOF6FK7ng75AcT0K10M6wkAzMzMzM06bQJqZmZmZuKpAPQrXo3CwqECF61G4HqquQArXo3A9uItAKVyPwvVQjkBmZmZmZgGrQAAAAAAATK1A16NwPYq6r0BmZmZmZlaUQAAAAAAATK1AAAAAAADwZUB7FK5H4YyzQClcj8L1qFJAH4XrUbi5nkCkcD0KV9GyQClcj8J1AKVACtejcL1Lr0DD9Shcz66oQI/C9Shc12RA4XoUrscV1UCPwvUoHDe1QClcj8L1WJpAUrgeheupg0DXo3A9Cn98QAAAAAAATK1AAAAAAABMrUDD9ShcD5etQAAAAAAALpRASOF6FK4Ey0CPwvUoXNdkQGZmZmZmvmNAj8L1KFxInUCamZmZmTGsQI/C9Sjcl6hAj8L1KFyBsUA9CtejcF1xQK5H4XoU/l1AcT0K16OtsEDD9Shcj6KZQAAAAAAATK1Aj8L1KJyHz0CPwvUoXKCxQI/C9ShcDqhAAAAAAABMrUAK16NwvUmrQIXrUbgeGYdApHA9CteunkB7FK5HYUShQOxRuB6Fw5tAXI/C9ajQuECPwvUoXOuTQHE9Ctejh55A9ihcj8IhkkCamZmZmTqkQM3MzMwMJNZAw/UoXI+8kUDNzMzMzIxBQJqZmZmZh4JA7FG4HiUHw0BI4XoUriOOQFK4HoXrz5hAPQrXo3Dww0CkcD0K15WZQIXrUbiudd1AZmZmZma+Y0D2KFyPwsWhQKRwPQrXRrZAMzMzMzOzIUAzMzMz8zGyQArXo3B96LlAFK5H4XrMi0BxPQrXYznEQMP1KFyPwJZAZmZmZmbekUBcj8L1aPu8QAAAAAAA8GVASOF6FG4k1UDhehSuR4KaQHsUrkfhI5tAhetRuB55skAUrkfheoulQKRwPQq3VMtAcT0K16OPoEAzMzMzM9t2QAAAAAAATK1AzczMzMyIiEAfhetR+CHJQNejcD0Kn2RAMzMzM+vQ4EDXo3A9Cp9k
QHsUrkfhioFAj8L1KFysr0BxPQrXI16hQArXo3A9DoZAXI/C9Sguo0AzMzMzM+evQGZmZmZmSqJASOF6FA4bz0DsUbgexW22QJqZmZmZgJZAXI/C9SiZmkDNzMzMzLSTQD0K16MQm9RAH4XrUXiosEAzMzMzMwmdQNejcD0KqpBAH4XrUXgwrEDD9Shcj1iPQAAAAAAAFa5A16NwPQr5pkDNzMzMzFKzQJqZmZkZ5qVAAAAAAADJmECPwvUoXNdkQArXo3C9E7BA7FG4HoVjrUBmZmZmZkurQAAAAAAA6apAexSuR+FWrEDhehSuR5StQDMzMzOzor5AcT0K16OFmkAAAAAAAPBlQMP1KFw3oetAzczMzMyVrUAAAAAAAPBlQOF6FK7HOKNAFK5H4XoTskDNzMzMzF6cQClcj8L1VphAUrgehetjlEDXo3A9CpCnQGZmZmZmvmNAUrgehevci0AfhetRuF6HQIXrUbh+/MRAFK5H4XpQjEAzMzMzsxKkQOxRuB6FxqJAj8L1KFxFmEBSuB6Fa8a4QLgeheuRaLJAKVyPwvVghUBmZmZmZouzQJqZmZmZF6ZA9ihcj8L5oUAAAAAAAHCHQJqZmZmZeKRAAAAAAADwZUAUrkfhuoy3QFyPwvUooq9APQrXo3D9ZkAfhetR2N7QQB+F61G4FKhAMzMzM7NuqUCF61G4HvykQOF6FK5ndMBArkfhevQFxECPwvUoXNdkQJqZmZkZg6FA7FG4HgUDk0CF61G4Hi+gQHE9Ctej1bNAzczMzIxstECkcD0KV/i2QA==",
+ "dtype": "f8"
+ },
+ "xaxis": "x",
+ "yaxis": "y"
+ }
+ ],
+ "layout": {
+ "barmode": "relative",
+ "legend": {
+ "tracegroupgap": 0
+ },
+ "margin": {
+ "t": 60
+ },
+ "template": {
+ "data": {
+ "bar": [
+ {
+ "error_x": {
+ "color": "#2a3f5f"
+ },
+ "error_y": {
+ "color": "#2a3f5f"
+ },
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "bar"
+ }
+ ],
+ "barpolar": [
+ {
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "barpolar"
+ }
+ ],
+ "carpet": [
+ {
+ "aaxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "baxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "type": "carpet"
+ }
+ ],
+ "choropleth": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "choropleth"
+ }
+ ],
+ "contour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "contour"
+ }
+ ],
+ "contourcarpet": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "contourcarpet"
+ }
+ ],
+ "heatmap": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmap"
+ }
+ ],
+ "histogram": [
+ {
+ "marker": {
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "histogram"
+ }
+ ],
+ "histogram2d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2d"
+ }
+ ],
+ "histogram2dcontour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2dcontour"
+ }
+ ],
+ "mesh3d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "mesh3d"
+ }
+ ],
+ "parcoords": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "parcoords"
+ }
+ ],
+ "pie": [
+ {
+ "automargin": true,
+ "type": "pie"
+ }
+ ],
+ "scatter": [
+ {
+ "fillpattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ },
+ "type": "scatter"
+ }
+ ],
+ "scatter3d": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatter3d"
+ }
+ ],
+ "scattercarpet": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattercarpet"
+ }
+ ],
+ "scattergeo": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergeo"
+ }
+ ],
+ "scattergl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergl"
+ }
+ ],
+ "scattermap": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattermap"
+ }
+ ],
+ "scattermapbox": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattermapbox"
+ }
+ ],
+ "scatterpolar": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolar"
+ }
+ ],
+ "scatterpolargl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolargl"
+ }
+ ],
+ "scatterternary": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterternary"
+ }
+ ],
+ "surface": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "surface"
+ }
+ ],
+ "table": [
+ {
+ "cells": {
+ "fill": {
+ "color": "#EBF0F8"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "header": {
+ "fill": {
+ "color": "#C8D4E3"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "type": "table"
+ }
+ ]
+ },
+ "layout": {
+ "annotationdefaults": {
+ "arrowcolor": "#2a3f5f",
+ "arrowhead": 0,
+ "arrowwidth": 1
+ },
+ "autotypenumbers": "strict",
+ "coloraxis": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "colorscale": {
+ "diverging": [
+ [
+ 0,
+ "#8e0152"
+ ],
+ [
+ 0.1,
+ "#c51b7d"
+ ],
+ [
+ 0.2,
+ "#de77ae"
+ ],
+ [
+ 0.3,
+ "#f1b6da"
+ ],
+ [
+ 0.4,
+ "#fde0ef"
+ ],
+ [
+ 0.5,
+ "#f7f7f7"
+ ],
+ [
+ 0.6,
+ "#e6f5d0"
+ ],
+ [
+ 0.7,
+ "#b8e186"
+ ],
+ [
+ 0.8,
+ "#7fbc41"
+ ],
+ [
+ 0.9,
+ "#4d9221"
+ ],
+ [
+ 1,
+ "#276419"
+ ]
+ ],
+ "sequential": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "sequentialminus": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ]
+ },
+ "colorway": [
+ "#636efa",
+ "#EF553B",
+ "#00cc96",
+ "#ab63fa",
+ "#FFA15A",
+ "#19d3f3",
+ "#FF6692",
+ "#B6E880",
+ "#FF97FF",
+ "#FECB52"
+ ],
+ "font": {
+ "color": "#2a3f5f"
+ },
+ "geo": {
+ "bgcolor": "white",
+ "lakecolor": "white",
+ "landcolor": "#E5ECF6",
+ "showlakes": true,
+ "showland": true,
+ "subunitcolor": "white"
+ },
+ "hoverlabel": {
+ "align": "left"
+ },
+ "hovermode": "closest",
+ "mapbox": {
+ "style": "light"
+ },
+ "paper_bgcolor": "white",
+ "plot_bgcolor": "#E5ECF6",
+ "polar": {
+ "angularaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "radialaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "scene": {
+ "xaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "yaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "zaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ }
+ },
+ "shapedefaults": {
+ "line": {
+ "color": "#2a3f5f"
+ }
+ },
+ "ternary": {
+ "aaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "baxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "caxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "title": {
+ "x": 0.05
+ },
+ "xaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ },
+ "yaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ }
+ }
+ },
+ "xaxis": {
+ "anchor": "y",
+ "domain": [
+ 0,
+ 1
+ ],
+ "title": {
+ "text": "CM"
+ }
+ },
+ "yaxis": {
+ "anchor": "x",
+ "domain": [
+ 0,
+ 1
+ ],
+ "title": {
+ "text": "count"
+ }
+ }
+ }
+ }
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Inspect the distribution of the CM column of data_model with a histogram\n",
+ "fig = px.histogram(data_model, x=\"CM\")\n",
+ "fig.show()"
+ ]
+ },
{
"cell_type": "markdown",
"id": "92d6156a",
@@ -795,7 +1994,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 198,
"id": "1b156435",
"metadata": {},
"outputs": [
@@ -805,7 +2004,7 @@
"(824, 13)"
]
},
- "execution_count": 6,
+ "execution_count": 198,
"metadata": {},
"output_type": "execute_result"
}
@@ -817,30 +2016,30 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 199,
"id": "0ef0fcc0",
"metadata": {},
"outputs": [],
"source": [
- "#Séparation en variables qualitatives ou catégorielles\n",
+ "# Split the columns of data_set into: columns with missing values, numeric columns, categorical columns (variables_01 is initialised but never filled by this loop)\n",
"variables_na = []\n",
"variables_numeriques = []\n",
"variables_01 = []\n",
"variables_categorielles = []\n",
"for colu in data_set.columns:\n",
- " if True in data_set[colu].isna().unique() :\n",
+ " if True in data_set[colu].isna().unique():  # column has at least one missing value\n",
" variables_na.append(data_set[colu])\n",
- " else :\n",
- " if str(data_set[colu].dtypes) in [\"int32\",\"int64\",\"float64\"]:\n",
- " if len(data_set[colu].unique())==2 :\n",
+ " else:\n",
+ " if str(data_set[colu].dtypes) in [\"int32\", \"int64\", \"float64\"]:  # numeric dtype\n",
+ " if len(data_set[colu].unique()) == 2:  # exactly two distinct values: stored with the categorical columns\n",
" variables_categorielles.append(data_set[colu])\n",
- " else :\n",
+ " else:\n",
" variables_numeriques.append(data_set[colu])\n",
- " else :\n",
- " if len(data_set[colu].unique())==2 :\n",
+ " else:  # any other dtype\n",
+ " if len(data_set[colu].unique()) == 2:  # NOTE(review): both branches below append to the same list\n",
" variables_categorielles.append(data_set[colu])\n",
- " else :\n",
- " variables_categorielles.append(data_set[colu])"
+ " else:\n",
+ " variables_categorielles.append(data_set[colu])\n"
]
},
{
@@ -853,7 +2052,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 200,
"id": "e130aae5",
"metadata": {},
"outputs": [],
@@ -863,942 +2062,416 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 201,
"id": "c39e2ad0",
"metadata": {},
"outputs": [
{
"data": {
- "application/vnd.plotly.v1+json": {
- "config": {
- "plotlyServerURL": "https://plot.ly"
- },
- "data": [
+ "application/vnd.microsoft.datawrangler.viewer.v0+json": {
+ "columns": [
{
- "coloraxis": "coloraxis",
- "hovertemplate": "x: %{x}
y: %{y}
color: %{z}",
- "name": "0",
- "texttemplate": "%{z:.2f}",
- "type": "heatmap",
- "x": [
- "CONTRAT_ANCIENNETE",
- "FREQUENCE_PAIEMENT_COTISATION",
- "GROUPE_KM",
- "ZONE_RISQUE",
- "GENRE",
- "DEUXIEME_CONDUCTEUR",
- "ENERGIE",
- "EQUIPEMENT_SECURITE",
- "VALEUR_DU_BIEN"
- ],
- "xaxis": "x",
- "y": [
- "CONTRAT_ANCIENNETE",
- "FREQUENCE_PAIEMENT_COTISATION",
- "GROUPE_KM",
- "ZONE_RISQUE",
- "GENRE",
- "DEUXIEME_CONDUCTEUR",
- "ENERGIE",
- "EQUIPEMENT_SECURITE",
- "VALEUR_DU_BIEN"
- ],
- "yaxis": "y",
- "z": {
- "bdata": "AAAAAAAA8D8AAAAAAAAAACoCGzzITrA/jS6+t390sj/aAKYMJa2eP5RMqUS3uZs/ytNpsBVXkz8AAAAAAAAAAJsekiMPM4I/AAAAAAAAAAAAAAAAAADwPwAAAAAAAAAAAAAAAAAAAABgNwyfFOK3Px3tLvtk1qI/VTS7w965nj/DbHQwNU6sP6xOyIjBVMQ/KwIbPMhOsD8AAAAAAAAAAAAAAAAAAPA/JGwWgOwjwz/Y12crRVC2P1AU8aUpk3Y/tZ25v8HgyT9++YWBDBq6PxMKBP1KAMk/ki6+t390sj8AAAAAAAAAACNsFoDsI8M/AAAAAAAA8D8AAAAAAAAAAOzpAHMW1bU/OToUIB5twT+gpoD1ZjrEP/5ATjN+vpg/0gCmDCWtnj9gNwyfFOK3P9jXZytFULY/AAAAAAAAAAAAAAAAAADwPwAAAAAAAAAA2p0N4q1bwz/UsLoqS0u5PxFqf8IHB9E/lEypRLe5mz8d7S77ZNaiP1AU8aUpk3Y/7OkAcxbVtT8AAAAAAAAAAAAAAAAAAPA/AAAAAAAAAAAAAAAAAAAAAOYlMsJ0brs/ytNpsBVXkz9RNLvD3rmeP7edub/B4Mk/OjoUIB5twT/anQ3irVvDPwAAAAAAAAAAAAAAAAAA8D8nEbUEUmnAP+SA2g/TvNE/AAAAAAAAAADDbHQwNU6sP335hYEMGro/oKaA9WY6xD/UsLoqS0u5PwAAAAAAAAAAJxG1BFJpwD8AAAAAAADwP+fmCf6XRco/mx6SIw8zgj+rTsiIwVTEPxIKBP1KAMk//kBOM36+mD8Ran/CBwfRP+YlMsJ0brs/5YDaD9O80T/n5gn+l0XKPwAAAAAAAPA/",
- "dtype": "f8",
- "shape": "9, 9"
- }
+ "name": "index",
+ "rawType": "object",
+ "type": "string"
+ },
+ {
+ "name": "CONTRAT_ANCIENNETE",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "FREQUENCE_PAIEMENT_COTISATION",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "GROUPE_KM",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ZONE_RISQUE",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "GENRE",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "DEUXIEME_CONDUCTEUR",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ENERGIE",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "EQUIPEMENT_SECURITE",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "VALEUR_DU_BIEN",
+ "rawType": "float64",
+ "type": "float"
}
],
- "layout": {
- "coloraxis": {
- "colorscale": [
- [
- 0,
- "rgb(5,48,97)"
- ],
- [
- 0.1,
- "rgb(33,102,172)"
- ],
- [
- 0.2,
- "rgb(67,147,195)"
- ],
- [
- 0.3,
- "rgb(146,197,222)"
- ],
- [
- 0.4,
- "rgb(209,229,240)"
- ],
- [
- 0.5,
- "rgb(247,247,247)"
- ],
- [
- 0.6,
- "rgb(253,219,199)"
- ],
- [
- 0.7,
- "rgb(244,165,130)"
- ],
- [
- 0.8,
- "rgb(214,96,77)"
- ],
- [
- 0.9,
- "rgb(178,24,43)"
- ],
- [
- 1,
- "rgb(103,0,31)"
- ]
- ]
- },
- "template": {
- "data": {
- "bar": [
- {
- "error_x": {
- "color": "#2a3f5f"
- },
- "error_y": {
- "color": "#2a3f5f"
- },
- "marker": {
- "line": {
- "color": "#E5ECF6",
- "width": 0.5
- },
- "pattern": {
- "fillmode": "overlay",
- "size": 10,
- "solidity": 0.2
- }
- },
- "type": "bar"
- }
- ],
- "barpolar": [
- {
- "marker": {
- "line": {
- "color": "#E5ECF6",
- "width": 0.5
- },
- "pattern": {
- "fillmode": "overlay",
- "size": 10,
- "solidity": 0.2
- }
- },
- "type": "barpolar"
- }
- ],
- "carpet": [
- {
- "aaxis": {
- "endlinecolor": "#2a3f5f",
- "gridcolor": "white",
- "linecolor": "white",
- "minorgridcolor": "white",
- "startlinecolor": "#2a3f5f"
- },
- "baxis": {
- "endlinecolor": "#2a3f5f",
- "gridcolor": "white",
- "linecolor": "white",
- "minorgridcolor": "white",
- "startlinecolor": "#2a3f5f"
- },
- "type": "carpet"
- }
- ],
- "choropleth": [
- {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- },
- "type": "choropleth"
- }
- ],
- "contour": [
- {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- },
- "colorscale": [
- [
- 0,
- "#0d0887"
- ],
- [
- 0.1111111111111111,
- "#46039f"
- ],
- [
- 0.2222222222222222,
- "#7201a8"
- ],
- [
- 0.3333333333333333,
- "#9c179e"
- ],
- [
- 0.4444444444444444,
- "#bd3786"
- ],
- [
- 0.5555555555555556,
- "#d8576b"
- ],
- [
- 0.6666666666666666,
- "#ed7953"
- ],
- [
- 0.7777777777777778,
- "#fb9f3a"
- ],
- [
- 0.8888888888888888,
- "#fdca26"
- ],
- [
- 1,
- "#f0f921"
- ]
- ],
- "type": "contour"
- }
- ],
- "contourcarpet": [
- {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- },
- "type": "contourcarpet"
- }
- ],
- "heatmap": [
- {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- },
- "colorscale": [
- [
- 0,
- "#0d0887"
- ],
- [
- 0.1111111111111111,
- "#46039f"
- ],
- [
- 0.2222222222222222,
- "#7201a8"
- ],
- [
- 0.3333333333333333,
- "#9c179e"
- ],
- [
- 0.4444444444444444,
- "#bd3786"
- ],
- [
- 0.5555555555555556,
- "#d8576b"
- ],
- [
- 0.6666666666666666,
- "#ed7953"
- ],
- [
- 0.7777777777777778,
- "#fb9f3a"
- ],
- [
- 0.8888888888888888,
- "#fdca26"
- ],
- [
- 1,
- "#f0f921"
- ]
- ],
- "type": "heatmap"
- }
- ],
- "histogram": [
- {
- "marker": {
- "pattern": {
- "fillmode": "overlay",
- "size": 10,
- "solidity": 0.2
- }
- },
- "type": "histogram"
- }
- ],
- "histogram2d": [
- {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- },
- "colorscale": [
- [
- 0,
- "#0d0887"
- ],
- [
- 0.1111111111111111,
- "#46039f"
- ],
- [
- 0.2222222222222222,
- "#7201a8"
- ],
- [
- 0.3333333333333333,
- "#9c179e"
- ],
- [
- 0.4444444444444444,
- "#bd3786"
- ],
- [
- 0.5555555555555556,
- "#d8576b"
- ],
- [
- 0.6666666666666666,
- "#ed7953"
- ],
- [
- 0.7777777777777778,
- "#fb9f3a"
- ],
- [
- 0.8888888888888888,
- "#fdca26"
- ],
- [
- 1,
- "#f0f921"
- ]
- ],
- "type": "histogram2d"
- }
- ],
- "histogram2dcontour": [
- {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- },
- "colorscale": [
- [
- 0,
- "#0d0887"
- ],
- [
- 0.1111111111111111,
- "#46039f"
- ],
- [
- 0.2222222222222222,
- "#7201a8"
- ],
- [
- 0.3333333333333333,
- "#9c179e"
- ],
- [
- 0.4444444444444444,
- "#bd3786"
- ],
- [
- 0.5555555555555556,
- "#d8576b"
- ],
- [
- 0.6666666666666666,
- "#ed7953"
- ],
- [
- 0.7777777777777778,
- "#fb9f3a"
- ],
- [
- 0.8888888888888888,
- "#fdca26"
- ],
- [
- 1,
- "#f0f921"
- ]
- ],
- "type": "histogram2dcontour"
- }
- ],
- "mesh3d": [
- {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- },
- "type": "mesh3d"
- }
- ],
- "parcoords": [
- {
- "line": {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- }
- },
- "type": "parcoords"
- }
- ],
- "pie": [
- {
- "automargin": true,
- "type": "pie"
- }
- ],
- "scatter": [
- {
- "fillpattern": {
- "fillmode": "overlay",
- "size": 10,
- "solidity": 0.2
- },
- "type": "scatter"
- }
- ],
- "scatter3d": [
- {
- "line": {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- }
- },
- "marker": {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- }
- },
- "type": "scatter3d"
- }
- ],
- "scattercarpet": [
- {
- "marker": {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- }
- },
- "type": "scattercarpet"
- }
- ],
- "scattergeo": [
- {
- "marker": {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- }
- },
- "type": "scattergeo"
- }
- ],
- "scattergl": [
- {
- "marker": {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- }
- },
- "type": "scattergl"
- }
- ],
- "scattermap": [
- {
- "marker": {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- }
- },
- "type": "scattermap"
- }
- ],
- "scattermapbox": [
- {
- "marker": {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- }
- },
- "type": "scattermapbox"
- }
- ],
- "scatterpolar": [
- {
- "marker": {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- }
- },
- "type": "scatterpolar"
- }
- ],
- "scatterpolargl": [
- {
- "marker": {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- }
- },
- "type": "scatterpolargl"
- }
- ],
- "scatterternary": [
- {
- "marker": {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- }
- },
- "type": "scatterternary"
- }
- ],
- "surface": [
- {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- },
- "colorscale": [
- [
- 0,
- "#0d0887"
- ],
- [
- 0.1111111111111111,
- "#46039f"
- ],
- [
- 0.2222222222222222,
- "#7201a8"
- ],
- [
- 0.3333333333333333,
- "#9c179e"
- ],
- [
- 0.4444444444444444,
- "#bd3786"
- ],
- [
- 0.5555555555555556,
- "#d8576b"
- ],
- [
- 0.6666666666666666,
- "#ed7953"
- ],
- [
- 0.7777777777777778,
- "#fb9f3a"
- ],
- [
- 0.8888888888888888,
- "#fdca26"
- ],
- [
- 1,
- "#f0f921"
- ]
- ],
- "type": "surface"
- }
- ],
- "table": [
- {
- "cells": {
- "fill": {
- "color": "#EBF0F8"
- },
- "line": {
- "color": "white"
- }
- },
- "header": {
- "fill": {
- "color": "#C8D4E3"
- },
- "line": {
- "color": "white"
- }
- },
- "type": "table"
- }
- ]
- },
- "layout": {
- "annotationdefaults": {
- "arrowcolor": "#2a3f5f",
- "arrowhead": 0,
- "arrowwidth": 1
- },
- "autotypenumbers": "strict",
- "coloraxis": {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- }
- },
- "colorscale": {
- "diverging": [
- [
- 0,
- "#8e0152"
- ],
- [
- 0.1,
- "#c51b7d"
- ],
- [
- 0.2,
- "#de77ae"
- ],
- [
- 0.3,
- "#f1b6da"
- ],
- [
- 0.4,
- "#fde0ef"
- ],
- [
- 0.5,
- "#f7f7f7"
- ],
- [
- 0.6,
- "#e6f5d0"
- ],
- [
- 0.7,
- "#b8e186"
- ],
- [
- 0.8,
- "#7fbc41"
- ],
- [
- 0.9,
- "#4d9221"
- ],
- [
- 1,
- "#276419"
- ]
- ],
- "sequential": [
- [
- 0,
- "#0d0887"
- ],
- [
- 0.1111111111111111,
- "#46039f"
- ],
- [
- 0.2222222222222222,
- "#7201a8"
- ],
- [
- 0.3333333333333333,
- "#9c179e"
- ],
- [
- 0.4444444444444444,
- "#bd3786"
- ],
- [
- 0.5555555555555556,
- "#d8576b"
- ],
- [
- 0.6666666666666666,
- "#ed7953"
- ],
- [
- 0.7777777777777778,
- "#fb9f3a"
- ],
- [
- 0.8888888888888888,
- "#fdca26"
- ],
- [
- 1,
- "#f0f921"
- ]
- ],
- "sequentialminus": [
- [
- 0,
- "#0d0887"
- ],
- [
- 0.1111111111111111,
- "#46039f"
- ],
- [
- 0.2222222222222222,
- "#7201a8"
- ],
- [
- 0.3333333333333333,
- "#9c179e"
- ],
- [
- 0.4444444444444444,
- "#bd3786"
- ],
- [
- 0.5555555555555556,
- "#d8576b"
- ],
- [
- 0.6666666666666666,
- "#ed7953"
- ],
- [
- 0.7777777777777778,
- "#fb9f3a"
- ],
- [
- 0.8888888888888888,
- "#fdca26"
- ],
- [
- 1,
- "#f0f921"
- ]
- ]
- },
- "colorway": [
- "#636efa",
- "#EF553B",
- "#00cc96",
- "#ab63fa",
- "#FFA15A",
- "#19d3f3",
- "#FF6692",
- "#B6E880",
- "#FF97FF",
- "#FECB52"
- ],
- "font": {
- "color": "#2a3f5f"
- },
- "geo": {
- "bgcolor": "white",
- "lakecolor": "white",
- "landcolor": "#E5ECF6",
- "showlakes": true,
- "showland": true,
- "subunitcolor": "white"
- },
- "hoverlabel": {
- "align": "left"
- },
- "hovermode": "closest",
- "mapbox": {
- "style": "light"
- },
- "paper_bgcolor": "white",
- "plot_bgcolor": "#E5ECF6",
- "polar": {
- "angularaxis": {
- "gridcolor": "white",
- "linecolor": "white",
- "ticks": ""
- },
- "bgcolor": "#E5ECF6",
- "radialaxis": {
- "gridcolor": "white",
- "linecolor": "white",
- "ticks": ""
- }
- },
- "scene": {
- "xaxis": {
- "backgroundcolor": "#E5ECF6",
- "gridcolor": "white",
- "gridwidth": 2,
- "linecolor": "white",
- "showbackground": true,
- "ticks": "",
- "zerolinecolor": "white"
- },
- "yaxis": {
- "backgroundcolor": "#E5ECF6",
- "gridcolor": "white",
- "gridwidth": 2,
- "linecolor": "white",
- "showbackground": true,
- "ticks": "",
- "zerolinecolor": "white"
- },
- "zaxis": {
- "backgroundcolor": "#E5ECF6",
- "gridcolor": "white",
- "gridwidth": 2,
- "linecolor": "white",
- "showbackground": true,
- "ticks": "",
- "zerolinecolor": "white"
- }
- },
- "shapedefaults": {
- "line": {
- "color": "#2a3f5f"
- }
- },
- "ternary": {
- "aaxis": {
- "gridcolor": "white",
- "linecolor": "white",
- "ticks": ""
- },
- "baxis": {
- "gridcolor": "white",
- "linecolor": "white",
- "ticks": ""
- },
- "bgcolor": "#E5ECF6",
- "caxis": {
- "gridcolor": "white",
- "linecolor": "white",
- "ticks": ""
- }
- },
- "title": {
- "x": 0.05
- },
- "xaxis": {
- "automargin": true,
- "gridcolor": "white",
- "linecolor": "white",
- "ticks": "",
- "title": {
- "standoff": 15
- },
- "zerolinecolor": "white",
- "zerolinewidth": 2
- },
- "yaxis": {
- "automargin": true,
- "gridcolor": "white",
- "linecolor": "white",
- "ticks": "",
- "title": {
- "standoff": 15
- },
- "zerolinecolor": "white",
- "zerolinewidth": 2
- }
- }
- },
- "title": {
- "text": "Matrice de corrélation des variables catégorielles (V de Cramér)"
- },
- "xaxis": {
- "anchor": "y",
- "domain": [
- 0,
- 1
- ]
- },
- "yaxis": {
- "anchor": "x",
- "autorange": "reversed",
- "domain": [
- 0,
- 1
- ]
- }
+ "ref": "2f5478f2-7cdc-47d7-aeff-3055a9f87820",
+ "rows": [
+ [
+ "CONTRAT_ANCIENNETE",
+ "1.0",
+ "0.0",
+ "0.01",
+ "0.02",
+ "0.01",
+ "0.01",
+ "0.01",
+ "0.0",
+ "0.01"
+ ],
+ [
+ "FREQUENCE_PAIEMENT_COTISATION",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.01",
+ "0.01",
+ "0.0",
+ "0.0",
+ "0.01",
+ "0.03"
+ ],
+ [
+ "GROUPE_KM",
+ "0.01",
+ "0.0",
+ "1.0",
+ "0.04",
+ "0.01",
+ "0.0",
+ "0.04",
+ "0.01",
+ "0.04"
+ ],
+ [
+ "ZONE_RISQUE",
+ "0.02",
+ "0.01",
+ "0.04",
+ "1.0",
+ "0.01",
+ "0.02",
+ "0.03",
+ "0.04",
+ "0.02"
+ ],
+ [
+ "GENRE",
+ "0.01",
+ "0.01",
+ "0.01",
+ "0.01",
+ "1.0",
+ "0.0",
+ "0.03",
+ "0.01",
+ "0.08"
+ ],
+ [
+ "DEUXIEME_CONDUCTEUR",
+ "0.01",
+ "0.0",
+ "0.0",
+ "0.02",
+ "0.0",
+ "0.99",
+ "0.0",
+ "0.0",
+ "0.02"
+ ],
+ [
+ "ENERGIE",
+ "0.01",
+ "0.0",
+ "0.04",
+ "0.03",
+ "0.03",
+ "0.0",
+ "1.0",
+ "0.02",
+ "0.08"
+ ],
+ [
+ "EQUIPEMENT_SECURITE",
+ "0.0",
+ "0.01",
+ "0.01",
+ "0.04",
+ "0.01",
+ "0.0",
+ "0.02",
+ "0.99",
+ "0.05"
+ ],
+ [
+ "VALEUR_DU_BIEN",
+ "0.01",
+ "0.03",
+ "0.04",
+ "0.02",
+ "0.08",
+ "0.02",
+ "0.08",
+ "0.05",
+ "1.0"
+ ]
+ ],
+ "shape": {
+ "columns": 9,
+ "rows": 9
}
- }
+ },
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " CONTRAT_ANCIENNETE | \n",
+ " FREQUENCE_PAIEMENT_COTISATION | \n",
+ " GROUPE_KM | \n",
+ " ZONE_RISQUE | \n",
+ " GENRE | \n",
+ " DEUXIEME_CONDUCTEUR | \n",
+ " ENERGIE | \n",
+ " EQUIPEMENT_SECURITE | \n",
+ " VALEUR_DU_BIEN | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | CONTRAT_ANCIENNETE | \n",
+ " 1.00 | \n",
+ " 0.00 | \n",
+ " 0.01 | \n",
+ " 0.02 | \n",
+ " 0.01 | \n",
+ " 0.01 | \n",
+ " 0.01 | \n",
+ " 0.00 | \n",
+ " 0.01 | \n",
+ "
\n",
+ " \n",
+ " | FREQUENCE_PAIEMENT_COTISATION | \n",
+ " 0.00 | \n",
+ " 1.00 | \n",
+ " 0.00 | \n",
+ " 0.01 | \n",
+ " 0.01 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.01 | \n",
+ " 0.03 | \n",
+ "
\n",
+ " \n",
+ " | GROUPE_KM | \n",
+ " 0.01 | \n",
+ " 0.00 | \n",
+ " 1.00 | \n",
+ " 0.04 | \n",
+ " 0.01 | \n",
+ " 0.00 | \n",
+ " 0.04 | \n",
+ " 0.01 | \n",
+ " 0.04 | \n",
+ "
\n",
+ " \n",
+ " | ZONE_RISQUE | \n",
+ " 0.02 | \n",
+ " 0.01 | \n",
+ " 0.04 | \n",
+ " 1.00 | \n",
+ " 0.01 | \n",
+ " 0.02 | \n",
+ " 0.03 | \n",
+ " 0.04 | \n",
+ " 0.02 | \n",
+ "
\n",
+ " \n",
+ " | GENRE | \n",
+ " 0.01 | \n",
+ " 0.01 | \n",
+ " 0.01 | \n",
+ " 0.01 | \n",
+ " 1.00 | \n",
+ " 0.00 | \n",
+ " 0.03 | \n",
+ " 0.01 | \n",
+ " 0.08 | \n",
+ "
\n",
+ " \n",
+ " | DEUXIEME_CONDUCTEUR | \n",
+ " 0.01 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.02 | \n",
+ " 0.00 | \n",
+ " 0.99 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.02 | \n",
+ "
\n",
+ " \n",
+ " | ENERGIE | \n",
+ " 0.01 | \n",
+ " 0.00 | \n",
+ " 0.04 | \n",
+ " 0.03 | \n",
+ " 0.03 | \n",
+ " 0.00 | \n",
+ " 1.00 | \n",
+ " 0.02 | \n",
+ " 0.08 | \n",
+ "
\n",
+ " \n",
+ " | EQUIPEMENT_SECURITE | \n",
+ " 0.00 | \n",
+ " 0.01 | \n",
+ " 0.01 | \n",
+ " 0.04 | \n",
+ " 0.01 | \n",
+ " 0.00 | \n",
+ " 0.02 | \n",
+ " 0.99 | \n",
+ " 0.05 | \n",
+ "
\n",
+ " \n",
+ " | VALEUR_DU_BIEN | \n",
+ " 0.01 | \n",
+ " 0.03 | \n",
+ " 0.04 | \n",
+ " 0.02 | \n",
+ " 0.08 | \n",
+ " 0.02 | \n",
+ " 0.08 | \n",
+ " 0.05 | \n",
+ " 1.00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " CONTRAT_ANCIENNETE \\\n",
+ "CONTRAT_ANCIENNETE 1.00 \n",
+ "FREQUENCE_PAIEMENT_COTISATION 0.00 \n",
+ "GROUPE_KM 0.01 \n",
+ "ZONE_RISQUE 0.02 \n",
+ "GENRE 0.01 \n",
+ "DEUXIEME_CONDUCTEUR 0.01 \n",
+ "ENERGIE 0.01 \n",
+ "EQUIPEMENT_SECURITE 0.00 \n",
+ "VALEUR_DU_BIEN 0.01 \n",
+ "\n",
+ " FREQUENCE_PAIEMENT_COTISATION GROUPE_KM \\\n",
+ "CONTRAT_ANCIENNETE 0.00 0.01 \n",
+ "FREQUENCE_PAIEMENT_COTISATION 1.00 0.00 \n",
+ "GROUPE_KM 0.00 1.00 \n",
+ "ZONE_RISQUE 0.01 0.04 \n",
+ "GENRE 0.01 0.01 \n",
+ "DEUXIEME_CONDUCTEUR 0.00 0.00 \n",
+ "ENERGIE 0.00 0.04 \n",
+ "EQUIPEMENT_SECURITE 0.01 0.01 \n",
+ "VALEUR_DU_BIEN 0.03 0.04 \n",
+ "\n",
+ " ZONE_RISQUE GENRE DEUXIEME_CONDUCTEUR \\\n",
+ "CONTRAT_ANCIENNETE 0.02 0.01 0.01 \n",
+ "FREQUENCE_PAIEMENT_COTISATION 0.01 0.01 0.00 \n",
+ "GROUPE_KM 0.04 0.01 0.00 \n",
+ "ZONE_RISQUE 1.00 0.01 0.02 \n",
+ "GENRE 0.01 1.00 0.00 \n",
+ "DEUXIEME_CONDUCTEUR 0.02 0.00 0.99 \n",
+ "ENERGIE 0.03 0.03 0.00 \n",
+ "EQUIPEMENT_SECURITE 0.04 0.01 0.00 \n",
+ "VALEUR_DU_BIEN 0.02 0.08 0.02 \n",
+ "\n",
+ " ENERGIE EQUIPEMENT_SECURITE VALEUR_DU_BIEN \n",
+ "CONTRAT_ANCIENNETE 0.01 0.00 0.01 \n",
+ "FREQUENCE_PAIEMENT_COTISATION 0.00 0.01 0.03 \n",
+ "GROUPE_KM 0.04 0.01 0.04 \n",
+ "ZONE_RISQUE 0.03 0.04 0.02 \n",
+ "GENRE 0.03 0.01 0.08 \n",
+ "DEUXIEME_CONDUCTEUR 0.00 0.00 0.02 \n",
+ "ENERGIE 1.00 0.02 0.08 \n",
+ "EQUIPEMENT_SECURITE 0.02 0.99 0.05 \n",
+ "VALEUR_DU_BIEN 0.08 0.05 1.00 "
+ ]
},
+ "execution_count": 201,
"metadata": {},
- "output_type": "display_data"
+ "output_type": "execute_result"
}
],
"source": [
- "# Matrice de corrélation pour les variables catégorielles (V de Cramér)\n",
- "def cramers_v(confusion_matrix):\n",
- " \"\"\"Calcule le V de Cramér à partir d'une matrice de contingence\"\"\"\n",
- " chi2 = chi2_contingency(confusion_matrix)[0]\n",
- " n = confusion_matrix.sum().sum()\n",
- " phi2 = chi2 / n\n",
- " r, k = confusion_matrix.shape\n",
- " phi2corr = max(0, phi2 - ((k-1)*(r-1))/(n-1))\n",
- " rcorr = r - ((r-1)**2)/(n-1)\n",
- " kcorr = k - ((k-1)**2)/(n-1)\n",
- " return np.sqrt(phi2corr / min((kcorr-1), (rcorr-1)))\n",
+ "# Matrice des V de Cramér pour chaque paire de variables catégorielles\n",
+ "rows = []\n",
"\n",
- "# Créer la matrice de corrélation\n",
- "categorical_cols = vars_categorielles.columns\n",
- "n_vars = len(categorical_cols)\n",
- "cramers_matrix = np.zeros((n_vars, n_vars))\n",
+ "# Fill the matrix one row at a time: for each var1, collect the cramers_V\n",
+ "# score against every categorical variable, rounded to two decimals.\n",
+ "for var1 in vars_categorielles:\n",
+ "    col = [\n",
+ "        round(cramers_V(vars_categorielles[var1], vars_categorielles[var2]), 2)\n",
+ "        for var2 in vars_categorielles\n",
+ "    ]\n",
+ "    rows.append(col)\n",
"\n",
- "for i, col1 in enumerate(categorical_cols):\n",
- " for j, col2 in enumerate(categorical_cols):\n",
- " if i == j:\n",
- " cramers_matrix[i, j] = 1.0\n",
- " else:\n",
- " confusion_matrix = pd.crosstab(vars_categorielles[col1], vars_categorielles[col2])\n",
- " cramers_matrix[i, j] = cramers_v(confusion_matrix)\n",
+ "# Wrap the nested list as a square table labelled with the variable names\n",
+ "# on both axes (rows and columns are the same categorical variables).\n",
+ "cramers_results = np.array(rows)\n",
+ "v_cramer_resultats = pd.DataFrame(\n",
+ "    data=cramers_results, index=vars_categorielles.columns, columns=vars_categorielles.columns\n",
+ ")\n",
"\n",
- "# Créer le DataFrame de corrélation\n",
- "correlation_cat = pd.DataFrame(cramers_matrix,\n",
- " index=categorical_cols,\n",
- " columns=categorical_cols)\n",
- "\n",
- "# Visualiser avec Plotly\n",
- "fig = px.imshow(correlation_cat,\n",
- " text_auto='.2f',\n",
- " aspect=\"auto\",\n",
- " color_continuous_scale='RdBu_r',\n",
- " title='Matrice de corrélation des variables catégorielles (V de Cramér)')\n",
- "fig.show()"
+ "v_cramer_resultats"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 202,
+ "id": "1755a2a4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Report each pair (j > i, so once each) whose cramers_V score exceeds 0.7.\n",
+ "# NOTE(review): cramers_V as defined earlier returns chi2 / (n * (min_dim - 1))\n",
+ "# with no square root (V squared), so 0.7 here is stricter than V > 0.7.\n",
+ "for i in range(v_cramer_resultats.shape[0]):\n",
+ "    for j in range(i + 1, v_cramer_resultats.shape[0]):\n",
+ "        v_ij = v_cramer_resultats.iloc[i, j]\n",
+ "        if v_ij > 0.7:\n",
+ "            print(\n",
+ "                v_cramer_resultats.index[i] + \" et \" + v_cramer_resultats.columns[j]\n",
+ "                + \" sont trop dépendantes, V-CRAMER = \" + str(v_ij)\n",
+ "            )\n"
]
},
{
@@ -1811,912 +2484,188 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 203,
"id": "a16215ab",
"metadata": {},
"outputs": [],
"source": [
- "vars_numeriques = pd.DataFrame(variables_numeriques).transpose()"
+ "vars_numeriques = pd.DataFrame(variables_numeriques).transpose()\n"
]
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 204,
"id": "532ca6c4",
"metadata": {},
"outputs": [
{
"data": {
- "application/vnd.plotly.v1+json": {
- "config": {
- "plotlyServerURL": "https://plot.ly"
- },
- "data": [
+ "application/vnd.microsoft.datawrangler.viewer.v0+json": {
+ "columns": [
{
- "coloraxis": "coloraxis",
- "hovertemplate": "x: %{x}
y: %{y}
color: %{z}",
- "name": "0",
- "texttemplate": "%{z}",
- "type": "heatmap",
- "x": [
- "ANNEE_CTR",
- "AGE_ASSURE_PRINCIPAL",
- "ANCIENNETE_PERMIS",
- "ANNEE_CONSTRUCTION"
- ],
- "xaxis": "x",
- "y": [
- "ANNEE_CTR",
- "AGE_ASSURE_PRINCIPAL",
- "ANCIENNETE_PERMIS",
- "ANNEE_CONSTRUCTION"
- ],
- "yaxis": "y",
- "z": {
- "bdata": "AAAAAAAA8D+ybZcEUUCbP/CBLCtO46Q/qr2Q49LN2D+ybZcEUUCbPwAAAAAAAPA/slV7SAtP4T84L73yETWgv/CBLCtO46Q/slV7SAtP4T8AAAAAAADwP0I6y25dD6E/qr2Q49LN2D84L73yETWgv0I6y25dD6E/AAAAAAAA8D8=",
- "dtype": "f8",
- "shape": "4, 4"
- }
+ "name": "index",
+ "rawType": "object",
+ "type": "string"
+ },
+ {
+ "name": "ANNEE_CTR",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "AGE_ASSURE_PRINCIPAL",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ANCIENNETE_PERMIS",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ANNEE_CONSTRUCTION",
+ "rawType": "float64",
+ "type": "float"
}
],
- "layout": {
- "coloraxis": {
- "colorscale": [
- [
- 0,
- "rgb(5,48,97)"
- ],
- [
- 0.1,
- "rgb(33,102,172)"
- ],
- [
- 0.2,
- "rgb(67,147,195)"
- ],
- [
- 0.3,
- "rgb(146,197,222)"
- ],
- [
- 0.4,
- "rgb(209,229,240)"
- ],
- [
- 0.5,
- "rgb(247,247,247)"
- ],
- [
- 0.6,
- "rgb(253,219,199)"
- ],
- [
- 0.7,
- "rgb(244,165,130)"
- ],
- [
- 0.8,
- "rgb(214,96,77)"
- ],
- [
- 0.9,
- "rgb(178,24,43)"
- ],
- [
- 1,
- "rgb(103,0,31)"
- ]
- ]
- },
- "template": {
- "data": {
- "bar": [
- {
- "error_x": {
- "color": "#2a3f5f"
- },
- "error_y": {
- "color": "#2a3f5f"
- },
- "marker": {
- "line": {
- "color": "#E5ECF6",
- "width": 0.5
- },
- "pattern": {
- "fillmode": "overlay",
- "size": 10,
- "solidity": 0.2
- }
- },
- "type": "bar"
- }
- ],
- "barpolar": [
- {
- "marker": {
- "line": {
- "color": "#E5ECF6",
- "width": 0.5
- },
- "pattern": {
- "fillmode": "overlay",
- "size": 10,
- "solidity": 0.2
- }
- },
- "type": "barpolar"
- }
- ],
- "carpet": [
- {
- "aaxis": {
- "endlinecolor": "#2a3f5f",
- "gridcolor": "white",
- "linecolor": "white",
- "minorgridcolor": "white",
- "startlinecolor": "#2a3f5f"
- },
- "baxis": {
- "endlinecolor": "#2a3f5f",
- "gridcolor": "white",
- "linecolor": "white",
- "minorgridcolor": "white",
- "startlinecolor": "#2a3f5f"
- },
- "type": "carpet"
- }
- ],
- "choropleth": [
- {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- },
- "type": "choropleth"
- }
- ],
- "contour": [
- {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- },
- "colorscale": [
- [
- 0,
- "#0d0887"
- ],
- [
- 0.1111111111111111,
- "#46039f"
- ],
- [
- 0.2222222222222222,
- "#7201a8"
- ],
- [
- 0.3333333333333333,
- "#9c179e"
- ],
- [
- 0.4444444444444444,
- "#bd3786"
- ],
- [
- 0.5555555555555556,
- "#d8576b"
- ],
- [
- 0.6666666666666666,
- "#ed7953"
- ],
- [
- 0.7777777777777778,
- "#fb9f3a"
- ],
- [
- 0.8888888888888888,
- "#fdca26"
- ],
- [
- 1,
- "#f0f921"
- ]
- ],
- "type": "contour"
- }
- ],
- "contourcarpet": [
- {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- },
- "type": "contourcarpet"
- }
- ],
- "heatmap": [
- {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- },
- "colorscale": [
- [
- 0,
- "#0d0887"
- ],
- [
- 0.1111111111111111,
- "#46039f"
- ],
- [
- 0.2222222222222222,
- "#7201a8"
- ],
- [
- 0.3333333333333333,
- "#9c179e"
- ],
- [
- 0.4444444444444444,
- "#bd3786"
- ],
- [
- 0.5555555555555556,
- "#d8576b"
- ],
- [
- 0.6666666666666666,
- "#ed7953"
- ],
- [
- 0.7777777777777778,
- "#fb9f3a"
- ],
- [
- 0.8888888888888888,
- "#fdca26"
- ],
- [
- 1,
- "#f0f921"
- ]
- ],
- "type": "heatmap"
- }
- ],
- "histogram": [
- {
- "marker": {
- "pattern": {
- "fillmode": "overlay",
- "size": 10,
- "solidity": 0.2
- }
- },
- "type": "histogram"
- }
- ],
- "histogram2d": [
- {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- },
- "colorscale": [
- [
- 0,
- "#0d0887"
- ],
- [
- 0.1111111111111111,
- "#46039f"
- ],
- [
- 0.2222222222222222,
- "#7201a8"
- ],
- [
- 0.3333333333333333,
- "#9c179e"
- ],
- [
- 0.4444444444444444,
- "#bd3786"
- ],
- [
- 0.5555555555555556,
- "#d8576b"
- ],
- [
- 0.6666666666666666,
- "#ed7953"
- ],
- [
- 0.7777777777777778,
- "#fb9f3a"
- ],
- [
- 0.8888888888888888,
- "#fdca26"
- ],
- [
- 1,
- "#f0f921"
- ]
- ],
- "type": "histogram2d"
- }
- ],
- "histogram2dcontour": [
- {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- },
- "colorscale": [
- [
- 0,
- "#0d0887"
- ],
- [
- 0.1111111111111111,
- "#46039f"
- ],
- [
- 0.2222222222222222,
- "#7201a8"
- ],
- [
- 0.3333333333333333,
- "#9c179e"
- ],
- [
- 0.4444444444444444,
- "#bd3786"
- ],
- [
- 0.5555555555555556,
- "#d8576b"
- ],
- [
- 0.6666666666666666,
- "#ed7953"
- ],
- [
- 0.7777777777777778,
- "#fb9f3a"
- ],
- [
- 0.8888888888888888,
- "#fdca26"
- ],
- [
- 1,
- "#f0f921"
- ]
- ],
- "type": "histogram2dcontour"
- }
- ],
- "mesh3d": [
- {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- },
- "type": "mesh3d"
- }
- ],
- "parcoords": [
- {
- "line": {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- }
- },
- "type": "parcoords"
- }
- ],
- "pie": [
- {
- "automargin": true,
- "type": "pie"
- }
- ],
- "scatter": [
- {
- "fillpattern": {
- "fillmode": "overlay",
- "size": 10,
- "solidity": 0.2
- },
- "type": "scatter"
- }
- ],
- "scatter3d": [
- {
- "line": {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- }
- },
- "marker": {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- }
- },
- "type": "scatter3d"
- }
- ],
- "scattercarpet": [
- {
- "marker": {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- }
- },
- "type": "scattercarpet"
- }
- ],
- "scattergeo": [
- {
- "marker": {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- }
- },
- "type": "scattergeo"
- }
- ],
- "scattergl": [
- {
- "marker": {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- }
- },
- "type": "scattergl"
- }
- ],
- "scattermap": [
- {
- "marker": {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- }
- },
- "type": "scattermap"
- }
- ],
- "scattermapbox": [
- {
- "marker": {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- }
- },
- "type": "scattermapbox"
- }
- ],
- "scatterpolar": [
- {
- "marker": {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- }
- },
- "type": "scatterpolar"
- }
- ],
- "scatterpolargl": [
- {
- "marker": {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- }
- },
- "type": "scatterpolargl"
- }
- ],
- "scatterternary": [
- {
- "marker": {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- }
- },
- "type": "scatterternary"
- }
- ],
- "surface": [
- {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- },
- "colorscale": [
- [
- 0,
- "#0d0887"
- ],
- [
- 0.1111111111111111,
- "#46039f"
- ],
- [
- 0.2222222222222222,
- "#7201a8"
- ],
- [
- 0.3333333333333333,
- "#9c179e"
- ],
- [
- 0.4444444444444444,
- "#bd3786"
- ],
- [
- 0.5555555555555556,
- "#d8576b"
- ],
- [
- 0.6666666666666666,
- "#ed7953"
- ],
- [
- 0.7777777777777778,
- "#fb9f3a"
- ],
- [
- 0.8888888888888888,
- "#fdca26"
- ],
- [
- 1,
- "#f0f921"
- ]
- ],
- "type": "surface"
- }
- ],
- "table": [
- {
- "cells": {
- "fill": {
- "color": "#EBF0F8"
- },
- "line": {
- "color": "white"
- }
- },
- "header": {
- "fill": {
- "color": "#C8D4E3"
- },
- "line": {
- "color": "white"
- }
- },
- "type": "table"
- }
- ]
- },
- "layout": {
- "annotationdefaults": {
- "arrowcolor": "#2a3f5f",
- "arrowhead": 0,
- "arrowwidth": 1
- },
- "autotypenumbers": "strict",
- "coloraxis": {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- }
- },
- "colorscale": {
- "diverging": [
- [
- 0,
- "#8e0152"
- ],
- [
- 0.1,
- "#c51b7d"
- ],
- [
- 0.2,
- "#de77ae"
- ],
- [
- 0.3,
- "#f1b6da"
- ],
- [
- 0.4,
- "#fde0ef"
- ],
- [
- 0.5,
- "#f7f7f7"
- ],
- [
- 0.6,
- "#e6f5d0"
- ],
- [
- 0.7,
- "#b8e186"
- ],
- [
- 0.8,
- "#7fbc41"
- ],
- [
- 0.9,
- "#4d9221"
- ],
- [
- 1,
- "#276419"
- ]
- ],
- "sequential": [
- [
- 0,
- "#0d0887"
- ],
- [
- 0.1111111111111111,
- "#46039f"
- ],
- [
- 0.2222222222222222,
- "#7201a8"
- ],
- [
- 0.3333333333333333,
- "#9c179e"
- ],
- [
- 0.4444444444444444,
- "#bd3786"
- ],
- [
- 0.5555555555555556,
- "#d8576b"
- ],
- [
- 0.6666666666666666,
- "#ed7953"
- ],
- [
- 0.7777777777777778,
- "#fb9f3a"
- ],
- [
- 0.8888888888888888,
- "#fdca26"
- ],
- [
- 1,
- "#f0f921"
- ]
- ],
- "sequentialminus": [
- [
- 0,
- "#0d0887"
- ],
- [
- 0.1111111111111111,
- "#46039f"
- ],
- [
- 0.2222222222222222,
- "#7201a8"
- ],
- [
- 0.3333333333333333,
- "#9c179e"
- ],
- [
- 0.4444444444444444,
- "#bd3786"
- ],
- [
- 0.5555555555555556,
- "#d8576b"
- ],
- [
- 0.6666666666666666,
- "#ed7953"
- ],
- [
- 0.7777777777777778,
- "#fb9f3a"
- ],
- [
- 0.8888888888888888,
- "#fdca26"
- ],
- [
- 1,
- "#f0f921"
- ]
- ]
- },
- "colorway": [
- "#636efa",
- "#EF553B",
- "#00cc96",
- "#ab63fa",
- "#FFA15A",
- "#19d3f3",
- "#FF6692",
- "#B6E880",
- "#FF97FF",
- "#FECB52"
- ],
- "font": {
- "color": "#2a3f5f"
- },
- "geo": {
- "bgcolor": "white",
- "lakecolor": "white",
- "landcolor": "#E5ECF6",
- "showlakes": true,
- "showland": true,
- "subunitcolor": "white"
- },
- "hoverlabel": {
- "align": "left"
- },
- "hovermode": "closest",
- "mapbox": {
- "style": "light"
- },
- "paper_bgcolor": "white",
- "plot_bgcolor": "#E5ECF6",
- "polar": {
- "angularaxis": {
- "gridcolor": "white",
- "linecolor": "white",
- "ticks": ""
- },
- "bgcolor": "#E5ECF6",
- "radialaxis": {
- "gridcolor": "white",
- "linecolor": "white",
- "ticks": ""
- }
- },
- "scene": {
- "xaxis": {
- "backgroundcolor": "#E5ECF6",
- "gridcolor": "white",
- "gridwidth": 2,
- "linecolor": "white",
- "showbackground": true,
- "ticks": "",
- "zerolinecolor": "white"
- },
- "yaxis": {
- "backgroundcolor": "#E5ECF6",
- "gridcolor": "white",
- "gridwidth": 2,
- "linecolor": "white",
- "showbackground": true,
- "ticks": "",
- "zerolinecolor": "white"
- },
- "zaxis": {
- "backgroundcolor": "#E5ECF6",
- "gridcolor": "white",
- "gridwidth": 2,
- "linecolor": "white",
- "showbackground": true,
- "ticks": "",
- "zerolinecolor": "white"
- }
- },
- "shapedefaults": {
- "line": {
- "color": "#2a3f5f"
- }
- },
- "ternary": {
- "aaxis": {
- "gridcolor": "white",
- "linecolor": "white",
- "ticks": ""
- },
- "baxis": {
- "gridcolor": "white",
- "linecolor": "white",
- "ticks": ""
- },
- "bgcolor": "#E5ECF6",
- "caxis": {
- "gridcolor": "white",
- "linecolor": "white",
- "ticks": ""
- }
- },
- "title": {
- "x": 0.05
- },
- "xaxis": {
- "automargin": true,
- "gridcolor": "white",
- "linecolor": "white",
- "ticks": "",
- "title": {
- "standoff": 15
- },
- "zerolinecolor": "white",
- "zerolinewidth": 2
- },
- "yaxis": {
- "automargin": true,
- "gridcolor": "white",
- "linecolor": "white",
- "ticks": "",
- "title": {
- "standoff": 15
- },
- "zerolinecolor": "white",
- "zerolinewidth": 2
- }
- }
- },
- "title": {
- "text": "Matrice de corrélation des variables numériques"
- },
- "xaxis": {
- "anchor": "y",
- "domain": [
- 0,
- 1
- ]
- },
- "yaxis": {
- "anchor": "x",
- "autorange": "reversed",
- "domain": [
- 0,
- 1
- ]
- }
+ "ref": "cbf56bf5-5e8b-495e-a03c-dc55b1f8dfd7",
+ "rows": [
+ [
+ "ANNEE_CTR",
+ "1.0",
+ "0.0266125353863182",
+ "0.04079670216583853",
+ "0.38756248686965"
+ ],
+ [
+ "AGE_ASSURE_PRINCIPAL",
+ "0.0266125353863182",
+ "1.0",
+ "0.5408989349040694",
+ "-0.03165489280817585"
+ ],
+ [
+ "ANCIENNETE_PERMIS",
+ "0.04079670216583853",
+ "0.5408989349040694",
+ "1.0",
+ "0.033320350432053406"
+ ],
+ [
+ "ANNEE_CONSTRUCTION",
+ "0.38756248686965",
+ "-0.03165489280817585",
+ "0.033320350432053406",
+ "1.0"
+ ]
+ ],
+ "shape": {
+ "columns": 4,
+ "rows": 4
}
- }
+ },
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ANNEE_CTR | \n",
+ " AGE_ASSURE_PRINCIPAL | \n",
+ " ANCIENNETE_PERMIS | \n",
+ " ANNEE_CONSTRUCTION | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | ANNEE_CTR | \n",
+ " 1.000000 | \n",
+ " 0.026613 | \n",
+ " 0.040797 | \n",
+ " 0.387562 | \n",
+ "
\n",
+ " \n",
+ " | AGE_ASSURE_PRINCIPAL | \n",
+ " 0.026613 | \n",
+ " 1.000000 | \n",
+ " 0.540899 | \n",
+ " -0.031655 | \n",
+ "
\n",
+ " \n",
+ " | ANCIENNETE_PERMIS | \n",
+ " 0.040797 | \n",
+ " 0.540899 | \n",
+ " 1.000000 | \n",
+ " 0.033320 | \n",
+ "
\n",
+ " \n",
+ " | ANNEE_CONSTRUCTION | \n",
+ " 0.387562 | \n",
+ " -0.031655 | \n",
+ " 0.033320 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ANNEE_CTR AGE_ASSURE_PRINCIPAL ANCIENNETE_PERMIS \\\n",
+ "ANNEE_CTR 1.000000 0.026613 0.040797 \n",
+ "AGE_ASSURE_PRINCIPAL 0.026613 1.000000 0.540899 \n",
+ "ANCIENNETE_PERMIS 0.040797 0.540899 1.000000 \n",
+ "ANNEE_CONSTRUCTION 0.387562 -0.031655 0.033320 \n",
+ "\n",
+ " ANNEE_CONSTRUCTION \n",
+ "ANNEE_CTR 0.387562 \n",
+ "AGE_ASSURE_PRINCIPAL -0.031655 \n",
+ "ANCIENNETE_PERMIS 0.033320 \n",
+ "ANNEE_CONSTRUCTION 1.000000 "
+ ]
},
+ "execution_count": 204,
"metadata": {},
- "output_type": "display_data"
+ "output_type": "execute_result"
}
],
"source": [
- "vars_numeriques.corr()\n",
- "fig = px.imshow(vars_numeriques.corr(),\n",
- " text_auto=True,\n",
- " aspect=\"auto\",\n",
- " color_continuous_scale='RdBu_r',\n",
- " title='Matrice de corrélation des variables numériques')\n",
- "fig.show()"
+ "# Corrélation de Pearson\n",
+ "correlations_num = vars_numeriques.corr(method=\"pearson\")\n",
+ "correlations_num"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 205,
+ "id": "6c3bd9b2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Report each pair of numeric variables with |Pearson correlation| above 0.7.\n",
+ "# Only j > i is scanned, so every unordered pair is visited exactly once;\n",
+ "# the printed value keeps its sign.\n",
+ "nb_variables = correlations_num.shape[0]\n",
+ "for i in range(nb_variables):\n",
+ "    for j in range(i + 1, nb_variables):\n",
+ "        corr_ij = correlations_num.iloc[i, j]\n",
+ "        if abs(corr_ij) > 0.7:\n",
+ "            print(\n",
+ "                correlations_num.index[i] + \" et \" + correlations_num.columns[j]\n",
+ "                + \" sont trop dépendantes, corr = \" + str(corr_ij)\n",
+ "            )\n"
]
},
{
@@ -2764,15 +2713,616 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 206,
"id": "b8530717",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.microsoft.datawrangler.viewer.v0+json": {
+ "columns": [
+ {
+ "name": "index",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "CONTRAT_ANCIENNETE_(0,1]",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "CONTRAT_ANCIENNETE_(1,2]",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "CONTRAT_ANCIENNETE_(2,5]",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "CONTRAT_ANCIENNETE_(5,10]",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "FREQUENCE_PAIEMENT_COTISATION_MENSUEL",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "FREQUENCE_PAIEMENT_COTISATION_TRIMESTRIEL",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "GROUPE_KM_[20000;40000[",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "GROUPE_KM_[40000;60000[",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "GROUPE_KM_[60000;99999[",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ZONE_RISQUE_B",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ZONE_RISQUE_C",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ZONE_RISQUE_D",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ZONE_RISQUE_E",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ZONE_RISQUE_F",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ZONE_RISQUE_G",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ZONE_RISQUE_H",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ZONE_RISQUE_I",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ZONE_RISQUE_J",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ZONE_RISQUE_K",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ZONE_RISQUE_L",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ZONE_RISQUE_M",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ZONE_RISQUE_T",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "GENRE_M",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "DEUXIEME_CONDUCTEUR_True",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ENERGIE_DIESEL",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ENERGIE_ESSENCE",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "EQUIPEMENT_SECURITE_VRAI",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "VALEUR_DU_BIEN_[10000;15000[",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "VALEUR_DU_BIEN_[15000;20000[",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "VALEUR_DU_BIEN_[20000;25000[",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "VALEUR_DU_BIEN_[25000;35000[",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "VALEUR_DU_BIEN_[35000;99999[",
+ "rawType": "float64",
+ "type": "float"
+ }
+ ],
+ "ref": "a62943ce-0b7b-4ed1-9ec2-fe8c4868e843",
+ "rows": [
+ [
+ "0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "1.0",
+ "1.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "0.0"
+ ],
+ [
+ "1",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "1.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "1.0"
+ ],
+ [
+ "2",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "1.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0"
+ ],
+ [
+ "3",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "0.0"
+ ],
+ [
+ "4",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "1.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "1.0",
+ "0.0"
+ ]
+ ],
+ "shape": {
+ "columns": 32,
+ "rows": 5
+ }
+ },
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " CONTRAT_ANCIENNETE_(0,1] | \n",
+ " CONTRAT_ANCIENNETE_(1,2] | \n",
+ " CONTRAT_ANCIENNETE_(2,5] | \n",
+ " CONTRAT_ANCIENNETE_(5,10] | \n",
+ " FREQUENCE_PAIEMENT_COTISATION_MENSUEL | \n",
+ " FREQUENCE_PAIEMENT_COTISATION_TRIMESTRIEL | \n",
+ " GROUPE_KM_[20000;40000[ | \n",
+ " GROUPE_KM_[40000;60000[ | \n",
+ " GROUPE_KM_[60000;99999[ | \n",
+ " ZONE_RISQUE_B | \n",
+ " ... | \n",
+ " GENRE_M | \n",
+ " DEUXIEME_CONDUCTEUR_True | \n",
+ " ENERGIE_DIESEL | \n",
+ " ENERGIE_ESSENCE | \n",
+ " EQUIPEMENT_SECURITE_VRAI | \n",
+ " VALEUR_DU_BIEN_[10000;15000[ | \n",
+ " VALEUR_DU_BIEN_[15000;20000[ | \n",
+ " VALEUR_DU_BIEN_[20000;25000[ | \n",
+ " VALEUR_DU_BIEN_[25000;35000[ | \n",
+ " VALEUR_DU_BIEN_[35000;99999[ | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " ... | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 32 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " CONTRAT_ANCIENNETE_(0,1] CONTRAT_ANCIENNETE_(1,2] \\\n",
+ "0 1.0 0.0 \n",
+ "1 0.0 0.0 \n",
+ "2 0.0 0.0 \n",
+ "3 0.0 0.0 \n",
+ "4 0.0 1.0 \n",
+ "\n",
+ " CONTRAT_ANCIENNETE_(2,5] CONTRAT_ANCIENNETE_(5,10] \\\n",
+ "0 0.0 0.0 \n",
+ "1 0.0 0.0 \n",
+ "2 0.0 0.0 \n",
+ "3 0.0 0.0 \n",
+ "4 0.0 0.0 \n",
+ "\n",
+ " FREQUENCE_PAIEMENT_COTISATION_MENSUEL \\\n",
+ "0 1.0 \n",
+ "1 1.0 \n",
+ "2 1.0 \n",
+ "3 1.0 \n",
+ "4 1.0 \n",
+ "\n",
+ " FREQUENCE_PAIEMENT_COTISATION_TRIMESTRIEL GROUPE_KM_[20000;40000[ \\\n",
+ "0 0.0 0.0 \n",
+ "1 0.0 1.0 \n",
+ "2 0.0 1.0 \n",
+ "3 0.0 1.0 \n",
+ "4 0.0 1.0 \n",
+ "\n",
+ " GROUPE_KM_[40000;60000[ GROUPE_KM_[60000;99999[ ZONE_RISQUE_B ... \\\n",
+ "0 0.0 0.0 0.0 ... \n",
+ "1 0.0 0.0 0.0 ... \n",
+ "2 0.0 0.0 0.0 ... \n",
+ "3 0.0 0.0 1.0 ... \n",
+ "4 0.0 0.0 0.0 ... \n",
+ "\n",
+ " GENRE_M DEUXIEME_CONDUCTEUR_True ENERGIE_DIESEL ENERGIE_ESSENCE \\\n",
+ "0 1.0 0.0 0.0 1.0 \n",
+ "1 1.0 1.0 0.0 0.0 \n",
+ "2 1.0 0.0 0.0 1.0 \n",
+ "3 1.0 0.0 1.0 0.0 \n",
+ "4 1.0 0.0 0.0 1.0 \n",
+ "\n",
+ " EQUIPEMENT_SECURITE_VRAI VALEUR_DU_BIEN_[10000;15000[ \\\n",
+ "0 1.0 0.0 \n",
+ "1 0.0 0.0 \n",
+ "2 1.0 0.0 \n",
+ "3 0.0 0.0 \n",
+ "4 0.0 0.0 \n",
+ "\n",
+ " VALEUR_DU_BIEN_[15000;20000[ VALEUR_DU_BIEN_[20000;25000[ \\\n",
+ "0 1.0 0.0 \n",
+ "1 0.0 0.0 \n",
+ "2 0.0 0.0 \n",
+ "3 1.0 0.0 \n",
+ "4 0.0 0.0 \n",
+ "\n",
+ " VALEUR_DU_BIEN_[25000;35000[ VALEUR_DU_BIEN_[35000;99999[ \n",
+ "0 0.0 0.0 \n",
+ "1 0.0 1.0 \n",
+ "2 0.0 0.0 \n",
+ "3 0.0 0.0 \n",
+ "4 1.0 0.0 \n",
+ "\n",
+ "[5 rows x 32 columns]"
+ ]
+ },
+ "execution_count": 206,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "encoder = preproc.OneHotEncoder()\n",
- "encoder.fit(vars_categorielles)\n",
- "vars_categorielles_enc = encoder.transform(vars_categorielles)\n",
- "vars_categorielles_enc = pd.DataFrame(vars_categorielles_enc.toarray(), columns=encoder.get_feature_names_out(vars_categorielles.columns))"
+ "# One-hot encode the categorical variables.\n",
+ "# drop=\"first\" removes the first level of each variable so the dummy columns are not perfectly collinear.\n",
+ "preproc_ohe = preproc.OneHotEncoder(drop=\"first\", sparse_output=False).fit(\n",
+ "    vars_categorielles\n",
+ ")\n",
+ "\n",
+ "variables_categorielles_ohe = preproc_ohe.transform(vars_categorielles)\n",
+ "variables_categorielles_ohe = pd.DataFrame(\n",
+ "    variables_categorielles_ohe,\n",
+ "    columns=preproc_ohe.get_feature_names_out(vars_categorielles.columns),\n",
+ ")\n",
+ "variables_categorielles_ohe.head()"
]
},
{
@@ -2785,15 +3335,172 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 207,
"id": "4ff3847d",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.microsoft.datawrangler.viewer.v0+json": {
+ "columns": [
+ {
+ "name": "index",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "ANNEE_CTR",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "AGE_ASSURE_PRINCIPAL",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ANCIENNETE_PERMIS",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ANNEE_CONSTRUCTION",
+ "rawType": "float64",
+ "type": "float"
+ }
+ ],
+ "ref": "012814d2-2bb4-463c-b907-53ba71631da2",
+ "rows": [
+ [
+ "0",
+ "0.40615626262983295",
+ "-0.31764836563527515",
+ "0.067767057718506",
+ "0.5653698304986595"
+ ],
+ [
+ "1",
+ "1.06626032654885",
+ "-1.2596885906311412",
+ "-1.1719751563806404",
+ "0.8816391722032739"
+ ],
+ [
+ "2",
+ "0.40615626262983295",
+ "-1.839405652167059",
+ "-1.740190337842749",
+ "0.5653698304986595"
+ ],
+ [
+ "3",
+ "0.40615626262983295",
+ "-0.31764836563527515",
+ "0.48101446241822143",
+ "0.8816391722032739"
+ ],
+ [
+ "4",
+ "-0.25394780128918387",
+ "-1.7669410194750692",
+ "-1.2752870075555691",
+ "-0.38343819461518397"
+ ]
+ ],
+ "shape": {
+ "columns": 4,
+ "rows": 5
+ }
+ },
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ANNEE_CTR | \n",
+ " AGE_ASSURE_PRINCIPAL | \n",
+ " ANCIENNETE_PERMIS | \n",
+ " ANNEE_CONSTRUCTION | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 0.406156 | \n",
+ " -0.317648 | \n",
+ " 0.067767 | \n",
+ " 0.565370 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 1.066260 | \n",
+ " -1.259689 | \n",
+ " -1.171975 | \n",
+ " 0.881639 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 0.406156 | \n",
+ " -1.839406 | \n",
+ " -1.740190 | \n",
+ " 0.565370 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 0.406156 | \n",
+ " -0.317648 | \n",
+ " 0.481014 | \n",
+ " 0.881639 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " -0.253948 | \n",
+ " -1.766941 | \n",
+ " -1.275287 | \n",
+ " -0.383438 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ANNEE_CTR AGE_ASSURE_PRINCIPAL ANCIENNETE_PERMIS ANNEE_CONSTRUCTION\n",
+ "0 0.406156 -0.317648 0.067767 0.565370\n",
+ "1 1.066260 -1.259689 -1.171975 0.881639\n",
+ "2 0.406156 -1.839406 -1.740190 0.565370\n",
+ "3 0.406156 -0.317648 0.481014 0.881639\n",
+ "4 -0.253948 -1.766941 -1.275287 -0.383438"
+ ]
+ },
+ "execution_count": 207,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "scaler = preproc.StandardScaler()\n",
- "scaler.fit(vars_numeriques)\n",
- "vars_numeriques_scaled = scaler.transform(vars_numeriques)\n",
- "vars_numeriques_scaled = pd.DataFrame(vars_numeriques_scaled, columns=vars_numeriques.columns)"
+ "# Normalisation des variables numériques\n",
+ "preproc_scale = preproc.StandardScaler(with_mean=True, with_std=True)\n",
+ "preproc_scale.fit(vars_numeriques)\n",
+ "\n",
+ "vars_numeriques_scaled = preproc_scale.transform(vars_numeriques)\n",
+ "vars_numeriques_scaled = pd.DataFrame(\n",
+ " vars_numeriques_scaled, columns=vars_numeriques.columns\n",
+ ")\n",
+ "vars_numeriques_scaled.head()\n"
]
},
{
@@ -2814,14 +3521,28 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 208,
"id": "6a1c7907",
"metadata": {},
"outputs": [],
"source": [
- "X = data_model_preprocessed = vars_numeriques_scaled.merge(vars_categorielles_enc, left_index=True, right_index=True)\n",
+ "X_global = vars_numeriques_scaled.merge(\n",
+ " variables_categorielles_ohe, left_index=True, right_index=True\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 209,
+ "id": "58a14153",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Réorganisation des données\n",
+ "X = X_global.to_numpy()\n",
"Y = data_model[\"CM\"]\n",
"\n",
+ "# Sampling en 80% train et 20% test\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
" X, Y, test_size=0.2, random_state=42\n",
")"
@@ -2845,442 +3566,16 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 210,
"id": "053e013c",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "DecisionTreeRegressor()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
- ],
- "text/plain": [
- "DecisionTreeRegressor()"
- ]
- },
- "execution_count": 15,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
- "tree = DecisionTreeRegressor()\n",
- "tree.fit(X_train, y_train)"
+ "# Initialisation de l'objet\n",
+ "model_CART = DecisionTreeRegressor()\n",
+ "\n",
+ "# Train the decision tree regressor\n",
+ "model_CART = model_CART.fit(X_train, y_train)"
]
},
{
@@ -3293,7 +3588,7 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 211,
"id": "c4ca2cf9",
"metadata": {},
"outputs": [
@@ -3309,7 +3604,7 @@
],
"source": [
"# Prédictions sur l'ensemble d'entraînement\n",
- "y_pred_train = tree.predict(X_train)\n",
+ "y_pred_train = model_CART.predict(X_train)\n",
"\n",
"mae = metrics.mean_absolute_error(y_train, y_pred_train)\n",
"mse = metrics.mean_squared_error(y_train, y_pred_train)\n",
@@ -3322,7 +3617,7 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 212,
"id": "4b739d5b",
"metadata": {},
"outputs": [
@@ -3330,14 +3625,14 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "MAE: 5969.32\n",
- "MSE: 161922043.77\n",
- "RMSE: 12724.86\n"
+ "MAE: 5186.37\n",
+ "MSE: 94029342.74\n",
+ "RMSE: 9696.87\n"
]
}
],
"source": [
- "y_pred_test = tree.predict(X_test)\n",
+ "y_pred_test = model_CART.predict(X_test)\n",
"\n",
"mae = metrics.mean_absolute_error(y_test, y_pred_test)\n",
"mse = metrics.mean_squared_error(y_test, y_pred_test)\n",
@@ -3408,17 +3703,18 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 213,
"id": "ab1e1367",
"metadata": {},
"outputs": [],
"source": [
- "X = data_model_preprocessed\n",
- "Y = data_model[\"CM\"]\n",
+ "X_global = vars_numeriques_scaled.merge(\n",
+ " variables_categorielles_ohe, left_index=True, right_index=True\n",
+ ")\n",
"\n",
- "X_train, X_test, y_train, y_test = train_test_split(\n",
- " X, Y, test_size=0.2, random_state=42\n",
- ")"
+ "# Réorganisation des données\n",
+ "X = X_global.to_numpy()\n",
+ "Y = np.array(data_model[\"CM\"])"
]
},
{
@@ -3439,7 +3735,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 214,
"id": "b515460e",
"metadata": {},
"outputs": [],
@@ -3462,41 +3758,35 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 215,
"id": "eebb394f",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Validation croisée terminée avec 5 folds\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# Entrainement avec cross-validation\n",
- "for train_index, val_index in kf.split(X_train):\n",
- " X_train_fold, X_val_fold = X_train.iloc[train_index], X_train.iloc[val_index]\n",
- " y_train_fold, y_val_fold = y_train.iloc[train_index], y_train.iloc[val_index]\n",
+ "for train_index, test_index in kf.split(X):\n",
+ " X_train, X_test = X[train_index], X[test_index]\n",
+ " y_train, y_test = Y[train_index], Y[test_index]\n",
"\n",
- " rf_regressor.fit(X_train_fold, y_train_fold)\n",
- " y_pred_fold = rf_regressor.predict(X_val_fold)\n",
+ " # Fitting\n",
+ " rf_regressor.fit(X_train, y_train)\n",
"\n",
- " mae = metrics.mean_absolute_error(y_val_fold, y_pred_fold)\n",
- " mse = metrics.mean_squared_error(y_val_fold, y_pred_fold)\n",
- " rmse = metrics.root_mean_squared_error(y_val_fold, y_pred_fold)\n",
+ " # Evaluation du modèle\n",
+ " y_pred_test = rf_regressor.predict(X_test)\n",
"\n",
- " MAE_scores.append(mae)\n",
- " MSE_scores.append(mse)\n",
- " RMSE_scores.append(rmse)\n",
+ " MAE = metrics.mean_absolute_error(y_test, y_pred_test)\n",
+ " MSE = metrics.mean_squared_error(y_test, y_pred_test)\n",
+ " RMSE = metrics.root_mean_squared_error(y_test, y_pred_test)\n",
"\n",
- "print(f\"Validation croisée terminée avec {len(MAE_scores)} folds\")"
+ " # Concaténation des résultats\n",
+ " MAE_scores.append(MAE)\n",
+ " MSE_scores.append(MSE)\n",
+ " RMSE_scores.append(RMSE)\n"
]
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 216,
"id": "b067126c",
"metadata": {},
"outputs": [
@@ -3504,11 +3794,11 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Fold 1 MAE: 4472.5486946969695\n",
- "Fold 2 MAE: 3859.4743234848484\n",
- "Fold 3 MAE: 3633.0231541666662\n",
- "Fold 4 MAE: 3888.3910715909087\n",
- "Fold 5 MAE: 4808.59621832061\n"
+ "Fold 1 MAE: 4007.8326951515155\n",
+ "Fold 2 MAE: 3651.8632978787878\n",
+ "Fold 3 MAE: 4718.226707878788\n",
+ "Fold 4 MAE: 4031.310562727273\n",
+ "Fold 5 MAE: 4410.05992957317\n"
]
}
],
@@ -3522,7 +3812,7 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": 217,
"id": "6597152c",
"metadata": {},
"outputs": [
@@ -3530,11 +3820,11 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Fold 1 MSE: 85464414.44080053\n",
- "Fold 2 MSE: 34396997.21755034\n",
- "Fold 3 MSE: 55184512.50786593\n",
- "Fold 4 MSE: 33191300.80751679\n",
- "Fold 5 MSE: 68739370.63588645\n"
+ "Fold 1 MSE: 32761893.668576293\n",
+ "Fold 2 MSE: 50894497.0512714\n",
+ "Fold 3 MSE: 106861487.03512044\n",
+ "Fold 4 MSE: 35487273.569623545\n",
+ "Fold 5 MSE: 54729524.04672807\n"
]
}
],
@@ -3546,7 +3836,7 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 218,
"id": "63ff1c9d",
"metadata": {},
"outputs": [
@@ -3554,11 +3844,11 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Fold 1 RMSE: 9244.696557529649\n",
- "Fold 2 RMSE: 5864.895328780415\n",
- "Fold 3 RMSE: 7428.62790210049\n",
- "Fold 4 RMSE: 5761.189183451346\n",
- "Fold 5 RMSE: 8290.9209763383\n"
+ "Fold 1 RMSE: 5723.8006314490285\n",
+ "Fold 2 RMSE: 7134.037920509772\n",
+ "Fold 3 RMSE: 10337.38298773536\n",
+ "Fold 4 RMSE: 5957.119569861222\n",
+ "Fold 5 RMSE: 7397.940527385177\n"
]
}
],
@@ -3594,17 +3884,17 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 219,
"id": "d9342ad6",
"metadata": {},
"outputs": [],
"source": [
- "X = data_model_preprocessed\n",
- "Y = data_model[\"CM\"]\n",
- "\n",
- "X_train, X_test, y_train, y_test = train_test_split(\n",
- " X, Y, test_size=0.2, random_state=42\n",
- ")"
+ "X_global = vars_numeriques_scaled.merge(\n",
+ " variables_categorielles_ohe, left_index=True, right_index=True\n",
+ ")\n",
+ "# Réorganisation des données\n",
+ "X = X_global.to_numpy()\n",
+ "Y = np.array(data_model[\"CM\"])"
]
},
{
@@ -3625,11 +3915,12 @@
},
{
"cell_type": "code",
- "execution_count": 36,
+ "execution_count": 220,
"id": "6d58dbc2",
"metadata": {},
"outputs": [],
"source": [
+ "# Initialisation\n",
"# Nombre de sous-échantillons pour la cross-validation\n",
"num_splits = 5\n",
"\n",
@@ -3642,9 +3933,9 @@
"RMSE_scores = []\n",
"\n",
"# Hyperparamètres à tester\n",
- "n_estimators_values = [50, 100, 200, 300]\n",
- "max_depth_values = [2, 5, 10]\n",
- "min_samples_split_values = [2, 5, 10]\n",
+ "n_estimators_values = [60, 65, 70, 75]\n",
+ "max_depth_values = [None, 1, 2, 3]\n",
+ "min_samples_split_values = [5, 8, 10, 11, 13, 14, 15]\n",
"\n",
"# Liste pour sauveagrder les meilleurs résultats\n",
"best_score = np.inf\n",
@@ -3657,58 +3948,71 @@
},
{
"cell_type": "code",
- "execution_count": 37,
+ "execution_count": 221,
"id": "47da5172",
"metadata": {},
"outputs": [],
"source": [
+ "# grid search à la main\n",
"for n_estimators in n_estimators_values:\n",
" for max_depth in max_depth_values:\n",
" for min_samples_split in min_samples_split_values:\n",
- " rf_regressor = RandomForestRegressor(\n",
- " n_estimators=n_estimators,\n",
- " max_depth=max_depth,\n",
- " min_samples_split=min_samples_split,\n",
- " random_state=42\n",
- " )\n",
+ " # Réinitialisation des résultats\n",
" MAE_scores = []\n",
" MSE_scores = []\n",
" RMSE_scores = []\n",
"\n",
- " for train_index, val_index in kf.split(X_train):\n",
- " X_train_fold, X_val_fold = X_train.iloc[train_index], X_train.iloc[val_index]\n",
- " y_train_fold, y_val_fold = y_train.iloc[train_index], y_train.iloc[val_index]\n",
+ " # Boucle de Cross-Validation\n",
+ " for train_index, test_index in kf.split(X):\n",
+ " X_train, X_test = X[train_index], X[test_index]\n",
+ " y_train, y_test = Y[train_index], Y[test_index]\n",
"\n",
- " rf_regressor.fit(X_train_fold, y_train_fold)\n",
- " y_pred_fold = rf_regressor.predict(X_val_fold)\n",
+ " # Modèle avec hyperparamètres actuels\n",
+ " rf_regressor = RandomForestRegressor(\n",
+ " n_estimators = n_estimators,\n",
+ " max_depth = max_depth,\n",
+ " min_samples_split = min_samples_split,\n",
+ " random_state = 42,\n",
+ " )\n",
"\n",
- " mae = metrics.mean_absolute_error(y_val_fold, y_pred_fold)\n",
- " mse = metrics.mean_squared_error(y_val_fold, y_pred_fold)\n",
- " rmse = metrics.root_mean_squared_error(y_val_fold, y_pred_fold)\n",
+ " rf_regressor.fit(X_train, y_train)\n",
"\n",
- " MAE_scores.append(mae)\n",
- " MSE_scores.append(mse)\n",
- " RMSE_scores.append(rmse)\n",
+ " # Evaluation du modèle\n",
+ " y_pred_test = rf_regressor.predict(X_test)\n",
"\n",
- " avg_mae = np.mean(MAE_scores)\n",
- " avg_mse = np.mean(MSE_scores)\n",
- " avg_rmse = np.mean(RMSE_scores)\n",
+ " MAE = metrics.mean_absolute_error(y_test, y_pred_test)\n",
+ " MSE = metrics.mean_squared_error(y_test, y_pred_test)\n",
+ " RMSE = metrics.root_mean_squared_error(y_test, y_pred_test)\n",
"\n",
- " if avg_rmse < best_score:\n",
- " best_score = avg_rmse\n",
+ " # Concaténation des résultats\n",
+ " MAE_scores.append(MAE)\n",
+ " MSE_scores.append(MSE)\n",
+ " RMSE_scores.append(RMSE)\n",
+ "\n",
+ "            # Score this hyperparameter set by its mean RMSE over all folds (the minimum would reward a single lucky fold)\n",
+ "            avg_rmse = np.mean(RMSE_scores)\n",
+ "\n",
+ "            # Keep this hyperparameter set if it beats the best mean RMSE seen so far\n",
+ "            if avg_rmse < best_score:\n",
+ "                best_score = avg_rmse\n",
" best_params = {\n",
- " 'n_estimators': n_estimators,\n",
- " 'max_depth': max_depth,\n",
- " 'min_samples_split': min_samples_split\n",
+ " \"n_estimators\": n_estimators,\n",
+ " \"max_depth\": max_depth,\n",
+ " \"min_samples_split\": min_samples_split,\n",
" }\n",
+ "\n",
+ " # Sauvegarde des scores pour analyse\n",
" MAE_best_score = MAE_scores\n",
" MSE_best_score = MSE_scores\n",
- " RMSE_best_score = RMSE_scores"
+ " RMSE_best_score = RMSE_scores\n",
+ "\n",
+ "                # Sauvegarde du modèle (entraîné sur le dernier fold uniquement) pour l'utiliser directement\n",
+ " best_model_regressor = rf_regressor\n"
]
},
{
"cell_type": "code",
- "execution_count": 33,
+ "execution_count": 222,
"id": "d4936c46",
"metadata": {},
"outputs": [
@@ -3716,8 +4020,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Meilleurs paramètres: {}\n",
- "Meilleure RMSE : inf\n"
+ "Meilleurs paramètres: {'n_estimators': 65, 'max_depth': 1, 'min_samples_split': 5}\n",
+ "Meilleure RMSE : 4548.156488811854\n"
]
}
],
@@ -3729,10 +4033,22 @@
},
{
"cell_type": "code",
- "execution_count": 34,
+ "execution_count": 223,
"id": "3215c463",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Fold 1 RMSE: 5168.96443207593\n",
+ "Fold 2 RMSE: 6779.919772901815\n",
+ "Fold 3 RMSE: 10081.628056733409\n",
+ "Fold 4 RMSE: 4548.156488811854\n",
+ "Fold 5 RMSE: 6713.822743503048\n"
+ ]
+ }
+ ],
"source": [
"# Métriques sur tous les folds\n",
"\n",
@@ -3743,10 +4059,22 @@
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": 224,
"id": "bb9a5c9b",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Fold 1 MSE: 26718193.300066035\n",
+ "Fold 2 MSE: 45967312.126985006\n",
+ "Fold 3 MSE: 101639224.27431424\n",
+ "Fold 4 MSE: 20685727.446721368\n",
+ "Fold 5 MSE: 45075415.831178784\n"
+ ]
+ }
+ ],
"source": [
"#MSE\n",
"for fold, mse in enumerate(MSE_best_score, start=1):\n",
@@ -3755,10 +4083,22 @@
},
{
"cell_type": "code",
- "execution_count": 30,
+ "execution_count": 225,
"id": "0f0768ad",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Fold 1 MAE: 3516.8014139306597\n",
+ "Fold 2 MAE: 3209.253810522964\n",
+ "Fold 3 MAE: 4545.1440942571835\n",
+ "Fold 4 MAE: 3088.226098509521\n",
+ "Fold 5 MAE: 3576.4647056529234\n"
+ ]
+ }
+ ],
"source": [
"#MAE\n",
"for fold, mae in enumerate(MAE_best_score, start=1):\n",
@@ -3772,6 +4112,220 @@
"source": [
"**Question :** Commentez les résultats"
]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bd1e91ee",
+ "metadata": {},
+ "source": [
+ "### Implémentation avec les librairies existantes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 226,
+ "id": "4b8cc48d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "from sklearn.ensemble import RandomForestRegressor\n",
+ "from sklearn.model_selection import GridSearchCV, KFold"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 227,
+ "id": "f0e5d591",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#Sampling en 80% train et 20% test\n",
+ "X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 228,
+ "id": "71177a63",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Supposons que vous ayez des données d'entraînement X_train et y_train\n",
+ "\n",
+ "# Définir la grille d'hyperparamètres à rechercher\n",
+ "param_grid = {\n",
+ " \"n_estimators\": [60, 65, 70, 75],\n",
+ " \"max_depth\": [None, 1, 2, 3],\n",
+ " \"min_samples_split\": [5, 8, 10, 11, 13, 14, 15],\n",
+ "}\n",
+ "# Nombre de folds pour la validation croisée\n",
+ "num_folds = 5"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 229,
+ "id": "e463b9d7",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Meilleurs hyperparamètres : {'max_depth': 1, 'min_samples_split': 5, 'n_estimators': 60}\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Initialisation du modèle RandomForestRegressor\n",
+ "rf = RandomForestRegressor(random_state=42)\n",
+ "\n",
+ "# Création de l'objet GridSearchCV pour la recherche sur grille avec validation croisée\n",
+ "grid_search = GridSearchCV(\n",
+ " estimator = rf,\n",
+ " param_grid = param_grid,\n",
+ " cv = KFold(\n",
+ " n_splits = num_folds, shuffle = True, random_state = 42\n",
+ " ), # Validation croisée avec 5 folds\n",
+ " scoring = \"neg_mean_squared_error\", # Métrique d'évaluation (moins c'est mieux)\n",
+ " n_jobs = -1, # Utiliser tous les cœurs du processeur\n",
+ ")\n",
+ "\n",
+ "# Exécution de la recherche sur grille\n",
+ "grid_search.fit(X_train, y_train)\n",
+ "\n",
+ "# Afficher les meilleurs hyperparamètres\n",
+ "best_params = grid_search.best_params_\n",
+ "print(\"Meilleurs hyperparamètres : \", best_params)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 230,
+ "id": "d1b84e91",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Initialiser le modèle final avec les meilleurs hyperparamètres\n",
+ "best_rf = RandomForestRegressor(random_state = 42, **best_params)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 231,
+ "id": "c46d32a7",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "RMSE pour le fold 1: -8836.353449486982\n",
+ "RMSE pour le fold 2: -5242.128416843558\n",
+ "RMSE pour le fold 3: -7205.432382938018\n",
+ "RMSE pour le fold 4: -4902.177844748944\n",
+ "RMSE pour le fold 5: -7707.687751500834\n",
+ "\n",
+ "\n",
+ "MSE pour le fold 1: -78081142.28426048\n",
+ "MSE pour le fold 2: -27479910.338678744\n",
+ "MSE pour le fold 3: -51918255.825091854\n",
+ "MSE pour le fold 4: -24031347.6215474\n",
+ "MSE pour le fold 5: -59408450.47463598\n",
+ "\n",
+ "\n",
+ "MAE pour le fold 1: -4047.520107345083\n",
+ "MAE pour le fold 2: -3389.6166968886077\n",
+ "MAE pour le fold 3: -3373.620497619359\n",
+ "MAE pour le fold 4: -3186.2100657449696\n",
+ "MAE pour le fold 5: -4145.078817961569\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Cross-validation of the tuned model on the training split.\n",
+ "# scikit-learn's \"neg_*\" scorers return the NEGATED error (so that a higher\n",
+ "# score is always better). The sign is flipped back at assignment so that the\n",
+ "# values stored and printed as RMSE / MSE / MAE are the usual positive errors.\n",
+ "\n",
+ "# RMSE of each fold\n",
+ "rmse_scores = -cross_val_score(\n",
+ "    best_rf, X_train, y_train, cv=num_folds, scoring=\"neg_root_mean_squared_error\"\n",
+ ")\n",
+ "\n",
+ "# Print the score of each fold\n",
+ "for i, score in enumerate(rmse_scores):\n",
+ "    print(f\"RMSE pour le fold {i + 1}: {score}\")\n",
+ "\n",
+ "# MSE of each fold\n",
+ "mse_scores = -cross_val_score(\n",
+ "    best_rf, X_train, y_train, cv=num_folds, scoring=\"neg_mean_squared_error\"\n",
+ ")\n",
+ "\n",
+ "# Print the score of each fold\n",
+ "print(\"\\n\")\n",
+ "for i, score in enumerate(mse_scores):\n",
+ "    print(f\"MSE pour le fold {i + 1}: {score}\")\n",
+ "\n",
+ "# MAE of each fold\n",
+ "mae_scores = -cross_val_score(\n",
+ "    best_rf, X_train, y_train, cv=num_folds, scoring=\"neg_mean_absolute_error\"\n",
+ ")\n",
+ "\n",
+ "# Print the score of each fold\n",
+ "print(\"\\n\")\n",
+ "for i, score in enumerate(mae_scores):\n",
+ "    print(f\"MAE pour le fold {i + 1}: {score}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 232,
+ "id": "3ba2274c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Entraîner le modèle final sur toute la base\n",
+ "best_rf.fit(X_train, y_train)\n",
+ "\n",
+ "# Faire des prédictions sur l'ensemble de test\n",
+ "y_pred = best_rf.predict(X_test)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 233,
+ "id": "ec717a0c",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "RMSE : 6792.775060864194\n",
+ "MSE : 46141793.02749855\n",
+ "MAE : 3387.6746891178996\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Calculer la métrique de performance (dans ce cas, RMSE)\n",
+ "rmse = metrics.root_mean_squared_error(y_test, y_pred)\n",
+ "print(f\"RMSE : {rmse}\")\n",
+ "\n",
+ "# Calculer la métrique de performance (dans ce cas, MSE)\n",
+ "mse = metrics.mean_squared_error(y_test, y_pred)\n",
+ "print(f\"MSE : {mse}\")\n",
+ "\n",
+ "# Calculer la métrique de performance (dans ce cas, MAE)\n",
+ "mae = metrics.mean_absolute_error(y_test, y_pred)\n",
+ "print(f\"MAE : {mae}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "001baf7d",
+ "metadata": {},
+ "outputs": [],
+ "source": []
}
],
"metadata": {