mirror of
https://github.com/ArthurDanjou/ArtStudies.git
synced 2026-01-14 13:54:06 +01:00
Add Jupyter notebook for supervised machine learning algorithms and update dependencies
- Created a new Jupyter notebook: 2025_M2_ISF_TP_4.ipynb for supervised machine learning exercises, including data preparation, model building, and performance analysis. - Added 'imblearn' as a dependency in pyproject.toml to support handling imbalanced datasets. - Updated uv.lock to include the 'imbalanced-learn' package and its dependencies.
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -46,7 +46,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 56,
|
||||
"execution_count": 1,
|
||||
"id": "97d58527",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -100,7 +100,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 57,
|
||||
"execution_count": 2,
|
||||
"id": "c9597b48",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -119,7 +119,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 58,
|
||||
"execution_count": 3,
|
||||
"id": "8051b5f4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -163,7 +163,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 59,
|
||||
"execution_count": 4,
|
||||
"id": "c427a4b8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -173,7 +173,7 @@
|
||||
"(824, 14)"
|
||||
]
|
||||
},
|
||||
"execution_count": 59,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -200,7 +200,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 60,
|
||||
"execution_count": 5,
|
||||
"id": "c8fd3ee1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -284,7 +284,7 @@
|
||||
"type": "float"
|
||||
}
|
||||
],
|
||||
"ref": "8fcd0abc-8334-4a0d-96b7-b6d7e17b3fb7",
|
||||
"ref": "e29190e7-d62c-4ab7-ab0a-43dd875c8b98",
|
||||
"rows": [
|
||||
[
|
||||
"count",
|
||||
@@ -760,7 +760,7 @@
|
||||
"max NaN 83421.850000 "
|
||||
]
|
||||
},
|
||||
"execution_count": 60,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -795,7 +795,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 61,
|
||||
"execution_count": 6,
|
||||
"id": "1b156435",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -805,7 +805,7 @@
|
||||
"(824, 13)"
|
||||
]
|
||||
},
|
||||
"execution_count": 61,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -817,7 +817,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 62,
|
||||
"execution_count": 7,
|
||||
"id": "0ef0fcc0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -853,7 +853,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 63,
|
||||
"execution_count": 8,
|
||||
"id": "e130aae5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -863,7 +863,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 9,
|
||||
"id": "c39e2ad0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1811,7 +1811,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 65,
|
||||
"execution_count": 10,
|
||||
"id": "a16215ab",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -1821,7 +1821,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 66,
|
||||
"execution_count": 11,
|
||||
"id": "532ca6c4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -2764,7 +2764,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 67,
|
||||
"execution_count": 12,
|
||||
"id": "b8530717",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -2785,7 +2785,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 68,
|
||||
"execution_count": 13,
|
||||
"id": "4ff3847d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -2814,7 +2814,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 14,
|
||||
"id": "6a1c7907",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -2845,14 +2845,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 70,
|
||||
"execution_count": 15,
|
||||
"id": "053e013c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<style>#sk-container-id-3 {\n",
|
||||
"<style>#sk-container-id-1 {\n",
|
||||
" /* Definition of color scheme common for light and dark mode */\n",
|
||||
" --sklearn-color-text: #000;\n",
|
||||
" --sklearn-color-text-muted: #666;\n",
|
||||
@@ -2883,15 +2883,15 @@
|
||||
" }\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"#sk-container-id-3 {\n",
|
||||
"#sk-container-id-1 {\n",
|
||||
" color: var(--sklearn-color-text);\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"#sk-container-id-3 pre {\n",
|
||||
"#sk-container-id-1 pre {\n",
|
||||
" padding: 0;\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"#sk-container-id-3 input.sk-hidden--visually {\n",
|
||||
"#sk-container-id-1 input.sk-hidden--visually {\n",
|
||||
" border: 0;\n",
|
||||
" clip: rect(1px 1px 1px 1px);\n",
|
||||
" clip: rect(1px, 1px, 1px, 1px);\n",
|
||||
@@ -2903,7 +2903,7 @@
|
||||
" width: 1px;\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"#sk-container-id-3 div.sk-dashed-wrapped {\n",
|
||||
"#sk-container-id-1 div.sk-dashed-wrapped {\n",
|
||||
" border: 1px dashed var(--sklearn-color-line);\n",
|
||||
" margin: 0 0.4em 0.5em 0.4em;\n",
|
||||
" box-sizing: border-box;\n",
|
||||
@@ -2911,7 +2911,7 @@
|
||||
" background-color: var(--sklearn-color-background);\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"#sk-container-id-3 div.sk-container {\n",
|
||||
"#sk-container-id-1 div.sk-container {\n",
|
||||
" /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
|
||||
" but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
|
||||
" so we also need the `!important` here to be able to override the\n",
|
||||
@@ -2921,7 +2921,7 @@
|
||||
" position: relative;\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"#sk-container-id-3 div.sk-text-repr-fallback {\n",
|
||||
"#sk-container-id-1 div.sk-text-repr-fallback {\n",
|
||||
" display: none;\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
@@ -2937,14 +2937,14 @@
|
||||
"\n",
|
||||
"/* Parallel-specific style estimator block */\n",
|
||||
"\n",
|
||||
"#sk-container-id-3 div.sk-parallel-item::after {\n",
|
||||
"#sk-container-id-1 div.sk-parallel-item::after {\n",
|
||||
" content: \"\";\n",
|
||||
" width: 100%;\n",
|
||||
" border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
|
||||
" flex-grow: 1;\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"#sk-container-id-3 div.sk-parallel {\n",
|
||||
"#sk-container-id-1 div.sk-parallel {\n",
|
||||
" display: flex;\n",
|
||||
" align-items: stretch;\n",
|
||||
" justify-content: center;\n",
|
||||
@@ -2952,28 +2952,28 @@
|
||||
" position: relative;\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"#sk-container-id-3 div.sk-parallel-item {\n",
|
||||
"#sk-container-id-1 div.sk-parallel-item {\n",
|
||||
" display: flex;\n",
|
||||
" flex-direction: column;\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"#sk-container-id-3 div.sk-parallel-item:first-child::after {\n",
|
||||
"#sk-container-id-1 div.sk-parallel-item:first-child::after {\n",
|
||||
" align-self: flex-end;\n",
|
||||
" width: 50%;\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"#sk-container-id-3 div.sk-parallel-item:last-child::after {\n",
|
||||
"#sk-container-id-1 div.sk-parallel-item:last-child::after {\n",
|
||||
" align-self: flex-start;\n",
|
||||
" width: 50%;\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"#sk-container-id-3 div.sk-parallel-item:only-child::after {\n",
|
||||
"#sk-container-id-1 div.sk-parallel-item:only-child::after {\n",
|
||||
" width: 0;\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"/* Serial-specific style estimator block */\n",
|
||||
"\n",
|
||||
"#sk-container-id-3 div.sk-serial {\n",
|
||||
"#sk-container-id-1 div.sk-serial {\n",
|
||||
" display: flex;\n",
|
||||
" flex-direction: column;\n",
|
||||
" align-items: center;\n",
|
||||
@@ -2991,14 +2991,14 @@
|
||||
"\n",
|
||||
"/* Pipeline and ColumnTransformer style (default) */\n",
|
||||
"\n",
|
||||
"#sk-container-id-3 div.sk-toggleable {\n",
|
||||
"#sk-container-id-1 div.sk-toggleable {\n",
|
||||
" /* Default theme specific background. It is overwritten whether we have a\n",
|
||||
" specific estimator or a Pipeline/ColumnTransformer */\n",
|
||||
" background-color: var(--sklearn-color-background);\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"/* Toggleable label */\n",
|
||||
"#sk-container-id-3 label.sk-toggleable__label {\n",
|
||||
"#sk-container-id-1 label.sk-toggleable__label {\n",
|
||||
" cursor: pointer;\n",
|
||||
" display: flex;\n",
|
||||
" width: 100%;\n",
|
||||
@@ -3011,13 +3011,13 @@
|
||||
" gap: 0.5em;\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"#sk-container-id-3 label.sk-toggleable__label .caption {\n",
|
||||
"#sk-container-id-1 label.sk-toggleable__label .caption {\n",
|
||||
" font-size: 0.6rem;\n",
|
||||
" font-weight: lighter;\n",
|
||||
" color: var(--sklearn-color-text-muted);\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"#sk-container-id-3 label.sk-toggleable__label-arrow:before {\n",
|
||||
"#sk-container-id-1 label.sk-toggleable__label-arrow:before {\n",
|
||||
" /* Arrow on the left of the label */\n",
|
||||
" content: \"▸\";\n",
|
||||
" float: left;\n",
|
||||
@@ -3025,13 +3025,13 @@
|
||||
" color: var(--sklearn-color-icon);\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"#sk-container-id-3 label.sk-toggleable__label-arrow:hover:before {\n",
|
||||
"#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {\n",
|
||||
" color: var(--sklearn-color-text);\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"/* Toggleable content - dropdown */\n",
|
||||
"\n",
|
||||
"#sk-container-id-3 div.sk-toggleable__content {\n",
|
||||
"#sk-container-id-1 div.sk-toggleable__content {\n",
|
||||
" max-height: 0;\n",
|
||||
" max-width: 0;\n",
|
||||
" overflow: hidden;\n",
|
||||
@@ -3040,12 +3040,12 @@
|
||||
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"#sk-container-id-3 div.sk-toggleable__content.fitted {\n",
|
||||
"#sk-container-id-1 div.sk-toggleable__content.fitted {\n",
|
||||
" /* fitted */\n",
|
||||
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"#sk-container-id-3 div.sk-toggleable__content pre {\n",
|
||||
"#sk-container-id-1 div.sk-toggleable__content pre {\n",
|
||||
" margin: 0.2em;\n",
|
||||
" border-radius: 0.25em;\n",
|
||||
" color: var(--sklearn-color-text);\n",
|
||||
@@ -3053,79 +3053,79 @@
|
||||
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"#sk-container-id-3 div.sk-toggleable__content.fitted pre {\n",
|
||||
"#sk-container-id-1 div.sk-toggleable__content.fitted pre {\n",
|
||||
" /* unfitted */\n",
|
||||
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"#sk-container-id-3 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
|
||||
"#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
|
||||
" /* Expand drop-down */\n",
|
||||
" max-height: 200px;\n",
|
||||
" max-width: 100%;\n",
|
||||
" overflow: auto;\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"#sk-container-id-3 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
|
||||
"#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
|
||||
" content: \"▾\";\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"/* Pipeline/ColumnTransformer-specific style */\n",
|
||||
"\n",
|
||||
"#sk-container-id-3 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||||
"#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||||
" color: var(--sklearn-color-text);\n",
|
||||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"#sk-container-id-3 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||||
"#sk-container-id-1 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"/* Estimator-specific style */\n",
|
||||
"\n",
|
||||
"/* Colorize estimator box */\n",
|
||||
"#sk-container-id-3 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||||
"#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||||
" /* unfitted */\n",
|
||||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"#sk-container-id-3 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||||
"#sk-container-id-1 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||||
" /* fitted */\n",
|
||||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"#sk-container-id-3 div.sk-label label.sk-toggleable__label,\n",
|
||||
"#sk-container-id-3 div.sk-label label {\n",
|
||||
"#sk-container-id-1 div.sk-label label.sk-toggleable__label,\n",
|
||||
"#sk-container-id-1 div.sk-label label {\n",
|
||||
" /* The background is the default theme color */\n",
|
||||
" color: var(--sklearn-color-text-on-default-background);\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"/* On hover, darken the color of the background */\n",
|
||||
"#sk-container-id-3 div.sk-label:hover label.sk-toggleable__label {\n",
|
||||
"#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {\n",
|
||||
" color: var(--sklearn-color-text);\n",
|
||||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"/* Label box, darken color on hover, fitted */\n",
|
||||
"#sk-container-id-3 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
|
||||
"#sk-container-id-1 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
|
||||
" color: var(--sklearn-color-text);\n",
|
||||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"/* Estimator label */\n",
|
||||
"\n",
|
||||
"#sk-container-id-3 div.sk-label label {\n",
|
||||
"#sk-container-id-1 div.sk-label label {\n",
|
||||
" font-family: monospace;\n",
|
||||
" font-weight: bold;\n",
|
||||
" display: inline-block;\n",
|
||||
" line-height: 1.2em;\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"#sk-container-id-3 div.sk-label-container {\n",
|
||||
"#sk-container-id-1 div.sk-label-container {\n",
|
||||
" text-align: center;\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"/* Estimator-specific */\n",
|
||||
"#sk-container-id-3 div.sk-estimator {\n",
|
||||
"#sk-container-id-1 div.sk-estimator {\n",
|
||||
" font-family: monospace;\n",
|
||||
" border: 1px dotted var(--sklearn-color-border-box);\n",
|
||||
" border-radius: 0.25em;\n",
|
||||
@@ -3135,18 +3135,18 @@
|
||||
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"#sk-container-id-3 div.sk-estimator.fitted {\n",
|
||||
"#sk-container-id-1 div.sk-estimator.fitted {\n",
|
||||
" /* fitted */\n",
|
||||
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"/* on hover */\n",
|
||||
"#sk-container-id-3 div.sk-estimator:hover {\n",
|
||||
"#sk-container-id-1 div.sk-estimator:hover {\n",
|
||||
" /* unfitted */\n",
|
||||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"#sk-container-id-3 div.sk-estimator.fitted:hover {\n",
|
||||
"#sk-container-id-1 div.sk-estimator.fitted:hover {\n",
|
||||
" /* fitted */\n",
|
||||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||||
"}\n",
|
||||
@@ -3234,7 +3234,7 @@
|
||||
"\n",
|
||||
"/* \"?\"-specific style due to the `<a>` HTML tag */\n",
|
||||
"\n",
|
||||
"#sk-container-id-3 a.estimator_doc_link {\n",
|
||||
"#sk-container-id-1 a.estimator_doc_link {\n",
|
||||
" float: right;\n",
|
||||
" font-size: 1rem;\n",
|
||||
" line-height: 1em;\n",
|
||||
@@ -3249,31 +3249,31 @@
|
||||
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"#sk-container-id-3 a.estimator_doc_link.fitted {\n",
|
||||
"#sk-container-id-1 a.estimator_doc_link.fitted {\n",
|
||||
" /* fitted */\n",
|
||||
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
|
||||
" color: var(--sklearn-color-fitted-level-1);\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"/* On hover */\n",
|
||||
"#sk-container-id-3 a.estimator_doc_link:hover {\n",
|
||||
"#sk-container-id-1 a.estimator_doc_link:hover {\n",
|
||||
" /* unfitted */\n",
|
||||
" background-color: var(--sklearn-color-unfitted-level-3);\n",
|
||||
" color: var(--sklearn-color-background);\n",
|
||||
" text-decoration: none;\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"#sk-container-id-3 a.estimator_doc_link.fitted:hover {\n",
|
||||
"#sk-container-id-1 a.estimator_doc_link.fitted:hover {\n",
|
||||
" /* fitted */\n",
|
||||
" background-color: var(--sklearn-color-fitted-level-3);\n",
|
||||
"}\n",
|
||||
"</style><div id=\"sk-container-id-3\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>DecisionTreeRegressor()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-3\" type=\"checkbox\" checked><label for=\"sk-estimator-id-3\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow\"><div><div>DecisionTreeRegressor</div></div><div><a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.6/modules/generated/sklearn.tree.DecisionTreeRegressor.html\">?<span>Documentation for DecisionTreeRegressor</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></div></label><div class=\"sk-toggleable__content fitted\"><pre>DecisionTreeRegressor()</pre></div> </div></div></div></div>"
|
||||
"</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>DecisionTreeRegressor()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow\"><div><div>DecisionTreeRegressor</div></div><div><a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.6/modules/generated/sklearn.tree.DecisionTreeRegressor.html\">?<span>Documentation for DecisionTreeRegressor</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></div></label><div class=\"sk-toggleable__content fitted\"><pre>DecisionTreeRegressor()</pre></div> </div></div></div></div>"
|
||||
],
|
||||
"text/plain": [
|
||||
"DecisionTreeRegressor()"
|
||||
]
|
||||
},
|
||||
"execution_count": 70,
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -3293,7 +3293,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 71,
|
||||
"execution_count": 16,
|
||||
"id": "c4ca2cf9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -3322,7 +3322,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 72,
|
||||
"execution_count": 17,
|
||||
"id": "4b739d5b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -3330,9 +3330,9 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"MAE: 5124.14\n",
|
||||
"MSE: 84535204.52\n",
|
||||
"RMSE: 9194.30\n"
|
||||
"MAE: 5969.32\n",
|
||||
"MSE: 161922043.77\n",
|
||||
"RMSE: 12724.86\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -3408,7 +3408,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 73,
|
||||
"execution_count": 18,
|
||||
"id": "ab1e1367",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -3439,7 +3439,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 74,
|
||||
"execution_count": 19,
|
||||
"id": "b515460e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -3462,7 +3462,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 75,
|
||||
"execution_count": 20,
|
||||
"id": "eebb394f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -3496,7 +3496,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 76,
|
||||
"execution_count": 21,
|
||||
"id": "b067126c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -3522,7 +3522,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 77,
|
||||
"execution_count": 22,
|
||||
"id": "6597152c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -3546,7 +3546,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 78,
|
||||
"execution_count": 23,
|
||||
"id": "63ff1c9d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -3594,7 +3594,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 24,
|
||||
"id": "d9342ad6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -3625,12 +3625,11 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 36,
|
||||
"id": "6d58dbc2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#Initialisation\n",
|
||||
"# Nombre de sous-échantillons pour la cross-validation\n",
|
||||
"num_splits = 5\n",
|
||||
"\n",
|
||||
@@ -3643,9 +3642,9 @@
|
||||
"RMSE_scores = []\n",
|
||||
"\n",
|
||||
"# Hyperparamètres à tester\n",
|
||||
"n_estimators_values = []\n",
|
||||
"max_depth_values = []\n",
|
||||
"min_samples_split_values = []\n",
|
||||
"n_estimators_values = [50, 100, 200, 300]\n",
|
||||
"max_depth_values = [2, 5, 10]\n",
|
||||
"min_samples_split_values = [2, 5, 10]\n",
|
||||
"\n",
|
||||
"# Liste pour sauveagrder les meilleurs résultats\n",
|
||||
"best_score = np.inf\n",
|
||||
@@ -3658,12 +3657,11 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 87,
|
||||
"execution_count": 37,
|
||||
"id": "47da5172",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#Complétez ici avec votre code\n",
|
||||
"for n_estimators in n_estimators_values:\n",
|
||||
" for max_depth in max_depth_values:\n",
|
||||
" for min_samples_split in min_samples_split_values:\n",
|
||||
@@ -3710,7 +3708,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 88,
|
||||
"execution_count": 33,
|
||||
"id": "d4936c46",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -3731,7 +3729,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 82,
|
||||
"execution_count": 34,
|
||||
"id": "3215c463",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -3745,7 +3743,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 83,
|
||||
"execution_count": 29,
|
||||
"id": "bb9a5c9b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -3757,7 +3755,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 84,
|
||||
"execution_count": 30,
|
||||
"id": "0f0768ad",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
||||
14237
M2/Machine Learning/TP_4/1_inputs/base_retraitee.csv
Normal file
14237
M2/Machine Learning/TP_4/1_inputs/base_retraitee.csv
Normal file
File diff suppressed because it is too large
Load Diff
420
M2/Machine Learning/TP_4/2025_M2_ISF_TP_4.ipynb
Normal file
420
M2/Machine Learning/TP_4/2025_M2_ISF_TP_4.ipynb
Normal file
@@ -0,0 +1,420 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8750d15b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Cours 4 : Machine Learning - Algorithmes supervisés (2/2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f7c08ae5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Préambule"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ec7ecb4b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Les objectifs de cette séance (3h) sont :\n",
|
||||
"* Préparer les bases de modélisation (sampling)\n",
|
||||
"* Construire un modèle de Machine Learning (cross-validation et hyperparamétrage) pour résoudre un problème de classification\n",
|
||||
"* Analyser les performances du modèle"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4e99c600",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Préparation du workspace"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c1b01045",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Import de librairies "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "97d58527",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Données\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"# Graphiques \n",
|
||||
"import seaborn as sns\n",
|
||||
"sns.set()\n",
|
||||
"import plotly.express as px\n",
|
||||
"import plotly.graph_objects as gp\n",
|
||||
"\n",
|
||||
"# Machine Learning\n",
|
||||
"import sklearn.preprocessing as preproc\n",
|
||||
"from imblearn.over_sampling import RandomOverSampler\n",
|
||||
"\n",
|
||||
"# Statistiques\n",
|
||||
"from scipy.stats import chi2_contingency\n",
|
||||
"from sklearn import metrics\n",
|
||||
"from sklearn.ensemble import GradientBoostingClassifier\n",
|
||||
"from sklearn.model_selection import (\n",
|
||||
" GridSearchCV,\n",
|
||||
" KFold,\n",
|
||||
" StratifiedKFold,\n",
|
||||
" cross_val_score,\n",
|
||||
" train_test_split,\n",
|
||||
")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "06153286",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Définition des fonctions "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "c67db932",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def cramers_V(var1,var2) :\n",
|
||||
" crosstab =np.array(pd.crosstab(var1,var2, rownames=None, colnames=None)) # Cross table building\n",
|
||||
" stat = chi2_contingency(crosstab)[0] # Keeping of the test statistic of the Chi2 test\n",
|
||||
" obs = np.sum(crosstab) # Number of observations\n",
|
||||
" mini = min(crosstab.shape)-1 # Take the minimum value between the columns and the rows of the cross table\n",
|
||||
" return (stat/(obs*mini))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "985e4e97",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Constantes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "c9597b48",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"input_path = \"./1_inputs\"\n",
|
||||
"output_path = \"./2_outputs\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b2b035d2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Import des données"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "8051b5f4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "FileNotFoundError",
|
||||
"evalue": "[Errno 2] No such file or directory: './1_inputs/base_retraitee.csv'",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
|
||||
"\u001b[31mFileNotFoundError\u001b[39m Traceback (most recent call last)",
|
||||
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[9]\u001b[39m\u001b[32m, line 2\u001b[39m\n\u001b[32m 1\u001b[39m path =input_path + \u001b[33m'\u001b[39m\u001b[33m/base_retraitee.csv\u001b[39m\u001b[33m'\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m2\u001b[39m data_retraitee = \u001b[43mpd\u001b[49m\u001b[43m.\u001b[49m\u001b[43mread_csv\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\u001b[43msep\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43m,\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43mdecimal\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43m.\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
|
||||
"\u001b[36mFile \u001b[39m\u001b[32m~/Workspace/studies/.venv/lib/python3.13/site-packages/pandas/io/parsers/readers.py:1026\u001b[39m, in \u001b[36mread_csv\u001b[39m\u001b[34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[39m\n\u001b[32m 1013\u001b[39m kwds_defaults = _refine_defaults_read(\n\u001b[32m 1014\u001b[39m dialect,\n\u001b[32m 1015\u001b[39m delimiter,\n\u001b[32m (...)\u001b[39m\u001b[32m 1022\u001b[39m dtype_backend=dtype_backend,\n\u001b[32m 1023\u001b[39m )\n\u001b[32m 1024\u001b[39m kwds.update(kwds_defaults)\n\u001b[32m-> \u001b[39m\u001b[32m1026\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||
"\u001b[36mFile \u001b[39m\u001b[32m~/Workspace/studies/.venv/lib/python3.13/site-packages/pandas/io/parsers/readers.py:620\u001b[39m, in \u001b[36m_read\u001b[39m\u001b[34m(filepath_or_buffer, kwds)\u001b[39m\n\u001b[32m 617\u001b[39m _validate_names(kwds.get(\u001b[33m\"\u001b[39m\u001b[33mnames\u001b[39m\u001b[33m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[32m 619\u001b[39m \u001b[38;5;66;03m# Create the parser.\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m620\u001b[39m parser = \u001b[43mTextFileReader\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 622\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m chunksize \u001b[38;5;129;01mor\u001b[39;00m iterator:\n\u001b[32m 623\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m parser\n",
|
||||
"\u001b[36mFile \u001b[39m\u001b[32m~/Workspace/studies/.venv/lib/python3.13/site-packages/pandas/io/parsers/readers.py:1620\u001b[39m, in \u001b[36mTextFileReader.__init__\u001b[39m\u001b[34m(self, f, engine, **kwds)\u001b[39m\n\u001b[32m 1617\u001b[39m \u001b[38;5;28mself\u001b[39m.options[\u001b[33m\"\u001b[39m\u001b[33mhas_index_names\u001b[39m\u001b[33m\"\u001b[39m] = kwds[\u001b[33m\"\u001b[39m\u001b[33mhas_index_names\u001b[39m\u001b[33m\"\u001b[39m]\n\u001b[32m 1619\u001b[39m \u001b[38;5;28mself\u001b[39m.handles: IOHandles | \u001b[38;5;28;01mNone\u001b[39;00m = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1620\u001b[39m \u001b[38;5;28mself\u001b[39m._engine = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_make_engine\u001b[49m\u001b[43m(\u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mengine\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||
"\u001b[36mFile \u001b[39m\u001b[32m~/Workspace/studies/.venv/lib/python3.13/site-packages/pandas/io/parsers/readers.py:1880\u001b[39m, in \u001b[36mTextFileReader._make_engine\u001b[39m\u001b[34m(self, f, engine)\u001b[39m\n\u001b[32m 1878\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33mb\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m mode:\n\u001b[32m 1879\u001b[39m mode += \u001b[33m\"\u001b[39m\u001b[33mb\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m-> \u001b[39m\u001b[32m1880\u001b[39m \u001b[38;5;28mself\u001b[39m.handles = \u001b[43mget_handle\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1881\u001b[39m \u001b[43m \u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1882\u001b[39m \u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1883\u001b[39m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mencoding\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1884\u001b[39m \u001b[43m \u001b[49m\u001b[43mcompression\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mcompression\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1885\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mmemory_map\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmemory_map\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1886\u001b[39m \u001b[43m \u001b[49m\u001b[43mis_text\u001b[49m\u001b[43m=\u001b[49m\u001b[43mis_text\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1887\u001b[39m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mencoding_errors\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mstrict\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1888\u001b[39m \u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mstorage_options\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1889\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1890\u001b[39m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m.handles \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 1891\u001b[39m f = \u001b[38;5;28mself\u001b[39m.handles.handle\n",
|
||||
"\u001b[36mFile \u001b[39m\u001b[32m~/Workspace/studies/.venv/lib/python3.13/site-packages/pandas/io/common.py:873\u001b[39m, in \u001b[36mget_handle\u001b[39m\u001b[34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[39m\n\u001b[32m 868\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(handle, \u001b[38;5;28mstr\u001b[39m):\n\u001b[32m 869\u001b[39m \u001b[38;5;66;03m# Check whether the filename is to be opened in binary mode.\u001b[39;00m\n\u001b[32m 870\u001b[39m \u001b[38;5;66;03m# Binary mode does not support 'encoding' and 'newline'.\u001b[39;00m\n\u001b[32m 871\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m ioargs.encoding \u001b[38;5;129;01mand\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33mb\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m ioargs.mode:\n\u001b[32m 872\u001b[39m \u001b[38;5;66;03m# Encoding\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m873\u001b[39m handle = \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[32m 874\u001b[39m \u001b[43m \u001b[49m\u001b[43mhandle\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 875\u001b[39m \u001b[43m \u001b[49m\u001b[43mioargs\u001b[49m\u001b[43m.\u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 876\u001b[39m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[43m=\u001b[49m\u001b[43mioargs\u001b[49m\u001b[43m.\u001b[49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 877\u001b[39m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[43m=\u001b[49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 878\u001b[39m \u001b[43m \u001b[49m\u001b[43mnewline\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 879\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 880\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 881\u001b[39m 
\u001b[38;5;66;03m# Binary mode\u001b[39;00m\n\u001b[32m 882\u001b[39m handle = \u001b[38;5;28mopen\u001b[39m(handle, ioargs.mode)\n",
|
||||
"\u001b[31mFileNotFoundError\u001b[39m: [Errno 2] No such file or directory: './1_inputs/base_retraitee.csv'"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"path =input_path + '/base_retraitee.csv'\n",
|
||||
"data_retraitee = pd.read_csv(path,sep=\",\",decimal=\".\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a2578ba1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Préparation de la base de données"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b3715c37",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Dans cette partie nous souhaitons expliquer la survenance d'un sinistre en fonction des variables explicatives i.e. une variable binaire qui : \n",
|
||||
"* est égale à 1 si la personne a eu 1 ou plus de sinistres.\n",
|
||||
"* est égale à 0 sinon."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b9b98d36",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#Calculez la variable \"sinistré\" qui est vraie si la personne a eu un ou plusieurs sinistres, et fausse sinon \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "657ebd89",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Exercice :** construisez les statistiques descriptives de la base utilisée. Notamment la distribution de la variable réponse."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "47cf4b69",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "92d6156a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Etude des corrélations parmi les variables explicatives"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a0bc6278",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e82fcade",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"##### Corrélation des variables catégorielles :"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "30df8bd5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8f615121",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"##### Corrélation des variables numériques :"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d1fa12fc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5777d20f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Question :** quels sont vos commentaires ?"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "212209ec",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Preprocessing"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "65aca700",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Deux étapes sont nécessaires avant de lancer l'apprentissage d'un modèle, c'est ce qu'on appelle le *Preprocessing* :\n",
|
||||
"\n",
|
||||
"* Les modèles proposés par la librairie \"sklearn\" ne gèrent que des variables numériques. Il est donc nécessaire de transformer les variables catégorielles en variables numériques : ce processus s'appelle le *One Hot Encoding*.\n",
|
||||
"* Normaliser les données numériques"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6c23d236",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Exercice :** proposez un bout de code permettant de réaliser le One Hot Encoding des variables catégorielles. Vous pourrez utiliser la fonction \"preproc.OneHotEncoder\" de la librairie sklearn"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b8530717",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2be6a3e4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Exercice :** proposez un bout de code permettant de normaliser les variables numériques présentes dans la base. Vous pourrez utiliser la fonction \"preproc.StandardScaler\" de la librairie sklearn"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4ff3847d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7ecba832",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Algorithme supervisé : Gradient Boosting"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "efcb8987",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"A ce stade, nous avons vu les différentes étapes pour lancer un algorithme de Machine Learning. Néanmoins, ces étapes ne sont pas suffisantes pour construire un modèle performant. \n",
|
||||
"En effet, afin de construire un modèle performant le Data Scientist doit agir sur l'apprentissage du modèle. Dans ce qui suit nous :\n",
|
||||
"* Changerons d'algorithme pour utiliser un algorithme plus performant (Gradient Boosting)\n",
|
||||
"* Réaliserons un *grid search* sur les paramètres du modèle\n",
|
||||
"* Appliquerons l'apprentissage par validation croisée\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3feaff44",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Exercice :** Implémentez l'algorithme du Gradient Boosting en appliquant les techniques vues lors des derniers cours (sampling, Grid search et Cross Validation) \n",
|
||||
"**Remarques :**\n",
|
||||
"* Vous pouvez utiliser les modèles \"GradientBoostingClassifier\" et \"GridSearchCV\" de la librairie Sklearn. \n",
|
||||
"* Pensez à utiliser les métriques relatives aux problèmes de classification."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5a6adbfe",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Sampling"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d9342ad6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "76ece01f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Fitting avec Cross-Validation et *Grid Search*"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cb60fe19",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3a723cbc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Validation du modèle - métriques"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "60c0312d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Exercice :** \n",
|
||||
"* Construisez la matrice de confusion (metrics.confusion_matrix).\n",
|
||||
"* Calculez les métriques : accuracy, recall & precision."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5d9ef448",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "studies",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -5,6 +5,7 @@ description = "A curated collection of mathematics and data science projects dev
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.12"
|
||||
dependencies = [
|
||||
"imblearn>=0.0",
|
||||
"ipykernel>=6.29.5",
|
||||
"keras>=3.11.3",
|
||||
"matplotlib>=3.10.1",
|
||||
|
||||
30
uv.lock
generated
30
uv.lock
generated
@@ -416,6 +416,34 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442, upload-time = "2024-09-15T18:07:37.964Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "imbalanced-learn"
|
||||
version = "0.14.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "joblib" },
|
||||
{ name = "numpy" },
|
||||
{ name = "scikit-learn" },
|
||||
{ name = "scipy" },
|
||||
{ name = "threadpoolctl" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/7b/5a/ad42e366914abfea7434b9ab7a821778be52dae4a8cbeac165918e66e7b0/imbalanced_learn-0.14.0.tar.gz", hash = "sha256:22b9ba6dbd681a9ec613cd6e08c21d39639fb5ccbf2a3c991f9c36415b70522c", size = 534790, upload-time = "2025-08-14T14:15:31.923Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/65/60/103dc71019ec2fa987f42f9dbe88641a74edc57f8499fac8896955b66065/imbalanced_learn-0.14.0-py3-none-any.whl", hash = "sha256:8a8700c02ca185e113064815513f990fbf84eb4e7701f1d4e944ce67fb259a60", size = 239958, upload-time = "2025-08-14T14:15:30.174Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "imblearn"
|
||||
version = "0.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "imbalanced-learn" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/32/0a/f83099534a77757abf27427d339590c83cc68c3386690d4741d6454e185f/imblearn-0.0.tar.gz", hash = "sha256:d8fbb662919c1b16f438ad91a8256220e53bcf6815c9ad5502c518b798de34f2", size = 945, upload-time = "2017-01-19T11:52:35.89Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/81/a7/4179e6ebfd654bd0eac0b9c06125b8b4c96a9d0a8ff9e9507eb2a26d2d7e/imblearn-0.0-py2.py3-none-any.whl", hash = "sha256:d42c2d709d22c00d2b9a91e638d57240a8b79b4014122d92181fcd2549a2f79a", size = 1874, upload-time = "2017-01-19T11:52:37.416Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ipykernel"
|
||||
version = "6.29.5"
|
||||
@@ -1518,6 +1546,7 @@ name = "studies"
|
||||
version = "0.1.0"
|
||||
source = { virtual = "." }
|
||||
dependencies = [
|
||||
{ name = "imblearn" },
|
||||
{ name = "ipykernel" },
|
||||
{ name = "keras" },
|
||||
{ name = "matplotlib" },
|
||||
@@ -1542,6 +1571,7 @@ dev = [
|
||||
|
||||
[package.metadata]
|
||||
requires-dist = [
|
||||
{ name = "imblearn", specifier = ">=0.0" },
|
||||
{ name = "ipykernel", specifier = ">=6.29.5" },
|
||||
{ name = "keras", specifier = ">=3.11.3" },
|
||||
{ name = "matplotlib", specifier = ">=3.10.1" },
|
||||
|
||||
Reference in New Issue
Block a user