diff --git a/M2/Advanced Machine Learning/TP1.ipynb b/M2/Advanced Machine Learning/TP1.ipynb new file mode 100644 index 0000000..903f25d --- /dev/null +++ b/M2/Advanced Machine Learning/TP1.ipynb @@ -0,0 +1,208 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "8226e658", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "7e95cb09", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "index", + "rawType": "int64", + "type": "integer" + }, + { + "name": "X1", + "rawType": "float64", + "type": "float" + }, + { + "name": "X2", + "rawType": "float64", + "type": "float" + }, + { + "name": "Y", + "rawType": "float64", + "type": "float" + } + ], + "ref": "018727a2-2342-424f-8395-021f40817c5a", + "rows": [ + [ + "0", + "-0.8363543", + "4.520502", + "-19.868094121443526" + ], + [ + "1", + "0.4020083", + "3.252834", + "-10.46598545005849" + ], + [ + "2", + "-0.2492138", + "3.610425", + "-12.91499193423918" + ], + [ + "3", + "-0.6257167", + "4.58877", + "-20.67839639765537" + ], + [ + "4", + "-0.9899948", + "4.893924", + "-22.99404413854238" + ] + ], + "shape": { + "columns": 3, + "rows": 5 + } + }, + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
X1X2Y
0-0.8363544.520502-19.868094
10.4020083.252834-10.465985
2-0.2492143.610425-12.914992
3-0.6257174.588770-20.678396
4-0.9899954.893924-22.994044
\n", + "
" + ], + "text/plain": [ + " X1 X2 Y\n", + "0 -0.836354 4.520502 -19.868094\n", + "1 0.402008 3.252834 -10.465985\n", + "2 -0.249214 3.610425 -12.914992\n", + "3 -0.625717 4.588770 -20.678396\n", + "4 -0.989995 4.893924 -22.994044" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = pd.read_excel(\"./data/data_pdp.xlsx\")\n", + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "4e9a9a97", + "metadata": {}, + "outputs": [], + "source": [ + "def partial_dependant_function(data: pd.DataFrame, model: object, feature: str, grid_points: list) -> list:\n", + "    \"\"\"Compute partial dependence values of `model` for one feature.\n", + "\n", + "    For each value in `grid_points`, a copy of `data` gets its `feature`\n", + "    column overwritten with that value, `model.predict` is called on the\n", + "    copy, and the mean of the predictions is recorded.\n", + "\n", + "    Args:\n", + "        data: Frame handed to `model.predict`; it is never modified.\n", + "        model: Object exposing `predict(frame)` whose result has `.mean()`.\n", + "        feature: Name of an existing column of `data`.\n", + "        grid_points: Values at which the feature is fixed.\n", + "\n", + "    Returns:\n", + "        One mean prediction per grid point, in `grid_points` order.\n", + "\n", + "    Raises:\n", + "        KeyError: If `feature` is not a column of `data`.\n", + "    \"\"\"\n", + "    # Assigning to a missing column would silently add a new one, so the\n", + "    # real feature would never take the grid value; fail loudly instead.\n", + "    if feature not in data.columns:\n", + "        raise KeyError(f\"{feature!r} is not a column of data\")\n", + "    pdp = []\n", + "    for val in grid_points:\n", + "        data_temp = data.copy()\n", + "        data_temp[feature] = val\n", + "        preds = model.predict(data_temp)\n", + "        pdp.append(preds.mean())\n", + "    return pdp" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9553a1d8", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "studies", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/M2/Advanced Machine Learning/data/data_pdp.xlsx b/M2/Advanced Machine Learning/data/data_pdp.xlsx new file mode 100644 index 0000000..3f9441d Binary files /dev/null and b/M2/Advanced Machine Learning/data/data_pdp.xlsx differ diff --git a/pyproject.toml b/pyproject.toml index 4eb4a16..f024f7c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,6 +17,7 @@ dependencies = [ "nbformat>=5.10.4", "numpy>=2.2.5", "opencv-python>=4.11.0.86", + "openpyxl>=3.1.5", "pandas>=2.2.3", 
"pandas-stubs>=2.3.2.250926", "plotly>=6.3.0", diff --git a/uv.lock b/uv.lock index 3a84ce8..e107bbe 100644 --- a/uv.lock +++ b/uv.lock @@ -514,6 +514,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/4e/8c/f3147f5c4b73e7550fe5f9352eaa956ae838d5c51eb58e7a25b9f3e2643b/decorator-5.2.1-py3-none-any.whl", hash = "sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a", size = 9190, upload-time = "2025-02-24T04:41:32.565Z" }, ] +[[package]] +name = "et-xmlfile" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d3/38/af70d7ab1ae9d4da450eeec1fa3918940a5fafb9055e934af8d6eb0c2313/et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54", size = 17234, upload-time = "2024-10-25T17:25:40.039Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c1/8b/5fe2cc11fee489817272089c4203e679c63b570a5aaeb18d852ae3cbba6a/et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa", size = 18059, upload-time = "2024-10-25T17:25:39.051Z" }, +] + [[package]] name = "executing" version = "2.2.0" @@ -2101,6 +2110,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a4/7d/f1c30a92854540bf789e9cd5dde7ef49bbe63f855b85a2e6b3db8135c591/opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl", hash = "sha256:085ad9b77c18853ea66283e98affefe2de8cc4c1f43eda4c100cf9b2721142ec", size = 39488044, upload-time = "2025-01-16T13:52:21.928Z" }, ] +[[package]] +name = "openpyxl" +version = "3.1.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "et-xmlfile" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3d/f9/88d94a75de065ea32619465d2f77b29a0469500e99012523b91cc4141cd1/openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050", size = 186464, upload-time = "2024-06-28T14:03:44.161Z" } 
+wheels = [ + { url = "https://files.pythonhosted.org/packages/c0/da/977ded879c29cbd04de313843e76868e6e13408a94ed6b987245dc7c8506/openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2", size = 250910, upload-time = "2024-06-28T14:03:41.161Z" }, +] + [[package]] name = "opt-einsum" version = "3.4.0" @@ -3486,6 +3507,7 @@ dependencies = [ { name = "nbformat" }, { name = "numpy" }, { name = "opencv-python" }, + { name = "openpyxl" }, { name = "pandas" }, { name = "pandas-stubs" }, { name = "plotly" }, @@ -3521,6 +3543,7 @@ requires-dist = [ { name = "nbformat", specifier = ">=5.10.4" }, { name = "numpy", specifier = ">=2.2.5" }, { name = "opencv-python", specifier = ">=4.11.0.86" }, + { name = "openpyxl", specifier = ">=3.1.5" }, { name = "pandas", specifier = ">=2.2.3" }, { name = "pandas-stubs", specifier = ">=2.3.2.250926" }, { name = "plotly", specifier = ">=6.3.0" },