{ "cells": [ { "cell_type": "markdown", "metadata": { "tags": [] }, "source": [ "# TP4 Ridge, Lasso, CV\n", "\n", "\n", "\n", "### Table of Contents\n", "\n", "* [0. Data Preparation ](#chapter0)\n", "* [1. Ridge and Lasso Regression ](#chapter1)\n", "* [2. Cross-validation for the hyperparameters $\\alpha$ of Ridge and Lasso](#chapter2)\n", "\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 0. Data Preparation \n", "\n", "We will predict the salary of baseball players using the `Hitters` dataset.\n", "\n", "Reference: the book \"James, Gareth, Daniela Witten, Trevor Hastie, and Robert Tibshirani. An Introduction to Statistical Learning. Vol. 112. New York: Springer, 2013\"." ] }, { "cell_type": "code", "metadata": { "ExecuteTime": { "end_time": "2025-03-19T09:18:59.373219Z", "start_time": "2025-03-19T09:18:59.369013Z" } }, "source": [ "\n", "import warnings\n", "\n", "warnings.filterwarnings('ignore')" ], "outputs": [], "execution_count": 1 }, { "cell_type": "code", "metadata": { "ExecuteTime": { "end_time": "2025-03-19T09:19:03.853918Z", "start_time": "2025-03-19T09:19:02.315325Z" } }, "source": [ "import numpy as np\n", "import pandas as pd # dataframes are in pandas \n", "import matplotlib.pyplot as plt\n", "\n", "hitters = pd.read_csv(\"data/Hitters.csv\", index_col=\"Name\")\n", "\n", "hitters" ], "outputs": [ { "data": { "text/plain": [ " AtBat Hits HmRun Runs RBI Walks Years CAtBat CHits \\\n", "Name \n", "-Andy Allanson 293 66 1 30 29 14 1 293 66 \n", "-Alan Ashby 315 81 7 24 38 39 14 3449 835 \n", "-Alvin Davis 479 130 18 66 72 76 3 1624 457 \n", "-Andre Dawson 496 141 20 65 78 37 11 5628 1575 \n", "-Andres Galarraga 321 87 10 39 42 30 2 396 101 \n", "... ... ... ... ... ... ... ... ... ... 
\n", "-Willie McGee 497 127 7 65 48 37 5 2703 806 \n", "-Willie Randolph 492 136 5 76 50 94 12 5511 1511 \n", "-Wayne Tolleson 475 126 3 61 43 52 6 1700 433 \n", "-Willie Upshaw 573 144 9 85 60 78 8 3198 857 \n", "-Willie Wilson 631 170 9 77 44 31 11 4908 1457 \n", "\n", " CHmRun CRuns CRBI CWalks League Division PutOuts \\\n", "Name \n", "-Andy Allanson 1 30 29 14 A E 446 \n", "-Alan Ashby 69 321 414 375 N W 632 \n", "-Alvin Davis 63 224 266 263 A W 880 \n", "-Andre Dawson 225 828 838 354 N E 200 \n", "-Andres Galarraga 12 48 46 33 N E 805 \n", "... ... ... ... ... ... ... ... \n", "-Willie McGee 32 379 311 138 N E 325 \n", "-Willie Randolph 39 897 451 875 A E 313 \n", "-Wayne Tolleson 7 217 93 146 A W 37 \n", "-Willie Upshaw 97 470 420 332 A E 1314 \n", "-Willie Wilson 30 775 357 249 A W 408 \n", "\n", " Assists Errors Salary NewLeague \n", "Name \n", "-Andy Allanson 33 20 NaN A \n", "-Alan Ashby 43 10 475.0 N \n", "-Alvin Davis 82 14 480.0 A \n", "-Andre Dawson 11 3 500.0 N \n", "-Andres Galarraga 40 4 91.5 N \n", "... ... ... ... ... \n", "-Willie McGee 9 3 700.0 N \n", "-Willie Randolph 381 20 875.0 A \n", "-Wayne Tolleson 113 7 385.0 A \n", "-Willie Upshaw 131 12 960.0 A \n", "-Willie Wilson 4 3 1000.0 A \n", "\n", "[322 rows x 20 columns]" ], "text/html": [ "
| \n", " | AtBat | \n", "Hits | \n", "HmRun | \n", "Runs | \n", "RBI | \n", "Walks | \n", "Years | \n", "CAtBat | \n", "CHits | \n", "CHmRun | \n", "CRuns | \n", "CRBI | \n", "CWalks | \n", "League | \n", "Division | \n", "PutOuts | \n", "Assists | \n", "Errors | \n", "Salary | \n", "NewLeague | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Name | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
| -Andy Allanson | \n", "293 | \n", "66 | \n", "1 | \n", "30 | \n", "29 | \n", "14 | \n", "1 | \n", "293 | \n", "66 | \n", "1 | \n", "30 | \n", "29 | \n", "14 | \n", "A | \n", "E | \n", "446 | \n", "33 | \n", "20 | \n", "NaN | \n", "A | \n", "
| -Alan Ashby | \n", "315 | \n", "81 | \n", "7 | \n", "24 | \n", "38 | \n", "39 | \n", "14 | \n", "3449 | \n", "835 | \n", "69 | \n", "321 | \n", "414 | \n", "375 | \n", "N | \n", "W | \n", "632 | \n", "43 | \n", "10 | \n", "475.0 | \n", "N | \n", "
| -Alvin Davis | \n", "479 | \n", "130 | \n", "18 | \n", "66 | \n", "72 | \n", "76 | \n", "3 | \n", "1624 | \n", "457 | \n", "63 | \n", "224 | \n", "266 | \n", "263 | \n", "A | \n", "W | \n", "880 | \n", "82 | \n", "14 | \n", "480.0 | \n", "A | \n", "
| -Andre Dawson | \n", "496 | \n", "141 | \n", "20 | \n", "65 | \n", "78 | \n", "37 | \n", "11 | \n", "5628 | \n", "1575 | \n", "225 | \n", "828 | \n", "838 | \n", "354 | \n", "N | \n", "E | \n", "200 | \n", "11 | \n", "3 | \n", "500.0 | \n", "N | \n", "
| -Andres Galarraga | \n", "321 | \n", "87 | \n", "10 | \n", "39 | \n", "42 | \n", "30 | \n", "2 | \n", "396 | \n", "101 | \n", "12 | \n", "48 | \n", "46 | \n", "33 | \n", "N | \n", "E | \n", "805 | \n", "40 | \n", "4 | \n", "91.5 | \n", "N | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| -Willie McGee | \n", "497 | \n", "127 | \n", "7 | \n", "65 | \n", "48 | \n", "37 | \n", "5 | \n", "2703 | \n", "806 | \n", "32 | \n", "379 | \n", "311 | \n", "138 | \n", "N | \n", "E | \n", "325 | \n", "9 | \n", "3 | \n", "700.0 | \n", "N | \n", "
| -Willie Randolph | \n", "492 | \n", "136 | \n", "5 | \n", "76 | \n", "50 | \n", "94 | \n", "12 | \n", "5511 | \n", "1511 | \n", "39 | \n", "897 | \n", "451 | \n", "875 | \n", "A | \n", "E | \n", "313 | \n", "381 | \n", "20 | \n", "875.0 | \n", "A | \n", "
| -Wayne Tolleson | \n", "475 | \n", "126 | \n", "3 | \n", "61 | \n", "43 | \n", "52 | \n", "6 | \n", "1700 | \n", "433 | \n", "7 | \n", "217 | \n", "93 | \n", "146 | \n", "A | \n", "W | \n", "37 | \n", "113 | \n", "7 | \n", "385.0 | \n", "A | \n", "
| -Willie Upshaw | \n", "573 | \n", "144 | \n", "9 | \n", "85 | \n", "60 | \n", "78 | \n", "8 | \n", "3198 | \n", "857 | \n", "97 | \n", "470 | \n", "420 | \n", "332 | \n", "A | \n", "E | \n", "1314 | \n", "131 | \n", "12 | \n", "960.0 | \n", "A | \n", "
| -Willie Wilson | \n", "631 | \n", "170 | \n", "9 | \n", "77 | \n", "44 | \n", "31 | \n", "11 | \n", "4908 | \n", "1457 | \n", "30 | \n", "775 | \n", "357 | \n", "249 | \n", "A | \n", "W | \n", "408 | \n", "4 | \n", "3 | \n", "1000.0 | \n", "A | \n", "
322 rows × 20 columns
\n", "| \n", " | Name | \n", "AtBat | \n", "Hits | \n", "HmRun | \n", "Runs | \n", "RBI | \n", "Walks | \n", "Years | \n", "CAtBat | \n", "CHits | \n", "... | \n", "CRuns | \n", "CRBI | \n", "CWalks | \n", "League | \n", "Division | \n", "PutOuts | \n", "Assists | \n", "Errors | \n", "Salary | \n", "NewLeague | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "-Andy Allanson | \n", "293 | \n", "66 | \n", "1 | \n", "30 | \n", "29 | \n", "14 | \n", "1 | \n", "293 | \n", "66 | \n", "... | \n", "30 | \n", "29 | \n", "14 | \n", "A | \n", "E | \n", "446 | \n", "33 | \n", "20 | \n", "NaN | \n", "A | \n", "
| 1 | \n", "-Alan Ashby | \n", "315 | \n", "81 | \n", "7 | \n", "24 | \n", "38 | \n", "39 | \n", "14 | \n", "3449 | \n", "835 | \n", "... | \n", "321 | \n", "414 | \n", "375 | \n", "N | \n", "W | \n", "632 | \n", "43 | \n", "10 | \n", "475.0 | \n", "N | \n", "
| 2 | \n", "-Alvin Davis | \n", "479 | \n", "130 | \n", "18 | \n", "66 | \n", "72 | \n", "76 | \n", "3 | \n", "1624 | \n", "457 | \n", "... | \n", "224 | \n", "266 | \n", "263 | \n", "A | \n", "W | \n", "880 | \n", "82 | \n", "14 | \n", "480.0 | \n", "A | \n", "
| 3 | \n", "-Andre Dawson | \n", "496 | \n", "141 | \n", "20 | \n", "65 | \n", "78 | \n", "37 | \n", "11 | \n", "5628 | \n", "1575 | \n", "... | \n", "828 | \n", "838 | \n", "354 | \n", "N | \n", "E | \n", "200 | \n", "11 | \n", "3 | \n", "500.0 | \n", "N | \n", "
| 4 | \n", "-Andres Galarraga | \n", "321 | \n", "87 | \n", "10 | \n", "39 | \n", "42 | \n", "30 | \n", "2 | \n", "396 | \n", "101 | \n", "... | \n", "48 | \n", "46 | \n", "33 | \n", "N | \n", "E | \n", "805 | \n", "40 | \n", "4 | \n", "91.5 | \n", "N | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 317 | \n", "-Willie McGee | \n", "497 | \n", "127 | \n", "7 | \n", "65 | \n", "48 | \n", "37 | \n", "5 | \n", "2703 | \n", "806 | \n", "... | \n", "379 | \n", "311 | \n", "138 | \n", "N | \n", "E | \n", "325 | \n", "9 | \n", "3 | \n", "700.0 | \n", "N | \n", "
| 318 | \n", "-Willie Randolph | \n", "492 | \n", "136 | \n", "5 | \n", "76 | \n", "50 | \n", "94 | \n", "12 | \n", "5511 | \n", "1511 | \n", "... | \n", "897 | \n", "451 | \n", "875 | \n", "A | \n", "E | \n", "313 | \n", "381 | \n", "20 | \n", "875.0 | \n", "A | \n", "
| 319 | \n", "-Wayne Tolleson | \n", "475 | \n", "126 | \n", "3 | \n", "61 | \n", "43 | \n", "52 | \n", "6 | \n", "1700 | \n", "433 | \n", "... | \n", "217 | \n", "93 | \n", "146 | \n", "A | \n", "W | \n", "37 | \n", "113 | \n", "7 | \n", "385.0 | \n", "A | \n", "
| 320 | \n", "-Willie Upshaw | \n", "573 | \n", "144 | \n", "9 | \n", "85 | \n", "60 | \n", "78 | \n", "8 | \n", "3198 | \n", "857 | \n", "... | \n", "470 | \n", "420 | \n", "332 | \n", "A | \n", "E | \n", "1314 | \n", "131 | \n", "12 | \n", "960.0 | \n", "A | \n", "
| 321 | \n", "-Willie Wilson | \n", "631 | \n", "170 | \n", "9 | \n", "77 | \n", "44 | \n", "31 | \n", "11 | \n", "4908 | \n", "1457 | \n", "... | \n", "775 | \n", "357 | \n", "249 | \n", "A | \n", "W | \n", "408 | \n", "4 | \n", "3 | \n", "1000.0 | \n", "A | \n", "
322 rows × 21 columns
\n", "