Update chapters 1, 2 and 4

2026-01-27 02:00:27 +01:00 · 2016-09-27 16:39:16 +02:00
parent 8195045f15
commit 68fb1971d7
3 changed files with 290 additions and 59 deletions
--- a/training_linear_models.ipynb
+++ b/training_linear_models.ipynb
@@ -4,25 +4,50 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "**Training Linear Models**"
+    "**Chapter 4 – Training Linear Models**"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "_This notebook contains all the sample code and solutions to the exercises in chapter 4._"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Setup"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "First, let's make sure this notebook works well in both Python 2 and 3, import a few common modules, ensure Matplotlib plots figures inline, and prepare a function to save the figures:"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
+    "# To support both Python 2 and Python 3\n",
    "from __future__ import division, print_function, unicode_literals\n",
    "\n",
-    "import os\n",
-    "\n",
+    "# Common imports\n",
    "import numpy as np\n",
    "import numpy.random as rnd\n",
-    "rnd.seed(42) # to make this notebook's output stable across runs\n",
+    "import os\n",
    "\n",
+    "# To make this notebook's output stable across runs\n",
+    "rnd.seed(42)\n",
+    "\n",
+    "# To plot pretty figures\n",
    "%matplotlib inline\n",
    "import matplotlib\n",
    "import matplotlib.pyplot as plt\n",
@@ -30,6 +55,7 @@
    "plt.rcParams['xtick.labelsize'] = 12\n",
    "plt.rcParams['ytick.labelsize'] = 12\n",
    "\n",
+    "# Where to save the figures\n",
    "PROJECT_ROOT_DIR = \".\"\n",
    "CHAPTER_ID = \"training_linear_models\"\n",
    "\n",
@@ -38,7 +64,7 @@
    "    print(\"Saving figure\", fig_id)\n",
    "    if tight_layout:\n",
    "        plt.tight_layout()\n",
-    "    plt.savefig(path, format='png', dpi=300)"
+    "    plt.savefig(path, format='png', dpi=300)\n"
   ]
  },
  {
@@ -72,7 +98,7 @@
    "plt.xlabel(\"$x_1$\", fontsize=18)\n",
    "plt.ylabel(\"$y$\", rotation=0, fontsize=18)\n",
    "plt.axis([0, 2, 0, 15])\n",
-    "save_fig(\"generated_data\")\n",
+    "save_fig(\"generated_data_plot\")\n",
    "plt.show()"
   ]
  },
@@ -86,8 +112,8 @@
   "source": [
    "import numpy.linalg as LA\n",
    "\n",
-    "Xb = np.c_[np.ones((100, 1)), X]  # add x0 = 1 to each instance\n",
-    "theta_best = LA.inv(Xb.T.dot(Xb)).dot(Xb.T).dot(y)"
+    "X_b = np.c_[np.ones((100, 1)), X]  # add x0 = 1 to each instance\n",
+    "theta_best = LA.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y)"
   ]
  },
  {
@@ -110,8 +136,8 @@
   "outputs": [],
   "source": [
    "X_new = np.array([[0], [2]])\n",
-    "X_newb = np.c_[np.ones((2, 1)), X_new]  # add x0 = 1 to each instance\n",
-    "y_predict = X_newb.dot(theta_best)\n",
+    "X_new_b = np.c_[np.ones((2, 1)), X_new]  # add x0 = 1 to each instance\n",
+    "y_predict = X_new_b.dot(theta_best)\n",
    "y_predict"
   ]
  },
@@ -176,15 +202,15 @@
    "theta_path_bgd = []\n",
    "\n",
    "def plot_gradient_descent(theta, eta, theta_path=None):\n",
-    "    m = len(Xb)\n",
+    "    m = len(X_b)\n",
    "    plt.plot(X, y, \"b.\")\n",
    "    n_iterations = 1000\n",
    "    for iteration in range(n_iterations):\n",
    "        if iteration < 10:\n",
-    "            y_predict = X_newb.dot(theta)\n",
+    "            y_predict = X_new_b.dot(theta)\n",
    "            style = \"b-\" if iteration > 0 else \"r--\"\n",
    "            plt.plot(X_new, y_predict, style)\n",
-    "        gradients = 2/m * Xb.T.dot(Xb.dot(theta) - y)\n",
+    "        gradients = 2/m * X_b.T.dot(X_b.dot(theta) - y)\n",
    "        theta = theta - eta * gradients\n",
    "        if theta_path is not None:\n",
    "            theta_path.append(theta)\n",
@@ -231,19 +257,17 @@
    "def learning_schedule(t):\n",
    "    return t0 / (t + t1)\n",
    "\n",
-    "m = len(Xb)\n",
+    "m = len(X_b)\n",
    "\n",
    "for epoch in range(n_iterations):\n",
-    "    shuffled_indices = rnd.permutation(m)\n",
-    "    Xb_shuffled = Xb[shuffled_indices]\n",
-    "    y_shuffled = y[shuffled_indices]\n",
    "    for i in range(m):\n",
    "        if epoch == 0 and i < 20:\n",
-    "            y_predict = X_newb.dot(theta)\n",
+    "            y_predict = X_new_b.dot(theta)\n",
    "            style = \"b-\" if i > 0 else \"r--\"\n",
    "            plt.plot(X_new, y_predict, style)\n",
-    "        xi = Xb_shuffled[i:i+1]\n",
-    "        yi = y_shuffled[i:i+1]\n",
+    "        random_index = rnd.randint(m)\n",
+    "        xi = X_b[random_index:random_index+1]\n",
+    "        yi = y[random_index:random_index+1]\n",
    "        gradients = 2 * xi.T.dot(xi.dot(theta) - yi)\n",
    "        eta = learning_schedule(epoch * m + i)\n",
    "        theta = theta - eta * gradients\n",
@@ -322,11 +346,11 @@
    "t = 0\n",
    "for epoch in range(n_iterations):\n",
    "    shuffled_indices = rnd.permutation(m)\n",
-    "    Xb_shuffled = Xb[shuffled_indices]\n",
+    "    X_b_shuffled = X_b[shuffled_indices]\n",
    "    y_shuffled = y[shuffled_indices]\n",
    "    for i in range(0, m, minibatch_size):\n",
    "        t += 1\n",
-    "        xi = Xb_shuffled[i:i+minibatch_size]\n",
+    "        xi = X_b_shuffled[i:i+minibatch_size]\n",
    "        yi = y_shuffled[i:i+minibatch_size]\n",
    "        gradients = 2 * xi.T.dot(xi.dot(theta) - yi)\n",
    "        eta = learning_schedule(t)\n",
@@ -796,6 +820,114 @@
    "best_epoch, best_model"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "%matplotlib inline\n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "t1a, t1b, t2a, t2b = -1, 3, -1.5, 1.5\n",
+    "\n",
+    "# ignoring bias term\n",
+    "t1s = np.linspace(t1a, t1b, 500)\n",
+    "t2s = np.linspace(t2a, t2b, 500)\n",
+    "t1, t2 = np.meshgrid(t1s, t2s)\n",
+    "T = np.c_[t1.ravel(), t2.ravel()]\n",
+    "Xr = np.array([[-1, 1], [-0.3, -1], [1, 0.1]])\n",
+    "yr = 2 * Xr[:, :1] + 0.5 * Xr[:, 1:]\n",
+    "\n",
+    "J = (1/len(Xr) * np.sum((T.dot(Xr.T) - yr.T)**2, axis=1)).reshape(t1.shape)\n",
+    "\n",
+    "N1 = np.linalg.norm(T, ord=1, axis=1).reshape(t1.shape)\n",
+    "N2 = np.linalg.norm(T, ord=2, axis=1).reshape(t1.shape)\n",
+    "\n",
+    "t_min_idx = np.unravel_index(np.argmin(J), J.shape)\n",
+    "t1_min, t2_min = t1[t_min_idx], t2[t_min_idx]\n",
+    "\n",
+    "t_init = np.array([[0.25], [-1]])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "def bgd_path(theta, X, y, l1, l2, core = 1, eta = 0.1, n_iterations = 50):\n",
+    "    path = [theta]\n",
+    "    for iteration in range(n_iterations):\n",
+    "        gradients = core * 2/len(X) * X.T.dot(X.dot(theta) - y) + l1 * np.sign(theta) + 2 * l2 * theta\n",
+    "\n",
+    "        theta = theta - eta * gradients\n",
+    "        path.append(theta)\n",
+    "    return np.array(path)\n",
+    "\n",
+    "plt.figure(figsize=(12, 8))\n",
+    "for i, N, l1, l2, title in ((0, N1, 0.5, 0, \"Lasso\"), (1, N2, 0,  0.1, \"Ridge\")):\n",
+    "    JR = J + l1 * N1 + l2 * N2**2\n",
+    "    \n",
+    "    tr_min_idx = np.unravel_index(np.argmin(JR), JR.shape)\n",
+    "    t1r_min, t2r_min = t1[tr_min_idx], t2[tr_min_idx]\n",
+    "\n",
+    "    levelsJ=(np.exp(np.linspace(0, 1, 20)) - 1) * (np.max(J) - np.min(J)) + np.min(J)\n",
+    "    levelsJR=(np.exp(np.linspace(0, 1, 20)) - 1) * (np.max(JR) - np.min(JR)) + np.min(JR)\n",
+    "    levelsN=np.linspace(0, np.max(N), 10)\n",
+    "    \n",
+    "    path_J = bgd_path(t_init, Xr, yr, l1=0, l2=0)\n",
+    "    path_JR = bgd_path(t_init, Xr, yr, l1, l2)\n",
+    "    path_N = bgd_path(t_init, Xr, yr, np.sign(l1)/3, np.sign(l2), core=0)\n",
+    "\n",
+    "    plt.subplot(221 + i * 2)\n",
+    "    plt.grid(True)\n",
+    "    plt.axhline(y=0, color='k')\n",
+    "    plt.axvline(x=0, color='k')\n",
+    "    plt.contourf(t1, t2, J, levels=levelsJ, alpha=0.9)\n",
+    "    plt.contour(t1, t2, N, levels=levelsN)\n",
+    "    plt.plot(path_J[:, 0], path_J[:, 1], \"w-o\")\n",
+    "    plt.plot(path_N[:, 0], path_N[:, 1], \"y-^\")\n",
+    "    plt.plot(t1_min, t2_min, \"rs\")\n",
+    "    plt.title(r\"$\\ell_{}$ penalty\".format(i + 1), fontsize=16)\n",
+    "    plt.axis([t1a, t1b, t2a, t2b])\n",
+    "\n",
+    "    plt.subplot(222 + i * 2)\n",
+    "    plt.grid(True)\n",
+    "    plt.axhline(y=0, color='k')\n",
+    "    plt.axvline(x=0, color='k')\n",
+    "    plt.contourf(t1, t2, JR, levels=levelsJR, alpha=0.9)\n",
+    "    plt.plot(path_JR[:, 0], path_JR[:, 1], \"w-o\")\n",
+    "    plt.plot(t1r_min, t2r_min, \"rs\")\n",
+    "    plt.title(title, fontsize=16)\n",
+    "    plt.axis([t1a, t1b, t2a, t2b])\n",
+    "\n",
+    "for subplot in (221, 223):\n",
+    "    plt.subplot(subplot)\n",
+    "    plt.ylabel(r\"$\\theta_2$\", fontsize=20, rotation=0)\n",
+    "\n",
+    "for subplot in (223, 224):\n",
+    "    plt.subplot(subplot)\n",
+    "    plt.xlabel(r\"$\\theta_1$\", fontsize=20)\n",
+    "\n",
+    "save_fig(\"lasso_vs_ridge_plot\")\n",
+    "plt.show()"
+   ]
+  },
  {
   "cell_type": "markdown",
   "metadata": {},
@@ -805,7 +937,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": 40,
   "metadata": {
    "collapsed": false
   },
@@ -828,7 +960,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 38,
+   "execution_count": 41,
   "metadata": {
    "collapsed": false
   },
@@ -841,7 +973,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 39,
+   "execution_count": 42,
   "metadata": {
    "collapsed": false
   },
@@ -852,7 +984,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 40,
+   "execution_count": 43,
   "metadata": {
    "collapsed": false
   },
@@ -889,7 +1021,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 41,
+   "execution_count": 44,
   "metadata": {
    "collapsed": false
   },
@@ -900,7 +1032,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 42,
+   "execution_count": 45,
   "metadata": {
    "collapsed": false
   },
@@ -911,7 +1043,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 43,
+   "execution_count": 46,
   "metadata": {
    "collapsed": false
   },
@@ -957,7 +1089,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 44,
+   "execution_count": 47,
   "metadata": {
    "collapsed": false
   },
@@ -1005,7 +1137,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 45,
+   "execution_count": 48,
   "metadata": {
    "collapsed": false
   },
@@ -1016,7 +1148,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 46,
+   "execution_count": 49,
   "metadata": {
    "collapsed": false
   },
@@ -1024,6 +1156,29 @@
   "source": [
    "softmax_reg.predict_proba([[5, 2]])"
   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Exercise solutions"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Coming soon**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
  }
 ],
 "metadata": {
@@ -1044,10 +1199,14 @@
   "pygments_lexer": "ipython3",
   "version": "3.5.1"
  },
+  "nav_menu": {},
  "toc": {
+   "navigate_menu": true,
+   "number_sections": true,
+   "sideBar": true,
+   "threshold": 6,
   "toc_cell": false,
-   "toc_number_sections": true,
-   "toc_threshold": 6,
+   "toc_section_display": "block",
   "toc_window_display": false
  }
 },