mirror of
https://github.com/ArthurDanjou/handson-ml3.git
synced 2026-01-27 02:00:27 +01:00
Update chapters 1, 2 and 4
This commit is contained in:
@@ -4,25 +4,50 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Training Linear Models**"
|
||||
"**Chapter 4 – Training Linear Models**"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"_This notebook contains all the sample code and solutions to the exercices in chapter 4._"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Setup"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# To support both python 2 and python 3\n",
|
||||
"from __future__ import division, print_function, unicode_literals\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"# Common imports\n",
|
||||
"import numpy as np\n",
|
||||
"import numpy.random as rnd\n",
|
||||
"rnd.seed(42) # to make this notebook's output stable across runs\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"# to make this notebook's output stable across runs\n",
|
||||
"rnd.seed(42)\n",
|
||||
"\n",
|
||||
"# To plot pretty figures\n",
|
||||
"%matplotlib inline\n",
|
||||
"import matplotlib\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
@@ -30,6 +55,7 @@
|
||||
"plt.rcParams['xtick.labelsize'] = 12\n",
|
||||
"plt.rcParams['ytick.labelsize'] = 12\n",
|
||||
"\n",
|
||||
"# Where to save the figures\n",
|
||||
"PROJECT_ROOT_DIR = \".\"\n",
|
||||
"CHAPTER_ID = \"training_linear_models\"\n",
|
||||
"\n",
|
||||
@@ -38,7 +64,7 @@
|
||||
" print(\"Saving figure\", fig_id)\n",
|
||||
" if tight_layout:\n",
|
||||
" plt.tight_layout()\n",
|
||||
" plt.savefig(path, format='png', dpi=300)"
|
||||
" plt.savefig(path, format='png', dpi=300)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -72,7 +98,7 @@
|
||||
"plt.xlabel(\"$x_1$\", fontsize=18)\n",
|
||||
"plt.ylabel(\"$y$\", rotation=0, fontsize=18)\n",
|
||||
"plt.axis([0, 2, 0, 15])\n",
|
||||
"save_fig(\"generated_data\")\n",
|
||||
"save_fig(\"generated_data_plot\")\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
@@ -86,8 +112,8 @@
|
||||
"source": [
|
||||
"import numpy.linalg as LA\n",
|
||||
"\n",
|
||||
"Xb = np.c_[np.ones((100, 1)), X] # add x0 = 1 to each instance\n",
|
||||
"theta_best = LA.inv(Xb.T.dot(Xb)).dot(Xb.T).dot(y)"
|
||||
"X_b = np.c_[np.ones((100, 1)), X] # add x0 = 1 to each instance\n",
|
||||
"theta_best = LA.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -110,8 +136,8 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"X_new = np.array([[0], [2]])\n",
|
||||
"X_newb = np.c_[np.ones((2, 1)), X_new] # add x0 = 1 to each instance\n",
|
||||
"y_predict = X_newb.dot(theta_best)\n",
|
||||
"X_new_b = np.c_[np.ones((2, 1)), X_new] # add x0 = 1 to each instance\n",
|
||||
"y_predict = X_new_b.dot(theta_best)\n",
|
||||
"y_predict"
|
||||
]
|
||||
},
|
||||
@@ -176,15 +202,15 @@
|
||||
"theta_path_bgd = []\n",
|
||||
"\n",
|
||||
"def plot_gradient_descent(theta, eta, theta_path=None):\n",
|
||||
" m = len(Xb)\n",
|
||||
" m = len(X_b)\n",
|
||||
" plt.plot(X, y, \"b.\")\n",
|
||||
" n_iterations = 1000\n",
|
||||
" for iteration in range(n_iterations):\n",
|
||||
" if iteration < 10:\n",
|
||||
" y_predict = X_newb.dot(theta)\n",
|
||||
" y_predict = X_new_b.dot(theta)\n",
|
||||
" style = \"b-\" if iteration > 0 else \"r--\"\n",
|
||||
" plt.plot(X_new, y_predict, style)\n",
|
||||
" gradients = 2/m * Xb.T.dot(Xb.dot(theta) - y)\n",
|
||||
" gradients = 2/m * X_b.T.dot(X_b.dot(theta) - y)\n",
|
||||
" theta = theta - eta * gradients\n",
|
||||
" if theta_path is not None:\n",
|
||||
" theta_path.append(theta)\n",
|
||||
@@ -231,19 +257,17 @@
|
||||
"def learning_schedule(t):\n",
|
||||
" return t0 / (t + t1)\n",
|
||||
"\n",
|
||||
"m = len(Xb)\n",
|
||||
"m = len(X_b)\n",
|
||||
"\n",
|
||||
"for epoch in range(n_iterations):\n",
|
||||
" shuffled_indices = rnd.permutation(m)\n",
|
||||
" Xb_shuffled = Xb[shuffled_indices]\n",
|
||||
" y_shuffled = y[shuffled_indices]\n",
|
||||
" for i in range(m):\n",
|
||||
" if epoch == 0 and i < 20:\n",
|
||||
" y_predict = X_newb.dot(theta)\n",
|
||||
" y_predict = X_new_b.dot(theta)\n",
|
||||
" style = \"b-\" if i > 0 else \"r--\"\n",
|
||||
" plt.plot(X_new, y_predict, style)\n",
|
||||
" xi = Xb_shuffled[i:i+1]\n",
|
||||
" yi = y_shuffled[i:i+1]\n",
|
||||
" random_index = rnd.randint(m)\n",
|
||||
" xi = X_b[random_index:random_index+1]\n",
|
||||
" yi = y[random_index:random_index+1]\n",
|
||||
" gradients = 2 * xi.T.dot(xi.dot(theta) - yi)\n",
|
||||
" eta = learning_schedule(epoch * m + i)\n",
|
||||
" theta = theta - eta * gradients\n",
|
||||
@@ -322,11 +346,11 @@
|
||||
"t = 0\n",
|
||||
"for epoch in range(n_iterations):\n",
|
||||
" shuffled_indices = rnd.permutation(m)\n",
|
||||
" Xb_shuffled = Xb[shuffled_indices]\n",
|
||||
" X_b_shuffled = X_b[shuffled_indices]\n",
|
||||
" y_shuffled = y[shuffled_indices]\n",
|
||||
" for i in range(0, m, minibatch_size):\n",
|
||||
" t += 1\n",
|
||||
" xi = Xb_shuffled[i:i+minibatch_size]\n",
|
||||
" xi = X_b_shuffled[i:i+minibatch_size]\n",
|
||||
" yi = y_shuffled[i:i+minibatch_size]\n",
|
||||
" gradients = 2 * xi.T.dot(xi.dot(theta) - yi)\n",
|
||||
" eta = learning_schedule(t)\n",
|
||||
@@ -796,6 +820,114 @@
|
||||
"best_epoch, best_model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 37,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%matplotlib inline\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import numpy as np\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 38,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"t1a, t1b, t2a, t2b = -1, 3, -1.5, 1.5\n",
|
||||
"\n",
|
||||
"# ignoring bias term\n",
|
||||
"t1s = np.linspace(t1a, t1b, 500)\n",
|
||||
"t2s = np.linspace(t2a, t2b, 500)\n",
|
||||
"t1, t2 = np.meshgrid(t1s, t2s)\n",
|
||||
"T = np.c_[t1.ravel(), t2.ravel()]\n",
|
||||
"Xr = np.array([[-1, 1], [-0.3, -1], [1, 0.1]])\n",
|
||||
"yr = 2 * Xr[:, :1] + 0.5 * Xr[:, 1:]\n",
|
||||
"\n",
|
||||
"J = (1/len(Xr) * np.sum((T.dot(Xr.T) - yr.T)**2, axis=1)).reshape(t1.shape)\n",
|
||||
"\n",
|
||||
"N1 = np.linalg.norm(T, ord=1, axis=1).reshape(t1.shape)\n",
|
||||
"N2 = np.linalg.norm(T, ord=2, axis=1).reshape(t1.shape)\n",
|
||||
"\n",
|
||||
"t_min_idx = np.unravel_index(np.argmin(J), J.shape)\n",
|
||||
"t1_min, t2_min = t1[t_min_idx], t2[t_min_idx]\n",
|
||||
"\n",
|
||||
"t_init = np.array([[0.25], [-1]])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 39,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def bgd_path(theta, X, y, l1, l2, core = 1, eta = 0.1, n_iterations = 50):\n",
|
||||
" path = [theta]\n",
|
||||
" for iteration in range(n_iterations):\n",
|
||||
" gradients = core * 2/len(X) * X.T.dot(X.dot(theta) - y) + l1 * np.sign(theta) + 2 * l2 * theta\n",
|
||||
"\n",
|
||||
" theta = theta - eta * gradients\n",
|
||||
" path.append(theta)\n",
|
||||
" return np.array(path)\n",
|
||||
"\n",
|
||||
"plt.figure(figsize=(12, 8))\n",
|
||||
"for i, N, l1, l2, title in ((0, N1, 0.5, 0, \"Lasso\"), (1, N2, 0, 0.1, \"Ridge\")):\n",
|
||||
" JR = J + l1 * N1 + l2 * N2**2\n",
|
||||
" \n",
|
||||
" tr_min_idx = np.unravel_index(np.argmin(JR), JR.shape)\n",
|
||||
" t1r_min, t2r_min = t1[tr_min_idx], t2[tr_min_idx]\n",
|
||||
"\n",
|
||||
" levelsJ=(np.exp(np.linspace(0, 1, 20)) - 1) * (np.max(J) - np.min(J)) + np.min(J)\n",
|
||||
" levelsJR=(np.exp(np.linspace(0, 1, 20)) - 1) * (np.max(JR) - np.min(JR)) + np.min(JR)\n",
|
||||
" levelsN=np.linspace(0, np.max(N), 10)\n",
|
||||
" \n",
|
||||
" path_J = bgd_path(t_init, Xr, yr, l1=0, l2=0)\n",
|
||||
" path_JR = bgd_path(t_init, Xr, yr, l1, l2)\n",
|
||||
" path_N = bgd_path(t_init, Xr, yr, np.sign(l1)/3, np.sign(l2), core=0)\n",
|
||||
"\n",
|
||||
" plt.subplot(221 + i * 2)\n",
|
||||
" plt.grid(True)\n",
|
||||
" plt.axhline(y=0, color='k')\n",
|
||||
" plt.axvline(x=0, color='k')\n",
|
||||
" plt.contourf(t1, t2, J, levels=levelsJ, alpha=0.9)\n",
|
||||
" plt.contour(t1, t2, N, levels=levelsN)\n",
|
||||
" plt.plot(path_J[:, 0], path_J[:, 1], \"w-o\")\n",
|
||||
" plt.plot(path_N[:, 0], path_N[:, 1], \"y-^\")\n",
|
||||
" plt.plot(t1_min, t2_min, \"rs\")\n",
|
||||
" plt.title(r\"$\\ell_{}$ penalty\".format(i + 1), fontsize=16)\n",
|
||||
" plt.axis([t1a, t1b, t2a, t2b])\n",
|
||||
"\n",
|
||||
" plt.subplot(222 + i * 2)\n",
|
||||
" plt.grid(True)\n",
|
||||
" plt.axhline(y=0, color='k')\n",
|
||||
" plt.axvline(x=0, color='k')\n",
|
||||
" plt.contourf(t1, t2, JR, levels=levelsJR, alpha=0.9)\n",
|
||||
" plt.plot(path_JR[:, 0], path_JR[:, 1], \"w-o\")\n",
|
||||
" plt.plot(t1r_min, t2r_min, \"rs\")\n",
|
||||
" plt.title(title, fontsize=16)\n",
|
||||
" plt.axis([t1a, t1b, t2a, t2b])\n",
|
||||
"\n",
|
||||
"for subplot in (221, 223):\n",
|
||||
" plt.subplot(subplot)\n",
|
||||
" plt.ylabel(r\"$\\theta_2$\", fontsize=20, rotation=0)\n",
|
||||
"\n",
|
||||
"for subplot in (223, 224):\n",
|
||||
" plt.subplot(subplot)\n",
|
||||
" plt.xlabel(r\"$\\theta_1$\", fontsize=20)\n",
|
||||
"\n",
|
||||
"save_fig(\"lasso_vs_ridge_plot\")\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -805,7 +937,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 37,
|
||||
"execution_count": 40,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
@@ -828,7 +960,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 38,
|
||||
"execution_count": 41,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
@@ -841,7 +973,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 39,
|
||||
"execution_count": 42,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
@@ -852,7 +984,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 40,
|
||||
"execution_count": 43,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
@@ -889,7 +1021,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 41,
|
||||
"execution_count": 44,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
@@ -900,7 +1032,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 42,
|
||||
"execution_count": 45,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
@@ -911,7 +1043,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 43,
|
||||
"execution_count": 46,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
@@ -957,7 +1089,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 44,
|
||||
"execution_count": 47,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
@@ -1005,7 +1137,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 45,
|
||||
"execution_count": 48,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
@@ -1016,7 +1148,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 46,
|
||||
"execution_count": 49,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
@@ -1024,6 +1156,29 @@
|
||||
"source": [
|
||||
"softmax_reg.predict_proba([[5, 2]])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Exercise solutions"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Coming soon**"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -1044,10 +1199,14 @@
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.5.1"
|
||||
},
|
||||
"nav_menu": {},
|
||||
"toc": {
|
||||
"navigate_menu": true,
|
||||
"number_sections": true,
|
||||
"sideBar": true,
|
||||
"threshold": 6,
|
||||
"toc_cell": false,
|
||||
"toc_number_sections": true,
|
||||
"toc_threshold": 6,
|
||||
"toc_section_display": "block",
|
||||
"toc_window_display": false
|
||||
}
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user