Mirror of https://github.com/ArthurDanjou/handson-ml3.git

Commit: Sync notebook with book's code examples, and better identify extra code
@@ -91,8 +91,8 @@
 "plt.rc('font', size=14)\n",
 "plt.rc('axes', labelsize=14, titlesize=14)\n",
 "plt.rc('legend', fontsize=14)\n",
-"plt.rc('xtick',labelsize=10)\n",
-"plt.rc('ytick',labelsize=10)"
+"plt.rc('xtick', labelsize=10)\n",
+"plt.rc('ytick', labelsize=10)"
 ]
 },
 {
@@ -154,7 +154,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"# not in the book – generates and saves Figure 4–1\n",
+"# extra code – generates and saves Figure 4–1\n",
 "\n",
 "import matplotlib.pyplot as plt\n",
 "\n",
@@ -209,11 +209,11 @@
 "source": [
 "import matplotlib.pyplot as plt\n",
 "\n",
-"plt.figure(figsize=(6, 4))  # not in the book – not needed, just formatting\n",
+"plt.figure(figsize=(6, 4))  # extra code – not needed, just formatting\n",
 "plt.plot(X_new, y_predict, \"r-\", label=\"Predictions\")\n",
 "plt.plot(X, y, \"b.\")\n",
 "\n",
-"# not in the book – beautifies and saves Figure 4–2\n",
+"# extra code – beautifies and saves Figure 4–2\n",
 "plt.xlabel(\"$x_1$\")\n",
 "plt.ylabel(\"$y$\", rotation=0)\n",
 "plt.axis([0, 2, 0, 15])\n",
@@ -327,7 +327,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"# not in the book – generates and saves Figure 4–8\n",
+"# extra code – generates and saves Figure 4–8\n",
 "\n",
 "import matplotlib as mpl\n",
 "\n",
@@ -352,9 +352,9 @@
 "    return theta_path\n",
 "\n",
 "np.random.seed(42)\n",
-"theta = np.random.randn(2,1)  # random initialization\n",
+"theta = np.random.randn(2, 1)  # random initialization\n",
 "\n",
-"plt.figure(figsize=(10,4))\n",
+"plt.figure(figsize=(10, 4))\n",
 "plt.subplot(131)\n",
 "plot_gradient_descent(theta, eta=0.02)\n",
 "plt.ylabel(\"$y$\", rotation=0)\n",
@@ -381,8 +381,8 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"theta_path_sgd = []  # not in the book – we need to store the path of theta in\n",
-"                     # the parameter space to plot the next figure"
+"theta_path_sgd = []  # extra code – we need to store the path of theta in the\n",
+"                     # parameter space to plot the next figure"
 ]
 },
 {
@@ -400,13 +400,13 @@
 "np.random.seed(42)\n",
 "theta = np.random.randn(2, 1)  # random initialization\n",
 "\n",
-"n_shown = 20  # not in the book – just needed to generate the figure below\n",
-"plt.figure(figsize=(6, 4))  # not in the book – not needed, just formatting\n",
+"n_shown = 20  # extra code – just needed to generate the figure below\n",
+"plt.figure(figsize=(6, 4))  # extra code – not needed, just formatting\n",
 "\n",
 "for epoch in range(n_epochs):\n",
 "    for iteration in range(m):\n",
 "\n",
-"        # not in the book – these 4 lines are used to generate the figure\n",
+"        # extra code – these 4 lines are used to generate the figure\n",
 "        if epoch == 0 and iteration < n_shown:\n",
 "            y_predict = X_new_b @ theta\n",
 "            color = mpl.colors.rgb2hex(plt.cm.OrRd(iteration / n_shown + 0.15))\n",
@@ -415,12 +415,12 @@
 "        random_index = np.random.randint(m)\n",
 "        xi = X_b[random_index : random_index + 1]\n",
 "        yi = y[random_index : random_index + 1]\n",
-"        gradients = 2 / 1 * xi.T @ (xi @ theta - yi)\n",
+"        gradients = 2 * xi.T @ (xi @ theta - yi)  # for SGD, do not divide by m\n",
 "        eta = learning_schedule(epoch * m + iteration)\n",
 "        theta = theta - eta * gradients\n",
-"        theta_path_sgd.append(theta)  # not in the book – to generate the figure\n",
+"        theta_path_sgd.append(theta)  # extra code – to generate the figure\n",
 "\n",
-"# not in the book – this section beautifies and saves Figure 4–10\n",
+"# extra code – this section beautifies and saves Figure 4–10\n",
 "plt.plot(X, y, \"b.\")\n",
 "plt.xlabel(\"$x_1$\")\n",
 "plt.ylabel(\"$y$\", rotation=0)\n",
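A note on the corrected gradient line above: batch gradient descent averages the MSE gradient over all m instances (2/m · Xᵀ(Xθ − y)), but a stochastic step uses a single instance, so there is nothing to average. A minimal, self-contained sketch of one such step with stand-in data and a fixed learning rate (the notebook's `X_b`, `y`, `theta`, and `learning_schedule` live in earlier cells not shown in this diff):

```python
import numpy as np

rng = np.random.default_rng(42)
X = 2 * rng.random((100, 1))
y = 4 + 3 * X + rng.standard_normal((100, 1))
X_b = np.c_[np.ones((100, 1)), X]          # add x0 = 1 to every instance
theta = rng.standard_normal((2, 1))        # random initialization

random_index = rng.integers(100)
xi = X_b[random_index : random_index + 1]  # one instance, so no 1/m factor
yi = y[random_index : random_index + 1]
gradients = 2 * xi.T @ (xi @ theta - yi)
theta = theta - 0.1 * gradients            # 0.1 stands in for learning_schedule(...)
```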
@@ -449,9 +449,9 @@
 "source": [
 "from sklearn.linear_model import SGDRegressor\n",
 "\n",
-"sgd_reg = SGDRegressor(max_iter=1000, tol=1e-3, penalty=None, eta0=0.1,\n",
-"                       random_state=42)\n",
-"sgd_reg.fit(X, y.ravel())  # y.ravel() because fit() expects 1D targets"
+"sgd_reg = SGDRegressor(max_iter=1000, tol=1e-5, penalty=None, eta0=0.01,\n",
+"                       n_iter_no_change=100, random_state=42)\n",
+"sgd_reg.fit(X, y.ravel())  # y.ravel() because fit() expects 1D targets\n"
 ]
 },
 {
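The retuned `SGDRegressor` call above trades speed for precision: a smaller learning rate (`eta0=0.01`), a tighter stopping tolerance (`tol=1e-5`), and `n_iter_no_change=100` so training only stops after 100 epochs without improvement. A self-contained way to sanity-check the result, using stand-in linear data since the notebook's `X` and `y` come from earlier cells:

```python
import numpy as np
from sklearn.linear_model import SGDRegressor

rng = np.random.default_rng(42)
X = 2 * rng.random((100, 1))
y = 4 + 3 * X[:, 0] + rng.standard_normal(100)   # true intercept 4, slope 3

sgd_reg = SGDRegressor(max_iter=1000, tol=1e-5, penalty=None, eta0=0.01,
                       n_iter_no_change=100, random_state=42)
sgd_reg.fit(X, y)
print(sgd_reg.intercept_, sgd_reg.coef_)  # should land close to (4, 3)
print(sgd_reg.n_iter_)                    # how many epochs it actually ran
```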
@@ -483,7 +483,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"# not in the book – this cell generates and saves Figure 4–11\n",
+"# extra code – this cell generates and saves Figure 4–11\n",
 "\n",
 "from math import ceil\n",
 "\n",
@@ -558,7 +558,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"# not in the book – this cell generates and saves Figure 4–12\n",
+"# extra code – this cell generates and saves Figure 4–12\n",
 "plt.figure(figsize=(6, 4))\n",
 "plt.plot(X, y, \"b.\")\n",
 "plt.xlabel(\"$x_1$\")\n",
@@ -608,7 +608,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"# not in the book – this cell generates and saves Figure 4–13\n",
+"# extra code – this cell generates and saves Figure 4–13\n",
 "\n",
 "X_new = np.linspace(-3, 3, 100).reshape(100, 1)\n",
 "X_new_poly = poly_features.transform(X_new)\n",
@@ -632,7 +632,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"# not in the book – this cell generates and saves Figure 4–14\n",
+"# extra code – this cell generates and saves Figure 4–14\n",
 "\n",
 "from sklearn.preprocessing import StandardScaler\n",
 "from sklearn.pipeline import make_pipeline\n",
@@ -680,11 +680,11 @@
 "train_errors = -train_scores.mean(axis=1)\n",
 "valid_errors = -valid_scores.mean(axis=1)\n",
 "\n",
-"plt.figure(figsize=(6, 4))  # not in the book – not need, just formatting\n",
+"plt.figure(figsize=(6, 4))  # extra code – not need, just formatting\n",
 "plt.plot(train_sizes, train_errors, \"r-+\", linewidth=2, label=\"train\")\n",
 "plt.plot(train_sizes, valid_errors, \"b-\", linewidth=3, label=\"valid\")\n",
 "\n",
-"# not in the book – beautifies and saves Figure 4–15\n",
+"# extra code – beautifies and saves Figure 4–15\n",
 "plt.xlabel(\"Training set size\")\n",
 "plt.ylabel(\"RMSE\")\n",
 "plt.grid()\n",
@@ -718,7 +718,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"# not in the book – generates and saves Figure 4–16\n",
+"# extra code – generates and saves Figure 4–16\n",
 "\n",
 "train_errors = -train_scores.mean(axis=1)\n",
 "valid_errors = -valid_scores.mean(axis=1)\n",
@@ -762,7 +762,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"# not in the book – we've done this type of generation several times before\n",
+"# extra code – we've done this type of generation several times before\n",
 "np.random.seed(42)\n",
 "m = 20\n",
 "X = 3 * np.random.rand(m, 1)\n",
@@ -776,7 +776,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"# not in the book – a quick peek at the dataset we just generated\n",
+"# extra code – a quick peek at the dataset we just generated\n",
 "plt.figure(figsize=(6, 4))\n",
 "plt.plot(X, y, \".\")\n",
 "plt.xlabel(\"$x_1$\")\n",
@@ -794,7 +794,7 @@
 "source": [
 "from sklearn.linear_model import Ridge\n",
 "\n",
-"ridge_reg = Ridge(alpha=1, solver=\"cholesky\")\n",
+"ridge_reg = Ridge(alpha=0.1, solver=\"cholesky\")\n",
 "ridge_reg.fit(X, y)\n",
 "ridge_reg.predict([[1.5]])"
 ]
@@ -805,7 +805,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"# not in the book – this cell generates and saves Figure 4–17\n",
+"# extra code – this cell generates and saves Figure 4–17\n",
 "\n",
 "def plot_model(model_class, polynomial, alphas, **model_kargs):\n",
 "    plt.plot(X, y, \"b.\", linewidth=3)\n",
@@ -845,8 +845,9 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"sgd_reg = SGDRegressor(penalty=\"l2\", random_state=42)\n",
-"sgd_reg.fit(X, y.ravel())\n",
+"sgd_reg = SGDRegressor(penalty=\"l2\", alpha=0.1 / m, tol=None,\n",
+"                       max_iter=1000, eta0=0.01, random_state=42)\n",
+"sgd_reg.fit(X, y.ravel())  # y.ravel() because fit() expects 1D targets\n",
 "sgd_reg.predict([[1.5]])"
 ]
 },
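Why `alpha=0.1 / m` above: `Ridge` applies its penalty alongside the sum of squared errors over all m training instances, while `SGDRegressor` regularizes the per-instance average loss, so dividing the Ridge alpha by m keeps the two penalties on roughly the same scale. A rough, self-contained comparison with stand-in data of the same shape as the notebook's small regularization dataset (m = 20):

```python
import numpy as np
from sklearn.linear_model import Ridge, SGDRegressor

m = 20
rng = np.random.default_rng(42)
X = 3 * rng.random((m, 1))
y = 1 + 0.5 * X[:, 0] + rng.standard_normal(m) / 1.5

ridge_reg = Ridge(alpha=0.1, solver="cholesky").fit(X, y)
sgd_reg = SGDRegressor(penalty="l2", alpha=0.1 / m, tol=None,
                       max_iter=1000, eta0=0.01, random_state=42).fit(X, y)

print(ridge_reg.intercept_, ridge_reg.coef_)
print(sgd_reg.intercept_, sgd_reg.coef_)   # expected to be in the same ballpark
```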
@@ -856,13 +857,36 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"# not in the book – show that we get roughly the same solution as earlier when\n",
-"# we use Stochastic Average GD (solver=\"sag\")\n",
-"ridge_reg = Ridge(alpha=1, solver=\"sag\", random_state=42)\n",
+"# extra code – show that we get roughly the same solution as earlier when\n",
+"# we use Stochastic Average GD (solver=\"sag\")\n",
+"ridge_reg = Ridge(alpha=0.1, solver=\"sag\", random_state=42)\n",
 "ridge_reg.fit(X, y)\n",
 "ridge_reg.predict([[1.5]])"
 ]
 },
+{
+"cell_type": "code",
+"execution_count": 40,
+"metadata": {},
+"outputs": [],
+"source": [
+"# extra code – shows the closed form solution of Ridge regression,\n",
+"# compare with the next Ridge model's learned parameters below\n",
+"alpha = 0.1\n",
+"A = np.array([[0., 0.], [0., 1.]])\n",
+"X_b = np.c_[np.ones(m), X]\n",
+"np.linalg.inv(X_b.T @ X_b + alpha * A) @ X_b.T @ y"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 41,
+"metadata": {},
+"outputs": [],
+"source": [
+"ridge_reg.intercept_, ridge_reg.coef_  # extra code"
+]
+},
 {
 "cell_type": "markdown",
 "metadata": {},
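The first added cell above is the closed-form Ridge solution written out with NumPy. It computes

$$
\hat{\boldsymbol{\theta}} = \left(\mathbf{X}^\top \mathbf{X} + \alpha \mathbf{A}\right)^{-1} \mathbf{X}^\top \mathbf{y}
$$

where A is the identity matrix with its top-left entry set to 0 (which is exactly what `np.array([[0., 0.], [0., 1.]])` is for this one-feature dataset), so the bias term is left unregularized.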
@@ -872,7 +896,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 40,
+"execution_count": 42,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -885,11 +909,11 @@
 },
 {
 "cell_type": "code",
-"execution_count": 41,
+"execution_count": 43,
 "metadata": {},
 "outputs": [],
 "source": [
-"# not in the book – this cell generates and saves Figure 4–18\n",
+"# extra code – this cell generates and saves Figure 4–18\n",
 "plt.figure(figsize=(9, 3.5))\n",
 "plt.subplot(121)\n",
 "plot_model(Lasso, polynomial=False, alphas=(0, 0.1, 1), random_state=42)\n",
@@ -903,11 +927,11 @@
 },
 {
 "cell_type": "code",
-"execution_count": 42,
+"execution_count": 44,
 "metadata": {},
 "outputs": [],
 "source": [
-"# not in the book – this BIG cell generates and saves Figure 4–19\n",
+"# extra code – this BIG cell generates and saves Figure 4–19\n",
 "\n",
 "t1a, t1b, t2a, t2b = -1, 3, -1.5, 1.5\n",
 "\n",
@@ -996,7 +1020,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 43,
+"execution_count": 45,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -1023,22 +1047,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 44,
-"metadata": {},
-"outputs": [],
-"source": [
-"# not in the book – this is the same code as earlier\n",
-"np.random.seed(42)\n",
-"m = 100\n",
-"X = 6 * np.random.rand(m, 1) - 3\n",
-"y = 0.5 * X ** 2 + X + 2 + np.random.randn(m, 1)\n",
-"X_train, y_train = X[: m // 2], y[: m // 2, 0]\n",
-"X_valid, y_valid = X[m // 2 :], y[m // 2 :, 0]"
-]
-},
-{
-"cell_type": "code",
-"execution_count": 45,
+"execution_count": 46,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -1046,6 +1055,14 @@
 "from sklearn.metrics import mean_squared_error\n",
 "from sklearn.preprocessing import StandardScaler\n",
 "\n",
+"# extra code – creates the same quadratic dataset as earlier and splits it\n",
+"np.random.seed(42)\n",
+"m = 100\n",
+"X = 6 * np.random.rand(m, 1) - 3\n",
+"y = 0.5 * X ** 2 + X + 2 + np.random.randn(m, 1)\n",
+"X_train, y_train = X[: m // 2], y[: m // 2, 0]\n",
+"X_valid, y_valid = X[m // 2 :], y[m // 2 :, 0]\n",
+"\n",
 "preprocessing = make_pipeline(PolynomialFeatures(degree=90, include_bias=False),\n",
 "                              StandardScaler())\n",
 "X_train_prep = preprocessing.fit_transform(X_train)\n",
@@ -1053,7 +1070,7 @@
 "sgd_reg = SGDRegressor(penalty=None, eta0=0.002, random_state=42)\n",
 "n_epochs = 500\n",
 "best_valid_rmse = float('inf')\n",
-"train_errors, val_errors = [], []  # not in the book – it's for the figure below\n",
+"train_errors, val_errors = [], []  # extra code – it's for the figure below\n",
 "\n",
 "for epoch in range(n_epochs):\n",
 "    sgd_reg.partial_fit(X_train_prep, y_train)\n",
@@ -1063,13 +1080,13 @@
 "        best_valid_rmse = val_error\n",
 "        best_model = deepcopy(sgd_reg)\n",
 "\n",
-"    # not in the book – we evaluate the train error and save it for the figure\n",
+"    # extra code – we evaluate the train error and save it for the figure\n",
 "    y_train_predict = sgd_reg.predict(X_train_prep)\n",
 "    train_error = mean_squared_error(y_train, y_train_predict, squared=False)\n",
 "    val_errors.append(val_error)\n",
 "    train_errors.append(train_error)\n",
 "\n",
-"# not in the book – this section generates and saves Figure 4–20\n",
+"# extra code – this section generates and saves Figure 4–20\n",
 "best_epoch = np.argmin(val_errors)\n",
 "plt.figure(figsize=(6, 4))\n",
 "plt.annotate('Best model',\n",
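One detail worth noting in the early-stopping loop above: `partial_fit` keeps updating the same estimator object in place, which is why the best model is captured with `deepcopy` rather than a plain reference. A self-contained sketch that stitches together the fragments of this cell, using stand-in quadratic data (the real cell also records train and validation errors for Figure 4–20):

```python
import numpy as np
from copy import deepcopy
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler

rng = np.random.default_rng(42)
X = 6 * rng.random((100, 1)) - 3
y = 0.5 * X[:, 0] ** 2 + X[:, 0] + 2 + rng.standard_normal(100)
X_train, X_valid, y_train, y_valid = X[:50], X[50:], y[:50], y[50:]

preprocessing = make_pipeline(PolynomialFeatures(degree=90, include_bias=False),
                              StandardScaler())
X_train_prep = preprocessing.fit_transform(X_train)
X_valid_prep = preprocessing.transform(X_valid)

sgd_reg = SGDRegressor(penalty=None, eta0=0.002, random_state=42)
best_valid_rmse, best_model = float("inf"), None
for epoch in range(500):
    sgd_reg.partial_fit(X_train_prep, y_train)
    y_valid_predict = sgd_reg.predict(X_valid_prep)
    val_error = mean_squared_error(y_valid, y_valid_predict, squared=False)
    if val_error < best_valid_rmse:
        best_valid_rmse = val_error
        best_model = deepcopy(sgd_reg)  # a snapshot, not a reference to sgd_reg
```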
@@ -1106,11 +1123,11 @@
 },
 {
 "cell_type": "code",
-"execution_count": 46,
+"execution_count": 47,
 "metadata": {},
 "outputs": [],
 "source": [
-"# not in the book – generates and saves Figure 4–21\n",
+"# extra code – generates and saves Figure 4–21\n",
 "\n",
 "lim = 6\n",
 "t = np.linspace(-lim, lim, 100)\n",
@@ -1140,7 +1157,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 47,
+"execution_count": 48,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -1150,22 +1167,13 @@
 "list(iris)"
 ]
 },
 {
 "cell_type": "code",
-"execution_count": 48,
-"metadata": {},
-"outputs": [],
-"source": [
-"print(iris.DESCR)  # not in the book – it's a bit too long"
-]
-},
-{
-"cell_type": "code",
 "execution_count": 49,
 "metadata": {},
 "outputs": [],
 "source": [
-"iris.data.head(3)"
+"print(iris.DESCR)  # extra code – it's a bit too long"
 ]
 },
 {
@@ -1174,7 +1182,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"iris.target.head(3)  # note that the instances are not shuffled"
+"iris.data.head(3)"
 ]
 },
 {
@@ -1183,7 +1191,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"iris.target_names"
+"iris.target.head(3)  # note that the instances are not shuffled"
 ]
 },
 {
@@ -1191,6 +1199,15 @@
 "execution_count": 52,
 "metadata": {},
 "outputs": [],
 "source": [
+"iris.target_names"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 53,
+"metadata": {},
+"outputs": [],
+"source": [
 "from sklearn.linear_model import LogisticRegression\n",
 "from sklearn.model_selection import train_test_split\n",
@@ -1205,7 +1222,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 53,
+"execution_count": 54,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -1213,14 +1230,14 @@
 "y_proba = log_reg.predict_proba(X_new)\n",
 "decision_boundary = X_new[y_proba[:, 1] >= 0.5][0, 0]\n",
 "\n",
-"plt.figure(figsize=(8, 3))  # not in the book – not needed, just formatting\n",
+"plt.figure(figsize=(8, 3))  # extra code – not needed, just formatting\n",
 "plt.plot(X_new, y_proba[:, 0], \"b--\", linewidth=2,\n",
 "         label=\"Not Iris virginica proba\")\n",
 "plt.plot(X_new, y_proba[:, 1], \"g-\", linewidth=2, label=\"Iris virginica proba\")\n",
 "plt.plot([decision_boundary, decision_boundary], [0, 1], \"k:\", linewidth=2,\n",
 "         label=\"Decision boundary\")\n",
 "\n",
-"# not in the book – this section beautifies and saves Figure 4–21\n",
+"# extra code – this section beautifies and saves Figure 4–21\n",
 "plt.arrow(x=decision_boundary, y=0.08, dx=-0.3, dy=0,\n",
 "          head_width=0.05, head_length=0.1, fc=\"b\", ec=\"b\")\n",
 "plt.arrow(x=decision_boundary, y=0.92, dx=0.3, dy=0,\n",
@@ -1239,7 +1256,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 54,
+"execution_count": 55,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -1248,7 +1265,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 55,
+"execution_count": 56,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -1257,11 +1274,11 @@
 },
 {
 "cell_type": "code",
-"execution_count": 56,
+"execution_count": 57,
 "metadata": {},
 "outputs": [],
 "source": [
-"# not in the book – this cell generates and saves Figure 4–22\n",
+"# extra code – this cell generates and saves Figure 4–22\n",
 "\n",
 "X = iris.data[[\"petal length (cm)\", \"petal width (cm)\"]].values\n",
 "y = iris.target_names[iris.target] == 'virginica'\n",
@@ -1307,7 +1324,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 57,
+"execution_count": 58,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -1321,7 +1338,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 58,
+"execution_count": 59,
 "metadata": {
 "tags": []
 },
@@ -1332,7 +1349,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 59,
+"execution_count": 60,
 "metadata": {
 "tags": []
 },
@@ -1343,11 +1360,11 @@
 },
 {
 "cell_type": "code",
-"execution_count": 60,
+"execution_count": 61,
 "metadata": {},
 "outputs": [],
 "source": [
-"# not in the book – this cell generates and saves Figure 4–23\n",
+"# extra code – this cell generates and saves Figure 4–23\n",
 "\n",
 "from matplotlib.colors import ListedColormap\n",
 "\n",
@@ -1419,7 +1436,7 @@
 "metadata": {},
 "source": [
 "## 12. Batch Gradient Descent with early stopping for Softmax Regression\n",
-"Exercise: _Implement Batch Gradient Descent with early stopping for Softmax Regression without using Scikit-Learn, only NumPy._"
+"Exercise: _Implement Batch Gradient Descent with early stopping for Softmax Regression without using Scikit-Learn, only NumPy. Use it on a classification task such as the iris dataset._"
 ]
 },
 {
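The expanded exercise statement above asks for a NumPy-only Softmax Regression trained with early stopping. The core building block is the softmax itself; one numerically stable way to write it (a sketch, not the notebook's solution):

```python
import numpy as np

def softmax(logits):
    exps = np.exp(logits - logits.max(axis=1, keepdims=True))  # shift for stability
    return exps / exps.sum(axis=1, keepdims=True)

softmax(np.array([[1.0, 2.0, 3.0]]))  # each row sums to 1
```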
@@ -1431,7 +1448,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 61,
+"execution_count": 62,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -1448,7 +1465,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 62,
+"execution_count": 63,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -1464,7 +1481,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 63,
+"execution_count": 64,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -1491,12 +1508,12 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"The targets are currently class indices (0, 1 or 2), but we need target class probabilities to train the Softmax Regression model. Each instance will have target class probabilities equal to 0.0 for all classes except for the target class which will have a probability of 1.0 (in other words, the vector of class probabilities for any given instance is a one-hot vector). Let's write a small function to convert the vector of class indices into a matrix containing a one-hot vector for each instance. To understand this code, you need to know that `np.diag(np.ones(n))` creates an n×n matrix full of 0s except for 1s on the main diagonal. Moreover, if `a` in a NumPy array, then `a[[1,3,2]]` returns an array with 3 rows equal to `a[1]`, `a[3]` and `a[2]` (this is [advanced NumPy indexing](https://numpy.org/doc/stable/reference/arrays.indexing.html#advanced-indexing))."
+"The targets are currently class indices (0, 1 or 2), but we need target class probabilities to train the Softmax Regression model. Each instance will have target class probabilities equal to 0.0 for all classes except for the target class which will have a probability of 1.0 (in other words, the vector of class probabilities for any given instance is a one-hot vector). Let's write a small function to convert the vector of class indices into a matrix containing a one-hot vector for each instance. To understand this code, you need to know that `np.diag(np.ones(n))` creates an n×n matrix full of 0s except for 1s on the main diagonal. Moreover, if `a` is a NumPy array, then `a[[1, 3, 2]]` returns an array with 3 rows equal to `a[1]`, `a[3]` and `a[2]` (this is [advanced NumPy indexing](https://numpy.org/doc/stable/reference/arrays.indexing.html#advanced-indexing))."
 ]
 },
 {
 "cell_type": "code",
-"execution_count": 64,
+"execution_count": 65,
 "metadata": {},
 "outputs": [],
 "source": [
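The paragraph above already spells out the recipe: build an identity matrix with `np.diag(np.ones(n))` and select rows with advanced indexing. A sketch of such a helper (the notebook's own cell may differ in details):

```python
import numpy as np

def to_one_hot(y):
    return np.diag(np.ones(y.max() + 1))[y]

to_one_hot(np.array([0, 2, 1, 1]))
# array([[1., 0., 0.],
#        [0., 0., 1.],
#        [0., 1., 0.],
#        [0., 1., 0.]])
```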
@@ -1513,7 +1530,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 65,
+"execution_count": 66,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -1522,7 +1539,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 66,
+"execution_count": 67,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -1538,7 +1555,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 67,
+"execution_count": 68,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -1556,7 +1573,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 68,
+"execution_count": 69,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -1578,7 +1595,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 69,
+"execution_count": 70,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -1597,7 +1614,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 70,
+"execution_count": 71,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -1625,7 +1642,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 71,
+"execution_count": 72,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -1658,7 +1675,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 72,
+"execution_count": 73,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -1674,7 +1691,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 73,
+"execution_count": 74,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -1695,7 +1712,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 74,
+"execution_count": 75,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -1732,7 +1749,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 75,
+"execution_count": 76,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -1760,7 +1777,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 76,
+"execution_count": 77,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -1797,7 +1814,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 77,
+"execution_count": 78,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -1825,11 +1842,11 @@
 },
 {
 "cell_type": "code",
-"execution_count": 78,
+"execution_count": 79,
 "metadata": {},
 "outputs": [],
 "source": [
-"custom_cmap = mpl.colors.ListedColormap(['#fafab0','#9898ff','#a0faa0'])\n",
+"custom_cmap = mpl.colors.ListedColormap(['#fafab0', '#9898ff', '#a0faa0'])\n",
 "\n",
 "x0, x1 = np.meshgrid(np.linspace(0, 8, 500).reshape(-1, 1),\n",
 "                     np.linspace(0, 3.5, 200).reshape(-1, 1))\n",
@@ -1869,7 +1886,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 79,
+"execution_count": 80,
 "metadata": {},
 "outputs": [],
 "source": [