Sync notebook with book's code examples, and better identify extra code

This commit is contained in:
Aurélien Geron
2022-02-19 18:17:36 +13:00
parent 1c2421fc88
commit b63019fd28
9 changed files with 318 additions and 301 deletions


@@ -91,8 +91,8 @@
"plt.rc('font', size=14)\n",
"plt.rc('axes', labelsize=14, titlesize=14)\n",
"plt.rc('legend', fontsize=14)\n",
"plt.rc('xtick',labelsize=10)\n",
"plt.rc('ytick',labelsize=10)"
"plt.rc('xtick', labelsize=10)\n",
"plt.rc('ytick', labelsize=10)"
]
},
{
@@ -147,7 +147,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 91\n",
"# extra code this cell generates and saves Figure 91\n",
"\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.datasets import load_iris\n",
@@ -192,7 +192,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book\n",
"# extra code\n",
"\n",
"import numpy as np\n",
"from scipy import stats\n",
@@ -263,7 +263,7 @@
"from sklearn.cluster import KMeans\n",
"from sklearn.datasets import make_blobs\n",
"\n",
"# not in the book the exact arguments of make_blobs() are not important\n",
"# extra code the exact arguments of make_blobs() are not important\n",
"blob_centers = np.array([[ 0.2, 2.3], [-1.5 , 2.3], [-2.8, 1.8],\n",
" [-2.8, 2.8], [-2.8, 1.3]])\n",
"blob_std = np.array([0.4, 0.3, 0.1, 0.1, 0.1])\n",
@@ -288,7 +288,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 92\n",
"# extra code this cell generates and saves Figure 92\n",
"\n",
"def plot_clusters(X, y=None):\n",
" plt.scatter(X[:, 0], X[:, 1], c=y, s=1)\n",
@@ -399,7 +399,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 93\n",
"# extra code this cell generates and saves Figure 93\n",
"\n",
"def plot_data(X):\n",
" plt.plot(X[:, 0], X[:, 1], 'k.', markersize=2)\n",
@@ -489,7 +489,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book\n",
"# extra code\n",
"np.linalg.norm(np.tile(X_new, (1, k)).reshape(-1, k, 2)\n",
" - kmeans.cluster_centers_, axis=2).round(2)"
]
@@ -532,7 +532,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 94\n",
"# extra code this cell generates and saves Figure 94\n",
"\n",
"kmeans_iter1 = KMeans(n_clusters=5, init=\"random\", n_init=1, max_iter=1,\n",
" random_state=5)\n",
@@ -600,7 +600,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 95\n",
"# extra code this cell generates and saves Figure 95\n",
"\n",
"def plot_clusterer_comparison(clusterer1, clusterer2, X, title1=None,\n",
" title2=None):\n",
@@ -647,7 +647,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book\n",
"# extra code\n",
"plt.figure(figsize=(8, 4))\n",
"plot_decision_boundaries(kmeans, X)"
]
@@ -681,7 +681,7 @@
"metadata": {},
"outputs": [],
"source": [
"kmeans_rnd_init1.inertia_ # not in the book"
"kmeans_rnd_init1.inertia_ # extra code"
]
},
{
@@ -690,7 +690,7 @@
"metadata": {},
"outputs": [],
"source": [
"kmeans_rnd_init2.inertia_ # not in the book"
"kmeans_rnd_init2.inertia_ # extra code"
]
},
{
@@ -706,7 +706,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book\n",
"# extra code\n",
"X_dist = kmeans.transform(X)\n",
"(X_dist[np.arange(len(X_dist)), kmeans.labels_] ** 2).sum()"
]
@@ -754,7 +754,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book\n",
"# extra code\n",
"kmeans_rnd_10_inits = KMeans(n_clusters=5, init=\"random\", n_init=10,\n",
" random_state=2)\n",
"kmeans_rnd_10_inits.fit(X)"
@@ -773,7 +773,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book\n",
"# extra code\n",
"plt.figure(figsize=(8, 4))\n",
"plot_decision_boundaries(kmeans_rnd_10_inits, X)\n",
"plt.show()"
@@ -964,7 +964,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 96\n",
"# extra code this cell generates and saves Figure 96\n",
"\n",
"from timeit import timeit\n",
"\n",
@@ -981,7 +981,7 @@
" inertias[k - 1, 0] = kmeans_.inertia_\n",
" inertias[k - 1, 1] = minibatch_kmeans.inertia_\n",
"\n",
"plt.figure(figsize=(10,4))\n",
"plt.figure(figsize=(10, 4))\n",
"\n",
"plt.subplot(121)\n",
"plt.plot(range(1, max_k + 1), inertias[:, 0], \"r--\", label=\"K-Means\")\n",
@@ -1024,7 +1024,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 97\n",
"# extra code this cell generates and saves Figure 97\n",
"\n",
"kmeans_k3 = KMeans(n_clusters=3, random_state=42)\n",
"kmeans_k8 = KMeans(n_clusters=8, random_state=42)\n",
@@ -1072,7 +1072,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 98\n",
"# extra code this cell generates and saves Figure 98\n",
"\n",
"kmeans_per_k = [KMeans(n_clusters=k, random_state=42).fit(X)\n",
" for k in range(1, 10)]\n",
@@ -1104,7 +1104,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book\n",
"# extra code\n",
"plot_decision_boundaries(kmeans_per_k[4 - 1], X)\n",
"plt.show()"
]
@@ -1147,7 +1147,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 99\n",
"# extra code this cell generates and saves Figure 99\n",
"\n",
"silhouette_scores = [silhouette_score(X, model.labels_)\n",
" for model in kmeans_per_k[1:]]\n",
@@ -1182,7 +1182,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 910\n",
"# extra code this cell generates and saves Figure 910\n",
"\n",
"from sklearn.metrics import silhouette_samples\n",
"from matplotlib.ticker import FixedLocator, FixedFormatter\n",
@@ -1253,7 +1253,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 911\n",
"# extra code this cell generates and saves Figure 911\n",
"\n",
"X1, y1 = make_blobs(n_samples=1000, centers=((4, -4), (0, 0)), random_state=42)\n",
"X1 = X1.dot(np.array([[0.374, 0.95], [0.732, 0.598]]))\n",
@@ -1303,7 +1303,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book\n",
"# extra code\n",
"\n",
"root = \"https://raw.githubusercontent.com/ageron/handson-ml3/main/\"\n",
"filename = \"ladybug.png\"\n",
@@ -1344,7 +1344,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 912\n",
"# extra code this cell generates and saves Figure 912\n",
"\n",
"segmented_imgs = []\n",
"n_colors = (10, 8, 6, 4, 2)\n",
@@ -1353,7 +1353,7 @@
" segmented_img = kmeans.cluster_centers_[kmeans.labels_]\n",
" segmented_imgs.append(segmented_img.reshape(image.shape))\n",
"\n",
"plt.figure(figsize=(10,5))\n",
"plt.figure(figsize=(10, 5))\n",
"plt.subplots_adjust(wspace=0.05, hspace=0.1)\n",
"\n",
"plt.subplot(2, 3, 1)\n",
@@ -1367,7 +1367,7 @@
" plt.title(f\"{n_clusters} colors\")\n",
" plt.axis('off')\n",
"\n",
"save_fig('image_segmentation_diagram', tight_layout=False)\n",
"save_fig('image_segmentation_plot', tight_layout=False)\n",
"plt.show()"
]
},
@@ -1440,7 +1440,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book measure the accuracy when we use the whole training set\n",
"# extra code measure the accuracy when we use the whole training set\n",
"log_reg_full = LogisticRegression(max_iter=10_000)\n",
"log_reg_full.fit(X_train, y_train)\n",
"log_reg_full.score(X_test, y_test)"
@@ -1479,7 +1479,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 913\n",
"# extra code this cell generates and saves Figure 913\n",
"\n",
"plt.figure(figsize=(8, 2))\n",
"for index, X_representative_digit in enumerate(X_representative_digits):\n",
@@ -1488,7 +1488,7 @@
" interpolation=\"bilinear\")\n",
" plt.axis('off')\n",
"\n",
"save_fig(\"representative_images_diagram\", tight_layout=False)\n",
"save_fig(\"representative_images_plot\", tight_layout=False)\n",
"plt.show()"
]
},
@@ -1694,7 +1694,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 914\n",
"# extra code this cell generates and saves Figure 914\n",
"\n",
"def plot_dbscan(dbscan, X, size, show_xlabels=True, show_ylabels=True):\n",
" core_mask = np.zeros_like(dbscan.labels_, dtype=bool)\n",
@@ -1747,7 +1747,7 @@
"metadata": {},
"outputs": [],
"source": [
"dbscan = dbscan2 # not in the book the text says we now use eps=0.2"
"dbscan = dbscan2 # extra code the text says we now use eps=0.2"
]
},
{
@@ -1787,7 +1787,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 915\n",
"# extra code this cell generates and saves Figure 915\n",
"\n",
"plt.figure(figsize=(6, 3))\n",
"plot_decision_boundaries(knn, X, show_centroids=False)\n",
@@ -2172,7 +2172,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book bonus material\n",
"# extra code bonus material\n",
"\n",
"resolution = 100\n",
"grid = np.arange(-10, 10, 1 / resolution)\n",
@@ -2197,7 +2197,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cells generates and saves Figure 916\n",
"# extra code this cells generates and saves Figure 916\n",
"\n",
"from matplotlib.colors import LogNorm\n",
"\n",
@@ -2256,7 +2256,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 917\n",
"# extra code this cell generates and saves Figure 917\n",
"\n",
"gm_full = GaussianMixture(n_components=3, n_init=10,\n",
" covariance_type=\"full\", random_state=42)\n",
@@ -2294,7 +2294,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book comparing covariance_type=\"full\" and covariance_type=\"diag\"\n",
"# extra code comparing covariance_type=\"full\" and covariance_type=\"diag\"\n",
"compare_gaussian_mixtures(gm_full, gm_diag, X)\n",
"plt.tight_layout()\n",
"plt.show()"
@@ -2331,7 +2331,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 918\n",
"# extra code this cell generates and saves Figure 918\n",
"\n",
"plt.figure(figsize=(8, 4))\n",
"\n",
@@ -2373,7 +2373,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 919\n",
"# extra code this cell generates and saves Figure 919\n",
"\n",
"from scipy.stats import norm\n",
"\n",
@@ -2387,7 +2387,7 @@
"stds = np.linspace(stds_range[0], stds_range[1], 501)\n",
"Xs, Stds = np.meshgrid(xs, stds)\n",
"Z = 2 * norm.pdf(Xs - 1.0, 0, Stds) + norm.pdf(Xs + 4.0, 0, Stds)\n",
"Z = Z / Z.sum(axis=1)[:,np.newaxis] / (xs[1] - xs[0])\n",
"Z = Z / Z.sum(axis=1)[:, np.newaxis] / (xs[1] - xs[0])\n",
"\n",
"x_example_idx = (xs >= x_val).argmax() # index of the first value >= x_val\n",
"max_idx = Z[:, x_example_idx].argmax()\n",
@@ -2479,7 +2479,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book bonus material\n",
"# extra code bonus material\n",
"n_clusters = 3\n",
"n_dims = 2\n",
"n_params_for_weights = n_clusters - 1\n",
@@ -2514,7 +2514,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 920\n",
"# extra code this cell generates and saves Figure 920\n",
"\n",
"gms_per_k = [GaussianMixture(n_components=k, n_init=10, random_state=42).fit(X)\n",
" for k in range(1, 11)]\n",
@@ -2576,7 +2576,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this figure is almost identical to Figure 916\n",
"# extra code this figure is almost identical to Figure 916\n",
"plt.figure(figsize=(8, 5))\n",
"plot_gaussian_mixture(bgm, X)\n",
"plt.show()"
@@ -2588,7 +2588,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 921\n",
"# extra code this cell generates and saves Figure 921\n",
"\n",
"X_moons, y_moons = make_moons(n_samples=1000, noise=0.05, random_state=42)\n",
"\n",