Sync notebook with book's code examples, and better identify extra code

This commit is contained in:
Aurélien Geron
2022-02-19 18:17:36 +13:00
parent 1c2421fc88
commit b63019fd28
9 changed files with 318 additions and 301 deletions


@@ -91,8 +91,8 @@
"plt.rc('font', size=14)\n",
"plt.rc('axes', labelsize=14, titlesize=14)\n",
"plt.rc('legend', fontsize=14)\n",
"plt.rc('xtick',labelsize=10)\n",
"plt.rc('ytick',labelsize=10)"
"plt.rc('xtick', labelsize=10)\n",
"plt.rc('ytick', labelsize=10)"
]
},
{
@@ -147,7 +147,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 91\n",
"# extra code this cell generates and saves Figure 91\n",
"\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.datasets import load_iris\n",
@@ -192,7 +192,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book\n",
"# extra code\n",
"\n",
"import numpy as np\n",
"from scipy import stats\n",
@@ -263,7 +263,7 @@
"from sklearn.cluster import KMeans\n",
"from sklearn.datasets import make_blobs\n",
"\n",
"# not in the book the exact arguments of make_blobs() are not important\n",
"# extra code the exact arguments of make_blobs() are not important\n",
"blob_centers = np.array([[ 0.2, 2.3], [-1.5 , 2.3], [-2.8, 1.8],\n",
" [-2.8, 2.8], [-2.8, 1.3]])\n",
"blob_std = np.array([0.4, 0.3, 0.1, 0.1, 0.1])\n",
@@ -288,7 +288,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 92\n",
"# extra code this cell generates and saves Figure 92\n",
"\n",
"def plot_clusters(X, y=None):\n",
" plt.scatter(X[:, 0], X[:, 1], c=y, s=1)\n",
@@ -399,7 +399,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 93\n",
"# extra code this cell generates and saves Figure 93\n",
"\n",
"def plot_data(X):\n",
" plt.plot(X[:, 0], X[:, 1], 'k.', markersize=2)\n",
@@ -489,7 +489,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book\n",
"# extra code\n",
"np.linalg.norm(np.tile(X_new, (1, k)).reshape(-1, k, 2)\n",
" - kmeans.cluster_centers_, axis=2).round(2)"
]
@@ -532,7 +532,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 94\n",
"# extra code this cell generates and saves Figure 94\n",
"\n",
"kmeans_iter1 = KMeans(n_clusters=5, init=\"random\", n_init=1, max_iter=1,\n",
" random_state=5)\n",
@@ -600,7 +600,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 95\n",
"# extra code this cell generates and saves Figure 95\n",
"\n",
"def plot_clusterer_comparison(clusterer1, clusterer2, X, title1=None,\n",
" title2=None):\n",
@@ -647,7 +647,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book\n",
"# extra code\n",
"plt.figure(figsize=(8, 4))\n",
"plot_decision_boundaries(kmeans, X)"
]
@@ -681,7 +681,7 @@
"metadata": {},
"outputs": [],
"source": [
"kmeans_rnd_init1.inertia_ # not in the book"
"kmeans_rnd_init1.inertia_ # extra code"
]
},
{
@@ -690,7 +690,7 @@
"metadata": {},
"outputs": [],
"source": [
"kmeans_rnd_init2.inertia_ # not in the book"
"kmeans_rnd_init2.inertia_ # extra code"
]
},
{
@@ -706,7 +706,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book\n",
"# extra code\n",
"X_dist = kmeans.transform(X)\n",
"(X_dist[np.arange(len(X_dist)), kmeans.labels_] ** 2).sum()"
]
@@ -754,7 +754,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book\n",
"# extra code\n",
"kmeans_rnd_10_inits = KMeans(n_clusters=5, init=\"random\", n_init=10,\n",
" random_state=2)\n",
"kmeans_rnd_10_inits.fit(X)"
@@ -773,7 +773,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book\n",
"# extra code\n",
"plt.figure(figsize=(8, 4))\n",
"plot_decision_boundaries(kmeans_rnd_10_inits, X)\n",
"plt.show()"
@@ -964,7 +964,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 96\n",
"# extra code this cell generates and saves Figure 96\n",
"\n",
"from timeit import timeit\n",
"\n",
@@ -981,7 +981,7 @@
" inertias[k - 1, 0] = kmeans_.inertia_\n",
" inertias[k - 1, 1] = minibatch_kmeans.inertia_\n",
"\n",
"plt.figure(figsize=(10,4))\n",
"plt.figure(figsize=(10, 4))\n",
"\n",
"plt.subplot(121)\n",
"plt.plot(range(1, max_k + 1), inertias[:, 0], \"r--\", label=\"K-Means\")\n",
@@ -1024,7 +1024,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 97\n",
"# extra code this cell generates and saves Figure 97\n",
"\n",
"kmeans_k3 = KMeans(n_clusters=3, random_state=42)\n",
"kmeans_k8 = KMeans(n_clusters=8, random_state=42)\n",
@@ -1072,7 +1072,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 98\n",
"# extra code this cell generates and saves Figure 98\n",
"\n",
"kmeans_per_k = [KMeans(n_clusters=k, random_state=42).fit(X)\n",
" for k in range(1, 10)]\n",
@@ -1104,7 +1104,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book\n",
"# extra code\n",
"plot_decision_boundaries(kmeans_per_k[4 - 1], X)\n",
"plt.show()"
]
@@ -1147,7 +1147,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 99\n",
"# extra code this cell generates and saves Figure 99\n",
"\n",
"silhouette_scores = [silhouette_score(X, model.labels_)\n",
" for model in kmeans_per_k[1:]]\n",
@@ -1182,7 +1182,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 910\n",
"# extra code this cell generates and saves Figure 910\n",
"\n",
"from sklearn.metrics import silhouette_samples\n",
"from matplotlib.ticker import FixedLocator, FixedFormatter\n",
@@ -1253,7 +1253,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 911\n",
"# extra code this cell generates and saves Figure 911\n",
"\n",
"X1, y1 = make_blobs(n_samples=1000, centers=((4, -4), (0, 0)), random_state=42)\n",
"X1 = X1.dot(np.array([[0.374, 0.95], [0.732, 0.598]]))\n",
@@ -1303,7 +1303,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book\n",
"# extra code\n",
"\n",
"root = \"https://raw.githubusercontent.com/ageron/handson-ml3/main/\"\n",
"filename = \"ladybug.png\"\n",
@@ -1344,7 +1344,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 912\n",
"# extra code this cell generates and saves Figure 912\n",
"\n",
"segmented_imgs = []\n",
"n_colors = (10, 8, 6, 4, 2)\n",
@@ -1353,7 +1353,7 @@
" segmented_img = kmeans.cluster_centers_[kmeans.labels_]\n",
" segmented_imgs.append(segmented_img.reshape(image.shape))\n",
"\n",
"plt.figure(figsize=(10,5))\n",
"plt.figure(figsize=(10, 5))\n",
"plt.subplots_adjust(wspace=0.05, hspace=0.1)\n",
"\n",
"plt.subplot(2, 3, 1)\n",
@@ -1367,7 +1367,7 @@
" plt.title(f\"{n_clusters} colors\")\n",
" plt.axis('off')\n",
"\n",
"save_fig('image_segmentation_diagram', tight_layout=False)\n",
"save_fig('image_segmentation_plot', tight_layout=False)\n",
"plt.show()"
]
},
@@ -1440,7 +1440,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book measure the accuracy when we use the whole training set\n",
"# extra code measure the accuracy when we use the whole training set\n",
"log_reg_full = LogisticRegression(max_iter=10_000)\n",
"log_reg_full.fit(X_train, y_train)\n",
"log_reg_full.score(X_test, y_test)"
@@ -1479,7 +1479,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 913\n",
"# extra code this cell generates and saves Figure 913\n",
"\n",
"plt.figure(figsize=(8, 2))\n",
"for index, X_representative_digit in enumerate(X_representative_digits):\n",
@@ -1488,7 +1488,7 @@
" interpolation=\"bilinear\")\n",
" plt.axis('off')\n",
"\n",
"save_fig(\"representative_images_diagram\", tight_layout=False)\n",
"save_fig(\"representative_images_plot\", tight_layout=False)\n",
"plt.show()"
]
},
@@ -1694,7 +1694,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 914\n",
"# extra code this cell generates and saves Figure 914\n",
"\n",
"def plot_dbscan(dbscan, X, size, show_xlabels=True, show_ylabels=True):\n",
" core_mask = np.zeros_like(dbscan.labels_, dtype=bool)\n",
@@ -1747,7 +1747,7 @@
"metadata": {},
"outputs": [],
"source": [
"dbscan = dbscan2 # not in the book the text says we now use eps=0.2"
"dbscan = dbscan2 # extra code the text says we now use eps=0.2"
]
},
{
@@ -1787,7 +1787,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 915\n",
"# extra code this cell generates and saves Figure 915\n",
"\n",
"plt.figure(figsize=(6, 3))\n",
"plot_decision_boundaries(knn, X, show_centroids=False)\n",
@@ -2172,7 +2172,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book bonus material\n",
"# extra code bonus material\n",
"\n",
"resolution = 100\n",
"grid = np.arange(-10, 10, 1 / resolution)\n",
@@ -2197,7 +2197,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cells generates and saves Figure 916\n",
"# extra code this cells generates and saves Figure 916\n",
"\n",
"from matplotlib.colors import LogNorm\n",
"\n",
@@ -2256,7 +2256,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 917\n",
"# extra code this cell generates and saves Figure 917\n",
"\n",
"gm_full = GaussianMixture(n_components=3, n_init=10,\n",
" covariance_type=\"full\", random_state=42)\n",
@@ -2294,7 +2294,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book comparing covariance_type=\"full\" and covariance_type=\"diag\"\n",
"# extra code comparing covariance_type=\"full\" and covariance_type=\"diag\"\n",
"compare_gaussian_mixtures(gm_full, gm_diag, X)\n",
"plt.tight_layout()\n",
"plt.show()"
@@ -2331,7 +2331,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 918\n",
"# extra code this cell generates and saves Figure 918\n",
"\n",
"plt.figure(figsize=(8, 4))\n",
"\n",
@@ -2373,7 +2373,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 919\n",
"# extra code this cell generates and saves Figure 919\n",
"\n",
"from scipy.stats import norm\n",
"\n",
@@ -2387,7 +2387,7 @@
"stds = np.linspace(stds_range[0], stds_range[1], 501)\n",
"Xs, Stds = np.meshgrid(xs, stds)\n",
"Z = 2 * norm.pdf(Xs - 1.0, 0, Stds) + norm.pdf(Xs + 4.0, 0, Stds)\n",
"Z = Z / Z.sum(axis=1)[:,np.newaxis] / (xs[1] - xs[0])\n",
"Z = Z / Z.sum(axis=1)[:, np.newaxis] / (xs[1] - xs[0])\n",
"\n",
"x_example_idx = (xs >= x_val).argmax() # index of the first value >= x_val\n",
"max_idx = Z[:, x_example_idx].argmax()\n",
@@ -2479,7 +2479,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book bonus material\n",
"# extra code bonus material\n",
"n_clusters = 3\n",
"n_dims = 2\n",
"n_params_for_weights = n_clusters - 1\n",
@@ -2514,7 +2514,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 920\n",
"# extra code this cell generates and saves Figure 920\n",
"\n",
"gms_per_k = [GaussianMixture(n_components=k, n_init=10, random_state=42).fit(X)\n",
" for k in range(1, 11)]\n",
@@ -2576,7 +2576,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this figure is almost identical to Figure 916\n",
"# extra code this figure is almost identical to Figure 916\n",
"plt.figure(figsize=(8, 5))\n",
"plot_gaussian_mixture(bgm, X)\n",
"plt.show()"
@@ -2588,7 +2588,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 921\n",
"# extra code this cell generates and saves Figure 921\n",
"\n",
"X_moons, y_moons = make_moons(n_samples=1000, noise=0.05, random_state=42)\n",
"\n",