diff --git a/09_unsupervised_learning.ipynb b/09_unsupervised_learning.ipynb
index ad9a3b8..1078195 100644
--- a/09_unsupervised_learning.ipynb
+++ b/09_unsupervised_learning.ipynb
@@ -1660,7 +1660,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "log_reg.score(X_test, y_test)"
+    "log_reg_score = log_reg.score(X_test, y_test)\n",
+    "log_reg_score"
    ]
   },
   {
@@ -1698,7 +1699,15 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "pipeline.score(X_test, y_test)"
+    "pipeline_score = pipeline.score(X_test, y_test)\n",
+    "pipeline_score"
    ]
   },
   {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "How much did the error rate drop?"
+   ]
+  },
+  {
@@ -1707,14 +1716,14 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "1 - (1 - 0.977777) / (1 - 0.968888)"
+    "1 - (1 - pipeline_score) / (1 - log_reg_score)"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "How about that? We reduced the error rate by over 28%! But we chose the number of clusters $k$ completely arbitrarily, we can surely do better. Since K-Means is just a preprocessing step in a classification pipeline, finding a good value for $k$ is much simpler than earlier: there's no need to perform silhouette analysis or minimize the inertia, the best value of $k$ is simply the one that results in the best classification performance."
+    "How about that? We reduced the error rate by over 35%! But we chose the number of clusters $k$ completely arbitrarily, so we can surely do better. Since K-Means is just a preprocessing step in a classification pipeline, finding a good value for $k$ is much simpler than earlier: there's no need to perform silhouette analysis or minimize the inertia; the best value of $k$ is simply the one that results in the best classification performance."
    ]
   },
   {
@@ -3473,11 +3482,12 @@
    "outputs": [],
    "source": [
     "def plot_faces(faces, labels, n_cols=5):\n",
+    "    faces = faces.reshape(-1, 64, 64)\n",
     "    n_rows = (len(faces) - 1) // n_cols + 1\n",
     "    plt.figure(figsize=(n_cols, n_rows * 1.1))\n",
     "    for index, (face, label) in enumerate(zip(faces, labels)):\n",
     "        plt.subplot(n_rows, n_cols, index + 1)\n",
-    "        plt.imshow(face.reshape(64, 64), cmap=\"gray\")\n",
+    "        plt.imshow(face, cmap=\"gray\")\n",
     "        plt.axis(\"off\")\n",
     "        plt.title(label)\n",
     "    plt.show()\n",
@@ -3485,7 +3495,7 @@
     "for cluster_id in np.unique(best_model.labels_):\n",
     "    print(\"Cluster\", cluster_id)\n",
     "    in_cluster = best_model.labels_==cluster_id\n",
-    "    faces = X_train[in_cluster].reshape(-1, 64, 64)\n",
+    "    faces = X_train[in_cluster]\n",
     "    labels = y_train[in_cluster]\n",
     "    plot_faces(faces, labels)"
    ]
@@ -3709,7 +3719,6 @@
     "n_darkened = 3\n",
     "darkened = X_train[:n_darkened].copy()\n",
     "darkened[:, 1:-1] *= 0.3\n",
-    "darkened = darkened.reshape(-1, 64*64)\n",
     "y_darkened = y_train[:n_darkened]\n",
     "\n",
     "X_bad_faces = np.r_[rotated, flipped, darkened]\n",
@@ -3819,7 +3828,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "plot_faces(X_bad_faces, y_gen_faces)"
+    "plot_faces(X_bad_faces, y_bad)"
    ]
   },
   {
@@ -3829,7 +3838,7 @@
    "outputs": [],
    "source": [
     "X_bad_faces_reconstructed = pca.inverse_transform(X_bad_faces_pca)\n",
-    "plot_faces(X_bad_faces_reconstructed, y_gen_faces)"
+    "plot_faces(X_bad_faces_reconstructed, y_bad)"
    ]
   },
   {
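
Note on the error-rate cell changed above: 1 - (1 - pipeline_score) / (1 - log_reg_score) computes the relative drop in error rate, i.e. 1 - err_pipeline / err_log_reg. As a sanity check with the previously hardcoded scores: 1 - (1 - 0.977777) / (1 - 0.968888) = 1 - 0.022223 / 0.031112 ≈ 0.286, which is where the old "over 28%" wording came from. The updated "over 35%" wording presumably reflects the scores a fresh run of the notebook now produces; since the cell reads log_reg_score and pipeline_score instead of hardcoded values, the computed figure can no longer go stale.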
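
On the updated markdown cell's claim that the best k is simply the one giving the best classification performance: a minimal sketch of what that search could look like, not part of this patch, assuming the X_train/y_train digits split used earlier in the notebook; the k range, max_iter, and random_state values are illustrative.

from sklearn.cluster import KMeans
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

# K-Means as a preprocessing step: inside a Pipeline it acts as a
# transformer, replacing each image with its distances to the k centroids.
pipeline = Pipeline([
    ("kmeans", KMeans(n_clusters=50, random_state=42)),
    ("log_reg", LogisticRegression(max_iter=5000, random_state=42)),
])

# Tune k by cross-validated classification accuracy directly; no
# silhouette analysis or inertia minimization needed (illustrative range).
param_grid = dict(kmeans__n_clusters=range(2, 100))
grid_clf = GridSearchCV(pipeline, param_grid, cv=3)
grid_clf.fit(X_train, y_train)

grid_clf.best_params_           # the k that scored best in cross-validation
grid_clf.score(X_test, y_test)  # test accuracy of the refitted best pipeline

Since GridSearchCV refits the best pipeline on the full training set by default, its score method evaluates the winning k end to end on the test set.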