From 52d4f0a8c63df24fdec4c83fc20b4a444a5a817e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Geron?= Date: Sat, 27 Nov 2021 23:03:26 +1300 Subject: [PATCH] Make font sizes consistent across notebooks --- 01_the_machine_learning_landscape.ipynb | 28 +-- 02_end_to_end_machine_learning_project.ipynb | 2 +- 03_classification.ipynb | 185 ++++++++---------- 04_training_linear_models.ipynb | 2 +- 05_support_vector_machines.ipynb | 2 +- 06_decision_trees.ipynb | 2 +- 07_ensemble_learning_and_random_forests.ipynb | 10 +- 08_dimensionality_reduction.ipynb | 40 ++-- 09_unsupervised_learning.ipynb | 26 +-- 9 files changed, 141 insertions(+), 156 deletions(-) diff --git a/01_the_machine_learning_landscape.ipynb b/01_the_machine_learning_landscape.ipynb index 2488840..e5644b5 100644 --- a/01_the_machine_learning_landscape.ipynb +++ b/01_the_machine_learning_landscape.ipynb @@ -93,7 +93,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "To plot pretty figures directly within Jupyter:" + "Let's define the default font sizes, to plot pretty figures:" ] }, { @@ -104,8 +104,11 @@ "source": [ "import matplotlib.pyplot as plt\n", "\n", - "plt.rc('font', size=12)\n", - "plt.rc('axes', labelsize=14)" + "plt.rc('font', size=14)\n", + "plt.rc('axes', labelsize=14, titlesize=14)\n", + "plt.rc('legend', fontsize=14)\n", + "plt.rc('xtick',labelsize=10)\n", + "plt.rc('ytick',labelsize=10)" ] }, { @@ -403,11 +406,12 @@ "max_life_sat = 9\n", "\n", "position_text = {\n", - " \"Hungary\": (28_000, 4.2),\n", + " \"Turkey\": (29_500, 4.2),\n", + " \"Hungary\": (28_000, 6.9),\n", " \"France\": (40_000, 5),\n", " \"New Zealand\": (28_000, 8.2),\n", " \"Australia\": (50_000, 5.5),\n", - " \"United States\": (59_000, 5.5),\n", + " \"United States\": (59_000, 5.3),\n", " \"Denmark\": (46_000, 8.5)\n", "}\n", "\n", @@ -416,7 +420,7 @@ " pos_data_y = country_stats[lifesat_col].loc[country]\n", " country = \"U.S.\" if country == \"United States\" else country\n", " plt.annotate(country, xy=(pos_data_x, pos_data_y),\n", - " xytext=pos_text,\n", + " xytext=pos_text, fontsize=12,\n", " arrowprops=dict(facecolor='black', width=0.5,\n", " shrink=0.15, headwidth=5))\n", " plt.plot(pos_data_x, pos_data_y, \"ro\")\n", @@ -499,9 +503,9 @@ "X = np.linspace(min_gdp, max_gdp, 1000)\n", "plt.plot(X, t0 + t1 * X, \"b\")\n", "\n", - "plt.text(max_gdp - 20_000, min_life_sat + 1.5,\n", + "plt.text(max_gdp - 20_000, min_life_sat + 1.9,\n", " fr\"$\\theta_0 = {t0:.2f}$\", color=\"b\")\n", - "plt.text(max_gdp - 20_000, min_life_sat + 1,\n", + "plt.text(max_gdp - 20_000, min_life_sat + 1.3,\n", " fr\"$\\theta_1 = {t1 * 1e5:.2f} \\times 10^{{-5}}$\", color=\"b\")\n", "\n", "plt.axis([min_gdp, max_gdp, min_life_sat, max_life_sat])\n", @@ -542,9 +546,9 @@ "X = np.linspace(min_gdp, max_gdp, 1000)\n", "plt.plot(X, t0 + t1 * X, \"b\")\n", "\n", - "plt.text(min_gdp + 15_000, max_life_sat - 1.5,\n", + "plt.text(min_gdp + 22_000, max_life_sat - 1.1,\n", " fr\"$\\theta_0 = {t0:.2f}$\", color=\"b\")\n", - "plt.text(min_gdp + 15_000, max_life_sat - 1,\n", + "plt.text(min_gdp + 22_000, max_life_sat - 0.6,\n", " fr\"$\\theta_1 = {t1 * 1e5:.2f} \\times 10^{{-5}}$\", color=\"b\")\n", "\n", "plt.plot([cyprus_gdp_per_capita, cyprus_gdp_per_capita],\n", @@ -600,7 +604,7 @@ "for country, pos_text in position_text_missing_countries.items():\n", " pos_data_x, pos_data_y = missing_data.loc[country]\n", " plt.annotate(country, xy=(pos_data_x, pos_data_y),\n", - " xytext=pos_text,\n", + " xytext=pos_text, fontsize=12,\n", " arrowprops=dict(facecolor='black', width=0.5,\n", " shrink=0.1, headwidth=5))\n", " plt.plot(pos_data_x, pos_data_y, \"rs\")\n", @@ -694,7 +698,7 @@ "t0ridge, t1ridge = ridge.intercept_[0], ridge.coef_[0][0]\n", "plt.plot(X, t0ridge + t1ridge * X, \"b--\",\n", " label=\"Regularized linear model on partial data\")\n", - "plt.legend(loc=\"lower right\")\n", + "plt.legend(loc=\"lower right\", fontsize=13)\n", "\n", "plt.axis([0, 115_000, min_life_sat, max_life_sat])\n", "\n", diff --git a/02_end_to_end_machine_learning_project.ipynb b/02_end_to_end_machine_learning_project.ipynb index eb3e977..42e3c8d 100644 --- a/02_end_to_end_machine_learning_project.ipynb +++ b/02_end_to_end_machine_learning_project.ipynb @@ -198,7 +198,7 @@ "import matplotlib.pyplot as plt\n", "\n", "# not in the book – the next 5 lines define the default font sizes\n", - "plt.rc('font', size=12)\n", + "plt.rc('font', size=14)\n", "plt.rc('axes', labelsize=14, titlesize=14)\n", "plt.rc('legend', fontsize=14)\n", "plt.rc('xtick',labelsize=10)\n", diff --git a/03_classification.ipynb b/03_classification.ipynb index 6b3406b..025b2cf 100644 --- a/03_classification.ipynb +++ b/03_classification.ipynb @@ -86,7 +86,7 @@ "source": [ "import matplotlib.pyplot as plt\n", "\n", - "plt.rc('font', size=12)\n", + "plt.rc('font', size=14)\n", "plt.rc('axes', labelsize=14, titlesize=14)\n", "plt.rc('legend', fontsize=14)\n", "plt.rc('xtick',labelsize=10)\n", @@ -689,7 +689,7 @@ "plt.ylabel('True Positive Rate (Recall)')\n", "plt.grid()\n", "plt.axis([0, 1, 0, 1])\n", - "plt.legend(loc=\"lower right\")\n", + "plt.legend(loc=\"lower right\", fontsize=13)\n", "save_fig(\"roc_curve_plot\")\n", "\n", "plt.show()" @@ -1033,6 +1033,7 @@ "from sklearn.metrics import ConfusionMatrixDisplay\n", "\n", "y_train_pred = cross_val_predict(sgd_clf, X_train_scaled, y_train, cv=3)\n", + "plt.rc('font', size=9) # not in the book – make the text smaller\n", "ConfusionMatrixDisplay.from_predictions(y_train, y_train_pred)\n", "plt.show()" ] @@ -1043,6 +1044,7 @@ "metadata": {}, "outputs": [], "source": [ + "plt.rc('font', size=10) # not in the book\n", "ConfusionMatrixDisplay.from_predictions(y_train, y_train_pred,\n", " normalize=\"true\", values_format=\".0%\")\n", "plt.show()" @@ -1055,6 +1057,7 @@ "outputs": [], "source": [ "sample_weight = (y_train_pred != y_train)\n", + "plt.rc('font', size=10) # not in the book\n", "ConfusionMatrixDisplay.from_predictions(y_train, y_train_pred,\n", " sample_weight=sample_weight,\n", " normalize=\"true\", values_format=\".0%\")\n", @@ -1076,8 +1079,10 @@ "source": [ "# not in the book – this code generates Figure 3–9\n", "fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(9, 4))\n", + "plt.rc('font', size=9)\n", "ConfusionMatrixDisplay.from_predictions(y_train, y_train_pred, ax=axs[0])\n", "axs[0].set_title(\"Confusion matrix\")\n", + "plt.rc('font', size=10)\n", "ConfusionMatrixDisplay.from_predictions(y_train, y_train_pred, ax=axs[1],\n", " normalize=\"true\", values_format=\".0%\")\n", "axs[1].set_title(\"CM normalized by row\")\n", @@ -1093,6 +1098,7 @@ "source": [ "# not in the book – this code generates Figure 3–10\n", "fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(9, 4))\n", + "plt.rc('font', size=10)\n", "ConfusionMatrixDisplay.from_predictions(y_train, y_train_pred, ax=axs[0],\n", " sample_weight=sample_weight,\n", " normalize=\"true\", values_format=\".0%\")\n", @@ -1102,7 +1108,8 @@ " normalize=\"pred\", values_format=\".0%\")\n", "axs[1].set_title(\"Errors normalized by column\")\n", "save_fig(\"confusion_matrix_plot_2\")\n", - "plt.show()" + "plt.show()\n", + "plt.rc('font', size=14) # make fonts great again" ] }, { @@ -1293,36 +1300,6 @@ "plt.show()" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Extra Material — Calibrating Estimated Probabilities" - ] - }, - { - "cell_type": "code", - "execution_count": 89, - "metadata": {}, - "outputs": [], - "source": [ - "# TODO" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, @@ -1353,7 +1330,7 @@ }, { "cell_type": "code", - "execution_count": 90, + "execution_count": 89, "metadata": {}, "outputs": [], "source": [ @@ -1379,7 +1356,7 @@ }, { "cell_type": "code", - "execution_count": 91, + "execution_count": 90, "metadata": {}, "outputs": [], "source": [ @@ -1394,7 +1371,7 @@ }, { "cell_type": "code", - "execution_count": 92, + "execution_count": 91, "metadata": {}, "outputs": [], "source": [ @@ -1403,7 +1380,7 @@ }, { "cell_type": "code", - "execution_count": 93, + "execution_count": 92, "metadata": {}, "outputs": [], "source": [ @@ -1419,7 +1396,7 @@ }, { "cell_type": "code", - "execution_count": 94, + "execution_count": 93, "metadata": {}, "outputs": [], "source": [ @@ -1458,7 +1435,7 @@ }, { "cell_type": "code", - "execution_count": 95, + "execution_count": 94, "metadata": {}, "outputs": [], "source": [ @@ -1467,7 +1444,7 @@ }, { "cell_type": "code", - "execution_count": 96, + "execution_count": 95, "metadata": {}, "outputs": [], "source": [ @@ -1486,7 +1463,7 @@ }, { "cell_type": "code", - "execution_count": 97, + "execution_count": 96, "metadata": {}, "outputs": [], "source": [ @@ -1519,7 +1496,7 @@ }, { "cell_type": "code", - "execution_count": 98, + "execution_count": 97, "metadata": {}, "outputs": [], "source": [ @@ -1544,7 +1521,7 @@ }, { "cell_type": "code", - "execution_count": 99, + "execution_count": 98, "metadata": {}, "outputs": [], "source": [ @@ -1562,7 +1539,7 @@ }, { "cell_type": "code", - "execution_count": 100, + "execution_count": 99, "metadata": {}, "outputs": [], "source": [ @@ -1571,7 +1548,7 @@ }, { "cell_type": "code", - "execution_count": 101, + "execution_count": 100, "metadata": {}, "outputs": [], "source": [ @@ -1587,7 +1564,7 @@ }, { "cell_type": "code", - "execution_count": 102, + "execution_count": 101, "metadata": {}, "outputs": [], "source": [ @@ -1603,7 +1580,7 @@ }, { "cell_type": "code", - "execution_count": 103, + "execution_count": 102, "metadata": { "tags": [] }, @@ -1643,7 +1620,7 @@ }, { "cell_type": "code", - "execution_count": 104, + "execution_count": 103, "metadata": {}, "outputs": [], "source": [ @@ -1667,7 +1644,7 @@ }, { "cell_type": "code", - "execution_count": 105, + "execution_count": 104, "metadata": {}, "outputs": [], "source": [ @@ -1690,7 +1667,7 @@ }, { "cell_type": "code", - "execution_count": 106, + "execution_count": 105, "metadata": {}, "outputs": [], "source": [ @@ -1730,7 +1707,7 @@ }, { "cell_type": "code", - "execution_count": 107, + "execution_count": 106, "metadata": {}, "outputs": [], "source": [ @@ -1747,7 +1724,7 @@ }, { "cell_type": "code", - "execution_count": 108, + "execution_count": 107, "metadata": {}, "outputs": [], "source": [ @@ -1756,7 +1733,7 @@ }, { "cell_type": "code", - "execution_count": 109, + "execution_count": 108, "metadata": {}, "outputs": [], "source": [ @@ -1786,7 +1763,7 @@ }, { "cell_type": "code", - "execution_count": 110, + "execution_count": 109, "metadata": {}, "outputs": [], "source": [ @@ -1811,7 +1788,7 @@ }, { "cell_type": "code", - "execution_count": 111, + "execution_count": 110, "metadata": {}, "outputs": [], "source": [ @@ -1827,7 +1804,7 @@ }, { "cell_type": "code", - "execution_count": 112, + "execution_count": 111, "metadata": {}, "outputs": [], "source": [ @@ -1836,7 +1813,7 @@ }, { "cell_type": "code", - "execution_count": 113, + "execution_count": 112, "metadata": {}, "outputs": [], "source": [ @@ -1845,7 +1822,7 @@ }, { "cell_type": "code", - "execution_count": 114, + "execution_count": 113, "metadata": {}, "outputs": [], "source": [ @@ -1868,7 +1845,7 @@ }, { "cell_type": "code", - "execution_count": 115, + "execution_count": 114, "metadata": {}, "outputs": [], "source": [ @@ -1890,7 +1867,7 @@ }, { "cell_type": "code", - "execution_count": 116, + "execution_count": 115, "metadata": {}, "outputs": [], "source": [ @@ -1899,7 +1876,7 @@ }, { "cell_type": "code", - "execution_count": 117, + "execution_count": 116, "metadata": {}, "outputs": [], "source": [ @@ -1919,7 +1896,7 @@ }, { "cell_type": "code", - "execution_count": 118, + "execution_count": 117, "metadata": {}, "outputs": [], "source": [ @@ -1943,7 +1920,7 @@ }, { "cell_type": "code", - "execution_count": 119, + "execution_count": 118, "metadata": {}, "outputs": [], "source": [ @@ -1960,7 +1937,7 @@ }, { "cell_type": "code", - "execution_count": 120, + "execution_count": 119, "metadata": {}, "outputs": [], "source": [ @@ -1976,7 +1953,7 @@ }, { "cell_type": "code", - "execution_count": 121, + "execution_count": 120, "metadata": {}, "outputs": [], "source": [ @@ -1993,7 +1970,7 @@ }, { "cell_type": "code", - "execution_count": 122, + "execution_count": 121, "metadata": {}, "outputs": [], "source": [ @@ -2010,7 +1987,7 @@ }, { "cell_type": "code", - "execution_count": 123, + "execution_count": 122, "metadata": {}, "outputs": [], "source": [ @@ -2034,7 +2011,7 @@ }, { "cell_type": "code", - "execution_count": 124, + "execution_count": 123, "metadata": {}, "outputs": [], "source": [ @@ -2061,7 +2038,7 @@ }, { "cell_type": "code", - "execution_count": 125, + "execution_count": 124, "metadata": {}, "outputs": [], "source": [ @@ -2095,7 +2072,7 @@ }, { "cell_type": "code", - "execution_count": 126, + "execution_count": 125, "metadata": {}, "outputs": [], "source": [ @@ -2105,7 +2082,7 @@ }, { "cell_type": "code", - "execution_count": 127, + "execution_count": 126, "metadata": {}, "outputs": [], "source": [ @@ -2139,7 +2116,7 @@ }, { "cell_type": "code", - "execution_count": 128, + "execution_count": 127, "metadata": {}, "outputs": [], "source": [ @@ -2166,7 +2143,7 @@ }, { "cell_type": "code", - "execution_count": 129, + "execution_count": 128, "metadata": {}, "outputs": [], "source": [ @@ -2182,7 +2159,7 @@ }, { "cell_type": "code", - "execution_count": 130, + "execution_count": 129, "metadata": {}, "outputs": [], "source": [ @@ -2192,7 +2169,7 @@ }, { "cell_type": "code", - "execution_count": 131, + "execution_count": 130, "metadata": {}, "outputs": [], "source": [ @@ -2201,7 +2178,7 @@ }, { "cell_type": "code", - "execution_count": 132, + "execution_count": 131, "metadata": {}, "outputs": [], "source": [ @@ -2217,7 +2194,7 @@ }, { "cell_type": "code", - "execution_count": 133, + "execution_count": 132, "metadata": {}, "outputs": [], "source": [ @@ -2231,7 +2208,7 @@ }, { "cell_type": "code", - "execution_count": 134, + "execution_count": 133, "metadata": {}, "outputs": [], "source": [ @@ -2248,7 +2225,7 @@ }, { "cell_type": "code", - "execution_count": 135, + "execution_count": 134, "metadata": {}, "outputs": [], "source": [ @@ -2257,7 +2234,7 @@ }, { "cell_type": "code", - "execution_count": 136, + "execution_count": 135, "metadata": {}, "outputs": [], "source": [ @@ -2273,7 +2250,7 @@ }, { "cell_type": "code", - "execution_count": 137, + "execution_count": 136, "metadata": {}, "outputs": [], "source": [ @@ -2291,7 +2268,7 @@ }, { "cell_type": "code", - "execution_count": 138, + "execution_count": 137, "metadata": {}, "outputs": [], "source": [ @@ -2307,7 +2284,7 @@ }, { "cell_type": "code", - "execution_count": 139, + "execution_count": 138, "metadata": {}, "outputs": [], "source": [ @@ -2316,7 +2293,7 @@ }, { "cell_type": "code", - "execution_count": 140, + "execution_count": 139, "metadata": {}, "outputs": [], "source": [ @@ -2339,7 +2316,7 @@ }, { "cell_type": "code", - "execution_count": 141, + "execution_count": 140, "metadata": {}, "outputs": [], "source": [ @@ -2356,7 +2333,7 @@ }, { "cell_type": "code", - "execution_count": 142, + "execution_count": 141, "metadata": {}, "outputs": [], "source": [ @@ -2372,7 +2349,7 @@ }, { "cell_type": "code", - "execution_count": 143, + "execution_count": 142, "metadata": {}, "outputs": [], "source": [ @@ -2395,7 +2372,7 @@ }, { "cell_type": "code", - "execution_count": 144, + "execution_count": 143, "metadata": {}, "outputs": [], "source": [ @@ -2419,7 +2396,7 @@ }, { "cell_type": "code", - "execution_count": 145, + "execution_count": 144, "metadata": {}, "outputs": [], "source": [ @@ -2438,7 +2415,7 @@ }, { "cell_type": "code", - "execution_count": 146, + "execution_count": 145, "metadata": {}, "outputs": [], "source": [ @@ -2454,7 +2431,7 @@ }, { "cell_type": "code", - "execution_count": 147, + "execution_count": 146, "metadata": {}, "outputs": [], "source": [ @@ -2478,7 +2455,7 @@ }, { "cell_type": "code", - "execution_count": 148, + "execution_count": 147, "metadata": {}, "outputs": [], "source": [ @@ -2494,7 +2471,7 @@ }, { "cell_type": "code", - "execution_count": 149, + "execution_count": 148, "metadata": {}, "outputs": [], "source": [ @@ -2515,7 +2492,7 @@ }, { "cell_type": "code", - "execution_count": 150, + "execution_count": 149, "metadata": {}, "outputs": [], "source": [ @@ -2537,7 +2514,7 @@ }, { "cell_type": "code", - "execution_count": 151, + "execution_count": 150, "metadata": {}, "outputs": [], "source": [ @@ -2558,7 +2535,7 @@ }, { "cell_type": "code", - "execution_count": 152, + "execution_count": 151, "metadata": {}, "outputs": [], "source": [ @@ -2611,7 +2588,7 @@ }, { "cell_type": "code", - "execution_count": 153, + "execution_count": 152, "metadata": {}, "outputs": [], "source": [ @@ -2636,7 +2613,7 @@ }, { "cell_type": "code", - "execution_count": 154, + "execution_count": 153, "metadata": {}, "outputs": [], "source": [ @@ -2669,7 +2646,7 @@ }, { "cell_type": "code", - "execution_count": 155, + "execution_count": 154, "metadata": {}, "outputs": [], "source": [ @@ -2680,7 +2657,7 @@ }, { "cell_type": "code", - "execution_count": 156, + "execution_count": 155, "metadata": {}, "outputs": [], "source": [ @@ -2696,7 +2673,7 @@ }, { "cell_type": "code", - "execution_count": 157, + "execution_count": 156, "metadata": {}, "outputs": [], "source": [ @@ -2712,7 +2689,7 @@ }, { "cell_type": "code", - "execution_count": 158, + "execution_count": 157, "metadata": {}, "outputs": [], "source": [ @@ -2728,7 +2705,7 @@ }, { "cell_type": "code", - "execution_count": 159, + "execution_count": 158, "metadata": {}, "outputs": [], "source": [ @@ -2751,7 +2728,7 @@ }, { "cell_type": "code", - "execution_count": 160, + "execution_count": 159, "metadata": {}, "outputs": [], "source": [ diff --git a/04_training_linear_models.ipynb b/04_training_linear_models.ipynb index 12d358f..72f2182 100644 --- a/04_training_linear_models.ipynb +++ b/04_training_linear_models.ipynb @@ -88,7 +88,7 @@ "source": [ "import matplotlib.pyplot as plt\n", "\n", - "plt.rc('font', size=12)\n", + "plt.rc('font', size=14)\n", "plt.rc('axes', labelsize=14, titlesize=14)\n", "plt.rc('legend', fontsize=14)\n", "plt.rc('xtick',labelsize=10)\n", diff --git a/05_support_vector_machines.ipynb b/05_support_vector_machines.ipynb index 3b08d58..6885af0 100644 --- a/05_support_vector_machines.ipynb +++ b/05_support_vector_machines.ipynb @@ -88,7 +88,7 @@ "source": [ "import matplotlib.pyplot as plt\n", "\n", - "plt.rc('font', size=12)\n", + "plt.rc('font', size=14)\n", "plt.rc('axes', labelsize=14, titlesize=14)\n", "plt.rc('legend', fontsize=14)\n", "plt.rc('xtick',labelsize=10)\n", diff --git a/06_decision_trees.ipynb b/06_decision_trees.ipynb index 8e96ac4..2c50d35 100644 --- a/06_decision_trees.ipynb +++ b/06_decision_trees.ipynb @@ -88,7 +88,7 @@ "source": [ "import matplotlib.pyplot as plt\n", "\n", - "plt.rc('font', size=12)\n", + "plt.rc('font', size=14)\n", "plt.rc('axes', labelsize=14, titlesize=14)\n", "plt.rc('legend', fontsize=14)\n", "plt.rc('xtick',labelsize=10)\n", diff --git a/07_ensemble_learning_and_random_forests.ipynb b/07_ensemble_learning_and_random_forests.ipynb index 19e8a79..08360fc 100644 --- a/07_ensemble_learning_and_random_forests.ipynb +++ b/07_ensemble_learning_and_random_forests.ipynb @@ -86,11 +86,13 @@ "metadata": {}, "outputs": [], "source": [ - "import matplotlib as mpl\n", + "import matplotlib.pyplot as plt\n", "\n", - "mpl.rc('font', size=12)\n", - "mpl.rc('axes', labelsize=14, titlesize=14)\n", - "mpl.rc('legend', fontsize=14)" + "plt.rc('font', size=14)\n", + "plt.rc('axes', labelsize=14, titlesize=14)\n", + "plt.rc('legend', fontsize=14)\n", + "plt.rc('xtick',labelsize=10)\n", + "plt.rc('ytick',labelsize=10)" ] }, { diff --git a/08_dimensionality_reduction.ipynb b/08_dimensionality_reduction.ipynb index cab3a8f..c99d455 100644 --- a/08_dimensionality_reduction.ipynb +++ b/08_dimensionality_reduction.ipynb @@ -86,11 +86,13 @@ "metadata": {}, "outputs": [], "source": [ - "import matplotlib as mpl\n", + "import matplotlib.pyplot as plt\n", "\n", - "mpl.rc('font', size=12)\n", - "mpl.rc('axes', labelsize=14, titlesize=14)\n", - "mpl.rc('legend', fontsize=14)" + "plt.rc('font', size=14)\n", + "plt.rc('axes', labelsize=14, titlesize=14)\n", + "plt.rc('legend', fontsize=14)\n", + "plt.rc('xtick',labelsize=10)\n", + "plt.rc('ytick',labelsize=10)" ] }, { @@ -136,7 +138,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Let's generate a small 3D dataset:" + "Let's generate a small 3D dataset. It's an oval shape, rotated in 3D space, with points distributed unevenly, and with quite a lot of noise:" ] }, { @@ -145,19 +147,19 @@ "metadata": {}, "outputs": [], "source": [ - "# not in the book – we've generated plenty of datasets before with similar code\n", + "# not in the book\n", "\n", "import numpy as np\n", + "from scipy.spatial.transform import Rotation\n", "\n", - "np.random.seed(42)\n", "m = 60\n", - "angles = (np.random.rand(m) ** 3 + 0.5) * 2 * np.pi\n", - "X = np.zeros((m, 3))\n", - "X[:, 0] = np.cos(angles)\n", - "X[:, 1] = np.sin(angles) * 0.5\n", - "X += 0.28 * np.random.randn(m, 3)\n", - "X = rotate_3d(X, -np.pi / 4, np.pi / 30, -np.pi / 20)\n", - "X += [0.2, 0, 0.2]" + "X = np.zeros((m, 3)) # initialize 3D dataset\n", + "np.random.seed(42)\n", + "angles = (np.random.rand(m) ** 3 + 0.5) * 2 * np.pi # uneven distribution\n", + "X[:, 0], X[:, 1] = np.cos(angles), np.sin(angles) * 0.5 # oval\n", + "X += 0.28 * np.random.randn(m, 3) # add more noise\n", + "X = Rotation.from_rotvec([np.pi / 29, -np.pi / 20, np.pi / 4]).apply(X)\n", + "X += [0.2, 0, 0.2] # shift a bit" ] }, { @@ -435,10 +437,8 @@ " length_includes_head=True, head_length=0.1, fc=\"b\", ec=\"b\", zorder=10)\n", "plt.arrow(0, 0, u3[0], u3[1], head_width=0.1, linewidth=1, alpha=0.9,\n", " length_includes_head=True, head_length=0.1, fc=\"b\", ec=\"b\", zorder=10)\n", - "plt.text(u1[0] + 0.1, u1[1] - 0.05, r\"$\\mathbf{c_1}$\",\n", - " color=\"blue\", fontsize=14)\n", - "plt.text(u3[0] + 0.1, u3[1], r\"$\\mathbf{c_2}$\",\n", - " color=\"blue\", fontsize=14)\n", + "plt.text(u1[0] + 0.1, u1[1] - 0.05, r\"$\\mathbf{c_1}$\", color=\"blue\")\n", + "plt.text(u3[0] + 0.1, u3[1], r\"$\\mathbf{c_2}$\", color=\"blue\")\n", "plt.xlabel(\"$x_1$\")\n", "plt.ylabel(\"$x_2$\", rotation=0)\n", "plt.axis([-1.4, 1.4, -1.4, 1.4])\n", @@ -1029,7 +1029,6 @@ "source": [ "# not in the book – this cell generates and saves Figure 8–10\n", "\n", - "plt.title(\"Unrolled swiss roll using LLE\")\n", "plt.scatter(X_unrolled[:, 0], X_unrolled[:, 1],\n", " c=t, cmap=darker_hot)\n", "plt.xlabel(\"$z_1$\")\n", @@ -1038,6 +1037,7 @@ "plt.grid(True)\n", "\n", "save_fig(\"lle_unrolling_plot\")\n", + "plt.title(\"Unrolled swiss roll using LLE\")\n", "plt.show()" ] }, @@ -1826,7 +1826,7 @@ "\n", "lda = LinearDiscriminantAnalysis(n_components=2)\n", "%time X_lda_reduced = lda.fit_transform(X_sample, y_sample)\n", - "plot_digits(X_lda_reduced, y_sample, figsize=(12,12))\n", + "plot_digits(X_lda_reduced, y_sample, figsize=(12, 12))\n", "plt.show()" ] }, diff --git a/09_unsupervised_learning.ipynb b/09_unsupervised_learning.ipynb index 2552ce6..bf45b3f 100644 --- a/09_unsupervised_learning.ipynb +++ b/09_unsupervised_learning.ipynb @@ -86,11 +86,13 @@ "metadata": {}, "outputs": [], "source": [ - "import matplotlib as mpl\n", + "import matplotlib.pyplot as plt\n", "\n", - "mpl.rc('font', size=12)\n", - "mpl.rc('axes', labelsize=14, titlesize=14)\n", - "mpl.rc('legend', fontsize=14)" + "plt.rc('font', size=14)\n", + "plt.rc('axes', labelsize=14, titlesize=14)\n", + "plt.rc('legend', fontsize=14)\n", + "plt.rc('xtick',labelsize=10)\n", + "plt.rc('ytick',labelsize=10)" ] }, { @@ -1082,7 +1084,7 @@ "plt.ylabel(\"Inertia\")\n", "plt.annotate(\"\", xy=(4, inertias[3]), xytext=(4.45, 650),\n", " arrowprops=dict(facecolor='black', shrink=0.1))\n", - "plt.text(4.5, 650, \"Elbow\", fontsize=14, horizontalalignment=\"center\")\n", + "plt.text(4.5, 650, \"Elbow\", horizontalalignment=\"center\")\n", "plt.axis([1, 8.5, 0, 1300])\n", "plt.grid()\n", "save_fig(\"inertia_vs_k_plot\")\n", @@ -2410,9 +2412,9 @@ "plt.plot(stds[max_idx], max_val, \"r.\")\n", "plt.plot([stds[max_idx], stds[max_idx]], [0, max_val], \"r:\")\n", "plt.plot([0, stds[max_idx]], [max_val, max_val], \"r:\")\n", - "plt.text(stds[max_idx]+ 0.01, 0.081, r\"$\\hat{\\theta}$\", fontsize=14)\n", - "plt.text(stds[max_idx]+ 0.01, max_val - 0.005, r\"$Max$\", fontsize=14)\n", - "plt.text(1.01, max_val - 0.008, r\"$\\hat{\\mathcal{L}}$\", fontsize=14)\n", + "plt.text(stds[max_idx]+ 0.01, 0.081, r\"$\\hat{\\theta}$\")\n", + "plt.text(stds[max_idx]+ 0.01, max_val - 0.006, r\"$Max$\")\n", + "plt.text(1.01, max_val - 0.008, r\"$\\hat{\\mathcal{L}}$\")\n", "plt.ylabel(r\"$\\mathcal{L}$\", rotation=0, labelpad=10)\n", "plt.title(fr\"$\\mathcal{{L}}(\\theta|x={x_val}) = f(x={x_val}; \\theta)$\")\n", "plt.grid()\n", @@ -2433,9 +2435,9 @@ "plt.plot(stds[log_max_idx], log_max_val, \"r.\")\n", "plt.plot([stds[log_max_idx], stds[log_max_idx]], [-5, log_max_val], \"r:\")\n", "plt.plot([0, stds[log_max_idx]], [log_max_val, log_max_val], \"r:\")\n", - "plt.text(stds[log_max_idx]+ 0.01, log_max_val - 0.05, r\"$Max$\", fontsize=14)\n", - "plt.text(stds[log_max_idx]+ 0.01, -2.49, r\"$\\hat{\\theta}$\", fontsize=14)\n", - "plt.text(1.01, log_max_val - 0.07, r\"$\\log \\, \\hat{\\mathcal{L}}$\", fontsize=14)\n", + "plt.text(stds[log_max_idx]+ 0.01, log_max_val - 0.06, r\"$Max$\")\n", + "plt.text(stds[log_max_idx]+ 0.01, -2.49, r\"$\\hat{\\theta}$\")\n", + "plt.text(1.01, log_max_val - 0.08, r\"$\\log \\, \\hat{\\mathcal{L}}$\")\n", "plt.xlabel(r\"$\\theta$\")\n", "plt.ylabel(r\"$\\log\\mathcal{L}$\", rotation=0, labelpad=10)\n", "plt.title(fr\"$\\log \\, \\mathcal{{L}}(\\theta|x={x_val})$\")\n", @@ -2527,7 +2529,7 @@ "plt.axis([1, 9.5, min(aics) - 50, max(aics) + 50])\n", "plt.annotate(\"\", xy=(3, bics[2]), xytext=(3.4, 8650),\n", " arrowprops=dict(facecolor='black', shrink=0.1))\n", - "plt.text(3.5, 8660, \"Minimum\", fontsize=14, horizontalalignment=\"center\")\n", + "plt.text(3.5, 8660, \"Minimum\", horizontalalignment=\"center\")\n", "plt.legend()\n", "plt.grid()\n", "save_fig(\"aic_bic_vs_k_plot\")\n",