From d2518a679bed191be50855347e349531b3807fda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Geron?= Date: Wed, 16 Jan 2019 23:42:00 +0800 Subject: [PATCH] Drop Python 2 (woohoo!) and import matplotlib as mpl --- 01_the_machine_learning_landscape.ipynb | 178 +++++++++-------- 02_end_to_end_machine_learning_project.ipynb | 26 +-- 03_classification.ipynb | 179 +++++++----------- 04_training_linear_models.ipynb | 19 +- 05_support_vector_machines.ipynb | 19 +- 06_decision_trees.ipynb | 19 +- 07_ensemble_learning_and_random_forests.ipynb | 21 +- 08_dimensionality_reduction.ipynb | 21 +- 8 files changed, 223 insertions(+), 259 deletions(-) diff --git a/01_the_machine_learning_landscape.ipynb b/01_the_machine_learning_landscape.ipynb index 1ca8b0b..e944e1d 100644 --- a/01_the_machine_learning_landscape.ipynb +++ b/01_the_machine_learning_landscape.ipynb @@ -13,14 +13,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Setup" + "# Code example 1-1" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:" + "Although Python 2.x may work, it is deprecated so we strongly recommend you use Python 3 instead." ] }, { @@ -33,45 +33,9 @@ }, "outputs": [], "source": [ - "# To support both python 2 and python 3\n", - "from __future__ import division, print_function, unicode_literals\n", - "\n", - "# Common imports\n", - "import numpy as np\n", - "import os\n", - "\n", - "# to make this notebook's output stable across runs\n", - "np.random.seed(42)\n", - "\n", - "# To plot pretty figures\n", - "%matplotlib inline\n", - "import matplotlib\n", - "import matplotlib.pyplot as plt\n", - "plt.rcParams['axes.labelsize'] = 14\n", - "plt.rcParams['xtick.labelsize'] = 12\n", - "plt.rcParams['ytick.labelsize'] = 12\n", - "\n", - "# Where to save the figures\n", - "PROJECT_ROOT_DIR = \".\"\n", - "CHAPTER_ID = \"fundamentals\"\n", - "\n", - "def save_fig(fig_id, tight_layout=True):\n", - " path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n", - " print(\"Saving figure\", fig_id)\n", - " if tight_layout:\n", - " plt.tight_layout()\n", - " plt.savefig(path, format='png', dpi=300)\n", - "\n", - "# Ignore useless warnings (see SciPy issue #5998)\n", - "import warnings\n", - "warnings.filterwarnings(action=\"ignore\", message=\"^internal gelsd\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Code example 1-1" + "# Python ≥3.5 is required\n", + "import sys\n", + "assert sys.version_info >= (3, 5)" ] }, { @@ -122,9 +86,23 @@ "execution_count": 4, "metadata": {}, "outputs": [], + "source": [ + "# To plot pretty figures directly within Jupyter\n", + "%matplotlib inline\n", + "import matplotlib as mpl\n", + "mpl.rc('axes', labelsize=14)\n", + "mpl.rc('xtick', labelsize=12)\n", + "mpl.rc('ytick', labelsize=12)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], "source": [ "# Code example\n", - "import matplotlib\n", + "import matplotlib as mpl\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", @@ -204,6 +182,47 @@ "outputs": [], "source": [] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a function to save the figures." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# Where to save the figures\n", + "PROJECT_ROOT_DIR = \".\"\n", + "CHAPTER_ID = \"fundamentals\"\n", + "\n", + "def save_fig(fig_id, tight_layout=True):\n", + " path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n", + " print(\"Saving figure\", fig_id)\n", + " if tight_layout:\n", + " plt.tight_layout()\n", + " plt.savefig(path, format='png', dpi=300)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Make this notebook's output stable across runs:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(42)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -222,7 +241,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -234,7 +253,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -257,7 +276,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -270,7 +289,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -281,7 +300,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -290,7 +309,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -303,7 +322,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -328,7 +347,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -337,7 +356,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -346,7 +365,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -370,7 +389,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -385,7 +404,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -401,7 +420,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -413,7 +432,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ @@ -432,7 +451,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ @@ -441,7 +460,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ @@ -450,7 +469,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 24, "metadata": {}, "outputs": [], "source": [ @@ -471,16 +490,16 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "# Code example\n", - "import matplotlib\n", + "import matplotlib as mpl\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", - "import sklearn\n", + "import sklearn.linear_model\n", "\n", "# Load the data\n", "oecd_bli = pd.read_csv(datapath + \"oecd_bli_2015.csv\", thousands=',')\n", @@ -509,7 +528,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 26, "metadata": {}, "outputs": [], "source": [ @@ -518,7 +537,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 27, "metadata": {}, "outputs": [], "source": [ @@ -527,7 +546,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ @@ -544,7 +563,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 29, "metadata": {}, "outputs": [], "source": [ @@ -575,7 +594,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 30, "metadata": {}, "outputs": [], "source": [ @@ -599,7 +618,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 31, "metadata": {}, "outputs": [], "source": [ @@ -608,7 +627,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 32, "metadata": {}, "outputs": [], "source": [ @@ -617,7 +636,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 33, "metadata": {}, "outputs": [], "source": [ @@ -648,7 +667,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 34, "metadata": {}, "outputs": [], "source": [ @@ -660,7 +679,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 35, "metadata": {}, "outputs": [], "source": [ @@ -670,7 +689,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 36, "metadata": {}, "outputs": [], "source": [ @@ -680,7 +699,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 37, "metadata": {}, "outputs": [], "source": [ @@ -694,18 +713,11 @@ "X_new = np.array([[22587.0]]) # Cyprus' GDP per capita\n", "print(model.predict(X_new)) # outputs [[ 5.76666667]]" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 - tf2", "language": "python", "name": "python3" }, @@ -719,7 +731,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.6" + "version": "3.6.8" }, "nav_menu": {}, "toc": { diff --git a/02_end_to_end_machine_learning_project.ipynb b/02_end_to_end_machine_learning_project.ipynb index 52ac5cb..192438d 100644 --- a/02_end_to_end_machine_learning_project.ipynb +++ b/02_end_to_end_machine_learning_project.ipynb @@ -11,13 +11,6 @@ "*This notebook contains all the sample code and solutions to the exercices in chapter 2.*" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Note**: You may find little differences between the code outputs in the book and in these Jupyter notebooks: these slight differences are mostly due to the random nature of many training algorithms: although I have tried to make these notebooks' outputs as constant as possible, it is impossible to guarantee that they will produce the exact same output on every platform. Also, some data structures (such as dictionaries) do not preserve the item order. Finally, I fixed a few minor bugs (I added notes next to the concerned cells) which lead to slightly different results, without changing the ideas presented in the book." - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -29,7 +22,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:" + "First, let's import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures. We also check that Python 3.5 or later is installed (although Python 2.x may work, it is deprecated so we strongly recommend you use Python 3 instead)." ] }, { @@ -38,8 +31,9 @@ "metadata": {}, "outputs": [], "source": [ - "# To support both python 2 and python 3\n", - "from __future__ import division, print_function, unicode_literals\n", + "# Python ≥3.5 is required\n", + "import sys\n", + "assert sys.version_info >= (3, 5)\n", "\n", "# Common imports\n", "import numpy as np\n", @@ -50,11 +44,11 @@ "\n", "# To plot pretty figures\n", "%matplotlib inline\n", - "import matplotlib\n", + "import matplotlib as mpl\n", "import matplotlib.pyplot as plt\n", - "plt.rcParams['axes.labelsize'] = 14\n", - "plt.rcParams['xtick.labelsize'] = 12\n", - "plt.rcParams['ytick.labelsize'] = 12\n", + "mpl.rc('axes', labelsize=14)\n", + "mpl.rc('xtick', labelsize=12)\n", + "mpl.rc('ytick', labelsize=12)\n", "\n", "# Where to save the figures\n", "PROJECT_ROOT_DIR = \".\"\n", @@ -2177,7 +2171,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 - tf2", "language": "python", "name": "python3" }, @@ -2191,7 +2185,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.6" + "version": "3.6.8" }, "nav_menu": { "height": "279px", diff --git a/03_classification.ipynb b/03_classification.ipynb index c25e2ef..96eef1a 100644 --- a/03_classification.ipynb +++ b/03_classification.ipynb @@ -20,7 +20,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:" + "First, let's import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures. We also check that Python 3.5 or later is installed (although Python 2.x may work, it is deprecated so we strongly recommend you use Python 3 instead)." ] }, { @@ -29,8 +29,9 @@ "metadata": {}, "outputs": [], "source": [ - "# To support both python 2 and python 3\n", - "from __future__ import division, print_function, unicode_literals\n", + "# Python ≥3.5 is required\n", + "import sys\n", + "assert sys.version_info >= (3, 5)\n", "\n", "# Common imports\n", "import numpy as np\n", @@ -41,11 +42,11 @@ "\n", "# To plot pretty figures\n", "%matplotlib inline\n", - "import matplotlib\n", + "import matplotlib as mpl\n", "import matplotlib.pyplot as plt\n", - "plt.rcParams['axes.labelsize'] = 14\n", - "plt.rcParams['xtick.labelsize'] = 12\n", - "plt.rcParams['ytick.labelsize'] = 12\n", + "mpl.rc('axes', labelsize=14)\n", + "mpl.rc('xtick', labelsize=12)\n", + "mpl.rc('ytick', labelsize=12)\n", "\n", "# Where to save the figures\n", "PROJECT_ROOT_DIR = \".\"\n", @@ -79,13 +80,10 @@ "metadata": {}, "outputs": [], "source": [ - "def sort_by_target(mnist):\n", - " reorder_train = np.array(sorted([(target, i) for i, target in enumerate(mnist.target[:60000])]))[:, 1]\n", - " reorder_test = np.array(sorted([(target, i) for i, target in enumerate(mnist.target[60000:])]))[:, 1]\n", - " mnist.data[:60000] = mnist.data[reorder_train]\n", - " mnist.target[:60000] = mnist.target[reorder_train]\n", - " mnist.data[60000:] = mnist.data[reorder_test + 60000]\n", - " mnist.target[60000:] = mnist.target[reorder_test + 60000]" + "from sklearn.datasets import fetch_openml\n", + "mnist = fetch_openml('mnist_784', version=1)\n", + "mnist.target = mnist.target.astype(np.int8) # fetch_openml() returns targets as strings\n", + "mnist[\"data\"], mnist[\"target\"]" ] }, { @@ -93,30 +91,13 @@ "execution_count": 3, "metadata": {}, "outputs": [], - "source": [ - "try:\n", - " from sklearn.datasets import fetch_openml\n", - " mnist = fetch_openml('mnist_784', version=1, cache=True)\n", - " mnist.target = mnist.target.astype(np.int8) # fetch_openml() returns targets as strings\n", - " sort_by_target(mnist) # fetch_openml() returns an unsorted dataset\n", - "except ImportError:\n", - " from sklearn.datasets import fetch_mldata\n", - " mnist = fetch_mldata('MNIST original')\n", - "mnist[\"data\"], mnist[\"target\"]" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], "source": [ "mnist.data.shape" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -126,7 +107,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -135,7 +116,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -144,18 +125,17 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", - "import matplotlib\n", + "import matplotlib as mpl\n", "import matplotlib.pyplot as plt\n", "\n", "some_digit = X[36000]\n", "some_digit_image = some_digit.reshape(28, 28)\n", - "plt.imshow(some_digit_image, cmap = matplotlib.cm.binary,\n", - " interpolation=\"nearest\")\n", + "plt.imshow(some_digit_image, cmap = mpl.cm.binary, interpolation=\"nearest\")\n", "plt.axis(\"off\")\n", "\n", "save_fig(\"some_digit_plot\")\n", @@ -164,20 +144,20 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "def plot_digit(data):\n", " image = data.reshape(28, 28)\n", - " plt.imshow(image, cmap = matplotlib.cm.binary,\n", + " plt.imshow(image, cmap = mpl.cm.binary,\n", " interpolation=\"nearest\")\n", " plt.axis(\"off\")" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -194,13 +174,13 @@ " rimages = images[row * images_per_row : (row + 1) * images_per_row]\n", " row_images.append(np.concatenate(rimages, axis=1))\n", " image = np.concatenate(row_images, axis=0)\n", - " plt.imshow(image, cmap = matplotlib.cm.binary, **options)\n", + " plt.imshow(image, cmap = mpl.cm.binary, **options)\n", " plt.axis(\"off\")" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -213,7 +193,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -222,7 +202,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -231,7 +211,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -250,7 +230,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -267,7 +247,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -279,7 +259,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -288,7 +268,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -298,7 +278,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -322,7 +302,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -336,7 +316,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -346,7 +326,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ @@ -357,7 +337,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ @@ -368,7 +348,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ @@ -377,7 +357,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 24, "metadata": {}, "outputs": [], "source": [ @@ -386,7 +366,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 25, "metadata": {}, "outputs": [], "source": [ @@ -397,16 +377,16 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 31, "metadata": {}, "outputs": [], "source": [ - "4344 / (4344 + 1307)" + "4432 / (4432 + 1607)" ] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 32, "metadata": {}, "outputs": [], "source": [ @@ -415,16 +395,16 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 34, "metadata": {}, "outputs": [], "source": [ - "4344 / (4344 + 1077)" + "4432 / (4432 + 989)" ] }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 29, "metadata": {}, "outputs": [], "source": [ @@ -434,16 +414,16 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 35, "metadata": {}, "outputs": [], "source": [ - "4344 / (4344 + (1077 + 1307)/2)" + "4432 / (4432 + (1607 + 989)/2)" ] }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 36, "metadata": {}, "outputs": [], "source": [ @@ -453,7 +433,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 37, "metadata": {}, "outputs": [], "source": [ @@ -463,7 +443,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 38, "metadata": {}, "outputs": [], "source": [ @@ -472,18 +452,18 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 40, "metadata": {}, "outputs": [], "source": [ - "threshold = 200000\n", + "threshold = -200000\n", "y_some_digit_pred = (y_scores > threshold)\n", "y_some_digit_pred" ] }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 41, "metadata": {}, "outputs": [], "source": [ @@ -491,36 +471,9 @@ " method=\"decision_function\")" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note: there was an [issue](https://github.com/scikit-learn/scikit-learn/issues/9589) in Scikit-Learn 0.19.0 (fixed in 0.19.1) where the result of `cross_val_predict()` was incorrect in the binary classification case when using `method=\"decision_function\"`, as in the code above. The resulting array had an extra first dimension full of 0s. Just in case you are using 0.19.0, we need to add this small hack to work around this issue:" - ] - }, { "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [], - "source": [ - "y_scores.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [], - "source": [ - "# hack to work around issue #9589 in Scikit-Learn 0.19.0\n", - "if y_scores.ndim == 2:\n", - " y_scores = y_scores[:, 1]" - ] - }, - { - "cell_type": "code", - "execution_count": 39, + "execution_count": 42, "metadata": {}, "outputs": [], "source": [ @@ -531,7 +484,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 43, "metadata": {}, "outputs": [], "source": [ @@ -551,7 +504,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 44, "metadata": {}, "outputs": [], "source": [ @@ -560,7 +513,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 45, "metadata": {}, "outputs": [], "source": [ @@ -569,7 +522,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 46, "metadata": {}, "outputs": [], "source": [ @@ -578,7 +531,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 47, "metadata": {}, "outputs": [], "source": [ @@ -587,7 +540,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 48, "metadata": {}, "outputs": [], "source": [ @@ -612,7 +565,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 49, "metadata": {}, "outputs": [], "source": [ @@ -623,7 +576,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 50, "metadata": {}, "outputs": [], "source": [ @@ -642,7 +595,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 51, "metadata": {}, "outputs": [], "source": [ @@ -660,7 +613,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 52, "metadata": {}, "outputs": [], "source": [ @@ -672,7 +625,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 53, "metadata": {}, "outputs": [], "source": [ @@ -2531,7 +2484,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 - tf2", "language": "python", "name": "python3" }, @@ -2545,7 +2498,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.6" + "version": "3.6.8" }, "nav_menu": {}, "toc": { diff --git a/04_training_linear_models.ipynb b/04_training_linear_models.ipynb index 1326008..cf52355 100644 --- a/04_training_linear_models.ipynb +++ b/04_training_linear_models.ipynb @@ -25,7 +25,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:" + "First, let's import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures. We also check that Python 3.5 or later is installed (although Python 2.x may work, it is deprecated so we strongly recommend you use Python 3 instead)." ] }, { @@ -34,8 +34,9 @@ "metadata": {}, "outputs": [], "source": [ - "# To support both python 2 and python 3\n", - "from __future__ import division, print_function, unicode_literals\n", + "# Python ≥3.5 is required\n", + "import sys\n", + "assert sys.version_info >= (3, 5)\n", "\n", "# Common imports\n", "import numpy as np\n", @@ -46,11 +47,11 @@ "\n", "# To plot pretty figures\n", "%matplotlib inline\n", - "import matplotlib\n", + "import matplotlib as mpl\n", "import matplotlib.pyplot as plt\n", - "plt.rcParams['axes.labelsize'] = 14\n", - "plt.rcParams['xtick.labelsize'] = 12\n", - "plt.rcParams['ytick.labelsize'] = 12\n", + "mpl.rc('axes', labelsize=14)\n", + "mpl.rc('xtick', labelsize=12)\n", + "mpl.rc('ytick', labelsize=12)\n", "\n", "# Where to save the figures\n", "PROJECT_ROOT_DIR = \".\"\n", @@ -1729,7 +1730,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 - tf2", "language": "python", "name": "python3" }, @@ -1743,7 +1744,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.6" + "version": "3.6.8" }, "nav_menu": {}, "toc": { diff --git a/05_support_vector_machines.ipynb b/05_support_vector_machines.ipynb index cc08550..2c578dc 100644 --- a/05_support_vector_machines.ipynb +++ b/05_support_vector_machines.ipynb @@ -20,7 +20,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:" + "First, let's import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures. We also check that Python 3.5 or later is installed (although Python 2.x may work, it is deprecated so we strongly recommend you use Python 3 instead)." ] }, { @@ -29,8 +29,9 @@ "metadata": {}, "outputs": [], "source": [ - "# To support both python 2 and python 3\n", - "from __future__ import division, print_function, unicode_literals\n", + "# Python ≥3.5 is required\n", + "import sys\n", + "assert sys.version_info >= (3, 5)\n", "\n", "# Common imports\n", "import numpy as np\n", @@ -41,11 +42,11 @@ "\n", "# To plot pretty figures\n", "%matplotlib inline\n", - "import matplotlib\n", + "import matplotlib as mpl\n", "import matplotlib.pyplot as plt\n", - "plt.rcParams['axes.labelsize'] = 14\n", - "plt.rcParams['xtick.labelsize'] = 12\n", - "plt.rcParams['ytick.labelsize'] = 12\n", + "mpl.rc('axes', labelsize=14)\n", + "mpl.rc('xtick', labelsize=12)\n", + "mpl.rc('ytick', labelsize=12)\n", "\n", "# Where to save the figures\n", "PROJECT_ROOT_DIR = \".\"\n", @@ -1792,7 +1793,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 - tf2", "language": "python", "name": "python3" }, @@ -1806,7 +1807,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.6" + "version": "3.6.8" }, "nav_menu": {}, "toc": { diff --git a/06_decision_trees.ipynb b/06_decision_trees.ipynb index 908abf7..c091db1 100644 --- a/06_decision_trees.ipynb +++ b/06_decision_trees.ipynb @@ -25,7 +25,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:" + "First, let's import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures. We also check that Python 3.5 or later is installed (although Python 2.x may work, it is deprecated so we strongly recommend you use Python 3 instead)." ] }, { @@ -34,8 +34,9 @@ "metadata": {}, "outputs": [], "source": [ - "# To support both python 2 and python 3\n", - "from __future__ import division, print_function, unicode_literals\n", + "# Python ≥3.5 is required\n", + "import sys\n", + "assert sys.version_info >= (3, 5)\n", "\n", "# Common imports\n", "import numpy as np\n", @@ -46,11 +47,11 @@ "\n", "# To plot pretty figures\n", "%matplotlib inline\n", - "import matplotlib\n", + "import matplotlib as mpl\n", "import matplotlib.pyplot as plt\n", - "plt.rcParams['axes.labelsize'] = 14\n", - "plt.rcParams['xtick.labelsize'] = 12\n", - "plt.rcParams['ytick.labelsize'] = 12\n", + "mpl.rc('axes', labelsize=14)\n", + "mpl.rc('xtick', labelsize=12)\n", + "mpl.rc('ytick', labelsize=12)\n", "\n", "# Where to save the figures\n", "PROJECT_ROOT_DIR = \".\"\n", @@ -696,7 +697,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 - tf2", "language": "python", "name": "python3" }, @@ -710,7 +711,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.6" + "version": "3.6.8" }, "nav_menu": { "height": "309px", diff --git a/07_ensemble_learning_and_random_forests.ipynb b/07_ensemble_learning_and_random_forests.ipynb index a7697cf..75517ef 100644 --- a/07_ensemble_learning_and_random_forests.ipynb +++ b/07_ensemble_learning_and_random_forests.ipynb @@ -25,7 +25,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:" + "First, let's import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures. We also check that Python 3.5 or later is installed (although Python 2.x may work, it is deprecated so we strongly recommend you use Python 3 instead)." ] }, { @@ -34,8 +34,9 @@ "metadata": {}, "outputs": [], "source": [ - "# To support both python 2 and python 3\n", - "from __future__ import division, print_function, unicode_literals\n", + "# Python ≥3.5 is required\n", + "import sys\n", + "assert sys.version_info >= (3, 5)\n", "\n", "# Common imports\n", "import numpy as np\n", @@ -46,11 +47,11 @@ "\n", "# To plot pretty figures\n", "%matplotlib inline\n", - "import matplotlib\n", + "import matplotlib as mpl\n", "import matplotlib.pyplot as plt\n", - "plt.rcParams['axes.labelsize'] = 14\n", - "plt.rcParams['xtick.labelsize'] = 12\n", - "plt.rcParams['ytick.labelsize'] = 12\n", + "mpl.rc('axes', labelsize=14)\n", + "mpl.rc('xtick', labelsize=12)\n", + "mpl.rc('ytick', labelsize=12)\n", "\n", "# Where to save the figures\n", "PROJECT_ROOT_DIR = \".\"\n", @@ -454,7 +455,7 @@ "source": [ "def plot_digit(data):\n", " image = data.reshape(28, 28)\n", - " plt.imshow(image, cmap = matplotlib.cm.hot,\n", + " plt.imshow(image, cmap = mpl.cm.hot,\n", " interpolation=\"nearest\")\n", " plt.axis(\"off\")" ] @@ -1336,7 +1337,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 - tf2", "language": "python", "name": "python3" }, @@ -1350,7 +1351,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.6" + "version": "3.6.8" }, "nav_menu": { "height": "252px", diff --git a/08_dimensionality_reduction.ipynb b/08_dimensionality_reduction.ipynb index 213f0a0..9ecd2da 100644 --- a/08_dimensionality_reduction.ipynb +++ b/08_dimensionality_reduction.ipynb @@ -20,7 +20,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:" + "First, let's import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures. We also check that Python 3.5 or later is installed (although Python 2.x may work, it is deprecated so we strongly recommend you use Python 3 instead)." ] }, { @@ -29,8 +29,9 @@ "metadata": {}, "outputs": [], "source": [ - "# To support both python 2 and python 3\n", - "from __future__ import division, print_function, unicode_literals\n", + "# Python ≥3.5 is required\n", + "import sys\n", + "assert sys.version_info >= (3, 5)\n", "\n", "# Common imports\n", "import numpy as np\n", @@ -41,11 +42,11 @@ "\n", "# To plot pretty figures\n", "%matplotlib inline\n", - "import matplotlib\n", + "import matplotlib as mpl\n", "import matplotlib.pyplot as plt\n", - "plt.rcParams['axes.labelsize'] = 14\n", - "plt.rcParams['xtick.labelsize'] = 12\n", - "plt.rcParams['ytick.labelsize'] = 12\n", + "mpl.rc('axes', labelsize=14)\n", + "mpl.rc('xtick', labelsize=12)\n", + "mpl.rc('ytick', labelsize=12)\n", "\n", "# Where to save the figures\n", "PROJECT_ROOT_DIR = \".\"\n", @@ -873,7 +874,7 @@ " rimages = images[row * images_per_row : (row + 1) * images_per_row]\n", " row_images.append(np.concatenate(rimages, axis=1))\n", " image = np.concatenate(row_images, axis=0)\n", - " plt.imshow(image, cmap = matplotlib.cm.binary, **options)\n", + " plt.imshow(image, cmap = mpl.cm.binary, **options)\n", " plt.axis(\"off\")" ] }, @@ -1848,7 +1849,7 @@ "outputs": [], "source": [ "plt.figure(figsize=(9,9))\n", - "cmap = matplotlib.cm.get_cmap(\"jet\")\n", + "cmap = mpl.cm.get_cmap(\"jet\")\n", "for digit in (2, 3, 5):\n", " plt.scatter(X_reduced[y == digit, 0], X_reduced[y == digit, 1], c=[cmap(digit / 9)])\n", "plt.axis('off')\n", @@ -1930,7 +1931,7 @@ " neighbors = np.array([[10., 10.]])\n", " # The rest should be self-explanatory\n", " plt.figure(figsize=figsize)\n", - " cmap = matplotlib.cm.get_cmap(\"jet\")\n", + " cmap = mpl.cm.get_cmap(\"jet\")\n", " digits = np.unique(y)\n", " for digit in digits:\n", " plt.scatter(X_normalized[y == digit, 0], X_normalized[y == digit, 1], c=[cmap(digit / 9)])\n",