Large change: replace os.path with pathlib, move to Python 3.7

This commit is contained in:
Aurélien Geron
2021-10-15 21:46:27 +13:00
parent 1b16a81fe5
commit fa1ae51184
19 changed files with 969 additions and 1066 deletions

View File

@@ -4,8 +4,13 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"**Chapter 2 – End-to-end Machine Learning project**\n",
"\n",
"**Chapter 2 – End-to-end Machine Learning project**"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"*Welcome to Machine Learning Housing Corp.! Your task is to predict median house values in Californian districts, given a number of features from these districts.*\n",
"\n",
"*This notebook contains all the sample code and solutions to the exercises in chapter 2.*"
@@ -36,7 +41,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"First, let's import a few common modules, ensure Matplotlib plots figures inline and prepare a function to save the figures. We also check that Python 3.5 or later is installed (although Python 2.x may work, it is deprecated so we strongly recommend you use Python 3 instead), as well as Scikit-Learn ≥0.20."
"First, let's import a few common modules, ensure Matplotlib plots figures inline and prepare a function to save the figures."
]
},
{
@@ -45,17 +50,17 @@
"metadata": {},
"outputs": [],
"source": [
"# Python ≥3.5 is required\n",
"# Python ≥3.7 is required\n",
"import sys\n",
"assert sys.version_info >= (3, 5)\n",
"assert sys.version_info >= (3, 7)\n",
"\n",
"# Scikit-Learn ≥0.20 is required\n",
"# Scikit-Learn ≥1.0 is required\n",
"import sklearn\n",
"assert sklearn.__version__ >= \"0.20\"\n",
"assert sklearn.__version__ >= \"1.0\"\n",
"\n",
"# Common imports\n",
"import numpy as np\n",
"import os\n",
"from pathlib import Path\n",
"\n",
"# To plot pretty figures\n",
"%matplotlib inline\n",
@@ -66,14 +71,11 @@
"mpl.rc('ytick', labelsize=12)\n",
"\n",
"# Where to save the figures\n",
"PROJECT_ROOT_DIR = \".\"\n",
"CHAPTER_ID = \"end_to_end_project\"\n",
"IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID)\n",
"os.makedirs(IMAGES_PATH, exist_ok=True)\n",
"IMAGES_PATH = Path() / \"images\" / \"end_to_end_project\"\n",
"IMAGES_PATH.mkdir(parents=True, exist_ok=True)\n",
"\n",
"def save_fig(fig_id, tight_layout=True, fig_extension=\"png\", resolution=300):\n",
" path = os.path.join(IMAGES_PATH, fig_id + \".\" + fig_extension)\n",
" print(\"Saving figure\", fig_id)\n",
" path = IMAGES_PATH / f\"{fig_id}.{fig_extension}\"\n",
" if tight_layout:\n",
" plt.tight_layout()\n",
" plt.savefig(path, format=fig_extension, dpi=resolution)"
@@ -95,48 +97,36 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from pathlib import Path\n",
"import tarfile\n",
"import urllib.request\n",
"\n",
"DOWNLOAD_ROOT = \"https://raw.githubusercontent.com/ageron/handson-ml2/master/\"\n",
"HOUSING_PATH = os.path.join(\"datasets\", \"housing\")\n",
"HOUSING_URL = DOWNLOAD_ROOT + \"datasets/housing/housing.tgz\"\n",
"\n",
"def fetch_housing_data(housing_url=HOUSING_URL, housing_path=HOUSING_PATH):\n",
" if not os.path.isdir(housing_path):\n",
" os.makedirs(housing_path)\n",
" tgz_path = os.path.join(housing_path, \"housing.tgz\")\n",
" urllib.request.urlretrieve(housing_url, tgz_path)\n",
" housing_tgz = tarfile.open(tgz_path)\n",
" housing_tgz.extractall(path=housing_path)\n",
" housing_tgz.close()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"fetch_housing_data()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"def load_housing_data(housing_path=HOUSING_PATH):\n",
" csv_path = os.path.join(housing_path, \"housing.csv\")\n",
" return pd.read_csv(csv_path)"
"def load_housing_data():\n",
" housing_path = Path() / \"datasets\" / \"housing\"\n",
" if not (housing_path / \"housing.csv\").is_file():\n",
" housing_path.mkdir(parents=True, exist_ok=True)\n",
" root = \"https://raw.githubusercontent.com/ageron/handson-ml2/master/\"\n",
" url = root + \"datasets/housing/housing.tgz\"\n",
" tgz_path = housing_path / \"housing.tgz\"\n",
" urllib.request.urlretrieve(url, tgz_path)\n",
" housing_tgz = tarfile.open(tgz_path)\n",
" housing_tgz.extractall(path=housing_path)\n",
" housing_tgz.close()\n",
" return pd.read_csv(housing_path / \"housing.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"housing = load_housing_data()"
]
},
{
@@ -526,18 +516,19 @@
},
{
"cell_type": "code",
"execution_count": 36,
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"# Download the California image\n",
"images_path = os.path.join(PROJECT_ROOT_DIR, \"images\", \"end_to_end_project\")\n",
"os.makedirs(images_path, exist_ok=True)\n",
"DOWNLOAD_ROOT = \"https://raw.githubusercontent.com/ageron/handson-ml2/master/\"\n",
"images_path = Path() / \"images\" / \"end_to_end_project\"\n",
"filename = \"california.png\"\n",
"print(\"Downloading\", filename)\n",
"url = DOWNLOAD_ROOT + \"images/end_to_end_project/\" + filename\n",
"urllib.request.urlretrieve(url, os.path.join(images_path, filename))"
"if not (images_path / filename).is_file():\n",
" images_path.mkdir(parents=True, exist_ok=True)\n",
" root = \"https://raw.githubusercontent.com/ageron/handson-ml2/master/\"\n",
" url = root + \"images/end_to_end_project/\" + filename\n",
" print(\"Downloading\", filename)\n",
" urllib.request.urlretrieve(url, images_path / filename)"
]
},
{
@@ -547,7 +538,8 @@
"outputs": [],
"source": [
"import matplotlib.image as mpimg\n",
"california_img=mpimg.imread(os.path.join(images_path, filename))\n",
"\n",
"california_img=mpimg.imread(images_path / filename)\n",
"ax = housing.plot(kind=\"scatter\", x=\"longitude\", y=\"latitude\", figsize=(10,7),\n",
" s=housing['population']/100, label=\"Population\",\n",
" c=\"median_house_value\", cmap=plt.get_cmap(\"jet\"),\n",
@@ -2342,7 +2334,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},