mirror of
https://github.com/ArthurDanjou/handson-ml3.git
synced 2026-02-02 21:17:49 +01:00
Large change: replace os.path with pathlib, move to Python 3.7
This commit is contained in:
@@ -4,8 +4,13 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Chapter 2 – End-to-end Machine Learning project**\n",
|
||||
"\n",
|
||||
"**Chapter 2 – End-to-end Machine Learning project**"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"*Welcome to Machine Learning Housing Corp.! Your task is to predict median house values in Californian districts, given a number of features from these districts.*\n",
|
||||
"\n",
|
||||
"*This notebook contains all the sample code and solutions to the exercises in chapter 2.*"
|
||||
@@ -36,7 +41,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"First, let's import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures. We also check that Python 3.5 or later is installed (although Python 2.x may work, it is deprecated so we strongly recommend you use Python 3 instead), as well as Scikit-Learn ≥0.20."
|
||||
"First, let's import a few common modules, ensure Matplotlib plots figures inline and prepare a function to save the figures."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -45,17 +50,17 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Python ≥3.5 is required\n",
|
||||
"# Python ≥3.7 is required\n",
|
||||
"import sys\n",
|
||||
"assert sys.version_info >= (3, 5)\n",
|
||||
"assert sys.version_info >= (3, 7)\n",
|
||||
"\n",
|
||||
"# Scikit-Learn ≥0.20 is required\n",
|
||||
"# Scikit-Learn ≥1.0 is required\n",
|
||||
"import sklearn\n",
|
||||
"assert sklearn.__version__ >= \"0.20\"\n",
|
||||
"assert sklearn.__version__ >= \"1.0\"\n",
|
||||
"\n",
|
||||
"# Common imports\n",
|
||||
"import numpy as np\n",
|
||||
"import os\n",
|
||||
"from pathlib import Path\n",
|
||||
"\n",
|
||||
"# To plot pretty figures\n",
|
||||
"%matplotlib inline\n",
|
||||
@@ -66,14 +71,11 @@
|
||||
"mpl.rc('ytick', labelsize=12)\n",
|
||||
"\n",
|
||||
"# Where to save the figures\n",
|
||||
"PROJECT_ROOT_DIR = \".\"\n",
|
||||
"CHAPTER_ID = \"end_to_end_project\"\n",
|
||||
"IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID)\n",
|
||||
"os.makedirs(IMAGES_PATH, exist_ok=True)\n",
|
||||
"IMAGES_PATH = Path() / \"images\" / \"end_to_end_project\"\n",
|
||||
"IMAGES_PATH.mkdir(parents=True, exist_ok=True)\n",
|
||||
"\n",
|
||||
"def save_fig(fig_id, tight_layout=True, fig_extension=\"png\", resolution=300):\n",
|
||||
" path = os.path.join(IMAGES_PATH, fig_id + \".\" + fig_extension)\n",
|
||||
" print(\"Saving figure\", fig_id)\n",
|
||||
" path = IMAGES_PATH / f\"{fig_id}.{fig_extension}\"\n",
|
||||
" if tight_layout:\n",
|
||||
" plt.tight_layout()\n",
|
||||
" plt.savefig(path, format=fig_extension, dpi=resolution)"
|
||||
@@ -95,48 +97,36 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from pathlib import Path\n",
|
||||
"import tarfile\n",
|
||||
"import urllib.request\n",
|
||||
"\n",
|
||||
"DOWNLOAD_ROOT = \"https://raw.githubusercontent.com/ageron/handson-ml2/master/\"\n",
|
||||
"HOUSING_PATH = os.path.join(\"datasets\", \"housing\")\n",
|
||||
"HOUSING_URL = DOWNLOAD_ROOT + \"datasets/housing/housing.tgz\"\n",
|
||||
"\n",
|
||||
"def fetch_housing_data(housing_url=HOUSING_URL, housing_path=HOUSING_PATH):\n",
|
||||
" if not os.path.isdir(housing_path):\n",
|
||||
" os.makedirs(housing_path)\n",
|
||||
" tgz_path = os.path.join(housing_path, \"housing.tgz\")\n",
|
||||
" urllib.request.urlretrieve(housing_url, tgz_path)\n",
|
||||
" housing_tgz = tarfile.open(tgz_path)\n",
|
||||
" housing_tgz.extractall(path=housing_path)\n",
|
||||
" housing_tgz.close()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fetch_housing_data()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"def load_housing_data(housing_path=HOUSING_PATH):\n",
|
||||
" csv_path = os.path.join(housing_path, \"housing.csv\")\n",
|
||||
" return pd.read_csv(csv_path)"
|
||||
"def load_housing_data():\n",
|
||||
" housing_path = Path() / \"datasets\" / \"housing\"\n",
|
||||
" if not (housing_path / \"housing.csv\").is_file():\n",
|
||||
" housing_path.mkdir(parents=True, exist_ok=True)\n",
|
||||
" root = \"https://raw.githubusercontent.com/ageron/handson-ml2/master/\"\n",
|
||||
" url = root + \"datasets/housing/housing.tgz\"\n",
|
||||
" tgz_path = housing_path / \"housing.tgz\"\n",
|
||||
" urllib.request.urlretrieve(url, tgz_path)\n",
|
||||
" housing_tgz = tarfile.open(tgz_path)\n",
|
||||
" housing_tgz.extractall(path=housing_path)\n",
|
||||
" housing_tgz.close()\n",
|
||||
" return pd.read_csv(housing_path / \"housing.csv\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"housing = load_housing_data()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -526,18 +516,19 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 36,
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Download the California image\n",
|
||||
"images_path = os.path.join(PROJECT_ROOT_DIR, \"images\", \"end_to_end_project\")\n",
|
||||
"os.makedirs(images_path, exist_ok=True)\n",
|
||||
"DOWNLOAD_ROOT = \"https://raw.githubusercontent.com/ageron/handson-ml2/master/\"\n",
|
||||
"images_path = Path() / \"images\" / \"end_to_end_project\"\n",
|
||||
"filename = \"california.png\"\n",
|
||||
"print(\"Downloading\", filename)\n",
|
||||
"url = DOWNLOAD_ROOT + \"images/end_to_end_project/\" + filename\n",
|
||||
"urllib.request.urlretrieve(url, os.path.join(images_path, filename))"
|
||||
"if not (images_path / filename).is_file():\n",
|
||||
" images_path.mkdir(parents=True, exist_ok=True)\n",
|
||||
" root = \"https://raw.githubusercontent.com/ageron/handson-ml2/master/\"\n",
|
||||
" url = root + \"images/end_to_end_project/\" + filename\n",
|
||||
" print(\"Downloading\", filename)\n",
|
||||
" urllib.request.urlretrieve(url, images_path / filename)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -547,7 +538,8 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import matplotlib.image as mpimg\n",
|
||||
"california_img=mpimg.imread(os.path.join(images_path, filename))\n",
|
||||
"\n",
|
||||
"california_img=mpimg.imread(images_path / filename)\n",
|
||||
"ax = housing.plot(kind=\"scatter\", x=\"longitude\", y=\"latitude\", figsize=(10,7),\n",
|
||||
" s=housing['population']/100, label=\"Population\",\n",
|
||||
" c=\"median_house_value\", cmap=plt.get_cmap(\"jet\"),\n",
|
||||
@@ -2342,7 +2334,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user