From 6b821335c02fae07718710ed7b6d9913127a3799 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aur=C3=A9lien=20Geron?= <ageron@users.noreply.github.com>
Date: Sun, 3 Oct 2021 00:14:44 +1300
Subject: [PATCH] Add some section headers

---
 02_end_to_end_machine_learning_project.ipynb | 138 ++++++++++++++++++-
 03_classification.ipynb                      |  47 ++++++-
 04_training_linear_models.ipynb              |  80 +++++++++--
 3 files changed, 239 insertions(+), 26 deletions(-)

diff --git a/02_end_to_end_machine_learning_project.ipynb b/02_end_to_end_machine_learning_project.ipynb
index efa5324..d8c8349 100644
--- a/02_end_to_end_machine_learning_project.ipynb
+++ b/02_end_to_end_machine_learning_project.ipynb
@@ -83,7 +83,14 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Get the data"
+    "# Get the Data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Download the Data"
    ]
   },
   {
@@ -132,6 +139,13 @@
     "    return pd.read_csv(csv_path)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Take a Quick Look at the Data Structure"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 5,
@@ -182,6 +196,13 @@
     "plt.show()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Create a Test Set"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 10,
@@ -443,7 +464,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Discover and visualize the data to gain insights"
+    "# Discover and Visualize the Data to Gain Insights"
    ]
   },
   {
@@ -455,6 +476,13 @@
     "housing = strat_train_set.copy()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Visualizing Geographical Data"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 33,
@@ -540,6 +568,13 @@
     "plt.show()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Looking for Correlations"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 38,
@@ -585,6 +620,13 @@
     "save_fig(\"income_vs_house_value_scatterplot\")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Experimenting with Attribute Combinations"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 42,
@@ -631,7 +673,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Prepare the data for Machine Learning algorithms"
+    "# Prepare the Data for Machine Learning Algorithms"
    ]
   },
   {
@@ -644,6 +686,29 @@
     "housing_labels = strat_train_set[\"median_house_value\"].copy()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Data Cleaning"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In the book, three options are listed:\n",
+    "\n",
+    "```python\n",
+    "housing.dropna(subset=[\"total_bedrooms\"])    # option 1\n",
+    "housing.drop(\"total_bedrooms\", axis=1)       # option 2\n",
+    "median = housing[\"total_bedrooms\"].median()  # option 3\n",
+    "housing[\"total_bedrooms\"].fillna(median, inplace=True)\n",
+    "```\n",
+    "\n",
+    "To demonstrate each of them, let's create a copy of the housing dataset that keeps only the rows containing at least one null value. This makes it easier to see exactly what each option does:"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 47,
@@ -815,6 +880,13 @@
     "housing_tr.head()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Handling Text and Categorical Attributes"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -910,6 +982,13 @@
     "cat_encoder.categories_"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Custom Transformers"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -985,6 +1064,13 @@
     "housing_extra_attribs.head()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Transformation Pipelines"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -1154,7 +1240,14 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Select and train a model "
+    "# Select and Train a Model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Training and Evaluating on the Training Set"
    ]
   },
   {
@@ -1269,7 +1362,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Fine-tune your model"
+    "## Better Evaluation Using Cross-Validation"
    ]
   },
   {
@@ -1382,6 +1475,20 @@
     "svm_rmse"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Fine-Tune Your Model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Grid Search"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 99,
@@ -1457,6 +1564,13 @@
     "pd.DataFrame(grid_search.cv_results_)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Randomized Search"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 104,
@@ -1488,6 +1602,13 @@
     "    print(np.sqrt(-mean_score), params)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Analyze the Best Models and Their Errors"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 106,
@@ -1512,6 +1633,13 @@
     "sorted(zip(feature_importances, attributes), reverse=True)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Evaluate Your System on the Test Set"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 108,
diff --git a/03_classification.ipynb b/03_classification.ipynb
index 9e2885e..6c8e0db 100644
--- a/03_classification.ipynb
+++ b/03_classification.ipynb
@@ -245,7 +245,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Binary classifier"
+    "# Training a Binary Classifier"
    ]
   },
   {
@@ -296,6 +296,20 @@
     "cross_val_score(sgd_clf, X_train, y_train_5, cv=3, scoring=\"accuracy\")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Performance Measures"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Measuring Accuracy Using Cross-Validation"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 18,
@@ -362,6 +376,13 @@
     "* lastly, other things may prevent perfect reproducibility, such as Python dicts and sets whose order is not guaranteed to be stable across sessions, or the order of files in a directory which is also not guaranteed."
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Confusion Matrix"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 21,
@@ -394,6 +415,13 @@
     "confusion_matrix(y_train_5, y_train_perfect_predictions)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Precision and Recall"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 24,
@@ -453,6 +481,13 @@
     "cm[1, 1] / (cm[1, 1] + (cm[1, 0] + cm[0, 1]) / 2)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Precision/Recall Trade-off"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 30,
@@ -625,7 +660,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# ROC curves"
+    "## The ROC Curve"
    ]
   },
   {
@@ -757,7 +792,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Multiclass classification"
+    "# Multiclass Classification"
    ]
   },
   {
@@ -882,7 +917,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Error analysis"
+    "# Error Analysis"
    ]
   },
   {
@@ -969,7 +1004,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Multilabel classification"
+    "# Multilabel Classification"
    ]
   },
   {
@@ -1018,7 +1053,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Multioutput classification"
+    "# Multioutput Classification"
    ]
   },
   {
diff --git a/04_training_linear_models.ipynb b/04_training_linear_models.ipynb
index 94d90b1..ae910ef 100644
--- a/04_training_linear_models.ipynb
+++ b/04_training_linear_models.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "**Chapter 4 – Training Linear Models**"
+    "**Chapter 4 – Training Models**"
    ]
   },
   {
@@ -89,7 +89,14 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Linear regression using the Normal Equation"
+    "# Linear Regression"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## The Normal Equation"
    ]
   },
   {
@@ -243,7 +250,8 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Linear regression using batch gradient descent"
+    "# Gradient Descent\n",
+    "## Batch Gradient Descent"
    ]
   },
   {
@@ -330,7 +338,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Stochastic Gradient Descent"
+    "## Stochastic Gradient Descent"
    ]
   },
   {
@@ -416,7 +424,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Mini-batch gradient descent"
+    "## Mini-batch Gradient Descent"
    ]
   },
   {
@@ -494,7 +502,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Polynomial regression"
+    "# Polynomial Regression"
    ]
   },
   {
@@ -616,6 +624,13 @@
     "plt.show()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Learning Curves"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 35,
@@ -678,7 +693,14 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Regularized models"
+    "# Regularized Linear Models"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Ridge Regression"
    ]
   },
   {
@@ -772,6 +794,13 @@
     "sgd_reg.predict([[1.5]])"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Lasso Regression"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 43,
@@ -803,6 +832,13 @@
     "lasso_reg.predict([[1.5]])"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Elastic Net"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 45,
@@ -815,6 +851,13 @@
     "elastic_net.predict([[1.5]])"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Early Stopping"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 46,
@@ -829,13 +872,6 @@
     "X_train, X_val, y_train, y_val = train_test_split(X[:50], y[:50].ravel(), test_size=0.5, random_state=10)"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Early stopping example:"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 47,
@@ -1029,7 +1065,14 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Logistic regression"
+    "# Logistic Regression"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Decision Boundaries"
    ]
   },
   {
@@ -1166,6 +1209,13 @@
     "log_reg.predict([[1.7], [1.5]])"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Softmax Regression"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 62,