diff --git a/02_end_to_end_machine_learning_project.ipynb b/02_end_to_end_machine_learning_project.ipynb index b13496a..34c45e7 100644 --- a/02_end_to_end_machine_learning_project.ipynb +++ b/02_end_to_end_machine_learning_project.ipynb @@ -1199,13 +1199,20 @@ "## Looking for Correlations" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note: since Pandas 2.0.0, the `numeric_only` argument defaults to `False`, so we need to set it explicitly to `True` to avoid an error." + ] + }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ - "corr_matrix = housing.corr()" + "corr_matrix = housing.corr(numeric_only=True)" ] }, { @@ -1337,7 +1344,7 @@ } ], "source": [ - "corr_matrix = housing.corr()\n", + "corr_matrix = housing.corr(numeric_only=True)\n", "corr_matrix[\"median_house_value\"].sort_values(ascending=False)" ] }, @@ -2551,7 +2558,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Alternatively, you can set `sparse=False` when creating the `OneHotEncoder`:" + "Alternatively, you can set `sparse_output=False` when creating the `OneHotEncoder` (note: the `sparse` hyperparameter was renamed to `sparse_output` in Scikit-Learn 1.2):" ] }, { @@ -2577,7 +2584,7 @@ } ], "source": [ - "cat_encoder = OneHotEncoder(sparse=False)\n", + "cat_encoder = OneHotEncoder(sparse_output=False)\n", "housing_cat_1hot = cat_encoder.fit_transform(housing_cat)\n", "housing_cat_1hot" ] @@ -3299,7 +3306,8 @@ " self.random_state = random_state\n", "\n", " def fit(self, X, y=None, sample_weight=None):\n", - " self.kmeans_ = KMeans(self.n_clusters, random_state=self.random_state)\n", + " self.kmeans_ = KMeans(self.n_clusters, n_init=10,\n", + " random_state=self.random_state)\n", " self.kmeans_.fit(X, sample_weight=sample_weight)\n", " return self # always return self!\n", "\n", @@ -3310,6 +3318,13 @@ " return [f\"Cluster {i} similarity\" for i in range(self.n_clusters)]" ] }, + { + "cell_type": "markdown", + "metadata": {}, +
"source": [ + "Note: The default value for the `n_init` hyperparameter above will change from 10 to `\"auto\"` in Scikit-Learn 1.4, so I'm setting it explicitly to 10 to keep this notebook stable." + ] + }, { "cell_type": "code", "execution_count": 97, @@ -6238,7 +6253,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -6252,7 +6267,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.6" + "version": "3.10.13" }, "nav_menu": { "height": "279px",