update sklearn version and case study with backend

2026-01-14 12:14:38 +01:00 · 2023-01-08 22:07:27 +01:00
parent 8bee2724f7
commit 69765922d0
7 changed files with 356 additions and 152 deletions
--- a/notebooks/4_analyze_toydata.ipynb
+++ b/notebooks/4_analyze_toydata.ipynb
@@ -25,13 +25,13 @@
    "from sklearn.preprocessing import OneHotEncoder, StandardScaler\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.model_selection import GridSearchCV, train_test_split\n",
-    "from sklearn.inspection import plot_partial_dependence, permutation_importance\n",
+    "from sklearn.inspection import PartialDependenceDisplay, permutation_importance\n",
    "from sklearn import tree\n",
    "# interactive plotting (parallel coordinate plot)\n",
    "import plotly.express as px\n",
    "# suppress unnecessary warnings\n",
-    "import warnings\n",
-    "warnings.simplefilter(action='ignore', category=FutureWarning)"
+    "# import warnings\n",
+    "# warnings.simplefilter(action='ignore', category=FutureWarning)"
   ]
  },
  {
@@ -142,8 +142,8 @@
    "# look at the correlation matrix to see the correlations between all variables\n",
    "# for more info on what these numbers mean see here: https://en.wikipedia.org/wiki/Correlation_and_dependence\n",
    "corr_mat = df.corr()\n",
-    "# uncomment the part below to see the table in color\n",
-    "corr_mat #.style.background_gradient(cmap='coolwarm', axis=None).set_precision(2)"
+    "# color the cells by value and round to 2 decimals using the pandas Styler (.style)\n",
+    "corr_mat.style.background_gradient(cmap='coolwarm', axis=None).format(precision=2)"
   ]
  },
  {
@@ -241,7 +241,7 @@
   "source": [
    "# \"product\" is a categorical variable; for it to be handled correctly,\n",
    "# we have to transform it into a one-hot encoded vector\n",
-    "e = OneHotEncoder(sparse=False, categories='auto')\n",
+    "e = OneHotEncoder(sparse_output=False, categories='auto')\n",
    "ohe = e.fit_transform(df[[\"product\"]])\n",
    "df = df.join(pd.DataFrame(ohe, columns=[f\"product_{i}\" for i in e.categories_[0]], index=df.index))\n",
    "df.head()  # notice the additional columns with zeros and a one"
@@ -631,7 +631,7 @@
    "# you can also check how each feature influences the prediction\n",
    "# with a partial dependence plot (works for any model)\n",
    "plt.figure(figsize=(10, 5))\n",
-    "display = plot_partial_dependence(\n",
+    "display = PartialDependenceDisplay.from_estimator(\n",
    "    clf, X_train, feature_cols, kind=\"both\", subsample=50, line_kw={\"color\": '#15317E', \"label\": None},\n",
    "    n_cols=4, n_jobs=-1, grid_resolution=20, random_state=13, ax=plt.gca()\n",
    ")\n",