update sklearn version and case study with backend

This commit is contained in:
franzi
2023-01-08 22:07:27 +01:00
parent 8bee2724f7
commit 69765922d0
7 changed files with 356 additions and 152 deletions

View File

@@ -25,13 +25,13 @@
"from sklearn.preprocessing import OneHotEncoder, StandardScaler\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.model_selection import GridSearchCV, train_test_split\n",
"from sklearn.inspection import plot_partial_dependence, permutation_importance\n",
"from sklearn.inspection import PartialDependenceDisplay, permutation_importance\n",
"from sklearn import tree\n",
"# interactive plotting (parallel coordinate plot)\n",
"import plotly.express as px\n",
"# suppress unnecessary warnings\n",
"import warnings\n",
"warnings.simplefilter(action='ignore', category=FutureWarning)"
"# import warnings\n",
"# warnings.simplefilter(action='ignore', category=FutureWarning)"
]
},
{
@@ -142,8 +142,8 @@
"# look at the correlation matrix to see the correlations between all variables\n",
"# for more info on what these numbers mean see here: https://en.wikipedia.org/wiki/Correlation_and_dependence\n",
"corr_mat = df.corr()\n",
"# uncomment the part below to see the table in color\n",
"corr_mat #.style.background_gradient(cmap='coolwarm', axis=None).set_precision(2)"
"# we add color to the table with .style\n",
"corr_mat.style.background_gradient(cmap='coolwarm', axis=None).format(precision=2)"
]
},
{
@@ -241,7 +241,7 @@
"source": [
"# \"product\" is a categorical variable; for it to be handled correctly,\n",
"# we have to transform it into a one-hot encoded vector\n",
"e = OneHotEncoder(sparse=False, categories='auto')\n",
"e = OneHotEncoder(sparse_output=False, categories='auto')\n",
"ohe = e.fit_transform(df[[\"product\"]])\n",
"df = df.join(pd.DataFrame(ohe, columns=[f\"product_{i}\" for i in e.categories_[0]], index=df.index))\n",
"df.head() # notice the additional columns with zeros and a one"
@@ -631,7 +631,7 @@
"# you can also check how each feature influences the prediction\n",
"# with a partial dependence plot (works for any model)\n",
"plt.figure(figsize=(10, 5))\n",
"display = plot_partial_dependence(\n",
"display = PartialDependenceDisplay.from_estimator(\n",
" clf, X_train, feature_cols, kind=\"both\", subsample=50, line_kw={\"color\": '#15317E', \"label\": None},\n",
" n_cols=4, n_jobs=-1, grid_resolution=20, random_state=13, ax=plt.gca()\n",
")\n",