mirror of
https://github.com/ArthurDanjou/ml_exercises.git
synced 2026-01-14 12:14:38 +01:00
update sklearn version and case study with backend
This commit is contained in:
@@ -25,13 +25,13 @@
|
||||
"from sklearn.preprocessing import OneHotEncoder, StandardScaler\n",
|
||||
"from sklearn.linear_model import LogisticRegression\n",
|
||||
"from sklearn.model_selection import GridSearchCV, train_test_split\n",
|
||||
"from sklearn.inspection import plot_partial_dependence, permutation_importance\n",
|
||||
"from sklearn.inspection import PartialDependenceDisplay, permutation_importance\n",
|
||||
"from sklearn import tree\n",
|
||||
"# interactive plotting (parallel coordinate plot)\n",
|
||||
"import plotly.express as px\n",
|
||||
"# suppress unnecessary warnings\n",
|
||||
"import warnings\n",
|
||||
"warnings.simplefilter(action='ignore', category=FutureWarning)"
|
||||
"# import warnings\n",
|
||||
"# warnings.simplefilter(action='ignore', category=FutureWarning)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -142,8 +142,8 @@
|
||||
"# look at the correlation matrix to see the correlations between all variables\n",
|
||||
"# for more info on what these numbers mean see here: https://en.wikipedia.org/wiki/Correlation_and_dependence\n",
|
||||
"corr_mat = df.corr()\n",
|
||||
"# uncomment the part below to see the table in color\n",
|
||||
"corr_mat #.style.background_gradient(cmap='coolwarm', axis=None).set_precision(2)"
|
||||
"# we add color to the table with .style\n",
|
||||
"corr_mat.style.background_gradient(cmap='coolwarm', axis=None).format(precision=2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -241,7 +241,7 @@
|
||||
"source": [
|
||||
"# \"product\" is a categorical variable; for it to be handled correctly,\n",
|
||||
"# we have to transform it into a one-hot encoded vector\n",
|
||||
"e = OneHotEncoder(sparse=False, categories='auto')\n",
|
||||
"e = OneHotEncoder(sparse_output=False, categories='auto')\n",
|
||||
"ohe = e.fit_transform(df[[\"product\"]])\n",
|
||||
"df = df.join(pd.DataFrame(ohe, columns=[f\"product_{i}\" for i in e.categories_[0]], index=df.index))\n",
|
||||
"df.head() # notice the additional columns with zeros and a one"
|
||||
@@ -631,7 +631,7 @@
|
||||
"# you can also check how each feature influences the prediction\n",
|
||||
"# with a partial dependence plot (works for any model)\n",
|
||||
"plt.figure(figsize=(10, 5))\n",
|
||||
"display = plot_partial_dependence(\n",
|
||||
"display = PartialDependenceDisplay.from_estimator(\n",
|
||||
" clf, X_train, feature_cols, kind=\"both\", subsample=50, line_kw={\"color\": '#15317E', \"label\": None},\n",
|
||||
" n_cols=4, n_jobs=-1, grid_resolution=20, random_state=13, ax=plt.gca()\n",
|
||||
")\n",
|
||||
|
||||
Reference in New Issue
Block a user