mirror of
https://github.com/ArthurDanjou/handson-ml3.git
synced 2026-01-30 11:37:49 +01:00
Update all notebooks assuming we are all in the future now: sklearn 0.20+, python 3.5+, TF 2.0 preview
This commit is contained in:
@@ -71,7 +71,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Voting classifiers"
|
||||
"This notebook assumes you have installed Scikit-Learn ≥0.20."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -79,6 +79,23 @@
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sklearn\n",
|
||||
"assert sklearn.__version__ >= \"0.20\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Voting classifiers"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"heads_proba = 0.51\n",
|
||||
"coin_tosses = (np.random.rand(10000, 10) < heads_proba).astype(np.int32)\n",
|
||||
@@ -87,7 +104,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -105,7 +122,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -120,27 +137,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Warning**: In Scikit-Learn 0.20, some hyperparameters (`solver`, `n_estimators`, `gamma`, etc.) start issuing warnings about the fact that their default value will change in Scikit-Learn 0.22. To avoid these warnings and ensure that this notebooks keeps producing the same outputs as in the book, I set the hyperparameters to their old default value. In your own code, you can simply rely on the latest default values instead."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.ensemble import RandomForestClassifier\n",
|
||||
"from sklearn.ensemble import VotingClassifier\n",
|
||||
"from sklearn.linear_model import LogisticRegression\n",
|
||||
"from sklearn.svm import SVC\n",
|
||||
"\n",
|
||||
"log_clf = LogisticRegression(solver=\"liblinear\", random_state=42)\n",
|
||||
"rnd_clf = RandomForestClassifier(n_estimators=10, random_state=42)\n",
|
||||
"svm_clf = SVC(gamma=\"auto\", random_state=42)\n",
|
||||
"\n",
|
||||
"voting_clf = VotingClassifier(\n",
|
||||
" estimators=[('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf)],\n",
|
||||
" voting='hard')"
|
||||
"**Note**: to be future-proof, we set `solver=\"lbfgs\"`, `n_estimators=100`, and `gamma=\"scale\"` since these will be the default values in upcoming Scikit-Learn versions."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -149,7 +146,18 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"voting_clf.fit(X_train, y_train)"
|
||||
"from sklearn.ensemble import RandomForestClassifier\n",
|
||||
"from sklearn.ensemble import VotingClassifier\n",
|
||||
"from sklearn.linear_model import LogisticRegression\n",
|
||||
"from sklearn.svm import SVC\n",
|
||||
"\n",
|
||||
"log_clf = LogisticRegression(solver=\"lbfgs\", random_state=42)\n",
|
||||
"rnd_clf = RandomForestClassifier(n_estimators=100, random_state=42)\n",
|
||||
"svm_clf = SVC(gamma=\"scale\", random_state=42)\n",
|
||||
"\n",
|
||||
"voting_clf = VotingClassifier(\n",
|
||||
" estimators=[('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf)],\n",
|
||||
" voting='hard')"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -157,6 +165,15 @@
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"voting_clf.fit(X_train, y_train)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.metrics import accuracy_score\n",
|
||||
"\n",
|
||||
@@ -166,15 +183,22 @@
|
||||
" print(clf.__class__.__name__, accuracy_score(y_test, y_pred))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Soft voting:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"log_clf = LogisticRegression(solver=\"liblinear\", random_state=42)\n",
|
||||
"rnd_clf = RandomForestClassifier(n_estimators=10, random_state=42)\n",
|
||||
"svm_clf = SVC(gamma=\"auto\", probability=True, random_state=42)\n",
|
||||
"log_clf = LogisticRegression(solver=\"lbfgs\", random_state=42)\n",
|
||||
"rnd_clf = RandomForestClassifier(n_estimators=100, random_state=42)\n",
|
||||
"svm_clf = SVC(gamma=\"scale\", probability=True, random_state=42)\n",
|
||||
"\n",
|
||||
"voting_clf = VotingClassifier(\n",
|
||||
" estimators=[('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf)],\n",
|
||||
@@ -184,7 +208,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -205,7 +229,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -214,14 +238,14 @@
|
||||
"\n",
|
||||
"bag_clf = BaggingClassifier(\n",
|
||||
" DecisionTreeClassifier(random_state=42), n_estimators=500,\n",
|
||||
" max_samples=100, bootstrap=True, n_jobs=-1, random_state=42)\n",
|
||||
" max_samples=100, bootstrap=True, random_state=42)\n",
|
||||
"bag_clf.fit(X_train, y_train)\n",
|
||||
"y_pred = bag_clf.predict(X_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -231,7 +255,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -243,7 +267,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -269,7 +293,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -293,18 +317,18 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"bag_clf = BaggingClassifier(\n",
|
||||
" DecisionTreeClassifier(splitter=\"random\", max_leaf_nodes=16, random_state=42),\n",
|
||||
" n_estimators=500, max_samples=1.0, bootstrap=True, n_jobs=-1, random_state=42)"
|
||||
" n_estimators=500, max_samples=1.0, bootstrap=True, random_state=42)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -314,39 +338,25 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.ensemble import RandomForestClassifier\n",
|
||||
"\n",
|
||||
"rnd_clf = RandomForestClassifier(n_estimators=500, max_leaf_nodes=16, n_jobs=-1, random_state=42)\n",
|
||||
"rnd_clf = RandomForestClassifier(n_estimators=500, max_leaf_nodes=16, random_state=42)\n",
|
||||
"rnd_clf.fit(X_train, y_train)\n",
|
||||
"\n",
|
||||
"y_pred_rf = rnd_clf.predict(X_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"np.sum(y_pred == y_pred_rf) / len(y_pred) # almost identical predictions"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.datasets import load_iris\n",
|
||||
"iris = load_iris()\n",
|
||||
"rnd_clf = RandomForestClassifier(n_estimators=500, n_jobs=-1, random_state=42)\n",
|
||||
"rnd_clf.fit(iris[\"data\"], iris[\"target\"])\n",
|
||||
"for name, score in zip(iris[\"feature_names\"], rnd_clf.feature_importances_):\n",
|
||||
" print(name, score)"
|
||||
"np.sum(y_pred == y_pred_rf) / len(y_pred) # almost identical predictions"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -355,7 +365,12 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"rnd_clf.feature_importances_"
|
||||
"from sklearn.datasets import load_iris\n",
|
||||
"iris = load_iris()\n",
|
||||
"rnd_clf = RandomForestClassifier(n_estimators=500, random_state=42)\n",
|
||||
"rnd_clf.fit(iris[\"data\"], iris[\"target\"])\n",
|
||||
"for name, score in zip(iris[\"feature_names\"], rnd_clf.feature_importances_):\n",
|
||||
" print(name, score)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -363,6 +378,15 @@
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"rnd_clf.feature_importances_"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"plt.figure(figsize=(6, 4))\n",
|
||||
"\n",
|
||||
@@ -384,20 +408,20 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"execution_count": 23,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"bag_clf = BaggingClassifier(\n",
|
||||
" DecisionTreeClassifier(random_state=42), n_estimators=500,\n",
|
||||
" bootstrap=True, n_jobs=-1, oob_score=True, random_state=40)\n",
|
||||
" bootstrap=True, oob_score=True, random_state=40)\n",
|
||||
"bag_clf.fit(X_train, y_train)\n",
|
||||
"bag_clf.oob_score_"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"execution_count": 24,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -406,7 +430,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -422,29 +446,16 @@
|
||||
"## Feature importance"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"try:\n",
|
||||
" from sklearn.datasets import fetch_openml\n",
|
||||
" mnist = fetch_openml('mnist_784', version=1)\n",
|
||||
" mnist.target = mnist.target.astype(np.int64)\n",
|
||||
"except ImportError:\n",
|
||||
" from sklearn.datasets import fetch_mldata\n",
|
||||
" mnist = fetch_mldata('MNIST original')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"rnd_clf = RandomForestClassifier(n_estimators=10, random_state=42)\n",
|
||||
"rnd_clf.fit(mnist[\"data\"], mnist[\"target\"])"
|
||||
"from sklearn.datasets import fetch_openml\n",
|
||||
"\n",
|
||||
"mnist = fetch_openml('mnist_784', version=1)\n",
|
||||
"mnist.target = mnist.target.astype(np.uint8)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -452,6 +463,16 @@
|
||||
"execution_count": 27,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"rnd_clf = RandomForestClassifier(n_estimators=100, random_state=42)\n",
|
||||
"rnd_clf.fit(mnist[\"data\"], mnist[\"target\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def plot_digit(data):\n",
|
||||
" image = data.reshape(28, 28)\n",
|
||||
@@ -462,7 +483,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"execution_count": 29,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -484,7 +505,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"execution_count": 30,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -498,7 +519,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"execution_count": 31,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -507,7 +528,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"execution_count": 32,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -518,7 +539,7 @@
|
||||
" sample_weights = np.ones(m)\n",
|
||||
" plt.subplot(subplot)\n",
|
||||
" for i in range(5):\n",
|
||||
" svm_clf = SVC(kernel=\"rbf\", C=0.05, gamma=\"auto\", random_state=42)\n",
|
||||
" svm_clf = SVC(kernel=\"rbf\", C=0.05, gamma=\"scale\", random_state=42)\n",
|
||||
" svm_clf.fit(X_train, y_train, sample_weight=sample_weights)\n",
|
||||
" y_pred = svm_clf.predict(X_train)\n",
|
||||
" sample_weights[y_pred != y_train] *= (1 + learning_rate)\n",
|
||||
@@ -537,7 +558,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"execution_count": 33,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -553,7 +574,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 33,
|
||||
"execution_count": 34,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -564,7 +585,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 34,
|
||||
"execution_count": 35,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -576,7 +597,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 35,
|
||||
"execution_count": 36,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -587,7 +608,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 36,
|
||||
"execution_count": 37,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -598,7 +619,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 37,
|
||||
"execution_count": 38,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -607,7 +628,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 38,
|
||||
"execution_count": 39,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -616,7 +637,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 39,
|
||||
"execution_count": 40,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -625,7 +646,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 40,
|
||||
"execution_count": 41,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -674,7 +695,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 41,
|
||||
"execution_count": 42,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -686,7 +707,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 42,
|
||||
"execution_count": 43,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -696,7 +717,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 43,
|
||||
"execution_count": 44,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -723,7 +744,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 44,
|
||||
"execution_count": 45,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -746,7 +767,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 45,
|
||||
"execution_count": 46,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -755,7 +776,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 46,
|
||||
"execution_count": 47,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -781,7 +802,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 47,
|
||||
"execution_count": 48,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -805,7 +826,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 48,
|
||||
"execution_count": 49,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -814,7 +835,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 49,
|
||||
"execution_count": 50,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -830,7 +851,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 50,
|
||||
"execution_count": 51,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -843,7 +864,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 51,
|
||||
"execution_count": 52,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -851,22 +872,8 @@
|
||||
" xgb_reg = xgboost.XGBRegressor(random_state=42)\n",
|
||||
" xgb_reg.fit(X_train, y_train)\n",
|
||||
" y_pred = xgb_reg.predict(X_val)\n",
|
||||
" val_error = mean_squared_error(y_val, y_pred)\n",
|
||||
" print(\"Validation MSE:\", val_error)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 52,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"if xgboost is not None: # not shown in the book\n",
|
||||
" xgb_reg.fit(X_train, y_train,\n",
|
||||
" eval_set=[(X_val, y_val)], early_stopping_rounds=2)\n",
|
||||
" y_pred = xgb_reg.predict(X_val)\n",
|
||||
" val_error = mean_squared_error(y_val, y_pred)\n",
|
||||
" print(\"Validation MSE:\", val_error)"
|
||||
" val_error = mean_squared_error(y_val, y_pred) # Not shown\n",
|
||||
" print(\"Validation MSE:\", val_error) # Not shown"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -875,7 +882,12 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%timeit xgboost.XGBRegressor().fit(X_train, y_train) if xgboost is not None else None"
|
||||
"if xgboost is not None: # not shown in the book\n",
|
||||
" xgb_reg.fit(X_train, y_train,\n",
|
||||
" eval_set=[(X_val, y_val)], early_stopping_rounds=2)\n",
|
||||
" y_pred = xgb_reg.predict(X_val)\n",
|
||||
" val_error = mean_squared_error(y_val, y_pred) # Not shown\n",
|
||||
" print(\"Validation MSE:\", val_error) # Not shown"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -883,6 +895,15 @@
|
||||
"execution_count": 54,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%timeit xgboost.XGBRegressor().fit(X_train, y_train) if xgboost is not None else None"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 55,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%timeit GradientBoostingRegressor().fit(X_train, y_train)"
|
||||
]
|
||||
@@ -933,7 +954,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 55,
|
||||
"execution_count": 56,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -942,7 +963,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 56,
|
||||
"execution_count": 57,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -961,7 +982,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 57,
|
||||
"execution_count": 58,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -972,19 +993,19 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 58,
|
||||
"execution_count": 59,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"random_forest_clf = RandomForestClassifier(n_estimators=10, random_state=42)\n",
|
||||
"extra_trees_clf = ExtraTreesClassifier(n_estimators=10, random_state=42)\n",
|
||||
"random_forest_clf = RandomForestClassifier(n_estimators=100, random_state=42)\n",
|
||||
"extra_trees_clf = ExtraTreesClassifier(n_estimators=100, random_state=42)\n",
|
||||
"svm_clf = LinearSVC(random_state=42)\n",
|
||||
"mlp_clf = MLPClassifier(random_state=42)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 59,
|
||||
"execution_count": 60,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -996,7 +1017,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 60,
|
||||
"execution_count": 61,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1019,7 +1040,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 61,
|
||||
"execution_count": 62,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1028,7 +1049,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 62,
|
||||
"execution_count": 63,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1042,7 +1063,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 63,
|
||||
"execution_count": 64,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1051,7 +1072,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 64,
|
||||
"execution_count": 65,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1060,7 +1081,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 65,
|
||||
"execution_count": 66,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1069,7 +1090,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 66,
|
||||
"execution_count": 67,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1085,7 +1106,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 67,
|
||||
"execution_count": 68,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1101,7 +1122,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 68,
|
||||
"execution_count": 69,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1117,7 +1138,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 69,
|
||||
"execution_count": 70,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1133,7 +1154,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 70,
|
||||
"execution_count": 71,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1149,7 +1170,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 71,
|
||||
"execution_count": 72,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1165,7 +1186,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 72,
|
||||
"execution_count": 73,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1174,7 +1195,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 73,
|
||||
"execution_count": 74,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1197,7 +1218,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 74,
|
||||
"execution_count": 75,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1206,7 +1227,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 75,
|
||||
"execution_count": 76,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1236,7 +1257,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 76,
|
||||
"execution_count": 77,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1248,7 +1269,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 77,
|
||||
"execution_count": 78,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1257,7 +1278,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 78,
|
||||
"execution_count": 79,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1267,7 +1288,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 79,
|
||||
"execution_count": 80,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1290,7 +1311,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 80,
|
||||
"execution_count": 81,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1302,7 +1323,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 81,
|
||||
"execution_count": 82,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1311,7 +1332,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 82,
|
||||
"execution_count": 83,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1320,7 +1341,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 83,
|
||||
"execution_count": 84,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1337,7 +1358,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 - tf2",
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user