mirror of
https://github.com/ArthurDanjou/handson-ml3.git
synced 2026-01-14 12:14:36 +01:00
Update notebooks 1 to 8 to latest library versions (in particular Scikit-Learn 0.20)
This commit is contained in:
@@ -115,6 +115,13 @@
|
||||
"X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Warning**: In Scikit-Learn 0.20, some hyperparameters (`solver`, `n_estimators`, `gamma`, etc.) start issuing warnings about the fact that their default value will change in Scikit-Learn 0.22. To avoid these warnings and ensure that this notebooks keeps producing the same outputs as in the book, I set the hyperparameters to their old default value. In your own code, you can simply rely on the latest default values instead."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
@@ -126,9 +133,9 @@
|
||||
"from sklearn.linear_model import LogisticRegression\n",
|
||||
"from sklearn.svm import SVC\n",
|
||||
"\n",
|
||||
"log_clf = LogisticRegression(random_state=42)\n",
|
||||
"rnd_clf = RandomForestClassifier(random_state=42)\n",
|
||||
"svm_clf = SVC(random_state=42)\n",
|
||||
"log_clf = LogisticRegression(solver=\"liblinear\", random_state=42)\n",
|
||||
"rnd_clf = RandomForestClassifier(n_estimators=10, random_state=42)\n",
|
||||
"svm_clf = SVC(gamma=\"auto\", random_state=42)\n",
|
||||
"\n",
|
||||
"voting_clf = VotingClassifier(\n",
|
||||
" estimators=[('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf)],\n",
|
||||
@@ -164,9 +171,9 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"log_clf = LogisticRegression(random_state=42)\n",
|
||||
"rnd_clf = RandomForestClassifier(random_state=42)\n",
|
||||
"svm_clf = SVC(probability=True, random_state=42)\n",
|
||||
"log_clf = LogisticRegression(solver=\"liblinear\", random_state=42)\n",
|
||||
"rnd_clf = RandomForestClassifier(n_estimators=10, random_state=42)\n",
|
||||
"svm_clf = SVC(gamma=\"auto\", probability=True, random_state=42)\n",
|
||||
"\n",
|
||||
"voting_clf = VotingClassifier(\n",
|
||||
" estimators=[('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf)],\n",
|
||||
@@ -420,8 +427,13 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.datasets import fetch_mldata\n",
|
||||
"mnist = fetch_mldata('MNIST original')"
|
||||
"try:\n",
|
||||
" from sklearn.datasets import fetch_openml\n",
|
||||
" mnist = fetch_openml('mnist_784', version=1)\n",
|
||||
" mnist.target = mnist.target.astype(np.int64)\n",
|
||||
"except ImportError:\n",
|
||||
" from sklearn.datasets import fetch_mldata\n",
|
||||
" mnist = fetch_mldata('MNIST original')"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -430,7 +442,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"rnd_clf = RandomForestClassifier(random_state=42)\n",
|
||||
"rnd_clf = RandomForestClassifier(n_estimators=10, random_state=42)\n",
|
||||
"rnd_clf.fit(mnist[\"data\"], mnist[\"target\"])"
|
||||
]
|
||||
},
|
||||
@@ -505,7 +517,7 @@
|
||||
" sample_weights = np.ones(m)\n",
|
||||
" plt.subplot(subplot)\n",
|
||||
" for i in range(5):\n",
|
||||
" svm_clf = SVC(kernel=\"rbf\", C=0.05, random_state=42)\n",
|
||||
" svm_clf = SVC(kernel=\"rbf\", C=0.05, gamma=\"auto\", random_state=42)\n",
|
||||
" svm_clf.fit(X_train, y_train, sample_weight=sample_weights)\n",
|
||||
" y_pred = svm_clf.predict(X_train)\n",
|
||||
" sample_weights[y_pred != y_train] *= (1 + learning_rate)\n",
|
||||
@@ -911,36 +923,25 @@
|
||||
"Exercise: _Load the MNIST data and split it into a training set, a validation set, and a test set (e.g., use 50,000 instances for training, 10,000 for validation, and 10,000 for testing)._"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The MNIST dataset was loaded earlier."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 55,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.datasets import fetch_mldata"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 56,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"mnist = fetch_mldata('MNIST original')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 57,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.model_selection import train_test_split"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 58,
|
||||
"execution_count": 56,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -959,7 +960,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 59,
|
||||
"execution_count": 57,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -970,19 +971,19 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 60,
|
||||
"execution_count": 58,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"random_forest_clf = RandomForestClassifier(random_state=42)\n",
|
||||
"extra_trees_clf = ExtraTreesClassifier(random_state=42)\n",
|
||||
"random_forest_clf = RandomForestClassifier(n_estimators=10, random_state=42)\n",
|
||||
"extra_trees_clf = ExtraTreesClassifier(n_estimators=10, random_state=42)\n",
|
||||
"svm_clf = LinearSVC(random_state=42)\n",
|
||||
"mlp_clf = MLPClassifier(random_state=42)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 61,
|
||||
"execution_count": 59,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -994,7 +995,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 62,
|
||||
"execution_count": 60,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1017,7 +1018,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 63,
|
||||
"execution_count": 61,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1026,7 +1027,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 64,
|
||||
"execution_count": 62,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1040,7 +1041,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 65,
|
||||
"execution_count": 63,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1049,7 +1050,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 66,
|
||||
"execution_count": 64,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1058,7 +1059,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 67,
|
||||
"execution_count": 65,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1067,7 +1068,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 68,
|
||||
"execution_count": 66,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1083,7 +1084,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 69,
|
||||
"execution_count": 67,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1099,16 +1100,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 70,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"voting_clf.estimators"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 71,
|
||||
"execution_count": 68,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1124,7 +1116,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 72,
|
||||
"execution_count": 69,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1140,7 +1132,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 73,
|
||||
"execution_count": 70,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1156,7 +1148,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 74,
|
||||
"execution_count": 71,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1167,12 +1159,12 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Much better! The SVM was hurting performance. Now let's try using a soft voting classifier. We do not actually need to retrain the classifier, we can just set `voting` to `\"soft\"`:"
|
||||
"A bit better! The SVM was hurting performance. Now let's try using a soft voting classifier. We do not actually need to retrain the classifier, we can just set `voting` to `\"soft\"`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 75,
|
||||
"execution_count": 72,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1181,7 +1173,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 76,
|
||||
"execution_count": 73,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1204,7 +1196,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 77,
|
||||
"execution_count": 74,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1213,7 +1205,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 78,
|
||||
"execution_count": 75,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1224,7 +1216,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The voting classifier reduced the error rate from about 4.9% for our best model (the `MLPClassifier`) to just 3.5%. That's about 28% less errors, not bad!"
|
||||
"The voting classifier reduced the error rate from about 4.0% for our best model (the `MLPClassifier`) to just 3.1%. That's about 22.5% less errors, not bad!"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1243,7 +1235,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 79,
|
||||
"execution_count": 76,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1255,7 +1247,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 80,
|
||||
"execution_count": 77,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1264,7 +1256,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 81,
|
||||
"execution_count": 78,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1274,7 +1266,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 82,
|
||||
"execution_count": 79,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1297,7 +1289,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 83,
|
||||
"execution_count": 80,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1309,7 +1301,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 84,
|
||||
"execution_count": 81,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1318,7 +1310,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 85,
|
||||
"execution_count": 82,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1327,7 +1319,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 86,
|
||||
"execution_count": 83,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1338,15 +1330,8 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This stacking ensemble does not perform as well as the soft voting classifier we trained earlier, but it still beats all the individual classifiers."
|
||||
"This stacking ensemble does not perform as well as the soft voting classifier we trained earlier, it's just as good as the best individual classifier."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -1365,7 +1350,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.5.2"
|
||||
"version": "3.6.6"
|
||||
},
|
||||
"nav_menu": {
|
||||
"height": "252px",
|
||||
|
||||
Reference in New Issue
Block a user