Update notebooks 1 to 8 to latest library versions (in particular Scikit-Learn 0.20)

This commit is contained in:
Aurélien Geron
2018-12-21 10:18:31 +08:00
parent dc16446c5f
commit b54ee1b608
8 changed files with 694 additions and 586 deletions

View File

@@ -774,6 +774,13 @@
"y = (0.2 + 0.1 * X + 0.5 * X**2 + np.random.randn(m, 1)/10).ravel()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Warning**: the default value of `gamma` will change from `'auto'` to `'scale'` in version 0.22 to better account for unscaled features. To preserve the same results as in the book, we explicitly set it to `'auto'`, but you should probably just use the default in your own code."
]
},
{
"cell_type": "code",
"execution_count": 27,
@@ -782,7 +789,7 @@
"source": [
"from sklearn.svm import SVR\n",
"\n",
"svm_poly_reg = SVR(kernel=\"poly\", degree=2, C=100, epsilon=0.1)\n",
"svm_poly_reg = SVR(kernel=\"poly\", degree=2, C=100, epsilon=0.1, gamma=\"auto\")\n",
"svm_poly_reg.fit(X, y)"
]
},
@@ -794,8 +801,8 @@
"source": [
"from sklearn.svm import SVR\n",
"\n",
"svm_poly_reg1 = SVR(kernel=\"poly\", degree=2, C=100, epsilon=0.1)\n",
"svm_poly_reg2 = SVR(kernel=\"poly\", degree=2, C=0.01, epsilon=0.1)\n",
"svm_poly_reg1 = SVR(kernel=\"poly\", degree=2, C=100, epsilon=0.1, gamma=\"auto\")\n",
"svm_poly_reg2 = SVR(kernel=\"poly\", degree=2, C=0.01, epsilon=0.1, gamma=\"auto\")\n",
"svm_poly_reg1.fit(X, y)\n",
"svm_poly_reg2.fit(X, y)"
]
@@ -876,7 +883,7 @@
"ax1 = fig.add_subplot(111, projection='3d')\n",
"plot_3D_decision_function(ax1, w=svm_clf2.coef_[0], b=svm_clf2.intercept_[0])\n",
"\n",
"save_fig(\"iris_3D_plot\")\n",
"#save_fig(\"iris_3D_plot\")\n",
"plt.show()"
]
},
@@ -1165,7 +1172,7 @@
"source": [
"from sklearn.linear_model import SGDClassifier\n",
"\n",
"sgd_clf = SGDClassifier(loss=\"hinge\", alpha = 0.017, max_iter = 50, random_state=42)\n",
"sgd_clf = SGDClassifier(loss=\"hinge\", alpha = 0.017, max_iter = 50, tol=-np.infty, random_state=42)\n",
"sgd_clf.fit(X, y.ravel())\n",
"\n",
"m = len(X)\n",
@@ -1265,7 +1272,7 @@
"lin_clf = LinearSVC(loss=\"hinge\", C=C, random_state=42)\n",
"svm_clf = SVC(kernel=\"linear\", C=C)\n",
"sgd_clf = SGDClassifier(loss=\"hinge\", learning_rate=\"constant\", eta0=0.001, alpha=alpha,\n",
" max_iter=100000, random_state=42)\n",
" max_iter=100000, tol=-np.infty, random_state=42)\n",
"\n",
"scaler = StandardScaler()\n",
"X_scaled = scaler.fit_transform(X)\n",
@@ -1354,9 +1361,13 @@
"metadata": {},
"outputs": [],
"source": [
"from sklearn.datasets import fetch_mldata\n",
"try:\n",
" from sklearn.datasets import fetch_openml\n",
" mnist = fetch_openml('mnist_784', version=1, cache=True)\n",
"except ImportError:\n",
" from sklearn.datasets import fetch_mldata\n",
" mnist = fetch_mldata('MNIST original')\n",
"\n",
"mnist = fetch_mldata(\"MNIST original\")\n",
"X = mnist[\"data\"]\n",
"y = mnist[\"target\"]\n",
"\n",
@@ -1425,7 +1436,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Wow, 82% accuracy on MNIST is a really bad performance. This linear model is certainly too simple for MNIST, but perhaps we just needed to scale the data first:"
"Wow, 86% accuracy on MNIST is a really bad performance. This linear model is certainly too simple for MNIST, but perhaps we just needed to scale the data first:"
]
},
{
@@ -1474,7 +1485,7 @@
"metadata": {},
"outputs": [],
"source": [
"svm_clf = SVC(decision_function_shape=\"ovr\")\n",
"svm_clf = SVC(decision_function_shape=\"ovr\", gamma=\"auto\")\n",
"svm_clf.fit(X_train_scaled[:10000], y_train[:10000])"
]
},
@@ -1505,7 +1516,7 @@
"from scipy.stats import reciprocal, uniform\n",
"\n",
"param_distributions = {\"gamma\": reciprocal(0.001, 0.1), \"C\": uniform(1, 10)}\n",
"rnd_search_cv = RandomizedSearchCV(svm_clf, param_distributions, n_iter=10, verbose=2)\n",
"rnd_search_cv = RandomizedSearchCV(svm_clf, param_distributions, n_iter=10, verbose=2, cv=3)\n",
"rnd_search_cv.fit(X_train_scaled[:1000], y_train[:1000])"
]
},
@@ -1536,7 +1547,7 @@
},
{
"cell_type": "code",
"execution_count": 59,
"execution_count": 60,
"metadata": {},
"outputs": [],
"source": [
@@ -1545,7 +1556,7 @@
},
{
"cell_type": "code",
"execution_count": 60,
"execution_count": 61,
"metadata": {},
"outputs": [],
"source": [
@@ -1562,7 +1573,7 @@
},
{
"cell_type": "code",
"execution_count": 61,
"execution_count": 62,
"metadata": {},
"outputs": [],
"source": [
@@ -1600,7 +1611,7 @@
},
{
"cell_type": "code",
"execution_count": 62,
"execution_count": 63,
"metadata": {},
"outputs": [],
"source": [
@@ -1620,7 +1631,7 @@
},
{
"cell_type": "code",
"execution_count": 63,
"execution_count": 64,
"metadata": {},
"outputs": [],
"source": [
@@ -1638,7 +1649,7 @@
},
{
"cell_type": "code",
"execution_count": 64,
"execution_count": 65,
"metadata": {},
"outputs": [],
"source": [
@@ -1658,7 +1669,7 @@
},
{
"cell_type": "code",
"execution_count": 65,
"execution_count": 66,
"metadata": {},
"outputs": [],
"source": [
@@ -1677,7 +1688,7 @@
},
{
"cell_type": "code",
"execution_count": 66,
"execution_count": 67,
"metadata": {},
"outputs": [],
"source": [
@@ -1697,7 +1708,7 @@
},
{
"cell_type": "code",
"execution_count": 67,
"execution_count": 68,
"metadata": {},
"outputs": [],
"source": [
@@ -1713,7 +1724,7 @@
},
{
"cell_type": "code",
"execution_count": 68,
"execution_count": 69,
"metadata": {},
"outputs": [],
"source": [
@@ -1722,13 +1733,13 @@
"from scipy.stats import reciprocal, uniform\n",
"\n",
"param_distributions = {\"gamma\": reciprocal(0.001, 0.1), \"C\": uniform(1, 10)}\n",
"rnd_search_cv = RandomizedSearchCV(SVR(), param_distributions, n_iter=10, verbose=2, random_state=42)\n",
"rnd_search_cv = RandomizedSearchCV(SVR(), param_distributions, n_iter=10, verbose=2, cv=3, random_state=42)\n",
"rnd_search_cv.fit(X_train_scaled, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 69,
"execution_count": 70,
"metadata": {},
"outputs": [],
"source": [
@@ -1744,7 +1755,7 @@
},
{
"cell_type": "code",
"execution_count": 70,
"execution_count": 71,
"metadata": {},
"outputs": [],
"source": [
@@ -1762,7 +1773,7 @@
},
{
"cell_type": "code",
"execution_count": 71,
"execution_count": 72,
"metadata": {},
"outputs": [],
"source": [
@@ -1771,6 +1782,26 @@
"np.sqrt(mse)"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [],
"source": [
"cmap = matplotlib.cm.get_cmap(\"jet\")"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.datasets import fetch_openml\n",
"mnist = fetch_openml(\"mnist_784\", version=1)\n",
"print(mnist.data.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -1795,7 +1826,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
"version": "3.6.6"
},
"nav_menu": {},
"toc": {