Update all notebooks assuming we are all in the future now: sklearn 0.20+, python 3.5+, TF 2.0 preview

This commit is contained in:
Aurélien Geron
2019-01-18 23:08:37 +08:00
parent 10c432a997
commit 6b8dff91d0
12 changed files with 1186 additions and 2625 deletions

View File

@@ -73,7 +73,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Linear regression using the Normal Equation"
"This notebook assumes you have installed Scikit-Learn ≥0.20."
]
},
{
@@ -81,6 +81,23 @@
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"\n",
"import sklearn\n",
"\n",
"# Compare (major, minor) as integers: comparing version strings is lexicographic,\n",
"# so e.g. \"0.9\" >= \"0.20\" would wrongly pass. The regex tolerates suffixes like \"rc1\".\n",
"sklearn_major_minor = tuple(map(int, re.match(r\"(\\d+)\\.(\\d+)\", sklearn.__version__).groups()))\n",
"assert sklearn_major_minor >= (0, 20)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Linear regression using the Normal Equation"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
@@ -90,7 +107,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
@@ -104,7 +121,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
@@ -114,7 +131,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
@@ -123,7 +140,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@@ -135,7 +152,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
@@ -154,7 +171,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
@@ -170,11 +187,12 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.linear_model import LinearRegression\n",
"\n",
"lin_reg = LinearRegression()\n",
"lin_reg.fit(X, y)\n",
"lin_reg.intercept_, lin_reg.coef_"
@@ -182,7 +200,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
@@ -198,7 +216,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
@@ -215,7 +233,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
@@ -238,14 +256,15 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"eta = 0.1\n",
"eta = 0.1 # learning rate\n",
"n_iterations = 1000\n",
"m = 100\n",
"theta = np.random.randn(2,1)\n",
"\n",
"theta = np.random.randn(2,1) # random initialization\n",
"\n",
"for iteration in range(n_iterations):\n",
" gradients = 2/m * X_b.T.dot(X_b.dot(theta) - y)\n",
@@ -254,7 +273,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
@@ -263,7 +282,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
@@ -272,7 +291,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
@@ -298,7 +317,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
@@ -324,7 +343,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
@@ -335,7 +354,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
@@ -371,8 +390,10 @@
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"execution_count": 21,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"theta"
@@ -380,18 +401,19 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.linear_model import SGDRegressor\n",
"sgd_reg = SGDRegressor(max_iter=50, tol=-np.infty, penalty=None, eta0=0.1, random_state=42)\n",
"\n",
"sgd_reg = SGDRegressor(max_iter=1000, tol=1e-3, penalty=None, eta0=0.1, random_state=42)\n",
"sgd_reg.fit(X, y.ravel())"
]
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
@@ -407,7 +429,7 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
@@ -440,7 +462,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
@@ -449,7 +471,7 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
@@ -460,7 +482,7 @@
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
@@ -485,7 +507,7 @@
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
@@ -497,7 +519,7 @@
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
@@ -508,7 +530,7 @@
},
{
"cell_type": "code",
"execution_count": 29,
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
@@ -522,7 +544,7 @@
},
{
"cell_type": "code",
"execution_count": 30,
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
@@ -534,7 +556,7 @@
},
{
"cell_type": "code",
"execution_count": 31,
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
@@ -543,7 +565,7 @@
},
{
"cell_type": "code",
"execution_count": 32,
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
@@ -554,7 +576,7 @@
},
{
"cell_type": "code",
"execution_count": 33,
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
@@ -573,7 +595,7 @@
},
{
"cell_type": "code",
"execution_count": 34,
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
@@ -604,7 +626,7 @@
},
{
"cell_type": "code",
"execution_count": 35,
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
@@ -630,7 +652,7 @@
},
{
"cell_type": "code",
"execution_count": 36,
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
@@ -643,7 +665,7 @@
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
@@ -669,17 +691,47 @@
},
{
"cell_type": "code",
"execution_count": 38,
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.linear_model import Ridge\n",
"\n",
"np.random.seed(42)\n",
"m = 20\n",
"X = 3 * np.random.rand(m, 1)\n",
"y = 1 + 0.5 * X + np.random.randn(m, 1) / 1.5\n",
"X_new = np.linspace(0, 3, 100).reshape(100, 1)\n",
"X_new = np.linspace(0, 3, 100).reshape(100, 1)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.linear_model import Ridge\n",
"ridge_reg = Ridge(alpha=1, solver=\"cholesky\", random_state=42)\n",
"ridge_reg.fit(X, y)\n",
"ridge_reg.predict([[1.5]])"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"ridge_reg = Ridge(alpha=1, solver=\"sag\", random_state=42)\n",
"ridge_reg.fit(X, y)\n",
"ridge_reg.predict([[1.5]])"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.linear_model import Ridge\n",
"\n",
"def plot_model(model_class, polynomial, alphas, **model_kargs):\n",
" for alpha, style in zip(alphas, (\"b-\", \"g--\", \"r:\")):\n",
@@ -711,42 +763,26 @@
]
},
{
"cell_type": "code",
"execution_count": 39,
"cell_type": "markdown",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.linear_model import Ridge\n",
"ridge_reg = Ridge(alpha=1, solver=\"cholesky\", random_state=42)\n",
"ridge_reg.fit(X, y)\n",
"ridge_reg.predict([[1.5]])"
"**Note**: to be future-proof, we set `max_iter=1000` and `tol=1e-3` because these will be the default values in Scikit-Learn 0.21."
]
},
{
"cell_type": "code",
"execution_count": 40,
"execution_count": 43,
"metadata": {},
"outputs": [],
"source": [
"sgd_reg = SGDRegressor(max_iter=50, tol=-np.infty, penalty=\"l2\", random_state=42)\n",
"sgd_reg = SGDRegressor(penalty=\"l2\", max_iter=1000, tol=1e-3, random_state=42)\n",
"sgd_reg.fit(X, y.ravel())\n",
"sgd_reg.predict([[1.5]])"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"ridge_reg = Ridge(alpha=1, solver=\"sag\", random_state=42)\n",
"ridge_reg.fit(X, y)\n",
"ridge_reg.predict([[1.5]])"
]
},
{
"cell_type": "code",
"execution_count": 42,
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
@@ -757,7 +793,7 @@
"plot_model(Lasso, polynomial=False, alphas=(0, 0.1, 1), random_state=42)\n",
"plt.ylabel(\"$y$\", rotation=0, fontsize=18)\n",
"plt.subplot(122)\n",
"plot_model(Lasso, polynomial=True, alphas=(0, 10**-7, 1), tol=1, random_state=42)\n",
"plot_model(Lasso, polynomial=True, alphas=(0, 10**-7, 1), random_state=42)\n",
"\n",
"save_fig(\"lasso_regression_plot\")\n",
"plt.show()"
@@ -765,7 +801,7 @@
},
{
"cell_type": "code",
"execution_count": 43,
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
@@ -777,7 +813,7 @@
},
{
"cell_type": "code",
"execution_count": 44,
"execution_count": 46,
"metadata": {},
"outputs": [],
"source": [
@@ -789,10 +825,8 @@
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {
"scrolled": true
},
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
"np.random.seed(42)\n",
@@ -800,23 +834,65 @@
"X = 6 * np.random.rand(m, 1) - 3\n",
"y = 2 + X + 0.5 * X**2 + np.random.randn(m, 1)\n",
"\n",
"X_train, X_val, y_train, y_val = train_test_split(X[:50], y[:50].ravel(), test_size=0.5, random_state=10)\n",
"X_train, X_val, y_train, y_val = train_test_split(X[:50], y[:50].ravel(), test_size=0.5, random_state=10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Early stopping example:"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"from sklearn.base import clone\n",
"\n",
"poly_scaler = Pipeline([\n",
" (\"poly_features\", PolynomialFeatures(degree=90, include_bias=False)),\n",
" (\"std_scaler\", StandardScaler()),\n",
" (\"std_scaler\", StandardScaler())\n",
" ])\n",
"\n",
"X_train_poly_scaled = poly_scaler.fit_transform(X_train)\n",
"X_val_poly_scaled = poly_scaler.transform(X_val)\n",
"\n",
"sgd_reg = SGDRegressor(max_iter=1,\n",
" tol=-np.infty,\n",
" penalty=None,\n",
" eta0=0.0005,\n",
" warm_start=True,\n",
" learning_rate=\"constant\",\n",
" random_state=42)\n",
"from copy import deepcopy\n",
"\n",
"# np.inf replaces the np.infty alias (same value), which NumPy 2.0 removed.\n",
"sgd_reg = SGDRegressor(max_iter=1, tol=-np.inf, warm_start=True,\n",
"                       penalty=None, learning_rate=\"constant\", eta0=0.0005, random_state=42)\n",
"\n",
"minimum_val_error = float(\"inf\")\n",
"best_epoch = None\n",
"best_model = None\n",
"for epoch in range(1000):\n",
"    sgd_reg.fit(X_train_poly_scaled, y_train)  # continues where it left off\n",
"    y_val_predict = sgd_reg.predict(X_val_poly_scaled)\n",
"    val_error = mean_squared_error(y_val, y_val_predict)\n",
"    if val_error < minimum_val_error:\n",
"        minimum_val_error = val_error\n",
"        best_epoch = epoch\n",
"        # sklearn.base.clone() returns a new *unfitted* estimator (hyperparameters only),\n",
"        # so snapshot the fitted model with deepcopy to keep the learned weights.\n",
"        best_model = deepcopy(sgd_reg)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create the graph:"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"# np.inf replaces the np.infty alias (same value), which NumPy 2.0 removed.\n",
"sgd_reg = SGDRegressor(max_iter=1, tol=-np.inf, warm_start=True,\n",
"                       penalty=None, learning_rate=\"constant\", eta0=0.0005, random_state=42)\n",
"\n",
"n_epochs = 500\n",
"train_errors, val_errors = [], []\n",
@@ -851,30 +927,7 @@
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.base import clone\n",
"sgd_reg = SGDRegressor(max_iter=1, tol=-np.infty, warm_start=True, penalty=None,\n",
" learning_rate=\"constant\", eta0=0.0005, random_state=42)\n",
"\n",
"minimum_val_error = float(\"inf\")\n",
"best_epoch = None\n",
"best_model = None\n",
"for epoch in range(1000):\n",
" sgd_reg.fit(X_train_poly_scaled, y_train) # continues where it left off\n",
" y_val_predict = sgd_reg.predict(X_val_poly_scaled)\n",
" val_error = mean_squared_error(y_val, y_val_predict)\n",
" if val_error < minimum_val_error:\n",
" minimum_val_error = val_error\n",
" best_epoch = epoch\n",
" best_model = clone(sgd_reg)"
]
},
{
"cell_type": "code",
"execution_count": 47,
"execution_count": 50,
"metadata": {},
"outputs": [],
"source": [
@@ -883,7 +936,7 @@
},
{
"cell_type": "code",
"execution_count": 48,
"execution_count": 51,
"metadata": {},
"outputs": [],
"source": [
@@ -894,7 +947,7 @@
},
{
"cell_type": "code",
"execution_count": 49,
"execution_count": 52,
"metadata": {},
"outputs": [],
"source": [
@@ -921,7 +974,7 @@
},
{
"cell_type": "code",
"execution_count": 50,
"execution_count": 53,
"metadata": {},
"outputs": [],
"source": [
@@ -989,7 +1042,7 @@
},
{
"cell_type": "code",
"execution_count": 51,
"execution_count": 54,
"metadata": {},
"outputs": [],
"source": [
@@ -1010,7 +1063,7 @@
},
{
"cell_type": "code",
"execution_count": 52,
"execution_count": 55,
"metadata": {},
"outputs": [],
"source": [
@@ -1021,7 +1074,7 @@
},
{
"cell_type": "code",
"execution_count": 53,
"execution_count": 56,
"metadata": {},
"outputs": [],
"source": [
@@ -1030,7 +1083,7 @@
},
{
"cell_type": "code",
"execution_count": 54,
"execution_count": 57,
"metadata": {},
"outputs": [],
"source": [
@@ -1038,20 +1091,27 @@
"y = (iris[\"target\"] == 2).astype(int)  # 1 if Iris-Virginica, else 0 (np.int alias was removed in NumPy 1.24)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Note**: To be future-proof, we set `solver=\"lbfgs\"` since this will be the default value in Scikit-Learn 0.22."
]
},
{
"cell_type": "code",
"execution_count": 55,
"execution_count": 58,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.linear_model import LogisticRegression\n",
"log_reg = LogisticRegression(solver=\"liblinear\", random_state=42)\n",
"log_reg = LogisticRegression(solver=\"lbfgs\", random_state=42)\n",
"log_reg.fit(X, y)"
]
},
{
"cell_type": "code",
"execution_count": 56,
"execution_count": 59,
"metadata": {},
"outputs": [],
"source": [
@@ -1071,7 +1131,7 @@
},
{
"cell_type": "code",
"execution_count": 57,
"execution_count": 60,
"metadata": {},
"outputs": [],
"source": [
@@ -1098,7 +1158,7 @@
},
{
"cell_type": "code",
"execution_count": 58,
"execution_count": 61,
"metadata": {},
"outputs": [],
"source": [
@@ -1107,7 +1167,7 @@
},
{
"cell_type": "code",
"execution_count": 59,
"execution_count": 62,
"metadata": {},
"outputs": [],
"source": [
@@ -1116,7 +1176,7 @@
},
{
"cell_type": "code",
"execution_count": 60,
"execution_count": 63,
"metadata": {},
"outputs": [],
"source": [
@@ -1125,7 +1185,7 @@
"X = iris[\"data\"][:, (2, 3)] # petal length, petal width\n",
"y = (iris[\"target\"] == 2).astype(int)  # builtin int: the np.int alias was removed in NumPy 1.24\n",
"\n",
"log_reg = LogisticRegression(solver=\"liblinear\", C=10**10, random_state=42)\n",
"log_reg = LogisticRegression(solver=\"lbfgs\", C=10**10, random_state=42)\n",
"log_reg.fit(X, y)\n",
"\n",
"x0, x1 = np.meshgrid(\n",
@@ -1160,7 +1220,7 @@
},
{
"cell_type": "code",
"execution_count": 61,
"execution_count": 64,
"metadata": {},
"outputs": [],
"source": [
@@ -1173,7 +1233,7 @@
},
{
"cell_type": "code",
"execution_count": 62,
"execution_count": 65,
"metadata": {},
"outputs": [],
"source": [
@@ -1211,7 +1271,7 @@
},
{
"cell_type": "code",
"execution_count": 63,
"execution_count": 66,
"metadata": {},
"outputs": [],
"source": [
@@ -1220,7 +1280,7 @@
},
{
"cell_type": "code",
"execution_count": 64,
"execution_count": 67,
"metadata": {},
"outputs": [],
"source": [
@@ -1265,7 +1325,7 @@
},
{
"cell_type": "code",
"execution_count": 65,
"execution_count": 68,
"metadata": {},
"outputs": [],
"source": [
@@ -1282,7 +1342,7 @@
},
{
"cell_type": "code",
"execution_count": 66,
"execution_count": 69,
"metadata": {},
"outputs": [],
"source": [
@@ -1298,7 +1358,7 @@
},
{
"cell_type": "code",
"execution_count": 67,
"execution_count": 70,
"metadata": {},
"outputs": [],
"source": [
@@ -1314,7 +1374,7 @@
},
{
"cell_type": "code",
"execution_count": 68,
"execution_count": 71,
"metadata": {},
"outputs": [],
"source": [
@@ -1345,7 +1405,7 @@
},
{
"cell_type": "code",
"execution_count": 69,
"execution_count": 72,
"metadata": {},
"outputs": [],
"source": [
@@ -1366,7 +1426,7 @@
},
{
"cell_type": "code",
"execution_count": 70,
"execution_count": 73,
"metadata": {},
"outputs": [],
"source": [
@@ -1375,7 +1435,7 @@
},
{
"cell_type": "code",
"execution_count": 71,
"execution_count": 74,
"metadata": {},
"outputs": [],
"source": [
@@ -1391,7 +1451,7 @@
},
{
"cell_type": "code",
"execution_count": 72,
"execution_count": 75,
"metadata": {},
"outputs": [],
"source": [
@@ -1411,7 +1471,7 @@
},
{
"cell_type": "code",
"execution_count": 73,
"execution_count": 76,
"metadata": {},
"outputs": [],
"source": [
@@ -1430,7 +1490,7 @@
},
{
"cell_type": "code",
"execution_count": 74,
"execution_count": 77,
"metadata": {},
"outputs": [],
"source": [
@@ -1458,7 +1518,7 @@
},
{
"cell_type": "code",
"execution_count": 75,
"execution_count": 78,
"metadata": {},
"outputs": [],
"source": [
@@ -1489,7 +1549,7 @@
},
{
"cell_type": "code",
"execution_count": 76,
"execution_count": 79,
"metadata": {},
"outputs": [],
"source": [
@@ -1505,7 +1565,7 @@
},
{
"cell_type": "code",
"execution_count": 77,
"execution_count": 80,
"metadata": {},
"outputs": [],
"source": [
@@ -1526,7 +1586,7 @@
},
{
"cell_type": "code",
"execution_count": 78,
"execution_count": 81,
"metadata": {},
"outputs": [],
"source": [
@@ -1560,7 +1620,7 @@
},
{
"cell_type": "code",
"execution_count": 79,
"execution_count": 82,
"metadata": {},
"outputs": [],
"source": [
@@ -1588,7 +1648,7 @@
},
{
"cell_type": "code",
"execution_count": 80,
"execution_count": 83,
"metadata": {},
"outputs": [],
"source": [
@@ -1628,7 +1688,7 @@
},
{
"cell_type": "code",
"execution_count": 81,
"execution_count": 84,
"metadata": {},
"outputs": [],
"source": [
@@ -1656,7 +1716,7 @@
},
{
"cell_type": "code",
"execution_count": 82,
"execution_count": 85,
"metadata": {},
"outputs": [],
"source": [
@@ -1701,7 +1761,7 @@
},
{
"cell_type": "code",
"execution_count": 83,
"execution_count": 86,
"metadata": {},
"outputs": [],
"source": [
@@ -1730,7 +1790,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 - tf2",
"display_name": "Python 3",
"language": "python",
"name": "python3"
},