update versions

This commit is contained in:
franzi
2022-08-13 18:02:20 +02:00
parent dd5ced1028
commit 2ba3a610d5
11 changed files with 112 additions and 95 deletions

View File

@@ -223,15 +223,14 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# lambdas = eigenvalues\n", "print(kpca.eigenvalues_[:10])\n",
"print(kpca.lambdas_[:10])\n",
"# check how much \"information\" we would keep if we were to reduce the dimensionality to 20\n", "# check how much \"information\" we would keep if we were to reduce the dimensionality to 20\n",
"# (this is not 100% accurate, since we only computed the first 100 kPCA components, i.e.,\n", "# (this is not 100% accurate, since we only computed the first 100 kPCA components, i.e.,\n",
"# normally lambda_ should contain all eigenvalues - but this should be close enough)\n", "# normally kpca.eigenvalues_ should contain all eigenvalues - but this should be close enough)\n",
"print(\"Percentage of variance retained with 20 components:\", 100*(sum(kpca.lambdas_[:20])/sum(kpca.lambdas_)))\n", "print(\"Percentage of variance retained with 20 components:\", 100*(sum(kpca.eigenvalues_[:20])/sum(kpca.eigenvalues_)))\n",
"# plot eigenvalue spectrum\n", "# plot eigenvalue spectrum\n",
"plt.figure()\n", "plt.figure()\n",
"plt.plot(range(1, len(kpca.lambdas_)+1), kpca.lambdas_)\n", "plt.plot(range(1, len(kpca.eigenvalues_)+1), kpca.eigenvalues_)\n",
"plt.xlabel(\"PCs\")\n", "plt.xlabel(\"PCs\")\n",
"plt.ylabel(\"Eigenvalue\");\n", "plt.ylabel(\"Eigenvalue\");\n",
"# observe how the first value is extremely large" "# observe how the first value is extremely large"
@@ -359,7 +358,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3", "display_name": "Python 3 (ipykernel)",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },
@@ -373,7 +372,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.8.5" "version": "3.10.2"
} }
}, },
"nbformat": 4, "nbformat": 4,

View File

@@ -185,7 +185,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3", "display_name": "Python 3 (ipykernel)",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },
@@ -199,7 +199,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.8.5" "version": "3.10.2"
} }
}, },
"nbformat": 4, "nbformat": 4,

View File

@@ -22,6 +22,8 @@
"import matplotlib.pyplot as plt\n", "import matplotlib.pyplot as plt\n",
"from matplotlib.colors import ListedColormap\n", "from matplotlib.colors import ListedColormap\n",
"from sklearn.datasets import make_moons\n", "from sklearn.datasets import make_moons\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.inspection import DecisionBoundaryDisplay\n",
"# don't get unneccessary warnings\n", "# don't get unneccessary warnings\n",
"import warnings\n", "import warnings\n",
"warnings.simplefilter(action='ignore', category=FutureWarning)" "warnings.simplefilter(action='ignore', category=FutureWarning)"
@@ -54,39 +56,22 @@
"def plot_classification(X, Y, model=None):\n", "def plot_classification(X, Y, model=None):\n",
" # plot a classification dataset (and model predictions)\n", " # plot a classification dataset (and model predictions)\n",
" plt.figure()\n", " plt.figure()\n",
" x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5\n",
" y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5\n",
" xx, yy = np.meshgrid(np.linspace(x_min, x_max, 250),\n",
" np.linspace(y_min, y_max, 250))\n",
" cm = plt.cm.RdBu\n",
" cm_bright = ListedColormap(['#FF0000', '#0000FF'])\n",
" if model is not None:\n", " if model is not None:\n",
" try:\n", " DecisionBoundaryDisplay.from_estimator(\n",
" Z = model.decision_function(np.c_[xx.ravel(), yy.ravel()])\n", " model, X, cmap=plt.cm.RdBu, alpha=0.8, eps=0.5, xlabel=\"feature 1\", ylabel=\"feature 2\", ax=plt.gca()\n",
" alpha = 0.8\n", " )\n",
" except:\n", " # plot the training points\n",
" # decision tree\n", " plt.scatter(X[:, 0], X[:, 1], s=20, c=Y, cmap=ListedColormap(['#FF0000', '#0000FF']), label=\"data samples\")\n",
" Z = model.predict(np.c_[xx.ravel(), yy.ravel()])\n",
" alpha = 0.4\n",
" # Put the result into a color plot\n",
" Z = Z.reshape(xx.shape)\n",
" plt.contourf(xx, yy, Z, cmap=cm, alpha=alpha)\n",
" # Plot the training points\n",
" plt.scatter(X[:, 0], X[:, 1], s=20, c=Y, cmap=cm_bright, label=\"data samples\")\n",
" plt.xlim(xx.min(), xx.max())\n",
" plt.ylim(yy.min(), yy.max())\n",
" plt.xlabel(\"feature 1\")\n",
" plt.ylabel(\"feature 2\")\n",
" plt.title(\"Classification Problem\")\n", " plt.title(\"Classification Problem\")\n",
" plt.colorbar()\n", " plt.colorbar()\n",
"\n", " \n",
"def get_linear_regression():\n", "def get_linear_regression():\n",
" # generate noisy linear regression dataset\n", " # generate noisy linear regression dataset\n",
" np.random.seed(15)\n", " np.random.seed(15)\n",
" X = np.random.rand(n_train_reg, 1)\n", " X = np.random.rand(n_train_reg, 1)\n",
" y = -2.5 + 5*X\n", " y = -2.5 + 5*X\n",
" y += np.random.randn(n_train_reg, 1) * 0.4\n", " y += np.random.randn(n_train_reg, 1) * 0.4\n",
" return X, y.flatten()\n", " return StandardScaler(with_std=False).fit_transform(X), y.flatten()\n",
"\n", "\n",
"def get_linear_outlier():\n", "def get_linear_outlier():\n",
" # generate linear regression dataset with outliers\n", " # generate linear regression dataset with outliers\n",
@@ -95,7 +80,7 @@
" y = -2.5 + 5*X\n", " y = -2.5 + 5*X\n",
" y += np.random.randn(n_train_reg, 1) * 0.05\n", " y += np.random.randn(n_train_reg, 1) * 0.05\n",
" y[(X>0.7) & (X<0.73)] = 10\n", " y[(X>0.7) & (X<0.73)] = 10\n",
" return X, y.flatten()\n", " return StandardScaler(with_std=False).fit_transform(X), y.flatten()\n",
"\n", "\n",
"def get_nonlinear_regression():\n", "def get_nonlinear_regression():\n",
" # generate noisy non-linear regression dataset\n", " # generate noisy non-linear regression dataset\n",
@@ -103,7 +88,7 @@
" X = np.random.rand(n_train_reg, 1) * np.pi * 2.\n", " X = np.random.rand(n_train_reg, 1) * np.pi * 2.\n",
" y = np.sin(X)\n", " y = np.sin(X)\n",
" y += np.random.randn(n_train_reg, 1) * 0.2\n", " y += np.random.randn(n_train_reg, 1) * 0.2\n",
" return X, y.flatten()\n", " return StandardScaler().fit_transform(X), y.flatten()\n",
"\n", "\n",
"def get_linear_classification_1f():\n", "def get_linear_classification_1f():\n",
" # generate classification dataset with 1 informative feature\n", " # generate classification dataset with 1 informative feature\n",
@@ -117,7 +102,7 @@
" y = np.zeros(n_train_clf, dtype=int)\n", " y = np.zeros(n_train_clf, dtype=int)\n",
" y[n_train_clf//2:] = 1\n", " y[n_train_clf//2:] = 1\n",
" rndidx = np.random.permutation(len(y))\n", " rndidx = np.random.permutation(len(y))\n",
" return X[rndidx], y[rndidx]\n", " return StandardScaler().fit_transform(X[rndidx]), y[rndidx]\n",
"\n", "\n",
"def get_linear_classification_2f():\n", "def get_linear_classification_2f():\n",
" # generate classification dataset with 2 informative features\n", " # generate classification dataset with 2 informative features\n",
@@ -132,11 +117,12 @@
" y = np.zeros(n_train_clf, dtype=int)\n", " y = np.zeros(n_train_clf, dtype=int)\n",
" y[n_train_clf//2:] = 1\n", " y[n_train_clf//2:] = 1\n",
" rndidx = np.random.permutation(len(y))\n", " rndidx = np.random.permutation(len(y))\n",
" return X[rndidx], y[rndidx]\n", " return StandardScaler().fit_transform(X[rndidx]), y[rndidx]\n",
"\n", "\n",
"def get_nonlinear_classification():\n", "def get_nonlinear_classification():\n",
" # generate non-linear classification dataset\n", " # generate non-linear classification dataset\n",
" return make_moons(n_samples=n_train_clf, noise=0.3, random_state=1)" " X, y = make_moons(n_samples=n_train_clf, noise=0.3, random_state=1)\n",
" return StandardScaler().fit_transform(X), y"
] ]
}, },
{ {
@@ -248,6 +234,59 @@
"print(f\"f(x) = sigmoid({model.intercept_[0]:.3f} + {model.coef_[0, 0]:.3f} * x_1 + {model.coef_[0, 1]:.3f} * x_2)\")" "print(f\"f(x) = sigmoid({model.intercept_[0]:.3f} + {model.coef_[0, 0]:.3f} * x_1 + {model.coef_[0, 1]:.3f} * x_2)\")"
] ]
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Feed-Forward Neural Network (Multi-Layer Perceptron)\n",
"\n",
"After reading the chapter on neural networks, test them here on different datasets and experiment with their hyperparameter settings.\n",
"\n",
"**Questions:**\n",
"- What do you observe when you change the activation function to `'relu'` for the 3rd regression dataset?"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.neural_network import MLPRegressor, MLPClassifier"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Neural network for regression:\n",
"# alpha (> 0): regularization (higher values = more regularization)\n",
"# hidden_layer_sizes (tuple of ints): number of units in the hidden layers\n",
"# activation (one of 'identity', 'logistic', 'tanh', 'relu'): non-linear activation function between layers\n",
"X, y = X_reg_3, y_reg_3\n",
"model = MLPRegressor(alpha=1e-05, hidden_layer_sizes=(15,), activation=\"tanh\", solver=\"lbfgs\", random_state=1)\n",
"model.fit(X, y)\n",
"plot_regression(X, y, model)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Neural network for classification:\n",
"# alpha (> 0): regularization (higher values = more regularization)\n",
"# hidden_layer_sizes (tuple of ints): number of units in the hidden layers\n",
"# activation (one of 'identity', 'logistic', 'tanh', 'relu'): non-linear activation function between layers\n",
"X, y = X_clf_3, y_clf_3\n",
"model = MLPClassifier(alpha=1e-05, hidden_layer_sizes=(25, 5), activation=\"relu\", solver=\"adam\", max_iter=1000, random_state=1)\n",
"model.fit(X, y)\n",
"plot_classification(X, y, model)"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
@@ -468,7 +507,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3", "display_name": "Python 3 (ipykernel)",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },
@@ -482,7 +521,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.8.5" "version": "3.10.2"
} }
}, },
"nbformat": 4, "nbformat": 4,

View File

@@ -840,27 +840,6 @@
"While it was a bit more work to set up the logistic regression model appropriately, incl. extra data preprocessing steps, we now even got a balanced accuracy on the test set that is slightly higher than that of the decision tree (0.938 instead of 0.935)." "While it was a bit more work to set up the logistic regression model appropriately, incl. extra data preprocessing steps, we now even got a balanced accuracy on the test set that is slightly higher than that of the decision tree (0.938 instead of 0.935)."
] ]
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Advanced Exercise (optional)\n",
"\n",
"Use a neural network (either using the `torch`/`skorch` (recommended) or `tensorflow`/`keras` libraries) to solve this task.\n",
"\n",
"Start with a linear network (i.e., a FFNN without hidden layers, i.e., the same number of trainable parameters as the logistic regression model used above) and try to get approximately the same performance as the LogReg model.\n",
"\n",
"Then use a deeper network (e.g., one additional hidden layer) and see if this improves the performance.\n",
"\n",
"**Tips:**\n",
"- Make sure to use scaled data!\n",
"- Since the faulty products are underrepresented, samples from this class should get a higher weight during training (similar to what we're doing with `class_weight=\"balanced\"` in sklearn models). \n",
"\n",
"**Using `torch` & `skorch`:**\n",
"- Use a skorch [`NeuralNetBinaryClassifier`](https://skorch.readthedocs.io/en/latest/classifier.html). Here the torch network shoud predict the output without any non-linear activation function at the end (i.e., *don't* use a sigmoid function to convert the output into probabilities) as the skorch model takes care of this conversion for you!\n",
"- The sample weights can be set by passing `criterion__pos_weight=torch.Tensor([np.sum(y_train==0)/np.sum(y_train==1)])` as an argument to the `NeuralNetBinaryClassifier` (see also the documentation for the torch [`BCEWithLogitsLoss`](https://pytorch.org/docs/stable/generated/torch.nn.BCEWithLogitsLoss.html) loss function, which is used internally by the skorch model)"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
@@ -871,7 +850,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3", "display_name": "Python 3 (ipykernel)",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },
@@ -885,7 +864,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.8.5" "version": "3.10.2"
} }
}, },
"nbformat": 4, "nbformat": 4,

View File

@@ -161,7 +161,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3", "display_name": "Python 3 (ipykernel)",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },
@@ -175,7 +175,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.8.5" "version": "3.10.2"
} }
}, },
"nbformat": 4, "nbformat": 4,

View File

@@ -408,7 +408,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3", "display_name": "Python 3 (ipykernel)",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },
@@ -422,7 +422,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.8.5" "version": "3.10.2"
} }
}, },
"nbformat": 4, "nbformat": 4,

View File

@@ -617,7 +617,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3", "display_name": "Python 3 (ipykernel)",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },
@@ -631,7 +631,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.8.5" "version": "3.10.2"
} }
}, },
"nbformat": 4, "nbformat": 4,

View File

@@ -667,7 +667,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3", "display_name": "Python 3 (ipykernel)",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },
@@ -681,7 +681,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.8.5" "version": "3.10.2"
} }
}, },
"nbformat": 4, "nbformat": 4,

View File

@@ -335,7 +335,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3", "display_name": "Python 3 (ipykernel)",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },
@@ -349,7 +349,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.8.5" "version": "3.10.2"
} }
}, },
"nbformat": 4, "nbformat": 4,

View File

@@ -1,10 +1,10 @@
numpy>=1.18.5 numpy>=1.22.3
pandas>=1.2.1 pandas>=1.4.2
scipy>=1.4.1 scipy>=1.8.0
scikit-learn>=0.24.1 scikit-learn>=1.1.2
matplotlib>=3.3.1 matplotlib>=3.5.1
pillow>=8.1.0 pillow>=9.1.0
plotly>=4.9.0 plotly>=5.7.0
torch>=1.6.0 torch>=1.12.1
torchvision>=0.8.1 torchvision>=0.13.1
skorch>=0.9.0 skorch>=0.11.0

View File

@@ -16,19 +16,19 @@
"# check versions of the libraries\n", "# check versions of the libraries\n",
"# they should not be too much behind the ones in the comments...\n", "# they should not be too much behind the ones in the comments...\n",
"import numpy\n", "import numpy\n",
"print(\"numpy\", numpy.__version__) # >= 1.18.5\n", "print(\"numpy\", numpy.__version__) # >= 1.22.3\n",
"import pandas\n", "import pandas\n",
"print(\"pandas\", pandas.__version__) # >= 1.2.1\n", "print(\"pandas\", pandas.__version__) # >= 1.4.2\n",
"import scipy\n", "import scipy\n",
"print(\"scipy\", scipy.__version__) # >= 1.4.1\n", "print(\"scipy\", scipy.__version__) # >= 1.8.0\n",
"import sklearn\n", "import sklearn\n",
"print(\"sklearn\", sklearn.__version__) # >= 0.24.1\n", "print(\"sklearn\", sklearn.__version__) # >= 1.1.2\n",
"import matplotlib\n", "import matplotlib\n",
"print(\"matplotlib\", matplotlib.__version__) # >= 3.3.1\n", "print(\"matplotlib\", matplotlib.__version__) # >= 3.5.1\n",
"import PIL\n", "import PIL\n",
"print(\"pillow\", PIL.__version__) # >= 8.1.0\n", "print(\"pillow\", PIL.__version__) # >= 9.1.0\n",
"import plotly\n", "import plotly\n",
"print(\"plotly\", plotly.__version__) # >= 4.9.0\n", "print(\"plotly\", plotly.__version__) # >= 5.7.0\n",
"print(\"Congratulations! Your installation of the basic libraries was successful!\")\n", "print(\"Congratulations! Your installation of the basic libraries was successful!\")\n",
"# the following libraries are needed for the neural network example \n", "# the following libraries are needed for the neural network example \n",
"# (if you're working with the recommended pytorch, not keras/tensorflow)\n", "# (if you're working with the recommended pytorch, not keras/tensorflow)\n",
@@ -38,11 +38,11 @@
"# (if you're working on a normal laptop and you don't know what GPU means,\n", "# (if you're working on a normal laptop and you don't know what GPU means,\n",
"# don't worry about it and just execute `$ pip install torch torchvision skorch`)\n", "# don't worry about it and just execute `$ pip install torch torchvision skorch`)\n",
"import torch\n", "import torch\n",
"print(\"torch\", torch.__version__) # >= 1.6.0\n", "print(\"torch\", torch.__version__) # >= 1.12.1\n",
"import torchvision\n", "import torchvision\n",
"print(\"torchvision\", torchvision.__version__) # >= 0.8.1\n", "print(\"torchvision\", torchvision.__version__) # >= 0.13.1\n",
"import skorch\n", "import skorch\n",
"print(\"skorch\", skorch.__version__) # >= 0.9.0\n", "print(\"skorch\", skorch.__version__) # >= 0.11.0\n",
"print(\"Congratulations! Your installation of the neural network libraries was successful!\")" "print(\"Congratulations! Your installation of the neural network libraries was successful!\")"
] ]
}, },
@@ -56,7 +56,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3", "display_name": "Python 3 (ipykernel)",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },
@@ -70,7 +70,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.8.5" "version": "3.10.2"
} }
}, },
"nbformat": 4, "nbformat": 4,