Refactor code for improved readability and consistency across notebooks

- Standardized spacing around operators and function arguments in TP7_Kmeans.ipynb and neural_network.ipynb.
- Reformatted the model-building and training code in neural_network.ipynb for clarity.
- Updated pyproject.toml to remove the pinned TensorFlow version and add a linting configuration for Ruff (a sketch of what that configuration might look like follows this list).
- Improved comments and code organization to make the notebooks easier to understand and maintain.
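The pyproject.toml diff itself is not reproduced on this page, so the following is only a minimal sketch of what the added Ruff configuration might look like. The table names follow Ruff's documented pyproject.toml layout, but the line length and rule selection shown here are assumptions, not values taken from this commit:

    [tool.ruff]
    # Assumed line length (Ruff's default); not confirmed by this commit.
    line-length = 88

    [tool.ruff.lint]
    # Hypothetical rule selection: pycodestyle errors, pyflakes, import sorting.
    select = ["E", "F", "I"]

The mechanical changes that dominate the diffs below (single quotes normalized to double quotes, spaces dropped around ** when both operands are simple, so h ** 2 becomes h**2, and long argument lists wrapped) match the defaults of Ruff's Black-compatible formatter and need no extra configuration.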
Date: 2025-07-01 20:46:08 +02:00
Parent: e273cf90f7
Commit: f94ff07cab
34 changed files with 5713 additions and 5047 deletions

View File

@@ -122,7 +122,7 @@
" y = np.zeros(N + 1)\n",
" y[0] = y0\n",
" for n in range(N):\n",
" y[n + 1] = np.power(h + np.sqrt(h ** 2 + y[n]), 2)\n",
" y[n + 1] = np.power(h + np.sqrt(h**2 + y[n]), 2)\n",
" return t, y"
]
},
@@ -158,7 +158,7 @@
"\n",
"plt.scatter(t, sol_appr, label=\"Approximation with EI\")\n",
"plt.plot(x, f_exact(x, T), label=\"Exact solution\", color=\"red\")\n",
"plt.plot(x, x ** 2, label=\"Square function\", color=\"green\")\n",
"plt.plot(x, x**2, label=\"Square function\", color=\"green\")\n",
"plt.legend()\n",
"plt.show()"
]
@@ -297,9 +297,9 @@
"\n",
"sol = odeint(F, y0, t, args=(a, r))\n",
"\n",
"plt.plot(t, sol[:, 0], label='S(t)')\n",
"plt.plot(t, sol[:, 1], label='I(t)')\n",
"plt.plot(t, sol[:, 2], label='R(t)')\n",
"plt.plot(t, sol[:, 0], label=\"S(t)\")\n",
"plt.plot(t, sol[:, 1], label=\"I(t)\")\n",
"plt.plot(t, sol[:, 2], label=\"R(t)\")\n",
"plt.legend()\n",
"plt.show()"
]
@@ -336,7 +336,9 @@
"\n",
"def calculate_errors(sol_exact, sol_appr):\n",
" return np.max(\n",
" np.power(np.abs(sol_appr - sol_exact), 2)[np.isfinite(np.power(np.abs(sol_appr - sol_exact), 2))]\n",
" np.power(np.abs(sol_appr - sol_exact), 2)[\n",
" np.isfinite(np.power(np.abs(sol_appr - sol_exact), 2))\n",
" ]\n",
" )\n",
"\n",
"\n",
@@ -356,8 +358,8 @@
"plt.plot(errors_EE, label=\"Euler Explicit\")\n",
"plt.plot(errors_H, label=\"Heun\")\n",
"plt.plot(errors_RK4, label=\"Runge Kutta order 4\")\n",
"plt.yscale('log')\n",
"plt.xscale('log')\n",
"plt.yscale(\"log\")\n",
"plt.xscale(\"log\")\n",
"plt.legend()\n",
"plt.show()"
]
@@ -431,23 +433,23 @@
"# Plot the real parts\n",
"plt.figure(figsize=(12, 6))\n",
"plt.subplot(1, 2, 1)\n",
"plt.plot(t, np.real(x_appr_EI), label='Numerical Solution by EI')\n",
"plt.plot(t, np.real(x_appr_EE), label='Numerical Solution by EE')\n",
"plt.plot(t, np.real(x_exact), label='Exact Solution', linestyle='--')\n",
"plt.xlabel('Time')\n",
"plt.ylabel('Real Part')\n",
"plt.plot(t, np.real(x_appr_EI), label=\"Numerical Solution by EI\")\n",
"plt.plot(t, np.real(x_appr_EE), label=\"Numerical Solution by EE\")\n",
"plt.plot(t, np.real(x_exact), label=\"Exact Solution\", linestyle=\"--\")\n",
"plt.xlabel(\"Time\")\n",
"plt.ylabel(\"Real Part\")\n",
"plt.legend()\n",
"plt.title('Real Part of the Solution')\n",
"plt.title(\"Real Part of the Solution\")\n",
"\n",
"# Plot the imaginary parts\n",
"plt.subplot(1, 2, 2)\n",
"plt.plot(t, np.imag(x_appr_EI), label='Numerical Solution by EI')\n",
"plt.plot(t, np.imag(x_appr_EE), label='Numerical Solution by EE')\n",
"plt.plot(t, np.imag(x_exact), label='Exact Solution', linestyle='--')\n",
"plt.xlabel('Time')\n",
"plt.ylabel('Imaginary Part')\n",
"plt.plot(t, np.imag(x_appr_EI), label=\"Numerical Solution by EI\")\n",
"plt.plot(t, np.imag(x_appr_EE), label=\"Numerical Solution by EE\")\n",
"plt.plot(t, np.imag(x_exact), label=\"Exact Solution\", linestyle=\"--\")\n",
"plt.xlabel(\"Time\")\n",
"plt.ylabel(\"Imaginary Part\")\n",
"plt.legend()\n",
"plt.title('Imaginary Part of the Solution')\n",
"plt.title(\"Imaginary Part of the Solution\")\n",
"\n",
"plt.show()"
]

View File

@@ -1,8 +1,9 @@
{
"cells": [
{
"metadata": {},
"cell_type": "markdown",
"id": "c897654e0a140cbd",
"metadata": {},
"source": [
"# Automatic Differentiation\n",
"\n",
@@ -11,42 +12,18 @@
"Loss function: softmax layer in $\\mathbb{R}^3$\n",
"\n",
"Architecture: FC/ReLU 4-5-7-3"
],
"id": "c897654e0a140cbd"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "70a4eb1d928b10d0",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-24T15:16:27.015669Z",
"start_time": "2025-03-24T15:16:23.856887Z"
}
},
"cell_type": "code",
"source": [
"import numpy as np\n",
"from sklearn.neural_network import MLPClassifier\n",
"from sklearn.datasets import make_classification\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import accuracy_score\n",
"\n",
"accuracies = []\n",
"\n",
"for _ in range(10):\n",
" X, y = make_classification(n_samples=1000, n_features=4, n_classes=3, n_clusters_per_class=1)\n",
"\n",
" X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n",
" model = MLPClassifier(hidden_layer_sizes=(5, 7), activation='relu', max_iter=10000, solver='adam')\n",
" model.fit(X_train, y_train)\n",
"\n",
" y_pred = model.predict(X_test)\n",
" accuracies.append(accuracy_score(y_test, y_pred))\n",
"\n",
"print(f'Mean Accuracy: {np.mean(accuracies) * 100:.0f}%')\n",
"print(f'STD Accuracy: {np.std(accuracies) * 100:.0f}%')\n",
"print(f\"Max accuracy: {np.max(accuracies) * 100:.0f}%\")\n",
"print(f\"Min accuracy: {np.min(accuracies) * 100:.0f}%\")"
],
"id": "70a4eb1d928b10d0",
"outputs": [
{
"name": "stdout",
@@ -59,20 +36,47 @@
]
}
],
"execution_count": 33
"source": [
"import numpy as np\n",
"from sklearn.neural_network import MLPClassifier\n",
"from sklearn.datasets import make_classification\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import accuracy_score\n",
"\n",
"accuracies = []\n",
"\n",
"for _ in range(10):\n",
" X, y = make_classification(\n",
" n_samples=1000, n_features=4, n_classes=3, n_clusters_per_class=1\n",
" )\n",
"\n",
" X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n",
" model = MLPClassifier(\n",
" hidden_layer_sizes=(5, 7), activation=\"relu\", max_iter=10000, solver=\"adam\"\n",
" )\n",
" model.fit(X_train, y_train)\n",
"\n",
" y_pred = model.predict(X_test)\n",
" accuracies.append(accuracy_score(y_test, y_pred))\n",
"\n",
"print(f\"Mean Accuracy: {np.mean(accuracies) * 100:.0f}%\")\n",
"print(f\"STD Accuracy: {np.std(accuracies) * 100:.0f}%\")\n",
"print(f\"Max accuracy: {np.max(accuracies) * 100:.0f}%\")\n",
"print(f\"Min accuracy: {np.min(accuracies) * 100:.0f}%\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "96b6d46883ed5570",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-24T14:37:53.507776Z",
"start_time": "2025-03-24T14:37:53.505376Z"
}
},
"cell_type": "code",
"source": "",
"id": "96b6d46883ed5570",
"outputs": [],
"execution_count": null
"source": []
}
],
"metadata": {

View File

@@ -51,17 +51,18 @@
" \"\"\"\n",
" return S0 * np.exp((mu - 0.5 * sigma**2) * t + sigma * W)\n",
"\n",
"\n",
"def euler_maruyama(mu, sigma, T, N, X0=0.0):\n",
" \"\"\"\n",
" Simulation d'une EDS de Black-Scholes par la méthode d'Euler-Maruyama\n",
" \n",
"\n",
" Paramètres :\n",
" mu (float) : drift\n",
" sigma (float) : volatilité\n",
" T (int) : temps final\n",
" N (int) : nombre de pas de temps\n",
" X0 (float) : valeur initiale\n",
" \n",
"\n",
" Retourne :\n",
" t (array-like) : tableau des temps\n",
" X (array-like) : tableau des valeurs de l'EDS\n",
@@ -70,17 +71,18 @@
"\n",
" t = np.linspace(0, T, N + 1)\n",
" X = np.zeros(N + 1)\n",
" \n",
"\n",
" X[0] = X0\n",
"\n",
" dW = np.random.normal(0, np.sqrt(dt), N)\n",
" \n",
"\n",
" for i in range(N):\n",
" St = S(t[i], X[i], mu, sigma, dW[i])\n",
" X[i + 1] = X[i] + mu * St * dt + sigma * St * dW[i]\n",
" \n",
"\n",
" return t, X\n",
"\n",
"\n",
"def plot_brownien(t, X, B=None):\n",
" \"\"\"\n",
" Plot la simulation d'Euler-Maruyama\n",
@@ -90,15 +92,15 @@
" X (array-like) : tableau des valeurs de l'EDS\n",
" B (float) : barrière (optionnelle)\n",
" \"\"\"\n",
" plt.plot(t, X, alpha=0.5, label='Euler-Maruyama')\n",
" plt.title('Simulation d\\'Euler-Maruyama pour une EDS')\n",
" \n",
" plt.plot(t, X, alpha=0.5, label=\"Euler-Maruyama\")\n",
" plt.title(\"Simulation d'Euler-Maruyama pour une EDS\")\n",
"\n",
" if B is not None:\n",
" plt.axhline(B, label='Barrière', color='red', linestyle='--')\n",
" \n",
" plt.axhline(B, label=\"Barrière\", color=\"red\", linestyle=\"--\")\n",
"\n",
" plt.legend()\n",
" plt.xlabel('Temps')\n",
" plt.ylabel('X(t)')\n",
" plt.xlabel(\"Temps\")\n",
" plt.ylabel(\"X(t)\")\n",
" plt.grid()"
]
},
@@ -165,10 +167,11 @@
"\n",
"np.random.seed(333)\n",
"\n",
"\n",
"def plot_convergence(S0, mu, sigma, T):\n",
" \"\"\"\n",
" Plot la convergence du schéma d'Euler-Maruyama\n",
" \n",
"\n",
" Paramètres :\n",
" S0 (int) : valeur initiale\n",
" mu (float) : drift\n",
@@ -176,26 +179,27 @@
" T (int) : temps final\n",
" \"\"\"\n",
" errors = []\n",
" \n",
"\n",
" for N in N_list:\n",
" dt = T / N\n",
" dW = np.random.normal(0, np.sqrt(dt), N)\n",
" \n",
"\n",
" exact = S(T, S0, mu, sigma, dW)\n",
" _, X = euler_maruyama(mu=mu, sigma=sigma, T=T, N=N, X0=S0)\n",
" \n",
"\n",
" errors.append(np.max(np.abs(X[1:] - exact)))\n",
" \n",
" plt.plot(np.log(h_list), np.log(errors), 'o-', label='Erreur numérique')\n",
" plt.plot(np.log(h_list), 0.5 * np.log(h_list), '--', label='Ordre 1/2')\n",
" plt.plot(np.log(h_list), np.log(h_list), '--', label='Ordre 1')\n",
" plt.plot(np.log(h_list), 2*np.log(h_list), '--', label='Ordre 2')\n",
" plt.xlabel('log(h)')\n",
" plt.ylabel('log(Erreur)')\n",
" plt.title('Convergence du schéma d\\'Euler-Maruyama')\n",
"\n",
" plt.plot(np.log(h_list), np.log(errors), \"o-\", label=\"Erreur numérique\")\n",
" plt.plot(np.log(h_list), 0.5 * np.log(h_list), \"--\", label=\"Ordre 1/2\")\n",
" plt.plot(np.log(h_list), np.log(h_list), \"--\", label=\"Ordre 1\")\n",
" plt.plot(np.log(h_list), 2 * np.log(h_list), \"--\", label=\"Ordre 2\")\n",
" plt.xlabel(\"log(h)\")\n",
" plt.ylabel(\"log(Erreur)\")\n",
" plt.title(\"Convergence du schéma d'Euler-Maruyama\")\n",
" plt.legend()\n",
" plt.grid(True)\n",
"\n",
"\n",
"plt.figure(figsize=(10, 5))\n",
"plot_convergence(S0, r, sigma, T)\n",
"plt.show()"
@@ -269,6 +273,7 @@
"plot_brownien(t, X, B=B)\n",
"plt.show()\n",
"\n",
"\n",
"def is_barrier_breached(X, B):\n",
" \"\"\"Renvoie True si la barrière est franchie, False sinon\n",
" La barrière est franchie si X >= B\n",
@@ -282,7 +287,12 @@
" \"\"\"\n",
" return any(X >= B)\n",
"\n",
"print(\"La barrière a été franchie\" if is_barrier_breached(X, B) else \"La barrière n'a pas été franchie\")"
"\n",
"print(\n",
" \"La barrière a été franchie\"\n",
" if is_barrier_breached(X, B)\n",
" else \"La barrière n'a pas été franchie\"\n",
")"
]
},
{
@@ -299,18 +309,19 @@
" trajectories (list of tuples): Liste des trajectoires avec le temps et les valeurs\n",
" B (float): Valeur de la barrière\n",
" \"\"\"\n",
" for (t, X) in trajectories:\n",
" col = 'pink' if is_barrier_breached(X, B) else 'lime'\n",
" for t, X in trajectories:\n",
" col = \"pink\" if is_barrier_breached(X, B) else \"lime\"\n",
" plt.plot(t, X, alpha=0.5, color=col)\n",
" plt.title('Simulation d\\'Euler-Maruyama pour une EDS')\n",
" \n",
" plt.axhline(B, label='Barrière', color='red', linestyle='--')\n",
" \n",
" plt.title(\"Simulation d'Euler-Maruyama pour une EDS\")\n",
"\n",
" plt.axhline(B, label=\"Barrière\", color=\"red\", linestyle=\"--\")\n",
"\n",
" plt.legend()\n",
" plt.xlabel('Temps')\n",
" plt.ylabel('X(t)')\n",
" plt.xlabel(\"Temps\")\n",
" plt.ylabel(\"X(t)\")\n",
" plt.grid()\n",
" \n",
"\n",
"\n",
"def payoff(X, B, K):\n",
" \"\"\"Calcule le payoff d'une option en fonction des trajectoires.\n",
"\n",
@@ -324,9 +335,10 @@
" \"\"\"\n",
" if not is_barrier_breached(X, B):\n",
" return max(X[-1] - K, 0)\n",
" else: \n",
" else:\n",
" return 0\n",
" \n",
"\n",
"\n",
"def call_BS(x):\n",
" \"\"\"Calcul du prix d'une option d'achat européenne selon le modèle de Black-Scholes en fonction de x.\n",
"\n",
@@ -336,27 +348,34 @@
" Retourne:\n",
" float: Le prix de l'option d'achat européenne.\n",
" \"\"\"\n",
" d1 = (np.log(x/K) + (r + 0.5 * sigma**2) * T) / (sigma * np.sqrt(T))\n",
" d1 = (np.log(x / K) + (r + 0.5 * sigma**2) * T) / (sigma * np.sqrt(T))\n",
" d2 = d1 - sigma * np.sqrt(T)\n",
" return x * stats.norm.cdf(d1) - K * np.exp(-r * T) * stats.norm.cdf(d2)\n",
" \n",
"\n",
"\n",
"def compute_payoff_BS():\n",
" \"\"\"Calcul du prix d'une option d'achat Up-and-Out selon le modèle de Black-Scholes en fonction de la barrière.\n",
" \n",
"\n",
" Retourne:\n",
" float: Le prix de l'option d'achat Up-and-Out.\n",
" \"\"\"\n",
" lam = (r + 0.5 * sigma**2) / sigma**2\n",
" return call_BS(S0) - call_BS(S0) * (S0/B)**(2 * lam) + (S0/B)**(lam - 1) * (call_BS(B**2/S0) - (S0/B)**2 * call_BS(B**2/S0))\n",
" \n",
" return (\n",
" call_BS(S0)\n",
" - call_BS(S0) * (S0 / B) ** (2 * lam)\n",
" + (S0 / B) ** (lam - 1)\n",
" * (call_BS(B**2 / S0) - (S0 / B) ** 2 * call_BS(B**2 / S0))\n",
" )\n",
"\n",
"\n",
"def compute_payoff(trajectories, B, K):\n",
" \"\"\"Calcule le payoff d'une option en fonction des trajectoires.\n",
" \n",
"\n",
" Paramètres:\n",
" trajectories (list of tuples): Liste des trajectoires avec le temps et les valeurs.\n",
" B (float): Valeur de la barrière.\n",
" K (float): Prix d'exercice de l'option.\n",
" \n",
"\n",
" Retourne:\n",
" float: Valeur du payoff de l'option.\n",
" \"\"\"\n",
@@ -390,7 +409,13 @@
],
"source": [
"N_trajectories = 1000\n",
"trajectories = [(t, X) for (t, X) in [euler_maruyama(mu=r, sigma=sigma, T=T, N=1000, X0=S0) for _ in range(N_trajectories)]]\n",
"trajectories = [\n",
" (t, X)\n",
" for (t, X) in [\n",
" euler_maruyama(mu=r, sigma=sigma, T=T, N=1000, X0=S0)\n",
" for _ in range(N_trajectories)\n",
" ]\n",
"]\n",
"plt.figure(figsize=(10, 6))\n",
"plot_browniens(trajectories, B=B)\n",
"plt.show()\n",
@@ -431,28 +456,35 @@
"\n",
"np.random.seed(333)\n",
"\n",
"\n",
"def plot_payoff_errors():\n",
" \"\"\"Trace l'erreur de convergence du payoff actualisé en fonction de N.\"\"\"\n",
" errors = []\n",
" \n",
"\n",
" for N in N_list:\n",
" trajectories = [(t, X) for (t, X) in [euler_maruyama(mu=r, sigma=sigma, T=T, N=N, X0=S0) for _ in range(N_trajectories)]]\n",
" trajectories = [\n",
" (t, X)\n",
" for (t, X) in [\n",
" euler_maruyama(mu=r, sigma=sigma, T=T, N=N, X0=S0)\n",
" for _ in range(N_trajectories)\n",
" ]\n",
" ]\n",
" payoff_BS = compute_payoff_BS()\n",
" payoffs = compute_payoff(trajectories, B, K)\n",
" \n",
"\n",
" errors.append(np.max(np.abs(payoffs - payoff_BS)))\n",
" \n",
" \n",
" plt.plot(np.log(N_list), np.log(errors), 'o-', label='Erreur numérique')\n",
" plt.plot(np.log(N_list), 0.5 * np.log(N_list), '--', label='Ordre 1/2')\n",
" plt.plot(np.log(N_list), np.log(N_list), '--', label='Ordre 1')\n",
" plt.plot(np.log(N_list), 2*np.log(N_list), '--', label='Ordre 2')\n",
" plt.xlabel('log(h)')\n",
" plt.ylabel('log(Erreur)')\n",
" plt.title('Convergence de l\\'erreur du payoff actualisé')\n",
"\n",
" plt.plot(np.log(N_list), np.log(errors), \"o-\", label=\"Erreur numérique\")\n",
" plt.plot(np.log(N_list), 0.5 * np.log(N_list), \"--\", label=\"Ordre 1/2\")\n",
" plt.plot(np.log(N_list), np.log(N_list), \"--\", label=\"Ordre 1\")\n",
" plt.plot(np.log(N_list), 2 * np.log(N_list), \"--\", label=\"Ordre 2\")\n",
" plt.xlabel(\"log(h)\")\n",
" plt.ylabel(\"log(Erreur)\")\n",
" plt.title(\"Convergence de l'erreur du payoff actualisé\")\n",
" plt.legend()\n",
" plt.grid(True)\n",
"\n",
"\n",
"plt.figure(figsize=(10, 5))\n",
"plot_payoff_errors()\n",
"plt.show()"

View File

@@ -28,8 +28,9 @@
"k = np.arange(1, 12 + 1)\n",
"m = np.power(2, k)\n",
"\n",
"\n",
"def f(x):\n",
"\treturn 1 / np.sqrt(x)"
" return 1 / np.sqrt(x)"
]
},
{
@@ -39,19 +40,23 @@
"outputs": [],
"source": [
"a, b = 1, 2\n",
"\n",
"\n",
"def compute_I(f, a, b, m):\n",
" h_list = (b - a) / m\n",
" I = []\n",
" errors = []\n",
" sol_exact = quad(f, a, b)[0]\n",
" \n",
"\n",
" for h in h_list:\n",
" t = np.arange(a, b, h)\n",
" y = np.array([3/4 * h * f(t[i] + h/3) + h/4 * f(t[i] + h) for i in range(len(t))])\n",
" y = np.array(\n",
" [3 / 4 * h * f(t[i] + h / 3) + h / 4 * f(t[i] + h) for i in range(len(t))]\n",
" )\n",
" I_approx = np.sum(y)\n",
" I.append(I_approx)\n",
" errors.append(np.abs(I_approx - sol_exact))\n",
" \n",
"\n",
" return I, h_list, errors"
]
},
@@ -84,12 +89,12 @@
"print(f\"I1 = {I1}\")\n",
"\n",
"plt.figure(figsize=(10, 5))\n",
"plt.plot(np.log(h_list), np.log(errors1), 'o-', label='Erreur numérique')\n",
"plt.plot(np.log(h_list), 2*np.log(h_list), '--', label='Ordre 2')\n",
"plt.plot(np.log(h_list), 4*np.log(h_list), '--', label='Ordre 4')\n",
"plt.xlabel('log(h)')\n",
"plt.ylabel('log(Erreur)')\n",
"plt.title('Convergence de la méthode d\\'intégration')\n",
"plt.plot(np.log(h_list), np.log(errors1), \"o-\", label=\"Erreur numérique\")\n",
"plt.plot(np.log(h_list), 2 * np.log(h_list), \"--\", label=\"Ordre 2\")\n",
"plt.plot(np.log(h_list), 4 * np.log(h_list), \"--\", label=\"Ordre 4\")\n",
"plt.xlabel(\"log(h)\")\n",
"plt.ylabel(\"log(Erreur)\")\n",
"plt.title(\"Convergence de la méthode d'intégration\")\n",
"plt.legend()\n",
"plt.grid(True)\n",
"plt.show()"
@@ -116,12 +121,12 @@
"I2, h_list, errors2 = compute_I(f, a, b, m)\n",
"\n",
"plt.figure(figsize=(10, 5))\n",
"plt.plot(np.log(h_list), np.log(errors2), label='Approximation de l\\'intégrale')\n",
"plt.plot(np.log(h_list), np.log(h_list), '--', label='h')\n",
"plt.plot(np.log(h_list), 2*np.log(h_list), '--', label='h^2')\n",
"plt.xlabel('h')\n",
"plt.ylabel('Approximation de l\\'intégrale')\n",
"plt.title('Approximation de l\\'intégrale par la méthode de Simpson')\n",
"plt.plot(np.log(h_list), np.log(errors2), label=\"Approximation de l'intégrale\")\n",
"plt.plot(np.log(h_list), np.log(h_list), \"--\", label=\"h\")\n",
"plt.plot(np.log(h_list), 2 * np.log(h_list), \"--\", label=\"h^2\")\n",
"plt.xlabel(\"h\")\n",
"plt.ylabel(\"Approximation de l'intégrale\")\n",
"plt.title(\"Approximation de l'intégrale par la méthode de Simpson\")\n",
"plt.legend()\n",
"plt.show()"
]
@@ -146,19 +151,19 @@
"metadata": {},
"outputs": [],
"source": [
"def RKI(f, y0, vt, tol = 1e-6, itmax = 20):\n",
"\tN, T = len(vt), vt[-1]\n",
"\tyn = np.zeros((len(y0), N))\n",
"\tyn[:, 0] = y0\n",
"\th = T / N\n",
"def RKI(f, y0, vt, tol=1e-6, itmax=20):\n",
" N, T = len(vt), vt[-1]\n",
" yn = np.zeros((len(y0), N))\n",
" yn[:, 0] = y0\n",
" h = T / N\n",
"\n",
"\tfor n in range(N-1):\n",
"\t\tp1 = f(vt[n], yn[:, n])\n",
"\t\tF1 = lambda p2: f(vt[n] + h/3, yn[:, n] + h/6 * (p1 + p2)) - p2\n",
"\t\tp2 = newton(F1, yn[:, n], fprime=None, tol=tol, maxiter=itmax)\n",
"\t\tF2 = lambda yn1: yn[:, n] + h/4 * (3 * p2 + f(vt[n+1], yn1)) - yn1\n",
"\t\tyn[:, n + 1] = newton(F2, yn[:, n], fprime=None, tol=tol, maxiter=itmax)\n",
"\treturn yn"
" for n in range(N - 1):\n",
" p1 = f(vt[n], yn[:, n])\n",
" F1 = lambda p2: f(vt[n] + h / 3, yn[:, n] + h / 6 * (p1 + p2)) - p2\n",
" p2 = newton(F1, yn[:, n], fprime=None, tol=tol, maxiter=itmax)\n",
" F2 = lambda yn1: yn[:, n] + h / 4 * (3 * p2 + f(vt[n + 1], yn1)) - yn1\n",
" yn[:, n + 1] = newton(F2, yn[:, n], fprime=None, tol=tol, maxiter=itmax)\n",
" return yn"
]
},
{
@@ -194,14 +199,18 @@
"source": [
"a, b = [0, 2]\n",
"\n",
"\n",
"def f(t, y):\n",
" return t * np.power(y, 3) - t * y\n",
" \n",
"\n",
"\n",
"y0 = [0.5]\n",
"\n",
"\n",
"def sol_exact(t):\n",
" return 1 / np.sqrt(1 + 3 * np.exp(np.power(t, 2)))\n",
"\n",
"\n",
"x_fine = np.linspace(a, b, 1000)\n",
"y_fine = sol_exact(x_fine)\n",
"\n",
@@ -212,13 +221,13 @@
"y_exact_interp = np.interp(vt, x_fine, y_fine)\n",
"\n",
"plt.figure(figsize=(10, 5))\n",
"plt.plot(x_fine, y_fine, label='Solution exacte')\n",
"plt.scatter(vt, y, label='Solution numérique', color='red')\n",
"plt.plot(x_fine, y_fine, label=\"Solution exacte\")\n",
"plt.scatter(vt, y, label=\"Solution numérique\", color=\"red\")\n",
"plt.legend()\n",
"plt.show()\n",
"\n",
"error = np.max(np.abs(y - y_exact_interp))\n",
"print(f\"Error with h={h}: {error}\")\n"
"print(f\"Error with h={h}: {error}\")"
]
},
{
@@ -246,7 +255,7 @@
],
"source": [
"k = np.arange(1, 10 + 1)\n",
"h_list = 1/np.power(2, k)\n",
"h_list = 1 / np.power(2, k)\n",
"\n",
"errors = []\n",
"for h in h_list:\n",
@@ -258,14 +267,14 @@
"log_h = np.log(h_list)\n",
"log_errors = np.log(errors)\n",
"order = np.polyfit(log_h, log_errors, 1)[0]\n",
" \n",
"\n",
"plt.figure(figsize=(10, 5))\n",
"plt.plot(log_h, log_errors, 'o-', label=f'Erreur (ordre {order:.2f})')\n",
"plt.plot(log_h, log_h, '--', label='h')\n",
"plt.plot(log_h, 2*log_h, '--', label='h^2')\n",
"plt.plot(log_h, 4*log_h, '--', label='h^4')\n",
"plt.xlabel('log(h)')\n",
"plt.ylabel('log(error)')\n",
"plt.plot(log_h, log_errors, \"o-\", label=f\"Erreur (ordre {order:.2f})\")\n",
"plt.plot(log_h, log_h, \"--\", label=\"h\")\n",
"plt.plot(log_h, 2 * log_h, \"--\", label=\"h^2\")\n",
"plt.plot(log_h, 4 * log_h, \"--\", label=\"h^4\")\n",
"plt.xlabel(\"log(h)\")\n",
"plt.ylabel(\"log(error)\")\n",
"plt.legend()\n",
"plt.grid(True)\n",
"plt.show()\n",
@@ -306,11 +315,14 @@
"source": [
"def F(t, Y):\n",
" x, y, z = Y\n",
" return np.array([\n",
" 1 + np.power(x, 2) * y - (z + 1) * x,\n",
" x * z - np.power(x, 2) * y,\n",
" - x * z + 1.45\n",
" ])\n",
" return np.array(\n",
" [\n",
" 1 + np.power(x, 2) * y - (z + 1) * x,\n",
" x * z - np.power(x, 2) * y,\n",
" -x * z + 1.45,\n",
" ]\n",
" )\n",
"\n",
"\n",
"h = 0.025\n",
"y0 = np.array([1, 1, 1])\n",
@@ -320,20 +332,20 @@
"y = RKI(F, y0, t)\n",
"fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(15, 5))\n",
"\n",
"ax1.scatter(y[0], y[1], label='Solution numérique', color='red')\n",
"ax1.plot(sol_exact[:, 0], sol_exact[:, 1], label='Solution exacte', color='blue')\n",
"ax1.scatter(y[0], y[1], label=\"Solution numérique\", color=\"red\")\n",
"ax1.plot(sol_exact[:, 0], sol_exact[:, 1], label=\"Solution exacte\", color=\"blue\")\n",
"ax1.legend()\n",
"ax1.set_title('x vs y')\n",
"ax1.set_title(\"x vs y\")\n",
"\n",
"ax2.scatter(y[1], y[2], label='Solution numérique', color='red')\n",
"ax2.plot(sol_exact[:, 1], sol_exact[:, 2], label='Solution exacte', color='blue')\n",
"ax2.scatter(y[1], y[2], label=\"Solution numérique\", color=\"red\")\n",
"ax2.plot(sol_exact[:, 1], sol_exact[:, 2], label=\"Solution exacte\", color=\"blue\")\n",
"ax2.legend()\n",
"ax2.set_title('y vs z')\n",
"ax2.set_title(\"y vs z\")\n",
"\n",
"ax3.scatter(y[0], y[2], label='Solution numérique', color='red')\n",
"ax3.plot(sol_exact[:, 0], sol_exact[:, 2], label='Solution exacte', color='blue')\n",
"ax3.scatter(y[0], y[2], label=\"Solution numérique\", color=\"red\")\n",
"ax3.plot(sol_exact[:, 0], sol_exact[:, 2], label=\"Solution exacte\", color=\"blue\")\n",
"ax3.legend()\n",
"ax3.set_title('x vs z')\n",
"ax3.set_title(\"x vs z\")\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
@@ -357,14 +369,15 @@
],
"source": [
"def R(z):\n",
" return (1 + 3/4 * z * (1 + z/6)/(1 - z/6)) / (1 - z/4)\n",
" return (1 + 3 / 4 * z * (1 + z / 6) / (1 - z / 6)) / (1 - z / 4)\n",
"\n",
"\n",
"x = np.linspace(-15, 5, 100)\n",
"y = np.linspace(-7.5, 7.5, 100)\n",
"X, Y = np.meshgrid(x, y)\n",
"Z = R(X + 1j*Y)\n",
"Z = R(X + 1j * Y)\n",
"plt.figure(figsize=(10, 7))\n",
"plt.contour(X, Y, np.abs(Z), levels=[1], cmap='rainbow')\n",
"plt.contour(X, Y, np.abs(Z), levels=[1], cmap=\"rainbow\")\n",
"plt.grid()\n",
"plt.show()"
]

File diff suppressed because one or more lines are too long

View File

@@ -308,7 +308,6 @@
}
],
"source": [
"import numpy as np\n",
"\n",
"u = lambda x: np.sqrt((6 - x) ** 2 + 4)\n",
"\n",
@@ -364,7 +363,9 @@
"# Run Newton's method\n",
"optimal_point_newton, iterations_newton = newton_method(initial_guess_newton)\n",
"print(f\"Optimal point (Newton): {optimal_point_newton}\")\n",
"print(f\"Objective function value at optimal point (Newton): {objective_function(optimal_point_newton)}\")\n",
"print(\n",
" f\"Objective function value at optimal point (Newton): {objective_function(optimal_point_newton)}\"\n",
")\n",
"print(f\"Number of iterations (Newton): {iterations_newton}\")\n",
"\n",
"# Initial interval for dichotomy method\n",
@@ -373,7 +374,9 @@
"# Run dichotomy method\n",
"optimal_point_dichotomy, iterations_dichotomy = dichotomy_method(aL, aR)\n",
"print(f\"Optimal point (Dichotomy): {optimal_point_dichotomy}\")\n",
"print(f\"Objective function value at optimal point (Dichotomy): {objective_function(optimal_point_dichotomy)}\")\n",
"print(\n",
" f\"Objective function value at optimal point (Dichotomy): {objective_function(optimal_point_dichotomy)}\"\n",
")\n",
"print(f\"Number of iterations (Dichotomy): {iterations_dichotomy}\")"
]
},
@@ -564,9 +567,7 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n"
]
"source": []
}
],
"metadata": {

View File

@@ -42,20 +42,24 @@
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"\n",
"def generate_thetas(n):\n",
" random_steps = np.random.random(n)\n",
" return np.concatenate(([0], np.cumsum(random_steps / np.sum(random_steps) * (2*np.pi))))\n",
" return np.concatenate(\n",
" ([0], np.cumsum(random_steps / np.sum(random_steps) * (2 * np.pi)))\n",
" )\n",
"\n",
"\n",
"n = 4\n",
"thetas = generate_thetas(n)\n",
"thetas_inf = np.linspace(0, 2*np.pi, 1000)\n",
"thetas_inf = np.linspace(0, 2 * np.pi, 1000)\n",
"\n",
"plt.figure(figsize=(7, 7))\n",
"plt.plot(np.cos(thetas), np.sin(thetas), label='polygon')\n",
"plt.scatter(np.cos(thetas), np.sin(thetas), color='red', label='vertices')\n",
"plt.plot(np.cos(thetas_inf), np.sin(thetas_inf), 'k--', label='unit circle', alpha=0.5)\n",
"plt.plot(np.cos(thetas), np.sin(thetas), label=\"polygon\")\n",
"plt.scatter(np.cos(thetas), np.sin(thetas), color=\"red\", label=\"vertices\")\n",
"plt.plot(np.cos(thetas_inf), np.sin(thetas_inf), \"k--\", label=\"unit circle\", alpha=0.5)\n",
"plt.legend()\n",
"plt.title(f'Polygon with {n} sides')\n",
"plt.title(f\"Polygon with {n} sides\")\n",
"plt.grid(True)\n",
"plt.xlim(-1.1, 1.1)\n",
"plt.ylim(-1.1, 1.1)\n",
@@ -207,58 +211,61 @@
}
],
"source": [
"from cProfile import label\n",
"import numpy as np\n",
"from scipy.optimize import minimize\n",
"import matplotlib.pyplot as plt\n",
"\n",
"\n",
"def polygon_perimeter(theta, n):\n",
" points = np.array([[np.cos(t), np.sin(t)] for t in theta])\n",
" perimeter = 0\n",
" for i in range(n-1):\n",
" perimeter += np.sqrt(np.sum((points[i+1] - points[i])**2))\n",
" perimeter += np.sqrt(np.sum((points[0] - points[-1])**2))\n",
" for i in range(n - 1):\n",
" perimeter += np.sqrt(np.sum((points[i + 1] - points[i]) ** 2))\n",
" perimeter += np.sqrt(np.sum((points[0] - points[-1]) ** 2))\n",
" return -perimeter\n",
"\n",
"\n",
"def constraint_increasing(theta):\n",
" return np.array([theta[i+1] - theta[i] for i in range(len(theta)-1)])\n",
" return np.array([theta[i + 1] - theta[i] for i in range(len(theta) - 1)])\n",
"\n",
"\n",
"def optimize_polygon(n):\n",
" theta0 = generate_thetas(n)\n",
" \n",
"\n",
" constraints = [\n",
" {'type': 'ineq', 'fun': constraint_increasing},\n",
" {'type': 'eq', 'fun': lambda x: x[0]},\n",
" {'type': 'ineq', 'fun': lambda x: 2*np.pi - x[-1]}\n",
" {\"type\": \"ineq\", \"fun\": constraint_increasing},\n",
" {\"type\": \"eq\", \"fun\": lambda x: x[0]},\n",
" {\"type\": \"ineq\", \"fun\": lambda x: 2 * np.pi - x[-1]},\n",
" ]\n",
"\n",
" result = minimize(\n",
" lambda x: polygon_perimeter(x, n),\n",
" theta0,\n",
" constraints=constraints,\n",
" method='SLSQP'\n",
" method=\"SLSQP\",\n",
" )\n",
" \n",
"\n",
" return result.x\n",
"\n",
"\n",
"def plot_perimeter(n):\n",
" optimal_angles = optimize_polygon(n + 1)\n",
" plt.figure(figsize=(7, 7))\n",
" t = np.linspace(0, 2*np.pi, 100)\n",
" plt.plot(np.cos(t), np.sin(t), 'k--', alpha=0.5, label='unit circle')\n",
" t = np.linspace(0, 2 * np.pi, 100)\n",
" plt.plot(np.cos(t), np.sin(t), \"k--\", alpha=0.5, label=\"unit circle\")\n",
"\n",
" points = np.array([[np.cos(t), np.sin(t)] for t in optimal_angles])\n",
" points = np.vstack([points, points[0]])\n",
" plt.plot(points[:, 0], points[:, 1], 'b-', linewidth=2, label='optimal polygon')\n",
" plt.scatter(points[:-1, 0], points[:-1, 1], color='red', label='vertices')\n",
" plt.plot(points[:, 0], points[:, 1], \"b-\", linewidth=2, label=\"optimal polygon\")\n",
" plt.scatter(points[:-1, 0], points[:-1, 1], color=\"red\", label=\"vertices\")\n",
"\n",
" plt.legend()\n",
" plt.axis('equal')\n",
" plt.axis(\"equal\")\n",
" plt.grid(True)\n",
" plt.title(f'Optimal {n}-sided Polygon Inscribed in Unit Circle')\n",
" plt.xlabel('x')\n",
" plt.ylabel('y')\n",
" plt.axis('equal')\n",
" plt.title(f\"Optimal {n}-sided Polygon Inscribed in Unit Circle\")\n",
" plt.xlabel(\"x\")\n",
" plt.ylabel(\"y\")\n",
" plt.axis(\"equal\")\n",
" plt.grid(True)\n",
" plt.show()\n",
"\n",
@@ -266,6 +273,7 @@
" print(f\"Maximum perimeter: {-polygon_perimeter(optimal_angles, n)}\")\n",
" print(f\"2 * pi = {2 * np.pi}\")\n",
"\n",
"\n",
"for n in np.arange(3, 10, 1):\n",
" plot_perimeter(n)"
]
@@ -316,9 +324,11 @@
"source": [
"x0 = np.array([2, -1, 3, 0, -5])\n",
"\n",
"\n",
"def K(x):\n",
" return np.minimum(x, 0)\n",
"\n",
"\n",
"print(f\"Initial point: {x0}\")\n",
"print(f\"Projection of x0 onto K: {K(x0)}\")"
]

View File

@@ -1,8 +1,9 @@
{
"cells": [
{
"metadata": {},
"cell_type": "markdown",
"id": "81049114d821d00e",
"metadata": {},
"source": [
"# Project - Portfolio Management\n",
"\n",
@@ -11,52 +12,36 @@
"### Time period studied from 2017-01-01 to 2018-01-01\n",
"\n",
"### Risk-free rate: 2%"
],
"id": "81049114d821d00e"
]
},
{
"cell_type": "code",
"execution_count": 51,
"id": "initial_id",
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2024-11-25T13:43:46.298758Z",
"start_time": "2024-11-25T13:43:46.293696Z"
}
},
"collapsed": true
},
"outputs": [],
"source": [
"import yfinance as yf\n",
"import pandas as pd\n",
"import numpy as np"
],
"outputs": [],
"execution_count": 51
]
},
{
"cell_type": "code",
"execution_count": 52,
"id": "9f9fc36832c97e0",
"metadata": {
"ExecuteTime": {
"end_time": "2024-11-25T13:43:47.318911Z",
"start_time": "2024-11-25T13:43:47.198820Z"
}
},
"cell_type": "code",
"source": [
"# Data Extraction\n",
"Tickers = [\"^RUT\", \"^IXIC\", \"^GSPC\", \"XWD.TO\"]\n",
"start_input = \"2017-01-01\"\n",
"end_input = \"2018-01-01\"\n",
"S = pd.DataFrame()\n",
"for t in Tickers:\n",
" S[t] = yf.Tickers(t).history(start=start_input, end=end_input)[\"Close\"]\n",
"\n",
"S = S.interpolate(method=\"pad\")\n",
"\n",
"# Show the first five and last five values extracted\n",
"display(S.head())\n",
"display(S.tail())\n",
"print(S.shape)"
],
"id": "9f9fc36832c97e0",
"outputs": [
{
"name": "stderr",
@@ -72,15 +57,6 @@
},
{
"data": {
"text/plain": [
" ^RUT ^IXIC ^GSPC XWD.TO\n",
"Date \n",
"2017-01-03 00:00:00+00:00 1365.489990 5429.080078 2257.830078 38.499630\n",
"2017-01-04 00:00:00+00:00 1387.949951 5477.000000 2270.750000 38.553375\n",
"2017-01-05 00:00:00+00:00 1371.939941 5487.939941 2269.000000 38.481716\n",
"2017-01-06 00:00:00+00:00 1367.280029 5521.060059 2276.979980 38.517544\n",
"2017-01-09 00:00:00+00:00 1357.489990 5531.819824 2268.899902 38.383186"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
@@ -152,6 +128,15 @@
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" ^RUT ^IXIC ^GSPC XWD.TO\n",
"Date \n",
"2017-01-03 00:00:00+00:00 1365.489990 5429.080078 2257.830078 38.499630\n",
"2017-01-04 00:00:00+00:00 1387.949951 5477.000000 2270.750000 38.553375\n",
"2017-01-05 00:00:00+00:00 1371.939941 5487.939941 2269.000000 38.481716\n",
"2017-01-06 00:00:00+00:00 1367.280029 5521.060059 2276.979980 38.517544\n",
"2017-01-09 00:00:00+00:00 1357.489990 5531.819824 2268.899902 38.383186"
]
},
"metadata": {},
@@ -159,15 +144,6 @@
},
{
"data": {
"text/plain": [
" ^RUT ^IXIC ^GSPC XWD.TO\n",
"Date \n",
"2017-12-22 00:00:00+00:00 1542.930054 6959.959961 2683.340088 44.323349\n",
"2017-12-26 00:00:00+00:00 1544.229980 6936.250000 2680.500000 44.323349\n",
"2017-12-27 00:00:00+00:00 1543.939941 6939.339844 2682.620117 44.052303\n",
"2017-12-28 00:00:00+00:00 1548.930054 6950.160156 2687.540039 43.857414\n",
"2017-12-29 00:00:00+00:00 1535.510010 6903.390137 2673.610107 43.784576"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
@@ -239,6 +215,15 @@
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" ^RUT ^IXIC ^GSPC XWD.TO\n",
"Date \n",
"2017-12-22 00:00:00+00:00 1542.930054 6959.959961 2683.340088 44.323349\n",
"2017-12-26 00:00:00+00:00 1544.229980 6936.250000 2680.500000 44.323349\n",
"2017-12-27 00:00:00+00:00 1543.939941 6939.339844 2682.620117 44.052303\n",
"2017-12-28 00:00:00+00:00 1548.930054 6950.160156 2687.540039 43.857414\n",
"2017-12-29 00:00:00+00:00 1535.510010 6903.390137 2673.610107 43.784576"
]
},
"metadata": {},
@@ -252,63 +237,69 @@
]
}
],
"execution_count": 52
"source": [
"# Data Extraction\n",
"Tickers = [\"^RUT\", \"^IXIC\", \"^GSPC\", \"XWD.TO\"]\n",
"start_input = \"2017-01-01\"\n",
"end_input = \"2018-01-01\"\n",
"S = pd.DataFrame()\n",
"for t in Tickers:\n",
" S[t] = yf.Tickers(t).history(start=start_input, end=end_input)[\"Close\"]\n",
"\n",
"S = S.interpolate(method=\"pad\")\n",
"\n",
"# Show the first five and last five values extracted\n",
"display(S.head())\n",
"display(S.tail())\n",
"print(S.shape)"
]
},
{
"cell_type": "code",
"execution_count": 53,
"id": "53483cf3a925a4db",
"metadata": {
"ExecuteTime": {
"end_time": "2024-11-25T13:43:50.080380Z",
"start_time": "2024-11-25T13:43:50.073119Z"
}
},
"cell_type": "code",
"outputs": [],
"source": [
"R = S / S.shift() - 1\n",
"R = R[1:]\n",
"mean_d = R.mean()\n",
"covar_d = R.cov()\n",
"corr = R.corr()"
],
"id": "53483cf3a925a4db",
"outputs": [],
"execution_count": 53
]
},
{
"cell_type": "code",
"execution_count": 54,
"id": "c327ed5967b1f442",
"metadata": {
"ExecuteTime": {
"end_time": "2024-11-25T13:43:50.965092Z",
"start_time": "2024-11-25T13:43:50.961969Z"
}
},
"cell_type": "code",
"outputs": [],
"source": [
"mean = mean_d * 252\n",
"covar = covar_d * 252\n",
"std = np.sqrt(np.diag(covar))"
],
"id": "c327ed5967b1f442",
"outputs": [],
"execution_count": 54
]
},
{
"cell_type": "code",
"execution_count": 55,
"id": "6bc6a850bf06cc9d",
"metadata": {
"ExecuteTime": {
"end_time": "2024-11-25T13:43:51.701725Z",
"start_time": "2024-11-25T13:43:51.695020Z"
}
},
"cell_type": "code",
"source": [
"print(\"Mean:\\n\")\n",
"print(mean)\n",
"print(\"\\nCovariance:\\n\")\n",
"print(covar)\n",
"print(\"\\nStandard Deviation:\\n\")\n",
"print(std)\n",
"print(\"\\nCorrelation:\\n\")\n",
"print(corr)"
],
"id": "6bc6a850bf06cc9d",
"outputs": [
{
"name": "stdout",
@@ -344,22 +335,34 @@
]
}
],
"execution_count": 55
"source": [
"print(\"Mean:\\n\")\n",
"print(mean)\n",
"print(\"\\nCovariance:\\n\")\n",
"print(covar)\n",
"print(\"\\nStandard Deviation:\\n\")\n",
"print(std)\n",
"print(\"\\nCorrelation:\\n\")\n",
"print(corr)"
]
},
{
"metadata": {},
"cell_type": "markdown",
"source": "# Question 1",
"id": "fc4bec874f710f7c"
"id": "fc4bec874f710f7c",
"metadata": {},
"source": "# Question 1"
},
{
"cell_type": "code",
"execution_count": 56,
"id": "780c9cca6e0ed2d3",
"metadata": {
"ExecuteTime": {
"end_time": "2024-11-25T13:43:53.113423Z",
"start_time": "2024-11-25T13:43:53.109514Z"
}
},
"cell_type": "code",
"outputs": [],
"source": [
"r = 0.02\n",
"d = len(Tickers)\n",
@@ -369,32 +372,18 @@
"\n",
"a = vec1.T.dot(inv_sigma).dot(vec1)\n",
"b = mean.T.dot(inv_sigma).dot(vec1)"
],
"id": "780c9cca6e0ed2d3",
"outputs": [],
"execution_count": 56
]
},
{
"cell_type": "code",
"execution_count": 57,
"id": "81c956f147c68070",
"metadata": {
"ExecuteTime": {
"end_time": "2024-11-25T13:43:54.545400Z",
"start_time": "2024-11-25T13:43:54.541579Z"
}
},
"cell_type": "code",
"source": [
"# Tangent portfolio\n",
"pi_T = inv_sigma.dot(mean - r * vec1) / (b - r * a)\n",
"sd_T = np.sqrt(pi_T.T.dot(sigma).dot(pi_T)) # Variance\n",
"m_T = pi_T.T.dot(mean) # expected return\n",
"\n",
"print(f\"Expected return m_T: {m_T}\")\n",
"print(f\"Standard deviation sd_T: {sd_T}\")\n",
"print(f\"Allocation pi_T: {pi_T}\")\n",
"print(\n",
" f\"We can verify that the allocation is possible as the sum of the allocations for the different indices is {sum(pi_T)}, that is very close to 1\")"
],
"id": "81c956f147c68070",
"outputs": [
{
"name": "stdout",
@@ -407,32 +396,36 @@
]
}
],
"execution_count": 57
"source": [
"# Tangent portfolio\n",
"pi_T = inv_sigma.dot(mean - r * vec1) / (b - r * a)\n",
"sd_T = np.sqrt(pi_T.T.dot(sigma).dot(pi_T)) # Variance\n",
"m_T = pi_T.T.dot(mean) # expected return\n",
"\n",
"print(f\"Expected return m_T: {m_T}\")\n",
"print(f\"Standard deviation sd_T: {sd_T}\")\n",
"print(f\"Allocation pi_T: {pi_T}\")\n",
"print(\n",
" f\"We can verify that the allocation is possible as the sum of the allocations for the different indices is {sum(pi_T)}, that is very close to 1\"\n",
")"
]
},
{
"metadata": {},
"cell_type": "markdown",
"source": "# Question 2",
"id": "2e121c2dfb946f3c"
"id": "2e121c2dfb946f3c",
"metadata": {},
"source": "# Question 2"
},
{
"cell_type": "code",
"execution_count": 58,
"id": "c169808384ca1112",
"metadata": {
"ExecuteTime": {
"end_time": "2024-11-25T13:43:59.797115Z",
"start_time": "2024-11-25T13:43:59.792462Z"
}
},
"cell_type": "code",
"source": [
"for i in range(len(std)):\n",
" print(f\"The annualized volatilities of the index {Tickers[i]} is {std[i]}\")\n",
" print(f\"The annualized expected returns of the index {Tickers[i]} is {mean[Tickers[i]]}\")\n",
" print(\"\")\n",
"\n",
"print(f\"The annualized volatility of the Tangent Portfolio is {sd_T * np.sqrt(252)}\")\n",
"print(f\"The annualized expected return of the Tangent Portfolio is {m_T * 252}\")"
],
"id": "c169808384ca1112",
"outputs": [
{
"name": "stdout",
@@ -455,29 +448,34 @@
]
}
],
"execution_count": 58
"source": [
"for i in range(len(std)):\n",
" print(f\"The annualized volatilities of the index {Tickers[i]} is {std[i]}\")\n",
" print(\n",
" f\"The annualized expected returns of the index {Tickers[i]} is {mean[Tickers[i]]}\"\n",
" )\n",
" print(\"\")\n",
"\n",
"print(f\"The annualized volatility of the Tangent Portfolio is {sd_T * np.sqrt(252)}\")\n",
"print(f\"The annualized expected return of the Tangent Portfolio is {m_T * 252}\")"
]
},
{
"metadata": {},
"cell_type": "markdown",
"source": "# Question 3",
"id": "af8d29ecdbf2ae1"
"id": "af8d29ecdbf2ae1",
"metadata": {},
"source": "# Question 3"
},
{
"cell_type": "code",
"execution_count": 59,
"id": "2e0215ab7904906a",
"metadata": {
"ExecuteTime": {
"end_time": "2024-11-25T13:44:01.393591Z",
"start_time": "2024-11-25T13:44:01.388830Z"
}
},
"cell_type": "code",
"source": [
"print(\"sharpe ratio of the Tangent portfolio :\", (m_T - r) / sd_T)\n",
"\n",
"for i in range(4):\n",
" print(f\"the sharpe ratio of the index {Tickers[i]} is {(mean[Tickers[i]] - r) / std[i]}\")"
],
"id": "2e0215ab7904906a",
"outputs": [
{
"name": "stdout",
@@ -491,7 +489,14 @@
]
}
],
"execution_count": 59
"source": [
"print(\"sharpe ratio of the Tangent portfolio :\", (m_T - r) / sd_T)\n",
"\n",
"for i in range(4):\n",
" print(\n",
" f\"the sharpe ratio of the index {Tickers[i]} is {(mean[Tickers[i]] - r) / std[i]}\"\n",
" )"
]
}
],
"metadata": {

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -43,7 +43,7 @@
"source": [
"import warnings\n",
"\n",
"warnings.filterwarnings('ignore')"
"warnings.filterwarnings(\"ignore\")"
]
},
{
@@ -434,7 +434,7 @@
],
"source": [
"import numpy as np\n",
"import pandas as pd # dataframes are in pandas \n",
"import pandas as pd # dataframes are in pandas\n",
"import matplotlib.pyplot as plt\n",
"\n",
"hitters = pd.read_csv(\"data/Hitters.csv\", index_col=\"Name\")\n",
@@ -895,9 +895,13 @@
],
"source": [
"# Hint for Question (4) :\n",
"ex = pd.DataFrame(dict(nom=['Alice', 'Nicolas', 'Jean'],\n",
" age=[19, np.NaN, np.NaN],\n",
" exam=[15, 14, np.NaN]))\n",
"ex = pd.DataFrame(\n",
" dict(\n",
" nom=[\"Alice\", \"Nicolas\", \"Jean\"],\n",
" age=[19, np.NaN, np.NaN],\n",
" exam=[15, 14, np.NaN],\n",
" )\n",
")\n",
"\n",
"print(\"data : \\n\", ex)\n",
"print(\"First result : \\n\", ex.isnull())\n",
@@ -1080,10 +1084,10 @@
],
"source": [
"# We remove the players for whom Salary is missing\n",
"hitters.dropna(subset=['Salary'], inplace=True)\n",
"hitters.dropna(subset=[\"Salary\"], inplace=True)\n",
"\n",
"X = hitters.select_dtypes(include=int)\n",
"Y = hitters['Salary']\n",
"Y = hitters[\"Salary\"]\n",
"\n",
"# check-point\n",
"print(Y.isnull().sum()) # should be 0\n",
@@ -1109,7 +1113,7 @@
},
"outputs": [],
"source": [
"#Answer for Exercise 4\n",
"# Answer for Exercise 4\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.3, random_state=42)"
@@ -1697,8 +1701,8 @@
}
],
"source": [
"#the values of alphas chosen by defaults are also on a logarithmic scale\n",
"plt.plot(np.log10(alphas_lasso), '.')"
"# the values of alphas chosen by defaults are also on a logarithmic scale\n",
"plt.plot(np.log10(alphas_lasso), \".\")"
]
},
{
@@ -1735,8 +1739,8 @@
"source": [
"fig, ax = plt.subplots(figsize=(8, 6))\n",
"ax.plot(np.log10(alphas_lasso), coefs_lasso)\n",
"ax.set_xlabel('log10(alpha)')\n",
"ax.set_ylabel('Lasso coefficients')"
"ax.set_xlabel(\"log10(alpha)\")\n",
"ax.set_ylabel(\"Lasso coefficients\")"
]
},
{
@@ -1792,7 +1796,7 @@
"print(\"1.\\n\", ind)\n",
"print(\"2.\\n\", ind == 0)\n",
"print(\"3. Le nombre de 0 de chaque colonne est :\\n \", (ind == 0).sum(axis=0))\n",
"print(\"4. Le nombre de 0 de chaque ligne est : \\n\", (ind == 0).sum(axis=1))\n"
"print(\"4. Le nombre de 0 de chaque ligne est : \\n\", (ind == 0).sum(axis=1))"
]
},
{
@@ -2294,18 +2298,19 @@
"from sklearn.linear_model import LinearRegression\n",
"\n",
"linReg = LinearRegression()\n",
"linReg.fit(Xtrain,\n",
" Ytrain) # no need to scale for OLS if you just want to predict (unless the solver works best with scaled data)\n",
"linReg.fit(\n",
" Xtrain, Ytrain\n",
") # no need to scale for OLS if you just want to predict (unless the solver works best with scaled data)\n",
"# the predictions should not be different with or without standardization (could differ only owing to numerical problems)\n",
"hatY_LinReg = linReg.predict(Xtest)\n",
"\n",
"fig, ax = plt.subplots()\n",
"ax.scatter(Ytest, hatY_LinReg, s=5)\n",
"ax.plot([0, 1], [0, 1], transform=ax.transAxes, ls='--', c='gray')\n",
"ax.set_xlabel('Ytest')\n",
"ax.set_ylabel('hatY')\n",
"ax.set_title('Predicted vs true salaries for OLS estimator')\n",
"ax.axis('square')"
"ax.plot([0, 1], [0, 1], transform=ax.transAxes, ls=\"--\", c=\"gray\")\n",
"ax.set_xlabel(\"Ytest\")\n",
"ax.set_ylabel(\"hatY\")\n",
"ax.set_title(\"Predicted vs true salaries for OLS estimator\")\n",
"ax.axis(\"square\")"
]
},
{
@@ -2355,11 +2360,11 @@
"\n",
"fig, ax = plt.subplots()\n",
"ax.scatter(Ytest, hatY_ridge, s=5)\n",
"ax.plot([0, 1], [0, 1], transform=ax.transAxes, ls='--', c='gray')\n",
"ax.set_xlabel('Ytest')\n",
"ax.set_ylabel('hatY')\n",
"ax.set_title('Predicted vs true salaries for Ridge estimator')\n",
"ax.axis('square')"
"ax.plot([0, 1], [0, 1], transform=ax.transAxes, ls=\"--\", c=\"gray\")\n",
"ax.set_xlabel(\"Ytest\")\n",
"ax.set_ylabel(\"hatY\")\n",
"ax.set_title(\"Predicted vs true salaries for Ridge estimator\")\n",
"ax.axis(\"square\")"
]
},
{
@@ -2408,11 +2413,11 @@
"\n",
"fig, ax = plt.subplots()\n",
"ax.scatter(Ytest, hatY_lasso, s=5)\n",
"ax.plot([0, 1], [0, 1], transform=ax.transAxes, ls='--', c='gray')\n",
"ax.set_xlabel('Ytest')\n",
"ax.set_ylabel('hatY')\n",
"ax.set_title('Predicted vs true salaries for Ridge estimator')\n",
"ax.axis('square')"
"ax.plot([0, 1], [0, 1], transform=ax.transAxes, ls=\"--\", c=\"gray\")\n",
"ax.set_xlabel(\"Ytest\")\n",
"ax.set_ylabel(\"hatY\")\n",
"ax.set_title(\"Predicted vs true salaries for Ridge estimator\")\n",
"ax.axis(\"square\")"
]
},
{
@@ -2445,7 +2450,7 @@
"source": [
"from sklearn.linear_model import LassoLarsIC\n",
"\n",
"lassoBIC = LassoLarsIC(criterion='bic')\n",
"lassoBIC = LassoLarsIC(criterion=\"bic\")\n",
"lassoBIC.fit(XtrainScaled, Ytrain)\n",
"print(\"best alpha chosen by BIC criterion :\", lassoBIC.alpha_)\n",
"print(\"best alpha chosen by CV :\", lassoCV.alpha_)\n",
@@ -2499,11 +2504,11 @@
"\n",
"fig, ax = plt.subplots()\n",
"ax.scatter(Ytest, hatY_BIC, s=5)\n",
"ax.plot([0, 1], [0, 1], transform=ax.transAxes, ls='--', c='gray')\n",
"ax.set_xlabel('Ytest')\n",
"ax.set_ylabel('hatY')\n",
"ax.set_title('Predicted vs true salaries for LassoBIC estimator')\n",
"ax.axis('square')"
"ax.plot([0, 1], [0, 1], transform=ax.transAxes, ls=\"--\", c=\"gray\")\n",
"ax.set_xlabel(\"Ytest\")\n",
"ax.set_ylabel(\"hatY\")\n",
"ax.set_title(\"Predicted vs true salaries for LassoBIC estimator\")\n",
"ax.axis(\"square\")"
]
},
{
@@ -2539,7 +2544,9 @@
"from sklearn.metrics import mean_squared_error\n",
"\n",
"MSEs = []\n",
"for name, estimator in zip([\"LassoCV\", \"LassoBIC\", \"RidgeCV\", \"OLS\"], [lassoCV, lassoBIC, ridgeCV, linReg]):\n",
"for name, estimator in zip(\n",
" [\"LassoCV\", \"LassoBIC\", \"RidgeCV\", \"OLS\"], [lassoCV, lassoBIC, ridgeCV, linReg]\n",
"):\n",
" y_pred = estimator.predict(Xtest)\n",
" MSE = mean_squared_error(Ytest, y_pred)\n",
" print(f\"MSE for {name} : {MSE}\")\n",
@@ -2584,10 +2591,12 @@
"ols_errors = np.abs(Ytest - linReg.predict(Xtest))\n",
"\n",
"fig, ax = plt.subplots(figsize=(10, 6))\n",
"ax.boxplot([ridge_cv_errors, lasso_cv_errors, lasso_bic_errors, ols_errors],\n",
" labels=['RidgeCV', 'LassoCV', 'LassoBIC', 'OLS'])\n",
"ax.set_title('Boxplot of Absolute Errors')\n",
"ax.set_ylabel('Absolute Error')\n",
"ax.boxplot(\n",
" [ridge_cv_errors, lasso_cv_errors, lasso_bic_errors, ols_errors],\n",
" labels=[\"RidgeCV\", \"LassoCV\", \"LassoBIC\", \"OLS\"],\n",
")\n",
"ax.set_title(\"Boxplot of Absolute Errors\")\n",
"ax.set_ylabel(\"Absolute Error\")\n",
"plt.show()"
]
},

View File

@@ -32,7 +32,7 @@
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd \n",
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt"
]
@@ -226,7 +226,7 @@
}
],
"source": [
"sms = pd.read_csv(\"data/spam.csv\", encoding='latin')\n",
"sms = pd.read_csv(\"data/spam.csv\", encoding=\"latin\")\n",
"\n",
"sms.head()"
]
@@ -244,7 +244,7 @@
"metadata": {},
"outputs": [],
"source": [
"sms.rename(columns={'v1':'Label', 'v2':'Text'}, inplace=True)"
"sms.rename(columns={\"v1\": \"Label\", \"v2\": \"Text\"}, inplace=True)"
]
},
{
@@ -644,7 +644,7 @@
}
],
"source": [
"sms['Labelnum']=sms['Label'].map({'ham':0,'spam':1})\n",
"sms[\"Labelnum\"] = sms[\"Label\"].map({\"ham\": 0, \"spam\": 1})\n",
"\n",
"sms.head()"
]
@@ -674,13 +674,13 @@
}
],
"source": [
"# Hint 1 for Exercise 1 \n",
"a=np.array([0,1,1,1,0])\n",
"print (len(a))\n",
"print (a[a==0])\n",
"print (len(a[a==0]))\n",
"print (a[a==1])\n",
"print (len(a[a==1]))"
"# Hint 1 for Exercise 1\n",
"a = np.array([0, 1, 1, 1, 0])\n",
"print(len(a))\n",
"print(a[a == 0])\n",
"print(len(a[a == 0]))\n",
"print(a[a == 1])\n",
"print(len(a[a == 1]))"
]
},
{
@@ -881,8 +881,8 @@
}
],
"source": [
"# Hint 2 for Exercise 1 \n",
"sms[sms.Labelnum==0].head()"
"# Hint 2 for Exercise 1\n",
"sms[sms.Labelnum == 0].head()"
]
},
{
@@ -1083,8 +1083,8 @@
}
],
"source": [
"# Hint 3 for Exercise 1 \n",
"sms[sms.Labelnum==1].head()"
"# Hint 3 for Exercise 1\n",
"sms[sms.Labelnum == 1].head()"
]
},
{
@@ -1104,8 +1104,8 @@
],
"source": [
"print(len(sms))\n",
"print(sms[sms.Label == 'ham'].shape)\n",
"print(sms[sms.Label == 'spam'].shape)"
"print(sms[sms.Label == \"ham\"].shape)\n",
"print(sms[sms.Label == \"spam\"].shape)"
]
},
{
@@ -1136,8 +1136,8 @@
],
"source": [
"# Hint 1 for Exercise 2\n",
"print (sms.loc[0, 'Text']) \n",
"print (\"--> The length of the first sms is\", len(sms.loc[0, 'Text']))"
"print(sms.loc[0, \"Text\"])\n",
"print(\"--> The length of the first sms is\", len(sms.loc[0, \"Text\"]))"
]
},
{
@@ -1160,10 +1160,13 @@
],
"source": [
"plt.figure(figsize=(10, 6))\n",
"plt.hist(sms.loc[:, 'Text'].apply(len), bins='stone',)\n",
"plt.title('Histogram of SMS Lengths')\n",
"plt.xlabel('Length')\t\n",
"plt.ylabel('Frequency')\n",
"plt.hist(\n",
" sms.loc[:, \"Text\"].apply(len),\n",
" bins=\"stone\",\n",
")\n",
"plt.title(\"Histogram of SMS Lengths\")\n",
"plt.xlabel(\"Length\")\n",
"plt.ylabel(\"Frequency\")\n",
"plt.show()"
]
},
@@ -1222,30 +1225,41 @@
}
],
"source": [
"Example = pd.DataFrame([['iphone gratuit iphone gratuit',1],['mille vert gratuit',0],\n",
" ['iphone mille euro',0],['argent gratuit euro gratuit',1]],\n",
" columns=['sms', 'label'])\n",
"Example = pd.DataFrame(\n",
" [\n",
" [\"iphone gratuit iphone gratuit\", 1],\n",
" [\"mille vert gratuit\", 0],\n",
" [\"iphone mille euro\", 0],\n",
" [\"argent gratuit euro gratuit\", 1],\n",
" ],\n",
" columns=[\"sms\", \"label\"],\n",
")\n",
"vec = CountVectorizer()\n",
"X = vec.fit_transform(Example.sms)\n",
"\n",
"# 1. Displaying the vocabulary\n",
"\n",
"print (\"1. The vocabulary of Example is \", vec.vocabulary_)\n",
"print(\"1. The vocabulary of Example is \", vec.vocabulary_)\n",
"\n",
"# 1 bis :\n",
"\n",
"print('The vocabulary arranged in alphabetical order : ', sorted(list(vec.vocabulary_.keys())))\n",
"print(\n",
" \"The vocabulary arranged in alphabetical order : \",\n",
" sorted(list(vec.vocabulary_.keys())),\n",
")\n",
"\n",
"# 2. Displaying the vectors : \n",
"# 2. Displaying the vectors :\n",
"\n",
"print (\"2. The vectors corresponding to the sms are : \\n\", X.toarray())# X.toarray because \n",
"# X is a \"sparse\" matrix. \n",
"print(\n",
" \"2. The vectors corresponding to the sms are : \\n\", X.toarray()\n",
") # X.toarray because\n",
"# X is a \"sparse\" matrix.\n",
"\n",
"# 3. For a new data x_0=\"iphone gratuit\", \n",
"# you must also transform x_0 into a numerical vector before predicting. \n",
"# 3. For a new data x_0=\"iphone gratuit\",\n",
"# you must also transform x_0 into a numerical vector before predicting.\n",
"\n",
"vec_x_0=vec.transform(['iphone gratuit']).toarray() # \n",
"print (\"3. The numerical vector corresponding to (x_0=iphone gratuit) is \\n\", vec_x_0 )"
"vec_x_0 = vec.transform([\"iphone gratuit\"]).toarray() #\n",
"print(\"3. The numerical vector corresponding to (x_0=iphone gratuit) is \\n\", vec_x_0)"
]
},
{
@@ -1267,7 +1281,7 @@
],
"source": [
"#'sparse' version (without \"to_array\")\n",
"v = vec.transform(['iphone iphone gratuit'])\n",
"v = vec.transform([\"iphone iphone gratuit\"])\n",
"v"
]
},
@@ -1309,8 +1323,8 @@
}
],
"source": [
"# \"(0,2) 1\" means : the element in row 0 and column 2 is equal to 1. \n",
"# \"(0,3) 2\" means : the element in row 0 and column 3 is equal to 2. \n",
"# \"(0,2) 1\" means : the element in row 0 and column 2 is equal to 1.\n",
"# \"(0,3) 2\" means : the element in row 0 and column 3 is equal to 2.\n",
"print(v)"
]
},
@@ -1340,8 +1354,8 @@
}
],
"source": [
"vec_x_1 = vec.transform(['iphone vert gratuit']).toarray()\n",
"vec_x_2 = vec.transform(['iphone rouge gratuit']).toarray()\n",
"vec_x_1 = vec.transform([\"iphone vert gratuit\"]).toarray()\n",
"vec_x_2 = vec.transform([\"iphone rouge gratuit\"]).toarray()\n",
"print(vec_x_1)\n",
"print(vec_x_2)"
]
@@ -1372,8 +1386,8 @@
"outputs": [],
"source": [
"vectorizer = CountVectorizer()\n",
"X = vectorizer.fit_transform(sms['Text'])\n",
"y = sms['Labelnum']"
"X = vectorizer.fit_transform(sms[\"Text\"])\n",
"y = sms[\"Labelnum\"]"
]
},
{
@@ -1400,10 +1414,12 @@
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.30,random_state=50)\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
" X, y, test_size=0.30, random_state=50\n",
")\n",
"\n",
"print (\"size of the training set: \", X_train.shape[0])\n",
"print (\"size of the test set :\", X_test.shape[0])"
"print(\"size of the training set: \", X_train.shape[0])\n",
"print(\"size of the test set :\", X_test.shape[0])"
]
},
{
@@ -1906,7 +1922,7 @@
"from sklearn.metrics import accuracy_score\n",
"\n",
"y_pred = sms_bayes.predict(X_test)\n",
"print (\"The accuracy score on the test set is \", accuracy_score(y_test, y_pred))"
"print(\"The accuracy score on the test set is \", accuracy_score(y_test, y_pred))"
]
},
{
@@ -1969,10 +1985,17 @@
}
],
"source": [
"my_sms = vectorizer.transform(['free trial!', 'Iphone 15 is now free', 'I want coffee', 'I want to buy a new iphone'])\n",
"my_sms = vectorizer.transform(\n",
" [\n",
" \"free trial!\",\n",
" \"Iphone 15 is now free\",\n",
" \"I want coffee\",\n",
" \"I want to buy a new iphone\",\n",
" ]\n",
")\n",
"\n",
"pred_my_sms = sms_bayes.predict(my_sms)\n",
"print (pred_my_sms)"
"print(pred_my_sms)"
]
},
{
@@ -1999,7 +2022,7 @@
"from sklearn.naive_bayes import BernoulliNB\n",
"\n",
"# Load the MNIST dataset\n",
"mnist = fetch_openml('mnist_784', version=1, parser='auto')\n",
"mnist = fetch_openml(\"mnist_784\", version=1, parser=\"auto\")\n",
"X, y = mnist.data, mnist.target"
]
},
@@ -2036,7 +2059,9 @@
"source": [
"X_copy = (X.copy() >= 127).astype(int)\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X_copy, y, test_size=0.25, random_state=42)\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
" X_copy, y, test_size=0.25, random_state=42\n",
")\n",
"\n",
"ber_bayes = BernoulliNB()\n",
"ber_bayes.fit(X_train, y_train)\n",
@@ -2059,6 +2084,7 @@
"outputs": [],
"source": [
"from keras.datasets import cifar10\n",
"\n",
"(x_train, y_train), (x_test, y_test) = cifar10.load_data()"
]
},
@@ -2077,7 +2103,7 @@
}
],
"source": [
"# reminder : the output is an RGB image 32 x 32 \n",
"# reminder : the output is an RGB image 32 x 32\n",
"print(x_train.shape)\n",
"print(y_train.shape)"
]

File diff suppressed because one or more lines are too long

View File

@@ -46,23 +46,23 @@
"\n",
"\n",
"np.random.seed(12)\n",
"num_observations=400\n",
"num_observations = 400\n",
"\n",
"center1=[0,0]\n",
"center2=[1,4]\n",
"center3=[-3,2]\n",
"center1 = [0, 0]\n",
"center2 = [1, 4]\n",
"center3 = [-3, 2]\n",
"\n",
"x1=np.random.multivariate_normal(center1,[[1,0],[0,1]], num_observations)\n",
"x2=np.random.multivariate_normal(center2,[[1,0],[0,1]], num_observations)\n",
"x3=np.random.multivariate_normal(center3,[[1,0],[0,1]], num_observations)\n",
"x1 = np.random.multivariate_normal(center1, [[1, 0], [0, 1]], num_observations)\n",
"x2 = np.random.multivariate_normal(center2, [[1, 0], [0, 1]], num_observations)\n",
"x3 = np.random.multivariate_normal(center3, [[1, 0], [0, 1]], num_observations)\n",
"\n",
"X= np.vstack((x1, x2, x3)).astype(np.float32)\n",
"X = np.vstack((x1, x2, x3)).astype(np.float32)\n",
"\n",
"plt.figure(figsize=(8,6))\n",
"plt.plot(X[:,0], X[:,1],\".b\",alpha=0.2)\n",
"plt.plot(center1[0], center1[1], '.', color='red', markersize=10)\n",
"plt.plot(center2[0], center2[1], '.', color='red', markersize=10)\n",
"plt.plot(center3[0], center3[1], '.', color='red', markersize=10)\n",
"plt.figure(figsize=(8, 6))\n",
"plt.plot(X[:, 0], X[:, 1], \".b\", alpha=0.2)\n",
"plt.plot(center1[0], center1[1], \".\", color=\"red\", markersize=10)\n",
"plt.plot(center2[0], center2[1], \".\", color=\"red\", markersize=10)\n",
"plt.plot(center3[0], center3[1], \".\", color=\"red\", markersize=10)\n",
"plt.show()"
]
},
@@ -540,10 +540,12 @@
}
],
"source": [
"plt.figure(figsize=(8,6))\n",
"plt.plot(X[:,0], X[:,1],\".b\",alpha=0.2)\n",
"plt.figure(figsize=(8, 6))\n",
"plt.plot(X[:, 0], X[:, 1], \".b\", alpha=0.2)\n",
"for center in kmeans1.cluster_centers_:\n",
" plt.plot(center[0], center[1], '.', color='red', markersize=10, label='Cluster center')\n",
" plt.plot(\n",
" center[0], center[1], \".\", color=\"red\", markersize=10, label=\"Cluster center\"\n",
" )\n",
"plt.legend()\n",
"plt.show()"
]
@@ -585,11 +587,11 @@
"# Hint: An example for plotting the Voronoi partition\n",
"from scipy.spatial import Voronoi, voronoi_plot_2d\n",
"\n",
"points_generer_voronoi = np.array([[0,0],[1,4],[-3,2]])\n",
"points_generer_voronoi = np.array([[0, 0], [1, 4], [-3, 2]])\n",
"\n",
"vor = Voronoi(points_generer_voronoi)\n",
"\n",
"fig, ax = plt.subplots(1,1,figsize=(4,4)) \n",
"fig, ax = plt.subplots(1, 1, figsize=(4, 4))\n",
"\n",
"fig = voronoi_plot_2d(vor, ax=ax, show_vertices=False)"
]
@@ -614,14 +616,16 @@
"# Answer for Exercise 3\n",
"\n",
"\n",
"fig, ax = plt.subplots(1,1,figsize=(8,6)) \n",
"plt.plot(X[:,0], X[:,1], \".b\", alpha=0.2)\n",
"fig, ax = plt.subplots(1, 1, figsize=(8, 6))\n",
"plt.plot(X[:, 0], X[:, 1], \".b\", alpha=0.2)\n",
"\n",
"vor = Voronoi(kmeans1.cluster_centers_)\n",
"fig = voronoi_plot_2d(vor, ax=ax, show_vertices=False)\n",
"\n",
"for center in kmeans1.cluster_centers_:\n",
" plt.plot(center[0], center[1], '.', color='red', markersize=10, label='Cluster center')\n",
" plt.plot(\n",
" center[0], center[1], \".\", color=\"red\", markersize=10, label=\"Cluster center\"\n",
" )\n",
"plt.legend()\n",
"plt.show()"
]
@@ -1233,10 +1237,10 @@
}
],
"source": [
"print (\"1:\", compress_model.labels_)\n",
"print (\"2:\", compress_model.labels_.shape)\n",
"print (\"3:\", compress_model.cluster_centers_)\n",
"print (\"4:\", compress_model.cluster_centers_.shape)"
"print(\"1:\", compress_model.labels_)\n",
"print(\"2:\", compress_model.labels_.shape)\n",
"print(\"3:\", compress_model.cluster_centers_)\n",
"print(\"4:\", compress_model.cluster_centers_.shape)"
]
},
{
@@ -1275,13 +1279,13 @@
"metadata": {},
"outputs": [],
"source": [
"color_new=np.zeros_like(colors)\n",
"color_new = np.zeros_like(colors)\n",
"\n",
"labels=compress_model.labels_\n",
"centers=compress_model.cluster_centers_\n",
"labels = compress_model.labels_\n",
"centers = compress_model.cluster_centers_\n",
"\n",
"for i in range(len(colors)):\n",
" color_new[i]= centers[labels[i]]"
" color_new[i] = centers[labels[i]]"
]
},
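The per-pixel loop above is correct but slow in pure Python. A vectorized sketch using NumPy fancy indexing, assuming colors, labels, and centers as defined in the cell:

# Each pixel's new color is the center of its assigned cluster;
# indexing centers by the label array maps every pixel at once.
color_new = centers[labels].astype(colors.dtype)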
{
@@ -1336,11 +1340,12 @@
],
"source": [
"import matplotlib.image as mpimg\n",
"\n",
"mpimg.imsave(\"assets/zelda_new.png\", zelda_new)\n",
"\n",
"plt.figure(figsize=(8, 6))\n",
"plt.imshow(zelda_new)\n",
"plt.show()\n"
"plt.show()"
]
},
{
@@ -1363,13 +1368,13 @@
"source": [
"import os\n",
"\n",
"size_new=os.path.getsize('assets/zelda_new.png')\n",
"size_old=os.path.getsize('assets/zelda.png')\n",
"size_new = os.path.getsize(\"assets/zelda_new.png\")\n",
"size_old = os.path.getsize(\"assets/zelda.png\")\n",
"\n",
"print (\"The original size is \", size_old, \"bytes.\")\n",
"print (\"The compressed size is \", size_new, \"bytes.\")\n",
"print(\"The original size is \", size_old, \"bytes.\")\n",
"print(\"The compressed size is \", size_new, \"bytes.\")\n",
"\n",
"print (f\"The compression factor is {size_old/size_new : .3f}\")"
"print(f\"The compression factor is {size_old / size_new: .3f}\")"
]
},
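Worth noting for readers of this cell: PNG is a lossless format, so the size reduction measured here comes from the quantized image containing only k distinct colors, which compresses far better, not from the format discarding information.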
{
@@ -1407,8 +1412,8 @@
}
],
"source": [
"partiel=plt.imread(\"assets/partiel.png\")\n",
"plt.figure(figsize = (20,10))\n",
"partiel = plt.imread(\"assets/partiel.png\")\n",
"plt.figure(figsize=(20, 10))\n",
"plt.imshow(partiel)"
]
},
@@ -1426,7 +1431,7 @@
}
],
"source": [
"print (partiel.shape)"
"print(partiel.shape)"
]
},
{
@@ -1472,23 +1477,23 @@
}
],
"source": [
"partiel_new=np.zeros_like(partiel)\n",
"partiel_new = np.zeros_like(partiel)\n",
"\n",
"noir_rgb=np.array([0,0,0])\n",
"blanc_rgb=np.array([1,1,1])\n",
"noir_rgb = np.array([0, 0, 0])\n",
"blanc_rgb = np.array([1, 1, 1])\n",
"\n",
"epsilon = 0.5 # threshold\n",
"\n",
"epsilon=0.5 # threshold\n",
" \n",
"distances = np.linalg.norm(partiel - noir_rgb, axis=2)\n",
"partiel_new = np.zeros_like(partiel)\n",
"partiel_new[distances <= epsilon] = noir_rgb\n",
"partiel_new[distances > epsilon] = blanc_rgb\n",
" \n",
"\n",
"mpimg.imsave(\"assets/partiel_new.png\", partiel_new)\n",
"\n",
"plt.figure(figsize=(20,10))\n",
"plt.figure(figsize=(20, 10))\n",
"plt.imshow(partiel_new)\n",
"plt.show()\n"
"plt.show()"
]
},
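Incidentally, the refactored cell allocates partiel_new twice. A sketch of an equivalent form that needs no preallocation, assuming partiel has three channels (which the subtraction of the (3,)-vector noir_rgb above already requires):

# Broadcast the (H, W) distance mask against the (3,)-vector colors.
mask = (np.linalg.norm(partiel - noir_rgb, axis=2) <= epsilon)[..., None]
partiel_new = np.where(mask, noir_rgb, blanc_rgb).astype(partiel.dtype)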
{
@@ -1531,16 +1536,20 @@
"mnist = tf.keras.datasets.mnist\n",
"(X_train, y_train), (X_test, y_test) = mnist.load_data()\n",
"\n",
"X_train = X_train.reshape(-1, 28*28)\n",
"X_train = X_train.reshape(-1, 28 * 28)\n",
"\n",
"kmeans2 = KMeans(n_clusters=10)\n",
"clusters = kmeans2.fit_predict(X_train)\n",
"\n",
"\n",
"def map_clusters_to_labels(clusters, true_labels):\n",
" return np.array([mode(true_labels[clusters == i], keepdims=True).mode[0] for i in range(10)])\n",
" return np.array(\n",
" [mode(true_labels[clusters == i], keepdims=True).mode[0] for i in range(10)]\n",
" )\n",
"\n",
"\n",
"cluster_to_label = map_clusters_to_labels(clusters, y_train)\n",
"print(\"Cluster to label mapping:\", cluster_to_label)\n"
"print(\"Cluster to label mapping:\", cluster_to_label)"
]
},
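With cluster_to_label in hand, a natural follow-up is to score how well the unsupervised clusters recover the digit labels. A minimal sketch, assuming clusters, cluster_to_label, and y_train from the cell above:

# Translate each sample's cluster index into that cluster's majority digit,
# then measure plain accuracy against the ground-truth labels.
y_pred = cluster_to_label[clusters]
print("Cluster-mapped accuracy:", np.mean(y_pred == y_train))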
{

View File (neural_network.ipynb)

@@ -6,7 +6,7 @@
"metadata": {},
"outputs": [],
"source": [
"import numpy as np \n",
"import numpy as np\n",
"import pandas as pd\n",
"import tensorflow as tf\n",
"import matplotlib.pyplot as plt"
@@ -178,16 +178,21 @@
"outputs": [],
"source": [
"def build_model():\n",
" model = tf.keras.models.Sequential([\n",
" tf.keras.layers.Dense(16, activation='relu', input_shape=(X.shape[1],), kernel_regularizer=tf.keras.regularizers.l2(0.01)),\n",
" tf.keras.layers.Dense(8, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01)),\n",
" tf.keras.layers.Dense(1, activation='sigmoid')\n",
" ])\n",
" model.compile(\n",
" optimizer='adam',\n",
" loss='binary_crossentropy',\n",
" metrics=['accuracy']\n",
" model = tf.keras.models.Sequential(\n",
" [\n",
" tf.keras.layers.Dense(\n",
" 16,\n",
" activation=\"relu\",\n",
" input_shape=(X.shape[1],),\n",
" kernel_regularizer=tf.keras.regularizers.l2(0.01),\n",
" ),\n",
" tf.keras.layers.Dense(\n",
" 8, activation=\"relu\", kernel_regularizer=tf.keras.regularizers.l2(0.01)\n",
" ),\n",
" tf.keras.layers.Dense(1, activation=\"sigmoid\"),\n",
" ]\n",
" )\n",
" model.compile(optimizer=\"adam\", loss=\"binary_crossentropy\", metrics=[\"accuracy\"])\n",
" return model"
]
},
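A quick sanity check for the refactored builder; note that build_model reads X.shape[1] for input_shape, so X must already be defined:

model = build_model()
model.summary()  # expect Dense(16) -> Dense(8) -> Dense(1) with their parameter counts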
@@ -291,10 +296,7 @@
"histories = []\n",
"\n",
"early_stopping = EarlyStopping(\n",
" monitor='val_loss',\n",
" patience=10,\n",
" restore_best_weights=True,\n",
" verbose=1\n",
" monitor=\"val_loss\", patience=10, restore_best_weights=True, verbose=1\n",
")\n",
"\n",
"for fold, (train_idx, val_idx) in enumerate(skf.split(X, y), 1):\n",
@@ -305,29 +307,28 @@
" scaler = StandardScaler()\n",
" X_train_scaled = scaler.fit_transform(X_train)\n",
" X_val_scaled = scaler.transform(X_val)\n",
" \n",
"\n",
" model = build_model()\n",
"\n",
" model.compile(\n",
" optimizer='adam',\n",
" loss='binary_crossentropy',\n",
" metrics=[\"f1_score\"]\n",
" )\n",
" model.compile(optimizer=\"adam\", loss=\"binary_crossentropy\", metrics=[\"f1_score\"])\n",
"\n",
" # EarlyStopping\n",
" callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)\n",
" callback = tf.keras.callbacks.EarlyStopping(\n",
" monitor=\"val_loss\", patience=10, restore_best_weights=True\n",
" )\n",
"\n",
" # Entraînement\n",
" history = model.fit(\n",
" X_train_scaled, y_train,\n",
" X_train_scaled,\n",
" y_train,\n",
" epochs=50,\n",
" batch_size=8,\n",
" validation_data=(X_val_scaled, y_val),\n",
" callbacks=[callback],\n",
" verbose=0,\n",
" class_weight={0: 1.0, 1: 2.0}\n",
" class_weight={0: 1.0, 1: 2.0},\n",
" )\n",
" \n",
"\n",
" histories.append(history.history)\n",
"\n",
" # Prédiction & F1\n",
@@ -360,9 +361,9 @@
"axes = axes.flatten() # Flatten to easily iterate\n",
"\n",
"for i, (hist, ax) in enumerate(zip(histories, axes)):\n",
" ax.plot(hist['loss'], label='Train loss', alpha=0.6)\n",
" ax.plot(hist['val_loss'], label='Val loss', linestyle='--', alpha=0.6)\n",
" ax.set_title(f\"Fold {i+1}\")\n",
" ax.plot(hist[\"loss\"], label=\"Train loss\", alpha=0.6)\n",
" ax.plot(hist[\"val_loss\"], label=\"Val loss\", linestyle=\"--\", alpha=0.6)\n",
" ax.set_title(f\"Fold {i + 1}\")\n",
" ax.set_xlabel(\"Epochs\")\n",
" if i % 2 == 0:\n",
" ax.set_ylabel(\"Binary Crossentropy\")\n",
@@ -436,7 +437,9 @@
"import tensorflow as tf\n",
"import numpy as np\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
" X, y, test_size=0.2, random_state=42, stratify=y\n",
")\n",
"\n",
"scaler = StandardScaler()\n",
"X_train_scaled = scaler.fit_transform(X_train)\n",
@@ -444,21 +447,21 @@
"\n",
"model = build_model()\n",
"\n",
"model.compile(\n",
" optimizer='adam',\n",
" loss='binary_crossentropy'\n",
"model.compile(optimizer=\"adam\", loss=\"binary_crossentropy\")\n",
"\n",
"callback = tf.keras.callbacks.EarlyStopping(\n",
" monitor=\"val_loss\", patience=10, restore_best_weights=True\n",
")\n",
"\n",
"callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)\n",
"\n",
"history = model.fit(\n",
" X_train_scaled, y_train,\n",
" X_train_scaled,\n",
" y_train,\n",
" epochs=50,\n",
" batch_size=8,\n",
" validation_split=0.2,\n",
" callbacks=[callback],\n",
" verbose=0,\n",
" class_weight={0: 1.0, 1: 2.0}\n",
" class_weight={0: 1.0, 1: 2.0},\n",
")\n",
"\n",
"\n",
@@ -486,11 +489,11 @@
],
"source": [
"plt.figure(figsize=(8, 5))\n",
"plt.plot(history.history['loss'], label='Loss (train)')\n",
"plt.plot(history.history['val_loss'], label='Loss (val)', linestyle='--')\n",
"plt.xlabel('Epochs')\n",
"plt.ylabel('Binary Cross-Entropy Loss')\n",
"plt.title('Courbe d\\'apprentissage')\n",
"plt.plot(history.history[\"loss\"], label=\"Loss (train)\")\n",
"plt.plot(history.history[\"val_loss\"], label=\"Loss (val)\", linestyle=\"--\")\n",
"plt.xlabel(\"Epochs\")\n",
"plt.ylabel(\"Binary Cross-Entropy Loss\")\n",
"plt.title(\"Courbe d'apprentissage\")\n",
"plt.legend()\n",
"plt.grid(True)\n",
"plt.tight_layout()\n",