Compare commits
298 Commits
21d8e37b06
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
| 53c0384cf9 | |||
| 3311d1a233 | |||
| c6e0ee5c05 | |||
| 0a65d41905 | |||
| e296508ebb | |||
| e39c858ff7 | |||
| 3094af5135 | |||
| d9c00c5159 | |||
| 5d0c1d2b4e | |||
| 4697570bcc | |||
| 514ba6a9b8 | |||
| 50849c6bc2 | |||
| 6552fc79fa | |||
| 17f7488751 | |||
| f943d4083c | |||
| 5b287234c2 | |||
| bcd013c57f | |||
| 29fa7b61fa | |||
| c136795d65 | |||
| 6255837215 | |||
| 94075f7f13 | |||
| cc467d9ff3 | |||
| 444c6facc3 | |||
| 88f45deba9 | |||
| 8b2e92635d | |||
| 80b38f0592 | |||
| 9452e42623 | |||
| b8b0024852 | |||
| 3e1ac18acd | |||
| 77feb27b97 | |||
| bcb8c66a9d | |||
| 03bc530c3a | |||
| 27fd147d0f | |||
| 56fdd5da45 | |||
| 3e6b2e313a | |||
| 346695212d | |||
| 8e7bbc1fe9 | |||
| c8c1bf4807 | |||
| 2e2500b509 | |||
| 5f5bd609d7 | |||
| e56fd6f2af | |||
| 0e65815e38 | |||
| 6eecdd6ab3 | |||
| 06bc1f28a9 | |||
| c5f60472fb | |||
| 0cb4dd4c57 | |||
| 98807a1b63 | |||
| 156411965d | |||
| fd775d1251 | |||
| 2824a9aed1 | |||
| acf1aa82c4 | |||
| f326ca42e0 | |||
| 5d01240748 | |||
| 7e62eaeb04 | |||
| 9e28765022 | |||
| 9b0b24bc8b | |||
| bcac5764f6 | |||
| 1141382c81 | |||
| 3cb05d3210 | |||
| d5a6bfd339 | |||
| f89ff4a016 | |||
| 0f766b62c3 | |||
| a2fa13ef8d | |||
| 0420f09b69 | |||
| 82fb7e53de | |||
| 33930ab89c | |||
| 95308de0cb | |||
| 5338517fee | |||
| 8397c8fee3 | |||
| 41378a2b42 | |||
| aad17ec465 | |||
|
|
886a7a2e2c | ||
|
|
dc054417f7 | ||
| c4d5b67321 | |||
| 08cf8fbeda | |||
| 5d968fa5e5 | |||
| 38ea77e86c | |||
| baf0d21a25 | |||
| f0854e58ba | |||
| 8400c722a5 | |||
| 21e376de79 | |||
| dc69e98b0d | |||
| 12c37869eb | |||
| e217b83754 | |||
| 1c61de108b | |||
| a3e636044a | |||
| 2b00a351c0 | |||
| 4570a011ec | |||
| f58afe7d71 | |||
| c7d0f4878f | |||
| 8b0afced5c | |||
| 74feddbddb | |||
| 5f1cec7858 | |||
| dec54d91d7 | |||
| 8fbf4681c9 | |||
| 568f38a59a | |||
| 007ca3c12c | |||
| bd05082e3c | |||
| 03bf0a4db2 | |||
| 8f5f2b417c | |||
| 5c8efbdc2e | |||
| 6369e30257 | |||
| aec178208e | |||
| 098e20c982 | |||
| e71aae349f | |||
| fa5785e714 | |||
| 632240d232 | |||
| ba6bea2c73 | |||
| 0af6f7a5d0 | |||
| f53ff6a2be | |||
| 20e3ca2326 | |||
| eab43866c3 | |||
| 61cc00c973 | |||
| 365da9c37e | |||
| db85923e94 | |||
| 37c0cd370e | |||
| 76a57c3d73 | |||
| a0cc98744f | |||
| 2ca65ffe73 | |||
| 3f2cd3a308 | |||
| 12bba2cea7 | |||
| cf7d23261b | |||
| e1255f326d | |||
| 4efbee7ce4 | |||
| 3c0113115c | |||
| 85a7469195 | |||
| 5af3c76113 | |||
| ba158c366b | |||
| d8b535418c | |||
| b6c9e91481 | |||
| ec5e23e3d4 | |||
| a0b0a9f8bd | |||
| 047f30def1 | |||
| f3a09a5282 | |||
| 1ccdcb3803 | |||
| a63b1bf94c | |||
| 19d7d398ae | |||
| 7cc7df0376 | |||
| 963948f19f | |||
| 592d7bc7eb | |||
| 26e7a4da36 | |||
| 6247d4b7e1 | |||
| 59b0c0de5c | |||
| 9fc0fad1ef | |||
| fe8be01369 | |||
|
|
40085147f0 | ||
|
|
beedb187f7 | ||
|
|
585277a622 | ||
| bbed2263ef | |||
| 82ff2db44c | |||
| cdac478b83 | |||
| 3cacb6be8a | |||
| 1effa6dc4b | |||
| 1d5089bfc8 | |||
| 37ede46fac | |||
| cb4e7d2ac2 | |||
| a4adf0a392 | |||
| 04d8b4cf14 | |||
| 9606f4224a | |||
| 185de1142d | |||
| b6cfa3349e | |||
| 8e081a1ccb | |||
| 2022563a28 | |||
| 6c120acab3 | |||
| f4a5b5b708 | |||
| c925c8a5c0 | |||
| 7b9a6bd0ff | |||
| e498a3eee8 | |||
| f3d7c2fc09 | |||
| a4e0e55efc | |||
| 34bd0307d5 | |||
| 76620f1d9d | |||
| 0d00de44e8 | |||
| 2768bcb565 | |||
| 6738419f7c | |||
| c72538fac3 | |||
| 08d0d93393 | |||
| fbd939c300 | |||
| 8cf328e18a | |||
| dfee405ea0 | |||
| 1a1c3c31f9 | |||
| f94ff07cab | |||
| e273cf90f7 | |||
| ecbdbc1dce | |||
| 192c4e02f1 | |||
| ad8f5857ca | |||
| c7f0603087 | |||
| 05222e0e65 | |||
| 01f64ae022 | |||
| 915cfeb97d | |||
| 1f1f52f3c6 | |||
| 7438ec6f5f | |||
| 32901d1247 | |||
| 6475965bd4 | |||
| c0a2307c94 | |||
| 07407fcdd4 | |||
| a4c09c50a5 | |||
| 5211dc754f | |||
| 8af29ceb78 | |||
| 704cefeeb1 | |||
| 786ebadefc | |||
| 44c277c8a7 | |||
| b784751776 | |||
| 52c6012197 | |||
| 4853ad1d64 | |||
| bc64c7ddcc | |||
| 632a1c6950 | |||
| 485844f674 | |||
| f536e28a24 | |||
| d28631c1c7 | |||
| d795afe07e | |||
| 6e60299ff9 | |||
| ba5bc36879 | |||
| dd760dad03 | |||
| 31f77d59e4 | |||
| d51159ad9e | |||
| 458a9b9698 | |||
| 3b1347c54c | |||
| 964821c058 | |||
| 070892c551 | |||
| bef64b5eb6 | |||
| f77bd7b184 | |||
| b0646dfb96 | |||
| 82ed0a1d8a | |||
| 767355c4df | |||
| 6449317f91 | |||
| 0e3c8aca99 | |||
| dbf9816453 | |||
| b7ca3f6e66 | |||
| 6596d39060 | |||
| 66d4be6542 | |||
| a86834aeb5 | |||
| d06b212417 | |||
| a668c6798a | |||
| 9bfa080c06 | |||
| c892ce7110 | |||
| 4c58e4a97a | |||
| 5e7db282cd | |||
| 188d1f7cad | |||
| 32e6c1733a | |||
| 41c789b7d4 | |||
| c84d813de6 | |||
| d00429881e | |||
| 2a863e6c9c | |||
| bef5077485 | |||
| af27fbba72 | |||
| 10e9191969 | |||
| 5e36d0f220 | |||
| 351b32cdb2 | |||
| 23b405ed57 | |||
| 4168d66030 | |||
| 0e16469176 | |||
| 7fbe02aced | |||
| 750ec5c719 | |||
| 87e7e58cd5 | |||
| 4e1aaa2310 | |||
| 7e3e01706d | |||
| 00388ad6b8 | |||
| 88d0907535 | |||
| df37fca8ac | |||
| 2656250b9c | |||
| cb5713ff6d | |||
| f6ba3d0890 | |||
| 611f22b99d | |||
| 365faafb5a | |||
| 7de8a90adf | |||
| 8e680a4f41 | |||
| fdfff091a7 | |||
| aeb5314b8b | |||
| fc0b87a405 | |||
| 0ecd2582bb | |||
| ae18b13ad2 | |||
| ea97f4e314 | |||
| c20c4f1585 | |||
| bcbe47df12 | |||
| e1fad33c55 | |||
| decac8bff2 | |||
| 515609c16b | |||
| ba27f1ce7c | |||
| 52796e9018 | |||
| 75b83bf0a4 | |||
| 65ead01e8e | |||
| 7a582c0601 | |||
| b8a53db50c | |||
| fccdc5dfb8 | |||
| 27333910df | |||
| 5093ee4d25 | |||
| 4055502110 | |||
| 3012b8a505 | |||
| 14951f25a5 | |||
| 13c19e5cf3 | |||
| 16e52106ce | |||
| 88ff5dbae1 | |||
| 52096035e7 | |||
| 0b1a10328f | |||
| c44cf4e836 | |||
| 48339b949b | |||
| dae19d4eb6 |
24
.gitignore
vendored
@@ -7,9 +7,29 @@
|
||||
|
||||
.RData
|
||||
.RHistory
|
||||
*.pdf
|
||||
|
||||
.ipynb_checkpoints
|
||||
|
||||
*.log
|
||||
logs
|
||||
logs
|
||||
catboost_info
|
||||
|
||||
tp1_files
|
||||
tp2_files
|
||||
tp3_files
|
||||
dashboard_files
|
||||
|
||||
Beaudelaire.txt
|
||||
Baudelaire_len_32.p
|
||||
|
||||
NoticeTechnique_files
|
||||
.posit
|
||||
renv
|
||||
|
||||
results/
|
||||
results_stage_1/
|
||||
results_stage_2/
|
||||
*.safetensors
|
||||
*.pt
|
||||
*.pth
|
||||
*.bin
|
||||
@@ -94,7 +94,7 @@
|
||||
" [0, 0, 3, 0, 0],\n",
|
||||
" [0, 0, 0, 4, 0],\n",
|
||||
" [0, 0, 0, 0, 5],\n",
|
||||
" ]\n",
|
||||
" ],\n",
|
||||
")\n",
|
||||
"B = np.array(\n",
|
||||
" [\n",
|
||||
@@ -103,7 +103,7 @@
|
||||
" [3, 4, 5, 6, 7],\n",
|
||||
" [4, 5, 6, 7, 8],\n",
|
||||
" [5, 6, 7, 8, 9],\n",
|
||||
" ]\n",
|
||||
" ],\n",
|
||||
")\n",
|
||||
"d = np.diag(A)\n",
|
||||
"dd = np.array([np.diag(A)])\n",
|
||||
@@ -160,7 +160,8 @@
|
||||
" for i in range(n):\n",
|
||||
" x[i] = (b[i] - np.dot(A[i, :i], x[:i])) / A[i, i]\n",
|
||||
" else:\n",
|
||||
" raise ValueError(\"A est ni triangulaire supérieure ni triangulaire inférieure\")\n",
|
||||
" msg = \"A est ni triangulaire supérieure ni triangulaire inférieure\"\n",
|
||||
" raise ValueError(msg)\n",
|
||||
" return x"
|
||||
]
|
||||
},
|
||||
@@ -296,10 +297,12 @@
|
||||
"def met_gauss_sys(A, b):\n",
|
||||
" n, m = A.shape\n",
|
||||
" if n != m:\n",
|
||||
" raise ValueError(\"Erreur de dimension : A doit etre carré\")\n",
|
||||
" msg = \"Erreur de dimension : A doit etre carré\"\n",
|
||||
" raise ValueError(msg)\n",
|
||||
" if n != b.size:\n",
|
||||
" msg = \"Erreur de dimension : le nombre de lignes de A doit être égal au nombr ede colonnes de b\"\n",
|
||||
" raise valueError(\n",
|
||||
" \"Erreur de dimension : le nombre de lignes de A doit être égal au nombr ede colonnes de b\"\n",
|
||||
" msg,\n",
|
||||
" )\n",
|
||||
" U = np.zeros((n, n + 1))\n",
|
||||
" U = A\n",
|
||||
|
||||
@@ -44,11 +44,10 @@ notes_MAN <- read.table("notes_MAN.csv", sep = ";", dec = ",", row.names = 1, he
|
||||
# qui est une variable catégorielle
|
||||
notes_MAN_prep <- notes_MAN[, -1]
|
||||
|
||||
X <- notes_MAN[1:6,] %>% select(c("Probas", "Analyse", "Anglais", "MAN.Stats", "Stats.Inférentielles"))
|
||||
X <- notes_MAN[1:6, ] |> select(c("Probas", "Analyse", "Anglais", "MAN.Stats", "Stats.Inférentielles"))
|
||||
# on prépare le jeu de données en retirant la colonne des Mentions
|
||||
# qui est une variable catégorielle
|
||||
# View(X)
|
||||
|
||||
```
|
||||
|
||||
```{r}
|
||||
@@ -101,7 +100,7 @@ C[, 1:2]
|
||||
deux premières composantes principales (1 point)
|
||||
|
||||
```{r}
|
||||
colors <- c('blue', 'red', 'green', 'yellow', 'purple', 'orange')
|
||||
colors <- c("blue", "red", "green", "yellow", "purple", "orange")
|
||||
plot(
|
||||
C[, 1], C[, 2],
|
||||
main = "Coordonnées des individus par rapport \n aux deux premières composantes principales",
|
||||
@@ -111,7 +110,7 @@ plot(
|
||||
col = colors,
|
||||
pch = 15
|
||||
)
|
||||
legend(x = 'topleft', legend = rownames(X), col = colors, pch = 15)
|
||||
legend(x = "topleft", legend = rownames(X), col = colors, pch = 15)
|
||||
```
|
||||
|
||||
------------------------------------------------------------------------
|
||||
@@ -130,7 +129,7 @@ ncol(notes_MAN_prep) # Nombre de variables
|
||||
```
|
||||
|
||||
```{r}
|
||||
dim(notes_MAN_prep) # On peut également utiliser 'dim' qui renvoit la dimension
|
||||
dim(notes_MAN_prep) # On peut également utiliser 'dim' qui renvoit la dimension
|
||||
```
|
||||
|
||||
Il y a donc **42** individus et **14** variables. A noter que la
|
||||
@@ -146,7 +145,7 @@ library(FactoMineR)
|
||||
```{r}
|
||||
# Ne pas oublier de charger la librairie FactoMineR
|
||||
|
||||
# Indication : pour afficher les résultats de l'ACP pour tous les individus, utiliser la
|
||||
# Indication : pour afficher les résultats de l'ACP pour tous les individus, utiliser la
|
||||
# fonction summary en précisant dedans nbind=Inf et nbelements=Inf
|
||||
res.notes <- PCA(notes_MAN_prep, scale.unit = TRUE)
|
||||
```
|
||||
@@ -190,7 +189,7 @@ avec:
|
||||
Depuis notre ACP, on peut donc récupérer les coordonnées:
|
||||
|
||||
```{r}
|
||||
coords_man_stats <- res.notes$var$coord["MAN.Stats",]
|
||||
coords_man_stats <- res.notes$var$coord["MAN.Stats", ]
|
||||
coords_man_stats[1:2]
|
||||
```
|
||||
|
||||
|
||||
@@ -113,9 +113,7 @@
|
||||
"\n",
|
||||
"\n",
|
||||
"def C(t):\n",
|
||||
" \"\"\"\n",
|
||||
" Fonction retournant la solution exacte du problème au temps t\n",
|
||||
" \"\"\"\n",
|
||||
" \"\"\"Fonction retournant la solution exacte du problème au temps t.\"\"\"\n",
|
||||
" return K_star + K / (1 + (K / K0 - 1) * np.exp(-r * (t - t_fl)))\n",
|
||||
"\n",
|
||||
"\n",
|
||||
@@ -137,9 +135,7 @@
|
||||
"\n",
|
||||
"\n",
|
||||
"def dN(N, t, C_sol):\n",
|
||||
" \"\"\"\n",
|
||||
" Fonction calculant la dérivée de la solution approchée du problème à l'instant t dépendant de N(t) et de C(t)\n",
|
||||
" \"\"\"\n",
|
||||
" \"\"\"Fonction calculant la dérivée de la solution approchée du problème à l'instant t dépendant de N(t) et de C(t).\"\"\"\n",
|
||||
" return r_N * N * (1 - N / C_sol(t))\n",
|
||||
"\n",
|
||||
"\n",
|
||||
@@ -225,7 +221,7 @@
|
||||
"\n",
|
||||
"\n",
|
||||
"def F(X, t, a, b, c, d, p):\n",
|
||||
" \"\"\"Fonction second membre pour le système\"\"\"\n",
|
||||
" \"\"\"Fonction second membre pour le système.\"\"\"\n",
|
||||
" x, y = X\n",
|
||||
" return np.array([x * (a - p - b * y), y * (-c - p + d * x)])\n",
|
||||
"\n",
|
||||
@@ -239,13 +235,19 @@
|
||||
"# On crée une figure à trois graphiques\n",
|
||||
"fig = plt.figure(figsize=(12, 6))\n",
|
||||
"ax = fig.add_subplot(\n",
|
||||
" 1, 2, 2\n",
|
||||
" 1,\n",
|
||||
" 2,\n",
|
||||
" 2,\n",
|
||||
") # subplot pour le champ de vecteurs et le graphe sardines vs requins\n",
|
||||
"axr = fig.add_subplot(\n",
|
||||
" 2, 2, 1\n",
|
||||
" 2,\n",
|
||||
" 2,\n",
|
||||
" 1,\n",
|
||||
") # subplot pour le graphe du nombre de requins en fonction du temps\n",
|
||||
"axs = fig.add_subplot(\n",
|
||||
" 2, 2, 3\n",
|
||||
" 2,\n",
|
||||
" 2,\n",
|
||||
" 3,\n",
|
||||
") # subplot pour le graphe du nombre de sardines en fonction du temps\n",
|
||||
"ax.quiver(sardines, requins, fsardines / n_sndmb, frequins / n_sndmb)\n",
|
||||
"\n",
|
||||
@@ -317,12 +319,10 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def crank_nicolson(y0, T, N, r):\n",
|
||||
" \"\"\"\n",
|
||||
" schéma de Crank-Nicolson pour le modèle de Malthus\n",
|
||||
" \"\"\"schéma de Crank-Nicolson pour le modèle de Malthus.\n",
|
||||
"\n",
|
||||
" Parameters\n",
|
||||
" ----------\n",
|
||||
"\n",
|
||||
" y0: float\n",
|
||||
" donnée initiale\n",
|
||||
" T: float\n",
|
||||
@@ -334,13 +334,12 @@
|
||||
"\n",
|
||||
" Returns\n",
|
||||
" -------\n",
|
||||
"\n",
|
||||
" t: ndarray\n",
|
||||
" les instants où la solution approchée est calculée\n",
|
||||
" y: ndarray\n",
|
||||
" les valeurs de la solution approchée par le theta-schema\n",
|
||||
" \"\"\"\n",
|
||||
"\n",
|
||||
" \"\"\"\n",
|
||||
" dt = T / N\n",
|
||||
" t = np.zeros(N + 1)\n",
|
||||
" y = np.zeros(N + 1)\n",
|
||||
@@ -357,12 +356,10 @@
|
||||
"\n",
|
||||
"\n",
|
||||
"def euler_explicit(y0, T, N, r):\n",
|
||||
" \"\"\"\n",
|
||||
" schéma de d'Euler pour le modèle de Malthus\n",
|
||||
" \"\"\"schéma de d'Euler pour le modèle de Malthus.\n",
|
||||
"\n",
|
||||
" Parameters\n",
|
||||
" ----------\n",
|
||||
"\n",
|
||||
" y0: float\n",
|
||||
" donnée initiale\n",
|
||||
" T: float\n",
|
||||
@@ -374,11 +371,11 @@
|
||||
"\n",
|
||||
" Returns\n",
|
||||
" -------\n",
|
||||
"\n",
|
||||
" t: ndarray\n",
|
||||
" les instants où la solution approchée est calculée\n",
|
||||
" y: ndarray\n",
|
||||
" les valeurs de la solution approchée par le theta-schema\n",
|
||||
"\n",
|
||||
" \"\"\"\n",
|
||||
" dt = T / N\n",
|
||||
" t = np.zeros(N + 1)\n",
|
||||
@@ -396,9 +393,7 @@
|
||||
"\n",
|
||||
"\n",
|
||||
"def solution_exacte(t):\n",
|
||||
" \"\"\"\n",
|
||||
" Fonction calculant la solution exacte du modèle de Malthus à l'instant t\n",
|
||||
" \"\"\"\n",
|
||||
" \"\"\"Fonction calculant la solution exacte du modèle de Malthus à l'instant t.\"\"\"\n",
|
||||
" return y0 * np.exp(r * t)"
|
||||
]
|
||||
},
|
||||
@@ -462,7 +457,10 @@
|
||||
"ax = fig.add_subplot(1, 2, 2)\n",
|
||||
"for n in liste_N:\n",
|
||||
" t, y = crank_nicolson(\n",
|
||||
" y0, T, n, r\n",
|
||||
" y0,\n",
|
||||
" T,\n",
|
||||
" n,\n",
|
||||
" r,\n",
|
||||
" ) # On calcule la fonction Crank-Nicolson pour chaque n\n",
|
||||
" ax.scatter(t, y, label=f\"Solution approchée pour N={n}\")\n",
|
||||
"ax.plot(t_exact, solution_exacte(t_exact), label=\"Solution exacte\")\n",
|
||||
|
||||
@@ -151,20 +151,18 @@
|
||||
],
|
||||
"source": [
|
||||
"def M(x):\n",
|
||||
" \"\"\"\n",
|
||||
" Retourne la matrice du système (2)\n",
|
||||
" \"\"\"Retourne la matrice du système (2).\n",
|
||||
"\n",
|
||||
" Parameters\n",
|
||||
" ----------\n",
|
||||
"\n",
|
||||
" x: ndarray\n",
|
||||
" vecteurs contenant les valeurs [x0, x1, ..., xN]\n",
|
||||
"\n",
|
||||
" Returns\n",
|
||||
" -------\n",
|
||||
"\n",
|
||||
" out: ndarray\n",
|
||||
" matrice du système (2)\n",
|
||||
"\n",
|
||||
" \"\"\"\n",
|
||||
" h = x[1:] - x[:-1] # x[i+1] - x[i]\n",
|
||||
" return (\n",
|
||||
@@ -194,12 +192,10 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def sprime(x, y, p0, pN):\n",
|
||||
" \"\"\"\n",
|
||||
" Retourne la solution du système (2)\n",
|
||||
" \"\"\"Retourne la solution du système (2).\n",
|
||||
"\n",
|
||||
" Parameters\n",
|
||||
" ----------\n",
|
||||
"\n",
|
||||
" x: ndarray\n",
|
||||
" vecteurs contenant les valeurs [x0, x1, ..., xN]\n",
|
||||
" y: ndarray\n",
|
||||
@@ -211,9 +207,9 @@
|
||||
"\n",
|
||||
" Returns\n",
|
||||
" -------\n",
|
||||
"\n",
|
||||
" out: ndarray\n",
|
||||
" solution du système (2)\n",
|
||||
"\n",
|
||||
" \"\"\"\n",
|
||||
" h = x[1:] - x[:-1]\n",
|
||||
" delta_y = (y[1:] - y[:-1]) / h\n",
|
||||
@@ -276,39 +272,35 @@
|
||||
],
|
||||
"source": [
|
||||
"def f(x):\n",
|
||||
" \"\"\"\n",
|
||||
" Retourne la fonction f évaluée aux points x\n",
|
||||
" \"\"\"Retourne la fonction f évaluée aux points x.\n",
|
||||
"\n",
|
||||
" Parameters\n",
|
||||
" ----------\n",
|
||||
"\n",
|
||||
" x: ndarray\n",
|
||||
" vecteurs contenant les valeurs [x0, x1, ..., xN]\n",
|
||||
"\n",
|
||||
" Returns\n",
|
||||
" -------\n",
|
||||
"\n",
|
||||
" out: ndarray\n",
|
||||
" Valeur de la fonction f aux points x\n",
|
||||
"\n",
|
||||
" \"\"\"\n",
|
||||
" return 1 / (1 + x**2)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def fprime(x):\n",
|
||||
" \"\"\"\n",
|
||||
" Retourne la fonction dérivée de f évaluée aux points x\n",
|
||||
" \"\"\"Retourne la fonction dérivée de f évaluée aux points x.\n",
|
||||
"\n",
|
||||
" Parameters\n",
|
||||
" ----------\n",
|
||||
"\n",
|
||||
" x: ndarray\n",
|
||||
" vecteurs contenant les valeurs [x0, x1, ..., xN]\n",
|
||||
"\n",
|
||||
" Returns\n",
|
||||
" -------\n",
|
||||
"\n",
|
||||
" out: ndarray\n",
|
||||
" Valeur de la fonction dérivée de f aux points x\n",
|
||||
"\n",
|
||||
" \"\"\"\n",
|
||||
" return -2 * x / ((1 + x**2) ** 2)\n",
|
||||
"\n",
|
||||
@@ -368,12 +360,10 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def splines(x, y, p0, pN):\n",
|
||||
" \"\"\"\n",
|
||||
" Retourne la matrice S de taille (4, N)\n",
|
||||
" \"\"\"Retourne la matrice S de taille (4, N).\n",
|
||||
"\n",
|
||||
" Parameters\n",
|
||||
" ----------\n",
|
||||
"\n",
|
||||
" x: ndarray\n",
|
||||
" vecteurs contenant les valeurs [x0, x1, ..., xN]\n",
|
||||
" y: ndarray\n",
|
||||
@@ -385,9 +375,9 @@
|
||||
"\n",
|
||||
" Returns\n",
|
||||
" -------\n",
|
||||
"\n",
|
||||
" out: ndarray\n",
|
||||
" Matrice S de taille (4, N) tel que la i-ième ligne contient les valeurs a_i, b_i, c_i et d_i\n",
|
||||
"\n",
|
||||
" \"\"\"\n",
|
||||
" h = x[1:] - x[:-1]\n",
|
||||
" delta_y = (y[1:] - y[:-1]) / h\n",
|
||||
@@ -420,12 +410,10 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def spline_eval(x, xx, S):\n",
|
||||
" \"\"\"\n",
|
||||
" Evalue une spline définie par des noeuds équirepartis\n",
|
||||
" \"\"\"Evalue une spline définie par des noeuds équirepartis.\n",
|
||||
"\n",
|
||||
" Parameters\n",
|
||||
" ----------\n",
|
||||
"\n",
|
||||
" x: ndarray\n",
|
||||
" noeuds définissant la spline\n",
|
||||
"\n",
|
||||
@@ -439,19 +427,18 @@
|
||||
"\n",
|
||||
" Returns\n",
|
||||
" -------\n",
|
||||
"\n",
|
||||
" ndarray\n",
|
||||
" ordonnées des points d'évaluation\n",
|
||||
"\n",
|
||||
" \"\"\"\n",
|
||||
" ind = (np.floor((xx - x[0]) / (x[1] - x[0]))).astype(int)\n",
|
||||
" ind = np.where(ind == x.size - 1, ind - 1, ind)\n",
|
||||
" yy = (\n",
|
||||
" return (\n",
|
||||
" S[ind, 0]\n",
|
||||
" + S[ind, 1] * (xx - x[ind])\n",
|
||||
" + S[ind, 2] * (xx - x[ind]) ** 2\n",
|
||||
" + S[ind, 3] * (xx - x[ind]) ** 3\n",
|
||||
" )\n",
|
||||
" return yy"
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -180,11 +180,11 @@
|
||||
"source": [
|
||||
"for f in [f0, f1, f2, f3]:\n",
|
||||
" print(\n",
|
||||
" f\"Calcule de I(f) par la méthode de gauss et par la formule quadratique pour la fonction {f.__name__}\"\n",
|
||||
" f\"Calcule de I(f) par la méthode de gauss et par la formule quadratique pour la fonction {f.__name__}\",\n",
|
||||
" )\n",
|
||||
" for n in range(1, 11):\n",
|
||||
" print(f\"Pour n = {n}, gauss = {gauss(f, n)} et quad = {quad(f, -1, 1)[0]}\")\n",
|
||||
" print(\"\")"
|
||||
" print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -215,7 +215,8 @@
|
||||
"source": [
|
||||
"def simpson(f, N):\n",
|
||||
" if N % 2 == 0:\n",
|
||||
" raise ValueError(\"N doit est impair.\")\n",
|
||||
" msg = \"N doit est impair.\"\n",
|
||||
" raise ValueError(msg)\n",
|
||||
"\n",
|
||||
" h = 2 / (2 * (N - 1) // 2)\n",
|
||||
" fx = f(np.linspace(-1, 1, N))\n",
|
||||
@@ -276,11 +277,11 @@
|
||||
"source": [
|
||||
"for f in [f0, f1, f2, f3]:\n",
|
||||
" print(\n",
|
||||
" f\"Calcule de I(f) par la méthode de simpson et par la formule quadratique pour la fonction {f.__name__}\"\n",
|
||||
" f\"Calcule de I(f) par la méthode de simpson et par la formule quadratique pour la fonction {f.__name__}\",\n",
|
||||
" )\n",
|
||||
" for n in range(3, 16, 2):\n",
|
||||
" print(f\"Pour n = {n}, simpson = {simpson(f, n)} et quad = {quad(f, -1, 1)[0]}\")\n",
|
||||
" print(\"\")"
|
||||
" print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -340,10 +341,9 @@
|
||||
"def poly_tchebychev(x, N):\n",
|
||||
" if N == 0:\n",
|
||||
" return np.ones_like(x)\n",
|
||||
" elif N == 1:\n",
|
||||
" if N == 1:\n",
|
||||
" return x\n",
|
||||
" else:\n",
|
||||
" return 2 * x * poly_tchebychev(x, N - 1) - poly_tchebychev(x, N - 2)"
|
||||
" return 2 * x * poly_tchebychev(x, N - 1) - poly_tchebychev(x, N - 2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -421,7 +421,7 @@
|
||||
" print(f\"Pour N = {n}\")\n",
|
||||
" print(f\"Les points de Tchebychev sont {xk}\")\n",
|
||||
" print(f\"L'evaluation du polynome de Tchebychev Tn en ces points est {Tn}\")\n",
|
||||
" print(\"\")"
|
||||
" print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -537,11 +537,11 @@
|
||||
"source": [
|
||||
"for f in [f0, f1, f2, f3]:\n",
|
||||
" print(\n",
|
||||
" f\"Calcule de I(f) par la méthode de fejer et par la formule quadratique pour la fonction {f.__name__}\"\n",
|
||||
" f\"Calcule de I(f) par la méthode de fejer et par la formule quadratique pour la fonction {f.__name__}\",\n",
|
||||
" )\n",
|
||||
" for n in range(1, 11):\n",
|
||||
" print(f\"Pour n = {n}, fejer = {fejer(f, n)} et quad = {quad(f, -1, 1)[0]}\")\n",
|
||||
" print(\"\")"
|
||||
" print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -603,7 +603,10 @@
|
||||
" marker=\"+\",\n",
|
||||
" )\n",
|
||||
" ax.scatter(\n",
|
||||
" np.arange(3, N + 1, 2), np.log10(error_simp), label=\"Simpson\", marker=\"+\"\n",
|
||||
" np.arange(3, N + 1, 2),\n",
|
||||
" np.log10(error_simp),\n",
|
||||
" label=\"Simpson\",\n",
|
||||
" marker=\"+\",\n",
|
||||
" )\n",
|
||||
" ax.scatter(\n",
|
||||
" np.arange(1, N + 1),\n",
|
||||
@@ -703,8 +706,14 @@
|
||||
"print(\"-----------------------------------------------------------------------\")\n",
|
||||
"print(\n",
|
||||
" \"{:>5s} | {:>7s} {:>9s} {:>9s} {:>9s} {:>9s} {:>9s}\".format(\n",
|
||||
" \"N\", \"x^0\", \"x^2\", \"x^4\", \"x^6\", \"x^8\", \"x^10\"\n",
|
||||
" )\n",
|
||||
" \"N\",\n",
|
||||
" \"x^0\",\n",
|
||||
" \"x^2\",\n",
|
||||
" \"x^4\",\n",
|
||||
" \"x^6\",\n",
|
||||
" \"x^8\",\n",
|
||||
" \"x^10\",\n",
|
||||
" ),\n",
|
||||
")\n",
|
||||
"print(\"-----------------------------------------------------------------------\")\n",
|
||||
"\n",
|
||||
@@ -715,10 +724,7 @@
|
||||
" I_exact = 2 / (k + 1) if k % 2 == 0 else 0\n",
|
||||
" approx_error = np.abs(I_approx - I_exact)\n",
|
||||
" approx_errors.append(approx_error)\n",
|
||||
" print(\n",
|
||||
" f\"{N:5d} | \"\n",
|
||||
" + \" \".join(f\"{e:.3f} \" for e in approx_errors)\n",
|
||||
" )"
|
||||
" print(f\"{N:5d} | \" + \" \".join(f\"{e:.3f} \" for e in approx_errors))"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -761,8 +767,14 @@
|
||||
"print(\"-----------------------------------------------------------------------\")\n",
|
||||
"print(\n",
|
||||
" \"{:>5s} | {:>7s} {:>9s} {:>9s} {:>9s} {:>9s} {:>9s}\".format(\n",
|
||||
" \"N\", \"x^0\", \"x^2\", \"x^4\", \"x^6\", \"x^8\", \"x^10\"\n",
|
||||
" )\n",
|
||||
" \"N\",\n",
|
||||
" \"x^0\",\n",
|
||||
" \"x^2\",\n",
|
||||
" \"x^4\",\n",
|
||||
" \"x^6\",\n",
|
||||
" \"x^8\",\n",
|
||||
" \"x^10\",\n",
|
||||
" ),\n",
|
||||
")\n",
|
||||
"print(\"-----------------------------------------------------------------------\")\n",
|
||||
"\n",
|
||||
@@ -773,10 +785,7 @@
|
||||
" I_exact = 2 / (k + 1) if k % 2 == 0 else 0\n",
|
||||
" approx_error = np.abs(I_approx - I_exact)\n",
|
||||
" approx_errors.append(approx_error)\n",
|
||||
" print(\n",
|
||||
" f\"{N:5d} | \"\n",
|
||||
" + \" \".join(f\"{e:.3f} \" for e in approx_errors)\n",
|
||||
" )"
|
||||
" print(f\"{N:5d} | \" + \" \".join(f\"{e:.3f} \" for e in approx_errors))"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -163,7 +163,7 @@
|
||||
" [\n",
|
||||
" (a + b) / 2 + (b - a) / 2 * np.cos((2 * i - 1) / (2 * N) * np.pi)\n",
|
||||
" for i in range(1, N + 1)\n",
|
||||
" ]\n",
|
||||
" ],\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
@@ -333,6 +333,8 @@
|
||||
"source": [
|
||||
"def f(x):\n",
|
||||
" return 1 / (1 + x**2)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"a, b = -5, 5\n",
|
||||
"xx = np.linspace(a, b, 200)\n",
|
||||
"\n",
|
||||
@@ -375,6 +377,8 @@
|
||||
"source": [
|
||||
"def f(x):\n",
|
||||
" return 1 / (1 + x**2)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"a, b = -5, 5\n",
|
||||
"xx = np.linspace(a, b, 200)\n",
|
||||
"\n",
|
||||
|
||||
@@ -151,11 +151,9 @@
|
||||
" return np.ones_like(x)\n",
|
||||
" if n == 1:\n",
|
||||
" return x\n",
|
||||
" else:\n",
|
||||
" return (\n",
|
||||
" (2 * n - 1) * x * poly_legendre(x, n - 1)\n",
|
||||
" - (n - 1) * poly_legendre(x, n - 2)\n",
|
||||
" ) / n"
|
||||
" return (\n",
|
||||
" (2 * n - 1) * x * poly_legendre(x, n - 1) - (n - 1) * poly_legendre(x, n - 2)\n",
|
||||
" ) / n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -509,7 +507,7 @@
|
||||
" for f in [f0, f1]:\n",
|
||||
" print(f\"I({f.__name__}) par quad_gauss: {quad_gauss(f, n)}\")\n",
|
||||
" print(f\"I({f.__name__}) par quad: {quad(f, -1, 1)[0]}\")\n",
|
||||
" print(\"\")"
|
||||
" print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -65,12 +65,20 @@
|
||||
"source": [
|
||||
"def f1(x):\n",
|
||||
" return np.exp(x) - 1 - x\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def f2(x):\n",
|
||||
" return x - np.sin(x)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def f3(x):\n",
|
||||
" return x + np.sin(x)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def f4(x):\n",
|
||||
" return x + np.cos(x) - 1\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def f5(x):\n",
|
||||
" return x - np.cos(x) + 1"
|
||||
]
|
||||
@@ -135,12 +143,10 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def point_fixe(f, x0, tol=1.0e-6, itermax=5000):\n",
|
||||
" \"\"\"\n",
|
||||
" Recherche de point fixe : méthode brute x_{n+1} = f(x_n)\n",
|
||||
" \"\"\"Recherche de point fixe : méthode brute x_{n+1} = f(x_n).\n",
|
||||
"\n",
|
||||
" Parameters\n",
|
||||
" ----------\n",
|
||||
"\n",
|
||||
" f: function\n",
|
||||
" la fonction dont on cherche le point fixe\n",
|
||||
" x0: float\n",
|
||||
@@ -152,13 +158,13 @@
|
||||
"\n",
|
||||
" Returns\n",
|
||||
" -------\n",
|
||||
"\n",
|
||||
" x: float\n",
|
||||
" la valeur trouvée pour le point fixe\n",
|
||||
" niter: int\n",
|
||||
" le nombre d'itérations effectuées\n",
|
||||
" xL: ndarray\n",
|
||||
" la suite des itérés de la suite\n",
|
||||
"\n",
|
||||
" \"\"\"\n",
|
||||
" xL = [x0]\n",
|
||||
" niter = 0\n",
|
||||
|
||||
@@ -135,10 +135,10 @@
|
||||
"y = np.linspace(0, 2.1, 23) # ordonnées des points de la grille\n",
|
||||
"T, Y = np.meshgrid(t, y) # grille de points dans le plan (t,y)\n",
|
||||
"U = np.ones(T.shape) / np.sqrt(\n",
|
||||
" 1 + f1(T, Y) ** 2\n",
|
||||
" 1 + f1(T, Y) ** 2,\n",
|
||||
") # matrice avec les composantes horizontales des vecteurs (1), normalisées\n",
|
||||
"V = f1(T, Y) / np.sqrt(\n",
|
||||
" 1 + f1(T, Y) ** 2\n",
|
||||
" 1 + f1(T, Y) ** 2,\n",
|
||||
") # matrice avec les composantes verticales des vecteurs (f(t,y)), normalisées\n",
|
||||
"plt.quiver(T, Y, U, V, angles=\"xy\", scale=20, color=\"cyan\")\n",
|
||||
"plt.axis([-5, 5, 0, 2.1])"
|
||||
@@ -227,10 +227,10 @@
|
||||
"y = np.linspace(ymin, ymax) # ordonnées des points de la grille\n",
|
||||
"T, Y = np.meshgrid(t, y) # grille de points dans le plan (t,y)\n",
|
||||
"U = np.ones(T.shape) / np.sqrt(\n",
|
||||
" 1 + f2(T, Y) ** 2\n",
|
||||
" 1 + f2(T, Y) ** 2,\n",
|
||||
") # matrice avec les composantes horizontales des vecteurs (1), normalisées\n",
|
||||
"V = f1(T, Y) / np.sqrt(\n",
|
||||
" 1 + f2(T, Y) ** 2\n",
|
||||
" 1 + f2(T, Y) ** 2,\n",
|
||||
") # matrice avec les composantes verticales des vecteurs (f(t,y)), normalisées\n",
|
||||
"plt.quiver(T, Y, U, V, angles=\"xy\", scale=20, color=\"cyan\")\n",
|
||||
"plt.axis([xmin, xmax, ymin, ymax])"
|
||||
@@ -484,10 +484,10 @@
|
||||
"y = np.linspace(0, K + 100, 23) # ordonnées des points de la grille\n",
|
||||
"T, P = np.meshgrid(t, y) # grille de points dans le plan (t,y)\n",
|
||||
"U = np.ones(T.shape) / np.sqrt(\n",
|
||||
" 1 + fV(T, P) ** 2\n",
|
||||
" 1 + fV(T, P) ** 2,\n",
|
||||
") # matrice avec les composantes horizontales des vecteurs (1), normalisées\n",
|
||||
"V = fV(T, P) / np.sqrt(\n",
|
||||
" 1 + fV(T, P) ** 2\n",
|
||||
" 1 + fV(T, P) ** 2,\n",
|
||||
") # matrice avec les composantes verticales des vecteurs (f(t,y)), normalisées\n",
|
||||
"plt.quiver(T, P, U, V, angles=\"xy\", scale=20, color=\"cyan\")\n",
|
||||
"plt.legend(fontsize=4)"
|
||||
@@ -570,10 +570,10 @@
|
||||
"y = np.linspace(0, 6, 23) # ordonnées des points de la grille\n",
|
||||
"T, P = np.meshgrid(t, y) # grille de points dans le plan (t,y)\n",
|
||||
"U = np.ones(T.shape) / np.sqrt(\n",
|
||||
" 1 + fS(T, P) ** 2\n",
|
||||
" 1 + fS(T, P) ** 2,\n",
|
||||
") # matrice avec les composantes horizontales des vecteurs (1), normalisées\n",
|
||||
"V = fS(T, P) / np.sqrt(\n",
|
||||
" 1 + fS(T, P) ** 2\n",
|
||||
" 1 + fS(T, P) ** 2,\n",
|
||||
") # matrice avec les composantes verticales des vecteurs (f(t,y)), normalisées\n",
|
||||
"plt.quiver(T, P, U, V, angles=\"xy\", scale=20, color=\"cyan\")"
|
||||
]
|
||||
|
||||
@@ -125,7 +125,7 @@
|
||||
" [\n",
|
||||
" (C[0] * np.exp(-(t - t0)) * U1[0] + C[1] * np.exp(-2 * (t - t0)) * U2[0]),\n",
|
||||
" (C[0] * np.exp(-(t - t0)) * U1[1] + C[1] * np.exp(-2 * (t - t0)) * U2[1]),\n",
|
||||
" ]\n",
|
||||
" ],\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"\n",
|
||||
|
||||
@@ -224,7 +224,7 @@
|
||||
"plt.xlabel(\"$t$\")\n",
|
||||
"plt.ylabel(\"$y^n$\")\n",
|
||||
"plt.title(\n",
|
||||
" \"Solutions approchées de (P) obtenus avec mon_schema pour différentes valeurs du pas h\"\n",
|
||||
" \"Solutions approchées de (P) obtenus avec mon_schema pour différentes valeurs du pas h\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -274,7 +274,7 @@
|
||||
"plt.xlabel(\"$t$\")\n",
|
||||
"plt.ylabel(\"$|y(t_n) - y^n|$\")\n",
|
||||
"plt.title(\n",
|
||||
" \"différence en valeur absolue entre sol. exacte et sol. approchée par mon_schema, pour différentes valeurs du pas h\"\n",
|
||||
" \"différence en valeur absolue entre sol. exacte et sol. approchée par mon_schema, pour différentes valeurs du pas h\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -357,7 +357,7 @@
|
||||
"\n",
|
||||
"plt.legend()\n",
|
||||
"plt.title(\n",
|
||||
" \"Erreur pour la méthode mon_schema en echelle logarithmique : log(E) en fonction de log(h)\"\n",
|
||||
" \"Erreur pour la méthode mon_schema en echelle logarithmique : log(E) en fonction de log(h)\",\n",
|
||||
")\n",
|
||||
"plt.xlabel(\"$log(h)$\")\n",
|
||||
"plt.ylabel(\"$log(E)$\")"
|
||||
@@ -674,7 +674,7 @@
|
||||
"plt.xlabel(\"$t$\")\n",
|
||||
"plt.ylabel(\"$|y(t_n) - y^n|$\")\n",
|
||||
"plt.title(\n",
|
||||
" \"différence en valeur absolue entre sol. exacte et sol. approchée, pour différents schemas\"\n",
|
||||
" \"différence en valeur absolue entre sol. exacte et sol. approchée, pour différents schemas\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -826,7 +826,7 @@
|
||||
" plt.xlabel(\"$t$\")\n",
|
||||
" plt.ylabel(\"$y^n$\")\n",
|
||||
" plt.title(\n",
|
||||
" f\"Solutions approchées de (P) obtenus avec {schema.__name__} pour différentes valeurs du pas h\"\n",
|
||||
" f\"Solutions approchées de (P) obtenus avec {schema.__name__} pour différentes valeurs du pas h\",\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
@@ -886,7 +886,7 @@
|
||||
"\n",
|
||||
" plt.legend()\n",
|
||||
" plt.title(\n",
|
||||
" f\"Erreur pour la méthode {schema.__name__} en echelle logarithmique : log(E) en fonction de log(h)\"\n",
|
||||
" f\"Erreur pour la méthode {schema.__name__} en echelle logarithmique : log(E) en fonction de log(h)\",\n",
|
||||
" )\n",
|
||||
" plt.xlabel(\"$log(h)$\")\n",
|
||||
" plt.ylabel(\"$log(E)$\")"
|
||||
|
||||
@@ -361,7 +361,7 @@
|
||||
"plt.ylabel(r\"$\\max_{j=0,\\dots,M+1}|u(x_j)-u_j|$\")\n",
|
||||
"plt.legend(fontsize=7)\n",
|
||||
"plt.title(\n",
|
||||
" \"Différence en valeur absolue entre la solution exacte et la solution approchée\"\n",
|
||||
" \"Différence en valeur absolue entre la solution exacte et la solution approchée\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -440,7 +440,7 @@
|
||||
"plt.xlabel(\"$log(h)$\")\n",
|
||||
"plt.ylabel(\"$log(E)$\")\n",
|
||||
"plt.title(\n",
|
||||
" \"Erreur pour la méthode mon_schema en echelle logarithmique : log(E) en fonction de log(h)\"\n",
|
||||
" \"Erreur pour la méthode mon_schema en echelle logarithmique : log(E) en fonction de log(h)\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -592,7 +592,7 @@
|
||||
"plt.ylabel(r\"$\\max_{j=0,\\dots,M+1}|u(x_j)-u_j|$\")\n",
|
||||
"plt.legend(fontsize=7)\n",
|
||||
"plt.title(\n",
|
||||
" \"Différence en valeur absolue entre la solution exacte et la solution approchée\"\n",
|
||||
" \"Différence en valeur absolue entre la solution exacte et la solution approchée\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -672,7 +672,7 @@
|
||||
"plt.xlabel(\"$log(h)$\")\n",
|
||||
"plt.ylabel(\"$log(E)$\")\n",
|
||||
"plt.title(\n",
|
||||
" \"Erreur pour la méthode mon_schema en echelle logarithmique : log(E) en fonction de log(h)\"\n",
|
||||
" \"Erreur pour la méthode mon_schema en echelle logarithmique : log(E) en fonction de log(h)\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -781,7 +781,11 @@
|
||||
" x, U_app = solution_neumann(f3, M, a, b)\n",
|
||||
"\n",
|
||||
" plt.scatter(\n",
|
||||
" x, U_app, marker=\"+\", s=3, label=\"$h^2({A_N}_h + I_M)U = h^2F$ pour M={M}\"\n",
|
||||
" x,\n",
|
||||
" U_app,\n",
|
||||
" marker=\"+\",\n",
|
||||
" s=3,\n",
|
||||
" label=\"$h^2({A_N}_h + I_M)U = h^2F$ pour M={M}\",\n",
|
||||
" )\n",
|
||||
"plt.plot(x, u3(x), label=\"Solution exacte\", color=\"red\")\n",
|
||||
"plt.legend(fontsize=8)\n",
|
||||
@@ -836,7 +840,7 @@
|
||||
"plt.ylabel(r\"$\\max_{j=0,\\dots,M+1}|u(x_j)-u_j|$\")\n",
|
||||
"plt.legend(fontsize=7)\n",
|
||||
"plt.title(\n",
|
||||
" \"Différence en valeur absolue entre la solution exacte et la solution approchée\"\n",
|
||||
" \"Différence en valeur absolue entre la solution exacte et la solution approchée\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -877,7 +881,7 @@
|
||||
"plt.xlabel(\"$log(h)$\")\n",
|
||||
"plt.ylabel(\"$log(E)$\")\n",
|
||||
"plt.title(\n",
|
||||
" \"Erreur pour la méthode mon_schema en echelle logarithmique : log(E) en fonction de log(h)\"\n",
|
||||
" \"Erreur pour la méthode mon_schema en echelle logarithmique : log(E) en fonction de log(h)\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -41,7 +41,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class ModeleSchelling:\n",
|
||||
" def __init__(self, M, p, L):\n",
|
||||
" def __init__(self, M, p, L) -> None:\n",
|
||||
" self.M = M\n",
|
||||
" self.p = p\n",
|
||||
" self.L = L\n",
|
||||
@@ -51,7 +51,9 @@
|
||||
" def initialisation_grille(self):\n",
|
||||
" grille = np.zeros((self.M, self.M), dtype=int)\n",
|
||||
" occupes = np.random.choice(\n",
|
||||
" self.M * self.M, size=int((1 - self.p) * self.M * self.M), replace=False\n",
|
||||
" self.M * self.M,\n",
|
||||
" size=int((1 - self.p) * self.M * self.M),\n",
|
||||
" replace=False,\n",
|
||||
" )\n",
|
||||
" self.Ntot = len(occupes)\n",
|
||||
"\n",
|
||||
@@ -61,7 +63,7 @@
|
||||
"\n",
|
||||
" return grille\n",
|
||||
"\n",
|
||||
" def afficher_grille(self, title):\n",
|
||||
" def afficher_grille(self, title) -> None:\n",
|
||||
" color = plt.imshow(self.grille, cmap=\"coolwarm\", interpolation=\"nearest\")\n",
|
||||
" plt.colorbar(color)\n",
|
||||
" plt.title(title)\n",
|
||||
@@ -82,25 +84,20 @@
|
||||
" case = self.grille[i, j]\n",
|
||||
" if case == 0:\n",
|
||||
" pass\n",
|
||||
" elif case == groupe:\n",
|
||||
" count_similaires += 1\n",
|
||||
" else:\n",
|
||||
" if case == groupe:\n",
|
||||
" count_similaires += 1\n",
|
||||
" else:\n",
|
||||
" count_differents += 1\n",
|
||||
" count_differents += 1\n",
|
||||
"\n",
|
||||
" if count_similaires + count_differents == 0:\n",
|
||||
" return False\n",
|
||||
" else:\n",
|
||||
" return (\n",
|
||||
" float(count_similaires / (count_similaires + count_differents))\n",
|
||||
" >= self.L\n",
|
||||
" )\n",
|
||||
" return float(count_similaires / (count_similaires + count_differents)) >= self.L\n",
|
||||
"\n",
|
||||
" def clusters(self):\n",
|
||||
" visited = np.zeros_like(self.grille, dtype=bool)\n",
|
||||
" clusters = []\n",
|
||||
"\n",
|
||||
" def dfs(i, j, groupe, cluster): # Depth-First Search\n",
|
||||
" def dfs(i, j, groupe, cluster) -> None: # Depth-First Search\n",
|
||||
" stack = [(i, j)]\n",
|
||||
"\n",
|
||||
" while stack:\n",
|
||||
@@ -139,7 +136,7 @@
|
||||
" S += int(clusters[i][1]) ** 2\n",
|
||||
" return S * 2 / (self.Ntot**2)\n",
|
||||
"\n",
|
||||
" def simuler(self, T=400, move_satisfaits=True):\n",
|
||||
" def simuler(self, T=400, move_satisfaits=True) -> None:\n",
|
||||
" for _t in range(1, int((1 - self.p) * self.M**2 * T)):\n",
|
||||
" agents = [\n",
|
||||
" (i, j)\n",
|
||||
@@ -179,7 +176,7 @@
|
||||
" self.grille[agent[0]][agent[1]] = 0\n",
|
||||
" cases_non_occupees.append(agent)\n",
|
||||
" self.afficher_grille(\n",
|
||||
" f\"Configuration Finale de T={T} pour (M, p, L) = ({self.M},{self.p},{self.L})\"\n",
|
||||
" f\"Configuration Finale de T={T} pour (M, p, L) = ({self.M},{self.p},{self.L})\",\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
@@ -744,7 +741,7 @@
|
||||
" plt.xlabel(\"L\")\n",
|
||||
" plt.ylabel(\"S\")\n",
|
||||
" plt.title(\n",
|
||||
" f\"Evolution du coefficient de ségrégation S en fonction de la tolérance L = {L} pour p = {p}\"\n",
|
||||
" f\"Evolution du coefficient de ségrégation S en fonction de la tolérance L = {L} pour p = {p}\",\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
"source": [
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import numpy as np\n",
|
||||
"import scipy.stats as stats"
|
||||
"from scipy import stats"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -144,7 +144,11 @@
|
||||
" label=\"Echantillon de X - Y/2\",\n",
|
||||
")\n",
|
||||
"plt.hist(\n",
|
||||
" sampleZ / 2, bins=intervalle, density=True, alpha=0.7, label=\"Echantillon de Z\"\n",
|
||||
" sampleZ / 2,\n",
|
||||
" bins=intervalle,\n",
|
||||
" density=True,\n",
|
||||
" alpha=0.7,\n",
|
||||
" label=\"Echantillon de Z\",\n",
|
||||
")\n",
|
||||
"plt.legend()"
|
||||
]
|
||||
@@ -178,7 +182,10 @@
|
||||
" sample = np.random.binomial(k, p, nb_repl)\n",
|
||||
" intervalle = np.linspace(np.min(sample), np.max(sample), 100)\n",
|
||||
" plt.hist(\n",
|
||||
" sample, bins=intervalle, density=True, label=f\"Echantillon de X pour n={k}\"\n",
|
||||
" sample,\n",
|
||||
" bins=intervalle,\n",
|
||||
" density=True,\n",
|
||||
" label=f\"Echantillon de X pour n={k}\",\n",
|
||||
" )\n",
|
||||
" plt.legend()"
|
||||
]
|
||||
@@ -271,7 +278,7 @@
|
||||
"\n",
|
||||
"for _ in range(nb_lgn):\n",
|
||||
" liste_Sn.append(\n",
|
||||
" np.mean(np.sqrt(3 * nb_repl) * np.tan(np.pi / 2 * sample_uniforme(nb_repl)))\n",
|
||||
" np.mean(np.sqrt(3 * nb_repl) * np.tan(np.pi / 2 * sample_uniforme(nb_repl))),\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"# nb_bins = 100\n",
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
"import numpy as np\n",
|
||||
"import scipy.optimize as opt\n",
|
||||
"import scipy.special as sp\n",
|
||||
"import scipy.stats as stats"
|
||||
"from scipy import stats"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -220,7 +220,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def simule(a, b, n):\n",
|
||||
"def simule(a, b, n) -> None:\n",
|
||||
" X = np.random.gamma(a, 1 / b, n)\n",
|
||||
" intervalle = np.linspace(0, np.max(X), 100)\n",
|
||||
" plt.hist(X, bins=intervalle, density=True, label=\"Echantillon de X\")\n",
|
||||
|
||||
BIN
M1/General Linear Models/Projet/GLM Final report.pdf
Normal file
BIN
M1/General Linear Models/Projet/Projet_R_MLG.pdf
Normal file
@@ -1,5 +1,5 @@
|
||||
```{r}
|
||||
setwd('/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/TP1-bis')
|
||||
setwd("/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/TP1-bis")
|
||||
|
||||
library(tidyverse)
|
||||
options(scipen = 999, digits = 5)
|
||||
@@ -56,8 +56,8 @@ summary(model)
|
||||
coef(model)
|
||||
```
|
||||
```{r}
|
||||
data <- data %>%
|
||||
mutate(yhat = beta0 + beta1 * poids) %>%
|
||||
data <- data |>
|
||||
mutate(yhat = beta0 + beta1 * poids) |>
|
||||
mutate(residuals = cholesterol - yhat)
|
||||
|
||||
data
|
||||
@@ -71,8 +71,8 @@ ggplot(data, aes(x = poids, y = cholesterol)) +
|
||||
```{r}
|
||||
mean(data[, "cholesterol"])
|
||||
mean(data[, "yhat"])
|
||||
mean(data[, "residuals"]) %>% round(10)
|
||||
cov(data[, "residuals"], data[, "poids"]) %>% round(10)
|
||||
mean(data[, "residuals"]) |> round(10)
|
||||
cov(data[, "residuals"], data[, "poids"]) |> round(10)
|
||||
(RSS <- sum((data[, "residuals"])^2))
|
||||
(TSS <- sum((y - mean(y))^2))
|
||||
TSS - beta1 * Sxy
|
||||
@@ -117,10 +117,10 @@ t <- qt(0.975, dof)
|
||||
sigma_hat <- sigma(model)
|
||||
n <- nrow(data)
|
||||
|
||||
data <- data %>%
|
||||
data <- data |>
|
||||
mutate(error = t *
|
||||
sigma_hat *
|
||||
sqrt(1 / n + (poids - mean(poids))^2 / RSS)) %>%
|
||||
sqrt(1 / n + (poids - mean(poids))^2 / RSS)) |>
|
||||
mutate(conf.low = yhat - error, conf.high = yhat + error, error = NULL)
|
||||
|
||||
ggplot(data, aes(x = poids, y = cholesterol)) +
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
```{r}
|
||||
setwd('/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/TP2-bis')
|
||||
setwd("/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/TP2-bis")
|
||||
|
||||
library(tidyverse)
|
||||
library(GGally)
|
||||
@@ -10,9 +10,9 @@ library(qqplotr)
|
||||
options(scipen = 999, digits = 5)
|
||||
```
|
||||
```{r}
|
||||
data <- read.csv('data02.csv', sep = ',', header = TRUE, dec = ".")
|
||||
data %>%
|
||||
mutate(type = factor(type, levels = c("maths", "english", "final"), labels = c("maths", "english", "final"))) %>%
|
||||
data <- read.csv("data02.csv", sep = ",", header = TRUE, dec = ".")
|
||||
data |>
|
||||
mutate(type = factor(type, levels = c("maths", "english", "final"), labels = c("maths", "english", "final"))) |>
|
||||
ggplot(aes(x = note)) +
|
||||
facet_wrap(vars(type), scales = "free_x") +
|
||||
geom_histogram(binwidth = 4, color = "black", fill = "grey80") +
|
||||
@@ -21,8 +21,8 @@ data %>%
|
||||
```
|
||||
```{r}
|
||||
data_wide <- pivot_wider(data, names_from = type, values_from = note)
|
||||
data_wide %>%
|
||||
select(-id) %>%
|
||||
data_wide |>
|
||||
select(-id) |>
|
||||
ggpairs() + theme_bw(14)
|
||||
```
|
||||
```{r}
|
||||
@@ -67,12 +67,12 @@ linearHypothesis(model, "maths - english = 0")
|
||||
|
||||
# Submodel testing
|
||||
```{r}
|
||||
data_predict <- predict(model, newdata = expand.grid(maths = seq(70, 90, 2), english = c(75, 85)), interval = "confidence") %>%
|
||||
as_tibble() %>%
|
||||
data_predict <- predict(model, newdata = expand.grid(maths = seq(70, 90, 2), english = c(75, 85)), interval = "confidence") |>
|
||||
as_tibble() |>
|
||||
bind_cols(expand.grid(maths = seq(70, 90, 2), english = c(75, 85)))
|
||||
|
||||
data_predict %>%
|
||||
mutate(english = as.factor(english)) %>%
|
||||
data_predict |>
|
||||
mutate(english = as.factor(english)) |>
|
||||
ggplot(aes(x = maths, y = fit, color = english, fill = english, label = round(fit, 1))) +
|
||||
geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.2, show.legend = FALSE) +
|
||||
geom_point(size = 2) +
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
```{r}
|
||||
setwd('/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/TP2')
|
||||
setwd("/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/TP2")
|
||||
```
|
||||
|
||||
# Question 1 : Import dataset and check variables
|
||||
@@ -9,8 +9,8 @@ library(dplyr)
|
||||
cepages <- read.csv("Cepages B TP2.csv", header = TRUE, sep = ";", dec = ",")
|
||||
cepages$Couleur <- as.factor(cepages$Couleur)
|
||||
cepages$Origine <- as.factor(cepages$Origine)
|
||||
cepages <- cepages %>% mutate(across(where(is.character), as.numeric))
|
||||
cepages <- cepages %>% mutate(across(where(is.integer), as.numeric))
|
||||
cepages <- cepages |> mutate(across(where(is.character), as.numeric))
|
||||
cepages <- cepages |> mutate(across(where(is.integer), as.numeric))
|
||||
paged_table(cepages)
|
||||
```
|
||||
|
||||
@@ -39,7 +39,7 @@ tapply(cepages$pH, list(cepages$Couleur, cepages$Origine), mean)
|
||||
library(ggplot2)
|
||||
|
||||
ggplot(cepages, aes(x = AcTot, y = pH, color = Couleur)) +
|
||||
geom_point(col = 'red', size = 0.5) +
|
||||
geom_point(col = "red", size = 0.5) +
|
||||
geom_smooth(method = "lm", se = F)
|
||||
|
||||
ggplot(cepages, aes(y = pH, x = AcTot, colour = Couleur, fill = Couleur)) +
|
||||
@@ -50,8 +50,8 @@ ggplot(cepages, aes(y = pH, x = AcTot, colour = Couleur, fill = Couleur)) +
|
||||
|
||||
```{r}
|
||||
ggplot(cepages, aes(x = AcTot, y = pH, color = Origine)) +
|
||||
geom_smooth(method = 'lm', se = F) +
|
||||
geom_point(col = 'red', size = 0.5)
|
||||
geom_smooth(method = "lm", se = F) +
|
||||
geom_point(col = "red", size = 0.5)
|
||||
|
||||
ggplot(cepages, aes(y = pH, x = AcTot, colour = Origine, fill = Origine)) +
|
||||
geom_boxplot(alpha = 0.5, outlier.alpha = 0)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
```{r}
|
||||
setwd('/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/TP3')
|
||||
setwd("/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/TP3")
|
||||
```
|
||||
|
||||
# Question 1 : Import dataset and check variables
|
||||
@@ -9,8 +9,8 @@ library(dplyr)
|
||||
ozone <- read.table("ozone.txt", header = TRUE, sep = " ", dec = ".")
|
||||
ozone$vent <- as.factor(ozone$vent)
|
||||
ozone$temps <- as.factor(ozone$temps)
|
||||
ozone <- ozone %>% mutate(across(where(is.character), as.numeric))
|
||||
ozone <- ozone %>% mutate(across(where(is.integer), as.numeric))
|
||||
ozone <- ozone |> mutate(across(where(is.character), as.numeric))
|
||||
ozone <- ozone |> mutate(across(where(is.integer), as.numeric))
|
||||
paged_table(ozone)
|
||||
```
|
||||
|
||||
@@ -25,8 +25,8 @@ summary(model_T12)
|
||||
library(ggplot2)
|
||||
|
||||
ggplot(ozone, aes(x = T12, y = maxO3)) +
|
||||
geom_smooth(method = 'lm', se = T) +
|
||||
geom_point(col = 'red', size = 0.5) +
|
||||
geom_smooth(method = "lm", se = T) +
|
||||
geom_point(col = "red", size = 0.5) +
|
||||
labs(title = "maxO3 ~ T12") +
|
||||
theme_minimal()
|
||||
```
|
||||
@@ -130,5 +130,4 @@ new_obs <- list(
|
||||
maxO3v = 85
|
||||
)
|
||||
predict(model_backward, new_obs, interval = "confidence")
|
||||
|
||||
```
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
```{r}
|
||||
setwd('/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/TP4')
|
||||
setwd("/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/TP4")
|
||||
|
||||
set.seed(0911)
|
||||
library(ggplot2)
|
||||
@@ -22,19 +22,19 @@ library(lmtest) # LRtest
|
||||
library(survey) # Wald test
|
||||
library(vcdExtra) # deviance test
|
||||
|
||||
library(rsample) # for data splitting
|
||||
library(rsample) # for data splitting
|
||||
library(glmnet)
|
||||
library(nnet) # multinom, glm
|
||||
library(caret)
|
||||
library(ROCR)
|
||||
#library(PRROC) autre package pour courbe roc et courbe pr
|
||||
# library(PRROC) autre package pour courbe roc et courbe pr
|
||||
library(ISLR) # dataset for statistical learning
|
||||
|
||||
ggplot2::theme_set(ggplot2::theme_light())# Set the graphical theme
|
||||
ggplot2::theme_set(ggplot2::theme_light()) # Set the graphical theme
|
||||
```
|
||||
```{r}
|
||||
car <- read.table('car_income.txt', header = TRUE, sep = ';')
|
||||
car %>% rmarkdown::paged_table()
|
||||
car <- read.table("car_income.txt", header = TRUE, sep = ";")
|
||||
car |> rmarkdown::paged_table()
|
||||
summary(car)
|
||||
```
|
||||
|
||||
@@ -44,7 +44,7 @@ summary(model_purchase)
|
||||
```
|
||||
|
||||
```{r}
|
||||
p1 <- car %>%
|
||||
p1 <- car |>
|
||||
ggplot(aes(y = purchase, x = income + age)) +
|
||||
geom_point(alpha = .15) +
|
||||
geom_smooth(method = "lm") +
|
||||
@@ -53,7 +53,7 @@ p1 <- car %>%
|
||||
ylab("Probability of Purchase")
|
||||
|
||||
|
||||
p2 <- car %>%
|
||||
p2 <- car |>
|
||||
ggplot(aes(y = purchase, x = income + age)) +
|
||||
geom_point(alpha = .15) +
|
||||
geom_smooth(method = "glm", method.args = list(family = "binomial")) +
|
||||
@@ -66,9 +66,9 @@ ggplotly(p2)
|
||||
```
|
||||
|
||||
```{r}
|
||||
car <- car %>%
|
||||
car <- car |>
|
||||
mutate(old = ifelse(car$age > 3, 1, 0))
|
||||
car <- car %>%
|
||||
car <- car |>
|
||||
mutate(rich = ifelse(car$income > 40, 1, 0))
|
||||
model_old <- glm(purchase ~ age + income + rich + old, data = car, family = "binomial")
|
||||
summary(model_old)
|
||||
@@ -90,5 +90,5 @@ pima.te$pred <- as.factor(pima.te$pred)
|
||||
pima.te$type <- as.factor(pima.te$type)
|
||||
|
||||
# Confusion matrix
|
||||
confusionMatrix(data = pima.te$type, reference = pima.te$pred, positive = 'Yes')
|
||||
confusionMatrix(data = pima.te$type, reference = pima.te$pred, positive = "Yes")
|
||||
```
|
||||
|
||||
@@ -184,7 +184,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def F(y, t, a, r):\n",
|
||||
" S, I, R = y\n",
|
||||
" S, I, _R = y\n",
|
||||
" dS = -r * S * I\n",
|
||||
" dI = r * S * I - a * I\n",
|
||||
" dR = a * I\n",
|
||||
@@ -338,7 +338,7 @@
|
||||
" return np.max(\n",
|
||||
" np.power(np.abs(sol_appr - sol_exact), 2)[\n",
|
||||
" np.isfinite(np.power(np.abs(sol_appr - sol_exact), 2))\n",
|
||||
" ]\n",
|
||||
" ],\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"\n",
|
||||
|
||||
@@ -38,6 +38,7 @@
|
||||
],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"from sklearn.datasets import make_classification\n",
|
||||
"from sklearn.metrics import accuracy_score\n",
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
@@ -47,12 +48,18 @@
|
||||
"\n",
|
||||
"for _ in range(10):\n",
|
||||
" X, y = make_classification(\n",
|
||||
" n_samples=1000, n_features=4, n_classes=3, n_clusters_per_class=1\n",
|
||||
" n_samples=1000,\n",
|
||||
" n_features=4,\n",
|
||||
" n_classes=3,\n",
|
||||
" n_clusters_per_class=1,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n",
|
||||
" model = MLPClassifier(\n",
|
||||
" hidden_layer_sizes=(5, 7), activation=\"relu\", max_iter=10000, solver=\"adam\"\n",
|
||||
" hidden_layer_sizes=(5, 7),\n",
|
||||
" activation=\"relu\",\n",
|
||||
" max_iter=10000,\n",
|
||||
" solver=\"adam\",\n",
|
||||
" )\n",
|
||||
" model.fit(X_train, y_train)\n",
|
||||
"\n",
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
"source": [
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import numpy as np\n",
|
||||
"import scipy.stats as stats"
|
||||
"from scipy import stats"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -46,15 +46,12 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def S(t, S0, mu, sigma, W):\n",
|
||||
" \"\"\"\n",
|
||||
" Solution exacte de l'EDS de Black-Scholes\n",
|
||||
" \"\"\"\n",
|
||||
" \"\"\"Solution exacte de l'EDS de Black-Scholes.\"\"\"\n",
|
||||
" return S0 * np.exp((mu - 0.5 * sigma**2) * t + sigma * W)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def euler_maruyama(mu, sigma, T, N, X0=0.0):\n",
|
||||
" \"\"\"\n",
|
||||
" Simulation d'une EDS de Black-Scholes par la méthode d'Euler-Maruyama\n",
|
||||
" \"\"\"Simulation d'une EDS de Black-Scholes par la méthode d'Euler-Maruyama.\n",
|
||||
"\n",
|
||||
" Paramètres :\n",
|
||||
" mu (float) : drift\n",
|
||||
@@ -83,9 +80,8 @@
|
||||
" return t, X\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def plot_brownien(t, X, B=None):\n",
|
||||
" \"\"\"\n",
|
||||
" Plot la simulation d'Euler-Maruyama\n",
|
||||
"def plot_brownien(t, X, B=None) -> None:\n",
|
||||
" \"\"\"Plot la simulation d'Euler-Maruyama.\n",
|
||||
"\n",
|
||||
" Paramètres :\n",
|
||||
" t (array-like) : tableau des temps\n",
|
||||
@@ -168,9 +164,8 @@
|
||||
"np.random.seed(333)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def plot_convergence(S0, mu, sigma, T):\n",
|
||||
" \"\"\"\n",
|
||||
" Plot la convergence du schéma d'Euler-Maruyama\n",
|
||||
"def plot_convergence(S0, mu, sigma, T) -> None:\n",
|
||||
" \"\"\"Plot la convergence du schéma d'Euler-Maruyama.\n",
|
||||
"\n",
|
||||
" Paramètres :\n",
|
||||
" S0 (int) : valeur initiale\n",
|
||||
@@ -276,7 +271,7 @@
|
||||
"\n",
|
||||
"def is_barrier_breached(X, B):\n",
|
||||
" \"\"\"Renvoie True si la barrière est franchie, False sinon\n",
|
||||
" La barrière est franchie si X >= B\n",
|
||||
" La barrière est franchie si X >= B.\n",
|
||||
"\n",
|
||||
" Paramètres:\n",
|
||||
" X (array-like): Trajectoire des valeurs\n",
|
||||
@@ -291,7 +286,7 @@
|
||||
"print(\n",
|
||||
" \"La barrière a été franchie\"\n",
|
||||
" if is_barrier_breached(X, B)\n",
|
||||
" else \"La barrière n'a pas été franchie\"\n",
|
||||
" else \"La barrière n'a pas été franchie\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -302,8 +297,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def plot_browniens(trajectories, B):\n",
|
||||
" \"\"\"Trace les trajectoires de Brownien et la barrière\n",
|
||||
"def plot_browniens(trajectories, B) -> None:\n",
|
||||
" \"\"\"Trace les trajectoires de Brownien et la barrière.\n",
|
||||
"\n",
|
||||
" Paramètres:\n",
|
||||
" trajectories (list of tuples): Liste des trajectoires avec le temps et les valeurs\n",
|
||||
@@ -335,8 +330,7 @@
|
||||
" \"\"\"\n",
|
||||
" if not is_barrier_breached(X, B):\n",
|
||||
" return max(X[-1] - K, 0)\n",
|
||||
" else:\n",
|
||||
" return 0\n",
|
||||
" return 0\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def call_BS(x):\n",
|
||||
@@ -457,7 +451,7 @@
|
||||
"np.random.seed(333)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def plot_payoff_errors():\n",
|
||||
"def plot_payoff_errors() -> None:\n",
|
||||
" \"\"\"Trace l'erreur de convergence du payoff actualisé en fonction de N.\"\"\"\n",
|
||||
" errors = []\n",
|
||||
"\n",
|
||||
@@ -515,7 +509,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.3"
|
||||
"version": "3.13.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -51,7 +51,7 @@
|
||||
" for h in h_list:\n",
|
||||
" t = np.arange(a, b, h)\n",
|
||||
" y = np.array(\n",
|
||||
" [3 / 4 * h * f(t[i] + h / 3) + h / 4 * f(t[i] + h) for i in range(len(t))]\n",
|
||||
" [3 / 4 * h * f(t[i] + h / 3) + h / 4 * f(t[i] + h) for i in range(len(t))],\n",
|
||||
" )\n",
|
||||
" I_approx = np.sum(y)\n",
|
||||
" I.append(I_approx)\n",
|
||||
@@ -159,11 +159,15 @@
|
||||
"\n",
|
||||
" for n in range(N - 1):\n",
|
||||
" p1 = f(vt[n], yn[:, n])\n",
|
||||
"\n",
|
||||
" def F1(p2):\n",
|
||||
" return f(vt[n] + h / 3, yn[:, n] + h / 6 * (p1 + p2)) - p2\n",
|
||||
"\n",
|
||||
" p2 = newton(F1, yn[:, n], fprime=None, tol=tol, maxiter=itmax)\n",
|
||||
"\n",
|
||||
" def F2(yn1):\n",
|
||||
" return yn[:, n] + h / 4 * (3 * p2 + f(vt[n + 1], yn1)) - yn1\n",
|
||||
"\n",
|
||||
" yn[:, n + 1] = newton(F2, yn[:, n], fprime=None, tol=tol, maxiter=itmax)\n",
|
||||
" return yn"
|
||||
]
|
||||
@@ -322,7 +326,7 @@
|
||||
" 1 + np.power(x, 2) * y - (z + 1) * x,\n",
|
||||
" x * z - np.power(x, 2) * y,\n",
|
||||
" -x * z + 1.45,\n",
|
||||
" ]\n",
|
||||
" ],\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"\n",
|
||||
@@ -408,7 +412,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.3"
|
||||
"version": "3.13.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -682,7 +682,7 @@
|
||||
" [\n",
|
||||
" (F(x + delta * e(i, d)) - F(x - delta * e(i, d))) / (2 * delta)\n",
|
||||
" for i in range(d)\n",
|
||||
" ]\n",
|
||||
" ],\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"\n",
|
||||
@@ -951,7 +951,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.3"
|
||||
"version": "3.13.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -66,6 +66,8 @@
|
||||
"\n",
|
||||
"def f(x):\n",
|
||||
" return np.tanh(x)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"aL, aR = -20, 3\n",
|
||||
"print(dichotomy(f, aL, aR))"
|
||||
]
|
||||
@@ -135,9 +137,15 @@
|
||||
"\n",
|
||||
"def f(x):\n",
|
||||
" return np.log(np.exp(x) + np.exp(-x))\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"x0 = 1.8\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def df(x):\n",
|
||||
" return (np.exp(x) - np.exp(-x)) / (np.exp(x) + np.exp(-x))\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"print(Newton(f, df, x0))"
|
||||
]
|
||||
},
|
||||
@@ -188,6 +196,8 @@
|
||||
"\n",
|
||||
"def f(x):\n",
|
||||
" return np.log(np.exp(x) + np.exp(-x))\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"xx = [(1, 1.9), (1, 2.3), (1, 2.4)]\n",
|
||||
"\n",
|
||||
"for x0, x1 in xx:\n",
|
||||
@@ -265,8 +275,12 @@
|
||||
"\n",
|
||||
"def f(x):\n",
|
||||
" return np.log(np.exp(x) + np.exp(-x))\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def df(x):\n",
|
||||
" return (np.exp(x) - np.exp(-x)) / (np.exp(x) + np.exp(-x))\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"print(DichotomyNewton(f, df, -20, 3))"
|
||||
]
|
||||
},
|
||||
@@ -370,7 +384,7 @@
|
||||
"optimal_point_newton, iterations_newton = newton_method(initial_guess_newton)\n",
|
||||
"print(f\"Optimal point (Newton): {optimal_point_newton}\")\n",
|
||||
"print(\n",
|
||||
" f\"Objective function value at optimal point (Newton): {objective_function(optimal_point_newton)}\"\n",
|
||||
" f\"Objective function value at optimal point (Newton): {objective_function(optimal_point_newton)}\",\n",
|
||||
")\n",
|
||||
"print(f\"Number of iterations (Newton): {iterations_newton}\")\n",
|
||||
"\n",
|
||||
@@ -381,7 +395,7 @@
|
||||
"optimal_point_dichotomy, iterations_dichotomy = dichotomy_method(aL, aR)\n",
|
||||
"print(f\"Optimal point (Dichotomy): {optimal_point_dichotomy}\")\n",
|
||||
"print(\n",
|
||||
" f\"Objective function value at optimal point (Dichotomy): {objective_function(optimal_point_dichotomy)}\"\n",
|
||||
" f\"Objective function value at optimal point (Dichotomy): {objective_function(optimal_point_dichotomy)}\",\n",
|
||||
")\n",
|
||||
"print(f\"Number of iterations (Dichotomy): {iterations_dichotomy}\")"
|
||||
]
|
||||
|
||||
@@ -46,7 +46,7 @@
|
||||
"def generate_thetas(n):\n",
|
||||
" random_steps = np.random.random(n)\n",
|
||||
" return np.concatenate(\n",
|
||||
" ([0], np.cumsum(random_steps / np.sum(random_steps) * (2 * np.pi)))\n",
|
||||
" ([0], np.cumsum(random_steps / np.sum(random_steps) * (2 * np.pi))),\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"\n",
|
||||
@@ -248,7 +248,7 @@
|
||||
" return result.x\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def plot_perimeter(n):\n",
|
||||
"def plot_perimeter(n) -> None:\n",
|
||||
" optimal_angles = optimize_polygon(n + 1)\n",
|
||||
" plt.figure(figsize=(7, 7))\n",
|
||||
" t = np.linspace(0, 2 * np.pi, 100)\n",
|
||||
@@ -503,7 +503,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.3"
|
||||
"version": "3.13.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -27,9 +27,10 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import yfinance as yf\n",
|
||||
"\n",
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
"import yfinance as yf"
|
||||
"import pandas as pd"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -406,7 +407,7 @@
|
||||
"print(f\"Standard deviation sd_T: {sd_T}\")\n",
|
||||
"print(f\"Allocation pi_T: {pi_T}\")\n",
|
||||
"print(\n",
|
||||
" f\"We can verify that the allocation is possible as the sum of the allocations for the different indices is {sum(pi_T)}, that is very close to 1\"\n",
|
||||
" f\"We can verify that the allocation is possible as the sum of the allocations for the different indices is {sum(pi_T)}, that is very close to 1\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -452,9 +453,9 @@
|
||||
"for i in range(len(std)):\n",
|
||||
" print(f\"The annualized volatilities of the index {Tickers[i]} is {std[i]}\")\n",
|
||||
" print(\n",
|
||||
" f\"The annualized expected returns of the index {Tickers[i]} is {mean[Tickers[i]]}\"\n",
|
||||
" f\"The annualized expected returns of the index {Tickers[i]} is {mean[Tickers[i]]}\",\n",
|
||||
" )\n",
|
||||
" print(\"\")\n",
|
||||
" print()\n",
|
||||
"\n",
|
||||
"print(f\"The annualized volatility of the Tangent Portfolio is {sd_T * np.sqrt(252)}\")\n",
|
||||
"print(f\"The annualized expected return of the Tangent Portfolio is {m_T * 252}\")"
|
||||
@@ -494,7 +495,7 @@
|
||||
"\n",
|
||||
"for i in range(4):\n",
|
||||
" print(\n",
|
||||
" f\"the sharpe ratio of the index {Tickers[i]} is {(mean[Tickers[i]] - r) / std[i]}\"\n",
|
||||
" f\"the sharpe ratio of the index {Tickers[i]} is {(mean[Tickers[i]] - r) / std[i]}\",\n",
|
||||
" )"
|
||||
]
|
||||
}
|
||||
|
||||
@@ -13,9 +13,10 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import yfinance as yf\n",
|
||||
"\n",
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
"import yfinance as yf"
|
||||
"import pandas as pd"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -530,7 +531,7 @@
|
||||
"\n",
|
||||
"# Self financing portfolio\n",
|
||||
"m_w = np.sqrt(\n",
|
||||
" (mean - b / a * vec1).T.dot(inv_sigma).dot(mean - b / a * vec1)\n",
|
||||
" (mean - b / a * vec1).T.dot(inv_sigma).dot(mean - b / a * vec1),\n",
|
||||
") # Expected return\n",
|
||||
"\n",
|
||||
"# Tangent portfolio\n",
|
||||
@@ -580,7 +581,7 @@
|
||||
"range_sup = np.max(mean) + 1\n",
|
||||
"y = np.linspace(range_inf, range_sup, 50)\n",
|
||||
"x_1 = np.array(\n",
|
||||
" [np.sqrt(((y - m_a) / m_w) ** 2 + sd_a**2)]\n",
|
||||
" [np.sqrt(((y - m_a) / m_w) ** 2 + sd_a**2)],\n",
|
||||
") # Sigma values for the frontier\n",
|
||||
"x_2 = np.array([(y - r) / (m_T - r) * sd_T]) # Sigma values for the Capital Market Line\n",
|
||||
"\n",
|
||||
|
||||
@@ -902,7 +902,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"def divisible_by_3_and13(n):\n",
|
||||
"def divisible_by_3_and13(n) -> None:\n",
|
||||
" if n % 3 == 0 and n % 13 == 0:\n",
|
||||
" print(n, \"is divisible by 3 and 13\")\n",
|
||||
" else:\n",
|
||||
@@ -1114,7 +1114,12 @@
|
||||
"R = multivariate_normal([0, 0], np.eye(2))\n",
|
||||
"\n",
|
||||
"surf = ax.plot_surface(\n",
|
||||
" X, Y, R.pdf(np.dstack((X, Y))), cmap=\"coolwarm\", linewidth=0, antialiased=False\n",
|
||||
" X,\n",
|
||||
" Y,\n",
|
||||
" R.pdf(np.dstack((X, Y))),\n",
|
||||
" cmap=\"coolwarm\",\n",
|
||||
" linewidth=0,\n",
|
||||
" antialiased=False,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"fig.colorbar(surf, shrink=0.5, aspect=5)\n",
|
||||
|
||||
@@ -238,7 +238,10 @@
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"\n",
|
||||
"X_train, X_test, y_train, y_test = train_test_split(\n",
|
||||
" X, y, test_size=0.33, random_state=42\n",
|
||||
" X,\n",
|
||||
" y,\n",
|
||||
" test_size=0.33,\n",
|
||||
" random_state=42,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -702,10 +705,10 @@
|
||||
"predictions2 = [knn_class_2(X_train, y_train, data, 3) for data in X_test]\n",
|
||||
"\n",
|
||||
"print(\n",
|
||||
" f\"The accuracy rate of our classifier is {np.sum(predictions == y_test) / len(predictions) * 100}%\"\n",
|
||||
" f\"The accuracy rate of our classifier is {np.sum(predictions == y_test) / len(predictions) * 100}%\",\n",
|
||||
")\n",
|
||||
"print(\n",
|
||||
" f\"The accuracy rate of our classifier is {np.sum(predictions2 == y_test) / len(predictions2) * 100}%\"\n",
|
||||
" f\"The accuracy rate of our classifier is {np.sum(predictions2 == y_test) / len(predictions2) * 100}%\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -1278,6 +1281,7 @@
|
||||
"\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"from sklearn.neighbors import KNeighborsClassifier"
|
||||
]
|
||||
},
|
||||
@@ -1412,7 +1416,10 @@
|
||||
"f, axarr = plt.subplots(2, 3, sharex=\"col\", sharey=\"row\", figsize=(15, 12))\n",
|
||||
"\n",
|
||||
"for idx, clf, tt in zip(\n",
|
||||
" product([0, 1, 2], [0, 1, 2]), KNNs, [f\"KNN (k={k})\" for k in nb_neighbors], strict=False\n",
|
||||
" product([0, 1, 2], [0, 1, 2]),\n",
|
||||
" KNNs,\n",
|
||||
" [f\"KNN (k={k})\" for k in nb_neighbors],\n",
|
||||
" strict=False,\n",
|
||||
"):\n",
|
||||
" Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])\n",
|
||||
" Z = Z.reshape(xx.shape)\n",
|
||||
@@ -2091,7 +2098,10 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"X_train, X_test, y_train, y_test = train_test_split(\n",
|
||||
" X, y, test_size=0.33, random_state=42\n",
|
||||
" X,\n",
|
||||
" y,\n",
|
||||
" test_size=0.33,\n",
|
||||
" random_state=42,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -37,6 +37,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"from sklearn import datasets\n",
|
||||
"\n",
|
||||
"iris = datasets.load_iris(as_frame=True)"
|
||||
@@ -402,7 +403,10 @@
|
||||
"from sklearn.neighbors import KNeighborsClassifier\n",
|
||||
"\n",
|
||||
"X_train, X_test, y_train, y_test = train_test_split(\n",
|
||||
" X, y, test_size=0.33, random_state=42\n",
|
||||
" X,\n",
|
||||
" y,\n",
|
||||
" test_size=0.33,\n",
|
||||
" random_state=42,\n",
|
||||
")\n",
|
||||
"knn_clf = KNeighborsClassifier(n_neighbors=5)\n",
|
||||
"knn_clf.fit(X_train, y_train)\n",
|
||||
@@ -583,7 +587,11 @@
|
||||
],
|
||||
"source": [
|
||||
"X_train_strat, X_test_strat, y_train_strat, y_test_strat = train_test_split(\n",
|
||||
" X, y, test_size=0.33, random_state=42, stratify=y\n",
|
||||
" X,\n",
|
||||
" y,\n",
|
||||
" test_size=0.33,\n",
|
||||
" random_state=42,\n",
|
||||
" stratify=y,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"y_test_strat.value_counts()"
|
||||
|
||||
@@ -36,6 +36,7 @@
|
||||
"source": [
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"from sklearn.linear_model import LogisticRegression"
|
||||
]
|
||||
},
|
||||
@@ -192,7 +193,10 @@
|
||||
"plt.figure(figsize=(8, 8))\n",
|
||||
"\n",
|
||||
"plt.scatter(\n",
|
||||
" X[:, 0], X[:, 1], alpha=0.3, cmap=mcolors.ListedColormap([\"steelblue\", \"tomato\"])\n",
|
||||
" X[:, 0],\n",
|
||||
" X[:, 1],\n",
|
||||
" alpha=0.3,\n",
|
||||
" cmap=mcolors.ListedColormap([\"steelblue\", \"tomato\"]),\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -708,20 +712,22 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def mini_batch_SGD(X, y, learning_rate, batch_size, epochs):\n",
|
||||
" \"\"\"\n",
|
||||
" Mini-batch stochastic gradient descent for logistic regression.\n",
|
||||
" \"\"\"Mini-batch stochastic gradient descent for logistic regression.\n",
|
||||
"\n",
|
||||
" Parameters:\n",
|
||||
" Parameters\n",
|
||||
" ----------\n",
|
||||
" X (numpy.ndarray): Input data of shape (n, d), where n is the number of samples and d is the number of features.\n",
|
||||
" y (numpy.ndarray): Labels of shape (n,), where n is the sample size.\n",
|
||||
" learning_rate (float): Learning rate for gradient descent.\n",
|
||||
" batch_size (int): Size of each mini-batch.\n",
|
||||
" epochs (int): Number of epochs to train.\n",
|
||||
"\n",
|
||||
" Returns:\n",
|
||||
" Returns\n",
|
||||
" -------\n",
|
||||
" w (numpy.ndarray): Final weight vector of shape (d,).\n",
|
||||
" b (float): Final bias term.\n",
|
||||
" costs_SGD (list): Cost function values at each step.\n",
|
||||
"\n",
|
||||
" \"\"\"\n",
|
||||
" # Initialization\n",
|
||||
" n, d = X.shape\n",
|
||||
@@ -40783,7 +40789,11 @@
|
||||
],
|
||||
"source": [
|
||||
"w_SGD, b_SGD, cost_SGD = mini_batch_SGD(\n",
|
||||
" X, y, learning_rate=5e-5, batch_size=50, epochs=1000\n",
|
||||
" X,\n",
|
||||
" y,\n",
|
||||
" learning_rate=5e-5,\n",
|
||||
" batch_size=50,\n",
|
||||
" epochs=1000,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -40950,7 +40960,11 @@
|
||||
],
|
||||
"source": [
|
||||
"w_SGD, b_SGD, costs_SGD = mini_batch_SGD(\n",
|
||||
" X, y, learning_rate=5e-4, batch_size=1000, epochs=30\n",
|
||||
" X,\n",
|
||||
" y,\n",
|
||||
" learning_rate=5e-4,\n",
|
||||
" batch_size=1000,\n",
|
||||
" epochs=30,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(\"The parameters computed by stochastic gradient descent are: \", w_SGD, b_SGD)"
|
||||
@@ -41041,7 +41055,10 @@
|
||||
"\n",
|
||||
"X, y = iris.data, iris.target\n",
|
||||
"X_train, X_test, y_train, y_test = train_test_split(\n",
|
||||
" X, y, test_size=0.33, random_state=5\n",
|
||||
" X,\n",
|
||||
" y,\n",
|
||||
" test_size=0.33,\n",
|
||||
" random_state=5,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"log_reg = LogisticRegression(fit_intercept=True)\n",
|
||||
@@ -41194,7 +41211,7 @@
|
||||
"model.add(tf.keras.layers.Input(shape=[28, 28])) # we specify the input shape\n",
|
||||
"model.add(tf.keras.layers.Flatten()) # we flatten the data\n",
|
||||
"model.add(\n",
|
||||
" tf.keras.layers.Dense(10, activation=\"softmax\")\n",
|
||||
" tf.keras.layers.Dense(10, activation=\"softmax\"),\n",
|
||||
") # 10 labels (figures from 0 to 9)\n",
|
||||
"# activation=\"softmax\" as it is a multiclass problem"
|
||||
]
|
||||
@@ -41237,7 +41254,9 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model.compile(\n",
|
||||
" loss=\"sparse_categorical_crossentropy\", optimizer=\"sgd\", metrics=[\"accuracy\"]\n",
|
||||
" loss=\"sparse_categorical_crossentropy\",\n",
|
||||
" optimizer=\"sgd\",\n",
|
||||
" metrics=[\"accuracy\"],\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -898,9 +898,9 @@
|
||||
"ex = pd.DataFrame(\n",
|
||||
" {\n",
|
||||
" \"nom\": [\"Alice\", \"Nicolas\", \"Jean\"],\n",
|
||||
" \"age\": [19, np.NaN, np.NaN],\n",
|
||||
" \"exam\": [15, 14, np.NaN],\n",
|
||||
" }\n",
|
||||
" \"age\": [19, np.nan, np.nan],\n",
|
||||
" \"exam\": [15, 14, np.nan],\n",
|
||||
" },\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(\"data : \\n\", ex)\n",
|
||||
@@ -1084,7 +1084,7 @@
|
||||
],
|
||||
"source": [
|
||||
"# We remove the players for whom Salary is missing\n",
|
||||
"hitters.dropna(subset=[\"Salary\"], inplace=True)\n",
|
||||
"hitters = hitters.dropna(subset=[\"Salary\"])\n",
|
||||
"\n",
|
||||
"X = hitters.select_dtypes(include=int)\n",
|
||||
"Y = hitters[\"Salary\"]\n",
|
||||
@@ -2299,7 +2299,8 @@
|
||||
"\n",
|
||||
"linReg = LinearRegression()\n",
|
||||
"linReg.fit(\n",
|
||||
" Xtrain, Ytrain\n",
|
||||
" Xtrain,\n",
|
||||
" Ytrain,\n",
|
||||
") # no need to scale for OLS if you just want to predict (unless the solver works best with scaled data)\n",
|
||||
"# the predictions should not be different with or without standardization (could differ only owing to numerical problems)\n",
|
||||
"hatY_LinReg = linReg.predict(Xtest)\n",
|
||||
@@ -2545,7 +2546,9 @@
|
||||
"\n",
|
||||
"MSEs = []\n",
|
||||
"for name, estimator in zip(\n",
|
||||
" [\"LassoCV\", \"LassoBIC\", \"RidgeCV\", \"OLS\"], [lassoCV, lassoBIC, ridgeCV, linReg], strict=False\n",
|
||||
" [\"LassoCV\", \"LassoBIC\", \"RidgeCV\", \"OLS\"],\n",
|
||||
" [lassoCV, lassoBIC, ridgeCV, linReg],\n",
|
||||
" strict=False,\n",
|
||||
"):\n",
|
||||
" y_pred = estimator.predict(Xtest)\n",
|
||||
" MSE = mean_squared_error(Ytest, y_pred)\n",
|
||||
|
||||
@@ -227,7 +227,8 @@
|
||||
"source": [
|
||||
"sms = pd.read_csv(\"data/spam.csv\", encoding=\"latin\")\n",
|
||||
"\n",
|
||||
"sms.head()"
|
||||
"sms.head()\n",
|
||||
"sms = "
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -243,7 +244,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"sms.rename(columns={\"v1\": \"Label\", \"v2\": \"Text\"}, inplace=True)"
|
||||
"sms.rename(columns={\"v1\": \"Label\", \"v2\": \"Text\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1246,14 +1247,15 @@
|
||||
"# 2. Displaying the vectors :\n",
|
||||
"\n",
|
||||
"print(\n",
|
||||
" \"2. The vectors corresponding to the sms are : \\n\", X.toarray()\n",
|
||||
" \"2. The vectors corresponding to the sms are : \\n\",\n",
|
||||
" X.toarray(),\n",
|
||||
") # X.toarray because\n",
|
||||
"# X is a \"sparse\" matrix.\n",
|
||||
"\n",
|
||||
"# 3. For a new data x_0=\"iphone gratuit\",\n",
|
||||
"# you must also transform x_0 into a numerical vector before predicting.\n",
|
||||
"\n",
|
||||
"vec_x_0 = vec.transform([\"iphone gratuit\"]).toarray() #\n",
|
||||
"vec_x_0 = vec.transform([\"iphone gratuit\"]).toarray()\n",
|
||||
"print(\"3. The numerical vector corresponding to (x_0=iphone gratuit) is \\n\", vec_x_0)"
|
||||
]
|
||||
},
|
||||
@@ -1410,7 +1412,10 @@
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"\n",
|
||||
"X_train, X_test, y_train, y_test = train_test_split(\n",
|
||||
" X, y, test_size=0.30, random_state=50\n",
|
||||
" X,\n",
|
||||
" y,\n",
|
||||
" test_size=0.30,\n",
|
||||
" random_state=50,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(\"size of the training set: \", X_train.shape[0])\n",
|
||||
@@ -1986,7 +1991,7 @@
|
||||
" \"Iphone 15 is now free\",\n",
|
||||
" \"I want coffee\",\n",
|
||||
" \"I want to buy a new iphone\",\n",
|
||||
" ]\n",
|
||||
" ],\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"pred_my_sms = sms_bayes.predict(my_sms)\n",
|
||||
@@ -2055,7 +2060,10 @@
|
||||
"X_copy = (X.copy() >= 127).astype(int)\n",
|
||||
"\n",
|
||||
"X_train, X_test, y_train, y_test = train_test_split(\n",
|
||||
" X_copy, y, test_size=0.25, random_state=42\n",
|
||||
" X_copy,\n",
|
||||
" y,\n",
|
||||
" test_size=0.25,\n",
|
||||
" random_state=42,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"ber_bayes = BernoulliNB()\n",
|
||||
@@ -2199,7 +2207,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.3"
|
||||
"version": "3.13.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -92,6 +92,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"import tensorflow as tf\n",
|
||||
"\n",
|
||||
"tf.keras.utils.set_random_seed(42)\n",
|
||||
@@ -346,7 +347,7 @@
|
||||
" tf.keras.layers.Dense(300, activation=\"relu\", kernel_initializer=\"he_normal\"),\n",
|
||||
" tf.keras.layers.Dense(100, activation=\"relu\", kernel_initializer=\"he_normal\"),\n",
|
||||
" tf.keras.layers.Dense(10, activation=\"softmax\"),\n",
|
||||
" ]\n",
|
||||
" ],\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -691,7 +692,9 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model.compile(\n",
|
||||
" loss=\"sparse_categorical_crossentropy\", optimizer=\"adam\", metrics=[\"accuracy\"]\n",
|
||||
" loss=\"sparse_categorical_crossentropy\",\n",
|
||||
" optimizer=\"adam\",\n",
|
||||
" metrics=[\"accuracy\"],\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -1101,11 +1104,13 @@
|
||||
" tf.keras.layers.Dense(300, activation=\"relu\", kernel_initializer=\"he_normal\"),\n",
|
||||
" tf.keras.layers.Dense(100, activation=\"relu\", kernel_initializer=\"he_normal\"),\n",
|
||||
" tf.keras.layers.Dense(10, activation=\"softmax\"),\n",
|
||||
" ]\n",
|
||||
" ],\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"model_10.compile(\n",
|
||||
" loss=\"sparse_categorical_crossentropy\", optimizer=\"adam\", metrics=[\"accuracy\"]\n",
|
||||
" loss=\"sparse_categorical_crossentropy\",\n",
|
||||
" optimizer=\"adam\",\n",
|
||||
" metrics=[\"accuracy\"],\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"model_10.fit(X_train01, y_train, epochs=10, validation_data=(X_val01, y_val))"
|
||||
@@ -1270,7 +1275,8 @@
|
||||
],
|
||||
"source": [
|
||||
"early_stopping_cb = tf.keras.callbacks.EarlyStopping(\n",
|
||||
" patience=5, restore_best_weights=True\n",
|
||||
" patience=5,\n",
|
||||
" restore_best_weights=True,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"model = tf.keras.Sequential(\n",
|
||||
@@ -1280,11 +1286,13 @@
|
||||
" tf.keras.layers.Dense(300, activation=\"relu\", kernel_initializer=\"he_normal\"),\n",
|
||||
" tf.keras.layers.Dense(100, activation=\"relu\", kernel_initializer=\"he_normal\"),\n",
|
||||
" tf.keras.layers.Dense(10, activation=\"softmax\"),\n",
|
||||
" ]\n",
|
||||
" ],\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"model.compile(\n",
|
||||
" loss=\"sparse_categorical_crossentropy\", optimizer=\"adam\", metrics=[\"accuracy\"]\n",
|
||||
" loss=\"sparse_categorical_crossentropy\",\n",
|
||||
" optimizer=\"adam\",\n",
|
||||
" metrics=[\"accuracy\"],\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"history2 = model.fit(\n",
|
||||
@@ -1598,10 +1606,12 @@
|
||||
" tf.keras.layers.Input(shape=[28, 28]),\n",
|
||||
" tf.keras.layers.Flatten(),\n",
|
||||
" tf.keras.layers.Dense(10, activation=\"softmax\"),\n",
|
||||
" ]\n",
|
||||
" ],\n",
|
||||
")\n",
|
||||
"reg_log.compile(\n",
|
||||
" loss=\"sparse_categorical_crossentropy\", optimizer=\"adam\", metrics=[\"accuracy\"]\n",
|
||||
" loss=\"sparse_categorical_crossentropy\",\n",
|
||||
" optimizer=\"adam\",\n",
|
||||
" metrics=[\"accuracy\"],\n",
|
||||
")\n",
|
||||
"reg_log.fit(X_train01, y_train, epochs=90, validation_data=(X_val01, y_val))"
|
||||
]
|
||||
@@ -1709,10 +1719,12 @@
|
||||
" tf.keras.layers.Dense(300, activation=\"relu\", kernel_initializer=\"he_normal\"),\n",
|
||||
" tf.keras.layers.Dense(100, activation=\"relu\", kernel_initializer=\"he_normal\"),\n",
|
||||
" tf.keras.layers.Dense(10, activation=\"softmax\"),\n",
|
||||
" ]\n",
|
||||
" ],\n",
|
||||
")\n",
|
||||
"model_ter.compile(\n",
|
||||
" loss=\"sparse_categorical_crossentropy\", optimizer=\"adam\", metrics=[\"accuracy\"]\n",
|
||||
" loss=\"sparse_categorical_crossentropy\",\n",
|
||||
" optimizer=\"adam\",\n",
|
||||
" metrics=[\"accuracy\"],\n",
|
||||
")\n",
|
||||
"model_ter.fit(X_train, y_train, epochs=30, validation_data=(X_val, y_val))"
|
||||
]
|
||||
@@ -1820,10 +1832,12 @@
|
||||
" tf.keras.layers.Dense(300, activation=\"relu\", kernel_initializer=\"he_normal\"),\n",
|
||||
" tf.keras.layers.Dense(100, activation=\"relu\", kernel_initializer=\"he_normal\"),\n",
|
||||
" tf.keras.layers.Dense(10, activation=\"softmax\"),\n",
|
||||
" ]\n",
|
||||
" ],\n",
|
||||
")\n",
|
||||
"model_5.compile(\n",
|
||||
" loss=\"sparse_categorical_crossentropy\", optimizer=\"adam\", metrics=[\"accuracy\"]\n",
|
||||
" loss=\"sparse_categorical_crossentropy\",\n",
|
||||
" optimizer=\"adam\",\n",
|
||||
" metrics=[\"accuracy\"],\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"X_train_far_too_small, X_val_far_too_small = X_train / 25500.0, X_val / 25500.0\n",
|
||||
@@ -1938,16 +1952,22 @@
|
||||
" tf.keras.layers.Input(shape=[28, 28]),\n",
|
||||
" tf.keras.layers.Flatten(),\n",
|
||||
" tf.keras.layers.Dense(\n",
|
||||
" 300, activation=\"sigmoid\", kernel_initializer=\"he_normal\"\n",
|
||||
" 300,\n",
|
||||
" activation=\"sigmoid\",\n",
|
||||
" kernel_initializer=\"he_normal\",\n",
|
||||
" ),\n",
|
||||
" tf.keras.layers.Dense(\n",
|
||||
" 100, activation=\"sigmoid\", kernel_initializer=\"he_normal\"\n",
|
||||
" 100,\n",
|
||||
" activation=\"sigmoid\",\n",
|
||||
" kernel_initializer=\"he_normal\",\n",
|
||||
" ),\n",
|
||||
" tf.keras.layers.Dense(10, activation=\"softmax\"),\n",
|
||||
" ]\n",
|
||||
" ],\n",
|
||||
")\n",
|
||||
"model_sig_norm.compile(\n",
|
||||
" loss=\"sparse_categorical_crossentropy\", optimizer=\"adam\", metrics=[\"accuracy\"]\n",
|
||||
" loss=\"sparse_categorical_crossentropy\",\n",
|
||||
" optimizer=\"adam\",\n",
|
||||
" metrics=[\"accuracy\"],\n",
|
||||
")\n",
|
||||
"model_sig_norm.fit(X_train01, y_train, epochs=30, validation_data=(X_val, y_val))"
|
||||
]
|
||||
@@ -2043,16 +2063,22 @@
|
||||
" tf.keras.layers.Input(shape=[28, 28]),\n",
|
||||
" tf.keras.layers.Flatten(),\n",
|
||||
" tf.keras.layers.Dense(\n",
|
||||
" 300, activation=\"sigmoid\", kernel_initializer=\"he_normal\"\n",
|
||||
" 300,\n",
|
||||
" activation=\"sigmoid\",\n",
|
||||
" kernel_initializer=\"he_normal\",\n",
|
||||
" ),\n",
|
||||
" tf.keras.layers.Dense(\n",
|
||||
" 100, activation=\"sigmoid\", kernel_initializer=\"he_normal\"\n",
|
||||
" 100,\n",
|
||||
" activation=\"sigmoid\",\n",
|
||||
" kernel_initializer=\"he_normal\",\n",
|
||||
" ),\n",
|
||||
" tf.keras.layers.Dense(10, activation=\"softmax\"),\n",
|
||||
" ]\n",
|
||||
" ],\n",
|
||||
")\n",
|
||||
"model_sig_un_norm.compile(\n",
|
||||
" loss=\"sparse_categorical_crossentropy\", optimizer=\"adam\", metrics=[\"accuracy\"]\n",
|
||||
" loss=\"sparse_categorical_crossentropy\",\n",
|
||||
" optimizer=\"adam\",\n",
|
||||
" metrics=[\"accuracy\"],\n",
|
||||
")\n",
|
||||
"model_sig_un_norm.fit(X_train, y_train, epochs=30, validation_data=(X_val, y_val))"
|
||||
]
|
||||
@@ -2220,17 +2246,19 @@
|
||||
" tf.keras.layers.Dense(300, activation=\"relu\"),\n",
|
||||
" tf.keras.layers.Dense(100, activation=\"relu\"),\n",
|
||||
" tf.keras.layers.Dense(10, activation=\"softmax\"),\n",
|
||||
" ]\n",
|
||||
" ],\n",
|
||||
")\n",
|
||||
"model_high_variance.layers[1].set_weights(\n",
|
||||
" [200 * np.random.randn(28 * 28, 300) / 100, np.zeros(300)]\n",
|
||||
" [200 * np.random.randn(28 * 28, 300) / 100, np.zeros(300)],\n",
|
||||
")\n",
|
||||
"model_high_variance.layers[2].set_weights(\n",
|
||||
" [200 * np.random.randn(300, 100) / 100, np.zeros(100)]\n",
|
||||
" [200 * np.random.randn(300, 100) / 100, np.zeros(100)],\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"model_high_variance.compile(\n",
|
||||
" loss=\"sparse_categorical_crossentropy\", optimizer=\"adam\", metrics=[\"accuracy\"]\n",
|
||||
" loss=\"sparse_categorical_crossentropy\",\n",
|
||||
" optimizer=\"adam\",\n",
|
||||
" metrics=[\"accuracy\"],\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"model_high_variance.fit(X_train01, y_train, epochs=60, validation_data=(X_val01, y_val))"
|
||||
@@ -2258,7 +2286,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.3"
|
||||
"version": "3.13.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -543,7 +543,12 @@
|
||||
"plt.plot(X[:, 0], X[:, 1], \".b\", alpha=0.2)\n",
|
||||
"for center in kmeans1.cluster_centers_:\n",
|
||||
" plt.plot(\n",
|
||||
" center[0], center[1], \".\", color=\"red\", markersize=10, label=\"Cluster center\"\n",
|
||||
" center[0],\n",
|
||||
" center[1],\n",
|
||||
" \".\",\n",
|
||||
" color=\"red\",\n",
|
||||
" markersize=10,\n",
|
||||
" label=\"Cluster center\",\n",
|
||||
" )\n",
|
||||
"plt.legend()\n",
|
||||
"plt.show()"
|
||||
@@ -623,7 +628,12 @@
|
||||
"\n",
|
||||
"for center in kmeans1.cluster_centers_:\n",
|
||||
" plt.plot(\n",
|
||||
" center[0], center[1], \".\", color=\"red\", markersize=10, label=\"Cluster center\"\n",
|
||||
" center[0],\n",
|
||||
" center[1],\n",
|
||||
" \".\",\n",
|
||||
" color=\"red\",\n",
|
||||
" markersize=10,\n",
|
||||
" label=\"Cluster center\",\n",
|
||||
" )\n",
|
||||
"plt.legend()\n",
|
||||
"plt.show()"
|
||||
@@ -1529,9 +1539,10 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import tensorflow as tf\n",
|
||||
"from scipy.stats import mode\n",
|
||||
"\n",
|
||||
"import tensorflow as tf\n",
|
||||
"\n",
|
||||
"mnist = tf.keras.datasets.mnist\n",
|
||||
"(X_train, y_train), (X_test, y_test) = mnist.load_data()\n",
|
||||
"\n",
|
||||
@@ -1543,7 +1554,7 @@
|
||||
"\n",
|
||||
"def map_clusters_to_labels(clusters, true_labels):\n",
|
||||
" return np.array(\n",
|
||||
" [mode(true_labels[clusters == i], keepdims=True).mode[0] for i in range(10)]\n",
|
||||
" [mode(true_labels[clusters == i], keepdims=True).mode[0] for i in range(10)],\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"\n",
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"import tensorflow as tf"
|
||||
]
|
||||
},
|
||||
@@ -187,10 +188,12 @@
|
||||
" kernel_regularizer=tf.keras.regularizers.l2(0.01),\n",
|
||||
" ),\n",
|
||||
" tf.keras.layers.Dense(\n",
|
||||
" 8, activation=\"relu\", kernel_regularizer=tf.keras.regularizers.l2(0.01)\n",
|
||||
" 8,\n",
|
||||
" activation=\"relu\",\n",
|
||||
" kernel_regularizer=tf.keras.regularizers.l2(0.01),\n",
|
||||
" ),\n",
|
||||
" tf.keras.layers.Dense(1, activation=\"sigmoid\"),\n",
|
||||
" ]\n",
|
||||
" ],\n",
|
||||
" )\n",
|
||||
" model.compile(optimizer=\"adam\", loss=\"binary_crossentropy\", metrics=[\"accuracy\"])\n",
|
||||
" return model"
|
||||
@@ -296,7 +299,10 @@
|
||||
"histories = []\n",
|
||||
"\n",
|
||||
"early_stopping = EarlyStopping(\n",
|
||||
" monitor=\"val_loss\", patience=10, restore_best_weights=True, verbose=1\n",
|
||||
" monitor=\"val_loss\",\n",
|
||||
" patience=10,\n",
|
||||
" restore_best_weights=True,\n",
|
||||
" verbose=1,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"for fold, (train_idx, val_idx) in enumerate(skf.split(X, y), 1):\n",
|
||||
@@ -314,7 +320,9 @@
|
||||
"\n",
|
||||
" # EarlyStopping\n",
|
||||
" callback = tf.keras.callbacks.EarlyStopping(\n",
|
||||
" monitor=\"val_loss\", patience=10, restore_best_weights=True\n",
|
||||
" monitor=\"val_loss\",\n",
|
||||
" patience=10,\n",
|
||||
" restore_best_weights=True,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # Entraînement\n",
|
||||
@@ -433,13 +441,18 @@
|
||||
],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"import tensorflow as tf\n",
|
||||
"from sklearn.metrics import classification_report, f1_score\n",
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"from sklearn.preprocessing import StandardScaler\n",
|
||||
"\n",
|
||||
"X_train, X_test, y_train, y_test = train_test_split(\n",
|
||||
" X, y, test_size=0.2, random_state=42, stratify=y\n",
|
||||
" X,\n",
|
||||
" y,\n",
|
||||
" test_size=0.2,\n",
|
||||
" random_state=42,\n",
|
||||
" stratify=y,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"scaler = StandardScaler()\n",
|
||||
@@ -451,7 +464,9 @@
|
||||
"model.compile(optimizer=\"adam\", loss=\"binary_crossentropy\")\n",
|
||||
"\n",
|
||||
"callback = tf.keras.callbacks.EarlyStopping(\n",
|
||||
" monitor=\"val_loss\", patience=10, restore_best_weights=True\n",
|
||||
" monitor=\"val_loss\",\n",
|
||||
" patience=10,\n",
|
||||
" restore_best_weights=True,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"history = model.fit(\n",
|
||||
@@ -530,7 +545,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.3"
|
||||
"version": "3.13.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
615
M2/Advanced Machine Learning/TP1/TP1.ipynb
Normal file
BIN
M2/Advanced Machine Learning/TP1/data/data_pdp.xlsx
Normal file
BIN
M2/Advanced Machine Learning/TP1/question1_correlation_plots.png
Normal file
|
After Width: | Height: | Size: 419 KiB |
BIN
M2/Advanced Machine Learning/TP1/question2_pdp_plots.png
Normal file
|
After Width: | Height: | Size: 48 KiB |
BIN
M2/Advanced Machine Learning/TP1/question3_model_plots.png
Normal file
|
After Width: | Height: | Size: 47 KiB |
512
M2/Advanced Machine Learning/TP2/TP2.ipynb
Normal file
BIN
M2/Advanced Machine Learning/TP2/TP_ACE.pdf
Normal file
101
M2/Advanced Machine Learning/TP2/perf_circle.csv
Normal file
@@ -0,0 +1,101 @@
|
||||
x;y
|
||||
1;0
|
||||
0.997986676;0.06342392
|
||||
0.991954813;0.126592454
|
||||
0.981928697;0.189251244
|
||||
0.967948701;0.251147987
|
||||
0.950071118;0.312033446
|
||||
0.928367933;0.371662456
|
||||
0.902926538;0.429794912
|
||||
0.873849377;0.486196736
|
||||
0.841253533;0.540640817
|
||||
0.805270258;0.592907929
|
||||
0.766044443;0.64278761
|
||||
0.723734038;0.690079011
|
||||
0.678509412;0.734591709
|
||||
0.630552667;0.776146464
|
||||
0.58005691;0.814575952
|
||||
0.527225468;0.84972543
|
||||
0.472271075;0.881453363
|
||||
0.415415013;0.909631995
|
||||
0.356886222;0.93414786
|
||||
0.296920375;0.954902241
|
||||
0.235758936;0.971811568
|
||||
0.173648178;0.984807753
|
||||
0.1108382;0.993838464
|
||||
0.047581916;0.998867339
|
||||
-0.015865964;0.999874128
|
||||
-0.079249957;0.996854776
|
||||
-0.142314838;0.989821442
|
||||
-0.204806668;0.978802446
|
||||
-0.266473814;0.963842159
|
||||
-0.327067963;0.945000819
|
||||
-0.386345126;0.922354294
|
||||
-0.444066613;0.895993774
|
||||
-0.5;0.866025404
|
||||
-0.553920064;0.832569855
|
||||
-0.605609687;0.795761841
|
||||
-0.654860734;0.755749574
|
||||
-0.701474888;0.712694171
|
||||
-0.74526445;0.666769001
|
||||
-0.786053095;0.618158986
|
||||
-0.823676581;0.567059864
|
||||
-0.857983413;0.513677392
|
||||
-0.888835449;0.458226522
|
||||
-0.916108457;0.400930535
|
||||
-0.939692621;0.342020143
|
||||
-0.959492974;0.281732557
|
||||
-0.975429787;0.220310533
|
||||
-0.987438889;0.158001396
|
||||
-0.995471923;0.095056043
|
||||
-0.999496542;0.031727933
|
||||
-0.999496542;-0.031727933
|
||||
-0.995471923;-0.095056043
|
||||
-0.987438889;-0.158001396
|
||||
-0.975429787;-0.220310533
|
||||
-0.959492974;-0.281732557
|
||||
-0.939692621;-0.342020143
|
||||
-0.916108457;-0.400930535
|
||||
-0.888835449;-0.458226522
|
||||
-0.857983413;-0.513677392
|
||||
-0.823676581;-0.567059864
|
||||
-0.786053095;-0.618158986
|
||||
-0.74526445;-0.666769001
|
||||
-0.701474888;-0.712694171
|
||||
-0.654860734;-0.755749574
|
||||
-0.605609687;-0.795761841
|
||||
-0.553920064;-0.832569855
|
||||
-0.5;-0.866025404
|
||||
-0.444066613;-0.895993774
|
||||
-0.386345126;-0.922354294
|
||||
-0.327067963;-0.945000819
|
||||
-0.266473814;-0.963842159
|
||||
-0.204806668;-0.978802446
|
||||
-0.142314838;-0.989821442
|
||||
-0.079249957;-0.996854776
|
||||
-0.015865964;-0.999874128
|
||||
0.047581916;-0.998867339
|
||||
0.1108382;-0.993838464
|
||||
0.173648178;-0.984807753
|
||||
0.235758936;-0.971811568
|
||||
0.296920375;-0.954902241
|
||||
0.356886222;-0.93414786
|
||||
0.415415013;-0.909631995
|
||||
0.472271075;-0.881453363
|
||||
0.527225468;-0.84972543
|
||||
0.58005691;-0.814575952
|
||||
0.630552667;-0.776146464
|
||||
0.678509412;-0.734591709
|
||||
0.723734038;-0.690079011
|
||||
0.766044443;-0.64278761
|
||||
0.805270258;-0.592907929
|
||||
0.841253533;-0.540640817
|
||||
0.873849377;-0.486196736
|
||||
0.902926538;-0.429794912
|
||||
0.928367933;-0.371662456
|
||||
0.950071118;-0.312033446
|
||||
0.967948701;-0.251147987
|
||||
0.981928697;-0.189251244
|
||||
0.991954813;-0.126592454
|
||||
0.997986676;-0.06342392
|
||||
1;-2.45E-16
|
||||
|
286
M2/Advanced Machine Learning/TP2/test_lissage.csv
Normal file
@@ -0,0 +1,286 @@
|
||||
x;y
|
||||
1;12.6
|
||||
3;11.9
|
||||
5;12.8
|
||||
6;11.8
|
||||
8;12.3
|
||||
10;12.3
|
||||
12;11.7
|
||||
14;10.5
|
||||
15;10.6
|
||||
16;10.8
|
||||
20;9.7
|
||||
21;10.2
|
||||
22;9.7
|
||||
23;10.1
|
||||
25;11.8
|
||||
26;11.5
|
||||
27;11.6
|
||||
30;11.8
|
||||
31;11.8
|
||||
33;12.4
|
||||
34;13.3
|
||||
35;12.4
|
||||
39;12.9
|
||||
40;13.1
|
||||
42;13.2
|
||||
44;13.3
|
||||
46;13.2
|
||||
48;14.7
|
||||
51;13.3
|
||||
52;13.3
|
||||
55;12.5
|
||||
57;11.8
|
||||
60;11.7
|
||||
61;11.7
|
||||
63;12.2
|
||||
65;13.6
|
||||
67;12.4
|
||||
68;12.5
|
||||
70;13.2
|
||||
71;13.2
|
||||
72;13.6
|
||||
73;13.2
|
||||
77;14.4
|
||||
79;14.3
|
||||
80;14.2
|
||||
83;12.8
|
||||
84;12.8
|
||||
85;12.8
|
||||
87;12.9
|
||||
89;13.8
|
||||
91;13.2
|
||||
92;12.5
|
||||
98;13
|
||||
99;13
|
||||
100;13.4
|
||||
101;12.7
|
||||
104;12.3
|
||||
111;11.4
|
||||
112;11.1
|
||||
115;10.6
|
||||
117;10.5
|
||||
122;10.9
|
||||
123;11.1
|
||||
125;11.2
|
||||
127;11.9
|
||||
129;11
|
||||
132;10
|
||||
133;10.9
|
||||
134;10.5
|
||||
135;10.6
|
||||
138;10.4
|
||||
139;11.1
|
||||
140;11.1
|
||||
141;11.2
|
||||
144;9.9
|
||||
145;10.6
|
||||
146;9.7
|
||||
150;10.1
|
||||
151;9.9
|
||||
152;10.1
|
||||
153;10.2
|
||||
154;11.3
|
||||
155;11.4
|
||||
157;11.3
|
||||
159;11.7
|
||||
160;11.3
|
||||
162;11.4
|
||||
164;10.8
|
||||
165;10.8
|
||||
166;10.9
|
||||
168;10.8
|
||||
170;12
|
||||
171;12.4
|
||||
175;12.7
|
||||
176;12.1
|
||||
177;12.2
|
||||
179;12.3
|
||||
180;12.3
|
||||
181;12.5
|
||||
182;12.6
|
||||
183;11.8
|
||||
185;10.9
|
||||
186;10.9
|
||||
187;11.1
|
||||
188;11.1
|
||||
189;10.6
|
||||
191;9.9
|
||||
192;9.1
|
||||
193;9.6
|
||||
196;9.7
|
||||
197;10.3
|
||||
198;10.1
|
||||
200;11.7
|
||||
201;11.5
|
||||
203;11.1
|
||||
204;11.1
|
||||
205;11.6
|
||||
206;11.8
|
||||
210;11.2
|
||||
216;9.6
|
||||
217;9.1
|
||||
218;8.2
|
||||
219;7.3
|
||||
221;9
|
||||
230;9
|
||||
231;7.2
|
||||
234;7
|
||||
237;8.5
|
||||
239;8.2
|
||||
241;7.5
|
||||
242;7.2
|
||||
244;8.3
|
||||
245;8.5
|
||||
246;8.7
|
||||
250;8.8
|
||||
252;8.4
|
||||
254;8.4
|
||||
255;8.9
|
||||
258;8.3
|
||||
259;8.5
|
||||
260;9
|
||||
265;8.2
|
||||
269;8.6
|
||||
271;8
|
||||
274;8.8
|
||||
275;8.7
|
||||
276;8.6
|
||||
282;8.8
|
||||
283;9.3
|
||||
287;8.4
|
||||
290;8.6
|
||||
291;8.8
|
||||
292;9.3
|
||||
298;9.9
|
||||
301;9.6
|
||||
304;8
|
||||
306;10.6
|
||||
309;8.9
|
||||
310;8.7
|
||||
311;8.3
|
||||
313;7.6
|
||||
315;6.9
|
||||
316;6.3
|
||||
318;9.1
|
||||
321;6.8
|
||||
322;6.4
|
||||
324;6.9
|
||||
326;6.5
|
||||
331;7.5
|
||||
335;7.1
|
||||
337;7.1
|
||||
339;6.8
|
||||
341;6.6
|
||||
342;6.1
|
||||
343;6.7
|
||||
345;6.6
|
||||
346;5.7
|
||||
347;6.7
|
||||
348;7.1
|
||||
349;6.7
|
||||
351;6.7
|
||||
352;6.4
|
||||
355;6.2
|
||||
356;6.2
|
||||
357;6.4
|
||||
359;6.8
|
||||
366;6.4
|
||||
367;7.4
|
||||
368;7.4
|
||||
369;7.5
|
||||
370;7.2
|
||||
374;6.8
|
||||
378;5.8
|
||||
379;6.4
|
||||
381;5.9
|
||||
382;5.2
|
||||
386;4.7
|
||||
389;4.8
|
||||
392;4.8
|
||||
393;4.5
|
||||
394;5
|
||||
396;4.9
|
||||
399;4.5
|
||||
400;4.6
|
||||
402;4.2
|
||||
403;4.8
|
||||
406;5
|
||||
407;4.5
|
||||
411;7
|
||||
412;3.4
|
||||
413;4.1
|
||||
415;6.1
|
||||
416;5.8
|
||||
419;6.5
|
||||
420;6.4
|
||||
425;5.7
|
||||
427;5.5
|
||||
430;5.3
|
||||
431;5.8
|
||||
432;5.6
|
||||
433;6.3
|
||||
435;5.2
|
||||
438;5.4
|
||||
439;5
|
||||
443;5.3
|
||||
445;5.3
|
||||
446;5.2
|
||||
449;4.1
|
||||
450;6.9
|
||||
451;3.7
|
||||
452;3.4
|
||||
453;3.6
|
||||
455;3.5
|
||||
456;2.9
|
||||
459;2.7
|
||||
460;3.1
|
||||
462;3.7
|
||||
463;4.2
|
||||
465;4.2
|
||||
466;4
|
||||
467;3.8
|
||||
468;4
|
||||
469;3.4
|
||||
470;3.6
|
||||
475;3.7
|
||||
478;4.2
|
||||
482;3.4
|
||||
483;3.5
|
||||
485;3.1
|
||||
486;3.6
|
||||
492;5.5
|
||||
494;3.8
|
||||
496;5.5
|
||||
498;6.4
|
||||
500;5.5
|
||||
502;6.8
|
||||
503;8.2
|
||||
506;4.9
|
||||
508;5.4
|
||||
510;5.9
|
||||
514;6.4
|
||||
522;7.1
|
||||
524;7.6
|
||||
525;7
|
||||
527;6.9
|
||||
529;7.3
|
||||
533;7
|
||||
534;7.8
|
||||
536;8
|
||||
539;8.8
|
||||
540;9.1
|
||||
541;8.2
|
||||
542;8
|
||||
544;8.8
|
||||
545;9.7
|
||||
546;12
|
||||
547;6.3
|
||||
548;5.8
|
||||
556;6.3
|
||||
558;6.4
|
||||
562;7.4
|
||||
563;7.4
|
||||
566;7.2
|
||||
567;7.4
|
||||
573;7.4
|
||||
|
535
M2/Advanced Machine Learning/TP3/TP3.ipynb
Normal file
BIN
M2/Advanced Machine Learning/TP3/data/ijcnn1.bz2
Normal file
53
M2/Clustering In Practice/Encoding.Rmd
Normal file
@@ -0,0 +1,53 @@
|
||||
```{r}
|
||||
library(caret)
|
||||
library(dplyr)
|
||||
```
|
||||
|
||||
# One Hot Encoding
|
||||
|
||||
```{r}
|
||||
df <- data.frame(
|
||||
team = c("A", "A", "B", "B", "B", "B", "C", "C"),
|
||||
points = c(25, 12, 15, 14, 19, 23, 25, 29)
|
||||
)
|
||||
|
||||
dummies <- dummyVars(~team + points, data = df)
|
||||
one_hot_data <- predict(dummies, newdata = df)
|
||||
|
||||
one_hot_data
|
||||
```
|
||||
|
||||
# Target Encoding
|
||||
|
||||
```{r}
|
||||
train <- data.frame(
|
||||
target = c(10, 20, 15),
|
||||
cat_col1 = c("city1", "city2", "city1"),
|
||||
cat_col2 = c("james", "adam", "charles")
|
||||
)
|
||||
|
||||
global_mean <- mean(train$target)
|
||||
alpha <- 10
|
||||
|
||||
target_encoding <- train |>
|
||||
group_by(cat_col1) |>
|
||||
summarise(
|
||||
n = n(),
|
||||
sum_target = sum(target),
|
||||
cat_col1_te = (sum_target + (alpha * global_mean)) / (n + alpha),
|
||||
.groups = "drop"
|
||||
) |>
|
||||
select(cat_col1, cat_col1_te)
|
||||
|
||||
train <- train |> left_join(target_encoding, by = "cat_col1")
|
||||
```
|
||||
|
||||
# Frequential Encoding
|
||||
|
||||
|
||||
```{r}
|
||||
df <- data.frame(
|
||||
color = c("blue", "red", "blue", "green"),
|
||||
value = c(10, 20, 10, 30)
|
||||
)
|
||||
```
|
||||
1505
M2/Clustering In Practice/TP3.ipynb
Normal file
191
M2/Clustering In Practice/compression_image.R
Normal file
@@ -0,0 +1,191 @@
|
||||
# Objectifs pédagogiques
|
||||
# Comprendre la représentation matricielle d'une image.
|
||||
# Interpréter les centroïdes comme une palette de couleurs optimale (résumé).
|
||||
|
||||
setwd("~/Workspace/studies/M2/Clustering In Practice")
|
||||
|
||||
library(jpeg)
|
||||
|
||||
# 1. Chargement de l'image
|
||||
|
||||
img <- readJPEG("./data/PampasGrass.jpg")
|
||||
|
||||
# Dimensions
|
||||
dims <- dim(img)
|
||||
dims
|
||||
|
||||
# Reshaping : Transformation en matrice (Pixels x 3 canaux)
|
||||
# Chaque ligne est une observation dans R^3
|
||||
img_matrix <- matrix(img, ncol = 3)
|
||||
colnames(img_matrix) <- c("R", "G", "B")
|
||||
|
||||
head(img_matrix)
|
||||
|
||||
# 2. Application de l'algorithme K-means
|
||||
# Choix du nombre de couleurs (k)
|
||||
k <- 8
|
||||
|
||||
# Application de K-means
|
||||
set.seed(123)
|
||||
km_model <- kmeans(img_matrix, centers = k, iter.max = 20, nstart = 3)
|
||||
|
||||
# Les "résumés" de l'information (les centres des clusters)
|
||||
palette_optimale <- km_model$centers
|
||||
print(palette_optimale)
|
||||
|
||||
# 3. Reconstruction de l'image compressée
|
||||
# Associer chaque pixel à son centroïde
|
||||
img_compressed_matrix <- palette_optimale[km_model$cluster, ]
|
||||
|
||||
# Re-transformer la matrice en Array 3D
|
||||
img_compressed <- array(img_compressed_matrix, dim = dims)
|
||||
|
||||
# Affichage comparatif
|
||||
par(mfrow = c(1, 2), mar = c(1, 1, 1, 1))
|
||||
plot(0, 0, type = "n", axes = FALSE, ann = FALSE)
|
||||
rasterImage(img, -1, -1, 1, 1)
|
||||
title("Originale (Millions de couleurs)")
|
||||
|
||||
plot(0, 0, type = "n", axes = FALSE, ann = FALSE)
|
||||
rasterImage(img_compressed, -1, -1, 1, 1)
|
||||
title(paste("Compressée (k =", k, ")"))
|
||||
|
||||
# 4. Questions : coût de l'information (Distorsion)
|
||||
# Calculez l'erreur quadratique moyenne (MSE) entre l'image originale et
|
||||
# l'image compressée :
|
||||
# Plus $k$ est petit, plus le résumé est ..., plus le MSE .....
|
||||
|
||||
library(imager)
|
||||
|
||||
|
||||
mse_imager <- function(img1, img2) {
|
||||
# Harmoniser dimensions (recadrage ou redimensionnement si besoin)
|
||||
if (!all(dim(img1) == dim(img2))) {
|
||||
# Ici, on redimensionne img2 sur la taille d'img1
|
||||
img2 <- imresize(img2, size_x = width(img1), size_y = height(img1))
|
||||
if (spectrum(img2) != spectrum(img1)) {
|
||||
img2 <- grayscale(img2) # fallback simple si nb de canaux diffère
|
||||
img1 <- grayscale(img1)
|
||||
}
|
||||
}
|
||||
# Convertir en vecteurs numériques [0,1]
|
||||
x <- as.numeric(img1)
|
||||
y <- as.numeric(img2)
|
||||
mean((x - y)^2)
|
||||
}
|
||||
|
||||
|
||||
mse_val <- mse_imager(img, img_compressed)
|
||||
cat("MSE =", mse_val, "\n")
|
||||
|
||||
mse_matrix <- mean((img_matrix - img_compressed_matrix)^2)
|
||||
cat("MSE =", mse_matrix, "\n")
|
||||
|
||||
########################################################################
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# Règle du coude (Elbow Method)
|
||||
# tracez l'évolution de la Within-Cluster Sum of Squares en fonction de $k$
|
||||
# Prnde k = 2 à 32
|
||||
# A partir de quel $k$ le gain visuel devient-il négligeable pour l'œil humain ?
|
||||
|
||||
|
||||
|
||||
# X : matrice/df n x d
|
||||
# ks : valeurs de k à tester (par défaut 1:10)
|
||||
elbow_wss <- function(X, ks = 2:32, nstart = 10, scale_data = FALSE) {
|
||||
X <- as.matrix(X)
|
||||
if (scale_data) {
|
||||
X <- scale(X)
|
||||
}
|
||||
wss <- numeric(length(ks))
|
||||
|
||||
# Cas k = 1 : WSS = TSS (variance totale)
|
||||
total_ss <- sum(scale(X, scale = FALSE)^2) # TSS
|
||||
for (i in seq_along(ks)) {
|
||||
k <- ks[i]
|
||||
cat(" k =", k, "\n")
|
||||
if (k == 1) {
|
||||
wss[i] <- total_ss
|
||||
} else {
|
||||
set.seed(123) # reproductible
|
||||
km <- kmeans(X, centers = k, nstart = nstart, iter.max = 100)
|
||||
wss[i] <- km$tot.withinss
|
||||
}
|
||||
}
|
||||
|
||||
plot(ks, wss,
|
||||
type = "b", pch = 19, xlab = "Nombre de clusters (k)",
|
||||
ylab = "Inertie intra-classe (WSS)",
|
||||
main = "Méthode du coude (k-means)"
|
||||
)
|
||||
grid()
|
||||
invisible(data.frame(k = ks, WSS = wss))
|
||||
}
|
||||
|
||||
# Exemple d'utilisation :
|
||||
res <- elbow_wss(img_compressed, ks = 2:32, nstart = 20, scale_data = FALSE)
|
||||
|
||||
###############################################################################
|
||||
|
||||
elbow_wss_safe <- function(X, ks = 2:32, nstart = 20, scale_data = FALSE, seed = 123) {
|
||||
X <- as.matrix(X)
|
||||
if (scale_data) X <- scale(X)
|
||||
set.seed(seed)
|
||||
|
||||
# Nombre de lignes distinctes
|
||||
n_unique <- nrow(unique(X))
|
||||
if (n_unique < 2) stop("Moins de 2 points distincts : k-means n'a pas de sens.")
|
||||
|
||||
# Tronquer ks si nécessaire
|
||||
ks <- ks[ks <= n_unique]
|
||||
if (length(ks) == 0) stop("Tous les k demandés dépassent le nombre de points distincts.")
|
||||
|
||||
wss <- numeric(length(ks))
|
||||
# TSS (k = 1)
|
||||
total_ss <- sum(scale(X, scale = FALSE)^2)
|
||||
|
||||
for (i in seq_along(ks)) {
|
||||
k <- ks[i]
|
||||
cat(" k =", k, "\n")
|
||||
if (k == 1) {
|
||||
wss[i] <- total_ss
|
||||
} else {
|
||||
km <- kmeans(X, centers = k, nstart = nstart, iter.max = 100)
|
||||
wss[i] <- km$tot.withinss
|
||||
}
|
||||
}
|
||||
|
||||
plot(ks, wss,
|
||||
type = "b", pch = 19, xlab = "Nombre de clusters (k)",
|
||||
ylab = "Inertie intra-classe (WSS)", main = "Méthode du coude (k-means)"
|
||||
)
|
||||
axis(1, at = ks)
|
||||
grid()
|
||||
invisible(data.frame(k = ks, WSS = wss))
|
||||
}
|
||||
|
||||
# Exemple :
|
||||
res <- elbow_wss_safe(img_compressed, ks = 2:32, nstart = 20)
|
||||
|
||||
|
||||
|
||||
|
||||
# Taille de stockage
|
||||
# Ouvrir un fichier JPG
|
||||
jpeg("./data/image_compressed.jpg")
|
||||
|
||||
# Afficher l'image compressée dans le fichier
|
||||
plot(0, 0, type = "n", axes = FALSE, ann = FALSE)
|
||||
rasterImage(img_compressed, -1, -1, 1, 1)
|
||||
|
||||
dev.off()
|
||||
|
||||
info <- file.info("./data/PampasGrass.jpg")
|
||||
(taille_octets_reelle <- info$size / 1024)
|
||||
|
||||
info <- file.info("./data/image_compressed.jpg")
|
||||
(taille_octets_compresse <- info$size / 1024)
|
||||
105
M2/Clustering In Practice/compression_image_poissons.R
Normal file
@@ -0,0 +1,105 @@
|
||||
# Objectifs pédagogiques
|
||||
# Comprendre la représentation matricielle d'une image.
|
||||
# Interpréter les centroïdes comme une palette de couleurs optimale (résumé).
|
||||
# Analyser le compromis entre distorsion (perte de qualité) et taux de compression.
|
||||
|
||||
library(jpeg)
|
||||
|
||||
# 1. Chargement de l'image
|
||||
|
||||
|
||||
img <- readJPEG("./data/Guppy 2.jpeg")
|
||||
|
||||
# Dimensions
|
||||
dims <- dim(img)
|
||||
dims
|
||||
|
||||
# Reshaping : Transformation en matrice (Pixels x 3 canaux)
|
||||
# Chaque ligne est une observation dans R^3
|
||||
img_matrix <- matrix(img, ncol = 3)
|
||||
colnames(img_matrix) <- c("R", "G", "B")
|
||||
|
||||
head(img_matrix)
|
||||
|
||||
# 2. Application de l'algorithme K-means
|
||||
# Choix du nombre de couleurs (k)
|
||||
k <- 10
|
||||
|
||||
# Application de K-means
|
||||
# On augmente iter.max car la convergence sur des milliers de pixels peut être lente
|
||||
set.seed(123)
|
||||
km_model <- kmeans(img_matrix, centers = k, iter.max = 20, nstart = 3)
|
||||
|
||||
# Les "résumés" de l'information (les centres des clusters)
|
||||
palette_optimale <- km_model$centers
|
||||
print(palette_optimale)
|
||||
|
||||
# 3. Reconstruction de l'image compressée
|
||||
# Associer chaque pixel à son centroïde
|
||||
img_compressed_matrix <- palette_optimale[km_model$cluster, ]
|
||||
|
||||
# Re-transformer la matrice en Array 3D
|
||||
img_compressed <- array(img_compressed_matrix, dim = dims)
|
||||
|
||||
# Affichage comparatif
|
||||
par(mfrow = c(1, 2), mar = c(1, 1, 1, 1))
|
||||
plot(0, 0, type='n', axes=FALSE, ann=FALSE)
|
||||
rasterImage(img, -1, -1, 1, 1)
|
||||
title("Originale (Millions de couleurs)")
|
||||
|
||||
plot(0, 0, type='n', axes=FALSE, ann=FALSE)
|
||||
rasterImage(img_compressed, -1, -1, 1, 1)
|
||||
title(paste("Compressée (k =", k, ")"))
|
||||
|
||||
# 4. Questions : coût de l'information (Distorsion)
|
||||
# Calculez l'erreur quadratique moyenne (MSE) entre l'image originale et
|
||||
# l'image compressée :
|
||||
# Plus $k$ est petit, plus le résumé est ..., plus le MSE .....
|
||||
|
||||
# Règle du coude (Elbow Method)
|
||||
# tracez l'évolution de la Within-Cluster Sum of Squares (WCSS) en fonction de $k$
|
||||
# Prnde k = 2 à 32
|
||||
# A partir de quel $k$ le gain visuel devient-il négligeable pour l'œil humain ?
|
||||
|
||||
# Taille de stockage
|
||||
# Ouvrir un fichier JPG
|
||||
jpeg("./data/image_compressed.jpg")
|
||||
|
||||
# Afficher l'image compressée dans le fichier
|
||||
plot(0, 0, type='n', axes=FALSE, ann=FALSE)
|
||||
rasterImage(img_compressed, -1, -1, 1, 1)
|
||||
|
||||
info <- file.info("./data/Guppy 2.jpeg")
|
||||
(taille_octets_reelle <- info$size/1024)
|
||||
|
||||
info <- file.info("./data/image_compressed.jpg")
|
||||
(taille_octets_compresse <- info$size/1024)
|
||||
|
||||
|
||||
library(colordistance)
|
||||
|
||||
repertoire <- "poissons"
|
||||
|
||||
clusters <- colordistance::getHistList(repertoire, lower = NULL, upper = NULL)
|
||||
names(clusters)
|
||||
|
||||
kmeans_fits <- getKMeansList(repertoire, bins = 3, plotting = TRUE)
|
||||
|
||||
centroids_list <- extractClusters(kmeans_fits, ordering = TRUE)
|
||||
|
||||
emd_distance_matrix <- getColorDistanceMatrix(centroids_list, method = "color.dist", ordering = TRUE)
|
||||
|
||||
colordistance::imageClusterPipeline(repertoire, cluster.method = "hist")
|
||||
|
||||
clusters
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
553
M2/Clustering In Practice/data/DataModel.csv
Normal file
@@ -0,0 +1,553 @@
|
||||
"Year","Category","Region","Dist_EQ","PCT_CHARTER","PCT_NETWORK","PCT_REGIONAL","PCT_LCC"
|
||||
2002,"NBI","AFRMY",1016.77834099741,0.0597130903899918,0.622493232142152,0.267124620948855,0.0506690565190006
|
||||
2002,"NBI","AMCS",1048.40176202633,0,0.600042338293917,0.377530799113856,0.0224268625922272
|
||||
2002,"NBI","AMN",1409.85814009492,0.0173629970840251,0.590396464468702,0.173805680475439,0.218434857971835
|
||||
2002,"NBI","ASIAUS",731.613261373554,0,0.469682218115679,0.466558289304771,0.0637594925795506
|
||||
2002,"NBI","CEI",2021.61669687385,0.101248783609204,0.33592314644564,0.353537170585854,0.209290899359302
|
||||
2002,"NBI","CHN",884.36087443162,0,0.541909244621729,0.423698368369688,0.0343923870085828
|
||||
2002,"NBI","EUR",912.423381449187,0.133168395537474,0.531242916846183,0.199587502830158,0.136001184786185
|
||||
2002,"NBI","SIN",956.879704507048,0.0219844929189212,0.878033445909996,0.0999820611710825,0
|
||||
2002,"NBL","AFRMY",1870.45081950017,0.000605076293025976,0.669678363444417,0.329716560262557,0
|
||||
2002,"NBL","AMCS",1824.96151047405,0,0.809693945741259,0.0883583183642831,0.101947735894458
|
||||
2002,"NBL","AMN",1943.97876631639,0.051832775219223,0.620556018720925,0.140872307705065,0.186738898354787
|
||||
2002,"NBL","ASIAUS",765.889004994681,0,0.72582587884237,0.222936510506108,0.0512376106515214
|
||||
2002,"NBL","CEI",4013.47097243836,0.00884521845500873,0.0328186621150331,0.90034431408971,0.0579918053402481
|
||||
2002,"NBL","CHN",1144.23746822827,0,0.452582301987532,0.504259544661241,0.0431581533512276
|
||||
2002,"NBL","EUR",1234.90303545659,0.364607796353512,0.57342178096109,0.0530094145514347,0.00896100813396302
|
||||
2002,"NBL","SIN",1735.11180609851,0,0,1,0
|
||||
2002,"NBS","AFRMY",896.194534112975,0.0307407102817188,0.573908924174224,0.394921119969798,0.000429245574259246
|
||||
2002,"NBS","AMCS",747.174211603206,0,0.529624167602986,0.412755321767577,0.0576205106294371
|
||||
2002,"NBS","AMN",988.093839351939,0.0013652968487858,0.398303810665043,0.193870598522295,0.406460293963877
|
||||
2002,"NBS","ASIAUS",742.441286377743,0,0.35286235135051,0.51736909093461,0.12976855771488
|
||||
2002,"NBS","CEI",1301.34576825985,0.157995862718028,0,0.842004137281972,0
|
||||
2002,"NBS","CHN",901.974419592352,0,0.737234338617439,0.262765661382561,0
|
||||
2002,"NBS","EUR",781.859523179824,0.034147346899082,0.419150058586784,0.289664731496763,0.257037863017371
|
||||
2002,"NBS","SIN",1093.25099049704,0,0,1,0
|
||||
2003,"NBI","AFRMY",1051.83276929881,0.0703904417993861,0.58586464219364,0.297699437447877,0.0460454785590967
|
||||
2003,"NBI","AMCS",1046.0439259395,0.000159622611484496,0.580645331548867,0.381263370019119,0.0379316758205296
|
||||
2003,"NBI","AMN",1498.83664670975,0.0319213107098393,0.556433821677094,0.166345733914352,0.245299133698715
|
||||
2003,"NBI","ASIAUS",826.321630659026,2.87414444591047e-06,0.439523953566766,0.536791058278473,0.0236821140103144
|
||||
2003,"NBI","CEI",1949.75420550375,0.0575330852873459,0.298075010313518,0.434867435136563,0.209524469262573
|
||||
2003,"NBI","CHN",909.696158854379,0,0.632284602130837,0.346134092993799,0.0215813048753633
|
||||
2003,"NBI","EUR",955.788965857104,0.132691421953644,0.522631131213971,0.173206925976766,0.171470520855619
|
||||
2003,"NBI","SIN",974.713460537252,0.0158550280722602,0.808232832813693,0.171151971208312,0.00476016790573452
|
||||
2003,"NBL","AFRMY",1751.82883955615,0.0314011459979526,0.569588576896012,0.399010277106036,0
|
||||
2003,"NBL","AMCS",1822.559822083,0,0.762943162951321,0.128930594625762,0.108126242422917
|
||||
2003,"NBL","AMN",2026.0997925162,0.0644717204234929,0.598088557608768,0.139921778028566,0.197517943939173
|
||||
2003,"NBL","ASIAUS",1328.5958565509,0,0.64457537018407,0.20320958073791,0.152215049078021
|
||||
2003,"NBL","CEI",3723.64309407328,0.00870866487367566,0.00386223143758493,0.925957583814866,0.0614715198738737
|
||||
2003,"NBL","CHN",1312.91071496536,0,0.58368766901228,0.367520784078573,0.0487915469091474
|
||||
2003,"NBL","EUR",1215.06501553753,0.302037229834171,0.579633562109029,0.113156292947753,0.00517291510904663
|
||||
2003,"NBL","SIN",1549.69043928804,0,0.207197663467606,0.792802336532394,0
|
||||
2003,"NBS","AFRMY",1020.26844457009,0.0592471603459175,0.527859936259316,0.412445373213889,0.000447530180877296
|
||||
2003,"NBS","AMCS",812.867449227247,0,0.537901975918196,0.358837838554683,0.103260185527121
|
||||
2003,"NBS","AMN",1034.07827897107,0.00191780575189369,0.374634584757492,0.178652321458941,0.444795288031673
|
||||
2003,"NBS","ASIAUS",819.277607285573,0.00210191710870563,0.279343639618056,0.346537536703958,0.372016906569281
|
||||
2003,"NBS","CEI",1370.90327513558,0.201685678688886,0.0448981117980321,0.753416209513082,0
|
||||
2003,"NBS","CHN",947.421571182859,0,0.748828864793134,0.251171135206866,0
|
||||
2003,"NBS","EUR",797.83472170478,0.052057705585553,0.420549111447807,0.308357854284043,0.219035328682597
|
||||
2003,"NBS","SIN",1178.36392294334,0,0,0.924568394630158,0.0754316053698418
|
||||
2004,"NBI","AFRMY",1118.84228834104,0.0719015260194344,0.586382396634064,0.284417536153788,0.057298541192714
|
||||
2004,"NBI","AMCS",1066.33349071331,0,0.557795356455719,0.398933257242986,0.0432713863012945
|
||||
2004,"NBI","AMN",1559.76351030533,0.0371168132172758,0.457589075882335,0.167304100632898,0.337990010267491
|
||||
2004,"NBI","ASIAUS",895.061401625613,0.00351434191290419,0.407998279703067,0.578451861928994,0.0100355164550348
|
||||
2004,"NBI","CEI",1895.37246841514,0.0621895397647724,0.223517807028265,0.495640633683318,0.218652019523644
|
||||
2004,"NBI","CHN",1000.4594665654,0,0.600163096684699,0.374120748057107,0.0257161552581933
|
||||
2004,"NBI","EUR",1010.59338718854,0.132671487148077,0.497420746298331,0.152718867182171,0.21718889937142
|
||||
2004,"NBI","SIN",979.908245588873,0.000569529127851526,0.77049725220613,0.228933218666019,0
|
||||
2004,"NBL","AFRMY",1854.09068466113,0.0230344008837983,0.530715015293745,0.446250583822457,0
|
||||
2004,"NBL","AMCS",1735.4292061985,0,0.669305287395437,0.215266978003438,0.115427734601125
|
||||
2004,"NBL","AMN",2079.46013496378,0.0615241629505311,0.445398422407843,0.139054382607996,0.35402303203363
|
||||
2004,"NBL","ASIAUS",1169.74074607907,0.057163394637303,0.74791876942464,0.101251746052136,0.0936660898859209
|
||||
2004,"NBL","CEI",3414.37852598339,0.0975838497409029,0.111023915646246,0.728306869453293,0.0630853651595581
|
||||
2004,"NBL","CHN",1261.00651246869,0,0.609679750621362,0.330338461372172,0.0599817880064659
|
||||
2004,"NBL","EUR",1317.01591035747,0.250405851122713,0.623290601334873,0.101772769455369,0.0245307780870447
|
||||
2004,"NBL","SIN",1444.11232276274,0,0.419750948965739,0.580249051034261,0
|
||||
2004,"NBS","AFRMY",975.100903644071,0.0677497176473869,0.462821740223249,0.468351395459184,0.00107714667018013
|
||||
2004,"NBS","AMCS",827.996945667859,0,0.526184470650645,0.35735758077773,0.116457948571625
|
||||
2004,"NBS","AMN",1072.26228809395,0.00195151069157886,0.219300737137274,0.179046918234192,0.599700833936955
|
||||
2004,"NBS","ASIAUS",876.477227814581,0.00155311460801366,0.179458912302523,0.22482693354778,0.594161039541684
|
||||
2004,"NBS","CEI",1334.90751418335,0.216356673704201,0.313838369662649,0.468739021445937,0.00106593518721326
|
||||
2004,"NBS","CHN",956.848300480629,0,0.820224553164155,0.179516211481983,0.000259235353862591
|
||||
2004,"NBS","EUR",827.75461937925,0.0831298345762109,0.357945045100817,0.249001050066969,0.309924070256003
|
||||
2004,"NBS","SIN",1138.44344746954,0,0,1,0
|
||||
2005,"NBI","AFRMY",1144.6473085694,0.0664152811711306,0.539696049145382,0.337990821845681,0.0558978478378065
|
||||
2005,"NBI","AMCS",1066.95770202064,0,0.565873185288841,0.388196571002098,0.0459302437090615
|
||||
2005,"NBI","AMN",1547.31279759207,0.0227526536617333,0.452963215927672,0.167578737439385,0.35670539297121
|
||||
2005,"NBI","ASIAUS",902.205693075095,0.00417293984616082,0.352498092307051,0.569070563335918,0.0742584045108701
|
||||
2005,"NBI","CEI",1873.5506127118,0.0702844042456643,0.206525859735807,0.511657378206404,0.211532357812125
|
||||
2005,"NBI","CHN",1035.35139870713,0,0.630046071741499,0.343781662121277,0.0261722661372242
|
||||
2005,"NBI","EUR",1018.90251261652,0.128046913616995,0.529984291873972,0.121777702743246,0.220191091765786
|
||||
2005,"NBI","SIN",1023.06687094566,0,0.75275156733831,0.24724843266169,0
|
||||
2005,"NBL","AFRMY",1810.02105488293,0.0371926827644837,0.539387373962615,0.423419943272902,0
|
||||
2005,"NBL","AMCS",1726.75732585068,0,0.711059249437188,0.188230549920793,0.100710200642019
|
||||
2005,"NBL","AMN",2109.24405792989,0.0391014466679743,0.458734886225285,0.136562585217016,0.365601081889725
|
||||
2005,"NBL","ASIAUS",1037.04239942446,0.100698402680429,0.808376121156836,0.0185210151369094,0.0724044610258255
|
||||
2005,"NBL","CEI",3189.2428547016,0.171177549768552,0.20736132642256,0.57822815188997,0.0432329719189168
|
||||
2005,"NBL","CHN",1125.08281549542,0,0.555712767270187,0.389918708121961,0.0543685246078521
|
||||
2005,"NBL","EUR",1236.54730270984,0.283755233670644,0.631980022925384,0.0728362110640691,0.0114285323399037
|
||||
2005,"NBL","SIN",1474.32248032714,0,0.391378024983744,0.608621975016256,0
|
||||
2005,"NBS","AFRMY",944.057666641918,0.0868315093655335,0.413495892183916,0.456665085205021,0.0430075132455295
|
||||
2005,"NBS","AMCS",837.29685252464,0,0.552217955745499,0.295268249310276,0.152513794944226
|
||||
2005,"NBS","AMN",1102.98776605576,0.0050067486763006,0.21064297147412,0.163538466559019,0.62081181329056
|
||||
2005,"NBS","ASIAUS",880.594368505296,0,0.158548615316017,0.176302779128767,0.665148605555216
|
||||
2005,"NBS","CEI",1353.6195416818,0.232640433020087,0.273659282575309,0.422138212886689,0.0715620715179144
|
||||
2005,"NBS","CHN",959.782513779051,0,0.735002611782261,0.26485492276103,0.000142465456708642
|
||||
2005,"NBS","EUR",861.891616310017,0.084877102823163,0.301965006454799,0.163169501287278,0.449988389434761
|
||||
2005,"NBS","SIN",920.386833700046,0,0.0119700260861467,0.988029973913853,0
|
||||
2006,"NBI","AFRMY",1138.04732765766,0.0800975799238458,0.506326179207697,0.310658120147633,0.102918120720824
|
||||
2006,"NBI","AMCS",1031.34035186997,0,0.594644733989955,0.33380888015396,0.0715463858560854
|
||||
2006,"NBI","AMN",1587.15047211395,0.0144793536243242,0.493988590006146,0.129792047209454,0.361740009160076
|
||||
2006,"NBI","ASIAUS",942.019400916568,0.00668256126305651,0.285334186757348,0.498176437003973,0.209806814975622
|
||||
2006,"NBI","CEI",1902.06195567292,0.087699412848399,0.228088234173566,0.50140820203356,0.182804150944475
|
||||
2006,"NBI","CHN",1062.10188868693,0,0.605711891013932,0.368696072192362,0.0255920367937056
|
||||
2006,"NBI","EUR",1019.00908136861,0.125108192597397,0.483604311510742,0.138424836751621,0.252862659140239
|
||||
2006,"NBI","SIN",1049.04800558863,0,0.650299991727958,0.274523356683468,0.0751766515885743
|
||||
2006,"NBL","AFRMY",1899.32443336587,0.0135018297452449,0.5547227084213,0.431775461833455,0
|
||||
2006,"NBL","AMCS",1939.20935088796,0,0.54565098242017,0.328285859772465,0.126063157807365
|
||||
2006,"NBL","AMN",2187.67128026574,0.0161667752194186,0.57602842212399,0.03853815300604,0.369266649650551
|
||||
2006,"NBL","ASIAUS",838.495611230166,0.0245901590890957,0.950980745058982,0.0159268006244832,0.00850229522743875
|
||||
2006,"NBL","CEI",3023.53223923825,0.146023985007469,0.146985332154633,0.698296864512271,0.00869381832562711
|
||||
2006,"NBL","CHN",1134.28503381506,0,0.591546525322623,0.358284579807739,0.0501688948696376
|
||||
2006,"NBL","EUR",1310.43452032375,0.288605861840223,0.633770439586171,0.0579149345013532,0.0197087640722526
|
||||
2006,"NBL","SIN",1510.47536969813,0,0.450772203812408,0.549227796187592,0
|
||||
2006,"NBS","AFRMY",974.478817897077,0.0756597750364558,0.471440795514074,0.41963457244113,0.0332648570083407
|
||||
2006,"NBS","AMCS",858.268397159176,0,0.514976037718629,0.270613590784659,0.214410371496713
|
||||
2006,"NBS","AMN",1114.73769830693,0.00536186599671175,0.239422726677631,0.0950835749915738,0.660131832334083
|
||||
2006,"NBS","ASIAUS",873.220374167129,0,0.156823114834569,0.203876419644402,0.639300465521029
|
||||
2006,"NBS","CEI",1361.29909941249,0.170233594203652,0.187328238468591,0.491728192715042,0.150709974612716
|
||||
2006,"NBS","CHN",943.364937862824,0.0059309273700467,0.659226247821932,0.334842824808022,0
|
||||
2006,"NBS","EUR",873.34320313538,0.0867024921103081,0.262601342649803,0.187582150187531,0.463114015052359
|
||||
2006,"NBS","SIN",904.652496449135,0,0.849189109762939,0.150810890237061,0
|
||||
2007,"NBI","AFRMY",1203.54665874132,0.0585926657002463,0.509451122536068,0.271768575067512,0.160187636696174
|
||||
2007,"NBI","AMCS",1096.53436865042,0.00048342238509541,0.638132705020092,0.273647419438365,0.0877364531564478
|
||||
2007,"NBI","AMN",1614.57200208311,0.0144682302797693,0.629105237870985,0.122745188325937,0.233681343523309
|
||||
2007,"NBI","ASIAUS",988.986878274016,0.00817657984201907,0.343094041205826,0.37579422259989,0.272935156352265
|
||||
2007,"NBI","CEI",1843.83106753159,0.0807721344260225,0.262994945859939,0.538231385810682,0.118001533903357
|
||||
2007,"NBI","CHN",1087.72227782614,0.00871684618403742,0.596596783347116,0.363069168162567,0.0316172023062802
|
||||
2007,"NBI","EUR",1031.75960706547,0.136786494362055,0.410593149305638,0.143636996694623,0.308983359637684
|
||||
2007,"NBI","SIN",1026.44437882514,0,0.525161979414646,0.30293809998151,0.171899920603844
|
||||
2007,"NBL","AFRMY",1942.85550048815,0.00218389166757012,0.588470557597048,0.40929720543993,4.8345295451351e-05
|
||||
2007,"NBL","AMCS",1752.72021549169,0,0.361097899948369,0.478313640602461,0.16058845944917
|
||||
2007,"NBL","AMN",2193.13480849429,0.0149828113663743,0.78509866900973,0.0404014546028906,0.159517065021005
|
||||
2007,"NBL","ASIAUS",989.889932809892,0.0162271509217324,0.832187530626005,0.149945562183088,0.0016397562691749
|
||||
2007,"NBL","CEI",2909.34626217913,0.000850322363912121,0.277571816801808,0.72157786083428,0
|
||||
2007,"NBL","CHN",1169.68265841665,0,0.604437226859891,0.35048544113896,0.0450773320011491
|
||||
2007,"NBL","EUR",1336.48548759046,0.456332670589603,0.399129698484048,0.0511075391287189,0.0934300917976305
|
||||
2007,"NBL","SIN",1333.1696324163,0,0.816853994648282,0.183146005351718,0
|
||||
2007,"NBS","AFRMY",1003.75236095351,0.0651568400256861,0.432675433252729,0.45687060301457,0.0452971237070148
|
||||
2007,"NBS","AMCS",882.432144143286,0.00911187527644252,0.337419271740532,0.296353005902383,0.357115847080643
|
||||
2007,"NBS","AMN",1131.15899236726,0.00511992804128777,0.228541472390303,0.0795260829088163,0.686812516659593
|
||||
2007,"NBS","ASIAUS",851.405008344783,0,0.147859459329913,0.223130936448259,0.629009604221828
|
||||
2007,"NBS","CEI",1433.62733081037,0.0544880297398635,0.134230879319672,0.561145906269914,0.25013518467055
|
||||
2007,"NBS","CHN",958.820786614708,0.00201519673403784,0.672948683552199,0.325036119713764,0
|
||||
2007,"NBS","EUR",893.382551697775,0.0922905364833857,0.259615999569008,0.167117411829207,0.480976052118399
|
||||
2007,"NBS","SIN",861.497596814846,0,0.936102212290443,0.0638977877095566,0
|
||||
2008,"NBI","AFRMY",1313.54306329234,0.0516667778398931,0.547061472206402,0.235465465904931,0.165806284048774
|
||||
2008,"NBI","AMCS",1114.49127999271,0,0.575627146383918,0.226964472357331,0.197408381258751
|
||||
2008,"NBI","AMN",1629.01673594902,0.00620271978000414,0.664077881059971,0.0841953246826147,0.24552407447741
|
||||
2008,"NBI","ASIAUS",994.809090652737,0.000987946700090422,0.313624979246642,0.340936070149694,0.344451003903573
|
||||
2008,"NBI","CEI",1854.36334350641,0.0876837604099257,0.255835891839049,0.534707079924424,0.121773267826601
|
||||
2008,"NBI","CHN",1100.30413978341,0.0134513573271539,0.569656014383211,0.376550959295235,0.0403416689944007
|
||||
2008,"NBI","EUR",1043.54112546693,0.129774833686005,0.375025039275457,0.132825311109915,0.362374815928622
|
||||
2008,"NBI","SIN",1143.8008347137,0.00430039812644039,0.469988951992067,0.296132058994066,0.229578590887426
|
||||
2008,"NBL","AFRMY",1886.58991315224,0.00165812898256478,0.605155510062856,0.381504039564207,0.0116823213903723
|
||||
2008,"NBL","AMCS",1700.82158728223,0.0205919955785117,0.368989507434037,0.459661339231761,0.15075715775569
|
||||
2008,"NBL","AMN",2260.87332905502,0.00726989606142494,0.817317864743296,0.022247548087343,0.153164691107936
|
||||
2008,"NBL","ASIAUS",986.330466920578,0,0.609492711499366,0.345369469935553,0.0451378185650805
|
||||
2008,"NBL","CEI",2840.84259443527,0.00813188489772224,0.322413368793628,0.66945474630865,0
|
||||
2008,"NBL","CHN",1206.92511347414,0,0.727578560849299,0.229183584378653,0.0432378547720479
|
||||
2008,"NBL","EUR",1343.21561823585,0.448323700509502,0.392585120773226,0.0642592864143586,0.094831892302913
|
||||
2008,"NBL","SIN",1272.72578138311,0,0.877971926433105,0.122028073566895,0
|
||||
2008,"NBS","AFRMY",1005.64467742537,0.0762956621718267,0.453014830546209,0.400966646404061,0.0697228608779029
|
||||
2008,"NBS","AMCS",905.27681512962,0,0.415212580447228,0.328715964602552,0.25607145495022
|
||||
2008,"NBS","AMN",1147.49513867048,0.00358616306061373,0.236369026638581,0.0389015292315772,0.721143281069228
|
||||
2008,"NBS","ASIAUS",861.490624905805,0,0.147563247669611,0.636912321783231,0.215524430547158
|
||||
2008,"NBS","CEI",1429.536431327,0.0419663013467043,0.169225514491592,0.538045111746165,0.250763072415538
|
||||
2008,"NBS","CHN",998.620779678658,0,0.645436327900737,0.354563672099262,0
|
||||
2008,"NBS","EUR",913.83679925806,0.0792022575619804,0.274295148563102,0.167430830173512,0.479071763701405
|
||||
2008,"NBS","SIN",842.763596584005,0,0.893037698780132,0.106962301219868,0
|
||||
2009,"NBI","AFRMY",1395.6180557787,0.0400077144293168,0.563645886024902,0.169354435288079,0.226991964257702
|
||||
2009,"NBI","AMCS",1134.15923696427,0,0.549034539417961,0.194657314544938,0.256308146037101
|
||||
2009,"NBI","AMN",1636.14445625878,0.00306727606950699,0.660795491599832,0.0820724467980983,0.254064785532563
|
||||
2009,"NBI","ASIAUS",1007.76366536438,0.00160487039716765,0.295328454196822,0.311082796977006,0.391983878429004
|
||||
2009,"NBI","CEI",1831.7440983442,0.11200343505728,0.308555459025905,0.462291361915983,0.117149744000833
|
||||
2009,"NBI","CHN",1089.80047248912,0.0130309603361804,0.528815563203569,0.418075170054611,0.0400783064056394
|
||||
2009,"NBI","EUR",1077.4793370156,0.101058057927184,0.377478238815063,0.116074573263744,0.40538912999401
|
||||
2009,"NBI","SIN",1198.76062615162,0.000547586998631722,0.49611905510428,0.21785225257422,0.285481105322868
|
||||
2009,"NBL","AFRMY",1737.999604481,0.0024356014389473,0.667761381959535,0.311703744755316,0.0180992718462014
|
||||
2009,"NBL","AMCS",1786.30343201811,0,0.533278662185533,0.466721337814467,0
|
||||
2009,"NBL","AMN",2256.85781416248,0.000293962531991564,0.822998417014824,0.0192566507782951,0.157450969674889
|
||||
2009,"NBL","ASIAUS",975.288797642984,0,0.49502523471116,0.402058268815426,0.102916496473414
|
||||
2009,"NBL","CEI",2700.04791763294,0.016265741755581,0.428579284500524,0.555154973743895,0
|
||||
2009,"NBL","CHN",1312.51344831019,0,0.779308474977018,0.187962133444308,0.0327293915786742
|
||||
2009,"NBL","EUR",1332.00055759885,0.271301387749559,0.539690398469936,0.0930775018965338,0.0959307118839708
|
||||
2009,"NBL","SIN",1236.87637021478,0,0.87543498003085,0.12456501996915,0
|
||||
2009,"NBS","AFRMY",1015.19782818828,0.067724090202741,0.512349877055858,0.366186892953176,0.0537391397882253
|
||||
2009,"NBS","AMCS",922.339471792621,0,0.463090324498557,0.222086500275168,0.314823175226275
|
||||
2009,"NBS","AMN",1150.42444420275,0.0023638485091488,0.220153125835233,0.0195277289886652,0.757955296666953
|
||||
2009,"NBS","ASIAUS",862.341398000546,0,0.147486620243995,0.662139958344752,0.190373421411253
|
||||
2009,"NBS","CEI",1508.18157566757,0.0244837351288422,0.121376641024494,0.603876667548701,0.250262956297963
|
||||
2009,"NBS","CHN",1006.60539874558,0,0.624859405991935,0.356141637396337,0.0189989566117288
|
||||
2009,"NBS","EUR",912.597681541622,0.0597885210246579,0.300312732832543,0.152752448564238,0.48714629757856
|
||||
2009,"NBS","SIN",941.281238519887,0,0.784759223988131,0.214067828282338,0.00117294772953116
|
||||
2010,"NBI","AFRMY",1406.43296479054,0.0449048043681947,0.535051098553474,0.167201994631819,0.252842102446512
|
||||
2010,"NBI","AMCS",1134.83544343317,0,0.545390107797884,0.170182358466128,0.284427533735988
|
||||
2010,"NBI","AMN",1662.42359864004,0.00496061847067828,0.728816672217152,0.0769249813229236,0.189297727989246
|
||||
2010,"NBI","ASIAUS",1034.44869437252,0.00269041190310057,0.275544806830541,0.293535988808319,0.428228792458039
|
||||
2010,"NBI","CEI",1885.2361359789,0.129814317275619,0.218206627002135,0.490502210632826,0.161476845089419
|
||||
2010,"NBI","CHN",1111.75193991616,0.0147557384148718,0.498562398808354,0.441190995021097,0.0454908677556766
|
||||
2010,"NBI","EUR",1126.43528438371,0.0831563310136458,0.350644657574872,0.0893713436290118,0.476827667782471
|
||||
2010,"NBI","SIN",1232.04688847674,0,0.392683619459063,0.267490147161415,0.339826233379522
|
||||
2010,"NBL","AFRMY",1748.96251256379,0.00515544186776357,0.638631320076687,0.290611875262191,0.0656013627933582
|
||||
2010,"NBL","AMCS",1817.25210460535,0,0.569252188602098,0.430747811397902,0
|
||||
2010,"NBL","AMN",2293.48169537631,2.51886061967639e-05,0.977088844136898,0.0183975145457622,0.00448845271114271
|
||||
2010,"NBL","ASIAUS",979.060328830998,0,0.395449156088465,0.504781657452823,0.0997691864587119
|
||||
2010,"NBL","CEI",2534.23593350032,0.0364027800357403,0.447826169790593,0.515771050173667,0
|
||||
2010,"NBL","CHN",1285.01096331124,0,0.796704067562598,0.175527073808743,0.027768858628659
|
||||
2010,"NBL","EUR",1343.19436919399,0.187165060525048,0.576851827824477,0.111657707931873,0.124325403718603
|
||||
2010,"NBL","SIN",1341.1531012091,0,0.885615939288984,0.114384060711016,0
|
||||
2010,"NBS","AFRMY",1046.98052361658,0.0634349615944977,0.49049751068997,0.394055886461295,0.0520116412542372
|
||||
2010,"NBS","AMCS",917.446287524619,0,0.41819603371846,0.293179215802351,0.288624750479189
|
||||
2010,"NBS","AMN",1177.1913099698,0.00159825385375051,0.265352606783627,0.0181746340843401,0.714874505278283
|
||||
2010,"NBS","ASIAUS",865.473638725562,0,0.126644446963058,0.650364976235155,0.222990576801786
|
||||
2010,"NBS","CEI",1488.98639545524,0.017966577903667,0.115817629679639,0.644508930958306,0.221706861458388
|
||||
2010,"NBS","CHN",1034.68559085897,0.00130413828754895,0.639212948063118,0.333046610538162,0.0264363031111704
|
||||
2010,"NBS","EUR",911.023573481606,0.048294969718647,0.33208352318159,0.138074424882399,0.481547082217364
|
||||
2010,"NBS","SIN",960.523408860522,0,0.751895625744033,0.246443344797988,0.00166102945797802
|
||||
2011,"NBI","AFRMY",1424.51737732262,0.0351514254809325,0.55005406796654,0.168007335828231,0.246787170724297
|
||||
2011,"NBI","AMCS",1126.18376406221,0.000734612055461518,0.530754016639782,0.158005648584995,0.310505722719762
|
||||
2011,"NBI","AMN",1686.05998924036,0.0104602579483087,0.707861702176985,0.0778357048666881,0.203842335008018
|
||||
2011,"NBI","ASIAUS",1073.91369191156,0.00244862169795106,0.255719918276249,0.274977502095967,0.466853957929832
|
||||
2011,"NBI","CEI",1979.93800292899,0.137775539787413,0.23628000819228,0.528596890430094,0.0973475615902134
|
||||
2011,"NBI","CHN",1142.44620461285,0.0159717505033257,0.467328533995739,0.464062066137272,0.0526376493636626
|
||||
2011,"NBI","EUR",1187.57140666468,0.0880275242535338,0.328394484056784,0.0786915934967489,0.504886398192933
|
||||
2011,"NBI","SIN",1199.41243741536,0,0.39764156675056,0.211455746673591,0.390902686575849
|
||||
2011,"NBL","AFRMY",1739.6007920479,0.0040777823489101,0.732139231352923,0.253912764965224,0.00987022133294287
|
||||
2011,"NBL","AMCS",1872.53328207283,0,0.623483034203575,0.376516965796425,0
|
||||
2011,"NBL","AMN",2293.50566362053,0.00120843065221345,0.975075343474976,0.0195908168289434,0.00412540904386668
|
||||
2011,"NBL","ASIAUS",957.459125162572,0,0.3981324625055,0.522811105254002,0.0790564322404976
|
||||
2011,"NBL","CEI",2444.1952488186,0,0.369093409930154,0.630906590069846,0
|
||||
2011,"NBL","CHN",1288.51124331818,0,0.83484995943391,0.139045400403866,0.0261046401622243
|
||||
2011,"NBL","EUR",1620.02575901213,0.179912556375419,0.590261535633465,0.109784206390408,0.120041701600709
|
||||
2011,"NBL","SIN",1304.46241338626,0,0.888606378314601,0.0730967082076506,0.0382969134777483
|
||||
2011,"NBS","AFRMY",1007.19458217251,0.00631786884779677,0.508465565886657,0.422925929720447,0.0622906355450989
|
||||
2011,"NBS","AMCS",909.144153886733,0,0.367998198470403,0.292855238783156,0.339146562746441
|
||||
2011,"NBS","AMN",1191.16246661423,0.00175140501956247,0.246031544748117,0.0171544198963336,0.735062630335987
|
||||
2011,"NBS","ASIAUS",850.486998137522,0,0.1275802765743,0.675169487129654,0.197250236296046
|
||||
2011,"NBS","CEI",1510.19828714821,0.0265783802986438,0.0992704251101314,0.683706308679481,0.190444885911744
|
||||
2011,"NBS","CHN",1064.18012416966,0.00241012155494277,0.682256953411435,0.302010076448268,0.0133228485853545
|
||||
2011,"NBS","EUR",916.174649366482,0.0492043912001525,0.34135739820329,0.141561218830127,0.467876991766431
|
||||
2011,"NBS","SIN",888.171167446202,0,0.808119113541168,0.191880886458832,0
|
||||
2012,"NBI","AFRMY",1444.17076370054,0.0189785876610238,0.554143289263506,0.183522738833286,0.243355384242185
|
||||
2012,"NBI","AMCS",1145.36576413174,0.00268832200359789,0.559544366738983,0.142446977651729,0.29532033360569
|
||||
2012,"NBI","AMN",1720.16908219547,0.0159141747121521,0.677841464959253,0.0741658832141036,0.232078477114491
|
||||
2012,"NBI","ASIAUS",1085.95332163139,0.00265478449436225,0.237959011044416,0.220868274460398,0.538517930000824
|
||||
2012,"NBI","CEI",2006.06580045659,0.100906253687168,0.227465624809027,0.550683402218782,0.120944719285024
|
||||
2012,"NBI","CHN",1158.59266441247,0.0141035864977569,0.467190187861662,0.460395447099901,0.0583107785406796
|
||||
2012,"NBI","EUR",1182.16457725069,0.0968643146367463,0.30424724675862,0.0665441602095555,0.532344278395079
|
||||
2012,"NBI","SIN",1201.81633691092,0,0.341647722981543,0.145802256860062,0.512550020158395
|
||||
2012,"NBL","AFRMY",1608.76691303504,0.000106582842890438,0.787118825467425,0.212774591689684,0
|
||||
2012,"NBL","AMCS",1942.59130370118,0,0.632402926629166,0.367597073370834,0
|
||||
2012,"NBL","AMN",2247.80377072045,3.03863954678626e-05,0.9736440960383,0.0208958045695639,0.00542971299666854
|
||||
2012,"NBL","ASIAUS",949.291561304045,0,0.395181929394214,0.537425360098971,0.0673927105068143
|
||||
2012,"NBL","CEI",2327.82679505328,0,0.413095631019926,0.56467590472136,0.0222284642587132
|
||||
2012,"NBL","CHN",1293.31687328402,0,0.747038789170222,0.226671463593282,0.0262897472364964
|
||||
2012,"NBL","EUR",1552.95001340446,0.181635309992354,0.620577092099919,0.0768750158845336,0.120912582023194
|
||||
2012,"NBL","SIN",1262.1772234702,0,0.837584118852054,0.0666601175823454,0.0957557635656006
|
||||
2012,"NBS","AFRMY",1013.10920950682,0.000633311637145888,0.499427469358913,0.443650012538879,0.0562892064650616
|
||||
2012,"NBS","AMCS",917.64807277123,0,0.386175386765132,0.252805660269242,0.361018952965626
|
||||
2012,"NBS","AMN",1195.15696859212,0.00214482525496823,0.22493055282814,0.0176967797191902,0.755227842197701
|
||||
2012,"NBS","ASIAUS",753.026296589535,0,0.156786384849645,0.576596768499782,0.266616846650573
|
||||
2012,"NBS","CEI",1516.90709842238,0.0265719133759967,0.109642600306312,0.662408447449585,0.201377038868107
|
||||
2012,"NBS","CHN",1061.66838989265,0.0138058800383036,0.636560586041516,0.330225971057657,0.0194075628625229
|
||||
2012,"NBS","EUR",905.104659916776,0.0377161840217475,0.358327151010614,0.139953298748226,0.464003366219412
|
||||
2012,"NBS","SIN",840.89862762842,0,0.730587480131812,0.161983596208537,0.107428923659652
|
||||
2013,"NBI","AFRMY",1471.14511593237,0.0114910114769816,0.534102122708138,0.210037914839755,0.244368950975125
|
||||
2013,"NBI","AMCS",1155.06924658021,0.00241568258444271,0.543635960449871,0.136476587379677,0.317471769586009
|
||||
2013,"NBI","AMN",1719.64958082902,0.014770684837483,0.658952866924056,0.0664519605527871,0.259824487685674
|
||||
2013,"NBI","ASIAUS",1082.61894926893,0.00175837639481114,0.224484318647278,0.195343168934285,0.578414136023626
|
||||
2013,"NBI","CEI",1991.25170431686,0.0973485691261835,0.226934421221344,0.537183129176646,0.138533880475826
|
||||
2013,"NBI","CHN",1169.0751683354,0.0203698386923458,0.45928577064934,0.461191035794118,0.0591533548641968
|
||||
2013,"NBI","EUR",1194.77446120391,0.107305939312392,0.269210063850985,0.0765411714068999,0.546942825429723
|
||||
2013,"NBI","SIN",1189.66358932435,0,0.291268321817293,0.15977615942733,0.548955518755377
|
||||
2013,"NBL","AFRMY",1504.04783756136,0,0.798346292064315,0.201653707935685,0
|
||||
2013,"NBL","AMCS",1937.30127164138,0,0.663773719539538,0.336226280460462,0
|
||||
2013,"NBL","AMN",2286.21304965688,0.000211067046226865,0.955676846791949,0.0337984109773968,0.0103136751844274
|
||||
2013,"NBL","ASIAUS",946.036297823998,0,0.395066191903056,0.540170996680067,0.0647628114168767
|
||||
2013,"NBL","CEI",2188.51497455621,0,0.442662200849931,0.469005392742746,0.0883324064073231
|
||||
2013,"NBL","CHN",1285.8545792696,0,0.734483212310615,0.240666093799954,0.0248506938894303
|
||||
2013,"NBL","EUR",1532.39136571723,0.172350487079934,0.661374814189746,0.0501506017335714,0.116124096996749
|
||||
2013,"NBL","SIN",1328.54294126624,0,0.808486086695855,0.0986397949140966,0.092874118390048
|
||||
2013,"NBS","AFRMY",1068.69288183705,0.0021694248470834,0.483859805069158,0.462530097211811,0.0514406728719476
|
||||
2013,"NBS","AMCS",917.755280269507,0,0.37228855641885,0.278466499562154,0.349244944018996
|
||||
2013,"NBS","AMN",1189.93496263923,0.000811672645167246,0.237060773990007,0.0203814199708714,0.741746133393954
|
||||
2013,"NBS","ASIAUS",731.034760384142,0,0.135053042650456,0.550677194860484,0.31426976248906
|
||||
2013,"NBS","CEI",1422.79351241332,0.0308013295978799,0.111389044987001,0.653678245758535,0.204131379656584
|
||||
2013,"NBS","CHN",1081.8995362773,0.00914409729134034,0.623942014472763,0.346278836081969,0.0206350521539276
|
||||
2013,"NBS","EUR",898.461746765098,0.0395477275546877,0.366782374910371,0.13387853598876,0.459791361546181
|
||||
2013,"NBS","SIN",869.297618614158,0,0.679750084933829,0.247506035959847,0.0727438791063237
|
||||
2014,"NBI","AFRMY",1436.40944663271,0.0064222364775975,0.542811703744438,0.189062457146068,0.261703602631896
|
||||
2014,"NBI","AMCS",1170.64107322118,0.0027493707722935,0.530203693254464,0.135254397599347,0.331792538373895
|
||||
2014,"NBI","AMN",1727.68329190005,0.0178531848305123,0.637693960736605,0.0603331839078765,0.284119670525006
|
||||
2014,"NBI","ASIAUS",1069.76640284961,0.00140903943577794,0.219788280556817,0.199617160387617,0.579185519619788
|
||||
2014,"NBI","CEI",1973.25252699485,0.0911675719890466,0.292216237619569,0.482044785968709,0.134571404422675
|
||||
2014,"NBI","CHN",1178.42907769604,0.0203759433254584,0.454603685681043,0.45088118606297,0.074139184930529
|
||||
2014,"NBI","EUR",1220.87111343989,0.118961663081345,0.2616043762218,0.0718391226060263,0.547594838090828
|
||||
2014,"NBI","SIN",1186.49577387613,0,0.306002165399449,0.153511675513394,0.540486159087157
|
||||
2014,"NBL","AFRMY",1460.85102879045,0,0.799996324768136,0.200003675231864,0
|
||||
2014,"NBL","AMCS",1834.96900554596,0,0.71100178980825,0.28899821019175,0
|
||||
2014,"NBL","AMN",2268.84482457768,6.72936818232921e-05,0.934895447344904,0.0504745212860161,0.014562737687257
|
||||
2014,"NBL","ASIAUS",1000.51839657752,0,0.426399899813303,0.481541523775617,0.09205857641108
|
||||
2014,"NBL","CEI",2053.73479130168,0.000544108512580672,0.416405433989741,0.469643769666972,0.113406687830707
|
||||
2014,"NBL","CHN",1291.02642060912,0,0.716545024776569,0.255027082980876,0.028427892242555
|
||||
2014,"NBL","EUR",1606.64593197914,0.223161174507677,0.623883423471845,0.045626760908785,0.107328641111694
|
||||
2014,"NBL","SIN",1348.85019466439,0,0.765337403190089,0.115260651990989,0.119401944818922
|
||||
2014,"NBS","AFRMY",1064.45898081177,0.00291491760697259,0.485878275640673,0.457701466135592,0.0535053406167621
|
||||
2014,"NBS","AMCS",928.090241708309,0,0.411885967404546,0.272978645563733,0.315135387031722
|
||||
2014,"NBS","AMN",1205.70501230508,2.45836912217714e-05,0.260890569176629,0.0177039188823548,0.721380928249794
|
||||
2014,"NBS","ASIAUS",753.068372303079,0,0.096516532958733,0.566967056952962,0.336516410088305
|
||||
2014,"NBS","CEI",1431.03331915115,0.068921517641212,0.0570462073460887,0.677622808512896,0.196409466499804
|
||||
2014,"NBS","CHN",1105.47632702017,0.00768325406242105,0.570544252275754,0.401294897685053,0.0204775959767723
|
||||
2014,"NBS","EUR",914.25425692296,0.0393740037806408,0.356954682548805,0.141471803256619,0.462199510413936
|
||||
2014,"NBS","SIN",870.669726393812,0,0.733632170133234,0.222198134187043,0.0441696956797234
|
||||
2015,"NBI","AFRMY",1428.53050234763,0.0207766178438452,0.492908544562016,0.196885283444095,0.289429554150043
|
||||
2015,"NBI","AMCS",1203.29309803984,0.0028179607064242,0.520867760933472,0.140938655203608,0.335375623156496
|
||||
2015,"NBI","AMN",1738.01477644474,0.0238092584310295,0.579818612000771,0.0581504707429435,0.338221658825256
|
||||
2015,"NBI","ASIAUS",1052.60453445109,0.0032172314743161,0.219776081816767,0.185703310311,0.591303376397917
|
||||
2015,"NBI","CEI",1899.158400117,0.0893669146996939,0.289209803218771,0.462718555092102,0.158704726989434
|
||||
2015,"NBI","CHN",1218.17124952301,0.00914828266010726,0.441899152517771,0.461115758075308,0.0878368067468136
|
||||
2015,"NBI","EUR",1207.32369074129,0.118256348177102,0.250103229431232,0.059944525531618,0.571695896860048
|
||||
2015,"NBI","SIN",1176.24230287256,0,0.307018807724042,0.150213486045044,0.542767706230914
|
||||
2015,"NBL","AFRMY",1494.84589917535,0.0112118503311329,0.779272623184427,0.20951552648444,0
|
||||
2015,"NBL","AMCS",1562.20669493107,0,0.6986386766052,0.275581685391551,0.0257796380032494
|
||||
2015,"NBL","AMN",2268.33992274839,9.04679809181425e-06,0.911300397608658,0.06437449238171,0.0243160632115405
|
||||
2015,"NBL","ASIAUS",993.747398844302,0,0.467001449276421,0.399133992157238,0.133864558566342
|
||||
2015,"NBL","CEI",2048.75127359522,0,0.437091066089999,0.440660700787902,0.122248233122099
|
||||
2015,"NBL","CHN",1315.70922515485,0,0.690747469164544,0.275857121788021,0.0333954090474352
|
||||
2015,"NBL","EUR",1587.82150152401,0.221710041191847,0.624500798518349,0.0379757193450513,0.115813440944753
|
||||
2015,"NBL","SIN",1425.34077085995,0,0.73022044751683,0.173733140503727,0.0960464119794426
|
||||
2015,"NBS","AFRMY",1081.31463618046,0,0.502376778765073,0.441361634783227,0.0562615864516999
|
||||
2015,"NBS","AMCS",920.124003016477,0,0.408484604595118,0.273477618923622,0.31803777648126
|
||||
2015,"NBS","AMN",1207.20179372532,0.000510259136209996,0.260185914010707,0.0170901915972747,0.722213635255808
|
||||
2015,"NBS","ASIAUS",746.022483799258,0,0.0177077500445511,0.698664388961756,0.283627860993693
|
||||
2015,"NBS","CEI",1343.95715299032,0.0635179365450916,0.0472716426192377,0.693401532252457,0.195808888583214
|
||||
2015,"NBS","CHN",1108.44286330452,0.0776463771665144,0.489628594799918,0.411511072706446,0.0212139553271215
|
||||
2015,"NBS","EUR",939.017335617323,0.0313696377066949,0.393312783030407,0.12695669793992,0.448360881322978
|
||||
2015,"NBS","SIN",870.348448252073,0,0.702505145710871,0.172709655017247,0.124785199271883
|
||||
2016,"NBI","AFRMY",1475.56331850991,0.0319754311921306,0.501860657126291,0.183281816589932,0.282882095091646
|
||||
2016,"NBI","AMCS",1268.42203606365,0.0023583603231158,0.563185394601305,0.0808479769156193,0.35360826815996
|
||||
2016,"NBI","AMN",1711.79670365248,0.02577849127476,0.549989380291664,0.0492655076190842,0.374966620814492
|
||||
2016,"NBI","ASIAUS",1070.93354324963,0.00516606331186542,0.197972500385649,0.184155947112969,0.612705489189516
|
||||
2016,"NBI","CEI",1862.14713610059,0.112573334673903,0.33565303689593,0.379862621299724,0.171911007130443
|
||||
2016,"NBI","CHN",1245.23338795978,0.0174303355553323,0.410030769403998,0.489632917891661,0.0829059771490086
|
||||
2016,"NBI","EUR",1232.35805700697,0.123501446929874,0.227869683386626,0.0588770681164627,0.589751801567038
|
||||
2016,"NBI","SIN",1193.57262201123,0,0.289360253571763,0.140917456810684,0.569722289617552
|
||||
2016,"NBL","AFRMY",1547.9837934542,0.00152621623563369,0.848747591920331,0.149726191844036,0
|
||||
2016,"NBL","AMCS",1399.28855792509,0,0.798176059610811,0.11870362890116,0.0831203114880292
|
||||
2016,"NBL","AMN",2205.14765666623,0.000527650985546271,0.873246410525674,0.0677079740707337,0.058517964418046
|
||||
2016,"NBL","ASIAUS",1020.687695705,0,0.465360160672609,0.398125297827866,0.136514541499525
|
||||
2016,"NBL","CEI",2105.59709994094,0.000951451629755652,0.391067283495658,0.46983310993992,0.138148154934667
|
||||
2016,"NBL","CHN",1320.79179131628,0,0.639442580076711,0.347677415343407,0.012880004579882
|
||||
2016,"NBL","EUR",1554.64543886728,0.229774823410553,0.582330931609537,0.0419130744654885,0.145981170514422
|
||||
2016,"NBL","SIN",1460.91539363769,0,0.691968822226738,0.242406130669295,0.065625047103968
|
||||
2016,"NBS","AFRMY",1043.2327331395,0.000102620028115559,0.634683193935788,0.323485113563352,0.0417290724727441
|
||||
2016,"NBS","AMCS",1013.31700243345,0,0.501353450401791,0.189225496465874,0.309421053132335
|
||||
2016,"NBS","AMN",1225.63550236901,0.00141736160621114,0.289719174320785,0.0152546624653066,0.693608801607697
|
||||
2016,"NBS","ASIAUS",904.726098221287,0,0,0.798881873808401,0.201118126191599
|
||||
2016,"NBS","CEI",1364.60825764312,0.0683115316622174,0.00476814895964074,0.762177897951658,0.164742421426484
|
||||
2016,"NBS","CHN",1103.26586893804,0.00266194495735963,0.542546304930504,0.416096118847209,0.0386956312649273
|
||||
2016,"NBS","EUR",948.655111540345,0.0320549107778452,0.405305244353362,0.122519499361926,0.440120345506867
|
||||
2016,"NBS","SIN",844.823445399405,0,0.721656446373448,0.216189948982058,0.0621536046444943
|
||||
2017,"NBI","AFRMY",1496.17994808487,0.050625567469187,0.474891027462492,0.177206079246246,0.297277325822076
|
||||
2017,"NBI","AMCS",1340.77054209775,0.00419097560687048,0.548911692692836,0.0695792033443306,0.377318128355963
|
||||
2017,"NBI","AMN",1689.09690632216,0.0253778746560111,0.517462892568269,0.0468534041592378,0.410305828616482
|
||||
2017,"NBI","ASIAUS",1090.60485991004,0.00302765116934079,0.182402439784683,0.204964818235332,0.609605090810644
|
||||
2017,"NBI","CEI",1829.02413533484,0.122318230623105,0.335686084091387,0.365400712322871,0.176594972962637
|
||||
2017,"NBI","CHN",1241.32330932168,0.024165109264865,0.383933267779363,0.50615740525501,0.085744217700762
|
||||
2017,"NBI","EUR",1241.31006489195,0.123124647157609,0.21410854053978,0.0595471173091829,0.603219694993428
|
||||
2017,"NBI","SIN",1183.69574724329,0,0.242874340420892,0.166468473106022,0.590657186473086
|
||||
2017,"NBL","AFRMY",1555.18905266997,2.73547557210255e-06,0.898899901855501,0.101097362668927,0
|
||||
2017,"NBL","AMCS",1376.27783117168,0,0.706932971944753,0.0926412724368285,0.200425755618419
|
||||
2017,"NBL","AMN",2175.50832123322,0.00239165454393695,0.841933196283437,0.0707387560852616,0.0849363930873646
|
||||
2017,"NBL","ASIAUS",1023.43566831925,0.000800258513939384,0.447394398349715,0.414625939796609,0.137179403339737
|
||||
2017,"NBL","CEI",2180.61964841118,0.00560280639904956,0.410272233726201,0.435728583453766,0.148396376420984
|
||||
2017,"NBL","CHN",1336.04580667692,0,0.635925974829241,0.337062109484366,0.0270119156863931
|
||||
2017,"NBL","EUR",1581.27109738214,0.209323748985351,0.535854354427379,0.0590779284422201,0.19574396814505
|
||||
2017,"NBL","SIN",1385.85005953748,0,0.694349060550439,0.305650939449561,0
|
||||
2017,"NBS","AFRMY",1022.05829700248,0.000723919974063216,0.635702564857298,0.350461018515382,0.0131124966532562
|
||||
2017,"NBS","AMCS",944.049052843015,0.0115925636002999,0.489370351314295,0.208584407086816,0.29045267799859
|
||||
2017,"NBS","AMN",1209.28347755063,0.000260843711046378,0.301428731430571,0.0144515929155083,0.683858831942874
|
||||
2017,"NBS","ASIAUS",956.073385582094,0.00147769148061019,0,0.911135321358445,0.0873869871609443
|
||||
2017,"NBS","CEI",1339.60911044415,0.063829015346455,0,0.780514118854084,0.155656865799461
|
||||
2017,"NBS","CHN",1096.72095621103,0,0.495554779702135,0.461495254014362,0.0429499662835029
|
||||
2017,"NBS","EUR",962.176517730686,0.0334553700671753,0.404898911096585,0.113828314931266,0.447817403904974
|
||||
2017,"NBS","SIN",863.781882075268,0,0.778308570936917,0.221691429063083,0
|
||||
2018,"NBI","AFRMY",1549.53915639867,0.0663002134707632,0.449777947103253,0.172174303179534,0.31174753624645
|
||||
2018,"NBI","AMCS",1380.65995729124,0.00403791875092572,0.526953049921794,0.0644728642739742,0.404536167053306
|
||||
2018,"NBI","AMN",1681.37583151833,0.0243223958910481,0.492169303732863,0.0633986007298211,0.420109699646268
|
||||
2018,"NBI","ASIAUS",1102.63856456315,0.00113091958105788,0.161121175570221,0.216596903036671,0.621151001812049
|
||||
2018,"NBI","CEI",1838.97361970384,0.115750602261845,0.314124925171631,0.348725437026174,0.22139903554035
|
||||
2018,"NBI","CHN",1256.69089158885,0.0265087757771971,0.357904805904709,0.519799449653997,0.0957869686640962
|
||||
2018,"NBI","EUR",1267.88271268358,0.116072242775617,0.212225378892056,0.0546706111589407,0.617031767173387
|
||||
2018,"NBI","SIN",1192.02157391478,0,0.211194036778494,0.157406085488894,0.631399877732612
|
||||
2018,"NBL","AFRMY",1592.64169651201,0.0513972464483145,0.840367241579313,0.108235511972372,0
|
||||
2018,"NBL","AMCS",1340.46644169513,0,0.66054449860418,0.0732958526968107,0.266159648699009
|
||||
2018,"NBL","AMN",2183.23533315202,0.0017923290021823,0.823287104617353,0.0822383579702717,0.0926822084101931
|
||||
2018,"NBL","ASIAUS",1066.82729853101,0.000643403947630106,0.370208117432283,0.365332096984865,0.263816381635222
|
||||
2018,"NBL","CEI",2267.56279416812,0.0454603660327829,0.435043783737768,0.333858182214455,0.185637668014994
|
||||
2018,"NBL","CHN",1344.92957290438,0,0.611070288854834,0.344016836752947,0.0449128743922186
|
||||
2018,"NBL","EUR",1588.3174282926,0.18395427233798,0.527489434361314,0.0664491093108706,0.222107183989835
|
||||
2018,"NBL","SIN",1429.44110780293,0,0.727694458670705,0.272305541329295,0
|
||||
2018,"NBS","AFRMY",1020.71685747481,0.00692659851352168,0.648337238023146,0.339492629211395,0.00524353425193789
|
||||
2018,"NBS","AMCS",902.398858952222,0.00767826812173117,0.505309253487862,0.23799696604927,0.249015512341137
|
||||
2018,"NBS","AMN",1206.8536506272,0.000238267606518097,0.314470771627468,0.0207780560305486,0.664512904735466
|
||||
2018,"NBS","ASIAUS",911.614877157633,0,0.0279970581658189,0.88880103749632,0.0832019043378608
|
||||
2018,"NBS","CEI",1357.86587539802,0.0698812672610752,0,0.777867892979874,0.152250839759051
|
||||
2018,"NBS","CHN",1107.47879126715,0,0.492149708807731,0.486728197049722,0.0211220941425467
|
||||
2018,"NBS","EUR",970.85688005974,0.0343455036778129,0.41137643315386,0.120211212327105,0.434066850841222
|
||||
2018,"NBS","SIN",894.381759680565,0,0.855936877250193,0.144063122749808,0
|
||||
2019,"NBI","AFRMY",1492.2978111226,0.102649918856002,0.42680578751604,0.165936644969946,0.304607648658012
|
||||
2019,"NBI","AMCS",1354.78961410027,0.00337035610822788,0.470985961521779,0.0723116182708271,0.453332064099166
|
||||
2019,"NBI","AMN",1643.57578285843,0.0202378025679542,0.460651074647505,0.073868504257858,0.445242618526683
|
||||
2019,"NBI","ASIAUS",1134.24920064386,0.00108039379605935,0.135728909953501,0.224294044582557,0.638896651667882
|
||||
2019,"NBI","CEI",1813.05675723459,0.0896501184478546,0.309554188276996,0.323469457108335,0.277326236166814
|
||||
2019,"NBI","CHN",1271.78426912405,0.0248194088558872,0.37000220364988,0.509070546712698,0.0961078407815349
|
||||
2019,"NBI","EUR",1273.63382759207,0.107488921776733,0.217887890655653,0.0521920020091823,0.622431185558432
|
||||
2019,"NBI","SIN",1200.46166820718,0,0.0833726872437807,0.137415457334685,0.779211855421535
|
||||
2019,"NBL","AFRMY",1662.94590864627,0,0.842797719838686,0.124673635166073,0.0325286449952406
|
||||
2019,"NBL","AMCS",1235.12258886029,0,0.617596872702616,0.0477170231320532,0.334686104165331
|
||||
2019,"NBL","AMN",2148.05011529566,0.00761783833122888,0.802867121032521,0.098422138050584,0.0910929025856656
|
||||
2019,"NBL","ASIAUS",1144.60519451324,0,0.371336633430582,0.30078615964931,0.327877206920108
|
||||
2019,"NBL","CEI",2368.60926137144,0.0485422579137862,0.38017253853315,0.347589217922245,0.223695985630819
|
||||
2019,"NBL","CHN",1358.19649018895,0,0.645339552021407,0.310728603013136,0.0439318449654567
|
||||
2019,"NBL","EUR",1574.6849710538,0.150220914130494,0.551073518822584,0.0679955670446141,0.230710000002308
|
||||
2019,"NBL","SIN",1481.69541448869,0,0.694738182374634,0.220867504903163,0.0843943127222033
|
||||
2019,"NBS","AFRMY",1011.81873527508,0.0184179892903638,0.671794608859629,0.306147393397505,0.00364000845250183
|
||||
2019,"NBS","AMCS",918.820675598173,0,0.494093062827807,0.249364558198859,0.256542378973333
|
||||
2019,"NBS","AMN",1175.52459983989,0.00114786266440984,0.328186295175556,0.033747039008561,0.636918803151473
|
||||
2019,"NBS","ASIAUS",920.61931795843,0.000417544188915301,0.0591318440835829,0.860276886661068,0.0801737250664342
|
||||
2019,"NBS","CEI",1389.23206478903,0.0789510027434591,0,0.773127726131804,0.147921271124737
|
||||
2019,"NBS","CHN",1114.70988332997,0,0.455976409034769,0.507095166014595,0.0369284249506358
|
||||
2019,"NBS","EUR",924.972539854576,0.0298135908183802,0.427911639420309,0.0787710251981705,0.46350374456314
|
||||
2019,"NBS","SIN",846.981601236901,0,0.868729299147553,0.131270700852447,0
|
||||
2020,"NBI","AFRMY",1426.25559102747,0.106848432206768,0.373383866727221,0.179912610347422,0.339855090718589
|
||||
2020,"NBI","AMCS",1306.44021925163,8.79269989742728e-05,0.365998869543555,0.0756483357196987,0.558264867737772
|
||||
2020,"NBI","AMN",1607.55973458849,0.0208137079668084,0.404852448191474,0.0565542123531072,0.51777963148861
|
||||
2020,"NBI","ASIAUS",985.261328996938,0.00163127066622938,0.136321884296025,0.299521925183548,0.562524919854198
|
||||
2020,"NBI","CEI",1723.1238461104,0.0127266489771093,0.223472267070623,0.300583795139888,0.46321728881238
|
||||
2020,"NBI","CHN",1206.22329925023,0.0172890851938207,0.357717276826464,0.522238373766762,0.102755264212954
|
||||
2020,"NBI","EUR",1222.70165889077,0.108458721201359,0.222676388841383,0.0504372290592626,0.618427660897995
|
||||
2020,"NBI","SIN",1184.95872053627,0,0.0440019446739351,0.11774326558819,0.838254789737875
|
||||
2020,"NBL","AFRMY",1482.67185044828,0.000351746209561387,0.717985880116287,0.135904425201487,0.145757948472665
|
||||
2020,"NBL","AMCS",1276.0795553978,0,0.443361917669241,0.0259725624881629,0.530665519842596
|
||||
2020,"NBL","AMN",2049.02352240078,0.0118712033288142,0.772425051283519,0.120716504974669,0.0949872404129974
|
||||
2020,"NBL","ASIAUS",899.538384968181,0,0.333638225914751,0.361960367141017,0.304401406944233
|
||||
2020,"NBL","CEI",2148.90533738309,0.043822817946538,0.230190185738548,0.39626685985108,0.329720136463834
|
||||
2020,"NBL","CHN",1285.39430942117,0,0.660866332697191,0.325647988412346,0.0134856788904631
|
||||
2020,"NBL","EUR",1546.74022682415,0.0507529300296071,0.575646353351569,0.0477569957210068,0.325843720897817
|
||||
2020,"NBL","SIN",1355.02510680111,0,0.577504652539615,0.150945452269486,0.2715498951909
|
||||
2020,"NBS","AFRMY",935.255247110228,0.0199587016443027,0.666753484356739,0.313287813998959,0
|
||||
2020,"NBS","AMCS",902.162552916117,0,0.43782454377697,0.190070588729677,0.372104867493353
|
||||
2020,"NBS","AMN",1156.20881319321,7.74534625298871e-06,0.350232699958734,0.0256591015097475,0.624100453185266
|
||||
2020,"NBS","ASIAUS",825.335039912212,0,0.0562683451124768,0.908293639144726,0.0354380157427969
|
||||
2020,"NBS","CEI",1465.78858435796,0.106767581999736,0,0.807299576017807,0.0859328419824571
|
||||
2020,"NBS","CHN",1053.99157417391,0,0.406363801780305,0.517106893193666,0.0765293050260291
|
||||
2020,"NBS","EUR",881.94342382916,0.0369041996788106,0.497107719421705,0.0788573689914357,0.387130711908049
|
||||
2020,"NBS","SIN",842.928772674598,0,0.865726665502825,0.134273334497175,0
|
||||
2021,"NBI","AFRMY",1414.88415520516,0.0929941722087423,0.354796579859671,0.18322125464213,0.368987993289457
|
||||
2021,"NBI","AMCS",1224.77547703967,0,0.388189694550877,0.0706241741032749,0.541186131345848
|
||||
2021,"NBI","AMN",1633.84097570581,0.00803674285865951,0.399727129589615,0.0547820246268294,0.537454102924896
|
||||
2021,"NBI","ASIAUS",865.593828023355,0.00359077348539316,0.130402188765361,0.34989126700319,0.516115770746056
|
||||
2021,"NBI","CEI",1715.86698426312,0.0179473941125914,0.211494434523127,0.289180322252333,0.481377849111949
|
||||
2021,"NBI","CHN",1196.64231448929,0.0206478966337006,0.349782838674386,0.548240975791555,0.0813282889003586
|
||||
2021,"NBI","EUR",1266.71603824698,0.111288729111577,0.231708502222202,0.0535219844845168,0.603480784181704
|
||||
2021,"NBI","SIN",1168.04223850818,0,0.0436943252376604,0.129403678975315,0.826901995787025
|
||||
2021,"NBL","AFRMY",1451.47763124676,0.00138482532298442,0.559998858896625,0.230461545327438,0.208154770452952
|
||||
2021,"NBL","AMCS",1426.70968494685,0,0.543301799558644,0.039099762091056,0.4175984383503
|
||||
2021,"NBL","AMN",1999.16939747984,0.00485818950836252,0.773041430291726,0.112015224939166,0.110085155260746
|
||||
2021,"NBL","ASIAUS",790.951652462864,0.00108618084020231,0.357002562371498,0.369735580100665,0.272175676687635
|
||||
2021,"NBL","CEI",1972.37515036087,0.0804303432954618,0.233857944276774,0.334786089398675,0.350925623029089
|
||||
2021,"NBL","CHN",1303.74363457761,0,0.70398999555753,0.264253203455216,0.0317568009872544
|
||||
2021,"NBL","EUR",1561.97812541815,0.0611927746145145,0.545851335831636,0.0804972976250072,0.312458591928842
|
||||
2021,"NBL","SIN",1148.27920897887,0,0.441275678262291,0.113636648112906,0.445087673624803
|
||||
2021,"NBS","AFRMY",856.192787702671,0.00287971393527096,0.550560978765089,0.44655930729964,0
|
||||
2021,"NBS","AMCS",846.498287878333,0,0.458158478997787,0.146770944356393,0.39507057664582
|
||||
2021,"NBS","AMN",1217.73804339635,0.000479895470863492,0.374176598876324,0.0178203623503828,0.607523143302429
|
||||
2021,"NBS","ASIAUS",785.74421816884,0.00558084459838678,0.0558453523727677,0.878736357466402,0.059837445562444
|
||||
2021,"NBS","CEI",1543.1603219169,0.102608666329066,0,0.857339513894178,0.040051819776756
|
||||
2021,"NBS","CHN",1030.93692671912,0,0.512164125718881,0.417343964011558,0.0704919102695618
|
||||
2021,"NBS","EUR",936.127412326756,0.0430143716604469,0.489084103918227,0.0888290264826769,0.379072497938649
|
||||
2021,"NBS","SIN",835.916471702758,0,0.861926075951236,0.138073924048764,0
|
||||
2022,"NBI","AFRMY",1508.02793033993,0.0928239959096221,0.351348406690954,0.191833274953088,0.363994322446336
|
||||
2022,"NBI","AMCS",1253.60867656486,0.00155004878751767,0.414901184784682,0.079077187632123,0.504471578795677
|
||||
2022,"NBI","AMN",1603.84210069102,0.0233107034422129,0.399195566635614,0.0479893257361896,0.529504404185984
|
||||
2022,"NBI","ASIAUS",940.912955028639,0.0205676350844789,0.107599193269436,0.313939305146886,0.5578938664992
|
||||
2022,"NBI","CEI",1745.27844502451,0.00833915599876527,0.18921846489797,0.369509956297374,0.43293242280589
|
||||
2022,"NBI","CHN",1254.89850091034,0.0177947939078773,0.347283014203228,0.534906235380386,0.100015956508509
|
||||
2022,"NBI","EUR",1323.69990096757,0.0954795385367953,0.230089479151201,0.0519695510635642,0.622461431248439
|
||||
2022,"NBI","SIN",1243.99970231986,0,0.0592884712242457,0.137917967629151,0.802793561146603
|
||||
2022,"NBL","AFRMY",1676.60937263212,0.00281294148250758,0.534873022503775,0.143288502499541,0.319025533514176
|
||||
2022,"NBL","AMCS",1371.35837291087,8.20082972860506e-05,0.562808807640177,0.0380541338860963,0.399055050176441
|
||||
2022,"NBL","AMN",2054.24043496376,0.0183217001977137,0.760127960018196,0.125959978586627,0.0955903611974634
|
||||
2022,"NBL","ASIAUS",915.295801823025,0.00305899241747255,0.335999700744929,0.294693633557543,0.366247673280056
|
||||
2022,"NBL","CEI",2064.37589694466,0.0935933793784874,0.163971142624411,0.468512765498142,0.27392271249896
|
||||
2022,"NBL","CHN",1342.72615190688,0,0.721492954093771,0.242822087815385,0.0356849580908439
|
||||
2022,"NBL","EUR",1596.1270790658,0.0484788557290657,0.504223696059201,0.0606250058690828,0.38667244234265
|
||||
2022,"NBL","SIN",1214.63579642949,0,0.26197395277676,0.0981350053960581,0.639891041827182
|
||||
2022,"NBS","AFRMY",911.09338630692,0.00351769806258936,0.522374745960725,0.474107555976685,0
|
||||
2022,"NBS","AMCS",884.066743879667,0,0.575797186859166,0.145484987711224,0.27871782542961
|
||||
2022,"NBS","AMN",1158.47522743255,0.00374269998456825,0.396360775933847,0.0277995351662053,0.572096988915379
|
||||
2022,"NBS","ASIAUS",772.956060589141,0.00303386975872505,0.0530916415636241,0.828525538962808,0.115348949714842
|
||||
2022,"NBS","CEI",1589.78033325245,0.111400419373622,0,0.878810147303776,0.00978943332260171
|
||||
2022,"NBS","CHN",1085.29108387524,0,0.508404168500958,0.427733047993193,0.0638627835058491
|
||||
2022,"NBS","EUR",935.501417022643,0.0497429984106455,0.363821183684826,0.0810580895792733,0.505377728325256
|
||||
2022,"NBS","SIN",951.068123227409,0,0.920785060368375,0.0792149396316247,0
|
||||
2023,"NBI","AFRMY",1556.80725540075,0.108705729988229,0.357505917314596,0.189058468969886,0.344729883727289
|
||||
2023,"NBI","AMCS",1241.80819937441,0.0289649721176391,0.41397210082774,0.108470008003653,0.448592919050968
|
||||
2023,"NBI","AMN",1564.0567787468,0.0361240344242864,0.382880330125865,0.0376911413380658,0.543304494111783
|
||||
2023,"NBI","ASIAUS",1060.77102245777,0.0434597096601014,0.0996079028519961,0.263029878790639,0.593902508697264
|
||||
2023,"NBI","CEI",1826.74187551772,0.0161406537098493,0.181865607729458,0.451368477975653,0.35062526058504
|
||||
2023,"NBI","CHN",1283.76020047764,0.0216716274453564,0.351912323428936,0.531434528290253,0.0949815208354548
|
||||
2023,"NBI","EUR",1317.06560783486,0.107020107150277,0.228792064406469,0.0584522649085934,0.60573556353466
|
||||
2023,"NBI","SIN",1270.1874747079,0.00126325286810533,0.111853127830007,0.164994711115334,0.721888908186554
|
||||
2023,"NBL","AFRMY",1800.06458633877,0.0133801106637488,0.471638834214414,0.123224660013843,0.391756395107995
|
||||
2023,"NBL","AMCS",1471.55344394088,0.000357247706404697,0.514015409920159,0.0522471049169055,0.43338023745653
|
||||
2023,"NBL","AMN",2039.1261458019,0.0196181402759156,0.742988516683088,0.135165871294862,0.102227471746134
|
||||
2023,"NBL","ASIAUS",1166.41083664536,0.000821334636392441,0.337293163392121,0.258604343917637,0.403281158053849
|
||||
2023,"NBL","CEI",2075.76750196921,0.0793214969402916,0.155488785662149,0.497588719221434,0.267600998176125
|
||||
2023,"NBL","CHN",1406.90240627733,0,0.664221019460596,0.269499070261477,0.066279910277927
|
||||
2023,"NBL","EUR",1602.96286865254,0.0533530583431029,0.496518833895994,0.0684374957893583,0.381690611971545
|
||||
2023,"NBL","SIN",1207.05467159359,0.0144262416321987,0.100990835158525,0.0681571485731707,0.816425774636106
|
||||
2023,"NBS","AFRMY",948.355103599086,0.037007108754676,0.529137242613583,0.430945058144917,0.00291059048682419
|
||||
2023,"NBS","AMCS",816.632932817499,0.0403905851219203,0.543118516223362,0.128171678582223,0.288319220072495
|
||||
2023,"NBS","AMN",1161.48843882446,0.00544690110891447,0.397181519896337,0.0498032125672708,0.547568366427478
|
||||
2023,"NBS","ASIAUS",811.927507390515,0.00682706917018166,0.10350473181388,0.707316448782261,0.182351750233678
|
||||
2023,"NBS","CEI",1588.80512656602,0.0813996913130536,0,0.897450495806196,0.0211498128807507
|
||||
2023,"NBS","CHN",1146.74694167458,0,0.509160638468378,0.441672931770871,0.0491664297607512
|
||||
2023,"NBS","EUR",920.308789611741,0.0388582555858133,0.365848709273328,0.103845958454748,0.49144707668611
|
||||
2023,"NBS","SIN",878.739435661258,0,0.775025109785864,0.224974890214136,0
|
||||
2024,"NBI","AFRMY",1579.48948598743,0.106926908280134,0.360693015920477,0.189859687386662,0.342520388412728
|
||||
2024,"NBI","AMCS",1250.24403101588,0.0414159705614002,0.42689506003781,0.115876538651537,0.415812430749253
|
||||
2024,"NBI","AMN",1572.67501442444,0.0290876582832362,0.409449876448866,0.0362810938851846,0.525181371382713
|
||||
2024,"NBI","ASIAUS",1102.07739212829,0.0416770563805044,0.115549021444569,0.228050703639522,0.614723218535404
|
||||
2024,"NBI","CEI",1866.83639957087,0.0189098698362885,0.222097297249686,0.458414916894742,0.300577916019284
|
||||
2024,"NBI","CHN",1312.64618285207,0.0249347736528903,0.351847357160196,0.518839729058085,0.104378140128828
|
||||
2024,"NBI","EUR",1336.27890873247,0.117060447025523,0.221219359164198,0.0519930911985262,0.609727102611752
|
||||
2024,"NBI","SIN",1299.65489058005,0.00403312317372397,0.131350613355621,0.246597986209749,0.618018277260906
|
||||
2024,"NBL","AFRMY",1813.16505531261,0.0272968123923097,0.504343445099653,0.119369542405468,0.348990200102569
|
||||
2024,"NBL","AMCS",1565.72427872587,4.64099990715566e-05,0.556344879125429,0.0858793866552136,0.357729324220286
|
||||
2024,"NBL","AMN",2016.70289863956,0.0190064682639889,0.729746049435679,0.133516084090153,0.117731398210178
|
||||
2024,"NBL","ASIAUS",1247.63101805903,0,0.319612032106869,0.237939997376194,0.442447970516937
|
||||
2024,"NBL","CEI",2129.65465842982,0.0947184787619097,0.199795510901423,0.391833862691685,0.313652147644982
|
||||
2024,"NBL","CHN",1439.87320871325,0,0.618327064319791,0.282286201130497,0.0993867345497127
|
||||
2024,"NBL","EUR",1582.37900032054,0.0495865933513587,0.524625418338014,0.0561882336431036,0.369599754667523
|
||||
2024,"NBL","SIN",1227.30839530482,0.0160878020325343,0.0980672061606923,0.0544868664544921,0.831358125352281
|
||||
2024,"NBS","AFRMY",974.930683434649,0.0109738988294409,0.472691828022828,0.516334273147731,0
|
||||
2024,"NBS","AMCS",832.631287060762,0.0199417808883177,0.523428155173699,0.116230273314328,0.340399790623655
|
||||
2024,"NBS","AMN",1186.58632652511,0.00575161404328503,0.383000903445285,0.0665835446228188,0.544663937888611
|
||||
2024,"NBS","ASIAUS",812.959679368,0.00582811965677951,0.0902622565260704,0.756055039507645,0.147854584309506
|
||||
2024,"NBS","CEI",1639.1455889578,0.123707299379575,0,0.861039993258657,0.0152527073617684
|
||||
2024,"NBS","CHN",1167.96541144834,0,0.499883693867715,0.446786177395235,0.0533301287370499
|
||||
2024,"NBS","EUR",931.780060361597,0.0332866124235474,0.389731268275063,0.127345695393135,0.449636423908255
|
||||
2024,"NBS","SIN",1120.54617865243,0.0205221386317005,0.540682750051481,0.438795111316819,0
|
||||
|
BIN
M2/Clustering In Practice/data/Guppy 2.jpeg
Normal file
|
After Width: | Height: | Size: 842 KiB |
BIN
M2/Clustering In Practice/data/PampasGrass.jpg
Normal file
|
After Width: | Height: | Size: 255 KiB |
2008
M2/Clustering In Practice/data/chiffres.csv
Normal file
BIN
M2/Clustering In Practice/data/image_compressed.jpg
Normal file
|
After Width: | Height: | Size: 49 KiB |
1514
M2/Data Visualisation/Exemple Projet/Application projet.Rmd
Normal file
6980
M2/Data Visualisation/Exemple Projet/Application-projet.html
Normal file
BIN
M2/Data Visualisation/Exemple Projet/Notice projet.pdf
Normal file
304
M2/Data Visualisation/Exemple Projet/heart.csv
Normal file
@@ -0,0 +1,304 @@
|
||||
age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
|
||||
63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
|
||||
37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
|
||||
41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
|
||||
56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
|
||||
57,0,0,120,354,0,1,163,1,0.6,2,0,2,1
|
||||
57,1,0,140,192,0,1,148,0,0.4,1,0,1,1
|
||||
56,0,1,140,294,0,0,153,0,1.3,1,0,2,1
|
||||
44,1,1,120,263,0,1,173,0,0,2,0,3,1
|
||||
52,1,2,172,199,1,1,162,0,0.5,2,0,3,1
|
||||
57,1,2,150,168,0,1,174,0,1.6,2,0,2,1
|
||||
54,1,0,140,239,0,1,160,0,1.2,2,0,2,1
|
||||
48,0,2,130,275,0,1,139,0,0.2,2,0,2,1
|
||||
49,1,1,130,266,0,1,171,0,0.6,2,0,2,1
|
||||
64,1,3,110,211,0,0,144,1,1.8,1,0,2,1
|
||||
58,0,3,150,283,1,0,162,0,1,2,0,2,1
|
||||
50,0,2,120,219,0,1,158,0,1.6,1,0,2,1
|
||||
58,0,2,120,340,0,1,172,0,0,2,0,2,1
|
||||
66,0,3,150,226,0,1,114,0,2.6,0,0,2,1
|
||||
43,1,0,150,247,0,1,171,0,1.5,2,0,2,1
|
||||
69,0,3,140,239,0,1,151,0,1.8,2,2,2,1
|
||||
59,1,0,135,234,0,1,161,0,0.5,1,0,3,1
|
||||
44,1,2,130,233,0,1,179,1,0.4,2,0,2,1
|
||||
42,1,0,140,226,0,1,178,0,0,2,0,2,1
|
||||
61,1,2,150,243,1,1,137,1,1,1,0,2,1
|
||||
40,1,3,140,199,0,1,178,1,1.4,2,0,3,1
|
||||
71,0,1,160,302,0,1,162,0,0.4,2,2,2,1
|
||||
59,1,2,150,212,1,1,157,0,1.6,2,0,2,1
|
||||
51,1,2,110,175,0,1,123,0,0.6,2,0,2,1
|
||||
65,0,2,140,417,1,0,157,0,0.8,2,1,2,1
|
||||
53,1,2,130,197,1,0,152,0,1.2,0,0,2,1
|
||||
41,0,1,105,198,0,1,168,0,0,2,1,2,1
|
||||
65,1,0,120,177,0,1,140,0,0.4,2,0,3,1
|
||||
44,1,1,130,219,0,0,188,0,0,2,0,2,1
|
||||
54,1,2,125,273,0,0,152,0,0.5,0,1,2,1
|
||||
51,1,3,125,213,0,0,125,1,1.4,2,1,2,1
|
||||
46,0,2,142,177,0,0,160,1,1.4,0,0,2,1
|
||||
54,0,2,135,304,1,1,170,0,0,2,0,2,1
|
||||
54,1,2,150,232,0,0,165,0,1.6,2,0,3,1
|
||||
65,0,2,155,269,0,1,148,0,0.8,2,0,2,1
|
||||
65,0,2,160,360,0,0,151,0,0.8,2,0,2,1
|
||||
51,0,2,140,308,0,0,142,0,1.5,2,1,2,1
|
||||
48,1,1,130,245,0,0,180,0,0.2,1,0,2,1
|
||||
45,1,0,104,208,0,0,148,1,3,1,0,2,1
|
||||
53,0,0,130,264,0,0,143,0,0.4,1,0,2,1
|
||||
39,1,2,140,321,0,0,182,0,0,2,0,2,1
|
||||
52,1,1,120,325,0,1,172,0,0.2,2,0,2,1
|
||||
44,1,2,140,235,0,0,180,0,0,2,0,2,1
|
||||
47,1,2,138,257,0,0,156,0,0,2,0,2,1
|
||||
53,0,2,128,216,0,0,115,0,0,2,0,0,1
|
||||
53,0,0,138,234,0,0,160,0,0,2,0,2,1
|
||||
51,0,2,130,256,0,0,149,0,0.5,2,0,2,1
|
||||
66,1,0,120,302,0,0,151,0,0.4,1,0,2,1
|
||||
62,1,2,130,231,0,1,146,0,1.8,1,3,3,1
|
||||
44,0,2,108,141,0,1,175,0,0.6,1,0,2,1
|
||||
63,0,2,135,252,0,0,172,0,0,2,0,2,1
|
||||
52,1,1,134,201,0,1,158,0,0.8,2,1,2,1
|
||||
48,1,0,122,222,0,0,186,0,0,2,0,2,1
|
||||
45,1,0,115,260,0,0,185,0,0,2,0,2,1
|
||||
34,1,3,118,182,0,0,174,0,0,2,0,2,1
|
||||
57,0,0,128,303,0,0,159,0,0,2,1,2,1
|
||||
71,0,2,110,265,1,0,130,0,0,2,1,2,1
|
||||
54,1,1,108,309,0,1,156,0,0,2,0,3,1
|
||||
52,1,3,118,186,0,0,190,0,0,1,0,1,1
|
||||
41,1,1,135,203,0,1,132,0,0,1,0,1,1
|
||||
58,1,2,140,211,1,0,165,0,0,2,0,2,1
|
||||
35,0,0,138,183,0,1,182,0,1.4,2,0,2,1
|
||||
51,1,2,100,222,0,1,143,1,1.2,1,0,2,1
|
||||
45,0,1,130,234,0,0,175,0,0.6,1,0,2,1
|
||||
44,1,1,120,220,0,1,170,0,0,2,0,2,1
|
||||
62,0,0,124,209,0,1,163,0,0,2,0,2,1
|
||||
54,1,2,120,258,0,0,147,0,0.4,1,0,3,1
|
||||
51,1,2,94,227,0,1,154,1,0,2,1,3,1
|
||||
29,1,1,130,204,0,0,202,0,0,2,0,2,1
|
||||
51,1,0,140,261,0,0,186,1,0,2,0,2,1
|
||||
43,0,2,122,213,0,1,165,0,0.2,1,0,2,1
|
||||
55,0,1,135,250,0,0,161,0,1.4,1,0,2,1
|
||||
51,1,2,125,245,1,0,166,0,2.4,1,0,2,1
|
||||
59,1,1,140,221,0,1,164,1,0,2,0,2,1
|
||||
52,1,1,128,205,1,1,184,0,0,2,0,2,1
|
||||
58,1,2,105,240,0,0,154,1,0.6,1,0,3,1
|
||||
41,1,2,112,250,0,1,179,0,0,2,0,2,1
|
||||
45,1,1,128,308,0,0,170,0,0,2,0,2,1
|
||||
60,0,2,102,318,0,1,160,0,0,2,1,2,1
|
||||
52,1,3,152,298,1,1,178,0,1.2,1,0,3,1
|
||||
42,0,0,102,265,0,0,122,0,0.6,1,0,2,1
|
||||
67,0,2,115,564,0,0,160,0,1.6,1,0,3,1
|
||||
68,1,2,118,277,0,1,151,0,1,2,1,3,1
|
||||
46,1,1,101,197,1,1,156,0,0,2,0,3,1
|
||||
54,0,2,110,214,0,1,158,0,1.6,1,0,2,1
|
||||
58,0,0,100,248,0,0,122,0,1,1,0,2,1
|
||||
48,1,2,124,255,1,1,175,0,0,2,2,2,1
|
||||
57,1,0,132,207,0,1,168,1,0,2,0,3,1
|
||||
52,1,2,138,223,0,1,169,0,0,2,4,2,1
|
||||
54,0,1,132,288,1,0,159,1,0,2,1,2,1
|
||||
45,0,1,112,160,0,1,138,0,0,1,0,2,1
|
||||
53,1,0,142,226,0,0,111,1,0,2,0,3,1
|
||||
62,0,0,140,394,0,0,157,0,1.2,1,0,2,1
|
||||
52,1,0,108,233,1,1,147,0,0.1,2,3,3,1
|
||||
43,1,2,130,315,0,1,162,0,1.9,2,1,2,1
|
||||
53,1,2,130,246,1,0,173,0,0,2,3,2,1
|
||||
42,1,3,148,244,0,0,178,0,0.8,2,2,2,1
|
||||
59,1,3,178,270,0,0,145,0,4.2,0,0,3,1
|
||||
63,0,1,140,195,0,1,179,0,0,2,2,2,1
|
||||
42,1,2,120,240,1,1,194,0,0.8,0,0,3,1
|
||||
50,1,2,129,196,0,1,163,0,0,2,0,2,1
|
||||
68,0,2,120,211,0,0,115,0,1.5,1,0,2,1
|
||||
69,1,3,160,234,1,0,131,0,0.1,1,1,2,1
|
||||
45,0,0,138,236,0,0,152,1,0.2,1,0,2,1
|
||||
50,0,1,120,244,0,1,162,0,1.1,2,0,2,1
|
||||
50,0,0,110,254,0,0,159,0,0,2,0,2,1
|
||||
64,0,0,180,325,0,1,154,1,0,2,0,2,1
|
||||
57,1,2,150,126,1,1,173,0,0.2,2,1,3,1
|
||||
64,0,2,140,313,0,1,133,0,0.2,2,0,3,1
|
||||
43,1,0,110,211,0,1,161,0,0,2,0,3,1
|
||||
55,1,1,130,262,0,1,155,0,0,2,0,2,1
|
||||
37,0,2,120,215,0,1,170,0,0,2,0,2,1
|
||||
41,1,2,130,214,0,0,168,0,2,1,0,2,1
|
||||
56,1,3,120,193,0,0,162,0,1.9,1,0,3,1
|
||||
46,0,1,105,204,0,1,172,0,0,2,0,2,1
|
||||
46,0,0,138,243,0,0,152,1,0,1,0,2,1
|
||||
64,0,0,130,303,0,1,122,0,2,1,2,2,1
|
||||
59,1,0,138,271,0,0,182,0,0,2,0,2,1
|
||||
41,0,2,112,268,0,0,172,1,0,2,0,2,1
|
||||
54,0,2,108,267,0,0,167,0,0,2,0,2,1
|
||||
39,0,2,94,199,0,1,179,0,0,2,0,2,1
|
||||
34,0,1,118,210,0,1,192,0,0.7,2,0,2,1
|
||||
47,1,0,112,204,0,1,143,0,0.1,2,0,2,1
|
||||
67,0,2,152,277,0,1,172,0,0,2,1,2,1
|
||||
52,0,2,136,196,0,0,169,0,0.1,1,0,2,1
|
||||
74,0,1,120,269,0,0,121,1,0.2,2,1,2,1
|
||||
54,0,2,160,201,0,1,163,0,0,2,1,2,1
|
||||
49,0,1,134,271,0,1,162,0,0,1,0,2,1
|
||||
42,1,1,120,295,0,1,162,0,0,2,0,2,1
|
||||
41,1,1,110,235,0,1,153,0,0,2,0,2,1
|
||||
41,0,1,126,306,0,1,163,0,0,2,0,2,1
|
||||
49,0,0,130,269,0,1,163,0,0,2,0,2,1
|
||||
60,0,2,120,178,1,1,96,0,0,2,0,2,1
|
||||
62,1,1,128,208,1,0,140,0,0,2,0,2,1
|
||||
57,1,0,110,201,0,1,126,1,1.5,1,0,1,1
|
||||
64,1,0,128,263,0,1,105,1,0.2,1,1,3,1
|
||||
51,0,2,120,295,0,0,157,0,0.6,2,0,2,1
|
||||
43,1,0,115,303,0,1,181,0,1.2,1,0,2,1
|
||||
42,0,2,120,209,0,1,173,0,0,1,0,2,1
|
||||
67,0,0,106,223,0,1,142,0,0.3,2,2,2,1
|
||||
76,0,2,140,197,0,2,116,0,1.1,1,0,2,1
|
||||
70,1,1,156,245,0,0,143,0,0,2,0,2,1
|
||||
44,0,2,118,242,0,1,149,0,0.3,1,1,2,1
|
||||
60,0,3,150,240,0,1,171,0,0.9,2,0,2,1
|
||||
44,1,2,120,226,0,1,169,0,0,2,0,2,1
|
||||
42,1,2,130,180,0,1,150,0,0,2,0,2,1
|
||||
66,1,0,160,228,0,0,138,0,2.3,2,0,1,1
|
||||
71,0,0,112,149,0,1,125,0,1.6,1,0,2,1
|
||||
64,1,3,170,227,0,0,155,0,0.6,1,0,3,1
|
||||
66,0,2,146,278,0,0,152,0,0,1,1,2,1
|
||||
39,0,2,138,220,0,1,152,0,0,1,0,2,1
|
||||
58,0,0,130,197,0,1,131,0,0.6,1,0,2,1
|
||||
47,1,2,130,253,0,1,179,0,0,2,0,2,1
|
||||
35,1,1,122,192,0,1,174,0,0,2,0,2,1
|
||||
58,1,1,125,220,0,1,144,0,0.4,1,4,3,1
|
||||
56,1,1,130,221,0,0,163,0,0,2,0,3,1
|
||||
56,1,1,120,240,0,1,169,0,0,0,0,2,1
|
||||
55,0,1,132,342,0,1,166,0,1.2,2,0,2,1
|
||||
41,1,1,120,157,0,1,182,0,0,2,0,2,1
|
||||
38,1,2,138,175,0,1,173,0,0,2,4,2,1
|
||||
38,1,2,138,175,0,1,173,0,0,2,4,2,1
|
||||
67,1,0,160,286,0,0,108,1,1.5,1,3,2,0
|
||||
67,1,0,120,229,0,0,129,1,2.6,1,2,3,0
|
||||
62,0,0,140,268,0,0,160,0,3.6,0,2,2,0
|
||||
63,1,0,130,254,0,0,147,0,1.4,1,1,3,0
|
||||
53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
|
||||
56,1,2,130,256,1,0,142,1,0.6,1,1,1,0
|
||||
48,1,1,110,229,0,1,168,0,1,0,0,3,0
|
||||
58,1,1,120,284,0,0,160,0,1.8,1,0,2,0
|
||||
58,1,2,132,224,0,0,173,0,3.2,2,2,3,0
|
||||
60,1,0,130,206,0,0,132,1,2.4,1,2,3,0
|
||||
40,1,0,110,167,0,0,114,1,2,1,0,3,0
|
||||
60,1,0,117,230,1,1,160,1,1.4,2,2,3,0
|
||||
64,1,2,140,335,0,1,158,0,0,2,0,2,0
|
||||
43,1,0,120,177,0,0,120,1,2.5,1,0,3,0
|
||||
57,1,0,150,276,0,0,112,1,0.6,1,1,1,0
|
||||
55,1,0,132,353,0,1,132,1,1.2,1,1,3,0
|
||||
65,0,0,150,225,0,0,114,0,1,1,3,3,0
|
||||
61,0,0,130,330,0,0,169,0,0,2,0,2,0
|
||||
58,1,2,112,230,0,0,165,0,2.5,1,1,3,0
|
||||
50,1,0,150,243,0,0,128,0,2.6,1,0,3,0
|
||||
44,1,0,112,290,0,0,153,0,0,2,1,2,0
|
||||
60,1,0,130,253,0,1,144,1,1.4,2,1,3,0
|
||||
54,1,0,124,266,0,0,109,1,2.2,1,1,3,0
|
||||
50,1,2,140,233,0,1,163,0,0.6,1,1,3,0
|
||||
41,1,0,110,172,0,0,158,0,0,2,0,3,0
|
||||
51,0,0,130,305,0,1,142,1,1.2,1,0,3,0
|
||||
58,1,0,128,216,0,0,131,1,2.2,1,3,3,0
|
||||
54,1,0,120,188,0,1,113,0,1.4,1,1,3,0
|
||||
60,1,0,145,282,0,0,142,1,2.8,1,2,3,0
|
||||
60,1,2,140,185,0,0,155,0,3,1,0,2,0
|
||||
59,1,0,170,326,0,0,140,1,3.4,0,0,3,0
|
||||
46,1,2,150,231,0,1,147,0,3.6,1,0,2,0
|
||||
67,1,0,125,254,1,1,163,0,0.2,1,2,3,0
|
||||
62,1,0,120,267,0,1,99,1,1.8,1,2,3,0
|
||||
65,1,0,110,248,0,0,158,0,0.6,2,2,1,0
|
||||
44,1,0,110,197,0,0,177,0,0,2,1,2,0
|
||||
60,1,0,125,258,0,0,141,1,2.8,1,1,3,0
|
||||
58,1,0,150,270,0,0,111,1,0.8,2,0,3,0
|
||||
68,1,2,180,274,1,0,150,1,1.6,1,0,3,0
|
||||
62,0,0,160,164,0,0,145,0,6.2,0,3,3,0
|
||||
52,1,0,128,255,0,1,161,1,0,2,1,3,0
|
||||
59,1,0,110,239,0,0,142,1,1.2,1,1,3,0
|
||||
60,0,0,150,258,0,0,157,0,2.6,1,2,3,0
|
||||
49,1,2,120,188,0,1,139,0,2,1,3,3,0
|
||||
59,1,0,140,177,0,1,162,1,0,2,1,3,0
|
||||
57,1,2,128,229,0,0,150,0,0.4,1,1,3,0
|
||||
61,1,0,120,260,0,1,140,1,3.6,1,1,3,0
|
||||
39,1,0,118,219,0,1,140,0,1.2,1,0,3,0
|
||||
61,0,0,145,307,0,0,146,1,1,1,0,3,0
|
||||
56,1,0,125,249,1,0,144,1,1.2,1,1,2,0
|
||||
43,0,0,132,341,1,0,136,1,3,1,0,3,0
|
||||
62,0,2,130,263,0,1,97,0,1.2,1,1,3,0
|
||||
63,1,0,130,330,1,0,132,1,1.8,2,3,3,0
|
||||
65,1,0,135,254,0,0,127,0,2.8,1,1,3,0
|
||||
48,1,0,130,256,1,0,150,1,0,2,2,3,0
|
||||
63,0,0,150,407,0,0,154,0,4,1,3,3,0
|
||||
55,1,0,140,217,0,1,111,1,5.6,0,0,3,0
|
||||
65,1,3,138,282,1,0,174,0,1.4,1,1,2,0
|
||||
56,0,0,200,288,1,0,133,1,4,0,2,3,0
|
||||
54,1,0,110,239,0,1,126,1,2.8,1,1,3,0
|
||||
70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
|
||||
62,1,1,120,281,0,0,103,0,1.4,1,1,3,0
|
||||
35,1,0,120,198,0,1,130,1,1.6,1,0,3,0
|
||||
59,1,3,170,288,0,0,159,0,0.2,1,0,3,0
|
||||
64,1,2,125,309,0,1,131,1,1.8,1,0,3,0
|
||||
47,1,2,108,243,0,1,152,0,0,2,0,2,0
|
||||
57,1,0,165,289,1,0,124,0,1,1,3,3,0
|
||||
55,1,0,160,289,0,0,145,1,0.8,1,1,3,0
|
||||
64,1,0,120,246,0,0,96,1,2.2,0,1,2,0
|
||||
70,1,0,130,322,0,0,109,0,2.4,1,3,2,0
|
||||
51,1,0,140,299,0,1,173,1,1.6,2,0,3,0
|
||||
58,1,0,125,300,0,0,171,0,0,2,2,3,0
|
||||
60,1,0,140,293,0,0,170,0,1.2,1,2,3,0
|
||||
77,1,0,125,304,0,0,162,1,0,2,3,2,0
|
||||
35,1,0,126,282,0,0,156,1,0,2,0,3,0
|
||||
70,1,2,160,269,0,1,112,1,2.9,1,1,3,0
|
||||
59,0,0,174,249,0,1,143,1,0,1,0,2,0
|
||||
64,1,0,145,212,0,0,132,0,2,1,2,1,0
|
||||
57,1,0,152,274,0,1,88,1,1.2,1,1,3,0
|
||||
56,1,0,132,184,0,0,105,1,2.1,1,1,1,0
|
||||
48,1,0,124,274,0,0,166,0,0.5,1,0,3,0
|
||||
56,0,0,134,409,0,0,150,1,1.9,1,2,3,0
|
||||
66,1,1,160,246,0,1,120,1,0,1,3,1,0
|
||||
54,1,1,192,283,0,0,195,0,0,2,1,3,0
|
||||
69,1,2,140,254,0,0,146,0,2,1,3,3,0
|
||||
51,1,0,140,298,0,1,122,1,4.2,1,3,3,0
|
||||
43,1,0,132,247,1,0,143,1,0.1,1,4,3,0
|
||||
62,0,0,138,294,1,1,106,0,1.9,1,3,2,0
|
||||
67,1,0,100,299,0,0,125,1,0.9,1,2,2,0
|
||||
59,1,3,160,273,0,0,125,0,0,2,0,2,0
|
||||
45,1,0,142,309,0,0,147,1,0,1,3,3,0
|
||||
58,1,0,128,259,0,0,130,1,3,1,2,3,0
|
||||
50,1,0,144,200,0,0,126,1,0.9,1,0,3,0
|
||||
62,0,0,150,244,0,1,154,1,1.4,1,0,2,0
|
||||
38,1,3,120,231,0,1,182,1,3.8,1,0,3,0
|
||||
66,0,0,178,228,1,1,165,1,1,1,2,3,0
|
||||
52,1,0,112,230,0,1,160,0,0,2,1,2,0
|
||||
53,1,0,123,282,0,1,95,1,2,1,2,3,0
|
||||
63,0,0,108,269,0,1,169,1,1.8,1,2,2,0
|
||||
54,1,0,110,206,0,0,108,1,0,1,1,2,0
|
||||
66,1,0,112,212,0,0,132,1,0.1,2,1,2,0
|
||||
55,0,0,180,327,0,2,117,1,3.4,1,0,2,0
|
||||
49,1,2,118,149,0,0,126,0,0.8,2,3,2,0
|
||||
54,1,0,122,286,0,0,116,1,3.2,1,2,2,0
|
||||
56,1,0,130,283,1,0,103,1,1.6,0,0,3,0
|
||||
46,1,0,120,249,0,0,144,0,0.8,2,0,3,0
|
||||
61,1,3,134,234,0,1,145,0,2.6,1,2,2,0
|
||||
67,1,0,120,237,0,1,71,0,1,1,0,2,0
|
||||
58,1,0,100,234,0,1,156,0,0.1,2,1,3,0
|
||||
47,1,0,110,275,0,0,118,1,1,1,1,2,0
|
||||
52,1,0,125,212,0,1,168,0,1,2,2,3,0
|
||||
58,1,0,146,218,0,1,105,0,2,1,1,3,0
|
||||
57,1,1,124,261,0,1,141,0,0.3,2,0,3,0
|
||||
58,0,1,136,319,1,0,152,0,0,2,2,2,0
|
||||
61,1,0,138,166,0,0,125,1,3.6,1,1,2,0
|
||||
42,1,0,136,315,0,1,125,1,1.8,1,0,1,0
|
||||
52,1,0,128,204,1,1,156,1,1,1,0,0,0
|
||||
59,1,2,126,218,1,1,134,0,2.2,1,1,1,0
|
||||
40,1,0,152,223,0,1,181,0,0,2,0,3,0
|
||||
61,1,0,140,207,0,0,138,1,1.9,2,1,3,0
|
||||
46,1,0,140,311,0,1,120,1,1.8,1,2,3,0
|
||||
59,1,3,134,204,0,1,162,0,0.8,2,2,2,0
|
||||
57,1,1,154,232,0,0,164,0,0,2,1,2,0
|
||||
57,1,0,110,335,0,1,143,1,3,1,1,3,0
|
||||
55,0,0,128,205,0,2,130,1,2,1,1,3,0
|
||||
61,1,0,148,203,0,1,161,0,0,2,1,3,0
|
||||
58,1,0,114,318,0,2,140,0,4.4,0,3,1,0
|
||||
58,0,0,170,225,1,0,146,1,2.8,1,2,1,0
|
||||
67,1,2,152,212,0,0,150,0,0.8,1,0,3,0
|
||||
44,1,0,120,169,0,1,144,1,2.8,0,0,1,0
|
||||
63,1,0,140,187,0,0,144,1,4,2,2,3,0
|
||||
63,0,0,124,197,0,1,136,1,0,1,0,2,0
|
||||
59,1,0,164,176,1,0,90,0,1,1,2,1,0
|
||||
57,0,0,140,241,0,1,123,1,0.2,1,0,3,0
|
||||
45,1,3,110,264,0,1,132,0,1.2,1,0,3,0
|
||||
68,1,0,144,193,1,1,141,0,3.4,1,2,3,0
|
||||
57,1,0,130,131,0,1,115,1,1.2,1,1,3,0
|
||||
57,0,1,130,236,0,0,174,0,0,1,1,2,0
|
||||
|
1
M2/Data Visualisation/Project/.Rprofile
Normal file
@@ -0,0 +1 @@
|
||||
source("renv/activate.R")
|
||||
801
M2/Data Visualisation/Project/NoticeTechnique.Rmd
Normal file
@@ -0,0 +1,801 @@
|
||||
---
|
||||
output:
|
||||
pdf_document:
|
||||
number_sections: true
|
||||
toc: false
|
||||
toc_depth: 2
|
||||
fig_caption: true
|
||||
highlight: tango
|
||||
latex_engine: xelatex
|
||||
geometry: "left=2cm,right=2cm,top=2cm,bottom=2cm"
|
||||
header-includes:
|
||||
- \usepackage{titling}
|
||||
- \usepackage{graphicx}
|
||||
- \usepackage{fancyhdr}
|
||||
- \pagestyle{fancy}
|
||||
- \fancyhead[L]{Notice Technique - Tuberculose}
|
||||
- \fancyhead[R]{Arthur DANJOU}
|
||||
- \fancyfoot[C]{\thepage}
|
||||
---
|
||||
|
||||
\begin{titlepage}
|
||||
\begin{center}
|
||||
\vspace*{1cm}
|
||||
|
||||
% --- En-tête Université ---
|
||||
{\Large \textsc{Université Paris-Dauphine -- PSL}} \\
|
||||
\vspace{0.2cm}
|
||||
{\large Master 280 -- Ingénierie Statistique et Financière}
|
||||
|
||||
\vspace{1.5cm}
|
||||
|
||||
% --- Bloc Titre ---
|
||||
\hrulefill
|
||||
\vspace{0.4cm}
|
||||
|
||||
{\bfseries \Huge \uppercase{Monitorage et Segmentation \\[0.3cm] de la Tuberculose (OMS)}}
|
||||
|
||||
\vspace{0.4cm}
|
||||
\hrulefill
|
||||
|
||||
\vspace{1.5cm}
|
||||
|
||||
% --- AJOUT : Problématique ---
|
||||
% Utilisation d'une minipage pour contrôler la largeur du texte (80% de la page)
|
||||
\begin{minipage}{0.85\textwidth}
|
||||
\centering
|
||||
\Large \textit{«~Au-delà des agrégats nationaux : comment l'analyse multivariée permet-elle de révéler une typologie opérationnelle des risques sanitaires mondiaux face à la tuberculose ?~»}
|
||||
\end{minipage}
|
||||
|
||||
\vspace{2cm}
|
||||
|
||||
% --- Auteur et Enseignant ---
|
||||
{\Large \textsc{Arthur DANJOU}} \\
|
||||
\vspace{1.5cm}
|
||||
|
||||
{\large Enseignant :} \\ [0.2cm]
|
||||
{\large Quentin GUIBERT}
|
||||
|
||||
\vfill % Pousse le logo vers le bas de page automatiquement
|
||||
|
||||
% --- Logo ---
|
||||
\includegraphics[height=25mm]{logo_dauphine.jpg}
|
||||
\vspace{0.5cm}
|
||||
|
||||
\hrulefill
|
||||
\vspace{0.2cm}
|
||||
|
||||
% --- Pied de page ---
|
||||
{\textsc{Data Visualisation \\ Année Universitaire 2025-2026}}
|
||||
|
||||
\end{center}
|
||||
\end{titlepage}
|
||||
|
||||
\newpage
|
||||
\tableofcontents
|
||||
\newpage
|
||||
|
||||
```{r setup, include=FALSE}
|
||||
knitr::opts_chunk$set(
|
||||
echo = FALSE,
|
||||
warning = FALSE,
|
||||
message = FALSE,
|
||||
fig.align = "center",
|
||||
out.width = "75%"
|
||||
)
|
||||
|
||||
library(tidyverse)
|
||||
library(sf)
|
||||
library(rnaturalearth)
|
||||
library(rnaturalearthdata)
|
||||
library(knitr)
|
||||
library(kableExtra)
|
||||
library(gridExtra)
|
||||
library(moments)
|
||||
library(factoextra)
|
||||
```
|
||||
|
||||
> * **Application déployée :** [https://go.arthurdanjou.fr/datavis-app](https://go.arthurdanjou.fr/datavis-app)
|
||||
> * **Code Source de(GitHub) :** [https://go.arthurdanjou.fr/datavis-code](https://go.arthurdanjou.fr/datavis-code)
|
||||
|
||||
# Introduction
|
||||
|
||||
## Contexte et enjeux sanitaires
|
||||
|
||||
Avec 1,6 million de décès annuels et plus de 10 millions de nouveaux cas estimés en 2022, la tuberculose (TB) demeure la deuxième maladie infectieuse la plus meurtrière au monde après le COVID-19 (OMS, 2025). Pourtant, derrière ces chiffres globaux se cache une épidémie profondément inégalitaire. Alors que certains pays rapportent une incidence maîtrisée inférieure à 10 cas pour 100 000 habitants, d'autres font face à des taux critiques dépassant les 500 cas, révélant des fractures sanitaires majeures entre les nations.
|
||||
|
||||
Pour piloter la réponse mondiale, l'Organisation Mondiale de la Santé produit le *Global Tuberculosis Report*, une base de données exhaustive comptant plus de 200 pays et une quarantaine d'indicateurs. Cependant, la richesse même de ces données pose un défi d'analyse : face à la multitude de variables (incidence, notification, mortalité, co-infection), les tableaux statistiques traditionnels échouent à offrir une vision synthétique et opérationnelle. Ils ne permettent ni d'identifier rapidement les profils à risque, ni de visualiser les dynamiques temporelles complexes.
|
||||
|
||||
## Problématique : Au-delà des agrégats nationaux : comment l'analyse multivariée permet-elle de révéler une typologie opérationnelle des risques sanitaires mondiaux face à la tuberculose ?
|
||||
|
||||
Ce projet déploie une chaîne de traitement Data Science complète reposant sur trois piliers. Premièrement, une rationalisation de la donnée par sélection de variables et analyse exploratoire (EDA) pour isoler les signaux pertinents. Deuxièmement, une segmentation intelligente (Clustering K-Means) pour identifier des profils de risque homogènes au-delà des simples zones géographiques. Enfin, une opérationnalisation interactive via une application R Shiny, offrant aux décideurs une interface dynamique pour visualiser les tendances 2000-2024.
|
||||
|
||||
## Périmètre et Structure
|
||||
|
||||
L'étude se concentre sur les indicateurs épidémiologiques "durs" pour garantir la robustesse du modèle, les facteurs exogènes (PIB, dépenses) étant considérés comme contextuels.
|
||||
|
||||
La suite de cette notice détaille la méthodologie : la préparation des données (Section 2) et la modélisation mathématique (Section 3) précèdent l'analyse des profils identifiés (Section 4). L'architecture de l'application R Shiny est décrite en Section 5, suivie de l'exploitation des résultats et du benchmarking (Section 6). Le document se clôt sur les perspectives d'évolution (Section 7) et le cadre d'intégrité académique (Section 8).
|
||||
|
||||
# Analyse Exploratoire des Données
|
||||
|
||||
## Source et structure des données
|
||||
|
||||
### Origine et portée des données
|
||||
|
||||
Le socle empirique repose sur les données du *Global Tuberculosis Report* 2024 de l'OMS, référence internationale couvrant 25 ans (2000-2024) pour 215 territoires. Le fichier brut de 50 variables s'articule autour de trois dimensions complémentaires : **épidémiologique** (morbidité, mortalité, prise en charge), **démographique** (structure de population nécessaire à la standardisation des taux) et **géopolitique** (métadonnées spatiales et codes ISO-3) dédiées à l'analyse spatiale.
|
||||
|
||||
### Convention de nommage et sémantique
|
||||
|
||||
L'analyse requiert la maîtrise d'une nomenclature rigoureuse distinguant les **Cas notifiés** (préfixe `c_`, données brutes administratives) des **Estimations modélisées** (préfixe `e_`), par lesquelles l'OMS corrige les biais de sous-déclaration et intègre les incertitudes. Pour cette étude, nous privilégierons exclusivement ces variables estimées (`e_`) : ce choix méthodologique permet de neutraliser l'hétérogénéité des performances administratives locales afin de garantir une comparabilité internationale stricte des dynamiques épidémiques.
|
||||
|
||||
### Qualité des données et limites
|
||||
|
||||
Bien qu'offrant une profondeur spatio-temporelle unique appuyée par une méthodologie standardisée, ce jeu de données présente des hétérogénéités inhérentes à la surveillance mondiale. Les biais de mesure restent prégnants pour les pays à faibles revenus ou en conflit, où les estimations reposent sur l'extrapolation statistique plutôt que sur un comptage exhaustif, sans compter le caractère provisoire des données récentes (2023-2024). Ces limites intrinsèques justifient l'adoption d'une approche méthodologique prudente, privilégiant l'exclusion des variables incertaines et le rejet de l'imputation pour les observations incomplètes.
|
||||
|
||||
### Importation et aperçu initial
|
||||
|
||||
```{r}
|
||||
data_raw <- read.csv("data/TB_burden_countries_2025-12-09.csv")
|
||||
```
|
||||
|
||||
Le jeu de données importé contient $5347$ observations et $50$ variables. Le tableau ci-dessous présente les dix premières lignes du jeu de données, illustrant la structure longitudinale pour le premier pays par ordre alphabétique (Afghanistan) au début de la période d'étude (de 2000 à 2009).
|
||||
|
||||
```{r}
|
||||
data_raw |>
|
||||
select(year, country, g_whoregion, e_inc_100k, e_mort_exc_tbhiv_100k) |>
|
||||
head(10) |>
|
||||
kable(
|
||||
col.names = c(
|
||||
"Année",
|
||||
"Pays",
|
||||
"Région",
|
||||
"Incidence (/100k)",
|
||||
"Mortalité (/100k)"
|
||||
),
|
||||
caption = "Aperçu des premières lignes du jeu de données brut",
|
||||
booktabs = TRUE
|
||||
) |>
|
||||
kable_styling(latex_options = c("striped", "hold_position"))
|
||||
```
|
||||
|
||||
## Sélection de variables
|
||||
|
||||
La qualité d'une segmentation non-supervisée étant tributaire de la pertinence des entrées, l'injection brute des 50 variables initiales a été écartée pour prévenir deux écueils méthodologiques. D'une part, le **fléau de la dimension** (*Curse of Dimensionality*) qui tend à uniformiser les distances euclidiennes et flouter les clusters et d'autre part, le **biais de redondance**, où la colinéarité des variables risque de surpondérer artificiellement un même phénomène. Nous avons donc déployé une stratégie de réduction de dimension en deux temps : un filtrage structurel (approche par entonnoir) consolidé par un arbitrage statistique des corrélations.
|
||||
|
||||
### Approche par entonnoir : élimination des métadonnées, des bornes d'incertitude et des valeurs absolues
|
||||
|
||||
Une stratégie de réduction de dimension en quatre étapes successives a été appliquée pour isoler les variables pertinentes. Dans un premier temps, le **nettoyage structurel** et la simplification ont permis d'écarter les métadonnées techniques (ex: `iso_numeric`) ainsi que les bornes d'incertitude (`_lo`, `_hi`), jugées non pertinentes pour le calcul de distances euclidiennes ou redondantes avec l'estimation centrale. Ensuite, l'étape de **standardisation** a exclu les valeurs absolues (`_num`) afin de neutraliser tout biais démographique et permettre la comparaison directe entre pays de tailles hétérogènes. Enfin, un **filtrage de la colinéarité** a supprimé les indicateurs redondants (corrélation > 0,8), tels que les notifications brutes, pour éviter de biaiser la pondération des dimensions dans l'algorithme de clustering.
|
||||
|
||||
### Arbitrage méthodologique : traitement de la colinéarité (Incidence vs Notifications) et de la redondance (Mortalité vs Mortalité VIH)
|
||||
|
||||
À l'issue du filtrage structurel, il subsiste plusieurs candidats potentiels pour mesurer la charge épidémique. Pour éviter la redondance (colinéarité), nous analysons la matrice de corrélation de Pearson entre ces candidats.
|
||||
|
||||
L'objectif est de conserver les variables les plus représentatives tout en maximisant l'orthogonalité (l'indépendance) des informations fournies au modèle. La figure ci-dessous visualise la matrice de corrélation de Pearson entre les quatre variables candidates : l'incidence (estimée et notifiée) et la mortalité (avec et hors VIH).
|
||||
|
||||
```{r}
|
||||
vars_candidates <- data_raw |>
|
||||
select(
|
||||
"Incidence (Estimée)" = e_inc_100k,
|
||||
"Incidence (Notifiée)" = c_newinc_100k,
|
||||
"Mortalité (Hors VIH)" = e_mort_exc_tbhiv_100k,
|
||||
"Mortalité (Avec VIH)" = e_mort_tbhiv_100k
|
||||
)
|
||||
|
||||
cor_mat <- cor(vars_candidates, use = "pairwise.complete.obs")
|
||||
```
|
||||
|
||||
```{r}
|
||||
cor_df <- as.data.frame(cor_mat) |>
|
||||
tibble::rownames_to_column(var = "Var1") |>
|
||||
pivot_longer(-Var1, names_to = "Var2", values_to = "r") |>
|
||||
mutate(
|
||||
Var1 = factor(Var1, levels = unique(Var1)),
|
||||
Var2 = factor(Var2, levels = rev(unique(Var1)))
|
||||
)
|
||||
|
||||
ggplot(cor_df, aes(x = Var1, y = Var2, fill = r)) +
|
||||
geom_tile(color = "white") +
|
||||
geom_text(aes(label = round(r, 2)), size = 3) +
|
||||
scale_fill_gradient2(
|
||||
low = "#313695",
|
||||
mid = "white",
|
||||
high = "#a50026",
|
||||
midpoint = 0,
|
||||
limits = c(-1, 1),
|
||||
name = "r"
|
||||
) +
|
||||
coord_fixed() +
|
||||
theme_minimal() +
|
||||
theme(
|
||||
axis.text.x = element_text(angle = 45, hjust = 1),
|
||||
axis.title = element_blank(),
|
||||
panel.grid = element_blank()
|
||||
)
|
||||
```
|
||||
|
||||
#### Analyse et décisions de modélisation :
|
||||
|
||||
L'analyse de la matrice de corrélation a imposé deux arbitrages majeurs. Premièrement, l'**Incidence Estimée** (`e_inc_100k`) a été préférée aux cas notifiés. En effet, ces derniers souffrent d'un biais administratif : un faible taux de notification peut refléter un manque de médecins plutôt qu'une absence de malades, alors que l'estimation de l'OMS corrige ces sous-diagnostics pour refléter la charge réelle.
|
||||
|
||||
Deuxièmement, nous avons retenu la **Mortalité hors VIH** (`e_mort_exc_tbhiv_100k`) malgré sa redondance avec la mortalité globale. Inclure la mortalité liée au VIH aurait risqué de biaiser la segmentation en isolant un "cluster SIDA" (spécifique à l'Afrique Australe), ce qui aurait masqué notre objectif principal : évaluer la performance des programmes antituberculeux indépendamment de l'accès aux antirétroviraux.
|
||||
|
||||
#### Synthèse des variables retenues :
|
||||
|
||||
Le modèle de clustering reposera donc sur un couple de variables actives parcimonieux et complémentaire :
|
||||
|
||||
- Variable Active 1 : Incidence (Diffusion de la maladie) - `e_inc_100k`.
|
||||
- Variable Active 2 : Mortalité (Sévérité / Échec du traitement) - `e_mort_exc_tbhiv_100k`
|
||||
|
||||
Ces deux dimensions, bien que corrélées ($r \approx 0.73$), ne sont pas redondantes : la variance non expliquée par la corrélation correspond justement à la différence d'efficacité des systèmes de soins (capacité à guérir les malades identifiés), ce qui est le cœur de notre segmentation.
|
||||
|
||||
### Variables illustratives et contextuelles
|
||||
|
||||
En complément des variables actives, cinq variables illustratives sont conservées pour éclairer l'interprétation a posteriori sans biaiser le calcul des distances euclidiennes. Le contexte démographique est porté par la Population (`e_pop_num`), indispensable aux pondérations, tandis que le volet géopolitique repose sur la **Région OMS** (`g_whoregion`), structurant l'analyse spatiale en six zones administratives (AFR, AMR, EMR, EUR, SEA, WPR). Enfin, les identifiants techniques — **Pays, Code ISO et Année** — assurent les fonctions supports : étiquetage, jointure cartographique et filtrage dynamique des trajectoires temporelles.
|
||||
|
||||
#### Création du sous-ensemble de travail :
|
||||
|
||||
Nous appliquons cette sélection au jeu de données brut pour ne conserver que les 7 colonnes d'intérêt.
|
||||
|
||||
```{r, echo=TRUE}
# Keep only the 7 analysis columns: identifiers, WHO region, the two
# active epidemiological variables, and population (illustrative).
keep_cols <- c(
  "iso3", "country", "year", "g_whoregion",
  "e_inc_100k", "e_mort_exc_tbhiv_100k", "e_pop_num"
)

tb_clean <- data_raw |>
  select(all_of(keep_cols))
```
|
||||
|
||||
## Traitement des valeurs manquantes
|
||||
|
||||
La gestion des valeurs manquantes (NA) est une étape critique en analyse de données, particulièrement pour les méthodes de partitionnement comme les K-Means qui reposent sur des calculs de distance euclidienne et ne tolèrent aucune incomplétude vectorielle.
|
||||
|
||||
Cette étape ne relève pas du simple "nettoyage" technique mais constitue un choix méthodologique qui influence la représentativité de l'échantillon final.
|
||||
|
||||
### Diagnostic de la structure des manquants
|
||||
|
||||
Nous analysons la distribution spatio-temporelle des valeurs manquantes sur la variable de mortalité (`e_mort_exc_tbhiv_100k`), l'incidence étant complète par construction (filtrage préalable).
|
||||
|
||||
```{r}
# Map missingness of the mortality variable onto the incidence/time plane:
# red points are the observations that will be dropped from the analysis.
plot_df <- tb_clean |>
  mutate(missing_mort = is.na(e_mort_exc_tbhiv_100k))

ggplot(plot_df, aes(x = year, y = e_inc_100k, color = missing_mort)) +
  geom_point(alpha = 0.6, size = 1.5) +
  scale_color_manual(
    values = c("TRUE" = "red", "FALSE" = "blue"),
    labels = c("FALSE" = "Donnée Complète", "TRUE" = "Donnée Manquante")
  ) +
  scale_y_log10() +
  labs(
    subtitle = "Les points rouges indiquent les observations exclues de l'analyse",
    x = "Année",
    y = "Incidence (log scale)",
    color = "Statut"
  ) +
  theme_minimal()
```
|
||||
|
||||
### Analyse d'impact de l'exclusion
|
||||
|
||||
Le tableau ci-dessous identifie les territoires les plus affectés :
|
||||
|
||||
```{r}
# Profile the observations that will be dropped: which territories are
# affected, how many years each is missing, and how significant they are
# epidemiologically (mean incidence and mean population).
missing_profiles <- tb_clean |>
  filter(is.na(e_mort_exc_tbhiv_100k)) |>
  group_by(country, g_whoregion) |>
  summarise(
    n_missing = n(),
    avg_incidence = mean(e_inc_100k, na.rm = TRUE),
    # Mean population over the missing years. Renamed from the misleading
    # `total_pop_affected`: this is an average, not a total.
    avg_pop_affected = mean(e_pop_num, na.rm = TRUE),
    .groups = "drop"
  ) |>
  arrange(desc(n_missing)) |>
  slice_head(n = 10)

# Display columns are labelled positionally, so the rename above does not
# change the rendered table.
kable(
  missing_profiles,
  col.names = c(
    "Territoire",
    "Région",
    "Années manquantes",
    "Incidence Moyenne",
    "Population Moy."
  ),
  caption = "Top 10 des territoires exclus pour données manquantes",
  digits = 0,
  format.args = list(big.mark = " "),
  booktabs = TRUE
) |>
  kable_styling(latex_options = c("striped", "hold_position"))
```
|
||||
|
||||
Ce tableau confirme que les données manquantes concernent quasi-exclusivement des micro-états et territoires insulaires à très faible démographie (souvent inférieure à 100 000 habitants), validant ainsi leur exclusion sans impact significatif sur la représentativité mondiale de l'étude.
|
||||
|
||||
### Justification méthodologique
|
||||
|
||||
L'exclusion des données manquantes se fonde sur trois justifications méthodologiques. D'un point de vue **géographique**, ces lacunes concernent quasi-exclusivement des micro-états ou territoires insulaires (ex: Monaco, Anguilla) dont la faible démographie induit une volatilité statistique excessive rendant les estimations peu fiables. Sur le plan **épidémiologique**, cette suppression est sans impact stratégique : ces territoires, bien que représentant 15% des observations, ne cumulent que 0,1% de la population mondiale et affichent une incidence marginale (17 cas/100k contre 125 pour l'échantillon conservé). Enfin, l'**intégrité statistique** a prévalu sur l'exhaustivité artificielle : le recours à l'imputation a été écarté car la génération de valeurs synthétiques pour ces profils atypiques risquerait de bruiter le calcul des distances euclidiennes et d'introduire des artefacts mathématiques préjudiciables au clustering.
|
||||
|
||||
### Finalisation de l'échantillon
|
||||
|
||||
Nous appliquons donc le filtre définitif pour générer le jeu de données d'analyse.
|
||||
|
||||
```{r, results='asis', echo=TRUE}
# Final filter: keep only observations complete on both active variables.
tb_clean <- tb_clean |>
  filter(!is.na(e_inc_100k), !is.na(e_mort_exc_tbhiv_100k))
```
|
||||
|
||||
L'exclusion des observations incomplètes réduit la taille de l'échantillon de 15% (de 5 322 à 4 532 observations valides), couvrant 183 pays sur la période 2000-2024.
|
||||
|
||||
## Analyse et Transformation
|
||||
|
||||
Cette étape vise à caractériser la structure distributionnelle des variables actives (`tb_clean`). L'objectif est double : comprendre la dynamique épidémique sous-jacente et préparer les données pour satisfaire les hypothèses de l'algorithme K-Means (sensibilité aux valeurs extrêmes et aux variances inégales).
|
||||
|
||||
### Statistiques descriptives et asymétrie
|
||||
|
||||
Le tableau ci-dessous résume les moments statistiques des deux variables actives sur l'ensemble de la période ($n = 4\,532$ observations).
|
||||
|
||||
```{r}
# Location/spread moments plus skewness for both active variables,
# computed in wide form (one column per variable x statistic).
stat_fns <- list(
  Min = \(x) min(x),
  Q1 = \(x) quantile(x, 0.25),
  Med = \(x) median(x),
  Mean = \(x) mean(x),
  Q3 = \(x) quantile(x, 0.75),
  Max = \(x) max(x),
  Skew = \(x) moments::skewness(x)
)

desc_stats <- tb_clean |>
  summarise(
    across(
      c(e_inc_100k, e_mort_exc_tbhiv_100k),
      stat_fns,
      .names = "{.col}__{.fn}"
    )
  )

# Reshape to one row per variable and relabel with readable names.
desc_long <- desc_stats |>
  pivot_longer(
    everything(),
    names_to = c("Var", "Stat"),
    names_sep = "__"
  ) |>
  pivot_wider(names_from = Stat, values_from = value) |>
  mutate(
    Var = recode(
      Var,
      e_inc_100k = "Incidence",
      e_mort_exc_tbhiv_100k = "Mortalité"
    )
  )

kable(
  desc_long,
  digits = 2,
  caption = "Statistiques descriptives des variables actives (2000-2024)",
  booktabs = TRUE
) |>
  kable_styling(latex_options = c("striped", "hold_position"))
```
|
||||
|
||||
L'écart considérable entre la médiane et la moyenne, couplé à des coefficients d'asymétrie (Skewness) largement supérieurs à 1, indique des distributions fortement asymétriques à droite (Lognormales ou de Pareto). Concrètement, la majorité des pays présentent une charge épidémique faible, tandis qu'une minorité d'observations "extrêmes" tire la moyenne vers le haut. Cette structure est typique des phénomènes épidémiques mais problématique pour le K-Means, qui risque de créer des clusters uniquement pour isoler ces valeurs extrêmes.
|
||||
|
||||
### Dynamiques temporelles et spatiales
|
||||
|
||||
L'analyse visuelle permet de contextualiser ces statistiques globales.
|
||||
|
||||
```{r}
# Left panel: one faint line per country plus a global loess trend,
# on a log scale.
p_time <- ggplot(tb_clean, aes(x = year, y = e_inc_100k)) +
  geom_line(aes(group = country), alpha = 0.05, color = "#2c3e50") +
  geom_smooth(method = "loess", color = "#d73027", se = FALSE) +
  scale_y_log10() +
  labs(
    title = "Trajectoires (2000-2024)", subtitle = "Échelle Log",
    y = "Incidence", x = "Année"
  ) +
  theme_minimal()

# Right panel: incidence distribution by WHO region (log scale).
p_region <- ggplot(
  tb_clean,
  aes(x = g_whoregion, y = e_inc_100k, fill = g_whoregion)
) +
  geom_boxplot(outlier.size = 0.5, alpha = 0.8) +
  scale_y_log10() +
  scale_fill_brewer(palette = "Set3") +
  labs(title = "Disparités Régionales", y = "Incidence", x = "") +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# Side-by-side layout.
grid.arrange(p_time, p_region, ncol = 2)
```
|
||||
|
||||
L'analyse visuelle révèle une double dynamique. D'une part, la **tendance globale** montre une lente érosion de l'incidence moyenne mondiale (courbe rouge), malgré la forte inertie des trajectoires individuelles. D'autre part, les boxplots confirment une **fracture Nord-Sud** structurelle : les médianes logarithmiques de l'Afrique (AFR) et de l'Asie du Sud-Est (SEA) sont nettement supérieures à celles de l'Europe ou des Amériques. Cette hétérogénéité spatiale valide la pertinence d'inclure la région comme variable illustrative pour l'interprétation post-clustering.
|
||||
|
||||
### Relation Bivariée et Transformation
|
||||
|
||||
La relation entre l'Incidence et la Mortalité est le cœur de notre modélisation.
|
||||
|
||||
```{r}
# Raw bivariate view: heavy concentration near the origin and strong
# right skew dominate the picture.
p_raw <- ggplot(tb_clean, aes(x = e_inc_100k, y = e_mort_exc_tbhiv_100k)) +
  geom_point(alpha = 0.3, color = "#2c3e50") +
  labs(title = "Espace Naturel (Asymétrique)", x = "Incidence", y = "Mortalité") +
  theme_minimal()

# log(1 + x) transform: symmetrises both distributions and handles
# zero-mortality countries without producing -Inf.
tb_ready <- tb_clean |>
  mutate(log_inc = log1p(e_inc_100k), log_mort = log1p(e_mort_exc_tbhiv_100k))

# Transformed view with a linear fit to show the now-linear relation.
p_log <- ggplot(tb_ready, aes(x = log_inc, y = log_mort)) +
  geom_point(alpha = 0.3, color = "#4575b4") +
  geom_smooth(method = "lm", color = "#d73027", se = FALSE) +
  labs(
    title = "Espace Log-Transformé (Symétrique)",
    subtitle = "Transformation log(1+x)",
    x = "Log-Incidence",
    y = "Log-Mortalité"
  ) +
  theme_minimal()

grid.arrange(p_raw, p_log, ncol = 2)
```
|
||||
|
||||
Le graphique de gauche met en évidence une forte concentration à l'origine et une hétéroscédasticité marquée, risquant de biaiser les distances euclidiennes par les seules valeurs extrêmes. L'application de la transformation $f(x)=\ln(1+x)$ corrige ces biais structurels : elle **symétrise les distributions** pour optimiser l'occupation de l'espace vectoriel et **linéarise la relation** entre les variables, facilitant la détection de groupes naturels. De plus, contrairement au logarithme népérien standard, cette fonction assure une gestion **robuste des zéros** (évitant le cas $\ln(0)=-\infty$ pour les pays sans décès), garantissant ainsi la stabilité numérique du modèle.
|
||||
|
||||
## Synthèse de l'exploration, du nettoyage et des transformations
|
||||
|
||||
À l'issue de cette phase de préparation, nous disposons d'un jeu de données optimisé pour la modélisation.
|
||||
|
||||
Le tableau ci-dessous synthétise les caractéristiques du dataset final `tb_ready` qui sera injecté dans l'algorithme :
|
||||
|
||||
```{r}
# One-page "identity card" of the modelling-ready dataset. Numeric values
# are coerced to character by c(), which is fine for display.
fiche_metriques <- c(
  "Observations totales",
  "Pays couverts",
  "Plage Temporelle",
  "Variables Actives (Transformées)",
  "Variables Illustratives"
)

fiche_valeurs <- c(
  nrow(tb_ready),
  length(unique(tb_ready$iso3)),
  "2000 - 2024",
  "log_inc, log_mort",
  "Population, Région, Année"
)

summary_final <- data.frame(Metrique = fiche_metriques, Valeur = fiche_valeurs)

kable(
  summary_final,
  caption = "Fiche d'identité du jeu de données final",
  booktabs = TRUE
) |>
  kable_styling(latex_options = "hold_position")
```
|
||||
|
||||
La validation de ce socle de données clôture la phase exploratoire. L'absence de valeurs manquantes, la réduction de la dimensionnalité et la normalisation des distributions nous permettent désormais de procéder au partitionnement (Clustering) avec une robustesse statistique garantie.
|
||||
|
||||
# Stratégie de Modélisation (Clustering)
|
||||
|
||||
La préparation des données ayant abouti à un espace vectoriel cohérent et symétrisé (`tb_ready`), nous procédons désormais à la segmentation proprement dite. Nous avons retenu l'algorithme des K-Means (Nuées dynamiques), une méthode de partitionnement non-supervisé privilégiée pour sa robustesse sur des jeux de données de dimension modérée et pour la lisibilité géométrique de ses résultats.
|
||||
|
||||
## Prétraitement : Centrage et Réduction
|
||||
|
||||
Bien que nous ayons appliqué une transformation logarithmique pour corriger l'asymétrie, les variables d'Incidence et de Mortalité possèdent des plages de variation distinctes. L'algorithme K-Means reposant sur la distance euclidienne isotrope, il est impératif que chaque dimension contribue de manière équitable au calcul de similarité.
|
||||
|
||||
Nous appliquons donc une standardisation (Z-score) : $z = \frac{x - \mu}{\sigma}$
|
||||
|
||||
```{r}
# Z-score standardisation of the two log-transformed active variables,
# so each dimension contributes equally to the euclidean distance.
data_scaled <- tb_ready |>
  select(log_inc, log_mort) |>
  scale()

# Sanity check: after scaling, each column must have mean 0 and sd 1.
col_means <- apply(data_scaled, 2, mean)
col_sds <- apply(data_scaled, 2, sd)

check_table <- data.frame(
  Variable = c("Incidence (Log)", "Mortalité (Log)"),
  Moyenne = col_means,
  Ecart_Type = col_sds
)

kable(
  check_table,
  digits = 2,
  col.names = c("Variable", "Moyenne (Z)", "Écart-Type (Z)"),
  caption = "Validation du Centrage-Réduction",
  booktabs = TRUE
) |>
  kable_styling(
    latex_options = c("striped", "hold_position"),
    font_size = 10
  )
```
|
||||
|
||||
## Détermination du nombre de clusters ($k$)
|
||||
|
||||
L'algorithme K-Means nécessite de fixer a priori le nombre de classes k. Ce choix résulte d'un arbitrage entre performance statistique (minimisation de l'inertie intra-classe) et pertinence opérationnelle (interprétabilité métier).
|
||||
|
||||
### Approche statistique (Méthode du Coude)
|
||||
|
||||
Nous calculons l'inertie intra-classe totale pour des valeurs de k allant de 1 à 10. Le point d'inflexion ("coude") indique le seuil au-delà duquel l'ajout d'un cluster n'apporte plus de gain significatif en compacité. Sur la figure ci-dessous, le coude se situe entre $k=2$ et $k=3$.
|
||||
|
||||
```{r}
# Elbow criterion: total within-cluster inertia for increasing k.
# fviz_nbclust runs kmeans (random initialisations) internally, so we fix
# the seed first to make the curve reproducible across renders — same
# seed as the final model fit below.
set.seed(123)

fviz_nbclust(data_scaled, kmeans, method = "wss") +
  geom_vline(xintercept = 3, linetype = 2, color = "#d73027") +
  labs(
    title = "Optimisation du nombre de clusters",
    x = "Nombre de clusters k",
    y = "Inertie Intra-classe totale"
  ) +
  theme_minimal()
```
|
||||
|
||||
### Arbitrage
|
||||
|
||||
L'analyse graphique révèle une rupture de pente franche à $k=3$, seuil au-delà duquel les gains d'inertie deviennent marginaux (rendements décroissants). Ce choix statistique est corroboré par une pertinence opérationnelle majeure : une segmentation ternaire permet d'adopter une logique de signalisation intuitive type Traffic Light (Vert/Contrôle, Orange/Surveillance, Rouge/Critique). Nous retenons donc $k=3$ afin de garantir des clusters à la fois statistiquement denses et immédiatement actionnables par les décideurs.
|
||||
|
||||
## Paramétrage et Exécution de l'algorithme
|
||||
|
||||
L'algorithme K-Means étant sensible à l'initialisation des centroïdes (risque d'optimum local), nous avons configuré une exécution robuste : le modèle opère 25 initialisations aléatoires différentes (`nstart = 25`) pour ne conserver que la partition minimisant l'inertie globale sur les 3 classes définies (`centers = 3`). Enfin, la fixation de la graine aléatoire (`set.seed(123)`) garantit la stricte reproductibilité des résultats présentés.
|
||||
|
||||
```{r, echo=TRUE}
# Fix the RNG so the 25 random centroid initialisations are reproducible.
set.seed(123)

# K-means with k = 3; nstart = 25 keeps the best of 25 random starts
# (lowest total within-cluster inertia), reducing the local-optimum risk.
km_res <- kmeans(data_scaled, centers = 3, nstart = 25)
# Share of total variance explained by the partition (between-SS / total-SS),
# as a percentage rounded to 1 decimal.
var_totale <- round(km_res$betweenss / km_res$totss * 100, 1)
```
|
||||
|
||||
Avec **83,9 % de variance expliquée**, le modèle valide la robustesse statistique de la segmentation ternaire. Ce score élevé traduit une séparation nette des profils épidémiologiques, corroborant ainsi la forte structuration spatiale pressentie lors de l'analyse exploratoire.
|
||||
|
||||
## Intégration des résultats
|
||||
|
||||
Nous réintégrons les labels de clusters dans le jeu de données principal pour l'analyse.
|
||||
|
||||
```{r}
# Attach the k-means partition to the analysis dataset as a factor.
tb_clustered <- tb_ready |>
  mutate(cluster = factor(km_res$cluster))

# Cluster sizes, rendered as a small table.
cluster_counts <- table(tb_clustered$cluster)

cluster_counts |>
  kable(
    col.names = c("Cluster ID", "Nombre d'observations"),
    caption = "Répartition des observations par cluster (k=3)",
    booktabs = TRUE
  ) |>
  kable_styling(latex_options = "hold_position")
```
|
||||
|
||||
Le partitionnement étant validé avec 3 classes, nous abordons désormais l'étape de labellisation visant à traduire ces clusters statistiques en profils épidémiologiques intelligibles.
|
||||
|
||||
# Analyse des Profils Épidémiques
|
||||
|
||||
L'analyse mathématique ayant validé la qualité de la partition, nous procédons ici à la caractérisation "métier" des clusters pour les transformer en outils d'aide à la décision.
|
||||
|
||||
## Caractérisation et Labellisation
|
||||
|
||||
Nous calculons les moyennes d'incidence et de mortalité par groupe, ordonnons les clusters du moins au plus sévère et leur attribuons des étiquettes sémantiques explicites.
|
||||
|
||||
```{r}
# Mean incidence/mortality per cluster. Sorting by mean incidence gives
# the severity ordering used below to map raw cluster ids to labels.
cluster_stats <- tb_clustered |>
  group_by(cluster) |>
  summarise(
    n_obs = n(),
    mean_inc = mean(e_inc_100k),
    mean_mort = mean(e_mort_exc_tbhiv_100k)
  ) |>
  arrange(mean_inc)

# Semantic labels, least to most severe (same order as cluster_stats rows).
labels_map <- c("1. Impact Faible", "2. Impact Modéré", "3. Impact Critique")

tb_clustered <- tb_clustered |>
  mutate(
    # Position of each observation's cluster id in the severity-sorted
    # table: 1 = lowest mean incidence, 3 = highest.
    rank_severity = match(cluster, cluster_stats$cluster),
    label = factor(labels_map[rank_severity], levels = labels_map)
  )

# Preview of the labelled data (first 10 rows).
tb_clustered |>
  select(country, year, e_inc_100k, label) |>
  head(10) |>
  kable(
    col.names = c("Pays", "Année", "Incidence (pour 100k)", "Classification"),
    digits = 1,
    align = c("l", "c", "r", "l"),
    caption = "Aperçu de la segmentation sanitaire (Échantillon)"
  ) |>
  kable_styling(latex_options = c("striped", "hold_position"))
```
|
||||
|
||||
## Analyse des Profils Épidémiques
|
||||
|
||||
Le tableau ci-dessous synthétise les caractéristiques moyennes de chaque profil type identifié par le modèle.
|
||||
|
||||
```{r}
# Per-cluster profile: size, mean burden, and a case-fatality proxy
# (mean mortality over mean incidence, in %). Column names are backticked
# because they are used verbatim as table headers.
final_summary <- tb_clustered |>
  group_by(label) |>
  summarise(
    `Nombre d'observations` = n(),
    `Incidence Moyenne` = round(mean(e_inc_100k), 0),
    `Mortalité Moyenne` = round(mean(e_mort_exc_tbhiv_100k), 1),
    # Ratio of the two means (not the mean of per-row ratios).
    `Ratio Mort/Inc (%)` = round(
      mean(e_mort_exc_tbhiv_100k) / mean(e_inc_100k) * 100,
      1
    )
  )

kable(
  final_summary,
  caption = "Typologie des clusters de Tuberculose (k=3)",
  booktabs = TRUE
) |>
  kable_styling(latex_options = c("striped", "hold_position"))
```
|
||||
|
||||
### Interprétation de la typologie
|
||||
|
||||
L'analyse des centroïdes révèle une hiérarchisation sanitaire nette. Le cluster **Impact Faible** (`n=1 416`), représentatif des standards occidentaux (Europe, Amérique du Nord), affiche une incidence marginale (14 cas/100k) et une mortalité résiduelle (<1 décès/100k). Le faible ratio de létalité (~6 %) témoigne d'une prise en charge thérapeutique efficace où la maladie est rarement fatale.
|
||||
|
||||
Le cluster **Impact Modéré** (`n=1 570`) regroupe des pays en transition (Maghreb, Amérique Latine) confrontés à une circulation active du bacille (79 cas/100k). Toutefois, la mortalité contenue (7 décès/100k) indique que si le contrôle de la transmission reste un défi, les systèmes de santé parviennent à traiter la majorité des patients diagnostiqués.
|
||||
|
||||
Enfin, le cluster **Impact Critique** (`n=1 546`), centré sur l'Afrique subsaharienne, concentre la charge mondiale avec une incidence massive (374 cas/100k) et une mortalité très élevée (57 décès/100k). Le taux de létalité y atteint un niveau alarmant de 15,3 %, révélant des défaillances systémiques graves (retards de diagnostic, résistances) : dans cette zone, la tuberculose ne se contente pas de circuler, elle tue massivement.
|
||||
|
||||
## Visualisation de la Segmentation
|
||||
|
||||
La projection des clusters sur le plan bivarié illustre la logique de séparation opérée par l'algorithme.
|
||||
|
||||
```{r}
# Green / orange / red palette, matched by position to the three
# severity labels (factor level order).
risk_palette <- c("#66bd63", "#fdae61", "#d73027")

# Project the partition onto the raw bivariate plane (log-log axes).
cluster_plot <- ggplot(
  tb_clustered,
  aes(x = e_inc_100k, y = e_mort_exc_tbhiv_100k, color = label)
) +
  geom_point(alpha = 0.5, size = 1.5) +
  scale_x_log10() +
  scale_y_log10() +
  scale_color_manual(values = risk_palette) +
  labs(
    title = "Projection des Clusters de Risque",
    subtitle = "k=3 : Une segmentation claire du risque sanitaire",
    x = "Incidence (Log scale)",
    y = "Mortalité (Log scale)",
    color = "Niveau de Risque"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")

cluster_plot
```
|
||||
|
||||
Le graphique confirme que le score de 83,9 % d'inertie expliquée se traduit visuellement par des frontières nettes entre les groupes, avec très peu de chevauchement. La segmentation en "feux tricolores" est donc statistiquement robuste et opérationnellement pertinente.
|
||||
|
||||
## Préparation pour l'Application
|
||||
|
||||
Nous sauvegardons le jeu de données final enrichi des labels, qui servira de socle à l'application R Shiny.
|
||||
|
||||
```{r, echo=TRUE}
# Persist the labelled dataset for the Shiny app. Create the target
# directory first so save() cannot fail on a fresh checkout where
# `data/` does not exist yet.
if (!dir.exists("data")) {
  dir.create("data", recursive = TRUE)
}
save(tb_clustered, file = "data/TB_analysis_ready.RData")
```
|
||||
|
||||
# Application R Shiny
|
||||
|
||||
L'étape finale de ce projet consiste à transformer les résultats de la segmentation (K-Means) en un outil de pilotage interactif. Nous avons développé une application web via le framework R Shiny, permettant aux décideurs de santé publique d'explorer les données, de visualiser les disparités géographiques et de monitorer l'évolution des profils de risque en temps réel.
|
||||
|
||||
## Architecture technique : Structure UI/Server et flux de données réactif
|
||||
|
||||
Fondée sur une architecture client-serveur réactive, l'application mobilise un écosystème de librairies R spécialisées pour garantir fluidité et interactivité. L'interface utilisateur, structurée de manière modulaire via `shinydashboard`, articule la cartographie vectorielle de `leaflet` avec les graphiques dynamiques du couple `ggplot2` / `plotly` (survol, zoom). En amont, la manipulation des données et le filtrage en temps réel reposent sur la performance des packages `dplyr` et `tidyr`, assurant une réactivité immédiate aux interactions de l'utilisateur.
|
||||
|
||||
### Flux de Données Réactif
|
||||
|
||||
Le cœur de l'application réside dans son graphe de dépendance réactif qui, contrairement à un script statique, optimise les ressources en ne recalculant les éléments qu'à la demande. Le flux suit une logique séquentielle : toute interaction sur un **Input** (sélection d'une année ou d'un pays) déclenche une **Expression Réactive** chargée de filtrer le jeu de données `tb_clustered`. Ce nouveau sous-ensemble propage alors instantanément la mise à jour vers les **Outputs** (cartes, tableaux et courbes) sans nécessiter de rechargement de la page.
|
||||
|
||||
## Fonctionnalités décisionnelles :
|
||||
|
||||
L'interface a été conçue pour répondre à trois besoins analytiques majeurs : la vision globale, le suivi temporel et l'analyse comparative.
|
||||
|
||||
### Cartographie Interactive des Risques (Vision Globale)
|
||||
|
||||
La page d'accueil déploie une carte mondiale interactive (`leaflet`) où chaque pays est coloré selon son cluster d'appartenance : **Vert** (Impact Faible), **Orange** (Modéré) ou **Rouge** (Critique). Cette visualisation offre une lecture immédiate de la géographie sanitaire, permettant d'identifier les foyers épidémiques structurels (telle la ceinture rouge subsaharienne) tout en repérant rapidement les anomalies locales (pays critiques isolés au sein d'une zone préservée).
|
||||
|
||||
### Monitorage Temporel (Analyse Dynamique)
|
||||
|
||||
Un curseur temporel (Slider Input) permet de naviguer sur la période 2000-2024. L'animation de ce curseur permet de visualiser les transitions de clusters (trajectoires). On peut ainsi observer les succès de certains pays passant du statut "Critique" à "Modéré" suite à l'amélioration de leur système de soins, ou inversement, les dégradations liées à des conflits ou crises sanitaires.
|
||||
|
||||
### Analyse Comparative
|
||||
|
||||
Un module dédié permet de sélectionner un pays spécifique (ex: Nigeria) pour générer son Bulletin de Santé complet. Celui-ci articule l'affichage des **KPIs clés** (valeurs brutes d'incidence, mortalité, cluster) avec une analyse de **positionnement relatif**. En confrontant la trajectoire du pays sélectionné aux moyennes régionales et mondiales, ce graphique permet d'objectiver sa performance réelle et de déterminer s'il sous-performe par rapport à son voisinage direct, indépendamment de la tendance globale.
|
||||
|
||||
## Implémentation et logique applicative
|
||||
|
||||
L'application a été développée selon une architecture modulaire, séparant distinctement l'interface utilisateur (Frontend) de la logique de calcul (Backend), conformément au paradigme du framework Shiny.
|
||||
|
||||
### Stack Technologique et Dépendances
|
||||
|
||||
Le développement repose sur une stack technique optimisée pour l'interactivité. L'orchestration de l'interface est assurée par le couple `shiny` et `shinydashboard`, garantissant une structure modulaire et responsive. La couche géospatiale combine la précision vectorielle de `sf` à la fluidité de rendu de `leaflet`, tandis que la visualisation des résultats exploite les capacités dynamiques de `plotly` (pour les graphiques interactifs) et la puissance de tri de `DT` (pour les tableaux). Enfin, `dplyr` agit comme moteur de calcul en temps réel, assurant le filtrage réactif et l'agrégation instantanée des données en arrière-plan.
|
||||
|
||||
### Architecture de l'Interface Utilisateur (UI)
|
||||
|
||||
L'interface guide l'utilisateur du général au particulier via une structure en trois volets. Le **Dashboard**, véritable cœur décisionnel, orchestre via une grille fluide l'affichage de KPIs dynamiques, d'une double visualisation interactive (Carte/Nuage de points) et d'un module de comparaison des trajectoires. Il est complété par un **Explorateur de Données** pour l'accès aux chiffres bruts et une section **Méthodologie** garantissant l'auto-portance de l'outil. Transversalement, la navigation latérale assure le pilotage global des graphiques via un filtrage régional et un contrôle temporel animé (2000-2024).
|
||||
|
||||
### Logique Serveur et Réactivité
|
||||
|
||||
Le script serveur orchestre l'intelligence applicative via deux leviers. D'une part, le **filtrage réactif** optimise la performance : contrairement à une approche statique, les données ne sont chargées qu'une fois puis segmentées dynamiquement par une expression (`filtered_data`) qui joint instantanément le sous-ensemble aux polygones géographiques (`world_map`) à chaque modification des entrées.
|
||||
|
||||
D'autre part, la gestion d'état centralisée permet un **Cross-Filtering** avancé. Une variable réactive (`reactiveVal`), stockant l'identifiant du pays actif, est mise à jour indifféremment par trois interactions distinctes : un clic sur la carte, le nuage de points ou le graphique de densité. Cette interconnexion totale assure une exploration fluide, où l'investigation d'un point aberrant sur un graphique projette immédiatement l'information sur l'ensemble des autres vues.
|
||||
|
||||
### Rendu Conditionnel et Comparaison
|
||||
|
||||
Le graphique de tendance (`trend_plot`) transforme la simple série temporelle en un outil d'analyse comparative en construisant dynamiquement trois courbes à la volée : **la trajectoire du pays sélectionné** (mise en évidence), confrontée à la **moyenne de sa région** (calculée en temps réel) et à la **référence mondiale** fixe. Cette logique de calcul à la demande permet ainsi de situer instantanément la performance de n'importe quel territoire vis-à-vis de son contexte géographique immédiat.
|
||||
|
||||
# Exploitation et Analyse des Résultats
|
||||
|
||||
Au-delà de l'implémentation technique, l'application R Shiny permet d'objectiver les dynamiques épidémiologiques mondiales. L'exploration interactive des données (2000-2024) met en lumière trois niveaux de lecture.
|
||||
|
||||
## Analyse Macroscopique : La fracture Nord-Sud
|
||||
|
||||
La cartographie interactive confirme que la segmentation ternaire obéit à une logique géopolitique structurante. Le **Cluster 1 (Faible Impact - Vert)** se superpose quasi-intégralement aux pays de l'OCDE, caractérisant une maladie devenue résiduelle. Il se distingue du **Cluster 2 (Intermédiaire - Orange)**, véritable zone tampon hétérogène (Amérique Latine, Europe de l'Est) où les infrastructures de santé font face à des défis de résistance. Enfin, le **Cluster 3 (Critique - Rouge)** dessine une ceinture épidémique continue en Afrique Subsaharienne et sur certains foyers asiatiques, dont la superposition avec les zones de forte prévalence du VIH et d'instabilité politique apparaît frappante.
|
||||
|
||||
## Dynamiques Régionales et Temporelles
|
||||
|
||||
L'outil de monitorage (2000-2024) objective une baisse mondiale de l'incidence à géométrie variable. Tandis que l'**Europe** et les **Amériques** affichent une stagnation ou une décroissance marginale caractéristique d'une épidémie maîtrisée, l'**Afrique** se distingue par la chute la plus rapide en valeur absolue depuis 2010, témoignant du succès des campagnes contre la co-infection TB-VIH. À l'opposé, l'**Asie du Sud-Est** manifeste une inertie inquiétante et demeure, par la densité démographique de l'Inde et de l'Indonésie, le principal réservoir volumique mondial de nouveaux cas.
|
||||
|
||||
## Cas d'usage : la France
|
||||
|
||||
Pour illustrer la puissance analytique de l'outil, nous prenons le cas de la France. L'analyse du cas français illustre la puissance de l'outil pour situer un territoire. Solidement ancrée dans le **Cluster 1 (Faible Impact)** avec une incidence de 8 cas/100k en 2024, la France affiche une performance remarquable sur trois échelles : elle se situe un facteur 15 sous la moyenne mondiale et surperforme nettement la moyenne européenne (~24 cas/100k), cette dernière étant grevée par les pays de l'Est du Cluster 2. La confrontation avec un représentant du Cluster 3 comme l'Afrique du Sud (> 389 cas/100k) objective une fracture sanitaire vertigineuse : maladie du passé pour l'Hexagone, la tuberculose demeure une urgence vitale ailleurs. Ce diagnostic valide l'efficacité de la stratégie nationale tout en rappelant l'impératif de vigilance face aux risques de réintroduction depuis les zones critiques (Orange et Rouge).
|
||||
|
||||
# Conclusion et Perspectives
|
||||
|
||||
Ce projet s'est attaché à transformer une base de données brute et complexe, issue du rapport mondial de l'OMS, en un outil d'aide à la décision sanitaire opérationnel. En combinant une approche statistique rigoureuse (analyse exploratoire, réduction de dimension) et une modélisation non-supervisée (Clustering K-Means), nous avons pu objectiver les disparités mondiales face à l'épidémie de tuberculose.
|
||||
|
||||
## Synthèse des résultats
|
||||
|
||||
L'analyse de la période 2000-2024 valide trois enseignements majeurs. D'abord, la **pertinence d'une segmentation ternaire** ($k=3$) qui, forte d'une robustesse statistique de 83,9 %, dépasse le simple clivage Nord-Sud pour cartographier le risque selon une gradation opérationnelle (Faible, Modéré, Critique). Ensuite, la **polarisation de l'épidémie** : le cluster Critique concentre une létalité disproportionnée (> 15 %), dictant un ciblage prioritaire des efforts sur l'Afrique subsaharienne. Enfin, la valeur ajoutée du **monitorage dynamique** : l'application R Shiny a permis d'objectiver la mobilité des trajectoires, identifiant les pays en transition pour fournir des signaux d'alerte précoce ou valider l'efficacité des politiques publiques.
|
||||
|
||||
## Limites méthodologiques
|
||||
|
||||
Dans une démarche critique, trois limites méthodologiques doivent être soulignées. Premièrement, le **biais déclaratif** persiste malgré l'usage des estimations OMS (`e_`) : les données restent tributaires de la qualité de la surveillance nationale, induisant un paradoxe où l'amélioration du diagnostic peut être confondue avec une dégradation épidémique (hausse mécanique de l'incidence détectée). Deuxièmement, la **parcimonie du modèle**, restreinte à deux variables pour garantir la robustesse, confine l'étude à un rôle descriptif qui occulte les déterminants causaux (pauvreté, VIH). Enfin, la **suppression des données manquantes** (15 % des observations), impérative pour la stabilité du K-Means, rend de facto le modèle inopérant pour les micro-états insulaires exclus.
|
||||
|
||||
## Perspectives d'évolution
|
||||
|
||||
Pour enrichir cet outil de pilotage, trois axes de développement majeurs se dessinent. D'abord, le passage vers une **modélisation explicative** : l'intégration de variables socio-économiques (PIB, Gini) via une ACP permettrait d'identifier les déterminants structurels du cluster Critique. Ensuite, le déploiement d'une **approche prédictive** (via ARIMA ou Prophet) transformerait ce tableau de bord analytique en outil prospectif, capable d'évaluer l'atteinte des objectifs onusiens à l'horizon 2030. Enfin, l'adoption d'une **granularité infra-nationale** s'avérerait pertinente pour les grands états fédéraux (Brésil, Inde) où la moyenne nationale masque de fortes disparités. En somme, ce projet offre une boussole efficace et constitue la première pierre d'une épidémiologie de précision guidée par la donnée.
|
||||
|
||||
# Déclaration d'Intégrité et Usage de l'IA
|
||||
|
||||
Conformément aux consignes académiques relatives au plagiat et à l'utilisation des assistants numériques, cette section explicite le cadre de réalisation de ce projet.
|
||||
|
||||
## Originalité de la démarche
|
||||
|
||||
Le jeu de données utilisé (*Global Tuberculosis Report*) est public et largement étudié. Cependant, l'approche développée dans ce projet est originale et personnelle.
|
||||
|
||||
Disposant d'un **profil d'ingénieur logiciel**, j'ai fait le choix stratégique de concentrer mon effort technique sur l'architecture et l'interactivité de l'application **R Shiny** (Section 5), afin de produire un outil de qualité professionnelle. Cette notice technique assure la couverture rigoureuse de la partie Data Science, justifiant les choix mathématiques implémentés dans l'application.
|
||||
|
||||
## Usage des outils d'IA Générative
|
||||
|
||||
L'utilisation d'outils d'intelligence artificielle générative s'est inscrite dans une démarche d'assistance ponctuelle et rigoureusement contrôlée. Sur le volet **rédactionnel**, l'IA a contribué à l'optimisation syntaxique et à la fluidité des transitions, le raisonnement et les interprétations demeurant strictement personnels. Sur le plan **technique**, elle a servi d'outil de diagnostic pour le débogage de l'application R Shiny (gestion de la réactivité, conflits). L'intégralité du code a été vérifiée et maîtrisée : aucune partie de l'analyse n'a été déléguée sans supervision humaine.
|
||||
|
||||
\newpage
|
||||
|
||||
# Bibliographie
|
||||
|
||||
## Rapports et Encyclopédies
|
||||
|
||||
- [1] Organisation Mondiale de la Santé (OMS). (2024). Global Tuberculosis Report 2024. Disponible sur : https://www.who.int/teams/global-programme-on-tuberculosis-and-lung-health/tb-reports/global-tuberculosis-report-2024
|
||||
|
||||
- [2] Wikipédia. (s.d.). Tuberculose. Disponible sur : https://fr.wikipedia.org/wiki/Tuberculose
|
||||
|
||||
## Supports de Cours - Master 2 ISF (2025-2026)
|
||||
|
||||
- [3] Ochoa, J. (2025-2026). *Les algorithmes non supervisés.* Support de cours : Machine Learning. Université Paris-Dauphine - PSL.
|
||||
|
||||
- [4] Bertrand, P. (2025-2026). *K-Means.* Support de cours : Apprentissage non supervisé et clustering. Université Paris-Dauphine - PSL.
|
||||
|
||||
- [5] Guibert, Q. (2025-2026). *Data Visualisation.* Support de cours : Visualisation des données avec R. Université Paris-Dauphine - PSL.
|
||||
BIN
M2/Data Visualisation/Project/NoticeTechnique.pdf
Normal file
863
M2/Data Visualisation/Project/app.R
Normal file
@@ -0,0 +1,863 @@
|
||||
# Libraries ----
# Attach order is preserved deliberately: dplyr is attached after the
# plotting packages so that its verbs (e.g. filter()) mask earlier
# definitions such as stats::filter.
library(shiny)
library(shinydashboard)
library(leaflet)
library(plotly)
library(dplyr)
library(sf)
library(RColorBrewer)
library(DT)
library(rnaturalearth)
library(rnaturalearthdata)

# Data ----
# Loads `tb_clustered`: WHO tuberculosis indicators plus the K-Means
# cluster assignment computed in the analysis notebook.
load("data/TB_analysis_ready.RData")

# Display names for the three risk clusters, in level order.
labels <- c("1. Faible Impact", "2. Impact Modéré", "3. Impact Critique")

# Relabel the cluster column with the display names above
# (equivalent to factor() followed by a levels<- assignment).
tb_clustered$label <- factor(tb_clustered$label, labels = labels)

# Country polygons used as the base layer of the leaflet map.
world_map <- ne_countries(scale = "medium", returnclass = "sf")

# Cluster palette: green / orange / red = low / moderate / critical risk.
green <- "#66bd63"
orange <- "#f48a43"
red <- "#d73027"
|
||||
|
||||
# User interface ----
# Three-tab shinydashboard layout: methodology/definitions, main dashboard
# (map, scatter, trends, distribution), and a raw-data explorer.
# Fix: the K-Means description previously said "sous 4 clusters" although
# the model uses k = 3 everywhere else (3 labels, "Fixé à 3",
# "Interprétation des 3 Groupes"); the text now says 3.
ui <- shinydashboard::dashboardPage(
  skin = "black",

  # Header
  dashboardHeader(title = "Tuberculose"),

  # Sidebar: navigation menu, credits footer, and the two global filters
  # (region and year) shared by every output on the dashboard.
  dashboardSidebar(
    sidebarMenu(
      menuItem(
        "Méthodologie & Définitions",
        tabName = "methodo",
        icon = icon("info-circle")
      ),
      menuItem(
        "Vue d'Ensemble",
        tabName = "dashboard",
        icon = icon("dashboard")
      ),
      menuItem("Données Brutes", tabName = "data", icon = icon("table")),

      # Footer - credits and source link
      div(
        style = "position: absolute; bottom: 10px; width: 100%; text-align: center; font-size: 12px; color: #b8c7ce;",
        p("© 2026 Arthur Danjou"),
        p("M2 ISF - Dauphine PSL"),
        p(
          a(
            "Code Source",
            href = "https://go.arthurdanjou.fr/datavis",
            target = "_blank",
            style = "color: #3c8dbc;"
          )
        )
      ),
      hr(),

      # Region filter
      selectInput(
        "region_select",
        "Filtrer par Région :",
        choices = c("Toutes", unique(tb_clustered$g_whoregion)),
        selected = "Toutes"
      ),

      # Year selector with an animation that steps through the years
      sliderInput(
        "year_select",
        "Année :",
        min = min(tb_clustered$year),
        max = max(tb_clustered$year),
        value = max(tb_clustered$year),
        step = 1,
        sep = "",
        animate = animationOptions(interval = 5000, loop = FALSE)
      )
    )
  ),

  # Body
  dashboardBody(
    tabItems(
      # Page 1 - Overview: KPIs, map, cluster scatter, trends, distribution
      tabItem(
        tabName = "dashboard",

        # KPI row
        fluidRow(
          valueBoxOutput("kpi_total_cases", width = 4),
          valueBoxOutput("kpi_worst_country", width = 4),
          valueBoxOutput("kpi_critical_count", width = 4)
        ),

        fluidRow(
          # Epidemiological map with a metric switcher
          box(
            width = 7,
            title = "Carte Épidémiologique",
            status = "primary",
            solidHeader = TRUE,
            radioButtons(
              "metric_select",
              "Indicateur :",
              choices = c(
                "Incidence" = "e_inc_100k",
                "Mortalité" = "e_mort_exc_tbhiv_100k",
                "Clusters K-Means" = "label"
              ),
              inline = TRUE
            ),
            p(
              class = "text-muted",
              "Cliquer sur un point pour filtrer par pays."
            ),
            leafletOutput("map_plot", height = "500px")
          ),

          # Incidence-vs-mortality scatter colored by cluster
          box(
            width = 5,
            title = "Analyse des Clusters (Incidence vs Mortalité)",
            status = "success",
            solidHeader = TRUE,
            p(
              class = "text-muted",
              style = "font-size:0.9em",
              "Chaque point est un pays. Les couleurs correspondent aux groupes de risque identifiés par l'algorithme K-Means."
            ),
            p(
              class = "text-muted",
              "Cliquer sur un point pour filtrer par pays."
            ),
            plotlyOutput("cluster_scatter", height = "530px")
          )
        ),

        fluidRow(
          # Country vs regional vs global trajectory
          box(
            width = 7,
            title = "Comparaison : Pays vs Moyenne Régionale vs Moyenne Mondiale",
            status = "warning",
            solidHeader = TRUE,
            plotlyOutput("trend_plot", height = "400px")
          ),

          # Density of incidence by cluster
          box(
            width = 5,
            title = "Distribution des Clusters",
            status = "info",
            solidHeader = TRUE,
            p(
              class = "text-muted",
              "Cliquer sur un point du rug pour filtrer par pays."
            ),
            plotlyOutput("density_plot", height = "400px")
          )
        )
      ),

      # Page 2 - Raw data explorer
      tabItem(
        tabName = "data",
        fluidRow(
          box(
            width = 12,
            title = "Explorateur de Données",
            status = "primary",
            p("Tableau filtrable et exportable des données utilisées."),
            DTOutput("raw_table")
          )
        )
      ),

      # Page 3 - Methodology and definitions
      tabItem(
        tabName = "methodo",
        fluidRow(
          # WHO indicator definitions
          box(
            width = 12,
            title = "Définitions des Indicateurs OMS",
            status = "info",
            solidHeader = TRUE,
            column(
              width = 6,
              h4(icon("lungs"), "Incidence de la Tuberculose"),
              p(
                "Correspond à la variable ",
                code("e_inc_100k"),
                " dans le jeu de données de l'OMS."
              ),
              p(
                "Il s'agit du nombre estimé de ",
                strong("nouveaux cas"),
                " de tuberculose (toutes formes confondues) survenus au cours d'une année donnée, rapporté pour 100 000 habitants."
              ),
              p(
                "Cet indicateur mesure la ",
                em("propagation"),
                " de la maladie dans la population."
              )
            ),
            column(
              width = 6,
              h4(icon("skull"), "Mortalité (hors VIH)"),
              p(
                "Correspond à la variable ",
                code("e_mort_exc_tbhiv_100k"),
                "."
              ),
              p(
                "Il s'agit du nombre estimé de décès dus à la tuberculose chez les personnes non infectées par le VIH, rapporté pour 100 000 habitants."
              ),
              p(
                "Cet indicateur mesure la ",
                em("sévérité"),
                " et l'efficacité de l'accès aux soins (un taux élevé signale souvent un système de santé défaillant)."
              )
            )
          )
        ),

        fluidRow(
          column(
            width = 6,
            # Why only two active variables
            box(
              width = 12,
              title = "Pourquoi seulement 2 variables actives ?",
              status = "warning",
              solidHeader = TRUE,
              p(
                "Le modèle de clustering repose uniquement sur l'Incidence et la Mortalité. Ce choix de parcimonie est justifié par 4 contraintes techniques :"
              ),
              br(),
              column(
                width = 6,
                h4(
                  icon("ruler-combined"),
                  "1. Robustesse Mathématique",
                  class = "text-warning"
                ),
                p(
                  "Évite le 'fléau de la dimension'. Avec trop de variables, les distances euclidiennes perdent leur sens et les groupes deviennent flous."
                ),
                br(),
                h4(
                  icon("project-diagram"),
                  "2. Non-Colinéarité",
                  class = "text-warning"
                ),
                p(
                  "Évite de compter deux fois la même information (ex: Incidence vs Nombre de cas) qui fausserait le poids des indicateurs."
                )
              ),
              column(
                width = 6,
                h4(
                  icon("filter"),
                  "3. Qualité des Données",
                  class = "text-warning"
                ),
                p(
                  "Le K-Means ne tolère pas les données manquantes. Ajouter des variables socio-économiques aurait réduit la taille de l'échantillon de 30% à 50%."
                ),
                br(),
                h4(icon("eye"), "4. Lisibilité", class = "text-warning"),
                p(
                  "Permet une visualisation directe en 2D (Scatterplot) sans déformation, rendant l'outil accessible aux non-statisticiens."
                )
              )
            ),

            # Data source
            box(
              width = 12,
              title = "Source des Données",
              status = "danger",
              solidHeader = TRUE,
              p(
                icon("database"),
                "Les données sont issues du ",
                a(
                  "Global Tuberculosis Report",
                  href = "https://www.who.int/teams/global-programme-on-tuberculosis-and-lung-health/data",
                  target = "_blank"
                ),
                " de l'Organisation Mondiale de la Santé (OMS)."
              ),
              p("Dernière mise à jour du dataset : Octobre 2024.")
            )
          ),
          column(
            width = 6,
            # Clustering methodology
            box(
              width = 12,
              title = "Algorithme de Classification (Clustering)",
              status = "success",
              solidHeader = TRUE,
              h4("Pourquoi un Clustering K-Means ?"),
              p(
                # "3 clusters" here: consistent with k = 3 below and the 3 labels.
                "Afin de synthétiser l'information et de faciliter la prise de décision, j'ai appliqué un algorithme d'apprentissage non supervisé (K-Means) pour regrouper les pays ayant des profils épidémiques similaires sous 3 clusters."
              ),
              h4("Méthodologie"),
              tags$ul(
                tags$li(
                  strong("Variables :"),
                  " Incidence et Mortalité (centrées et réduites pour assurer un poids équivalent)."
                ),
                tags$li(
                  strong("Nombre de Clusters (k) :"),
                  " Fixé à 3 pour obtenir une segmentation tricolore lisible (Faible, Modéré, Critique)."
                ),
                tags$li(
                  strong("Stabilité :"),
                  "Utilisation de `set.seed(123)` pour garantir la reproductibilité des résultats."
                )
              ),
              h4("Interprétation des 3 Groupes"),

              # Cluster description table
              tags$table(
                class = "table table-striped",
                tags$thead(
                  tags$tr(
                    tags$th("Cluster"),
                    tags$th("Description"),
                    tags$th("Profil Type")
                  )
                ),
                tags$tbody(
                  tags$tr(
                    tags$td(span(
                      style = paste0(
                        "background-color:",
                        green,
                        "; color: black; font-weight: bold; padding: 5px; border-radius: 5px;"
                      ),
                      labels[1]
                    )),
                    tags$td("Incidence et mortalité très basses."),
                    tags$td("Europe de l'Ouest, Amérique du Nord")
                  ),
                  tags$tr(
                    tags$td(span(
                      style = paste0(
                        "background-color:",
                        orange,
                        "; color: black; font-weight: bold; padding: 5px; border-radius: 5px;"
                      ),
                      labels[2]
                    )),
                    tags$td("Incidence significative mais mortalité contenue."),
                    tags$td("Amérique Latine, Maghreb, Europe de l'Est")
                  ),
                  tags$tr(
                    tags$td(span(
                      style = paste0(
                        "background-color:",
                        red,
                        "; color: black; font-weight: bold; padding: 5px; border-radius: 5px;"
                      ),
                      labels[3]
                    )),
                    tags$td("Incidence massive et forte létalité."),
                    tags$td("Afrique Subsaharienne, Zones de conflit")
                  )
                )
              )
            ),

            # Code & documentation links
            box(
              title = "Code & Documentation",
              status = "primary",
              solidHeader = TRUE,
              width = 12,
              tags$p(
                "Ce projet suit une approche Open Science.",
                style = "font-style: italic;"
              ),
              tags$p(
                "L'intégralité du code source (Rmd, App) ainsi que la notice technique (PDF) sont disponibles en libre accès sur le dépôt GitHub."
              ),
              tags$a(
                href = "https://go.arthurdanjou.fr/datavis-code",
                target = "_blank",
                class = "btn btn-block btn-social btn-github",
                style = "color: white; background-color: #333; border-color: #333;",
                icon("github"),
                " Accéder au Code"
              )
            )
          )
        )
      )
    )
  )
)
|
||||
|
||||
# Server ----
# Reactive logic of the dashboard. Two reactives (filtered_data,
# map_data_reactive) feed every output; a single reactiveVal
# (selected_country) is updated by clicks on the map, the scatter plot and
# the density rug, and drives the trend plot and the highlight markers.
# NOTE: code is intentionally left untouched; the ordering of the
# renderLeaflet/observe pair and the isolate() call are load-bearing.
server <- function(input, output, session) {
  # Data restricted to the selected year, and optionally to the selected
  # WHO region ("Toutes" means no region filter).
  filtered_data <- shiny::reactive({
    req(input$year_select)
    data <- tb_clustered |> filter(year == input$year_select)

    if (input$region_select != "Toutes") {
      data <- data |> filter(g_whoregion == input$region_select)
    }
    return(data)
  })

  # Filtered data joined onto the country polygons for the leaflet map.
  # inner_join: countries without data for the selection are dropped from
  # the map rather than drawn empty.
  map_data_reactive <- shiny::reactive({
    req(filtered_data())
    world_map |> inner_join(filtered_data(), by = c("adm0_a3" = "iso3"))
  })

  # Named palette: cluster display label -> hex color.
  cluster_colors <- setNames(
    c(green, orange, red),
    labels
  )

  # Draws the choropleth polygons and their legend on `map_object`.
  # `metric` is either "label" (categorical cluster coloring) or the name
  # of a numeric column (continuous green->orange->red gradient).
  dessiner_polygones <- function(map_object, data, metric) {
    if (metric == "label") {
      values_vec <- data$label
      pal_fun <- leaflet::colorFactor(
        as.character(cluster_colors),
        domain = names(cluster_colors)
      )
      fill_vals <- pal_fun(values_vec)
      legend_title <- "Cluster"
      label_txt <- paste0(data$name, " - ", data$label)
      legend_vals <- names(cluster_colors)
    } else {
      values_vec <- data[[metric]]
      pal_fun <- leaflet::colorNumeric(
        palette = c(green, orange, red),
        domain = values_vec
      )
      fill_vals <- pal_fun(values_vec)
      legend_title <- "Taux / 100k"
      label_txt <- paste0(data$name, ": ", round(values_vec, 1))
      legend_vals <- values_vec
    }

    map_object |>
      leaflet::addPolygons(
        data = data,
        fillColor = ~fill_vals,
        weight = 1,
        color = ifelse(metric == "label", "gray", "black"),
        fillOpacity = 0.7,
        # layerId carries the ISO3 code so shape clicks can identify
        # the country (see the map_plot_shape_click observer below).
        layerId = ~adm0_a3,
        label = ~label_txt,
        highlightOptions = highlightOptions(
          weight = 3,
          color = "#666",
          bringToFront = TRUE
        )
      ) |>
      leaflet::addLegend(
        pal = pal_fun,
        values = legend_vals,
        title = legend_title,
        position = "bottomright"
      )
  }

  # Initial map render. Wrapped in isolate() so this render runs only
  # once; subsequent updates go through the leafletProxy observer, which
  # avoids rebuilding the whole widget on every filter change.
  output$map_plot <- leaflet::renderLeaflet({
    isolate({
      data <- map_data_reactive()
      metric <- input$metric_select

      leaflet(options = leafletOptions(minZoom = 2, maxZoom = 6)) |>
        addProviderTiles(
          providers$CartoDB.Positron,
          options = providerTileOptions(noWrap = TRUE)
        ) |>
        setMaxBounds(lng1 = -180, lat1 = -90, lng2 = 180, lat2 = 90) |>
        setView(lat = 20, lng = 0, zoom = 2) |>
        dessiner_polygones(data, metric)
    })
  })

  # KPI: mean incidence over the current selection.
  output$kpi_total_cases <- shinydashboard::renderValueBox({
    data <- filtered_data()
    val <- round(mean(data$e_inc_100k, na.rm = TRUE))
    valueBox(
      val,
      "Incidence Moyenne (cas/100k)",
      icon = icon("chart-area"),
      color = "green"
    )
  })

  # KPI: country with the highest incidence in the current selection.
  output$kpi_worst_country <- shinydashboard::renderValueBox({
    data <- filtered_data()
    worst <- data |>
      arrange(desc(e_inc_100k)) |>
      slice(1)

    if (nrow(worst) > 0) {
      valueBox(
        worst$country,
        paste("Max Incidence :", round(worst$e_inc_100k)),
        icon = icon("exclamation-triangle"),
        color = "red"
      )
    } else {
      # Empty selection (e.g. region/year combination with no rows).
      valueBox("N/A", "Pas de données", icon = icon("ban"), color = "red")
    }
  })

  # KPI: number of countries in the "Critique" cluster (labels[3]).
  output$kpi_critical_count <- shinydashboard::renderValueBox({
    data <- filtered_data()
    count <- sum(data$label == labels[3], na.rm = TRUE)
    valueBox(
      count,
      "Pays en phase 'Critique'",
      icon = icon("hospital"),
      color = "orange"
    )
  })

  # Trend plot: selected country's incidence trajectory (area + line)
  # against its regional mean and the global mean, with a vertical marker
  # at the currently selected year. Rendered through plotly with custom
  # HTML tooltips built in the `text` aesthetic.
  output$trend_plot <- plotly::renderPlotly({
    req(selected_country())
    country_hist <- tb_clustered |> filter(iso3 == selected_country())
    if (nrow(country_hist) == 0) {
      return(NULL)
    }

    nom_pays <- unique(country_hist$country)[1]
    region_du_pays <- unique(country_hist$g_whoregion)[1]

    # Mean incidence per year for the country's WHO region.
    region_benchmark <- tb_clustered |>
      filter(g_whoregion == region_du_pays) |>
      group_by(year) |>
      summarise(mean_inc = mean(e_inc_100k, na.rm = TRUE))

    # Mean incidence per year across all countries.
    global_benchmark <- tb_clustered |>
      group_by(year) |>
      summarise(mean_inc = mean(e_inc_100k, na.rm = TRUE))

    p <- ggplot() +
      geom_area(
        data = country_hist,
        aes(
          x = year,
          y = e_inc_100k,
          group = 1,
          text = paste0("<b>Pays : ", nom_pays, "</b>")
        ),
        fill = red,
        alpha = 0.1
      ) +
      geom_line(
        data = region_benchmark,
        aes(
          x = year,
          y = mean_inc,
          group = 1,
          color = "Moyenne Régionale",
          text = paste0(
            "<b>Moyenne ",
            region_du_pays,
            "</b><br>Année : ",
            year,
            "<br>Incidence : ",
            round(mean_inc, 1)
          )
        ),
        size = 0.5,
        linetype = "dashed"
      ) +
      geom_line(
        data = global_benchmark,
        aes(
          x = year,
          y = mean_inc,
          group = 1,
          color = "Moyenne Mondiale",
          text = paste0(
            "<b>Moyenne Mondiale</b><br>Année : ",
            year,
            "<br>Incidence : ",
            round(mean_inc, 1)
          )
        ),
        size = 0.75,
        linetype = "dashed"
      ) +
      geom_line(
        data = country_hist,
        aes(
          x = year,
          y = e_inc_100k,
          group = 1,
          color = "Pays Sélectionné",
          text = paste0(
            "<b>Pays : ",
            nom_pays,
            "</b><br>Incidence : ",
            round(e_inc_100k, 1),
            "<br>Mortalité : ",
            round(e_mort_exc_tbhiv_100k, 1)
          )
        ),
        size = 1
      ) +
      # Marker for the year currently selected in the sidebar slider.
      geom_vline(
        xintercept = as.numeric(input$year_select),
        linetype = "dotted",
        color = "black",
        alpha = 0.6
      ) +
      scale_color_manual(
        name = "",
        values = c(
          "Moyenne Régionale" = "grey30",
          "Pays Sélectionné" = red,
          "Moyenne Mondiale" = "orange"
        )
      ) +
      labs(
        title = paste(
          "Trajectoire :",
          nom_pays,
          "vs",
          region_du_pays,
          "vs Monde"
        ),
        x = "Année",
        y = "Incidence (pour 100k)"
      ) +
      theme_minimal() +
      theme(legend.position = "bottom")

    ggplotly(p, tooltip = "text") |>
      layout(
        legend = list(orientation = "h", x = 0.1, y = -0.2),
        hovermode = "x unified"
      )
  })

  # Density plot: incidence distribution per cluster (log x-axis) with a
  # clickable per-country rug; the selected country is highlighted with a
  # white marker. `customdata = iso3` feeds the density_click observer.
  output$density_plot <- plotly::renderPlotly({
    data <- filtered_data()
    sel_iso <- selected_country()
    highlight_data <- data |> filter(iso3 == sel_iso)

    p <- ggplot(data, aes(x = e_inc_100k, fill = label)) +
      geom_density(
        aes(text = paste0("<b>Cluster : </b>", label)),
        alpha = 0.6,
        color = NA
      ) +
      geom_rug(
        aes(
          color = label,
          customdata = iso3,
          text = paste0(
            "<b>Pays : </b>",
            country,
            "<br><b>Incidence : </b>",
            round(e_inc_100k),
            " (cas/100k)<br><b>Cluster : </b>",
            label
          )
        ),
        sides = "b",
        length = unit(0.2, "npc"),
        size = 1.2,
        alpha = 0.9
      ) +
      geom_point(
        data = highlight_data,
        aes(
          x = e_inc_100k,
          y = 0,
          text = paste0(
            "<b>PAYS SÉLECTIONNÉ</b><br><b>",
            country,
            "</b><br>Incidence : ",
            round(e_inc_100k)
          )
        ),
        color = "black",
        fill = "white",
        shape = 21,
        size = 4
      ) +
      scale_fill_manual(values = cluster_colors) +
      scale_color_manual(values = cluster_colors) +
      scale_x_log10() +
      labs(
        title = "Distribution des Risques",
        x = "Incidence (Échelle Log)",
        y = NULL
      ) +
      theme_minimal() +
      theme(
        legend.position = "none",
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank()
      )

    # `source` tags this widget so event_data() can tell its clicks apart
    # from the scatter plot's.
    ggplotly(p, tooltip = "text", source = "density_click") |>
      layout(hovermode = "closest")
  })

  # Cluster scatter: incidence vs mortality on log-log axes, colored by
  # cluster; the selected country is overplotted with a ringed marker.
  output$cluster_scatter <- plotly::renderPlotly({
    data <- filtered_data()
    sel_iso <- selected_country()
    highlight_data <- data |> filter(iso3 == sel_iso)

    p <- ggplot(data, aes(x = e_inc_100k, y = e_mort_exc_tbhiv_100k)) +
      geom_point(
        aes(
          color = label,
          customdata = iso3,
          text = paste(
            "Pays:",
            country,
            "<br>Cluster:",
            label,
            "<br>Pop:",
            round(e_pop_num / 1e6, 1),
            "M",
            "<br>Incidence:",
            round(e_inc_100k),
            "<br>Mortalité:",
            round(e_mort_exc_tbhiv_100k)
          )
        ),
        size = 3,
        alpha = 0.6
      ) +
      geom_point(
        data = highlight_data,
        aes(
          fill = label,
          text = paste(
            "<b>PAYS SÉLECTIONNÉ</b>",
            "<br>Pays:",
            country,
            "<br>Cluster:",
            label,
            "<br>Incidence:",
            round(e_inc_100k),
            "<br>Mortalité:",
            round(e_mort_exc_tbhiv_100k)
          )
        ),
        shape = 21,
        color = "black",
        stroke = 1,
        size = 5,
        alpha = 1,
        show.legend = FALSE
      ) +
      scale_x_log10() +
      scale_y_log10() +
      scale_color_manual(values = cluster_colors) +
      scale_fill_manual(values = cluster_colors) +
      labs(title = "Incidence vs Mortalité", x = "Incidence", y = "Mortalité") +
      theme_minimal() +
      theme(legend.position = "bottom")

    ggplotly(p, tooltip = "text", source = "scatter_click")
  })

  # Raw data table: current selection, key columns only, with French
  # column headers.
  output$raw_table <- DT::renderDT({
    data <- filtered_data() |>
      select(
        country,
        year,
        g_whoregion,
        e_inc_100k,
        e_mort_exc_tbhiv_100k,
        label
      )

    datatable(
      data,
      rownames = FALSE,
      options = list(pageLength = 15, scrollX = TRUE),
      colnames = c(
        "Pays",
        "Année",
        "Région",
        "Incidence",
        "Mortalité",
        "Cluster"
      )
    )
  })

  # Map refresh: redraws polygons/legend via leafletProxy whenever the
  # joined data or the chosen metric changes, without recreating the map.
  shiny::observe({
    data <- map_data_reactive()
    metric <- input$metric_select

    leafletProxy("map_plot", data = data) |>
      clearShapes() |>
      clearControls() |>
      dessiner_polygones(data, metric)
  })

  # Currently selected country (ISO3); defaults to France.
  selected_country <- shiny::reactiveVal("FRA")

  # Country selection from a click on a map polygon (id = layerId = iso3).
  shiny::observeEvent(input$map_plot_shape_click, {
    click <- input$map_plot_shape_click
    if (!is.null(click$id)) {
      selected_country(click$id)
    }
  })

  # Country selection from a click on the scatter plot.
  shiny::observeEvent(event_data("plotly_click", source = "scatter_click"), {
    click_info <- event_data("plotly_click", source = "scatter_click")
    if (!is.null(click_info$customdata)) {
      selected_country(click_info$customdata)
    }
  })

  # Country selection from a click on the density rug.
  shiny::observeEvent(event_data("plotly_click", source = "density_click"), {
    click_info <- event_data("plotly_click", source = "density_click")
    if (!is.null(click_info$customdata)) {
      selected_country(click_info$customdata)
    }
  })
}
|
||||
|
||||
# Launch the Shiny application from the UI and server defined above.
shiny::shinyApp(ui = ui, server = server)
|
||||
BIN
M2/Data Visualisation/Project/data/TB_analysis_ready.RData
Normal file
BIN
M2/Data Visualisation/Project/logo_dauphine.jpg
Normal file
|
After Width: | Height: | Size: 45 KiB |
3914
M2/Data Visualisation/Project/renv.lock
Normal file
24
M2/Data Visualisation/init.R
Normal file
@@ -0,0 +1,24 @@
|
||||
# Packages required by the course material (visualisation, tidy data,
# spatial, and time-series tooling).
packages_to_install <- c(
  "lattice", "grid", "ggplot2", "gridExtra", "locfit", "scales",
  "formattable", "RColorBrewer", "plotly", "dplyr", "tidyr",
  "rmarkdown", "ggthemes", "cowplot", "kableExtra", "ggridges",
  "colorspace", "sf", "mapview", "tidyverse", "readxl", "readr",
  "giscoR", "gapminder", "GGally", "ggfortify", "lubridate", "zoo",
  "xts", "forecast", "feasts", "tseries", "tsibble", "fable"
)

# Install `package_name` if it is not already available.
#
# Fix: the original reported success unconditionally after calling
# install.packages(), which does not signal failure via its return value.
# We now re-check availability after the install and warn if it failed.
# Returns (invisibly) TRUE if an installation was performed successfully,
# FALSE otherwise (already installed, or install failed).
install_if_absent <- function(package_name) {
  if (requireNamespace(package_name, quietly = TRUE)) {
    message(
      paste("Package", package_name, "déjà installé, installation ignorée.")
    )
    return(invisible(FALSE))
  }

  install.packages(package_name)

  # Verify the package is actually loadable before claiming success.
  if (requireNamespace(package_name, quietly = TRUE)) {
    message(paste("Package", package_name, "installé avec succès."))
    invisible(TRUE)
  } else {
    warning(
      paste("Package", package_name, "n'a pas pu être installé."),
      call. = FALSE
    )
    invisible(FALSE)
  }
}

# Apply to every required package; invisible() keeps the list of results
# from being printed at the top level.
invisible(lapply(packages_to_install, install_if_absent))
|
||||
BIN
M2/Data Visualisation/tp1/data/datafreMPTL.RData
Normal file
BIN
M2/Data Visualisation/tp1/manchot.png
Normal file
|
After Width: | Height: | Size: 11 KiB |
198
M2/Data Visualisation/tp1/style.css
Normal file
@@ -0,0 +1,198 @@
|
||||
.infobox {
|
||||
padding: 1em 1em 1em 4em;
|
||||
margin-bottom: 10px;
|
||||
border: 2px solid orange;
|
||||
border-radius: 10px;
|
||||
background: #f5f5f5 5px center/3em no-repeat;
|
||||
}
|
||||
|
||||
/* Custom box styles for math environments */
|
||||
|
||||
/* Lemma Box */
|
||||
.lemma-box {
|
||||
border: 2px solid #3c8dbc;
|
||||
background-color: #e6f7ff;
|
||||
padding: 10px;
|
||||
border-radius: 8px;
|
||||
margin: 10px 0;
|
||||
}
|
||||
|
||||
/* Theorem Box */
|
||||
.theorem-box {
|
||||
border: 2px solid #2ca02c;
|
||||
background-color: #eaffea;
|
||||
padding: 10px;
|
||||
border-radius: 8px;
|
||||
margin: 10px 0;
|
||||
}
|
||||
|
||||
/* Corollary Box */
|
||||
.corollary-box {
|
||||
border: 2px solid #2ca02c;
|
||||
background-color: #eaffea;
|
||||
padding: 10px;
|
||||
border-radius: 8px;
|
||||
margin: 10px 0;
|
||||
}
|
||||
|
||||
/* Proposition Box */
|
||||
.proposition-box {
|
||||
border: 2px solid #ff7f0e;
|
||||
background-color: #fff3e6;
|
||||
padding: 10px;
|
||||
border-radius: 8px;
|
||||
margin: 10px 0;
|
||||
}
|
||||
|
||||
/* Conjecture Box */
|
||||
.conjecture-box {
|
||||
border: 2px solid #9467bd;
|
||||
background-color: #f5e6ff;
|
||||
padding: 10px;
|
||||
border-radius: 8px;
|
||||
margin: 10px 0;
|
||||
}
|
||||
|
||||
/* Definition Box */
|
||||
.definition-box {
|
||||
border: 2px solid #d62728;
|
||||
background-color: #ffe6e6;
|
||||
padding: 10px;
|
||||
border-radius: 8px;
|
||||
margin: 10px 0;
|
||||
}
|
||||
|
||||
/* Example Box */
|
||||
.example-box {
|
||||
border: 2px solid #17becf;
|
||||
/* background-color: #e6f7ff;*/
|
||||
padding: 10px;
|
||||
border-radius: 8px;
|
||||
margin: 10px 0;
|
||||
}
|
||||
|
||||
/* Exercise Box */
|
||||
.exercise-box {
|
||||
border: 2px solid #1f77b4;
|
||||
background-color: #e6f7ff;
|
||||
padding: 10px;
|
||||
border-radius: 8px;
|
||||
margin: 10px 0;
|
||||
}
|
||||
|
||||
/* Hypothesis Box */
|
||||
.hypothesis-box {
|
||||
border: 2px solid #e377c2;
|
||||
background-color: #ffe6f5;
|
||||
padding: 10px;
|
||||
border-radius: 8px;
|
||||
margin: 10px 0;
|
||||
}
|
||||
|
||||
/* Remark Box */
|
||||
.remark-box {
|
||||
border: 2px solid #7f7f7f;
|
||||
background-color: #f2f2f2;
|
||||
padding: 10px;
|
||||
border-radius: 8px;
|
||||
margin: 10px 0;
|
||||
}
|
||||
|
||||
/* Proof Box */
|
||||
.proof-box {
|
||||
border: 2px solid #bcbd22;
|
||||
background-color: #fafad2;
|
||||
padding: 10px;
|
||||
border-radius: 8px;
|
||||
margin: 10px 0;
|
||||
}
|
||||
|
||||
/* Hint Box */
|
||||
.hint-box {
|
||||
border: 2px solid #7f7f7f;
|
||||
background-color: #f2f2f2;
|
||||
padding: 10px;
|
||||
border-radius: 8px;
|
||||
margin: 10px 0;
|
||||
}
|
||||
|
||||
|
||||
/* Numérotation automatique */
|
||||
|
||||
|
||||
.lemma-box::before {
|
||||
counter-increment: lemma-counter;
|
||||
content: "Lemme " counter(lemma-counter) ". ";
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.theorem-box::before {
|
||||
counter-increment: theorem-counter;
|
||||
content: "Théorème " counter(theorem-counter) ". ";
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.corollary-box::before {
|
||||
counter-increment: corollary-counter;
|
||||
content: "Corollaire " counter(corollary-counter) ". ";
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.proposition-box::before {
|
||||
counter-increment: proposition-counter;
|
||||
content: "Proposition " counter(proposition-counter) ". ";
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.conjecture-box::before {
|
||||
counter-increment: conjecture-counter;
|
||||
content: "Conjecture " counter(conjecture-counter) ". ";
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.definition-box::before {
|
||||
counter-increment: definition-counter;
|
||||
content: "Définition " counter(definition-counter) ". ";
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.example-box::before {
|
||||
counter-increment: example-counter;
|
||||
content: "Exemple " counter(example-counter) ". ";
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
|
||||
.exercise-box::before {
|
||||
counter-increment: exercise-counter;
|
||||
content: "Question " counter(exercise-counter) ". ";
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.hypothesis-box::before {
|
||||
counter-increment: hypothesis-counter;
|
||||
content: "Hypothèse " counter(hypothesis-counter) ". ";
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.remark-box::before {
|
||||
counter-increment: remark-counter;
|
||||
content: "Remarque " counter(remark-counter) ". ";
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.proof-box::before {
|
||||
counter-increment: proof-counter;
|
||||
content: "Preuve " counter(proof-counter) ". ";
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.hint-box::before {
|
||||
content: "Indice." ;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
/* Initialisation des compteurs */
|
||||
body {
|
||||
counter-reset: lemma-counter corollary-counter proposition-counter conjecture-counter definition-counter example-counter exercise-counter hypothesis-counter remark-counter proof-counter;
|
||||
}
|
||||
677
M2/Data Visualisation/tp1/tp1.Rmd
Normal file
@@ -0,0 +1,677 @@
|
||||
---
|
||||
title: "Prise en main de ggplot2"
|
||||
author: "Quentin Guibert"
|
||||
date: "Année 2025-2026"
|
||||
institute: "Université Paris-Dauphine | Master ISF"
|
||||
lang: fr
|
||||
link-citations: true
|
||||
output:
|
||||
rmdformats::robobook:
|
||||
highlight: kate
|
||||
use_bookdown: true
|
||||
css: style.css
|
||||
lightbox : true
|
||||
gallery: true
|
||||
code_folding: show
|
||||
theme: flatly
|
||||
toc_float:
|
||||
collapsed: no
|
||||
editor_options:
|
||||
markdown:
|
||||
wrap: 72
|
||||
# bibliography: references.bib
|
||||
---
|
||||
|
||||
```{r setup, include=FALSE}
|
||||
## Global options
|
||||
knitr::opts_chunk$set(
|
||||
cache = FALSE,
|
||||
warning = FALSE,
|
||||
message = FALSE,
|
||||
fig.retina = 2,
|
||||
fig.height = 6,
|
||||
fig.width = 12
|
||||
)
|
||||
options(encoding = "UTF-8")
|
||||
```
|
||||
|
||||
```{r, echo = FALSE, fig.keep= 'none'}
|
||||
# Chargement des librairies graphiques
|
||||
library(lattice)
|
||||
library(grid)
|
||||
library(ggplot2)
|
||||
require(gridExtra)
|
||||
library(locfit)
|
||||
library(scales)
|
||||
library(formattable)
|
||||
library(RColorBrewer)
|
||||
library(plotly)
|
||||
library(dplyr)
|
||||
library(tidyr)
|
||||
library(rmarkdown)
|
||||
library(ggthemes)
|
||||
library(cowplot)
|
||||
library(kableExtra)
|
||||
```
|
||||
|
||||
# Objectifs du TP
|
||||
|
||||
L'objectif de ce TP vise à se familiariser avec le package **ggplot2**
|
||||
de `R`. Il s'agit de faire des manipulations graphiques élémentaires et
|
||||
d'interpréter les résultats de ces visualisations.
|
||||
|
||||
Dans un premier temps, vous pouvez suivre l'exemple introductif en
|
||||
répliquant le code fourni. Dans un deuxième temps, il convient de
|
||||
réaliser l'exercice point par point.
|
||||
|
||||
# Prérequis
|
||||
|
||||
- Avoir installer `R` [ici](https://www.r-project.org/).
|
||||
- Avoir installer un IDE, par exemple `RStudio`
|
||||
[ici](https://posit.co/download/rstudio-desktop/).
|
||||
- Créer un nouveau projet (`File`, puis `New Projet`) dans un dossier
|
||||
sur votre ordinateur.
|
||||
- Télécharger [ici](https://moodle.psl.eu/course/view.php?id=33799)
|
||||
les fichiers nécessaires au TD.
|
||||
|
||||
Vous pouvez ensuite écrire vos codes soit :
|
||||
|
||||
- En ouvrant un nouveau script `.R` ;
|
||||
- En ouvrant le ouvrant le rapport Rmarkdown `3-td_ggplot2 - enonce`.
|
||||
Certains codes sont partiels et sont à compléter (indication `???`).
|
||||
N'oubliez pas de modifier l'option `eval = TRUE` pour que les
|
||||
calculs puissent être réalisés.
|
||||
|
||||
# Exemple introductif
|
||||
|
||||
Pour illustrer cette première partie, nous reprenons l'exemple
|
||||
introductif fourni par @wickham2023 sur le jeu de données `penguins` du
|
||||
package **palmerpenguins**. Ce jeu de données s'intèresse des mesures
|
||||
réalisées sur des manchots sur 3 îles de l'archipelle Palmer.
|
||||
|
||||
## Données
|
||||
|
||||
Dans un premier temps, il faut installer le package et le charger.
|
||||
|
||||
```{r}
|
||||
# install.packages("palmerpenguins") #nolint
|
||||
library(palmerpenguins)
|
||||
```
|
||||
|
||||
Ce jeu de données contient 344 observations où chaque ligne correspond à
|
||||
un individu.
|
||||
|
||||
```{r}
|
||||
paged_table(penguins, options = list(rows.print = 15))
|
||||
```
|
||||
|
||||
On se concentre plus particulièrement sur les variables suivantes :
|
||||
|
||||
- `species` : l'espèce de manchot ;
|
||||
- `flipper_length_mm` : la longueur de la nageoire en mm ;
|
||||
- `body_mass_g` : la masse en gramme.
|
||||
|
||||
Pour plus détails, voir l'aide `?penguins`.
|
||||
|
||||
## But de la visualisation
|
||||
|
||||
On s'intéresse au lien entre le masse et la taille des nageoires des
|
||||
manchots :
|
||||
|
||||
- ceux dont les nageoires sont les plus longues sont-ils plus lourds
|
||||
que les manchots aux nageoires courtes ?
|
||||
- si oui quelle est le type de relation (linéaire, croissante,
|
||||
décroissante, ...) ?
|
||||
- quels facteurs influencent également cette relation (lieu, l'espèce,
|
||||
... ) ?
|
||||
|
||||
On cherche à recréer la figure suivante.
|
||||
|
||||

|
||||
|
||||
## Création de la figure étape par étape
|
||||
|
||||
### Etape 1 : Scatterplot {.unnumbered}
|
||||
|
||||
On commence par créer un scatterplot pour examiner la relation entre la
|
||||
masse et la taille de la nageoire.
|
||||
|
||||
```{r}
|
||||
ggplot(
|
||||
data = penguins,
|
||||
mapping = aes(x = flipper_length_mm, y = body_mass_g)
|
||||
) +
|
||||
geom_point()
|
||||
```
|
||||
|
||||
Cette figure fait clairement apparaître une relation croissante et a
|
||||
priori linéaire entre les deux variables.
|
||||
|
||||
::: remark-box
|
||||
Un message d'erreur apparaît pour deux individus avec des données
|
||||
manquantes. Ils sont automatiquement exclus.
|
||||
:::
|
||||
|
||||
### Etape 2 : Ajout d'élements esthétiques {.unnumbered}
|
||||
|
||||
On cherche à présent exhiber le rôle de l’espèce à partir d'une couleur.
|
||||
Trois espèces sont présents, ainsi l'ajout de 3 couleurs à la figure ne
|
||||
devrait pas surcharger le graphique.
|
||||
|
||||
```{r}
|
||||
ggplot(
|
||||
data = penguins,
|
||||
mapping = aes(x = flipper_length_mm, y = body_mass_g, color = species)
|
||||
) +
|
||||
geom_point()
|
||||
```
|
||||
|
||||
Compte tenu du nombre important de points, nous pouvons renforcer les
|
||||
différences par espèce en ajoutant une variation de forme aux points.
|
||||
|
||||
```{r}
|
||||
ggplot(
|
||||
data = penguins,
|
||||
mapping = aes(x = flipper_length_mm, y = body_mass_g)
|
||||
) +
|
||||
geom_point(
|
||||
mapping = aes(
|
||||
color = species,
|
||||
shape = species
|
||||
)
|
||||
)
|
||||
```
|
||||
|
||||
### Etape 3 : Ajout d'une géométrie {.unnumbered}
|
||||
|
||||
Voyons à présent comment interpréter la nature de la relation entre
|
||||
masse et longueur de la nageoire. Pour ce faire, nous essayons d'ajout
|
||||
des courbes de tendance. Nous commençons par une tendance linéaire.
|
||||
|
||||
```{r}
|
||||
ggplot(
|
||||
data = penguins,
|
||||
mapping = aes(x = flipper_length_mm, y = body_mass_g)
|
||||
) +
|
||||
geom_point(
|
||||
mapping = aes(
|
||||
color = species,
|
||||
shape = species
|
||||
)
|
||||
) +
|
||||
geom_smooth(method = "lm")
|
||||
```
|
||||
|
||||
La même figure peut être générée par espèce en déplaçant l'argument
|
||||
`color = species`.
|
||||
|
||||
```{r}
|
||||
ggplot(
|
||||
data = penguins,
|
||||
mapping = aes(x = flipper_length_mm, y = body_mass_g, color = species)
|
||||
) +
|
||||
geom_point(
|
||||
mapping = aes(
|
||||
shape = species
|
||||
)
|
||||
) +
|
||||
geom_smooth(method = "lm")
|
||||
```
|
||||
|
||||
Les pentes entre les espèces ne sont pas si éloignées. Nous décidons que
|
||||
conserver une relation commune pour toutes espèces. Pour tester si la
|
||||
nature linéaire de la relation est a priori une bonne hypothèse, nous
|
||||
considérons un lissage non-paramétrique.
|
||||
|
||||
```{r}
|
||||
ggplot(
|
||||
data = penguins,
|
||||
mapping = aes(x = flipper_length_mm, y = body_mass_g)
|
||||
) +
|
||||
geom_point(
|
||||
mapping = aes(
|
||||
color = species,
|
||||
shape = species
|
||||
)
|
||||
) +
|
||||
geom_smooth(method = "loess")
|
||||
```
|
||||
|
||||
L'ajout d'un lissage non-paramétrique permet d'affiner l’adéquation aux
|
||||
données, mais sans pour autant clairement remettre en cause la tendance
|
||||
linéaire qui sera donc conservée.
|
||||
|
||||
### Etape 4 : Ajout des titres et changement de thème {.unnumbered}
|
||||
|
||||
Afin de finaliser la figure, nous ajouter :
|
||||
|
||||
- un titre ;
|
||||
- un sous-titre ;
|
||||
- des titres aux axes ;
|
||||
- un titre à la légende.
|
||||
|
||||
Ces informations sont ajoutées avec `labs()`.
|
||||
|
||||
De plus, nous modifions le thème avec la commande `theme_bw()`.
|
||||
|
||||
```{r}
|
||||
ggplot(
|
||||
data = penguins,
|
||||
mapping = aes(x = flipper_length_mm, y = body_mass_g)
|
||||
) +
|
||||
geom_point(aes(color = species, shape = species)) +
|
||||
geom_smooth(method = "lm") +
|
||||
labs(
|
||||
title = "Masse et taille de la nageoire",
|
||||
subtitle = "Manchots d'Adelie, a
|
||||
jugulaire et de Gentoo",
|
||||
x = "Longueur de la nageoire (mm)",
|
||||
y = "Masse (g)",
|
||||
color = "Espece",
|
||||
shape = "Espece"
|
||||
) +
|
||||
scale_color_colorblind() +
|
||||
theme_bw()
|
||||
```
|
||||
|
||||
------------------------------------------------------------------------
|
||||
|
||||
# Exercice
|
||||
|
||||
## Données
|
||||
|
||||
Nous travaillons avec les jeux de données `FreMTPL2freq` et
|
||||
`FreMTPL2sev` du package **Casdatasets**. Ces données ont été
|
||||
préalablement pré-formatées et regroupées.
|
||||
|
||||
Ce jeux de données regroupent les caractéristiques de 677 991 polices de
|
||||
responsabilité civile automobile, observées principalement sur une
|
||||
année. Dans les données regroupées, on dispose des numéros de sinistre
|
||||
par police, des montants de sinistre correspondants, des
|
||||
caractéristiques du risque et du nombre de sinistres.
|
||||
|
||||
On présente ci-dessous un aperçu des données.
|
||||
|
||||
```{r begin}
|
||||
# Folds
|
||||
fold <- getwd()
|
||||
|
||||
# Load data
|
||||
# load(paste0(fold, "/M2/Data Visualisation/tp1", "/data/datafreMPTL.RData")) # VSCode
|
||||
load(paste0(fold, "/data/datafreMPTL.RData")) # RStudio
|
||||
paged_table(dat, options = list(rows.print = 15))
|
||||
```
|
||||
|
||||
Le tableau suivant présente une définition des variables.
|
||||
|
||||
```{r}
|
||||
kableExtra::kable(
|
||||
data.frame(
|
||||
Variable = c(
|
||||
"IDpol",
|
||||
"Exposure",
|
||||
"VehPower",
|
||||
"VehAge",
|
||||
"DrivAge",
|
||||
"BonusMalus",
|
||||
"VehBrand",
|
||||
"VehGas",
|
||||
"Area",
|
||||
"Density",
|
||||
"Region",
|
||||
"ClaimTotal",
|
||||
"ClaimNb"
|
||||
),
|
||||
Description = c(
|
||||
"Identifiant de la police",
|
||||
"Exposition au risque",
|
||||
"Puissance du véhicule",
|
||||
"Age du véhicule en année",
|
||||
"Age du conducteur en année",
|
||||
"Coefficient de bonus-malus",
|
||||
"Marque du véhicule",
|
||||
"Carburant du véhicule",
|
||||
"Catégorie correspondant à la densité de la zone assurée",
|
||||
"Densité de population",
|
||||
"Region (selon la classication 1970-2015)",
|
||||
"Montant total des sinistres",
|
||||
"Nombre de sinistres sur la période"
|
||||
),
|
||||
Type = c(
|
||||
rep("Reel", 2),
|
||||
rep("Entier", 4),
|
||||
rep("Cat", 3),
|
||||
"Entier",
|
||||
"Cat",
|
||||
rep("Reel", 2)
|
||||
)
|
||||
),
|
||||
booktabs = TRUE
|
||||
)
|
||||
# Short summary
|
||||
str(dat)
|
||||
```
|
||||
|
||||
Pour plus de détails, consulter l'aide `?CASdatasets::freMTPL2freq`.
|
||||
|
||||
------------------------------------------------------------------------
|
||||
|
||||
## But de la visualisation
|
||||
|
||||
Nous effectuons une première analyse descriptive de données et cherchons
|
||||
à étudier la relation entre :
|
||||
|
||||
- la fréquence, calculée avec les variables `ClaimNb` et `Exposure`
|
||||
(période d'exposition en année).
|
||||
- les variables `Area` et `DrivAge`.
|
||||
|
||||
Le but de la visualisation est de fait ressortir les liens entre la
|
||||
fréquence et ces deux variables.
|
||||
|
||||
### Etape 1 : Visualisation de la fréquence et de l'exposition {.unnumbered}
|
||||
|
||||
::: exercise-box
|
||||
A partir des données `dat` :
|
||||
|
||||
- afficher les statistiques descriptives du nombre de sinistres
|
||||
`ClaimNb` et de la variable `Exposure` ;
|
||||
- afficher des histogrammes pour visualiser leur distribution ;
|
||||
- afficher les figures côte a côte avec la fonction `plot_grid()`.
|
||||
|
||||
Essayer de choisir un thème de couleur et un écartement des barres de
|
||||
l'histogramme facilitant sa lisibilité.
|
||||
:::
|
||||
|
||||
::: indice-box
|
||||
On pourra développer une fonction qui utilise `geom_histogram()` sous la
|
||||
package **ggplot2**.
|
||||
:::
|
||||
|
||||
```{r, fig.height = 6, fig.width = 12}
|
||||
# Descriptive statistics
|
||||
summary(dat$ClaimNb)
|
||||
summary(dat$Exposure)
|
||||
|
||||
p1 <- ggplot(dat) +
|
||||
geom_histogram(
|
||||
aes(x = ClaimNb),
|
||||
binwidth = 0.25,
|
||||
fill = "lightblue",
|
||||
color = "black"
|
||||
) +
|
||||
labs(
|
||||
title = "Distribution du nombre de sinistres",
|
||||
x = "Nombre de sinistres",
|
||||
y = "Effectif"
|
||||
) +
|
||||
theme_bw()
|
||||
|
||||
p2 <- ggplot(dat) +
|
||||
geom_histogram(
|
||||
aes(x = Exposure),
|
||||
binwidth = 0.05,
|
||||
fill = "lightblue",
|
||||
color = "black"
|
||||
) +
|
||||
labs(title = "Exposition", x = "Nombre de sinistres", y = "Effectif") +
|
||||
theme_bw()
|
||||
|
||||
plot_grid(p1, p2, ncol = 2)
|
||||
```
|
||||
|
||||
### Etape 2 : Calculer la fréquence {.unnumbered}
|
||||
|
||||
::: exercise-box
|
||||
Construire un tableau présentant l’exposition cumulée et le nombre
|
||||
d’observations avec 0 sinistre, 1 sinistre, …
|
||||
:::
|
||||
|
||||
```{r}
|
||||
dat |>
|
||||
group_by(ClaimNb) |>
|
||||
summarise(n = n(), Exposure = round(sum(Exposure), 0)) |>
|
||||
kable(
|
||||
col_names = c(
|
||||
"Nombre de sinistres",
|
||||
"Nombres d'observations",
|
||||
"Exposition totale"
|
||||
)
|
||||
) |>
|
||||
kable_styling(full_width = FALSE)
|
||||
```
|
||||
|
||||
```{r}
|
||||
pf_freq <- round(sum(dat$ClaimNb) / sum(dat$Exposure), 4)
|
||||
pf_freq
|
||||
```
|
||||
|
||||
Ce calcul de fréquence sera ensuite utile pour l'affichage des
|
||||
résultats.
|
||||
|
||||
### Etape 3 : Calculer l'exposition et la fréquence par variable {.unnumbered}
|
||||
|
||||
::: exercise-box
|
||||
Pour la variable `DrivAge`, présenter :
|
||||
|
||||
1. un histogramme de l'exposition en fonction de cette variable.
|
||||
2. un histogramme de la fréquence moyenne de sinistres en fonction de
|
||||
cette variable.
|
||||
|
||||
Remplacer ensuite le second histogramme par un scatter plot avec une
|
||||
courbe de tendance. Est-ce plus clair ?
|
||||
|
||||
**Indice**
|
||||
|
||||
On pourra développer une fonction qui utilise `geom_bar()` sous la
|
||||
package **ggplot2**.
|
||||
:::
|
||||
|
||||
```{r, eval = FALSE}
|
||||
# On regroupe selon les modalites de la DrivAge
|
||||
# l'exposition, le nombre de sinistres et la frequence
|
||||
df_plot <- dat |>
|
||||
group_by(DrivAge) |>
|
||||
summarize(
|
||||
exp = sum(Exposure),
|
||||
nb_claims = sum(ClaimNb),
|
||||
freq = sum(ClaimNb) / sum(Exposure)
|
||||
)
|
||||
|
||||
# Histogramme exposition
|
||||
p1 <- ggplot(df_plot, aes(x = DrivAge, y = exp)) +
|
||||
geom_bar(stat = "identity", fill = "grey", color = "black", alpha = 0.5) +
|
||||
labs(x = "Age du conducteur", y = "Exposition en années") +
|
||||
theme_bw()
|
||||
|
||||
# Histogramme frequence
|
||||
p2 <- ggplot(df_plot, aes(x = DrivAge, y = freq)) +
|
||||
geom_bar(stat = "identity", fill = "grey", color = "black", alpha = 0.5) +
|
||||
labs(x = "Age du conducteur", y = "Frequence") +
|
||||
theme_bw()
|
||||
|
||||
plot_grid(p1, p2, labels = c("A", "B"), label_size = 12)
|
||||
```
|
||||
|
||||
```{r}
|
||||
df_plot <- dat |>
|
||||
group_by(DrivAge) |>
|
||||
summarize(
|
||||
exp = sum(Exposure),
|
||||
nb_claims = sum(ClaimNb),
|
||||
freq = sum(ClaimNb) / sum(Exposure)
|
||||
)
|
||||
|
||||
# Scatter plot frequence
|
||||
p3 <- ggplot(df_plot, aes(x = DrivAge, y = freq)) +
|
||||
geom_point() +
|
||||
geom_smooth() +
|
||||
labs(x = "Age du conducteur", y = "Frequence") +
|
||||
theme_bw()
|
||||
p3
|
||||
```
|
||||
|
||||
### Etape 4 : Examiner l'intéraction avec une autre variable {.unnumbered}
|
||||
|
||||
::: exercise-box
|
||||
A partir du scatter plot réalisé à l'étape précédente, distinguer les
|
||||
évolutions de fréquence en fonction de `DrivAge` et de `BonusMalus`.
|
||||
|
||||
Ce graphique vous paraît-il transmettre un message clair ? Proposez des
|
||||
améliorations en modifiant les variables `DrivAge` et `BonusMalus`.
|
||||
:::
|
||||
|
||||
```{r}
|
||||
# On regroupe selon les modalites de la DrivAge et de Area
|
||||
# l'exposition, le nombre de sinistres et la frequence
|
||||
df_plot <- dat |>
|
||||
group_by(DrivAge, BonusMalus) |>
|
||||
summarize(
|
||||
exp = sum(Exposure),
|
||||
nb_claims = sum(ClaimNb),
|
||||
freq = sum(ClaimNb) / sum(Exposure)
|
||||
)
|
||||
|
||||
p4 <- ggplot(df_plot, aes(x = DrivAge, y = freq, color = BonusMalus)) +
|
||||
geom_point() +
|
||||
geom_smooth() +
|
||||
labs(x = "Age du conducteur", y = "Frequence", color = "Bonus-Malus") +
|
||||
theme_bw()
|
||||
p4
|
||||
```
|
||||
|
||||
On propose 4 ajustements :
|
||||
|
||||
- Exclure les âges extrêmes au-delà de 85 ans pour lesquels
|
||||
l'exposition est très faible.
|
||||
- Faire des classes d'âges.
|
||||
- Limiter le Bonus-Malus à 125.
|
||||
- Faire des classes de Bonus-Malus.
|
||||
|
||||
```{r}
|
||||
# Classes d'âges pour Bonus-Malus
|
||||
lim_classes <- c(50, 75, 100, 125, Inf)
|
||||
|
||||
# Exclusion des donnees "extremes" et faire les regroupement
|
||||
df_plot <- dat |>
|
||||
filter(DrivAge <= 85, BonusMalus <= 125) |>
|
||||
# regroupement en classes d'ages de 5 ans
|
||||
mutate(DrivAge = ceiling(pmin(DrivAge, 85) / 5) * 5) |>
|
||||
mutate(
|
||||
BonusMalus = cut(BonusMalus, breaks = lim_classes, include.lowest = TRUE)
|
||||
)
|
||||
|
||||
# On regroupe selon les modalites de la DrivAge et de Area
|
||||
# l'exposition, le nombre de sinistres et la frequence
|
||||
df_plot <- df_plot |>
|
||||
group_by(DrivAge, BonusMalus) |>
|
||||
summarize(
|
||||
exp = sum(Exposure),
|
||||
nb_claims = sum(ClaimNb),
|
||||
freq = sum(ClaimNb) / sum(Exposure)
|
||||
)
|
||||
|
||||
# Scatter plot frequence
|
||||
p5 <- ggplot(df_plot, aes(x = DrivAge, y = freq, color = BonusMalus)) +
|
||||
geom_point() +
|
||||
geom_smooth() +
|
||||
labs(x = "Age du conducteur", y = "Frequence", color = "Bonus-Malus") +
|
||||
theme_bw()
|
||||
p5
|
||||
```
|
||||
|
||||
### Conclure {.unnumbered}
|
||||
|
||||
::: exercise-box
|
||||
Comparer à présenter comment l'exposition se répartie entre âge et
|
||||
bonus-malus.
|
||||
:::
|
||||
|
||||
```{r, fig.height = 6, fig.width = 12}
|
||||
# Histogramme plot frequence
|
||||
p6 <- ggplot(df_plot, aes(x = DrivAge, y = exp, fill = BonusMalus)) +
|
||||
geom_bar(stat = "identity", color = "black", alpha = 0.5) +
|
||||
scale_x_continuous(breaks = seq(20, 85, 20), limits = c(20, 85)) +
|
||||
labs(x = "Age du conducteur", y = "Exposition en années") +
|
||||
theme_bw()
|
||||
|
||||
|
||||
# Ajustement des legendes pour faire un graphique multiple
|
||||
p5 <- p5 +
|
||||
theme(
|
||||
legend.position = "bottom"
|
||||
) +
|
||||
labs(color = "BonusMalus")
|
||||
p6 <- p6 +
|
||||
theme(
|
||||
legend.position = "bottom"
|
||||
) +
|
||||
labs(fill = "BonusMalus")
|
||||
|
||||
# Creation d'un graphique avec deux figures et une légende commune
|
||||
plot_grid(p6, p5, ncol = 2)
|
||||
```
|
||||
|
||||
### Bonus - Analyse des couples {.unnumbered}
|
||||
|
||||
::: exercise-box
|
||||
En traitant toutes les variables comme des variables catégorielles,
|
||||
analyser graphiquement comment évolue la fréquence de sinistres selon
|
||||
les couples de variables.
|
||||
|
||||
Compléter pour cela la fonction suivante et appliquer la à différents
|
||||
couples.
|
||||
|
||||
```{r}
|
||||
# Fonction d'analyse bivariée
|
||||
# df : nom du data.frame
|
||||
# var1 : nom de la variable explicative 1
|
||||
# var2 : nom de la variable explicative 2
|
||||
plot_pairwise_disc <- function(df, var1, var2) {
|
||||
df <- rename(df, "varx" = all_of(var1), "vary" = all_of(var2))
|
||||
|
||||
# replace variable vname by the binning variable
|
||||
if (is.numeric(df$varx)) {
|
||||
df <- df |>
|
||||
mutate(varx = ntile(varx, 5))
|
||||
}
|
||||
|
||||
if (is.numeric(df$vary)) {
|
||||
df <- df |>
|
||||
mutate(vary = ntile(vary, 5), vary = factor(vary))
|
||||
}
|
||||
|
||||
df |>
|
||||
group_by(varx, vary) |>
|
||||
summarize(
|
||||
exp = sum(Exposure),
|
||||
nb_claims = sum(ClaimNb),
|
||||
freq = sum(ClaimNb) / sum(Exposure), .groups = "drop"
|
||||
) |>
|
||||
ggplot(aes(x = varx, y = freq, colour = vary, group = vary), alpha = 0.3) +
|
||||
geom_point() +
|
||||
geom_line() +
|
||||
theme_bw() +
|
||||
labs(x = var1, y = "Frequence", colour = var2)
|
||||
}
|
||||
```
|
||||
|
||||
```{r}
|
||||
p1 <- plot_pairwise_disc(dat, "DrivAge", "BonusMalus")
|
||||
p2 <- plot_pairwise_disc(dat, "VehAge", "BonusMalus")
|
||||
p3 <- plot_pairwise_disc(dat, "BonusMalus", "VehBrand")
|
||||
p4 <- plot_pairwise_disc(dat, "VehBrand", "Area")
|
||||
p5 <- plot_pairwise_disc(dat, "BonusMalus", "VehGas")
|
||||
p6 <- plot_pairwise_disc(dat, "BonusMalus", "Area")
|
||||
p7 <- plot_pairwise_disc(dat, "DrivAge", "Area")
|
||||
p8 <- plot_pairwise_disc(dat, "VehPower", "VehGas")
|
||||
grid.arrange(p1, p2, p3, p4, p5, p6, p7, p8, ncol = 2)
|
||||
```
|
||||
:::
|
||||
|
||||
# Informations de session {.unnumbered}
|
||||
|
||||
```{r}
|
||||
sessionInfo()
|
||||
```
|
||||
|
||||
# Références
|
||||
1345
M2/Data Visualisation/tp1/tp1.html
Normal file
|
After Width: | Height: | Size: 206 KiB |
|
After Width: | Height: | Size: 94 KiB |
|
After Width: | Height: | Size: 133 KiB |
|
After Width: | Height: | Size: 152 KiB |
|
After Width: | Height: | Size: 174 KiB |
|
After Width: | Height: | Size: 180 KiB |
|
After Width: | Height: | Size: 465 KiB |
|
After Width: | Height: | Size: 161 KiB |
|
After Width: | Height: | Size: 181 KiB |
|
After Width: | Height: | Size: 146 KiB |
|
After Width: | Height: | Size: 194 KiB |
|
After Width: | Height: | Size: 207 KiB |
|
After Width: | Height: | Size: 187 KiB |