mirror of
https://github.com/ArthurDanjou/ArtStudies.git
synced 2026-01-28 08:56:10 +01:00
Refactor code to use forward-pipe operator for better readability and consistency
This commit is contained in:
@@ -7,7 +7,7 @@ library(dplyr)
|
||||
|
||||
```{r}
|
||||
df <- data.frame(
|
||||
team = c('A', 'A', 'B', 'B', 'B', 'B', 'C', 'C'),
|
||||
team = c("A", "A", "B", "B", "B", "B", "C", "C"),
|
||||
points = c(25, 12, 15, 14, 19, 23, 25, 29)
|
||||
)
|
||||
|
||||
@@ -22,24 +22,24 @@ one_hot_data
|
||||
```{r}
|
||||
train <- data.frame(
|
||||
target = c(10, 20, 15),
|
||||
cat_col1 = c('city1', 'city2', 'city1'),
|
||||
cat_col2 = c('james', 'adam', 'charles')
|
||||
cat_col1 = c("city1", "city2", "city1"),
|
||||
cat_col2 = c("james", "adam", "charles")
|
||||
)
|
||||
|
||||
global_mean <- mean(train$target)
|
||||
alpha <- 10
|
||||
|
||||
target_encoding <- train %>%
|
||||
group_by(cat_col1) %>%
|
||||
target_encoding <- train |>
|
||||
group_by(cat_col1) |>
|
||||
summarise(
|
||||
n = n(),
|
||||
sum_target = sum(target),
|
||||
cat_col1_te = (sum_target + (alpha * global_mean)) / (n + alpha),
|
||||
.groups = "drop"
|
||||
) %>%
|
||||
) |>
|
||||
select(cat_col1, cat_col1_te)
|
||||
|
||||
train <- train %>% left_join(target_encoding, by = "cat_col1")
|
||||
train <- train |> left_join(target_encoding, by = "cat_col1")
|
||||
```
|
||||
|
||||
# Frequential Encoding
|
||||
@@ -47,7 +47,7 @@ train <- train %>% left_join(target_encoding, by = "cat_col1")
|
||||
|
||||
```{r}
|
||||
df <- data.frame(
|
||||
color = c('blue', 'red', 'blue', 'green'),
|
||||
color = c("blue", "red", "blue", "green"),
|
||||
value = c(10, 20, 10, 30)
|
||||
)
|
||||
```
|
||||
@@ -1,7 +1,8 @@
|
||||
# Objectifs pédagogiques
|
||||
# Comprendre la représentation matricielle d'une image.
|
||||
# Interpréter les centroïdes comme une palette de couleurs optimale (résumé).
|
||||
# Analyser le compromis entre distorsion (perte de qualité) et taux de compression.
|
||||
|
||||
setwd("~/Workspace/studies/M2/Clustering In Practice")
|
||||
|
||||
library(jpeg)
|
||||
|
||||
@@ -25,7 +26,6 @@ head(img_matrix)
|
||||
k <- 8
|
||||
|
||||
# Application de K-means
|
||||
# On augmente iter.max car la convergence sur des milliers de pixels peut être lente
|
||||
set.seed(123)
|
||||
km_model <- kmeans(img_matrix, centers = k, iter.max = 20, nstart = 3)
|
||||
|
||||
@@ -42,16 +42,16 @@ img_compressed <- array(img_compressed_matrix, dim = dims)
|
||||
|
||||
# Affichage comparatif
|
||||
par(mfrow = c(1, 2), mar = c(1, 1, 1, 1))
|
||||
plot(0, 0, type='n', axes=FALSE, ann=FALSE)
|
||||
plot(0, 0, type = "n", axes = FALSE, ann = FALSE)
|
||||
rasterImage(img, -1, -1, 1, 1)
|
||||
title("Originale (Millions de couleurs)")
|
||||
|
||||
plot(0, 0, type='n', axes=FALSE, ann=FALSE)
|
||||
plot(0, 0, type = "n", axes = FALSE, ann = FALSE)
|
||||
rasterImage(img_compressed, -1, -1, 1, 1)
|
||||
title(paste("Compressée (k =", k, ")"))
|
||||
|
||||
# 4. Questions : coût de l'information (Distorsion)
|
||||
# Calculez l'erreur quadratique moyenne (MSE) entre l'image originale et
|
||||
# Calculez l'erreur quadratique moyenne (MSE) entre l'image originale et
|
||||
# l'image compressée :
|
||||
# Plus $k$ est petit, plus le résumé est ..., plus le MSE .....
|
||||
|
||||
@@ -64,7 +64,7 @@ mse_imager <- function(img1, img2) {
|
||||
# Ici, on redimensionne img2 sur la taille d'img1
|
||||
img2 <- imresize(img2, size_x = width(img1), size_y = height(img1))
|
||||
if (spectrum(img2) != spectrum(img1)) {
|
||||
img2 <- grayscale(img2) # fallback simple si nb de canaux diffère
|
||||
img2 <- grayscale(img2) # fallback simple si nb de canaux diffère
|
||||
img1 <- grayscale(img1)
|
||||
}
|
||||
}
|
||||
@@ -102,71 +102,75 @@ elbow_wss <- function(X, ks = 2:32, nstart = 10, scale_data = FALSE) {
|
||||
X <- scale(X)
|
||||
}
|
||||
wss <- numeric(length(ks))
|
||||
|
||||
|
||||
# Cas k = 1 : WSS = TSS (variance totale)
|
||||
total_ss <- sum(scale(X, scale = FALSE)^2) # TSS
|
||||
total_ss <- sum(scale(X, scale = FALSE)^2) # TSS
|
||||
for (i in seq_along(ks)) {
|
||||
k <- ks[i]
|
||||
cat(" k =", k, "\n")
|
||||
if (k == 1) {
|
||||
wss[i] <- total_ss
|
||||
} else {
|
||||
set.seed(123) # reproductible
|
||||
set.seed(123) # reproductible
|
||||
km <- kmeans(X, centers = k, nstart = nstart, iter.max = 100)
|
||||
wss[i] <- km$tot.withinss
|
||||
}
|
||||
}
|
||||
|
||||
plot(ks, wss, type = "b", pch = 19, xlab = "Nombre de clusters (k)",
|
||||
ylab = "Inertie intra-classe (WSS)",
|
||||
main = "Méthode du coude (k-means)")
|
||||
|
||||
plot(ks, wss,
|
||||
type = "b", pch = 19, xlab = "Nombre de clusters (k)",
|
||||
ylab = "Inertie intra-classe (WSS)",
|
||||
main = "Méthode du coude (k-means)"
|
||||
)
|
||||
grid()
|
||||
# invisible(data.frame(k = ks, WSS = wss))
|
||||
# invisible(data.frame(k = ks, WSS = wss))
|
||||
}
|
||||
|
||||
# Exemple d'utilisation :
|
||||
res <- elbow_wss(img_compressed, ks = 2:32, nstart = 20, scale_data = FALSE)
|
||||
res <- elbow_wss(img_compressed, ks = 2:32, nstart = 20, scale_data = FALSE)
|
||||
|
||||
###############################################################################
|
||||
|
||||
elbow_wss_safe <- function(X, ks = 2:32, nstart = 20, scale_data = FALSE, seed = 123) {
|
||||
X <- as.matrix(X)
|
||||
if (scale_data) X <- scale(X)
|
||||
set.seed(seed)
|
||||
|
||||
# Nombre de lignes distinctes
|
||||
n_unique <- nrow(unique(X))
|
||||
if (n_unique < 2) stop("Moins de 2 points distincts : k-means n'a pas de sens.")
|
||||
|
||||
# Tronquer ks si nécessaire
|
||||
ks <- ks[ks <= n_unique]
|
||||
if (length(ks) == 0) stop("Tous les k demandés dépassent le nombre de points distincts.")
|
||||
|
||||
wss <- numeric(length(ks))
|
||||
# TSS (k = 1)
|
||||
total_ss <- sum(scale(X, scale = FALSE)^2)
|
||||
|
||||
for (i in seq_along(ks)) {
|
||||
k <- ks[i]
|
||||
cat(" k =", k, "\n")
|
||||
if (k == 1) {
|
||||
wss[i] <- total_ss
|
||||
} else {
|
||||
km <- kmeans(X, centers = k, nstart = nstart, iter.max = 100)
|
||||
wss[i] <- km$tot.withinss
|
||||
}
|
||||
}
|
||||
|
||||
plot(ks, wss, type = "b", pch = 19, xlab = "Nombre de clusters (k)",
|
||||
ylab = "Inertie intra-classe (WSS)", main = "Méthode du coude (k-means)")
|
||||
axis(1, at = ks)
|
||||
grid()
|
||||
# invisible(data.frame(k = ks, WSS = wss))
|
||||
}
|
||||
|
||||
# Exemple :
|
||||
res <- elbow_wss_safe(img_compressed, ks = 2:32, nstart = 20)
|
||||
|
||||
|
||||
elbow_wss_safe <- function(X, ks = 2:32, nstart = 20, scale_data = FALSE, seed = 123) {
|
||||
X <- as.matrix(X)
|
||||
if (scale_data) X <- scale(X)
|
||||
set.seed(seed)
|
||||
|
||||
# Nombre de lignes distinctes
|
||||
n_unique <- nrow(unique(X))
|
||||
if (n_unique < 2) stop("Moins de 2 points distincts : k-means n'a pas de sens.")
|
||||
|
||||
# Tronquer ks si nécessaire
|
||||
ks <- ks[ks <= n_unique]
|
||||
if (length(ks) == 0) stop("Tous les k demandés dépassent le nombre de points distincts.")
|
||||
|
||||
wss <- numeric(length(ks))
|
||||
# TSS (k = 1)
|
||||
total_ss <- sum(scale(X, scale = FALSE)^2)
|
||||
|
||||
for (i in seq_along(ks)) {
|
||||
k <- ks[i]
|
||||
cat(" k =", k, "\n")
|
||||
if (k == 1) {
|
||||
wss[i] <- total_ss
|
||||
} else {
|
||||
km <- kmeans(X, centers = k, nstart = nstart, iter.max = 100)
|
||||
wss[i] <- km$tot.withinss
|
||||
}
|
||||
}
|
||||
|
||||
plot(ks, wss,
|
||||
type = "b", pch = 19, xlab = "Nombre de clusters (k)",
|
||||
ylab = "Inertie intra-classe (WSS)", main = "Méthode du coude (k-means)"
|
||||
)
|
||||
axis(1, at = ks)
|
||||
grid()
|
||||
# invisible(data.frame(k = ks, WSS = wss))
|
||||
}
|
||||
|
||||
# Exemple :
|
||||
res <- elbow_wss_safe(img_compressed, ks = 2:32, nstart = 20)
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -175,11 +179,11 @@ elbow_wss <- function(X, ks = 2:32, nstart = 10, scale_data = FALSE) {
|
||||
jpeg("./data/image_compressed.jpg")
|
||||
|
||||
# Afficher l'image compressée dans le fichier
|
||||
plot(0, 0, type='n', axes=FALSE, ann=FALSE)
|
||||
plot(0, 0, type = "n", axes = FALSE, ann = FALSE)
|
||||
rasterImage(img_compressed, -1, -1, 1, 1)
|
||||
|
||||
info <- file.info("./data/PampasGrass.jpg")
|
||||
(taille_octets_reelle <- info$size/1024)
|
||||
(taille_octets_reelle <- info$size / 1024)
|
||||
|
||||
info <- file.info("./data/image_compressed.jpg")
|
||||
(taille_octets_compresse <- info$size/1024)
|
||||
(taille_octets_compresse <- info$size / 1024)
|
||||
|
||||
@@ -185,9 +185,9 @@ cor_mat <- cor(vars_candidates, use = "pairwise.complete.obs")
|
||||
```
|
||||
|
||||
```{r}
|
||||
cor_df <- as.data.frame(cor_mat) %>%
|
||||
tibble::rownames_to_column(var = "Var1") %>%
|
||||
pivot_longer(-Var1, names_to = "Var2", values_to = "r") %>%
|
||||
cor_df <- as.data.frame(cor_mat) |>
|
||||
tibble::rownames_to_column(var = "Var1") |>
|
||||
pivot_longer(-Var1, names_to = "Var2", values_to = "r") |>
|
||||
mutate(
|
||||
Var1 = factor(Var1, levels = unique(Var1)),
|
||||
Var2 = factor(Var2, levels = rev(unique(Var1)))
|
||||
@@ -503,7 +503,9 @@ Bien que nous ayons appliqué une transformation logarithmique pour corriger l'a
|
||||
Nous appliquons donc une standardisation (Z-score) : $z = \frac{x - \mu}{\sigma}$
|
||||
|
||||
```{r}
|
||||
data_scaled <- tb_ready |> select(log_inc, log_mort) |> scale()
|
||||
data_scaled <- tb_ready |>
|
||||
select(log_inc, log_mort) |>
|
||||
scale()
|
||||
|
||||
check_table <- data.frame(
|
||||
Variable = c("Incidence (Log)", "Mortalité (Log)"),
|
||||
|
||||
@@ -43,13 +43,11 @@ ui <- shinydashboard::dashboardPage(
|
||||
tabName = "methodo",
|
||||
icon = icon("info-circle")
|
||||
),
|
||||
|
||||
menuItem(
|
||||
"Vue d'Ensemble",
|
||||
tabName = "dashboard",
|
||||
icon = icon("dashboard")
|
||||
),
|
||||
|
||||
menuItem("Données Brutes", tabName = "data", icon = icon("table")),
|
||||
|
||||
# Footer - Informations et crédits
|
||||
@@ -66,7 +64,6 @@ ui <- shinydashboard::dashboardPage(
|
||||
)
|
||||
)
|
||||
),
|
||||
|
||||
hr(),
|
||||
|
||||
# Filtre par Région
|
||||
@@ -147,7 +144,6 @@ ui <- shinydashboard::dashboardPage(
|
||||
plotlyOutput("cluster_scatter", height = "530px")
|
||||
)
|
||||
),
|
||||
|
||||
fluidRow(
|
||||
# Plot des tendances
|
||||
box(
|
||||
@@ -190,7 +186,6 @@ ui <- shinydashboard::dashboardPage(
|
||||
# Page 3 - Méthodologie
|
||||
tabItem(
|
||||
tabName = "methodo",
|
||||
|
||||
fluidRow(
|
||||
# Indicateurs OMS
|
||||
box(
|
||||
@@ -307,7 +302,6 @@ ui <- shinydashboard::dashboardPage(
|
||||
p("Dernière mise à jour du dataset : Octobre 2024.")
|
||||
),
|
||||
),
|
||||
|
||||
column(
|
||||
width = 6,
|
||||
box(
|
||||
@@ -319,7 +313,6 @@ ui <- shinydashboard::dashboardPage(
|
||||
p(
|
||||
"Afin de synthétiser l'information et de faciliter la prise de décision, j'ai appliqué un algorithme d'apprentissage non supervisé (K-Means) pour regrouper les pays ayant des profils épidémiques similaires sous 4 clusters."
|
||||
),
|
||||
|
||||
h4("Méthodologie"),
|
||||
tags$ul(
|
||||
tags$li(
|
||||
@@ -335,7 +328,6 @@ ui <- shinydashboard::dashboardPage(
|
||||
"Utilisation de `set.seed(123)` pour garantir la reproductibilité des résultats."
|
||||
)
|
||||
),
|
||||
|
||||
h4("Interprétation des 3 Groupes"),
|
||||
|
||||
# Tableau des Groupes
|
||||
@@ -395,7 +387,6 @@ ui <- shinydashboard::dashboardPage(
|
||||
status = "primary",
|
||||
solidHeader = TRUE,
|
||||
width = 12,
|
||||
|
||||
tags$p(
|
||||
"Ce projet suit une approche Open Science.",
|
||||
style = "font-style: italic;"
|
||||
@@ -403,7 +394,6 @@ ui <- shinydashboard::dashboardPage(
|
||||
tags$p(
|
||||
"L'intégralité du code source (Rmd, App) ainsi que la notice technique (PDF) sont disponibles en libre accès sur le dépôt GitHub."
|
||||
),
|
||||
|
||||
tags$a(
|
||||
href = "https://go.arthurdanjou.fr/datavis-code",
|
||||
target = "_blank",
|
||||
@@ -524,7 +514,9 @@ server <- function(input, output, session) {
|
||||
# KPI - Pire pays
|
||||
output$kpi_worst_country <- shinydashboard::renderValueBox({
|
||||
data <- filtered_data()
|
||||
worst <- data |> arrange(desc(e_inc_100k)) |> slice(1)
|
||||
worst <- data |>
|
||||
arrange(desc(e_inc_100k)) |>
|
||||
slice(1)
|
||||
|
||||
if (nrow(worst) > 0) {
|
||||
valueBox(
|
||||
@@ -743,7 +735,7 @@ server <- function(input, output, session) {
|
||||
output$cluster_scatter <- plotly::renderPlotly({
|
||||
data <- filtered_data()
|
||||
sel_iso <- selected_country()
|
||||
highlight_data <- data %>% filter(iso3 == sel_iso)
|
||||
highlight_data <- data |> filter(iso3 == sel_iso)
|
||||
|
||||
p <- ggplot(data, aes(x = e_inc_100k, y = e_mort_exc_tbhiv_100k)) +
|
||||
geom_point(
|
||||
|
||||
@@ -2056,7 +2056,7 @@
|
||||
"Type": "Package",
|
||||
"Title": "A Forward-Pipe Operator for R",
|
||||
"Authors@R": "c( person(\"Stefan Milton\", \"Bache\", , \"stefan@stefanbache.dk\", role = c(\"aut\", \"cph\"), comment = \"Original author and creator of magrittr\"), person(\"Hadley\", \"Wickham\", , \"hadley@posit.co\", role = \"aut\"), person(\"Lionel\", \"Henry\", , \"lionel@posit.co\", role = \"cre\"), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\"), comment = c(ROR = \"03wc8by49\")) )",
|
||||
"Description": "Provides a mechanism for chaining commands with a new forward-pipe operator, %>%. This operator will forward a value, or the result of an expression, into the next function call/expression. There is flexible support for the type of right-hand side expressions. For more information, see package vignette. To quote Rene Magritte, \"Ceci n'est pas un pipe.\"",
|
||||
"Description": "Provides a mechanism for chaining commands with a new forward-pipe operator, |>. This operator will forward a value, or the result of an expression, into the next function call/expression. There is flexible support for the type of right-hand side expressions. For more information, see package vignette. To quote Rene Magritte, \"Ceci n'est pas un pipe.\"",
|
||||
"License": "MIT + file LICENSE",
|
||||
"URL": "https://magrittr.tidyverse.org, https://github.com/tidyverse/magrittr",
|
||||
"BugReports": "https://github.com/tidyverse/magrittr/issues",
|
||||
|
||||
@@ -32,8 +32,8 @@ Note that the `echo = FALSE` parameter was added to the code chunk to prevent pr
|
||||
|
||||
```{r}
|
||||
library(kableExtra)
|
||||
mtcars[1:5, 1:5] %>%
|
||||
kbl() %>%
|
||||
mtcars[1:5, 1:5] |>
|
||||
kbl() |>
|
||||
kable_styling()
|
||||
```
|
||||
|
||||
|
||||
Reference in New Issue
Block a user