Refactor code to use forward-pipe operator for better readability and consistency

This commit is contained in:
2026-01-27 17:19:44 +01:00
parent 4697570bcc
commit 5d0c1d2b4e
6 changed files with 82 additions and 84 deletions

View File

@@ -7,7 +7,7 @@ library(dplyr)
```{r}
df <- data.frame(
team = c('A', 'A', 'B', 'B', 'B', 'B', 'C', 'C'),
team = c("A", "A", "B", "B", "B", "B", "C", "C"),
points = c(25, 12, 15, 14, 19, 23, 25, 29)
)
@@ -22,24 +22,24 @@ one_hot_data
```{r}
train <- data.frame(
target = c(10, 20, 15),
cat_col1 = c('city1', 'city2', 'city1'),
cat_col2 = c('james', 'adam', 'charles')
cat_col1 = c("city1", "city2", "city1"),
cat_col2 = c("james", "adam", "charles")
)
global_mean <- mean(train$target)
alpha <- 10
target_encoding <- train %>%
group_by(cat_col1) %>%
target_encoding <- train |>
group_by(cat_col1) |>
summarise(
n = n(),
sum_target = sum(target),
cat_col1_te = (sum_target + (alpha * global_mean)) / (n + alpha),
.groups = "drop"
) %>%
) |>
select(cat_col1, cat_col1_te)
train <- train %>% left_join(target_encoding, by = "cat_col1")
train <- train |> left_join(target_encoding, by = "cat_col1")
```
# Frequential Encoding
@@ -47,7 +47,7 @@ train <- train %>% left_join(target_encoding, by = "cat_col1")
```{r}
df <- data.frame(
color = c('blue', 'red', 'blue', 'green'),
color = c("blue", "red", "blue", "green"),
value = c(10, 20, 10, 30)
)
```

View File

@@ -1,7 +1,8 @@
# Objectifs pédagogiques
# Comprendre la représentation matricielle d'une image.
# Interpréter les centroïdes comme une palette de couleurs optimale (résumé).
# Analyser le compromis entre distorsion (perte de qualité) et taux de compression.
setwd("~/Workspace/studies/M2/Clustering In Practice")
library(jpeg)
@@ -25,7 +26,6 @@ head(img_matrix)
k <- 8
# Application de K-means
# On augmente iter.max car la convergence sur des milliers de pixels peut être lente
set.seed(123)
km_model <- kmeans(img_matrix, centers = k, iter.max = 20, nstart = 3)
@@ -42,16 +42,16 @@ img_compressed <- array(img_compressed_matrix, dim = dims)
# Affichage comparatif
par(mfrow = c(1, 2), mar = c(1, 1, 1, 1))
plot(0, 0, type='n', axes=FALSE, ann=FALSE)
plot(0, 0, type = "n", axes = FALSE, ann = FALSE)
rasterImage(img, -1, -1, 1, 1)
title("Originale (Millions de couleurs)")
plot(0, 0, type='n', axes=FALSE, ann=FALSE)
plot(0, 0, type = "n", axes = FALSE, ann = FALSE)
rasterImage(img_compressed, -1, -1, 1, 1)
title(paste("Compressée (k =", k, ")"))
# 4. Questions : coût de l'information (Distorsion)
# Calculez l'erreur quadratique moyenne (MSE) entre l'image originale et
# Calculez l'erreur quadratique moyenne (MSE) entre l'image originale et
# l'image compressée :
# Plus $k$ est petit, plus le résumé est ..., plus le MSE .....
@@ -64,7 +64,7 @@ mse_imager <- function(img1, img2) {
# Ici, on redimensionne img2 sur la taille d'img1
img2 <- imresize(img2, size_x = width(img1), size_y = height(img1))
if (spectrum(img2) != spectrum(img1)) {
img2 <- grayscale(img2) # fallback simple si nb de canaux diffère
img2 <- grayscale(img2) # fallback simple si nb de canaux diffère
img1 <- grayscale(img1)
}
}
@@ -102,71 +102,75 @@ elbow_wss <- function(X, ks = 2:32, nstart = 10, scale_data = FALSE) {
X <- scale(X)
}
wss <- numeric(length(ks))
# Cas k = 1 : WSS = TSS (variance totale)
total_ss <- sum(scale(X, scale = FALSE)^2) # TSS
total_ss <- sum(scale(X, scale = FALSE)^2) # TSS
for (i in seq_along(ks)) {
k <- ks[i]
cat(" k =", k, "\n")
if (k == 1) {
wss[i] <- total_ss
} else {
set.seed(123) # reproductible
set.seed(123) # reproductible
km <- kmeans(X, centers = k, nstart = nstart, iter.max = 100)
wss[i] <- km$tot.withinss
}
}
plot(ks, wss, type = "b", pch = 19, xlab = "Nombre de clusters (k)",
ylab = "Inertie intra-classe (WSS)",
main = "Méthode du coude (k-means)")
plot(ks, wss,
type = "b", pch = 19, xlab = "Nombre de clusters (k)",
ylab = "Inertie intra-classe (WSS)",
main = "Méthode du coude (k-means)"
)
grid()
# invisible(data.frame(k = ks, WSS = wss))
# invisible(data.frame(k = ks, WSS = wss))
}
# Exemple d'utilisation :
res <- elbow_wss(img_compressed, ks = 2:32, nstart = 20, scale_data = FALSE)
res <- elbow_wss(img_compressed, ks = 2:32, nstart = 20, scale_data = FALSE)
###############################################################################
elbow_wss_safe <- function(X, ks = 2:32, nstart = 20, scale_data = FALSE, seed = 123) {
X <- as.matrix(X)
if (scale_data) X <- scale(X)
set.seed(seed)
# Nombre de lignes distinctes
n_unique <- nrow(unique(X))
if (n_unique < 2) stop("Moins de 2 points distincts : k-means n'a pas de sens.")
# Tronquer ks si nécessaire
ks <- ks[ks <= n_unique]
if (length(ks) == 0) stop("Tous les k demandés dépassent le nombre de points distincts.")
wss <- numeric(length(ks))
# TSS (k = 1)
total_ss <- sum(scale(X, scale = FALSE)^2)
for (i in seq_along(ks)) {
k <- ks[i]
cat(" k =", k, "\n")
if (k == 1) {
wss[i] <- total_ss
} else {
km <- kmeans(X, centers = k, nstart = nstart, iter.max = 100)
wss[i] <- km$tot.withinss
}
}
plot(ks, wss, type = "b", pch = 19, xlab = "Nombre de clusters (k)",
ylab = "Inertie intra-classe (WSS)", main = "Méthode du coude (k-means)")
axis(1, at = ks)
grid()
# invisible(data.frame(k = ks, WSS = wss))
}
# Exemple :
res <- elbow_wss_safe(img_compressed, ks = 2:32, nstart = 20)
elbow_wss_safe <- function(X, ks = 2:32, nstart = 20, scale_data = FALSE, seed = 123) {
X <- as.matrix(X)
if (scale_data) X <- scale(X)
set.seed(seed)
# Nombre de lignes distinctes
n_unique <- nrow(unique(X))
if (n_unique < 2) stop("Moins de 2 points distincts : k-means n'a pas de sens.")
# Tronquer ks si nécessaire
ks <- ks[ks <= n_unique]
if (length(ks) == 0) stop("Tous les k demandés dépassent le nombre de points distincts.")
wss <- numeric(length(ks))
# TSS (k = 1)
total_ss <- sum(scale(X, scale = FALSE)^2)
for (i in seq_along(ks)) {
k <- ks[i]
cat(" k =", k, "\n")
if (k == 1) {
wss[i] <- total_ss
} else {
km <- kmeans(X, centers = k, nstart = nstart, iter.max = 100)
wss[i] <- km$tot.withinss
}
}
plot(ks, wss,
type = "b", pch = 19, xlab = "Nombre de clusters (k)",
ylab = "Inertie intra-classe (WSS)", main = "Méthode du coude (k-means)"
)
axis(1, at = ks)
grid()
# invisible(data.frame(k = ks, WSS = wss))
}
# Exemple :
res <- elbow_wss_safe(img_compressed, ks = 2:32, nstart = 20)
@@ -175,11 +179,11 @@ elbow_wss <- function(X, ks = 2:32, nstart = 10, scale_data = FALSE) {
jpeg("./data/image_compressed.jpg")
# Afficher l'image compressée dans le fichier
plot(0, 0, type='n', axes=FALSE, ann=FALSE)
plot(0, 0, type = "n", axes = FALSE, ann = FALSE)
rasterImage(img_compressed, -1, -1, 1, 1)
info <- file.info("./data/PampasGrass.jpg")
(taille_octets_reelle <- info$size/1024)
(taille_octets_reelle <- info$size / 1024)
info <- file.info("./data/image_compressed.jpg")
(taille_octets_compresse <- info$size/1024)
(taille_octets_compresse <- info$size / 1024)

View File

@@ -185,9 +185,9 @@ cor_mat <- cor(vars_candidates, use = "pairwise.complete.obs")
```
```{r}
cor_df <- as.data.frame(cor_mat) %>%
tibble::rownames_to_column(var = "Var1") %>%
pivot_longer(-Var1, names_to = "Var2", values_to = "r") %>%
cor_df <- as.data.frame(cor_mat) |>
tibble::rownames_to_column(var = "Var1") |>
pivot_longer(-Var1, names_to = "Var2", values_to = "r") |>
mutate(
Var1 = factor(Var1, levels = unique(Var1)),
Var2 = factor(Var2, levels = rev(unique(Var1)))
@@ -503,7 +503,9 @@ Bien que nous ayons appliqué une transformation logarithmique pour corriger l'a
Nous appliquons donc une standardisation (Z-score) : $z = \frac{x - \mu}{\sigma}$
```{r}
data_scaled <- tb_ready |> select(log_inc, log_mort) |> scale()
data_scaled <- tb_ready |>
select(log_inc, log_mort) |>
scale()
check_table <- data.frame(
Variable = c("Incidence (Log)", "Mortalité (Log)"),

View File

@@ -43,13 +43,11 @@ ui <- shinydashboard::dashboardPage(
tabName = "methodo",
icon = icon("info-circle")
),
menuItem(
"Vue d'Ensemble",
tabName = "dashboard",
icon = icon("dashboard")
),
menuItem("Données Brutes", tabName = "data", icon = icon("table")),
# Footer - Informations et crédits
@@ -66,7 +64,6 @@ ui <- shinydashboard::dashboardPage(
)
)
),
hr(),
# Filtre par Région
@@ -147,7 +144,6 @@ ui <- shinydashboard::dashboardPage(
plotlyOutput("cluster_scatter", height = "530px")
)
),
fluidRow(
# Plot des tendances
box(
@@ -190,7 +186,6 @@ ui <- shinydashboard::dashboardPage(
# Page 3 - Méthodologie
tabItem(
tabName = "methodo",
fluidRow(
# Indicateurs OMS
box(
@@ -307,7 +302,6 @@ ui <- shinydashboard::dashboardPage(
p("Dernière mise à jour du dataset : Octobre 2024.")
),
),
column(
width = 6,
box(
@@ -319,7 +313,6 @@ ui <- shinydashboard::dashboardPage(
p(
"Afin de synthétiser l'information et de faciliter la prise de décision, j'ai appliqué un algorithme d'apprentissage non supervisé (K-Means) pour regrouper les pays ayant des profils épidémiques similaires sous 4 clusters."
),
h4("Méthodologie"),
tags$ul(
tags$li(
@@ -335,7 +328,6 @@ ui <- shinydashboard::dashboardPage(
"Utilisation de `set.seed(123)` pour garantir la reproductibilité des résultats."
)
),
h4("Interprétation des 3 Groupes"),
# Tableau des Groupes
@@ -395,7 +387,6 @@ ui <- shinydashboard::dashboardPage(
status = "primary",
solidHeader = TRUE,
width = 12,
tags$p(
"Ce projet suit une approche Open Science.",
style = "font-style: italic;"
@@ -403,7 +394,6 @@ ui <- shinydashboard::dashboardPage(
tags$p(
"L'intégralité du code source (Rmd, App) ainsi que la notice technique (PDF) sont disponibles en libre accès sur le dépôt GitHub."
),
tags$a(
href = "https://go.arthurdanjou.fr/datavis-code",
target = "_blank",
@@ -524,7 +514,9 @@ server <- function(input, output, session) {
# KPI - Pire pays
output$kpi_worst_country <- shinydashboard::renderValueBox({
data <- filtered_data()
worst <- data |> arrange(desc(e_inc_100k)) |> slice(1)
worst <- data |>
arrange(desc(e_inc_100k)) |>
slice(1)
if (nrow(worst) > 0) {
valueBox(
@@ -743,7 +735,7 @@ server <- function(input, output, session) {
output$cluster_scatter <- plotly::renderPlotly({
data <- filtered_data()
sel_iso <- selected_country()
highlight_data <- data %>% filter(iso3 == sel_iso)
highlight_data <- data |> filter(iso3 == sel_iso)
p <- ggplot(data, aes(x = e_inc_100k, y = e_mort_exc_tbhiv_100k)) +
geom_point(

View File

@@ -2056,7 +2056,7 @@
"Type": "Package",
"Title": "A Forward-Pipe Operator for R",
"Authors@R": "c( person(\"Stefan Milton\", \"Bache\", , \"stefan@stefanbache.dk\", role = c(\"aut\", \"cph\"), comment = \"Original author and creator of magrittr\"), person(\"Hadley\", \"Wickham\", , \"hadley@posit.co\", role = \"aut\"), person(\"Lionel\", \"Henry\", , \"lionel@posit.co\", role = \"cre\"), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\"), comment = c(ROR = \"03wc8by49\")) )",
"Description": "Provides a mechanism for chaining commands with a new forward-pipe operator, %>%. This operator will forward a value, or the result of an expression, into the next function call/expression. There is flexible support for the type of right-hand side expressions. For more information, see package vignette. To quote Rene Magritte, \"Ceci n'est pas un pipe.\"",
"Description": "Provides a mechanism for chaining commands with a new forward-pipe operator, |>. This operator will forward a value, or the result of an expression, into the next function call/expression. There is flexible support for the type of right-hand side expressions. For more information, see package vignette. To quote Rene Magritte, \"Ceci n'est pas un pipe.\"",
"License": "MIT + file LICENSE",
"URL": "https://magrittr.tidyverse.org, https://github.com/tidyverse/magrittr",
"BugReports": "https://github.com/tidyverse/magrittr/issues",

View File

@@ -32,8 +32,8 @@ Note that the `echo = FALSE` parameter was added to the code chunk to prevent pr
```{r}
library(kableExtra)
mtcars[1:5, 1:5] %>%
kbl() %>%
mtcars[1:5, 1:5] |>
kbl() |>
kable_styling()
```