Refactor code to use forward-pipe operator for better readability and consistency

This commit is contained in:
2026-01-27 17:19:44 +01:00
parent 4697570bcc
commit 5d0c1d2b4e
6 changed files with 82 additions and 84 deletions

View File

@@ -7,7 +7,7 @@ library(dplyr)
```{r}
df <- data.frame(
team = c('A', 'A', 'B', 'B', 'B', 'B', 'C', 'C'),
team = c("A", "A", "B", "B", "B", "B", "C", "C"),
points = c(25, 12, 15, 14, 19, 23, 25, 29)
)
@@ -22,24 +22,24 @@ one_hot_data
```{r}
train <- data.frame(
target = c(10, 20, 15),
cat_col1 = c('city1', 'city2', 'city1'),
cat_col2 = c('james', 'adam', 'charles')
cat_col1 = c("city1", "city2", "city1"),
cat_col2 = c("james", "adam", "charles")
)
global_mean <- mean(train$target)
alpha <- 10
target_encoding <- train %>%
group_by(cat_col1) %>%
target_encoding <- train |>
group_by(cat_col1) |>
summarise(
n = n(),
sum_target = sum(target),
cat_col1_te = (sum_target + (alpha * global_mean)) / (n + alpha),
.groups = "drop"
) %>%
) |>
select(cat_col1, cat_col1_te)
train <- train %>% left_join(target_encoding, by = "cat_col1")
train <- train |> left_join(target_encoding, by = "cat_col1")
```
# Frequential Encoding
@@ -47,7 +47,7 @@ train <- train %>% left_join(target_encoding, by = "cat_col1")
```{r}
df <- data.frame(
color = c('blue', 'red', 'blue', 'green'),
color = c("blue", "red", "blue", "green"),
value = c(10, 20, 10, 30)
)
```

View File

@@ -1,7 +1,8 @@
# Objectifs pédagogiques
# Comprendre la représentation matricielle d'une image.
# Interpréter les centroïdes comme une palette de couleurs optimale (résumé).
# Analyser le compromis entre distorsion (perte de qualité) et taux de compression.
setwd("~/Workspace/studies/M2/Clustering In Practice")
library(jpeg)
@@ -25,7 +26,6 @@ head(img_matrix)
k <- 8
# Application de K-means
# On augmente iter.max car la convergence sur des milliers de pixels peut être lente
set.seed(123)
km_model <- kmeans(img_matrix, centers = k, iter.max = 20, nstart = 3)
@@ -42,16 +42,16 @@ img_compressed <- array(img_compressed_matrix, dim = dims)
# Affichage comparatif
par(mfrow = c(1, 2), mar = c(1, 1, 1, 1))
plot(0, 0, type='n', axes=FALSE, ann=FALSE)
plot(0, 0, type = "n", axes = FALSE, ann = FALSE)
rasterImage(img, -1, -1, 1, 1)
title("Originale (Millions de couleurs)")
plot(0, 0, type='n', axes=FALSE, ann=FALSE)
plot(0, 0, type = "n", axes = FALSE, ann = FALSE)
rasterImage(img_compressed, -1, -1, 1, 1)
title(paste("Compressée (k =", k, ")"))
# 4. Questions : coût de l'information (Distorsion)
# Calculez l'erreur quadratique moyenne (MSE) entre l'image originale et
# Calculez l'erreur quadratique moyenne (MSE) entre l'image originale et
# l'image compressée :
# Plus $k$ est petit, plus le résumé est ..., plus le MSE .....
@@ -64,7 +64,7 @@ mse_imager <- function(img1, img2) {
# Ici, on redimensionne img2 sur la taille d'img1
img2 <- imresize(img2, size_x = width(img1), size_y = height(img1))
if (spectrum(img2) != spectrum(img1)) {
img2 <- grayscale(img2) # fallback simple si nb de canaux diffère
img2 <- grayscale(img2) # fallback simple si nb de canaux diffère
img1 <- grayscale(img1)
}
}
@@ -102,71 +102,75 @@ elbow_wss <- function(X, ks = 2:32, nstart = 10, scale_data = FALSE) {
X <- scale(X)
}
wss <- numeric(length(ks))
# Cas k = 1 : WSS = TSS (variance totale)
total_ss <- sum(scale(X, scale = FALSE)^2) # TSS
total_ss <- sum(scale(X, scale = FALSE)^2) # TSS
for (i in seq_along(ks)) {
k <- ks[i]
cat(" k =", k, "\n")
if (k == 1) {
wss[i] <- total_ss
} else {
set.seed(123) # reproductible
set.seed(123) # reproductible
km <- kmeans(X, centers = k, nstart = nstart, iter.max = 100)
wss[i] <- km$tot.withinss
}
}
plot(ks, wss, type = "b", pch = 19, xlab = "Nombre de clusters (k)",
ylab = "Inertie intra-classe (WSS)",
main = "Méthode du coude (k-means)")
plot(ks, wss,
type = "b", pch = 19, xlab = "Nombre de clusters (k)",
ylab = "Inertie intra-classe (WSS)",
main = "Méthode du coude (k-means)"
)
grid()
# invisible(data.frame(k = ks, WSS = wss))
# invisible(data.frame(k = ks, WSS = wss))
}
# Exemple d'utilisation :
res <- elbow_wss(img_compressed, ks = 2:32, nstart = 20, scale_data = FALSE)
res <- elbow_wss(img_compressed, ks = 2:32, nstart = 20, scale_data = FALSE)
###############################################################################
elbow_wss_safe <- function(X, ks = 2:32, nstart = 20, scale_data = FALSE, seed = 123) {
X <- as.matrix(X)
if (scale_data) X <- scale(X)
set.seed(seed)
# Nombre de lignes distinctes
n_unique <- nrow(unique(X))
if (n_unique < 2) stop("Moins de 2 points distincts : k-means n'a pas de sens.")
# Tronquer ks si nécessaire
ks <- ks[ks <= n_unique]
if (length(ks) == 0) stop("Tous les k demandés dépassent le nombre de points distincts.")
wss <- numeric(length(ks))
# TSS (k = 1)
total_ss <- sum(scale(X, scale = FALSE)^2)
for (i in seq_along(ks)) {
k <- ks[i]
cat(" k =", k, "\n")
if (k == 1) {
wss[i] <- total_ss
} else {
km <- kmeans(X, centers = k, nstart = nstart, iter.max = 100)
wss[i] <- km$tot.withinss
}
}
plot(ks, wss, type = "b", pch = 19, xlab = "Nombre de clusters (k)",
ylab = "Inertie intra-classe (WSS)", main = "Méthode du coude (k-means)")
axis(1, at = ks)
grid()
# invisible(data.frame(k = ks, WSS = wss))
}
# Exemple :
res <- elbow_wss_safe(img_compressed, ks = 2:32, nstart = 20)
elbow_wss_safe <- function(X, ks = 2:32, nstart = 20, scale_data = FALSE, seed = 123) {
X <- as.matrix(X)
if (scale_data) X <- scale(X)
set.seed(seed)
# Nombre de lignes distinctes
n_unique <- nrow(unique(X))
if (n_unique < 2) stop("Moins de 2 points distincts : k-means n'a pas de sens.")
# Tronquer ks si nécessaire
ks <- ks[ks <= n_unique]
if (length(ks) == 0) stop("Tous les k demandés dépassent le nombre de points distincts.")
wss <- numeric(length(ks))
# TSS (k = 1)
total_ss <- sum(scale(X, scale = FALSE)^2)
for (i in seq_along(ks)) {
k <- ks[i]
cat(" k =", k, "\n")
if (k == 1) {
wss[i] <- total_ss
} else {
km <- kmeans(X, centers = k, nstart = nstart, iter.max = 100)
wss[i] <- km$tot.withinss
}
}
plot(ks, wss,
type = "b", pch = 19, xlab = "Nombre de clusters (k)",
ylab = "Inertie intra-classe (WSS)", main = "Méthode du coude (k-means)"
)
axis(1, at = ks)
grid()
# invisible(data.frame(k = ks, WSS = wss))
}
# Exemple :
res <- elbow_wss_safe(img_compressed, ks = 2:32, nstart = 20)
@@ -175,11 +179,11 @@ elbow_wss <- function(X, ks = 2:32, nstart = 10, scale_data = FALSE) {
jpeg("./data/image_compressed.jpg")
# Afficher l'image compressée dans le fichier
plot(0, 0, type='n', axes=FALSE, ann=FALSE)
plot(0, 0, type = "n", axes = FALSE, ann = FALSE)
rasterImage(img_compressed, -1, -1, 1, 1)
info <- file.info("./data/PampasGrass.jpg")
(taille_octets_reelle <- info$size/1024)
(taille_octets_reelle <- info$size / 1024)
info <- file.info("./data/image_compressed.jpg")
(taille_octets_compresse <- info$size/1024)
(taille_octets_compresse <- info$size / 1024)