Refactor code to use forward-pipe operator for better readability and consistency

2026-01-28 08:56:10 +01:00 · 2026-01-27 17:19:44 +01:00
parent 4697570bcc
commit 5d0c1d2b4e
6 changed files with 82 additions and 84 deletions
--- a/Practice/Encoding.Rmd
+++ b/Practice/Encoding.Rmd
@@ -7,7 +7,7 @@ library(dplyr)

 ```{r}
 df <- data.frame(
-  team = c('A', 'A', 'B', 'B', 'B', 'B', 'C', 'C'),
+  team = c("A", "A", "B", "B", "B", "B", "C", "C"),
  points = c(25, 12, 15, 14, 19, 23, 25, 29)
 )

@@ -22,24 +22,24 @@ one_hot_data
 ```{r}
 train <- data.frame(
  target = c(10, 20, 15),
-  cat_col1 = c('city1', 'city2', 'city1'),
-  cat_col2 = c('james', 'adam', 'charles')
+  cat_col1 = c("city1", "city2", "city1"),
+  cat_col2 = c("james", "adam", "charles")
 )

 global_mean <- mean(train$target)
 alpha <- 10

-target_encoding <- train %>%
-  group_by(cat_col1) %>%
+target_encoding <- train |>
+  group_by(cat_col1) |>
  summarise(
    n = n(),
    sum_target = sum(target),
    cat_col1_te = (sum_target + (alpha * global_mean)) / (n + alpha),
    .groups = "drop"
-  ) %>%
+  ) |>
  select(cat_col1, cat_col1_te)

-train <- train %>% left_join(target_encoding, by = "cat_col1")
+train <- train |> left_join(target_encoding, by = "cat_col1")
 ```

 # Frequential Encoding
@@ -47,7 +47,7 @@ train <- train %>% left_join(target_encoding, by = "cat_col1")

 ```{r}
 df <- data.frame(
-  color = c('blue', 'red', 'blue', 'green'),
+  color = c("blue", "red", "blue", "green"),
  value = c(10, 20, 10, 30)
 )
 ```
--- a/Practice/compression_image.R
+++ b/Practice/compression_image.R
@@ -1,7 +1,8 @@
 # Objectifs pédagogiques
 # Comprendre la représentation matricielle d'une image.
 # Interpréter les centroïdes comme une palette de couleurs optimale (résumé).
-# Analyser le compromis entre distorsion (perte de qualité) et taux de compression.
+
+setwd("~/Workspace/studies/M2/Clustering In Practice")

 library(jpeg)

@@ -25,7 +26,6 @@ head(img_matrix)
 k <- 8

 # Application de K-means
-# On augmente iter.max car la convergence sur des milliers de pixels peut être lente
 set.seed(123)
 km_model <- kmeans(img_matrix, centers = k, iter.max = 20, nstart = 3)

@@ -42,16 +42,16 @@ img_compressed <- array(img_compressed_matrix, dim = dims)

 # Affichage comparatif
 par(mfrow = c(1, 2), mar = c(1, 1, 1, 1))
-plot(0, 0, type='n', axes=FALSE, ann=FALSE)
+plot(0, 0, type = "n", axes = FALSE, ann = FALSE)
 rasterImage(img, -1, -1, 1, 1)
 title("Originale (Millions de couleurs)")

-plot(0, 0, type='n', axes=FALSE, ann=FALSE)
+plot(0, 0, type = "n", axes = FALSE, ann = FALSE)
 rasterImage(img_compressed, -1, -1, 1, 1)
 title(paste("Compressée (k =", k, ")"))

 # 4. Questions : coût de l'information (Distorsion)
-# Calculez l'erreur quadratique moyenne (MSE) entre l'image originale et 
+# Calculez l'erreur quadratique moyenne (MSE) entre l'image originale et
 # l'image compressée :
 # Plus $k$ est petit, plus le résumé est ..., plus le MSE .....

@@ -64,7 +64,7 @@ mse_imager <- function(img1, img2) {
    # Ici, on redimensionne img2 sur la taille d'img1
    img2 <- imresize(img2, size_x = width(img1), size_y = height(img1))
    if (spectrum(img2) != spectrum(img1)) {
-      img2 <- grayscale(img2)  # fallback simple si nb de canaux diffère
+      img2 <- grayscale(img2) # fallback simple si nb de canaux diffère
      img1 <- grayscale(img1)
    }
  }
@@ -102,71 +102,75 @@ elbow_wss <- function(X, ks = 2:32, nstart = 10, scale_data = FALSE) {
    X <- scale(X)
  }
  wss <- numeric(length(ks))
-  
+
  # Cas k = 1 : WSS = TSS (variance totale)
-  total_ss <- sum(scale(X, scale = FALSE)^2)  # TSS
+  total_ss <- sum(scale(X, scale = FALSE)^2) # TSS
  for (i in seq_along(ks)) {
    k <- ks[i]
    cat(" k =", k, "\n")
    if (k == 1) {
      wss[i] <- total_ss
    } else {
-      set.seed(123)  # reproductible
+      set.seed(123) # reproductible
      km <- kmeans(X, centers = k, nstart = nstart, iter.max = 100)
      wss[i] <- km$tot.withinss
    }
  }
-  
-  plot(ks, wss, type = "b", pch = 19, xlab = "Nombre de clusters (k)",
-       ylab = "Inertie intra-classe (WSS)",
-       main = "Méthode du coude (k-means)")
+
+  plot(ks, wss,
+    type = "b", pch = 19, xlab = "Nombre de clusters (k)",
+    ylab = "Inertie intra-classe (WSS)",
+    main = "Méthode du coude (k-means)"
+  )
  grid()
-#  invisible(data.frame(k = ks, WSS = wss))
+  #  invisible(data.frame(k = ks, WSS = wss))
 }

 # Exemple d'utilisation :
- res <- elbow_wss(img_compressed, ks = 2:32, nstart = 20, scale_data = FALSE)
+res <- elbow_wss(img_compressed, ks = 2:32, nstart = 20, scale_data = FALSE)

 ###############################################################################
- 
- elbow_wss_safe <- function(X, ks = 2:32, nstart = 20, scale_data = FALSE, seed = 123) {
-   X <- as.matrix(X)
-   if (scale_data) X <- scale(X)
-   set.seed(seed)
-   
-   # Nombre de lignes distinctes
-   n_unique <- nrow(unique(X))
-   if (n_unique < 2) stop("Moins de 2 points distincts : k-means n'a pas de sens.")
-   
-   # Tronquer ks si nécessaire
-   ks <- ks[ks <= n_unique]
-   if (length(ks) == 0) stop("Tous les k demandés dépassent le nombre de points distincts.")
-   
-   wss <- numeric(length(ks))
-   # TSS (k = 1)
-   total_ss <- sum(scale(X, scale = FALSE)^2)
-   
-   for (i in seq_along(ks)) {
-     k <- ks[i]
-     cat(" k =", k, "\n")
-     if (k == 1) {
-       wss[i] <- total_ss
-     } else {
-       km <- kmeans(X, centers = k, nstart = nstart, iter.max = 100)
-       wss[i] <- km$tot.withinss
-     }
-   }
-   
-   plot(ks, wss, type = "b", pch = 19, xlab = "Nombre de clusters (k)",
-        ylab = "Inertie intra-classe (WSS)", main = "Méthode du coude (k-means)")
-   axis(1, at = ks)
-   grid()
-#   invisible(data.frame(k = ks, WSS = wss))
- }
- 
- # Exemple :
-  res <- elbow_wss_safe(img_compressed, ks = 2:32, nstart = 20)
- 
+
+elbow_wss_safe <- function(X, ks = 2:32, nstart = 20, scale_data = FALSE, seed = 123) {
+  X <- as.matrix(X)
+  if (scale_data) X <- scale(X)
+  set.seed(seed)
+
+  # Nombre de lignes distinctes
+  n_unique <- nrow(unique(X))
+  if (n_unique < 2) stop("Moins de 2 points distincts : k-means n'a pas de sens.")
+
+  # Tronquer ks si nécessaire
+  ks <- ks[ks <= n_unique]
+  if (length(ks) == 0) stop("Tous les k demandés dépassent le nombre de points distincts.")
+
+  wss <- numeric(length(ks))
+  # TSS (k = 1)
+  total_ss <- sum(scale(X, scale = FALSE)^2)
+
+  for (i in seq_along(ks)) {
+    k <- ks[i]
+    cat(" k =", k, "\n")
+    if (k == 1) {
+      wss[i] <- total_ss
+    } else {
+      km <- kmeans(X, centers = k, nstart = nstart, iter.max = 100)
+      wss[i] <- km$tot.withinss
+    }
+  }
+
+  plot(ks, wss,
+    type = "b", pch = 19, xlab = "Nombre de clusters (k)",
+    ylab = "Inertie intra-classe (WSS)", main = "Méthode du coude (k-means)"
+  )
+  axis(1, at = ks)
+  grid()
+  #   invisible(data.frame(k = ks, WSS = wss))
+}
+
+# Exemple :
+res <- elbow_wss_safe(img_compressed, ks = 2:32, nstart = 20)
+



@@ -175,11 +179,11 @@ elbow_wss <- function(X, ks = 2:32, nstart = 10, scale_data = FALSE) {
 jpeg("./data/image_compressed.jpg")

 # Afficher l'image compressée dans le fichier
-plot(0, 0, type='n', axes=FALSE, ann=FALSE)
+plot(0, 0, type = "n", axes = FALSE, ann = FALSE)
 rasterImage(img_compressed, -1, -1, 1, 1)

 info <- file.info("./data/PampasGrass.jpg")
-(taille_octets_reelle <- info$size/1024)
+(taille_octets_reelle <- info$size / 1024)

 info <- file.info("./data/image_compressed.jpg")
-(taille_octets_compresse <- info$size/1024)
+(taille_octets_compresse <- info$size / 1024)
--- a/Visualisation/Project/NoticeTechnique.Rmd
+++ b/Visualisation/Project/NoticeTechnique.Rmd
@@ -185,9 +185,9 @@ cor_mat <- cor(vars_candidates, use = "pairwise.complete.obs")
 ```

 ```{r}
-cor_df <- as.data.frame(cor_mat) %>%
-  tibble::rownames_to_column(var = "Var1") %>%
-  pivot_longer(-Var1, names_to = "Var2", values_to = "r") %>%
+cor_df <- as.data.frame(cor_mat) |>
+  tibble::rownames_to_column(var = "Var1") |>
+  pivot_longer(-Var1, names_to = "Var2", values_to = "r") |>
  mutate(
    Var1 = factor(Var1, levels = unique(Var1)),
    Var2 = factor(Var2, levels = rev(unique(Var1)))
@@ -503,7 +503,9 @@ Bien que nous ayons appliqué une transformation logarithmique pour corriger l'a
 Nous appliquons donc une standardisation (Z-score) : $z = \frac{x - \mu}{\sigma}$

 ```{r}
-data_scaled <- tb_ready |> select(log_inc, log_mort) |> scale()
+data_scaled <- tb_ready |>
+  select(log_inc, log_mort) |>
+  scale()

 check_table <- data.frame(
  Variable = c("Incidence (Log)", "Mortalité (Log)"),
--- a/Visualisation/Project/app.R
+++ b/Visualisation/Project/app.R
@@ -43,13 +43,11 @@ ui <- shinydashboard::dashboardPage(
        tabName = "methodo",
        icon = icon("info-circle")
      ),
-
      menuItem(
        "Vue d'Ensemble",
        tabName = "dashboard",
        icon = icon("dashboard")
      ),
-
      menuItem("Données Brutes", tabName = "data", icon = icon("table")),

      # Footer - Informations et crédits
@@ -66,7 +64,6 @@ ui <- shinydashboard::dashboardPage(
          )
        )
      ),
-
      hr(),

      # Filtre par Région
@@ -147,7 +144,6 @@ ui <- shinydashboard::dashboardPage(
            plotlyOutput("cluster_scatter", height = "530px")
          )
        ),
-
        fluidRow(
          # Plot des tendances
          box(
@@ -190,7 +186,6 @@ ui <- shinydashboard::dashboardPage(
      # Page 3 - Méthodologie
      tabItem(
        tabName = "methodo",
-
        fluidRow(
          # Indicateurs OMS
          box(
@@ -307,7 +302,6 @@ ui <- shinydashboard::dashboardPage(
              p("Dernière mise à jour du dataset : Octobre 2024.")
            ),
          ),
-
          column(
            width = 6,
            box(
@@ -319,7 +313,6 @@ ui <- shinydashboard::dashboardPage(
              p(
                "Afin de synthétiser l'information et de faciliter la prise de décision, j'ai appliqué un algorithme d'apprentissage non supervisé (K-Means) pour regrouper les pays ayant des profils épidémiques similaires sous 4 clusters."
              ),
-
              h4("Méthodologie"),
              tags$ul(
                tags$li(
@@ -335,7 +328,6 @@ ui <- shinydashboard::dashboardPage(
                  "Utilisation de `set.seed(123)` pour garantir la reproductibilité des résultats."
                )
              ),
-
              h4("Interprétation des 3 Groupes"),

              # Tableau des Groupes
@@ -395,7 +387,6 @@ ui <- shinydashboard::dashboardPage(
              status = "primary",
              solidHeader = TRUE,
              width = 12,
-
              tags$p(
                "Ce projet suit une approche Open Science.",
                style = "font-style: italic;"
@@ -403,7 +394,6 @@ ui <- shinydashboard::dashboardPage(
              tags$p(
                "L'intégralité du code source (Rmd, App) ainsi que la notice technique (PDF) sont disponibles en libre accès sur le dépôt GitHub."
              ),
-
              tags$a(
                href = "https://go.arthurdanjou.fr/datavis-code",
                target = "_blank",
@@ -524,7 +514,9 @@ server <- function(input, output, session) {
  # KPI - Pire pays
  output$kpi_worst_country <- shinydashboard::renderValueBox({
    data <- filtered_data()
-    worst <- data |> arrange(desc(e_inc_100k)) |> slice(1)
+    worst <- data |>
+      arrange(desc(e_inc_100k)) |>
+      slice(1)

    if (nrow(worst) > 0) {
      valueBox(
@@ -743,7 +735,7 @@ server <- function(input, output, session) {
  output$cluster_scatter <- plotly::renderPlotly({
    data <- filtered_data()
    sel_iso <- selected_country()
-    highlight_data <- data %>% filter(iso3 == sel_iso)
+    highlight_data <- data |> filter(iso3 == sel_iso)

    p <- ggplot(data, aes(x = e_inc_100k, y = e_mort_exc_tbhiv_100k)) +
      geom_point(
--- a/Visualisation/Project/renv.lock
+++ b/Visualisation/Project/renv.lock
@@ -2056,7 +2056,7 @@
      "Type": "Package",
      "Title": "A Forward-Pipe Operator for R",
      "Authors@R": "c( person(\"Stefan Milton\", \"Bache\", , \"stefan@stefanbache.dk\", role = c(\"aut\", \"cph\"), comment = \"Original author and creator of magrittr\"), person(\"Hadley\", \"Wickham\", , \"hadley@posit.co\", role = \"aut\"), person(\"Lionel\", \"Henry\", , \"lionel@posit.co\", role = \"cre\"), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\"), comment = c(ROR = \"03wc8by49\")) )",
-      "Description": "Provides a mechanism for chaining commands with a new forward-pipe operator, %>%. This operator will forward a value, or the result of an expression, into the next function call/expression. There is flexible support for the type of right-hand side expressions. For more information, see package vignette.  To quote Rene Magritte, \"Ceci n'est pas un pipe.\"",
+      "Description": "Provides a mechanism for chaining commands with a new forward-pipe operator, |>. This operator will forward a value, or the result of an expression, into the next function call/expression. There is flexible support for the type of right-hand side expressions. For more information, see package vignette.  To quote Rene Magritte, \"Ceci n'est pas un pipe.\"",
      "License": "MIT + file LICENSE",
      "URL": "https://magrittr.tidyverse.org, https://github.com/tidyverse/magrittr",
      "BugReports": "https://github.com/tidyverse/magrittr/issues",
--- a/Visualisation/tp3/tp3.Rmd
+++ b/Visualisation/tp3/tp3.Rmd
@@ -32,8 +32,8 @@ Note that the `echo = FALSE` parameter was added to the code chunk to prevent pr

 ```{r}
 library(kableExtra)
-mtcars[1:5, 1:5] %>%
-  kbl() %>%
+mtcars[1:5, 1:5] |>
+  kbl() |>
  kable_styling()
 ```