Add "Clustering In Practice" section: add Encoding.Rmd and data/chiffres.csv; update README

2026-03-18 22:49:59 +01:00 · 2026-01-08 13:44:01 +01:00
parent 2e2500b509
commit c8c1bf4807
3 changed files with 2062 additions and 0 deletions
--- a/Practice/Encoding.Rmd
+++ b/Practice/Encoding.Rmd
@@ -0,0 +1,53 @@
+```{r}
+library(caret)
+library(dplyr)
+```
+
+# One Hot Encoding
+
+```{r}
+# Toy data set: one categorical column (team) and one numeric column (points).
+df <- data.frame(
+  team = rep(c("A", "B", "C"), times = c(2, 4, 2)),
+  points = c(25, 12, 15, 14, 19, 23, 25, 29)
+)
+
+# Build the encoder from a one-sided formula, then apply it to the same data.
+dummies <- dummyVars(~ team + points, data = df)
+one_hot_data <- predict(dummies, newdata = df)
+
+# Show the encoded result.
+one_hot_data
+```
+
+# Target Encoding
+
+```{r}
+# Small training set: a numeric target plus two categorical columns.
+train <- data.frame(
+  target = c(10, 20, 15),
+  cat_col1 = c("city1", "city2", "city1"),
+  cat_col2 = c("james", "adam", "charles")
+)
+
+# Smoothing inputs: the overall target mean and its weight (alpha).
+alpha <- 10
+global_mean <- mean(train[["target"]])
+
+# Per-category encoding for cat_col1: the category's target sum is blended
+# with alpha pseudo-observations sitting at the global mean.
+target_encoding <- train %>%
+  group_by(cat_col1) %>%
+  summarise(
+    cat_col1_te = (sum(target) + alpha * global_mean) / (n() + alpha),
+    .groups = "drop"
+  )
+
+# Attach the encoded value to every training row.
+train <- left_join(train, target_encoding, by = "cat_col1")
+```
+
+# Frequency Encoding
+
+
+```{r}
+# Toy data set with one categorical column (color) to encode.
+df <- data.frame(
+  color = c("blue", "red", "blue", "green"),
+  value = c(10, 20, 10, 30)
+)
+
+# Frequency encoding: map each category to its share of the rows.
+# table() counts each color, prop.table() turns counts into proportions,
+# and indexing by df$color looks up the proportion for every row.
+color_freq <- prop.table(table(df$color))
+df$color_freq <- as.numeric(color_freq[df$color])
+
+df
+```
--- a/Practice/data/chiffres.csv
+++ b/Practice/data/chiffres.csv
--- a/README.md
+++ b/README.md
@@ -29,6 +29,7 @@ The projects are organized into two main sections:
  - `Statistical Learning`

 - `M2`
+  - `Clustering In Practice`
  - `Data Visualisation`
  - `Deep Learning`
  - `Generative AI`