mirror of
https://github.com/ArthurDanjou/ArtStudies.git
synced 2026-01-29 05:57:23 +01:00
Refactor code to use forward-pipe operator for better readability and consistency
This commit is contained in:
@@ -7,7 +7,7 @@ library(dplyr)
|
||||
|
||||
```{r}
|
||||
df <- data.frame(
|
||||
team = c('A', 'A', 'B', 'B', 'B', 'B', 'C', 'C'),
|
||||
team = c("A", "A", "B", "B", "B", "B", "C", "C"),
|
||||
points = c(25, 12, 15, 14, 19, 23, 25, 29)
|
||||
)
|
||||
|
||||
@@ -22,24 +22,24 @@ one_hot_data
|
||||
```{r}
|
||||
train <- data.frame(
|
||||
target = c(10, 20, 15),
|
||||
cat_col1 = c('city1', 'city2', 'city1'),
|
||||
cat_col2 = c('james', 'adam', 'charles')
|
||||
cat_col1 = c("city1", "city2", "city1"),
|
||||
cat_col2 = c("james", "adam", "charles")
|
||||
)
|
||||
|
||||
global_mean <- mean(train$target)
|
||||
alpha <- 10
|
||||
|
||||
target_encoding <- train %>%
|
||||
group_by(cat_col1) %>%
|
||||
target_encoding <- train |>
|
||||
group_by(cat_col1) |>
|
||||
summarise(
|
||||
n = n(),
|
||||
sum_target = sum(target),
|
||||
cat_col1_te = (sum_target + (alpha * global_mean)) / (n + alpha),
|
||||
.groups = "drop"
|
||||
) %>%
|
||||
) |>
|
||||
select(cat_col1, cat_col1_te)
|
||||
|
||||
train <- train %>% left_join(target_encoding, by = "cat_col1")
|
||||
train <- train |> left_join(target_encoding, by = "cat_col1")
|
||||
```
|
||||
|
||||
# Frequency Encoding
|
||||
@@ -47,7 +47,7 @@ train <- train %>% left_join(target_encoding, by = "cat_col1")
|
||||
|
||||
```{r}
|
||||
df <- data.frame(
|
||||
color = c('blue', 'red', 'blue', 'green'),
|
||||
color = c("blue", "red", "blue", "green"),
|
||||
value = c(10, 20, 10, 30)
|
||||
)
|
||||
```
|
||||
Reference in New Issue
Block a user