Refactor code for improved readability and consistency across R Markdown files

- Updated comments and code formatting in `3-td_ggplot2 - enonce.Rmd` for clarity.
- Enhanced code structure in `4-td_graphiques - enonce.Rmd` by organizing options and library calls.
- Replaced pipe operator `%>%` with `|>` in `Code_Lec3.Rmd` for consistency with modern R syntax.
- Cleaned up commented-out code and ensured consistent spacing in ggplot calls.
This commit is contained in:
2025-11-06 09:26:58 +01:00
parent 8f5f2b417c
commit 03bf0a4db2
10 changed files with 764 additions and 588 deletions

View File

@@ -44,11 +44,10 @@ notes_MAN <- read.table("notes_MAN.csv", sep = ";", dec = ",", row.names = 1, he
# qui est une variable catégorielle # qui est une variable catégorielle
notes_MAN_prep <- notes_MAN[, -1] notes_MAN_prep <- notes_MAN[, -1]
X <- notes_MAN[1:6,] %>% select(c("Probas", "Analyse", "Anglais", "MAN.Stats", "Stats.Inférentielles")) X <- notes_MAN[1:6, ] |> select(c("Probas", "Analyse", "Anglais", "MAN.Stats", "Stats.Inférentielles"))
# on prépare le jeu de données en retirant la colonne des Mentions # on prépare le jeu de données en retirant la colonne des Mentions
# qui est une variable catégorielle # qui est une variable catégorielle
# View(X) # View(X)
``` ```
```{r} ```{r}
@@ -101,7 +100,7 @@ C[, 1:2]
deux premières composantes principales (1 point) deux premières composantes principales (1 point)
```{r} ```{r}
colors <- c('blue', 'red', 'green', 'yellow', 'purple', 'orange') colors <- c("blue", "red", "green", "yellow", "purple", "orange")
plot( plot(
C[, 1], C[, 2], C[, 1], C[, 2],
main = "Coordonnées des individus par rapport \n aux deux premières composantes principales", main = "Coordonnées des individus par rapport \n aux deux premières composantes principales",
@@ -111,7 +110,7 @@ plot(
col = colors, col = colors,
pch = 15 pch = 15
) )
legend(x = 'topleft', legend = rownames(X), col = colors, pch = 15) legend(x = "topleft", legend = rownames(X), col = colors, pch = 15)
``` ```
------------------------------------------------------------------------ ------------------------------------------------------------------------
@@ -190,7 +189,7 @@ avec:
Depuis notre ACP, on peut donc récupérer les coordonnées: Depuis notre ACP, on peut donc récupérer les coordonnées:
```{r} ```{r}
coords_man_stats <- res.notes$var$coord["MAN.Stats",] coords_man_stats <- res.notes$var$coord["MAN.Stats", ]
coords_man_stats[1:2] coords_man_stats[1:2]
``` ```

View File

@@ -1,5 +1,5 @@
```{r} ```{r}
setwd('/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/TP1-bis') setwd("/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/TP1-bis")
library(tidyverse) library(tidyverse)
options(scipen = 999, digits = 5) options(scipen = 999, digits = 5)
@@ -56,8 +56,8 @@ summary(model)
coef(model) coef(model)
``` ```
```{r} ```{r}
data <- data %>% data <- data |>
mutate(yhat = beta0 + beta1 * poids) %>% mutate(yhat = beta0 + beta1 * poids) |>
mutate(residuals = cholesterol - yhat) mutate(residuals = cholesterol - yhat)
data data
@@ -71,8 +71,8 @@ ggplot(data, aes(x = poids, y = cholesterol)) +
```{r} ```{r}
mean(data[, "cholesterol"]) mean(data[, "cholesterol"])
mean(data[, "yhat"]) mean(data[, "yhat"])
mean(data[, "residuals"]) %>% round(10) mean(data[, "residuals"]) |> round(10)
cov(data[, "residuals"], data[, "poids"]) %>% round(10) cov(data[, "residuals"], data[, "poids"]) |> round(10)
(RSS <- sum((data[, "residuals"])^2)) (RSS <- sum((data[, "residuals"])^2))
(TSS <- sum((y - mean(y))^2)) (TSS <- sum((y - mean(y))^2))
TSS - beta1 * Sxy TSS - beta1 * Sxy
@@ -117,10 +117,10 @@ t <- qt(0.975, dof)
sigma_hat <- sigma(model) sigma_hat <- sigma(model)
n <- nrow(data) n <- nrow(data)
data <- data %>% data <- data |>
mutate(error = t * mutate(error = t *
sigma_hat * sigma_hat *
sqrt(1 / n + (poids - mean(poids))^2 / RSS)) %>% sqrt(1 / n + (poids - mean(poids))^2 / RSS)) |>
mutate(conf.low = yhat - error, conf.high = yhat + error, error = NULL) mutate(conf.low = yhat - error, conf.high = yhat + error, error = NULL)
ggplot(data, aes(x = poids, y = cholesterol)) + ggplot(data, aes(x = poids, y = cholesterol)) +

View File

@@ -1,5 +1,5 @@
```{r} ```{r}
setwd('/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/TP2-bis') setwd("/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/TP2-bis")
library(tidyverse) library(tidyverse)
library(GGally) library(GGally)
@@ -10,9 +10,9 @@ library(qqplotr)
options(scipen = 999, digits = 5) options(scipen = 999, digits = 5)
``` ```
```{r} ```{r}
data <- read.csv('data02.csv', sep = ',', header = TRUE, dec = ".") data <- read.csv("data02.csv", sep = ",", header = TRUE, dec = ".")
data %>% data |>
mutate(type = factor(type, levels = c("maths", "english", "final"), labels = c("maths", "english", "final"))) %>% mutate(type = factor(type, levels = c("maths", "english", "final"), labels = c("maths", "english", "final"))) |>
ggplot(aes(x = note)) + ggplot(aes(x = note)) +
facet_wrap(vars(type), scales = "free_x") + facet_wrap(vars(type), scales = "free_x") +
geom_histogram(binwidth = 4, color = "black", fill = "grey80") + geom_histogram(binwidth = 4, color = "black", fill = "grey80") +
@@ -21,8 +21,8 @@ data %>%
``` ```
```{r} ```{r}
data_wide <- pivot_wider(data, names_from = type, values_from = note) data_wide <- pivot_wider(data, names_from = type, values_from = note)
data_wide %>% data_wide |>
select(-id) %>% select(-id) |>
ggpairs() + theme_bw(14) ggpairs() + theme_bw(14)
``` ```
```{r} ```{r}
@@ -67,12 +67,12 @@ linearHypothesis(model, "maths - english = 0")
# Submodel testing # Submodel testing
```{r} ```{r}
data_predict <- predict(model, newdata = expand.grid(maths = seq(70, 90, 2), english = c(75, 85)), interval = "confidence") %>% data_predict <- predict(model, newdata = expand.grid(maths = seq(70, 90, 2), english = c(75, 85)), interval = "confidence") |>
as_tibble() %>% as_tibble() |>
bind_cols(expand.grid(maths = seq(70, 90, 2), english = c(75, 85))) bind_cols(expand.grid(maths = seq(70, 90, 2), english = c(75, 85)))
data_predict %>% data_predict |>
mutate(english = as.factor(english)) %>% mutate(english = as.factor(english)) |>
ggplot(aes(x = maths, y = fit, color = english, fill = english, label = round(fit, 1))) + ggplot(aes(x = maths, y = fit, color = english, fill = english, label = round(fit, 1))) +
geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.2, show.legend = FALSE) + geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.2, show.legend = FALSE) +
geom_point(size = 2) + geom_point(size = 2) +

View File

@@ -1,5 +1,5 @@
```{r} ```{r}
setwd('/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/TP2') setwd("/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/TP2")
``` ```
# Question 1 : Import dataset and check variables # Question 1 : Import dataset and check variables
@@ -9,8 +9,8 @@ library(dplyr)
cepages <- read.csv("Cepages B TP2.csv", header = TRUE, sep = ";", dec = ",") cepages <- read.csv("Cepages B TP2.csv", header = TRUE, sep = ";", dec = ",")
cepages$Couleur <- as.factor(cepages$Couleur) cepages$Couleur <- as.factor(cepages$Couleur)
cepages$Origine <- as.factor(cepages$Origine) cepages$Origine <- as.factor(cepages$Origine)
cepages <- cepages %>% mutate(across(where(is.character), as.numeric)) cepages <- cepages |> mutate(across(where(is.character), as.numeric))
cepages <- cepages %>% mutate(across(where(is.integer), as.numeric)) cepages <- cepages |> mutate(across(where(is.integer), as.numeric))
paged_table(cepages) paged_table(cepages)
``` ```
@@ -39,7 +39,7 @@ tapply(cepages$pH, list(cepages$Couleur, cepages$Origine), mean)
library(ggplot2) library(ggplot2)
ggplot(cepages, aes(x = AcTot, y = pH, color = Couleur)) + ggplot(cepages, aes(x = AcTot, y = pH, color = Couleur)) +
geom_point(col = 'red', size = 0.5) + geom_point(col = "red", size = 0.5) +
geom_smooth(method = "lm", se = F) geom_smooth(method = "lm", se = F)
ggplot(cepages, aes(y = pH, x = AcTot, colour = Couleur, fill = Couleur)) + ggplot(cepages, aes(y = pH, x = AcTot, colour = Couleur, fill = Couleur)) +
@@ -50,8 +50,8 @@ ggplot(cepages, aes(y = pH, x = AcTot, colour = Couleur, fill = Couleur)) +
```{r} ```{r}
ggplot(cepages, aes(x = AcTot, y = pH, color = Origine)) + ggplot(cepages, aes(x = AcTot, y = pH, color = Origine)) +
geom_smooth(method = 'lm', se = F) + geom_smooth(method = "lm", se = F) +
geom_point(col = 'red', size = 0.5) geom_point(col = "red", size = 0.5)
ggplot(cepages, aes(y = pH, x = AcTot, colour = Origine, fill = Origine)) + ggplot(cepages, aes(y = pH, x = AcTot, colour = Origine, fill = Origine)) +
geom_boxplot(alpha = 0.5, outlier.alpha = 0) geom_boxplot(alpha = 0.5, outlier.alpha = 0)

View File

@@ -1,5 +1,5 @@
```{r} ```{r}
setwd('/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/TP3') setwd("/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/TP3")
``` ```
# Question 1 : Import dataset and check variables # Question 1 : Import dataset and check variables
@@ -9,8 +9,8 @@ library(dplyr)
ozone <- read.table("ozone.txt", header = TRUE, sep = " ", dec = ".") ozone <- read.table("ozone.txt", header = TRUE, sep = " ", dec = ".")
ozone$vent <- as.factor(ozone$vent) ozone$vent <- as.factor(ozone$vent)
ozone$temps <- as.factor(ozone$temps) ozone$temps <- as.factor(ozone$temps)
ozone <- ozone %>% mutate(across(where(is.character), as.numeric)) ozone <- ozone |> mutate(across(where(is.character), as.numeric))
ozone <- ozone %>% mutate(across(where(is.integer), as.numeric)) ozone <- ozone |> mutate(across(where(is.integer), as.numeric))
paged_table(ozone) paged_table(ozone)
``` ```
@@ -25,8 +25,8 @@ summary(model_T12)
library(ggplot2) library(ggplot2)
ggplot(ozone, aes(x = T12, y = maxO3)) + ggplot(ozone, aes(x = T12, y = maxO3)) +
geom_smooth(method = 'lm', se = T) + geom_smooth(method = "lm", se = T) +
geom_point(col = 'red', size = 0.5) + geom_point(col = "red", size = 0.5) +
labs(title = "maxO3 ~ T12") + labs(title = "maxO3 ~ T12") +
theme_minimal() theme_minimal()
``` ```
@@ -130,5 +130,4 @@ new_obs <- list(
maxO3v = 85 maxO3v = 85
) )
predict(model_backward, new_obs, interval = "confidence") predict(model_backward, new_obs, interval = "confidence")
``` ```

View File

@@ -1,5 +1,5 @@
```{r} ```{r}
setwd('/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/TP4') setwd("/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/TP4")
set.seed(0911) set.seed(0911)
library(ggplot2) library(ggplot2)
@@ -27,14 +27,14 @@ library(glmnet)
library(nnet) # multinom, glm library(nnet) # multinom, glm
library(caret) library(caret)
library(ROCR) library(ROCR)
#library(PRROC) autre package pour courbe roc et courbe pr # library(PRROC) autre package pour courbe roc et courbe pr
library(ISLR) # dataset for statistical learning library(ISLR) # dataset for statistical learning
ggplot2::theme_set(ggplot2::theme_light())# Set the graphical theme ggplot2::theme_set(ggplot2::theme_light()) # Set the graphical theme
``` ```
```{r} ```{r}
car <- read.table('car_income.txt', header = TRUE, sep = ';') car <- read.table("car_income.txt", header = TRUE, sep = ";")
car %>% rmarkdown::paged_table() car |> rmarkdown::paged_table()
summary(car) summary(car)
``` ```
@@ -44,7 +44,7 @@ summary(model_purchase)
``` ```
```{r} ```{r}
p1 <- car %>% p1 <- car |>
ggplot(aes(y = purchase, x = income + age)) + ggplot(aes(y = purchase, x = income + age)) +
geom_point(alpha = .15) + geom_point(alpha = .15) +
geom_smooth(method = "lm") + geom_smooth(method = "lm") +
@@ -53,7 +53,7 @@ p1 <- car %>%
ylab("Probability of Purchase") ylab("Probability of Purchase")
p2 <- car %>% p2 <- car |>
ggplot(aes(y = purchase, x = income + age)) + ggplot(aes(y = purchase, x = income + age)) +
geom_point(alpha = .15) + geom_point(alpha = .15) +
geom_smooth(method = "glm", method.args = list(family = "binomial")) + geom_smooth(method = "glm", method.args = list(family = "binomial")) +
@@ -66,9 +66,9 @@ ggplotly(p2)
``` ```
```{r} ```{r}
car <- car %>% car <- car |>
mutate(old = ifelse(car$age > 3, 1, 0)) mutate(old = ifelse(car$age > 3, 1, 0))
car <- car %>% car <- car |>
mutate(rich = ifelse(car$income > 40, 1, 0)) mutate(rich = ifelse(car$income > 40, 1, 0))
model_old <- glm(purchase ~ age + income + rich + old, data = car, family = "binomial") model_old <- glm(purchase ~ age + income + rich + old, data = car, family = "binomial")
summary(model_old) summary(model_old)
@@ -90,5 +90,5 @@ pima.te$pred <- as.factor(pima.te$pred)
pima.te$type <- as.factor(pima.te$type) pima.te$type <- as.factor(pima.te$type)
# Confusion matrix # Confusion matrix
confusionMatrix(data = pima.te$type, reference = pima.te$pred, positive = 'Yes') confusionMatrix(data = pima.te$type, reference = pima.te$pred, positive = "Yes")
``` ```

File diff suppressed because it is too large Load Diff

View File

@@ -297,7 +297,7 @@ On présente ci-dessous un aperçu des données.
fold <- getwd() fold <- getwd()
# Load data # Load data
# load(paste0(fold, "/M2/Data Visualisation/tp1", "/data/datafreMPTL.RData")) # VSCode # nolint # load(paste0(fold, "/M2/Data Visualisation/tp1", "/data/datafreMPTL.RData")) # VSCode
load(paste0(fold, "/data/datafreMPTL.RData")) # RStudio load(paste0(fold, "/data/datafreMPTL.RData")) # RStudio
paged_table(dat, options = list(rows.print = 15)) paged_table(dat, options = list(rows.print = 15))
``` ```
@@ -642,11 +642,15 @@ plot_pairwise_disc <- function(df, var1, var2) {
df |> df |>
group_by(varx, vary) |> group_by(varx, vary) |>
summarize(exp = sum(Exposure), summarize(
exp = sum(Exposure),
nb_claims = sum(ClaimNb), nb_claims = sum(ClaimNb),
freq = sum(ClaimNb) / sum(Exposure), .groups = "drop") |> freq = sum(ClaimNb) / sum(Exposure), .groups = "drop"
) |>
ggplot(aes(x = varx, y = freq, colour = vary, group = vary), alpha = 0.3) + ggplot(aes(x = varx, y = freq, colour = vary, group = vary), alpha = 0.3) +
geom_point() + geom_line() + theme_bw() + geom_point() +
geom_line() +
theme_bw() +
labs(x = var1, y = "Frequence", colour = var2) labs(x = var1, y = "Frequence", colour = var2)
} }
``` ```

View File

@@ -23,8 +23,13 @@ editor_options:
```{r setup, include=FALSE} ```{r setup, include=FALSE}
## Global options ## Global options
knitr::opts_chunk$set(cache = FALSE, warning = FALSE, message = FALSE, fig.retina = 2) knitr::opts_chunk$set(
options(encoding = 'UTF-8') cache = FALSE,
warning = FALSE,
message = FALSE,
fig.retina = 2
)
options(encoding = "UTF-8")
``` ```
@@ -88,7 +93,7 @@ de vie par pays sur la période 1952-1990. Les observations ont lieu tous les 5
Dans un premier temps, il faut installer le package et le charger. Dans un premier temps, il faut installer le package et le charger.
```{r} ```{r}
# install.packages("gapminder") # install.packages("gapminder") #nolint
library(gapminder) library(gapminder)
``` ```
@@ -178,9 +183,9 @@ world <- gisco_countries
world <- subset(world, NAME_ENGL != "Antarctica") # Remove Antarctica world <- subset(world, NAME_ENGL != "Antarctica") # Remove Antarctica
# Merge data # Merge data
world_df <- gapminder %>% world_df <- gapminder |>
filter(year == "2007") filter(year == "2007")
world_df <- world %>% world_df <- world |>
left_join(world_df, by = c("NAME_ENGL" = "country")) left_join(world_df, by = c("NAME_ENGL" = "country"))
ggplot(world_df) + ggplot(world_df) +
@@ -231,7 +236,7 @@ accidents <- read_csv("data/accidentsVelo.csv",
date = col_date(format = "%Y-%m-%d"))) date = col_date(format = "%Y-%m-%d")))
# a few adjustments # a few adjustments
accidents <- accidents %>% accidents <- accidents |>
mutate(mois = factor(mois), mutate(mois = factor(mois),
jour = factor(jour), jour = factor(jour),
dep = factor(dep), dep = factor(dep),
@@ -247,8 +252,8 @@ correct <- paste0("0", str_sub(correct, 1, 1), ":",
accidents$hrmn[issue] <- correct accidents$hrmn[issue] <- correct
# Extract hour # Extract hour
accidents <- accidents %>% accidents <- accidents |>
mutate(hour = paste(date, hrmn, sep = " ")) %>% mutate(hour = paste(date, hrmn, sep = " ")) |>
mutate(hour = strptime(hour, "%Y-%m-%d %H:%M")$hour) mutate(hour = strptime(hour, "%Y-%m-%d %H:%M")$hour)
# mapping table for french departments # mapping table for french departments
@@ -327,8 +332,8 @@ library(mapview)
library(sf) library(sf)
## Remove NA ## Remove NA
df_map_dyn <- accidents %>% df_map_dyn <- accidents |>
filter(???) %>% filter(???) |>
na.omit() na.omit()
# Make map and print it # Make map and print it
@@ -354,27 +359,27 @@ Voici un premier code à trou pour vous aider.
```{r, eval = F} ```{r, eval = F}
# get french map - level nuts2 # get french map - level nuts2
fr <- gisco_get_nuts(resolution = "20", country = ???, nuts_level = ???) %>% fr <- gisco_get_nuts(resolution = "20", country = ???, nuts_level = ???) |>
mutate(res = "20M") mutate(res = "20M")
# Remove white-space to avoid errors. # Remove white-space to avoid errors.
library(stringr) library(stringr)
departements_francais <- departements_francais %>% departements_francais <- departements_francais |>
mutate(dep_name = str_trim(dep_name)) mutate(dep_name = str_trim(dep_name))
fr <- fr %>% fr <- fr |>
mutate(NUTS_NAME = str_trim(NUTS_NAME)) mutate(NUTS_NAME = str_trim(NUTS_NAME))
# Merge and remove departments outside metropolitan France # Merge and remove departments outside metropolitan France
fr_map <- fr %>% fr_map <- fr |>
left_join(???) %>% left_join(???) |>
filter(! dep %in% c("971", ???) ) filter(! dep %in% c("971", ???) )
# count the number of accidents # count the number of accidents
df_acc <- ??? df_acc <- ???
# merge statistics with the map # merge statistics with the map
map_acc <- fr_map %>% map_acc <- fr_map |>
left_join(df_acc, by = c("dep" = "dep")) left_join(df_acc, by = c("dep" = "dep"))
# map with all accidents # map with all accidents

View File

@@ -194,11 +194,11 @@ linear.mod$results
```{r} ```{r}
Ytrain <- cookie.train$sugars Ytrain <- cookie.train$sugars
dfc_train <- data.frame(ytrain = Ytrain, linear.mod = fitted(linear.mod)) dfc_train <- data.frame(ytrain = Ytrain, linear.mod = fitted(linear.mod))
dfc_train %>% rmarkdown::paged_table() dfc_train |> rmarkdown::paged_table()
``` ```
```{r} ```{r}
dfc_train %>% dfc_train |>
ggplot(aes(x = ytrain, y = linear.mod)) + ggplot(aes(x = ytrain, y = linear.mod)) +
geom_point(size = 2, color = "#983399") + geom_point(size = 2, color = "#983399") +
geom_smooth(method = "lm", color = "#389900") + geom_smooth(method = "lm", color = "#389900") +
@@ -211,9 +211,9 @@ dfc_train %>%
Ytest <- cookie.test$sugars Ytest <- cookie.test$sugars
dfc_test <- data.frame(ytest = Ytest) dfc_test <- data.frame(ytest = Ytest)
dfc_test$linear.mod <- predict(linear.mod, newdata = cookie.test) dfc_test$linear.mod <- predict(linear.mod, newdata = cookie.test)
# dfc_test%>%rmarkdown::paged_table() # dfc_test|>rmarkdown::paged_table()
dfc_test %>% dfc_test |>
ggplot(aes(x = ytest, y = linear.mod)) + ggplot(aes(x = ytest, y = linear.mod)) +
geom_point(size = 2, color = "#983399") + geom_point(size = 2, color = "#983399") +
geom_smooth(method = "lm", color = "#389900") + geom_smooth(method = "lm", color = "#389900") +
@@ -244,7 +244,7 @@ ggplotly(ggplot(Lasso))
``` ```
```{r} ```{r}
Lasso$results %>% rmarkdown::paged_table() Lasso$results |> rmarkdown::paged_table()
``` ```
```{r} ```{r}
@@ -271,8 +271,8 @@ coef_lasso <- data.frame(
Variable = rownames(as.matrix(coef(Lasso$finalModel, Lasso$bestTune$lambda))), Variable = rownames(as.matrix(coef(Lasso$finalModel, Lasso$bestTune$lambda))),
Coefficient = as.matrix(coef(Lasso$finalModel, Lasso$bestTune$lambda))[, 1] Coefficient = as.matrix(coef(Lasso$finalModel, Lasso$bestTune$lambda))[, 1]
) )
coef_lasso %>% coef_lasso |>
subset(Coefficient != 0) %>% subset(Coefficient != 0) |>
rmarkdown::paged_table() rmarkdown::paged_table()
``` ```
@@ -298,7 +298,7 @@ ggplotly(ggplot(ridge))
``` ```
```{r} ```{r}
ridge$results %>% rmarkdown::paged_table() ridge$results |> rmarkdown::paged_table()
``` ```
```{r} ```{r}
@@ -320,7 +320,7 @@ vip(ridge, num_features = 15)
``` ```
```{r} ```{r}
data.frame(as.matrix(coef(ridge$finalModel, ridge$bestTune$lambda))) %>% data.frame(as.matrix(coef(ridge$finalModel, ridge$bestTune$lambda))) |>
rmarkdown::paged_table() rmarkdown::paged_table()
``` ```
@@ -346,7 +346,7 @@ ggplotly(ggplot(ElNet))
``` ```
```{r} ```{r}
ElNet$results %>% rmarkdown::paged_table() ElNet$results |> rmarkdown::paged_table()
``` ```
```{r} ```{r}
@@ -372,8 +372,8 @@ coef_elnet <- data.frame(
Variable = rownames(as.matrix(coef(ElNet$finalModel, ElNet$bestTune$lambda))), Variable = rownames(as.matrix(coef(ElNet$finalModel, ElNet$bestTune$lambda))),
Coefficient = as.matrix(coef(ElNet$finalModel, ElNet$bestTune$lambda))[, 1] Coefficient = as.matrix(coef(ElNet$finalModel, ElNet$bestTune$lambda))[, 1]
) )
coef_elnet %>% coef_elnet |>
subset(Coefficient != 0) %>% subset(Coefficient != 0) |>
rmarkdown::paged_table() rmarkdown::paged_table()
``` ```
@@ -396,7 +396,7 @@ ggplotly(ggplot(pls_mod))
``` ```
```{r} ```{r}
pls_mod$results %>% rmarkdown::paged_table() pls_mod$results |> rmarkdown::paged_table()
``` ```
```{r} ```{r}
@@ -412,7 +412,7 @@ vip(pls_mod, num_features = 20)
``` ```
```{r} ```{r}
data.frame(Coefficients = as.matrix(coef(pls_mod$finalModel))) %>% data.frame(Coefficients = as.matrix(coef(pls_mod$finalModel))) |>
rmarkdown::paged_table() rmarkdown::paged_table()
``` ```
@@ -435,7 +435,7 @@ dTrain$ridge <- fitted(ridge)
dTrain$ElNet <- fitted(ElNet) dTrain$ElNet <- fitted(ElNet)
dTrain$pls <- fitted(pls_mod) dTrain$pls <- fitted(pls_mod)
melt.dTrain <- melt(dTrain, id = "yTrain", variable.name = "model") melt.dTrain <- melt(dTrain, id = "yTrain", variable.name = "model")
melt.dTrain %>% ggplot() + melt.dTrain |> ggplot() +
aes(x = yTrain, y = value) + aes(x = yTrain, y = value) +
geom_smooth(method = "lm") + geom_smooth(method = "lm") +
geom_point(size = 1, colour = "#983399") + geom_point(size = 1, colour = "#983399") +
@@ -446,11 +446,11 @@ melt.dTrain %>% ggplot() +
``` ```
```{r} ```{r}
dTrain %>% rmarkdown::paged_table() dTrain |> rmarkdown::paged_table()
``` ```
```{r} ```{r}
melt.dTrain %>% rmarkdown::paged_table() melt.dTrain |> rmarkdown::paged_table()
``` ```
### On the test set ### On the test set
@@ -463,10 +463,10 @@ dTest$Lasso <- predict(Lasso, newdata = cookie.test)
dTest$ridge <- predict(ridge, newdata = cookie.test) dTest$ridge <- predict(ridge, newdata = cookie.test)
dTest$ElNet <- predict(ElNet, newdata = cookie.test) dTest$ElNet <- predict(ElNet, newdata = cookie.test)
dTest$pls <- predict(pls_mod, newdata = cookie.test) dTest$pls <- predict(pls_mod, newdata = cookie.test)
# dTest%>% rmarkdown::paged_table() # dTest|> rmarkdown::paged_table()
melt.dTest <- melt(dTest, id = "yTest", variable.name = "model") melt.dTest <- melt(dTest, id = "yTest", variable.name = "model")
# melt.dTest%>% rmarkdown::paged_table() # melt.dTest|> rmarkdown::paged_table()
melt.dTest %>% ggplot() + melt.dTest |> ggplot() +
aes(x = yTest, y = value) + aes(x = yTest, y = value) +
geom_smooth(method = "lm") + geom_smooth(method = "lm") +
geom_point(size = 1, colour = "#983399") + geom_point(size = 1, colour = "#983399") +
@@ -491,8 +491,8 @@ RMSE <- rbind.data.frame(
) )
names(RMSE) <- c("Train", "Test") names(RMSE) <- c("Train", "Test")
row.names(RMSE) <- c("Linear", "Lasso", "Ridge", "ElNet", "PLS") row.names(RMSE) <- c("Linear", "Lasso", "Ridge", "ElNet", "PLS")
RMSE %>% RMSE |>
kableExtra::kbl() %>% kableExtra::kbl() |>
kableExtra::kable_styling() kableExtra::kable_styling()
``` ```