mirror of
https://github.com/ArthurDanjou/ArtStudies.git
synced 2026-01-14 13:54:06 +01:00
Add initial R project configuration file (studies.Rproj) with default settings
This commit is contained in:
515
M2/Linear Models/Biaised Models/Code_Lec3.Rmd
Normal file
515
M2/Linear Models/Biaised Models/Code_Lec3.Rmd
Normal file
@@ -0,0 +1,515 @@
|
||||
---
|
||||
title: "Code: Biased regression"
|
||||
subtitle: "Methods for Regression and classification"
|
||||
author: "Katia Meziani"
|
||||
output:
|
||||
html_document:
|
||||
self_contained: true
|
||||
math_method:
|
||||
engine: mathjax
|
||||
url: https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js
|
||||
code_folding: hide
|
||||
css: ./style.css
|
||||
df_print: paged
|
||||
highlight: tango
|
||||
number_sections: no
|
||||
theme: flatly
|
||||
toc: yes
|
||||
toc_float:
|
||||
collapsed: no
|
||||
---
|
||||
|
||||
<style>
|
||||
.column {
|
||||
width: 50%;
|
||||
float: left;
|
||||
}
|
||||
</style>
|
||||
|
||||
<style>
|
||||
.image-wrapper {
|
||||
text-align: center; /* Centre toutes les images */
|
||||
}
|
||||
.image-container {
|
||||
display: inline-block;
|
||||
margin-right: 20px; /* Espacement entre les images */
|
||||
text-align: center; /* Centre le texte sous l'image */
|
||||
}
|
||||
.image-container:last-child {
|
||||
margin-right: 0; /* Pas d'espacement après la dernière image */
|
||||
}
|
||||
</style>
|
||||
|
||||
<style>
|
||||
.attention {
|
||||
font-size: 24px;
|
||||
color: red;
|
||||
font-weight: bold;
|
||||
}
|
||||
</style>
|
||||
|
||||
<style>
|
||||
.underline {
|
||||
text-decoration: underline;
|
||||
}
|
||||
</style>
|
||||
|
||||
<style contenteditable>
|
||||
.brd {
|
||||
border: 2px solid black; ; padding: 5px
|
||||
}
|
||||
</style>
|
||||
|
||||
<style>
|
||||
.brdpurple {
|
||||
border: 2px solid black;
|
||||
padding: 10px; /* Espace interne */
|
||||
background-color: rgba(128, 0, 128, 0.05); /* Fond violet très très clair avec transparence */
|
||||
margin: 10px 0; /* Espace autour de l'encadrement */
|
||||
}
|
||||
</style>
|
||||
|
||||
<style contenteditable>
|
||||
.brdred {
|
||||
border: 2px solid #B22222 ;
|
||||
padding: 10px; /* Ajouter de l'espace autour du texte */
|
||||
background-color: #f9f9f9; /* Optionnel : couleur de fond pour le texte */
|
||||
margin: 10px 0; /* Ajouter de l'espace au-dessus et en-dessous de l'encadrement */
|
||||
}
|
||||
</style>
|
||||
|
||||
<style>
|
||||
.solution {
|
||||
font-size: 24px;
|
||||
color: yellow; /* Couleur jaune pour simuler la lumière */
|
||||
}
|
||||
</style>
|
||||
|
||||
<style>
|
||||
.underline {
|
||||
text-decoration: underline;
|
||||
}
|
||||
</style>
|
||||
|
||||
<style>
|
||||
.brdgreen {
|
||||
border: 2px solid black; /* Bordure verte */
|
||||
padding: 10px; /* Espace interne */
|
||||
background-color: rgba(114, 213, 114, 0.03); /* Fond vert pastel très clair avec transparence */
|
||||
margin: 10px 0; /* Espace autour de l'encadrement */
|
||||
}
|
||||
</style>
|
||||
|
||||
<style>
|
||||
.brdblack {
|
||||
border: 2px solid black; /* Définir la couleur et l'épaisseur de la bordure */
|
||||
padding: 10px; /* Ajouter de l'espace autour du texte */
|
||||
background-color: #f9f9f9; /* Optionnel : couleur de fond pour le texte */
|
||||
margin: 10px 0; /* Ajouter de l'espace au-dessus et en-dessous de l'encadrement */
|
||||
}
|
||||
</style>
|
||||
|
||||
<style>
|
||||
.brdblackempty {
|
||||
border: 2px solid black; /* Définir la couleur et l'épaisseur de la bordure */
|
||||
padding: 10px; /* Ajouter de l'espace autour du texte */
|
||||
margin: 10px 0; /* Ajouter de l'espace au-dessus et en-dessous de l'encadrement */
|
||||
}
|
||||
</style>
|
||||
|
||||
\newcommand{\Lasso}{\widehat{\beta}^{\lambda,L}}
|
||||
\newcommand{\R}{\mathbb{R}}
|
||||
\newcommand{\1}{\mathbb{1}}
|
||||
\newcommand{\RE}{\text{RE}}
|
||||
\newcommand{\sign}{\text{sign}}
|
||||
\newcommand{\e}{\epsilon}
|
||||
\renewcommand{\P}{\text{P}}
|
||||
|
||||
```{r setup, include=FALSE}
|
||||
knitr::opts_chunk$set(fig.align = "center", message = FALSE, warning = FALSE)
|
||||
```
|
||||
|
||||
## Packages
|
||||
|
||||
We begin by loading the necessary packages for this analysis.
|
||||
|
||||
```{r message=FALSE,warning=FALSE}
|
||||
library(glmnet) # for regularized regression
|
||||
library(caret) # for training and evaluating models
|
||||
library(ggplot2) # for data visualization
|
||||
library(ggfortify) # to extend ggplot2 features for autoplot
|
||||
library(reshape2) # for reshaping data
|
||||
library(Metrics) # for calculating metrics like RMSE
|
||||
library(vip) # for variable importance visualization
|
||||
library(dplyr) # for data manipulation
|
||||
library(tidyverse) # includes ggplot2, dplyr, and other useful packages
|
||||
```
|
||||
|
||||
****
|
||||
|
||||
# The Dataset: Cookies
|
||||
|
||||
## Upload Datasets
|
||||
|
||||
```{r}
|
||||
setwd("/Users/arthurdanjou/Workspace/studies/M2/Linear Models/Biaised Models")
|
||||
```
|
||||
|
||||
```{r}
|
||||
# Loading the training dataset
|
||||
cookie.train <- read.csv("Cookies_Train.csv", header = TRUE, row.names = 1)
|
||||
```
|
||||
|
||||
```{r}
|
||||
# Loading the test dataset
|
||||
cookie.test <- read.csv("Cookies_Test.csv", header = TRUE, row.names = 1)
|
||||
```
|
||||
|
||||
## Custom Control Parameters
|
||||
|
||||
```{r}
|
||||
custom <- trainControl(
|
||||
method = "repeatedcv",
|
||||
number = 5, # Using 5-fold cross-validation
|
||||
repeats = 3, # Repeating 3 times for robustness
|
||||
summaryFunction = defaultSummary, # Default metrics (RMSE, MAE)
|
||||
allowParallel = TRUE # Use parallel processing if resources allow
|
||||
)
|
||||
```
|
||||
|
||||
****
|
||||
|
||||
# Models Study
|
||||
|
||||
****
|
||||
|
||||
## Linear regression analysis
|
||||
|
||||
```{r}
|
||||
set.seed(602)
|
||||
linear.mod <- train(sugars ~ ., cookie.train, method = "lm", preProc = c("center", "scale"), trControl = custom)
|
||||
linear.mod$results
|
||||
```
|
||||
|
||||
```{r}
|
||||
Ytrain <- cookie.train$sugars
|
||||
dfc_train <- data.frame(ytrain = Ytrain, linear.mod = fitted(linear.mod))
|
||||
dfc_train %>% rmarkdown::paged_table()
|
||||
```
|
||||
|
||||
```{r}
|
||||
dfc_train %>%
|
||||
ggplot(aes(x = ytrain, y = linear.mod)) +
|
||||
geom_point(size = 2, color = "#983399") +
|
||||
geom_smooth(method = "lm", color = "#389900") +
|
||||
ggtitle("Train Dataset") +
|
||||
ylab("Fitted Values") +
|
||||
xlab("Actual Values (Y)")
|
||||
```
|
||||
|
||||
```{r}
|
||||
Ytest <- cookie.test$sugars
|
||||
dfc_test <- data.frame(ytest = Ytest)
|
||||
dfc_test$linear.mod <- predict(linear.mod, newdata = cookie.test)
|
||||
# dfc_test%>%rmarkdown::paged_table()
|
||||
|
||||
dfc_test %>%
|
||||
ggplot(aes(x = ytest, y = linear.mod)) +
|
||||
geom_point(size = 2, color = "#983399") +
|
||||
geom_smooth(method = "lm", color = "#389900") +
|
||||
ggtitle("Test Dataset") +
|
||||
ylab("Fitted Values") +
|
||||
xlab("Actual Values (Y)")
|
||||
```
|
||||
|
||||
****
|
||||
|
||||
## Lasso regression analysis
|
||||
|
||||
```{r}
|
||||
set.seed(602)
|
||||
# grid_Lasso <- seq(0.001, 0.1, length = 100)
|
||||
grid_Lasso <- 10^seq(0, 0.1, length = 100)
|
||||
Lasso <- train(sugars ~ ., cookie.train,
|
||||
method = "glmnet",
|
||||
tuneGrid = expand.grid(alpha = 1, lambda = grid_Lasso),
|
||||
preProc = c("center", "scale"),
|
||||
trControl = custom
|
||||
)
|
||||
```
|
||||
|
||||
```{r}
|
||||
library(plotly)
|
||||
ggplotly(ggplot(Lasso))
|
||||
```
|
||||
|
||||
```{r}
|
||||
Lasso$results %>% rmarkdown::paged_table()
|
||||
```
|
||||
|
||||
```{r}
|
||||
Lasso$bestTune
|
||||
```
|
||||
|
||||
```{r}
|
||||
Lasso$results[which.min(Lasso$results$RMSE), ]
|
||||
```
|
||||
|
||||
```{r}
|
||||
par(mfrow = c(1, 2))
|
||||
plot(Lasso$finalModel, xvar = "lambda", label = TRUE)
|
||||
plot(Lasso$finalModel, xvar = "dev", label = TRUE)
|
||||
```
|
||||
|
||||
```{r}
|
||||
library(vip)
|
||||
vip(Lasso, num_features = 15)
|
||||
```
|
||||
|
||||
```{r}
|
||||
coef_lasso <- data.frame(
|
||||
Variable = rownames(as.matrix(coef(Lasso$finalModel, Lasso$bestTune$lambda))),
|
||||
Coefficient = as.matrix(coef(Lasso$finalModel, Lasso$bestTune$lambda))[, 1]
|
||||
)
|
||||
coef_lasso %>%
|
||||
subset(Coefficient != 0) %>%
|
||||
rmarkdown::paged_table()
|
||||
```
|
||||
|
||||
****
|
||||
|
||||
## Ridge regression analysis
|
||||
|
||||
```{r}
|
||||
set.seed(602)
|
||||
lambda_ridge <- seq(11, 12, length = 100)
|
||||
ridge <- train(sugars ~ .,
|
||||
data = cookie.train,
|
||||
method = "glmnet",
|
||||
tuneGrid = expand.grid(alpha = 0, lambda = lambda_ridge),
|
||||
preProc = c("center", "scale"),
|
||||
trControl = custom
|
||||
)
|
||||
```
|
||||
|
||||
```{r}
|
||||
library(plotly)
|
||||
ggplotly(ggplot(ridge))
|
||||
```
|
||||
|
||||
```{r}
|
||||
ridge$results %>% rmarkdown::paged_table()
|
||||
```
|
||||
|
||||
```{r}
|
||||
ridge$bestTune
|
||||
```
|
||||
|
||||
```{r}
|
||||
ridge$results[which.min(ridge$results$RMSE), ]
|
||||
```
|
||||
|
||||
```{r}
|
||||
par(mfrow = c(1, 2))
|
||||
plot(ridge$finalModel, xvar = "lambda", label = TRUE)
|
||||
plot(ridge$finalModel, xvar = "dev", label = TRUE)
|
||||
```
|
||||
|
||||
```{r}
|
||||
vip(ridge, num_features = 15)
|
||||
```
|
||||
|
||||
```{r}
|
||||
data.frame(as.matrix(coef(ridge$finalModel, ridge$bestTune$lambda))) %>%
|
||||
rmarkdown::paged_table()
|
||||
```
|
||||
|
||||
****
|
||||
|
||||
## ElasticNet regression analysis
|
||||
|
||||
```{r}
|
||||
set.seed(602)
|
||||
alpha_Enet <- seq(0.5, 0.9, length = 10)
|
||||
lambda_Enet <- seq(0.01, 0.05, length = 10)
|
||||
|
||||
ElNet <- train(sugars ~ ., cookie.train,
|
||||
method = "glmnet",
|
||||
tuneGrid = expand.grid(alpha = alpha_Enet, lambda = lambda_Enet),
|
||||
preProc = c("center", "scale"),
|
||||
trControl = custom
|
||||
)
|
||||
```
|
||||
|
||||
```{r}
|
||||
ggplotly(ggplot(ElNet))
|
||||
```
|
||||
|
||||
```{r}
|
||||
ElNet$results %>% rmarkdown::paged_table()
|
||||
```
|
||||
|
||||
```{r}
|
||||
ElNet$bestTune
|
||||
```
|
||||
|
||||
```{r}
|
||||
ElNet$results[which.min(ElNet$results$RMSE), ]
|
||||
```
|
||||
|
||||
```{r}
|
||||
par(mfrow = c(1, 2))
|
||||
plot(ElNet$finalModel, xvar = "lambda", label = T)
|
||||
plot(ElNet$finalModel, xvar = "dev", label = T)
|
||||
```
|
||||
|
||||
```{r}
|
||||
vip(ElNet, num_features = 20)
|
||||
```
|
||||
|
||||
```{r}
|
||||
coef_elnet <- data.frame(
|
||||
Variable = rownames(as.matrix(coef(ElNet$finalModel, ElNet$bestTune$lambda))),
|
||||
Coefficient = as.matrix(coef(ElNet$finalModel, ElNet$bestTune$lambda))[, 1]
|
||||
)
|
||||
coef_elnet %>%
|
||||
subset(Coefficient != 0) %>%
|
||||
rmarkdown::paged_table()
|
||||
```
|
||||
|
||||
****
|
||||
|
||||
## PLS regression analysis
|
||||
|
||||
```{r}
|
||||
set.seed(602)
|
||||
pls_mod <- train(sugars ~ ., cookie.train,
|
||||
method = "pls",
|
||||
tuneLength = 20,
|
||||
preProc = c("center", "scale"),
|
||||
trControl = custom
|
||||
)
|
||||
```
|
||||
|
||||
```{r}
|
||||
ggplotly(ggplot(pls_mod))
|
||||
```
|
||||
|
||||
```{r}
|
||||
pls_mod$results %>% rmarkdown::paged_table()
|
||||
```
|
||||
|
||||
```{r}
|
||||
pls_mod$bestTune
|
||||
```
|
||||
|
||||
```{r}
|
||||
pls_mod$results[which.min(pls_mod$results$RMSE), ]
|
||||
```
|
||||
|
||||
```{r}
|
||||
vip(pls_mod, num_features = 20)
|
||||
```
|
||||
|
||||
```{r}
|
||||
data.frame(Coefficients = as.matrix(coef(pls_mod$finalModel))) %>%
|
||||
rmarkdown::paged_table()
|
||||
```
|
||||
|
||||
****
|
||||
|
||||
# Models Comparaison
|
||||
|
||||
****
|
||||
|
||||
## Graphical comparison of model performance
|
||||
|
||||
### On the training set
|
||||
|
||||
```{r}
|
||||
yTrain <- cookie.train$sugars
|
||||
dTrain <- data.frame(yTrain = yTrain)
|
||||
dTrain$linear <- fitted(linear.mod)
|
||||
dTrain$Lasso <- fitted(Lasso)
|
||||
dTrain$ridge <- fitted(ridge)
|
||||
dTrain$ElNet <- fitted(ElNet)
|
||||
dTrain$pls <- fitted(pls_mod)
|
||||
melt.dTrain <- melt(dTrain, id = "yTrain", variable.name = "model")
|
||||
melt.dTrain %>% ggplot() +
|
||||
aes(x = yTrain, y = value) +
|
||||
geom_smooth(method = "lm") +
|
||||
geom_point(size = 1, colour = "#983399") +
|
||||
facet_wrap(~model, nrow = 3) +
|
||||
ggtitle("Train dataset") +
|
||||
ylab("Fitted value") +
|
||||
xlab("Y")
|
||||
```
|
||||
|
||||
```{r}
|
||||
dTrain %>% rmarkdown::paged_table()
|
||||
```
|
||||
|
||||
```{r}
|
||||
melt.dTrain %>% rmarkdown::paged_table()
|
||||
```
|
||||
|
||||
### On the test set
|
||||
|
||||
```{r}
|
||||
yTest <- cookie.test$sugars
|
||||
dTest <- data.frame(yTest = yTest)
|
||||
dTest$linear <- predict(linear.mod, newdata = cookie.test)
|
||||
dTest$Lasso <- predict(Lasso, newdata = cookie.test)
|
||||
dTest$ridge <- predict(ridge, newdata = cookie.test)
|
||||
dTest$ElNet <- predict(ElNet, newdata = cookie.test)
|
||||
dTest$pls <- predict(pls_mod, newdata = cookie.test)
|
||||
# dTest%>% rmarkdown::paged_table()
|
||||
melt.dTest <- melt(dTest, id = "yTest", variable.name = "model")
|
||||
# melt.dTest%>% rmarkdown::paged_table()
|
||||
melt.dTest %>% ggplot() +
|
||||
aes(x = yTest, y = value) +
|
||||
geom_smooth(method = "lm") +
|
||||
geom_point(size = 1, colour = "#983399") +
|
||||
facet_wrap(~model, nrow = 3) +
|
||||
ggtitle("Test dataset") +
|
||||
ylab("Fitted value") +
|
||||
xlab("Y") +
|
||||
theme_bw()
|
||||
```
|
||||
|
||||
****
|
||||
|
||||
## RMSE comparaison among models
|
||||
|
||||
```{r}
|
||||
RMSE <- rbind.data.frame(
|
||||
cbind(rmse(yTrain, dTrain$linear), rmse(yTest, dTest$linear)),
|
||||
cbind(rmse(yTrain, dTrain$Lasso), rmse(yTest, dTest$Lasso)),
|
||||
cbind(rmse(yTrain, dTrain$ridge), rmse(yTest, dTest$ridge)),
|
||||
cbind(rmse(yTrain, dTrain$ElNet), rmse(yTest, dTest$ElNet)),
|
||||
cbind(rmse(yTrain, dTrain$pls), rmse(yTest, dTest$pls))
|
||||
)
|
||||
names(RMSE) <- c("Train", "Test")
|
||||
row.names(RMSE) <- c("Linear", "Lasso", "Ridge", "ElNet", "PLS")
|
||||
RMSE %>%
|
||||
kableExtra::kbl() %>%
|
||||
kableExtra::kable_styling()
|
||||
```
|
||||
|
||||
```{r}
|
||||
summary(yTrain)
|
||||
summary(yTest)
|
||||
```
|
||||
|
||||
```{r, echo=FALSE}
|
||||
LM_train <- round(rmse(yTrain, dTrain$linear), 2)
|
||||
LM_test <- round(rmse(yTest, dTest$linear), 2)
|
||||
Lasso_train <- round(rmse(yTrain, dTrain$Lasso), 2)
|
||||
Lasso_test <- round(rmse(yTest, dTest$Lasso), 2)
|
||||
Ridge_train <- round(rmse(yTrain, dTrain$ridge), 2)
|
||||
Ridge_test <- round(rmse(yTest, dTest$ridge), 2)
|
||||
ElNet_train <- round(rmse(yTrain, dTrain$ElNet), 2)
|
||||
ElNet_test <- round(rmse(yTest, dTest$ElNet), 2)
|
||||
PLS_train <- round(rmse(yTrain, dTrain$pls), 2)
|
||||
PLS_test <- round(rmse(yTest, dTest$pls), 2)
|
||||
```
|
||||
5696
M2/Linear Models/Biaised Models/Code_Lec3.html
Normal file
5696
M2/Linear Models/Biaised Models/Code_Lec3.html
Normal file
File diff suppressed because one or more lines are too long
33
M2/Linear Models/Biaised Models/Cookies_Test.csv
Normal file
33
M2/Linear Models/Biaised Models/Cookies_Test.csv
Normal file
File diff suppressed because one or more lines are too long
41
M2/Linear Models/Biaised Models/Cookies_Train.csv
Normal file
41
M2/Linear Models/Biaised Models/Cookies_Train.csv
Normal file
File diff suppressed because one or more lines are too long
13
studies.Rproj
Normal file
13
studies.Rproj
Normal file
@@ -0,0 +1,13 @@
|
||||
Version: 1.0
|
||||
|
||||
RestoreWorkspace: Default
|
||||
SaveWorkspace: Default
|
||||
AlwaysSaveHistory: Default
|
||||
|
||||
EnableCodeIndexing: Yes
|
||||
UseSpacesForTab: Yes
|
||||
NumSpacesForTab: 2
|
||||
Encoding: UTF-8
|
||||
|
||||
RnwWeave: Sweave
|
||||
LaTeX: pdfLaTeX
|
||||
Reference in New Issue
Block a user