mirror of
https://github.com/ArthurDanjou/ArtStudies.git
synced 2026-03-16 03:11:46 +01:00
Remove 'Linear Models' section from README.md as it is no longer relevant to the current focus areas.
This commit is contained in:
@@ -1,515 +0,0 @@
|
||||
---
|
||||
title: "Code: Biased regression"
|
||||
subtitle: "Methods for Regression and classification"
|
||||
author: "Katia Meziani"
|
||||
output:
|
||||
html_document:
|
||||
self_contained: true
|
||||
math_method:
|
||||
engine: mathjax
|
||||
url: https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js
|
||||
code_folding: hide
|
||||
css: ./style.css
|
||||
df_print: paged
|
||||
highlight: tango
|
||||
number_sections: no
|
||||
theme: flatly
|
||||
toc: yes
|
||||
toc_float:
|
||||
collapsed: no
|
||||
---
|
||||
|
||||
<style>
|
||||
.column {
|
||||
width: 50%;
|
||||
float: left;
|
||||
}
|
||||
</style>
|
||||
|
||||
<style>
|
||||
.image-wrapper {
|
||||
text-align: center; /* Centre toutes les images */
|
||||
}
|
||||
.image-container {
|
||||
display: inline-block;
|
||||
margin-right: 20px; /* Espacement entre les images */
|
||||
text-align: center; /* Centre le texte sous l'image */
|
||||
}
|
||||
.image-container:last-child {
|
||||
margin-right: 0; /* Pas d'espacement après la dernière image */
|
||||
}
|
||||
</style>
|
||||
|
||||
<style>
|
||||
.attention {
|
||||
font-size: 24px;
|
||||
color: red;
|
||||
font-weight: bold;
|
||||
}
|
||||
</style>
|
||||
|
||||
<style>
|
||||
.underline {
|
||||
text-decoration: underline;
|
||||
}
|
||||
</style>
|
||||
|
||||
<style contenteditable>
|
||||
.brd {
|
||||
border: 2px solid black; ; padding: 5px
|
||||
}
|
||||
</style>
|
||||
|
||||
<style>
|
||||
.brdpurple {
|
||||
border: 2px solid black;
|
||||
padding: 10px; /* Espace interne */
|
||||
background-color: rgba(128, 0, 128, 0.05); /* Fond violet très très clair avec transparence */
|
||||
margin: 10px 0; /* Espace autour de l'encadrement */
|
||||
}
|
||||
</style>
|
||||
|
||||
<style contenteditable>
|
||||
.brdred {
|
||||
border: 2px solid #B22222 ;
|
||||
padding: 10px; /* Ajouter de l'espace autour du texte */
|
||||
background-color: #f9f9f9; /* Optionnel : couleur de fond pour le texte */
|
||||
margin: 10px 0; /* Ajouter de l'espace au-dessus et en-dessous de l'encadrement */
|
||||
}
|
||||
</style>
|
||||
|
||||
<style>
|
||||
.solution {
|
||||
font-size: 24px;
|
||||
color: yellow; /* Couleur jaune pour simuler la lumière */
|
||||
}
|
||||
</style>
|
||||
|
||||
<style>
|
||||
.underline {
|
||||
text-decoration: underline;
|
||||
}
|
||||
</style>
|
||||
|
||||
<style>
|
||||
.brdgreen {
|
||||
border: 2px solid black; /* Bordure verte */
|
||||
padding: 10px; /* Espace interne */
|
||||
background-color: rgba(114, 213, 114, 0.03); /* Fond vert pastel très clair avec transparence */
|
||||
margin: 10px 0; /* Espace autour de l'encadrement */
|
||||
}
|
||||
</style>
|
||||
|
||||
<style>
|
||||
.brdblack {
|
||||
border: 2px solid black; /* Définir la couleur et l'épaisseur de la bordure */
|
||||
padding: 10px; /* Ajouter de l'espace autour du texte */
|
||||
background-color: #f9f9f9; /* Optionnel : couleur de fond pour le texte */
|
||||
margin: 10px 0; /* Ajouter de l'espace au-dessus et en-dessous de l'encadrement */
|
||||
}
|
||||
</style>
|
||||
|
||||
<style>
|
||||
.brdblackempty {
|
||||
border: 2px solid black; /* Définir la couleur et l'épaisseur de la bordure */
|
||||
padding: 10px; /* Ajouter de l'espace autour du texte */
|
||||
margin: 10px 0; /* Ajouter de l'espace au-dessus et en-dessous de l'encadrement */
|
||||
}
|
||||
</style>
|
||||
|
||||
\newcommand{\Lasso}{\widehat{\beta}^{\lambda,L}}
|
||||
\newcommand{\R}{\mathbb{R}}
|
||||
\newcommand{\1}{\mathbb{1}}
|
||||
\newcommand{\RE}{\text{RE}}
|
||||
\newcommand{\sign}{\text{sign}}
|
||||
\newcommand{\e}{\epsilon}
|
||||
\renewcommand{\P}{\text{P}}
|
||||
|
||||
```{r setup, include=FALSE}
|
||||
knitr::opts_chunk$set(fig.align = "center", message = FALSE, warning = FALSE)
|
||||
```
|
||||
|
||||
## Packages
|
||||
|
||||
We begin by loading the necessary packages for this analysis.
|
||||
|
||||
```{r message=FALSE,warning=FALSE}
|
||||
library(glmnet) # for regularized regression
|
||||
library(caret) # for training and evaluating models
|
||||
library(ggplot2) # for data visualization
|
||||
library(ggfortify) # to extend ggplot2 features for autoplot
|
||||
library(reshape2) # for reshaping data
|
||||
library(Metrics) # for calculating metrics like RMSE
|
||||
library(vip) # for variable importance visualization
|
||||
library(dplyr) # for data manipulation
|
||||
library(tidyverse) # includes ggplot2, dplyr, and other useful packages
|
||||
```
|
||||
|
||||
****
|
||||
|
||||
# The Dataset: Cookies
|
||||
|
||||
## Upload Datasets
|
||||
|
||||
```{r}
|
||||
setwd("/Users/arthurdanjou/Workspace/studies/M2/Linear Models/Biaised Models")
|
||||
```
|
||||
|
||||
```{r}
|
||||
# Loading the training dataset
|
||||
cookie.train <- read.csv("Cookies_Train.csv", header = TRUE, row.names = 1)
|
||||
```
|
||||
|
||||
```{r}
|
||||
# Loading the test dataset
|
||||
cookie.test <- read.csv("Cookies_Test.csv", header = TRUE, row.names = 1)
|
||||
```
|
||||
|
||||
## Custom Control Parameters
|
||||
|
||||
```{r}
|
||||
custom <- trainControl(
|
||||
method = "repeatedcv",
|
||||
number = 10, # Using 5-fold cross-validation
|
||||
repeats = 10, # Repeating 3 times for robustness
|
||||
summaryFunction = defaultSummary, # Default metrics (RMSE, MAE)
|
||||
allowParallel = TRUE # Use parallel processing if resources allow
|
||||
)
|
||||
```
|
||||
|
||||
****
|
||||
|
||||
# Models Study
|
||||
|
||||
****
|
||||
|
||||
## Linear regression analysis
|
||||
|
||||
```{r}
|
||||
set.seed(602)
|
||||
linear.mod <- train(sugars ~ ., cookie.train, method = "lm", preProc = c("center", "scale"), trControl = custom)
|
||||
linear.mod$results
|
||||
```
|
||||
|
||||
```{r}
|
||||
Ytrain <- cookie.train$sugars
|
||||
dfc_train <- data.frame(ytrain = Ytrain, linear.mod = fitted(linear.mod))
|
||||
dfc_train |> rmarkdown::paged_table()
|
||||
```
|
||||
|
||||
```{r}
|
||||
dfc_train |>
|
||||
ggplot(aes(x = ytrain, y = linear.mod)) +
|
||||
geom_point(size = 2, color = "#983399") +
|
||||
geom_smooth(method = "lm", color = "#389900") +
|
||||
ggtitle("Train Dataset") +
|
||||
ylab("Fitted Values") +
|
||||
xlab("Actual Values (Y)")
|
||||
```
|
||||
|
||||
```{r}
|
||||
Ytest <- cookie.test$sugars
|
||||
dfc_test <- data.frame(ytest = Ytest)
|
||||
dfc_test$linear.mod <- predict(linear.mod, newdata = cookie.test)
|
||||
# dfc_test|>rmarkdown::paged_table()
|
||||
|
||||
dfc_test |>
|
||||
ggplot(aes(x = ytest, y = linear.mod)) +
|
||||
geom_point(size = 2, color = "#983399") +
|
||||
geom_smooth(method = "lm", color = "#389900") +
|
||||
ggtitle("Test Dataset") +
|
||||
ylab("Fitted Values") +
|
||||
xlab("Actual Values (Y)")
|
||||
```
|
||||
|
||||
****
|
||||
|
||||
## Lasso regression analysis
|
||||
|
||||
```{r}
|
||||
set.seed(602)
|
||||
# grid_Lasso <- seq(0.001, 0.1, length = 100)
|
||||
grid_Lasso <- 10^seq(-4, 1, length = 100)
|
||||
Lasso <- train(sugars ~ ., cookie.train,
|
||||
method = "glmnet",
|
||||
tuneGrid = expand.grid(alpha = 1, lambda = grid_Lasso),
|
||||
preProc = c("center", "scale"),
|
||||
trControl = custom
|
||||
)
|
||||
```
|
||||
|
||||
```{r}
|
||||
library(plotly)
|
||||
ggplotly(ggplot(Lasso))
|
||||
```
|
||||
|
||||
```{r}
|
||||
Lasso$results |> rmarkdown::paged_table()
|
||||
```
|
||||
|
||||
```{r}
|
||||
Lasso$bestTune
|
||||
```
|
||||
|
||||
```{r}
|
||||
Lasso$results[which.min(Lasso$results$RMSE), ]
|
||||
```
|
||||
|
||||
```{r}
|
||||
par(mfrow = c(1, 2))
|
||||
plot(Lasso$finalModel, xvar = "lambda", label = TRUE)
|
||||
plot(Lasso$finalModel, xvar = "dev", label = TRUE)
|
||||
```
|
||||
|
||||
```{r}
|
||||
library(vip)
|
||||
vip(Lasso, num_features = 15)
|
||||
```
|
||||
|
||||
```{r}
|
||||
coef_lasso <- data.frame(
|
||||
Variable = rownames(as.matrix(coef(Lasso$finalModel, Lasso$bestTune$lambda))),
|
||||
Coefficient = as.matrix(coef(Lasso$finalModel, Lasso$bestTune$lambda))[, 1]
|
||||
)
|
||||
coef_lasso |>
|
||||
subset(Coefficient != 0) |>
|
||||
rmarkdown::paged_table()
|
||||
```
|
||||
|
||||
****
|
||||
|
||||
## Ridge regression analysis
|
||||
|
||||
```{r}
|
||||
set.seed(602)
|
||||
lambda_ridge <- seq(11, 12, length = 100)
|
||||
ridge <- train(sugars ~ .,
|
||||
data = cookie.train,
|
||||
method = "glmnet",
|
||||
tuneGrid = expand.grid(alpha = 0, lambda = lambda_ridge),
|
||||
preProc = c("center", "scale"),
|
||||
trControl = custom
|
||||
)
|
||||
```
|
||||
|
||||
```{r}
|
||||
library(plotly)
|
||||
ggplotly(ggplot(ridge))
|
||||
```
|
||||
|
||||
```{r}
|
||||
ridge$results |> rmarkdown::paged_table()
|
||||
```
|
||||
|
||||
```{r}
|
||||
ridge$bestTune
|
||||
```
|
||||
|
||||
```{r}
|
||||
ridge$results[which.min(ridge$results$RMSE), ]
|
||||
```
|
||||
|
||||
```{r}
|
||||
par(mfrow = c(1, 2))
|
||||
plot(ridge$finalModel, xvar = "lambda", label = TRUE)
|
||||
plot(ridge$finalModel, xvar = "dev", label = TRUE)
|
||||
```
|
||||
|
||||
```{r}
|
||||
vip(ridge, num_features = 15)
|
||||
```
|
||||
|
||||
```{r}
|
||||
data.frame(as.matrix(coef(ridge$finalModel, ridge$bestTune$lambda))) |>
|
||||
rmarkdown::paged_table()
|
||||
```
|
||||
|
||||
****
|
||||
|
||||
## ElasticNet regression analysis
|
||||
|
||||
```{r}
|
||||
set.seed(602)
|
||||
alpha_Enet <- seq(0.5, 0.9, length = 10)
|
||||
lambda_Enet <- seq(0.01, 0.05, length = 10)
|
||||
|
||||
ElNet <- train(sugars ~ ., cookie.train,
|
||||
method = "glmnet",
|
||||
tuneGrid = expand.grid(alpha = alpha_Enet, lambda = lambda_Enet),
|
||||
preProc = c("center", "scale"),
|
||||
trControl = custom
|
||||
)
|
||||
```
|
||||
|
||||
```{r}
|
||||
ggplotly(ggplot(ElNet))
|
||||
```
|
||||
|
||||
```{r}
|
||||
ElNet$results |> rmarkdown::paged_table()
|
||||
```
|
||||
|
||||
```{r}
|
||||
ElNet$bestTune
|
||||
```
|
||||
|
||||
```{r}
|
||||
ElNet$results[which.min(ElNet$results$RMSE), ]
|
||||
```
|
||||
|
||||
```{r}
|
||||
par(mfrow = c(1, 2))
|
||||
plot(ElNet$finalModel, xvar = "lambda", label = T)
|
||||
plot(ElNet$finalModel, xvar = "dev", label = T)
|
||||
```
|
||||
|
||||
```{r}
|
||||
vip(ElNet, num_features = 20)
|
||||
```
|
||||
|
||||
```{r}
|
||||
coef_elnet <- data.frame(
|
||||
Variable = rownames(as.matrix(coef(ElNet$finalModel, ElNet$bestTune$lambda))),
|
||||
Coefficient = as.matrix(coef(ElNet$finalModel, ElNet$bestTune$lambda))[, 1]
|
||||
)
|
||||
coef_elnet |>
|
||||
subset(Coefficient != 0) |>
|
||||
rmarkdown::paged_table()
|
||||
```
|
||||
|
||||
****
|
||||
|
||||
## PLS regression analysis
|
||||
|
||||
```{r}
|
||||
set.seed(602)
|
||||
pls_mod <- train(sugars ~ ., cookie.train,
|
||||
method = "pls",
|
||||
tuneLength = 20,
|
||||
preProc = c("center", "scale"),
|
||||
trControl = custom
|
||||
)
|
||||
```
|
||||
|
||||
```{r}
|
||||
ggplotly(ggplot(pls_mod))
|
||||
```
|
||||
|
||||
```{r}
|
||||
pls_mod$results |> rmarkdown::paged_table()
|
||||
```
|
||||
|
||||
```{r}
|
||||
pls_mod$bestTune
|
||||
```
|
||||
|
||||
```{r}
|
||||
pls_mod$results[which.min(pls_mod$results$RMSE), ]
|
||||
```
|
||||
|
||||
```{r}
|
||||
vip(pls_mod, num_features = 20)
|
||||
```
|
||||
|
||||
```{r}
|
||||
data.frame(Coefficients = as.matrix(coef(pls_mod$finalModel))) |>
|
||||
rmarkdown::paged_table()
|
||||
```
|
||||
|
||||
****
|
||||
|
||||
# Models Comparaison
|
||||
|
||||
****
|
||||
|
||||
## Graphical comparison of model performance
|
||||
|
||||
### On the training set
|
||||
|
||||
```{r}
|
||||
yTrain <- cookie.train$sugars
|
||||
dTrain <- data.frame(yTrain = yTrain)
|
||||
dTrain$linear <- fitted(linear.mod)
|
||||
dTrain$Lasso <- fitted(Lasso)
|
||||
dTrain$ridge <- fitted(ridge)
|
||||
dTrain$ElNet <- fitted(ElNet)
|
||||
dTrain$pls <- fitted(pls_mod)
|
||||
melt.dTrain <- melt(dTrain, id = "yTrain", variable.name = "model")
|
||||
melt.dTrain |> ggplot() +
|
||||
aes(x = yTrain, y = value) +
|
||||
geom_smooth(method = "lm") +
|
||||
geom_point(size = 1, colour = "#983399") +
|
||||
facet_wrap(~model, nrow = 3) +
|
||||
ggtitle("Train dataset") +
|
||||
ylab("Fitted value") +
|
||||
xlab("Y")
|
||||
```
|
||||
|
||||
```{r}
|
||||
dTrain |> rmarkdown::paged_table()
|
||||
```
|
||||
|
||||
```{r}
|
||||
melt.dTrain |> rmarkdown::paged_table()
|
||||
```
|
||||
|
||||
### On the test set
|
||||
|
||||
```{r}
|
||||
yTest <- cookie.test$sugars
|
||||
dTest <- data.frame(yTest = yTest)
|
||||
dTest$linear <- predict(linear.mod, newdata = cookie.test)
|
||||
dTest$Lasso <- predict(Lasso, newdata = cookie.test)
|
||||
dTest$ridge <- predict(ridge, newdata = cookie.test)
|
||||
dTest$ElNet <- predict(ElNet, newdata = cookie.test)
|
||||
dTest$pls <- predict(pls_mod, newdata = cookie.test)
|
||||
# dTest|> rmarkdown::paged_table()
|
||||
melt.dTest <- melt(dTest, id = "yTest", variable.name = "model")
|
||||
# melt.dTest|> rmarkdown::paged_table()
|
||||
melt.dTest |> ggplot() +
|
||||
aes(x = yTest, y = value) +
|
||||
geom_smooth(method = "lm") +
|
||||
geom_point(size = 1, colour = "#983399") +
|
||||
facet_wrap(~model, nrow = 3) +
|
||||
ggtitle("Test dataset") +
|
||||
ylab("Fitted value") +
|
||||
xlab("Y") +
|
||||
theme_bw()
|
||||
```
|
||||
|
||||
****
|
||||
|
||||
## RMSE comparaison among models
|
||||
|
||||
```{r}
|
||||
RMSE <- rbind.data.frame(
|
||||
cbind(rmse(yTrain, dTrain$linear), rmse(yTest, dTest$linear)),
|
||||
cbind(rmse(yTrain, dTrain$Lasso), rmse(yTest, dTest$Lasso)),
|
||||
cbind(rmse(yTrain, dTrain$ridge), rmse(yTest, dTest$ridge)),
|
||||
cbind(rmse(yTrain, dTrain$ElNet), rmse(yTest, dTest$ElNet)),
|
||||
cbind(rmse(yTrain, dTrain$pls), rmse(yTest, dTest$pls))
|
||||
)
|
||||
names(RMSE) <- c("Train", "Test")
|
||||
row.names(RMSE) <- c("Linear", "Lasso", "Ridge", "ElNet", "PLS")
|
||||
RMSE |>
|
||||
kableExtra::kbl() |>
|
||||
kableExtra::kable_styling()
|
||||
```
|
||||
|
||||
```{r}
|
||||
summary(yTrain)
|
||||
summary(yTest)
|
||||
```
|
||||
|
||||
```{r, echo=FALSE}
|
||||
LM_train <- round(rmse(yTrain, dTrain$linear), 2)
|
||||
LM_test <- round(rmse(yTest, dTest$linear), 2)
|
||||
Lasso_train <- round(rmse(yTrain, dTrain$Lasso), 2)
|
||||
Lasso_test <- round(rmse(yTest, dTest$Lasso), 2)
|
||||
Ridge_train <- round(rmse(yTrain, dTrain$ridge), 2)
|
||||
Ridge_test <- round(rmse(yTest, dTest$ridge), 2)
|
||||
ElNet_train <- round(rmse(yTrain, dTrain$ElNet), 2)
|
||||
ElNet_test <- round(rmse(yTest, dTest$ElNet), 2)
|
||||
PLS_train <- round(rmse(yTrain, dTrain$pls), 2)
|
||||
PLS_test <- round(rmse(yTest, dTest$pls), 2)
|
||||
```
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -43,7 +43,6 @@
|
||||
| `Deep Learning` | Neural networks and deep architectures |
|
||||
| `Enjeux Climatiques` | Climate issues and data analysis |
|
||||
| `Generative AI` | Generative models (LLMs, diffusion) |
|
||||
| `Linear Models` | Linear modeling techniques |
|
||||
| `Machine Learning` | Core machine learning algorithms |
|
||||
| `Machine Learning 2` | Applications of machine learning algorithms |
|
||||
| `Natural Language Processing` | NLP techniques and applications |
|
||||
|
||||
Reference in New Issue
Block a user