Remove 'Linear Models' section from README.md as it is no longer relevant to the current focus areas.

This commit is contained in:
2026-02-20 14:23:08 +01:00
parent 8c3d6b5f32
commit dedc4f24a6
5 changed files with 0 additions and 6309 deletions

View File

@@ -1,515 +0,0 @@
---
title: "Code: Biased regression"
subtitle: "Methods for Regression and classification"
author: "Katia Meziani"
output:
html_document:
self_contained: true
math_method:
engine: mathjax
url: https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js
code_folding: hide
css: ./style.css
df_print: paged
highlight: tango
number_sections: no
theme: flatly
toc: yes
toc_float:
collapsed: no
---
<style>
.column {
width: 50%;
float: left;
}
</style>
<style>
.image-wrapper {
text-align: center; /* Centre toutes les images */
}
.image-container {
display: inline-block;
margin-right: 20px; /* Espacement entre les images */
text-align: center; /* Centre le texte sous l'image */
}
.image-container:last-child {
margin-right: 0; /* Pas d'espacement après la dernière image */
}
</style>
<style>
.attention {
font-size: 24px;
color: red;
font-weight: bold;
}
</style>
<style>
.underline {
text-decoration: underline;
}
</style>
<style contenteditable>
.brd {
border: 2px solid black; padding: 5px;
}
</style>
<style>
.brdpurple {
border: 2px solid black;
padding: 10px; /* Espace interne */
background-color: rgba(128, 0, 128, 0.05); /* Fond violet très très clair avec transparence */
margin: 10px 0; /* Espace autour de l'encadrement */
}
</style>
<style contenteditable>
.brdred {
border: 2px solid #B22222 ;
padding: 10px; /* Ajouter de l'espace autour du texte */
background-color: #f9f9f9; /* Optionnel : couleur de fond pour le texte */
margin: 10px 0; /* Ajouter de l'espace au-dessus et en-dessous de l'encadrement */
}
</style>
<style>
.solution {
font-size: 24px;
color: yellow; /* Couleur jaune pour simuler la lumière */
}
</style>
<style>
.underline {
text-decoration: underline;
}
</style>
<style>
.brdgreen {
border: 2px solid black; /* Black border */
padding: 10px; /* Espace interne */
background-color: rgba(114, 213, 114, 0.03); /* Fond vert pastel très clair avec transparence */
margin: 10px 0; /* Espace autour de l'encadrement */
}
</style>
<style>
.brdblack {
border: 2px solid black; /* Définir la couleur et l'épaisseur de la bordure */
padding: 10px; /* Ajouter de l'espace autour du texte */
background-color: #f9f9f9; /* Optionnel : couleur de fond pour le texte */
margin: 10px 0; /* Ajouter de l'espace au-dessus et en-dessous de l'encadrement */
}
</style>
<style>
.brdblackempty {
border: 2px solid black; /* Définir la couleur et l'épaisseur de la bordure */
padding: 10px; /* Ajouter de l'espace autour du texte */
margin: 10px 0; /* Ajouter de l'espace au-dessus et en-dessous de l'encadrement */
}
</style>
\newcommand{\Lasso}{\widehat{\beta}^{\lambda,L}}
\newcommand{\R}{\mathbb{R}}
\newcommand{\1}{\mathbb{1}}
\newcommand{\RE}{\text{RE}}
\newcommand{\sign}{\text{sign}}
\newcommand{\e}{\epsilon}
\renewcommand{\P}{\text{P}}
```{r setup, include=FALSE}
# Global chunk options: centre all figures and silence messages/warnings.
knitr::opts_chunk$set(fig.align = "center", message = FALSE, warning = FALSE)
```
## Packages
We begin by loading the necessary packages for this analysis.
```{r message=FALSE,warning=FALSE}
# All package loads are consolidated here; later chunks should not need
# additional library() calls. Note tidyverse re-attaches ggplot2/dplyr,
# which is harmless but redundant.
library(glmnet) # for regularized regression
library(caret) # for training and evaluating models
library(plotly) # interactive ggplot wrappers (ggplotly) used in tuning plots below
library(ggplot2) # for data visualization
library(ggfortify) # to extend ggplot2 features for autoplot
library(reshape2) # for reshaping data (melt)
library(Metrics) # for calculating metrics like RMSE
library(vip) # for variable importance visualization
library(dplyr) # for data manipulation
library(tidyverse) # includes ggplot2, dplyr, and other useful packages
```
****
# The Dataset: Cookies
## Upload Datasets
```{r}
# NOTE: a plain setwd() inside a knitr chunk does NOT persist — knitr resets
# the working directory after each chunk, so the read.csv() calls in later
# chunks would not see it. Setting the knitr root.dir option applies the
# directory to every subsequent chunk.
# TODO(review): prefer a path relative to the .Rmd file so the document is
# portable across machines.
knitr::opts_knit$set(root.dir = "/Users/arthurdanjou/Workspace/studies/M2/Linear Models/Biaised Models")
```
```{r}
# Loading the training dataset.
# The first CSV column supplies row names; remaining columns are the
# predictors plus the response `sugars` (assumed from usage below — verify).
cookie.train <- read.csv("Cookies_Train.csv", header = TRUE, row.names = 1)
```
```{r}
# Loading the test dataset (same layout as the training file).
cookie.test <- read.csv("Cookies_Test.csv", header = TRUE, row.names = 1)
```
## Custom Control Parameters
```{r}
# Shared resampling scheme reused by every train() call below.
custom <- trainControl(
  method = "repeatedcv",
  number = 10, # 10-fold cross-validation
  repeats = 10, # repeated 10 times for robustness
  summaryFunction = defaultSummary, # Default metrics (RMSE, Rsquared, MAE)
  allowParallel = TRUE # Use parallel processing if resources allow
)
```
****
# Models Study
****
## Linear regression analysis
```{r}
set.seed(602)
# Ordinary least squares on centred/scaled predictors, evaluated with the
# shared repeated-CV scheme defined above.
linear.mod <- train(
  sugars ~ .,
  data = cookie.train,
  method = "lm",
  preProc = c("center", "scale"),
  trControl = custom
)
linear.mod$results
```
```{r}
# Observed response alongside the in-sample fitted values.
Ytrain <- cookie.train$sugars
dfc_train <- data.frame(ytrain = Ytrain, linear.mod = fitted(linear.mod))
rmarkdown::paged_table(dfc_train)
```
```{r}
# Fitted vs. observed on the training set; reference line is an OLS smooth.
p_train <- ggplot(dfc_train, aes(x = ytrain, y = linear.mod)) +
  geom_point(size = 2, color = "#983399") +
  geom_smooth(method = "lm", color = "#389900") +
  labs(title = "Train Dataset", x = "Actual Values (Y)", y = "Fitted Values")
p_train
```
```{r}
# Same diagnostic on the held-out test set.
Ytest <- cookie.test$sugars
dfc_test <- data.frame(ytest = Ytest)
dfc_test$linear.mod <- predict(linear.mod, newdata = cookie.test)
# dfc_test|>rmarkdown::paged_table()
p_test <- ggplot(dfc_test, aes(x = ytest, y = linear.mod)) +
  geom_point(size = 2, color = "#983399") +
  geom_smooth(method = "lm", color = "#389900") +
  labs(title = "Test Dataset", x = "Actual Values (Y)", y = "Fitted Values")
p_test
```
****
## Lasso regression analysis
```{r}
set.seed(602)
# Lasso: alpha = 1 in glmnet; lambda searched over a log-spaced grid.
# grid_Lasso <- seq(0.001, 0.1, length = 100)
grid_Lasso <- 10^seq(-4, 1, length = 100)
Lasso <- train(
  sugars ~ .,
  data = cookie.train,
  method = "glmnet",
  tuneGrid = expand.grid(alpha = 1, lambda = grid_Lasso),
  preProc = c("center", "scale"),
  trControl = custom
)
```
```{r}
library(plotly)
# Interactive view of CV RMSE as a function of lambda.
ggplotly(ggplot(Lasso))
```
```{r}
# Full resampling results for every lambda in the grid.
Lasso$results |> rmarkdown::paged_table()
```
```{r}
# Lambda retained by caret (lowest CV RMSE).
Lasso$bestTune
```
```{r}
# Results row corresponding to the best lambda.
Lasso$results[which.min(Lasso$results$RMSE), ]
```
```{r}
# Coefficient paths: shrinkage vs. log-lambda and vs. deviance explained.
par(mfrow = c(1, 2))
plot(Lasso$finalModel, xvar = "lambda", label = TRUE)
plot(Lasso$finalModel, xvar = "dev", label = TRUE)
```
```{r}
# 15 most important predictors for the Lasso model.
# vip is already attached in the Packages chunk; the redundant
# library(vip) call was removed.
vip(Lasso, num_features = 15)
```
```{r}
# Variables selected by the Lasso (non-zero coefficients at the best lambda).
# Extract the coefficient matrix once instead of computing it twice.
lasso_coef_mat <- as.matrix(coef(Lasso$finalModel, Lasso$bestTune$lambda))
coef_lasso <- data.frame(
  Variable = rownames(lasso_coef_mat),
  Coefficient = lasso_coef_mat[, 1]
)
coef_lasso |>
  subset(Coefficient != 0) |>
  rmarkdown::paged_table()
```
****
## Ridge regression analysis
```{r}
set.seed(602)
# Ridge: alpha = 0; lambda searched over a narrow linear grid.
lambda_ridge <- seq(11, 12, length = 100)
ridge <- train(
  sugars ~ .,
  data = cookie.train,
  method = "glmnet",
  tuneGrid = expand.grid(alpha = 0, lambda = lambda_ridge),
  preProc = c("center", "scale"),
  trControl = custom
)
```
```{r}
# Interactive view of CV RMSE over the ridge lambda grid.
# plotly is already attached earlier in the document; the redundant
# library(plotly) call was removed.
ggplotly(ggplot(ridge))
```
```{r}
# Full resampling results across the ridge lambda grid.
ridge$results |> rmarkdown::paged_table()
```
```{r}
# Lambda retained by caret (lowest CV RMSE).
ridge$bestTune
```
```{r}
# Results row corresponding to the best lambda.
ridge$results[which.min(ridge$results$RMSE), ]
```
```{r}
# Coefficient paths vs. log-lambda and vs. deviance explained.
par(mfrow = c(1, 2))
plot(ridge$finalModel, xvar = "lambda", label = TRUE)
plot(ridge$finalModel, xvar = "dev", label = TRUE)
```
```{r}
# 15 most important predictors for the ridge model.
vip(ridge, num_features = 15)
```
```{r}
# Ridge keeps every predictor: show all coefficients at the best lambda.
ridge_coef_mat <- as.matrix(coef(ridge$finalModel, ridge$bestTune$lambda))
rmarkdown::paged_table(data.frame(ridge_coef_mat))
```
****
## ElasticNet regression analysis
```{r}
set.seed(602)
# Elastic net: joint grid over the mixing parameter alpha and lambda.
alpha_Enet <- seq(0.5, 0.9, length = 10)
lambda_Enet <- seq(0.01, 0.05, length = 10)
ElNet <- train(
  sugars ~ .,
  data = cookie.train,
  method = "glmnet",
  tuneGrid = expand.grid(alpha = alpha_Enet, lambda = lambda_Enet),
  preProc = c("center", "scale"),
  trControl = custom
)
```
```{r}
# Interactive view of CV RMSE over the (alpha, lambda) grid.
ggplotly(ggplot(ElNet))
```
```{r}
# Full resampling results for the elastic-net grid.
ElNet$results |> rmarkdown::paged_table()
```
```{r}
# (alpha, lambda) pair retained by caret (lowest CV RMSE).
ElNet$bestTune
```
```{r}
# Results row corresponding to the best (alpha, lambda).
ElNet$results[which.min(ElNet$results$RMSE), ]
```
```{r}
# Coefficient paths for the tuned elastic net.
# TRUE spelled out instead of T: T is an ordinary variable and can be
# reassigned, so relying on it is fragile.
par(mfrow = c(1, 2))
plot(ElNet$finalModel, xvar = "lambda", label = TRUE)
plot(ElNet$finalModel, xvar = "dev", label = TRUE)
```
```{r}
# 20 most important predictors for the elastic net.
vip(ElNet, num_features = 20)
```
```{r}
# Variables kept by the elastic net (non-zero coefficients at best lambda).
# Extract the coefficient matrix once instead of computing it twice.
elnet_coef_mat <- as.matrix(coef(ElNet$finalModel, ElNet$bestTune$lambda))
coef_elnet <- data.frame(
  Variable = rownames(elnet_coef_mat),
  Coefficient = elnet_coef_mat[, 1]
)
coef_elnet |>
  subset(Coefficient != 0) |>
  rmarkdown::paged_table()
```
****
## PLS regression analysis
```{r}
set.seed(602)
# Partial least squares; tuneLength = 20 searches 1..20 components.
pls_mod <- train(
  sugars ~ .,
  data = cookie.train,
  method = "pls",
  tuneLength = 20,
  preProc = c("center", "scale"),
  trControl = custom
)
```
```{r}
# CV RMSE as a function of the number of PLS components.
ggplotly(ggplot(pls_mod))
```
```{r}
# Full resampling results for 1..20 components.
pls_mod$results |> rmarkdown::paged_table()
```
```{r}
# Number of components retained by caret (lowest CV RMSE).
pls_mod$bestTune
```
```{r}
# Results row corresponding to the best number of components.
pls_mod$results[which.min(pls_mod$results$RMSE), ]
```
```{r}
# 20 most important predictors for the PLS model.
vip(pls_mod, num_features = 20)
```
```{r}
# Regression coefficients of the final PLS fit.
data.frame(Coefficients = as.matrix(coef(pls_mod$finalModel))) |>
  rmarkdown::paged_table()
```
****
# Models Comparison
****
## Graphical comparison of model performance
### On the training set
```{r}
# In-sample fitted values from all five models, one column per model.
yTrain <- cookie.train$sugars
dTrain <- data.frame(yTrain = yTrain)
dTrain$linear <- fitted(linear.mod)
dTrain$Lasso <- fitted(Lasso)
dTrain$ridge <- fitted(ridge)
dTrain$ElNet <- fitted(ElNet)
dTrain$pls <- fitted(pls_mod)
# Long format: one (model, fitted value) pair per row, for faceting.
melt.dTrain <- melt(dTrain, id = "yTrain", variable.name = "model")
ggplot(melt.dTrain, aes(x = yTrain, y = value)) +
  geom_smooth(method = "lm") +
  geom_point(size = 1, colour = "#983399") +
  facet_wrap(~model, nrow = 3) +
  labs(title = "Train dataset", x = "Y", y = "Fitted value")
```
```{r}
# Wide table: one fitted-value column per model.
dTrain |> rmarkdown::paged_table()
```
```{r}
# Long version used for the faceted training plot.
melt.dTrain |> rmarkdown::paged_table()
```
### On the test set
```{r}
# Out-of-sample predictions from all five models on the test set.
yTest <- cookie.test$sugars
dTest <- data.frame(yTest = yTest)
dTest$linear <- predict(linear.mod, newdata = cookie.test)
dTest$Lasso <- predict(Lasso, newdata = cookie.test)
dTest$ridge <- predict(ridge, newdata = cookie.test)
dTest$ElNet <- predict(ElNet, newdata = cookie.test)
dTest$pls <- predict(pls_mod, newdata = cookie.test)
# dTest|> rmarkdown::paged_table()
# Long format for faceting, mirroring the training-set figure.
melt.dTest <- melt(dTest, id = "yTest", variable.name = "model")
# melt.dTest|> rmarkdown::paged_table()
ggplot(melt.dTest, aes(x = yTest, y = value)) +
  geom_smooth(method = "lm") +
  geom_point(size = 1, colour = "#983399") +
  facet_wrap(~model, nrow = 3) +
  labs(title = "Test dataset", x = "Y", y = "Fitted value") +
  theme_bw()
```
****
## RMSE comparison among models
```{r}
# Train/test RMSE for every model. Built column-wise with vapply instead of
# rbind-ing 1x2 matrices, which is clearer and avoids the repeated cbind()
# calls of the original construction.
models <- c(Linear = "linear", Lasso = "Lasso", Ridge = "ridge", ElNet = "ElNet", PLS = "pls")
RMSE <- data.frame(
  Train = vapply(models, function(m) rmse(yTrain, dTrain[[m]]), numeric(1)),
  Test = vapply(models, function(m) rmse(yTest, dTest[[m]]), numeric(1)),
  row.names = names(models)
)
RMSE |>
  kableExtra::kbl() |>
  kableExtra::kable_styling()
```
```{r}
# Scale of the response on both splits, to put the RMSE values in context.
summary(yTrain)
summary(yTest)
```
```{r, echo=FALSE}
# Rounded RMSE values, presumably referenced inline in later narrative text
# (the chunk is echo=FALSE) — verify against the rest of the document before
# renaming any of these variables.
LM_train <- round(rmse(yTrain, dTrain$linear), 2)
LM_test <- round(rmse(yTest, dTest$linear), 2)
Lasso_train <- round(rmse(yTrain, dTrain$Lasso), 2)
Lasso_test <- round(rmse(yTest, dTest$Lasso), 2)
Ridge_train <- round(rmse(yTrain, dTrain$ridge), 2)
Ridge_test <- round(rmse(yTest, dTest$ridge), 2)
ElNet_train <- round(rmse(yTrain, dTrain$ElNet), 2)
ElNet_test <- round(rmse(yTest, dTest$ElNet), 2)
PLS_train <- round(rmse(yTrain, dTrain$pls), 2)
PLS_test <- round(rmse(yTest, dTest$pls), 2)
```

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -43,7 +43,6 @@
| `Deep Learning` | Neural networks and deep architectures |
| `Enjeux Climatiques` | Climate issues and data analysis |
| `Generative AI` | Generative models (LLMs, diffusion) |
| `Linear Models` | Linear modeling techniques |
| `Machine Learning` | Core machine learning algorithms |
| `Machine Learning 2` | Applications of machine learning algorithms |
| `Natural Language Processing` | NLP techniques and applications |