diff --git a/M2/Linear Models/Biaised Models/Code_Lec3.Rmd b/M2/Linear Models/Biaised Models/Code_Lec3.Rmd index eab533d..c0d2a6d 100644 --- a/M2/Linear Models/Biaised Models/Code_Lec3.Rmd +++ b/M2/Linear Models/Biaised Models/Code_Lec3.Rmd @@ -229,7 +229,7 @@ dfc_test %>% ```{r} set.seed(602) # grid_Lasso <- seq(0.001, 0.1, length = 100) -grid_Lasso <- 10^seq(0, 0.1, length = 100) +grid_Lasso <- 10^seq(-4, 1, length = 100) Lasso <- train(sugars ~ ., cookie.train, method = "glmnet", tuneGrid = expand.grid(alpha = 1, lambda = grid_Lasso), diff --git a/M2/Linear Models/Biaised Models/Code_Lec3.html b/M2/Linear Models/Biaised Models/Code_Lec3.html index 8094372..809b252 100644 --- a/M2/Linear Models/Biaised Models/Code_Lec3.html +++ b/M2/Linear Models/Biaised Models/Code_Lec3.html @@ -5204,35 +5204,36 @@ margin: 10px 0;

Packages

We begin by loading the necessary packages for this analysis.

-
library(glmnet)    # for regularized regression
-library(caret)      # for training and evaluating models
-library(ggplot2)    # for data visualization
-library(ggfortify)  # to extend ggplot2 features for autoplot
-library(reshape2)   # for reshaping data
-library(Metrics)    # for calculating metrics like RMSE
-library(vip)        # for variable importance visualization
-library(dplyr)      # for data manipulation
-library(tidyverse)  # includes ggplot2, dplyr, and other useful packages
+
library(glmnet) # for regularized regression
+library(caret) # for training and evaluating models
+library(ggplot2) # for data visualization
+library(ggfortify) # to extend ggplot2 features for autoplot
+library(reshape2) # for reshaping data
+library(Metrics) # for calculating metrics like RMSE
+library(vip) # for variable importance visualization
+library(dplyr) # for data manipulation
+library(tidyverse) # includes ggplot2, dplyr, and other useful packages

The Dataset: Cookies

Upload Datasets

-
# Loading the training dataset
-cookie.train <- read.csv('Cookies_Train.csv', header = TRUE, row.names = 1)
-
# Loading the test dataset
-cookie.test <- read.csv('Cookies_Test.csv', header = TRUE, row.names = 1)
+
setwd("/Users/arthurdanjou/Workspace/studies/M2/Linear Models/Biaised Models")
+
# Loading the training dataset
+cookie.train <- read.csv("Cookies_Train.csv", header = TRUE, row.names = 1)
+
# Loading the test dataset
+cookie.test <- read.csv("Cookies_Test.csv", header = TRUE, row.names = 1)

Custom Control Parameters

-
custom <- trainControl(
-  method = 'repeatedcv',
-  number = 5,  # Using 5-fold cross-validation
-  repeats = 3,  # Repeating 3 times for robustness
-  summaryFunction = defaultSummary,  # Default metrics (RMSE, MAE)
-  allowParallel = TRUE  # Use parallel processing if resources allow
-)
+
custom <- trainControl(
+  method = "repeatedcv",
+  number = 5, # Using 5-fold cross-validation
+  repeats = 3, # Repeating 3 times for robustness
+  summaryFunction = defaultSummary, # Default metrics (RMSE, MAE)
+  allowParallel = TRUE # Use parallel processing if resources allow
+)

@@ -5241,92 +5242,92 @@ margin: 10px 0;

Linear regression analysis

-
set.seed(602)
-linear.mod <- train(sugars~.,cookie.train,
-                  method='lm',
-                  preProc = c("center", "scale"),
-                  trControl=custom)
-linear.mod$results
+
set.seed(602)
+linear.mod <- train(sugars ~ ., cookie.train, method = "lm", preProc = c("center", "scale"), trControl = custom)
+linear.mod$results
-
Ytrain <- cookie.train$sugars
-dfc_train <- data.frame(ytrain=Ytrain, linear.mod = fitted(linear.mod))
-dfc_train %>% rmarkdown::paged_table()
+
Ytrain <- cookie.train$sugars
+dfc_train <- data.frame(ytrain = Ytrain, linear.mod = fitted(linear.mod))
+dfc_train %>% rmarkdown::paged_table()
-
dfc_train %>% 
-  ggplot(aes(x = ytrain, y = linear.mod)) + 
-  geom_point(size = 2, color = "#983399") +
-  geom_smooth(method = "lm", color = "#389900") +
-  ggtitle("Train Dataset") +
-  ylab("Fitted Values") + 
-  xlab("Actual Values (Y)") 
+
dfc_train %>%
+  ggplot(aes(x = ytrain, y = linear.mod)) +
+  geom_point(size = 2, color = "#983399") +
+  geom_smooth(method = "lm", color = "#389900") +
+  ggtitle("Train Dataset") +
+  ylab("Fitted Values") +
+  xlab("Actual Values (Y)")

-
Ytest<-cookie.test$sugars
-dfc_test<-data.frame(ytest=Ytest)
-dfc_test$linear.mod<-predict(linear.mod,newdata = cookie.test)
-#dfc_test%>%rmarkdown::paged_table()
-
-dfc_test %>% 
-  ggplot(aes(x = ytest, y = linear.mod)) + 
-  geom_point(size = 2, color = "#983399") +
-  geom_smooth(method = "lm", color = "#389900") +
-  ggtitle("Test Dataset") +
-  ylab("Fitted Values") + 
-  xlab("Actual Values (Y)")
+
Ytest <- cookie.test$sugars
+dfc_test <- data.frame(ytest = Ytest)
+dfc_test$linear.mod <- predict(linear.mod, newdata = cookie.test)
+# dfc_test%>%rmarkdown::paged_table()
+
+dfc_test %>%
+  ggplot(aes(x = ytest, y = linear.mod)) +
+  geom_point(size = 2, color = "#983399") +
+  geom_smooth(method = "lm", color = "#389900") +
+  ggtitle("Test Dataset") +
+  ylab("Fitted Values") +
+  xlab("Actual Values (Y)")


Lasso regression analysis

-
set.seed(602)
-#grid_Lasso <- seq(0.001, 0.1, length = 100)
-grid_Lasso <- 10^seq(-4, -1, length = 100) 
-Lasso <- train(sugars ~ ., cookie.train,
-               method = 'glmnet',
-               tuneGrid = expand.grid(alpha = 1, lambda = grid_Lasso),
-               preProc = c("center", "scale"),
-               trControl = custom)
-
library(plotly)
-ggplotly(ggplot(Lasso))
+
set.seed(602)
+# grid_Lasso <- seq(0.001, 0.1, length = 100)
+grid_Lasso <- 10^seq(-4, 1, length = 100)
+Lasso <- train(sugars ~ ., cookie.train,
+  method = "glmnet",
+  tuneGrid = expand.grid(alpha = 1, lambda = grid_Lasso),
+  preProc = c("center", "scale"),
+  trControl = custom
+)
+
library(plotly)
+ggplotly(ggplot(Lasso))
- -
Lasso$results%>%rmarkdown::paged_table()
+ +
Lasso$results %>% rmarkdown::paged_table()
-
Lasso$bestTune
+
Lasso$bestTune
-
Lasso$results[which.min(Lasso$results$RMSE), ]
+
Lasso$results[which.min(Lasso$results$RMSE), ]
-
par(mfrow=c(1, 2))
-plot(Lasso$finalModel, xvar = "lambda", label = TRUE)
-plot(Lasso$finalModel, xvar = "dev", label = TRUE)
+
par(mfrow = c(1, 2))
+plot(Lasso$finalModel, xvar = "lambda", label = TRUE)
+plot(Lasso$finalModel, xvar = "dev", label = TRUE)

-
library(vip)    
-vip(Lasso,num_features = 15)
+
library(vip)
+vip(Lasso, num_features = 15)

-
coef_lasso <- data.frame(
-  Variable = rownames(as.matrix(coef(Lasso$finalModel, Lasso$bestTune$lambda))),
-  Coefficient = as.matrix(coef(Lasso$finalModel, Lasso$bestTune$lambda))[,1]
-)
-coef_lasso %>% subset(Coefficient != 0) %>% rmarkdown::paged_table()
+
coef_lasso <- data.frame(
+  Variable = rownames(as.matrix(coef(Lasso$finalModel, Lasso$bestTune$lambda))),
+  Coefficient = as.matrix(coef(Lasso$finalModel, Lasso$bestTune$lambda))[, 1]
+)
+coef_lasso %>%
+  subset(Coefficient != 0) %>%
+  rmarkdown::paged_table()
-
ridge$results%>%rmarkdown::paged_table()
+ +
ridge$results %>% rmarkdown::paged_table()
-
ridge$bestTune
+
ridge$bestTune
-
ridge$results[which.min(ridge$results$RMSE), ]
+
ridge$results[which.min(ridge$results$RMSE), ]
-
par(mfrow=c(1, 2))
-plot(ridge$finalModel, xvar = "lambda", label = TRUE)
-plot(ridge$finalModel, xvar = "dev", label = TRUE)
+
par(mfrow = c(1, 2))
+plot(ridge$finalModel, xvar = "lambda", label = TRUE)
+plot(ridge$finalModel, xvar = "dev", label = TRUE)

-
vip(ridge,num_features = 15)
+
vip(ridge, num_features = 15)

-
data.frame(as.matrix(coef(ridge$finalModel, ridge$bestTune$lambda))) %>%
-  rmarkdown::paged_table()
+
data.frame(as.matrix(coef(ridge$finalModel, ridge$bestTune$lambda))) %>%
+  rmarkdown::paged_table()
-
ElNet$results%>%rmarkdown::paged_table()
+ +
ElNet$results %>% rmarkdown::paged_table()
-
ElNet$bestTune
+
ElNet$bestTune
-
ElNet$results[which.min(ElNet$results$RMSE), ]
+
ElNet$results[which.min(ElNet$results$RMSE), ]
-
par(mfrow=c(1, 2))
-plot(ElNet$finalModel,xvar="lambda",label=T)
-plot(ElNet$finalModel,xvar="dev",label=T)
+
par(mfrow = c(1, 2))
+plot(ElNet$finalModel, xvar = "lambda", label = T)
+plot(ElNet$finalModel, xvar = "dev", label = T)

-
vip(ElNet,num_features = 20)
+
vip(ElNet, num_features = 20)

-
coef_elnet <- data.frame(
-  Variable = rownames(as.matrix(coef(ElNet$finalModel, ElNet$bestTune$lambda))),
-  Coefficient = as.matrix(coef(ElNet$finalModel, ElNet$bestTune$lambda))[,1]
-)
-coef_elnet %>% subset(Coefficient != 0) %>% rmarkdown::paged_table()
+
coef_elnet <- data.frame(
+  Variable = rownames(as.matrix(coef(ElNet$finalModel, ElNet$bestTune$lambda))),
+  Coefficient = as.matrix(coef(ElNet$finalModel, ElNet$bestTune$lambda))[, 1]
+)
+coef_elnet %>%
+  subset(Coefficient != 0) %>%
+  rmarkdown::paged_table()
-
pls_mod$results%>%rmarkdown::paged_table()
+ +
pls_mod$results %>% rmarkdown::paged_table()
-
pls_mod$bestTune
+
pls_mod$bestTune
-
pls_mod$results[which.min(pls_mod$results$RMSE), ]
+
pls_mod$results[which.min(pls_mod$results$RMSE), ]
-
vip(pls_mod,num_features = 20)
+
vip(pls_mod, num_features = 20)

-
data.frame(Coefficients = as.matrix(coef(pls_mod$finalModel))) %>% rmarkdown::paged_table()
+
data.frame(Coefficients = as.matrix(coef(pls_mod$finalModel))) %>%
+  rmarkdown::paged_table()
-
melt.dTrain%>% rmarkdown::paged_table()
+
melt.dTrain %>% rmarkdown::paged_table()