We begin by loading the necessary packages for this analysis.
library(glmnet) # for regularized regression
library(caret) # for training and evaluating models
library(ggplot2) # for data visualization
library(ggfortify) # to extend ggplot2 features for autoplot
library(reshape2) # for reshaping data
library(Metrics) # for calculating metrics like RMSE
library(vip) # for variable importance visualization
library(dplyr) # for data manipulation
library(tidyverse) # includes ggplot2, dplyr, and other useful packages
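The data objects `cookie.train` and `cookie.test` and the resampling object `custom` are assumed to have been created earlier in the document and are only reused here. As a reminder of what that setup might look like, a minimal sketch (the data-frame name `cookies`, the 70/30 split, and the repeated 10-fold cross-validation settings are assumptions, not the original code):

# Assumed earlier setup: train/test split and the trainControl object reused below
train_idx <- createDataPartition(cookies$sugars, p = 0.7, list = FALSE) # 'cookies' is a placeholder name
cookie.train <- cookies[train_idx, ]
cookie.test  <- cookies[-train_idx, ]

custom <- trainControl(method = "repeatedcv", # assumed resampling scheme
                       number = 10,
                       repeats = 5,
                       verboseIter = FALSE)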
set.seed(602)
linear.mod <- train(sugars ~ ., cookie.train,
                    method = 'lm',
                    preProc = c("center", "scale"),
                    trControl = custom)
linear.mod$results

Ytrain <- cookie.train$sugars
dfc_train <- data.frame(ytrain=Ytrain, linear.mod = fitted(linear.mod))
dfc_train %>% rmarkdown::paged_table()

dfc_train %>%
ggplot(aes(x = ytrain, y = linear.mod)) +
geom_point(size = 2, color = "#983399") +
geom_smooth(method = "lm", color = "#389900") +
ggtitle("Train Dataset") +
ylab("Fitted Values") +
xlab("Actual Values (Y)") Ytest<-cookie.test$sugars
dfc_test<-data.frame(ytest=Ytest)
dfc_test$linear.mod<-predict(linear.mod,newdata = cookie.test)
#dfc_test%>%rmarkdown::paged_table()
dfc_test %>%
ggplot(aes(x = ytest, y = linear.mod)) +
geom_point(size = 2, color = "#983399") +
geom_smooth(method = "lm", color = "#389900") +
ggtitle("Test Dataset") +
ylab("Fitted Values") +
xlab("Actual Values (Y)")set.seed(602)
#grid_Lasso <- seq(0.001, 0.1, length = 100)
grid_Lasso <- 10^seq(-4, -1, length = 100)
Lasso <- train(sugars ~ ., cookie.train,
method = 'glmnet',
tuneGrid = expand.grid(alpha = 1, lambda = grid_Lasso),
preProc = c("center", "scale"),
trControl = custom)

par(mfrow = c(1, 2))
plot(Lasso$finalModel, xvar = "lambda", label = TRUE)
plot(Lasso$finalModel, xvar = "dev", label = TRUE)coef_lasso <- data.frame(
Variable = rownames(as.matrix(coef(Lasso$finalModel, Lasso$bestTune$lambda))),
Coefficient = as.matrix(coef(Lasso$finalModel, Lasso$bestTune$lambda))[,1]
)
coef_lasso %>% subset(Coefficient != 0) %>% rmarkdown::paged_table()
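The `vip` package loaded above is not used in the chunks shown here; one quick way to see which predictors the tuned lasso keeps would be a variable-importance plot along these lines (a sketch relying on `vip`'s support for caret `train` objects; the number of features displayed is arbitrary):

# Sketch: rank predictors by importance for the tuned lasso fit
vip::vip(Lasso, num_features = 20) +
  ggtitle("Lasso: top 20 predictors")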
set.seed(602)
lambda_ridge <- seq(11, 12, length = 100)
ridge <- train(sugars ~ ., data = cookie.train,
method = 'glmnet',
tuneGrid = expand.grid(alpha = 0, lambda = lambda_ridge),
preProc = c("center", "scale"),
trControl = custom
)

par(mfrow = c(1, 2))
plot(ridge$finalModel, xvar = "lambda", label = TRUE)
plot(ridge$finalModel, xvar = "dev", label = TRUE)set.seed(602)
alpha_Enet <- seq(0.5, 0.9, length = 10)
lambda_Enet <- seq(0.01, 0.05, length = 10)
ElNet <- train(sugars ~ ., cookie.train,
method = 'glmnet',
tuneGrid = expand.grid(alpha = alpha_Enet, lambda = lambda_Enet),
preProc = c("center", "scale"),
trControl = custom)

par(mfrow = c(1, 2))
plot(ElNet$finalModel,xvar="lambda",label=T)
plot(ElNet$finalModel,xvar="dev",label=T)coef_elnet <- data.frame(
Variable = rownames(as.matrix(coef(ElNet$finalModel, ElNet$bestTune$lambda))),
Coefficient = as.matrix(coef(ElNet$finalModel, ElNet$bestTune$lambda))[,1]
)
coef_elnet %>% subset(Coefficient != 0) %>% rmarkdown::paged_table()
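The model comparison that follows also uses a partial least squares fit, `pls_mod`, whose training chunk does not appear in this section. It was presumably trained with the same preprocessing and resampling scheme; a rough sketch, with the number of components treated as an assumed tuning grid:

# Assumed setup for the PLS model used below (its chunk is not shown here)
set.seed(602)
pls_mod <- train(sugars ~ ., cookie.train,
                 method = 'pls',
                 tuneGrid = expand.grid(ncomp = 1:20), # assumed component range
                 preProc = c("center", "scale"),
                 trControl = custom)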
yTrain <- cookie.train$sugars
dTrain <- data.frame(yTrain = yTrain)
dTrain$linear<-fitted(linear.mod)
dTrain$Lasso<-fitted(Lasso)
dTrain$ridge<-fitted(ridge)
dTrain$ElNet<-fitted(ElNet)
dTrain$pls<-fitted(pls_mod)
melt.dTrain<-melt(dTrain,id="yTrain",variable.name="model")
melt.dTrain %>%
  ggplot(aes(x = yTrain, y = value)) +
  geom_smooth(method = "lm") +
  geom_point(size = 1, colour = "#983399") +
  facet_wrap(~ model, nrow = 3) +
  ggtitle("Train dataset") +
  ylab("Fitted value") +
  xlab("Y")

yTest <- cookie.test$sugars
dTest<-data.frame(yTest=yTest)
dTest$linear<-predict(linear.mod, newdata = cookie.test)
dTest$Lasso<-predict(Lasso, newdata = cookie.test)
dTest$ridge<-predict(ridge, newdata = cookie.test)
dTest$ElNet<-predict(ElNet, newdata = cookie.test)
dTest$pls<- predict(pls_mod, newdata = cookie.test)
#dTest%>% rmarkdown::paged_table()
melt.dTest<-melt(dTest,id="yTest",variable.name="model")
#melt.dTest%>% rmarkdown::paged_table()
melt.dTest %>%
  ggplot(aes(x = yTest, y = value)) +
  geom_smooth(method = "lm") +
  geom_point(size = 1, colour = "#983399") +
  facet_wrap(~ model, nrow = 3) +
  ggtitle("Test dataset") +
  ylab("Fitted value") +
  xlab("Y") +
  theme_bw()

RMSE <- rbind.data.frame(
cbind(rmse(yTrain,dTrain$linear),rmse(yTest,dTest$linear)),
cbind(rmse(yTrain,dTrain$Lasso),rmse(yTest,dTest$Lasso )),
cbind(rmse(yTrain,dTrain$ridge),rmse(yTest,dTest$ridge )),
cbind(rmse(yTrain,dTrain$ElNet),rmse(yTest,dTest$ElNet)),
cbind(rmse(yTrain,dTrain$pls),rmse(yTest,dTest$pls )))
names(RMSE)=c("Train","Test")
row.names(RMSE)=c("Linear","Lasso","Ridge","ElNet","PLS")
RMSE %>% kableExtra::kbl() %>% kableExtra::kable_styling()

|        | Train    | Test      |
|--------|----------|-----------|
| Linear | 0.000000 | 11.598358 |
| Lasso  | 1.125158 | 1.060734  |
| Ridge  | 2.526064 | 2.594585  |
| ElNet  | 1.133296 | 1.095012  |
| PLS    | 2.271071 | 1.306989  |

The unregularized linear model fits the training set exactly (train RMSE of 0) but has by far the worst test RMSE, whereas the regularized models generalize much better, with the lasso and elastic net performing best on the test set.
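The two numeric summaries below are printed without the chunk that produced them; they look like five-number summaries of the response in the training and test sets, which would come from something like:

# Likely source of the output below (chunk not shown):
# distribution of the response in the training and test sets
summary(cookie.train$sugars)
summary(cookie.test$sugars)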
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 9.95 13.32 16.36 16.54 19.82 23.11
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 10.12 13.38 16.66 16.66 19.93 23.19