mirror of
https://github.com/ArthurDanjou/ArtStudies.git
synced 2026-03-16 03:11:46 +01:00
Ajout de l'implémentation de Bagging et Boosting pour l'analyse des données de Boston, avec des visualisations et des évaluations de performance.
This commit is contained in:
118
M2/Machine Learning 2/Bagging & Boosting.Rmd
Normal file
118
M2/Machine Learning 2/Bagging & Boosting.Rmd
Normal file
@@ -0,0 +1,118 @@
|
||||
```{r}
|
||||
library(randomForest)
|
||||
library(MASS)
|
||||
library(tree)
|
||||
data(Boston)
|
||||
```
|
||||
|
||||
## Sampling
|
||||
|
||||
```{r}
|
||||
# Fix the RNG state so the train/test split below is reproducible.
set.seed(123)
# Training set: half of the row indices of Boston, drawn without replacement.
appr <- sample.int(nrow(Boston), size = nrow(Boston) / 2)
|
||||
```
|
||||
|
||||
## Regression Tree
|
||||
|
||||
```{r}
|
||||
# Regression tree for medv on all remaining Boston columns, grown on the
# training rows indexed by appr only.
arbre_boston <- tree(
  formula = medv ~ .,
  data = Boston,
  subset = appr
)
summary(arbre_boston)
# Draw the tree skeleton first, then overlay the node labels on it.
plot(arbre_boston)
text(arbre_boston, pretty = 0)
|
||||
```
|
||||
|
||||
|
||||
```{r}
|
||||
# Cross-validate the pruning sequence of the fitted regression tree.
cv_boston <- cv.tree(arbre_boston)
# Deviance against tree size. Explicit axis labels replace the default ones,
# which would otherwise show the raw expressions cv_boston$size / cv_boston$dev.
plot(
  cv_boston$size,
  cv_boston$dev,
  type = "b",
  xlab = "Tree size (terminal nodes)",
  ylab = "Cross-validated deviance"
)
|
||||
```
|
||||
|
||||
|
||||
```{r}
|
||||
# Observed responses on the held-out rows (every row not in appr).
boston_test <- Boston$medv[-appr]
# Tree predictions for those same held-out rows.
yprev <- predict(arbre_boston, newdata = Boston[-appr, ])
# Predicted vs. observed values, with the y = x line as a visual reference.
plot(yprev, boston_test)
abline(a = 0, b = 1)
# Test mean squared error of the tree.
mean((yprev - boston_test)^2)
|
||||
```
|
||||
|
||||
## Bagging
|
||||
|
||||
```{r}
|
||||
# Bootstrap resampling is random: fix the seed so the fit is reproducible.
set.seed(123)

# Bagging is fitted as a random forest that considers every predictor at each
# split. mtry is therefore derived from the data (all columns except the
# response medv) instead of being hard-coded to 13.
bag_boston <- randomForest(
  medv ~ .,
  data = Boston,
  subset = appr,
  mtry = ncol(Boston) - 1,
  importance = TRUE
)
bag_boston
# Evaluate on the held-out rows (every row not in appr).
yprev_bag <- predict(bag_boston, newdata = Boston[-appr, ])
# Predicted vs. observed values, with the y = x line as a visual reference.
plot(yprev_bag, boston_test)
abline(0, 1)
# Test mean squared error of the bagged model.
mean((yprev_bag - boston_test)^2)
|
||||
```
|
||||
|
||||
## Random Forest
|
||||
|
||||
```{r}
|
||||
# Same seed as the bagging chunk so both ensembles start from the same RNG state.
set.seed(123)
# Random forest: only 6 candidate predictors are tried at each split.
rf_boston <- randomForest(
  medv ~ .,
  data = Boston, subset = appr,
  mtry = 6, importance = TRUE
)
# Predictions on the held-out rows, then the test mean squared error.
yprev_rf <- predict(rf_boston, newdata = Boston[-appr, ])
mean((yprev_rf - boston_test)^2)
|
||||
```
|
||||
|
||||
## Variable importance
|
||||
|
||||
```{r}
|
||||
# Importance measures of each predictor in the random forest
# (the forest was fitted with importance = TRUE).
importance(rf_boston)
# Graphical view of the same importance measures.
varImpPlot(rf_boston)
|
||||
```
|
||||
|
||||
## Boosting
|
||||
|
||||
```{r}
|
||||
library(gbm)
# Fix the RNG state so the boosted fit is reproducible.
set.seed(1)
# Gradient boosting for medv on the training rows: squared-error loss
# ("gaussian"), 5000 trees, interaction depth 4.
boost_boston <- gbm(
  medv ~ .,
  data = Boston[appr, ],
  distribution = "gaussian",
  n.trees = 5000,
  interaction.depth = 4
)

# Relative influence of each predictor.
summary(boost_boston)

# Partial dependence of the prediction on rm and on lstat.
# The argument is spelled out as i.var rather than relying on partial matching
# of `i`, and the previous graphical parameters are restored afterwards.
# NOTE(review): recent gbm versions may draw these plots with lattice, in which
# case the mfrow layout has no effect -- confirm against the installed version.
old_par <- par(mfrow = c(1, 2))
plot(boost_boston, i.var = "rm")
plot(boost_boston, i.var = "lstat")
par(old_par)
|
||||
```
|
||||
|
||||
```{r}
|
||||
# Predict the held-out rows using all 5000 boosting iterations.
yhat_boost <- predict(
  boost_boston,
  newdata = Boston[-appr, ],
  n.trees = 5000
)
# Test mean squared error of the boosted model.
mean((boston_test - yhat_boost)^2)
|
||||
```
|
||||
|
||||
```{r}
|
||||
# Every other stochastic fit in this document is preceded by set.seed(); do the
# same here so this chunk gives the same result when it is run on its own.
set.seed(1)
# Refit the boosted model with an explicit learning rate (shrinkage = 0.2);
# this overwrites the previous boost_boston.
boost_boston <- gbm(
  medv ~ .,
  data = Boston[appr, ],
  distribution = "gaussian",
  n.trees = 5000,
  interaction.depth = 4,
  shrinkage = 0.2,
  verbose = FALSE
)

# Predictions on the held-out rows, then the test mean squared error.
yhat_boost <- predict(boost_boston, newdata = Boston[-appr, ], n.trees = 5000)
mean((yhat_boost - boston_test)^2)
|
||||
```
|
||||
@@ -1,79 +0,0 @@
|
||||
```{r}
|
||||
library(randomForest)
|
||||
library(MASS)
|
||||
library(tree)
|
||||
data(Boston)
|
||||
```
|
||||
|
||||
## Sampling
|
||||
|
||||
```{r}
|
||||
set.seed(123)
|
||||
appr = sample(1:nrow(Boston), nrow(Boston) / 2)
|
||||
```
|
||||
|
||||
## Regression Tree
|
||||
|
||||
```{r}
|
||||
arbre.boston = tree(medv ~ ., Boston, subset = appr)
|
||||
summary(arbre.boston)
|
||||
plot(arbre.boston)
|
||||
text(arbre.boston, pretty = 0)
|
||||
```
|
||||
|
||||
|
||||
```{r}
|
||||
cv.boston = cv.tree(arbre.boston)
|
||||
plot(cv.boston$size, cv.boston$dev, type = 'b')
|
||||
```
|
||||
|
||||
|
||||
```{r}
|
||||
yprev = predict(arbre.boston, newdata = Boston[-appr, ])
|
||||
boston.test = Boston[-appr, "medv"]
|
||||
plot(yprev, boston.test)
|
||||
abline(0, 1)
|
||||
mean((yprev - boston.test)^2)
|
||||
```
|
||||
|
||||
## Bagging
|
||||
|
||||
```{r}
|
||||
set.seed(123)
|
||||
|
||||
bag.boston = randomForest(
|
||||
medv ~ .,
|
||||
data = Boston,
|
||||
subset = appr,
|
||||
mtry = 13,
|
||||
importance = TRUE
|
||||
)
|
||||
bag.boston
|
||||
yprev.bag = predict(bag.boston, newdata = Boston[-appr, ])
|
||||
plot(yprev.bag, boston.test)
|
||||
abline(0, 1)
|
||||
mean((yprev.bag - boston.test)^2)
|
||||
```
|
||||
|
||||
## Random Forest
|
||||
|
||||
```{r}
|
||||
set.seed(123)
|
||||
rf.boston = randomForest(
|
||||
medv ~ .,
|
||||
data = Boston,
|
||||
subset = appr,
|
||||
mtry = 6,
|
||||
importance = TRUE
|
||||
)
|
||||
yprev.rf = predict(rf.boston, newdata = Boston[-appr, ])
|
||||
mean((yprev.rf - boston.test)^2)
|
||||
```
|
||||
|
||||
## Variable importance
|
||||
|
||||
|
||||
```{r}
|
||||
importance(rf.boston)
|
||||
varImpPlot(rf.boston)
|
||||
```
|
||||
Reference in New Issue
Block a user