Ajout de l'implémentation de Bagging et Boosting pour l'analyse des données de Boston, avec des visualisations et des évaluations de performance.

This commit is contained in:
2026-02-16 11:29:46 +01:00
parent 14390f51e9
commit dc743105f3
2 changed files with 118 additions and 79 deletions

View File

@@ -0,0 +1,118 @@
```{r}
library(randomForest)
library(MASS)
library(tree)
data(Boston)
```
## Sampling
```{r}
# Fix the RNG state so the train/test partition below is reproducible.
set.seed(123)
# Draw half of Boston's row indices, without replacement, as the training
# rows; the rows left out are accessed later through Boston[-appr, ].
appr <- sample.int(nrow(Boston), size = nrow(Boston) / 2)
```
## Regression Tree
```{r}
# Grow a regression tree for medv on every other column of Boston, fitted
# only on the training rows indexed by appr.
arbre_boston <- tree(formula = medv ~ ., data = Boston, subset = appr)

# Print the fit summary.
summary(arbre_boston)

# Draw the tree skeleton first, then overlay the split labels.
# pretty = 0 requests unabbreviated factor levels in those labels.
plot(arbre_boston)
text(arbre_boston, pretty = 0)
```
```{r}
# Cross-validate the fitted regression tree.
# NOTE(review): cv.tree presumably assigns folds at random and no seed is set
# in this chunk, so the curve is only reproducible when the chunks are run in
# order after set.seed(123) -- confirm.
cv_boston <- cv.tree(arbre_boston)
# Plot the cross-validated deviance against tree size; type = "b" draws both
# points and connecting lines.
plot(cv_boston$size, cv_boston$dev, type = "b")
```
```{r}
# Observed medv for every row that was not drawn into the training set.
boston_test <- Boston[-appr, "medv"]
# Tree predictions for those same held-out rows.
yprev <- predict(arbre_boston, newdata = Boston[-appr, ])

# Predicted vs. observed, with the y = x line as a reference.
plot(yprev, boston_test)
abline(a = 0, b = 1)

# Mean squared error on the held-out rows.
mean((boston_test - yprev)^2)
```
## Bagging
```{r}
# Fix the RNG state so the bootstrap resampling is reproducible.
set.seed(123)
# Bagging: a random forest in which every predictor is a split candidate at
# each node. mtry is derived from the data (all columns except the response
# medv) instead of hard-coding 13, so it stays correct if the number of
# columns in Boston differs.
bag_boston <- randomForest(
  medv ~ .,
  data = Boston,
  subset = appr,
  mtry = ncol(Boston) - 1,
  importance = TRUE
)
# Print the fitted model.
bag_boston

# Predict the held-out rows and compare with the observed medv values.
yprev_bag <- predict(bag_boston, newdata = Boston[-appr, ])
plot(yprev_bag, boston_test)
abline(0, 1)
# Mean squared error on the held-out rows.
mean((yprev_bag - boston_test)^2)
```
## Random Forest
```{r}
# Same seed as the bagging fit, so both forests start from the same RNG state.
set.seed(123)
# Random forest on the training rows: 6 candidate predictors per split,
# with importance measures stored for later inspection.
rf_boston <- randomForest(
  medv ~ .,
  data = Boston,
  subset = appr,
  importance = TRUE,
  mtry = 6
)

# Held-out predictions and their mean squared error.
yprev_rf <- predict(rf_boston, newdata = Boston[-appr, ])
mean((boston_test - yprev_rf)^2)
```
## Variable importance
```{r}
# Print the variable-importance measures stored in the random forest
# (rf_boston was fitted with importance = TRUE).
importance(rf_boston)
# Plot the same importance measures.
varImpPlot(rf_boston)
```
## Boosting
```{r}
library(gbm)
# Fix the RNG state so the boosted fit is reproducible.
set.seed(1)
# Gradient-boosted regression trees on the training rows only:
# squared-error loss ("gaussian"), 5000 trees, depth-4 interactions.
boost_boston <- gbm(
  medv ~ .,
  data = Boston[appr, ],
  distribution = "gaussian",
  n.trees = 5000,
  interaction.depth = 4
)
# Print (and plot) the relative influence of each predictor.
summary(boost_boston)

# Partial-dependence plots for rm and lstat.
# NOTE(review): recent gbm releases appear to draw these with lattice, in
# which case par(mfrow) has no effect and the plots are not placed side by
# side -- confirm against the installed gbm version.
par(mfrow = c(1, 2))
# Spell out i.var instead of relying on partial matching of `i`.
plot(boost_boston, i.var = "rm")
plot(boost_boston, i.var = "lstat")
```
```{r}
# Predict the held-out rows using every tree of the fitted model. The tree
# count is read from the model itself so it cannot drift from the value
# used when fitting.
yhat_boost <- predict(
  boost_boston,
  newdata = Boston[-appr, ],
  n.trees = boost_boston$n.trees
)
# Mean squared error on the held-out rows.
mean((yhat_boost - boston_test)^2)
```
```{r}
# Re-seed before refitting, like every other model chunk in this document,
# so this fit is reproducible even when the chunk is run on its own.
set.seed(1)
# Same boosted model as before but with an explicit shrinkage (learning rate)
# of 0.2. This overwrites the previous boost_boston fit.
boost_boston <- gbm(
  medv ~ .,
  data = Boston[appr, ],
  distribution = "gaussian",
  n.trees = 5000,
  interaction.depth = 4,
  shrinkage = 0.2,
  verbose = FALSE
)
# Predict the held-out rows with all fitted trees (count taken from the
# model) and report the mean squared error.
yhat_boost <- predict(
  boost_boston,
  newdata = Boston[-appr, ],
  n.trees = boost_boston$n.trees
)
mean((yhat_boost - boston_test)^2)
```

View File

@@ -1,79 +0,0 @@
```{r}
library(randomForest)
library(MASS)
library(tree)
data(Boston)
```
## Sampling
```{r}
# Fix the RNG state so the train/test partition below is reproducible.
set.seed(123)
# Draw half of Boston's row indices, without replacement, as the training
# rows. seq_len() replaces 1:nrow(), which misbehaves on zero-row input, and
# `<-` replaces `=` for assignment.
appr <- sample(seq_len(nrow(Boston)), nrow(Boston) / 2)
```
## Regression Tree
```{r}
# Grow a regression tree for medv on every other column of Boston, fitted
# only on the training rows indexed by appr (assignment now uses `<-`).
arbre.boston <- tree(medv ~ ., Boston, subset = appr)
# Print the fit summary.
summary(arbre.boston)
# Draw the tree skeleton, then overlay the split labels.
plot(arbre.boston)
text(arbre.boston, pretty = 0)
```
```{r}
# Cross-validate the fitted regression tree (assignment now uses `<-`).
# NOTE(review): cv.tree presumably assigns folds at random and no seed is set
# in this chunk -- confirm reproducibility when run on its own.
cv.boston <- cv.tree(arbre.boston)
# Plot the cross-validated deviance against tree size; type = "b" draws both
# points and connecting lines.
plot(cv.boston$size, cv.boston$dev, type = "b")
```
```{r}
# Tree predictions for every row that was not drawn into the training set
# (assignments now use `<-`).
yprev <- predict(arbre.boston, newdata = Boston[-appr, ])
# Observed medv for those same held-out rows.
boston.test <- Boston[-appr, "medv"]
# Predicted vs. observed, with the y = x line as a reference.
plot(yprev, boston.test)
abline(0, 1)
# Mean squared error on the held-out rows.
mean((yprev - boston.test)^2)
```
## Bagging
```{r}
# Fix the RNG state so the bootstrap resampling is reproducible.
set.seed(123)
# Bagging: a random forest in which every predictor is a split candidate at
# each node. mtry is derived from the data (all columns except the response
# medv) instead of hard-coding 13; assignments now use `<-`.
bag.boston <- randomForest(
  medv ~ .,
  data = Boston,
  subset = appr,
  mtry = ncol(Boston) - 1,
  importance = TRUE
)
# Print the fitted model.
bag.boston

# Predict the held-out rows and compare with the observed medv values.
yprev.bag <- predict(bag.boston, newdata = Boston[-appr, ])
plot(yprev.bag, boston.test)
abline(0, 1)
# Mean squared error on the held-out rows.
mean((yprev.bag - boston.test)^2)
```
## Random Forest
```{r}
# Same seed as the bagging fit, so both forests start from the same RNG state.
set.seed(123)
# Random forest on the training rows: 6 candidate predictors per split, with
# importance measures stored for later inspection (assignments now use `<-`).
rf.boston <- randomForest(
  medv ~ .,
  data = Boston,
  subset = appr,
  mtry = 6,
  importance = TRUE
)
# Held-out predictions and their mean squared error.
yprev.rf <- predict(rf.boston, newdata = Boston[-appr, ])
mean((yprev.rf - boston.test)^2)
```
## Variable importance
```{r}
# Print the variable-importance measures stored in the random forest
# (rf.boston was fitted with importance = TRUE).
importance(rf.boston)
# Plot the same importance measures.
varImpPlot(rf.boston)
```