mirror of
https://github.com/ArthurDanjou/ArtStudies.git
synced 2026-03-16 05:11:40 +01:00
119 lines
1.9 KiB
Plaintext
119 lines
1.9 KiB
Plaintext
```{r}
# Packages used by the tree-based models in this document; Boston is the
# data set every later chunk fits and evaluates on.
library(randomForest)
library(MASS)
library(tree)
data(Boston)
```

## Sampling

```{r}
# Draw half of the row indices at random as the training set. The fixed seed
# keeps the split reproducible; integer division makes the sample size an
# explicit whole number.
set.seed(123)
appr <- sample(seq_len(nrow(Boston)), nrow(Boston) %/% 2)
```

## Regression Tree

```{r}
# Fit a regression tree for medv on all other columns, using only the
# training rows selected in appr.
arbre_boston <- tree(medv ~ ., data = Boston, subset = appr)
summary(arbre_boston)

# Draw the fitted tree, then add the text labels on top of it.
plot(arbre_boston)
text(arbre_boston, pretty = 0)
```

```{r}
# Cross-validate the fitted tree and plot the deviance against tree size.
cv_boston <- cv.tree(arbre_boston)
plot(cv_boston$size, cv_boston$dev, type = "b")
```

```{r}
# Predict on the held-out rows (every row not in appr) and keep their
# observed medv values for comparison.
yprev <- predict(arbre_boston, newdata = Boston[-appr, ])
boston_test <- Boston[-appr, "medv"]

# Predicted vs. observed; the y = x line marks perfect predictions.
plot(yprev, boston_test)
abline(0, 1)

# Test mean squared error of the tree.
mean((yprev - boston_test)^2)
```

## Bagging

```{r}
set.seed(123)

# Bagging is a random forest that tries every predictor at each split, so
# mtry is the number of columns minus the response (medv) rather than a
# hard-coded count.
bag_boston <- randomForest(
  medv ~ .,
  data = Boston,
  subset = appr,
  mtry = ncol(Boston) - 1,
  importance = TRUE
)
bag_boston

# Held-out predictions against observed medv, with the y = x reference line.
yprev_bag <- predict(bag_boston, newdata = Boston[-appr, ])
plot(yprev_bag, boston_test)
abline(0, 1)

# Test mean squared error of the bagged model.
mean((yprev_bag - boston_test)^2)
```

## Random Forest

```{r}
set.seed(123)

# Random forest on the training rows, trying 6 candidate predictors per split.
rf_boston <- randomForest(
  medv ~ .,
  data = Boston,
  subset = appr,
  mtry = 6,
  importance = TRUE
)

# Test mean squared error on the held-out rows.
yprev_rf <- predict(rf_boston, newdata = Boston[-appr, ])
mean((yprev_rf - boston_test)^2)
```

## Variable importance

```{r}
# Importance measures of the random forest, as a table and as a plot.
importance(rf_boston)
varImpPlot(rf_boston)
```

## Boosting

```{r}
library(gbm)
set.seed(1)

# Boosted regression trees fitted on the training rows only.
boost_boston <- gbm(
  medv ~ .,
  data = Boston[appr, ],
  distribution = "gaussian",
  n.trees = 5000,
  interaction.depth = 4
)

# Relative influence of each predictor.
summary(boost_boston)

# Marginal effect of rm and of lstat on the prediction. i.var is spelled out
# in full instead of relying on partial matching of `i`.
# NOTE(review): recent gbm releases appear to draw these plots with lattice,
# which ignores par(mfrow) -- confirm the side-by-side layout still applies.
par(mfrow = c(1, 2))
plot(boost_boston, i.var = "rm")
plot(boost_boston, i.var = "lstat")
```

```{r}
# Test mean squared error of the boosted model, predicting with all 5000
# trees on the held-out rows.
yhat_boost <- predict(boost_boston, newdata = Boston[-appr, ], n.trees = 5000)
mean((yhat_boost - boston_test)^2)
```

```{r}
# Refit the boosted model with an explicit shrinkage of 0.2 and compare its
# test error with the previous fit.
boost_boston <- gbm(
  medv ~ .,
  data = Boston[appr, ],
  distribution = "gaussian",
  n.trees = 5000,
  interaction.depth = 4,
  shrinkage = 0.2,
  verbose = FALSE
)

# Test mean squared error on the held-out rows.
yhat_boost <- predict(boost_boston, newdata = Boston[-appr, ], n.trees = 5000)
mean((yhat_boost - boston_test)^2)
```
|