mirror of
https://github.com/ArthurDanjou/ArtStudies.git
synced 2026-01-14 13:54:06 +01:00
Refactor code for improved readability and consistency across R Markdown files
- Updated comments and code formatting in `3-td_ggplot2 - enonce.Rmd` for clarity. - Enhanced code structure in `4-td_graphiques - enonce.Rmd` by organizing options and library calls. - Replaced pipe operator `%>%` with `|>` in `Code_Lec3.Rmd` for consistency with modern R syntax. - Cleaned up commented-out code and ensured consistent spacing in ggplot calls.
This commit is contained in:
@@ -44,11 +44,10 @@ notes_MAN <- read.table("notes_MAN.csv", sep = ";", dec = ",", row.names = 1, he
|
|||||||
# qui est une variable catégorielle
|
# qui est une variable catégorielle
|
||||||
notes_MAN_prep <- notes_MAN[, -1]
|
notes_MAN_prep <- notes_MAN[, -1]
|
||||||
|
|
||||||
X <- notes_MAN[1:6,] %>% select(c("Probas", "Analyse", "Anglais", "MAN.Stats", "Stats.Inférentielles"))
|
X <- notes_MAN[1:6, ] |> select(c("Probas", "Analyse", "Anglais", "MAN.Stats", "Stats.Inférentielles"))
|
||||||
# on prépare le jeu de données en retirant la colonne des Mentions
|
# on prépare le jeu de données en retirant la colonne des Mentions
|
||||||
# qui est une variable catégorielle
|
# qui est une variable catégorielle
|
||||||
# View(X)
|
# View(X)
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
```{r}
|
```{r}
|
||||||
@@ -101,7 +100,7 @@ C[, 1:2]
|
|||||||
deux premières composantes principales (1 point)
|
deux premières composantes principales (1 point)
|
||||||
|
|
||||||
```{r}
|
```{r}
|
||||||
colors <- c('blue', 'red', 'green', 'yellow', 'purple', 'orange')
|
colors <- c("blue", "red", "green", "yellow", "purple", "orange")
|
||||||
plot(
|
plot(
|
||||||
C[, 1], C[, 2],
|
C[, 1], C[, 2],
|
||||||
main = "Coordonnées des individus par rapport \n aux deux premières composantes principales",
|
main = "Coordonnées des individus par rapport \n aux deux premières composantes principales",
|
||||||
@@ -111,7 +110,7 @@ plot(
|
|||||||
col = colors,
|
col = colors,
|
||||||
pch = 15
|
pch = 15
|
||||||
)
|
)
|
||||||
legend(x = 'topleft', legend = rownames(X), col = colors, pch = 15)
|
legend(x = "topleft", legend = rownames(X), col = colors, pch = 15)
|
||||||
```
|
```
|
||||||
|
|
||||||
------------------------------------------------------------------------
|
------------------------------------------------------------------------
|
||||||
@@ -130,7 +129,7 @@ ncol(notes_MAN_prep) # Nombre de variables
|
|||||||
```
|
```
|
||||||
|
|
||||||
```{r}
|
```{r}
|
||||||
dim(notes_MAN_prep) # On peut également utiliser 'dim' qui renvoit la dimension
|
dim(notes_MAN_prep) # On peut également utiliser 'dim' qui renvoit la dimension
|
||||||
```
|
```
|
||||||
|
|
||||||
Il y a donc **42** individus et **14** variables. A noter que la
|
Il y a donc **42** individus et **14** variables. A noter que la
|
||||||
@@ -146,7 +145,7 @@ library(FactoMineR)
|
|||||||
```{r}
|
```{r}
|
||||||
# Ne pas oublier de charger la librairie FactoMineR
|
# Ne pas oublier de charger la librairie FactoMineR
|
||||||
|
|
||||||
# Indication : pour afficher les résultats de l'ACP pour tous les individus, utiliser la
|
# Indication : pour afficher les résultats de l'ACP pour tous les individus, utiliser la
|
||||||
# fonction summary en précisant dedans nbind=Inf et nbelements=Inf
|
# fonction summary en précisant dedans nbind=Inf et nbelements=Inf
|
||||||
res.notes <- PCA(notes_MAN_prep, scale.unit = TRUE)
|
res.notes <- PCA(notes_MAN_prep, scale.unit = TRUE)
|
||||||
```
|
```
|
||||||
@@ -190,7 +189,7 @@ avec:
|
|||||||
Depuis notre ACP, on peut donc récupérer les coordonnées:
|
Depuis notre ACP, on peut donc récupérer les coordonnées:
|
||||||
|
|
||||||
```{r}
|
```{r}
|
||||||
coords_man_stats <- res.notes$var$coord["MAN.Stats",]
|
coords_man_stats <- res.notes$var$coord["MAN.Stats", ]
|
||||||
coords_man_stats[1:2]
|
coords_man_stats[1:2]
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
```{r}
|
```{r}
|
||||||
setwd('/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/TP1-bis')
|
setwd("/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/TP1-bis")
|
||||||
|
|
||||||
library(tidyverse)
|
library(tidyverse)
|
||||||
options(scipen = 999, digits = 5)
|
options(scipen = 999, digits = 5)
|
||||||
@@ -56,8 +56,8 @@ summary(model)
|
|||||||
coef(model)
|
coef(model)
|
||||||
```
|
```
|
||||||
```{r}
|
```{r}
|
||||||
data <- data %>%
|
data <- data |>
|
||||||
mutate(yhat = beta0 + beta1 * poids) %>%
|
mutate(yhat = beta0 + beta1 * poids) |>
|
||||||
mutate(residuals = cholesterol - yhat)
|
mutate(residuals = cholesterol - yhat)
|
||||||
|
|
||||||
data
|
data
|
||||||
@@ -71,8 +71,8 @@ ggplot(data, aes(x = poids, y = cholesterol)) +
|
|||||||
```{r}
|
```{r}
|
||||||
mean(data[, "cholesterol"])
|
mean(data[, "cholesterol"])
|
||||||
mean(data[, "yhat"])
|
mean(data[, "yhat"])
|
||||||
mean(data[, "residuals"]) %>% round(10)
|
mean(data[, "residuals"]) |> round(10)
|
||||||
cov(data[, "residuals"], data[, "poids"]) %>% round(10)
|
cov(data[, "residuals"], data[, "poids"]) |> round(10)
|
||||||
(RSS <- sum((data[, "residuals"])^2))
|
(RSS <- sum((data[, "residuals"])^2))
|
||||||
(TSS <- sum((y - mean(y))^2))
|
(TSS <- sum((y - mean(y))^2))
|
||||||
TSS - beta1 * Sxy
|
TSS - beta1 * Sxy
|
||||||
@@ -117,10 +117,10 @@ t <- qt(0.975, dof)
|
|||||||
sigma_hat <- sigma(model)
|
sigma_hat <- sigma(model)
|
||||||
n <- nrow(data)
|
n <- nrow(data)
|
||||||
|
|
||||||
data <- data %>%
|
data <- data |>
|
||||||
mutate(error = t *
|
mutate(error = t *
|
||||||
sigma_hat *
|
sigma_hat *
|
||||||
sqrt(1 / n + (poids - mean(poids))^2 / RSS)) %>%
|
sqrt(1 / n + (poids - mean(poids))^2 / RSS)) |>
|
||||||
mutate(conf.low = yhat - error, conf.high = yhat + error, error = NULL)
|
mutate(conf.low = yhat - error, conf.high = yhat + error, error = NULL)
|
||||||
|
|
||||||
ggplot(data, aes(x = poids, y = cholesterol)) +
|
ggplot(data, aes(x = poids, y = cholesterol)) +
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
```{r}
|
```{r}
|
||||||
setwd('/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/TP2-bis')
|
setwd("/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/TP2-bis")
|
||||||
|
|
||||||
library(tidyverse)
|
library(tidyverse)
|
||||||
library(GGally)
|
library(GGally)
|
||||||
@@ -10,9 +10,9 @@ library(qqplotr)
|
|||||||
options(scipen = 999, digits = 5)
|
options(scipen = 999, digits = 5)
|
||||||
```
|
```
|
||||||
```{r}
|
```{r}
|
||||||
data <- read.csv('data02.csv', sep = ',', header = TRUE, dec = ".")
|
data <- read.csv("data02.csv", sep = ",", header = TRUE, dec = ".")
|
||||||
data %>%
|
data |>
|
||||||
mutate(type = factor(type, levels = c("maths", "english", "final"), labels = c("maths", "english", "final"))) %>%
|
mutate(type = factor(type, levels = c("maths", "english", "final"), labels = c("maths", "english", "final"))) |>
|
||||||
ggplot(aes(x = note)) +
|
ggplot(aes(x = note)) +
|
||||||
facet_wrap(vars(type), scales = "free_x") +
|
facet_wrap(vars(type), scales = "free_x") +
|
||||||
geom_histogram(binwidth = 4, color = "black", fill = "grey80") +
|
geom_histogram(binwidth = 4, color = "black", fill = "grey80") +
|
||||||
@@ -21,8 +21,8 @@ data %>%
|
|||||||
```
|
```
|
||||||
```{r}
|
```{r}
|
||||||
data_wide <- pivot_wider(data, names_from = type, values_from = note)
|
data_wide <- pivot_wider(data, names_from = type, values_from = note)
|
||||||
data_wide %>%
|
data_wide |>
|
||||||
select(-id) %>%
|
select(-id) |>
|
||||||
ggpairs() + theme_bw(14)
|
ggpairs() + theme_bw(14)
|
||||||
```
|
```
|
||||||
```{r}
|
```{r}
|
||||||
@@ -67,12 +67,12 @@ linearHypothesis(model, "maths - english = 0")
|
|||||||
|
|
||||||
# Submodel testing
|
# Submodel testing
|
||||||
```{r}
|
```{r}
|
||||||
data_predict <- predict(model, newdata = expand.grid(maths = seq(70, 90, 2), english = c(75, 85)), interval = "confidence") %>%
|
data_predict <- predict(model, newdata = expand.grid(maths = seq(70, 90, 2), english = c(75, 85)), interval = "confidence") |>
|
||||||
as_tibble() %>%
|
as_tibble() |>
|
||||||
bind_cols(expand.grid(maths = seq(70, 90, 2), english = c(75, 85)))
|
bind_cols(expand.grid(maths = seq(70, 90, 2), english = c(75, 85)))
|
||||||
|
|
||||||
data_predict %>%
|
data_predict |>
|
||||||
mutate(english = as.factor(english)) %>%
|
mutate(english = as.factor(english)) |>
|
||||||
ggplot(aes(x = maths, y = fit, color = english, fill = english, label = round(fit, 1))) +
|
ggplot(aes(x = maths, y = fit, color = english, fill = english, label = round(fit, 1))) +
|
||||||
geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.2, show.legend = FALSE) +
|
geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.2, show.legend = FALSE) +
|
||||||
geom_point(size = 2) +
|
geom_point(size = 2) +
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
```{r}
|
```{r}
|
||||||
setwd('/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/TP2')
|
setwd("/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/TP2")
|
||||||
```
|
```
|
||||||
|
|
||||||
# Question 1 : Import dataset and check variables
|
# Question 1 : Import dataset and check variables
|
||||||
@@ -9,8 +9,8 @@ library(dplyr)
|
|||||||
cepages <- read.csv("Cepages B TP2.csv", header = TRUE, sep = ";", dec = ",")
|
cepages <- read.csv("Cepages B TP2.csv", header = TRUE, sep = ";", dec = ",")
|
||||||
cepages$Couleur <- as.factor(cepages$Couleur)
|
cepages$Couleur <- as.factor(cepages$Couleur)
|
||||||
cepages$Origine <- as.factor(cepages$Origine)
|
cepages$Origine <- as.factor(cepages$Origine)
|
||||||
cepages <- cepages %>% mutate(across(where(is.character), as.numeric))
|
cepages <- cepages |> mutate(across(where(is.character), as.numeric))
|
||||||
cepages <- cepages %>% mutate(across(where(is.integer), as.numeric))
|
cepages <- cepages |> mutate(across(where(is.integer), as.numeric))
|
||||||
paged_table(cepages)
|
paged_table(cepages)
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -39,7 +39,7 @@ tapply(cepages$pH, list(cepages$Couleur, cepages$Origine), mean)
|
|||||||
library(ggplot2)
|
library(ggplot2)
|
||||||
|
|
||||||
ggplot(cepages, aes(x = AcTot, y = pH, color = Couleur)) +
|
ggplot(cepages, aes(x = AcTot, y = pH, color = Couleur)) +
|
||||||
geom_point(col = 'red', size = 0.5) +
|
geom_point(col = "red", size = 0.5) +
|
||||||
geom_smooth(method = "lm", se = F)
|
geom_smooth(method = "lm", se = F)
|
||||||
|
|
||||||
ggplot(cepages, aes(y = pH, x = AcTot, colour = Couleur, fill = Couleur)) +
|
ggplot(cepages, aes(y = pH, x = AcTot, colour = Couleur, fill = Couleur)) +
|
||||||
@@ -50,8 +50,8 @@ ggplot(cepages, aes(y = pH, x = AcTot, colour = Couleur, fill = Couleur)) +
|
|||||||
|
|
||||||
```{r}
|
```{r}
|
||||||
ggplot(cepages, aes(x = AcTot, y = pH, color = Origine)) +
|
ggplot(cepages, aes(x = AcTot, y = pH, color = Origine)) +
|
||||||
geom_smooth(method = 'lm', se = F) +
|
geom_smooth(method = "lm", se = F) +
|
||||||
geom_point(col = 'red', size = 0.5)
|
geom_point(col = "red", size = 0.5)
|
||||||
|
|
||||||
ggplot(cepages, aes(y = pH, x = AcTot, colour = Origine, fill = Origine)) +
|
ggplot(cepages, aes(y = pH, x = AcTot, colour = Origine, fill = Origine)) +
|
||||||
geom_boxplot(alpha = 0.5, outlier.alpha = 0)
|
geom_boxplot(alpha = 0.5, outlier.alpha = 0)
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
```{r}
|
```{r}
|
||||||
setwd('/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/TP3')
|
setwd("/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/TP3")
|
||||||
```
|
```
|
||||||
|
|
||||||
# Question 1 : Import dataset and check variables
|
# Question 1 : Import dataset and check variables
|
||||||
@@ -9,8 +9,8 @@ library(dplyr)
|
|||||||
ozone <- read.table("ozone.txt", header = TRUE, sep = " ", dec = ".")
|
ozone <- read.table("ozone.txt", header = TRUE, sep = " ", dec = ".")
|
||||||
ozone$vent <- as.factor(ozone$vent)
|
ozone$vent <- as.factor(ozone$vent)
|
||||||
ozone$temps <- as.factor(ozone$temps)
|
ozone$temps <- as.factor(ozone$temps)
|
||||||
ozone <- ozone %>% mutate(across(where(is.character), as.numeric))
|
ozone <- ozone |> mutate(across(where(is.character), as.numeric))
|
||||||
ozone <- ozone %>% mutate(across(where(is.integer), as.numeric))
|
ozone <- ozone |> mutate(across(where(is.integer), as.numeric))
|
||||||
paged_table(ozone)
|
paged_table(ozone)
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -25,8 +25,8 @@ summary(model_T12)
|
|||||||
library(ggplot2)
|
library(ggplot2)
|
||||||
|
|
||||||
ggplot(ozone, aes(x = T12, y = maxO3)) +
|
ggplot(ozone, aes(x = T12, y = maxO3)) +
|
||||||
geom_smooth(method = 'lm', se = T) +
|
geom_smooth(method = "lm", se = T) +
|
||||||
geom_point(col = 'red', size = 0.5) +
|
geom_point(col = "red", size = 0.5) +
|
||||||
labs(title = "maxO3 ~ T12") +
|
labs(title = "maxO3 ~ T12") +
|
||||||
theme_minimal()
|
theme_minimal()
|
||||||
```
|
```
|
||||||
@@ -130,5 +130,4 @@ new_obs <- list(
|
|||||||
maxO3v = 85
|
maxO3v = 85
|
||||||
)
|
)
|
||||||
predict(model_backward, new_obs, interval = "confidence")
|
predict(model_backward, new_obs, interval = "confidence")
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
```{r}
|
```{r}
|
||||||
setwd('/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/TP4')
|
setwd("/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/TP4")
|
||||||
|
|
||||||
set.seed(0911)
|
set.seed(0911)
|
||||||
library(ggplot2)
|
library(ggplot2)
|
||||||
@@ -22,19 +22,19 @@ library(lmtest) # LRtest
|
|||||||
library(survey) # Wald test
|
library(survey) # Wald test
|
||||||
library(vcdExtra) # deviance test
|
library(vcdExtra) # deviance test
|
||||||
|
|
||||||
library(rsample) # for data splitting
|
library(rsample) # for data splitting
|
||||||
library(glmnet)
|
library(glmnet)
|
||||||
library(nnet) # multinom, glm
|
library(nnet) # multinom, glm
|
||||||
library(caret)
|
library(caret)
|
||||||
library(ROCR)
|
library(ROCR)
|
||||||
#library(PRROC) autre package pour courbe roc et courbe pr
|
# library(PRROC) autre package pour courbe roc et courbe pr
|
||||||
library(ISLR) # dataset for statistical learning
|
library(ISLR) # dataset for statistical learning
|
||||||
|
|
||||||
ggplot2::theme_set(ggplot2::theme_light())# Set the graphical theme
|
ggplot2::theme_set(ggplot2::theme_light()) # Set the graphical theme
|
||||||
```
|
```
|
||||||
```{r}
|
```{r}
|
||||||
car <- read.table('car_income.txt', header = TRUE, sep = ';')
|
car <- read.table("car_income.txt", header = TRUE, sep = ";")
|
||||||
car %>% rmarkdown::paged_table()
|
car |> rmarkdown::paged_table()
|
||||||
summary(car)
|
summary(car)
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -44,7 +44,7 @@ summary(model_purchase)
|
|||||||
```
|
```
|
||||||
|
|
||||||
```{r}
|
```{r}
|
||||||
p1 <- car %>%
|
p1 <- car |>
|
||||||
ggplot(aes(y = purchase, x = income + age)) +
|
ggplot(aes(y = purchase, x = income + age)) +
|
||||||
geom_point(alpha = .15) +
|
geom_point(alpha = .15) +
|
||||||
geom_smooth(method = "lm") +
|
geom_smooth(method = "lm") +
|
||||||
@@ -53,7 +53,7 @@ p1 <- car %>%
|
|||||||
ylab("Probability of Purchase")
|
ylab("Probability of Purchase")
|
||||||
|
|
||||||
|
|
||||||
p2 <- car %>%
|
p2 <- car |>
|
||||||
ggplot(aes(y = purchase, x = income + age)) +
|
ggplot(aes(y = purchase, x = income + age)) +
|
||||||
geom_point(alpha = .15) +
|
geom_point(alpha = .15) +
|
||||||
geom_smooth(method = "glm", method.args = list(family = "binomial")) +
|
geom_smooth(method = "glm", method.args = list(family = "binomial")) +
|
||||||
@@ -66,9 +66,9 @@ ggplotly(p2)
|
|||||||
```
|
```
|
||||||
|
|
||||||
```{r}
|
```{r}
|
||||||
car <- car %>%
|
car <- car |>
|
||||||
mutate(old = ifelse(car$age > 3, 1, 0))
|
mutate(old = ifelse(car$age > 3, 1, 0))
|
||||||
car <- car %>%
|
car <- car |>
|
||||||
mutate(rich = ifelse(car$income > 40, 1, 0))
|
mutate(rich = ifelse(car$income > 40, 1, 0))
|
||||||
model_old <- glm(purchase ~ age + income + rich + old, data = car, family = "binomial")
|
model_old <- glm(purchase ~ age + income + rich + old, data = car, family = "binomial")
|
||||||
summary(model_old)
|
summary(model_old)
|
||||||
@@ -90,5 +90,5 @@ pima.te$pred <- as.factor(pima.te$pred)
|
|||||||
pima.te$type <- as.factor(pima.te$type)
|
pima.te$type <- as.factor(pima.te$type)
|
||||||
|
|
||||||
# Confusion matrix
|
# Confusion matrix
|
||||||
confusionMatrix(data = pima.te$type, reference = pima.te$pred, positive = 'Yes')
|
confusionMatrix(data = pima.te$type, reference = pima.te$pred, positive = "Yes")
|
||||||
```
|
```
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -297,7 +297,7 @@ On présente ci-dessous un aperçu des données.
|
|||||||
fold <- getwd()
|
fold <- getwd()
|
||||||
|
|
||||||
# Load data
|
# Load data
|
||||||
# load(paste0(fold, "/M2/Data Visualisation/tp1", "/data/datafreMPTL.RData")) # VSCode # nolint
|
# load(paste0(fold, "/M2/Data Visualisation/tp1", "/data/datafreMPTL.RData")) # VSCode
|
||||||
load(paste0(fold, "/data/datafreMPTL.RData")) # RStudio
|
load(paste0(fold, "/data/datafreMPTL.RData")) # RStudio
|
||||||
paged_table(dat, options = list(rows.print = 15))
|
paged_table(dat, options = list(rows.print = 15))
|
||||||
```
|
```
|
||||||
@@ -505,7 +505,7 @@ df_plot <- dat |>
|
|||||||
p3 <- ggplot(df_plot, aes(x = DrivAge, y = freq)) +
|
p3 <- ggplot(df_plot, aes(x = DrivAge, y = freq)) +
|
||||||
geom_point() +
|
geom_point() +
|
||||||
geom_smooth() +
|
geom_smooth() +
|
||||||
labs(x = "Age du conducteur", y = "Frequence") +
|
labs(x = "Age du conducteur", y = "Frequence") +
|
||||||
theme_bw()
|
theme_bw()
|
||||||
p3
|
p3
|
||||||
```
|
```
|
||||||
@@ -642,12 +642,16 @@ plot_pairwise_disc <- function(df, var1, var2) {
|
|||||||
|
|
||||||
df |>
|
df |>
|
||||||
group_by(varx, vary) |>
|
group_by(varx, vary) |>
|
||||||
summarize(exp = sum(Exposure),
|
summarize(
|
||||||
nb_claims = sum(ClaimNb),
|
exp = sum(Exposure),
|
||||||
freq = sum(ClaimNb) / sum(Exposure), .groups = "drop") |>
|
nb_claims = sum(ClaimNb),
|
||||||
|
freq = sum(ClaimNb) / sum(Exposure), .groups = "drop"
|
||||||
|
) |>
|
||||||
ggplot(aes(x = varx, y = freq, colour = vary, group = vary), alpha = 0.3) +
|
ggplot(aes(x = varx, y = freq, colour = vary, group = vary), alpha = 0.3) +
|
||||||
geom_point() + geom_line() + theme_bw() +
|
geom_point() +
|
||||||
labs(x = var1, y = "Frequence", colour = var2)
|
geom_line() +
|
||||||
|
theme_bw() +
|
||||||
|
labs(x = var1, y = "Frequence", colour = var2)
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@@ -23,8 +23,13 @@ editor_options:
|
|||||||
|
|
||||||
```{r setup, include=FALSE}
|
```{r setup, include=FALSE}
|
||||||
## Global options
|
## Global options
|
||||||
knitr::opts_chunk$set(cache = FALSE, warning = FALSE, message = FALSE, fig.retina = 2)
|
knitr::opts_chunk$set(
|
||||||
options(encoding = 'UTF-8')
|
cache = FALSE,
|
||||||
|
warning = FALSE,
|
||||||
|
message = FALSE,
|
||||||
|
fig.retina = 2
|
||||||
|
)
|
||||||
|
options(encoding = "UTF-8")
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
@@ -33,11 +38,11 @@ options(encoding = 'UTF-8')
|
|||||||
library(lattice)
|
library(lattice)
|
||||||
library(grid)
|
library(grid)
|
||||||
library(ggplot2)
|
library(ggplot2)
|
||||||
require(gridExtra)
|
require(gridExtra)
|
||||||
library(locfit)
|
library(locfit)
|
||||||
library(scales)
|
library(scales)
|
||||||
library(formattable)
|
library(formattable)
|
||||||
library(RColorBrewer)
|
library(RColorBrewer)
|
||||||
library(plotly)
|
library(plotly)
|
||||||
library(dplyr)
|
library(dplyr)
|
||||||
library(tidyr)
|
library(tidyr)
|
||||||
@@ -88,7 +93,7 @@ de vie par pays sur la période 1952-1990. Les observations ont lieu tous les 5
|
|||||||
Dans un premier temps, il faut installer le package et le charger.
|
Dans un premier temps, il faut installer le package et le charger.
|
||||||
|
|
||||||
```{r}
|
```{r}
|
||||||
# install.packages("gapminder")
|
# install.packages("gapminder") #nolint
|
||||||
library(gapminder)
|
library(gapminder)
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -140,7 +145,7 @@ pouvez observer entre `gdpPercap` et `lifeExp`.
|
|||||||
:::
|
:::
|
||||||
|
|
||||||
```{r}
|
```{r}
|
||||||
ggplot(data = gapminder, aes(x = gdpPercap, y = lifeExp)) +
|
ggplot(data = gapminder, aes(x = gdpPercap, y = lifeExp)) +
|
||||||
geom_point()
|
geom_point()
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -158,7 +163,7 @@ visualisations permettant de comparer des distributions.
|
|||||||
|
|
||||||
```{r}
|
```{r}
|
||||||
ggplot(data = gapminder, aes(x = lifeExp)) +
|
ggplot(data = gapminder, aes(x = lifeExp)) +
|
||||||
geom_density()
|
geom_density()
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
@@ -171,16 +176,16 @@ Il faut au préalable récupérer un fond de carte (ici de l'année 2016). Nous
|
|||||||
les données `gapminder` de 2007.
|
les données `gapminder` de 2007.
|
||||||
|
|
||||||
```{r}
|
```{r}
|
||||||
library(giscoR)
|
library(giscoR)
|
||||||
library(sf)
|
library(sf)
|
||||||
|
|
||||||
world <- gisco_countries
|
world <- gisco_countries
|
||||||
world <- subset(world, NAME_ENGL != "Antarctica") # Remove Antartica
|
world <- subset(world, NAME_ENGL != "Antarctica") # Remove Antartica
|
||||||
|
|
||||||
# Merge data
|
# Merge data
|
||||||
world_df <- gapminder %>%
|
world_df <- gapminder |>
|
||||||
filter(year == "2007")
|
filter(year == "2007")
|
||||||
world_df <- world %>%
|
world_df <- world |>
|
||||||
left_join(world_df, by = c("NAME_ENGL" = "country"))
|
left_join(world_df, by = c("NAME_ENGL" = "country"))
|
||||||
|
|
||||||
ggplot(world_df) +
|
ggplot(world_df) +
|
||||||
@@ -231,7 +236,7 @@ accidents <- read_csv("data/accidentsVelo.csv",
|
|||||||
date = col_date(format = "%Y-%m-%d")))
|
date = col_date(format = "%Y-%m-%d")))
|
||||||
|
|
||||||
# few ajustements
|
# few ajustements
|
||||||
accidents <- accidents %>%
|
accidents <- accidents |>
|
||||||
mutate(mois = factor(mois),
|
mutate(mois = factor(mois),
|
||||||
jour = factor(jour),
|
jour = factor(jour),
|
||||||
dep = factor(dep),
|
dep = factor(dep),
|
||||||
@@ -247,8 +252,8 @@ correct <- paste0("0", str_sub(correct, 1, 1), ":",
|
|||||||
accidents$hrmn[issue] <- correct
|
accidents$hrmn[issue] <- correct
|
||||||
|
|
||||||
# Extract hour
|
# Extract hour
|
||||||
accidents <- accidents %>%
|
accidents <- accidents |>
|
||||||
mutate(hour = paste(date, hrmn, sep = " ")) %>%
|
mutate(hour = paste(date, hrmn, sep = " ")) |>
|
||||||
mutate(hour = strptime(hour, "%Y-%m-%d %H:%M")$hour)
|
mutate(hour = strptime(hour, "%Y-%m-%d %H:%M")$hour)
|
||||||
|
|
||||||
# mapping table for french departments
|
# mapping table for french departments
|
||||||
@@ -327,8 +332,8 @@ library(mapview)
|
|||||||
library(sf)
|
library(sf)
|
||||||
|
|
||||||
## Remove NA
|
## Remove NA
|
||||||
df_map_dyn <- accidents %>%
|
df_map_dyn <- accidents |>
|
||||||
filter(???) %>%
|
filter(???) |>
|
||||||
na.omit()
|
na.omit()
|
||||||
|
|
||||||
# Make map and print it
|
# Make map and print it
|
||||||
@@ -354,27 +359,27 @@ Voici un premier code à trou pour vous aider.
|
|||||||
|
|
||||||
```{r, eval = F}
|
```{r, eval = F}
|
||||||
# get french map - level nuts2
|
# get french map - level nuts2
|
||||||
fr <- gisco_get_nuts(resolution = "20", country = ???, nuts_level = ???) %>%
|
fr <- gisco_get_nuts(resolution = "20", country = ???, nuts_level = ???) |>
|
||||||
mutate(res = "20M")
|
mutate(res = "20M")
|
||||||
|
|
||||||
# Remove white-space to avoid errors.
|
# Remove white-space to avoid errors.
|
||||||
library(stringr)
|
library(stringr)
|
||||||
departements_francais <- departements_francais %>%
|
departements_francais <- departements_francais |>
|
||||||
mutate(dep_name = str_trim(dep_name))
|
mutate(dep_name = str_trim(dep_name))
|
||||||
|
|
||||||
fr <- fr %>%
|
fr <- fr |>
|
||||||
mutate(NUTS_NAME = str_trim(NUTS_NAME))
|
mutate(NUTS_NAME = str_trim(NUTS_NAME))
|
||||||
|
|
||||||
# Merge and remove departements outside metropolitan France
|
# Merge and remove departements outside metropolitan France
|
||||||
fr_map <- fr %>%
|
fr_map <- fr |>
|
||||||
left_join(???) %>%
|
left_join(???) |>
|
||||||
filter(! dep %in% c("971", ???) )
|
filter(! dep %in% c("971", ???) )
|
||||||
|
|
||||||
# count the number of accidents
|
# count the number of accidents
|
||||||
df_acc <- ???
|
df_acc <- ???
|
||||||
|
|
||||||
# merge statistics with the map
|
# merge statistics with the map
|
||||||
map_acc <- fr_map %>%
|
map_acc <- fr_map |>
|
||||||
left_join(df_acc, by = c("dep" = "dep"))
|
left_join(df_acc, by = c("dep" = "dep"))
|
||||||
|
|
||||||
# map with all accidents
|
# map with all accidents
|
||||||
|
|||||||
@@ -194,11 +194,11 @@ linear.mod$results
|
|||||||
```{r}
|
```{r}
|
||||||
Ytrain <- cookie.train$sugars
|
Ytrain <- cookie.train$sugars
|
||||||
dfc_train <- data.frame(ytrain = Ytrain, linear.mod = fitted(linear.mod))
|
dfc_train <- data.frame(ytrain = Ytrain, linear.mod = fitted(linear.mod))
|
||||||
dfc_train %>% rmarkdown::paged_table()
|
dfc_train |> rmarkdown::paged_table()
|
||||||
```
|
```
|
||||||
|
|
||||||
```{r}
|
```{r}
|
||||||
dfc_train %>%
|
dfc_train |>
|
||||||
ggplot(aes(x = ytrain, y = linear.mod)) +
|
ggplot(aes(x = ytrain, y = linear.mod)) +
|
||||||
geom_point(size = 2, color = "#983399") +
|
geom_point(size = 2, color = "#983399") +
|
||||||
geom_smooth(method = "lm", color = "#389900") +
|
geom_smooth(method = "lm", color = "#389900") +
|
||||||
@@ -211,9 +211,9 @@ dfc_train %>%
|
|||||||
Ytest <- cookie.test$sugars
|
Ytest <- cookie.test$sugars
|
||||||
dfc_test <- data.frame(ytest = Ytest)
|
dfc_test <- data.frame(ytest = Ytest)
|
||||||
dfc_test$linear.mod <- predict(linear.mod, newdata = cookie.test)
|
dfc_test$linear.mod <- predict(linear.mod, newdata = cookie.test)
|
||||||
# dfc_test%>%rmarkdown::paged_table()
|
# dfc_test|>rmarkdown::paged_table()
|
||||||
|
|
||||||
dfc_test %>%
|
dfc_test |>
|
||||||
ggplot(aes(x = ytest, y = linear.mod)) +
|
ggplot(aes(x = ytest, y = linear.mod)) +
|
||||||
geom_point(size = 2, color = "#983399") +
|
geom_point(size = 2, color = "#983399") +
|
||||||
geom_smooth(method = "lm", color = "#389900") +
|
geom_smooth(method = "lm", color = "#389900") +
|
||||||
@@ -244,7 +244,7 @@ ggplotly(ggplot(Lasso))
|
|||||||
```
|
```
|
||||||
|
|
||||||
```{r}
|
```{r}
|
||||||
Lasso$results %>% rmarkdown::paged_table()
|
Lasso$results |> rmarkdown::paged_table()
|
||||||
```
|
```
|
||||||
|
|
||||||
```{r}
|
```{r}
|
||||||
@@ -271,8 +271,8 @@ coef_lasso <- data.frame(
|
|||||||
Variable = rownames(as.matrix(coef(Lasso$finalModel, Lasso$bestTune$lambda))),
|
Variable = rownames(as.matrix(coef(Lasso$finalModel, Lasso$bestTune$lambda))),
|
||||||
Coefficient = as.matrix(coef(Lasso$finalModel, Lasso$bestTune$lambda))[, 1]
|
Coefficient = as.matrix(coef(Lasso$finalModel, Lasso$bestTune$lambda))[, 1]
|
||||||
)
|
)
|
||||||
coef_lasso %>%
|
coef_lasso |>
|
||||||
subset(Coefficient != 0) %>%
|
subset(Coefficient != 0) |>
|
||||||
rmarkdown::paged_table()
|
rmarkdown::paged_table()
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -298,7 +298,7 @@ ggplotly(ggplot(ridge))
|
|||||||
```
|
```
|
||||||
|
|
||||||
```{r}
|
```{r}
|
||||||
ridge$results %>% rmarkdown::paged_table()
|
ridge$results |> rmarkdown::paged_table()
|
||||||
```
|
```
|
||||||
|
|
||||||
```{r}
|
```{r}
|
||||||
@@ -320,7 +320,7 @@ vip(ridge, num_features = 15)
|
|||||||
```
|
```
|
||||||
|
|
||||||
```{r}
|
```{r}
|
||||||
data.frame(as.matrix(coef(ridge$finalModel, ridge$bestTune$lambda))) %>%
|
data.frame(as.matrix(coef(ridge$finalModel, ridge$bestTune$lambda))) |>
|
||||||
rmarkdown::paged_table()
|
rmarkdown::paged_table()
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -346,7 +346,7 @@ ggplotly(ggplot(ElNet))
|
|||||||
```
|
```
|
||||||
|
|
||||||
```{r}
|
```{r}
|
||||||
ElNet$results %>% rmarkdown::paged_table()
|
ElNet$results |> rmarkdown::paged_table()
|
||||||
```
|
```
|
||||||
|
|
||||||
```{r}
|
```{r}
|
||||||
@@ -372,8 +372,8 @@ coef_elnet <- data.frame(
|
|||||||
Variable = rownames(as.matrix(coef(ElNet$finalModel, ElNet$bestTune$lambda))),
|
Variable = rownames(as.matrix(coef(ElNet$finalModel, ElNet$bestTune$lambda))),
|
||||||
Coefficient = as.matrix(coef(ElNet$finalModel, ElNet$bestTune$lambda))[, 1]
|
Coefficient = as.matrix(coef(ElNet$finalModel, ElNet$bestTune$lambda))[, 1]
|
||||||
)
|
)
|
||||||
coef_elnet %>%
|
coef_elnet |>
|
||||||
subset(Coefficient != 0) %>%
|
subset(Coefficient != 0) |>
|
||||||
rmarkdown::paged_table()
|
rmarkdown::paged_table()
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -396,7 +396,7 @@ ggplotly(ggplot(pls_mod))
|
|||||||
```
|
```
|
||||||
|
|
||||||
```{r}
|
```{r}
|
||||||
pls_mod$results %>% rmarkdown::paged_table()
|
pls_mod$results |> rmarkdown::paged_table()
|
||||||
```
|
```
|
||||||
|
|
||||||
```{r}
|
```{r}
|
||||||
@@ -412,7 +412,7 @@ vip(pls_mod, num_features = 20)
|
|||||||
```
|
```
|
||||||
|
|
||||||
```{r}
|
```{r}
|
||||||
data.frame(Coefficients = as.matrix(coef(pls_mod$finalModel))) %>%
|
data.frame(Coefficients = as.matrix(coef(pls_mod$finalModel))) |>
|
||||||
rmarkdown::paged_table()
|
rmarkdown::paged_table()
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -435,7 +435,7 @@ dTrain$ridge <- fitted(ridge)
|
|||||||
dTrain$ElNet <- fitted(ElNet)
|
dTrain$ElNet <- fitted(ElNet)
|
||||||
dTrain$pls <- fitted(pls_mod)
|
dTrain$pls <- fitted(pls_mod)
|
||||||
melt.dTrain <- melt(dTrain, id = "yTrain", variable.name = "model")
|
melt.dTrain <- melt(dTrain, id = "yTrain", variable.name = "model")
|
||||||
melt.dTrain %>% ggplot() +
|
melt.dTrain |> ggplot() +
|
||||||
aes(x = yTrain, y = value) +
|
aes(x = yTrain, y = value) +
|
||||||
geom_smooth(method = "lm") +
|
geom_smooth(method = "lm") +
|
||||||
geom_point(size = 1, colour = "#983399") +
|
geom_point(size = 1, colour = "#983399") +
|
||||||
@@ -446,11 +446,11 @@ melt.dTrain %>% ggplot() +
|
|||||||
```
|
```
|
||||||
|
|
||||||
```{r}
|
```{r}
|
||||||
dTrain %>% rmarkdown::paged_table()
|
dTrain |> rmarkdown::paged_table()
|
||||||
```
|
```
|
||||||
|
|
||||||
```{r}
|
```{r}
|
||||||
melt.dTrain %>% rmarkdown::paged_table()
|
melt.dTrain |> rmarkdown::paged_table()
|
||||||
```
|
```
|
||||||
|
|
||||||
### On the test set
|
### On the test set
|
||||||
@@ -463,10 +463,10 @@ dTest$Lasso <- predict(Lasso, newdata = cookie.test)
|
|||||||
dTest$ridge <- predict(ridge, newdata = cookie.test)
|
dTest$ridge <- predict(ridge, newdata = cookie.test)
|
||||||
dTest$ElNet <- predict(ElNet, newdata = cookie.test)
|
dTest$ElNet <- predict(ElNet, newdata = cookie.test)
|
||||||
dTest$pls <- predict(pls_mod, newdata = cookie.test)
|
dTest$pls <- predict(pls_mod, newdata = cookie.test)
|
||||||
# dTest%>% rmarkdown::paged_table()
|
# dTest|> rmarkdown::paged_table()
|
||||||
melt.dTest <- melt(dTest, id = "yTest", variable.name = "model")
|
melt.dTest <- melt(dTest, id = "yTest", variable.name = "model")
|
||||||
# melt.dTest%>% rmarkdown::paged_table()
|
# melt.dTest|> rmarkdown::paged_table()
|
||||||
melt.dTest %>% ggplot() +
|
melt.dTest |> ggplot() +
|
||||||
aes(x = yTest, y = value) +
|
aes(x = yTest, y = value) +
|
||||||
geom_smooth(method = "lm") +
|
geom_smooth(method = "lm") +
|
||||||
geom_point(size = 1, colour = "#983399") +
|
geom_point(size = 1, colour = "#983399") +
|
||||||
@@ -491,8 +491,8 @@ RMSE <- rbind.data.frame(
|
|||||||
)
|
)
|
||||||
names(RMSE) <- c("Train", "Test")
|
names(RMSE) <- c("Train", "Test")
|
||||||
row.names(RMSE) <- c("Linear", "Lasso", "Ridge", "ElNet", "PLS")
|
row.names(RMSE) <- c("Linear", "Lasso", "Ridge", "ElNet", "PLS")
|
||||||
RMSE %>%
|
RMSE |>
|
||||||
kableExtra::kbl() %>%
|
kableExtra::kbl() |>
|
||||||
kableExtra::kable_styling()
|
kableExtra::kable_styling()
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user