Data Analysis
setwd('/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/Projet')
library(MASS)
library(AER)
## Loading required package: car
## Loading required package: carData
## Loading required package: lmtest
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
## Loading required package: survival
library(rmarkdown)
library(car)
library(corrplot)
## corrplot 0.95 loaded
library(carData)
library(ggfortify)
## Loading required package: ggplot2
library(ggplot2)
library(gridExtra)
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:survival':
##
## cluster
Data Preprocessing
data <- read.csv("./projet.csv", header = TRUE, sep = ",", dec = ".")
# Factors
data$saison <- as.factor(data$saison)
data$mois <- as.factor(data$mois)
data$jour_mois <- as.factor(data$jour_mois)
data$jour_semaine <- as.factor(data$jour_semaine)
data$horaire <- as.factor(data$horaire)
data$jour_travail <- as.factor(data$jour_travail)
data$vacances <- as.factor(data$vacances)
data$meteo <- as.factor(data$meteo)
# Quantitative variables
data$humidite_sqrt <- sqrt(data$humidite)
data$humidite_square <- data$humidite^2
data$temperature1_square <- data$temperature1^2
data$temperature2_square <- data$temperature2^2
data$vent_square <- data$vent^2
data$vent_sqrt <- sqrt(data$vent)
# Remove obs column
rownames(data) <- data$obs
data <- data[, -1]
paged_table(data)
colSums(is.na(data))
## saison mois jour_mois jour_semaine
## 0 0 0 0
## horaire jour_travail vacances meteo
## 0 0 0 0
## velos humidite vent temperature1
## 0 0 0 0
## temperature2 humidite_sqrt humidite_square temperature1_square
## 0 0 0 0
## temperature2_square vent_square vent_sqrt
## 0 0 0
sum(duplicated(data))
## [1] 0
str(data)
## 'data.frame': 1817 obs. of 19 variables:
## $ saison : Factor w/ 4 levels "1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
## $ mois : Factor w/ 12 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ jour_mois : Factor w/ 31 levels "1","2","3","4",..: 1 1 1 1 1 2 2 2 2 2 ...
## $ jour_semaine : Factor w/ 7 levels "1","2","3","4",..: 7 7 7 7 7 1 1 1 1 1 ...
## $ horaire : Factor w/ 5 levels "1","2","3","4",..: 1 2 3 4 5 1 2 3 4 5 ...
## $ jour_travail : Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 1 1 1 ...
## $ vacances : Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 1 1 1 ...
## $ meteo : Factor w/ 3 levels "1","2","3": 1 1 1 2 2 2 2 2 3 1 ...
## $ velos : int 105 61 340 305 174 54 82 297 268 100 ...
## $ humidite : num 78 78.2 75.5 82.2 88.8 ...
## $ vent : num 1.38 6.84 30.98 30.58 26.39 ...
## $ temperature1 : num 2.88 5.39 11.98 12.21 11.55 ...
## $ temperature2 : num 2.29 5.25 11.5 12.5 11.8 ...
## $ humidite_sqrt : num 8.83 8.85 8.69 9.07 9.42 ...
## $ humidite_square : num 6084 6123 5700 6765 7885 ...
## $ temperature1_square: num 8.29 29.05 143.52 149.08 133.4 ...
## $ temperature2_square: num 5.24 27.56 132.25 156.25 139.24 ...
## $ vent_square : num 1.9 46.8 959.8 935.1 696.4 ...
## $ vent_sqrt : num 1.17 2.62 5.57 5.53 5.14 ...
summary(data)
## saison mois jour_mois jour_semaine horaire jour_travail
## 1:444 3 :155 1 : 60 1:259 1:362 1: 573
## 2:460 5 :155 2 : 60 2:260 2:363 2:1244
## 3:468 7 :155 3 : 60 3:258 3:364
## 4:445 10 :155 4 : 60 4:259 4:365
## 12 :155 5 : 60 5:257 5:363
## 8 :153 6 : 60 6:260
## (Other):889 (Other):1457 7:264
## vacances meteo velos humidite vent
## 1:1767 1:1192 Min. : 7.0 Min. : 0.00 Min. : 0.00
## 2: 50 2: 464 1st Qu.: 275.0 1st Qu.: 48.40 1st Qu.:12.47
## 3: 161 Median : 619.0 Median : 62.75 Median :20.00
## Mean : 684.2 Mean : 62.94 Mean :21.23
## 3rd Qu.:1041.0 3rd Qu.: 78.00 3rd Qu.:28.28
## Max. :2036.0 Max. :100.00 Max. :68.66
##
## temperature1 temperature2 humidite_sqrt humidite_square
## Min. :-6.59 Min. :-14.50 Min. : 0.000 Min. : 0
## 1st Qu.: 7.51 1st Qu.: 5.25 1st Qu.: 6.957 1st Qu.: 2343
## Median :15.31 Median : 15.80 Median : 7.921 Median : 3938
## Mean :15.24 Mean : 15.25 Mean : 7.828 Mean : 4323
## 3rd Qu.:23.15 3rd Qu.: 25.00 3rd Qu.: 8.832 3rd Qu.: 6084
## Max. :36.65 Max. : 47.25 Max. :10.000 Max. :10000
##
## temperature1_square temperature2_square vent_square vent_sqrt
## Min. : 0.0001 Min. : 0.00 Min. : 0.0 Min. :0.000
## 1st Qu.: 56.4001 1st Qu.: 39.06 1st Qu.: 155.5 1st Qu.:3.531
## Median : 234.3961 Median : 249.64 Median : 400.0 Median :4.472
## Mean : 319.6898 Mean : 369.70 Mean : 585.5 Mean :4.400
## 3rd Qu.: 535.9225 3rd Qu.: 625.00 3rd Qu.: 799.8 3rd Qu.:5.318
## Max. :1343.2225 Max. :2232.56 Max. :4714.2 Max. :8.286
##
Study of the Quantitative Variables
Distribution of Variables
plot_velos <- ggplot(data, aes(x = velos)) +
geom_histogram(bins = 25, fill = "blue") +
labs(title = "Distribution du nombre de vélos loués", x = "Nombre de locations de vélos")
plot_temperature1 <- ggplot(data, aes(x = temperature1)) +
geom_histogram(bins = 30, fill = "green") +
labs(title = "Distribution de la température1", x = "Température moyenne mesuré (°C)")
plot_temperature2 <- ggplot(data, aes(x = temperature2)) +
geom_histogram(bins = 30, fill = "green") +
labs(title = "Distribution de la température2", x = "Température moyenne ressentie (°C)")
plot_humidite <- ggplot(data, aes(x = humidite)) +
geom_histogram(bins = 30, fill = "green") +
labs(title = "Distribution de la humidité", x = "Pourcentage d'humidité")
plot_vent <- ggplot(data, aes(x = vent)) +
geom_histogram(bins = 30, fill = "green") +
labs(title = "Distribution de la vent", x = "Vitesse du vent (Km/h)")
grid.arrange(plot_velos, plot_temperature1, plot_temperature2, plot_humidite, plot_vent, ncol = 3)

Correlation between Quantitatives Variables
#detach(package:arm) # If error, uncomment this line due to duplicate function 'corrplot'
corr_matrix <- cor(data[, sapply(data, is.numeric)])
corrplot(corr_matrix, type = "lower", tl.col = "black", tl.srt = 45)

pairs(data[, sapply(data, is.numeric)])

cor.test(data$temperature1, data$temperature2)
##
## Pearson's product-moment correlation
##
## data: data$temperature1 and data$temperature2
## t = 384.61, df = 1815, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9933370 0.9944541
## sample estimates:
## cor
## 0.993921
Study of the Qualitative Variables
plot_saison <- ggplot(data, aes(x = saison, y = velos)) +
geom_boxplot(fill = "lightblue") +
labs(title = "Nombre de vélos par saison", x = "Saison", y = "Nombre de vélos loués")
plot_jour_semaine <- ggplot(data, aes(x = jour_semaine, y = velos)) +
geom_boxplot(fill = "lightblue") +
labs(title = "Nombre de vélos par jour de la semaine", x = "Jour de la semaine", y = "Nombre de vélos loués")
plot_mois <- ggplot(data, aes(x = mois, y = velos)) +
geom_boxplot(fill = "lightblue") +
labs(title = "Nombre de vélos par mois de l'année", x = "Mois de l'année", y = "Nombre de vélos loués")
plot_jour_mois <- ggplot(data, aes(x = jour_mois, y = velos)) +
geom_boxplot(fill = "lightblue") +
labs(title = "Nombre de vélos par jour du mois", x = "Jour du mois", y = "Nombre de vélos loués")
plot_horaire <- ggplot(data, aes(x = horaire, y = velos)) +
geom_boxplot(fill = "lightblue") +
labs(title = "Nombre de vélos par horaire de la journée", x = "Horaire de la journée", y = "Nombre de vélos loués")
plot_jour_travail <- ggplot(data, aes(x = jour_travail, y = velos)) +
geom_boxplot(fill = "lightblue") +
labs(title = "Nombre de vélos par jour travaillé", x = "Jour travaillé", y = "Nombre de vélos loués")
plot_vacances <- ggplot(data, aes(x = vacances, y = velos)) +
geom_boxplot(fill = "lightblue") +
labs(title = "Nombre de vélos par vacances", x = "Vacances", y = "Nombre de vélos loués")
plot_meteo <- ggplot(data, aes(x = meteo, y = velos)) +
geom_boxplot(fill = "lightblue") +
labs(title = "Nombre de vélos par météo", x = "Météo", y = "Nombre de vélos loués")
grid.arrange(plot_horaire, plot_jour_semaine, plot_jour_mois, plot_mois, plot_saison, plot_jour_travail, plot_vacances, plot_meteo, ncol = 3)

chisq.test(data$mois, data$saison)
##
## Pearson's Chi-squared test
##
## data: data$mois and data$saison
## X-squared = 4380.8, df = 33, p-value < 2.2e-16
chisq.test(data$meteo, data$saison)
##
## Pearson's Chi-squared test
##
## data: data$meteo and data$saison
## X-squared = 20.78, df = 6, p-value = 0.002009
chisq.test(data$meteo, data$mois)
##
## Pearson's Chi-squared test
##
## data: data$meteo and data$mois
## X-squared = 91.211, df = 22, p-value = 2.13e-10
Outliers Detection
boxplot_velos <- ggplot(data, aes(x = "", y = velos)) +
geom_boxplot(fill = "lightblue") +
labs(title = "Boxplot de vélos", y = "Nombre de vélos")
boxplot_temperature1 <- ggplot(data, aes(x = "", y = temperature1)) +
geom_boxplot(fill = "lightblue") +
labs(title = "Boxplot de température1", y = "Température moyenne mesuré (°C)")
boxplot_temperature1_sq <- ggplot(data, aes(x = "", y = temperature1_square)) +
geom_boxplot(fill = "lightblue") +
labs(title = "Boxplot de température1^2", y = "Température moyenne mesuré (°C^2)")
boxplot_temperature2 <- ggplot(data, aes(x = "", y = temperature2)) +
geom_boxplot(fill = "lightblue") +
labs(title = "Boxplot de température2", y = "Température moyenne ressentie (°C)")
boxplot_temperature2_sq <- ggplot(data, aes(x = "", y = temperature2_square)) +
geom_boxplot(fill = "lightblue") +
labs(title = "Boxplot de température2^2", y = "Température moyenne ressentie (°C^2)")
boxplot_humidite <- ggplot(data, aes(x = "", y = humidite)) +
geom_boxplot(fill = "lightblue") +
labs(title = "Boxplot de humidité", y = "Pourcentage d'humidité")
boxplot_humidite_sqrt <- ggplot(data, aes(x = "", y = humidite_sqrt)) +
geom_boxplot(fill = "lightblue") +
labs(title = "Boxplot de sqrt(humidité)", y = "Pourcentage d'humidité (sqrt(%))")
boxplot_humidite_square <- ggplot(data, aes(x = "", y = humidite_square)) +
geom_boxplot(fill = "lightblue") +
labs(title = "Boxplot de humidité^2", y = "Pourcentage d'humidité (%^2)")
boxplot_vent <- ggplot(data, aes(x = "", y = vent)) +
geom_boxplot(fill = "lightblue") +
labs(title = "Boxplot de vent", y = "Vitesse du vent (Km/h)")
boxplot_vent_sqrt <- ggplot(data, aes(x = "", y = vent_sqrt)) +
geom_boxplot(fill = "lightblue") +
labs(title = "Boxplot de sqrt(vent)", y = "Vitesse du vent sqrt(Km/h)")
boxplot_vent_square <- ggplot(data, aes(x = "", y = vent_square)) +
geom_boxplot(fill = "lightblue") +
labs(title = "Boxplot de vent^2", y = "Vitesse du vent (Km/h)^2")
grid.arrange(boxplot_velos, boxplot_temperature1, boxplot_temperature1_sq, boxplot_temperature2, boxplot_temperature2_sq, boxplot_humidite, boxplot_humidite_sqrt, boxplot_humidite_square, boxplot_vent, boxplot_vent_sqrt, boxplot_vent_square, ncol = 4)

length_data <- nrow(data)
numeric_vars <- sapply(data, is.numeric)
total_outliers <- 0
for (var in names(data)[numeric_vars]) {
data_outlier <- data[[var]]
Q1 <- quantile(data_outlier, 0.25)
Q3 <- quantile(data_outlier, 0.75)
IQR <- Q3 - Q1
lower_limit <- Q1 - 1.5 * IQR
upper_limit <- Q3 + 1.5 * IQR
outliers <- data[data_outlier < lower_limit | data_outlier > upper_limit,]
cat("Number of outliers for the variable", var, ":", nrow(outliers), "\n")
data <- data[data_outlier >= lower_limit & data_outlier <= upper_limit,]
}
## Number of outliers for the variable velos : 0
## Number of outliers for the variable humidite : 5
## Number of outliers for the variable vent : 24
## Number of outliers for the variable temperature1 : 0
## Number of outliers for the variable temperature2 : 0
## Number of outliers for the variable humidite_sqrt : 1
## Number of outliers for the variable humidite_square : 0
## Number of outliers for the variable temperature1_square : 4
## Number of outliers for the variable temperature2_square : 11
## Number of outliers for the variable vent_square : 80
## Number of outliers for the variable vent_sqrt : 27
cat("Number of outliers removed :", length_data - nrow(data), "\n")
## Number of outliers removed : 152
cat("Data length after removing outliers :", nrow(data), "\n")
## Data length after removing outliers : 1665
Model Creation and Comparison
Data Split
set.seed(123)
data_split <- rsample::initial_split(data, prop = 0.8)
data_train <- rsample::training(data_split)
data_test <- rsample::testing(data_split)
Choice of the Distribution vélos
model_poisson <- glm(velos ~ ., family = poisson, data = data_train)
model_nb <- glm.nb(velos ~ ., data = data_train)
model_gaussian <- glm(velos ~ ., family = gaussian, data = data_train)
t(AIC(model_poisson, model_nb, model_gaussian)[2])
## model_poisson model_nb model_gaussian
## AIC 82988.75 17532.5 18420.81
dispersiontest(model_poisson)
##
## Overdispersion test
##
## data: model_poisson
## z = 20.793, p-value < 2.2e-16
## alternative hypothesis: true dispersion is greater than 1
## sample estimates:
## dispersion
## 54.85188
mean_velos <- mean(data_train$velos)
var_velos <- var(data_train$velos)
cat("Mean :", mean_velos, "Variance :", var_velos, "\n")
## Mean : 693.4317 Variance : 214261.1
Model Selection
model_full_quantitative <- glm.nb(
velos ~ vent +
vent_square +
vent_sqrt +
humidite +
humidite_square +
humidite_sqrt +
temperature1_square +
temperature1 +
temperature2 +
temperature2_square
, data = data_train)
summary(model_full_quantitative)
##
## Call:
## glm.nb(formula = velos ~ vent + vent_square + vent_sqrt + humidite +
## humidite_square + humidite_sqrt + temperature1_square + temperature1 +
## temperature2 + temperature2_square, data = data_train, init.theta = 2.91504998,
## link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 8.2125765 2.2378964 3.670 0.000243 ***
## vent 0.0281498 0.0499554 0.563 0.573096
## vent_square -0.0002027 0.0004951 -0.409 0.682254
## vent_sqrt -0.1673324 0.2546295 -0.657 0.511078
## humidite 0.1061905 0.0816427 1.301 0.193370
## humidite_square -0.0004965 0.0002360 -2.104 0.035388 *
## humidite_sqrt -0.8882801 0.8081472 -1.099 0.271700
## temperature1_square -0.0010461 0.0009065 -1.154 0.248518
## temperature1 0.0702146 0.0379386 1.851 0.064206 .
## temperature2 0.0260605 0.0252172 1.033 0.301397
## temperature2_square -0.0006738 0.0005534 -1.218 0.223397
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(2.915) family taken to be 1)
##
## Null deviance: 2358.2 on 1331 degrees of freedom
## Residual deviance: 1408.9 on 1321 degrees of freedom
## AIC: 19155
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 2.915
## Std. Err.: 0.108
##
## 2 x log-likelihood: -19131.246
anova(model_full_quantitative)
## Warning in anova.negbin(model_full_quantitative): tests made without
## re-estimating 'theta'
## Analysis of Deviance Table
##
## Model: Negative Binomial(2.915), link: log
##
## Response: velos
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 1331 2358.2
## vent 1 8.84 1330 2349.3 0.002946 **
## vent_square 1 4.04 1329 2345.3 0.044442 *
## vent_sqrt 1 0.33 1328 2345.0 0.568601
## humidite 1 183.86 1327 2161.1 < 2.2e-16 ***
## humidite_square 1 48.09 1326 2113.0 4.067e-12 ***
## humidite_sqrt 1 5.51 1325 2107.5 0.018912 *
## temperature1_square 1 431.26 1324 1676.2 < 2.2e-16 ***
## temperature1 1 265.92 1323 1410.3 < 2.2e-16 ***
## temperature2 1 0.02 1322 1410.3 0.898079
## temperature2_square 1 1.44 1321 1408.9 0.229603
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model_quantitative_quali <- glm.nb(
velos ~
vent +
vent_square +
vent_sqrt +
humidite +
humidite_square +
humidite_sqrt +
temperature1 +
temperature1_square +
horaire +
saison +
meteo +
mois +
vacances +
jour_travail +
jour_semaine +
jour_mois
, data = data_train)
summary(model_quantitative_quali)
##
## Call:
## glm.nb(formula = velos ~ vent + vent_square + vent_sqrt + humidite +
## humidite_square + humidite_sqrt + temperature1 + temperature1_square +
## horaire + saison + meteo + mois + vacances + jour_travail +
## jour_semaine + jour_mois, data = data_train, init.theta = 10.11407919,
## link = log)
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.7578159 1.2735469 4.521 6.15e-06 ***
## vent 0.0065391 0.0277120 0.236 0.813457
## vent_square 0.0000256 0.0002747 0.093 0.925750
## vent_sqrt -0.0872814 0.1411176 -0.619 0.536245
## humidite 0.0718573 0.0465415 1.544 0.122603
## humidite_square -0.0003074 0.0001352 -2.273 0.023026 *
## humidite_sqrt -0.5879792 0.4597808 -1.279 0.200959
## temperature1 0.0538688 0.0050283 10.713 < 2e-16 ***
## temperature1_square -0.0009887 0.0001523 -6.490 8.59e-11 ***
## horaire2 1.4014996 0.0282339 49.639 < 2e-16 ***
## horaire3 1.2441366 0.0342452 36.330 < 2e-16 ***
## horaire4 1.6669509 0.0340096 49.014 < 2e-16 ***
## horaire5 1.3009922 0.0289632 44.919 < 2e-16 ***
## saison2 0.2113832 0.0569855 3.709 0.000208 ***
## saison3 0.3238081 0.0647808 4.999 5.78e-07 ***
## saison4 0.5120136 0.0557309 9.187 < 2e-16 ***
## meteo2 -0.0388243 0.0227839 -1.704 0.088376 .
## meteo3 -0.4183265 0.0423535 -9.877 < 2e-16 ***
## mois2 0.1673029 0.0489754 3.416 0.000635 ***
## mois3 0.1690126 0.0550596 3.070 0.002143 **
## mois4 0.2656971 0.0844978 3.144 0.001664 **
## mois5 0.5093488 0.0894762 5.693 1.25e-08 ***
## mois6 0.4535267 0.0938529 4.832 1.35e-06 ***
## mois7 0.3494709 0.1036055 3.373 0.000743 ***
## mois8 0.3217628 0.1007131 3.195 0.001399 **
## mois9 0.3638295 0.0911294 3.992 6.54e-05 ***
## mois10 0.2067528 0.0832300 2.484 0.012987 *
## mois11 0.1452113 0.0794983 1.827 0.067760 .
## mois12 0.1929111 0.0622809 3.097 0.001952 **
## vacances2 -0.1803626 0.0653453 -2.760 0.005778 **
## jour_travail2 0.0234146 0.0328769 0.712 0.476347
## jour_semaine2 -0.0435844 0.0340130 -1.281 0.200052
## jour_semaine3 -0.0365816 0.0336789 -1.086 0.277397
## jour_semaine4 -0.0390571 0.0331463 -1.178 0.238668
## jour_semaine5 -0.0133017 0.0336982 -0.395 0.693043
## jour_semaine6 NA NA NA NA
## jour_semaine7 0.0302082 0.0329428 0.917 0.359148
## jour_mois2 0.0413982 0.0679635 0.609 0.542442
## jour_mois3 0.0756509 0.0680085 1.112 0.265978
## jour_mois4 0.1520220 0.0659987 2.303 0.021256 *
## jour_mois5 0.1174765 0.0679582 1.729 0.083870 .
## jour_mois6 0.0194055 0.0678443 0.286 0.774855
## jour_mois7 0.0515925 0.0695970 0.741 0.458510
## jour_mois8 0.0088355 0.0679031 0.130 0.896472
## jour_mois9 0.0763066 0.0669394 1.140 0.254314
## jour_mois10 0.1225251 0.0661751 1.852 0.064094 .
## jour_mois11 0.1082326 0.0713126 1.518 0.129085
## jour_mois12 0.0648527 0.0675506 0.960 0.337024
## jour_mois13 0.0099991 0.0692432 0.144 0.885181
## jour_mois14 0.0894572 0.0685495 1.305 0.191892
## jour_mois15 0.0621761 0.0702483 0.885 0.376108
## jour_mois16 0.0553908 0.0653521 0.848 0.396674
## jour_mois17 0.1266634 0.0669505 1.892 0.058505 .
## jour_mois18 0.0767661 0.0652818 1.176 0.239628
## jour_mois19 0.0961955 0.0675449 1.424 0.154397
## jour_mois20 0.1152297 0.0705546 1.633 0.102427
## jour_mois21 0.1368864 0.0705549 1.940 0.052362 .
## jour_mois22 0.0723416 0.0677257 1.068 0.285450
## jour_mois23 0.0672894 0.0678375 0.992 0.321236
## jour_mois24 -0.0371423 0.0678872 -0.547 0.584298
## jour_mois25 0.0099320 0.0686310 0.145 0.884935
## jour_mois26 -0.0009005 0.0681434 -0.013 0.989456
## jour_mois27 -0.0409743 0.0713894 -0.574 0.565999
## jour_mois28 0.0344889 0.0671148 0.514 0.607336
## jour_mois29 0.0385154 0.0673973 0.571 0.567683
## jour_mois30 0.1637766 0.0670492 2.443 0.014580 *
## jour_mois31 0.1191179 0.0777575 1.532 0.125544
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(10.1141) family taken to be 1)
##
## Null deviance: 8029.0 on 1331 degrees of freedom
## Residual deviance: 1356.7 on 1266 degrees of freedom
## AIC: 17531
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 10.114
## Std. Err.: 0.398
##
## 2 x log-likelihood: -17396.536
anova(model_quantitative_quali)
## Warning in anova.negbin(model_quantitative_quali): tests made without
## re-estimating 'theta'
## Analysis of Deviance Table
##
## Model: Negative Binomial(10.1141), link: log
##
## Response: velos
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 1331 8029.0
## vent 1 30.35 1330 7998.6 3.609e-08 ***
## vent_square 1 13.88 1329 7984.8 0.0001951 ***
## vent_sqrt 1 1.12 1328 7983.6 0.2905323
## humidite 1 630.37 1327 7353.3 < 2.2e-16 ***
## humidite_square 1 164.51 1326 7188.8 < 2.2e-16 ***
## humidite_sqrt 1 18.83 1325 7169.9 1.426e-05 ***
## temperature1 1 2047.02 1324 5122.9 < 2.2e-16 ***
## temperature1_square 1 340.16 1323 4782.7 < 2.2e-16 ***
## horaire 4 2909.21 1319 1873.5 < 2.2e-16 ***
## saison 3 285.86 1316 1587.7 < 2.2e-16 ***
## meteo 2 114.28 1314 1473.4 < 2.2e-16 ***
## mois 11 66.77 1303 1406.6 4.995e-10 ***
## vacances 1 10.42 1302 1396.2 0.0012449 **
## jour_travail 1 1.30 1301 1394.9 0.2541936
## jour_semaine 5 4.18 1296 1390.7 0.5231791
## jour_mois 30 34.01 1266 1356.7 0.2803690
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model_quanti_quali_final <- glm.nb(
velos ~
vent +
vent_square + #
humidite +
humidite_square +
humidite_sqrt + #
temperature1 +
temperature1_square +
horaire +
saison +
meteo +
mois +
vacances
, data = data_train)
summary(model_quanti_quali_final)
##
## Call:
## glm.nb(formula = velos ~ vent + vent_square + humidite + humidite_square +
## humidite_sqrt + temperature1 + temperature1_square + horaire +
## saison + meteo + mois + vacances, data = data_train, init.theta = 9.822854355,
## link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.373e+00 1.248e+00 4.304 1.67e-05 ***
## vent -9.504e-03 4.036e-03 -2.355 0.018529 *
## vent_square 1.630e-04 9.481e-05 1.719 0.085660 .
## humidite 5.968e-02 4.615e-02 1.293 0.195913
## humidite_square -2.771e-04 1.341e-04 -2.067 0.038757 *
## humidite_sqrt -4.645e-01 4.559e-01 -1.019 0.308254
## temperature1 5.358e-02 4.882e-03 10.976 < 2e-16 ***
## temperature1_square -1.033e-03 1.469e-04 -7.034 2.00e-12 ***
## horaire2 1.398e+00 2.847e-02 49.083 < 2e-16 ***
## horaire3 1.248e+00 3.442e-02 36.244 < 2e-16 ***
## horaire4 1.668e+00 3.414e-02 48.867 < 2e-16 ***
## horaire5 1.298e+00 2.919e-02 44.454 < 2e-16 ***
## saison2 2.114e-01 5.714e-02 3.700 0.000216 ***
## saison3 3.146e-01 6.485e-02 4.851 1.23e-06 ***
## saison4 4.924e-01 5.589e-02 8.810 < 2e-16 ***
## meteo2 -3.463e-02 2.274e-02 -1.523 0.127822
## meteo3 -4.184e-01 4.210e-02 -9.938 < 2e-16 ***
## mois2 1.657e-01 4.924e-02 3.366 0.000762 ***
## mois3 1.718e-01 5.498e-02 3.125 0.001777 **
## mois4 2.794e-01 8.413e-02 3.321 0.000898 ***
## mois5 5.298e-01 8.927e-02 5.934 2.95e-09 ***
## mois6 4.858e-01 9.293e-02 5.227 1.72e-07 ***
## mois7 3.945e-01 1.026e-01 3.846 0.000120 ***
## mois8 3.642e-01 9.993e-02 3.644 0.000268 ***
## mois9 4.037e-01 9.013e-02 4.479 7.49e-06 ***
## mois10 2.384e-01 8.283e-02 2.878 0.004005 **
## mois11 1.710e-01 7.934e-02 2.155 0.031136 *
## mois12 2.175e-01 6.227e-02 3.493 0.000478 ***
## vacances2 -1.746e-01 5.379e-02 -3.246 0.001170 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(9.8229) family taken to be 1)
##
## Null deviance: 7803.6 on 1331 degrees of freedom
## Residual deviance: 1357.7 on 1303 degrees of freedom
## AIC: 17496
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 9.823
## Std. Err.: 0.386
##
## 2 x log-likelihood: -17436.000
model_0 <- glm.nb(velos ~ 1, data = data_train)
model_forward <- stepAIC(
model_0,
vélos ~ vent +
humidite +
humidite_square +
temperature1 +
temperature1_square +
horaire +
saison +
meteo +
mois +
vacances,
data = data_train,
trace = FALSE,
direction = "forward"
)
model_backward <- stepAIC(
model_quanti_quali_final,
~1,
trace = FALSE,
direction = "backward"
)
model_both <- stepAIC(
model_0,
vélos ~ vent +
humidite +
humidite_square +
temperature1 +
temperature1_square +
horaire +
saison +
meteo +
mois +
vacances,
data = data_train,
trace = FALSE,
direction = "both"
)
AIC(model_forward, model_both, model_backward)
## df AIC
## model_forward 28 17496.08
## model_both 28 17496.08
## model_backward 29 17495.00
summary(model_forward)
##
## Call:
## glm.nb(formula = velos ~ horaire + mois + meteo + temperature1 +
## saison + humidite_square + temperature1_square + humidite +
## vacances + vent, data = data_train, init.theta = 9.793225092,
## link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.073e+00 1.134e-01 35.913 < 2e-16 ***
## horaire2 1.397e+00 2.851e-02 49.005 < 2e-16 ***
## horaire3 1.247e+00 3.442e-02 36.232 < 2e-16 ***
## horaire4 1.668e+00 3.419e-02 48.794 < 2e-16 ***
## horaire5 1.296e+00 2.921e-02 44.351 < 2e-16 ***
## mois2 1.675e-01 4.927e-02 3.399 0.000676 ***
## mois3 1.740e-01 5.504e-02 3.161 0.001573 **
## mois4 2.850e-01 8.407e-02 3.390 0.000700 ***
## mois5 5.331e-01 8.929e-02 5.971 2.36e-09 ***
## mois6 4.899e-01 9.302e-02 5.266 1.39e-07 ***
## mois7 4.001e-01 1.027e-01 3.896 9.77e-05 ***
## mois8 3.725e-01 9.997e-02 3.726 0.000195 ***
## mois9 4.072e-01 9.024e-02 4.512 6.42e-06 ***
## mois10 2.420e-01 8.290e-02 2.919 0.003510 **
## mois11 1.733e-01 7.945e-02 2.181 0.029176 *
## mois12 2.213e-01 6.234e-02 3.550 0.000385 ***
## meteo2 -3.797e-02 2.272e-02 -1.672 0.094590 .
## meteo3 -4.297e-01 4.158e-02 -10.334 < 2e-16 ***
## temperature1 5.379e-02 4.887e-03 11.007 < 2e-16 ***
## saison2 2.070e-01 5.679e-02 3.644 0.000268 ***
## saison3 3.105e-01 6.468e-02 4.801 1.58e-06 ***
## saison4 4.910e-01 5.597e-02 8.773 < 2e-16 ***
## humidite_square -1.372e-04 2.661e-05 -5.157 2.51e-07 ***
## temperature1_square -1.045e-03 1.468e-04 -7.113 1.13e-12 ***
## humidite 1.206e-02 3.387e-03 3.560 0.000371 ***
## vacances2 -1.718e-01 5.383e-02 -3.192 0.001414 **
## vent -2.818e-03 1.028e-03 -2.741 0.006118 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(9.7932) family taken to be 1)
##
## Null deviance: 7780.7 on 1331 degrees of freedom
## Residual deviance: 1357.8 on 1305 degrees of freedom
## AIC: 17496
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 9.793
## Std. Err.: 0.385
##
## 2 x log-likelihood: -17440.082
model_final_without_interaction <- glm.nb(
velos ~ horaire +
mois +
meteo +
temperature1 +
saison +
temperature1_square +
humidite_square +
vacances +
humidite +
vent
, data = data_train)
summary(model_final_without_interaction)
##
## Call:
## glm.nb(formula = velos ~ horaire + mois + meteo + temperature1 +
## saison + temperature1_square + humidite_square + vacances +
## humidite + vent, data = data_train, init.theta = 9.793225092,
## link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.073e+00 1.134e-01 35.913 < 2e-16 ***
## horaire2 1.397e+00 2.851e-02 49.005 < 2e-16 ***
## horaire3 1.247e+00 3.442e-02 36.232 < 2e-16 ***
## horaire4 1.668e+00 3.419e-02 48.794 < 2e-16 ***
## horaire5 1.296e+00 2.921e-02 44.351 < 2e-16 ***
## mois2 1.675e-01 4.927e-02 3.399 0.000676 ***
## mois3 1.740e-01 5.504e-02 3.161 0.001573 **
## mois4 2.850e-01 8.407e-02 3.390 0.000700 ***
## mois5 5.331e-01 8.929e-02 5.971 2.36e-09 ***
## mois6 4.899e-01 9.302e-02 5.266 1.39e-07 ***
## mois7 4.001e-01 1.027e-01 3.896 9.77e-05 ***
## mois8 3.725e-01 9.997e-02 3.726 0.000195 ***
## mois9 4.072e-01 9.024e-02 4.512 6.42e-06 ***
## mois10 2.420e-01 8.290e-02 2.919 0.003510 **
## mois11 1.733e-01 7.945e-02 2.181 0.029176 *
## mois12 2.213e-01 6.234e-02 3.550 0.000385 ***
## meteo2 -3.797e-02 2.272e-02 -1.672 0.094590 .
## meteo3 -4.297e-01 4.158e-02 -10.334 < 2e-16 ***
## temperature1 5.379e-02 4.887e-03 11.007 < 2e-16 ***
## saison2 2.070e-01 5.679e-02 3.644 0.000268 ***
## saison3 3.105e-01 6.468e-02 4.801 1.58e-06 ***
## saison4 4.910e-01 5.597e-02 8.773 < 2e-16 ***
## temperature1_square -1.045e-03 1.468e-04 -7.113 1.13e-12 ***
## humidite_square -1.372e-04 2.661e-05 -5.157 2.51e-07 ***
## vacances2 -1.718e-01 5.383e-02 -3.192 0.001414 **
## humidite 1.206e-02 3.387e-03 3.560 0.000371 ***
## vent -2.818e-03 1.028e-03 -2.741 0.006118 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(9.7932) family taken to be 1)
##
## Null deviance: 7780.7 on 1331 degrees of freedom
## Residual deviance: 1357.8 on 1305 degrees of freedom
## AIC: 17496
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 9.793
## Std. Err.: 0.385
##
## 2 x log-likelihood: -17440.082
Final Model
Choice and Validation of the Model
model_final <- glm.nb(
velos ~ horaire +
mois +
meteo +
temperature1 +
saison +
temperature1_square +
humidite_square +
vacances +
humidite +
vent +
horaire:temperature1 +
temperature1_square:mois
, data = data_train
)
summary(model_final)
##
## Call:
## glm.nb(formula = velos ~ horaire + mois + meteo + temperature1 +
## saison + temperature1_square + humidite_square + vacances +
## humidite + vent + horaire:temperature1 + temperature1_square:mois,
## data = data_train, init.theta = 10.64651881, link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.1688082 0.1195673 34.866 < 2e-16 ***
## horaire2 1.6136959 0.0494090 32.660 < 2e-16 ***
## horaire3 1.3248624 0.0622321 21.289 < 2e-16 ***
## horaire4 1.6124217 0.0611669 26.361 < 2e-16 ***
## horaire5 1.1846038 0.0526976 22.479 < 2e-16 ***
## mois2 0.1166036 0.0554402 2.103 0.035446 *
## mois3 0.0947222 0.0664671 1.425 0.154128
## mois4 0.0604694 0.0986704 0.613 0.539980
## mois5 0.5272220 0.1174380 4.489 7.14e-06 ***
## mois6 0.6533283 0.1542792 4.235 2.29e-05 ***
## mois7 0.7377272 0.1852322 3.983 6.81e-05 ***
## mois8 0.4846330 0.1807033 2.682 0.007320 **
## mois9 0.4051305 0.1386855 2.921 0.003487 **
## mois10 0.1218501 0.1020733 1.194 0.232575
## mois11 0.2258393 0.0942154 2.397 0.016528 *
## mois12 0.1426387 0.0728088 1.959 0.050103 .
## meteo2 -0.0120673 0.0220598 -0.547 0.584360
## meteo3 -0.4136117 0.0404725 -10.220 < 2e-16 ***
## temperature1 0.0509424 0.0074086 6.876 6.15e-12 ***
## saison2 0.2240149 0.0552593 4.054 5.04e-05 ***
## saison3 0.3385659 0.0625326 5.414 6.16e-08 ***
## saison4 0.5050795 0.0538878 9.373 < 2e-16 ***
## temperature1_square -0.0036795 0.0012369 -2.975 0.002931 **
## humidite_square -0.0001265 0.0000264 -4.792 1.65e-06 ***
## vacances2 -0.1925533 0.0524129 -3.674 0.000239 ***
## humidite 0.0101473 0.0033687 3.012 0.002593 **
## vent -0.0033359 0.0010029 -3.326 0.000880 ***
## horaire2:temperature1 -0.0157180 0.0031003 -5.070 3.98e-07 ***
## horaire3:temperature1 -0.0047919 0.0035916 -1.334 0.182146
## horaire4:temperature1 0.0028109 0.0035586 0.790 0.429588
## horaire5:temperature1 0.0067708 0.0031848 2.126 0.033508 *
## mois2:temperature1_square 0.0027785 0.0011641 2.387 0.016998 *
## mois3:temperature1_square 0.0032223 0.0011660 2.764 0.005716 **
## mois4:temperature1_square 0.0036563 0.0011573 3.159 0.001581 **
## mois5:temperature1_square 0.0027471 0.0011715 2.345 0.019033 *
## mois6:temperature1_square 0.0024540 0.0011820 2.076 0.037883 *
## mois7:temperature1_square 0.0022636 0.0011880 1.905 0.056727 .
## mois8:temperature1_square 0.0025486 0.0011881 2.145 0.031948 *
## mois9:temperature1_square 0.0027241 0.0011786 2.311 0.020818 *
## mois10:temperature1_square 0.0032170 0.0011638 2.764 0.005706 **
## mois11:temperature1_square 0.0022141 0.0011684 1.895 0.058106 .
## mois12:temperature1_square 0.0034507 0.0012066 2.860 0.004237 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(10.6465) family taken to be 1)
##
## Null deviance: 8440.1 on 1331 degrees of freedom
## Residual deviance: 1357.4 on 1290 degrees of freedom
## AIC: 17416
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 10.647
## Std. Err.: 0.420
##
## 2 x log-likelihood: -17329.890
anova(model_final, test = "Chisq")
## Warning in anova.negbin(model_final, test = "Chisq"): tests made without
## re-estimating 'theta'
## Analysis of Deviance Table
##
## Model: Negative Binomial(10.6465), link: log
##
## Response: velos
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 1331 8440.1
## horaire 4 4252.9 1327 4187.3 < 2.2e-16 ***
## mois 11 2126.6 1316 2060.7 < 2.2e-16 ***
## meteo 2 306.4 1314 1754.2 < 2.2e-16 ***
## temperature1 1 78.8 1313 1675.4 < 2.2e-16 ***
## saison 3 72.3 1310 1603.2 1.385e-15 ***
## temperature1_square 1 40.2 1309 1563.0 2.294e-10 ***
## humidite_square 1 60.0 1308 1503.0 9.541e-15 ***
## vacances 1 9.2 1307 1493.8 0.0024119 **
## humidite 1 13.4 1306 1480.4 0.0002541 ***
## vent 1 8.1 1305 1472.3 0.0043736 **
## horaire:temperature1 4 71.0 1301 1401.2 1.371e-14 ***
## mois:temperature1_square 11 43.8 1290 1357.4 7.908e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lrtest(model_final, model_final_without_interaction)
## Likelihood ratio test
##
## Model 1: velos ~ horaire + mois + meteo + temperature1 + saison + temperature1_square +
## humidite_square + vacances + humidite + vent + horaire:temperature1 +
## temperature1_square:mois
## Model 2: velos ~ horaire + mois + meteo + temperature1 + saison + temperature1_square +
## humidite_square + vacances + humidite + vent
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 43 -8664.9
## 2 28 -8720.0 -15 110.19 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
dispersion_ratio <- sum(residuals(model_final, type = "pearson")^2) / df.residual(model_final)
print(paste("Dispersion Ratio:", round(dispersion_ratio, 2)))
## [1] "Dispersion Ratio: 0.97"
hist(residuals(model_final, type = "deviance"), breaks = 30, freq = FALSE, col = "lightblue", main = "Histogram of Deviance Residuals")
x <- seq(-5, 5, length = 100)
curve(dnorm(x), col = "darkblue", lwd = 2, add = TRUE)

autoplot(model_final, 1:4)

library(arm)
## Loading required package: Matrix
## Loading required package: lme4
##
## arm (Version 1.14-4, built: 2024-4-1)
## Working directory is /Users/arthurdanjou/Workspace/studies/M1/General Linear Models/Projet
##
## Attaching package: 'arm'
## The following object is masked from 'package:corrplot':
##
## corrplot
## The following object is masked from 'package:car':
##
## logit
binnedplot(fitted(model_final), residuals(model_final, type = "deviance"), col.int = "blue", col.pts = 2)

Performance and Limitations of the Final Model
Predictions and Mean Square Error
data_test$predictions <- predict(model_final, newdata = data_test, type = "response")
data_train$predictions <- predict(model_final, newdata = data_train, type = "response")
predictions_ci <- predict(
model_final,
newdata = data_test,
type = "link",
se.fit = TRUE
)
data_test$lwr <- exp(predictions_ci$fit - 1.96 * predictions_ci$se.fit)
data_test$upr <- exp(predictions_ci$fit + 1.96 * predictions_ci$se.fit)
MSE <- mean((data_test$velos - data_test$predictions)^2)
cat("MSE :", MSE, "\n")
## MSE : 43331
cat("RMSE train :", sqrt(mean((data_train$velos - data_train$predictions)^2)), "\n")
## RMSE train : 195.0359
cat('RMSE :', sqrt(MSE), '\n')
## RMSE : 208.161
Evaluation of Model Performance
bounds <- c(200, 650)
cat("Observations vs Prédictions\n")
## Observations vs Prédictions
cat(nrow(data_test[data_test$velos < bounds[1],]), "vs", sum(data_test$predictions < bounds[1]), "\n")
## 50 vs 39
cat(nrow(data_test[data_test$velos > bounds[1] & data_test$velos < bounds[2],]), "vs", sum(data_test$predictions > bounds[1] & data_test$predictions < bounds[2]), "\n")
## 124 vs 128
cat(nrow(data_test[data_test$velos > bounds[2],]), "vs", sum(data_test$predictions > bounds[2]), "\n")
## 159 vs 166
cat('\n')
categories <- c(0, bounds, Inf)
categories_label <- c("Low", "Mid", "High")
data_test$velos_cat <- cut(data_test$velos,
breaks = categories,
labels = categories_label,
include.lowest = TRUE)
data_test$predictions_cat <- cut(data_test$predictions,
breaks = categories,
labels = categories_label,
include.lowest = TRUE)
conf_matrix <- confusionMatrix(data_test$velos_cat, data_test$predictions_cat)
conf_matrix
## Confusion Matrix and Statistics
##
## Reference
## Prediction Low Mid High
## Low 36 14 0
## Mid 3 95 26
## High 0 19 140
##
## Overall Statistics
##
## Accuracy : 0.8138
## 95% CI : (0.7678, 0.8542)
## No Information Rate : 0.4985
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.6903
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Low Class: Mid Class: High
## Sensitivity 0.9231 0.7422 0.8434
## Specificity 0.9524 0.8585 0.8862
## Pos Pred Value 0.7200 0.7661 0.8805
## Neg Pred Value 0.9894 0.8421 0.8506
## Prevalence 0.1171 0.3844 0.4985
## Detection Rate 0.1081 0.2853 0.4204
## Detection Prevalence 0.1502 0.3724 0.4775
## Balanced Accuracy 0.9377 0.8004 0.8648
ggplot(data_test, aes(x = velos, y = predictions)) +
geom_point(color = "blue", alpha = 0.6, size = 2) +
geom_abline(slope = 1, intercept = 0, color = "red", linetype = "dashed") +
geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.2, fill = "grey") +
labs(
title = "Comparison of Observed and Predicted Bikes",
x = "Number of Observed Bikes",
y = "Number of Predicted Bikes"
) +
theme_minimal()

ggplot(data_test, aes(x = velos_cat, fill = predictions_cat)) +
geom_bar(position = "dodge") +
labs(title = "Predictions vs Observations (by categories)",
x = "Observed categories", y = "Number of predictions")

df_plot <- data.frame(observed = data_test$velos, predicted = data_test$predictions)
# Créer l'histogramme avec la courbe de densité
ggplot(df_plot, aes(x = observed)) +
geom_histogram(aes(y = ..density..), binwidth = 50, fill = "lightblue", color = "black", alpha = 0.7) + # Histogramme des données observées
geom_density(aes(x = predicted), color = "red", size = 1) + # Courbe de densité des valeurs prédites
labs(title = "Observed distribution vs. Predicted distribution",
x = "Number of vélos",
y = "Density") +
theme_bw()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
