mirror of
https://github.com/ArthurDanjou/ArtStudies.git
synced 2026-01-14 18:59:59 +01:00
- Updated comments and code formatting in `3-td_ggplot2 - enonce.Rmd` for clarity. - Enhanced code structure in `4-td_graphiques - enonce.Rmd` by organizing options and library calls. - Replaced pipe operator `%>%` with `|>` in `Code_Lec3.Rmd` for consistency with modern R syntax. - Cleaned up commented-out code and ensured consistent spacing in ggplot calls.
118 lines
3.2 KiB
Plaintext
118 lines
3.2 KiB
Plaintext
```{r}
|
|
setwd("/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/TP2-bis")
|
|
|
|
library(tidyverse)
|
|
library(GGally)
|
|
library(broom)
|
|
library(scales)
|
|
library(car)
|
|
library(qqplotr)
|
|
options(scipen = 999, digits = 5)
|
|
```
|
|
```{r}
|
|
data <- read.csv("data02.csv", sep = ",", header = TRUE, dec = ".")
|
|
data |>
|
|
mutate(type = factor(type, levels = c("maths", "english", "final"), labels = c("maths", "english", "final"))) |>
|
|
ggplot(aes(x = note)) +
|
|
facet_wrap(vars(type), scales = "free_x") +
|
|
geom_histogram(binwidth = 4, color = "black", fill = "grey80") +
|
|
labs(title = "Histogram of notes", x = "Note") +
|
|
theme_bw(14)
|
|
```
|
|
```{r}
|
|
data_wide <- pivot_wider(data, names_from = type, values_from = note)
|
|
data_wide |>
|
|
select(-id) |>
|
|
ggpairs() + theme_bw(14)
|
|
```
|
|
```{r}
|
|
model <- lm(data_wide, formula = final ~ maths + english)
|
|
summary(model)
|
|
```
|
|
|
|
```{r}
|
|
tidy(model, conf.int = TRUE, conf.level = 0.95)
|
|
glance(model)
|
|
|
|
(R2 <- summary(model)$r.squared)
|
|
(R2 / 2) * 57 / (1 - R2)
|
|
vcov(model)
|
|
```
|
|
|
|
# Hypothesis testing
|
|
|
|
```{r}
|
|
C <- c(0, 1, -1)
|
|
beta <- cbind(coef(model))
|
|
(C_beta <- C %*% beta)
|
|
|
|
X <- model.matrix(model)
|
|
(inv_XtX <- solve(t(X) %*% X))
|
|
|
|
q <- 1
|
|
numerator <- t(C_beta) %*%
|
|
solve(t(C) %*% inv_XtX %*% C) %*%
|
|
C_beta
|
|
denominator <- sigma(model)^2
|
|
F <- (numerator / q) / denominator
|
|
F
|
|
|
|
dof <- nrow(data_wide) - 3
|
|
|
|
qf(0.95, q, dof)
|
|
pf(F, q, dof, lower.tail = FALSE)
|
|
|
|
linearHypothesis(model, "maths - english = 0")
|
|
```
|
|
|
|
# Submodel testing
|
|
```{r}
|
|
data_predict <- predict(model, newdata = expand.grid(maths = seq(70, 90, 2), english = c(75, 85)), interval = "confidence") |>
|
|
as_tibble() |>
|
|
bind_cols(expand.grid(maths = seq(70, 90, 2), english = c(75, 85)))
|
|
|
|
data_predict |>
|
|
mutate(english = as.factor(english)) |>
|
|
ggplot(aes(x = maths, y = fit, color = english, fill = english, label = round(fit, 1))) +
|
|
geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.2, show.legend = FALSE) +
|
|
geom_point(size = 2) +
|
|
geom_line(aes(y = fit)) +
|
|
geom_text(vjust = -1, show.legend = FALSE) +
|
|
labs(title = "Prediction of final note", x = "Maths note", y = "Final note", color = "English", fill = "English") +
|
|
theme_bw(14)
|
|
```
|
|
```{r}
|
|
diag_data <- augment(model)
|
|
|
|
ggplot(diag_data, aes(x = .fitted, y = .resid)) +
|
|
geom_point() +
|
|
geom_hline(yintercept = 0) +
|
|
labs(title = "Residuals vs Fitted", x = "Fitted values", y = "Residuals") +
|
|
theme_bw(14)
|
|
```
|
|
```{r}
|
|
ggplot(diag_data, aes(sample = .resid)) +
|
|
stat_qq_band(alpha = 0.2, fill = "blue") +
|
|
stat_qq_line(color = "red") +
|
|
stat_qq_point(size = 1) +
|
|
labs(y = "Sample quantile", x = "Theoritical quantile") +
|
|
theme_minimal(base_size = 14)
|
|
|
|
ggplot(diag_data, aes(x = .resid)) +
|
|
geom_histogram(fill = "dodgerblue", color = "black", bins = 7) +
|
|
labs(y = "Count", x = "Résiduals") +
|
|
scale_y_continuous(expand = expansion(c(0, 0.05))) +
|
|
scale_x_continuous(breaks = pretty_breaks(n = 10)) +
|
|
theme_minimal(base_size = 14)
|
|
|
|
mutate(diag_data, obs = row_number()) |>
|
|
ggplot(aes(x = obs, y = .cooksd)) +
|
|
geom_segment(aes(x = obs, y = 0, xend = obs, yend = .cooksd)) +
|
|
geom_point(color = "blue", size = 1) +
|
|
scale_x_continuous(breaks = seq(0, 60, 10)) +
|
|
labs(y = "Cook's distance", x = "Index") +
|
|
theme_minimal(base_size = 14)
|
|
|
|
influenceIndexPlot(model, vars = "cook", id = list(n = 5), main = NULL)
|
|
```
|