mirror of
https://github.com/ArthurDanjou/ArtStudies.git
synced 2026-01-14 15:54:13 +01:00
- Updated comments and code formatting in `3-td_ggplot2 - enonce.Rmd` for clarity. - Enhanced code structure in `4-td_graphiques - enonce.Rmd` by organizing options and library calls. - Replaced pipe operator `%>%` with `|>` in `Code_Lec3.Rmd` for consistency with modern R syntax. - Cleaned up commented-out code and ensured consistent spacing in ggplot calls.
140 lines
3.1 KiB
Plaintext
140 lines
3.1 KiB
Plaintext
```{r}
|
|
setwd("/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/TP1-bis")
|
|
|
|
library(tidyverse)
|
|
options(scipen = 999, digits = 5)
|
|
```
|
|
```{r}
|
|
data <- read.csv("data01.csv", header = TRUE, sep = ",", dec = ".")
|
|
head(data, 20)
|
|
```
|
|
|
|
```{r}
|
|
ggplot(data, aes(x = cholesterol)) +
|
|
geom_histogram(binwidth = 5, color = "black", fill = "gray80") +
|
|
labs(x = "Cholesterol", y = "Frequency", title = "Histogram of cholesterol") +
|
|
theme_bw(14)
|
|
|
|
ggplot(data, aes(x = poids)) +
|
|
geom_histogram(binwidth = 2.5, color = "black", fill = "gray80") +
|
|
labs(x = "Poids", y = "Frequency", title = "Histogram of Poids") +
|
|
theme_bw(14)
|
|
|
|
ggplot(aes(y = cholesterol, x = poids), data = data) +
|
|
geom_point() +
|
|
labs(y = "Cholesterol (y)", x = "Poids, kg (x)", title = "Scatter plot of cholesterol and poids") +
|
|
theme_bw(14)
|
|
```
|
|
|
|
# OLSE
|
|
```{r}
|
|
x <- data[, "poids"]
|
|
y <- data[, "cholesterol"]
|
|
Sxy <- sum((x - mean(x)) * y)
|
|
Sxx <- sum((x - mean(x))^2)
|
|
|
|
beta1 <- Sxy / Sxx
|
|
beta0 <- mean(y) - beta1 * mean(x)
|
|
|
|
c(beta0, beta1)
|
|
```
|
|
|
|
Final Equation: y = 18.4470 + 2.0523 * x
|
|
|
|
```{r}
|
|
X <- cbind(1, x)
|
|
|
|
colnames(X) <- NULL
|
|
X_t_X <- t(X) %*% X
|
|
inv_X_t_x <- solve(X_t_X)
|
|
betas <- inv_X_t_x %*% t(X) %*% y
|
|
betas
|
|
```
|
|
```{r}
|
|
model <- lm(data, formula = cholesterol ~ poids)
|
|
summary(model)
|
|
coef(model)
|
|
```
|
|
```{r}
|
|
data <- data |>
|
|
mutate(yhat = beta0 + beta1 * poids) |>
|
|
mutate(residuals = cholesterol - yhat)
|
|
|
|
data
|
|
|
|
ggplot(data, aes(x = poids, y = cholesterol)) +
|
|
geom_point(size = 2, shape = 21, fill = "blue", color = "cyan") +
|
|
geom_line(aes(y = yhat), color = "blue") +
|
|
labs(x = "Poids", y = "Cholesterol", title = "OLS Regression Line") +
|
|
theme_bw(14)
|
|
```
|
|
```{r}
|
|
mean(data[, "cholesterol"])
|
|
mean(data[, "yhat"])
|
|
mean(data[, "residuals"]) |> round(10)
|
|
cov(data[, "residuals"], data[, "poids"]) |> round(10)
|
|
(RSS <- sum((data[, "residuals"])^2))
|
|
(TSS <- sum((y - mean(y))^2))
|
|
TSS - beta1 * Sxy
|
|
```
|
|
|
|
```{r}
|
|
dof <- nrow(data) - 2
|
|
|
|
sigma_hat_2 <- RSS / dof
|
|
|
|
cov_beta <- sigma_hat_2 * inv_X_t_x
|
|
var_beta <- diag(cov_beta)
|
|
std_beta <- sqrt(var_beta)
|
|
|
|
sigma(model)^2
|
|
vcov(model)
|
|
```
|
|
|
|
|
|
# Hypothesis Testing
|
|
|
|
```{r}
|
|
(t_beta <- beta1 / std_beta[2])
|
|
qt(0.975, dof)
|
|
2 * pt(abs(t_beta), dof, lower.tail = FALSE)
|
|
summary(model)
|
|
```
|
|
```{r}
|
|
MSS <- (TSS - RSS)
|
|
(F <- MSS / (RSS / dof))
|
|
qf(0.95, 1, dof)
|
|
pf(F, 1, dof, lower.tail = FALSE)
|
|
anova(model)
|
|
```
|
|
|
|
# Confidence Interval
|
|
```{r}
|
|
(CI_beta1 <- c(beta1 - qt(0.97, dof) * std_beta[2], beta1 + qt(0.97, dof) * std_beta[2]))
|
|
confint(model, 0.95)
|
|
|
|
t <- qt(0.975, dof)
|
|
sigma_hat <- sigma(model)
|
|
n <- nrow(data)
|
|
|
|
data <- data |>
|
|
mutate(error = t *
|
|
sigma_hat *
|
|
sqrt(1 / n + (poids - mean(poids))^2 / RSS)) |>
|
|
mutate(conf.low = yhat - error, conf.high = yhat + error, error = NULL)
|
|
|
|
ggplot(data, aes(x = poids, y = cholesterol)) +
|
|
geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.3, fill = "blue") +
|
|
geom_point(size = 2, shape = 21, fill = "blue", color = "cyan") +
|
|
geom_line(aes(y = yhat), color = "blue") +
|
|
labs(x = "Poids", y = "Cholesterol", title = "OLS Regression Line") +
|
|
theme_bw(14)
|
|
```
|
|
|
|
# R^2
|
|
```{r}
|
|
(R2 <- MSS / TSS)
|
|
summary(model)$r.squared
|
|
cor(data[, "cholesterol"], data[, "poids"])^2
|
|
```
|