Add TP1 bis

This commit is contained in:
2024-11-14 16:15:49 +01:00
parent a800bfcd83
commit 2fb228347e
2 changed files with 190 additions and 0 deletions

View File

@@ -0,0 +1,139 @@
```{r}
setwd('/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/TP1-bis')
library(tidyverse)
options(scipen = 999, digits = 5)
```
```{r}
data <- read.csv("data01.csv", header = TRUE, sep = ",", dec = ".")
head(data, 20)
```
```{r}
ggplot(data, aes(x = cholesterol)) +
geom_histogram(binwidth = 5, color = "black", fill = "gray80") +
labs(x = "Cholesterol", y = "Frequency", title = "Histogram of cholesterol") +
theme_bw(14)
ggplot(data, aes(x = poids)) +
geom_histogram(binwidth = 2.5, color = "black", fill = "gray80") +
labs(x = "Poids", y = "Frequency", title = "Histogram of Poids") +
theme_bw(14)
ggplot(aes(y = cholesterol, x = poids), data = data) +
geom_point() +
labs(y = "Cholesterol (y)", x = "Poids, kg (x)", title = "Scatter plot of cholesterol and poids") +
theme_bw(14)
```
# OLSE
```{r}
x <- data[, "poids"]
y <- data[, "cholesterol"]
Sxy <- sum((x - mean(x)) * y)
Sxx <- sum((x - mean(x))^2)
beta1 <- Sxy / Sxx
beta0 <- mean(y) - beta1 * mean(x)
c(beta0, beta1)
```
Final Equation: y = 18.4470 + 2.0523 * x
```{r}
X <- cbind(1, x)
colnames(X) <- NULL
X_t_X <- t(X) %*% X
inv_X_t_x <- solve(X_t_X)
betas <- inv_X_t_x %*% t(X) %*% y
betas
```
```{r}
model <- lm(data, formula = cholesterol ~ poids)
summary(model)
coef(model)
```
```{r}
data <- data %>%
mutate(yhat = beta0 + beta1 * poids) %>%
mutate(residuals = cholesterol - yhat)
data
ggplot(data, aes(x = poids, y = cholesterol)) +
geom_point(size = 2, shape = 21, fill = "blue", color = "cyan") +
geom_line(aes(y = yhat), color = "blue") +
labs(x = "Poids", y = "Cholesterol", title = "OLS Regression Line") +
theme_bw(14)
```
```{r}
mean(data[, "cholesterol"])
mean(data[, "yhat"])
mean(data[, "residuals"]) %>% round(10)
cov(data[, "residuals"], data[, "poids"]) %>% round(10)
(RSS <- sum((data[, "residuals"])^2))
(TSS <- sum((y - mean(y))^2))
TSS - beta1 * Sxy
```
```{r}
dof <- nrow(data) - 2
sigma_hat_2 <- RSS / dof
cov_beta <- sigma_hat_2 * inv_X_t_x
var_beta <- diag(cov_beta)
std_beta <- sqrt(var_beta)
sigma(model)^2
vcov(model)
```
# Hypothesis Testing
```{r}
(t_beta <- beta1 / std_beta[2])
qt(0.975, dof)
2 * pt(abs(t_beta), dof, lower.tail = FALSE)
summary(model)
```
```{r}
MSS <- (TSS - RSS)
(F <- MSS / (RSS / dof))
qf(0.95, 1, dof)
pf(F, 1, dof, lower.tail = FALSE)
anova(model)
```
# Confidence Interval
```{r}
(CI_beta1 <- c(beta1 - qt(0.97, dof) * std_beta[2], beta1 + qt(0.97, dof) * std_beta[2]))
confint(model, 0.95)
t <- qt(0.975, dof)
sigma_hat <- sigma(model)
n <- nrow(data)
data <- data %>%
mutate(error = t *
sigma_hat *
sqrt(1 / n + (poids - mean(poids))^2 / RSS)) %>%
mutate(conf.low = yhat - error, conf.high = yhat + error, error = NULL)
ggplot(data, aes(x = poids, y = cholesterol)) +
geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.3, fill = "blue") +
geom_point(size = 2, shape = 21, fill = "blue", color = "cyan") +
geom_line(aes(y = yhat), color = "blue") +
labs(x = "Poids", y = "Cholesterol", title = "OLS Regression Line") +
theme_bw(14)
```
# R^2
```{r}
(R2 <- MSS / TSS)
summary(model)$r.squared
cor(data[, "cholesterol"], data[, "poids"])^2
```

View File

@@ -0,0 +1,51 @@
id,cholesterol,poids
1,144.47452579195235,61.2
2,145.88924868817446,62.6
3,161.1126037995184,69.9
4,155.4458581667524,63.8
5,147.97655520732974,64
6,170.24615865999974,70.5
7,144.19706954005656,65.4
8,140.1067951922945,58.3
9,142.7466793355334,60.7
10,145.2692979993652,61.7
11,160.3640967819068,68.5
12,148.56479095579394,65
13,149.76055898423206,65.1
14,143.85346030579532,63.9
15,137.88860216547474,61.2
16,164.79575052668758,70.8
17,154.49767048200715,65.5
18,131.4504444444691,55.5
19,158.60793029776818,66.4
20,154.20805689206188,61.6
21,136.4149514835298,59.1
22,134.5904701014675,62.6
23,144.2563545892844,59.3
24,138.22197617664875,60.5
25,139.21587117755206,60.9
26,138.70662904163586,56.6
27,153.73921419517316,66.9
28,143.2077515962295,64.1
29,139.1754465872936,58.8
30,158.02566076295264,68.6
31,151.67509002312616,65.2
32,147.4035408260477,62.3
33,153.80002424297288,67.1
34,158.72992406100167,67.1
35,153.92208543890675,66.8
36,155.54678399213427,66.3
37,158.2201910034931,65.8
38,149.64656232873804,63.2
39,143.75436129736758,62.2
40,150.4910110335996,61.9
41,147.0277746100402,60.7
42,148.96429207047277,62.6
43,138.37451986964854,58.3
44,163.40504312344743,72.3
45,165.13133732815768,68.4
46,135.39612367787572,58.9
47,155.49199962327577,61.8
48,151.67314985744744,61.6
49,153.49019996627314,66.7
50,142.15508998013192,63.1
1 id cholesterol poids
2 1 144.47452579195235 61.2
3 2 145.88924868817446 62.6
4 3 161.1126037995184 69.9
5 4 155.4458581667524 63.8
6 5 147.97655520732974 64
7 6 170.24615865999974 70.5
8 7 144.19706954005656 65.4
9 8 140.1067951922945 58.3
10 9 142.7466793355334 60.7
11 10 145.2692979993652 61.7
12 11 160.3640967819068 68.5
13 12 148.56479095579394 65
14 13 149.76055898423206 65.1
15 14 143.85346030579532 63.9
16 15 137.88860216547474 61.2
17 16 164.79575052668758 70.8
18 17 154.49767048200715 65.5
19 18 131.4504444444691 55.5
20 19 158.60793029776818 66.4
21 20 154.20805689206188 61.6
22 21 136.4149514835298 59.1
23 22 134.5904701014675 62.6
24 23 144.2563545892844 59.3
25 24 138.22197617664875 60.5
26 25 139.21587117755206 60.9
27 26 138.70662904163586 56.6
28 27 153.73921419517316 66.9
29 28 143.2077515962295 64.1
30 29 139.1754465872936 58.8
31 30 158.02566076295264 68.6
32 31 151.67509002312616 65.2
33 32 147.4035408260477 62.3
34 33 153.80002424297288 67.1
35 34 158.72992406100167 67.1
36 35 153.92208543890675 66.8
37 36 155.54678399213427 66.3
38 37 158.2201910034931 65.8
39 38 149.64656232873804 63.2
40 39 143.75436129736758 62.2
41 40 150.4910110335996 61.9
42 41 147.0277746100402 60.7
43 42 148.96429207047277 62.6
44 43 138.37451986964854 58.3
45 44 163.40504312344743 72.3
46 45 165.13133732815768 68.4
47 46 135.39612367787572 58.9
48 47 155.49199962327577 61.8
49 48 151.67314985744744 61.6
50 49 153.49019996627314 66.7
51 50 142.15508998013192 63.1