mirror of
https://github.com/ArthurDanjou/ArtStudies.git
synced 2026-01-29 16:57:51 +01:00
Add TP1 bis
This commit is contained in:
139
M1/General Linear Models/TP1-bis/TP1.Rmd
Normal file
139
M1/General Linear Models/TP1-bis/TP1.Rmd
Normal file
@@ -0,0 +1,139 @@
|
|||||||
|
```{r}
|
||||||
|
setwd('/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/TP1-bis')
|
||||||
|
|
||||||
|
library(tidyverse)
|
||||||
|
options(scipen = 999, digits = 5)
|
||||||
|
```
|
||||||
|
```{r}
|
||||||
|
data <- read.csv("data01.csv", header = TRUE, sep = ",", dec = ".")
|
||||||
|
head(data, 20)
|
||||||
|
```
|
||||||
|
|
||||||
|
```{r}
|
||||||
|
ggplot(data, aes(x = cholesterol)) +
|
||||||
|
geom_histogram(binwidth = 5, color = "black", fill = "gray80") +
|
||||||
|
labs(x = "Cholesterol", y = "Frequency", title = "Histogram of cholesterol") +
|
||||||
|
theme_bw(14)
|
||||||
|
|
||||||
|
ggplot(data, aes(x = poids)) +
|
||||||
|
geom_histogram(binwidth = 2.5, color = "black", fill = "gray80") +
|
||||||
|
labs(x = "Poids", y = "Frequency", title = "Histogram of Poids") +
|
||||||
|
theme_bw(14)
|
||||||
|
|
||||||
|
ggplot(aes(y = cholesterol, x = poids), data = data) +
|
||||||
|
geom_point() +
|
||||||
|
labs(y = "Cholesterol (y)", x = "Poids, kg (x)", title = "Scatter plot of cholesterol and poids") +
|
||||||
|
theme_bw(14)
|
||||||
|
```
|
||||||
|
|
||||||
|
# OLSE
|
||||||
|
```{r}
|
||||||
|
x <- data[, "poids"]
|
||||||
|
y <- data[, "cholesterol"]
|
||||||
|
Sxy <- sum((x - mean(x)) * y)
|
||||||
|
Sxx <- sum((x - mean(x))^2)
|
||||||
|
|
||||||
|
beta1 <- Sxy / Sxx
|
||||||
|
beta0 <- mean(y) - beta1 * mean(x)
|
||||||
|
|
||||||
|
c(beta0, beta1)
|
||||||
|
```
|
||||||
|
|
||||||
|
Final Equation: y = 18.4470 + 2.0523 * x
|
||||||
|
|
||||||
|
```{r}
|
||||||
|
X <- cbind(1, x)
|
||||||
|
|
||||||
|
colnames(X) <- NULL
|
||||||
|
X_t_X <- t(X) %*% X
|
||||||
|
inv_X_t_x <- solve(X_t_X)
|
||||||
|
betas <- inv_X_t_x %*% t(X) %*% y
|
||||||
|
betas
|
||||||
|
```
|
||||||
|
```{r}
|
||||||
|
model <- lm(data, formula = cholesterol ~ poids)
|
||||||
|
summary(model)
|
||||||
|
coef(model)
|
||||||
|
```
|
||||||
|
```{r}
|
||||||
|
data <- data %>%
|
||||||
|
mutate(yhat = beta0 + beta1 * poids) %>%
|
||||||
|
mutate(residuals = cholesterol - yhat)
|
||||||
|
|
||||||
|
data
|
||||||
|
|
||||||
|
ggplot(data, aes(x = poids, y = cholesterol)) +
|
||||||
|
geom_point(size = 2, shape = 21, fill = "blue", color = "cyan") +
|
||||||
|
geom_line(aes(y = yhat), color = "blue") +
|
||||||
|
labs(x = "Poids", y = "Cholesterol", title = "OLS Regression Line") +
|
||||||
|
theme_bw(14)
|
||||||
|
```
|
||||||
|
```{r}
|
||||||
|
mean(data[, "cholesterol"])
|
||||||
|
mean(data[, "yhat"])
|
||||||
|
mean(data[, "residuals"]) %>% round(10)
|
||||||
|
cov(data[, "residuals"], data[, "poids"]) %>% round(10)
|
||||||
|
(RSS <- sum((data[, "residuals"])^2))
|
||||||
|
(TSS <- sum((y - mean(y))^2))
|
||||||
|
TSS - beta1 * Sxy
|
||||||
|
```
|
||||||
|
|
||||||
|
```{r}
|
||||||
|
dof <- nrow(data) - 2
|
||||||
|
|
||||||
|
sigma_hat_2 <- RSS / dof
|
||||||
|
|
||||||
|
cov_beta <- sigma_hat_2 * inv_X_t_x
|
||||||
|
var_beta <- diag(cov_beta)
|
||||||
|
std_beta <- sqrt(var_beta)
|
||||||
|
|
||||||
|
sigma(model)^2
|
||||||
|
vcov(model)
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
# Hypothesis Testing
|
||||||
|
|
||||||
|
```{r}
|
||||||
|
(t_beta <- beta1 / std_beta[2])
|
||||||
|
qt(0.975, dof)
|
||||||
|
2 * pt(abs(t_beta), dof, lower.tail = FALSE)
|
||||||
|
summary(model)
|
||||||
|
```
|
||||||
|
```{r}
|
||||||
|
MSS <- (TSS - RSS)
|
||||||
|
(F <- MSS / (RSS / dof))
|
||||||
|
qf(0.95, 1, dof)
|
||||||
|
pf(F, 1, dof, lower.tail = FALSE)
|
||||||
|
anova(model)
|
||||||
|
```
|
||||||
|
|
||||||
|
# Confidence Interval
|
||||||
|
```{r}
|
||||||
|
(CI_beta1 <- c(beta1 - qt(0.97, dof) * std_beta[2], beta1 + qt(0.97, dof) * std_beta[2]))
|
||||||
|
confint(model, 0.95)
|
||||||
|
|
||||||
|
t <- qt(0.975, dof)
|
||||||
|
sigma_hat <- sigma(model)
|
||||||
|
n <- nrow(data)
|
||||||
|
|
||||||
|
data <- data %>%
|
||||||
|
mutate(error = t *
|
||||||
|
sigma_hat *
|
||||||
|
sqrt(1 / n + (poids - mean(poids))^2 / RSS)) %>%
|
||||||
|
mutate(conf.low = yhat - error, conf.high = yhat + error, error = NULL)
|
||||||
|
|
||||||
|
ggplot(data, aes(x = poids, y = cholesterol)) +
|
||||||
|
geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.3, fill = "blue") +
|
||||||
|
geom_point(size = 2, shape = 21, fill = "blue", color = "cyan") +
|
||||||
|
geom_line(aes(y = yhat), color = "blue") +
|
||||||
|
labs(x = "Poids", y = "Cholesterol", title = "OLS Regression Line") +
|
||||||
|
theme_bw(14)
|
||||||
|
```
|
||||||
|
|
||||||
|
# R^2
|
||||||
|
```{r}
|
||||||
|
(R2 <- MSS / TSS)
|
||||||
|
summary(model)$r.squared
|
||||||
|
cor(data[, "cholesterol"], data[, "poids"])^2
|
||||||
|
```
|
||||||
51
M1/General Linear Models/TP1-bis/data01.csv
Normal file
51
M1/General Linear Models/TP1-bis/data01.csv
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
id,cholesterol,poids
|
||||||
|
1,144.47452579195235,61.2
|
||||||
|
2,145.88924868817446,62.6
|
||||||
|
3,161.1126037995184,69.9
|
||||||
|
4,155.4458581667524,63.8
|
||||||
|
5,147.97655520732974,64
|
||||||
|
6,170.24615865999974,70.5
|
||||||
|
7,144.19706954005656,65.4
|
||||||
|
8,140.1067951922945,58.3
|
||||||
|
9,142.7466793355334,60.7
|
||||||
|
10,145.2692979993652,61.7
|
||||||
|
11,160.3640967819068,68.5
|
||||||
|
12,148.56479095579394,65
|
||||||
|
13,149.76055898423206,65.1
|
||||||
|
14,143.85346030579532,63.9
|
||||||
|
15,137.88860216547474,61.2
|
||||||
|
16,164.79575052668758,70.8
|
||||||
|
17,154.49767048200715,65.5
|
||||||
|
18,131.4504444444691,55.5
|
||||||
|
19,158.60793029776818,66.4
|
||||||
|
20,154.20805689206188,61.6
|
||||||
|
21,136.4149514835298,59.1
|
||||||
|
22,134.5904701014675,62.6
|
||||||
|
23,144.2563545892844,59.3
|
||||||
|
24,138.22197617664875,60.5
|
||||||
|
25,139.21587117755206,60.9
|
||||||
|
26,138.70662904163586,56.6
|
||||||
|
27,153.73921419517316,66.9
|
||||||
|
28,143.2077515962295,64.1
|
||||||
|
29,139.1754465872936,58.8
|
||||||
|
30,158.02566076295264,68.6
|
||||||
|
31,151.67509002312616,65.2
|
||||||
|
32,147.4035408260477,62.3
|
||||||
|
33,153.80002424297288,67.1
|
||||||
|
34,158.72992406100167,67.1
|
||||||
|
35,153.92208543890675,66.8
|
||||||
|
36,155.54678399213427,66.3
|
||||||
|
37,158.2201910034931,65.8
|
||||||
|
38,149.64656232873804,63.2
|
||||||
|
39,143.75436129736758,62.2
|
||||||
|
40,150.4910110335996,61.9
|
||||||
|
41,147.0277746100402,60.7
|
||||||
|
42,148.96429207047277,62.6
|
||||||
|
43,138.37451986964854,58.3
|
||||||
|
44,163.40504312344743,72.3
|
||||||
|
45,165.13133732815768,68.4
|
||||||
|
46,135.39612367787572,58.9
|
||||||
|
47,155.49199962327577,61.8
|
||||||
|
48,151.67314985744744,61.6
|
||||||
|
49,153.49019996627314,66.7
|
||||||
|
50,142.15508998013192,63.1
|
||||||
|
Reference in New Issue
Block a user