mirror of
https://github.com/ArthurDanjou/ArtStudies.git
synced 2026-01-14 22:59:57 +01:00
Add TP1 bis
This commit is contained in:
139
M1/General Linear Models/TP1-bis/TP1.Rmd
Normal file
139
M1/General Linear Models/TP1-bis/TP1.Rmd
Normal file
@@ -0,0 +1,139 @@
|
||||
```{r}
|
||||
setwd('/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/TP1-bis')
|
||||
|
||||
library(tidyverse)
|
||||
options(scipen = 999, digits = 5)
|
||||
```
|
||||
```{r}
|
||||
data <- read.csv("data01.csv", header = TRUE, sep = ",", dec = ".")
|
||||
head(data, 20)
|
||||
```
|
||||
|
||||
```{r}
|
||||
ggplot(data, aes(x = cholesterol)) +
|
||||
geom_histogram(binwidth = 5, color = "black", fill = "gray80") +
|
||||
labs(x = "Cholesterol", y = "Frequency", title = "Histogram of cholesterol") +
|
||||
theme_bw(14)
|
||||
|
||||
ggplot(data, aes(x = poids)) +
|
||||
geom_histogram(binwidth = 2.5, color = "black", fill = "gray80") +
|
||||
labs(x = "Poids", y = "Frequency", title = "Histogram of Poids") +
|
||||
theme_bw(14)
|
||||
|
||||
ggplot(aes(y = cholesterol, x = poids), data = data) +
|
||||
geom_point() +
|
||||
labs(y = "Cholesterol (y)", x = "Poids, kg (x)", title = "Scatter plot of cholesterol and poids") +
|
||||
theme_bw(14)
|
||||
```
|
||||
|
||||
# OLSE
|
||||
```{r}
|
||||
x <- data[, "poids"]
|
||||
y <- data[, "cholesterol"]
|
||||
Sxy <- sum((x - mean(x)) * y)
|
||||
Sxx <- sum((x - mean(x))^2)
|
||||
|
||||
beta1 <- Sxy / Sxx
|
||||
beta0 <- mean(y) - beta1 * mean(x)
|
||||
|
||||
c(beta0, beta1)
|
||||
```
|
||||
|
||||
Final Equation: y = 18.4470 + 2.0523 * x
|
||||
|
||||
```{r}
|
||||
X <- cbind(1, x)
|
||||
|
||||
colnames(X) <- NULL
|
||||
X_t_X <- t(X) %*% X
|
||||
inv_X_t_x <- solve(X_t_X)
|
||||
betas <- inv_X_t_x %*% t(X) %*% y
|
||||
betas
|
||||
```
|
||||
```{r}
|
||||
model <- lm(data, formula = cholesterol ~ poids)
|
||||
summary(model)
|
||||
coef(model)
|
||||
```
|
||||
```{r}
|
||||
data <- data %>%
|
||||
mutate(yhat = beta0 + beta1 * poids) %>%
|
||||
mutate(residuals = cholesterol - yhat)
|
||||
|
||||
data
|
||||
|
||||
ggplot(data, aes(x = poids, y = cholesterol)) +
|
||||
geom_point(size = 2, shape = 21, fill = "blue", color = "cyan") +
|
||||
geom_line(aes(y = yhat), color = "blue") +
|
||||
labs(x = "Poids", y = "Cholesterol", title = "OLS Regression Line") +
|
||||
theme_bw(14)
|
||||
```
|
||||
```{r}
|
||||
mean(data[, "cholesterol"])
|
||||
mean(data[, "yhat"])
|
||||
mean(data[, "residuals"]) %>% round(10)
|
||||
cov(data[, "residuals"], data[, "poids"]) %>% round(10)
|
||||
(RSS <- sum((data[, "residuals"])^2))
|
||||
(TSS <- sum((y - mean(y))^2))
|
||||
TSS - beta1 * Sxy
|
||||
```
|
||||
|
||||
```{r}
|
||||
dof <- nrow(data) - 2
|
||||
|
||||
sigma_hat_2 <- RSS / dof
|
||||
|
||||
cov_beta <- sigma_hat_2 * inv_X_t_x
|
||||
var_beta <- diag(cov_beta)
|
||||
std_beta <- sqrt(var_beta)
|
||||
|
||||
sigma(model)^2
|
||||
vcov(model)
|
||||
```
|
||||
|
||||
|
||||
# Hypothesis Testing
|
||||
|
||||
```{r}
|
||||
(t_beta <- beta1 / std_beta[2])
|
||||
qt(0.975, dof)
|
||||
2 * pt(abs(t_beta), dof, lower.tail = FALSE)
|
||||
summary(model)
|
||||
```
|
||||
```{r}
|
||||
MSS <- (TSS - RSS)
|
||||
(F <- MSS / (RSS / dof))
|
||||
qf(0.95, 1, dof)
|
||||
pf(F, 1, dof, lower.tail = FALSE)
|
||||
anova(model)
|
||||
```
|
||||
|
||||
# Confidence Interval
|
||||
```{r}
|
||||
(CI_beta1 <- c(beta1 - qt(0.97, dof) * std_beta[2], beta1 + qt(0.97, dof) * std_beta[2]))
|
||||
confint(model, 0.95)
|
||||
|
||||
t <- qt(0.975, dof)
|
||||
sigma_hat <- sigma(model)
|
||||
n <- nrow(data)
|
||||
|
||||
data <- data %>%
|
||||
mutate(error = t *
|
||||
sigma_hat *
|
||||
sqrt(1 / n + (poids - mean(poids))^2 / RSS)) %>%
|
||||
mutate(conf.low = yhat - error, conf.high = yhat + error, error = NULL)
|
||||
|
||||
ggplot(data, aes(x = poids, y = cholesterol)) +
|
||||
geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.3, fill = "blue") +
|
||||
geom_point(size = 2, shape = 21, fill = "blue", color = "cyan") +
|
||||
geom_line(aes(y = yhat), color = "blue") +
|
||||
labs(x = "Poids", y = "Cholesterol", title = "OLS Regression Line") +
|
||||
theme_bw(14)
|
||||
```
|
||||
|
||||
# R^2
|
||||
```{r}
|
||||
(R2 <- MSS / TSS)
|
||||
summary(model)$r.squared
|
||||
cor(data[, "cholesterol"], data[, "poids"])^2
|
||||
```
|
||||
51
M1/General Linear Models/TP1-bis/data01.csv
Normal file
51
M1/General Linear Models/TP1-bis/data01.csv
Normal file
@@ -0,0 +1,51 @@
|
||||
id,cholesterol,poids
|
||||
1,144.47452579195235,61.2
|
||||
2,145.88924868817446,62.6
|
||||
3,161.1126037995184,69.9
|
||||
4,155.4458581667524,63.8
|
||||
5,147.97655520732974,64
|
||||
6,170.24615865999974,70.5
|
||||
7,144.19706954005656,65.4
|
||||
8,140.1067951922945,58.3
|
||||
9,142.7466793355334,60.7
|
||||
10,145.2692979993652,61.7
|
||||
11,160.3640967819068,68.5
|
||||
12,148.56479095579394,65
|
||||
13,149.76055898423206,65.1
|
||||
14,143.85346030579532,63.9
|
||||
15,137.88860216547474,61.2
|
||||
16,164.79575052668758,70.8
|
||||
17,154.49767048200715,65.5
|
||||
18,131.4504444444691,55.5
|
||||
19,158.60793029776818,66.4
|
||||
20,154.20805689206188,61.6
|
||||
21,136.4149514835298,59.1
|
||||
22,134.5904701014675,62.6
|
||||
23,144.2563545892844,59.3
|
||||
24,138.22197617664875,60.5
|
||||
25,139.21587117755206,60.9
|
||||
26,138.70662904163586,56.6
|
||||
27,153.73921419517316,66.9
|
||||
28,143.2077515962295,64.1
|
||||
29,139.1754465872936,58.8
|
||||
30,158.02566076295264,68.6
|
||||
31,151.67509002312616,65.2
|
||||
32,147.4035408260477,62.3
|
||||
33,153.80002424297288,67.1
|
||||
34,158.72992406100167,67.1
|
||||
35,153.92208543890675,66.8
|
||||
36,155.54678399213427,66.3
|
||||
37,158.2201910034931,65.8
|
||||
38,149.64656232873804,63.2
|
||||
39,143.75436129736758,62.2
|
||||
40,150.4910110335996,61.9
|
||||
41,147.0277746100402,60.7
|
||||
42,148.96429207047277,62.6
|
||||
43,138.37451986964854,58.3
|
||||
44,163.40504312344743,72.3
|
||||
45,165.13133732815768,68.4
|
||||
46,135.39612367787572,58.9
|
||||
47,155.49199962327577,61.8
|
||||
48,151.67314985744744,61.6
|
||||
49,153.49019996627314,66.7
|
||||
50,142.15508998013192,63.1
|
||||
|
Reference in New Issue
Block a user