Add Chapter 1 of GLM

2026-01-25 00:52:11 +01:00 · 2024-09-24 11:11:26 +02:00
parent 97d5bc3e56
commit fe6a765dff
3 changed files with 4747 additions and 0 deletions
--- a/1/Code_Chapter1.Rmd
+++ b/1/Code_Chapter1.Rmd
@@ -0,0 +1,335 @@
+---
+title: "Linear Models and Their Generalizations"
+subtitle : "Code Chapter 1: Linear gaussian regression"
+author: "Katia Meziani"
+output:
+#  pdf_document:
+#    toc: yes
+html_document:
+code_folding: hide
+css: ./style.css
+df_print: paged
+highlight: tango
+number_sections: no
+theme: flatly
+toc: yes
+toc_float:
+collapsed: no
+
+---
+
+```{r  setup, include=FALSE}
+knitr::opts_chunk$set(warning = FALSE, message = FALSE)
+```
+
+
+
+
+
+
+
+
+
+#  Motivations
+
+<span style="color: blue;">
+    <span class="underline">**The Ozone Example**</span> (With <span class="solution"><span style="color: blue;">**Ⓡ**</span> </span> )
+</span>
+
+This dataset (Air Breizh, Summer 2001) contains 112 observations and 13 variables.
+
+```{r}
+library(rmarkdown)
+ozone1 <- read.table("ozone1.txt", header = TRUE, sep = " ", dec = ".")
+paged_table(ozone1)
+```
+
+Let's correct the incorrect declaration of variables:
+
+
+
+```{r}
+library(dplyr)
+ozone1$Date <- as.Date(as.factor(ozone1$Date), format = "%Y%m%d")
+ozone1$vent <- as.factor(ozone1$vent)
+ozone1$pluie <- as.factor(ozone1$pluie)
+ozone1 <- ozone1 %>% mutate(across(where(is.character), as.numeric))
+ozone1 <- ozone1 %>% mutate(across(where(is.integer), as.numeric))
+str(ozone1)
+```
+
+
+
+
+
+
+
+# 1. Linear regression model
+
+##  Simple linear model
+
+
+
+
+Let's plot this line (here with  <span class="solution"><span style="color: blue;">**Ⓡ**</span> </span> ) is called the least squares line.
+
+```{r,   out.width = "60%", fig.align = 'center', message = FALSE, warning = FALSE}
+library(ggplot2)
+library(plotly)
+p1 <- ggplot(ozone1) +
+  aes(x = T12, y = maxO3) +
+  geom_point(col = 'red', size = 0.5) +
+  geom_smooth(method = "lm", se = FALSE)
+ggplotly(p1)
+```
+
+
+With  <span class="solution"><span style="color: blue;">**Ⓡ**</span> </span> , we can compute $\widehat {\boldsymbol{\beta}}=(\widehat{\beta}_0, \widehat{\beta}_1)$
+
+```{r}
+library(kableExtra)
+mod <- lm(maxO3 ~ T12, data = ozone1)
+mod$coefficients %>% kbl(col.names = "Estimation")
+```
+
+
+
+
+
+
+
+
+
+
+
+# 2. Ordinary least squares estimator (OLSE)
+
+
+
+
+
+
+
+##  Declaration of the model with Under <span class="solution"><span style="color: blue;">**Ⓡ**</span> </span>
+
+Let's revisit the ozone example and, for this chapter, select only the numerical regressors with  <span class="solution"><span style="color: blue;">**Ⓡ**</span> </span>
+
+```{r}
+names(ozone1)
+Ozone <- ozone1[, 2:12]
+names(Ozone)
+```
+
+
+
+Under <span class="solution"><span style="color: blue;">**Ⓡ**</span> </span>   ,we can declare the linear model in two ways: either by naming the variables or by using a `.`.
+
+```{r}
+mod <- lm(maxO3 ~ T9 +
+  T12 +
+  T15 +
+  Ne9 +
+  Ne12 +
+  Ne15 +
+  Vx9 +
+  Vx12 +
+  Vx15 +
+  maxO3v, data = Ozone)
+mod <- lm(maxO3 ~ ., data = Ozone)
+```
+
+Now, let's display the estimated values $\widehat{\boldsymbol{\beta}}$ of $\boldsymbol{\beta}$.
+
+```{r}
+coefficients(mod)
+```
+
+
+
+
+# 3. Residuals analysis
+
+
+
+```{r}
+summary(mod$residuals)
+```
+
+
+
+
+
+We can display Residual standard error with <span class="solution"><span style="color: blue;">**Ⓡ**</span> </span>
+```{r}
+cat("Residual standard error:", summary(mod)$sigma, "on", summary(mod)$df[2], "degrees of freedom\n")
+```
+
+
+
+
+
+
+
+
+
+
+
+# 4. Inference in the Gaussian linear regression model
+
+
+##  Confidence intervals/regions
+
+
+
+
+
+
+
+<span class="solution"><span style="color: blue;">**Ⓡ**</span> </span>  Display the confidence interval for $\beta_j$
+
+
+```{r}
+mod <- lm(maxO3 ~ ., data = Ozone)
+cbind(mod$coefficient, confint(mod, level = 0.95))
+```
+
+
+<span class="solution"><span style="color: blue;">**Ⓡ**</span> </span>  Display the confidence interval for
+$\sigma^2$
+
+
+```{r}
+
+n <- length(Ozone$maxO3)
+p <- length(coef(mod))
+alpha <- 0.05
+
+sigma2_hat <- summary(mod)$sigma^2
+
+IC_sigma2 <- sigma2_hat * c((n - p) / qchisq(1 - alpha / 2, n - p),
+                            (n - p) / qchisq(alpha / 2, n - p))
+
+cat("estimation of sigma : ", sigma2_hat, "\n")
+cat("Intervalle de confiance pour sigma^2 (95%) : [", IC_sigma2[1], ", ", IC_sigma2[2], "]\n")
+```
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# 5. Hypothesis testing
+
+##  Test: $\mathscr{H}_0:\,c^\top{\beta}=a$
+
+
+
+
+Declare our linear model and display with  <span class="solution"><span style="color: blue;">**Ⓡ**</span> </span>
+
+```{r}
+library(kableExtra)
+mod <- lm(maxO3 ~ ., data = Ozone)
+summary(mod) %>% coefficients() %>% kbl()
+```
+
+
+
+
+##  Global Fisher test
+
+
+
+
+
+Under <span class="solution"><span style="color: blue;">**Ⓡ**</span> </span> ,  the `summary` displays the Global Fisher test
+
+
+```{r, eval=FALSE}
+F-statistic: 32.67 on 10 and 101 DF,  p-value: < 2.2e-16
+```
+
+
+
+##  Fisher test for nested models
+
+
+
+
+<span class="solution"><span style="color: blue;">**Ⓡ**</span> </span>  Consider 2 nested models
+$$
+\left\{\begin{array}{l}
+\text{mod}_1:\, \text{maxO3=T9+Ne9+Vx9+maxO3v}+\epsilon,\\
+\text{maxO3=T9+T12+Ne9+Ne12+Vx9+Vx12+maxO3v}+\epsilon
+\end{array}\right.
+$$
+and display the nested Fisher test
+$$
+\left\{\begin{array}{l}
+\mathscr{H}_0: \, \text{mod}_1 \text{ is adequat}\\
+\mathscr{H}_1: \, \text{mod}_2 \text{ is adequat}
+\end{array}\right.
+$$
+
+```{r}
+mod_1<-lm(maxO3~T9+Ne9+Vx9+maxO3v,data=Ozone)
+mod_2<-lm(maxO3~T9+T12+Ne9+Ne12+Vx9+Vx12+maxO3v,data=Ozone)
+
+anova(mod_1,mod_2)
+```
+
+
+
+
+# 6. $R^2$-coefficient
+
+
+
+
+Let display these coefficient with <span class="solution"><span style="color: blue;">**Ⓡ**</span> </span>
+
+```{r}
+cat("Multiple R-squared:", summary(mod)$r.squared, ", Adjusted R-squared:", summary(mod)$adj.r.squared, "\n")
+
+```
+
+
+
+
+
+
+# 7. Prediction
+
+
+## 7.1. Fitted values
+
+
+
+
+Note that with <span class="solution"><span style="color: blue;">**Ⓡ**</span> </span> , we can display the fitted values $\widehat Y_i$
+
+```{r}
+haty<-predict(mod,data=Ozone)
+# or equivalently
+#haty<-mod$fitted.values
+y<-Ozone$maxO3
+cbind.data.frame(y,haty)%>%rmarkdown::paged_table()
+```
+
+
+
+
+
+
+
+
+
+