diff --git a/M1/General Linear Models/Chapter 1/Code_Chapter1.Rmd b/M1/General Linear Models/Chapter 1/Code_Chapter1.Rmd deleted file mode 100644 index 7212c63..0000000 --- a/M1/General Linear Models/Chapter 1/Code_Chapter1.Rmd +++ /dev/null @@ -1,335 +0,0 @@ ---- -title: "Linear Models and Their Generalizations" -subtitle : "Code Chapter 1: Linear gaussian regression" -author: "Katia Meziani" -output: -# pdf_document: -# toc: yes -html_document: -code_folding: hide -css: ./style.css -df_print: paged -highlight: tango -number_sections: no -theme: flatly -toc: yes -toc_float: -collapsed: no - ---- - -```{r setup, include=FALSE} -knitr::opts_chunk$set(warning = FALSE, message = FALSE) -``` - - - - - - - - - -# Motivations - - - **The Ozone Example** (With **Ⓡ** ) - - -This dataset (Air Breizh, Summer 2001) contains 112 observations and 13 variables. - -```{r} -library(rmarkdown) -ozone1 <- read.table("ozone1.txt", header = TRUE, sep = " ", dec = ".") -paged_table(ozone1) -``` - -Let's correct the incorrect declaration of variables: - - - -```{r} -library(dplyr) -ozone1$Date <- as.Date(as.factor(ozone1$Date), format = "%Y%m%d") -ozone1$vent <- as.factor(ozone1$vent) -ozone1$pluie <- as.factor(ozone1$pluie) -ozone1 <- ozone1 %>% mutate(across(where(is.character), as.numeric)) -ozone1 <- ozone1 %>% mutate(across(where(is.integer), as.numeric)) -str(ozone1) -``` - - - - - - - -# 1. Linear regression model - -## Simple linear model - - - - -Let's plot this line (here with **Ⓡ** ) is called the least squares line. - -```{r, out.width = "60%", fig.align = 'center', message = FALSE, warning = FALSE} -library(ggplot2) -library(plotly) -p1 <- ggplot(ozone1) + - aes(x = T12, y = maxO3) + - geom_point(col = 'red', size = 0.5) + - geom_smooth(method = "lm", se = FALSE) -ggplotly(p1) -``` - - -With **Ⓡ** , we can compute $\widehat {\boldsymbol{\beta}}=(\widehat{\beta}_0, \widehat{\beta}_1)$ - -```{r} -library(kableExtra) -mod <- lm(maxO3 ~ T12, data = ozone1) -mod$coefficients %>% kbl(col.names = "Estimation") -``` - - - - - - - - - - - -# 2. Ordinary least squares estimator (OLSE) - - - - - - - -## Declaration of the model with Under **Ⓡ** - -Let's revisit the ozone example and, for this chapter, select only the numerical regressors with **Ⓡ** - -```{r} -names(ozone1) -Ozone <- ozone1[, 2:12] -names(Ozone) -``` - - - -Under **Ⓡ** ,we can declare the linear model in two ways: either by naming the variables or by using a `.`. - -```{r} -mod <- lm(maxO3 ~ T9 + - T12 + - T15 + - Ne9 + - Ne12 + - Ne15 + - Vx9 + - Vx12 + - Vx15 + - maxO3v, data = Ozone) -mod <- lm(maxO3 ~ ., data = Ozone) -``` - -Now, let's display the estimated values $\widehat{\boldsymbol{\beta}}$ of $\boldsymbol{\beta}$. - -```{r} -coefficients(mod) -``` - - - - -# 3. Residuals analysis - - - -```{r} -summary(mod$residuals) -``` - - - - - -We can display Residual standard error with **Ⓡ** -```{r} -cat("Residual standard error:", summary(mod)$sigma, "on", summary(mod)$df[2], "degrees of freedom\n") -``` - - - - - - - - - - - -# 4. Inference in the Gaussian linear regression model - - -## Confidence intervals/regions - - - - - - - -**Ⓡ** Display the confidence interval for $\beta_j$ - - -```{r} -mod <- lm(maxO3 ~ ., data = Ozone) -cbind(mod$coefficient, confint(mod, level = 0.95)) -``` - - -**Ⓡ** Display the confidence interval for -$\sigma^2$ - - -```{r} - -n <- length(Ozone$maxO3) -p <- length(coef(mod)) -alpha <- 0.05 - -sigma2_hat <- summary(mod)$sigma^2 - -IC_sigma2 <- sigma2_hat * c((n - p) / qchisq(1 - alpha / 2, n - p), - (n - p) / qchisq(alpha / 2, n - p)) - -cat("estimation of sigma : ", sigma2_hat, "\n") -cat("Intervalle de confiance pour sigma^2 (95%) : [", IC_sigma2[1], ", ", IC_sigma2[2], "]\n") -``` - - - - - - - - - - - - - - -# 5. Hypothesis testing - -## Test: $\mathscr{H}_0:\,c^\top{\beta}=a$ - - - - -Declare our linear model and display with **Ⓡ** - -```{r} -library(kableExtra) -mod <- lm(maxO3 ~ ., data = Ozone) -summary(mod) %>% coefficients() %>% kbl() -``` - - - - -## Global Fisher test - - - - - -Under **Ⓡ** , the `summary` displays the Global Fisher test - - -```{r, eval=FALSE} -F-statistic: 32.67 on 10 and 101 DF, p-value: < 2.2e-16 -``` - - - -## Fisher test for nested models - - - - -**Ⓡ** Consider 2 nested models -$$ -\left\{\begin{array}{l} -\text{mod}_1:\, \text{maxO3=T9+Ne9+Vx9+maxO3v}+\epsilon,\\ -\text{maxO3=T9+T12+Ne9+Ne12+Vx9+Vx12+maxO3v}+\epsilon -\end{array}\right. -$$ -and display the nested Fisher test -$$ -\left\{\begin{array}{l} -\mathscr{H}_0: \, \text{mod}_1 \text{ is adequat}\\ -\mathscr{H}_1: \, \text{mod}_2 \text{ is adequat} -\end{array}\right. -$$ - -```{r} -mod_1<-lm(maxO3~T9+Ne9+Vx9+maxO3v,data=Ozone) -mod_2<-lm(maxO3~T9+T12+Ne9+Ne12+Vx9+Vx12+maxO3v,data=Ozone) - -anova(mod_1,mod_2) -``` - - - - -# 6. $R^2$-coefficient - - - - -Let display these coefficient with **Ⓡ** - -```{r} -cat("Multiple R-squared:", summary(mod)$r.squared, ", Adjusted R-squared:", summary(mod)$adj.r.squared, "\n") - -``` - - - - - - -# 7. Prediction - - -## 7.1. Fitted values - - - - -Note that with **Ⓡ** , we can display the fitted values $\widehat Y_i$ - -```{r} -haty<-predict(mod,data=Ozone) -# or equivalently -#haty<-mod$fitted.values -y<-Ozone$maxO3 -cbind.data.frame(y,haty)%>%rmarkdown::paged_table() -``` - - - - - - - - - - diff --git a/M1/General Linear Models/Chapter 1/Code_Chapter1.html b/M1/General Linear Models/Chapter 1/Code_Chapter1.html deleted file mode 100644 index 85023e2..0000000 --- a/M1/General Linear Models/Chapter 1/Code_Chapter1.html +++ /dev/null @@ -1,4299 +0,0 @@ - - - - -
- - - - - - - - - -The Ozone Example -(With Ⓡ )
-This dataset (Air Breizh, Summer 2001) contains 112 observations and -13 variables.
-library(rmarkdown)
-ozone1 <- read.table("ozone1.txt", header = TRUE, sep = " ", dec = ".")
-paged_table(ozone1)
-Let’s correct the incorrect declaration of variables:
-library(dplyr)
-ozone1$Date <- as.Date(as.factor(ozone1$Date), format = "%Y%m%d")
-ozone1$vent <- as.factor(ozone1$vent)
-ozone1$pluie <- as.factor(ozone1$pluie)
-ozone1 <- ozone1 %>% mutate(across(where(is.character), as.numeric))
-ozone1 <- ozone1 %>% mutate(across(where(is.integer), as.numeric))
-str(ozone1)
-## 'data.frame': 112 obs. of 14 variables:
-## $ Date : Date, format: "2001-06-01" "2001-06-02" ...
-## $ maxO3 : num 87 82 92 114 94 80 79 79 101 106 ...
-## $ T9 : num 15.6 17 15.3 16.2 17.4 17.7 16.8 14.9 16.1 18.3 ...
-## $ T12 : num 18.5 18.4 17.6 19.7 20.5 19.8 15.6 17.5 19.6 21.9 ...
-## $ T15 : num 18.4 17.7 19.5 22.5 20.4 18.3 14.9 18.9 21.4 22.9 ...
-## $ Ne9 : num 4 5 2 1 8 6 7 5 2 5 ...
-## $ Ne12 : num 4 5 5 1 8 6 8 5 4 6 ...
-## $ Ne15 : num 8 7 4 0 7 7 8 4 4 8 ...
-## $ Vx9 : num 0.695 -4.33 2.954 0.985 -0.5 ...
-## $ Vx12 : num -1.71 -4 1.879 0.347 -2.954 ...
-## $ Vx15 : num -0.695 -3 0.521 -0.174 -4.33 ...
-## $ maxO3v: num 84 87 82 92 114 94 80 99 79 101 ...
-## $ vent : Factor w/ 4 levels "Est","Nord","Ouest",..: 2 2 1 2 3 3 3 2 2 3 ...
-## $ pluie : Factor w/ 2 levels "Pluie","Sec": 2 2 2 2 2 1 2 2 2 2 ...
-Let’s plot this line (here with Ⓡ ) is called the -least squares line.
-library(ggplot2)
-library(plotly)
-p1 <- ggplot(ozone1) +
- aes(x = T12, y = maxO3) +
- geom_point(col = 'red', size = 0.5) +
- geom_smooth(method = "lm", se = FALSE)
-ggplotly(p1)
-
-
-With Ⓡ , we can compute -\(\widehat -{\boldsymbol{\beta}}=(\widehat{\beta}_0, \widehat{\beta}_1)\)
-library(kableExtra)
-mod <- lm(maxO3 ~ T12, data = ozone1)
-mod$coefficients %>% kbl(col.names = "Estimation")
-| - | --Estimation - | -
|---|---|
| -(Intercept) - | ---27.419636 - | -
| -T12 - | --5.468685 - | -
Let’s revisit the ozone example and, for this chapter, select only -the numerical regressors with Ⓡ
-names(ozone1)
-## [1] "Date" "maxO3" "T9" "T12" "T15" "Ne9" "Ne12" "Ne15"
-## [9] "Vx9" "Vx12" "Vx15" "maxO3v" "vent" "pluie"
-Ozone <- ozone1[, 2:12]
-names(Ozone)
-## [1] "maxO3" "T9" "T12" "T15" "Ne9" "Ne12" "Ne15" "Vx9"
-## [9] "Vx12" "Vx15" "maxO3v"
-Under Ⓡ ,we can declare
-the linear model in two ways: either by naming the variables or by using
-a ..
mod <- lm(maxO3 ~ T9 +
- T12 +
- T15 +
- Ne9 +
- Ne12 +
- Ne15 +
- Vx9 +
- Vx12 +
- Vx15 +
- maxO3v, data = Ozone)
-mod <- lm(maxO3 ~ ., data = Ozone)
-Now, let’s display the estimated values \(\widehat{\boldsymbol{\beta}}\) of \(\boldsymbol{\beta}\).
-coefficients(mod)
-## (Intercept) T9 T12 T15 Ne9 Ne12
-## 12.24441987 -0.01901425 2.22115189 0.55853087 -2.18909215 -0.42101517
-## Ne15 Vx9 Vx12 Vx15 maxO3v
-## 0.18373097 0.94790917 0.03119824 0.41859252 0.35197646
-summary(mod$residuals)
-## Min. 1st Qu. Median Mean 3rd Qu. Max.
-## -53.566 -8.727 -0.403 0.000 7.599 39.458
-We can display Residual standard error with Ⓡ -
-cat("Residual standard error:", summary(mod)$sigma, "on", summary(mod)$df[2], "degrees of freedom\n")
-## Residual standard error: 14.36002 on 101 degrees of freedom
-Ⓡ Display the -confidence interval for \(\beta_j\)
-mod <- lm(maxO3 ~ ., data = Ozone)
-cbind(mod$coefficient, confint(mod, level = 0.95))
-## 2.5 % 97.5 %
-## (Intercept) 12.24441987 -14.4802083 38.9690481
-## T9 -0.01901425 -2.2510136 2.2129851
-## T12 2.22115189 -0.6214249 5.0637287
-## T15 0.55853087 -1.7121182 2.8291799
-## Ne9 -2.18909215 -4.0502993 -0.3278850
-## Ne12 -0.42101517 -3.1340934 2.2920630
-## Ne15 0.18373097 -1.8055357 2.1729977
-## Vx9 0.94790917 -0.8618028 2.7576211
-## Vx12 0.03119824 -2.0620871 2.1244836
-## Vx15 0.41859252 -1.3978619 2.2350470
-## maxO3v 0.35197646 0.2272237 0.4767292
-Ⓡ Display the -confidence interval for \(\sigma^2\)
-n <- length(Ozone$maxO3)
-p <- length(coef(mod))
-alpha <- 0.05
-
-sigma2_hat <- summary(mod)$sigma^2
-
-IC_sigma2 <- sigma2_hat * c((n - p) / qchisq(1 - alpha / 2, n - p),
- (n - p) / qchisq(alpha / 2, n - p))
-
-cat("estimation of sigma : ", sigma2_hat, "\n")
-## estimation of sigma : 206.2102
-cat("Intervalle de confiance pour sigma^2 (95%) : [", IC_sigma2[1], ", ", IC_sigma2[2], "]\n")
-## Intervalle de confiance pour sigma^2 (95%) : [ 159.3518 , 277.3877 ]
-Declare our linear model and display with Ⓡ -
-library(kableExtra)
-mod <- lm(maxO3 ~ ., data = Ozone)
-summary(mod) %>% coefficients() %>% kbl()
-| - | --Estimate - | --Std. Error - | --t value - | --Pr(>|t|) - | -
|---|---|---|---|---|
| -(Intercept) - | --12.2444199 - | --13.4719013 - | --0.9088858 - | --0.3655741 - | -
| -T9 - | ---0.0190143 - | --1.1251522 - | ---0.0168993 - | --0.9865503 - | -
| -T12 - | --2.2211519 - | --1.4329447 - | --1.5500612 - | --0.1242547 - | -
| -T15 - | --0.5585309 - | --1.1446356 - | --0.4879552 - | --0.6266392 - | -
| -Ne9 - | ---2.1890921 - | --0.9382356 - | ---2.3332008 - | --0.0216199 - | -
| -Ne12 - | ---0.4210152 - | --1.3676644 - | ---0.3078352 - | --0.7588417 - | -
| -Ne15 - | --0.1837310 - | --1.0027906 - | --0.1832197 - | --0.8549930 - | -
| -Vx9 - | --0.9479092 - | --0.9122769 - | --1.0390586 - | --0.3012586 - | -
| -Vx12 - | --0.0311982 - | --1.0552264 - | --0.0295654 - | --0.9764720 - | -
| -Vx15 - | --0.4185925 - | --0.9156758 - | --0.4571405 - | --0.6485518 - | -
| -maxO3v - | --0.3519765 - | --0.0628879 - | --5.5968846 - | --0.0000002 - | -
Under Ⓡ , the
-summary displays the Global Fisher test
F-statistic: 32.67 on 10 and 101 DF, p-value: < 2.2e-16
-Ⓡ Consider 2 nested -models \[ -\left\{\begin{array}{l} -\text{mod}_1:\, \text{maxO3=T9+Ne9+Vx9+maxO3v}+\epsilon,\\ -\text{maxO3=T9+T12+Ne9+Ne12+Vx9+Vx12+maxO3v}+\epsilon -\end{array}\right. -\] and display the nested Fisher test \[ -\left\{\begin{array}{l} -\mathscr{H}_0: \, \text{mod}_1 \text{ is adequat}\\ -\mathscr{H}_1: \, \text{mod}_2 \text{ is adequat} -\end{array}\right. -\]
-mod_1<-lm(maxO3~T9+Ne9+Vx9+maxO3v,data=Ozone)
-mod_2<-lm(maxO3~T9+T12+Ne9+Ne12+Vx9+Vx12+maxO3v,data=Ozone)
-
-anova(mod_1,mod_2)
-## Analysis of Variance Table
-##
-## Model 1: maxO3 ~ T9 + Ne9 + Vx9 + maxO3v
-## Model 2: maxO3 ~ T9 + T12 + Ne9 + Ne12 + Vx9 + Vx12 + maxO3v
-## Res.Df RSS Df Sum of Sq F Pr(>F)
-## 1 107 23263
-## 2 104 20914 3 2349.8 3.8951 0.01106 *
-## ---
-## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
-Let display these coefficient with Ⓡ
-cat("Multiple R-squared:", summary(mod)$r.squared, ", Adjusted R-squared:", summary(mod)$adj.r.squared, "\n")
-## Multiple R-squared: 0.7638413 , Adjusted R-squared: 0.7404593
-Note that with Ⓡ , we can display -the fitted values \(\widehat Y_i\)
-haty<-predict(mod,data=Ozone)
-# or equivalently
-#haty<-mod$fitted.values
-y<-Ozone$maxO3
-cbind.data.frame(y,haty)%>%rmarkdown::paged_table()
-