```{r setup}
# Resolve the relative paths used by later chunks (e.g. the CSV import)
# against the TP2 folder. knitr restores the working directory after every
# chunk, so a plain setwd() here would not carry over to the chunks that
# follow; the `root.dir` package option is the supported way to do this.
knitr::opts_knit$set(
  root.dir = "/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/TP2"
)
```

# Question 1 : Import dataset and check variables

```{r}
library(dplyr)

# The file is read as semicolon-separated with a comma as decimal mark.
cepages <- read.csv("Cepages B TP2.csv", header = TRUE, sep = ";", dec = ",")

# The two grouping variables are treated as categorical.
cepages$Couleur <- as.factor(cepages$Couleur)
cepages$Origine <- as.factor(cepages$Origine)

# Store every remaining character or integer column as double.
cepages <- cepages |> mutate(across(where(is.character), as.numeric))
cepages <- cepages |> mutate(across(where(is.integer), as.numeric))

# paged_table() is an rmarkdown function and no visible library() call
# attaches that package, so the call is namespace-qualified.
rmarkdown::paged_table(cepages)
```

# Question 2 : Table of counts

```{r}
# Cross-tabulate the observation counts: Origine in rows, Couleur in columns.
table(cepages[["Origine"]], cepages[["Couleur"]])
```

# Question 3

## Display the table of average pH according to Couleur

```{r}
# Mean pH within each level of Couleur.
tapply(cepages[["pH"]], cepages[["Couleur"]], mean)
```

## Display the table of average pH according to couleur and origine

```{r}
# Mean pH for every Couleur (rows) x Origine (columns) combination.
with(cepages, tapply(pH, list(Couleur, Origine), mean))
```

# Question 4 : Regression lines of pH on AcTot for each Couleur
```{r}
library(ggplot2)

# Scatter plot of pH against AcTot with one least-squares line per Couleur.
# The points are drawn in a fixed red, which overrides the Couleur colour
# mapping for that layer, so only the fitted lines are coloured by group.
# `se = FALSE` (spelled out, since `F` can be reassigned) hides the
# confidence band around each line.
ggplot(cepages, aes(x = AcTot, y = pH, color = Couleur)) +
  geom_point(col = "red", size = 0.5) +
  geom_smooth(method = "lm", se = FALSE)

# Box plots of pH, coloured and filled by Couleur; outlier points are made
# fully transparent.
ggplot(cepages, aes(y = pH, x = AcTot, colour = Couleur, fill = Couleur)) +
  geom_boxplot(alpha = 0.5, outlier.alpha = 0)
```

# Question 5 : Regression lines of pH on AcTot for each Origine

```{r}
# One least-squares line of pH against AcTot per Origine, drawn first, with
# the observations overlaid as small points in a fixed red (the fixed colour
# overrides the Origine colour mapping for the point layer).
# `se = FALSE` (spelled out, since `F` can be reassigned) hides the
# confidence band around each line.
ggplot(cepages, aes(x = AcTot, y = pH, color = Origine)) +
  geom_smooth(method = "lm", se = FALSE) +
  geom_point(col = "red", size = 0.5)

# Box plots of pH, coloured and filled by Origine; outlier points are made
# fully transparent.
ggplot(cepages, aes(y = pH, x = AcTot, colour = Origine, fill = Origine)) +
  geom_boxplot(alpha = 0.5, outlier.alpha = 0)
```

# Question 6 : ANOVA
```{r}
# One-way ANOVA fitted as a linear model: pH explained by the Couleur factor.
model_full <- lm(formula = pH ~ Couleur, data = cepages)

# Print the fit summary (coefficients and overall goodness-of-fit statistics).
summary(model_full)
```

```{r}
# ggplot2 only provides the autoplot() generic; the method for lm objects is
# supplied by ggfortify, which no visible chunk attaches, so load it here.
library(ggfortify)

# Draw diagnostic plots 1 to 4 of the fitted model.
autoplot(model_full, which = 1:4)
```

- [P1] is verified: in the 'Residuals vs Fitted' plot, the residuals are evenly scattered around 0, with no visible trend.
- [P2] is verified: in the 'Scale-Location' plot, the points keep a roughly constant spread (around 1) across the fitted values.
- [P4] is verified: in the 'Normal Q-Q' plot, the points are aligned with the 'y = x' reference line.

```{r}
# Fix the RNG seed: by default the Durbin-Watson p-value is obtained by
# bootstrap simulation, so it would otherwise vary between runs.
set.seed(12)

# Namespace-qualified because car is only attached by a later chunk; an
# unqualified call here fails with "could not find function".
car::durbinWatsonTest(model_full)
```
[P3] is verified: the p-value is 0.7 > 0.05, so we do not reject H0 and find no evidence of autocorrelation in the residuals.

# Bonus : Type II Test

```{r}
library(car)

# Type II analysis-of-variance table for the fitted model; "II" is the
# default test type for lm fits and is spelled out here for clarity.
Anova(model_full, type = "II")
```