Add TP3

2026-03-18 18:50:25 +01:00 · 2024-11-20 18:32:54 +01:00
parent 10e9191969
commit af27fbba72
4 changed files with 533 additions and 1 deletions
--- a/Models/TP2/TP2.rmd
+++ b/Models/TP2/TP2.rmd
@@ -75,7 +75,6 @@ autoplot(model_full, 1:4)
 set.seed(12)
 durbinWatsonTest(model_full)
 ```
-
 [P3] is verified as the p-value is 0.7 > 0.05, so we do not reject H0 so the residuals are not auto-correlated

 # Bonus : Type II Test
--- a/Models/TP3/TP2.rmd
+++ b/Models/TP3/TP2.rmd
@@ -0,0 +1,134 @@
+```{r}
+setwd('/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/TP3')
+```
+
+# Question 1 : Import dataset and check variables
+
+```{r}
+library(dplyr)
+ozone <- read.table("ozone.txt", header = TRUE, sep = " ", dec = ".")
+ozone$vent <- as.factor(ozone$vent)
+ozone$temps <- as.factor(ozone$temps)
+ozone <- ozone %>% mutate(across(where(is.character), as.numeric))
+ozone <- ozone %>% mutate(across(where(is.integer), as.numeric))
+paged_table(ozone)
+```
+
+# Question 2 : max03 ~ T12
+
+```{r}
+model_T12 <- lm(maxO3 ~ T12, data = ozone)
+summary(model_T12)
+```
+
+```{r}
+library(ggplot2)
+
+ggplot(ozone, aes(x = T12, y = maxO3)) +
+  geom_smooth(method = 'lm', se = T) +
+  geom_point(col = 'red', size = 0.5) +
+  labs(title = "maxO3 ~ T12") +
+  theme_minimal()
+```
+
+```{r}
+library(ggfortify)
+library(car)
+
+autoplot(model_T12, 1:4)
+durbinWatsonTest(model_T12)
+```
+
+The p-value of the Durbin-Watson test is 0, so [P3] is not valid. The residuals are correlated.
+
+The model is not valid.
+
+# Question 3 : max03 ~ T12 + temps
+
+```{r}
+library(ggplot2)
+
+ggplot(ozone, aes(y = maxO3, x = temps, colour = temps, fill = temps)) +
+  geom_boxplot(alpha = 0.5, outlier.alpha = 0) +
+  geom_jitter(width = 0.25, size = 1) +
+  stat_summary(fun = mean, colour = "black", geom = "point", shape = 18, size = 3)
+```
+
+```{r}
+model_temps <- lm(maxO3 ~ -1 + temps, data = ozone)
+summary(model_temps)
+```
+
+```{r}
+autoplot(model_temps, 1:4)
+durbinWatsonTest(model_temps)
+```
+```{r}
+model_vent <- lm(maxO3 ~ -1 + vent, data = ozone)
+summary(model_vent)
+```
+
+```{r}
+autoplot(model_vent, 1:4)
+durbinWatsonTest(model_vent)
+```
+The p-value of the Durbin-Watson test is 0, so [P3] is not valid. The residuals are correlated.
+
+The model is not valid.
+```{r}
+model_temps_vent <- lm(maxO3 ~ temps * vent, data = ozone)
+summary(model_temps_vent)
+```
+
+```{r}
+autoplot(model_temps_vent, 1:4)
+durbinWatsonTest(model_temps_vent)
+```
+
+# Question 4 : Multiple linear regression
+```{r}
+model <- lm(maxO3 ~ ., data = ozone)
+summary(model)
+```
+
+```{r}
+autoplot(model, 1:4)
+durbinWatsonTest(model)
+```
+
+[P1] is verified as the 'Residuals vs Fitted' plot shows that the points are well distributed around 0
+[P2] is verified as the 'Scale-Location' plot shows that the points are well distributed around 1
+[P4] is verified as the 'QQPlot' is aligned with the 'y=x' line
+
+[P3] is verified as the p-value is 0.7 > 0.05, so we do not reject H0 so the residuals are not auto-correlated
+
+The model is valid.
+
+```{r}
+library(GGally)
+ggpairs(ozone, progress = FALSE)
+```
+```{r}
+library(MASS)
+
+model_backward <- stepAIC(model, ~., trace = FALSE, direction = c("backward"))
+summary(model_backward)
+```
+
+```{r}
+AIC(model_backward)
+autoplot(model_backward, 1:4)
+durbinWatsonTest(model_backward)
+```
+
+# Question 5 : Prediction
+```{r}
+new_obs <- list(
+  T12 = 18,
+  Ne9 = 3,
+  Vx9 = 0.7,
+  maxO3v = 85
+)
+predict(model_backward, new_obs, interval = "confidence")
+
+```
--- a/Models/TP3/ozone.txt
+++ b/Models/TP3/ozone.txt
@@ -0,0 +1,113 @@
+maxO3 T9 T12 T15 Ne9 Ne12 Ne15 Vx9 Vx12 Vx15 maxO3v vent temps
+87 15.6 18.5 18.4 4 4 8 0.6946 -1.7101 -0.6946 84 Nord Sec
+82 17 18.4 17.7 5 5 7 -4.3301 -4 -3 87 Nord Sec
+92 15.3 17.6 19.5 2 5 4 2.9544 1.8794 0.5209 82 Est Sec
+114 16.2 19.7 22.5 1 1 0 0.9848 0.3473 -0.1736 92 Nord Sec
+94 17.4 20.5 20.4 8 8 7 -0.5 -2.9544 -4.3301 114 Ouest Sec
+80 17.7 19.8 18.3 6 6 7 -5.6382 -5 -6 94 Ouest Pluie
+79 16.8 15.6 14.9 7 8 8 -4.3301 -1.8794 -3.7588 80 Ouest Sec
+79 14.9 17.5 18.9 5 5 4 0 -1.0419 -1.3892 99 Nord Sec
+101 16.1 19.6 21.4 2 4 4 -0.766 -1.0261 -2.2981 79 Nord Sec
+106 18.3 21.9 22.9 5 6 8 1.2856 -2.2981 -3.9392 101 Ouest Sec
+101 17.3 19.3 20.2 7 7 3 -1.5 -1.5 -0.8682 106 Nord Sec
+90 17.6 20.3 17.4 7 6 8 0.6946 -1.0419 -0.6946 101 Sud Sec
+72 18.3 19.6 19.4 7 5 6 -0.8682 -2.7362 -6.8944 90 Sud Sec
+70 17.1 18.2 18 7 7 7 -4.3301 -7.8785 -5.1962 72 Ouest Pluie
+83 15.4 17.4 16.6 8 7 7 -4.3301 -2.0521 -3 70 Nord Sec
+88 15.9 19.1 21.5 6 5 4 0.5209 -2.9544 -1.0261 83 Ouest Sec
+145 21 24.6 26.9 0 1 1 -0.342 -1.5321 -0.684 121 Ouest Sec
+81 16.2 22.4 23.4 8 3 1 0 0.3473 -2.5712 145 Nord Sec
+121 19.7 24.2 26.9 2 1 0 1.5321 1.7321 2 81 Est Sec
+146 23.6 28.6 28.4 1 1 2 1 -1.9284 -1.2155 121 Sud Sec
+121 20.4 25.2 27.7 1 0 0 0 -0.5209 1.0261 146 Nord Sec
+146 27 32.7 33.7 0 0 0 2.9544 6.5778 4.3301 121 Est Sec
+108 24 23.5 25.1 4 4 0 -2.5712 -3.8567 -4.6985 146 Sud Sec
+83 19.7 22.9 24.8 7 6 6 -2.5981 -3.9392 -4.924 108 Ouest Sec
+57 20.1 22.4 22.8 7 6 7 -5.6382 -3.8302 -4.5963 83 Ouest Pluie
+81 19.6 25.1 27.2 3 4 4 -1.9284 -2.5712 -4.3301 57 Sud Sec
+67 19.5 23.4 23.7 5 5 4 -1.5321 -3.0642 -0.8682 81 Ouest Sec
+70 18.8 22.7 24.9 5 2 1 0.684 0 1.3681 67 Nord Sec
+106 24.1 28.4 30.1 0 0 1 2.8191 3.9392 3.4641 70 Est Sec
+139 26.6 30.1 31.9 0 1 4 1.8794 2 1.3681 106 Sud Sec
+79 19.5 18.8 17.8 8 8 8 0.6946 -0.866 -1.0261 139 Ouest Sec
+93 16.8 18.2 22 8 8 6 0 0 1.2856 79 Sud Pluie
+97 20.8 23.7 25 2 3 4 0 1.7101 -2.7362 93 Nord Sec
+113 17.5 18.2 22.7 8 8 5 -3.7588 -3.9392 -4.6985 97 Ouest Pluie
+72 18.1 21.2 23.9 7 6 4 -2.5981 -3.9392 -3.7588 113 Ouest Pluie
+88 19.2 22 25.2 4 7 4 -1.9696 -3.0642 -4 72 Ouest Sec
+77 19.4 20.7 22.5 7 8 7 -6.5778 -5.6382 -9 88 Ouest Sec
+71 19.2 21 22.4 6 4 6 -7.8785 -6.8937 -6.8937 77 Ouest Sec
+56 13.8 17.3 18.5 8 8 6 1.5 -3.8302 -2.0521 71 Ouest Pluie
+45 14.3 14.5 15.2 8 8 8 0.684 4 2.9544 56 Est Pluie
+67 15.6 18.6 20.3 5 7 5 -3.2139 -3.7588 -4 45 Ouest Pluie
+67 16.9 19.1 19.5 5 5 6 -2.2981 -3.7588 0 67 Ouest Pluie
+84 17.4 20.4 21.4 3 4 6 0 0.3473 -2.5981 67 Sud Sec
+63 15.1 20.5 20.6 8 6 6 2 -5.3623 -6.1284 84 Ouest Pluie
+69 15.1 15.6 15.9 8 8 8 -4.5963 -3.8302 -4.3301 63 Ouest Pluie
+92 16.7 19.1 19.3 7 6 4 -2.0521 -4.4995 -2.7362 69 Nord Sec
+88 16.9 20.3 20.7 6 6 5 -2.8191 -3.4641 -3 92 Ouest Pluie
+66 18 21.6 23.3 8 6 5 -3 -3.5 -3.2139 88 Sud Sec
+72 18.6 21.9 23.6 4 7 6 0.866 -1.9696 -1.0261 66 Ouest Sec
+81 18.8 22.5 23.9 6 3 2 0.5209 -1 -2 72 Nord Sec
+83 19 22.5 24.1 2 4 6 0 -1.0261 0.5209 81 Nord Sec
+149 19.9 26.9 29 3 4 3 1 -0.9397 -0.6428 83 Ouest Sec
+153 23.8 27.7 29.4 1 1 4 0.9397 1.5 0 149 Nord Sec
+159 24 28.3 26.5 2 2 7 -0.342 1.2856 -2 153 Nord Sec
+149 23.3 27.6 28.8 4 6 3 0.866 -1.5321 -0.1736 159 Ouest Sec
+160 25 29.6 31.1 0 3 5 1.5321 -0.684 2.8191 149 Sud Sec
+156 24.9 30.5 32.2 0 1 4 -0.5 -1.8794 -1.2856 160 Ouest Sec
+84 20.5 26.3 27.8 1 0 2 -1.3681 -0.6946 0 156 Nord Sec
+126 25.3 29.5 31.2 1 4 4 3 3.7588 5 84 Est Sec
+116 21.3 23.8 22.1 7 7 8 0 -2.3941 -1.3892 126 Sud Pluie
+77 20 18.2 23.6 5 7 6 -3.4641 -2.5981 -3.7588 116 Ouest Pluie
+63 18.7 20.6 20.3 6 7 7 -5 -4.924 -5.6382 77 Ouest Pluie
+54 18.6 18.7 17.8 8 8 8 -4.6985 -2.5 -0.8682 63 Sud Pluie
+65 19.2 23 22.7 8 7 7 -3.8302 -4.924 -5.6382 54 Ouest Sec
+72 19.9 21.6 20.4 7 7 8 -3 -4.5963 -5.1962 65 Ouest Pluie
+60 18.7 21.4 21.7 7 7 7 -5.6382 -6.0622 -6.8937 72 Ouest Pluie
+70 18.4 17.1 20.5 3 6 3 -5.9088 -3.2139 -4.4995 60 Nord Pluie
+77 17.1 20 20.8 4 5 4 -1.9284 -1.0261 0.5209 70 Nord Sec
+98 17.8 22.8 24.3 1 1 0 0 -1.5321 -1 77 Ouest Pluie
+111 20.9 25.2 26.7 1 5 2 -1.0261 -3 -2.2981 98 Ouest Sec
+75 18.8 20.5 26 8 7 1 -0.866 0 0 111 Nord Sec
+116 23.5 29.8 31.7 1 3 5 1.8794 1.3681 0.6946 75 Sud Sec
+109 20.8 23.7 26.6 8 5 4 -1.0261 -1.7101 -3.2139 116 Sud Sec
+67 18.8 21.1 18.9 7 7 8 -5.3623 -5.3623 -2.5 86 Ouest Pluie
+76 17.8 21.3 24 7 5 5 -3.0642 -2.2981 -3.9392 67 Ouest Pluie
+113 20.6 24.8 27 1 1 2 1.3681 0.8682 -2.2981 76 Sud Sec
+117 21.6 26.9 28.6 6 6 4 1.5321 1.9284 1.9284 113 Sud Pluie
+131 22.7 28.4 30.1 5 3 3 0.1736 -1.9696 -1.9284 117 Ouest Sec
+166 19.8 27.2 30.8 4 0 1 0.6428 -0.866 0.684 131 Ouest Sec
+159 25 33.5 35.5 1 1 1 1 0.6946 -1.7101 166 Sud Sec
+100 20.1 22.9 27.6 8 8 6 1.2856 -1.7321 -0.684 159 Ouest Sec
+114 21 26.3 26.4 7 4 5 3.0642 2.8191 1.3681 100 Est Sec
+112 21 24.4 26.8 1 6 3 4 4 3.7588 114 Est Sec
+101 16.9 17.8 20.6 7 7 7 -2 -0.5209 1.8794 112 Nord Pluie
+76 17.5 18.6 18.7 7 7 7 -3.4641 -4 -1.7321 101 Ouest Sec
+59 16.5 20.3 20.3 5 7 6 -4.3301 -5.3623 -4.5 76 Ouest Pluie
+78 17.7 20.2 21.5 5 5 3 0 0.5209 0 59 Nord Pluie
+76 17.3 22.7 24.6 4 5 6 -2.9544 -2.9544 -2 78 Ouest Pluie
+55 15.3 16.8 19.2 8 7 5 -1.8794 -1.8794 -2.3941 76 Ouest Pluie
+71 15.9 19.2 19.5 7 5 3 -6.1284 0 -1.3892 55 Nord Pluie
+66 16.2 18.9 19.3 2 5 6 -1.3681 -0.8682 1.7101 71 Nord Pluie
+59 18.3 18.3 19 7 7 7 -3.9392 -1.9284 -1.7101 66 Nord Pluie
+68 16.9 20.8 22.5 6 5 7 -1.5 -3.4641 -3.0642 59 Ouest Pluie
+63 17.3 19.8 19.4 7 8 8 -4.5963 -6.0622 -4.3301 68 Ouest Sec
+78 14.2 22.2 22 5 5 6 -0.866 -5 -5 62 Ouest Sec
+74 15.8 18.7 19.1 8 7 7 -4.5963 -6.8937 -7.5175 78 Ouest Pluie
+71 15.2 17.9 18.6 6 5 1 -1.0419 -1.3681 -1.0419 74 Nord Pluie
+69 17.1 17.7 17.5 6 7 8 -5.1962 -2.7362 -1.0419 71 Nord Pluie
+71 15.4 17.7 16.6 4 5 5 -3.8302 0 1.3892 69 Nord Sec
+60 13.7 14 15.8 4 5 4 0 3.2139 0 71 Nord Pluie
+42 12.7 14.3 14.9 8 7 7 -2.5 -3.2139 -2.5 60 Nord Pluie
+65 14.8 16.3 15.9 7 7 7 -4.3301 -6.0622 -5.1962 42 Ouest Pluie
+71 15.5 18 17.4 7 7 6 -3.9392 -3.0642 0 65 Ouest Sec
+96 11.3 19.4 20.2 3 3 3 -0.1736 3.7588 3.8302 71 Est Pluie
+98 15.2 19.7 20.3 2 2 2 4 5 4.3301 96 Est Sec
+92 14.7 17.6 18.2 1 4 6 5.1962 5.1423 3.5 98 Nord Sec
+76 13.3 17.7 17.7 7 7 6 -0.9397 -0.766 -0.5 92 Ouest Pluie
+84 13.3 17.7 17.8 3 5 6 0 -1 -1.2856 76 Sud Sec
+77 16.2 20.8 22.1 6 5 5 -0.6946 -2 -1.3681 71 Sud Pluie
+99 16.9 23 22.6 6 4 7 1.5 0.8682 0.8682 77 Sud Sec
+83 16.9 19.8 22.1 6 5 3 -4 -3.7588 -4 99 Ouest Pluie
+70 15.7 18.6 20.7 7 7 7 0 -1.0419 -4 83 Sud Sec
--- a/Methods/Project
+++ b/Methods/Project
@@ -0,0 +1,286 @@
+---
+title: "R Notebook"
+output: html_notebook
+---
+
+### Random Variable simulation with stratification
+
+##### Question 9
+
+We consider a partition $\mathcal{P}= (D_0,D_1,...,D_k)$, $k \in \mathbb{N}$ of $\mathbb{R}$ such that $D_0$ covers the tails of $f_1$ and $f_1$ is upper bounded and $f_2$ lower bounded in $D_1, \dots, D_k$.
+
+To simulate under $f$ using the accept-reject algorithm, we need to find a density function $g$ such that $f(x) \le M g(x)$ for all $x \in \mathbb{R}$, where M is a constant.\
+
+We generate $X \sim g$ and $U \sim \mathcal{U}([0,1])$ $\textit{(1)}$.\
+We accept $Y = X$ if $U \le \frac{f(X)}{Mg(X)}$. Otherwise, we return to $\textit{(1)}$.
+
+Here we pose $g(x) = f_1(x)$ if $x \in D_0$ and $g(x) = sup_{D_i} f_1(x)$ if $x \in D_i, i \in \{1, \dots, k\}$.
+
+To conclude, we have $M = \frac{1}{1-a}$ and the probability of acceptance is $r = \frac{1}{M} = 1-a$
+
+
+##### Question 10
+
+Let $P_n = (D_0, \dots D_n)$ a partition of $\mathbb{R}$ for $n \in \mathbb{N}$.
+We have $\forall x \in P_n$ and $\forall i \in \{0, \dots, n\}$, $\lim_{n \to\infty} sup_{D_i} f_1(x) = f_1(x)$ and $\lim_{x\to\infty} inf_{D_i} f_2(x) = f_2(x)$.
+
+$\Rightarrow \lim_{x\to\infty} g(x) = f(x)$
+
+$\Rightarrow \lim_{x\to\infty} \frac{g(x)}{f(x)} = 1$ as $f(x) > 0$ as $f$ is a density function.
+
+$\Rightarrow \forall \epsilon > 0, \exists n_{\epsilon} \in \mathbb{N}$ such that $\forall n \ge n_{\epsilon}$, $M = sup_{x \in P_n} \frac{g(x)}{f(x)} < \epsilon$
+
+$\Rightarrow r = \frac{1}{M} > \frac{1}{\epsilon} := \delta \in ]0, 1]$ where $r$ is the acceptance rate defined in the question 10.
+
+##### Question 11
+
+We recall the parameters and the functions of the problem.
+```{r}
+mu1 <- 0
+mu2 <- 1
+s1 <- 3
+s2 <- 1
+a <- 0.2
+
+f1 <- function(x) {
+  dnorm(x, mu1, s1)
+}
+
+f2 <- function(x) {
+  dnorm(x, mu2, s2)
+}
+
+f <- function(x) {
+  fx <- f1(x) - a * f2(x)
+  fx[fx < 0] <- 0
+  return(fx / (1 - a))
+}
+
+f1_bounds <- c(mu1 - 3 * s1, mu1 + 3 * s1)
+```
+
+We implement the partition, the given $g$ function to understand the behavior of $g$ compared to $f$ and the computation of the supremum and infimum of $f_1$ and $f_2$ on each partition.
+```{r}
+create_partition <- function(k = 10) {
+  return(seq(f1_bounds[1], f1_bounds[2], , length.out = k))
+}
+
+sup_inf <- function(f, P, i) {
+  x <- seq(P[i], P[i + 1], length.out = 1000)
+  f_values <- sapply(x, f)
+  return(c(max(f_values), min(f_values)))
+}
+
+g <- function(X, P) {
+  values <- numeric(0)
+  for (x in X) {
+    if (x <= P[1] | x >= P[length(P)]) {
+      values <- c(values, 1 / (1 - a) * f1(x))
+    } else {
+      for (i in 1:(length(P) - 1)) {
+        if (x >= P[i] & x <= P[i + 1]) {
+          values <- c(values, 1 / (1 - a) * (sup_inf(f1, P, i)[1]) - a * sup_inf(f2, P, i)[2])
+        }
+      }
+    }
+  }
+  return(values)
+}
+```
+
+We plot the function $f$ and the dominating function $g$ for different sizes of the partition.
+```{r}
+library(ggplot2)
+
+X <- seq(-12, 12, length.out = 1000)
+
+# Plot for different k with ggplot on same graph
+k_values <- c(10, 20, 50, 100)
+P_values <- lapply(k_values, create_partition)
+g_values <- lapply(P_values, g, X)
+
+ggplot() +
+  geom_line(aes(x = X, y = f(X)), col = "red") +
+  geom_line(aes(x = X, y = g(X, P_values[[1]])), col = "green") +
+  geom_line(aes(x = X, y = g(X, P_values[[2]])), col = "orange") +
+  geom_line(aes(x = X, y = g(X, P_values[[3]])), col = "purple") +
+  geom_line(aes(x = X, y = g(X, P_values[[4]])), col = "black") +
+  labs(title = "Dominating function g of f for different size of partition", x = "x", y = "Density") +
+  theme_minimal()
+```
+
+Here, we implement the algorithm of accept-reject with the given partition and an appropriate function $g$ to compute $f$.
+```{r}
+set.seed(123)
+
+g_accept_reject <- function(x, P) {
+  if (x < P[1] | x >= P[length(P)]) {
+    return(f1(x))
+  } else {
+    for (i in seq_along(P)) {
+      if (x >= P[i] & x < P[i + 1]) {
+        return(sup_inf(f1, P, i)[1])
+      }
+    }
+  }
+}
+
+stratified <- function(n, P) {
+  samples <- numeric(0)
+  rate <- 0
+  while (length(samples) < n) {
+    x <- rnorm(1, mu1, s1)
+    u <- runif(1)
+    if (u <= f(x) * (1 - a) / g_accept_reject(x, P)) {
+      samples <- c(samples, x)
+    }
+    rate <- rate + 1
+  }
+  list(samples = samples, acceptance_rate = n / rate)
+}
+
+n <- 10000
+k <- 100
+
+P <- create_partition(k)
+samples <- stratified(n, P)
+X <- seq(-10, 10, length.out = 1000)
+
+hist(samples$samples, breaks = 50, freq = FALSE, col = "lightblue", main = "Empirical density function f", xlab = "x")
+lines(X, f(X), col = "red")
+```
+
+We also compute the acceptance rate of the algorithm.
+```{r}
+theorical_acceptance_rate <- 1 - a
+cat(sprintf("Empirical acceptance rate: %f, Theoretical acceptance rate: %f \n", samples$acceptance_rate, theorical_acceptance_rate))
+```
+
+##### Question 12
+
+```{r}
+set.seed(123)
+
+stratified_delta <- function(n, delta) {
+  samples <- numeric(0)
+  P <- create_partition(n * delta)
+  rate <- 0
+  while (length(samples) < n | rate < delta * n) {
+    x <- rnorm(1, mu1, s1)
+    u <- runif(1)
+    if (u <= f(x) * delta / g_accept_reject(x, P)) {
+      samples <- c(samples, x)
+    }
+    rate <- rate + 1
+  }
+  list(samples = samples, partition = P, acceptance_rate = n / rate)
+}
+
+n <- 10000
+delta <- 0.8
+
+samples_delta <- stratified_delta(n, delta)
+X <- seq(-10, 10, length.out = 1000)
+
+hist(samples_delta$samples, breaks = 50, freq = FALSE, col = "lightblue", main = "Empirical density function f", xlab = "x")
+lines(X, f(X), col = "red")
+```
+
+We also compute the acceptance rate of the algorithm.
+```{r}
+theorical_acceptance_rate <- 1 - a
+cat(sprintf("Empirical acceptance rate: %f, Theoretical acceptance rate: %f \n", samples_delta$acceptance_rate, theorical_acceptance_rate))
+```
+
+Now, we will test the stratified_delta function for different delta:
+
+```{r}
+set.seed(123)
+n <- 1000
+deltas <- seq(0.1, 1, by = 0.1)
+
+for (delta in deltas) {
+  samples <- stratified_delta(n, delta)
+  cat(sprintf("Delta: %.1f, Empirical acceptance rate: %f \n", delta, samples$acceptance_rate))
+}
+```
+
+### Cumulative density function.
+
+##### Question 13
+
+The cumulative density function $\forall x \in \mathbb{R}$ $F_X(x) = \int_{-\infty}^{x} f(t) \,dt = \int_{\mathbb{R}} f(t) h(t), dt$ where $h(x) = \mathbb{1}_{X_n \le x}$
+
+For a given $x \in \mathbb{R}$, a Monte Carlo estimator $F_n(x) = \frac{1}{n} \sum_{i=1}^{n} h(X_i)$ where $h$ is the same function as above and $(X_i)_{i=1}^{n} \sim^{iid} X$
+
+##### Question 14
+
+As $X_1, \dots, X_n$ are iid and follows the law of X, and $h$ is continuous and positive, we have $h(X_1), \dots h(X_n)$ are iid and $\mathbb{E}[h(X_i)] < + \infty$. By the law of large numbers, we have $F_n(X) = \frac{1}{n} \sum_{i=1}^{n} h(X_i) \xrightarrow{a.s} \mathbb{E}[h(X_1)] = F_X(x)$.
+
+Moreover, $\forall \epsilon > 0$, $\exists N \in \mathbb{N}$ such that $\forall n \le N$, $|F_n(x) - F_X(x)| < \epsilon$, ie, $sup_{x \in \mathbb{R}} |F_n(x) - F_X(x)| \xrightarrow{a.s} 0$, by Glivenko-Cantelli theorem.
+
+Hence, $F_n$ is a good estimate of $F_X$ as a function of $x$.
+
+##### Question 15
+```{r}
+set.seed(123)
+n <- 10000
+Xn <- (rnorm(n, mu1, s1) - a * rnorm(n, mu2, s2)) / (1 - a)
+
+h <- function(x, Xn) {
+  return(Xn <= x)
+}
+
+# Fn empirical
+empirical_cdf <- function(x, Xn) {
+  return(mean(h(x, Xn)))
+}
+
+# F theoretical
+F <- function(x) {
+  Fx <- pnorm(x, mu1, s1) - a * pnorm(x, mu2, s2)
+  return(Fx / (1 - a))
+}
+
+X <- seq(-10, 10, length.out = n)
+cat(sprintf("Empirical cdf: %f, Theoretical cdf: %f \n", empirical_cdf(X, Xn), mean(F(Xn))))
+
+length(X)
+length(empirical_cdf(X, Xn))
+plot(X, empirical_cdf(X, Xn), col = "red", type = "l", xlab = "x", ylab = "F(x)")
+lines(X, F(X), col = "blue")
+```
+
+Now we plot the empirical and theoretical cumulative density functions for different n.
+```{r}
+n_values <- seq(100, 10000, length.out = 10)
+
+```
+
+##### Question 16
+
+As $X_1, \dots, X_n$ are iid and follows the law of X, and $h$ is continuous and positive, we have $h(X_1), \dots h(X_n)$ are iid and $\mathbb{E}[h(X_i)^2] < + \infty$. By the Central Limit Theorem, we have $\sqrt{n} \frac{(F_n(x) - F_X(x))}{\sigma} \xrightarrow{d} \mathcal{N}(0, 1)$ where $\sigma^2 = Var(h(X_1))$.
+
+So we have $\lim_{x\to\infty} \mathbb{P}(\sqrt{n} \frac{(F_n(x) - F_X(x))}{\sigma} \le q_{1-\frac{\alpha}{2}}^{\mathcal{N}(0, 1)})= 1 - \alpha$
+
+So by computing the quantile of the normal distribution, we can have a confidence interval for $F_X(x)$ :
+$F_X(x) \in [F_n(x) - \frac{q_{1-\frac{\alpha}{2}}^{\mathcal{N}(0, 1)} \sigma}{\sqrt{n}} ; F_n(x) + \frac{q_{1-\frac{\alpha}{2}}^{\mathcal{N}(0, 1)} \sigma}{\sqrt{n}}]$
+
+```{r}
+Fn <- empirical_cdf(X, Xn)
+sigma <- sqrt(Fn - Fn^2)
+q <- qnorm(0.975)
+interval <- c(Fn - q * sigma / sqrt(n), Fn + q * sigma / sqrt(n))
+cat(sprintf("Confidence interval: [%f, %f] \n", interval[1], interval[2]))
+```
+
+##### Question 17
+
+```{r}
+q <- qnorm(0.975)
+x <- 1
+epsilon <- 0.01
+n_required <- (1.96^2 * F(x) * (1 - F(x))) / epsilon^2
+ceiling(n_required)
+```
+