diff --git a/M1/General Linear Models/TP2/TP2.rmd b/M1/General Linear Models/TP2/TP2.rmd index 72d71d6..e743a92 100644 --- a/M1/General Linear Models/TP2/TP2.rmd +++ b/M1/General Linear Models/TP2/TP2.rmd @@ -75,7 +75,6 @@ autoplot(model_full, 1:4) set.seed(12) durbinWatsonTest(model_full) ``` - [P3] is verified as the p-value is 0.7 > 0.05, so we do not reject H0 so the residuals are not auto-correlated # Bonus : Type II Test diff --git a/M1/General Linear Models/TP3/TP2.rmd b/M1/General Linear Models/TP3/TP2.rmd new file mode 100644 index 0000000..8496e70 --- /dev/null +++ b/M1/General Linear Models/TP3/TP2.rmd @@ -0,0 +1,134 @@ +```{r} +setwd('/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/TP3') +``` + +# Question 1 : Import dataset and check variables + +```{r} +library(dplyr) +ozone <- read.table("ozone.txt", header = TRUE, sep = " ", dec = ".") +ozone$vent <- as.factor(ozone$vent) +ozone$temps <- as.factor(ozone$temps) +ozone <- ozone %>% mutate(across(where(is.character), as.numeric)) +ozone <- ozone %>% mutate(across(where(is.integer), as.numeric)) +paged_table(ozone) +``` + +# Question 2 : max03 ~ T12 + +```{r} +model_T12 <- lm(maxO3 ~ T12, data = ozone) +summary(model_T12) +``` + +```{r} +library(ggplot2) + +ggplot(ozone, aes(x = T12, y = maxO3)) + + geom_smooth(method = 'lm', se = T) + + geom_point(col = 'red', size = 0.5) + + labs(title = "maxO3 ~ T12") + + theme_minimal() +``` + +```{r} +library(ggfortify) +library(car) + +autoplot(model_T12, 1:4) +durbinWatsonTest(model_T12) +``` + +The p-value of the Durbin-Watson test is 0, so [P3] is not valid. The residuals are correlated. + +The model is not valid. + +# Question 3 : max03 ~ T12 + temps + +```{r} +library(ggplot2) + +ggplot(ozone, aes(y = maxO3, x = temps, colour = temps, fill = temps)) + + geom_boxplot(alpha = 0.5, outlier.alpha = 0) + + geom_jitter(width = 0.25, size = 1) + + stat_summary(fun = mean, colour = "black", geom = "point", shape = 18, size = 3) +``` + +```{r} +model_temps <- lm(maxO3 ~ -1 + temps, data = ozone) +summary(model_temps) +``` + +```{r} +autoplot(model_temps, 1:4) +durbinWatsonTest(model_temps) +``` +```{r} +model_vent <- lm(maxO3 ~ -1 + vent, data = ozone) +summary(model_vent) +``` + +```{r} +autoplot(model_vent, 1:4) +durbinWatsonTest(model_vent) +``` +The p-value of the Durbin-Watson test is 0, so [P3] is not valid. The residuals are correlated. + +The model is not valid. +```{r} +model_temps_vent <- lm(maxO3 ~ temps * vent, data = ozone) +summary(model_temps_vent) +``` + +```{r} +autoplot(model_temps_vent, 1:4) +durbinWatsonTest(model_temps_vent) +``` + +# Question 4 : Multiple linear regression +```{r} +model <- lm(maxO3 ~ ., data = ozone) +summary(model) +``` + +```{r} +autoplot(model, 1:4) +durbinWatsonTest(model) +``` + +[P1] is verified as the 'Residuals vs Fitted' plot shows that the points are well distributed around 0 +[P2] is verified as the 'Scale-Location' plot shows that the points are well distributed around 1 +[P4] is verified as the 'QQPlot' is aligned with the 'y=x' line + +[P3] is verified as the p-value is 0.7 > 0.05, so we do not reject H0 so the residuals are not auto-correlated + +The model is valid. + +```{r} +library(GGally) +ggpairs(ozone, progress = FALSE) +``` +```{r} +library(MASS) + +model_backward <- stepAIC(model, ~., trace = FALSE, direction = c("backward")) +summary(model_backward) +``` + +```{r} +AIC(model_backward) +autoplot(model_backward, 1:4) +durbinWatsonTest(model_backward) +``` + +# Question 5 : Prediction +```{r} +new_obs <- list( + T12 = 18, + Ne9 = 3, + Vx9 = 0.7, + maxO3v = 85 +) +predict(model_backward, new_obs, interval = "confidence") + +``` diff --git a/M1/General Linear Models/TP3/ozone.txt b/M1/General Linear Models/TP3/ozone.txt new file mode 100644 index 0000000..48ace16 --- /dev/null +++ b/M1/General Linear Models/TP3/ozone.txt @@ -0,0 +1,113 @@ +maxO3 T9 T12 T15 Ne9 Ne12 Ne15 Vx9 Vx12 Vx15 maxO3v vent temps +87 15.6 18.5 18.4 4 4 8 0.6946 -1.7101 -0.6946 84 Nord Sec +82 17 18.4 17.7 5 5 7 -4.3301 -4 -3 87 Nord Sec +92 15.3 17.6 19.5 2 5 4 2.9544 1.8794 0.5209 82 Est Sec +114 16.2 19.7 22.5 1 1 0 0.9848 0.3473 -0.1736 92 Nord Sec +94 17.4 20.5 20.4 8 8 7 -0.5 -2.9544 -4.3301 114 Ouest Sec +80 17.7 19.8 18.3 6 6 7 -5.6382 -5 -6 94 Ouest Pluie +79 16.8 15.6 14.9 7 8 8 -4.3301 -1.8794 -3.7588 80 Ouest Sec +79 14.9 17.5 18.9 5 5 4 0 -1.0419 -1.3892 99 Nord Sec +101 16.1 19.6 21.4 2 4 4 -0.766 -1.0261 -2.2981 79 Nord Sec +106 18.3 21.9 22.9 5 6 8 1.2856 -2.2981 -3.9392 101 Ouest Sec +101 17.3 19.3 20.2 7 7 3 -1.5 -1.5 -0.8682 106 Nord Sec +90 17.6 20.3 17.4 7 6 8 0.6946 -1.0419 -0.6946 101 Sud Sec +72 18.3 19.6 19.4 7 5 6 -0.8682 -2.7362 -6.8944 90 Sud Sec +70 17.1 18.2 18 7 7 7 -4.3301 -7.8785 -5.1962 72 Ouest Pluie +83 15.4 17.4 16.6 8 7 7 -4.3301 -2.0521 -3 70 Nord Sec +88 15.9 19.1 21.5 6 5 4 0.5209 -2.9544 -1.0261 83 Ouest Sec +145 21 24.6 26.9 0 1 1 -0.342 -1.5321 -0.684 121 Ouest Sec +81 16.2 22.4 23.4 8 3 1 0 0.3473 -2.5712 145 Nord Sec +121 19.7 24.2 26.9 2 1 0 1.5321 1.7321 2 81 Est Sec +146 23.6 28.6 28.4 1 1 2 1 -1.9284 -1.2155 121 Sud Sec +121 20.4 25.2 27.7 1 0 0 0 -0.5209 1.0261 146 Nord Sec +146 27 32.7 33.7 0 0 0 2.9544 6.5778 4.3301 121 Est Sec +108 24 23.5 25.1 4 4 0 -2.5712 -3.8567 -4.6985 146 Sud Sec +83 19.7 22.9 24.8 7 6 6 -2.5981 -3.9392 -4.924 108 Ouest Sec +57 20.1 22.4 22.8 7 6 7 -5.6382 -3.8302 -4.5963 83 Ouest Pluie +81 19.6 25.1 27.2 3 4 4 -1.9284 -2.5712 -4.3301 57 Sud Sec +67 19.5 23.4 23.7 5 5 4 -1.5321 -3.0642 -0.8682 81 Ouest Sec +70 18.8 22.7 24.9 5 2 1 0.684 0 1.3681 67 Nord Sec +106 24.1 28.4 30.1 0 0 1 2.8191 3.9392 3.4641 70 Est Sec +139 26.6 30.1 31.9 0 1 4 1.8794 2 1.3681 106 Sud Sec +79 19.5 18.8 17.8 8 8 8 0.6946 -0.866 -1.0261 139 Ouest Sec +93 16.8 18.2 22 8 8 6 0 0 1.2856 79 Sud Pluie +97 20.8 23.7 25 2 3 4 0 1.7101 -2.7362 93 Nord Sec +113 17.5 18.2 22.7 8 8 5 -3.7588 -3.9392 -4.6985 97 Ouest Pluie +72 18.1 21.2 23.9 7 6 4 -2.5981 -3.9392 -3.7588 113 Ouest Pluie +88 19.2 22 25.2 4 7 4 -1.9696 -3.0642 -4 72 Ouest Sec +77 19.4 20.7 22.5 7 8 7 -6.5778 -5.6382 -9 88 Ouest Sec +71 19.2 21 22.4 6 4 6 -7.8785 -6.8937 -6.8937 77 Ouest Sec +56 13.8 17.3 18.5 8 8 6 1.5 -3.8302 -2.0521 71 Ouest Pluie +45 14.3 14.5 15.2 8 8 8 0.684 4 2.9544 56 Est Pluie +67 15.6 18.6 20.3 5 7 5 -3.2139 -3.7588 -4 45 Ouest Pluie +67 16.9 19.1 19.5 5 5 6 -2.2981 -3.7588 0 67 Ouest Pluie +84 17.4 20.4 21.4 3 4 6 0 0.3473 -2.5981 67 Sud Sec +63 15.1 20.5 20.6 8 6 6 2 -5.3623 -6.1284 84 Ouest Pluie +69 15.1 15.6 15.9 8 8 8 -4.5963 -3.8302 -4.3301 63 Ouest Pluie +92 16.7 19.1 19.3 7 6 4 -2.0521 -4.4995 -2.7362 69 Nord Sec +88 16.9 20.3 20.7 6 6 5 -2.8191 -3.4641 -3 92 Ouest Pluie +66 18 21.6 23.3 8 6 5 -3 -3.5 -3.2139 88 Sud Sec +72 18.6 21.9 23.6 4 7 6 0.866 -1.9696 -1.0261 66 Ouest Sec +81 18.8 22.5 23.9 6 3 2 0.5209 -1 -2 72 Nord Sec +83 19 22.5 24.1 2 4 6 0 -1.0261 0.5209 81 Nord Sec +149 19.9 26.9 29 3 4 3 1 -0.9397 -0.6428 83 Ouest Sec +153 23.8 27.7 29.4 1 1 4 0.9397 1.5 0 149 Nord Sec +159 24 28.3 26.5 2 2 7 -0.342 1.2856 -2 153 Nord Sec +149 23.3 27.6 28.8 4 6 3 0.866 -1.5321 -0.1736 159 Ouest Sec +160 25 29.6 31.1 0 3 5 1.5321 -0.684 2.8191 149 Sud Sec +156 24.9 30.5 32.2 0 1 4 -0.5 -1.8794 -1.2856 160 Ouest Sec +84 20.5 26.3 27.8 1 0 2 -1.3681 -0.6946 0 156 Nord Sec +126 25.3 29.5 31.2 1 4 4 3 3.7588 5 84 Est Sec +116 21.3 23.8 22.1 7 7 8 0 -2.3941 -1.3892 126 Sud Pluie +77 20 18.2 23.6 5 7 6 -3.4641 -2.5981 -3.7588 116 Ouest Pluie +63 18.7 20.6 20.3 6 7 7 -5 -4.924 -5.6382 77 Ouest Pluie +54 18.6 18.7 17.8 8 8 8 -4.6985 -2.5 -0.8682 63 Sud Pluie +65 19.2 23 22.7 8 7 7 -3.8302 -4.924 -5.6382 54 Ouest Sec +72 19.9 21.6 20.4 7 7 8 -3 -4.5963 -5.1962 65 Ouest Pluie +60 18.7 21.4 21.7 7 7 7 -5.6382 -6.0622 -6.8937 72 Ouest Pluie +70 18.4 17.1 20.5 3 6 3 -5.9088 -3.2139 -4.4995 60 Nord Pluie +77 17.1 20 20.8 4 5 4 -1.9284 -1.0261 0.5209 70 Nord Sec +98 17.8 22.8 24.3 1 1 0 0 -1.5321 -1 77 Ouest Pluie +111 20.9 25.2 26.7 1 5 2 -1.0261 -3 -2.2981 98 Ouest Sec +75 18.8 20.5 26 8 7 1 -0.866 0 0 111 Nord Sec +116 23.5 29.8 31.7 1 3 5 1.8794 1.3681 0.6946 75 Sud Sec +109 20.8 23.7 26.6 8 5 4 -1.0261 -1.7101 -3.2139 116 Sud Sec +67 18.8 21.1 18.9 7 7 8 -5.3623 -5.3623 -2.5 86 Ouest Pluie +76 17.8 21.3 24 7 5 5 -3.0642 -2.2981 -3.9392 67 Ouest Pluie +113 20.6 24.8 27 1 1 2 1.3681 0.8682 -2.2981 76 Sud Sec +117 21.6 26.9 28.6 6 6 4 1.5321 1.9284 1.9284 113 Sud Pluie +131 22.7 28.4 30.1 5 3 3 0.1736 -1.9696 -1.9284 117 Ouest Sec +166 19.8 27.2 30.8 4 0 1 0.6428 -0.866 0.684 131 Ouest Sec +159 25 33.5 35.5 1 1 1 1 0.6946 -1.7101 166 Sud Sec +100 20.1 22.9 27.6 8 8 6 1.2856 -1.7321 -0.684 159 Ouest Sec +114 21 26.3 26.4 7 4 5 3.0642 2.8191 1.3681 100 Est Sec +112 21 24.4 26.8 1 6 3 4 4 3.7588 114 Est Sec +101 16.9 17.8 20.6 7 7 7 -2 -0.5209 1.8794 112 Nord Pluie +76 17.5 18.6 18.7 7 7 7 -3.4641 -4 -1.7321 101 Ouest Sec +59 16.5 20.3 20.3 5 7 6 -4.3301 -5.3623 -4.5 76 Ouest Pluie +78 17.7 20.2 21.5 5 5 3 0 0.5209 0 59 Nord Pluie +76 17.3 22.7 24.6 4 5 6 -2.9544 -2.9544 -2 78 Ouest Pluie +55 15.3 16.8 19.2 8 7 5 -1.8794 -1.8794 -2.3941 76 Ouest Pluie +71 15.9 19.2 19.5 7 5 3 -6.1284 0 -1.3892 55 Nord Pluie +66 16.2 18.9 19.3 2 5 6 -1.3681 -0.8682 1.7101 71 Nord Pluie +59 18.3 18.3 19 7 7 7 -3.9392 -1.9284 -1.7101 66 Nord Pluie +68 16.9 20.8 22.5 6 5 7 -1.5 -3.4641 -3.0642 59 Ouest Pluie +63 17.3 19.8 19.4 7 8 8 -4.5963 -6.0622 -4.3301 68 Ouest Sec +78 14.2 22.2 22 5 5 6 -0.866 -5 -5 62 Ouest Sec +74 15.8 18.7 19.1 8 7 7 -4.5963 -6.8937 -7.5175 78 Ouest Pluie +71 15.2 17.9 18.6 6 5 1 -1.0419 -1.3681 -1.0419 74 Nord Pluie +69 17.1 17.7 17.5 6 7 8 -5.1962 -2.7362 -1.0419 71 Nord Pluie +71 15.4 17.7 16.6 4 5 5 -3.8302 0 1.3892 69 Nord Sec +60 13.7 14 15.8 4 5 4 0 3.2139 0 71 Nord Pluie +42 12.7 14.3 14.9 8 7 7 -2.5 -3.2139 -2.5 60 Nord Pluie +65 14.8 16.3 15.9 7 7 7 -4.3301 -6.0622 -5.1962 42 Ouest Pluie +71 15.5 18 17.4 7 7 6 -3.9392 -3.0642 0 65 Ouest Sec +96 11.3 19.4 20.2 3 3 3 -0.1736 3.7588 3.8302 71 Est Pluie +98 15.2 19.7 20.3 2 2 2 4 5 4.3301 96 Est Sec +92 14.7 17.6 18.2 1 4 6 5.1962 5.1423 3.5 98 Nord Sec +76 13.3 17.7 17.7 7 7 6 -0.9397 -0.766 -0.5 92 Ouest Pluie +84 13.3 17.7 17.8 3 5 6 0 -1 -1.2856 76 Sud Sec +77 16.2 20.8 22.1 6 5 5 -0.6946 -2 -1.3681 71 Sud Pluie +99 16.9 23 22.6 6 4 7 1.5 0.8682 0.8682 77 Sud Sec +83 16.9 19.8 22.1 6 5 3 -4 -3.7588 -4 99 Ouest Pluie +70 15.7 18.6 20.7 7 7 7 0 -1.0419 -4 83 Sud Sec diff --git a/M1/Monte Carlo Methods/Project 2/Projet2.rmd b/M1/Monte Carlo Methods/Project 2/Projet2.rmd new file mode 100644 index 0000000..786022a --- /dev/null +++ b/M1/Monte Carlo Methods/Project 2/Projet2.rmd @@ -0,0 +1,286 @@ +--- +title: "R Notebook" +output: html_notebook +--- + +### Random Variable simulation with stratification + +##### Question 9 + +We consider a partition $\mathcal{P}= (D_0,D_1,...,D_k)$, $k \in \mathbb{N}$ of $\mathbb{R}$ such that $D_0$ covers the tails of $f_1$ and $f_1$ is upper bounded and $f_2$ lower bounded in $D_1, \dots, D_k$. + +To simulate under $f$ using the accept-reject algorithm, we need to find a density function $g$ such that $f(x) \le M g(x)$ for all $x \in \mathbb{R}$, where M is a constant.\ + +We generate $X \sim g$ and $U \sim \mathcal{U}([0,1])$ $\textit{(1)}$.\ +We accept $Y = X$ if $U \le \frac{f(X)}{Mg(X)}$. Otherwise, we return to $\textit{(1)}$. + +Here we pose $g(x) = f_1(x)$ if $x \in D_0$ and $g(x) = sup_{D_i} f_1(x)$ if $x \in D_i, i \in \{1, \dots, k\}$. + +To conclude, we have $M = \frac{1}{1-a}$ and the probability of acceptance is $r = \frac{1}{M} = 1-a$ + + +##### Question 10 + +Let $P_n = (D_0, \dots D_n)$ a partition of $\mathbb{R}$ for $n \in \mathbb{N}$. +We have $\forall x \in P_n$ and $\forall i \in \{0, \dots, n\}$, $\lim_{n \to\infty} sup_{D_i} f_1(x) = f_1(x)$ and $\lim_{x\to\infty} inf_{D_i} f_2(x) = f_2(x)$. + +$\Rightarrow \lim_{x\to\infty} g(x) = f(x)$ + +$\Rightarrow \lim_{x\to\infty} \frac{g(x)}{f(x)} = 1$ as $f(x) > 0$ as $f$ is a density function. + +$\Rightarrow \forall \epsilon > 0, \exists n_{\epsilon} \in \mathbb{N}$ such that $\forall n \ge n_{\epsilon}$, $M = sup_{x \in P_n} \frac{g(x)}{f(x)} < \epsilon$ + +$\Rightarrow r = \frac{1}{M} > \frac{1}{\epsilon} := \delta \in ]0, 1]$ where $r$ is the acceptance rate defined in the question 10. + +##### Question 11 + +We recall the parameters and the functions of the problem. +```{r} +mu1 <- 0 +mu2 <- 1 +s1 <- 3 +s2 <- 1 +a <- 0.2 + +f1 <- function(x) { + dnorm(x, mu1, s1) +} + +f2 <- function(x) { + dnorm(x, mu2, s2) +} + +f <- function(x) { + fx <- f1(x) - a * f2(x) + fx[fx < 0] <- 0 + return(fx / (1 - a)) +} + +f1_bounds <- c(mu1 - 3 * s1, mu1 + 3 * s1) +``` + +We implement the partition, the given $g$ function to understand the behavior of $g$ compared to $f$ and the computation of the supremum and infimum of $f_1$ and $f_2$ on each partition. +```{r} +create_partition <- function(k = 10) { + return(seq(f1_bounds[1], f1_bounds[2], , length.out = k)) +} + +sup_inf <- function(f, P, i) { + x <- seq(P[i], P[i + 1], length.out = 1000) + f_values <- sapply(x, f) + return(c(max(f_values), min(f_values))) +} + +g <- function(X, P) { + values <- numeric(0) + for (x in X) { + if (x <= P[1] | x >= P[length(P)]) { + values <- c(values, 1 / (1 - a) * f1(x)) + } else { + for (i in 1:(length(P) - 1)) { + if (x >= P[i] & x <= P[i + 1]) { + values <- c(values, 1 / (1 - a) * (sup_inf(f1, P, i)[1]) - a * sup_inf(f2, P, i)[2]) + } + } + } + } + return(values) +} +``` + +We plot the function $f$ and the dominating function $g$ for different sizes of the partition. +```{r} +library(ggplot2) + +X <- seq(-12, 12, length.out = 1000) + +# Plot for different k with ggplot on same graph +k_values <- c(10, 20, 50, 100) +P_values <- lapply(k_values, create_partition) +g_values <- lapply(P_values, g, X) + +ggplot() + + geom_line(aes(x = X, y = f(X)), col = "red") + + geom_line(aes(x = X, y = g(X, P_values[[1]])), col = "green") + + geom_line(aes(x = X, y = g(X, P_values[[2]])), col = "orange") + + geom_line(aes(x = X, y = g(X, P_values[[3]])), col = "purple") + + geom_line(aes(x = X, y = g(X, P_values[[4]])), col = "black") + + labs(title = "Dominating function g of f for different size of partition", x = "x", y = "Density") + + theme_minimal() +``` + +Here, we implement the algorithm of accept-reject with the given partition and an appropriate function $g$ to compute $f$. +```{r} +set.seed(123) + +g_accept_reject <- function(x, P) { + if (x < P[1] | x >= P[length(P)]) { + return(f1(x)) + } else { + for (i in seq_along(P)) { + if (x >= P[i] & x < P[i + 1]) { + return(sup_inf(f1, P, i)[1]) + } + } + } +} + +stratified <- function(n, P) { + samples <- numeric(0) + rate <- 0 + while (length(samples) < n) { + x <- rnorm(1, mu1, s1) + u <- runif(1) + if (u <= f(x) * (1 - a) / g_accept_reject(x, P)) { + samples <- c(samples, x) + } + rate <- rate + 1 + } + list(samples = samples, acceptance_rate = n / rate) +} + +n <- 10000 +k <- 100 + +P <- create_partition(k) +samples <- stratified(n, P) +X <- seq(-10, 10, length.out = 1000) + +hist(samples$samples, breaks = 50, freq = FALSE, col = "lightblue", main = "Empirical density function f", xlab = "x") +lines(X, f(X), col = "red") +``` + +We also compute the acceptance rate of the algorithm. +```{r} +theorical_acceptance_rate <- 1 - a +cat(sprintf("Empirical acceptance rate: %f, Theoretical acceptance rate: %f \n", samples$acceptance_rate, theorical_acceptance_rate)) +``` + +##### Question 12 + +```{r} +set.seed(123) + +stratified_delta <- function(n, delta) { + samples <- numeric(0) + P <- create_partition(n * delta) + rate <- 0 + while (length(samples) < n | rate < delta * n) { + x <- rnorm(1, mu1, s1) + u <- runif(1) + if (u <= f(x) * delta / g_accept_reject(x, P)) { + samples <- c(samples, x) + } + rate <- rate + 1 + } + list(samples = samples, partition = P, acceptance_rate = n / rate) +} + +n <- 10000 +delta <- 0.8 + +samples_delta <- stratified_delta(n, delta) +X <- seq(-10, 10, length.out = 1000) + +hist(samples_delta$samples, breaks = 50, freq = FALSE, col = "lightblue", main = "Empirical density function f", xlab = "x") +lines(X, f(X), col = "red") +``` + +We also compute the acceptance rate of the algorithm. +```{r} +theorical_acceptance_rate <- 1 - a +cat(sprintf("Empirical acceptance rate: %f, Theoretical acceptance rate: %f \n", samples_delta$acceptance_rate, theorical_acceptance_rate)) +``` + +Now, we will test the stratified_delta function for different delta: + +```{r} +set.seed(123) +n <- 1000 +deltas <- seq(0.1, 1, by = 0.1) + +for (delta in deltas) { + samples <- stratified_delta(n, delta) + cat(sprintf("Delta: %.1f, Empirical acceptance rate: %f \n", delta, samples$acceptance_rate)) +} +``` + +### Cumulative density function. + +##### Question 13 + +The cumulative density function $\forall x \in \mathbb{R}$ $F_X(x) = \int_{-\infty}^{x} f(t) \,dt = \int_{\mathbb{R}} f(t) h(t), dt$ where $h(x) = \mathbb{1}_{X_n \le x}$ + +For a given $x \in \mathbb{R}$, a Monte Carlo estimator $F_n(x) = \frac{1}{n} \sum_{i=1}^{n} h(X_i)$ where $h$ is the same function as above and $(X_i)_{i=1}^{n} \sim^{iid} X$ + +##### Question 14 + +As $X_1, \dots, X_n$ are iid and follows the law of X, and $h$ is continuous and positive, we have $h(X_1), \dots h(X_n)$ are iid and $\mathbb{E}[h(X_i)] < + \infty$. By the law of large numbers, we have $F_n(X) = \frac{1}{n} \sum_{i=1}^{n} h(X_i) \xrightarrow{a.s} \mathbb{E}[h(X_1)] = F_X(x)$. + +Moreover, $\forall \epsilon > 0$, $\exists N \in \mathbb{N}$ such that $\forall n \le N$, $|F_n(x) - F_X(x)| < \epsilon$, ie, $sup_{x \in \mathbb{R}} |F_n(x) - F_X(x)| \xrightarrow{a.s} 0$, by Glivenko-Cantelli theorem. + +Hence, $F_n$ is a good estimate of $F_X$ as a function of $x$. + +##### Question 15 +```{r} +set.seed(123) +n <- 10000 +Xn <- (rnorm(n, mu1, s1) - a * rnorm(n, mu2, s2)) / (1 - a) + +h <- function(x, Xn) { + return(Xn <= x) +} + +# Fn empirical +empirical_cdf <- function(x, Xn) { + return(mean(h(x, Xn))) +} + +# F theoretical +F <- function(x) { + Fx <- pnorm(x, mu1, s1) - a * pnorm(x, mu2, s2) + return(Fx / (1 - a)) +} + +X <- seq(-10, 10, length.out = n) +cat(sprintf("Empirical cdf: %f, Theoretical cdf: %f \n", empirical_cdf(X, Xn), mean(F(Xn)))) + +length(X) +length(empirical_cdf(X, Xn)) +plot(X, empirical_cdf(X, Xn), col = "red", type = "l", xlab = "x", ylab = "F(x)") +lines(X, F(X), col = "blue") +``` + +Now we plot the empirical and theoretical cumulative density functions for different n. +```{r} +n_values <- seq(100, 10000, length.out = 10) + +``` + +##### Question 16 + +As $X_1, \dots, X_n$ are iid and follows the law of X, and $h$ is continuous and positive, we have $h(X_1), \dots h(X_n)$ are iid and $\mathbb{E}[h(X_i)^2] < + \infty$. By the Central Limit Theorem, we have $\sqrt{n} \frac{(F_n(x) - F_X(x))}{\sigma} \xrightarrow{d} \mathcal{N}(0, 1)$ where $\sigma^2 = Var(h(X_1))$. + +So we have $\lim_{x\to\infty} \mathbb{P}(\sqrt{n} \frac{(F_n(x) - F_X(x))}{\sigma} \le q_{1-\frac{\alpha}{2}}^{\mathcal{N}(0, 1)})= 1 - \alpha$ + +So by computing the quantile of the normal distribution, we can have a confidence interval for $F_X(x)$ : +$F_X(x) \in [F_n(x) - \frac{q_{1-\frac{\alpha}{2}}^{\mathcal{N}(0, 1)} \sigma}{\sqrt{n}} ; F_n(x) + \frac{q_{1-\frac{\alpha}{2}}^{\mathcal{N}(0, 1)} \sigma}{\sqrt{n}}]$ + +```{r} +Fn <- empirical_cdf(X, Xn) +sigma <- sqrt(Fn - Fn^2) +q <- qnorm(0.975) +interval <- c(Fn - q * sigma / sqrt(n), Fn + q * sigma / sqrt(n)) +cat(sprintf("Confidence interval: [%f, %f] \n", interval[1], interval[2])) +``` + +##### Question 17 + +```{r} +q <- qnorm(0.975) +x <- 1 +epsilon <- 0.01 +n_required <- (1.96^2 * F(x) * (1 - F(x))) / epsilon^2 +ceiling(n_required) +``` +