mirror of
https://github.com/ArthurDanjou/ArtStudies.git
synced 2026-01-14 15:54:13 +01:00
Add TP3
This commit is contained in:
@@ -75,7 +75,6 @@ autoplot(model_full, 1:4)
|
||||
set.seed(12)
|
||||
durbinWatsonTest(model_full)
|
||||
```
|
||||
|
||||
[P3] is verified as the p-value is 0.7 > 0.05, so we do not reject H0 so the residuals are not auto-correlated
|
||||
|
||||
# Bonus : Type II Test
|
||||
|
||||
134
M1/General Linear Models/TP3/TP2.rmd
Normal file
134
M1/General Linear Models/TP3/TP2.rmd
Normal file
@@ -0,0 +1,134 @@
|
||||
```{r}
|
||||
setwd('/Users/arthurdanjou/Workspace/studies/M1/General Linear Models/TP3')
|
||||
```
|
||||
|
||||
# Question 1 : Import dataset and check variables
|
||||
|
||||
```{r}
|
||||
library(dplyr)
|
||||
ozone <- read.table("ozone.txt", header = TRUE, sep = " ", dec = ".")
|
||||
ozone$vent <- as.factor(ozone$vent)
|
||||
ozone$temps <- as.factor(ozone$temps)
|
||||
ozone <- ozone %>% mutate(across(where(is.character), as.numeric))
|
||||
ozone <- ozone %>% mutate(across(where(is.integer), as.numeric))
|
||||
paged_table(ozone)
|
||||
```
|
||||
|
||||
# Question 2 : max03 ~ T12
|
||||
|
||||
```{r}
|
||||
model_T12 <- lm(maxO3 ~ T12, data = ozone)
|
||||
summary(model_T12)
|
||||
```
|
||||
|
||||
```{r}
|
||||
library(ggplot2)
|
||||
|
||||
ggplot(ozone, aes(x = T12, y = maxO3)) +
|
||||
geom_smooth(method = 'lm', se = T) +
|
||||
geom_point(col = 'red', size = 0.5) +
|
||||
labs(title = "maxO3 ~ T12") +
|
||||
theme_minimal()
|
||||
```
|
||||
|
||||
```{r}
|
||||
library(ggfortify)
|
||||
library(car)
|
||||
|
||||
autoplot(model_T12, 1:4)
|
||||
durbinWatsonTest(model_T12)
|
||||
```
|
||||
|
||||
The p-value of the Durbin-Watson test is 0, so [P3] is not valid. The residuals are correlated.
|
||||
|
||||
The model is not valid.
|
||||
|
||||
# Question 3 : max03 ~ T12 + temps
|
||||
|
||||
```{r}
|
||||
library(ggplot2)
|
||||
|
||||
ggplot(ozone, aes(y = maxO3, x = temps, colour = temps, fill = temps)) +
|
||||
geom_boxplot(alpha = 0.5, outlier.alpha = 0) +
|
||||
geom_jitter(width = 0.25, size = 1) +
|
||||
stat_summary(fun = mean, colour = "black", geom = "point", shape = 18, size = 3)
|
||||
```
|
||||
|
||||
```{r}
|
||||
model_temps <- lm(maxO3 ~ -1 + temps, data = ozone)
|
||||
summary(model_temps)
|
||||
```
|
||||
|
||||
```{r}
|
||||
autoplot(model_temps, 1:4)
|
||||
durbinWatsonTest(model_temps)
|
||||
```
|
||||
```{r}
|
||||
model_vent <- lm(maxO3 ~ -1 + vent, data = ozone)
|
||||
summary(model_vent)
|
||||
```
|
||||
|
||||
```{r}
|
||||
autoplot(model_vent, 1:4)
|
||||
durbinWatsonTest(model_vent)
|
||||
```
|
||||
The p-value of the Durbin-Watson test is 0, so [P3] is not valid. The residuals are correlated.
|
||||
|
||||
The model is not valid.
|
||||
```{r}
|
||||
model_temps_vent <- lm(maxO3 ~ temps * vent, data = ozone)
|
||||
summary(model_temps_vent)
|
||||
```
|
||||
|
||||
```{r}
|
||||
autoplot(model_temps_vent, 1:4)
|
||||
durbinWatsonTest(model_temps_vent)
|
||||
```
|
||||
|
||||
# Question 4 : Multiple linear regression
|
||||
```{r}
|
||||
model <- lm(maxO3 ~ ., data = ozone)
|
||||
summary(model)
|
||||
```
|
||||
|
||||
```{r}
|
||||
autoplot(model, 1:4)
|
||||
durbinWatsonTest(model)
|
||||
```
|
||||
|
||||
[P1] is verified as the 'Residuals vs Fitted' plot shows that the points are well distributed around 0
|
||||
[P2] is verified as the 'Scale-Location' plot shows that the points are well distributed around 1
|
||||
[P4] is verified as the 'QQPlot' is aligned with the 'y=x' line
|
||||
|
||||
[P3] is verified as the p-value is 0.7 > 0.05, so we do not reject H0 so the residuals are not auto-correlated
|
||||
|
||||
The model is valid.
|
||||
|
||||
```{r}
|
||||
library(GGally)
|
||||
ggpairs(ozone, progress = FALSE)
|
||||
```
|
||||
```{r}
|
||||
library(MASS)
|
||||
|
||||
model_backward <- stepAIC(model, ~., trace = FALSE, direction = c("backward"))
|
||||
summary(model_backward)
|
||||
```
|
||||
|
||||
```{r}
|
||||
AIC(model_backward)
|
||||
autoplot(model_backward, 1:4)
|
||||
durbinWatsonTest(model_backward)
|
||||
```
|
||||
|
||||
# Question 5 : Prediction
|
||||
```{r}
|
||||
new_obs <- list(
|
||||
T12 = 18,
|
||||
Ne9 = 3,
|
||||
Vx9 = 0.7,
|
||||
maxO3v = 85
|
||||
)
|
||||
predict(model_backward, new_obs, interval = "confidence")
|
||||
|
||||
```
|
||||
113
M1/General Linear Models/TP3/ozone.txt
Normal file
113
M1/General Linear Models/TP3/ozone.txt
Normal file
@@ -0,0 +1,113 @@
|
||||
maxO3 T9 T12 T15 Ne9 Ne12 Ne15 Vx9 Vx12 Vx15 maxO3v vent temps
|
||||
87 15.6 18.5 18.4 4 4 8 0.6946 -1.7101 -0.6946 84 Nord Sec
|
||||
82 17 18.4 17.7 5 5 7 -4.3301 -4 -3 87 Nord Sec
|
||||
92 15.3 17.6 19.5 2 5 4 2.9544 1.8794 0.5209 82 Est Sec
|
||||
114 16.2 19.7 22.5 1 1 0 0.9848 0.3473 -0.1736 92 Nord Sec
|
||||
94 17.4 20.5 20.4 8 8 7 -0.5 -2.9544 -4.3301 114 Ouest Sec
|
||||
80 17.7 19.8 18.3 6 6 7 -5.6382 -5 -6 94 Ouest Pluie
|
||||
79 16.8 15.6 14.9 7 8 8 -4.3301 -1.8794 -3.7588 80 Ouest Sec
|
||||
79 14.9 17.5 18.9 5 5 4 0 -1.0419 -1.3892 99 Nord Sec
|
||||
101 16.1 19.6 21.4 2 4 4 -0.766 -1.0261 -2.2981 79 Nord Sec
|
||||
106 18.3 21.9 22.9 5 6 8 1.2856 -2.2981 -3.9392 101 Ouest Sec
|
||||
101 17.3 19.3 20.2 7 7 3 -1.5 -1.5 -0.8682 106 Nord Sec
|
||||
90 17.6 20.3 17.4 7 6 8 0.6946 -1.0419 -0.6946 101 Sud Sec
|
||||
72 18.3 19.6 19.4 7 5 6 -0.8682 -2.7362 -6.8944 90 Sud Sec
|
||||
70 17.1 18.2 18 7 7 7 -4.3301 -7.8785 -5.1962 72 Ouest Pluie
|
||||
83 15.4 17.4 16.6 8 7 7 -4.3301 -2.0521 -3 70 Nord Sec
|
||||
88 15.9 19.1 21.5 6 5 4 0.5209 -2.9544 -1.0261 83 Ouest Sec
|
||||
145 21 24.6 26.9 0 1 1 -0.342 -1.5321 -0.684 121 Ouest Sec
|
||||
81 16.2 22.4 23.4 8 3 1 0 0.3473 -2.5712 145 Nord Sec
|
||||
121 19.7 24.2 26.9 2 1 0 1.5321 1.7321 2 81 Est Sec
|
||||
146 23.6 28.6 28.4 1 1 2 1 -1.9284 -1.2155 121 Sud Sec
|
||||
121 20.4 25.2 27.7 1 0 0 0 -0.5209 1.0261 146 Nord Sec
|
||||
146 27 32.7 33.7 0 0 0 2.9544 6.5778 4.3301 121 Est Sec
|
||||
108 24 23.5 25.1 4 4 0 -2.5712 -3.8567 -4.6985 146 Sud Sec
|
||||
83 19.7 22.9 24.8 7 6 6 -2.5981 -3.9392 -4.924 108 Ouest Sec
|
||||
57 20.1 22.4 22.8 7 6 7 -5.6382 -3.8302 -4.5963 83 Ouest Pluie
|
||||
81 19.6 25.1 27.2 3 4 4 -1.9284 -2.5712 -4.3301 57 Sud Sec
|
||||
67 19.5 23.4 23.7 5 5 4 -1.5321 -3.0642 -0.8682 81 Ouest Sec
|
||||
70 18.8 22.7 24.9 5 2 1 0.684 0 1.3681 67 Nord Sec
|
||||
106 24.1 28.4 30.1 0 0 1 2.8191 3.9392 3.4641 70 Est Sec
|
||||
139 26.6 30.1 31.9 0 1 4 1.8794 2 1.3681 106 Sud Sec
|
||||
79 19.5 18.8 17.8 8 8 8 0.6946 -0.866 -1.0261 139 Ouest Sec
|
||||
93 16.8 18.2 22 8 8 6 0 0 1.2856 79 Sud Pluie
|
||||
97 20.8 23.7 25 2 3 4 0 1.7101 -2.7362 93 Nord Sec
|
||||
113 17.5 18.2 22.7 8 8 5 -3.7588 -3.9392 -4.6985 97 Ouest Pluie
|
||||
72 18.1 21.2 23.9 7 6 4 -2.5981 -3.9392 -3.7588 113 Ouest Pluie
|
||||
88 19.2 22 25.2 4 7 4 -1.9696 -3.0642 -4 72 Ouest Sec
|
||||
77 19.4 20.7 22.5 7 8 7 -6.5778 -5.6382 -9 88 Ouest Sec
|
||||
71 19.2 21 22.4 6 4 6 -7.8785 -6.8937 -6.8937 77 Ouest Sec
|
||||
56 13.8 17.3 18.5 8 8 6 1.5 -3.8302 -2.0521 71 Ouest Pluie
|
||||
45 14.3 14.5 15.2 8 8 8 0.684 4 2.9544 56 Est Pluie
|
||||
67 15.6 18.6 20.3 5 7 5 -3.2139 -3.7588 -4 45 Ouest Pluie
|
||||
67 16.9 19.1 19.5 5 5 6 -2.2981 -3.7588 0 67 Ouest Pluie
|
||||
84 17.4 20.4 21.4 3 4 6 0 0.3473 -2.5981 67 Sud Sec
|
||||
63 15.1 20.5 20.6 8 6 6 2 -5.3623 -6.1284 84 Ouest Pluie
|
||||
69 15.1 15.6 15.9 8 8 8 -4.5963 -3.8302 -4.3301 63 Ouest Pluie
|
||||
92 16.7 19.1 19.3 7 6 4 -2.0521 -4.4995 -2.7362 69 Nord Sec
|
||||
88 16.9 20.3 20.7 6 6 5 -2.8191 -3.4641 -3 92 Ouest Pluie
|
||||
66 18 21.6 23.3 8 6 5 -3 -3.5 -3.2139 88 Sud Sec
|
||||
72 18.6 21.9 23.6 4 7 6 0.866 -1.9696 -1.0261 66 Ouest Sec
|
||||
81 18.8 22.5 23.9 6 3 2 0.5209 -1 -2 72 Nord Sec
|
||||
83 19 22.5 24.1 2 4 6 0 -1.0261 0.5209 81 Nord Sec
|
||||
149 19.9 26.9 29 3 4 3 1 -0.9397 -0.6428 83 Ouest Sec
|
||||
153 23.8 27.7 29.4 1 1 4 0.9397 1.5 0 149 Nord Sec
|
||||
159 24 28.3 26.5 2 2 7 -0.342 1.2856 -2 153 Nord Sec
|
||||
149 23.3 27.6 28.8 4 6 3 0.866 -1.5321 -0.1736 159 Ouest Sec
|
||||
160 25 29.6 31.1 0 3 5 1.5321 -0.684 2.8191 149 Sud Sec
|
||||
156 24.9 30.5 32.2 0 1 4 -0.5 -1.8794 -1.2856 160 Ouest Sec
|
||||
84 20.5 26.3 27.8 1 0 2 -1.3681 -0.6946 0 156 Nord Sec
|
||||
126 25.3 29.5 31.2 1 4 4 3 3.7588 5 84 Est Sec
|
||||
116 21.3 23.8 22.1 7 7 8 0 -2.3941 -1.3892 126 Sud Pluie
|
||||
77 20 18.2 23.6 5 7 6 -3.4641 -2.5981 -3.7588 116 Ouest Pluie
|
||||
63 18.7 20.6 20.3 6 7 7 -5 -4.924 -5.6382 77 Ouest Pluie
|
||||
54 18.6 18.7 17.8 8 8 8 -4.6985 -2.5 -0.8682 63 Sud Pluie
|
||||
65 19.2 23 22.7 8 7 7 -3.8302 -4.924 -5.6382 54 Ouest Sec
|
||||
72 19.9 21.6 20.4 7 7 8 -3 -4.5963 -5.1962 65 Ouest Pluie
|
||||
60 18.7 21.4 21.7 7 7 7 -5.6382 -6.0622 -6.8937 72 Ouest Pluie
|
||||
70 18.4 17.1 20.5 3 6 3 -5.9088 -3.2139 -4.4995 60 Nord Pluie
|
||||
77 17.1 20 20.8 4 5 4 -1.9284 -1.0261 0.5209 70 Nord Sec
|
||||
98 17.8 22.8 24.3 1 1 0 0 -1.5321 -1 77 Ouest Pluie
|
||||
111 20.9 25.2 26.7 1 5 2 -1.0261 -3 -2.2981 98 Ouest Sec
|
||||
75 18.8 20.5 26 8 7 1 -0.866 0 0 111 Nord Sec
|
||||
116 23.5 29.8 31.7 1 3 5 1.8794 1.3681 0.6946 75 Sud Sec
|
||||
109 20.8 23.7 26.6 8 5 4 -1.0261 -1.7101 -3.2139 116 Sud Sec
|
||||
67 18.8 21.1 18.9 7 7 8 -5.3623 -5.3623 -2.5 86 Ouest Pluie
|
||||
76 17.8 21.3 24 7 5 5 -3.0642 -2.2981 -3.9392 67 Ouest Pluie
|
||||
113 20.6 24.8 27 1 1 2 1.3681 0.8682 -2.2981 76 Sud Sec
|
||||
117 21.6 26.9 28.6 6 6 4 1.5321 1.9284 1.9284 113 Sud Pluie
|
||||
131 22.7 28.4 30.1 5 3 3 0.1736 -1.9696 -1.9284 117 Ouest Sec
|
||||
166 19.8 27.2 30.8 4 0 1 0.6428 -0.866 0.684 131 Ouest Sec
|
||||
159 25 33.5 35.5 1 1 1 1 0.6946 -1.7101 166 Sud Sec
|
||||
100 20.1 22.9 27.6 8 8 6 1.2856 -1.7321 -0.684 159 Ouest Sec
|
||||
114 21 26.3 26.4 7 4 5 3.0642 2.8191 1.3681 100 Est Sec
|
||||
112 21 24.4 26.8 1 6 3 4 4 3.7588 114 Est Sec
|
||||
101 16.9 17.8 20.6 7 7 7 -2 -0.5209 1.8794 112 Nord Pluie
|
||||
76 17.5 18.6 18.7 7 7 7 -3.4641 -4 -1.7321 101 Ouest Sec
|
||||
59 16.5 20.3 20.3 5 7 6 -4.3301 -5.3623 -4.5 76 Ouest Pluie
|
||||
78 17.7 20.2 21.5 5 5 3 0 0.5209 0 59 Nord Pluie
|
||||
76 17.3 22.7 24.6 4 5 6 -2.9544 -2.9544 -2 78 Ouest Pluie
|
||||
55 15.3 16.8 19.2 8 7 5 -1.8794 -1.8794 -2.3941 76 Ouest Pluie
|
||||
71 15.9 19.2 19.5 7 5 3 -6.1284 0 -1.3892 55 Nord Pluie
|
||||
66 16.2 18.9 19.3 2 5 6 -1.3681 -0.8682 1.7101 71 Nord Pluie
|
||||
59 18.3 18.3 19 7 7 7 -3.9392 -1.9284 -1.7101 66 Nord Pluie
|
||||
68 16.9 20.8 22.5 6 5 7 -1.5 -3.4641 -3.0642 59 Ouest Pluie
|
||||
63 17.3 19.8 19.4 7 8 8 -4.5963 -6.0622 -4.3301 68 Ouest Sec
|
||||
78 14.2 22.2 22 5 5 6 -0.866 -5 -5 62 Ouest Sec
|
||||
74 15.8 18.7 19.1 8 7 7 -4.5963 -6.8937 -7.5175 78 Ouest Pluie
|
||||
71 15.2 17.9 18.6 6 5 1 -1.0419 -1.3681 -1.0419 74 Nord Pluie
|
||||
69 17.1 17.7 17.5 6 7 8 -5.1962 -2.7362 -1.0419 71 Nord Pluie
|
||||
71 15.4 17.7 16.6 4 5 5 -3.8302 0 1.3892 69 Nord Sec
|
||||
60 13.7 14 15.8 4 5 4 0 3.2139 0 71 Nord Pluie
|
||||
42 12.7 14.3 14.9 8 7 7 -2.5 -3.2139 -2.5 60 Nord Pluie
|
||||
65 14.8 16.3 15.9 7 7 7 -4.3301 -6.0622 -5.1962 42 Ouest Pluie
|
||||
71 15.5 18 17.4 7 7 6 -3.9392 -3.0642 0 65 Ouest Sec
|
||||
96 11.3 19.4 20.2 3 3 3 -0.1736 3.7588 3.8302 71 Est Pluie
|
||||
98 15.2 19.7 20.3 2 2 2 4 5 4.3301 96 Est Sec
|
||||
92 14.7 17.6 18.2 1 4 6 5.1962 5.1423 3.5 98 Nord Sec
|
||||
76 13.3 17.7 17.7 7 7 6 -0.9397 -0.766 -0.5 92 Ouest Pluie
|
||||
84 13.3 17.7 17.8 3 5 6 0 -1 -1.2856 76 Sud Sec
|
||||
77 16.2 20.8 22.1 6 5 5 -0.6946 -2 -1.3681 71 Sud Pluie
|
||||
99 16.9 23 22.6 6 4 7 1.5 0.8682 0.8682 77 Sud Sec
|
||||
83 16.9 19.8 22.1 6 5 3 -4 -3.7588 -4 99 Ouest Pluie
|
||||
70 15.7 18.6 20.7 7 7 7 0 -1.0419 -4 83 Sud Sec
|
||||
286
M1/Monte Carlo Methods/Project 2/Projet2.rmd
Normal file
286
M1/Monte Carlo Methods/Project 2/Projet2.rmd
Normal file
@@ -0,0 +1,286 @@
|
||||
---
|
||||
title: "R Notebook"
|
||||
output: html_notebook
|
||||
---
|
||||
|
||||
### Random Variable simulation with stratification
|
||||
|
||||
##### Question 9
|
||||
|
||||
We consider a partition $\mathcal{P}= (D_0,D_1,...,D_k)$, $k \in \mathbb{N}$ of $\mathbb{R}$ such that $D_0$ covers the tails of $f_1$ and $f_1$ is upper bounded and $f_2$ lower bounded in $D_1, \dots, D_k$.
|
||||
|
||||
To simulate under $f$ using the accept-reject algorithm, we need to find a density function $g$ such that $f(x) \le M g(x)$ for all $x \in \mathbb{R}$, where M is a constant.\
|
||||
|
||||
We generate $X \sim g$ and $U \sim \mathcal{U}([0,1])$ $\textit{(1)}$.\
|
||||
We accept $Y = X$ if $U \le \frac{f(X)}{Mg(X)}$. Otherwise, we return to $\textit{(1)}$.
|
||||
|
||||
Here we pose $g(x) = f_1(x)$ if $x \in D_0$ and $g(x) = sup_{D_i} f_1(x)$ if $x \in D_i, i \in \{1, \dots, k\}$.
|
||||
|
||||
To conclude, we have $M = \frac{1}{1-a}$ and the probability of acceptance is $r = \frac{1}{M} = 1-a$
|
||||
|
||||
|
||||
##### Question 10
|
||||
|
||||
Let $P_n = (D_0, \dots D_n)$ a partition of $\mathbb{R}$ for $n \in \mathbb{N}$.
|
||||
We have $\forall x \in P_n$ and $\forall i \in \{0, \dots, n\}$, $\lim_{n \to\infty} sup_{D_i} f_1(x) = f_1(x)$ and $\lim_{x\to\infty} inf_{D_i} f_2(x) = f_2(x)$.
|
||||
|
||||
$\Rightarrow \lim_{x\to\infty} g(x) = f(x)$
|
||||
|
||||
$\Rightarrow \lim_{x\to\infty} \frac{g(x)}{f(x)} = 1$ as $f(x) > 0$ as $f$ is a density function.
|
||||
|
||||
$\Rightarrow \forall \epsilon > 0, \exists n_{\epsilon} \in \mathbb{N}$ such that $\forall n \ge n_{\epsilon}$, $M = sup_{x \in P_n} \frac{g(x)}{f(x)} < \epsilon$
|
||||
|
||||
$\Rightarrow r = \frac{1}{M} > \frac{1}{\epsilon} := \delta \in ]0, 1]$ where $r$ is the acceptance rate defined in the question 10.
|
||||
|
||||
##### Question 11
|
||||
|
||||
We recall the parameters and the functions of the problem.
|
||||
```{r}
|
||||
mu1 <- 0
|
||||
mu2 <- 1
|
||||
s1 <- 3
|
||||
s2 <- 1
|
||||
a <- 0.2
|
||||
|
||||
f1 <- function(x) {
|
||||
dnorm(x, mu1, s1)
|
||||
}
|
||||
|
||||
f2 <- function(x) {
|
||||
dnorm(x, mu2, s2)
|
||||
}
|
||||
|
||||
f <- function(x) {
|
||||
fx <- f1(x) - a * f2(x)
|
||||
fx[fx < 0] <- 0
|
||||
return(fx / (1 - a))
|
||||
}
|
||||
|
||||
f1_bounds <- c(mu1 - 3 * s1, mu1 + 3 * s1)
|
||||
```
|
||||
|
||||
We implement the partition, the given $g$ function to understand the behavior of $g$ compared to $f$ and the computation of the supremum and infimum of $f_1$ and $f_2$ on each partition.
|
||||
```{r}
|
||||
create_partition <- function(k = 10) {
|
||||
return(seq(f1_bounds[1], f1_bounds[2], , length.out = k))
|
||||
}
|
||||
|
||||
sup_inf <- function(f, P, i) {
|
||||
x <- seq(P[i], P[i + 1], length.out = 1000)
|
||||
f_values <- sapply(x, f)
|
||||
return(c(max(f_values), min(f_values)))
|
||||
}
|
||||
|
||||
g <- function(X, P) {
|
||||
values <- numeric(0)
|
||||
for (x in X) {
|
||||
if (x <= P[1] | x >= P[length(P)]) {
|
||||
values <- c(values, 1 / (1 - a) * f1(x))
|
||||
} else {
|
||||
for (i in 1:(length(P) - 1)) {
|
||||
if (x >= P[i] & x <= P[i + 1]) {
|
||||
values <- c(values, 1 / (1 - a) * (sup_inf(f1, P, i)[1]) - a * sup_inf(f2, P, i)[2])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return(values)
|
||||
}
|
||||
```
|
||||
|
||||
We plot the function $f$ and the dominating function $g$ for different sizes of the partition.
|
||||
```{r}
|
||||
library(ggplot2)
|
||||
|
||||
X <- seq(-12, 12, length.out = 1000)
|
||||
|
||||
# Plot for different k with ggplot on same graph
|
||||
k_values <- c(10, 20, 50, 100)
|
||||
P_values <- lapply(k_values, create_partition)
|
||||
g_values <- lapply(P_values, g, X)
|
||||
|
||||
ggplot() +
|
||||
geom_line(aes(x = X, y = f(X)), col = "red") +
|
||||
geom_line(aes(x = X, y = g(X, P_values[[1]])), col = "green") +
|
||||
geom_line(aes(x = X, y = g(X, P_values[[2]])), col = "orange") +
|
||||
geom_line(aes(x = X, y = g(X, P_values[[3]])), col = "purple") +
|
||||
geom_line(aes(x = X, y = g(X, P_values[[4]])), col = "black") +
|
||||
labs(title = "Dominating function g of f for different size of partition", x = "x", y = "Density") +
|
||||
theme_minimal()
|
||||
```
|
||||
|
||||
Here, we implement the algorithm of accept-reject with the given partition and an appropriate function $g$ to compute $f$.
|
||||
```{r}
|
||||
set.seed(123)
|
||||
|
||||
g_accept_reject <- function(x, P) {
|
||||
if (x < P[1] | x >= P[length(P)]) {
|
||||
return(f1(x))
|
||||
} else {
|
||||
for (i in seq_along(P)) {
|
||||
if (x >= P[i] & x < P[i + 1]) {
|
||||
return(sup_inf(f1, P, i)[1])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stratified <- function(n, P) {
|
||||
samples <- numeric(0)
|
||||
rate <- 0
|
||||
while (length(samples) < n) {
|
||||
x <- rnorm(1, mu1, s1)
|
||||
u <- runif(1)
|
||||
if (u <= f(x) * (1 - a) / g_accept_reject(x, P)) {
|
||||
samples <- c(samples, x)
|
||||
}
|
||||
rate <- rate + 1
|
||||
}
|
||||
list(samples = samples, acceptance_rate = n / rate)
|
||||
}
|
||||
|
||||
n <- 10000
|
||||
k <- 100
|
||||
|
||||
P <- create_partition(k)
|
||||
samples <- stratified(n, P)
|
||||
X <- seq(-10, 10, length.out = 1000)
|
||||
|
||||
hist(samples$samples, breaks = 50, freq = FALSE, col = "lightblue", main = "Empirical density function f", xlab = "x")
|
||||
lines(X, f(X), col = "red")
|
||||
```
|
||||
|
||||
We also compute the acceptance rate of the algorithm.
|
||||
```{r}
|
||||
theorical_acceptance_rate <- 1 - a
|
||||
cat(sprintf("Empirical acceptance rate: %f, Theoretical acceptance rate: %f \n", samples$acceptance_rate, theorical_acceptance_rate))
|
||||
```
|
||||
|
||||
##### Question 12
|
||||
|
||||
```{r}
|
||||
set.seed(123)
|
||||
|
||||
stratified_delta <- function(n, delta) {
|
||||
samples <- numeric(0)
|
||||
P <- create_partition(n * delta)
|
||||
rate <- 0
|
||||
while (length(samples) < n | rate < delta * n) {
|
||||
x <- rnorm(1, mu1, s1)
|
||||
u <- runif(1)
|
||||
if (u <= f(x) * delta / g_accept_reject(x, P)) {
|
||||
samples <- c(samples, x)
|
||||
}
|
||||
rate <- rate + 1
|
||||
}
|
||||
list(samples = samples, partition = P, acceptance_rate = n / rate)
|
||||
}
|
||||
|
||||
n <- 10000
|
||||
delta <- 0.8
|
||||
|
||||
samples_delta <- stratified_delta(n, delta)
|
||||
X <- seq(-10, 10, length.out = 1000)
|
||||
|
||||
hist(samples_delta$samples, breaks = 50, freq = FALSE, col = "lightblue", main = "Empirical density function f", xlab = "x")
|
||||
lines(X, f(X), col = "red")
|
||||
```
|
||||
|
||||
We also compute the acceptance rate of the algorithm.
|
||||
```{r}
|
||||
theorical_acceptance_rate <- 1 - a
|
||||
cat(sprintf("Empirical acceptance rate: %f, Theoretical acceptance rate: %f \n", samples_delta$acceptance_rate, theorical_acceptance_rate))
|
||||
```
|
||||
|
||||
Now, we will test the stratified_delta function for different delta:
|
||||
|
||||
```{r}
|
||||
set.seed(123)
|
||||
n <- 1000
|
||||
deltas <- seq(0.1, 1, by = 0.1)
|
||||
|
||||
for (delta in deltas) {
|
||||
samples <- stratified_delta(n, delta)
|
||||
cat(sprintf("Delta: %.1f, Empirical acceptance rate: %f \n", delta, samples$acceptance_rate))
|
||||
}
|
||||
```
|
||||
|
||||
### Cumulative density function.
|
||||
|
||||
##### Question 13
|
||||
|
||||
The cumulative density function $\forall x \in \mathbb{R}$ $F_X(x) = \int_{-\infty}^{x} f(t) \,dt = \int_{\mathbb{R}} f(t) h(t), dt$ where $h(x) = \mathbb{1}_{X_n \le x}$
|
||||
|
||||
For a given $x \in \mathbb{R}$, a Monte Carlo estimator $F_n(x) = \frac{1}{n} \sum_{i=1}^{n} h(X_i)$ where $h$ is the same function as above and $(X_i)_{i=1}^{n} \sim^{iid} X$
|
||||
|
||||
##### Question 14
|
||||
|
||||
As $X_1, \dots, X_n$ are iid and follows the law of X, and $h$ is continuous and positive, we have $h(X_1), \dots h(X_n)$ are iid and $\mathbb{E}[h(X_i)] < + \infty$. By the law of large numbers, we have $F_n(X) = \frac{1}{n} \sum_{i=1}^{n} h(X_i) \xrightarrow{a.s} \mathbb{E}[h(X_1)] = F_X(x)$.
|
||||
|
||||
Moreover, $\forall \epsilon > 0$, $\exists N \in \mathbb{N}$ such that $\forall n \le N$, $|F_n(x) - F_X(x)| < \epsilon$, ie, $sup_{x \in \mathbb{R}} |F_n(x) - F_X(x)| \xrightarrow{a.s} 0$, by Glivenko-Cantelli theorem.
|
||||
|
||||
Hence, $F_n$ is a good estimate of $F_X$ as a function of $x$.
|
||||
|
||||
##### Question 15
|
||||
```{r}
|
||||
set.seed(123)
|
||||
n <- 10000
|
||||
Xn <- (rnorm(n, mu1, s1) - a * rnorm(n, mu2, s2)) / (1 - a)
|
||||
|
||||
h <- function(x, Xn) {
|
||||
return(Xn <= x)
|
||||
}
|
||||
|
||||
# Fn empirical
|
||||
empirical_cdf <- function(x, Xn) {
|
||||
return(mean(h(x, Xn)))
|
||||
}
|
||||
|
||||
# F theoretical
|
||||
F <- function(x) {
|
||||
Fx <- pnorm(x, mu1, s1) - a * pnorm(x, mu2, s2)
|
||||
return(Fx / (1 - a))
|
||||
}
|
||||
|
||||
X <- seq(-10, 10, length.out = n)
|
||||
cat(sprintf("Empirical cdf: %f, Theoretical cdf: %f \n", empirical_cdf(X, Xn), mean(F(Xn))))
|
||||
|
||||
length(X)
|
||||
length(empirical_cdf(X, Xn))
|
||||
plot(X, empirical_cdf(X, Xn), col = "red", type = "l", xlab = "x", ylab = "F(x)")
|
||||
lines(X, F(X), col = "blue")
|
||||
```
|
||||
|
||||
Now we plot the empirical and theoretical cumulative density functions for different n.
|
||||
```{r}
|
||||
n_values <- seq(100, 10000, length.out = 10)
|
||||
|
||||
```
|
||||
|
||||
##### Question 16
|
||||
|
||||
As $X_1, \dots, X_n$ are iid and follows the law of X, and $h$ is continuous and positive, we have $h(X_1), \dots h(X_n)$ are iid and $\mathbb{E}[h(X_i)^2] < + \infty$. By the Central Limit Theorem, we have $\sqrt{n} \frac{(F_n(x) - F_X(x))}{\sigma} \xrightarrow{d} \mathcal{N}(0, 1)$ where $\sigma^2 = Var(h(X_1))$.
|
||||
|
||||
So we have $\lim_{x\to\infty} \mathbb{P}(\sqrt{n} \frac{(F_n(x) - F_X(x))}{\sigma} \le q_{1-\frac{\alpha}{2}}^{\mathcal{N}(0, 1)})= 1 - \alpha$
|
||||
|
||||
So by computing the quantile of the normal distribution, we can have a confidence interval for $F_X(x)$ :
|
||||
$F_X(x) \in [F_n(x) - \frac{q_{1-\frac{\alpha}{2}}^{\mathcal{N}(0, 1)} \sigma}{\sqrt{n}} ; F_n(x) + \frac{q_{1-\frac{\alpha}{2}}^{\mathcal{N}(0, 1)} \sigma}{\sqrt{n}}]$
|
||||
|
||||
```{r}
|
||||
Fn <- empirical_cdf(X, Xn)
|
||||
sigma <- sqrt(Fn - Fn^2)
|
||||
q <- qnorm(0.975)
|
||||
interval <- c(Fn - q * sigma / sqrt(n), Fn + q * sigma / sqrt(n))
|
||||
cat(sprintf("Confidence interval: [%f, %f] \n", interval[1], interval[2]))
|
||||
```
|
||||
|
||||
##### Question 17
|
||||
|
||||
```{r}
|
||||
q <- qnorm(0.975)
|
||||
x <- 1
|
||||
epsilon <- 0.01
|
||||
n_required <- (1.96^2 * F(x) * (1 - F(x))) / epsilon^2
|
||||
ceiling(n_required)
|
||||
```
|
||||
|
||||
Reference in New Issue
Block a user