mirror of
https://github.com/ArthurDanjou/ArtStudies.git
synced 2026-01-14 13:54:06 +01:00
Add TP1 GLM & DM Monte Carlo
This commit is contained in:
BIN
M1/General Linear Models/TP1/.RData
Normal file
BIN
M1/General Linear Models/TP1/.RData
Normal file
Binary file not shown.
345
M1/General Linear Models/TP1/.Rhistory
Normal file
345
M1/General Linear Models/TP1/.Rhistory
Normal file
@@ -0,0 +1,345 @@
|
||||
library(rmarkdown)
|
||||
health <- read.table("health.txt", header = TRUE, sep = " ", dec = ".")
|
||||
paged_table(health)
|
||||
library(dplyr)
|
||||
names(health)
|
||||
library(dplyr)
|
||||
names(health)
|
||||
Health<-health[,2:4]
|
||||
names(Ozone)
|
||||
library(dplyr)
|
||||
names(health)
|
||||
Health<-health[,2:4]
|
||||
names(Health)
|
||||
library(dplyr)
|
||||
names(health)
|
||||
Health<-health[,2:5]
|
||||
names(Health)
|
||||
library(ggplot2)
|
||||
library(plotly)
|
||||
p1<-ggplot(health) + aes(x = T12, y = maxO3) + geom_point(col = 'red', size = 0.5) + geom_smooth(method = "lm", se = FALSE)
|
||||
ggplotly(p1)
|
||||
library(ggplot2)
|
||||
library(plotly)
|
||||
p1<-ggplot(health) + aes(x = age, y = y) + geom_point(col = 'red', size = 0.5) + geom_smooth(method = "lm", se = FALSE)
|
||||
ggplotly(p1)
|
||||
library(rmarkdown)
|
||||
health <- read.table("health.txt", header = TRUE, sep = " ", dec = ".")
|
||||
paged_table(health)
|
||||
library(rmarkdown)
|
||||
health <- read.table("health.txt", header = TRUE, sep = " ", dec = ".")
|
||||
paged_table(health)
|
||||
library(dplyr)
|
||||
library(corrplot)
|
||||
install.packages("corrplot")
|
||||
library(dplyr)
|
||||
library(corrplot)
|
||||
correlation_matrix<-cor(Health)
|
||||
library(dplyr)
|
||||
library(corrplot)
|
||||
correlation_matrix<-cor(health)
|
||||
corrplot(correlation_matrix, order = 'hclust',addrect = 3)
|
||||
GGally
|
||||
install.packages("GGally")
|
||||
library(dplyr)
|
||||
library(corrplot)
|
||||
correlation_matrix<-cor(health)
|
||||
corrplot(correlation_matrix, order = 'hclust',addrect = 3)
|
||||
library(GGally)
|
||||
ggpairs(Ozone)
|
||||
library(dplyr)
|
||||
library(corrplot)
|
||||
correlation_matrix<-cor(health)
|
||||
corrplot(correlation_matrix, order = 'hclust',addrect = 3)
|
||||
library(GGally)
|
||||
ggpairs(health)
|
||||
library(dplyr)
|
||||
library(corrplot)
|
||||
correlation_matrix<-cor(health)
|
||||
corrplot(correlation_matrix, order = 'hclust',addrect = 3)
|
||||
library(GGally)
|
||||
ggpairs(health)
|
||||
library(dplyr)
|
||||
library(corrplot)
|
||||
correlation_matrix<-cor(health)
|
||||
corrplot(correlation_matrix, order = 'hclust',addrect = 3)
|
||||
Health <- health[2:5]
|
||||
library(dplyr)
|
||||
library(corrplot)
|
||||
correlation_matrix<-cor(Health)
|
||||
corrplot(correlation_matrix, order = 'hclust',addrect = 3)
|
||||
model <- lm(y ~ ., data = Health)
|
||||
coefficients(model)
|
||||
summary(model)
|
||||
library(ggfortify)
|
||||
install.packages("ggfortify")
|
||||
library(ggfortify)
|
||||
autoplot(mod,1)
|
||||
library(ggfortify)
|
||||
autoplot(model,1)
|
||||
library(ggfortify)
|
||||
autoplot(model,1)
|
||||
autoplot(model,3)
|
||||
library(ggfortify)
|
||||
autoplot(model,1)
|
||||
autoplot(model,3)
|
||||
set.seed(0)
|
||||
durbinWatsonTest(model)
|
||||
install.packages("car")
|
||||
library(ggfortify)
|
||||
autoplot(model,1)
|
||||
autoplot(model,3)
|
||||
set.seed(0)
|
||||
durbinWatsonTest(model)
|
||||
library(ggfortify)
|
||||
autoplot(model,1)
|
||||
autoplot(model,3)
|
||||
set.seed(0)
|
||||
durbinWatsonTest(model)
|
||||
library(ggfortify)
|
||||
library(car)
|
||||
autoplot(model,1)
|
||||
autoplot(model,3)
|
||||
set.seed(0)
|
||||
durbinWatsonTest(model)
|
||||
library(ggfortify)
|
||||
library(car)
|
||||
autoplot(model,1)
|
||||
autoplot(model,3)
|
||||
set.seed(0)
|
||||
durbinWatsonTest(model)
|
||||
library(ggfortify)
|
||||
library(car)
|
||||
autoplot(model,1)
|
||||
autoplot(model,3)
|
||||
set.seed(0)
|
||||
durbinWatsonTest(model)
|
||||
autoplot(model,2)
|
||||
model <- lm(y ~ age, data = Health)
|
||||
coefficients(model)
|
||||
library(ggfortify)
|
||||
library(car)
|
||||
autoplot(model,1)
|
||||
autoplot(model,3)
|
||||
set.seed(0)
|
||||
durbinWatsonTest(model)
|
||||
autoplot(model,2)
|
||||
model <- lm(y ~ ., data = Health)
|
||||
coefficients(model)
|
||||
library(ggfortify)
|
||||
library(car)
|
||||
autoplot(model,1)
|
||||
autoplot(model,3)
|
||||
set.seed(0)
|
||||
durbinWatsonTest(model)
|
||||
autoplot(model,2)
|
||||
library(ggfortify)
|
||||
library(car)
|
||||
autoplot(model,1)
|
||||
autoplot(model,3)
|
||||
set.seed(0)
|
||||
durbinWatsonTest(model)
|
||||
autoplot(model,2)
|
||||
model <- lm(y ~ ., data = Health)
|
||||
coefficients(model)
|
||||
summary(model)
|
||||
model <- lm(y ~ age, data = Health)
|
||||
coefficients(model)
|
||||
summary(model)
|
||||
library(ggfortify)
|
||||
library(car)
|
||||
autoplot(model,1)
|
||||
model <- lm(y ~ ., data = Health)
|
||||
coefficients(model)
|
||||
summary(model)
|
||||
library(ggfortify)
|
||||
library(car)
|
||||
autoplot(model,1)
|
||||
library(GGally)
|
||||
ggpairs(model)
|
||||
library(GGally)
|
||||
ggpairs(health)
|
||||
library(GGally)
|
||||
ggpairs(Health)
|
||||
library(GGally)
|
||||
ggpairs(Health, mapping = F)
|
||||
library(GGally)
|
||||
ggpairs(Health, progress = F)
|
||||
model2 <- lm(y ~ age**2 + tri + chol, data = Health)
|
||||
model2 <- lm(y ~ age**2 + tri + chol, data = Health)
|
||||
summary(model2)
|
||||
coefficients(model2)
|
||||
library(ggfortify)
|
||||
library(car)
|
||||
autoplot(model2,1)
|
||||
autoplot(model2,3)
|
||||
set.seed(0)
|
||||
durbinWatsonTest(model2)
|
||||
autoplot(model2,2)
|
||||
model2 <- lm(y ~ sqrt(age) + tri + chol, data = Health)
|
||||
summary(model2)
|
||||
coefficients(model2)
|
||||
model2 <- lm(y ~ sqrt(age) + tri + chol, data = Health)
|
||||
summary(model2)
|
||||
coefficients(model2)
|
||||
library(ggfortify)
|
||||
library(car)
|
||||
autoplot(model2,1)
|
||||
Health2 <- Health
|
||||
Health2[1] <- Health2[1]^2
|
||||
model2 <- lm(y ~ ., data = Health)
|
||||
summary(model2)
|
||||
coefficients(model2)
|
||||
library(ggfortify)
|
||||
library(car)
|
||||
autoplot(model2,1)
|
||||
View(Health)
|
||||
View(Health)
|
||||
View(Health2)
|
||||
View(Health2)
|
||||
Health2 <- Health
|
||||
View(Health2)
|
||||
View(Health2)
|
||||
Health2 <- Health
|
||||
Health2[2] <- Health2[2]^2
|
||||
model2 <- lm(y ~ ., data = Health)
|
||||
summary(model2)
|
||||
coefficients(model2)
|
||||
library(ggfortify)
|
||||
library(car)
|
||||
autoplot(model2,1)
|
||||
View(Health2)
|
||||
View(Health2)
|
||||
autoplot(model2,3)
|
||||
Health2 <- Health
|
||||
Health2[2] <- Health2[2]^2
|
||||
model2 <- lm(y ~ ., data = Health2)
|
||||
summary(model2)
|
||||
coefficients(model2)
|
||||
library(ggfortify)
|
||||
library(car)
|
||||
autoplot(model2,1)
|
||||
autoplot(model2,3)
|
||||
set.seed(0)
|
||||
durbinWatsonTest(model2)
|
||||
autoplot(model2,2)
|
||||
Health2 <- Health
|
||||
Health2[2] <- sqrt(Health2[2])
|
||||
model2 <- lm(y ~ ., data = Health2)
|
||||
summary(model2)
|
||||
coefficients(model2)
|
||||
library(ggfortify)
|
||||
library(car)
|
||||
autoplot(model2,1)
|
||||
Health2 <- Health
|
||||
Health2[2] <- Health2[2]^2
|
||||
model2 <- lm(y ~ ., data = Health2)
|
||||
summary(model2)
|
||||
coefficients(model2)
|
||||
Health2 <- Health
|
||||
Health2[2] <- Health2[2]^2
|
||||
model2 <- lm(y ~ ., data = Health2)
|
||||
summary(model2)
|
||||
coefficients(model2)
|
||||
library(ggfortify)
|
||||
library(car)
|
||||
autoplot(model2,1)
|
||||
autoplot(model2,3)
|
||||
Health2 <- Health
|
||||
Health2[6] <- Health2[2]^2
|
||||
View(Health2)
|
||||
View(Health2)
|
||||
Health2 <- c(Health, c())
|
||||
Health2[6] <- Health2[2]^2
|
||||
Health2 <- c(Health, c())
|
||||
Health2[5] <- Health2[2]^2
|
||||
View(Health2)
|
||||
View(Health2)
|
||||
Health2 <- Health
|
||||
Health2 <- append(Health2, Health[2]^2)
|
||||
model2 <- lm(y ~ ., data = Health2)
|
||||
View(Health2)
|
||||
Health2 <- Health
|
||||
Health2 <- append(Health2, Health[2]^2)
|
||||
model2 <- lm(y ~ ., data = Health2)
|
||||
Health2 <- Health
|
||||
# Add to Health a new column with the square of the age named age_sq
|
||||
Health2$age_sq <- Health2$age^2
|
||||
model2 <- lm(y ~ ., data = Health2)
|
||||
summary(model2)
|
||||
coefficients(model2)
|
||||
View(Health2)
|
||||
Health2 <- Health
|
||||
the age named age_sq
|
||||
library(ggfortify)
|
||||
library(car)
|
||||
autoplot(model2,1)
|
||||
Health2 <- Health
|
||||
Health2$age_sq <- Health2$age^2
|
||||
model2 <- lm(y ~ ., data = Health2)
|
||||
summary(model2)
|
||||
coefficients(model2)
|
||||
library(ggfortify)
|
||||
library(car)
|
||||
autoplot(model2,1)
|
||||
autoplot(model2,3)
|
||||
autoplot(model2,2)
|
||||
View(Health2)
|
||||
View(Health2)
|
||||
Health2 <- Health
|
||||
Health2$age_sq <- sqrt(Health2$age)
|
||||
model2 <- lm(y ~ ., data = Health2)
|
||||
summary(model2)
|
||||
coefficients(model2)
|
||||
library(ggfortify)
|
||||
library(car)
|
||||
autoplot(model2,1)
|
||||
Health2 <- Health
|
||||
Health2$age_sq <- Health2$age^2
|
||||
model2 <- lm(y ~ ., data = Health2)
|
||||
summary(model2)
|
||||
coefficients(model2)
|
||||
library(ggfortify)
|
||||
library(car)
|
||||
autoplot(model2,1)
|
||||
View(Health2)
|
||||
View(Health2)
|
||||
Health2 <- Health
|
||||
Health2$age_sq <- Health2$age^2
|
||||
Health2 <- Health2[1:99,]
|
||||
model2 <- lm(y ~ ., data = Health2)
|
||||
summary(model2)
|
||||
coefficients(model2)
|
||||
library(ggfortify)
|
||||
library(car)
|
||||
autoplot(model2,1)
|
||||
Health2 <- Health
|
||||
Health2$age_sq <- Health2$age^2
|
||||
Health2 <- Health2[1:5,24]
|
||||
model2 <- lm(y ~ ., data = Health2)
|
||||
Health2 <- Health
|
||||
Health2$age_sq <- Health2$age^2
|
||||
Health2 <- Health2[1:5,]
|
||||
model2 <- lm(y ~ ., data = Health2)
|
||||
summary(model2)
|
||||
coefficients(model2)
|
||||
View(Health2)
|
||||
Health2 <- Health
|
||||
Health2$age_sq <- Health2$age^2
|
||||
Health2 <- Health2[1:24,]
|
||||
model2 <- lm(y ~ ., data = Health2)
|
||||
summary(model2)
|
||||
coefficients(model2)
|
||||
library(ggfortify)
|
||||
library(car)
|
||||
autoplot(model2,1)
|
||||
autoplot(model2,3)
|
||||
set.seed(0)
|
||||
durbinWatsonTest(model2)
|
||||
autoplot(model2,2)
|
||||
Health2 <- Health
|
||||
Health2$age_sq <- Health2$age^2
|
||||
Health2 <- Health2[1:24,]
|
||||
model2 <- lm(y ~ ., data = Health2)
|
||||
summary(model2)
|
||||
coefficients(model2)
|
||||
@@ -23,16 +23,12 @@ summary(model)
|
||||
```{r}
|
||||
library(ggfortify)
|
||||
library(car)
|
||||
autoplot(model,1)
|
||||
autoplot(model,1:3)
|
||||
```
|
||||
|
||||
The points are not well distributed around 0 -> [P1] is not verified
|
||||
|
||||
```{r}
|
||||
autoplot(model,3)
|
||||
```
|
||||
|
||||
The points are not well distributed around 1 -> [P2] is not verified
|
||||
The QQPlot is align with the line y = x, so it is globally gaussian -> [P4] is verified
|
||||
|
||||
```{r}
|
||||
set.seed(0)
|
||||
@@ -41,12 +37,6 @@ durbinWatsonTest(model)
|
||||
|
||||
The p-value is 0.58 > 0.05 -> We do not reject H0 so the residuals are not autocorrelated -> [P3] is verified
|
||||
|
||||
```{r}
|
||||
autoplot(model,2)
|
||||
```
|
||||
|
||||
The QQPlot is align with the line y = x, so it is globally gaussian -> [P4] is verified
|
||||
|
||||
```{r}
|
||||
library(GGally)
|
||||
ggpairs(Health, progress = F)
|
||||
@@ -68,16 +58,12 @@ coefficients(model2)
|
||||
```{r}
|
||||
library(ggfortify)
|
||||
library(car)
|
||||
autoplot(model2,1)
|
||||
autoplot(model2,1:4)
|
||||
```
|
||||
|
||||
The points are well distributed around 0 -> [P1] is verified
|
||||
|
||||
```{r}
|
||||
autoplot(model2,3)
|
||||
```
|
||||
|
||||
The points are not well distributed around 1 -> [P2] is verified
|
||||
The QQPlot is align with the line y = x, so it is gaussian -> [P4] is verified
|
||||
|
||||
```{r}
|
||||
set.seed(0)
|
||||
@@ -86,8 +72,3 @@ durbinWatsonTest(model2)
|
||||
|
||||
The p-value is 0.294 > 0.05 -> We do not reject H0 so the residuals are not autocorrelated -> [P3] is verified
|
||||
|
||||
```{r}
|
||||
autoplot(model2,2)
|
||||
```
|
||||
|
||||
The QQPlot is align with the line y = x, so it is gaussian -> [P4] is verified
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -0,0 +1,313 @@
|
||||
---
|
||||
title: "Groupe 03 Projet DANJOU - DUROUSSEAU"
|
||||
output: html_document
|
||||
---
|
||||
|
||||
# Exercise 1 : Negative weighted mixture
|
||||
|
||||
### Definition
|
||||
|
||||
##### Question 1
|
||||
|
||||
The conditions for a function f to be a probability density are :
|
||||
|
||||
- f is defined on $\mathbb{R}$
|
||||
- f is non-negative, ie $f(x) \ge 0$, $\forall x \in \mathbb{R}$
|
||||
- f is Lebesgue-integrable
|
||||
- and $\int_{\mathbb{R}} f(x) \,dx = 1$
|
||||
|
||||
The function f, to be a density, needs to be non-negative, ie $f(|x|) \ge 0$ when $|x| \rightarrow \infty$
|
||||
|
||||
We have, when $|x| \rightarrow \infty$, $f_i(|x|) \sim exp(\frac{-x^2}{\sigma_i^2})$ for $i = 1, 2$
|
||||
|
||||
Then, $f(|x|) \sim exp(\frac{-x^2}{\sigma_1^2}) - exp(\frac{-x^2}{\sigma_2^2})$
|
||||
|
||||
We want $f(|x|) \ge 0$, ie, $exp(\frac{-x^2}{\sigma_1^2}) - exp(\frac{-x^2}{\sigma_2^2}) \ge 0$
|
||||
|
||||
$\Leftrightarrow \sigma_1^2 \ge \sigma_2^2$
|
||||
|
||||
We can see that $f_1$ dominates the tail behavior.
|
||||
|
||||
\
|
||||
|
||||
##### Question 2
|
||||
|
||||
For given parameters $(\mu_1, \sigma_1^2)$ and $(\mu_2, \sigma_2^2)$, we have $\forall x \in \mathbb{R}$, $f(x) \ge 0$
|
||||
|
||||
$\Leftrightarrow \frac{1}{\sigma_1} exp(\frac{-(x-\mu_1)^2}{2 \sigma_1^2}) \ge \frac{a}{\sigma_2} exp(\frac{-(x-\mu_2)^2}{2 \sigma_2^2})$
|
||||
|
||||
$\Leftrightarrow 0 < a \le a^* = \min_{x \in \mathbb{R}} \frac{f_1(x)}{f_2(x)} = \min_{x \in \mathbb{R}} \frac{\sigma_2}{\sigma_1} exp(\frac{(x-\mu_2)^2}{2 \sigma_2^2} - \frac{(x-\mu_1)^2}{2 \sigma_1^2})$
|
||||
|
||||
To find $a^*$, we just have to minimize $g(x) := \frac{(x-\mu_2)^2}{2 \sigma_2^2} - \frac{(x-\mu_1)^2}{2 \sigma_1^2}$
|
||||
|
||||
First we derive $g$: $\forall x \in \mathbb{R}, g^{\prime}(x) = \frac{x - \mu_2}{\sigma_2^2} - \frac{x - \mu_1}{\sigma_1^2}$
|
||||
|
||||
We search $x^*$ such that $g^{\prime}(x^*) = 0$
|
||||
|
||||
$\Leftrightarrow x^* = \frac{\mu_2 \sigma_1^2 - \mu_1 \sigma_2^2}{\sigma_1^2 - \sigma_2^2}$
|
||||
|
||||
Then, we compute $a^* = \frac{f_1(x^*)}{f_2(x^*)}$
|
||||
|
||||
We call $C \in \mathbb{R}$ the normalization constant such that $f(x) = C (f_1(x) - af_2(x))$
|
||||
|
||||
To find $C$, we know that $1 = \int_{\mathbb{R}} f(x) \,dx = \int_{\mathbb{R}} C (f_1(x) - af_2(x)) \,dx = C \int_{\mathbb{R}} f_1(x) \,dx - Ca \int_{\mathbb{R}} f_2(x) \,dx = C(1-a)$ as $f_1$ and $f_2$ are density functions and by linearity of the integrals.
|
||||
|
||||
$\Leftrightarrow C = \frac{1}{1-a}$
|
||||
|
||||
\
|
||||
|
||||
##### Question 3
|
||||
|
||||
```{r}
|
||||
f <- function(a, mu1, mu2, s1, s2, x) {
|
||||
fx <- dnorm(x, mu1, s1) - a * dnorm(x, mu2, s2)
|
||||
fx[fx < 0] <- 0
|
||||
return(fx / (1 - a))
|
||||
}
|
||||
|
||||
a_star <- function(mu1, mu2, s1, s2) {
|
||||
x_star <- (mu2 * s1^2 - mu1 * s2^2) / (s1^2 - s2^2)
|
||||
return(dnorm(x_star, mu1, s1) / dnorm(x_star, mu2, s2))
|
||||
}
|
||||
```
|
||||
|
||||
```{r}
|
||||
mu1 <- 0
|
||||
mu2 <- 1
|
||||
s1 <- 3
|
||||
s2 <- 1
|
||||
|
||||
x <- seq(-10, 10, length.out = 1000)
|
||||
as <- a_star(mu1, mu2, s1, s2)
|
||||
a_values <- c(0.1, 0.2, 0.3, 0.4, 0.5, as)
|
||||
|
||||
plot(x, f(as, mu1, mu2, s1, s2, x),
|
||||
type = "l",
|
||||
col = "red",
|
||||
xlab = "x",
|
||||
ylab = "f(a, mu1, mu2, s1, s2, x)",
|
||||
main = "Density function of f(a, mu1, mu2, s1, s2, x) for different a"
|
||||
)
|
||||
for (i in (length(a_values) - 1):1) {
|
||||
lines(x, f(a_values[i], mu1, mu2, s1, s2, x), lty = 3, col = "blue")
|
||||
}
|
||||
legend("topright", legend = c("a = a*", "a != a*"), col = c("red", "blue"), lty = 1)
|
||||
```
|
||||
|
||||
We observe that for small values of a, the density f is close to the density of $f_1 \sim \mathcal{N}(\mu_1, \sigma_1^2)$. When a increases, the shape of evolves into the combinaison of two normal distributions. We observe that for $a = a^*$, the density the largest value of a for which the density is still a density function, indeed for $a > a^*$, the function f takes negative values so it is no longer a density.
|
||||
|
||||
```{r}
|
||||
s2_values <- seq(1, 10, length.out = 5)
|
||||
a <- 0.2
|
||||
|
||||
plot(x, f(a, mu1, mu2, s1, max(s2_values), x),
|
||||
type = "l",
|
||||
xlab = "x",
|
||||
ylab = "f(a, mu1, mu2, s1, s2, x)",
|
||||
col = "red",
|
||||
main = "Density function of f(a, mu1, mu2, s1, s2, x) for different s2"
|
||||
)
|
||||
|
||||
for (i in length(s2_values):1) {
|
||||
lines(x, f(a, mu1, mu2, s1, s2_values[i], x), lty = 1, col = rainbow(length(s2_values))[i])
|
||||
}
|
||||
|
||||
legend("topright", legend = paste("s2 =", s2_values), col = rainbow(length(s2_values)), lty = 1)
|
||||
```
|
||||
|
||||
We observe that when $\sigma_2^2 = 1$, the density $f$ has two peaks and when $\sigma_2^1 > 1$, the density $f$ has only one peak.
|
||||
|
||||
```{r}
|
||||
mu1 <- 0
|
||||
mu2 <- 1
|
||||
sigma1 <- 3
|
||||
sigma2 <- 1
|
||||
a <- 0.2
|
||||
as <- a_star(mu1, mu2, sigma1, sigma2)
|
||||
|
||||
cat(sprintf("a* = %f, a = %f, a <= a* [%s]", as, a, a <= as))
|
||||
```
|
||||
|
||||
We have $\sigma_1^2 \ge \sigma_2^2$ and $0 < a \le a^*$, so the numerical values are compatible with the constraints defined above.
|
||||
|
||||
### Inverse c.d.f Random Variable simulation
|
||||
|
||||
##### Question 4
|
||||
|
||||
To prove that the cumulative density function F associated with f is available in closed form, we need to compute $F(x) = \int_{-\infty}^{x} f(t) \,dt = \frac{1}{1-a} (\int_{-\infty}^{x} f_1(t)\, dt - a \int_{-\infty}^{x} f_2(t)\, dt) = \frac{1}{1-a} (F_1(x) - aF_2(x))$ where $F_1$ and $F_2$ are the cumulative density functions of $f_1 \sim \mathcal{N}(\mu1, \sigma_1^2)$ and $f_2 \sim \mathcal{N}(\mu2, \sigma_2^2)$ respectively.
|
||||
|
||||
Then, $F$ is a closed-form as a finite sum of closed forms.
|
||||
|
||||
```{r}
|
||||
F <- function(a, mu1, mu2, s1, s2, x) {
|
||||
Fx <- pnorm(x, mu1, s1) - a * pnorm(x, mu2, s2)
|
||||
return(Fx / (1 - a))
|
||||
}
|
||||
```
|
||||
|
||||
To construct an algorithm that returns the value of the inverse function method as a function of u ∈(0,1), of the parameters $a, \mu_1, \mu_2, \sigma_1, \sigma_2, x$, and of an approximation precision $\epsilon$, we can use the bisection method.
|
||||
|
||||
We fixe $\epsilon > 0$.\
|
||||
We set $u \in (0, 1)$.\
|
||||
We define $L = -10$ and $U = -L$, the bounds and $M = \frac{L + U}{2}$, the middle of our interval.\
|
||||
While $U - L > \epsilon$ :
|
||||
|
||||
- We compute $F(M) = \frac{1}{1-a} (F_1(M) - aF_2(M))$
|
||||
- If $F(M) < u$, we set $L = M$
|
||||
- Else, we set $U = M$
|
||||
- We set $M = \frac{L + U}{2}$
|
||||
|
||||
End while
|
||||
|
||||
For the generation of random variables from F, we can use the inverse transform sampling method.\
|
||||
|
||||
We set $X$ as an empty array and $n$ the number of random variables we want to generate.\
|
||||
We fixe $\epsilon > 0$.\
|
||||
For $i = 1, \dots, n$ :
|
||||
|
||||
- We set $u \in (0, 1)$.\
|
||||
|
||||
- We define $L = -10$ and $U = -L$, the bounds and $M = \frac{L + U}{2}$, the middle of our interval.\
|
||||
|
||||
- While $U - L > \epsilon$ :
|
||||
|
||||
- We compute $F(M) = \frac{1}{1-a} (F_1(M) - aF_2(M))$
|
||||
|
||||
- If $F(M) < u$, we set $L = M$
|
||||
|
||||
- Else, we set $U = M$
|
||||
|
||||
- We set $M = \frac{L + U}{2}$
|
||||
|
||||
- End while
|
||||
|
||||
- We add $M$ to $X$
|
||||
|
||||
End for We return $X$
|
||||
|
||||
##### Question 5
|
||||
|
||||
```{r}
|
||||
inv_cdf <- function(n) {
|
||||
X <- numeric(n)
|
||||
for (i in 1:n) {
|
||||
u <- runif(1)
|
||||
L <- -10
|
||||
U <- -L
|
||||
M <- (L + U) / 2
|
||||
while (U - L > 1e-6) {
|
||||
FM <- F(a, mu1, mu2, s1, s2, M)
|
||||
if (FM < u) {
|
||||
L <- M
|
||||
} else {
|
||||
U <- M
|
||||
}
|
||||
M <- (L + U) / 2
|
||||
}
|
||||
X[i] <- M
|
||||
}
|
||||
return(X)
|
||||
}
|
||||
```
|
||||
|
||||
```{r}
|
||||
set.seed(123)
|
||||
n <- 10000
|
||||
X <- inv_cdf(n)
|
||||
x <- seq(-10, 10, length.out = 1000)
|
||||
|
||||
hist(X, breaks = 100, freq = FALSE, col = "lightblue", main = "Empirical density function", xlab = "x")
|
||||
lines(x, f(a, mu1, mu2, s1, s2, x), col = "red")
|
||||
legend("topright", legend = c("Theorical density", "Empirical density"), col = c("red", "lightblue"), lty = 1)
|
||||
```
|
||||
|
||||
```{r}
|
||||
plot(ecdf(X), col = "blue", main = "Empirical cumulative distribution function", xlab = "x", ylab = "F(x)")
|
||||
lines(x, F(a, mu1, mu2, s1, s2, x), col = "red")
|
||||
legend("bottomright", legend = c("Theoretical cdf", "Empirical cdf"), col = c("red", "blue"), lty = 1)
|
||||
```
|
||||
|
||||
We can see of both graphs that the empirical cumulative distribution function is close to the theoretical one.
|
||||
|
||||
### Accept-Reject Random Variable simulation
|
||||
|
||||
##### Question 6
|
||||
|
||||
To simulate under f using the accept-reject algorithm, we need to find a density function g such that $f(x) \le M g(x)$ for all $x \in \mathbb{R}$, where M is a constant.\
|
||||
|
||||
Then, we generate $X \sim g$ and $U \sim \mathcal{U}([0,1])$.\
|
||||
We accept $Y = X$ if $U \le \frac{f(X)}{Mg(X)}$. Return $1$ otherwise.
|
||||
|
||||
The probability of acceptance is $\int_{\mathbb{R}} \frac{f(x)}{Mg(x)} g(x) \,dx = \frac{1}{M} \int_{\mathbb{R}} f(x) \,dx = \frac{1}{M}$
|
||||
|
||||
Here we pose $g = f1$.
|
||||
|
||||
Then we have $\frac{f(x)}{g(x)} = \frac{1}{1-a} (1 - a\frac{f_2(x)}{f_1(x)})$
|
||||
|
||||
We pose earlier that $a^* = \min_{x \in \mathbb{R}} \frac{f_1(x)}{f_2(x)} \Rightarrow \frac{1}{a^*} = \max_{x \in \mathbb{R}} \frac{f_2(x)}{f_1(x)}$.
|
||||
|
||||
We compute in our fist equation : $\frac{1}{1-a} (1 - a\frac{f_2(x)}{f_1(x)}) \leq \frac{1}{1-a} (1 - \frac{a}{a^*}) \leq \frac{1}{1-a}$ because $a \leq a^* \Rightarrow 1 - \frac{a}{a^*} \leq 0$
|
||||
|
||||
To conclude, we have $M = \frac{1}{1-a}$ and the probability of acceptance is $\frac{1}{M} = 1-a$
|
||||
|
||||
##### Question 7
|
||||
|
||||
```{r}
|
||||
accept_reject <- function(n, a) {
|
||||
X <- numeric(0)
|
||||
M <- 1 / (1 - a)
|
||||
|
||||
while (length(X) < n) {
|
||||
Y <- rnorm(1, mu1, s1)
|
||||
U <- runif(1)
|
||||
|
||||
if (U <= f(a, mu1, mu2, s1, s2, Y) / (M * dnorm(Y, mu1, s1))) {
|
||||
X <- append(X, Y)
|
||||
}
|
||||
}
|
||||
return(X)
|
||||
}
|
||||
```
|
||||
|
||||
```{r}
|
||||
set.seed(123)
|
||||
n <- 10000
|
||||
a <- 0.2
|
||||
X <- accept_reject(n, a)
|
||||
x <- seq(-10, 10, length.out = 1000)
|
||||
|
||||
hist(X, breaks = 100, freq = FALSE, col = "lightblue", main = "Empirical density function", xlab = "x")
|
||||
lines(x, f(a, mu1, mu2, s1, s2, x), col = "red")
|
||||
legend("topright", legend = c("Theorical density", "Empirical density"), col = c("red", "lightblue"), lty = 1)
|
||||
```
|
||||
|
||||
```{r}
|
||||
set.seed(123)
|
||||
|
||||
acceptance_rate <- function(n, a = 0.2) {
|
||||
Y <- rnorm(n, mu1, s1)
|
||||
U <- runif(n)
|
||||
return(mean(U <= f(a, mu1, mu2, s1, s2, Y) / (M * dnorm(Y, mu1, s1))))
|
||||
}
|
||||
|
||||
M <- 1 / (1 - a)
|
||||
n <- 10000
|
||||
cat(sprintf("[M = %.2f] Empirical acceptance rate: %f, Theoretical acceptance rate: %f \n", M, acceptance_rate(n), 1 / M))
|
||||
```
|
||||
|
||||
##### Question 8
|
||||
|
||||
```{r}
|
||||
set.seed(123)
|
||||
a_values <- seq(0.01, 1, length.out = 100)
|
||||
acceptance_rates <- numeric(length(a_values))
|
||||
as <- a_star(mu1, mu2, s1, s2)
|
||||
|
||||
for (i in seq_along(a_values)) {
|
||||
acceptance_rates[i] <- acceptance_rate(n, a_values[i])
|
||||
}
|
||||
|
||||
plot(a_values, acceptance_rates, type = "l", col = "blue", xlab = "a", ylab = "Acceptance rate", main = "Acceptance rate as a function of a")
|
||||
points(as, acceptance_rate(n, as), col = "red", pch = 16)
|
||||
legend("topright", legend = c("Acceptance rate for all a", "Acceptance rate for a_star"), col = c("lightblue", "red"), lty = c(1, NA), pch = c(NA, 16))
|
||||
```
|
||||
Reference in New Issue
Block a user