mirror of
https://github.com/ArthurDanjou/ArtStudies.git
synced 2026-02-13 00:07:33 +01:00
Compare commits
6 Commits
7b8cf72324
...
3a7567c122
| Author | SHA1 | Date | |
|---|---|---|---|
| 3a7567c122 | |||
| 02d1048b8f | |||
| 3d0bb19a3f | |||
| be05d6fff3 | |||
| b7ad115386 | |||
| 456a2fe96d |
24
M2/Data Visualisation/init.R
Normal file
24
M2/Data Visualisation/init.R
Normal file
@@ -0,0 +1,24 @@
|
||||
# Packages to install (original order preserved).
packages_to_install <- c(
  "lattice",
  "grid",
  "ggplot2",
  "gridExtra",
  "locfit",
  "scales",
  "formattable",
  "RColorBrewer",
  "plotly",
  "dplyr",
  "tidyr",
  "rmarkdown",
  "ggthemes",
  "cowplot",
  "kableExtra",
  "ggridges",
  "colorspace",
  "sf",
  "mapview",
  "tidyverse",
  "readxl",
  "readr",
  "giscoR",
  "gapminder",
  "GGally",
  "ggfortify",
  "lubridate",
  "zoo",
  "xts",
  "forecast",
  "feasts",
  "tseries",
  "tsibble",
  "fable"
)
|
||||
|
||||
# Install a package only when it is not already available.
#
# Args:
#   package_name: name of the package, as a character string.
#
# Returns:
#   NULL, invisibly. The outcome is reported through message()/warning().
install_if_absent <- function(package_name) {
  if (requireNamespace(package_name, quietly = TRUE)) {
    message(
      paste("Package", package_name, "déjà installé, installation ignorée.")
    )
    return(invisible(NULL))
  }

  install.packages(package_name)

  # install.packages() signals most failures (package not available, non-zero
  # exit status, ...) as warnings rather than errors, so check that the
  # package can really be found before announcing success.
  if (requireNamespace(package_name, quietly = TRUE)) {
    message(paste("Package", package_name, "installé avec succès."))
  } else {
    warning(
      paste("Package", package_name, "n'a pas pu être installé."),
      call. = FALSE
    )
  }
  invisible(NULL)
}
|
||||
|
||||
# Apply the installer to every package in the list.
# invisible() keeps the list of per-package return values from being
# auto-printed when the script is run at top level (e.g. with Rscript).
invisible(lapply(packages_to_install, install_if_absent))
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,144 +0,0 @@
|
||||
"""
|
||||
Created on Thu Oct 3 15:57:44 2024
|
||||
|
||||
@author: turinici
|
||||
"""
|
||||
|
||||
|
||||
"""
|
||||
This program uses historical data in the format in :
|
||||
https://turinici.com/wp-content/uploads/cours/common/close_cac40_historical.csv
|
||||
|
||||
It can also be downloaded from Yahoo Finance in daily
|
||||
prices (at least the "close"), if possible at least 5 years
|
||||
|
||||
Idea: use yahoo e.g., yfinance package
|
||||
"pip install yfinance"
|
||||
|
||||
|
||||
Then the code does :
|
||||
|
||||
1'/ order by increasing date
|
||||
|
||||
2/ plot price histogram and returns (with "log" and/or "actuarial")
|
||||
|
||||
3/ test normality of : prices, log returns, actuarial returns
|
||||
for instance can use scipy.stats.normaltest
|
||||
|
||||
4/ shows the random versus optimal results
|
||||
|
||||
TODO : replace "None" by what is required to implement the task.
|
||||
|
||||
"""
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from scipy.stats import kstest, normaltest # type: ignore
|
||||
|
||||
#from scipy.special import softmax
|
||||
|
||||
# Load the closing prices from the course CSV, which is expected to be
# available locally at the path below.
# NOTE(review): the 'Date' index is not parsed as dates here, so the sort
# below orders the index values exactly as read from the file -- confirm
# that this ordering is chronological for the file's date format.
data = pd.read_csv(
    "M2/Risks Management/TP1/close_cac40_historical.csv",
    sep=";",
    index_col="Date",
)
data.head()

# Order rows by increasing date, keeping the variable name 'data'.
data = data.sort_index()
data.head()
data.tail()

# Histogram of the price levels for every column.
_ = data.hist(bins=30, figsize=(15, 15))
|
||||
|
||||
def normality_test(data, kolmogorov_smirnov=False, level=0.01, print_results=True):
    """Test each column of ``data`` for normality and count the outcomes.

    Args:
        data: DataFrame whose columns are tested one by one; missing values
            are dropped per column before testing.
        kolmogorov_smirnov: if False (default) use ``scipy.stats.normaltest``;
            otherwise use ``scipy.stats.kstest`` against a normal law whose
            parameters are the column's own mean and standard deviation.
        level: p-value threshold; a column counts as normal when its
            p-value is >= ``level``.
        print_results: if True, print each column's p-value and verdict and
            the two final counts; if False, print nothing.

    Returns:
        Tuple ``(normalok, normalnotok)``: the number of columns whose
        p-value is >= ``level`` and the number whose p-value is < ``level``.
    """
    pvalues = []
    for col in data.keys():
        # Drop missing values once so the test and the fitted parameters are
        # computed on exactly the same sample.
        sample = data[col].dropna()
        if kolmogorov_smirnov:
            pv = kstest(sample, 'norm', args=(sample.mean(), sample.std())).pvalue
        else:
            pv = normaltest(sample).pvalue
        pvalues.append(pv)
        # Per-column output honours print_results like the summary does.
        if print_results:
            res = 'normal' if pv >= level else 'not normal'
            print("Test pval=", pv, 'res=', res)

    normalok = sum(1 for pv in pvalues if pv >= level)
    normalnotok = sum(1 for pv in pvalues if pv < level)
    if print_results:
        print("no. of normal = ", normalok)
        print("no. of not normal = ", normalnotok)
    return normalok, normalnotok
|
||||
|
||||
# Normality of the raw price levels.
normality_test(data)

# Log returns computed from 'data'.
# (The actuarial alternative would be data.pct_change().)
returns = np.log(data / data.shift(1))

# Number of histogram bins: integer part of sqrt(number of rows).
_ = returns.hist(bins=int(np.sqrt(len(returns))), figsize=(15, 15))  # type: ignore

# Test only the most recent 75 rows (the original note reads "last 3 months").
normality_test(returns.tail(75))  # type: ignore

###########################################################
print('normality tests for increments, not returns!!')
# First differences of the prices (price at t minus price at t-1).
increments = data.diff()
_ = increments.hist(bins=int(np.sqrt(len(increments))), figsize=(15, 15))

normality_test(increments.tail(75))
|
||||
########################################################################
|
||||
|
||||
|
||||
|
||||
#%%
|
||||
# Number of stocks kept for the portfolio experiment.
nb = 10
# Work on a copy so 'returns' itself stays untouched.
all_returns = returns.copy()
nb_all = len(all_returns.columns)
if nb_all < nb:
    print("too many number of stocks, revert to max")
    nb = nb_all

# Draw nb distinct column names at random and keep only those columns.
nb_stocks_names = np.random.choice(all_returns.columns, size=nb, replace=False)  # type: ignore
returns_small = all_returns.loc[:, nb_stocks_names]  # type: ignore
|
||||
|
||||
#%%
|
||||
# Mean vector and covariance matrix of the selected returns.
mean_returns = returns_small.mean()
cov_matrix = returns_small.cov()

# Return and standard deviation of 500 randomly drawn portfolios.
rdt_list = []
std_list = []
for _ in range(500):
    # Sample non-negative weights and rescale them to sum to 1. The frontier
    # these points are compared with starts at 1/sqrt(a), the minimum standard
    # deviation of a fully-invested portfolio, so the random allocations must
    # satisfy the same budget constraint to be comparable.
    allocation = np.random.random(nb)
    allocation /= allocation.sum()
    rdt_port = allocation @ mean_returns
    std_port = np.sqrt(allocation @ cov_matrix @ allocation)
    rdt_list.append(rdt_port)
    std_list.append(std_port)
|
||||
|
||||
|
||||
inverse_cov = np.linalg.inv(cov_matrix)

# Efficient frontier for weights summing to 1, drawn on the same graph as the
# random portfolios. With
#   a = 1' S^-1 1,   b = 1' S^-1 mu,   c = mu' S^-1 mu
# the best return at standard deviation sigma is
#   b/a + sqrt(sigma^2 - 1/a) * sqrt(c - b^2/a).
onesM = np.ones_like(mean_returns)
a = onesM.T @ inverse_cov @ onesM
b = onesM.T @ inverse_cov @ mean_returns
c = np.asarray(mean_returns) @ inverse_cov @ mean_returns

# Standard deviations from just above the minimum-variance value 1/sqrt(a)
# up to slightly beyond the riskiest simulated portfolio.
sigmarange = np.linspace(1. / np.sqrt(a) + 1.e-10, 1.1 * np.max(std_list), 47)

# Slope factor of the frontier. It does not depend on sigma; it is clamped at
# 0 because c - b^2/a is non-negative in exact arithmetic but rounding can
# push it slightly below zero.
factor = np.sqrt(max(c - b**2 / a, 0.0))
optimal_return = b / a + np.sqrt(sigmarange**2 - 1. / a) * factor

# Scatter of the random portfolios with the upper frontier branch in red.
fig = plt.figure('perf')
plt.scatter(std_list, rdt_list)
plt.plot(sigmarange, optimal_return, 'r-')
plt.xlabel('std')
plt.ylabel('rdt')
#plt.xlim([0,.2])
#plt.ylim([-.05,.05])
plt.show()
|
||||
|
||||
|
||||
# %%
|
||||
File diff suppressed because one or more lines are too long
@@ -29,6 +29,7 @@ The projects are organized into two main sections:
|
||||
- `Statistical Learning`
|
||||
|
||||
- `M2`
|
||||
- `Data Visualisation`
|
||||
- `Linear Models`
|
||||
- `Machine Learning`
|
||||
- `Risks Management`
|
||||
@@ -50,4 +51,5 @@ The projects are organized into two main sections:
|
||||
- [RMarkdown](https://rmarkdown.rstudio.com): A dynamic tool for combining code, results, and narrative into high-quality documents and presentations.
|
||||
- [FactoMineR](https://factominer.free.fr/): An R package focused on multivariate exploratory data analysis (e.g., PCA, MCA, CA).
|
||||
- [ggplot2](https://ggplot2.tidyverse.org): A grammar-based graphics package for creating complex and elegant visualizations in R.
|
||||
- [RShiny](https://shiny.rstudio.com): A web application framework for building interactive web apps directly from R.
|
||||
- and my 🧠.
|
||||
|
||||
Reference in New Issue
Block a user