mirror of
https://github.com/ArthurDanjou/ArtStudies.git
synced 2026-03-16 05:11:40 +01:00
- Created a new R script 'packages.R' to manage necessary packages for the Classification and Regression module. - Included a list of required packages and a function to install any missing packages. - Implemented loading of all packages and added a success message upon completion.
4439 lines
509 KiB
HTML
4439 lines
509 KiB
HTML
<!DOCTYPE html>
|
||
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
|
||
|
||
<meta charset="utf-8">
|
||
<meta name="generator" content="quarto-1.8.27">
|
||
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
|
||
|
||
<meta name="author" content="Arthur DANJOU, Camille LEGRAND, Axelle MERIC, Moritz VON SIEMENS">
|
||
|
||
<title>IMPLIED VOLATILITY FROM OPTIONS DATA</title>
|
||
<style>
|
||
code{white-space: pre-wrap;}
|
||
span.smallcaps{font-variant: small-caps;}
|
||
div.columns{display: flex; gap: min(4vw, 1.5em);}
|
||
div.column{flex: auto; overflow-x: auto;}
|
||
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
|
||
ul.task-list{list-style: none;}
|
||
ul.task-list li input[type="checkbox"] {
|
||
width: 0.8em;
|
||
margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */
|
||
vertical-align: middle;
|
||
}
|
||
/* CSS for syntax highlighting */
|
||
html { -webkit-text-size-adjust: 100%; }
|
||
pre > code.sourceCode { white-space: pre; position: relative; }
|
||
pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
|
||
pre > code.sourceCode > span:empty { height: 1.2em; }
|
||
.sourceCode { overflow: visible; }
|
||
code.sourceCode > span { color: inherit; text-decoration: inherit; }
|
||
div.sourceCode { margin: 1em 0; }
|
||
pre.sourceCode { margin: 0; }
|
||
@media screen {
|
||
div.sourceCode { overflow: auto; }
|
||
}
|
||
@media print {
|
||
pre > code.sourceCode { white-space: pre-wrap; }
|
||
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
|
||
}
|
||
pre.numberSource code
|
||
{ counter-reset: source-line 0; }
|
||
pre.numberSource code > span
|
||
{ position: relative; left: -4em; counter-increment: source-line; }
|
||
pre.numberSource code > span > a:first-child::before
|
||
{ content: counter(source-line);
|
||
position: relative; left: -1em; text-align: right; vertical-align: baseline;
|
||
border: none; display: inline-block;
|
||
-webkit-touch-callout: none; -webkit-user-select: none;
|
||
-khtml-user-select: none; -moz-user-select: none;
|
||
-ms-user-select: none; user-select: none;
|
||
padding: 0 4px; width: 4em;
|
||
}
|
||
pre.numberSource { margin-left: 3em; padding-left: 4px; }
|
||
div.sourceCode
|
||
{ }
|
||
@media screen {
|
||
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
|
||
}
|
||
</style>
|
||
|
||
|
||
<script src="Projet_MRC_DANJOU_LEGRAND_MERIC_VONSIEMENS_files/libs/clipboard/clipboard.min.js"></script>
|
||
<script src="Projet_MRC_DANJOU_LEGRAND_MERIC_VONSIEMENS_files/libs/quarto-html/quarto.js" type="module"></script>
|
||
<script src="Projet_MRC_DANJOU_LEGRAND_MERIC_VONSIEMENS_files/libs/quarto-html/tabsets/tabsets.js" type="module"></script>
|
||
<script src="Projet_MRC_DANJOU_LEGRAND_MERIC_VONSIEMENS_files/libs/quarto-html/axe/axe-check.js" type="module"></script>
|
||
<script src="Projet_MRC_DANJOU_LEGRAND_MERIC_VONSIEMENS_files/libs/quarto-html/popper.min.js"></script>
|
||
<script src="Projet_MRC_DANJOU_LEGRAND_MERIC_VONSIEMENS_files/libs/quarto-html/tippy.umd.min.js"></script>
|
||
<script src="Projet_MRC_DANJOU_LEGRAND_MERIC_VONSIEMENS_files/libs/quarto-html/anchor.min.js"></script>
|
||
<link href="Projet_MRC_DANJOU_LEGRAND_MERIC_VONSIEMENS_files/libs/quarto-html/tippy.css" rel="stylesheet">
|
||
<link href="Projet_MRC_DANJOU_LEGRAND_MERIC_VONSIEMENS_files/libs/quarto-html/quarto-syntax-highlighting-ed96de9b727972fe78a7b5d16c58bf87.css" rel="stylesheet" id="quarto-text-highlighting-styles">
|
||
<script src="Projet_MRC_DANJOU_LEGRAND_MERIC_VONSIEMENS_files/libs/bootstrap/bootstrap.min.js"></script>
|
||
<link href="Projet_MRC_DANJOU_LEGRAND_MERIC_VONSIEMENS_files/libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
|
||
<link href="Projet_MRC_DANJOU_LEGRAND_MERIC_VONSIEMENS_files/libs/bootstrap/bootstrap-d6a003b94517c951b2d65075d42fb01b.min.css" rel="stylesheet" append-hash="true" id="quarto-bootstrap" data-mode="light">
|
||
<link href="Projet_MRC_DANJOU_LEGRAND_MERIC_VONSIEMENS_files/libs/pagedtable-1.1/css/pagedtable.css" rel="stylesheet">
|
||
<script src="Projet_MRC_DANJOU_LEGRAND_MERIC_VONSIEMENS_files/libs/pagedtable-1.1/js/pagedtable.js"></script>
|
||
<link href="Projet_MRC_DANJOU_LEGRAND_MERIC_VONSIEMENS_files/libs/htmltools-fill-0.5.8.1/fill.css" rel="stylesheet">
|
||
<script src="Projet_MRC_DANJOU_LEGRAND_MERIC_VONSIEMENS_files/libs/htmlwidgets-1.6.4/htmlwidgets.js"></script>
|
||
<script src="Projet_MRC_DANJOU_LEGRAND_MERIC_VONSIEMENS_files/libs/plotly-binding-4.11.0/plotly.js"></script>
|
||
<script src="Projet_MRC_DANJOU_LEGRAND_MERIC_VONSIEMENS_files/libs/typedarray-0.1/typedarray.min.js"></script>
|
||
<script src="Projet_MRC_DANJOU_LEGRAND_MERIC_VONSIEMENS_files/libs/jquery-3.5.1/jquery.min.js"></script>
|
||
<link href="Projet_MRC_DANJOU_LEGRAND_MERIC_VONSIEMENS_files/libs/crosstalk-1.2.2/css/crosstalk.min.css" rel="stylesheet">
|
||
<script src="Projet_MRC_DANJOU_LEGRAND_MERIC_VONSIEMENS_files/libs/crosstalk-1.2.2/js/crosstalk.min.js"></script>
|
||
<link href="Projet_MRC_DANJOU_LEGRAND_MERIC_VONSIEMENS_files/libs/plotly-htmlwidgets-css-2.11.1/plotly-htmlwidgets.css" rel="stylesheet">
|
||
<script src="Projet_MRC_DANJOU_LEGRAND_MERIC_VONSIEMENS_files/libs/plotly-main-2.11.1/plotly-latest.min.js"></script>
|
||
|
||
<script src="https://cdnjs.cloudflare.com/polyfill/v3/polyfill.min.js?features=es6"></script>
|
||
<script src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml-full.js" type="text/javascript"></script>
|
||
|
||
<script type="text/javascript">
|
||
// Typesets the math contained in `el` using whichever renderer is available
// on the page: MathJax is tried first, KaTeX second. Does nothing when
// neither `window.MathJax` nor `window.katex` is defined.
const typesetMath = (el) => {
  // MathJax path: hand the element over and stop.
  if (window.MathJax) {
    window.MathJax.typeset([el]);
    return;
  }

  // No renderer at all: nothing to do.
  if (!window.katex) {
    return;
  }

  // KaTeX path: render every <span class="math"> whose first child carries
  // non-empty text data. The collection is walked by index, as in the
  // original, because getElementsByClassName returns a live collection.
  const mathNodes = el.getElementsByClassName("math");
  const macros = [];
  let idx = 0;
  while (idx < mathNodes.length) {
    const node = mathNodes[idx];
    const texSource = node.firstChild;
    if (node.tagName == "SPAN" && texSource && texSource.data) {
      const renderOptions = {
        // Elements that also carry the "display" class are rendered in display mode.
        displayMode: node.classList.contains('display'),
        throwOnError: false,
        macros: macros,
        fleqn: false
      };
      window.katex.render(texSource.data, node, renderOptions);
    }
    idx += 1;
  }
};

// Expose the helper to other page scripts through the global Quarto object.
window.Quarto = {
  typesetMath: typesetMath
};
|
||
</script>
|
||
|
||
</head>
|
||
|
||
<body class="fullcontent quarto-light">
|
||
|
||
<div id="quarto-content" class="page-columns page-rows-contents page-layout-article">
|
||
|
||
<main class="content" id="quarto-document-content">
|
||
|
||
<header id="title-block-header" class="quarto-title-block default">
|
||
<div class="quarto-title">
|
||
<h1 class="title">IMPLIED VOLATILITY FROM OPTIONS DATA</h1>
|
||
</div>
|
||
|
||
|
||
|
||
<div class="quarto-title-meta">
|
||
|
||
<div>
|
||
<div class="quarto-title-meta-heading">Author</div>
|
||
<div class="quarto-title-meta-contents">
|
||
<p>Arthur DANJOU, Camille LEGRAND, Axelle MERIC, Moritz VON SIEMENS </p>
|
||
</div>
|
||
</div>
|
||
|
||
<div>
|
||
<div class="quarto-title-meta-heading">Published</div>
|
||
<div class="quarto-title-meta-contents">
|
||
<p class="date">Invalid Date</p>
|
||
</div>
|
||
</div>
|
||
|
||
|
||
</div>
|
||
|
||
|
||
|
||
</header>
|
||
|
||
|
||
<section id="implied-volatility-from-options-data" class="level1">
|
||
<h1>IMPLIED VOLATILITY FROM OPTIONS DATA</h1>
|
||
<section id="preliminary" class="level2">
|
||
<h2 class="anchored" data-anchor-id="preliminary">Preliminary</h2>
|
||
<p>We begin by loading the necessary packages for this analysis.</p>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb1"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(pacman)</span>
|
||
<span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a><span class="fu">p_load</span>(</span>
|
||
<span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a> brulee,</span>
|
||
<span id="cb1-4"><a href="#cb1-4" aria-hidden="true" tabindex="-1"></a> car,</span>
|
||
<span id="cb1-5"><a href="#cb1-5" aria-hidden="true" tabindex="-1"></a> carData,</span>
|
||
<span id="cb1-6"><a href="#cb1-6" aria-hidden="true" tabindex="-1"></a> caret,</span>
|
||
<span id="cb1-7"><a href="#cb1-7" aria-hidden="true" tabindex="-1"></a> class,</span>
|
||
<span id="cb1-8"><a href="#cb1-8" aria-hidden="true" tabindex="-1"></a> corrplot,</span>
|
||
<span id="cb1-9"><a href="#cb1-9" aria-hidden="true" tabindex="-1"></a> DataExplorer,</span>
|
||
<span id="cb1-10"><a href="#cb1-10" aria-hidden="true" tabindex="-1"></a> data.table,</span>
|
||
<span id="cb1-11"><a href="#cb1-11" aria-hidden="true" tabindex="-1"></a> dplyr,</span>
|
||
<span id="cb1-12"><a href="#cb1-12" aria-hidden="true" tabindex="-1"></a> fitdistrplus,</span>
|
||
<span id="cb1-13"><a href="#cb1-13" aria-hidden="true" tabindex="-1"></a> glmnet,</span>
|
||
<span id="cb1-14"><a href="#cb1-14" aria-hidden="true" tabindex="-1"></a> ggfortify,</span>
|
||
<span id="cb1-15"><a href="#cb1-15" aria-hidden="true" tabindex="-1"></a> ggplot2,</span>
|
||
<span id="cb1-16"><a href="#cb1-16" aria-hidden="true" tabindex="-1"></a> glue,</span>
|
||
<span id="cb1-17"><a href="#cb1-17" aria-hidden="true" tabindex="-1"></a> grid,</span>
|
||
<span id="cb1-18"><a href="#cb1-18" aria-hidden="true" tabindex="-1"></a> gridExtra,</span>
|
||
<span id="cb1-19"><a href="#cb1-19" aria-hidden="true" tabindex="-1"></a> hexbin,</span>
|
||
<span id="cb1-20"><a href="#cb1-20" aria-hidden="true" tabindex="-1"></a> kableExtra,</span>
|
||
<span id="cb1-21"><a href="#cb1-21" aria-hidden="true" tabindex="-1"></a> MASS,</span>
|
||
<span id="cb1-22"><a href="#cb1-22" aria-hidden="true" tabindex="-1"></a> lightgbm,</span>
|
||
<span id="cb1-23"><a href="#cb1-23" aria-hidden="true" tabindex="-1"></a> lme4,</span>
|
||
<span id="cb1-24"><a href="#cb1-24" aria-hidden="true" tabindex="-1"></a> lmerTest,</span>
|
||
<span id="cb1-25"><a href="#cb1-25" aria-hidden="true" tabindex="-1"></a> paletteer,</span>
|
||
<span id="cb1-26"><a href="#cb1-26" aria-hidden="true" tabindex="-1"></a> plotly,</span>
|
||
<span id="cb1-27"><a href="#cb1-27" aria-hidden="true" tabindex="-1"></a> pls,</span>
|
||
<span id="cb1-28"><a href="#cb1-28" aria-hidden="true" tabindex="-1"></a> randomForest,</span>
|
||
<span id="cb1-29"><a href="#cb1-29" aria-hidden="true" tabindex="-1"></a> ranger,</span>
|
||
<span id="cb1-30"><a href="#cb1-30" aria-hidden="true" tabindex="-1"></a> rBayesianOptimization,</span>
|
||
<span id="cb1-31"><a href="#cb1-31" aria-hidden="true" tabindex="-1"></a> reshape2,</span>
|
||
<span id="cb1-32"><a href="#cb1-32" aria-hidden="true" tabindex="-1"></a> rlang,</span>
|
||
<span id="cb1-33"><a href="#cb1-33" aria-hidden="true" tabindex="-1"></a> ROCR,</span>
|
||
<span id="cb1-34"><a href="#cb1-34" aria-hidden="true" tabindex="-1"></a> rsample,</span>
|
||
<span id="cb1-35"><a href="#cb1-35" aria-hidden="true" tabindex="-1"></a> shapviz,</span>
|
||
<span id="cb1-36"><a href="#cb1-36" aria-hidden="true" tabindex="-1"></a> scales,</span>
|
||
<span id="cb1-37"><a href="#cb1-37" aria-hidden="true" tabindex="-1"></a> skimr,</span>
|
||
<span id="cb1-38"><a href="#cb1-38" aria-hidden="true" tabindex="-1"></a> tibble,</span>
|
||
<span id="cb1-39"><a href="#cb1-39" aria-hidden="true" tabindex="-1"></a> tidyr,</span>
|
||
<span id="cb1-40"><a href="#cb1-40" aria-hidden="true" tabindex="-1"></a> tidymodels,</span>
|
||
<span id="cb1-41"><a href="#cb1-41" aria-hidden="true" tabindex="-1"></a> tidyverse,</span>
|
||
<span id="cb1-42"><a href="#cb1-42" aria-hidden="true" tabindex="-1"></a> xgboost</span>
|
||
<span id="cb1-43"><a href="#cb1-43" aria-hidden="true" tabindex="-1"></a>)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb2"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="fu">set.seed</span>(<span class="dv">2025</span>)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
<p>We fix the random seed to ensure the reproducibility of our results. This is a critical step in any data analysis or machine learning pipeline, as it allows others to replicate our findings and verify the robustness of our models.</p>
|
||
</section>
|
||
<section id="introduction" class="level2">
|
||
<h2 class="anchored" data-anchor-id="introduction">Introduction</h2>
|
||
<section id="financial-context-problem-formulation" class="level3">
|
||
<h3 class="anchored" data-anchor-id="financial-context-problem-formulation">Financial Context & Problem Formulation</h3>
|
||
<p>Implied volatility is the market’s anticipation of the future level of an underlying asset’s volatility. The easiest way to access implied volatility is by deriving it from option prices using the Black–Scholes formula.</p>
|
||
<p>Unlike historical volatility, also known as realized volatility, which measures past price fluctuations, implied volatility is forward-looking. It reflects the risk and uncertainty perceived by investors.</p>
|
||
<p>Obtaining an accurate measure of implied volatility is important not only to understand the market environment and investor sentiment, but also for option pricing, risk management, and hedging.</p>
|
||
<p>Predicting implied volatility allows us to better understand and anticipate market dynamics. Accurate forecasts are crucial for pricing options correctly, hedging positions, and improving risk management to reduce unexpected losses. For volatility-based trading strategies, anticipating movements in volatility can generate profitable opportunities.</p>
|
||
</section>
|
||
<section id="raw-dataset-overview" class="level3">
|
||
<h3 class="anchored" data-anchor-id="raw-dataset-overview">Raw Dataset Overview</h3>
|
||
<p>The dataset used for this study, named <code>Train_ISF.csv</code>, provides a comprehensive view of option market dynamics. It is structured as panel data, tracking multiple underlying assets over a specific time horizon. The raw dataset contains 1,909,465 observations and 19 variables.</p>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb3"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a>data_train <span class="ot"><-</span> <span class="fu">read_csv</span>(<span class="st">"Train_ISF.csv"</span>)</span>
|
||
<span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a>test <span class="ot"><-</span> <span class="fu">read.csv</span>(<span class="st">"Test_ISF.csv"</span>)</span>
|
||
<span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a><span class="fu">cat</span>(<span class="st">"Rows:"</span>, <span class="fu">nrow</span>(data_train), <span class="st">"</span><span class="sc">\n</span><span class="st">"</span>)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output cell-output-stdout">
|
||
<pre><code>Rows: 1909465 </code></pre>
|
||
</div>
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb5"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="fu">cat</span>(<span class="st">"Columns:"</span>, <span class="fu">ncol</span>(data_train), <span class="st">"</span><span class="sc">\n</span><span class="st">"</span>)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output cell-output-stdout">
|
||
<pre><code>Columns: 19 </code></pre>
|
||
</div>
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb7"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(data_train, <span class="dv">10</span>)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output cell-output-stdout">
|
||
<pre><code># A tibble: 10 × 19
|
||
asset_id obs_date strike_dispersion call_volume put_volume call_oi put_oi
|
||
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
|
||
1 A 2019-10-14 17.1 19878 1001 9365 12705
|
||
2 AA 2019-10-14 4.42 6194 3010 54013 38674
|
||
3 AABA 2019-10-14 5.23 4301 2863 159161 53453
|
||
4 AAL 2019-10-14 5.28 20034 7695 321062 217407
|
||
5 AAN 2019-10-14 7.99 133 520 621 657
|
||
6 AAOI 2019-10-14 9.13 568 867 13355 29898
|
||
7 AAON 2019-10-14 12.1 60 151 145 933
|
||
8 AAP 2019-10-14 5.04 921 943 11301 7080
|
||
9 AAPL 2019-10-14 2.88 84986 47927 1136840 960145
|
||
10 AAT 2019-10-14 12.2 2 73 3 78
|
||
# ℹ 12 more variables: maturity_count <dbl>, implied_vol_ref <dbl>,
|
||
# total_contracts <dbl>, realized_vol_short <dbl>, realized_vol_mid1 <dbl>,
|
||
# realized_vol_mid2 <dbl>, realized_vol_mid3 <dbl>, realized_vol_long1 <dbl>,
|
||
# realized_vol_long2 <dbl>, realized_vol_long3 <dbl>,
|
||
# realized_vol_long4 <dbl>, market_vol_index <dbl></code></pre>
|
||
</div>
|
||
</div>
|
||
<p>The variables can be categorized into five main groups describing the market conditions:</p>
|
||
<ul>
|
||
<li><strong>Identifiers:</strong> <code>asset_id</code> (categorical) and <code>obs_date</code> (temporal).</li>
|
||
<li><strong>Target Variable:</strong> <code>implied_vol_ref</code>, representing the implied volatility we aim to predict.</li>
|
||
<li><strong>Market Activity & Liquidity:</strong> This includes trading volumes (<code>call_volume</code>, <code>put_volume</code>), open interest (<code>call_oi</code>, <code>put_oi</code>), and the total number of contracts exchanged (<code>total_contracts</code>).</li>
|
||
<li><strong>Volatility Metrics:</strong> Historical realized volatility at different horizons (<code>realized_vol_short</code>, <code>realized_vol_mid1</code>–<code>realized_vol_mid3</code>, <code>realized_vol_long1</code>–<code>realized_vol_long4</code>) and the global market stress index (<code>market_vol_index</code>).</li>
|
||
<li><strong>Option Structure:</strong> Variables such as <code>strike_dispersion</code> and <code>maturity_count</code>, which describe the depth and breadth of the available option chain.</li>
|
||
</ul>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb9"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a>nb_assets <span class="ot"><-</span> <span class="fu">uniqueN</span>(data_train<span class="sc">$</span>asset_id)</span>
|
||
<span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a>nb_dates <span class="ot"><-</span> <span class="fu">uniqueN</span>(data_train<span class="sc">$</span>obs_date)</span>
|
||
<span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb9-4"><a href="#cb9-4" aria-hidden="true" tabindex="-1"></a><span class="fu">print</span>(<span class="fu">paste</span>(<span class="st">"Number of assets:"</span>, nb_assets))</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output cell-output-stdout">
|
||
<pre><code>[1] "Number of assets: 3887"</code></pre>
|
||
</div>
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb11"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a><span class="fu">print</span>(<span class="fu">paste</span>(<span class="st">"Number of dates:"</span>, nb_dates))</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output cell-output-stdout">
|
||
<pre><code>[1] "Number of dates: 544"</code></pre>
|
||
</div>
|
||
</div>
|
||
<p>The dataset covers a universe of 3,887 unique underlying assets across 544 distinct observation dates.</p>
|
||
<p>It is important to note that the panel is unbalanced. The theoretical maximum number of observations (3,887 x 544 = 2,114,528) exceeds the actual row count (1,909,465), indicating that not all assets are quoted or have available data for every date in the period. This sparsity is visualized in the following figure, which shows the availability of data points for a subset of assets.</p>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb13"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a>selected_assets <span class="ot"><-</span> <span class="fu">unique</span>(data_train<span class="sc">$</span>asset_id)[<span class="dv">100</span><span class="sc">:</span><span class="dv">120</span>]</span>
|
||
<span id="cb13-2"><a href="#cb13-2" aria-hidden="true" tabindex="-1"></a>selected_dates <span class="ot"><-</span> <span class="fu">unique</span>(data_train<span class="sc">$</span>obs_date)[<span class="dv">1</span><span class="sc">:</span><span class="dv">100</span>]</span>
|
||
<span id="cb13-3"><a href="#cb13-3" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb13-4"><a href="#cb13-4" aria-hidden="true" tabindex="-1"></a>data_subset <span class="ot"><-</span> data_train <span class="sc">|></span></span>
|
||
<span id="cb13-5"><a href="#cb13-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">filter</span>(asset_id <span class="sc">%in%</span> selected_assets) <span class="sc">|></span></span>
|
||
<span id="cb13-6"><a href="#cb13-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">filter</span>(obs_date <span class="sc">%in%</span> selected_dates)</span>
|
||
<span id="cb13-7"><a href="#cb13-7" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb13-8"><a href="#cb13-8" aria-hidden="true" tabindex="-1"></a><span class="fu">ggplot</span>(</span>
|
||
<span id="cb13-9"><a href="#cb13-9" aria-hidden="true" tabindex="-1"></a> data_subset,</span>
|
||
<span id="cb13-10"><a href="#cb13-10" aria-hidden="true" tabindex="-1"></a> <span class="fu">aes</span>(</span>
|
||
<span id="cb13-11"><a href="#cb13-11" aria-hidden="true" tabindex="-1"></a> <span class="at">x =</span> obs_date,</span>
|
||
<span id="cb13-12"><a href="#cb13-12" aria-hidden="true" tabindex="-1"></a> <span class="at">y =</span> implied_vol_ref,</span>
|
||
<span id="cb13-13"><a href="#cb13-13" aria-hidden="true" tabindex="-1"></a> <span class="at">group =</span> asset_id,</span>
|
||
<span id="cb13-14"><a href="#cb13-14" aria-hidden="true" tabindex="-1"></a> <span class="at">color =</span> <span class="fu">as.factor</span>(asset_id)</span>
|
||
<span id="cb13-15"><a href="#cb13-15" aria-hidden="true" tabindex="-1"></a> )</span>
|
||
<span id="cb13-16"><a href="#cb13-16" aria-hidden="true" tabindex="-1"></a>) <span class="sc">+</span></span>
|
||
<span id="cb13-17"><a href="#cb13-17" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_line</span>() <span class="sc">+</span></span>
|
||
<span id="cb13-18"><a href="#cb13-18" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_point</span>(<span class="at">size =</span> <span class="dv">1</span>) <span class="sc">+</span></span>
|
||
<span id="cb13-19"><a href="#cb13-19" aria-hidden="true" tabindex="-1"></a> <span class="fu">theme_minimal</span>() <span class="sc">+</span></span>
|
||
<span id="cb13-20"><a href="#cb13-20" aria-hidden="true" tabindex="-1"></a> <span class="fu">labs</span>(</span>
|
||
<span id="cb13-21"><a href="#cb13-21" aria-hidden="true" tabindex="-1"></a> <span class="at">title =</span> <span class="st">"Evolution of the Target for 10 Assets over time"</span>,</span>
|
||
<span id="cb13-22"><a href="#cb13-22" aria-hidden="true" tabindex="-1"></a> <span class="at">x =</span> <span class="st">"Date"</span>,</span>
|
||
<span id="cb13-23"><a href="#cb13-23" aria-hidden="true" tabindex="-1"></a> <span class="at">y =</span> <span class="st">"Value of the Target"</span>,</span>
|
||
<span id="cb13-24"><a href="#cb13-24" aria-hidden="true" tabindex="-1"></a> <span class="at">color =</span> <span class="st">"Asset ID"</span></span>
|
||
<span id="cb13-25"><a href="#cb13-25" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">+</span></span>
|
||
<span id="cb13-26"><a href="#cb13-26" aria-hidden="true" tabindex="-1"></a> <span class="fu">theme</span>(<span class="at">legend.position =</span> <span class="st">"right"</span>)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output-display">
|
||
<div class="quarto-figure quarto-figure-center">
|
||
<figure class="figure">
|
||
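<p><img src="Projet_MRC_DANJOU_LEGRAND_MERIC_VONSIEMENS_files/figure-html/first%20visu%20of%20assets-1.png" class="img-fluid quarto-figure quarto-figure-center figure-img" width="672" alt="Line chart of the target implied volatility (implied_vol_ref) by observation date for a subset of assets, one coloured line per asset"></p>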
|
||
</figure>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
</section>
|
||
<section id="data-pipeline-exploratory-analysis" class="level2">
|
||
<h2 class="anchored" data-anchor-id="data-pipeline-exploratory-analysis">Data Pipeline & Exploratory Analysis</h2>
|
||
<section id="data-splitting-strategy-preventing-data-leakage" class="level3">
|
||
<h3 class="anchored" data-anchor-id="data-splitting-strategy-preventing-data-leakage">Data Splitting Strategy: Preventing Data Leakage</h3>
|
||
<p>To ensure the reliability of our model and avoid look-ahead bias, the dataset was split into training and validation sets before performing any data manipulation, scaling, or feature engineering.</p>
|
||
<p>Instead of randomly splitting, we adopted a chronological splitting strategy. We identified the unique observation dates, sorted them, and established a temporal cutoff at the 80% mark. The first 80% of dates constitute the training set, which will be used to learn patterns. The subsequent 20% form the validation set, used to evaluate performance on unseen future data. This approach ensures that the model is evaluated on a future market regime it has never encountered during training. All data transformations were strictly calibrated on the training set alone; these fixed statistical parameters were then deterministically projected onto the out-of-sample datasets to ensure perfect methodological isolation.</p>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb14"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb14-1"><a href="#cb14-1" aria-hidden="true" tabindex="-1"></a>all_dates <span class="ot"><-</span> <span class="fu">sort</span>(<span class="fu">unique</span>(data_train<span class="sc">$</span>obs_date))</span>
|
||
<span id="cb14-2"><a href="#cb14-2" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb14-3"><a href="#cb14-3" aria-hidden="true" tabindex="-1"></a>cutoff_index <span class="ot"><-</span> <span class="fu">floor</span>(<span class="fu">length</span>(all_dates) <span class="sc">*</span> <span class="fl">0.8</span>)</span>
|
||
<span id="cb14-4"><a href="#cb14-4" aria-hidden="true" tabindex="-1"></a>cutoff_date <span class="ot"><-</span> all_dates[cutoff_index]</span>
|
||
<span id="cb14-5"><a href="#cb14-5" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb14-6"><a href="#cb14-6" aria-hidden="true" tabindex="-1"></a><span class="fu">print</span>(<span class="fu">paste</span>(<span class="st">"Cutoff Date:"</span>, cutoff_date))</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output cell-output-stdout">
|
||
<pre><code>[1] "Cutoff Date: 2021-07-20"</code></pre>
|
||
</div>
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb16"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a>train <span class="ot"><-</span> data_train <span class="sc">|></span> <span class="fu">arrange</span>(obs_date) <span class="sc">|></span> <span class="fu">filter</span>(obs_date <span class="sc"><=</span> cutoff_date)</span>
|
||
<span id="cb16-2"><a href="#cb16-2" aria-hidden="true" tabindex="-1"></a>val <span class="ot"><-</span> data_train <span class="sc">|></span> <span class="fu">arrange</span>(obs_date) <span class="sc">|></span> <span class="fu">filter</span>(obs_date <span class="sc">></span> cutoff_date)</span>
|
||
<span id="cb16-3"><a href="#cb16-3" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb16-4"><a href="#cb16-4" aria-hidden="true" tabindex="-1"></a><span class="fu">cat</span>(<span class="st">"Training Set Size:"</span>, <span class="fu">nrow</span>(train), <span class="st">"</span><span class="sc">\n</span><span class="st">"</span>)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output cell-output-stdout">
|
||
<pre><code>Training Set Size: 1533234 </code></pre>
|
||
</div>
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb18"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb18-1"><a href="#cb18-1" aria-hidden="true" tabindex="-1"></a><span class="fu">cat</span>(<span class="st">"Validation Set Size :"</span>, <span class="fu">nrow</span>(val), <span class="st">"</span><span class="sc">\n</span><span class="st">"</span>)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output cell-output-stdout">
|
||
<pre><code>Validation Set Size : 376231 </code></pre>
|
||
</div>
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb20"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb20-1"><a href="#cb20-1" aria-hidden="true" tabindex="-1"></a><span class="fu">cat</span>(<span class="st">"Last date in the Training set:"</span>, <span class="fu">as.character</span>(<span class="fu">max</span>(train<span class="sc">$</span>obs_date)), <span class="st">"</span><span class="sc">\n</span><span class="st">"</span>)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output cell-output-stdout">
|
||
<pre><code>Last date in the Training set: 2021-07-20 </code></pre>
|
||
</div>
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb22"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb22-1"><a href="#cb22-1" aria-hidden="true" tabindex="-1"></a><span class="fu">cat</span>(<span class="st">"First date in the Validation set:"</span>, <span class="fu">as.character</span>(<span class="fu">min</span>(val<span class="sc">$</span>obs_date)), <span class="st">"</span><span class="sc">\n</span><span class="st">"</span>)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output cell-output-stdout">
|
||
<pre><code>First date in the Validation set: 2021-07-21 </code></pre>
|
||
</div>
|
||
</div>
|
||
<p>Following this split, the training set covers the period up to <strong>2021-07-20</strong>, containing approximately <span class="math inline">\(1,533,234\)</span> observations. The validation set begins immediately after, on <strong>2021-07-21</strong>, ensuring a continuous timeline without overlap.</p>
|
||
</section>
|
||
<section id="data-cleansing-outlier-management" class="level3">
|
||
<h3 class="anchored" data-anchor-id="data-cleansing-outlier-management">Data Cleansing & Outlier Management</h3>
|
||
<p>Financial market data is inherently characterized by fat-tailed distributions. Metrics such as realized volatility, trading volumes, and strike dispersions frequently exhibit extreme, asymmetric spikes driven by macroeconomic shocks, earnings announcements, or transient liquidity crises. However, before addressing these structural anomalies, the baseline integrity of the dataset must be established.</p>
|
||
<section id="foundational-data-integrity-completeness-and-uniqueness" class="level4">
|
||
<h4 class="anchored" data-anchor-id="foundational-data-integrity-completeness-and-uniqueness">Foundational Data Integrity: Completeness and Uniqueness</h4>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb24"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb24-1"><a href="#cb24-1" aria-hidden="true" tabindex="-1"></a>skimr<span class="sc">::</span><span class="fu">skim</span>(data_train) <span class="sc">|></span> rmarkdown<span class="sc">::</span><span class="fu">paged_table</span>()</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output-display">
|
||
<div data-pagedtable="false">
|
||
<script data-pagedtable-source="" type="application/json">
|
||
{"columns":[{"label":[""],"name":["_rn_"],"type":[""],"align":["left"]},{"label":["skim_type"],"name":[1],"type":["chr"],"align":["left"]},{"label":["skim_variable"],"name":[2],"type":["chr"],"align":["left"]},{"label":["n_missing"],"name":[3],"type":["int"],"align":["right"]},{"label":["complete_rate"],"name":[4],"type":["dbl"],"align":["right"]},{"label":["Date.min"],"name":[5],"type":["date"],"align":["right"]},{"label":["Date.max"],"name":[6],"type":["date"],"align":["right"]},{"label":["Date.median"],"name":[7],"type":["date"],"align":["right"]},{"label":["Date.n_unique"],"name":[8],"type":["int"],"align":["right"]},{"label":["character.min"],"name":[9],"type":["int"],"align":["right"]},{"label":["character.max"],"name":[10],"type":["int"],"align":["right"]},{"label":["character.empty"],"name":[11],"type":["int"],"align":["right"]},{"label":["character.n_unique"],"name":[12],"type":["int"],"align":["right"]},{"label":["character.whitespace"],"name":[13],"type":["int"],"align":["right"]},{"label":["numeric.mean"],"name":[14],"type":["dbl"],"align":["right"]},{"label":["numeric.sd"],"name":[15],"type":["dbl"],"align":["right"]},{"label":["numeric.p0"],"name":[16],"type":["dbl"],"align":["right"]},{"label":["numeric.p25"],"name":[17],"type":["dbl"],"align":["right"]},{"label":["numeric.p50"],"name":[18],"type":["dbl"],"align":["right"]},{"label":["numeric.p75"],"name":[19],"type":["dbl"],"align":["right"]},{"label":["numeric.p100"],"name":[20],"type":["dbl"],"align":["right"]},{"label":["numeric.hist"],"name":[21],"type":["chr"],"align":["left"]}],"data":[{"1":"Date","2":"obs_date","3":"0","4":"1","5":"2019-10-14","6":"2021-12-31","7":"2020-11-20","8":"544","9":"NA","10":"NA","11":"NA","12":"NA","13":"NA","14":"NA","15":"NA","16":"NA","17":"NA","18":"NA","19":"NA","20":"NA","21":"NA","_rn_":"1"},{"1":"character","2":"asset_id","3":"0","4":"1","5":"<NA>","6":"<NA>","7":"<NA>","8":"NA","9":"1","10":"5","11":"0","12":"3887","13":"0","14":"NA","15":"NA","16":"NA","17"
:"NA","18":"NA","19":"NA","20":"NA","21":"NA","_rn_":"2"},{"1":"numeric","2":"strike_dispersion","3":"0","4":"1","5":"<NA>","6":"<NA>","7":"<NA>","8":"NA","9":"NA","10":"NA","11":"NA","12":"NA","13":"NA","14":"15.686960","15":"1.609823e+01","16":"0.330","17":"4.320","18":"9.640","19":"23.550","20":"375.00","21":"▇▁▁▁▁","_rn_":"3"},{"1":"numeric","2":"call_volume","3":"0","4":"1","5":"<NA>","6":"<NA>","7":"<NA>","8":"NA","9":"NA","10":"NA","11":"NA","12":"NA","13":"NA","14":"1387.693155","15":"8.751769e+03","16":"0.000","17":"48.000","18":"181.000","19":"697.000","20":"1547190.00","21":"▇▁▁▁▁","_rn_":"4"},{"1":"numeric","2":"put_volume","3":"0","4":"1","5":"<NA>","6":"<NA>","7":"<NA>","8":"NA","9":"NA","10":"NA","11":"NA","12":"NA","13":"NA","14":"1259.468259","15":"9.079191e+03","16":"0.000","17":"31.000","18":"144.000","19":"632.000","20":"996290.00","21":"▇▁▁▁▁","_rn_":"5"},{"1":"numeric","2":"call_oi","3":"0","4":"1","5":"<NA>","6":"<NA>","7":"<NA>","8":"NA","9":"NA","10":"NA","11":"NA","12":"NA","13":"NA","14":"26483.762179","15":"1.513984e+05","16":"0.000","17":"202.000","18":"1410.000","19":"9452.000","20":"7121560.00","21":"▇▁▁▁▁","_rn_":"6"},{"1":"numeric","2":"put_oi","3":"0","4":"1","5":"<NA>","6":"<NA>","7":"<NA>","8":"NA","9":"NA","10":"NA","11":"NA","12":"NA","13":"NA","14":"23867.841131","15":"1.752596e+05","16":"0.000","17":"91.000","18":"718.000","19":"5892.000","20":"9221790.00","21":"▇▁▁▁▁","_rn_":"7"},{"1":"numeric","2":"maturity_count","3":"0","4":"1","5":"<NA>","6":"<NA>","7":"<NA>","8":"NA","9":"NA","10":"NA","11":"NA","12":"NA","13":"NA","14":"3.640026","15":"1.666131e+00","16":"1.000","17":"3.000","18":"3.000","19":"4.000","20":"25.00","21":"▇▁▁▁▁","_rn_":"8"},{"1":"numeric","2":"implied_vol_ref","3":"0","4":"1","5":"<NA>","6":"<NA>","7":"<NA>","8":"NA","9":"NA","10":"NA","11":"NA","12":"NA","13":"NA","14":"45.712581","15":"2.508936e+01","16":"1.000","17":"27.810","18":"39.800","19":"57.340","20":"149.00","21":"▅▇▂▁▁","_rn_":"9"},{"1":"numeri
c","2":"total_contracts","3":"0","4":"1","5":"<NA>","6":"<NA>","7":"<NA>","8":"NA","9":"NA","10":"NA","11":"NA","12":"NA","13":"NA","14":"151.250892","15":"2.540512e+02","16":"3.000","17":"43.000","18":"85.000","19":"180.000","20":"10808.00","21":"▇▁▁▁▁","_rn_":"10"},{"1":"numeric","2":"realized_vol_short","3":"0","4":"1","5":"<NA>","6":"<NA>","7":"<NA>","8":"NA","9":"NA","10":"NA","11":"NA","12":"NA","13":"NA","14":"53.024624","15":"3.537581e+03","16":"0.000","17":"21.436","18":"33.562","19":"53.163","20":"3468890.00","21":"▇▁▁▁▁","_rn_":"11"},{"1":"numeric","2":"realized_vol_mid1","3":"0","4":"1","5":"<NA>","6":"<NA>","7":"<NA>","8":"NA","9":"NA","10":"NA","11":"NA","12":"NA","13":"NA","14":"53.809966","15":"2.543233e+03","16":"0.000","17":"23.080","18":"35.750","19":"56.943","20":"2484930.00","21":"▇▁▁▁▁","_rn_":"12"},{"1":"numeric","2":"realized_vol_mid2","3":"0","4":"1","5":"<NA>","6":"<NA>","7":"<NA>","8":"NA","9":"NA","10":"NA","11":"NA","12":"NA","13":"NA","14":"53.886008","15":"2.085671e+03","16":"0.000","17":"24.170","18":"37.535","19":"59.325","20":"2037590.00","21":"▇▁▁▁▁","_rn_":"13"},{"1":"numeric","2":"realized_vol_mid3","3":"0","4":"1","5":"<NA>","6":"<NA>","7":"<NA>","8":"NA","9":"NA","10":"NA","11":"NA","12":"NA","13":"NA","14":"54.027114","15":"1.868783e+03","16":"0.121","17":"24.827","18":"38.747","19":"60.476","20":"1825560.00","21":"▇▁▁▁▁","_rn_":"14"},{"1":"numeric","2":"realized_vol_long1","3":"0","4":"1","5":"<NA>","6":"<NA>","7":"<NA>","8":"NA","9":"NA","10":"NA","11":"NA","12":"NA","13":"NA","14":"54.151872","15":"1.708010e+03","16":"0.120","17":"25.449","18":"39.878","19":"61.358","20":"1668380.00","21":"▇▁▁▁▁","_rn_":"15"},{"1":"numeric","2":"realized_vol_long2","3":"0","4":"1","5":"<NA>","6":"<NA>","7":"<NA>","8":"NA","9":"NA","10":"NA","11":"NA","12":"NA","13":"NA","14":"54.423751","15":"1.481470e+03","16":"0.122","17":"26.488","18":"41.714","19":"62.771","20":"1446880.00","21":"▇▁▁▁▁","_rn_":"16"},{"1":"numeric","2":"realized_vol_long
3","3":"0","4":"1","5":"<NA>","6":"<NA>","7":"<NA>","8":"NA","9":"NA","10":"NA","11":"NA","12":"NA","13":"NA","14":"54.686252","15":"1.211573e+03","16":"0.125","17":"28.622","18":"43.817","19":"63.925","20":"1183030.00","21":"▇▁▁▁▁","_rn_":"17"},{"1":"numeric","2":"realized_vol_long4","3":"0","4":"1","5":"<NA>","6":"<NA>","7":"<NA>","8":"NA","9":"NA","10":"NA","11":"NA","12":"NA","13":"NA","14":"55.149326","15":"9.416093e+02","16":"0.130","17":"31.868","18":"45.984","19":"64.115","20":"917394.00","21":"▇▁▁▁▁","_rn_":"18"},{"1":"numeric","2":"market_vol_index","3":"0","4":"1","5":"<NA>","6":"<NA>","7":"<NA>","8":"NA","9":"NA","10":"NA","11":"NA","12":"NA","13":"NA","14":"23.448530","15":"1.019110e+01","16":"11.540","17":"16.950","18":"21.340","19":"26.870","20":"82.69","21":"▇▂▁▁▁","_rn_":"19"}],"options":{"columns":{"min":{},"max":[10]},"rows":{"min":[10],"max":[10]},"pages":{}}}
|
||
</script>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
<p>A systematic programmatic verification was executed on the raw dataset prior to any feature engineering. This audit confirmed two critical structural properties:</p>
|
||
<ul>
|
||
<li><strong>Absolute Uniqueness</strong>: No duplicate rows were detected across the primary temporal and cross-sectional keys (<code>asset_id</code> and <code>obs_date</code>).</li>
|
||
<li><strong>Strict Completeness</strong>: The dataset was entirely free of missing values (NA).</li>
|
||
</ul>
|
||
<p>The innate completeness of the data precluded the necessity for computationally expensive imputation algorithms (such as MICE or KNN) during the exploratory phase. Nevertheless, following strict MLOps principles, a median imputation step (<code>step_impute_median</code>) was retained within the unified tidymodels pipeline. This acts as a robust fail-safe mechanism to prevent pipeline crashes during future out-of-sample inferences should upstream data pipelines temporarily drop feature payloads. With structural purity confirmed, the primary data quality challenge shifted exclusively to the management of extreme values.</p>
|
||
</section>
|
||
<section id="the-mathematical-threat-to-linear-and-gradient-based-models" class="level4">
|
||
<h4 class="anchored" data-anchor-id="the-mathematical-threat-to-linear-and-gradient-based-models">The Mathematical Threat to Linear and Gradient-Based Models</h4>
|
||
<p>While extreme spikes in volatility or volume are genuine market phenomena rather than measurement errors, their presence poses a severe mathematical threat to specific families of predictive algorithms. Models relying on distance metrics and continuous optimization, such as <strong>Ordinary Least Squares (OLS)</strong>, penalized regressions (<strong>Elastic Net</strong>), and <strong>Multi-Layer Perceptrons (MLP)</strong>, are highly sensitive to these outliers due to two primary factors:</p>
|
||
<ul>
|
||
<li><p><strong>Quadratic Loss Functions</strong>: Algorithms optimizing Mean Squared Error (MSE) heavily penalize large deviations. A single extreme outlier forces the algorithm to shift the entire regression hyperplane to minimize that specific localized error, thereby destroying the model’s generalization capability on the remaining 99% of “normal” observations.</p></li>
|
||
<li><p><strong>Scaling Distortion</strong>: Linear models mathematically require strict feature standardization (Z-score normalization). If raw outliers are left untreated, they artificially inflate the standard deviation (σ) of the feature. Consequently, the normalized values of the vast majority of the data are compressed into an infinitesimally narrow band around zero, effectively erasing the predictive signal and preventing L1/L2 regularizations from operating fairly.</p></li>
|
||
</ul>
|
||
</section>
|
||
<section id="dynamic-thresholding-strategy" class="level4">
|
||
<h4 class="anchored" data-anchor-id="dynamic-thresholding-strategy">Dynamic Thresholding Strategy</h4>
|
||
<p>To neutralize this threat without discarding valuable rows of data (which would disrupt the temporal continuity of the dataset), we implemented a rigorous thresholding strategy known as <strong>Winsorization</strong> (or <em>capping</em>).</p>
|
||
<p>Instead of applying arbitrary fixed values, the limits are dynamically calculated based on the empirical distribution of the data:</p>
|
||
<ul>
|
||
<li><p><strong>Single-Sided Capping</strong>: For strictly positive features characterized by severe right-skewness (e.g., <code>realized_vol_short</code>, <code>put_call_ratio_volume</code>, <code>strike_dispersion</code>), all values exceeding the <span class="math inline">\(99.5\)</span>th percentile are capped at that exact threshold.</p></li>
|
||
<li><p><strong>Dual-Sided Winsorization</strong>: For symmetrical features capable of taking both negative and positive values (e.g., <code>stress_spread</code>), values are constrained within the <span class="math inline">\(0.5\)</span>th and <span class="math inline">\(99.5\)</span>th percentiles.</p></li>
|
||
</ul>
|
||
</section>
|
||
<section id="algorithmic-differentiation-and-data-leakage-prevention" class="level4">
|
||
<h4 class="anchored" data-anchor-id="algorithmic-differentiation-and-data-leakage-prevention">Algorithmic Differentiation and Data Leakage Prevention</h4>
|
||
<p>A fundamental MLOps principle enforced in this study is the strict prevention of data leakage. The <span class="math inline">\(99.5\)</span>th and <span class="math inline">\(0.5\)</span>th percentiles are computed exclusively on the training set. These “frozen” thresholds are subsequently applied to the validation and hidden test sets, ensuring that the out-of-sample evaluations remain statistically robust and untainted by future information.</p>
|
||
<p>Furthermore, acknowledging the divergent mechanics of machine learning algorithms, this capping procedure is intentionally isolated. As detailed in the architecture overview (Section 2.6), this <strong>Winsorization</strong> step is only applied to the linear and neural network pipelines (<code>rec_linear</code> and <code>rec_pca</code>). Tree-based models (<strong>LightGBM</strong>, <strong>XGBoost</strong>) operate via orthogonal, axis-aligned splits and naturally isolate extreme values into terminal leaves. Feeding them raw, un-capped data allows the boosting ensembles to fully capture the true non-linear magnitude of extreme market stress without suffering from the gradient distortion that afflicts linear architectures.</p>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb25"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb25-1"><a href="#cb25-1" aria-hidden="true" tabindex="-1"></a>calc_upper_limit <span class="ot"><-</span> <span class="cf">function</span>(x) {</span>
|
||
<span id="cb25-2"><a href="#cb25-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">quantile</span>(x, <span class="fl">0.995</span>, <span class="at">na.rm =</span> <span class="cn">TRUE</span>)</span>
|
||
<span id="cb25-3"><a href="#cb25-3" aria-hidden="true" tabindex="-1"></a>}</span>
|
||
<span id="cb25-4"><a href="#cb25-4" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb25-5"><a href="#cb25-5" aria-hidden="true" tabindex="-1"></a>calc_dual_limits <span class="ot"><-</span> <span class="cf">function</span>(x) {</span>
|
||
<span id="cb25-6"><a href="#cb25-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">quantile</span>(x, <span class="at">probs =</span> <span class="fu">c</span>(<span class="fl">0.005</span>, <span class="fl">0.995</span>), <span class="at">na.rm =</span> <span class="cn">TRUE</span>)</span>
|
||
<span id="cb25-7"><a href="#cb25-7" aria-hidden="true" tabindex="-1"></a>}</span>
|
||
<span id="cb25-8"><a href="#cb25-8" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb25-9"><a href="#cb25-9" aria-hidden="true" tabindex="-1"></a>clip_max_func <span class="ot"><-</span> <span class="cf">function</span>(x, var_name, stats_list) {</span>
|
||
<span id="cb25-10"><a href="#cb25-10" aria-hidden="true" tabindex="-1"></a> limit <span class="ot"><-</span> stats_list[[var_name]]</span>
|
||
<span id="cb25-11"><a href="#cb25-11" aria-hidden="true" tabindex="-1"></a> <span class="fu">pmin</span>(x, limit)</span>
|
||
<span id="cb25-12"><a href="#cb25-12" aria-hidden="true" tabindex="-1"></a>}</span>
|
||
<span id="cb25-13"><a href="#cb25-13" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb25-14"><a href="#cb25-14" aria-hidden="true" tabindex="-1"></a>clip_dual_func <span class="ot"><-</span> <span class="cf">function</span>(x, var_name, stats_list) {</span>
|
||
<span id="cb25-15"><a href="#cb25-15" aria-hidden="true" tabindex="-1"></a> limits <span class="ot"><-</span> stats_list[[var_name]]</span>
|
||
<span id="cb25-16"><a href="#cb25-16" aria-hidden="true" tabindex="-1"></a> <span class="fu">pmax</span>(<span class="fu">pmin</span>(x, limits[<span class="dv">2</span>]), limits[<span class="dv">1</span>])</span>
|
||
<span id="cb25-17"><a href="#cb25-17" aria-hidden="true" tabindex="-1"></a>}</span>
|
||
<span id="cb25-18"><a href="#cb25-18" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb25-19"><a href="#cb25-19" aria-hidden="true" tabindex="-1"></a>vars_clip_max <span class="ot"><-</span> <span class="fu">c</span>(</span>
|
||
<span id="cb25-20"><a href="#cb25-20" aria-hidden="true" tabindex="-1"></a> <span class="st">"realized_vol_short"</span>,</span>
|
||
<span id="cb25-21"><a href="#cb25-21" aria-hidden="true" tabindex="-1"></a> <span class="st">"realized_vol_mid"</span>,</span>
|
||
<span id="cb25-22"><a href="#cb25-22" aria-hidden="true" tabindex="-1"></a> <span class="st">"realized_vol_long"</span>,</span>
|
||
<span id="cb25-23"><a href="#cb25-23" aria-hidden="true" tabindex="-1"></a> <span class="st">"put_volume"</span>,</span>
|
||
<span id="cb25-24"><a href="#cb25-24" aria-hidden="true" tabindex="-1"></a> <span class="st">"call_volume"</span>,</span>
|
||
<span id="cb25-25"><a href="#cb25-25" aria-hidden="true" tabindex="-1"></a> <span class="st">"put_oi"</span>,</span>
|
||
<span id="cb25-26"><a href="#cb25-26" aria-hidden="true" tabindex="-1"></a> <span class="st">"call_oi"</span>,</span>
|
||
<span id="cb25-27"><a href="#cb25-27" aria-hidden="true" tabindex="-1"></a> <span class="st">"strike_dispersion"</span>,</span>
|
||
<span id="cb25-28"><a href="#cb25-28" aria-hidden="true" tabindex="-1"></a> <span class="st">"total_contracts"</span>,</span>
|
||
<span id="cb25-29"><a href="#cb25-29" aria-hidden="true" tabindex="-1"></a> <span class="st">"pulse_ratio"</span>,</span>
|
||
<span id="cb25-30"><a href="#cb25-30" aria-hidden="true" tabindex="-1"></a> <span class="st">"put_call_ratio_volume"</span>,</span>
|
||
<span id="cb25-31"><a href="#cb25-31" aria-hidden="true" tabindex="-1"></a> <span class="st">"put_call_ratio_oi"</span>,</span>
|
||
<span id="cb25-32"><a href="#cb25-32" aria-hidden="true" tabindex="-1"></a> <span class="st">"liquidity_ratio"</span>,</span>
|
||
<span id="cb25-33"><a href="#cb25-33" aria-hidden="true" tabindex="-1"></a> <span class="st">"option_dispersion"</span>,</span>
|
||
<span id="cb25-34"><a href="#cb25-34" aria-hidden="true" tabindex="-1"></a> <span class="st">"put_low_strike"</span></span>
|
||
<span id="cb25-35"><a href="#cb25-35" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb25-36"><a href="#cb25-36" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb25-37"><a href="#cb25-37" aria-hidden="true" tabindex="-1"></a>vars_clip_dual <span class="ot"><-</span> <span class="fu">c</span>(</span>
|
||
<span id="cb25-38"><a href="#cb25-38" aria-hidden="true" tabindex="-1"></a> <span class="st">"stress_spread"</span></span>
|
||
<span id="cb25-39"><a href="#cb25-39" aria-hidden="true" tabindex="-1"></a>)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
</section>
|
||
</section>
|
||
<section id="feature-engineering" class="level3">
|
||
<h3 class="anchored" data-anchor-id="feature-engineering">Feature Engineering</h3>
|
||
<p>Prior to constructing advanced indicators, we streamlined the input space by averaging the granular mid-term and long-term realized volatility measures. This aggregation strategy mitigates multicollinearity among highly correlated historical horizons while preserving the integrity of the volatility signal.</p>
|
||
<p>Then, to capture the complex dynamics of option markets, we constructed a series of advanced financial indicators. These features are designed to isolate specific phenomena: market stress, liquidity constraints, asymmetric fear, or the structure of speculation.</p>
|
||
<section id="volatility-regime-indicators" class="level4">
|
||
<h4 class="anchored" data-anchor-id="volatility-regime-indicators">Volatility Regime Indicators</h4>
|
||
<section id="pulse-ratio" class="level5">
|
||
<h5 class="anchored" data-anchor-id="pulse-ratio">Pulse Ratio</h5>
|
||
<p>This indicator gives the direction of volatility, i.e. whether it is currently increasing or decreasing. Absolute volatility data would not determine the tendency of volatility, because volatility levels differ significantly across underlying assets.</p>
|
||
<p>When there are massive market moves, investors tend to panic and buy options for protection, which pushes implied volatility higher.</p>
|
||
<p><span class="math display">\[
|
||
\text{Pulse Ratio} = \dfrac{\text{Realized Short Volatility}}{\text{Realized Long Volatility}}
|
||
\]</span></p>
|
||
</section>
|
||
<section id="stress-spread" class="level5">
|
||
<h5 class="anchored" data-anchor-id="stress-spread">Stress Spread</h5>
|
||
<p>Stress Spread assesses whether we are aligned with the overall market stress represented by the VIX, the volatility index. This spread measures the underlying’s stress relative to market stress i.e. whether the underlying is more or less volatile than the market.</p>
|
||
<p>If the Stress Spread increases, the underlying becomes more volatile than the market. The underlying’s volatility is not only driven by market conditions, but also by idiosyncratic risk.</p>
|
||
<p><span class="math display">\[
|
||
\text{Stress Spread} = \text{Realized Short Volatility} - \text{Market Volatility Index}
|
||
\]</span></p>
|
||
</section>
|
||
</section>
|
||
<section id="market-sentiment-indicators" class="level4">
|
||
<h4 class="anchored" data-anchor-id="market-sentiment-indicators">Market Sentiment Indicators</h4>
|
||
<p>Sentiment analysis is based on the dichotomy between immediate flow (Volume) and the stock of positions (Open Interest).</p>
|
||
<section id="put-call-ratio-on-volume" class="level5">
|
||
<h5 class="anchored" data-anchor-id="put-call-ratio-on-volume">Put-Call Ratio on Volume</h5>
|
||
<p>The Put Call Volume Ratio is the indicator of immediate market stress. Volatility is skewed because fear is asymmetric: the fear of the downside is stronger than the desire to speculate on the upside.</p>
|
||
<p>A strong increase in the Put Call Volume Ratio means markets are panicking and therefore buying protection via put options. This higher demand leads to higher prices, meaning higher implied volatility.</p>
|
||
<p><span class="math display">\[
|
||
\text{Put-Call Ratio Volume} = \frac{\text{Put Volume}}{\text{Call Volume}}
|
||
\]</span></p>
|
||
</section>
|
||
<section id="put-call-ratio-on-open-interest" class="level5">
|
||
<h5 class="anchored" data-anchor-id="put-call-ratio-on-open-interest">Put-Call Ratio on Open Interest</h5>
|
||
<p>This is the indicator of “risk structure” or long-term conviction.</p>
|
||
<p><span class="math display">\[
|
||
\text{Put-Call Ratio OI} = \frac{\text{Put Open Interest}}{\text{Call Open Interest}}
|
||
\]</span></p>
|
||
</section>
|
||
<section id="put-proportion" class="level5">
|
||
<h5 class="anchored" data-anchor-id="put-proportion">Put Proportion</h5>
|
||
<p>Put Proportion Ratio measures the balance between put buyers and call buyers. It detects whether the trading activity is dominated by hedging or by market optimism.</p>
|
||
<p>Volatility is skewed because fear is asymmetric: when put buying surges, it signals urgent hedging, leading to higher implied volatility. On the other hand, when call buying surges, implied volatility will increase, but less than for puts.</p>
|
||
<p><span class="math display">\[
|
||
\text{Put Proportion} = \frac{\text{Put Volume}}{\text{Total Volume}}
|
||
\]</span></p>
|
||
</section>
|
||
</section>
|
||
<section id="structure-and-liquidity-indicators" class="level4">
|
||
<h4 class="anchored" data-anchor-id="structure-and-liquidity-indicators">Structure and Liquidity Indicators</h4>
|
||
<section id="liquidity-ratio" class="level5">
|
||
<h5 class="anchored" data-anchor-id="liquidity-ratio">Liquidity Ratio</h5>
|
||
<p>The Liquidity Ratio measures the speed at which contracts are being exchanged compared to the total number of contracts outstanding in the market. It helps understand if markets are overheating or experiencing a blockage.</p>
|
||
<p>The Liquidity Ratio showcases that volatility is also impacted by the liquidity of the underlying asset. Unusually high activity amplifies price movements, whereas unusually low activity means that even a small order can shift the price up or down. In both cases, the imbalance between flow and market depth justifies a higher volatility forecast.</p>
|
||
<p><span class="math display">\[
|
||
\text{Liquidity Ratio} = \frac{\text{Total Volume}}{\text{Total Open Interest}}
|
||
\]</span></p>
|
||
</section>
|
||
<section id="option-dispersion" class="level5">
|
||
<h5 class="anchored" data-anchor-id="option-dispersion">Option Dispersion</h5>
|
||
<p>Option Dispersion indicates if activity is concentrated around a specific strike i.e. a specific scenario.</p>
|
||
<p>A concentrated market means that liquidity is high at a certain strike, order flow is predictable, and the spread, which can be seen as an uncertainty premium, decreases.</p>
|
||
<p>Orders split across a very large number of strikes mean the market struggles to determine the direction of the underlying. This results in lower liquidity, meaning the market has a lower capacity to absorb large orders.</p>
|
||
<p><span class="math display">\[
|
||
\text{Option Dispersion} = \frac{\text{Strike Dispersion}}{\text{Total Contracts}}
|
||
\]</span></p>
|
||
</section>
|
||
<section id="put-low-strike-liquidity-trap" class="level5">
|
||
<h5 class="anchored" data-anchor-id="put-low-strike-liquidity-trap">Put Low Strike (Liquidity Trap)</h5>
|
||
<p>This indicator focuses on the density of “Out-of-the-Money” protection.</p>
|
||
<p>It helps identify “breaking points.” If a mass of contracts concentrates on a specific strike (high ratio), it creates a “trap”. If the asset price approaches this level, the forced hedging mechanisms of market makers can trigger a volatility explosion via a snowball effect.</p>
|
||
<p><span class="math display">\[
|
||
\text{Put Low Strike} = \frac{\text{Strike Dispersion}}{\text{Put Open Interest}}
|
||
\]</span></p>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb26"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb26-1"><a href="#cb26-1" aria-hidden="true" tabindex="-1"></a>create_features <span class="ot"><-</span> <span class="cf">function</span>(df) {</span>
|
||
<span id="cb26-2"><a href="#cb26-2" aria-hidden="true" tabindex="-1"></a> epsilon <span class="ot"><-</span> <span class="fl">1e-6</span></span>
|
||
<span id="cb26-3"><a href="#cb26-3" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb26-4"><a href="#cb26-4" aria-hidden="true" tabindex="-1"></a> df_enriched <span class="ot"><-</span> df <span class="sc">|></span></span>
|
||
<span id="cb26-5"><a href="#cb26-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate</span>(</span>
|
||
<span id="cb26-6"><a href="#cb26-6" aria-hidden="true" tabindex="-1"></a> <span class="at">realized_vol_mid =</span> (realized_vol_mid1 <span class="sc">+</span></span>
|
||
<span id="cb26-7"><a href="#cb26-7" aria-hidden="true" tabindex="-1"></a> realized_vol_mid2 <span class="sc">+</span></span>
|
||
<span id="cb26-8"><a href="#cb26-8" aria-hidden="true" tabindex="-1"></a> realized_vol_mid3) <span class="sc">/</span></span>
|
||
<span id="cb26-9"><a href="#cb26-9" aria-hidden="true" tabindex="-1"></a> <span class="dv">3</span>,</span>
|
||
<span id="cb26-10"><a href="#cb26-10" aria-hidden="true" tabindex="-1"></a> <span class="at">realized_vol_long =</span> (realized_vol_long1 <span class="sc">+</span></span>
|
||
<span id="cb26-11"><a href="#cb26-11" aria-hidden="true" tabindex="-1"></a> realized_vol_long2 <span class="sc">+</span></span>
|
||
<span id="cb26-12"><a href="#cb26-12" aria-hidden="true" tabindex="-1"></a> realized_vol_long3 <span class="sc">+</span></span>
|
||
<span id="cb26-13"><a href="#cb26-13" aria-hidden="true" tabindex="-1"></a> realized_vol_long4) <span class="sc">/</span></span>
|
||
<span id="cb26-14"><a href="#cb26-14" aria-hidden="true" tabindex="-1"></a> <span class="dv">4</span>,</span>
|
||
<span id="cb26-15"><a href="#cb26-15" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">|></span></span>
|
||
<span id="cb26-16"><a href="#cb26-16" aria-hidden="true" tabindex="-1"></a> dplyr<span class="sc">::</span><span class="fu">select</span>(</span>
|
||
<span id="cb26-17"><a href="#cb26-17" aria-hidden="true" tabindex="-1"></a> <span class="sc">-</span>realized_vol_mid1,</span>
|
||
<span id="cb26-18"><a href="#cb26-18" aria-hidden="true" tabindex="-1"></a> <span class="sc">-</span>realized_vol_mid2,</span>
|
||
<span id="cb26-19"><a href="#cb26-19" aria-hidden="true" tabindex="-1"></a> <span class="sc">-</span>realized_vol_mid3,</span>
|
||
<span id="cb26-20"><a href="#cb26-20" aria-hidden="true" tabindex="-1"></a> <span class="sc">-</span>realized_vol_long1,</span>
|
||
<span id="cb26-21"><a href="#cb26-21" aria-hidden="true" tabindex="-1"></a> <span class="sc">-</span>realized_vol_long2,</span>
|
||
<span id="cb26-22"><a href="#cb26-22" aria-hidden="true" tabindex="-1"></a> <span class="sc">-</span>realized_vol_long3,</span>
|
||
<span id="cb26-23"><a href="#cb26-23" aria-hidden="true" tabindex="-1"></a> <span class="sc">-</span>realized_vol_long4</span>
|
||
<span id="cb26-24"><a href="#cb26-24" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">|></span></span>
|
||
<span id="cb26-25"><a href="#cb26-25" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate</span>(</span>
|
||
<span id="cb26-26"><a href="#cb26-26" aria-hidden="true" tabindex="-1"></a> <span class="at">pulse_ratio =</span> realized_vol_short <span class="sc">/</span> (realized_vol_long <span class="sc">+</span> epsilon),</span>
|
||
<span id="cb26-27"><a href="#cb26-27" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb26-28"><a href="#cb26-28" aria-hidden="true" tabindex="-1"></a> <span class="at">put_call_ratio_volume =</span> put_volume <span class="sc">/</span> (call_volume <span class="sc">+</span> epsilon),</span>
|
||
<span id="cb26-29"><a href="#cb26-29" aria-hidden="true" tabindex="-1"></a> <span class="at">put_call_ratio_oi =</span> put_oi <span class="sc">/</span> (call_oi <span class="sc">+</span> epsilon),</span>
|
||
<span id="cb26-30"><a href="#cb26-30" aria-hidden="true" tabindex="-1"></a> <span class="at">liquidity_ratio =</span> (put_volume <span class="sc">+</span> call_volume) <span class="sc">/</span></span>
|
||
<span id="cb26-31"><a href="#cb26-31" aria-hidden="true" tabindex="-1"></a> (call_oi <span class="sc">+</span> put_oi <span class="sc">+</span> epsilon),</span>
|
||
<span id="cb26-32"><a href="#cb26-32" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb26-33"><a href="#cb26-33" aria-hidden="true" tabindex="-1"></a> <span class="at">option_dispersion =</span> strike_dispersion <span class="sc">/</span> (total_contracts <span class="sc">+</span> epsilon),</span>
|
||
<span id="cb26-34"><a href="#cb26-34" aria-hidden="true" tabindex="-1"></a> <span class="at">put_low_strike =</span> strike_dispersion <span class="sc">/</span> (put_oi <span class="sc">+</span> epsilon),</span>
|
||
<span id="cb26-35"><a href="#cb26-35" aria-hidden="true" tabindex="-1"></a> <span class="at">put_proportion =</span> put_volume <span class="sc">/</span> (put_volume <span class="sc">+</span> call_volume <span class="sc">+</span> epsilon),</span>
|
||
<span id="cb26-36"><a href="#cb26-36" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb26-37"><a href="#cb26-37" aria-hidden="true" tabindex="-1"></a> <span class="at">stress_spread =</span> realized_vol_short <span class="sc">-</span> market_vol_index,</span>
|
||
<span id="cb26-38"><a href="#cb26-38" aria-hidden="true" tabindex="-1"></a> )</span>
|
||
<span id="cb26-39"><a href="#cb26-39" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb26-40"><a href="#cb26-40" aria-hidden="true" tabindex="-1"></a> <span class="fu">return</span>(df_enriched)</span>
|
||
<span id="cb26-41"><a href="#cb26-41" aria-hidden="true" tabindex="-1"></a>}</span>
|
||
<span id="cb26-42"><a href="#cb26-42" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb26-43"><a href="#cb26-43" aria-hidden="true" tabindex="-1"></a>train_eng <span class="ot"><-</span> <span class="fu">create_features</span>(train)</span>
|
||
<span id="cb26-44"><a href="#cb26-44" aria-hidden="true" tabindex="-1"></a>val_eng <span class="ot"><-</span> <span class="fu">create_features</span>(val)</span>
|
||
<span id="cb26-45"><a href="#cb26-45" aria-hidden="true" tabindex="-1"></a>test_eng <span class="ot"><-</span> <span class="fu">create_features</span>(test)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
</section>
|
||
</section>
|
||
</section>
|
||
<section id="data-scaling-normalization" class="level3">
|
||
<h3 class="anchored" data-anchor-id="data-scaling-normalization">Data Scaling & Normalization</h3>
|
||
<p>Following the isolation and neutralization of extreme outliers (<strong>Winsorization</strong>), the core distributions of the financial predictors remain inherently skewed. Standard financial variables, such as realized volatility and trading volumes, are strictly positive and typically follow log-normal distributions. While tree-based ensembles are indifferent to such monotonic skewness, linear frameworks, distance-based algorithms, and Neural Networks require symmetric, standardized feature spaces to ensure gradient stability and unbiased coefficient estimation.</p>
|
||
<p>To fulfill these strict algorithmic prerequisites, we engineered a multi-step transformation pipeline utilizing targeted statistical mapping.</p>
|
||
<section id="asymmetry-correction-logarithmic-and-power-transformations" class="level4">
|
||
<h4 class="anchored" data-anchor-id="asymmetry-correction-logarithmic-and-power-transformations">Asymmetry Correction: Logarithmic and Power Transformations</h4>
|
||
<p>The first objective of the transformation phase is to center the data mass and correct severe right-skewness, thereby approximating a Gaussian distribution. We employed two distinct mathematical approaches depending on the domain of the predictors:</p>
|
||
<ul>
|
||
<li><p><strong>1. Logarithmic Transformation for Bounded Features</strong>: For non-negative, highly skewed variables (e.g., <code>realized_vol_short</code>, <code>put_call_ratio_volume</code>, <code>strike_dispersion</code>), a natural logarithmic transformation <span class="math inline">\(f(x) = \log(x + c)\)</span> was applied. This mapping compresses the long right tail while spreading out values near the lower bound, stabilizing the variance across the feature space (homoscedasticity). To prevent mathematically undefined operations (<span class="math inline">\(\log(0)\)</span>) when a feature such as a volume equals zero, a strictly positive offset (<span class="math inline">\(c = 1\)</span>) was systematically added prior to transformation.</p></li>
|
||
<li><p><strong>2. Yeo-Johnson Transformation for Unbounded Features</strong>: Certain engineered features, such as the <code>stress_spread</code> and <code>vol_slope</code>, cross the zero-bound, taking both negative and positive values depending on market contango or backwardation regimes. The standard logarithmic or Box-Cox transformations are mathematically invalid for negative inputs. Consequently, we applied the <strong>Yeo-Johnson</strong> power transformation, a robust generalization of Box-Cox that smoothly handles the entire real line <span class="math inline">\(\mathbb{R}\)</span>. The optimal transformation parameter (<span class="math inline">\(\lambda\)</span>) for each variable was estimated strictly via maximum likelihood on the training set.</p></li>
|
||
</ul>
|
||
</section>
|
||
<section id="feature-standardization-z-score-normalization" class="level4">
|
||
<h4 class="anchored" data-anchor-id="feature-standardization-z-score-normalization">Feature Standardization (<span class="math inline">\(Z\)</span>-Score Normalization)</h4>
|
||
<p>Once the distributions were geometrically symmetrized, the final mathematical prerequisite for the linear and neural pipelines was absolute scale homogenization.</p>
|
||
<p>Financial features are inherently measured in vastly different units: implied volatility is expressed as a percentage, strike dispersion in nominal currency, and ratios as unitless scalars. If passed raw to a penalized regression (such as Elastic Net), the algorithm’s objective function would unfairly penalize features with large absolute numerical values, while ignoring the coefficients of small-scale variables, regardless of their actual predictive power.</p>
|
||
<p>To enforce mathematical equity, a strict <span class="math inline">\(Z\)</span>-score normalization was applied to all numerical predictors:</p>
|
||
<p><span class="math inline">\(z = \frac{x - \mu}{\sigma}\)</span></p>
|
||
<p>This operation rescales every feature to a mean of zero (<span class="math inline">\(\mu = 0\)</span>) and a unit variance (<span class="math inline">\(\sigma = 1\)</span>). Consequently, the <span class="math inline">\(L_1\)</span> (Lasso) and <span class="math inline">\(L_2\)</span> (Ridge) regularization penalties evaluate the features purely on their predictive signal rather than their arbitrary measurement scale.</p>
|
||
</section>
|
||
<section id="algorithmic-execution-and-methodological-integrity" class="level4">
|
||
<h4 class="anchored" data-anchor-id="algorithmic-execution-and-methodological-integrity">Algorithmic Execution and Methodological Integrity</h4>
|
||
<p>Consistent with our strict temporal isolation strategy, the parameters governing these transformations, namely the <strong>Yeo-Johnson</strong> <span class="math inline">\(\lambda\)</span> values, the empirical means <span class="math inline">\(\mu\)</span>, and the standard deviations <span class="math inline">\(\sigma\)</span>, were computed exclusively on the training set. These static parameters were encapsulated within the tidymodels recipe state and deterministically projected onto the validation and test sets via the <code>step_log</code>, <code>step_YeoJohnson</code>, and <code>step_normalize</code> functions.</p>
|
||
<p>Furthermore, as established in our architectural blueprint (Section 2.6), these transformations were explicitly omitted from the tree-based pipeline (<code>rec_tree</code>) to preserve the natural, interpretable scale of the financial metrics for post-hoc SHAP analysis.</p>
|
||
</section>
|
||
</section>
|
||
<section id="exploratory-data-analysis-eda" class="level3">
|
||
<h3 class="anchored" data-anchor-id="exploratory-data-analysis-eda">Exploratory Data Analysis (EDA)</h3>
|
||
<section id="target-variable-analysis" class="level4">
|
||
<h4 class="anchored" data-anchor-id="target-variable-analysis">Target Variable Analysis</h4>
|
||
<p>Before implementing any regression model, we conducted a thorough statistical analysis of the target variable, <code>implied_vol_ref</code>. Understanding the distribution of the target is crucial, as linear models generally assume that the residuals follow a normal distribution.</p>
|
||
<p>We first computed summary statistics and plotted the empirical density of the target. The initial histogram revealed a strictly positive, right-skewed distribution with a heavy tail, characteristic of financial volatility data.</p>
|
||
<div class="cell" data-layout-align="center">
|
||
<!-- R cell cb27: copies train_eng$implied_vol_ref into `target` and calls summary(target); the printed summary is in the cell-output block that follows. -->
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb27"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb27-1"><a href="#cb27-1" aria-hidden="true" tabindex="-1"></a>target <span class="ot"><-</span> train_eng<span class="sc">$</span>implied_vol_ref</span>
|
||
<span id="cb27-2"><a href="#cb27-2" aria-hidden="true" tabindex="-1"></a><span class="fu">summary</span>(target)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output cell-output-stdout">
|
||
<pre><code> Min. 1st Qu. Median Mean 3rd Qu. Max.
|
||
1.00 28.84 41.33 47.15 59.22 149.00 </code></pre>
|
||
</div>
|
||
</div>
|
||
<div class="cell" data-layout-align="center">
|
||
<!-- R cell cb29: builds target_dist, a density-scaled histogram of `target` (bin width 1) with a kernel density line on top, then applies theme_minimal() and the title / axis labels.
     The whole pipeline is one `+` chain. Previously geom_line(...) had no trailing `+`, so theme_minimal() + labs(...) ran as a separate statement: the plot received no theme or labels and the theme object was auto-printed.
     NOTE(review): the long theme listing in the stdout block right after this cell was produced by that earlier version; it should disappear once the document is re-rendered.
     Also uses after_stat(density) instead of the deprecated ..density.. notation, and linewidth instead of size for the line geom. -->
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb29"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb29-1"><a href="#cb29-1" aria-hidden="true" tabindex="-1"></a>target_dist <span class="ot"><-</span> <span class="fu">ggplot</span>(train_eng, <span class="fu">aes</span>(<span class="at">x =</span> target)) <span class="sc">+</span></span>
|
||
<span id="cb29-2"><a href="#cb29-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_histogram</span>(</span>
|
||
<span id="cb29-3"><a href="#cb29-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">aes</span>(<span class="at">y =</span> <span class="fu">after_stat</span>(density)),</span>
|
||
<span id="cb29-4"><a href="#cb29-4" aria-hidden="true" tabindex="-1"></a> <span class="at">binwidth =</span> <span class="dv">1</span>,</span>
|
||
<span id="cb29-5"><a href="#cb29-5" aria-hidden="true" tabindex="-1"></a> <span class="at">fill =</span> <span class="st">"#E4CBF9"</span>,</span>
|
||
<span id="cb29-6"><a href="#cb29-6" aria-hidden="true" tabindex="-1"></a> <span class="at">color =</span> <span class="st">"white"</span>,</span>
|
||
<span id="cb29-7"><a href="#cb29-7" aria-hidden="true" tabindex="-1"></a> <span class="at">lwd =</span> <span class="fl">0.1</span>,</span>
|
||
<span id="cb29-8"><a href="#cb29-8" aria-hidden="true" tabindex="-1"></a> <span class="at">alpha =</span> <span class="fl">0.8</span></span>
|
||
<span id="cb29-9"><a href="#cb29-9" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">+</span> <span class="co">#draw the histogram</span></span>
|
||
<span id="cb29-10"><a href="#cb29-10" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_line</span>(<span class="at">stat =</span> <span class="st">"density"</span>, <span class="at">color =</span> <span class="st">"#983399"</span>, <span class="at">linewidth =</span> <span class="dv">1</span>) <span class="sc">+</span></span>
|
||
<span id="cb29-11"><a href="#cb29-11" aria-hidden="true" tabindex="-1"></a> <span class="fu">theme_minimal</span>() <span class="sc">+</span></span>
|
||
<span id="cb29-12"><a href="#cb29-12" aria-hidden="true" tabindex="-1"></a> <span class="fu">labs</span>(<span class="at">title =</span> <span class="st">"Target distribution"</span>, <span class="at">x =</span> <span class="st">"Target"</span>, <span class="at">y =</span> <span class="st">"Density"</span>)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output cell-output-stdout">
|
||
<pre><code><theme> List of 146
|
||
$ line : <ggplot2::element_line>
|
||
..@ colour : chr "black"
|
||
..@ linewidth : num 0.5
|
||
..@ linetype : num 1
|
||
..@ lineend : chr "butt"
|
||
..@ linejoin : chr "round"
|
||
..@ arrow : logi FALSE
|
||
..@ arrow.fill : chr "black"
|
||
..@ inherit.blank: logi TRUE
|
||
$ rect : <ggplot2::element_rect>
|
||
..@ fill : chr "white"
|
||
..@ colour : chr "black"
|
||
..@ linewidth : num 0.5
|
||
..@ linetype : num 1
|
||
..@ linejoin : chr "round"
|
||
..@ inherit.blank: logi TRUE
|
||
$ text : <ggplot2::element_text>
|
||
..@ family : chr ""
|
||
..@ face : chr "plain"
|
||
..@ italic : chr NA
|
||
..@ fontweight : num NA
|
||
..@ fontwidth : num NA
|
||
..@ colour : chr "black"
|
||
..@ size : num 11
|
||
..@ hjust : num 0.5
|
||
..@ vjust : num 0.5
|
||
..@ angle : num 0
|
||
..@ lineheight : num 0.9
|
||
..@ margin : <ggplot2::margin> num [1:4] 0 0 0 0
|
||
..@ debug : logi FALSE
|
||
..@ inherit.blank: logi TRUE
|
||
$ title : chr "Target distribution"
|
||
$ point : <ggplot2::element_point>
|
||
..@ colour : chr "black"
|
||
..@ shape : num 19
|
||
..@ size : num 1.5
|
||
..@ fill : chr "white"
|
||
..@ stroke : num 0.5
|
||
..@ inherit.blank: logi TRUE
|
||
$ polygon : <ggplot2::element_polygon>
|
||
..@ fill : chr "white"
|
||
..@ colour : chr "black"
|
||
..@ linewidth : num 0.5
|
||
..@ linetype : num 1
|
||
..@ linejoin : chr "round"
|
||
..@ inherit.blank: logi TRUE
|
||
$ geom : <ggplot2::element_geom>
|
||
..@ ink : chr "black"
|
||
..@ paper : chr "white"
|
||
..@ accent : chr "#3366FF"
|
||
..@ linewidth : num 0.5
|
||
..@ borderwidth: num 0.5
|
||
..@ linetype : int 1
|
||
..@ bordertype : int 1
|
||
..@ family : chr ""
|
||
..@ fontsize : num 3.87
|
||
..@ pointsize : num 1.5
|
||
..@ pointshape : num 19
|
||
..@ colour : NULL
|
||
..@ fill : NULL
|
||
$ spacing : 'simpleUnit' num 5.5points
|
||
..- attr(*, "unit")= int 8
|
||
$ margins : <ggplot2::margin> num [1:4] 5.5 5.5 5.5 5.5
|
||
$ aspect.ratio : NULL
|
||
$ axis.title : NULL
|
||
$ axis.title.x : <ggplot2::element_text>
|
||
..@ family : NULL
|
||
..@ face : NULL
|
||
..@ italic : chr NA
|
||
..@ fontweight : num NA
|
||
..@ fontwidth : num NA
|
||
..@ colour : NULL
|
||
..@ size : NULL
|
||
..@ hjust : NULL
|
||
..@ vjust : num 1
|
||
..@ angle : NULL
|
||
..@ lineheight : NULL
|
||
..@ margin : <ggplot2::margin> num [1:4] 2.75 0 0 0
|
||
..@ debug : NULL
|
||
..@ inherit.blank: logi TRUE
|
||
$ axis.title.x.top : <ggplot2::element_text>
|
||
..@ family : NULL
|
||
..@ face : NULL
|
||
..@ italic : chr NA
|
||
..@ fontweight : num NA
|
||
..@ fontwidth : num NA
|
||
..@ colour : NULL
|
||
..@ size : NULL
|
||
..@ hjust : NULL
|
||
..@ vjust : num 0
|
||
..@ angle : NULL
|
||
..@ lineheight : NULL
|
||
..@ margin : <ggplot2::margin> num [1:4] 0 0 2.75 0
|
||
..@ debug : NULL
|
||
..@ inherit.blank: logi TRUE
|
||
$ axis.title.x.bottom : NULL
|
||
$ axis.title.y : <ggplot2::element_text>
|
||
..@ family : NULL
|
||
..@ face : NULL
|
||
..@ italic : chr NA
|
||
..@ fontweight : num NA
|
||
..@ fontwidth : num NA
|
||
..@ colour : NULL
|
||
..@ size : NULL
|
||
..@ hjust : NULL
|
||
..@ vjust : num 1
|
||
..@ angle : num 90
|
||
..@ lineheight : NULL
|
||
..@ margin : <ggplot2::margin> num [1:4] 0 2.75 0 0
|
||
..@ debug : NULL
|
||
..@ inherit.blank: logi TRUE
|
||
$ axis.title.y.left : NULL
|
||
$ axis.title.y.right : <ggplot2::element_text>
|
||
..@ family : NULL
|
||
..@ face : NULL
|
||
..@ italic : chr NA
|
||
..@ fontweight : num NA
|
||
..@ fontwidth : num NA
|
||
..@ colour : NULL
|
||
..@ size : NULL
|
||
..@ hjust : NULL
|
||
..@ vjust : num 1
|
||
..@ angle : num -90
|
||
..@ lineheight : NULL
|
||
..@ margin : <ggplot2::margin> num [1:4] 0 0 0 2.75
|
||
..@ debug : NULL
|
||
..@ inherit.blank: logi TRUE
|
||
$ axis.text : <ggplot2::element_text>
|
||
..@ family : NULL
|
||
..@ face : NULL
|
||
..@ italic : chr NA
|
||
..@ fontweight : num NA
|
||
..@ fontwidth : num NA
|
||
..@ colour : chr "#4D4D4DFF"
|
||
..@ size : 'rel' num 0.8
|
||
..@ hjust : NULL
|
||
..@ vjust : NULL
|
||
..@ angle : NULL
|
||
..@ lineheight : NULL
|
||
..@ margin : NULL
|
||
..@ debug : NULL
|
||
..@ inherit.blank: logi TRUE
|
||
$ axis.text.x : <ggplot2::element_text>
|
||
..@ family : NULL
|
||
..@ face : NULL
|
||
..@ italic : chr NA
|
||
..@ fontweight : num NA
|
||
..@ fontwidth : num NA
|
||
..@ colour : NULL
|
||
..@ size : NULL
|
||
..@ hjust : NULL
|
||
..@ vjust : num 1
|
||
..@ angle : NULL
|
||
..@ lineheight : NULL
|
||
..@ margin : <ggplot2::margin> num [1:4] 2.2 0 0 0
|
||
..@ debug : NULL
|
||
..@ inherit.blank: logi TRUE
|
||
$ axis.text.x.top : <ggplot2::element_text>
|
||
..@ family : NULL
|
||
..@ face : NULL
|
||
..@ italic : chr NA
|
||
..@ fontweight : num NA
|
||
..@ fontwidth : num NA
|
||
..@ colour : NULL
|
||
..@ size : NULL
|
||
..@ hjust : NULL
|
||
..@ vjust : NULL
|
||
..@ angle : NULL
|
||
..@ lineheight : NULL
|
||
..@ margin : <ggplot2::margin> num [1:4] 0 0 4.95 0
|
||
..@ debug : NULL
|
||
..@ inherit.blank: logi TRUE
|
||
$ axis.text.x.bottom : <ggplot2::element_text>
|
||
..@ family : NULL
|
||
..@ face : NULL
|
||
..@ italic : chr NA
|
||
..@ fontweight : num NA
|
||
..@ fontwidth : num NA
|
||
..@ colour : NULL
|
||
..@ size : NULL
|
||
..@ hjust : NULL
|
||
..@ vjust : NULL
|
||
..@ angle : NULL
|
||
..@ lineheight : NULL
|
||
..@ margin : <ggplot2::margin> num [1:4] 4.95 0 0 0
|
||
..@ debug : NULL
|
||
..@ inherit.blank: logi TRUE
|
||
$ axis.text.y : <ggplot2::element_text>
|
||
..@ family : NULL
|
||
..@ face : NULL
|
||
..@ italic : chr NA
|
||
..@ fontweight : num NA
|
||
..@ fontwidth : num NA
|
||
..@ colour : NULL
|
||
..@ size : NULL
|
||
..@ hjust : num 1
|
||
..@ vjust : NULL
|
||
..@ angle : NULL
|
||
..@ lineheight : NULL
|
||
..@ margin : <ggplot2::margin> num [1:4] 0 2.2 0 0
|
||
..@ debug : NULL
|
||
..@ inherit.blank: logi TRUE
|
||
$ axis.text.y.left : <ggplot2::element_text>
|
||
..@ family : NULL
|
||
..@ face : NULL
|
||
..@ italic : chr NA
|
||
..@ fontweight : num NA
|
||
..@ fontwidth : num NA
|
||
..@ colour : NULL
|
||
..@ size : NULL
|
||
..@ hjust : NULL
|
||
..@ vjust : NULL
|
||
..@ angle : NULL
|
||
..@ lineheight : NULL
|
||
..@ margin : <ggplot2::margin> num [1:4] 0 4.95 0 0
|
||
..@ debug : NULL
|
||
..@ inherit.blank: logi TRUE
|
||
$ axis.text.y.right : <ggplot2::element_text>
|
||
..@ family : NULL
|
||
..@ face : NULL
|
||
..@ italic : chr NA
|
||
..@ fontweight : num NA
|
||
..@ fontwidth : num NA
|
||
..@ colour : NULL
|
||
..@ size : NULL
|
||
..@ hjust : NULL
|
||
..@ vjust : NULL
|
||
..@ angle : NULL
|
||
..@ lineheight : NULL
|
||
..@ margin : <ggplot2::margin> num [1:4] 0 0 0 4.95
|
||
..@ debug : NULL
|
||
..@ inherit.blank: logi TRUE
|
||
$ axis.text.theta : NULL
|
||
$ axis.text.r : <ggplot2::element_text>
|
||
..@ family : NULL
|
||
..@ face : NULL
|
||
..@ italic : chr NA
|
||
..@ fontweight : num NA
|
||
..@ fontwidth : num NA
|
||
..@ colour : NULL
|
||
..@ size : NULL
|
||
..@ hjust : num 0.5
|
||
..@ vjust : NULL
|
||
..@ angle : NULL
|
||
..@ lineheight : NULL
|
||
..@ margin : <ggplot2::margin> num [1:4] 0 2.2 0 2.2
|
||
..@ debug : NULL
|
||
..@ inherit.blank: logi TRUE
|
||
$ axis.ticks : <ggplot2::element_blank>
|
||
$ axis.ticks.x : NULL
|
||
$ axis.ticks.x.top : NULL
|
||
$ axis.ticks.x.bottom : NULL
|
||
$ axis.ticks.y : NULL
|
||
$ axis.ticks.y.left : NULL
|
||
$ axis.ticks.y.right : NULL
|
||
$ axis.ticks.theta : NULL
|
||
$ axis.ticks.r : NULL
|
||
$ axis.minor.ticks.x.top : NULL
|
||
$ axis.minor.ticks.x.bottom : NULL
|
||
$ axis.minor.ticks.y.left : NULL
|
||
$ axis.minor.ticks.y.right : NULL
|
||
$ axis.minor.ticks.theta : NULL
|
||
$ axis.minor.ticks.r : NULL
|
||
$ axis.ticks.length : 'rel' num 0.5
|
||
$ axis.ticks.length.x : NULL
|
||
$ axis.ticks.length.x.top : NULL
|
||
$ axis.ticks.length.x.bottom : NULL
|
||
$ axis.ticks.length.y : NULL
|
||
$ axis.ticks.length.y.left : NULL
|
||
$ axis.ticks.length.y.right : NULL
|
||
$ axis.ticks.length.theta : NULL
|
||
$ axis.ticks.length.r : NULL
|
||
$ axis.minor.ticks.length : 'rel' num 0.75
|
||
$ axis.minor.ticks.length.x : NULL
|
||
$ axis.minor.ticks.length.x.top : NULL
|
||
$ axis.minor.ticks.length.x.bottom: NULL
|
||
$ axis.minor.ticks.length.y : NULL
|
||
$ axis.minor.ticks.length.y.left : NULL
|
||
$ axis.minor.ticks.length.y.right : NULL
|
||
$ axis.minor.ticks.length.theta : NULL
|
||
$ axis.minor.ticks.length.r : NULL
|
||
$ axis.line : <ggplot2::element_blank>
|
||
$ axis.line.x : NULL
|
||
$ axis.line.x.top : NULL
|
||
$ axis.line.x.bottom : NULL
|
||
$ axis.line.y : NULL
|
||
$ axis.line.y.left : NULL
|
||
$ axis.line.y.right : NULL
|
||
$ axis.line.theta : NULL
|
||
$ axis.line.r : NULL
|
||
$ legend.background : <ggplot2::element_blank>
|
||
$ legend.margin : NULL
|
||
$ legend.spacing : 'rel' num 2
|
||
$ legend.spacing.x : NULL
|
||
$ legend.spacing.y : NULL
|
||
$ legend.key : <ggplot2::element_blank>
|
||
$ legend.key.size : 'simpleUnit' num 1.2lines
|
||
..- attr(*, "unit")= int 3
|
||
$ legend.key.height : NULL
|
||
$ legend.key.width : NULL
|
||
$ legend.key.spacing : NULL
|
||
$ legend.key.spacing.x : NULL
|
||
$ legend.key.spacing.y : NULL
|
||
$ legend.key.justification : NULL
|
||
$ legend.frame : NULL
|
||
$ legend.ticks : NULL
|
||
$ legend.ticks.length : 'rel' num 0.2
|
||
$ legend.axis.line : NULL
|
||
$ legend.text : <ggplot2::element_text>
|
||
..@ family : NULL
|
||
..@ face : NULL
|
||
..@ italic : chr NA
|
||
..@ fontweight : num NA
|
||
..@ fontwidth : num NA
|
||
..@ colour : NULL
|
||
..@ size : 'rel' num 0.8
|
||
..@ hjust : NULL
|
||
..@ vjust : NULL
|
||
..@ angle : NULL
|
||
..@ lineheight : NULL
|
||
..@ margin : NULL
|
||
..@ debug : NULL
|
||
..@ inherit.blank: logi TRUE
|
||
$ legend.text.position : NULL
|
||
$ legend.title : <ggplot2::element_text>
|
||
..@ family : NULL
|
||
..@ face : NULL
|
||
..@ italic : chr NA
|
||
..@ fontweight : num NA
|
||
..@ fontwidth : num NA
|
||
..@ colour : NULL
|
||
..@ size : NULL
|
||
..@ hjust : num 0
|
||
..@ vjust : NULL
|
||
..@ angle : NULL
|
||
..@ lineheight : NULL
|
||
..@ margin : NULL
|
||
..@ debug : NULL
|
||
..@ inherit.blank: logi TRUE
|
||
$ legend.title.position : NULL
|
||
$ legend.position : chr "right"
|
||
$ legend.position.inside : NULL
|
||
$ legend.direction : NULL
|
||
$ legend.byrow : NULL
|
||
$ legend.justification : chr "center"
|
||
$ legend.justification.top : NULL
|
||
$ legend.justification.bottom : NULL
|
||
$ legend.justification.left : NULL
|
||
$ legend.justification.right : NULL
|
||
$ legend.justification.inside : NULL
|
||
[list output truncated]
|
||
@ complete: logi TRUE
|
||
@ validate: logi TRUE</code></pre>
|
||
</div>
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb31"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb31-1"><a href="#cb31-1" aria-hidden="true" tabindex="-1"></a><span class="fu">ggplotly</span>(target_dist)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output-display">
|
||
<div class="plotly html-widget html-fill-item" id="htmlwidget-8ccba399c016fdba09cc" style="width:100%;height:464px;"></div>
|
||
<script type="application/json" data-for="htmlwidget-8ccba399c016fdba09cc">{"x":{"data":[{"orientation":"v","width":[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1],"base":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"x":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149],"y":[0.00015392301501271171,0.00030132386837234239,0.00038350310520116304,0.00053612168788325857,0.0011961644471750561,0.0017264161895705417,0.0030660681931133798,0.0030438928434929047,0.002826052644279999,0.0028501846423963986,0.0033484777926917875,0.0043131055011824682,0.0056455831269069173,0.0062390998373372886,0.0072500348935648443,0.0088323113106022963,0.0097480228066948692,0.011073978270766236,0.012284491473578071,0.01356218294141664,0.01457833572696666,0.015850157249317456,0.017195679198348066,0.018500763745129576,0.019040798730004683,0.019756279863347669,0.020010644167817827,0.02011238988960589,0.020245441987328746,0.020348492141447422,0.020054994867058779,0.020593725419603269,0.020505676237286677,0.020795260214683472,0.020663512549291236,0.020429366945945628,0.020
172393776814238,0.019678013923510696,0.019276248765680907,0.018818393017634622,0.018153132529020358,0.018050734591066986,0.017780064882464124,0.01712523985249479,0.016584552651454375,0.016211485004898142,0.015528614679820562,0.014847701003238905,0.014439413683756034,0.013881768862417609,0.013565444022243181,0.013045627738492624,0.012589076422776953,0.012193833426600244,0.011642058550749592,0.011238336744423878,0.010719172676838629,0.010352627191935478,0.0098373764213420777,0.0095053983932002553,0.0090977632898826925,0.0087038247260365995,0.0084135885324744947,0.0080594351547121964,0.0074770061190920631,0.0071848132770340337,0.0070002361022518415,0.0066721713711018673,0.0063297578843151143,0.0061412674125410731,0.005865379974615747,0.0058458134896565038,0.0057231968505785814,0.0054975300573819781,0.005298604126963008,0.0050983737642134206,0.004904665563116915,0.0047442203864511224,0.0046307347736875126,0.0042361439936761123,0.0041878799974433122,0.0039791708245447205,0.0039269935313200723,0.0037202410069174046,0.0036367573377579679,0.0034573978922982401,0.003418264922379754,0.0033086926066079932,0.0031932503453484594,0.0029917155502682566,0.0029277983660680627,0.0027914851875186697,0.002728220219483784,0.002669520764606055,0.0026069080127364775,0.0025038578586177975,0.0024177653247971281,0.002364283599241864,0.0022547112834701032,0.0021718798304759743,0.0021255724827390992,0.0020936138906390025,0.0019912159526856306,0.0019573007120896094,0.0018836002854097939,0.0017485915391910172,0.0017570703493400225,0.0016488024658988778,0.0017257639734052335,0.0016024951181620028,0.0015392301501271168,0.0015235769621597226,0.0014798784790840797,0.0014492243193145991,0.0013592184885020812,0.0013174766539223628,0.0012718215223507957,0.0012405151464160069,0.0011752935298851969,0.0011792068268770455,0.0011002886708747653,0.0010800699697502143,0.0010422414321623445,0.0010246315956990257,0.0009633232761600643,0.00095745333067229134,0.00088049182316593554,0.00084331550174337382,0.000791
13820851872576,0.00076178848107986128,0.0007135244848470618,0.00066265162395302997,0.00063851962583663028,0.00062351865403454396,0.00057851573862828509,0.00056155811833027449,0.0005048153119484697,0.00047546558450960516,0.00043633261459111915,0.00040633067098694655,0.00038415532136647115,0.00034176127062144463,0.000318281488670353,0.00030001943604172617,0.0002850184642396399,0.00025632095296608347,0.00024131998116399715,0.00019827371425366251,7.8918156002280151e-05],"text":["target: 1<br />density: 1.539230e-04","target: 2<br />density: 3.013239e-04","target: 3<br />density: 3.835031e-04","target: 4<br />density: 5.361217e-04","target: 5<br />density: 1.196164e-03","target: 6<br />density: 1.726416e-03","target: 7<br />density: 3.066068e-03","target: 8<br />density: 3.043893e-03","target: 9<br />density: 2.826053e-03","target: 10<br />density: 2.850185e-03","target: 11<br />density: 3.348478e-03","target: 12<br />density: 4.313106e-03","target: 13<br />density: 5.645583e-03","target: 14<br />density: 6.239100e-03","target: 15<br />density: 7.250035e-03","target: 16<br />density: 8.832311e-03","target: 17<br />density: 9.748023e-03","target: 18<br />density: 1.107398e-02","target: 19<br />density: 1.228449e-02","target: 20<br />density: 1.356218e-02","target: 21<br />density: 1.457834e-02","target: 22<br />density: 1.585016e-02","target: 23<br />density: 1.719568e-02","target: 24<br />density: 1.850076e-02","target: 25<br />density: 1.904080e-02","target: 26<br />density: 1.975628e-02","target: 27<br />density: 2.001064e-02","target: 28<br />density: 2.011239e-02","target: 29<br />density: 2.024544e-02","target: 30<br />density: 2.034849e-02","target: 31<br />density: 2.005499e-02","target: 32<br />density: 2.059373e-02","target: 33<br />density: 2.050568e-02","target: 34<br />density: 2.079526e-02","target: 35<br />density: 2.066351e-02","target: 36<br />density: 2.042937e-02","target: 37<br />density: 2.017239e-02","target: 38<br />density: 1.967801e-02","target: 
39<br />density: 1.927625e-02","target: 40<br />density: 1.881839e-02","target: 41<br />density: 1.815313e-02","target: 42<br />density: 1.805073e-02","target: 43<br />density: 1.778006e-02","target: 44<br />density: 1.712524e-02","target: 45<br />density: 1.658455e-02","target: 46<br />density: 1.621149e-02","target: 47<br />density: 1.552861e-02","target: 48<br />density: 1.484770e-02","target: 49<br />density: 1.443941e-02","target: 50<br />density: 1.388177e-02","target: 51<br />density: 1.356544e-02","target: 52<br />density: 1.304563e-02","target: 53<br />density: 1.258908e-02","target: 54<br />density: 1.219383e-02","target: 55<br />density: 1.164206e-02","target: 56<br />density: 1.123834e-02","target: 57<br />density: 1.071917e-02","target: 58<br />density: 1.035263e-02","target: 59<br />density: 9.837376e-03","target: 60<br />density: 9.505398e-03","target: 61<br />density: 9.097763e-03","target: 62<br />density: 8.703825e-03","target: 63<br />density: 8.413589e-03","target: 64<br />density: 8.059435e-03","target: 65<br />density: 7.477006e-03","target: 66<br />density: 7.184813e-03","target: 67<br />density: 7.000236e-03","target: 68<br />density: 6.672171e-03","target: 69<br />density: 6.329758e-03","target: 70<br />density: 6.141267e-03","target: 71<br />density: 5.865380e-03","target: 72<br />density: 5.845813e-03","target: 73<br />density: 5.723197e-03","target: 74<br />density: 5.497530e-03","target: 75<br />density: 5.298604e-03","target: 76<br />density: 5.098374e-03","target: 77<br />density: 4.904666e-03","target: 78<br />density: 4.744220e-03","target: 79<br />density: 4.630735e-03","target: 80<br />density: 4.236144e-03","target: 81<br />density: 4.187880e-03","target: 82<br />density: 3.979171e-03","target: 83<br />density: 3.926994e-03","target: 84<br />density: 3.720241e-03","target: 85<br />density: 3.636757e-03","target: 86<br />density: 3.457398e-03","target: 87<br />density: 3.418265e-03","target: 88<br />density: 3.308693e-03","target: 
89<br />density: 3.193250e-03","target: 90<br />density: 2.991716e-03","target: 91<br />density: 2.927798e-03","target: 92<br />density: 2.791485e-03","target: 93<br />density: 2.728220e-03","target: 94<br />density: 2.669521e-03","target: 95<br />density: 2.606908e-03","target: 96<br />density: 2.503858e-03","target: 97<br />density: 2.417765e-03","target: 98<br />density: 2.364284e-03","target: 99<br />density: 2.254711e-03","target: 100<br />density: 2.171880e-03","target: 101<br />density: 2.125572e-03","target: 102<br />density: 2.093614e-03","target: 103<br />density: 1.991216e-03","target: 104<br />density: 1.957301e-03","target: 105<br />density: 1.883600e-03","target: 106<br />density: 1.748592e-03","target: 107<br />density: 1.757070e-03","target: 108<br />density: 1.648802e-03","target: 109<br />density: 1.725764e-03","target: 110<br />density: 1.602495e-03","target: 111<br />density: 1.539230e-03","target: 112<br />density: 1.523577e-03","target: 113<br />density: 1.479878e-03","target: 114<br />density: 1.449224e-03","target: 115<br />density: 1.359218e-03","target: 116<br />density: 1.317477e-03","target: 117<br />density: 1.271822e-03","target: 118<br />density: 1.240515e-03","target: 119<br />density: 1.175294e-03","target: 120<br />density: 1.179207e-03","target: 121<br />density: 1.100289e-03","target: 122<br />density: 1.080070e-03","target: 123<br />density: 1.042241e-03","target: 124<br />density: 1.024632e-03","target: 125<br />density: 9.633233e-04","target: 126<br />density: 9.574533e-04","target: 127<br />density: 8.804918e-04","target: 128<br />density: 8.433155e-04","target: 129<br />density: 7.911382e-04","target: 130<br />density: 7.617885e-04","target: 131<br />density: 7.135245e-04","target: 132<br />density: 6.626516e-04","target: 133<br />density: 6.385196e-04","target: 134<br />density: 6.235187e-04","target: 135<br />density: 5.785157e-04","target: 136<br />density: 5.615581e-04","target: 137<br />density: 5.048153e-04","target: 
138<br />density: 4.754656e-04","target: 139<br />density: 4.363326e-04","target: 140<br />density: 4.063307e-04","target: 141<br />density: 3.841553e-04","target: 142<br />density: 3.417613e-04","target: 143<br />density: 3.182815e-04","target: 144<br />density: 3.000194e-04","target: 145<br />density: 2.850185e-04","target: 146<br />density: 2.563210e-04","target: 147<br />density: 2.413200e-04","target: 148<br />density: 1.982737e-04","target: 149<br />density: 7.891816e-05"],"type":"bar","textposition":"none","marker":{"autocolorscale":false,"color":"rgba(228,203,249,0.8)","line":{"width":0.37795275590551186,"color":"rgba(255,255,255,1)"}},"showlegend":false,"xaxis":"x","yaxis":"y","hoverinfo":"text","frame":null},{"x":[1,1.2896281800391389,1.5792563600782779,1.8688845401174168,2.1585127201565557,2.4481409001956944,2.7377690802348336,3.0273972602739727,3.3170254403131114,3.6066536203522501,3.8962818003913893,4.1859099804305284,4.4755381604696671,4.7651663405088058,5.0547945205479454,5.3444227005870841,5.6340508806262228,5.9236790606653615,6.2133072407045002,6.5029354207436398,6.7925636007827785,7.0821917808219172,7.3718199608610568,7.6614481409001955,7.9510763209393343,8.2407045009784738,8.5303326810176117,8.8199608610567513,9.1095890410958908,9.3992172211350287,9.6888454011741683,9.9784735812133061,10.268101761252446,10.557729941291585,10.847358121330723,11.136986301369863,11.426614481409,11.71624266144814,12.00587084148728,12.295499021526417,12.585127201565557,12.874755381604697,13.164383561643834,13.454011741682974,13.743639921722114,14.033268101761252,14.322896281800391,14.612524461839529,14.902152641878669,15.191780821917808,15.481409001956946,15.771037181996086,16.060665362035223,16.350293542074365,16.639921722113503,16.92954990215264,17.219178082191782,17.50880626223092,17.798434442270057,18.088062622309195,18.377690802348337,18.667318982387474,18.956947162426612,19.246575342465754,19.536203522504891,19.825831702544029,20.115459882583171,20.40508806262230
8,20.694716242661446,20.984344422700588,21.273972602739725,21.563600782778863,21.853228962818001,22.142857142857142,22.43248532289628,22.722113502935418,23.011741682974559,23.301369863013697,23.590998043052835,23.880626223091976,24.170254403131114,24.459882583170252,24.749510763209393,25.039138943248531,25.328767123287669,25.61839530332681,25.908023483365948,26.197651663405086,26.487279843444227,26.776908023483365,27.066536203522503,27.356164383561641,27.645792563600782,27.93542074363992,28.225048923679058,28.514677103718199,28.804305283757337,29.093933463796475,29.383561643835616,29.673189823874754,29.962818003913892,30.252446183953033,30.542074363992171,30.831702544031309,31.121330724070447,31.410958904109588,31.700587084148726,31.990215264187864,32.279843444227005,32.569471624266143,32.859099804305281,33.148727984344418,33.438356164383563,33.727984344422701,34.017612524461839,34.307240704500977,34.596868884540115,34.886497064579252,35.17612524461839,35.465753424657535,35.755381604696673,36.045009784735811,36.334637964774949,36.624266144814086,36.913894324853224,37.203522504892369,37.493150684931507,37.782778864970645,38.072407045009783,38.36203522504892,38.651663405088058,38.941291585127196,39.230919765166341,39.520547945205479,39.810176125244617,40.099804305283755,40.389432485322892,40.67906066536203,40.968688845401175,41.258317025440313,41.547945205479451,41.837573385518589,42.127201565557726,42.416829745596864,42.706457925636002,42.996086105675147,43.285714285714285,43.575342465753423,43.86497064579256,44.154598825831698,44.444227005870836,44.733855185909981,45.023483365949119,45.313111545988257,45.602739726027394,45.892367906066532,46.18199608610567,46.471624266144815,46.761252446183953,47.050880626223091,47.340508806262228,47.630136986301366,47.919765166340504,48.209393346379642,48.499021526418787,48.788649706457925,49.078277886497062,49.3679060665362,49.657534246575338,49.947162426614476,50.236790606653621,50.526418786692759,50.816046966731896,51.1056751467
71034,51.395303326810172,51.68493150684931,51.974559686888455,52.264187866927593,52.55381604696673,52.843444227005868,53.133072407045006,53.422700587084144,53.712328767123282,54.001956947162427,54.291585127201564,54.581213307240702,54.87084148727984,55.160469667318978,55.450097847358116,55.739726027397261,56.029354207436398,56.318982387475536,56.608610567514674,56.898238747553812,57.18786692759295,57.477495107632087,57.767123287671232,58.05675146771037,58.346379647749508,58.636007827788646,58.925636007827784,59.215264187866921,59.504892367906066,59.794520547945204,60.084148727984342,60.37377690802348,60.663405088062618,60.953033268101755,61.242661448140893,61.532289628180038,61.821917808219176,62.111545988258314,62.401174168297452,62.690802348336589,62.980430528375727,63.270058708414872,63.55968688845401,63.849315068493148,64.138943248532286,64.428571428571431,64.718199608610561,65.007827788649706,65.297455968688837,65.587084148727982,65.876712328767127,66.166340508806258,66.455968688845402,66.745596868884533,67.035225048923678,67.324853228962809,67.614481409001954,67.904109589041099,68.193737769080229,68.483365949119374,68.772994129158505,69.06262230919765,69.352250489236781,69.641878669275926,69.93150684931507,70.221135029354201,70.510763209393346,70.800391389432477,71.090019569471622,71.379647749510752,71.669275929549897,71.958904109589042,72.248532289628173,72.538160469667318,72.827788649706449,73.117416829745594,73.407045009784738,73.696673189823869,73.986301369863014,74.275929549902145,74.56555772994129,74.85518590998042,75.144814090019565,75.43444227005871,75.724070450097841,76.013698630136986,76.303326810176117,76.592954990215262,76.882583170254392,77.172211350293537,77.461839530332682,77.751467710371813,78.041095890410958,78.330724070450088,78.620352250489233,78.909980430528378,79.199608610567509,79.489236790606654,79.778864970645785,80.06849315068493,80.35812133072406,80.647749510763205,80.93737769080235,81.227005870841481,81.516634050880626,81.80626223091
9756,82.095890410958901,82.385518590998032,82.675146771037177,82.964774951076322,83.254403131115453,83.544031311154598,83.833659491193728,84.123287671232873,84.412915851272004,84.702544031311149,84.992172211350294,85.281800391389424,85.571428571428569,85.8610567514677,86.150684931506845,86.44031311154599,86.729941291585121,87.019569471624266,87.309197651663396,87.598825831702541,87.888454011741672,88.178082191780817,88.467710371819962,88.757338551859092,89.046966731898237,89.336594911937368,89.626223091976513,89.915851272015644,90.205479452054789,90.495107632093934,90.784735812133064,91.074363992172209,91.36399217221134,91.653620352250485,91.94324853228963,92.232876712328761,92.522504892367905,92.812133072407036,93.101761252446181,93.391389432485312,93.681017612524457,93.970645792563602,94.260273972602732,94.549902152641877,94.839530332681008,95.129158512720153,95.418786692759284,95.708414872798429,95.998043052837573,96.287671232876704,96.577299412915849,96.86692759295498,97.156555772994125,97.44618395303327,97.7358121330724,98.025440313111545,98.315068493150676,98.604696673189821,98.894324853228952,99.183953033268097,99.473581213307241,99.763209393346372,100.05283757338552,100.34246575342465,100.63209393346379,100.92172211350292,101.21135029354207,101.50097847358121,101.79060665362034,102.08023483365949,102.36986301369862,102.65949119373776,102.94911937377691,103.23874755381604,103.52837573385519,103.81800391389432,104.10763209393346,104.39726027397259,104.68688845401174,104.97651663405088,105.26614481409001,105.55577299412916,105.84540117416829,106.13502935420743,106.42465753424656,106.71428571428571,107.00391389432485,107.29354207436398,107.58317025440313,107.87279843444226,108.1624266144814,108.45205479452054,108.74168297455968,109.03131115459882,109.32093933463796,109.6105675146771,109.90019569471623,110.18982387475538,110.47945205479452,110.76908023483365,111.0587084148728,111.34833659491193,111.63796477495107,111.9275929549902,112.21722113502935,112.506849315
06849,112.79647749510762,113.08610567514677,113.3757338551859,113.66536203522504,113.95499021526417,114.24461839530332,114.53424657534246,114.8238747553816,115.11350293542074,115.40313111545987,115.69275929549902,115.98238747553816,116.27201565557729,116.56164383561644,116.85127201565557,117.14090019569471,117.43052837573384,117.72015655577299,118.00978473581213,118.29941291585126,118.58904109589041,118.87866927592954,119.16829745596868,119.45792563600781,119.74755381604696,120.0371819960861,120.32681017612524,120.61643835616438,120.90606653620351,121.19569471624266,121.48532289628179,121.77495107632093,122.06457925636008,122.35420743639921,122.64383561643835,122.93346379647748,123.22309197651663,123.51272015655577,123.8023483365949,124.09197651663405,124.38160469667318,124.67123287671232,124.96086105675145,125.2504892367906,125.54011741682974,125.82974559686888,126.11937377690802,126.40900195694715,126.6986301369863,126.98825831702543,127.27788649706457,127.56751467710372,127.85714285714285,128.14677103718199,128.43639921722112,128.72602739726025,129.01565557729941,129.30528375733854,129.59491193737767,129.88454011741683,130.17416829745596,130.46379647749509,130.75342465753425,131.04305283757338,131.33268101761252,131.62230919765165,131.9119373776908,132.20156555772994,132.49119373776907,132.78082191780823,133.07045009784736,133.36007827788649,133.64970645792562,133.93933463796478,134.22896281800391,134.51859099804304,134.8082191780822,135.09784735812133,135.38747553816046,135.67710371819959,135.96673189823875,136.25636007827788,136.54598825831701,136.83561643835617,137.1252446183953,137.41487279843443,137.70450097847356,137.99412915851272,138.28375733855185,138.57338551859098,138.86301369863014,139.15264187866927,139.4422700587084,139.73189823874753,140.02152641878669,140.31115459882582,140.60078277886495,140.89041095890411,141.18003913894324,141.46966731898237,141.7592954990215,142.04892367906066,142.33855185909979,142.62818003913893,142.91780821917808,143.207436
39921722,143.49706457925635,143.78669275929551,144.07632093933464,144.36594911937377,144.6555772994129,144.94520547945206,145.23483365949119,145.52446183953032,145.81409001956948,146.10371819960861,146.39334637964774,146.68297455968687,146.97260273972603,147.26223091976516,147.55185909980429,147.84148727984345,148.13111545988258,148.42074363992171,148.71037181996084,149],"y":[0.00015072827263546749,0.00018720646963921238,0.00022655538128141942,0.00026829258949761955,0.00031241792526217233,0.00035965098085447716,0.00041169053338724001,0.00047136769866223277,0.00054134742822363132,0.0006245115550888577,0.00072332171697761397,0.0008395201790307921,0.00097395997643770808,0.0011265180938168219,0.001296009944696104,0.0014800449728370865,0.0016748353872331637,0.00187505887191948,0.0020739342437316099,0.0022636568274881541,0.002436245558482221,0.0025847022875849675,0.0027042345091920889,0.0027920214462666144,0.0028511866693052179,0.0028883324407094502,0.0029114155801448598,0.0029294116675736598,0.0029513073874254028,0.0029853437144068879,0.0030385350482721668,0.003116403133295175,0.0032228348945200669,0.0033599928540831653,0.0035282528459724357,0.0037261915597486253,0.0039506781319057433,0.0041971312793874227,0.0044599851178189842,0.0047333645956874233,0.0050119723954240199,0.0052915283899387976,0.0055705544934840263,0.0058497521061158762,0.0061315780748266513,0.0064194263375911206,0.0067166114828030024,0.0070254201352082961,0.0073464800329210971,0.0076786077659773298,0.0080191602127096511,0.0083647709304270016,0.0087122432528602897,0.0090593303135517098,0.0094051737067935848,0.0097502867049654596,0.010096174595384207,0.010444707905164972,0.01079692060956221,0.011152929742282365,0.011511873922277252,0.01187214798864109,0.012231791564286547,0.012588923351207348,0.012942136022648104,0.013290792012121619,0.013635178323301701,0.013976491152750304,0.01431663712143713,0.014657863750310161,0.015002266540226633,0.015351254681851906,0.015705079526353312,0.016062547752839485,0.016420
411107099075,0.0167744079634966,0.017119763898065372,0.017451757936735045,0.017766316050915314,0.018060421916404321,0.018332266953324197,0.018581141966740253,0.018807150590120007,0.019010870211577616,0.019193082252040285,0.019354644694596558,0.019496508913501825,0.019619821879695867,0.019726028783729154,0.019816888536856689,0.019893738306870665,0.019959732974565894,0.020016830521865561,0.020066622315183035,0.02011020633425099,0.020148175680157698,0.020180749615537914,0.020208034264331355,0.020230349106999328,0.020248515135145827,0.020263993288339992,0.020278798674306597,0.020295189563471261,0.020315214531390423,0.020340261148013059,0.020370755384739675,0.020406184741911228,0.020444976360194052,0.020484919446116016,0.020523882641164823,0.020559782711248475,0.020590663409160929,0.020614728145245318,0.020630373886403777,0.020636245323436792,0.020631299936781079,0.020614861769970658,0.020586646023658201,0.020546749254595539,0.020495610019813485,0.020433946431956569,0.020362672727266256,0.020282794473335156,0.02019495914918619,0.020100261573896584,0.019999387649522464,0.01989263916812133,0.019780152786625958,0.019662018218109659,0.019538415505057739,0.019409747641918069,0.019276753548130125,0.019140591100022121,0.01900287513286842,0.018865643414757254,0.018731214901581257,0.018601912719038115,0.018479658359317443,0.018365500124820473,0.018259223678566985,0.018159196862833321,0.018061625001507077,0.01796260989378809,0.017858440996848985,0.017746334718715347,0.017624918230401496,0.017494342923778366,0.017356028686550444,0.017212142994154276,0.017064977864400921,0.016916392237290235,0.016767445760706056,0.016618283040147168,0.01646825886550193,0.016316243052679251,0.016161016560977595,0.016001544847886424,0.015837552180181565,0.01566989049538435,0.015499982214844112,0.015329755550610264,0.015161303885549949,0.014996511844960375,0.0148367393004155,0.014682643832006414,0.014534182757573089,0.014390777666276638,0.014251569163348193,0.014115661551917541,0.013982271376758485,0.0
13850746548475559,0.013720490605413372,0.013590875050209419,0.013461186790099992,0.013330772610325855,0.01319928036817453,0.013066688806546642,0.012933280079685602,0.012799504284764293,0.012665802341255878,0.012532466584951057,0.012399591835451897,0.012267120516743992,0.012134940463106212,0.012002975050881583,0.011871219287383529,0.01173971253942983,0.011608477313513065,0.011477472138274812,0.011346595096441208,0.011215739312126303,0.011084885787762113,0.01095413856203024,0.010823688592781214,0.010693752285368173,0.010564514654843619,0.010436107612912856,0.010308630842433527,0.010182197898087587,0.010056976890335755,0.0099331996032798221,0.0098111317645818829,0.009691019542214456,0.0095730404413592058,0.0094572827291537868,0.0093437581205680718,0.0092324945081779554,0.0091234107677156621,0.0090162513849790914,0.0089106914808226474,0.0088062021513439122,0.0087020464594983159,0.0085973435494206569,0.0084911971031480515,0.0083828646528792353,0.0082719321724600559,0.0081584545954510868,0.0080430264669662504,0.0079267570713706537,0.0078111408567897766,0.0076978359035682455,0.0075883871988565079,0.0074839509294549779,0.0073853741039138759,0.0072921792612490269,0.0072034033135087603,0.0071177725276728252,0.0070338996926648115,0.0069505578152862174,0.0068669013288729531,0.0067825929306388763,0.0066978163685923195,0.0066131839188906987,0.0065295745534431661,0.0064479553290159759,0.0063692368830316256,0.006294193086585893,0.0062234424131332507,0.0061574587813407475,0.0060966217293636415,0.0060412886862858829,0.0059909245976910901,0.0059449108881007877,0.0059022585265567197,0.0058617056565168054,0.0058218840347017244,0.0057815101689639671,0.0057395540718510199,0.0056953512221100226,0.0056486416643147163,0.0055995361923133922,0.0055484210130960611,0.0054958215142310411,0.0054422553376475532,0.0053881131786181948,0.0053336052812555188,0.0052787848340597725,0.0052236840197475206,0.005168438807930149,0.0051133036462655442,0.0050585930295925612,0.0050045487248513727,0.0049512071543
331062,0.0048983280152070348,0.004845415255477447,0.0047918239125311667,0.0047369190284060232,0.0046802436872929332,0.0046216587547676623,0.0045614277785729664,0.0045002291293790271,0.0044390835836824339,0.0043791960114737778,0.0043219252192255135,0.0042680389522151505,0.0042178160143201381,0.0041710704560273537,0.0041272417439695338,0.0040855791767524345,0.0040453262236902678,0.0040058519428732156,0.0039667100060995462,0.0039276374839122439,0.0038885217100317129,0.0038493616266111925,0.0038102371044485138,0.0037712867275635433,0.0037326891744529464,0.0036946459048343228,0.0036573804580272229,0.0036211606272654401,0.003586135651309929,0.0035524522035485064,0.0035201684089571973,0.0034892062638886357,0.0034593206436511941,0.0034301024949601163,0.0034010220646944873,0.0033715036311906029,0.0033410130075353429,0.0033091368442539726,0.0032756379969827803,0.0032404802361806471,0.0032038237402445967,0.0031659970859637203,0.0031274518490057987,0.0030887188780125046,0.0030503664062618272,0.0030128448808523743,0.0029765072942231057,0.0029416204892914657,0.0029083927739649092,0.0028770038038317897,0.0028476147464278397,0.0028203459551258249,0.0027952264111691595,0.002772135936431002,0.0027507691949370896,0.0027306451231452599,0.002711168172029555,0.0026917260236783776,0.0026717925288502645,0.0026510027018401931,0.0026291180667344363,0.0026062067615310945,0.0025824165365619092,0.0025579616652803084,0.0025331007916632436,0.0025081096425308663,0.0024832475420322846,0.0024587170853865854,0.0024346266465101406,0.0024109707908996094,0.0023876405783678427,0.0023644656510534096,0.0023412780152762985,0.0023179792542972,0.0022945918337041317,0.0022712804908062603,0.0022483504038325799,0.0022262321019934582,0.0022052581608834562,0.002185664252777636,0.00216749976845223,0.0021505993390827562,0.0021346083016088374,0.0021190575768659644,0.0021034661800040841,0.0020874396517303762,0.002070734308192621,0.0020532696664101045,0.0020350899284124965,0.0020162932889365584,0.0019969588633213125,0.
0019771014210321967,0.0019566734218163626,0.001935590856825192,0.001913887461532349,0.0018917596899804246,0.0018695367835611014,0.0018476452012008223,0.0018265193578176813,0.0018065117524032226,0.0017878360930274487,0.0017705602091203992,0.0017546418942309871,0.0017399815696396651,0.0017264599615201528,0.0017139398037278558,0.0017022331142766212,0.0016910589348072022,0.00168002808674667,0.0016686834802639805,0.0016565340787089919,0.001643375609712853,0.0016292591523953991,0.0016144889966779377,0.0015995381279327848,0.0015849145427922189,0.0015710282921301384,0.0015580992004501166,0.0015461251611377314,0.001534907868495166,0.0015241160338427457,0.0015133602611308804,0.0015022583168123064,0.0014904801867758554,0.0014777734657861524,0.0014639768668521654,0.0014490101946520648,0.0014329172728619955,0.001415968437608386,0.0013985016626618446,0.0013809467155435485,0.0013637663589151721,0.001347381155151927,0.001332096317305242,0.001318049767218855,0.0013051944095046086,0.0012933180155656296,0.0012820949727075845,0.0012711579163485745,0.0012601742932042399,0.001248912323474754,0.001237281985097251,0.001225340096090724,0.0012132595131552253,0.0012012709801376548,0.0011895321527124355,0.0011780903621630748,0.0011668799632372588,0.0011557606398755298,0.0011445832359712551,0.0011332562845339496,0.0011217872807931879,0.0011102837559228006,0.0010989153798306446,0.0010878529503099517,0.0010772077021968718,0.001066992668875953,0.001057118452166767,0.0010474230105869447,0.0010377237532243711,0.0010278589912072699,0.0010177760122137144,0.0010075018244189637,0.00099710809927129433,0.0009866600703351601,0.00097616833402570805,0.00096556288963215125,0.00095470010136521012,0.0009434005657573102,0.00093150405754165101,0.0009189215652232655,0.00090566625034425093,0.00089185395448265195,0.00087767572745318055,0.00086335463014565027,0.00084910272064549327,0.00083509876303680718,0.00082147241408438696,0.00080826372057731931,0.00079548124074730979,0.00078310181249895507,0.00077108242234606196
,0.00075937292251008126,0.00074792802705466073,0.00073671581794195048,0.00072572030856174865,0.00071493745441643278,0.00070436641340331798,0.00069399965917082496,0.00068381596333193207,0.00067377919689610261,0.00066384385068840525,0.00065396586786714366,0.00064411666008322877,0.00063429380147330989,0.0006245240056096062,0.00061485035017238795,0.00060531250658533704,0.00059592385209143413,0.00058665229124443992,0.00057741275527636274,0.0005680769156142881,0.00055850041637027373,0.00054856146093296154,0.00053819936496852963,0.00052744010040614824,0.00051639908676647493,0.0005052587981523763,0.00049422751616500621,0.00048349291465728977,0.00047321245646544154,0.00046339659579503671,0.00045400346472462832,0.00044493945237490434,0.00043608227643427089,0.00042730162638960928,0.00041847622897970231,0.0004095086862167187,0.00040033944542542781,0.00039095937480945514,0.00038141813997222176,0.00037182445194154667,0.00036233516963567387,0.00035313307504784489,0.00034439686424691157,0.00033626997906422524,0.000328853642450306,0.00032216401962437293,0.00031609834695829647,0.00031053120234521026,0.00030530336345281346,0.00030024054978848786,0.00029517148839390387,0.00028994452263885932,0.00028444001538413695,0.00027857354094006521,0.00027228471320747449,0.00026550987913106941,0.00025814334049504688,0.00024999894732540955,0.00024078861019604682,0.00023013372855309634,0.00021761863708793581,0.00020276546243522915,0.00018547612056653246,0.00016595851920633226,0.00014471286226446566,0.00012253846147953327,0.00010042865498515228],"text":["density: 0.0001507283<br />target: 1.000000","density: 0.0001872065<br />target: 1.289628","density: 0.0002265554<br />target: 1.579256","density: 0.0002682926<br />target: 1.868885","density: 0.0003124179<br />target: 2.158513","density: 0.0003596510<br />target: 2.448141","density: 0.0004116905<br />target: 2.737769","density: 0.0004713677<br />target: 3.027397","density: 0.0005413474<br />target: 3.317025","density: 0.0006245116<br />target: 
3.606654","density: 0.0007233217<br />target: 3.896282","density: 0.0008395202<br />target: 4.185910","density: 0.0009739600<br />target: 4.475538","density: 0.0011265181<br />target: 4.765166","density: 0.0012960099<br />target: 5.054795","density: 0.0014800450<br />target: 5.344423","density: 0.0016748354<br />target: 5.634051","density: 0.0018750589<br />target: 5.923679","density: 0.0020739342<br />target: 6.213307","density: 0.0022636568<br />target: 6.502935","density: 0.0024362456<br />target: 6.792564","density: 0.0025847023<br />target: 7.082192","density: 0.0027042345<br />target: 7.371820","density: 0.0027920214<br />target: 7.661448","density: 0.0028511867<br />target: 7.951076","density: 0.0028883324<br />target: 8.240705","density: 0.0029114156<br />target: 8.530333","density: 0.0029294117<br />target: 8.819961","density: 0.0029513074<br />target: 9.109589","density: 0.0029853437<br />target: 9.399217","density: 0.0030385350<br />target: 9.688845","density: 0.0031164031<br />target: 9.978474","density: 0.0032228349<br />target: 10.268102","density: 0.0033599929<br />target: 10.557730","density: 0.0035282528<br />target: 10.847358","density: 0.0037261916<br />target: 11.136986","density: 0.0039506781<br />target: 11.426614","density: 0.0041971313<br />target: 11.716243","density: 0.0044599851<br />target: 12.005871","density: 0.0047333646<br />target: 12.295499","density: 0.0050119724<br />target: 12.585127","density: 0.0052915284<br />target: 12.874755","density: 0.0055705545<br />target: 13.164384","density: 0.0058497521<br />target: 13.454012","density: 0.0061315781<br />target: 13.743640","density: 0.0064194263<br />target: 14.033268","density: 0.0067166115<br />target: 14.322896","density: 0.0070254201<br />target: 14.612524","density: 0.0073464800<br />target: 14.902153","density: 0.0076786078<br />target: 15.191781","density: 0.0080191602<br />target: 15.481409","density: 0.0083647709<br />target: 15.771037","density: 0.0087122433<br />target: 
16.060665","density: 0.0090593303<br />target: 16.350294","density: 0.0094051737<br />target: 16.639922","density: 0.0097502867<br />target: 16.929550","density: 0.0100961746<br />target: 17.219178","density: 0.0104447079<br />target: 17.508806","density: 0.0107969206<br />target: 17.798434","density: 0.0111529297<br />target: 18.088063","density: 0.0115118739<br />target: 18.377691","density: 0.0118721480<br />target: 18.667319","density: 0.0122317916<br />target: 18.956947","density: 0.0125889234<br />target: 19.246575","density: 0.0129421360<br />target: 19.536204","density: 0.0132907920<br />target: 19.825832","density: 0.0136351783<br />target: 20.115460","density: 0.0139764912<br />target: 20.405088","density: 0.0143166371<br />target: 20.694716","density: 0.0146578638<br />target: 20.984344","density: 0.0150022665<br />target: 21.273973","density: 0.0153512547<br />target: 21.563601","density: 0.0157050795<br />target: 21.853229","density: 0.0160625478<br />target: 22.142857","density: 0.0164204111<br />target: 22.432485","density: 0.0167744080<br />target: 22.722114","density: 0.0171197639<br />target: 23.011742","density: 0.0174517579<br />target: 23.301370","density: 0.0177663161<br />target: 23.590998","density: 0.0180604219<br />target: 23.880626","density: 0.0183322670<br />target: 24.170254","density: 0.0185811420<br />target: 24.459883","density: 0.0188071506<br />target: 24.749511","density: 0.0190108702<br />target: 25.039139","density: 0.0191930823<br />target: 25.328767","density: 0.0193546447<br />target: 25.618395","density: 0.0194965089<br />target: 25.908023","density: 0.0196198219<br />target: 26.197652","density: 0.0197260288<br />target: 26.487280","density: 0.0198168885<br />target: 26.776908","density: 0.0198937383<br />target: 27.066536","density: 0.0199597330<br />target: 27.356164","density: 0.0200168305<br />target: 27.645793","density: 0.0200666223<br />target: 27.935421","density: 0.0201102063<br />target: 28.225049","density: 
0.0201481757<br />target: 28.514677","density: 0.0201807496<br />target: 28.804305","density: 0.0202080343<br />target: 29.093933","density: 0.0202303491<br />target: 29.383562","density: 0.0202485151<br />target: 29.673190","density: 0.0202639933<br />target: 29.962818","density: 0.0202787987<br />target: 30.252446","density: 0.0202951896<br />target: 30.542074","density: 0.0203152145<br />target: 30.831703","density: 0.0203402611<br />target: 31.121331","density: 0.0203707554<br />target: 31.410959","density: 0.0204061847<br />target: 31.700587","density: 0.0204449764<br />target: 31.990215","density: 0.0204849194<br />target: 32.279843","density: 0.0205238826<br />target: 32.569472","density: 0.0205597827<br />target: 32.859100","density: 0.0205906634<br />target: 33.148728","density: 0.0206147281<br />target: 33.438356","density: 0.0206303739<br />target: 33.727984","density: 0.0206362453<br />target: 34.017613","density: 0.0206312999<br />target: 34.307241","density: 0.0206148618<br />target: 34.596869","density: 0.0205866460<br />target: 34.886497","density: 0.0205467493<br />target: 35.176125","density: 0.0204956100<br />target: 35.465753","density: 0.0204339464<br />target: 35.755382","density: 0.0203626727<br />target: 36.045010","density: 0.0202827945<br />target: 36.334638","density: 0.0201949591<br />target: 36.624266","density: 0.0201002616<br />target: 36.913894","density: 0.0199993876<br />target: 37.203523","density: 0.0198926392<br />target: 37.493151","density: 0.0197801528<br />target: 37.782779","density: 0.0196620182<br />target: 38.072407","density: 0.0195384155<br />target: 38.362035","density: 0.0194097476<br />target: 38.651663","density: 0.0192767535<br />target: 38.941292","density: 0.0191405911<br />target: 39.230920","density: 0.0190028751<br />target: 39.520548","density: 0.0188656434<br />target: 39.810176","density: 0.0187312149<br />target: 40.099804","density: 0.0186019127<br />target: 40.389432","density: 0.0184796584<br />target: 
40.679061","density: 0.0183655001<br />target: 40.968689","density: 0.0182592237<br />target: 41.258317","density: 0.0181591969<br />target: 41.547945","density: 0.0180616250<br />target: 41.837573","density: 0.0179626099<br />target: 42.127202","density: 0.0178584410<br />target: 42.416830","density: 0.0177463347<br />target: 42.706458","density: 0.0176249182<br />target: 42.996086","density: 0.0174943429<br />target: 43.285714","density: 0.0173560287<br />target: 43.575342","density: 0.0172121430<br />target: 43.864971","density: 0.0170649779<br />target: 44.154599","density: 0.0169163922<br />target: 44.444227","density: 0.0167674458<br />target: 44.733855","density: 0.0166182830<br />target: 45.023483","density: 0.0164682589<br />target: 45.313112","density: 0.0163162431<br />target: 45.602740","density: 0.0161610166<br />target: 45.892368","density: 0.0160015448<br />target: 46.181996","density: 0.0158375522<br />target: 46.471624","density: 0.0156698905<br />target: 46.761252","density: 0.0154999822<br />target: 47.050881","density: 0.0153297556<br />target: 47.340509","density: 0.0151613039<br />target: 47.630137","density: 0.0149965118<br />target: 47.919765","density: 0.0148367393<br />target: 48.209393","density: 0.0146826438<br />target: 48.499022","density: 0.0145341828<br />target: 48.788650","density: 0.0143907777<br />target: 49.078278","density: 0.0142515692<br />target: 49.367906","density: 0.0141156616<br />target: 49.657534","density: 0.0139822714<br />target: 49.947162","density: 0.0138507465<br />target: 50.236791","density: 0.0137204906<br />target: 50.526419","density: 0.0135908751<br />target: 50.816047","density: 0.0134611868<br />target: 51.105675","density: 0.0133307726<br />target: 51.395303","density: 0.0131992804<br />target: 51.684932","density: 0.0130666888<br />target: 51.974560","density: 0.0129332801<br />target: 52.264188","density: 0.0127995043<br />target: 52.553816","density: 0.0126658023<br />target: 52.843444","density: 
0.0125324666<br />target: 53.133072","density: 0.0123995918<br />target: 53.422701","density: 0.0122671205<br />target: 53.712329","density: 0.0121349405<br />target: 54.001957","density: 0.0120029751<br />target: 54.291585","density: 0.0118712193<br />target: 54.581213","density: 0.0117397125<br />target: 54.870841","density: 0.0116084773<br />target: 55.160470","density: 0.0114774721<br />target: 55.450098","density: 0.0113465951<br />target: 55.739726","density: 0.0112157393<br />target: 56.029354","density: 0.0110848858<br />target: 56.318982","density: 0.0109541386<br />target: 56.608611","density: 0.0108236886<br />target: 56.898239","density: 0.0106937523<br />target: 57.187867","density: 0.0105645147<br />target: 57.477495","density: 0.0104361076<br />target: 57.767123","density: 0.0103086308<br />target: 58.056751","density: 0.0101821979<br />target: 58.346380","density: 0.0100569769<br />target: 58.636008","density: 0.0099331996<br />target: 58.925636","density: 0.0098111318<br />target: 59.215264","density: 0.0096910195<br />target: 59.504892","density: 0.0095730404<br />target: 59.794521","density: 0.0094572827<br />target: 60.084149","density: 0.0093437581<br />target: 60.373777","density: 0.0092324945<br />target: 60.663405","density: 0.0091234108<br />target: 60.953033","density: 0.0090162514<br />target: 61.242661","density: 0.0089106915<br />target: 61.532290","density: 0.0088062022<br />target: 61.821918","density: 0.0087020465<br />target: 62.111546","density: 0.0085973435<br />target: 62.401174","density: 0.0084911971<br />target: 62.690802","density: 0.0083828647<br />target: 62.980431","density: 0.0082719322<br />target: 63.270059","density: 0.0081584546<br />target: 63.559687","density: 0.0080430265<br />target: 63.849315","density: 0.0079267571<br />target: 64.138943","density: 0.0078111409<br />target: 64.428571","density: 0.0076978359<br />target: 64.718200","density: 0.0075883872<br />target: 65.007828","density: 0.0074839509<br />target: 
65.297456","density: 0.0073853741<br />target: 65.587084","density: 0.0072921793<br />target: 65.876712","density: 0.0072034033<br />target: 66.166341","density: 0.0071177725<br />target: 66.455969","density: 0.0070338997<br />target: 66.745597","density: 0.0069505578<br />target: 67.035225","density: 0.0068669013<br />target: 67.324853","density: 0.0067825929<br />target: 67.614481","density: 0.0066978164<br />target: 67.904110","density: 0.0066131839<br />target: 68.193738","density: 0.0065295746<br />target: 68.483366","density: 0.0064479553<br />target: 68.772994","density: 0.0063692369<br />target: 69.062622","density: 0.0062941931<br />target: 69.352250","density: 0.0062234424<br />target: 69.641879","density: 0.0061574588<br />target: 69.931507","density: 0.0060966217<br />target: 70.221135","density: 0.0060412887<br />target: 70.510763","density: 0.0059909246<br />target: 70.800391","density: 0.0059449109<br />target: 71.090020","density: 0.0059022585<br />target: 71.379648","density: 0.0058617057<br />target: 71.669276","density: 0.0058218840<br />target: 71.958904","density: 0.0057815102<br />target: 72.248532","density: 0.0057395541<br />target: 72.538160","density: 0.0056953512<br />target: 72.827789","density: 0.0056486417<br />target: 73.117417","density: 0.0055995362<br />target: 73.407045","density: 0.0055484210<br />target: 73.696673","density: 0.0054958215<br />target: 73.986301","density: 0.0054422553<br />target: 74.275930","density: 0.0053881132<br />target: 74.565558","density: 0.0053336053<br />target: 74.855186","density: 0.0052787848<br />target: 75.144814","density: 0.0052236840<br />target: 75.434442","density: 0.0051684388<br />target: 75.724070","density: 0.0051133036<br />target: 76.013699","density: 0.0050585930<br />target: 76.303327","density: 0.0050045487<br />target: 76.592955","density: 0.0049512072<br />target: 76.882583","density: 0.0048983280<br />target: 77.172211","density: 0.0048454153<br />target: 77.461840","density: 
0.0047918239<br />target: 77.751468","density: 0.0047369190<br />target: 78.041096","density: 0.0046802437<br />target: 78.330724","density: 0.0046216588<br />target: 78.620352","density: 0.0045614278<br />target: 78.909980","density: 0.0045002291<br />target: 79.199609","density: 0.0044390836<br />target: 79.489237","density: 0.0043791960<br />target: 79.778865","density: 0.0043219252<br />target: 80.068493","density: 0.0042680390<br />target: 80.358121","density: 0.0042178160<br />target: 80.647750","density: 0.0041710705<br />target: 80.937378","density: 0.0041272417<br />target: 81.227006","density: 0.0040855792<br />target: 81.516634","density: 0.0040453262<br />target: 81.806262","density: 0.0040058519<br />target: 82.095890","density: 0.0039667100<br />target: 82.385519","density: 0.0039276375<br />target: 82.675147","density: 0.0038885217<br />target: 82.964775","density: 0.0038493616<br />target: 83.254403","density: 0.0038102371<br />target: 83.544031","density: 0.0037712867<br />target: 83.833659","density: 0.0037326892<br />target: 84.123288","density: 0.0036946459<br />target: 84.412916","density: 0.0036573805<br />target: 84.702544","density: 0.0036211606<br />target: 84.992172","density: 0.0035861357<br />target: 85.281800","density: 0.0035524522<br />target: 85.571429","density: 0.0035201684<br />target: 85.861057","density: 0.0034892063<br />target: 86.150685","density: 0.0034593206<br />target: 86.440313","density: 0.0034301025<br />target: 86.729941","density: 0.0034010221<br />target: 87.019569","density: 0.0033715036<br />target: 87.309198","density: 0.0033410130<br />target: 87.598826","density: 0.0033091368<br />target: 87.888454","density: 0.0032756380<br />target: 88.178082","density: 0.0032404802<br />target: 88.467710","density: 0.0032038237<br />target: 88.757339","density: 0.0031659971<br />target: 89.046967","density: 0.0031274518<br />target: 89.336595","density: 0.0030887189<br />target: 89.626223","density: 0.0030503664<br />target: 
89.915851","density: 0.0030128449<br />target: 90.205479","density: 0.0029765073<br />target: 90.495108","density: 0.0029416205<br />target: 90.784736","density: 0.0029083928<br />target: 91.074364","density: 0.0028770038<br />target: 91.363992","density: 0.0028476147<br />target: 91.653620","density: 0.0028203460<br />target: 91.943249","density: 0.0027952264<br />target: 92.232877","density: 0.0027721359<br />target: 92.522505","density: 0.0027507692<br />target: 92.812133","density: 0.0027306451<br />target: 93.101761","density: 0.0027111682<br />target: 93.391389","density: 0.0026917260<br />target: 93.681018","density: 0.0026717925<br />target: 93.970646","density: 0.0026510027<br />target: 94.260274","density: 0.0026291181<br />target: 94.549902","density: 0.0026062068<br />target: 94.839530","density: 0.0025824165<br />target: 95.129159","density: 0.0025579617<br />target: 95.418787","density: 0.0025331008<br />target: 95.708415","density: 0.0025081096<br />target: 95.998043","density: 0.0024832475<br />target: 96.287671","density: 0.0024587171<br />target: 96.577299","density: 0.0024346266<br />target: 96.866928","density: 0.0024109708<br />target: 97.156556","density: 0.0023876406<br />target: 97.446184","density: 0.0023644657<br />target: 97.735812","density: 0.0023412780<br />target: 98.025440","density: 0.0023179793<br />target: 98.315068","density: 0.0022945918<br />target: 98.604697","density: 0.0022712805<br />target: 98.894325","density: 0.0022483504<br />target: 99.183953","density: 0.0022262321<br />target: 99.473581","density: 0.0022052582<br />target: 99.763209","density: 0.0021856643<br />target: 100.052838","density: 0.0021674998<br />target: 100.342466","density: 0.0021505993<br />target: 100.632094","density: 0.0021346083<br />target: 100.921722","density: 0.0021190576<br />target: 101.211350","density: 0.0021034662<br />target: 101.500978","density: 0.0020874397<br />target: 101.790607","density: 0.0020707343<br />target: 
102.080235","density: 0.0020532697<br />target: 102.369863","density: 0.0020350899<br />target: 102.659491","density: 0.0020162933<br />target: 102.949119","density: 0.0019969589<br />target: 103.238748","density: 0.0019771014<br />target: 103.528376","density: 0.0019566734<br />target: 103.818004","density: 0.0019355909<br />target: 104.107632","density: 0.0019138875<br />target: 104.397260","density: 0.0018917597<br />target: 104.686888","density: 0.0018695368<br />target: 104.976517","density: 0.0018476452<br />target: 105.266145","density: 0.0018265194<br />target: 105.555773","density: 0.0018065118<br />target: 105.845401","density: 0.0017878361<br />target: 106.135029","density: 0.0017705602<br />target: 106.424658","density: 0.0017546419<br />target: 106.714286","density: 0.0017399816<br />target: 107.003914","density: 0.0017264600<br />target: 107.293542","density: 0.0017139398<br />target: 107.583170","density: 0.0017022331<br />target: 107.872798","density: 0.0016910589<br />target: 108.162427","density: 0.0016800281<br />target: 108.452055","density: 0.0016686835<br />target: 108.741683","density: 0.0016565341<br />target: 109.031311","density: 0.0016433756<br />target: 109.320939","density: 0.0016292592<br />target: 109.610568","density: 0.0016144890<br />target: 109.900196","density: 0.0015995381<br />target: 110.189824","density: 0.0015849145<br />target: 110.479452","density: 0.0015710283<br />target: 110.769080","density: 0.0015580992<br />target: 111.058708","density: 0.0015461252<br />target: 111.348337","density: 0.0015349079<br />target: 111.637965","density: 0.0015241160<br />target: 111.927593","density: 0.0015133603<br />target: 112.217221","density: 0.0015022583<br />target: 112.506849","density: 0.0014904802<br />target: 112.796477","density: 0.0014777735<br />target: 113.086106","density: 0.0014639769<br />target: 113.375734","density: 0.0014490102<br />target: 113.665362","density: 0.0014329173<br />target: 113.954990","density: 
0.0014159684<br />target: 114.244618","density: 0.0013985017<br />target: 114.534247","density: 0.0013809467<br />target: 114.823875","density: 0.0013637664<br />target: 115.113503","density: 0.0013473812<br />target: 115.403131","density: 0.0013320963<br />target: 115.692759","density: 0.0013180498<br />target: 115.982387","density: 0.0013051944<br />target: 116.272016","density: 0.0012933180<br />target: 116.561644","density: 0.0012820950<br />target: 116.851272","density: 0.0012711579<br />target: 117.140900","density: 0.0012601743<br />target: 117.430528","density: 0.0012489123<br />target: 117.720157","density: 0.0012372820<br />target: 118.009785","density: 0.0012253401<br />target: 118.299413","density: 0.0012132595<br />target: 118.589041","density: 0.0012012710<br />target: 118.878669","density: 0.0011895322<br />target: 119.168297","density: 0.0011780904<br />target: 119.457926","density: 0.0011668800<br />target: 119.747554","density: 0.0011557606<br />target: 120.037182","density: 0.0011445832<br />target: 120.326810","density: 0.0011332563<br />target: 120.616438","density: 0.0011217873<br />target: 120.906067","density: 0.0011102838<br />target: 121.195695","density: 0.0010989154<br />target: 121.485323","density: 0.0010878530<br />target: 121.774951","density: 0.0010772077<br />target: 122.064579","density: 0.0010669927<br />target: 122.354207","density: 0.0010571185<br />target: 122.643836","density: 0.0010474230<br />target: 122.933464","density: 0.0010377238<br />target: 123.223092","density: 0.0010278590<br />target: 123.512720","density: 0.0010177760<br />target: 123.802348","density: 0.0010075018<br />target: 124.091977","density: 0.0009971081<br />target: 124.381605","density: 0.0009866601<br />target: 124.671233","density: 0.0009761683<br />target: 124.960861","density: 0.0009655629<br />target: 125.250489","density: 0.0009547001<br />target: 125.540117","density: 0.0009434006<br />target: 125.829746","density: 0.0009315041<br />target: 
126.119374","density: 0.0009189216<br />target: 126.409002","density: 0.0009056663<br />target: 126.698630","density: 0.0008918540<br />target: 126.988258","density: 0.0008776757<br />target: 127.277886","density: 0.0008633546<br />target: 127.567515","density: 0.0008491027<br />target: 127.857143","density: 0.0008350988<br />target: 128.146771","density: 0.0008214724<br />target: 128.436399","density: 0.0008082637<br />target: 128.726027","density: 0.0007954812<br />target: 129.015656","density: 0.0007831018<br />target: 129.305284","density: 0.0007710824<br />target: 129.594912","density: 0.0007593729<br />target: 129.884540","density: 0.0007479280<br />target: 130.174168","density: 0.0007367158<br />target: 130.463796","density: 0.0007257203<br />target: 130.753425","density: 0.0007149375<br />target: 131.043053","density: 0.0007043664<br />target: 131.332681","density: 0.0006939997<br />target: 131.622309","density: 0.0006838160<br />target: 131.911937","density: 0.0006737792<br />target: 132.201566","density: 0.0006638439<br />target: 132.491194","density: 0.0006539659<br />target: 132.780822","density: 0.0006441167<br />target: 133.070450","density: 0.0006342938<br />target: 133.360078","density: 0.0006245240<br />target: 133.649706","density: 0.0006148504<br />target: 133.939335","density: 0.0006053125<br />target: 134.228963","density: 0.0005959239<br />target: 134.518591","density: 0.0005866523<br />target: 134.808219","density: 0.0005774128<br />target: 135.097847","density: 0.0005680769<br />target: 135.387476","density: 0.0005585004<br />target: 135.677104","density: 0.0005485615<br />target: 135.966732","density: 0.0005381994<br />target: 136.256360","density: 0.0005274401<br />target: 136.545988","density: 0.0005163991<br />target: 136.835616","density: 0.0005052588<br />target: 137.125245","density: 0.0004942275<br />target: 137.414873","density: 0.0004834929<br />target: 137.704501","density: 0.0004732125<br />target: 137.994129","density: 
0.0004633966<br />target: 138.283757","density: 0.0004540035<br />target: 138.573386","density: 0.0004449395<br />target: 138.863014","density: 0.0004360823<br />target: 139.152642","density: 0.0004273016<br />target: 139.442270","density: 0.0004184762<br />target: 139.731898","density: 0.0004095087<br />target: 140.021526","density: 0.0004003394<br />target: 140.311155","density: 0.0003909594<br />target: 140.600783","density: 0.0003814181<br />target: 140.890411","density: 0.0003718245<br />target: 141.180039","density: 0.0003623352<br />target: 141.469667","density: 0.0003531331<br />target: 141.759295","density: 0.0003443969<br />target: 142.048924","density: 0.0003362700<br />target: 142.338552","density: 0.0003288536<br />target: 142.628180","density: 0.0003221640<br />target: 142.917808","density: 0.0003160983<br />target: 143.207436","density: 0.0003105312<br />target: 143.497065","density: 0.0003053034<br />target: 143.786693","density: 0.0003002405<br />target: 144.076321","density: 0.0002951715<br />target: 144.365949","density: 0.0002899445<br />target: 144.655577","density: 0.0002844400<br />target: 144.945205","density: 0.0002785735<br />target: 145.234834","density: 0.0002722847<br />target: 145.524462","density: 0.0002655099<br />target: 145.814090","density: 0.0002581433<br />target: 146.103718","density: 0.0002499989<br />target: 146.393346","density: 0.0002407886<br />target: 146.682975","density: 0.0002301337<br />target: 146.972603","density: 0.0002176186<br />target: 147.262231","density: 0.0002027655<br />target: 147.551859","density: 0.0001854761<br />target: 147.841487","density: 0.0001659585<br />target: 148.131115","density: 0.0001447129<br />target: 148.420744","density: 0.0001225385<br />target: 148.710372","density: 0.0001004287<br />target: 
149.000000"],"type":"scatter","mode":"lines","line":{"width":3.7795275590551185,"color":"rgba(152,51,153,1)","dash":"solid"},"hoveron":"points","showlegend":false,"xaxis":"x","yaxis":"y","hoverinfo":"text","frame":null}],"layout":{"margin":{"t":23.305936073059364,"r":7.3059360730593621,"b":37.260273972602747,"l":54.794520547945211},"plot_bgcolor":"rgba(235,235,235,1)","paper_bgcolor":"rgba(255,255,255,1)","font":{"color":"rgba(0,0,0,1)","family":"","size":14.611872146118724},"xaxis":{"domain":[0,1],"automargin":true,"type":"linear","autorange":false,"range":[-6.9500000000000002,156.94999999999999],"tickmode":"array","ticktext":["0","50","100","150"],"tickvals":[0,50,100,150],"categoryorder":"array","categoryarray":["0","50","100","150"],"nticks":null,"ticks":"outside","tickcolor":"rgba(51,51,51,1)","ticklen":3.6529680365296811,"tickwidth":0,"showticklabels":true,"tickfont":{"color":"rgba(77,77,77,1)","family":"","size":11.68949771689498},"tickangle":-0,"showline":false,"linecolor":null,"linewidth":0,"showgrid":true,"gridcolor":"rgba(255,255,255,1)","gridwidth":0,"zeroline":false,"anchor":"y","title":{"text":"target","font":{"color":"rgba(0,0,0,1)","family":"","size":14.611872146118724}},"hoverformat":".2f"},"yaxis":{"domain":[0,1],"automargin":true,"type":"linear","autorange":false,"range":[-0.0010397630107341737,0.021835023225417645],"tickmode":"array","ticktext":["0.000","0.005","0.010","0.015","0.020"],"tickvals":[0,0.0050000000000000001,0.01,0.014999999999999999,0.02],"categoryorder":"array","categoryarray":["0.000","0.005","0.010","0.015","0.020"],"nticks":null,"ticks":"outside","tickcolor":"rgba(51,51,51,1)","ticklen":3.6529680365296811,"tickwidth":0,"showticklabels":true,"tickfont":{"color":"rgba(77,77,77,1)","family":"","size":11.68949771689498},"tickangle":-0,"showline":false,"linecolor":null,"linewidth":0,"showgrid":true,"gridcolor":"rgba(255,255,255,1)","gridwidth":0,"zeroline":false,"anchor":"x","title":{"text":"density","font":{"color":"rgba(0,0,0,1)","
family":"","size":14.611872146118724}},"hoverformat":".2f"},"shapes":[{"type":"rect","fillcolor":null,"line":{"color":null,"width":0,"linetype":[]},"yref":"paper","xref":"paper","layer":"below","x0":0,"x1":1,"y0":0,"y1":1}],"showlegend":false,"legend":{"bgcolor":"rgba(255,255,255,1)","bordercolor":"transparent","borderwidth":0,"font":{"color":"rgba(0,0,0,1)","family":"","size":11.68949771689498}},"hovermode":"closest","barmode":"relative"},"config":{"doubleClick":"reset","modeBarButtonsToAdd":["hoverclosest","hovercompare"],"showSendToCloud":false},"source":"A","attrs":{"10a361b2d8dcc":{"x":{},"y":{},"type":"bar"},"10a364bb85a04":{"x":{}}},"cur_data":"10a361b2d8dcc","visdat":{"10a361b2d8dcc":["function (y) ","x"],"10a364bb85a04":["function (y) ","x"]},"highlight":{"on":"plotly_click","persistent":false,"dynamic":false,"selectize":false,"opacityDim":0.20000000000000001,"selected":{"opacity":1},"debounce":0},"shinyEvents":["plotly_hover","plotly_click","plotly_selected","plotly_relayout","plotly_brushed","plotly_brushing","plotly_clickannotation","plotly_doubleclick","plotly_deselect","plotly_afterplot","plotly_sunburstclick"],"base_url":"https://plot.ly"},"evals":[],"jsHooks":[]}</script>
|
||
</div>
|
||
</div>
|
||
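<p>To identify the most appropriate theoretical distribution families, we used the Cullen and Frey graph (a skewness-kurtosis plot), computed on a random sample of 5,000 observations of the target. We performed a bootstrap analysis with 100 replicates to assess the robustness of the estimated statistical properties.</p>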
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb32"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb32-1"><a href="#cb32-1" aria-hidden="true" tabindex="-1"></a><span class="fu">descdist</span>(<span class="fu">sample</span>(target, <span class="dv">5000</span>), <span class="at">boot =</span> <span class="dv">100</span>)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output-display">
|
||
<div class="quarto-figure quarto-figure-center">
|
||
<figure class="figure">
|
||
<p><img src="Projet_MRC_DANJOU_LEGRAND_MERIC_VONSIEMENS_files/figure-html/Cullen%20graph-1.png" class="img-fluid quarto-figure quarto-figure-center figure-img" alt="Cullen and Frey graph of the target sample with bootstrapped values" width="672"></p>
|
||
</figure>
|
||
</div>
|
||
</div>
|
||
<div class="cell-output cell-output-stdout">
|
||
<pre><code>summary statistics
|
||
------
|
||
min: 1.03 max: 145.5
|
||
median: 41.26
|
||
mean: 47.44376
|
||
estimated sd: 25.80176
|
||
estimated skewness: 1.179019
|
||
estimated kurtosis: 4.303189 </code></pre>
|
||
</div>
|
||
</div>
|
||
<p>As shown in this Cullen and Frey graph, the observation point falls within the zone covered by several theoretical distributions, suggesting three potential candidates for the normalized target: the Gamma, Log-normal, and Beta distributions.</p>
|
||
<p>We proceeded to fit these three distributions to the data using the Maximum Likelihood Estimation (MLE) method.</p>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb34"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb34-1"><a href="#cb34-1" aria-hidden="true" tabindex="-1"></a>n <span class="ot"><-</span> <span class="fu">length</span>(target)</span>
|
||
<span id="cb34-2"><a href="#cb34-2" aria-hidden="true" tabindex="-1"></a>target_norm <span class="ot"><-</span> (target <span class="sc">-</span> <span class="fu">min</span>(target)) <span class="sc">/</span> (<span class="fu">max</span>(target) <span class="sc">-</span> <span class="fu">min</span>(target))</span>
|
||
<span id="cb34-3"><a href="#cb34-3" aria-hidden="true" tabindex="-1"></a>target_norm <span class="ot"><-</span> (target_norm <span class="sc">*</span> (n <span class="sc">-</span> <span class="dv">1</span>) <span class="sc">+</span> <span class="fl">0.5</span>) <span class="sc">/</span> n </span>
|
||
<span id="cb34-4"><a href="#cb34-4" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb34-5"><a href="#cb34-5" aria-hidden="true" tabindex="-1"></a>fit_g_norm <span class="ot"><-</span> <span class="fu">fitdist</span>(target_norm, <span class="st">"gamma"</span>)</span>
|
||
<span id="cb34-6"><a href="#cb34-6" aria-hidden="true" tabindex="-1"></a>fit_ln_norm <span class="ot"><-</span> <span class="fu">fitdist</span>(target_norm, <span class="st">"lnorm"</span>)</span>
|
||
<span id="cb34-7"><a href="#cb34-7" aria-hidden="true" tabindex="-1"></a>fit_b_norm <span class="ot"><-</span> <span class="fu">fitdist</span>(target_norm, <span class="st">"beta"</span>)</span>
|
||
<span id="cb34-8"><a href="#cb34-8" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb34-9"><a href="#cb34-9" aria-hidden="true" tabindex="-1"></a>statistique_norm <span class="ot"><-</span> <span class="fu">gofstat</span>(<span class="fu">list</span>(fit_g_norm, fit_b_norm, fit_ln_norm))</span>
|
||
<span id="cb34-10"><a href="#cb34-10" aria-hidden="true" tabindex="-1"></a><span class="fu">print</span>(statistique_norm)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output cell-output-stdout">
|
||
<pre><code>Goodness-of-fit statistics
|
||
1-mle-gamma 2-mle-beta 3-mle-lnorm
|
||
Kolmogorov-Smirnov statistic 0.0269811 7.736077e-02 0.0234458
|
||
Cramer-von Mises statistic 399.8647559 3.381029e+03 288.8318332
|
||
Anderson-Darling statistic 2490.5876688 Inf 2331.8231830
|
||
|
||
Goodness-of-fit criteria
|
||
1-mle-gamma 2-mle-beta 3-mle-lnorm
|
||
Akaike's Information Criterion -1423631 -1247091 -1359067
|
||
Bayesian Information Criterion -1423607 -1247066 -1359043</code></pre>
|
||
</div>
|
||
</div>
|
||
<p>The Beta distribution yielded incoherent results: its Anderson-Darling statistic was infinite, and its Kolmogorov-Smirnov statistic, Cramer-von Mises statistic, AIC, and BIC were all the worst of the three candidates. Since the Beta distribution is bounded on the interval [0,1], it struggled to capture the tail dynamics of the volatility, even after normalization. The fitting process showed poor convergence and failed to represent the data structure adequately. Consequently, we discarded the Beta distribution.</p>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb36"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb36-1"><a href="#cb36-1" aria-hidden="true" tabindex="-1"></a>fit_g <span class="ot"><-</span> <span class="fu">fitdist</span>(target, <span class="st">"gamma"</span>)</span>
|
||
<span id="cb36-2"><a href="#cb36-2" aria-hidden="true" tabindex="-1"></a>fit_ln <span class="ot"><-</span> <span class="fu">fitdist</span>(target, <span class="st">"lnorm"</span>)</span>
|
||
<span id="cb36-3"><a href="#cb36-3" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb36-4"><a href="#cb36-4" aria-hidden="true" tabindex="-1"></a>statistique <span class="ot"><-</span> <span class="fu">gofstat</span>(<span class="fu">list</span>(fit_g, fit_ln))</span>
|
||
<span id="cb36-5"><a href="#cb36-5" aria-hidden="true" tabindex="-1"></a><span class="fu">print</span>(statistique)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output cell-output-stdout">
|
||
<pre><code>Goodness-of-fit statistics
|
||
1-mle-gamma 2-mle-lnorm
|
||
Kolmogorov-Smirnov statistic 2.903834e-02 1.810247e-02
|
||
Cramer-von Mises statistic 4.619020e+02 1.675541e+02
|
||
Anderson-Darling statistic 2.820148e+03 1.503318e+03
|
||
|
||
Goodness-of-fit criteria
|
||
1-mle-gamma 2-mle-lnorm
|
||
Akaike's Information Criterion 13899334 13931707
|
||
Bayesian Information Criterion 13899359 13931732</code></pre>
|
||
</div>
|
||
</div>
|
||
<p>The final comparison was made between the Gamma and Log-normal distributions. We evaluated them using both statistical information criteria (AIC and BIC) and visual inspection.</p>
|
||
<p>While the two distributions minimized different statistical criteria, the graphical analysis provided a decisive conclusion. We superimposed the theoretical density curves of both distributions onto the empirical histogram of the target.</p>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb38"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb38-1"><a href="#cb38-1" aria-hidden="true" tabindex="-1"></a><span class="fu">denscomp</span>(</span>
|
||
<span id="cb38-2"><a href="#cb38-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">list</span>(fit_g, fit_ln),</span>
|
||
<span id="cb38-3"><a href="#cb38-3" aria-hidden="true" tabindex="-1"></a> <span class="at">legendtext =</span> <span class="fu">c</span>(<span class="st">"Gamma"</span>, <span class="st">"Lognorm"</span>),</span>
|
||
<span id="cb38-4"><a href="#cb38-4" aria-hidden="true" tabindex="-1"></a> <span class="at">fitcol =</span> <span class="fu">c</span>(<span class="st">"#983399"</span>, <span class="st">"#E4CBF9"</span>),</span>
|
||
<span id="cb38-5"><a href="#cb38-5" aria-hidden="true" tabindex="-1"></a> <span class="at">fitlwd =</span> <span class="fu">c</span>(<span class="dv">2</span>, <span class="dv">2</span>),</span>
|
||
<span id="cb38-6"><a href="#cb38-6" aria-hidden="true" tabindex="-1"></a> <span class="at">fitlty =</span> <span class="fu">c</span>(<span class="dv">1</span>, <span class="dv">1</span>)</span>
|
||
<span id="cb38-7"><a href="#cb38-7" aria-hidden="true" tabindex="-1"></a>)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output-display">
|
||
<div class="quarto-figure quarto-figure-center">
|
||
<figure class="figure">
|
||
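<p><img src="Projet_MRC_DANJOU_LEGRAND_MERIC_VONSIEMENS_files/figure-html/approximation%20test%20of%20the%20density-1.png" class="img-fluid quarto-figure quarto-figure-center figure-img" alt="Empirical histogram of the implied volatility with the fitted Gamma and Log-normal density curves superimposed" width="672"></p>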
|
||
</figure>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
<p>As illustrated in the figure above, the Log-normal distribution fits the empirical data almost perfectly, capturing both the peak and the fat tail of the implied volatility.</p>
|
||
<p>Based on this analysis, we conclude that <span class="math inline">\(\log(\texttt{implied\_vol\_ref})\)</span> follows a Normal distribution. This justifies the use of a log-transformation on the target variable for our Linear Models, ensuring that the normality assumption of the regression is respected.</p>
|
||
</section>
|
||
<section id="analysis-of-other-relevant-features" class="level4">
|
||
<h4 class="anchored" data-anchor-id="analysis-of-other-relevant-features">Analysis of other relevant features</h4>
|
||
<p>Beyond the target variable itself, our modeling strategy relies on three fundamental pillars: the asset’s memory (<code>realized_vol_short</code>), the systemic environment (<code>market_vol_index</code>), and the market structure (<code>liquidity_ratio</code>). We analyze their relationship with the implied volatility below.</p>
|
||
<section id="historical-volatility-the-anchor-effect" class="level5">
|
||
<h5 class="anchored" data-anchor-id="historical-volatility-the-anchor-effect">Historical Volatility: The Anchor Effect</h5>
|
||
<p>The short-term realized volatility acts as the “memory” of the asset. Financial theory suggests a strong autocorrelation known as “volatility clustering”.</p>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb39"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb39-1"><a href="#cb39-1" aria-hidden="true" tabindex="-1"></a>g1 <span class="ot"><-</span> <span class="fu">ggplot</span>(</span>
|
||
<span id="cb39-2"><a href="#cb39-2" aria-hidden="true" tabindex="-1"></a> train_eng,</span>
|
||
<span id="cb39-3"><a href="#cb39-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">aes</span>(<span class="at">x =</span> realized_vol_short, <span class="at">y =</span> <span class="fu">log</span>(implied_vol_ref))</span>
|
||
<span id="cb39-4"><a href="#cb39-4" aria-hidden="true" tabindex="-1"></a>) <span class="sc">+</span></span>
|
||
<span id="cb39-5"><a href="#cb39-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_hex</span>(<span class="at">bins =</span> <span class="dv">70</span>) <span class="sc">+</span></span>
|
||
<span id="cb39-6"><a href="#cb39-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">scale_fill_viridis_c</span>() <span class="sc">+</span></span>
|
||
<span id="cb39-7"><a href="#cb39-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_abline</span>(</span>
|
||
<span id="cb39-8"><a href="#cb39-8" aria-hidden="true" tabindex="-1"></a> <span class="at">intercept =</span> <span class="dv">0</span>,</span>
|
||
<span id="cb39-9"><a href="#cb39-9" aria-hidden="true" tabindex="-1"></a> <span class="at">slope =</span> <span class="dv">1</span>,</span>
|
||
<span id="cb39-10"><a href="#cb39-10" aria-hidden="true" tabindex="-1"></a> <span class="at">color =</span> <span class="st">"#A3D5FF"</span>,</span>
|
||
<span id="cb39-11"><a href="#cb39-11" aria-hidden="true" tabindex="-1"></a> <span class="at">linetype =</span> <span class="st">"dashed"</span></span>
|
||
<span id="cb39-12"><a href="#cb39-12" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">+</span></span>
|
||
<span id="cb39-13"><a href="#cb39-13" aria-hidden="true" tabindex="-1"></a> <span class="fu">labs</span>(</span>
|
||
<span id="cb39-14"><a href="#cb39-14" aria-hidden="true" tabindex="-1"></a> <span class="at">title =</span> <span class="st">"Implied vs Realized Volatility"</span>,</span>
|
||
<span id="cb39-15"><a href="#cb39-15" aria-hidden="true" tabindex="-1"></a> <span class="at">x =</span> <span class="st">"Realized Vol Short"</span>,</span>
|
||
<span id="cb39-16"><a href="#cb39-16" aria-hidden="true" tabindex="-1"></a> <span class="at">y =</span> <span class="st">"Log(Implied Vol)"</span></span>
|
||
<span id="cb39-17"><a href="#cb39-17" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">+</span></span>
|
||
<span id="cb39-18"><a href="#cb39-18" aria-hidden="true" tabindex="-1"></a> <span class="fu">theme_minimal</span>()</span>
|
||
<span id="cb39-19"><a href="#cb39-19" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb39-20"><a href="#cb39-20" aria-hidden="true" tabindex="-1"></a><span class="fu">plot</span>(g1)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output-display">
|
||
<div class="quarto-figure quarto-figure-center">
|
||
<figure class="figure">
|
||
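<p><img src="Projet_MRC_DANJOU_LEGRAND_MERIC_VONSIEMENS_files/figure-html/plot%201-1.png" class="img-fluid quarto-figure quarto-figure-center figure-img" alt="Hexagonal-bin density plot of log implied volatility against short-term realized volatility, with a dashed reference line of slope 1" width="672"></p>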
|
||
</figure>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
<p>This plot reveals a dense, elliptical cloud of points that indicates a robust positive linear relationship (<span class="math inline">\(\text{Correlation} \approx 0.8\)</span>). This strong correlation confirms that market pricing is heavily anchored to the asset’s recent physical behavior, meaning implied volatility is rarely disconnected from its realized counterpart. However, the dispersion observed in the graph demonstrates that implied volatility is not merely a perfect copy of the past; the vertical spread represents the “Variance Risk Premium” investors pay for future uncertainty, which is precisely the variation our model aims to capture using additional features.</p>
|
||
</section>
|
||
<section id="market-volatility-index-the-systemic-driver" class="level5">
|
||
<h5 class="anchored" data-anchor-id="market-volatility-index-the-systemic-driver">Market Volatility Index: The Systemic Driver</h5>
|
||
<p>This variable represents the “tide that lifts all boats”. We analyze how the average implied volatility of our 3,887 assets correlates with the global market stress.</p>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb40"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb40-1"><a href="#cb40-1" aria-hidden="true" tabindex="-1"></a>daily_stats <span class="ot"><-</span> train_eng <span class="sc">|></span></span>
|
||
<span id="cb40-2"><a href="#cb40-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(obs_date) <span class="sc">|></span></span>
|
||
<span id="cb40-3"><a href="#cb40-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarise</span>(</span>
|
||
<span id="cb40-4"><a href="#cb40-4" aria-hidden="true" tabindex="-1"></a> <span class="at">Avg_Implied =</span> <span class="fu">mean</span>(implied_vol_ref, <span class="at">na.rm =</span> <span class="cn">TRUE</span>),</span>
|
||
<span id="cb40-5"><a href="#cb40-5" aria-hidden="true" tabindex="-1"></a> <span class="at">Market_Index =</span> <span class="fu">mean</span>(market_vol_index, <span class="at">na.rm =</span> <span class="cn">TRUE</span>)</span>
|
||
<span id="cb40-6"><a href="#cb40-6" aria-hidden="true" tabindex="-1"></a> )</span>
|
||
<span id="cb40-7"><a href="#cb40-7" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb40-8"><a href="#cb40-8" aria-hidden="true" tabindex="-1"></a>g2 <span class="ot"><-</span> <span class="fu">ggplot</span>(daily_stats, <span class="fu">aes</span>(<span class="at">x =</span> <span class="fu">as.Date</span>(obs_date))) <span class="sc">+</span></span>
|
||
<span id="cb40-9"><a href="#cb40-9" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_line</span>(<span class="fu">aes</span>(<span class="at">y =</span> Avg_Implied, <span class="at">color =</span> <span class="st">"Average Asset Vol"</span>), <span class="at">size =</span> <span class="fl">0.8</span>) <span class="sc">+</span></span>
|
||
<span id="cb40-10"><a href="#cb40-10" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_line</span>(</span>
|
||
<span id="cb40-11"><a href="#cb40-11" aria-hidden="true" tabindex="-1"></a> <span class="fu">aes</span>(<span class="at">y =</span> Market_Index, <span class="at">color =</span> <span class="st">"Market Vol Index"</span>),</span>
|
||
<span id="cb40-12"><a href="#cb40-12" aria-hidden="true" tabindex="-1"></a> <span class="at">size =</span> <span class="fl">0.8</span>,</span>
|
||
<span id="cb40-13"><a href="#cb40-13" aria-hidden="true" tabindex="-1"></a> <span class="at">linetype =</span> <span class="st">"dashed"</span></span>
|
||
<span id="cb40-14"><a href="#cb40-14" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">+</span></span>
|
||
<span id="cb40-15"><a href="#cb40-15" aria-hidden="true" tabindex="-1"></a> <span class="fu">labs</span>(</span>
|
||
<span id="cb40-16"><a href="#cb40-16" aria-hidden="true" tabindex="-1"></a> <span class="at">title =</span> <span class="st">"Systemic Risk Correlation"</span>,</span>
|
||
<span id="cb40-17"><a href="#cb40-17" aria-hidden="true" tabindex="-1"></a> <span class="at">x =</span> <span class="st">"Date"</span>,</span>
|
||
<span id="cb40-18"><a href="#cb40-18" aria-hidden="true" tabindex="-1"></a> <span class="at">y =</span> <span class="st">"Volatility Level"</span>,</span>
|
||
<span id="cb40-19"><a href="#cb40-19" aria-hidden="true" tabindex="-1"></a> <span class="at">color =</span> <span class="st">"Legend"</span></span>
|
||
<span id="cb40-20"><a href="#cb40-20" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">+</span></span>
|
||
<span id="cb40-21"><a href="#cb40-21" aria-hidden="true" tabindex="-1"></a> <span class="fu">scale_color_manual</span>(</span>
|
||
<span id="cb40-22"><a href="#cb40-22" aria-hidden="true" tabindex="-1"></a> <span class="at">values =</span> <span class="fu">c</span>(<span class="st">"Average Asset Vol"</span> <span class="ot">=</span> <span class="st">"#983399"</span>, <span class="st">"Market Vol Index"</span> <span class="ot">=</span> <span class="st">"#E4CBF9"</span>)</span>
|
||
<span id="cb40-23"><a href="#cb40-23" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">+</span></span>
|
||
<span id="cb40-24"><a href="#cb40-24" aria-hidden="true" tabindex="-1"></a> <span class="fu">theme_minimal</span>() <span class="sc">+</span></span>
|
||
<span id="cb40-25"><a href="#cb40-25" aria-hidden="true" tabindex="-1"></a> <span class="fu">theme</span>(<span class="at">legend.position =</span> <span class="st">"bottom"</span>)</span>
|
||
<span id="cb40-26"><a href="#cb40-26" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb40-27"><a href="#cb40-27" aria-hidden="true" tabindex="-1"></a><span class="fu">plot</span>(g2)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output-display">
|
||
<div class="quarto-figure quarto-figure-center">
|
||
<figure class="figure">
|
||
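<p><img src="Projet_MRC_DANJOU_LEGRAND_MERIC_VONSIEMENS_files/figure-html/plot2-1.png" class="img-fluid quarto-figure quarto-figure-center figure-img" alt="Time series of the daily average implied volatility across assets (solid line) and the market volatility index (dashed line)" width="672"></p>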
|
||
</figure>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
<p>This second plot highlights the synchronization between idiosyncratic and systemic risk, particularly during crisis events. The massive spike in early 2020, corresponding to the COVID-19 crisis, is clearly visible on both curves; when the Market Index (represented by the light purple dashed line) jumps, the average asset volatility (the dark purple line) reacts instantly and violently. This visualisation demonstrates a strong regime dependency: in high-stress environments, correlations tend towards 1 as macro-factors dominate market behavior, whereas in calmer periods, such as late 2020, the curves flatten and diverge slightly, allowing asset-specific drivers to take precedence over systemic panic.</p>
|
||
</section>
|
||
<section id="liquidity-ratio-the-liquidity-premium" class="level5">
|
||
<h5 class="anchored" data-anchor-id="liquidity-ratio-the-liquidity-premium">Liquidity Ratio: The Liquidity Premium</h5>
|
||
<p>We hypothesized that liquidity impacts volatility pricing. We plot the average Implied Volatility for each decile of the Liquidity Ratio (Volume / Open Interest).</p>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb41"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb41-1"><a href="#cb41-1" aria-hidden="true" tabindex="-1"></a>liquidity_analysis <span class="ot"><-</span> train_eng <span class="sc">|></span></span>
|
||
<span id="cb41-2"><a href="#cb41-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate</span>(<span class="at">decile =</span> <span class="fu">ntile</span>(liquidity_ratio, <span class="dv">10</span>)) <span class="sc">|></span></span>
|
||
<span id="cb41-3"><a href="#cb41-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(decile) <span class="sc">|></span></span>
|
||
<span id="cb41-4"><a href="#cb41-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarise</span>(</span>
|
||
<span id="cb41-5"><a href="#cb41-5" aria-hidden="true" tabindex="-1"></a> <span class="at">Mean_Liquidity =</span> <span class="fu">mean</span>(liquidity_ratio, <span class="at">na.rm =</span> <span class="cn">TRUE</span>),</span>
|
||
<span id="cb41-6"><a href="#cb41-6" aria-hidden="true" tabindex="-1"></a> <span class="at">Mean_Implied =</span> <span class="fu">mean</span>(implied_vol_ref, <span class="at">na.rm =</span> <span class="cn">TRUE</span>),</span>
|
||
<span id="cb41-7"><a href="#cb41-7" aria-hidden="true" tabindex="-1"></a> <span class="at">SE_Implied =</span> <span class="fu">sd</span>(implied_vol_ref, <span class="at">na.rm =</span> <span class="cn">TRUE</span>) <span class="sc">/</span> <span class="fu">sqrt</span>(<span class="fu">n</span>())</span>
|
||
<span id="cb41-8"><a href="#cb41-8" aria-hidden="true" tabindex="-1"></a> )</span>
|
||
<span id="cb41-9"><a href="#cb41-9" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb41-10"><a href="#cb41-10" aria-hidden="true" tabindex="-1"></a>g3 <span class="ot"><-</span> <span class="fu">ggplot</span>(liquidity_analysis, <span class="fu">aes</span>(<span class="at">x =</span> <span class="fu">factor</span>(decile), <span class="at">y =</span> Mean_Implied)) <span class="sc">+</span></span>
|
||
<span id="cb41-11"><a href="#cb41-11" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_bar</span>(<span class="at">stat =</span> <span class="st">"identity"</span>, <span class="at">fill =</span> <span class="st">"#E4CBF9"</span>, <span class="at">alpha =</span> <span class="fl">0.8</span>) <span class="sc">+</span></span>
|
||
<span id="cb41-12"><a href="#cb41-12" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_errorbar</span>(</span>
|
||
<span id="cb41-13"><a href="#cb41-13" aria-hidden="true" tabindex="-1"></a> <span class="fu">aes</span>(</span>
|
||
<span id="cb41-14"><a href="#cb41-14" aria-hidden="true" tabindex="-1"></a> <span class="at">ymin =</span> Mean_Implied <span class="sc">-</span> SE_Implied <span class="sc">*</span> <span class="dv">2</span>,</span>
|
||
<span id="cb41-15"><a href="#cb41-15" aria-hidden="true" tabindex="-1"></a> <span class="at">ymax =</span> Mean_Implied <span class="sc">+</span> SE_Implied <span class="sc">*</span> <span class="dv">2</span></span>
|
||
<span id="cb41-16"><a href="#cb41-16" aria-hidden="true" tabindex="-1"></a> ),</span>
|
||
<span id="cb41-17"><a href="#cb41-17" aria-hidden="true" tabindex="-1"></a> <span class="at">width =</span> <span class="fl">0.2</span></span>
|
||
<span id="cb41-18"><a href="#cb41-18" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">+</span></span>
|
||
<span id="cb41-19"><a href="#cb41-19" aria-hidden="true" tabindex="-1"></a> <span class="fu">labs</span>(</span>
|
||
<span id="cb41-20"><a href="#cb41-20" aria-hidden="true" tabindex="-1"></a> <span class="at">title =</span> <span class="st">"Implied Volatility by Liquidity Decile"</span>,</span>
|
||
<span id="cb41-21"><a href="#cb41-21" aria-hidden="true" tabindex="-1"></a> <span class="at">subtitle =</span> <span class="st">"Checking for U-Shaped Relationship"</span>,</span>
|
||
<span id="cb41-22"><a href="#cb41-22" aria-hidden="true" tabindex="-1"></a> <span class="at">x =</span> <span class="st">"Liquidity Ratio Decile (1=Low, 10=High)"</span>,</span>
|
||
<span id="cb41-23"><a href="#cb41-23" aria-hidden="true" tabindex="-1"></a> <span class="at">y =</span> <span class="st">"Average Implied Volatility"</span></span>
|
||
<span id="cb41-24"><a href="#cb41-24" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">+</span></span>
|
||
<span id="cb41-25"><a href="#cb41-25" aria-hidden="true" tabindex="-1"></a> <span class="fu">theme_minimal</span>()</span>
|
||
<span id="cb41-26"><a href="#cb41-26" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb41-27"><a href="#cb41-27" aria-hidden="true" tabindex="-1"></a><span class="fu">plot</span>(g3)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output-display">
|
||
<div class="quarto-figure quarto-figure-center">
|
||
<figure class="figure">
|
||
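<p><img src="Projet_MRC_DANJOU_LEGRAND_MERIC_VONSIEMENS_files/figure-html/graph%203-1.png" class="img-fluid quarto-figure quarto-figure-center figure-img" alt="Bar chart of the average implied volatility for each liquidity-ratio decile, with error bars of two standard errors" width="672"></p>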
|
||
</figure>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
<p>Contrary to the theoretical expectation of a “U-Shaped” curve, the empirical data presented in this third figure reveals a strictly decreasing relationship. Assets in the lowest liquidity deciles exhibit the highest implied volatility (<span class="math inline">\(\approx 50\%\)</span>), effectively validating the existence of a “Liquidity Premium.” This phenomenon suggests that Market Makers charge higher option prices—resulting in higher implied volatility—to compensate for the increased difficulty of hedging and the risk of price gaps characteristic of thin markets. As liquidity improves in the upper deciles, market friction decreases and spreads tighten, causing implied volatility to stabilize at a significantly lower level (<span class="math inline">\(\approx 38\%\)</span>). This distinct trend confirms that the <code>liquidity_ratio</code> is a crucial predictor for our model, serving as a reliable proxy for the cost of execution embedded in option prices.</p>
|
||
</section>
|
||
</section>
|
||
</section>
|
||
<section id="multicollinearity-dimensionality-reduction-pca" class="level3">
|
||
<h3 class="anchored" data-anchor-id="multicollinearity-dimensionality-reduction-pca">Multicollinearity & Dimensionality Reduction (PCA)</h3>
|
||
<p>Despite the initial pairwise correlation filter applied in the base recipe (removing features with a Pearson correlation above <span class="math inline">\(0.90\)</span>), residual multicollinearity inevitably persists within the financial feature space. While penalized regressions (<strong>Lasso</strong>, <strong>Ridge</strong>) are mathematically equipped to handle this redundancy through their regularization norms, unpenalized models like <strong>Ordinary Least Squares (OLS)</strong> and deep architectures like <strong>Multi-Layer Perceptrons (MLP)</strong> remain highly vulnerable.</p>
|
||
<p>To provide a mathematically optimal feature space for these specific algorithms, we implemented a dimensionality reduction phase using <strong>Principal Component Analysis (PCA)</strong>.</p>
|
||
<section id="mathematical-justification" class="level4">
|
||
<h4 class="anchored" data-anchor-id="mathematical-justification">Mathematical Justification</h4>
|
||
<ul>
|
||
<li><p><strong>1. Stabilizing Unpenalized Linear Models (OLS)</strong>: In standard OLS regression, the parameter vector is estimated via the normal equation: <span class="math inline">\(\hat{\beta} = (X^T X)^{-1} X^T Y\)</span>. If residual multicollinearity exists, the covariance matrix <span class="math inline">\(X^T X\)</span> becomes ill-conditioned (approaching singularity). This mathematical instability leads to an inflated Variance Inflation Factor (VIF), making the coefficient estimates highly erratic and hypersensitive to minor variations in the training data. PCA projects the original features into a new subspace of strictly orthogonal (zero-correlation) principal components, guaranteeing a perfectly invertible covariance matrix.</p></li>
|
||
<li><p><strong>2. Optimizing Neural Network Convergence (MLP)</strong>: For Multi-Layer Perceptrons, feeding highly correlated inputs leads to elongated, non-isotropic error surfaces. This forces the Stochastic Gradient Descent (SGD) algorithm to oscillate inefficiently, slowing down convergence and increasing the risk of trapping the network in local minima. By supplying orthogonal components, PCA ensures a symmetrical error topology, accelerating gradient convergence and stabilizing the weight updates.</p></li>
|
||
</ul>
|
||
</section>
|
||
<section id="execution-and-variance-thresholding" class="level4">
|
||
<h4 class="anchored" data-anchor-id="execution-and-variance-thresholding">Execution and Variance Thresholding</h4>
|
||
<p>Because PCA seeks to maximize projected variance, it is fundamentally scale-sensitive. If applied to raw data, features with naturally large nominal values (such as unscaled volumes) would disproportionately dominate the principal components, regardless of their actual informational value. Therefore, this step is exclusively applied after the rigorous <span class="math inline">\(Z\)</span>-score standardization detailed in Section 2.4.</p>
|
||
<p>Rather than selecting an arbitrary number of components, we dynamically threshold the PCA to retain exactly <span class="math inline">\(95\%\)</span> of the cumulative explained variance. This approach acts as a secondary, mathematical feature selection: it captures the core structural signal of the market while discarding the remaining <span class="math inline">\(5\%\)</span> of variance as idiosyncratic, stochastic noise.</p>
|
||
</section>
|
||
<section id="methodological-isolation" class="level4">
|
||
<h4 class="anchored" data-anchor-id="methodological-isolation">Methodological Isolation</h4>
|
||
<p>Maintaining strict adherence to our statistical isolation protocols, the PCA projection matrix (the eigenvectors and eigenvalues) is computed exclusively using the covariance structure of the standardized training set. These fixed geometric rotations are then deterministically applied to the validation and test sets via the <code>step_pca</code> function, ensuring that no future variance distributions leak into the model’s structural parameters.</p>
|
||
</section>
|
||
</section>
|
||
<section id="unified-data-pipeline-implementation-tidymodels" class="level3">
|
||
<h3 class="anchored" data-anchor-id="unified-data-pipeline-implementation-tidymodels">Unified Data Pipeline Implementation (tidymodels)</h3>
|
||
<p>To ensure strict methodological rigor, guarantee reproducibility, and absolutely prevent any data leakage from the validation and test sets, all the theoretical preprocessing steps detailed in sections 2.2 through 2.6 were computationally encapsulated into a unified, sequential pipeline. Leveraging the recipes package from the tidymodels ecosystem, we constructed a multi-branch data blueprint. This architecture explicitly creates three distinct datasets, each mathematically optimized for a specific family of machine learning algorithms.</p>
|
||
<ul>
|
||
<li><p><strong>1. The Base Recipe and Tree-Based Dataset (<code>rec_tree</code>)</strong>: The foundation of our pipeline (<code>rec_base</code>) begins by assigning the target role to the log-transformed implied volatility (<code>log_implied_vol</code>). Missing values are handled via median imputation, and an aggressive multicollinearity filter (<code>step_corr</code> at <span class="math inline">\(0.90\)</span>) removes redundant information. This base recipe directly forms our first dataset: the Tree-Based Dataset. Decision trees (<strong>LightGBM</strong>, <strong>XGBoost</strong>) operate via orthogonal splits and are scale-invariant. To preserve the natural financial interpretability of the predictors for post-hoc SHAP analysis, this dataset intentionally bypasses all clipping, geometric transformations, and standardization steps.</p></li>
|
||
<li><p><strong>2. The Classic Linear Dataset (<code>rec_linear</code>)</strong>: Distance-based and penalized algorithms (<strong>Lasso</strong>, <strong>Ridge</strong>, <strong>Elastic Net</strong>) require symmetric, standardized, and outlier-free feature spaces. Branching off from the base recipe, we create our second dataset: the Classic Linear Dataset. This branch dynamically applies our <strong>Winsorization</strong> thresholds (computed strictly on the training set) via <code>step_mutate</code> to neutralize extreme outliers. Subsequently, right-skewed variables undergo a logarithmic transformation, while unbounded spreads undergo a <strong>Yeo-Johnson</strong> transformation. Finally, strict <span class="math inline">\(Z\)</span>-score normalization (<code>step_normalize</code>) is applied to enforce mathematical equity among features prior to <span class="math inline">\(L_1/L_2\)</span> penalization.</p></li>
|
||
<li><p><strong>3. The Dimensionality Reduction Dataset (<code>rec_acp</code>)</strong>: Our third dataset targets algorithms highly sensitive to even minor multicollinearity and dense gradient spaces, such as <strong>Ordinary Least Squares (OLS)</strong> or <strong>Multi-Layer Perceptrons (MLP)</strong>. Derived directly from the fully scaled linear dataset, this branch incorporates a Principal Component Analysis (<code>step_pca</code>). The transformation is calibrated to retain exactly <span class="math inline">\(95\%\)</span> of the underlying training variance, providing a perfectly orthogonal and dimensionally reduced feature space.</p></li>
|
||
<li><p><strong>4. State Execution and Strict Data Isolation (<code>prep()</code> and <code>bake()</code>)</strong>: The theoretical guarantee against data leakage is computationally enforced during the pipeline’s execution phase. The <code>prep()</code> function estimates all statistical parameters (medians, limits, variances, PCA eigenvectors) strictly from train_data. The <code>bake()</code> function then deterministically projects these frozen transformations onto the validation and test sets, generating the three final data triads (<code>_tree</code>, <code>_linear</code>, <code>_acp</code>) used for modeling.</p></li>
|
||
</ul>
|
||
<p>To ensure strict methodological rigor, guarantee reproducibility, and absolutely prevent any data leakage from the validation and test sets, all the theoretical preprocessing steps detailed in sections 2.2 through 2.6 were computationally encapsulated into a unified, sequential pipeline. Leveraging the recipes package from the tidymodels ecosystem, we constructed a data blueprint that strictly learns transformation parameters (such as medians, variances, and PCA eigenvectors) exclusively on the training set before applying them to unseen data.</p>
|
||
<p>This pipeline architecture addresses several critical statistical requirements for modeling financial data:</p>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb42"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb42-1"><a href="#cb42-1" aria-hidden="true" tabindex="-1"></a>stats_max <span class="ot"><-</span> <span class="fu">lapply</span>(train_eng[vars_clip_max], calc_upper_limit)</span>
|
||
<span id="cb42-2"><a href="#cb42-2" aria-hidden="true" tabindex="-1"></a>stats_dual <span class="ot"><-</span> <span class="fu">lapply</span>(train_eng[vars_clip_dual], calc_dual_limits)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb43"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb43-1"><a href="#cb43-1" aria-hidden="true" tabindex="-1"></a>rec_base <span class="ot"><-</span> <span class="fu">recipe</span>(implied_vol_ref <span class="sc">~</span> ., <span class="at">data =</span> train_eng) <span class="sc">|></span></span>
|
||
<span id="cb43-2"><a href="#cb43-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">update_role</span>(asset_id, obs_date, <span class="at">new_role =</span> <span class="st">"id"</span>) <span class="sc">|></span></span>
|
||
<span id="cb43-3"><a href="#cb43-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">step_impute_median</span>(<span class="fu">all_numeric_predictors</span>())</span>
|
||
<span id="cb43-4"><a href="#cb43-4" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb43-5"><a href="#cb43-5" aria-hidden="true" tabindex="-1"></a>rec_tree <span class="ot"><-</span> rec_base <span class="sc">|></span></span>
|
||
<span id="cb43-6"><a href="#cb43-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">step_corr</span>(<span class="fu">all_numeric_predictors</span>(), <span class="at">threshold =</span> <span class="fl">0.90</span>)</span>
|
||
<span id="cb43-7"><a href="#cb43-7" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb43-8"><a href="#cb43-8" aria-hidden="true" tabindex="-1"></a>rec_linear <span class="ot"><-</span> rec_base <span class="sc">|></span></span>
|
||
<span id="cb43-9"><a href="#cb43-9" aria-hidden="true" tabindex="-1"></a> <span class="fu">step_mutate</span>(</span>
|
||
<span id="cb43-10"><a href="#cb43-10" aria-hidden="true" tabindex="-1"></a> <span class="at">realized_vol_short =</span> <span class="fu">clip_max_func</span>(</span>
|
||
<span id="cb43-11"><a href="#cb43-11" aria-hidden="true" tabindex="-1"></a> realized_vol_short,</span>
|
||
<span id="cb43-12"><a href="#cb43-12" aria-hidden="true" tabindex="-1"></a> <span class="st">"realized_vol_short"</span>,</span>
|
||
<span id="cb43-13"><a href="#cb43-13" aria-hidden="true" tabindex="-1"></a> stats_max</span>
|
||
<span id="cb43-14"><a href="#cb43-14" aria-hidden="true" tabindex="-1"></a> ),</span>
|
||
<span id="cb43-15"><a href="#cb43-15" aria-hidden="true" tabindex="-1"></a> <span class="at">realized_vol_mid =</span> <span class="fu">clip_max_func</span>(</span>
|
||
<span id="cb43-16"><a href="#cb43-16" aria-hidden="true" tabindex="-1"></a> realized_vol_mid,</span>
|
||
<span id="cb43-17"><a href="#cb43-17" aria-hidden="true" tabindex="-1"></a> <span class="st">"realized_vol_mid"</span>,</span>
|
||
<span id="cb43-18"><a href="#cb43-18" aria-hidden="true" tabindex="-1"></a> stats_max</span>
|
||
<span id="cb43-19"><a href="#cb43-19" aria-hidden="true" tabindex="-1"></a> ),</span>
|
||
<span id="cb43-20"><a href="#cb43-20" aria-hidden="true" tabindex="-1"></a> <span class="at">realized_vol_long =</span> <span class="fu">clip_max_func</span>(</span>
|
||
<span id="cb43-21"><a href="#cb43-21" aria-hidden="true" tabindex="-1"></a> realized_vol_long,</span>
|
||
<span id="cb43-22"><a href="#cb43-22" aria-hidden="true" tabindex="-1"></a> <span class="st">"realized_vol_long"</span>,</span>
|
||
<span id="cb43-23"><a href="#cb43-23" aria-hidden="true" tabindex="-1"></a> stats_max</span>
|
||
<span id="cb43-24"><a href="#cb43-24" aria-hidden="true" tabindex="-1"></a> ),</span>
|
||
<span id="cb43-25"><a href="#cb43-25" aria-hidden="true" tabindex="-1"></a> <span class="at">put_volume =</span> <span class="fu">clip_max_func</span>(put_volume, <span class="st">"put_volume"</span>, stats_max),</span>
|
||
<span id="cb43-26"><a href="#cb43-26" aria-hidden="true" tabindex="-1"></a> <span class="at">call_volume =</span> <span class="fu">clip_max_func</span>(call_volume, <span class="st">"call_volume"</span>, stats_max),</span>
|
||
<span id="cb43-27"><a href="#cb43-27" aria-hidden="true" tabindex="-1"></a> <span class="at">put_oi =</span> <span class="fu">clip_max_func</span>(put_oi, <span class="st">"put_oi"</span>, stats_max),</span>
|
||
<span id="cb43-28"><a href="#cb43-28" aria-hidden="true" tabindex="-1"></a> <span class="at">call_oi =</span> <span class="fu">clip_max_func</span>(call_oi, <span class="st">"call_oi"</span>, stats_max),</span>
|
||
<span id="cb43-29"><a href="#cb43-29" aria-hidden="true" tabindex="-1"></a> <span class="at">strike_dispersion =</span> <span class="fu">clip_max_func</span>(</span>
|
||
<span id="cb43-30"><a href="#cb43-30" aria-hidden="true" tabindex="-1"></a> strike_dispersion,</span>
|
||
<span id="cb43-31"><a href="#cb43-31" aria-hidden="true" tabindex="-1"></a> <span class="st">"strike_dispersion"</span>,</span>
|
||
<span id="cb43-32"><a href="#cb43-32" aria-hidden="true" tabindex="-1"></a> stats_max</span>
|
||
<span id="cb43-33"><a href="#cb43-33" aria-hidden="true" tabindex="-1"></a> ),</span>
|
||
<span id="cb43-34"><a href="#cb43-34" aria-hidden="true" tabindex="-1"></a> <span class="at">total_contracts =</span> <span class="fu">clip_max_func</span>(</span>
|
||
<span id="cb43-35"><a href="#cb43-35" aria-hidden="true" tabindex="-1"></a> total_contracts,</span>
|
||
<span id="cb43-36"><a href="#cb43-36" aria-hidden="true" tabindex="-1"></a> <span class="st">"total_contracts"</span>,</span>
|
||
<span id="cb43-37"><a href="#cb43-37" aria-hidden="true" tabindex="-1"></a> stats_max</span>
|
||
<span id="cb43-38"><a href="#cb43-38" aria-hidden="true" tabindex="-1"></a> ),</span>
|
||
<span id="cb43-39"><a href="#cb43-39" aria-hidden="true" tabindex="-1"></a> <span class="at">pulse_ratio =</span> <span class="fu">clip_max_func</span>(pulse_ratio, <span class="st">"pulse_ratio"</span>, stats_max),</span>
|
||
<span id="cb43-40"><a href="#cb43-40" aria-hidden="true" tabindex="-1"></a> <span class="at">put_call_ratio_volume =</span> <span class="fu">clip_max_func</span>(</span>
|
||
<span id="cb43-41"><a href="#cb43-41" aria-hidden="true" tabindex="-1"></a> put_call_ratio_volume,</span>
|
||
<span id="cb43-42"><a href="#cb43-42" aria-hidden="true" tabindex="-1"></a> <span class="st">"put_call_ratio_volume"</span>,</span>
|
||
<span id="cb43-43"><a href="#cb43-43" aria-hidden="true" tabindex="-1"></a> stats_max</span>
|
||
<span id="cb43-44"><a href="#cb43-44" aria-hidden="true" tabindex="-1"></a> ),</span>
|
||
<span id="cb43-45"><a href="#cb43-45" aria-hidden="true" tabindex="-1"></a> <span class="at">put_call_ratio_oi =</span> <span class="fu">clip_max_func</span>(</span>
|
||
<span id="cb43-46"><a href="#cb43-46" aria-hidden="true" tabindex="-1"></a> put_call_ratio_oi,</span>
|
||
<span id="cb43-47"><a href="#cb43-47" aria-hidden="true" tabindex="-1"></a> <span class="st">"put_call_ratio_oi"</span>,</span>
|
||
<span id="cb43-48"><a href="#cb43-48" aria-hidden="true" tabindex="-1"></a> stats_max</span>
|
||
<span id="cb43-49"><a href="#cb43-49" aria-hidden="true" tabindex="-1"></a> ),</span>
|
||
<span id="cb43-50"><a href="#cb43-50" aria-hidden="true" tabindex="-1"></a> <span class="at">liquidity_ratio =</span> <span class="fu">clip_max_func</span>(</span>
|
||
<span id="cb43-51"><a href="#cb43-51" aria-hidden="true" tabindex="-1"></a> liquidity_ratio,</span>
|
||
<span id="cb43-52"><a href="#cb43-52" aria-hidden="true" tabindex="-1"></a> <span class="st">"liquidity_ratio"</span>,</span>
|
||
<span id="cb43-53"><a href="#cb43-53" aria-hidden="true" tabindex="-1"></a> stats_max</span>
|
||
<span id="cb43-54"><a href="#cb43-54" aria-hidden="true" tabindex="-1"></a> ),</span>
|
||
<span id="cb43-55"><a href="#cb43-55" aria-hidden="true" tabindex="-1"></a> <span class="at">option_dispersion =</span> <span class="fu">clip_max_func</span>(</span>
|
||
<span id="cb43-56"><a href="#cb43-56" aria-hidden="true" tabindex="-1"></a> option_dispersion,</span>
|
||
<span id="cb43-57"><a href="#cb43-57" aria-hidden="true" tabindex="-1"></a> <span class="st">"option_dispersion"</span>,</span>
|
||
<span id="cb43-58"><a href="#cb43-58" aria-hidden="true" tabindex="-1"></a> stats_max</span>
|
||
<span id="cb43-59"><a href="#cb43-59" aria-hidden="true" tabindex="-1"></a> ),</span>
|
||
<span id="cb43-60"><a href="#cb43-60" aria-hidden="true" tabindex="-1"></a> <span class="at">put_low_strike =</span> <span class="fu">clip_max_func</span>(put_low_strike, <span class="st">"put_low_strike"</span>, stats_max),</span>
|
||
<span id="cb43-61"><a href="#cb43-61" aria-hidden="true" tabindex="-1"></a> <span class="at">stress_spread =</span> <span class="fu">clip_dual_func</span>(stress_spread, <span class="st">"stress_spread"</span>, stats_dual)</span>
|
||
<span id="cb43-62"><a href="#cb43-62" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">|></span></span>
|
||
<span id="cb43-63"><a href="#cb43-63" aria-hidden="true" tabindex="-1"></a> <span class="fu">step_log</span>(</span>
|
||
<span id="cb43-64"><a href="#cb43-64" aria-hidden="true" tabindex="-1"></a> <span class="fu">any_of</span>(<span class="fu">c</span>(</span>
|
||
<span id="cb43-65"><a href="#cb43-65" aria-hidden="true" tabindex="-1"></a> <span class="st">"realized_vol_short"</span>,</span>
|
||
<span id="cb43-66"><a href="#cb43-66" aria-hidden="true" tabindex="-1"></a> <span class="st">"realized_vol_mid"</span>,</span>
|
||
<span id="cb43-67"><a href="#cb43-67" aria-hidden="true" tabindex="-1"></a> <span class="st">"realized_vol_long"</span>,</span>
|
||
<span id="cb43-68"><a href="#cb43-68" aria-hidden="true" tabindex="-1"></a> <span class="st">"put_volume"</span>,</span>
|
||
<span id="cb43-69"><a href="#cb43-69" aria-hidden="true" tabindex="-1"></a> <span class="st">"call_volume"</span>,</span>
|
||
<span id="cb43-70"><a href="#cb43-70" aria-hidden="true" tabindex="-1"></a> <span class="st">"put_oi"</span>,</span>
|
||
<span id="cb43-71"><a href="#cb43-71" aria-hidden="true" tabindex="-1"></a> <span class="st">"call_oi"</span>,</span>
|
||
<span id="cb43-72"><a href="#cb43-72" aria-hidden="true" tabindex="-1"></a> <span class="st">"pulse_ratio"</span>,</span>
|
||
<span id="cb43-73"><a href="#cb43-73" aria-hidden="true" tabindex="-1"></a> <span class="st">"put_call_ratio_volume"</span>,</span>
|
||
<span id="cb43-74"><a href="#cb43-74" aria-hidden="true" tabindex="-1"></a> <span class="st">"put_call_ratio_oi"</span>,</span>
|
||
<span id="cb43-75"><a href="#cb43-75" aria-hidden="true" tabindex="-1"></a> <span class="st">"liquidity_ratio"</span></span>
|
||
<span id="cb43-76"><a href="#cb43-76" aria-hidden="true" tabindex="-1"></a> )),</span>
|
||
<span id="cb43-77"><a href="#cb43-77" aria-hidden="true" tabindex="-1"></a> <span class="at">offset =</span> <span class="dv">1</span></span>
|
||
<span id="cb43-78"><a href="#cb43-78" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">|></span></span>
|
||
<span id="cb43-79"><a href="#cb43-79" aria-hidden="true" tabindex="-1"></a> <span class="fu">step_log</span>(</span>
|
||
<span id="cb43-80"><a href="#cb43-80" aria-hidden="true" tabindex="-1"></a> <span class="fu">any_of</span>(<span class="fu">c</span>(</span>
|
||
<span id="cb43-81"><a href="#cb43-81" aria-hidden="true" tabindex="-1"></a> <span class="st">"strike_dispersion"</span>,</span>
|
||
<span id="cb43-82"><a href="#cb43-82" aria-hidden="true" tabindex="-1"></a> <span class="st">"total_contracts"</span>,</span>
|
||
<span id="cb43-83"><a href="#cb43-83" aria-hidden="true" tabindex="-1"></a> <span class="st">"option_dispersion"</span>,</span>
|
||
<span id="cb43-84"><a href="#cb43-84" aria-hidden="true" tabindex="-1"></a> <span class="st">"put_low_strike"</span></span>
|
||
<span id="cb43-85"><a href="#cb43-85" aria-hidden="true" tabindex="-1"></a> )),</span>
|
||
<span id="cb43-86"><a href="#cb43-86" aria-hidden="true" tabindex="-1"></a> <span class="at">offset =</span> <span class="dv">0</span></span>
|
||
<span id="cb43-87"><a href="#cb43-87" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">|></span></span>
|
||
<span id="cb43-88"><a href="#cb43-88" aria-hidden="true" tabindex="-1"></a> <span class="fu">step_YeoJohnson</span>(</span>
|
||
<span id="cb43-89"><a href="#cb43-89" aria-hidden="true" tabindex="-1"></a> <span class="fu">any_of</span>(<span class="fu">c</span>(</span>
|
||
<span id="cb43-90"><a href="#cb43-90" aria-hidden="true" tabindex="-1"></a> <span class="st">"stress_spread"</span></span>
|
||
<span id="cb43-91"><a href="#cb43-91" aria-hidden="true" tabindex="-1"></a> ))</span>
|
||
<span id="cb43-92"><a href="#cb43-92" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">|></span></span>
|
||
<span id="cb43-93"><a href="#cb43-93" aria-hidden="true" tabindex="-1"></a> <span class="fu">step_normalize</span>(<span class="fu">all_numeric_predictors</span>())</span>
|
||
<span id="cb43-94"><a href="#cb43-94" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb43-95"><a href="#cb43-95" aria-hidden="true" tabindex="-1"></a>rec_pca <span class="ot"><-</span> rec_linear <span class="sc">|></span></span>
|
||
<span id="cb43-96"><a href="#cb43-96" aria-hidden="true" tabindex="-1"></a> <span class="fu">step_pca</span>(<span class="fu">all_numeric_predictors</span>(), <span class="at">threshold =</span> <span class="fl">0.95</span>)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
<section id="state-execution-and-strict-data-isolation-prep-and-bake" class="level4">
|
||
<h4 class="anchored" data-anchor-id="state-execution-and-strict-data-isolation-prep-and-bake">State Execution and Strict Data Isolation (<code>prep()</code> and <code>bake()</code>)</h4>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb44"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb44-1"><a href="#cb44-1" aria-hidden="true" tabindex="-1"></a>prep_rec_tree <span class="ot"><-</span> <span class="fu">prep</span>(rec_tree, <span class="at">training =</span> train_eng)</span>
|
||
<span id="cb44-2"><a href="#cb44-2" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb44-3"><a href="#cb44-3" aria-hidden="true" tabindex="-1"></a>train_tree <span class="ot"><-</span> <span class="fu">bake</span>(prep_rec_tree, <span class="at">new_data =</span> <span class="cn">NULL</span>)</span>
|
||
<span id="cb44-4"><a href="#cb44-4" aria-hidden="true" tabindex="-1"></a>val_tree <span class="ot"><-</span> <span class="fu">bake</span>(prep_rec_tree, <span class="at">new_data =</span> val_eng)</span>
|
||
<span id="cb44-5"><a href="#cb44-5" aria-hidden="true" tabindex="-1"></a>test_tree <span class="ot"><-</span> <span class="fu">bake</span>(prep_rec_tree, <span class="at">new_data =</span> test_eng)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb45"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb45-1"><a href="#cb45-1" aria-hidden="true" tabindex="-1"></a>prep_rec_linear <span class="ot"><-</span> <span class="fu">prep</span>(rec_linear, <span class="at">training =</span> train_eng)</span>
|
||
<span id="cb45-2"><a href="#cb45-2" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb45-3"><a href="#cb45-3" aria-hidden="true" tabindex="-1"></a>train_linear <span class="ot"><-</span> <span class="fu">bake</span>(prep_rec_linear, <span class="at">new_data =</span> <span class="cn">NULL</span>)</span>
|
||
<span id="cb45-4"><a href="#cb45-4" aria-hidden="true" tabindex="-1"></a>val_linear <span class="ot"><-</span> <span class="fu">bake</span>(prep_rec_linear, <span class="at">new_data =</span> val_eng)</span>
|
||
<span id="cb45-5"><a href="#cb45-5" aria-hidden="true" tabindex="-1"></a>test_linear <span class="ot"><-</span> <span class="fu">bake</span>(prep_rec_linear, <span class="at">new_data =</span> test_eng)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb46"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb46-1"><a href="#cb46-1" aria-hidden="true" tabindex="-1"></a>prep_rec_pca <span class="ot"><-</span> <span class="fu">prep</span>(rec_pca, <span class="at">training =</span> train_eng)</span>
|
||
<span id="cb46-2"><a href="#cb46-2" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb46-3"><a href="#cb46-3" aria-hidden="true" tabindex="-1"></a>train_pca <span class="ot"><-</span> <span class="fu">bake</span>(prep_rec_pca, <span class="at">new_data =</span> <span class="cn">NULL</span>)</span>
|
||
<span id="cb46-4"><a href="#cb46-4" aria-hidden="true" tabindex="-1"></a>val_pca <span class="ot"><-</span> <span class="fu">bake</span>(prep_rec_pca, <span class="at">new_data =</span> val_eng)</span>
|
||
<span id="cb46-5"><a href="#cb46-5" aria-hidden="true" tabindex="-1"></a>test_pca <span class="ot"><-</span> <span class="fu">bake</span>(prep_rec_pca, <span class="at">new_data =</span> test_eng)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
</section>
|
||
</section>
|
||
</section>
|
||
<section id="experimental-framework-optimization-strategy" class="level2">
|
||
<h2 class="anchored" data-anchor-id="experimental-framework-optimization-strategy">Experimental Framework & Optimization Strategy</h2>
|
||
<section id="evaluation-metrics-the-exponential-rmse" class="level3">
|
||
<h3 class="anchored" data-anchor-id="evaluation-metrics-the-exponential-rmse">Evaluation Metrics: The Exponential RMSE</h3>
|
||
<p>The primary objective of this study is to minimize the forecasting error of implied volatility. The standard metric for continuous predictive tasks is the <strong>Root Mean Squared Error (RMSE)</strong>, which heavily penalizes large deviations due to its quadratic loss function.</p>
|
||
<p>However, a critical methodological adjustment is required due to our preprocessing architecture. As established in Section 2.4, all models were strictly trained on the natural logarithm of the implied volatility (<span class="math inline">\(\log(Y)\)</span>) to stabilize variance and correct the right-skewness of the financial distribution. Evaluating the models on this logarithmic scale would yield artificially compressed error metrics that fail to reflect the true financial magnitude of the forecasting errors.</p>
|
||
<p>Therefore, to guarantee absolute fairness across all models (linear, tree-based, and neural networks) and to assess performance in the true operational domain, the predictions (<span class="math inline">\(\hat{y}_{\log}\)</span>) must be exponentially transformed prior to calculating the final validation and test errors.</p>
|
||
<p>The evaluation metric is strictly defined as the Exponential RMSE on the original scale:</p>
|
||
<p><span class="math display">\[
|
||
RMSE_{real} = \sqrt{\frac{1}{n} \sum_{i=1}^{n} \left( \exp(\hat{y}_{\log, i}) - Y_i \right)^2}
|
||
\]</span></p>
|
||
<p>This metric ensures that the bias-variance trade-off is evaluated exactly as it would impact a real-world financial portfolio, where absolute volatility spreads dictate pricing and risk management decisions.</p>
|
||
</section>
|
||
<section id="hyperparameter-tuning-grid-search-vs.-bayesian-optimization" class="level3">
|
||
<h3 class="anchored" data-anchor-id="hyperparameter-tuning-grid-search-vs.-bayesian-optimization">Hyperparameter Tuning: Grid Search vs. Bayesian Optimization</h3>
|
||
<p>Financial machine learning models, whether penalized regressions like <strong>Elastic Net</strong> or complex ensembles like <strong>LightGBM</strong>, rely heavily on hyperparameter configurations. To discover the optimal parameter set, we must navigate the computational trade-off between exhaustive search and hardware constraints.</p>
|
||
<ul>
|
||
<li><p><strong>1. The Limits of Grid Search</strong>: Traditional <strong>Grid Search</strong> operates by defining a discrete matrix of hyperparameter combinations and evaluating the objective function (validation <strong>RMSE</strong>) for every single point. While mathematically exhaustive, this approach scales exponentially with the number of dimensions (the curse of dimensionality). For high-capacity models requiring the simultaneous optimization of learning rates, depths, structural penalties, and bagging fractions, Grid Search becomes computationally intractable, resulting in an immense waste of <strong>CPU</strong> cycles evaluating areas of the hyperparameter space that yield poor performance.</p></li>
|
||
<li><p><strong>2. The Probabilistic Superiority of Bayesian Optimization</strong>: To resolve this computational bottleneck, we discarded <strong>Grid Search</strong> in favor of <strong>Bayesian Optimization using Gaussian Processes (GP)</strong>. Unlike naive grid or random searches, <strong>Bayesian Optimization</strong> treats the hyperparameter tuning process as a probabilistic regression problem.</p></li>
|
||
</ul>
|
||
<p>The algorithm builds a surrogate probability model of the objective function (the validation <strong>RMSE</strong>) based on past evaluations. At each iteration, it uses an acquisition function (typically Expected Improvement) to determine the next set of hyperparameters to evaluate. This acquisition function mathematically balances two competing objectives:</p>
<ul>
<li><p><strong>Exploration</strong>: Sampling regions of the hyperparameter space with high uncertainty.</p></li>
<li><p><strong>Exploitation</strong>: Sampling regions where the surrogate model predicts a very low RMSE.</p></li>
</ul>
|
||
<p>By learning from previous iterations, <strong>Bayesian Optimization</strong> typically approaches the global minimum significantly faster and with fewer total evaluations than exhaustive methods. This probabilistic efficiency was crucial for tuning our tree-based ensembles (Section 5) on dense tabular data without exceeding the processing limits of our local computational infrastructure. All hyperparameter tuning loops in this study were executed using this Bayesian framework, maximizing the negative <strong>Exponential RMSE</strong> (equivalently, minimizing the Exponential RMSE) to pinpoint the optimal architectural configurations.</p>
|
||
</section>
|
||
</section>
|
||
<section id="linear-interpretable-models" class="level2">
|
||
<h2 class="anchored" data-anchor-id="linear-interpretable-models">Linear & Interpretable Models</h2>
|
||
<section id="linear-regressions-on-the-regular-dataset" class="level3">
|
||
<h3 class="anchored" data-anchor-id="linear-regressions-on-the-regular-dataset">Linear regressions on the regular dataset</h3>
|
||
<section id="baseline-linear-regression" class="level4">
|
||
<h4 class="anchored" data-anchor-id="baseline-linear-regression">Baseline Linear Regression</h4>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb47"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb47-1"><a href="#cb47-1" aria-hidden="true" tabindex="-1"></a>train_linear_lm <span class="ot"><-</span> train_linear <span class="sc">|></span> dplyr<span class="sc">::</span><span class="fu">select</span>(<span class="sc">-</span>asset_id, <span class="sc">-</span>obs_date)</span>
|
||
<span id="cb47-2"><a href="#cb47-2" aria-hidden="true" tabindex="-1"></a>train_linear_lm<span class="sc">$</span>implied_vol_ref <span class="ot"><-</span> <span class="fu">log</span>(train_linear_lm<span class="sc">$</span>implied_vol_ref)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb48"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb48-1"><a href="#cb48-1" aria-hidden="true" tabindex="-1"></a>val_linear_lm <span class="ot"><-</span> val_linear <span class="sc">|></span> dplyr<span class="sc">::</span><span class="fu">select</span>(<span class="sc">-</span>asset_id, <span class="sc">-</span>obs_date)</span>
|
||
<span id="cb48-2"><a href="#cb48-2" aria-hidden="true" tabindex="-1"></a>val_linear_lm<span class="sc">$</span>implied_vol_ref <span class="ot"><-</span> <span class="fu">log</span>(val_linear_lm<span class="sc">$</span>implied_vol_ref)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
</section>
|
||
<section id="linear-hypotheses" class="level4">
|
||
<h4 class="anchored" data-anchor-id="linear-hypotheses">Linear Hypotheses</h4>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb49"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb49-1"><a href="#cb49-1" aria-hidden="true" tabindex="-1"></a>mod1 <span class="ot"><-</span> <span class="fu">lm</span>(implied_vol_ref <span class="sc">~</span> ., <span class="at">data =</span> train_linear_lm)</span>
|
||
<span id="cb49-2"><a href="#cb49-2" aria-hidden="true" tabindex="-1"></a><span class="fu">summary</span>(mod1)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
<p>After the logarithmic transformation of the target, we fit a basic linear model as a benchmark. We then check the assumptions of the linear model using the diagnostic plots produced by <code>autoplot()</code>.</p>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb50"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb50-1"><a href="#cb50-1" aria-hidden="true" tabindex="-1"></a><span class="fu">autoplot</span>(mod1)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
<p>Although the sheer number of residual points makes the plots difficult to interpret, the blue lines visually support our P1–P4 hypotheses. For the Q-Q plot, we consider our assumption “validated” because the large central part of the plot follows the line <span class="math inline">\(y = x\)</span>; only the two ends deviate from it, dropping at one end and rising at the other. This can be explained by outliers at the extremes of the dataset. We now examine how the AIC evolves under stepwise selection.</p>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb51"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb51-1"><a href="#cb51-1" aria-hidden="true" tabindex="-1"></a><span class="fu">stepAIC</span>(mod1, <span class="sc">~</span>., <span class="at">trace =</span> T, <span class="at">direction =</span> <span class="fu">c</span>(<span class="st">"forward"</span>))</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb52"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb52-1"><a href="#cb52-1" aria-hidden="true" tabindex="-1"></a><span class="fu">stepAIC</span>(mod1, <span class="sc">~</span>., <span class="at">trace =</span> T, <span class="at">direction =</span> <span class="fu">c</span>(<span class="st">"backward"</span>))</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb53"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb53-1"><a href="#cb53-1" aria-hidden="true" tabindex="-1"></a>Y_val <span class="ot"><-</span> val_linear_lm<span class="sc">$</span>implied_vol_ref</span>
|
||
<span id="cb53-2"><a href="#cb53-2" aria-hidden="true" tabindex="-1"></a>Y_hat1 <span class="ot"><-</span> <span class="fu">predict</span>(mod1, <span class="at">newdata =</span> val_linear_lm, <span class="at">type =</span> <span class="st">"response"</span>)</span>
|
||
<span id="cb53-3"><a href="#cb53-3" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb53-4"><a href="#cb53-4" aria-hidden="true" tabindex="-1"></a>MSS_1 <span class="ot"><-</span> <span class="fu">mean</span>((<span class="fu">exp</span>(Y_val) <span class="sc">-</span> <span class="fu">exp</span>(Y_hat1))<span class="sc">**</span><span class="dv">2</span>)</span>
|
||
<span id="cb53-5"><a href="#cb53-5" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb53-6"><a href="#cb53-6" aria-hidden="true" tabindex="-1"></a><span class="fu">print</span>(<span class="fu">paste0</span>(<span class="st">"The RMSE is: "</span>, <span class="fu">sqrt</span>(MSS_1)))</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
<p>We first look at a general model with all features but without any interactions. The objective of this exercise was to identify the variables with little impact on the model’s predictive capability, in order to exclude them from our subsequent models and kickstart the variable selection. However, only one variable, <span class="math inline">\(call\_volume\)</span>, had a Fisher test p-value above 0.05. When testing with stepwise selection, both forward and backward, the complete model was judged to be the best. Nevertheless, in order to introduce interactions without stretching the machine’s limits, we decided to eliminate both <span class="math inline">\(realized\_vol\_long\)</span> and <span class="math inline">\(call\_volume\)</span>, <span class="math inline">\(realized\_vol\_long\)</span> being the only other variable with a t-test p-value higher than <span class="math inline">\(10^{-3}\)</span>. The RMSE of the general model was 12.01.</p>
|
||
</section>
|
||
<section id="first-particular-model" class="level4">
|
||
<h4 class="anchored" data-anchor-id="first-particular-model">First particular model</h4>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb54"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb54-1"><a href="#cb54-1" aria-hidden="true" tabindex="-1"></a>mod2 <span class="ot"><-</span> <span class="fu">lm</span>(</span>
|
||
<span id="cb54-2"><a href="#cb54-2" aria-hidden="true" tabindex="-1"></a> implied_vol_ref <span class="sc">~</span> realized_vol_short <span class="sc">*</span></span>
|
||
<span id="cb54-3"><a href="#cb54-3" aria-hidden="true" tabindex="-1"></a> realized_vol_mid <span class="sc">*</span></span>
|
||
<span id="cb54-4"><a href="#cb54-4" aria-hidden="true" tabindex="-1"></a> strike_dispersion <span class="sc">*</span></span>
|
||
<span id="cb54-5"><a href="#cb54-5" aria-hidden="true" tabindex="-1"></a> (put_volume <span class="sc">+</span></span>
|
||
<span id="cb54-6"><a href="#cb54-6" aria-hidden="true" tabindex="-1"></a> call_oi <span class="sc">+</span></span>
|
||
<span id="cb54-7"><a href="#cb54-7" aria-hidden="true" tabindex="-1"></a> put_oi <span class="sc">+</span></span>
|
||
<span id="cb54-8"><a href="#cb54-8" aria-hidden="true" tabindex="-1"></a> maturity_count <span class="sc">+</span></span>
|
||
<span id="cb54-9"><a href="#cb54-9" aria-hidden="true" tabindex="-1"></a> total_contracts <span class="sc">+</span></span>
|
||
<span id="cb54-10"><a href="#cb54-10" aria-hidden="true" tabindex="-1"></a> market_vol_index <span class="sc">+</span></span>
|
||
<span id="cb54-11"><a href="#cb54-11" aria-hidden="true" tabindex="-1"></a> pulse_ratio <span class="sc">+</span></span>
|
||
<span id="cb54-12"><a href="#cb54-12" aria-hidden="true" tabindex="-1"></a> put_call_ratio_volume <span class="sc">+</span></span>
|
||
<span id="cb54-13"><a href="#cb54-13" aria-hidden="true" tabindex="-1"></a> put_call_ratio_oi <span class="sc">+</span></span>
|
||
<span id="cb54-14"><a href="#cb54-14" aria-hidden="true" tabindex="-1"></a> liquidity_ratio <span class="sc">+</span></span>
|
||
<span id="cb54-15"><a href="#cb54-15" aria-hidden="true" tabindex="-1"></a> option_dispersion <span class="sc">+</span></span>
|
||
<span id="cb54-16"><a href="#cb54-16" aria-hidden="true" tabindex="-1"></a> put_low_strike <span class="sc">+</span></span>
|
||
<span id="cb54-17"><a href="#cb54-17" aria-hidden="true" tabindex="-1"></a> put_proportion <span class="sc">+</span></span>
|
||
<span id="cb54-18"><a href="#cb54-18" aria-hidden="true" tabindex="-1"></a> stress_spread),</span>
|
||
<span id="cb54-19"><a href="#cb54-19" aria-hidden="true" tabindex="-1"></a> <span class="at">data =</span> train_linear_lm</span>
|
||
<span id="cb54-20"><a href="#cb54-20" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb54-21"><a href="#cb54-21" aria-hidden="true" tabindex="-1"></a><span class="fu">summary</span>(mod2)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb55"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb55-1"><a href="#cb55-1" aria-hidden="true" tabindex="-1"></a>Y_hat2 <span class="ot"><-</span> <span class="fu">predict</span>(mod2, <span class="at">newdata =</span> val_linear_lm, <span class="at">type =</span> <span class="st">"response"</span>)</span>
|
||
<span id="cb55-2"><a href="#cb55-2" aria-hidden="true" tabindex="-1"></a>Y_val <span class="ot"><-</span> val_linear_lm<span class="sc">$</span>implied_vol_ref</span>
|
||
<span id="cb55-3"><a href="#cb55-3" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb55-4"><a href="#cb55-4" aria-hidden="true" tabindex="-1"></a>MSS_2 <span class="ot"><-</span> <span class="fu">mean</span>((<span class="fu">exp</span>(Y_val) <span class="sc">-</span> <span class="fu">exp</span>(Y_hat2))<span class="sc">**</span><span class="dv">2</span>)</span>
|
||
<span id="cb55-5"><a href="#cb55-5" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb55-6"><a href="#cb55-6" aria-hidden="true" tabindex="-1"></a><span class="fu">print</span>(<span class="fu">paste0</span>(<span class="st">"the RMSE is: "</span>, <span class="fu">sqrt</span>(MSS_2)))</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
<p>Given that all remaining variables proved highly relevant to the modelling task, finding the right interactions was based on intuition as well as trial and error. We remove the two aforementioned variables and add the interactions between <span class="math inline">\(realized\_vol\_short\)</span>, <span class="math inline">\(realized\_vol\_mid\)</span>, <span class="math inline">\(strike\_dispersion\)</span> and all other variables. This seems intuitive, as the realised volatility can be a straightforward indicator of the implied volatility. Measuring its interactions with the other features shows how these features impact volatility. The generalisation from realised to implied volatility is simpler than from the other variables to implied volatility. <span class="math inline">\(strike\_dispersion\)</span> is selected as a first representative among all option-related features. This model drastically improves upon the general, interaction-free model, with an RMSE of 11.32.</p>
|
||
</section>
|
||
<section id="second-particular-model" class="level4">
|
||
<h4 class="anchored" data-anchor-id="second-particular-model">Second particular model</h4>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb56"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb56-1"><a href="#cb56-1" aria-hidden="true" tabindex="-1"></a>mod3 <span class="ot"><-</span> <span class="fu">lm</span>(</span>
|
||
<span id="cb56-2"><a href="#cb56-2" aria-hidden="true" tabindex="-1"></a> implied_vol_ref <span class="sc">~</span> realized_vol_mid <span class="sc">*</span></span>
|
||
<span id="cb56-3"><a href="#cb56-3" aria-hidden="true" tabindex="-1"></a> strike_dispersion <span class="sc">*</span></span>
|
||
<span id="cb56-4"><a href="#cb56-4" aria-hidden="true" tabindex="-1"></a> option_dispersion <span class="sc">*</span></span>
|
||
<span id="cb56-5"><a href="#cb56-5" aria-hidden="true" tabindex="-1"></a> (market_vol_index <span class="sc">+</span></span>
|
||
<span id="cb56-6"><a href="#cb56-6" aria-hidden="true" tabindex="-1"></a> liquidity_ratio <span class="sc">+</span></span>
|
||
<span id="cb56-7"><a href="#cb56-7" aria-hidden="true" tabindex="-1"></a> realized_vol_short <span class="sc">+</span></span>
|
||
<span id="cb56-8"><a href="#cb56-8" aria-hidden="true" tabindex="-1"></a> put_volume <span class="sc">+</span></span>
|
||
<span id="cb56-9"><a href="#cb56-9" aria-hidden="true" tabindex="-1"></a> call_oi <span class="sc">+</span></span>
|
||
<span id="cb56-10"><a href="#cb56-10" aria-hidden="true" tabindex="-1"></a> put_oi <span class="sc">+</span></span>
|
||
<span id="cb56-11"><a href="#cb56-11" aria-hidden="true" tabindex="-1"></a> maturity_count <span class="sc">+</span></span>
|
||
<span id="cb56-12"><a href="#cb56-12" aria-hidden="true" tabindex="-1"></a> total_contracts <span class="sc">+</span></span>
|
||
<span id="cb56-13"><a href="#cb56-13" aria-hidden="true" tabindex="-1"></a> pulse_ratio <span class="sc">+</span></span>
|
||
<span id="cb56-14"><a href="#cb56-14" aria-hidden="true" tabindex="-1"></a> put_call_ratio_volume <span class="sc">+</span></span>
|
||
<span id="cb56-15"><a href="#cb56-15" aria-hidden="true" tabindex="-1"></a> put_call_ratio_oi <span class="sc">+</span></span>
|
||
<span id="cb56-16"><a href="#cb56-16" aria-hidden="true" tabindex="-1"></a> option_dispersion <span class="sc">+</span></span>
|
||
<span id="cb56-17"><a href="#cb56-17" aria-hidden="true" tabindex="-1"></a> put_low_strike <span class="sc">+</span></span>
|
||
<span id="cb56-18"><a href="#cb56-18" aria-hidden="true" tabindex="-1"></a> put_proportion <span class="sc">+</span></span>
|
||
<span id="cb56-19"><a href="#cb56-19" aria-hidden="true" tabindex="-1"></a> stress_spread),</span>
|
||
<span id="cb56-20"><a href="#cb56-20" aria-hidden="true" tabindex="-1"></a> <span class="at">data =</span> train_linear_lm</span>
|
||
<span id="cb56-21"><a href="#cb56-21" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb56-22"><a href="#cb56-22" aria-hidden="true" tabindex="-1"></a><span class="fu">summary</span>(mod3)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb57"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb57-1"><a href="#cb57-1" aria-hidden="true" tabindex="-1"></a>Y_hat3 <span class="ot"><-</span> <span class="fu">predict</span>(mod3, <span class="at">newdata =</span> val_linear_lm, <span class="at">type =</span> <span class="st">"response"</span>)</span>
|
||
<span id="cb57-2"><a href="#cb57-2" aria-hidden="true" tabindex="-1"></a>Y_val <span class="ot"><-</span> val_linear_lm<span class="sc">$</span>implied_vol_ref</span>
|
||
<span id="cb57-3"><a href="#cb57-3" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb57-4"><a href="#cb57-4" aria-hidden="true" tabindex="-1"></a>MSS_3 <span class="ot"><-</span> <span class="fu">mean</span>((<span class="fu">exp</span>(Y_val) <span class="sc">-</span> <span class="fu">exp</span>(Y_hat3))<span class="sc">**</span><span class="dv">2</span>)</span>
|
||
<span id="cb57-5"><a href="#cb57-5" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb57-6"><a href="#cb57-6" aria-hidden="true" tabindex="-1"></a><span class="fu">print</span>(<span class="fu">paste0</span>(<span class="st">"The RMSE is: "</span>, <span class="fu">sqrt</span>(MSS_3)))</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
<p>The following thought occurred to us: would a single realised volatility, among the features interacting with all others, suffice? Because of the size of the dataset, we could not add an unlimited number of interactions, which turned the variable selection process into an interaction selection process. We tested a model with the interactions between <span class="math inline">\(realized\_vol\_mid\)</span>, <span class="math inline">\(strike\_dispersion\)</span>, <span class="math inline">\(option\_dispersion\)</span> and all other variables. This model improved upon the general model, but was less precise than the first particular model, with an RMSE of 11.42.</p>
|
||
</section>
|
||
<section id="third-particular-model" class="level4">
|
||
<h4 class="anchored" data-anchor-id="third-particular-model">Third particular model</h4>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb58"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb58-1"><a href="#cb58-1" aria-hidden="true" tabindex="-1"></a>mod4 <span class="ot"><-</span> <span class="fu">lm</span>(</span>
|
||
<span id="cb58-2"><a href="#cb58-2" aria-hidden="true" tabindex="-1"></a> implied_vol_ref <span class="sc">~</span> market_vol_index <span class="sc">*</span></span>
|
||
<span id="cb58-3"><a href="#cb58-3" aria-hidden="true" tabindex="-1"></a> realized_vol_mid <span class="sc">*</span></span>
|
||
<span id="cb58-4"><a href="#cb58-4" aria-hidden="true" tabindex="-1"></a> strike_dispersion <span class="sc">*</span></span>
|
||
<span id="cb58-5"><a href="#cb58-5" aria-hidden="true" tabindex="-1"></a> (realized_vol_short <span class="sc">+</span></span>
|
||
<span id="cb58-6"><a href="#cb58-6" aria-hidden="true" tabindex="-1"></a> put_volume <span class="sc">+</span></span>
|
||
<span id="cb58-7"><a href="#cb58-7" aria-hidden="true" tabindex="-1"></a> call_oi <span class="sc">+</span></span>
|
||
<span id="cb58-8"><a href="#cb58-8" aria-hidden="true" tabindex="-1"></a> put_oi <span class="sc">+</span></span>
|
||
<span id="cb58-9"><a href="#cb58-9" aria-hidden="true" tabindex="-1"></a> maturity_count <span class="sc">+</span></span>
|
||
<span id="cb58-10"><a href="#cb58-10" aria-hidden="true" tabindex="-1"></a> total_contracts <span class="sc">+</span></span>
|
||
<span id="cb58-11"><a href="#cb58-11" aria-hidden="true" tabindex="-1"></a> pulse_ratio <span class="sc">+</span></span>
|
||
<span id="cb58-12"><a href="#cb58-12" aria-hidden="true" tabindex="-1"></a> put_call_ratio_volume <span class="sc">+</span></span>
|
||
<span id="cb58-13"><a href="#cb58-13" aria-hidden="true" tabindex="-1"></a> put_call_ratio_oi <span class="sc">+</span></span>
|
||
<span id="cb58-14"><a href="#cb58-14" aria-hidden="true" tabindex="-1"></a> liquidity_ratio <span class="sc">+</span></span>
|
||
<span id="cb58-15"><a href="#cb58-15" aria-hidden="true" tabindex="-1"></a> option_dispersion <span class="sc">+</span></span>
|
||
<span id="cb58-16"><a href="#cb58-16" aria-hidden="true" tabindex="-1"></a> put_low_strike <span class="sc">+</span></span>
|
||
<span id="cb58-17"><a href="#cb58-17" aria-hidden="true" tabindex="-1"></a> put_proportion <span class="sc">+</span></span>
|
||
<span id="cb58-18"><a href="#cb58-18" aria-hidden="true" tabindex="-1"></a> stress_spread),</span>
|
||
<span id="cb58-19"><a href="#cb58-19" aria-hidden="true" tabindex="-1"></a> <span class="at">data =</span> train_linear_lm</span>
|
||
<span id="cb58-20"><a href="#cb58-20" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb58-21"><a href="#cb58-21" aria-hidden="true" tabindex="-1"></a><span class="fu">summary</span>(mod4)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output cell-output-stdout">
|
||
<pre><code>
|
||
Call:
|
||
lm(formula = implied_vol_ref ~ market_vol_index * realized_vol_mid *
|
||
strike_dispersion * (realized_vol_short + put_volume + call_oi +
|
||
put_oi + maturity_count + total_contracts + pulse_ratio +
|
||
put_call_ratio_volume + put_call_ratio_oi + liquidity_ratio +
|
||
option_dispersion + put_low_strike + put_proportion + stress_spread),
|
||
data = train_linear_lm)
|
||
|
||
Residuals:
|
||
Min 1Q Median 3Q Max
|
||
-4.5906 -0.1230 -0.0010 0.1256 3.7556
|
||
|
||
Coefficients:
|
||
Estimate
|
||
(Intercept) 3.6599088
|
||
market_vol_index -0.0126850
|
||
realized_vol_mid -0.0045858
|
||
strike_dispersion 0.2916720
|
||
realized_vol_short 0.5922568
|
||
put_volume 0.0470339
|
||
call_oi -0.0022644
|
||
put_oi -0.0668333
|
||
maturity_count 0.0033262
|
||
total_contracts -0.1122176
|
||
pulse_ratio -0.2229992
|
||
put_call_ratio_volume 0.0096309
|
||
put_call_ratio_oi 0.0139323
|
||
liquidity_ratio -0.0165726
|
||
option_dispersion -0.3294306
|
||
put_low_strike -0.0020143
|
||
put_proportion -0.0216820
|
||
stress_spread -0.0770662
|
||
market_vol_index:realized_vol_mid -0.0517138
|
||
market_vol_index:strike_dispersion 0.0242089
|
||
realized_vol_mid:strike_dispersion 0.0117204
|
||
market_vol_index:realized_vol_short 0.0071117
|
||
market_vol_index:put_volume -0.0036265
|
||
market_vol_index:call_oi -0.0036837
|
||
market_vol_index:put_oi 0.0124329
|
||
market_vol_index:maturity_count -0.0035557
|
||
market_vol_index:total_contracts -0.0255472
|
||
market_vol_index:pulse_ratio 0.0341983
|
||
market_vol_index:put_call_ratio_volume -0.0038803
|
||
market_vol_index:put_call_ratio_oi -0.0006246
|
||
market_vol_index:liquidity_ratio 0.0063348
|
||
market_vol_index:option_dispersion -0.0564068
|
||
market_vol_index:put_low_strike 0.0173188
|
||
market_vol_index:put_proportion 0.0023782
|
||
market_vol_index:stress_spread 0.0003182
|
||
realized_vol_mid:realized_vol_short 0.0492174
|
||
realized_vol_mid:put_volume 0.0151216
|
||
realized_vol_mid:call_oi 0.0377888
|
||
realized_vol_mid:put_oi -0.0386033
|
||
realized_vol_mid:maturity_count -0.0266407
|
||
realized_vol_mid:total_contracts 0.0952750
|
||
realized_vol_mid:pulse_ratio 0.0067576
|
||
realized_vol_mid:put_call_ratio_volume 0.0111785
|
||
realized_vol_mid:put_call_ratio_oi -0.0017824
|
||
realized_vol_mid:liquidity_ratio -0.0104341
|
||
realized_vol_mid:option_dispersion 0.0861263
|
||
realized_vol_mid:put_low_strike -0.0378495
|
||
realized_vol_mid:put_proportion -0.0078994
|
||
realized_vol_mid:stress_spread -0.0597938
|
||
strike_dispersion:realized_vol_short 0.0335257
|
||
strike_dispersion:put_volume 0.0053345
|
||
strike_dispersion:call_oi -0.0007664
|
||
strike_dispersion:put_oi 0.0163583
|
||
strike_dispersion:maturity_count 0.0281394
|
||
strike_dispersion:total_contracts -0.0707158
|
||
strike_dispersion:pulse_ratio -0.0059344
|
||
strike_dispersion:put_call_ratio_volume -0.0012166
|
||
strike_dispersion:put_call_ratio_oi 0.0024963
|
||
strike_dispersion:liquidity_ratio 0.0057028
|
||
strike_dispersion:option_dispersion -0.0302037
|
||
strike_dispersion:put_low_strike 0.0155818
|
||
strike_dispersion:put_proportion -0.0034315
|
||
strike_dispersion:stress_spread -0.0266549
|
||
market_vol_index:realized_vol_mid:strike_dispersion 0.0001718
|
||
market_vol_index:realized_vol_mid:realized_vol_short 0.0073971
|
||
market_vol_index:realized_vol_mid:put_volume -0.0061054
|
||
market_vol_index:realized_vol_mid:call_oi -0.0065984
|
||
market_vol_index:realized_vol_mid:put_oi 0.0231759
|
||
market_vol_index:realized_vol_mid:maturity_count 0.0062026
|
||
market_vol_index:realized_vol_mid:total_contracts -0.0072373
|
||
market_vol_index:realized_vol_mid:pulse_ratio 0.0024777
|
||
market_vol_index:realized_vol_mid:put_call_ratio_volume -0.0005433
|
||
market_vol_index:realized_vol_mid:put_call_ratio_oi 0.0015789
|
||
market_vol_index:realized_vol_mid:liquidity_ratio -0.0015917
|
||
market_vol_index:realized_vol_mid:option_dispersion -0.0026361
|
||
market_vol_index:realized_vol_mid:put_low_strike 0.0121182
|
||
market_vol_index:realized_vol_mid:put_proportion 0.0030698
|
||
market_vol_index:realized_vol_mid:stress_spread 0.0062935
|
||
market_vol_index:strike_dispersion:realized_vol_short -0.0068279
|
||
market_vol_index:strike_dispersion:put_volume -0.0027751
|
||
market_vol_index:strike_dispersion:call_oi -0.0041980
|
||
market_vol_index:strike_dispersion:put_oi -0.0042709
|
||
market_vol_index:strike_dispersion:maturity_count -0.0066489
|
||
market_vol_index:strike_dispersion:total_contracts 0.0051983
|
||
market_vol_index:strike_dispersion:pulse_ratio 0.0010537
|
||
market_vol_index:strike_dispersion:put_call_ratio_volume 0.0005396
|
||
market_vol_index:strike_dispersion:put_call_ratio_oi -0.0012008
|
||
market_vol_index:strike_dispersion:liquidity_ratio -0.0034812
|
||
market_vol_index:strike_dispersion:option_dispersion -0.0046580
|
||
market_vol_index:strike_dispersion:put_low_strike -0.0073667
|
||
market_vol_index:strike_dispersion:put_proportion -0.0017369
|
||
market_vol_index:strike_dispersion:stress_spread -0.0002573
|
||
realized_vol_mid:strike_dispersion:realized_vol_short 0.0109381
|
||
realized_vol_mid:strike_dispersion:put_volume -0.0066819
|
||
realized_vol_mid:strike_dispersion:call_oi 0.0128811
|
||
realized_vol_mid:strike_dispersion:put_oi 0.0110468
|
||
realized_vol_mid:strike_dispersion:maturity_count 0.0059875
|
||
realized_vol_mid:strike_dispersion:total_contracts -0.0105222
|
||
realized_vol_mid:strike_dispersion:pulse_ratio 0.0086745
|
||
realized_vol_mid:strike_dispersion:put_call_ratio_volume 0.0016179
|
||
realized_vol_mid:strike_dispersion:put_call_ratio_oi -0.0021449
|
||
realized_vol_mid:strike_dispersion:liquidity_ratio 0.0007098
|
||
realized_vol_mid:strike_dispersion:option_dispersion 0.0129298
|
||
realized_vol_mid:strike_dispersion:put_low_strike 0.0026190
|
||
realized_vol_mid:strike_dispersion:put_proportion 0.0059383
|
||
realized_vol_mid:strike_dispersion:stress_spread -0.0045061
|
||
market_vol_index:realized_vol_mid:strike_dispersion:realized_vol_short 0.0106188
|
||
market_vol_index:realized_vol_mid:strike_dispersion:put_volume 0.0029074
|
||
market_vol_index:realized_vol_mid:strike_dispersion:call_oi 0.0003358
|
||
market_vol_index:realized_vol_mid:strike_dispersion:put_oi -0.0013165
|
||
market_vol_index:realized_vol_mid:strike_dispersion:maturity_count 0.0055705
|
||
market_vol_index:realized_vol_mid:strike_dispersion:total_contracts -0.0261061
|
||
market_vol_index:realized_vol_mid:strike_dispersion:pulse_ratio -0.0091059
|
||
market_vol_index:realized_vol_mid:strike_dispersion:put_call_ratio_volume -0.0005469
|
||
market_vol_index:realized_vol_mid:strike_dispersion:put_call_ratio_oi 0.0007077
|
||
market_vol_index:realized_vol_mid:strike_dispersion:liquidity_ratio -0.0014683
|
||
market_vol_index:realized_vol_mid:strike_dispersion:option_dispersion -0.0241698
|
||
market_vol_index:realized_vol_mid:strike_dispersion:put_low_strike 0.0018223
|
||
market_vol_index:realized_vol_mid:strike_dispersion:put_proportion 0.0009296
|
||
market_vol_index:realized_vol_mid:strike_dispersion:stress_spread 0.0012156
|
||
Std. Error
|
||
(Intercept) 0.0004425
|
||
market_vol_index 0.0009575
|
||
realized_vol_mid 0.0007645
|
||
strike_dispersion 0.0056724
|
||
realized_vol_short 0.0017430
|
||
put_volume 0.0010158
|
||
call_oi 0.0009061
|
||
put_oi 0.0011265
|
||
maturity_count 0.0004960
|
||
total_contracts 0.0053052
|
||
pulse_ratio 0.0004931
|
||
put_call_ratio_volume 0.0006479
|
||
put_call_ratio_oi 0.0006530
|
||
liquidity_ratio 0.0004054
|
||
option_dispersion 0.0100279
|
||
put_low_strike 0.0006396
|
||
put_proportion 0.0005094
|
||
stress_spread 0.0017457
|
||
market_vol_index:realized_vol_mid 0.0010083
|
||
market_vol_index:strike_dispersion 0.0061632
|
||
realized_vol_mid:strike_dispersion 0.0051933
|
||
market_vol_index:realized_vol_short 0.0011768
|
||
market_vol_index:put_volume 0.0010721
|
||
market_vol_index:call_oi 0.0007694
|
||
market_vol_index:put_oi 0.0012196
|
||
market_vol_index:maturity_count 0.0005702
|
||
market_vol_index:total_contracts 0.0056958
|
||
market_vol_index:pulse_ratio 0.0004737
|
||
market_vol_index:put_call_ratio_volume 0.0004600
|
||
market_vol_index:put_call_ratio_oi 0.0004606
|
||
market_vol_index:liquidity_ratio 0.0003864
|
||
market_vol_index:option_dispersion 0.0107983
|
||
market_vol_index:put_low_strike 0.0006681
|
||
market_vol_index:put_proportion 0.0004760
|
||
market_vol_index:stress_spread 0.0003809
|
||
realized_vol_mid:realized_vol_short 0.0006906
|
||
realized_vol_mid:put_volume 0.0009527
|
||
realized_vol_mid:call_oi 0.0007889
|
||
realized_vol_mid:put_oi 0.0010691
|
||
realized_vol_mid:maturity_count 0.0004938
|
||
realized_vol_mid:total_contracts 0.0048759
|
||
realized_vol_mid:pulse_ratio 0.0003325
|
||
realized_vol_mid:put_call_ratio_volume 0.0004510
|
||
realized_vol_mid:put_call_ratio_oi 0.0004545
|
||
realized_vol_mid:liquidity_ratio 0.0003850
|
||
realized_vol_mid:option_dispersion 0.0091457
|
||
realized_vol_mid:put_low_strike 0.0006215
|
||
realized_vol_mid:put_proportion 0.0004742
|
||
realized_vol_mid:stress_spread 0.0006742
|
||
strike_dispersion:realized_vol_short 0.0016526
|
||
strike_dispersion:put_volume 0.0009816
|
||
strike_dispersion:call_oi 0.0008453
|
||
strike_dispersion:put_oi 0.0010904
|
||
strike_dispersion:maturity_count 0.0004287
|
||
strike_dispersion:total_contracts 0.0011110
|
||
strike_dispersion:pulse_ratio 0.0004919
|
||
strike_dispersion:put_call_ratio_volume 0.0006163
|
||
strike_dispersion:put_call_ratio_oi 0.0006110
|
||
strike_dispersion:liquidity_ratio 0.0003899
|
||
strike_dispersion:option_dispersion 0.0009638
|
||
strike_dispersion:put_low_strike 0.0005679
|
||
strike_dispersion:put_proportion 0.0005029
|
||
strike_dispersion:stress_spread 0.0016014
|
||
market_vol_index:realized_vol_mid:strike_dispersion 0.0054271
|
||
market_vol_index:realized_vol_mid:realized_vol_short 0.0004767
|
||
market_vol_index:realized_vol_mid:put_volume 0.0009875
|
||
market_vol_index:realized_vol_mid:call_oi 0.0006936
|
||
market_vol_index:realized_vol_mid:put_oi 0.0011018
|
||
market_vol_index:realized_vol_mid:maturity_count 0.0004888
|
||
market_vol_index:realized_vol_mid:total_contracts 0.0050053
|
||
market_vol_index:realized_vol_mid:pulse_ratio 0.0003078
|
||
market_vol_index:realized_vol_mid:put_call_ratio_volume 0.0004338
|
||
market_vol_index:realized_vol_mid:put_call_ratio_oi 0.0004330
|
||
market_vol_index:realized_vol_mid:liquidity_ratio 0.0003596
|
||
market_vol_index:realized_vol_mid:option_dispersion 0.0094431
|
||
market_vol_index:realized_vol_mid:put_low_strike 0.0005928
|
||
market_vol_index:realized_vol_mid:put_proportion 0.0004475
|
||
market_vol_index:realized_vol_mid:stress_spread 0.0003147
|
||
market_vol_index:strike_dispersion:realized_vol_short 0.0012178
|
||
market_vol_index:strike_dispersion:put_volume 0.0010071
|
||
market_vol_index:strike_dispersion:call_oi 0.0007007
|
||
market_vol_index:strike_dispersion:put_oi 0.0011734
|
||
market_vol_index:strike_dispersion:maturity_count 0.0004192
|
||
market_vol_index:strike_dispersion:total_contracts 0.0010577
|
||
market_vol_index:strike_dispersion:pulse_ratio 0.0004755
|
||
market_vol_index:strike_dispersion:put_call_ratio_volume 0.0004035
|
||
market_vol_index:strike_dispersion:put_call_ratio_oi 0.0003982
|
||
market_vol_index:strike_dispersion:liquidity_ratio 0.0003553
|
||
market_vol_index:strike_dispersion:option_dispersion 0.0009145
|
||
market_vol_index:strike_dispersion:put_low_strike 0.0005954
|
||
market_vol_index:strike_dispersion:put_proportion 0.0004490
|
||
market_vol_index:strike_dispersion:stress_spread 0.0003695
|
||
realized_vol_mid:strike_dispersion:realized_vol_short 0.0005537
|
||
realized_vol_mid:strike_dispersion:put_volume 0.0007668
|
||
realized_vol_mid:strike_dispersion:call_oi 0.0006374
|
||
realized_vol_mid:strike_dispersion:put_oi 0.0008762
|
||
realized_vol_mid:strike_dispersion:maturity_count 0.0003502
|
||
realized_vol_mid:strike_dispersion:total_contracts 0.0008224
|
||
realized_vol_mid:strike_dispersion:pulse_ratio 0.0002809
|
||
realized_vol_mid:strike_dispersion:put_call_ratio_volume 0.0003734
|
||
realized_vol_mid:strike_dispersion:put_call_ratio_oi 0.0003838
|
||
realized_vol_mid:strike_dispersion:liquidity_ratio 0.0003100
|
||
realized_vol_mid:strike_dispersion:option_dispersion 0.0007656
|
||
realized_vol_mid:strike_dispersion:put_low_strike 0.0004706
|
||
realized_vol_mid:strike_dispersion:put_proportion 0.0003858
|
||
realized_vol_mid:strike_dispersion:stress_spread 0.0005582
|
||
market_vol_index:realized_vol_mid:strike_dispersion:realized_vol_short 0.0003844
|
||
market_vol_index:realized_vol_mid:strike_dispersion:put_volume 0.0008397
|
||
market_vol_index:realized_vol_mid:strike_dispersion:call_oi 0.0006260
|
||
market_vol_index:realized_vol_mid:strike_dispersion:put_oi 0.0009684
|
||
market_vol_index:realized_vol_mid:strike_dispersion:maturity_count 0.0003514
|
||
market_vol_index:realized_vol_mid:strike_dispersion:total_contracts 0.0008171
|
||
market_vol_index:realized_vol_mid:strike_dispersion:pulse_ratio 0.0002558
|
||
market_vol_index:realized_vol_mid:strike_dispersion:put_call_ratio_volume 0.0003748
|
||
market_vol_index:realized_vol_mid:strike_dispersion:put_call_ratio_oi 0.0003735
|
||
market_vol_index:realized_vol_mid:strike_dispersion:liquidity_ratio 0.0003115
|
||
market_vol_index:realized_vol_mid:strike_dispersion:option_dispersion 0.0007605
|
||
market_vol_index:realized_vol_mid:strike_dispersion:put_low_strike 0.0004944
|
||
market_vol_index:realized_vol_mid:strike_dispersion:put_proportion 0.0003994
|
||
market_vol_index:realized_vol_mid:strike_dispersion:stress_spread 0.0002648
|
||
t value
|
||
(Intercept) 8271.085
|
||
market_vol_index -13.248
|
||
realized_vol_mid -5.999
|
||
strike_dispersion 51.420
|
||
realized_vol_short 339.795
|
||
put_volume 46.301
|
||
call_oi -2.499
|
||
put_oi -59.330
|
||
maturity_count 6.705
|
||
total_contracts -21.152
|
||
pulse_ratio -452.276
|
||
put_call_ratio_volume 14.865
|
||
put_call_ratio_oi 21.335
|
||
liquidity_ratio -40.885
|
||
option_dispersion -32.851
|
||
put_low_strike -3.149
|
||
put_proportion -42.564
|
||
stress_spread -44.145
|
||
market_vol_index:realized_vol_mid -51.290
|
||
market_vol_index:strike_dispersion 3.928
|
||
realized_vol_mid:strike_dispersion 2.257
|
||
market_vol_index:realized_vol_short 6.043
|
||
market_vol_index:put_volume -3.383
|
||
market_vol_index:call_oi -4.788
|
||
market_vol_index:put_oi 10.194
|
||
market_vol_index:maturity_count -6.236
|
||
market_vol_index:total_contracts -4.485
|
||
market_vol_index:pulse_ratio 72.198
|
||
market_vol_index:put_call_ratio_volume -8.435
|
||
market_vol_index:put_call_ratio_oi -1.356
|
||
market_vol_index:liquidity_ratio 16.395
|
||
market_vol_index:option_dispersion -5.224
|
||
market_vol_index:put_low_strike 25.923
|
||
market_vol_index:put_proportion 4.996
|
||
market_vol_index:stress_spread 0.835
|
||
realized_vol_mid:realized_vol_short 71.267
|
||
realized_vol_mid:put_volume 15.872
|
||
realized_vol_mid:call_oi 47.903
|
||
realized_vol_mid:put_oi -36.109
|
||
realized_vol_mid:maturity_count -53.947
|
||
realized_vol_mid:total_contracts 19.540
|
||
realized_vol_mid:pulse_ratio 20.322
|
||
realized_vol_mid:put_call_ratio_volume 24.788
|
||
realized_vol_mid:put_call_ratio_oi -3.921
|
||
realized_vol_mid:liquidity_ratio -27.101
|
||
realized_vol_mid:option_dispersion 9.417
|
||
realized_vol_mid:put_low_strike -60.901
|
||
realized_vol_mid:put_proportion -16.659
|
||
realized_vol_mid:stress_spread -88.694
|
||
strike_dispersion:realized_vol_short 20.286
|
||
strike_dispersion:put_volume 5.435
|
||
strike_dispersion:call_oi -0.907
|
||
strike_dispersion:put_oi 15.003
|
||
strike_dispersion:maturity_count 65.634
|
||
strike_dispersion:total_contracts -63.652
|
||
strike_dispersion:pulse_ratio -12.064
|
||
strike_dispersion:put_call_ratio_volume -1.974
|
||
strike_dispersion:put_call_ratio_oi 4.086
|
||
strike_dispersion:liquidity_ratio 14.626
|
||
strike_dispersion:option_dispersion -31.337
|
||
strike_dispersion:put_low_strike 27.437
|
||
strike_dispersion:put_proportion -6.823
|
||
strike_dispersion:stress_spread -16.645
|
||
market_vol_index:realized_vol_mid:strike_dispersion 0.032
|
||
market_vol_index:realized_vol_mid:realized_vol_short 15.519
|
||
market_vol_index:realized_vol_mid:put_volume -6.183
|
||
market_vol_index:realized_vol_mid:call_oi -9.514
|
||
market_vol_index:realized_vol_mid:put_oi 21.035
|
||
market_vol_index:realized_vol_mid:maturity_count 12.689
|
||
market_vol_index:realized_vol_mid:total_contracts -1.446
|
||
market_vol_index:realized_vol_mid:pulse_ratio 8.048
|
||
market_vol_index:realized_vol_mid:put_call_ratio_volume -1.252
|
||
market_vol_index:realized_vol_mid:put_call_ratio_oi 3.646
|
||
market_vol_index:realized_vol_mid:liquidity_ratio -4.427
|
||
market_vol_index:realized_vol_mid:option_dispersion -0.279
|
||
market_vol_index:realized_vol_mid:put_low_strike 20.442
|
||
market_vol_index:realized_vol_mid:put_proportion 6.860
|
||
market_vol_index:realized_vol_mid:stress_spread 20.001
|
||
market_vol_index:strike_dispersion:realized_vol_short -5.607
|
||
market_vol_index:strike_dispersion:put_volume -2.755
|
||
market_vol_index:strike_dispersion:call_oi -5.991
|
||
market_vol_index:strike_dispersion:put_oi -3.640
|
||
market_vol_index:strike_dispersion:maturity_count -15.860
|
||
market_vol_index:strike_dispersion:total_contracts 4.915
|
||
market_vol_index:strike_dispersion:pulse_ratio 2.216
|
||
market_vol_index:strike_dispersion:put_call_ratio_volume 1.337
|
||
market_vol_index:strike_dispersion:put_call_ratio_oi -3.015
|
||
market_vol_index:strike_dispersion:liquidity_ratio -9.798
|
||
market_vol_index:strike_dispersion:option_dispersion -5.094
|
||
market_vol_index:strike_dispersion:put_low_strike -12.373
|
||
market_vol_index:strike_dispersion:put_proportion -3.868
|
||
market_vol_index:strike_dispersion:stress_spread -0.697
|
||
realized_vol_mid:strike_dispersion:realized_vol_short 19.754
|
||
realized_vol_mid:strike_dispersion:put_volume -8.715
|
||
realized_vol_mid:strike_dispersion:call_oi 20.207
|
||
realized_vol_mid:strike_dispersion:put_oi 12.607
|
||
realized_vol_mid:strike_dispersion:maturity_count 17.096
|
||
realized_vol_mid:strike_dispersion:total_contracts -12.794
|
||
realized_vol_mid:strike_dispersion:pulse_ratio 30.881
|
||
realized_vol_mid:strike_dispersion:put_call_ratio_volume 4.333
|
||
realized_vol_mid:strike_dispersion:put_call_ratio_oi -5.588
|
||
realized_vol_mid:strike_dispersion:liquidity_ratio 2.290
|
||
realized_vol_mid:strike_dispersion:option_dispersion 16.888
|
||
realized_vol_mid:strike_dispersion:put_low_strike 5.565
|
||
realized_vol_mid:strike_dispersion:put_proportion 15.393
|
||
realized_vol_mid:strike_dispersion:stress_spread -8.073
|
||
market_vol_index:realized_vol_mid:strike_dispersion:realized_vol_short 27.626
|
||
market_vol_index:realized_vol_mid:strike_dispersion:put_volume 3.462
|
||
market_vol_index:realized_vol_mid:strike_dispersion:call_oi 0.536
|
||
market_vol_index:realized_vol_mid:strike_dispersion:put_oi -1.359
|
||
market_vol_index:realized_vol_mid:strike_dispersion:maturity_count 15.852
|
||
market_vol_index:realized_vol_mid:strike_dispersion:total_contracts -31.948
|
||
market_vol_index:realized_vol_mid:strike_dispersion:pulse_ratio -35.603
|
||
market_vol_index:realized_vol_mid:strike_dispersion:put_call_ratio_volume -1.459
|
||
market_vol_index:realized_vol_mid:strike_dispersion:put_call_ratio_oi 1.895
|
||
market_vol_index:realized_vol_mid:strike_dispersion:liquidity_ratio -4.713
|
||
market_vol_index:realized_vol_mid:strike_dispersion:option_dispersion -31.782
|
||
market_vol_index:realized_vol_mid:strike_dispersion:put_low_strike 3.686
|
||
market_vol_index:realized_vol_mid:strike_dispersion:put_proportion 2.327
|
||
market_vol_index:realized_vol_mid:strike_dispersion:stress_spread 4.591
|
||
Pr(>|t|)
|
||
(Intercept) < 2e-16
|
||
market_vol_index < 2e-16
|
||
realized_vol_mid 1.99e-09
|
||
strike_dispersion < 2e-16
|
||
realized_vol_short < 2e-16
|
||
put_volume < 2e-16
|
||
call_oi 0.012456
|
||
put_oi < 2e-16
|
||
maturity_count 2.01e-11
|
||
total_contracts < 2e-16
|
||
pulse_ratio < 2e-16
|
||
put_call_ratio_volume < 2e-16
|
||
put_call_ratio_oi < 2e-16
|
||
liquidity_ratio < 2e-16
|
||
option_dispersion < 2e-16
|
||
put_low_strike 0.001637
|
||
put_proportion < 2e-16
|
||
stress_spread < 2e-16
|
||
market_vol_index:realized_vol_mid < 2e-16
|
||
market_vol_index:strike_dispersion 8.57e-05
|
||
realized_vol_mid:strike_dispersion 0.024018
|
||
market_vol_index:realized_vol_short 1.51e-09
|
||
market_vol_index:put_volume 0.000718
|
||
market_vol_index:call_oi 1.69e-06
|
||
market_vol_index:put_oi < 2e-16
|
||
market_vol_index:maturity_count 4.50e-10
|
||
market_vol_index:total_contracts 7.28e-06
|
||
market_vol_index:pulse_ratio < 2e-16
|
||
market_vol_index:put_call_ratio_volume < 2e-16
|
||
market_vol_index:put_call_ratio_oi 0.175051
|
||
market_vol_index:liquidity_ratio < 2e-16
|
||
market_vol_index:option_dispersion 1.75e-07
|
||
market_vol_index:put_low_strike < 2e-16
|
||
market_vol_index:put_proportion 5.86e-07
|
||
market_vol_index:stress_spread 0.403479
|
||
realized_vol_mid:realized_vol_short < 2e-16
|
||
realized_vol_mid:put_volume < 2e-16
|
||
realized_vol_mid:call_oi < 2e-16
|
||
realized_vol_mid:put_oi < 2e-16
|
||
realized_vol_mid:maturity_count < 2e-16
|
||
realized_vol_mid:total_contracts < 2e-16
|
||
realized_vol_mid:pulse_ratio < 2e-16
|
||
realized_vol_mid:put_call_ratio_volume < 2e-16
|
||
realized_vol_mid:put_call_ratio_oi 8.81e-05
|
||
realized_vol_mid:liquidity_ratio < 2e-16
|
||
realized_vol_mid:option_dispersion < 2e-16
|
||
realized_vol_mid:put_low_strike < 2e-16
|
||
realized_vol_mid:put_proportion < 2e-16
|
||
realized_vol_mid:stress_spread < 2e-16
|
||
strike_dispersion:realized_vol_short < 2e-16
|
||
strike_dispersion:put_volume 5.49e-08
|
||
strike_dispersion:call_oi 0.364603
|
||
strike_dispersion:put_oi < 2e-16
|
||
strike_dispersion:maturity_count < 2e-16
|
||
strike_dispersion:total_contracts < 2e-16
|
||
strike_dispersion:pulse_ratio < 2e-16
|
||
strike_dispersion:put_call_ratio_volume 0.048380
|
||
strike_dispersion:put_call_ratio_oi 4.40e-05
|
||
strike_dispersion:liquidity_ratio < 2e-16
|
||
strike_dispersion:option_dispersion < 2e-16
|
||
strike_dispersion:put_low_strike < 2e-16
|
||
strike_dispersion:put_proportion 8.91e-12
|
||
strike_dispersion:stress_spread < 2e-16
|
||
market_vol_index:realized_vol_mid:strike_dispersion 0.974748
|
||
market_vol_index:realized_vol_mid:realized_vol_short < 2e-16
|
||
market_vol_index:realized_vol_mid:put_volume 6.31e-10
|
||
market_vol_index:realized_vol_mid:call_oi < 2e-16
|
||
market_vol_index:realized_vol_mid:put_oi < 2e-16
|
||
market_vol_index:realized_vol_mid:maturity_count < 2e-16
|
||
market_vol_index:realized_vol_mid:total_contracts 0.148194
|
||
market_vol_index:realized_vol_mid:pulse_ratio 8.40e-16
|
||
market_vol_index:realized_vol_mid:put_call_ratio_volume 0.210426
|
||
market_vol_index:realized_vol_mid:put_call_ratio_oi 0.000266
|
||
market_vol_index:realized_vol_mid:liquidity_ratio 9.56e-06
|
||
market_vol_index:realized_vol_mid:option_dispersion 0.780128
|
||
market_vol_index:realized_vol_mid:put_low_strike < 2e-16
|
||
market_vol_index:realized_vol_mid:put_proportion 6.88e-12
|
||
market_vol_index:realized_vol_mid:stress_spread < 2e-16
|
||
market_vol_index:strike_dispersion:realized_vol_short 2.06e-08
|
||
market_vol_index:strike_dispersion:put_volume 0.005861
|
||
market_vol_index:strike_dispersion:call_oi 2.08e-09
|
||
market_vol_index:strike_dispersion:put_oi 0.000273
|
||
market_vol_index:strike_dispersion:maturity_count < 2e-16
|
||
market_vol_index:strike_dispersion:total_contracts 8.89e-07
|
||
market_vol_index:strike_dispersion:pulse_ratio 0.026680
|
||
market_vol_index:strike_dispersion:put_call_ratio_volume 0.181097
|
||
market_vol_index:strike_dispersion:put_call_ratio_oi 0.002566
|
||
market_vol_index:strike_dispersion:liquidity_ratio < 2e-16
|
||
market_vol_index:strike_dispersion:option_dispersion 3.51e-07
|
||
market_vol_index:strike_dispersion:put_low_strike < 2e-16
|
||
market_vol_index:strike_dispersion:put_proportion 0.000110
|
||
market_vol_index:strike_dispersion:stress_spread 0.486101
|
||
realized_vol_mid:strike_dispersion:realized_vol_short < 2e-16
|
||
realized_vol_mid:strike_dispersion:put_volume < 2e-16
|
||
realized_vol_mid:strike_dispersion:call_oi < 2e-16
|
||
realized_vol_mid:strike_dispersion:put_oi < 2e-16
|
||
realized_vol_mid:strike_dispersion:maturity_count < 2e-16
|
||
realized_vol_mid:strike_dispersion:total_contracts < 2e-16
|
||
realized_vol_mid:strike_dispersion:pulse_ratio < 2e-16
|
||
realized_vol_mid:strike_dispersion:put_call_ratio_volume 1.47e-05
|
||
realized_vol_mid:strike_dispersion:put_call_ratio_oi 2.30e-08
|
||
realized_vol_mid:strike_dispersion:liquidity_ratio 0.022033
|
||
realized_vol_mid:strike_dispersion:option_dispersion < 2e-16
|
||
realized_vol_mid:strike_dispersion:put_low_strike 2.63e-08
|
||
realized_vol_mid:strike_dispersion:put_proportion < 2e-16
|
||
realized_vol_mid:strike_dispersion:stress_spread 6.86e-16
|
||
market_vol_index:realized_vol_mid:strike_dispersion:realized_vol_short < 2e-16
|
||
market_vol_index:realized_vol_mid:strike_dispersion:put_volume 0.000536
|
||
market_vol_index:realized_vol_mid:strike_dispersion:call_oi 0.591674
|
||
market_vol_index:realized_vol_mid:strike_dispersion:put_oi 0.174004
|
||
market_vol_index:realized_vol_mid:strike_dispersion:maturity_count < 2e-16
|
||
market_vol_index:realized_vol_mid:strike_dispersion:total_contracts < 2e-16
|
||
market_vol_index:realized_vol_mid:strike_dispersion:pulse_ratio < 2e-16
|
||
market_vol_index:realized_vol_mid:strike_dispersion:put_call_ratio_volume 0.144564
|
||
market_vol_index:realized_vol_mid:strike_dispersion:put_call_ratio_oi 0.058120
|
||
market_vol_index:realized_vol_mid:strike_dispersion:liquidity_ratio 2.44e-06
|
||
market_vol_index:realized_vol_mid:strike_dispersion:option_dispersion < 2e-16
|
||
market_vol_index:realized_vol_mid:strike_dispersion:put_low_strike 0.000228
|
||
market_vol_index:realized_vol_mid:strike_dispersion:put_proportion 0.019942
|
||
market_vol_index:realized_vol_mid:strike_dispersion:stress_spread 4.41e-06
|
||
|
||
(Intercept) ***
|
||
market_vol_index ***
|
||
realized_vol_mid ***
|
||
strike_dispersion ***
|
||
realized_vol_short ***
|
||
put_volume ***
|
||
call_oi *
|
||
put_oi ***
|
||
maturity_count ***
|
||
total_contracts ***
|
||
pulse_ratio ***
|
||
put_call_ratio_volume ***
|
||
put_call_ratio_oi ***
|
||
liquidity_ratio ***
|
||
option_dispersion ***
|
||
put_low_strike **
|
||
put_proportion ***
|
||
stress_spread ***
|
||
market_vol_index:realized_vol_mid ***
|
||
market_vol_index:strike_dispersion ***
|
||
realized_vol_mid:strike_dispersion *
|
||
market_vol_index:realized_vol_short ***
|
||
market_vol_index:put_volume ***
|
||
market_vol_index:call_oi ***
|
||
market_vol_index:put_oi ***
|
||
market_vol_index:maturity_count ***
|
||
market_vol_index:total_contracts ***
|
||
market_vol_index:pulse_ratio ***
|
||
market_vol_index:put_call_ratio_volume ***
|
||
market_vol_index:put_call_ratio_oi
|
||
market_vol_index:liquidity_ratio ***
|
||
market_vol_index:option_dispersion ***
|
||
market_vol_index:put_low_strike ***
|
||
market_vol_index:put_proportion ***
|
||
market_vol_index:stress_spread
|
||
realized_vol_mid:realized_vol_short ***
|
||
realized_vol_mid:put_volume ***
|
||
realized_vol_mid:call_oi ***
|
||
realized_vol_mid:put_oi ***
|
||
realized_vol_mid:maturity_count ***
|
||
realized_vol_mid:total_contracts ***
|
||
realized_vol_mid:pulse_ratio ***
|
||
realized_vol_mid:put_call_ratio_volume ***
|
||
realized_vol_mid:put_call_ratio_oi ***
|
||
realized_vol_mid:liquidity_ratio ***
|
||
realized_vol_mid:option_dispersion ***
|
||
realized_vol_mid:put_low_strike ***
|
||
realized_vol_mid:put_proportion ***
|
||
realized_vol_mid:stress_spread ***
|
||
strike_dispersion:realized_vol_short ***
|
||
strike_dispersion:put_volume ***
|
||
strike_dispersion:call_oi
|
||
strike_dispersion:put_oi ***
|
||
strike_dispersion:maturity_count ***
|
||
strike_dispersion:total_contracts ***
|
||
strike_dispersion:pulse_ratio ***
|
||
strike_dispersion:put_call_ratio_volume *
|
||
strike_dispersion:put_call_ratio_oi ***
|
||
strike_dispersion:liquidity_ratio ***
|
||
strike_dispersion:option_dispersion ***
|
||
strike_dispersion:put_low_strike ***
|
||
strike_dispersion:put_proportion ***
|
||
strike_dispersion:stress_spread ***
|
||
market_vol_index:realized_vol_mid:strike_dispersion
|
||
market_vol_index:realized_vol_mid:realized_vol_short ***
|
||
market_vol_index:realized_vol_mid:put_volume ***
|
||
market_vol_index:realized_vol_mid:call_oi ***
|
||
market_vol_index:realized_vol_mid:put_oi ***
|
||
market_vol_index:realized_vol_mid:maturity_count ***
|
||
market_vol_index:realized_vol_mid:total_contracts
|
||
market_vol_index:realized_vol_mid:pulse_ratio ***
|
||
market_vol_index:realized_vol_mid:put_call_ratio_volume
|
||
market_vol_index:realized_vol_mid:put_call_ratio_oi ***
|
||
market_vol_index:realized_vol_mid:liquidity_ratio ***
|
||
market_vol_index:realized_vol_mid:option_dispersion
|
||
market_vol_index:realized_vol_mid:put_low_strike ***
|
||
market_vol_index:realized_vol_mid:put_proportion ***
|
||
market_vol_index:realized_vol_mid:stress_spread ***
|
||
market_vol_index:strike_dispersion:realized_vol_short ***
|
||
market_vol_index:strike_dispersion:put_volume **
|
||
market_vol_index:strike_dispersion:call_oi ***
|
||
market_vol_index:strike_dispersion:put_oi ***
|
||
market_vol_index:strike_dispersion:maturity_count ***
|
||
market_vol_index:strike_dispersion:total_contracts ***
|
||
market_vol_index:strike_dispersion:pulse_ratio *
|
||
market_vol_index:strike_dispersion:put_call_ratio_volume
|
||
market_vol_index:strike_dispersion:put_call_ratio_oi **
|
||
market_vol_index:strike_dispersion:liquidity_ratio ***
|
||
market_vol_index:strike_dispersion:option_dispersion ***
|
||
market_vol_index:strike_dispersion:put_low_strike ***
|
||
market_vol_index:strike_dispersion:put_proportion ***
|
||
market_vol_index:strike_dispersion:stress_spread
|
||
realized_vol_mid:strike_dispersion:realized_vol_short ***
|
||
realized_vol_mid:strike_dispersion:put_volume ***
|
||
realized_vol_mid:strike_dispersion:call_oi ***
|
||
realized_vol_mid:strike_dispersion:put_oi ***
|
||
realized_vol_mid:strike_dispersion:maturity_count ***
|
||
realized_vol_mid:strike_dispersion:total_contracts ***
|
||
realized_vol_mid:strike_dispersion:pulse_ratio ***
|
||
realized_vol_mid:strike_dispersion:put_call_ratio_volume ***
|
||
realized_vol_mid:strike_dispersion:put_call_ratio_oi ***
|
||
realized_vol_mid:strike_dispersion:liquidity_ratio *
|
||
realized_vol_mid:strike_dispersion:option_dispersion ***
|
||
realized_vol_mid:strike_dispersion:put_low_strike ***
|
||
realized_vol_mid:strike_dispersion:put_proportion ***
|
||
realized_vol_mid:strike_dispersion:stress_spread ***
|
||
market_vol_index:realized_vol_mid:strike_dispersion:realized_vol_short ***
|
||
market_vol_index:realized_vol_mid:strike_dispersion:put_volume ***
|
||
market_vol_index:realized_vol_mid:strike_dispersion:call_oi
|
||
market_vol_index:realized_vol_mid:strike_dispersion:put_oi
|
||
market_vol_index:realized_vol_mid:strike_dispersion:maturity_count ***
|
||
market_vol_index:realized_vol_mid:strike_dispersion:total_contracts ***
|
||
market_vol_index:realized_vol_mid:strike_dispersion:pulse_ratio ***
|
||
market_vol_index:realized_vol_mid:strike_dispersion:put_call_ratio_volume
|
||
market_vol_index:realized_vol_mid:strike_dispersion:put_call_ratio_oi .
|
||
market_vol_index:realized_vol_mid:strike_dispersion:liquidity_ratio ***
|
||
market_vol_index:realized_vol_mid:strike_dispersion:option_dispersion ***
|
||
market_vol_index:realized_vol_mid:strike_dispersion:put_low_strike ***
|
||
market_vol_index:realized_vol_mid:strike_dispersion:put_proportion *
|
||
market_vol_index:realized_vol_mid:strike_dispersion:stress_spread ***
|
||
---
|
||
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
|
||
|
||
Residual standard error: 0.2595 on 1533114 degrees of freedom
|
||
Multiple R-squared: 0.7837, Adjusted R-squared: 0.7837
|
||
F-statistic: 4.667e+04 on 119 and 1533114 DF, p-value: < 2.2e-16</code></pre>
|
||
</div>
|
||
</div>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb60"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb60-1"><a href="#cb60-1" aria-hidden="true" tabindex="-1"></a>Y_hat4 <span class="ot"><-</span> <span class="fu">predict</span>(mod4, <span class="at">newdata =</span> val_linear_lm, <span class="at">type =</span> <span class="st">"response"</span>)</span>
|
||
<span id="cb60-2"><a href="#cb60-2" aria-hidden="true" tabindex="-1"></a>Y_val <span class="ot"><-</span> val_linear_lm<span class="sc">$</span>implied_vol_ref</span>
|
||
<span id="cb60-3"><a href="#cb60-3" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb60-4"><a href="#cb60-4" aria-hidden="true" tabindex="-1"></a>MSS_4 <span class="ot"><-</span> <span class="fu">mean</span>((<span class="fu">exp</span>(Y_val) <span class="sc">-</span> <span class="fu">exp</span>(Y_hat4))<span class="sc">**</span><span class="dv">2</span>)</span>
|
||
<span id="cb60-5"><a href="#cb60-5" aria-hidden="true" tabindex="-1"></a>MedSS_4 <span class="ot"><-</span> <span class="fu">median</span>((<span class="fu">exp</span>(Y_val) <span class="sc">-</span> <span class="fu">exp</span>(Y_hat4))<span class="sc">**</span><span class="dv">2</span>)</span>
|
||
<span id="cb60-6"><a href="#cb60-6" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb60-7"><a href="#cb60-7" aria-hidden="true" tabindex="-1"></a><span class="fu">print</span>(<span class="fu">paste0</span>(<span class="st">"The RMSE is: "</span>, <span class="fu">sqrt</span>(MSS_4)))</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output cell-output-stdout">
|
||
<pre><code>[1] "The RMSE is: 11.2618186831934"</code></pre>
|
||
</div>
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb62"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb62-1"><a href="#cb62-1" aria-hidden="true" tabindex="-1"></a><span class="fu">print</span>(<span class="fu">paste0</span>(<span class="st">"The root of the median squared error is: "</span>, <span class="fu">sqrt</span>(MedSS_4)))</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output cell-output-stdout">
|
||
<pre><code>[1] "The root of the median squared error is: 3.68978501195564"</code></pre>
|
||
</div>
|
||
</div>
|
||
<p>However, we also fitted a model in which <span class="math inline">\(option\_dispersion\)</span> was replaced by <span class="math inline">\(market\_vol\_index\)</span>. Since <span class="math inline">\(market\_vol\_index\)</span> is a global indicator of risk assessment on a given day, we hoped to combine asset-level volatility information with market-wide information. Indeed, this model proved to be the best among our classic linear models, with an RMSE of 11.26. In order to get a better grasp of this model’s performance, we also looked at the root of the median squared error. With a value of 3.69, it was substantially lower than the RMSE. This suggests that a limited number of very large errors drag the RMSE up. This could come from an inability on the model’s side to adapt quickly to extreme changes, for example in times of crisis or panic movements.</p>
|
||
</section>
|
||
</section>
|
||
<section id="linear-regressions-on-the-pca-dataset" class="level3">
|
||
<h3 class="anchored" data-anchor-id="linear-regressions-on-the-pca-dataset">Linear regressions on the PCA Dataset</h3>
|
||
<section id="train-test-data-import" class="level4">
|
||
<h4 class="anchored" data-anchor-id="train-test-data-import">Train-test data import</h4>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb64"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb64-1"><a href="#cb64-1" aria-hidden="true" tabindex="-1"></a>train_pca_lm <span class="ot"><-</span> train_pca <span class="sc">|></span> dplyr<span class="sc">::</span><span class="fu">select</span>(<span class="sc">-</span>asset_id, <span class="sc">-</span>obs_date)</span>
|
||
<span id="cb64-2"><a href="#cb64-2" aria-hidden="true" tabindex="-1"></a>train_pca_lm<span class="sc">$</span>implied_vol_ref <span class="ot"><-</span> <span class="fu">log</span>(train_pca<span class="sc">$</span>implied_vol_ref)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb65"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb65-1"><a href="#cb65-1" aria-hidden="true" tabindex="-1"></a>val_pca_lm <span class="ot"><-</span> val_pca <span class="sc">|></span> dplyr<span class="sc">::</span><span class="fu">select</span>(<span class="sc">-</span>asset_id, <span class="sc">-</span>obs_date)</span>
|
||
<span id="cb65-2"><a href="#cb65-2" aria-hidden="true" tabindex="-1"></a>val_pca_lm<span class="sc">$</span>implied_vol_ref <span class="ot"><-</span> <span class="fu">log</span>(val_pca<span class="sc">$</span>implied_vol_ref)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
</section>
|
||
<section id="first-model" class="level4">
|
||
<h4 class="anchored" data-anchor-id="first-model">First model</h4>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb66"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb66-1"><a href="#cb66-1" aria-hidden="true" tabindex="-1"></a>mod1_pca <span class="ot"><-</span> <span class="fu">lm</span>(implied_vol_ref <span class="sc">~</span> ., <span class="at">data =</span> train_pca_lm)</span>
|
||
<span id="cb66-2"><a href="#cb66-2" aria-hidden="true" tabindex="-1"></a><span class="fu">summary</span>(mod1_pca)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb67"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb67-1"><a href="#cb67-1" aria-hidden="true" tabindex="-1"></a>Y_hat1_pca <span class="ot"><-</span> <span class="fu">predict</span>(mod1_pca, <span class="at">newdata =</span> val_pca_lm, <span class="at">type =</span> <span class="st">"response"</span>)</span>
|
||
<span id="cb67-2"><a href="#cb67-2" aria-hidden="true" tabindex="-1"></a>Y_val_pca <span class="ot"><-</span> val_pca_lm<span class="sc">$</span>implied_vol_ref</span>
|
||
<span id="cb67-3"><a href="#cb67-3" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb67-4"><a href="#cb67-4" aria-hidden="true" tabindex="-1"></a>MSS_1_pca <span class="ot"><-</span> <span class="fu">mean</span>((<span class="fu">exp</span>(Y_val_pca) <span class="sc">-</span> <span class="fu">exp</span>(Y_hat1_pca))<span class="sc">**</span><span class="dv">2</span>)</span>
|
||
<span id="cb67-5"><a href="#cb67-5" aria-hidden="true" tabindex="-1"></a>MedSS_1_pca <span class="ot"><-</span> <span class="fu">median</span>((<span class="fu">exp</span>(Y_val_pca) <span class="sc">-</span> <span class="fu">exp</span>(Y_hat1_pca))<span class="sc">**</span><span class="dv">2</span>)</span>
|
||
<span id="cb67-6"><a href="#cb67-6" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb67-7"><a href="#cb67-7" aria-hidden="true" tabindex="-1"></a><span class="fu">print</span>(<span class="fu">paste0</span>(<span class="st">"The RMSE is: "</span>, <span class="fu">sqrt</span>(MSS_1_pca)))</span>
|
||
<span id="cb67-8"><a href="#cb67-8" aria-hidden="true" tabindex="-1"></a><span class="fu">print</span>(<span class="fu">paste0</span>(<span class="st">"The root of the median squared error is: "</span>, <span class="fu">sqrt</span>(MedSS_1_pca)))</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
<p>We now look at linear models trained on the datasets created through the principal component analysis. The first model, without interactions, reveals that all components have a high impact on the prediction, with Fisher test p-values below <span class="math inline">\(2 \times 10^{-16}\)</span>. However, the RMSE of 12.79 is worse than the previous linear model’s. The PCA has indeed led to a loss of information. The root of the median squared error, at 4.11, is also substantially worse than before. Not only is the prediction worse on average, but the typical error is larger as well.</p>
|
||
</section>
|
||
<section id="first-selected-model" class="level4">
|
||
<h4 class="anchored" data-anchor-id="first-selected-model">First selected model</h4>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb68"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb68-1"><a href="#cb68-1" aria-hidden="true" tabindex="-1"></a>mod2_pca <span class="ot"><-</span> <span class="fu">lm</span>(</span>
|
||
<span id="cb68-2"><a href="#cb68-2" aria-hidden="true" tabindex="-1"></a> implied_vol_ref <span class="sc">~</span> PC1 <span class="sc">*</span></span>
|
||
<span id="cb68-3"><a href="#cb68-3" aria-hidden="true" tabindex="-1"></a> PC2 <span class="sc">*</span></span>
|
||
<span id="cb68-4"><a href="#cb68-4" aria-hidden="true" tabindex="-1"></a> PC3 <span class="sc">*</span></span>
|
||
<span id="cb68-5"><a href="#cb68-5" aria-hidden="true" tabindex="-1"></a> PC4 <span class="sc">*</span></span>
|
||
<span id="cb68-6"><a href="#cb68-6" aria-hidden="true" tabindex="-1"></a> (PC5 <span class="sc">+</span> PC6 <span class="sc">+</span> PC7 <span class="sc">+</span> PC8 <span class="sc">+</span> PC9),</span>
|
||
<span id="cb68-7"><a href="#cb68-7" aria-hidden="true" tabindex="-1"></a> <span class="at">data =</span> train_pca_lm</span>
|
||
<span id="cb68-8"><a href="#cb68-8" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb68-9"><a href="#cb68-9" aria-hidden="true" tabindex="-1"></a><span class="fu">summary</span>(mod2_pca)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb69"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb69-1"><a href="#cb69-1" aria-hidden="true" tabindex="-1"></a>Y_hat2_pca <span class="ot"><-</span> <span class="fu">predict</span>(mod2_pca, <span class="at">newdata =</span> val_pca_lm, <span class="at">type =</span> <span class="st">"response"</span>)</span>
|
||
<span id="cb69-2"><a href="#cb69-2" aria-hidden="true" tabindex="-1"></a>Y_val_pca <span class="ot"><-</span> val_pca_lm<span class="sc">$</span>implied_vol_ref</span>
|
||
<span id="cb69-3"><a href="#cb69-3" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb69-4"><a href="#cb69-4" aria-hidden="true" tabindex="-1"></a>MSS_2_pca <span class="ot"><-</span> <span class="fu">mean</span>((<span class="fu">exp</span>(Y_val_pca) <span class="sc">-</span> <span class="fu">exp</span>(Y_hat2_pca))<span class="sc">**</span><span class="dv">2</span>)</span>
|
||
<span id="cb69-5"><a href="#cb69-5" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb69-6"><a href="#cb69-6" aria-hidden="true" tabindex="-1"></a><span class="fu">print</span>(<span class="fu">paste0</span>(<span class="st">"The RMSE is: "</span>, <span class="fu">sqrt</span>(MSS_2_pca)))</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
</section>
|
||
<section id="second-selected-model" class="level4">
|
||
<h4 class="anchored" data-anchor-id="second-selected-model">Second selected model</h4>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb70"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb70-1"><a href="#cb70-1" aria-hidden="true" tabindex="-1"></a>mod3_pca <span class="ot"><-</span> <span class="fu">lm</span>(</span>
|
||
<span id="cb70-2"><a href="#cb70-2" aria-hidden="true" tabindex="-1"></a> implied_vol_ref <span class="sc">~</span> PC1 <span class="sc">*</span></span>
|
||
<span id="cb70-3"><a href="#cb70-3" aria-hidden="true" tabindex="-1"></a> PC2 <span class="sc">*</span></span>
|
||
<span id="cb70-4"><a href="#cb70-4" aria-hidden="true" tabindex="-1"></a> PC3 <span class="sc">*</span></span>
|
||
<span id="cb70-5"><a href="#cb70-5" aria-hidden="true" tabindex="-1"></a> PC4 <span class="sc">*</span></span>
|
||
<span id="cb70-6"><a href="#cb70-6" aria-hidden="true" tabindex="-1"></a> PC5 <span class="sc">*</span></span>
|
||
<span id="cb70-7"><a href="#cb70-7" aria-hidden="true" tabindex="-1"></a> (PC6 <span class="sc">+</span> PC7 <span class="sc">+</span> PC8 <span class="sc">+</span> PC9),</span>
|
||
<span id="cb70-8"><a href="#cb70-8" aria-hidden="true" tabindex="-1"></a> <span class="at">data =</span> train_pca_lm</span>
|
||
<span id="cb70-9"><a href="#cb70-9" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb70-10"><a href="#cb70-10" aria-hidden="true" tabindex="-1"></a><span class="fu">summary</span>(mod3_pca)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output cell-output-stdout">
|
||
<pre><code>
|
||
Call:
|
||
lm(formula = implied_vol_ref ~ PC1 * PC2 * PC3 * PC4 * PC5 *
|
||
(PC6 + PC7 + PC8 + PC9), data = train_pca_lm)
|
||
|
||
Residuals:
|
||
Min 1Q Median 3Q Max
|
||
-4.4189 -0.1271 -0.0003 0.1312 2.4098
|
||
|
||
Coefficients:
|
||
Estimate Std. Error t value Pr(>|t|)
|
||
(Intercept) 3.712e+00 2.969e-04 12501.039 < 2e-16 ***
|
||
PC1 4.176e-02 1.375e-04 303.726 < 2e-16 ***
|
||
PC2 2.137e-01 1.463e-04 1460.678 < 2e-16 ***
|
||
PC3 -4.172e-02 4.759e-04 -87.663 < 2e-16 ***
|
||
PC4 -1.515e-02 3.103e-04 -48.826 < 2e-16 ***
|
||
PC5 -1.271e-01 3.538e-04 -359.242 < 2e-16 ***
|
||
PC6 5.418e-02 4.907e-04 110.395 < 2e-16 ***
|
||
PC7 -6.105e-02 4.326e-04 -141.111 < 2e-16 ***
|
||
PC8 5.400e-03 4.328e-04 12.477 < 2e-16 ***
|
||
PC9 -5.401e-02 5.126e-04 -105.354 < 2e-16 ***
|
||
PC1:PC2 1.088e-03 6.671e-05 16.305 < 2e-16 ***
|
||
PC1:PC3 -7.032e-03 1.645e-04 -42.746 < 2e-16 ***
|
||
PC2:PC3 -3.980e-03 1.976e-04 -20.139 < 2e-16 ***
|
||
PC1:PC4 -5.043e-03 1.269e-04 -39.758 < 2e-16 ***
|
||
PC2:PC4 2.175e-02 1.396e-04 155.808 < 2e-16 ***
|
||
PC3:PC4 7.605e-03 2.353e-04 32.322 < 2e-16 ***
|
||
PC1:PC5 -1.336e-02 1.589e-04 -84.127 < 2e-16 ***
|
||
PC2:PC5 1.243e-03 1.501e-04 8.281 < 2e-16 ***
|
||
PC3:PC5 2.338e-02 3.689e-04 63.386 < 2e-16 ***
|
||
PC4:PC5 -3.858e-03 2.832e-04 -13.619 < 2e-16 ***
|
||
PC1:PC6 -5.465e-03 1.799e-04 -30.372 < 2e-16 ***
|
||
PC1:PC7 -1.078e-03 1.589e-04 -6.788 1.13e-11 ***
|
||
PC1:PC8 -9.518e-03 1.608e-04 -59.188 < 2e-16 ***
|
||
PC1:PC9 6.692e-03 1.783e-04 37.530 < 2e-16 ***
|
||
PC2:PC6 7.085e-03 2.206e-04 32.115 < 2e-16 ***
|
||
PC2:PC7 -1.125e-03 1.935e-04 -5.814 6.12e-09 ***
|
||
PC2:PC8 1.111e-02 1.929e-04 57.581 < 2e-16 ***
|
||
PC2:PC9 7.667e-03 2.334e-04 32.855 < 2e-16 ***
|
||
PC3:PC6 8.246e-03 2.321e-04 35.519 < 2e-16 ***
|
||
PC3:PC7 -5.427e-03 2.797e-04 -19.402 < 2e-16 ***
|
||
PC3:PC8 3.661e-03 3.256e-04 11.242 < 2e-16 ***
|
||
PC3:PC9 1.372e-03 3.419e-04 4.012 6.03e-05 ***
|
||
PC4:PC6 -4.867e-03 3.402e-04 -14.308 < 2e-16 ***
|
||
PC4:PC7 4.357e-02 2.769e-04 157.358 < 2e-16 ***
|
||
PC4:PC8 -2.920e-03 3.407e-04 -8.572 < 2e-16 ***
|
||
PC4:PC9 -2.537e-03 3.510e-04 -7.227 4.94e-13 ***
|
||
PC5:PC6 2.964e-02 4.739e-04 62.536 < 2e-16 ***
|
||
PC5:PC7 -3.977e-02 3.781e-04 -105.160 < 2e-16 ***
|
||
PC5:PC8 3.083e-02 3.971e-04 77.654 < 2e-16 ***
|
||
PC5:PC9 1.636e-02 4.897e-04 33.414 < 2e-16 ***
|
||
PC1:PC2:PC3 -6.898e-04 6.753e-05 -10.214 < 2e-16 ***
|
||
PC1:PC2:PC4 1.060e-03 5.531e-05 19.159 < 2e-16 ***
|
||
PC1:PC3:PC4 2.410e-03 8.242e-05 29.237 < 2e-16 ***
|
||
PC2:PC3:PC4 -1.041e-02 1.019e-04 -102.204 < 2e-16 ***
|
||
PC1:PC2:PC5 3.672e-03 6.139e-05 59.817 < 2e-16 ***
|
||
PC1:PC3:PC5 4.743e-03 1.270e-04 37.348 < 2e-16 ***
|
||
PC2:PC3:PC5 -4.989e-03 1.269e-04 -39.310 < 2e-16 ***
|
||
PC1:PC4:PC5 3.417e-04 1.155e-04 2.959 0.003089 **
|
||
PC2:PC4:PC5 -4.490e-03 1.114e-04 -40.324 < 2e-16 ***
|
||
PC3:PC4:PC5 -1.494e-03 1.904e-04 -7.846 4.29e-15 ***
|
||
PC1:PC2:PC6 8.052e-04 7.800e-05 10.323 < 2e-16 ***
|
||
PC1:PC2:PC7 -2.933e-03 6.832e-05 -42.934 < 2e-16 ***
|
||
PC1:PC2:PC8 7.180e-04 7.150e-05 10.043 < 2e-16 ***
|
||
PC1:PC2:PC9 1.111e-04 7.037e-05 1.579 0.114372
|
||
PC1:PC3:PC6 1.571e-03 7.998e-05 19.642 < 2e-16 ***
|
||
PC1:PC3:PC7 -4.418e-04 8.632e-05 -5.117 3.10e-07 ***
|
||
PC1:PC3:PC8 8.451e-04 9.232e-05 9.154 < 2e-16 ***
|
||
PC1:PC3:PC9 -1.635e-03 9.626e-05 -16.983 < 2e-16 ***
|
||
PC2:PC3:PC6 1.460e-03 9.834e-05 14.846 < 2e-16 ***
|
||
PC2:PC3:PC7 -9.543e-04 1.032e-04 -9.244 < 2e-16 ***
|
||
PC2:PC3:PC8 -7.973e-04 1.342e-04 -5.943 2.80e-09 ***
|
||
PC2:PC3:PC9 9.474e-04 1.308e-04 7.243 4.39e-13 ***
|
||
PC1:PC4:PC6 -1.191e-03 1.197e-04 -9.948 < 2e-16 ***
|
||
PC1:PC4:PC7 4.337e-03 1.047e-04 41.405 < 2e-16 ***
|
||
PC1:PC4:PC8 -1.029e-03 1.160e-04 -8.866 < 2e-16 ***
|
||
PC1:PC4:PC9 4.835e-04 1.282e-04 3.771 0.000163 ***
|
||
PC2:PC4:PC6 -3.747e-03 1.553e-04 -24.127 < 2e-16 ***
|
||
PC2:PC4:PC7 -7.401e-03 1.115e-04 -66.373 < 2e-16 ***
|
||
PC2:PC4:PC8 -1.281e-03 1.472e-04 -8.697 < 2e-16 ***
|
||
PC2:PC4:PC9 -6.620e-03 1.571e-04 -42.148 < 2e-16 ***
|
||
PC3:PC4:PC6 -3.426e-04 1.294e-04 -2.646 0.008135 **
|
||
PC3:PC4:PC7 -6.179e-03 1.505e-04 -41.061 < 2e-16 ***
|
||
PC3:PC4:PC8 2.580e-03 1.920e-04 13.437 < 2e-16 ***
|
||
PC3:PC4:PC9 1.076e-03 1.796e-04 5.991 2.09e-09 ***
|
||
PC1:PC5:PC6 4.750e-03 1.646e-04 28.862 < 2e-16 ***
|
||
PC1:PC5:PC7 -2.139e-03 1.372e-04 -15.587 < 2e-16 ***
|
||
PC1:PC5:PC8 2.334e-03 1.438e-04 16.235 < 2e-16 ***
|
||
PC1:PC5:PC9 1.865e-03 1.638e-04 11.384 < 2e-16 ***
|
||
PC2:PC5:PC6 -2.924e-03 1.852e-04 -15.794 < 2e-16 ***
|
||
PC2:PC5:PC7 -5.402e-04 1.375e-04 -3.928 8.58e-05 ***
|
||
PC2:PC5:PC8 -1.093e-02 1.404e-04 -77.859 < 2e-16 ***
|
||
PC2:PC5:PC9 3.502e-03 1.767e-04 19.826 < 2e-16 ***
|
||
PC3:PC5:PC6 -5.136e-03 1.833e-04 -28.027 < 2e-16 ***
|
||
PC3:PC5:PC7 7.055e-03 1.748e-04 40.358 < 2e-16 ***
|
||
PC3:PC5:PC8 -8.107e-03 2.481e-04 -32.674 < 2e-16 ***
|
||
PC3:PC5:PC9 -1.075e-03 2.590e-04 -4.152 3.30e-05 ***
|
||
PC4:PC5:PC6 1.197e-02 3.065e-04 39.074 < 2e-16 ***
|
||
PC4:PC5:PC7 -1.942e-02 2.467e-04 -78.712 < 2e-16 ***
|
||
PC4:PC5:PC8 3.484e-03 2.782e-04 12.523 < 2e-16 ***
|
||
PC4:PC5:PC9 8.209e-03 3.252e-04 25.241 < 2e-16 ***
|
||
PC1:PC2:PC3:PC4 -1.082e-03 3.274e-05 -33.045 < 2e-16 ***
|
||
PC1:PC2:PC3:PC5 -1.154e-03 4.599e-05 -25.100 < 2e-16 ***
|
||
PC1:PC2:PC4:PC5 9.923e-05 4.290e-05 2.313 0.020715 *
|
||
PC1:PC3:PC4:PC5 -1.463e-03 6.087e-05 -24.030 < 2e-16 ***
|
||
PC2:PC3:PC4:PC5 2.283e-03 7.615e-05 29.981 < 2e-16 ***
|
||
PC1:PC2:PC3:PC6 1.251e-04 3.270e-05 3.827 0.000130 ***
|
||
PC1:PC2:PC3:PC7 8.523e-04 3.328e-05 25.612 < 2e-16 ***
|
||
PC1:PC2:PC3:PC8 4.727e-06 3.867e-05 0.122 0.902701
|
||
PC1:PC2:PC3:PC9 4.017e-04 3.482e-05 11.538 < 2e-16 ***
|
||
PC1:PC2:PC4:PC6 4.511e-04 5.234e-05 8.619 < 2e-16 ***
|
||
PC1:PC2:PC4:PC7 -1.544e-03 4.077e-05 -37.872 < 2e-16 ***
|
||
PC1:PC2:PC4:PC8 3.957e-04 4.884e-05 8.102 5.43e-16 ***
|
||
PC1:PC2:PC4:PC9 4.233e-04 5.066e-05 8.356 < 2e-16 ***
|
||
PC1:PC3:PC4:PC6 -1.308e-04 3.911e-05 -3.344 0.000826 ***
|
||
PC1:PC3:PC4:PC7 -1.074e-03 4.065e-05 -26.432 < 2e-16 ***
|
||
PC1:PC3:PC4:PC8 2.722e-04 4.935e-05 5.516 3.47e-08 ***
|
||
PC1:PC3:PC4:PC9 6.807e-04 4.810e-05 14.152 < 2e-16 ***
|
||
PC2:PC3:PC4:PC6 9.268e-04 5.299e-05 17.490 < 2e-16 ***
|
||
PC2:PC3:PC4:PC7 2.758e-03 5.391e-05 51.157 < 2e-16 ***
|
||
PC2:PC3:PC4:PC8 1.022e-03 7.899e-05 12.934 < 2e-16 ***
|
||
PC2:PC3:PC4:PC9 1.003e-03 7.097e-05 14.131 < 2e-16 ***
|
||
PC1:PC2:PC5:PC6 -5.341e-04 6.390e-05 -8.359 < 2e-16 ***
|
||
PC1:PC2:PC5:PC7 1.193e-03 5.056e-05 23.586 < 2e-16 ***
|
||
PC1:PC2:PC5:PC8 -1.742e-03 5.441e-05 -32.013 < 2e-16 ***
|
||
PC1:PC2:PC5:PC9 -1.082e-03 5.675e-05 -19.070 < 2e-16 ***
|
||
PC1:PC3:PC5:PC6 -3.494e-04 5.899e-05 -5.923 3.17e-09 ***
|
||
PC1:PC3:PC5:PC7 1.350e-03 5.467e-05 24.695 < 2e-16 ***
|
||
PC1:PC3:PC5:PC8 -1.515e-03 7.197e-05 -21.053 < 2e-16 ***
|
||
PC1:PC3:PC5:PC9 -4.855e-04 7.025e-05 -6.911 4.81e-12 ***
|
||
PC2:PC3:PC5:PC6 4.911e-04 7.375e-05 6.660 2.75e-11 ***
|
||
PC2:PC3:PC5:PC7 -5.853e-04 6.143e-05 -9.528 < 2e-16 ***
|
||
PC2:PC3:PC5:PC8 2.373e-03 8.840e-05 26.844 < 2e-16 ***
|
||
PC2:PC3:PC5:PC9 -2.703e-03 8.236e-05 -32.812 < 2e-16 ***
|
||
PC1:PC4:PC5:PC6 3.735e-03 9.754e-05 38.290 < 2e-16 ***
|
||
PC1:PC4:PC5:PC7 -1.355e-04 8.398e-05 -1.613 0.106736
|
||
PC1:PC4:PC5:PC8 -7.141e-04 8.737e-05 -8.173 3.00e-16 ***
|
||
PC1:PC4:PC5:PC9 1.510e-03 1.033e-04 14.619 < 2e-16 ***
|
||
PC2:PC4:PC5:PC6 4.562e-03 1.244e-04 36.678 < 2e-16 ***
|
||
PC2:PC4:PC5:PC7 3.101e-03 9.395e-05 33.006 < 2e-16 ***
|
||
PC2:PC4:PC5:PC8 -3.605e-03 1.073e-04 -33.605 < 2e-16 ***
|
||
PC2:PC4:PC5:PC9 2.848e-03 1.149e-04 24.783 < 2e-16 ***
|
||
PC3:PC4:PC5:PC6 -2.638e-03 9.787e-05 -26.959 < 2e-16 ***
|
||
PC3:PC4:PC5:PC7 3.449e-03 8.882e-05 38.836 < 2e-16 ***
|
||
PC3:PC4:PC5:PC8 1.130e-03 1.320e-04 8.561 < 2e-16 ***
|
||
PC3:PC4:PC5:PC9 -3.072e-03 1.246e-04 -24.658 < 2e-16 ***
|
||
PC1:PC2:PC3:PC4:PC5 2.170e-04 2.429e-05 8.931 < 2e-16 ***
|
||
PC1:PC2:PC3:PC4:PC6 -1.236e-05 1.568e-05 -0.788 0.430827
|
||
PC1:PC2:PC3:PC4:PC7 3.321e-04 1.529e-05 21.713 < 2e-16 ***
|
||
PC1:PC2:PC3:PC4:PC8 7.917e-05 1.994e-05 3.971 7.16e-05 ***
|
||
PC1:PC2:PC3:PC4:PC9 -1.315e-04 1.981e-05 -6.640 3.14e-11 ***
|
||
PC1:PC2:PC3:PC5:PC6 1.486e-04 2.349e-05 6.324 2.56e-10 ***
|
||
PC1:PC2:PC3:PC5:PC7 -6.312e-04 2.046e-05 -30.847 < 2e-16 ***
|
||
PC1:PC2:PC3:PC5:PC8 4.307e-04 2.690e-05 16.014 < 2e-16 ***
|
||
PC1:PC2:PC3:PC5:PC9 1.682e-06 2.492e-05 0.067 0.946206
|
||
PC1:PC2:PC4:PC5:PC6 1.396e-03 3.798e-05 36.743 < 2e-16 ***
|
||
PC1:PC2:PC4:PC5:PC7 1.066e-03 3.079e-05 34.612 < 2e-16 ***
|
||
PC1:PC2:PC4:PC5:PC8 -8.999e-04 3.377e-05 -26.645 < 2e-16 ***
|
||
PC1:PC2:PC4:PC5:PC9 6.552e-05 3.665e-05 1.788 0.073781 .
|
||
PC1:PC3:PC4:PC5:PC6 -6.516e-04 2.846e-05 -22.897 < 2e-16 ***
|
||
PC1:PC3:PC4:PC5:PC7 2.073e-04 2.484e-05 8.346 < 2e-16 ***
|
||
PC1:PC3:PC4:PC5:PC8 6.207e-04 3.265e-05 19.011 < 2e-16 ***
|
||
PC1:PC3:PC4:PC5:PC9 -4.809e-04 3.283e-05 -14.650 < 2e-16 ***
|
||
PC2:PC3:PC4:PC5:PC6 4.775e-05 3.916e-05 1.220 0.222617
|
||
PC2:PC3:PC4:PC5:PC7 -1.424e-03 3.364e-05 -42.325 < 2e-16 ***
|
||
PC2:PC3:PC4:PC5:PC8 6.694e-05 5.311e-05 1.260 0.207536
|
||
PC2:PC3:PC4:PC5:PC9 -5.787e-04 4.377e-05 -13.222 < 2e-16 ***
|
||
PC1:PC2:PC3:PC4:PC5:PC6 -1.123e-04 1.144e-05 -9.816 < 2e-16 ***
|
||
PC1:PC2:PC3:PC4:PC5:PC7 -2.817e-04 9.656e-06 -29.174 < 2e-16 ***
|
||
PC1:PC2:PC3:PC4:PC5:PC8 1.209e-04 1.336e-05 9.050 < 2e-16 ***
|
||
PC1:PC2:PC3:PC4:PC5:PC9 -2.575e-05 1.361e-05 -1.892 0.058483 .
|
||
---
|
||
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
|
||
|
||
Residual standard error: 0.2593 on 1533074 degrees of freedom
|
||
Multiple R-squared: 0.7839, Adjusted R-squared: 0.7839
|
||
F-statistic: 3.498e+04 on 159 and 1533074 DF, p-value: < 2.2e-16</code></pre>
|
||
</div>
|
||
</div>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb72"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb72-1"><a href="#cb72-1" aria-hidden="true" tabindex="-1"></a>Y_hat3_pca <span class="ot"><-</span> <span class="fu">predict</span>(mod3_pca, <span class="at">newdata =</span> val_pca_lm, <span class="at">type =</span> <span class="st">"response"</span>)</span>
|
||
<span id="cb72-2"><a href="#cb72-2" aria-hidden="true" tabindex="-1"></a>Y_val_pca <span class="ot"><-</span> val_pca_lm<span class="sc">$</span>implied_vol_ref</span>
|
||
<span id="cb72-3"><a href="#cb72-3" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb72-4"><a href="#cb72-4" aria-hidden="true" tabindex="-1"></a>MSS_3_pca <span class="ot"><-</span> <span class="fu">mean</span>((<span class="fu">exp</span>(Y_val_pca) <span class="sc">-</span> <span class="fu">exp</span>(Y_hat3_pca))<span class="sc">**</span><span class="dv">2</span>)</span>
|
||
<span id="cb72-5"><a href="#cb72-5" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb72-6"><a href="#cb72-6" aria-hidden="true" tabindex="-1"></a><span class="fu">print</span>(<span class="fu">paste0</span>(<span class="st">"The RMSE is: "</span>, <span class="fu">sqrt</span>(MSS_3_pca)))</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output cell-output-stdout">
|
||
<pre><code>[1] "The RMSE is: 11.7313443364877"</code></pre>
|
||
</div>
|
||
</div>
|
||
<p>We now add interactions. First, we take the interactions of the first four components with each other and with the other 7 components. This improves the RMSE to 11.88. These models run much quicker than the previous ones on the full dataset, and have a nearly identical RMSE. After adding the fifth component’s interactions, we get a lower RMSE of 11.73. However, adding further interactions leads to computational problems that cannot be resolved with 16 GB of RAM.</p>
|
||
</section>
|
||
</section>
|
||
<section id="biased-regression" class="level3">
|
||
<h3 class="anchored" data-anchor-id="biased-regression">Biased Regression</h3>
|
||
<p>Since we obtained better performance with the regular dataset, we will use that dataset for the penalised regressions. The only exception is partial least squares, which needs the dimension-reduced dataset in order to run.</p>
|
||
<section id="ridge" class="level4">
|
||
<h4 class="anchored" data-anchor-id="ridge">Ridge</h4>
|
||
<p>The first biased regression model used is RIDGE. The model adds a regularisation parameter <span class="math inline">\(\lambda > 0\)</span>.</p>
|
||
<p><span class="math display">\[
|
||
\hat{\beta}^{R}(\lambda) \;=\; \arg\min_{\beta}\; \|Y - X\beta\|_{n}^{2} \;+\; \lambda\,\|\beta\|^{2}
|
||
\]</span></p>
|
||
<p>The penalty does not affect the intercept, and no variable selection is performed, since no coefficient estimate is exactly 0. We choose the best <span class="math inline">\(\lambda\)</span> through cross-validation.</p>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb74"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb74-1"><a href="#cb74-1" aria-hidden="true" tabindex="-1"></a>train_noY <span class="ot"><-</span> train_linear_lm <span class="sc">|></span> dplyr<span class="sc">::</span><span class="fu">select</span>(<span class="sc">-</span>implied_vol_ref)</span>
|
||
<span id="cb74-2"><a href="#cb74-2" aria-hidden="true" tabindex="-1"></a>y_train_log <span class="ot"><-</span> train_linear_lm<span class="sc">$</span>implied_vol_ref</span>
|
||
<span id="cb74-3"><a href="#cb74-3" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb74-4"><a href="#cb74-4" aria-hidden="true" tabindex="-1"></a>val_noY <span class="ot"><-</span> val_linear_lm <span class="sc">|></span> dplyr<span class="sc">::</span><span class="fu">select</span>(<span class="sc">-</span>implied_vol_ref)</span>
|
||
<span id="cb74-5"><a href="#cb74-5" aria-hidden="true" tabindex="-1"></a>y_val_log <span class="ot"><-</span> val_linear_lm<span class="sc">$</span>implied_vol_ref</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb75"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb75-1"><a href="#cb75-1" aria-hidden="true" tabindex="-1"></a>cv.ridge <span class="ot"><-</span> <span class="fu">cv.glmnet</span>(<span class="fu">as.matrix</span>(train_noY), y_train_log, <span class="at">alpha =</span> <span class="dv">0</span>)</span>
|
||
<span id="cb75-2"><a href="#cb75-2" aria-hidden="true" tabindex="-1"></a>s_ridge <span class="ot">=</span> cv.ridge<span class="sc">$</span>lambda.min</span>
|
||
<span id="cb75-3"><a href="#cb75-3" aria-hidden="true" tabindex="-1"></a>fit.ridge <span class="ot"><-</span> <span class="fu">glmnet</span>(</span>
|
||
<span id="cb75-4"><a href="#cb75-4" aria-hidden="true" tabindex="-1"></a> train_noY,</span>
|
||
<span id="cb75-5"><a href="#cb75-5" aria-hidden="true" tabindex="-1"></a> y_train_log,</span>
|
||
<span id="cb75-6"><a href="#cb75-6" aria-hidden="true" tabindex="-1"></a> <span class="at">lambda =</span> s_ridge,</span>
|
||
<span id="cb75-7"><a href="#cb75-7" aria-hidden="true" tabindex="-1"></a> <span class="at">alpha =</span> <span class="dv">0</span></span>
|
||
<span id="cb75-8"><a href="#cb75-8" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb75-9"><a href="#cb75-9" aria-hidden="true" tabindex="-1"></a><span class="fu">coef</span>(fit.ridge)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output cell-output-stdout">
|
||
<pre><code>20 x 1 sparse Matrix of class "dgCMatrix"
|
||
s0
|
||
(Intercept) 3.707945581
|
||
strike_dispersion 0.066642443
|
||
call_volume 0.008091265
|
||
put_volume 0.017378847
|
||
call_oi 0.008054584
|
||
put_oi -0.014366114
|
||
maturity_count 0.016893062
|
||
total_contracts 0.021135478
|
||
realized_vol_short 0.195001834
|
||
market_vol_index 0.046033081
|
||
realized_vol_mid 0.068107180
|
||
realized_vol_long 0.150329552
|
||
pulse_ratio -0.060030239
|
||
put_call_ratio_volume 0.010104841
|
||
put_call_ratio_oi 0.005461722
|
||
liquidity_ratio -0.013761483
|
||
option_dispersion 0.025614892
|
||
put_low_strike 0.030860894
|
||
put_proportion -0.011503510
|
||
stress_spread 0.039569879</code></pre>
|
||
</div>
|
||
</div>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb77"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb77-1"><a href="#cb77-1" aria-hidden="true" tabindex="-1"></a>Y_hatridge <span class="ot"><-</span> <span class="fu">predict</span>(</span>
|
||
<span id="cb77-2"><a href="#cb77-2" aria-hidden="true" tabindex="-1"></a> fit.ridge,</span>
|
||
<span id="cb77-3"><a href="#cb77-3" aria-hidden="true" tabindex="-1"></a> <span class="at">newx =</span> <span class="fu">as.matrix</span>(val_noY),</span>
|
||
<span id="cb77-4"><a href="#cb77-4" aria-hidden="true" tabindex="-1"></a> <span class="at">type =</span> <span class="st">"response"</span>,</span>
|
||
<span id="cb77-5"><a href="#cb77-5" aria-hidden="true" tabindex="-1"></a> <span class="at">s =</span> s_ridge</span>
|
||
<span id="cb77-6"><a href="#cb77-6" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb77-7"><a href="#cb77-7" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb77-8"><a href="#cb77-8" aria-hidden="true" tabindex="-1"></a>MSS_ridge <span class="ot"><-</span> <span class="fu">mean</span>((<span class="fu">exp</span>(y_val_log) <span class="sc">-</span> <span class="fu">exp</span>(Y_hatridge))<span class="sc">**</span><span class="dv">2</span>)</span>
|
||
<span id="cb77-9"><a href="#cb77-9" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb77-10"><a href="#cb77-10" aria-hidden="true" tabindex="-1"></a><span class="fu">print</span>(<span class="fu">paste0</span>(<span class="st">"The Ridge RMSE is: "</span>, <span class="fu">sqrt</span>(MSS_ridge)))</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output cell-output-stdout">
|
||
<pre><code>[1] "The Ridge RMSE is: 12.4870766968991"</code></pre>
|
||
</div>
|
||
</div>
|
||
<p>Looking at the coefficients, the intercept has the biggest impact on the final prediction. Amongst the features, the short- and long-term realised volatilities have the largest coefficients, and therefore the most influence on the final result. However, Ridge is the worst model so far, with an RMSE of 12.49. This is consistent with expectations: Ridge’s biggest strength, preventing overfitting by shrinking the coefficients, isn’t relevant because we are not in an overfitting situation.</p>
|
||
</section>
|
||
<section id="lasso-regression" class="level4">
|
||
<h4 class="anchored" data-anchor-id="lasso-regression">Lasso Regression</h4>
|
||
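<p>LASSO regression is close to ridge regression. However, the penalty changes: it uses the <span class="math inline">\(\ell_1\)</span> norm of the coefficients instead of the squared <span class="math inline">\(\ell_2\)</span> norm.</p>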
|
||
<p><span class="math display">\[
|
||
\hat{\beta}^{L}(\lambda)=\arg\min_{\beta}\;\|Y - X\beta\|_{n}^{2} + \lambda\|\beta\|_{1}
|
||
\]</span></p>
|
||
<p>Unlike ridge, lasso can set coefficient estimates exactly to 0, and therefore performs built-in variable selection.</p>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb79"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb79-1"><a href="#cb79-1" aria-hidden="true" tabindex="-1"></a>cv.lasso <span class="ot"><-</span> <span class="fu">cv.glmnet</span>(<span class="fu">as.matrix</span>(train_noY), y_train_log, <span class="at">alpha =</span> <span class="dv">1</span>)</span>
|
||
<span id="cb79-2"><a href="#cb79-2" aria-hidden="true" tabindex="-1"></a>s_lasso <span class="ot">=</span> cv.lasso<span class="sc">$</span>lambda.min</span>
|
||
<span id="cb79-3"><a href="#cb79-3" aria-hidden="true" tabindex="-1"></a>fit.lasso <span class="ot"><-</span> <span class="fu">glmnet</span>(</span>
|
||
<span id="cb79-4"><a href="#cb79-4" aria-hidden="true" tabindex="-1"></a> train_noY,</span>
|
||
<span id="cb79-5"><a href="#cb79-5" aria-hidden="true" tabindex="-1"></a> y_train_log,</span>
|
||
<span id="cb79-6"><a href="#cb79-6" aria-hidden="true" tabindex="-1"></a> <span class="at">lambda =</span> s_lasso,</span>
|
||
<span id="cb79-7"><a href="#cb79-7" aria-hidden="true" tabindex="-1"></a> <span class="at">alpha =</span> <span class="dv">1</span>,</span>
|
||
<span id="cb79-8"><a href="#cb79-8" aria-hidden="true" tabindex="-1"></a> <span class="at">standardize =</span> <span class="cn">FALSE</span></span>
|
||
<span id="cb79-9"><a href="#cb79-9" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb79-10"><a href="#cb79-10" aria-hidden="true" tabindex="-1"></a><span class="fu">coef</span>(fit.lasso)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output cell-output-stdout">
|
||
<pre><code>20 x 1 sparse Matrix of class "dgCMatrix"
|
||
s0
|
||
(Intercept) 3.7079455811
|
||
strike_dispersion 0.0828260235
|
||
call_volume 0.0016005427
|
||
put_volume 0.0497937673
|
||
call_oi 0.0008723776
|
||
put_oi -0.0322067399
|
||
maturity_count 0.0151152360
|
||
total_contracts 0.0098843542
|
||
realized_vol_short 0.5161194376
|
||
market_vol_index 0.0485475729
|
||
realized_vol_mid .
|
||
realized_vol_long 0.0053070551
|
||
pulse_ratio -0.2148769609
|
||
put_call_ratio_volume 0.0089017316
|
||
put_call_ratio_oi 0.0111171482
|
||
liquidity_ratio -0.0160239070
|
||
option_dispersion .
|
||
put_low_strike 0.0344600349
|
||
put_proportion -0.0198225534
|
||
stress_spread -0.0029979084</code></pre>
|
||
</div>
|
||
</div>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb81"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb81-1"><a href="#cb81-1" aria-hidden="true" tabindex="-1"></a>Y_hatlasso <span class="ot"><-</span> <span class="fu">predict</span>(</span>
|
||
<span id="cb81-2"><a href="#cb81-2" aria-hidden="true" tabindex="-1"></a> fit.lasso,</span>
|
||
<span id="cb81-3"><a href="#cb81-3" aria-hidden="true" tabindex="-1"></a> <span class="at">newx =</span> <span class="fu">as.matrix</span>(val_noY),</span>
|
||
<span id="cb81-4"><a href="#cb81-4" aria-hidden="true" tabindex="-1"></a> <span class="at">type =</span> <span class="st">"response"</span></span>
|
||
<span id="cb81-5"><a href="#cb81-5" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb81-6"><a href="#cb81-6" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb81-7"><a href="#cb81-7" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb81-8"><a href="#cb81-8" aria-hidden="true" tabindex="-1"></a>MSS_lasso <span class="ot"><-</span> <span class="fu">mean</span>((<span class="fu">exp</span>(y_val_log) <span class="sc">-</span> <span class="fu">exp</span>(Y_hatlasso))<span class="sc">**</span><span class="dv">2</span>)</span>
|
||
<span id="cb81-9"><a href="#cb81-9" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb81-10"><a href="#cb81-10" aria-hidden="true" tabindex="-1"></a><span class="fu">print</span>(<span class="fu">paste0</span>(<span class="st">"The Lasso RMSE is: "</span>, <span class="fu">sqrt</span>(MSS_lasso)))</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output cell-output-stdout">
|
||
<pre><code>[1] "The Lasso RMSE is: 12.0370753712144"</code></pre>
|
||
</div>
|
||
</div>
|
||
<p>This leads to better results, with an RMSE of 12.04. We are in a situation where variables need to be selected rather than in an overfitting scenario. The eliminated variables are <code>realized_vol_mid</code> and <code>option_dispersion</code>: Lasso treats them as irrelevant for the prediction. This contradicts the linear models, which achieved better results than Lasso. This shows that while Lasso benefits from variable selection, it cannot replicate human variable selection. It is still worse than our classical linear regression, which uses said variables. We move on to the next penalised model.</p>
|
||
</section>
|
||
<section id="elastic-net" class="level4">
|
||
<h4 class="anchored" data-anchor-id="elastic-net">Elastic-Net</h4>
|
||
<p>Elastic-Net combines Ridge and Lasso regression, attempting to preserve the benefits of both models and reducing overfitting while having variable selection. For <span class="math inline">\(\lambda_{1}, \lambda_{2} > 0\)</span></p>
|
||
<p><span class="math display">\[
|
||
\hat{\beta}^{EN}(\lambda_{1}, \lambda_{2}) = \arg\min_{\beta}\;\|Y - X\beta\|_{n}^{2} + \lambda_{1}\|\beta\|_{1} + \lambda_{2}\|\beta\|_{2}^{2}
|
||
\]</span></p>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb83"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb83-1"><a href="#cb83-1" aria-hidden="true" tabindex="-1"></a>cv.elasticnet <span class="ot"><-</span> <span class="fu">cv.glmnet</span>(</span>
|
||
<span id="cb83-2"><a href="#cb83-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">as.matrix</span>(train_noY),</span>
|
||
<span id="cb83-3"><a href="#cb83-3" aria-hidden="true" tabindex="-1"></a> y_train_log,</span>
|
||
<span id="cb83-4"><a href="#cb83-4" aria-hidden="true" tabindex="-1"></a> <span class="at">alpha =</span> <span class="fl">0.5</span></span>
|
||
<span id="cb83-5"><a href="#cb83-5" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb83-6"><a href="#cb83-6" aria-hidden="true" tabindex="-1"></a>s_en <span class="ot">=</span> cv.elasticnet<span class="sc">$</span>lambda.min</span>
|
||
<span id="cb83-7"><a href="#cb83-7" aria-hidden="true" tabindex="-1"></a>fit.elasticnet <span class="ot"><-</span> <span class="fu">glmnet</span>(</span>
|
||
<span id="cb83-8"><a href="#cb83-8" aria-hidden="true" tabindex="-1"></a> train_noY,</span>
|
||
<span id="cb83-9"><a href="#cb83-9" aria-hidden="true" tabindex="-1"></a> y_train_log,</span>
|
||
<span id="cb83-10"><a href="#cb83-10" aria-hidden="true" tabindex="-1"></a> <span class="at">lambda =</span> s_en,</span>
|
||
<span id="cb83-11"><a href="#cb83-11" aria-hidden="true" tabindex="-1"></a> <span class="at">alpha =</span> <span class="fl">0.5</span></span>
|
||
<span id="cb83-12"><a href="#cb83-12" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb83-13"><a href="#cb83-13" aria-hidden="true" tabindex="-1"></a><span class="fu">coef</span>(fit.elasticnet)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output cell-output-stdout">
|
||
<pre><code>20 x 1 sparse Matrix of class "dgCMatrix"
|
||
s0
|
||
(Intercept) 3.7079455811
|
||
strike_dispersion 0.0968051818
|
||
call_volume 0.0006080303
|
||
put_volume 0.0514668950
|
||
call_oi 0.0017100680
|
||
put_oi -0.0343559893
|
||
maturity_count 0.0143531881
|
||
total_contracts .
|
||
realized_vol_short 0.5114617598
|
||
market_vol_index 0.0483281737
|
||
realized_vol_mid -0.0002823096
|
||
realized_vol_long 0.0092017012
|
||
pulse_ratio -0.2117217524
|
||
put_call_ratio_volume 0.0089154925
|
||
put_call_ratio_oi 0.0115496767
|
||
liquidity_ratio -0.0163079430
|
||
option_dispersion -0.0222861716
|
||
put_low_strike 0.0341647693
|
||
put_proportion -0.0205208052
|
||
stress_spread -0.0035417269</code></pre>
|
||
</div>
|
||
</div>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb85"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb85-1"><a href="#cb85-1" aria-hidden="true" tabindex="-1"></a>Y_hatEN <span class="ot"><-</span> <span class="fu">predict</span>(</span>
|
||
<span id="cb85-2"><a href="#cb85-2" aria-hidden="true" tabindex="-1"></a> fit.elasticnet,</span>
|
||
<span id="cb85-3"><a href="#cb85-3" aria-hidden="true" tabindex="-1"></a> <span class="at">newx =</span> <span class="fu">as.matrix</span>(val_noY),</span>
|
||
<span id="cb85-4"><a href="#cb85-4" aria-hidden="true" tabindex="-1"></a> <span class="at">type =</span> <span class="st">"response"</span>,</span>
|
||
<span id="cb85-5"><a href="#cb85-5" aria-hidden="true" tabindex="-1"></a> <span class="at">s =</span> s_en</span>
|
||
<span id="cb85-6"><a href="#cb85-6" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb85-7"><a href="#cb85-7" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb85-8"><a href="#cb85-8" aria-hidden="true" tabindex="-1"></a>MSS_EN <span class="ot"><-</span> <span class="fu">mean</span>((<span class="fu">exp</span>(y_val_log) <span class="sc">-</span> <span class="fu">exp</span>(Y_hatEN))<span class="sc">**</span><span class="dv">2</span>)</span>
|
||
<span id="cb85-9"><a href="#cb85-9" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb85-10"><a href="#cb85-10" aria-hidden="true" tabindex="-1"></a><span class="fu">print</span>(<span class="fu">paste0</span>(<span class="st">"The E-N RMSE is: "</span>, <span class="fu">sqrt</span>(MSS_EN)))</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output cell-output-stdout">
|
||
<pre><code>[1] "The E-N RMSE is: 12.0313627692977"</code></pre>
|
||
</div>
|
||
</div>
|
||
<p>Elastic-Net does not eliminate the same variables as Lasso: instead, it eliminates <code>total_contracts</code>. This shows that it preserves Lasso’s variable selection effect. The RMSE is slightly improved, yet the change compared to Lasso is negligible.</p>
|
||
</section>
|
||
<section id="partial-least-squares" class="level4">
|
||
<h4 class="anchored" data-anchor-id="partial-least-squares">Partial Least Squares</h4>
|
||
<p>The main idea behind Partial Least Squares is to construct new features that are mutually orthogonal linear combinations of the original dataset features. Here, the computation is done on the already dimension-reduced (PCA) dataset.</p>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb87"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb87-1"><a href="#cb87-1" aria-hidden="true" tabindex="-1"></a>train_pca_noY <span class="ot"><-</span> train_pca_lm <span class="sc">|></span> dplyr<span class="sc">::</span><span class="fu">select</span>(<span class="sc">-</span>implied_vol_ref)</span>
|
||
<span id="cb87-2"><a href="#cb87-2" aria-hidden="true" tabindex="-1"></a>val_pca_noY <span class="ot"><-</span> val_pca_lm <span class="sc">|></span> dplyr<span class="sc">::</span><span class="fu">select</span>(<span class="sc">-</span>implied_vol_ref)</span>
|
||
<span id="cb87-3"><a href="#cb87-3" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb87-4"><a href="#cb87-4" aria-hidden="true" tabindex="-1"></a>fit.pls <span class="ot"><-</span> <span class="fu">plsr</span>(</span>
|
||
<span id="cb87-5"><a href="#cb87-5" aria-hidden="true" tabindex="-1"></a> train_pca_lm<span class="sc">$</span>implied_vol_ref <span class="sc">~</span> <span class="fu">as.matrix</span>(train_pca_noY),</span>
|
||
<span id="cb87-6"><a href="#cb87-6" aria-hidden="true" tabindex="-1"></a> <span class="at">ncomp =</span> <span class="dv">8</span>,</span>
|
||
<span id="cb87-7"><a href="#cb87-7" aria-hidden="true" tabindex="-1"></a> <span class="at">validation =</span> <span class="st">"CV"</span></span>
|
||
<span id="cb87-8"><a href="#cb87-8" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb87-9"><a href="#cb87-9" aria-hidden="true" tabindex="-1"></a><span class="fu">coef</span>(fit.pls)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output cell-output-stdout">
|
||
<pre><code>, , 8 comps
|
||
|
||
train_pca_lm$implied_vol_ref
|
||
as.matrix(train_pca_noY)PC1 0.032235567
|
||
as.matrix(train_pca_noY)PC2 0.215238058
|
||
as.matrix(train_pca_noY)PC3 -0.016715731
|
||
as.matrix(train_pca_noY)PC4 -0.014672291
|
||
as.matrix(train_pca_noY)PC5 -0.098566933
|
||
as.matrix(train_pca_noY)PC6 0.077464828
|
||
as.matrix(train_pca_noY)PC7 -0.061847857
|
||
as.matrix(train_pca_noY)PC8 0.005858342
|
||
as.matrix(train_pca_noY)PC9 -0.033099268</code></pre>
|
||
</div>
|
||
</div>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb89"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb89-1"><a href="#cb89-1" aria-hidden="true" tabindex="-1"></a>explained_variance <span class="ot"><-</span> <span class="fu">explvar</span>(fit.pls)</span>
|
||
<span id="cb89-2"><a href="#cb89-2" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb89-3"><a href="#cb89-3" aria-hidden="true" tabindex="-1"></a><span class="fu">plot</span>(</span>
|
||
<span id="cb89-4"><a href="#cb89-4" aria-hidden="true" tabindex="-1"></a> <span class="dv">1</span><span class="sc">:</span><span class="fu">length</span>(explained_variance),</span>
|
||
<span id="cb89-5"><a href="#cb89-5" aria-hidden="true" tabindex="-1"></a> explained_variance,</span>
|
||
<span id="cb89-6"><a href="#cb89-6" aria-hidden="true" tabindex="-1"></a> <span class="at">type =</span> <span class="st">"b"</span>,</span>
|
||
<span id="cb89-7"><a href="#cb89-7" aria-hidden="true" tabindex="-1"></a> <span class="at">pch =</span> <span class="dv">16</span>,</span>
|
||
<span id="cb89-8"><a href="#cb89-8" aria-hidden="true" tabindex="-1"></a> <span class="at">col =</span> <span class="st">"blue"</span>,</span>
|
||
<span id="cb89-9"><a href="#cb89-9" aria-hidden="true" tabindex="-1"></a> <span class="at">xlab =</span> <span class="st">"PLS Component"</span>,</span>
|
||
<span id="cb89-10"><a href="#cb89-10" aria-hidden="true" tabindex="-1"></a> <span class="at">ylab =</span> <span class="st">"Variance Explained (%)"</span>,</span>
|
||
<span id="cb89-11"><a href="#cb89-11" aria-hidden="true" tabindex="-1"></a> <span class="at">main =</span> <span class="st">"PLS: Variance Explained by Each Component"</span>,</span>
|
||
<span id="cb89-12"><a href="#cb89-12" aria-hidden="true" tabindex="-1"></a> <span class="at">ylim =</span> <span class="fu">c</span>(<span class="dv">0</span>, <span class="fu">max</span>(explained_variance) <span class="sc">*</span> <span class="fl">1.1</span>)</span>
|
||
<span id="cb89-13"><a href="#cb89-13" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb89-14"><a href="#cb89-14" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb89-15"><a href="#cb89-15" aria-hidden="true" tabindex="-1"></a><span class="fu">grid</span>()</span>
|
||
<span id="cb89-16"><a href="#cb89-16" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb89-17"><a href="#cb89-17" aria-hidden="true" tabindex="-1"></a><span class="fu">text</span>(</span>
|
||
<span id="cb89-18"><a href="#cb89-18" aria-hidden="true" tabindex="-1"></a> <span class="dv">1</span><span class="sc">:</span><span class="fu">length</span>(explained_variance),</span>
|
||
<span id="cb89-19"><a href="#cb89-19" aria-hidden="true" tabindex="-1"></a> explained_variance,</span>
|
||
<span id="cb89-20"><a href="#cb89-20" aria-hidden="true" tabindex="-1"></a> <span class="at">labels =</span> <span class="fu">round</span>(explained_variance, <span class="dv">2</span>),</span>
|
||
<span id="cb89-21"><a href="#cb89-21" aria-hidden="true" tabindex="-1"></a> <span class="at">pos =</span> <span class="dv">3</span>,</span>
|
||
<span id="cb89-22"><a href="#cb89-22" aria-hidden="true" tabindex="-1"></a> <span class="at">col =</span> <span class="st">"blue"</span></span>
|
||
<span id="cb89-23"><a href="#cb89-23" aria-hidden="true" tabindex="-1"></a>)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output-display">
|
||
<div class="quarto-figure quarto-figure-center">
|
||
<figure class="figure">
|
||
<p><img src="Projet_MRC_DANJOU_LEGRAND_MERIC_VONSIEMENS_files/figure-html/pls%20explained%20variance-1.png" class="img-fluid quarto-figure quarto-figure-center figure-img" width="672" alt="Line plot of the percentage of variance explained by each PLS component"></p>
|
||
</figure>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
<p>After visualising the new features, we observe a steep drop-off in explained variance after the first two components, which account for 50.7% of the variance. This shows that a lot of the information present — more than half of it — can be captured by two linear combinations. This suggests that it is initially easy to extract information, but that it gets progressively harder, with the fifth-best new feature explaining only 6.91% of the variance.</p>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb90"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb90-1"><a href="#cb90-1" aria-hidden="true" tabindex="-1"></a>pred_val <span class="ot"><-</span> <span class="fu">predict</span>(fit.pls, <span class="at">newdata =</span> <span class="fu">as.matrix</span>(val_pca_noY), <span class="at">ncomp =</span> <span class="dv">8</span>)</span>
|
||
<span id="cb90-2"><a href="#cb90-2" aria-hidden="true" tabindex="-1"></a>Y_hatPLS <span class="ot"><-</span> pred_val[, <span class="dv">1</span>, <span class="dv">1</span>]</span>
|
||
<span id="cb90-3"><a href="#cb90-3" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb90-4"><a href="#cb90-4" aria-hidden="true" tabindex="-1"></a>MSS_PLS <span class="ot"><-</span> <span class="fu">mean</span>((<span class="fu">exp</span>(val_pca_lm<span class="sc">$</span>implied_vol_ref) <span class="sc">-</span> <span class="fu">exp</span>(Y_hatPLS))<span class="sc">**</span><span class="dv">2</span>)</span>
|
||
<span id="cb90-5"><a href="#cb90-5" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb90-6"><a href="#cb90-6" aria-hidden="true" tabindex="-1"></a><span class="fu">print</span>(<span class="fu">paste0</span>(<span class="st">"The PLS RMSE is: "</span>, <span class="fu">sqrt</span>(MSS_PLS)))</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output cell-output-stdout">
|
||
<pre><code>[1] "The PLS RMSE is: 12.7949712506425"</code></pre>
|
||
</div>
|
||
</div>
|
||
<p>Since we reduce the dimension on an already dimension-reduced dataset, the loss of information is too big: PLS is the worst model with an RMSE of 12.79.</p>
|
||
</section>
|
||
</section>
|
||
<section id="linear-mixed-effects-models-lmm" class="level3">
|
||
<h3 class="anchored" data-anchor-id="linear-mixed-effects-models-lmm">Linear Mixed-Effects Models (LMM)</h3>
|
||
<p>Given the panel structure of our dataset (<span class="math inline">\(N=3887\)</span> assets, <span class="math inline">\(T=544\)</span> dates), we try a Linear Mixed-Effects Model. This approach allows us to model a global market trend (Fixed Effects) while estimating a specific baseline level for each asset (Random Intercept), capturing the idiosyncratic risk inherent to each underlying instrument.</p>
|
||
<section id="model-lmm-1-the-baseline" class="level4">
|
||
<h4 class="anchored" data-anchor-id="model-lmm-1-the-baseline">Model LMM 1: The Baseline</h4>
|
||
<p>Our first approach was to include all available explanatory variables: the raw market data and our engineered features. The objective was to establish a performance benchmark.</p>
|
||
<p>The model specification is: <span class="math display">\[
|
||
\log(\texttt{ImpliedVol}_{it}) = \beta_0 + \sum_{k=1}^{p} \beta_k X_{k,it} + u_i + \epsilon_{it}
|
||
\]</span> Where <span class="math inline">\(u_i \sim \mathcal{N}(0, \sigma_u^2)\)</span> represents the random intercept for asset <span class="math inline">\(i\)</span>.</p>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb92"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb92-1"><a href="#cb92-1" aria-hidden="true" tabindex="-1"></a>mod_lmm_1 <span class="ot"><-</span> <span class="fu">lmer</span>(</span>
|
||
<span id="cb92-2"><a href="#cb92-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">log</span>(implied_vol_ref) <span class="sc">~</span> strike_dispersion <span class="sc">+</span></span>
|
||
<span id="cb92-3"><a href="#cb92-3" aria-hidden="true" tabindex="-1"></a> call_volume <span class="sc">+</span></span>
|
||
<span id="cb92-4"><a href="#cb92-4" aria-hidden="true" tabindex="-1"></a> put_volume <span class="sc">+</span></span>
|
||
<span id="cb92-5"><a href="#cb92-5" aria-hidden="true" tabindex="-1"></a> call_oi <span class="sc">+</span></span>
|
||
<span id="cb92-6"><a href="#cb92-6" aria-hidden="true" tabindex="-1"></a> put_oi <span class="sc">+</span></span>
|
||
<span id="cb92-7"><a href="#cb92-7" aria-hidden="true" tabindex="-1"></a> maturity_count <span class="sc">+</span></span>
|
||
<span id="cb92-8"><a href="#cb92-8" aria-hidden="true" tabindex="-1"></a> total_contracts <span class="sc">+</span></span>
|
||
<span id="cb92-9"><a href="#cb92-9" aria-hidden="true" tabindex="-1"></a> realized_vol_long <span class="sc">+</span></span>
|
||
<span id="cb92-10"><a href="#cb92-10" aria-hidden="true" tabindex="-1"></a> realized_vol_mid <span class="sc">+</span></span>
|
||
<span id="cb92-11"><a href="#cb92-11" aria-hidden="true" tabindex="-1"></a> realized_vol_short <span class="sc">+</span></span>
|
||
<span id="cb92-12"><a href="#cb92-12" aria-hidden="true" tabindex="-1"></a> market_vol_index <span class="sc">+</span></span>
|
||
<span id="cb92-13"><a href="#cb92-13" aria-hidden="true" tabindex="-1"></a> pulse_ratio <span class="sc">+</span></span>
|
||
<span id="cb92-14"><a href="#cb92-14" aria-hidden="true" tabindex="-1"></a> put_call_ratio_volume <span class="sc">+</span></span>
|
||
<span id="cb92-15"><a href="#cb92-15" aria-hidden="true" tabindex="-1"></a> put_call_ratio_oi <span class="sc">+</span></span>
|
||
<span id="cb92-16"><a href="#cb92-16" aria-hidden="true" tabindex="-1"></a> liquidity_ratio <span class="sc">+</span></span>
|
||
<span id="cb92-17"><a href="#cb92-17" aria-hidden="true" tabindex="-1"></a> option_dispersion <span class="sc">+</span></span>
|
||
<span id="cb92-18"><a href="#cb92-18" aria-hidden="true" tabindex="-1"></a> put_low_strike <span class="sc">+</span></span>
|
||
<span id="cb92-19"><a href="#cb92-19" aria-hidden="true" tabindex="-1"></a> put_proportion <span class="sc">+</span></span>
|
||
<span id="cb92-20"><a href="#cb92-20" aria-hidden="true" tabindex="-1"></a> stress_spread <span class="sc">+</span></span>
|
||
<span id="cb92-21"><a href="#cb92-21" aria-hidden="true" tabindex="-1"></a> (<span class="dv">1</span> <span class="sc">|</span> asset_id),</span>
|
||
<span id="cb92-22"><a href="#cb92-22" aria-hidden="true" tabindex="-1"></a> <span class="at">data =</span> train_linear</span>
|
||
<span id="cb92-23"><a href="#cb92-23" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb92-24"><a href="#cb92-24" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb92-25"><a href="#cb92-25" aria-hidden="true" tabindex="-1"></a><span class="fu">summary</span>(mod_lmm_1)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb93"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb93-1"><a href="#cb93-1" aria-hidden="true" tabindex="-1"></a>predictions_log_1 <span class="ot"><-</span> <span class="fu">predict</span>(</span>
|
||
<span id="cb93-2"><a href="#cb93-2" aria-hidden="true" tabindex="-1"></a> mod_lmm_1,</span>
|
||
<span id="cb93-3"><a href="#cb93-3" aria-hidden="true" tabindex="-1"></a> <span class="at">newdata =</span> val_linear,</span>
|
||
<span id="cb93-4"><a href="#cb93-4" aria-hidden="true" tabindex="-1"></a> <span class="at">allow.new.levels =</span> <span class="cn">TRUE</span></span>
|
||
<span id="cb93-5"><a href="#cb93-5" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb93-6"><a href="#cb93-6" aria-hidden="true" tabindex="-1"></a>predictions_real_1 <span class="ot"><-</span> <span class="fu">exp</span>(predictions_log_1)</span>
|
||
<span id="cb93-7"><a href="#cb93-7" aria-hidden="true" tabindex="-1"></a>erreurs_1 <span class="ot"><-</span> val_linear<span class="sc">$</span>implied_vol_ref <span class="sc">-</span> predictions_real_1</span>
|
||
<span id="cb93-8"><a href="#cb93-8" aria-hidden="true" tabindex="-1"></a>rmse_score_1 <span class="ot"><-</span> <span class="fu">sqrt</span>(<span class="fu">mean</span>(erreurs_1<span class="sc">^</span><span class="dv">2</span>))</span>
|
||
<span id="cb93-9"><a href="#cb93-9" aria-hidden="true" tabindex="-1"></a><span class="fu">print</span>(<span class="fu">paste</span>(<span class="st">"RMSE of the first LMM :"</span>, <span class="fu">round</span>(rmse_score_1, <span class="dv">4</span>)))</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
<p>This baseline model achieved a Root Mean Square Error (RMSE) of 8.77. It is an improvement over the other linear models. However, while it captures the general variance, the inclusion of all variables likely introduced multicollinearity, inflating the standard errors of the coefficients.</p>
|
||
</section>
|
||
<section id="model-lmm-2-feature-selection-and-collinearity-reduction" class="level4">
|
||
<h4 class="anchored" data-anchor-id="model-lmm-2-feature-selection-and-collinearity-reduction">Model LMM 2: Feature Selection and Collinearity Reduction</h4>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb94"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb94-1"><a href="#cb94-1" aria-hidden="true" tabindex="-1"></a>num_var <span class="ot"><-</span> train_linear <span class="sc">|></span> dplyr<span class="sc">::</span><span class="fu">select</span>(<span class="sc">-</span>asset_id, <span class="sc">-</span>obs_date)</span>
|
||
<span id="cb94-2"><a href="#cb94-2" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb94-3"><a href="#cb94-3" aria-hidden="true" tabindex="-1"></a>correlation_matrix <span class="ot"><-</span> <span class="fu">cor</span>(num_var, <span class="at">method =</span> <span class="st">"pearson"</span>, <span class="at">use =</span> <span class="st">"complete.obs"</span>)</span>
|
||
<span id="cb94-4"><a href="#cb94-4" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb94-5"><a href="#cb94-5" aria-hidden="true" tabindex="-1"></a>melted_cormat <span class="ot"><-</span> reshape2<span class="sc">::</span><span class="fu">melt</span>(correlation_matrix)</span>
|
||
<span id="cb94-6"><a href="#cb94-6" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb94-7"><a href="#cb94-7" aria-hidden="true" tabindex="-1"></a><span class="fu">ggplot</span>(melted_cormat, <span class="fu">aes</span>(<span class="at">x =</span> Var1, <span class="at">y =</span> Var2, <span class="at">fill =</span> value)) <span class="sc">+</span></span>
|
||
<span id="cb94-8"><a href="#cb94-8" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_tile</span>() <span class="sc">+</span></span>
|
||
<span id="cb94-9"><a href="#cb94-9" aria-hidden="true" tabindex="-1"></a> <span class="fu">scale_fill_gradientn</span>(<span class="at">colors =</span> <span class="fu">paletteer_c</span>(<span class="st">"grDevices::Sunset"</span>, <span class="dv">30</span>)) <span class="sc">+</span></span>
|
||
<span id="cb94-10"><a href="#cb94-10" aria-hidden="true" tabindex="-1"></a> <span class="fu">labs</span>(</span>
|
||
<span id="cb94-11"><a href="#cb94-11" aria-hidden="true" tabindex="-1"></a> <span class="at">title =</span> <span class="st">"Correlation Matrix for Numerical Variables"</span>,</span>
|
||
<span id="cb94-12"><a href="#cb94-12" aria-hidden="true" tabindex="-1"></a> <span class="at">x =</span> <span class="cn">NULL</span>,</span>
|
||
<span id="cb94-13"><a href="#cb94-13" aria-hidden="true" tabindex="-1"></a> <span class="at">y =</span> <span class="cn">NULL</span>,</span>
|
||
<span id="cb94-14"><a href="#cb94-14" aria-hidden="true" tabindex="-1"></a> <span class="at">fill =</span> <span class="st">"Corr"</span></span>
|
||
<span id="cb94-15"><a href="#cb94-15" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">+</span></span>
|
||
<span id="cb94-16"><a href="#cb94-16" aria-hidden="true" tabindex="-1"></a> <span class="fu">theme_minimal</span>() <span class="sc">+</span></span>
|
||
<span id="cb94-17"><a href="#cb94-17" aria-hidden="true" tabindex="-1"></a> <span class="fu">theme</span>(<span class="at">axis.text.x =</span> <span class="fu">element_text</span>(<span class="at">angle =</span> <span class="dv">45</span>, <span class="at">hjust =</span> <span class="dv">1</span>))</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output-display">
|
||
<div class="quarto-figure quarto-figure-center">
|
||
<figure class="figure">
|
||
<p><img src="Projet_MRC_DANJOU_LEGRAND_MERIC_VONSIEMENS_files/figure-html/correlation%20matrix-1.png" class="img-fluid quarto-figure quarto-figure-center figure-img" alt="Heatmap of the Pearson correlation matrix for the numerical variables" width="672"></p>
|
||
</figure>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
<p>To improve robustness, we analyzed the correlation matrix. We removed redundant variables that carried duplicate information. This simplification stabilized the model without significantly degrading the RMSE, confirming that a parsimonious set of features is sufficient to describe market dynamics.</p>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb95"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb95-1"><a href="#cb95-1" aria-hidden="true" tabindex="-1"></a>mod_lmm_2 <span class="ot"><-</span> <span class="fu">lmer</span>(</span>
|
||
<span id="cb95-2"><a href="#cb95-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">log</span>(implied_vol_ref) <span class="sc">~</span> strike_dispersion <span class="sc">+</span></span>
|
||
<span id="cb95-3"><a href="#cb95-3" aria-hidden="true" tabindex="-1"></a> call_volume <span class="sc">+</span></span>
|
||
<span id="cb95-4"><a href="#cb95-4" aria-hidden="true" tabindex="-1"></a> call_oi <span class="sc">+</span></span>
|
||
<span id="cb95-5"><a href="#cb95-5" aria-hidden="true" tabindex="-1"></a> maturity_count <span class="sc">+</span></span>
|
||
<span id="cb95-6"><a href="#cb95-6" aria-hidden="true" tabindex="-1"></a> total_contracts <span class="sc">+</span></span>
|
||
<span id="cb95-7"><a href="#cb95-7" aria-hidden="true" tabindex="-1"></a> realized_vol_short <span class="sc">+</span></span>
|
||
<span id="cb95-8"><a href="#cb95-8" aria-hidden="true" tabindex="-1"></a> realized_vol_mid <span class="sc">+</span></span>
|
||
<span id="cb95-9"><a href="#cb95-9" aria-hidden="true" tabindex="-1"></a> realized_vol_long <span class="sc">+</span></span>
|
||
<span id="cb95-10"><a href="#cb95-10" aria-hidden="true" tabindex="-1"></a> market_vol_index <span class="sc">+</span></span>
|
||
<span id="cb95-11"><a href="#cb95-11" aria-hidden="true" tabindex="-1"></a> pulse_ratio <span class="sc">+</span></span>
|
||
<span id="cb95-12"><a href="#cb95-12" aria-hidden="true" tabindex="-1"></a> put_call_ratio_volume <span class="sc">+</span></span>
|
||
<span id="cb95-13"><a href="#cb95-13" aria-hidden="true" tabindex="-1"></a> liquidity_ratio <span class="sc">+</span></span>
|
||
<span id="cb95-14"><a href="#cb95-14" aria-hidden="true" tabindex="-1"></a> option_dispersion <span class="sc">+</span></span>
|
||
<span id="cb95-15"><a href="#cb95-15" aria-hidden="true" tabindex="-1"></a> put_low_strike <span class="sc">+</span></span>
|
||
<span id="cb95-16"><a href="#cb95-16" aria-hidden="true" tabindex="-1"></a> put_proportion <span class="sc">+</span></span>
|
||
<span id="cb95-17"><a href="#cb95-17" aria-hidden="true" tabindex="-1"></a> stress_spread <span class="sc">+</span></span>
|
||
<span id="cb95-18"><a href="#cb95-18" aria-hidden="true" tabindex="-1"></a> (<span class="dv">1</span> <span class="sc">|</span> asset_id),</span>
|
||
<span id="cb95-19"><a href="#cb95-19" aria-hidden="true" tabindex="-1"></a> <span class="at">data =</span> train_linear</span>
|
||
<span id="cb95-20"><a href="#cb95-20" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb95-21"><a href="#cb95-21" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb95-22"><a href="#cb95-22" aria-hidden="true" tabindex="-1"></a><span class="fu">summary</span>(mod_lmm_2)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb96"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb96-1"><a href="#cb96-1" aria-hidden="true" tabindex="-1"></a>predictions_log_2 <span class="ot"><-</span> <span class="fu">predict</span>(</span>
|
||
<span id="cb96-2"><a href="#cb96-2" aria-hidden="true" tabindex="-1"></a> mod_lmm_2,</span>
|
||
<span id="cb96-3"><a href="#cb96-3" aria-hidden="true" tabindex="-1"></a> <span class="at">newdata =</span> val_linear,</span>
|
||
<span id="cb96-4"><a href="#cb96-4" aria-hidden="true" tabindex="-1"></a> <span class="at">allow.new.levels =</span> <span class="cn">TRUE</span></span>
|
||
<span id="cb96-5"><a href="#cb96-5" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb96-6"><a href="#cb96-6" aria-hidden="true" tabindex="-1"></a>predictions_real_2 <span class="ot"><-</span> <span class="fu">exp</span>(predictions_log_2)</span>
|
||
<span id="cb96-7"><a href="#cb96-7" aria-hidden="true" tabindex="-1"></a>erreurs_2 <span class="ot"><-</span> val_linear<span class="sc">$</span>implied_vol_ref <span class="sc">-</span> predictions_real_2</span>
|
||
<span id="cb96-8"><a href="#cb96-8" aria-hidden="true" tabindex="-1"></a>rmse_score_2 <span class="ot"><-</span> <span class="fu">sqrt</span>(<span class="fu">mean</span>(erreurs_2<span class="sc">^</span><span class="dv">2</span>))</span>
|
||
<span id="cb96-9"><a href="#cb96-9" aria-hidden="true" tabindex="-1"></a><span class="fu">print</span>(<span class="fu">paste</span>(<span class="st">"RMSE of the second LMM :"</span>, <span class="fu">round</span>(rmse_score_2, <span class="dv">4</span>)))</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
</section>
|
||
<section id="model-lmm-3-financial-interactions" class="level4">
|
||
<h4 class="anchored" data-anchor-id="model-lmm-3-financial-interactions">Model LMM 3: Financial Interactions</h4>
|
||
<p>Markets are non-linear systems where factors amplify each other. Based on our financial analysis, we introduced five key interaction terms to capture these complex dynamics:</p>
|
||
<section id="the-volatility-beta-effect" class="level6">
|
||
<h6 class="anchored" data-anchor-id="the-volatility-beta-effect">The volatility Beta effect:</h6>
|
||
<p><span class="math display">\[
|
||
\texttt{market_vol_index : realized_vol_long}
|
||
\]</span></p>
|
||
<p>This interaction captures the Volatility Beta Effect. It measures the sensitivity of the underlying’s long-term realized volatility compared to the market volatility index. It gives an indication of how the volatility of the underlying is impacted by the market environment, that is, whether markets are calm or panicking.</p>
|
||
</section>
|
||
<section id="the-fear-factor-interaction" class="level6">
|
||
<h6 class="anchored" data-anchor-id="the-fear-factor-interaction">The fear factor interaction:</h6>
|
||
<p><span class="math display">\[
|
||
\texttt{put_call_ratio_volume : stress_spread}
|
||
\]</span></p>
|
||
<p>This Fear Factor Interaction is a combination of the market sentiment (the Put Call Volume Ratio) and the idiosyncratic stress specific to the underlying. It helps to make the distinction between daily hedging and “urgent hedging” caused by market panic. High idiosyncratic risk combined with a high Put Call Volume Ratio confirms a strong panic signal that should trigger a massive surge in volatility.</p>
|
||
</section>
|
||
<section id="the-market-depth-ratio" class="level6">
|
||
<h6 class="anchored" data-anchor-id="the-market-depth-ratio">The market depth ratio:</h6>
|
||
<p><span class="math display">\[
|
||
\texttt{total_contracts : liquidity_ratio}
|
||
\]</span></p>
|
||
<p>The Market Depth Ratio measures the impact of transactions on the market’s structural stability. Depending on liquidity and how deep the market is, a high nominal transaction will have a different impact on volatility. In a deep market, a surge in contracts is absorbed smoothly, which is not the case for very thin markets.</p>
|
||
</section>
|
||
<section id="the-skew-tension-ratio" class="level6">
|
||
<h6 class="anchored" data-anchor-id="the-skew-tension-ratio">The skew tension ratio:</h6>
|
||
<p><span class="math display">\[
|
||
\texttt{put_low_strike : market_vol_index}
|
||
\]</span></p>
|
||
<p>The Skew Tension Ratio indicates how convex the fear is. In a stable environment, crash protection is cheap. When risk appears, investors buy put options for protection, which pushes volatility up and leads to higher option prices. This ratio integrates the skew into the model to take into account the sudden surge in implied volatility that occurs when market participants stop calculating value and start buying protection at any price.</p>
|
||
</section>
|
||
<section id="the-volatility-shock-ratio" class="level6">
|
||
<h6 class="anchored" data-anchor-id="the-volatility-shock-ratio">The volatility shock ratio:</h6>
|
||
<p><span class="math display">\[
|
||
\texttt{realized_vol_short : realized_vol_long}
|
||
\]</span></p>
|
||
<p>The Volatility Shock Ratio compares short-term realized volatility to long-term realized volatility. It identifies the nature of a volatility spike based on the principle of “Mean Reversion”: volatility should, in the long term, converge to its mean. The point is to distinguish between a usual level of risk and a stress peak, which helps to understand whether the current price movement is a passing anomaly or a fundamental shift in the risk profile.</p>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb97"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb97-1"><a href="#cb97-1" aria-hidden="true" tabindex="-1"></a>mod_lmm_3 <span class="ot"><-</span> <span class="fu">lmer</span>(</span>
|
||
<span id="cb97-2"><a href="#cb97-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">log</span>(implied_vol_ref) <span class="sc">~</span></span>
|
||
<span id="cb97-3"><a href="#cb97-3" aria-hidden="true" tabindex="-1"></a> realized_vol_long <span class="sc">*</span></span>
|
||
<span id="cb97-4"><a href="#cb97-4" aria-hidden="true" tabindex="-1"></a> market_vol_index <span class="sc">+</span></span>
|
||
<span id="cb97-5"><a href="#cb97-5" aria-hidden="true" tabindex="-1"></a> put_call_ratio_volume <span class="sc">*</span> stress_spread <span class="sc">+</span></span>
|
||
<span id="cb97-6"><a href="#cb97-6" aria-hidden="true" tabindex="-1"></a> total_contracts <span class="sc">*</span> liquidity_ratio <span class="sc">+</span></span>
|
||
<span id="cb97-7"><a href="#cb97-7" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb97-8"><a href="#cb97-8" aria-hidden="true" tabindex="-1"></a> put_low_strike<span class="sc">:</span>market_vol_index <span class="sc">+</span></span>
|
||
<span id="cb97-9"><a href="#cb97-9" aria-hidden="true" tabindex="-1"></a> realized_vol_short<span class="sc">:</span>realized_vol_long <span class="sc">+</span></span>
|
||
<span id="cb97-10"><a href="#cb97-10" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb97-11"><a href="#cb97-11" aria-hidden="true" tabindex="-1"></a> strike_dispersion <span class="sc">+</span></span>
|
||
<span id="cb97-12"><a href="#cb97-12" aria-hidden="true" tabindex="-1"></a> call_volume <span class="sc">+</span></span>
|
||
<span id="cb97-13"><a href="#cb97-13" aria-hidden="true" tabindex="-1"></a> call_oi <span class="sc">+</span></span>
|
||
<span id="cb97-14"><a href="#cb97-14" aria-hidden="true" tabindex="-1"></a> maturity_count <span class="sc">+</span></span>
|
||
<span id="cb97-15"><a href="#cb97-15" aria-hidden="true" tabindex="-1"></a> realized_vol_short <span class="sc">+</span></span>
|
||
<span id="cb97-16"><a href="#cb97-16" aria-hidden="true" tabindex="-1"></a> realized_vol_mid <span class="sc">+</span></span>
|
||
<span id="cb97-17"><a href="#cb97-17" aria-hidden="true" tabindex="-1"></a> put_low_strike <span class="sc">+</span></span>
|
||
<span id="cb97-18"><a href="#cb97-18" aria-hidden="true" tabindex="-1"></a> put_proportion <span class="sc">+</span></span>
|
||
<span id="cb97-19"><a href="#cb97-19" aria-hidden="true" tabindex="-1"></a> option_dispersion <span class="sc">+</span></span>
|
||
<span id="cb97-20"><a href="#cb97-20" aria-hidden="true" tabindex="-1"></a> pulse_ratio <span class="sc">+</span></span>
|
||
<span id="cb97-21"><a href="#cb97-21" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb97-22"><a href="#cb97-22" aria-hidden="true" tabindex="-1"></a> (<span class="dv">1</span> <span class="sc">|</span> asset_id),</span>
|
||
<span id="cb97-23"><a href="#cb97-23" aria-hidden="true" tabindex="-1"></a> <span class="at">data =</span> train_linear</span>
|
||
<span id="cb97-24"><a href="#cb97-24" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb97-25"><a href="#cb97-25" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb97-26"><a href="#cb97-26" aria-hidden="true" tabindex="-1"></a><span class="fu">summary</span>(mod_lmm_3)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb98"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb98-1"><a href="#cb98-1" aria-hidden="true" tabindex="-1"></a>predictions_log_3 <span class="ot"><-</span> <span class="fu">predict</span>(</span>
|
||
<span id="cb98-2"><a href="#cb98-2" aria-hidden="true" tabindex="-1"></a> mod_lmm_3,</span>
|
||
<span id="cb98-3"><a href="#cb98-3" aria-hidden="true" tabindex="-1"></a> <span class="at">newdata =</span> val_linear,</span>
|
||
<span id="cb98-4"><a href="#cb98-4" aria-hidden="true" tabindex="-1"></a> <span class="at">allow.new.levels =</span> <span class="cn">TRUE</span></span>
|
||
<span id="cb98-5"><a href="#cb98-5" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb98-6"><a href="#cb98-6" aria-hidden="true" tabindex="-1"></a>predictions_real_3 <span class="ot"><-</span> <span class="fu">exp</span>(predictions_log_3)</span>
|
||
<span id="cb98-7"><a href="#cb98-7" aria-hidden="true" tabindex="-1"></a>erreurs_3 <span class="ot"><-</span> val_linear<span class="sc">$</span>implied_vol_ref <span class="sc">-</span> predictions_real_3</span>
|
||
<span id="cb98-8"><a href="#cb98-8" aria-hidden="true" tabindex="-1"></a>rmse_score_3 <span class="ot"><-</span> <span class="fu">sqrt</span>(<span class="fu">mean</span>(erreurs_3<span class="sc">^</span><span class="dv">2</span>))</span>
|
||
<span id="cb98-9"><a href="#cb98-9" aria-hidden="true" tabindex="-1"></a><span class="fu">print</span>(<span class="fu">paste</span>(<span class="st">"RMSE of the third LMM :"</span>, <span class="fu">round</span>(rmse_score_3, <span class="dv">4</span>)))</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
</section>
|
||
</section>
|
||
<section id="model-lmm-4-addition-of-quadratic-terms" class="level4">
|
||
<h4 class="anchored" data-anchor-id="model-lmm-4-addition-of-quadratic-terms">Model LMM 4: Addition of Quadratic Terms</h4>
|
||
<p>Volatility often exhibits a convex behavior (the “Vol of Vol”). Extreme variations in realized volatility tend to have a disproportionate impact on implied volatility. To capture this, we added squared terms (<span class="math inline">\(X^2\)</span>) for the most significant variables: <code>realized_vol_short</code>, <code>realized_vol_long</code>, <code>market_vol_index</code>, and <code>pulse_ratio</code>.</p>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb99"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb99-1"><a href="#cb99-1" aria-hidden="true" tabindex="-1"></a>mod_lmm_4 <span class="ot"><-</span> <span class="fu">lmer</span>(</span>
|
||
<span id="cb99-2"><a href="#cb99-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">log</span>(implied_vol_ref) <span class="sc">~</span></span>
|
||
<span id="cb99-3"><a href="#cb99-3" aria-hidden="true" tabindex="-1"></a> realized_vol_long <span class="sc">*</span></span>
|
||
<span id="cb99-4"><a href="#cb99-4" aria-hidden="true" tabindex="-1"></a> market_vol_index <span class="sc">+</span></span>
|
||
<span id="cb99-5"><a href="#cb99-5" aria-hidden="true" tabindex="-1"></a> put_call_ratio_volume <span class="sc">*</span> stress_spread <span class="sc">+</span></span>
|
||
<span id="cb99-6"><a href="#cb99-6" aria-hidden="true" tabindex="-1"></a> total_contracts <span class="sc">*</span> liquidity_ratio <span class="sc">+</span></span>
|
||
<span id="cb99-7"><a href="#cb99-7" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb99-8"><a href="#cb99-8" aria-hidden="true" tabindex="-1"></a> put_low_strike<span class="sc">:</span>market_vol_index <span class="sc">+</span></span>
|
||
<span id="cb99-9"><a href="#cb99-9" aria-hidden="true" tabindex="-1"></a> realized_vol_short<span class="sc">:</span>realized_vol_long <span class="sc">+</span></span>
|
||
<span id="cb99-10"><a href="#cb99-10" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb99-11"><a href="#cb99-11" aria-hidden="true" tabindex="-1"></a> strike_dispersion <span class="sc">+</span></span>
|
||
<span id="cb99-12"><a href="#cb99-12" aria-hidden="true" tabindex="-1"></a> call_volume <span class="sc">+</span></span>
|
||
<span id="cb99-13"><a href="#cb99-13" aria-hidden="true" tabindex="-1"></a> call_oi <span class="sc">+</span></span>
|
||
<span id="cb99-14"><a href="#cb99-14" aria-hidden="true" tabindex="-1"></a> maturity_count <span class="sc">+</span></span>
|
||
<span id="cb99-15"><a href="#cb99-15" aria-hidden="true" tabindex="-1"></a> realized_vol_short <span class="sc">+</span></span>
|
||
<span id="cb99-16"><a href="#cb99-16" aria-hidden="true" tabindex="-1"></a> realized_vol_mid <span class="sc">+</span></span>
|
||
<span id="cb99-17"><a href="#cb99-17" aria-hidden="true" tabindex="-1"></a> put_low_strike <span class="sc">+</span></span>
|
||
<span id="cb99-18"><a href="#cb99-18" aria-hidden="true" tabindex="-1"></a> put_proportion <span class="sc">+</span></span>
|
||
<span id="cb99-19"><a href="#cb99-19" aria-hidden="true" tabindex="-1"></a> option_dispersion <span class="sc">+</span></span>
|
||
<span id="cb99-20"><a href="#cb99-20" aria-hidden="true" tabindex="-1"></a> pulse_ratio <span class="sc">+</span></span>
|
||
<span id="cb99-21"><a href="#cb99-21" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb99-22"><a href="#cb99-22" aria-hidden="true" tabindex="-1"></a> <span class="fu">I</span>(realized_vol_short<span class="sc">^</span><span class="dv">2</span>) <span class="sc">+</span></span>
|
||
<span id="cb99-23"><a href="#cb99-23" aria-hidden="true" tabindex="-1"></a> <span class="fu">I</span>(market_vol_index<span class="sc">^</span><span class="dv">2</span>) <span class="sc">+</span></span>
|
||
<span id="cb99-24"><a href="#cb99-24" aria-hidden="true" tabindex="-1"></a> <span class="fu">I</span>(realized_vol_long<span class="sc">^</span><span class="dv">2</span>) <span class="sc">+</span></span>
|
||
<span id="cb99-25"><a href="#cb99-25" aria-hidden="true" tabindex="-1"></a> <span class="fu">I</span>(pulse_ratio<span class="sc">^</span><span class="dv">2</span>) <span class="sc">+</span></span>
|
||
<span id="cb99-26"><a href="#cb99-26" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb99-27"><a href="#cb99-27" aria-hidden="true" tabindex="-1"></a> (<span class="dv">1</span> <span class="sc">|</span> asset_id),</span>
|
||
<span id="cb99-28"><a href="#cb99-28" aria-hidden="true" tabindex="-1"></a> <span class="at">data =</span> train_linear</span>
|
||
<span id="cb99-29"><a href="#cb99-29" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb99-30"><a href="#cb99-30" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb99-31"><a href="#cb99-31" aria-hidden="true" tabindex="-1"></a><span class="fu">summary</span>(mod_lmm_4)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output cell-output-stdout">
|
||
<pre><code>Linear mixed model fit by REML. t-tests use Satterthwaite's method [
|
||
lmerModLmerTest]
|
||
Formula: log(implied_vol_ref) ~ realized_vol_long * market_vol_index +
|
||
put_call_ratio_volume * stress_spread + total_contracts *
|
||
liquidity_ratio + put_low_strike:market_vol_index + realized_vol_short:realized_vol_long +
|
||
strike_dispersion + call_volume + call_oi + maturity_count +
|
||
realized_vol_short + realized_vol_mid + put_low_strike +
|
||
put_proportion + option_dispersion + pulse_ratio + I(realized_vol_short^2) +
|
||
I(market_vol_index^2) + I(realized_vol_long^2) + I(pulse_ratio^2) +
|
||
(1 | asset_id)
|
||
Data: train_linear
|
||
|
||
REML criterion at convergence: -506957.9
|
||
|
||
Scaled residuals:
|
||
Min 1Q Median 3Q Max
|
||
-22.1432 -0.3906 0.0188 0.4271 12.0260
|
||
|
||
Random effects:
|
||
Groups Name Variance Std.Dev.
|
||
asset_id (Intercept) 0.07753 0.2784
|
||
Residual 0.04137 0.2034
|
||
Number of obs: 1533234, groups: asset_id, 3886
|
||
|
||
Fixed effects:
|
||
Estimate Std. Error df t value
|
||
(Intercept) 3.742e+00 4.492e-03 3.867e+03 833.120
|
||
realized_vol_long 2.739e-03 2.411e-03 1.531e+06 1.136
|
||
market_vol_index 1.245e-01 6.470e-04 1.532e+06 192.344
|
||
put_call_ratio_volume 8.798e-03 2.364e-04 1.531e+06 37.217
|
||
stress_spread 3.691e-02 1.146e-03 1.530e+06 32.209
|
||
total_contracts -3.338e-02 6.236e-03 1.532e+06 -5.352
|
||
liquidity_ratio -1.142e-02 3.397e-04 1.532e+06 -33.624
|
||
strike_dispersion 7.688e-02 6.692e-03 1.532e+06 11.489
|
||
call_volume 2.207e-02 6.914e-04 1.531e+06 31.921
|
||
call_oi -1.782e-02 7.491e-04 1.532e+06 -23.789
|
||
maturity_count 2.737e-03 4.421e-04 1.533e+06 6.192
|
||
realized_vol_short 2.723e-01 3.209e-03 1.531e+06 84.858
|
||
realized_vol_mid 1.735e-02 6.415e-04 1.530e+06 27.053
|
||
put_low_strike 1.212e-02 3.792e-04 1.531e+06 31.972
|
||
put_proportion 2.784e-03 2.847e-04 1.531e+06 9.778
|
||
option_dispersion -8.780e-02 1.186e-02 1.532e+06 -7.404
|
||
pulse_ratio -1.513e-01 1.801e-03 1.531e+06 -84.026
|
||
I(realized_vol_short^2) -3.701e-02 4.622e-04 1.531e+06 -80.059
|
||
I(market_vol_index^2) -4.417e-03 1.080e-04 1.533e+06 -40.908
|
||
I(realized_vol_long^2) -1.276e-02 3.811e-04 1.532e+06 -33.469
|
||
I(pulse_ratio^2) 1.220e-02 2.064e-04 1.531e+06 59.097
|
||
realized_vol_long:market_vol_index -2.543e-02 3.112e-04 1.531e+06 -81.700
|
||
put_call_ratio_volume:stress_spread 4.601e-03 1.245e-04 1.530e+06 36.963
|
||
total_contracts:liquidity_ratio -2.600e-03 2.485e-04 1.532e+06 -10.465
|
||
market_vol_index:put_low_strike -2.088e-03 1.728e-04 1.530e+06 -12.081
|
||
realized_vol_long:realized_vol_short 4.591e-02 7.569e-04 1.531e+06 60.657
|
||
Pr(>|t|)
|
||
(Intercept) < 2e-16 ***
|
||
realized_vol_long 0.256
|
||
market_vol_index < 2e-16 ***
|
||
put_call_ratio_volume < 2e-16 ***
|
||
stress_spread < 2e-16 ***
|
||
total_contracts 8.68e-08 ***
|
||
liquidity_ratio < 2e-16 ***
|
||
strike_dispersion < 2e-16 ***
|
||
call_volume < 2e-16 ***
|
||
call_oi < 2e-16 ***
|
||
maturity_count 5.94e-10 ***
|
||
realized_vol_short < 2e-16 ***
|
||
realized_vol_mid < 2e-16 ***
|
||
put_low_strike < 2e-16 ***
|
||
put_proportion < 2e-16 ***
|
||
option_dispersion 1.32e-13 ***
|
||
pulse_ratio < 2e-16 ***
|
||
I(realized_vol_short^2) < 2e-16 ***
|
||
I(market_vol_index^2) < 2e-16 ***
|
||
I(realized_vol_long^2) < 2e-16 ***
|
||
I(pulse_ratio^2) < 2e-16 ***
|
||
realized_vol_long:market_vol_index < 2e-16 ***
|
||
put_call_ratio_volume:stress_spread < 2e-16 ***
|
||
total_contracts:liquidity_ratio < 2e-16 ***
|
||
market_vol_index:put_low_strike < 2e-16 ***
|
||
realized_vol_long:realized_vol_short < 2e-16 ***
|
||
---
|
||
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1</code></pre>
|
||
</div>
|
||
</div>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb101"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb101-1"><a href="#cb101-1" aria-hidden="true" tabindex="-1"></a>predictions_log_4 <span class="ot"><-</span> <span class="fu">predict</span>(</span>
|
||
<span id="cb101-2"><a href="#cb101-2" aria-hidden="true" tabindex="-1"></a> mod_lmm_4,</span>
|
||
<span id="cb101-3"><a href="#cb101-3" aria-hidden="true" tabindex="-1"></a> <span class="at">newdata =</span> val_linear,</span>
|
||
<span id="cb101-4"><a href="#cb101-4" aria-hidden="true" tabindex="-1"></a> <span class="at">allow.new.levels =</span> <span class="cn">TRUE</span></span>
|
||
<span id="cb101-5"><a href="#cb101-5" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb101-6"><a href="#cb101-6" aria-hidden="true" tabindex="-1"></a>predictions_real_4 <span class="ot"><-</span> <span class="fu">exp</span>(predictions_log_4)</span>
|
||
<span id="cb101-7"><a href="#cb101-7" aria-hidden="true" tabindex="-1"></a>erreurs_4 <span class="ot"><-</span> val_linear<span class="sc">$</span>implied_vol_ref <span class="sc">-</span> predictions_real_4</span>
|
||
<span id="cb101-8"><a href="#cb101-8" aria-hidden="true" tabindex="-1"></a>rmse_score_4 <span class="ot"><-</span> <span class="fu">sqrt</span>(<span class="fu">mean</span>(erreurs_4<span class="sc">^</span><span class="dv">2</span>))</span>
|
||
<span id="cb101-9"><a href="#cb101-9" aria-hidden="true" tabindex="-1"></a><span class="fu">print</span>(<span class="fu">paste</span>(<span class="st">"RMSE of the fourth LMM :"</span>, <span class="fu">round</span>(rmse_score_4, <span class="dv">4</span>)))</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output cell-output-stdout">
|
||
<pre><code>[1] "RMSE of the fourth LMM : 8.3192"</code></pre>
|
||
</div>
|
||
</div>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb103"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb103-1"><a href="#cb103-1" aria-hidden="true" tabindex="-1"></a>selected_assets <span class="ot"><-</span> <span class="fu">sample</span>(<span class="fu">unique</span>(val_linear<span class="sc">$</span>asset_id), <span class="dv">5</span>)</span>
|
||
<span id="cb103-2"><a href="#cb103-2" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb103-3"><a href="#cb103-3" aria-hidden="true" tabindex="-1"></a>n_train_display <span class="ot"><-</span> <span class="fu">floor</span>(<span class="dv">544</span> <span class="sc">/</span> <span class="dv">2</span>)</span>
|
||
<span id="cb103-4"><a href="#cb103-4" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb103-5"><a href="#cb103-5" aria-hidden="true" tabindex="-1"></a>dates_train_all <span class="ot"><-</span> <span class="fu">sort</span>(<span class="fu">unique</span>(train_linear<span class="sc">$</span>obs_date))</span>
|
||
<span id="cb103-6"><a href="#cb103-6" aria-hidden="true" tabindex="-1"></a>dates_train_subset <span class="ot"><-</span> <span class="fu">tail</span>(dates_train_all, n_train_display)</span>
|
||
<span id="cb103-7"><a href="#cb103-7" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb103-8"><a href="#cb103-8" aria-hidden="true" tabindex="-1"></a>df_truth_train <span class="ot"><-</span> train_linear <span class="sc">|></span></span>
|
||
<span id="cb103-9"><a href="#cb103-9" aria-hidden="true" tabindex="-1"></a> <span class="fu">filter</span>(asset_id <span class="sc">%in%</span> selected_assets) <span class="sc">|></span></span>
|
||
<span id="cb103-10"><a href="#cb103-10" aria-hidden="true" tabindex="-1"></a> <span class="fu">filter</span>(obs_date <span class="sc">%in%</span> dates_train_subset) <span class="sc">|></span></span>
|
||
<span id="cb103-11"><a href="#cb103-11" aria-hidden="true" tabindex="-1"></a> dplyr<span class="sc">::</span><span class="fu">select</span>(asset_id, obs_date, implied_vol_ref)</span>
|
||
<span id="cb103-12"><a href="#cb103-12" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb103-13"><a href="#cb103-13" aria-hidden="true" tabindex="-1"></a>df_truth_val <span class="ot"><-</span> val_linear <span class="sc">|></span></span>
|
||
<span id="cb103-14"><a href="#cb103-14" aria-hidden="true" tabindex="-1"></a> <span class="fu">filter</span>(asset_id <span class="sc">%in%</span> selected_assets) <span class="sc">|></span></span>
|
||
<span id="cb103-15"><a href="#cb103-15" aria-hidden="true" tabindex="-1"></a> dplyr<span class="sc">::</span><span class="fu">select</span>(asset_id, obs_date, implied_vol_ref)</span>
|
||
<span id="cb103-16"><a href="#cb103-16" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb103-17"><a href="#cb103-17" aria-hidden="true" tabindex="-1"></a>df_truth_full <span class="ot"><-</span> <span class="fu">bind_rows</span>(df_truth_train, df_truth_val) <span class="sc">|></span></span>
|
||
<span id="cb103-18"><a href="#cb103-18" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate</span>(<span class="at">obs_date =</span> <span class="fu">as.Date</span>(obs_date))</span>
|
||
<span id="cb103-19"><a href="#cb103-19" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb103-20"><a href="#cb103-20" aria-hidden="true" tabindex="-1"></a>df_pred <span class="ot"><-</span> val_linear <span class="sc">|></span></span>
|
||
<span id="cb103-21"><a href="#cb103-21" aria-hidden="true" tabindex="-1"></a> <span class="fu">filter</span>(asset_id <span class="sc">%in%</span> selected_assets) <span class="sc">%>%</span></span>
|
||
<span id="cb103-22"><a href="#cb103-22" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate</span>(</span>
|
||
<span id="cb103-23"><a href="#cb103-23" aria-hidden="true" tabindex="-1"></a> <span class="at">pred_log =</span> <span class="fu">predict</span>(mod_lmm_4, <span class="at">newdata =</span> ., <span class="at">allow.new.levels =</span> <span class="cn">TRUE</span>),</span>
|
||
<span id="cb103-24"><a href="#cb103-24" aria-hidden="true" tabindex="-1"></a> <span class="at">prediction =</span> <span class="fu">exp</span>(pred_log),</span>
|
||
<span id="cb103-25"><a href="#cb103-25" aria-hidden="true" tabindex="-1"></a> <span class="at">obs_date =</span> <span class="fu">as.Date</span>(obs_date)</span>
|
||
<span id="cb103-26"><a href="#cb103-26" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">|></span></span>
|
||
<span id="cb103-27"><a href="#cb103-27" aria-hidden="true" tabindex="-1"></a> dplyr<span class="sc">::</span><span class="fu">select</span>(asset_id, obs_date, prediction)</span>
|
||
<span id="cb103-28"><a href="#cb103-28" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb103-29"><a href="#cb103-29" aria-hidden="true" tabindex="-1"></a><span class="fu">ggplot</span>() <span class="sc">+</span></span>
|
||
<span id="cb103-30"><a href="#cb103-30" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_line</span>(</span>
|
||
<span id="cb103-31"><a href="#cb103-31" aria-hidden="true" tabindex="-1"></a> <span class="at">data =</span> df_truth_full,</span>
|
||
<span id="cb103-32"><a href="#cb103-32" aria-hidden="true" tabindex="-1"></a> <span class="fu">aes</span>(<span class="at">x =</span> obs_date, <span class="at">y =</span> implied_vol_ref, <span class="at">color =</span> <span class="fu">as.factor</span>(asset_id)),</span>
|
||
<span id="cb103-33"><a href="#cb103-33" aria-hidden="true" tabindex="-1"></a> <span class="at">size =</span> <span class="fl">0.7</span>,</span>
|
||
<span id="cb103-34"><a href="#cb103-34" aria-hidden="true" tabindex="-1"></a> <span class="at">alpha =</span> <span class="fl">0.8</span></span>
|
||
<span id="cb103-35"><a href="#cb103-35" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">+</span></span>
|
||
<span id="cb103-36"><a href="#cb103-36" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_line</span>(</span>
|
||
<span id="cb103-37"><a href="#cb103-37" aria-hidden="true" tabindex="-1"></a> <span class="at">data =</span> df_pred,</span>
|
||
<span id="cb103-38"><a href="#cb103-38" aria-hidden="true" tabindex="-1"></a> <span class="fu">aes</span>(<span class="at">x =</span> obs_date, <span class="at">y =</span> prediction, <span class="at">color =</span> <span class="fu">as.factor</span>(asset_id)),</span>
|
||
<span id="cb103-39"><a href="#cb103-39" aria-hidden="true" tabindex="-1"></a> <span class="at">linetype =</span> <span class="st">"dashed"</span>,</span>
|
||
<span id="cb103-40"><a href="#cb103-40" aria-hidden="true" tabindex="-1"></a> <span class="at">size =</span> <span class="fl">0.7</span></span>
|
||
<span id="cb103-41"><a href="#cb103-41" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">+</span></span>
|
||
<span id="cb103-42"><a href="#cb103-42" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_vline</span>(</span>
|
||
<span id="cb103-43"><a href="#cb103-43" aria-hidden="true" tabindex="-1"></a> <span class="at">xintercept =</span> <span class="fu">as.numeric</span>(<span class="fu">min</span>(df_pred<span class="sc">$</span>obs_date)),</span>
|
||
<span id="cb103-44"><a href="#cb103-44" aria-hidden="true" tabindex="-1"></a> <span class="at">linetype =</span> <span class="st">"dotted"</span>,</span>
|
||
<span id="cb103-45"><a href="#cb103-45" aria-hidden="true" tabindex="-1"></a> <span class="at">color =</span> <span class="st">"black"</span>,</span>
|
||
<span id="cb103-46"><a href="#cb103-46" aria-hidden="true" tabindex="-1"></a> <span class="at">size =</span> <span class="dv">1</span></span>
|
||
<span id="cb103-47"><a href="#cb103-47" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">+</span></span>
|
||
<span id="cb103-48"><a href="#cb103-48" aria-hidden="true" tabindex="-1"></a> <span class="fu">theme_minimal</span>() <span class="sc">+</span></span>
|
||
<span id="cb103-49"><a href="#cb103-49" aria-hidden="true" tabindex="-1"></a> <span class="fu">labs</span>(</span>
|
||
<span id="cb103-50"><a href="#cb103-50" aria-hidden="true" tabindex="-1"></a> <span class="at">title =</span> <span class="st">"Predictions (Dashed) vs. Reality (Solid)"</span>,</span>
|
||
<span id="cb103-51"><a href="#cb103-51" aria-hidden="true" tabindex="-1"></a> <span class="at">subtitle =</span> <span class="st">"Visualisation of 10 Random Assets (Focus on Train/Val Transition)"</span>,</span>
|
||
<span id="cb103-52"><a href="#cb103-52" aria-hidden="true" tabindex="-1"></a> <span class="at">x =</span> <span class="st">"Date"</span>,</span>
|
||
<span id="cb103-53"><a href="#cb103-53" aria-hidden="true" tabindex="-1"></a> <span class="at">y =</span> <span class="st">"Implied Volatility"</span>,</span>
|
||
<span id="cb103-54"><a href="#cb103-54" aria-hidden="true" tabindex="-1"></a> <span class="at">color =</span> <span class="st">"Asset ID"</span></span>
|
||
<span id="cb103-55"><a href="#cb103-55" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">+</span></span>
|
||
<span id="cb103-56"><a href="#cb103-56" aria-hidden="true" tabindex="-1"></a> <span class="fu">theme</span>(<span class="at">legend.position =</span> <span class="st">"right"</span>)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output-display">
|
||
<div class="quarto-figure quarto-figure-center">
|
||
<figure class="figure">
|
||
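<p><img src="Projet_MRC_DANJOU_LEGRAND_MERIC_VONSIEMENS_files/figure-html/mod4%20graph-1.png" class="img-fluid quarto-figure quarto-figure-center figure-img" width="672" alt="Line chart of implied volatility over time for a random sample of assets: actual values as solid lines, LMM 4 predictions as dashed lines, with a dotted vertical line marking the train/validation transition"></p>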
|
||
</figure>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
<p>We plotted the predictions against the actual values for a random subset of assets for this model. While the model captures the general trend well, it often underpredicts extreme volatility spikes. This suggests that a simple Random Intercept (<span class="math inline">\(u_i\)</span>) is insufficient: assets do not just have different levels of volatility, they have different sensitivities to market stress. A defensive stock (low beta) and a tech stock (high beta) do not react with the same intensity to a VIX spike.</p>
|
||
</section>
|
||
<section id="model-lmm-5-random-slopes" class="level4">
|
||
<h4 class="anchored" data-anchor-id="model-lmm-5-random-slopes">Model LMM 5: Random Slopes</h4>
|
||
<p>To address this issue, we introduced Random Slopes. Instead of forcing a global coefficient for key variables, we allowed the slope to vary by asset:</p>
|
||
<p><span class="math display">\[
|
||
\log(Y_{it}) = (\beta_{market} + b_{i,m})X_{market,t} + \dots + u_i + \epsilon_{it}
|
||
\]</span></p>
|
||
<p>We included <code>realized_vol_short</code> and <code>realized_vol_long:market_vol_index</code> in the random effects structure <code>(1 + realized_vol_short + realized_vol_long:market_vol_index | asset_id)</code>. This modification allows the model to learn the specific beta and reactivity of each asset, significantly improving the fit for high-volatility profiles.</p>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb104"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb104-1"><a href="#cb104-1" aria-hidden="true" tabindex="-1"></a>mod_lmm_5 <span class="ot"><-</span> <span class="fu">lmer</span>(</span>
|
||
<span id="cb104-2"><a href="#cb104-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">log</span>(implied_vol_ref) <span class="sc">~</span></span>
|
||
<span id="cb104-3"><a href="#cb104-3" aria-hidden="true" tabindex="-1"></a> put_call_ratio_volume <span class="sc">*</span></span>
|
||
<span id="cb104-4"><a href="#cb104-4" aria-hidden="true" tabindex="-1"></a> stress_spread <span class="sc">+</span></span>
|
||
<span id="cb104-5"><a href="#cb104-5" aria-hidden="true" tabindex="-1"></a> total_contracts <span class="sc">*</span> liquidity_ratio <span class="sc">+</span></span>
|
||
<span id="cb104-6"><a href="#cb104-6" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb104-7"><a href="#cb104-7" aria-hidden="true" tabindex="-1"></a> put_low_strike<span class="sc">:</span>market_vol_index <span class="sc">+</span></span>
|
||
<span id="cb104-8"><a href="#cb104-8" aria-hidden="true" tabindex="-1"></a> realized_vol_short<span class="sc">:</span>realized_vol_long <span class="sc">+</span></span>
|
||
<span id="cb104-9"><a href="#cb104-9" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb104-10"><a href="#cb104-10" aria-hidden="true" tabindex="-1"></a> strike_dispersion <span class="sc">+</span></span>
|
||
<span id="cb104-11"><a href="#cb104-11" aria-hidden="true" tabindex="-1"></a> call_volume <span class="sc">+</span></span>
|
||
<span id="cb104-12"><a href="#cb104-12" aria-hidden="true" tabindex="-1"></a> call_oi <span class="sc">+</span></span>
|
||
<span id="cb104-13"><a href="#cb104-13" aria-hidden="true" tabindex="-1"></a> maturity_count <span class="sc">+</span></span>
|
||
<span id="cb104-14"><a href="#cb104-14" aria-hidden="true" tabindex="-1"></a> realized_vol_long <span class="sc">+</span></span>
|
||
<span id="cb104-15"><a href="#cb104-15" aria-hidden="true" tabindex="-1"></a> realized_vol_mid <span class="sc">+</span></span>
|
||
<span id="cb104-16"><a href="#cb104-16" aria-hidden="true" tabindex="-1"></a> market_vol_index <span class="sc">+</span></span>
|
||
<span id="cb104-17"><a href="#cb104-17" aria-hidden="true" tabindex="-1"></a> put_low_strike <span class="sc">+</span></span>
|
||
<span id="cb104-18"><a href="#cb104-18" aria-hidden="true" tabindex="-1"></a> put_proportion <span class="sc">+</span></span>
|
||
<span id="cb104-19"><a href="#cb104-19" aria-hidden="true" tabindex="-1"></a> option_dispersion <span class="sc">+</span></span>
|
||
<span id="cb104-20"><a href="#cb104-20" aria-hidden="true" tabindex="-1"></a> pulse_ratio <span class="sc">+</span></span>
|
||
<span id="cb104-21"><a href="#cb104-21" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb104-22"><a href="#cb104-22" aria-hidden="true" tabindex="-1"></a> <span class="fu">I</span>(realized_vol_short<span class="sc">^</span><span class="dv">2</span>) <span class="sc">+</span></span>
|
||
<span id="cb104-23"><a href="#cb104-23" aria-hidden="true" tabindex="-1"></a> <span class="fu">I</span>(market_vol_index<span class="sc">^</span><span class="dv">2</span>) <span class="sc">+</span></span>
|
||
<span id="cb104-24"><a href="#cb104-24" aria-hidden="true" tabindex="-1"></a> <span class="fu">I</span>(realized_vol_long<span class="sc">^</span><span class="dv">2</span>) <span class="sc">+</span></span>
|
||
<span id="cb104-25"><a href="#cb104-25" aria-hidden="true" tabindex="-1"></a> <span class="fu">I</span>(pulse_ratio<span class="sc">^</span><span class="dv">2</span>) <span class="sc">+</span></span>
|
||
<span id="cb104-26"><a href="#cb104-26" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb104-27"><a href="#cb104-27" aria-hidden="true" tabindex="-1"></a> (<span class="dv">1</span> <span class="sc">+</span> realized_vol_short <span class="sc">+</span> realized_vol_long<span class="sc">:</span>market_vol_index <span class="sc">|</span> asset_id),</span>
|
||
<span id="cb104-28"><a href="#cb104-28" aria-hidden="true" tabindex="-1"></a> <span class="at">data =</span> train_linear</span>
|
||
<span id="cb104-29"><a href="#cb104-29" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb104-30"><a href="#cb104-30" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb104-31"><a href="#cb104-31" aria-hidden="true" tabindex="-1"></a><span class="fu">summary</span>(mod_lmm_5)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output cell-output-stdout">
|
||
<pre><code>Linear mixed model fit by REML. t-tests use Satterthwaite's method [
|
||
lmerModLmerTest]
|
||
Formula: log(implied_vol_ref) ~ put_call_ratio_volume * stress_spread +
|
||
total_contracts * liquidity_ratio + put_low_strike:market_vol_index +
|
||
realized_vol_short:realized_vol_long + strike_dispersion +
|
||
call_volume + call_oi + maturity_count + realized_vol_long +
|
||
realized_vol_mid + market_vol_index + put_low_strike + put_proportion +
|
||
option_dispersion + pulse_ratio + I(realized_vol_short^2) +
|
||
I(market_vol_index^2) + I(realized_vol_long^2) + I(pulse_ratio^2) +
|
||
(1 + realized_vol_short + realized_vol_long:market_vol_index |
|
||
asset_id)
|
||
Data: train_linear
|
||
|
||
REML criterion at convergence: -794671.7
|
||
|
||
Scaled residuals:
|
||
Min 1Q Median 3Q Max
|
||
-24.9130 -0.3898 0.0179 0.4286 12.6215
|
||
|
||
Random effects:
|
||
Groups Name Variance Std.Dev. Corr
|
||
asset_id (Intercept) 0.100984 0.31778
|
||
realized_vol_short 0.014391 0.11996 0.10
|
||
realized_vol_long:market_vol_index 0.007767 0.08813 -0.21 -0.18
|
||
Residual 0.033598 0.18330
|
||
Number of obs: 1533234, groups: asset_id, 3886
|
||
|
||
Fixed effects:
|
||
Estimate Std. Error df t value
|
||
(Intercept) 3.732e+00 5.024e-03 3.594e+03 742.886
|
||
put_call_ratio_volume 8.216e-03 2.249e-04 1.529e+06 36.530
|
||
stress_spread 5.963e-02 1.132e-03 5.737e+05 52.669
|
||
total_contracts -5.658e-02 6.074e-03 1.530e+06 -9.315
|
||
liquidity_ratio -8.112e-03 3.227e-04 1.530e+06 -25.138
|
||
strike_dispersion 9.638e-02 6.521e-03 1.530e+06 14.779
|
||
call_volume 1.408e-02 6.495e-04 1.528e+06 21.674
|
||
call_oi -1.244e-02 7.171e-04 1.530e+06 -17.345
|
||
maturity_count 5.641e-03 4.275e-04 1.531e+06 13.196
|
||
realized_vol_long 1.345e-01 1.661e-03 2.118e+04 80.967
|
||
realized_vol_mid 1.976e-02 6.284e-04 1.523e+06 31.443
|
||
market_vol_index 1.495e-01 6.363e-04 8.498e+05 234.917
|
||
put_low_strike 1.026e-02 3.584e-04 1.529e+06 28.630
|
||
put_proportion 6.142e-04 2.676e-04 1.528e+06 2.295
|
||
option_dispersion -1.282e-01 1.155e-02 1.530e+06 -11.095
|
||
pulse_ratio -4.017e-02 1.166e-03 1.476e+04 -34.443
|
||
I(realized_vol_short^2) -1.144e-01 5.545e-04 1.702e+05 -206.242
|
||
I(market_vol_index^2) -6.087e-03 1.016e-04 1.529e+06 -59.903
|
||
I(realized_vol_long^2) -5.797e-02 4.443e-04 4.486e+05 -130.472
|
||
I(pulse_ratio^2) 2.290e-02 2.287e-04 9.545e+05 100.136
|
||
put_call_ratio_volume:stress_spread 3.270e-03 1.215e-04 1.527e+06 26.906
|
||
total_contracts:liquidity_ratio 1.708e-04 2.380e-04 1.531e+06 0.718
|
||
put_low_strike:market_vol_index 1.041e-04 2.531e-04 1.326e+06 0.411
|
||
realized_vol_short:realized_vol_long 1.506e-01 9.633e-04 3.105e+05 156.367
|
||
Pr(>|t|)
|
||
(Intercept) <2e-16 ***
|
||
put_call_ratio_volume <2e-16 ***
|
||
stress_spread <2e-16 ***
|
||
total_contracts <2e-16 ***
|
||
liquidity_ratio <2e-16 ***
|
||
strike_dispersion <2e-16 ***
|
||
call_volume <2e-16 ***
|
||
call_oi <2e-16 ***
|
||
maturity_count <2e-16 ***
|
||
realized_vol_long <2e-16 ***
|
||
realized_vol_mid <2e-16 ***
|
||
market_vol_index <2e-16 ***
|
||
put_low_strike <2e-16 ***
|
||
put_proportion 0.0217 *
|
||
option_dispersion <2e-16 ***
|
||
pulse_ratio <2e-16 ***
|
||
I(realized_vol_short^2) <2e-16 ***
|
||
I(market_vol_index^2) <2e-16 ***
|
||
I(realized_vol_long^2) <2e-16 ***
|
||
I(pulse_ratio^2) <2e-16 ***
|
||
put_call_ratio_volume:stress_spread <2e-16 ***
|
||
total_contracts:liquidity_ratio 0.4730
|
||
put_low_strike:market_vol_index 0.6809
|
||
realized_vol_short:realized_vol_long <2e-16 ***
|
||
---
|
||
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1</code></pre>
|
||
</div>
|
||
<div class="cell-output cell-output-stdout">
|
||
<pre><code>optimizer (nloptwrap) convergence code: 0 (OK)
|
||
Model failed to converge with max|grad| = 0.00268325 (tol = 0.002, component 1)</code></pre>
|
||
</div>
|
||
</div>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb107"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb107-1"><a href="#cb107-1" aria-hidden="true" tabindex="-1"></a>predictions_log_5 <span class="ot"><-</span> <span class="fu">predict</span>(</span>
|
||
<span id="cb107-2"><a href="#cb107-2" aria-hidden="true" tabindex="-1"></a> mod_lmm_5,</span>
|
||
<span id="cb107-3"><a href="#cb107-3" aria-hidden="true" tabindex="-1"></a> <span class="at">newdata =</span> val_linear,</span>
|
||
<span id="cb107-4"><a href="#cb107-4" aria-hidden="true" tabindex="-1"></a> <span class="at">allow.new.levels =</span> <span class="cn">TRUE</span></span>
|
||
<span id="cb107-5"><a href="#cb107-5" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb107-6"><a href="#cb107-6" aria-hidden="true" tabindex="-1"></a>predictions_real_5 <span class="ot"><-</span> <span class="fu">exp</span>(predictions_log_5)</span>
|
||
<span id="cb107-7"><a href="#cb107-7" aria-hidden="true" tabindex="-1"></a>erreurs_5 <span class="ot"><-</span> val_linear<span class="sc">$</span>implied_vol_ref <span class="sc">-</span> predictions_real_5</span>
|
||
<span id="cb107-8"><a href="#cb107-8" aria-hidden="true" tabindex="-1"></a>rmse_score_5 <span class="ot"><-</span> <span class="fu">sqrt</span>(<span class="fu">mean</span>(erreurs_5<span class="sc">^</span><span class="dv">2</span>))</span>
|
||
<span id="cb107-9"><a href="#cb107-9" aria-hidden="true" tabindex="-1"></a><span class="fu">print</span>(<span class="fu">paste</span>(<span class="st">"RMSE of the fifth LMM :"</span>, <span class="fu">round</span>(rmse_score_5, <span class="dv">4</span>)))</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output cell-output-stdout">
|
||
<pre><code>[1] "RMSE of the fifth LMM : 8.1011"</code></pre>
|
||
</div>
|
||
</div>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb109"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb109-1"><a href="#cb109-1" aria-hidden="true" tabindex="-1"></a>selected_assets <span class="ot"><-</span> <span class="fu">sample</span>(<span class="fu">unique</span>(val_linear<span class="sc">$</span>asset_id), <span class="dv">5</span>)</span>
|
||
<span id="cb109-2"><a href="#cb109-2" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb109-3"><a href="#cb109-3" aria-hidden="true" tabindex="-1"></a>n_train_display <span class="ot"><-</span> <span class="fu">floor</span>(<span class="dv">544</span> <span class="sc">/</span> <span class="dv">2</span>)</span>
|
||
<span id="cb109-4"><a href="#cb109-4" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb109-5"><a href="#cb109-5" aria-hidden="true" tabindex="-1"></a>dates_train_all <span class="ot"><-</span> <span class="fu">sort</span>(<span class="fu">unique</span>(train_linear<span class="sc">$</span>obs_date))</span>
|
||
<span id="cb109-6"><a href="#cb109-6" aria-hidden="true" tabindex="-1"></a>dates_train_subset <span class="ot"><-</span> <span class="fu">tail</span>(dates_train_all, n_train_display)</span>
|
||
<span id="cb109-7"><a href="#cb109-7" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb109-8"><a href="#cb109-8" aria-hidden="true" tabindex="-1"></a>df_truth_train <span class="ot"><-</span> train_linear <span class="sc">|></span></span>
|
||
<span id="cb109-9"><a href="#cb109-9" aria-hidden="true" tabindex="-1"></a> <span class="fu">filter</span>(asset_id <span class="sc">%in%</span> selected_assets) <span class="sc">|></span></span>
|
||
<span id="cb109-10"><a href="#cb109-10" aria-hidden="true" tabindex="-1"></a> <span class="fu">filter</span>(obs_date <span class="sc">%in%</span> dates_train_subset) <span class="sc">|></span></span>
|
||
<span id="cb109-11"><a href="#cb109-11" aria-hidden="true" tabindex="-1"></a> dplyr<span class="sc">::</span><span class="fu">select</span>(asset_id, obs_date, implied_vol_ref)</span>
|
||
<span id="cb109-12"><a href="#cb109-12" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb109-13"><a href="#cb109-13" aria-hidden="true" tabindex="-1"></a>df_truth_val <span class="ot"><-</span> val_linear <span class="sc">|></span></span>
|
||
<span id="cb109-14"><a href="#cb109-14" aria-hidden="true" tabindex="-1"></a> <span class="fu">filter</span>(asset_id <span class="sc">%in%</span> selected_assets) <span class="sc">|></span></span>
|
||
<span id="cb109-15"><a href="#cb109-15" aria-hidden="true" tabindex="-1"></a> dplyr<span class="sc">::</span><span class="fu">select</span>(asset_id, obs_date, implied_vol_ref)</span>
|
||
<span id="cb109-16"><a href="#cb109-16" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb109-17"><a href="#cb109-17" aria-hidden="true" tabindex="-1"></a>df_truth_full <span class="ot"><-</span> <span class="fu">bind_rows</span>(df_truth_train, df_truth_val) <span class="sc">|></span></span>
|
||
<span id="cb109-18"><a href="#cb109-18" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate</span>(</span>
|
||
<span id="cb109-19"><a href="#cb109-19" aria-hidden="true" tabindex="-1"></a> <span class="at">obs_date =</span> <span class="fu">as.Date</span>(obs_date),</span>
|
||
<span id="cb109-20"><a href="#cb109-20" aria-hidden="true" tabindex="-1"></a> <span class="at">implied_vol_ref =</span> implied_vol_ref</span>
|
||
<span id="cb109-21"><a href="#cb109-21" aria-hidden="true" tabindex="-1"></a> )</span>
|
||
<span id="cb109-22"><a href="#cb109-22" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb109-23"><a href="#cb109-23" aria-hidden="true" tabindex="-1"></a>df_pred_temp <span class="ot"><-</span> val_linear <span class="sc">|></span></span>
|
||
<span id="cb109-24"><a href="#cb109-24" aria-hidden="true" tabindex="-1"></a> <span class="fu">filter</span>(asset_id <span class="sc">%in%</span> selected_assets)</span>
|
||
<span id="cb109-25"><a href="#cb109-25" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb109-26"><a href="#cb109-26" aria-hidden="true" tabindex="-1"></a>df_pred <span class="ot"><-</span> df_pred_temp <span class="sc">|></span></span>
|
||
<span id="cb109-27"><a href="#cb109-27" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate</span>(</span>
|
||
<span id="cb109-28"><a href="#cb109-28" aria-hidden="true" tabindex="-1"></a> <span class="at">pred_log =</span> <span class="fu">predict</span>(</span>
|
||
<span id="cb109-29"><a href="#cb109-29" aria-hidden="true" tabindex="-1"></a> mod_lmm_5,</span>
|
||
<span id="cb109-30"><a href="#cb109-30" aria-hidden="true" tabindex="-1"></a> <span class="at">newdata =</span> df_pred_temp,</span>
|
||
<span id="cb109-31"><a href="#cb109-31" aria-hidden="true" tabindex="-1"></a> <span class="at">allow.new.levels =</span> <span class="cn">TRUE</span></span>
|
||
<span id="cb109-32"><a href="#cb109-32" aria-hidden="true" tabindex="-1"></a> ),</span>
|
||
<span id="cb109-33"><a href="#cb109-33" aria-hidden="true" tabindex="-1"></a> <span class="at">prediction =</span> <span class="fu">exp</span>(pred_log),</span>
|
||
<span id="cb109-34"><a href="#cb109-34" aria-hidden="true" tabindex="-1"></a> <span class="at">obs_date =</span> <span class="fu">as.Date</span>(obs_date)</span>
|
||
<span id="cb109-35"><a href="#cb109-35" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">|></span></span>
|
||
<span id="cb109-36"><a href="#cb109-36" aria-hidden="true" tabindex="-1"></a> dplyr<span class="sc">::</span><span class="fu">select</span>(asset_id, obs_date, prediction)</span>
|
||
<span id="cb109-37"><a href="#cb109-37" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb109-38"><a href="#cb109-38" aria-hidden="true" tabindex="-1"></a><span class="fu">ggplot</span>() <span class="sc">+</span></span>
|
||
<span id="cb109-39"><a href="#cb109-39" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_line</span>(</span>
|
||
<span id="cb109-40"><a href="#cb109-40" aria-hidden="true" tabindex="-1"></a> <span class="at">data =</span> df_truth_full,</span>
|
||
<span id="cb109-41"><a href="#cb109-41" aria-hidden="true" tabindex="-1"></a> <span class="fu">aes</span>(<span class="at">x =</span> obs_date, <span class="at">y =</span> implied_vol_ref, <span class="at">color =</span> <span class="fu">as.factor</span>(asset_id)),</span>
|
||
<span id="cb109-42"><a href="#cb109-42" aria-hidden="true" tabindex="-1"></a> <span class="at">linewidth =</span> <span class="fl">0.7</span>,</span>
|
||
<span id="cb109-43"><a href="#cb109-43" aria-hidden="true" tabindex="-1"></a> <span class="at">alpha =</span> <span class="fl">0.8</span></span>
|
||
<span id="cb109-44"><a href="#cb109-44" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">+</span></span>
|
||
<span id="cb109-45"><a href="#cb109-45" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_line</span>(</span>
|
||
<span id="cb109-46"><a href="#cb109-46" aria-hidden="true" tabindex="-1"></a> <span class="at">data =</span> df_pred,</span>
|
||
<span id="cb109-47"><a href="#cb109-47" aria-hidden="true" tabindex="-1"></a> <span class="fu">aes</span>(<span class="at">x =</span> obs_date, <span class="at">y =</span> prediction, <span class="at">color =</span> <span class="fu">as.factor</span>(asset_id)),</span>
|
||
<span id="cb109-48"><a href="#cb109-48" aria-hidden="true" tabindex="-1"></a> <span class="at">linetype =</span> <span class="st">"dashed"</span>,</span>
|
||
<span id="cb109-49"><a href="#cb109-49" aria-hidden="true" tabindex="-1"></a> <span class="at">linewidth =</span> <span class="fl">0.7</span></span>
|
||
<span id="cb109-50"><a href="#cb109-50" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">+</span></span>
|
||
<span id="cb109-51"><a href="#cb109-51" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_vline</span>(</span>
|
||
<span id="cb109-52"><a href="#cb109-52" aria-hidden="true" tabindex="-1"></a> <span class="at">xintercept =</span> <span class="fu">as.numeric</span>(<span class="fu">min</span>(df_pred<span class="sc">$</span>obs_date)),</span>
|
||
<span id="cb109-53"><a href="#cb109-53" aria-hidden="true" tabindex="-1"></a> <span class="at">linetype =</span> <span class="st">"dotted"</span>,</span>
|
||
<span id="cb109-54"><a href="#cb109-54" aria-hidden="true" tabindex="-1"></a> <span class="at">color =</span> <span class="st">"black"</span>,</span>
|
||
<span id="cb109-55"><a href="#cb109-55" aria-hidden="true" tabindex="-1"></a> <span class="at">linewidth =</span> <span class="dv">1</span></span>
|
||
<span id="cb109-56"><a href="#cb109-56" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">+</span></span>
|
||
<span id="cb109-57"><a href="#cb109-57" aria-hidden="true" tabindex="-1"></a> <span class="fu">theme_minimal</span>() <span class="sc">+</span></span>
|
||
<span id="cb109-58"><a href="#cb109-58" aria-hidden="true" tabindex="-1"></a> <span class="fu">labs</span>(</span>
|
||
<span id="cb109-59"><a href="#cb109-59" aria-hidden="true" tabindex="-1"></a> <span class="at">title =</span> <span class="st">"Predictions (Dashed) vs. Reality (Solid)"</span>,</span>
|
||
<span id="cb109-60"><a href="#cb109-60" aria-hidden="true" tabindex="-1"></a> <span class="at">subtitle =</span> <span class="st">"Visualisation of 5 Random Assets"</span>,</span>
|
||
<span id="cb109-61"><a href="#cb109-61" aria-hidden="true" tabindex="-1"></a> <span class="at">x =</span> <span class="st">"Date"</span>,</span>
|
||
<span id="cb109-62"><a href="#cb109-62" aria-hidden="true" tabindex="-1"></a> <span class="at">y =</span> <span class="st">"Implied Volatility"</span>,</span>
|
||
<span id="cb109-63"><a href="#cb109-63" aria-hidden="true" tabindex="-1"></a> <span class="at">color =</span> <span class="st">"Asset ID"</span></span>
|
||
<span id="cb109-64"><a href="#cb109-64" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">+</span></span>
|
||
<span id="cb109-65"><a href="#cb109-65" aria-hidden="true" tabindex="-1"></a> <span class="fu">theme</span>(<span class="at">legend.position =</span> <span class="st">"right"</span>)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output-display">
|
||
<div class="quarto-figure quarto-figure-center">
|
||
<figure class="figure">
|
||
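<p><img src="Projet_MRC_DANJOU_LEGRAND_MERIC_VONSIEMENS_files/figure-html/mod%205%20graph-1.png" class="img-fluid quarto-figure quarto-figure-center figure-img" width="672" alt="Line chart of implied volatility over time for 5 random assets: observed values as solid lines, model predictions as dashed lines, with a dotted vertical line at the first prediction date."></p>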
|
||
</figure>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
<section id="random-effects-analysis" class="level5">
|
||
<h5 class="anchored" data-anchor-id="random-effects-analysis">Random Effects Analysis:</h5>
|
||
<p>The summary of the model confirms our hypothesis that assets are structurally different. We observe an intercept variance of <span class="math inline">\(0.103\)</span>. This large variance confirms that different assets have vastly different baseline volatility levels. Then, the non-zero standard deviations (0.1269 and 0.1446 respectively) prove that assets have unique sensitivities. We can conclude that some assets are “high beta” (e.g., tech stocks) and react violently to market stress, while others are “defensive” and react mildly. This Random Slope structure allows the model to “learn” the specific risk profile of each of the 3,886 assets in the training set, correcting the “parallel trend” bias observed in earlier models.</p>
|
||
</section>
|
||
<section id="fixed-effect-analysis" class="level5">
|
||
<h5 class="anchored" data-anchor-id="fixed-effect-analysis">Fixed Effect Analysis:</h5>
|
||
<p>The Fixed Effects part of the summary provides powerful insights into the general laws of the options market. With <span class="math inline">\(t=319.9\)</span>, the market volatility index is by far the strongest driver. Unsurprisingly, implied volatility is systemically linked to the VIX. When the market panics, all assets follow. Then we observe strong effects of short- and long-term realized volatility: the past remains the best predictor of the future. The significance of the quadratic term for short-term realized volatility confirms the convexity of volatility: extreme moves in the underlying asset lead to disproportionately higher implied volatility premiums. Moreover, the positive coefficient of the put-call ratio on volume confirms the “Fear Gauge” theory. A surge in Put buying volume exerts a strong upward pressure on volatility prices.</p>
|
||
</section>
|
||
<section id="analysis-of-the-interactions" class="level5">
|
||
<h5 class="anchored" data-anchor-id="analysis-of-the-interactions">Analysis of the interactions:</h5>
|
||
<p>Our financial engineering efforts yielded mixed but informative results:</p>
|
||
<ul>
|
||
<li><p><strong>Volatility Shock Ratio (<code>realized_vol_short : realized_vol_long</code>)</strong>: This is the most significant interaction, with the largest t-value in absolute terms. The negative coefficient is fascinating: it mathematically represents Mean Reversion. When short-term volatility deviates too much from long-term volatility, the model applies a correction, preventing the prediction from exploding linearly.</p></li>
|
||
<li><p><strong>Market Depth:</strong> With a t-value of <span class="math inline">\(21.6\)</span>, this interaction is highly significant. It confirms that liquidity constraints amplify volatility.</p></li>
|
||
<li><p><strong>Skew Tension:</strong> With a t-value of <span class="math inline">\(2.64\)</span>, this interaction is statistically significant (at the 95% confidence level). It validates the “Crash Convexity” theory, although its marginal impact is lower than expected.</p></li>
|
||
<li><p><strong>Fear Factor:</strong> With a t-value of <span class="math inline">\(1.23\)</span>, this interaction is not statistically significant. This suggests that the information contained in this interaction is already fully captured by the main effects of the variables separately.</p></li>
|
||
</ul>
|
||
<p>Finally, concerning model diagnostics, we note that the REML criterion dropped to <span class="math inline">\(-775{,}843\)</span>, indicating a significantly better fit compared to the baseline models. However, we also observe a convergence warning (<code>Model failed to converge</code>). This is common in complex mixed models with large datasets (1.5 million observations) and complex random structures. While scaling variables helped, the complexity of the “Random Slope” optimization remains a computational challenge, though the estimates remain robust given the extremely low standard errors.</p>
|
||
</section>
|
||
</section>
|
||
</section>
|
||
</section>
|
||
<section id="non-linear-black-box-models" class="level2">
|
||
<h2 class="anchored" data-anchor-id="non-linear-black-box-models">Non-Linear & Black-Box Models</h2>
|
||
<p>Following the evaluation of linear frameworks, this section explores high-capacity, non-linear algorithms capable of mapping complex, multidimensional interactions within the financial feature space. A critical methodological distinction must be noted regarding the data pipeline: while linear models and deep learning architectures rely on the PCA-reduced dataset to mitigate multicollinearity and stabilize gradients, tree-based models were trained exclusively on the raw dataset (<span class="math inline">\(train\_final\)</span>). Decision trees naturally handle collinearity through their splitting mechanism, and applying PCA beforehand would orthogonally mix the original financial indicators, thereby destroying the localized, non-linear thresholds that tree ensembles are designed to capture.</p>
|
||
<p>To efficiently navigate the hyperparameter space of these complex models, Grid Search methodologies were discarded in favor of Bayesian Optimization using Gaussian Processes, allowing for a directed and computationally efficient convergence toward the optimal parameter sets by maximizing the negative Root Mean Squared Error (<span class="math inline">\(-RMSE\)</span>).</p>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb110"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb110-1"><a href="#cb110-1" aria-hidden="true" tabindex="-1"></a>train_tree<span class="sc">$</span>implied_vol_ref <span class="ot"><-</span> <span class="fu">log</span>(train_tree<span class="sc">$</span>implied_vol_ref)</span>
|
||
<span id="cb110-2"><a href="#cb110-2" aria-hidden="true" tabindex="-1"></a>val_tree<span class="sc">$</span>implied_vol_ref <span class="ot"><-</span> <span class="fu">log</span>(val_tree<span class="sc">$</span>implied_vol_ref)</span>
|
||
<span id="cb110-3"><a href="#cb110-3" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb110-4"><a href="#cb110-4" aria-hidden="true" tabindex="-1"></a>train_clean <span class="ot"><-</span> train_tree <span class="sc">|></span> <span class="fu">select</span>(<span class="sc">-</span>asset_id, <span class="sc">-</span>obs_date)</span>
|
||
<span id="cb110-5"><a href="#cb110-5" aria-hidden="true" tabindex="-1"></a>val_clean <span class="ot"><-</span> val_tree <span class="sc">|></span> <span class="fu">select</span>(<span class="sc">-</span>asset_id, <span class="sc">-</span>obs_date)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
<section id="gradient-boosting-frameworks-xgboost-and-lightgbm" class="level3">
|
||
<h3 class="anchored" data-anchor-id="gradient-boosting-frameworks-xgboost-and-lightgbm">Gradient Boosting Frameworks: XGBoost and LightGBM</h3>
|
||
<p>Gradient boosting decision trees represent the state-of-the-art for tabular data. We benchmarked two industry-standard architectures: <strong>XGBoost</strong>, which relies on level-wise tree growth, and <strong>LightGBM</strong>, which employs a leaf-wise expansion strategy.</p>
|
||
<section id="xgboost-performance-and-boundary-effect" class="level4">
|
||
<h4 class="anchored" data-anchor-id="xgboost-performance-and-boundary-effect">XGBoost Performance and Boundary Effect</h4>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb111"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb111-1"><a href="#cb111-1" aria-hidden="true" tabindex="-1"></a>x_train_mat <span class="ot"><-</span> <span class="fu">as.matrix</span>(train_clean <span class="sc">|></span> <span class="fu">select</span>(<span class="sc">-</span>implied_vol_ref))</span>
|
||
<span id="cb111-2"><a href="#cb111-2" aria-hidden="true" tabindex="-1"></a>y_train_vec <span class="ot"><-</span> train_clean<span class="sc">$</span>implied_vol_ref</span>
|
||
<span id="cb111-3"><a href="#cb111-3" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb111-4"><a href="#cb111-4" aria-hidden="true" tabindex="-1"></a>x_val_mat <span class="ot"><-</span> <span class="fu">as.matrix</span>(val_clean <span class="sc">|></span> <span class="fu">select</span>(<span class="sc">-</span>implied_vol_ref))</span>
|
||
<span id="cb111-5"><a href="#cb111-5" aria-hidden="true" tabindex="-1"></a>y_val_vec <span class="ot"><-</span> val_clean<span class="sc">$</span>implied_vol_ref</span>
|
||
<span id="cb111-6"><a href="#cb111-6" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb111-7"><a href="#cb111-7" aria-hidden="true" tabindex="-1"></a>dtrain <span class="ot"><-</span> <span class="fu">xgb.DMatrix</span>(<span class="at">data =</span> x_train_mat, <span class="at">label =</span> y_train_vec)</span>
|
||
<span id="cb111-8"><a href="#cb111-8" aria-hidden="true" tabindex="-1"></a>dval <span class="ot"><-</span> <span class="fu">xgb.DMatrix</span>(<span class="at">data =</span> x_val_mat, <span class="at">label =</span> y_val_vec)</span>
|
||
<span id="cb111-9"><a href="#cb111-9" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb111-10"><a href="#cb111-10" aria-hidden="true" tabindex="-1"></a>scoring_function <span class="ot"><-</span> <span class="cf">function</span>(eta, max_depth, subsample, colsample_bytree) {</span>
|
||
<span id="cb111-11"><a href="#cb111-11" aria-hidden="true" tabindex="-1"></a> parsed_depth <span class="ot"><-</span> <span class="fu">as.integer</span>(<span class="fu">round</span>(max_depth))</span>
|
||
<span id="cb111-12"><a href="#cb111-12" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb111-13"><a href="#cb111-13" aria-hidden="true" tabindex="-1"></a> params <span class="ot"><-</span> <span class="fu">list</span>(</span>
|
||
<span id="cb111-14"><a href="#cb111-14" aria-hidden="true" tabindex="-1"></a> <span class="at">booster =</span> <span class="st">"gbtree"</span>,</span>
|
||
<span id="cb111-15"><a href="#cb111-15" aria-hidden="true" tabindex="-1"></a> <span class="at">objective =</span> <span class="st">"reg:squarederror"</span>,</span>
|
||
<span id="cb111-16"><a href="#cb111-16" aria-hidden="true" tabindex="-1"></a> <span class="at">eta =</span> eta,</span>
|
||
<span id="cb111-17"><a href="#cb111-17" aria-hidden="true" tabindex="-1"></a> <span class="at">max_depth =</span> parsed_depth,</span>
|
||
<span id="cb111-18"><a href="#cb111-18" aria-hidden="true" tabindex="-1"></a> <span class="at">subsample =</span> subsample,</span>
|
||
<span id="cb111-19"><a href="#cb111-19" aria-hidden="true" tabindex="-1"></a> <span class="at">colsample_bytree =</span> colsample_bytree,</span>
|
||
<span id="cb111-20"><a href="#cb111-20" aria-hidden="true" tabindex="-1"></a> <span class="at">tree_method =</span> <span class="st">"hist"</span>,</span>
|
||
<span id="cb111-21"><a href="#cb111-21" aria-hidden="true" tabindex="-1"></a> <span class="at">nthread =</span> parallel<span class="sc">::</span><span class="fu">detectCores</span>() <span class="sc">-</span> <span class="dv">1</span></span>
|
||
<span id="cb111-22"><a href="#cb111-22" aria-hidden="true" tabindex="-1"></a> )</span>
|
||
<span id="cb111-23"><a href="#cb111-23" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb111-24"><a href="#cb111-24" aria-hidden="true" tabindex="-1"></a> cv_model <span class="ot"><-</span> <span class="fu">tryCatch</span>(</span>
|
||
<span id="cb111-25"><a href="#cb111-25" aria-hidden="true" tabindex="-1"></a> {</span>
|
||
<span id="cb111-26"><a href="#cb111-26" aria-hidden="true" tabindex="-1"></a> <span class="fu">xgb.cv</span>(</span>
|
||
<span id="cb111-27"><a href="#cb111-27" aria-hidden="true" tabindex="-1"></a> <span class="at">params =</span> params,</span>
|
||
<span id="cb111-28"><a href="#cb111-28" aria-hidden="true" tabindex="-1"></a> <span class="at">data =</span> dtrain,</span>
|
||
<span id="cb111-29"><a href="#cb111-29" aria-hidden="true" tabindex="-1"></a> <span class="at">nrounds =</span> <span class="dv">150</span>,</span>
|
||
<span id="cb111-30"><a href="#cb111-30" aria-hidden="true" tabindex="-1"></a> <span class="at">nfold =</span> <span class="dv">3</span>,</span>
|
||
<span id="cb111-31"><a href="#cb111-31" aria-hidden="true" tabindex="-1"></a> <span class="at">early_stopping_rounds =</span> <span class="dv">15</span>,</span>
|
||
<span id="cb111-32"><a href="#cb111-32" aria-hidden="true" tabindex="-1"></a> <span class="at">verbose =</span> <span class="dv">0</span>,</span>
|
||
<span id="cb111-33"><a href="#cb111-33" aria-hidden="true" tabindex="-1"></a> <span class="at">metrics =</span> <span class="st">"rmse"</span></span>
|
||
<span id="cb111-34"><a href="#cb111-34" aria-hidden="true" tabindex="-1"></a> )</span>
|
||
<span id="cb111-35"><a href="#cb111-35" aria-hidden="true" tabindex="-1"></a> },</span>
|
||
<span id="cb111-36"><a href="#cb111-36" aria-hidden="true" tabindex="-1"></a> <span class="at">error =</span> <span class="cf">function</span>(e) <span class="fu">return</span>(<span class="cn">NULL</span>)</span>
|
||
<span id="cb111-37"><a href="#cb111-37" aria-hidden="true" tabindex="-1"></a> )</span>
|
||
<span id="cb111-38"><a href="#cb111-38" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb111-39"><a href="#cb111-39" aria-hidden="true" tabindex="-1"></a> <span class="cf">if</span> (<span class="fu">is.null</span>(cv_model)) {</span>
|
||
<span id="cb111-40"><a href="#cb111-40" aria-hidden="true" tabindex="-1"></a> <span class="fu">return</span>(<span class="fu">list</span>(<span class="at">Score =</span> <span class="sc">-</span><span class="dv">9999</span>, <span class="at">Pred =</span> <span class="dv">0</span>))</span>
|
||
<span id="cb111-41"><a href="#cb111-41" aria-hidden="true" tabindex="-1"></a> }</span>
|
||
<span id="cb111-42"><a href="#cb111-42" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb111-43"><a href="#cb111-43" aria-hidden="true" tabindex="-1"></a> best_iter <span class="ot"><-</span> cv_model<span class="sc">$</span>best_iteration</span>
|
||
<span id="cb111-44"><a href="#cb111-44" aria-hidden="true" tabindex="-1"></a> <span class="cf">if</span> (<span class="fu">is.null</span>(best_iter) <span class="sc">||</span> <span class="fu">length</span>(best_iter) <span class="sc">==</span> <span class="dv">0</span>) {</span>
|
||
<span id="cb111-45"><a href="#cb111-45" aria-hidden="true" tabindex="-1"></a> best_iter <span class="ot"><-</span> <span class="fu">which.min</span>(cv_model<span class="sc">$</span>evaluation_log<span class="sc">$</span>test_rmse_mean)</span>
|
||
<span id="cb111-46"><a href="#cb111-46" aria-hidden="true" tabindex="-1"></a> }</span>
|
||
<span id="cb111-47"><a href="#cb111-47" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb111-48"><a href="#cb111-48" aria-hidden="true" tabindex="-1"></a> best_rmse <span class="ot"><-</span> cv_model<span class="sc">$</span>evaluation_log<span class="sc">$</span>test_rmse_mean[best_iter]</span>
|
||
<span id="cb111-49"><a href="#cb111-49" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb111-50"><a href="#cb111-50" aria-hidden="true" tabindex="-1"></a> <span class="fu">list</span>(<span class="at">Score =</span> <span class="sc">-</span>best_rmse, <span class="at">Pred =</span> <span class="dv">0</span>)</span>
|
||
<span id="cb111-51"><a href="#cb111-51" aria-hidden="true" tabindex="-1"></a>}</span>
|
||
<span id="cb111-52"><a href="#cb111-52" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb111-53"><a href="#cb111-53" aria-hidden="true" tabindex="-1"></a>bounds <span class="ot"><-</span> <span class="fu">list</span>(</span>
|
||
<span id="cb111-54"><a href="#cb111-54" aria-hidden="true" tabindex="-1"></a> <span class="at">eta =</span> <span class="fu">c</span>(<span class="fl">0.05</span>, <span class="fl">0.3</span>),</span>
|
||
<span id="cb111-55"><a href="#cb111-55" aria-hidden="true" tabindex="-1"></a> <span class="at">max_depth =</span> <span class="fu">c</span>(<span class="dv">8</span>L, <span class="dv">15</span>L),</span>
|
||
<span id="cb111-56"><a href="#cb111-56" aria-hidden="true" tabindex="-1"></a> <span class="at">subsample =</span> <span class="fu">c</span>(<span class="fl">0.6</span>, <span class="fl">1.0</span>),</span>
|
||
<span id="cb111-57"><a href="#cb111-57" aria-hidden="true" tabindex="-1"></a> <span class="at">colsample_bytree =</span> <span class="fu">c</span>(<span class="fl">0.6</span>, <span class="fl">1.0</span>)</span>
|
||
<span id="cb111-58"><a href="#cb111-58" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb111-59"><a href="#cb111-59" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb111-60"><a href="#cb111-60" aria-hidden="true" tabindex="-1"></a>opt_obj <span class="ot"><-</span> <span class="fu">BayesianOptimization</span>(</span>
|
||
<span id="cb111-61"><a href="#cb111-61" aria-hidden="true" tabindex="-1"></a> <span class="at">FUN =</span> scoring_function,</span>
|
||
<span id="cb111-62"><a href="#cb111-62" aria-hidden="true" tabindex="-1"></a> <span class="at">bounds =</span> bounds,</span>
|
||
<span id="cb111-63"><a href="#cb111-63" aria-hidden="true" tabindex="-1"></a> <span class="at">init_points =</span> <span class="dv">3</span>,</span>
|
||
<span id="cb111-64"><a href="#cb111-64" aria-hidden="true" tabindex="-1"></a> <span class="at">n_iter =</span> <span class="dv">5</span>,</span>
|
||
<span id="cb111-65"><a href="#cb111-65" aria-hidden="true" tabindex="-1"></a> <span class="at">acq =</span> <span class="st">"ucb"</span>,</span>
|
||
<span id="cb111-66"><a href="#cb111-66" aria-hidden="true" tabindex="-1"></a> <span class="at">kappa =</span> <span class="fl">2.576</span></span>
|
||
<span id="cb111-67"><a href="#cb111-67" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb111-68"><a href="#cb111-68" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb111-69"><a href="#cb111-69" aria-hidden="true" tabindex="-1"></a>best_params_raw <span class="ot"><-</span> opt_obj<span class="sc">$</span>Best_Par</span>
|
||
<span id="cb111-70"><a href="#cb111-70" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb111-71"><a href="#cb111-71" aria-hidden="true" tabindex="-1"></a>best_params_xgb <span class="ot"><-</span> <span class="fu">list</span>(</span>
|
||
<span id="cb111-72"><a href="#cb111-72" aria-hidden="true" tabindex="-1"></a> <span class="at">booster =</span> <span class="st">"gbtree"</span>,</span>
|
||
<span id="cb111-73"><a href="#cb111-73" aria-hidden="true" tabindex="-1"></a> <span class="at">objective =</span> <span class="st">"reg:squarederror"</span>,</span>
|
||
<span id="cb111-74"><a href="#cb111-74" aria-hidden="true" tabindex="-1"></a> <span class="at">eta =</span> best_params_raw[<span class="st">"eta"</span>],</span>
|
||
<span id="cb111-75"><a href="#cb111-75" aria-hidden="true" tabindex="-1"></a> <span class="at">max_depth =</span> best_params_raw[<span class="st">"max_depth"</span>],</span>
|
||
<span id="cb111-76"><a href="#cb111-76" aria-hidden="true" tabindex="-1"></a> <span class="at">subsample =</span> best_params_raw[<span class="st">"subsample"</span>],</span>
|
||
<span id="cb111-77"><a href="#cb111-77" aria-hidden="true" tabindex="-1"></a> <span class="at">colsample_bytree =</span> best_params_raw[<span class="st">"colsample_bytree"</span>],</span>
|
||
<span id="cb111-78"><a href="#cb111-78" aria-hidden="true" tabindex="-1"></a> <span class="at">tree_method =</span> <span class="st">"hist"</span>,</span>
|
||
<span id="cb111-79"><a href="#cb111-79" aria-hidden="true" tabindex="-1"></a> <span class="at">nthread =</span> parallel<span class="sc">::</span><span class="fu">detectCores</span>() <span class="sc">-</span> <span class="dv">1</span></span>
|
||
<span id="cb111-80"><a href="#cb111-80" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb111-81"><a href="#cb111-81" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb111-82"><a href="#cb111-82" aria-hidden="true" tabindex="-1"></a>final_model_xgb <span class="ot"><-</span> <span class="fu">xgb.train</span>(</span>
|
||
<span id="cb111-83"><a href="#cb111-83" aria-hidden="true" tabindex="-1"></a> <span class="at">params =</span> best_params_xgb,</span>
|
||
<span id="cb111-84"><a href="#cb111-84" aria-hidden="true" tabindex="-1"></a> <span class="at">data =</span> dtrain,</span>
|
||
<span id="cb111-85"><a href="#cb111-85" aria-hidden="true" tabindex="-1"></a> <span class="at">nrounds =</span> <span class="dv">1000</span>,</span>
|
||
<span id="cb111-86"><a href="#cb111-86" aria-hidden="true" tabindex="-1"></a> <span class="at">evals =</span> <span class="fu">list</span>(<span class="at">val =</span> dval, <span class="at">train =</span> dtrain),</span>
|
||
<span id="cb111-87"><a href="#cb111-87" aria-hidden="true" tabindex="-1"></a> <span class="at">early_stopping_rounds =</span> <span class="dv">50</span>,</span>
|
||
<span id="cb111-88"><a href="#cb111-88" aria-hidden="true" tabindex="-1"></a> <span class="at">verbose =</span> <span class="dv">1</span></span>
|
||
<span id="cb111-89"><a href="#cb111-89" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb111-90"><a href="#cb111-90" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb111-91"><a href="#cb111-91" aria-hidden="true" tabindex="-1"></a>preds_xgb_log <span class="ot"><-</span> <span class="fu">predict</span>(final_model_xgb, dval)</span>
|
||
<span id="cb111-92"><a href="#cb111-92" aria-hidden="true" tabindex="-1"></a>rmse_xgb_real <span class="ot"><-</span> <span class="fu">sqrt</span>(<span class="fu">mean</span>((<span class="fu">exp</span>(preds_xgb_log) <span class="sc">-</span> <span class="fu">exp</span>(y_val_vec))<span class="sc">^</span><span class="dv">2</span>))</span>
|
||
<span id="cb111-93"><a href="#cb111-93" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb111-94"><a href="#cb111-94" aria-hidden="true" tabindex="-1"></a>preds_xgb_train_log <span class="ot"><-</span> <span class="fu">predict</span>(final_model_xgb, dtrain)</span>
|
||
<span id="cb111-95"><a href="#cb111-95" aria-hidden="true" tabindex="-1"></a>rmse_xgb_train_real <span class="ot"><-</span> <span class="fu">sqrt</span>(<span class="fu">mean</span>(</span>
|
||
<span id="cb111-96"><a href="#cb111-96" aria-hidden="true" tabindex="-1"></a> (<span class="fu">exp</span>(preds_xgb_train_log) <span class="sc">-</span> <span class="fu">exp</span>(y_train_vec))<span class="sc">^</span><span class="dv">2</span></span>
|
||
<span id="cb111-97"><a href="#cb111-97" aria-hidden="true" tabindex="-1"></a>))</span>
|
||
<span id="cb111-98"><a href="#cb111-98" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb111-99"><a href="#cb111-99" aria-hidden="true" tabindex="-1"></a><span class="fu">print</span>(<span class="fu">paste0</span>(<span class="st">"XGBoost RMSE on validation set: "</span>, <span class="fu">round</span>(rmse_xgb_real, <span class="dv">4</span>)))</span>
|
||
<span id="cb111-100"><a href="#cb111-100" aria-hidden="true" tabindex="-1"></a><span class="fu">print</span>(<span class="fu">paste0</span>(<span class="st">"XGBoost RMSE on training set: "</span>, <span class="fu">round</span>(rmse_xgb_train_real, <span class="dv">4</span>)))</span>
|
||
<span id="cb111-101"><a href="#cb111-101" aria-hidden="true" tabindex="-1"></a><span class="fu">paste0</span>(</span>
|
||
<span id="cb111-102"><a href="#cb111-102" aria-hidden="true" tabindex="-1"></a> <span class="st">"Best Hyperparameters: "</span>,</span>
|
||
<span id="cb111-103"><a href="#cb111-103" aria-hidden="true" tabindex="-1"></a> <span class="fu">paste</span>(<span class="fu">names</span>(best_params_raw), best_params_raw, <span class="at">sep =</span> <span class="st">" = "</span>, <span class="at">collapse =</span> <span class="st">", "</span>)</span>
|
||
<span id="cb111-104"><a href="#cb111-104" aria-hidden="true" tabindex="-1"></a>)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
<p>The <strong>XGBoost</strong> model was tuned over <span class="math inline">\(15\)</span> Bayesian iterations. The optimization process revealed a distinct statistical behavior: the algorithm consistently converged toward the extreme upper boundaries of the predefined search space. The optimal configuration selected a learning rate (eta) of <span class="math inline">\(0.2\)</span>, a maximum tree depth (<span class="math inline">\(max\_depth\)</span>) of <span class="math inline">\(10\)</span>, and utilized <span class="math inline">\(100\%\)</span> of the data at each split (<span class="math inline">\(subsample = 1.0\)</span>, <span class="math inline">\(colsample\_bytree = 1.0\)</span>).</p>
|
||
<p>On the original exponential scale, this configuration yielded a validation <span class="math inline">\(RMSE\)</span> of <span class="math inline">\(10.70\)</span>. However, comparing this generalization error to the training <span class="math inline">\(RMSE\)</span> of <span class="math inline">\(0.565\)</span> reveals the extreme nature of the bias-variance trade-off in this specific task. The saturation of complexity limits (a depth of <span class="math inline">\(10\)</span> is exceptionally deep for boosting) and the optimizer’s choice not to apply any stochastic regularization (row or column subsampling) indicate that the underlying implied volatility signal is highly complex and difficult to separate from structural noise. Consequently, the model operates in a high-variance regime, effectively memorizing the training set (yielding a near-zero training error) but failing to generalize beyond the <span class="math inline">\(\approx 10.70\)</span> validation barrier.</p>
|
||
</section>
|
||
<section id="lightgbm-leaf-wise-validation" class="level4">
|
||
<h4 class="anchored" data-anchor-id="lightgbm-leaf-wise-validation">LightGBM: Leaf-Wise Validation</h4>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb112"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb112-1"><a href="#cb112-1" aria-hidden="true" tabindex="-1"></a>x_train_mat <span class="ot"><-</span> <span class="fu">as.matrix</span>(train_clean <span class="sc">|></span> <span class="fu">select</span>(<span class="sc">-</span>implied_vol_ref))</span>
|
||
<span id="cb112-2"><a href="#cb112-2" aria-hidden="true" tabindex="-1"></a>y_train_vec <span class="ot"><-</span> train_clean<span class="sc">$</span>implied_vol_ref</span>
|
||
<span id="cb112-3"><a href="#cb112-3" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb112-4"><a href="#cb112-4" aria-hidden="true" tabindex="-1"></a>x_val_mat <span class="ot"><-</span> <span class="fu">as.matrix</span>(val_clean <span class="sc">|></span> <span class="fu">select</span>(<span class="sc">-</span>implied_vol_ref))</span>
|
||
<span id="cb112-5"><a href="#cb112-5" aria-hidden="true" tabindex="-1"></a>y_val_vec <span class="ot"><-</span> val_clean<span class="sc">$</span>implied_vol_ref</span>
|
||
<span id="cb112-6"><a href="#cb112-6" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb112-7"><a href="#cb112-7" aria-hidden="true" tabindex="-1"></a>dtrain <span class="ot"><-</span> <span class="fu">lgb.Dataset</span>(<span class="at">data =</span> x_train_mat, <span class="at">label =</span> y_train_vec)</span>
|
||
<span id="cb112-8"><a href="#cb112-8" aria-hidden="true" tabindex="-1"></a>dval <span class="ot"><-</span> <span class="fu">lgb.Dataset</span>(<span class="at">data =</span> x_val_mat, <span class="at">label =</span> y_val_vec, <span class="at">reference =</span> dtrain)</span>
|
||
<span id="cb112-9"><a href="#cb112-9" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb112-10"><a href="#cb112-10" aria-hidden="true" tabindex="-1"></a>scoring_function <span class="ot"><-</span> <span class="cf">function</span>(</span>
|
||
<span id="cb112-11"><a href="#cb112-11" aria-hidden="true" tabindex="-1"></a> num_leaves,</span>
|
||
<span id="cb112-12"><a href="#cb112-12" aria-hidden="true" tabindex="-1"></a> learning_rate,</span>
|
||
<span id="cb112-13"><a href="#cb112-13" aria-hidden="true" tabindex="-1"></a> bagging_fraction,</span>
|
||
<span id="cb112-14"><a href="#cb112-14" aria-hidden="true" tabindex="-1"></a> feature_fraction</span>
|
||
<span id="cb112-15"><a href="#cb112-15" aria-hidden="true" tabindex="-1"></a>) {</span>
|
||
<span id="cb112-16"><a href="#cb112-16" aria-hidden="true" tabindex="-1"></a> parsed_num_leaves <span class="ot"><-</span> <span class="fu">as.integer</span>(<span class="fu">round</span>(num_leaves))</span>
|
||
<span id="cb112-17"><a href="#cb112-17" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb112-18"><a href="#cb112-18" aria-hidden="true" tabindex="-1"></a> params <span class="ot"><-</span> <span class="fu">list</span>(</span>
|
||
<span id="cb112-19"><a href="#cb112-19" aria-hidden="true" tabindex="-1"></a> <span class="at">objective =</span> <span class="st">"regression"</span>,</span>
|
||
<span id="cb112-20"><a href="#cb112-20" aria-hidden="true" tabindex="-1"></a> <span class="at">metric =</span> <span class="st">"rmse"</span>,</span>
|
||
<span id="cb112-21"><a href="#cb112-21" aria-hidden="true" tabindex="-1"></a> <span class="at">num_threads =</span> parallel<span class="sc">::</span><span class="fu">detectCores</span>() <span class="sc">-</span> <span class="dv">1</span>,</span>
|
||
<span id="cb112-22"><a href="#cb112-22" aria-hidden="true" tabindex="-1"></a> <span class="at">learning_rate =</span> learning_rate,</span>
|
||
<span id="cb112-23"><a href="#cb112-23" aria-hidden="true" tabindex="-1"></a> <span class="at">num_leaves =</span> parsed_num_leaves,</span>
|
||
<span id="cb112-24"><a href="#cb112-24" aria-hidden="true" tabindex="-1"></a> <span class="at">bagging_fraction =</span> bagging_fraction,</span>
|
||
<span id="cb112-25"><a href="#cb112-25" aria-hidden="true" tabindex="-1"></a> <span class="at">bagging_freq =</span> <span class="fu">ifelse</span>(bagging_fraction <span class="sc"><</span> <span class="fl">1.0</span>, <span class="dv">1</span>, <span class="dv">0</span>),</span>
|
||
<span id="cb112-26"><a href="#cb112-26" aria-hidden="true" tabindex="-1"></a> <span class="at">feature_fraction =</span> feature_fraction</span>
|
||
<span id="cb112-27"><a href="#cb112-27" aria-hidden="true" tabindex="-1"></a> )</span>
|
||
<span id="cb112-28"><a href="#cb112-28" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb112-29"><a href="#cb112-29" aria-hidden="true" tabindex="-1"></a> cv_model <span class="ot"><-</span> <span class="fu">tryCatch</span>(</span>
|
||
<span id="cb112-30"><a href="#cb112-30" aria-hidden="true" tabindex="-1"></a> {</span>
|
||
<span id="cb112-31"><a href="#cb112-31" aria-hidden="true" tabindex="-1"></a> <span class="fu">lgb.cv</span>(</span>
|
||
<span id="cb112-32"><a href="#cb112-32" aria-hidden="true" tabindex="-1"></a> <span class="at">params =</span> params,</span>
|
||
<span id="cb112-33"><a href="#cb112-33" aria-hidden="true" tabindex="-1"></a> <span class="at">data =</span> dtrain,</span>
|
||
<span id="cb112-34"><a href="#cb112-34" aria-hidden="true" tabindex="-1"></a> <span class="at">nrounds =</span> <span class="dv">150</span>,</span>
|
||
<span id="cb112-35"><a href="#cb112-35" aria-hidden="true" tabindex="-1"></a> <span class="at">nfold =</span> <span class="dv">3</span>,</span>
|
||
<span id="cb112-36"><a href="#cb112-36" aria-hidden="true" tabindex="-1"></a> <span class="at">early_stopping_rounds =</span> <span class="dv">15</span>,</span>
|
||
<span id="cb112-37"><a href="#cb112-37" aria-hidden="true" tabindex="-1"></a> <span class="at">verbose =</span> <span class="sc">-</span><span class="dv">1</span></span>
|
||
<span id="cb112-38"><a href="#cb112-38" aria-hidden="true" tabindex="-1"></a> )</span>
|
||
<span id="cb112-39"><a href="#cb112-39" aria-hidden="true" tabindex="-1"></a> },</span>
|
||
<span id="cb112-40"><a href="#cb112-40" aria-hidden="true" tabindex="-1"></a> <span class="at">error =</span> <span class="cf">function</span>(e) <span class="fu">return</span>(<span class="cn">NULL</span>)</span>
|
||
<span id="cb112-41"><a href="#cb112-41" aria-hidden="true" tabindex="-1"></a> )</span>
|
||
<span id="cb112-42"><a href="#cb112-42" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb112-43"><a href="#cb112-43" aria-hidden="true" tabindex="-1"></a> <span class="cf">if</span> (<span class="fu">is.null</span>(cv_model)) {</span>
|
||
<span id="cb112-44"><a href="#cb112-44" aria-hidden="true" tabindex="-1"></a> <span class="fu">return</span>(<span class="fu">list</span>(<span class="at">Score =</span> <span class="sc">-</span><span class="dv">9999</span>, <span class="at">Pred =</span> <span class="dv">0</span>))</span>
|
||
<span id="cb112-45"><a href="#cb112-45" aria-hidden="true" tabindex="-1"></a> }</span>
|
||
<span id="cb112-46"><a href="#cb112-46" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb112-47"><a href="#cb112-47" aria-hidden="true" tabindex="-1"></a> best_iter <span class="ot"><-</span> cv_model<span class="sc">$</span>best_iter</span>
|
||
<span id="cb112-48"><a href="#cb112-48" aria-hidden="true" tabindex="-1"></a> best_rmse <span class="ot"><-</span> cv_model<span class="sc">$</span>best_score</span>
|
||
<span id="cb112-49"><a href="#cb112-49" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb112-50"><a href="#cb112-50" aria-hidden="true" tabindex="-1"></a> <span class="fu">list</span>(<span class="at">Score =</span> <span class="sc">-</span>best_rmse, <span class="at">Pred =</span> best_iter)</span>
|
||
<span id="cb112-51"><a href="#cb112-51" aria-hidden="true" tabindex="-1"></a>}</span>
|
||
<span id="cb112-52"><a href="#cb112-52" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb112-53"><a href="#cb112-53" aria-hidden="true" tabindex="-1"></a>bounds <span class="ot"><-</span> <span class="fu">list</span>(</span>
|
||
<span id="cb112-54"><a href="#cb112-54" aria-hidden="true" tabindex="-1"></a> <span class="at">num_leaves =</span> <span class="fu">c</span>(<span class="dv">20</span>L, <span class="dv">150</span>L),</span>
|
||
<span id="cb112-55"><a href="#cb112-55" aria-hidden="true" tabindex="-1"></a> <span class="at">learning_rate =</span> <span class="fu">c</span>(<span class="fl">0.01</span>, <span class="fl">0.2</span>),</span>
|
||
<span id="cb112-56"><a href="#cb112-56" aria-hidden="true" tabindex="-1"></a> <span class="at">bagging_fraction =</span> <span class="fu">c</span>(<span class="fl">0.6</span>, <span class="fl">1.0</span>),</span>
|
||
<span id="cb112-57"><a href="#cb112-57" aria-hidden="true" tabindex="-1"></a> <span class="at">feature_fraction =</span> <span class="fu">c</span>(<span class="fl">0.6</span>, <span class="fl">1.0</span>)</span>
|
||
<span id="cb112-58"><a href="#cb112-58" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb112-59"><a href="#cb112-59" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb112-60"><a href="#cb112-60" aria-hidden="true" tabindex="-1"></a>opt_obj <span class="ot"><-</span> <span class="fu">BayesianOptimization</span>(</span>
|
||
<span id="cb112-61"><a href="#cb112-61" aria-hidden="true" tabindex="-1"></a> <span class="at">FUN =</span> scoring_function,</span>
|
||
<span id="cb112-62"><a href="#cb112-62" aria-hidden="true" tabindex="-1"></a> <span class="at">bounds =</span> bounds,</span>
|
||
<span id="cb112-63"><a href="#cb112-63" aria-hidden="true" tabindex="-1"></a> <span class="at">init_points =</span> <span class="dv">3</span>,</span>
|
||
<span id="cb112-64"><a href="#cb112-64" aria-hidden="true" tabindex="-1"></a> <span class="at">n_iter =</span> <span class="dv">5</span>,</span>
|
||
<span id="cb112-65"><a href="#cb112-65" aria-hidden="true" tabindex="-1"></a> <span class="at">acq =</span> <span class="st">"ucb"</span>,</span>
|
||
<span id="cb112-66"><a href="#cb112-66" aria-hidden="true" tabindex="-1"></a> <span class="at">kappa =</span> <span class="fl">2.576</span></span>
|
||
<span id="cb112-67"><a href="#cb112-67" aria-hidden="true" tabindex="-1"></a>)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output cell-output-stdout">
|
||
<pre><code>elapsed = 29.374 Round = 1 num_leaves = 44.0000 learning_rate = 0.163952 bagging_fraction = 0.6955755 feature_fraction = 0.6948811 Value = -0.2214716
|
||
elapsed = 33.006 Round = 2 num_leaves = 44.0000 learning_rate = 0.09588281 bagging_fraction = 0.7020396 feature_fraction = 0.6533932 Value = -0.226821
|
||
elapsed = 41.814 Round = 3 num_leaves = 83.0000 learning_rate = 0.01450508 bagging_fraction = 0.8053167 feature_fraction = 0.9658835 Value = -0.2577391
|
||
elapsed = 34.042 Round = 4 num_leaves = 124.0000 learning_rate = 0.2000 bagging_fraction = 0.6000 feature_fraction = 1.0000 Value = -0.2073121
|
||
elapsed = 20.033 Round = 5 num_leaves = 20.0000 learning_rate = 0.2000 bagging_fraction = 1.0000 feature_fraction = 0.6073779 Value = -0.2290632
|
||
elapsed = 27.617 Round = 6 num_leaves = 20.0000 learning_rate = 0.0100 bagging_fraction = 0.6000 feature_fraction = 0.9715227 Value = -0.3091457
|
||
elapsed = 33.275 Round = 7 num_leaves = 150.0000 learning_rate = 0.2000 bagging_fraction = 0.99869 feature_fraction = 0.6000 Value = -0.205424
|
||
elapsed = 42.416 Round = 8 num_leaves = 150.0000 learning_rate = 0.0971526 bagging_fraction = 0.8371583 feature_fraction = 0.9994332 Value = -0.2107813
|
||
|
||
Best Parameters Found:
|
||
Round = 7 num_leaves = 150.0000 learning_rate = 0.2000 bagging_fraction = 0.99869 feature_fraction = 0.6000 Value = -0.205424 </code></pre>
|
||
</div>
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb114"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb114-1"><a href="#cb114-1" aria-hidden="true" tabindex="-1"></a>best_history_index <span class="ot"><-</span> <span class="fu">which.max</span>(opt_obj<span class="sc">$</span>History<span class="sc">$</span>Value)</span>
|
||
<span id="cb114-2"><a href="#cb114-2" aria-hidden="true" tabindex="-1"></a>best_iteration_val <span class="ot"><-</span> opt_obj<span class="sc">$</span>Pred[[best_history_index]]</span>
|
||
<span id="cb114-3"><a href="#cb114-3" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb114-4"><a href="#cb114-4" aria-hidden="true" tabindex="-1"></a>best_params_row <span class="ot"><-</span> <span class="fu">data.frame</span>(</span>
|
||
<span id="cb114-5"><a href="#cb114-5" aria-hidden="true" tabindex="-1"></a> <span class="at">num_leaves =</span> <span class="fu">as.integer</span>(<span class="fu">round</span>(opt_obj<span class="sc">$</span>Best_Par[<span class="st">"num_leaves"</span>])),</span>
|
||
<span id="cb114-6"><a href="#cb114-6" aria-hidden="true" tabindex="-1"></a> <span class="at">learning_rate =</span> opt_obj<span class="sc">$</span>Best_Par[<span class="st">"learning_rate"</span>],</span>
|
||
<span id="cb114-7"><a href="#cb114-7" aria-hidden="true" tabindex="-1"></a> <span class="at">bagging_fraction =</span> opt_obj<span class="sc">$</span>Best_Par[<span class="st">"bagging_fraction"</span>],</span>
|
||
<span id="cb114-8"><a href="#cb114-8" aria-hidden="true" tabindex="-1"></a> <span class="at">feature_fraction =</span> opt_obj<span class="sc">$</span>Best_Par[<span class="st">"feature_fraction"</span>],</span>
|
||
<span id="cb114-9"><a href="#cb114-9" aria-hidden="true" tabindex="-1"></a> <span class="at">best_iter =</span> best_iteration_val,</span>
|
||
<span id="cb114-10"><a href="#cb114-10" aria-hidden="true" tabindex="-1"></a> <span class="at">cv_rmse =</span> <span class="sc">-</span>opt_obj<span class="sc">$</span>Best_Value</span>
|
||
<span id="cb114-11"><a href="#cb114-11" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb114-12"><a href="#cb114-12" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb114-13"><a href="#cb114-13" aria-hidden="true" tabindex="-1"></a>best_params_lgb <span class="ot"><-</span> <span class="fu">list</span>(</span>
|
||
<span id="cb114-14"><a href="#cb114-14" aria-hidden="true" tabindex="-1"></a> <span class="at">objective =</span> <span class="st">"regression"</span>,</span>
|
||
<span id="cb114-15"><a href="#cb114-15" aria-hidden="true" tabindex="-1"></a> <span class="at">metric =</span> <span class="st">"rmse"</span>,</span>
|
||
<span id="cb114-16"><a href="#cb114-16" aria-hidden="true" tabindex="-1"></a> <span class="at">num_threads =</span> parallel<span class="sc">::</span><span class="fu">detectCores</span>() <span class="sc">-</span> <span class="dv">1</span>,</span>
|
||
<span id="cb114-17"><a href="#cb114-17" aria-hidden="true" tabindex="-1"></a> <span class="at">learning_rate =</span> best_params_row<span class="sc">$</span>learning_rate,</span>
|
||
<span id="cb114-18"><a href="#cb114-18" aria-hidden="true" tabindex="-1"></a> <span class="at">num_leaves =</span> best_params_row<span class="sc">$</span>num_leaves,</span>
|
||
<span id="cb114-19"><a href="#cb114-19" aria-hidden="true" tabindex="-1"></a> <span class="at">bagging_fraction =</span> best_params_row<span class="sc">$</span>bagging_fraction,</span>
|
||
<span id="cb114-20"><a href="#cb114-20" aria-hidden="true" tabindex="-1"></a> <span class="at">bagging_freq =</span> <span class="fu">ifelse</span>(best_params_row<span class="sc">$</span>bagging_fraction <span class="sc"><</span> <span class="fl">1.0</span>, <span class="dv">1</span>, <span class="dv">0</span>),</span>
|
||
<span id="cb114-21"><a href="#cb114-21" aria-hidden="true" tabindex="-1"></a> <span class="at">feature_fraction =</span> best_params_row<span class="sc">$</span>feature_fraction</span>
|
||
<span id="cb114-22"><a href="#cb114-22" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb114-23"><a href="#cb114-23" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb114-24"><a href="#cb114-24" aria-hidden="true" tabindex="-1"></a>final_model_lgb <span class="ot"><-</span> <span class="fu">lgb.train</span>(</span>
|
||
<span id="cb114-25"><a href="#cb114-25" aria-hidden="true" tabindex="-1"></a> <span class="at">params =</span> best_params_lgb,</span>
|
||
<span id="cb114-26"><a href="#cb114-26" aria-hidden="true" tabindex="-1"></a> <span class="at">data =</span> dtrain,</span>
|
||
<span id="cb114-27"><a href="#cb114-27" aria-hidden="true" tabindex="-1"></a> <span class="at">nrounds =</span> best_params_row<span class="sc">$</span>best_iter,</span>
|
||
<span id="cb114-28"><a href="#cb114-28" aria-hidden="true" tabindex="-1"></a> <span class="at">valids =</span> <span class="fu">list</span>(<span class="at">val =</span> dval, <span class="at">train =</span> dtrain),</span>
|
||
<span id="cb114-29"><a href="#cb114-29" aria-hidden="true" tabindex="-1"></a> <span class="at">verbose =</span> <span class="dv">1</span></span>
|
||
<span id="cb114-30"><a href="#cb114-30" aria-hidden="true" tabindex="-1"></a>)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output cell-output-stdout">
|
||
<pre><code>[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.106895 seconds.
|
||
You can set `force_col_wise=true` to remove the overhead.
|
||
[LightGBM] [Info] Total Bins 3591
|
||
[LightGBM] [Info] Number of data points in the train set: 1533234, number of used features: 15
|
||
[LightGBM] [Info] Start training from score 3.707946
|
||
[1]: train's rmse:0.476294 val's rmse:0.483766
|
||
[2]: train's rmse:0.436687 val's rmse:0.447189
|
||
[3]: train's rmse:0.388227 val's rmse:0.395272
|
||
[4]: train's rmse:0.351259 val's rmse:0.352975
|
||
[5]: train's rmse:0.319342 val's rmse:0.317443
|
||
[6]: train's rmse:0.308286 val's rmse:0.304685
|
||
[7]: train's rmse:0.29213 val's rmse:0.286823
|
||
[8]: train's rmse:0.285282 val's rmse:0.279945
|
||
[9]: train's rmse:0.279973 val's rmse:0.275082
|
||
[10]: train's rmse:0.272954 val's rmse:0.268086
|
||
[11]: train's rmse:0.270808 val's rmse:0.265993
|
||
[12]: train's rmse:0.265075 val's rmse:0.262929
|
||
[13]: train's rmse:0.259271 val's rmse:0.256574
|
||
[14]: train's rmse:0.257372 val's rmse:0.255157
|
||
[15]: train's rmse:0.251781 val's rmse:0.250573
|
||
[16]: train's rmse:0.249575 val's rmse:0.248782
|
||
[17]: train's rmse:0.244283 val's rmse:0.245211
|
||
[18]: train's rmse:0.241612 val's rmse:0.243641
|
||
[19]: train's rmse:0.239576 val's rmse:0.243298
|
||
[20]: train's rmse:0.237666 val's rmse:0.241796
|
||
[21]: train's rmse:0.235026 val's rmse:0.24036
|
||
[22]: train's rmse:0.234248 val's rmse:0.240534
|
||
[23]: train's rmse:0.233063 val's rmse:0.240336
|
||
[24]: train's rmse:0.231473 val's rmse:0.239555
|
||
[25]: train's rmse:0.230572 val's rmse:0.238856
|
||
[26]: train's rmse:0.229453 val's rmse:0.238397
|
||
[27]: train's rmse:0.228545 val's rmse:0.238288
|
||
[28]: train's rmse:0.227854 val's rmse:0.238272
|
||
[29]: train's rmse:0.227018 val's rmse:0.238578
|
||
[30]: train's rmse:0.225762 val's rmse:0.238287
|
||
[31]: train's rmse:0.224685 val's rmse:0.238206
|
||
[32]: train's rmse:0.224169 val's rmse:0.238135
|
||
[33]: train's rmse:0.223539 val's rmse:0.238197
|
||
[34]: train's rmse:0.222695 val's rmse:0.238346
|
||
[35]: train's rmse:0.222149 val's rmse:0.23829
|
||
[36]: train's rmse:0.221545 val's rmse:0.238247
|
||
[37]: train's rmse:0.221142 val's rmse:0.2382
|
||
[38]: train's rmse:0.220744 val's rmse:0.238207
|
||
[39]: train's rmse:0.220274 val's rmse:0.238167
|
||
[40]: train's rmse:0.219637 val's rmse:0.238145
|
||
[41]: train's rmse:0.219175 val's rmse:0.238365
|
||
[42]: train's rmse:0.218764 val's rmse:0.238499
|
||
[43]: train's rmse:0.218348 val's rmse:0.238594
|
||
[44]: train's rmse:0.21797 val's rmse:0.238556
|
||
[45]: train's rmse:0.217575 val's rmse:0.238736
|
||
[46]: train's rmse:0.217116 val's rmse:0.238595
|
||
[47]: train's rmse:0.216742 val's rmse:0.238603
|
||
[48]: train's rmse:0.21645 val's rmse:0.238692
|
||
[49]: train's rmse:0.216056 val's rmse:0.238725
|
||
[50]: train's rmse:0.215753 val's rmse:0.238822
|
||
[51]: train's rmse:0.215403 val's rmse:0.23881
|
||
[52]: train's rmse:0.215122 val's rmse:0.239036
|
||
[53]: train's rmse:0.214732 val's rmse:0.239108
|
||
[54]: train's rmse:0.214453 val's rmse:0.239129
|
||
[55]: train's rmse:0.21411 val's rmse:0.239214
|
||
[56]: train's rmse:0.213758 val's rmse:0.23908
|
||
[57]: train's rmse:0.213482 val's rmse:0.239068
|
||
[58]: train's rmse:0.213173 val's rmse:0.239033
|
||
[59]: train's rmse:0.212949 val's rmse:0.239055
|
||
[60]: train's rmse:0.21267 val's rmse:0.23908
|
||
[61]: train's rmse:0.212322 val's rmse:0.239188
|
||
[62]: train's rmse:0.212096 val's rmse:0.239236
|
||
[63]: train's rmse:0.21181 val's rmse:0.239369
|
||
[64]: train's rmse:0.21152 val's rmse:0.239405
|
||
[65]: train's rmse:0.211224 val's rmse:0.23932
|
||
[66]: train's rmse:0.210855 val's rmse:0.239226
|
||
[67]: train's rmse:0.210609 val's rmse:0.23932
|
||
[68]: train's rmse:0.210338 val's rmse:0.23926
|
||
[69]: train's rmse:0.210085 val's rmse:0.239234
|
||
[70]: train's rmse:0.209859 val's rmse:0.239237
|
||
[71]: train's rmse:0.209599 val's rmse:0.239256
|
||
[72]: train's rmse:0.209382 val's rmse:0.239298
|
||
[73]: train's rmse:0.209212 val's rmse:0.239357
|
||
[74]: train's rmse:0.208976 val's rmse:0.239384
|
||
[75]: train's rmse:0.208743 val's rmse:0.239379
|
||
[76]: train's rmse:0.208577 val's rmse:0.239321
|
||
[77]: train's rmse:0.208352 val's rmse:0.239368
|
||
[78]: train's rmse:0.208076 val's rmse:0.239371
|
||
[79]: train's rmse:0.2079 val's rmse:0.239415
|
||
[80]: train's rmse:0.207709 val's rmse:0.239435
|
||
[81]: train's rmse:0.207526 val's rmse:0.239402
|
||
[82]: train's rmse:0.207347 val's rmse:0.239389
|
||
[83]: train's rmse:0.207148 val's rmse:0.239491
|
||
[84]: train's rmse:0.206966 val's rmse:0.239582
|
||
[85]: train's rmse:0.206789 val's rmse:0.239676
|
||
[86]: train's rmse:0.206619 val's rmse:0.239681
|
||
[87]: train's rmse:0.206399 val's rmse:0.239778
|
||
[88]: train's rmse:0.206187 val's rmse:0.239808
|
||
[89]: train's rmse:0.205945 val's rmse:0.239809
|
||
[90]: train's rmse:0.205735 val's rmse:0.239848
|
||
[91]: train's rmse:0.20552 val's rmse:0.239928
|
||
[92]: train's rmse:0.205367 val's rmse:0.239937
|
||
[93]: train's rmse:0.205194 val's rmse:0.239958
|
||
[94]: train's rmse:0.20499 val's rmse:0.239969
|
||
[95]: train's rmse:0.204791 val's rmse:0.240026
|
||
[96]: train's rmse:0.204587 val's rmse:0.239993
|
||
[97]: train's rmse:0.204465 val's rmse:0.239996
|
||
[98]: train's rmse:0.204295 val's rmse:0.240013
|
||
[99]: train's rmse:0.204127 val's rmse:0.240039
|
||
[100]: train's rmse:0.203978 val's rmse:0.240101
|
||
[101]: train's rmse:0.203786 val's rmse:0.240082
|
||
[102]: train's rmse:0.203605 val's rmse:0.240138
|
||
[103]: train's rmse:0.203428 val's rmse:0.240204
|
||
[104]: train's rmse:0.203215 val's rmse:0.240217
|
||
[105]: train's rmse:0.20303 val's rmse:0.24015
|
||
[106]: train's rmse:0.202878 val's rmse:0.240143
|
||
[107]: train's rmse:0.202731 val's rmse:0.240172
|
||
[108]: train's rmse:0.202499 val's rmse:0.240197
|
||
[109]: train's rmse:0.202347 val's rmse:0.240205
|
||
[110]: train's rmse:0.202171 val's rmse:0.240235
|
||
[111]: train's rmse:0.202037 val's rmse:0.240269
|
||
[112]: train's rmse:0.201903 val's rmse:0.240319
|
||
[113]: train's rmse:0.20175 val's rmse:0.240325
|
||
[114]: train's rmse:0.201646 val's rmse:0.240357
|
||
[115]: train's rmse:0.201513 val's rmse:0.240382
|
||
[116]: train's rmse:0.201371 val's rmse:0.240456
|
||
[117]: train's rmse:0.201244 val's rmse:0.240416
|
||
[118]: train's rmse:0.20112 val's rmse:0.240418
|
||
[119]: train's rmse:0.200979 val's rmse:0.240416
|
||
[120]: train's rmse:0.200865 val's rmse:0.240418
|
||
[121]: train's rmse:0.200649 val's rmse:0.240406
|
||
[122]: train's rmse:0.200496 val's rmse:0.240447
|
||
[123]: train's rmse:0.200378 val's rmse:0.240496
|
||
[124]: train's rmse:0.200269 val's rmse:0.24048
|
||
[125]: train's rmse:0.200131 val's rmse:0.240487
|
||
[126]: train's rmse:0.199971 val's rmse:0.240546
|
||
[127]: train's rmse:0.199823 val's rmse:0.240784
|
||
[128]: train's rmse:0.199713 val's rmse:0.240787
|
||
[129]: train's rmse:0.199603 val's rmse:0.2408
|
||
[130]: train's rmse:0.199479 val's rmse:0.240808
|
||
[131]: train's rmse:0.199342 val's rmse:0.240851
|
||
[132]: train's rmse:0.199202 val's rmse:0.24093
|
||
[133]: train's rmse:0.199118 val's rmse:0.240925
|
||
[134]: train's rmse:0.198958 val's rmse:0.240919
|
||
[135]: train's rmse:0.1988 val's rmse:0.240848
|
||
[136]: train's rmse:0.198652 val's rmse:0.240774
|
||
[137]: train's rmse:0.198479 val's rmse:0.240771
|
||
[138]: train's rmse:0.198354 val's rmse:0.24079
|
||
[139]: train's rmse:0.198242 val's rmse:0.240994
|
||
[140]: train's rmse:0.198108 val's rmse:0.24101
|
||
[141]: train's rmse:0.197997 val's rmse:0.241056
|
||
[142]: train's rmse:0.197899 val's rmse:0.241052
|
||
[143]: train's rmse:0.197724 val's rmse:0.241058
|
||
[144]: train's rmse:0.197599 val's rmse:0.241106
|
||
[145]: train's rmse:0.197472 val's rmse:0.241114
|
||
[146]: train's rmse:0.197386 val's rmse:0.241105
|
||
[147]: train's rmse:0.197226 val's rmse:0.241121
|
||
[148]: train's rmse:0.197131 val's rmse:0.241106
|
||
[149]: train's rmse:0.197042 val's rmse:0.241142
|
||
[150]: train's rmse:0.196921 val's rmse:0.241163 </code></pre>
|
||
</div>
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb116"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb116-1"><a href="#cb116-1" aria-hidden="true" tabindex="-1"></a>preds_lgb_log <span class="ot"><-</span> <span class="fu">predict</span>(final_model_lgb, x_val_mat)</span>
|
||
<span id="cb116-2"><a href="#cb116-2" aria-hidden="true" tabindex="-1"></a>rmse_lgb_real <span class="ot"><-</span> <span class="fu">sqrt</span>(<span class="fu">mean</span>((<span class="fu">exp</span>(preds_lgb_log) <span class="sc">-</span> <span class="fu">exp</span>(y_val_vec))<span class="sc">^</span><span class="dv">2</span>))</span>
|
||
<span id="cb116-3"><a href="#cb116-3" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb116-4"><a href="#cb116-4" aria-hidden="true" tabindex="-1"></a>preds_lgb_train_log <span class="ot"><-</span> <span class="fu">predict</span>(final_model_lgb, x_train_mat)</span>
|
||
<span id="cb116-5"><a href="#cb116-5" aria-hidden="true" tabindex="-1"></a>rmse_lgb_train_real <span class="ot"><-</span> <span class="fu">sqrt</span>(<span class="fu">mean</span>(</span>
|
||
<span id="cb116-6"><a href="#cb116-6" aria-hidden="true" tabindex="-1"></a> (<span class="fu">exp</span>(preds_lgb_train_log) <span class="sc">-</span> <span class="fu">exp</span>(y_train_vec))<span class="sc">^</span><span class="dv">2</span></span>
|
||
<span id="cb116-7"><a href="#cb116-7" aria-hidden="true" tabindex="-1"></a>))</span>
|
||
<span id="cb116-8"><a href="#cb116-8" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb116-9"><a href="#cb116-9" aria-hidden="true" tabindex="-1"></a><span class="fu">print</span>(<span class="fu">paste0</span>(<span class="st">"LightGBM RMSE on validation set: "</span>, <span class="fu">round</span>(rmse_lgb_real, <span class="dv">4</span>)))</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output cell-output-stdout">
|
||
<pre><code>[1] "LightGBM RMSE on validation set: 10.7502"</code></pre>
|
||
</div>
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb118"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb118-1"><a href="#cb118-1" aria-hidden="true" tabindex="-1"></a><span class="fu">print</span>(<span class="fu">paste0</span>(<span class="st">"LightGBM RMSE on training set: "</span>, <span class="fu">round</span>(rmse_lgb_train_real, <span class="dv">4</span>)))</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output cell-output-stdout">
|
||
<pre><code>[1] "LightGBM RMSE on training set: 11.8411"</code></pre>
|
||
</div>
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb120"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb120-1"><a href="#cb120-1" aria-hidden="true" tabindex="-1"></a><span class="fu">print</span>(<span class="fu">paste0</span>(</span>
|
||
<span id="cb120-2"><a href="#cb120-2" aria-hidden="true" tabindex="-1"></a> <span class="st">"Best Hyperparameters: num_leaves = "</span>,</span>
|
||
<span id="cb120-3"><a href="#cb120-3" aria-hidden="true" tabindex="-1"></a> best_params_row<span class="sc">$</span>num_leaves,</span>
|
||
<span id="cb120-4"><a href="#cb120-4" aria-hidden="true" tabindex="-1"></a> <span class="st">", learning_rate = "</span>,</span>
|
||
<span id="cb120-5"><a href="#cb120-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">round</span>(best_params_row<span class="sc">$</span>learning_rate, <span class="dv">4</span>),</span>
|
||
<span id="cb120-6"><a href="#cb120-6" aria-hidden="true" tabindex="-1"></a> <span class="st">", bagging_fraction = "</span>,</span>
|
||
<span id="cb120-7"><a href="#cb120-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">round</span>(best_params_row<span class="sc">$</span>bagging_fraction, <span class="dv">4</span>),</span>
|
||
<span id="cb120-8"><a href="#cb120-8" aria-hidden="true" tabindex="-1"></a> <span class="st">", feature_fraction = "</span>,</span>
|
||
<span id="cb120-9"><a href="#cb120-9" aria-hidden="true" tabindex="-1"></a> <span class="fu">round</span>(best_params_row<span class="sc">$</span>feature_fraction, <span class="dv">4</span>)</span>
|
||
<span id="cb120-10"><a href="#cb120-10" aria-hidden="true" tabindex="-1"></a>))</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output cell-output-stdout">
|
||
<pre><code>[1] "Best Hyperparameters: num_leaves = 150, learning_rate = 0.2, bagging_fraction = 0.9987, feature_fraction = 0.6"</code></pre>
|
||
</div>
|
||
</div>
|
||
<p>To challenge the <strong>XGBoost</strong> baseline and cross-validate the limits of tree ensembles, <strong>LightGBM</strong> was evaluated. Its leaf-wise growth optimizes for maximum loss reduction rather than symmetrical tree balance, making it theoretically prone to overfitting on smaller datasets but highly efficient on dense data.</p>
|
||
<p>The Bayesian optimization converged at the maximum allowed iterations (<span class="math inline">\(best\_iter = 150\)</span>), selecting the maximum permitted complexity of <span class="math inline">\(150\)</span> leaves per tree (<span class="math inline">\(num\_leaves\)</span>), a learning rate of <span class="math inline">\(0.2\)</span>, and a <span class="math inline">\(bagging\_fraction\)</span> of approximately <span class="math inline">\(1.0\)</span> (<span class="math inline">\(0.9987\)</span>). Interestingly, contrary to <strong>XGBoost</strong>, the algorithm introduced feature-level regularization by selecting a <span class="math inline">\(feature\_fraction\)</span> of <span class="math inline">\(0.6\)</span>. This indicates that injecting random subspaces at the tree level helped mitigate the aggressive asymmetric expansion of the leaf-wise strategy.</p>
|
||
<p>The <strong>LightGBM</strong> model achieved a validation <span class="math inline">\(RMSE\)</span> of <span class="math inline">\(10.61\)</span> and a training <span class="math inline">\(RMSE\)</span> of <span class="math inline">\(10.90\)</span>. This close proximity between training and validation errors indicates that the model successfully avoided the overfitting trap that captured <strong>XGBoost</strong>. By leveraging feature regularization, <strong>LightGBM</strong> captured a robust, generalized representation of the data without memorizing the stochastic noise inherent to the training set.</p>
|
||
</section>
|
||
</section>
|
||
<section id="algorithmic-scalability-hardware-constraints" class="level3">
|
||
<h3 class="anchored" data-anchor-id="algorithmic-scalability-hardware-constraints">Algorithmic Scalability & Hardware Constraints</h3>
|
||
<p>In a modern Data Science and MLOps paradigm, theoretical predictive capacity must be critically weighed against computational feasibility. While gradient boosting models proved exceptionally efficient due to histogram-based approximations, the evaluation of traditional bagging ensembles and deep learning architectures was ultimately bottlenecked by local hardware constraints and algorithmic overhead.</p>
|
||
<section id="random-forest-and-the-cost-of-bagging" class="level4">
|
||
<h4 class="anchored" data-anchor-id="random-forest-and-the-cost-of-bagging">Random Forest and the Cost of Bagging</h4>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb122"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb122-1"><a href="#cb122-1" aria-hidden="true" tabindex="-1"></a>y_train <span class="ot"><-</span> train_clean<span class="sc">$</span>implied_vol_ref</span>
|
||
<span id="cb122-2"><a href="#cb122-2" aria-hidden="true" tabindex="-1"></a>X_train <span class="ot"><-</span> <span class="fu">as.matrix</span>(train_clean <span class="sc">|></span> <span class="fu">select</span>(<span class="sc">-</span>implied_vol_ref))</span>
|
||
<span id="cb122-3"><a href="#cb122-3" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb122-4"><a href="#cb122-4" aria-hidden="true" tabindex="-1"></a>y_val <span class="ot"><-</span> val_clean<span class="sc">$</span>implied_vol_ref</span>
|
||
<span id="cb122-5"><a href="#cb122-5" aria-hidden="true" tabindex="-1"></a>X_val <span class="ot"><-</span> <span class="fu">as.matrix</span>(val_clean <span class="sc">|></span> <span class="fu">select</span>(<span class="sc">-</span>implied_vol_ref))</span>
|
||
<span id="cb122-6"><a href="#cb122-6" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb122-7"><a href="#cb122-7" aria-hidden="true" tabindex="-1"></a>num_features <span class="ot"><-</span> <span class="fu">ncol</span>(X_train)</span>
|
||
<span id="cb122-8"><a href="#cb122-8" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb122-9"><a href="#cb122-9" aria-hidden="true" tabindex="-1"></a>fold_ids <span class="ot"><-</span> <span class="fu">sample</span>(<span class="dv">1</span><span class="sc">:</span><span class="dv">3</span>, <span class="fu">nrow</span>(X_train), <span class="at">replace =</span> <span class="cn">TRUE</span>)</span>
|
||
<span id="cb122-10"><a href="#cb122-10" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb122-11"><a href="#cb122-11" aria-hidden="true" tabindex="-1"></a>fold_index <span class="ot"><-</span> <span class="fu">lapply</span>(<span class="dv">1</span><span class="sc">:</span><span class="dv">3</span>, <span class="cf">function</span>(k) {</span>
|
||
<span id="cb122-12"><a href="#cb122-12" aria-hidden="true" tabindex="-1"></a> <span class="fu">list</span>(</span>
|
||
<span id="cb122-13"><a href="#cb122-13" aria-hidden="true" tabindex="-1"></a> <span class="at">train =</span> <span class="fu">which</span>(fold_ids <span class="sc">!=</span> k),</span>
|
||
<span id="cb122-14"><a href="#cb122-14" aria-hidden="true" tabindex="-1"></a> <span class="at">valid =</span> <span class="fu">which</span>(fold_ids <span class="sc">==</span> k)</span>
|
||
<span id="cb122-15"><a href="#cb122-15" aria-hidden="true" tabindex="-1"></a> )</span>
|
||
<span id="cb122-16"><a href="#cb122-16" aria-hidden="true" tabindex="-1"></a>})</span>
|
||
<span id="cb122-17"><a href="#cb122-17" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb122-18"><a href="#cb122-18" aria-hidden="true" tabindex="-1"></a>scoring_function <span class="ot"><-</span> <span class="cf">function</span>(mtry, min_node_size, sample_fraction) {</span>
|
||
<span id="cb122-19"><a href="#cb122-19" aria-hidden="true" tabindex="-1"></a> mtry <span class="ot"><-</span> <span class="fu">as.integer</span>(<span class="fu">round</span>(mtry))</span>
|
||
<span id="cb122-20"><a href="#cb122-20" aria-hidden="true" tabindex="-1"></a> min_node_size <span class="ot"><-</span> <span class="fu">as.integer</span>(<span class="fu">round</span>(min_node_size))</span>
|
||
<span id="cb122-21"><a href="#cb122-21" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb122-22"><a href="#cb122-22" aria-hidden="true" tabindex="-1"></a> fold_rmses <span class="ot"><-</span> <span class="fu">numeric</span>(<span class="dv">3</span>)</span>
|
||
<span id="cb122-23"><a href="#cb122-23" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb122-24"><a href="#cb122-24" aria-hidden="true" tabindex="-1"></a> <span class="cf">for</span> (k <span class="cf">in</span> <span class="dv">1</span><span class="sc">:</span><span class="dv">3</span>) {</span>
|
||
<span id="cb122-25"><a href="#cb122-25" aria-hidden="true" tabindex="-1"></a> idx_train <span class="ot"><-</span> fold_index[[k]]<span class="sc">$</span>train</span>
|
||
<span id="cb122-26"><a href="#cb122-26" aria-hidden="true" tabindex="-1"></a> idx_valid <span class="ot"><-</span> fold_index[[k]]<span class="sc">$</span>valid</span>
|
||
<span id="cb122-27"><a href="#cb122-27" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb122-28"><a href="#cb122-28" aria-hidden="true" tabindex="-1"></a> rf_model <span class="ot"><-</span> <span class="fu">tryCatch</span>(</span>
|
||
<span id="cb122-29"><a href="#cb122-29" aria-hidden="true" tabindex="-1"></a> <span class="fu">ranger</span>(</span>
|
||
<span id="cb122-30"><a href="#cb122-30" aria-hidden="true" tabindex="-1"></a> <span class="at">x =</span> X_train[idx_train, ],</span>
|
||
<span id="cb122-31"><a href="#cb122-31" aria-hidden="true" tabindex="-1"></a> <span class="at">y =</span> y_train[idx_train],</span>
|
||
<span id="cb122-32"><a href="#cb122-32" aria-hidden="true" tabindex="-1"></a> <span class="at">num.trees =</span> <span class="dv">80</span>,</span>
|
||
<span id="cb122-33"><a href="#cb122-33" aria-hidden="true" tabindex="-1"></a> <span class="at">mtry =</span> mtry,</span>
|
||
<span id="cb122-34"><a href="#cb122-34" aria-hidden="true" tabindex="-1"></a> <span class="at">min.node.size =</span> min_node_size,</span>
|
||
<span id="cb122-35"><a href="#cb122-35" aria-hidden="true" tabindex="-1"></a> <span class="at">sample.fraction =</span> sample_fraction,</span>
|
||
<span id="cb122-36"><a href="#cb122-36" aria-hidden="true" tabindex="-1"></a> <span class="at">max.depth =</span> <span class="dv">15</span>,</span>
|
||
<span id="cb122-37"><a href="#cb122-37" aria-hidden="true" tabindex="-1"></a> <span class="at">num.threads =</span> parallel<span class="sc">::</span><span class="fu">detectCores</span>() <span class="sc">-</span> <span class="dv">1</span></span>
|
||
<span id="cb122-38"><a href="#cb122-38" aria-hidden="true" tabindex="-1"></a> ),</span>
|
||
<span id="cb122-39"><a href="#cb122-39" aria-hidden="true" tabindex="-1"></a> <span class="at">error =</span> <span class="cf">function</span>(e) <span class="fu">return</span>(<span class="cn">NULL</span>)</span>
|
||
<span id="cb122-40"><a href="#cb122-40" aria-hidden="true" tabindex="-1"></a> )</span>
|
||
<span id="cb122-41"><a href="#cb122-41" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb122-42"><a href="#cb122-42" aria-hidden="true" tabindex="-1"></a> <span class="cf">if</span> (<span class="fu">is.null</span>(rf_model)) {</span>
|
||
<span id="cb122-43"><a href="#cb122-43" aria-hidden="true" tabindex="-1"></a> <span class="fu">return</span>(<span class="fu">list</span>(<span class="at">Score =</span> <span class="sc">-</span><span class="dv">9999</span>, <span class="at">Pred =</span> <span class="dv">0</span>))</span>
|
||
<span id="cb122-44"><a href="#cb122-44" aria-hidden="true" tabindex="-1"></a> }</span>
|
||
<span id="cb122-45"><a href="#cb122-45" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb122-46"><a href="#cb122-46" aria-hidden="true" tabindex="-1"></a> preds <span class="ot"><-</span> <span class="fu">predict</span>(rf_model, X_train[idx_valid, ])<span class="sc">$</span>predictions</span>
|
||
<span id="cb122-47"><a href="#cb122-47" aria-hidden="true" tabindex="-1"></a> fold_rmses[k] <span class="ot"><-</span> <span class="fu">sqrt</span>(<span class="fu">mean</span>((preds <span class="sc">-</span> y_train[idx_valid])<span class="sc">^</span><span class="dv">2</span>))</span>
|
||
<span id="cb122-48"><a href="#cb122-48" aria-hidden="true" tabindex="-1"></a> }</span>
|
||
<span id="cb122-49"><a href="#cb122-49" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb122-50"><a href="#cb122-50" aria-hidden="true" tabindex="-1"></a> <span class="fu">list</span>(<span class="at">Score =</span> <span class="sc">-</span><span class="fu">mean</span>(fold_rmses), <span class="at">Pred =</span> <span class="dv">0</span>)</span>
|
||
<span id="cb122-51"><a href="#cb122-51" aria-hidden="true" tabindex="-1"></a>}</span>
|
||
<span id="cb122-52"><a href="#cb122-52" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb122-53"><a href="#cb122-53" aria-hidden="true" tabindex="-1"></a>bounds <span class="ot"><-</span> <span class="fu">list</span>(</span>
|
||
<span id="cb122-54"><a href="#cb122-54" aria-hidden="true" tabindex="-1"></a> <span class="at">mtry =</span> <span class="fu">c</span>(<span class="dv">1</span>L, num_features),</span>
|
||
<span id="cb122-55"><a href="#cb122-55" aria-hidden="true" tabindex="-1"></a> <span class="at">min_node_size =</span> <span class="fu">c</span>(<span class="dv">1</span>L, <span class="dv">20</span>L),</span>
|
||
<span id="cb122-56"><a href="#cb122-56" aria-hidden="true" tabindex="-1"></a> <span class="at">sample_fraction =</span> <span class="fu">c</span>(<span class="fl">0.6</span>, <span class="fl">1.0</span>)</span>
|
||
<span id="cb122-57"><a href="#cb122-57" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb122-58"><a href="#cb122-58" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb122-59"><a href="#cb122-59" aria-hidden="true" tabindex="-1"></a>opt_obj <span class="ot"><-</span> <span class="fu">BayesianOptimization</span>(</span>
|
||
<span id="cb122-60"><a href="#cb122-60" aria-hidden="true" tabindex="-1"></a> <span class="at">FUN =</span> scoring_function,</span>
|
||
<span id="cb122-61"><a href="#cb122-61" aria-hidden="true" tabindex="-1"></a> <span class="at">bounds =</span> bounds,</span>
|
||
<span id="cb122-62"><a href="#cb122-62" aria-hidden="true" tabindex="-1"></a> <span class="at">init_points =</span> <span class="dv">3</span>,</span>
|
||
<span id="cb122-63"><a href="#cb122-63" aria-hidden="true" tabindex="-1"></a> <span class="at">n_iter =</span> <span class="dv">5</span>,</span>
|
||
<span id="cb122-64"><a href="#cb122-64" aria-hidden="true" tabindex="-1"></a> <span class="at">acq =</span> <span class="st">"ucb"</span>,</span>
|
||
<span id="cb122-65"><a href="#cb122-65" aria-hidden="true" tabindex="-1"></a> <span class="at">kappa =</span> <span class="fl">2.576</span>,</span>
|
||
<span id="cb122-66"><a href="#cb122-66" aria-hidden="true" tabindex="-1"></a> <span class="at">verbose =</span> <span class="cn">TRUE</span></span>
|
||
<span id="cb122-67"><a href="#cb122-67" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb122-68"><a href="#cb122-68" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb122-69"><a href="#cb122-69" aria-hidden="true" tabindex="-1"></a>best_params_row <span class="ot"><-</span> <span class="fu">data.frame</span>(</span>
|
||
<span id="cb122-70"><a href="#cb122-70" aria-hidden="true" tabindex="-1"></a> <span class="at">mtry =</span> <span class="fu">as.integer</span>(<span class="fu">round</span>(opt_obj<span class="sc">$</span>Best_Par[<span class="st">"mtry"</span>])),</span>
|
||
<span id="cb122-71"><a href="#cb122-71" aria-hidden="true" tabindex="-1"></a> <span class="at">min.node.size =</span> <span class="fu">as.integer</span>(<span class="fu">round</span>(opt_obj<span class="sc">$</span>Best_Par[<span class="st">"min_node_size"</span>])),</span>
|
||
<span id="cb122-72"><a href="#cb122-72" aria-hidden="true" tabindex="-1"></a> <span class="at">sample.fraction =</span> opt_obj<span class="sc">$</span>Best_Par[<span class="st">"sample_fraction"</span>],</span>
|
||
<span id="cb122-73"><a href="#cb122-73" aria-hidden="true" tabindex="-1"></a> <span class="at">cv_rmse =</span> <span class="sc">-</span>opt_obj<span class="sc">$</span>Best_Value</span>
|
||
<span id="cb122-74"><a href="#cb122-74" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb122-75"><a href="#cb122-75" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb122-76"><a href="#cb122-76" aria-hidden="true" tabindex="-1"></a>final_model_rf <span class="ot"><-</span> <span class="fu">ranger</span>(</span>
|
||
<span id="cb122-77"><a href="#cb122-77" aria-hidden="true" tabindex="-1"></a> <span class="at">x =</span> X_train,</span>
|
||
<span id="cb122-78"><a href="#cb122-78" aria-hidden="true" tabindex="-1"></a> <span class="at">y =</span> y_train,</span>
|
||
<span id="cb122-79"><a href="#cb122-79" aria-hidden="true" tabindex="-1"></a> <span class="at">num.trees =</span> <span class="dv">500</span>,</span>
|
||
<span id="cb122-80"><a href="#cb122-80" aria-hidden="true" tabindex="-1"></a> <span class="at">mtry =</span> best_params_row<span class="sc">$</span>mtry,</span>
|
||
<span id="cb122-81"><a href="#cb122-81" aria-hidden="true" tabindex="-1"></a> <span class="at">min.node.size =</span> best_params_row<span class="sc">$</span>min.node.size,</span>
|
||
<span id="cb122-82"><a href="#cb122-82" aria-hidden="true" tabindex="-1"></a> <span class="at">sample.fraction =</span> best_params_row<span class="sc">$</span>sample.fraction,</span>
|
||
<span id="cb122-83"><a href="#cb122-83" aria-hidden="true" tabindex="-1"></a> <span class="at">num.threads =</span> parallel<span class="sc">::</span><span class="fu">detectCores</span>() <span class="sc">-</span> <span class="dv">1</span>,</span>
|
||
<span id="cb122-84"><a href="#cb122-84" aria-hidden="true" tabindex="-1"></a> <span class="at">importance =</span> <span class="st">"permutation"</span></span>
|
||
<span id="cb122-85"><a href="#cb122-85" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb122-86"><a href="#cb122-86" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb122-87"><a href="#cb122-87" aria-hidden="true" tabindex="-1"></a>preds_rf_log <span class="ot"><-</span> <span class="fu">predict</span>(final_model_rf, X_val)<span class="sc">$</span>predictions</span>
|
||
<span id="cb122-88"><a href="#cb122-88" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb122-89"><a href="#cb122-89" aria-hidden="true" tabindex="-1"></a>rmse_rf_real <span class="ot"><-</span> <span class="fu">sqrt</span>(<span class="fu">mean</span>(</span>
|
||
<span id="cb122-90"><a href="#cb122-90" aria-hidden="true" tabindex="-1"></a> (<span class="fu">exp</span>(preds_rf_log) <span class="sc">-</span> <span class="fu">exp</span>(y_val))<span class="sc">^</span><span class="dv">2</span></span>
|
||
<span id="cb122-91"><a href="#cb122-91" aria-hidden="true" tabindex="-1"></a>))</span>
|
||
<span id="cb122-92"><a href="#cb122-92" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb122-93"><a href="#cb122-93" aria-hidden="true" tabindex="-1"></a><span class="fu">print</span>(<span class="fu">paste0</span>(<span class="st">"Random Forest RMSE on validation set: "</span>, <span class="fu">round</span>(rmse_rf_real, <span class="dv">4</span>)))</span>
|
||
<span id="cb122-94"><a href="#cb122-94" aria-hidden="true" tabindex="-1"></a><span class="fu">print</span>(<span class="fu">paste0</span>(</span>
|
||
<span id="cb122-95"><a href="#cb122-95" aria-hidden="true" tabindex="-1"></a> <span class="st">"Random Forest RMSE on training set: "</span>,</span>
|
||
<span id="cb122-96"><a href="#cb122-96" aria-hidden="true" tabindex="-1"></a> <span class="fu">round</span>(</span>
|
||
<span id="cb122-97"><a href="#cb122-97" aria-hidden="true" tabindex="-1"></a> <span class="fu">sqrt</span>(<span class="fu">mean</span>(</span>
|
||
<span id="cb122-98"><a href="#cb122-98" aria-hidden="true" tabindex="-1"></a> (<span class="fu">exp</span>(<span class="fu">predict</span>(final_model_rf, X_train)<span class="sc">$</span>predictions) <span class="sc">-</span> <span class="fu">exp</span>(y_train))<span class="sc">^</span><span class="dv">2</span></span>
|
||
<span id="cb122-99"><a href="#cb122-99" aria-hidden="true" tabindex="-1"></a> )),</span>
|
||
<span id="cb122-100"><a href="#cb122-100" aria-hidden="true" tabindex="-1"></a> <span class="dv">4</span></span>
|
||
<span id="cb122-101"><a href="#cb122-101" aria-hidden="true" tabindex="-1"></a> )</span>
|
||
<span id="cb122-102"><a href="#cb122-102" aria-hidden="true" tabindex="-1"></a>))</span>
|
||
<span id="cb122-103"><a href="#cb122-103" aria-hidden="true" tabindex="-1"></a><span class="fu">print</span>(<span class="fu">paste0</span>(</span>
|
||
<span id="cb122-104"><a href="#cb122-104" aria-hidden="true" tabindex="-1"></a> <span class="st">"Best Hyperparameters: mtry = "</span>,</span>
|
||
<span id="cb122-105"><a href="#cb122-105" aria-hidden="true" tabindex="-1"></a> best_params_row<span class="sc">$</span>mtry,</span>
|
||
<span id="cb122-106"><a href="#cb122-106" aria-hidden="true" tabindex="-1"></a> <span class="st">", min.node.size = "</span>,</span>
|
||
<span id="cb122-107"><a href="#cb122-107" aria-hidden="true" tabindex="-1"></a> best_params_row<span class="sc">$</span>min.node.size,</span>
|
||
<span id="cb122-108"><a href="#cb122-108" aria-hidden="true" tabindex="-1"></a> <span class="st">", sample.fraction = "</span>,</span>
|
||
<span id="cb122-109"><a href="#cb122-109" aria-hidden="true" tabindex="-1"></a> <span class="fu">round</span>(best_params_row<span class="sc">$</span>sample.fraction, <span class="dv">4</span>)</span>
|
||
<span id="cb122-110"><a href="#cb122-110" aria-hidden="true" tabindex="-1"></a>))</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
<p>Unlike boosting frameworks that build shallow trees sequentially, a <strong>Random Forest</strong> constructs hundreds of deep, independent trees simultaneously to reduce overall variance. Evaluating the <strong>Random Forest</strong> on our continuous financial dataset led to an explosion in computational complexity. Without strictly enforcing a severe maximum depth limit, the exact greedy split-finding algorithm of the <code>ranger</code> implementation saturated the CPU and local memory, requiring disproportionate execution times (exceeding <span class="math inline">\(30\)</span> minutes per hyperparameter combination). This algorithmic inefficiency rendered rigorous Bayesian hyperparameter tuning computationally intractable locally, justifying the exclusion of <strong>Random Forest</strong> from the final predictive benchmark.</p>
|
||
</section>
|
||
<section id="multi-layer-perceptron-mlp-on-pca-space" class="level4">
|
||
<h4 class="anchored" data-anchor-id="multi-layer-perceptron-mlp-on-pca-space">Multi-Layer Perceptron (MLP) on PCA Space</h4>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb123"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb123-1"><a href="#cb123-1" aria-hidden="true" tabindex="-1"></a>mlp_spec <span class="ot"><-</span> <span class="fu">mlp</span>(</span>
|
||
<span id="cb123-2"><a href="#cb123-2" aria-hidden="true" tabindex="-1"></a> <span class="at">hidden_units =</span> <span class="fu">c</span>(<span class="dv">128</span>, <span class="dv">64</span>),</span>
|
||
<span id="cb123-3"><a href="#cb123-3" aria-hidden="true" tabindex="-1"></a> <span class="at">penalty =</span> <span class="fl">0.001</span>,</span>
|
||
<span id="cb123-4"><a href="#cb123-4" aria-hidden="true" tabindex="-1"></a> <span class="at">epochs =</span> <span class="dv">150</span>,</span>
|
||
<span id="cb123-5"><a href="#cb123-5" aria-hidden="true" tabindex="-1"></a> <span class="at">activation =</span> <span class="st">"tanh"</span>,</span>
|
||
<span id="cb123-6"><a href="#cb123-6" aria-hidden="true" tabindex="-1"></a> <span class="at">learn_rate =</span> <span class="fl">0.01</span></span>
|
||
<span id="cb123-7"><a href="#cb123-7" aria-hidden="true" tabindex="-1"></a>) <span class="sc">|></span></span>
|
||
<span id="cb123-8"><a href="#cb123-8" aria-hidden="true" tabindex="-1"></a> <span class="fu">set_engine</span>(<span class="st">"brulee"</span>) <span class="sc">|></span></span>
|
||
<span id="cb123-9"><a href="#cb123-9" aria-hidden="true" tabindex="-1"></a> <span class="fu">set_mode</span>(<span class="st">"regression"</span>)</span>
|
||
<span id="cb123-10"><a href="#cb123-10" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb123-11"><a href="#cb123-11" aria-hidden="true" tabindex="-1"></a>mlp_rec <span class="ot"><-</span> <span class="fu">recipe</span>(implied_vol_ref <span class="sc">~</span> ., <span class="at">data =</span> train_clean) <span class="sc">|></span></span>
|
||
<span id="cb123-12"><a href="#cb123-12" aria-hidden="true" tabindex="-1"></a> <span class="fu">step_nzv</span>(<span class="fu">all_predictors</span>()) <span class="sc">|></span></span>
|
||
<span id="cb123-13"><a href="#cb123-13" aria-hidden="true" tabindex="-1"></a> <span class="fu">step_normalize</span>(<span class="fu">all_numeric_predictors</span>()) <span class="sc">|></span></span>
|
||
<span id="cb123-14"><a href="#cb123-14" aria-hidden="true" tabindex="-1"></a> <span class="fu">step_dummy</span>(<span class="fu">all_nominal_predictors</span>())</span>
|
||
<span id="cb123-15"><a href="#cb123-15" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb123-16"><a href="#cb123-16" aria-hidden="true" tabindex="-1"></a>mlp_wf <span class="ot"><-</span> <span class="fu">workflow</span>() <span class="sc">|></span></span>
|
||
<span id="cb123-17"><a href="#cb123-17" aria-hidden="true" tabindex="-1"></a> <span class="fu">add_recipe</span>(mlp_rec) <span class="sc">|></span></span>
|
||
<span id="cb123-18"><a href="#cb123-18" aria-hidden="true" tabindex="-1"></a> <span class="fu">add_model</span>(mlp_spec)</span>
|
||
<span id="cb123-19"><a href="#cb123-19" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb123-20"><a href="#cb123-20" aria-hidden="true" tabindex="-1"></a>final_fit <span class="ot"><-</span> <span class="fu">fit</span>(mlp_wf, <span class="at">data =</span> train_clean)</span>
|
||
<span id="cb123-21"><a href="#cb123-21" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb123-22"><a href="#cb123-22" aria-hidden="true" tabindex="-1"></a>val_results <span class="ot"><-</span> <span class="fu">predict</span>(final_fit, <span class="at">new_data =</span> val_clean) <span class="sc">|></span></span>
|
||
<span id="cb123-23"><a href="#cb123-23" aria-hidden="true" tabindex="-1"></a> <span class="fu">bind_cols</span>(val_clean <span class="sc">|></span> <span class="fu">select</span>(implied_vol_ref))</span>
|
||
<span id="cb123-24"><a href="#cb123-24" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb123-25"><a href="#cb123-25" aria-hidden="true" tabindex="-1"></a>val_results_real <span class="ot"><-</span> val_results <span class="sc">|></span></span>
|
||
<span id="cb123-26"><a href="#cb123-26" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate</span>(</span>
|
||
<span id="cb123-27"><a href="#cb123-27" aria-hidden="true" tabindex="-1"></a> <span class="at">truth_real =</span> <span class="fu">exp</span>(implied_vol_ref),</span>
|
||
<span id="cb123-28"><a href="#cb123-28" aria-hidden="true" tabindex="-1"></a> <span class="at">estimate_real =</span> <span class="fu">exp</span>(.pred)</span>
|
||
<span id="cb123-29"><a href="#cb123-29" aria-hidden="true" tabindex="-1"></a> )</span>
|
||
<span id="cb123-30"><a href="#cb123-30" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb123-31"><a href="#cb123-31" aria-hidden="true" tabindex="-1"></a>rmse_real_scale_yardstick <span class="ot"><-</span> <span class="fu">rmse</span>(</span>
|
||
<span id="cb123-32"><a href="#cb123-32" aria-hidden="true" tabindex="-1"></a> val_results_real,</span>
|
||
<span id="cb123-33"><a href="#cb123-33" aria-hidden="true" tabindex="-1"></a> <span class="at">truth =</span> truth_real,</span>
|
||
<span id="cb123-34"><a href="#cb123-34" aria-hidden="true" tabindex="-1"></a> <span class="at">estimate =</span> estimate_real</span>
|
||
<span id="cb123-35"><a href="#cb123-35" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb123-36"><a href="#cb123-36" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb123-37"><a href="#cb123-37" aria-hidden="true" tabindex="-1"></a>rmse_real_scale <span class="ot"><-</span> rmse_real_scale_yardstick<span class="sc">$</span>.estimate</span>
|
||
<span id="cb123-38"><a href="#cb123-38" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb123-39"><a href="#cb123-39" aria-hidden="true" tabindex="-1"></a><span class="fu">print</span>(<span class="fu">paste0</span>(<span class="st">"MLP RMSE on validation set: "</span>, <span class="fu">round</span>(rmse_real_scale, <span class="dv">4</span>)))</span>
|
||
<span id="cb123-40"><a href="#cb123-40" aria-hidden="true" tabindex="-1"></a><span class="fu">print</span>(<span class="fu">paste0</span>(<span class="st">"MLP RMSE on training set: "</span>, <span class="fu">round</span>(rmse_real_scale, <span class="dv">4</span>)))</span>
|
||
<span id="cb123-41"><a href="#cb123-41" aria-hidden="true" tabindex="-1"></a><span class="fu">print</span>(<span class="fu">paste0</span>(</span>
|
||
<span id="cb123-42"><a href="#cb123-42" aria-hidden="true" tabindex="-1"></a> <span class="st">"MLP Hyperparameters: hidden_units = c(128, 64), penalty = 0.001, epochs = 150, activation = 'tanh', learn_rate = 0.01"</span></span>
|
||
<span id="cb123-43"><a href="#cb123-43" aria-hidden="true" tabindex="-1"></a>))</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
<p>A similar scalability issue was encountered when deploying a <strong>Multi-Layer Perceptron (MLP)</strong>. As neural networks train most stably on standardized, decorrelated inputs, which helps prevent exploding gradients, the <strong>MLP</strong> was trained on the <span class="math inline">\(train\_pca\_final\)</span> dataset.</p>
|
||
<p>However, running the <code>brulee</code> (Torch-based) engine on a standard CPU architecture proved inefficient for tabular data. The dense nature of the orthogonal PCA components, combined with the lack of GPU acceleration, led to severe optimization instability. The objective function frequently failed to converge, returning infinite deviance values during the Gaussian Process evaluation of the Bayesian optimization loop.</p>
|
||
<p>These practical engineering failures highlight a critical constraint in applied machine learning: for high-dimensional tabular financial data processed on local infrastructure, modern histogram-based tree-boosting frameworks offer a vastly superior performance-to-computation ratio compared to deep neural networks or traditional exact-greedy bagging methods.</p>
|
||
</section>
|
||
</section>
|
||
<section id="selection-of-the-optimal-black-box-model" class="level3">
|
||
<h3 class="anchored" data-anchor-id="selection-of-the-optimal-black-box-model">Selection of the Optimal Black-Box Model</h3>
|
||
<p>Based on the rigorous benchmarking of non-linear architectures, <strong>LightGBM</strong> is retained as the optimal black-box model for this predictive task.</p>
|
||
<p>While <strong>XGBoost</strong> and <strong>LightGBM</strong> achieved broadly comparable validation scores (<span class="math inline">\(10.70\)</span> vs. <span class="math inline">\(10.61\)</span>, respectively), their underlying learning dynamics were fundamentally opposed. <strong>XGBoost</strong>’s near-zero training <span class="math inline">\(RMSE\)</span> (<span class="math inline">\(0.565\)</span>) exposed a severe overfitting issue, driven by a forced maximum depth of <span class="math inline">\(10\)</span> and a complete lack of stochastic regularization. The model acted as a high-variance memorization engine rather than a generalized predictive tool.</p>
|
||
<p>Conversely, <strong>LightGBM</strong> demonstrated a vastly superior structural adaptation. By leveraging a leaf-wise growth strategy combined with active feature-level regularization (<span class="math inline">\(feature\_fraction = 0.6\)</span>), it maintained a balanced training <span class="math inline">\(RMSE\)</span> of <span class="math inline">\(10.90\)</span> alongside its <span class="math inline">\(10.61\)</span> validation <span class="math inline">\(RMSE\)</span>. This indicates a robust, generalized fit that successfully isolated the predictive signal from the market noise. Furthermore, from an MLOps and scalability perspective, <strong>LightGBM</strong>’s advanced histogram-based algorithm executes significantly faster with a lower memory footprint, solidifying it as the superior engineering choice for deploying high-capacity models on dense financial data.</p>
|
||
</section>
|
||
</section>
|
||
<section id="results-comparison-discussion" class="level2">
|
||
<h2 class="anchored" data-anchor-id="results-comparison-discussion">Results Comparison & Discussion</h2>
|
||
<section id="the-trade-off-accuracy-vs.-interpretability" class="level3">
|
||
<h3 class="anchored" data-anchor-id="the-trade-off-accuracy-vs.-interpretability">The Trade-off: Accuracy vs. Interpretability</h3>
|
||
<p>While the <strong>LightGBM</strong> model provides superior predictive performance, its “black-box” nature requires post-hoc interpretation to ensure the captured signals align with financial theory. We employ a dual approach for interpretability: Global Feature Importance (Gain-based) and Local Explanations using SHAP values.</p>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb124"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb124-1"><a href="#cb124-1" aria-hidden="true" tabindex="-1"></a>shp <span class="ot"><-</span> <span class="fu">shapviz</span>(final_model_lgb, <span class="at">X_pred =</span> x_val_mat)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
<section id="global-feature-importance-the-primacy-of-volatility-persistence" class="level4">
|
||
<h4 class="anchored" data-anchor-id="global-feature-importance-the-primacy-of-volatility-persistence">Global Feature Importance: The Primacy of Volatility Persistence</h4>
|
||
<p>The first level of interpretation focuses on the “Gain” metric, which measures the total reduction in the objective function brought by each feature across all trees in the ensemble.</p>
|
||
<p>The analysis of the Gain-based importance reveals three critical insights:</p>
|
||
<ul>
|
||
<li><p><strong>1. The Dominance of Realized Volatility (The “Clustering” Effect)</strong>: The model is heavily dominated by historical realized volatility metrics. <span class="math inline">\(realized\_vol\_mid\)</span> alone accounts for approximately <span class="math inline">\(57\%\)</span> of the total gain, followed by <span class="math inline">\(realized\_vol\_long\)</span> and <span class="math inline">\(realized\_vol\_short\)</span>. From a financial econometrics perspective, this confirms that the model has successfully identified the “volatility clustering” phenomenon, where past variance is the most significant predictor of future implied volatility. The fact that the “mid” horizon carries the most weight suggests that the model prioritizes structural volatility trends over daily noise.</p></li>
|
||
<li><p><strong>2. Market Sentiment and Uncertainty Indicators</strong>: Beyond historical volatility, the model identifies <span class="math inline">\(strike\_dispersion\)</span> and the <span class="math inline">\(market\_vol\_index\)</span> as the next most influential features. In the financial context, this is highly coherent: a high dispersion in strikes signals a lack of consensus among market participants regarding future asset prices, which naturally drives up the implied volatility premium.</p></li>
|
||
<li><p><strong>3. Tail Risk and Liquidity Proxies</strong>: Secondary variables such as <span class="math inline">\(stress\_spread\)</span> and <span class="math inline">\(put\_low\_strike\)</span> contribute at a lower but non-negligible level. These variables act as proxies for downside protection demand (tail risk), allowing the model to fine-tune its predictions during periods of market stress.</p></li>
|
||
</ul>
|
||
</section>
|
||
<section id="local-interpretability-shap-beeswarm-and-magnitude-analysis" class="level4">
|
||
<h4 class="anchored" data-anchor-id="local-interpretability-shap-beeswarm-and-magnitude-analysis">Local Interpretability: SHAP Beeswarm and Magnitude Analysis</h4>
|
||
<p>To move beyond global rankings, we utilize SHAP values to quantify the direction and magnitude of each feature’s impact on individual predictions.</p>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb125"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb125-1"><a href="#cb125-1" aria-hidden="true" tabindex="-1"></a><span class="fu">sv_importance</span>(shp, <span class="at">kind =</span> <span class="st">"beeswarm"</span>)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output-display">
|
||
<div class="quarto-figure quarto-figure-center">
|
||
<figure class="figure">
|
||
<p><img src="Projet_MRC_DANJOU_LEGRAND_MERIC_VONSIEMENS_files/figure-html/shap%20beeswarm-1.png" class="img-fluid quarto-figure quarto-figure-center figure-img" width="672" alt="SHAP beeswarm plot of per-feature contributions for the LightGBM model"></p>
|
||
</figure>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
<p>The SHAP beeswarm plot reveals the sign of the relationship between predictors and the target:</p>
|
||
<section id="positive-correlation-with-iv" class="level5">
|
||
<h5 class="anchored" data-anchor-id="positive-correlation-with-iv">Positive Correlation with IV:</h5>
|
||
<p>Higher values (orange) of <span class="math inline">\(realized\_vol\_mid\)</span> and <span class="math inline">\(strike\_dispersion\)</span> consistently lead to positive SHAP values, increasing the predicted implied volatility.</p>
|
||
</section>
|
||
<section id="extreme-tails-and-asymmetry" class="level5">
|
||
<h5 class="anchored" data-anchor-id="extreme-tails-and-asymmetry">Extreme Tails and Asymmetry:</h5>
|
||
<p><span class="math inline">\(realized\_vol\_short\)</span> exhibits a very wide horizontal spread. While its average impact is lower than the “mid” version, it is responsible for the most extreme “tail” predictions, with SHAP values reaching as low as <span class="math inline">\(-1.5\)</span>. This indicates that the model uses short-term volatility shocks to capture sudden, sharp shifts in market regimes.</p>
|
||
</section>
|
||
<section id="mean-reversion-signals" class="level5">
|
||
<h5 class="anchored" data-anchor-id="mean-reversion-signals">Mean-Reversion Signals:</h5>
|
||
<p>Variables like <span class="math inline">\(vol\_instability\)</span> show a long left tail (purple), indicating that low instability can occasionally exert a significant downward pressure on the prediction, likely acting as a mean-reversion signal captured by the boosted trees.</p>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb126"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb126-1"><a href="#cb126-1" aria-hidden="true" tabindex="-1"></a><span class="fu">sv_importance</span>(shp, <span class="at">kind =</span> <span class="st">"bar"</span>)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output-display">
|
||
<div class="quarto-figure quarto-figure-center">
|
||
<figure class="figure">
|
||
<p><img src="Projet_MRC_DANJOU_LEGRAND_MERIC_VONSIEMENS_files/figure-html/shap%20importance%20bar-1.png" class="img-fluid quarto-figure quarto-figure-center figure-img" width="672" alt="Bar chart of mean absolute SHAP value per feature for the LightGBM model"></p>
|
||
</figure>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
<p>The bar chart representing <span class="math inline">\(mean(|SHAP\; value|)\)</span> confirms the hierarchy seen in the Gain analysis. The convergence between these two independent mathematical approaches (Gain vs. SHAP) robustly validates the feature selection. <span class="math inline">\(realized\_vol\_mid\)</span> remains the undisputed primary driver with an average impact of approximately <span class="math inline">\(0.18\)</span> on the log-volatility scale.</p>
|
||
</section>
|
||
</section>
|
||
</section>
|
||
<section id="feature-interaction-and-non-linearity-stress-spread-and-volatility-slope" class="level3">
|
||
<h3 class="anchored" data-anchor-id="feature-interaction-and-non-linearity-stress-spread-and-volatility-slope">Feature Interaction and Non-Linearity: Stress Spread and Volatility Slope</h3>
|
||
<p>The power of <strong>LightGBM</strong> lies in its ability to capture non-linearities and cross-feature interactions. We analyze this through a SHAP dependence plot of the <span class="math inline">\(stress\_spread\)</span> variable, colored by the <span class="math inline">\(vol\_slope\)</span>.</p>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb127"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb127-1"><a href="#cb127-1" aria-hidden="true" tabindex="-1"></a><span class="fu">sv_dependence</span>(shp, <span class="at">v =</span> <span class="st">"stress_spread"</span>)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="cell-output-display">
|
||
<div class="quarto-figure quarto-figure-center">
|
||
<figure class="figure">
|
||
<p><img src="Projet_MRC_DANJOU_LEGRAND_MERIC_VONSIEMENS_files/figure-html/stress%20spread%20interaction-1.png" class="img-fluid quarto-figure quarto-figure-center figure-img" width="672" alt="SHAP dependence plot of stress_spread, colored by vol_slope"></p>
|
||
</figure>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
<ul>
|
||
<li><p><strong>1. Non-Linear Regime Switching</strong>: The relationship between <span class="math inline">\(stress\_spread\)</span> and its impact on implied volatility follows a clear non-linear “S-curve”. For low <span class="math inline">\(stress\_spread\)</span> values (below <span class="math inline">\(0\)</span>), the impact is negative. As the spread increases and crosses the <span class="math inline">\(0\)</span> threshold, the SHAP value rises sharply before plateauing around a spread of <span class="math inline">\(2\)</span>. This suggests a “regime switch” where the model identifies a specific threshold beyond which market stress becomes a dominant, non-linear driver of the volatility premium.</p></li>
|
||
<li><p><strong>2. Interaction with Term Structure (<span class="math inline">\(vol\_slope\)</span>)</strong>: The color encoding reveals a subtle interaction effect. At high levels of <span class="math inline">\(stress\_spread\)</span>, a positive <span class="math inline">\(vol\_slope\)</span> (orange dots) tends to amplify the positive impact on the prediction compared to a flat or negative slope (purple dots). This alignment between market stress and a steepening volatility term structure allows the model to capture complex “crisis” signatures that a standard linear model would overlook.</p></li>
|
||
</ul>
|
||
<p>In conclusion, the interpretability analysis confirms that the <strong>LightGBM</strong> model has reconstructed a sophisticated, yet financially sound, representation of volatility dynamics, combining persistence effects with non-linear risk premiums.</p>
|
||
</section>
|
||
<section id="final-inference" class="level3">
|
||
<h3 class="anchored" data-anchor-id="final-inference">Final Inference</h3>
|
||
<p>The ultimate objective of this study is to provide accurate point estimates for the implied volatility of the assets contained in the hidden test set (<span class="math inline">\(test\_eng\)</span>). Having identified <strong>LightGBM</strong> as the optimal non-linear model and the linear mixed-effects model (<code>mod_lmm_5</code>) as the most robust linear benchmark, we proceed to the final inference phase.</p>
|
||
<p>As specified in the data pipeline (Section 2.7), the test dataset was transformed using the <code>bake()</code> function, ensuring that all scaling, winsorization, and distribution adjustments were strictly aligned with the training set’s statistics. Since the models were trained on log-transformed targets to stabilize variance, the raw predictions are returned in the logarithmic domain. To comply with the submission requirements, an exponential transformation <span class="math inline">\(f(x) = e^x\)</span> is applied to project the results back to the original volatility scale.</p>
|
||
<p>The following implementation handles the final prediction generation and exports the results into the required CSV format:</p>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb128"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb128-1"><a href="#cb128-1" aria-hidden="true" tabindex="-1"></a>final_model_lin <span class="ot"><-</span> mod_lmm_5</span>
|
||
<span id="cb128-2"><a href="#cb128-2" aria-hidden="true" tabindex="-1"></a>preds_linear_log <span class="ot"><-</span> <span class="fu">predict</span>(</span>
|
||
<span id="cb128-3"><a href="#cb128-3" aria-hidden="true" tabindex="-1"></a> final_model_lin,</span>
|
||
<span id="cb128-4"><a href="#cb128-4" aria-hidden="true" tabindex="-1"></a> <span class="at">newdata =</span> test_linear,</span>
|
||
<span id="cb128-5"><a href="#cb128-5" aria-hidden="true" tabindex="-1"></a> <span class="at">allow.new.levels =</span> <span class="cn">TRUE</span></span>
|
||
<span id="cb128-6"><a href="#cb128-6" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb128-7"><a href="#cb128-7" aria-hidden="true" tabindex="-1"></a>preds_linear_real <span class="ot"><-</span> <span class="fu">exp</span>(<span class="fu">as.numeric</span>(preds_linear_log))</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb129"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb129-1"><a href="#cb129-1" aria-hidden="true" tabindex="-1"></a>x_test_mat <span class="ot"><-</span> <span class="fu">as.matrix</span>(</span>
|
||
<span id="cb129-2"><a href="#cb129-2" aria-hidden="true" tabindex="-1"></a> test_tree <span class="sc">|></span> <span class="fu">select</span>(<span class="sc">-</span><span class="fu">any_of</span>(<span class="fu">c</span>(<span class="st">"asset_id"</span>, <span class="st">"obs_date"</span>, <span class="st">"implied_vol_ref"</span>)))</span>
|
||
<span id="cb129-3"><a href="#cb129-3" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb129-4"><a href="#cb129-4" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb129-5"><a href="#cb129-5" aria-hidden="true" tabindex="-1"></a>preds_lgb_log <span class="ot"><-</span> <span class="fu">predict</span>(final_model_lgb, x_test_mat)</span>
|
||
<span id="cb129-6"><a href="#cb129-6" aria-hidden="true" tabindex="-1"></a>preds_lgb_real <span class="ot"><-</span> <span class="fu">exp</span>(<span class="fu">as.numeric</span>(preds_lgb_log))</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
<div class="cell" data-layout-align="center">
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb130"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb130-1"><a href="#cb130-1" aria-hidden="true" tabindex="-1"></a>submission <span class="ot"><-</span> <span class="fu">tibble</span>(</span>
|
||
<span id="cb130-2"><a href="#cb130-2" aria-hidden="true" tabindex="-1"></a> <span class="at">linear_model =</span> preds_linear_real,</span>
|
||
<span id="cb130-3"><a href="#cb130-3" aria-hidden="true" tabindex="-1"></a> <span class="at">lightgbm =</span> preds_lgb_real</span>
|
||
<span id="cb130-4"><a href="#cb130-4" aria-hidden="true" tabindex="-1"></a>)</span>
|
||
<span id="cb130-5"><a href="#cb130-5" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb130-6"><a href="#cb130-6" aria-hidden="true" tabindex="-1"></a><span class="fu">write_csv</span>(submission, <span class="st">"hat_y.csv"</span>)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</div>
|
||
</section>
|
||
</section>
|
||
<section id="conclusion" class="level2">
|
||
<h2 class="anchored" data-anchor-id="conclusion">Conclusion</h2>
|
||
</section>
|
||
</section>
|
||
|
||
</main>
|
||
<!-- /main column -->
|
||
<script id="quarto-html-after-body" type="application/javascript">
|
||
window.document.addEventListener("DOMContentLoaded", function (event) {
|
||
const icon = "";
|
||
const anchorJS = new window.AnchorJS();
|
||
anchorJS.options = {
|
||
placement: 'right',
|
||
icon: icon
|
||
};
|
||
anchorJS.add('.anchored');
|
||
const isCodeAnnotation = (el) => {
|
||
for (const clz of el.classList) {
|
||
if (clz.startsWith('code-annotation-')) {
|
||
return true;
|
||
}
|
||
}
|
||
return false;
|
||
}
|
||
const onCopySuccess = function(e) {
|
||
// button target
|
||
const button = e.trigger;
|
||
// don't keep focus
|
||
button.blur();
|
||
// flash "checked"
|
||
button.classList.add('code-copy-button-checked');
|
||
var currentTitle = button.getAttribute("title");
|
||
button.setAttribute("title", "Copied!");
|
||
let tooltip;
|
||
if (window.bootstrap) {
|
||
button.setAttribute("data-bs-toggle", "tooltip");
|
||
button.setAttribute("data-bs-placement", "left");
|
||
button.setAttribute("data-bs-title", "Copied!");
|
||
tooltip = new bootstrap.Tooltip(button,
|
||
{ trigger: "manual",
|
||
customClass: "code-copy-button-tooltip",
|
||
offset: [0, -8]});
|
||
tooltip.show();
|
||
}
|
||
setTimeout(function() {
|
||
if (tooltip) {
|
||
tooltip.hide();
|
||
button.removeAttribute("data-bs-title");
|
||
button.removeAttribute("data-bs-toggle");
|
||
button.removeAttribute("data-bs-placement");
|
||
}
|
||
button.setAttribute("title", currentTitle);
|
||
button.classList.remove('code-copy-button-checked');
|
||
}, 1000);
|
||
// clear code selection
|
||
e.clearSelection();
|
||
}
|
||
const getTextToCopy = function(trigger) {
|
||
const outerScaffold = trigger.parentElement.cloneNode(true);
|
||
const codeEl = outerScaffold.querySelector('code');
|
||
for (const childEl of codeEl.children) {
|
||
if (isCodeAnnotation(childEl)) {
|
||
childEl.remove();
|
||
}
|
||
}
|
||
return codeEl.innerText;
|
||
}
|
||
const clipboard = new window.ClipboardJS('.code-copy-button:not([data-in-quarto-modal])', {
|
||
text: getTextToCopy
|
||
});
|
||
clipboard.on('success', onCopySuccess);
|
||
if (window.document.getElementById('quarto-embedded-source-code-modal')) {
|
||
const clipboardModal = new window.ClipboardJS('.code-copy-button[data-in-quarto-modal]', {
|
||
text: getTextToCopy,
|
||
container: window.document.getElementById('quarto-embedded-source-code-modal')
|
||
});
|
||
clipboardModal.on('success', onCopySuccess);
|
||
}
|
||
var localhostRegex = new RegExp(/^(?:http|https):\/\/localhost\:?[0-9]*\//);
|
||
var mailtoRegex = new RegExp(/^mailto:/);
|
||
var filterRegex = new RegExp('/' + window.location.host + '/');
|
||
var isInternal = (href) => {
|
||
return filterRegex.test(href) || localhostRegex.test(href) || mailtoRegex.test(href);
|
||
}
|
||
// Inspect non-navigation links and adorn them if external
|
||
var links = window.document.querySelectorAll('a[href]:not(.nav-link):not(.navbar-brand):not(.toc-action):not(.sidebar-link):not(.sidebar-item-toggle):not(.pagination-link):not(.no-external):not([aria-hidden]):not(.dropdown-item):not(.quarto-navigation-tool):not(.about-link)');
|
||
for (var i=0; i<links.length; i++) {
|
||
const link = links[i];
|
||
if (!isInternal(link.href)) {
|
||
// undo the damage that might have been done by quarto-nav.js in the case of
|
||
// links that we want to consider external
|
||
if (link.dataset.originalHref !== undefined) {
|
||
link.href = link.dataset.originalHref;
|
||
}
|
||
}
|
||
}
|
||
function tippyHover(el, contentFn, onTriggerFn, onUntriggerFn) {
|
||
const config = {
|
||
allowHTML: true,
|
||
maxWidth: 500,
|
||
delay: 100,
|
||
arrow: false,
|
||
appendTo: function(el) {
|
||
return el.parentElement;
|
||
},
|
||
interactive: true,
|
||
interactiveBorder: 10,
|
||
theme: 'quarto',
|
||
placement: 'bottom-start',
|
||
};
|
||
if (contentFn) {
|
||
config.content = contentFn;
|
||
}
|
||
if (onTriggerFn) {
|
||
config.onTrigger = onTriggerFn;
|
||
}
|
||
if (onUntriggerFn) {
|
||
config.onUntrigger = onUntriggerFn;
|
||
}
|
||
window.tippy(el, config);
|
||
}
|
||
const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
|
||
for (var i=0; i<noterefs.length; i++) {
|
||
const ref = noterefs[i];
|
||
tippyHover(ref, function() {
|
||
// use id or data attribute instead here
|
||
let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
|
||
try { href = new URL(href).hash; } catch {}
|
||
const id = href.replace(/^#\/?/, "");
|
||
const note = window.document.getElementById(id);
|
||
if (note) {
|
||
return note.innerHTML;
|
||
} else {
|
||
return "";
|
||
}
|
||
});
|
||
}
|
||
const xrefs = window.document.querySelectorAll('a.quarto-xref');
|
||
const processXRef = (id, note) => {
|
||
// Strip column container classes
|
||
const stripColumnClz = (el) => {
|
||
el.classList.remove("page-full", "page-columns");
|
||
if (el.children) {
|
||
for (const child of el.children) {
|
||
stripColumnClz(child);
|
||
}
|
||
}
|
||
}
|
||
stripColumnClz(note)
|
||
if (id === null || id.startsWith('sec-')) {
|
||
// Special case sections, only their first couple elements
|
||
const container = document.createElement("div");
|
||
if (note.children && note.children.length > 2) {
|
||
container.appendChild(note.children[0].cloneNode(true));
|
||
for (let i = 1; i < note.children.length; i++) {
|
||
const child = note.children[i];
|
||
if (child.tagName === "P" && child.innerText === "") {
|
||
continue;
|
||
} else {
|
||
container.appendChild(child.cloneNode(true));
|
||
break;
|
||
}
|
||
}
|
||
if (window.Quarto?.typesetMath) {
|
||
window.Quarto.typesetMath(container);
|
||
}
|
||
return container.innerHTML
|
||
} else {
|
||
if (window.Quarto?.typesetMath) {
|
||
window.Quarto.typesetMath(note);
|
||
}
|
||
return note.innerHTML;
|
||
}
|
||
} else {
|
||
// Remove any anchor links if they are present
|
||
const anchorLink = note.querySelector('a.anchorjs-link');
|
||
if (anchorLink) {
|
||
anchorLink.remove();
|
||
}
|
||
if (window.Quarto?.typesetMath) {
|
||
window.Quarto.typesetMath(note);
|
||
}
|
||
if (note.classList.contains("callout")) {
|
||
return note.outerHTML;
|
||
} else {
|
||
return note.innerHTML;
|
||
}
|
||
}
|
||
}
|
||
for (var i=0; i<xrefs.length; i++) {
|
||
const xref = xrefs[i];
|
||
tippyHover(xref, undefined, function(instance) {
|
||
instance.disable();
|
||
let url = xref.getAttribute('href');
|
||
let hash = undefined;
|
||
if (url.startsWith('#')) {
|
||
hash = url;
|
||
} else {
|
||
try { hash = new URL(url).hash; } catch {}
|
||
}
|
||
if (hash) {
|
||
const id = hash.replace(/^#\/?/, "");
|
||
const note = window.document.getElementById(id);
|
||
if (note !== null) {
|
||
try {
|
||
const html = processXRef(id, note.cloneNode(true));
|
||
instance.setContent(html);
|
||
} finally {
|
||
instance.enable();
|
||
instance.show();
|
||
}
|
||
} else {
|
||
// See if we can fetch this
|
||
fetch(url.split('#')[0])
|
||
.then(res => res.text())
|
||
.then(html => {
|
||
const parser = new DOMParser();
|
||
const htmlDoc = parser.parseFromString(html, "text/html");
|
||
const note = htmlDoc.getElementById(id);
|
||
if (note !== null) {
|
||
const html = processXRef(id, note);
|
||
instance.setContent(html);
|
||
}
|
||
}).finally(() => {
|
||
instance.enable();
|
||
instance.show();
|
||
});
|
||
}
|
||
} else {
|
||
// See if we can fetch a full url (with no hash to target)
|
||
// This is a special case and we should probably do some content thinning / targeting
|
||
fetch(url)
|
||
.then(res => res.text())
|
||
.then(html => {
|
||
const parser = new DOMParser();
|
||
const htmlDoc = parser.parseFromString(html, "text/html");
|
||
const note = htmlDoc.querySelector('main.content');
|
||
if (note !== null) {
|
||
// This should only happen for chapter cross references
|
||
// (since there is no id in the URL)
|
||
// remove the first header
|
||
if (note.children.length > 0 && note.children[0].tagName === "HEADER") {
|
||
note.children[0].remove();
|
||
}
|
||
const html = processXRef(null, note);
|
||
instance.setContent(html);
|
||
}
|
||
}).finally(() => {
|
||
instance.enable();
|
||
instance.show();
|
||
});
|
||
}
|
||
}, function(instance) {
|
||
});
|
||
}
|
||
let selectedAnnoteEl;
|
||
const selectorForAnnotation = ( cell, annotation) => {
|
||
let cellAttr = 'data-code-cell="' + cell + '"';
|
||
let lineAttr = 'data-code-annotation="' + annotation + '"';
|
||
const selector = 'span[' + cellAttr + '][' + lineAttr + ']';
|
||
return selector;
|
||
}
|
||
const selectCodeLines = (annoteEl) => {
|
||
const doc = window.document;
|
||
const targetCell = annoteEl.getAttribute("data-target-cell");
|
||
const targetAnnotation = annoteEl.getAttribute("data-target-annotation");
|
||
const annoteSpan = window.document.querySelector(selectorForAnnotation(targetCell, targetAnnotation));
|
||
const lines = annoteSpan.getAttribute("data-code-lines").split(",");
|
||
const lineIds = lines.map((line) => {
|
||
return targetCell + "-" + line;
|
||
})
|
||
let top = null;
|
||
let height = null;
|
||
let parent = null;
|
||
if (lineIds.length > 0) {
|
||
//compute the position of the single el (top and bottom and make a div)
|
||
const el = window.document.getElementById(lineIds[0]);
|
||
top = el.offsetTop;
|
||
height = el.offsetHeight;
|
||
parent = el.parentElement.parentElement;
|
||
if (lineIds.length > 1) {
|
||
const lastEl = window.document.getElementById(lineIds[lineIds.length - 1]);
|
||
const bottom = lastEl.offsetTop + lastEl.offsetHeight;
|
||
height = bottom - top;
|
||
}
|
||
if (top !== null && height !== null && parent !== null) {
|
||
// cook up a div (if necessary) and position it
|
||
let div = window.document.getElementById("code-annotation-line-highlight");
|
||
if (div === null) {
|
||
div = window.document.createElement("div");
|
||
div.setAttribute("id", "code-annotation-line-highlight");
|
||
div.style.position = 'absolute';
|
||
parent.appendChild(div);
|
||
}
|
||
div.style.top = top - 2 + "px";
|
||
div.style.height = height + 4 + "px";
|
||
div.style.left = 0;
|
||
let gutterDiv = window.document.getElementById("code-annotation-line-highlight-gutter");
|
||
if (gutterDiv === null) {
|
||
gutterDiv = window.document.createElement("div");
|
||
gutterDiv.setAttribute("id", "code-annotation-line-highlight-gutter");
|
||
gutterDiv.style.position = 'absolute';
|
||
const codeCell = window.document.getElementById(targetCell);
|
||
const gutter = codeCell.querySelector('.code-annotation-gutter');
|
||
gutter.appendChild(gutterDiv);
|
||
}
|
||
gutterDiv.style.top = top - 2 + "px";
|
||
gutterDiv.style.height = height + 4 + "px";
|
||
}
|
||
selectedAnnoteEl = annoteEl;
|
||
}
|
||
};
|
||
const unselectCodeLines = () => {
|
||
const elementsIds = ["code-annotation-line-highlight", "code-annotation-line-highlight-gutter"];
|
||
elementsIds.forEach((elId) => {
|
||
const div = window.document.getElementById(elId);
|
||
if (div) {
|
||
div.remove();
|
||
}
|
||
});
|
||
selectedAnnoteEl = undefined;
|
||
};
|
||
// Handle positioning of the toggle
|
||
window.addEventListener(
|
||
"resize",
|
||
throttle(() => {
|
||
elRect = undefined;
|
||
if (selectedAnnoteEl) {
|
||
selectCodeLines(selectedAnnoteEl);
|
||
}
|
||
}, 10)
|
||
);
|
||
function throttle(fn, ms) {
|
||
let throttle = false;
|
||
let timer;
|
||
return (...args) => {
|
||
if(!throttle) { // first call gets through
|
||
fn.apply(this, args);
|
||
throttle = true;
|
||
} else { // all the others get throttled
|
||
if(timer) clearTimeout(timer); // cancel #2
|
||
timer = setTimeout(() => {
|
||
fn.apply(this, args);
|
||
timer = throttle = false;
|
||
}, ms);
|
||
}
|
||
};
|
||
}
|
||
// Attach click handler to the DT
|
||
const annoteDls = window.document.querySelectorAll('dt[data-target-cell]');
|
||
for (const annoteDlNode of annoteDls) {
|
||
annoteDlNode.addEventListener('click', (event) => {
|
||
const clickedEl = event.target;
|
||
if (clickedEl !== selectedAnnoteEl) {
|
||
unselectCodeLines();
|
||
const activeEl = window.document.querySelector('dt[data-target-cell].code-annotation-active');
|
||
if (activeEl) {
|
||
activeEl.classList.remove('code-annotation-active');
|
||
}
|
||
selectCodeLines(clickedEl);
|
||
clickedEl.classList.add('code-annotation-active');
|
||
} else {
|
||
// Unselect the line
|
||
unselectCodeLines();
|
||
clickedEl.classList.remove('code-annotation-active');
|
||
}
|
||
});
|
||
}
|
||
const findCites = (el) => {
|
||
const parentEl = el.parentElement;
|
||
if (parentEl) {
|
||
const cites = parentEl.dataset.cites;
|
||
if (cites) {
|
||
return {
|
||
el,
|
||
cites: cites.split(' ')
|
||
};
|
||
} else {
|
||
return findCites(el.parentElement)
|
||
}
|
||
} else {
|
||
return undefined;
|
||
}
|
||
};
|
||
var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
|
||
for (var i=0; i<bibliorefs.length; i++) {
|
||
const ref = bibliorefs[i];
|
||
const citeInfo = findCites(ref);
|
||
if (citeInfo) {
|
||
tippyHover(citeInfo.el, function() {
|
||
var popup = window.document.createElement('div');
|
||
citeInfo.cites.forEach(function(cite) {
|
||
var citeDiv = window.document.createElement('div');
|
||
citeDiv.classList.add('hanging-indent');
|
||
citeDiv.classList.add('csl-entry');
|
||
var biblioDiv = window.document.getElementById('ref-' + cite);
|
||
if (biblioDiv) {
|
||
citeDiv.innerHTML = biblioDiv.innerHTML;
|
||
}
|
||
popup.appendChild(citeDiv);
|
||
});
|
||
return popup.innerHTML;
|
||
});
|
||
}
|
||
}
|
||
});
|
||
</script>
|
||
</div> <!-- /content -->
|
||
|
||
|
||
|
||
|
||
</body></html> |