From 55adea1ff431d7616ca9a13511249d7c757d34a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Geron?= Date: Tue, 8 May 2018 19:41:47 +0200 Subject: [PATCH] Add code to compute a confidence interval --- 02_end_to_end_machine_learning_project.ipynb | 124 ++++++++++++++----- 1 file changed, 96 insertions(+), 28 deletions(-) diff --git a/02_end_to_end_machine_learning_project.ipynb b/02_end_to_end_machine_learning_project.ipynb index b82b519..e3fd493 100644 --- a/02_end_to_end_machine_learning_project.ipynb +++ b/02_end_to_end_machine_learning_project.ipynb @@ -1437,6 +1437,74 @@ "final_rmse" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can compute a 95% confidence interval for the test RMSE:" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "metadata": {}, + "outputs": [], + "source": [ + "from scipy import stats" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [], + "source": [ + "confidence = 0.95\n", + "squared_errors = (final_predictions - y_test) ** 2\n", + "mean = squared_errors.mean()\n", + "m = len(squared_errors)\n", + "\n", + "np.sqrt(stats.t.interval(confidence, m - 1,\n", + " loc=np.mean(squared_errors),\n", + " scale=stats.sem(squared_errors)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We could compute the interval manually like this:" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [], + "source": [ + "tscore = stats.t.ppf((1 + confidence) / 2, df=m - 1)\n", + "tmargin = tscore * squared_errors.std(ddof=1) / np.sqrt(m)\n", + "np.sqrt(mean - tmargin), np.sqrt(mean + tmargin)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alternatively, we could use z-scores rather than t-scores:" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [], + "source": [ + "zscore = stats.norm.ppf((1 + confidence) / 2)\n", + "zmargin 
= zscore * squared_errors.std(ddof=1) / np.sqrt(m)\n", + "np.sqrt(mean - zmargin), np.sqrt(mean + zmargin)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -1453,7 +1521,7 @@ }, { "cell_type": "code", - "execution_count": 103, + "execution_count": 107, "metadata": {}, "outputs": [], "source": [ @@ -1475,7 +1543,7 @@ }, { "cell_type": "code", - "execution_count": 104, + "execution_count": 108, "metadata": {}, "outputs": [], "source": [ @@ -1484,7 +1552,7 @@ }, { "cell_type": "code", - "execution_count": 105, + "execution_count": 109, "metadata": {}, "outputs": [], "source": [ @@ -1503,7 +1571,7 @@ }, { "cell_type": "code", - "execution_count": 106, + "execution_count": 110, "metadata": {}, "outputs": [], "source": [ @@ -1541,7 +1609,7 @@ }, { "cell_type": "code", - "execution_count": 107, + "execution_count": 111, "metadata": {}, "outputs": [], "source": [ @@ -1567,7 +1635,7 @@ }, { "cell_type": "code", - "execution_count": 108, + "execution_count": 112, "metadata": {}, "outputs": [], "source": [ @@ -1585,7 +1653,7 @@ }, { "cell_type": "code", - "execution_count": 109, + "execution_count": 113, "metadata": {}, "outputs": [], "source": [ @@ -1615,14 +1683,14 @@ }, { "cell_type": "code", - "execution_count": 110, + "execution_count": 114, "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import RandomizedSearchCV\n", "from scipy.stats import expon, reciprocal\n", "\n", - "# see https://docs.scipy.org/doc/scipy-0.19.0/reference/stats.html\n", + "# see https://docs.scipy.org/doc/scipy/reference/stats.html\n", "# for `expon()` and `reciprocal()` documentation and more probability distribution functions.\n", "\n", "# Note: gamma is ignored when kernel is \"linear\"\n", @@ -1648,7 +1716,7 @@ }, { "cell_type": "code", - "execution_count": 111, + "execution_count": 115, "metadata": {}, "outputs": [], "source": [ @@ -1666,7 +1734,7 @@ }, { "cell_type": "code", - "execution_count": 112, + "execution_count": 116, "metadata": {}, "outputs": [], 
"source": [ @@ -1689,7 +1757,7 @@ }, { "cell_type": "code", - "execution_count": 113, + "execution_count": 117, "metadata": {}, "outputs": [], "source": [ @@ -1714,7 +1782,7 @@ }, { "cell_type": "code", - "execution_count": 114, + "execution_count": 118, "metadata": {}, "outputs": [], "source": [ @@ -1753,7 +1821,7 @@ }, { "cell_type": "code", - "execution_count": 115, + "execution_count": 119, "metadata": {}, "outputs": [], "source": [ @@ -1789,7 +1857,7 @@ }, { "cell_type": "code", - "execution_count": 116, + "execution_count": 120, "metadata": {}, "outputs": [], "source": [ @@ -1805,7 +1873,7 @@ }, { "cell_type": "code", - "execution_count": 117, + "execution_count": 121, "metadata": {}, "outputs": [], "source": [ @@ -1815,7 +1883,7 @@ }, { "cell_type": "code", - "execution_count": 118, + "execution_count": 122, "metadata": {}, "outputs": [], "source": [ @@ -1831,7 +1899,7 @@ }, { "cell_type": "code", - "execution_count": 119, + "execution_count": 123, "metadata": {}, "outputs": [], "source": [ @@ -1847,7 +1915,7 @@ }, { "cell_type": "code", - "execution_count": 120, + "execution_count": 124, "metadata": {}, "outputs": [], "source": [ @@ -1859,7 +1927,7 @@ }, { "cell_type": "code", - "execution_count": 121, + "execution_count": 125, "metadata": {}, "outputs": [], "source": [ @@ -1875,7 +1943,7 @@ }, { "cell_type": "code", - "execution_count": 122, + "execution_count": 126, "metadata": {}, "outputs": [], "source": [ @@ -1891,7 +1959,7 @@ }, { "cell_type": "code", - "execution_count": 123, + "execution_count": 127, "metadata": {}, "outputs": [], "source": [ @@ -1921,7 +1989,7 @@ }, { "cell_type": "code", - "execution_count": 124, + "execution_count": 128, "metadata": {}, "outputs": [], "source": [ @@ -1934,7 +2002,7 @@ }, { "cell_type": "code", - "execution_count": 125, + "execution_count": 129, "metadata": {}, "outputs": [], "source": [ @@ -1950,7 +2018,7 @@ }, { "cell_type": "code", - "execution_count": 126, + "execution_count": 130, "metadata": {}, "outputs": 
[], "source": [ @@ -1984,7 +2052,7 @@ }, { "cell_type": "code", - "execution_count": 127, + "execution_count": 131, "metadata": {}, "outputs": [], "source": [ @@ -2000,7 +2068,7 @@ }, { "cell_type": "code", - "execution_count": 128, + "execution_count": 132, "metadata": {}, "outputs": [], "source": [ @@ -2038,7 +2106,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.5" + "version": "3.5.2" }, "nav_menu": { "height": "279px",