Merge pull request #396 from lebaste77/master

Very minor change to the epsilon-greedy policy: use `n_outputs` instead of the hard-coded `2` when sampling a random action
2026-01-14 20:19:29 +01:00 · 2021-03-02 10:16:43 +13:00
parent b201196be1 64f0e05a94
commit 47538082f8
1 changed files with 1 additions and 1 deletions
--- a/18_reinforcement_learning.ipynb
+++ b/18_reinforcement_learning.ipynb
@@ -1306,7 +1306,7 @@
   "source": [
    "def epsilon_greedy_policy(state, epsilon=0):\n",
    "    if np.random.rand() < epsilon:\n",
-    "        return np.random.randint(2)\n",
+    "        return np.random.randint(n_outputs)\n",
    "    else:\n",
    "        Q_values = model.predict(state[np.newaxis])\n",
    "        return np.argmax(Q_values[0])"