Merge pull request #396 from lebaste77/master

Very minor change to the epsilon-greedy policy: use the n_outputs variable instead of a hard-coded 2 when picking a random action
This commit is contained in:
Aurélien Geron
2021-03-02 10:16:43 +13:00
committed by GitHub

View File

@@ -1306,7 +1306,7 @@
 "source": [
 "def epsilon_greedy_policy(state, epsilon=0):\n",
 "    if np.random.rand() < epsilon:\n",
-"        return np.random.randint(2)\n",
+"        return np.random.randint(n_outputs)\n",
 "    else:\n",
 "        Q_values = model.predict(state[np.newaxis])\n",
 "        return np.argmax(Q_values[0])"