diff --git a/16_reinforcement_learning.ipynb b/16_reinforcement_learning.ipynb
index 96e94a2..9459440 100644
--- a/16_reinforcement_learning.ipynb
+++ b/16_reinforcement_learning.ipynb
@@ -31,9 +31,7 @@
   {
    "cell_type": "code",
    "execution_count": 1,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "# To support both python 2 and python 3\n",
@@ -95,9 +93,7 @@
   {
    "cell_type": "code",
    "execution_count": 2,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "import gym"
@@ -129,9 +125,7 @@
   {
    "cell_type": "code",
    "execution_count": 4,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "obs = env.reset()"
@@ -163,9 +157,7 @@
   {
    "cell_type": "code",
    "execution_count": 6,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "img = env.render(mode=\"rgb_array\")"
@@ -226,9 +218,7 @@
   {
    "cell_type": "code",
    "execution_count": 9,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "def plot_environment(env, figsize=(5,4)):\n",
@@ -273,9 +263,7 @@
   {
    "cell_type": "code",
    "execution_count": 11,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "env.reset()\n",
@@ -311,9 +299,7 @@
   {
    "cell_type": "code",
    "execution_count": 13,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "obs, reward, done, info = env.step(0)"
@@ -393,9 +379,7 @@
   {
    "cell_type": "code",
    "execution_count": 18,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "frames = []\n",
@@ -424,9 +408,7 @@
   {
    "cell_type": "code",
    "execution_count": 19,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "def update_scene(num, frames, patch):\n",
@@ -461,9 +443,7 @@
   {
    "cell_type": "code",
    "execution_count": 21,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "env.close()"
@@ -502,9 +482,7 @@
   {
    "cell_type": "code",
    "execution_count": 23,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "obs = env.reset()"
@@ -547,9 +525,7 @@
   {
    "cell_type": "code",
    "execution_count": 25,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "from PIL import Image, ImageDraw\n",
@@ -633,9 +609,7 @@
   {
    "cell_type": "code",
    "execution_count": 28,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "obs = env.reset()\n",
@@ -677,9 +651,7 @@
   {
    "cell_type": "code",
    "execution_count": 31,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "obs = env.reset()\n",
@@ -722,9 +694,7 @@
   {
    "cell_type": "code",
    "execution_count": 33,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "frames = []\n",
@@ -795,9 +765,7 @@
   {
    "cell_type": "code",
    "execution_count": 35,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "import tensorflow as tf\n",
@@ -846,9 +814,7 @@
   {
    "cell_type": "code",
    "execution_count": 36,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "n_max_steps = 1000\n",
@@ -895,9 +861,7 @@
   {
    "cell_type": "code",
    "execution_count": 38,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "import tensorflow as tf\n",
@@ -965,9 +929,7 @@
   {
    "cell_type": "code",
    "execution_count": 40,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "def render_policy_net(model_path, action, X, n_max_steps = 1000):\n",
@@ -1024,9 +986,7 @@
   {
    "cell_type": "code",
    "execution_count": 42,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "import tensorflow as tf\n",
@@ -1069,9 +1029,7 @@
   {
    "cell_type": "code",
    "execution_count": 43,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "def discount_rewards(rewards, discount_rate):\n",
@@ -1157,9 +1115,7 @@
   {
    "cell_type": "code",
    "execution_count": 47,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "env.close()"
@@ -1309,9 +1265,7 @@
   {
    "cell_type": "code",
    "execution_count": 51,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "n_states = 3\n",
@@ -1336,9 +1290,7 @@
   {
    "cell_type": "code",
    "execution_count": 52,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "def optimal_policy(state):\n",
@@ -1439,23 +1391,28 @@
   {
    "cell_type": "code",
    "execution_count": 57,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
-    "mspacman_color = np.array([210, 164, 74]).mean()\n",
+    "mspacman_color = 210 + 164 + 74\n",
     "\n",
     "def preprocess_observation(obs):\n",
     "    img = obs[1:176:2, ::2] # crop and downsize\n",
-    "    img = img.mean(axis=2) # to greyscale\n",
+    "    img = img.sum(axis=2) # to greyscale\n",
     "    img[img==mspacman_color] = 0 # Improve contrast\n",
-    "    img = (img - 128) / 128 - 1 # normalize from -1. to 1.\n",
+    "    img = (img // 3 - 128).astype(np.int8) # normalize from -128 to 127\n",
     "    return img.reshape(88, 80, 1)\n",
     "\n",
     "img = preprocess_observation(obs)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Note: the `preprocess_observation()` function is slightly different from the one in the book: instead of representing pixels as 64-bit floats from -1.0 to 1.0, it represents them as 8-bit integers from -128 to 127. The benefit is that the replay memory will take up about 6.5 GB of RAM instead of 52 GB. The reduced precision has no impact on training."
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 58,
@@ -1498,9 +1455,7 @@
   {
    "cell_type": "code",
    "execution_count": 59,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "reset_graph()\n",
@@ -1545,9 +1500,7 @@
   {
    "cell_type": "code",
    "execution_count": 60,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "X_state = tf.placeholder(tf.float32, shape=[None, input_height, input_width,\n",
@@ -1572,9 +1525,7 @@
   {
    "cell_type": "code",
    "execution_count": 62,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "learning_rate = 0.001\n",
@@ -1608,9 +1559,7 @@
   {
    "cell_type": "code",
    "execution_count": 63,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "from collections import deque\n",
@@ -1632,9 +1581,7 @@
   {
    "cell_type": "code",
    "execution_count": 64,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "eps_min = 0.1\n",
@@ -1678,9 +1625,7 @@
   {
    "cell_type": "code",
    "execution_count": 66,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "loss_val = np.infty\n",
@@ -1970,7 +1915,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.5.2"
+   "version": "3.6.3"
   },
   "nav_menu": {},
   "toc": {