diff --git a/M2/Reinforcement Learning/project/Project.ipynb b/M2/Reinforcement Learning/project/Project.ipynb index 4fcbfeb..48d3d29 100644 --- a/M2/Reinforcement Learning/project/Project.ipynb +++ b/M2/Reinforcement Learning/project/Project.ipynb @@ -528,6 +528,12 @@ " - Updates weights with semi-gradient: W[a] += alpha * (G - q(s,a)) * phi(s)\n", "\n", " Unlike TD methods (SARSA, Q-Learning), no update occurs until the episode ends.\n", + "\n", + " Performance optimizations over naive per-step implementation:\n", + " - float32 weights & features (halves memory bandwidth, faster SIMD)\n", + " - Raw observations stored compactly as uint8, batch-normalized at episode end\n", + " - Vectorized return computation & chunk-based weight updates via einsum\n", + " - Single weight sanitization per episode instead of per-step\n", " \"\"\"\n", "\n", " def __init__(\n", @@ -538,32 +544,20 @@ " gamma: float = 0.99,\n", " seed: int = 42,\n", " ) -> None:\n", - " \"\"\"Initialize Monte Carlo agent.\n", - "\n", - " Args:\n", - " n_features: Dimension of the feature vector phi(s).\n", - " n_actions: Number of discrete actions.\n", - " alpha: Learning rate.\n", - " gamma: Discount factor.\n", - " seed: RNG seed.\n", - "\n", - " \"\"\"\n", " super().__init__(seed, n_actions)\n", " self.n_features = n_features\n", " self.alpha = alpha\n", " self.gamma = gamma\n", - " self.W = np.zeros((n_actions, n_features), dtype=np.float64)\n", - " # Episode buffer: stores (state, action, reward) tuples\n", - " # Analogous to Lab 4's episode list in generate_episode\n", - " self.episode_buffer: list[tuple[np.ndarray, int, float]] = []\n", + " self.W = np.zeros((n_actions, n_features), dtype=np.float32)\n", + " self._obs_buf: list[np.ndarray] = []\n", + " self._act_buf: list[int] = []\n", + " self._rew_buf: list[float] = []\n", "\n", " def _q_values(self, phi: np.ndarray) -> np.ndarray:\n", - " \"\"\"Compute Q-values for all actions: q(s, a) = W[a] @ phi for each a.\"\"\"\n", " return self.W @ phi\n", "\n", " def get_action(self, observation: np.ndarray, epsilon: float = 0.0) -> int:\n", - " \"\"\"Select action using ε-greedy policy over linear Q-values.\"\"\"\n", - " phi = normalize_obs(observation)\n", + " phi = observation.flatten().astype(np.float32) / np.float32(255.0)\n", " q_vals = self._q_values(phi)\n", " return epsilon_greedy(q_vals, epsilon, self.rng)\n", "\n", @@ -576,53 +570,51 @@ " done: bool,\n", " next_action: int | None = None,\n", " ) -> None:\n", - " \"\"\"Accumulate transitions and update at episode end with MC returns.\n", + " _ = next_state, next_action\n", "\n", - " Follows Lab 4 mc_control_epsilon_soft / mc_control_exploring_starts:\n", - " 1. Append (state, action, reward) to episode buffer\n", - " 2. If not done: wait (no update yet)\n", - " 3. If done: compute returns backward and update weights\n", - "\n", - " The backward loop is exactly the Lab 4 pattern:\n", - " G = 0\n", - " for s, a, r in reversed(episode_buffer):\n", - " G = gamma * G + r\n", - " # update Q(s, a) toward G\n", - " \"\"\"\n", - " _ = next_state, next_action # Not used in MC\n", - "\n", - " self.episode_buffer.append((state, action, reward))\n", + " self._obs_buf.append(state)\n", + " self._act_buf.append(action)\n", + " self._rew_buf.append(reward)\n", "\n", " if not done:\n", - " return # Wait until episode ends\n", + " return\n", "\n", - " # Episode finished: compute MC returns and update\n", - " # Backward pass through episode (Lab 4 pattern)\n", - " returns = 0.0\n", - " for s, a, r in reversed(self.episode_buffer):\n", - " returns = self.gamma * returns + r\n", + " n = len(self._rew_buf)\n", + " actions = np.array(self._act_buf, dtype=np.intp)\n", "\n", - " phi = np.nan_to_num(normalize_obs(s), nan=0.0, posinf=0.0, neginf=0.0)\n", - " q_sa = float(self.W[a] @ phi)\n", - " if not np.isfinite(q_sa):\n", - " q_sa = 0.0\n", + " returns = np.empty(n, dtype=np.float32)\n", + " G = np.float32(0.0)\n", + " gamma32 = np.float32(self.gamma)\n", + " for i in range(n - 1, -1, -1):\n", + " G = gamma32 * G + np.float32(self._rew_buf[i])\n", + " returns[i] = G\n", "\n", - " # Semi-gradient update toward the MC return G\n", - " # Analogous to Lab 4: Q[(s,a)] += (G - Q[(s,a)]) / N[(s,a)]\n", - " # but with linear approximation and fixed step size\n", - " if not np.isfinite(returns):\n", - " continue\n", + " alpha32 = np.float32(self.alpha)\n", + " chunk_size = 500\n", + " for start in range(0, n, chunk_size):\n", + " end = min(start + chunk_size, n)\n", + " cs = end - start\n", "\n", - " delta = float(returns - q_sa)\n", - " if not np.isfinite(delta):\n", - " continue\n", + " raw = np.array(self._obs_buf[start:end])\n", + " phi = raw.reshape(cs, -1).astype(np.float32)\n", + " phi /= np.float32(255.0)\n", "\n", - " td_step = float(np.clip(delta, -1_000.0, 1_000.0))\n", - " self.W[a] += self.alpha * td_step * phi\n", - " self.W[a] = np.nan_to_num(self.W[a], nan=0.0, posinf=1e6, neginf=-1e6)\n", + " ca = actions[start:end]\n", + " q_sa = np.einsum(\"ij,ij->i\", self.W[ca], phi)\n", "\n", - " # Clear episode buffer for next episode\n", - " self.episode_buffer = []\n" + " deltas = np.clip(returns[start:end] - q_sa, -1000.0, 1000.0)\n", + "\n", + " for a in range(self.action_space):\n", + " mask = ca == a\n", + " if not np.any(mask):\n", + " continue\n", + " self.W[a] += alpha32 * (deltas[mask] @ phi[mask])\n", + "\n", + " self.W = np.nan_to_num(self.W, nan=0.0, posinf=1e6, neginf=-1e6)\n", + "\n", + " self._obs_buf.clear()\n", + " self._act_buf.clear()\n", + " self._rew_buf.clear()\n" ] }, { @@ -948,7 +940,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "6f6ba8df", "metadata": {}, "outputs": [ @@ -973,34 +965,23 @@ "name": "stderr", "output_type": "stream", "text": [ - "objc[68875]: Class SDLApplication is implemented in both /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/pygame/.dylibs/libSDL2-2.0.0.dylib (0x11118d2c8) and /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/cv2/.dylibs/libSDL2-2.0.0.dylib (0x1243e8890). This may cause spurious casting failures and mysterious crashes. One of the duplicates must be removed or renamed.\n", - "objc[68875]: Class SDLAppDelegate is implemented in both /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/pygame/.dylibs/libSDL2-2.0.0.dylib (0x11118d318) and /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/cv2/.dylibs/libSDL2-2.0.0.dylib (0x1243e88e0). This may cause spurious casting failures and mysterious crashes. One of the duplicates must be removed or renamed.\n", - "objc[68875]: Class SDLTranslatorResponder is implemented in both /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/pygame/.dylibs/libSDL2-2.0.0.dylib (0x11118d390) and /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/cv2/.dylibs/libSDL2-2.0.0.dylib (0x1243e8958). This may cause spurious casting failures and mysterious crashes. One of the duplicates must be removed or renamed.\n", - "objc[68875]: Class SDLMessageBoxPresenter is implemented in both /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/pygame/.dylibs/libSDL2-2.0.0.dylib (0x11118d3b8) and /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/cv2/.dylibs/libSDL2-2.0.0.dylib (0x1243e8980). This may cause spurious casting failures and mysterious crashes. One of the duplicates must be removed or renamed.\n", - "objc[68875]: Class SDL_cocoametalview is implemented in both /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/pygame/.dylibs/libSDL2-2.0.0.dylib (0x11118d408) and /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/cv2/.dylibs/libSDL2-2.0.0.dylib (0x1243e89d0). This may cause spurious casting failures and mysterious crashes. One of the duplicates must be removed or renamed.\n", - "objc[68875]: Class SDLOpenGLContext is implemented in both /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/pygame/.dylibs/libSDL2-2.0.0.dylib (0x11118d458) and /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/cv2/.dylibs/libSDL2-2.0.0.dylib (0x1243e8a20). This may cause spurious casting failures and mysterious crashes. One of the duplicates must be removed or renamed.\n", - "objc[68875]: Class SDL_ShapeData is implemented in both /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/pygame/.dylibs/libSDL2-2.0.0.dylib (0x11118d4d0) and /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/cv2/.dylibs/libSDL2-2.0.0.dylib (0x1243e8a98). This may cause spurious casting failures and mysterious crashes. One of the duplicates must be removed or renamed.\n", - "objc[68875]: Class SDL_CocoaClosure is implemented in both /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/pygame/.dylibs/libSDL2-2.0.0.dylib (0x11118d520) and /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/cv2/.dylibs/libSDL2-2.0.0.dylib (0x1243e8ae8). This may cause spurious casting failures and mysterious crashes. One of the duplicates must be removed or renamed.\n", - "objc[68875]: Class SDL_VideoData is implemented in both /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/pygame/.dylibs/libSDL2-2.0.0.dylib (0x11118d570) and /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/cv2/.dylibs/libSDL2-2.0.0.dylib (0x1243e8b38). This may cause spurious casting failures and mysterious crashes. One of the duplicates must be removed or renamed.\n", - "objc[68875]: Class SDL_WindowData is implemented in both /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/pygame/.dylibs/libSDL2-2.0.0.dylib (0x11118d5c0) and /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/cv2/.dylibs/libSDL2-2.0.0.dylib (0x1243e8b88). This may cause spurious casting failures and mysterious crashes. One of the duplicates must be removed or renamed.\n", - "objc[68875]: Class SDLWindow is implemented in both /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/pygame/.dylibs/libSDL2-2.0.0.dylib (0x11118d5e8) and /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/cv2/.dylibs/libSDL2-2.0.0.dylib (0x1243e8bb0). This may cause spurious casting failures and mysterious crashes. One of the duplicates must be removed or renamed.\n", - "objc[68875]: Class Cocoa_WindowListener is implemented in both /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/pygame/.dylibs/libSDL2-2.0.0.dylib (0x11118d610) and /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/cv2/.dylibs/libSDL2-2.0.0.dylib (0x1243e8bd8). This may cause spurious casting failures and mysterious crashes. One of the duplicates must be removed or renamed.\n", - "objc[68875]: Class SDLView is implemented in both /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/pygame/.dylibs/libSDL2-2.0.0.dylib (0x11118d688) and /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/cv2/.dylibs/libSDL2-2.0.0.dylib (0x1243e8c50). This may cause spurious casting failures and mysterious crashes. One of the duplicates must be removed or renamed.\n", - "objc[68875]: Class METAL_RenderData is implemented in both /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/pygame/.dylibs/libSDL2-2.0.0.dylib (0x11118d700) and /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/cv2/.dylibs/libSDL2-2.0.0.dylib (0x1243e8cc8). This may cause spurious casting failures and mysterious crashes. One of the duplicates must be removed or renamed.\n", - "objc[68875]: Class METAL_TextureData is implemented in both /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/pygame/.dylibs/libSDL2-2.0.0.dylib (0x11118d750) and /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/cv2/.dylibs/libSDL2-2.0.0.dylib (0x1243e8d18). This may cause spurious casting failures and mysterious crashes. One of the duplicates must be removed or renamed.\n", - "objc[68875]: Class SDL_RumbleMotor is implemented in both /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/pygame/.dylibs/libSDL2-2.0.0.dylib (0x11118d778) and /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/cv2/.dylibs/libSDL2-2.0.0.dylib (0x1243e8d40). This may cause spurious casting failures and mysterious crashes. One of the duplicates must be removed or renamed.\n", - "objc[68875]: Class SDL_RumbleContext is implemented in both /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/pygame/.dylibs/libSDL2-2.0.0.dylib (0x11118d7c8) and /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/cv2/.dylibs/libSDL2-2.0.0.dylib (0x1243e8d90). This may cause spurious casting failures and mysterious crashes. One of the duplicates must be removed or renamed.\n" - ] - }, - { - "ename": "", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[1;31mLe noyau s’est bloqué lors de l’exécution du code dans une cellule active ou une cellule précédente. \n", - "\u001b[1;31mVeuillez vérifier le code dans la ou les cellules pour identifier une cause possible de l’échec. \n", - "\u001b[1;31mCliquez ici pour plus d’informations. \n", - "\u001b[1;31mPour plus d’informations, consultez Jupyter log." + "objc[49878]: Class SDLApplication is implemented in both /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/pygame/.dylibs/libSDL2-2.0.0.dylib (0x11118d2c8) and /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/cv2/.dylibs/libSDL2-2.0.0.dylib (0x124418890). This may cause spurious casting failures and mysterious crashes. One of the duplicates must be removed or renamed.\n", + "objc[49878]: Class SDLAppDelegate is implemented in both /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/pygame/.dylibs/libSDL2-2.0.0.dylib (0x11118d318) and /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/cv2/.dylibs/libSDL2-2.0.0.dylib (0x1244188e0). This may cause spurious casting failures and mysterious crashes. One of the duplicates must be removed or renamed.\n", + "objc[49878]: Class SDLTranslatorResponder is implemented in both /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/pygame/.dylibs/libSDL2-2.0.0.dylib (0x11118d390) and /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/cv2/.dylibs/libSDL2-2.0.0.dylib (0x124418958). This may cause spurious casting failures and mysterious crashes. One of the duplicates must be removed or renamed.\n", + "objc[49878]: Class SDLMessageBoxPresenter is implemented in both /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/pygame/.dylibs/libSDL2-2.0.0.dylib (0x11118d3b8) and /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/cv2/.dylibs/libSDL2-2.0.0.dylib (0x124418980). This may cause spurious casting failures and mysterious crashes. One of the duplicates must be removed or renamed.\n", + "objc[49878]: Class SDL_cocoametalview is implemented in both /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/pygame/.dylibs/libSDL2-2.0.0.dylib (0x11118d408) and /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/cv2/.dylibs/libSDL2-2.0.0.dylib (0x1244189d0). This may cause spurious casting failures and mysterious crashes. One of the duplicates must be removed or renamed.\n", + "objc[49878]: Class SDLOpenGLContext is implemented in both /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/pygame/.dylibs/libSDL2-2.0.0.dylib (0x11118d458) and /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/cv2/.dylibs/libSDL2-2.0.0.dylib (0x124418a20). This may cause spurious casting failures and mysterious crashes. One of the duplicates must be removed or renamed.\n", + "objc[49878]: Class SDL_ShapeData is implemented in both /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/pygame/.dylibs/libSDL2-2.0.0.dylib (0x11118d4d0) and /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/cv2/.dylibs/libSDL2-2.0.0.dylib (0x124418a98). This may cause spurious casting failures and mysterious crashes. One of the duplicates must be removed or renamed.\n", + "objc[49878]: Class SDL_CocoaClosure is implemented in both /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/pygame/.dylibs/libSDL2-2.0.0.dylib (0x11118d520) and /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/cv2/.dylibs/libSDL2-2.0.0.dylib (0x124418ae8). This may cause spurious casting failures and mysterious crashes. One of the duplicates must be removed or renamed.\n", + "objc[49878]: Class SDL_VideoData is implemented in both /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/pygame/.dylibs/libSDL2-2.0.0.dylib (0x11118d570) and /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/cv2/.dylibs/libSDL2-2.0.0.dylib (0x124418b38). This may cause spurious casting failures and mysterious crashes. One of the duplicates must be removed or renamed.\n", + "objc[49878]: Class SDL_WindowData is implemented in both /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/pygame/.dylibs/libSDL2-2.0.0.dylib (0x11118d5c0) and /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/cv2/.dylibs/libSDL2-2.0.0.dylib (0x124418b88). This may cause spurious casting failures and mysterious crashes. One of the duplicates must be removed or renamed.\n", + "objc[49878]: Class SDLWindow is implemented in both /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/pygame/.dylibs/libSDL2-2.0.0.dylib (0x11118d5e8) and /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/cv2/.dylibs/libSDL2-2.0.0.dylib (0x124418bb0). This may cause spurious casting failures and mysterious crashes. One of the duplicates must be removed or renamed.\n", + "objc[49878]: Class Cocoa_WindowListener is implemented in both /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/pygame/.dylibs/libSDL2-2.0.0.dylib (0x11118d610) and /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/cv2/.dylibs/libSDL2-2.0.0.dylib (0x124418bd8). This may cause spurious casting failures and mysterious crashes. One of the duplicates must be removed or renamed.\n", + "objc[49878]: Class SDLView is implemented in both /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/pygame/.dylibs/libSDL2-2.0.0.dylib (0x11118d688) and /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/cv2/.dylibs/libSDL2-2.0.0.dylib (0x124418c50). This may cause spurious casting failures and mysterious crashes. One of the duplicates must be removed or renamed.\n", + "objc[49878]: Class METAL_RenderData is implemented in both /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/pygame/.dylibs/libSDL2-2.0.0.dylib (0x11118d700) and /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/cv2/.dylibs/libSDL2-2.0.0.dylib (0x124418cc8). This may cause spurious casting failures and mysterious crashes. One of the duplicates must be removed or renamed.\n", + "objc[49878]: Class METAL_TextureData is implemented in both /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/pygame/.dylibs/libSDL2-2.0.0.dylib (0x11118d750) and /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/cv2/.dylibs/libSDL2-2.0.0.dylib (0x124418d18). This may cause spurious casting failures and mysterious crashes. One of the duplicates must be removed or renamed.\n", + "objc[49878]: Class SDL_RumbleMotor is implemented in both /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/pygame/.dylibs/libSDL2-2.0.0.dylib (0x11118d778) and /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/cv2/.dylibs/libSDL2-2.0.0.dylib (0x124418d40). This may cause spurious casting failures and mysterious crashes. One of the duplicates must be removed or renamed.\n", + "objc[49878]: Class SDL_RumbleContext is implemented in both /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/pygame/.dylibs/libSDL2-2.0.0.dylib (0x11118d7c8) and /Users/arthurdanjou/Workspace/studies/.venv/lib/python3.13/site-packages/cv2/.dylibs/libSDL2-2.0.0.dylib (0x124418d90). This may cause spurious casting failures and mysterious crashes. One of the duplicates must be removed or renamed.\n" ] } ], @@ -1051,7 +1032,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "3346e326a5c54902b5d3698add8cb9d4", + "model_id": "0d5d098d18014fe6b736683e0b8b2488", "version_major": 2, "version_minor": 0 },