diff --git a/Notebooks/Chap19/19_1_Markov_Decision_Processes.ipynb b/Notebooks/Chap19/19_1_Markov_Decision_Processes.ipynb
index c5ac07f..c5cd6d6 100644
--- a/Notebooks/Chap19/19_1_Markov_Decision_Processes.ipynb
+++ b/Notebooks/Chap19/19_1_Markov_Decision_Processes.ipynb
@@ -4,7 +4,7 @@
   "metadata": {
     "colab": {
       "provenance": [],
-      "authorship_tag": "ABX9TyMForqbtn4usiIlRAenjCfh",
+      "authorship_tag": "ABX9TyPg3umHnqmIXX6jGe809Nxf",
       "include_colab_link": true
     },
     "kernelspec": {
@@ -46,13 +46,691 @@
       "source": [
         "import numpy as np\n",
         "import matplotlib.pyplot as plt\n",
-        "\n"
+        "from PIL import Image"
       ],
       "metadata": {
         "id": "OLComQyvCIJ7"
       },
       "execution_count": null,
       "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Get local copies of components of images\n",
+        "!wget https://raw.githubusercontent.com/udlbook/udlbook/main/Notebooks/Chap19/Empty.png\n",
+        "!wget https://raw.githubusercontent.com/udlbook/udlbook/main/Notebooks/Chap19/Hole.png\n",
+        "!wget https://raw.githubusercontent.com/udlbook/udlbook/main/Notebooks/Chap19/Fish.png\n",
+        "!wget https://raw.githubusercontent.com/udlbook/udlbook/main/Notebooks/Chap19/Penguin.png"
+      ],
+      "metadata": {
+        "id": "ZsvrUszPLyEG"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Ugly class that takes care of drawing pictures like in the book.\n",
+        "# You can totally ignore this code!\n",
+        "class DrawMDP:\n",
+        "  # Constructor initializes parameters\n",
+        "  def __init__(self, n_row, n_col):\n",
+        "    self.empty_image = np.asarray(Image.open('Empty.png'))\n",
+        "    self.hole_image = np.asarray(Image.open('Hole.png'))\n",
+        "    self.fish_image = np.asarray(Image.open('Fish.png'))\n",
+        "    self.penguin_image = np.asarray(Image.open('Penguin.png'))\n",
+        "    self.fig,self.ax = plt.subplots()\n",
+        "    self.n_row = n_row\n",
+        "    self.n_col = n_col\n",
+        "\n",
+        "    my_colormap_vals_hex =('2a0902', '2b0a03', '2c0b04', '2d0c05', '2e0c06', '2f0d07', '300d08', '310e09', '320f0a', '330f0b', '34100b', '35110c', '36110d', '37120e', '38120f', '39130f', '3a1410', '3b1411', '3c1511', '3d1612', '3e1613', '3f1713', '401714', '411814', '421915', '431915', '451a16', '461b16', '471b17', '481c17', '491d18', '4a1d18', '4b1e19', '4c1f19', '4d1f1a', '4e201b', '50211b', '51211c', '52221c', '53231d', '54231d', '55241e', '56251e', '57261f', '58261f', '592720', '5b2821', '5c2821', '5d2922', '5e2a22', '5f2b23', '602b23', '612c24', '622d25', '632e25', '652e26', '662f26', '673027', '683027', '693128', '6a3229', '6b3329', '6c342a', '6d342a', '6f352b', '70362c', '71372c', '72372d', '73382e', '74392e', '753a2f', '763a2f', '773b30', '783c31', '7a3d31', '7b3e32', '7c3e33', '7d3f33', '7e4034', '7f4134', '804235', '814236', '824336', '834437', '854538', '864638', '874739', '88473a', '89483a', '8a493b', '8b4a3c', '8c4b3c', '8d4c3d', '8e4c3e', '8f4d3f', '904e3f', '924f40', '935041', '945141', '955242', '965343', '975343', '985444', '995545', '9a5646', '9b5746', '9c5847', '9d5948', '9e5a49', '9f5a49', 'a05b4a', 'a15c4b', 'a35d4b', 'a45e4c', 'a55f4d', 'a6604e', 'a7614e', 'a8624f', 'a96350', 'aa6451', 'ab6552', 'ac6552', 'ad6653', 'ae6754', 'af6855', 'b06955', 'b16a56', 'b26b57', 'b36c58', 'b46d59', 'b56e59', 'b66f5a', 'b7705b', 'b8715c', 'b9725d', 'ba735d', 'bb745e', 'bc755f', 'bd7660', 'be7761', 'bf7862', 'c07962', 'c17a63', 'c27b64', 'c27c65', 'c37d66', 'c47e67', 'c57f68', 'c68068', 'c78169', 'c8826a', 'c9836b', 'ca846c', 'cb856d', 'cc866e', 'cd876f', 'ce886f', 'ce8970', 'cf8a71', 'd08b72', 'd18c73', 'd28d74', 'd38e75', 'd48f76', 'd59077', 'd59178', 'd69279', 'd7937a', 'd8957b', 'd9967b', 'da977c', 'da987d', 'db997e', 'dc9a7f', 'dd9b80', 'de9c81', 'de9d82', 'df9e83', 'e09f84', 'e1a185', 'e2a286', 'e2a387', 'e3a488', 'e4a589', 'e5a68a', 'e5a78b', 'e6a88c', 'e7aa8d', 'e7ab8e', 'e8ac8f', 'e9ad90', 'eaae91', 'eaaf92', 'ebb093', 'ecb295', 'ecb396', 
'edb497', 'eeb598', 'eeb699', 'efb79a', 'efb99b', 'f0ba9c', 'f1bb9d', 'f1bc9e', 'f2bd9f', 'f2bfa1', 'f3c0a2', 'f3c1a3', 'f4c2a4', 'f5c3a5', 'f5c5a6', 'f6c6a7', 'f6c7a8', 'f7c8aa', 'f7c9ab', 'f8cbac', 'f8ccad', 'f8cdae', 'f9ceb0', 'f9d0b1', 'fad1b2', 'fad2b3', 'fbd3b4', 'fbd5b6', 'fbd6b7', 'fcd7b8', 'fcd8b9', 'fcdaba', 'fddbbc', 'fddcbd', 'fddebe', 'fddfbf', 'fee0c1', 'fee1c2', 'fee3c3', 'fee4c5', 'ffe5c6', 'ffe7c7', 'ffe8c9', 'ffe9ca', 'ffebcb', 'ffeccd', 'ffedce', 'ffefcf', 'fff0d1', 'fff2d2', 'fff3d3', 'fff4d5', 'fff6d6', 'fff7d8', 'fff8d9', 'fffada', 'fffbdc', 'fffcdd', 'fffedf', 'ffffe0')\n",
+        "    my_colormap_vals_dec = np.array([int(element,base=16) for element in my_colormap_vals_hex])\n",
+        "    r = np.floor(my_colormap_vals_dec/(256*256))\n",
+        "    g = np.floor((my_colormap_vals_dec - r *256 *256)/256)\n",
+        "    b = np.floor(my_colormap_vals_dec - r * 256 *256 - g * 256)\n",
+        "    self.colormap = np.vstack((r,g,b)).transpose()/255.0\n",
+        "\n",
+        "\n",
+        "  def draw_text(self, text, row, col, position, color):\n",
+        "    if position == 'bc':\n",
+        "      self.ax.text( 83*col+41,83 * (row+1) -10, text, horizontalalignment=\"center\", color=color, fontweight='bold')\n",
+        "    if position == 'tl':\n",
+        "      self.ax.text( 83*col+5,83 * row +5, text, verticalalignment = 'top', horizontalalignment=\"left\", color=color, fontweight='bold')\n",
+        "\n",
+        "  # Draws a set of states\n",
+        "  def draw_path(self, path, color1, color2):\n",
+        "    for i in range(len(path)-1):\n",
+        "      row_start = np.floor(path[i]/self.n_col)\n",
+        "      row_end = np.floor(path[i+1]/self.n_col)\n",
+        "      col_start = path[i] - row_start * self.n_col\n",
+        "      col_end = path[i+1] - row_end * self.n_col\n",
+        "\n",
+        "      color_index = int(np.floor(255 * i/(len(path)-1.)))\n",
+        "      self.ax.plot([col_start * 83+41 + i, col_end * 83+41 + i ],[row_start * 83+41 +  i, row_end * 83+41 + i ], color=(self.colormap[color_index,0],self.colormap[color_index,1],self.colormap[color_index,2]))\n",
+        "\n",
+        "\n",
+        "  # Draw deterministic policy\n",
+        "  def draw_deterministic_policy(self,i, action):\n",
+        "      row = np.floor(i/self.n_col)\n",
+        "      col = i - row * self.n_col\n",
+        "      center_x = 83 * col + 41\n",
+        "      center_y = 83 * row + 41\n",
+        "      arrow_base_width = 10\n",
+        "      arrow_height = 15\n",
+        "      # Draw arrow pointing upward\n",
+        "      if action ==0:\n",
+        "          triangle_indices = np.array([[center_x, center_y-arrow_height/2],\n",
+        "                              [center_x - arrow_base_width/2, center_y+arrow_height/2],\n",
+        "                              [center_x + arrow_base_width/2, center_y+arrow_height/2]])\n",
+        "      # Draw arrow pointing right\n",
+        "      if action ==1:\n",
+        "          triangle_indices = np.array([[center_x + arrow_height/2, center_y],\n",
+        "                              [center_x - arrow_height/2, center_y-arrow_base_width/2],\n",
+        "                              [center_x - arrow_height/2, center_y+arrow_base_width/2]])\n",
+        "      # Draw arrow pointing downward\n",
+        "      if action ==2:\n",
+        "          triangle_indices = np.array([[center_x, center_y+arrow_height/2],\n",
+        "                              [center_x - arrow_base_width/2, center_y-arrow_height/2],\n",
+        "                              [center_x + arrow_base_width/2, center_y-arrow_height/2]])\n",
+        "      # Draw arrow pointing left\n",
+        "      if action ==3:\n",
+        "          triangle_indices = np.array([[center_x - arrow_height/2, center_y],\n",
+        "                              [center_x + arrow_height/2, center_y-arrow_base_width/2],\n",
+        "                              [center_x + arrow_height/2, center_y+arrow_base_width/2]])\n",
+        "      self.ax.fill(triangle_indices[:,0], triangle_indices[:,1],facecolor='cyan', edgecolor='darkcyan', linewidth=1)\n",
+        "\n",
+        "  # Draw stochastic policy\n",
+        "  def draw_stochastic_policy(self,i, action_probs):\n",
+        "      row = np.floor(i/self.n_col)\n",
+        "      col = i - row * self.n_col\n",
+        "      offset = 20\n",
+        "      # Draw arrow pointing upward\n",
+        "      center_x = 83 * col + 41\n",
+        "      center_y = 83 * row + 41 - offset\n",
+        "      arrow_base_width = 15 * action_probs[0]\n",
+        "      arrow_height = 20 * action_probs[0]\n",
+        "      triangle_indices = np.array([[center_x, center_y-arrow_height/2],\n",
+        "                          [center_x - arrow_base_width/2, center_y+arrow_height/2],\n",
+        "                          [center_x + arrow_base_width/2, center_y+arrow_height/2]])\n",
+        "      self.ax.fill(triangle_indices[:,0], triangle_indices[:,1],facecolor='cyan', edgecolor='darkcyan', linewidth=1)\n",
+        "\n",
+        "      # Draw arrow pointing right\n",
+        "      center_x = 83 * col + 41 + offset\n",
+        "      center_y = 83 * row + 41\n",
+        "      arrow_base_width = 15 * action_probs[1]\n",
+        "      arrow_height = 20 * action_probs[1]\n",
+        "      triangle_indices = np.array([[center_x + arrow_height/2, center_y],\n",
+        "                          [center_x - arrow_height/2, center_y-arrow_base_width/2],\n",
+        "                          [center_x - arrow_height/2, center_y+arrow_base_width/2]])\n",
+        "      self.ax.fill(triangle_indices[:,0], triangle_indices[:,1],facecolor='cyan', edgecolor='darkcyan', linewidth=1)\n",
+        "\n",
+        "      # Draw arrow pointing downward\n",
+        "      center_x = 83 * col + 41\n",
+        "      center_y = 83 * row + 41 +offset\n",
+        "      arrow_base_width = 15 * action_probs[2]\n",
+        "      arrow_height = 20 * action_probs[2]\n",
+        "      triangle_indices = np.array([[center_x, center_y+arrow_height/2],\n",
+        "                          [center_x - arrow_base_width/2, center_y-arrow_height/2],\n",
+        "                          [center_x + arrow_base_width/2, center_y-arrow_height/2]])\n",
+        "      self.ax.fill(triangle_indices[:,0], triangle_indices[:,1],facecolor='cyan', edgecolor='darkcyan', linewidth=1)\n",
+        "\n",
+        "      # Draw arrow pointing left\n",
+        "      center_x = 83 * col + 41 -offset\n",
+        "      center_y = 83 * row + 41\n",
+        "      arrow_base_width = 15 * action_probs[3]\n",
+        "      arrow_height = 20 * action_probs[3]\n",
+        "      triangle_indices = np.array([[center_x - arrow_height/2, center_y],\n",
+        "                          [center_x + arrow_height/2, center_y-arrow_base_width/2],\n",
+        "                          [center_x + arrow_height/2, center_y+arrow_base_width/2]])\n",
+        "      self.ax.fill(triangle_indices[:,0], triangle_indices[:,1],facecolor='cyan', edgecolor='darkcyan', linewidth=1)\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "  def draw(self, layout, state, draw_state_index= False, rewards=None, policy=None, state_values=None, action_values=None,path1=None, path2 = None):\n",
+        "    # Construct the image\n",
+        "    image_out = np.zeros((self.n_row * 83, self.n_col * 83, 4),dtype='uint8')\n",
+        "    for c_row in range (self.n_row):\n",
+        "      for c_col in range(self.n_col):\n",
+        "        if layout[c_row * self.n_col + c_col]==0:\n",
+        "          image_out[c_row*83:c_row*83+83, c_col*83:c_col*83+83,:] = self.empty_image\n",
+        "        elif layout[c_row * self.n_col + c_col]==1:\n",
+        "          image_out[c_row*83:c_row*83+83, c_col*83:c_col*83+83,:] = self.hole_image\n",
+        "        else:\n",
+        "          image_out[c_row*83:c_row*83+83, c_col*83:c_col*83+83,:] = self.fish_image\n",
+        "        if state == c_row * self.n_col + c_col:\n",
+        "          image_out[c_row*83:c_row*83+83, c_col*83:c_col*83+83,:] = self.penguin_image\n",
+        "\n",
+        "    # Draw the image\n",
+        "    plt.imshow(image_out)\n",
+        "    self.ax.get_xaxis().set_visible(False)\n",
+        "    self.ax.get_yaxis().set_visible(False)\n",
+        "    self.ax.spines['top'].set_visible(False)\n",
+        "    self.ax.spines['right'].set_visible(False)\n",
+        "    self.ax.spines['bottom'].set_visible(False)\n",
+        "    self.ax.spines['left'].set_visible(False)\n",
+        "\n",
+        "    if draw_state_index:\n",
+        "      for c_cell in range(layout.size):\n",
+        "          self.draw_text(\"%d\"%(c_cell), np.floor(c_cell/self.n_col), c_cell-np.floor(c_cell/self.n_col)*self.n_col,'tl','k')\n",
+        "\n",
+        "    # Draw the policy as triangles\n",
+        "    if policy is not None:\n",
+        "        # If the policy is deterministic\n",
+        "        if len(policy) == len(layout):\n",
+        "          for i in range(len(layout)):\n",
+        "            self.draw_deterministic_policy(i, policy[i])\n",
+        "        # Else it is stochastic\n",
+        "        else:\n",
+        "          for i in range(len(layout)):\n",
+        "            self.draw_stochastic_policy(i,policy[:,i])\n",
+        "\n",
+        "\n",
+        "    if path1 is not None:\n",
+        "      # self.draw_path(path1, np.array([0.81, 0.51, 0.38]), np.array([1.0, 0.2, 0.5]))\n",
+        "      self.draw_path(path1, np.array([1.0, 0.0, 0.0]), np.array([0.0, 1.0, 1.0]))\n",
+        "\n",
+        "\n",
+        "    plt.show()"
+      ],
+      "metadata": {
+        "id": "Gq1HfJsHN3SB"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Let's draw the initial situation with the penguin in the top left\n",
+        "n_rows = 4; n_cols = 4\n",
+        "layout = np.zeros(n_rows * n_cols)\n",
+        "initial_state = 0\n",
+        "mdp_drawer = DrawMDP(n_rows, n_cols)\n",
+        "mdp_drawer.draw(layout, state = initial_state, draw_state_index = True)"
+      ],
+      "metadata": {
+        "id": "eBQ7lTpJQBSe"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Note that the states are indexed from 0 rather than 1 as in the book to make\n",
+        "the code neater."
+      ],
+      "metadata": {
+        "id": "P7P40UyMunKb"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Define the state transition probabilities; column j holds Pr(s_{t+1}|s_{t}=j)\n",
+        "transition_probabilities = np.array( \\\n",
+        "[[0.00 , 0.33, 0.00, 0.00,  0.33, 0.00, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.50 , 0.00, 0.33, 0.00,  0.00, 0.25, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.00 , 0.33, 0.00, 0.50,  0.00, 0.00, 0.25, 0.00,   0.00, 0.00, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.00 , 0.00, 0.33, 0.00,  0.00, 0.00, 0.00, 0.33,   0.00, 0.00, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.50 , 0.00, 0.00, 0.00,  0.00, 0.25, 0.00, 0.00,   0.33, 0.00, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.00 , 0.34, 0.00, 0.00,  0.33, 0.00, 0.25, 0.00,   0.00, 0.25, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.00 , 0.00, 0.34, 0.00,  0.00, 0.25, 0.00, 0.33,   0.00, 0.00, 0.25, 0.00,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.00 , 0.00, 0.00, 0.50,  0.00, 0.00, 0.25, 0.00,   0.00, 0.00, 0.00, 0.33,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.34, 0.00, 0.00, 0.00,   0.00, 0.25, 0.00, 0.00,   0.50, 0.00, 0.00, 0.00 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.00, 0.25, 0.00, 0.00,   0.33, 0.00, 0.25, 0.00,   0.00, 0.33, 0.00, 0.00 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.00, 0.00, 0.25, 0.00,   0.00, 0.25, 0.00, 0.33,   0.00, 0.00, 0.33, 0.00 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.00, 0.00, 0.00, 0.34,   0.00, 0.00, 0.25, 0.00,   0.00, 0.00, 0.00, 0.50 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.00, 0.00, 0.00, 0.00,   0.34, 0.00, 0.00, 0.00,   0.00, 0.33, 0.00, 0.00 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.00, 0.00, 0.00, 0.00,   0.00, 0.25, 0.00, 0.00,   0.50, 0.00, 0.33, 0.00 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.00, 0.00, 0.00, 0.00,   0.00, 0.00, 0.25, 0.00,   0.00, 0.34, 0.00, 0.50 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.00, 0.00, 0.00, 0.00,   0.00, 0.00, 0.00, 0.34,   0.00, 0.00, 0.34, 0.00 ],\n",
+        "])\n",
+        "initial_state = 0"
+      ],
+      "metadata": {
+        "id": "wgFcIi4YQJWI"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Define a step from the Markov process"
+      ],
+      "metadata": {
+        "id": "axllRDDuDDLS"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def markov_process_step(state, transition_probabilities):\n",
+        "  # TODO -- update the state according to the appropriate transition probabilities\n",
+        "  # One way to do this is to use np.random.choice\n",
+        "  # Replace this line:\n",
+        "  new_state = 0\n",
+        "\n",
+        "\n",
+        "  return new_state"
+      ],
+      "metadata": {
+        "id": "FrSZrS67sdbN"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Run the Markov process for 10 steps and visualise the results"
+      ],
+      "metadata": {
+        "id": "uTj7rN6LDFXd"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "np.random.seed(0)\n",
+        "T = 10\n",
+        "states = np.zeros(T, dtype='uint8')\n",
+        "states[0] = 0\n",
+        "for t in range(T-1):\n",
+        "  states[t+1] = markov_process_step(states[t], transition_probabilities)\n",
+        "\n",
+        "\n",
+        "\n",
+        "print(\"Your States:\", states)\n",
+        "print(\"True States: [ 0  4  8  9 10  9 10  9 13 14]\")\n",
+        "mdp_drawer = DrawMDP(n_rows, n_cols)\n",
+        "mdp_drawer.draw(layout, state = states[0], path1=states, draw_state_index = True)"
+      ],
+      "metadata": {
+        "id": "lRIdjagCwP62"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Define one step of a Markov reward process."
+      ],
+      "metadata": {
+        "id": "QLyjyBjjDMin"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def markov_reward_process_step(state, transition_probabilities, reward_structure):\n",
+        "\n",
+        "    # TODO -- write this function\n",
+        "    # Update the state.  Return a reward of +1 if the Penguin lands on the fish\n",
+        "    # or zero otherwise.\n",
+        "    # Replace this line\n",
+        "    new_state = 0; reward = 0\n",
+        "\n",
+        "\n",
+        "    return new_state, reward"
+      ],
+      "metadata": {
+        "id": "YPHSJRKx-pgO"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Run the Markov reward process for 10 steps and visualise the results"
+      ],
+      "metadata": {
+        "id": "AIz8QEiRFoCm"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Set up the reward structure so it matches figure 19.2\n",
+        "reward_structure = np.zeros((16,1))\n",
+        "reward_structure[3] = 1; reward_structure[8] = 1; reward_structure[10] = 1\n",
+        "\n",
+        "# Initialize random numbers\n",
+        "np.random.seed(0)\n",
+        "T = 10\n",
+        "# Allocate the state and reward sequences (the fish positions follow figure 19.2)\n",
+        "states = np.zeros(T, dtype='uint8')\n",
+        "rewards = np.zeros(T, dtype='uint8')\n",
+        "\n",
+        "states[0] = 0\n",
+        "for t in range(T-1):\n",
+        "  states[t+1],rewards[t+1] = markov_reward_process_step(states[t], transition_probabilities, reward_structure)\n",
+        "\n",
+        "print(\"Your States:\", states)\n",
+        "print(\"Your Rewards:\", rewards)\n",
+        "print(\"True Rewards: [0 0 1 0 1 0 1 0 0 0]\")\n",
+        "\n",
+        "\n",
+        "# Draw the figure\n",
+        "layout = np.zeros(n_rows * n_cols)\n",
+        "layout[3] = 2; layout[8] = 2 ; layout[10] = 2\n",
+        "mdp_drawer = DrawMDP(n_rows, n_cols)\n",
+        "mdp_drawer.draw(layout, state = states[0], path1=states, draw_state_index = True)"
+      ],
+      "metadata": {
+        "id": "0p1gCpGoFn4M"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Now let's calculate the return -- the sum of discounted future rewards"
+      ],
+      "metadata": {
+        "id": "lyz47NWrITfj"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def calculate_return(rewards, gamma):\n",
+        "  # TODO -- you write this function\n",
+        "  # It should compute one return for the start of the sequence (i.e. G_1)\n",
+        "  # Replace this line\n",
+        "  return_val = 0.0\n",
+        "\n",
+        "\n",
+        "  return return_val"
+      ],
+      "metadata": {
+        "id": "4fEuBRPnFm_N"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "gamma = 0.9\n",
+        "for t in range(len(states)):\n",
+        "  print(\"Return at time %d = %3.3f\"%(t, calculate_return(rewards[t:],gamma)))\n",
+        "\n",
+        "# Reality check!\n",
+        "print(\"True return at time 0: 1.998\")"
+      ],
+      "metadata": {
+        "id": "o19lQgM3JrOz"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Now let's define the state transition function $Pr(s_{t+1}|s_{t},a)$ in full where $a$ is the action.  Here $a=0$ means try to go upward, $a=1$ right, $a=2$ down and $a=3$ left.  However, the ice is slippery, so we don't always go the direction we want to.\n",
+        "\n",
+        "Note that, as for the states, we've indexed the actions from zero (unlike in the book), so they map to the indices of arrays better."
+      ],
+      "metadata": {
+        "id": "Fhc6DzZNOjiC"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "transition_probabilities_given_action1 = np.array(\\\n",
+        "[[0.00 , 0.33, 0.00, 0.00,  0.50, 0.00, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.50 , 0.00, 0.33, 0.00,  0.00, 0.50, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.00 , 0.33, 0.00, 0.50,  0.00, 0.00, 0.50, 0.00,   0.00, 0.00, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.00 , 0.00, 0.33, 0.00,  0.00, 0.00, 0.00, 0.50,   0.00, 0.00, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.50 , 0.00, 0.00, 0.00,  0.00, 0.17, 0.00, 0.00,   0.50, 0.00, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.00 , 0.34, 0.00, 0.00,  0.25, 0.00, 0.17, 0.00,   0.00, 0.50, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.00 , 0.00, 0.34, 0.00,  0.00, 0.17, 0.00, 0.25,   0.00, 0.00, 0.50, 0.00,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.00 , 0.00, 0.00, 0.50,  0.00, 0.00, 0.17, 0.00,   0.00, 0.00, 0.00, 0.50,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.25, 0.00, 0.00, 0.00,   0.00, 0.17, 0.00, 0.00,   0.75, 0.00, 0.00, 0.00 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.00, 0.16, 0.00, 0.00,   0.25, 0.00, 0.17, 0.00,   0.00, 0.50, 0.00, 0.00 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.00, 0.00, 0.16, 0.00,   0.00, 0.17, 0.00, 0.25,   0.00, 0.00, 0.50, 0.00 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.00, 0.00, 0.00, 0.25,   0.00, 0.00, 0.17, 0.00,   0.00, 0.00, 0.00, 0.75 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.00, 0.00, 0.00, 0.00,   0.25, 0.00, 0.00, 0.00,   0.00, 0.25, 0.00, 0.00 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.00, 0.00, 0.00, 0.00,   0.00, 0.16, 0.00, 0.00,   0.25, 0.00, 0.25, 0.00 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.00, 0.00, 0.00, 0.00,   0.00, 0.00, 0.16, 0.00,   0.00, 0.25, 0.00, 0.25 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.00, 0.00, 0.00, 0.00,   0.00, 0.00, 0.00, 0.25,   0.00, 0.00, 0.25, 0.00 ],\n",
+        "])\n",
+        "\n",
+        "transition_probabilities_given_action2 = np.array(\\\n",
+        "[[0.00 , 0.25, 0.00, 0.00,  0.25, 0.00, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.75 , 0.00, 0.25, 0.00,  0.00, 0.17, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.00 , 0.50, 0.00, 0.50,  0.00, 0.00, 0.17, 0.00,   0.00, 0.00, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.00 , 0.00, 0.50, 0.00,  0.00, 0.00, 0.00, 0.33,   0.00, 0.00, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.25 , 0.00, 0.00, 0.00,  0.00, 0.17, 0.00, 0.00,   0.25, 0.00, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.00 , 0.25, 0.00, 0.00,  0.50, 0.00, 0.17, 0.00,   0.00, 0.17, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.00 , 0.00, 0.25, 0.00,  0.00, 0.50, 0.00, 0.33,   0.00, 0.00, 0.17, 0.00,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.00 , 0.00, 0.00, 0.50,  0.00, 0.00, 0.50, 0.00,   0.00, 0.00, 0.00, 0.33,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.25, 0.00, 0.00, 0.00,   0.00, 0.17, 0.00, 0.00,   0.25, 0.00, 0.00, 0.00 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.00, 0.16, 0.00, 0.00,   0.50, 0.00, 0.17, 0.00,   0.00, 0.25, 0.00, 0.00 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.00, 0.00, 0.16, 0.00,   0.00, 0.50, 0.00, 0.33,   0.00, 0.00, 0.25, 0.00 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.00, 0.00, 0.00, 0.34,   0.00, 0.00, 0.50, 0.00,   0.00, 0.00, 0.00, 0.50 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.00, 0.00, 0.00, 0.00,   0.25, 0.00, 0.00, 0.00,   0.00, 0.25, 0.00, 0.00 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.00, 0.00, 0.00, 0.00,   0.00, 0.16, 0.00, 0.00,   0.75, 0.00, 0.25, 0.00 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.00, 0.00, 0.00, 0.00,   0.00, 0.00, 0.16, 0.00,   0.00, 0.50, 0.00, 0.50 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.00, 0.00, 0.00, 0.00,   0.00, 0.00, 0.00, 0.34,   0.00, 0.00, 0.50, 0.00 ],\n",
+        "])\n",
+        "\n",
+        "transition_probabilities_given_action3 = np.array(\\\n",
+        "[[0.00 , 0.25, 0.00, 0.00,  0.25, 0.00, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.25 , 0.00, 0.25, 0.00,  0.00, 0.17, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.00 , 0.25, 0.00, 0.25,  0.00, 0.00, 0.17, 0.00,   0.00, 0.00, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.00 , 0.00, 0.25, 0.00,  0.00, 0.00, 0.00, 0.25,   0.00, 0.00, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.75 , 0.00, 0.00, 0.00,  0.00, 0.17, 0.00, 0.00,   0.25, 0.00, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.00 , 0.50, 0.00, 0.00,  0.25, 0.00, 0.17, 0.00,   0.00, 0.17, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.00 , 0.00, 0.50, 0.00,  0.00, 0.16, 0.00, 0.25,   0.00, 0.00, 0.17, 0.00,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.00 , 0.00, 0.00, 0.75,  0.00, 0.00, 0.16, 0.00,   0.00, 0.00, 0.00, 0.25,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.50, 0.00, 0.00, 0.00,   0.00, 0.17, 0.00, 0.00,   0.50, 0.00, 0.00, 0.00 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.00, 0.50, 0.00, 0.00,   0.25, 0.00, 0.17, 0.00,   0.00, 0.33, 0.00, 0.00 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.00, 0.00, 0.50, 0.00,   0.00, 0.16, 0.00, 0.25,   0.00, 0.00, 0.33, 0.00 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.00, 0.00, 0.00, 0.50,   0.00, 0.00, 0.16, 0.00,   0.00, 0.00, 0.00, 0.50 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.00, 0.00, 0.00, 0.00,   0.50, 0.00, 0.00, 0.00,   0.00, 0.33, 0.00, 0.00 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.00, 0.00, 0.00, 0.00,   0.00, 0.50, 0.00, 0.00,   0.50, 0.00, 0.33, 0.00 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.00, 0.00, 0.00, 0.00,   0.00, 0.00, 0.50, 0.00,   0.00, 0.34, 0.00, 0.50 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.00, 0.00, 0.00, 0.00,   0.00, 0.00, 0.00, 0.50,   0.00, 0.00, 0.34, 0.00 ],\n",
+        "])\n",
+        "\n",
+        "transition_probabilities_given_action4 = np.array(\\\n",
+        "[[0.00 , 0.25, 0.00, 0.00,  0.33, 0.00, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.50 , 0.00, 0.25, 0.00,  0.00, 0.17, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.00 , 0.50, 0.00, 0.75,  0.00, 0.00, 0.17, 0.00,   0.00, 0.00, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.00 , 0.00, 0.50, 0.00,  0.00, 0.00, 0.00, 0.25,   0.00, 0.00, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.50 , 0.00, 0.00, 0.00,  0.00, 0.50, 0.00, 0.00,   0.33, 0.00, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.00 , 0.25, 0.00, 0.00,  0.33, 0.00, 0.50, 0.00,   0.00, 0.17, 0.00, 0.00,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.00 , 0.00, 0.25, 0.00,  0.00, 0.17, 0.00, 0.50,   0.00, 0.00, 0.17, 0.00,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.00 , 0.00, 0.00, 0.25,  0.00, 0.00, 0.17, 0.00,   0.00, 0.00, 0.00, 0.25,   0.00, 0.00, 0.00, 0.00 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.34, 0.00, 0.00, 0.00,   0.00, 0.50, 0.00, 0.00,   0.50, 0.00, 0.00, 0.00 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.00, 0.16, 0.00, 0.00,   0.33, 0.00, 0.50, 0.00,   0.00, 0.25, 0.00, 0.00 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.00, 0.00, 0.16, 0.00,   0.00, 0.17, 0.00, 0.50,   0.00, 0.00, 0.25, 0.00 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.00, 0.00, 0.00, 0.25,   0.00, 0.00, 0.17, 0.00,   0.00, 0.00, 0.00, 0.25 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.00, 0.00, 0.00, 0.00,   0.34, 0.00, 0.00, 0.00,   0.00, 0.50, 0.00, 0.00 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.00, 0.00, 0.00, 0.00,   0.00, 0.16, 0.00, 0.00,   0.50, 0.00, 0.50, 0.00 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.00, 0.00, 0.00, 0.00,   0.00, 0.00, 0.16, 0.00,   0.00, 0.25, 0.00, 0.75 ],\n",
+        " [0.00 , 0.00, 0.00, 0.00,  0.00, 0.00, 0.00, 0.00,   0.00, 0.00, 0.00, 0.25,   0.00, 0.00, 0.25, 0.00 ],\n",
+        "])\n",
+        "\n",
+        "# Store all of these in a three-dimensional array\n",
+        "# Pr(s_{t+1}=2|s_{t}=1, a_{t}=3) is stored at position [2,1,3]\n",
+        "transition_probabilities_given_action = np.concatenate((np.expand_dims(transition_probabilities_given_action1,2),\n",
+        "                                                        np.expand_dims(transition_probabilities_given_action2,2),\n",
+        "                                                        np.expand_dims(transition_probabilities_given_action3,2),\n",
+        "                                                        np.expand_dims(transition_probabilities_given_action4,2)),axis=2)"
+      ],
+      "metadata": {
+        "id": "l7rT78BbOgTi"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Now we need a policy.  Let's start with the deterministic policy in figure 19.5a:\n",
+        "policy = [2,2,1,1, 2,1,1,1, 1,1,0,2, 1,0,1,1]\n",
+        "\n",
+        "# Let's draw the policy first\n",
+        "layout = np.zeros(n_rows * n_cols)\n",
+        "layout[15] = 2\n",
+        "mdp_drawer = DrawMDP(n_rows, n_cols)\n",
+        "mdp_drawer.draw(layout, state = states[0], policy = policy, draw_state_index = True)"
+      ],
+      "metadata": {
+        "id": "8jWhDlkaKj7Q"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def markov_decision_process_step_deterministic(state, transition_probabilities_given_action, reward_structure, policy):\n",
+        "  # TODO -- complete this function.\n",
+        "  # For each state, there is a corresponding action.\n",
+        "  # Draw the next state based on the current state and that action\n",
+        "  # and calculate the reward\n",
+        "  # Replace this line:\n",
+        "  new_state = 0; reward = 0;\n",
+        "\n",
+        "  return new_state, reward\n"
+      ],
+      "metadata": {
+        "id": "dueNbS2SUVUK"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Set up the reward structure so it matches figure 19.2\n",
+        "reward_structure = np.zeros((16,1))\n",
+        "reward_structure[15] = 1\n",
+        "\n",
+        "# Initialize random number seed\n",
+        "np.random.seed(3)\n",
+        "T = 10\n",
+        "# Set up the states, so the fish are in the same positions as figure 19.5\n",
+        "states = np.zeros(T, dtype='uint8')\n",
+        "rewards = np.zeros(T, dtype='uint8')\n",
+        "\n",
+        "states[0] = 0\n",
+        "for t in range(T-1):\n",
+        "  states[t+1],rewards[t+1] = markov_decision_process_step_deterministic(states[t], transition_probabilities_given_action, reward_structure, policy)\n",
+        "\n",
+        "print(\"Your States:\", states)\n",
+        "print(\"True States: [ 0  4  8  9 13 14 15 11  7  3]\")\n",
+        "print(\"Your Rewards:\", rewards)\n",
+        "print(\"True Rewards: [0 0 0 0 0 0 1 0 0 0]\")\n",
+        "\n",
+        "mdp_drawer = DrawMDP(n_rows, n_cols)\n",
+        "mdp_drawer.draw(layout, state = states[0], path1=states, policy = policy, draw_state_index = True)"
+      ],
+      "metadata": {
+        "id": "4Du5aUfd2Lci"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "You can see that the penguin usually follows the policy (heads in the direction of the cyan arrows when it can).  But sometimes, the penguin \"slips\" to a different neighboring state.\n",
+        "\n",
+        "Now let's investigate a stochastic policy"
+      ],
+      "metadata": {
+        "id": "bLEd8xug33b-"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "np.random.seed(0)\n",
+        "# Let's now choose a random policy.  We'll generate a set of random numbers and pass\n",
+        "# them through a softmax function\n",
+        "stochastic_policy = np.random.normal(size=(4,n_rows*n_cols))\n",
+        "stochastic_policy = np.exp(stochastic_policy) / (np.ones((4,1))@ np.expand_dims(np.sum(np.exp(stochastic_policy), axis=0),0))\n",
+        "np.set_printoptions(precision=2)\n",
+        "print(stochastic_policy)\n",
+        "\n",
+        "# Let's draw the policy first\n",
+        "layout = np.zeros(n_rows * n_cols)\n",
+        "layout[15] = 2\n",
+        "mdp_drawer = DrawMDP(n_rows, n_cols)\n",
+        "mdp_drawer.draw(layout, state = states[0], path1=states, policy = stochastic_policy, draw_state_index = True)"
+      ],
+      "metadata": {
+        "id": "o7T0b3tyilDc"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def markov_decision_process_step_stochastic(state, transition_probabilities_given_action, reward_structure, stochastic_policy):\n",
+        "  # TODO -- complete this function.\n",
+        "  # For each state, there is a corresponding distribution over actions\n",
+        "  # Draw a sample from that distribution to get the action\n",
+        "  # Draw the next state based on the current state and that action\n",
+        "  # and calculate the reward\n",
+        "  # Replace this line:\n",
+        "  new_state = 0; reward = 0;action = 0\n",
+        "\n",
+        "\n",
+        "\n",
+        "  return new_state, reward, action"
+      ],
+      "metadata": {
+        "id": "T68mTZSe6A3w"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Set up the reward structure so it matches figure 19.2\n",
+        "reward_structure = np.zeros((16,1))\n",
+        "reward_structure[15] = 1\n",
+        "\n",
+        "# Initialize random number seed\n",
+        "np.random.seed(0)\n",
+        "T = 10\n",
+        "# Set up the states, so the fish are in the same positions as figure 19.5\n",
+        "states = np.zeros(T, dtype='uint8')\n",
+        "rewards = np.zeros(T, dtype='uint8')\n",
+        "actions = np.zeros(T-1, dtype='uint8')\n",
+        "\n",
+        "states[0] = 0\n",
+        "for t in range(T-1):\n",
+        "  states[t+1],rewards[t+1],actions[t] = markov_decision_process_step_stochastic(states[t], transition_probabilities_given_action, reward_structure, stochastic_policy)\n",
+        "\n",
+        "print(\"Actions\", actions)\n",
+        "print(\"Your States:\", states)\n",
+        "print(\"Your Rewards:\", rewards)\n",
+        "\n",
+        "mdp_drawer = DrawMDP(n_rows, n_cols)\n",
+        "mdp_drawer.draw(layout, state = states[0], path1=states, policy = stochastic_policy, draw_state_index = True)"
+      ],
+      "metadata": {
+        "id": "hMRVYX2HtqMg"
+      },
+      "execution_count": null,
+      "outputs": []
     }
   ]
 }
\ No newline at end of file