Add files via upload

2024-01-02 13:09:22 -05:00
parent db836826f6
commit c19e2411c5
3 changed files with 471 additions and 442 deletions
--- a/Notebooks/Chap17/17_3_Importance_Sampling.ipynb
+++ b/Notebooks/Chap17/17_3_Importance_Sampling.ipynb
@@ -1,33 +1,22 @@
 {
-  "nbformat": 4,
-  "nbformat_minor": 0,
-  "metadata": {
-    "colab": {
-      "provenance": [],
-      "authorship_tag": "ABX9TyNecz9/CDOggPSmy1LjT/Dv",
-      "include_colab_link": true
-    },
-    "kernelspec": {
-      "name": "python3",
-      "display_name": "Python 3"
-    },
-    "language_info": {
-      "name": "python"
-    }
-  },
  "cells": [
    {
+      "attachments": {},
      "cell_type": "markdown",
      "metadata": {
-        "id": "view-in-github",
-        "colab_type": "text"
+        "colab_type": "text",
+        "id": "view-in-github"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/udlbook/udlbook/blob/main/Notebooks/Chap17/17_3_Importance_Sampling.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
+      "attachments": {},
      "cell_type": "markdown",
+      "metadata": {
+        "id": "t9vk9Elugvmi"
+      },
      "source": [
        "# **Notebook 17.3: Importance sampling**\n",
        "\n",
@@ -36,25 +25,26 @@
        "Work through the cells below, running each cell in turn. In various places you will see the words \"TO DO\". Follow the instructions at these places and make predictions about what is going to happen or write code to complete the functions.\n",
        "\n",
        "Contact me at udlbookmail@gmail.com if you find any mistakes or have any suggestions."
-      ],
-      "metadata": {
-        "id": "t9vk9Elugvmi"
-      }
+      ]
    },
    {
      "cell_type": "code",
-      "source": [
-        "import numpy as np\n",
-        "import matplotlib.pyplot as plt"
-      ],
+      "execution_count": null,
      "metadata": {
        "id": "OLComQyvCIJ7"
      },
-      "execution_count": null,
-      "outputs": []
+      "outputs": [],
+      "source": [
+        "import numpy as np\n",
+        "import matplotlib.pyplot as plt"
+      ]
    },
    {
+      "attachments": {},
      "cell_type": "markdown",
+      "metadata": {
+        "id": "f7a6xqKjkmvT"
+      },
      "source": [
        "Let's approximate the expectation\n",
        "\n",
@@ -65,7 +55,7 @@
        "where\n",
        "\n",
        "\\begin{equation}\n",
-        "Pr(y)=\\mbox{Norm}_y[0,1]\n",
+        "Pr(y)=\\text{Norm}_y[0,1]\n",
        "\\end{equation}\n",
        "\n",
        "by drawing $I$ samples $y_i$ and using the formula:\n",
@@ -73,13 +63,15 @@
        "\\begin{equation}\n",
        "\\mathbb{E}_{y}\\Bigl[\\exp\\bigl[- (y-1)^4\\bigr]\\Bigr] \\approx \\frac{1}{I} \\sum_{i=1}^I \\exp\\bigl[-(y-1)^4 \\bigr]\n",
        "\\end{equation}"
-      ],
-      "metadata": {
-        "id": "f7a6xqKjkmvT"
-      }
+      ]
    },
    {
      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "VjkzRr8o2ksg"
+      },
+      "outputs": [],
      "source": [
        "def f(y):\n",
        "  return np.exp(-(y-1) *(y-1) *(y-1) * (y-1))\n",
@@ -95,15 +87,15 @@
        "ax.set_xlabel(\"$y$\")\n",
        "ax.legend()\n",
        "plt.show()"
-      ],
-      "metadata": {
-        "id": "VjkzRr8o2ksg"
-      },
-      "execution_count": null,
-      "outputs": []
+      ]
    },
    {
      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "LGAKHjUJnWmy"
+      },
+      "outputs": [],
      "source": [
        "def compute_expectation(n_samples):\n",
        "  # TODO -- compute this expectation\n",
@@ -114,15 +106,15 @@
        "\n",
        "\n",
        "  return expectation"
-      ],
-      "metadata": {
-        "id": "LGAKHjUJnWmy"
-      },
-      "execution_count": null,
-      "outputs": []
+      ]
    },
    {
      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "nmvixMqgodIP"
+      },
+      "outputs": [],
      "source": [
        "# Set the seed so the random numbers are all the same\n",
        "np.random.seed(0)\n",
@@ -131,26 +123,27 @@
        "n_samples = 100000000\n",
        "expected_f= compute_expectation(n_samples)\n",
        "print(\"Your value: \", expected_f, \", True value:  0.43160702267383166\")"
-      ],
-      "metadata": {
-        "id": "nmvixMqgodIP"
-      },
-      "execution_count": null,
-      "outputs": []
+      ]
    },
    {
+      "attachments": {},
      "cell_type": "markdown",
+      "metadata": {
+        "id": "Jr4UPcqmnXCS"
+      },
      "source": [
        "Let's investigate how the variance of this approximation decreases as we increase the number of samples $N$.\n",
        "\n",
        "\n"
-      ],
-      "metadata": {
-        "id": "Jr4UPcqmnXCS"
-      }
+      ]
    },
    {
      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "yrDp1ILUo08j"
+      },
+      "outputs": [],
      "source": [
        "def compute_mean_variance(n_sample):\n",
        "  n_estimate = 10000\n",
@@ -158,15 +151,15 @@
        "  for i in range(n_estimate):\n",
        "    estimates[i] = compute_expectation(n_sample.astype(int))\n",
        "  return np.mean(estimates), np.var(estimates)"
-      ],
-      "metadata": {
-        "id": "yrDp1ILUo08j"
-      },
-      "execution_count": null,
-      "outputs": []
+      ]
    },
    {
      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "BcUVsodtqdey"
+      },
+      "outputs": [],
      "source": [
        "# Compute the mean and variance for 1,2,... 20 samples\n",
        "n_sample_all = np.array([1.,2,3,4,5,6,7,8,9,10,15,20,25,30,45,50,60,70,80,90,100,150,200,250,300,350,400,450,500])\n",
@@ -175,15 +168,15 @@
        "for i in range(len(n_sample_all)):\n",
        "  print(\"Computing mean and variance for expectation with %d samples\"%(n_sample_all[i]))\n",
        "  mean_all[i],variance_all[i] = compute_mean_variance(n_sample_all[i])"
-      ],
-      "metadata": {
-        "id": "BcUVsodtqdey"
-      },
-      "execution_count": null,
-      "outputs": []
+      ]
    },
    {
      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "feXmyk0krpUi"
+      },
+      "outputs": [],
      "source": [
        "fig,ax = plt.subplots()\n",
        "ax.semilogx(n_sample_all, mean_all,'r-',label='mean estimate')\n",
@@ -193,24 +186,24 @@
        "ax.plot([0,500],[0.43160702267383166,0.43160702267383166],'k--',label='true value')\n",
        "ax.legend()\n",
        "plt.show()\n"
-      ],
-      "metadata": {
-        "id": "feXmyk0krpUi"
-      },
-      "execution_count": null,
-      "outputs": []
+      ]
    },
    {
+      "attachments": {},
      "cell_type": "markdown",
-      "source": [
-        "As you might expect, the more samples that we use to compute the approximate estimate, the lower the variance of the estimate."
-      ],
      "metadata": {
        "id": "XTUpxFlSuOl7"
-      }
+      },
+      "source": [
+        "As you might expect, the more samples that we use to compute the approximate estimate, the lower the variance of the estimate."
+      ]
    },
    {
+      "attachments": {},
      "cell_type": "markdown",
+      "metadata": {
+        "id": "6hxsl3Pxo1TT"
+      },
      "source": [
        " Now consider the function\n",
        " \\begin{equation}\n",
@@ -218,13 +211,15 @@
        " \\end{equation}\n",
        "\n",
        "which decreases rapidly as we move away from the position $y=3$."
-      ],
-      "metadata": {
-        "id": "6hxsl3Pxo1TT"
-      }
+      ]
    },
    {
      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "znydVPW7sL4P"
+      },
+      "outputs": [],
      "source": [
        "def f2(y):\n",
        "  return 20.446*np.exp(- (y-3) *(y-3) *(y-3) * (y-3))\n",
@@ -236,46 +231,47 @@
        "ax.set_xlabel(\"$y$\")\n",
        "ax.legend()\n",
        "plt.show()"
-      ],
-      "metadata": {
-        "id": "znydVPW7sL4P"
-      },
-      "execution_count": null,
-      "outputs": []
+      ]
    },
    {
+      "attachments": {},
      "cell_type": "markdown",
+      "metadata": {
+        "id": "G9Xxo0OJsIqD"
+      },
      "source": [
        "Let's again, compute the expectation:\n",
        "\n",
-        "\\begin{eqnarray}\n",
-        "\\mathbb{E}_{y}\\left[\\mbox{f}[y]\\right] &=& \\int \\mbox{f}[y] Pr(y) dy\\\\\n",
-        "&\\approx& \\frac{1}{I} \\mbox{f}[y]\n",
-        "\\end{eqnarray}\n",
+        "\\begin{align}\n",
+        "\\mathbb{E}_{y}\\left[\\text{f}[y]\\right] &=& \\int \\text{f}[y] Pr(y) dy\\\\\n",
+        "&\\approx& \\frac{1}{I} \\text{f}[y]\n",
+        "\\end{align}\n",
        "\n",
-        "where $Pr(y)=\\mbox{Norm}_y[0,1]$ by approximating with samples $y_{i}$.\n"
-      ],
-      "metadata": {
-        "id": "G9Xxo0OJsIqD"
-      }
+        "where $Pr(y)=\\text{Norm}_y[0,1]$ by approximating with samples $y_{i}$.\n"
+      ]
    },
    {
      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "l8ZtmkA2vH4y"
+      },
+      "outputs": [],
      "source": [
        "def compute_expectation2(n_samples):\n",
        "  y = np.random.normal(size=(n_samples,1))\n",
        "  expectation = np.mean(f2(y))\n",
        "\n",
        "  return expectation"
-      ],
-      "metadata": {
-        "id": "l8ZtmkA2vH4y"
-      },
-      "execution_count": null,
-      "outputs": []
+      ]
    },
    {
      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "dfUQyJ-svZ6F"
+      },
+      "outputs": [],
      "source": [
        "# Set the seed so the random numbers are all the same\n",
        "np.random.seed(0)\n",
@@ -284,26 +280,27 @@
        "n_samples = 100000000\n",
        "expected_f2= compute_expectation2(n_samples)\n",
        "print(\"Expected value: \", expected_f2)"
-      ],
-      "metadata": {
-        "id": "dfUQyJ-svZ6F"
-      },
-      "execution_count": null,
-      "outputs": []
+      ]
    },
    {
+      "attachments": {},
      "cell_type": "markdown",
+      "metadata": {
+        "id": "2sVDqP0BvxqM"
+      },
      "source": [
        "I deliberately chose this function, because it's expectation is roughly the same as for the previous function.\n",
        "\n",
        "Again, let's look at the mean and the  variance of the estimates"
-      ],
-      "metadata": {
-        "id": "2sVDqP0BvxqM"
-      }
+      ]
    },
    {
      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "mHnILRkOv0Ir"
+      },
+      "outputs": [],
      "source": [
        "def compute_mean_variance2(n_sample):\n",
        "  n_estimate = 10000\n",
@@ -318,15 +315,15 @@
        "for i in range(len(n_sample_all)):\n",
        "  print(\"Computing variance for expectation with %d samples\"%(n_sample_all[i]))\n",
        "  mean_all2[i], variance_all2[i] = compute_mean_variance2(n_sample_all[i])"
-      ],
-      "metadata": {
-        "id": "mHnILRkOv0Ir"
-      },
-      "execution_count": null,
-      "outputs": []
+      ]
    },
    {
      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "FkCX-hxxAnsw"
+      },
+      "outputs": [],
      "source": [
        "fig,ax1 = plt.subplots()\n",
        "ax1.semilogx(n_sample_all, mean_all,'r-',label='mean estimate')\n",
@@ -348,39 +345,41 @@
        "ax2.set_title(\"Second function\")\n",
        "ax2.legend()\n",
        "plt.show()"
-      ],
-      "metadata": {
-        "id": "FkCX-hxxAnsw"
-      },
-      "execution_count": null,
-      "outputs": []
+      ]
    },
    {
+      "attachments": {},
      "cell_type": "markdown",
+      "metadata": {
+        "id": "EtBP6NeLwZqz"
+      },
      "source": [
        "You can see that the variance of the estimate of the second function is considerably worse than the estimate of the variance of estimate of the first function\n",
        "\n",
        "TODO:  Think about why this is."
-      ],
-      "metadata": {
-        "id": "EtBP6NeLwZqz"
-      }
+      ]
    },
    {
+      "attachments": {},
      "cell_type": "markdown",
+      "metadata": {
+        "id": "_wuF-NoQu1--"
+      },
      "source": [
        " Now let's repeat this experiment with the second function, but this time use importance sampling with auxiliary distribution:\n",
        "\n",
        " \\begin{equation}\n",
-        "   q(y)=\\mbox{Norm}_{y}[3,1]\n",
+        "   q(y)=\\text{Norm}_{y}[3,1]\n",
        " \\end{equation}\n"
-      ],
-      "metadata": {
-        "id": "_wuF-NoQu1--"
-      }
+      ]
    },
    {
      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "jPm0AVYVIDnn"
+      },
+      "outputs": [],
      "source": [
        "def q_y(y):\n",
        "  return (1/np.sqrt(2*np.pi)) * np.exp(-0.5 * (y-3) * (y-3))\n",
@@ -395,15 +394,15 @@
        "  expectation = 0\n",
        "\n",
        "  return expectation"
-      ],
-      "metadata": {
-        "id": "jPm0AVYVIDnn"
-      },
-      "execution_count": null,
-      "outputs": []
+      ]
    },
    {
      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "No2ByVvOM2yQ"
+      },
+      "outputs": [],
      "source": [
        "# Set the seed so the random numbers are all the same\n",
        "np.random.seed(0)\n",
@@ -412,15 +411,15 @@
        "n_samples = 100000000\n",
        "expected_f2= compute_expectation2b(n_samples)\n",
        "print(\"Your value: \", expected_f2,\", True value:  0.43163734204459125 \")"
-      ],
-      "metadata": {
-        "id": "No2ByVvOM2yQ"
-      },
-      "execution_count": null,
-      "outputs": []
+      ]
    },
    {
      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "6v8Jc7z4M3Mk"
+      },
+      "outputs": [],
      "source": [
        "def compute_mean_variance2b(n_sample):\n",
        "  n_estimate = 10000\n",
@@ -435,15 +434,15 @@
        "for i in range(len(n_sample_all)):\n",
        "  print(\"Computing variance for expectation with %d samples\"%(n_sample_all[i]))\n",
        "  mean_all2b[i], variance_all2b[i] = compute_mean_variance2b(n_sample_all[i])"
-      ],
-      "metadata": {
-        "id": "6v8Jc7z4M3Mk"
-      },
-      "execution_count": null,
-      "outputs": []
+      ]
    },
    {
      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "C0beD4sNNM3L"
+      },
+      "outputs": [],
      "source": [
        "fig,ax1 = plt.subplots()\n",
        "ax1.semilogx(n_sample_all, mean_all,'r-',label='mean estimate')\n",
@@ -476,21 +475,33 @@
        "ax2.set_title(\"Second function with importance sampling\")\n",
        "ax2.legend()\n",
        "plt.show()"
-      ],
-      "metadata": {
-        "id": "C0beD4sNNM3L"
-      },
-      "execution_count": null,
-      "outputs": []
+      ]
    },
    {
+      "attachments": {},
      "cell_type": "markdown",
-      "source": [
-        "You can see that the importance sampling technique has reduced the amount of variance for any given number of samples."
-      ],
      "metadata": {
        "id": "y8rgge9MNiOc"
-      }
+      },
+      "source": [
+        "You can see that the importance sampling technique has reduced the amount of variance for any given number of samples."
+      ]
    }
-  ]
-}
+  ],
+  "metadata": {
+    "colab": {
+      "authorship_tag": "ABX9TyNecz9/CDOggPSmy1LjT/Dv",
+      "include_colab_link": true,
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}