Add files via upload

This commit is contained in:
udlbook
2024-01-02 12:23:29 -05:00
committed by GitHub
parent 9409fbb447
commit 351199ec7e
2 changed files with 313 additions and 297 deletions


@@ -1,32 +1,22 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "view-in-github"
},
"source": [
"<a href=\"https://colab.research.google.com/github/udlbook/udlbook/blob/main/Notebooks/Chap06/6_2_Gradient_Descent.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "el8l05WQEO46"
},
"source": [
"# **Notebook 6.2 Gradient descent**\n",
"\n",
@@ -36,10 +26,7 @@
"\n",
"Contact me at udlbookmail@gmail.com if you find any mistakes or have any suggestions.\n",
"\n"
]
},
{
"cell_type": "code",
@@ -58,34 +45,39 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "4cRkrh9MZ58Z"
},
"outputs": [],
"source": [
"# Let's create our training data 12 pairs {x_i, y_i}\n",
"# We'll try to fit the straight line model to these data\n",
"data = np.array([[0.03,0.19,0.34,0.46,0.78,0.81,1.08,1.18,1.39,1.60,1.65,1.90],\n",
" [0.67,0.85,1.05,1.00,1.40,1.50,1.30,1.54,1.55,1.68,1.73,1.60]])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "WQUERmb2erAe"
},
"outputs": [],
"source": [
"# Let's define our model -- just a straight line with intercept phi[0] and slope phi[1]\n",
"def model(phi,x):\n",
" y_pred = phi[0]+phi[1] * x\n",
" return y_pred"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "qFRe9POHF2le"
},
"outputs": [],
"source": [
"# Draw model\n",
"def draw_model(data,model,phi,title=None):\n",
@@ -101,39 +93,40 @@
" if title is not None:\n",
" ax.set_title(title)\n",
" plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "TXx1Tpd1Tl-I"
},
"outputs": [],
"source": [
"# Initialize the parameters to some arbitrary values and draw the model\n",
"phi = np.zeros((2,1))\n",
"phi[0] = 0.6 # Intercept\n",
"phi[1] = -0.2 # Slope\n",
"draw_model(data,model,phi, \"Initial parameters\")\n"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "QU5mdGvpTtEG"
},
"source": [
"Now let's compute the sum of squares loss for the training data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "I7dqTY2Gg7CR"
},
"outputs": [],
"source": [
"def compute_loss(data_x, data_y, model, phi):\n",
" # TODO -- Write this function -- replace the line below\n",
@@ -144,45 +137,47 @@
" loss = 0\n",
"\n",
" return loss"
],
"metadata": {
"id": "I7dqTY2Gg7CR"
},
"execution_count": null,
"outputs": []
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "eB5DQvU5hYNx"
},
"source": [
"Let's just test that we got that right"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Ty05UtEEg9tc"
},
"outputs": [],
"source": [
"loss = compute_loss(data[0,:],data[1,:],model,np.array([[0.6],[-0.2]]))\n",
"print('Your loss = %3.3f, Correct loss = %3.3f'%(loss, 12.367))"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "F3trnavPiHpH"
},
"source": [
"Now let's plot the whole loss function"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "K-NTHpAAHlCl"
},
"outputs": [],
"source": [
"def draw_loss_function(compute_loss, data, model, phi_iters = None):\n",
" # Define pretty colormap\n",
@@ -209,39 +204,40 @@
" ax.set_ylim([1,-1])\n",
" ax.set_xlabel('Intercept $\\phi_{0}$'); ax.set_ylabel('Slope, $\\phi_{1}$')\n",
" plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "l8HbvIupnTME"
},
"outputs": [],
"source": [
"draw_loss_function(compute_loss, data, model)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "s9Duf05WqqSC"
},
"source": [
"Now let's compute the gradient vector for a given set of parameters:\n",
"\n",
"\\begin{equation}\n",
"\\frac{\\partial L}{\\partial \\boldsymbol\\phi} = \\begin{bmatrix}\\frac{\\partial L}{\\partial \\phi_0} \\\\\\frac{\\partial L}{\\partial \\phi_1} \\end{bmatrix}.\n",
"\\end{equation}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "UpswmkL2qwBT"
},
"outputs": [],
"source": [
"# These are in the lecture slides and notes, but worth trying to calculate them yourself to\n",
"# check that you get them right. Write out the expression for the sum of squares loss and take the\n",
@@ -253,31 +249,32 @@
"\n",
" # Return the gradient\n",
" return np.array([[dl_dphi0],[dl_dphi1]])"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "RS1nEcYVuEAM"
},
"source": [
"We can check we got this right using a trick known as **finite differences**. If we evaluate the function, then change one of the parameters by a very small amount and divide the change in loss by that amount, we get an approximation to the gradient:\n",
"\n",
"\\begin{align}\n",
"\\frac{\\partial L}{\\partial \\phi_{0}}&\\approx & \\frac{L[\\phi_0+\\delta, \\phi_1]-L[\\phi_0, \\phi_1]}{\\delta}\\\\\n",
"\\frac{\\partial L}{\\partial \\phi_{1}}&\\approx & \\frac{L[\\phi_0, \\phi_1+\\delta]-L[\\phi_0, \\phi_1]}{\\delta}\n",
"\\end{align}\n",
"\n",
"We can't do this when there are many parameters; for a million parameters, we would have to evaluate the loss function two million times, and usually computing the gradients directly is much more efficient."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "QuwAHN7yt-gi"
},
"outputs": [],
"source": [
"# Compute the gradient using your function\n",
"gradient = compute_gradient(data[0,:],data[1,:], phi)\n",
@@ -290,24 +287,25 @@
" compute_loss(data[0,:],data[1,:],model,phi))/delta\n",
"print(\"Approx gradients: (%3.3f,%3.3f)\"%(dl_dphi0_est,dl_dphi1_est))\n",
"# There might be small differences in the last significant figure because finite differences only approximate the gradient\n"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "5EIjMM9Fw2eT"
},
"source": [
"Now we are ready to perform gradient descent. We'll need the line search routine from notebook 6.1, which I've reproduced here, plus the helper function loss_function_1D, which maps the search along the negative gradient direction in 2D space to a 1D problem (distance along this direction)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "XrJ2gQjfw1XP"
},
"outputs": [],
"source": [
"def loss_function_1D(dist_prop, data, model, phi_start, search_direction):\n",
" # Return the loss after moving this far\n",
@@ -362,15 +360,15 @@
"\n",
" # Return average of two middle points\n",
" return (b+c)/2.0"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "YVq6rmaWRD2M"
},
"outputs": [],
"source": [
"def gradient_descent_step(phi, data, model):\n",
" # TODO -- update Phi with the gradient descent step (equation 6.3)\n",
@@ -379,15 +377,15 @@
" # 3. Update the parameters phi based on the gradient and the step size alpha.\n",
"\n",
" return phi"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "tOLd0gtdRLLS"
},
"outputs": [],
"source": [
"# Initialize the parameters and draw the model\n",
"n_steps = 10\n",
@@ -409,12 +407,22 @@
"\n",
"# Draw the trajectory on the loss function\n",
"draw_loss_function(compute_loss, data, model,phi_all)\n"
]
}
],
"metadata": {
"colab": {
"include_colab_link": true,
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
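For readers working through the notebook itself: the TODO cells (`compute_loss`, `compute_gradient`, `gradient_descent_step`) are left blank in this commit. Below is a minimal sketch of one possible solution, using the straight-line model and training data defined in the notebook; the fixed step size `alpha` is an assumption for illustration only, since the notebook chooses the step size by line search.

```python
import numpy as np

# Straight-line model from the notebook: intercept phi[0], slope phi[1]
def model(phi, x):
    return phi[0] + phi[1] * x

# Sum-of-squares loss over all training pairs (the compute_loss TODO)
def compute_loss(data_x, data_y, model, phi):
    pred_y = model(phi, data_x)
    return np.sum((pred_y - data_y) ** 2)

# Derivatives of the sum-of-squares loss w.r.t. intercept and slope
# (the compute_gradient TODO)
def compute_gradient(data_x, data_y, phi):
    dl_dphi0 = np.sum(2 * (phi[0] + phi[1] * data_x - data_y))
    dl_dphi1 = np.sum(2 * data_x * (phi[0] + phi[1] * data_x - data_y))
    return np.array([[dl_dphi0], [dl_dphi1]])

# One descent step (the gradient_descent_step TODO); the notebook picks the
# step size by line search -- the fixed alpha here is a simplification
def gradient_descent_step(phi, data, model, alpha=0.01):
    gradient = compute_gradient(data[0, :], data[1, :], phi)
    return phi - alpha * gradient

# Training data and initial parameters from the notebook
data = np.array([[0.03,0.19,0.34,0.46,0.78,0.81,1.08,1.18,1.39,1.60,1.65,1.90],
                 [0.67,0.85,1.05,1.00,1.40,1.50,1.30,1.54,1.55,1.68,1.73,1.60]])
phi = np.array([[0.6], [-0.2]])
loss = compute_loss(data[0, :], data[1, :], model, phi)
print('loss = %3.3f' % loss)  # matches the notebook's check value 12.367
```

With these definitions, each call to `gradient_descent_step` from the initial parameters reduces the loss below 12.367, which is the trajectory the final cell draws on the loss surface.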