diff --git a/Notebooks/Chap06/6_2_Gradient_Descent.ipynb b/Notebooks/Chap06/6_2_Gradient_Descent.ipynb index 7082444..fb8cd60 100644 --- a/Notebooks/Chap06/6_2_Gradient_Descent.ipynb +++ b/Notebooks/Chap06/6_2_Gradient_Descent.ipynb @@ -1,32 +1,22 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [], - "include_colab_link": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - } - }, "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": { - "id": "view-in-github", - "colab_type": "text" + "colab_type": "text", + "id": "view-in-github" }, "source": [ "\"Open" ] }, { + "attachments": {}, "cell_type": "markdown", + "metadata": { + "id": "el8l05WQEO46" + }, "source": [ "# **Notebook 6.2 Gradient descent**\n", "\n", @@ -36,10 +26,7 @@ "\n", "Contact me at udlbookmail@gmail.com if you find any mistakes or have any suggestions.\n", "\n" - ], - "metadata": { - "id": "el8l05WQEO46" - } + ] }, { "cell_type": "code", @@ -58,34 +45,39 @@ }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4cRkrh9MZ58Z" + }, + "outputs": [], "source": [ "# Let's create our training data 12 pairs {x_i, y_i}\n", "# We'll try to fit the straight line model to these data\n", "data = np.array([[0.03,0.19,0.34,0.46,0.78,0.81,1.08,1.18,1.39,1.60,1.65,1.90],\n", " [0.67,0.85,1.05,1.00,1.40,1.50,1.30,1.54,1.55,1.68,1.73,1.60]])" - ], - "metadata": { - "id": "4cRkrh9MZ58Z" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WQUERmb2erAe" + }, + "outputs": [], "source": [ "# Let's define our model -- just a straight line with intercept phi[0] and slope phi[1]\n", "def model(phi,x):\n", " y_pred = phi[0]+phi[1] * x\n", " return y_pred" - ], - "metadata": { - "id": "WQUERmb2erAe" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": 
null, + "metadata": { + "id": "qFRe9POHF2le" + }, + "outputs": [], "source": [ "# Draw model\n", "def draw_model(data,model,phi,title=None):\n", @@ -101,39 +93,40 @@ " if title is not None:\n", " ax.set_title(title)\n", " plt.show()" - ], - "metadata": { - "id": "qFRe9POHF2le" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TXx1Tpd1Tl-I" + }, + "outputs": [], "source": [ "# Initialize the parameters to some arbitrary values and draw the model\n", "phi = np.zeros((2,1))\n", "phi[0] = 0.6 # Intercept\n", "phi[1] = -0.2 # Slope\n", "draw_model(data,model,phi, \"Initial parameters\")\n" - ], - "metadata": { - "id": "TXx1Tpd1Tl-I" - }, - "execution_count": null, - "outputs": [] + ] }, { + "attachments": {}, "cell_type": "markdown", - "source": [ - "Now lets create compute the sum of squares loss for the training data" - ], "metadata": { "id": "QU5mdGvpTtEG" - } + }, + "source": [ + "Now let's compute the sum of squares loss for the training data" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "I7dqTY2Gg7CR" + }, + "outputs": [], "source": [ "def compute_loss(data_x, data_y, model, phi):\n", " # TODO -- Write this function -- replace the line below\n", @@ -144,45 +137,47 @@ " loss = 0\n", "\n", " return loss" - ], - "metadata": { - "id": "I7dqTY2Gg7CR" - }, - "execution_count": null, - "outputs": [] + ] }, { + "attachments": {}, "cell_type": "markdown", - "source": [ - "Let's just test that we got that right" - ], "metadata": { "id": "eB5DQvU5hYNx" - } + }, + "source": [ + "Let's just test that we got that right" + ] }, { "cell_type": "code", - "source": [ - "loss = compute_loss(data[0,:],data[1,:],model,np.array([[0.6],[-0.2]]))\n", - "print('Your loss = %3.3f, Correct loss = %3.3f'%(loss, 12.367))" - ], + "execution_count": null, "metadata": { "id": "Ty05UtEEg9tc" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "loss = 
compute_loss(data[0,:],data[1,:],model,np.array([[0.6],[-0.2]]))\n", + "print('Your loss = %3.3f, Correct loss = %3.3f'%(loss, 12.367))" + ] }, { + "attachments": {}, "cell_type": "markdown", - "source": [ - "Now let's plot the whole loss function" - ], "metadata": { "id": "F3trnavPiHpH" - } + }, + "source": [ + "Now let's plot the whole loss function" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "K-NTHpAAHlCl" + }, + "outputs": [], "source": [ "def draw_loss_function(compute_loss, data, model, phi_iters = None):\n", " # Define pretty colormap\n", @@ -209,39 +204,40 @@ " ax.set_ylim([1,-1])\n", " ax.set_xlabel('Intercept $\\phi_{0}$'); ax.set_ylabel('Slope, $\\phi_{1}$')\n", " plt.show()" - ], - "metadata": { - "id": "K-NTHpAAHlCl" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", - "source": [ - "draw_loss_function(compute_loss, data, model)" - ], + "execution_count": null, "metadata": { "id": "l8HbvIupnTME" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "draw_loss_function(compute_loss, data, model)" + ] }, { + "attachments": {}, "cell_type": "markdown", + "metadata": { + "id": "s9Duf05WqqSC" + }, "source": [ "Now let's compute the gradient vector for a given set of parameters:\n", "\n", "\\begin{equation}\n", "\\frac{\\partial L}{\\partial \\boldsymbol\\phi} = \\begin{bmatrix}\\frac{\\partial L}{\\partial \\phi_0} \\\\\\frac{\\partial L}{\\partial \\phi_1} \\end{bmatrix}.\n", "\\end{equation}" - ], - "metadata": { - "id": "s9Duf05WqqSC" - } + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UpswmkL2qwBT" + }, + "outputs": [], "source": [ "# These are in the lecture slides and notes, but worth trying to calculate them yourself to\n", "# check that you get them right. 
Write out the expression for the sum of squares loss and take the\n", @@ -253,31 +249,32 @@ "\n", " # Return the gradient\n", " return np.array([[dl_dphi0],[dl_dphi1]])" - ], - "metadata": { - "id": "UpswmkL2qwBT" - }, - "execution_count": null, - "outputs": [] + ] }, { + "attachments": {}, "cell_type": "markdown", + "metadata": { + "id": "RS1nEcYVuEAM" + }, "source": [ "We can check we got this right using a trick known as **finite differences**. If we evaluate the function and then change one of the parameters by a very small amount and normalize by that amount, we get an approximation to the gradient, so:\n", "\n", - "\\begin{eqnarray}\n", + "\\begin{align}\n", "\\frac{\\partial L}{\\partial \\phi_{0}}&\\approx & \\frac{L[\\phi_0+\\delta, \\phi_1]-L[\\phi_0, \\phi_1]}{\\delta}\\\\\n", "\\frac{\\partial L}{\\partial \\phi_{1}}&\\approx & \\frac{L[\\phi_0, \\phi_1+\\delta]-L[\\phi_0, \\phi_1]}{\\delta}\n", - "\\end{eqnarray}\n", + "\\end{align}\n", "\n", "We can't do this when there are many parameters; for a million parameters, we would have to evaluate the loss function two million times, and usually computing the gradients directly is much more efficient." - ], - "metadata": { - "id": "RS1nEcYVuEAM" - } + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QuwAHN7yt-gi" + }, + "outputs": [], "source": [ "# Compute the gradient using your function\n", "gradient = compute_gradient(data[0,:],data[1,:], phi)\n", @@ -290,24 +287,25 @@ " compute_loss(data[0,:],data[1,:],model,phi))/delta\n", "print(\"Approx gradients: (%3.3f,%3.3f)\"%(dl_dphi0_est,dl_dphi1_est))\n", "# There might be small differences in the last significant figure because finite gradients is an approximation\n" - ], - "metadata": { - "id": "QuwAHN7yt-gi" - }, - "execution_count": null, - "outputs": [] + ] }, { + "attachments": {}, "cell_type": "markdown", - "source": [ - "Now we are ready to perform gradient descent. 
We'll need to use our line search routine from notebook 6.1, which I've reproduced here plus the helper function loss_function_1D that maps the search along the negative gradient direction in 2D space to a 1D problem (distance along this direction)" - ], "metadata": { "id": "5EIjMM9Fw2eT" - } + }, + "source": [ + "Now we are ready to perform gradient descent. We'll need to use our line search routine from notebook 6.1, which I've reproduced here plus the helper function loss_function_1D that maps the search along the negative gradient direction in 2D space to a 1D problem (distance along this direction)" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XrJ2gQjfw1XP" + }, + "outputs": [], "source": [ "def loss_function_1D(dist_prop, data, model, phi_start, search_direction):\n", " # Return the loss after moving this far\n", @@ -362,15 +360,15 @@ "\n", " # Return average of two middle points\n", " return (b+c)/2.0" - ], - "metadata": { - "id": "XrJ2gQjfw1XP" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YVq6rmaWRD2M" + }, + "outputs": [], "source": [ "def gradient_descent_step(phi, data, model):\n", " # TODO -- update Phi with the gradient descent step (equation 6.3)\n", @@ -379,15 +377,15 @@ " # 3. 
Update the parameters phi based on the gradient and the step size alpha.\n", "\n", " return phi" - ], - "metadata": { - "id": "YVq6rmaWRD2M" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tOLd0gtdRLLS" + }, + "outputs": [], "source": [ "# Initialize the parameters and draw the model\n", "n_steps = 10\n", @@ -409,12 +407,22 @@ "\n", "# Draw the trajectory on the loss function\n", "draw_loss_function(compute_loss, data, model,phi_all)\n" - ], - "metadata": { - "id": "tOLd0gtdRLLS" - }, - "execution_count": null, - "outputs": [] + ] } - ] + ], + "metadata": { + "colab": { + "include_colab_link": true, + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 } diff --git a/Notebooks/Chap06/6_3_Stochastic_Gradient_Descent.ipynb b/Notebooks/Chap06/6_3_Stochastic_Gradient_Descent.ipynb index e0ac0df..30ae779 100644 --- a/Notebooks/Chap06/6_3_Stochastic_Gradient_Descent.ipynb +++ b/Notebooks/Chap06/6_3_Stochastic_Gradient_Descent.ipynb @@ -1,33 +1,22 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [], - "authorship_tag": "ABX9TyNk5FN4qlw3pk8BwDVWw1jN", - "include_colab_link": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - } - }, "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": { - "id": "view-in-github", - "colab_type": "text" + "colab_type": "text", + "id": "view-in-github" }, "source": [ "\"Open" ] }, { + "attachments": {}, "cell_type": "markdown", + "metadata": { + "id": "el8l05WQEO46" + }, "source": [ "# **Notebook 6.3: Stochastic gradient descent**\n", "\n", @@ -39,10 +28,7 @@ "\n", "\n", "\n" - ], - "metadata": { - "id": "el8l05WQEO46" - } + ] }, { "cell_type": "code", @@ -61,6 +47,11 @@ }, { "cell_type": "code", + 
"execution_count": null, + "metadata": { + "id": "4cRkrh9MZ58Z" + }, + "outputs": [], "source": [ "# Let's create our training data 30 pairs {x_i, y_i}\n", "# We'll try to fit the Gabor model to these data\n", @@ -74,15 +65,15 @@ " -2.365e-02,5.098e-01,-2.777e-01,3.367e-01,1.927e-01,-2.222e-01,\n", " 6.352e-02,6.888e-03,3.224e-02,1.091e-02,-5.706e-01,-5.258e-02,\n", " -3.666e-02,1.709e-01,-4.805e-02,2.008e-01,-1.904e-01,5.952e-01]])" - ], - "metadata": { - "id": "4cRkrh9MZ58Z" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WQUERmb2erAe" + }, + "outputs": [], "source": [ "# Let's define our model\n", "def model(phi,x):\n", @@ -90,15 +81,15 @@ " gauss_component = np.exp(-(phi[0] + 0.06 * phi[1] * x) * (phi[0] + 0.06 * phi[1] * x) / 32)\n", " y_pred= sin_component * gauss_component\n", " return y_pred" - ], - "metadata": { - "id": "WQUERmb2erAe" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qFRe9POHF2le" + }, + "outputs": [], "source": [ "# Draw model\n", "def draw_model(data,model,phi,title=None):\n", @@ -113,39 +104,40 @@ " if title is not None:\n", " ax.set_title(title)\n", " plt.show()" - ], - "metadata": { - "id": "qFRe9POHF2le" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TXx1Tpd1Tl-I" + }, + "outputs": [], "source": [ "# Initialize the parameters and draw the model\n", "phi = np.zeros((2,1))\n", "phi[0] = -5 # Horizontal offset\n", "phi[1] = 25 # Frequency\n", "draw_model(data,model,phi, \"Initial parameters\")\n" - ], - "metadata": { - "id": "TXx1Tpd1Tl-I" - }, - "execution_count": null, - "outputs": [] + ] }, { + "attachments": {}, "cell_type": "markdown", - "source": [ - "Now lets create compute the sum of squares loss for the training data" - ], "metadata": { "id": "QU5mdGvpTtEG" - } + }, + "source": [ + "Now 
let's compute the sum of squares loss for the training data" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "I7dqTY2Gg7CR" + }, + "outputs": [], "source": [ "def compute_loss(data_x, data_y, model, phi):\n", " # TODO -- Write this function -- replace the line below\n", @@ -155,45 +147,47 @@ " loss = 0\n", "\n", " return loss" - ], - "metadata": { - "id": "I7dqTY2Gg7CR" - }, - "execution_count": null, - "outputs": [] + ] }, { + "attachments": {}, "cell_type": "markdown", - "source": [ - "Let's just test that we got that right" - ], "metadata": { "id": "eB5DQvU5hYNx" - } + }, + "source": [ + "Let's just test that we got that right" + ] }, { "cell_type": "code", - "source": [ - "loss = compute_loss(data[0,:],data[1,:],model,np.array([[0.6],[-0.2]]))\n", - "print('Your loss = %3.3f, Correct loss = %3.3f'%(loss, 16.419))" - ], + "execution_count": null, "metadata": { "id": "Ty05UtEEg9tc" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "loss = compute_loss(data[0,:],data[1,:],model,np.array([[0.6],[-0.2]]))\n", + "print('Your loss = %3.3f, Correct loss = %3.3f'%(loss, 16.419))" + ] }, { + "attachments": {}, "cell_type": "markdown", - "source": [ - "Now let's plot the whole loss function" - ], "metadata": { "id": "F3trnavPiHpH" - } + }, + "source": [ + "Now let's plot the whole loss function" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "K-NTHpAAHlCl" + }, + "outputs": [], "source": [ "def draw_loss_function(compute_loss, data, model, phi_iters = None):\n", " # Define pretty colormap\n", @@ -220,39 +214,40 @@ " ax.set_ylim([2.5,22.5])\n", " ax.set_xlabel('Offset $\\phi_{0}$'); ax.set_ylabel('Frequency, $\\phi_{1}$')\n", " plt.show()" - ], - "metadata": { - "id": "K-NTHpAAHlCl" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", - "source": [ - "draw_loss_function(compute_loss, data, model)" - ], + "execution_count": null, "metadata": { "id": 
"l8HbvIupnTME" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "draw_loss_function(compute_loss, data, model)" + ] }, { + "attachments": {}, "cell_type": "markdown", + "metadata": { + "id": "s9Duf05WqqSC" + }, "source": [ "Now let's compute the gradient vector for a given set of parameters:\n", "\n", "\\begin{equation}\n", "\\frac{\\partial L}{\\partial \\boldsymbol\\phi} = \\begin{bmatrix}\\frac{\\partial L}{\\partial \\phi_0} \\\\\\frac{\\partial L}{\\partial \\phi_1} \\end{bmatrix}.\n", "\\end{equation}" - ], - "metadata": { - "id": "s9Duf05WqqSC" - } + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UpswmkL2qwBT" + }, + "outputs": [], "source": [ "# These came from writing out the expression for the sum of squares loss and taking the\n", "# derivative with respect to phi0 and phi1. It was a lot of hassle to get it right!\n", @@ -281,31 +276,32 @@ " dl_dphi1 = gabor_deriv_phi1(data_x, data_y, phi[0],phi[1])\n", " # Return the gradient\n", " return np.array([[dl_dphi0],[dl_dphi1]])" - ], - "metadata": { - "id": "UpswmkL2qwBT" - }, - "execution_count": null, - "outputs": [] + ] }, { + "attachments": {}, "cell_type": "markdown", + "metadata": { + "id": "RS1nEcYVuEAM" + }, "source": [ "We can check we got this right using a trick known as **finite differences**. 
If we evaluate the function and then change one of the parameters by a very small amount and normalize by that amount, we get an approximation to the gradient, so:\n", "\n", - "\\begin{eqnarray}\n", + "\\begin{align}\n", "\\frac{\\partial L}{\\partial \\phi_{0}}&\\approx & \\frac{L[\\phi_0+\\delta, \\phi_1]-L[\\phi_0, \\phi_1]}{\\delta}\\\\\n", "\\frac{\\partial L}{\\partial \\phi_{1}}&\\approx & \\frac{L[\\phi_0, \\phi_1+\\delta]-L[\\phi_0, \\phi_1]}{\\delta}\n", - "\\end{eqnarray}\n", + "\\end{align}\n", "\n", "We can't do this when there are many parameters; for a million parameters, we would have to evaluate the loss function two million times, and usually computing the gradients directly is much more efficient." - ], - "metadata": { - "id": "RS1nEcYVuEAM" - } + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QuwAHN7yt-gi" + }, + "outputs": [], "source": [ "# Compute the gradient using your function\n", "gradient = compute_gradient(data[0,:],data[1,:], phi)\n", @@ -317,24 +313,25 @@ "dl_dphi1_est = (compute_loss(data[0,:],data[1,:],model,phi+np.array([[0],[delta]])) - \\\n", " compute_loss(data[0,:],data[1,:],model,phi))/delta\n", "print(\"Approx gradients: (%3.3f,%3.3f)\"%(dl_dphi0_est,dl_dphi1_est))\n" - ], - "metadata": { - "id": "QuwAHN7yt-gi" - }, - "execution_count": null, - "outputs": [] + ] }, { + "attachments": {}, "cell_type": "markdown", - "source": [ - "Now we are ready to perform gradient descent. We'll need to use our line search routine from Notebook 6.1, which I've reproduced here plus the helper function loss_function_1D that converts from a 2D problem to a 1D problem" - ], "metadata": { "id": "5EIjMM9Fw2eT" - } + }, + "source": [ + "Now we are ready to perform gradient descent. 
We'll need to use our line search routine from Notebook 6.1, which I've reproduced here plus the helper function loss_function_1D that converts from a 2D problem to a 1D problem" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XrJ2gQjfw1XP" + }, + "outputs": [], "source": [ "def loss_function_1D(dist_prop, data, model, phi_start, gradient):\n", " # Return the loss after moving this far\n", @@ -389,15 +386,15 @@ "\n", " # Return average of two middle points\n", " return (b+c)/2.0" - ], - "metadata": { - "id": "XrJ2gQjfw1XP" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YVq6rmaWRD2M" + }, + "outputs": [], "source": [ "def gradient_descent_step(phi, data, model):\n", " # Step 1: Compute the gradient\n", @@ -406,15 +403,15 @@ " alpha = line_search(data, model, phi, gradient*-1, max_dist = 2.0)\n", " phi = phi - alpha * gradient\n", " return phi" - ], - "metadata": { - "id": "YVq6rmaWRD2M" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tOLd0gtdRLLS" + }, + "outputs": [], "source": [ "# Initialize the parameters\n", "n_steps = 21\n", @@ -435,41 +432,41 @@ " draw_model(data,model,phi_all[:,c_step+1], \"Iteration %d, loss = %f\"%(c_step+1,loss))\n", "\n", "draw_loss_function(compute_loss, data, model,phi_all)\n" - ], - "metadata": { - "id": "tOLd0gtdRLLS" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", - "source": [ - "# TODO Experiment with starting the optimization in the previous cell in different places\n", - "# and show that it heads to a local minimum if we don't start it in the right valley" - ], + "execution_count": null, "metadata": { "id": "Oi8ZlH0ptLqA" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "# TODO Experiment with starting the optimization in the previous cell in different places\n", + "# and 
show that it heads to a local minimum if we don't start it in the right valley" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4l-ueLk-oAxV" + }, + "outputs": [], "source": [ "def gradient_descent_step_fixed_learning_rate(phi, data, alpha):\n", " # TODO -- fill in this routine so that we take a fixed size step of size alpha without using line search\n", "\n", " return phi" - ], - "metadata": { - "id": "4l-ueLk-oAxV" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oi9MX_GRpM41" + }, + "outputs": [], "source": [ "# Initialize the parameters\n", "n_steps = 21\n", @@ -490,28 +487,28 @@ " draw_model(data,model,phi_all[:,c_step+1], \"Iteration %d, loss = %f\"%(c_step+1,loss))\n", "\n", "draw_loss_function(compute_loss, data, model,phi_all)\n" - ], - "metadata": { - "id": "oi9MX_GRpM41" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "In6sQ5YCpMqn" + }, + "outputs": [], "source": [ "# TODO Experiment with the learning rate, alpha.\n", "# What happens if you set it too large?\n", "# What happens if you set it too small?" 
- ], - "metadata": { - "id": "In6sQ5YCpMqn" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VKTC9-1Gpm3N" + }, + "outputs": [], "source": [ "def stochastic_gradient_descent_step(phi, data, alpha, batch_size):\n", " # TODO -- fill in this routine so that we take a fixed size step of size alpha but only using a subset (batch) of the data\n", @@ -522,15 +519,15 @@ "\n", "\n", " return phi" - ], - "metadata": { - "id": "VKTC9-1Gpm3N" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "469OP_UHskJ4" + }, + "outputs": [], "source": [ "# Set the random number generator so you always get same numbers (disable if you don't want this)\n", "np.random.seed(1)\n", @@ -553,34 +550,45 @@ " draw_model(data,model,phi_all[:,c_step+1], \"Iteration %d, loss = %f\"%(c_step+1,loss))\n", "\n", "draw_loss_function(compute_loss, data, model,phi_all)" - ], - "metadata": { - "id": "469OP_UHskJ4" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", - "source": [ - "# TODO -- Experiment with different learning rates, starting points, batch sizes, number of steps. Get a feel for this." - ], + "execution_count": null, "metadata": { "id": "LxE2kTa3s29p" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "# TODO -- Experiment with different learning rates, starting points, batch sizes, number of steps. Get a feel for this." + ] }, { "cell_type": "code", - "source": [ - "# TODO -- Add a learning rate schedule. Reduce the learning rate by a factor of beta every M iterations" - ], + "execution_count": null, "metadata": { "id": "lw4QPOaQTh5e" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "# TODO -- Add a learning rate schedule. 
Reduce the learning rate by a factor of beta every M iterations" + ] } - ] + ], + "metadata": { + "colab": { + "authorship_tag": "ABX9TyNk5FN4qlw3pk8BwDVWw1jN", + "include_colab_link": true, + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 }