Add files via upload

This commit is contained in:
udlbook
2024-01-02 12:23:29 -05:00
committed by GitHub
parent 9409fbb447
commit 351199ec7e
2 changed files with 313 additions and 297 deletions

View File

@@ -1,32 +1,22 @@
{ {
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [ "cells": [
{ {
"attachments": {},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {
"id": "view-in-github", "colab_type": "text",
"colab_type": "text" "id": "view-in-github"
}, },
"source": [ "source": [
"<a href=\"https://colab.research.google.com/github/udlbook/udlbook/blob/main/Notebooks/Chap06/6_2_Gradient_Descent.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" "<a href=\"https://colab.research.google.com/github/udlbook/udlbook/blob/main/Notebooks/Chap06/6_2_Gradient_Descent.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
] ]
}, },
{ {
"attachments": {},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"id": "el8l05WQEO46"
},
"source": [ "source": [
"# **Notebook 6.2 Gradient descent**\n", "# **Notebook 6.2 Gradient descent**\n",
"\n", "\n",
@@ -36,10 +26,7 @@
"\n", "\n",
"Contact me at udlbookmail@gmail.com if you find any mistakes or have any suggestions.\n", "Contact me at udlbookmail@gmail.com if you find any mistakes or have any suggestions.\n",
"\n" "\n"
], ]
"metadata": {
"id": "el8l05WQEO46"
}
}, },
{ {
"cell_type": "code", "cell_type": "code",
@@ -58,34 +45,39 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"id": "4cRkrh9MZ58Z"
},
"outputs": [],
"source": [ "source": [
"# Let's create our training data 12 pairs {x_i, y_i}\n", "# Let's create our training data 12 pairs {x_i, y_i}\n",
"# We'll try to fit the straight line model to these data\n", "# We'll try to fit the straight line model to these data\n",
"data = np.array([[0.03,0.19,0.34,0.46,0.78,0.81,1.08,1.18,1.39,1.60,1.65,1.90],\n", "data = np.array([[0.03,0.19,0.34,0.46,0.78,0.81,1.08,1.18,1.39,1.60,1.65,1.90],\n",
" [0.67,0.85,1.05,1.00,1.40,1.50,1.30,1.54,1.55,1.68,1.73,1.60]])" " [0.67,0.85,1.05,1.00,1.40,1.50,1.30,1.54,1.55,1.68,1.73,1.60]])"
], ]
"metadata": {
"id": "4cRkrh9MZ58Z"
},
"execution_count": null,
"outputs": []
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"id": "WQUERmb2erAe"
},
"outputs": [],
"source": [ "source": [
"# Let's define our model -- just a straight line with intercept phi[0] and slope phi[1]\n", "# Let's define our model -- just a straight line with intercept phi[0] and slope phi[1]\n",
"def model(phi,x):\n", "def model(phi,x):\n",
" y_pred = phi[0]+phi[1] * x\n", " y_pred = phi[0]+phi[1] * x\n",
" return y_pred" " return y_pred"
], ]
"metadata": {
"id": "WQUERmb2erAe"
},
"execution_count": null,
"outputs": []
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"id": "qFRe9POHF2le"
},
"outputs": [],
"source": [ "source": [
"# Draw model\n", "# Draw model\n",
"def draw_model(data,model,phi,title=None):\n", "def draw_model(data,model,phi,title=None):\n",
@@ -101,39 +93,40 @@
" if title is not None:\n", " if title is not None:\n",
" ax.set_title(title)\n", " ax.set_title(title)\n",
" plt.show()" " plt.show()"
], ]
"metadata": {
"id": "qFRe9POHF2le"
},
"execution_count": null,
"outputs": []
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"id": "TXx1Tpd1Tl-I"
},
"outputs": [],
"source": [ "source": [
"# Initialize the parameters to some arbitrary values and draw the model\n", "# Initialize the parameters to some arbitrary values and draw the model\n",
"phi = np.zeros((2,1))\n", "phi = np.zeros((2,1))\n",
"phi[0] = 0.6 # Intercept\n", "phi[0] = 0.6 # Intercept\n",
"phi[1] = -0.2 # Slope\n", "phi[1] = -0.2 # Slope\n",
"draw_model(data,model,phi, \"Initial parameters\")\n" "draw_model(data,model,phi, \"Initial parameters\")\n"
], ]
"metadata": {
"id": "TXx1Tpd1Tl-I"
},
"execution_count": null,
"outputs": []
}, },
{ {
"attachments": {},
"cell_type": "markdown", "cell_type": "markdown",
"source": [
"Now let's compute the sum of squares loss for the training data"
],
"metadata": { "metadata": {
"id": "QU5mdGvpTtEG" "id": "QU5mdGvpTtEG"
} },
"source": [
"Now let's compute the sum of squares loss for the training data"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"id": "I7dqTY2Gg7CR"
},
"outputs": [],
"source": [ "source": [
"def compute_loss(data_x, data_y, model, phi):\n", "def compute_loss(data_x, data_y, model, phi):\n",
" # TODO -- Write this function -- replace the line below\n", " # TODO -- Write this function -- replace the line below\n",
@@ -144,45 +137,47 @@
" loss = 0\n", " loss = 0\n",
"\n", "\n",
" return loss" " return loss"
], ]
"metadata": {
"id": "I7dqTY2Gg7CR"
},
"execution_count": null,
"outputs": []
}, },
{ {
"attachments": {},
"cell_type": "markdown", "cell_type": "markdown",
"source": [
"Let's just test that we got that right"
],
"metadata": { "metadata": {
"id": "eB5DQvU5hYNx" "id": "eB5DQvU5hYNx"
} },
"source": [
"Let's just test that we got that right"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": null,
"loss = compute_loss(data[0,:],data[1,:],model,np.array([[0.6],[-0.2]]))\n",
"print('Your loss = %3.3f, Correct loss = %3.3f'%(loss, 12.367))"
],
"metadata": { "metadata": {
"id": "Ty05UtEEg9tc" "id": "Ty05UtEEg9tc"
}, },
"execution_count": null, "outputs": [],
"outputs": [] "source": [
"loss = compute_loss(data[0,:],data[1,:],model,np.array([[0.6],[-0.2]]))\n",
"print('Your loss = %3.3f, Correct loss = %3.3f'%(loss, 12.367))"
]
}, },
{ {
"attachments": {},
"cell_type": "markdown", "cell_type": "markdown",
"source": [
"Now let's plot the whole loss function"
],
"metadata": { "metadata": {
"id": "F3trnavPiHpH" "id": "F3trnavPiHpH"
} },
"source": [
"Now let's plot the whole loss function"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"id": "K-NTHpAAHlCl"
},
"outputs": [],
"source": [ "source": [
"def draw_loss_function(compute_loss, data, model, phi_iters = None):\n", "def draw_loss_function(compute_loss, data, model, phi_iters = None):\n",
" # Define pretty colormap\n", " # Define pretty colormap\n",
@@ -209,39 +204,40 @@
" ax.set_ylim([1,-1])\n", " ax.set_ylim([1,-1])\n",
" ax.set_xlabel('Intercept $\\phi_{0}$'); ax.set_ylabel('Slope, $\\phi_{1}$')\n", " ax.set_xlabel('Intercept $\\phi_{0}$'); ax.set_ylabel('Slope, $\\phi_{1}$')\n",
" plt.show()" " plt.show()"
], ]
"metadata": {
"id": "K-NTHpAAHlCl"
},
"execution_count": null,
"outputs": []
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": null,
"draw_loss_function(compute_loss, data, model)"
],
"metadata": { "metadata": {
"id": "l8HbvIupnTME" "id": "l8HbvIupnTME"
}, },
"execution_count": null, "outputs": [],
"outputs": [] "source": [
"draw_loss_function(compute_loss, data, model)"
]
}, },
{ {
"attachments": {},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"id": "s9Duf05WqqSC"
},
"source": [ "source": [
"Now let's compute the gradient vector for a given set of parameters:\n", "Now let's compute the gradient vector for a given set of parameters:\n",
"\n", "\n",
"\\begin{equation}\n", "\\begin{equation}\n",
"\\frac{\\partial L}{\\partial \\boldsymbol\\phi} = \\begin{bmatrix}\\frac{\\partial L}{\\partial \\phi_0} \\\\\\frac{\\partial L}{\\partial \\phi_1} \\end{bmatrix}.\n", "\\frac{\\partial L}{\\partial \\boldsymbol\\phi} = \\begin{bmatrix}\\frac{\\partial L}{\\partial \\phi_0} \\\\\\frac{\\partial L}{\\partial \\phi_1} \\end{bmatrix}.\n",
"\\end{equation}" "\\end{equation}"
], ]
"metadata": {
"id": "s9Duf05WqqSC"
}
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"id": "UpswmkL2qwBT"
},
"outputs": [],
"source": [ "source": [
"# These are in the lecture slides and notes, but worth trying to calculate them yourself to\n", "# These are in the lecture slides and notes, but worth trying to calculate them yourself to\n",
"# check that you get them right. Write out the expression for the sum of squares loss and take the\n", "# check that you get them right. Write out the expression for the sum of squares loss and take the\n",
@@ -253,31 +249,32 @@
"\n", "\n",
" # Return the gradient\n", " # Return the gradient\n",
" return np.array([[dl_dphi0],[dl_dphi1]])" " return np.array([[dl_dphi0],[dl_dphi1]])"
], ]
"metadata": {
"id": "UpswmkL2qwBT"
},
"execution_count": null,
"outputs": []
}, },
{ {
"attachments": {},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"id": "RS1nEcYVuEAM"
},
"source": [ "source": [
"We can check we got this right using a trick known as **finite differences**. If we evaluate the function and then change one of the parameters by a very small amount and normalize by that amount, we get an approximation to the gradient, so:\n", "We can check we got this right using a trick known as **finite differences**. If we evaluate the function and then change one of the parameters by a very small amount and normalize by that amount, we get an approximation to the gradient, so:\n",
"\n", "\n",
"\\begin{eqnarray}\n", "\\begin{align}\n",
"\\frac{\\partial L}{\\partial \\phi_{0}}&\\approx & \\frac{L[\\phi_0+\\delta, \\phi_1]-L[\\phi_0, \\phi_1]}{\\delta}\\\\\n", "\\frac{\\partial L}{\\partial \\phi_{0}}&\\approx \\frac{L[\\phi_0+\\delta, \\phi_1]-L[\\phi_0, \\phi_1]}{\\delta}\\\\\n",
"\\frac{\\partial L}{\\partial \\phi_{1}}&\\approx & \\frac{L[\\phi_0, \\phi_1+\\delta]-L[\\phi_0, \\phi_1]}{\\delta}\n", "\\frac{\\partial L}{\\partial \\phi_{1}}&\\approx \\frac{L[\\phi_0, \\phi_1+\\delta]-L[\\phi_0, \\phi_1]}{\\delta}\n",
"\\end{eqnarray}\n", "\\end{align}\n",
"\n", "\n",
"We can't do this when there are many parameters; for a million parameters, we would have to evaluate the loss function two million times, and usually computing the gradients directly is much more efficient." "We can't do this when there are many parameters; for a million parameters, we would have to evaluate the loss function two million times, and usually computing the gradients directly is much more efficient."
], ]
"metadata": {
"id": "RS1nEcYVuEAM"
}
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"id": "QuwAHN7yt-gi"
},
"outputs": [],
"source": [ "source": [
"# Compute the gradient using your function\n", "# Compute the gradient using your function\n",
"gradient = compute_gradient(data[0,:],data[1,:], phi)\n", "gradient = compute_gradient(data[0,:],data[1,:], phi)\n",
@@ -290,24 +287,25 @@
" compute_loss(data[0,:],data[1,:],model,phi))/delta\n", " compute_loss(data[0,:],data[1,:],model,phi))/delta\n",
"print(\"Approx gradients: (%3.3f,%3.3f)\"%(dl_dphi0_est,dl_dphi1_est))\n", "print(\"Approx gradients: (%3.3f,%3.3f)\"%(dl_dphi0_est,dl_dphi1_est))\n",
"# There might be small differences in the last significant figure because the finite-difference estimate is an approximation\n" "# There might be small differences in the last significant figure because the finite-difference estimate is an approximation\n"
], ]
"metadata": {
"id": "QuwAHN7yt-gi"
},
"execution_count": null,
"outputs": []
}, },
{ {
"attachments": {},
"cell_type": "markdown", "cell_type": "markdown",
"source": [
"Now we are ready to perform gradient descent. We'll need to use our line search routine from notebook 6.1, which I've reproduced here plus the helper function loss_function_1D that maps the search along the negative gradient direction in 2D space to a 1D problem (distance along this direction)"
],
"metadata": { "metadata": {
"id": "5EIjMM9Fw2eT" "id": "5EIjMM9Fw2eT"
} },
"source": [
"Now we are ready to perform gradient descent. We'll need to use our line search routine from notebook 6.1, which I've reproduced here plus the helper function loss_function_1D that maps the search along the negative gradient direction in 2D space to a 1D problem (distance along this direction)"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"id": "XrJ2gQjfw1XP"
},
"outputs": [],
"source": [ "source": [
"def loss_function_1D(dist_prop, data, model, phi_start, search_direction):\n", "def loss_function_1D(dist_prop, data, model, phi_start, search_direction):\n",
" # Return the loss after moving this far\n", " # Return the loss after moving this far\n",
@@ -362,15 +360,15 @@
"\n", "\n",
" # Return average of two middle points\n", " # Return average of two middle points\n",
" return (b+c)/2.0" " return (b+c)/2.0"
], ]
"metadata": {
"id": "XrJ2gQjfw1XP"
},
"execution_count": null,
"outputs": []
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"id": "YVq6rmaWRD2M"
},
"outputs": [],
"source": [ "source": [
"def gradient_descent_step(phi, data, model):\n", "def gradient_descent_step(phi, data, model):\n",
" # TODO -- update Phi with the gradient descent step (equation 6.3)\n", " # TODO -- update Phi with the gradient descent step (equation 6.3)\n",
@@ -379,15 +377,15 @@
" # 3. Update the parameters phi based on the gradient and the step size alpha.\n", " # 3. Update the parameters phi based on the gradient and the step size alpha.\n",
"\n", "\n",
" return phi" " return phi"
], ]
"metadata": {
"id": "YVq6rmaWRD2M"
},
"execution_count": null,
"outputs": []
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"id": "tOLd0gtdRLLS"
},
"outputs": [],
"source": [ "source": [
"# Initialize the parameters and draw the model\n", "# Initialize the parameters and draw the model\n",
"n_steps = 10\n", "n_steps = 10\n",
@@ -409,12 +407,22 @@
"\n", "\n",
"# Draw the trajectory on the loss function\n", "# Draw the trajectory on the loss function\n",
"draw_loss_function(compute_loss, data, model,phi_all)\n" "draw_loss_function(compute_loss, data, model,phi_all)\n"
],
"metadata": {
"id": "tOLd0gtdRLLS"
},
"execution_count": null,
"outputs": []
}
] ]
} }
],
"metadata": {
"colab": {
"include_colab_link": true,
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

View File

@@ -1,33 +1,22 @@
{ {
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"authorship_tag": "ABX9TyNk5FN4qlw3pk8BwDVWw1jN",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [ "cells": [
{ {
"attachments": {},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {
"id": "view-in-github", "colab_type": "text",
"colab_type": "text" "id": "view-in-github"
}, },
"source": [ "source": [
"<a href=\"https://colab.research.google.com/github/udlbook/udlbook/blob/main/Notebooks/Chap06/6_3_Stochastic_Gradient_Descent.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" "<a href=\"https://colab.research.google.com/github/udlbook/udlbook/blob/main/Notebooks/Chap06/6_3_Stochastic_Gradient_Descent.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
] ]
}, },
{ {
"attachments": {},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"id": "el8l05WQEO46"
},
"source": [ "source": [
"# **Notebook 6.3: Stochastic gradient descent**\n", "# **Notebook 6.3: Stochastic gradient descent**\n",
"\n", "\n",
@@ -39,10 +28,7 @@
"\n", "\n",
"\n", "\n",
"\n" "\n"
], ]
"metadata": {
"id": "el8l05WQEO46"
}
}, },
{ {
"cell_type": "code", "cell_type": "code",
@@ -61,6 +47,11 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"id": "4cRkrh9MZ58Z"
},
"outputs": [],
"source": [ "source": [
"# Let's create our training data 30 pairs {x_i, y_i}\n", "# Let's create our training data 30 pairs {x_i, y_i}\n",
"# We'll try to fit the Gabor model to these data\n", "# We'll try to fit the Gabor model to these data\n",
@@ -74,15 +65,15 @@
" -2.365e-02,5.098e-01,-2.777e-01,3.367e-01,1.927e-01,-2.222e-01,\n", " -2.365e-02,5.098e-01,-2.777e-01,3.367e-01,1.927e-01,-2.222e-01,\n",
" 6.352e-02,6.888e-03,3.224e-02,1.091e-02,-5.706e-01,-5.258e-02,\n", " 6.352e-02,6.888e-03,3.224e-02,1.091e-02,-5.706e-01,-5.258e-02,\n",
" -3.666e-02,1.709e-01,-4.805e-02,2.008e-01,-1.904e-01,5.952e-01]])" " -3.666e-02,1.709e-01,-4.805e-02,2.008e-01,-1.904e-01,5.952e-01]])"
], ]
"metadata": {
"id": "4cRkrh9MZ58Z"
},
"execution_count": null,
"outputs": []
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"id": "WQUERmb2erAe"
},
"outputs": [],
"source": [ "source": [
"# Let's define our model\n", "# Let's define our model\n",
"def model(phi,x):\n", "def model(phi,x):\n",
@@ -90,15 +81,15 @@
" gauss_component = np.exp(-(phi[0] + 0.06 * phi[1] * x) * (phi[0] + 0.06 * phi[1] * x) / 32)\n", " gauss_component = np.exp(-(phi[0] + 0.06 * phi[1] * x) * (phi[0] + 0.06 * phi[1] * x) / 32)\n",
" y_pred= sin_component * gauss_component\n", " y_pred= sin_component * gauss_component\n",
" return y_pred" " return y_pred"
], ]
"metadata": {
"id": "WQUERmb2erAe"
},
"execution_count": null,
"outputs": []
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"id": "qFRe9POHF2le"
},
"outputs": [],
"source": [ "source": [
"# Draw model\n", "# Draw model\n",
"def draw_model(data,model,phi,title=None):\n", "def draw_model(data,model,phi,title=None):\n",
@@ -113,39 +104,40 @@
" if title is not None:\n", " if title is not None:\n",
" ax.set_title(title)\n", " ax.set_title(title)\n",
" plt.show()" " plt.show()"
], ]
"metadata": {
"id": "qFRe9POHF2le"
},
"execution_count": null,
"outputs": []
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"id": "TXx1Tpd1Tl-I"
},
"outputs": [],
"source": [ "source": [
"# Initialize the parameters and draw the model\n", "# Initialize the parameters and draw the model\n",
"phi = np.zeros((2,1))\n", "phi = np.zeros((2,1))\n",
"phi[0] = -5 # Horizontal offset\n", "phi[0] = -5 # Horizontal offset\n",
"phi[1] = 25 # Frequency\n", "phi[1] = 25 # Frequency\n",
"draw_model(data,model,phi, \"Initial parameters\")\n" "draw_model(data,model,phi, \"Initial parameters\")\n"
], ]
"metadata": {
"id": "TXx1Tpd1Tl-I"
},
"execution_count": null,
"outputs": []
}, },
{ {
"attachments": {},
"cell_type": "markdown", "cell_type": "markdown",
"source": [
"Now let's compute the sum of squares loss for the training data"
],
"metadata": { "metadata": {
"id": "QU5mdGvpTtEG" "id": "QU5mdGvpTtEG"
} },
"source": [
"Now let's compute the sum of squares loss for the training data"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"id": "I7dqTY2Gg7CR"
},
"outputs": [],
"source": [ "source": [
"def compute_loss(data_x, data_y, model, phi):\n", "def compute_loss(data_x, data_y, model, phi):\n",
" # TODO -- Write this function -- replace the line below\n", " # TODO -- Write this function -- replace the line below\n",
@@ -155,45 +147,47 @@
" loss = 0\n", " loss = 0\n",
"\n", "\n",
" return loss" " return loss"
], ]
"metadata": {
"id": "I7dqTY2Gg7CR"
},
"execution_count": null,
"outputs": []
}, },
{ {
"attachments": {},
"cell_type": "markdown", "cell_type": "markdown",
"source": [
"Let's just test that we got that right"
],
"metadata": { "metadata": {
"id": "eB5DQvU5hYNx" "id": "eB5DQvU5hYNx"
} },
"source": [
"Let's just test that we got that right"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": null,
"loss = compute_loss(data[0,:],data[1,:],model,np.array([[0.6],[-0.2]]))\n",
"print('Your loss = %3.3f, Correct loss = %3.3f'%(loss, 16.419))"
],
"metadata": { "metadata": {
"id": "Ty05UtEEg9tc" "id": "Ty05UtEEg9tc"
}, },
"execution_count": null, "outputs": [],
"outputs": [] "source": [
"loss = compute_loss(data[0,:],data[1,:],model,np.array([[0.6],[-0.2]]))\n",
"print('Your loss = %3.3f, Correct loss = %3.3f'%(loss, 16.419))"
]
}, },
{ {
"attachments": {},
"cell_type": "markdown", "cell_type": "markdown",
"source": [
"Now let's plot the whole loss function"
],
"metadata": { "metadata": {
"id": "F3trnavPiHpH" "id": "F3trnavPiHpH"
} },
"source": [
"Now let's plot the whole loss function"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"id": "K-NTHpAAHlCl"
},
"outputs": [],
"source": [ "source": [
"def draw_loss_function(compute_loss, data, model, phi_iters = None):\n", "def draw_loss_function(compute_loss, data, model, phi_iters = None):\n",
" # Define pretty colormap\n", " # Define pretty colormap\n",
@@ -220,39 +214,40 @@
" ax.set_ylim([2.5,22.5])\n", " ax.set_ylim([2.5,22.5])\n",
" ax.set_xlabel('Offset $\\phi_{0}$'); ax.set_ylabel('Frequency, $\\phi_{1}$')\n", " ax.set_xlabel('Offset $\\phi_{0}$'); ax.set_ylabel('Frequency, $\\phi_{1}$')\n",
" plt.show()" " plt.show()"
], ]
"metadata": {
"id": "K-NTHpAAHlCl"
},
"execution_count": null,
"outputs": []
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": null,
"draw_loss_function(compute_loss, data, model)"
],
"metadata": { "metadata": {
"id": "l8HbvIupnTME" "id": "l8HbvIupnTME"
}, },
"execution_count": null, "outputs": [],
"outputs": [] "source": [
"draw_loss_function(compute_loss, data, model)"
]
}, },
{ {
"attachments": {},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"id": "s9Duf05WqqSC"
},
"source": [ "source": [
"Now let's compute the gradient vector for a given set of parameters:\n", "Now let's compute the gradient vector for a given set of parameters:\n",
"\n", "\n",
"\\begin{equation}\n", "\\begin{equation}\n",
"\\frac{\\partial L}{\\partial \\boldsymbol\\phi} = \\begin{bmatrix}\\frac{\\partial L}{\\partial \\phi_0} \\\\\\frac{\\partial L}{\\partial \\phi_1} \\end{bmatrix}.\n", "\\frac{\\partial L}{\\partial \\boldsymbol\\phi} = \\begin{bmatrix}\\frac{\\partial L}{\\partial \\phi_0} \\\\\\frac{\\partial L}{\\partial \\phi_1} \\end{bmatrix}.\n",
"\\end{equation}" "\\end{equation}"
], ]
"metadata": {
"id": "s9Duf05WqqSC"
}
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"id": "UpswmkL2qwBT"
},
"outputs": [],
"source": [ "source": [
"# These came from writing out the expression for the sum of squares loss and taking the\n", "# These came from writing out the expression for the sum of squares loss and taking the\n",
"# derivative with respect to phi0 and phi1. It was a lot of hassle to get it right!\n", "# derivative with respect to phi0 and phi1. It was a lot of hassle to get it right!\n",
@@ -281,31 +276,32 @@
" dl_dphi1 = gabor_deriv_phi1(data_x, data_y, phi[0],phi[1])\n", " dl_dphi1 = gabor_deriv_phi1(data_x, data_y, phi[0],phi[1])\n",
" # Return the gradient\n", " # Return the gradient\n",
" return np.array([[dl_dphi0],[dl_dphi1]])" " return np.array([[dl_dphi0],[dl_dphi1]])"
], ]
"metadata": {
"id": "UpswmkL2qwBT"
},
"execution_count": null,
"outputs": []
}, },
{ {
"attachments": {},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"id": "RS1nEcYVuEAM"
},
"source": [ "source": [
"We can check we got this right using a trick known as **finite differences**. If we evaluate the function and then change one of the parameters by a very small amount and normalize by that amount, we get an approximation to the gradient, so:\n", "We can check we got this right using a trick known as **finite differences**. If we evaluate the function and then change one of the parameters by a very small amount and normalize by that amount, we get an approximation to the gradient, so:\n",
"\n", "\n",
"\\begin{eqnarray}\n", "\\begin{align}\n",
"\\frac{\\partial L}{\\partial \\phi_{0}}&\\approx & \\frac{L[\\phi_0+\\delta, \\phi_1]-L[\\phi_0, \\phi_1]}{\\delta}\\\\\n", "\\frac{\\partial L}{\\partial \\phi_{0}}&\\approx \\frac{L[\\phi_0+\\delta, \\phi_1]-L[\\phi_0, \\phi_1]}{\\delta}\\\\\n",
"\\frac{\\partial L}{\\partial \\phi_{1}}&\\approx & \\frac{L[\\phi_0, \\phi_1+\\delta]-L[\\phi_0, \\phi_1]}{\\delta}\n", "\\frac{\\partial L}{\\partial \\phi_{1}}&\\approx \\frac{L[\\phi_0, \\phi_1+\\delta]-L[\\phi_0, \\phi_1]}{\\delta}\n",
"\\end{eqnarray}\n", "\\end{align}\n",
"\n", "\n",
"We can't do this when there are many parameters; for a million parameters, we would have to evaluate the loss function two million times, and usually computing the gradients directly is much more efficient." "We can't do this when there are many parameters; for a million parameters, we would have to evaluate the loss function two million times, and usually computing the gradients directly is much more efficient."
], ]
"metadata": {
"id": "RS1nEcYVuEAM"
}
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"id": "QuwAHN7yt-gi"
},
"outputs": [],
"source": [ "source": [
"# Compute the gradient using your function\n", "# Compute the gradient using your function\n",
"gradient = compute_gradient(data[0,:],data[1,:], phi)\n", "gradient = compute_gradient(data[0,:],data[1,:], phi)\n",
@@ -317,24 +313,25 @@
"dl_dphi1_est = (compute_loss(data[0,:],data[1,:],model,phi+np.array([[0],[delta]])) - \\\n", "dl_dphi1_est = (compute_loss(data[0,:],data[1,:],model,phi+np.array([[0],[delta]])) - \\\n",
" compute_loss(data[0,:],data[1,:],model,phi))/delta\n", " compute_loss(data[0,:],data[1,:],model,phi))/delta\n",
"print(\"Approx gradients: (%3.3f,%3.3f)\"%(dl_dphi0_est,dl_dphi1_est))\n" "print(\"Approx gradients: (%3.3f,%3.3f)\"%(dl_dphi0_est,dl_dphi1_est))\n"
], ]
"metadata": {
"id": "QuwAHN7yt-gi"
},
"execution_count": null,
"outputs": []
}, },
{ {
"attachments": {},
"cell_type": "markdown", "cell_type": "markdown",
"source": [
"Now we are ready to perform gradient descent. We'll need to use our line search routine from Notebook 6.1, which I've reproduced here plus the helper function loss_function_1D that converts from a 2D problem to a 1D problem"
],
"metadata": { "metadata": {
"id": "5EIjMM9Fw2eT" "id": "5EIjMM9Fw2eT"
} },
"source": [
"Now we are ready to perform gradient descent. We'll need to use our line search routine from Notebook 6.1, which I've reproduced here plus the helper function loss_function_1D that converts from a 2D problem to a 1D problem"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"id": "XrJ2gQjfw1XP"
},
"outputs": [],
"source": [ "source": [
"def loss_function_1D(dist_prop, data, model, phi_start, gradient):\n", "def loss_function_1D(dist_prop, data, model, phi_start, gradient):\n",
" # Return the loss after moving this far\n", " # Return the loss after moving this far\n",
@@ -389,15 +386,15 @@
"\n", "\n",
" # Return average of two middle points\n", " # Return average of two middle points\n",
" return (b+c)/2.0" " return (b+c)/2.0"
], ]
"metadata": {
"id": "XrJ2gQjfw1XP"
},
"execution_count": null,
"outputs": []
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"id": "YVq6rmaWRD2M"
},
"outputs": [],
"source": [ "source": [
"def gradient_descent_step(phi, data, model):\n", "def gradient_descent_step(phi, data, model):\n",
" # Step 1: Compute the gradient\n", " # Step 1: Compute the gradient\n",
@@ -406,15 +403,15 @@
" alpha = line_search(data, model, phi, gradient*-1, max_dist = 2.0)\n", " alpha = line_search(data, model, phi, gradient*-1, max_dist = 2.0)\n",
" phi = phi - alpha * gradient\n", " phi = phi - alpha * gradient\n",
" return phi" " return phi"
], ]
"metadata": {
"id": "YVq6rmaWRD2M"
},
"execution_count": null,
"outputs": []
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"id": "tOLd0gtdRLLS"
},
"outputs": [],
"source": [ "source": [
"# Initialize the parameters\n", "# Initialize the parameters\n",
"n_steps = 21\n", "n_steps = 21\n",
@@ -435,41 +432,41 @@
" draw_model(data,model,phi_all[:,c_step+1], \"Iteration %d, loss = %f\"%(c_step+1,loss))\n", " draw_model(data,model,phi_all[:,c_step+1], \"Iteration %d, loss = %f\"%(c_step+1,loss))\n",
"\n", "\n",
"draw_loss_function(compute_loss, data, model,phi_all)\n" "draw_loss_function(compute_loss, data, model,phi_all)\n"
], ]
"metadata": {
"id": "tOLd0gtdRLLS"
},
"execution_count": null,
"outputs": []
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": null,
"# TODO Experiment with starting the optimization in the previous cell in different places\n",
"# and show that it heads to a local minimum if we don't start it in the right valley"
],
"metadata": { "metadata": {
"id": "Oi8ZlH0ptLqA" "id": "Oi8ZlH0ptLqA"
}, },
"execution_count": null, "outputs": [],
"outputs": [] "source": [
"# TODO Experiment with starting the optimization in the previous cell in different places\n",
"# and show that it heads to a local minimum if we don't start it in the right valley"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"id": "4l-ueLk-oAxV"
},
"outputs": [],
"source": [ "source": [
"def gradient_descent_step_fixed_learning_rate(phi, data, alpha):\n", "def gradient_descent_step_fixed_learning_rate(phi, data, alpha):\n",
" # TODO -- fill in this routine so that we take a fixed size step of size alpha without using line search\n", " # TODO -- fill in this routine so that we take a fixed size step of size alpha without using line search\n",
"\n", "\n",
" return phi" " return phi"
], ]
"metadata": {
"id": "4l-ueLk-oAxV"
},
"execution_count": null,
"outputs": []
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"id": "oi9MX_GRpM41"
},
"outputs": [],
"source": [ "source": [
"# Initialize the parameters\n", "# Initialize the parameters\n",
"n_steps = 21\n", "n_steps = 21\n",
@@ -490,28 +487,28 @@
" draw_model(data,model,phi_all[:,c_step+1], \"Iteration %d, loss = %f\"%(c_step+1,loss))\n", " draw_model(data,model,phi_all[:,c_step+1], \"Iteration %d, loss = %f\"%(c_step+1,loss))\n",
"\n", "\n",
"draw_loss_function(compute_loss, data, model,phi_all)\n" "draw_loss_function(compute_loss, data, model,phi_all)\n"
], ]
"metadata": {
"id": "oi9MX_GRpM41"
},
"execution_count": null,
"outputs": []
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"id": "In6sQ5YCpMqn"
},
"outputs": [],
"source": [ "source": [
"# TODO Experiment with the learning rate, alpha.\n", "# TODO Experiment with the learning rate, alpha.\n",
"# What happens if you set it too large?\n", "# What happens if you set it too large?\n",
"# What happens if you set it too small?" "# What happens if you set it too small?"
], ]
"metadata": {
"id": "In6sQ5YCpMqn"
},
"execution_count": null,
"outputs": []
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"id": "VKTC9-1Gpm3N"
},
"outputs": [],
"source": [ "source": [
"def stochastic_gradient_descent_step(phi, data, alpha, batch_size):\n", "def stochastic_gradient_descent_step(phi, data, alpha, batch_size):\n",
" # TODO -- fill in this routine so that we take a fixed size step of size alpha but only using a subset (batch) of the data\n", " # TODO -- fill in this routine so that we take a fixed size step of size alpha but only using a subset (batch) of the data\n",
@@ -522,15 +519,15 @@
"\n", "\n",
"\n", "\n",
" return phi" " return phi"
], ]
"metadata": {
"id": "VKTC9-1Gpm3N"
},
"execution_count": null,
"outputs": []
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"id": "469OP_UHskJ4"
},
"outputs": [],
"source": [ "source": [
"# Set the random number generator so you always get same numbers (disable if you don't want this)\n", "# Set the random number generator so you always get same numbers (disable if you don't want this)\n",
"np.random.seed(1)\n", "np.random.seed(1)\n",
@@ -553,34 +550,45 @@
" draw_model(data,model,phi_all[:,c_step+1], \"Iteration %d, loss = %f\"%(c_step+1,loss))\n", " draw_model(data,model,phi_all[:,c_step+1], \"Iteration %d, loss = %f\"%(c_step+1,loss))\n",
"\n", "\n",
"draw_loss_function(compute_loss, data, model,phi_all)" "draw_loss_function(compute_loss, data, model,phi_all)"
], ]
"metadata": {
"id": "469OP_UHskJ4"
},
"execution_count": null,
"outputs": []
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": null,
"# TODO -- Experiment with different learning rates, starting points, batch sizes, number of steps. Get a feel for this."
],
"metadata": { "metadata": {
"id": "LxE2kTa3s29p" "id": "LxE2kTa3s29p"
}, },
"execution_count": null, "outputs": [],
"outputs": [] "source": [
"# TODO -- Experiment with different learning rates, starting points, batch sizes, number of steps. Get a feel for this."
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": null,
"# TODO -- Add a learning rate schedule. Reduce the learning rate by a factor of beta every M iterations"
],
"metadata": { "metadata": {
"id": "lw4QPOaQTh5e" "id": "lw4QPOaQTh5e"
}, },
"execution_count": null, "outputs": [],
"outputs": [] "source": [
} "# TODO -- Add a learning rate schedule. Reduce the learning rate by a factor of beta every M iterations"
] ]
} }
],
"metadata": {
"colab": {
"authorship_tag": "ABX9TyNk5FN4qlw3pk8BwDVWw1jN",
"include_colab_link": true,
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}