Merge pull request #150 from yrahal/main
Fix minor typos in Chapter 6 notebooks
This commit is contained in:
@@ -113,7 +113,7 @@
|
|||||||
" b = 0.33\n",
|
" b = 0.33\n",
|
||||||
" c = 0.66\n",
|
" c = 0.66\n",
|
||||||
" d = 1.0\n",
|
" d = 1.0\n",
|
||||||
" n_iter =0;\n",
|
" n_iter = 0\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # While we haven't found the minimum closely enough\n",
|
" # While we haven't found the minimum closely enough\n",
|
||||||
" while np.abs(b-c) > thresh and n_iter < max_iter:\n",
|
" while np.abs(b-c) > thresh and n_iter < max_iter:\n",
|
||||||
@@ -131,8 +131,7 @@
|
|||||||
"\n",
|
"\n",
|
||||||
" print('Iter %d, a=%3.3f, b=%3.3f, c=%3.3f, d=%3.3f'%(n_iter, a,b,c,d))\n",
|
" print('Iter %d, a=%3.3f, b=%3.3f, c=%3.3f, d=%3.3f'%(n_iter, a,b,c,d))\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # Rule #1 If the HEIGHT at point A is less the HEIGHT at points B, C, and D then halve values of B, C, and D\n",
|
" # Rule #1 If the HEIGHT at point A is less than the HEIGHT at points B, C, and D then halve values of B, C, and D\n",
|
||||||
" # i.e. bring them closer to the original point\n",
|
|
||||||
" # i.e. bring them closer to the original point\n",
|
" # i.e. bring them closer to the original point\n",
|
||||||
" # TODO REPLACE THE BLOCK OF CODE BELOW WITH THIS RULE\n",
|
" # TODO REPLACE THE BLOCK OF CODE BELOW WITH THIS RULE\n",
|
||||||
" if (0):\n",
|
" if (0):\n",
|
||||||
@@ -140,7 +139,7 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # Rule #2 If the HEIGHT at point b is less than the HEIGHT at point c then\n",
|
" # Rule #2 If the HEIGHT at point b is less than the HEIGHT at point c then\n",
|
||||||
" # then point d becomes point c, and\n",
|
" # point d becomes point c, and\n",
|
||||||
" # point b becomes 1/3 between a and new d\n",
|
" # point b becomes 1/3 between a and new d\n",
|
||||||
" # point c becomes 2/3 between a and new d\n",
|
" # point c becomes 2/3 between a and new d\n",
|
||||||
" # TODO REPLACE THE BLOCK OF CODE BELOW WITH THIS RULE\n",
|
" # TODO REPLACE THE BLOCK OF CODE BELOW WITH THIS RULE\n",
|
||||||
@@ -148,7 +147,7 @@
|
|||||||
" continue;\n",
|
" continue;\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # Rule #3 If the HEIGHT at point c is less than the HEIGHT at point b then\n",
|
" # Rule #3 If the HEIGHT at point c is less than the HEIGHT at point b then\n",
|
||||||
" # then point a becomes point b, and\n",
|
" # point a becomes point b, and\n",
|
||||||
" # point b becomes 1/3 between new a and d\n",
|
" # point b becomes 1/3 between new a and d\n",
|
||||||
" # point c becomes 2/3 between new a and d\n",
|
" # point c becomes 2/3 between new a and d\n",
|
||||||
" # TODO REPLACE THE BLOCK OF CODE BELOW WITH THIS RULE\n",
|
" # TODO REPLACE THE BLOCK OF CODE BELOW WITH THIS RULE\n",
|
||||||
|
|||||||
@@ -117,7 +117,7 @@
|
|||||||
"id": "QU5mdGvpTtEG"
|
"id": "QU5mdGvpTtEG"
|
||||||
},
|
},
|
||||||
"source": [
|
"source": [
|
||||||
"Now lets create compute the sum of squares loss for the training data"
|
"Now let's compute the sum of squares loss for the training data"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -317,7 +317,7 @@
|
|||||||
" b = 0.33 * max_dist\n",
|
" b = 0.33 * max_dist\n",
|
||||||
" c = 0.66 * max_dist\n",
|
" c = 0.66 * max_dist\n",
|
||||||
" d = 1.0 * max_dist\n",
|
" d = 1.0 * max_dist\n",
|
||||||
" n_iter =0;\n",
|
" n_iter = 0\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # While we haven't found the minimum closely enough\n",
|
" # While we haven't found the minimum closely enough\n",
|
||||||
" while np.abs(b-c) > thresh and n_iter < max_iter:\n",
|
" while np.abs(b-c) > thresh and n_iter < max_iter:\n",
|
||||||
@@ -341,7 +341,7 @@
|
|||||||
" continue;\n",
|
" continue;\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # Rule #2 If point b is less than point c then\n",
|
" # Rule #2 If point b is less than point c then\n",
|
||||||
" # then point d becomes point c, and\n",
|
" # point d becomes point c, and\n",
|
||||||
" # point b becomes 1/3 between a and new d\n",
|
" # point b becomes 1/3 between a and new d\n",
|
||||||
" # point c becomes 2/3 between a and new d\n",
|
" # point c becomes 2/3 between a and new d\n",
|
||||||
" if lossb < lossc:\n",
|
" if lossb < lossc:\n",
|
||||||
@@ -351,7 +351,7 @@
|
|||||||
" continue\n",
|
" continue\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # Rule #3 If point c is less than point b then\n",
|
" # Rule #3 If point c is less than point b then\n",
|
||||||
" # then point a becomes point b, and\n",
|
" # point a becomes point b, and\n",
|
||||||
" # point b becomes 1/3 between new a and d\n",
|
" # point b becomes 1/3 between new a and d\n",
|
||||||
" # point c becomes 2/3 between new a and d\n",
|
" # point c becomes 2/3 between new a and d\n",
|
||||||
" a = b\n",
|
" a = b\n",
|
||||||
|
|||||||
@@ -53,7 +53,7 @@
|
|||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# Let's create our training data 30 pairs {x_i, y_i}\n",
|
"# Let's create our training data of 30 pairs {x_i, y_i}\n",
|
||||||
"# We'll try to fit the Gabor model to these data\n",
|
"# We'll try to fit the Gabor model to these data\n",
|
||||||
"data = np.array([[-1.920e+00,-1.422e+01,1.490e+00,-1.940e+00,-2.389e+00,-5.090e+00,\n",
|
"data = np.array([[-1.920e+00,-1.422e+01,1.490e+00,-1.940e+00,-2.389e+00,-5.090e+00,\n",
|
||||||
" -8.861e+00,3.578e+00,-6.010e+00,-6.995e+00,3.634e+00,8.743e-01,\n",
|
" -8.861e+00,3.578e+00,-6.010e+00,-6.995e+00,3.634e+00,8.743e-01,\n",
|
||||||
@@ -128,7 +128,7 @@
|
|||||||
"id": "QU5mdGvpTtEG"
|
"id": "QU5mdGvpTtEG"
|
||||||
},
|
},
|
||||||
"source": [
|
"source": [
|
||||||
"Now lets create compute the sum of squares loss for the training data"
|
"Now let's compute the sum of squares loss for the training data"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -198,7 +198,7 @@
|
|||||||
" b = np.floor(my_colormap_vals_dec - r * 256 *256 - g * 256)\n",
|
" b = np.floor(my_colormap_vals_dec - r * 256 *256 - g * 256)\n",
|
||||||
" my_colormap = ListedColormap(np.vstack((r,g,b)).transpose()/255.0)\n",
|
" my_colormap = ListedColormap(np.vstack((r,g,b)).transpose()/255.0)\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # Make grid of intercept/slope values to plot\n",
|
" # Make grid of offset/frequency values to plot\n",
|
||||||
" offsets_mesh, freqs_mesh = np.meshgrid(np.arange(-10,10.0,0.1), np.arange(2.5,22.5,0.1))\n",
|
" offsets_mesh, freqs_mesh = np.meshgrid(np.arange(-10,10.0,0.1), np.arange(2.5,22.5,0.1))\n",
|
||||||
" loss_mesh = np.zeros_like(freqs_mesh)\n",
|
" loss_mesh = np.zeros_like(freqs_mesh)\n",
|
||||||
" # Compute loss for every set of parameters\n",
|
" # Compute loss for every set of parameters\n",
|
||||||
@@ -343,7 +343,7 @@
|
|||||||
" b = 0.33 * max_dist\n",
|
" b = 0.33 * max_dist\n",
|
||||||
" c = 0.66 * max_dist\n",
|
" c = 0.66 * max_dist\n",
|
||||||
" d = 1.0 * max_dist\n",
|
" d = 1.0 * max_dist\n",
|
||||||
" n_iter =0;\n",
|
" n_iter = 0\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # While we haven't found the minimum closely enough\n",
|
" # While we haven't found the minimum closely enough\n",
|
||||||
" while np.abs(b-c) > thresh and n_iter < max_iter:\n",
|
" while np.abs(b-c) > thresh and n_iter < max_iter:\n",
|
||||||
@@ -367,7 +367,7 @@
|
|||||||
" continue;\n",
|
" continue;\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # Rule #2 If point b is less than point c then\n",
|
" # Rule #2 If point b is less than point c then\n",
|
||||||
" # then point d becomes point c, and\n",
|
" # point d becomes point c, and\n",
|
||||||
" # point b becomes 1/3 between a and new d\n",
|
" # point b becomes 1/3 between a and new d\n",
|
||||||
" # point c becomes 2/3 between a and new d\n",
|
" # point c becomes 2/3 between a and new d\n",
|
||||||
" if lossb < lossc:\n",
|
" if lossb < lossc:\n",
|
||||||
@@ -377,7 +377,7 @@
|
|||||||
" continue\n",
|
" continue\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # Rule #3 If point c is less than point b then\n",
|
" # Rule #3 If point c is less than point b then\n",
|
||||||
" # then point a becomes point b, and\n",
|
" # point a becomes point b, and\n",
|
||||||
" # point b becomes 1/3 between new a and d\n",
|
" # point b becomes 1/3 between new a and d\n",
|
||||||
" # point c becomes 2/3 between new a and d\n",
|
" # point c becomes 2/3 between new a and d\n",
|
||||||
" a = b\n",
|
" a = b\n",
|
||||||
|
|||||||
@@ -61,7 +61,7 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"source": [
|
"source": [
|
||||||
"# Let's create our training data 30 pairs {x_i, y_i}\n",
|
"# Let's create our training data of 30 pairs {x_i, y_i}\n",
|
||||||
"# We'll try to fit the Gabor model to these data\n",
|
"# We'll try to fit the Gabor model to these data\n",
|
||||||
"data = np.array([[-1.920e+00,-1.422e+01,1.490e+00,-1.940e+00,-2.389e+00,-5.090e+00,\n",
|
"data = np.array([[-1.920e+00,-1.422e+01,1.490e+00,-1.940e+00,-2.389e+00,-5.090e+00,\n",
|
||||||
" -8.861e+00,3.578e+00,-6.010e+00,-6.995e+00,3.634e+00,8.743e-01,\n",
|
" -8.861e+00,3.578e+00,-6.010e+00,-6.995e+00,3.634e+00,8.743e-01,\n",
|
||||||
@@ -137,7 +137,7 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"source": [
|
"source": [
|
||||||
"Now lets compute the sum of squares loss for the training data and plot the loss function"
|
"Now let's compute the sum of squares loss for the training data and plot the loss function"
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"id": "QU5mdGvpTtEG"
|
"id": "QU5mdGvpTtEG"
|
||||||
@@ -160,7 +160,7 @@
|
|||||||
" b = np.floor(my_colormap_vals_dec - r * 256 *256 - g * 256)\n",
|
" b = np.floor(my_colormap_vals_dec - r * 256 *256 - g * 256)\n",
|
||||||
" my_colormap = ListedColormap(np.vstack((r,g,b)).transpose()/255.0)\n",
|
" my_colormap = ListedColormap(np.vstack((r,g,b)).transpose()/255.0)\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # Make grid of intercept/slope values to plot\n",
|
" # Make grid of offset/frequency values to plot\n",
|
||||||
" offsets_mesh, freqs_mesh = np.meshgrid(np.arange(-10,10.0,0.1), np.arange(2.5,22.5,0.1))\n",
|
" offsets_mesh, freqs_mesh = np.meshgrid(np.arange(-10,10.0,0.1), np.arange(2.5,22.5,0.1))\n",
|
||||||
" loss_mesh = np.zeros_like(freqs_mesh)\n",
|
" loss_mesh = np.zeros_like(freqs_mesh)\n",
|
||||||
" # Compute loss for every set of parameters\n",
|
" # Compute loss for every set of parameters\n",
|
||||||
@@ -365,7 +365,6 @@
|
|||||||
"\n",
|
"\n",
|
||||||
" # Update the parameters\n",
|
" # Update the parameters\n",
|
||||||
" phi_all[:,c_step+1:c_step+2] = phi_all[:,c_step:c_step+1] - alpha * momentum\n",
|
" phi_all[:,c_step+1:c_step+2] = phi_all[:,c_step:c_step+1] - alpha * momentum\n",
|
||||||
" # Measure loss and draw model every 8th step\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"loss = compute_loss(data[0,:], data[1,:], model, phi_all[:,c_step+1:c_step+2])\n",
|
"loss = compute_loss(data[0,:], data[1,:], model, phi_all[:,c_step+1:c_step+2])\n",
|
||||||
"draw_model(data,model,phi_all[:,c_step+1], \"Iteration %d, loss = %f\"%(c_step+1,loss))\n",
|
"draw_model(data,model,phi_all[:,c_step+1], \"Iteration %d, loss = %f\"%(c_step+1,loss))\n",
|
||||||
|
|||||||
@@ -110,7 +110,7 @@
|
|||||||
" ax.plot(opt_path[0,:], opt_path[1,:],'-', color='#a0d9d3ff')\n",
|
" ax.plot(opt_path[0,:], opt_path[1,:],'-', color='#a0d9d3ff')\n",
|
||||||
" ax.plot(opt_path[0,:], opt_path[1,:],'.', color='#a0d9d3ff',markersize=10)\n",
|
" ax.plot(opt_path[0,:], opt_path[1,:],'.', color='#a0d9d3ff',markersize=10)\n",
|
||||||
" ax.set_xlabel(\"$\\phi_{0}$\")\n",
|
" ax.set_xlabel(\"$\\phi_{0}$\")\n",
|
||||||
" ax.set_ylabel(\"$\\phi_1}$\")\n",
|
" ax.set_ylabel(\"$\\phi_{1}$\")\n",
|
||||||
" plt.show()"
|
" plt.show()"
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
@@ -169,7 +169,7 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"source": [
|
"source": [
|
||||||
"Because the function changes much faster in $\\phi_1$ than in $\\phi_0$, there is no great step size to choose. If we set the step size so that it makes sensible progress in the $\\phi_1$, then it takes many iterations to converge. If we set the step size tso that we make sensible progress in the $\\phi_{0}$ direction, then the path oscillates in the $\\phi_1$ direction. \n",
|
"Because the function changes much faster in $\\phi_1$ than in $\\phi_0$, there is no great step size to choose. If we set the step size so that it makes sensible progress in the $\\phi_1$ direction, then it takes many iterations to converge. If we set the step size so that we make sensible progress in the $\\phi_{0}$ direction, then the path oscillates in the $\\phi_1$ direction. \n",
|
||||||
"\n",
|
"\n",
|
||||||
"This motivates Adam. At the core of Adam is the idea that we should just determine which way is downhill along each axis (i.e. left/right for $\\phi_0$ or up/down for $\\phi_1$) and move a fixed distance in that direction."
|
"This motivates Adam. At the core of Adam is the idea that we should just determine which way is downhill along each axis (i.e. left/right for $\\phi_0$ or up/down for $\\phi_1$) and move a fixed distance in that direction."
|
||||||
],
|
],
|
||||||
|
|||||||
Reference in New Issue
Block a user