Compare commits
8 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5f524edd3b | ||
|
|
7a423507f5 | ||
|
|
4a5bd9c4d5 | ||
|
|
c0cd9c2aea | ||
|
|
924b6e220d | ||
|
|
b535a13d57 | ||
|
|
d0d413b9f6 | ||
|
|
1b53be1e08 |
@@ -341,7 +341,7 @@
|
||||
"2. What is $\\exp[1]$?\n",
|
||||
"3. What is $\\exp[-\\infty]$?\n",
|
||||
"4. What is $\\exp[+\\infty]$?\n",
|
||||
"5. A function is convex if we can draw a straight line between any two points on the function, and this line always lies above the function. Similarly, a function is concave if a straight line between any two points always lies below the function. Is the exponential function convex or concave or neither?\n"
|
||||
"5. A function is convex if we can draw a straight line between any two points on the function, and the line lies above the function everywhere between these two points. Similarly, a function is concave if a straight line between any two points lies below the function everywhere between these two points. Is the exponential function convex or concave or neither?\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -265,7 +265,7 @@
|
||||
"\\frac{\\partial L}{\\partial \\phi_{1}}&\\approx & \\frac{L[\\phi_0, \\phi_1+\\delta]-L[\\phi_0, \\phi_1]}{\\delta}\n",
|
||||
"\\end{align}\n",
|
||||
"\n",
|
||||
"We can't do this when there are many parameters; for a million parameters, we would have to evaluate the loss function two million times, and usually computing the gradients directly is much more efficient."
|
||||
"We can't do this when there are many parameters; for a million parameters, we would have to evaluate the loss function one million plus one times, and usually computing the gradients directly is much more efficient."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -279,7 +279,7 @@
|
||||
"f2: true value = 7.137, your value = 0.000\n",
|
||||
"h3: true value = 0.657, your value = 0.000\n",
|
||||
"f3: true value = 2.372, your value = 0.000\n",
|
||||
"like original = 0.139, like from forward pass = 0.000\n"
|
||||
"l_i original = 0.139, l_i from forward pass = 0.000\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -292,7 +292,7 @@
|
||||
"print(\"f2: true value = %3.3f, your value = %3.3f\"%(7.137, f2))\n",
|
||||
"print(\"h3: true value = %3.3f, your value = %3.3f\"%(0.657, h3))\n",
|
||||
"print(\"f3: true value = %3.3f, your value = %3.3f\"%(2.372, f3))\n",
|
||||
"print(\"like original = %3.3f, like from forward pass = %3.3f\"%(l_i_func, l_i))\n"
|
||||
"print(\"l_i original = %3.3f, l_i from forward pass = %3.3f\"%(l_i_func, l_i))\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -115,9 +115,9 @@
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"Now let's run our random network. The weight matrices $\\boldsymbol\\Omega_{1\\ldots K}$ are the entries of the list \"all_weights\" and the biases $\\boldsymbol\\beta_{1\\ldots k}$ are the entries of the list \"all_biases\"\n",
|
||||
"Now let's run our random network. The weight matrices $\\boldsymbol\\Omega_{1\\ldots K}$ are the entries of the list \"all_weights\" and the biases $\\boldsymbol\\beta_{1\\ldots K}$ are the entries of the list \"all_biases\"\n",
|
||||
"\n",
|
||||
"We know that we will need the activations $\\mathbf{f}_{0\\ldots K}$ and the activations $\\mathbf{h}_{1\\ldots K}$ for the forward pass of backpropagation, so we'll store and return these as well.\n"
|
||||
"We know that we will need the preactivations $\\mathbf{f}_{0\\ldots K}$ and the activations $\\mathbf{h}_{1\\ldots K}$ for the forward pass of backpropagation, so we'll store and return these as well.\n"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "5irtyxnLJSGX"
|
||||
@@ -132,7 +132,7 @@
|
||||
" K = len(all_weights) -1\n",
|
||||
"\n",
|
||||
" # We'll store the pre-activations at each layer in a list \"all_f\"\n",
|
||||
" # and the activations in a second list[all_h].\n",
|
||||
" # and the activations in a second list \"all_h\".\n",
|
||||
" all_f = [None] * (K+1)\n",
|
||||
" all_h = [None] * (K+1)\n",
|
||||
"\n",
|
||||
@@ -143,7 +143,7 @@
|
||||
" # Run through the layers, calculating all_f[0...K-1] and all_h[1...K]\n",
|
||||
" for layer in range(K):\n",
|
||||
" # Update preactivations and activations at this layer according to eqn 7.16\n",
|
||||
" # Remmember to use np.matmul for matrrix multiplications\n",
|
||||
" # Remember to use np.matmul for matrix multiplications\n",
|
||||
" # TODO -- Replace the lines below\n",
|
||||
" all_f[layer] = all_h[layer]\n",
|
||||
" all_h[layer+1] = all_f[layer]\n",
|
||||
@@ -166,7 +166,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"# Define in input\n",
|
||||
"# Define input\n",
|
||||
"net_input = np.ones((D_i,1)) * 1.2\n",
|
||||
"# Compute network output\n",
|
||||
"net_output, all_f, all_h = compute_network_output(net_input,all_weights, all_biases)\n",
|
||||
@@ -249,7 +249,7 @@
|
||||
"\n",
|
||||
" # Now work backwards through the network\n",
|
||||
" for layer in range(K,-1,-1):\n",
|
||||
" # TODO Calculate the derivatives of the loss with respect to the biases at layer this from all_dl_df[layer]. (eq 7.21)\n",
|
||||
" # TODO Calculate the derivatives of the loss with respect to the biases at this layer from all_dl_df[layer]. (eq 7.21)\n",
|
||||
" # NOTE! To take a copy of matrix X, use Z=np.array(X)\n",
|
||||
" # REPLACE THIS LINE\n",
|
||||
" all_dl_dbiases[layer] = np.zeros_like(all_biases[layer])\n",
|
||||
@@ -265,7 +265,7 @@
|
||||
"\n",
|
||||
"\n",
|
||||
" if layer > 0:\n",
|
||||
" # TODO Calculate the derivatives of the loss with respect to the pre-activation f (use deriv of ReLu function, first part of last line of eq. 7.24)\n",
|
||||
" # TODO Calculate the derivatives of the loss with respect to the pre-activation f (use derivative of ReLu function, first part of last line of eq. 7.24)\n",
|
||||
" # REPLACE THIS LINE\n",
|
||||
" all_dl_df[layer-1] = np.zeros_like(all_f[layer-1])\n",
|
||||
"\n",
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": [],
|
||||
"authorship_tag": "ABX9TyNHLXFpiSnUzAbzhtOk+bxu",
|
||||
"authorship_tag": "ABX9TyOaATWBrwVMylV1akcKtHjt",
|
||||
"include_colab_link": true
|
||||
},
|
||||
"kernelspec": {
|
||||
@@ -120,7 +120,7 @@
|
||||
" K = len(all_weights)-1\n",
|
||||
"\n",
|
||||
" # We'll store the pre-activations at each layer in a list \"all_f\"\n",
|
||||
" # and the activations in a second list[all_h].\n",
|
||||
" # and the activations in a second list \"all_h\".\n",
|
||||
" all_f = [None] * (K+1)\n",
|
||||
" all_h = [None] * (K+1)\n",
|
||||
"\n",
|
||||
@@ -151,7 +151,7 @@
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"Now let's investigate how this the size of the outputs vary as we change the initialization variance:\n"
|
||||
"Now let's investigate how the size of the outputs varies as we change the initialization variance:\n"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "bIUrcXnOqChl"
|
||||
@@ -177,7 +177,7 @@
|
||||
"data_in = np.random.normal(size=(1,n_data))\n",
|
||||
"net_output, all_f, all_h = compute_network_output(data_in, all_weights, all_biases)\n",
|
||||
"\n",
|
||||
"for layer in range(K):\n",
|
||||
"for layer in range(1,K+1):\n",
|
||||
" print(\"Layer %d, std of hidden units = %3.3f\"%(layer, np.std(all_h[layer])))"
|
||||
],
|
||||
"metadata": {
|
||||
@@ -196,7 +196,7 @@
|
||||
"# Change this to 50 layers with 80 hidden units per layer\n",
|
||||
"\n",
|
||||
"# TO DO\n",
|
||||
"# Now experiment with sigma_sq_omega to try to stop the variance of the forward computation explode"
|
||||
"# Now experiment with sigma_sq_omega to try to stop the variance of the forward computation exploding"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "VL_SO4tar3DC"
|
||||
@@ -249,6 +249,9 @@
|
||||
"\n",
|
||||
"# Main backward pass routine\n",
|
||||
"def backward_pass(all_weights, all_biases, all_f, all_h, y):\n",
|
||||
" # Retrieve number of layers\n",
|
||||
" K = all_weights\n",
|
||||
"\n",
|
||||
" # We'll store the derivatives dl_dweights and dl_dbiases in lists as well\n",
|
||||
" all_dl_dweights = [None] * (K+1)\n",
|
||||
" all_dl_dbiases = [None] * (K+1)\n",
|
||||
|
||||
BIN
UDL_Errata.pdf
BIN
UDL_Errata.pdf
Binary file not shown.
@@ -15,8 +15,8 @@
|
||||
<ul>
|
||||
<li>
|
||||
<p style="font-size: larger; margin-bottom: 0">Download full PDF <a
|
||||
href="https://github.com/udlbook/udlbook/releases/download/v2.0.1/UnderstandingDeepLearning_02_15_24_C.pdf">here</a>
|
||||
</p>2024-02-15. CC-BY-NC-ND license<br>
|
||||
href="https://github.com/udlbook/udlbook/releases/download/v2.0.2/UnderstandingDeepLearning_03_06_24_C.pdf">here</a>
|
||||
</p>2024-03-06. CC-BY-NC-ND license<br>
|
||||
<img src="https://img.shields.io/github/downloads/udlbook/udlbook/total" alt="download stats shield">
|
||||
</li>
|
||||
<li> Order your copy from <a href="https://mitpress.mit.edu/9780262048644/understanding-deep-learning/">here </a></li>
|
||||
|
||||
Reference in New Issue
Block a user