Compare commits
70 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
de0a8946a6 | ||
|
|
12672832f5 | ||
|
|
51444a4bbb | ||
|
|
987df8cd88 | ||
|
|
9873b8b20d | ||
|
|
bc0ca18695 | ||
|
|
d66ba78862 | ||
|
|
a8fe82b5e1 | ||
|
|
ac540f1294 | ||
|
|
080bdd319d | ||
|
|
60d50aa9d2 | ||
|
|
d45cba5c95 | ||
|
|
e9f75027bb | ||
|
|
9de32ff327 | ||
|
|
871304357c | ||
|
|
c385687d8a | ||
|
|
207ff5e636 | ||
|
|
cc9c695ff7 | ||
|
|
75646c2c8e | ||
|
|
5552890706 | ||
|
|
01755deefe | ||
|
|
afb9ead4d8 | ||
|
|
57151930de | ||
|
|
ca85255c74 | ||
|
|
3003437b04 | ||
|
|
5e726fcf4e | ||
|
|
6a8273459f | ||
|
|
1c2e19aa3b | ||
|
|
e818dfe054 | ||
|
|
4a08818706 | ||
|
|
16b72a8a9e | ||
|
|
44a3e5f678 | ||
|
|
a644267053 | ||
|
|
69a2b00c9d | ||
|
|
9f0570e26f | ||
|
|
e3a8bb9ac4 | ||
|
|
49da623d86 | ||
|
|
0c771fd677 | ||
|
|
5302b32929 | ||
|
|
d5586e57fc | ||
|
|
d0acc42d81 | ||
|
|
f3188ac35a | ||
|
|
ad1b6a558b | ||
|
|
7eadd56eaa | ||
|
|
53c1357df7 | ||
|
|
8d862ede26 | ||
|
|
44bbfbed91 | ||
|
|
f65f0b1ddf | ||
|
|
1d6d6b6fbe | ||
|
|
62779ec260 | ||
|
|
be3edb60f9 | ||
|
|
b9403e091b | ||
|
|
2c916d9a87 | ||
|
|
310b71e203 | ||
|
|
fcb1333aed | ||
|
|
c39267b3b4 | ||
|
|
4291ed453c | ||
|
|
ab2ff3177a | ||
|
|
c2a4d40da3 | ||
|
|
aa75d3ad73 | ||
|
|
1f0c224a7d | ||
|
|
eb29a28284 | ||
|
|
7648203767 | ||
|
|
64e1d82d04 | ||
|
|
f7450d1875 | ||
|
|
884a7e358b | ||
|
|
2016977f30 | ||
|
|
f88127c0d2 | ||
|
|
a637eec888 | ||
|
|
ddd6bf9149 |
@@ -295,7 +295,7 @@
|
||||
"\n",
|
||||
"Throughout the book, we'll be using some special functions (see Appendix B.1.3). The most important of these are the logarithm and exponential functions. Let's investigate their properties.\n",
|
||||
"\n",
|
||||
"We'll start with the exponential function $y=\\exp[x]=e^x$ which maps the real line $[-\\infty,+\\infty]$ to non-negative numbers $[0,+\\infty]$."
|
||||
"We'll start with the exponential function $y=\\exp[x]=e^x$ which maps the real line $(-\\infty,+\\infty)$ to positive numbers $(0,+\\infty)$."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -301,7 +301,7 @@
|
||||
"source": [
|
||||
"def loss_function_1D(dist_prop, data, model, phi_start, search_direction):\n",
|
||||
" # Return the loss after moving this far\n",
|
||||
" return compute_loss(data[0,:], data[1,:], model, phi_start+ search_direction * dist_prop)\n",
|
||||
" return compute_loss(data[0,:], data[1,:], model, phi_start - search_direction * dist_prop)\n",
|
||||
"\n",
|
||||
"def line_search(data, model, phi, gradient, thresh=.00001, max_dist = 0.1, max_iter = 15, verbose=False):\n",
|
||||
" # Initialize four points along the range we are going to search\n",
|
||||
@@ -316,10 +316,10 @@
|
||||
" # Increment iteration counter (just to prevent an infinite loop)\n",
|
||||
" n_iter = n_iter+1\n",
|
||||
" # Calculate all four points\n",
|
||||
" lossa = loss_function_1D(a, data, model, phi,gradient)\n",
|
||||
" lossb = loss_function_1D(b, data, model, phi,gradient)\n",
|
||||
" lossc = loss_function_1D(c, data, model, phi,gradient)\n",
|
||||
" lossd = loss_function_1D(d, data, model, phi,gradient)\n",
|
||||
" lossa = loss_function_1D(a, data, model, phi, gradient)\n",
|
||||
" lossb = loss_function_1D(b, data, model, phi, gradient)\n",
|
||||
" lossc = loss_function_1D(c, data, model, phi, gradient)\n",
|
||||
" lossd = loss_function_1D(d, data, model, phi, gradient)\n",
|
||||
"\n",
|
||||
" if verbose:\n",
|
||||
" print('Iter %d, a=%3.3f, b=%3.3f, c=%3.3f, d=%3.3f'%(n_iter, a,b,c,d))\n",
|
||||
@@ -365,7 +365,7 @@
|
||||
"def gradient_descent_step(phi, data, model):\n",
|
||||
" # TODO -- update Phi with the gradient descent step (equation 6.3)\n",
|
||||
" # 1. Compute the gradient (you wrote this function above)\n",
|
||||
" # 2. Find the best step size alpha using line search function (above) -- use negative gradient as going downhill\n",
|
||||
" # 2. Find the best step size alpha using line search function (above)\n",
|
||||
" # 3. Update the parameters phi based on the gradient and the step size alpha.\n",
|
||||
"\n",
|
||||
" return phi"
|
||||
|
||||
@@ -325,7 +325,7 @@
|
||||
" for layer in range(1,K):\n",
|
||||
" aggregate_dl_df[layer][:,c_data] = np.squeeze(all_dl_df[layer])\n",
|
||||
"\n",
|
||||
"for layer in range(1,K):\n",
|
||||
"for layer in reversed(range(1,K)):\n",
|
||||
" print(\"Layer %d, std of dl_dh = %3.3f\"%(layer, np.std(aggregate_dl_df[layer].ravel())))\n"
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -293,7 +293,8 @@
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"# Plot the noise, bias and variance as a function of capacity\n",
|
||||
"hidden_variables = [1,2,3,4,5,6,7,8,9,10,11,12]\n",
|
||||
"n_hidden = 12\n",
|
||||
"hidden_variables = list(range(1, n_hidden + 1))\n",
|
||||
"bias = np.zeros((len(hidden_variables),1)) ;\n",
|
||||
"variance = np.zeros((len(hidden_variables),1)) ;\n",
|
||||
"\n",
|
||||
@@ -321,7 +322,7 @@
|
||||
"ax.plot(hidden_variables, variance, 'k-')\n",
|
||||
"ax.plot(hidden_variables, bias, 'r-')\n",
|
||||
"ax.plot(hidden_variables, variance+bias, 'g-')\n",
|
||||
"ax.set_xlim(0,12)\n",
|
||||
"ax.set_xlim(0,n_hidden)\n",
|
||||
"ax.set_ylim(0,0.5)\n",
|
||||
"ax.set_xlabel(\"Model capacity\")\n",
|
||||
"ax.set_ylabel(\"Variance\")\n",
|
||||
@@ -333,15 +334,6 @@
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [],
|
||||
"metadata": {
|
||||
"id": "WKUyOAywL_b2"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -4,7 +4,6 @@
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": [],
|
||||
"authorship_tag": "ABX9TyPJzymRTuvoWggIskM2Kamc",
|
||||
"include_colab_link": true
|
||||
},
|
||||
"kernelspec": {
|
||||
@@ -458,14 +457,14 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"def dldphi0(phi, lambda_):\n",
|
||||
"def dregdphi0(phi, lambda_):\n",
|
||||
" # TODO compute the derivative with respect to phi0\n",
|
||||
" # Replace this line:]\n",
|
||||
" deriv = 0\n",
|
||||
"\n",
|
||||
" return deriv\n",
|
||||
"\n",
|
||||
"def dldphi1(phi, lambda_):\n",
|
||||
"def dregdphi1(phi, lambda_):\n",
|
||||
" # TODO compute the derivative with respect to phi1\n",
|
||||
" # Replace this line:]\n",
|
||||
" deriv = 0\n",
|
||||
@@ -475,8 +474,8 @@
|
||||
"\n",
|
||||
"\n",
|
||||
"def compute_gradient2(data_x, data_y, phi, lambda_):\n",
|
||||
" dl_dphi0 = gabor_deriv_phi0(data_x, data_y, phi[0],phi[1])+dldphi0(np.squeeze(phi), lambda_)\n",
|
||||
" dl_dphi1 = gabor_deriv_phi1(data_x, data_y, phi[0],phi[1])+dldphi1(np.squeeze(phi), lambda_)\n",
|
||||
" dl_dphi0 = gabor_deriv_phi0(data_x, data_y, phi[0],phi[1])+dregdphi0(np.squeeze(phi), lambda_)\n",
|
||||
" dl_dphi1 = gabor_deriv_phi1(data_x, data_y, phi[0],phi[1])+dregdphi1(np.squeeze(phi), lambda_)\n",
|
||||
" # Return the gradient\n",
|
||||
" return np.array([[dl_dphi0],[dl_dphi1]])\n",
|
||||
"\n",
|
||||
|
||||
@@ -342,7 +342,7 @@
|
||||
"[\\mathbf{h}^*;1]\\biggr],\n",
|
||||
"\\end{align}\n",
|
||||
"\n",
|
||||
"where the notation $[\\mathbf{h}^{*T},1]$ is a row vector containing $\\mathbf{h}^{T}$ with a one appended to the end and $[\\mathbf{h};1 ]$ is a column vector containing $\\mathbf{h}$ with a one appended to the end.\n",
|
||||
"where the notation $[\\mathbf{h}^{*T},1]$ is a row vector containing $\\mathbf{h}^{*T}$ with a one appended to the end and $[\\mathbf{h}^{*};1 ]$ is a column vector containing $\\mathbf{h}^{*}$ with a one appended to the end.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"To compute this, we reformulated the integrand using the relations from appendices C.3.3 and C.3.4 as the product of a normal distribution in $\\boldsymbol\\phi$ and a constant with respect\n",
|
||||
|
||||
@@ -4,7 +4,6 @@
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": [],
|
||||
"authorship_tag": "ABX9TyMbSR8fzpXvO6TIQdO7bI0H",
|
||||
"include_colab_link": true
|
||||
},
|
||||
"kernelspec": {
|
||||
@@ -71,9 +70,9 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"def subsample(x_in):\n",
|
||||
"def downsample(x_in):\n",
|
||||
" x_out = np.zeros(( int(np.ceil(x_in.shape[0]/2)), int(np.ceil(x_in.shape[1]/2)) ))\n",
|
||||
" # TODO -- write the subsampling routine\n",
|
||||
" # TODO -- write the downsampling routine\n",
|
||||
" # Replace this line\n",
|
||||
" x_out = x_out\n",
|
||||
"\n",
|
||||
@@ -91,8 +90,8 @@
|
||||
"source": [
|
||||
"print(\"Original:\")\n",
|
||||
"print(orig_4_4)\n",
|
||||
"print(\"Subsampled:\")\n",
|
||||
"print(subsample(orig_4_4))"
|
||||
"print(\"Downsampled:\")\n",
|
||||
"print(downsample(orig_4_4))"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "O_i0y72_JwGZ"
|
||||
@@ -127,24 +126,24 @@
|
||||
"image = Image.open('test_image.png')\n",
|
||||
"# convert image to numpy array\n",
|
||||
"data = asarray(image)\n",
|
||||
"data_subsample = subsample(data);\n",
|
||||
"data_downsample = downsample(data);\n",
|
||||
"\n",
|
||||
"plt.figure(figsize=(5,5))\n",
|
||||
"plt.imshow(data, cmap='gray')\n",
|
||||
"plt.show()\n",
|
||||
"\n",
|
||||
"plt.figure(figsize=(5,5))\n",
|
||||
"plt.imshow(data_subsample, cmap='gray')\n",
|
||||
"plt.imshow(data_downsample, cmap='gray')\n",
|
||||
"plt.show()\n",
|
||||
"\n",
|
||||
"data_subsample2 = subsample(data_subsample)\n",
|
||||
"data_downsample2 = downsample(data_downsample)\n",
|
||||
"plt.figure(figsize=(5,5))\n",
|
||||
"plt.imshow(data_subsample2, cmap='gray')\n",
|
||||
"plt.imshow(data_downsample2, cmap='gray')\n",
|
||||
"plt.show()\n",
|
||||
"\n",
|
||||
"data_subsample3 = subsample(data_subsample2)\n",
|
||||
"data_downsample3 = downsample(data_downsample2)\n",
|
||||
"plt.figure(figsize=(5,5))\n",
|
||||
"plt.imshow(data_subsample3, cmap='gray')\n",
|
||||
"plt.imshow(data_downsample3, cmap='gray')\n",
|
||||
"plt.show()"
|
||||
],
|
||||
"metadata": {
|
||||
@@ -345,11 +344,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"# Let's re-upsample, sub-sampled rick\n",
|
||||
"data_duplicate = duplicate(data_subsample3);\n",
|
||||
"# Let's re-upsample, downsampled rick\n",
|
||||
"data_duplicate = duplicate(data_downsample3);\n",
|
||||
"\n",
|
||||
"plt.figure(figsize=(5,5))\n",
|
||||
"plt.imshow(data_subsample3, cmap='gray')\n",
|
||||
"plt.imshow(data_downsample3, cmap='gray')\n",
|
||||
"plt.show()\n",
|
||||
"\n",
|
||||
"plt.figure(figsize=(5,5))\n",
|
||||
@@ -388,7 +387,7 @@
|
||||
"# The input x_high_res is the original high res image, from which you can deduce the position of the maximum index\n",
|
||||
"def max_unpool(x_in, x_high_res):\n",
|
||||
" x_out = np.zeros(( x_in.shape[0]*2, x_in.shape[1]*2 ))\n",
|
||||
" # TODO -- write the subsampling routine\n",
|
||||
" # TODO -- write the unpooling routine\n",
|
||||
" # Replace this line\n",
|
||||
" x_out = x_out\n",
|
||||
"\n",
|
||||
@@ -417,7 +416,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"# Let's re-upsample, sub-sampled rick\n",
|
||||
"# Let's re-upsample, down-sampled rick\n",
|
||||
"data_max_unpool= max_unpool(data_maxpool3,data_maxpool2);\n",
|
||||
"\n",
|
||||
"plt.figure(figsize=(5,5))\n",
|
||||
@@ -489,7 +488,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"# Let's re-upsample, sub-sampled rick\n",
|
||||
"# Let's re-upsample, down-sampled rick\n",
|
||||
"data_bilinear = bilinear(data_meanpool3);\n",
|
||||
"\n",
|
||||
"plt.figure(figsize=(5,5))\n",
|
||||
|
||||
@@ -1,26 +1,10 @@
|
||||
{
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": [],
|
||||
"authorship_tag": "ABX9TyORZF8xy4X1yf4oRhRq8Rtm",
|
||||
"include_colab_link": true
|
||||
},
|
||||
"kernelspec": {
|
||||
"name": "python3",
|
||||
"display_name": "Python 3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "view-in-github",
|
||||
"colab_type": "text"
|
||||
"colab_type": "text",
|
||||
"id": "view-in-github"
|
||||
},
|
||||
"source": [
|
||||
"<a href=\"https://colab.research.google.com/github/udlbook/udlbook/blob/main/Notebooks/Chap10/10_5_Convolution_For_MNIST.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
|
||||
@@ -28,6 +12,9 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "t9vk9Elugvmi"
|
||||
},
|
||||
"source": [
|
||||
"# **Notebook 10.5: Convolution for MNIST**\n",
|
||||
"\n",
|
||||
@@ -37,14 +24,18 @@
|
||||
"\n",
|
||||
"Work through the cells below, running each cell in turn. In various places you will see the words \"TODO\". Follow the instructions at these places and make predictions about what is going to happen or write code to complete the functions.\n",
|
||||
"\n",
|
||||
"If you are using Google Colab, you can change your runtime to an instance with GPU support to speed up training, e.g. a T4 GPU. If you do this, the cell below should output ``device(type='cuda')``\n",
|
||||
"\n",
|
||||
"Contact me at udlbookmail@gmail.com if you find any mistakes or have any suggestions.\n"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "t9vk9Elugvmi"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "YrXWAH7sUWvU"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import torch\n",
|
||||
"import torchvision\n",
|
||||
@@ -52,16 +43,18 @@
|
||||
"import torch.nn.functional as F\n",
|
||||
"import torch.optim as optim\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import random"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "YrXWAH7sUWvU"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
"import random\n",
|
||||
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
|
||||
"device"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "wScBGXXFVadm"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Run this once to load the train and test data straight into a dataloader class\n",
|
||||
"# that will provide the batches\n",
|
||||
@@ -73,7 +66,7 @@
|
||||
"batch_size_train = 64\n",
|
||||
"batch_size_test = 1000\n",
|
||||
"\n",
|
||||
"# TODO Change this directory to point towards an existing directory\n",
|
||||
"# TODO Change this directory to point towards an existing directory (No change needed if using Google Colab)\n",
|
||||
"myDir = '/files/'\n",
|
||||
"\n",
|
||||
"train_loader = torch.utils.data.DataLoader(\n",
|
||||
@@ -93,15 +86,15 @@
|
||||
" (0.1307,), (0.3081,))\n",
|
||||
" ])),\n",
|
||||
" batch_size=batch_size_test, shuffle=True)"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "wScBGXXFVadm"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "8bKADvLHbiV5"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Let's draw some of the training data\n",
|
||||
"examples = enumerate(test_loader)\n",
|
||||
@@ -116,24 +109,24 @@
|
||||
" plt.xticks([])\n",
|
||||
" plt.yticks([])\n",
|
||||
"plt.show()"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "8bKADvLHbiV5"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"Define the network. This is a more typical way to define a network than the sequential structure. We define a class for the network, and define the parameters in the constructor. Then we use a function called forward to actually run the network. It's easy to see how you might use residual connections in this format."
|
||||
],
|
||||
"metadata": {
|
||||
"id": "_sFvRDGrl4qe"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"Define the network. This is a more typical way to define a network than the sequential structure. We define a class for the network, and define the parameters in the constructor. Then we use a function called forward to actually run the network. It's easy to see how you might use residual connections in this format."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "EQkvw2KOPVl7"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from os import X_OK\n",
|
||||
"# TODO Change this class to implement\n",
|
||||
@@ -174,52 +167,54 @@
|
||||
"\n",
|
||||
"\n",
|
||||
"\n"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "EQkvw2KOPVl7"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "qWZtkCZcU_dg"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# He initialization of weights\n",
|
||||
"def weights_init(layer_in):\n",
|
||||
" if isinstance(layer_in, nn.Linear):\n",
|
||||
" nn.init.kaiming_uniform_(layer_in.weight)\n",
|
||||
" layer_in.bias.data.fill_(0.0)"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "qWZtkCZcU_dg"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "FslroPJJffrh"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Create network\n",
|
||||
"model = Net()\n",
|
||||
"model = Net().to(device)\n",
|
||||
"# Initialize model weights\n",
|
||||
"model.apply(weights_init)\n",
|
||||
"# Define optimizer\n",
|
||||
"optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "FslroPJJffrh"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "xKQd9PzkQ766"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Main training routine\n",
|
||||
"def train(epoch):\n",
|
||||
" model.train()\n",
|
||||
" # Get each\n",
|
||||
" for batch_idx, (data, target) in enumerate(train_loader):\n",
|
||||
" data = data.to(device)\n",
|
||||
" target = target.to(device)\n",
|
||||
" optimizer.zero_grad()\n",
|
||||
" output = model(data)\n",
|
||||
" loss = F.nll_loss(output, target)\n",
|
||||
@@ -229,15 +224,15 @@
|
||||
" if batch_idx % 10 == 0:\n",
|
||||
" print('Train Epoch: {} [{}/{}]\\tLoss: {:.6f}'.format(\n",
|
||||
" epoch, batch_idx * len(data), len(train_loader.dataset), loss.item()))"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "xKQd9PzkQ766"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "Byn-f7qWRLxX"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Run on test data\n",
|
||||
"def test():\n",
|
||||
@@ -246,6 +241,8 @@
|
||||
" correct = 0\n",
|
||||
" with torch.no_grad():\n",
|
||||
" for data, target in test_loader:\n",
|
||||
" data = data.to(device)\n",
|
||||
" target = target.to(device)\n",
|
||||
" output = model(data)\n",
|
||||
" test_loss += F.nll_loss(output, target, size_average=False).item()\n",
|
||||
" pred = output.data.max(1, keepdim=True)[1]\n",
|
||||
@@ -254,15 +251,15 @@
|
||||
" print('\\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\\n'.format(\n",
|
||||
" test_loss, correct, len(test_loader.dataset),\n",
|
||||
" 100. * correct / len(test_loader.dataset)))"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "Byn-f7qWRLxX"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "YgLaex1pfhqz"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Get initial performance\n",
|
||||
"test()\n",
|
||||
@@ -271,15 +268,15 @@
|
||||
"for epoch in range(1, n_epochs + 1):\n",
|
||||
" train(epoch)\n",
|
||||
" test()"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "YgLaex1pfhqz"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "o7fRUAy9Se1B"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Run network on data we got before and show predictions\n",
|
||||
"output = model(example_data)\n",
|
||||
@@ -294,12 +291,23 @@
|
||||
" plt.xticks([])\n",
|
||||
" plt.yticks([])\n",
|
||||
"plt.show()"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "o7fRUAy9Se1B"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
]
|
||||
}
|
||||
]
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"authorship_tag": "ABX9TyORZF8xy4X1yf4oRhRq8Rtm",
|
||||
"include_colab_link": true,
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
||||
@@ -65,7 +65,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"# K is width, D is number of hidden units in each layer\n",
|
||||
"# K is depth, D is number of hidden units in each layer\n",
|
||||
"def init_params(K, D):\n",
|
||||
" # Set seed so we always get the same random numbers\n",
|
||||
" np.random.seed(1)\n",
|
||||
|
||||
@@ -1,18 +1,16 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"colab_type": "text",
|
||||
"id": "view-in-github"
|
||||
"id": "view-in-github",
|
||||
"colab_type": "text"
|
||||
},
|
||||
"source": [
|
||||
"<a href=\"https://colab.research.google.com/github/udlbook/udlbook/blob/main/Notebooks/Chap17/17_1_Latent_Variable_Models.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "t9vk9Elugvmi"
|
||||
@@ -43,7 +41,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "IyVn-Gi-p7wf"
|
||||
@@ -79,7 +76,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "KB9FU34onW1j"
|
||||
@@ -145,7 +141,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "sQg2gKR5zMrF"
|
||||
@@ -223,7 +218,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "0X4NwixzqxtZ"
|
||||
@@ -254,7 +248,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "25xqXnmFo-PH"
|
||||
@@ -281,7 +274,7 @@
|
||||
"# We can't integrate this function in closed form\n",
|
||||
"# So let's approximate it as a sum over the z values (z = np.arange(-3,3,0.01))\n",
|
||||
"# You will need the functions get_likelihood() and get_prior()\n",
|
||||
"# To make this a valid probability distribution, you need to divide\n",
|
||||
"# To make this a valid probability distribution, you need to multiply\n",
|
||||
"# By the z-increment (0.01)\n",
|
||||
"# Replace this line\n",
|
||||
"pr_x1_x2 = np.zeros_like(x1_mesh)\n",
|
||||
@@ -292,7 +285,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "W264N7By_h9y"
|
||||
@@ -320,7 +312,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "D7N7oqLe-eJO"
|
||||
@@ -388,9 +379,8 @@
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"authorship_tag": "ABX9TyOSEQVqxE5KrXmsZVh9M3gq",
|
||||
"include_colab_link": true,
|
||||
"provenance": []
|
||||
"provenance": [],
|
||||
"include_colab_link": true
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
|
||||
@@ -437,7 +437,7 @@
|
||||
" new_state = np.random.choice(a=np.arange(0,transition_probabilities_given_action.shape[0]),p = transition_probabilities_given_action[:,state,action])\n",
|
||||
" # Return the reward\n",
|
||||
" reward = reward_structure[new_state]\n",
|
||||
" is_terminal = new_state in [terminal_states]\n",
|
||||
" is_terminal = new_state in terminal_states\n",
|
||||
"\n",
|
||||
" return new_state, reward, action, is_terminal"
|
||||
]
|
||||
|
||||
@@ -265,7 +265,7 @@
|
||||
"\n",
|
||||
"In this icy environment the penguin is at one of the discrete cells in the gridworld. The agent starts each episode on a randomly chosen cell. The environment state dynamics are captured by the transition probabilities $Pr(s_{t+1} |s_t, a_t)$ where $s_t$ is the current state, $a_t$ is the action chosen, and $s_{t+1}$ is the next state at decision stage t. At each decision stage, the penguin can move in one of four directions: $a=0$ means try to go upward, $a=1$, right, $a=2$ down and $a=3$ left.\n",
|
||||
"\n",
|
||||
"However, the ice is slippery, so we don't always go the direction we want to: every time the agent chooses an action, with 0.25 probability, the environment changes the action taken to a differenct action, which is uniformly sampled from the other available actions.\n",
|
||||
"However, the ice is slippery, so we don't always go the direction we want to: every time the agent chooses an action, with 0.25 probability, the environment changes the action taken to a different action, which is uniformly sampled from the other available actions.\n",
|
||||
"\n",
|
||||
"The rewards are deterministic; the penguin will receive a reward of +3 if it reaches the fish, -2 if it slips into a hole and 0 otherwise.\n",
|
||||
"\n",
|
||||
@@ -470,7 +470,7 @@
|
||||
"\n",
|
||||
" # Return the reward -- here the reward is for arriving at the state\n",
|
||||
" reward = reward_structure[new_state]\n",
|
||||
" is_terminal = new_state in [terminal_states]\n",
|
||||
" is_terminal = new_state in terminal_states\n",
|
||||
"\n",
|
||||
" return new_state, reward, action, is_terminal"
|
||||
]
|
||||
|
||||
489
Trees/SAT_Construction.ipynb
Normal file
489
Trees/SAT_Construction.ipynb
Normal file
File diff suppressed because one or more lines are too long
271
Trees/SAT_Construction2.ipynb
Normal file
271
Trees/SAT_Construction2.ipynb
Normal file
File diff suppressed because one or more lines are too long
261
Trees/SAT_Construction2_Answers.ipynb
Normal file
261
Trees/SAT_Construction2_Answers.ipynb
Normal file
File diff suppressed because one or more lines are too long
570
Trees/SAT_Construction_Answers.ipynb
Normal file
570
Trees/SAT_Construction_Answers.ipynb
Normal file
File diff suppressed because one or more lines are too long
1061
Trees/SAT_Crossword.ipynb
Normal file
1061
Trees/SAT_Crossword.ipynb
Normal file
File diff suppressed because one or more lines are too long
911
Trees/SAT_Crossword_Answers.ipynb
Normal file
911
Trees/SAT_Crossword_Answers.ipynb
Normal file
File diff suppressed because one or more lines are too long
275
Trees/SAT_Graph_Coloring.ipynb
Normal file
275
Trees/SAT_Graph_Coloring.ipynb
Normal file
File diff suppressed because one or more lines are too long
279
Trees/SAT_Graph_Coloring_Answers.ipynb
Normal file
279
Trees/SAT_Graph_Coloring_Answers.ipynb
Normal file
File diff suppressed because one or more lines are too long
270
Trees/SAT_Sudoku.ipynb
Normal file
270
Trees/SAT_Sudoku.ipynb
Normal file
File diff suppressed because one or more lines are too long
433
Trees/SAT_Sudoku_Answers.ipynb
Normal file
433
Trees/SAT_Sudoku_Answers.ipynb
Normal file
File diff suppressed because one or more lines are too long
251
Trees/SAT_Tseitin.ipynb
Normal file
251
Trees/SAT_Tseitin.ipynb
Normal file
File diff suppressed because one or more lines are too long
310
Trees/SAT_Tseitin_Answers.ipynb
Normal file
310
Trees/SAT_Tseitin_Answers.ipynb
Normal file
File diff suppressed because one or more lines are too long
264
Trees/SAT_Z3.ipynb
Normal file
264
Trees/SAT_Z3.ipynb
Normal file
File diff suppressed because one or more lines are too long
335
Trees/SAT_Z3_Answers.ipynb
Normal file
335
Trees/SAT_Z3_Answers.ipynb
Normal file
File diff suppressed because one or more lines are too long
BIN
Trees/cb_2018_us_state_500k.zip
Normal file
BIN
Trees/cb_2018_us_state_500k.zip
Normal file
Binary file not shown.
Binary file not shown.
BIN
UDL_Errata.pdf
BIN
UDL_Errata.pdf
Binary file not shown.
429
notebooks/DeepNN/DeepNetworks_Answers.ipynb
Normal file
429
notebooks/DeepNN/DeepNetworks_Answers.ipynb
Normal file
File diff suppressed because one or more lines are too long
@@ -33,45 +33,75 @@ const citation = `
|
||||
`;
|
||||
|
||||
const news = [
|
||||
{
|
||||
date: "01/23/25",
|
||||
content: (
|
||||
<HeroNewsItemContent>
|
||||
Added{" "}
|
||||
<UDLLink href="https://github.com/udlbook/udlbook/raw/main/understanding-deep-learning-final.bib">
|
||||
bibfile
|
||||
</UDLLink>{" "} for book and
|
||||
<UDLLink href="https://github.com/udlbook/udlbook/raw/main/UDL_Equations.tex">
|
||||
LaTeX
|
||||
</UDLLink>{" "}
|
||||
for all equations
|
||||
</HeroNewsItemContent>
|
||||
),
|
||||
},
|
||||
{
|
||||
date: "12/17/24",
|
||||
content: (
|
||||
<HeroNewsItemContent>
|
||||
{
|
||||
// date: "03/6/25",
|
||||
// content: (
|
||||
// <HeroNewsItemContent>
|
||||
// New {" "}
|
||||
// <UDLLink href="https://dl4ds.github.io/sp2025/lectures/">
|
||||
// slides and video lectures
|
||||
// </UDLLink>{" "}
|
||||
// that closely follow the book from Thomas Gardos of Boston University.
|
||||
// </HeroNewsItemContent>
|
||||
// ),
|
||||
},
|
||||
{
|
||||
date: "02/19/25",
|
||||
content: (
|
||||
<HeroNewsItemContent>
|
||||
Three new blogs {" "}
|
||||
<UDLLink href="https://rbcborealis.com/research-blogs/odes-and-sdes-for-machine-learning/">
|
||||
[1]
|
||||
</UDLLink>
|
||||
<UDLLink href="https://rbcborealis.com/research-blogs/introduction-ordinary-differential-equations/">
|
||||
[2]
|
||||
</UDLLink>
|
||||
<UDLLink href="https://rbcborealis.com/research-blogs/closed-form-solutions-for-odes/">
|
||||
[3]
|
||||
</UDLLink>{" "}
|
||||
on ODEs and SDEs in machine learning.
|
||||
</HeroNewsItemContent>
|
||||
),
|
||||
},
|
||||
{
|
||||
date: "01/23/25",
|
||||
content: (
|
||||
<HeroNewsItemContent>
|
||||
Added{" "}
|
||||
<UDLLink href="https://github.com/udlbook/udlbook/raw/main/understanding-deep-learning-final.bib">
|
||||
bibfile
|
||||
</UDLLink>{" "} for book and
|
||||
<UDLLink href="https://github.com/udlbook/udlbook/raw/main/UDL_Equations.tex">
|
||||
LaTeX
|
||||
</UDLLink>{" "}
|
||||
for all equations
|
||||
</HeroNewsItemContent>
|
||||
),
|
||||
},
|
||||
{
|
||||
date: "12/17/24",
|
||||
content: (
|
||||
<HeroNewsItemContent>
|
||||
|
||||
<UDLLink href="https://www.youtube.com/playlist?list=PLRdABJkXXytCz19PsZ1PCQBKoZGV069k3">
|
||||
Video lectures
|
||||
</UDLLink>{" "}
|
||||
for chapters 1-12 from Tamer Elsayed of Qatar University.
|
||||
</HeroNewsItemContent>
|
||||
),
|
||||
},
|
||||
{
|
||||
date: "12/05/24",
|
||||
content: (
|
||||
<HeroNewsItemContent>
|
||||
New{" "}
|
||||
<UDLLink href="https://rbcborealis.com/research-blogs/neural-network-gaussian-processes/">
|
||||
blog
|
||||
</UDLLink>{" "}
|
||||
on Neural network Gaussian processes
|
||||
</HeroNewsItemContent>
|
||||
),
|
||||
},
|
||||
<UDLLink href="https://www.youtube.com/playlist?list=PLRdABJkXXytCz19PsZ1PCQBKoZGV069k3">
|
||||
Video lectures
|
||||
</UDLLink>{" "}
|
||||
for chapters 1-12 from Tamer Elsayed of Qatar University.
|
||||
</HeroNewsItemContent>
|
||||
),
|
||||
},
|
||||
{
|
||||
date: "12/05/24",
|
||||
content: (
|
||||
<HeroNewsItemContent>
|
||||
New{" "}
|
||||
<UDLLink href="https://rbcborealis.com/research-blogs/neural-network-gaussian-processes/">
|
||||
blog
|
||||
</UDLLink>{" "}
|
||||
on Neural network Gaussian processes
|
||||
</HeroNewsItemContent>
|
||||
),
|
||||
},
|
||||
|
||||
{
|
||||
date: "11/14/24",
|
||||
@@ -272,8 +302,8 @@ export default function HeroSection() {
|
||||
<HeroImgWrap>
|
||||
<Img src={img} alt="Book Cover" />
|
||||
</HeroImgWrap>
|
||||
<HeroLink href="https://github.com/udlbook/udlbook/releases/download/v5.00/UnderstandingDeepLearning_11_21_24_C.pdf">
|
||||
Download full PDF (21 November 2024)
|
||||
<HeroLink href="https://github.com/udlbook/udlbook/releases/download/v5.0.2/UnderstandingDeepLearning_05_29_25_C.pdf">
|
||||
Download full PDF (29 May 2025)
|
||||
</HeroLink>
|
||||
<br />
|
||||
<HeroDownloadsImg
|
||||
|
||||
@@ -69,23 +69,7 @@ export default function MediaSection() {
|
||||
</VideoFrame>
|
||||
</Column1>
|
||||
<Column2>
|
||||
Deeper insights podcast
|
||||
<VideoFrame>
|
||||
<iframe
|
||||
width="100%"
|
||||
height="100%"
|
||||
src="https://www.youtube.com/embed/nQf4o9TDSHI?si=uMk66zLD7uhuSnQ1&controls=0"
|
||||
title="YouTube video player"
|
||||
frameBorder="2"
|
||||
allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share"
|
||||
allowfullscreen
|
||||
></iframe>
|
||||
</VideoFrame>
|
||||
</Column2>
|
||||
</MediaRow>
|
||||
<MediaRow2>
|
||||
<Column1>
|
||||
<TopLine>Reviews</TopLine>
|
||||
<TopLine>Reviews</TopLine>
|
||||
<MediaContent>
|
||||
{/* TODO: add dynamic rendering for reviews */}
|
||||
<ul>
|
||||
@@ -135,8 +119,6 @@ export default function MediaSection() {
|
||||
</li>
|
||||
</ul>
|
||||
</MediaContent>
|
||||
</Column1>
|
||||
<Column2>
|
||||
<TopLine>Interviews</TopLine>
|
||||
<MediaContent>
|
||||
<ul>
|
||||
@@ -151,11 +133,16 @@ export default function MediaSection() {
|
||||
</ul>
|
||||
</MediaContent>
|
||||
<TopLine>Video lectures</TopLine>
|
||||
<MediaLink href="https://www.youtube.com/playlist?list=PLRdABJkXXytCz19PsZ1PCQBKoZGV069k3">
|
||||
Video lectures
|
||||
</MediaLink>{" "} for chapter 1-12 from Tamer Elsayed
|
||||
<ul>
|
||||
<li>
|
||||
<MediaLink href="https://www.youtube.com/playlist?list=PLRdABJkXXytCz19PsZ1PCQBKoZGV069k3">
|
||||
Video lectures
|
||||
</MediaLink>{" "} for chapters 1-12 from Tamer Elsayed
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
</Column2>
|
||||
</MediaRow2>
|
||||
</MediaRow>
|
||||
</MediaWrapper>
|
||||
</MediaContainer>
|
||||
</>
|
||||
|
||||
@@ -709,6 +709,50 @@ const responsibleAI = [
|
||||
},
|
||||
];
|
||||
|
||||
const ODESDE = [
|
||||
{
|
||||
text: "ODEs and SDEs in machine learning",
|
||||
link: "https://rbcborealis.com/research-blogs/odes-and-sdes-for-machine-learning/",
|
||||
details: [
|
||||
"ODEs",
|
||||
"SDEs",
|
||||
"ODEs and gradient descent",
|
||||
"SDEs in stochastic gradient descent",
|
||||
"ODEs in residual networks",
|
||||
"ODEs and SDES in diffusion models",
|
||||
"Physics-informed machine learning",
|
||||
],
|
||||
},
|
||||
{
|
||||
text: "Introduction to ODEs",
|
||||
link: "https://rbcborealis.com/research-blogs/introduction-ordinary-differential-equations/",
|
||||
details: [
|
||||
"What are ODEs?",
|
||||
"Terminology and properties",
|
||||
"Solutions",
|
||||
"Boundary conditions",
|
||||
"Existence of solutions",
|
||||
],
|
||||
},
|
||||
{
|
||||
text: "Closed-form solutions for ODEs",
|
||||
link: "https://rbcborealis.com/research-blogs/closed-form-solutions-for-odes/",
|
||||
details: [
|
||||
"Validating proposed solutions",
|
||||
"Class 1: Right-hand side is a function of t only",
|
||||
"Class 2: Linear homogeneous",
|
||||
"Class 3: right-hand side is function of x alone",
|
||||
"Class 4: Right-hand side is a separable function of x and t",
|
||||
"Class 5: Exact ODEs",
|
||||
"Class 6: linear inhomogeneous ODEs",
|
||||
"Class 7: Euler homogeneous",
|
||||
"Vector ODEs",
|
||||
"The matrix exponential"
|
||||
],
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
export default function MoreSection() {
|
||||
return (
|
||||
<>
|
||||
@@ -859,6 +903,23 @@ export default function MoreSection() {
|
||||
</li>
|
||||
))}
|
||||
</MoreOuterList>
|
||||
<TopLine>ODEs and SDEs in machine learning</TopLine>
|
||||
<MoreOuterList>
|
||||
{ODESDE.map((item, index) => (
|
||||
<li key={index}>
|
||||
<MoreLink href={item.link} target="_blank" rel="noreferrer">
|
||||
{item.text}
|
||||
</MoreLink>
|
||||
<MoreInnerP>
|
||||
<MoreInnerList>
|
||||
{item.details.map((detail, index) => (
|
||||
<li key={index}>{detail}</li>
|
||||
))}
|
||||
</MoreInnerList>
|
||||
</MoreInnerP>
|
||||
</li>
|
||||
))}
|
||||
</MoreOuterList>
|
||||
</Column1>
|
||||
|
||||
<Column2>
|
||||
|
||||
Reference in New Issue
Block a user