Compare commits
21 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9f2449fcde | ||
|
|
025b677457 | ||
|
|
435971e3e2 | ||
|
|
6e76cb9b96 | ||
|
|
732fc6f0b7 | ||
|
|
f2a3fab832 | ||
|
|
8e3008673d | ||
|
|
07bcc98a85 | ||
|
|
f4fa3e8397 | ||
|
|
21cff37c72 | ||
|
|
187c6a7352 | ||
|
|
8e4a0d4daf | ||
|
|
23b5affab3 | ||
|
|
4fb8ffe622 | ||
|
|
2adc1da566 | ||
|
|
6e4551a69f | ||
|
|
65c685706a | ||
|
|
934f5f7748 | ||
|
|
365cb41bba | ||
|
|
4855761fb2 | ||
|
|
37b4a76130 |
401
Blogs/BorealisGradientFlow.ipynb
Normal file
401
Blogs/BorealisGradientFlow.ipynb
Normal file
@@ -0,0 +1,401 @@
|
|||||||
|
{
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 0,
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"provenance": [],
|
||||||
|
"authorship_tag": "ABX9TyO6cFY1oR4CmbHL2QywgTXm",
|
||||||
|
"include_colab_link": true
|
||||||
|
},
|
||||||
|
"kernelspec": {
|
||||||
|
"name": "python3",
|
||||||
|
"display_name": "Python 3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"name": "python"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "view-in-github",
|
||||||
|
"colab_type": "text"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"<a href=\"https://colab.research.google.com/github/udlbook/udlbook/blob/main/Blogs/BorealisGradientFlow.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"# Gradient flow\n",
|
||||||
|
"\n",
|
||||||
|
"This notebook replicates some of the results in the Borealis AI blog on gradient flow. \n"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "ucrRRJ4dq8_d"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"# Import relevant libraries\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"import matplotlib.pyplot as plt\n",
|
||||||
|
"from scipy.linalg import expm\n",
|
||||||
|
"from matplotlib import cm\n",
|
||||||
|
"from matplotlib.colors import ListedColormap"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "_IQFHZEMZE8T"
|
||||||
|
},
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"Create the three data points that are used to train the linear model in the blog. Each input point is a column in $\\mathbf{X}$ and consists of the $x$ position in the plot and the value 1, which is used to allow the model to fit bias terms neatly."
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "NwgUP3MSriiJ"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"id": "cJNZ2VIcYsD8"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"X = np.array([[0.2, 0.4, 0.8],[1,1,1]])\n",
|
||||||
|
"y = np.array([[-0.1],[0.15],[0.3]])\n",
|
||||||
|
"D = X.shape[0]\n",
|
||||||
|
"I = X.shape[1]\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"X=\\n\",X)\n",
|
||||||
|
"print(\"y=\\n\",y)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"# Draw the three data points\n",
|
||||||
|
"fig, ax = plt.subplots()\n",
|
||||||
|
"ax.plot(X[0:1,:],y.T,'ro')\n",
|
||||||
|
"ax.set_xlim([0,1]); ax.set_ylim([-0.5,0.5])\n",
|
||||||
|
"ax.set_xlabel('x'); ax.set_ylabel('y')\n",
|
||||||
|
"plt.show()"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "FpFlD4nUZDRt"
|
||||||
|
},
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"Compute the evolution of the residuals, loss, and parameters as a function of time."
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "H2LBR1DasQej"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"# Discretized time to evaluate quantities at\n",
|
||||||
|
"t_all = np.arange(0,20,0.01)\n",
|
||||||
|
"nT = t_all.shape[0]\n",
|
||||||
|
"\n",
|
||||||
|
"# Initial parameters, and initial function output at training points\n",
|
||||||
|
"phi_0 = np.array([[-0.05],[-0.4]])\n",
|
||||||
|
"f_0 = X.T @ phi_0\n",
|
||||||
|
"\n",
|
||||||
|
"# Precompute pseudoinverse term (not a very sensible numerical implementation, but it works...)\n",
|
||||||
|
"XXTInvX = np.linalg.inv(X@X.T)@X\n",
|
||||||
|
"\n",
|
||||||
|
"# Create arrays to hold function at data points over time, residual over time, parameters over time\n",
|
||||||
|
"f_all = np.zeros((I,nT))\n",
|
||||||
|
"f_minus_y_all = np.zeros((I,nT))\n",
|
||||||
|
"phi_t_all = np.zeros((D,nT))\n",
|
||||||
|
"\n",
|
||||||
|
"# For each time, compute the function, residual, and parameters.\n",
|
||||||
|
"for t in range(len(t_all)):\n",
|
||||||
|
" f = y + expm(-X.T@X * t_all[t]) @ (f_0-y)\n",
|
||||||
|
" f_all[:,t:t+1] = f\n",
|
||||||
|
" f_minus_y_all[:,t:t+1] = f-y\n",
|
||||||
|
" phi_t_all[:,t:t+1] = phi_0 - XXTInvX @ (np.identity(3)-expm(-X.T@X * t_all[t])) @ (f_0-y)"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "wfF_oTS5Z4Wi"
|
||||||
|
},
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"Plot the results that were calculated in the previous cell"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "9jSjOOFutJUE"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"# Plot function at data points\n",
|
||||||
|
"fig, ax = plt.subplots()\n",
|
||||||
|
"ax.plot(t_all,np.squeeze(f_all[0,:]),'r-', label='$f[x_{0},\\phi]$')\n",
|
||||||
|
"ax.plot(t_all,np.squeeze(f_all[1,:]),'g-', label='$f[x_{1},\\phi]$')\n",
|
||||||
|
"ax.plot(t_all,np.squeeze(f_all[2,:]),'b-', label='$f[x_{2},\\phi]$')\n",
|
||||||
|
"ax.set_xlim([0,np.max(t_all)]); ax.set_ylim([-0.5,0.5])\n",
|
||||||
|
"ax.set_xlabel('t'); ax.set_ylabel('f')\n",
|
||||||
|
"plt.legend(loc=\"lower right\")\n",
|
||||||
|
"plt.show()\n",
|
||||||
|
"\n",
|
||||||
|
"# Plot residual\n",
|
||||||
|
"fig, ax = plt.subplots()\n",
|
||||||
|
"ax.plot(t_all,np.squeeze(f_minus_y_all[0,:]),'r-', label='$f[x_{0},\\phi]-y_{0}$')\n",
|
||||||
|
"ax.plot(t_all,np.squeeze(f_minus_y_all[1,:]),'g-', label='$f[x_{1},\\phi]-y_{1}$')\n",
|
||||||
|
"ax.plot(t_all,np.squeeze(f_minus_y_all[2,:]),'b-', label='$f[x_{2},\\phi]-y_{2}$')\n",
|
||||||
|
"ax.set_xlim([0,np.max(t_all)]); ax.set_ylim([-0.5,0.5])\n",
|
||||||
|
"ax.set_xlabel('t'); ax.set_ylabel('f-y')\n",
|
||||||
|
"plt.legend(loc=\"lower right\")\n",
|
||||||
|
"plt.show()\n",
|
||||||
|
"\n",
|
||||||
|
"# Plot loss (half the sum of squared residuals)\n",
|
||||||
|
"fig, ax = plt.subplots()\n",
|
||||||
|
"square_error = 0.5 * np.sum(f_minus_y_all * f_minus_y_all, axis=0)\n",
|
||||||
|
"ax.plot(t_all, square_error,'k-')\n",
|
||||||
|
"ax.set_xlim([0,np.max(t_all)]); ax.set_ylim([-0.0,0.25])\n",
|
||||||
|
"ax.set_xlabel('t'); ax.set_ylabel('Loss')\n",
|
||||||
|
"plt.show()\n",
|
||||||
|
"\n",
|
||||||
|
"# Plot parameters\n",
|
||||||
|
"fig, ax = plt.subplots()\n",
|
||||||
|
"ax.plot(t_all, np.squeeze(phi_t_all[0,:]),'c-',label='$\\phi_{0}$')\n",
|
||||||
|
"ax.plot(t_all, np.squeeze(phi_t_all[1,:]),'m-',label='$\\phi_{1}$')\n",
|
||||||
|
"ax.set_xlim([0,np.max(t_all)]); ax.set_ylim([-1,1])\n",
|
||||||
|
"ax.set_xlabel('t'); ax.set_ylabel('$\\phi$')\n",
|
||||||
|
"plt.legend(loc=\"lower right\")\n",
|
||||||
|
"plt.show()"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "G9IwgwKltHz5"
|
||||||
|
},
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"Define the model and the loss function"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "N6VaUq2swa8D"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"# Model is just a straight line with intercept phi[0] and slope phi[1]\n",
|
||||||
|
"def model(phi,x):\n",
|
||||||
|
" y_pred = phi[0]+phi[1] * x\n",
|
||||||
|
" return y_pred\n",
|
||||||
|
"\n",
|
||||||
|
"# Loss function is 0.5 times sum of squares of residuals for training data\n",
|
||||||
|
"def compute_loss(data_x, data_y, model, phi):\n",
|
||||||
|
" pred_y = model(phi, data_x)\n",
|
||||||
|
" loss = 0.5 * np.sum((pred_y-data_y)*(pred_y-data_y))\n",
|
||||||
|
" return loss"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "LGHEVUWWiB4f"
|
||||||
|
},
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"Draw the loss function"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "hr3hs7pKwo0g"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"def draw_loss_function(compute_loss, X, y, model, phi_iters):\n",
|
||||||
|
" # Define pretty colormap\n",
|
||||||
|
" my_colormap_vals_hex =('2a0902', '2b0a03', '2c0b04', '2d0c05', '2e0c06', '2f0d07', '300d08', '310e09', '320f0a', '330f0b', '34100b', '35110c', '36110d', '37120e', '38120f', '39130f', '3a1410', '3b1411', '3c1511', '3d1612', '3e1613', '3f1713', '401714', '411814', '421915', '431915', '451a16', '461b16', '471b17', '481c17', '491d18', '4a1d18', '4b1e19', '4c1f19', '4d1f1a', '4e201b', '50211b', '51211c', '52221c', '53231d', '54231d', '55241e', '56251e', '57261f', '58261f', '592720', '5b2821', '5c2821', '5d2922', '5e2a22', '5f2b23', '602b23', '612c24', '622d25', '632e25', '652e26', '662f26', '673027', '683027', '693128', '6a3229', '6b3329', '6c342a', '6d342a', '6f352b', '70362c', '71372c', '72372d', '73382e', '74392e', '753a2f', '763a2f', '773b30', '783c31', '7a3d31', '7b3e32', '7c3e33', '7d3f33', '7e4034', '7f4134', '804235', '814236', '824336', '834437', '854538', '864638', '874739', '88473a', '89483a', '8a493b', '8b4a3c', '8c4b3c', '8d4c3d', '8e4c3e', '8f4d3f', '904e3f', '924f40', '935041', '945141', '955242', '965343', '975343', '985444', '995545', '9a5646', '9b5746', '9c5847', '9d5948', '9e5a49', '9f5a49', 'a05b4a', 'a15c4b', 'a35d4b', 'a45e4c', 'a55f4d', 'a6604e', 'a7614e', 'a8624f', 'a96350', 'aa6451', 'ab6552', 'ac6552', 'ad6653', 'ae6754', 'af6855', 'b06955', 'b16a56', 'b26b57', 'b36c58', 'b46d59', 'b56e59', 'b66f5a', 'b7705b', 'b8715c', 'b9725d', 'ba735d', 'bb745e', 'bc755f', 'bd7660', 'be7761', 'bf7862', 'c07962', 'c17a63', 'c27b64', 'c27c65', 'c37d66', 'c47e67', 'c57f68', 'c68068', 'c78169', 'c8826a', 'c9836b', 'ca846c', 'cb856d', 'cc866e', 'cd876f', 'ce886f', 'ce8970', 'cf8a71', 'd08b72', 'd18c73', 'd28d74', 'd38e75', 'd48f76', 'd59077', 'd59178', 'd69279', 'd7937a', 'd8957b', 'd9967b', 'da977c', 'da987d', 'db997e', 'dc9a7f', 'dd9b80', 'de9c81', 'de9d82', 'df9e83', 'e09f84', 'e1a185', 'e2a286', 'e2a387', 'e3a488', 'e4a589', 'e5a68a', 'e5a78b', 'e6a88c', 'e7aa8d', 'e7ab8e', 'e8ac8f', 'e9ad90', 'eaae91', 'eaaf92', 'ebb093', 'ecb295', 'ecb396', 'edb497', 
'eeb598', 'eeb699', 'efb79a', 'efb99b', 'f0ba9c', 'f1bb9d', 'f1bc9e', 'f2bd9f', 'f2bfa1', 'f3c0a2', 'f3c1a3', 'f4c2a4', 'f5c3a5', 'f5c5a6', 'f6c6a7', 'f6c7a8', 'f7c8aa', 'f7c9ab', 'f8cbac', 'f8ccad', 'f8cdae', 'f9ceb0', 'f9d0b1', 'fad1b2', 'fad2b3', 'fbd3b4', 'fbd5b6', 'fbd6b7', 'fcd7b8', 'fcd8b9', 'fcdaba', 'fddbbc', 'fddcbd', 'fddebe', 'fddfbf', 'fee0c1', 'fee1c2', 'fee3c3', 'fee4c5', 'ffe5c6', 'ffe7c7', 'ffe8c9', 'ffe9ca', 'ffebcb', 'ffeccd', 'ffedce', 'ffefcf', 'fff0d1', 'fff2d2', 'fff3d3', 'fff4d5', 'fff6d6', 'fff7d8', 'fff8d9', 'fffada', 'fffbdc', 'fffcdd', 'fffedf', 'ffffe0')\n",
|
||||||
|
" my_colormap_vals_dec = np.array([int(element,base=16) for element in my_colormap_vals_hex])\n",
|
||||||
|
" r = np.floor(my_colormap_vals_dec/(256*256))\n",
|
||||||
|
" g = np.floor((my_colormap_vals_dec - r *256 *256)/256)\n",
|
||||||
|
" b = np.floor(my_colormap_vals_dec - r * 256 *256 - g * 256)\n",
|
||||||
|
" my_colormap = ListedColormap(np.vstack((r,g,b)).transpose()/255.0)\n",
|
||||||
|
"\n",
|
||||||
|
" # Make grid of intercept/slope values to plot\n",
|
||||||
|
" intercepts_mesh, slopes_mesh = np.meshgrid(np.arange(-1.0,1.0,0.005), np.arange(-1.0,1.0,0.005))\n",
|
||||||
|
" loss_mesh = np.zeros_like(slopes_mesh)\n",
|
||||||
|
" # Compute loss for every set of parameters\n",
|
||||||
|
" for idslope, slope in np.ndenumerate(slopes_mesh):\n",
|
||||||
|
" loss_mesh[idslope] = compute_loss(X, y, model, np.array([[intercepts_mesh[idslope]], [slope]]))\n",
|
||||||
|
"\n",
|
||||||
|
" fig,ax = plt.subplots()\n",
|
||||||
|
" fig.set_size_inches(8,8)\n",
|
||||||
|
" ax.contourf(intercepts_mesh,slopes_mesh,loss_mesh,256,cmap=my_colormap)\n",
|
||||||
|
" ax.contour(intercepts_mesh,slopes_mesh,loss_mesh,40,colors=['#80808080'])\n",
|
||||||
|
" ax.set_ylim([1,-1]); ax.set_xlim([-1,1])\n",
|
||||||
|
"\n",
|
||||||
|
" ax.plot(phi_iters[1,:], phi_iters[0,:],'g-')\n",
|
||||||
|
" ax.set_xlabel('Intercept'); ax.set_ylabel('Slope')\n",
|
||||||
|
" plt.show()"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "UCxa3tZ8a9kz"
|
||||||
|
},
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"draw_loss_function(compute_loss, X[0:1,:], y.T, model, phi_t_all)"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "pXLLBaSaiI2A"
|
||||||
|
},
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"Draw the evolution of the function"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "ZsremHW-xFi5"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"fig, ax = plt.subplots()\n",
|
||||||
|
"ax.plot(X[0:1,:],y.T,'ro')\n",
|
||||||
|
"x_vals = np.arange(0,1,0.001)\n",
|
||||||
|
"ax.plot(x_vals, phi_t_all[0,0]*x_vals + phi_t_all[1,0],'r-', label='t=0.00')\n",
|
||||||
|
"ax.plot(x_vals, phi_t_all[0,10]*x_vals + phi_t_all[1,10],'g-', label='t=0.10')\n",
|
||||||
|
"ax.plot(x_vals, phi_t_all[0,30]*x_vals + phi_t_all[1,30],'b-', label='t=0.30')\n",
|
||||||
|
"ax.plot(x_vals, phi_t_all[0,200]*x_vals + phi_t_all[1,200],'c-', label='t=2.00')\n",
|
||||||
|
"ax.plot(x_vals, phi_t_all[0,1999]*x_vals + phi_t_all[1,1999],'y-', label='t=20.0')\n",
|
||||||
|
"ax.set_xlim([0,1]); ax.set_ylim([-0.5,0.5])\n",
|
||||||
|
"ax.set_xlabel('x'); ax.set_ylabel('y')\n",
|
||||||
|
"plt.legend(loc=\"upper left\")\n",
|
||||||
|
"plt.show()"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "cv9ZrUoRkuhI"
|
||||||
|
},
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"# Compute MAP and ML solutions\n",
|
||||||
|
"MLParams = np.linalg.inv(X@X.T)@X@y\n",
|
||||||
|
"sigma_sq_p = 3.0\n",
|
||||||
|
"sigma_sq = 0.05\n",
|
||||||
|
"MAPParams = np.linalg.inv(X@X.T+np.identity(X.shape[0])*sigma_sq/sigma_sq_p)@X@y"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "OU9oegSOof-o"
|
||||||
|
},
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"Finally, we predict both the mean and the uncertainty in the fitted model as a function of time"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "Ul__XvOgyYSA"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"# Define x positions to make predictions (appending a 1 to each column)\n",
|
||||||
|
"x_predict = np.arange(0,1,0.01)[None,:]\n",
|
||||||
|
"x_predict = np.concatenate((x_predict,np.ones_like(x_predict)))\n",
|
||||||
|
"nX = x_predict.shape[1]\n",
|
||||||
|
"\n",
|
||||||
|
"# Create variables to store evolution of mean and variance of prediction over time\n",
|
||||||
|
"predict_mean_all = np.zeros((nT,nX))\n",
|
||||||
|
"predict_var_all = np.zeros((nT,nX))\n",
|
||||||
|
"\n",
|
||||||
|
"# Initial covariance\n",
|
||||||
|
"sigma_sq_p = 2.0\n",
|
||||||
|
"cov_init = sigma_sq_p * np.identity(2)\n",
|
||||||
|
"\n",
|
||||||
|
"# Run through each time computing a and b and hence mean and variance of prediction\n",
|
||||||
|
"for t in range(len(t_all)):\n",
|
||||||
|
" a = x_predict.T @(XXTInvX @ (np.identity(3)-expm(-X.T@X * t_all[t])) @ y)\n",
|
||||||
|
" b = x_predict.T -x_predict.T@XXTInvX @ (np.identity(3)-expm(-X.T@X * t_all[t])) @ X.T\n",
|
||||||
|
" predict_mean_all[t:t+1,:] = a.T\n",
|
||||||
|
" predict_cov = b@ cov_init @b.T\n",
|
||||||
|
" # We just want the diagonal of the covariance to plot the uncertainty\n",
|
||||||
|
" predict_var_all[t:t+1,:] = np.reshape(np.diag(predict_cov),(1,nX))"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "aMPADCuByKWr"
|
||||||
|
},
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"Plot the mean and variance at various times"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "PZTj93KK7QH6"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"def plot_mean_var(X,y,x_predict, predict_mean_all, predict_var_all, this_t, sigma_sq = 0.00001):\n",
|
||||||
|
" fig, ax = plt.subplots()\n",
|
||||||
|
" ax.plot(X[0:1,:],y.T,'ro')\n",
|
||||||
|
" ax.plot(x_predict[0:1,:].T, predict_mean_all[this_t:this_t+1,:].T,'r-')\n",
|
||||||
|
" lower = np.squeeze(predict_mean_all[this_t:this_t+1,:].T-np.sqrt(predict_var_all[this_t:this_t+1,:].T+np.sqrt(sigma_sq)))\n",
|
||||||
|
" upper = np.squeeze(predict_mean_all[this_t:this_t+1,:].T+np.sqrt(predict_var_all[this_t:this_t+1,:].T+np.sqrt(sigma_sq)))\n",
|
||||||
|
" ax.fill_between(np.squeeze(x_predict[0:1,:]), lower, upper, color='lightgray')\n",
|
||||||
|
" ax.set_xlim([0,1]); ax.set_ylim([-0.5,0.5])\n",
|
||||||
|
" ax.set_xlabel('x'); ax.set_ylabel('y')\n",
|
||||||
|
" plt.show()\n",
|
||||||
|
"\n",
|
||||||
|
"plot_mean_var(X,y,x_predict, predict_mean_all, predict_var_all, this_t=0)\n",
|
||||||
|
"plot_mean_var(X,y,x_predict, predict_mean_all, predict_var_all, this_t=40)\n",
|
||||||
|
"plot_mean_var(X,y,x_predict, predict_mean_all, predict_var_all, this_t=80)\n",
|
||||||
|
"plot_mean_var(X,y,x_predict, predict_mean_all, predict_var_all, this_t=200)\n",
|
||||||
|
"plot_mean_var(X,y,x_predict, predict_mean_all, predict_var_all, this_t=500)\n",
|
||||||
|
"plot_mean_var(X,y,x_predict, predict_mean_all, predict_var_all, this_t=1000)"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "bYAFxgB880-v"
|
||||||
|
},
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": []
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
@@ -185,7 +185,7 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"source": [
|
"source": [
|
||||||
"# Return probability under normal distribution for input x\n",
|
"# Return probability under normal distribution\n",
|
||||||
"def normal_distribution(y, mu, sigma):\n",
|
"def normal_distribution(y, mu, sigma):\n",
|
||||||
" # TODO-- write in the equation for the normal distribution\n",
|
" # TODO-- write in the equation for the normal distribution\n",
|
||||||
" # Equation 5.7 from the notes (you will need np.sqrt() and np.exp(), and math.pi)\n",
|
" # Equation 5.7 from the notes (you will need np.sqrt() and np.exp(), and math.pi)\n",
|
||||||
@@ -329,7 +329,7 @@
|
|||||||
"mu_pred = shallow_nn(x_train, beta_0, omega_0, beta_1, omega_1)\n",
|
"mu_pred = shallow_nn(x_train, beta_0, omega_0, beta_1, omega_1)\n",
|
||||||
"# Set the standard deviation to something reasonable\n",
|
"# Set the standard deviation to something reasonable\n",
|
||||||
"sigma = 0.2\n",
|
"sigma = 0.2\n",
|
||||||
"# Compute the log likelihood\n",
|
"# Compute the negative log likelihood\n",
|
||||||
"nll = compute_negative_log_likelihood(y_train, mu_pred, sigma)\n",
|
"nll = compute_negative_log_likelihood(y_train, mu_pred, sigma)\n",
|
||||||
"# Let's double check we get the right answer before proceeding\n",
|
"# Let's double check we get the right answer before proceeding\n",
|
||||||
"print(\"Correct answer = %9.9f, Your answer = %9.9f\"%(11.452419564,nll))"
|
"print(\"Correct answer = %9.9f, Your answer = %9.9f\"%(11.452419564,nll))"
|
||||||
@@ -388,7 +388,7 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"source": [
|
"source": [
|
||||||
"Now let's investigate finding the maximum likelihood / minimum log likelihood / least squares solution. For simplicity, we'll assume that all the parameters are correct except one and look at how the likelihood, log likelihood, and sum of squares change as we manipulate the last parameter. We'll start with overall y offset, beta_1 (formerly phi_0)"
|
"Now let's investigate finding the maximum likelihood / minimum negative log likelihood / least squares solution. For simplicity, we'll assume that all the parameters are correct except one and look at how the likelihood, negative log likelihood, and sum of squares change as we manipulate the last parameter. We'll start with overall y offset, beta_1 (formerly phi_0)"
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"id": "OgcRojvPWh4V"
|
"id": "OgcRojvPWh4V"
|
||||||
@@ -431,7 +431,7 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"source": [
|
"source": [
|
||||||
"# Now let's plot the likelihood, negative log likelihood, and least squares as a function the value of the offset beta1\n",
|
"# Now let's plot the likelihood, negative log likelihood, and least squares as a function of the value of the offset beta1\n",
|
||||||
"fig, ax = plt.subplots(1,2)\n",
|
"fig, ax = plt.subplots(1,2)\n",
|
||||||
"fig.set_size_inches(10.5, 5.5)\n",
|
"fig.set_size_inches(10.5, 5.5)\n",
|
||||||
"fig.tight_layout(pad=10.0)\n",
|
"fig.tight_layout(pad=10.0)\n",
|
||||||
@@ -530,7 +530,7 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"source": [
|
"source": [
|
||||||
"# Now let's plot the likelihood, negative log likelihood, and least squares as a function the value of the standard divation sigma\n",
|
"# Now let's plot the likelihood, negative log likelihood, and least squares as a function of the value of the standard deviation sigma\n",
|
||||||
"fig, ax = plt.subplots(1,2)\n",
|
"fig, ax = plt.subplots(1,2)\n",
|
||||||
"fig.set_size_inches(10.5, 5.5)\n",
|
"fig.set_size_inches(10.5, 5.5)\n",
|
||||||
"fig.tight_layout(pad=10.0)\n",
|
"fig.tight_layout(pad=10.0)\n",
|
||||||
@@ -581,7 +581,7 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"source": [
|
"source": [
|
||||||
"Obviously, to fit the full neural model we would vary all of the 10 parameters of the network in $\\boldsymbol\\beta_{0},\\boldsymbol\\omega_{0},\\boldsymbol\\beta_{1},\\boldsymbol\\omega_{1}$ (and maybe $\\sigma$) until we find the combination that have the maximum likelihood / minimum negative log likelihood / least squares.<br><br>\n",
|
"Obviously, to fit the full neural model we would vary all of the 10 parameters of the network in $\\boldsymbol\\beta_{0},\\boldsymbol\\Omega_{0},\\boldsymbol\\beta_{1},\\boldsymbol\\Omega_{1}$ (and maybe $\\sigma$) until we find the combination that have the maximum likelihood / minimum negative log likelihood / least squares.<br><br>\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Here we just varied one at a time as it is easier to see what is going on. This is known as **coordinate descent**.\n"
|
"Here we just varied one at a time as it is easier to see what is going on. This is known as **coordinate descent**.\n"
|
||||||
],
|
],
|
||||||
|
|||||||
@@ -4,7 +4,6 @@
|
|||||||
"metadata": {
|
"metadata": {
|
||||||
"colab": {
|
"colab": {
|
||||||
"provenance": [],
|
"provenance": [],
|
||||||
"authorship_tag": "ABX9TyOSb+W2AOFVQm8FZcHAb2Jq",
|
|
||||||
"include_colab_link": true
|
"include_colab_link": true
|
||||||
},
|
},
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
@@ -199,7 +198,7 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"source": [
|
"source": [
|
||||||
"The left is model output and the right is the model output after the sigmoid has been applied, so it now lies in the range [0,1] and represents the probability, that y=1. The black dots show the training data. We'll compute the the likelihood and the negative log likelihood."
|
"The left is model output and the right is the model output after the sigmoid has been applied, so it now lies in the range [0,1] and represents the probability, that y=1. The black dots show the training data. We'll compute the likelihood and the negative log likelihood."
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"id": "MvVX6tl9AEXF"
|
"id": "MvVX6tl9AEXF"
|
||||||
@@ -208,7 +207,7 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"source": [
|
"source": [
|
||||||
"# Return probability under Bernoulli distribution for input x\n",
|
"# Return probability under Bernoulli distribution for observed class y\n",
|
||||||
"def bernoulli_distribution(y, lambda_param):\n",
|
"def bernoulli_distribution(y, lambda_param):\n",
|
||||||
" # TODO-- write in the equation for the Bernoulli distribution\n",
|
" # TODO-- write in the equation for the Bernoulli distribution\n",
|
||||||
" # Equation 5.17 from the notes (you will need np.power)\n",
|
" # Equation 5.17 from the notes (you will need np.power)\n",
|
||||||
@@ -269,7 +268,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"# Let's test this\n",
|
"# Let's test this\n",
|
||||||
"beta_0, omega_0, beta_1, omega_1 = get_parameters()\n",
|
"beta_0, omega_0, beta_1, omega_1 = get_parameters()\n",
|
||||||
"# Use our neural network to predict the mean of the Gaussian\n",
|
"# Use our neural network to predict the Bernoulli parameter lambda\n",
|
||||||
"model_out = shallow_nn(x_train, beta_0, omega_0, beta_1, omega_1)\n",
|
"model_out = shallow_nn(x_train, beta_0, omega_0, beta_1, omega_1)\n",
|
||||||
"lambda_train = sigmoid(model_out)\n",
|
"lambda_train = sigmoid(model_out)\n",
|
||||||
"# Compute the likelihood\n",
|
"# Compute the likelihood\n",
|
||||||
@@ -336,7 +335,7 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"source": [
|
"source": [
|
||||||
"Now let's investigate finding the maximum likelihood / minimum negative log likelihood solution. For simplicity, we'll assume that all the parameters are fixed except one and look at how the likelihood and log likelihood change as we manipulate the last parameter. We'll start with overall y_offset, beta_1 (formerly phi_0)"
|
"Now let's investigate finding the maximum likelihood / minimum negative log likelihood solution. For simplicity, we'll assume that all the parameters are fixed except one and look at how the likelihood and negative log likelihood change as we manipulate the last parameter. We'll start with overall y_offset, beta_1 (formerly phi_0)"
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"id": "OgcRojvPWh4V"
|
"id": "OgcRojvPWh4V"
|
||||||
@@ -359,7 +358,7 @@
|
|||||||
" # Run the network with new parameters\n",
|
" # Run the network with new parameters\n",
|
||||||
" model_out = shallow_nn(x_train, beta_0, omega_0, beta_1, omega_1)\n",
|
" model_out = shallow_nn(x_train, beta_0, omega_0, beta_1, omega_1)\n",
|
||||||
" lambda_train = sigmoid(model_out)\n",
|
" lambda_train = sigmoid(model_out)\n",
|
||||||
" # Compute and store the three values\n",
|
" # Compute and store the two values\n",
|
||||||
" likelihoods[count] = compute_likelihood(y_train,lambda_train)\n",
|
" likelihoods[count] = compute_likelihood(y_train,lambda_train)\n",
|
||||||
" nlls[count] = compute_negative_log_likelihood(y_train, lambda_train)\n",
|
" nlls[count] = compute_negative_log_likelihood(y_train, lambda_train)\n",
|
||||||
" # Draw the model for every 20th parameter setting\n",
|
" # Draw the model for every 20th parameter setting\n",
|
||||||
@@ -378,7 +377,7 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"source": [
|
"source": [
|
||||||
"# Now let's plot the likelihood, negative log likelihood, and least squares as a function the value of the offset beta1\n",
|
"# Now let's plot the likelihood and negative log likelihood as a function of the value of the offset beta1\n",
|
||||||
"fig, ax = plt.subplots()\n",
|
"fig, ax = plt.subplots()\n",
|
||||||
"fig.tight_layout(pad=5.0)\n",
|
"fig.tight_layout(pad=5.0)\n",
|
||||||
"likelihood_color = 'tab:red'\n",
|
"likelihood_color = 'tab:red'\n",
|
||||||
@@ -430,7 +429,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"They both give the same answer. But you can see from the likelihood above that the likelihood is very small unless the parameters are almost correct. So in practice, we would work with the negative log likelihood.<br><br>\n",
|
"They both give the same answer. But you can see from the likelihood above that the likelihood is very small unless the parameters are almost correct. So in practice, we would work with the negative log likelihood.<br><br>\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Again, to fit the full neural model we would vary all of the 10 parameters of the network in the $\\boldsymbol\\beta_{0},\\boldsymbol\\omega_{0},\\boldsymbol\\beta_{1},\\boldsymbol\\omega_{1}$ until we find the combination that have the maximum likelihood / minimum negative log likelihood.<br><br>\n",
|
"Again, to fit the full neural model we would vary all of the 10 parameters of the network in the $\\boldsymbol\\beta_{0},\\boldsymbol\\Omega_{0},\\boldsymbol\\beta_{1},\\boldsymbol\\Omega_{1}$ until we find the combination that have the maximum likelihood / minimum negative log likelihood.<br><br>\n",
|
||||||
"\n"
|
"\n"
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
@@ -438,4 +437,4 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@@ -1,18 +1,16 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"attachments": {},
|
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"colab_type": "text",
|
"id": "view-in-github",
|
||||||
"id": "view-in-github"
|
"colab_type": "text"
|
||||||
},
|
},
|
||||||
"source": [
|
"source": [
|
||||||
"<a href=\"https://colab.research.google.com/github/udlbook/udlbook/blob/main/Notebooks/Chap05/5_3_Multiclass_Cross_entropy_Loss.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
|
"<a href=\"https://colab.research.google.com/github/udlbook/udlbook/blob/main/Notebooks/Chap05/5_3_Multiclass_Cross_entropy_Loss.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"attachments": {},
|
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"id": "jSlFkICHwHQF"
|
"id": "jSlFkICHwHQF"
|
||||||
@@ -142,7 +140,6 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"attachments": {},
|
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"id": "PsgLZwsPxauP"
|
"id": "PsgLZwsPxauP"
|
||||||
@@ -209,13 +206,12 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"attachments": {},
|
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"id": "MvVX6tl9AEXF"
|
"id": "MvVX6tl9AEXF"
|
||||||
},
|
},
|
||||||
"source": [
|
"source": [
|
||||||
"The left is model output and the right is the model output after the softmax has been applied, so it now lies in the range [0,1] and represents the probability, that y=0 (red), 1 (green) and 2 (blue) The dots at the bottom show the training data with the same color scheme. So we want the red curve to be high where there are red dots, the green curve to be high where there are green dots, and the blue curve to be high where there are blue dots We'll compute the the likelihood and the negative log likelihood."
|
"The left is model output and the right is the model output after the softmax has been applied, so it now lies in the range [0,1] and represents the probability that y=0 (red), 1 (green) and 2 (blue). The dots at the bottom show the training data with the same color scheme. So we want the red curve to be high where there are red dots, the green curve to be high where there are green dots, and the blue curve to be high where there are blue dots. We'll compute the likelihood and the negative log likelihood."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -226,7 +222,7 @@
|
|||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# Return probability under Categorical distribution for input x\n",
|
"# Return probability under categorical distribution for observed class y\n",
|
||||||
"# Just take value from row k of lambda param where y =k,\n",
|
"# Just take value from row k of lambda param where y =k,\n",
|
||||||
"def categorical_distribution(y, lambda_param):\n",
|
"def categorical_distribution(y, lambda_param):\n",
|
||||||
" return np.array([lambda_param[row, i] for i, row in enumerate (y)])"
|
" return np.array([lambda_param[row, i] for i, row in enumerate (y)])"
|
||||||
@@ -248,7 +244,6 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"attachments": {},
|
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"id": "R5z_0dzQMF35"
|
"id": "R5z_0dzQMF35"
|
||||||
@@ -286,7 +281,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"# Let's test this\n",
|
"# Let's test this\n",
|
||||||
"beta_0, omega_0, beta_1, omega_1 = get_parameters()\n",
|
"beta_0, omega_0, beta_1, omega_1 = get_parameters()\n",
|
||||||
"# Use our neural network to predict the mean of the Gaussian\n",
|
"# Use our neural network to predict the parameters of the categorical distribution\n",
|
||||||
"model_out = shallow_nn(x_train, beta_0, omega_0, beta_1, omega_1)\n",
|
"model_out = shallow_nn(x_train, beta_0, omega_0, beta_1, omega_1)\n",
|
||||||
"lambda_train = softmax(model_out)\n",
|
"lambda_train = softmax(model_out)\n",
|
||||||
"# Compute the likelihood\n",
|
"# Compute the likelihood\n",
|
||||||
@@ -296,7 +291,6 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"attachments": {},
|
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"id": "HzphKgPfOvlk"
|
"id": "HzphKgPfOvlk"
|
||||||
@@ -318,7 +312,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"# Return the negative log likelihood of the data under the model\n",
|
"# Return the negative log likelihood of the data under the model\n",
|
||||||
"def compute_negative_log_likelihood(y_train, lambda_param):\n",
|
"def compute_negative_log_likelihood(y_train, lambda_param):\n",
|
||||||
" # TODO -- compute the likelihood of the data -- don't use the likelihood function above -- compute the negative sum of the log probabilities\n",
|
" # TODO -- compute the negative log likelihood of the data -- don't use the likelihood function above -- compute the negative sum of the log probabilities\n",
|
||||||
" # You will need np.sum(), np.log()\n",
|
" # You will need np.sum(), np.log()\n",
|
||||||
" # Replace the line below\n",
|
" # Replace the line below\n",
|
||||||
" nll = 0\n",
|
" nll = 0\n",
|
||||||
@@ -336,24 +330,23 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"# Let's test this\n",
|
"# Let's test this\n",
|
||||||
"beta_0, omega_0, beta_1, omega_1 = get_parameters()\n",
|
"beta_0, omega_0, beta_1, omega_1 = get_parameters()\n",
|
||||||
"# Use our neural network to predict the mean of the Gaussian\n",
|
"# Use our neural network to predict the parameters of the categorical distribution\n",
|
||||||
"model_out = shallow_nn(x_train, beta_0, omega_0, beta_1, omega_1)\n",
|
"model_out = shallow_nn(x_train, beta_0, omega_0, beta_1, omega_1)\n",
|
||||||
"# Pass the outputs through the softmax function\n",
|
"# Pass the outputs through the softmax function\n",
|
||||||
"lambda_train = softmax(model_out)\n",
|
"lambda_train = softmax(model_out)\n",
|
||||||
"# Compute the log likelihood\n",
|
"# Compute the negative log likelihood\n",
|
||||||
"nll = compute_negative_log_likelihood(y_train, lambda_train)\n",
|
"nll = compute_negative_log_likelihood(y_train, lambda_train)\n",
|
||||||
"# Let's double check we get the right answer before proceeding\n",
|
"# Let's double check we get the right answer before proceeding\n",
|
||||||
"print(\"Correct answer = %9.9f, Your answer = %9.9f\"%(17.015457867,nll))"
|
"print(\"Correct answer = %9.9f, Your answer = %9.9f\"%(17.015457867,nll))"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"attachments": {},
|
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"id": "OgcRojvPWh4V"
|
"id": "OgcRojvPWh4V"
|
||||||
},
|
},
|
||||||
"source": [
|
"source": [
|
||||||
"Now let's investigate finding the maximum likelihood / minimum log likelihood solution. For simplicity, we'll assume that all the parameters are fixed except one and look at how the likelihood and log likelihood change as we manipulate the last parameter. We'll start with overall y_offset, $\\beta_1$ (formerly $\\phi_0$)"
|
"Now let's investigate finding the maximum likelihood / minimum negative log likelihood solution. For simplicity, we'll assume that all the parameters are fixed except one and look at how the likelihood and negative log likelihood change as we manipulate the last parameter. We'll start with overall y_offset, $\\beta_1$ (formerly $\\phi_0$)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -378,7 +371,7 @@
|
|||||||
" # Run the network with new parameters\n",
|
" # Run the network with new parameters\n",
|
||||||
" model_out = shallow_nn(x_train, beta_0, omega_0, beta_1, omega_1)\n",
|
" model_out = shallow_nn(x_train, beta_0, omega_0, beta_1, omega_1)\n",
|
||||||
" lambda_train = softmax(model_out)\n",
|
" lambda_train = softmax(model_out)\n",
|
||||||
" # Compute and store the three values\n",
|
" # Compute and store the two values\n",
|
||||||
" likelihoods[count] = compute_likelihood(y_train,lambda_train)\n",
|
" likelihoods[count] = compute_likelihood(y_train,lambda_train)\n",
|
||||||
" nlls[count] = compute_negative_log_likelihood(y_train, lambda_train)\n",
|
" nlls[count] = compute_negative_log_likelihood(y_train, lambda_train)\n",
|
||||||
" # Draw the model for every 20th parameter setting\n",
|
" # Draw the model for every 20th parameter setting\n",
|
||||||
@@ -397,7 +390,7 @@
|
|||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# Now let's plot the likelihood, negative log likelihood, and least squares as a function the value of the offset beta1\n",
|
"# Now let's plot the likelihood and negative log likelihood as a function of the value of the offset beta1\n",
|
||||||
"fig, ax = plt.subplots()\n",
|
"fig, ax = plt.subplots()\n",
|
||||||
"fig.tight_layout(pad=5.0)\n",
|
"fig.tight_layout(pad=5.0)\n",
|
||||||
"likelihood_color = 'tab:red'\n",
|
"likelihood_color = 'tab:red'\n",
|
||||||
@@ -440,7 +433,6 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"attachments": {},
|
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"id": "771G8N1Vk5A2"
|
"id": "771G8N1Vk5A2"
|
||||||
@@ -448,16 +440,15 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"They both give the same answer. But you can see from the likelihood above that the likelihood is very small unless the parameters are almost correct. So in practice, we would work with the negative log likelihood.<br><br>\n",
|
"They both give the same answer. But you can see from the likelihood above that the likelihood is very small unless the parameters are almost correct. So in practice, we would work with the negative log likelihood.<br><br>\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Again, to fit the full neural model we would vary all of the 16 parameters of the network in the $\\boldsymbol\\beta_{0},\\boldsymbol\\omega_{0},\\boldsymbol\\beta_{1},\\boldsymbol\\omega_{1}$ until we find the combination that have the maximum likelihood / minimum negative log likelihood.<br><br>\n",
|
"Again, to fit the full neural model we would vary all of the 16 parameters of the network in the $\\boldsymbol\\beta_{0},\\boldsymbol\\Omega_{0},\\boldsymbol\\beta_{1},\\boldsymbol\\Omega_{1}$ until we find the combination that has the maximum likelihood / minimum negative log likelihood.<br><br>\n",
|
||||||
"\n"
|
"\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"colab": {
|
"colab": {
|
||||||
"authorship_tag": "ABX9TyOPv/l+ToaApJV7Nz+8AtpV",
|
"provenance": [],
|
||||||
"include_colab_link": true,
|
"include_colab_link": true
|
||||||
"provenance": []
|
|
||||||
},
|
},
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3",
|
"display_name": "Python 3",
|
||||||
@@ -469,4 +460,4 @@
|
|||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
"nbformat_minor": 0
|
"nbformat_minor": 0
|
||||||
}
|
}
|
||||||
@@ -113,7 +113,7 @@
|
|||||||
" b = 0.33\n",
|
" b = 0.33\n",
|
||||||
" c = 0.66\n",
|
" c = 0.66\n",
|
||||||
" d = 1.0\n",
|
" d = 1.0\n",
|
||||||
" n_iter =0;\n",
|
" n_iter = 0\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # While we haven't found the minimum closely enough\n",
|
" # While we haven't found the minimum closely enough\n",
|
||||||
" while np.abs(b-c) > thresh and n_iter < max_iter:\n",
|
" while np.abs(b-c) > thresh and n_iter < max_iter:\n",
|
||||||
@@ -131,8 +131,7 @@
|
|||||||
"\n",
|
"\n",
|
||||||
" print('Iter %d, a=%3.3f, b=%3.3f, c=%3.3f, d=%3.3f'%(n_iter, a,b,c,d))\n",
|
" print('Iter %d, a=%3.3f, b=%3.3f, c=%3.3f, d=%3.3f'%(n_iter, a,b,c,d))\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # Rule #1 If the HEIGHT at point A is less the HEIGHT at points B, C, and D then halve values of B, C, and D\n",
|
" # Rule #1 If the HEIGHT at point A is less than the HEIGHT at points B, C, and D then halve values of B, C, and D\n",
|
||||||
" # i.e. bring them closer to the original point\n",
|
|
||||||
" # i.e. bring them closer to the original point\n",
|
" # i.e. bring them closer to the original point\n",
|
||||||
" # TODO REPLACE THE BLOCK OF CODE BELOW WITH THIS RULE\n",
|
" # TODO REPLACE THE BLOCK OF CODE BELOW WITH THIS RULE\n",
|
||||||
" if (0):\n",
|
" if (0):\n",
|
||||||
@@ -140,7 +139,7 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # Rule #2 If the HEIGHT at point b is less than the HEIGHT at point c then\n",
|
" # Rule #2 If the HEIGHT at point b is less than the HEIGHT at point c then\n",
|
||||||
" # then point d becomes point c, and\n",
|
" # point d becomes point c, and\n",
|
||||||
" # point b becomes 1/3 between a and new d\n",
|
" # point b becomes 1/3 between a and new d\n",
|
||||||
" # point c becomes 2/3 between a and new d\n",
|
" # point c becomes 2/3 between a and new d\n",
|
||||||
" # TODO REPLACE THE BLOCK OF CODE BELOW WITH THIS RULE\n",
|
" # TODO REPLACE THE BLOCK OF CODE BELOW WITH THIS RULE\n",
|
||||||
@@ -148,7 +147,7 @@
|
|||||||
" continue;\n",
|
" continue;\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # Rule #3 If the HEIGHT at point c is less than the HEIGHT at point b then\n",
|
" # Rule #3 If the HEIGHT at point c is less than the HEIGHT at point b then\n",
|
||||||
" # then point a becomes point b, and\n",
|
" # point a becomes point b, and\n",
|
||||||
" # point b becomes 1/3 between new a and d\n",
|
" # point b becomes 1/3 between new a and d\n",
|
||||||
" # point c becomes 2/3 between new a and d\n",
|
" # point c becomes 2/3 between new a and d\n",
|
||||||
" # TODO REPLACE THE BLOCK OF CODE BELOW WITH THIS RULE\n",
|
" # TODO REPLACE THE BLOCK OF CODE BELOW WITH THIS RULE\n",
|
||||||
@@ -190,4 +189,4 @@
|
|||||||
"outputs": []
|
"outputs": []
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -117,7 +117,7 @@
|
|||||||
"id": "QU5mdGvpTtEG"
|
"id": "QU5mdGvpTtEG"
|
||||||
},
|
},
|
||||||
"source": [
|
"source": [
|
||||||
"Now lets create compute the sum of squares loss for the training data"
|
"Now let's compute the sum of squares loss for the training data"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -317,7 +317,7 @@
|
|||||||
" b = 0.33 * max_dist\n",
|
" b = 0.33 * max_dist\n",
|
||||||
" c = 0.66 * max_dist\n",
|
" c = 0.66 * max_dist\n",
|
||||||
" d = 1.0 * max_dist\n",
|
" d = 1.0 * max_dist\n",
|
||||||
" n_iter =0;\n",
|
" n_iter = 0\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # While we haven't found the minimum closely enough\n",
|
" # While we haven't found the minimum closely enough\n",
|
||||||
" while np.abs(b-c) > thresh and n_iter < max_iter:\n",
|
" while np.abs(b-c) > thresh and n_iter < max_iter:\n",
|
||||||
@@ -341,7 +341,7 @@
|
|||||||
" continue;\n",
|
" continue;\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # Rule #2 If point b is less than point c then\n",
|
" # Rule #2 If point b is less than point c then\n",
|
||||||
" # then point d becomes point c, and\n",
|
" # point d becomes point c, and\n",
|
||||||
" # point b becomes 1/3 between a and new d\n",
|
" # point b becomes 1/3 between a and new d\n",
|
||||||
" # point c becomes 2/3 between a and new d\n",
|
" # point c becomes 2/3 between a and new d\n",
|
||||||
" if lossb < lossc:\n",
|
" if lossb < lossc:\n",
|
||||||
@@ -351,7 +351,7 @@
|
|||||||
" continue\n",
|
" continue\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # Rule #3 If point c is less than point b then\n",
|
" # Rule #3 If point c is less than point b then\n",
|
||||||
" # then point a becomes point b, and\n",
|
" # point a becomes point b, and\n",
|
||||||
" # point b becomes 1/3 between new a and d\n",
|
" # point b becomes 1/3 between new a and d\n",
|
||||||
" # point c becomes 2/3 between new a and d\n",
|
" # point c becomes 2/3 between new a and d\n",
|
||||||
" a = b\n",
|
" a = b\n",
|
||||||
|
|||||||
@@ -53,7 +53,7 @@
|
|||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# Let's create our training data 30 pairs {x_i, y_i}\n",
|
"# Let's create our training data of 30 pairs {x_i, y_i}\n",
|
||||||
"# We'll try to fit the Gabor model to these data\n",
|
"# We'll try to fit the Gabor model to these data\n",
|
||||||
"data = np.array([[-1.920e+00,-1.422e+01,1.490e+00,-1.940e+00,-2.389e+00,-5.090e+00,\n",
|
"data = np.array([[-1.920e+00,-1.422e+01,1.490e+00,-1.940e+00,-2.389e+00,-5.090e+00,\n",
|
||||||
" -8.861e+00,3.578e+00,-6.010e+00,-6.995e+00,3.634e+00,8.743e-01,\n",
|
" -8.861e+00,3.578e+00,-6.010e+00,-6.995e+00,3.634e+00,8.743e-01,\n",
|
||||||
@@ -128,7 +128,7 @@
|
|||||||
"id": "QU5mdGvpTtEG"
|
"id": "QU5mdGvpTtEG"
|
||||||
},
|
},
|
||||||
"source": [
|
"source": [
|
||||||
"Now lets create compute the sum of squares loss for the training data"
|
"Now let's compute the sum of squares loss for the training data"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -198,7 +198,7 @@
|
|||||||
" b = np.floor(my_colormap_vals_dec - r * 256 *256 - g * 256)\n",
|
" b = np.floor(my_colormap_vals_dec - r * 256 *256 - g * 256)\n",
|
||||||
" my_colormap = ListedColormap(np.vstack((r,g,b)).transpose()/255.0)\n",
|
" my_colormap = ListedColormap(np.vstack((r,g,b)).transpose()/255.0)\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # Make grid of intercept/slope values to plot\n",
|
" # Make grid of offset/frequency values to plot\n",
|
||||||
" offsets_mesh, freqs_mesh = np.meshgrid(np.arange(-10,10.0,0.1), np.arange(2.5,22.5,0.1))\n",
|
" offsets_mesh, freqs_mesh = np.meshgrid(np.arange(-10,10.0,0.1), np.arange(2.5,22.5,0.1))\n",
|
||||||
" loss_mesh = np.zeros_like(freqs_mesh)\n",
|
" loss_mesh = np.zeros_like(freqs_mesh)\n",
|
||||||
" # Compute loss for every set of parameters\n",
|
" # Compute loss for every set of parameters\n",
|
||||||
@@ -343,7 +343,7 @@
|
|||||||
" b = 0.33 * max_dist\n",
|
" b = 0.33 * max_dist\n",
|
||||||
" c = 0.66 * max_dist\n",
|
" c = 0.66 * max_dist\n",
|
||||||
" d = 1.0 * max_dist\n",
|
" d = 1.0 * max_dist\n",
|
||||||
" n_iter =0;\n",
|
" n_iter = 0\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # While we haven't found the minimum closely enough\n",
|
" # While we haven't found the minimum closely enough\n",
|
||||||
" while np.abs(b-c) > thresh and n_iter < max_iter:\n",
|
" while np.abs(b-c) > thresh and n_iter < max_iter:\n",
|
||||||
@@ -367,7 +367,7 @@
|
|||||||
" continue;\n",
|
" continue;\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # Rule #2 If point b is less than point c then\n",
|
" # Rule #2 If point b is less than point c then\n",
|
||||||
" # then point d becomes point c, and\n",
|
" # point d becomes point c, and\n",
|
||||||
" # point b becomes 1/3 between a and new d\n",
|
" # point b becomes 1/3 between a and new d\n",
|
||||||
" # point c becomes 2/3 between a and new d\n",
|
" # point c becomes 2/3 between a and new d\n",
|
||||||
" if lossb < lossc:\n",
|
" if lossb < lossc:\n",
|
||||||
@@ -377,7 +377,7 @@
|
|||||||
" continue\n",
|
" continue\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # Rule #3 If point c is less than point b then\n",
|
" # Rule #3 If point c is less than point b then\n",
|
||||||
" # then point a becomes point b, and\n",
|
" # point a becomes point b, and\n",
|
||||||
" # point b becomes 1/3 between new a and d\n",
|
" # point b becomes 1/3 between new a and d\n",
|
||||||
" # point c becomes 2/3 between new a and d\n",
|
" # point c becomes 2/3 between new a and d\n",
|
||||||
" a = b\n",
|
" a = b\n",
|
||||||
|
|||||||
@@ -61,7 +61,7 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"source": [
|
"source": [
|
||||||
"# Let's create our training data 30 pairs {x_i, y_i}\n",
|
"# Let's create our training data of 30 pairs {x_i, y_i}\n",
|
||||||
"# We'll try to fit the Gabor model to these data\n",
|
"# We'll try to fit the Gabor model to these data\n",
|
||||||
"data = np.array([[-1.920e+00,-1.422e+01,1.490e+00,-1.940e+00,-2.389e+00,-5.090e+00,\n",
|
"data = np.array([[-1.920e+00,-1.422e+01,1.490e+00,-1.940e+00,-2.389e+00,-5.090e+00,\n",
|
||||||
" -8.861e+00,3.578e+00,-6.010e+00,-6.995e+00,3.634e+00,8.743e-01,\n",
|
" -8.861e+00,3.578e+00,-6.010e+00,-6.995e+00,3.634e+00,8.743e-01,\n",
|
||||||
@@ -137,7 +137,7 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"source": [
|
"source": [
|
||||||
"Now lets compute the sum of squares loss for the training data and plot the loss function"
|
"Now let's compute the sum of squares loss for the training data and plot the loss function"
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"id": "QU5mdGvpTtEG"
|
"id": "QU5mdGvpTtEG"
|
||||||
@@ -160,7 +160,7 @@
|
|||||||
" b = np.floor(my_colormap_vals_dec - r * 256 *256 - g * 256)\n",
|
" b = np.floor(my_colormap_vals_dec - r * 256 *256 - g * 256)\n",
|
||||||
" my_colormap = ListedColormap(np.vstack((r,g,b)).transpose()/255.0)\n",
|
" my_colormap = ListedColormap(np.vstack((r,g,b)).transpose()/255.0)\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # Make grid of intercept/slope values to plot\n",
|
" # Make grid of offset/frequency values to plot\n",
|
||||||
" offsets_mesh, freqs_mesh = np.meshgrid(np.arange(-10,10.0,0.1), np.arange(2.5,22.5,0.1))\n",
|
" offsets_mesh, freqs_mesh = np.meshgrid(np.arange(-10,10.0,0.1), np.arange(2.5,22.5,0.1))\n",
|
||||||
" loss_mesh = np.zeros_like(freqs_mesh)\n",
|
" loss_mesh = np.zeros_like(freqs_mesh)\n",
|
||||||
" # Compute loss for every set of parameters\n",
|
" # Compute loss for every set of parameters\n",
|
||||||
@@ -365,7 +365,6 @@
|
|||||||
"\n",
|
"\n",
|
||||||
" # Update the parameters\n",
|
" # Update the parameters\n",
|
||||||
" phi_all[:,c_step+1:c_step+2] = phi_all[:,c_step:c_step+1] - alpha * momentum\n",
|
" phi_all[:,c_step+1:c_step+2] = phi_all[:,c_step:c_step+1] - alpha * momentum\n",
|
||||||
" # Measure loss and draw model every 8th step\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"loss = compute_loss(data[0,:], data[1,:], model, phi_all[:,c_step+1:c_step+2])\n",
|
"loss = compute_loss(data[0,:], data[1,:], model, phi_all[:,c_step+1:c_step+2])\n",
|
||||||
"draw_model(data,model,phi_all[:,c_step+1], \"Iteration %d, loss = %f\"%(c_step+1,loss))\n",
|
"draw_model(data,model,phi_all[:,c_step+1], \"Iteration %d, loss = %f\"%(c_step+1,loss))\n",
|
||||||
@@ -387,4 +386,4 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -110,7 +110,7 @@
|
|||||||
" ax.plot(opt_path[0,:], opt_path[1,:],'-', color='#a0d9d3ff')\n",
|
" ax.plot(opt_path[0,:], opt_path[1,:],'-', color='#a0d9d3ff')\n",
|
||||||
" ax.plot(opt_path[0,:], opt_path[1,:],'.', color='#a0d9d3ff',markersize=10)\n",
|
" ax.plot(opt_path[0,:], opt_path[1,:],'.', color='#a0d9d3ff',markersize=10)\n",
|
||||||
" ax.set_xlabel(\"$\\phi_{0}$\")\n",
|
" ax.set_xlabel(\"$\\phi_{0}$\")\n",
|
||||||
" ax.set_ylabel(\"$\\phi_1}$\")\n",
|
" ax.set_ylabel(\"$\\phi_{1}$\")\n",
|
||||||
" plt.show()"
|
" plt.show()"
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
@@ -169,7 +169,7 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"source": [
|
"source": [
|
||||||
"Because the function changes much faster in $\\phi_1$ than in $\\phi_0$, there is no great step size to choose. If we set the step size so that it makes sensible progress in the $\\phi_1$, then it takes many iterations to converge. If we set the step size tso that we make sensible progress in the $\\phi_{0}$ direction, then the path oscillates in the $\\phi_1$ direction. \n",
|
"Because the function changes much faster in $\\phi_1$ than in $\\phi_0$, there is no great step size to choose. If we set the step size so that it makes sensible progress in the $\\phi_1$ direction, then it takes many iterations to converge. If we set the step size so that we make sensible progress in the $\\phi_{0}$ direction, then the path oscillates in the $\\phi_1$ direction. \n",
|
||||||
"\n",
|
"\n",
|
||||||
"This motivates Adam. At the core of Adam is the idea that we should just determine which way is downhill along each axis (i.e. left/right for $\\phi_0$ or up/down for $\\phi_1$) and move a fixed distance in that direction."
|
"This motivates Adam. At the core of Adam is the idea that we should just determine which way is downhill along each axis (i.e. left/right for $\\phi_0$ or up/down for $\\phi_1$) and move a fixed distance in that direction."
|
||||||
],
|
],
|
||||||
@@ -285,4 +285,4 @@
|
|||||||
"outputs": []
|
"outputs": []
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -310,7 +310,7 @@
|
|||||||
"grad_path_tiny_lr = None ;\n",
|
"grad_path_tiny_lr = None ;\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# TODO: Run the gradient descent on the modified loss\n",
|
"# TODO: Run the gradient descent on the unmodified loss\n",
|
||||||
"# function with 100 steps and a very small learning rate alpha of 0.05\n",
|
"# function with 100 steps and a very small learning rate alpha of 0.05\n",
|
||||||
"# Replace this line:\n",
|
"# Replace this line:\n",
|
||||||
"grad_path_typical_lr = None ;\n",
|
"grad_path_typical_lr = None ;\n",
|
||||||
@@ -335,4 +335,4 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
"metadata": {
|
"metadata": {
|
||||||
"colab": {
|
"colab": {
|
||||||
"provenance": [],
|
"provenance": [],
|
||||||
"authorship_tag": "ABX9TyOdSkjfQnSZXnffGsZVM7r5",
|
"authorship_tag": "ABX9TyO/wJ4N9w01f04mmrs/ZSHY",
|
||||||
"include_colab_link": true
|
"include_colab_link": true
|
||||||
},
|
},
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
@@ -185,10 +185,10 @@
|
|||||||
"np.set_printoptions(precision=3)\n",
|
"np.set_printoptions(precision=3)\n",
|
||||||
"output = graph_attention(X, omega, beta, phi, A);\n",
|
"output = graph_attention(X, omega, beta, phi, A);\n",
|
||||||
"print(\"Correct answer is:\")\n",
|
"print(\"Correct answer is:\")\n",
|
||||||
"print(\"[[1.796 1.346 0.569 1.703 1.298 1.224 1.24 1.234]\")\n",
|
"print(\"[[0. 0.028 0.37 0. 0.97 0. 0. 0.698]\")\n",
|
||||||
"print(\" [0.768 0.672 0. 0.529 3.841 4.749 5.376 4.761]\")\n",
|
"print(\" [0. 0. 0. 0. 1.184 0. 2.654 0. ]\")\n",
|
||||||
"print(\" [0.305 0.129 0. 0.341 0.785 1.014 1.113 1.024]\")\n",
|
"print(\" [1.13 0.564 0. 1.298 0.268 0. 0. 0.779]\")\n",
|
||||||
"print(\" [0. 0. 0. 0. 0.35 0.864 1.098 0.871]]]\")\n",
|
"print(\" [0.825 0. 0. 1.175 0. 0. 0. 0. ]]]\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print(\"Your answer is:\")\n",
|
"print(\"Your answer is:\")\n",
|
||||||
|
|||||||
BIN
UDL_Errata.pdf
BIN
UDL_Errata.pdf
Binary file not shown.
@@ -14,9 +14,9 @@
|
|||||||
<br>Published by MIT Press Dec 5th 2023.<br>
|
<br>Published by MIT Press Dec 5th 2023.<br>
|
||||||
<ul>
|
<ul>
|
||||||
<li>
|
<li>
|
||||||
<p style="font-size: larger; margin-bottom: 0">Download draft PDF Chapters 1-21 <a
|
<p style="font-size: larger; margin-bottom: 0">Download full PDF <a
|
||||||
href="https://github.com/udlbook/udlbook/releases/download/v.1.18/UnderstandingDeepLearning_24_12_23_C.pdf">here</a>
|
href="https://github.com/udlbook/udlbook/releases/download/v2.00/UnderstandingDeepLearning_28_01_24_C.pdf">here</a>
|
||||||
</p>2023-12-24. CC-BY-NC-ND license<br>
|
</p>2024-01-28. CC-BY-NC-ND license<br>
|
||||||
<img src="https://img.shields.io/github/downloads/udlbook/udlbook/total" alt="download stats shield">
|
<img src="https://img.shields.io/github/downloads/udlbook/udlbook/total" alt="download stats shield">
|
||||||
</li>
|
</li>
|
||||||
<li> Order your copy from <a href="https://mitpress.mit.edu/9780262048644/understanding-deep-learning/">here </a></li>
|
<li> Order your copy from <a href="https://mitpress.mit.edu/9780262048644/understanding-deep-learning/">here </a></li>
|
||||||
|
|||||||
Reference in New Issue
Block a user