293 lines
12 KiB
Plaintext
293 lines
12 KiB
Plaintext
{
|
|
"nbformat": 4,
|
|
"nbformat_minor": 0,
|
|
"metadata": {
|
|
"colab": {
|
|
"provenance": [],
|
|
"include_colab_link": true
|
|
},
|
|
"kernelspec": {
|
|
"name": "python3",
|
|
"display_name": "Python 3"
|
|
},
|
|
"language_info": {
|
|
"name": "python"
|
|
}
|
|
},
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "view-in-github",
|
|
"colab_type": "text"
|
|
},
|
|
"source": [
|
|
"<a href=\"https://colab.research.google.com/github/udlbook/udlbook/blob/main/Notebooks/Chap03/3_2_Shallow_Networks_II.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"source": [
|
|
"# **Notebook 3.2 -- Shallow neural networks II**\n",
|
|
"\n",
|
|
"The purpose of this notebook is to gain some familiarity with shallow neural networks with 2D inputs. It works through an example similar to figure 3.8 and experiments with different activation functions. <br><br>\n",
|
|
"\n",
|
|
"Work through the cells below, running each cell in turn. In various places you will see the word \"TODO\". Follow the instructions at these places and write code to complete the functions. There are also questions interspersed in the text.\n",
|
|
"\n",
|
|
"Contact me at udlbookmail@gmail.com if you find any mistakes or have any suggestions.\n"
|
|
],
|
|
"metadata": {
|
|
"id": "1Z6LB4Ybn1oN"
|
|
}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"id": "hAM55ZjSncOk"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Imports math library\n",
|
|
"import numpy as np\n",
|
|
"# Imports plotting library\n",
|
|
"import matplotlib.pyplot as plt"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# Code to draw 2D function -- read it so you know what is going on, but you don't have to change it\n",
|
|
"def draw_2D_function(ax, x1_mesh, x2_mesh, y):\n",
|
|
" pos = ax.contourf(x1_mesh, x2_mesh, y, levels=256 ,cmap = 'hot', vmin=-10,vmax=10.0)\n",
|
|
" ax.set_xlabel('x1');ax.set_ylabel('x2')\n",
|
|
" levels = np.arange(-10,10,1.0)\n",
|
|
" ax.contour(x1_mesh, x2_mesh, y, levels, cmap='winter')\n",
|
|
"\n",
|
|
"# Plot the shallow neural network. We'll assume the inputs are in the range [0,10],[0,10] and the output is in the range [-10,10]\n",
|
|
"def plot_neural_2_inputs(x1,x2, y, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3):\n",
|
|
"\n",
|
|
" fig, ax = plt.subplots(3,3)\n",
|
|
" fig.set_size_inches(8.5, 8.5)\n",
|
|
" fig.tight_layout(pad=3.0)\n",
|
|
" draw_2D_function(ax[0,0], x1,x2,pre_1); ax[0,0].set_title('Preactivation')\n",
|
|
" draw_2D_function(ax[0,1], x1,x2,pre_2); ax[0,1].set_title('Preactivation')\n",
|
|
" draw_2D_function(ax[0,2], x1,x2,pre_3); ax[0,2].set_title('Preactivation')\n",
|
|
" draw_2D_function(ax[1,0], x1,x2,act_1); ax[1,0].set_title('Activation')\n",
|
|
" draw_2D_function(ax[1,1], x1,x2,act_2); ax[1,1].set_title('Activation')\n",
|
|
" draw_2D_function(ax[1,2], x1,x2,act_3); ax[1,2].set_title('Activation')\n",
|
|
" draw_2D_function(ax[2,0], x1,x2,w_act_1); ax[2,0].set_title('Weighted Act')\n",
|
|
" draw_2D_function(ax[2,1], x1,x2,w_act_2); ax[2,1].set_title('Weighted Act')\n",
|
|
" draw_2D_function(ax[2,2], x1,x2,w_act_3); ax[2,2].set_title('Weighted Act')\n",
|
|
" plt.show()\n",
|
|
"\n",
|
|
" fig, ax = plt.subplots()\n",
|
|
" draw_2D_function(ax,x1,x2,y)\n",
|
|
" ax.set_title('Network output, $y$')\n",
|
|
" ax.set_aspect(1.0)\n",
|
|
" plt.show()"
|
|
],
|
|
"metadata": {
|
|
"id": "IHtCP0t2HC4c"
|
|
},
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# Define the Rectified Linear Unit (ReLU) function\n",
|
|
"def ReLU(preactivation):\n",
|
|
" activation = preactivation.clip(0.0)\n",
|
|
" return activation"
|
|
],
|
|
"metadata": {
|
|
"id": "Lw71laEeJgKs"
|
|
},
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# Define a shallow neural network with two inputs, one output, and three hidden units\n",
|
|
"def shallow_2_1_3(x1,x2, activation_fn, phi_0,phi_1,phi_2,phi_3, theta_10, theta_11,\\\n",
|
|
" theta_12, theta_20, theta_21, theta_22, theta_30, theta_31, theta_32):\n",
|
|
" # TODO Replace the lines below to compute the three initial linear functions\n",
|
|
" # (figure 3.8a-c) from the theta parameters. These are the preactivations\n",
|
|
" pre_1 = np.zeros_like(x1)\n",
|
|
" pre_2 = np.zeros_like(x1)\n",
|
|
" pre_3 = np.zeros_like(x1)\n",
|
|
"\n",
|
|
" # Pass these through the activation function (e.g., ReLU) to compute the activations as in\n",
|
|
" # figure 3.8 d-f\n",
|
|
" act_1 = activation_fn(pre_1)\n",
|
|
" act_2 = activation_fn(pre_2)\n",
|
|
" act_3 = activation_fn(pre_3)\n",
|
|
"\n",
|
|
" # TODO Replace the code below to weight the activations using phi_1, phi_2 and phi_3\n",
|
|
" # To create the equivalent of figure 3.8 g-i\n",
|
|
" w_act_1 = np.zeros_like(x1)\n",
|
|
" w_act_2 = np.zeros_like(x1)\n",
|
|
" w_act_3 = np.zeros_like(x1)\n",
|
|
"\n",
|
|
" # TODO Replace the code below to combine the weighted activations and add\n",
|
|
" # phi_0 to create the output as in figure 3.8j\n",
|
|
" y = np.zeros_like(x1)\n",
|
|
"\n",
|
|
" # Return everything we have calculated\n",
|
|
" return y, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3"
|
|
],
|
|
"metadata": {
|
|
"id": "VIZA8HywIjfl"
|
|
},
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# Now let's define some parameters and run the neural network\n",
|
|
"theta_10 = -4.0 ; theta_11 = 0.9; theta_12 = 0.0\n",
|
|
"theta_20 = 5.0 ; theta_21 = -0.9 ; theta_22 = -0.5\n",
|
|
"theta_30 = -7 ; theta_31 = 0.5; theta_32 = 0.9\n",
|
|
"phi_0 = 0.0; phi_1 = -2.0; phi_2 = 2.0; phi_3 = 1.5\n",
|
|
"\n",
|
|
"x1 = np.arange(0.0, 10.0, 0.1)\n",
|
|
"x2 = np.arange(0.0, 10.0, 0.1)\n",
|
|
"x1,x2 = np.meshgrid(x1,x2) # https://www.geeksforgeeks.org/numpy-meshgrid-function/\n",
|
|
"\n",
|
|
"# We run the neural network for each of these input values\n",
|
|
"y, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3 = \\\n",
|
|
" shallow_2_1_3(x1,x2, ReLU, phi_0,phi_1,phi_2,phi_3, theta_10, theta_11, theta_12, theta_20, theta_21, theta_22, theta_30, theta_31, theta_32)\n",
|
|
"# And then plot it\n",
|
|
"plot_neural_2_inputs(x1,x2, y, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3)"
|
|
],
|
|
"metadata": {
|
|
"id": "51lvc9bfIrs4"
|
|
},
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"source": [
|
|
"How many different linear polytopes are made by this model? Identify each in the network output."
|
|
],
|
|
"metadata": {
|
|
"id": "j62IizIfMYZK"
|
|
}
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"source": [
|
|
"Now we'll extend this model to have two outputs $y_1$ and $y_2$, each of which can be visualized with a separate heatmap. You will now have sets of parameters $\\phi_{10}, \\phi_{11}, \\phi_{12}, \\phi_{13}$ and $\\phi_{20}, \\phi_{21}, \\phi_{22}, \\phi_{23}$ that correspond to each of these outputs."
|
|
],
|
|
"metadata": {
|
|
"id": "Xl6LcrUyM7Lh"
|
|
}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# Plot the shallow neural network. We'll assume the inputs are in the range [0,10],[0,10] and the outputs are in the range [-10,10]\n",
|
|
"def plot_neural_2_inputs_2_outputs(x1,x2, y1, y2, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_11, w_act_12, w_act_13, w_act_21, w_act_22, w_act_23):\n",
|
|
"\n",
|
|
" # Plot the intermediate quantities: preactivations, activations, and the weighted activations for each output\n",
|
|
" fig, ax = plt.subplots(4,3)\n",
|
|
" fig.set_size_inches(8.5, 8.5)\n",
|
|
" fig.tight_layout(pad=3.0)\n",
|
|
" draw_2D_function(ax[0,0], x1,x2,pre_1); ax[0,0].set_title('Preactivation')\n",
|
|
" draw_2D_function(ax[0,1], x1,x2,pre_2); ax[0,1].set_title('Preactivation')\n",
|
|
" draw_2D_function(ax[0,2], x1,x2,pre_3); ax[0,2].set_title('Preactivation')\n",
|
|
" draw_2D_function(ax[1,0], x1,x2,act_1); ax[1,0].set_title('Activation')\n",
|
|
" draw_2D_function(ax[1,1], x1,x2,act_2); ax[1,1].set_title('Activation')\n",
|
|
" draw_2D_function(ax[1,2], x1,x2,act_3); ax[1,2].set_title('Activation')\n",
|
|
" draw_2D_function(ax[2,0], x1,x2,w_act_11); ax[2,0].set_title('Weighted Act 1')\n",
|
|
" draw_2D_function(ax[2,1], x1,x2,w_act_12); ax[2,1].set_title('Weighted Act 1')\n",
|
|
" draw_2D_function(ax[2,2], x1,x2,w_act_13); ax[2,2].set_title('Weighted Act 1')\n",
|
|
" draw_2D_function(ax[3,0], x1,x2,w_act_21); ax[3,0].set_title('Weighted Act 2')\n",
|
|
" draw_2D_function(ax[3,1], x1,x2,w_act_22); ax[3,1].set_title('Weighted Act 2')\n",
|
|
" draw_2D_function(ax[3,2], x1,x2,w_act_23); ax[3,2].set_title('Weighted Act 2')\n",
|
|
" plt.show()\n",
|
|
"\n",
|
|
" fig, ax = plt.subplots()\n",
|
|
" draw_2D_function(ax,x1,x2,y1)\n",
|
|
" ax.set_title('Network output, $y_1$')\n",
|
|
" ax.set_aspect(1.0)\n",
|
|
" plt.show()\n",
|
|
"\n",
|
|
" fig, ax = plt.subplots()\n",
|
|
" draw_2D_function(ax,x1,x2,y2)\n",
|
|
" ax.set_title('Network output, $y_2$')\n",
|
|
" ax.set_aspect(1.0)\n",
|
|
" plt.show()"
|
|
],
|
|
"metadata": {
|
|
"id": "DlznqZWdPtjI"
|
|
},
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"\n",
|
|
"# Define a shallow neural network with two inputs, two outputs, and three hidden units\n",
|
|
"def shallow_2_2_3(x1,x2, activation_fn, phi_10,phi_11,phi_12,phi_13, phi_20,phi_21,phi_22,phi_23, theta_10, theta_11,\\\n",
|
|
" theta_12, theta_20, theta_21, theta_22, theta_30, theta_31, theta_32):\n",
|
|
"\n",
|
|
" # TODO -- write this function -- replace the dummy code below\n",
|
|
" pre_1 = np.zeros_like(x1)\n",
|
|
" pre_2 = np.zeros_like(x1)\n",
|
|
" pre_3 = np.zeros_like(x1)\n",
|
|
" act_1 = np.zeros_like(x1)\n",
|
|
" act_2 = np.zeros_like(x1)\n",
|
|
" act_3 = np.zeros_like(x1)\n",
|
|
" w_act_11 = np.zeros_like(x1)\n",
|
|
" w_act_12 = np.zeros_like(x1)\n",
|
|
" w_act_13 = np.zeros_like(x1)\n",
|
|
" w_act_21 = np.zeros_like(x1)\n",
|
|
" w_act_22 = np.zeros_like(x1)\n",
|
|
" w_act_23 = np.zeros_like(x1)\n",
|
|
" y1 = np.zeros_like(x1)\n",
|
|
" y2 = np.zeros_like(x1)\n",
|
|
"\n",
|
|
"\n",
|
|
" # Return everything we have calculated\n",
|
|
" return y1,y2, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_11, w_act_12, w_act_13, w_act_21, w_act_22, w_act_23\n"
|
|
],
|
|
"metadata": {
|
|
"id": "m8KAhwr4QWro"
|
|
},
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# Now let's define some parameters and run the neural network\n",
|
|
"theta_10 = -4.0 ; theta_11 = 0.9; theta_12 = 0.0\n",
|
|
"theta_20 = 5.0 ; theta_21 = -0.9 ; theta_22 = -0.5\n",
|
|
"theta_30 = -7 ; theta_31 = 0.5; theta_32 = 0.9\n",
|
|
"phi_10 = 0.0; phi_11 = -2.0; phi_12 = 2.0; phi_13 = 1.5\n",
|
|
"phi_20 = -2.0; phi_21 = -1.0; phi_22 = -2.0; phi_23 = 0.8\n",
|
|
"\n",
|
|
"x1 = np.arange(0.0, 10.0, 0.1)\n",
|
|
"x2 = np.arange(0.0, 10.0, 0.1)\n",
|
|
"x1,x2 = np.meshgrid(x1,x2) # https://www.geeksforgeeks.org/numpy-meshgrid-function/\n",
|
|
"\n",
|
|
"# We run the neural network for each of these input values\n",
|
|
"y1, y2, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_11, w_act_12, w_act_13, w_act_21, w_act_22, w_act_23 = \\\n",
|
|
" shallow_2_2_3(x1,x2, ReLU, phi_10,phi_11,phi_12,phi_13, phi_20,phi_21,phi_22,phi_23, theta_10, theta_11, theta_12, theta_20, theta_21, theta_22, theta_30, theta_31, theta_32)\n",
|
|
"# And then plot it\n",
|
|
"plot_neural_2_inputs_2_outputs(x1,x2, y1, y2, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_11, w_act_12, w_act_13, w_act_21, w_act_22, w_act_23)"
|
|
],
|
|
"metadata": {
|
|
"id": "ms4YTqbYUeRV"
|
|
},
|
|
"execution_count": null,
|
|
"outputs": []
|
|
}
|
|
]
|
|
} |