From b7432dcd93d71e744d53b0b5d7311776c1b5f7f3 Mon Sep 17 00:00:00 2001 From: udlbook <110402648+udlbook@users.noreply.github.com> Date: Mon, 17 Oct 2022 10:59:28 +0100 Subject: [PATCH] Created using Colaboratory --- CM20315_Deep.ipynb | 448 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 448 insertions(+) create mode 100644 CM20315_Deep.ipynb diff --git a/CM20315_Deep.ipynb b/CM20315_Deep.ipynb new file mode 100644 index 0000000..16aa52d --- /dev/null +++ b/CM20315_Deep.ipynb @@ -0,0 +1,448 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "authorship_tag": "ABX9TyOtP/O21RVLxAeEBIwV0aZt", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# **Deep neural networks**\n", + "\n", + "In this notebook, we'll experiment with feeding one neural network into another as in figure 4.1 from the book." 
+ ], + "metadata": { + "id": "MaKn8CFlzN8E" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8ClURpZQzI6L" + }, + "outputs": [], + "source": [ + "# Imports math library\n", + "import numpy as np\n", + "# Imports plotting library\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "source": [ + "# Define the Rectified Linear Unit (ReLU) function\n", + "def ReLU(preactivation):\n", + " activation = preactivation.clip(0.0)\n", + " return activation" + ], + "metadata": { + "id": "YdmveeAUz4YG" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Define a shallow neural network with, one input, one output, and three hidden units\n", + "def shallow_1_1_3(x, activation_fn, phi_0,phi_1,phi_2,phi_3, theta_10, theta_11, theta_20, theta_21, theta_30, theta_31):\n", + " # Initial lines\n", + " pre_1 = theta_10 + theta_11 * x\n", + " pre_2 = theta_20 + theta_21 * x\n", + " pre_3 = theta_30 + theta_31 * x\n", + " # Activation functions\n", + " act_1 = activation_fn(pre_1)\n", + " act_2 = activation_fn(pre_2)\n", + " act_3 = activation_fn(pre_3)\n", + " # Weight activations\n", + " w_act_1 = phi_1 * act_1\n", + " w_act_2 = phi_2 * act_2\n", + " w_act_3 = phi_3 * act_3\n", + " # Combine weighted activation and add y offset\n", + " y = phi_0 + w_act_1 + w_act_2 + w_act_3\n", + " # Return everything we have calculated\n", + " return y, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3" + ], + "metadata": { + "id": "ximCLwIfz8kj" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# # Plot the shallow neural network. 
We'll assume input is in range [-1,1] and output [-1,1]\n",
We'll put the prefixes n1_ and n2_ before all the variables to make it clear which network is which. We'll just consider the inputs and outputs over the range [-1,1]. If you set the \"plot_all\" flat to True, you can see the details of how they were created." + ], + "metadata": { + "id": "LxBJCObC-NTY" + } + }, + { + "cell_type": "code", + "source": [ + "# Now lets define some parameters and run the first neural network\n", + "n1_theta_10 = 0.0 ; n1_theta_11 = -1.0\n", + "n1_theta_20 = 0 ; n1_theta_21 = 1.0\n", + "n1_theta_30 = -0.67 ; n1_theta_31 = 1.0\n", + "n1_phi_0 = 1.0; n1_phi_1 = -2.0; n1_phi_2 = -3.0; n1_phi_3 = 9.3\n", + "\n", + "# Define a range of input values\n", + "n1_in = np.arange(-1,1,0.01)\n", + "\n", + "# We run the neural network for each of these input values\n", + "n1_out, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3 = \\\n", + " shallow_1_1_3(n1_in, ReLU, n1_phi_0, n1_phi_1, n1_phi_2, n1_phi_3, n1_theta_10, n1_theta_11, n1_theta_20, n1_theta_21, n1_theta_30, n1_theta_31)\n", + "# And then plot it\n", + "plot_neural(n1_in, n1_out, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3, plot_all=False)" + ], + "metadata": { + "id": "JRebvurv22pT" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Now lets define some parameters and run the second neural network\n", + "n2_theta_10 = -0.6 ; n2_theta_11 = -1.0\n", + "n2_theta_20 = 0.2 ; n2_theta_21 = 1.0\n", + "n2_theta_30 = -0.5 ; n2_theta_31 = 1.0\n", + "n2_phi_0 = 0.5; n2_phi_1 = -1.0; n2_phi_2 = -1.5; n2_phi_3 = 2.0\n", + "\n", + "# Define a range of input values\n", + "n2_in = np.arange(-1,1,0.01)\n", + "\n", + "# We run the neural network for each of these input values\n", + "n2_out, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3 = \\\n", + " shallow_1_1_3(n2_in, ReLU, n2_phi_0, n2_phi_1, n2_phi_2, n2_phi_3, n2_theta_10, n2_theta_11, n2_theta_20, n2_theta_21, n2_theta_30, n2_theta_31)\n", 
+ "# And then plot it\n", + "plot_neural(n2_in, n2_out, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3, plot_all=False)" + ], + "metadata": { + "id": "ZRjWu8i9239X" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Now we'll consider feeding output of the first network into the second one." + ], + "metadata": { + "id": "qOcj2Rof-o20" + } + }, + { + "cell_type": "code", + "source": [ + "# # Plot two shallow neural networks and the composition of the two \n", + "def plot_neural_two_components(x_in, net1_out, net2_out, net12_out=None):\n", + "\n", + " # Plot the two networks separately\n", + " fig, ax = plt.subplots(1,2)\n", + " fig.set_size_inches(8.5, 8.5)\n", + " fig.tight_layout(pad=3.0)\n", + " ax[0].plot(x_in, net1_out,'r-')\n", + " ax[0].set_xlabel('Net 1 input'); ax[0].set_ylabel('Net 1 output')\n", + " ax[0].set_xlim([-1,1]);ax[0].set_ylim([-1,1])\n", + " ax[0].set_aspect(1.0)\n", + " ax[1].plot(x_in, net2_out,'b-')\n", + " ax[1].set_xlabel('Net 2 input'); ax[1].set_ylabel('Net 2 output')\n", + " ax[1].set_xlim([-1,1]);ax[1].set_ylim([-1,1])\n", + " ax[1].set_aspect(1.0)\n", + " plt.show()\n", + "\n", + " if net12_out is not None:\n", + " # Plot their composition\n", + " fig, ax = plt.subplots()\n", + " ax.plot(x_in ,net12_out,'g-')\n", + " ax.set_xlabel('Net 1 Input'); ax.set_ylabel('Net 2 Output')\n", + " ax.set_xlim([-1,1]);ax.set_ylim([-1,1])\n", + " ax.set_aspect(1.0)\n", + " plt.show()" + ], + "metadata": { + "id": "ZB2HTalOE40X" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Display the two inputs\n", + "x = np.arange(-1,1,0.001)\n", + "# We run the first and second neural networks for each of these input values\n", + "net1_out, *_ = shallow_1_1_3(x, ReLU, n1_phi_0, n1_phi_1, n1_phi_2, n1_phi_3, n1_theta_10, n1_theta_11, n1_theta_20, n1_theta_21, n1_theta_30, n1_theta_31)\n", + "net2_out, *_ = shallow_1_1_3(x, ReLU, n2_phi_0, 
n2_phi_1, n2_phi_2, n2_phi_3, n2_theta_10, n2_theta_11, n2_theta_20, n2_theta_21, n2_theta_30, n2_theta_31)\n", + "# Plot both graphs\n", + "plot_neural_two_components(x, net1_out, net2_out)" + ], + "metadata": { + "id": "K6Tmecgu7uqt" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# TODO \n", + "# Take a piece of paper and draw what you think will happen when we feed the \n", + "# output of the first network into the second one. Draw the relationship between\n", + "# the input of the first network and the output of the second one." + ], + "metadata": { + "id": "NUQVop9-Xta1" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Now let's see if your predictions were right \n", + "\n", + "# TODO feed the output of first network into second network (replace this line)\n", + "net12_out = np.zeros_like(x)\n", + "# Plot all three graphs\n", + "plot_neural_two_components(x, net1_out, net2_out, net12_out)" + ], + "metadata": { + "id": "Yq7GH-MCIyPI" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Now we'll change things a up a bit. What happens if we change the second network? (note the *-1 change)\n", + "net1_out, *_ = shallow_1_1_3(x, ReLU, n1_phi_0, n1_phi_1, n1_phi_2, n1_phi_3, n1_theta_10, n1_theta_11, n1_theta_20, n1_theta_21, n1_theta_30, n1_theta_31)\n", + "net2_out, *_ = shallow_1_1_3(x, ReLU, n2_phi_0, n2_phi_1*-1, n2_phi_2, n2_phi_3, n2_theta_10, n2_theta_11, n2_theta_20, n2_theta_21, n2_theta_30, n2_theta_31)\n", + "plot_neural_two_components(x, net1_out, net2_out)" + ], + "metadata": { + "id": "BMlLkLbdEuPu" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# TODO \n", + "# Take a piece of paper and draw what you think will happen when we feed the \n", + "# output of the first network into the second one now that we have changed it. 
Draw the relationship between\n", + "# the input of the first network and the output of the second one." + ], + "metadata": { + "id": "Of6jVXLTJ688" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# When you have a prediction, run this code to see if you were right\n", + "net12_out, *_ = shallow_1_1_3(net1_out, ReLU, n2_phi_0, n2_phi_1*-1, n2_phi_2, n2_phi_3, n2_theta_10, n2_theta_11, n2_theta_20, n2_theta_21, n2_theta_30, n2_theta_31)\n", + "plot_neural_two_components(x, net1_out, net2_out, net12_out)" + ], + "metadata": { + "id": "PbbSCaSeK6SM" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Let's change things again. What happens if we change the firsrt network? (note the changes)\n", + "net1_out, *_ = shallow_1_1_3(x, ReLU, n1_phi_0, n1_phi_1*0.5, n1_phi_2, n1_phi_3, n1_theta_10, n1_theta_11, n1_theta_20, n1_theta_21, n1_theta_30, n1_theta_31)\n", + "net2_out, *_ = shallow_1_1_3(x, ReLU, n2_phi_0, n2_phi_1, n2_phi_2, n2_phi_3, n2_theta_10, n2_theta_11, n2_theta_20, n2_theta_21, n2_theta_30, n2_theta_31)\n", + "plot_neural_two_components(x, net1_out, net2_out)" + ], + "metadata": { + "id": "b39mcSGFK9Fd" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# TODO \n", + "# Take a piece of paper and draw what you think will happen when we feed the \n", + "# output of the first network now we have changed it into the original second network. Draw the relationship between\n", + "# the input of the first network and the output of the second one." 
+ ], + "metadata": { + "id": "MhO40cC_LW9I" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# When you have a prediction, run this code to see if you were right\n", + "net12_out, *_ = shallow_1_1_3(net1_out, ReLU, n2_phi_0, n2_phi_1, n2_phi_2, n2_phi_3, n2_theta_10, n2_theta_11, n2_theta_20, n2_theta_21, n2_theta_30, n2_theta_31)\n", + "plot_neural_two_components(x, net1_out, net2_out, net12_out)" + ], + "metadata": { + "id": "Akwo-hnPLkNr" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Let's change things again. What happens if the first network and second networks are the same?\n", + "net1_out, *_ = shallow_1_1_3(x, ReLU, n1_phi_0, n1_phi_1, n1_phi_2, n1_phi_3, n1_theta_10, n1_theta_11, n1_theta_20, n1_theta_21, n1_theta_30, n1_theta_31)\n", + "net2_out_new, *_ = shallow_1_1_3(x, ReLU, n1_phi_0, n1_phi_1, n1_phi_2, n1_phi_3, n1_theta_10, n1_theta_11, n1_theta_20, n1_theta_21, n1_theta_30, n1_theta_31)\n", + "plot_neural_two_components(x, net1_out, net2_out_new)" + ], + "metadata": { + "id": "TJ7wXKpRLl_E" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# TODO \n", + "# Take a piece of paper and draw what you think will happen when we feed the \n", + "# output of the first network into the original second network. Draw the relationship between\n", + "# the input of the first network and the output of the second one." 
+ ], + "metadata": { + "id": "dJbbh6R7NG9k" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# When you have a prediction, run this code to see if you were right\n", + "net12_out, *_ = shallow_1_1_3(net1_out, ReLU, n1_phi_0, n1_phi_1, n1_phi_2, n1_phi_3, n1_theta_10, n1_theta_11, n1_theta_20, n1_theta_21, n1_theta_30, n1_theta_31)\n", + "plot_neural_two_components(x, net1_out, net2_out_new, net12_out)" + ], + "metadata": { + "id": "BiZZl3yNM2Bq" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# TODO \n", + "# Contemplate what you think will happen when we feed the \n", + "# output of the original first network into a second copy of the original first network, and then \n", + "# the output of that into the original second network (so now we have a three layer network)\n", + "# How many total linear regions will we have in the output? \n", + "net123_out, *_ = shallow_1_1_3(net12_out, ReLU, n2_phi_0, n2_phi_1, n2_phi_2, n2_phi_3, n2_theta_10, n2_theta_11, n2_theta_20, n2_theta_21, n2_theta_30, n2_theta_31)\n", + "plot_neural_two_components(x, net12_out, net2_out, net123_out)" + ], + "metadata": { + "id": "BSd51AkzNf7-" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# TO DO\n", + "# How many linear regions would there be if we ran N copies of the first network, feeding the result of the first \n", + "# into the second, the second into the third and so on, and then passed the result into the original second\n", + "# network (blue curve above)\n", + "\n", + "# Take away conclusions: with very few parameters, we can make A LOT of linear regions, but\n", + "# they depend on one another in complex ways that quickly become to difficult to understand intuitively." + ], + "metadata": { + "id": "HqzePCLOVQK7" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file