Files
udlbook/CM20315_Deep2.ipynb
2022-10-18 11:50:10 +01:00

317 lines
12 KiB
Plaintext

{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"collapsed_sections": [],
"authorship_tag": "ABX9TyP87B9tfgXpVQdlQBUGw4mg",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/udlbook/udlbook/blob/main/CM20315_Deep2.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"source": [
"# **Deep neural networks #2**\n",
"\n",
"In this notebook, we'll investigate converting neural networks to matrix form."
],
"metadata": {
"id": "MaKn8CFlzN8E"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "8ClURpZQzI6L"
},
"outputs": [],
"source": [
"# Imports math library\n",
"import numpy as np\n",
"# Imports plotting library\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"source": [
"# Define the Rectified Linear Unit (ReLU) function\n",
"def ReLU(preactivation):\n",
" activation = preactivation.clip(0.0)\n",
" return activation"
],
"metadata": {
"id": "YdmveeAUz4YG"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Define a shallow neural network with, one input, one output, and three hidden units\n",
"def shallow_1_1_3(x, activation_fn, phi_0,phi_1,phi_2,phi_3, theta_10, theta_11, theta_20, theta_21, theta_30, theta_31):\n",
" # Initial lines\n",
" pre_1 = theta_10 + theta_11 * x\n",
" pre_2 = theta_20 + theta_21 * x\n",
" pre_3 = theta_30 + theta_31 * x\n",
" # Activation functions\n",
" act_1 = activation_fn(pre_1)\n",
" act_2 = activation_fn(pre_2)\n",
" act_3 = activation_fn(pre_3)\n",
" # Weight activations\n",
" w_act_1 = phi_1 * act_1\n",
" w_act_2 = phi_2 * act_2\n",
" w_act_3 = phi_3 * act_3\n",
" # Combine weighted activation and add y offset\n",
" y = phi_0 + w_act_1 + w_act_2 + w_act_3\n",
" # Return everything we have calculated\n",
" return y, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3"
],
"metadata": {
"id": "ximCLwIfz8kj"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# # Plot the shallow neural network. We'll assume input in is range [-1,1] and output [-1,1]\n",
"def plot_neural(x, y):\n",
" fig, ax = plt.subplots()\n",
" ax.plot(x.T,y.T)\n",
" ax.set_xlabel('Input'); ax.set_ylabel('Output')\n",
" ax.set_xlim([-1,1]);ax.set_ylim([-1,1])\n",
" ax.set_aspect(1.0)\n",
" plt.show()"
],
"metadata": {
"id": "btrt7BX20gKD"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"Let's define a networks. We'll just consider the inputs and outputs over the range [-1,1]. If you set the \"plot_all\" flat to True, you can see the details of how it was created."
],
"metadata": {
"id": "LxBJCObC-NTY"
}
},
{
"cell_type": "code",
"source": [
"# Now lets define some parameters and run the first neural network\n",
"n1_theta_10 = 0.0 ; n1_theta_11 = -1.0\n",
"n1_theta_20 = 0 ; n1_theta_21 = 1.0\n",
"n1_theta_30 = -0.67 ; n1_theta_31 = 1.0\n",
"n1_phi_0 = 1.0; n1_phi_1 = -2.0; n1_phi_2 = -3.0; n1_phi_3 = 9.3\n",
"\n",
"# Define a range of input values\n",
"n1_in = np.arange(-1,1,0.01).reshape([1,-1])\n",
"\n",
"# We run the neural network for each of these input values\n",
"n1_out, *_ = shallow_1_1_3(n1_in, ReLU, n1_phi_0, n1_phi_1, n1_phi_2, n1_phi_3, n1_theta_10, n1_theta_11, n1_theta_20, n1_theta_21, n1_theta_30, n1_theta_31)\n",
"# And then plot it\n",
"plot_neural(n1_in, n1_out)"
],
"metadata": {
"id": "JRebvurv22pT"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"Now we'll define the same neural network, but this time, we will use matrix form. When you get this right, it will draw the same plot as above."
],
"metadata": {
"id": "XCJqo_AjfAra"
}
},
{
"cell_type": "code",
"source": [
"beta_0 = np.zeros((3,1))\n",
"Omega_0 = np.zeros((3,1))\n",
"beta_1 = np.zeros((1,1))\n",
"Omega_1 = np.zeros((1,3))\n",
"\n",
"# TODO Fill in the values of the beta and Omega matrices with the n1_theta and n1_phi parameters that define the network above\n",
"# !!! NOTE THAT MATRICES ARE CONVENTIONALLY INDEXED WITH a_11 IN THE TOP LEFT CORNER, BUT NDARRAYS START AT [0,0]\n",
"# To get you started I've filled in a couple:\n",
"beta_0[0,0] = n1_theta_10\n",
"Omega_0[0,0] = n1_theta_11\n",
"\n",
"\n",
"# Make sure that input data matrix has different inputs in its columns\n",
"n_data = n1_in.size\n",
"n_dim_in = 1\n",
"n1_in_mat = np.reshape(n1_in,(n_dim_in,n_data))\n",
"\n",
"# This runs the network for ALL of the inputs, x at once so we can draw graph\n",
"h1 = ReLU(np.matmul(beta_0,np.ones((1,n_data))) + np.matmul(Omega_0,n1_in_mat))\n",
"n1_out = np.matmul(beta_1,np.ones((1,n_data))) + np.matmul(Omega_1,h1)\n",
"\n",
"# Draw the network and check that it looks the same as the non-matrix case\n",
"plot_neural(n1_in, n1_out)"
],
"metadata": {
"id": "MR0AecZYfACR"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"Now we'll feed the output of the first network into the second one."
],
"metadata": {
"id": "qOcj2Rof-o20"
}
},
{
"cell_type": "code",
"source": [
"# Now lets define some parameters and run the second neural network\n",
"n2_theta_10 = -0.6 ; n2_theta_11 = -1.0\n",
"n2_theta_20 = 0.2 ; n2_theta_21 = 1.0\n",
"n2_theta_30 = -0.5 ; n2_theta_31 = 1.0\n",
"n2_phi_0 = 0.5; n2_phi_1 = -1.0; n2_phi_2 = -1.5; n2_phi_3 = 2.0\n",
"\n",
"# Define a range of input values\n",
"n2_in = np.arange(-1,1,0.01)\n",
"\n",
"# We run the second neural network on the output of the first network\n",
"n2_out, *_ = \\\n",
" shallow_1_1_3(n1_out, ReLU, n2_phi_0, n2_phi_1, n2_phi_2, n2_phi_3, n2_theta_10, n2_theta_11, n2_theta_20, n2_theta_21, n2_theta_30, n2_theta_31)\n",
"# And then plot it\n",
"plot_neural(n1_in, n2_out)"
],
"metadata": {
"id": "ZRjWu8i9239X"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"beta_0 = np.zeros((3,1))\n",
"Omega_0 = np.zeros((3,1))\n",
"beta_1 = np.zeros((3,1))\n",
"Omega_1 = np.zeros((3,3))\n",
"beta_2 = np.zeros((1,1))\n",
"Omega_2 = np.zeros((1,3))\n",
"\n",
"# TODO Fill in the values of the beta and Omega matrices for with the n1_theta, n1_phi, n2_theta, and n2_phi parameters \n",
"# that define the composition of the two networks above (see eqn 4.5 for Omega1 and beta1 albeit in different notation)\n",
"# !!! NOTE THAT MATRICES ARE CONVENTIONALLY INDEXED WITH a_11 IN THE TOP LEFT CORNER, BUT NDARRAYS START AT [0,0] SO EVERYTHING IS OFFSET\n",
"# To get you started I've filled in a few:\n",
"beta_0[0,0] = n1_theta_10\n",
"Omega_0[0,0] = n1_theta_11\n",
"beta_1[0,0] = n2_theta_10 + n2_theta_11 * n1_phi_0\n",
"Omega_1[0,0] = n2_theta_11 * n1_phi_1\n",
"\n",
"\n",
"\n",
"# Make sure that input data matrix has different inputs in its columns\n",
"n_data = n1_in.size\n",
"n_dim_in = 1\n",
"n1_in_mat = np.reshape(n1_in,(n_dim_in,n_data))\n",
"\n",
"# This runs the network for ALL of the inputs, x at once so we can draw graph (hence extra np.ones term)\n",
"h1 = ReLU(np.matmul(beta_0,np.ones((1,n_data))) + np.matmul(Omega_0,n1_in_mat))\n",
"h2 = ReLU(np.matmul(beta_1,np.ones((1,n_data))) + np.matmul(Omega_1,h1))\n",
"n1_out = np.matmul(beta_2,np.ones((1,n_data))) + np.matmul(Omega_2,h2)\n",
"\n",
"# Draw the network and check that it looks the same as the non-matrix version\n",
"plot_neural(n1_in, n1_out)"
],
"metadata": {
"id": "ZB2HTalOE40X"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"Now let's make a deep network with 3 hidden layers. It will have d_i=4 inputs, d_1=5 neurons in the first layer, d_2=2 neurons in the second layer and d_3=4 neurons in the third layer, and d_o = 1 output. Consults figure 4.6 for guidance."
],
"metadata": {
"id": "0VANqxH2kyS4"
}
},
{
"cell_type": "code",
"source": [
"# define sizes\n",
"D_i=4; D_1=5; D_2=2; D_3=4; D_o=1\n",
"# We'll choose the inputs and parameters of this network randomly using np.random.normal\n",
"# For example, we'll set the input using\n",
"n_data = 10;\n",
"x = np.random.normal(size=(D_i, n_data))\n",
"# TODO initialize the parameters randomly but with the correct sizes\n",
"# Replace the lines below\n",
"beta_0 = np.random.normal(size=(1,1))\n",
"Omega_0 = np.random.normal(size=(1,1))\n",
"beta_1 = np.random.normal(size=(1,1))\n",
"Omega_1 = np.random.normal(size=(1,1))\n",
"beta_2 = np.random.normal(size=(1,1))\n",
"Omega_2 = np.random.normal(size=(1,1))\n",
"beta_3 = np.random.normal(size=(1,1))\n",
"Omega_3 = np.random.normal(size=(1,1))\n",
"\n",
"# If you set the above sizes to the correct values then, the following code will run \n",
"h1 = ReLU(np.matmul(beta_0,np.ones((1,n_data))) + np.matmul(Omega_0,x));\n",
"h2 = ReLU(np.matmul(beta_1,np.ones((1,n_data))) + np.matmul(Omega_1,h1));\n",
"h3 = ReLU(np.matmul(beta_2,np.ones((1,n_data))) + np.matmul(Omega_2,h2));\n",
"y = np.matmul(beta_3,np.ones((1,n_data))) + np.matmul(Omega_3,h3)\n",
"\n",
"if h1.shape[0] is not D_1 or h1.shape[1] is not n_data:\n",
" print(\"h1 is wrong shape\")\n",
"if h2.shape[0] is not D_2 or h1.shape[1] is not n_data:\n",
" print(\"h2 is wrong shape\")\n",
"if h3.shape[0] is not D_3 or h1.shape[1] is not n_data:\n",
" print(\"h3 is wrong shape\")\n",
"if y.shape[0] is not D_o or h1.shape[1] is not n_data:\n",
" print(\"Output is wrong shape\")\n",
"\n",
"# Print the inputs and outputs\n",
"print(x)\n",
"print(y)"
],
"metadata": {
"id": "RdBVAc_Rj22-"
},
"execution_count": null,
"outputs": []
}
]
}