udlbook/CM20315_Deep2.ipynb

{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "collapsed_sections": [],
      "authorship_tag": "ABX9TyP87B9tfgXpVQdlQBUGw4mg",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/udlbook/udlbook/blob/main/CM20315_Deep2.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "# **Deep neural networks #2**\n",
        "\n",
        "In this notebook, we'll investigate converting neural networks to matrix form."
      ],
      "metadata": {
        "id": "MaKn8CFlzN8E"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "8ClURpZQzI6L"
      },
      "outputs": [],
      "source": [
        "# Imports math library\n",
        "import numpy as np\n",
        "# Imports plotting library\n",
        "import matplotlib.pyplot as plt"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Define the Rectified Linear Unit (ReLU) function\n",
        "def ReLU(preactivation):\n",
        "  \"\"\"Return the element-wise ReLU of the input, i.e. max(preactivation, 0).\n",
        "\n",
        "  preactivation may be a NumPy array, a Python scalar or a (nested) list;\n",
        "  the result is whatever np.maximum returns for that input.\n",
        "  \"\"\"\n",
        "  # np.maximum accepts any array-like, whereas the .clip method exists only on ndarrays\n",
        "  activation = np.maximum(preactivation, 0.0)\n",
        "  return activation"
      ],
      "metadata": {
        "id": "YdmveeAUz4YG"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Define a shallow neural network with one input, one output, and three hidden units\n",
        "def shallow_1_1_3(x, activation_fn, phi_0,phi_1,phi_2,phi_3, theta_10, theta_11, theta_20, theta_21, theta_30, theta_31):\n",
        "  \"\"\"Evaluate a shallow network with one input, three hidden units and one output.\n",
        "\n",
        "  Hidden unit k computes activation_fn(theta_k0 + theta_k1 * x); the output is\n",
        "  phi_0 plus the sum of the activations weighted by phi_1, phi_2 and phi_3.\n",
        "\n",
        "  Returns the tuple (y, pre_1, pre_2, pre_3, act_1, act_2, act_3,\n",
        "  w_act_1, w_act_2, w_act_3) so that every intermediate quantity can be inspected.\n",
        "  \"\"\"\n",
        "  # (offset, slope) of the initial line for each hidden unit\n",
        "  hidden_params = ((theta_10, theta_11), (theta_20, theta_21), (theta_30, theta_31))\n",
        "  pre = [offset + slope * x for offset, slope in hidden_params]\n",
        "  # Pass each pre-activation through the activation function\n",
        "  act = [activation_fn(p) for p in pre]\n",
        "  # Scale each activation by its output weight\n",
        "  w_act = [phi * a for phi, a in zip((phi_1, phi_2, phi_3), act)]\n",
        "  # Sum the weighted activations and add the y offset\n",
        "  y = phi_0 + w_act[0] + w_act[1] + w_act[2]\n",
        "  # Return everything we have calculated\n",
        "  return (y, *pre, *act, *w_act)"
      ],
      "metadata": {
        "id": "ximCLwIfz8kj"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Plot the shallow neural network.  We'll assume the input is in the range [-1,1] and the output in [-1,1]\n",
        "def plot_neural(x, y):\n",
        "  \"\"\"Plot the network output y against the input x on axes fixed to [-1,1] x [-1,1].\"\"\"\n",
        "  fig, ax = plt.subplots()\n",
        "  # Transposed so that arrays holding one data point per column are drawn as a single curve\n",
        "  ax.plot(x.T, y.T)\n",
        "  ax.set_xlabel('Input')\n",
        "  ax.set_ylabel('Output')\n",
        "  ax.set_xlim([-1, 1])\n",
        "  ax.set_ylim([-1, 1])\n",
        "  # Use the same scale on both axes\n",
        "  ax.set_aspect(1.0)\n",
        "  plt.show()"
      ],
      "metadata": {
        "id": "btrt7BX20gKD"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "Let's define a network.  We'll just consider the inputs and outputs over the range [-1,1].  If you set the \"plot_all\" flag to True, you can see the details of how it was created."
      ],
      "metadata": {
        "id": "LxBJCObC-NTY"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Now let's define some parameters and run the first neural network\n",
        "# Offset (theta_k0) and slope (theta_k1) of each hidden unit\n",
        "n1_theta_10 = 0.0\n",
        "n1_theta_11 = -1.0\n",
        "n1_theta_20 = 0\n",
        "n1_theta_21 = 1.0\n",
        "n1_theta_30 = -0.67\n",
        "n1_theta_31 = 1.0\n",
        "# Output offset (phi_0) and output weights (phi_1 to phi_3)\n",
        "n1_phi_0 = 1.0\n",
        "n1_phi_1 = -2.0\n",
        "n1_phi_2 = -3.0\n",
        "n1_phi_3 = 9.3\n",
        "\n",
        "# Input values from -1 up to (but not including) 1 in steps of 0.01, stored as a single row\n",
        "n1_in = np.arange(-1, 1, 0.01).reshape(1, -1)\n",
        "\n",
        "# Run the neural network on every input value; only the output y is kept\n",
        "n1_out, *_ = shallow_1_1_3(\n",
        "    n1_in, ReLU,\n",
        "    n1_phi_0, n1_phi_1, n1_phi_2, n1_phi_3,\n",
        "    n1_theta_10, n1_theta_11, n1_theta_20, n1_theta_21, n1_theta_30, n1_theta_31)\n",
        "# And then plot it\n",
        "plot_neural(n1_in, n1_out)"
      ],
      "metadata": {
        "id": "JRebvurv22pT"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "Now we'll define the same neural network, but this time, we will  use matrix form.  When you get this right, it will draw the same plot as above."
      ],
      "metadata": {
        "id": "XCJqo_AjfAra"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Parameters of the network in matrix form, all initialized to zero\n",
        "beta_0 = np.zeros((3,1))\n",
        "Omega_0 = np.zeros((3,1))\n",
        "beta_1 = np.zeros((1,1))\n",
        "Omega_1 = np.zeros((1,3))\n",
        "\n",
        "# TODO Fill in the values of the beta and Omega matrices with the n1_theta and n1_phi parameters that define the network above\n",
        "# !!! NOTE THAT MATRICES ARE CONVENTIONALLY INDEXED WITH a_11 IN THE TOP LEFT CORNER, BUT NDARRAYS START AT [0,0]\n",
        "# To get you started I've filled in a couple:\n",
        "beta_0[0,0] = n1_theta_10\n",
        "Omega_0[0,0] = n1_theta_11\n",
        "\n",
        "\n",
        "# Make sure that the input data matrix has different inputs in its columns\n",
        "n_data = n1_in.size\n",
        "n_dim_in = 1\n",
        "n1_in_mat = np.reshape(n1_in,(n_dim_in,n_data))\n",
        "\n",
        "# This runs the network for ALL of the inputs x at once so we can draw the graph.\n",
        "# Multiplying each beta by a row of ones copies the offset into every data column.\n",
        "h1 = ReLU(np.matmul(beta_0,np.ones((1,n_data))) + np.matmul(Omega_0,n1_in_mat))\n",
        "# Keep the matrix-form result under its own name: n1_out (computed by the non-matrix\n",
        "# version) is the input to the second network later on, so it must not be overwritten\n",
        "# here while the TODO above is still unfinished.\n",
        "n1_out_mat = np.matmul(beta_1,np.ones((1,n_data))) + np.matmul(Omega_1,h1)\n",
        "\n",
        "# Draw the network and check that it looks the same as the non-matrix case\n",
        "plot_neural(n1_in, n1_out_mat)"
      ],
      "metadata": {
        "id": "MR0AecZYfACR"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "Now we'll feed the output of the first network into the second one."
      ],
      "metadata": {
        "id": "qOcj2Rof-o20"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Now let's define some parameters and run the second neural network\n",
        "# Offset (theta_k0) and slope (theta_k1) of each hidden unit\n",
        "n2_theta_10 = -0.6\n",
        "n2_theta_11 = -1.0\n",
        "n2_theta_20 = 0.2\n",
        "n2_theta_21 = 1.0\n",
        "n2_theta_30 = -0.5\n",
        "n2_theta_31 = 1.0\n",
        "# Output offset (phi_0) and output weights (phi_1 to phi_3)\n",
        "n2_phi_0 = 0.5\n",
        "n2_phi_1 = -1.0\n",
        "n2_phi_2 = -1.5\n",
        "n2_phi_3 = 2.0\n",
        "\n",
        "# Define a range of input values (not used below; the plot is drawn against n1_in)\n",
        "n2_in = np.arange(-1, 1, 0.01)\n",
        "\n",
        "# We run the second neural network on the output of the first network\n",
        "n2_out, *_ = shallow_1_1_3(\n",
        "    n1_out, ReLU,\n",
        "    n2_phi_0, n2_phi_1, n2_phi_2, n2_phi_3,\n",
        "    n2_theta_10, n2_theta_11, n2_theta_20, n2_theta_21, n2_theta_30, n2_theta_31)\n",
        "# And then plot the composed output against the original input\n",
        "plot_neural(n1_in, n2_out)"
      ],
      "metadata": {
        "id": "ZRjWu8i9239X"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Parameters of the composed (two hidden layer) network in matrix form, all initialized to zero\n",
        "beta_0 = np.zeros((3,1))\n",
        "Omega_0 = np.zeros((3,1))\n",
        "beta_1 = np.zeros((3,1))\n",
        "Omega_1 = np.zeros((3,3))\n",
        "beta_2 = np.zeros((1,1))\n",
        "Omega_2 = np.zeros((1,3))\n",
        "\n",
        "# TODO Fill in the values of the beta and Omega matrices with the n1_theta, n1_phi, n2_theta, and n2_phi parameters\n",
        "# that define the composition of the two networks above (see eqn 4.5 for Omega1 and beta1 albeit in different notation)\n",
        "# !!! NOTE THAT MATRICES ARE CONVENTIONALLY INDEXED WITH a_11 IN THE TOP LEFT CORNER, BUT NDARRAYS START AT [0,0] SO EVERYTHING IS OFFSET\n",
        "# To get you started I've filled in a few:\n",
        "beta_0[0,0] = n1_theta_10\n",
        "Omega_0[0,0] = n1_theta_11\n",
        "beta_1[0,0] = n2_theta_10 + n2_theta_11 * n1_phi_0\n",
        "Omega_1[0,0] = n2_theta_11 * n1_phi_1\n",
        "\n",
        "\n",
        "\n",
        "# Make sure that the input data matrix has different inputs in its columns\n",
        "n_data = n1_in.size\n",
        "n_dim_in = 1\n",
        "n1_in_mat = np.reshape(n1_in,(n_dim_in,n_data))\n",
        "\n",
        "# This runs the network for ALL of the inputs x at once so we can draw the graph (hence the extra np.ones term)\n",
        "h1 = ReLU(np.matmul(beta_0,np.ones((1,n_data))) + np.matmul(Omega_0,n1_in_mat))\n",
        "h2 = ReLU(np.matmul(beta_1,np.ones((1,n_data))) + np.matmul(Omega_1,h1))\n",
        "# This is the output of the composed network, so it gets its own name rather than\n",
        "# overwriting n1_out, which holds the output of the first network alone\n",
        "n2_out_mat = np.matmul(beta_2,np.ones((1,n_data))) + np.matmul(Omega_2,h2)\n",
        "\n",
        "# Draw the network and check that it looks the same as the non-matrix version\n",
        "plot_neural(n1_in, n2_out_mat)"
      ],
      "metadata": {
        "id": "ZB2HTalOE40X"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "Now let's make a deep network with 3 hidden layers.  It will have D_i=4 inputs, D_1=5 neurons in the first layer, D_2=2 neurons in the second layer, D_3=4 neurons in the third layer, and D_o=1 output.  Consult Figure 4.6 for guidance."
      ],
      "metadata": {
        "id": "0VANqxH2kyS4"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# define sizes\n",
        "D_i=4; D_1=5; D_2=2; D_3=4; D_o=1\n",
        "# We'll choose the inputs and parameters of this network randomly using np.random.normal\n",
        "# For example, we'll set the input using\n",
        "n_data = 10\n",
        "x = np.random.normal(size=(D_i, n_data))\n",
        "# TODO initialize the parameters randomly but with the correct sizes\n",
        "# Replace the lines below\n",
        "beta_0 = np.random.normal(size=(1,1))\n",
        "Omega_0 = np.random.normal(size=(1,1))\n",
        "beta_1 = np.random.normal(size=(1,1))\n",
        "Omega_1 = np.random.normal(size=(1,1))\n",
        "beta_2 = np.random.normal(size=(1,1))\n",
        "Omega_2 = np.random.normal(size=(1,1))\n",
        "beta_3 = np.random.normal(size=(1,1))\n",
        "Omega_3 = np.random.normal(size=(1,1))\n",
        "\n",
        "# If you set the above sizes to the correct values, then the following code will run\n",
        "h1 = ReLU(np.matmul(beta_0,np.ones((1,n_data))) + np.matmul(Omega_0,x))\n",
        "h2 = ReLU(np.matmul(beta_1,np.ones((1,n_data))) + np.matmul(Omega_1,h1))\n",
        "h3 = ReLU(np.matmul(beta_2,np.ones((1,n_data))) + np.matmul(Omega_2,h2))\n",
        "y = np.matmul(beta_3,np.ones((1,n_data))) + np.matmul(Omega_3,h3)\n",
        "\n",
        "# Check each result against its expected (number of units, n_data) shape.\n",
        "# The comparison is by value (!=): 'is not' tests object identity, which is not a\n",
        "# reliable way to compare integers, and each check looks at its own array's shape.\n",
        "if h1.shape != (D_1, n_data):\n",
        "  print(\"h1 is wrong shape\")\n",
        "if h2.shape != (D_2, n_data):\n",
        "  print(\"h2 is wrong shape\")\n",
        "if h3.shape != (D_3, n_data):\n",
        "  print(\"h3 is wrong shape\")\n",
        "if y.shape != (D_o, n_data):\n",
        "  print(\"Output is wrong shape\")\n",
        "\n",
        "# Print the inputs and outputs\n",
        "print(x)\n",
        "print(y)"
      ],
      "metadata": {
        "id": "RdBVAc_Rj22-"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}