{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "authorship_tag": "ABX9TyOtP/O21RVLxAeEBIwV0aZt",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/udlbook/udlbook/blob/main/CM20315_Deep.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "# **Deep neural networks**\n",
        "\n",
        "In this notebook, we'll experiment with feeding one neural network into another as in figure 4.1 from the book."
      ],
      "metadata": {
        "id": "MaKn8CFlzN8E"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "8ClURpZQzI6L"
      },
      "outputs": [],
      "source": [
        "# Imports math library\n",
        "import numpy as np\n",
        "# Imports plotting library\n",
        "import matplotlib.pyplot as plt"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Define the Rectified Linear Unit (ReLU) function\n",
        "def ReLU(preactivation):\n",
        "  activation = preactivation.clip(0.0)\n",
        "  return activation"
      ],
      "metadata": {
        "id": "YdmveeAUz4YG"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Define a shallow neural network with, one input, one output, and three hidden units\n",
        "def shallow_1_1_3(x, activation_fn, phi_0,phi_1,phi_2,phi_3, theta_10, theta_11, theta_20, theta_21, theta_30, theta_31):\n",
        "  # Initial lines\n",
        "  pre_1 = theta_10 + theta_11 * x\n",
        "  pre_2 = theta_20 + theta_21 * x\n",
        "  pre_3 = theta_30 + theta_31 * x\n",
        "  # Activation functions\n",
        "  act_1 = activation_fn(pre_1)\n",
        "  act_2 = activation_fn(pre_2)\n",
        "  act_3 = activation_fn(pre_3)\n",
        "  # Weight activations\n",
        "  w_act_1 = phi_1 * act_1\n",
        "  w_act_2 = phi_2 * act_2\n",
        "  w_act_3 = phi_3 * act_3\n",
        "  # Combine weighted activation and add y offset\n",
        "  y = phi_0 + w_act_1 + w_act_2 + w_act_3\n",
        "  # Return everything we have calculated\n",
        "  return y, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3"
      ],
      "metadata": {
        "id": "ximCLwIfz8kj"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# # Plot the shallow neural network.  We'll assume input in is range [-1,1] and output [-1,1]\n",
        "# If the plot_all flag is set to true, then we'll plot all the intermediate stages as in Figure 3.3 \n",
        "def plot_neural(x, y, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3, plot_all=False, x_data=None, y_data=None):\n",
        "\n",
        "  # Plot intermediate plots if flag set\n",
        "  if plot_all:\n",
        "    fig, ax = plt.subplots(3,3)\n",
        "    fig.set_size_inches(8.5, 8.5)\n",
        "    fig.tight_layout(pad=3.0)\n",
        "    ax[0,0].plot(x,pre_1,'r-'); ax[0,0].set_ylabel('Preactivation')\n",
        "    ax[0,1].plot(x,pre_2,'b-'); ax[0,1].set_ylabel('Preactivation')\n",
        "    ax[0,2].plot(x,pre_3,'g-'); ax[0,2].set_ylabel('Preactivation')\n",
        "    ax[1,0].plot(x,act_1,'r-'); ax[1,0].set_ylabel('Activation')\n",
        "    ax[1,1].plot(x,act_2,'b-'); ax[1,1].set_ylabel('Activation')\n",
        "    ax[1,2].plot(x,act_3,'g-'); ax[1,2].set_ylabel('Activation')\n",
        "    ax[2,0].plot(x,w_act_1,'r-'); ax[2,0].set_ylabel('Weighted Act')\n",
        "    ax[2,1].plot(x,w_act_2,'b-'); ax[2,1].set_ylabel('Weighted Act')\n",
        "    ax[2,2].plot(x,w_act_3,'g-'); ax[2,2].set_ylabel('Weighted Act')\n",
        "\n",
        "    for plot_y in range(3):\n",
        "      for plot_x in range(3):\n",
        "        ax[plot_y,plot_x].set_xlim([-1,1]);ax[plot_x,plot_y].set_ylim([-1,1])\n",
        "        ax[plot_y,plot_x].set_aspect(1.0)\n",
        "      ax[2,plot_y].set_xlabel('Input, $x$');\n",
        "    plt.show()\n",
        "\n",
        "  fig, ax = plt.subplots()\n",
        "  ax.plot(x,y)\n",
        "  ax.set_xlabel('Input'); ax.set_ylabel('Output')\n",
        "  ax.set_xlim([-1,1]);ax.set_ylim([-1,1])\n",
        "  ax.set_aspect(1.0)\n",
        "  if x_data is not None:\n",
        "    ax.plot(x_data, y_data, 'mo')\n",
        "    for i in range(len(x_data)):\n",
        "      ax.plot(x_data[i], y_data[i],)\n",
        "  plt.show()"
      ],
      "metadata": {
        "id": "btrt7BX20gKD"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "Let's define two networks.  We'll put the prefixes n1_ and n2_ before all the variables to make it clear which network is which.  We'll just consider the inputs and outputs over the range [-1,1].  If you set the \"plot_all\" flat to True,  you can see the details of how they were created."
      ],
      "metadata": {
        "id": "LxBJCObC-NTY"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Now lets define some parameters and run the first neural network\n",
        "n1_theta_10 = 0.0   ; n1_theta_11 = -1.0\n",
        "n1_theta_20 = 0     ; n1_theta_21 = 1.0\n",
        "n1_theta_30 = -0.67 ; n1_theta_31 =  1.0\n",
        "n1_phi_0 = 1.0; n1_phi_1 = -2.0; n1_phi_2 = -3.0; n1_phi_3 = 9.3\n",
        "\n",
        "# Define a range of input values\n",
        "n1_in = np.arange(-1,1,0.01)\n",
        "\n",
        "# We run the neural network for each of these input values\n",
        "n1_out, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3 = \\\n",
        "    shallow_1_1_3(n1_in, ReLU, n1_phi_0, n1_phi_1, n1_phi_2, n1_phi_3, n1_theta_10, n1_theta_11, n1_theta_20, n1_theta_21, n1_theta_30, n1_theta_31)\n",
        "# And then plot it\n",
        "plot_neural(n1_in, n1_out, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3, plot_all=False)"
      ],
      "metadata": {
        "id": "JRebvurv22pT"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Now lets define some parameters and run the second neural network\n",
        "n2_theta_10 =  -0.6 ; n2_theta_11 = -1.0\n",
        "n2_theta_20 =  0.2  ; n2_theta_21 = 1.0\n",
        "n2_theta_30 =  -0.5  ; n2_theta_31 =  1.0\n",
        "n2_phi_0 = 0.5; n2_phi_1 = -1.0; n2_phi_2 = -1.5; n2_phi_3 = 2.0\n",
        "\n",
        "# Define a range of input values\n",
        "n2_in = np.arange(-1,1,0.01)\n",
        "\n",
        "# We run the neural network for each of these input values\n",
        "n2_out, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3 = \\\n",
        "    shallow_1_1_3(n2_in, ReLU, n2_phi_0, n2_phi_1, n2_phi_2, n2_phi_3, n2_theta_10, n2_theta_11, n2_theta_20, n2_theta_21, n2_theta_30, n2_theta_31)\n",
        "# And then plot it\n",
        "plot_neural(n2_in, n2_out, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3, plot_all=False)"
      ],
      "metadata": {
        "id": "ZRjWu8i9239X"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "Now we'll consider feeding output of the first network into the second one."
      ],
      "metadata": {
        "id": "qOcj2Rof-o20"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# # Plot two shallow neural networks and the composition of the two   \n",
        "def plot_neural_two_components(x_in, net1_out, net2_out, net12_out=None):\n",
        "\n",
        "  # Plot the two networks separately\n",
        "  fig, ax = plt.subplots(1,2)\n",
        "  fig.set_size_inches(8.5, 8.5)\n",
        "  fig.tight_layout(pad=3.0)\n",
        "  ax[0].plot(x_in, net1_out,'r-')\n",
        "  ax[0].set_xlabel('Net 1 input'); ax[0].set_ylabel('Net 1 output')\n",
        "  ax[0].set_xlim([-1,1]);ax[0].set_ylim([-1,1])\n",
        "  ax[0].set_aspect(1.0)\n",
        "  ax[1].plot(x_in, net2_out,'b-')\n",
        "  ax[1].set_xlabel('Net 2 input'); ax[1].set_ylabel('Net 2 output')\n",
        "  ax[1].set_xlim([-1,1]);ax[1].set_ylim([-1,1])\n",
        "  ax[1].set_aspect(1.0)\n",
        "  plt.show()\n",
        "\n",
        "  if net12_out is not None:\n",
        "    # Plot their composition\n",
        "    fig, ax = plt.subplots()\n",
        "    ax.plot(x_in ,net12_out,'g-')\n",
        "    ax.set_xlabel('Net 1 Input'); ax.set_ylabel('Net 2 Output')\n",
        "    ax.set_xlim([-1,1]);ax.set_ylim([-1,1])\n",
        "    ax.set_aspect(1.0)\n",
        "    plt.show()"
      ],
      "metadata": {
        "id": "ZB2HTalOE40X"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Display the two inputs\n",
        "x = np.arange(-1,1,0.001)\n",
        "# We run the first  and second neural networks for each of these input values\n",
        "net1_out, *_ = shallow_1_1_3(x, ReLU, n1_phi_0, n1_phi_1, n1_phi_2, n1_phi_3, n1_theta_10, n1_theta_11, n1_theta_20, n1_theta_21, n1_theta_30, n1_theta_31)\n",
        "net2_out, *_ = shallow_1_1_3(x, ReLU, n2_phi_0, n2_phi_1, n2_phi_2, n2_phi_3, n2_theta_10, n2_theta_11, n2_theta_20, n2_theta_21, n2_theta_30, n2_theta_31)\n",
        "# Plot both graphs\n",
        "plot_neural_two_components(x, net1_out, net2_out)"
      ],
      "metadata": {
        "id": "K6Tmecgu7uqt"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# TODO \n",
        "# Take a piece of paper and draw what you think will happen when we feed the \n",
        "# output of the first network into the second one.  Draw the relationship between\n",
        "# the input of the first network and the output of the second one."
      ],
      "metadata": {
        "id": "NUQVop9-Xta1"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Now let's see if your predictions were right \n",
        "\n",
        "# TODO feed the output of first network into second network (replace this line)\n",
        "net12_out = np.zeros_like(x)\n",
        "# Plot all three graphs\n",
        "plot_neural_two_components(x, net1_out, net2_out, net12_out)"
      ],
      "metadata": {
        "id": "Yq7GH-MCIyPI"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Now we'll change things a up a bit.  What happens if we change the second network? (note the *-1 change)\n",
        "net1_out, *_ = shallow_1_1_3(x, ReLU, n1_phi_0, n1_phi_1, n1_phi_2, n1_phi_3, n1_theta_10, n1_theta_11, n1_theta_20, n1_theta_21, n1_theta_30, n1_theta_31)\n",
        "net2_out, *_ = shallow_1_1_3(x, ReLU, n2_phi_0, n2_phi_1*-1, n2_phi_2, n2_phi_3, n2_theta_10, n2_theta_11, n2_theta_20, n2_theta_21, n2_theta_30, n2_theta_31)\n",
        "plot_neural_two_components(x, net1_out, net2_out)"
      ],
      "metadata": {
        "id": "BMlLkLbdEuPu"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# TODO \n",
        "# Take a piece of paper and draw what you think will happen when we feed the \n",
        "# output of the first network into the second one now that we have changed it.  Draw the relationship between\n",
        "# the input of the first network and the output of the second one."
      ],
      "metadata": {
        "id": "Of6jVXLTJ688"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# When you have a prediction, run this code to see if you were right\n",
        "net12_out, *_ = shallow_1_1_3(net1_out, ReLU, n2_phi_0, n2_phi_1*-1, n2_phi_2, n2_phi_3, n2_theta_10, n2_theta_11, n2_theta_20, n2_theta_21, n2_theta_30, n2_theta_31)\n",
        "plot_neural_two_components(x, net1_out, net2_out, net12_out)"
      ],
      "metadata": {
        "id": "PbbSCaSeK6SM"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Let's change things again.  What happens if we change the firsrt network? (note the changes)\n",
        "net1_out, *_ = shallow_1_1_3(x, ReLU, n1_phi_0, n1_phi_1*0.5, n1_phi_2, n1_phi_3, n1_theta_10, n1_theta_11, n1_theta_20, n1_theta_21, n1_theta_30, n1_theta_31)\n",
        "net2_out, *_ = shallow_1_1_3(x, ReLU, n2_phi_0, n2_phi_1, n2_phi_2, n2_phi_3, n2_theta_10, n2_theta_11, n2_theta_20, n2_theta_21, n2_theta_30, n2_theta_31)\n",
        "plot_neural_two_components(x, net1_out, net2_out)"
      ],
      "metadata": {
        "id": "b39mcSGFK9Fd"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# TODO \n",
        "# Take a piece of paper and draw what you think will happen when we feed the \n",
        "# output of the first network now we have changed it into the original second network.  Draw the relationship between\n",
        "# the input of the first network and the output of the second one."
      ],
      "metadata": {
        "id": "MhO40cC_LW9I"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# When you have a prediction, run this code to see if you were right\n",
        "net12_out, *_ = shallow_1_1_3(net1_out, ReLU, n2_phi_0, n2_phi_1, n2_phi_2, n2_phi_3, n2_theta_10, n2_theta_11, n2_theta_20, n2_theta_21, n2_theta_30, n2_theta_31)\n",
        "plot_neural_two_components(x, net1_out, net2_out, net12_out)"
      ],
      "metadata": {
        "id": "Akwo-hnPLkNr"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Let's change things again.  What happens if the first network and second networks are the same?\n",
        "net1_out, *_ = shallow_1_1_3(x, ReLU, n1_phi_0, n1_phi_1, n1_phi_2, n1_phi_3, n1_theta_10, n1_theta_11, n1_theta_20, n1_theta_21, n1_theta_30, n1_theta_31)\n",
        "net2_out_new, *_ = shallow_1_1_3(x, ReLU, n1_phi_0, n1_phi_1, n1_phi_2, n1_phi_3, n1_theta_10, n1_theta_11, n1_theta_20, n1_theta_21, n1_theta_30, n1_theta_31)\n",
        "plot_neural_two_components(x, net1_out, net2_out_new)"
      ],
      "metadata": {
        "id": "TJ7wXKpRLl_E"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# TODO \n",
        "# Take a piece of paper and draw what you think will happen when we feed the \n",
        "# output of the first network into the original second network.  Draw the relationship between\n",
        "# the input of the first network and the output of the second one."
      ],
      "metadata": {
        "id": "dJbbh6R7NG9k"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# When you have a prediction, run this code to see if you were right\n",
        "net12_out, *_ = shallow_1_1_3(net1_out, ReLU, n1_phi_0, n1_phi_1, n1_phi_2, n1_phi_3, n1_theta_10, n1_theta_11, n1_theta_20, n1_theta_21, n1_theta_30, n1_theta_31)\n",
        "plot_neural_two_components(x, net1_out, net2_out_new, net12_out)"
      ],
      "metadata": {
        "id": "BiZZl3yNM2Bq"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# TODO \n",
        "# Contemplate what you think will happen when we feed the \n",
        "# output of the original first network into a second copy of the original first network, and then \n",
        "# the output of that into the original second network (so now we have a three layer network)\n",
        "# How many total linear regions will we have in the output?  \n",
        "net123_out, *_ = shallow_1_1_3(net12_out, ReLU, n2_phi_0, n2_phi_1, n2_phi_2, n2_phi_3, n2_theta_10, n2_theta_11, n2_theta_20, n2_theta_21, n2_theta_30, n2_theta_31)\n",
        "plot_neural_two_components(x, net12_out, net2_out, net123_out)"
      ],
      "metadata": {
        "id": "BSd51AkzNf7-"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# TO DO\n",
        "# How many linear regions would there be if we ran N copies of the first network, feeding the result of the first \n",
        "# into the second, the second into the third and so on, and then passed the result into the original second\n",
        "# network (blue curve above)\n",
        "\n",
        "# Take away conclusions:  with very few parameters, we can make A LOT of linear regions, but\n",
        "# they depend on one another in complex ways that quickly become to difficult to understand intuitively."
      ],
      "metadata": {
        "id": "HqzePCLOVQK7"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}