diff --git a/Notebooks/Chap03/3_1_Shallow_Networks_I.ipynb b/Notebooks/Chap03/3_1_Shallow_Networks_I.ipynb
new file mode 100644
index 0000000..0f6f5c2
--- /dev/null
+++ b/Notebooks/Chap03/3_1_Shallow_Networks_I.ipynb
@@ -0,0 +1,524 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": [],
+ "authorship_tag": "ABX9TyNagjz+fy8uCFG71RAVMUVT",
+ "include_colab_link": true
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "view-in-github",
+ "colab_type": "text"
+ },
+ "source": [
+ "<a href=\"https://colab.research.google.com/github/udlbook/udlbook/blob/main/Notebooks/Chap03/3_1_Shallow_Networks_I.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# **Notebook 3.1 -- Shallow neural networks I**\n",
+ "\n",
+ "The purpose of this practical is to gain some familiarity with shallow neural networks. It works through an example similar to figure 3.3 and experiments with different activation functions.\n",
+ "\n",
+ "Work through the cells below, running each cell in turn. In various places you will see the words \"TO DO\". Follow the instructions at these places and write code to complete the functions. There are also questions interspersed in the text.\n",
+ "\n",
+ "Contact me at udlbookmail@gmail.com if you find any mistakes or have any suggestions.\n"
+ ],
+ "metadata": {
+ "id": "1Z6LB4Ybn1oN"
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "hAM55ZjSncOk"
+ },
+ "outputs": [],
+ "source": [
+ "# Imports math library\n",
+ "import numpy as np\n",
+ "# Imports plotting library\n",
+ "import matplotlib.pyplot as plt"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Let's first construct the shallow neural network with one input, three hidden units, and one output described in section 3.1 of the book."
+ ],
+ "metadata": {
+ "id": "wQDy9UzXpnf5"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Define the Rectified Linear Unit (ReLU) function\n",
+ "def ReLU(preactivation):\n",
+ " # TODO write code to implement the ReLU and compute the activation at the\n",
+ " # hidden unit from the preactivation\n",
+ " # This should work on every element of the ndarray \"preactivation\" at once\n",
+ " # One way to do this is with the ndarray \"clip\" function\n",
+ " # https://numpy.org/doc/stable/reference/generated/numpy.ndarray.clip.html\n",
+ " activation = np.zeros_like(preactivation);\n",
+ "\n",
+ " return activation"
+ ],
+ "metadata": {
+ "id": "OT7h7sSwpkrt"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Make an array of inputs\n",
+ "z = np.arange(-5,5,0.1)\n",
+ "ReLU_z = ReLU(z)\n",
+ "\n",
+ "# Plot the ReLU function\n",
+ "fig, ax = plt.subplots()\n",
+ "ax.plot(z,ReLU_z,'r-')\n",
+ "ax.set_xlim([-5,5]);ax.set_ylim([-5,5])\n",
+ "ax.set_xlabel('z'); ax.set_ylabel('ReLU[z]')\n",
+ "ax.set_aspect('equal')\n",
+ "plt.show()"
+ ],
+ "metadata": {
+ "id": "okwJmSw9pVNF"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Define a shallow neural network with one input, one output, and three hidden units\n",
+ "def shallow_1_1_3(x, activation_fn, phi_0,phi_1,phi_2,phi_3, theta_10, theta_11, theta_20, theta_21, theta_30, theta_31):\n",
+ " # TODO Replace the lines below to compute the three initial lines\n",
+ " # (figure 3.3a-c) from the theta parameters. These are the preactivations\n",
+ " pre_1 = np.zeros_like(x)\n",
+ " pre_2 = np.zeros_like(x)\n",
+ " pre_3 = np.zeros_like(x)\n",
+ "\n",
+ " # Pass these through the ReLU function to compute the activations as in\n",
+ " # figure 3.3 d-f\n",
+ " act_1 = activation_fn(pre_1)\n",
+ " act_2 = activation_fn(pre_2)\n",
+ " act_3 = activation_fn(pre_3)\n",
+ "\n",
+ " # TODO Replace the code below to weight the activations using phi1, phi2 and phi3\n",
+ " # To create the equivalent of figure 3.3 g-i\n",
+ " w_act_1 = np.zeros_like(x)\n",
+ " w_act_2 = np.zeros_like(x)\n",
+ " w_act_3 = np.zeros_like(x)\n",
+ "\n",
+ " # TODO Replace the code below to combine the weighted activations and add\n",
+ " # phi_0 to create the output as in figure 3.3 j\n",
+ " y = np.zeros_like(x)\n",
+ "\n",
+ " # Return everything we have calculated\n",
+ " return y, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3"
+ ],
+ "metadata": {
+ "id": "epk68ZCBu7uJ"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Plot the shallow neural network. We'll assume the input is in range [0,1] and the output in [-1,1]\n",
+ "# If the plot_all flag is set to true, then we'll plot all the intermediate stages as in Figure 3.3\n",
+ "def plot_neural(x, y, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3, plot_all=False, x_data=None, y_data=None):\n",
+ "\n",
+ " # Plot intermediate plots if flag set\n",
+ " if plot_all:\n",
+ " fig, ax = plt.subplots(3,3)\n",
+ " fig.set_size_inches(8.5, 8.5)\n",
+ " fig.tight_layout(pad=3.0)\n",
+ " ax[0,0].plot(x,pre_1,'r-'); ax[0,0].set_ylabel('Preactivation')\n",
+ " ax[0,1].plot(x,pre_2,'b-'); ax[0,1].set_ylabel('Preactivation')\n",
+ " ax[0,2].plot(x,pre_3,'g-'); ax[0,2].set_ylabel('Preactivation')\n",
+ " ax[1,0].plot(x,act_1,'r-'); ax[1,0].set_ylabel('Activation')\n",
+ " ax[1,1].plot(x,act_2,'b-'); ax[1,1].set_ylabel('Activation')\n",
+ " ax[1,2].plot(x,act_3,'g-'); ax[1,2].set_ylabel('Activation')\n",
+ " ax[2,0].plot(x,w_act_1,'r-'); ax[2,0].set_ylabel('Weighted Act')\n",
+ " ax[2,1].plot(x,w_act_2,'b-'); ax[2,1].set_ylabel('Weighted Act')\n",
+ " ax[2,2].plot(x,w_act_3,'g-'); ax[2,2].set_ylabel('Weighted Act')\n",
+ "\n",
+ " for plot_y in range(3):\n",
+ " for plot_x in range(3):\n",
+ " ax[plot_y,plot_x].set_xlim([0,1]);ax[plot_y,plot_x].set_ylim([-1,1])\n",
+ " ax[plot_y,plot_x].set_aspect(0.5)\n",
+ " ax[2,plot_y].set_xlabel('Input, $x$');\n",
+ " plt.show()\n",
+ "\n",
+ " fig, ax = plt.subplots()\n",
+ " ax.plot(x,y)\n",
+ " ax.set_xlabel('Input, $x$'); ax.set_ylabel('Output, $y$')\n",
+ " ax.set_xlim([0,1]);ax.set_ylim([-1,1])\n",
+ " ax.set_aspect(0.5)\n",
+ " if x_data is not None:\n",
+ " ax.plot(x_data, y_data, 'mo')\n",
+ " for i in range(len(x_data)):\n",
+ " ax.plot(x_data[i], y_data[i],)\n",
+ " plt.show()"
+ ],
+ "metadata": {
+ "id": "CAr7n1lixuhQ"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Now let's run the neural network. If your code is correct, then the final output should look like this:\n",
+ ""
+ ],
+ "metadata": {
+ "id": "T34bszToImKQ"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Now lets define some parameters and run the neural network\n",
+ "theta_10 = 0.3 ; theta_11 = -1.0\n",
+ "theta_20 = -1.0 ; theta_21 = 2.0\n",
+ "theta_30 = -0.5 ; theta_31 = 0.65\n",
+ "phi_0 = -0.3; phi_1 = 2.0; phi_2 = -1.0; phi_3 = 7.0\n",
+ "\n",
+ "# Define a range of input values\n",
+ "x = np.arange(0,1,0.01)\n",
+ "\n",
+ "# We run the neural network for each of these input values\n",
+ "y, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3 = \\\n",
+ " shallow_1_1_3(x, ReLU, phi_0,phi_1,phi_2,phi_3, theta_10, theta_11, theta_20, theta_21, theta_30, theta_31)\n",
+ "# And then plot it\n",
+ "plot_neural(x, y, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3, plot_all=True)"
+ ],
+ "metadata": {
+ "id": "SzIVdp9U-JWb"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Now let's play with the parameters to make sure we understand how they work. The original parameters were:\n",
+ "\n",
+ "$\\theta_{10} = 0.3$ ; $\\theta_{11} = -1.0$ <br>\n",
+ "$\\theta_{20} = -1.0$ ; $\\theta_{21} = 2.0$ <br>\n",
+ "$\\theta_{30} = -0.5$ ; $\\theta_{31} = 0.65$ <br>\n",
+ "$\\phi_0 = -0.3; \\phi_1 = 2.0; \\phi_2 = -1.0; \\phi_3 = 7.0$"
+ ],
+ "metadata": {
+ "id": "jhaBSS8oIWSX"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# TODO\n",
+ "# 1. Predict what effect changing phi_0 will have on the network.\n",
+ "\n",
+ "# 2. Predict what effect multiplying phi_1, phi_2, phi_3 by 0.5 would have. Check if you are correct\n",
+ "\n",
+ "# 3. Predict what effect multiplying phi_1 by -1 will have. Check if you are correct.\n",
+ "\n",
+ "# 4. Predict what effect setting theta_20 to -1.2 will have. Check if you are correct.\n",
+ "\n",
+ "# 5. Change the parameters so that there are only two \"joints\" (including outside the range of the plot)\n",
+ "# There are actually three ways to do this. See if you can figure them all out\n",
+ "\n",
+ "# 6. With the original parameters, the second line segment is flat (i.e. has slope zero)\n",
+ "# How could you change theta_10 so that all of the segments have non-zero slopes\n",
+ "\n",
+ "# 7. What do you predict would happen if you multiply theta_20 and theta_21 by 0.5, and phi_2 by 2.0?\n",
+ "# Check if you are correct.\n",
+ "\n",
+ "# 8. What do you predict would happen if you multiply theta_20 and theta_21 by -0.5, and phi_2 by -2.0?\n",
+ "# Check if you are correct.\n",
+ "\n",
+ "theta_10 = 0.3 ; theta_11 = -1.0\n",
+ "theta_20 = -1.0 ; theta_21 = 2.0\n",
+ "theta_30 = -0.5 ; theta_31 = 0.65\n",
+ "phi_0 = -0.3; phi_1 = 2.0; phi_2 = -1.0; phi_3 = 7.0\n",
+ "\n",
+ "# Define a range of input values\n",
+ "x = np.arange(0,1,0.01)\n",
+ "\n",
+ "# We run the neural network for each of these input values\n",
+ "y, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3 = \\\n",
+ " shallow_1_1_3(x, ReLU, phi_0,phi_1,phi_2,phi_3, theta_10, theta_11, theta_20, theta_21, theta_30, theta_31)\n",
+ "# And then plot it\n",
+ "plot_neural(x, y, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3, plot_all=True)"
+ ],
+ "metadata": {
+ "id": "ur4arJ8KAQWe"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Different activation functions\n",
+ "\n",
+ "The ReLU isn't the only kind of activation function. For a long time, people used sigmoid functions. A logistic sigmoid function is defined by the equation\n",
+ "\n",
+ "\\begin{equation}\n",
+ "f[z] = \\frac{1}{1+\\exp{[-10 z ]}}\n",
+ "\\end{equation}\n",
+ "\n",
+ "(Note that the factor of 10 is not standard -- but it allows us to plot on the same axes as the ReLU examples)"
+ ],
+ "metadata": {
+ "id": "1NTT5GTbJSqK"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Define the sigmoid function\n",
+ "def sigmoid(preactivation):\n",
+ " # TODO write code to implement the sigmoid function and compute the activation at the\n",
+ " # hidden unit from the preactivation. Use the np.exp() function.\n",
+ " activation = np.zeros_like(preactivation);\n",
+ "\n",
+ " return activation"
+ ],
+ "metadata": {
+ "id": "FEzzQeVoZdV_"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Make an array of inputs\n",
+ "z = np.arange(-1,1,0.01)\n",
+ "sig_z = sigmoid(z)\n",
+ "\n",
+ "# Plot the sigmoid function\n",
+ "fig, ax = plt.subplots()\n",
+ "ax.plot(z,sig_z,'r-')\n",
+ "ax.set_xlim([-1,1]);ax.set_ylim([0,1])\n",
+ "ax.set_xlabel('z'); ax.set_ylabel('sig[z]')\n",
+ "plt.show()"
+ ],
+ "metadata": {
+ "id": "dIn42wDlKqsv"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Let's see what happens when we use this activation function in a neural network"
+ ],
+ "metadata": {
+ "id": "uwQHGdC5KpH7"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "theta_10 = 0.3 ; theta_11 = -1.0\n",
+ "theta_20 = -1.0 ; theta_21 = 2.0\n",
+ "theta_30 = -0.5 ; theta_31 = 0.65\n",
+ "phi_0 = 0.3; phi_1 = 0.5; phi_2 = -1.0; phi_3 = 0.9\n",
+ "\n",
+ "# Define a range of input values\n",
+ "x = np.arange(0,1,0.01)\n",
+ "\n",
+ "# We run the neural network for each of these input values\n",
+ "y, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3 = \\\n",
+ " shallow_1_1_3(x, sigmoid, phi_0,phi_1,phi_2,phi_3, theta_10, theta_11, theta_20, theta_21, theta_30, theta_31)\n",
+ "# And then plot it\n",
+ "plot_neural(x, y, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3, plot_all=True)"
+ ],
+ "metadata": {
+ "id": "5W9m9MLKLddi"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "You probably notice that this gives nice smooth curves. So why don't we use this? Aha... it's not obvious right now, but we will get to it when we learn to fit models."
+ ],
+ "metadata": {
+ "id": "0c4S-XfnSfDx"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Linear activation functions\n",
+ "\n",
+ "However, neural networks don't work if the activation function is linear. For example, consider what would happen if the activation function was:\n",
+ "\n",
+ "\\begin{equation}\n",
+ "\\mbox{lin}[z] = a + bz\n",
+ "\\end{equation}"
+ ],
+ "metadata": {
+ "id": "IA_v_-eLRqek"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Define the linear activation function\n",
+ "def lin(preactivation):\n",
+ " a =0\n",
+ " b =1\n",
+ " # Compute linear function\n",
+ " activation = a+b * preactivation\n",
+ " # Return\n",
+ " return activation"
+ ],
+ "metadata": {
+ "id": "fTHJRv0KLjMD"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# TODO\n",
+ "# 1. The linear activation function above just returns the input: (0+1*z) = z\n",
+ "# Before running the code, make a prediction about what the ten panels of the figure will look like\n",
+ "# Now run the code below to see if you were right. What family of functions can this represent?\n",
+ "\n",
+ "# 2. What happens if you change the parameters (a,b) to different values?\n",
+ "# Try a=0.5, b=-0.4 (don't forget to re-run the cell above to update the function)\n",
+ "\n",
+ "\n",
+ "theta_10 = 0.3 ; theta_11 = -1.0\n",
+ "theta_20 = -1.0 ; theta_21 = 2.0\n",
+ "theta_30 = -0.5 ; theta_31 = 0.65\n",
+ "phi_0 = 0.3; phi_1 = 0.5; phi_2 = -1.0; phi_3 = 0.9\n",
+ "\n",
+ "# Define a range of input values\n",
+ "x = np.arange(0,1,0.01)\n",
+ "\n",
+ "# We run the neural network for each of these input values\n",
+ "y, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3 = \\\n",
+ " shallow_1_1_3(x, lin, phi_0,phi_1,phi_2,phi_3, theta_10, theta_11, theta_20, theta_21, theta_30, theta_31)\n",
+ "# And then plot it\n",
+ "plot_neural(x, y, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3, plot_all=True)"
+ ],
+ "metadata": {
+ "id": "SauRG8r7TkvP"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Least squares loss\n",
+ "\n",
+ "Now let's consider fitting the network to data. First we need to define the loss function. We'll use the least squares loss:\n",
+ "\n",
+ "\\begin{equation}\n",
+ "L[\\boldsymbol\\phi] = \\sum_{i=1}^{I}(y_{i}-\\mbox{f}[x_{i},\\boldsymbol\\phi])^2\n",
+ "\\end{equation}\n",
+ "\n",
+ "where $(x_i,y_i)$ is an input/output training pair and $\\mbox{f}[\\bullet,\\boldsymbol\\phi]$ is the neural network with parameters $\\boldsymbol\\phi$. The first term in the brackets is the ground truth output and the second term is the prediction of the model"
+ ],
+ "metadata": {
+ "id": "osonHsEqVp2I"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Least squares function\n",
+ "def least_squares_loss(y_train, y_predict):\n",
+ " # TODO Replace the line below to compute the sum of squared\n",
+ " # differences between the real and predicted values of y\n",
+ " # you will need to use the function np.sum\n",
+ " loss = 0\n",
+ "\n",
+ " return loss"
+ ],
+ "metadata": {
+ "id": "14d5II-TU46w"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Now lets define some parameters, run the neural network, and compute the loss\n",
+ "theta_10 = 0.3 ; theta_11 = -1.0\n",
+ "theta_20 = -1.0 ; theta_21 = 2.0\n",
+ "theta_30 = -0.5 ; theta_31 = 0.65\n",
+ "phi_0 = -0.3; phi_1 = 2.0; phi_2 = -1.0; phi_3 = 7.0\n",
+ "\n",
+ "# Define a range of input values\n",
+ "x = np.arange(0,1,0.01)\n",
+ "\n",
+ "x_train = np.array([0.09291784,0.46809093,0.93089486,0.67612654,0.73441752,0.86847339,\\\n",
+ " 0.49873225,0.51083168,0.18343972,0.99380898,0.27840809,0.38028817,\\\n",
+ " 0.12055708,0.56715537,0.92005746,0.77072270,0.85278176,0.05315950,\\\n",
+ " 0.87168699,0.58858043])\n",
+ "y_train = np.array([-0.15934537,0.18195445,0.451270150,0.13921448,0.09366691,0.30567674,\\\n",
+ " 0.372291170,0.40716968,-0.08131792,0.41187806,0.36943738,0.3994327,\\\n",
+ " 0.019062570,0.35820410,0.452564960,-0.0183121,0.02957665,-0.24354444, \\\n",
+ " 0.148038840,0.26824970])\n",
+ "\n",
+ "# We run the neural network for each of these input values\n",
+ "y, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3 = \\\n",
+ " shallow_1_1_3(x, ReLU, phi_0,phi_1,phi_2,phi_3, theta_10, theta_11, theta_20, theta_21, theta_30, theta_31)\n",
+ "# And then plot it\n",
+ "plot_neural(x, y, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3, plot_all=True, x_data = x_train, y_data = y_train)\n",
+ "\n",
+ "# Run the neural network on the training data\n",
+ "y_predict, *_ = shallow_1_1_3(x_train, ReLU, phi_0,phi_1,phi_2,phi_3, theta_10, theta_11, theta_20, theta_21, theta_30, theta_31)\n",
+ "\n",
+ "# Compute the least squares loss and print it out\n",
+ "loss = least_squares_loss(y_train,y_predict)\n",
+ "print(\"Loss = %3.3f\"%(loss))\n",
+ "\n",
+ "# TODO. Manipulate the parameters (by hand!) to make the function\n",
+ "# fit the data better and try to reduce the loss to as small a number\n",
+ "# as possible. The best that I could do was 0.181\n",
+ "# Tip... start by manipulating phi_0.\n",
+ "# It's not that easy, so don't spend too much time on this!\n"
+ ],
+ "metadata": {
+ "id": "o6GXjtRubZ2U"
+ },
+ "execution_count": null,
+ "outputs": []
+ }
+ ]
+}
\ No newline at end of file