From 707f93daaeea9a36d6d0b05ea6d95e0a71d48b1b Mon Sep 17 00:00:00 2001
From: udlbook <110402648+udlbook@users.noreply.github.com>
Date: Tue, 2 Jan 2024 12:06:41 -0500
Subject: [PATCH] Add files via upload
---
Notebooks/Chap03/3_1_Shallow_Networks_I.ipynb | 184 ++++++------
.../Chap03/3_4_Activation_Functions.ipynb | 261 +++++++++---------
2 files changed, 230 insertions(+), 215 deletions(-)
diff --git a/Notebooks/Chap03/3_1_Shallow_Networks_I.ipynb b/Notebooks/Chap03/3_1_Shallow_Networks_I.ipynb
index 626b287..a225f50 100644
--- a/Notebooks/Chap03/3_1_Shallow_Networks_I.ipynb
+++ b/Notebooks/Chap03/3_1_Shallow_Networks_I.ipynb
@@ -1,33 +1,22 @@
{
- "nbformat": 4,
- "nbformat_minor": 0,
- "metadata": {
- "colab": {
- "provenance": [],
- "authorship_tag": "ABX9TyPBNztJrxnUt1ELWfm1Awa3",
- "include_colab_link": true
- },
- "kernelspec": {
- "name": "python3",
- "display_name": "Python 3"
- },
- "language_info": {
- "name": "python"
- }
- },
"cells": [
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {
- "id": "view-in-github",
- "colab_type": "text"
+ "colab_type": "text",
+ "id": "view-in-github"
},
"source": [
"
"
]
},
{
+ "attachments": {},
"cell_type": "markdown",
+ "metadata": {
+ "id": "1Z6LB4Ybn1oN"
+ },
"source": [
"# **Notebook 3.1 -- Shallow neural networks I**\n",
"\n",
@@ -36,10 +25,7 @@
"Work through the cells below, running each cell in turn. In various places you will see the words \"TO DO\". Follow the instructions at these places and write code to complete the functions. There are also questions interspersed in the text.\n",
"\n",
"Contact me at udlbookmail@gmail.com if you find any mistakes or have any suggestions."
- ],
- "metadata": {
- "id": "1Z6LB4Ybn1oN"
- }
+ ]
},
{
"cell_type": "code",
@@ -56,16 +42,22 @@
]
},
{
+ "attachments": {},
"cell_type": "markdown",
- "source": [
- "Let's first construct the shallow neural network with one input, three hidden units, and one output described in section 3.1 of the book."
- ],
"metadata": {
"id": "wQDy9UzXpnf5"
- }
+ },
+ "source": [
+ "Let's first construct the shallow neural network with one input, three hidden units, and one output described in section 3.1 of the book."
+ ]
},
{
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "OT7h7sSwpkrt"
+ },
+ "outputs": [],
"source": [
"# Define the Rectified Linear Unit (ReLU) function\n",
"def ReLU(preactivation):\n",
@@ -77,15 +69,15 @@
" activation = np.zeros_like(preactivation);\n",
"\n",
" return activation"
- ],
- "metadata": {
- "id": "OT7h7sSwpkrt"
- },
- "execution_count": null,
- "outputs": []
+ ]
},
{
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "okwJmSw9pVNF"
+ },
+ "outputs": [],
"source": [
"# Make an array of inputs\n",
"z = np.arange(-5,5,0.1)\n",
@@ -98,15 +90,15 @@
"ax.set_xlabel('z'); ax.set_ylabel('ReLU[z]')\n",
"ax.set_aspect('equal')\n",
"plt.show()"
- ],
- "metadata": {
- "id": "okwJmSw9pVNF"
- },
- "execution_count": null,
- "outputs": []
+ ]
},
{
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "epk68ZCBu7uJ"
+ },
+ "outputs": [],
"source": [
"# Define a shallow neural network with, one input, one output, and three hidden units\n",
"def shallow_1_1_3(x, activation_fn, phi_0,phi_1,phi_2,phi_3, theta_10, theta_11, theta_20, theta_21, theta_30, theta_31):\n",
@@ -134,15 +126,15 @@
"\n",
" # Return everything we have calculated\n",
" return y, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3"
- ],
- "metadata": {
- "id": "epk68ZCBu7uJ"
- },
- "execution_count": null,
- "outputs": []
+ ]
},
{
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "CAr7n1lixuhQ"
+ },
+ "outputs": [],
"source": [
"# Plot the shallow neural network. We'll assume input in is range [0,1] and output [-1,1]\n",
"# If the plot_all flag is set to true, then we'll plot all the intermediate stages as in Figure 3.3\n",
@@ -180,15 +172,15 @@
" for i in range(len(x_data)):\n",
" ax.plot(x_data[i], y_data[i],)\n",
" plt.show()"
- ],
- "metadata": {
- "id": "CAr7n1lixuhQ"
- },
- "execution_count": null,
- "outputs": []
+ ]
},
{
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "SzIVdp9U-JWb"
+ },
+ "outputs": [],
"source": [
"# Now lets define some parameters and run the neural network\n",
"theta_10 = 0.3 ; theta_11 = -1.0\n",
@@ -204,25 +196,25 @@
" shallow_1_1_3(x, ReLU, phi_0,phi_1,phi_2,phi_3, theta_10, theta_11, theta_20, theta_21, theta_30, theta_31)\n",
"# And then plot it\n",
"plot_neural(x, y, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3, plot_all=True)"
- ],
- "metadata": {
- "id": "SzIVdp9U-JWb"
- },
- "execution_count": null,
- "outputs": []
+ ]
},
{
+ "attachments": {},
"cell_type": "markdown",
+ "metadata": {
+ "id": "T34bszToImKQ"
+ },
"source": [
"If your code is correct, then the final output should look like this:\n",
"
"
- ],
- "metadata": {
- "id": "T34bszToImKQ"
- }
+ ]
},
{
+ "attachments": {},
"cell_type": "markdown",
+ "metadata": {
+ "id": "jhaBSS8oIWSX"
+ },
"source": [
"Now let's play with the parameters to make sure we understand how they work. The original parameters were:\n",
"\n",
@@ -230,13 +222,15 @@
"$\\theta_{20} = -1.0$ ; $\\theta_{21} = 2.0$
\n",
"$\\theta_{30} = -0.5$ ; $\\theta_{31} = 0.65$
\n",
"$\\phi_0 = -0.3; \\phi_1 = 2.0; \\phi_2 = -1.0; \\phi_3 = 7.0$"
- ],
- "metadata": {
- "id": "jhaBSS8oIWSX"
- }
+ ]
},
{
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "ur4arJ8KAQWe"
+ },
+ "outputs": [],
"source": [
"# TODO\n",
"# 1. Predict what effect changing phi_0 will have on the network.\n",
@@ -272,32 +266,33 @@
" shallow_1_1_3(x, ReLU, phi_0,phi_1,phi_2,phi_3, theta_10, theta_11, theta_20, theta_21, theta_30, theta_31)\n",
"# And then plot it\n",
"plot_neural(x, y, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3, plot_all=True)"
- ],
- "metadata": {
- "id": "ur4arJ8KAQWe"
- },
- "execution_count": null,
- "outputs": []
+ ]
},
{
+ "attachments": {},
"cell_type": "markdown",
+ "metadata": {
+ "id": "osonHsEqVp2I"
+ },
"source": [
"# Least squares loss\n",
"\n",
"Now let's consider fitting the network to data. First we need to define the loss function. We'll use the least squares loss:\n",
"\n",
"\\begin{equation}\n",
- "L[\\boldsymbol\\phi] = \\sum_{i=1}^{I}(y_{i}-\\mbox{f}[x_{i},\\boldsymbol\\phi])^2\n",
+ "L[\\boldsymbol\\phi] = \\sum_{i=1}^{I}(y_{i}-\\text{f}[x_{i},\\boldsymbol\\phi])^2\n",
"\\end{equation}\n",
"\n",
- "where $(x_i,y_i)$ is an input/output training pair and $\\mbox{f}[\\bullet,\\boldsymbol\\phi]$ is the neural network with parameters $\\boldsymbol\\phi$. The first term in the brackets is the ground truth output and the second term is the prediction of the model"
- ],
- "metadata": {
- "id": "osonHsEqVp2I"
- }
+ "where $(x_i,y_i)$ is an input/output training pair and $\\text{f}[\\bullet,\\boldsymbol\\phi]$ is the neural network with parameters $\\boldsymbol\\phi$. The first term in the brackets is the ground truth output and the second term is the prediction of the model."
+ ]
},
{
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "14d5II-TU46w"
+ },
+ "outputs": [],
"source": [
"# Least squares function\n",
"def least_squares_loss(y_train, y_predict):\n",
@@ -308,15 +303,15 @@
" loss = 0\n",
"\n",
" return loss"
- ],
- "metadata": {
- "id": "14d5II-TU46w"
- },
- "execution_count": null,
- "outputs": []
+ ]
},
{
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "o6GXjtRubZ2U"
+ },
+ "outputs": [],
"source": [
"# Now lets define some parameters, run the neural network, and compute the loss\n",
"theta_10 = 0.3 ; theta_11 = -1.0\n",
@@ -354,12 +349,23 @@
"# as possible. The best that I could do was 0.181\n",
"# Tip... start by manipulating phi_0.\n",
"# It's not that easy, so don't spend too much time on this!\n"
- ],
- "metadata": {
- "id": "o6GXjtRubZ2U"
- },
- "execution_count": null,
- "outputs": []
+ ]
}
- ]
+ ],
+ "metadata": {
+ "colab": {
+ "authorship_tag": "ABX9TyPBNztJrxnUt1ELWfm1Awa3",
+ "include_colab_link": true,
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
}
diff --git a/Notebooks/Chap03/3_4_Activation_Functions.ipynb b/Notebooks/Chap03/3_4_Activation_Functions.ipynb
index e1bb1bf..582ed1e 100644
--- a/Notebooks/Chap03/3_4_Activation_Functions.ipynb
+++ b/Notebooks/Chap03/3_4_Activation_Functions.ipynb
@@ -1,33 +1,22 @@
{
- "nbformat": 4,
- "nbformat_minor": 0,
- "metadata": {
- "colab": {
- "provenance": [],
- "authorship_tag": "ABX9TyOmxhh3ymYWX+1HdZ91I6zU",
- "include_colab_link": true
- },
- "kernelspec": {
- "name": "python3",
- "display_name": "Python 3"
- },
- "language_info": {
- "name": "python"
- }
- },
"cells": [
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {
- "id": "view-in-github",
- "colab_type": "text"
+ "colab_type": "text",
+ "id": "view-in-github"
},
"source": [
"
"
]
},
{
+ "attachments": {},
"cell_type": "markdown",
+ "metadata": {
+ "id": "Mn0F56yY8ohX"
+ },
"source": [
"# **Notebook 3.4 -- Activation functions**\n",
"\n",
@@ -36,10 +25,7 @@
"Work through the cells below, running each cell in turn. In various places you will see the words \"TO DO\". Follow the instructions at these places and write code to complete the functions. There are also questions interspersed in the text.\n",
"\n",
"Contact me at udlbookmail@gmail.com if you find any mistakes or have any suggestions."
- ],
- "metadata": {
- "id": "Mn0F56yY8ohX"
- }
+ ]
},
{
"cell_type": "code",
@@ -57,6 +43,11 @@
},
{
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "AeHzflFt9Tgn"
+ },
+ "outputs": [],
"source": [
"# Plot the shallow neural network. We'll assume input in is range [0,1] and output [-1,1]\n",
"# If the plot_all flag is set to true, then we'll plot all the intermediate stages as in Figure 3.3\n",
@@ -94,15 +85,15 @@
" for i in range(len(x_data)):\n",
" ax.plot(x_data[i], y_data[i],)\n",
" plt.show()"
- ],
- "metadata": {
- "id": "AeHzflFt9Tgn"
- },
- "execution_count": null,
- "outputs": []
+ ]
},
{
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "7qeIUrh19AkH"
+ },
+ "outputs": [],
"source": [
"# Define a shallow neural network with, one input, one output, and three hidden units\n",
"def shallow_1_1_3(x, activation_fn, phi_0,phi_1,phi_2,phi_3, theta_10, theta_11, theta_20, theta_21, theta_30, theta_31):\n",
@@ -123,38 +114,39 @@
"\n",
" # Return everything we have calculated\n",
" return y, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3"
- ],
- "metadata": {
- "id": "7qeIUrh19AkH"
- },
- "execution_count": null,
- "outputs": []
+ ]
},
{
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "cwTp__Fk9YUx"
+ },
+ "outputs": [],
"source": [
"# Define the Rectified Linear Unit (ReLU) function\n",
"def ReLU(preactivation):\n",
" activation = preactivation.clip(0.0)\n",
" return activation"
- ],
- "metadata": {
- "id": "cwTp__Fk9YUx"
- },
- "execution_count": null,
- "outputs": []
+ ]
},
{
+ "attachments": {},
"cell_type": "markdown",
- "source": [
- "First, let's run the network with a ReLU functions"
- ],
"metadata": {
"id": "INQkRzyn9kVC"
- }
+ },
+ "source": [
+ "First, let's run the network with a ReLU function"
+ ]
},
{
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "jT9QuKou9i0_"
+ },
+ "outputs": [],
"source": [
"# Now lets define some parameters and run the neural network\n",
"theta_10 = 0.3 ; theta_11 = -1.0\n",
@@ -170,15 +162,14 @@
" shallow_1_1_3(x, ReLU, phi_0,phi_1,phi_2,phi_3, theta_10, theta_11, theta_20, theta_21, theta_30, theta_31)\n",
"# And then plot it\n",
"plot_neural(x, y, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3, plot_all=True)"
- ],
- "metadata": {
- "id": "jT9QuKou9i0_"
- },
- "execution_count": null,
- "outputs": []
+ ]
},
{
+ "attachments": {},
"cell_type": "markdown",
+ "metadata": {
+ "id": "-I8N7r1o9HYf"
+ },
"source": [
"# Sigmoid activation function\n",
"\n",
@@ -189,13 +180,15 @@
"\\end{equation}\n",
"\n",
"(Note that the factor of 10 is not standard -- but it allow us to plot on the same axes as the ReLU examples)"
- ],
- "metadata": {
- "id": "-I8N7r1o9HYf"
- }
+ ]
},
{
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "hgkioNyr975Y"
+ },
+ "outputs": [],
"source": [
"# Define the sigmoid function\n",
"def sigmoid(preactivation):\n",
@@ -204,15 +197,15 @@
" activation = np.zeros_like(preactivation);\n",
"\n",
" return activation"
- ],
- "metadata": {
- "id": "hgkioNyr975Y"
- },
- "execution_count": null,
- "outputs": []
+ ]
},
{
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "94HIXKJH97ve"
+ },
+ "outputs": [],
"source": [
"# Make an array of inputs\n",
"z = np.arange(-1,1,0.01)\n",
@@ -224,24 +217,25 @@
"ax.set_xlim([-1,1]);ax.set_ylim([0,1])\n",
"ax.set_xlabel('z'); ax.set_ylabel('sig[z]')\n",
"plt.show()"
- ],
- "metadata": {
- "id": "94HIXKJH97ve"
- },
- "execution_count": null,
- "outputs": []
+ ]
},
{
+ "attachments": {},
"cell_type": "markdown",
- "source": [
- "Let's see what happens when we use this activation function in a neural network"
- ],
"metadata": {
"id": "p3zQNXhj-J-o"
- }
+ },
+ "source": [
+ "Let's see what happens when we use this activation function in a neural network"
+ ]
},
{
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "C1dASr9L-GNt"
+ },
+ "outputs": [],
"source": [
"theta_10 = 0.3 ; theta_11 = -1.0\n",
"theta_20 = -1.0 ; theta_21 = 2.0\n",
@@ -256,39 +250,41 @@
" shallow_1_1_3(x, sigmoid, phi_0,phi_1,phi_2,phi_3, theta_10, theta_11, theta_20, theta_21, theta_30, theta_31)\n",
"# And then plot it\n",
"plot_neural(x, y, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3, plot_all=True)"
- ],
- "metadata": {
- "id": "C1dASr9L-GNt"
- },
- "execution_count": null,
- "outputs": []
+ ]
},
{
+ "attachments": {},
"cell_type": "markdown",
- "source": [
- "You probably notice that this gives nice smooth curves. So why don't we use this? Aha... it's not obvious right now, but we will get to it when we learn to fit models."
- ],
"metadata": {
"id": "Uuam_DewA9fH"
- }
+ },
+ "source": [
+ "You probably notice that this gives nice smooth curves. So why don't we use this? Aha... it's not obvious right now, but we will get to it when we learn to fit models."
+ ]
},
{
+ "attachments": {},
"cell_type": "markdown",
+ "metadata": {
+ "id": "C9WKkcMUABze"
+ },
"source": [
"# Heaviside activation function\n",
"\n",
"The Heaviside function is defined as:\n",
"\n",
"\\begin{equation}\n",
- "\\mbox{heaviside}[z] = \\begin{cases} 0 & \\quad z <0 \\\\ 1 & \\quad z\\geq 0\\end{cases}\n",
+ "\\text{heaviside}[z] = \\begin{cases} 0 & \\quad z <0 \\\\ 1 & \\quad z\\geq 0\\end{cases}\n",
"\\end{equation}"
- ],
- "metadata": {
- "id": "C9WKkcMUABze"
- }
+ ]
},
{
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "-1qFkdOL-NPc"
+ },
+ "outputs": [],
"source": [
"# Define the heaviside function\n",
"def heaviside(preactivation):\n",
@@ -299,15 +295,15 @@
"\n",
"\n",
" return activation"
- ],
- "metadata": {
- "id": "-1qFkdOL-NPc"
- },
- "execution_count": null,
- "outputs": []
+ ]
},
{
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "mSPyp7iA-44H"
+ },
+ "outputs": [],
"source": [
"# Make an array of inputs\n",
"z = np.arange(-1,1,0.01)\n",
@@ -319,15 +315,15 @@
"ax.set_xlim([-1,1]);ax.set_ylim([-2,2])\n",
"ax.set_xlabel('z'); ax.set_ylabel('heaviside[z]')\n",
"plt.show()"
- ],
- "metadata": {
- "id": "mSPyp7iA-44H"
- },
- "execution_count": null,
- "outputs": []
+ ]
},
{
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "t99K2lSl--Mq"
+ },
+ "outputs": [],
"source": [
"theta_10 = 0.3 ; theta_11 = -1.0\n",
"theta_20 = -1.0 ; theta_21 = 2.0\n",
@@ -342,39 +338,41 @@
" shallow_1_1_3(x, heaviside, phi_0,phi_1,phi_2,phi_3, theta_10, theta_11, theta_20, theta_21, theta_30, theta_31)\n",
"# And then plot it\n",
"plot_neural(x, y, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3, plot_all=True)"
- ],
- "metadata": {
- "id": "t99K2lSl--Mq"
- },
- "execution_count": null,
- "outputs": []
+ ]
},
{
+ "attachments": {},
"cell_type": "markdown",
- "source": [
- "This can approximate any function, but the output is discontinuous, and there are also reasons not to use it that we will discover when we learn more about model fitting."
- ],
"metadata": {
"id": "T65MRtM-BCQA"
- }
+ },
+ "source": [
+ "This can approximate any function, but the output is discontinuous, and there are also reasons not to use it that we will discover when we learn more about model fitting."
+ ]
},
{
+ "attachments": {},
"cell_type": "markdown",
+ "metadata": {
+ "id": "RkB-XZMLBTaR"
+ },
"source": [
"# Linear activation functions\n",
"\n",
"Neural networks don't work if the activation function is linear. For example, consider what would happen if the activation function was:\n",
"\n",
"\\begin{equation}\n",
- "\\mbox{lin}[z] = a + bz\n",
+ "\\text{lin}[z] = a + bz\n",
"\\end{equation}"
- ],
- "metadata": {
- "id": "RkB-XZMLBTaR"
- }
+ ]
},
{
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "Q59v3saj_jq1"
+ },
+ "outputs": [],
"source": [
"# Define the linear activation function\n",
"def lin(preactivation):\n",
@@ -384,15 +382,15 @@
" activation = a+b * preactivation\n",
" # Return\n",
" return activation"
- ],
- "metadata": {
- "id": "Q59v3saj_jq1"
- },
- "execution_count": null,
- "outputs": []
+ ]
},
{
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "IwodsBr0BkDn"
+ },
+ "outputs": [],
"source": [
"# TODO\n",
"# 1. The linear activation function above just returns the input: (0+1*z) = z\n",
@@ -415,12 +413,23 @@
" shallow_1_1_3(x, lin, phi_0,phi_1,phi_2,phi_3, theta_10, theta_11, theta_20, theta_21, theta_30, theta_31)\n",
"# And then plot it\n",
"plot_neural(x, y, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3, plot_all=True)"
- ],
- "metadata": {
- "id": "IwodsBr0BkDn"
- },
- "execution_count": null,
- "outputs": []
+ ]
}
- ]
-}
\ No newline at end of file
+ ],
+ "metadata": {
+ "colab": {
+ "authorship_tag": "ABX9TyOmxhh3ymYWX+1HdZ91I6zU",
+ "include_colab_link": true,
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}