Created using Colaboratory
This commit is contained in:
426
Notebooks/Chap03/3_4_Activation_Functions.ipynb
Normal file
426
Notebooks/Chap03/3_4_Activation_Functions.ipynb
Normal file
@@ -0,0 +1,426 @@
|
|||||||
|
{
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 0,
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"provenance": [],
|
||||||
|
"authorship_tag": "ABX9TyOu5BvK3aFb7ZEQKG5vfOZ1",
|
||||||
|
"include_colab_link": true
|
||||||
|
},
|
||||||
|
"kernelspec": {
|
||||||
|
"name": "python3",
|
||||||
|
"display_name": "Python 3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"name": "python"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "view-in-github",
|
||||||
|
"colab_type": "text"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"<a href=\"https://colab.research.google.com/github/udlbook/udlbook/blob/main/Notebooks/Chap03/3_4_Activation_Functions.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"# **Notebook 3.4 -- Activation functions**\n",
|
||||||
|
"\n",
|
||||||
|
"The purpose of this practical is to experiment with different activation functions. <br>\n",
|
||||||
|
"\n",
|
||||||
|
"Work through the cells below, running each cell in turn. In various places you will see the words \"TO DO\". Follow the instructions at these places and write code to complete the functions. There are also questions interspersed in the text.\n",
|
||||||
|
"\n",
|
||||||
|
"Contact me at udlbookmail@gmail.com if you find any mistakes or have any suggestions."
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "Mn0F56yY8ohX"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"id": "2GaDML3I8Yx4"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Imports math library\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"# Imports plotting library\n",
|
||||||
|
"import matplotlib.pyplot as plt"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"# Plot the shallow neural network. We'll assume the input is in the range [0,1] and the output is in the range [-1,1]\n",
|
||||||
|
"# If the plot_all flag is set to true, then we'll plot all the intermediate stages as in Figure 3.3\n",
|
||||||
|
"def plot_neural(x, y, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3, plot_all=False, x_data=None, y_data=None):\n",
|
||||||
|
"\n",
|
||||||
|
" # Plot intermediate plots if flag set\n",
|
||||||
|
" if plot_all:\n",
|
||||||
|
" fig, ax = plt.subplots(3,3)\n",
|
||||||
|
" fig.set_size_inches(8.5, 8.5)\n",
|
||||||
|
" fig.tight_layout(pad=3.0)\n",
|
||||||
|
" ax[0,0].plot(x,pre_1,'r-'); ax[0,0].set_ylabel('Preactivation')\n",
|
||||||
|
" ax[0,1].plot(x,pre_2,'b-'); ax[0,1].set_ylabel('Preactivation')\n",
|
||||||
|
" ax[0,2].plot(x,pre_3,'g-'); ax[0,2].set_ylabel('Preactivation')\n",
|
||||||
|
" ax[1,0].plot(x,act_1,'r-'); ax[1,0].set_ylabel('Activation')\n",
|
||||||
|
" ax[1,1].plot(x,act_2,'b-'); ax[1,1].set_ylabel('Activation')\n",
|
||||||
|
" ax[1,2].plot(x,act_3,'g-'); ax[1,2].set_ylabel('Activation')\n",
|
||||||
|
" ax[2,0].plot(x,w_act_1,'r-'); ax[2,0].set_ylabel('Weighted Act')\n",
|
||||||
|
" ax[2,1].plot(x,w_act_2,'b-'); ax[2,1].set_ylabel('Weighted Act')\n",
|
||||||
|
" ax[2,2].plot(x,w_act_3,'g-'); ax[2,2].set_ylabel('Weighted Act')\n",
|
||||||
|
"\n",
|
||||||
|
" for plot_y in range(3):\n",
|
||||||
|
" for plot_x in range(3):\n",
|
||||||
|
" ax[plot_y,plot_x].set_xlim([0,1]);ax[plot_x,plot_y].set_ylim([-1,1])\n",
|
||||||
|
" ax[plot_y,plot_x].set_aspect(0.5)\n",
|
||||||
|
" ax[2,plot_y].set_xlabel('Input, $x$');\n",
|
||||||
|
" plt.show()\n",
|
||||||
|
"\n",
|
||||||
|
" fig, ax = plt.subplots()\n",
|
||||||
|
" ax.plot(x,y)\n",
|
||||||
|
" ax.set_xlabel('Input, $x$'); ax.set_ylabel('Output, $y$')\n",
|
||||||
|
" ax.set_xlim([0,1]);ax.set_ylim([-1,1])\n",
|
||||||
|
" ax.set_aspect(0.5)\n",
|
||||||
|
" if x_data is not None:\n",
|
||||||
|
" ax.plot(x_data, y_data, 'mo')\n",
|
||||||
|
" for i in range(len(x_data)):\n",
|
||||||
|
" ax.plot(x_data[i], y_data[i],)\n",
|
||||||
|
" plt.show()"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "AeHzflFt9Tgn"
|
||||||
|
},
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"# Define a shallow neural network with one input, one output, and three hidden units\n",
|
||||||
|
"def shallow_1_1_3(x, activation_fn, phi_0,phi_1,phi_2,phi_3, theta_10, theta_11, theta_20, theta_21, theta_30, theta_31):\n",
|
||||||
|
" pre_1 = theta_10 + theta_11 * x\n",
|
||||||
|
" pre_2 = theta_20 + theta_21 * x\n",
|
||||||
|
" pre_3 = theta_30 + theta_31 * x\n",
|
||||||
|
" # Pass these through the activation function to compute the activations as in\n",
|
||||||
|
" # figure 3.3 d-f\n",
|
||||||
|
" act_1 = activation_fn(pre_1)\n",
|
||||||
|
" act_2 = activation_fn(pre_2)\n",
|
||||||
|
" act_3 = activation_fn(pre_3)\n",
|
||||||
|
"\n",
|
||||||
|
" w_act_1 = phi_1 * act_1\n",
|
||||||
|
" w_act_2 = phi_2 * act_2\n",
|
||||||
|
" w_act_3 = phi_3 * act_3\n",
|
||||||
|
"\n",
|
||||||
|
" y = phi_0 + w_act_1 + w_act_2 + w_act_3\n",
|
||||||
|
"\n",
|
||||||
|
" # Return everything we have calculated\n",
|
||||||
|
" return y, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "7qeIUrh19AkH"
|
||||||
|
},
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"# Define the Rectified Linear Unit (ReLU) function\n",
|
||||||
|
"def ReLU(preactivation):\n",
|
||||||
|
" activation = preactivation.clip(0.0)\n",
|
||||||
|
" return activation"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "cwTp__Fk9YUx"
|
||||||
|
},
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"First, let's run the network with ReLU activation functions."
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "INQkRzyn9kVC"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"# Now let's define some parameters and run the neural network\n",
|
||||||
|
"theta_10 = 0.3 ; theta_11 = -1.0\n",
|
||||||
|
"theta_20 = -1.0 ; theta_21 = 2.0\n",
|
||||||
|
"theta_30 = -0.5 ; theta_31 = 0.65\n",
|
||||||
|
"phi_0 = -0.3; phi_1 = 2.0; phi_2 = -1.0; phi_3 = 7.0\n",
|
||||||
|
"\n",
|
||||||
|
"# Define a range of input values\n",
|
||||||
|
"x = np.arange(0,1,0.01)\n",
|
||||||
|
"\n",
|
||||||
|
"# We run the neural network for each of these input values\n",
|
||||||
|
"y, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3 = \\\n",
|
||||||
|
" shallow_1_1_3(x, ReLU, phi_0,phi_1,phi_2,phi_3, theta_10, theta_11, theta_20, theta_21, theta_30, theta_31)\n",
|
||||||
|
"# And then plot it\n",
|
||||||
|
"plot_neural(x, y, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3, plot_all=True)"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "jT9QuKou9i0_"
|
||||||
|
},
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"# Sigmoid activation function\n",
|
||||||
|
"\n",
|
||||||
|
"The ReLU isn't the only kind of activation function. For a long time, people used sigmoid functions. A logistic sigmoid function is defined by the equation\n",
|
||||||
|
"\n",
|
||||||
|
"\\begin{equation}\n",
|
||||||
|
"f[z] = \\frac{1}{1+\\exp{[-10 z ]}}\n",
|
||||||
|
"\\end{equation}\n",
|
||||||
|
"\n",
|
||||||
|
"(Note that the factor of 10 is not standard -- but it allows us to plot on the same axes as the ReLU examples)"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "-I8N7r1o9HYf"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"# Define the sigmoid function\n",
|
||||||
|
"def sigmoid(preactivation):\n",
|
||||||
|
" # TODO write code to implement the sigmoid function and compute the activation at the\n",
|
||||||
|
" # hidden unit from the preactivation. Use the np.exp() function.\n",
|
||||||
|
" activation = np.zeros_like(preactivation);\n",
|
||||||
|
"\n",
|
||||||
|
" return activation"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "hgkioNyr975Y"
|
||||||
|
},
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"# Make an array of inputs\n",
|
||||||
|
"z = np.arange(-1,1,0.01)\n",
|
||||||
|
"sig_z = sigmoid(z)\n",
|
||||||
|
"\n",
|
||||||
|
"# Plot the sigmoid function\n",
|
||||||
|
"fig, ax = plt.subplots()\n",
|
||||||
|
"ax.plot(z,sig_z,'r-')\n",
|
||||||
|
"ax.set_xlim([-1,1]);ax.set_ylim([0,1])\n",
|
||||||
|
"ax.set_xlabel('z'); ax.set_ylabel('sig[z]')\n",
|
||||||
|
"plt.show"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "94HIXKJH97ve"
|
||||||
|
},
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"Let's see what happens when we use this activation function in a neural network"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "p3zQNXhj-J-o"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"theta_10 = 0.3 ; theta_11 = -1.0\n",
|
||||||
|
"theta_20 = -1.0 ; theta_21 = 2.0\n",
|
||||||
|
"theta_30 = -0.5 ; theta_31 = 0.65\n",
|
||||||
|
"phi_0 = 0.3; phi_1 = 0.5; phi_2 = -1.0; phi_3 = 0.9\n",
|
||||||
|
"\n",
|
||||||
|
"# Define a range of input values\n",
|
||||||
|
"x = np.arange(0,1,0.01)\n",
|
||||||
|
"\n",
|
||||||
|
"# We run the neural network for each of these input values\n",
|
||||||
|
"y, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3 = \\\n",
|
||||||
|
" shallow_1_1_3(x, sigmoid, phi_0,phi_1,phi_2,phi_3, theta_10, theta_11, theta_20, theta_21, theta_30, theta_31)\n",
|
||||||
|
"# And then plot it\n",
|
||||||
|
"plot_neural(x, y, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3, plot_all=True)"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "C1dASr9L-GNt"
|
||||||
|
},
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"You probably notice that this gives nice smooth curves. So why don't we use this? Aha... it's not obvious right now, but we will get to it when we learn to fit models."
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "Uuam_DewA9fH"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"# Heaviside activation function\n",
|
||||||
|
"\n",
|
||||||
|
"The Heaviside function is defined as:\n",
|
||||||
|
"\n",
|
||||||
|
"\\begin{equation}\n",
|
||||||
|
"\\mbox{heaviside}[z] = \\begin{cases} 0 & \\quad z <0 \\\\ 1 & \\quad z\\geq 0\\end{cases}\n",
|
||||||
|
"\\end{equation}"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "C9WKkcMUABze"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"# Define the heaviside function\n",
|
||||||
|
"def heaviside(preactivation):\n",
|
||||||
|
" # TODO write code to implement the heaviside function and compute the activation at the\n",
|
||||||
|
" # hidden unit from the preactivation. Depending on your implementation you may need to\n",
|
||||||
|
" # convert a Boolean array to an array of ones and zeros. To do this, use .astype(int)\n",
|
||||||
|
" activation = np.zeros_like(preactivation);\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
" return activation"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "-1qFkdOL-NPc"
|
||||||
|
},
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"# Make an array of inputs\n",
|
||||||
|
"z = np.arange(-1,1,0.01)\n",
|
||||||
|
"heav_z = heaviside(z)\n",
|
||||||
|
"\n",
|
||||||
|
"# Plot the heaviside function\n",
|
||||||
|
"fig, ax = plt.subplots()\n",
|
||||||
|
"ax.plot(z,heav_z,'r-')\n",
|
||||||
|
"ax.set_xlim([-1,1]);ax.set_ylim([-2,2])\n",
|
||||||
|
"ax.set_xlabel('z'); ax.set_ylabel('heaviside[z]')\n",
|
||||||
|
"plt.show"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "mSPyp7iA-44H"
|
||||||
|
},
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"theta_10 = 0.3 ; theta_11 = -1.0\n",
|
||||||
|
"theta_20 = -1.0 ; theta_21 = 2.0\n",
|
||||||
|
"theta_30 = -0.5 ; theta_31 = 0.65\n",
|
||||||
|
"phi_0 = 0.3; phi_1 = 0.5; phi_2 = -1.0; phi_3 = 0.9\n",
|
||||||
|
"\n",
|
||||||
|
"# Define a range of input values\n",
|
||||||
|
"x = np.arange(0,1,0.01)\n",
|
||||||
|
"\n",
|
||||||
|
"# We run the neural network for each of these input values\n",
|
||||||
|
"y, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3 = \\\n",
|
||||||
|
" shallow_1_1_3(x, heaviside, phi_0,phi_1,phi_2,phi_3, theta_10, theta_11, theta_20, theta_21, theta_30, theta_31)\n",
|
||||||
|
"# And then plot it\n",
|
||||||
|
"plot_neural(x, y, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3, plot_all=True)"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "t99K2lSl--Mq"
|
||||||
|
},
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"This can approximate any function, but the output is discontinuous, and there are also reasons not to use it that we will discover when we learn more about model fitting."
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "T65MRtM-BCQA"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"# Linear activation functions\n",
|
||||||
|
"\n",
|
||||||
|
"Neural networks don't work if the activation function is linear. For example, consider what would happen if the activation function was:\n",
|
||||||
|
"\n",
|
||||||
|
"\\begin{equation}\n",
|
||||||
|
"\\mbox{lin}[z] = a + bz\n",
|
||||||
|
"\\end{equation}"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "RkB-XZMLBTaR"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"# Define the linear activation function\n",
|
||||||
|
"def lin(preactivation):\n",
|
||||||
|
" a =0\n",
|
||||||
|
" b =1\n",
|
||||||
|
" # Compute linear function\n",
|
||||||
|
" activation = a+b * preactivation\n",
|
||||||
|
" # Return\n",
|
||||||
|
" return activation"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "Q59v3saj_jq1"
|
||||||
|
},
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"# TODO\n",
|
||||||
|
"# 1. The linear activation function above just returns the input: (0+1*z) = z\n",
|
||||||
|
"# Before running the code, make a prediction about what the ten panels of the drawing will look like\n",
|
||||||
|
"# Now run the code below to see if you were right. What family of functions can this represent?\n",
|
||||||
|
"\n",
|
||||||
|
"# 2. What happens if you change the parameters (a,b) to different values?\n",
|
||||||
|
"# Try a=0.5, b=-0.4. Don't forget to re-run the cell that defines lin() so that the function is updated\n",
|
||||||
|
"\n",
|
||||||
|
"theta_10 = 0.3 ; theta_11 = -1.0\n",
|
||||||
|
"theta_20 = -1.0 ; theta_21 = 2.0\n",
|
||||||
|
"theta_30 = -0.5 ; theta_31 = 0.65\n",
|
||||||
|
"phi_0 = 0.3; phi_1 = 0.5; phi_2 = -1.0; phi_3 = 0.9\n",
|
||||||
|
"\n",
|
||||||
|
"# Define a range of input values\n",
|
||||||
|
"x = np.arange(0,1,0.01)\n",
|
||||||
|
"\n",
|
||||||
|
"# We run the neural network for each of these input values\n",
|
||||||
|
"y, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3 = \\\n",
|
||||||
|
" shallow_1_1_3(x, lin, phi_0,phi_1,phi_2,phi_3, theta_10, theta_11, theta_20, theta_21, theta_30, theta_31)\n",
|
||||||
|
"# And then plot it\n",
|
||||||
|
"plot_neural(x, y, pre_1, pre_2, pre_3, act_1, act_2, act_3, w_act_1, w_act_2, w_act_3, plot_all=True)"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "IwodsBr0BkDn"
|
||||||
|
},
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": []
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user