348 lines
12 KiB
Plaintext
348 lines
12 KiB
Plaintext
{
|
|
"nbformat": 4,
|
|
"nbformat_minor": 0,
|
|
"metadata": {
|
|
"colab": {
|
|
"provenance": [],
|
|
"include_colab_link": true
|
|
},
|
|
"kernelspec": {
|
|
"name": "python3",
|
|
"display_name": "Python 3"
|
|
},
|
|
"language_info": {
|
|
"name": "python"
|
|
}
|
|
},
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "view-in-github",
|
|
"colab_type": "text"
|
|
},
|
|
"source": [
|
|
"<a href=\"https://colab.research.google.com/github/udlbook/udlbook/blob/main/Notebooks/Chap08/8_2_Bias_Variance_Trade_Off.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"source": [
|
|
"# **Notebook 8.2: Bias-Variance Trade-Off**\n",
|
|
"\n",
|
|
"This notebook investigates the bias-variance trade-off for the toy model used throughout chapter 8 and reproduces the bias-variance trade-off curves seen in figure 8.9.\n",
|
|
"\n",
|
|
"Work through the cells below, running each cell in turn. In various places you will see the words \"TO DO\". Follow the instructions at these places and make predictions about what is going to happen or write code to complete the functions.\n",
|
|
"\n",
|
|
"Contact me at udlbookmail@gmail.com if you find any mistakes or have any suggestions."
|
|
],
|
|
"metadata": {
|
|
"id": "L6chybAVFJW2"
|
|
}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"import numpy as np\n",
|
|
"import matplotlib.pyplot as plt"
|
|
],
|
|
"metadata": {
|
|
"id": "01Cu4SGZOVAi"
|
|
},
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# The true function that we are trying to estimate, defined on [0,1]\n",
|
|
"def true_function(x):\n",
|
|
" y = np.exp(np.sin(x*(2*3.1413)))\n",
|
|
" return y"
|
|
],
|
|
"metadata": {
|
|
"id": "bSK2_EGyOgHu"
|
|
},
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# Generate some data points with or without noise\n",
|
|
"def generate_data(n_data, sigma_y=0.3):\n",
|
|
" # Generate x values quasi uniformly\n",
|
|
" x = np.ones(n_data)\n",
|
|
" for i in range(n_data):\n",
|
|
" x[i] = np.random.uniform(i/n_data, (i+1)/n_data, 1)\n",
|
|
"\n",
|
|
" # y value from running through function and adding noise\n",
|
|
" y = np.ones(n_data)\n",
|
|
" for i in range(n_data):\n",
|
|
" y[i] = true_function(x[i])\n",
|
|
" y[i] += np.random.normal(0, sigma_y, 1)\n",
|
|
" return x,y\n"
|
|
],
|
|
"metadata": {
|
|
"id": "yzZr2tcJO5pq"
|
|
},
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# Draw the fitted function, together with uncertainty used to generate points\n",
|
|
"def plot_function(x_func, y_func, x_data=None,y_data=None, x_model = None, y_model =None, sigma_func = None, sigma_model=None):\n",
|
|
"\n",
|
|
" fig,ax = plt.subplots()\n",
|
|
" ax.plot(x_func, y_func, 'k-')\n",
|
|
" if sigma_func is not None:\n",
|
|
" ax.fill_between(x_func, y_func-2*sigma_func, y_func+2*sigma_func, color='lightgray')\n",
|
|
"\n",
|
|
" if x_data is not None:\n",
|
|
" ax.plot(x_data, y_data, 'o', color='#d18362')\n",
|
|
"\n",
|
|
" if x_model is not None:\n",
|
|
" ax.plot(x_model, y_model, '-', color='#7fe7de')\n",
|
|
"\n",
|
|
" if sigma_model is not None:\n",
|
|
" ax.fill_between(x_model, y_model-2*sigma_model, y_model+2*sigma_model, color='lightgray')\n",
|
|
"\n",
|
|
" ax.set_xlim(0,1)\n",
|
|
" ax.set_xlabel('Input, $x$')\n",
|
|
" ax.set_ylabel('Output, $y$')\n",
|
|
" plt.show()"
|
|
],
|
|
"metadata": {
|
|
"id": "xfq1SD_ZOi6G"
|
|
},
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# Generate true function\n",
|
|
"x_func = np.linspace(0, 1.0, 100)\n",
|
|
"y_func = true_function(x_func);\n",
|
|
"\n",
|
|
"# Generate some data points\n",
|
|
"np.random.seed(1)\n",
|
|
"sigma_func = 0.3\n",
|
|
"n_data = 15\n",
|
|
"x_data,y_data = generate_data(n_data, sigma_func)\n",
|
|
"\n",
|
|
"# Plot the function, data and uncertainty\n",
|
|
"plot_function(x_func, y_func, x_data, y_data, sigma_func=sigma_func)"
|
|
],
|
|
"metadata": {
|
|
"id": "2tP-p7B6Qnuf"
|
|
},
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# Define model -- beta is a scalar and omega has size n_hidden,1\n",
|
|
"def network(x, beta, omega):\n",
|
|
" # Retrieve number of hidden units\n",
|
|
" n_hidden = omega.shape[0]\n",
|
|
"\n",
|
|
" y = np.zeros_like(x)\n",
|
|
" for c_hidden in range(n_hidden):\n",
|
|
" # Evaluate activations based on shifted lines (figure 8.4b-d)\n",
|
|
" line_vals = x - c_hidden/n_hidden\n",
|
|
" h = line_vals * (line_vals > 0)\n",
|
|
" # Weight activations by omega parameters and sum\n",
|
|
" y = y + omega[c_hidden] * h\n",
|
|
" # Add bias, beta\n",
|
|
" y = y + beta\n",
|
|
"\n",
|
|
" return y"
|
|
],
|
|
"metadata": {
|
|
"id": "zYMLtS3nT-0y"
|
|
},
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# This fits the n_hidden+1 parameters (see fig 8.4a) in closed form.\n",
|
|
"# If you have studied linear algebra, then you will know it is a least\n",
|
|
"# squares solution of the form (A^TA)^-1A^Tb. If you don't recognize that,\n",
|
|
"# then just take it on trust that this gives you the best possible solution.\n",
|
|
"def fit_model_closed_form(x,y,n_hidden):\n",
|
|
" n_data = len(x)\n",
|
|
" A = np.ones((n_data, n_hidden+1))\n",
|
|
" for i in range(n_data):\n",
|
|
" for j in range(1,n_hidden+1):\n",
|
|
" A[i,j] = x[i]-(j-1)/n_hidden\n",
|
|
" if A[i,j] < 0:\n",
|
|
" A[i,j] = 0;\n",
|
|
"\n",
|
|
" beta_omega = np.linalg.lstsq(A, y, rcond=None)[0]\n",
|
|
"\n",
|
|
" beta = beta_omega[0]\n",
|
|
" omega = beta_omega[1:]\n",
|
|
"\n",
|
|
" return beta, omega\n"
|
|
],
|
|
"metadata": {
|
|
"id": "MinJxLh1XTHx"
|
|
},
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# Closed form solution\n",
|
|
"beta, omega = fit_model_closed_form(x_data,y_data,n_hidden=3)\n",
|
|
"\n",
|
|
"# Get prediction for model across graph range\n",
|
|
"x_model = np.linspace(0,1,100);\n",
|
|
"y_model = network(x_model, beta, omega)\n",
|
|
"\n",
|
|
"# Draw the function and the model\n",
|
|
"plot_function(x_func, y_func, x_data,y_data, x_model, y_model)"
|
|
],
|
|
"metadata": {
|
|
"id": "HP7fiwNFSfWz"
|
|
},
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# Run the model many times with different datasets and return the mean and standard deviation\n",
|
|
"def get_model_mean_variance(n_data, n_datasets, n_hidden, sigma_func):\n",
|
|
"\n",
|
|
" # Create array that stores model results in rows\n",
|
|
" y_model_all = np.zeros((n_datasets, x_model.shape[0]))\n",
|
|
"\n",
|
|
" for c_dataset in range(n_datasets):\n",
|
|
" # TODO -- Generate n_data x,y, pairs with standard deviation sigma_func\n",
|
|
" # Replace this line\n",
|
|
" x_data,y_data = np.zeros([1,n_data]),np.zeros([1,n_data])\n",
|
|
"\n",
|
|
" # TODO -- Fit the model\n",
|
|
" # Replace this line:\n",
|
|
" beta = 0; omega = np.zeros([n_hidden,1])\n",
|
|
"\n",
|
|
" # TODO -- Run the fitted model on x_model\n",
|
|
" # Replace this line\n",
|
|
" y_model = np.zeros_like(x_model);\n",
|
|
"\n",
|
|
" # Store the model results\n",
|
|
" y_model_all[c_dataset,:] = y_model\n",
|
|
"\n",
|
|
" # Get mean and standard deviation of model\n",
|
|
" mean_model = np.mean(y_model_all,axis=0)\n",
|
|
" std_model = np.std(y_model_all,axis=0)\n",
|
|
"\n",
|
|
" # Return the mean and standard deviation of the fitted model\n",
|
|
" return mean_model, std_model"
|
|
],
|
|
"metadata": {
|
|
"id": "bL553uSaYidy"
|
|
},
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# Let's generate N random data sets, fit the model N times and look at the mean and variance\n",
|
|
"n_datasets = 100\n",
|
|
"n_data = 15\n",
|
|
"sigma_func = 0.3\n",
|
|
"n_hidden = 5\n",
|
|
"\n",
|
|
"# Get mean and standard deviation of fitted model\n",
|
|
"np.random.seed(1)\n",
|
|
"mean_model, std_model = get_model_mean_variance(n_data, n_datasets, n_hidden, sigma_func) ;\n",
|
|
"\n",
|
|
"# Plot the results\n",
|
|
"plot_function(x_func, y_func, x_model=x_model, y_model=mean_model, sigma_model=std_model)"
|
|
],
|
|
"metadata": {
|
|
"id": "Wxk64t2SoX9c"
|
|
},
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# TODO -- Experiment with changing the number of data points and the number of hidden variables\n",
|
|
"# in the model. Get a feeling for what happens in terms of the bias (squared deviation between cyan and black lines)\n",
|
|
"# and the variance (gray region) as we manipulate these quantities."
|
|
],
|
|
"metadata": {
|
|
"id": "QO6mFaKNJ3J_"
|
|
},
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# Plot the noise, bias and variance as a function of capacity\n",
|
|
"hidden_variables = [1,2,3,4,5,6,7,8,9,10,11,12]\n",
|
|
"bias = np.zeros((len(hidden_variables),1)) ;\n",
|
|
"variance = np.zeros((len(hidden_variables),1)) ;\n",
|
|
"\n",
|
|
"n_datasets = 100\n",
|
|
"n_data = 15\n",
|
|
"sigma_func = 0.3\n",
|
|
"n_hidden = 5\n",
|
|
"\n",
|
|
"# Set random seed so that we get the same result every time\n",
|
|
"np.random.seed(1)\n",
|
|
"\n",
|
|
"for c_hidden in range(len(hidden_variables)):\n",
|
|
" # Get mean and variance of fitted model\n",
|
|
" mean_model, std_model = get_model_mean_variance(n_data, n_datasets, hidden_variables[c_hidden], sigma_func) ;\n",
|
|
" # TODO -- Estimate bias and variance\n",
|
|
" # Replace these lines\n",
|
|
"\n",
|
|
" # Compute variance -- average of the model variance (average squared deviation of fitted models around mean fitted model)\n",
|
|
" variance[c_hidden] = 0\n",
|
|
" # Compute bias (average squared deviation of mean fitted model around true function)\n",
|
|
" bias[c_hidden] = 0\n",
|
|
"\n",
|
|
"# Plot the results\n",
|
|
"fig,ax = plt.subplots()\n",
|
|
"ax.plot(hidden_variables, variance, 'k-')\n",
|
|
"ax.plot(hidden_variables, bias, 'r-')\n",
|
|
"ax.plot(hidden_variables, variance+bias, 'g-')\n",
|
|
"ax.set_xlim(0,12)\n",
|
|
"ax.set_ylim(0,0.5)\n",
|
|
"ax.set_xlabel(\"Model capacity\")\n",
|
|
"ax.set_ylabel(\"Variance\")\n",
|
|
"ax.legend(['Variance', 'Bias', 'Bias + Variance'])\n",
|
|
"plt.show()\n"
|
|
],
|
|
"metadata": {
|
|
"id": "ICKjqAlx3Ka9"
|
|
},
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [],
|
|
"metadata": {
|
|
"id": "WKUyOAywL_b2"
|
|
},
|
|
"execution_count": null,
|
|
"outputs": []
|
|
}
|
|
]
|
|
}
|