From fba121b30b0713c80844b4d1ee61f5005d06e841 Mon Sep 17 00:00:00 2001 From: udlbook <110402648+udlbook@users.noreply.github.com> Date: Tue, 24 Oct 2023 13:16:34 +0100 Subject: [PATCH] Created using Colaboratory --- .../20_2_Full_Batch_Gradient_Descent.ipynb | 296 ++++++++++++++++++ 1 file changed, 296 insertions(+) create mode 100644 Notebooks/Chap20/20_2_Full_Batch_Gradient_Descent.ipynb diff --git a/Notebooks/Chap20/20_2_Full_Batch_Gradient_Descent.ipynb b/Notebooks/Chap20/20_2_Full_Batch_Gradient_Descent.ipynb new file mode 100644 index 0000000..3b1841a --- /dev/null +++ b/Notebooks/Chap20/20_2_Full_Batch_Gradient_Descent.ipynb @@ -0,0 +1,296 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "authorship_tag": "ABX9TyOo4vm4MXcIvAzVlMCaLikH", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# **Notebook 20.2: Full Batch Gradient Descent**\n", + "\n", + "This notebook investigates training a network with full batch gradient descent as in figure 20.2. There is also a version (notebook takes a long time to run), but this didn't speed it up much for me. If you run out of CoLab time, you'll need to download the Python file and run locally.\n", + "\n", + "Work through the cells below, running each cell in turn. In various places you will see the words \"TO DO\". Follow the instructions at these places and make predictions about what is going to happen or write code to complete the functions.\n", + "\n", + "Contact me at udlbookmail@gmail.com if you find any mistakes or have any suggestions." + ], + "metadata": { + "id": "t9vk9Elugvmi" + } + }, + { + "cell_type": "code", + "source": [ + "# Run this if you're in a Colab to make a local copy of the MNIST 1D repository\n", + "!git clone https://github.com/greydanus/mnist1d" + ], + "metadata": { + "id": "D5yLObtZCi9J" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "import numpy as np\n", + "import os\n", + "import torch, torch.nn as nn\n", + "from torch.utils.data import TensorDataset, DataLoader\n", + "from torch.optim.lr_scheduler import StepLR\n", + "import matplotlib.pyplot as plt\n", + "import mnist1d\n", + "import random\n", + "from IPython.display import display, clear_output" + ], + "metadata": { + "id": "YrXWAH7sUWvU" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "args = mnist1d.data.get_dataset_args()\n", + "data = mnist1d.data.get_dataset(args, path='./mnist1d_data.pkl', download=False, regenerate=False)\n", + "\n", + "# The training and test input and outputs are in\n", + "# data['x'], data['y']\n", + "print(\"Examples in training set: {}\".format(len(data['y'])))\n", + "print(\"Length of each example: {}\".format(data['x'].shape[-1]))" + ], + "metadata": { + "id": "twI72ZCrCt5z" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Define the network" + ], + "metadata": { + "id": "_sFvRDGrl4qe" + } + }, + { + "cell_type": "code", + "source": [ + "# Data is length forty, and there are 10 classes\n", + "D_i = 40\n", + "D_o = 10\n", + "\n", + "# create model with one hidden layer and 298 hidden units\n", + "model_1_layer = nn.Sequential(\n", + "nn.Linear(D_i, 298),\n", + "nn.ReLU(),\n", + "nn.Linear(298, D_o))\n", + "\n", + "\n", + "# TODO -- create model with three hidden layers and 100 hidden units per layer\n", + "# Replace this line\n", + "model_2_layer = nn.Sequential(nn.Linear(D_i, D_o))\n", + "\n", + "\n", + "\n", + "# TODO -- Create model with three hidden layers and 75 hidden units per layer\n", + "# Replace this line\n", + "model_3_layer = nn.Sequential(nn.Linear(D_i, D_o))\n", + "\n", + "\n", + "\n", + "# TODO create model with four hidden layers and 63 hidden units per layer\n", + "# Replace this line\n", + "model_4_layer = nn.Sequential(nn.Linear(D_i, D_o))\n", + "\n" + ], + "metadata": { + "id": "FslroPJJffrh" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# He initialization of weights\n", + "def weights_init(layer_in):\n", + " if isinstance(layer_in, nn.Linear):\n", + " nn.init.kaiming_uniform_(layer_in.weight)\n", + " layer_in.bias.data.fill_(0.0)" + ], + "metadata": { + "id": "YgLaex1pfhqz" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "def train_model(model, train_data_x, train_data_y, n_epoch):\n", + " print(\"This is going to take a long time!\")\n", + " # choose cross entropy loss function (equation 5.24 in the loss notes)\n", + " loss_function = nn.CrossEntropyLoss()\n", + " # construct SGD optimizer and initialize learning rate to small value and momentum to 0\n", + " optimizer = torch.optim.SGD(model.parameters(), lr = 0.0025, momentum=0.0)\n", + " # create 100 dummy data points and store in data loader class\n", + " x_train = torch.tensor(train_data_x.transpose().astype('float32'))\n", + " y_train = torch.tensor(train_data_y.astype('long'))\n", + "\n", + " # load the data into a class that creates the batches -- full batch as there are 4000 examples\n", + " data_loader = DataLoader(TensorDataset(x_train,y_train), batch_size=4000, shuffle=False, worker_init_fn=np.random.seed(1))\n", + "\n", + " # Initialize model weights\n", + " model.apply(weights_init)\n", + "\n", + " # store the errors percentage at each point\n", + " errors_train = np.zeros((n_epoch))\n", + "\n", + " for epoch in range(n_epoch):\n", + " # loop over batches\n", + " for i, data in enumerate(data_loader):\n", + " # retrieve inputs and labels for this batch\n", + " x_batch, y_batch = data\n", + " # zero the parameter gradients\n", + " optimizer.zero_grad()\n", + " # forward pass -- calculate model output\n", + " pred = model(x_batch)\n", + " # compute the loss\n", + " loss = loss_function(pred, y_batch)\n", + " # Store the errors\n", + " _, predicted_train_class = torch.max(pred.data, 1)\n", + " errors_train[epoch] = 100 - 100 * (predicted_train_class == y_train).float().sum() / len(y_train)\n", + " # backward pass\n", + " loss.backward()\n", + " # SGD update\n", + " optimizer.step()\n", + "\n", + " if epoch % 10 == 0:\n", + " clear_output(wait=True)\n", + " display(\"Epoch %d, errors_train %3.3f\"%(epoch, errors_train[epoch]))\n", + "\n", + " return errors_train" + ], + "metadata": { + "id": "NYw8I_3mmX5c" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Load in the data\n", + "train_data_x = data['x'].transpose()\n", + "train_data_y = data['y']\n", + "# Print out sizes\n", + "print(\"Train data: %d examples (columns), each of which has %d dimensions (rows)\"%((train_data_x.shape[1],train_data_x.shape[0])))" + ], + "metadata": { + "id": "4FE3HQ_vedXO" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Train the models\n", + "errors_four_layers = train_model(model_4_layer, train_data_x, train_data_y, n_epoch=200000)" + ], + "metadata": { + "id": "b56wdODqemF1" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "errors_three_layers = train_model(model_3_layer, train_data_x, train_data_y, n_epoch=200000)\n" + ], + "metadata": { + "id": "hqY-MJVPnCBV" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "errors_two_layers = train_model(model_2_layer, train_data_x, train_data_y, n_epoch=200000)\n" + ], + "metadata": { + "id": "T61jfpNGnDGj" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "errors_one_layer = train_model(model_1_layer, train_data_x, train_data_y, n_epoch=500000)" + ], + "metadata": { + "id": "HO8ZFgYqnEQe" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Plot the results\n", + "fig, ax = plt.subplots()\n", + "ax.plot(errors_one_layer,'r-',label='one layer')\n", + "ax.plot(errors_two_layers,'g-',label='two layers')\n", + "ax.plot(errors_three_layers,'b-',label='three layers')\n", + "ax.plot(errors_four_layers,'m-',label='four layers')\n", + "ax.set_ylim(0,100)\n", + "ax.set_xlabel('Epoch'); ax.set_ylabel('Percent error')\n", + "ax.legend()\n", + "plt.show()" + ], + "metadata": { + "id": "pYL0YMI5oNSR" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "wJerga3M7eDw" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file