From cef074a42bc9a5f864e2c66e790dff48a4ae2887 Mon Sep 17 00:00:00 2001 From: udlbook <110402648+udlbook@users.noreply.github.com> Date: Mon, 20 Nov 2023 16:06:11 +0000 Subject: [PATCH] Created using Colaboratory --- CM20315_2023/CM20315_Coursework_III.ipynb | 275 ++++++++++++++++++++++ 1 file changed, 275 insertions(+) create mode 100644 CM20315_2023/CM20315_Coursework_III.ipynb diff --git a/CM20315_2023/CM20315_Coursework_III.ipynb b/CM20315_2023/CM20315_Coursework_III.ipynb new file mode 100644 index 0000000..e59c817 --- /dev/null +++ b/CM20315_2023/CM20315_Coursework_III.ipynb @@ -0,0 +1,275 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "authorship_tag": "ABX9TyM4iym9JS6lh/helwOU1CL9", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Coursework III -- Regularization\n", + "\n", + "The goal of the coursework is to modify a simple bit of numpy code that trains a network and measures the performance on a validation set for the MNist 1D dataset.\n", + "\n", + "In this coursework, you need add **regularization** of some kind to improve the performance. Anything from chapter 8 of the book or anything else you can find is fine *except* early stopping. You must not change the model hyperparameters or the training algorithm.\n", + "\n", + "You must improve the performance by at least 2% to get full marks.\n", + "\n", + "You will need to upload three things to Moodle:\n", + "1. The image that this notebook saves (click the folder icon on the left on colab to download it)\n", + "2. The lines of code you changed\n", + "3. The whole notebook as a .ipynb file. 
You can do this on the File menu\n", + "\n", + "\n" + ], + "metadata": { + "id": "t9vk9Elugvmi" + } + }, + { + "cell_type": "code", + "source": [ + "import numpy as np\n", + "import os\n", + "import torch, torch.nn as nn\n", + "from torch.utils.data import TensorDataset, DataLoader\n", + "from torch.optim.lr_scheduler import StepLR\n", + "import matplotlib.pyplot as plt\n", + "import random\n", + "import gdown" + ], + "metadata": { + "id": "YrXWAH7sUWvU" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Run this once to copy the train and validation data to your CoLab environment\n", + "if not os.path.exists('./Data.zip'):\n", + " !gdown 1HtnCrncY6dFCYqzgPf1HtPVAerTpwFRm\n", + " !unzip Data.zip" + ], + "metadata": { + "id": "wScBGXXFVadm" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Load in the data\n", + "train_data_x = np.load('train_data_x.npy')\n", + "train_data_y = np.load('train_data_y.npy')\n", + "val_data_x = np.load('val_data_x.npy')\n", + "val_data_y = np.load('val_data_y.npy')\n", + "# Print out sizes\n", + "print(\"Train data: %d examples (columns), each of which has %d dimensions (rows)\"%((train_data_x.shape[1],train_data_x.shape[0])))\n", + "print(\"Validation data: %d examples (columns), each of which has %d dimensions (rows)\"%((val_data_x.shape[1],val_data_x.shape[0])))" + ], + "metadata": { + "id": "8bKADvLHbiV5" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Define the network" + ], + "metadata": { + "id": "_sFvRDGrl4qe" + } + }, + { + "cell_type": "code", + "source": [ + "# There are 40 input dimensions and 10 output dimensions for this data\n", + "# The inputs correspond to the 40 offsets in the MNIST1D template.\n", + "D_i = 40\n", + "# The outputs correspond to the 10 digits\n", + "D_o = 10\n", + "\n", + "# Number of hidden units in layers 1 and 2\n", + "D_1 = 100\n", + "D_2 = 100\n", + 
# create model with two hidden layers (fully connected, ReLU activations)
model = nn.Sequential(
    nn.Linear(D_i, D_1),
    nn.ReLU(),
    nn.Linear(D_1, D_2),
    nn.ReLU(),
    nn.Linear(D_2, D_o))


def weights_init(layer_in):
    """He (Kaiming) initialization: uniform Kaiming weights, zero biases.

    Intended to be passed to ``model.apply``; only nn.Linear layers are touched.
    """
    if isinstance(layer_in, nn.Linear):
        nn.init.kaiming_uniform_(layer_in.weight)
        layer_in.bias.data.fill_(0.0)


def set_seed(seed):
    """Seed every RNG in sight so PyTorch runs are deterministic."""
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)


# Set seed so always get same result (do not change)
set_seed(1)

# choose cross entropy loss function (equation 5.24 in the loss notes)
loss_function = nn.CrossEntropyLoss()
# construct SGD optimizer and initialize learning rate and momentum
optimizer = torch.optim.SGD(model.parameters(), lr=0.05, momentum=0.9)
# object that decreases learning rate by half every 10 epochs
scheduler = StepLR(optimizer, step_size=10, gamma=0.5)

# Convert the numpy arrays (one example per column) into tensors with one
# example per row.  FIX: targets must be int64 for nn.CrossEntropyLoss;
# astype('long') maps to the platform C long, which is only 32-bit on
# Windows, so use the explicit, portable 'int64' dtype instead.
x_train = torch.tensor(train_data_x.transpose().astype('float32'))
y_train = torch.tensor(train_data_y.astype('int64'))
x_val = torch.tensor(val_data_x.transpose().astype('float32'))
y_val = torch.tensor(val_data_y.astype('int64'))
# load the data into a class that creates the batches.
# FIX: the original passed worker_init_fn=np.random.seed(1), which CALLS
# np.random.seed immediately and hands its return value (None) to the
# DataLoader, so workers were never actually seeded.  Pass a real callable
# that seeds each worker process instead.
data_loader = DataLoader(TensorDataset(x_train, y_train), batch_size=100,
                         shuffle=True,
                         worker_init_fn=lambda worker_id: np.random.seed(1 + worker_id))

# Initialize model weights (He init, zero biases -- see weights_init)
model.apply(weights_init)

# loop over the dataset n_epoch times
n_epoch = 50
# store the loss and the % error at each epoch
losses_train = np.zeros((n_epoch))
errors_train = np.zeros((n_epoch))
losses_val = np.zeros((n_epoch))
errors_val = np.zeros((n_epoch))

for epoch in range(n_epoch):
    # loop over batches
    for i, data in enumerate(data_loader):
        # retrieve inputs and labels for this batch
        x_batch, y_batch = data
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward pass -- calculate model output
        pred = model(x_batch)
        # compute the loss
        loss = loss_function(pred, y_batch)
        # backward pass
        loss.backward()
        # SGD update
        optimizer.step()

    # Run whole dataset to get statistics -- normally wouldn't do this.
    # no_grad() stops autograd from building a graph for these evaluation
    # passes (identical numbers, no wasted memory).
    with torch.no_grad():
        pred_train = model(x_train)
        pred_val = model(x_val)
    _, predicted_train_class = torch.max(pred_train.data, 1)
    _, predicted_val_class = torch.max(pred_val.data, 1)
    errors_train[epoch] = 100 - 100 * (predicted_train_class == y_train).float().sum() / len(y_train)
    errors_val[epoch] = 100 - 100 * (predicted_val_class == y_val).float().sum() / len(y_val)
    losses_train[epoch] = loss_function(pred_train, y_train).item()
    losses_val[epoch] = loss_function(pred_val, y_val).item()
    print(f'Epoch {epoch:5d}, train loss {losses_train[epoch]:.6f}, train error {errors_train[epoch]:3.2f}, val loss {losses_val[epoch]:.6f}, percent error {errors_val[epoch]:3.2f}')

    # tell scheduler to consider updating learning rate
    scheduler.step()

# Plot the train/validation error curves and save the image for submission
fig, ax = plt.subplots()
ax.plot(errors_train, 'r-', label='train')
ax.plot(errors_val, 'b-', label='validation')
ax.set_ylim(0, 100); ax.set_xlim(0, n_epoch)
ax.set_xlabel('Epoch'); ax.set_ylabel('Error')
ax.set_title('Part III: Validation Result %3.2f' % (errors_val[-1]))
ax.legend()
ax.plot([0, n_epoch], [37.45, 37.45], 'k:')  # Original results. You should be better than this!
plt.savefig('Coursework_III_Results.png', format='png')
plt.show()

# Leave this all commented for now
# We'll see how well you did on the test data after the coursework is submitted

# # I haven't given you this yet, leave commented
# test_data_x = np.load('test_data_x.npy')
# test_data_y = np.load('test_data_y.npy')
# x_test = torch.tensor(test_data_x.transpose().astype('float32'))
# y_test = torch.tensor(test_data_y.astype('long'))
# pred_test = model(x_test)
# _, predicted_test_class = torch.max(pred_test.data, 1)
# errors_test = 100 - 100 * (predicted_test_class == y_test).float().sum() / len(y_test)
# print("Test error = %3.3f"%(errors_test))