Created using Colaboratory

This commit is contained in:
udlbook
2022-11-21 11:29:09 +00:00
parent a2be175df9
commit c486cdcf24

@@ -0,0 +1,290 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"authorship_tag": "ABX9TyMcvUAtcMw0yuDfUkbsEDLD",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/udlbook/udlbook/blob/main/CM20315_Coursework_III.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"source": [
"# Coursework III -- Regularization\n",
"\n",
"The goal of the coursework is to modify a simple bit of numpy code that trains a network and measures the performance on a validation set for the MNist 1D dataset. \n",
"\n",
"In this coursework, you need add **regularization** of some kind to improve the performance. Anything from chapter 8 of the book or anything else you can find is fine *except* early stopping. You must not change the model hyperparameters or the training algorithm.\n",
"\n",
"You don't have to improve the performance much. A few tenths of a percent is fine. It just has to be better to get full marks.\n",
"\n",
"You will need to upload three things to Moodle:\n",
"1. The image that this notebook saves (click the folder icon on the left on colab to download it)\n",
"2. The lines of code you changed\n",
"3. The whole notebook as a .ipynb file. You can do this on the File menu\n",
"\n",
"\n"
],
"metadata": {
"id": "t9vk9Elugvmi"
}
},
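{
"cell_type": "markdown",
"source": [
"To make the task concrete, here is a hedged sketch of *one* possible direction: an L2 penalty on the weights (chapter 8). It is illustrative only, not the required answer, and `lambda_reg` is an arbitrary placeholder value."
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"# Illustrative sketch only (leave commented) -- an L2 penalty added to the loss.\n",
"# The names model, loss_function, pred and y_batch match the training cell below;\n",
"# lambda_reg is a hypothetical value, not a recommendation.\n",
"# lambda_reg = 1e-4\n",
"# l2_penalty = sum((p ** 2).sum() for p in model.parameters())\n",
"# loss = loss_function(pred, y_batch) + lambda_reg * l2_penalty"
],
"metadata": {},
"execution_count": null,
"outputs": []
},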
{
"cell_type": "code",
"source": [
"import numpy as np\n",
"import os\n",
"import torch, torch.nn as nn\n",
"from torch.utils.data import TensorDataset, DataLoader\n",
"from torch.optim.lr_scheduler import StepLR\n",
"import matplotlib.pyplot as plt\n",
"import random"
],
"metadata": {
"id": "YrXWAH7sUWvU"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Run this once to copy the train and validation data to your CoLab environment \n",
"# or download from my github to your local machine if you are doing this locally\n",
"if not os.path.exists('./train_data_x.npy'):\n",
" !wget https://github.com/udlbook/udlbook/raw/main/practicals/train_data_x.npy\n",
" !wget https://github.com/udlbook/udlbook/raw/main/practicals/train_data_y.npy\n",
" !wget https://github.com/udlbook/udlbook/raw/main/practicals/val_data_x.npy\n",
" !wget https://github.com/udlbook/udlbook/raw/main/practicals/val_data_y.npy "
],
"metadata": {
"id": "wScBGXXFVadm"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Load in the data\n",
"train_data_x = np.load('train_data_x.npy')\n",
"train_data_y = np.load('train_data_y.npy')\n",
"val_data_x = np.load('val_data_x.npy')\n",
"val_data_y = np.load('val_data_y.npy')\n",
"# Print out sizes\n",
"print(\"Train data: %d examples (columns), each of which has %d dimensions (rows)\"%((train_data_x.shape[1],train_data_x.shape[0])))\n",
"print(\"Validation data: %d examples (columns), each of which has %d dimensions (rows)\"%((val_data_x.shape[1],val_data_x.shape[0])))"
],
"metadata": {
"id": "8bKADvLHbiV5"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"Define the network"
],
"metadata": {
"id": "_sFvRDGrl4qe"
}
},
{
"cell_type": "code",
"source": [
"# There are 40 input dimensions and 10 output dimensions for this data\n",
"# The inputs correspond to the 40 offsets in the MNIST1D template.\n",
"D_i = 40\n",
"# The outputs correspond to the 10 digits\n",
"D_o = 10\n",
"\n",
"# Number of hidden units in layers 1 and 2\n",
"D_1 = 100\n",
"D_2 = 100\n",
"\n",
"# create model with two hidden layers\n",
"model = nn.Sequential(\n",
"nn.Linear(D_i, D_1),\n",
"nn.ReLU(),\n",
"nn.Linear(D_1, D_2),\n",
"nn.ReLU(),\n",
"nn.Linear(D_2, D_o))"
],
"metadata": {
"id": "FslroPJJffrh"
},
"execution_count": null,
"outputs": []
},
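{
"cell_type": "markdown",
"source": [
"For reference, dropout (another chapter 8 technique) would slot between the layers above. A minimal sketch, assuming an arbitrary dropout probability of 0.2; note that a model with dropout needs `model.eval()` before computing validation statistics and `model.train()` when training resumes."
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"# Illustrative sketch only (leave commented) -- a dropout variant of the same model.\n",
"# The probability p=0.2 is a hypothetical value, not a tuned one.\n",
"# model = nn.Sequential(\n",
"#     nn.Linear(D_i, D_1),\n",
"#     nn.ReLU(),\n",
"#     nn.Dropout(p=0.2),\n",
"#     nn.Linear(D_1, D_2),\n",
"#     nn.ReLU(),\n",
"#     nn.Dropout(p=0.2),\n",
"#     nn.Linear(D_2, D_o))"
],
"metadata": {},
"execution_count": null,
"outputs": []
},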
{
"cell_type": "code",
"source": [
"# He initialization of weights\n",
"def weights_init(layer_in):\n",
" if isinstance(layer_in, nn.Linear):\n",
" nn.init.kaiming_uniform_(layer_in.weight)\n",
" layer_in.bias.data.fill_(0.0)"
],
"metadata": {
"id": "YgLaex1pfhqz"
},
"execution_count": null,
"outputs": []
},
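{
"cell_type": "markdown",
"source": [
"A quick optional sanity check on the initializer: with Kaiming/He scaling the spread of the weights shrinks with the number of inputs (roughly sqrt(2 / fan_in) for the default gain). This is only a sketch for intuition."
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"# Optional check (illustrative): inspect the spread of an initialized layer\n",
"# layer = nn.Linear(100, 100)\n",
"# weights_init(layer)\n",
"# print(layer.weight.std())  # roughly sqrt(2/100), i.e. about 0.14"
],
"metadata": {},
"execution_count": null,
"outputs": []
},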
{
"cell_type": "code",
"source": [
"# You need all this stuff to ensure that PyTorch is deterministic\n",
"def set_seed(seed):\n",
" torch.manual_seed(seed)\n",
" torch.cuda.manual_seed_all(seed)\n",
" torch.backends.cudnn.deterministic = True\n",
" torch.backends.cudnn.benchmark = False\n",
" np.random.seed(seed)\n",
" random.seed(seed)\n",
" os.environ['PYTHONHASHSEED'] = str(seed)"
],
"metadata": {
"id": "zXRmxCQNnL_M"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Set seed so always get same result (do not change)\n",
"set_seed(1)\n",
"\n",
"# choose cross entropy loss function (equation 5.24 in the loss notes)\n",
"loss_function = nn.CrossEntropyLoss()\n",
"# construct SGD optimizer and initialize learning rate and momentum\n",
"optimizer = torch.optim.SGD(model.parameters(), lr = 0.05, momentum=0.9)\n",
"# object that decreases learning rate by half every 10 epochs\n",
"scheduler = StepLR(optimizer, step_size=10, gamma=0.5)\n",
"# create 100 dummy data points and store in data loader class\n",
"x_train = torch.tensor(train_data_x.transpose().astype('float32'))\n",
"y_train = torch.tensor(train_data_y.astype('long'))\n",
"x_val= torch.tensor(val_data_x.transpose().astype('float32'))\n",
"y_val = torch.tensor(val_data_y.astype('long'))\n",
"\n",
"# load the data into a class that creates the batches\n",
"data_loader = DataLoader(TensorDataset(x_train,y_train), batch_size=100, shuffle=True, worker_init_fn=np.random.seed(1))\n",
"\n",
"# Initialize model weights\n",
"model.apply(weights_init)\n",
"\n",
"# loop over the dataset n_epoch times\n",
"n_epoch = 50\n",
"# store the loss and the % correct at each epoch\n",
"losses_train = np.zeros((n_epoch))\n",
"errors_train = np.zeros((n_epoch))\n",
"losses_val = np.zeros((n_epoch))\n",
"errors_val = np.zeros((n_epoch))\n",
"\n",
"for epoch in range(n_epoch):\n",
" # loop over batches\n",
" for i, data in enumerate(data_loader):\n",
" # retrieve inputs and labels for this batch\n",
" x_batch, y_batch = data\n",
" # zero the parameter gradients\n",
" optimizer.zero_grad()\n",
" # forward pass -- calculate model output\n",
" pred = model(x_batch)\n",
" # compute the lss\n",
" loss = loss_function(pred, y_batch)\n",
" # backward pass\n",
" loss.backward()\n",
" # SGD update\n",
" optimizer.step()\n",
"\n",
" # Run whole dataset to get statistics -- normally wouldn't do this\n",
" pred_train = model(x_train)\n",
" pred_val = model(x_val)\n",
" _, predicted_train_class = torch.max(pred_train.data, 1)\n",
" _, predicted_val_class = torch.max(pred_val.data, 1)\n",
" errors_train[epoch] = 100 - 100 * (predicted_train_class == y_train).float().sum() / len(y_train)\n",
" errors_val[epoch]= 100 - 100 * (predicted_val_class == y_val).float().sum() / len(y_val)\n",
" losses_train[epoch] = loss_function(pred_train, y_train).item()\n",
" losses_val[epoch]= loss_function(pred_val, y_val).item()\n",
" print(f'Epoch {epoch:5d}, train loss {losses_train[epoch]:.6f}, train error {errors_train[epoch]:3.2f}, val loss {losses_val[epoch]:.6f}, percent error {errors_val[epoch]:3.2f}')\n",
" \n",
" # tell scheduler to consider updating learning rate\n",
" scheduler.step()\n",
"\n",
"# Plot the results\n",
"fig, ax = plt.subplots()\n",
"ax.plot(errors_train,'r-',label='train')\n",
"ax.plot(errors_val,'b-',label='validation')\n",
"ax.set_ylim(0,100); ax.set_xlim(0,n_epoch)\n",
"ax.set_xlabel('Epoch'); ax.set_ylabel('Error')\n",
"ax.set_title('Part III: Validation Result %3.2f'%(errors_val[-1]))\n",
"ax.legend()\n",
"ax.plot([0,n_epoch],[37.45, 37.45],'k:') # Original results. You should be better than this!\n",
"plt.savefig('Coursework_III_Results.png',format='png')\n",
"plt.show()"
],
"metadata": {
"id": "NYw8I_3mmX5c"
},
"execution_count": null,
"outputs": []
},
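{
"cell_type": "markdown",
"source": [
"Two further chapter 8 options, sketched for illustration only. In PyTorch 1.10 and later, label smoothing is a one-argument change to the loss; adding Gaussian noise to the training inputs acts as data augmentation. The smoothing value and noise scale below are arbitrary placeholders, not recommendations."
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"# Illustrative sketches only (leave commented):\n",
"# 1. Label smoothing (PyTorch >= 1.10) -- replaces the loss definition above:\n",
"# loss_function = nn.CrossEntropyLoss(label_smoothing=0.1)\n",
"#\n",
"# 2. Gaussian input noise -- would go just after x_batch is retrieved in the loop:\n",
"# x_batch = x_batch + 0.1 * torch.randn_like(x_batch)"
],
"metadata": {},
"execution_count": null,
"outputs": []
},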
{
"cell_type": "code",
"source": [
"# Leave this all commented for now\n",
"# We'll see how well you did on the test data after the coursework is submitted\n",
"\n",
"# if not os.path.exists('./test_data_x.npy'):\n",
"# !wget https://github.com/udlbook/udlbook/raw/main/practicals/test_data_x.npy\n",
"# !wget https://github.com/udlbook/udlbook/raw/main/practicals/test_data_y.npy\n",
"\n",
"\n",
"# # I haven't given you this yet, leave commented\n",
"# test_data_x = np.load('test_data_x.npy')\n",
"# test_data_y = np.load('test_data_y.npy')\n",
"# x_test = torch.tensor(test_data_x.transpose().astype('float32'))\n",
"# y_test = torch.tensor(test_data_y.astype('long'))\n",
"# pred_test = model(x_test)\n",
"# _, predicted_test_class = torch.max(pred_test.data, 1)\n",
"# errors_test = 100 - 100 * (predicted_test_class == y_test).float().sum() / len(y_test)\n",
"# print(\"Test error = %3.3f\"%(errors_test))"
],
"metadata": {
"id": "O7nBz-R84QdJ"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "zXccksoD1Eww"
},
"execution_count": null,
"outputs": []
}
]
}