Created using Colaboratory

2023-11-20 14:46:26 +00:00
parent 73cf5ff7da
commit 2056594cdd
1 changed files with 280 additions and 0 deletions
@@ -0,0 +1,280 @@
 {
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "authorship_tag": "ABX9TyPNASgWoh4kBvxFP0xkN/I4",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/udlbook/udlbook/blob/main/CM20315_2023/CM20315_Coursework_I.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Coursework I -- Model hyperparameters\n",
        "\n",
        "The goal of the coursework is to modify a simple bit of numpy code that trains a network and measures the performance on a validation set for the MNIST 1D dataset.\n",
        "\n",
        "In this coursework, you need to modify the **model hyperparameters** (only) to improve the performance over the current attempt.  This could mean the number of layers, the number of hidden units per layer, or the type of activation function, or any combination of the three.\n",
        "\n",
        "The only constraint is that you MUST use a fully connected network (no convolutional networks for now if you have read ahead in the book).\n",
        "\n",
        "You must improve the performance by at least 2% to get full marks.\n",
        "\n",
        "You will need to upload three things to Moodle:\n",
        "1.   The image that this notebook saves (click the folder icon on the left on colab to download it)\n",
        "2.   The lines of code you changed\n",
        "3.   The whole notebook as a .ipynb file.  You can do this on the File menu\n",
        "\n",
        "\n"
      ],
      "metadata": {
        "id": "t9vk9Elugvmi"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import numpy as np\n",
        "import os\n",
        "import torch, torch.nn as nn\n",
        "from torch.utils.data import TensorDataset, DataLoader\n",
        "from torch.optim.lr_scheduler import StepLR\n",
        "import matplotlib.pyplot as plt\n",
        "import random\n",
        "import gdown"
      ],
      "metadata": {
        "id": "YrXWAH7sUWvU"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "\n",
        "# Run this once to copy the train and validation data to your CoLab environment\n",
        "# or download from my github to your local machine if you are doing this locally\n",
        "if not os.path.exists('./Data.zip'):\n",
        "  !gdown 1HtnCrncY6dFCYqzgPf1HtPVAerTpwFRm\n",
        "  !unzip Data.zip"
      ],
      "metadata": {
        "id": "wScBGXXFVadm"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Load in the data\n",
        "train_data_x = np.load('train_data_x.npy')\n",
        "val_data_y = np.load('val_data_y.npy')\n",
        "train_data_y = np.load('train_data_y.npy')\n",
        "val_data_x = np.load('val_data_x.npy')\n",
        "# Print out sizes\n",
        "print(\"Train data: %d examples (columns), each of which has %d dimensions (rows)\"%((train_data_x.shape[1],train_data_x.shape[0])))\n",
        "print(\"Validation data: %d examples (columns), each of which has %d dimensions (rows)\"%((val_data_x.shape[1],val_data_x.shape[0])))"
      ],
      "metadata": {
        "id": "8bKADvLHbiV5"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "Define the network"
      ],
      "metadata": {
        "id": "_sFvRDGrl4qe"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# YOU SHOULD ONLY CHANGE THIS CELL!\n",
        "\n",
        "# There are 40 input dimensions and 10 output dimensions for this data\n",
        "# The inputs correspond to the 40 offsets in the MNIST1D template.\n",
        "D_i = 40\n",
        "# The outputs correspond to the 10 digits\n",
        "D_o = 10\n",
        "\n",
        "# Number of hidden units in layers 1 and 2\n",
        "D_1 = 100\n",
        "D_2 = 100\n",
        "\n",
        "# create model with two hidden layers\n",
        "model = nn.Sequential(\n",
        "nn.Linear(D_i, D_1),\n",
        "nn.ReLU(),\n",
        "nn.Linear(D_1, D_2),\n",
        "nn.ReLU(),\n",
        "nn.Linear(D_2, D_o))"
      ],
      "metadata": {
        "id": "FslroPJJffrh"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# He initialization of weights\n",
        "def weights_init(layer_in):\n",
        "  if isinstance(layer_in, nn.Linear):\n",
        "    nn.init.kaiming_uniform_(layer_in.weight)\n",
        "    layer_in.bias.data.fill_(0.0)"
      ],
      "metadata": {
        "id": "YgLaex1pfhqz"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# You need all this stuff to ensure that PyTorch is deterministic\n",
        "def set_seed(seed):\n",
        "    torch.manual_seed(seed)\n",
        "    torch.cuda.manual_seed_all(seed)\n",
        "    torch.backends.cudnn.deterministic = True\n",
        "    torch.backends.cudnn.benchmark = False\n",
        "    np.random.seed(seed)\n",
        "    random.seed(seed)\n",
        "    os.environ['PYTHONHASHSEED'] = str(seed)"
      ],
      "metadata": {
        "id": "zXRmxCQNnL_M"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Set seed so always get same result (do not change)\n",
        "set_seed(1)\n",
        "\n",
        "# choose cross entropy loss function (equation 5.24 in the loss notes)\n",
        "loss_function = nn.CrossEntropyLoss()\n",
        "# construct SGD optimizer and initialize learning rate and momentum\n",
        "optimizer = torch.optim.SGD(model.parameters(), lr = 0.05, momentum=0.9)\n",
        "# object that decreases learning rate by half every 10 epochs\n",
        "scheduler = StepLR(optimizer, step_size=10, gamma=0.5)\n",
        "# create 100 dummy data points and store in data loader class\n",
        "x_train = torch.tensor(train_data_x.transpose().astype('float32'))\n",
        "y_train = torch.tensor(train_data_y.astype('long'))\n",
        "x_val= torch.tensor(val_data_x.transpose().astype('float32'))\n",
        "y_val = torch.tensor(val_data_y.astype('long'))\n",
        "\n",
        "# load the data into a class that creates the batches\n",
        "data_loader = DataLoader(TensorDataset(x_train,y_train), batch_size=100, shuffle=True, worker_init_fn=np.random.seed(1))\n",
        "\n",
        "# Initialize model weights\n",
        "model.apply(weights_init)\n",
        "\n",
        "# loop over the dataset n_epoch times\n",
        "n_epoch = 50\n",
        "# store the loss and the % correct at each epoch\n",
        "losses_train = np.zeros((n_epoch))\n",
        "errors_train = np.zeros((n_epoch))\n",
        "losses_val = np.zeros((n_epoch))\n",
        "errors_val = np.zeros((n_epoch))\n",
        "\n",
        "for epoch in range(n_epoch):\n",
        "  # loop over batches\n",
        "  for i, data in enumerate(data_loader):\n",
        "    # retrieve inputs and labels for this batch\n",
        "    x_batch, y_batch = data\n",
        "    # zero the parameter gradients\n",
        "    optimizer.zero_grad()\n",
        "    # forward pass -- calculate model output\n",
        "    pred = model(x_batch)\n",
        "    # compute the lss\n",
        "    loss = loss_function(pred, y_batch)\n",
        "    # backward pass\n",
        "    loss.backward()\n",
        "    # SGD update\n",
        "    optimizer.step()\n",
        "\n",
        "  # Run whole dataset to get statistics -- normally wouldn't do this\n",
        "  pred_train = model(x_train)\n",
        "  pred_val = model(x_val)\n",
        "  _, predicted_train_class = torch.max(pred_train.data, 1)\n",
        "  _, predicted_val_class = torch.max(pred_val.data, 1)\n",
        "  errors_train[epoch] = 100 - 100 * (predicted_train_class == y_train).float().sum() / len(y_train)\n",
        "  errors_val[epoch]= 100 - 100 * (predicted_val_class == y_val).float().sum() / len(y_val)\n",
        "  losses_train[epoch] = loss_function(pred_train, y_train).item()\n",
        "  losses_val[epoch]= loss_function(pred_val, y_val).item()\n",
        "  print(f'Epoch {epoch:5d}, train loss {losses_train[epoch]:.6f}, train error {errors_train[epoch]:3.2f},  val loss {losses_val[epoch]:.6f}, percent error {errors_val[epoch]:3.2f}')\n",
        "\n",
        "  # tell scheduler to consider updating learning rate\n",
        "  scheduler.step()\n",
        "\n",
        "# Plot the results\n",
        "fig, ax = plt.subplots()\n",
        "ax.plot(errors_train,'r-',label='train')\n",
        "ax.plot(errors_val,'b-',label='validation')\n",
        "ax.set_ylim(0,100); ax.set_xlim(0,n_epoch)\n",
        "ax.set_xlabel('Epoch'); ax.set_ylabel('Error')\n",
        "ax.set_title('Part I: Validation Result %3.2f'%(errors_val[-1]))\n",
        "ax.legend()\n",
        "ax.plot([0,n_epoch],[37.45, 37.45],'k:') # Original results. You should be better than this!\n",
        "plt.savefig('Coursework_I_Results.png',format='png')\n",
        "plt.show()"
      ],
      "metadata": {
        "id": "NYw8I_3mmX5c"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Leave this all commented for now\n",
        "# We'll see how well you did on the test data after the coursework is submitted\n",
        "\n",
        "# # I haven't given you this yet, leave commented\n",
        "# test_data_x = np.load('test_data_x.npy')\n",
        "# test_data_y = np.load('test_data_y.npy')\n",
        "# x_test = torch.tensor(test_data_x.transpose().astype('float32'))\n",
        "# y_test = torch.tensor(test_data_y.astype('long'))\n",
        "# pred_test = model(x_test)\n",
        "# _, predicted_test_class = torch.max(pred_test.data, 1)\n",
        "# errors_test = 100 - 100 * (predicted_test_class == y_test).float().sum() / len(y_test)\n",
        "# print(\"Test error = %3.3f\"%(errors_test))"
      ],
      "metadata": {
        "id": "O7nBz-R84QdJ"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
 }