From ffb1778145937535d51dd06a2e7f948ebd5a5103 Mon Sep 17 00:00:00 2001
From: udlbook <110402648+udlbook@users.noreply.github.com>
Date: Tue, 6 Dec 2022 10:01:10 +0000
Subject: [PATCH] Created using Colaboratory

---
 CM20315_Convolution_II.ipynb | 253 +++++++++++++++++++++++++++++++++++
 1 file changed, 253 insertions(+)
 create mode 100644 CM20315_Convolution_II.ipynb
diff --git a/CM20315_Convolution_II.ipynb b/CM20315_Convolution_II.ipynb
new file mode 100644
index 0000000..6054d39
--- /dev/null
+++ b/CM20315_Convolution_II.ipynb
@@ -0,0 +1,253 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": [],
+      "authorship_tag": "ABX9TyN4fpyg0d75XccLLsNahur1",
+      "include_colab_link": true
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "view-in-github",
+        "colab_type": "text"
+      },
+      "source": [
+        "<a href=\"https://colab.research.google.com/github/udlbook/udlbook/blob/main/CM20315_Convolution_II.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Convolution II -- MNIST1D\n",
+        "\n",
+        "This notebook investigates what happens when we use convolutional layers instead of fully-connected layers for the MNIST-1D from the coursework.\n",
+        "\n",
+        "We'll build the network from figure 10.7 in the notes.\n",
+        "\n"
+      ],
+      "metadata": {
+        "id": "t9vk9Elugvmi"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import numpy as np\n",
+        "import os\n",
+        "import torch, torch.nn as nn\n",
+        "from torch.utils.data import TensorDataset, DataLoader\n",
+        "from torch.optim.lr_scheduler import StepLR\n",
+        "import matplotlib.pyplot as plt\n",
+        "import random"
+      ],
+      "metadata": {
+        "id": "YrXWAH7sUWvU"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Run this once to copy the train and validation data to your CoLab environment \n",
+        "# or download from my github to your local machine if you are doing this locally\n",
+        "if not os.path.exists('./train_data_x.npy'):\n",
+        "  !wget https://github.com/udlbook/udlbook/raw/main/practicals/train_data_x.npy\n",
+        "  !wget https://github.com/udlbook/udlbook/raw/main/practicals/train_data_y.npy\n",
+        "  !wget https://github.com/udlbook/udlbook/raw/main/practicals/val_data_x.npy\n",
+        "  !wget https://github.com/udlbook/udlbook/raw/main/practicals/val_data_y.npy  "
+      ],
+      "metadata": {
+        "id": "wScBGXXFVadm"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Load in the data\n",
+        "train_data_x = np.load('train_data_x.npy')\n",
+        "train_data_y = np.load('train_data_y.npy')\n",
+        "val_data_x = np.load('val_data_x.npy')\n",
+        "val_data_y = np.load('val_data_y.npy')\n",
+        "# Print out sizes\n",
+        "print(\"Train data: %d examples (columns), each of which has %d dimensions (rows)\"%((train_data_x.shape[1],train_data_x.shape[0])))\n",
+        "print(\"Validation data: %d examples (columns), each of which has %d dimensions (rows)\"%((val_data_x.shape[1],val_data_x.shape[0])))"
+      ],
+      "metadata": {
+        "id": "8bKADvLHbiV5"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Define the network"
+      ],
+      "metadata": {
+        "id": "_sFvRDGrl4qe"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "\n",
+        "# TODO Create a model with the folowing layers\n",
+        "# 1. Convolutional layer, (input=length 40 and 1 channel, kernel size 3x3, stride 2, padding=\"valid\", 15 output channels ) \n",
+        "# 2. ReLU\n",
+        "# 3. Convolutional layer, (input=length 19 and 15 channels, kernel size 3x3, stride 2, padding=\"valid\", 15 output channels )\n",
+        "# 4. ReLU\n",
+        "# 5. Convolutional layer, (input=length 9 and 15 channels, kernel size 3x3, stride 2, padding=\"valid\", 15 output channels)\n",
+        "# 6. ReLU\n",
+        "# 7. Flatten (converts 4x15) to length 60\n",
+        "# 8. Linear layer (input size = 60, output size = 10)\n",
+        "# References:\n",
+        "# https://pytorch.org/docs/1.13/generated/torch.nn.Conv1d.html?highlight=conv1d#torch.nn.Conv1d\n",
+        "# https://pytorch.org/docs/stable/generated/torch.nn.Flatten.html\n",
+        "# https://pytorch.org/docs/1.13/generated/torch.nn.Linear.html?highlight=linear#torch.nn.Linear\n",
+        "\n",
+        "# Replace the following function which just runs a standard fully connected network\n",
+        "# The flatten at the beginning is becuase we are passing in the data in a slightly different format.\n",
+        "model = nn.Sequential(\n",
+        "nn.Flatten(),\n",
+        "nn.Linear(40, 100),\n",
+        "nn.ReLU(),\n",
+        "nn.Linear(100, 100),\n",
+        "nn.ReLU(),\n",
+        "nn.Linear(100, 10))"
+      ],
+      "metadata": {
+        "id": "FslroPJJffrh"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# He initialization of weights\n",
+        "def weights_init(layer_in):\n",
+        "  if isinstance(layer_in, nn.Linear):\n",
+        "    nn.init.kaiming_uniform_(layer_in.weight)\n",
+        "    layer_in.bias.data.fill_(0.0)"
+      ],
+      "metadata": {
+        "id": "YgLaex1pfhqz"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# You need all this stuff to ensure that PyTorch is deterministic\n",
+        "def set_seed(seed):\n",
+        "    torch.manual_seed(seed)\n",
+        "    torch.cuda.manual_seed_all(seed)\n",
+        "    torch.backends.cudnn.deterministic = True\n",
+        "    torch.backends.cudnn.benchmark = False\n",
+        "    np.random.seed(seed)\n",
+        "    random.seed(seed)\n",
+        "    os.environ['PYTHONHASHSEED'] = str(seed)"
+      ],
+      "metadata": {
+        "id": "zXRmxCQNnL_M"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Set seed so always get same result (do not change)\n",
+        "set_seed(1)\n",
+        "\n",
+        "# choose cross entropy loss function (equation 5.24 in the loss notes)\n",
+        "loss_function = nn.CrossEntropyLoss()\n",
+        "# construct SGD optimizer and initialize learning rate and momentum\n",
+        "optimizer = torch.optim.SGD(model.parameters(), lr = 0.05, momentum=0.9)\n",
+        "# object that decreases learning rate by half every 10 epochs\n",
+        "scheduler = StepLR(optimizer, step_size=10, gamma=0.5)\n",
+        "# create 100 dummy data points and store in data loader class\n",
+        "x_train = torch.tensor(train_data_x.transpose().astype('float32'))\n",
+        "y_train = torch.tensor(train_data_y.astype('long'))\n",
+        "x_val= torch.tensor(val_data_x.transpose().astype('float32'))\n",
+        "y_val = torch.tensor(val_data_y.astype('long'))\n",
+        "\n",
+        "# load the data into a class that creates the batches\n",
+        "data_loader = DataLoader(TensorDataset(x_train,y_train), batch_size=100, shuffle=True, worker_init_fn=np.random.seed(1))\n",
+        "\n",
+        "# Initialize model weights\n",
+        "model.apply(weights_init)\n",
+        "\n",
+        "# loop over the dataset n_epoch times\n",
+        "n_epoch = 50\n",
+        "# store the loss and the % correct at each epoch\n",
+        "losses_train = np.zeros((n_epoch))\n",
+        "errors_train = np.zeros((n_epoch))\n",
+        "losses_val = np.zeros((n_epoch))\n",
+        "errors_val = np.zeros((n_epoch))\n",
+        "\n",
+        "for epoch in range(n_epoch):\n",
+        "  # loop over batches\n",
+        "  for i, data in enumerate(data_loader):\n",
+        "    # retrieve inputs and labels for this batch\n",
+        "    x_batch, y_batch = data\n",
+        "    # zero the parameter gradients\n",
+        "    optimizer.zero_grad()\n",
+        "    # forward pass -- calculate model output\n",
+        "    pred = model(x_batch[:,None,:])\n",
+        "    # compute the loss\n",
+        "    loss = loss_function(pred, y_batch)\n",
+        "    # backward pass\n",
+        "    loss.backward()\n",
+        "    # SGD update\n",
+        "    optimizer.step()\n",
+        "\n",
+        "  # Run whole dataset to get statistics -- normally wouldn't do this\n",
+        "  pred_train = model(x_train[:,None,:])\n",
+        "  pred_val = model(x_val[:,None,:])\n",
+        "  _, predicted_train_class = torch.max(pred_train.data, 1)\n",
+        "  _, predicted_val_class = torch.max(pred_val.data, 1)\n",
+        "  errors_train[epoch] = 100 - 100 * (predicted_train_class == y_train).float().sum() / len(y_train)\n",
+        "  errors_val[epoch]= 100 - 100 * (predicted_val_class == y_val).float().sum() / len(y_val)\n",
+        "  losses_train[epoch] = loss_function(pred_train, y_train).item()\n",
+        "  losses_val[epoch]= loss_function(pred_val, y_val).item()\n",
+        "  print(f'Epoch {epoch:5d}, train loss {losses_train[epoch]:.6f}, train error {errors_train[epoch]:3.2f},  val loss {losses_val[epoch]:.6f}, percent error {errors_val[epoch]:3.2f}')\n",
+        "  \n",
+        "  # tell scheduler to consider updating learning rate\n",
+        "  scheduler.step()\n",
+        "\n",
+        "# Plot the results\n",
+        "fig, ax = plt.subplots()\n",
+        "ax.plot(errors_train,'r-',label='train')\n",
+        "ax.plot(errors_val,'b-',label='validation')\n",
+        "ax.set_ylim(0,100); ax.set_xlim(0,n_epoch)\n",
+        "ax.set_xlabel('Epoch'); ax.set_ylabel('Error')\n",
+        "ax.set_title('Part I: Validation Result %3.2f'%(errors_val[-1]))\n",
+        "ax.legend()\n",
+        "ax.plot([0,n_epoch],[37.45, 37.45],'k:') # Original results. You should be better than this!\n",
+        "plt.savefig('Coursework_I_Results.png',format='png')\n",
+        "plt.show()"
+      ],
+      "metadata": {
+        "id": "NYw8I_3mmX5c"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}
\ No newline at end of file