From ecd01f29926e3101c16054d0519ebbdd49dc68a2 Mon Sep 17 00:00:00 2001 From: udlbook <110402648+udlbook@users.noreply.github.com> Date: Sun, 1 Oct 2023 18:19:38 +0100 Subject: [PATCH] Delete Notesbooks/Chap11 directory --- .../Chap11/11_2_Residual_Networks.ipynb | 277 ------------------ 1 file changed, 277 deletions(-) delete mode 100644 Notesbooks/Chap11/11_2_Residual_Networks.ipynb diff --git a/Notesbooks/Chap11/11_2_Residual_Networks.ipynb b/Notesbooks/Chap11/11_2_Residual_Networks.ipynb deleted file mode 100644 index caa7236..0000000 --- a/Notesbooks/Chap11/11_2_Residual_Networks.ipynb +++ /dev/null @@ -1,277 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [], - "authorship_tag": "ABX9TyMJvfoCDFcSK7Z0/HkcGunb", - "include_colab_link": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "view-in-github", - "colab_type": "text" - }, - "source": [ - "\"Open" - ] - }, - { - "cell_type": "markdown", - "source": [ - "# **Notebook 11.2: Residual Networks**\n", - "\n", - "This notebook adapts the networks for MNIST1D to use residual connections.\n", - "\n", - "Work through the cells below, running each cell in turn. In various places you will see the words \"TO DO\". Follow the instructions at these places and make predictions about what is going to happen or write code to complete the functions.\n", - "\n", - "Contact me at udlbookmail@gmail.com if you find any mistakes or have any suggestions.\n", - "\n" - ], - "metadata": { - "id": "t9vk9Elugvmi" - } - }, - { - "cell_type": "code", - "source": [ - "# Run this if you're in a Colab to make a local copy of the MNIST 1D repository\n", - "!git clone https://github.com/greydanus/mnist1d" - ], - "metadata": { - "id": "D5yLObtZCi9J" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "import numpy as np\n", - "import os\n", - "import torch, torch.nn as nn\n", - "from torch.utils.data import TensorDataset, DataLoader\n", - "from torch.optim.lr_scheduler import StepLR\n", - "import matplotlib.pyplot as plt\n", - "import mnist1d\n", - "import random" - ], - "metadata": { - "id": "YrXWAH7sUWvU" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "args = mnist1d.data.get_dataset_args()\n", - "data = mnist1d.data.get_dataset(args, path='./mnist1d_data.pkl', download=False, regenerate=False)\n", - "\n", - "# The training and test input and outputs are in\n", - "# data['x'], data['y'], data['x_test'], and data['y_test']\n", - "print(\"Examples in training set: {}\".format(len(data['y'])))\n", - "print(\"Examples in test set: {}\".format(len(data['y_test'])))\n", - "print(\"Length of each example: {}\".format(data['x'].shape[-1]))" - ], - "metadata": { - "id": "twI72ZCrCt5z" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# Load in the data\n", - "train_data_x = data['x'].transpose()\n", - "train_data_y = data['y']\n", - "val_data_x = data['x_test'].transpose()\n", - "val_data_y = data['y_test']\n", - "# Print out sizes\n", - "print(\"Train data: %d examples (columns), each of which has %d dimensions (rows)\"%((train_data_x.shape[1],train_data_x.shape[0])))\n", - "print(\"Validation data: %d examples (columns), each of which has %d dimensions (rows)\"%((val_data_x.shape[1],val_data_x.shape[0])))" - ], - "metadata": { - "id": "8bKADvLHbiV5" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "Define the network" - ], - "metadata": { - "id": "_sFvRDGrl4qe" - } - }, - { - "cell_type": "code", - "source": [ - "# There are 40 input dimensions and 10 output dimensions for this data\n", - "# The inputs correspond to the 40 offsets in the MNIST1D template.\n", - "D_i = 40\n", - "# The outputs correspond to the 10 digits\n", - "D_o = 10\n", - "\n", - "\n", - "# We will adapt this model to have residual connections around the linear layers\n", - "# This is the same model we used in practical 8.1, but we can't use the sequential\n", - "# class for residual networks (which aren't strictly sequential). Hence, I've rewritten\n", - "# it as a model that inherits from a base class\n", - "\n", - "class ResidualNetwork(torch.nn.Module):\n", - " def __init__(self, input_size, output_size, hidden_size=100):\n", - " super(ResidualNetwork, self).__init__()\n", - " self.linear1 = nn.Linear(input_size, hidden_size)\n", - " self.linear2 = nn.Linear(hidden_size, hidden_size)\n", - " self.linear3 = nn.Linear(hidden_size, hidden_size)\n", - " self.linear4 = nn.Linear(hidden_size, output_size)\n", - " print(\"Initialized MLPBase model with {} parameters\".format(self.count_params()))\n", - "\n", - " def count_params(self):\n", - " return sum([p.view(-1).shape[0] for p in self.parameters()])\n", - "\n", - "# # TODO -- Add residual connections to this model\n", - "# # The order of operations should similar to figure 11.5b\n", - "# # linear1 first, ReLU+linear2 in first residual block, ReLU+linear3 in second residual block), linear4 at end\n", - "# # Replace this function\n", - " def forward(self, x):\n", - " h1 = self.linear1(x).relu()\n", - " h2 = self.linear2(h1).relu()\n", - " h3 = self.linear3(h2).relu()\n", - " return self.linear4(h3)\n" - ], - "metadata": { - "id": "FslroPJJffrh" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# He initialization of weights\n", - "def weights_init(layer_in):\n", - " if isinstance(layer_in, nn.Linear):\n", - " nn.init.kaiming_uniform_(layer_in.weight)\n", - " layer_in.bias.data.fill_(0.0)" - ], - "metadata": { - "id": "YgLaex1pfhqz" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "#Define the model\n", - "model = ResidualNetwork(40, 10)\n", - "\n", - "# choose cross entropy loss function (equation 5.24 in the loss notes)\n", - "loss_function = nn.CrossEntropyLoss()\n", - "# construct SGD optimizer and initialize learning rate and momentum\n", - "optimizer = torch.optim.SGD(model.parameters(), lr = 0.05, momentum=0.9)\n", - "# object that decreases learning rate by half every 20 epochs\n", - "scheduler = StepLR(optimizer, step_size=20, gamma=0.5)\n", - "# create 100 dummy data points and store in data loader class\n", - "x_train = torch.tensor(train_data_x.transpose().astype('float32'))\n", - "y_train = torch.tensor(train_data_y.astype('long'))\n", - "x_val= torch.tensor(val_data_x.transpose().astype('float32'))\n", - "y_val = torch.tensor(val_data_y.astype('long'))\n", - "\n", - "# load the data into a class that creates the batches\n", - "data_loader = DataLoader(TensorDataset(x_train,y_train), batch_size=100, shuffle=True, worker_init_fn=np.random.seed(1))\n", - "\n", - "# Initialize model weights\n", - "model.apply(weights_init)\n", - "\n", - "# loop over the dataset n_epoch times\n", - "n_epoch = 100\n", - "# store the loss and the % correct at each epoch\n", - "losses_train = np.zeros((n_epoch))\n", - "errors_train = np.zeros((n_epoch))\n", - "losses_val = np.zeros((n_epoch))\n", - "errors_val = np.zeros((n_epoch))\n", - "\n", - "for epoch in range(n_epoch):\n", - " # loop over batches\n", - " for i, data in enumerate(data_loader):\n", - " # retrieve inputs and labels for this batch\n", - " x_batch, y_batch = data\n", - " # zero the parameter gradients\n", - " optimizer.zero_grad()\n", - " # forward pass -- calculate model output\n", - " pred = model(x_batch)\n", - " # compute the loss\n", - " loss = loss_function(pred, y_batch)\n", - " # backward pass\n", - " loss.backward()\n", - " # SGD update\n", - " optimizer.step()\n", - "\n", - " # Run whole dataset to get statistics -- normally wouldn't do this\n", - " pred_train = model(x_train)\n", - " pred_val = model(x_val)\n", - " _, predicted_train_class = torch.max(pred_train.data, 1)\n", - " _, predicted_val_class = torch.max(pred_val.data, 1)\n", - " errors_train[epoch] = 100 - 100 * (predicted_train_class == y_train).float().sum() / len(y_train)\n", - " errors_val[epoch]= 100 - 100 * (predicted_val_class == y_val).float().sum() / len(y_val)\n", - " losses_train[epoch] = loss_function(pred_train, y_train).item()\n", - " losses_val[epoch]= loss_function(pred_val, y_val).item()\n", - " print(f'Epoch {epoch:5d}, train loss {losses_train[epoch]:.6f}, train error {errors_train[epoch]:3.2f}, val loss {losses_val[epoch]:.6f}, percent error {errors_val[epoch]:3.2f}')\n", - "\n", - " # tell scheduler to consider updating learning rate\n", - " scheduler.step()" - ], - "metadata": { - "id": "NYw8I_3mmX5c" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# Plot the results\n", - "fig, ax = plt.subplots()\n", - "ax.plot(errors_train,'r-',label='train')\n", - "ax.plot(errors_val,'b-',label='test')\n", - "ax.set_ylim(0,100); ax.set_xlim(0,n_epoch)\n", - "ax.set_xlabel('Epoch'); ax.set_ylabel('Error')\n", - "ax.set_title('TrainError %3.2f, Val Error %3.2f'%(errors_train[-1],errors_val[-1]))\n", - "ax.legend()\n", - "plt.show()" - ], - "metadata": { - "id": "CcP_VyEmE2sv" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "The primary motivation of residual networks is to allow training of much deeper networks. \n", - "\n", - "TODO: Try running this network with and without the residual connections. Does adding the residual connections change the performance?" - ], - "metadata": { - "id": "wMmqhmxuAx0M" - } - } - ] -} \ No newline at end of file