From 9cbd56dff3ac05b8f1607678d42c471e460bd506 Mon Sep 17 00:00:00 2001 From: udlbook <110402648+udlbook@users.noreply.github.com> Date: Tue, 1 Aug 2023 17:46:26 -0400 Subject: [PATCH] Created using Colaboratory --- .../10_2_Convolution_for_MNIST_1D.ipynb | 253 ++++++++++++++++++ 1 file changed, 253 insertions(+) create mode 100644 Notebooks/Chap10/10_2_Convolution_for_MNIST_1D.ipynb diff --git a/Notebooks/Chap10/10_2_Convolution_for_MNIST_1D.ipynb b/Notebooks/Chap10/10_2_Convolution_for_MNIST_1D.ipynb new file mode 100644 index 0000000..a8ac83d --- /dev/null +++ b/Notebooks/Chap10/10_2_Convolution_for_MNIST_1D.ipynb @@ -0,0 +1,253 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "authorship_tag": "ABX9TyNAm5SrknniRXMQy1I5fuve", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# **Notebook 10.2: Convolution for MNIST-1D**\n", + "\n", + "This notebook investigates a 1D convolutional network for MNIST-1D as in figure 10.7 and 10.8a.\n", + "\n", + "Work through the cells below, running each cell in turn. In various places you will see the words \"TO DO\". 
# (Introductory text, continued)
# Follow the instructions at these places and make predictions about what is going to happen or write code to complete the functions.
#
# Contact me at udlbookmail@gmail.com if you find any mistakes or have any suggestions.

# %%
# Run this if you're in a Colab to make a local copy of the MNIST 1D repository
# (this is the call that the notebook shell escape `!git clone ...` expands to)
get_ipython().system('git clone https://github.com/greydanus/mnist1d')

# %%
import numpy as np
import os
import torch, torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from torch.optim.lr_scheduler import StepLR
import matplotlib.pyplot as plt
import mnist1d
import random

# %%
# Fetch the default dataset arguments, then obtain the dataset itself
args = mnist1d.data.get_dataset_args()
data = mnist1d.data.get_dataset(
    args,
    path='./mnist1d_data.pkl',
    download=False,
    regenerate=False,
)

# data['x'] / data['y'] hold the training inputs / outputs and
# data['x_test'] / data['y_test'] hold the test inputs / outputs
print(
    "Examples in training set: {}".format(len(data['y'])),
    "Examples in test set: {}".format(len(data['y_test'])),
    "Length of each example: {}".format(data['x'].shape[-1]),
    sep="\n",
)

# %%
# Split the dataset into training and validation parts.
# The inputs are transposed so that each example is one column.
train_data_x, train_data_y = data['x'].T, data['y']
val_data_x, val_data_y = data['x_test'].T, data['y_test']
# Report the sizes: axis 1 indexes the examples, axis 0 the dimensions
print(
    "Train data: %d examples (columns), each of which has %d dimensions (rows)"%((train_data_x.shape[1],train_data_x.shape[0])),
    "Validation data: %d examples (columns), each of which has %d dimensions (rows)"%((val_data_x.shape[1],val_data_x.shape[0])),
    sep="\n",
)
# %% [markdown]
# Define the network

# %%
# There are 40 input dimensions and 10 output dimensions for this data
# The inputs correspond to the 40 offsets in the MNIST1D template.
D_i = 40
# The outputs correspond to the 10 digits
D_o = 10


# TODO Create a model with the following layers
# 1. Convolutional layer, (input=length 40 and 1 channel, kernel size 3, stride 2, padding="valid", 15 output channels )
# 2. ReLU
# 3. Convolutional layer, (input=length 19 and 15 channels, kernel size 3, stride 2, padding="valid", 15 output channels )
# 4. ReLU
# 5. Convolutional layer, (input=length 9 and 15 channels, kernel size 3, stride 2, padding="valid", 15 output channels)
# 6. ReLU
# 7. Flatten (converts 15 channels x length 4 to length 60)
# 8. Linear layer (input size = 60, output size = 10)
# References:
# https://pytorch.org/docs/1.13/generated/torch.nn.Conv1d.html?highlight=conv1d#torch.nn.Conv1d
# https://pytorch.org/docs/stable/generated/torch.nn.Flatten.html
# https://pytorch.org/docs/1.13/generated/torch.nn.Linear.html?highlight=linear#torch.nn.Linear

# Replace the following function:
# (placeholder fully connected network; it accepts the same
#  (batch, 1, D_i) input that the convolutional network will receive,
#  because the leading Flatten collapses the channel axis)
model = nn.Sequential(
nn.Flatten(),
nn.Linear(D_i, 100),
nn.ReLU(),
nn.Linear(100, 100),
nn.ReLU(),
nn.Linear(100, D_o))


# %%
# He initialization of weights
def weights_init(layer_in):
    """Apply He (Kaiming) uniform initialization to one layer.

    Meant to be passed to ``model.apply``, which calls it once for every
    submodule of the network.

    Args:
        layer_in: the module to initialize. Linear and 1D convolutional
            layers get He-uniform weights and a zero bias; any other
            module (ReLU, Flatten, containers, ...) is left unchanged.
    """
    # Convolutional layers must be included as well, otherwise the network
    # this notebook asks for would keep PyTorch's default initialization
    # in every layer except the final linear one.
    if isinstance(layer_in, (nn.Linear, nn.Conv1d)):
        nn.init.kaiming_uniform_(layer_in.weight)
        # Layers created with bias=False have no bias tensor to reset
        if layer_in.bias is not None:
            nn.init.zeros_(layer_in.bias)
# %%
# Train the model defined above and plot the training / validation error.
# Relies on names from earlier cells: model, weights_init, train_data_x,
# train_data_y, val_data_x and val_data_y.

# choose cross entropy loss function (equation 5.24 in the loss notes)
loss_function = nn.CrossEntropyLoss()
# construct SGD optimizer and initialize learning rate and momentum
optimizer = torch.optim.SGD(model.parameters(), lr = 0.05, momentum=0.9)
# object that decreases learning rate by half every 20 epochs
scheduler = StepLR(optimizer, step_size=20, gamma=0.5)
# convert the data to torch tensors; the arrays are transposed back so that
# each row is one example
x_train = torch.tensor(train_data_x.transpose().astype('float32'))
y_train = torch.tensor(train_data_y.astype('long'))
x_val= torch.tensor(val_data_x.transpose().astype('float32'))
y_val = torch.tensor(val_data_y.astype('long'))

# Fix the random seeds. The numpy seed was already being set (as a side
# effect of the old worker_init_fn argument, which then received None);
# the torch seed is what actually controls the shuffling order of the
# data loader and the random weight initialization below.
np.random.seed(1)
torch.manual_seed(1)

# load the data into a class that creates the batches
data_loader = DataLoader(TensorDataset(x_train,y_train), batch_size=100, shuffle=True)

# Initialize model weights
model.apply(weights_init)

# loop over the dataset n_epoch times
n_epoch = 100
# store the loss and the % error at each epoch
losses_train = np.zeros((n_epoch))
errors_train = np.zeros((n_epoch))
losses_val = np.zeros((n_epoch))
errors_val = np.zeros((n_epoch))

for epoch in range(n_epoch):
  # loop over batches
  # (the batch is unpacked directly so that the dataset dictionary `data`
  #  from the loading cell is not overwritten)
  for x_batch, y_batch in data_loader:
    # zero the parameter gradients
    optimizer.zero_grad()
    # forward pass -- calculate model output
    # (a channel axis is inserted: (batch, length) -> (batch, 1, length))
    pred = model(x_batch[:,None,:])
    # compute the loss
    loss = loss_function(pred, y_batch)
    # backward pass
    loss.backward()
    # SGD update
    optimizer.step()

  # Run whole dataset to get statistics -- normally wouldn't do this
  # No gradients are needed here, so skip building the autograd graph
  with torch.no_grad():
    pred_train = model(x_train[:,None,:])
    pred_val = model(x_val[:,None,:])
    _, predicted_train_class = torch.max(pred_train, 1)
    _, predicted_val_class = torch.max(pred_val, 1)
    errors_train[epoch] = 100 - 100 * (predicted_train_class == y_train).float().mean().item()
    errors_val[epoch]= 100 - 100 * (predicted_val_class == y_val).float().mean().item()
    losses_train[epoch] = loss_function(pred_train, y_train).item()
    losses_val[epoch]= loss_function(pred_val, y_val).item()
  print(f'Epoch {epoch:5d}, train loss {losses_train[epoch]:.6f}, train error {errors_train[epoch]:3.2f},  val loss {losses_val[epoch]:.6f}, percent error {errors_val[epoch]:3.2f}')

  # tell scheduler to consider updating learning rate
  scheduler.step()

# Plot the results
fig, ax = plt.subplots()
ax.plot(errors_train,'r-',label='train')
ax.plot(errors_val,'b-',label='validation')
ax.set_ylim(0,100); ax.set_xlim(0,n_epoch)
ax.set_xlabel('Epoch'); ax.set_ylabel('Error')
ax.set_title('Part I: Validation Result %3.2f'%(errors_val[-1]))
ax.legend()
plt.show()