Files
udlbook/CM20315/CM20315_Convolution_I.ipynb

433 lines
16 KiB
Plaintext

{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"authorship_tag": "ABX9TyOdO9HZNZ/DwsTSc7M8PBTl",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/udlbook/udlbook/blob/main/CM20315/CM20315_Convolution_I.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"source": [
"# Convolution I \n",
"\n",
"This notebook investigates the convolution operation. It asks you to hand code a convolution so we can be sure that we are computing the same thing as in PyTorch. The subsequent notebooks use the convolutional layers in PyTorch directly."
],
"metadata": {
"id": "VB_crnDGASX-"
}
},
{
"cell_type": "code",
"source": [
"import numpy as np\n",
"import torch\n",
"# Set to print in reasonable form\n",
"np.set_printoptions(precision=3, floatmode=\"fixed\")\n",
"torch.set_printoptions(precision=3)"
],
"metadata": {
"id": "YAoWDUb_DezG"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"This routine performs convolution in PyTorch"
],
"metadata": {
"id": "eAwYWXzAElHG"
}
},
{
"cell_type": "code",
"source": [
"# Perform convolution in PyTorch\n",
"def conv_pytorch(image, conv_weights, stride=1, pad =1):\n",
" # Convert image and kernel to tensors\n",
" image_tensor = torch.from_numpy(image) # (batchSize, channelsIn, imageHeightIn, imageWidthIn)\n",
" conv_weights_tensor = torch.from_numpy(conv_weights) # (channelsOut, channelsIn, kernelHeight, kernelWidth) \n",
" # Do the convolution\n",
" output_tensor = torch.nn.functional.conv2d(image_tensor, conv_weights_tensor, stride=stride, padding=pad) \n",
" # Convert back from PyTorch and return\n",
" return(output_tensor.numpy()) # (batchSize, channelsOut, imageHeightOut, imageWidthOut)"
],
"metadata": {
"id": "xsmUIN-3BlWr"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"First we'll start with the simplest 2D convolution: just one channel in, one channel out, and a single image in the batch."
],
"metadata": {
"id": "A3Sm8bUWtDNO"
}
},
{
"cell_type": "code",
"source": [
"# Perform convolution in numpy\n",
"def conv_numpy_1(image, weights, pad=1):\n",
" \n",
" # Perform zero padding \n",
" if pad != 0:\n",
" image = np.pad(image, ((0, 0), (0 ,0), (pad, pad), (pad, pad)),'constant')\n",
" \n",
" # Get sizes of image array and kernel weights\n",
" batchSize, channelsIn, imageHeightIn, imageWidthIn = image.shape\n",
" channelsOut, channelsIn, kernelHeight, kernelWidth = weights.shape\n",
"\n",
" # Get size of output arrays\n",
" imageHeightOut = np.floor(1 + imageHeightIn - kernelHeight).astype(int)\n",
" imageWidthOut = np.floor(1 + imageWidthIn - kernelWidth).astype(int)\n",
"\n",
" # Create output\n",
" out = np.zeros((batchSize, channelsOut, imageHeightOut, imageWidthOut), dtype=np.float32) \n",
" \n",
" for c_y in range(imageHeightOut):\n",
" for c_x in range(imageWidthOut):\n",
" for c_kernel_y in range(kernelHeight):\n",
" for c_kernel_x in range(kernelWidth):\n",
" # TODO -- Retrieve the image pixel and the weight from the convolution\n",
" # Only one image in batch, one input channel and one output channel, so these indices should all be zero\n",
" # Replace the two lines below\n",
" this_pixel_value = 1.0\n",
" this_weight = 1.0\n",
" \n",
" # Multiply these together and add to the output at this position\n",
" out[0, 0, c_y, c_x] += np.sum(this_pixel_value * this_weight) \n",
" \n",
" return out"
],
"metadata": {
"id": "EF8FWONVLo1Q"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Set random seed so we always get same answer\n",
"np.random.seed(1) \n",
"n_batch = 1\n",
"image_height = 4\n",
"image_width = 6\n",
"channels_in = 1\n",
"kernel_size = 3\n",
"channels_out = 1\n",
"\n",
"# Create random input image\n",
"input_image= np.random.normal(size=(n_batch, channels_in, image_height, image_width))\n",
"# Create random convolution kernel weights\n",
"conv_weights = np.random.normal(size=(channels_out, channels_in, kernel_size, kernel_size))\n",
"\n",
"# Perform convolution using PyTorch\n",
"conv_results_pytorch = conv_pytorch(input_image, conv_weights, stride=1, pad=1)\n",
"print(\"PyTorch Results\")\n",
"print(conv_results_pytorch)\n",
"\n",
"# Perform convolution in numpy\n",
"print(\"Your results\")\n",
"conv_results_numpy = conv_numpy_1(input_image, conv_weights)\n",
"print(conv_results_numpy)"
],
"metadata": {
"id": "iw9KqXZTHN8v"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"Let's now add in the possibility of using different strides"
],
"metadata": {
"id": "IYj_lxeGzaHX"
}
},
{
"cell_type": "code",
"source": [
"# Perform convolution in numpy\n",
"def conv_numpy_2(image, weights, stride=1, pad=1):\n",
" \n",
" # Perform zero padding \n",
" if pad != 0:\n",
" image = np.pad(image, ((0, 0), (0 ,0), (pad, pad), (pad, pad)),'constant')\n",
" \n",
" # Get sizes of image array and kernel weights\n",
" batchSize, channelsIn, imageHeightIn, imageWidthIn = image.shape\n",
" channelsOut, channelsIn, kernelHeight, kernelWidth = weights.shape\n",
"\n",
" # Get size of output arrays\n",
" imageHeightOut = np.floor(1 + (imageHeightIn - kernelHeight) / stride).astype(int)\n",
" imageWidthOut = np.floor(1 + (imageWidthIn - kernelWidth) / stride).astype(int)\n",
" \n",
" # Create output\n",
" out = np.zeros((batchSize, channelsOut, imageHeightOut, imageWidthOut), dtype=np.float32) \n",
" \n",
" for c_y in range(imageHeightOut):\n",
" for c_x in range(imageWidthOut):\n",
" for c_kernel_y in range(kernelHeight):\n",
" for c_kernel_x in range(kernelWidth):\n",
" # TODO -- Retrieve the image pixel and the weight from the convolution\n",
" # Only one image in batch, one input channel and one output channel, so these indices should all be zero\n",
" # Replace the two lines below\n",
" this_pixel_value = 1.0\n",
" this_weight = 1.0\n",
"\n",
" # Multiply these together and add to the output at this position\n",
" out[0, 0, c_y, c_x] += np.sum(this_pixel_value * this_weight) \n",
" \n",
" return out"
],
"metadata": {
"id": "GiujmLhqHN1F"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Set random seed so we always get same answer\n",
"np.random.seed(1) \n",
"n_batch = 1\n",
"image_height = 12\n",
"image_width = 10\n",
"channels_in = 1\n",
"kernel_size = 3\n",
"channels_out = 1\n",
"stride = 2\n",
"\n",
"# Create random input image\n",
"input_image= np.random.normal(size=(n_batch, channels_in, image_height, image_width))\n",
"# Create random convolution kernel weights\n",
"conv_weights = np.random.normal(size=(channels_out, channels_in, kernel_size, kernel_size))\n",
"\n",
"# Perform convolution using PyTorch\n",
"conv_results_pytorch = conv_pytorch(input_image, conv_weights, stride, pad=1)\n",
"print(\"PyTorch Results\")\n",
"print(conv_results_pytorch)\n",
"\n",
"# Perform convolution in numpy\n",
"print(\"Your results\")\n",
"conv_results_numpy = conv_numpy_2(input_image, conv_weights, stride, pad=1)\n",
"print(conv_results_numpy)"
],
"metadata": {
"id": "FeJy6Bvozgxq"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"Now we'll introduce multiple input and output channels"
],
"metadata": {
"id": "3flq1Wan2gX-"
}
},
{
"cell_type": "code",
"source": [
"# Perform convolution in numpy\n",
"def conv_numpy_3(image, weights, stride=1, pad=1):\n",
" \n",
" # Perform zero padding \n",
" if pad != 0:\n",
" image = np.pad(image, ((0, 0), (0 ,0), (pad, pad), (pad, pad)),'constant')\n",
" \n",
" # Get sizes of image array and kernel weights\n",
" batchSize, channelsIn, imageHeightIn, imageWidthIn = image.shape\n",
" channelsOut, channelsIn, kernelHeight, kernelWidth = weights.shape\n",
"\n",
" # Get size of output arrays\n",
" imageHeightOut = np.floor(1 + (imageHeightIn - kernelHeight) / stride).astype(int)\n",
" imageWidthOut = np.floor(1 + (imageWidthIn - kernelWidth) / stride).astype(int)\n",
" \n",
" # Create output\n",
" out = np.zeros((batchSize, channelsOut, imageHeightOut, imageWidthOut), dtype=np.float32) \n",
" \n",
" for c_y in range(imageHeightOut):\n",
" for c_x in range(imageWidthOut):\n",
" for c_channel_out in range(channelsOut):\n",
" for c_channel_in in range(channelsIn):\n",
" for c_kernel_y in range(kernelHeight):\n",
" for c_kernel_x in range(kernelWidth):\n",
" # TODO -- Retrieve the image pixel and the weight from the convolution\n",
" # Only one image in batch so this index should be zero\n",
" # Replace the two lines below\n",
" this_pixel_value = 1.0\n",
" this_weight = 1.0\n",
"\n",
" # Multiply these together and add to the output at this position\n",
" out[0, c_channel_out, c_y, c_x] += np.sum(this_pixel_value * this_weight) \n",
" return out"
],
"metadata": {
"id": "AvdRWGiU2ppX"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Set random seed so we always get same answer\n",
"np.random.seed(1) \n",
"n_batch = 1\n",
"image_height = 4\n",
"image_width = 6\n",
"channels_in = 5\n",
"kernel_size = 3\n",
"channels_out = 2\n",
"\n",
"# Create random input image\n",
"input_image= np.random.normal(size=(n_batch, channels_in, image_height, image_width))\n",
"# Create random convolution kernel weights\n",
"conv_weights = np.random.normal(size=(channels_out, channels_in, kernel_size, kernel_size))\n",
"\n",
"# Perform convolution using PyTorch\n",
"conv_results_pytorch = conv_pytorch(input_image, conv_weights, stride=1, pad=1)\n",
"print(\"PyTorch Results\")\n",
"print(conv_results_pytorch)\n",
"\n",
"# Perform convolution in numpy\n",
"print(\"Your results\")\n",
"conv_results_numpy = conv_numpy_3(input_image, conv_weights, stride=1, pad=1)\n",
"print(conv_results_numpy)"
],
"metadata": {
"id": "mdSmjfvY4li2"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"Now we'll do the full convolution with multiple images (batch size > 1), multiple input channels, and multiple output channels."
],
"metadata": {
"id": "Q2MUFebdsJbH"
}
},
{
"cell_type": "code",
"source": [
"# Perform convolution in numpy\n",
"def conv_numpy_4(image, weights, stride=1, pad=1):\n",
" \n",
" # Perform zero padding \n",
" if pad != 0:\n",
" image = np.pad(image, ((0, 0), (0 ,0), (pad, pad), (pad, pad)),'constant')\n",
" \n",
" # Get sizes of image array and kernel weights\n",
" batchSize, channelsIn, imageHeightIn, imageWidthIn = image.shape\n",
" channelsOut, channelsIn, kernelHeight, kernelWidth = weights.shape\n",
"\n",
" # Get size of output arrays\n",
" imageHeightOut = np.floor(1 + (imageHeightIn - kernelHeight) / stride).astype(int)\n",
" imageWidthOut = np.floor(1 + (imageWidthIn - kernelWidth) / stride).astype(int)\n",
" \n",
" # Create output\n",
" out = np.zeros((batchSize, channelsOut, imageHeightOut, imageWidthOut), dtype=np.float32) \n",
" \n",
" for c_batch in range(batchSize):\n",
" for c_y in range(imageHeightOut):\n",
" for c_x in range(imageWidthOut):\n",
" for c_channel_out in range(channelsOut):\n",
" for c_channel_in in range(channelsIn):\n",
" for c_kernel_y in range(kernelHeight):\n",
" for c_kernel_x in range(kernelWidth):\n",
" # TODO -- Retrieve the image pixel and the weight from the convolution\n",
" # Replace the two lines below\n",
" this_pixel_value = 1.0\n",
" this_weight = 1.0\n",
" \n",
" # Multiply these together and add to the output at this position\n",
" out[c_batch, c_channel_out, c_y, c_x] += np.sum(this_pixel_value * this_weight) \n",
" return out"
],
"metadata": {
"id": "5WePF-Y-sC1y"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "1w2GEBtqAM2P"
},
"outputs": [],
"source": [
"# Set random seed so we always get same answer\n",
"np.random.seed(1) \n",
"n_batch = 2\n",
"image_height = 4\n",
"image_width = 6\n",
"channels_in = 5\n",
"kernel_size = 3\n",
"channels_out = 2\n",
"\n",
"# Create random input image\n",
"input_image= np.random.normal(size=(n_batch, channels_in, image_height, image_width))\n",
"# Create random convolution kernel weights\n",
"conv_weights = np.random.normal(size=(channels_out, channels_in, kernel_size, kernel_size))\n",
"\n",
"# Perform convolution using PyTorch\n",
"conv_results_pytorch = conv_pytorch(input_image, conv_weights, stride=1, pad=1)\n",
"print(\"PyTorch Results\")\n",
"print(conv_results_pytorch)\n",
"\n",
"# Perform convolution in numpy\n",
"print(\"Your results\")\n",
"conv_results_numpy = conv_numpy_4(input_image, conv_weights, stride=1, pad=1)\n",
"print(conv_results_numpy)"
]
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "Lody75JB5By7"
},
"execution_count": null,
"outputs": []
}
]
}