432 lines
16 KiB
Plaintext
432 lines
16 KiB
Plaintext
{
|
|
"nbformat": 4,
|
|
"nbformat_minor": 0,
|
|
"metadata": {
|
|
"colab": {
|
|
"provenance": [],
|
|
"authorship_tag": "ABX9TyOdO9HZNZ/DwsTSc7M8PBTl",
|
|
"include_colab_link": true
|
|
},
|
|
"kernelspec": {
|
|
"name": "python3",
|
|
"display_name": "Python 3"
|
|
},
|
|
"language_info": {
|
|
"name": "python"
|
|
}
|
|
},
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "view-in-github",
|
|
"colab_type": "text"
|
|
},
|
|
"source": [
|
|
"<a href=\"https://colab.research.google.com/github/udlbook/udlbook/blob/main/CM20315_Convolution_I.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"source": [
|
|
"# Convolution I \n",
|
|
"\n",
|
|
"This notebook investigates the convolution operation. It asks you to hand-code a convolution so that we can be sure we are computing the same thing as PyTorch. The subsequent notebooks use the convolutional layers in PyTorch directly."
|
|
],
|
|
"metadata": {
|
|
"id": "VB_crnDGASX-"
|
|
}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"import numpy as np\n",
|
|
"import torch\n",
|
|
"# Set to print in reasonable form\n",
|
|
"np.set_printoptions(precision=3, floatmode=\"fixed\")\n",
|
|
"torch.set_printoptions(precision=3)"
|
|
],
|
|
"metadata": {
|
|
"id": "YAoWDUb_DezG"
|
|
},
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"source": [
|
|
"This routine performs convolution in PyTorch. We use its output as the reference against which the hand-coded versions below are compared."
|
|
],
|
|
"metadata": {
|
|
"id": "eAwYWXzAElHG"
|
|
}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# Perform convolution in PyTorch\n",
|
|
"def conv_pytorch(image, conv_weights, stride=1, pad =1):\n",
|
|
" # Convert image and kernel to tensors\n",
|
|
" image_tensor = torch.from_numpy(image) # (batchSize, channelsIn, imageHeightIn, imageWidthIn)\n",
|
|
" conv_weights_tensor = torch.from_numpy(conv_weights) # (channelsOut, channelsIn, kernelHeight, kernelWidth) \n",
|
|
" # Do the convolution\n",
|
|
" output_tensor = torch.nn.functional.conv2d(image_tensor, conv_weights_tensor, stride=stride, padding=pad) \n",
|
|
" # Convert back from PyTorch and return\n",
|
|
" return(output_tensor.numpy()) # (batchSize, channelsOut, imageHeightOut, imageWidthOut)"
|
|
],
|
|
"metadata": {
|
|
"id": "xsmUIN-3BlWr"
|
|
},
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"source": [
|
|
"First, we'll start with the simplest 2D convolution: one input channel, one output channel, and a single image in the batch."
|
|
],
|
|
"metadata": {
|
|
"id": "A3Sm8bUWtDNO"
|
|
}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# Perform convolution in numpy\n",
|
|
"def conv_numpy_1(image, weights, pad=1):\n",
|
|
" \n",
|
|
" # Perform zero padding \n",
|
|
" if pad != 0:\n",
|
|
" image = np.pad(image, ((0, 0), (0 ,0), (pad, pad), (pad, pad)),'constant')\n",
|
|
" \n",
|
|
" # Get sizes of image array and kernel weights\n",
|
|
" batchSize, channelsIn, imageHeightIn, imageWidthIn = image.shape\n",
|
|
" channelsOut, channelsIn, kernelHeight, kernelWidth = weights.shape\n",
|
|
"\n",
|
|
" # Get size of output arrays\n",
|
|
" imageHeightOut = np.floor(1 + imageHeightIn - kernelHeight).astype(int)\n",
|
|
" imageWidthOut = np.floor(1 + imageWidthIn - kernelWidth).astype(int)\n",
|
|
"\n",
|
|
" # Create output\n",
|
|
" out = np.zeros((batchSize, channelsOut, imageHeightOut, imageWidthOut), dtype=np.float32) \n",
|
|
" \n",
|
|
" for c_y in range(imageHeightOut):\n",
|
|
" for c_x in range(imageWidthOut):\n",
|
|
" for c_kernel_y in range(kernelHeight):\n",
|
|
" for c_kernel_x in range(kernelWidth):\n",
|
|
" # TODO -- Retrieve the image pixel and the weight from the convolution\n",
|
|
" # Only one image in batch, one input channel and one output channel, so these indices should all be zero\n",
|
|
" # Replace the two lines below\n",
|
|
" this_pixel_value = 1.0\n",
|
|
" this_weight = 1.0\n",
|
|
" \n",
|
|
" # Multiply these together and add to the output at this position\n",
|
|
" out[0, 0, c_y, c_x] += np.sum(this_pixel_value * this_weight) \n",
|
|
" \n",
|
|
" return out"
|
|
],
|
|
"metadata": {
|
|
"id": "EF8FWONVLo1Q"
|
|
},
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# Set random seed so we always get same answer\n",
|
|
"np.random.seed(1) \n",
|
|
"n_batch = 1\n",
|
|
"image_height = 4\n",
|
|
"image_width = 6\n",
|
|
"channels_in = 1\n",
|
|
"kernel_size = 3\n",
|
|
"channels_out = 1\n",
|
|
"\n",
|
|
"# Create random input image\n",
|
|
"input_image= np.random.normal(size=(n_batch, channels_in, image_height, image_width))\n",
|
|
"# Create random convolution kernel weights\n",
|
|
"conv_weights = np.random.normal(size=(channels_out, channels_in, kernel_size, kernel_size))\n",
|
|
"\n",
|
|
"# Perform convolution using PyTorch\n",
|
|
"conv_results_pytorch = conv_pytorch(input_image, conv_weights, stride=1, pad=1)\n",
|
|
"print(\"PyTorch Results\")\n",
|
|
"print(conv_results_pytorch)\n",
|
|
"\n",
|
|
"# Perform convolution in numpy\n",
|
|
"print(\"Your results\")\n",
|
|
"conv_results_numpy = conv_numpy_1(input_image, conv_weights)\n",
|
|
"print(conv_results_numpy)"
|
|
],
|
|
"metadata": {
|
|
"id": "iw9KqXZTHN8v"
|
|
},
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"source": [
|
|
"Let's now add in the possibility of using different strides."
|
|
],
|
|
"metadata": {
|
|
"id": "IYj_lxeGzaHX"
|
|
}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# Perform convolution in numpy\n",
|
|
"def conv_numpy_2(image, weights, stride=1, pad=1):\n",
|
|
" \n",
|
|
" # Perform zero padding \n",
|
|
" if pad != 0:\n",
|
|
" image = np.pad(image, ((0, 0), (0 ,0), (pad, pad), (pad, pad)),'constant')\n",
|
|
" \n",
|
|
" # Get sizes of image array and kernel weights\n",
|
|
" batchSize, channelsIn, imageHeightIn, imageWidthIn = image.shape\n",
|
|
" channelsOut, channelsIn, kernelHeight, kernelWidth = weights.shape\n",
|
|
"\n",
|
|
" # Get size of output arrays\n",
|
|
" imageHeightOut = np.floor(1 + (imageHeightIn - kernelHeight) / stride).astype(int)\n",
|
|
" imageWidthOut = np.floor(1 + (imageWidthIn - kernelWidth) / stride).astype(int)\n",
|
|
" \n",
|
|
" # Create output\n",
|
|
" out = np.zeros((batchSize, channelsOut, imageHeightOut, imageWidthOut), dtype=np.float32) \n",
|
|
" \n",
|
|
" for c_y in range(imageHeightOut):\n",
|
|
" for c_x in range(imageWidthOut):\n",
|
|
" for c_kernel_y in range(kernelHeight):\n",
|
|
" for c_kernel_x in range(kernelWidth):\n",
|
|
" # TODO -- Retrieve the image pixel and the weight from the convolution\n",
|
|
" # Only one image in batch, one input channel and one output channel, so these indices should all be zero\n",
|
|
" # Replace the two lines below\n",
|
|
" this_pixel_value = 1.0\n",
|
|
" this_weight = 1.0\n",
|
|
"\n",
|
|
" # Multiply these together and add to the output at this position\n",
|
|
" out[0, 0, c_y, c_x] += np.sum(this_pixel_value * this_weight) \n",
|
|
" \n",
|
|
" return out"
|
|
],
|
|
"metadata": {
|
|
"id": "GiujmLhqHN1F"
|
|
},
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# Set random seed so we always get same answer\n",
|
|
"np.random.seed(1) \n",
|
|
"n_batch = 1\n",
|
|
"image_height = 12\n",
|
|
"image_width = 10\n",
|
|
"channels_in = 1\n",
|
|
"kernel_size = 3\n",
|
|
"channels_out = 1\n",
|
|
"stride = 2\n",
|
|
"\n",
|
|
"# Create random input image\n",
|
|
"input_image= np.random.normal(size=(n_batch, channels_in, image_height, image_width))\n",
|
|
"# Create random convolution kernel weights\n",
|
|
"conv_weights = np.random.normal(size=(channels_out, channels_in, kernel_size, kernel_size))\n",
|
|
"\n",
|
|
"# Perform convolution using PyTorch\n",
|
|
"conv_results_pytorch = conv_pytorch(input_image, conv_weights, stride, pad=1)\n",
|
|
"print(\"PyTorch Results\")\n",
|
|
"print(conv_results_pytorch)\n",
|
|
"\n",
|
|
"# Perform convolution in numpy\n",
|
|
"print(\"Your results\")\n",
|
|
"conv_results_numpy = conv_numpy_2(input_image, conv_weights, stride, pad=1)\n",
|
|
"print(conv_results_numpy)"
|
|
],
|
|
"metadata": {
|
|
"id": "FeJy6Bvozgxq"
|
|
},
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"source": [
|
|
"Now we'll introduce multiple input and output channels."
|
|
],
|
|
"metadata": {
|
|
"id": "3flq1Wan2gX-"
|
|
}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# Perform convolution in numpy\n",
|
|
"def conv_numpy_3(image, weights, stride=1, pad=1):\n",
|
|
" \n",
|
|
" # Perform zero padding \n",
|
|
" if pad != 0:\n",
|
|
" image = np.pad(image, ((0, 0), (0 ,0), (pad, pad), (pad, pad)),'constant')\n",
|
|
" \n",
|
|
" # Get sizes of image array and kernel weights\n",
|
|
" batchSize, channelsIn, imageHeightIn, imageWidthIn = image.shape\n",
|
|
" channelsOut, channelsIn, kernelHeight, kernelWidth = weights.shape\n",
|
|
"\n",
|
|
" # Get size of output arrays\n",
|
|
" imageHeightOut = np.floor(1 + (imageHeightIn - kernelHeight) / stride).astype(int)\n",
|
|
" imageWidthOut = np.floor(1 + (imageWidthIn - kernelWidth) / stride).astype(int)\n",
|
|
" \n",
|
|
" # Create output\n",
|
|
" out = np.zeros((batchSize, channelsOut, imageHeightOut, imageWidthOut), dtype=np.float32) \n",
|
|
" \n",
|
|
" for c_y in range(imageHeightOut):\n",
|
|
" for c_x in range(imageWidthOut):\n",
|
|
" for c_channel_out in range(channelsOut):\n",
|
|
" for c_channel_in in range(channelsIn):\n",
|
|
" for c_kernel_y in range(kernelHeight):\n",
|
|
" for c_kernel_x in range(kernelWidth):\n",
|
|
" # TODO -- Retrieve the image pixel and the weight from the convolution\n",
|
|
" # Only one image in batch so this index should be zero\n",
|
|
" # Replace the two lines below\n",
|
|
" this_pixel_value = 1.0\n",
|
|
" this_weight = 1.0\n",
|
|
"\n",
|
|
" # Multiply these together and add to the output at this position\n",
|
|
" out[0, c_channel_out, c_y, c_x] += np.sum(this_pixel_value * this_weight) \n",
|
|
" return out"
|
|
],
|
|
"metadata": {
|
|
"id": "AvdRWGiU2ppX"
|
|
},
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# Set random seed so we always get same answer\n",
|
|
"np.random.seed(1) \n",
|
|
"n_batch = 1\n",
|
|
"image_height = 4\n",
|
|
"image_width = 6\n",
|
|
"channels_in = 5\n",
|
|
"kernel_size = 3\n",
|
|
"channels_out = 2\n",
|
|
"\n",
|
|
"# Create random input image\n",
|
|
"input_image= np.random.normal(size=(n_batch, channels_in, image_height, image_width))\n",
|
|
"# Create random convolution kernel weights\n",
|
|
"conv_weights = np.random.normal(size=(channels_out, channels_in, kernel_size, kernel_size))\n",
|
|
"\n",
|
|
"# Perform convolution using PyTorch\n",
|
|
"conv_results_pytorch = conv_pytorch(input_image, conv_weights, stride=1, pad=1)\n",
|
|
"print(\"PyTorch Results\")\n",
|
|
"print(conv_results_pytorch)\n",
|
|
"\n",
|
|
"# Perform convolution in numpy\n",
|
|
"print(\"Your results\")\n",
|
|
"conv_results_numpy = conv_numpy_3(input_image, conv_weights, stride=1, pad=1)\n",
|
|
"print(conv_results_numpy)"
|
|
],
|
|
"metadata": {
|
|
"id": "mdSmjfvY4li2"
|
|
},
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"source": [
|
|
"Now we'll do the full convolution with multiple images (batch size > 1), multiple input channels, and multiple output channels."
|
|
],
|
|
"metadata": {
|
|
"id": "Q2MUFebdsJbH"
|
|
}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# Perform convolution in numpy\n",
|
|
"def conv_numpy_4(image, weights, stride=1, pad=1):\n",
|
|
" \n",
|
|
" # Perform zero padding \n",
|
|
" if pad != 0:\n",
|
|
" image = np.pad(image, ((0, 0), (0 ,0), (pad, pad), (pad, pad)),'constant')\n",
|
|
" \n",
|
|
" # Get sizes of image array and kernel weights\n",
|
|
" batchSize, channelsIn, imageHeightIn, imageWidthIn = image.shape\n",
|
|
" channelsOut, channelsIn, kernelHeight, kernelWidth = weights.shape\n",
|
|
"\n",
|
|
" # Get size of output arrays\n",
|
|
" imageHeightOut = np.floor(1 + (imageHeightIn - kernelHeight) / stride).astype(int)\n",
|
|
" imageWidthOut = np.floor(1 + (imageWidthIn - kernelWidth) / stride).astype(int)\n",
|
|
" \n",
|
|
" # Create output\n",
|
|
" out = np.zeros((batchSize, channelsOut, imageHeightOut, imageWidthOut), dtype=np.float32) \n",
|
|
" \n",
|
|
" for c_batch in range(batchSize):\n",
|
|
" for c_y in range(imageHeightOut):\n",
|
|
" for c_x in range(imageWidthOut):\n",
|
|
" for c_channel_out in range(channelsOut):\n",
|
|
" for c_channel_in in range(channelsIn):\n",
|
|
" for c_kernel_y in range(kernelHeight):\n",
|
|
" for c_kernel_x in range(kernelWidth):\n",
|
|
" # TODO -- Retrieve the image pixel and the weight from the convolution\n",
|
|
" # Replace the two lines below\n",
|
|
" this_pixel_value = 1.0\n",
|
|
" this_weight = 1.0\n",
|
|
" \n",
|
|
" # Multiply these together and add to the output at this position\n",
|
|
" out[c_batch, c_channel_out, c_y, c_x] += np.sum(this_pixel_value * this_weight) \n",
|
|
" return out"
|
|
],
|
|
"metadata": {
|
|
"id": "5WePF-Y-sC1y"
|
|
},
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"id": "1w2GEBtqAM2P"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Set random seed so we always get same answer\n",
|
|
"np.random.seed(1) \n",
|
|
"n_batch = 2\n",
|
|
"image_height = 4\n",
|
|
"image_width = 6\n",
|
|
"channels_in = 5\n",
|
|
"kernel_size = 3\n",
|
|
"channels_out = 2\n",
|
|
"\n",
|
|
"# Create random input image\n",
|
|
"input_image= np.random.normal(size=(n_batch, channels_in, image_height, image_width))\n",
|
|
"# Create random convolution kernel weights\n",
|
|
"conv_weights = np.random.normal(size=(channels_out, channels_in, kernel_size, kernel_size))\n",
|
|
"\n",
|
|
"# Perform convolution using PyTorch\n",
|
|
"conv_results_pytorch = conv_pytorch(input_image, conv_weights, stride=1, pad=1)\n",
|
|
"print(\"PyTorch Results\")\n",
|
|
"print(conv_results_pytorch)\n",
|
|
"\n",
|
|
"# Perform convolution in numpy\n",
|
|
"print(\"Your results\")\n",
|
|
"conv_results_numpy = conv_numpy_4(input_image, conv_weights, stride=1, pad=1)\n",
|
|
"print(conv_results_numpy)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [],
|
|
"metadata": {
|
|
"id": "Lody75JB5By7"
|
|
},
|
|
"execution_count": null,
|
|
"outputs": []
|
|
}
|
|
]
|
|
} |