udlbook/CM20315_Convolution_I.ipynb

{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "authorship_tag": "ABX9TyOdO9HZNZ/DwsTSc7M8PBTl",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/udlbook/udlbook/blob/main/CM20315_Convolution_I.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Convolution I \n",
        "\n",
        "This notebook investigates the convolution operation.  It asks you to hand code a convolution so we can be sure that we are computing the same thing as in PyTorch.  The subsequent notebooks use the convolutional layers in PyTorch directly."
      ],
      "metadata": {
        "id": "VB_crnDGASX-"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import numpy as np\n",
        "import torch\n",
        "# Set to print in reasonable form\n",
        "np.set_printoptions(precision=3, floatmode=\"fixed\")\n",
        "torch.set_printoptions(precision=3)"
      ],
      "metadata": {
        "id": "YAoWDUb_DezG"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "This routine performs convolution in PyTorch"
      ],
      "metadata": {
        "id": "eAwYWXzAElHG"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Perform convolution in PyTorch\n",
        "def conv_pytorch(image, conv_weights, stride=1, pad =1):\n",
        "  # Convert image and kernel to tensors\n",
        "  image_tensor = torch.from_numpy(image) # (batchSize, channelsIn, imageHeightIn, =imageWidthIn)\n",
        "  conv_weights_tensor = torch.from_numpy(conv_weights) # (channelsOut, channelsIn, kernelHeight, kernelWidth) \n",
        "  # Do the convolution\n",
        "  output_tensor = torch.nn.functional.conv2d(image_tensor, conv_weights_tensor, stride=stride, padding=pad) \n",
        "  # Convert back from PyTorch and return\n",
        "  return(output_tensor.numpy()) # (batchSize channelsOut imageHeightOut imageHeightIn)"
      ],
      "metadata": {
        "id": "xsmUIN-3BlWr"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "First we'll start with the simplest 2D convolution.  Just one channel in and one channel out.  A single image in the batch."
      ],
      "metadata": {
        "id": "A3Sm8bUWtDNO"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Perform convolution in numpy\n",
        "def conv_numpy_1(image, weights, pad=1):\n",
        "    \n",
        "    # Perform zero padding \n",
        "    if pad != 0:\n",
        "        image = np.pad(image, ((0, 0), (0 ,0), (pad, pad), (pad, pad)),'constant')\n",
        "    \n",
        "    # Get sizes of image array and kernel weights\n",
        "    batchSize,  channelsIn, imageHeightIn, imageWidthIn = image.shape\n",
        "    channelsOut, channelsIn, kernelHeight, kernelWidth = weights.shape\n",
        "\n",
        "    # Get size of output arrays\n",
        "    imageHeightOut = np.floor(1 + imageHeightIn - kernelHeight).astype(int)\n",
        "    imageWidthOut = np.floor(1 + imageWidthIn - kernelWidth).astype(int)\n",
        "\n",
        "    # Create output\n",
        "    out = np.zeros((batchSize, channelsOut, imageHeightOut, imageWidthOut), dtype=np.float32) \n",
        "    \n",
        "    for c_y in range(imageHeightOut):\n",
        "      for c_x in range(imageWidthOut):\n",
        "        for c_kernel_y in range(kernelHeight):\n",
        "          for c_kernel_x in range(kernelWidth):\n",
        "            # TODO -- Retrieve the image pixel and the weight from the convolution\n",
        "            # Only one image in batch, one input channel and one output channel, so these indices should all be zero\n",
        "            # Replace the two lines below\n",
        "            this_pixel_value = 1.0\n",
        "            this_weight = 1.0\n",
        "         \n",
        "            # Multiply these together and add to the output at this position\n",
        "            out[0, 0, c_y, c_x] += np.sum(this_pixel_value * this_weight) \n",
        "            \n",
        "    return out"
      ],
      "metadata": {
        "id": "EF8FWONVLo1Q"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Set random seed so we always get same answer\n",
        "np.random.seed(1) \n",
        "n_batch = 1\n",
        "image_height = 4\n",
        "image_width = 6\n",
        "channels_in = 1\n",
        "kernel_size = 3\n",
        "channels_out = 1\n",
        "\n",
        "# Create random input image\n",
        "input_image= np.random.normal(size=(n_batch, channels_in, image_height, image_width))\n",
        "# Create random convolution kernel weights\n",
        "conv_weights = np.random.normal(size=(channels_out, channels_in, kernel_size, kernel_size))\n",
        "\n",
        "# Perform convolution using PyTorch\n",
        "conv_results_pytorch = conv_pytorch(input_image, conv_weights, stride=1, pad=1)\n",
        "print(\"PyTorch Results\")\n",
        "print(conv_results_pytorch)\n",
        "\n",
        "# Perform convolution in numpy\n",
        "print(\"Your results\")\n",
        "conv_results_numpy = conv_numpy_1(input_image, conv_weights)\n",
        "print(conv_results_numpy)"
      ],
      "metadata": {
        "id": "iw9KqXZTHN8v"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "Let's now add in the possibility of using different strides"
      ],
      "metadata": {
        "id": "IYj_lxeGzaHX"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Perform convolution in numpy\n",
        "def conv_numpy_2(image, weights, stride=1, pad=1):\n",
        "    \n",
        "    # Perform zero padding \n",
        "    if pad != 0:\n",
        "        image = np.pad(image, ((0, 0), (0 ,0), (pad, pad), (pad, pad)),'constant')\n",
        "    \n",
        "    # Get sizes of image array and kernel weights\n",
        "    batchSize,  channelsIn, imageHeightIn, imageWidthIn = image.shape\n",
        "    channelsOut, channelsIn, kernelHeight, kernelWidth = weights.shape\n",
        "\n",
        "    # Get size of output arrays\n",
        "    imageHeightOut = np.floor(1 + (imageHeightIn - kernelHeight) / stride).astype(int)\n",
        "    imageWidthOut = np.floor(1 + (imageWidthIn - kernelWidth) / stride).astype(int)\n",
        "    \n",
        "    # Create output\n",
        "    out = np.zeros((batchSize, channelsOut, imageHeightOut, imageWidthOut), dtype=np.float32) \n",
        "    \n",
        "    for c_y in range(imageHeightOut):\n",
        "      for c_x in range(imageWidthOut):\n",
        "        for c_kernel_y in range(kernelHeight):\n",
        "          for c_kernel_x in range(kernelWidth):\n",
        "            # TODO -- Retrieve the image pixel and the weight from the convolution\n",
        "            # Only one image in batch, one input channel and one output channel, so these indices should all be zero\n",
        "            # Replace the two lines below\n",
        "            this_pixel_value = 1.0\n",
        "            this_weight = 1.0\n",
        "\n",
        "            # Multiply these together and add to the output at this position\n",
        "            out[0, 0, c_y, c_x] += np.sum(this_pixel_value * this_weight) \n",
        "            \n",
        "    return out"
      ],
      "metadata": {
        "id": "GiujmLhqHN1F"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Set random seed so we always get same answer\n",
        "np.random.seed(1) \n",
        "n_batch = 1\n",
        "image_height = 12\n",
        "image_width = 10\n",
        "channels_in = 1\n",
        "kernel_size = 3\n",
        "channels_out = 1\n",
        "stride = 2\n",
        "\n",
        "# Create random input image\n",
        "input_image= np.random.normal(size=(n_batch, channels_in, image_height, image_width))\n",
        "# Create random convolution kernel weights\n",
        "conv_weights = np.random.normal(size=(channels_out, channels_in, kernel_size, kernel_size))\n",
        "\n",
        "# Perform convolution using PyTorch\n",
        "conv_results_pytorch = conv_pytorch(input_image, conv_weights, stride, pad=1)\n",
        "print(\"PyTorch Results\")\n",
        "print(conv_results_pytorch)\n",
        "\n",
        "# Perform convolution in numpy\n",
        "print(\"Your results\")\n",
        "conv_results_numpy = conv_numpy_2(input_image, conv_weights, stride, pad=1)\n",
        "print(conv_results_numpy)"
      ],
      "metadata": {
        "id": "FeJy6Bvozgxq"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "Now we'll introduce multiple input and output channels"
      ],
      "metadata": {
        "id": "3flq1Wan2gX-"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Perform convolution in numpy\n",
        "def conv_numpy_3(image, weights, stride=1, pad=1):\n",
        "    \n",
        "    # Perform zero padding \n",
        "    if pad != 0:\n",
        "        image = np.pad(image, ((0, 0), (0 ,0), (pad, pad), (pad, pad)),'constant')\n",
        "    \n",
        "    # Get sizes of image array and kernel weights\n",
        "    batchSize,  channelsIn, imageHeightIn, imageWidthIn = image.shape\n",
        "    channelsOut, channelsIn, kernelHeight, kernelWidth = weights.shape\n",
        "\n",
        "    # Get size of output arrays\n",
        "    imageHeightOut = np.floor(1 + (imageHeightIn - kernelHeight) / stride).astype(int)\n",
        "    imageWidthOut = np.floor(1 + (imageWidthIn - kernelWidth) / stride).astype(int)\n",
        "    \n",
        "    # Create output\n",
        "    out = np.zeros((batchSize, channelsOut, imageHeightOut, imageWidthOut), dtype=np.float32) \n",
        "    \n",
        "    for c_y in range(imageHeightOut):\n",
        "      for c_x in range(imageWidthOut):\n",
        "        for c_channel_out in range(channelsOut):\n",
        "          for c_channel_in in range(channelsIn):\n",
        "            for c_kernel_y in range(kernelHeight):\n",
        "              for c_kernel_x in range(kernelWidth):\n",
        "                  # TODO -- Retrieve the image pixel and the weight from the convolution\n",
        "                  # Only one image in batch so this index should be zero\n",
        "                  # Replace the two lines below\n",
        "                  this_pixel_value = 1.0\n",
        "                  this_weight = 1.0\n",
        "\n",
        "                 # Multiply these together and add to the output at this position\n",
        "                  out[0, c_channel_out, c_y, c_x] += np.sum(this_pixel_value * this_weight) \n",
        "    return out"
      ],
      "metadata": {
        "id": "AvdRWGiU2ppX"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Set random seed so we always get same answer\n",
        "np.random.seed(1) \n",
        "n_batch = 1\n",
        "image_height = 4\n",
        "image_width = 6\n",
        "channels_in = 5\n",
        "kernel_size = 3\n",
        "channels_out = 2\n",
        "\n",
        "# Create random input image\n",
        "input_image= np.random.normal(size=(n_batch, channels_in, image_height, image_width))\n",
        "# Create random convolution kernel weights\n",
        "conv_weights = np.random.normal(size=(channels_out, channels_in, kernel_size, kernel_size))\n",
        "\n",
        "# Perform convolution using PyTorch\n",
        "conv_results_pytorch = conv_pytorch(input_image, conv_weights, stride=1, pad=1)\n",
        "print(\"PyTorch Results\")\n",
        "print(conv_results_pytorch)\n",
        "\n",
        "# Perform convolution in numpy\n",
        "print(\"Your results\")\n",
        "conv_results_numpy = conv_numpy_3(input_image, conv_weights, stride=1, pad=1)\n",
        "print(conv_results_numpy)"
      ],
      "metadata": {
        "id": "mdSmjfvY4li2"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "Now we'll do the full convolution with multiple images (batch size > 1), and multiple input channels, multiple output channels."
      ],
      "metadata": {
        "id": "Q2MUFebdsJbH"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Perform convolution in numpy\n",
        "def conv_numpy_4(image, weights, stride=1, pad=1):\n",
        "    \n",
        "    # Perform zero padding \n",
        "    if pad != 0:\n",
        "        image = np.pad(image, ((0, 0), (0 ,0), (pad, pad), (pad, pad)),'constant')\n",
        "    \n",
        "    # Get sizes of image array and kernel weights\n",
        "    batchSize,  channelsIn, imageHeightIn, imageWidthIn = image.shape\n",
        "    channelsOut, channelsIn, kernelHeight, kernelWidth = weights.shape\n",
        "\n",
        "    # Get size of output arrays\n",
        "    imageHeightOut = np.floor(1 + (imageHeightIn - kernelHeight) / stride).astype(int)\n",
        "    imageWidthOut = np.floor(1 + (imageWidthIn - kernelWidth) / stride).astype(int)\n",
        "    \n",
        "    # Create output\n",
        "    out = np.zeros((batchSize, channelsOut, imageHeightOut, imageWidthOut), dtype=np.float32) \n",
        "    \n",
        "    for c_batch in range(batchSize):\n",
        "      for c_y in range(imageHeightOut):\n",
        "        for c_x in range(imageWidthOut):\n",
        "          for c_channel_out in range(channelsOut):\n",
        "            for c_channel_in in range(channelsIn):\n",
        "              for c_kernel_y in range(kernelHeight):\n",
        "                for c_kernel_x in range(kernelWidth):\n",
        "                    # TODO -- Retrieve the image pixel and the weight from the convolution\n",
        "                    # Replace the two lines below\n",
        "                    this_pixel_value = 1.0\n",
        "                    this_weight = 1.0\n",
        "                    \n",
        "                    # Multiply these together and add to the output at this position\n",
        "                    out[c_batch, c_channel_out, c_y, c_x] += np.sum(this_pixel_value * this_weight) \n",
        "    return out"
      ],
      "metadata": {
        "id": "5WePF-Y-sC1y"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "1w2GEBtqAM2P"
      },
      "outputs": [],
      "source": [
        "# Set random seed so we always get same answer\n",
        "np.random.seed(1) \n",
        "n_batch = 2\n",
        "image_height = 4\n",
        "image_width = 6\n",
        "channels_in = 5\n",
        "kernel_size = 3\n",
        "channels_out = 2\n",
        "\n",
        "# Create random input image\n",
        "input_image= np.random.normal(size=(n_batch, channels_in, image_height, image_width))\n",
        "# Create random convolution kernel weights\n",
        "conv_weights = np.random.normal(size=(channels_out, channels_in, kernel_size, kernel_size))\n",
        "\n",
        "# Perform convolution using PyTorch\n",
        "conv_results_pytorch = conv_pytorch(input_image, conv_weights, stride=1, pad=1)\n",
        "print(\"PyTorch Results\")\n",
        "print(conv_results_pytorch)\n",
        "\n",
        "# Perform convolution in numpy\n",
        "print(\"Your results\")\n",
        "conv_results_numpy = conv_numpy_4(input_image, conv_weights, stride=1, pad=1)\n",
        "print(conv_results_numpy)"
      ]
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "Lody75JB5By7"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}