From d4de5f324da8a54154ab737b460050bed64b3033 Mon Sep 17 00:00:00 2001 From: udlbook <110402648+udlbook@users.noreply.github.com> Date: Wed, 2 Aug 2023 17:53:41 -0400 Subject: [PATCH] Created using Colaboratory --- .../10_4_Downsampling_and_Upsampling.ipynb | 520 ++++++++++++++++++ 1 file changed, 520 insertions(+) create mode 100644 Notebooks/Chap10/10_4_Downsampling_and_Upsampling.ipynb diff --git a/Notebooks/Chap10/10_4_Downsampling_and_Upsampling.ipynb b/Notebooks/Chap10/10_4_Downsampling_and_Upsampling.ipynb new file mode 100644 index 0000000..1bb2c08 --- /dev/null +++ b/Notebooks/Chap10/10_4_Downsampling_and_Upsampling.ipynb @@ -0,0 +1,520 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "authorship_tag": "ABX9TyMbSR8fzpXvO6TIQdO7bI0H", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# **Notebook 10.4: Downsampling and Upsampling**\n", + "\n", + "This notebook investigates the downsampling and upsampling methods discussed in section 10.4 of the book.\n", + "\n", + "Work through the cells below, running each cell in turn. In various places you will see the words \"TO DO\". 
Follow the instructions at these places and make predictions about what is going to happen or write code to complete the functions.\n", + "\n", + "Contact me at udlbookmail@gmail.com if you find any mistakes or have any suggestions.\n" + ], + "metadata": { + "id": "t9vk9Elugvmi" + } + }, + { + "cell_type": "code", + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "from PIL import Image\n", + "from numpy import asarray" + ], + "metadata": { + "id": "YrXWAH7sUWvU" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Define 4 by 4 original patch\n", + "orig_4_4 = np.array([[1, 3, 5,3 ], [6,2,0,8], [4,6,1,4], [2,8,0,3]])\n", + "print(orig_4_4)" + ], + "metadata": { + "id": "WPRoJcC_JXE2" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "def subsample(x_in):\n", + " x_out = np.zeros(( int(np.ceil(x_in.shape[0]/2)), int(np.ceil(x_in.shape[1]/2)) ))\n", + " # TO DO -- write the subsampling routine\n", + " # Replace this line\n", + " x_out = x_out\n", + "\n", + "\n", + " return x_out" + ], + "metadata": { + "id": "qneyOiZRJubi" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print(\"Original:\")\n", + "print(orig_4_4)\n", + "print(\"Subsampled:\")\n", + "print(subsample(orig_4_4))" + ], + "metadata": { + "id": "O_i0y72_JwGZ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Let's try that on an image to get a feel for how it works:" + ], + "metadata": { + "id": "AobyC8IILbCO" + } + }, + { + "cell_type": "code", + "source": [ + "!wget https://raw.githubusercontent.com/udlbook/udlbook/main/Notebooks/Chap10/test_image.png" + ], + "metadata": { + "id": "3dJEo-6DM-Py" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# load the image\n", + "image = Image.open('test_image.png')\n", + "# convert image to numpy 
array\n", + "data = asarray(image)\n", + "data_subsample = subsample(data);\n", + "\n", + "plt.figure(figsize=(5,5))\n", + "plt.imshow(data, cmap='gray')\n", + "plt.show()\n", + "\n", + "plt.figure(figsize=(5,5))\n", + "plt.imshow(data_subsample, cmap='gray')\n", + "plt.show()\n", + "\n", + "data_subsample2 = subsample(data_subsample)\n", + "plt.figure(figsize=(5,5))\n", + "plt.imshow(data_subsample2, cmap='gray')\n", + "plt.show()\n", + "\n", + "data_subsample3 = subsample(data_subsample2)\n", + "plt.figure(figsize=(5,5))\n", + "plt.imshow(data_subsample3, cmap='gray')\n", + "plt.show()" + ], + "metadata": { + "id": "HCZVutk6NB6B" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Now let's try max-pooling\n", + "def maxpool(x_in):\n", + " x_out = np.zeros(( int(np.floor(x_in.shape[0]/2)), int(np.floor(x_in.shape[1]/2)) ))\n", + " # TO DO -- write the maxpool routine\n", + " # Replace this line\n", + " x_out = x_out\n", + "\n", + " return x_out" + ], + "metadata": { + "id": "Z99uYehaPtJa" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print(\"Original:\")\n", + "print(orig_4_4)\n", + "print(\"Maxpooled:\")\n", + "print(maxpool(orig_4_4))" + ], + "metadata": { + "id": "J4KMTMmG9P44" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Let's see what Rick looks like:\n", + "data_maxpool = maxpool(data);\n", + "\n", + "plt.figure(figsize=(5,5))\n", + "plt.imshow(data, cmap='gray')\n", + "plt.show()\n", + "\n", + "plt.figure(figsize=(5,5))\n", + "plt.imshow(data_maxpool, cmap='gray')\n", + "plt.show()\n", + "\n", + "data_maxpool2 = maxpool(data_maxpool)\n", + "plt.figure(figsize=(5,5))\n", + "plt.imshow(data_maxpool2, cmap='gray')\n", + "plt.show()\n", + "\n", + "data_maxpool3 = maxpool(data_maxpool2)\n", + "plt.figure(figsize=(5,5))\n", + "plt.imshow(data_maxpool3, cmap='gray')\n", + "plt.show()" + ], + "metadata": { + "id": 
"0ES0sB8t9Wyv" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "You can see that the stripes on his shirt gradually turn to white because we keep retaining the brightest local pixels." + ], + "metadata": { + "id": "nMtSdBGlAktq" + } + }, + { + "cell_type": "code", + "source": [ + "# Finally, let's try mean pooling\n", + "def meanpool(x_in):\n", + " x_out = np.zeros(( int(np.floor(x_in.shape[0]/2)), int(np.floor(x_in.shape[1]/2)) ))\n", + " # TO DO -- write the meanpool routine\n", + " # Replace this line\n", + " x_out = x_out\n", + "\n", + " return x_out" + ], + "metadata": { + "id": "ZQBjBtmB_aGQ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print(\"Original:\")\n", + "print(orig_4_4)\n", + "print(\"Meanpooled:\")\n", + "print(meanpool(orig_4_4))" + ], + "metadata": { + "id": "N4VDlWNt_8dp" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Let's see what Rick looks like:\n", + "data_meanpool = meanpool(data);\n", + "\n", + "plt.figure(figsize=(5,5))\n", + "plt.imshow(data, cmap='gray')\n", + "plt.show()\n", + "\n", + "plt.figure(figsize=(5,5))\n", + "plt.imshow(data_meanpool, cmap='gray')\n", + "plt.show()\n", + "\n", + "data_meanpool2 = meanpool(data_meanpool)\n", + "plt.figure(figsize=(5,5))\n", + "plt.imshow(data_meanpool2, cmap='gray')\n", + "plt.show()\n", + "\n", + "data_meanpool3 = meanpool(data_meanpool2)\n", + "plt.figure(figsize=(5,5))\n", + "plt.imshow(data_meanpool3, cmap='gray')\n", + "plt.show()" + ], + "metadata": { + "id": "Lkg5zUYo_-IV" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Notice that the three low resolution images look quite different.
\n", + "\n", + "Now let's upscale them again" + ], + "metadata": { + "id": "J7VssF4pBf2y" + } + }, + { + "cell_type": "code", + "source": [ + "# Define 2 by 2 original patch\n", + "orig_2_2 = np.array([[2, 4], [4,8]])\n", + "print(orig_2_2)" + ], + "metadata": { + "id": "Q4N7i76FA_YH" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Let's first use the duplication method\n", + "def duplicate(x_in):\n", + " x_out = np.zeros(( x_in.shape[0]*2, x_in.shape[1]*2 ))\n", + " # TO DO -- write the duplication routine\n", + " # Replace this line\n", + " x_out = x_out\n", + "\n", + " return x_out" + ], + "metadata": { + "id": "6eSjnl3cB5g4" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print(\"Original:\")\n", + "print(orig_2_2)\n", + "print(\"Duplicated:\")\n", + "print(duplicate(orig_2_2))" + ], + "metadata": { + "id": "4FtRcvXrFLg7" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Let's re-upsample, sub-sampled rick\n", + "data_duplicate = duplicate(data_subsample3);\n", + "\n", + "plt.figure(figsize=(5,5))\n", + "plt.imshow(data_subsample3, cmap='gray')\n", + "plt.show()\n", + "\n", + "plt.figure(figsize=(5,5))\n", + "plt.imshow(data_duplicate, cmap='gray')\n", + "plt.show()\n", + "\n", + "data_duplicate2 = duplicate(data_duplicate)\n", + "plt.figure(figsize=(5,5))\n", + "plt.imshow(data_duplicate2, cmap='gray')\n", + "plt.show()\n", + "\n", + "data_duplicate3 = duplicate(data_duplicate2)\n", + "plt.figure(figsize=(5,5))\n", + "plt.imshow(data_duplicate3, cmap='gray')\n", + "plt.show()" + ], + "metadata": { + "id": "agq0YN34FQfA" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "They look the same, but if you look at the axes, you'll see that the pixels are just duplicated." 
+ ], + "metadata": { + "id": "bCQrJ_M8GUFs" + } + }, + { + "cell_type": "code", + "source": [ + "# Now let's try max pooling back up\n", + "# The input x_high_res is the original high res image, from which you can deduce the position of the maximum index\n", + "def max_unpool(x_in, x_high_res):\n", + " x_out = np.zeros(( x_in.shape[0]*2, x_in.shape[1]*2 ))\n", + " # TO DO -- write the max unpooling routine\n", + " # Replace this line\n", + " x_out = x_out\n", + "\n", + " return x_out" + ], + "metadata": { + "id": "uDUDChmBF71_" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print(\"Original:\")\n", + "print(orig_2_2)\n", + "print(\"Max unpooled:\")\n", + "print(max_unpool(orig_2_2,orig_4_4))" + ], + "metadata": { + "id": "EmjptCVNHq74" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Let's re-upsample, max-pooled rick\n", + "data_max_unpool= max_unpool(data_maxpool3,data_maxpool2);\n", + "\n", + "plt.figure(figsize=(5,5))\n", + "plt.imshow(data_maxpool3, cmap='gray')\n", + "plt.show()\n", + "\n", + "plt.figure(figsize=(5,5))\n", + "plt.imshow(data_max_unpool, cmap='gray')\n", + "plt.show()\n", + "\n", + "data_max_unpool2 = max_unpool(data_max_unpool, data_maxpool)\n", + "plt.figure(figsize=(5,5))\n", + "plt.imshow(data_max_unpool2, cmap='gray')\n", + "plt.show()\n", + "\n", + "data_max_unpool3 = max_unpool(data_max_unpool2, data)\n", + "plt.figure(figsize=(5,5))\n", + "plt.imshow(data_max_unpool3, cmap='gray')\n", + "plt.show()" + ], + "metadata": { + "id": "SSPhTuV6H4ZH" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Finally, we'll try upsampling using bilinear interpolation. 
We'll treat the positions off the image as zeros by padding the original image and round fractional values upwards using np.ceil()" + ], + "metadata": { + "id": "sBx36bvbJHrK" + } + }, + { + "cell_type": "code", + "source": [ + "def bilinear(x_in):\n", + " x_out = np.zeros(( x_in.shape[0]*2, x_in.shape[1]*2 ))\n", + " x_in_pad = np.zeros((x_in.shape[0]+1, x_in.shape[1]+1))\n", + " x_in_pad[0:x_in.shape[0],0:x_in.shape[1]] = x_in\n", + " # TO DO -- write the bilinear interpolation routine\n", + " # Replace this line\n", + " x_out = x_out\n", + "\n", + " return x_out" + ], + "metadata": { + "id": "00XpfQo3Ivdf" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print(\"Original:\")\n", + "print(orig_2_2)\n", + "print(\"Bilinear:\")\n", + "print(bilinear(orig_2_2))" + ], + "metadata": { + "id": "qI5oRVCCNRob" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Let's re-upsample, mean-pooled rick\n", + "data_bilinear = bilinear(data_meanpool3);\n", + "\n", + "plt.figure(figsize=(5,5))\n", + "plt.imshow(data_meanpool3, cmap='gray')\n", + "plt.show()\n", + "\n", + "plt.figure(figsize=(5,5))\n", + "plt.imshow(data_bilinear, cmap='gray')\n", + "plt.show()\n", + "\n", + "data_bilinear2 = bilinear(data_bilinear)\n", + "plt.figure(figsize=(5,5))\n", + "plt.imshow(data_bilinear2, cmap='gray')\n", + "plt.show()\n", + "\n", + "data_bilinear3 = bilinear(data_bilinear2)\n", + "plt.figure(figsize=(5,5))\n", + "plt.imshow(data_bilinear3, cmap='gray')\n", + "plt.show()" + ], + "metadata": { + "id": "4m0bkhdmNRec" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file