diff --git a/Notebooks/Chap13/13_2_Graph_Classification.ipynb b/Notebooks/Chap13/13_2_Graph_Classification.ipynb new file mode 100644 index 0000000..c50b8bd --- /dev/null +++ b/Notebooks/Chap13/13_2_Graph_Classification.ipynb @@ -0,0 +1,244 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "authorship_tag": "ABX9TyNXtvnYIArM5r9NA5Qpwuvm", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# **Notebook 13.2: Graph classification**\n", + "\n", + "This notebook investigates representing graphs with matrices as illustrated in figure 13.4 from the book.\n", + "\n", + "Work through the cells below, running each cell in turn. In various places you will see the words \"TO DO\". Follow the instructions at these places and make predictions about what is going to happen or write code to complete the functions.\n", + "\n", + "Contact me at udlbookmail@gmail.com if you find any mistakes or have any suggestions." + ], + "metadata": { + "id": "t9vk9Elugvmi" + } + }, + { + "cell_type": "code", + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import networkx as nx" + ], + "metadata": { + "id": "OLComQyvCIJ7" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Let's build a model that maps a chemical structure to a binary decision. This model might be used to predict whether a chemical is liquid at room temperature or not. We'll start by drawing the chemical structure." 
+ ], + "metadata": { + "id": "UNleESc7k5uB" + } + }, + { + "cell_type": "code", + "source": [ + "# Define a graph that represents the chemical structure of ethanol and draw it\n", + "# Each node is labelled with the node number and the element (carbon, hydrogen, oxygen)\n", + "G = nx.Graph()\n", + "G.add_edge('0:H','2:C')\n", + "G.add_edge('1:H','2:C')\n", + "G.add_edge('3:H','2:C')\n", + "G.add_edge('2:C','5:C')\n", + "G.add_edge('4:H','5:C')\n", + "G.add_edge('6:H','5:C')\n", + "G.add_edge('7:O','5:C')\n", + "G.add_edge('8:H','7:O')\n", + "nx.draw(G, nx.spring_layout(G, seed = 0), with_labels=True, node_size=600)\n", + "plt.show()" + ], + "metadata": { + "id": "TIrihEw-7DRV" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Define adjacency matrix\n", + "# TODO -- Define the adjacency matrix for this chemical\n", + "# Replace this line\n", + "A = np.zeros((9,9)) ;\n", + "\n", + "\n", + "print(A)\n", + "\n", + "# TODO -- Define node matrix\n", + "# There will be 9 nodes and 118 possible chemical elements\n", + "# so we'll define a 118x9 matrix. Each column represents one\n", + "# node and is a one-hot vector (i.e. all zeros, except a single one at the\n", + "# chemical number of the element).\n", + "# Chemical numbers: Hydrogen-->1, Carbon-->6, Oxygen-->8\n", + "# Since the indices start at 0, we'll set element 0 to 1 for hydrogen, element 5\n", + "# to one for carbon, and element 7 to one for oxygen\n", + "# Replace this line:\n", + "X = np.zeros((118,9))\n", + "\n", + "\n", + "# Print the top 15 rows of the data matrix\n", + "print(X[0:15,:])" + ], + "metadata": { + "id": "gKBD5JsPfrkA" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Now let's define a network with four layers that maps this graph to a binary value, using the formulation in equation 13.11." 
+ ], + "metadata": { + "id": "40FLjNIcpHa9" + } + }, + { + "cell_type": "code", + "source": [ + "# We'll need these helper functions\n", + "\n", + "# Define the Rectified Linear Unit (ReLU) function\n", + "def ReLU(preactivation):\n", + " activation = preactivation.clip(0.0)\n", + " return activation\n", + "\n", + "# Define the logistic sigmoid function\n", + "def sigmoid(x):\n", + " return 1.0/(1.0+np.exp(-x))" + ], + "metadata": { + "id": "52IFREpepHE4" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Our network will have K=3 hidden layers, and will use a dimension of D=200.\n", + "K = 3; D = 200\n", + "# Set seed so we always get the same random numbers\n", + "np.random.seed(1)\n", + "# Let's initialize the parameter matrices randomly (He-style: hidden-layer parameters are normal samples scaled by 2/D)\n", + "Omega0 = np.random.normal(size=(D, 118)) * 2.0 / D\n", + "beta0 = np.random.normal(size=(D,1)) * 2.0 / D\n", + "Omega1 = np.random.normal(size=(D, D)) * 2.0 / D\n", + "beta1 = np.random.normal(size=(D,1)) * 2.0 / D\n", + "Omega2 = np.random.normal(size=(D, D)) * 2.0 / D\n", + "beta2 = np.random.normal(size=(D,1)) * 2.0 / D\n", + "omega3 = np.random.normal(size=(1, D))\n", + "beta3 = np.random.normal(size=(1,1))" + ], + "metadata": { + "id": "ag0YdEgnpApK" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "def graph_neural_network(A,X, Omega0, beta0, Omega1, beta1, Omega2, beta2, omega3, beta3):\n", + " # Define this network according to equation 13.11 from the book\n", + " # Replace this line\n", + " f = np.ones((1,1))\n", + "\n", + " return f;" + ], + "metadata": { + "id": "RQuTMc2WrsU3" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Let's test this network\n", + "f = graph_neural_network(A,X, Omega0, beta0, Omega1, beta1, Omega2, beta2, omega3, beta3)\n", + "print(\"Your value is %3f: \"%(f[0,0]), \"True value of f: 0.498010\")" + ], + "metadata": { + 
"id": "X7gYgOu6uIAt" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Let's check that permuting the indices of the graph doesn't change\n", + "# the output of the network\n", + "# Define a permutation matrix\n", + "P = np.array([[0,1,0,0,0,0,0,0,0],\n", + " [0,0,0,0,1,0,0,0,0],\n", + " [0,0,0,0,0,1,0,0,0],\n", + " [0,0,0,0,0,0,0,0,1],\n", + " [1,0,0,0,0,0,0,0,0],\n", + " [0,0,1,0,0,0,0,0,0],\n", + " [0,0,0,1,0,0,0,0,0],\n", + " [0,0,0,0,0,0,0,1,0],\n", + " [0,0,0,0,0,0,1,0,0]]);\n", + "\n", + "# TODO -- Use this matrix to permute the adjacency matrix A and node matrix X\n", + "# Replace these lines\n", + "A_permuted = np.copy(A)\n", + "X_permuted = np.copy(X)\n", + "\n", + "f = graph_neural_network(A_permuted,X_permuted, Omega0, beta0, Omega1, beta1, Omega2, beta2, omega3, beta3)\n", + "print(\"Your value is %3f: \"%(f[0,0]), \"True value of f: 0.498010\")" + ], + "metadata": { + "id": "F0zc3U_UuR5K" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "TODO -- encode the adjacency matrix and node matrix for propanol and run the network again. Show that the network still runs even though the size of the input graph is different.\n", + "\n", + "Propanol structure can be found [here](https://en.wikipedia.org/wiki/File:Propanol_flat_structure.png)." + ], + "metadata": { + "id": "l44vHi50zGqY" + } + } + ] +} \ No newline at end of file