Add files via upload

Update 17_1_Latent_Variable_Models.ipynb
2024-07-22 17:09:30 -04:00 · 2024-07-22 17:09:15 -04:00 · 2024-07-22 15:03:17 -04:00 · 2024-07-19 15:55:44 -04:00 · 2024-07-10 15:53:00 -04:00 · 2024-07-10 14:15:21 -05:00
6 changed files with 78 additions and 19 deletions
--- a/Notebooks/Chap08/8_3_Double_Descent.ipynb
+++ b/Notebooks/Chap08/8_3_Double_Descent.ipynb
@@ -99,7 +99,7 @@
        "# data['x'], data['y'], data['x_test'], and data['y_test']\n",
        "print(\"Examples in training set: {}\".format(len(data['y'])))\n",
        "print(\"Examples in test set: {}\".format(len(data['y_test'])))\n",
-        "print(\"Length of each example: {}\".format(data['x'].shape[-1]))"
+        "print(\"Dimensionality of each example: {}\".format(data['x'].shape[-1]))"
      ],
      "metadata": {
        "id": "PW2gyXL5UkLU"
@@ -147,7 +147,7 @@
    {
      "cell_type": "code",
      "source": [
-        "def fit_model(model, data):\n",
+        "def fit_model(model, data, n_epoch):\n",
        "\n",
        "  # choose cross entropy loss function (equation 5.24)\n",
        "  loss_function = torch.nn.CrossEntropyLoss()\n",
@@ -164,9 +164,6 @@
        "  # load the data into a class that creates the batches\n",
        "  data_loader = DataLoader(TensorDataset(x_train,y_train), batch_size=100, shuffle=True, worker_init_fn=np.random.seed(1))\n",
        "\n",
-        "  # loop over the dataset n_epoch times\n",
-        "  n_epoch = 1000\n",
-        "\n",
        "  for epoch in range(n_epoch):\n",
        "    # loop over batches\n",
        "    for i, batch in enumerate(data_loader):\n",
@@ -203,6 +200,18 @@
      "execution_count": null,
      "outputs": []
    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def count_parameters(model):\n",
+        "    return sum(p.numel() for p in model.parameters() if p.requires_grad)"
+      ],
+      "metadata": {
+        "id": "AQNCmFNV6JpV"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
    {
      "cell_type": "markdown",
      "source": [
@@ -226,19 +235,27 @@
        "# This code will take a while (~30 mins on GPU) to run!  Go and make a cup of coffee!\n",
        "\n",
        "hidden_variables = np.array([2,4,6,8,10,14,18,22,26,30,35,40,45,50,55,60,70,80,90,100,120,140,160,180,200,250,300,400]) ;\n",
+        "\n",
        "errors_train_all = np.zeros_like(hidden_variables)\n",
        "errors_test_all = np.zeros_like(hidden_variables)\n",
+        "total_weights_all = np.zeros_like(hidden_variables)\n",
+        "\n",
+        "# number of epochs (full passes over the training set) used to train each model\n",
+        "n_epoch = 1000\n",
        "\n",
        "# For each hidden variable size\n",
        "for c_hidden in range(len(hidden_variables)):\n",
        "    print(f'Training model with {hidden_variables[c_hidden]:3d} hidden variables')\n",
        "    # Get a model\n",
        "    model = get_model(hidden_variables[c_hidden]) ;\n",
+        "    # Count and store the number of trainable parameters\n",
+        "    total_weights_all[c_hidden] = count_parameters(model)\n",
        "    # Train the model\n",
-        "    errors_train, errors_test = fit_model(model, data)\n",
+        "    errors_train, errors_test = fit_model(model, data, n_epoch)\n",
        "    # Store the results\n",
        "    errors_train_all[c_hidden] = errors_train\n",
-        "    errors_test_all[c_hidden]= errors_test"
+        "    errors_test_all[c_hidden]= errors_test\n",
+        "\n"
      ],
      "metadata": {
        "id": "K4OmBZGHWXpk"
@@ -249,12 +266,29 @@
    {
      "cell_type": "code",
      "source": [
+        "import matplotlib.pyplot as plt\n",
+        "import numpy as np\n",
+        "\n",
+        "# data['y'] holds one entry per training example, so its length is the training set size\n",
+        "num_training_examples = len(data['y'])\n",
+        "\n",
+        "# Find the index where total_weights_all is closest to num_training_examples\n",
+        "closest_index = np.argmin(np.abs(np.array(total_weights_all) - num_training_examples))\n",
+        "\n",
+        "# Get the corresponding value of hidden variables\n",
+        "hidden_variable_at_num_training_examples = hidden_variables[closest_index]\n",
+        "\n",
        "# Plot the results\n",
        "fig, ax = plt.subplots()\n",
        "ax.plot(hidden_variables, errors_train_all, 'r-', label='train')\n",
        "ax.plot(hidden_variables, errors_test_all, 'b-', label='test')\n",
-        "ax.set_ylim(0,100);\n",
-        "ax.set_xlabel('No hidden variables'); ax.set_ylabel('Error')\n",
+        "\n",
+        "# Add a vertical line at the hidden-variable count whose total weights are closest to the number of training examples\n",
+        "ax.axvline(x=hidden_variable_at_num_training_examples, color='g', linestyle='--', label='N(weights) = N(train)')\n",
+        "\n",
+        "ax.set_ylim(0, 100)\n",
+        "ax.set_xlabel('No. hidden variables')\n",
+        "ax.set_ylabel('Error')\n",
        "ax.legend()\n",
        "plt.show()\n"
      ],
@@ -263,6 +297,24 @@
      },
      "execution_count": null,
      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [],
+      "metadata": {
+        "id": "KT4X8_hE5NFb"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [],
+      "metadata": {
+        "id": "iGKZSfVF2r4z"
+      },
+      "execution_count": null,
+      "outputs": []
    }
  ]
 }
--- a/Notebooks/Chap12/12_2_Multihead_Self_Attention.ipynb
+++ b/Notebooks/Chap12/12_2_Multihead_Self_Attention.ipynb
@@ -28,7 +28,7 @@
    {
      "cell_type": "markdown",
      "source": [
-        "# **Notebook 12.1: Multihead Self-Attention**\n",
+        "# **Notebook 12.2: Multihead Self-Attention**\n",
        "\n",
        "This notebook builds a multihead self-attention mechanism as in figure 12.6\n",
        "\n",
--- a/Notebooks/Chap17/17_1_Latent_Variable_Models.ipynb
+++ b/Notebooks/Chap17/17_1_Latent_Variable_Models.ipynb
@@ -55,7 +55,7 @@
        "Pr(z) = \\text{Norm}_{z}[0,1]\n",
        "\\end{equation}\n",
        "\n",
-        "As in figure 17.2, we'll assume that the output is two dimensional, we we need to define a function that maps from the 1D latent variable to two dimensions.  Usually, we would use a neural network, but in this case, we'll just define an arbitrary relationship.\n",
+        "As in figure 17.2, we'll assume that the output is two-dimensional, so we need to define a function that maps from the 1D latent variable to two dimensions.  Usually, we would use a neural network, but in this case, we'll just define an arbitrary relationship.\n",
        "\n",
        "\\begin{align}\n",
        "x_{1} &=& 0.5\\cdot\\exp\\Bigl[\\sin\\bigl[2+ 3.675 z \\bigr]\\Bigr]\\\\\n",
--- a/Notebooks/Chap20/20_3_Lottery_Tickets.ipynb
+++ b/Notebooks/Chap20/20_3_Lottery_Tickets.ipynb
@@ -44,7 +44,8 @@
      },
      "source": [
        "# Run this if you're in a Colab to install MNIST 1D repository\n",
-        "!pip install git+https://github.com/greydanus/mnist1d"
+        "!pip install git+https://github.com/greydanus/mnist1d\n",
+        "!git clone https://github.com/greydanus/mnist1d"
      ],
      "execution_count": null,
      "outputs": []
@@ -95,6 +96,12 @@
        "id": "I-vm_gh5xTJs"
      },
      "source": [
+        "from mnist1d.data import get_dataset, get_dataset_args\n",
+        "from mnist1d.utils import set_seed, to_pickle, from_pickle\n",
+        "\n",
+        "import sys ; sys.path.append('./mnist1d/notebooks')\n",
+        "from train import get_model_args, train_model\n",
+        "\n",
        "args = mnist1d.get_dataset_args()\n",
        "data = mnist1d.get_dataset(args=args)  # by default, this will download a pre-made dataset from the GitHub repo\n",
        "\n",
@@ -210,7 +217,7 @@
        "  # we would return [1,1,0,0,1]\n",
        "  # Remember that these are torch tensors and not numpy arrays\n",
        "  # Replace this function:\n",
-        "  mask = torch.ones_like(scores)\n",
+        "  mask = torch.ones_like(absolute_weights)\n",
        "\n",
        "\n",
        "  return mask"
@@ -237,7 +244,6 @@
        "def find_lottery_ticket(model, dataset, args, sparsity_schedule, criteria_fn=None, **kwargs):\n",
        "\n",
        "  criteria_fn = lambda init_params, final_params: final_params.abs()\n",
-        "\n",
        "  init_params = model.get_layer_vecs()\n",
        "  stats = {'train_losses':[], 'test_losses':[], 'train_accs':[], 'test_accs':[]}\n",
        "  models = []\n",
@@ -253,7 +259,7 @@
        "    model.set_layer_masks(masks)\n",
        "\n",
        "    # training process\n",
-        "    results = mnist1d.train_model(dataset, model, args)\n",
+        "    results = train_model(dataset, model, args)\n",
        "    model = results['checkpoints'][-1]\n",
        "\n",
        "    # store stats\n",
@@ -291,7 +297,8 @@
      },
      "source": [
        "# train settings\n",
-        "model_args = mnist1d.get_model_args()\n",
+        "from train import get_model_args, train_model\n",
+        "model_args = get_model_args()\n",
        "model_args.total_steps = 1501\n",
        "model_args.hidden_size = 500\n",
        "model_args.print_every = 5000 # print never\n",
--- a/UDL_Answer_Booklet_Students.pdf
+++ b/UDL_Answer_Booklet_Students.pdf
--- a/UDL_Errata.pdf
+++ b/UDL_Errata.pdf
Author	SHA1	Message	Date
udlbook	e968741846	Add files via upload	2024-07-22 17:09:30 -04:00
udlbook	37011065d7	Add files via upload	2024-07-22 17:09:15 -04:00
udlbook	afd20d0364	Update 17_1_Latent_Variable_Models.ipynb	2024-07-22 15:03:17 -04:00
udlbook	0d135f1ee7	Fixed problems with MNIST1D	2024-07-19 15:55:44 -04:00
udlbook	54a020304e	Merge pull request #211 from qualiaMachine/patch-1 Update 8_3_Double_Descent.ipynb	2024-07-10 15:53:00 -04:00
Chris Endemann	ccbbc4126e	Update 8_3_Double_Descent.ipynb Apologies, accidentally removed the "open in colab" button in the pull request you accepted earlier today. This corrects the mistake!	2024-07-10 14:15:21 -05:00
udlbook	d3273c99e2	Merge pull request #210 from qualiaMachine/main Add vertical line to double descent plot indicating where count(weights) = count(train)	2024-07-10 14:33:31 -04:00
Chris Endemann	f9e45c976c	Merge branch 'udlbook:main' into main	2024-07-10 09:43:18 -05:00
Chris Endemann	b005cec9c1	Update 8_3_Double_Descent.ipynb I added a little code to include a vertical dashed line on the plot representing where total_weights = number of train observations. I also moved n_epochs as an argument to fit_model() so learners can play around with the impact of n_epochs more easily.	2024-07-10 09:42:38 -05:00
udlbook	b8a91ad34d	Merge pull request #208 from SwayStar123/patch-4 Update 12_2_Multihead_Self_Attention.ipynb	2024-07-09 17:53:31 -04:00
SwayStar123	a2a86c27bc	Update 12_2_Multihead_Self_Attention.ipynb title number is incorrect, its actually 12.2	2024-07-06 17:19:13 +05:30