Compare commits

..

11 Commits

Author SHA1 Message Date
udlbook
e968741846 Add files via upload 2024-07-22 17:09:30 -04:00
udlbook
37011065d7 Add files via upload 2024-07-22 17:09:15 -04:00
udlbook
afd20d0364 Update 17_1_Latent_Variable_Models.ipynb 2024-07-22 15:03:17 -04:00
udlbook
0d135f1ee7 Fixed problems with MNIST1D 2024-07-19 15:55:44 -04:00
udlbook
54a020304e Merge pull request #211 from qualiaMachine/patch-1
Update 8_3_Double_Descent.ipynb
2024-07-10 15:53:00 -04:00
Chris Endemann
ccbbc4126e Update 8_3_Double_Descent.ipynb
Apologies, accidentally removed the "open in colab" button in the pull request you accepted earlier today. This corrects the mistake!
2024-07-10 14:15:21 -05:00
udlbook
d3273c99e2 Merge pull request #210 from qualiaMachine/main
Add vertical line to double descent plot indicating where count(weights) = count(train)
2024-07-10 14:33:31 -04:00
Chris Endemann
f9e45c976c Merge branch 'udlbook:main' into main 2024-07-10 09:43:18 -05:00
Chris Endemann
b005cec9c1 Update 8_3_Double_Descent.ipynb
I added a little code to include a vertical dashed line on the plot marking where total_weights equals the number of training observations. I also made n_epoch an argument of fit_model() so learners can more easily experiment with the impact of the number of epochs.
2024-07-10 09:42:38 -05:00
udlbook
b8a91ad34d Merge pull request #208 from SwayStar123/patch-4
Update 12_2_Multihead_Self_Attention.ipynb
2024-07-09 17:53:31 -04:00
SwayStar123
a2a86c27bc Update 12_2_Multihead_Self_Attention.ipynb
The title number is incorrect; it's actually 12.2.
2024-07-06 17:19:13 +05:30
6 changed files with 78 additions and 19 deletions

View File

@@ -99,7 +99,7 @@
"# data['x'], data['y'], data['x_test'], and data['y_test']\n",
"print(\"Examples in training set: {}\".format(len(data['y'])))\n",
"print(\"Examples in test set: {}\".format(len(data['y_test'])))\n",
"print(\"Length of each example: {}\".format(data['x'].shape[-1]))"
"print(\"Dimensionality of each example: {}\".format(data['x'].shape[-1]))"
],
"metadata": {
"id": "PW2gyXL5UkLU"
@@ -147,7 +147,7 @@
{
"cell_type": "code",
"source": [
"def fit_model(model, data):\n",
"def fit_model(model, data, n_epoch):\n",
"\n",
" # choose cross entropy loss function (equation 5.24)\n",
" loss_function = torch.nn.CrossEntropyLoss()\n",
@@ -164,9 +164,6 @@
" # load the data into a class that creates the batches\n",
" data_loader = DataLoader(TensorDataset(x_train,y_train), batch_size=100, shuffle=True, worker_init_fn=np.random.seed(1))\n",
"\n",
" # loop over the dataset n_epoch times\n",
" n_epoch = 1000\n",
"\n",
" for epoch in range(n_epoch):\n",
" # loop over batches\n",
" for i, batch in enumerate(data_loader):\n",
@@ -203,6 +200,18 @@
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"def count_parameters(model):\n",
" return sum(p.numel() for p in model.parameters() if p.requires_grad)"
],
"metadata": {
"id": "AQNCmFNV6JpV"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
@@ -226,19 +235,27 @@
"# This code will take a while (~30 mins on GPU) to run! Go and make a cup of coffee!\n",
"\n",
"hidden_variables = np.array([2,4,6,8,10,14,18,22,26,30,35,40,45,50,55,60,70,80,90,100,120,140,160,180,200,250,300,400]) ;\n",
"\n",
"errors_train_all = np.zeros_like(hidden_variables)\n",
"errors_test_all = np.zeros_like(hidden_variables)\n",
"total_weights_all = np.zeros_like(hidden_variables)\n",
"\n",
"# number of passes over the training data (epochs) used to train each model\n",
"n_epoch = 1000\n",
"\n",
"# For each hidden variable size\n",
"for c_hidden in range(len(hidden_variables)):\n",
" print(f'Training model with {hidden_variables[c_hidden]:3d} hidden variables')\n",
" # Get a model\n",
" model = get_model(hidden_variables[c_hidden]) ;\n",
" # Count and store number of weights\n",
" total_weights_all[c_hidden] = count_parameters(model)\n",
" # Train the model\n",
" errors_train, errors_test = fit_model(model, data)\n",
" errors_train, errors_test = fit_model(model, data, n_epoch)\n",
" # Store the results\n",
" errors_train_all[c_hidden] = errors_train\n",
" errors_test_all[c_hidden]= errors_test"
" errors_test_all[c_hidden]= errors_test\n",
"\n"
],
"metadata": {
"id": "K4OmBZGHWXpk"
@@ -249,12 +266,29 @@
{
"cell_type": "code",
"source": [
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"\n",
"# data['y'] has one entry per training example, so its length is the size of the training set\n",
"num_training_examples = len(data['y'])\n",
"\n",
"# Find the index where total_weights_all is closest to num_training_examples\n",
"closest_index = np.argmin(np.abs(np.array(total_weights_all) - num_training_examples))\n",
"\n",
"# Get the corresponding value of hidden variables\n",
"hidden_variable_at_num_training_examples = hidden_variables[closest_index]\n",
"\n",
"# Plot the results\n",
"fig, ax = plt.subplots()\n",
"ax.plot(hidden_variables, errors_train_all,'r-',label='train')\n",
"ax.plot(hidden_variables, errors_test_all,'b-',label='test')\n",
"ax.set_ylim(0,100);\n",
"ax.set_xlabel('No hidden variables'); ax.set_ylabel('Error')\n",
"ax.plot(hidden_variables, errors_train_all, 'r-', label='train')\n",
"ax.plot(hidden_variables, errors_test_all, 'b-', label='test')\n",
"\n",
"# Add a vertical line at the point where total weights equal the number of training examples\n",
"ax.axvline(x=hidden_variable_at_num_training_examples, color='g', linestyle='--', label='N(weights) = N(train)')\n",
"\n",
"ax.set_ylim(0, 100)\n",
"ax.set_xlabel('No. hidden variables')\n",
"ax.set_ylabel('Error')\n",
"ax.legend()\n",
"plt.show()\n"
],
@@ -263,6 +297,24 @@
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "KT4X8_hE5NFb"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "iGKZSfVF2r4z"
},
"execution_count": null,
"outputs": []
}
]
}

View File

@@ -28,7 +28,7 @@
{
"cell_type": "markdown",
"source": [
"# **Notebook 12.1: Multihead Self-Attention**\n",
"# **Notebook 12.2: Multihead Self-Attention**\n",
"\n",
"This notebook builds a multihead self-attention mechanism as in figure 12.6\n",
"\n",

View File

@@ -55,7 +55,7 @@
"Pr(z) = \\text{Norm}_{z}[0,1]\n",
"\\end{equation}\n",
"\n",
"As in figure 17.2, we'll assume that the output is two dimensional, we we need to define a function that maps from the 1D latent variable to two dimensions. Usually, we would use a neural network, but in this case, we'll just define an arbitrary relationship.\n",
"As in figure 17.2, we'll assume that the output is two dimensional, so we need to define a function that maps from the 1D latent variable to two dimensions. Usually, we would use a neural network, but in this case, we'll just define an arbitrary relationship.\n",
"\n",
"\\begin{align}\n",
"x_{1} &=& 0.5\\cdot\\exp\\Bigl[\\sin\\bigl[2+ 3.675 z \\bigr]\\Bigr]\\\\\n",

View File

@@ -44,7 +44,8 @@
},
"source": [
"# Run this if you're in a Colab to install MNIST 1D repository\n",
"!pip install git+https://github.com/greydanus/mnist1d"
"!pip install git+https://github.com/greydanus/mnist1d\n",
"!git clone https://github.com/greydanus/mnist1d"
],
"execution_count": null,
"outputs": []
@@ -95,6 +96,12 @@
"id": "I-vm_gh5xTJs"
},
"source": [
"from mnist1d.data import get_dataset, get_dataset_args\n",
"from mnist1d.utils import set_seed, to_pickle, from_pickle\n",
"\n",
"import sys ; sys.path.append('./mnist1d/notebooks')\n",
"from train import get_model_args, train_model\n",
"\n",
"args = mnist1d.get_dataset_args()\n",
"data = mnist1d.get_dataset(args=args) # by default, this will download a pre-made dataset from the GitHub repo\n",
"\n",
@@ -210,7 +217,7 @@
" # we would return [1,1,0,0,1]\n",
" # Remember that these are torch tensors and not numpy arrays\n",
" # Replace this function:\n",
" mask = torch.ones_like(scores)\n",
" mask = torch.ones_like(absolute_weights)\n",
"\n",
"\n",
" return mask"
@@ -237,7 +244,6 @@
"def find_lottery_ticket(model, dataset, args, sparsity_schedule, criteria_fn=None, **kwargs):\n",
"\n",
" criteria_fn = lambda init_params, final_params: final_params.abs()\n",
"\n",
" init_params = model.get_layer_vecs()\n",
" stats = {'train_losses':[], 'test_losses':[], 'train_accs':[], 'test_accs':[]}\n",
" models = []\n",
@@ -253,7 +259,7 @@
" model.set_layer_masks(masks)\n",
"\n",
" # training process\n",
" results = mnist1d.train_model(dataset, model, args)\n",
" results = train_model(dataset, model, args)\n",
" model = results['checkpoints'][-1]\n",
"\n",
" # store stats\n",
@@ -291,7 +297,8 @@
},
"source": [
"# train settings\n",
"model_args = mnist1d.get_model_args()\n",
"from train import get_model_args, train_model\n",
"model_args = get_model_args()\n",
"model_args.total_steps = 1501\n",
"model_args.hidden_size = 500\n",
"model_args.print_every = 5000 # print never\n",

Binary file not shown.

Binary file not shown.