Compare commits
11 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e968741846 | ||
|
|
37011065d7 | ||
|
|
afd20d0364 | ||
|
|
0d135f1ee7 | ||
|
|
54a020304e | ||
|
|
ccbbc4126e | ||
|
|
d3273c99e2 | ||
|
|
f9e45c976c | ||
|
|
b005cec9c1 | ||
|
|
b8a91ad34d | ||
|
|
a2a86c27bc |
@@ -99,7 +99,7 @@
|
||||
"# data['x'], data['y'], data['x_test'], and data['y_test']\n",
|
||||
"print(\"Examples in training set: {}\".format(len(data['y'])))\n",
|
||||
"print(\"Examples in test set: {}\".format(len(data['y_test'])))\n",
|
||||
"print(\"Length of each example: {}\".format(data['x'].shape[-1]))"
|
||||
"print(\"Dimensionality of each example: {}\".format(data['x'].shape[-1]))"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "PW2gyXL5UkLU"
|
||||
@@ -147,7 +147,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"def fit_model(model, data):\n",
|
||||
"def fit_model(model, data, n_epoch):\n",
|
||||
"\n",
|
||||
" # choose cross entropy loss function (equation 5.24)\n",
|
||||
" loss_function = torch.nn.CrossEntropyLoss()\n",
|
||||
@@ -164,9 +164,6 @@
|
||||
" # load the data into a class that creates the batches\n",
|
||||
" data_loader = DataLoader(TensorDataset(x_train,y_train), batch_size=100, shuffle=True, worker_init_fn=np.random.seed(1))\n",
|
||||
"\n",
|
||||
" # loop over the dataset n_epoch times\n",
|
||||
" n_epoch = 1000\n",
|
||||
"\n",
|
||||
" for epoch in range(n_epoch):\n",
|
||||
" # loop over batches\n",
|
||||
" for i, batch in enumerate(data_loader):\n",
|
||||
@@ -203,6 +200,18 @@
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"def count_parameters(model):\n",
|
||||
" return sum(p.numel() for p in model.parameters() if p.requires_grad)"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "AQNCmFNV6JpV"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
@@ -226,19 +235,27 @@
|
||||
"# This code will take a while (~30 mins on GPU) to run! Go and make a cup of coffee!\n",
|
||||
"\n",
|
||||
"hidden_variables = np.array([2,4,6,8,10,14,18,22,26,30,35,40,45,50,55,60,70,80,90,100,120,140,160,180,200,250,300,400]) ;\n",
|
||||
"\n",
|
||||
"errors_train_all = np.zeros_like(hidden_variables)\n",
|
||||
"errors_test_all = np.zeros_like(hidden_variables)\n",
|
||||
"total_weights_all = np.zeros_like(hidden_variables)\n",
|
||||
"\n",
|
||||
"# loop over the dataset n_epoch times\n",
|
||||
"n_epoch = 1000\n",
|
||||
"\n",
|
||||
"# For each hidden variable size\n",
|
||||
"for c_hidden in range(len(hidden_variables)):\n",
|
||||
" print(f'Training model with {hidden_variables[c_hidden]:3d} hidden variables')\n",
|
||||
" # Get a model\n",
|
||||
" model = get_model(hidden_variables[c_hidden]) ;\n",
|
||||
" # Count and store number of weights\n",
|
||||
" total_weights_all[c_hidden] = count_parameters(model)\n",
|
||||
" # Train the model\n",
|
||||
" errors_train, errors_test = fit_model(model, data)\n",
|
||||
" errors_train, errors_test = fit_model(model, data, n_epoch)\n",
|
||||
" # Store the results\n",
|
||||
" errors_train_all[c_hidden] = errors_train\n",
|
||||
" errors_test_all[c_hidden]= errors_test"
|
||||
" errors_test_all[c_hidden]= errors_test\n",
|
||||
"\n"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "K4OmBZGHWXpk"
|
||||
@@ -249,12 +266,29 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"# Assuming data['y'] is available and contains the training examples\n",
|
||||
"num_training_examples = len(data['y'])\n",
|
||||
"\n",
|
||||
"# Find the index where total_weights_all is closest to num_training_examples\n",
|
||||
"closest_index = np.argmin(np.abs(np.array(total_weights_all) - num_training_examples))\n",
|
||||
"\n",
|
||||
"# Get the corresponding value of hidden variables\n",
|
||||
"hidden_variable_at_num_training_examples = hidden_variables[closest_index]\n",
|
||||
"\n",
|
||||
"# Plot the results\n",
|
||||
"fig, ax = plt.subplots()\n",
|
||||
"ax.plot(hidden_variables, errors_train_all, 'r-', label='train')\n",
|
||||
"ax.plot(hidden_variables, errors_test_all, 'b-', label='test')\n",
|
||||
"ax.set_ylim(0,100);\n",
|
||||
"ax.set_xlabel('No hidden variables'); ax.set_ylabel('Error')\n",
|
||||
"\n",
|
||||
"# Add a vertical line at the point where total weights equal the number of training examples\n",
|
||||
"ax.axvline(x=hidden_variable_at_num_training_examples, color='g', linestyle='--', label='N(weights) = N(train)')\n",
|
||||
"\n",
|
||||
"ax.set_ylim(0, 100)\n",
|
||||
"ax.set_xlabel('No. hidden variables')\n",
|
||||
"ax.set_ylabel('Error')\n",
|
||||
"ax.legend()\n",
|
||||
"plt.show()\n"
|
||||
],
|
||||
@@ -263,6 +297,24 @@
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [],
|
||||
"metadata": {
|
||||
"id": "KT4X8_hE5NFb"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [],
|
||||
"metadata": {
|
||||
"id": "iGKZSfVF2r4z"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -28,7 +28,7 @@
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"# **Notebook 12.1: Multihead Self-Attention**\n",
|
||||
"# **Notebook 12.2: Multihead Self-Attention**\n",
|
||||
"\n",
|
||||
"This notebook builds a multihead self-attention mechanism as in figure 12.6\n",
|
||||
"\n",
|
||||
|
||||
@@ -55,7 +55,7 @@
|
||||
"Pr(z) = \\text{Norm}_{z}[0,1]\n",
|
||||
"\\end{equation}\n",
|
||||
"\n",
|
||||
"As in figure 17.2, we'll assume that the output is two dimensional, we we need to define a function that maps from the 1D latent variable to two dimensions. Usually, we would use a neural network, but in this case, we'll just define an arbitrary relationship.\n",
|
||||
"As in figure 17.2, we'll assume that the output is two dimensional, we need to define a function that maps from the 1D latent variable to two dimensions. Usually, we would use a neural network, but in this case, we'll just define an arbitrary relationship.\n",
|
||||
"\n",
|
||||
"\\begin{align}\n",
|
||||
"x_{1} &=& 0.5\\cdot\\exp\\Bigl[\\sin\\bigl[2+ 3.675 z \\bigr]\\Bigr]\\\\\n",
|
||||
|
||||
@@ -44,7 +44,8 @@
|
||||
},
|
||||
"source": [
|
||||
"# Run this if you're in a Colab to install MNIST 1D repository\n",
|
||||
"!pip install git+https://github.com/greydanus/mnist1d"
|
||||
"!pip install git+https://github.com/greydanus/mnist1d\n",
|
||||
"!git clone https://github.com/greydanus/mnist1d"
|
||||
],
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
@@ -95,6 +96,12 @@
|
||||
"id": "I-vm_gh5xTJs"
|
||||
},
|
||||
"source": [
|
||||
"from mnist1d.data import get_dataset, get_dataset_args\n",
|
||||
"from mnist1d.utils import set_seed, to_pickle, from_pickle\n",
|
||||
"\n",
|
||||
"import sys ; sys.path.append('./mnist1d/notebooks')\n",
|
||||
"from train import get_model_args, train_model\n",
|
||||
"\n",
|
||||
"args = mnist1d.get_dataset_args()\n",
|
||||
"data = mnist1d.get_dataset(args=args) # by default, this will download a pre-made dataset from the GitHub repo\n",
|
||||
"\n",
|
||||
@@ -210,7 +217,7 @@
|
||||
" # we would return [1,1,0,0,1]\n",
|
||||
" # Remember that these are torch tensors and not numpy arrays\n",
|
||||
" # Replace this function:\n",
|
||||
" mask = torch.ones_like(scores)\n",
|
||||
" mask = torch.ones_like(absolute_weights)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" return mask"
|
||||
@@ -237,7 +244,6 @@
|
||||
"def find_lottery_ticket(model, dataset, args, sparsity_schedule, criteria_fn=None, **kwargs):\n",
|
||||
"\n",
|
||||
" criteria_fn = lambda init_params, final_params: final_params.abs()\n",
|
||||
"\n",
|
||||
" init_params = model.get_layer_vecs()\n",
|
||||
" stats = {'train_losses':[], 'test_losses':[], 'train_accs':[], 'test_accs':[]}\n",
|
||||
" models = []\n",
|
||||
@@ -253,7 +259,7 @@
|
||||
" model.set_layer_masks(masks)\n",
|
||||
"\n",
|
||||
" # training process\n",
|
||||
" results = mnist1d.train_model(dataset, model, args)\n",
|
||||
" results = train_model(dataset, model, args)\n",
|
||||
" model = results['checkpoints'][-1]\n",
|
||||
"\n",
|
||||
" # store stats\n",
|
||||
@@ -291,7 +297,8 @@
|
||||
},
|
||||
"source": [
|
||||
"# train settings\n",
|
||||
"model_args = mnist1d.get_model_args()\n",
|
||||
"from train import get_model_args, train_model\n",
|
||||
"model_args = get_model_args()\n",
|
||||
"model_args.total_steps = 1501\n",
|
||||
"model_args.hidden_size = 500\n",
|
||||
"model_args.print_every = 5000 # print never\n",
|
||||
|
||||
Binary file not shown.
BIN
UDL_Errata.pdf
BIN
UDL_Errata.pdf
Binary file not shown.
Reference in New Issue
Block a user