{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Optimizers\n", "\n", "Pytorch comes with a module of optimizers. We can replace our vanilla gradient descent with many different ones without modifying a lot of code." ] }, { "cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", "from matplotlib import pyplot\n", "import torch\n", "torch.set_printoptions(edgeitems=2, linewidth=75)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Taking our input from the previous notebook and applying our scaling" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "t_c = torch.tensor([0.5, 14.0, 15.0, 28.0, 11.0,\n", " 8.0, 3.0, -4.0, 6.0, 13.0, 21.0])\n", "t_u = torch.tensor([35.7, 55.9, 58.2, 81.9, 56.3, 48.9,\n", " 33.9, 21.8, 48.4, 60.4, 68.4])\n", "t_un = 0.1 * t_u" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Same model and loss function as before." ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "def model(t_u, w, b):\n", " return w * t_u + b" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "def loss_fn(t_p, t_c):\n", " squared_diffs = (t_p - t_c)**2\n", " return squared_diffs.mean()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['ASGD',\n", " 'Adadelta',\n", " 'Adagrad',\n", " 'Adam',\n", " 'AdamW',\n", " 'Adamax',\n", " 'LBFGS',\n", " 'Optimizer',\n", " 'RMSprop',\n", " 'Rprop',\n", " 'SGD',\n", " 'SparseAdam',\n", " '__builtins__',\n", " '__cached__',\n", " '__doc__',\n", " '__file__',\n", " '__loader__',\n", " '__name__',\n", " '__package__',\n", " '__path__',\n", " '__spec__',\n", " 'lr_scheduler']" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import torch.optim as optim\n", "\n", "dir(optim)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "params = torch.tensor([1.0, 0.0], requires_grad=True)\n", "learning_rate = 1e-5\n", "optimizer = optim.SGD([params], lr=learning_rate)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The values of our parameters are updated when we call `step`.\n", "\n", "The code below forgets to zero out the gradients!" 
] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([ 9.5483e-01, -8.2600e-04], requires_grad=True)" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "t_p = model(t_u, *params)\n", "loss = loss_fn(t_p, t_c)\n", "loss.backward()\n", "\n", "optimizer.step()\n", "\n", "params" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now we can use this snippet in a loop for training" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([1.7761, 0.1064], requires_grad=True)" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "params = torch.tensor([1.0, 0.0], requires_grad=True)\n", "learning_rate = 1e-2\n", "optimizer = optim.SGD([params], lr=learning_rate)\n", "\n", "t_p = model(t_un, *params)\n", "loss = loss_fn(t_p, t_c)\n", "\n", "optimizer.zero_grad() # <1>\n", "loss.backward()\n", "optimizer.step()\n", "\n", "params" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "def training_loop(n_epochs, optimizer, params, t_u, t_c):\n", " for epoch in range(1, n_epochs + 1):\n", " t_p = model(t_u, *params) \n", " loss = loss_fn(t_p, t_c)\n", " \n", " optimizer.zero_grad()\n", " loss.backward()\n", " optimizer.step()\n", "\n", " if epoch % 500 == 0:\n", " print('Epoch %d, Loss %f' % (epoch, float(loss)))\n", " \n", " return params" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 500, Loss 7.860116\n", "Epoch 1000, Loss 3.828538\n", "Epoch 1500, Loss 3.092191\n", "Epoch 2000, Loss 2.957697\n", "Epoch 2500, Loss 2.933134\n", "Epoch 3000, Loss 2.928648\n", "Epoch 3500, Loss 2.927830\n", "Epoch 4000, Loss 2.927679\n", "Epoch 4500, Loss 2.927652\n", "Epoch 5000, Loss 2.927647\n" ] }, { "data": { "text/plain": [ "tensor([ 5.3671, -17.3012], requires_grad=True)" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "params = torch.tensor([1.0, 0.0], requires_grad=True)\n", "learning_rate = 1e-2\n", "optimizer = optim.SGD([params], lr=learning_rate) # <1>\n", "\n", "training_loop(\n", " n_epochs = 5000, \n", " optimizer = optimizer,\n", " params = params, # <1> \n", " t_u = t_un,\n", " t_c = t_c)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "And we get the same loss" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 500, Loss 7.612903\n", "Epoch 1000, Loss 3.086700\n", "Epoch 1500, Loss 2.928578\n", "Epoch 2000, Loss 2.927646\n" ] }, { "data": { "text/plain": [ "tensor([ 0.5367, -17.3021], requires_grad=True)" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "params = torch.tensor([1.0, 0.0], requires_grad=True)\n", "learning_rate = 1e-1\n", "optimizer = optim.Adam([params], lr=learning_rate) # <1>\n", "\n", "training_loop(\n", " n_epochs = 2000, \n", " optimizer = optimizer,\n", " params = params,\n", " t_u = t_u, # <2> \n", " t_c = t_c)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Training and Validation Splits" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(tensor([ 5, 9, 1, 6, 7, 10, 3, 8, 0]), tensor([2, 4]))" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } 
], "source": [ "n_samples = t_u.shape[0]\n", "n_val = int(0.2 * n_samples)\n", "\n", "shuffled_indices = torch.randperm(n_samples)\n", "\n", "train_indices = shuffled_indices[:-n_val]\n", "val_indices = shuffled_indices[-n_val:]\n", "\n", "train_indices, val_indices # <1>" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "train_t_u = t_u[train_indices]\n", "train_t_c = t_c[train_indices]\n", "\n", "val_t_u = t_u[val_indices]\n", "val_t_c = t_c[val_indices]\n", "\n", "train_t_un = 0.1 * train_t_u\n", "val_t_un = 0.1 * val_t_u" ] }, { "cell_type": "code", "execution_count": 107, "metadata": {}, "outputs": [], "source": [ "def training_loop(n_epochs, optimizer, params, train_t_u, val_t_u,\n", " train_t_c, val_t_c, print_periodically=True):\n", " val_loss_each_epoch = []\n", " for epoch in range(1, n_epochs + 1):\n", " train_t_p = model(train_t_u, *params) # <1>\n", " train_loss = loss_fn(train_t_p, train_t_c)\n", " \n", " val_t_p = model(val_t_u, *params) # <1>\n", " val_loss = loss_fn(val_t_p, val_t_c)\n", " \n", " val_loss_each_epoch.append(val_loss.item())\n", " \n", " optimizer.zero_grad()\n", " train_loss.backward() # <2>\n", " optimizer.step()\n", "\n", " if print_periodically and (epoch <= 3 or epoch % 500 == 0):\n", " print(f\"\\tEpoch {epoch}, Training loss {train_loss.item():.4f},\"\n", " f\" Validation loss {val_loss.item():.4f}\")\n", " \n", " return *params, train_loss.item(), val_loss.item(), val_loss_each_epoch" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 43.9632, Validation loss 11.2589\n", "\tEpoch 3, Training loss 36.8792, Validation loss 4.2194\n", "\tEpoch 500, Training loss 7.1544, Validation loss 2.7312\n", "\tEpoch 1000, Training loss 3.5517, Validation loss 2.5743\n", "\tEpoch 1500, Training loss 3.1001, Validation loss 2.5225\n", "\tEpoch 2000, Training loss 3.0435, Validation loss 2.5046\n", "\tEpoch 2500, Training loss 3.0364, Validation loss 2.4983\n", "\tEpoch 3000, Training loss 3.0355, Validation loss 2.4961\n" ] }, { "data": { "text/plain": [ "(tensor([ 5.3719, -17.2278], requires_grad=True),\n", " 3.0354840755462646,\n", " 2.4961061477661133)" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "params = torch.tensor([1.0, 0.0], requires_grad=True)\n", "learning_rate = 1e-2\n", "optimizer = optim.SGD([params], lr=learning_rate)\n", "\n", "training_loop(\n", " n_epochs = 3000, \n", " optimizer = optimizer,\n", " params = params,\n", " train_t_u = train_t_un, # <1> \n", " val_t_u = val_t_un, # <1> \n", " train_t_c = train_t_c,\n", " val_t_c = val_t_c)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Searching" ] }, { "cell_type": "code", "execution_count": 130, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "ASGD alpha 0.0001 epochs 500\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.0659, Validation loss 55.9043\n", "\tEpoch 3, Training loss 84.4833, Validation loss 55.2618\n", "\tEpoch 500, Training loss 35.2186, Validation loss 3.2051\n", "ASGD alpha 0.0001 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.0659, Validation loss 55.9043\n", "\tEpoch 3, Training loss 84.4833, Validation loss 55.2618\n", "\tEpoch 500, Training 
loss 35.2186, Validation loss 3.2051\n", "\tEpoch 1000, Training loss 34.4265, Validation loss 3.2288\n", "\tEpoch 1500, Training loss 33.7817, Validation loss 3.2290\n", "\tEpoch 2000, Training loss 33.1505, Validation loss 3.2205\n", "\tEpoch 2500, Training loss 32.5322, Validation loss 3.2116\n", "\tEpoch 3000, Training loss 31.9267, Validation loss 3.2028\n", "\tEpoch 3500, Training loss 31.3335, Validation loss 3.1942\n", "\tEpoch 4000, Training loss 30.7526, Validation loss 3.1856\n", "\tEpoch 4500, Training loss 30.1836, Validation loss 3.1772\n", "\tEpoch 5000, Training loss 29.6263, Validation loss 3.1689\n", "ASGD alpha 0.0001 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.0659, Validation loss 55.9043\n", "\tEpoch 3, Training loss 84.4833, Validation loss 55.2618\n", "\tEpoch 500, Training loss 35.2186, Validation loss 3.2051\n", "\tEpoch 1000, Training loss 34.4265, Validation loss 3.2288\n", "\tEpoch 1500, Training loss 33.7817, Validation loss 3.2290\n", "\tEpoch 2000, Training loss 33.1505, Validation loss 3.2205\n", "\tEpoch 2500, Training loss 32.5322, Validation loss 3.2116\n", "\tEpoch 3000, Training loss 31.9267, Validation loss 3.2028\n", "\tEpoch 3500, Training loss 31.3335, Validation loss 3.1942\n", "\tEpoch 4000, Training loss 30.7526, Validation loss 3.1856\n", "\tEpoch 4500, Training loss 30.1836, Validation loss 3.1772\n", "\tEpoch 5000, Training loss 29.6263, Validation loss 3.1689\n", "ASGD alpha 0.001 epochs 500\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 79.9171, Validation loss 50.2310\n", "\tEpoch 3, Training loss 74.8356, Validation loss 44.6451\n", "\tEpoch 500, Training loss 29.6358, Validation loss 3.1691\n", "ASGD alpha 0.001 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 79.9171, Validation loss 50.2310\n", "\tEpoch 3, Training loss 74.8356, Validation loss 44.6451\n", "\tEpoch 500, Training loss 29.6358, Validation loss 3.1691\n", "\tEpoch 1000, Training loss 24.6529, Validation loss 3.0919\n", "\tEpoch 1500, Training loss 20.6038, Validation loss 3.0243\n", "\tEpoch 2000, Training loss 17.3134, Validation loss 2.9650\n", "\tEpoch 2500, Training loss 14.6397, Validation loss 2.9130\n", "\tEpoch 3000, Training loss 12.4670, Validation loss 2.8671\n", "\tEpoch 3500, Training loss 10.7012, Validation loss 2.8266\n", "\tEpoch 4000, Training loss 9.2661, Validation loss 2.7909\n", "\tEpoch 4500, Training loss 8.0998, Validation loss 2.7592\n", "\tEpoch 5000, Training loss 7.1519, Validation loss 2.7311\n", "ASGD alpha 0.001 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 79.9171, Validation loss 50.2310\n", "\tEpoch 3, Training loss 74.8356, Validation loss 44.6451\n", "\tEpoch 500, Training loss 29.6358, Validation loss 3.1691\n", "\tEpoch 1000, Training loss 24.6529, Validation loss 3.0919\n", "\tEpoch 1500, Training loss 20.6038, Validation loss 3.0243\n", "\tEpoch 2000, Training loss 17.3134, Validation loss 2.9650\n", "\tEpoch 2500, Training loss 14.6397, Validation loss 2.9130\n", "\tEpoch 3000, Training loss 12.4670, Validation loss 2.8671\n", "\tEpoch 3500, Training loss 10.7012, Validation loss 2.8266\n", "\tEpoch 4000, Training loss 9.2661, Validation loss 2.7909\n", "\tEpoch 4500, Training loss 8.0998, Validation loss 2.7592\n", "\tEpoch 5000, Training loss 7.1519, Validation loss 2.7311\n", "ASGD alpha 0.01 epochs 500\n", 
"\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 43.9632, Validation loss 11.2589\n", "\tEpoch 3, Training loss 36.8792, Validation loss 4.2195\n", "\tEpoch 500, Training loss 7.1592, Validation loss 2.7314\n", "ASGD alpha 0.01 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 43.9632, Validation loss 11.2589\n", "\tEpoch 3, Training loss 36.8792, Validation loss 4.2195\n", "\tEpoch 500, Training loss 7.1592, Validation loss 2.7314\n", "\tEpoch 1000, Training loss 3.5551, Validation loss 2.5745\n", "\tEpoch 1500, Training loss 3.1015, Validation loss 2.5228\n", "\tEpoch 2000, Training loss 3.0440, Validation loss 2.5049\n", "\tEpoch 2500, Training loss 3.0366, Validation loss 2.4986\n", "\tEpoch 3000, Training loss 3.0356, Validation loss 2.4964\n", "\tEpoch 3500, Training loss 3.0354, Validation loss 2.4956\n", "\tEpoch 4000, Training loss 3.0354, Validation loss 2.4953\n", "\tEpoch 4500, Training loss 3.0354, Validation loss 2.4952\n", "\tEpoch 5000, Training loss 3.0354, Validation loss 2.4952\n", "ASGD alpha 0.01 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 43.9632, Validation loss 11.2589\n", "\tEpoch 3, Training loss 36.8792, Validation loss 4.2195\n", "\tEpoch 500, Training loss 7.1592, Validation loss 2.7314\n", "\tEpoch 1000, Training loss 3.5551, Validation loss 2.5745\n", "\tEpoch 1500, Training loss 3.1015, Validation loss 2.5228\n", "\tEpoch 2000, Training loss 3.0440, Validation loss 2.5049\n", "\tEpoch 2500, Training loss 3.0366, Validation loss 2.4986\n", "\tEpoch 3000, Training loss 3.0356, Validation loss 2.4964\n", "\tEpoch 3500, Training loss 3.0354, Validation loss 2.4956\n", "\tEpoch 4000, Training loss 3.0354, Validation loss 2.4953\n", "\tEpoch 4500, Training loss 3.0354, Validation loss 2.4952\n", "\tEpoch 5000, Training loss 3.0354, Validation loss 2.4952\n", "Adadelta alpha 0.0001 epochs 500\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.6554, Validation loss 56.5546\n", "\tEpoch 3, Training loss 85.6553, Validation loss 56.5546\n", "\tEpoch 500, Training loss 85.6300, Validation loss 56.5257\n", "Adadelta alpha 0.0001 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.6554, Validation loss 56.5546\n", "\tEpoch 3, Training loss 85.6553, Validation loss 56.5546\n", "\tEpoch 500, Training loss 85.6300, Validation loss 56.5257\n", "\tEpoch 1000, Training loss 85.5900, Validation loss 56.4800\n", "\tEpoch 1500, Training loss 85.5400, Validation loss 56.4229\n", "\tEpoch 2000, Training loss 85.4813, Validation loss 56.3559\n", "\tEpoch 2500, Training loss 85.4156, Validation loss 56.2810\n", "\tEpoch 3000, Training loss 85.3429, Validation loss 56.1979\n", "\tEpoch 3500, Training loss 85.2643, Validation loss 56.1083\n", "\tEpoch 4000, Training loss 85.1803, Validation loss 56.0124\n", "\tEpoch 4500, Training loss 85.0912, Validation loss 55.9107\n", "\tEpoch 5000, Training loss 84.9973, Validation loss 55.8035\n", "Adadelta alpha 0.0001 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.6554, Validation loss 56.5546\n", "\tEpoch 3, Training loss 85.6553, Validation loss 56.5546\n", "\tEpoch 500, Training loss 85.6300, Validation loss 56.5257\n", "\tEpoch 1000, Training loss 85.5900, Validation loss 56.4800\n", "\tEpoch 1500, Training loss 85.5400, 
Validation loss 56.4229\n", "\tEpoch 2000, Training loss 85.4813, Validation loss 56.3559\n", "\tEpoch 2500, Training loss 85.4156, Validation loss 56.2810\n", "\tEpoch 3000, Training loss 85.3429, Validation loss 56.1979\n", "\tEpoch 3500, Training loss 85.2643, Validation loss 56.1083\n", "\tEpoch 4000, Training loss 85.1803, Validation loss 56.0124\n", "\tEpoch 4500, Training loss 85.0912, Validation loss 55.9107\n", "\tEpoch 5000, Training loss 84.9973, Validation loss 55.8035\n", "Adadelta alpha 0.001 epochs 500\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.6551, Validation loss 56.5543\n", "\tEpoch 3, Training loss 85.6548, Validation loss 56.5540\n", "\tEpoch 500, Training loss 85.4015, Validation loss 56.2648\n", "Adadelta alpha 0.001 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.6551, Validation loss 56.5543\n", "\tEpoch 3, Training loss 85.6548, Validation loss 56.5540\n", "\tEpoch 500, Training loss 85.4015, Validation loss 56.2648\n", "\tEpoch 1000, Training loss 85.0055, Validation loss 55.8128\n", "\tEpoch 1500, Training loss 84.5108, Validation loss 55.2482\n", "\tEpoch 2000, Training loss 83.9376, Validation loss 54.5939\n", "\tEpoch 2500, Training loss 83.2986, Validation loss 53.8647\n", "\tEpoch 3000, Training loss 82.6034, Validation loss 53.0712\n", "\tEpoch 3500, Training loss 81.8594, Validation loss 52.2221\n", "\tEpoch 4000, Training loss 81.0730, Validation loss 51.3247\n", "\tEpoch 4500, Training loss 80.2495, Validation loss 50.3849\n", "\tEpoch 5000, Training loss 79.3935, Validation loss 49.4082\n", "Adadelta alpha 0.001 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.6551, Validation loss 56.5543\n", "\tEpoch 3, Training loss 85.6548, Validation loss 56.5540\n", "\tEpoch 500, Training loss 85.4015, Validation loss 56.2648\n", "\tEpoch 1000, Training loss 85.0055, Validation loss 55.8128\n", "\tEpoch 1500, Training loss 84.5108, Validation loss 55.2482\n", "\tEpoch 2000, Training loss 83.9376, Validation loss 54.5939\n", "\tEpoch 2500, Training loss 83.2986, Validation loss 53.8647\n", "\tEpoch 3000, Training loss 82.6034, Validation loss 53.0712\n", "\tEpoch 3500, Training loss 81.8594, Validation loss 52.2221\n", "\tEpoch 4000, Training loss 81.0730, Validation loss 51.3247\n", "\tEpoch 4500, Training loss 80.2495, Validation loss 50.3849\n", "\tEpoch 5000, Training loss 79.3935, Validation loss 49.4082\n", "Adadelta alpha 0.01 epochs 500\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.6527, Validation loss 56.5515\n", "\tEpoch 3, Training loss 85.6499, Validation loss 56.5484\n", "\tEpoch 500, Training loss 83.1666, Validation loss 53.7141\n", "Adadelta alpha 0.01 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.6527, Validation loss 56.5515\n", "\tEpoch 3, Training loss 85.6499, Validation loss 56.5484\n", "\tEpoch 500, Training loss 83.1666, Validation loss 53.7141\n", "\tEpoch 1000, Training loss 79.4724, Validation loss 49.4983\n", "\tEpoch 1500, Training loss 75.1670, Validation loss 44.5863\n", "\tEpoch 2000, Training loss 70.5870, Validation loss 39.3633\n", "\tEpoch 2500, Training loss 65.9726, Validation loss 34.1044\n", "\tEpoch 3000, Training loss 61.5033, Validation loss 29.0160\n", "\tEpoch 3500, Training loss 57.3126, Validation loss 24.2519\n", "\tEpoch 4000, Training 
loss 53.4949, Validation loss 19.9223\n", "\tEpoch 4500, Training loss 50.1112, Validation loss 16.0998\n", "\tEpoch 5000, Training loss 47.1933, Validation loss 12.8244\n", "Adadelta alpha 0.01 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.6527, Validation loss 56.5515\n", "\tEpoch 3, Training loss 85.6499, Validation loss 56.5484\n", "\tEpoch 500, Training loss 83.1666, Validation loss 53.7141\n", "\tEpoch 1000, Training loss 79.4724, Validation loss 49.4983\n", "\tEpoch 1500, Training loss 75.1670, Validation loss 44.5863\n", "\tEpoch 2000, Training loss 70.5870, Validation loss 39.3633\n", "\tEpoch 2500, Training loss 65.9726, Validation loss 34.1044\n", "\tEpoch 3000, Training loss 61.5033, Validation loss 29.0160\n", "\tEpoch 3500, Training loss 57.3126, Validation loss 24.2519\n", "\tEpoch 4000, Training loss 53.4949, Validation loss 19.9223\n", "\tEpoch 4500, Training loss 50.1112, Validation loss 16.0998\n", "\tEpoch 5000, Training loss 47.1933, Validation loss 12.8244\n", "Adagrad alpha 0.0001 epochs 500\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.6468, Validation loss 56.5448\n", "\tEpoch 3, Training loss 85.6407, Validation loss 56.5379\n", "\tEpoch 500, Training loss 85.2844, Validation loss 56.1311\n", "Adagrad alpha 0.0001 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.6468, Validation loss 56.5448\n", "\tEpoch 3, Training loss 85.6407, Validation loss 56.5379\n", "\tEpoch 500, Training loss 85.2844, Validation loss 56.1311\n", "\tEpoch 1000, Training loss 85.1259, Validation loss 55.9503\n", "\tEpoch 1500, Training loss 85.0047, Validation loss 55.8119\n", "\tEpoch 2000, Training loss 84.9026, Validation loss 55.6954\n", "\tEpoch 2500, Training loss 84.8128, Validation loss 55.5929\n", "\tEpoch 3000, Training loss 84.7318, Validation loss 55.5004\n", "\tEpoch 3500, Training loss 84.6574, Validation loss 55.4155\n", "\tEpoch 4000, Training loss 84.5881, Validation loss 55.3364\n", "\tEpoch 4500, Training loss 84.5226, Validation loss 55.2617\n", "\tEpoch 5000, Training loss 84.4618, Validation loss 55.1923\n", "Adagrad alpha 0.0001 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.6468, Validation loss 56.5448\n", "\tEpoch 3, Training loss 85.6407, Validation loss 56.5379\n", "\tEpoch 500, Training loss 85.2844, Validation loss 56.1311\n", "\tEpoch 1000, Training loss 85.1259, Validation loss 55.9503\n", "\tEpoch 1500, Training loss 85.0047, Validation loss 55.8119\n", "\tEpoch 2000, Training loss 84.9026, Validation loss 55.6954\n", "\tEpoch 2500, Training loss 84.8128, Validation loss 55.5929\n", "\tEpoch 3000, Training loss 84.7318, Validation loss 55.5004\n", "\tEpoch 3500, Training loss 84.6574, Validation loss 55.4155\n", "\tEpoch 4000, Training loss 84.5881, Validation loss 55.3364\n", "\tEpoch 4500, Training loss 84.5226, Validation loss 55.2617\n", "\tEpoch 5000, Training loss 84.4618, Validation loss 55.1923\n", "Adagrad alpha 0.001 epochs 500\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.5694, Validation loss 56.4565\n", "\tEpoch 3, Training loss 85.5086, Validation loss 56.3872\n", "\tEpoch 500, Training loss 82.0349, Validation loss 52.4222\n", "Adagrad alpha 0.001 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.5694, 
Validation loss 56.4565\n", "\tEpoch 3, Training loss 85.5086, Validation loss 56.3872\n", "\tEpoch 500, Training loss 82.0349, Validation loss 52.4222\n", "\tEpoch 1000, Training loss 80.5431, Validation loss 50.7195\n", "\tEpoch 1500, Training loss 79.4224, Validation loss 49.4405\n", "\tEpoch 2000, Training loss 78.4938, Validation loss 48.3806\n", "\tEpoch 2500, Training loss 77.6877, Validation loss 47.4607\n", "\tEpoch 3000, Training loss 76.9687, Validation loss 46.6402\n", "\tEpoch 3500, Training loss 76.3155, Validation loss 45.8948\n", "\tEpoch 4000, Training loss 75.7145, Validation loss 45.2089\n", "\tEpoch 4500, Training loss 75.1561, Validation loss 44.5717\n", "\tEpoch 5000, Training loss 74.6332, Validation loss 43.9751\n", "Adagrad alpha 0.001 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.5694, Validation loss 56.4565\n", "\tEpoch 3, Training loss 85.5086, Validation loss 56.3872\n", "\tEpoch 500, Training loss 82.0349, Validation loss 52.4222\n", "\tEpoch 1000, Training loss 80.5431, Validation loss 50.7195\n", "\tEpoch 1500, Training loss 79.4224, Validation loss 49.4405\n", "\tEpoch 2000, Training loss 78.4938, Validation loss 48.3806\n", "\tEpoch 2500, Training loss 77.6877, Validation loss 47.4607\n", "\tEpoch 3000, Training loss 76.9687, Validation loss 46.6402\n", "\tEpoch 3500, Training loss 76.3155, Validation loss 45.8948\n", "\tEpoch 4000, Training loss 75.7145, Validation loss 45.2089\n", "\tEpoch 4500, Training loss 75.1561, Validation loss 44.5717\n", "\tEpoch 5000, Training loss 74.6332, Validation loss 43.9751\n", "Adagrad alpha 0.01 epochs 500\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 84.7989, Validation loss 55.5771\n", "\tEpoch 3, Training loss 84.2010, Validation loss 54.8945\n", "\tEpoch 500, Training loss 57.6999, Validation loss 24.6808\n", "Adagrad alpha 0.01 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 84.7989, Validation loss 55.5771\n", "\tEpoch 3, Training loss 84.2010, Validation loss 54.8945\n", "\tEpoch 500, Training loss 57.6999, Validation loss 24.6808\n", "\tEpoch 1000, Training loss 50.5468, Validation loss 16.5840\n", "\tEpoch 1500, Training loss 46.5815, Validation loss 12.1548\n", "\tEpoch 2000, Training loss 44.0707, Validation loss 9.4133\n", "\tEpoch 2500, Training loss 42.3649, Validation loss 7.6167\n", "\tEpoch 3000, Training loss 41.1456, Validation loss 6.3996\n", "\tEpoch 3500, Training loss 40.2350, Validation loss 5.5561\n", "\tEpoch 4000, Training loss 39.5263, Validation loss 4.9612\n", "\tEpoch 4500, Training loss 38.9526, Validation loss 4.5352\n", "\tEpoch 5000, Training loss 38.4714, Validation loss 4.2256\n", "Adagrad alpha 0.01 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 84.7989, Validation loss 55.5771\n", "\tEpoch 3, Training loss 84.2010, Validation loss 54.8945\n", "\tEpoch 500, Training loss 57.6999, Validation loss 24.6808\n", "\tEpoch 1000, Training loss 50.5468, Validation loss 16.5840\n", "\tEpoch 1500, Training loss 46.5815, Validation loss 12.1548\n", "\tEpoch 2000, Training loss 44.0707, Validation loss 9.4133\n", "\tEpoch 2500, Training loss 42.3649, Validation loss 7.6167\n", "\tEpoch 3000, Training loss 41.1456, Validation loss 6.3996\n", "\tEpoch 3500, Training loss 40.2350, Validation loss 5.5561\n", "\tEpoch 4000, Training loss 39.5263, Validation loss 4.9612\n", 
"\tEpoch 4500, Training loss 38.9526, Validation loss 4.5352\n", "\tEpoch 5000, Training loss 38.4714, Validation loss 4.2256\n", "Adam alpha 0.0001 epochs 500\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.6468, Validation loss 56.5448\n", "\tEpoch 3, Training loss 85.6382, Validation loss 56.5350\n", "\tEpoch 500, Training loss 81.5026, Validation loss 51.8148\n", "Adam alpha 0.0001 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.6468, Validation loss 56.5448\n", "\tEpoch 3, Training loss 85.6382, Validation loss 56.5350\n", "\tEpoch 500, Training loss 81.5026, Validation loss 51.8148\n", "\tEpoch 1000, Training loss 77.6139, Validation loss 47.3770\n", "\tEpoch 1500, Training loss 73.9759, Validation loss 43.2260\n", "\tEpoch 2000, Training loss 70.5698, Validation loss 39.3404\n", "\tEpoch 2500, Training loss 67.3807, Validation loss 35.7033\n", "\tEpoch 3000, Training loss 64.3965, Validation loss 32.3008\n", "\tEpoch 3500, Training loss 61.6076, Validation loss 29.1220\n", "\tEpoch 4000, Training loss 59.0063, Validation loss 26.1586\n", "\tEpoch 4500, Training loss 56.5864, Validation loss 23.4038\n", "\tEpoch 5000, Training loss 54.3430, Validation loss 20.8522\n", "Adam alpha 0.0001 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.6468, Validation loss 56.5448\n", "\tEpoch 3, Training loss 85.6382, Validation loss 56.5350\n", "\tEpoch 500, Training loss 81.5026, Validation loss 51.8148\n", "\tEpoch 1000, Training loss 77.6139, Validation loss 47.3770\n", "\tEpoch 1500, Training loss 73.9759, Validation loss 43.2260\n", "\tEpoch 2000, Training loss 70.5698, Validation loss 39.3404\n", "\tEpoch 2500, Training loss 67.3807, Validation loss 35.7033\n", "\tEpoch 3000, Training loss 64.3965, Validation loss 32.3008\n", "\tEpoch 3500, Training loss 61.6076, Validation loss 29.1220\n", "\tEpoch 4000, Training loss 59.0063, Validation loss 26.1586\n", "\tEpoch 4500, Training loss 56.5864, Validation loss 23.4038\n", "\tEpoch 5000, Training loss 54.3430, Validation loss 20.8522\n", "Adam alpha 0.001 epochs 500\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.5694, Validation loss 56.4565\n", "\tEpoch 3, Training loss 85.4835, Validation loss 56.3584\n", "\tEpoch 500, Training loss 55.2777, Validation loss 21.9455\n", "Adam alpha 0.001 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.5694, Validation loss 56.4565\n", "\tEpoch 3, Training loss 85.4835, Validation loss 56.3584\n", "\tEpoch 500, Training loss 55.2777, Validation loss 21.9455\n", "\tEpoch 1000, Training loss 42.8083, Validation loss 8.1165\n", "\tEpoch 1500, Training loss 38.4026, Validation loss 4.1677\n", "\tEpoch 2000, Training loss 36.1285, Validation loss 3.3609\n", "\tEpoch 2500, Training loss 34.1304, Validation loss 3.1924\n", "\tEpoch 3000, Training loss 32.2028, Validation loss 3.1374\n", "\tEpoch 3500, Training loss 30.3603, Validation loss 3.1119\n", "\tEpoch 4000, Training loss 28.6105, Validation loss 3.0970\n", "\tEpoch 4500, Training loss 26.9507, Validation loss 3.0854\n", "\tEpoch 5000, Training loss 25.3745, Validation loss 3.0739\n", "Adam alpha 0.001 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.5694, Validation loss 56.4565\n", "\tEpoch 3, Training loss 85.4835, Validation loss 
56.3584\n", "\tEpoch 500, Training loss 55.2777, Validation loss 21.9455\n", "\tEpoch 1000, Training loss 42.8083, Validation loss 8.1165\n", "\tEpoch 1500, Training loss 38.4026, Validation loss 4.1677\n", "\tEpoch 2000, Training loss 36.1285, Validation loss 3.3609\n", "\tEpoch 2500, Training loss 34.1304, Validation loss 3.1924\n", "\tEpoch 3000, Training loss 32.2028, Validation loss 3.1374\n", "\tEpoch 3500, Training loss 30.3603, Validation loss 3.1119\n", "\tEpoch 4000, Training loss 28.6105, Validation loss 3.0970\n", "\tEpoch 4500, Training loss 26.9507, Validation loss 3.0854\n", "\tEpoch 5000, Training loss 25.3745, Validation loss 3.0739\n", "Adam alpha 0.01 epochs 500\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 84.7989, Validation loss 55.5771\n", "\tEpoch 3, Training loss 83.9507, Validation loss 54.6088\n", "\tEpoch 500, Training loss 27.8058, Validation loss 3.0615\n", "Adam alpha 0.01 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 84.7989, Validation loss 55.5771\n", "\tEpoch 3, Training loss 83.9507, Validation loss 54.6088\n", "\tEpoch 500, Training loss 27.8058, Validation loss 3.0615\n", "\tEpoch 1000, Training loss 16.2508, Validation loss 2.8700\n", "\tEpoch 1500, Training loss 9.1708, Validation loss 2.7344\n", "\tEpoch 2000, Training loss 5.4193, Validation loss 2.6365\n", "\tEpoch 2500, Training loss 3.7619, Validation loss 2.5698\n", "\tEpoch 3000, Training loss 3.1943, Validation loss 2.5288\n", "\tEpoch 3500, Training loss 3.0576, Validation loss 2.5073\n", "\tEpoch 4000, Training loss 3.0371, Validation loss 2.4983\n", "\tEpoch 4500, Training loss 3.0354, Validation loss 2.4956\n", "\tEpoch 5000, Training loss 3.0354, Validation loss 2.4950\n", "Adam alpha 0.01 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 84.7989, Validation loss 55.5771\n", "\tEpoch 3, Training loss 83.9507, Validation loss 54.6088\n", "\tEpoch 500, Training loss 27.8058, Validation loss 3.0615\n", "\tEpoch 1000, Training loss 16.2508, Validation loss 2.8700\n", "\tEpoch 1500, Training loss 9.1708, Validation loss 2.7344\n", "\tEpoch 2000, Training loss 5.4193, Validation loss 2.6365\n", "\tEpoch 2500, Training loss 3.7619, Validation loss 2.5698\n", "\tEpoch 3000, Training loss 3.1943, Validation loss 2.5288\n", "\tEpoch 3500, Training loss 3.0576, Validation loss 2.5073\n", "\tEpoch 4000, Training loss 3.0371, Validation loss 2.4983\n", "\tEpoch 4500, Training loss 3.0354, Validation loss 2.4956\n", "\tEpoch 5000, Training loss 3.0354, Validation loss 2.4950\n", "AdamW alpha 0.0001 epochs 500\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.6469, Validation loss 56.5449\n", "\tEpoch 3, Training loss 85.6383, Validation loss 56.5352\n", "\tEpoch 500, Training loss 81.5413, Validation loss 51.8572\n", "AdamW alpha 0.0001 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.6469, Validation loss 56.5449\n", "\tEpoch 3, Training loss 85.6383, Validation loss 56.5352\n", "\tEpoch 500, Training loss 81.5413, Validation loss 51.8572\n", "\tEpoch 1000, Training loss 77.6875, Validation loss 47.4576\n", "\tEpoch 1500, Training loss 74.0827, Validation loss 43.3427\n", "\tEpoch 2000, Training loss 70.7076, Validation loss 39.4906\n", "\tEpoch 2500, Training loss 67.5455, Validation loss 35.8826\n", "\tEpoch 3000, Training loss 64.5878, 
Validation loss 32.5084\n", "\tEpoch 3500, Training loss 61.8212, Validation loss 29.3532\n", "\tEpoch 4000, Training loss 59.2393, Validation loss 26.4101\n", "\tEpoch 4500, Training loss 56.8366, Validation loss 23.6728\n", "\tEpoch 5000, Training loss 54.6060, Validation loss 21.1337\n", "AdamW alpha 0.0001 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.6469, Validation loss 56.5449\n", "\tEpoch 3, Training loss 85.6383, Validation loss 56.5352\n", "\tEpoch 500, Training loss 81.5413, Validation loss 51.8572\n", "\tEpoch 1000, Training loss 77.6875, Validation loss 47.4576\n", "\tEpoch 1500, Training loss 74.0827, Validation loss 43.3427\n", "\tEpoch 2000, Training loss 70.7076, Validation loss 39.4906\n", "\tEpoch 2500, Training loss 67.5455, Validation loss 35.8826\n", "\tEpoch 3000, Training loss 64.5878, Validation loss 32.5084\n", "\tEpoch 3500, Training loss 61.8212, Validation loss 29.3532\n", "\tEpoch 4000, Training loss 59.2393, Validation loss 26.4101\n", "\tEpoch 4500, Training loss 56.8366, Validation loss 23.6728\n", "\tEpoch 5000, Training loss 54.6060, Validation loss 21.1337\n", "AdamW alpha 0.001 epochs 500\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.5701, Validation loss 56.4573\n", "\tEpoch 3, Training loss 85.4850, Validation loss 56.3601\n", "\tEpoch 500, Training loss 55.5340, Validation loss 22.2194\n", "AdamW alpha 0.001 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.5701, Validation loss 56.4573\n", "\tEpoch 3, Training loss 85.4850, Validation loss 56.3601\n", "\tEpoch 500, Training loss 55.5340, Validation loss 22.2194\n", "\tEpoch 1000, Training loss 43.0774, Validation loss 8.3753\n", "\tEpoch 1500, Training loss 38.6096, Validation loss 4.3015\n", "\tEpoch 2000, Training loss 36.3186, Validation loss 3.4165\n", "\tEpoch 2500, Training loss 34.3219, Validation loss 3.2153\n", "\tEpoch 3000, Training loss 32.3932, Validation loss 3.1444\n", "\tEpoch 3500, Training loss 30.5497, Validation loss 3.1106\n", "\tEpoch 4000, Training loss 28.8042, Validation loss 3.0922\n", "\tEpoch 4500, Training loss 27.1559, Validation loss 3.0798\n", "\tEpoch 5000, Training loss 25.5986, Validation loss 3.0690\n", "AdamW alpha 0.001 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.5701, Validation loss 56.4573\n", "\tEpoch 3, Training loss 85.4850, Validation loss 56.3601\n", "\tEpoch 500, Training loss 55.5340, Validation loss 22.2194\n", "\tEpoch 1000, Training loss 43.0774, Validation loss 8.3753\n", "\tEpoch 1500, Training loss 38.6096, Validation loss 4.3015\n", "\tEpoch 2000, Training loss 36.3186, Validation loss 3.4165\n", "\tEpoch 2500, Training loss 34.3219, Validation loss 3.2153\n", "\tEpoch 3000, Training loss 32.3932, Validation loss 3.1444\n", "\tEpoch 3500, Training loss 30.5497, Validation loss 3.1106\n", "\tEpoch 4000, Training loss 28.8042, Validation loss 3.0922\n", "\tEpoch 4500, Training loss 27.1559, Validation loss 3.0798\n", "\tEpoch 5000, Training loss 25.5986, Validation loss 3.0690\n", "AdamW alpha 0.01 epochs 500\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 84.8065, Validation loss 55.5854\n", "\tEpoch 3, Training loss 83.9657, Validation loss 54.6253\n", "\tEpoch 500, Training loss 28.1584, Validation loss 3.0684\n", "AdamW alpha 0.01 epochs 5000\n", "\tEpoch 1, Training 
loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 84.8065, Validation loss 55.5854\n", "\tEpoch 3, Training loss 83.9657, Validation loss 54.6253\n", "\tEpoch 500, Training loss 28.1584, Validation loss 3.0684\n", "\tEpoch 1000, Training loss 16.9523, Validation loss 2.8717\n", "\tEpoch 1500, Training loss 10.1140, Validation loss 2.7438\n", "\tEpoch 2000, Training loss 6.3451, Validation loss 2.6549\n", "\tEpoch 2500, Training loss 4.4604, Validation loss 2.5943\n", "\tEpoch 3000, Training loss 3.6122, Validation loss 2.5549\n", "\tEpoch 3500, Training loss 3.2670, Validation loss 2.5310\n", "\tEpoch 4000, Training loss 3.1350, Validation loss 2.5175\n", "\tEpoch 4500, Training loss 3.0837, Validation loss 2.5101\n", "\tEpoch 5000, Training loss 3.0614, Validation loss 2.5059\n", "AdamW alpha 0.01 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 84.8065, Validation loss 55.5854\n", "\tEpoch 3, Training loss 83.9657, Validation loss 54.6253\n", "\tEpoch 500, Training loss 28.1584, Validation loss 3.0684\n", "\tEpoch 1000, Training loss 16.9523, Validation loss 2.8717\n", "\tEpoch 1500, Training loss 10.1140, Validation loss 2.7438\n", "\tEpoch 2000, Training loss 6.3451, Validation loss 2.6549\n", "\tEpoch 2500, Training loss 4.4604, Validation loss 2.5943\n", "\tEpoch 3000, Training loss 3.6122, Validation loss 2.5549\n", "\tEpoch 3500, Training loss 3.2670, Validation loss 2.5310\n", "\tEpoch 4000, Training loss 3.1350, Validation loss 2.5175\n", "\tEpoch 4500, Training loss 3.0837, Validation loss 2.5101\n", "\tEpoch 5000, Training loss 3.0614, Validation loss 2.5059\n", "Adamax alpha 0.0001 epochs 500\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.6468, Validation loss 56.5448\n", "\tEpoch 3, Training loss 85.6382, Validation loss 56.5350\n", "\tEpoch 500, Training loss 81.4559, Validation loss 51.7608\n", "Adamax alpha 0.0001 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.6468, Validation loss 56.5448\n", "\tEpoch 3, Training loss 85.6382, Validation loss 56.5350\n", "\tEpoch 500, Training loss 81.4559, Validation loss 51.7608\n", "\tEpoch 1000, Training loss 77.4467, Validation loss 47.1834\n", "\tEpoch 1500, Training loss 73.6369, Validation loss 42.8327\n", "\tEpoch 2000, Training loss 70.0265, Validation loss 38.7087\n", "\tEpoch 2500, Training loss 66.6155, Validation loss 34.8115\n", "\tEpoch 3000, Training loss 63.4039, Validation loss 31.1410\n", "\tEpoch 3500, Training loss 60.3916, Validation loss 27.6971\n", "\tEpoch 4000, Training loss 57.5788, Validation loss 24.4801\n", "\tEpoch 4500, Training loss 54.9653, Validation loss 21.4897\n", "\tEpoch 5000, Training loss 52.5513, Validation loss 18.7261\n", "Adamax alpha 0.0001 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.6468, Validation loss 56.5448\n", "\tEpoch 3, Training loss 85.6382, Validation loss 56.5350\n", "\tEpoch 500, Training loss 81.4559, Validation loss 51.7608\n", "\tEpoch 1000, Training loss 77.4467, Validation loss 47.1834\n", "\tEpoch 1500, Training loss 73.6369, Validation loss 42.8327\n", "\tEpoch 2000, Training loss 70.0265, Validation loss 38.7087\n", "\tEpoch 2500, Training loss 66.6155, Validation loss 34.8115\n", "\tEpoch 3000, Training loss 63.4039, Validation loss 31.1410\n", "\tEpoch 3500, Training loss 60.3916, Validation loss 27.6971\n", "\tEpoch 4000, 
Training loss 57.5788, Validation loss 24.4801\n", "\tEpoch 4500, Training loss 54.9653, Validation loss 21.4897\n", "\tEpoch 5000, Training loss 52.5513, Validation loss 18.7261\n", "Adamax alpha 0.001 epochs 500\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.5694, Validation loss 56.4565\n", "\tEpoch 3, Training loss 85.4834, Validation loss 56.3584\n", "\tEpoch 500, Training loss 53.0044, Validation loss 19.4455\n", "Adamax alpha 0.001 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.5694, Validation loss 56.4565\n", "\tEpoch 3, Training loss 85.4834, Validation loss 56.3584\n", "\tEpoch 500, Training loss 53.0044, Validation loss 19.4455\n", "\tEpoch 1000, Training loss 40.1065, Validation loss 5.5895\n", "\tEpoch 1500, Training loss 36.3288, Validation loss 3.3336\n", "\tEpoch 2000, Training loss 34.1454, Validation loss 3.1726\n", "\tEpoch 2500, Training loss 32.3056, Validation loss 3.1781\n", "\tEpoch 3000, Training loss 30.5537, Validation loss 3.1759\n", "\tEpoch 3500, Training loss 28.8615, Validation loss 3.1658\n", "\tEpoch 4000, Training loss 27.2246, Validation loss 3.1503\n", "\tEpoch 4500, Training loss 25.6417, Validation loss 3.1314\n", "\tEpoch 5000, Training loss 24.1124, Validation loss 3.1102\n", "Adamax alpha 0.001 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.5694, Validation loss 56.4565\n", "\tEpoch 3, Training loss 85.4834, Validation loss 56.3584\n", "\tEpoch 500, Training loss 53.0044, Validation loss 19.4455\n", "\tEpoch 1000, Training loss 40.1065, Validation loss 5.5895\n", "\tEpoch 1500, Training loss 36.3288, Validation loss 3.3336\n", "\tEpoch 2000, Training loss 34.1454, Validation loss 3.1726\n", "\tEpoch 2500, Training loss 32.3056, Validation loss 3.1781\n", "\tEpoch 3000, Training loss 30.5537, Validation loss 3.1759\n", "\tEpoch 3500, Training loss 28.8615, Validation loss 3.1658\n", "\tEpoch 4000, Training loss 27.2246, Validation loss 3.1503\n", "\tEpoch 4500, Training loss 25.6417, Validation loss 3.1314\n", "\tEpoch 5000, Training loss 24.1124, Validation loss 3.1102\n", "Adamax alpha 0.01 epochs 500\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 84.7989, Validation loss 55.5771\n", "\tEpoch 3, Training loss 83.9537, Validation loss 54.6123\n", "\tEpoch 500, Training loss 32.9020, Validation loss 3.1556\n", "Adamax alpha 0.01 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 84.7989, Validation loss 55.5771\n", "\tEpoch 3, Training loss 83.9537, Validation loss 54.6123\n", "\tEpoch 500, Training loss 32.9020, Validation loss 3.1556\n", "\tEpoch 1000, Training loss 23.5443, Validation loss 2.9822\n", "\tEpoch 1500, Training loss 14.0532, Validation loss 2.7960\n", "\tEpoch 2000, Training loss 6.9738, Validation loss 2.6378\n", "\tEpoch 2500, Training loss 3.7493, Validation loss 2.5423\n", "\tEpoch 3000, Training loss 3.0767, Validation loss 2.5045\n", "\tEpoch 3500, Training loss 3.0357, Validation loss 2.4957\n", "\tEpoch 4000, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 4500, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 5000, Training loss 3.0354, Validation loss 2.4949\n", "Adamax alpha 0.01 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 84.7989, Validation loss 55.5771\n", "\tEpoch 3, 
Training loss 83.9537, Validation loss 54.6123\n", "\tEpoch 500, Training loss 32.9020, Validation loss 3.1556\n", "\tEpoch 1000, Training loss 23.5443, Validation loss 2.9822\n", "\tEpoch 1500, Training loss 14.0532, Validation loss 2.7960\n", "\tEpoch 2000, Training loss 6.9738, Validation loss 2.6378\n", "\tEpoch 2500, Training loss 3.7493, Validation loss 2.5423\n", "\tEpoch 3000, Training loss 3.0767, Validation loss 2.5045\n", "\tEpoch 3500, Training loss 3.0357, Validation loss 2.4957\n", "\tEpoch 4000, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 4500, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 5000, Training loss 3.0354, Validation loss 2.4949\n", "RMSprop alpha 0.0001 epochs 500\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.5694, Validation loss 56.4565\n", "\tEpoch 3, Training loss 85.5085, Validation loss 56.3870\n", "\tEpoch 500, Training loss 80.4863, Validation loss 50.6544\n", "RMSprop alpha 0.0001 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.5694, Validation loss 56.4565\n", "\tEpoch 3, Training loss 85.5085, Validation loss 56.3870\n", "\tEpoch 500, Training loss 80.4863, Validation loss 50.6544\n", "\tEpoch 1000, Training loss 76.5649, Validation loss 46.1777\n", "\tEpoch 1500, Training loss 72.8422, Validation loss 41.9271\n", "\tEpoch 2000, Training loss 69.3178, Validation loss 37.9020\n", "\tEpoch 2500, Training loss 65.9883, Validation loss 34.0989\n", "\tEpoch 3000, Training loss 62.8568, Validation loss 30.5211\n", "\tEpoch 3500, Training loss 59.9206, Validation loss 27.1655\n", "\tEpoch 4000, Training loss 57.1805, Validation loss 24.0335\n", "\tEpoch 4500, Training loss 54.6366, Validation loss 21.1248\n", "\tEpoch 5000, Training loss 52.2880, Validation loss 18.4388\n", "RMSprop alpha 0.0001 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.5694, Validation loss 56.4565\n", "\tEpoch 3, Training loss 85.5085, Validation loss 56.3870\n", "\tEpoch 500, Training loss 80.4863, Validation loss 50.6544\n", "\tEpoch 1000, Training loss 76.5649, Validation loss 46.1777\n", "\tEpoch 1500, Training loss 72.8422, Validation loss 41.9271\n", "\tEpoch 2000, Training loss 69.3178, Validation loss 37.9020\n", "\tEpoch 2500, Training loss 65.9883, Validation loss 34.0989\n", "\tEpoch 3000, Training loss 62.8568, Validation loss 30.5211\n", "\tEpoch 3500, Training loss 59.9206, Validation loss 27.1655\n", "\tEpoch 4000, Training loss 57.1805, Validation loss 24.0335\n", "\tEpoch 4500, Training loss 54.6366, Validation loss 21.1248\n", "\tEpoch 5000, Training loss 52.2880, Validation loss 18.4388\n", "RMSprop alpha 0.001 epochs 500\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 84.7989, Validation loss 55.5771\n", "\tEpoch 3, Training loss 84.1994, Validation loss 54.8928\n", "\tEpoch 500, Training loss 49.4834, Validation loss 15.3363\n", "RMSprop alpha 0.001 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 84.7989, Validation loss 55.5771\n", "\tEpoch 3, Training loss 84.1994, Validation loss 54.8928\n", "\tEpoch 500, Training loss 49.4834, Validation loss 15.3363\n", "\tEpoch 1000, Training loss 38.8005, Validation loss 4.5215\n", "\tEpoch 1500, Training loss 34.7420, Validation loss 3.1943\n", "\tEpoch 2000, Training loss 32.8426, Validation loss 3.2466\n", "\tEpoch 2500, 
Training loss 31.0912, Validation loss 3.2254\n", "\tEpoch 3000, Training loss 29.3959, Validation loss 3.2013\n", "\tEpoch 3500, Training loss 27.7538, Validation loss 3.1766\n", "\tEpoch 4000, Training loss 26.1648, Validation loss 3.1515\n", "\tEpoch 4500, Training loss 24.6290, Validation loss 3.1267\n", "\tEpoch 5000, Training loss 23.1463, Validation loss 3.1020\n", "RMSprop alpha 0.001 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 84.7989, Validation loss 55.5771\n", "\tEpoch 3, Training loss 84.1994, Validation loss 54.8928\n", "\tEpoch 500, Training loss 49.4834, Validation loss 15.3363\n", "\tEpoch 1000, Training loss 38.8005, Validation loss 4.5215\n", "\tEpoch 1500, Training loss 34.7420, Validation loss 3.1943\n", "\tEpoch 2000, Training loss 32.8426, Validation loss 3.2466\n", "\tEpoch 2500, Training loss 31.0912, Validation loss 3.2254\n", "\tEpoch 3000, Training loss 29.3959, Validation loss 3.2013\n", "\tEpoch 3500, Training loss 27.7538, Validation loss 3.1766\n", "\tEpoch 4000, Training loss 26.1648, Validation loss 3.1515\n", "\tEpoch 4500, Training loss 24.6290, Validation loss 3.1267\n", "\tEpoch 5000, Training loss 23.1463, Validation loss 3.1020\n", "RMSprop alpha 0.01 epochs 500\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 77.4490, Validation loss 47.1859\n", "\tEpoch 3, Training loss 72.3669, Validation loss 41.3872\n", "\tEpoch 500, Training loss 21.2608, Validation loss 3.0369\n", "RMSprop alpha 0.01 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 77.4490, Validation loss 47.1859\n", "\tEpoch 3, Training loss 72.3669, Validation loss 41.3872\n", "\tEpoch 500, Training loss 21.2608, Validation loss 3.0369\n", "\tEpoch 1000, Training loss 10.6040, Validation loss 2.8581\n", "\tEpoch 1500, Training loss 4.8201, Validation loss 2.6892\n", "\tEpoch 2000, Training loss 3.0810, Validation loss 2.5482\n", "\tEpoch 2500, Training loss 3.0363, Validation loss 2.5299\n", "\tEpoch 3000, Training loss 3.0364, Validation loss 2.5300\n", "\tEpoch 3500, Training loss 3.0364, Validation loss 2.5300\n", "\tEpoch 4000, Training loss 3.0364, Validation loss 2.5300\n", "\tEpoch 4500, Training loss 3.0364, Validation loss 2.5300\n", "\tEpoch 5000, Training loss 3.0364, Validation loss 2.5300\n", "RMSprop alpha 0.01 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 77.4490, Validation loss 47.1859\n", "\tEpoch 3, Training loss 72.3669, Validation loss 41.3872\n", "\tEpoch 500, Training loss 21.2608, Validation loss 3.0369\n", "\tEpoch 1000, Training loss 10.6040, Validation loss 2.8581\n", "\tEpoch 1500, Training loss 4.8201, Validation loss 2.6892\n", "\tEpoch 2000, Training loss 3.0810, Validation loss 2.5482\n", "\tEpoch 2500, Training loss 3.0363, Validation loss 2.5299\n", "\tEpoch 3000, Training loss 3.0364, Validation loss 2.5300\n", "\tEpoch 3500, Training loss 3.0364, Validation loss 2.5300\n", "\tEpoch 4000, Training loss 3.0364, Validation loss 2.5300\n", "\tEpoch 4500, Training loss 3.0364, Validation loss 2.5300\n", "\tEpoch 5000, Training loss 3.0364, Validation loss 2.5300\n", "Rprop alpha 0.0001 epochs 500\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.6468, Validation loss 56.5448\n", "\tEpoch 3, Training loss 85.6364, Validation loss 56.5330\n", "\tEpoch 500, Training loss 3.0354, Validation loss 
2.4949\n", "Rprop alpha 0.0001 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.6468, Validation loss 56.5448\n", "\tEpoch 3, Training loss 85.6364, Validation loss 56.5330\n", "\tEpoch 500, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 1000, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 1500, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 2000, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 2500, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 3000, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 3500, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 4000, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 4500, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 5000, Training loss 3.0354, Validation loss 2.4949\n", "Rprop alpha 0.0001 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.6468, Validation loss 56.5448\n", "\tEpoch 3, Training loss 85.6364, Validation loss 56.5330\n", "\tEpoch 500, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 1000, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 1500, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 2000, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 2500, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 3000, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 3500, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 4000, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 4500, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 5000, Training loss 3.0354, Validation loss 2.4949\n", "Rprop alpha 0.001 epochs 500\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.5694, Validation loss 56.4565\n", "\tEpoch 3, Training loss 85.4663, Validation loss 56.3388\n", "\tEpoch 500, Training loss 3.0354, Validation loss 2.4949\n", "Rprop alpha 0.001 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.5694, Validation loss 56.4565\n", "\tEpoch 3, Training loss 85.4663, Validation loss 56.3388\n", "\tEpoch 500, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 1000, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 1500, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 2000, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 2500, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 3000, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 3500, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 4000, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 4500, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 5000, Training loss 3.0354, Validation loss 2.4949\n", "Rprop alpha 0.001 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.5694, Validation loss 56.4565\n", "\tEpoch 3, Training loss 85.4663, Validation loss 56.3388\n", "\tEpoch 500, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 1000, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 1500, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 2000, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 2500, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 3000, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 3500, Training loss 3.0354, Validation loss 
2.4949\n", "\tEpoch 4000, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 4500, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 5000, Training loss 3.0354, Validation loss 2.4949\n", "Rprop alpha 0.01 epochs 500\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 84.7989, Validation loss 55.5771\n", "\tEpoch 3, Training loss 83.7817, Validation loss 54.4159\n", "\tEpoch 500, Training loss 3.0354, Validation loss 2.4949\n", "Rprop alpha 0.01 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 84.7989, Validation loss 55.5771\n", "\tEpoch 3, Training loss 83.7817, Validation loss 54.4159\n", "\tEpoch 500, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 1000, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 1500, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 2000, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 2500, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 3000, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 3500, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 4000, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 4500, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 5000, Training loss 3.0354, Validation loss 2.4949\n", "Rprop alpha 0.01 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 84.7989, Validation loss 55.5771\n", "\tEpoch 3, Training loss 83.7817, Validation loss 54.4159\n", "\tEpoch 500, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 1000, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 1500, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 2000, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 2500, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 3000, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 3500, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 4000, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 4500, Training loss 3.0354, Validation loss 2.4949\n", "\tEpoch 5000, Training loss 3.0354, Validation loss 2.4949\n", "SGD alpha 0.0001 epochs 500\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.0659, Validation loss 55.9043\n", "\tEpoch 3, Training loss 84.4833, Validation loss 55.2618\n", "\tEpoch 500, Training loss 35.2186, Validation loss 3.2051\n", "SGD alpha 0.0001 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.0659, Validation loss 55.9043\n", "\tEpoch 3, Training loss 84.4833, Validation loss 55.2618\n", "\tEpoch 500, Training loss 35.2186, Validation loss 3.2051\n", "\tEpoch 1000, Training loss 34.4265, Validation loss 3.2288\n", "\tEpoch 1500, Training loss 33.7817, Validation loss 3.2290\n", "\tEpoch 2000, Training loss 33.1505, Validation loss 3.2205\n", "\tEpoch 2500, Training loss 32.5322, Validation loss 3.2116\n", "\tEpoch 3000, Training loss 31.9266, Validation loss 3.2028\n", "\tEpoch 3500, Training loss 31.3335, Validation loss 3.1942\n", "\tEpoch 4000, Training loss 30.7525, Validation loss 3.1856\n", "\tEpoch 4500, Training loss 30.1835, Validation loss 3.1772\n", "\tEpoch 5000, Training loss 29.6262, Validation loss 3.1689\n", "SGD alpha 0.0001 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 85.0659, Validation loss 55.9043\n", "\tEpoch 3, Training 
loss 84.4833, Validation loss 55.2618\n", "\tEpoch 500, Training loss 35.2186, Validation loss 3.2051\n", "\tEpoch 1000, Training loss 34.4265, Validation loss 3.2288\n", "\tEpoch 1500, Training loss 33.7817, Validation loss 3.2290\n", "\tEpoch 2000, Training loss 33.1505, Validation loss 3.2205\n", "\tEpoch 2500, Training loss 32.5322, Validation loss 3.2116\n", "\tEpoch 3000, Training loss 31.9266, Validation loss 3.2028\n", "\tEpoch 3500, Training loss 31.3335, Validation loss 3.1942\n", "\tEpoch 4000, Training loss 30.7525, Validation loss 3.1856\n", "\tEpoch 4500, Training loss 30.1835, Validation loss 3.1772\n", "\tEpoch 5000, Training loss 29.6262, Validation loss 3.1689\n", "SGD alpha 0.001 epochs 500\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 79.9171, Validation loss 50.2310\n", "\tEpoch 3, Training loss 74.8356, Validation loss 44.6451\n", "\tEpoch 500, Training loss 29.6356, Validation loss 3.1691\n", "SGD alpha 0.001 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 79.9171, Validation loss 50.2310\n", "\tEpoch 3, Training loss 74.8356, Validation loss 44.6451\n", "\tEpoch 500, Training loss 29.6356, Validation loss 3.1691\n", "\tEpoch 1000, Training loss 24.6520, Validation loss 3.0919\n", "\tEpoch 1500, Training loss 20.6021, Validation loss 3.0243\n", "\tEpoch 2000, Training loss 17.3109, Validation loss 2.9650\n", "\tEpoch 2500, Training loss 14.6364, Validation loss 2.9129\n", "\tEpoch 3000, Training loss 12.4629, Validation loss 2.8670\n", "\tEpoch 3500, Training loss 10.6966, Validation loss 2.8266\n", "\tEpoch 4000, Training loss 9.2613, Validation loss 2.7908\n", "\tEpoch 4500, Training loss 8.0948, Validation loss 2.7591\n", "\tEpoch 5000, Training loss 7.1469, Validation loss 2.7310\n", "SGD alpha 0.001 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 79.9171, Validation loss 50.2310\n", "\tEpoch 3, Training loss 74.8356, Validation loss 44.6451\n", "\tEpoch 500, Training loss 29.6356, Validation loss 3.1691\n", "\tEpoch 1000, Training loss 24.6520, Validation loss 3.0919\n", "\tEpoch 1500, Training loss 20.6021, Validation loss 3.0243\n", "\tEpoch 2000, Training loss 17.3109, Validation loss 2.9650\n", "\tEpoch 2500, Training loss 14.6364, Validation loss 2.9129\n", "\tEpoch 3000, Training loss 12.4629, Validation loss 2.8670\n", "\tEpoch 3500, Training loss 10.6966, Validation loss 2.8266\n", "\tEpoch 4000, Training loss 9.2613, Validation loss 2.7908\n", "\tEpoch 4500, Training loss 8.0948, Validation loss 2.7591\n", "\tEpoch 5000, Training loss 7.1469, Validation loss 2.7310\n", "SGD alpha 0.01 epochs 500\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 43.9632, Validation loss 11.2589\n", "\tEpoch 3, Training loss 36.8792, Validation loss 4.2194\n", "\tEpoch 500, Training loss 7.1544, Validation loss 2.7312\n", "SGD alpha 0.01 epochs 5000\n", "\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n", "\tEpoch 2, Training loss 43.9632, Validation loss 11.2589\n", "\tEpoch 3, Training loss 36.8792, Validation loss 4.2194\n", "\tEpoch 500, Training loss 7.1544, Validation loss 2.7312\n", "\tEpoch 1000, Training loss 3.5517, Validation loss 2.5743\n", "\tEpoch 1500, Training loss 3.1001, Validation loss 2.5225\n", "\tEpoch 2000, Training loss 3.0435, Validation loss 2.5046\n", "\tEpoch 2500, Training loss 3.0364, Validation loss 2.4983\n", "\tEpoch 3000, 
"SGD alpha 0.01 epochs 5000\n",
"\tEpoch 1, Training loss 85.6554, Validation loss 56.5547\n",
"\tEpoch 2, Training loss 43.9632, Validation loss 11.2589\n",
"\tEpoch 3, Training loss 36.8792, Validation loss 4.2194\n",
"\tEpoch 500, Training loss 7.1544, Validation loss 2.7312\n",
"\tEpoch 1000, Training loss 3.5517, Validation loss 2.5743\n",
"\tEpoch 1500, Training loss 3.1001, Validation loss 2.5225\n",
"\tEpoch 2000, Training loss 3.0435, Validation loss 2.5046\n",
"\tEpoch 2500, Training loss 3.0364, Validation loss 2.4983\n",
"\tEpoch 3000, Training loss 3.0355, Validation loss 2.4961\n",
"\tEpoch 3500, Training loss 3.0354, Validation loss 2.4953\n",
"\tEpoch 4000, Training loss 3.0354, Validation loss 2.4950\n",
"\tEpoch 4500, Training loss 3.0354, Validation loss 2.4949\n",
"\tEpoch 5000, Training loss 3.0354, Validation loss 2.4949\n"
] } ], "source": [
"results = []\n",
"val_loss_over_time_by_name = {}  # maps each run's name to a DataFrame of its validation loss per epoch\n",
"optimizer_names = [\n",
"    'ASGD',\n",
"    'Adadelta',\n",
"    'Adagrad',\n",
"    'Adam',\n",
"    'AdamW',\n",
"    'Adamax',\n",
"    'RMSprop',\n",
"    'Rprop',\n",
"    'SGD'\n",
"]\n",
"learning_rates = [1e-4, 1e-3, 1e-2]\n",
"epochs = [500, 5000]\n",
"\n",
"for optimizer_name in optimizer_names:\n",
"    for learning_rate in learning_rates:\n",
"        for number_of_epochs in epochs:\n",
"            name = f\"{optimizer_name} alpha {learning_rate} epochs {number_of_epochs}\"\n",
"            print(name)\n",
"\n",
"            params = torch.tensor([1.0, 0.0], requires_grad=True)\n",
"            optimizer = getattr(optim, optimizer_name)([params], lr=learning_rate)\n",
"\n",
"            learned_params = training_loop(\n",
"                n_epochs = number_of_epochs,\n",
"                optimizer = optimizer,\n",
"                params = params,\n",
"                train_t_u = train_t_un,  # <1>\n",
"                val_t_u = val_t_un,  # <1>\n",
"                train_t_c = train_t_c,\n",
"                val_t_c = val_t_c,\n",
"                print_periodically=True\n",
"            )\n",
"            # beta_1 is the weight multiplied by the measurement in unknown units;\n",
"            # beta_0 is the y intercept\n",
"            beta_1, beta_0, train_loss, val_loss, val_loss_over_time = learned_params\n",
"\n",
"            results.append(\n",
"                {\n",
"                    \"optimizer_name\": optimizer_name,\n",
"                    \"learning_rate\": learning_rate,\n",
"                    \"number_of_epochs\": number_of_epochs,\n",
"                    \"name\": name,\n",
"                    \"w\": beta_1.item(),\n",
"                    \"b\": beta_0.item(),\n",
"                    \"train_loss\": train_loss,\n",
"                    \"val_loss\": val_loss\n",
"                }\n",
"            )\n",
"\n",
"            val_loss_over_time_df = pd.DataFrame(val_loss_over_time).reset_index()\n",
"            val_loss_over_time_df.columns = [\"epoch\", \"val_loss\"]\n",
"\n",
"            val_loss_over_time_by_name[name] = val_loss_over_time_df"
] },
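Loading the collected `results` into a pandas DataFrame makes the whole grid easy to scan side by side. A minimal sketch of the kind of cell that produces the table below, assuming the `results` list of dicts built above (`results_df` is an assumed name, not defined elsewhere in the notebook):

```python
# Sketch: collect the grid-search results into a DataFrame for inspection.
# `results` is the list of dicts appended to in the training loop above.
results_df = pd.DataFrame(results)
results_df
```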
\n", " | optimizer_name | \n", "learning_rate | \n", "number_of_epochs | \n", "name | \n", "w | \n", "b | \n", "train_loss | \n", "val_loss | \n", "
---|---|---|---|---|---|---|---|---|
0 | \n", "ASGD | \n", "0.0001 | \n", "500 | \n", "ASGD alpha 0.0001 epochs 500 | \n", "2.245429 | \n", "0.034534 | \n", "35.218567 | \n", "3.205099 | \n", "
1 | \n", "ASGD | \n", "0.0001 | \n", "5000 | \n", "ASGD alpha 0.0001 epochs 5000 | \n", "2.584742 | \n", "-1.496300 | \n", "29.626274 | \n", "3.168919 | \n", "
2 | \n", "ASGD | \n", "0.0001 | \n", "5000 | \n", "ASGD alpha 0.0001 epochs 5000 | \n", "2.584742 | \n", "-1.496300 | \n", "29.626274 | \n", "3.168919 | \n", "
3 | \n", "ASGD | \n", "0.0010 | \n", "500 | \n", "ASGD alpha 0.001 epochs 500 | \n", "2.584764 | \n", "-1.496413 | \n", "29.635824 | \n", "3.169067 | \n", "
4 | \n", "ASGD | \n", "0.0010 | \n", "5000 | \n", "ASGD alpha 0.001 epochs 5000 | \n", "4.279163 | \n", "-11.060122 | \n", "7.151948 | \n", "2.731149 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
76 | \n", "SGD | \n", "0.0010 | \n", "5000 | \n", "SGD alpha 0.001 epochs 5000 | \n", "4.279835 | \n", "-11.063889 | \n", "7.146945 | \n", "2.731016 | \n", "
77 | \n", "SGD | \n", "0.0010 | \n", "5000 | \n", "SGD alpha 0.001 epochs 5000 | \n", "4.279835 | \n", "-11.063889 | \n", "7.146945 | \n", "2.731016 | \n", "
78 | \n", "SGD | \n", "0.0100 | \n", "500 | \n", "SGD alpha 0.01 epochs 500 | \n", "4.280899 | \n", "-11.069892 | \n", "7.154356 | \n", "2.731246 | \n", "
79 | \n", "SGD | \n", "0.0100 | \n", "5000 | \n", "SGD alpha 0.01 epochs 5000 | \n", "5.377911 | \n", "-17.261759 | \n", "3.035359 | \n", "2.494914 | \n", "
80 | \n", "SGD | \n", "0.0100 | \n", "5000 | \n", "SGD alpha 0.01 epochs 5000 | \n", "5.377911 | \n", "-17.261759 | \n", "3.035359 | \n", "2.494914 | \n", "
81 rows ? 8 columns
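Sorting that DataFrame by validation loss ranks the optimizer, learning rate, and epoch combinations from best to worst, which is what the next table shows. A sketch, reusing the assumed `results_df` from above:

```python
# Sketch: rank all runs by validation loss, lowest (best) first.
results_df.sort_values(by="val_loss")
```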
\n", "\n", " | optimizer_name | \n", "learning_rate | \n", "number_of_epochs | \n", "name | \n", "w | \n", "b | \n", "train_loss | \n", "val_loss | \n", "
---|---|---|---|---|---|---|---|---|
52 | \n", "Adamax | \n", "0.0100 | \n", "5000 | \n", "Adamax alpha 0.01 epochs 5000 | \n", "5.377995 | \n", "-17.262234 | \n", "3.035359 | \n", "2.494895 | \n", "
53 | \n", "Adamax | \n", "0.0100 | \n", "5000 | \n", "Adamax alpha 0.01 epochs 5000 | \n", "5.377995 | \n", "-17.262234 | \n", "3.035359 | \n", "2.494895 | \n", "
65 | \n", "Rprop | \n", "0.0001 | \n", "5000 | \n", "Rprop alpha 0.0001 epochs 5000 | \n", "5.377999 | \n", "-17.262249 | \n", "3.035357 | \n", "2.494897 | \n", "
63 | \n", "Rprop | \n", "0.0001 | \n", "500 | \n", "Rprop alpha 0.0001 epochs 500 | \n", "5.377999 | \n", "-17.262249 | \n", "3.035357 | \n", "2.494897 | \n", "
64 | \n", "Rprop | \n", "0.0001 | \n", "5000 | \n", "Rprop alpha 0.0001 epochs 5000 | \n", "5.377999 | \n", "-17.262249 | \n", "3.035357 | \n", "2.494897 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
11 | \n", "Adadelta | \n", "0.0001 | \n", "5000 | \n", "Adadelta alpha 0.0001 epochs 5000 | \n", "1.007679 | \n", "0.007671 | \n", "84.997284 | \n", "55.803486 | \n", "
10 | \n", "Adadelta | \n", "0.0001 | \n", "5000 | \n", "Adadelta alpha 0.0001 epochs 5000 | \n", "1.007679 | \n", "0.007671 | \n", "84.997284 | \n", "55.803486 | \n", "
18 | \n", "Adagrad | \n", "0.0001 | \n", "500 | \n", "Adagrad alpha 0.0001 epochs 500 | \n", "1.004325 | \n", "0.004324 | \n", "85.284355 | \n", "56.131142 | \n", "
12 | \n", "Adadelta | \n", "0.0010 | \n", "500 | \n", "Adadelta alpha 0.001 epochs 500 | \n", "1.002963 | \n", "0.002962 | \n", "85.401489 | \n", "56.264843 | \n", "
9 | \n", "Adadelta | \n", "0.0001 | \n", "500 | \n", "Adadelta alpha 0.0001 epochs 500 | \n", "1.000295 | \n", "0.000296 | \n", "85.630035 | \n", "56.525707 | \n", "
81 rows ? 8 columns
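Each value in `val_loss_over_time_by_name` is a per-epoch DataFrame of validation loss, so any single run's curve can be pulled out by its name. The final value in the table below (56.525707) matches the `Adadelta alpha 0.0001 epochs 500` run, so a sketch along these lines would reproduce it (the key is inferred from that match):

```python
# Sketch: inspect the per-epoch validation-loss curve for one slow run.
val_loss_over_time_by_name["Adadelta alpha 0.0001 epochs 500"]
```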
\n", "\n", " | epoch | \n", "val_loss | \n", "
---|---|---|
0 | \n", "0 | \n", "56.554653 | \n", "
1 | \n", "1 | \n", "56.554615 | \n", "
2 | \n", "2 | \n", "56.554577 | \n", "
3 | \n", "3 | \n", "56.554543 | \n", "
4 | \n", "4 | \n", "56.554512 | \n", "
... | \n", "... | \n", "... | \n", "
495 | \n", "495 | \n", "56.526024 | \n", "
496 | \n", "496 | \n", "56.525955 | \n", "
497 | \n", "497 | \n", "56.525871 | \n", "
498 | \n", "498 | \n", "56.525791 | \n", "
499 | \n", "499 | \n", "56.525707 | \n", "
500 rows ? 2 columns
\n", "