This is based on code from the following book

The following blog post walks through what PyTorch’s optimizers are.

PyTorch comes with a module of optimizers. We can replace our vanilla gradient descent with any of them without modifying much code.

%matplotlib inline
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot
import torch
torch.set_printoptions(edgeitems=2, linewidth=75)

Taking our input from the previous notebook and applying our scaling

# Targets the model is fitted against (passed to loss_fn as t_c).
t_c = torch.tensor(
    [0.5, 14.0, 15.0, 28.0, 11.0, 8.0, 3.0, -4.0, 6.0, 13.0, 21.0]
)
# Raw model inputs, one per target.
t_u = torch.tensor(
    [35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4]
)
# Inputs scaled down by a factor of ten.
t_un = t_u * 0.1

Same model and loss function as before.

def model(t_u, w, b):
    """Return the linear prediction ``w * t_u + b``.

    Args:
        t_u: Model input (a tensor or a scalar).
        w: Weight multiplied with the input.
        b: Bias added to the product.
    """
    return w * t_u + b
def loss_fn(t_p, t_c):
    """Return the mean squared difference between ``t_p`` and ``t_c``.

    Args:
        t_p: Predicted values.
        t_c: Target values.
    """
    squared_diffs = (t_p - t_c)**2
    return squared_diffs.mean()
import torch.optim as optim

dir(optim)
['ASGD',
'LBFGS',
'Optimizer',
'RMSprop',
'Rprop',
'SGD',
'__builtins__',
'__cached__',
'__doc__',
'__file__',
'__name__',
'__package__',
'__path__',
'__spec__',
'lr_scheduler']
# The parameter tensor is unpacked as (w, b) when calling model(); start at
# w=1, b=0. requires_grad=True makes autograd fill params.grad on backward(),
# which is what the optimizer reads when step() is called.
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-5
optimizer = optim.SGD([params], lr=learning_rate)

The values of our parameters are updated when we call step.

The code below forgets to zero out the gradients!

# Forward pass on the raw (unscaled) inputs; *params unpacks into (w, b).
t_p = model(t_u, *params)
loss = loss_fn(t_p, t_c)
# Backpropagate: autograd adds d(loss)/d(params) into params.grad.
loss.backward()

# Apply one optimizer update to params using the gradient in params.grad.
optimizer.step()

# Notebook display of the updated parameter values.
params

Now we can use this snippet in a loop for training

# Fresh parameters so this step starts at w=1, b=0 and carries no gradient
# left over from the earlier cell (a new tensor has params.grad == None).
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-2
optimizer = optim.SGD([params], lr=learning_rate)

# Forward pass on the scaled inputs.
t_p = model(t_un, *params)
loss = loss_fn(t_p, t_c)

# backward() adds into params.grad instead of overwriting it, so clear any
# previously accumulated gradient before computing the new one.
optimizer.zero_grad()
loss.backward()
optimizer.step()

# Notebook display of the updated parameter values.
params
def training_loop(n_epochs, optimizer, params, t_u, t_c):
    """Fit the linear model with ``n_epochs`` full-batch optimizer steps.

    Args:
        n_epochs: Number of update steps to run.
        optimizer: Optimizer whose ``zero_grad()`` and ``step()`` are called
            each epoch; it should have been constructed over ``params``.
        params: Tensor unpacked as ``(w, b)`` when calling ``model``.
        t_u: Model inputs.
        t_c: Targets passed to ``loss_fn``.

    Returns:
        The ``params`` object that was passed in.
    """
    for epoch in range(1, n_epochs + 1):
        t_p = model(t_u, *params)
        loss = loss_fn(t_p, t_c)

        # Gradients accumulate across backward() calls; without this reset
        # every step would use the sum of all gradients seen so far.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch % 500 == 0:
            print(f"Epoch {epoch}, Loss {float(loss):f}")

    return params
# Re-initialise so this run starts from w=1, b=0 instead of continuing from
# whatever values (and accumulated gradient) earlier cells left behind.
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-2
# The optimizer must hold the same tensor object that is handed to
# training_loop; otherwise step() would update a different tensor.
optimizer = optim.SGD([params], lr=learning_rate)

training_loop(
    n_epochs=5000,
    optimizer=optimizer,
    params=params,
    t_u=t_un,
    t_c=t_c)
Epoch 500, Loss 7.860116
Epoch 1000, Loss 3.828538
Epoch 1500, Loss 3.092191
Epoch 2000, Loss 2.957697
Epoch 2500, Loss 2.933134
Epoch 3000, Loss 2.928648
Epoch 3500, Loss 2.927830
Epoch 4000, Loss 2.927679
Epoch 4500, Loss 2.927652
Epoch 5000, Loss 2.927647

And we get the same loss

# Re-initialise so Adam starts from w=1, b=0 rather than from the parameters
# already trained by the previous run.
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-1
optimizer = optim.Adam([params], lr=learning_rate)

# Note that this run is fed the raw inputs t_u, not the scaled t_un.
training_loop(
    n_epochs=2000,
    optimizer=optimizer,
    params=params,
    t_u=t_u,
    t_c=t_c)
Epoch 500, Loss 7.612903
Epoch 1000, Loss 3.086700
Epoch 1500, Loss 2.928578
Epoch 2000, Loss 2.927646

## Training and Validation Splits

n_samples = t_u.shape[0]
# Hold out 20% of the samples (rounded down) for validation.
n_val = int(0.2 * n_samples)
n_train = n_samples - n_val

# Random permutation of the sample indices; unseeded, so the split differs
# from run to run.
shuffled_indices = torch.randperm(n_samples)

# Slice by an explicit train count. The negative-index form [:-n_val] /
# [-n_val:] silently swaps the splits when n_val == 0 (fewer than five
# samples), because -0 == 0 makes the train slice empty and the validation
# slice the whole set.
train_indices = shuffled_indices[:n_train]
val_indices = shuffled_indices[n_train:]

# Notebook display of the two index tensors.
train_indices, val_indices
(tensor([ 5,  9,  1,  6,  7, 10,  3,  8,  0]), tensor([2, 4]))
# Index inputs and targets with the same index tensors so each input stays
# paired with its target.
train_t_u, val_t_u = t_u[train_indices], t_u[val_indices]
train_t_c, val_t_c = t_c[train_indices], t_c[val_indices]

# Scale both input splits by the same factor of 0.1.
train_t_un, val_t_un = train_t_u * 0.1, val_t_u * 0.1
def training_loop(n_epochs, optimizer, params, train_t_u, val_t_u,
                  train_t_c, val_t_c, print_periodically=True):
    """Train on the training split while recording validation loss per epoch.

    Args:
        n_epochs: Number of update steps to run; must be at least 1.
        optimizer: Optimizer whose ``zero_grad()`` and ``step()`` are called
            each epoch; it should have been constructed over ``params``.
        params: Tensor unpacked as ``(w, b)`` when calling ``model``.
        train_t_u: Training inputs.
        val_t_u: Validation inputs.
        train_t_c: Training targets.
        val_t_c: Validation targets.
        print_periodically: When true, print both losses for the first three
            epochs and for every 500th epoch.

    Returns:
        A tuple ``(*params, train_loss, val_loss, val_loss_each_epoch)``: the
        elements of ``params`` as read after the final step, the training and
        validation loss (floats) computed in the final epoch before that
        step, and the list of validation losses (floats), one per epoch.

    Raises:
        ValueError: If ``n_epochs`` is less than 1, since there would be no
            loss values to return.
    """
    if n_epochs < 1:
        raise ValueError("n_epochs must be at least 1")

    val_loss_each_epoch = []
    for epoch in range(1, n_epochs + 1):
        train_t_p = model(train_t_u, *params)
        train_loss = loss_fn(train_t_p, train_t_c)

        # Validation is evaluation only: backward() is never called on it, so
        # skip building an autograd graph for these operations.
        with torch.no_grad():
            val_t_p = model(val_t_u, *params)
            val_loss = loss_fn(val_t_p, val_t_c)

        val_loss_each_epoch.append(val_loss.item())

        # Gradients accumulate across backward() calls; reset them so each
        # step uses only the current epoch's training gradient.
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

        if print_periodically and (epoch <= 3 or epoch % 500 == 0):
            print(f"\tEpoch {epoch}, Training loss {train_loss.item():.4f},"
                  f" Validation loss {val_loss.item():.4f}")

    return (*params, train_loss.item(), val_loss.item(), val_loss_each_epoch)
# Re-initialise so this run starts from w=1, b=0 rather than from the
# parameters trained by earlier cells.
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-2
optimizer = optim.SGD([params], lr=learning_rate)

# Both splits are passed in their scaled (0.1 *) form.
training_loop(
    n_epochs=3000,
    optimizer=optimizer,
    params=params,
    train_t_u=train_t_un,
    val_t_u=val_t_un,
    train_t_c=train_t_c,
    val_t_c=val_t_c)
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 43.9632, Validation loss 11.2589
Epoch 3, Training loss 36.8792, Validation loss 4.2194
Epoch 500, Training loss 7.1544, Validation loss 2.7312
Epoch 1000, Training loss 3.5517, Validation loss 2.5743
Epoch 1500, Training loss 3.1001, Validation loss 2.5225
Epoch 2000, Training loss 3.0435, Validation loss 2.5046
Epoch 2500, Training loss 3.0364, Validation loss 2.4983
Epoch 3000, Training loss 3.0355, Validation loss 2.4961

3.0354840755462646,
2.4961061477661133)

## Searching

# One summary dict per (optimizer, learning rate, epoch count) run.
results = []
# Maps each run name to a DataFrame of that run's per-epoch validation loss.
val_loss_over_time_by_name = {}
optimizer_names = [
    'ASGD',
    'RMSprop',
    'Rprop',
    'SGD',
]
learning_rates = [1e-4, 1e-3, 1e-2]
# NOTE(review): 5000 appears twice, so each optimizer/learning-rate pair is
# trained twice under the same name; the second run overwrites the first in
# val_loss_over_time_by_name and adds a duplicate row to results. Confirm
# whether a different third value was intended.
epochs = [500, 5000, 5000]

for optimizer_name in optimizer_names:
    for learning_rate in learning_rates:
        for number_of_epochs in epochs:
            name = f"{optimizer_name} alpha {learning_rate} epochs {number_of_epochs}"
            print(name)

            # Every configuration must start from the same initial point
            # (w=1, b=0); otherwise each run would continue from the
            # parameters the previous run ended on and the comparison
            # between configurations would be meaningless.
            params = torch.tensor([1.0, 0.0], requires_grad=True)
            # Look the optimizer class up by name on torch.optim.
            optimizer = getattr(optim, optimizer_name)([params], lr=learning_rate)

            learned_params = training_loop(
                n_epochs=number_of_epochs,
                optimizer=optimizer,
                params=params,
                train_t_u=train_t_un,
                val_t_u=val_t_un,
                train_t_c=train_t_c,
                val_t_c=val_t_c,
                print_periodically=True,
            )
            # beta_1 is the weight (first element of params) and beta_0 the
            # intercept (second element), matching model(t_u, w, b).
            beta_1, beta_0, train_loss, val_loss, val_loss_over_time = learned_params

            results.append(
                {
                    "optimizer_name": optimizer_name,
                    "learning_rate": learning_rate,
                    "number_of_epochs": number_of_epochs,
                    "name": name,
                    "w": beta_1.item(),
                    "b": beta_0.item(),
                    "train_loss": train_loss,
                    "val_loss": val_loss,
                }
            )

            # reset_index() turns the list position into a column, so the
            # "epoch" column here is 0-based.
            val_loss_over_time_df = pd.DataFrame(val_loss_over_time).reset_index()
            val_loss_over_time_df.columns = ["epoch", "val_loss"]

            val_loss_over_time_by_name[name] = val_loss_over_time_df
ASGD alpha 0.0001 epochs 500
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.0659, Validation loss 55.9043
Epoch 3, Training loss 84.4833, Validation loss 55.2618
Epoch 500, Training loss 35.2186, Validation loss 3.2051
ASGD alpha 0.0001 epochs 5000
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.0659, Validation loss 55.9043
Epoch 3, Training loss 84.4833, Validation loss 55.2618
Epoch 500, Training loss 35.2186, Validation loss 3.2051
Epoch 1000, Training loss 34.4265, Validation loss 3.2288
Epoch 1500, Training loss 33.7817, Validation loss 3.2290
Epoch 2000, Training loss 33.1505, Validation loss 3.2205
Epoch 2500, Training loss 32.5322, Validation loss 3.2116
Epoch 3000, Training loss 31.9267, Validation loss 3.2028
Epoch 3500, Training loss 31.3335, Validation loss 3.1942
Epoch 4000, Training loss 30.7526, Validation loss 3.1856
Epoch 4500, Training loss 30.1836, Validation loss 3.1772
Epoch 5000, Training loss 29.6263, Validation loss 3.1689
ASGD alpha 0.0001 epochs 5000
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.0659, Validation loss 55.9043
Epoch 3, Training loss 84.4833, Validation loss 55.2618
Epoch 500, Training loss 35.2186, Validation loss 3.2051
Epoch 1000, Training loss 34.4265, Validation loss 3.2288
Epoch 1500, Training loss 33.7817, Validation loss 3.2290
Epoch 2000, Training loss 33.1505, Validation loss 3.2205
Epoch 2500, Training loss 32.5322, Validation loss 3.2116
Epoch 3000, Training loss 31.9267, Validation loss 3.2028
Epoch 3500, Training loss 31.3335, Validation loss 3.1942
Epoch 4000, Training loss 30.7526, Validation loss 3.1856
Epoch 4500, Training loss 30.1836, Validation loss 3.1772
Epoch 5000, Training loss 29.6263, Validation loss 3.1689
ASGD alpha 0.001 epochs 500
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 79.9171, Validation loss 50.2310
Epoch 3, Training loss 74.8356, Validation loss 44.6451
Epoch 500, Training loss 29.6358, Validation loss 3.1691
ASGD alpha 0.001 epochs 5000
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 79.9171, Validation loss 50.2310
Epoch 3, Training loss 74.8356, Validation loss 44.6451
Epoch 500, Training loss 29.6358, Validation loss 3.1691
Epoch 1000, Training loss 24.6529, Validation loss 3.0919
Epoch 1500, Training loss 20.6038, Validation loss 3.0243
Epoch 2000, Training loss 17.3134, Validation loss 2.9650
Epoch 2500, Training loss 14.6397, Validation loss 2.9130
Epoch 3000, Training loss 12.4670, Validation loss 2.8671
Epoch 3500, Training loss 10.7012, Validation loss 2.8266
Epoch 4000, Training loss 9.2661, Validation loss 2.7909
Epoch 4500, Training loss 8.0998, Validation loss 2.7592
Epoch 5000, Training loss 7.1519, Validation loss 2.7311
ASGD alpha 0.001 epochs 5000
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 79.9171, Validation loss 50.2310
Epoch 3, Training loss 74.8356, Validation loss 44.6451
Epoch 500, Training loss 29.6358, Validation loss 3.1691
Epoch 1000, Training loss 24.6529, Validation loss 3.0919
Epoch 1500, Training loss 20.6038, Validation loss 3.0243
Epoch 2000, Training loss 17.3134, Validation loss 2.9650
Epoch 2500, Training loss 14.6397, Validation loss 2.9130
Epoch 3000, Training loss 12.4670, Validation loss 2.8671
Epoch 3500, Training loss 10.7012, Validation loss 2.8266
Epoch 4000, Training loss 9.2661, Validation loss 2.7909
Epoch 4500, Training loss 8.0998, Validation loss 2.7592
Epoch 5000, Training loss 7.1519, Validation loss 2.7311
ASGD alpha 0.01 epochs 500
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 43.9632, Validation loss 11.2589
Epoch 3, Training loss 36.8792, Validation loss 4.2195
Epoch 500, Training loss 7.1592, Validation loss 2.7314
ASGD alpha 0.01 epochs 5000
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 43.9632, Validation loss 11.2589
Epoch 3, Training loss 36.8792, Validation loss 4.2195
Epoch 500, Training loss 7.1592, Validation loss 2.7314
Epoch 1000, Training loss 3.5551, Validation loss 2.5745
Epoch 1500, Training loss 3.1015, Validation loss 2.5228
Epoch 2000, Training loss 3.0440, Validation loss 2.5049
Epoch 2500, Training loss 3.0366, Validation loss 2.4986
Epoch 3000, Training loss 3.0356, Validation loss 2.4964
Epoch 3500, Training loss 3.0354, Validation loss 2.4956
Epoch 4000, Training loss 3.0354, Validation loss 2.4953
Epoch 4500, Training loss 3.0354, Validation loss 2.4952
Epoch 5000, Training loss 3.0354, Validation loss 2.4952
ASGD alpha 0.01 epochs 5000
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 43.9632, Validation loss 11.2589
Epoch 3, Training loss 36.8792, Validation loss 4.2195
Epoch 500, Training loss 7.1592, Validation loss 2.7314
Epoch 1000, Training loss 3.5551, Validation loss 2.5745
Epoch 1500, Training loss 3.1015, Validation loss 2.5228
Epoch 2000, Training loss 3.0440, Validation loss 2.5049
Epoch 2500, Training loss 3.0366, Validation loss 2.4986
Epoch 3000, Training loss 3.0356, Validation loss 2.4964
Epoch 3500, Training loss 3.0354, Validation loss 2.4956
Epoch 4000, Training loss 3.0354, Validation loss 2.4953
Epoch 4500, Training loss 3.0354, Validation loss 2.4952
Epoch 5000, Training loss 3.0354, Validation loss 2.4952
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.6554, Validation loss 56.5546
Epoch 3, Training loss 85.6553, Validation loss 56.5546
Epoch 500, Training loss 85.6300, Validation loss 56.5257
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.6554, Validation loss 56.5546
Epoch 3, Training loss 85.6553, Validation loss 56.5546
Epoch 500, Training loss 85.6300, Validation loss 56.5257
Epoch 1000, Training loss 85.5900, Validation loss 56.4800
Epoch 1500, Training loss 85.5400, Validation loss 56.4229
Epoch 2000, Training loss 85.4813, Validation loss 56.3559
Epoch 2500, Training loss 85.4156, Validation loss 56.2810
Epoch 3000, Training loss 85.3429, Validation loss 56.1979
Epoch 3500, Training loss 85.2643, Validation loss 56.1083
Epoch 4000, Training loss 85.1803, Validation loss 56.0124
Epoch 4500, Training loss 85.0912, Validation loss 55.9107
Epoch 5000, Training loss 84.9973, Validation loss 55.8035
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.6554, Validation loss 56.5546
Epoch 3, Training loss 85.6553, Validation loss 56.5546
Epoch 500, Training loss 85.6300, Validation loss 56.5257
Epoch 1000, Training loss 85.5900, Validation loss 56.4800
Epoch 1500, Training loss 85.5400, Validation loss 56.4229
Epoch 2000, Training loss 85.4813, Validation loss 56.3559
Epoch 2500, Training loss 85.4156, Validation loss 56.2810
Epoch 3000, Training loss 85.3429, Validation loss 56.1979
Epoch 3500, Training loss 85.2643, Validation loss 56.1083
Epoch 4000, Training loss 85.1803, Validation loss 56.0124
Epoch 4500, Training loss 85.0912, Validation loss 55.9107
Epoch 5000, Training loss 84.9973, Validation loss 55.8035
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.6551, Validation loss 56.5543
Epoch 3, Training loss 85.6548, Validation loss 56.5540
Epoch 500, Training loss 85.4015, Validation loss 56.2648
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.6551, Validation loss 56.5543
Epoch 3, Training loss 85.6548, Validation loss 56.5540
Epoch 500, Training loss 85.4015, Validation loss 56.2648
Epoch 1000, Training loss 85.0055, Validation loss 55.8128
Epoch 1500, Training loss 84.5108, Validation loss 55.2482
Epoch 2000, Training loss 83.9376, Validation loss 54.5939
Epoch 2500, Training loss 83.2986, Validation loss 53.8647
Epoch 3000, Training loss 82.6034, Validation loss 53.0712
Epoch 3500, Training loss 81.8594, Validation loss 52.2221
Epoch 4000, Training loss 81.0730, Validation loss 51.3247
Epoch 4500, Training loss 80.2495, Validation loss 50.3849
Epoch 5000, Training loss 79.3935, Validation loss 49.4082
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.6551, Validation loss 56.5543
Epoch 3, Training loss 85.6548, Validation loss 56.5540
Epoch 500, Training loss 85.4015, Validation loss 56.2648
Epoch 1000, Training loss 85.0055, Validation loss 55.8128
Epoch 1500, Training loss 84.5108, Validation loss 55.2482
Epoch 2000, Training loss 83.9376, Validation loss 54.5939
Epoch 2500, Training loss 83.2986, Validation loss 53.8647
Epoch 3000, Training loss 82.6034, Validation loss 53.0712
Epoch 3500, Training loss 81.8594, Validation loss 52.2221
Epoch 4000, Training loss 81.0730, Validation loss 51.3247
Epoch 4500, Training loss 80.2495, Validation loss 50.3849
Epoch 5000, Training loss 79.3935, Validation loss 49.4082
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.6527, Validation loss 56.5515
Epoch 3, Training loss 85.6499, Validation loss 56.5484
Epoch 500, Training loss 83.1666, Validation loss 53.7141
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.6527, Validation loss 56.5515
Epoch 3, Training loss 85.6499, Validation loss 56.5484
Epoch 500, Training loss 83.1666, Validation loss 53.7141
Epoch 1000, Training loss 79.4724, Validation loss 49.4983
Epoch 1500, Training loss 75.1670, Validation loss 44.5863
Epoch 2000, Training loss 70.5870, Validation loss 39.3633
Epoch 2500, Training loss 65.9726, Validation loss 34.1044
Epoch 3000, Training loss 61.5033, Validation loss 29.0160
Epoch 3500, Training loss 57.3126, Validation loss 24.2519
Epoch 4000, Training loss 53.4949, Validation loss 19.9223
Epoch 4500, Training loss 50.1112, Validation loss 16.0998
Epoch 5000, Training loss 47.1933, Validation loss 12.8244
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.6527, Validation loss 56.5515
Epoch 3, Training loss 85.6499, Validation loss 56.5484
Epoch 500, Training loss 83.1666, Validation loss 53.7141
Epoch 1000, Training loss 79.4724, Validation loss 49.4983
Epoch 1500, Training loss 75.1670, Validation loss 44.5863
Epoch 2000, Training loss 70.5870, Validation loss 39.3633
Epoch 2500, Training loss 65.9726, Validation loss 34.1044
Epoch 3000, Training loss 61.5033, Validation loss 29.0160
Epoch 3500, Training loss 57.3126, Validation loss 24.2519
Epoch 4000, Training loss 53.4949, Validation loss 19.9223
Epoch 4500, Training loss 50.1112, Validation loss 16.0998
Epoch 5000, Training loss 47.1933, Validation loss 12.8244
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.6468, Validation loss 56.5448
Epoch 3, Training loss 85.6407, Validation loss 56.5379
Epoch 500, Training loss 85.2844, Validation loss 56.1311
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.6468, Validation loss 56.5448
Epoch 3, Training loss 85.6407, Validation loss 56.5379
Epoch 500, Training loss 85.2844, Validation loss 56.1311
Epoch 1000, Training loss 85.1259, Validation loss 55.9503
Epoch 1500, Training loss 85.0047, Validation loss 55.8119
Epoch 2000, Training loss 84.9026, Validation loss 55.6954
Epoch 2500, Training loss 84.8128, Validation loss 55.5929
Epoch 3000, Training loss 84.7318, Validation loss 55.5004
Epoch 3500, Training loss 84.6574, Validation loss 55.4155
Epoch 4000, Training loss 84.5881, Validation loss 55.3364
Epoch 4500, Training loss 84.5226, Validation loss 55.2617
Epoch 5000, Training loss 84.4618, Validation loss 55.1923
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.6468, Validation loss 56.5448
Epoch 3, Training loss 85.6407, Validation loss 56.5379
Epoch 500, Training loss 85.2844, Validation loss 56.1311
Epoch 1000, Training loss 85.1259, Validation loss 55.9503
Epoch 1500, Training loss 85.0047, Validation loss 55.8119
Epoch 2000, Training loss 84.9026, Validation loss 55.6954
Epoch 2500, Training loss 84.8128, Validation loss 55.5929
Epoch 3000, Training loss 84.7318, Validation loss 55.5004
Epoch 3500, Training loss 84.6574, Validation loss 55.4155
Epoch 4000, Training loss 84.5881, Validation loss 55.3364
Epoch 4500, Training loss 84.5226, Validation loss 55.2617
Epoch 5000, Training loss 84.4618, Validation loss 55.1923
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.5694, Validation loss 56.4565
Epoch 3, Training loss 85.5086, Validation loss 56.3872
Epoch 500, Training loss 82.0349, Validation loss 52.4222
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.5694, Validation loss 56.4565
Epoch 3, Training loss 85.5086, Validation loss 56.3872
Epoch 500, Training loss 82.0349, Validation loss 52.4222
Epoch 1000, Training loss 80.5431, Validation loss 50.7195
Epoch 1500, Training loss 79.4224, Validation loss 49.4405
Epoch 2000, Training loss 78.4938, Validation loss 48.3806
Epoch 2500, Training loss 77.6877, Validation loss 47.4607
Epoch 3000, Training loss 76.9687, Validation loss 46.6402
Epoch 3500, Training loss 76.3155, Validation loss 45.8948
Epoch 4000, Training loss 75.7145, Validation loss 45.2089
Epoch 4500, Training loss 75.1561, Validation loss 44.5717
Epoch 5000, Training loss 74.6332, Validation loss 43.9751
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.5694, Validation loss 56.4565
Epoch 3, Training loss 85.5086, Validation loss 56.3872
Epoch 500, Training loss 82.0349, Validation loss 52.4222
Epoch 1000, Training loss 80.5431, Validation loss 50.7195
Epoch 1500, Training loss 79.4224, Validation loss 49.4405
Epoch 2000, Training loss 78.4938, Validation loss 48.3806
Epoch 2500, Training loss 77.6877, Validation loss 47.4607
Epoch 3000, Training loss 76.9687, Validation loss 46.6402
Epoch 3500, Training loss 76.3155, Validation loss 45.8948
Epoch 4000, Training loss 75.7145, Validation loss 45.2089
Epoch 4500, Training loss 75.1561, Validation loss 44.5717
Epoch 5000, Training loss 74.6332, Validation loss 43.9751
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 84.7989, Validation loss 55.5771
Epoch 3, Training loss 84.2010, Validation loss 54.8945
Epoch 500, Training loss 57.6999, Validation loss 24.6808
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 84.7989, Validation loss 55.5771
Epoch 3, Training loss 84.2010, Validation loss 54.8945
Epoch 500, Training loss 57.6999, Validation loss 24.6808
Epoch 1000, Training loss 50.5468, Validation loss 16.5840
Epoch 1500, Training loss 46.5815, Validation loss 12.1548
Epoch 2000, Training loss 44.0707, Validation loss 9.4133
Epoch 2500, Training loss 42.3649, Validation loss 7.6167
Epoch 3000, Training loss 41.1456, Validation loss 6.3996
Epoch 3500, Training loss 40.2350, Validation loss 5.5561
Epoch 4000, Training loss 39.5263, Validation loss 4.9612
Epoch 4500, Training loss 38.9526, Validation loss 4.5352
Epoch 5000, Training loss 38.4714, Validation loss 4.2256
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 84.7989, Validation loss 55.5771
Epoch 3, Training loss 84.2010, Validation loss 54.8945
Epoch 500, Training loss 57.6999, Validation loss 24.6808
Epoch 1000, Training loss 50.5468, Validation loss 16.5840
Epoch 1500, Training loss 46.5815, Validation loss 12.1548
Epoch 2000, Training loss 44.0707, Validation loss 9.4133
Epoch 2500, Training loss 42.3649, Validation loss 7.6167
Epoch 3000, Training loss 41.1456, Validation loss 6.3996
Epoch 3500, Training loss 40.2350, Validation loss 5.5561
Epoch 4000, Training loss 39.5263, Validation loss 4.9612
Epoch 4500, Training loss 38.9526, Validation loss 4.5352
Epoch 5000, Training loss 38.4714, Validation loss 4.2256
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.6468, Validation loss 56.5448
Epoch 3, Training loss 85.6382, Validation loss 56.5350
Epoch 500, Training loss 81.5026, Validation loss 51.8148
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.6468, Validation loss 56.5448
Epoch 3, Training loss 85.6382, Validation loss 56.5350
Epoch 500, Training loss 81.5026, Validation loss 51.8148
Epoch 1000, Training loss 77.6139, Validation loss 47.3770
Epoch 1500, Training loss 73.9759, Validation loss 43.2260
Epoch 2000, Training loss 70.5698, Validation loss 39.3404
Epoch 2500, Training loss 67.3807, Validation loss 35.7033
Epoch 3000, Training loss 64.3965, Validation loss 32.3008
Epoch 3500, Training loss 61.6076, Validation loss 29.1220
Epoch 4000, Training loss 59.0063, Validation loss 26.1586
Epoch 4500, Training loss 56.5864, Validation loss 23.4038
Epoch 5000, Training loss 54.3430, Validation loss 20.8522
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.6468, Validation loss 56.5448
Epoch 3, Training loss 85.6382, Validation loss 56.5350
Epoch 500, Training loss 81.5026, Validation loss 51.8148
Epoch 1000, Training loss 77.6139, Validation loss 47.3770
Epoch 1500, Training loss 73.9759, Validation loss 43.2260
Epoch 2000, Training loss 70.5698, Validation loss 39.3404
Epoch 2500, Training loss 67.3807, Validation loss 35.7033
Epoch 3000, Training loss 64.3965, Validation loss 32.3008
Epoch 3500, Training loss 61.6076, Validation loss 29.1220
Epoch 4000, Training loss 59.0063, Validation loss 26.1586
Epoch 4500, Training loss 56.5864, Validation loss 23.4038
Epoch 5000, Training loss 54.3430, Validation loss 20.8522
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.5694, Validation loss 56.4565
Epoch 3, Training loss 85.4835, Validation loss 56.3584
Epoch 500, Training loss 55.2777, Validation loss 21.9455
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.5694, Validation loss 56.4565
Epoch 3, Training loss 85.4835, Validation loss 56.3584
Epoch 500, Training loss 55.2777, Validation loss 21.9455
Epoch 1000, Training loss 42.8083, Validation loss 8.1165
Epoch 1500, Training loss 38.4026, Validation loss 4.1677
Epoch 2000, Training loss 36.1285, Validation loss 3.3609
Epoch 2500, Training loss 34.1304, Validation loss 3.1924
Epoch 3000, Training loss 32.2028, Validation loss 3.1374
Epoch 3500, Training loss 30.3603, Validation loss 3.1119
Epoch 4000, Training loss 28.6105, Validation loss 3.0970
Epoch 4500, Training loss 26.9507, Validation loss 3.0854
Epoch 5000, Training loss 25.3745, Validation loss 3.0739
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.5694, Validation loss 56.4565
Epoch 3, Training loss 85.4835, Validation loss 56.3584
Epoch 500, Training loss 55.2777, Validation loss 21.9455
Epoch 1000, Training loss 42.8083, Validation loss 8.1165
Epoch 1500, Training loss 38.4026, Validation loss 4.1677
Epoch 2000, Training loss 36.1285, Validation loss 3.3609
Epoch 2500, Training loss 34.1304, Validation loss 3.1924
Epoch 3000, Training loss 32.2028, Validation loss 3.1374
Epoch 3500, Training loss 30.3603, Validation loss 3.1119
Epoch 4000, Training loss 28.6105, Validation loss 3.0970
Epoch 4500, Training loss 26.9507, Validation loss 3.0854
Epoch 5000, Training loss 25.3745, Validation loss 3.0739
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 84.7989, Validation loss 55.5771
Epoch 3, Training loss 83.9507, Validation loss 54.6088
Epoch 500, Training loss 27.8058, Validation loss 3.0615
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 84.7989, Validation loss 55.5771
Epoch 3, Training loss 83.9507, Validation loss 54.6088
Epoch 500, Training loss 27.8058, Validation loss 3.0615
Epoch 1000, Training loss 16.2508, Validation loss 2.8700
Epoch 1500, Training loss 9.1708, Validation loss 2.7344
Epoch 2000, Training loss 5.4193, Validation loss 2.6365
Epoch 2500, Training loss 3.7619, Validation loss 2.5698
Epoch 3000, Training loss 3.1943, Validation loss 2.5288
Epoch 3500, Training loss 3.0576, Validation loss 2.5073
Epoch 4000, Training loss 3.0371, Validation loss 2.4983
Epoch 4500, Training loss 3.0354, Validation loss 2.4956
Epoch 5000, Training loss 3.0354, Validation loss 2.4950
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 84.7989, Validation loss 55.5771
Epoch 3, Training loss 83.9507, Validation loss 54.6088
Epoch 500, Training loss 27.8058, Validation loss 3.0615
Epoch 1000, Training loss 16.2508, Validation loss 2.8700
Epoch 1500, Training loss 9.1708, Validation loss 2.7344
Epoch 2000, Training loss 5.4193, Validation loss 2.6365
Epoch 2500, Training loss 3.7619, Validation loss 2.5698
Epoch 3000, Training loss 3.1943, Validation loss 2.5288
Epoch 3500, Training loss 3.0576, Validation loss 2.5073
Epoch 4000, Training loss 3.0371, Validation loss 2.4983
Epoch 4500, Training loss 3.0354, Validation loss 2.4956
Epoch 5000, Training loss 3.0354, Validation loss 2.4950
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.6469, Validation loss 56.5449
Epoch 3, Training loss 85.6383, Validation loss 56.5352
Epoch 500, Training loss 81.5413, Validation loss 51.8572
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.6469, Validation loss 56.5449
Epoch 3, Training loss 85.6383, Validation loss 56.5352
Epoch 500, Training loss 81.5413, Validation loss 51.8572
Epoch 1000, Training loss 77.6875, Validation loss 47.4576
Epoch 1500, Training loss 74.0827, Validation loss 43.3427
Epoch 2000, Training loss 70.7076, Validation loss 39.4906
Epoch 2500, Training loss 67.5455, Validation loss 35.8826
Epoch 3000, Training loss 64.5878, Validation loss 32.5084
Epoch 3500, Training loss 61.8212, Validation loss 29.3532
Epoch 4000, Training loss 59.2393, Validation loss 26.4101
Epoch 4500, Training loss 56.8366, Validation loss 23.6728
Epoch 5000, Training loss 54.6060, Validation loss 21.1337
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.6469, Validation loss 56.5449
Epoch 3, Training loss 85.6383, Validation loss 56.5352
Epoch 500, Training loss 81.5413, Validation loss 51.8572
Epoch 1000, Training loss 77.6875, Validation loss 47.4576
Epoch 1500, Training loss 74.0827, Validation loss 43.3427
Epoch 2000, Training loss 70.7076, Validation loss 39.4906
Epoch 2500, Training loss 67.5455, Validation loss 35.8826
Epoch 3000, Training loss 64.5878, Validation loss 32.5084
Epoch 3500, Training loss 61.8212, Validation loss 29.3532
Epoch 4000, Training loss 59.2393, Validation loss 26.4101
Epoch 4500, Training loss 56.8366, Validation loss 23.6728
Epoch 5000, Training loss 54.6060, Validation loss 21.1337
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.5701, Validation loss 56.4573
Epoch 3, Training loss 85.4850, Validation loss 56.3601
Epoch 500, Training loss 55.5340, Validation loss 22.2194
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.5701, Validation loss 56.4573
Epoch 3, Training loss 85.4850, Validation loss 56.3601
Epoch 500, Training loss 55.5340, Validation loss 22.2194
Epoch 1000, Training loss 43.0774, Validation loss 8.3753
Epoch 1500, Training loss 38.6096, Validation loss 4.3015
Epoch 2000, Training loss 36.3186, Validation loss 3.4165
Epoch 2500, Training loss 34.3219, Validation loss 3.2153
Epoch 3000, Training loss 32.3932, Validation loss 3.1444
Epoch 3500, Training loss 30.5497, Validation loss 3.1106
Epoch 4000, Training loss 28.8042, Validation loss 3.0922
Epoch 4500, Training loss 27.1559, Validation loss 3.0798
Epoch 5000, Training loss 25.5986, Validation loss 3.0690
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.5701, Validation loss 56.4573
Epoch 3, Training loss 85.4850, Validation loss 56.3601
Epoch 500, Training loss 55.5340, Validation loss 22.2194
Epoch 1000, Training loss 43.0774, Validation loss 8.3753
Epoch 1500, Training loss 38.6096, Validation loss 4.3015
Epoch 2000, Training loss 36.3186, Validation loss 3.4165
Epoch 2500, Training loss 34.3219, Validation loss 3.2153
Epoch 3000, Training loss 32.3932, Validation loss 3.1444
Epoch 3500, Training loss 30.5497, Validation loss 3.1106
Epoch 4000, Training loss 28.8042, Validation loss 3.0922
Epoch 4500, Training loss 27.1559, Validation loss 3.0798
Epoch 5000, Training loss 25.5986, Validation loss 3.0690
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 84.8065, Validation loss 55.5854
Epoch 3, Training loss 83.9657, Validation loss 54.6253
Epoch 500, Training loss 28.1584, Validation loss 3.0684
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 84.8065, Validation loss 55.5854
Epoch 3, Training loss 83.9657, Validation loss 54.6253
Epoch 500, Training loss 28.1584, Validation loss 3.0684
Epoch 1000, Training loss 16.9523, Validation loss 2.8717
Epoch 1500, Training loss 10.1140, Validation loss 2.7438
Epoch 2000, Training loss 6.3451, Validation loss 2.6549
Epoch 2500, Training loss 4.4604, Validation loss 2.5943
Epoch 3000, Training loss 3.6122, Validation loss 2.5549
Epoch 3500, Training loss 3.2670, Validation loss 2.5310
Epoch 4000, Training loss 3.1350, Validation loss 2.5175
Epoch 4500, Training loss 3.0837, Validation loss 2.5101
Epoch 5000, Training loss 3.0614, Validation loss 2.5059
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 84.8065, Validation loss 55.5854
Epoch 3, Training loss 83.9657, Validation loss 54.6253
Epoch 500, Training loss 28.1584, Validation loss 3.0684
Epoch 1000, Training loss 16.9523, Validation loss 2.8717
Epoch 1500, Training loss 10.1140, Validation loss 2.7438
Epoch 2000, Training loss 6.3451, Validation loss 2.6549
Epoch 2500, Training loss 4.4604, Validation loss 2.5943
Epoch 3000, Training loss 3.6122, Validation loss 2.5549
Epoch 3500, Training loss 3.2670, Validation loss 2.5310
Epoch 4000, Training loss 3.1350, Validation loss 2.5175
Epoch 4500, Training loss 3.0837, Validation loss 2.5101
Epoch 5000, Training loss 3.0614, Validation loss 2.5059
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.6468, Validation loss 56.5448
Epoch 3, Training loss 85.6382, Validation loss 56.5350
Epoch 500, Training loss 81.4559, Validation loss 51.7608
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.6468, Validation loss 56.5448
Epoch 3, Training loss 85.6382, Validation loss 56.5350
Epoch 500, Training loss 81.4559, Validation loss 51.7608
Epoch 1000, Training loss 77.4467, Validation loss 47.1834
Epoch 1500, Training loss 73.6369, Validation loss 42.8327
Epoch 2000, Training loss 70.0265, Validation loss 38.7087
Epoch 2500, Training loss 66.6155, Validation loss 34.8115
Epoch 3000, Training loss 63.4039, Validation loss 31.1410
Epoch 3500, Training loss 60.3916, Validation loss 27.6971
Epoch 4000, Training loss 57.5788, Validation loss 24.4801
Epoch 4500, Training loss 54.9653, Validation loss 21.4897
Epoch 5000, Training loss 52.5513, Validation loss 18.7261
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.6468, Validation loss 56.5448
Epoch 3, Training loss 85.6382, Validation loss 56.5350
Epoch 500, Training loss 81.4559, Validation loss 51.7608
Epoch 1000, Training loss 77.4467, Validation loss 47.1834
Epoch 1500, Training loss 73.6369, Validation loss 42.8327
Epoch 2000, Training loss 70.0265, Validation loss 38.7087
Epoch 2500, Training loss 66.6155, Validation loss 34.8115
Epoch 3000, Training loss 63.4039, Validation loss 31.1410
Epoch 3500, Training loss 60.3916, Validation loss 27.6971
Epoch 4000, Training loss 57.5788, Validation loss 24.4801
Epoch 4500, Training loss 54.9653, Validation loss 21.4897
Epoch 5000, Training loss 52.5513, Validation loss 18.7261
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.5694, Validation loss 56.4565
Epoch 3, Training loss 85.4834, Validation loss 56.3584
Epoch 500, Training loss 53.0044, Validation loss 19.4455
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.5694, Validation loss 56.4565
Epoch 3, Training loss 85.4834, Validation loss 56.3584
Epoch 500, Training loss 53.0044, Validation loss 19.4455
Epoch 1000, Training loss 40.1065, Validation loss 5.5895
Epoch 1500, Training loss 36.3288, Validation loss 3.3336
Epoch 2000, Training loss 34.1454, Validation loss 3.1726
Epoch 2500, Training loss 32.3056, Validation loss 3.1781
Epoch 3000, Training loss 30.5537, Validation loss 3.1759
Epoch 3500, Training loss 28.8615, Validation loss 3.1658
Epoch 4000, Training loss 27.2246, Validation loss 3.1503
Epoch 4500, Training loss 25.6417, Validation loss 3.1314
Epoch 5000, Training loss 24.1124, Validation loss 3.1102
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.5694, Validation loss 56.4565
Epoch 3, Training loss 85.4834, Validation loss 56.3584
Epoch 500, Training loss 53.0044, Validation loss 19.4455
Epoch 1000, Training loss 40.1065, Validation loss 5.5895
Epoch 1500, Training loss 36.3288, Validation loss 3.3336
Epoch 2000, Training loss 34.1454, Validation loss 3.1726
Epoch 2500, Training loss 32.3056, Validation loss 3.1781
Epoch 3000, Training loss 30.5537, Validation loss 3.1759
Epoch 3500, Training loss 28.8615, Validation loss 3.1658
Epoch 4000, Training loss 27.2246, Validation loss 3.1503
Epoch 4500, Training loss 25.6417, Validation loss 3.1314
Epoch 5000, Training loss 24.1124, Validation loss 3.1102
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 84.7989, Validation loss 55.5771
Epoch 3, Training loss 83.9537, Validation loss 54.6123
Epoch 500, Training loss 32.9020, Validation loss 3.1556
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 84.7989, Validation loss 55.5771
Epoch 3, Training loss 83.9537, Validation loss 54.6123
Epoch 500, Training loss 32.9020, Validation loss 3.1556
Epoch 1000, Training loss 23.5443, Validation loss 2.9822
Epoch 1500, Training loss 14.0532, Validation loss 2.7960
Epoch 2000, Training loss 6.9738, Validation loss 2.6378
Epoch 2500, Training loss 3.7493, Validation loss 2.5423
Epoch 3000, Training loss 3.0767, Validation loss 2.5045
Epoch 3500, Training loss 3.0357, Validation loss 2.4957
Epoch 4000, Training loss 3.0354, Validation loss 2.4949
Epoch 4500, Training loss 3.0354, Validation loss 2.4949
Epoch 5000, Training loss 3.0354, Validation loss 2.4949
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 84.7989, Validation loss 55.5771
Epoch 3, Training loss 83.9537, Validation loss 54.6123
Epoch 500, Training loss 32.9020, Validation loss 3.1556
Epoch 1000, Training loss 23.5443, Validation loss 2.9822
Epoch 1500, Training loss 14.0532, Validation loss 2.7960
Epoch 2000, Training loss 6.9738, Validation loss 2.6378
Epoch 2500, Training loss 3.7493, Validation loss 2.5423
Epoch 3000, Training loss 3.0767, Validation loss 2.5045
Epoch 3500, Training loss 3.0357, Validation loss 2.4957
Epoch 4000, Training loss 3.0354, Validation loss 2.4949
Epoch 4500, Training loss 3.0354, Validation loss 2.4949
Epoch 5000, Training loss 3.0354, Validation loss 2.4949
RMSprop alpha 0.0001 epochs 500
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.5694, Validation loss 56.4565
Epoch 3, Training loss 85.5085, Validation loss 56.3870
Epoch 500, Training loss 80.4863, Validation loss 50.6544
RMSprop alpha 0.0001 epochs 5000
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.5694, Validation loss 56.4565
Epoch 3, Training loss 85.5085, Validation loss 56.3870
Epoch 500, Training loss 80.4863, Validation loss 50.6544
Epoch 1000, Training loss 76.5649, Validation loss 46.1777
Epoch 1500, Training loss 72.8422, Validation loss 41.9271
Epoch 2000, Training loss 69.3178, Validation loss 37.9020
Epoch 2500, Training loss 65.9883, Validation loss 34.0989
Epoch 3000, Training loss 62.8568, Validation loss 30.5211
Epoch 3500, Training loss 59.9206, Validation loss 27.1655
Epoch 4000, Training loss 57.1805, Validation loss 24.0335
Epoch 4500, Training loss 54.6366, Validation loss 21.1248
Epoch 5000, Training loss 52.2880, Validation loss 18.4388
RMSprop alpha 0.0001 epochs 5000
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.5694, Validation loss 56.4565
Epoch 3, Training loss 85.5085, Validation loss 56.3870
Epoch 500, Training loss 80.4863, Validation loss 50.6544
Epoch 1000, Training loss 76.5649, Validation loss 46.1777
Epoch 1500, Training loss 72.8422, Validation loss 41.9271
Epoch 2000, Training loss 69.3178, Validation loss 37.9020
Epoch 2500, Training loss 65.9883, Validation loss 34.0989
Epoch 3000, Training loss 62.8568, Validation loss 30.5211
Epoch 3500, Training loss 59.9206, Validation loss 27.1655
Epoch 4000, Training loss 57.1805, Validation loss 24.0335
Epoch 4500, Training loss 54.6366, Validation loss 21.1248
Epoch 5000, Training loss 52.2880, Validation loss 18.4388
RMSprop alpha 0.001 epochs 500
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 84.7989, Validation loss 55.5771
Epoch 3, Training loss 84.1994, Validation loss 54.8928
Epoch 500, Training loss 49.4834, Validation loss 15.3363
RMSprop alpha 0.001 epochs 5000
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 84.7989, Validation loss 55.5771
Epoch 3, Training loss 84.1994, Validation loss 54.8928
Epoch 500, Training loss 49.4834, Validation loss 15.3363
Epoch 1000, Training loss 38.8005, Validation loss 4.5215
Epoch 1500, Training loss 34.7420, Validation loss 3.1943
Epoch 2000, Training loss 32.8426, Validation loss 3.2466
Epoch 2500, Training loss 31.0912, Validation loss 3.2254
Epoch 3000, Training loss 29.3959, Validation loss 3.2013
Epoch 3500, Training loss 27.7538, Validation loss 3.1766
Epoch 4000, Training loss 26.1648, Validation loss 3.1515
Epoch 4500, Training loss 24.6290, Validation loss 3.1267
Epoch 5000, Training loss 23.1463, Validation loss 3.1020
RMSprop alpha 0.001 epochs 5000
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 84.7989, Validation loss 55.5771
Epoch 3, Training loss 84.1994, Validation loss 54.8928
Epoch 500, Training loss 49.4834, Validation loss 15.3363
Epoch 1000, Training loss 38.8005, Validation loss 4.5215
Epoch 1500, Training loss 34.7420, Validation loss 3.1943
Epoch 2000, Training loss 32.8426, Validation loss 3.2466
Epoch 2500, Training loss 31.0912, Validation loss 3.2254
Epoch 3000, Training loss 29.3959, Validation loss 3.2013
Epoch 3500, Training loss 27.7538, Validation loss 3.1766
Epoch 4000, Training loss 26.1648, Validation loss 3.1515
Epoch 4500, Training loss 24.6290, Validation loss 3.1267
Epoch 5000, Training loss 23.1463, Validation loss 3.1020
RMSprop alpha 0.01 epochs 500
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 77.4490, Validation loss 47.1859
Epoch 3, Training loss 72.3669, Validation loss 41.3872
Epoch 500, Training loss 21.2608, Validation loss 3.0369
RMSprop alpha 0.01 epochs 5000
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 77.4490, Validation loss 47.1859
Epoch 3, Training loss 72.3669, Validation loss 41.3872
Epoch 500, Training loss 21.2608, Validation loss 3.0369
Epoch 1000, Training loss 10.6040, Validation loss 2.8581
Epoch 1500, Training loss 4.8201, Validation loss 2.6892
Epoch 2000, Training loss 3.0810, Validation loss 2.5482
Epoch 2500, Training loss 3.0363, Validation loss 2.5299
Epoch 3000, Training loss 3.0364, Validation loss 2.5300
Epoch 3500, Training loss 3.0364, Validation loss 2.5300
Epoch 4000, Training loss 3.0364, Validation loss 2.5300
Epoch 4500, Training loss 3.0364, Validation loss 2.5300
Epoch 5000, Training loss 3.0364, Validation loss 2.5300
RMSprop alpha 0.01 epochs 5000
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 77.4490, Validation loss 47.1859
Epoch 3, Training loss 72.3669, Validation loss 41.3872
Epoch 500, Training loss 21.2608, Validation loss 3.0369
Epoch 1000, Training loss 10.6040, Validation loss 2.8581
Epoch 1500, Training loss 4.8201, Validation loss 2.6892
Epoch 2000, Training loss 3.0810, Validation loss 2.5482
Epoch 2500, Training loss 3.0363, Validation loss 2.5299
Epoch 3000, Training loss 3.0364, Validation loss 2.5300
Epoch 3500, Training loss 3.0364, Validation loss 2.5300
Epoch 4000, Training loss 3.0364, Validation loss 2.5300
Epoch 4500, Training loss 3.0364, Validation loss 2.5300
Epoch 5000, Training loss 3.0364, Validation loss 2.5300
Rprop alpha 0.0001 epochs 500
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.6468, Validation loss 56.5448
Epoch 3, Training loss 85.6364, Validation loss 56.5330
Epoch 500, Training loss 3.0354, Validation loss 2.4949
Rprop alpha 0.0001 epochs 5000
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.6468, Validation loss 56.5448
Epoch 3, Training loss 85.6364, Validation loss 56.5330
Epoch 500, Training loss 3.0354, Validation loss 2.4949
Epoch 1000, Training loss 3.0354, Validation loss 2.4949
Epoch 1500, Training loss 3.0354, Validation loss 2.4949
Epoch 2000, Training loss 3.0354, Validation loss 2.4949
Epoch 2500, Training loss 3.0354, Validation loss 2.4949
Epoch 3000, Training loss 3.0354, Validation loss 2.4949
Epoch 3500, Training loss 3.0354, Validation loss 2.4949
Epoch 4000, Training loss 3.0354, Validation loss 2.4949
Epoch 4500, Training loss 3.0354, Validation loss 2.4949
Epoch 5000, Training loss 3.0354, Validation loss 2.4949
Rprop alpha 0.0001 epochs 5000
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.6468, Validation loss 56.5448
Epoch 3, Training loss 85.6364, Validation loss 56.5330
Epoch 500, Training loss 3.0354, Validation loss 2.4949
Epoch 1000, Training loss 3.0354, Validation loss 2.4949
Epoch 1500, Training loss 3.0354, Validation loss 2.4949
Epoch 2000, Training loss 3.0354, Validation loss 2.4949
Epoch 2500, Training loss 3.0354, Validation loss 2.4949
Epoch 3000, Training loss 3.0354, Validation loss 2.4949
Epoch 3500, Training loss 3.0354, Validation loss 2.4949
Epoch 4000, Training loss 3.0354, Validation loss 2.4949
Epoch 4500, Training loss 3.0354, Validation loss 2.4949
Epoch 5000, Training loss 3.0354, Validation loss 2.4949
Rprop alpha 0.001 epochs 500
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.5694, Validation loss 56.4565
Epoch 3, Training loss 85.4663, Validation loss 56.3388
Epoch 500, Training loss 3.0354, Validation loss 2.4949
Rprop alpha 0.001 epochs 5000
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.5694, Validation loss 56.4565
Epoch 3, Training loss 85.4663, Validation loss 56.3388
Epoch 500, Training loss 3.0354, Validation loss 2.4949
Epoch 1000, Training loss 3.0354, Validation loss 2.4949
Epoch 1500, Training loss 3.0354, Validation loss 2.4949
Epoch 2000, Training loss 3.0354, Validation loss 2.4949
Epoch 2500, Training loss 3.0354, Validation loss 2.4949
Epoch 3000, Training loss 3.0354, Validation loss 2.4949
Epoch 3500, Training loss 3.0354, Validation loss 2.4949
Epoch 4000, Training loss 3.0354, Validation loss 2.4949
Epoch 4500, Training loss 3.0354, Validation loss 2.4949
Epoch 5000, Training loss 3.0354, Validation loss 2.4949
Rprop alpha 0.001 epochs 5000
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.5694, Validation loss 56.4565
Epoch 3, Training loss 85.4663, Validation loss 56.3388
Epoch 500, Training loss 3.0354, Validation loss 2.4949
Epoch 1000, Training loss 3.0354, Validation loss 2.4949
Epoch 1500, Training loss 3.0354, Validation loss 2.4949
Epoch 2000, Training loss 3.0354, Validation loss 2.4949
Epoch 2500, Training loss 3.0354, Validation loss 2.4949
Epoch 3000, Training loss 3.0354, Validation loss 2.4949
Epoch 3500, Training loss 3.0354, Validation loss 2.4949
Epoch 4000, Training loss 3.0354, Validation loss 2.4949
Epoch 4500, Training loss 3.0354, Validation loss 2.4949
Epoch 5000, Training loss 3.0354, Validation loss 2.4949
Rprop alpha 0.01 epochs 500
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 84.7989, Validation loss 55.5771
Epoch 3, Training loss 83.7817, Validation loss 54.4159
Epoch 500, Training loss 3.0354, Validation loss 2.4949
Rprop alpha 0.01 epochs 5000
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 84.7989, Validation loss 55.5771
Epoch 3, Training loss 83.7817, Validation loss 54.4159
Epoch 500, Training loss 3.0354, Validation loss 2.4949
Epoch 1000, Training loss 3.0354, Validation loss 2.4949
Epoch 1500, Training loss 3.0354, Validation loss 2.4949
Epoch 2000, Training loss 3.0354, Validation loss 2.4949
Epoch 2500, Training loss 3.0354, Validation loss 2.4949
Epoch 3000, Training loss 3.0354, Validation loss 2.4949
Epoch 3500, Training loss 3.0354, Validation loss 2.4949
Epoch 4000, Training loss 3.0354, Validation loss 2.4949
Epoch 4500, Training loss 3.0354, Validation loss 2.4949
Epoch 5000, Training loss 3.0354, Validation loss 2.4949
Rprop alpha 0.01 epochs 5000
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 84.7989, Validation loss 55.5771
Epoch 3, Training loss 83.7817, Validation loss 54.4159
Epoch 500, Training loss 3.0354, Validation loss 2.4949
Epoch 1000, Training loss 3.0354, Validation loss 2.4949
Epoch 1500, Training loss 3.0354, Validation loss 2.4949
Epoch 2000, Training loss 3.0354, Validation loss 2.4949
Epoch 2500, Training loss 3.0354, Validation loss 2.4949
Epoch 3000, Training loss 3.0354, Validation loss 2.4949
Epoch 3500, Training loss 3.0354, Validation loss 2.4949
Epoch 4000, Training loss 3.0354, Validation loss 2.4949
Epoch 4500, Training loss 3.0354, Validation loss 2.4949
Epoch 5000, Training loss 3.0354, Validation loss 2.4949
SGD alpha 0.0001 epochs 500
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.0659, Validation loss 55.9043
Epoch 3, Training loss 84.4833, Validation loss 55.2618
Epoch 500, Training loss 35.2186, Validation loss 3.2051
SGD alpha 0.0001 epochs 5000
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.0659, Validation loss 55.9043
Epoch 3, Training loss 84.4833, Validation loss 55.2618
Epoch 500, Training loss 35.2186, Validation loss 3.2051
Epoch 1000, Training loss 34.4265, Validation loss 3.2288
Epoch 1500, Training loss 33.7817, Validation loss 3.2290
Epoch 2000, Training loss 33.1505, Validation loss 3.2205
Epoch 2500, Training loss 32.5322, Validation loss 3.2116
Epoch 3000, Training loss 31.9266, Validation loss 3.2028
Epoch 3500, Training loss 31.3335, Validation loss 3.1942
Epoch 4000, Training loss 30.7525, Validation loss 3.1856
Epoch 4500, Training loss 30.1835, Validation loss 3.1772
Epoch 5000, Training loss 29.6262, Validation loss 3.1689
SGD alpha 0.0001 epochs 5000
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 85.0659, Validation loss 55.9043
Epoch 3, Training loss 84.4833, Validation loss 55.2618
Epoch 500, Training loss 35.2186, Validation loss 3.2051
Epoch 1000, Training loss 34.4265, Validation loss 3.2288
Epoch 1500, Training loss 33.7817, Validation loss 3.2290
Epoch 2000, Training loss 33.1505, Validation loss 3.2205
Epoch 2500, Training loss 32.5322, Validation loss 3.2116
Epoch 3000, Training loss 31.9266, Validation loss 3.2028
Epoch 3500, Training loss 31.3335, Validation loss 3.1942
Epoch 4000, Training loss 30.7525, Validation loss 3.1856
Epoch 4500, Training loss 30.1835, Validation loss 3.1772
Epoch 5000, Training loss 29.6262, Validation loss 3.1689
SGD alpha 0.001 epochs 500
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 79.9171, Validation loss 50.2310
Epoch 3, Training loss 74.8356, Validation loss 44.6451
Epoch 500, Training loss 29.6356, Validation loss 3.1691
SGD alpha 0.001 epochs 5000
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 79.9171, Validation loss 50.2310
Epoch 3, Training loss 74.8356, Validation loss 44.6451
Epoch 500, Training loss 29.6356, Validation loss 3.1691
Epoch 1000, Training loss 24.6520, Validation loss 3.0919
Epoch 1500, Training loss 20.6021, Validation loss 3.0243
Epoch 2000, Training loss 17.3109, Validation loss 2.9650
Epoch 2500, Training loss 14.6364, Validation loss 2.9129
Epoch 3000, Training loss 12.4629, Validation loss 2.8670
Epoch 3500, Training loss 10.6966, Validation loss 2.8266
Epoch 4000, Training loss 9.2613, Validation loss 2.7908
Epoch 4500, Training loss 8.0948, Validation loss 2.7591
Epoch 5000, Training loss 7.1469, Validation loss 2.7310
SGD alpha 0.001 epochs 5000
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 79.9171, Validation loss 50.2310
Epoch 3, Training loss 74.8356, Validation loss 44.6451
Epoch 500, Training loss 29.6356, Validation loss 3.1691
Epoch 1000, Training loss 24.6520, Validation loss 3.0919
Epoch 1500, Training loss 20.6021, Validation loss 3.0243
Epoch 2000, Training loss 17.3109, Validation loss 2.9650
Epoch 2500, Training loss 14.6364, Validation loss 2.9129
Epoch 3000, Training loss 12.4629, Validation loss 2.8670
Epoch 3500, Training loss 10.6966, Validation loss 2.8266
Epoch 4000, Training loss 9.2613, Validation loss 2.7908
Epoch 4500, Training loss 8.0948, Validation loss 2.7591
Epoch 5000, Training loss 7.1469, Validation loss 2.7310
SGD alpha 0.01 epochs 500
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 43.9632, Validation loss 11.2589
Epoch 3, Training loss 36.8792, Validation loss 4.2194
Epoch 500, Training loss 7.1544, Validation loss 2.7312
SGD alpha 0.01 epochs 5000
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 43.9632, Validation loss 11.2589
Epoch 3, Training loss 36.8792, Validation loss 4.2194
Epoch 500, Training loss 7.1544, Validation loss 2.7312
Epoch 1000, Training loss 3.5517, Validation loss 2.5743
Epoch 1500, Training loss 3.1001, Validation loss 2.5225
Epoch 2000, Training loss 3.0435, Validation loss 2.5046
Epoch 2500, Training loss 3.0364, Validation loss 2.4983
Epoch 3000, Training loss 3.0355, Validation loss 2.4961
Epoch 3500, Training loss 3.0354, Validation loss 2.4953
Epoch 4000, Training loss 3.0354, Validation loss 2.4950
Epoch 4500, Training loss 3.0354, Validation loss 2.4949
Epoch 5000, Training loss 3.0354, Validation loss 2.4949
SGD alpha 0.01 epochs 5000
Epoch 1, Training loss 85.6554, Validation loss 56.5547
Epoch 2, Training loss 43.9632, Validation loss 11.2589
Epoch 3, Training loss 36.8792, Validation loss 4.2194
Epoch 500, Training loss 7.1544, Validation loss 2.7312
Epoch 1000, Training loss 3.5517, Validation loss 2.5743
Epoch 1500, Training loss 3.1001, Validation loss 2.5225
Epoch 2000, Training loss 3.0435, Validation loss 2.5046
Epoch 2500, Training loss 3.0364, Validation loss 2.4983
Epoch 3000, Training loss 3.0355, Validation loss 2.4961
Epoch 3500, Training loss 3.0354, Validation loss 2.4953
Epoch 4000, Training loss 3.0354, Validation loss 2.4950
Epoch 4500, Training loss 3.0354, Validation loss 2.4949
Epoch 5000, Training loss 3.0354, Validation loss 2.4949
# Collect the recorded run results into a DataFrame and display it.
df = pd.DataFrame(data=results)
df
.dataframe tbody tr th {
vertical-align: top;
}

.dataframe thead th {
text-align: right;
}

Sorting our dataframe by val_loss in ascending order to see which configuration performed best.

# Order the runs from lowest to highest validation loss, then display them.
df = df.sort_values(by="val_loss", ascending=True)
df
.dataframe tbody tr th {
vertical-align: top;
}

.dataframe thead th {
text-align: right;
}

def show_values_on_bars(axs):
    """Write each bar's height as a text label on top of the bar.

    ``axs`` is either a single axes-like object or a NumPy array of them.
    For every entry in an axes' ``patches`` the height is formatted with two
    decimals and drawn, horizontally centred, at the patch's mid-x position
    and at ``y + height``.
    """
    # Adapted from https://stackoverflow.com/a/51535326

    def _label_patches(axis):
        for bar in axis.patches:
            x_mid = bar.get_x() + bar.get_width() / 2
            y_top = bar.get_y() + bar.get_height()
            label = f"{bar.get_height():.2f}"
            axis.text(x_mid, y_top, label, ha="center")

    # A lone axes object is handled like a one-element collection.
    if not isinstance(axs, np.ndarray):
        _label_patches(axs)
        return

    for axis in axs.flat:
        _label_patches(axis)

## Visualizing Loss Over Time

# Display the mapping of run label -> per-epoch validation-loss DataFrame
# (the output below shows frames with `epoch` and `val_loss` columns).
val_loss_over_time_by_name
{'ASGD alpha 0.0001 epochs 500':      epoch   val_loss
0        0  56.554653
1        1  55.904346
2        2  55.261829
3        3  54.627026
4        4  53.999836
..     ...        ...
495    495   3.207940
496    496   3.207206
497    497   3.206488
498    498   3.205786
499    499   3.205099

[500 rows x 2 columns],
'ASGD alpha 0.0001 epochs 5000':       epoch   val_loss
0         0  56.554653
1         1  55.904346
2         2  55.261829
3         3  54.627026
4         4  53.999836
...     ...        ...
4995   4995   3.168985
4996   4996   3.168968
4997   4997   3.168952
4998   4998   3.168935
4999   4999   3.168919

[5000 rows x 2 columns],
'ASGD alpha 0.001 epochs 500':      epoch   val_loss
0        0  56.554653
1        1  50.230995
2        2  44.645107
3        3  39.711609
4        4  35.354980
..     ...        ...
495    495   3.169730
496    496   3.169564
497    497   3.169399
498    498   3.169234
499    499   3.169067

[500 rows x 2 columns],
'ASGD alpha 0.001 epochs 5000':       epoch   val_loss
0         0  56.554653
1         1  50.230995
2         2  44.645107
3         3  39.711609
4         4  35.354980
...     ...        ...
4995   4995   2.731359
4996   4996   2.731308
4997   4997   2.731251
4998   4998   2.731201
4999   4999   2.731149

[5000 rows x 2 columns],
'ASGD alpha 0.01 epochs 500':      epoch   val_loss
0        0  56.554653
1        1  11.258911
2        2   4.219460
3        3   3.260551
4        4   3.188705
..     ...        ...
495    495   2.733504
496    496   2.732972
497    497   2.732441
498    498   2.731906
499    499   2.731377

[500 rows x 2 columns],
'ASGD alpha 0.01 epochs 5000':       epoch   val_loss
0         0  56.554653
1         1  11.258911
2         2   4.219460
3         3   3.260551
4         4   3.188705
...     ...        ...
4995   4995   2.495191
4996   4996   2.495189
4997   4997   2.495189
4998   4998   2.495187
4999   4999   2.495189

[5000 rows x 2 columns],
0        0  56.554653
1        1  56.554615
2        2  56.554577
3        3  56.554543
4        4  56.554512
..     ...        ...
495    495  56.526024
496    496  56.525955
497    497  56.525871
498    498  56.525791
499    499  56.525707

[500 rows x 2 columns],
0         0  56.554653
1         1  56.554615
2         2  56.554577
3         3  56.554543
4         4  56.554512
...     ...        ...
4995   4995  55.804359
4996   4996  55.804153
4997   4997  55.803925
4998   4998  55.803707
4999   4999  55.803486

[5000 rows x 2 columns],
0        0  56.554653
1        1  56.554329
2        2  56.554020
3        3  56.553696
4        4  56.553364
..     ...        ...
495    495  56.267933
496    496  56.267162
497    497  56.266396
498    498  56.265621
499    499  56.264843

[500 rows x 2 columns],
0         0  56.554653
1         1  56.554329
2         2  56.554020
3         3  56.553696
4         4  56.553364
...     ...        ...
4995   4995  49.416130
4996   4996  49.414135
4997   4997  49.412148
4998   4998  49.410164
4999   4999  49.408180

[5000 rows x 2 columns],
0        0  56.554653
1        1  56.551548
2        2  56.548363
3        3  56.545120
4        4  56.541828
..     ...        ...
495    495  53.743576
496    496  53.736214
497    497  53.728844
498    498  53.721470
499    499  53.714096

[500 rows x 2 columns],
0         0  56.554653
1         1  56.551548
2         2  56.548363
3         3  56.545120
4         4  56.541828
...     ...        ...
4995   4995  12.848351
4996   4996  12.842356
4997   4997  12.836356
4998   4998  12.830364
4999   4999  12.824373

[5000 rows x 2 columns],
0        0  56.554653
1        1  56.544823
2        2  56.537872
3        3  56.532219
4        4  56.527313
..     ...        ...
495    495  56.132912
496    496  56.132469
497    497  56.132030
498    498  56.131580
499    499  56.131142

[500 rows x 2 columns],
0         0  56.554653
1         1  56.544823
2         2  56.537872
3         3  56.532219
4         4  56.527313
...     ...        ...
4995   4995  55.192818
4996   4996  55.192669
4997   4997  55.192543
4998   4998  55.192398
4999   4999  55.192265

[5000 rows x 2 columns],
0        0  56.554653
1        1  56.456478
2        2  56.387157
3        3  56.330597
4        4  56.281654
..     ...        ...
495    495  52.438919
496    496  52.434738
497    497  52.430573
498    498  52.426403
499    499  52.422249

[500 rows x 2 columns],
0         0  56.554653
1         1  56.456478
2         2  56.387157
3         3  56.330597
4         4  56.281654
...     ...        ...
4995   4995  43.979683
4996   4996  43.978531
4997   4997  43.977379
4998   4998  43.976227
4999   4999  43.975075

[5000 rows x 2 columns],
0        0  56.554653
1        1  55.577057
2        2  54.894497
3        3  54.342110
4        4  53.867180
..     ...        ...
495    495  24.773571
496    496  24.750305
497    497  24.727089
498    498  24.703909
499    499  24.680784

[500 rows x 2 columns],
0         0  56.554653
1         1  55.577057
2         2  54.894497
3         3  54.342110
4         4  53.867180
...     ...        ...
4995   4995   4.227723
4996   4996   4.227191
4997   4997   4.226661
4998   4998   4.226130
4999   4999   4.225605

[5000 rows x 2 columns],
'Adam alpha 0.0001 epochs 500':      epoch   val_loss
0        0  56.554653
1        1  56.544823
2        2  56.535011
3        3  56.525185
4        4  56.515358
..     ...        ...
495    495  51.851482
496    496  51.842304
497    497  51.833126
498    498  51.823948
499    499  51.814774

[500 rows x 2 columns],
'Adam alpha 0.0001 epochs 5000':       epoch   val_loss
0         0  56.554653
1         1  56.544823
2         2  56.535011
3         3  56.525185
4         4  56.515358
...     ...        ...
4995   4995  20.871820
4996   4996  20.866917
4997   4997  20.862013
4998   4998  20.857115
4999   4999  20.852211

[5000 rows x 2 columns],
'Adam alpha 0.001 epochs 500':      epoch   val_loss
0        0  56.554653
1        1  56.456478
2        2  56.358410
3        3  56.260429
4        4  56.162548
..     ...        ...
495    495  22.123474
496    496  22.078865
497    497  22.034321
498    498  21.989882
499    499  21.945518

[500 rows x 2 columns],
'Adam alpha 0.001 epochs 5000':       epoch   val_loss
0         0  56.554653
1         1  56.456478
2         2  56.358410
3         3  56.260429
4         4  56.162548
...     ...        ...
4995   4995   3.073970
4996   4996   3.073947
4997   4997   3.073922
4998   4998   3.073897
4999   4999   3.073874

[5000 rows x 2 columns],
'Adam alpha 0.01 epochs 500':      epoch   val_loss
0        0  56.554653
1        1  55.577057
2        2  54.608772
3        3  53.649937
4        4  52.700714
..     ...        ...
495    495   3.063385
496    496   3.062908
497    497   3.062427
498    498   3.061947
499    499   3.061470

[500 rows x 2 columns],
'Adam alpha 0.01 epochs 5000':       epoch   val_loss
0         0  56.554653
1         1  55.577057
2         2  54.608772
3         3  53.649937
4         4  52.700714
...     ...        ...
4995   4995   2.494981
4996   4996   2.494981
4997   4997   2.494981
4998   4998   2.494981
4999   4999   2.494979

[5000 rows x 2 columns],
'AdamW alpha 0.0001 epochs 500':      epoch   val_loss
0        0  56.554653
1        1  56.544907
2        2  56.535187
3        3  56.525459
4        4  56.515717
..     ...        ...
495    495  51.893639
496    496  51.884533
497    497  51.875420
498    498  51.866318
499    499  51.857204

[500 rows x 2 columns],
'AdamW alpha 0.0001 epochs 5000':       epoch   val_loss
0         0  56.554653
1         1  56.544907
2         2  56.535187
3         3  56.525459
4         4  56.515717
...     ...        ...
4995   4995  21.153263
4996   4996  21.148376
4997   4997  21.143486
4998   4998  21.138601
4999   4999  21.133724

[5000 rows x 2 columns],
'AdamW alpha 0.001 epochs 500':      epoch   val_loss
0        0  56.554653
1        1  56.457317
2        2  56.360092
3        3  56.262943
4        4  56.165882
..     ...        ...
495    495  22.396433
496    496  22.352057
497    497  22.307760
498    498  22.263554
499    499  22.219418

[500 rows x 2 columns],
'AdamW alpha 0.001 epochs 5000':       epoch   val_loss
0         0  56.554653
1         1  56.457317
2         2  56.360092
3         3  56.262943
4         4  56.165882
...     ...        ...
4995   4995   3.069065
4996   4996   3.069041
4997   4997   3.069020
4998   4998   3.068998
4999   4999   3.068979

[5000 rows x 2 columns],
'AdamW alpha 0.01 epochs 500':      epoch   val_loss
0        0  56.554653
1        1  55.585354
2        2  54.625298
3        3  53.674629
4        4  52.733505
..     ...        ...
495    495   3.070481
496    496   3.069960
497    497   3.069438
498    498   3.068917
499    499   3.068398

[500 rows x 2 columns],
'AdamW alpha 0.01 epochs 5000':       epoch   val_loss
0         0  56.554653
1         1  55.585354
2         2  54.625298
3         3  53.674629
4         4  52.733505
...     ...        ...
4995   4995   2.505894
4996   4996   2.505887
4997   4997   2.505881
4998   4998   2.505876
4999   4999   2.505870

[5000 rows x 2 columns],
'Adamax alpha 0.0001 epochs 500':      epoch   val_loss
0        0  56.554653
1        1  56.544823
2        2  56.535011
3        3  56.525185
4        4  56.515358
..     ...        ...
495    495  51.798332
496    496  51.788948
497    497  51.779568
498    498  51.770184
499    499  51.760803

[500 rows x 2 columns],
'Adamax alpha 0.0001 epochs 5000':       epoch   val_loss
0         0  56.554653
1         1  56.544823
2         2  56.535011
3         3  56.525185
4         4  56.515358
...     ...        ...
4995   4995  18.747269
4996   4996  18.741966
4997   4997  18.736656
4998   4998  18.731358
4999   4999  18.726057

[5000 rows x 2 columns],
'Adamax alpha 0.001 epochs 500':      epoch   val_loss
0        0  56.554653
1        1  56.456478
2        2  56.358368
3        3  56.260292
4        4  56.162277
..     ...        ...
495    495  19.641920
496    496  19.592678
497    497  19.543533
498    498  19.494484
499    499  19.445547

[500 rows x 2 columns],
'Adamax alpha 0.001 epochs 5000':       epoch   val_loss
0         0  56.554653
1         1  56.456478
2         2  56.358368
3         3  56.260292
4         4  56.162277
...     ...        ...
4995   4995   3.110403
4996   4996   3.110355
4997   4997   3.110315
4998   4998   3.110270
4999   4999   3.110229

[5000 rows x 2 columns],
'Adamax alpha 0.01 epochs 500':      epoch   val_loss
0        0  56.554653
1        1  55.577057
2        2  54.612324
3        3  53.660469
4        4  52.721535
..     ...        ...
495    495   3.156959
496    496   3.156623
497    497   3.156288
498    498   3.155955
499    499   3.155621

[500 rows x 2 columns],
'Adamax alpha 0.01 epochs 5000':       epoch   val_loss
0         0  56.554653
1         1  55.577057
2         2  54.612324
3         3  53.660469
4         4  52.721535
...     ...        ...
4995   4995   2.494895
4996   4996   2.494895
4997   4997   2.494895
4998   4998   2.494895
4999   4999   2.494895

[5000 rows x 2 columns],
'RMSprop alpha 0.0001 epochs 500':      epoch   val_loss
0        0  56.554653
1        1  56.456478
2        2  56.386971
3        3  56.330135
4        4  56.280827
..     ...        ...
495    495  50.691227
496    496  50.682026
497    497  50.672829
498    498  50.663620
499    499  50.654415

[500 rows x 2 columns],
'RMSprop alpha 0.0001 epochs 5000':       epoch   val_loss
0         0  56.554653
1         1  56.456478
2         2  56.386971
3         3  56.330135
4         4  56.280827
...     ...        ...
4995   4995  18.459406
4996   4996  18.454260
4997   4997  18.449104
4998   4998  18.443958
4999   4999  18.438801

[5000 rows x 2 columns],
'RMSprop alpha 0.001 epochs 500':      epoch   val_loss
0        0  56.554653
1        1  55.577057
2        2  54.892769
3        3  54.337593
4        4  53.859093
..     ...        ...
495    495  15.494940
496    496  15.455171
497    497  15.415486
498    498  15.375866
499    499  15.336336

[500 rows x 2 columns],
'RMSprop alpha 0.001 epochs 5000':       epoch   val_loss
0         0  56.554653
1         1  55.577057
2         2  54.892769
3         3  54.337593
4         4  53.859093
...     ...        ...
4995   4995   3.102169
4996   4996   3.098152
4997   4997   3.102068
4998   4998   3.098054
4999   4999   3.101970

[5000 rows x 2 columns],
'RMSprop alpha 0.01 epochs 500':      epoch   val_loss
0        0  56.554653
1        1  47.185917
2        2  41.387192
3        3  37.090027
4        4  33.661922
..     ...        ...
495    495   3.038208
496    496   3.037877
497    497   3.037549
498    498   3.037217
499    499   3.036888

[500 rows x 2 columns],
'RMSprop alpha 0.01 epochs 5000':       epoch   val_loss
0         0  56.554653
1         1  47.185917
2         2  41.387192
3         3  37.090027
4         4  33.661922
...     ...        ...
4995   4995   2.530038
4996   4996   2.462016
4997   4997   2.530038
4998   4998   2.462016
4999   4999   2.530038

[5000 rows x 2 columns],
'Rprop alpha 0.0001 epochs 500':      epoch   val_loss
0        0  56.554653
1        1  56.544823
2        2  56.533043
3        3  56.518898
4        4  56.501934
..     ...        ...
495    495   2.494897
496    496   2.494901
497    497   2.494901
498    498   2.494897
499    499   2.494897

[500 rows x 2 columns],
'Rprop alpha 0.0001 epochs 5000':       epoch   val_loss
0         0  56.554653
1         1  56.544823
2         2  56.533043
3         3  56.518898
4         4  56.501934
...     ...        ...
4995   4995   2.494897
4996   4996   2.494901
4997   4997   2.494901
4998   4998   2.494897
4999   4999   2.494897

[5000 rows x 2 columns],
'Rprop alpha 0.001 epochs 500':      epoch   val_loss
0        0  56.554653
1        1  56.456478
2        2  56.338799
3        3  56.197750
4        4  56.028740
..     ...        ...
495    495   2.494897
496    496   2.494889
497    497   2.494889
498    498   2.494897
499    499   2.494897

[500 rows x 2 columns],
'Rprop alpha 0.001 epochs 5000':       epoch   val_loss
0         0  56.554653
1         1  56.456478
2         2  56.338799
3         3  56.197750
4         4  56.028740
...     ...        ...
4995   4995   2.494897
4996   4996   2.494889
4997   4997   2.494889
4998   4998   2.494897
4999   4999   2.494897

[5000 rows x 2 columns],
'Rprop alpha 0.01 epochs 500':      epoch   val_loss
0        0  56.554653
1        1  55.577057
2        2  54.415916
3        3  53.039719
4        4  51.413044
..     ...        ...
495    495   2.494901
496    496   2.494901
497    497   2.494893
498    498   2.494893
499    499   2.494901

[500 rows x 2 columns],
'Rprop alpha 0.01 epochs 5000':       epoch   val_loss
0         0  56.554653
1         1  55.577057
2         2  54.415916
3         3  53.039719
4         4  51.413044
...     ...        ...
4995   4995   2.494901
4996   4996   2.494901
4997   4997   2.494893
4998   4998   2.494893
4999   4999   2.494901

[5000 rows x 2 columns],
'SGD alpha 0.0001 epochs 500':      epoch   val_loss
0        0  56.554653
1        1  55.904346
2        2  55.261829
3        3  54.627026
4        4  53.999836
..     ...        ...
495    495   3.207938
496    496   3.207205
497    497   3.206487
498    498   3.205786
499    499   3.205097

[500 rows x 2 columns],
'SGD alpha 0.0001 epochs 5000':       epoch   val_loss
0         0  56.554653
1         1  55.904346
2         2  55.261829
3         3  54.627026
4         4  53.999836
...     ...        ...
4995   4995   3.168984
4996   4996   3.168968
4997   4997   3.168951
4998   4998   3.168934
4999   4999   3.168918

[5000 rows x 2 columns],
'SGD alpha 0.001 epochs 500':      epoch   val_loss
0        0  56.554653
1        1  50.230991
2        2  44.645096
3        3  39.711586
4        4  35.354950
..     ...        ...
495    495   3.169732
496    496   3.169568
497    497   3.169403
498    498   3.169238
499    499   3.169075

[500 rows x 2 columns],
'SGD alpha 0.001 epochs 5000':       epoch   val_loss
0         0  56.554653
1         1  50.230991
2         2  44.645096
3         3  39.711586
4         4  35.354950
...     ...        ...
4995   4995   2.731227
4996   4996   2.731175
4997   4997   2.731124
4998   4998   2.731071
4999   4999   2.731016

[5000 rows x 2 columns],
'SGD alpha 0.01 epochs 500':      epoch   val_loss
0        0  56.554653
1        1  11.258880
2        2   4.219429
3        3   3.260540
4        4   3.188704
..     ...        ...
495    495   2.733375
496    496   2.732840
497    497   2.732311
498    498   2.731781
499    499   2.731246

[500 rows x 2 columns],
'SGD alpha 0.01 epochs 5000':       epoch   val_loss
0         0  56.554653
1         1  11.258880
2         2   4.219429
3         3   3.260540
4         4   3.188704
...     ...        ...
4995   4995   2.494914
4996   4996   2.494912
4997   4997   2.494912
4998   4998   2.494914
4999   4999   2.494914

[5000 rows x 2 columns]}
#experiment_name = "Adamax alpha 0.01 epochs 5000"