Configurable Convolutional Neural Network

Convolutional Network Framework in Python

A framework for testing configurable CNNs on CIFAR-10, based on the CS231n class project.
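
The generator consumes a plain Python list describing the network: each entry is [layer_name, params], where conv takes a dict of filter options, affine takes its output width (the parentheses in entries like ['affine', (20)] below are redundant; the value is just an int), and parameter-free layers such as relu, pool, dropout, batchnorm, and spatial_batchnorm stand alone. A minimal spec in the convention used throughout:

layers = [
    ['conv', {'num_of_filters': 32, 'filter_size': 3}],
    ['relu'],
    ['pool'],
    ['affine', 100],
    ['relu'],
]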

In [1]:
# Setup environment
import numpy as np
import matplotlib.pyplot as plt
from cs231n.classifiers.convnetGenerator import *
from cs231n.data_utils import get_CIFAR10_data
from cs231n.gradient_check import eval_numerical_gradient_array, eval_numerical_gradient
from cs231n.layers import *
from cs231n.fast_layers import *
from cs231n.solver import Solver

%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

%load_ext autoreload
%autoreload 2

def rel_error(x, y):
    """ returns relative error """
    return np.max(np.abs(x - y) / np.maximum(1e-8, np.abs(x) + np.abs(y)))
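
As a quick illustration, rel_error reports the largest elementwise relative difference; by the usual cs231n rule of thumb, values around 1e-7 or below mean two gradients effectively agree:

a = np.array([1.0, 2.0, 3.0])
b = a + 1e-8
print rel_error(a, b)  # ~5e-9: effectively identical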
In [2]:
# Load preprocessed CIFAR-10 data
data = get_CIFAR10_data()
for k, v in data.iteritems():
    print '%s: ' % k, v.shape
X_val:  (1000, 3, 32, 32)
X_train:  (49000, 3, 32, 32)
X_test:  (1000, 3, 32, 32)
y_val:  (1000,)
y_train:  (49000,)
y_test:  (1000,)
In [3]:
# Load convnetGenerator
from cs231n.classifiers.convnet import *
In [4]:
# Sanity-check the loss value and gradients
layers = [
    ['conv', {'num_of_filters':5, 'filter_size':3}],
    ['spatial_batchnorm'],
    ['relu'],
    ['dropout'],
    ['pool'],
    ['affine', (20)],
    ['batchnorm'],
    ['affine', (10)],
    ['relu'],
    ['dropout']
]

N = 10
X = np.random.randn(N, 3, 4, 4)
y = np.random.randint(10, size=N)

model = convnetGenerator(input_dim=(3, 4, 4), 
                         layers=layers, verbose=False, 
                         weight_scale=1e-1, seed=0, 
                         dtype=np.float64)
loss, grads = model.loss(X, y)
print 'Initial loss (without regularization): ', loss

for param_name in sorted(grads):
    f = lambda _: model.loss(X, y)[0]
    param_grad_num = eval_numerical_gradient(f, model.params[param_name], verbose=False, h=1e-6)
    e = rel_error(param_grad_num, grads[param_name])
    print '%s max relative error: %e' % (param_name, e)
    
model.reg = 1
loss, grads = model.loss(X, y)
print 'Initial loss (with regularization): ', loss

for param_name in sorted(grads):
    f = lambda _: model.loss(X, y)[0]
    param_grad_num = eval_numerical_gradient(f, model.params[param_name], verbose=False, h=1e-6)
    e = rel_error(param_grad_num, grads[param_name])
    print '%s max relative error: %e' % (param_name, e)
Initial loss (without regularization):  2.48318674289
W0 max relative error: 9.883919e-08
W10 max relative error: 1.759215e-08
W5 max relative error: 6.472835e-07
W7 max relative error: 2.026246e-07
b0 max relative error: 1.019150e-09
b10 max relative error: 3.745815e-08
b5 max relative error: 7.806256e-10
b7 max relative error: 1.484453e-08
beta1 max relative error: 7.822434e-09
beta6 max relative error: 1.102359e-07
gamma1 max relative error: 1.011301e-08
gamma6 max relative error: 6.094902e-08
Initial loss (with regularization):  6.86712538153
W0 max relative error: 2.300431e-07
W10 max relative error: 1.047184e-06
W5 max relative error: 2.451635e-05
W7 max relative error: 1.070334e-07
b0 max relative error: 1.019150e-09
b10 max relative error: 6.404624e-08
b5 max relative error: 7.806256e-10
b7 max relative error: 7.096623e-09
beta1 max relative error: 2.393115e-08
beta6 max relative error: 1.102359e-07
gamma1 max relative error: 2.030903e-08
gamma6 max relative error: 6.094902e-08
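
Relative errors around 1e-7 or smaller indicate the analytic gradients match the numeric ones. For reference, eval_numerical_gradient approximates each partial derivative with a central difference; a minimal sketch of the same idea (the cs231n helper adds verbosity options, but the core is this loop):

def numeric_grad(f, x, h=1e-6):
    # df/dx_i ~ (f(x + h*e_i) - f(x - h*e_i)) / (2h), one coordinate at a time
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'])
    while not it.finished:
        i = it.multi_index
        old = x[i]
        x[i] = old + h
        fp = f(x)
        x[i] = old - h
        fm = f(x)
        x[i] = old  # restore the original value
        grad[i] = (fp - fm) / (2.0 * h)
        it.iternext()
    return grad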
In [5]:
# Recreate the standard three-layer ConvNet with the generator
# and overfit a small subset of the data as a sanity check
num_train = 100
small_data = {
    'X_train': data['X_train'][:num_train],
    'y_train': data['y_train'][:num_train],
    'X_val': data['X_val'],
    'y_val': data['y_val'],
}

layers = [
    ['conv', {'num_of_filters':32, 'filter_size':7}],
    ['relu'],
    ['pool'],
    ['affine', (100)],
    ['relu']
]
model = convnetGenerator(layers=layers, 
                         verbose=False, 
                         weight_scale=1e-2)

solver = Solver(model, small_data,
               num_epochs=10, batch_size=50,
               update_rule='adam',
               optim_config={
                    'learning_rate': 1e-3
               },
                verbose=True, print_every=1)
solver.train()
(Iteration 1 / 20) loss: 2.501378
(Epoch 0 / 10) train acc: 0.140000; val_acc: 0.079000
(Iteration 2 / 20) loss: 4.735779
(Epoch 1 / 10) train acc: 0.110000; val_acc: 0.107000
(Iteration 3 / 20) loss: 2.662372
(Iteration 4 / 20) loss: 2.312295
(Epoch 2 / 10) train acc: 0.280000; val_acc: 0.142000
(Iteration 5 / 20) loss: 2.336332
(Iteration 6 / 20) loss: 2.030852
(Epoch 3 / 10) train acc: 0.350000; val_acc: 0.137000
(Iteration 7 / 20) loss: 1.874209
(Iteration 8 / 20) loss: 1.803743
(Epoch 4 / 10) train acc: 0.440000; val_acc: 0.168000
(Iteration 9 / 20) loss: 1.730151
(Iteration 10 / 20) loss: 1.712336
(Epoch 5 / 10) train acc: 0.590000; val_acc: 0.169000
(Iteration 11 / 20) loss: 1.350723
(Iteration 12 / 20) loss: 1.347327
(Epoch 6 / 10) train acc: 0.610000; val_acc: 0.220000
(Iteration 13 / 20) loss: 1.458294
(Iteration 14 / 20) loss: 1.056061
(Epoch 7 / 10) train acc: 0.770000; val_acc: 0.224000
(Iteration 15 / 20) loss: 0.802197
(Iteration 16 / 20) loss: 0.818013
(Epoch 8 / 10) train acc: 0.740000; val_acc: 0.202000
(Iteration 17 / 20) loss: 0.925857
(Iteration 18 / 20) loss: 0.721106
(Epoch 9 / 10) train acc: 0.830000; val_acc: 0.191000
(Iteration 19 / 20) loss: 0.668794
(Iteration 20 / 20) loss: 0.691209
(Epoch 10 / 10) train acc: 0.850000; val_acc: 0.213000
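
Training accuracy climbing to 0.85 while validation accuracy stays near 0.2 is exactly the overfitting this sanity check is after. Assuming the standard cs231n Solver, check_accuracy gives a direct final measurement:

print 'final train acc:', solver.check_accuracy(small_data['X_train'], small_data['y_train'])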
In [6]:
# Plot training loss, training accuracy, and validation accuracy
plt.subplot(2, 1, 1)
plt.plot(solver.loss_history, 'o')
plt.xlabel('iteration')
plt.ylabel('loss')

plt.subplot(2, 1, 2)
plt.plot(solver.train_acc_history, '-o')
plt.plot(solver.val_acc_history, '-o')
plt.legend(['train', 'val'], loc='upper left')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.show()
In [7]:
# Train the three-layer model on the full dataset
layers = [
    ['conv', {'num_of_filters':32, 'filter_size':7}],
    ['relu'],
    ['pool'],
    ['affine', (500)],
    ['relu']
]
model = convnetGenerator(layers=layers, 
                         verbose=False, 
                         weight_scale=1e-3,
                         reg=0.001)

solver = Solver(model, data,
               num_epochs=1, batch_size=50,
               update_rule='adam',
               optim_config={
                    'learning_rate': 1e-3
               },
                verbose=True, print_every=20)
solver.train()
(Iteration 1 / 980) loss: 2.304609
(Epoch 0 / 1) train acc: 0.118000; val_acc: 0.104000
(Iteration 21 / 980) loss: 2.166419
(Iteration 41 / 980) loss: 1.845205
(Iteration 61 / 980) loss: 1.651621
(Iteration 81 / 980) loss: 1.911876
(Iteration 101 / 980) loss: 1.907545
(Iteration 121 / 980) loss: 1.949062
(Iteration 141 / 980) loss: 1.965104
(Iteration 161 / 980) loss: 1.789602
(Iteration 181 / 980) loss: 1.732072
(Iteration 201 / 980) loss: 1.711157
(Iteration 221 / 980) loss: 1.714963
(Iteration 241 / 980) loss: 1.610209
(Iteration 261 / 980) loss: 1.673627
(Iteration 281 / 980) loss: 2.374440
(Iteration 301 / 980) loss: 1.450597
(Iteration 321 / 980) loss: 1.714960
(Iteration 341 / 980) loss: 1.782738
(Iteration 361 / 980) loss: 1.547322
(Iteration 381 / 980) loss: 1.838150
(Iteration 401 / 980) loss: 1.505023
(Iteration 421 / 980) loss: 1.743893
(Iteration 441 / 980) loss: 1.769522
(Iteration 461 / 980) loss: 1.470960
(Iteration 481 / 980) loss: 1.665716
(Iteration 501 / 980) loss: 1.334806
(Iteration 521 / 980) loss: 1.596020
(Iteration 541 / 980) loss: 1.633129
(Iteration 561 / 980) loss: 1.789139
(Iteration 581 / 980) loss: 1.359481
(Iteration 601 / 980) loss: 1.672585
(Iteration 621 / 980) loss: 1.626993
(Iteration 641 / 980) loss: 1.493078
(Iteration 661 / 980) loss: 1.327803
(Iteration 681 / 980) loss: 1.415701
(Iteration 701 / 980) loss: 1.897976
(Iteration 721 / 980) loss: 1.414025
(Iteration 741 / 980) loss: 1.599150
(Iteration 761 / 980) loss: 1.424620
(Iteration 781 / 980) loss: 1.736903
(Iteration 801 / 980) loss: 1.475317
(Iteration 821 / 980) loss: 1.592283
(Iteration 841 / 980) loss: 1.541136
(Iteration 861 / 980) loss: 1.503937
(Iteration 881 / 980) loss: 1.418334
(Iteration 901 / 980) loss: 1.731127
(Iteration 921 / 980) loss: 1.595903
(Iteration 941 / 980) loss: 1.439913
(Iteration 961 / 980) loss: 1.370036
(Epoch 1 / 1) train acc: 0.501000; val_acc: 0.498000
In [9]:
from cs231n.vis_utils import visualize_grid

# Visualize the learned first-layer conv filters
grid = visualize_grid(model.params['W0'].transpose(0, 2, 3, 1))
plt.imshow(grid.astype('uint8'))
plt.axis('off')
plt.gcf().set_size_inches(5, 5)
plt.show()
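
After training, the first-layer filters should show oriented edges and color blobs. visualize_grid tiles the (num_filters, H, W, C) tensor into one image; a minimal sketch of the same idea, independent of the cs231n helper's exact options:

def tile_filters(W, pad=1):
    # W: (num_filters, H, W, C); each filter is rescaled to [0, 255]
    # independently so its structure is visible, then placed on a grid
    n, h, w, c = W.shape
    cols = int(np.ceil(np.sqrt(n)))
    rows = int(np.ceil(float(n) / cols))
    grid = np.zeros((rows * (h + pad) - pad, cols * (w + pad) - pad, c))
    for i in xrange(n):
        r, cc = divmod(i, cols)
        f = W[i]
        f = 255.0 * (f - f.min()) / max(f.max() - f.min(), 1e-8)
        grid[r * (h + pad):r * (h + pad) + h,
             cc * (w + pad):cc * (w + pad) + w] = f
    return grid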
In [10]:
history_7x7 = solver.loss_history  # keep the 7x7 run for comparison
In [11]:
# Try a 3x3 filter size instead of 7x7
layers = [
    ['conv', {'num_of_filters':32, 'filter_size':3}],
    ['relu'],
    ['pool'],
    ['affine', (500)],
    ['relu']
]
model = convnetGenerator(layers=layers, 
                         verbose=False, 
                         weight_scale=1e-3,
                         reg=0.001)

solver = Solver(model, data,
               num_epochs=1, batch_size=50,
               update_rule='adam',
               optim_config={
                    'learning_rate': 1e-3
               },
                verbose=True, print_every=20)
solver.train()
(Iteration 1 / 980) loss: 2.304682
(Epoch 0 / 1) train acc: 0.157000; val_acc: 0.149000
(Iteration 21 / 980) loss: 2.061422
(Iteration 41 / 980) loss: 2.017416
(Iteration 61 / 980) loss: 1.694147
(Iteration 81 / 980) loss: 2.088317
(Iteration 101 / 980) loss: 1.877042
(Iteration 121 / 980) loss: 1.958555
(Iteration 141 / 980) loss: 1.732520
(Iteration 161 / 980) loss: 1.558876
(Iteration 181 / 980) loss: 2.015591
(Iteration 201 / 980) loss: 1.784223
(Iteration 221 / 980) loss: 1.623718
(Iteration 241 / 980) loss: 1.446056
(Iteration 261 / 980) loss: 1.461913
(Iteration 281 / 980) loss: 1.467658
(Iteration 301 / 980) loss: 1.527802
(Iteration 321 / 980) loss: 1.499437
(Iteration 341 / 980) loss: 1.628613
(Iteration 361 / 980) loss: 1.454831
(Iteration 381 / 980) loss: 2.153976
(Iteration 401 / 980) loss: 1.617547
(Iteration 421 / 980) loss: 1.431443
(Iteration 441 / 980) loss: 1.473944
(Iteration 461 / 980) loss: 1.313490
(Iteration 481 / 980) loss: 1.570247
(Iteration 501 / 980) loss: 1.431398
(Iteration 521 / 980) loss: 1.456620
(Iteration 541 / 980) loss: 1.356616
(Iteration 561 / 980) loss: 1.351886
(Iteration 581 / 980) loss: 1.275274
(Iteration 601 / 980) loss: 1.699909
(Iteration 621 / 980) loss: 1.495320
(Iteration 641 / 980) loss: 1.635343
(Iteration 661 / 980) loss: 1.383922
(Iteration 681 / 980) loss: 1.443174
(Iteration 701 / 980) loss: 1.290799
(Iteration 721 / 980) loss: 1.469113
(Iteration 741 / 980) loss: 1.620972
(Iteration 761 / 980) loss: 1.765069
(Iteration 781 / 980) loss: 1.391391
(Iteration 801 / 980) loss: 1.437773
(Iteration 821 / 980) loss: 1.720682
(Iteration 841 / 980) loss: 1.365103
(Iteration 861 / 980) loss: 1.441266
(Iteration 881 / 980) loss: 1.355539
(Iteration 901 / 980) loss: 1.402413
(Iteration 921 / 980) loss: 1.501388
(Iteration 941 / 980) loss: 1.531445
(Iteration 961 / 980) loss: 1.574137
(Epoch 1 / 1) train acc: 0.541000; val_acc: 0.527000
In [12]:
# Plot the loss of both runs to compare filter sizes
plt.plot(history_7x7, 'o')
plt.plot(solver.loss_history, 'o')
plt.xlabel('iteration')
plt.ylabel('loss')
plt.legend(['7x7', '3x3'], loc='upper right')
Out[12]:
<matplotlib.legend.Legend at 0x7fe52ea73b10>
In [13]:
# Check the impact of the number of filters
filter_numbers = [8, 16, 32, 64]
stats = []
for f in filter_numbers:
    layers = [
        ['conv', {'num_of_filters':f, 'filter_size':3}],
        ['relu'],
        ['pool'],
        ['affine', (500)],
        ['relu']
    ]
    model = convnetGenerator(layers=layers, 
                         verbose=False, 
                         weight_scale=1e-3,
                         reg=0.001)
    solver = Solver(model, data,
                    num_epochs=1, batch_size=50,
                    update_rule='adam',
                    optim_config={'learning_rate': 1e-3,},
                    verbose=True, print_every=100
                   )
    solver.train()
    stats.append(solver)
(Iteration 1 / 980) loss: 2.303082
(Epoch 0 / 1) train acc: 0.097000; val_acc: 0.107000
(Iteration 101 / 980) loss: 1.638973
(Iteration 201 / 980) loss: 1.593063
(Iteration 301 / 980) loss: 1.578063
(Iteration 401 / 980) loss: 1.630801
(Iteration 501 / 980) loss: 1.706995
(Iteration 601 / 980) loss: 1.738532
(Iteration 701 / 980) loss: 1.297444
(Iteration 801 / 980) loss: 1.598771
(Iteration 901 / 980) loss: 1.449220
(Epoch 1 / 1) train acc: 0.577000; val_acc: 0.520000
(Iteration 1 / 980) loss: 2.303643
(Epoch 0 / 1) train acc: 0.085000; val_acc: 0.107000
(Iteration 101 / 980) loss: 1.790572
(Iteration 201 / 980) loss: 1.545909
(Iteration 301 / 980) loss: 1.655494
(Iteration 401 / 980) loss: 1.378390
(Iteration 501 / 980) loss: 1.348919
(Iteration 601 / 980) loss: 1.805475
(Iteration 701 / 980) loss: 1.394510
(Iteration 801 / 980) loss: 1.295717
(Iteration 901 / 980) loss: 1.317254
(Epoch 1 / 1) train acc: 0.535000; val_acc: 0.515000
(Iteration 1 / 980) loss: 2.304612
(Epoch 0 / 1) train acc: 0.099000; val_acc: 0.107000
(Iteration 101 / 980) loss: 1.746556
(Iteration 201 / 980) loss: 1.641346
(Iteration 301 / 980) loss: 1.832679
(Iteration 401 / 980) loss: 1.675884
(Iteration 501 / 980) loss: 1.827131
(Iteration 601 / 980) loss: 1.553097
(Iteration 701 / 980) loss: 1.720324
(Iteration 801 / 980) loss: 1.384032
(Iteration 901 / 980) loss: 1.171511
(Epoch 1 / 1) train acc: 0.503000; val_acc: 0.530000
(Iteration 1 / 980) loss: 2.306743
(Epoch 0 / 1) train acc: 0.133000; val_acc: 0.142000
(Iteration 101 / 980) loss: 1.793919
(Iteration 201 / 980) loss: 1.847362
(Iteration 301 / 980) loss: 1.582304
(Iteration 401 / 980) loss: 1.630551
(Iteration 501 / 980) loss: 1.517153
(Iteration 601 / 980) loss: 1.656106
(Iteration 701 / 980) loss: 1.705829
(Iteration 801 / 980) loss: 1.708226
(Iteration 901 / 980) loss: 1.591098
(Epoch 1 / 1) train acc: 0.525000; val_acc: 0.524000
In [15]:
# The number of filters does not have a big impact on accuracy
print '#filters train_acc val_acc'
for i, s in enumerate(stats):
  print filter_numbers[i], s.train_acc_history[-1], s.val_acc_history[-1]
#filters train_acc val_acc
8 0.577 0.52
16 0.535 0.515
32 0.503 0.53
64 0.525 0.524
In [18]:
# Add batch norm and check the impact
layers = [
  ['conv', {'num_of_filters':f, 'filter_size':3}],  # f is still 64, left over from the sweep above
  ['relu'],
  ['pool'],
  ['spatial_batchnorm'], # normalize before affine
  ['affine', (500)],
  ['relu'],
  ['batchnorm'] # for last affine layer
]

model = convnetGenerator(layers=layers, weight_scale=0.001, reg=0.001)

solver = Solver(model, data,
                num_epochs=1, batch_size=50,
                update_rule='adam',
                optim_config={
                  'learning_rate': 1e-3,
                },
                verbose=True, print_every=100)
solver.train()
(Iteration 1 / 980) loss: 2.309418
(Epoch 0 / 1) train acc: 0.121000; val_acc: 0.118000
(Iteration 101 / 980) loss: 1.681039
(Iteration 201 / 980) loss: 1.709945
(Iteration 301 / 980) loss: 1.535179
(Iteration 401 / 980) loss: 2.062533
(Iteration 501 / 980) loss: 1.524193
(Iteration 601 / 980) loss: 1.458269
(Iteration 701 / 980) loss: 1.832951
(Iteration 801 / 980) loss: 1.978657
(Iteration 901 / 980) loss: 2.207330
(Epoch 1 / 1) train acc: 0.549000; val_acc: 0.550000
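
spatial_batchnorm is the convolutional variant of batch normalization: it normalizes each of the C channels over the N, H, and W axes instead of treating every (c, h, w) position independently. A sketch of the usual reshape trick, assuming the vanilla batchnorm_forward over (rows, features) inputs from cs231n.layers:

def spatial_batchnorm_sketch(x, gamma, beta, bn_param):
    # x: (N, C, H, W) -> rows of per-channel activations, shape (N*H*W, C)
    N, C, H, W = x.shape
    x_rows = x.transpose(0, 2, 3, 1).reshape(-1, C)
    out_rows, cache = batchnorm_forward(x_rows, gamma, beta, bn_param)
    # restore the original (N, C, H, W) layout
    out = out_rows.reshape(N, H, W, C).transpose(0, 3, 1, 2)
    return out, cache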
In [19]:
# stats[2] is the 32-filter run without BN; the BN run used 64 filters,
# but the sweep above showed filter count barely matters
plt.plot(stats[2].loss_history, 'o')
plt.plot(solver.loss_history, 'o')
plt.xlabel('iteration')
plt.ylabel('loss')
plt.legend(['No BN', 'BatchNorm'], loc='upper right')
Out[19]:
<matplotlib.legend.Legend at 0x7fe52e9c0790>
In [21]:
plt.plot(stats[2].train_acc_history, '-o')
plt.plot(solver.train_acc_history, '-o')
plt.plot(stats[2].val_acc_history, '-o')
plt.plot(solver.val_acc_history, '-o')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(['No BN train ACC', 'BatchNorm train ACC', 'No BN val ACC', 'BatchNorm val ACC'], loc='upper right')
Out[21]:
<matplotlib.legend.Legend at 0x7fe52d04a6d0>
In [22]:
# Maybe we didn't train for long enough?
# Try more epochs
layers = [
  ['conv', {'num_of_filters':f, 'filter_size':3}],  # f is still 64, left over from the sweep above
  ['relu'],
  ['pool'],
  ['spatial_batchnorm'], # normalize before affine
  ['affine', (500)],
  ['relu'],
  ['batchnorm'] # for last affine layer
]

model = convnetGenerator(layers=layers, weight_scale=0.001, reg=0.001)

solver = Solver(model, data,
                num_epochs=10, batch_size=50,
                update_rule='adam',
                optim_config={
                  'learning_rate': 1e-3,
                },
                verbose=True, print_every=100)
solver.train()
(Iteration 1 / 9800) loss: 2.312011
(Epoch 0 / 10) train acc: 0.139000; val_acc: 0.141000
(Iteration 101 / 9800) loss: 1.800229
(Iteration 201 / 9800) loss: 1.651026
(Iteration 301 / 9800) loss: 1.893823
(Iteration 401 / 9800) loss: 1.465016
(Iteration 501 / 9800) loss: 1.716302
(Iteration 601 / 9800) loss: 1.418364
(Iteration 701 / 9800) loss: 1.624111
(Iteration 801 / 9800) loss: 1.590870
(Iteration 901 / 9800) loss: 1.660085
(Epoch 1 / 10) train acc: 0.578000; val_acc: 0.528000
(Iteration 1001 / 9800) loss: 1.756186
(Iteration 1101 / 9800) loss: 1.865406
(Iteration 1201 / 9800) loss: 1.547603
(Iteration 1301 / 9800) loss: 1.560186
(Iteration 1401 / 9800) loss: 1.582535
(Iteration 1501 / 9800) loss: 1.724063
(Iteration 1601 / 9800) loss: 1.365690
(Iteration 1701 / 9800) loss: 1.596534
(Iteration 1801 / 9800) loss: 1.445190
(Iteration 1901 / 9800) loss: 1.531936
(Epoch 2 / 10) train acc: 0.644000; val_acc: 0.605000
(Iteration 2001 / 9800) loss: 1.611134
(Iteration 2101 / 9800) loss: 1.374546
(Iteration 2201 / 9800) loss: 1.486211
(Iteration 2301 / 9800) loss: 1.184432
(Iteration 2401 / 9800) loss: 1.462766
(Iteration 2501 / 9800) loss: 1.394436
(Iteration 2601 / 9800) loss: 1.385071
(Iteration 2701 / 9800) loss: 1.508776
(Iteration 2801 / 9800) loss: 1.769226
(Iteration 2901 / 9800) loss: 1.571395
(Epoch 3 / 10) train acc: 0.683000; val_acc: 0.624000
(Iteration 3001 / 9800) loss: 1.558630
(Iteration 3101 / 9800) loss: 1.433534
(Iteration 3201 / 9800) loss: 1.381232
(Iteration 3301 / 9800) loss: 1.243681
(Iteration 3401 / 9800) loss: 1.483883
(Iteration 3501 / 9800) loss: 1.467480
(Iteration 3601 / 9800) loss: 1.242238
(Iteration 3701 / 9800) loss: 1.526186
(Iteration 3801 / 9800) loss: 1.397907
(Iteration 3901 / 9800) loss: 1.572212
(Epoch 4 / 10) train acc: 0.708000; val_acc: 0.620000
(Iteration 4001 / 9800) loss: 1.283835
(Iteration 4101 / 9800) loss: 1.384965
(Iteration 4201 / 9800) loss: 1.177130
(Iteration 4301 / 9800) loss: 1.633236
(Iteration 4401 / 9800) loss: 1.355671
(Iteration 4501 / 9800) loss: 1.250904
(Iteration 4601 / 9800) loss: 1.174688
(Iteration 4701 / 9800) loss: 1.523482
(Iteration 4801 / 9800) loss: 1.478957
(Epoch 5 / 10) train acc: 0.769000; val_acc: 0.648000
(Iteration 4901 / 9800) loss: 0.954473
(Iteration 5001 / 9800) loss: 1.268360
(Iteration 5101 / 9800) loss: 1.401886
(Iteration 5201 / 9800) loss: 1.410911
(Iteration 5301 / 9800) loss: 1.145333
(Iteration 5401 / 9800) loss: 1.247879
(Iteration 5501 / 9800) loss: 1.419671
(Iteration 5601 / 9800) loss: 0.891581
(Iteration 5701 / 9800) loss: 1.556425
(Iteration 5801 / 9800) loss: 1.352331
(Epoch 6 / 10) train acc: 0.758000; val_acc: 0.664000
(Iteration 5901 / 9800) loss: 1.160467
(Iteration 6001 / 9800) loss: 1.053299
(Iteration 6101 / 9800) loss: 1.297573
(Iteration 6201 / 9800) loss: 1.108562
(Iteration 6301 / 9800) loss: 0.976720
(Iteration 6401 / 9800) loss: 1.025000
(Iteration 6501 / 9800) loss: 0.972647
(Iteration 6601 / 9800) loss: 1.125813
(Iteration 6701 / 9800) loss: 0.997228
(Iteration 6801 / 9800) loss: 1.126960
(Epoch 7 / 10) train acc: 0.773000; val_acc: 0.648000
(Iteration 6901 / 9800) loss: 1.148099
(Iteration 7001 / 9800) loss: 1.062083
(Iteration 7101 / 9800) loss: 0.988627
(Iteration 7201 / 9800) loss: 0.785474
(Iteration 7301 / 9800) loss: 1.020244
(Iteration 7401 / 9800) loss: 0.918925
(Iteration 7501 / 9800) loss: 1.197352
(Iteration 7601 / 9800) loss: 0.964436
(Iteration 7701 / 9800) loss: 1.401961
(Iteration 7801 / 9800) loss: 1.145336
(Epoch 8 / 10) train acc: 0.762000; val_acc: 0.631000
(Iteration 7901 / 9800) loss: 1.210087
(Iteration 8001 / 9800) loss: 1.096513
(Iteration 8101 / 9800) loss: 1.077458
(Iteration 8201 / 9800) loss: 0.918693
(Iteration 8301 / 9800) loss: 1.164165
(Iteration 8401 / 9800) loss: 1.050552
(Iteration 8501 / 9800) loss: 0.906412
(Iteration 8601 / 9800) loss: 1.255938
(Iteration 8701 / 9800) loss: 0.979160
(Iteration 8801 / 9800) loss: 0.909903
(Epoch 9 / 10) train acc: 0.775000; val_acc: 0.654000
(Iteration 8901 / 9800) loss: 1.469478
(Iteration 9001 / 9800) loss: 1.028888
(Iteration 9101 / 9800) loss: 1.071493
(Iteration 9201 / 9800) loss: 1.057363
(Iteration 9301 / 9800) loss: 1.126383
(Iteration 9401 / 9800) loss: 1.048362
(Iteration 9501 / 9800) loss: 0.820006
(Iteration 9601 / 9800) loss: 0.932375
(Iteration 9701 / 9800) loss: 1.136240
(Epoch 10 / 10) train acc: 0.786000; val_acc: 0.680000
In [24]:
plt.plot(stats[2].train_acc_history, '-o')
plt.plot(solver.train_acc_history, '-o')
plt.plot(stats[2].val_acc_history, '-o')
plt.plot(solver.val_acc_history, '-o')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(['No BN train ACC', 'BatchNorm train ACC', 'No BN val ACC', 'BatchNorm val ACC'], loc='lower right')
Out[24]:
<matplotlib.legend.Legend at 0x7fe52ce46e90>
In [25]:
y_test_pred = np.argmax(model.loss(data['X_test']), axis=1)
y_val_pred = np.argmax(model.loss(data['X_val']), axis=1)
print 'Validation set accuracy: ', (y_val_pred == data['y_val']).mean()
print 'Test set accuracy: ', (y_test_pred == data['y_test']).mean()
Validation set accuracy:  0.68
Test set accuracy:  0.665
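
Calling model.loss(X) with no labels returns class scores, so np.argmax over axis 1 yields predictions. For larger inputs it can be gentler on memory to score in chunks; a minimal sketch (the 500-sample batch size is an arbitrary choice):

def predict_in_batches(model, X, batch_size=500):
    # score the inputs chunk by chunk and concatenate the argmax labels
    preds = []
    for i in xrange(0, X.shape[0], batch_size):
        scores = model.loss(X[i:i + batch_size])
        preds.append(np.argmax(scores, axis=1))
    return np.concatenate(preds)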
In [26]:
# Remove the max-pool layer and train for the same 10 epochs
layers = [
  ['conv', {'num_of_filters':f, 'filter_size':3}],  # f is still 64, left over from the sweep above
  ['relu'],
  ['spatial_batchnorm'], # normalize before affine
  ['affine', (500)],
  ['relu'],
  ['batchnorm'] # for last affine layer
]

model = convnetGenerator(layers=layers, weight_scale=0.001, reg=0.001)

solver = Solver(model, data,
                num_epochs=10, batch_size=50,
                update_rule='adam',
                optim_config={
                  'learning_rate': 1e-3,
                },
                verbose=True, print_every=100)
solver.train()
(Iteration 1 / 9800) loss: 2.324754
(Epoch 0 / 10) train acc: 0.095000; val_acc: 0.123000
(Iteration 101 / 9800) loss: 1.896241
(Iteration 201 / 9800) loss: 1.805495
(Iteration 301 / 9800) loss: 2.008959
(Iteration 401 / 9800) loss: 1.973951
(Iteration 501 / 9800) loss: 1.878531
(Iteration 601 / 9800) loss: 2.170256
(Iteration 701 / 9800) loss: 1.937537
(Iteration 801 / 9800) loss: 2.089812
(Iteration 901 / 9800) loss: 1.766083
(Epoch 1 / 10) train acc: 0.505000; val_acc: 0.488000
(Iteration 1001 / 9800) loss: 1.767214
(Iteration 1101 / 9800) loss: 2.115131
(Iteration 1201 / 9800) loss: 2.088252
(Iteration 1301 / 9800) loss: 1.764972
(Iteration 1401 / 9800) loss: 1.684328
(Iteration 1501 / 9800) loss: 1.849211
(Iteration 1601 / 9800) loss: 1.987896
(Iteration 1701 / 9800) loss: 1.848357
(Iteration 1801 / 9800) loss: 1.791609
(Iteration 1901 / 9800) loss: 1.953224
(Epoch 2 / 10) train acc: 0.500000; val_acc: 0.521000
(Iteration 2001 / 9800) loss: 1.636806
(Iteration 2101 / 9800) loss: 1.675737
(Iteration 2201 / 9800) loss: 1.876629
(Iteration 2301 / 9800) loss: 1.797997
(Iteration 2401 / 9800) loss: 1.711645
(Iteration 2501 / 9800) loss: 1.825393
(Iteration 2601 / 9800) loss: 1.990960
(Iteration 2701 / 9800) loss: 1.666032
(Iteration 2801 / 9800) loss: 1.715085
(Iteration 2901 / 9800) loss: 1.442548
(Epoch 3 / 10) train acc: 0.554000; val_acc: 0.526000
(Iteration 3001 / 9800) loss: 1.674004
(Iteration 3101 / 9800) loss: 1.797069
(Iteration 3201 / 9800) loss: 1.785476
(Iteration 3301 / 9800) loss: 1.701619
(Iteration 3401 / 9800) loss: 1.394719
(Iteration 3501 / 9800) loss: 1.652609
(Iteration 3601 / 9800) loss: 1.669696
(Iteration 3701 / 9800) loss: 1.238229
(Iteration 3801 / 9800) loss: 1.357781
(Iteration 3901 / 9800) loss: 1.622159
(Epoch 4 / 10) train acc: 0.601000; val_acc: 0.552000
(Iteration 4001 / 9800) loss: 1.701807
(Iteration 4101 / 9800) loss: 1.292943
(Iteration 4201 / 9800) loss: 1.388350
(Iteration 4301 / 9800) loss: 1.441849
(Iteration 4401 / 9800) loss: 1.221635
(Iteration 4501 / 9800) loss: 1.574081
(Iteration 4601 / 9800) loss: 1.314446
(Iteration 4701 / 9800) loss: 1.440264
(Iteration 4801 / 9800) loss: 1.329411
(Epoch 5 / 10) train acc: 0.659000; val_acc: 0.607000
(Iteration 4901 / 9800) loss: 1.475479
(Iteration 5001 / 9800) loss: 1.391802
(Iteration 5101 / 9800) loss: 1.221501
(Iteration 5201 / 9800) loss: 1.256809
(Iteration 5301 / 9800) loss: 1.382730
(Iteration 5401 / 9800) loss: 1.371024
(Iteration 5501 / 9800) loss: 1.710086
(Iteration 5601 / 9800) loss: 1.131282
(Iteration 5701 / 9800) loss: 1.577423
(Iteration 5801 / 9800) loss: 1.497820
(Epoch 6 / 10) train acc: 0.710000; val_acc: 0.594000
(Iteration 5901 / 9800) loss: 1.534810
(Iteration 6001 / 9800) loss: 1.306800
(Iteration 6101 / 9800) loss: 1.248945
(Iteration 6201 / 9800) loss: 1.344940
(Iteration 6301 / 9800) loss: 1.697938
(Iteration 6401 / 9800) loss: 1.240161
(Iteration 6501 / 9800) loss: 1.334998
(Iteration 6601 / 9800) loss: 1.067959
(Iteration 6701 / 9800) loss: 1.041611
(Iteration 6801 / 9800) loss: 1.486041
(Epoch 7 / 10) train acc: 0.722000; val_acc: 0.620000
(Iteration 6901 / 9800) loss: 1.136422
(Iteration 7001 / 9800) loss: 1.130785
(Iteration 7101 / 9800) loss: 1.198386
(Iteration 7201 / 9800) loss: 1.403366
(Iteration 7301 / 9800) loss: 1.137794
(Iteration 7401 / 9800) loss: 1.069853
(Iteration 7501 / 9800) loss: 1.220662
(Iteration 7601 / 9800) loss: 1.098863
(Iteration 7701 / 9800) loss: 1.360739
(Iteration 7801 / 9800) loss: 1.190277
(Epoch 8 / 10) train acc: 0.764000; val_acc: 0.630000
(Iteration 7901 / 9800) loss: 1.096901
(Iteration 8001 / 9800) loss: 1.450723
(Iteration 8101 / 9800) loss: 1.169444
(Iteration 8201 / 9800) loss: 1.306434
(Iteration 8301 / 9800) loss: 1.085921
(Iteration 8401 / 9800) loss: 1.113912
(Iteration 8501 / 9800) loss: 1.243758
(Iteration 8601 / 9800) loss: 1.303660
(Iteration 8701 / 9800) loss: 0.966121
(Iteration 8801 / 9800) loss: 1.269276
(Epoch 9 / 10) train acc: 0.763000; val_acc: 0.605000
(Iteration 8901 / 9800) loss: 0.956320
(Iteration 9001 / 9800) loss: 1.402196
(Iteration 9101 / 9800) loss: 0.896963
(Iteration 9201 / 9800) loss: 0.923713
(Iteration 9301 / 9800) loss: 0.928696
(Iteration 9401 / 9800) loss: 1.179554
(Iteration 9501 / 9800) loss: 1.292880
(Iteration 9601 / 9800) loss: 1.021084
(Iteration 9701 / 9800) loss: 1.149757
(Epoch 10 / 10) train acc: 0.791000; val_acc: 0.604000
In [27]:
y_test_pred = np.argmax(model.loss(data['X_test']), axis=1)
y_val_pred = np.argmax(model.loss(data['X_val']), axis=1)
print 'Validation set accuracy: ', (y_val_pred == data['y_val']).mean()
print 'Test set accuracy: ', (y_test_pred == data['y_test']).mean()
Validation set accuracy:  0.509
Test set accuracy:  0.52
In [28]:
plt.plot(stats[2].train_acc_history, '-o')
plt.plot(solver.train_acc_history, '-o')
plt.plot(stats[2].val_acc_history, '-o')
plt.plot(solver.val_acc_history, '-o')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(['No BN train ACC', 'BatchNorm train ACC', 'No BN val ACC', 'BatchNorm val ACC'], loc='lower right')
Out[28]:
<matplotlib.legend.Legend at 0x7fe52cd37690>

Published: January 28 2017
