Configurable Convolutional Neural Network
Convolutional Network Framework in Python
Testing a configurable CNN on CIFAR-10. Based on a class project.
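Networks are described as a list of layer specifications, each a ['layer_name', params] entry, in the same format used by the cells below; for example:
# Example layer specification understood by convnetGenerator (same conventions
# as the cells below): conv options are passed as a dict, affine as a width.
layers = [
    ['conv', {'num_of_filters': 32, 'filter_size': 3}],
    ['relu'],
    ['pool'],
    ['affine', 100],
    ['relu'],
]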
In [1]:
# Setup environment
import numpy as np
import matplotlib.pyplot as plt
from cs231n.classifiers.convnetGenerator import *
from cs231n.data_utils import get_CIFAR10_data
from cs231n.gradient_check import eval_numerical_gradient_array, eval_numerical_gradient
from cs231n.layers import *
from cs231n.fast_layers import *
from cs231n.solver import Solver
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'
%load_ext autoreload
%autoreload 2
def rel_error(x, y):
    """ Returns the max relative error between x and y. """
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))
In [2]:
# Load preprocessed CIFAR10 images
data = get_CIFAR10_data()
for k, v in data.iteritems():
    print '%s: ' % k, v.shape
In [3]:
# Load convnetGenerator
from cs231n.classifiers.convnet import *
In [4]:
# Sanity check on loss value: with 10 classes and softmax, the initial
# loss (without regularization) should be close to log(10) ~ 2.3
layers = [
['conv', {'num_of_filters':5, 'filter_size':3}],
['spatial_batchnorm'],
['relu'],
['dropout'],
['pool'],
['affine', (20)],
['batchnorm'],
['affine', (10)],
['relu'],
['dropout']
]
N = 10
X = np.random.randn(N, 3, 4, 4)
y = np.random.randint(10, size=N)
model = convnetGenerator(input_dim=(3, 4, 4),
layers=layers, verbose=False,
weight_scale=1e-1, seed=0,
dtype=np.float64)
loss, grads = model.loss(X, y)
print 'Initial loss (without regularization): ', loss
for param_name in sorted(grads):
    f = lambda _: model.loss(X, y)[0]
    param_grad_num = eval_numerical_gradient(f, model.params[param_name], verbose=False, h=1e-6)
    e = rel_error(param_grad_num, grads[param_name])
    print '%s max relative error: %e' % (param_name, e)
model.reg = 1
loss, grads = model.loss(X, y)
print 'Initial loss (with regularization): ', loss
for param_name in sorted(grads):
    f = lambda _: model.loss(X, y)[0]
    param_grad_num = eval_numerical_gradient(f, model.params[param_name], verbose=False, h=1e-6)
    e = rel_error(param_grad_num, grads[param_name])
    print '%s max relative error: %e' % (param_name, e)
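For reference, eval_numerical_gradient estimates each partial derivative with centered differences; a minimal illustrative sketch (not the actual cs231n implementation) looks like this:
# Illustrative centered-difference gradient check: perturb one entry at a time
# and compare (f(x + h) - f(x - h)) / (2h) against the analytic gradient.
def numerical_gradient_sketch(f, x, h=1e-6):
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
        old = x[ix]
        x[ix] = old + h
        fxph = f(x)            # f(x + h)
        x[ix] = old - h
        fxmh = f(x)            # f(x - h)
        x[ix] = old            # restore the original value
        grad[ix] = (fxph - fxmh) / (2 * h)
        it.iternext()
    return grad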
In [5]:
# Test a three-layer model clone
# Overfit a small dataset
num_train = 100
small_data = {
'X_train': data['X_train'][:num_train],
'y_train': data['y_train'][:num_train],
'X_val': data['X_val'],
'y_val': data['y_val'],
}
layers = [
['conv', {'num_of_filters':32, 'filter_size':7}],
['relu'],
['pool'],
['affine', (100)],
['relu']
]
model = convnetGenerator(layers=layers,
verbose=False,
weight_scale=1e-2)
solver = Solver(model, small_data,
num_epochs=10, batch_size=50,
update_rule='adam',
optim_config={
'learning_rate': 1e-3
},
verbose=True, print_every=1)
solver.train()
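If the model is working, it should be able to overfit these 100 training samples: training accuracy should climb toward 1.0 while validation accuracy stays much lower. A quick check (exact numbers vary with the random seed):
# Final accuracies from the overfitting run above
print('final train acc: %.3f' % solver.train_acc_history[-1])
print('final val acc:   %.3f' % solver.val_acc_history[-1])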
In [6]:
# Plot training loss, training accuracy, and validation accuracy
plt.subplot(2, 1, 1)
plt.plot(solver.loss_history, 'o')
plt.xlabel('iteration')
plt.ylabel('loss')
plt.subplot(2, 1, 2)
plt.plot(solver.train_acc_history, '-o')
plt.plot(solver.val_acc_history, '-o')
plt.legend(['train', 'val'], loc='upper left')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.show()
In [7]:
# Train the three-layer model on the full training set
layers = [
['conv', {'num_of_filters':32, 'filter_size':7}],
['relu'],
['pool'],
['affine', (500)],
['relu']
]
model = convnetGenerator(layers=layers,
verbose=False,
weight_scale=1e-3,
reg=0.001)
solver = Solver(model, data,
num_epochs=1, batch_size=50,
update_rule='adam',
optim_config={
'learning_rate': 1e-3
},
verbose=True, print_every=20)
solver.train()
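Assuming the generator follows the usual cs231n convention (stride-1 convolution with pad = (filter_size - 1) // 2, followed by a 2x2 max pool with stride 2), the conv layer preserves the 32x32 spatial size and the pool halves it, so the affine layer sees 32 * 16 * 16 = 8192 inputs:
# Rough size bookkeeping under the assumed padding/pooling convention
F, num_filters = 7, 32
H = 32                                      # CIFAR-10 images are 32x32
H_conv = (H + 2 * ((F - 1) // 2) - F) + 1   # = 32, spatial size preserved
H_pool = H_conv // 2                        # = 16 after the 2x2 max pool
print('affine input dimension: %d' % (num_filters * H_pool * H_pool))  # 8192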
In [9]:
# Visualize the learned first-layer (conv) filters
from cs231n.vis_utils import visualize_grid
grid = visualize_grid(model.params['W0'].transpose(0, 2, 3, 1))
plt.imshow(grid.astype('uint8'))
plt.axis('off')
plt.gcf().set_size_inches(5, 5)
plt.show()
In [10]:
# Keep the loss history of the 7x7 run for comparison
history_7x7 = solver.loss_history
In [11]:
# Try filter size 3x3
layers = [
['conv', {'num_of_filters':32, 'filter_size':3}],
['relu'],
['pool'],
['affine', (500)],
['relu']
]
model = convnetGenerator(layers=layers,
verbose=False,
weight_scale=1e-3,
reg=0.001)
solver = Solver(model, data,
num_epochs=1, batch_size=50,
update_rule='adam',
optim_config={
'learning_rate': 1e-3
},
verbose=True, print_every=20)
solver.train()
In [12]:
# plot both runs to compare
plt.plot(history_7x7, 'o')
plt.plot(solver.loss_history, 'o')
plt.xlabel('iteration')
plt.ylabel('loss')
plt.legend(['7x7', '3x3'], loc='upper right')
Out[12]:
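For context on the two configurations, a rough weight count (biases ignored, using the sizes worked out above) shows the conv layer is small compared to the first affine layer in either case:
# Rough weight counts for the two single-conv-layer configurations
conv_7x7 = 32 * 3 * 7 * 7        # 4704 weights in the 7x7 conv layer
conv_3x3 = 32 * 3 * 3 * 3        # 864 weights in the 3x3 conv layer
affine_1 = 32 * 16 * 16 * 500    # ~4.1M weights in the first affine layer
print('%d %d %d' % (conv_7x7, conv_3x3, affine_1))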
In [13]:
# Check the impact of the number of filters
filter_numbers = [8, 16, 32, 64]
stats = []
for f in filter_numbers:
    layers = [
        ['conv', {'num_of_filters':f, 'filter_size':3}],
        ['relu'],
        ['pool'],
        ['affine', (500)],
        ['relu']
    ]
    model = convnetGenerator(layers=layers,
                             verbose=False,
                             weight_scale=1e-3,
                             reg=0.001)
    solver = Solver(model, data,
                    num_epochs=1, batch_size=50,
                    update_rule='adam',
                    optim_config={'learning_rate': 1e-3},
                    verbose=True, print_every=100)
    solver.train()
    stats.append(solver)
In [15]:
# The number of filters does not have a big impact here
print '# train_acc val_acc'
for i, s in enumerate(stats):
    print filter_numbers[i], s.train_acc_history[-1], s.val_acc_history[-1]
In [18]:
# Add batch norm and check its impact
layers = [
['conv', {'num_of_filters':f, 'filter_size':3}],  # f is still 64 from the loop above
['relu'],
['pool'],
['spatial_batchnorm'], # normalize before affine
['affine', (500)],
['relu'],
['batchnorm'] # for last affine layer
]
model = convnetGenerator(layers=layers, weight_scale=0.001, reg=0.001)
solver = Solver(model, data,
num_epochs=1, batch_size=50,
update_rule='adam',
optim_config={
'learning_rate': 1e-3,
},
verbose=True, print_every=100)
solver.train()
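In the cs231n assignments, spatial batch normalization is typically implemented by reshaping the (N, C, H, W) activations to (N*H*W, C) and reusing the vanilla batchnorm, so statistics are computed per channel over the batch and all spatial positions. A minimal training-time sketch (illustrative only, not the actual cs231n code):
# Sketch of a training-mode spatial batchnorm forward pass
def spatial_batchnorm_forward_sketch(x, gamma, beta, eps=1e-5):
    N, C, H, W = x.shape
    x_flat = x.transpose(0, 2, 3, 1).reshape(-1, C)   # (N*H*W, C)
    mu = x_flat.mean(axis=0)                           # per-channel mean
    var = x_flat.var(axis=0)                           # per-channel variance
    x_hat = (x_flat - mu) / np.sqrt(var + eps)
    out = gamma * x_hat + beta                         # scale and shift
    return out.reshape(N, H, W, C).transpose(0, 3, 1, 2)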
In [19]:
plt.plot(stats[2].loss_history, 'o')
plt.plot(solver.loss_history, 'o')
plt.xlabel('iteration')
plt.ylabel('loss')
plt.legend(['No BN', 'BatchNorm'], loc='upper right')
Out[19]:
In [21]:
plt.plot(stats[2].train_acc_history, '-o')
plt.plot(solver.train_acc_history, '-o')
plt.plot(stats[2].val_acc_history, '-o')
plt.plot(solver.val_acc_history, '-o')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(['No BN train ACC', 'BatchNorm train ACC', 'No BN val ACC', 'BatchNorm val ACC'], loc='upper right')
Out[21]:
In [22]:
# Maybe we didn't train for long enough?
# Try more epochs
layers = [
['conv', {'num_of_filters':f, 'filter_size':3}],
['relu'],
['pool'],
['spatial_batchnorm'], # normalize before affine
['affine', (500)],
['relu'],
['batchnorm'] # for last affine layer
]
model = convnetGenerator(layers=layers, weight_scale=0.001, reg=0.001)
solver = Solver(model, data,
num_epochs=10, batch_size=50,
update_rule='adam',
optim_config={
'learning_rate': 1e-3,
},
verbose=True, print_every=100)
solver.train()
In [24]:
plt.plot(stats[2].train_acc_history, '-o')
plt.plot(solver.train_acc_history, '-o')
plt.plot(stats[2].val_acc_history, '-o')
plt.plot(solver.val_acc_history, '-o')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(['No BN train ACC', 'BatchNorm train ACC', 'No BN val ACC', 'BatchNorm val ACC'], loc='lower right')
Out[24]:
In [25]:
# Evaluate the trained model on the validation and test sets
y_test_pred = np.argmax(model.loss(data['X_test']), axis=1)
y_val_pred = np.argmax(model.loss(data['X_val']), axis=1)
print 'Validation set accuracy: ', (y_val_pred == data['y_val']).mean()
print 'Test set accuracy: ', (y_test_pred == data['y_test']).mean()
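Calling model.loss without labels returns the class scores. If the full test set is too large to score in one call, a hypothetical batched variant could look like this (predict_in_batches is not part of the cs231n code):
# Hypothetical helper: score the data in chunks and take the argmax per row
def predict_in_batches(model, X, batch_size=500):
    preds = []
    for i in xrange(0, X.shape[0], batch_size):
        scores = model.loss(X[i:i + batch_size])   # no labels -> returns scores
        preds.append(np.argmax(scores, axis=1))
    return np.concatenate(preds)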
In [26]:
# Remove the max pool layer and train for the same 10 epochs
layers = [
['conv', {'num_of_filters':f, 'filter_size':3}],
['relu'],
['spatial_batchnorm'], # normalize before affine
['affine', (500)],
['relu'],
['batchnorm'] # for last affine layer
]
model = convnetGenerator(layers=layers, weight_scale=0.001, reg=0.001)
solver = Solver(model, data,
num_epochs=10, batch_size=50,
update_rule='adam',
optim_config={
'learning_rate': 1e-3,
},
verbose=True, print_every=100)
solver.train()
In [27]:
y_test_pred = np.argmax(model.loss(data['X_test']), axis=1)
y_val_pred = np.argmax(model.loss(data['X_val']), axis=1)
print 'Validation set accuracy: ', (y_val_pred == data['y_val']).mean()
print 'Test set accuracy: ', (y_test_pred == data['y_test']).mean()
In [28]:
plt.plot(stats[2].train_acc_history, '-o')
plt.plot(solver.train_acc_history, '-o')
plt.plot(stats[2].val_acc_history, '-o')
plt.plot(solver.val_acc_history, '-o')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(['No BN train ACC', 'BatchNorm train ACC', 'No BN val ACC', 'BatchNorm val ACC'], loc='lower right')
Out[28]: