Featured image is from analyticsvidhya.com
Update (June 19, 2019):
Recently, I revisited this case and found that the latest versions of Keras==2.2.4
and tensorflow-gpu==1.13.1
make customizing VGG16 easier. For example, we can use a pre-trained VGG16 to fit the CIFAR-10 (32×32) dataset like this:
from keras.applications.vgg16 import VGG16
from keras.layers import Dense, Dropout, GlobalAveragePooling2D
from keras.models import Model

X, y = load_cfar10_batch(dir_path, 1)

base_model = VGG16(include_top=False, weights=vgg16_weights, input_shape=(32, 32, 3))

# add a global spatial average pooling layer,
# a fully-connected layer and a prediction layer
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.3)(x)
predictions = Dense(10, activation='softmax')(x)

# freeze vgg16 layers
for layer in base_model.layers:
    layer.trainable = False

model = Model(inputs=base_model.input, outputs=predictions)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
model.fit(X, y)
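The load_cfar10_batch helper isn't shown in the snippet above. A minimal sketch of what it might look like, assuming the standard pickled CIFAR-10 "python version" batch files and one-hot labels (which categorical_crossentropy expects):

import pickle
import numpy as np
from keras.utils import to_categorical

# Minimal sketch of the assumed helper: load one pickled CIFAR-10 batch
# file (data_batch_1 .. data_batch_5) into (N, 32, 32, 3) images and
# one-hot labels.
def load_cfar10_batch(dir_path, batch_id):
    with open('{}/data_batch_{}'.format(dir_path, batch_id), 'rb') as f:
        batch = pickle.load(f, encoding='latin1')
    # each row is a flat 3072-vector in channel-major order: reshape,
    # then move channels last to match input_shape=(32, 32, 3)
    X = batch['data'].reshape(len(batch['data']), 3, 32, 32).transpose(0, 2, 3, 1)
    y = to_categorical(batch['labels'], num_classes=10)
    return X, y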
Since include_top=False drops the pretrained fully-connected layers, the convolutional weights fit the smaller 32×32 input directly, and we don't need to write a function to manually load the weights anymore :)
You can find the full script on my GitHub.
Original (November 17, 2016):
Keras graciously provides an API to use pretrained models such as VGG16
easily. Unfortunately, if we try to use an input shape other than 224 x 224 with the given API (keras 1.1.1
& theano 0.9.0dev4
):
from keras.layers import Input
from keras.optimizers import SGD
from keras.applications.vgg16 import VGG16

...

model = VGG16(weights='imagenet', input_tensor=Input(shape=(3, 300, 300)))
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(optimizer=sgd, loss='categorical_crossentropy')
model.fit(X, Y, batch_size=1, nb_epoch=1, validation_data=(X, Y))
we will hit an intimidating error similar to:
Traceback (most recent call last):
  File "vgg-16_keras_input_change_2.py", line 28, in <module>
    model.fit(X, Y, batch_size=1, nb_epoch=1, validation_data=(X, Y))
  File "/usr/local/lib/python2.7/dist-packages/keras/engine/training.py", line 1129, in fit
    callback_metrics=callback_metrics)
  File "/usr/local/lib/python2.7/dist-packages/keras/engine/training.py", line 847, in _fit_loop
    outs = f(ins_batch)
  File "/usr/local/lib/python2.7/dist-packages/keras/backend/theano_backend.py", line 811, in __call__
    return self.function(*inputs)
  File "/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.py", line 886, in __call__
    storage_map=getattr(self.fn, 'storage_map', None))
  File "/usr/local/lib/python2.7/dist-packages/theano/gof/link.py", line 325, in raise_with_op
    reraise(exc_type, exc_value, exc_trace)
  File "/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.py", line 873, in __call__
    self.fn() if output_subset is None else\
ValueError: dimension mismatch in args to gemm (1,41472)x(25088,4096)->(1,4096)
Apply node that caused the error: GpuDot22(GpuReshape{2}.0, fc1_W)
Toposort index: 346
Inputs types: [CudaNdarrayType(float32, matrix), CudaNdarrayType(float32, matrix)]
Inputs shapes: [(1, 41472), (25088, 4096)]
Inputs strides: [(0, 1), (4096, 1)]
Inputs values: ['not shown', 'not shown']
Outputs clients: [[GpuElemwise{add,no_inplace}(GpuDot22.0, GpuDimShuffle{x,0}.0)]]
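The numbers in the error come straight from the architecture: VGG16 halves the spatial dimensions five times (one 2×2 max-pooling per block, with the convolutions padded to preserve size), so a 224 x 224 input reaches the first fully-connected layer as 7 × 7 × 512 = 25088 features, while a 300 x 300 input reaches it as 9 × 9 × 512 = 41472 features (300 → 150 → 75 → 37 → 18 → 9, flooring at each pooling). The pretrained fc1 weight matrix has shape (25088, 4096), hence the gemm mismatch. A quick sketch of that arithmetic:

# Sketch: flattened feature size entering VGG16's fc1 for a given input side.
# Assumes "same"-padded convolutions and five 2x2/stride-2 poolings, as in VGG16.
def vgg16_flat_features(side):
    for _ in range(5):          # five max-pooling stages
        side //= 2              # each pooling halves (and floors) the spatial size
    return side * side * 512    # 512 channels in the last conv block

print(vgg16_flat_features(224))  # 25088 -- what the pretrained fc1 weights expect
print(vgg16_flat_features(300))  # 41472 -- what a 300x300 input actually produces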
Luckily, thanks to some posts on the Keras GitHub and Stack Overflow, there is a way to do it. Basically, we need to redefine each layer of VGG16, load the convolutional weights explicitly, and train a fresh fully-connected head on top:
from keras.models import Sequential
from keras.layers.core import Flatten, Dense, Dropout
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.optimizers import SGD
import cv2, numpy as np
import linecache
import sys
import h5py

def VGG_16(weights_path=None):
    model = Sequential()
    model.add(ZeroPadding2D((1,1),input_shape=(3,300,300)))
    #model.add(ZeroPadding2D((1,1),input_shape=(3,224,224)))
    model.add(Convolution2D(64, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(64, 3, 3, activation='relu'))
    model.add(MaxPooling2D((2,2), strides=(2,2)))

    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(128, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(128, 3, 3, activation='relu'))
    model.add(MaxPooling2D((2,2), strides=(2,2)))

    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(256, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(256, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(256, 3, 3, activation='relu'))
    model.add(MaxPooling2D((2,2), strides=(2,2)))

    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(512, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(512, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(512, 3, 3, activation='relu'))
    model.add(MaxPooling2D((2,2), strides=(2,2)))

    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(512, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(512, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(512, 3, 3, activation='relu'))
    model.add(MaxPooling2D((2,2), strides=(2,2)))

    # load weights for the convolutional layers
    if weights_path:
        f = h5py.File(weights_path)
        for k in range(f.attrs['nb_layers']):
            if k >= len(model.layers) - 1:
                # we don't look at the last two layers in the savefile
                # (fully-connected and activation)
                break
            g = f['layer_{}'.format(k)]
            weights = [g['param_{}'.format(p)] for p in range(g.attrs['nb_params'])]
            layer = model.layers[k]
            #if layer.__class__.__name__ in ['Convolution1D', 'Convolution2D', 'Convolution3D', 'AtrousConvolution2D']:
            #    weights[0] = np.transpose(weights[0], (2, 3, 1, 0))
            layer.set_weights(weights)
            # freeze the pretrained layer
            layer.trainable = False
        f.close()

    model.add(Flatten())
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1000, activation='softmax'))

    return model
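The loader above assumes the old-style Theano-ordered vgg16_weights.h5 layout: a top-level nb_layers attribute, one layer_{k} group per layer, and param_{p} datasets inside each group. If loading fails, a quick way to check whether your weight file actually matches that layout (a minimal sketch using h5py):

import h5py

# Sketch: print the layer/param layout that VGG_16() above expects.
# Assumes the old-style Keras/Theano weight file with an 'nb_layers'
# attribute and per-layer 'layer_{k}' groups.
with h5py.File('vgg16_weights.h5', 'r') as f:
    print('nb_layers:', f.attrs['nb_layers'])
    for k in range(f.attrs['nb_layers']):
        g = f['layer_{}'.format(k)]
        shapes = [g['param_{}'.format(p)].shape for p in range(g.attrs['nb_params'])]
        print('layer_{}:'.format(k), shapes)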
Then we can call the function to get the model and train it:
# load custom vgg16 and train 1 epoch
model = VGG_16('vgg16_weights.h5')
#print_weights(model)
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(optimizer=sgd, loss='categorical_crossentropy')
model.fit(X, Y, batch_size=1, nb_epoch=1, validation_data=(X, Y))
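Before training, it is worth confirming that the freezing inside VGG_16 actually took effect, since only the new Flatten/Dense head should be updated. A quick check (a sketch, reusing the model object from above):

# Sketch: list each layer and whether it will be updated during training.
# The convolutional layers loaded from vgg16_weights.h5 should report
# trainable = False; the fresh fully-connected head should report True.
for i, layer in enumerate(model.layers):
    print(i, layer.__class__.__name__, 'trainable =', layer.trainable)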
Check out the full source code here.