Keras VGG16 with different input shape

Featured image is from analyticsvidhya.com

Update (June 19, 2019):

Recently, I revisit this case and found out the latest version of Keras==2.2.4 and tensorflow-gpu==1.13.1 make customizing VGG16 easier. For example, we can use pre-trained VGG16 to fit CIFAR-10 (32×32) dataset just like this:

X, y = load_cfar10_batch(dir_path, 1)
base_model = VGG16(include_top=False, weights=vgg16_weights, input_shape=(32, 32, 3))
# add a global spatial average pooling layer
# fully-connected layer and prediction layer
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.3)(x)
predictions = Dense(10, activation='softmax')(x)
# freeze vgg16 layers
for layer in base_model.layers:
    layer.trainable = False
model = Model(inputs=base_model.input, outputs=predictions)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
model.fit(X, y)

Now, we don’t need to create function to manually load the weights anymore ?

You can find the full script in my Github.

Original (November 17, 2016):

Keras graciously provides an API to use pretrained models such as VGG16 easily. Unfortunatey, if we try to use different input shape other than 224 x 224 using given API (keras 1.1.1 & theano 0.9.0dev4)

from keras.layers import Input 
from keras.optimizers import SGD 
from keras.applications.vgg16 import VGG16 

... 

model = VGG16(weights='imagenet', input_tensor=Input(shape=(3, 300, 300))) 
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True) 
model.compile(optimizer=sgd, loss='categorical_crossentropy') 
model.fit(X, Y, batch_size=1, nb_epoch=1, validation_data=(X, Y))

we will hit an intimidating error similar to:

Traceback (most recent call last): File "vgg-16_keras_input_change_2.py", line 28, 
in <module> model.fit(X, Y, batch_size=1, nb_epoch=1, validation_data=(X, Y)) File "/usr/local/lib/python2.7/dist-packages/keras/engine/training.py", line 1129,
in fit callback_metrics=callback_metrics) File "/usr/local/lib/python2.7/dist-packages/keras/engine/training.py", line 847, 
in _fit_loop outs = f(ins_batch) File "/usr/local/lib/python2.7/dist-packages/keras/backend/theano_backend.py", line 811, 
in __call__ return self.function(*inputs) File "/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.py", line 886, 
in __call__ storage_map=getattr(self.fn, 'storage_map', None)) File "/usr/local/lib/python2.7/dist-packages/theano/gof/link.py", line 325, 
in raise_with_op reraise(exc_type, exc_value, exc_trace) File "/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.py", line 873, 
in __call__ self.fn() if output_subset is None else\ ValueError: dimension mismatch in args to gemm (1,41472)x(25088,4096)->(1,4096) Apply node that caused the error: GpuDot22(GpuReshape{2}.0, fc1_W) Toposort index: 346 Inputs types: [CudaNdarrayType(float32, matrix), CudaNdarrayType(float32, matrix)] Inputs shapes: [(1, 41472), (25088, 4096)] Inputs strides: [(0, 1), (4096, 1)] Inputs values: ['not shown', 'not shown'] Outputs clients: [[GpuElemwise{add,no_inplace}(GpuDot22.0, GpuDimShuffle{x,0}.0)]]

Luckily–thanks to some posts in keras github and stackoverflow–there is a way to do it. Basically we need to redefine each layers of VGG16 and load its weights explicitly:

from keras.models import Sequential
from keras.layers.core import Flatten, Dense, Dropout
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.optimizers import SGD
import cv2, numpy as np
import linecache
import sys
import h5py


def VGG_16(weights_path=None):
    model = Sequential()
    model.add(ZeroPadding2D((1,1),input_shape=(3,300,300)))
    #model.add(ZeroPadding2D((1,1),input_shape=(3,224,224)))
    model.add(Convolution2D(64, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(64, 3, 3, activation='relu'))
    model.add(MaxPooling2D((2,2), strides=(2,2)))

    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(128, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(128, 3, 3, activation='relu'))
    model.add(MaxPooling2D((2,2), strides=(2,2)))

    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(256, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(256, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(256, 3, 3, activation='relu'))
    model.add(MaxPooling2D((2,2), strides=(2,2)))

    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(512, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(512, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(512, 3, 3, activation='relu'))
    model.add(MaxPooling2D((2,2), strides=(2,2)))

    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(512, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(512, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(512, 3, 3, activation='relu'))
    model.add(MaxPooling2D((2,2), strides=(2,2)))

    # load weights
    if weights_path:
        f = h5py.File(weights_path)
        for k in range(f.attrs['nb_layers']):
            if k >= len(model.layers) - 1:
                # we don't look at the last two layers in the savefile (fully-connected and activation)
                break
            g = f['layer_{}'.format(k)]
            weights = [g['param_{}'.format(p)] for p in range(g.attrs['nb_params'])]
            layer = model.layers[k]

            #if layer.__class__.__name__ in ['Convolution1D', 'Convolution2D', 'Convolution3D', 'AtrousConvolution2D']:
            #    weights[0] = np.transpose(weights[0], (2, 3, 1, 0))

            layer.set_weights(weights)
            # freeze
            layer.trainable = False
        f.close()

    model.add(Flatten())
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1000, activation='softmax'))
    
    return model

Then we can call the method to get the model & train it

# load custom vgg16 and train 1 epoch
model = VGG_16('vgg16_weights.h5')
#print_weights(model)
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(optimizer=sgd, loss='categorical_crossentropy')       
model.fit(X, Y, batch_size=1, nb_epoch=1, validation_data=(X, Y))

Check out the full source code here.

0 0 votes

Article Rating