Keras VGG16 with different input shape

Featured image is from analyticsvidhya.com

Update (June 19, 2019):

Recently, I revisited this case and found that the latest versions, Keras==2.2.4 and tensorflow-gpu==1.13.1, make customizing VGG16 much easier. For example, we can fit a pre-trained VGG16 to the CIFAR-10 (32×32) dataset just like this:

X, y = load_cfar10_batch(dir_path, 1)
base_model = VGG16(include_top=False, weights=vgg16_weights, input_shape=(32, 32, 3))
# add a global spatial average pooling layer
# fully-connected layer and prediction layer
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.3)(x)
predictions = Dense(10, activation='softmax')(x)
# freeze vgg16 layers
for layer in base_model.layers:
    layer.trainable = False
model = Model(inputs=base_model.input, outputs=predictions)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
model.fit(X, y)
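
One caveat the snippet above glosses over: categorical_crossentropy expects one-hot encoded labels, and VGG-style models train more smoothly on scaled inputs. Assuming load_cfar10_batch returns raw pixels and integer class labels in 0–9 (an assumption on my part), a minimal preprocessing sketch would be:

from keras.utils import to_categorical

X = X.astype('float32') / 255.0        # scale pixel values to [0, 1]
y = to_categorical(y, num_classes=10)  # integer labels -> one-hot vectors for the softmax output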

Now we don’t need to write a function to manually load the weights anymore.

You can find the full script on my GitHub.

Original (November 17, 2016):

Keras graciously provides an API to use pretrained models such as VGG16 easily. Unfortunately, if we try to use an input shape other than 224 x 224 with the given API (keras 1.1.1 & theano 0.9.0dev4),

from keras.layers import Input 
from keras.optimizers import SGD 
from keras.applications.vgg16 import VGG16 

... 

model = VGG16(weights='imagenet', input_tensor=Input(shape=(3, 300, 300))) 
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True) 
model.compile(optimizer=sgd, loss='categorical_crossentropy') 
model.fit(X, Y, batch_size=1, nb_epoch=1, validation_data=(X, Y))

we will hit an intimidating error similar to:

Traceback (most recent call last):
  File "vgg-16_keras_input_change_2.py", line 28, in <module>
    model.fit(X, Y, batch_size=1, nb_epoch=1, validation_data=(X, Y))
  File "/usr/local/lib/python2.7/dist-packages/keras/engine/training.py", line 1129, in fit
    callback_metrics=callback_metrics)
  File "/usr/local/lib/python2.7/dist-packages/keras/engine/training.py", line 847, in _fit_loop
    outs = f(ins_batch)
  File "/usr/local/lib/python2.7/dist-packages/keras/backend/theano_backend.py", line 811, in __call__
    return self.function(*inputs)
  File "/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.py", line 886, in __call__
    storage_map=getattr(self.fn, 'storage_map', None))
  File "/usr/local/lib/python2.7/dist-packages/theano/gof/link.py", line 325, in raise_with_op
    reraise(exc_type, exc_value, exc_trace)
  File "/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.py", line 873, in __call__
    self.fn() if output_subset is None else\
ValueError: dimension mismatch in args to gemm (1,41472)x(25088,4096)->(1,4096)
Apply node that caused the error: GpuDot22(GpuReshape{2}.0, fc1_W)
Toposort index: 346
Inputs types: [CudaNdarrayType(float32, matrix), CudaNdarrayType(float32, matrix)]
Inputs shapes: [(1, 41472), (25088, 4096)]
Inputs strides: [(0, 1), (4096, 1)]
Inputs values: ['not shown', 'not shown']
Outputs clients: [[GpuElemwise{add,no_inplace}(GpuDot22.0, GpuDimShuffle{x,0}.0)]]
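
The error message actually points at the problem: with a 300 x 300 input, the last convolutional block produces a 9 x 9 x 512 feature map, which flattens to 41,472 values, while the pretrained fc1 weight matrix expects the 7 x 7 x 512 = 25,088 values produced by a 224 x 224 input. A quick sanity check (each of VGG16's five max-pooling layers halves the spatial size, rounding down):

# Back-of-the-envelope check of the flattened feature size feeding fc1
def flattened_features(side):
    for _ in range(5):          # VGG16 has five 2x2 max-pooling layers
        side //= 2
    return side * side * 512    # 512 channels in the last conv block

print(flattened_features(224))  # 25088 -> what the pretrained fc1 weights (25088, 4096) expect
print(flattened_features(300))  # 41472 -> what a 300 x 300 input actually produces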

Luckily, thanks to some posts on the Keras GitHub and Stack Overflow, there is a way to do it. Basically, we need to redefine each layer of VGG16 and load its weights explicitly:

from keras.models import Sequential
from keras.layers.core import Flatten, Dense, Dropout
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.optimizers import SGD
import cv2, numpy as np
import linecache
import sys
import h5py


def VGG_16(weights_path=None):
    model = Sequential()
    model.add(ZeroPadding2D((1,1),input_shape=(3,300,300)))
    #model.add(ZeroPadding2D((1,1),input_shape=(3,224,224)))
    model.add(Convolution2D(64, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(64, 3, 3, activation='relu'))
    model.add(MaxPooling2D((2,2), strides=(2,2)))

    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(128, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(128, 3, 3, activation='relu'))
    model.add(MaxPooling2D((2,2), strides=(2,2)))

    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(256, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(256, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(256, 3, 3, activation='relu'))
    model.add(MaxPooling2D((2,2), strides=(2,2)))

    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(512, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(512, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(512, 3, 3, activation='relu'))
    model.add(MaxPooling2D((2,2), strides=(2,2)))

    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(512, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(512, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(512, 3, 3, activation='relu'))
    model.add(MaxPooling2D((2,2), strides=(2,2)))

    # load weights
    if weights_path:
        f = h5py.File(weights_path)
        for k in range(f.attrs['nb_layers']):
            if k >= len(model.layers) - 1:
                # we don't look at the last two layers in the savefile (fully-connected and activation)
                break
            g = f['layer_{}'.format(k)]
            weights = [g['param_{}'.format(p)] for p in range(g.attrs['nb_params'])]
            layer = model.layers[k]

            #if layer.__class__.__name__ in ['Convolution1D', 'Convolution2D', 'Convolution3D', 'AtrousConvolution2D']:
            #    weights[0] = np.transpose(weights[0], (2, 3, 1, 0))

            layer.set_weights(weights)
            # freeze
            layer.trainable = False
        f.close()

    model.add(Flatten())
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1000, activation='softmax'))
    
    return model

Then we can call the function to get the model and train it:

# load custom vgg16 and train 1 epoch
model = VGG_16('vgg16_weights.h5')
#print_weights(model)
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(optimizer=sgd, loss='categorical_crossentropy')       
model.fit(X, Y, batch_size=1, nb_epoch=1, validation_data=(X, Y))
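
One thing to keep in mind: the final Dense(1000, activation='softmax') layer in VGG_16 above is sized for ImageNet's 1,000 classes. For your own dataset you would replace the classifier head with one matching your number of classes; as a sketch (assuming 10 classes), the tail of VGG_16 would become something like:

    # inside VGG_16, after loading the convolutional weights (hypothetical 10-class head)
    model.add(Flatten())
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10, activation='softmax'))  # 10 classes instead of ImageNet's 1000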

Check out the full source code here.
