基于Keras搭建cifar10数据集训练预测Pipeline

Pipeline

本次训练模型的数据直接使用Keras.datasets.cifar10.load_data()得到，模型建立是通过Sequential搭建。

重点思考的内容是如何应用训练过的模型进行实际预测，里面牵涉到一些细节，需要注意。同时，Keras提供的ImageDataGenerator为模型训练时提供数据输入，之前有总结过这个类，并给出了从文件系统中加载原始图片数据的方法。

模型搭建

from __future__ import print_function
import keras
from keras.datasets import cifar10
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
import os

# 指定超参数
batch_size = 32
num_classes = 10
epochs = 50
data_augmentation = True # 数据增强
num_predictions = 20
save_dir = os.path.join(os.getcwd(), 'saved_models')
model_name = 'keras_cifar10_trained_model.h5'

# The data, split between train and test sets:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Convert class vectors to binary class matrices.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# 搭建模型
model = Sequential()
model.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

# initiate RMSprop optimizer
opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)

# Let's train the model using RMSprop
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255

# 如果不用模型增强
if not data_augmentation:
    print('Not using data augmentation.')
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=(x_test, y_test),
              shuffle=True)

# 使用模型增强
else:
    print('Using real-time data augmentation.')
    
    # This will do preprocessing and realtime data augmentation:
    datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        zca_epsilon=1e-06,  # epsilon for ZCA whitening
        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
        # randomly shift images horizontally (fraction of total width)
        width_shift_range=0.1,
        # randomly shift images vertically (fraction of total height)
        height_shift_range=0.1,
        shear_range=0.,  # set range for random shear
        zoom_range=0.,  # set range for random zoom
        channel_shift_range=0.,  # set range for random channel shifts
        # set mode for filling points outside the input boundaries
        fill_mode='nearest',
        cval=0.,  # value used for fill_mode = "constant"
        horizontal_flip=True,  # randomly flip images
        vertical_flip=False,  # randomly flip images
        # set rescaling factor (applied before any other transformation)
        rescale=None,
        # set function that will be applied on each input
        preprocessing_function=None,
        # image data format, either "channels_first" or "channels_last"
        data_format=None,
        # fraction of images reserved for validation (strictly between 0 and 1)
        validation_split=0.0)

    # Compute quantities required for feature-wise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen.fit(x_train)

    # Fit the model on the batches generated by datagen.flow().
    history = model.fit_generator(datagen.flow(x_train, y_train,
                                     batch_size=batch_size),
                        epochs=epochs,
                        steps_per_epoch = 600,
                        validation_data=(x_test, y_test),
                        validation_steps = 10,
                        workers=4)

# Save model and weights
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
model_path = os.path.join(save_dir, model_name)
model.save(model_path)
print('Saved trained model at %s ' % model_path)

# Score trained model.
scores = model.evaluate(x_test, y_test, verbose=1)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])

训练完毕后，模型保存为：keras_cifar10_trained_model.h5

使用预训练模型

# 使用已经训练好的参数来加载模型

from keras.models import load_model

model = load_model('./saved_models/keras_cifar10_trained_model.h5')

model.summary()

'''
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d_9 (Conv2D)            (None, 32, 32, 32)        896       
_________________________________________________________________
activation_13 (Activation)   (None, 32, 32, 32)        0         
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 30, 30, 32)        9248      
_________________________________________________________________
activation_14 (Activation)   (None, 30, 30, 32)        0         
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 15, 15, 32)        0         
_________________________________________________________________
dropout_7 (Dropout)          (None, 15, 15, 32)        0         
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 15, 15, 64)        18496     
_________________________________________________________________
activation_15 (Activation)   (None, 15, 15, 64)        0         
_________________________________________________________________
conv2d_12 (Conv2D)           (None, 13, 13, 64)        36928     
_________________________________________________________________
activation_16 (Activation)   (None, 13, 13, 64)        0         
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 6, 6, 64)          0         
_________________________________________________________________
dropout_8 (Dropout)          (None, 6, 6, 64)          0         
_________________________________________________________________
flatten_3 (Flatten)          (None, 2304)              0         
_________________________________________________________________
dense_5 (Dense)              (None, 512)               1180160   
_________________________________________________________________
activation_17 (Activation)   (None, 512)               0         
_________________________________________________________________
dropout_9 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 10)                5130      
_________________________________________________________________
activation_18 (Activation)   (None, 10)                0         
=================================================================
Total params: 1,250,858
Trainable params: 1,250,858
Non-trainable params: 0
'''

识别测试集图片

lst= ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
def onehot_to_label(res):
  label = ''
  for i in range(len(res[0])):
    if res[0][i] == 1:
      label = lst[i]
  return label

def softmax_to_label(res):
  label = ''
  index = res[0].argmax()
  label = lst[index]
  return label

# 识别测试集图片
test_image = x_test[100].reshape([1,32,32,3])
test_image.shape
res = model.predict(test_image)
label = softmax_to_label(res)
print(label)

本地加载图片识别

# 自己加载raw image进行识别
from PIL import Image
from keras.preprocessing.image import img_to_array
import numpy as np

image = Image.open('./images/airplane.jpeg') # 加载图片
image = image.resize((32,32))
image = img_to_array(image)

# 加载进来之后开始预测
image = image.reshape([1,32,32,3]) # 需要reshape到四维张量才行
res = model.predict(image)
label = softmax_to_label(res)
print("The image is: ", label)

# 或者整合为一个函数
def image_to_array(path):
  image = Image.open(path)
  image = image.resize((32,32),Image.NEAREST) # 会将图像整体缩放到指定大小，不是裁剪
  image = img_to_array(image) # 变成数组
  image = image.reshape([1,32,32,3]) # reshape到4维张量
  return image

使用时注意到输入到网络的数据是张量，且需要reshape到四维，因为按照批量往里输入的时候，也是四维，单独输入一张图片，使用方式相同。

END.

基于Keras搭建cifar10数据集训练预测Pipeline

Pipeline

模型搭建

使用预训练模型

推荐阅读更多精彩内容