2021-03-01





Inception-V3 Google Research




  • 在实践中,很少有人从零开始训练卷积网络(随机初始化),因为很少有足够的数据集。因此,使用预先训练的网络权值作为初始化或固定的特征提取器有助于解决现有的大多数问题。

  • 非常深的网络训练是昂贵的。最复杂的模型需要使用数百台配备了昂贵 GPU 的机器,花费数周的时间来进行训练。

  • 为深度学习确定网络结构、调优方式、训练方法和超参数,是一门没有太多理论指导的“黑箱”艺术。


"DON'T TRY TO BE A HERO" ~Andrej Karpathy

我遇到的大多数计算机视觉问题都没有非常大的数据集(5000张图像- 40000张图像)。即使使用极端的数据增强策略,也很难达到较高的精度。用数百万个参数训练这些网络通常会使模型过拟合。所以迁移学习对我们有帮助。



Inception V3 Google Research


  1. from keras import applications

  2. from keras.preprocessing.image import ImageDataGenerator

  3. from keras import optimizers

  4. from keras.models import Sequential, Model

  5. from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D

  6. from keras import backend as k

  7. from keras.callbacks import ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping

  8. img_width, img_height = 256, 256

  9. train_data_dir = "data/train"

  10. validation_data_dir = "data/val"

  11. nb_train_samples = 4125

  12. nb_validation_samples = 466

  13. batch_size = 16

  14. epochs = 50

  15. model = applications.VGG19(weights = "imagenet", include_top=False, input_shape = (img_width, img_height, 3))

  16. """

  17. Layer (type) Output Shape Param #

  18. =================================================================

  19. input_1 (InputLayer) (None, 256, 256, 3) 0

  20. _________________________________________________________________

  21. block1_conv1 (Conv2D) (None, 256, 256, 64) 1792

  22. _________________________________________________________________

  23. block1_conv2 (Conv2D) (None, 256, 256, 64) 36928

  24. _________________________________________________________________

  25. block1_pool (MaxPooling2D) (None, 128, 128, 64) 0

  26. _________________________________________________________________

  27. block2_conv1 (Conv2D) (None, 128, 128, 128) 73856

  28. _________________________________________________________________

  29. block2_conv2 (Conv2D) (None, 128, 128, 128) 147584

  30. _________________________________________________________________

  31. block2_pool (MaxPooling2D) (None, 64, 64, 128) 0

  32. _________________________________________________________________

  33. block3_conv1 (Conv2D) (None, 64, 64, 256) 295168

  34. _________________________________________________________________

  35. block3_conv2 (Conv2D) (None, 64, 64, 256) 590080

  36. _________________________________________________________________

  37. block3_conv3 (Conv2D) (None, 64, 64, 256) 590080

  38. _________________________________________________________________

  39. block3_conv4 (Conv2D) (None, 64, 64, 256) 590080

  40. _________________________________________________________________

  41. block3_pool (MaxPooling2D) (None, 32, 32, 256) 0

  42. _________________________________________________________________

  43. block4_conv1 (Conv2D) (None, 32, 32, 512) 1180160

  44. _________________________________________________________________

  45. block4_conv2 (Conv2D) (None, 32, 32, 512) 2359808

  46. _________________________________________________________________

  47. block4_conv3 (Conv2D) (None, 32, 32, 512) 2359808

  48. _________________________________________________________________

  49. block4_conv4 (Conv2D) (None, 32, 32, 512) 2359808

  50. _________________________________________________________________

  51. block4_pool (MaxPooling2D) (None, 16, 16, 512) 0

  52. _________________________________________________________________

  53. block5_conv1 (Conv2D) (None, 16, 16, 512) 2359808

  54. _________________________________________________________________

  55. block5_conv2 (Conv2D) (None, 16, 16, 512) 2359808

  56. _________________________________________________________________

  57. block5_conv3 (Conv2D) (None, 16, 16, 512) 2359808

  58. _________________________________________________________________

  59. block5_conv4 (Conv2D) (None, 16, 16, 512) 2359808

  60. _________________________________________________________________

  61. block5_pool (MaxPooling2D) (None, 8, 8, 512) 0

  62. =================================================================

  63. Total params: 20,024,384.0

  64. Trainable params: 20,024,384.0

  65. Non-trainable params: 0.0

  66. """

  67. # Freeze the layers which you don't want to train. Here I am freezing the first 5 layers.

  68. for layer in model.layers[:5]:

  69.     layer.trainable = False

  70. #Adding custom Layers

  71. x = model.output

  72. x = Flatten()(x)

  73. x = Dense(1024, activation="relu")(x)

  74. x = Dropout(0.5)(x)

  75. x = Dense(1024, activation="relu")(x)

  76. predictions = Dense(16, activation="softmax")(x)

  77. # creating the final model

  78. model_final = Model(input = model.input, output = predictions)

  79. # compile the model

  80. model_final.compile(loss = "categorical_crossentropy", optimizer = optimizers.SGD(lr=0.0001, momentum=0.9), metrics=["accuracy"])

  81. # Initiate the train and test generators with data augmentation

  82. train_datagen = ImageDataGenerator(

  83. rescale = 1./255,

  84. horizontal_flip = True,

  85. fill_mode = "nearest",

  86. zoom_range = 0.3,

  87. width_shift_range = 0.3,

  88. height_shift_range=0.3,

  89. rotation_range=30)

  90. test_datagen = ImageDataGenerator(

  91. rescale = 1./255,

  92. horizontal_flip = True,

  93. fill_mode = "nearest",

  94. zoom_range = 0.3,

  95. width_shift_range = 0.3,

  96. height_shift_range=0.3,

  97. rotation_range=30)

  98. train_generator = train_datagen.flow_from_directory(

  99. train_data_dir,

  100. target_size = (img_height, img_width),

  101. batch_size = batch_size,

  102. class_mode = "categorical")

  103. validation_generator = test_datagen.flow_from_directory(

  104. validation_data_dir,

  105. target_size = (img_height, img_width),

  106. class_mode = "categorical")

  107. # Save the model according to the conditions

  108. checkpoint = ModelCheckpoint("vgg16_1.h5", monitor='val_acc', verbose=1, save_best_only=True, save_weights_only=False, mode='auto', period=1)

  109. early = EarlyStopping(monitor='val_acc', min_delta=0, patience=10, verbose=1, mode='auto')

  110. # Train the model

  111. model_final.fit_generator(

  112. train_generator,

  113. samples_per_epoch = nb_train_samples,

  114. epochs = epochs,

  115. validation_data = validation_generator,

  116. nb_val_samples = nb_validation_samples,

  117. callbacks = [checkpoint, early])


1. 新数据集很小,和原始数据集相似:



  1. for layer in model.layers:

  2.     layer.trainable = False

  3. #Now we will be training only the classifiers (FC layers)

2. 新数据集很大,和原始数据集相似:


  1. for layer in model.layers:

  2.     layer.trainable = True

  3. #The default is already set to True. I have mentioned it here to make things clear.


  1. for layer in model.layers[:5]:

  2.     layer.trainable = False

  3. # Here I am freezing the first 5 layers

3. 新数据集很小,但与原始数据集非常不同


  1. from keras import applications

  2. from keras.preprocessing.image import ImageDataGenerator

  3. from keras import optimizers

  4. from keras.models import Sequential, Model

  5. from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D

  6. from keras import backend as k

  7. from keras.callbacks import ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping

  8. img_width, img_height = 256, 256

  9. ### Build the network

  10. img_input = Input(shape=(256, 256, 3))

  11. x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(img_input)

  12. x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x)

  13. x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)

  14. # Block 2

  15. x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x)

  16. x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x)

  17. x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)

  18. model = Model(input = img_input, output = x)

  19. model.summary()

  20. """

  21. _________________________________________________________________

  22. Layer (type) Output Shape Param #

  23. =================================================================

  24. input_1 (InputLayer) (None, 256, 256, 3) 0

  25. _________________________________________________________________

  26. block1_conv1 (Conv2D) (None, 256, 256, 64) 1792

  27. _________________________________________________________________

  28. block1_conv2 (Conv2D) (None, 256, 256, 64) 36928

  29. _________________________________________________________________

  30. block1_pool (MaxPooling2D) (None, 128, 128, 64) 0

  31. _________________________________________________________________

  32. block2_conv1 (Conv2D) (None, 128, 128, 128) 73856

  33. _________________________________________________________________

  34. block2_conv2 (Conv2D) (None, 128, 128, 128) 147584

  35. _________________________________________________________________

  36. block2_pool (MaxPooling2D) (None, 64, 64, 128) 0

  37. =================================================================

  38. Total params: 260,160.0

  39. Trainable params: 260,160.0

  40. Non-trainable params: 0.0

  41. """

  42. layer_dict = dict([(layer.name, layer) for layer in model.layers])

  43. [layer.name for layer in model.layers]

  44. """

  45. ['input_1',

  46. 'block1_conv1',

  47. 'block1_conv2',

  48. 'block1_pool',

  49. 'block2_conv1',

  50. 'block2_conv2',

  51. 'block2_pool']

  52. """

  53. import h5py

  54. weights_path = 'vgg19_weights.h5'  # ('https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg19_weights_tf_dim_ordering_tf_kernels.h5')

  55. f = h5py.File(weights_path)

  56. list(f["model_weights"].keys())

  57. """

  58. ['block1_conv1',

  59. 'block1_conv2',

  60. 'block1_pool',

  61. 'block2_conv1',

  62. 'block2_conv2',

  63. 'block2_pool',

  64. 'block3_conv1',

  65. 'block3_conv2',

  66. 'block3_conv3',

  67. 'block3_conv4',

  68. 'block3_pool',

  69. 'block4_conv1',

  70. 'block4_conv2',

  71. 'block4_conv3',

  72. 'block4_conv4',

  73. 'block4_pool',

  74. 'block5_conv1',

  75. 'block5_conv2',

  76. 'block5_conv3',

  77. 'block5_conv4',

  78. 'block5_pool',

  79. 'dense_1',

  80. 'dense_2',

  81. 'dense_3',

  82. 'dropout_1',

  83. 'global_average_pooling2d_1',

  84. 'input_1']

  85. """

  86. # list all the layer names which are in the model.

  87. layer_names = [layer.name for layer in model.layers]

  88. """

  89. # Here we are extracting model_weights for each and every layer from the .h5 file

  90. >>> f["model_weights"]["block1_conv1"].attrs["weight_names"]

  91. array([b'block1_conv1/kernel:0', b'block1_conv1/bias:0'],

  92. dtype='|S21')

  93. # we are assigning this array to weight_names below

  94. >>> f["model_weights"]["block1_conv1"]["block1_conv1/kernel:0"]

  95. <HDF5 dataset "kernel:0": shape (3, 3, 3, 64), type "<f4">

  96. # The list comprehension (weights) stores these two weights and bias of both the layers

  97. >>>layer_names.index("block1_conv1")

  98. 1

  99. >>> model.layers[1].set_weights(weights)

  100. # This will set the weights for that particular layer.

  101. With a for loop we can set_weights for the entire network.

  102. """

  103. for i in layer_dict.keys():

  104.     weight_names = f["model_weights"][i].attrs["weight_names"]

  105.     weights = [f["model_weights"][i][j] for j in weight_names]

  106.     index = layer_names.index(i)

  107.     model.layers[index].set_weights(weights)

  108. import cv2

  109. import numpy as np

  110. import pandas as pd

  111. from tqdm import tqdm

  112. import itertools

  113. import glob

  114. features = []

  115. for i in tqdm(files_location):

  116.     im = cv2.imread(i)

  117.     im = cv2.resize(cv2.cvtColor(im, cv2.COLOR_BGR2RGB), (256, 256)).astype(np.float32) / 255.0

  118.     im = np.expand_dims(im, axis =0)

  119.     outcome = model_final.predict(im)

  120.     features.append(outcome)

  121. ## collect these features and create a dataframe and train a classifier on top of it.


  • 添加几个FC层和输出层。

  • 设置早期图层的权重并将其冻结。

  • 训练网络。

4. 新数据集很大,与原始数据集非常不同


  • 使用随机初始化训练网络或使用预先训练的网络权重作为初始化器。第二种方法通常是首选的。

  • 如果你使用的是不同的网络,或者对现有网络进行了一些小的修改,请注意命名约定。


  1. cs231n.github.io/transfer-learning/

  2. keras.io

  3. https://github.com/fchollet/keras







