源码地址 https://github.com/qqwweee/keras-yolo3
春节期间仔细看了看yolov3的keras源码,这个源码毕竟不是作者写的,有点寒酸,可能大道至简也是这么个理。我在看源码的时候,参照了一些博客进行补充,主要是因为作者公布的代码有点凌乱,和我熟悉的代码风格不同。
看到大神的优秀博客,感觉自己的笔记有点炒冷饭的味道。。。😂
1.目录结构:
如下:这个就是直接从github上down下来的
.
├── coco_annotation.py
├── convert.py
├── darknet53.cfg
├── font
│ ├── FiraMono-Medium.otf
│ └── SIL Open Font License.txt
├── .gitignore
├── kmeans.py
├── LICENSE
├── model_data
│ ├── coco_classes.txt
│ ├── tiny_yolo_anchors.txt
│ ├── voc_classes.txt
│ └── yolo_anchors.txt
├── README.md
├── train_bottleneck.py
├── train.py
├── voc_annotation.py
├── yolo3
│ ├── __init__.py
│ ├── model.py
│ └── utils.py
├── yolo.py
├── yolov3.cfg
├── yolov3-tiny.cfg
└── yolo_video.py
- font是字体目录
- model_data:
是各个数据库对应的模型的文件:
-
coco_classes文件: 就是coco文件的类别文件
如下:
-
yolo_anchors文件:就是yolo3所需要的anchors大小
如下
这里的两文件可以根据数据不同改变,改成你所需要的类别。而anchors可以通过k-means进行聚类直接获得。
- yolo3:
这里有model.py和utils.py文件。
- model.py 就是构建yolo3的主要模块文件,这里一共有14个函数,
如下:
- utils.py 是在模型训练时进行数据处理的工具文件,一共有3个函数:
- *_annotation.py 对数据进行转换的文件,把原始的标注文件转换为txt文件。
- coco_annotation.py 把json文件转换为txt文件
- voc_annotation.py 把xml文件转换为txt
- convert.py 把darknet的原始权重转换为keras能读取的h5文件
- kmeans.py 输入上面得到的txt文件,通过聚类得到数据最佳anchors。
- train.py 进行yolov3训练的文件
- yolo.py 构建以yolov3为底层构件的yolo检测模型,因为上面的yolov3还是分开的单个函数,功能并没有融合在一起,即使在训练的时候所有的yolov3组件还是分开的功能,并没有统一接口,供在模型训练完成之后,直接使用。通过yolo.py融合所有的组件。
- yolo_video.py 使用yolo.py文件中的yolo检测模型,并且对视频中的物体进行检测。
- yolov3.cfg 构建yolov3检测模型的整个超参文件。
在阅读源码的时候主要参考:
https://github.com/SpikeKing/keras-yolo3-detection的几篇博文,但是为了更好理解keras-yolo3的代码,这几篇博文的对应文件如下:
- 探索 YOLO v3 源码 - 第1篇 训练---在train.py中
- 探索 YOLO v3 源码 - 第2篇 模型---在train.py中
- 探索 YOLO v3 源码 - 第3篇 网络---在yolo3/model.py中
- 探索 YOLO v3 源码 - 第4篇 真值---在yolo3/utils.py和yolo3/model.py中
- 探索 YOLO v3 源码 - 第5篇 Loss---在yolo3/model.py中
- 探索 YOLO v3 源码 - 完结篇 预测---在yolo.py中
kmeans.py
import numpy as np
class YOLO_Kmeans:
    """K-means clustering of ground-truth box sizes to derive YOLO anchors.

    Boxes are described only by (width, height). Two boxes are compared by
    the IoU they would have if they shared a corner, and ``1 - IoU`` is used
    as the clustering distance.
    """

    def __init__(self, cluster_number, filename):
        """Store the clustering configuration.

        Args:
            cluster_number: Number of cluster centres (anchors) to compute.
            filename: Path of the annotation text file read by ``txt2boxes``.
        """
        self.cluster_number = cluster_number
        # Fix: the original ignored ``filename`` and always read
        # "2012_train.txt".
        self.filename = filename

    def iou(self, boxes, clusters):  # 1 box -> k clusters
        """Return the IoU between every box and every cluster centre.

        Args:
            boxes: Array of shape (n, 2) holding [width, height] rows.
            clusters: Array of shape (k, 2) holding [width, height] rows.

        Returns:
            Array of shape (n, k); entry (i, j) is the IoU of box i and
            cluster j.
        """
        # Broadcasting an (n, 1) column against a (1, k) row yields the same
        # (n, k) matrices the original built with repeat/tile/reshape.
        box_w = boxes[:, 0][:, np.newaxis]
        box_h = boxes[:, 1][:, np.newaxis]
        cluster_w = clusters[:, 0][np.newaxis, :]
        cluster_h = clusters[:, 1][np.newaxis, :]

        box_area = box_w * box_h
        cluster_area = cluster_w * cluster_h
        # With a shared corner the overlap is min(width) * min(height).
        inter_area = np.minimum(cluster_w, box_w) * np.minimum(cluster_h, box_h)
        return inter_area / (box_area + cluster_area - inter_area)

    def avg_iou(self, boxes, clusters):
        """Return the mean, over all boxes, of each box's best cluster IoU."""
        accuracy = np.mean([np.max(self.iou(boxes, clusters), axis=1)])
        return accuracy

    def kmeans(self, boxes, k, dist=np.median):
        """Cluster ``boxes`` into ``k`` centres using ``1 - IoU`` as distance.

        Args:
            boxes: Array of shape (n, 2) holding [width, height] rows.
            k: Number of cluster centres.
            dist: Reducer called as ``dist(members, axis=0)`` to compute a new
                centre from the boxes of one cluster (default: the median,
                not the mode).

        Returns:
            Array of shape (k, 2) with the final centres, in the dtype of
            ``boxes``.
        """
        box_number = boxes.shape[0]
        # -1 never equals a real cluster index, so the first pass always
        # updates the centres. The original started from zeros and stopped
        # immediately when every box happened to be nearest to cluster 0.
        last_nearest = np.full((box_number,), -1)
        np.random.seed()
        # Start from k distinct boxes; fancy indexing copies, so ``boxes`` is
        # not modified when the centres are updated.
        clusters = boxes[np.random.choice(
            box_number, k, replace=False)]  # init k clusters
        while True:
            distances = 1 - self.iou(boxes, clusters)
            current_nearest = np.argmin(distances, axis=1)
            if (last_nearest == current_nearest).all():
                break  # assignments are stable, clusters won't change
            for cluster in range(k):
                members = boxes[current_nearest == cluster]
                if len(members) == 0:
                    # Keep the old centre: reducing an empty set gives NaN,
                    # which cannot be stored in an integer array.
                    continue
                clusters[cluster] = dist(members, axis=0)  # update clusters
            last_nearest = current_nearest
        return clusters

    def result2txt(self, data):
        """Write the centres to "yolo_anchors.txt" as ``w,h, w,h, ...``."""
        anchors_text = ", ".join(
            "%d,%d" % (row[0], row[1]) for row in data)
        with open("yolo_anchors.txt", 'w') as f:
            f.write(anchors_text)

    def txt2boxes(self):
        """Read ``self.filename`` and return all box sizes.

        Each line is ``image_path box box ...`` where a box is
        ``x_min,y_min,x_max,y_max,...``; the first field of every line is
        skipped.

        Returns:
            Array with one [width, height] row per box.
        """
        dataSet = []
        with open(self.filename, 'r') as f:
            for line in f:
                # split() without arguments tolerates trailing spaces and the
                # newline, which split(" ") turned into bogus fields.
                infos = line.split()
                for info in infos[1:]:
                    fields = info.split(",")
                    width = int(fields[2]) - int(fields[0])
                    height = int(fields[3]) - int(fields[1])
                    dataSet.append([width, height])
        return np.array(dataSet)

    def txt2clusters(self):
        """Run the whole pipeline: read boxes, cluster, save and report."""
        all_boxes = self.txt2boxes()
        result = self.kmeans(all_boxes, k=self.cluster_number)
        # lexsort receives only the first column as key, so the rows end up
        # ordered by width.
        result = result[np.lexsort(result.T[0, None])]
        self.result2txt(result)
        print("K anchors:\n {}".format(result))
        print("Accuracy: {:.2f}%".format(
            self.avg_iou(all_boxes, result) * 100))
if __name__ == "__main__":
    # Cluster the box sizes of the annotation file into 9 anchors, write
    # them to disk and print the resulting average IoU.
    YOLO_Kmeans(cluster_number=9, filename="2012_train.txt").txt2clusters()
k-means先读取数据里所有的目标框,得到它们的宽和高,从中随机选取9个框作为初始中心;之后以这9个中心划分出9个簇,不断根据每个框到各中心的距离(1 - IoU)调整框所归属的簇,并重新计算各簇的中心,直到每个框所属的簇不再变化为止。最终这9个中心的宽和高,就是适合整个数据集的9个anchors。
train.py
#!/usr/bin/env python
# -- coding: utf-8 --
"""
Copyright (c) 2018. All rights reserved.
Created by C. L. Wang on 2018/7/4
"""
import os
import numpy as np
import tensorflow as tf
import keras.backend as K
from keras.backend import mean
from keras.layers import Input, Lambda
from keras.models import Model
from keras.optimizers import Adam
from keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from keras.utils import plot_model
from yolo3.model import preprocess_true_boxes, yolo_body, tiny_yolo_body, yolo_loss
from yolo3.utils import get_random_data
def _main():
    """Train the YOLOv3 loss model with the hard-coded paths and settings.

    The trained network outputs its own loss (see ``create_model``), so the
    Keras loss passed to ``compile`` just returns ``y_pred`` and the targets
    produced by the data generator are placeholders.

    Two stages exist: a frozen-body stage (currently switched off) and a
    stage that unfreezes every layer (currently switched on).
    """
    # Local re-imports of modules that are also imported at file level.
    import os
    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
    from keras import backend as K

    # Let TensorFlow allocate GPU memory on demand.
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True
    K.set_session(tf.Session(config=session_config))

    annotation_path = 'dataset/WIDER_train.txt'  # training annotations
    classes_path = 'configs/wider_classes.txt'  # class names
    log_dir = 'logs/004/'  # TensorBoard logs and checkpoints
    # Weights to start from; 'model_data/yolo_weights.h5' is the stock
    # alternative mentioned in the original.
    pretrained_path = 'logs/003/ep074-loss26.535-val_loss27.370.h5'
    anchors_path = 'configs/yolo_anchors.txt'

    class_names = get_classes(classes_path)
    num_classes = len(class_names)
    anchors = get_anchors(anchors_path)
    input_shape = (416, 416)  # (height, width), multiples of 32

    # make sure you know what you freeze
    model = create_model(input_shape, anchors, num_classes,
                         freeze_body=2,
                         weights_path=pretrained_path)

    logging = TensorBoard(log_dir=log_dir)
    # Weights only, and only when val_loss improved, checked every 3 epochs.
    checkpoint = ModelCheckpoint(
        log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
        monitor='val_loss', save_weights_only=True,
        save_best_only=True, period=3)
    # Scale the learning rate by 0.1 after 3 epochs without val_loss
    # improvement.
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1)
    # Stop training after 10 epochs without val_loss improvement.
    early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1)

    # Shuffle with a fixed seed so the train/validation split is
    # reproducible, then restore non-deterministic seeding.
    val_split = 0.1
    with open(annotation_path) as f:
        lines = f.readlines()
    np.random.seed(47)
    np.random.shuffle(lines)
    np.random.seed(None)
    num_val = int(len(lines) * val_split)
    num_train = len(lines) - num_val

    def make_batches(annotation_subset, batch_size):
        # Shared generator construction for the training/validation subsets.
        return data_generator_wrapper(annotation_subset, batch_size, input_shape, anchors, num_classes)

    run_frozen_stage = False  # stage 1: train with the frozen layers
    run_unfrozen_stage = True  # stage 2: train every layer

    if run_frozen_stage:
        model.compile(optimizer=Adam(lr=1e-3), loss={
            # the 'yolo_loss' Lambda layer already outputs the loss value
            'yolo_loss': lambda y_true, y_pred: y_pred})
        batch_size = 32
        print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
        model.fit_generator(make_batches(lines[:num_train], batch_size),
                            steps_per_epoch=max(1, num_train // batch_size),
                            validation_data=make_batches(lines[num_train:], batch_size),
                            validation_steps=max(1, num_val // batch_size),
                            epochs=50,
                            initial_epoch=0,
                            callbacks=[logging, checkpoint])
        model.save_weights(log_dir + 'trained_weights_stage_1.h5')

    if run_unfrozen_stage:
        for layer in model.layers:
            layer.trainable = True
        # recompile to apply the change
        model.compile(optimizer=Adam(lr=1e-4),
                      loss={'yolo_loss': lambda y_true, y_pred: y_pred})
        print('Unfreeze all of the layers.')
        batch_size = 16  # note that more GPU memory is required after unfreezing the body
        print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
        model.fit_generator(make_batches(lines[:num_train], batch_size),
                            steps_per_epoch=max(1, num_train // batch_size),
                            validation_data=make_batches(lines[num_train:], batch_size),
                            validation_steps=max(1, num_val // batch_size),
                            epochs=100,
                            initial_epoch=50,
                            callbacks=[logging, checkpoint, reduce_lr, early_stopping])
        model.save_weights(log_dir + 'trained_weights_final.h5')
def get_classes(classes_path):
    """Return the class names listed one per line in ``classes_path``.

    Surrounding whitespace is stripped from every line; the result is a list
    with one entry per line of the file.
    """
    with open(classes_path) as f:
        return [name.strip() for name in f]
def get_anchors(anchors_path):
    """Load anchors from the first line of ``anchors_path``.

    The line holds comma-separated numbers; they are returned as a float
    array reshaped to (-1, 2), i.e. one pair per row.
    """
    with open(anchors_path) as f:
        first_line = f.readline()
    values = list(map(float, first_line.split(',')))
    return np.array(values).reshape(-1, 2)
def create_model(input_shape, anchors, num_classes, load_pretrained=True, freeze_body=2,
                 weights_path='model_data/yolo_weights.h5'):
    """Build the trainable model whose single output is the YOLO loss.

    Args:
        input_shape: (height, width) of the network input.
        anchors: Array of anchor (width, height) pairs; a third of them is
            used per output scale.
        num_classes: Number of object classes.
        load_pretrained: When true, load ``weights_path`` into the body by
            layer name, skipping layers whose shapes do not match.
        freeze_body: 1 freezes the first 185 body layers, 2 freezes all body
            layers except the last 3; any other value freezes nothing.
            Only applied when ``load_pretrained`` is true.
        weights_path: Weight file used when ``load_pretrained`` is true.

    Returns:
        A Keras ``Model`` taking ``[image, y_true_0, y_true_1, y_true_2]``
        and producing the output of the ``yolo_loss`` Lambda layer.
    """
    K.clear_session()  # start from a fresh graph/session
    h, w = input_shape
    # Fix: the original used (w, h, 3). The y_true inputs below and the
    # images yielded by the data generator are height-first, so the image
    # input must be (h, w, 3) for non-square input shapes to line up.
    image_input = Input(shape=(h, w, 3))
    num_anchors = len(anchors)

    # One ground-truth input per output scale:
    # (grid_h, grid_w, anchors per scale, 4 box values + 1 objectness + classes).
    strides = {0: 32, 1: 16, 2: 8}
    y_true = [Input(shape=(h // strides[l], w // strides[l],
                           num_anchors // 3, num_classes + 5)) for l in range(3)]

    model_body = yolo_body(image_input, num_anchors // 3, num_classes)
    print('Create YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes))

    if load_pretrained:
        model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)
        print('Load weights {}.'.format(weights_path))
        if freeze_body in [1, 2]:
            # Freeze darknet53 body or freeze all but 3 output layers.
            num = (185, len(model_body.layers) - 3)[freeze_body - 1]
            for i in range(num):
                model_body.layers[i].trainable = False
            print('Freeze the first {} layers of total {} layers.'.format(num, len(model_body.layers)))

    # The loss is computed inside the graph by a Lambda layer that receives
    # the body outputs followed by the ground-truth inputs.
    model_loss = Lambda(yolo_loss,
                        output_shape=(1,), name='yolo_loss',
                        arguments={'anchors': anchors,
                                   'num_classes': num_classes,
                                   'ignore_thresh': 0.5}
                        )(model_body.output + y_true)
    model = Model(inputs=[model_body.input] + y_true, outputs=model_loss)

    plot_model(model, to_file=os.path.join('model_data', 'model.png'), show_shapes=True, show_layer_names=True)
    model.summary()
    return model
def data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes):
    """Endless batch generator for ``fit_generator``.

    Args:
        annotation_lines: List of annotation lines (one image per line); it
            is shuffled in place every time the cursor wraps to the start.
        batch_size: Number of samples per yielded batch.
        input_shape: (height, width) of the network input.
        anchors: Anchor array forwarded to ``preprocess_true_boxes``.
        num_classes: Number of object classes.

    Yields:
        ``([image_data] + y_true, zeros(batch_size))``. The zeros are a
        placeholder target, since the model computes the loss itself.
    """
    total = len(annotation_lines)
    cursor = 0
    while True:
        images = []
        boxes = []
        for _ in range(batch_size):
            if cursor == 0:
                # New pass over the data: reshuffle the sample order.
                np.random.shuffle(annotation_lines)
            # Augmented image plus its boxes, both fitted to input_shape.
            sample_image, sample_boxes = get_random_data(
                annotation_lines[cursor], input_shape, random=True)
            images.append(sample_image)
            boxes.append(sample_boxes)
            cursor = (cursor + 1) % total
        image_data = np.array(images)
        box_data = np.array(boxes)
        # Convert the boxes into one target array per output scale.
        y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes)
        yield [image_data] + y_true, np.zeros(batch_size)
def data_generator_wrapper(annotation_lines, batch_size, input_shape, anchors, num_classes):
    """Validate the arguments before creating the batch generator.

    Returns ``None`` when there are no annotation lines or the batch size is
    not positive; otherwise returns ``data_generator(...)``.
    """
    has_samples = len(annotation_lines) != 0
    if has_samples and batch_size > 0:
        return data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes)
    return None
# Start training only when this file is executed as a script.
if __name__ == '__main__':
    _main()
train.py主要就是构建yolov3的训练模型。这里作者使用自定义loss的方式进行模型训练,并没有在compile的loss函数里真正使用y_true和y_pred。具体参考
Keras中自定义目标函数(损失函数)的简单方法
和Keras中自定义复杂的loss函数这两篇博客。主要是因为loss已经在yolo_loss层里根据真值和预测值计算完成了,模型的输出(y_pred)本身就是loss,所以训练时所需的y_true只是一个占位数组,在数据生成器(data_generator)中体现为np.zeros(batch_size):
yield [image_data] + y_true, np.zeros(batch_size)
🆗,这个train.py有两个train模式,一个是冻结模型,一个是微调模型。冻结那几个层手动调节,1是冻结DarkNet53的层,2是冻结全部,只保留最后3层。
model.py
#!/usr/bin/env python
# -- coding: utf-8 --
"""
Copyright (c) 2018. All rights reserved.
Created by C. L. Wang on 2018/7/4
"""
from functools import wraps
import numpy as np
import tensorflow as tf
from keras import backend as K
from keras.layers import Conv2D, Add, ZeroPadding2D, UpSampling2D, Concatenate, MaxPooling2D
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.normalization import BatchNormalization
from keras.models import Model
from keras.regularizers import l2
from yolo3.utils import compose
@wraps(Conv2D)
def DarknetConv2D(*args, **kwargs):
    """Wrapper to set Darknet parameters for Convolution2D.

    Defaults, all overridable through ``kwargs``: L2 kernel regularisation
    with factor 5e-4, and 'same' padding unless ``strides == (2, 2)``, in
    which case 'valid' padding is used.
    """
    padding = 'same'
    if kwargs.get('strides') == (2, 2):
        padding = 'valid'
    conv_kwargs = dict(kernel_regularizer=l2(5e-4), padding=padding)
    # Caller-supplied values win over the Darknet defaults.
    conv_kwargs.update(kwargs)
    return Conv2D(*args, **conv_kwargs)
def DarknetConv2D_BN_Leaky(*args, **kwargs):
    """Darknet Convolution2D followed by BatchNormalization and LeakyReLU.

    The convolution has no bias unless the caller passes ``use_bias``.
    Returns the three layers chained into one callable by ``compose``.
    """
    conv_kwargs = dict(kwargs)
    conv_kwargs.setdefault('use_bias', False)
    conv = DarknetConv2D(*args, **conv_kwargs)
    return compose(conv, BatchNormalization(), LeakyReLU(alpha=0.1))
def resblock_body(x, num_filters, num_blocks):
    """A series of resblocks starting with a downsampling Convolution2D.

    Contains ``1 + 2 * num_blocks`` convolutions: one stride-2 convolution,
    then ``num_blocks`` residual units of a 1x1 and a 3x3 convolution each.
    """
    half_filters = num_filters // 2
    # Darknet uses left and top padding instead of 'same' mode
    x = ZeroPadding2D(((1, 0), (1, 0)))(x)
    x = DarknetConv2D_BN_Leaky(num_filters, (3, 3), strides=(2, 2))(x)
    for _ in range(num_blocks):
        residual = compose(
            DarknetConv2D_BN_Leaky(half_filters, (1, 1)),
            DarknetConv2D_BN_Leaky(num_filters, (3, 3)))(x)
        x = Add()([x, residual])
    return x
def darknet_body(x):
    """Darknet body having 52 Convolution2D layers.

    One 32-filter stem convolution followed by five residual stages.
    """
    x = DarknetConv2D_BN_Leaky(32, (3, 3))(x)
    # (filters, number of residual units) for each downsampling stage.
    stages = ((64, 1), (128, 2), (256, 8), (512, 8), (1024, 4))
    for stage_filters, stage_blocks in stages:
        x = resblock_body(x, num_filters=stage_filters, num_blocks=stage_blocks)
    return x
def make_last_layers(x, num_filters, out_filters):
    """6 Conv2D_BN_Leaky layers followed by a Conv2D_linear layer.

    No downsampling happens here.

    Returns:
        ``(x, y)``: ``x`` is the output of the first five conv blocks, and
        ``y`` is the output of the final 3x3 block plus the linear 1x1
        convolution with ``out_filters`` channels.
    """
    wide_filters = num_filters * 2
    trunk = compose(
        DarknetConv2D_BN_Leaky(num_filters, (1, 1)),
        DarknetConv2D_BN_Leaky(wide_filters, (3, 3)),
        DarknetConv2D_BN_Leaky(num_filters, (1, 1)),
        DarknetConv2D_BN_Leaky(wide_filters, (3, 3)),
        DarknetConv2D_BN_Leaky(num_filters, (1, 1)))
    x = trunk(x)
    head = compose(
        DarknetConv2D_BN_Leaky(wide_filters, (3, 3)),
        DarknetConv2D(out_filters, (1, 1)))
    y = head(x)
    return x, y
def yolo_body(inputs, num_anchors, num_classes):
    """Create YOLO_V3 model CNN body in Keras.

    Returns a ``Model`` with three outputs ``[y1, y2, y3]``. Each later
    output is built from the previous branch upsampled by 2 and concatenated
    with an intermediate darknet layer.
    """
    out_filters = num_anchors * (num_classes + 5)
    darknet = Model(inputs, darknet_body(inputs))

    # First output: straight from the end of the darknet body.
    x, y1 = make_last_layers(darknet.output, 512, out_filters)

    # Second output: upsample and merge with darknet layer 152.
    x = compose(
        DarknetConv2D_BN_Leaky(256, (1, 1)),
        UpSampling2D(2))(x)
    x = Concatenate()([x, darknet.layers[152].output])
    x, y2 = make_last_layers(x, 256, out_filters)

    # Third output: upsample again and merge with darknet layer 92.
    x = compose(
        DarknetConv2D_BN_Leaky(128, (1, 1)),
        UpSampling2D(2))(x)
    x = Concatenate()([x, darknet.layers[92].output])
    _, y3 = make_last_layers(x, 128, out_filters)

    return Model(inputs, [y1, y2, y3])
def tiny_yolo_body(inputs, num_anchors, num_classes):
    """Create Tiny YOLO_v3 model CNN body in keras.

    Returns a ``Model`` with two outputs ``[y1, y2]``.
    """
    out_filters = num_anchors * (num_classes + 5)

    def pool(stride):
        # 2x2 max pooling with 'same' padding and the given stride.
        return MaxPooling2D(pool_size=(2, 2), strides=(stride, stride), padding='same')

    x1 = compose(
        DarknetConv2D_BN_Leaky(16, (3, 3)),
        pool(2),
        DarknetConv2D_BN_Leaky(32, (3, 3)),
        pool(2),
        DarknetConv2D_BN_Leaky(64, (3, 3)),
        pool(2),
        DarknetConv2D_BN_Leaky(128, (3, 3)),
        pool(2),
        DarknetConv2D_BN_Leaky(256, (3, 3)))(inputs)
    x2 = compose(
        pool(2),
        DarknetConv2D_BN_Leaky(512, (3, 3)),
        pool(1),
        DarknetConv2D_BN_Leaky(1024, (3, 3)),
        DarknetConv2D_BN_Leaky(256, (1, 1)))(x1)

    # Coarse-scale output.
    y1 = compose(
        DarknetConv2D_BN_Leaky(512, (3, 3)),
        DarknetConv2D(out_filters, (1, 1)))(x2)

    # Fine-scale output: upsample the deep features and merge with x1.
    x2 = compose(
        DarknetConv2D_BN_Leaky(128, (1, 1)),
        UpSampling2D(2))(x2)
    y2 = compose(
        Concatenate(),
        DarknetConv2D_BN_Leaky(256, (3, 3)),
        DarknetConv2D(out_filters, (1, 1)))([x2, x1])

    return Model(inputs, [y1, y2])
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Decode one raw output tensor into box parameters.

    Args:
        feats: Raw output tensor of one scale (one of the yolo_outputs).
        anchors: The anchors (width, height) belonging to this scale.
        num_classes: Number of object classes.
        input_shape: (height, width) of the network input, as a tensor.
        calc_loss: When ``True``, return the tensors needed by the loss.

    Returns:
        ``(grid, feats, box_xy, box_wh)`` when ``calc_loss`` is ``True``,
        otherwise ``(box_xy, box_wh, box_confidence, box_class_probs)``.
        ``box_xy`` is divided by the grid size and ``box_wh`` by the input
        size.
    """
    anchor_count = len(anchors)
    float_type = K.dtype(feats)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, anchor_count, 2])

    # Build a (height, width, 1, 2) tensor holding each cell's (x, y) index.
    grid_shape = K.shape(feats)[1:3]  # height, width
    rows, cols = grid_shape[0], grid_shape[1]
    row_index = K.reshape(K.arange(0, stop=rows), [-1, 1, 1, 1])
    grid_y = K.tile(row_index, [1, cols, 1, 1])
    col_index = K.reshape(K.arange(0, stop=cols), [1, -1, 1, 1])
    grid_x = K.tile(col_index, [rows, 1, 1, 1])
    grid = K.cast(K.concatenate([grid_x, grid_y]), float_type)

    # Reshape to batch, height, width, num_anchors, box_params.
    feats = K.reshape(feats, [-1, rows, cols, anchor_count, num_classes + 5])

    # Adjust predictions to each spatial grid point and anchor size.
    grid_wh = K.cast(grid_shape[::-1], float_type)
    input_wh = K.cast(input_shape[::-1], float_type)
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / grid_wh
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / input_wh
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    # Comparison with True kept exactly as in the original.
    if calc_loss == True:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape):
    """Get corrected boxes.

    Undoes the aspect-preserving resize and centring that fitted the image
    into ``input_shape``, then scales the boxes to ``image_shape``.

    Returns:
        Tensor whose last axis is ``(y_min, x_min, y_max, x_max)``.
    """
    centers_yx = box_xy[..., ::-1]
    sizes_hw = box_wh[..., ::-1]
    float_type = K.dtype(centers_yx)
    input_shape = K.cast(input_shape, float_type)
    image_shape = K.cast(image_shape, float_type)

    # Size of the image after scaling it to fit inside input_shape.
    fit_ratio = K.min(input_shape / image_shape)
    new_shape = K.round(image_shape * fit_ratio)
    # Relative padding on each side and the factor that removes it.
    offset = (input_shape - new_shape) / 2. / input_shape
    scale = input_shape / new_shape

    centers_yx = (centers_yx - offset) * scale
    sizes_hw = sizes_hw * scale
    half_hw = sizes_hw / 2.
    box_mins = centers_yx - half_hw
    box_maxes = centers_yx + half_hw
    corners = K.concatenate([
        box_mins[..., 0:1],  # y_min
        box_mins[..., 1:2],  # x_min
        box_maxes[..., 0:1],  # y_max
        box_maxes[..., 1:2]  # x_max
    ])

    # Scale boxes back to original image shape.
    return corners * K.concatenate([image_shape, image_shape])
def yolo_boxes_and_scores(feats, anchors, num_classes, input_shape, image_shape):
    """Process Conv layer output.

    Decodes one output scale and returns ``(boxes, box_scores)``: boxes in
    original-image coordinates flattened to (-1, 4), and per-class scores
    (confidence times class probability) flattened to (-1, num_classes).
    """
    xy, wh, confidence, class_probs = yolo_head(
        feats, anchors, num_classes, input_shape)
    corrected = yolo_correct_boxes(xy, wh, input_shape, image_shape)
    boxes = K.reshape(corrected, [-1, 4])
    box_scores = K.reshape(confidence * class_probs, [-1, num_classes])
    return boxes, box_scores
def yolo_eval(yolo_outputs, anchors, num_classes, image_shape,
              max_boxes=20, score_threshold=.6, iou_threshold=.5):
    """Evaluate YOLO model on given input and return filtered boxes.

    Decodes every output scale, keeps the scores that reach
    ``score_threshold`` and runs non-max suppression per class.

    Returns:
        ``(boxes, scores, classes)`` tensors concatenated over all classes.
    """
    num_layers = len(yolo_outputs)
    # default setting
    if num_layers == 3:
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    else:
        anchor_mask = [[3, 4, 5], [1, 2, 3]]
    input_shape = K.shape(yolo_outputs[0])[1:3] * 32

    layer_boxes = []
    layer_scores = []
    for layer_idx in range(num_layers):
        decoded_boxes, decoded_scores = yolo_boxes_and_scores(
            yolo_outputs[layer_idx], anchors[anchor_mask[layer_idx]],
            num_classes, input_shape, image_shape)
        layer_boxes.append(decoded_boxes)
        layer_scores.append(decoded_scores)
    boxes = K.concatenate(layer_boxes, axis=0)
    box_scores = K.concatenate(layer_scores, axis=0)

    mask = box_scores >= score_threshold
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')
    kept_boxes = []
    kept_scores = []
    kept_classes = []
    for c in range(num_classes):
        # TODO: use keras backend instead of tf.
        class_mask = mask[:, c]
        candidate_boxes = tf.boolean_mask(boxes, class_mask)
        candidate_scores = tf.boolean_mask(box_scores[:, c], class_mask)
        # Non-max suppression among the candidates of this class.
        nms_index = tf.image.non_max_suppression(
            candidate_boxes, candidate_scores, max_boxes_tensor, iou_threshold=iou_threshold)
        candidate_boxes = K.gather(candidate_boxes, nms_index)
        candidate_scores = K.gather(candidate_scores, nms_index)
        kept_boxes.append(candidate_boxes)
        kept_scores.append(candidate_scores)
        kept_classes.append(K.ones_like(candidate_scores, 'int32') * c)

    boxes_ = K.concatenate(kept_boxes, axis=0)
    scores_ = K.concatenate(kept_scores, axis=0)
    classes_ = K.concatenate(kept_classes, axis=0)
    return boxes_, scores_, classes_
def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes):
    '''Preprocess true boxes to training input format

    Each true box is matched to the anchor with the highest IoU (comparing
    width/height only) and written into the output scale that owns that
    anchor, at the grid cell containing the box centre.

    Layout of the last axis of every y_true array:
    entries 0 and 1 are the box centre xy and entries 2 and 3 the box wh,
    all divided by the input size (so relative values);
    entry 4 is the objectness flag (1 where a box was assigned, else 0);
    entries 5.. are the one-hot class flags.

    Parameters
    ----------
    true_boxes: array, shape=(m, T, 5)
        Absolute x_min, y_min, x_max, y_max, class_id relative to input_shape.
    input_shape: array-like, hw, multiples of 32
    anchors: array, shape=(N, 2), wh
    num_classes: integer

    Returns
    -------
    y_true: list of array, shape like yolo_outputs, xywh are relative value
    '''
    assert (true_boxes[..., 4] < num_classes).all(), 'class id must be less than num_classes'
    num_layers = len(anchors) // 3  # default setting
    # Anchor indices owned by each output scale.
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]
    # Work on a float32 copy of the annotations.
    true_boxes = np.array(true_boxes, dtype='float32')
    # Network input size as an integer array (h, w).
    input_shape = np.array(input_shape, dtype='int32')
    # Box centres (floor of the corner midpoint).
    boxes_xy = (true_boxes[..., 0:2] + true_boxes[..., 2:4]) // 2
    # Box widths and heights.
    boxes_wh = true_boxes[..., 2:4] - true_boxes[..., 0:2]
    # Overwrite the corner columns with centre and size divided by the
    # input (w, h), i.e. relative values.
    true_boxes[..., 0:2] = boxes_xy / input_shape[::-1]
    true_boxes[..., 2:4] = boxes_wh / input_shape[::-1]
    # Size of the first axis of true_boxes (the batch).
    m = true_boxes.shape[0]
    # Grid size (h, w) of each output scale: input size over stride 32/16/8.
    grid_shapes = [input_shape // {0: 32, 1: 16, 2: 8}[l] for l in range(num_layers)]
    # One zero-initialised target array per scale:
    # (m, grid_h, grid_w, anchors of that scale, 5 + num_classes).
    y_true = [np.zeros((m, grid_shapes[l][0], grid_shapes[l][1], len(anchor_mask[l]), 5 + num_classes),
                       dtype='float32') for l in range(num_layers)]
    # Expand dim to apply broadcasting.
    # Anchors are treated as boxes centred at the origin.
    anchors = np.expand_dims(anchors, 0)
    anchor_maxes = anchors / 2.
    anchor_mins = -anchor_maxes
    # Rows with positive width are real boxes; the others are discarded.
    valid_mask = boxes_wh[..., 0] > 0
    for b in range(m):
        # Discard zero rows.
        wh = boxes_wh[b, valid_mask[b]]
        if len(wh) == 0: continue
        # Expand dim to apply broadcasting.
        wh = np.expand_dims(wh, -2)
        box_maxes = wh / 2.
        box_mins = -box_maxes
        # IoU of every valid box with every anchor, both centred at the origin.
        intersect_mins = np.maximum(box_mins, anchor_mins)
        intersect_maxes = np.minimum(box_maxes, anchor_maxes)
        intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
        box_area = wh[..., 0] * wh[..., 1]
        anchor_area = anchors[..., 0] * anchors[..., 1]
        iou = intersect_area / (box_area + anchor_area - intersect_area)
        # Find best anchor for each true box
        best_anchor = np.argmax(iou, axis=-1)
        # NOTE(review): t counts the valid boxes only but is used to index
        # true_boxes[b] directly; this assumes the valid rows come first -
        # confirm against the caller.
        for t, n in enumerate(best_anchor):
            # Write the box into the scale that owns anchor n.
            for l in range(num_layers):
                if n in anchor_mask[l]:
                    # i / j: grid column / row containing the box centre.
                    i = np.floor(true_boxes[b, t, 0] * grid_shapes[l][1]).astype('int32')
                    j = np.floor(true_boxes[b, t, 1] * grid_shapes[l][0]).astype('int32')
                    # k: position of the anchor within this scale; c: class id.
                    k = anchor_mask[l].index(n)
                    c = true_boxes[b, t, 4].astype('int32')
                    # Relative xywh, objectness flag, one-hot class flag.
                    y_true[l][b, j, i, k, 0:4] = true_boxes[b, t, 0:4]
                    y_true[l][b, j, i, k, 4] = 1
                    y_true[l][b, j, i, k, 5 + c] = 1
    return y_true
def box_iou(b1, b2):
    """Return iou tensor

    Parameters
    ----------
    b1: tensor, shape=(i1,...,iN, 4), xywh
    b2: tensor, shape=(j, 4), xywh

    Returns
    -------
    iou: tensor, shape=(i1,...,iN, j)
    """

    def split_box(box):
        # Return (wh, mins, maxes) of an xywh tensor.
        xy = box[..., :2]
        wh = box[..., 2:4]
        half_wh = wh / 2.
        return wh, xy - half_wh, xy + half_wh

    # Expand dims so that every b1 box broadcasts against every b2 box.
    b1_wh, b1_mins, b1_maxes = split_box(K.expand_dims(b1, -2))
    b2_wh, b2_mins, b2_maxes = split_box(K.expand_dims(b2, 0))

    # Overlap rectangle, clipped at zero when the boxes do not intersect.
    overlap_wh = K.maximum(
        K.minimum(b1_maxes, b2_maxes) - K.maximum(b1_mins, b2_mins), 0.)
    overlap_area = overlap_wh[..., 0] * overlap_wh[..., 1]
    b1_area = b1_wh[..., 0] * b1_wh[..., 1]
    b2_area = b2_wh[..., 0] * b2_wh[..., 1]
    return overlap_area / (b1_area + b2_area - overlap_area)
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=True):
    '''Return yolo_loss tensor

    Parameters
    ----------
    args: list holding the yolo_outputs followed by the y_true tensors
        yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
        y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss
    print_loss: bool, wrap the loss in tf.Print so its components are logged

    Returns
    -------
    loss: tensor, shape=(1,)

    '''
    num_layers = len(anchors) // 3  # default setting
    # Split the combined argument list into predictions and ground truth.
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    # Anchor indices owned by each output scale.
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]
    # Network input size: spatial size of the first output times 32.
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    # Grid size (h, w) of every output scale.
    grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(num_layers)]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))
    # y_true last axis: 0:2 centre xy and 2:4 wh (relative values, rescaled
    # below), 4 objectness flag, 5: class flags.
    for l in range(num_layers):
        # 1 where a ground-truth box was assigned, else 0.
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]
        # Decode the raw output of this scale.
        grid, raw_pred, pred_xy, pred_wh = \
            yolo_head(yolo_outputs[l], anchors[anchor_mask[l]],
                      num_classes, input_shape, calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])
        # Darknet raw box to calculate loss.
        # Convert the ground truth into the raw prediction space: xy becomes
        # the offset inside the grid cell, wh the log ratio to the anchor.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        # Weight that is larger for smaller boxes.
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]  # 2-w*h
        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            # For sample b: value 1 for predictions whose best IoU with any
            # true box is below ignore_thresh, value 0 for the others.
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4], object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = K.control_flow_ops.while_loop(lambda b, *args: b < m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)
        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(raw_true_xy, raw_pred[..., 0:2],
                                                                       from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(raw_true_wh - raw_pred[..., 2:4])
        # Object cells always count; background cells count only where
        # ignore_mask is 1.
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) + \
                          (1 - object_mask) * K.binary_crossentropy(object_mask, raw_pred[..., 4:5],
                                                                    from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(true_class_probs, raw_pred[..., 5:], from_logits=True)
        # Sum every term and divide by the batch size.
        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [loss, xy_loss, wh_loss, confidence_loss, class_loss, K.sum(ignore_mask)],
                            message='loss: ')
    return loss
在model.py中有几个函数需要进行讲解:
- DarknetConv2D(*args, **kwargs)普通的卷积网络,带正则化,当步长为2时进行下采
- DarknetConv2D_BN_Leaky(*args, **kwargs)没有偏置,带正则项
- resblock_body(x, num_filters, num_blocks)使用残差块, 1 + 2 * num_blocks 为总的卷积层数
- darknet_body(x) darknet的主体网络52层卷积网络
- make_last_layers(x, num_filters, out_filters) yolo最后检测头部,无降采采样操作
- yolo_body(inputs, num_anchors, num_classes)yolov3的三个检测输出部分
- yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False) feats的后处理函数,feats就是yolo_outputs,把输出转换到inputs的坐标系
- yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape) 把预测的图片转换到原始图片的大小
- yolo_boxes_and_scores(feats, anchors, num_classes, input_shape, image_shape) 对yolo的输出进行后处理,输出合适原始图片的box和scores
- yolo_eval(yolo_outputs, anchors, num_classes, image_shape, max_boxes=20, score_threshold=.6, iou_threshold=.5) 使用yolo模型进行图片的检测,进行坐标转化和nms处理
- preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes) 对图片中标记的数据与anchors进行转换,转换到预测的坐标系
- yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=True) 构建loss
utils.py
#!/usr/bin/env python
# -- coding: utf-8 --
"""Miscellaneous utility functions."""
from functools import reduce
from PIL import Image
import numpy as np
from matplotlib.colors import rgb_to_hsv, hsv_to_rgb
def compose(*funcs):
    """Compose arbitrarily many functions, evaluated left to right.

    The first function receives the call arguments; every following function
    receives the previous result. A single function is returned unchanged.

    Reference: https://mathieularose.com/function-composition-in-python/
    See also: https://blog.csdn.net/jmu201521121021/article/details/86626976

    Raises:
        ValueError: If no function is given.
    """
    if not funcs:
        raise ValueError('Composition of empty sequence not supported.')

    def chain(first, second):
        # Build a callable that feeds the result of ``first`` into ``second``.
        def chained(*a, **kw):
            return second(first(*a, **kw))
        return chained

    return reduce(chain, funcs)
def letterbox_image(image, size):
    """Resize image with unchanged aspect ratio using padding.

    The image is scaled to fit inside ``size`` (width, height) and pasted
    centred onto a grey (128, 128, 128) RGB canvas of exactly that size.
    """
    src_w, src_h = image.size
    dst_w, dst_h = size
    # Largest scale at which the whole image still fits into the target.
    ratio = min(float(dst_w) / float(src_w), float(dst_h) / float(src_h))
    new_w = int(src_w * ratio)
    new_h = int(src_h * ratio)

    resized = image.resize((new_w, new_h), Image.BICUBIC)
    canvas = Image.new('RGB', size, (128, 128, 128))
    # Centre the resized image; the uncovered border stays grey.
    top_left = ((dst_w - new_w) // 2, (dst_h - new_h) // 2)
    canvas.paste(resized, top_left)
    return canvas
def rand(a=0., b=1.):
    """Return one ``np.random.rand()`` sample scaled by ``b - a`` and shifted by ``a``."""
    unit_sample = np.random.rand()
    return unit_sample * (b - a) + a
def get_random_data(
        annotation_line, input_shape, random=True,
        max_boxes=20, jitter=.3, hue=.1, sat=1.5,
        val=1.5, proc_img=True):
    """Load one annotated image and fit it (and its boxes) to `input_shape`.

    Args:
        annotation_line: whitespace-separated string. The first field is the
            image path; every further field is a comma-separated list of
            integers describing one box. Columns 0 and 2 are treated as x
            coordinates and columns 1 and 3 as y coordinates; the remaining
            column is carried through unchanged (presumably the class id --
            confirm against the annotation scripts).
        input_shape: `(height, width)` of the network input.
        random: when False, only a deterministic centred letterbox resize is
            applied; when True, random scaling, placement, horizontal flip
            and HSV distortion are applied.
        max_boxes: number of rows in the returned box array.
        jitter: half-width of the random aspect-ratio perturbation.
        hue: maximum absolute hue shift.
        sat: maximum saturation scale factor (or its reciprocal).
        val: maximum value/brightness scale factor (or its reciprocal).
        proc_img: only consulted when `random` is False; if False the image
            is not resized and `image_data` is returned as `0`.

    Returns:
        `(image_data, box_data)`: `image_data` is an RGB array scaled to
        [0, 1] (or `0`, see `proc_img`); `box_data` is a
        `(max_boxes, 5)` float array, zero-padded after the real boxes.
    """
    fields = annotation_line.split()
    image = Image.open(fields[0])
    iw, ih = image.size
    h, w = input_shape
    # Integer array on purpose: later float assignments into it truncate.
    box = np.array([[int(v) for v in item.split(',')] for item in fields[1:]])

    if not random:
        # Deterministic path: centred letterbox resize.
        scale = min(float(w) / float(iw), float(h) / float(ih))
        nw, nh = int(iw * scale), int(ih * scale)
        dx, dy = (w - nw) // 2, (h - nh) // 2

        image_data = 0
        if proc_img:
            resized = image.resize((nw, nh), Image.BICUBIC)
            canvas = Image.new('RGB', (w, h), (128, 128, 128))
            canvas.paste(resized, (dx, dy))
            image_data = np.array(canvas) / 255.

        # Move the boxes into the letterboxed coordinate system.
        box_data = np.zeros((max_boxes, 5))
        if len(box) > 0:
            np.random.shuffle(box)
            box = box[:max_boxes]  # keep at most max_boxes boxes
            box[:, [0, 2]] = box[:, [0, 2]] * scale + dx
            box[:, [1, 3]] = box[:, [1, 3]] * scale + dy
            box_data[:len(box)] = box
        return image_data, box_data

    # Random path: perturb the aspect ratio and pick a random scale.
    stretch_num = rand(1 - jitter, 1 + jitter)
    stretch_den = rand(1 - jitter, 1 + jitter)
    new_ar = w / h * stretch_num / stretch_den
    scale = rand(.25, 2.)
    if new_ar < 1:
        nh = int(scale * h)
        nw = int(nh * new_ar)
    else:
        nw = int(scale * w)
        nh = int(nw / new_ar)
    image = image.resize((nw, nh), Image.BICUBIC)

    # Paste the resized image at a random offset on a grey canvas.
    dx = int(rand(0, w - nw))
    dy = int(rand(0, h - nh))
    canvas = Image.new('RGB', (w, h), (128, 128, 128))
    canvas.paste(image, (dx, dy))
    image = canvas

    # Mirror horizontally with probability 0.5.
    flip = rand() < .5
    if flip:
        image = image.transpose(Image.FLIP_LEFT_RIGHT)

    # Colour distortion in HSV space: hue is shifted additively, saturation
    # and value are multiplied by a random factor or its reciprocal.
    hue_shift = rand(-hue, hue)
    if rand() < .5:
        sat_scale = rand(1, sat)
    else:
        sat_scale = 1 / rand(1, sat)
    if rand() < .5:
        val_scale = rand(1, val)
    else:
        val_scale = 1 / rand(1, val)

    hsv = rgb_to_hsv(np.array(image) / 255.)
    hue_channel = hsv[..., 0]  # view: edits below write through to hsv
    hue_channel += hue_shift
    hue_channel[hue_channel > 1] -= 1  # wrap hue back into [0, 1]
    hue_channel[hue_channel < 0] += 1
    hsv[..., 1] *= sat_scale
    hsv[..., 2] *= val_scale
    hsv[hsv > 1] = 1
    hsv[hsv < 0] = 0
    image_data = hsv_to_rgb(hsv)  # numpy array, 0 to 1

    # Apply the same geometric transform to the boxes, clamp them to the
    # canvas and drop boxes that became degenerate.
    box_data = np.zeros((max_boxes, 5))
    if len(box) > 0:
        np.random.shuffle(box)
        box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
        box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
        if flip:
            # Mirroring swaps the roles of the left and right edges.
            box[:, [0, 2]] = w - box[:, [2, 0]]
        top_left = box[:, 0:2]
        top_left[top_left < 0] = 0
        past_right = box[:, 2] > w
        box[past_right, 2] = w
        past_bottom = box[:, 3] > h
        box[past_bottom, 3] = h
        box_w = box[:, 2] - box[:, 0]
        box_h = box[:, 3] - box[:, 1]
        keep = (box_w > 1) & (box_h > 1)  # discard invalid boxes
        box = box[keep]
        box = box[:max_boxes]
        box_data[:len(box)] = box
    return image_data, box_data
这里的get_random_data就是对原始数据进行处理的函数:根据输入的尺寸对原始图片进行缩放处理,得到input_shape大小的图片数据;
同时随机进行图片的翻转,标记数据(box)也按照相同的比例和偏移同步改变。
yolo.py
#!/usr/bin/env python
# -- coding: utf-8 --
"""
Copyright (c) 2018. All rights reserved.
Created by C. L. Wang on 2018/7/4
"""
"""
Run a YOLO_v3 style detection model on test images.
"""
import colorsys
import os
from timeit import default_timer as timer
import numpy as np
from PIL import Image, ImageFont, ImageDraw
from keras import backend as K
from keras.layers import Input
from yolo3.model import yolo_eval, yolo_body
from yolo3.utils import letterbox_image
class YOLO(object):
    """YOLOv3 detector built from `yolo_body` + `yolo_eval`.

    Construction reads the class list and anchors from disk, grabs the
    Keras backend session, builds the network, loads its weights and wires
    up the `yolo_eval` output tensors. Two entry points are exposed:
    `detect_image` (returns the image with boxes drawn on it) and
    `detect_objects_of_image` (returns a text summary of large detections).
    """

    def __init__(self):
        self.anchors_path = 'configs/yolo_anchors.txt'  # anchors file
        self.model_path = 'model_data/yolo_weights.h5'  # Keras weights file
        self.classes_path = 'configs/coco_classes_ch.txt'  # class names file
        self.score = 0.60  # passed to yolo_eval as score_threshold
        self.iou = 0.45  # passed to yolo_eval as iou_threshold
        self.class_names = self._get_class()
        self.anchors = self._get_anchors()
        self.sess = K.get_session()
        # (height, width); either a fixed size or (None, None).
        # Must be set before generate(), which sizes the model input from it.
        self.model_image_size = (416, 416)
        self.colors = self.__get_colors(self.class_names)
        self.boxes, self.scores, self.classes = self.generate()

    def _get_class(self):
        """Return the class names, one per line of `self.classes_path`."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path, encoding='utf8') as f:
            return [line.strip() for line in f]

    def _get_anchors(self):
        """Return the anchors as an `(N, 2)` float array.

        Only the first line of `self.anchors_path` is read; it must hold a
        comma-separated list of numbers.
        """
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            first_line = f.readline()
        values = [float(x) for x in first_line.split(',')]
        return np.array(values).reshape(-1, 2)

    @staticmethod
    def __get_colors(names):
        """Return one RGB tuple per name, in a fixed pseudo-random order.

        Hues are spread evenly over the colour wheel and then shuffled with
        a fixed seed so the assignment is stable across runs.
        """
        count = len(names)
        colors = []
        for index in range(count):
            r, g, b = colorsys.hsv_to_rgb(float(index) / count, 1., 1.)
            colors.append((int(r * 255), int(g * 255), int(b * 255)))
        # A private RandomState yields the same permutation as seeding the
        # global generator with 10101, without clobbering the caller's
        # global random state.
        np.random.RandomState(10101).shuffle(colors)
        return colors

    def generate(self):
        """Build the network, load the weights and create the output tensors.

        Sets `self.yolo_model` and `self.input_image_shape` as side effects.

        Returns:
            The `(boxes, scores, classes)` tensors produced by `yolo_eval`.
        """
        model_path = os.path.expanduser(self.model_path)  # expand "~"
        assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'

        num_anchors = len(self.anchors)
        num_classes = len(self.class_names)

        # Size the input from model_image_size so that the (None, None)
        # mode handled in _prepare_input is actually usable, and derive the
        # anchors-per-scale count from the loaded anchors instead of
        # hard-coding it (9 anchors // 3 output scales == 3).
        input_shape = tuple(self.model_image_size) + (3,)
        self.yolo_model = yolo_body(
            Input(shape=input_shape), num_anchors // 3, num_classes)
        self.yolo_model.load_weights(model_path)
        print('{} model, {} anchors, and {} classes loaded.'.format(
            model_path, num_anchors, num_classes))

        # Placeholder for the original image size, fed as [height, width].
        self.input_image_shape = K.placeholder(shape=(2,))
        boxes, scores, classes = yolo_eval(
            self.yolo_model.output, self.anchors, num_classes,
            self.input_image_shape,
            score_threshold=self.score, iou_threshold=self.iou)
        return boxes, scores, classes

    def _prepare_input(self, image):
        """Letterbox `image` and return it as a `(1, H, W, 3)` float32 batch in [0, 1]."""
        if tuple(self.model_image_size) != (None, None):
            # The check requires both sides to be multiples of 32.
            assert self.model_image_size[0] % 32 == 0, 'Multiples of 32 required'
            assert self.model_image_size[1] % 32 == 0, 'Multiples of 32 required'
            # model_image_size is (h, w); letterbox_image wants (w, h).
            target_size = tuple(reversed(self.model_image_size))
        else:
            # Free-size mode: round each side down to a multiple of 32.
            target_size = (image.width - (image.width % 32),
                           image.height - (image.height % 32))
        boxed_image = letterbox_image(image, target_size)
        image_data = np.array(boxed_image, dtype='float32')
        image_data /= 255.  # scale to 0~1
        return np.expand_dims(image_data, 0)  # add the batch dimension

    def _predict(self, image_data, image_size):
        """Run the session; `image_size` is the original `(width, height)`."""
        return self.sess.run(
            [self.boxes, self.scores, self.classes],
            feed_dict={
                self.yolo_model.input: image_data,
                self.input_image_shape: [image_size[1], image_size[0]],
                K.learning_phase(): 0,
            })

    @staticmethod
    def _text_size(draw, label, font):
        """Return `(width, height)` of `label` rendered with `font`.

        `ImageDraw.textsize` was removed in Pillow 10, so `textbbox` is
        preferred when available.
        """
        if hasattr(draw, 'textbbox'):
            left, top, right, bottom = draw.textbbox((0, 0), label, font=font)
            return right - left, bottom - top
        return draw.textsize(label, font)

    def detect_image(self, image):
        """Detect objects in a PIL image and draw them onto it.

        The image is modified in place and also returned. Progress, each
        detection and the elapsed time are printed.
        """
        start = timer()

        image_data = self._prepare_input(image)
        print('detector size {}'.format(image_data.shape[1:]))
        out_boxes, out_scores, out_classes = self._predict(image_data, image.size)
        print('Found {} boxes for {}'.format(len(out_boxes), 'img'))

        width, height = image.size
        font = ImageFont.truetype(
            font='font/FiraMono-Medium.otf',
            size=int(np.floor(3e-2 * height + 0.5)))
        thickness = (width + height) // 512
        draw = ImageDraw.Draw(image)

        for i, c in reversed(list(enumerate(out_classes))):
            predicted_class = self.class_names[c]
            score = out_scores[i]
            label = '{} {:.2f}'.format(predicted_class, score)
            label_w, label_h = self._text_size(draw, label, font)

            # Boxes are unpacked as (top, left, bottom, right); round to
            # the nearest pixel and clamp to the image.
            top, left, bottom, right = out_boxes[i]
            top = max(0, int(np.floor(top + 0.5)))
            left = max(0, int(np.floor(left + 0.5)))
            bottom = min(height, int(np.floor(bottom + 0.5)))
            right = min(width, int(np.floor(right + 0.5)))
            print(label, (left, top), (right, bottom))

            # Put the label above the box when there is room, otherwise
            # just inside its top edge.
            if top - label_h >= 0:
                text_origin = (left, top - label_h)
            else:
                text_origin = (left, top + 1)

            # Draw nested 1-px rectangles to get a thick outline.
            for inset in range(thickness):
                draw.rectangle(
                    [left + inset, top + inset, right - inset, bottom - inset],
                    outline=self.colors[c])
            draw.rectangle(  # label background
                [text_origin,
                 (text_origin[0] + label_w, text_origin[1] + label_h)],
                fill=self.colors[c])
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)
        del draw

        end = timer()
        print(end - start)  # elapsed detection time
        return image

    def detect_objects_of_image(self, img_path):
        """Detect objects in the image at `img_path` and summarise them.

        Returns:
            A comma-separated string of `name-rate` items, one for each
            detection kept by `_filter_boxes`.
        """
        # The context manager releases the file handle once the pixels
        # have been read by the preprocessing step.
        with Image.open(img_path) as image:
            image_data = self._prepare_input(image)
            width, height = image.size
        out_boxes, out_scores, out_classes = self._predict(
            image_data, (width, height))
        return self._filter_boxes(
            out_boxes, out_scores, out_classes, width * height)

    def _filter_boxes(self, boxes, scores, classes, img_size):
        """Keep detections whose box area exceeds 5% of `img_size`.

        Args:
            boxes: iterable of `(top, left, bottom, right)` boxes.
            scores: per-box scores (iterated in lockstep, otherwise unused).
            classes: per-box indices into `self.class_names`.
            img_size: image area in pixels.

        Returns:
            `'name-rate'` items joined by commas, where rate is the
            box-to-image area ratio formatted with two decimals.
        """
        res_items = []
        for box, _score, clazz in zip(boxes, scores, classes):
            top, left, bottom, right = box
            box_size = (bottom - top) * (right - left)
            rate = float(box_size) / float(img_size)
            if rate > 0.05:
                res_items.append(
                    '{}-{:0.2f}'.format(self.class_names[clazz], rate))
        return ','.join(res_items)

    def close_session(self):
        """Close the backend session held by this detector."""
        self.sess.close()
def detect_img_for_test():
    """Demo: run `YOLO.detect_image` on a sample file and save the result as xxx.png."""
    detector = YOLO()
    source = Image.open('./dataset/vDaPl5QHdoqb2wOaVql4FoJWNGglYk.jpg')
    annotated = detector.detect_image(source)
    detector.close_session()
    annotated.save('xxx.png')
def test_of_detect_objects_of_image():
    """Demo: print the `detect_objects_of_image` summary for a sample file.

    The session is closed even if detection fails, matching
    `detect_img_for_test`, which also releases the session when done.
    """
    yolo = YOLO()
    img_path = './dataset/vDaPl5QHdoqb2wOaVql4FoJWNGglYk.jpg'
    try:
        objects_line = yolo.detect_objects_of_image(img_path)
    finally:
        yolo.close_session()
    print(objects_line)
if __name__ == '__main__':
    # Script entry point: runs the text-summary demo. The commented-out
    # call below is the alternative demo that saves an annotated image.
    # detect_img_for_test()
    test_of_detect_objects_of_image()
这是yolo的检测文件,yolo检测模型对外提供两个接口:
- detect_image(self, image) 检测图片,返回图片
- detect_objects_of_image(self, img_path) 检测图片返回,box的各个值
这里的yolo模型使用的就是model.py文件yolo_eval()函数,这是在模型训练完成之后,最重要的函数,这个文件只导入了yolo_eval和 yolo_body两个函数
from yolo3.model import yolo_eval, yolo_body
参考:
https://github.com/SpikeKing/keras-yolo3-detection
Keras中自定义目标函数(损失函数)的简单方法
Keras中自定义复杂的loss函数