记录使用 Tensorflow 官方的 Object Detection API 进行自定义模型训练的踩坑过程。官方项目地址
https://github.com/tensorflow/models
假设自定义的项目名称是 Demo。推荐步骤:
一. 环境准备
- git clone 到本地, 然后建议复制出项目的 research/object_detection 文件夹和 research/slim 文件夹到 Demo 目录下
- 新建环境变量 PYTHONPATH=Demo:Demo/slim
- 下载 protobuf 3.0 工具(注意版本最好是3.0的,高版本的protobuf 没有通配符匹配功能, windows版本到同一发布页面找对应的压缩包即可)
wget -O protobuf.zip https://github.com/google/protobuf/releases/download/v3.0.0/protoc-3.0.0-linux-x86_64.zip
- 使用 protobuf 工具编译 Demo/object_detection/protos 文件夹下的 *.proto 文件,生成的.py文件也在protos文件夹下即可
./bin/protoc object_detection/protos/*.proto --python_out=object_detection/protos/
- 安装依赖的包
pip install tensorflow-gpu==1.14.0
pip install --user Cython
pip install --user contextlib2
pip install --user pillow
pip install --user lxml
pip install --user matplotlib
pip install --user opencv-python
- 检测上述步骤是否成功 (在Demo根目录执行, 下同), 最后输出 OK 表示环境准备成功
python object_detection/builders/model_builder_test.py
二. 自定义训练
- 准备图片, 分别是 images 和 xml 描述文件即可, 这里就存放在 Demo/data 下面,images 存放图片, labels 存放 xml 描述文件。两者的文件名(不含扩展名)必须一一对应。
- 将准备好的数据转换成 TF record 格式的文件,使用以下python脚本一步到位
'''
将普通xml文件转换成 tensorflow 的 tf record文件
'''
import os, io
import random
import os.path
import cv2
import imutils
from xml.dom import minidom
from os.path import basename
from PIL import Image
from object_detection.utils import dataset_util
from collections import namedtuple
import pandas as pd
import tensorflow as tf
# ------------ user-configurable section: begin ------------
project_dir = os.path.dirname(__file__)
data_dir = os.path.join(project_dir, "data")
folderCharacter = "/"  # use "\\" on Windows
# Detection classes: values are zero-based here; the generated pbtxt numbers
# the same classes from 1 (done automatically further down).
classList = {"test": 0}
xmlFolder = os.path.join(data_dir, "labels")
imgFolder = os.path.join(data_dir, "images")
savePath = os.path.join(data_dir, "ssd_dataset")
testRatio = 0.2  # fraction of the images held out for the test set
recordTF_out = ("train.record", "test.record")  # generated record files
recordTF_in = ("train.csv", "test.csv")  # intermediate CSV files
resizeImage = False  # setting this to True can make rfcn boxes exceed the image size
resize_width = 1920
imgResizedFolder = "{}_{}".format(imgFolder, resize_width)
# ------------ user-configurable section: end ------------

fileList = []
outputTrainFile, outputTestFile = (
    savePath + folderCharacter + csv_name for csv_name in recordTF_in
)

# Make sure both output folders exist before anything is written to them.
for _required_dir in (savePath, imgResizedFolder):
    if not os.path.exists(_required_dir):
        os.makedirs(_required_dir)
def _direct_children(node, tag):
    """Return the element nodes directly below ``node`` whose tag is ``tag``."""
    return [child for child in node.childNodes
            if child.nodeType == child.ELEMENT_NODE and child.tagName == tag]


def _scaled_coords(node, tag, ratio):
    """Return every ``<tag>`` value below ``node`` as an int scaled by ``ratio``."""
    # int(float(...)) also accepts values such as "12.0", which int() alone rejects.
    return [int(int(float(elem.firstChild.data)) * ratio)
            for elem in node.getElementsByTagName(tag)]


def _object_box(obj, ratio_w, ratio_h):
    """Return (name, xmin, ymin, xmax, ymax) for one <object>, or None if incomplete."""
    names = _direct_children(obj, "name")
    boxes = _direct_children(obj, "bndbox")
    if not names or not boxes or names[0].firstChild is None:
        return None
    coords = []
    for tag, ratio in (("xmin", ratio_w), ("ymin", ratio_h),
                       ("xmax", ratio_w), ("ymax", ratio_h)):
        values = _scaled_coords(boxes[0], tag, ratio)
        if not values:
            return None
        coords.append(values[0])
    return (str(names[0].firstChild.data),) + tuple(coords)


def transferTF(xmlFilepath, imgFilepath, labelGrep=""):
    """Copy one image into imgResizedFolder and read its box annotations.

    The image is optionally resized (when ``resizeImage`` is set) so that its
    longer side equals ``resize_width``; box coordinates are scaled by the same
    ratios and truncated to int.

    Args:
        xmlFilepath: path of the XML annotation file.
        imgFilepath: path of the image file.
        labelGrep: unused; kept so existing callers keep working.

    Returns:
        ``(image_name, width, height, names, xmins, ymins, xmaxs, ymaxs)`` where
        the last five items are parallel lists with one entry per box, or a
        tuple of eight ``None`` when either file is missing or the image cannot
        be decoded.
    """
    missing = (None,) * 8
    if not (os.path.exists(xmlFilepath) and os.path.exists(imgFilepath)):
        return missing

    img = cv2.imread(imgFilepath)
    if img is None:
        # cv2.imread reports an undecodable file by returning None, not raising.
        print("skip unreadable image:", imgFilepath)
        return missing

    image_name = basename(imgFilepath)
    org_height, org_width = img.shape[:2]
    if resizeImage:
        # Scale the longer side to resize_width; imutils keeps the aspect ratio.
        if org_width >= org_height:
            img = imutils.resize(img, width=resize_width)
        else:
            img = imutils.resize(img, height=resize_width)
        size_ratio_w = img.shape[1] / org_width
        size_ratio_h = img.shape[0] / org_height
    else:
        size_ratio_w = size_ratio_h = 1
    # The (possibly resized) copy is what the TFRecord step reads later.
    cv2.imwrite(imgResizedFolder + folderCharacter + image_name, img)
    img_h, img_w = img.shape[:2]

    labelXML = minidom.parse(xmlFilepath)
    labelName, labelXmin, labelYmin, labelXmax, labelYmax = [], [], [], [], []
    objects = labelXML.getElementsByTagName("object")
    if objects:
        # Read name and box per <object> so the five lists stay aligned even
        # when other elements of the file also contain <name> tags.
        for obj in objects:
            box = _object_box(obj, size_ratio_w, size_ratio_h)
            if box is None:
                print("skip incomplete <object> in", xmlFilepath)
                continue
            labelName.append(box[0])
            labelXmin.append(box[1])
            labelYmin.append(box[2])
            labelXmax.append(box[3])
            labelYmax.append(box[4])
    else:
        # No <object> wrappers: fall back to a document-wide scan of each tag.
        labelName = [str(elem.firstChild.data)
                     for elem in labelXML.getElementsByTagName("name")]
        labelXmin = _scaled_coords(labelXML, "xmin", size_ratio_w)
        labelYmin = _scaled_coords(labelXML, "ymin", size_ratio_h)
        labelXmax = _scaled_coords(labelXML, "xmax", size_ratio_w)
        labelYmax = _scaled_coords(labelXML, "ymax", size_ratio_h)

    return (image_name, img_w, img_h, labelName,
            labelXmin, labelYmin, labelXmax, labelYmax)
def class_text_to_int(row_label):
    """Return the label-map id for a class name.

    ``classList`` stores zero-based indices, while the object_detection.pbtxt
    written by this script gives the class with index ``k`` the id ``k + 1``.
    The id returned here uses that same one-based numbering so the numeric
    label stored in the record agrees with the label map.

    Args:
        row_label: class name as it appears in the CSV ``class`` column.

    Returns:
        The one-based integer id of the class.

    Raises:
        KeyError: if ``row_label`` is not a key of ``classList``.
    """
    print("row_label :{}".format(row_label))
    try:
        zero_based_index = classList[row_label]
    except KeyError:
        raise KeyError(
            "unknown class label {!r}; add it to classList".format(row_label))
    return zero_based_index + 1
def split(df, group):
    """Group the rows of ``df`` by the ``group`` column.

    Returns a list of ``data(filename, object)`` namedtuples, one per distinct
    key, where ``filename`` is the key and ``object`` is the sub-frame holding
    that key's rows.
    """
    data = namedtuple('data', ['filename', 'object'])
    by_key = df.groupby(group)
    records = []
    for key in by_key.groups:
        records.append(data(key, by_key.get_group(key)))
    return records
def create_tf_example(group, path):
    """Build a ``tf.train.Example`` for one image and all of its boxes.

    Args:
        group: a ``data(filename, object)`` item produced by ``split``;
            ``object`` holds the CSV rows (class, xmin, ymin, xmax, ymax) of
            that image.
        path: folder that contains the image file named ``group.filename``.

    Returns:
        The ``tf.train.Example`` with the encoded image, its size, and the box
        coordinates divided by the image width/height.
    """
    image_path = os.path.join(path, '{}'.format(group.filename))
    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_image = fid.read()
    # Only the size is needed here; the bytes are stored as read from disk.
    width, height = Image.open(io.BytesIO(encoded_image)).size

    filename = group.filename.encode('utf8')
    # The listing step accepts .jpg/.jpeg/.png/.bmp and images are copied with
    # their own extension, so record the real format instead of always 'jpg'.
    extension = os.path.splitext(group.filename)[1].lower().lstrip('.')
    if extension in ('', 'jpg', 'jpeg'):
        image_format = b'jpg'
    else:
        image_format = extension.encode('utf8')

    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []
    for _, row in group.object.iterrows():
        # Pixel coordinates are stored as fractions of the image size.
        xmins.append(int(row['xmin']) / width)
        xmaxs.append(int(row['xmax']) / width)
        ymins.append(int(row['ymin']) / height)
        ymaxs.append(int(row['ymax']) / height)
        classes_text.append(row['class'].encode('utf8'))
        classes.append(class_text_to_int(row['class']))

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_image),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example
#-------------------------------------------------
# step 1: make train.csv / test.csv
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".bmp")
CSV_HEADER = "filename,width,height,class,xmin,ymin,xmax,ymax"


def _source_paths(image_name):
    """Return (xml_path, image_path) for one entry of fileList."""
    base_filename = os.path.splitext(image_name)[0]
    xml_path = xmlFolder + folderCharacter + base_filename + ".xml"
    image_path = imgFolder + folderCharacter + image_name
    return xml_path, image_path


def _write_box_rows(the_file, imgfile, w, h, labels, Xmin, Ymin, Xmax, Ymax):
    """Write one CSV row per labelled box and return the number of rows written."""
    for idx, label in enumerate(labels):
        fields = [imgfile, str(w), str(h), label, str(Xmin[idx]),
                  str(Ymin[idx]), str(Xmax[idx]), str(Ymax[idx])]
        the_file.write(",".join(fields) + '\n')
    return len(labels)


# Collect every image that has an annotation file with the same base name.
for file in os.listdir(imgFolder):
    filename, file_extension = os.path.splitext(file)
    # Lower-case the extension only for the comparison: the stored name must
    # keep its original case or "A.JPG" would later be looked up as "A.jpg".
    if file_extension.lower() in IMAGE_EXTENSIONS:
        imgFile = file
        xmlFile = basename(filename) + ".xml"
        print("XML:"+xmlFile, "IMG:"+imgFile)
        if os.path.exists(xmlFolder+folderCharacter+xmlFile):
            fileList.append(imgFile)
print("total image files: ", len(fileList))

# Random train/test split over the indices of fileList.
testCount = int(len(fileList) * testRatio)
trainCount = len(fileList) - testCount
a = range(len(fileList))
test_data = random.sample(a, testCount)
_test_lookup = set(test_data)  # constant-time membership test
train_data = [x for x in a if x not in _test_lookup]
print ("Train:{} images".format(len(train_data)))
print("Test:{} images".format(len(test_data)))

csvFilename = savePath + folderCharacter + recordTF_in[0]
print("writeing to {}".format(csvFilename))
# Mode "w", not "a": appending on a re-run would repeat the header and rows
# in the middle of the file. UTF-8 matches the pandas.read_csv default.
with open(csvFilename, 'w', encoding="utf-8") as the_file:
    i = 0  # number of CSV rows (boxes) written
    the_file.write(CSV_HEADER + '\n')
    for idx in train_data:
        xmlpath, imgpath = _source_paths(fileList[idx])
        (imgfile, w, h, labels, Xmin, Ymin, Xmax, Ymax) = transferTF(xmlpath, imgpath, "")
        print(imgfile, w, h, labels, Xmin, Ymin, Xmax, Ymax)
        if imgfile is not None:
            i += _write_box_rows(the_file, imgfile, w, h, labels, Xmin, Ymin, Xmax, Ymax)
print("Total {} train records to {}".format(i, recordTF_in[0]))

csvFilename = savePath + folderCharacter + recordTF_in[1]
print("writeing to {}".format(csvFilename))
with open(csvFilename, 'w', encoding="utf-8") as the_file:
    i = 0  # number of CSV rows (boxes) written
    the_file.write(CSV_HEADER + '\n')
    for idx in test_data:
        xmlpath, imgpath = _source_paths(fileList[idx])
        (imgfile, w, h, labels, Xmin, Ymin, Xmax, Ymax) = transferTF(xmlpath, imgpath, "")
        if imgfile is not None:
            print("TEST_labels:", labels)
            print("TEST_Xmin:", Xmin)
            print("TEST_Xmax:", Xmax)
            print("TEST_Ymin:", Ymin)
            print("TEST_Ymax:", Ymax)
            i += _write_box_rows(the_file, imgfile, w, h, labels, Xmin, Ymin, Xmax, Ymax)
print("Total {} test records to {}".format(i, recordTF_in[1]))
#----------------------------------------------------------
# step 2: make TFRecords: train.record / test.record
print("----------- Transfer to TF Record ---------------")
for i in (0, 1):
    output_path = savePath + folderCharacter + recordTF_out[i]
    # Read the CSV first so a bad CSV does not leave an empty record file behind.
    examples = pd.read_csv(savePath + folderCharacter + recordTF_in[i])
    grouped = split(examples, 'filename')
    writer = tf.python_io.TFRecordWriter(output_path)
    try:
        for group in grouped:
            print("group :{}".format(group))
            tf_example = create_tf_example(group, imgResizedFolder)
            writer.write(tf_example.SerializeToString())
    finally:
        # Close the writer even when one example fails to build.
        writer.close()
    print('Successfully created the TFRecords: {}'.format(output_path))
#-----------------------------------------
# step 3: write the label map; ids start at 1, i.e. id = classList value + 1
print("-----------make object_detection.pbtxt -----------")
filename = savePath + folderCharacter + 'object_detection.pbtxt'
print("writeing to {}".format(filename))
inv_classList = {v: k for k, v in classList.items()}
print(inv_classList)
# Mode "w", not "a": appending on a re-run would duplicate every item.
with open(filename, 'w', encoding="utf-8") as the_file:
    for i in range(1, len(classList)+1):
        print("i=", i)
        the_file.write("item {" + '\n')
        the_file.write(" id: " + str(i) + '\n')
        the_file.write(" name: '" + inv_classList[i-1] + "'" + '\n')
        the_file.write("}" + '\n\n')
执行这个脚本会在 data/ssd_dataset 目录下生成train.record, test.record 和 object_detection.pbtxt 三个文件和csv中间文件。
- 下载预训练模型,模型选择地址:
https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md
自己综合选择吧,我这里选择的是ssd_resnet_50_fpn_coco ☆ ,速度还行,准确率比较高
- 解压缩预训练模型到项目根目录
tar xvf ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03.tar.gz
查看预训练模型的目录结构:
- 配置pipeline.config文件,这是训练的关键。有几个注意点说明一下:
- train_config 块的 batch_size 的大小是你训练时发生OOM的关键,如果发生OOM了,减小这个数值即可。
- train_config 块的 fine_tune_checkpoint 配置为上一步解压出的预训练模型目录下 model.ckpt 的绝对路径(即 <解压目录>/model.ckpt, 只写前缀, 不带 .index / .data 等后缀)
- .pbtxt 和 .record 文件的路径配置(绝对路径)
train_input_reader {
label_map_path: "pbtxt 文件的绝对路径"
tf_record_input_reader {
input_path: "train record 文件的绝对路径"
}
}
eval_input_reader {
label_map_path: "pbtxt 文件的绝对路径"
shuffle: false
num_readers: 1
tf_record_input_reader {
input_path: "test record 文件的绝对路径"
}
}
- 训练模型, 这里假设训练结果保存路径为 training 文件夹(相对于项目根目录)
python object_detection/model_main.py --alsologtostderr --pipeline_config_path=上一步里面pipeline.config文件相对路径(绝对路径也行) --model_dir=存放训练结果的地址(这里是training/) --num_train_steps=训练步数(这个模型默认是25000)
- 将训练后的模型转换成可执行的pb格式文件
python object_detection/export_inference_graph.py
--input_type image_tensor
--pipeline_config_path pipeline.config文件的相对地址
--trained_checkpoint_prefix 训练后的ckpt文件前缀 (这里假设为 training/model.ckpt-50000)
--output_directory model (pb文件的保存路径)
- 测试模型,写了个脚本测试:
'''
模型测试
'''
import tensorflow as tf
import cv2 as cv
import os
import imutils # pip install imutils
project_dir = os.path.dirname(__file__)
model_dir = os.path.join(project_dir, "model")  # folder holding the exported pb file
data_dir = os.path.join(project_dir, "data")  # data folder
pb_path = os.path.join(project_dir, "pb")  # folder the generated pbtxt file was copied to
model_path = os.path.join(model_dir, "frozen_inference_graph.pb")
pbtxt_path = os.path.join(pb_path, 'object_detection.pbtxt')
testimg = os.path.join(data_dir, 'test.jpg')  # image to run detection on

# tf.gfile.GFile replaces the deprecated tf.gfile.FastGFile.
with tf.gfile.GFile(model_path, 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())

img = cv.imread(testimg)
if img is None:
    # cv.imread returns None instead of raising when the file cannot be read.
    raise IOError("cannot read test image: {}".format(testimg))
rows, cols = img.shape[:2]

with tf.Session() as sess:
    # Inside the session context its graph is the default graph, so the
    # frozen graph is imported into sess.graph.
    tf.import_graph_def(graph_def, name='')

    inp = cv.resize(img, (450, 450))
    inp = inp[:, :, [2, 1, 0]]  # BGR2RGB
    out = sess.run([sess.graph.get_tensor_by_name('num_detections:0'),
                    sess.graph.get_tensor_by_name('detection_scores:0'),
                    sess.graph.get_tensor_by_name('detection_boxes:0'),
                    sess.graph.get_tensor_by_name('detection_classes:0')],
                   feed_dict={'image_tensor:0': inp.reshape(1, inp.shape[0], inp.shape[1], 3)})

    num_detections = int(out[0][0])
    for i in range(num_detections):
        cat = int(out[3][0][i])
        score = float(out[1][0][i])
        bbox = [float(v) for v in out[2][0][i]]
        # Box values are read as (ymin, xmin, ymax, xmax) fractions and
        # scaled back to the size of the original image.
        x = bbox[1] * cols
        y = bbox[0] * rows
        right = bbox[3] * cols
        bottom = bbox[2] * rows
        cv.rectangle(img, (int(x), int(y)), (int(right), int(bottom)), (125, 255, 51), thickness=2)
        print(cat, "-->", score, x, y, right, bottom)

    cv.imshow('SHOW', imutils.resize(img, width=1080))
    cv.waitKey()
运行脚本就可以看到结果了。
环境踩坑:
- opencv-python 的安装问题,建议安装:
pip3 install opencv-contrib-python==3.3.0.9
- gast库版本:
pip3 uninstall -y gast
pip3 install gast==0.2.2
- 在windows 上训练自定义模型,无法避免的一个包 pycocotools 官方安装是失败的,换种方式:
pip3 install Cython
pip3 install git+https://github.com/philferriere/cocoapi.git#subdirectory=PythonAPI
Cython 是pycocotools的依赖
总结:
仔细看日志,特别是预训练模型选择那一块,不同的模型训练时报的错误都不一样,有些是bug,有些是包版本问题,多google,看官方github的issue解决问题。这里用的tensorflow gpu版本是1.14. 好运~