tensorflow目标检测API实现血细胞图像识别和计数

1. 准备数据集

这里使用开源的BCCD血细胞图像数据集，包含364张图像和364个xml标注
xml标注
图像
某张图像

上图为血细胞图像，其中最多包含3种目标对象，分别是红细胞，白细胞和血小板

上图为图像对应的xml标注，主要信息有目标的类别，标注框的位置坐标
这里将364张图像拆分为训练集300张，测试集64张，分别存放在images和images_testdata路径下
对应的xml标注分别存放在annotations和Annotations_testdata路径下
其中文件编号0-338的图像为训练集，文件编号339-410的图像为测试集（文件编号并不连续，因此图像总数仍为364张）
至此，数据集和xml标注准备完成

2. 数据读取和观察分析

import os
import numpy as np
import xml.etree.ElementTree as ET
from lxml import etree
# Class names in label order: get_cell_names() uses each name's index in this
# list as its numeric class id.
CELL_NAMES = ['RBC', 'WBC', 'Platelets']
# Change the working directory so the relative dataset paths used below resolve.
# NOTE(review): machine-specific absolute path - adjust to the local project location.
os.chdir("E:/cell_detection_count/project")

# Build the class-name -> class-id lookup table.
# With the CELL_NAMES defined above: {'RBC': 0, 'WBC': 1, 'Platelets': 2}
def get_cell_names():
    """Return a dict mapping every name in CELL_NAMES to its list index."""
    return {name: idx for idx, name in enumerate(CELL_NAMES)}


# Build the dataset record list by reading the XML annotation files.
def get_annotations(cname2cid, datadir, img_dir="BCCD_Dataset-master/BCCD/JPEGImages/"):
    """Parse every ``.xml`` annotation file in *datadir* into a record dict.

    Args:
        cname2cid: Mapping from class name (text of an object's ``<name>``
            element) to its integer class id.
        datadir: Directory that contains the XML annotation files.
        img_dir: Directory prefix used to build each record's image path.
            Defaults to the path that was previously hard-coded.

    Returns:
        A list of dicts, one per annotation file that contains at least one
        ``<object>``. Keys: ``im_file``, ``h``, ``w``, ``is_crowd``,
        ``gt_class``, ``gt_bbox``, ``gt_poly`` and ``difficult``.
        ``gt_bbox`` rows are ``[center_x, center_y, width, height]`` in pixels.

    Raises:
        KeyError: If an object's class name is not a key of *cname2cid*.
    """
    records = []
    # Sorted so the record order does not depend on the OS directory listing.
    for fname in sorted(os.listdir(datadir)):
        stem, ext = os.path.splitext(fname)
        # Ignore anything in the directory that is not an annotation file.
        if ext.lower() != ".xml":
            continue

        fpath = os.path.join(datadir, fname)
        # The image shares the annotation's base name, with a .jpg extension.
        img_file = os.path.join(img_dir, stem + ".jpg")

        root = ET.parse(fpath).getroot()
        objs = list(root.iter("object"))
        # Only images that contain at least one annotated object are kept.
        if not objs:
            continue

        size_node = root.find(".//size")
        im_w = float(size_node.find("width").text)
        im_h = float(size_node.find("height").text)

        gt_bbox = np.zeros((len(objs), 4), dtype=np.float32)
        gt_class = np.zeros((len(objs),), dtype=np.int32)
        is_crowd = np.zeros((len(objs),), dtype=np.int32)  # always 0 here
        difficult = np.zeros((len(objs),), dtype=np.int32)

        for i, obj in enumerate(objs):
            gt_class[i] = cname2cid[obj.find(".//name").text]

            # A missing <difficult> tag is treated as "not difficult".
            difficult_text = obj.findtext(".//difficult")
            difficult[i] = int(difficult_text) if difficult_text is not None else 0

            bndbox = obj.find(".//bndbox")
            x1, y1, x2, y2 = (
                float(bndbox.find(tag).text)
                for tag in ("xmin", "ymin", "xmax", "ymax")
            )
            # Clamp the corners to the image area.
            x1 = max(0.0, x1)
            y1 = max(0.0, y1)
            x2 = min(im_w - 1, x2)
            y2 = min(im_h - 1, y2)
            # Store the box as center x/y plus width/height (inclusive pixels).
            gt_bbox[i] = [
                (x1 + x2) / 2.0,
                (y1 + y2) / 2.0,
                x2 - x1 + 1.0,
                y2 - y1 + 1.0,
            ]

        records.append({
            'im_file': img_file,
            'h': im_h,
            'w': im_w,
            'is_crowd': is_crowd,
            'gt_class': gt_class,
            'gt_bbox': gt_bbox,
            'gt_poly': [],
            'difficult': difficult,
        })
    return records

# Directory of annotation XML files, given relative to the working directory.
# NOTE(review): presumably passed as `datadir` to get_annotations - confirm against the caller.
train_path = 'BCCD_Dataset-master/BCCD/Annotations'

# 读取图像数据
import cv2


# A detection image usually contains several target objects. Every image is
# given a fixed number of ground-truth slots (max_num, 50 by default): extra
# boxes are dropped, and unused slots of the boxes and labels stay 0.
def get_bbox(gt_bbox, gt_class, max_num=50):
    """Pad or truncate ground-truth boxes and labels to a fixed length.

    Args:
        gt_bbox: Array-like of shape ``(n, 4)`` holding the boxes.
        gt_class: Array-like of length ``n`` holding the class ids.
        max_num: Number of rows in the returned arrays.

    Returns:
        A tuple ``(boxes, labels)`` of float arrays with shapes
        ``(max_num, 4)`` and ``(max_num,)``. The first ``min(n, max_num)``
        rows are copied from the inputs; the remaining rows are zeros.
    """
    boxes = np.asarray(gt_bbox)
    labels = np.asarray(gt_class)
    padded_boxes = np.zeros((max_num, 4))
    padded_labels = np.zeros((max_num,))

    # Clamp the count before copying so that more than max_num boxes are
    # truncated instead of indexing past the end of the padded arrays.
    count = min(len(boxes), max_num)
    if count:
        padded_boxes[:count] = boxes[:count]
        padded_labels[:count] = labels[:count]
    return padded_boxes, padded_labels


def get_img_data_from_file(record):
    """Load the image of one record together with its padded, relative boxes.

    Args:
        record: Dict with at least the keys ``im_file``, ``h``, ``w``,
            ``gt_class`` and ``gt_bbox`` (as produced by get_annotations).

    Returns:
        A tuple ``(img, gt_boxes, gt_labels, (h, w))``. ``img`` is the image
        converted from BGR to RGB channel order. ``gt_boxes`` and
        ``gt_labels`` come from get_bbox, with the box columns divided by the
        image width (columns 0 and 2) or height (columns 1 and 3).

    Raises:
        FileNotFoundError: If the image file cannot be read.
    """
    im_file = record['im_file']
    h = record['h']
    w = record['w']

    img = cv2.imread(im_file)
    # cv2.imread signals a missing or unreadable file by returning None
    # instead of raising, so fail here with a clear message.
    if img is None:
        raise FileNotFoundError("cannot read image file: {}".format(im_file))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    gt_boxes, gt_labels = get_bbox(record['gt_bbox'], record['gt_class'])

    # Convert the boxes to values relative to the image size:
    # x / width columns by w, y / height columns by h.
    gt_boxes[:, 0::2] /= float(w)
    gt_boxes[:, 1::2] /= float(h)

    return img, gt_boxes, gt_labels, (h, w)

def rectangle_draw(img, gt_boxes, gt_class, size):
    """Draw a rectangle and a class-name label on *img* for every box.

    Args:
        img: Image that is drawn on in place.
        gt_boxes: Array-like of rows ``[center_x, center_y, width, height]``
            in values relative to the image size. It is NOT modified, so the
            same array can be passed to several draw calls.
        gt_class: Per-box class id: 0 is labelled "RBC", 1 "WBC", and any
            other value "Platelets".
        size: ``(height, width)`` in pixels, used to scale the relative
            boxes back to absolute coordinates.
    """
    # Scale a copy so the caller's relative boxes are left untouched.
    scale = np.array([size[1], size[0], size[1], size[0]], dtype=float)
    abs_boxes = np.asarray(gt_boxes, dtype=float) * scale

    font = cv2.FONT_HERSHEY_SIMPLEX
    for (cx, cy, box_w, box_h), cls in zip(abs_boxes, gt_class):
        # Zero-sized rows are the padding added by get_bbox, not real cells;
        # drawing them would put a spurious "RBC" label at the image origin.
        if box_w <= 0 or box_h <= 0:
            continue

        text_origin = (int(cx), int(cy))  # label is anchored at the box center
        x1 = int(cx) - int(box_w / 2)
        y1 = int(cy) - int(box_h / 2)
        x2 = int(cx) + int(box_w / 2)
        y2 = int(cy) + int(box_h / 2)

        if cls == 0:
            label, color = "RBC", (0, 0, 255)
        elif cls == 1:
            label, color = "WBC", (255, 0, 0)
        else:
            label, color = "Platelets", (0, 255, 0)

        cv2.putText(img, label, text_origin, font, 0.5, color, 1, cv2.LINE_AA)
        cv2.rectangle(img, (x1, y1), (x2, y2), color)

def circle_draw(img, gt_boxes, gt_class, size):
    import math
    """
    #给图像中的细胞绘制圆形框和文字标注
    """
    # 将位置相对值转为绝对值
    gt_boxes[:, 0] = gt_boxes[:, 0] * size[1]
    gt_boxes[:, 1] = gt_boxes[:, 1] * size[0]
    gt_boxes[:, 2] = gt_boxes[:, 2] * size[1]
    gt_boxes[:, 3] = gt_boxes[:, 3] * size[0]
    font = cv2.FONT_HERSHEY_SIMPLEX
    for i, g_b in enumerate(gt_boxes):
        font_x_y = (int(g_b[0]), int(g_b[1]))