栏目分类:
子分类:
返回
名师互学网用户登录
快速导航关闭
当前搜索
当前分类
子分类
实用工具
热门搜索
名师互学网 > IT > 软件开发 > 后端开发 > Python

YOLOV4-模型集成-pytorch

Python 更新时间: 发布时间: IT归档 最新发布 模块sitemap 名妆网 法律咨询 聚返吧 英语巴士网 伯小乐 网商动力

YOLOV4-模型集成-pytorch

最终效果

输入一张图片到模型-->模型能够检测到物体并把框画出来

代码位置

inference.py前向推理代码

# -------------------------------------#
#   YOLO inference wrapper class
# -------------------------------------#
import os
os.environ["CUDA_VISIBLE_DEVICES"] = '0'
import cv2
import numpy as np
import colorsys
import torch
import torch.nn as nn
from yolo4 import YoloBody
from utils.utils import *
from yolo_layer import *


# --------------------------------------------#
#   To predict with your own trained model,
#   two parameters must be changed:
#   model_path and classes_path.
# --------------------------------------------#
class Inference(object):
    # ---------------------------------------------------#
    #   Initialise the model and parameters and load the
    #   pre-trained weights.
    # ---------------------------------------------------#
    def __init__(self, **kwargs):
        self.model_path = kwargs['model_path']
        self.anchors_path = kwargs['anchors_path']
        self.classes_path = kwargs['classes_path']
        self.model_image_size = kwargs['model_image_size']
        self.confidence = kwargs['confidence']
        self.cuda = kwargs['cuda']
        # e.g. class_names = ['person', 'bicycle', 'car', ..., 'toothbrush']
        self.class_names = self.get_class()
        # flat list, e.g. anchors = [12.0, 16.0, 19.0, 36.0, ..., 401.0]
        self.anchors = self.get_anchors()
        print(self.anchors)
        # .eval() disables BatchNorm/Dropout training behaviour; YoloBody is
        # the backbone + neck up to (and including) the three head convolutions.
        self.net = YoloBody(3, len(self.class_names)).eval()
        self.load_model_pth(self.net, self.model_path)

        if self.cuda:
            self.net = self.net.cuda()
            self.net.eval()
        print('Finished!')

        # One decode head per feature-map scale.
        self.yolo_decodes = []
        anchor_masks = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
        for i in range(3):
            head = YoloLayer(self.model_image_size, anchor_masks, len(self.class_names),
                             self.anchors, len(self.anchors) // 2).eval()
            self.yolo_decodes.append(head)

        print('{} model, anchors, and classes loaded.'.format(self.model_path))

    def load_model_pth(self, model, pth):
        """Load a checkpoint into *model*, keeping only layers whose shapes match."""
        print('Loading weights into state dict, name: %s' % (pth))
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model_dict = model.state_dict()
        # map_location lets a GPU-saved checkpoint load on CPU and vice versa.
        pretrained_dict = torch.load(pth, map_location=device)
        matched_dict = {}
        for k, v in pretrained_dict.items():  # k: layer name, v: weight tensor
            if np.shape(model_dict[k]) == np.shape(v):
                matched_dict[k] = v
            else:
                print('un matched layers: %s' % k)
        print(len(model_dict.keys()), len(pretrained_dict.keys()))
        print('%d layers matched,  %d layers miss' % (
            len(matched_dict.keys()), len(model_dict) - len(matched_dict.keys())))
        model_dict.update(matched_dict)
        # BUGFIX: load the merged dict, not the raw checkpoint — the original
        # passed `pretrained_dict`, which throws away the shape matching above
        # and makes load_state_dict fail on any mismatched layer.
        model.load_state_dict(model_dict)
        print('Finished!')
        return model

    # ---------------------------------------------------#
    #   Read all class names from classes_path.
    # ---------------------------------------------------#
    def get_class(self):
        classes_path = os.path.expanduser(self.classes_path)  # resolve ~ to abs path
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    # ---------------------------------------------------#
    #   Read all anchor box sizes from anchors_path.
    # ---------------------------------------------------#
    def get_anchors(self):
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            anchors = f.readline()
        anchors = [float(x) for x in anchors.split(',')]
        return anchors
        # return np.array(anchors).reshape([-1, 3, 2])[::-1, :, :]

    # ---------------------------------------------------#
    #   Detect objects in one BGR image (as read by cv2).
    # ---------------------------------------------------#
    def detect_image(self, image_src):
        h, w, _ = image_src.shape
        image = cv2.resize(image_src, (608, 608))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        img = np.array(image, dtype=np.float32)
        img = np.transpose(img / 255.0, (2, 0, 1))  # HWC -> CHW, scaled to [0, 1]
        images = np.asarray([img])

        with torch.no_grad():
            images = torch.from_numpy(images)
            if self.cuda:
                images = images.cuda()
            # Forward pass through YoloBody; yields the three head outputs.
            outputs = self.net(images)

        output_list = []
        for i in range(3):
            output_list.append(self.yolo_decodes[i](outputs[i]))
        output = torch.cat(output_list, 1)
        print(output.shape)
        batch_detections = non_max_suppression(output, len(self.class_names),
                                               conf_thres=self.confidence,
                                               nms_thres=0.1)
        boxes = [box.cpu().numpy() for box in batch_detections]
        print(boxes[0])
        return boxes[0]


if __name__ == '__main__':
    params = {
        "model_path": 'pth/yolo4_weights_my.pth',
        "anchors_path": 'work_dir/yolo_anchors_coco.txt',
        "classes_path": 'work_dir/coco_classes.txt',
        "model_image_size": (608, 608, 3),
        "confidence": 0.4,
        "cuda": True
    }

    model = Inference(**params)
    class_names = load_class_names(params['classes_path'])
    image_src = cv2.imread('dog.jpg')
    boxes = model.detect_image(image_src)
    plot_boxes_cv2(image_src, boxes, savename='output3.jpg', class_names=class_names)

utils.py与前向传播有关的函数

dataloader与训练有关的函数

from random import shuffle
import numpy as np
import torch
import torch.nn as nn
import math
import torch.nn.functional as F
from PIL import Image
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset
from utils.utils import bbox_iou, merge_bboxes
from matplotlib.colors import rgb_to_hsv, hsv_to_rgb
# from nets.yolo_training import Generator
import cv2


class TestDataset(Dataset):
    """Evaluation dataset: yields the raw image, the resized/normalised CHW
    tensor, the ground-truth boxes and the original shape information."""

    def __init__(self, lines, image_size):
        super(TestDataset, self).__init__()
        self.test_lines = lines
        self.test_batches = len(lines)
        self.image_size = image_size

    def __len__(self):
        return self.test_batches

    def __getitem__(self, index):
        one_line = self.test_lines[index]
        line = one_line.split()
        image_src = cv2.imread(line[0])
        h, w, _ = image_src.shape
        image = cv2.resize(image_src, (self.image_size[1], self.image_size[0]))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # Each annotation after the path is "x1,y1,x2,y2,cls".
        y = np.array([np.array(list(map(float, box.split(',')))) for box in line[1:]])
        img = np.array(image, dtype=np.float32)
        img = np.transpose(img / 255.0, (2, 0, 1))  # HWC -> CHW, scaled to [0, 1]
        return image_src, img, y, [h, w, line[0]]


class TrainDataset(Dataset):
    """Training dataset with random augmentation and optional 4-image Mosaic."""

    def __init__(self, train_lines, image_size, mosaic=True):
        super(TrainDataset, self).__init__()
        self.train_lines = train_lines
        self.train_batches = len(train_lines)
        self.image_size = image_size
        self.mosaic = mosaic
        self.flag = True  # alternates Mosaic / plain augmentation per sample

    def __len__(self):
        return self.train_batches

    def rand(self, a=0, b=1):
        return np.random.rand() * (b - a) + a

    def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=1.5, val=1.5):
        """Random real-time data augmentation for a single annotation line."""
        line = annotation_line.split()
        image = Image.open(line[0])
        iw, ih = image.size
        h, w = input_shape
        box = np.array([np.array(list(map(float, box.split(',')))) for box in line[1:]])

        # Resize with random aspect-ratio jitter and random scale.
        new_ar = w / h * self.rand(1 - jitter, 1 + jitter) / self.rand(1 - jitter, 1 + jitter)
        scale = self.rand(.25, 2)
        if new_ar < 1:
            nh = int(scale * h)
            nw = int(nh * new_ar)
        else:
            nw = int(scale * w)
            nh = int(nw / new_ar)
        image = image.resize((nw, nh), Image.BICUBIC)

        # Paste at a random offset on a random-colour canvas.
        dx = int(self.rand(0, w - nw))
        dy = int(self.rand(0, h - nh))
        new_image = Image.new('RGB', (w, h),
                              (np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255)))
        new_image.paste(image, (dx, dy))
        image = new_image

        # Random horizontal flip.
        flip = self.rand() < .5
        if flip:
            image = image.transpose(Image.FLIP_LEFT_RIGHT)

        # Random HSV colour-space distortion.
        hue = self.rand(-hue, hue)
        sat = self.rand(1, sat) if self.rand() < .5 else 1 / self.rand(1, sat)
        val = self.rand(1, val) if self.rand() < .5 else 1 / self.rand(1, val)
        x = cv2.cvtColor(np.array(image, np.float32) / 255, cv2.COLOR_RGB2HSV)
        x[..., 0] += hue * 360
        # BUGFIX: OpenCV float32 HSV hue lives in [0, 360]; the original
        # wrapped at 1, which never triggers and leaves hue unwrapped.
        x[..., 0][x[..., 0] > 360] -= 360
        x[..., 0][x[..., 0] < 0] += 360
        x[..., 1] *= sat
        x[..., 2] *= val
        x[x[:, :, 0] > 360, 0] = 360
        x[:, :, 1:][x[:, :, 1:] > 1] = 1
        x[x < 0] = 0
        image_data = cv2.cvtColor(x, cv2.COLOR_HSV2RGB) * 255

        # Transform box coordinates to the augmented image.
        box_data = np.zeros((len(box), 5))
        if len(box) > 0:
            np.random.shuffle(box)
            box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
            box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
            if flip:
                box[:, [0, 2]] = w - box[:, [2, 0]]
            box[:, 0:2][box[:, 0:2] < 0] = 0
            box[:, 2][box[:, 2] > w] = w
            box[:, 3][box[:, 3] > h] = h
            box_w = box[:, 2] - box[:, 0]
            box_h = box[:, 3] - box[:, 1]
            box = box[np.logical_and(box_w > 1, box_h > 1)]  # keep valid boxes only
            box_data = np.zeros((len(box), 5))
            box_data[:len(box)] = box

        if len(box) == 0:
            return image_data, []
        if (box_data[:, :4] > 0).any():
            return image_data, box_data
        else:
            return image_data, []

    def get_random_data_with_Mosaic(self, annotation_line, input_shape, hue=.1, sat=1.5, val=1.5):
        """Mosaic augmentation: combine 4 annotation lines into one image."""
        h, w = input_shape
        min_offset_x = 0.3
        min_offset_y = 0.3
        scale_low = 1 - min(min_offset_x, min_offset_y)
        scale_high = scale_low + 0.2

        image_datas = []
        box_datas = []
        index = 0

        place_x = [0, 0, int(w * min_offset_x), int(w * min_offset_x)]
        # BUGFIX: the third vertical offset must scale h, not w (the original
        # used w, which is wrong for non-square input shapes).
        place_y = [0, int(h * min_offset_y), int(h * min_offset_y), 0]
        for line in annotation_line:
            # Split each annotation line.
            line_content = line.split()
            # Open the image.
            image = Image.open(line_content[0])
            image = image.convert("RGB")
            # Image size.
            iw, ih = image.size
            # Ground-truth boxes.
            box = np.array([np.array(list(map(float, box.split(',')))) for box in line_content[1:]])

            # Random horizontal flip.
            flip = self.rand() < .5
            if flip and len(box) > 0:
                image = image.transpose(Image.FLIP_LEFT_RIGHT)
                box[:, [0, 2]] = iw - box[:, [2, 0]]

            # Scale the input image.
            new_ar = w / h
            scale = self.rand(scale_low, scale_high)
            if new_ar < 1:
                nh = int(scale * h)
                nw = int(nh * new_ar)
            else:
                nw = int(scale * w)
                nh = int(nw / new_ar)
            image = image.resize((nw, nh), Image.BICUBIC)

            # HSV colour-space distortion.
            hue = self.rand(-hue, hue)
            sat = self.rand(1, sat) if self.rand() < .5 else 1 / self.rand(1, sat)
            val = self.rand(1, val) if self.rand() < .5 else 1 / self.rand(1, val)
            x = cv2.cvtColor(np.array(image, np.float32) / 255, cv2.COLOR_RGB2HSV)
            x[..., 0] += hue * 360
            # BUGFIX: wrap hue at 360 (OpenCV float32 HSV range), not at 1.
            x[..., 0][x[..., 0] > 360] -= 360
            x[..., 0][x[..., 0] < 0] += 360
            x[..., 1] *= sat
            x[..., 2] *= val
            x[x[:, :, 0] > 360, 0] = 360
            x[:, :, 1:][x[:, :, 1:] > 1] = 1
            x[x < 0] = 0
            image = cv2.cvtColor(x, cv2.COLOR_HSV2RGB)  # numpy array, 0 to 1
            image = Image.fromarray((image * 255).astype(np.uint8))

            # Paste each of the 4 images into its quadrant.
            dx = place_x[index]
            dy = place_y[index]
            new_image = Image.new('RGB', (w, h),
                                  (np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255)))
            new_image.paste(image, (dx, dy))
            image_data = np.array(new_image)

            index = index + 1
            box_data = []
            # Re-map the boxes to the mosaic canvas.
            if len(box) > 0:
                np.random.shuffle(box)
                box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
                box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
                box[:, 0:2][box[:, 0:2] < 0] = 0
                box[:, 2][box[:, 2] > w] = w
                box[:, 3][box[:, 3] > h] = h
                box_w = box[:, 2] - box[:, 0]
                box_h = box[:, 3] - box[:, 1]
                box = box[np.logical_and(box_w > 1, box_h > 1)]
                box_data = np.zeros((len(box), 5))
                box_data[:len(box)] = box

            image_datas.append(image_data)
            box_datas.append(box_data)

        # Stitch the four quadrants together at a random cut point.
        cutx = np.random.randint(int(w * min_offset_x), int(w * (1 - min_offset_x)))
        cuty = np.random.randint(int(h * min_offset_y), int(h * (1 - min_offset_y)))

        new_image = np.zeros([h, w, 3])
        new_image[:cuty, :cutx, :] = image_datas[0][:cuty, :cutx, :]
        new_image[cuty:, :cutx, :] = image_datas[1][cuty:, :cutx, :]
        new_image[cuty:, cutx:, :] = image_datas[2][cuty:, cutx:, :]
        new_image[:cuty, cutx:, :] = image_datas[3][:cuty, cutx:, :]

        # Clip/merge the boxes against the cut lines.
        new_boxes = np.array(merge_bboxes(box_datas, cutx, cuty))

        if len(new_boxes) == 0:
            return new_image, []
        if (new_boxes[:, :4] > 0).any():
            return new_image, new_boxes
        else:
            return new_image, []

    def __getitem__(self, index):
        if index == 0:
            shuffle(self.train_lines)
        lines = self.train_lines
        n = self.train_batches
        index = index % n
        if self.mosaic:
            # Alternate between Mosaic and plain augmentation.
            if self.flag and (index + 4) < n:
                img, y = self.get_random_data_with_Mosaic(lines[index:index + 4], self.image_size[0:2])
            else:
                img, y = self.get_random_data(lines[index], self.image_size[0:2])
            self.flag = bool(1 - self.flag)
        else:
            img, y = self.get_random_data(lines[index], self.image_size[0:2])

        if len(y) != 0:
            # Convert pixel coordinates to 0-1 fractions of the image size.
            boxes = np.array(y[:, :4], dtype=np.float32)
            boxes[:, 0] = boxes[:, 0] / self.image_size[1]
            boxes[:, 1] = boxes[:, 1] / self.image_size[0]
            boxes[:, 2] = boxes[:, 2] / self.image_size[1]
            boxes[:, 3] = boxes[:, 3] / self.image_size[0]
            boxes = np.maximum(np.minimum(boxes, 1), 0)
            # Corner form -> centre/width/height form.
            boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
            boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
            boxes[:, 0] = boxes[:, 0] + boxes[:, 2] / 2
            boxes[:, 1] = boxes[:, 1] + boxes[:, 3] / 2
            y = np.concatenate([boxes, y[:, -1:]], axis=-1)

        img = np.array(img, dtype=np.float32)
        tmp_inp = np.transpose(img / 255.0, (2, 0, 1))
        tmp_targets = np.array(y, dtype=np.float32)
        return tmp_inp, tmp_targets


# collate_fn used by the training DataLoader
def train_dataset_collate(batch):
    images = []
    bboxes = []
    for img, box in batch:
        images.append(img)
        bboxes.append(box)
    images = np.array(images)
    bboxes = np.array(bboxes)
    return images, bboxes


# collate_fn used by the test DataLoader
def test_dataset_collate(batch):
    srcs = []
    inputs = []
    targets = []
    shapes = []
    for img_src, img, labels, infos in batch:
        srcs.append(img_src)
        inputs.append(img)
        targets.append(labels)
        shapes.append(infos)
    inputs = np.array(inputs, dtype=np.float32)
    return srcs, inputs, targets, shapes

generator与训练有关的函数

import time
from PIL import Image
import numpy as np
import cv2
from random import shuffle
from utils.utils import merge_bboxes


def rand(a=0, b=1):
    return np.random.rand() * (b - a) + a


class TrainGenerator(object):
    """Generator-style (non-DataLoader) training batch producer."""

    def __init__(self, batch_size,
                 train_lines, image_size,
                 ):
        self.batch_size = batch_size
        self.train_lines = train_lines
        self.train_batches = len(train_lines)
        self.image_size = image_size
        self.test_time = time.time()

    def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=1.5, val=1.5):
        """Random real-time data augmentation for a single annotation line."""
        line = annotation_line.split()
        image = Image.open(line[0])
        iw, ih = image.size
        h, w = input_shape
        box = np.array([np.array(list(map(float, box.split(',')))) for box in line[1:]])

        # resize image with random aspect-ratio jitter and scale
        new_ar = w / h * rand(1 - jitter, 1 + jitter) / rand(1 - jitter, 1 + jitter)
        scale = rand(.25, 2)
        if new_ar < 1:
            nh = int(scale * h)
            nw = int(nh * new_ar)
        else:
            nw = int(scale * w)
            nh = int(nw / new_ar)
        image = image.resize((nw, nh), Image.BICUBIC)

        # place image at a random offset on a grey canvas
        dx = int(rand(0, w - nw))
        dy = int(rand(0, h - nh))
        new_image = Image.new('RGB', (w, h), (128, 128, 128))
        new_image.paste(image, (dx, dy))
        image = new_image

        # flip image or not
        flip = rand() < .5
        if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT)

        # distort image in HSV space
        hue = rand(-hue, hue)
        sat = rand(1, sat) if rand() < .5 else 1 / rand(1, sat)
        val = rand(1, val) if rand() < .5 else 1 / rand(1, val)
        x = cv2.cvtColor(np.array(image, np.float32) / 255, cv2.COLOR_RGB2HSV)
        x[..., 0] += hue * 360
        # BUGFIX: OpenCV float32 HSV hue lives in [0, 360]; wrap at 360, not 1.
        x[..., 0][x[..., 0] > 360] -= 360
        x[..., 0][x[..., 0] < 0] += 360
        x[..., 1] *= sat
        x[..., 2] *= val
        x[x[:, :, 0] > 360, 0] = 360
        x[:, :, 1:][x[:, :, 1:] > 1] = 1
        x[x < 0] = 0
        image_data = cv2.cvtColor(x, cv2.COLOR_HSV2RGB) * 255

        # correct boxes
        box_data = np.zeros((len(box), 5))
        if len(box) > 0:
            np.random.shuffle(box)
            box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
            box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
            if flip: box[:, [0, 2]] = w - box[:, [2, 0]]
            box[:, 0:2][box[:, 0:2] < 0] = 0
            box[:, 2][box[:, 2] > w] = w
            box[:, 3][box[:, 3] > h] = h
            box_w = box[:, 2] - box[:, 0]
            box_h = box[:, 3] - box[:, 1]
            box = box[np.logical_and(box_w > 1, box_h > 1)]  # discard invalid box
            box_data = np.zeros((len(box), 5))
            box_data[:len(box)] = box

        if len(box) == 0:
            return image_data, []
        if (box_data[:, :4] > 0).any():
            return image_data, box_data
        else:
            return image_data, []

    def get_random_data_with_Mosaic(self, annotation_line, input_shape, hue=.1, sat=1.5, val=1.5):
        """Mosaic augmentation: combine 4 annotation lines into one image."""
        h, w = input_shape
        min_offset_x = 0.4
        min_offset_y = 0.4
        scale_low = 1 - min(min_offset_x, min_offset_y)
        scale_high = scale_low + 0.2

        image_datas = []
        box_datas = []
        index = 0

        place_x = [0, 0, int(w * min_offset_x), int(w * min_offset_x)]
        # BUGFIX: the third vertical offset must scale h, not w (the original
        # used w, which is wrong for non-square input shapes).
        place_y = [0, int(h * min_offset_y), int(h * min_offset_y), 0]
        for line in annotation_line:
            # Split each annotation line.
            line_content = line.split()
            # Open the image.
            image = Image.open(line_content[0])
            image = image.convert("RGB")
            # Image size.
            iw, ih = image.size
            # Ground-truth boxes.
            box = np.array([np.array(list(map(float, box.split(',')))) for box in line_content[1:]])

            # Random horizontal flip.
            flip = rand() < .5
            if flip and len(box) > 0:
                image = image.transpose(Image.FLIP_LEFT_RIGHT)
                box[:, [0, 2]] = iw - box[:, [2, 0]]

            # Scale the input image.
            new_ar = w / h
            scale = rand(scale_low, scale_high)
            if new_ar < 1:
                nh = int(scale * h)
                nw = int(nh * new_ar)
            else:
                nw = int(scale * w)
                nh = int(nw / new_ar)
            image = image.resize((nw, nh), Image.BICUBIC)

            # HSV colour-space distortion.
            hue = rand(-hue, hue)
            sat = rand(1, sat) if rand() < .5 else 1 / rand(1, sat)
            val = rand(1, val) if rand() < .5 else 1 / rand(1, val)
            x = cv2.cvtColor(np.array(image, np.float32) / 255, cv2.COLOR_RGB2HSV)
            x[..., 0] += hue * 360
            # BUGFIX: wrap hue at 360 (OpenCV float32 HSV range), not at 1.
            x[..., 0][x[..., 0] > 360] -= 360
            x[..., 0][x[..., 0] < 0] += 360
            x[..., 1] *= sat
            x[..., 2] *= val
            x[x[:, :, 0] > 360, 0] = 360
            x[:, :, 1:][x[:, :, 1:] > 1] = 1
            x[x < 0] = 0
            image = cv2.cvtColor(x, cv2.COLOR_HSV2RGB)  # numpy array, 0 to 1
            image = Image.fromarray((image * 255).astype(np.uint8))

            # Paste each of the 4 images into its quadrant.
            dx = place_x[index]
            dy = place_y[index]
            new_image = Image.new('RGB', (w, h), (128, 128, 128))
            new_image.paste(image, (dx, dy))
            image_data = np.array(new_image)

            index = index + 1
            box_data = []
            # Re-map the boxes to the mosaic canvas.
            if len(box) > 0:
                np.random.shuffle(box)
                box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
                box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
                box[:, 0:2][box[:, 0:2] < 0] = 0
                box[:, 2][box[:, 2] > w] = w
                box[:, 3][box[:, 3] > h] = h
                box_w = box[:, 2] - box[:, 0]
                box_h = box[:, 3] - box[:, 1]
                box = box[np.logical_and(box_w > 1, box_h > 1)]
                box_data = np.zeros((len(box), 5))
                box_data[:len(box)] = box

            image_datas.append(image_data)
            box_datas.append(box_data)

        # Stitch the four quadrants together at a random cut point.
        cutx = np.random.randint(int(w * min_offset_x), int(w * (1 - min_offset_x)))
        cuty = np.random.randint(int(h * min_offset_y), int(h * (1 - min_offset_y)))

        new_image = np.zeros([h, w, 3])
        new_image[:cuty, :cutx, :] = image_datas[0][:cuty, :cutx, :]
        new_image[cuty:, :cutx, :] = image_datas[1][cuty:, :cutx, :]
        new_image[cuty:, cutx:, :] = image_datas[2][cuty:, cutx:, :]
        new_image[:cuty, cutx:, :] = image_datas[3][:cuty, cutx:, :]

        # Clip/merge the boxes against the cut lines.
        new_boxes = np.array(merge_bboxes(box_datas, cutx, cuty))

        if len(new_boxes) == 0:
            return new_image, []
        if (new_boxes[:, :4] > 0).any():
            return new_image, new_boxes
        else:
            return new_image, []

    def generate(self, train=True, mosaic=True):
        """Endless generator yielding (inputs, targets) batches."""
        while True:
            shuffle(self.train_lines)
            lines = self.train_lines
            inputs = []
            targets = []
            flag = True
            n = len(lines)
            for i in range(len(lines)):
                if mosaic == True:
                    if flag and (i + 4) < n:
                        img, y = self.get_random_data_with_Mosaic(lines[i:i + 4], self.image_size[0:2])
                        # NOTE(review): reassigning the loop variable has no
                        # effect in Python; consecutive Mosaic windows overlap.
                        i = (i + 4) % n
                    else:
                        img, y = self.get_random_data(lines[i], self.image_size[0:2])
                        i = (i + 1) % n
                    flag = bool(1 - flag)
                else:
                    img, y = self.get_random_data(lines[i], self.image_size[0:2])
                    i = (i + 1) % n
                if len(y) != 0:
                    # Pixel coordinates -> 0-1 fractions, then centre/w/h form.
                    boxes = np.array(y[:, :4], dtype=np.float32)
                    boxes[:, 0] = boxes[:, 0] / self.image_size[1]
                    boxes[:, 1] = boxes[:, 1] / self.image_size[0]
                    boxes[:, 2] = boxes[:, 2] / self.image_size[1]
                    boxes[:, 3] = boxes[:, 3] / self.image_size[0]
                    boxes = np.maximum(np.minimum(boxes, 1), 0)
                    boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
                    boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
                    boxes[:, 0] = boxes[:, 0] + boxes[:, 2] / 2
                    boxes[:, 1] = boxes[:, 1] + boxes[:, 3] / 2
                    y = np.concatenate([boxes, y[:, -1:]], axis=-1)
                img = np.array(img, dtype=np.float32)
                inputs.append(np.transpose(img / 255.0, (2, 0, 1)))
                targets.append(np.array(y, dtype=np.float32))
                if len(targets) == self.batch_size:
                    tmp_inp = np.array(inputs)
                    tmp_targets = np.array(targets)
                    inputs = []
                    targets = []
                    # print('data load use time:', time.time()-self.test_time)
                    # self.test_time = time.time()
                    yield tmp_inp, tmp_targets


class TestGenerator(object):
    """Generator-style test batch producer (no augmentation)."""

    def __init__(self, batch_size, lines, image_size):
        self.batch_size = batch_size
        self.test_lines = lines
        self.test_batches = len(lines)
        self.image_size = image_size

    def generate(self):
        lines = self.test_lines
        inputs = []
        targets = []
        shapes = []
        for one_line in lines:
            print(one_line)
            line = one_line.split()
            image_src = cv2.imread(line[0])
            h, w, _ = image_src.shape
            image = cv2.resize(image_src, (self.image_size[1], self.image_size[0]))
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            y = np.array([np.array(list(map(float, box.split(',')))) for box in line[1:]])
            img = np.array(image, dtype=np.float32)
            inputs.append(np.transpose(img / 255.0, (2, 0, 1)))
            targets.append(y)
            shapes.append([h, w, line[0]])
            if len(targets) == self.batch_size:
                tmp_inp = np.array(inputs)
                tmp_targets = targets
                tmp_shapes = shapes
                inputs = []
                targets = []
                shapes = []
                # print('data load use time:', time.time()-self.test_time)
                # self.test_time = time.time()
                yield tmp_inp, tmp_targets, tmp_shapes

utils.py与前向传播有关的函数

画框函数:导入类别函数:load_class_namesiou计算函数:非极大值抑制函数:
from __future__ import divisionimport torchimport numpy as npimport mathimport cv2def plot_boxes_cv2(img, boxes, savename=None, class_names=None, color=None):    img = np.copy(img)    colors = np.array([[1, 0, 1], [0, 0, 1], [0, 1, 1], [0, 1, 0], [1, 1, 0], [1, 0, 0]], dtype=np.float32)    def get_color(c, x, max_val):        ratio = float(x) / max_val * 5        i = int(math.floor(ratio))        j = int(math.ceil(ratio))        ratio = ratio - i        r = (1 - ratio) * colors[i][c] + ratio * colors[j][c]        return int(r * 255)    width = img.shape[1]    height = img.shape[0]    for i in range(len(boxes)):        box = boxes[i]        x1 = int(box[0] * width)    # 相对原图位置*原图宽度,得到原图对应x坐标        y1 = int(box[1] * height)   # 相对原图位置*原图宽度,得到原图对应y坐标        x2 = int(box[2] * width)    #        y2 = int(box[3] * height)        if color:            rgb = color        else:            rgb = (255, 0, 0)        if len(box) >= 7 and class_names:            cls_conf = box[5]            cls_id = box[6]            # print('%s: %f' % (class_names[cls_id], cls_conf))            classes = len(class_names)            offset = cls_id * 123457 % classes            red = get_color(2, offset, classes)            green = get_color(1, offset, classes)            blue = get_color(0, offset, classes)            if color is None:                rgb = (red, green, blue)            img = cv2.putText(img, class_names[int(cls_id)], (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1.2, rgb, 2)        img = cv2.rectangle(img, (x1, y1), (x2, y2), rgb, 3)    if savename:        print("save plot results to %s" % savename)        cv2.imwrite(savename, img)    return imgdef load_class_names(namesfile):    class_names = []    with open(namesfile, 'r') as fp:        lines = fp.readlines()    for line in lines:        line = line.rstrip()        class_names.append(line)    return class_namesdef bbox_iou1(box1, box2, x1y1x2y2=True):    # print('iou box1:', box1)    # print('iou box2:', box2)    if x1y1x2y2:        
mx = min(box1[0], box2[0])        Mx = max(box1[2], box2[2])        my = min(box1[1], box2[1])        My = max(box1[3], box2[3])        w1 = box1[2] - box1[0]        h1 = box1[3] - box1[1]        w2 = box2[2] - box2[0]        h2 = box2[3] - box2[1]    else:        w1 = box1[2]        h1 = box1[3]        w2 = box2[2]        h2 = box2[3]        mx = min(box1[0], box2[0])        Mx = max(box1[0] + w1, box2[0] + w2)        my = min(box1[1], box2[1])        My = max(box1[1] + h1, box2[1] + h2)    uw = Mx - mx    uh = My - my    cw = w1 + w2 - uw    ch = h1 + h2 - uh    carea = 0    if cw <= 0 or ch <= 0:        return 0.0    area1 = w1 * h1    area2 = w2 * h2    carea = cw * ch    uarea = area1 + area2 - carea    return carea / uareadef bbox_iou(box1, box2, x1y1x2y2=True):    """        计算IOU    """    if not x1y1x2y2:        b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2        b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2        b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2        b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2    else:        b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]        b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]    inter_rect_x1 = torch.max(b1_x1, b2_x1)    inter_rect_y1 = torch.max(b1_y1, b2_y1)    inter_rect_x2 = torch.min(b1_x2, b2_x2)    inter_rect_y2 = torch.min(b1_y2, b2_y2)    inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1+1e-3, min=0) *                  torch.clamp(inter_rect_y2 - inter_rect_y1+1e-3, min=0)                     b1_area = (b1_x2 - b1_x1 + 1e-3) * (b1_y2 - b1_y1 + 1e-3)    b2_area = (b2_x2 - b2_x1 + 1e-3) * (b2_y2 - b2_y1 + 1e-3)    iou = inter_area / (b1_area + b2_area - inter_area + 1e-16)    return ioudef non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4):    # 求左上角和右下角    box_corner = prediction.new(prediction.shape)    
box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2    box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2    box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2    box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2    prediction[:, :, :4] = box_corner[:, :, :4]    output = [None for _ in range(len(prediction))]    for image_i, image_pred in enumerate(prediction):        # 利用置信度进行第一轮筛选        conf_mask = (image_pred[:, 4] >= conf_thres).squeeze()        image_pred = image_pred[conf_mask]        if not image_pred.size(0):            continue        # 获得种类及其置信度        class_conf, class_pred = torch.max(image_pred[:, 5:5 + num_classes], 1, keepdim=True)        # 获得的内容为(x1, y1, x2, y2, obj_conf, class_conf, class_pred)        detections = torch.cat((image_pred[:, :5], class_conf.float(), class_pred.float()), 1)        # 获得种类        unique_labels = detections[:, -1].cpu().unique()        if prediction.is_cuda:            unique_labels = unique_labels.cuda()        for c in unique_labels:            # 获得某一类初步筛选后全部的预测结果            detections_class = detections[detections[:, -1] == c]            # 按照存在物体的置信度排序            _, conf_sort_index = torch.sort(detections_class[:, 4], descending=True)            detections_class = detections_class[conf_sort_index]            # 进行非极大抑制            max_detections = []            while detections_class.size(0):                # 取出这一类置信度最高的,一步一步往下判断,判断重合程度是否大于nms_thres,如果是则去除掉                max_detections.append(detections_class[0].unsqueeze(0))                if len(detections_class) == 1:break                ious = bbox_iou(max_detections[-1], detections_class[1:])                detections_class = detections_class[1:][ious < nms_thres]            # 堆叠            max_detections = torch.cat(max_detections).data            # Add max detections to outputs            output[image_i] = max_detections if output[image_i] is None else torch.cat(                (output[image_i], 
max_detections))    return outputdef merge_bboxes(bboxes, cutx, cuty):    merge_bbox = []    for i in range(len(bboxes)):        for box in bboxes[i]:            tmp_box = []            x1,y1,x2,y2 = box[0], box[1], box[2], box[3]            if i == 0:                if y1 > cuty or x1 > cutx:continue                if y2 >= cuty and y1 <= cuty:y2 = cutyif y2-y1 < 5: continue                if x2 >= cutx and x1 <= cutx:x2 = cutxif x2-x1 < 5: continue            if i == 1:                if y2 < cuty or x1 > cutx:continue                if y2 >= cuty and y1 <= cuty:y1 = cutyif y2-y1 < 5: continue                if x2 >= cutx and x1 <= cutx:x2 = cutxif x2-x1 < 5: continue            if i == 2:                if y2 < cuty or x2 < cutx:continue                if y2 >= cuty and y1 <= cuty:y1 = cutyif y2-y1 < 5: continue                if x2 >= cutx and x1 <= cutx:x1 = cutxif x2-x1 < 5: continue            if i == 3:                if y1 > cuty or x2 < cutx:continue                if y2 >= cuty and y1 <= cuty:y2 = cutyif y2-y1 < 5: continue                if x2 >= cutx and x1 <= cutx:x1 = cutxif x2-x1 < 5: continue            tmp_box.append(x1)            tmp_box.append(y1)            tmp_box.append(x2)            tmp_box.append(y2)            tmp_box.append(box[-1])            merge_bbox.append(tmp_box)    return merge_bbox
转载请注明:文章转载自 www.mshxw.com
本文地址:https://www.mshxw.com/it/268455.html
我们一直用心在做
关于我们 文章归档 网站地图 联系我们

版权所有 (c)2021-2022 MSHXW.COM

ICP备案号:晋ICP备2021003244-6号