From eed800e45c0b3e3ce2bd55b31152ac3b93b7fb16 Mon Sep 17 00:00:00 2001
From: BIGWangYuDong
Date: Tue, 14 Mar 2023 17:15:36 +0800
Subject: [PATCH] [Feature] Support CrowdHuman Metric

---
 mmeval/metrics/__init__.py   |   4 +-
 mmeval/metrics/crowdhuman.py | 848 +++++++++++++++++++++++++++++++++++
 2 files changed, 851 insertions(+), 1 deletion(-)
 create mode 100644 mmeval/metrics/crowdhuman.py

diff --git a/mmeval/metrics/__init__.py b/mmeval/metrics/__init__.py
index 9d7a66aa..c4a9b20e 100644
--- a/mmeval/metrics/__init__.py
+++ b/mmeval/metrics/__init__.py
@@ -8,6 +8,7 @@
 from .bleu import BLEU
 from .coco_detection import COCODetection
 from .connectivity_error import ConnectivityError
+from .crowdhuman import CrowdHuman
 from .dota_map import DOTAMeanAP
 from .end_point_error import EndPointError
 from .f1_score import F1Score
@@ -46,7 +47,8 @@
     'ConnectivityError', 'ROUGE', 'Perplexity', 'KeypointEndPointError',
     'KeypointAUC', 'KeypointNME', 'NaturalImageQualityEvaluator',
     'WordAccuracy', 'PrecisionRecallF1score',
-    'SingleLabelPrecisionRecallF1score', 'MultiLabelPrecisionRecallF1score'
+    'SingleLabelPrecisionRecallF1score', 'MultiLabelPrecisionRecallF1score',
+    'CrowdHuman'
 ]
 
 _deprecated_msg = (
diff --git a/mmeval/metrics/crowdhuman.py b/mmeval/metrics/crowdhuman.py
new file mode 100644
index 00000000..98bff6d3
--- /dev/null
+++ b/mmeval/metrics/crowdhuman.py
@@ -0,0 +1,848 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+import json
+import math
+import numpy as np
+import os.path as osp
+import tempfile
+from collections import OrderedDict
+from json import dump
+from multiprocessing import Process, Queue
+from rich.console import Console
+from rich.table import Table
+from scipy.sparse import csr_matrix
+from scipy.sparse.csgraph import maximum_bipartite_matching
+from typing import Dict, List, Optional, Sequence, Tuple, Union
+
+from mmeval.core.base_metric import BaseMetric
+from mmeval.fileio import get_text, load
+from mmeval.metrics.utils import calculate_overlaps
+
+PERSON_CLASSES = ['background', 'person']
+
+
+class CrowdHuman(BaseMetric):
+    """CrowdHuman evaluation metric.
+
+    Evaluate Average Precision (AP), Miss Rate (MR) and Jaccard Index (JI)
+    for detection tasks on the CrowdHuman dataset.
+    """
+
+    def __init__(self,
+                 ann_file: str,
+                 metric: Union[str, List[str]] = ['AP', 'MR', 'JI'],
+                 format_only: bool = False,
+                 outfile_prefix: Optional[str] = None,
+                 eval_mode: int = 0,
+                 iou_thrs: float = 0.5,
+                 compare_matching_method: Optional[str] = None,
+                 mr_ref: str = 'CALTECH_-2',
+                 num_ji_process: int = 10,
+                 backend_args: Optional[dict] = None,
+                 **kwargs) -> None:
+        super().__init__(**kwargs)
+        self.ann_file = ann_file
+
+        # crowdhuman evaluation metrics
+        self.metrics = metric if isinstance(metric, list) else [metric]
+        allowed_metrics = ['MR', 'AP', 'JI']
+        for metric in self.metrics:
+            if metric not in allowed_metrics:
+                raise KeyError("metric should be one of 'MR', 'AP', 'JI', "
+                               f'but got {metric}.')
+
+        self.format_only = format_only
+        if self.format_only:
+            assert outfile_prefix is not None, \
+                'outfile_prefix must not be None when format_only is True, ' \
+                'otherwise the result files will be saved to a temp ' \
+                'directory which will be cleaned up at the end.'
+        self.outfile_prefix = outfile_prefix
+
+        assert eval_mode in [0, 1, 2], \
+            'Unknown eval mode. eval_mode should be one of 0, 1 or 2.'
+
+        assert compare_matching_method is None or \
+            compare_matching_method == 'VOC', \
+            'The alternative compare_matching_method is VOC. ' \
+            'This parameter defaults to CALTECH (None).'
+
+        assert mr_ref in ['CALTECH_-2', 'CALTECH_-4'], \
+            "mr_ref should be one of 'CALTECH_-2', 'CALTECH_-4'."
+
+        self.eval_mode = eval_mode
+        self.compare_matching_method = compare_matching_method
+        self.mr_ref = mr_ref
+        self.num_ji_process = num_ji_process
+        self.backend_args = backend_args
+
+        assert isinstance(iou_thrs, float), '`iou_thrs` should be a float.'
+        self.iou_thrs = iou_thrs
+
+    @staticmethod
+    def results2json(results: Sequence[dict], outfile_prefix: str) -> str:
+        """Dump the detection results to a COCO style json file.
+
+        Args:
+            results (Sequence[dict]): Testing results of the dataset.
+            outfile_prefix (str): The filename prefix of the json files. If
+                the prefix is "somepath/xxx", the json file will be named
+                "somepath/xxx.bbox.json".
+
+        Returns:
+            str: The dump path of the json file.
+        """
+        json_results_dict = dict()
+        for pred, ann in results:
+            dump_dict = dict()
+            dump_dict['ID'] = ann['ID']
+            dump_dict['width'] = ann['width']
+            dump_dict['height'] = ann['height']
+
+            bboxes = pred['bboxes'].tolist()
+            scores = pred['scores']
+            assert len(bboxes) == len(scores)
+            dtboxes = []
+            for i, single_bbox in enumerate(bboxes):
+                temp_dict = dict()
+                x1, y1, x2, y2 = single_bbox
+                temp_dict['box'] = [x1, y1, x2 - x1, y2 - y1]
+                temp_dict['score'] = float(scores[i])  # type: ignore
+                temp_dict['tag'] = 1  # type: ignore
+                dtboxes.append(temp_dict)
+            dump_dict['dtboxes'] = dtboxes
+            json_results_dict[ann['ID']] = dump_dict
+        result_file = f'{outfile_prefix}.bbox.json'
+        with open(result_file, 'w') as f:
+            dump(json_results_dict, f)
+
+        return result_file
+
+    def add(self, predictions: Sequence[Dict], groundtruths: Sequence[Dict]) -> None:  # type: ignore # yapf: disable # noqa: E501
+        """Add the intermediate results to `self._results`.
+
+        Args:
+            predictions (Sequence[dict]): A sequence of dict. Each dict
+                represents a detection result for an image, with the
+                following keys:
+
+                - bboxes (np.ndarray): Shape (N, 4), the predicted
+                  bounding boxes of this image, in 'xyxy' format.
+                - scores (np.ndarray): Shape (N, ), the predicted scores
+                  of the bounding boxes.
+
+            groundtruths (Sequence[dict]): A sequence of dict. Each dict
+                represents the ground truth of an image, with the following
+                keys:
+
+                - ID (str): Image ID, matching the 'ID' field of the
+                  corresponding record in the annotation file.
+                - width (int): The width of the image.
+                - height (int): The height of the image.
+        """
+        for prediction, groundtruth in zip(predictions, groundtruths):
+            assert isinstance(prediction, dict), 'The prediction should be ' \
+                f'a sequence of dict, but got a sequence of {type(prediction)}.'  # noqa: E501
+            assert isinstance(groundtruth, dict), 'The groundtruth should be ' \
+                f'a sequence of dict, but got a sequence of {type(groundtruth)}.'  # noqa: E501
+            self._results.append((prediction, groundtruth))
+
+    def compute_metric(self, results: list) -> dict:
+        """Compute the CrowdHuman metrics.
+
+        Args:
+            results (List[tuple]): A list of tuples. Each tuple is the
+                prediction and ground truth of an image. This list has
+                already been synced across all ranks.
+
+        Returns:
+            dict: The computed metrics. The keys are the names of
+            the metrics, and the values are the corresponding results.
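+            Note that the values are raw ratios in [0, 1]; the logged
+            results table reports them as percentages.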
+ """ + tmp_dir = None + if self.outfile_prefix is None: + tmp_dir = tempfile.TemporaryDirectory() + outfile_prefix = osp.join(tmp_dir.name, 'results') + else: + outfile_prefix = self.outfile_prefix + + # convert predictions to coco format and dump to json file + result_file = self.results2json(results, outfile_prefix) + + eval_results: OrderedDict = OrderedDict() + table_results: list = list() + if self.format_only: + self.logger.info( + f'Results are saved in {osp.dirname(outfile_prefix)}') + return eval_results + + # load evaluation samples + eval_samples = self.load_eval_samples(result_file) + + if 'AP' in self.metrics or 'MR' in self.metrics: + score_list = self.compare(eval_samples) + gt_num = sum([eval_samples[i].gt_num for i in eval_samples]) + ign_num = sum([eval_samples[i].ign_num for i in eval_samples]) + gt_num = gt_num - ign_num + img_num = len(eval_samples) + + for metric in self.metrics: + self.logger.info(f'Evaluating {metric}...') + eval_func = getattr(self, f'eval_{metric.lower()}') + + if metric in ['AP', 'MR']: + val = eval_func(score_list, gt_num, img_num) + else: + val = eval_func(eval_samples) + + eval_results[f'{metric}'] = float(val) + table_results.append(f'{round(val * 100, 2):0.2f}') + + self._print_results(table_results) + + if tmp_dir is not None: + tmp_dir.cleanup() + + return eval_results + + def load_eval_samples(self, result_file: str) -> dict: + """Load data from annotations file and detection results. + + Args: + result_file (str): The file path of the saved detection results. + + Returns: + dict: The detection result packaged by Image + """ + gt_str = get_text(self.ann_file, backend_args=self.backend_args) + gt_str_list = gt_str.strip().split('\n') + gt_records = [json.loads(line) for line in gt_str_list] + + pred_records = load(result_file) + eval_samples = dict() + for gt_record in gt_records: + img_id = gt_record['ID'] + assert img_id in pred_records, f'{img_id} is not in predictions' + pred_record = pred_records[img_id] + eval_samples[img_id] = Image(mode=self.eval_mode) + eval_samples[img_id].load( + record=gt_record, + body_key='box', + head_key=None, + class_names=PERSON_CLASSES, + gt_flag=True) + eval_samples[img_id].load( + record=pred_record, + body_key='box', + head_key=None, + class_names=PERSON_CLASSES, + gt_flag=False) + eval_samples[img_id].clip_all_boader() + return eval_samples + + def compare(self, samples: dict) -> list: + """Match the detection results with the ground_truth. + + Args: + samples (dict[Image]): The detection result packaged by Image. + + Returns: + list: Matching result. A list of tuples (dtbox, label, imgID) + in the descending sort of dtbox.score. + """ + score_list = list() + for id in samples.keys(): + if self.compare_matching_method == 'VOC': + result = samples[id].compare_voc(self.iou_thrs) + else: + result = samples[id].compare_caltech(self.iou_thrs) + score_list.extend(result) + # In the descending sort of dtbox score. + score_list.sort(key=lambda x: x[0][-1], reverse=True) + return score_list + + @staticmethod + def eval_ap(score_list: list, gt_num: int, img_num: int) -> float: + """Compute Average Precision (AP). + + Args: + score_list(list[tuple[ndarray, int, str]]): The matching result. + a list of tuples (dtbox, label, imgID) in the descending + sort of dtbox.score. + gt_num(int): The number of gt boxes in the entire dataset. + img_num(int): The number of images in the entire dataset. + + Returns: + float: Average Precision (AP). 
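+
+        Note:
+            The AP here is the area under the precision-recall curve traced
+            while sweeping over the score-sorted matches, accumulated with
+            the trapezoidal rule:
+            AP = sum_k (r_k - r_{k-1}) * (p_{k-1} + p_k) / 2.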
+ """ + + # calculate general ap score + def _calculate_map(_recall: list, _precision: list) -> float: + assert len(_recall) == len(_precision) + area = 0 + for k in range(1, len(_recall)): + delta_h = (_precision[k - 1] + _precision[k]) / 2 + delta_w = _recall[k] - _recall[k - 1] + area += delta_w * delta_h + return area + + tp, fp = 0.0, 0.0 + rpX, rpY = list(), list() + + fpn = [] + recalln = [] + thr = [] + fppi = [] + for i, item in enumerate(score_list): + if item[1] == 1: + tp += 1.0 + elif item[1] == 0: + fp += 1.0 + fn = gt_num - tp + recall = tp / (tp + fn) + precision = tp / (tp + fp) + rpX.append(recall) + rpY.append(precision) + fpn.append(fp) + recalln.append(tp) + thr.append(item[0][-1]) + fppi.append(fp / img_num) + + ap = _calculate_map(rpX, rpY) + return ap + + def eval_mr(self, score_list: list, gt_num: int, img_num: int) -> float: + """Compute Caltech-style log-average Miss Rate (MR). + + Args: + score_list(list[tuple[ndarray, int, str]]): The matching result. + a list of tuples (dtbox, label, imgID) in the descending + sort of dtbox.score. + gt_num(int): The number of gt boxes in the entire dataset. + img_num(int): The number of images in the entire dataset. + + Returns: + float: Miss Rate (MR). + """ + + # find greater_than + def _find_gt(lst: list, target: float) -> int: + for idx, _item in enumerate(lst): + if _item >= target: + return idx + return len(lst) - 1 + + if self.mr_ref == 'CALTECH_-2': + # CALTECH_MRREF_2: anchor points (from 10^-2 to 1) as in + # P.Dollar's paper + ref = [ + 0.0100, 0.0178, 0.03160, 0.0562, 0.1000, 0.1778, 0.3162, + 0.5623, 1.000 + ] + else: + # CALTECH_MRREF_4: anchor points (from 10^-4 to 1) as in + # S.Zhang's paper + ref = [ + 0.0001, 0.0003, 0.00100, 0.0032, 0.0100, 0.0316, 0.1000, + 0.3162, 1.000 + ] + + tp, fp = 0.0, 0.0 + fppiX, fppiY = list(), list() + for i, item in enumerate(score_list): + if item[1] == 1: + tp += 1.0 + elif item[1] == 0: + fp += 1.0 + + fn = gt_num - tp + recall = tp / (tp + fn) + missrate = 1.0 - recall + fppi = fp / img_num + fppiX.append(fppi) + fppiY.append(missrate) + + score = list() + for pos in ref: + argmin = _find_gt(fppiX, pos) + if argmin >= 0: + score.append(fppiY[argmin]) + score = np.array(score) + mr = np.exp(np.log(score).mean()) + return mr + + def eval_ji(self, samples: dict) -> float: + """Compute Jaccard Index (JI) by using multi_process. + + Args: + samples(Dict[str, Image]): The detection result packaged by Image. + + Returns: + float: Jaccard Index (JI). + """ + res_ji = [] + for i in range(10): + score_thr = 1e-1 * i + total = len(samples) + stride = math.ceil(total / self.num_ji_process) + records = list(samples.items()) + result_queue: Queue = Queue(10000) + results, procs = [], [] + for j in range(self.num_ji_process): + start = j * stride + end = np.min([start + stride, total]) + sample_data = dict(records[start:end]) + p = Process( + target=self.compute_ji_with_ignore, + args=(result_queue, sample_data, score_thr)) + p.start() + procs.append(p) + for _ in range(total): + t = result_queue.get() + results.append(t) + for p in procs: + p.join() + mean_ratio = self.gather(results) + res_ji.append(mean_ratio) + return max(res_ji) + + def compute_ji_with_ignore(self, result_queue: Queue, dt_result: dict, + score_thr: float): + """Compute Jaccard Index (JI) with ignore. + + Args: + result_queue (Queue): The Queue for save compute result when + multi_process. + dt_result (dict): Detection result packaged by Image. + score_thr (float): The threshold of detection score. 
+ + Returns: + dict: compute result. + """ + for ID, record in dt_result.items(): + gt_boxes = record.gt_boxes + dt_boxes = record.dt_boxes + keep = dt_boxes[:, -1] > score_thr + dt_boxes = dt_boxes[keep][:, :-1] + + gt_tag = np.array(gt_boxes[:, -1] != -1) + matches = self.compute_ji_matching(dt_boxes, gt_boxes[gt_tag, :4]) + # get the unmatched_indices + matched_indices = np.array([j for (j, _) in matches]) + unmatched_indices = list( + set(np.arange(dt_boxes.shape[0])) - set(matched_indices)) + num_ignore_dt = self.get_ignores(dt_boxes[unmatched_indices], + gt_boxes[~gt_tag, :4]) + matched_indices = np.array([j for (_, j) in matches]) + unmatched_indices = list( + set(np.arange(gt_boxes[gt_tag].shape[0])) - + set(matched_indices)) + num_ignore_gt = self.get_ignores( + gt_boxes[gt_tag][unmatched_indices], gt_boxes[~gt_tag, :4]) + # compute results + eps = 1e-6 + k = len(matches) + m = gt_tag.sum() - num_ignore_gt + n = dt_boxes.shape[0] - num_ignore_dt + ratio = k / (m + n - k + eps) + recall = k / (m + eps) + cover = k / (n + eps) + noise = 1 - cover + result_dict = dict( + ratio=ratio, + recall=recall, + cover=cover, + noise=noise, + k=k, + m=m, + n=n) + result_queue.put_nowait(result_dict) + + @staticmethod + def gather(results: list) -> float: + """Integrate test results. + + Args: + results (list): A list of compute results. + + Returns: + float: Compute result. + """ + assert len(results) + img_num = 0 + for result in results: + if result['n'] != 0 or result['m'] != 0: + img_num += 1 + mean_ratio = np.sum([rb['ratio'] for rb in results]) / img_num + return mean_ratio + + def compute_ji_matching(self, dt_boxes: np.ndarray, + gt_boxes: np.ndarray) -> list: + """Match the annotation box for each detection box. + + Args: + dt_boxes(ndarray): Detection boxes. + gt_boxes(ndarray): Ground_truth boxes. + + Returns: + list: Match result. + """ + assert dt_boxes.shape[-1] > 3 and gt_boxes.shape[-1] > 3 + if dt_boxes.shape[0] < 1 or gt_boxes.shape[0] < 1: + return list() + + ious = calculate_overlaps(dt_boxes, gt_boxes, mode='iou') + input_ = copy.deepcopy(ious) + input_[input_ < self.iou_thrs] = 0 + match_scipy = maximum_bipartite_matching( + csr_matrix(input_), perm_type='column') + matches = [] + for i in range(len(match_scipy)): + if match_scipy[i] != -1: + matches.append((i, int(match_scipy[i]))) + return matches + + def get_ignores(self, dt_boxes: np.ndarray, gt_boxes: np.ndarray) -> int: + """Get the number of ignore bboxes. + + Args: + dt_boxes(ndarray): Detection boxes. + gt_boxes(ndarray): Ground_truth boxes. + + Returns: + int: Number of ignored boxes. + """ + if gt_boxes.size: + ioas = calculate_overlaps(dt_boxes, gt_boxes, mode='iof') + ioas = np.max(ioas, axis=1) + rows = np.where(ioas > self.iou_thrs)[0] + return len(rows) + else: + return 0 + + def _print_results(self, table_results: list) -> None: + """Print the evaluation results table. + + Args: + table_results (list): The computed metric. + """ + table_title = 'CrowdHuman Results IoU=' \ + f'{round(self.iou_thrs * 100)} (%)' + + table = Table(title=table_title, width=50) + console = Console(width=100) + for name in self.metrics: + table.add_column(name, justify='left') + table.add_row(*table_results) + with console.capture() as capture: + console.print(table, end='') + self.logger.info('\n' + capture.get()) + + +class Image: + """Data structure for evaluation of CrowdHuman. 
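+
+    It holds the ground-truth and detection boxes of a single image and
+    implements the box matching strategies used by the CrowdHuman metric.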
+ + Note: + This implementation is modified from + https://github.com/Purkialo/CrowdDet/blob/master/lib/evaluate/APMRToolkits/image.py # noqa: E501 + + Args: + mode (int): Select the mode of evaluate. Valid mode include + 0(just body box), 1(just head box) and 2(both of them). + Defaults to 0. + """ + + def __init__(self, mode: int) -> None: + self.ID = None + self.width = None + self.height = None + self.dt_boxes = None + self.gt_boxes = None + self.eval_mode = mode + + self.ign_num = None + self.gt_num = None + self.dt_num = None + + def load(self, record: dict, body_key: Optional[str], + head_key: Optional[str], class_names: list, + gt_flag: bool) -> None: + """Loading information for evaluation. + + Args: + record (dict): Label information or prediction results. + The format might look something like this: + { + 'ID': '273271,c9db000d5146c15', + 'gtboxes': [ + {'fbox': [72, 202, 163, 503], 'tag': 'person', ...}, + {'fbox': [199, 180, 144, 499], 'tag': 'person', ...}, + ... + ] + } + or: + { + 'ID': '273271,c9db000d5146c15', + 'width': 800, + 'height': 1067, + 'dtboxes': [ + { + 'box': [306.22, 205.95, 164.05, 394.04], + 'score': 0.99, + 'tag': 1 + }, + { + 'box': [403.60, 178.66, 157.15, 421.33], + 'score': 0.99, + 'tag': 1 + }, + ... + ] + } + body_key (str, None): key of detection body box. + Valid when loading detection results and self.eval_mode!=1. + head_key (str, None): key of detection head box. + Valid when loading detection results and self.eval_mode!=0. + class_names (list[str]):class names of data set. + Defaults to ['background', 'person']. + gt_flag (bool): Indicate whether record is ground truth + or predicting the outcome. + """ + if 'ID' in record and self.ID is None: + self.ID = record['ID'] + if 'width' in record and self.width is None: + self.width = record['width'] + if 'height' in record and self.height is None: + self.height = record['height'] + if gt_flag: + self.gt_num = len(record['gtboxes']) # type: ignore + body_bbox, head_bbox = self.load_gt_boxes(record, 'gtboxes', + class_names) + if self.eval_mode == 0: + self.gt_boxes = body_bbox + self.ign_num = (body_bbox[:, -1] == -1).sum() + elif self.eval_mode == 1: + self.gt_boxes = head_bbox + self.ign_num = (head_bbox[:, -1] == -1).sum() + else: + gt_tag = np.array([ + body_bbox[i, -1] != -1 and head_bbox[i, -1] != -1 + for i in range(len(body_bbox)) + ]) + self.ign_num = (gt_tag == 0).sum() + self.gt_boxes = np.hstack( + (body_bbox[:, :-1], head_bbox[:, :-1], + gt_tag.reshape(-1, 1))) + + if not gt_flag: + self.dt_num = len(record['dtboxes']) # type: ignore + if self.eval_mode == 0: + self.dt_boxes = self.load_det_boxes(record, 'dtboxes', + body_key, 'score') + elif self.eval_mode == 1: + self.dt_boxes = self.load_det_boxes(record, 'dtboxes', + head_key, 'score') + else: + body_dtboxes = self.load_det_boxes(record, 'dtboxes', body_key, + 'score') + head_dtboxes = self.load_det_boxes(record, 'dtboxes', head_key, + 'score') + self.dt_boxes = np.hstack((body_dtboxes, head_dtboxes)) + + @staticmethod + def load_gt_boxes(dict_input, key_name, class_names): + """load ground_truth and transform [x, y, w, h] to [x1, y1, x2, y2]""" + assert key_name in dict_input + if len(dict_input[key_name]) < 1: + return np.empty([0, 5]) + head_bbox = [] + body_bbox = [] + for rb in dict_input[key_name]: + if rb['tag'] in class_names: + body_tag = class_names.index(rb['tag']) + head_tag = copy.deepcopy(body_tag) + else: + body_tag = -1 + head_tag = -1 + if 'extra' in rb: + if 'ignore' in rb['extra']: + if rb['extra']['ignore'] != 0: 
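+                        # boxes flagged as ignore are tagged -1 and treated
+                        # as ignore regions (not positives) during matching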
+ body_tag = -1 + head_tag = -1 + if 'head_attr' in rb: + if 'ignore' in rb['head_attr']: + if rb['head_attr']['ignore'] != 0: + head_tag = -1 + head_bbox.append(np.hstack((rb['hbox'], head_tag))) + body_bbox.append(np.hstack((rb['fbox'], body_tag))) + head_bbox = np.array(head_bbox) + head_bbox[:, 2:4] += head_bbox[:, :2] + body_bbox = np.array(body_bbox) + body_bbox[:, 2:4] += body_bbox[:, :2] + return body_bbox, head_bbox + + @staticmethod + def load_det_boxes(dict_input, key_name, key_box, key_score, key_tag=None): + """load detection boxes.""" + assert key_name in dict_input + if len(dict_input[key_name]) < 1: + return np.empty([0, 5]) + else: + assert key_box in dict_input[key_name][0] + if key_score: + assert key_score in dict_input[key_name][0] + if key_tag: + assert key_tag in dict_input[key_name][0] + if key_score: + if key_tag: + bboxes = np.vstack([ + np.hstack((rb[key_box], rb[key_score], rb[key_tag])) + for rb in dict_input[key_name] + ]) + else: + bboxes = np.vstack([ + np.hstack((rb[key_box], rb[key_score])) + for rb in dict_input[key_name] + ]) + else: + if key_tag: + bboxes = np.vstack([ + np.hstack((rb[key_box], rb[key_tag])) + for rb in dict_input[key_name] + ]) + else: + bboxes = np.vstack( + [rb[key_box] for rb in dict_input[key_name]]) + bboxes[:, 2:4] += bboxes[:, :2] + return bboxes + + def clip_all_boader(self) -> None: + """Make sure boxes are within the image range.""" + + def _clip_boundary(boxes, height, width): + assert boxes.shape[-1] >= 4 + boxes[:, 0] = np.minimum(np.maximum(boxes[:, 0], 0), width - 1) + boxes[:, 1] = np.minimum(np.maximum(boxes[:, 1], 0), height - 1) + boxes[:, 2] = np.maximum(np.minimum(boxes[:, 2], width), 0) + boxes[:, 3] = np.maximum(np.minimum(boxes[:, 3], height), 0) + return boxes + + assert self.dt_boxes.shape[-1] >= 4 # type: ignore + assert self.gt_boxes.shape[-1] >= 4 # type: ignore + assert self.width is not None and self.height is not None + if self.eval_mode == 2: + self.dt_boxes[:, :4] = _clip_boundary(self.dt_boxes[:, :4], + self.height, self.width) + self.gt_boxes[:, :4] = _clip_boundary(self.gt_boxes[:, :4], + self.height, self.width) + self.dt_boxes[:, 4:8] = _clip_boundary(self.dt_boxes[:, 4:8], + self.height, self.width) + self.gt_boxes[:, 4:8] = _clip_boundary(self.gt_boxes[:, 4:8], + self.height, self.width) + else: + self.dt_boxes = _clip_boundary(self.dt_boxes, self.height, + self.width) + self.gt_boxes = _clip_boundary(self.gt_boxes, self.height, + self.width) + + def compare_voc(self, + iou_thrs: float) -> List[Tuple[np.ndarray, int, str]]: + """Match the detection results with the ground_truth by VOC. + + Args: + iou_thrs (float): IOU threshold. + + Returns: + list[tuple[np.ndarray, int, str]]: Matching result. + A list of tuple (dtbox, label, imgID) in the descending + sort of dtbox.score. 
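+
+        Note:
+            This matching path is only used when the metric is constructed
+            with `compare_matching_method='VOC'`; the default is the Caltech
+            strategy in `compare_caltech`.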
+        """
+        if self.dt_boxes is None or self.gt_boxes is None:
+            return list()
+        dtboxes = self.dt_boxes
+        gtboxes = self.gt_boxes
+        gt_matched = np.zeros(gtboxes.shape[0])
+
+        # sort detections by score (descending) and put ignored gt boxes last
+        dtboxes = np.array(sorted(dtboxes, key=lambda x: x[-1], reverse=True))
+        gtboxes = np.array(sorted(gtboxes, key=lambda x: x[-1], reverse=True))
+        if len(dtboxes) == 0:
+            return list()
+        overlap_iou = calculate_overlaps(dtboxes, gtboxes, mode='iou')
+
+        score_list = list()
+        for i, dt in enumerate(dtboxes):
+            maxpos = -1
+            maxiou = iou_thrs
+            for j, gt in enumerate(gtboxes):
+                if gt_matched[j] == 1:
+                    continue
+                overlap = overlap_iou[i][j]
+                if overlap > maxiou:
+                    maxiou = overlap
+                    maxpos = j
+
+            if maxpos >= 0:
+                if gtboxes[maxpos, -1] > 0:
+                    # matched a valid gt box: true positive
+                    gt_matched[maxpos] = 1
+                    score_list.append((dt, 1, self.ID))
+                # matched an ignored gt box: neither TP nor FP
+            else:
+                # unmatched detection: false positive
+                score_list.append((dt, 0, self.ID))
+        return score_list
+
+    def compare_caltech(self,
+                        iou_thrs: float) -> List[Tuple[np.ndarray, int, str]]:
+        """Match the detection results with the ground truth using the
+        Caltech matching strategy.
+
+        Args:
+            iou_thrs (float): IOU threshold.
+
+        Returns:
+            list[tuple[np.ndarray, int, str]]: Matching result.
+            A list of tuple (dtbox, label, imgID) in the descending
+            sort of dtbox.score.
+        """
+        if self.dt_boxes is None or self.gt_boxes is None:
+            return list()
+
+        dtboxes = self.dt_boxes
+        gtboxes = self.gt_boxes
+        dt_matched = np.zeros(dtboxes.shape[0])
+        gt_matched = np.zeros(gtboxes.shape[0])
+
+        dtboxes = np.array(sorted(dtboxes, key=lambda x: x[-1], reverse=True))
+        gtboxes = np.array(sorted(gtboxes, key=lambda x: x[-1], reverse=True))
+        if len(dtboxes):
+            overlap_iou = calculate_overlaps(dtboxes, gtboxes, mode='iou')
+            overlap_ioa = calculate_overlaps(dtboxes, gtboxes, mode='iof')
+        else:
+            return list()
+
+        score_list = list()
+        for i, dt in enumerate(dtboxes):
+            maxpos = -1
+            maxiou = iou_thrs
+            for j, gt in enumerate(gtboxes):
+                if gt_matched[j] == 1:
+                    continue
+                if gt[-1] > 0:
+                    overlap = overlap_iou[i][j]
+                    if overlap > maxiou:
+                        maxiou = overlap
+                        maxpos = j
+                else:
+                    if maxpos >= 0:
+                        break
+                    else:
+                        overlap = overlap_ioa[i][j]
+                        if overlap > iou_thrs:
+                            maxiou = overlap
+                            maxpos = j
+            if maxpos >= 0:
+                if gtboxes[maxpos, -1] > 0:
+                    gt_matched[maxpos] = 1
+                    dt_matched[i] = 1
+                    score_list.append((dt, 1, self.ID))
+                else:
+                    dt_matched[i] = -1
+            else:
+                dt_matched[i] = 0
+                score_list.append((dt, 0, self.ID))
+        return score_list
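
A minimal usage sketch for the new metric (the annotation path, image ID and
boxes below are placeholders taken from the docstring example, and it assumes
`BaseMetric` exposes `add()`/`compute()` as the other mmeval metrics do):

    import numpy as np
    from mmeval import CrowdHuman

    # The CrowdHuman annotation file is a JSON-lines (.odgt) file with one
    # record per image; 'ID', 'width' and 'height' in `groundtruths` must
    # match those records.
    crowdhuman_metric = CrowdHuman(
        ann_file='annotation_val.odgt', metric=['AP', 'MR', 'JI'])

    predictions = [
        dict(
            bboxes=np.array([[72., 202., 235., 705.]]),  # xyxy format
            scores=np.array([0.98])),
    ]
    groundtruths = [
        dict(ID='273271,c9db000d5146c15', width=800, height=1067),
    ]

    crowdhuman_metric.add(predictions, groundtruths)
    results = crowdhuman_metric.compute()  # {'AP': ..., 'MR': ..., 'JI': ...}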