diff --git a/mmhuman3d/data/data_converters/__init__.py b/mmhuman3d/data/data_converters/__init__.py
index e3335e49..6c20472d 100644
--- a/mmhuman3d/data/data_converters/__init__.py
+++ b/mmhuman3d/data/data_converters/__init__.py
@@ -44,6 +44,7 @@
 from .mpi_inf_3dhp_hybrik import MpiInf3dhpHybrIKConverter
 from .mpii_neural_annot import MpiiNeuralConverter
 from .mscoco_neural_annot import MscocoNeuralConverter
+from .mtp import MtpConverter
 from .penn_action import PennActionConverter
 from .posetrack import PosetrackConverter
 from .pw3d import Pw3dConverter
@@ -92,5 +93,5 @@
     'Crowd3dConverter', 'BedlamConverter','Pw3dConverter', 'DecoConverter',
     'Hi4dConverter', 'Idea400Converter', 'Pw3dBedlamConverter', 'RichConverter',
     'SynbodyWhacConverter', 'ArcticConverter', 'SynHandConverter',
-    'SignAvatarConverter',
+    'SignAvatarConverter', 'MtpConverter',
 ]
diff --git a/mmhuman3d/data/data_converters/mtp.py b/mmhuman3d/data/data_converters/mtp.py
new file mode 100644
index 00000000..4e6feeaa
--- /dev/null
+++ b/mmhuman3d/data/data_converters/mtp.py
@@ -0,0 +1,135 @@
+import glob
+import json
+import os
+import random
+import time
+from typing import List
+
+import cv2
+import numpy as np
+import torch
+from tqdm import tqdm
+
+from mmhuman3d.core.cameras import build_cameras
+from mmhuman3d.core.conventions.keypoints_mapping import (
+    convert_kps,
+    get_keypoint_idx,
+    get_keypoint_idxs_by_part,
+)
+from mmhuman3d.data.data_structures.human_data import HumanData
+from mmhuman3d.models.body_models.builder import build_body_model
+from mmhuman3d.models.body_models.utils import (
+    batch_transform_to_camera_frame,
+    transform_to_camera_frame,
+)
+from .base_converter import BaseModeConverter
+from .builder import DATA_CONVERTERS
+
+
+@DATA_CONVERTERS.register_module()
+class MtpConverter(BaseModeConverter):
+    """MTP dataset."""
+    ACCEPTED_MODES = ['train', 'val']
+
+    def __init__(self, modes: List = []) -> None:
+        self.device = torch.device(
+            'cuda:0') if torch.cuda.is_available() else torch.device('cpu')
+        self.misc_config = dict(
+            bbox_body_scale=1.2,
+            bbox_facehand_scale=1.0,
+            bbox_source='keypoints2d_original',
+            flat_hand_mean=True,
+            cam_param_type='perspective',
+            cam_param_source='original',
+            smplx_source='original',
+            # contact_label=['part_segmentation', 'contact_region'],
+            # part_segmentation=['left_foot', 'right_foot'],
+        )
+
+        self.smplx_shape = {
+            'betas': (-1, 10),
+            'transl': (-1, 3),
+            'global_orient': (-1, 3),
+            'body_pose': (-1, 21, 3),
+            'left_hand_pose': (-1, 15, 3),
+            'right_hand_pose': (-1, 15, 3),
+            'leye_pose': (-1, 3),
+            'reye_pose': (-1, 3),
+            'jaw_pose': (-1, 3),
+            'expression': (-1, 10)
+        }
+
+        super(MtpConverter, self).__init__(modes)
+
+    def _keypoints_to_scaled_bbox_fh(self,
+                                     keypoints,
+                                     occ=None,
+                                     scale=1.0,
+                                     convention='smplx'):
+        """Obtain scaled head/hand bboxes in xyxy format given keypoints.
+
+        Args:
+            keypoints (np.ndarray): Keypoints of a single person
+            occ (np.ndarray, optional): Per-keypoint occlusion labels
+            scale (float): Bounding box scale
+            convention (str): Keypoint convention of the input
+
+        Returns:
+            tuple: Three bounding boxes in (x1, y1, x2, y2, conf) format,
+                for the head, left hand and right hand respectively
+        """
+        bboxs = []
+        for body_part in ['head', 'left_hand', 'right_hand']:
+            kp_id = get_keypoint_idxs_by_part(
+                body_part, convention=convention)
+            kps = keypoints[kp_id]
+
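+            # Confidence heuristic: a part keeps conf = 1 unless at least
+            # 10% of its keypoints are flagged as occluded, in which case
+            # conf = 0 so the bbox can be filtered out downstream.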
+            if occ is None:
+                conf = 1
+            else:
+                occ_p = occ[kp_id]
+                if np.sum(occ_p) / len(kp_id) >= 0.1:
+                    conf = 0
+                else:
+                    conf = 1
+
+            xmin, ymin = np.amin(kps, axis=0)
+            xmax, ymax = np.amax(kps, axis=0)
+
+            width = (xmax - xmin) * scale
+            height = (ymax - ymin) * scale
+
+            x_center = 0.5 * (xmax + xmin)
+            y_center = 0.5 * (ymax + ymin)
+            xmin = x_center - 0.5 * width
+            xmax = x_center + 0.5 * width
+            ymin = y_center - 0.5 * height
+            ymax = y_center + 0.5 * height
+
+            bbox = np.stack([xmin, ymin, xmax, ymax, conf],
+                            axis=0).astype(np.float32)
+            bboxs.append(bbox)
+        return bboxs[0], bboxs[1], bboxs[2]
+
+    def convert_by_mode(self, dataset_path: str, out_path: str,
+                        mode: str) -> dict:
+        """
+        Args:
+            dataset_path (str): Path to directory where raw images and
+                annotations are stored.
+            out_path (str): Path to directory to save preprocessed npz file
+            mode (str): Mode in accepted modes
+
+        Returns:
+            dict:
+                A dict containing keys image_path, bbox_xywh, keypoints2d,
+                keypoints2d_mask, keypoints3d, keypoints3d_mask, cam_param
+                stored in HumanData() format
+        """
+
+        # get all images
\ No newline at end of file
diff --git a/tools/convert_datasets.py b/tools/convert_datasets.py
index f044371c..5d0c23c2 100644
--- a/tools/convert_datasets.py
+++ b/tools/convert_datasets.py
@@ -183,6 +183,10 @@
         type='MscocoNeuralConverter', # real, have few multihuman
         prefix='mscoco',
         modes=['train']),
+    mtp=dict(
+        type='MtpConverter', # real
+        prefix='mtp',
+        modes=['train', 'val']),
     renbody=dict(
         type='RenbodyConverter', # real
         prefix='renbody',
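Usage sketch: with the `mtp` entry registered above, the new converter runs through the existing tools/convert_datasets.py entry point. The flags below follow that script's current CLI; the data root and output directory are illustrative assumptions, not part of this patch:

    python tools/convert_datasets.py \
        --datasets mtp \
        --root_path data/datasets \
        --output_path data/preprocessed_datasets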