From 4da66058b7b9c3f569f9c0e07458b523a1cfff3a Mon Sep 17 00:00:00 2001 From: Siddharth Sahu <112792547+sahusiddharth@users.noreply.github.com> Date: Thu, 11 Jul 2024 16:27:47 +0530 Subject: [PATCH 1/2] Refactored-make_folder3d_dataset-ruff-error-C901 (#1926) Signed-off-by: sahusiddharth --- src/anomalib/data/depth/folder_3d.py | 165 ++++++++++++++++----------- 1 file changed, 99 insertions(+), 66 deletions(-) diff --git a/src/anomalib/data/depth/folder_3d.py b/src/anomalib/data/depth/folder_3d.py index 41a12fbf40..3d2dabed50 100644 --- a/src/anomalib/data/depth/folder_3d.py +++ b/src/anomalib/data/depth/folder_3d.py @@ -24,62 +24,16 @@ from anomalib.data.utils.path import _prepare_files_labels, validate_and_resolve_path -def make_folder3d_dataset( # noqa: C901 +def make_path_dirs( normal_dir: str | Path, - root: str | Path | None = None, abnormal_dir: str | Path | None = None, normal_test_dir: str | Path | None = None, mask_dir: str | Path | None = None, normal_depth_dir: str | Path | None = None, abnormal_depth_dir: str | Path | None = None, normal_test_depth_dir: str | Path | None = None, - split: str | Split | None = None, - extensions: tuple[str, ...] | None = None, -) -> DataFrame: - """Make Folder Dataset. - - Args: - normal_dir (str | Path): Path to the directory containing normal images. - root (str | Path | None): Path to the root directory of the dataset. - Defaults to ``None``. - abnormal_dir (str | Path | None, optional): Path to the directory containing abnormal images. - Defaults to ``None``. - normal_test_dir (str | Path | None, optional): Path to the directory containing normal images for the test - dataset. Normal test images will be a split of `normal_dir` if `None`. - Defaults to ``None``. - mask_dir (str | Path | None, optional): Path to the directory containing the mask annotations. - Defaults to ``None``. - normal_depth_dir (str | Path | None, optional): Path to the directory containing - normal depth images for the test dataset. Normal test depth images will be a split of `normal_dir` - Defaults to ``None``. - abnormal_depth_dir (str | Path | None, optional): Path to the directory containing abnormal depth images for - the test dataset. - Defaults to ``None``. - normal_test_depth_dir (str | Path | None, optional): Path to the directory containing normal depth images for - the test dataset. Normal test images will be a split of `normal_dir` if `None`. - Defaults to ``None``. - split (str | Split | None, optional): Dataset split (ie., Split.FULL, Split.TRAIN or Split.TEST). - Defaults to ``None``. - extensions (tuple[str, ...] | None, optional): Type of the image extensions to read from the directory. - Defaults to ``None``. - - Returns: - DataFrame: an output dataframe containing samples for the requested split (ie., train or test) - """ - normal_dir = validate_and_resolve_path(normal_dir, root) - abnormal_dir = validate_and_resolve_path(abnormal_dir, root) if abnormal_dir else None - normal_test_dir = validate_and_resolve_path(normal_test_dir, root) if normal_test_dir else None - mask_dir = validate_and_resolve_path(mask_dir, root) if mask_dir else None - normal_depth_dir = validate_and_resolve_path(normal_depth_dir, root) if normal_depth_dir else None - abnormal_depth_dir = validate_and_resolve_path(abnormal_depth_dir, root) if abnormal_depth_dir else None - normal_test_depth_dir = validate_and_resolve_path(normal_test_depth_dir, root) if normal_test_depth_dir else None - - if not normal_dir.is_dir(): - msg = "A folder location must be provided in normal_dir." - raise ValueError(msg) - - filenames = [] - labels = [] +) -> dict: + """Create a dictionary containing paths to different directories.""" dirs = {DirType.NORMAL: normal_dir} if abnormal_dir: @@ -99,24 +53,17 @@ def make_folder3d_dataset( # noqa: C901 if mask_dir: dirs[DirType.MASK] = mask_dir + return dirs - for dir_type, path in dirs.items(): - filename, label = _prepare_files_labels(path, dir_type, extensions) - filenames += filename - labels += label - - samples = DataFrame({"image_path": filenames, "label": labels}) - samples = samples.sort_values(by="image_path", ignore_index=True) - - # Create label index for normal (0) and abnormal (1) images. - samples.loc[ - (samples.label == DirType.NORMAL) | (samples.label == DirType.NORMAL_TEST), - "label_index", - ] = LabelName.NORMAL - samples.loc[(samples.label == DirType.ABNORMAL), "label_index"] = LabelName.ABNORMAL - samples.label_index = samples.label_index.astype("Int64") - # If a path to mask is provided, add it to the sample dataframe. +def add_mask( + samples: DataFrame, + normal_depth_dir: str | Path | None = None, + abnormal_dir: str | Path | None = None, + normal_test_dir: str | Path | None = None, + mask_dir: str | Path | None = None, +) -> DataFrame: + """If a path to mask is provided, add it to the sample dataframe.""" if normal_depth_dir: samples.loc[samples.label == DirType.NORMAL, "depth_path"] = samples.loc[ samples.label == DirType.NORMAL_DEPTH @@ -151,7 +98,6 @@ def make_folder3d_dataset( # noqa: C901 samples = samples.astype({"depth_path": "str"}) - # If a path to mask is provided, add it to the sample dataframe. if mask_dir and abnormal_dir: samples.loc[samples.label == DirType.ABNORMAL, "mask_path"] = samples.loc[ samples.label == DirType.MASK @@ -167,6 +113,93 @@ def make_folder3d_dataset( # noqa: C901 raise FileNotFoundError(msg) else: samples["mask_path"] = "" + return samples + + +def make_folder3d_dataset( + normal_dir: str | Path, + root: str | Path | None = None, + abnormal_dir: str | Path | None = None, + normal_test_dir: str | Path | None = None, + mask_dir: str | Path | None = None, + normal_depth_dir: str | Path | None = None, + abnormal_depth_dir: str | Path | None = None, + normal_test_depth_dir: str | Path | None = None, + split: str | Split | None = None, + extensions: tuple[str, ...] | None = None, +) -> DataFrame: + """Make Folder Dataset. + + Args: + normal_dir (str | Path): Path to the directory containing normal images. + root (str | Path | None): Path to the root directory of the dataset. + Defaults to ``None``. + abnormal_dir (str | Path | None, optional): Path to the directory containing abnormal images. + Defaults to ``None``. + normal_test_dir (str | Path | None, optional): Path to the directory containing normal images for the test + dataset. Normal test images will be a split of `normal_dir` if `None`. + Defaults to ``None``. + mask_dir (str | Path | None, optional): Path to the directory containing the mask annotations. + Defaults to ``None``. + normal_depth_dir (str | Path | None, optional): Path to the directory containing + normal depth images for the test dataset. Normal test depth images will be a split of `normal_dir` + Defaults to ``None``. + abnormal_depth_dir (str | Path | None, optional): Path to the directory containing abnormal depth images for + the test dataset. + Defaults to ``None``. + normal_test_depth_dir (str | Path | None, optional): Path to the directory containing normal depth images for + the test dataset. Normal test images will be a split of `normal_dir` if `None`. + Defaults to ``None``. + split (str | Split | None, optional): Dataset split (ie., Split.FULL, Split.TRAIN or Split.TEST). + Defaults to ``None``. + extensions (tuple[str, ...] | None, optional): Type of the image extensions to read from the directory. + Defaults to ``None``. + + Returns: + DataFrame: an output dataframe containing samples for the requested split (ie., train or test) + """ + normal_dir = validate_and_resolve_path(normal_dir, root) + abnormal_dir = validate_and_resolve_path(abnormal_dir, root) if abnormal_dir else None + normal_test_dir = validate_and_resolve_path(normal_test_dir, root) if normal_test_dir else None + mask_dir = validate_and_resolve_path(mask_dir, root) if mask_dir else None + normal_depth_dir = validate_and_resolve_path(normal_depth_dir, root) if normal_depth_dir else None + abnormal_depth_dir = validate_and_resolve_path(abnormal_depth_dir, root) if abnormal_depth_dir else None + normal_test_depth_dir = validate_and_resolve_path(normal_test_depth_dir, root) if normal_test_depth_dir else None + + if not normal_dir.is_dir(): + msg = "A folder location must be provided in normal_dir." + raise ValueError(msg) + + filenames = [] + labels = [] + dirs = make_path_dirs( + normal_dir, + abnormal_dir, + normal_test_dir, + mask_dir, + normal_depth_dir, + abnormal_depth_dir, + normal_test_depth_dir, + ) + + for dir_type, path in dirs.items(): + filename, label = _prepare_files_labels(path, dir_type, extensions) + filenames += filename + labels += label + + samples = DataFrame({"image_path": filenames, "label": labels}) + samples = samples.sort_values(by="image_path", ignore_index=True) + + # Create label index for normal (0) and abnormal (1) images. + samples.loc[ + (samples.label == DirType.NORMAL) | (samples.label == DirType.NORMAL_TEST), + "label_index", + ] = LabelName.NORMAL + samples.loc[(samples.label == DirType.ABNORMAL), "label_index"] = LabelName.ABNORMAL + samples.label_index = samples.label_index.astype("Int64") + + # If a path to mask is provided, add it to the sample dataframe. + samples = add_mask(samples, normal_depth_dir, abnormal_dir, normal_test_dir, mask_dir) # remove all the rows with temporal image samples that have already been assigned samples = samples.loc[ From 70c1ef0b1dc10b20c98907b935f878e49a4d9477 Mon Sep 17 00:00:00 2001 From: Samet Akcay Date: Thu, 11 Jul 2024 12:04:14 +0100 Subject: [PATCH 2/2] Simplify folder 3d dataset (#2184) --- src/anomalib/data/depth/folder_3d.py | 183 +++++++++++---------------- 1 file changed, 72 insertions(+), 111 deletions(-) diff --git a/src/anomalib/data/depth/folder_3d.py b/src/anomalib/data/depth/folder_3d.py index 3d2dabed50..0fac137850 100644 --- a/src/anomalib/data/depth/folder_3d.py +++ b/src/anomalib/data/depth/folder_3d.py @@ -24,98 +24,6 @@ from anomalib.data.utils.path import _prepare_files_labels, validate_and_resolve_path -def make_path_dirs( - normal_dir: str | Path, - abnormal_dir: str | Path | None = None, - normal_test_dir: str | Path | None = None, - mask_dir: str | Path | None = None, - normal_depth_dir: str | Path | None = None, - abnormal_depth_dir: str | Path | None = None, - normal_test_depth_dir: str | Path | None = None, -) -> dict: - """Create a dictionary containing paths to different directories.""" - dirs = {DirType.NORMAL: normal_dir} - - if abnormal_dir: - dirs[DirType.ABNORMAL] = abnormal_dir - - if normal_test_dir: - dirs[DirType.NORMAL_TEST] = normal_test_dir - - if normal_depth_dir: - dirs[DirType.NORMAL_DEPTH] = normal_depth_dir - - if abnormal_depth_dir: - dirs[DirType.ABNORMAL_DEPTH] = abnormal_depth_dir - - if normal_test_depth_dir: - dirs[DirType.NORMAL_TEST_DEPTH] = normal_test_depth_dir - - if mask_dir: - dirs[DirType.MASK] = mask_dir - return dirs - - -def add_mask( - samples: DataFrame, - normal_depth_dir: str | Path | None = None, - abnormal_dir: str | Path | None = None, - normal_test_dir: str | Path | None = None, - mask_dir: str | Path | None = None, -) -> DataFrame: - """If a path to mask is provided, add it to the sample dataframe.""" - if normal_depth_dir: - samples.loc[samples.label == DirType.NORMAL, "depth_path"] = samples.loc[ - samples.label == DirType.NORMAL_DEPTH - ].image_path.to_numpy() - samples.loc[samples.label == DirType.ABNORMAL, "depth_path"] = samples.loc[ - samples.label == DirType.ABNORMAL_DEPTH - ].image_path.to_numpy() - - if normal_test_dir: - samples.loc[samples.label == DirType.NORMAL_TEST, "depth_path"] = samples.loc[ - samples.label == DirType.NORMAL_TEST_DEPTH - ].image_path.to_numpy() - - # make sure every rgb image has a corresponding depth image and that the file exists - mismatch = ( - samples.loc[samples.label_index == LabelName.ABNORMAL] - .apply(lambda x: Path(x.image_path).stem in Path(x.depth_path).stem, axis=1) - .all() - ) - if not mismatch: - msg = """Mismatch between anomalous images and depth images. Make sure the mask files - in 'xyz' folder follow the same naming convention as the anomalous images in the dataset - (e.g. image: '000.png', depth: '000.tiff').""" - raise MisMatchError(msg) - - missing_depth_files = samples.depth_path.apply( - lambda x: Path(x).exists() if not isna(x) else True, - ).all() - if not missing_depth_files: - msg = "Missing depth image files." - raise FileNotFoundError(msg) - - samples = samples.astype({"depth_path": "str"}) - - if mask_dir and abnormal_dir: - samples.loc[samples.label == DirType.ABNORMAL, "mask_path"] = samples.loc[ - samples.label == DirType.MASK - ].image_path.to_numpy() - samples["mask_path"] = samples["mask_path"].fillna("") - samples = samples.astype({"mask_path": "str"}) - - # make sure all the files exist - if not samples.mask_path.apply( - lambda x: Path(x).exists() if x != "" else True, - ).all(): - msg = f"Missing mask files. mask_dir={mask_dir}" - raise FileNotFoundError(msg) - else: - samples["mask_path"] = "" - return samples - - def make_folder3d_dataset( normal_dir: str | Path, root: str | Path | None = None, @@ -170,27 +78,28 @@ def make_folder3d_dataset( msg = "A folder location must be provided in normal_dir." raise ValueError(msg) - filenames = [] - labels = [] - dirs = make_path_dirs( - normal_dir, - abnormal_dir, - normal_test_dir, - mask_dir, - normal_depth_dir, - abnormal_depth_dir, - normal_test_depth_dir, - ) - - for dir_type, path in dirs.items(): - filename, label = _prepare_files_labels(path, dir_type, extensions) - filenames += filename - labels += label + dirs = { + DirType.NORMAL: normal_dir, + DirType.ABNORMAL: abnormal_dir, + DirType.NORMAL_TEST: normal_test_dir, + DirType.NORMAL_DEPTH: normal_depth_dir, + DirType.ABNORMAL_DEPTH: abnormal_depth_dir, + DirType.NORMAL_TEST_DEPTH: normal_test_depth_dir, + DirType.MASK: mask_dir, + } + + filenames: list[Path] = [] + labels: list[str] = [] + + for dir_type, dir_path in dirs.items(): + if dir_path is not None: + filename, label = _prepare_files_labels(dir_path, dir_type, extensions) + filenames += filename + labels += label samples = DataFrame({"image_path": filenames, "label": labels}) samples = samples.sort_values(by="image_path", ignore_index=True) - # Create label index for normal (0) and abnormal (1) images. samples.loc[ (samples.label == DirType.NORMAL) | (samples.label == DirType.NORMAL_TEST), "label_index", @@ -199,9 +108,61 @@ def make_folder3d_dataset( samples.label_index = samples.label_index.astype("Int64") # If a path to mask is provided, add it to the sample dataframe. - samples = add_mask(samples, normal_depth_dir, abnormal_dir, normal_test_dir, mask_dir) + if normal_depth_dir: + samples.loc[samples.label == DirType.NORMAL, "depth_path"] = samples.loc[ + samples.label == DirType.NORMAL_DEPTH + ].image_path.to_numpy() + samples.loc[samples.label == DirType.ABNORMAL, "depth_path"] = samples.loc[ + samples.label == DirType.ABNORMAL_DEPTH + ].image_path.to_numpy() + + if normal_test_dir: + samples.loc[samples.label == DirType.NORMAL_TEST, "depth_path"] = samples.loc[ + samples.label == DirType.NORMAL_TEST_DEPTH + ].image_path.to_numpy() + + # make sure every rgb image has a corresponding depth image and that the file exists + mismatch = ( + samples.loc[samples.label_index == LabelName.ABNORMAL] + .apply(lambda x: Path(x.image_path).stem in Path(x.depth_path).stem, axis=1) + .all() + ) + if not mismatch: + msg = ( + "Mismatch between anomalous images and depth images. " + "Make sure the mask files in 'xyz' folder follow the same naming " + "convention as the anomalous images in the dataset" + "(e.g. image: '000.png', depth: '000.tiff')." + ) + raise MisMatchError(msg) + + missing_depth_files = samples.depth_path.apply( + lambda x: Path(x).exists() if not isna(x) else True, + ).all() + if not missing_depth_files: + msg = "Missing depth image files." + raise FileNotFoundError(msg) + + samples = samples.astype({"depth_path": "str"}) + + # If a path to mask is provided, add it to the sample dataframe. + if mask_dir and abnormal_dir: + samples.loc[samples.label == DirType.ABNORMAL, "mask_path"] = samples.loc[ + samples.label == DirType.MASK + ].image_path.to_numpy() + samples["mask_path"] = samples["mask_path"].fillna("") + samples = samples.astype({"mask_path": "str"}) + + # Make sure all the files exist + if not samples.mask_path.apply( + lambda x: Path(x).exists() if x != "" else True, + ).all(): + msg = f"Missing mask files. mask_dir={mask_dir}" + raise FileNotFoundError(msg) + else: + samples["mask_path"] = "" - # remove all the rows with temporal image samples that have already been assigned + # Remove all the rows with temporal image samples that have already been assigned samples = samples.loc[ (samples.label == DirType.NORMAL) | (samples.label == DirType.ABNORMAL) | (samples.label == DirType.NORMAL_TEST) ]