From 6b5191d509cc0dca85d26e7f6cd6fe7a4e937917 Mon Sep 17 00:00:00 2001 From: Yoohee Choi <17771952+y27choi@users.noreply.github.com> Date: Wed, 27 Sep 2023 17:06:06 -0400 Subject: [PATCH] Updated stratifications in semantic segmentation example (#246) --- examples/semantic_segmentation/README.md | 7 ++++--- .../semantic_segmentation/constants.py | 9 +++++---- .../semantic_segmentation/seed_test_run.py | 2 +- .../semantic_segmentation/seed_test_suite.py | 13 +++++++------ .../tests/test_semantic_segmentation.py | 2 +- 5 files changed, 18 insertions(+), 15 deletions(-) diff --git a/examples/semantic_segmentation/README.md b/examples/semantic_segmentation/README.md index 5573294da..1fcddca46 100644 --- a/examples/semantic_segmentation/README.md +++ b/examples/semantic_segmentation/README.md @@ -1,6 +1,6 @@ # Example Integration: Semantic Segmentation -This example integration uses the [COCO-Stuff 10K](https://github.com/nightrome/cocostuff10k) dataset, specificially +This example integration uses the [COCO-Stuff 10K](https://github.com/nightrome/cocostuff10k) dataset, specifically 5,520 images with person label, to demonstrate how to test single class semantic segmentation problems on Kolena. ## Setup @@ -23,7 +23,8 @@ This project defines two scripts that perform the following operations: 1. [`seed_test_suite.py`](semantic_segmentation/seed_test_suite.py) creates the following test suites: - - `"coco-stuff-10k"`, containing samples of COCO-Stuff 10K data + - `"# of people :: coco-stuff-10k [person]"`, containing samples of COCO-Stuff 10K data, specifically 5,520 images + with person label, stratified by # of people in the image. 2. [`seed_test_run.py`](semantic_segmentation/seed_test_run.py) tests a specified model, e.g. `pspnet_r101-d8_4xb4-40k_coco-stuff10k-512x512`, `pspnet_r50-d8_4xb4-20k_coco-stuff10k-512x512`, on the above @@ -35,7 +36,7 @@ test suite. [instructions](https://docs.kolena.io/advanced-usage/connecting-cloud-storage/amazon-s3/) to connect your bucket to Kolena. - The result masks will be stored under s3://{args.out_bucket}/coco-stuff-10k/results/{args.model} directory in + The result masks will be stored under `s3://{args.out_bucket}/coco-stuff-10k/results/{args.model}` directory in your bucket. diff --git a/examples/semantic_segmentation/semantic_segmentation/constants.py b/examples/semantic_segmentation/semantic_segmentation/constants.py index 082b643a1..66d927f6d 100644 --- a/examples/semantic_segmentation/semantic_segmentation/constants.py +++ b/examples/semantic_segmentation/semantic_segmentation/constants.py @@ -15,8 +15,9 @@ BUCKET = "kolena-public-datasets" DATASET = "coco-stuff-10k" -SIZE_MAPPING_IMAGES = { - "small": (0, 270000), - "medium": (270000, 300000), - "large": (300000, 10000000), +PERSON_COUNT_MAPPING_IMAGES = { + "single person": (1, 2), + "two people": (2, 3), + "some people": (3, 6), + "many people": (6, 100), } diff --git a/examples/semantic_segmentation/semantic_segmentation/seed_test_run.py b/examples/semantic_segmentation/semantic_segmentation/seed_test_run.py index 8615f7ceb..13fd27cc6 100644 --- a/examples/semantic_segmentation/semantic_segmentation/seed_test_run.py +++ b/examples/semantic_segmentation/semantic_segmentation/seed_test_run.py @@ -71,7 +71,7 @@ def main(args: Namespace) -> int: ) ap.add_argument( "--test_suites", - default=[f"image size :: {DATASET} [person]"], + default=[f"# of people :: {DATASET} [person]"], nargs="+", help="Name(s) of test suite(s) to test.", ) diff --git a/examples/semantic_segmentation/semantic_segmentation/seed_test_suite.py b/examples/semantic_segmentation/semantic_segmentation/seed_test_suite.py index 6b0fa912e..d14dee82b 100644 --- a/examples/semantic_segmentation/semantic_segmentation/seed_test_suite.py +++ b/examples/semantic_segmentation/semantic_segmentation/seed_test_suite.py @@ -20,7 +20,7 @@ import pandas as pd from semantic_segmentation.constants import DATASET -from semantic_segmentation.constants import SIZE_MAPPING_IMAGES +from semantic_segmentation.constants import PERSON_COUNT_MAPPING_IMAGES from semantic_segmentation.workflow import GroundTruth from semantic_segmentation.workflow import Label from semantic_segmentation.workflow import TestCase @@ -39,16 +39,16 @@ def within_range(area: int, range: Tuple[int, int]) -> bool: def seed_stratified_test_cases(complete_test_case: TestCase, test_suite_name) -> List[TestCase]: test_samples = complete_test_case.load_test_samples() test_cases = [] - for size_name, area_range in SIZE_MAPPING_IMAGES.items(): + for name, count_range in PERSON_COUNT_MAPPING_IMAGES.items(): samples = [] for ts, gt in test_samples: - image_size = ts.metadata["width"] * ts.metadata["height"] - if within_range(image_size, area_range): + person_count = ts.metadata["person_count"] + if within_range(person_count, count_range): samples.append((ts, gt)) if len(samples) > 0: test_cases.append( - TestCase(f"{size_name} image :: {test_suite_name}", test_samples=samples, reset=True), + TestCase(f"{name} :: {test_suite_name}", test_samples=samples, reset=True), ) return test_cases @@ -66,6 +66,7 @@ def seed_complete_test_case(args: Namespace) -> TestCase: has_person=record.has_person, width=record.image_width, height=record.image_height, + person_count=record.person_count, ), ) ground_truth = GroundTruth(mask=SegmentationMask(locator=record.mask, labels=Label.as_label_map())) @@ -77,7 +78,7 @@ def seed_complete_test_case(args: Namespace) -> TestCase: def main(args: Namespace) -> None: kolena.initialize(os.environ["KOLENA_TOKEN"], verbose=True) - test_suite_name = f"image size :: {DATASET} [person]" + test_suite_name = f"# of people :: {DATASET} [person]" complete_test_case = seed_complete_test_case(args) stratified_test_cases = seed_stratified_test_cases(complete_test_case, test_suite_name) TestSuite( diff --git a/examples/semantic_segmentation/tests/test_semantic_segmentation.py b/examples/semantic_segmentation/tests/test_semantic_segmentation.py index 1d228788b..59dc0a5bf 100644 --- a/examples/semantic_segmentation/tests/test_semantic_segmentation.py +++ b/examples/semantic_segmentation/tests/test_semantic_segmentation.py @@ -28,6 +28,6 @@ def test__semantic_segmentation_seed_test_run__smoke() -> None: args = Namespace( out_bucket="kolena-public-datasets", model="pspnet_r101-d8_4xb4-40k_coco-stuff10k-512x512", - test_suites=["image size :: coco-stuff-10k [person]"], + test_suites=["# of people :: coco-stuff-10k [person]"], ) seed_test_run_main(args)