From 6b5191d509cc0dca85d26e7f6cd6fe7a4e937917 Mon Sep 17 00:00:00 2001
From: Yoohee Choi <17771952+y27choi@users.noreply.github.com>
Date: Wed, 27 Sep 2023 17:06:06 -0400
Subject: [PATCH] Updated stratifications in semantic segmentation example
 (#246)

---
 examples/semantic_segmentation/README.md            |  7 ++++---
 .../semantic_segmentation/constants.py              |  9 +++++----
 .../semantic_segmentation/seed_test_run.py          |  2 +-
 .../semantic_segmentation/seed_test_suite.py        | 13 +++++++------
 .../tests/test_semantic_segmentation.py             |  2 +-
 5 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/examples/semantic_segmentation/README.md b/examples/semantic_segmentation/README.md
index 5573294da..1fcddca46 100644
--- a/examples/semantic_segmentation/README.md
+++ b/examples/semantic_segmentation/README.md
@@ -1,6 +1,6 @@
 # Example Integration: Semantic Segmentation
 
-This example integration uses the [COCO-Stuff 10K](https://github.com/nightrome/cocostuff10k) dataset, specificially
+This example integration uses the [COCO-Stuff 10K](https://github.com/nightrome/cocostuff10k) dataset, specifically
 5,520 images with person label, to demonstrate how to test single class semantic segmentation problems on Kolena.
 
 ## Setup
@@ -23,7 +23,8 @@ This project defines two scripts that perform the following operations:
 
 1. [`seed_test_suite.py`](semantic_segmentation/seed_test_suite.py) creates the following test suites:
 
-    - `"coco-stuff-10k"`, containing samples of COCO-Stuff 10K data
+    - `"# of people :: coco-stuff-10k [person]"`, containing samples of COCO-Stuff 10K data, specifically the 5,520
+    images with a person label, stratified by the number of people in each image.
 
 2. [`seed_test_run.py`](semantic_segmentation/seed_test_run.py) tests a specified model,
 e.g. `pspnet_r101-d8_4xb4-40k_coco-stuff10k-512x512`, `pspnet_r50-d8_4xb4-20k_coco-stuff10k-512x512`, on the above
@@ -35,7 +36,7 @@ test suite.
     [instructions](https://docs.kolena.io/advanced-usage/connecting-cloud-storage/amazon-s3/) to connect
     your bucket to Kolena.
 
-    The result masks will be stored under s3://{args.out_bucket}/coco-stuff-10k/results/{args.model} directory in
+    The result masks will be stored under the `s3://{args.out_bucket}/coco-stuff-10k/results/{args.model}` directory in
     your bucket.
 
 
diff --git a/examples/semantic_segmentation/semantic_segmentation/constants.py b/examples/semantic_segmentation/semantic_segmentation/constants.py
index 082b643a1..66d927f6d 100644
--- a/examples/semantic_segmentation/semantic_segmentation/constants.py
+++ b/examples/semantic_segmentation/semantic_segmentation/constants.py
@@ -15,8 +15,9 @@
 BUCKET = "kolena-public-datasets"
 DATASET = "coco-stuff-10k"
 
-SIZE_MAPPING_IMAGES = {
-    "small": (0, 270000),
-    "medium": (270000, 300000),
-    "large": (300000, 10000000),
+PERSON_COUNT_MAPPING_IMAGES = {
+    "single person": (1, 2),
+    "two people": (2, 3),
+    "some people": (3, 6),
+    "many people": (6, 100),
 }
diff --git a/examples/semantic_segmentation/semantic_segmentation/seed_test_run.py b/examples/semantic_segmentation/semantic_segmentation/seed_test_run.py
index 8615f7ceb..13fd27cc6 100644
--- a/examples/semantic_segmentation/semantic_segmentation/seed_test_run.py
+++ b/examples/semantic_segmentation/semantic_segmentation/seed_test_run.py
@@ -71,7 +71,7 @@ def main(args: Namespace) -> int:
     )
     ap.add_argument(
         "--test_suites",
-        default=[f"image size :: {DATASET} [person]"],
+        default=[f"# of people :: {DATASET} [person]"],
         nargs="+",
         help="Name(s) of test suite(s) to test.",
     )
diff --git a/examples/semantic_segmentation/semantic_segmentation/seed_test_suite.py b/examples/semantic_segmentation/semantic_segmentation/seed_test_suite.py
index 6b0fa912e..d14dee82b 100644
--- a/examples/semantic_segmentation/semantic_segmentation/seed_test_suite.py
+++ b/examples/semantic_segmentation/semantic_segmentation/seed_test_suite.py
@@ -20,7 +20,7 @@
 
 import pandas as pd
 from semantic_segmentation.constants import DATASET
-from semantic_segmentation.constants import SIZE_MAPPING_IMAGES
+from semantic_segmentation.constants import PERSON_COUNT_MAPPING_IMAGES
 from semantic_segmentation.workflow import GroundTruth
 from semantic_segmentation.workflow import Label
 from semantic_segmentation.workflow import TestCase
@@ -39,16 +39,16 @@ def within_range(area: int, range: Tuple[int, int]) -> bool:
 def seed_stratified_test_cases(complete_test_case: TestCase, test_suite_name) -> List[TestCase]:
     test_samples = complete_test_case.load_test_samples()
     test_cases = []
-    for size_name, area_range in SIZE_MAPPING_IMAGES.items():
+    for name, count_range in PERSON_COUNT_MAPPING_IMAGES.items():
         samples = []
         for ts, gt in test_samples:
-            image_size = ts.metadata["width"] * ts.metadata["height"]
-            if within_range(image_size, area_range):
+            person_count = ts.metadata["person_count"]
+            if within_range(person_count, count_range):
                 samples.append((ts, gt))
 
         if len(samples) > 0:
             test_cases.append(
-                TestCase(f"{size_name} image :: {test_suite_name}", test_samples=samples, reset=True),
+                TestCase(f"{name} :: {test_suite_name}", test_samples=samples, reset=True),
             )
     return test_cases
 
@@ -66,6 +66,7 @@ def seed_complete_test_case(args: Namespace) -> TestCase:
                 has_person=record.has_person,
                 width=record.image_width,
                 height=record.image_height,
+                person_count=record.person_count,
             ),
         )
         ground_truth = GroundTruth(mask=SegmentationMask(locator=record.mask, labels=Label.as_label_map()))
@@ -77,7 +78,7 @@ def seed_complete_test_case(args: Namespace) -> TestCase:
 
 def main(args: Namespace) -> None:
     kolena.initialize(os.environ["KOLENA_TOKEN"], verbose=True)
-    test_suite_name = f"image size :: {DATASET} [person]"
+    test_suite_name = f"# of people :: {DATASET} [person]"
     complete_test_case = seed_complete_test_case(args)
     stratified_test_cases = seed_stratified_test_cases(complete_test_case, test_suite_name)
     TestSuite(
diff --git a/examples/semantic_segmentation/tests/test_semantic_segmentation.py b/examples/semantic_segmentation/tests/test_semantic_segmentation.py
index 1d228788b..59dc0a5bf 100644
--- a/examples/semantic_segmentation/tests/test_semantic_segmentation.py
+++ b/examples/semantic_segmentation/tests/test_semantic_segmentation.py
@@ -28,6 +28,6 @@ def test__semantic_segmentation_seed_test_run__smoke() -> None:
     args = Namespace(
         out_bucket="kolena-public-datasets",
         model="pspnet_r101-d8_4xb4-40k_coco-stuff10k-512x512",
-        test_suites=["image size :: coco-stuff-10k [person]"],
+        test_suites=["# of people :: coco-stuff-10k [person]"],
     )
     seed_test_run_main(args)