From 43c2944ef38ae66561c119cc50a685cbabde292d Mon Sep 17 00:00:00 2001
From: Lawhy <yuanhe.cs.ai@gmail.com>
Date: Wed, 20 Nov 2024 00:55:26 +0000
Subject: [PATCH] Wrap transformed splits in datasets.Dataset in load_zenodo_dataset

---
 src/hierarchy_transformers/datasets/load.py | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/src/hierarchy_transformers/datasets/load.py b/src/hierarchy_transformers/datasets/load.py
index b202f96..055abbc 100644
--- a/src/hierarchy_transformers/datasets/load.py
+++ b/src/hierarchy_transformers/datasets/load.py
@@ -15,7 +15,7 @@
 import os
 import json
 from typing import Optional
-from datasets import load_dataset
+from datasets import load_dataset, Dataset
 import logging
 from tqdm import tqdm
 
@@ -88,11 +88,13 @@ def load_zenodo_dataset(
 
     for split, examples in dataset.items():
         # list comprehension is faster than nested for-loop due to C implementation
-        dataset[split] = [
-            transformed
-            for example in tqdm(examples, desc=f"Map ({split})", unit="example", leave=True)
-            for transformed in transform(example, negative_type, entity_lexicon)
-        ]
+        dataset[split] = Dataset.from_list(
+            [
+                transformed
+                for example in tqdm(examples, desc=f"Map ({split})", leave=True)
+                for transformed in transform(example, negative_type, entity_lexicon)
+            ]
+        )
 
     if return_entity_lexicon:
         return dataset, entity_lexicon
@@ -119,7 +121,9 @@ def zenodo_example_to_pairs(example: dict, negative_type: str, entity_lexicon: d
     parent = entity_lexicon[example["parent"]]["name"]
     negative_type = f"{negative_type}_negatives"
     negative_parents = [entity_lexicon[neg]["name"] for neg in example[negative_type]]
-    return [{"child": child, "parent": parent, "label": 1}] + [{"child": child, "parent": neg, "label": 0} for neg in negative_parents]
+    return [{"child": child, "parent": parent, "label": 1}] + [
+        {"child": child, "parent": neg, "label": 0} for neg in negative_parents
+    ]
 
 
 def zenodo_example_to_idxs(example: dict, negative_type: str, entity_to_indices: dict):