Skip to content

Commit

Permalink
update
Browse files: browse the repository at this point in the history
  • Loading branch information
Lawhy committed Nov 20, 2024
1 parent 82920ec commit 43c2944
Showing 1 changed file with 11 additions and 7 deletions.
18 changes: 11 additions & 7 deletions src/hierarchy_transformers/datasets/load.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -15,7 +15,7 @@
import os
import json
from typing import Optional
from datasets import load_dataset
from datasets import load_dataset, Dataset
import logging
from tqdm import tqdm

Expand Down Expand Up @@ -88,11 +88,13 @@ def load_zenodo_dataset(

for split, examples in dataset.items():
# list comprehension is faster than nested for-loop due to C implementation
dataset[split] = [
transformed
for example in tqdm(examples, desc=f"Map ({split})", unit="example", leave=True)
for transformed in transform(example, negative_type, entity_lexicon)
]
dataset[split] = Dataset.from_list(
[
transformed
for example in tqdm(examples, desc=f"Map ({split})", leave=True)
for transformed in transform(example, negative_type, entity_lexicon)
]
)

if return_entity_lexicon:
return dataset, entity_lexicon
Expand All @@ -119,7 +121,9 @@ def zenodo_example_to_pairs(example: dict, negative_type: str, entity_lexicon: d
parent = entity_lexicon[example["parent"]]["name"]
negative_type = f"{negative_type}_negatives"
negative_parents = [entity_lexicon[neg]["name"] for neg in example[negative_type]]
return [{"child": child, "parent": parent, "label": 1}] + [{"child": child, "parent": neg, "label": 0} for neg in negative_parents]
return [{"child": child, "parent": parent, "label": 1}] + [
{"child": child, "parent": neg, "label": 0} for neg in negative_parents
]


def zenodo_example_to_idxs(example: dict, negative_type: str, entity_to_indices: dict):
Expand Down

0 comments on commit 43c2944

Please sign in to comment.