Skip to content

Commit

Permalink
Improve metadata validation
Browse files (browse the repository at this point in the history)
  • Loading branch information
jeromekelleher committed Nov 27, 2024
1 parent 4bed8cf commit 88134f6
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 5 deletions.
21 changes: 19 additions & 2 deletions sc2ts/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -636,9 +636,26 @@ def extend(
help="Validate metadata",
show_default=True,
)
@click.option(
"-s",
"--skip",
default=[],
help="Skip this metadata field during comparison",
show_default=True,
multiple=True,
)
@chunk_cache_size
@verbose
def validate(dataset, ts_file, deletions_as_missing, genotypes, metadata, chunk_cache_size, verbose):
def validate(
dataset,
ts_file,
deletions_as_missing,
genotypes,
metadata,
skip,
chunk_cache_size,
verbose,
):
"""
Check that the specified trees correctly encode data
"""
Expand All @@ -649,7 +666,7 @@ def validate(dataset, ts_file, deletions_as_missing, genotypes, metadata, chunk_
if genotypes:
sc2ts.validate_genotypes(ts, ds, deletions_as_missing, show_progress=True)
if metadata:
sc2ts.validate_metadata(ts, ds, show_progress=True)
sc2ts.validate_metadata(ts, ds, skip_fields=set(skip), show_progress=True)


# @click.command()
Expand Down
9 changes: 6 additions & 3 deletions sc2ts/validation.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import unittest

import numpy as np
import numpy.testing as nt
Expand Down Expand Up @@ -30,15 +31,17 @@ def validate_genotypes(ts, dataset, deletions_as_missing=False, show_progress=Fa
bar.close()


def validate_metadata(ts, dataset, show_progress=False, skip_fields=None):
    """
    Check that sample node metadata in ``ts`` matches the records in ``dataset``.

    For every sample node except the first, the node's metadata is compared
    field by field with ``dataset.metadata[<strain>]``, where ``<strain>`` is
    the node's ``"strain"`` metadata value. The ``"sc2ts"`` field is always
    excluded from the comparison, along with anything named in
    ``skip_fields``.

    :param ts: Tree sequence providing ``samples()`` and ``node(u).metadata``.
    :param dataset: Object whose ``metadata`` attribute can be indexed by
        strain to obtain the reference record.
    :param show_progress: If True, display a tqdm progress bar.
    :param skip_fields: Optional iterable of metadata field names to leave
        out of the comparison. ``None`` (the default) skips nothing extra.
    :raises AssertionError: If the compared fields differ for any sample.
    """
    # Build the ignore set once. A fresh set per call avoids sharing a
    # mutable default between calls and lets callers pass any iterable.
    ignored = {"sc2ts"}
    if skip_fields is not None:
        ignored.update(skip_fields)

    # A single TestCase instance is enough; it is used only for the detailed
    # diff that assertDictEqual produces on failure.
    checker = unittest.TestCase()

    # NOTE(review): the first sample is deliberately excluded; presumably it
    # is a reference/root sample with no dataset record - confirm.
    samples = ts.samples()[1:]
    bar = tqdm.tqdm(samples, desc="Metadata", disable=not show_progress)
    for u in bar:
        node_md = ts.node(u).metadata
        # Look up by strain before filtering, so that "strain" itself may
        # appear in skip_fields without breaking the lookup.
        dataset_md = dataset.metadata[node_md["strain"]]
        keys = set(node_md) - ignored
        # Compare only the retained fields, keyed by what the node stores.
        # A field missing from the dataset record surfaces as a lookup error
        # here rather than as a comparison failure.
        md1 = {k: node_md[k] for k in keys}
        md2 = {k: dataset_md[k] for k in keys}
        checker.assertDictEqual(md1, md2)


def validate(ts, dataset, deletions_as_missing=False, show_progress=False):
Expand Down

0 comments on commit 88134f6

Please sign in to comment.