Skip to content

Commit

Permalink
fix: only consider MAPPING/ALIGNING SUMMARY entries from dragen qc summary file (#134)
Browse files Browse the repository at this point in the history

* fix: only consider MAPPING/ALIGNING SUMMARY entries from dragen qc summary file

* additionally check for the second column to be empty, just in case

* update pytest snapshots for BAMQC
  • Loading branch information
tedil authored Nov 6, 2024
1 parent f6f5adc commit c4b8dd1
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 4 deletions.
4 changes: 2 additions & 2 deletions tests/cli/__snapshots__/test_tools.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
# name: test_dragen_to_bam_qc.1
'''
case_id set_id bam_stats
. . {"""sample""": {"""summary""": {"""mean coverage""": 35.44, """total target size""": 39036020}, """min_cov_target""": {"""0""": 100.0, """1""": 99.0, """3""": 98.77, """10""": 98.18, """15""": 97.59, """20""": 95.41, """50""": 0.88, """100""": 0.05, """500""": 0.03, """1000""": 0.03, """1500""": 0.03}, """bamstats""": {"""sequences""": 1055691016, """reads duplicated""": 65335430, """insert size average""": 488.03, """insert size standard deviation""": 115.27}}, """sample2""": {"""summary""": {"""mean coverage""": 35.44, """total target size""": 39036020}, """min_cov_target""": {"""0""": 100.0, """1""": 99.0, """3""": 98.77, """10""": 98.18, """15""": 97.59, """20""": 95.41, """50""": 0.88, """100""": 0.05, """500""": 0.03, """1000""": 0.03, """1500""": 0.03}, """bamstats""": {"""sequences""": 1055691016, """reads duplicated""": 65335430, """insert size average""": 488.03, """insert size standard deviation""": 115.27}}}
. . {"""sample""": {"""summary""": {"""mean coverage""": 35.44, """total target size""": 39036020}, """min_cov_target""": {"""0""": 100.0, """1""": 99.0, """3""": 98.77, """10""": 98.18, """15""": 97.59, """20""": 95.41, """50""": 0.88, """100""": 0.05, """500""": 0.03, """1000""": 0.03, """1500""": 0.03}, """bamstats""": {"""sequences""": 1055691016, """reads duplicated""": 128799765, """insert size average""": 493.5, """insert size standard deviation""": 118.57}}, """sample2""": {"""summary""": {"""mean coverage""": 35.44, """total target size""": 39036020}, """min_cov_target""": {"""0""": 100.0, """1""": 99.0, """3""": 98.77, """10""": 98.18, """15""": 97.59, """20""": 95.41, """50""": 0.88, """100""": 0.05, """500""": 0.03, """1000""": 0.03, """1500""": 0.03}, """bamstats""": {"""sequences""": 1055691016, """reads duplicated""": 128799765, """insert size average""": 493.5, """insert size standard deviation""": 118.57}}}

'''
# ---
# name: test_load_bam_qc
BamQc(sample_data={'sample': BamQcData(summary={'mean coverage': 35.44, 'total target size': 39036020}, min_cov_target={0: 100.0, 1: 99.0, 3: 98.77, 10: 98.18, 15: 97.59, 20: 95.41, 50: 0.88, 100: 0.05, 500: 0.03, 1000: 0.03, 1500: 0.03}, bamstats={'sequences': 1055691016, 'reads duplicated': 65335430, 'insert size average': 488.03, 'insert size standard deviation': 115.27}), 'sample2': BamQcData(summary={'mean coverage': 35.44, 'total target size': 39036020}, min_cov_target={0: 100.0, 1: 99.0, 3: 98.77, 10: 98.18, 15: 97.59, 20: 95.41, 50: 0.88, 100: 0.05, 500: 0.03, 1000: 0.03, 1500: 0.03}, bamstats={'sequences': 1055691016, 'reads duplicated': 65335430, 'insert size average': 488.03, 'insert size standard deviation': 115.27})})
BamQc(sample_data={'sample': BamQcData(summary={'mean coverage': 35.44, 'total target size': 39036020}, min_cov_target={0: 100.0, 1: 99.0, 3: 98.77, 10: 98.18, 15: 97.59, 20: 95.41, 50: 0.88, 100: 0.05, 500: 0.03, 1000: 0.03, 1500: 0.03}, bamstats={'sequences': 1055691016, 'reads duplicated': 128799765, 'insert size average': 493.5, 'insert size standard deviation': 118.57}), 'sample2': BamQcData(summary={'mean coverage': 35.44, 'total target size': 39036020}, min_cov_target={0: 100.0, 1: 99.0, 3: 98.77, 10: 98.18, 15: 97.59, 20: 95.41, 50: 0.88, 100: 0.05, 500: 0.03, 1000: 0.03, 1500: 0.03}, bamstats={'sequences': 1055691016, 'reads duplicated': 128799765, 'insert size average': 493.5, 'insert size standard deviation': 118.57})})
# ---
8 changes: 6 additions & 2 deletions varfish_cli/cli/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def load_sample_data(qc_coverage_region_path: str, mapping_metrics_path: str) ->
:param mapping_metrics: Path to coverage metrics file
:raises typer.Exit: If there is a problem with reading the input files.
"""
fieldnames = ["_coverage_summary", "_empty", "label", "value"]
fieldnames = ["_summary", "_empty", "label", "value"]
qc_coverage_region: Dict[str, str] = {}
with open(qc_coverage_region_path, "rt") as qc_coverage_file:
reader = csv.DictReader(
Expand All @@ -36,7 +36,11 @@ def load_sample_data(qc_coverage_region_path: str, mapping_metrics_path: str) ->
mapping_metrics: Dict[str, str] = {}
with open(mapping_metrics_path, "rt") as coverage_metrics_file:
reader = csv.DictReader(coverage_metrics_file, fieldnames=fieldnames, delimiter=",")
mapping_metrics = {row["label"]: row["value"] for row in reader}
mapping_metrics = {
row["label"]: row["value"]
for row in reader
if row["_summary"] == "MAPPING/ALIGNING SUMMARY" and row["_empty"] == ""
}

key_aligned_in_region = "Aligned bases in QC coverage region"
try:
Expand Down

0 comments on commit c4b8dd1

Please sign in to comment.