Skip to content

Commit

Permalink
Do not prefilter the variants with curated genes
Browse files Browse the repository at this point in the history
  • Loading branch information
Hongxin committed Oct 30, 2020
1 parent c4f8087 commit 997f16c
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 67 deletions.
84 changes: 23 additions & 61 deletions AnnotatorCore.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,22 +205,7 @@ def makeoncokbgetrequest(url):
return requests.get(url, headers=headers)


def getcuratedgenes():
    """Load the curated gene symbols from OncoKB into ``curatedgenes``.

    Requests ``/utils/allCuratedGenes.json`` under ``oncokbapiurl``. On an
    HTTP 200 response, every entry whose ``hugoSymbol`` is not None is
    appended to the module-level ``curatedgenes`` list. On any other status
    the failure is logged and the list is left untouched.
    """
    global curatedgenes
    url = oncokbapiurl + "/utils/allCuratedGenes.json"
    response = makeoncokbgetrequest(url)

    # Guard clause: report a failed request and stop before touching the list.
    if response.status_code != 200:
        log.error("error when processing %s \n" % url +
                  "reason: %s" % response.reason)
        return

    symbols = (entry['hugoSymbol'] for entry in response.json())
    curatedgenes.extend(symbol for symbol in symbols if symbol is not None)

_3dhotspots = None
curatedgenes = []

def init_3d_hotspots():
global _3dhotspots
Expand Down Expand Up @@ -344,7 +329,7 @@ def get_reference_genome_from_row(row_reference_genome, default_reference_genome


def processalterationevents(eventfile, outfile, previousoutfile, defaultCancerType, cancerTypeMap,
retainonlycuratedgenes, annotatehotspots, user_input_query_type, default_reference_genome):
annotatehotspots, user_input_query_type, default_reference_genome):
if annotatehotspots:
init_3d_hotspots()
if os.path.isfile(previousoutfile):
Expand Down Expand Up @@ -387,13 +372,11 @@ def processalterationevents(eventfile, outfile, previousoutfile, defaultCancerTy
if (query_type == QueryType.HGVSP_SHORT):
process_alteration(reader, outf, headers, [HGVSP_SHORT_HEADER, ALTERATION_HEADER], ncols, newncols,
defaultCancerType,
cancerTypeMap,
retainonlycuratedgenes, annotatehotspots, default_reference_genome)
cancerTypeMap, annotatehotspots, default_reference_genome)

if (query_type == QueryType.HGVSP):
process_alteration(reader, outf, headers, [HGVSP_HEADER, ALTERATION_HEADER], ncols, newncols, defaultCancerType,
cancerTypeMap,
retainonlycuratedgenes, annotatehotspots, default_reference_genome)
cancerTypeMap, annotatehotspots, default_reference_genome)

if (query_type == QueryType.HGVSG):
process_hvsg(reader, outf, headers, [HGVSG_HEADER, ALTERATION_HEADER], ncols, newncols, defaultCancerType,
Expand All @@ -414,7 +397,7 @@ def get_cell_content(row, index, return_empty_string=False):
return None

def process_alteration(maffilereader, outf, maf_headers, alteration_column_names, ncols, nannotationcols, defaultCancerType, cancerTypeMap,
retainonlycuratedgenes, annotatehotspots, default_reference_genome):
annotatehotspots, default_reference_genome):
ihugo = geIndexOfHeader(maf_headers, HUGO_HEADERS)
iconsequence = geIndexOfHeader(maf_headers, CONSEQUENCE_HEADERS)
ihgvs = geIndexOfHeader(maf_headers, alteration_column_names)
Expand Down Expand Up @@ -479,18 +462,9 @@ def process_alteration(maffilereader, outf, maf_headers, alteration_column_names
if start is not None and end is None:
end = start

if not retainonlycuratedgenes or hugo in curatedgenes:
query = ProteinChangeQuery(hugo, hgvs, cancertype, reference_genome, consequence, start, end)
queries.append(query)
rows.append(row)
else:
# Include Gene in OncoKB and Variant in OncoKB
if annotatehotspots:
default_cols = [['', '', GENE_IN_ONCOKB_DEFAULT, VARIANT_IN_ONCOKB_DEFAULT]]
else:
default_cols = [[GENE_IN_ONCOKB_DEFAULT, VARIANT_IN_ONCOKB_DEFAULT]]
append_annotation_to_file(outf, ncols + nannotationcols, [row],
default_cols)
query = ProteinChangeQuery(hugo, hgvs, cancertype, reference_genome, consequence, start, end)
queries.append(query)
rows.append(row)

if len(queries) == POST_QUERIES_THRESHOLD:
annotations = pull_protein_change_info(queries,annotatehotspots)
Expand Down Expand Up @@ -634,7 +608,7 @@ def getgenesfromfusion(fusion, nameregex=None):
gene1=gene2=fusion
return gene1, gene2

def processsv(svdata, outfile, previousoutfile, defaultCancerType, cancerTypeMap, retainonlycuratedgenes, nameregex):
def processsv(svdata, outfile, previousoutfile, defaultCancerType, cancerTypeMap, nameregex):
if os.path.isfile(previousoutfile):
cacheannotated(previousoutfile, defaultCancerType, cancerTypeMap)
outf = open(outfile, 'w+')
Expand Down Expand Up @@ -690,27 +664,22 @@ def processsv(svdata, outfile, previousoutfile, defaultCancerType, cancerTypeMap
cancertype = get_tumor_type_from_row(row, i, defaultCancerType, icancertype, cancerTypeMap, sample)


if not retainonlycuratedgenes or gene1 in curatedgenes or gene2 in curatedgenes:
queries.append(StructuralVariantQuery(gene1, gene2, 'FUSION', cancertype))
rows.append(row)
queries.append(StructuralVariantQuery(gene1, gene2, 'FUSION', cancertype))
rows.append(row)

if len(queries) == POST_QUERIES_THRESHOLD:
annotations = pull_structural_variant_info(queries)
append_annotation_to_file(outf, newcols, rows, annotations)
queries = []
rows = []
else:
# Include default Gene in OncoKB and Variant in OncoKB
append_annotation_to_file(outf, newcols, [row],
[[GENE_IN_ONCOKB_DEFAULT, VARIANT_IN_ONCOKB_DEFAULT]])
if len(queries) == POST_QUERIES_THRESHOLD:
annotations = pull_structural_variant_info(queries)
append_annotation_to_file(outf, newcols, rows, annotations)
queries = []
rows = []

if len(queries) > 0:
annotations = pull_structural_variant_info(queries)
append_annotation_to_file(outf, newcols, rows, annotations)
outf.close()


def processcnagisticdata(cnafile, outfile, previousoutfile, defaultCancerType, cancerTypeMap, retainonlycuratedgenes, annotate_gain_loss=False):
def processcnagisticdata(cnafile, outfile, previousoutfile, defaultCancerType, cancerTypeMap, annotate_gain_loss=False):
CNA_AMPLIFICATION_TXT = 'Amplification'
CNA_DELETION_TXT = 'Deletion'
CNA_LOSS_TXT = 'Loss'
Expand Down Expand Up @@ -784,19 +753,14 @@ def processcnagisticdata(cnafile, outfile, previousoutfile, defaultCancerType, c
if sample in cancerTypeMap:
cancertype = cancerTypeMap[sample]

if not retainonlycuratedgenes or hugo in curatedgenes:
rows.append([sample, cancertype, hugo, cna_type])
queries.append(CNAQuery(hugo, cna_type, cancertype))
rows.append([sample, cancertype, hugo, cna_type])
queries.append(CNAQuery(hugo, cna_type, cancertype))

if len(queries) == POST_QUERIES_THRESHOLD:
annotations = pull_cna_info(queries)
append_annotation_to_file(outf, ncols, rows, annotations)
rows = []
queries = []
else:
# Include Gene in OncoKB and Variant in OncoKB
append_annotation_to_file(outf, ncols, [[sample, cancertype, hugo, cna_type]],
[[GENE_IN_ONCOKB_DEFAULT, VARIANT_IN_ONCOKB_DEFAULT]])
if len(queries) == POST_QUERIES_THRESHOLD:
annotations = pull_cna_info(queries)
append_annotation_to_file(outf, ncols, rows, annotations)
rows = []
queries = []

if len(queries) > 0:
annotations = pull_cna_info(queries)
Expand Down Expand Up @@ -1126,8 +1090,6 @@ def cacheannotated(annotatedfile, defaultCancerType, cancerTypeMap):
for row in reader:
try:
hugo = row[ihugo]
if hugo not in curatedgenes:
continue

hgvs = row[ihgvs]
if hgvs.startswith('p.'):
Expand Down
3 changes: 1 addition & 2 deletions CnaAnnotator.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,14 @@ def main(argv):
if argv.oncokb_api_url:
setoncokbbaseurl(argv.oncokb_api_url)
setoncokbapitoken(argv.oncokb_api_bearer_token)
getcuratedgenes()

cancertypemap = {}
if argv.input_clinical_file:
readCancerTypes(argv.input_clinical_file, cancertypemap)

log.info('annotating %s ...' % argv.input_file)
processcnagisticdata(argv.input_file, argv.output_file, argv.previous_result_file, argv.default_cancer_type,
cancertypemap, True, argv.annotate_gain_loss)
cancertypemap, argv.annotate_gain_loss)

log.info('done!')

Expand Down
3 changes: 1 addition & 2 deletions FusionAnnotator.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,14 @@ def main(argv):
if argv.oncokb_api_url:
setoncokbbaseurl(argv.oncokb_api_url)
setoncokbapitoken(argv.oncokb_api_bearer_token)
getcuratedgenes()

cancertypemap = {}
if argv.input_clinical_file:
readCancerTypes(argv.input_clinical_file, cancertypemap)

log.info('annotating %s ...' % argv.input_file)
processsv(argv.input_file, argv.output_file, argv.previous_result_file, argv.default_cancer_type,
cancertypemap, True, argv.structural_variant_name_format)
cancertypemap, argv.structural_variant_name_format)

log.info('done!')

Expand Down
3 changes: 1 addition & 2 deletions MafAnnotator.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ def main(argv):
if argv.oncokb_api_url:
setoncokbbaseurl(argv.oncokb_api_url)
setoncokbapitoken(argv.oncokb_api_bearer_token)
getcuratedgenes()

cancertypemap = {}
if argv.input_clinical_file:
Expand All @@ -91,7 +90,7 @@ def main(argv):
raise

processalterationevents(argv.input_file, argv.output_file, argv.previous_result_file, argv.default_cancer_type,
cancertypemap, True, argv.annotate_hotspots, user_input_query_type, default_reference_genome)
cancertypemap, argv.annotate_hotspots, user_input_query_type, default_reference_genome)

log.info('done!')

Expand Down

0 comments on commit 997f16c

Please sign in to comment.