From bdf94c777ba64b35c5087f3d0647be9eee6a68bf Mon Sep 17 00:00:00 2001 From: Samuel Lampa Date: Tue, 13 Aug 2024 18:45:16 +0200 Subject: [PATCH] Keep only 100% identity matches --- microSALT/utils/scraper.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/microSALT/utils/scraper.py b/microSALT/utils/scraper.py index 00782a8..fade756 100644 --- a/microSALT/utils/scraper.py +++ b/microSALT/utils/scraper.py @@ -409,12 +409,18 @@ def scrape_blast(self, type="", file_list=[]): pass ind += 1 + hypo_filtered = [] + + for h in hypo: + if float(h["identity"]) == 100.0 and float(h["evalue"]) == 0.0: + hypo_filtered.append(h) + self.logger.info( "{} {} hits were added after removing overlaps and duplicate hits".format( - len(hypo), type + len(hypo_filtered), type ) ) - for hit in hypo: + for hit in hypo_filtered: self.logger.debug( "Kept {}:{} with span {} and id {}".format( hit.get("loci"), @@ -438,7 +444,7 @@ def scrape_blast(self, type="", file_list=[]): self.name, str(e) ) ) - return hypo + return hypo_filtered def load_resistances(self): """Legacy function, loads common resistance names for genes from notes file"""