From f2c33e8e69080550a7e6fcbe20cf23a473774d6b Mon Sep 17 00:00:00 2001 From: Jaebeom Kim Date: Tue, 23 Jan 2024 14:21:45 +0900 Subject: [PATCH] new parameter: --tie-ratio --- src/commons/LocalParameters.cpp | 8 ++++++++ src/commons/LocalParameters.h | 2 ++ src/commons/Taxonomer.cpp | 5 +++-- src/commons/Taxonomer.h | 1 + src/workflow/classify.cpp | 1 + 5 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/commons/LocalParameters.cpp b/src/commons/LocalParameters.cpp index c5e0ca2c..f4e3843c 100644 --- a/src/commons/LocalParameters.cpp +++ b/src/commons/LocalParameters.cpp @@ -150,6 +150,13 @@ LocalParameters::LocalParameters() : typeid(int), (void *) &minSSMatch, "^[0-9]+$"), + TIE_RATIO(TIE_RATIO_ID, + "--tie-ratio", + "Best * --tie-ratio is considered as a tie", + "Best * --tie-ratio is considered as a tie", + typeid(float), + (void *) &tieRatio, + "^(0(\\.[0-9]+)?|1(\\.0+)?)$" /* alternation is grouped so that ^ and $ anchor BOTH branches; only 0, 0.<digits>, 1 or 1.0... are accepted */), LIBRARY_PATH(LIBRARY_PATH_ID, "--library-path", "Path to library where the FASTA files are stored", @@ -314,6 +321,7 @@ LocalParameters::LocalParameters() : classify.push_back(&RAM_USAGE); classify.push_back(&MATCH_PER_KMER); classify.push_back(&ACCESSION_LEVEL); + classify.push_back(&TIE_RATIO); // classify.push_back(&MIN_SS_MATCH); // filter diff --git a/src/commons/LocalParameters.h b/src/commons/LocalParameters.h index 676fbe23..a15a9b55 100644 --- a/src/commons/LocalParameters.h +++ b/src/commons/LocalParameters.h @@ -56,6 +56,7 @@ class LocalParameters : public Parameters { PARAMETER(MIN_CONS_CNT_EUK) PARAMETER(MATCH_PER_KMER) PARAMETER(MIN_SS_MATCH) + PARAMETER(TIE_RATIO) // DB build parameters PARAMETER(LIBRARY_PATH) @@ -104,6 +105,7 @@ class LocalParameters : public Parameters { int minConsCntEuk; int matchPerKmer; int minSSMatch; + float tieRatio; // Database creation std::string tinfoPath; diff --git a/src/commons/Taxonomer.cpp b/src/commons/Taxonomer.cpp index 63fb5731..11fab177 100644 --- a/src/commons/Taxonomer.cpp +++ b/src/commons/Taxonomer.cpp @@ -27,6 +27,7 
@@ Taxonomer::Taxonomer(const LocalParameters &par, NcbiTaxonomy *taxonomy) : taxon minConsCnt = par.minConsCnt; minConsCntEuk = par.minConsCntEuk; eukaryotaTaxId = par.eukaryotaTaxId; + tieRatio = par.tieRatio; if (par.seqMode == 1 || par.seqMode == 2) { denominator = 100; @@ -300,7 +301,7 @@ TaxonScore Taxonomer::getBestSpeciesMatches(vector & speciesMatches, vector maxSpecies; for (auto & spScore : species2score) { - if (spScore.second > bestSpScore * 0.95) { + if (spScore.second >= bestSpScore * tieRatio) { maxSpecies.push_back(spScore.first); } } @@ -397,7 +398,7 @@ TaxonScore Taxonomer::getBestSpeciesMatches(vector & speciesMatches, } vector maxSpecies; for (auto & spScore : species2score) { - if (spScore.second > bestSpScore * 0.95) { + if (spScore.second >= bestSpScore * tieRatio) { maxSpecies.push_back(spScore.first); } } diff --git a/src/commons/Taxonomer.h b/src/commons/Taxonomer.h index 0279fe38..67cd12cc 100644 --- a/src/commons/Taxonomer.h +++ b/src/commons/Taxonomer.h @@ -61,6 +61,7 @@ class Taxonomer { int minConsCnt; int minConsCntEuk; int eukaryotaTaxId; + float tieRatio; // Internal int denominator; diff --git a/src/workflow/classify.cpp b/src/workflow/classify.cpp index 460bab34..207f4c54 100644 --- a/src/workflow/classify.cpp +++ b/src/workflow/classify.cpp @@ -25,6 +25,7 @@ void setClassifyDefaults(LocalParameters & par){ par.maskProb = 0.9; par.matchPerKmer = 4; par.accessionLevel = 0; + par.tieRatio = 0.95; } int classify(int argc, const char **argv, const Command& command)