From 3ed737c06616d96c61b1b8534737076e1c99812b Mon Sep 17 00:00:00 2001 From: Martin Steinegger Date: Thu, 29 Aug 2024 23:48:23 +0900 Subject: [PATCH] Add --tmscore-threshold-mode to allow switching the normalization --- src/commons/LocalParameters.cpp | 5 +++++ src/commons/LocalParameters.h | 6 ++++++ src/commons/TMaligner.cpp | 14 +++++++++++++- src/commons/TMaligner.h | 2 ++ src/strucclustutils/aln2tmscore.cpp | 3 ++- src/strucclustutils/structurealign.cpp | 2 +- src/strucclustutils/structurerescorediagonal.cpp | 2 +- 7 files changed, 30 insertions(+), 4 deletions(-) diff --git a/src/commons/LocalParameters.cpp b/src/commons/LocalParameters.cpp index 5483966b..ca0319fd 100644 --- a/src/commons/LocalParameters.cpp +++ b/src/commons/LocalParameters.cpp @@ -10,6 +10,7 @@ LocalParameters::LocalParameters() : Parameters(), PARAM_PREF_MODE(PARAM_PREF_MODE_ID,"--prefilter-mode", "Prefilter mode", "prefilter mode: 0: kmer/ungapped 1: ungapped, 2: nofilter",typeid(int), (void *) &prefMode, "^[0-2]{1}$"), PARAM_TMSCORE_THRESHOLD(PARAM_TMSCORE_THRESHOLD_ID,"--tmscore-threshold", "TMscore threshold", "accept alignments with a tmsore > thr [0.0,1.0]",typeid(float), (void *) &tmScoreThr, "^0(\\.[0-9]+)?|1(\\.0+)?$"), + PARAM_TMSCORE_THRESHOLD_MODE(PARAM_TMSCORE_THRESHOLD_MODE_ID,"--tmscore-threshold-mode", "TMscore threshold mode", "0: alignment, 1: query 2: target length",typeid(int), (void *) &tmScoreThrMode, "^[0-2]{1}$"), PARAM_TMALIGN_HIT_ORDER(PARAM_TMALIGN_HIT_ORDER_ID,"--tmalign-hit-order", "TMalign hit order", "order hits by 0: (qTM+tTM)/2, 1: qTM, 2: tTM, 3: min(qTM,tTM) 4: max(qTM,tTM)",typeid(int), (void *) &tmAlignHitOrder, "^[0-4]{1}$"), PARAM_LDDT_THRESHOLD(PARAM_LDDT_THRESHOLD_ID,"--lddt-threshold", "LDDT threshold", "accept alignments with a lddt > thr [0.0,1.0]",typeid(float), (void *) &lddtThr, "^0(\\.[0-9]+)?|1(\\.0+)?$"), PARAM_SORT_BY_STRUCTURE_BITS(PARAM_SORT_BY_STRUCTURE_BITS_ID,"--sort-by-structure-bits", "Sort by structure bit score", "sort by 
bits*sqrt(alnlddt*alntmscore)",typeid(int), (void *) &sortByStructureBits, "^[0-1]{1}$", MMseqsParameter::COMMAND_ALIGN | MMseqsParameter::COMMAND_EXPERT), @@ -106,6 +107,7 @@ LocalParameters::LocalParameters() : tmalign.push_back(&PARAM_ADD_BACKTRACE); tmalign.push_back(&PARAM_INCLUDE_IDENTITY); tmalign.push_back(&PARAM_TMSCORE_THRESHOLD); + tmalign.push_back(&PARAM_TMSCORE_THRESHOLD_MODE); tmalign.push_back(&PARAM_TMALIGN_HIT_ORDER); tmalign.push_back(&PARAM_TMALIGN_FAST); tmalign.push_back(&PARAM_PRELOAD_MODE); @@ -114,11 +116,13 @@ LocalParameters::LocalParameters() : structurerescorediagonal.push_back(&PARAM_EXACT_TMSCORE); structurerescorediagonal.push_back(&PARAM_TMSCORE_THRESHOLD); + structurerescorediagonal.push_back(&PARAM_TMSCORE_THRESHOLD_MODE); structurerescorediagonal.push_back(&PARAM_LDDT_THRESHOLD); structurerescorediagonal.push_back(&PARAM_ALIGNMENT_TYPE); structurerescorediagonal = combineList(structurerescorediagonal, align); structurealign.push_back(&PARAM_TMSCORE_THRESHOLD); + structurealign.push_back(&PARAM_TMSCORE_THRESHOLD_MODE); structurealign.push_back(&PARAM_LDDT_THRESHOLD); structurealign.push_back(&PARAM_SORT_BY_STRUCTURE_BITS); structurealign.push_back(&PARAM_ALIGNMENT_TYPE); @@ -216,6 +220,7 @@ LocalParameters::LocalParameters() : prefMode = PREF_MODE_KMER; alignmentType = ALIGNMENT_TYPE_3DI_AA; tmScoreThr = 0.0; + tmScoreThrMode = TMSCORE_THRESHOLD_MODE_ALIGNMENT; tmAlignHitOrder = TMALIGN_HIT_ORDER_AVG; lddtThr = 0.0; evalThr = 10; diff --git a/src/commons/LocalParameters.h b/src/commons/LocalParameters.h index faff6af8..c7734b68 100644 --- a/src/commons/LocalParameters.h +++ b/src/commons/LocalParameters.h @@ -32,6 +32,10 @@ class LocalParameters : public Parameters { static const int ALIGNMENT_TYPE_TMALIGN = 1; static const int ALIGNMENT_TYPE_3DI_AA = 2; + static const int TMSCORE_THRESHOLD_MODE_ALIGNMENT = 0; + static const int TMSCORE_THRESHOLD_MODE_QUERY = 1; + static const int TMSCORE_THRESHOLD_MODE_TARGET = 2; + static const 
int PREF_MODE_KMER = 0; static const int PREF_MODE_UNGAPPED = 1; static const int PREF_MODE_EXHAUSTIVE = 2; @@ -104,6 +108,7 @@ class LocalParameters : public Parameters { PARAMETER(PARAM_PREF_MODE) PARAMETER(PARAM_TMSCORE_THRESHOLD) + PARAMETER(PARAM_TMSCORE_THRESHOLD_MODE) PARAMETER(PARAM_TMALIGN_HIT_ORDER) PARAMETER(PARAM_LDDT_THRESHOLD) PARAMETER(PARAM_SORT_BY_STRUCTURE_BITS) @@ -131,6 +136,7 @@ class LocalParameters : public Parameters { int prefMode; float tmScoreThr; + int tmScoreThrMode; int tmAlignHitOrder; float lddtThr; int sortByStructureBits; diff --git a/src/commons/TMaligner.cpp b/src/commons/TMaligner.cpp index 03bd31d7..9e58518c 100644 --- a/src/commons/TMaligner.cpp +++ b/src/commons/TMaligner.cpp @@ -8,6 +8,7 @@ #include #include "StructureSmithWaterman.h" #include "StructureSmithWaterman.h" +#include "LocalParameters.h" TMaligner::TMaligner(unsigned int maxSeqLen, bool tmAlignFast, bool tmScoreOnly, bool computeExactScore) : tmAlignFast(tmAlignFast), @@ -323,4 +324,15 @@ Matcher::result_t TMaligner::align(unsigned int dbKey, float *x, float *y, float float qCov = StructureSmithWaterman::computeCov(shiftQ, queryLen-endQ-1, queryLen); float tCov = StructureSmithWaterman::computeCov(shiftT, targetLen-endT-1, targetLen); return Matcher::result_t(dbKey, TM_0*100000 , qCov, tCov, seqId, TM2, backtrace.length(), shiftQ, queryLen-endQ-1, queryLen, shiftT, targetLen-endT-1, targetLen, Matcher::compressAlignment(backtrace)); -} \ No newline at end of file +} + +unsigned int TMaligner::normalization(int mode, unsigned int alignmentLen, unsigned int queryLen, unsigned int targetLen) { + if(mode == LocalParameters::TMSCORE_THRESHOLD_MODE_ALIGNMENT){ + return alignmentLen; + } else if(mode == LocalParameters::TMSCORE_THRESHOLD_MODE_QUERY){ + return queryLen; + } else if(mode == LocalParameters::TMSCORE_THRESHOLD_MODE_TARGET){ + return targetLen; + } + return 0; +} diff --git a/src/commons/TMaligner.h b/src/commons/TMaligner.h index 2d1488bc..9c99fe40 100644 
--- a/src/commons/TMaligner.h +++ b/src/commons/TMaligner.h @@ -43,6 +43,8 @@ class TMaligner{ Matcher::result_t align(unsigned int dbKey, float *target_x, float *target_y, float *target_z, char * targetSeq, unsigned int targetLen, float &TM); + static unsigned int normalization(int mode, unsigned int alignmentLen, unsigned int queryLen, unsigned int targetLen); + private: AffineNeedlemanWunsch * affineNW; std::string backtrace; diff --git a/src/strucclustutils/aln2tmscore.cpp b/src/strucclustutils/aln2tmscore.cpp index fbe849a8..43b6b2ca 100644 --- a/src/strucclustutils/aln2tmscore.cpp +++ b/src/strucclustutils/aln2tmscore.cpp @@ -102,7 +102,8 @@ int aln2tmscore(int argc, const char **argv, const Command& command) { // Matching residue index collection TMaligner::TMscoreResult tmres = tmaln.computeTMscore(tdata, &tdata[targetLen], &tdata[targetLen + targetLen], targetLen, - res.qStartPos, res.dbStartPos, res.backtrace, res.backtrace.size()); + res.qStartPos, res.dbStartPos, res.backtrace, + TMaligner::normalization(par.tmScoreThrMode, res.backtrace.size(), res.qLen, res.dbLen)); //std::cout << TMalnScore << std::endl; resultsStr.append(SSTR(dbKey)); resultsStr.push_back(' '); diff --git a/src/strucclustutils/structurealign.cpp b/src/strucclustutils/structurealign.cpp index c259d7ec..c059b030 100644 --- a/src/strucclustutils/structurealign.cpp +++ b/src/strucclustutils/structurealign.cpp @@ -388,7 +388,7 @@ int structurealign(int argc, const char **argv, const Command& command) { res.qStartPos, res.dbStartPos, res.backtrace, - res.backtrace.size()); + TMaligner::normalization(par.tmScoreThrMode, res.backtrace.size(), res.qLen, res.dbLen)); if (tmres.tmscore < par.tmScoreThr) { continue; } diff --git a/src/strucclustutils/structurerescorediagonal.cpp b/src/strucclustutils/structurerescorediagonal.cpp index 0d49e0d3..434533e6 100644 --- a/src/strucclustutils/structurerescorediagonal.cpp +++ b/src/strucclustutils/structurerescorediagonal.cpp @@ -344,7 +344,7 @@ int 
structureungappedalign(int argc, const char **argv, const Command& command) float* targetCaData = tcoords.read(tcadata, res.dbLen, tCaLength); TMaligner::TMscoreResult tmres = tmaligner->computeTMscore(targetCaData, &targetCaData[res.dbLen], &targetCaData[res.dbLen+res.dbLen], res.dbLen, res.qStartPos, res.dbStartPos, Matcher::uncompressAlignment(res.backtrace), - res.backtrace.size()); + TMaligner::normalization(par.tmScoreThrMode, res.backtrace.size(), res.qLen, res.dbLen)); if(tmres.tmscore < par.tmScoreThr){ continue; }