From b16b2d06c8263351fbe0755d3447ddfbe224615c Mon Sep 17 00:00:00 2001
From: Hannes Hauswedell
Date: Mon, 8 Jan 2024 14:56:53 +0100
Subject: [PATCH] [fix] check presence of taxtree in index

---
 src/mkindex_algo.hpp | 6 +++++-
 src/search_algo.hpp  | 2 +-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/mkindex_algo.hpp b/src/mkindex_algo.hpp
index 5e63190bd..c26851889 100644
--- a/src/mkindex_algo.hpp
+++ b/src/mkindex_algo.hpp
@@ -580,15 +580,19 @@ auto parseAndStoreTaxTree(std::vector & taxIdIsPresent, LambdaIndexerOptio
     myPrint(options, 1, "done.\n");
     myPrint(options, 2, "Runtime: ", sysTime() - start, "s\n");
 
-    taxonNames[0] = "invalid";
+    taxonNames[0]               = "invalid";
+    size_t taxaWithoutNameCount = 0;
     for (uint32_t i = 0; i < std::ranges::size(taxonNames); ++i)
     {
         if (taxIdIsPresentOrParent[i] && empty(taxonNames[i]))
         {
             std::cerr << "Warning: Taxon with ID " << i << " has no name associated, defaulting to \"n/a\".\n";
             taxonNames[i] = "n/a";
+            ++taxaWithoutNameCount;
         }
     }
+    if (taxaWithoutNameCount * 10 > taxonNames.size())
+        std::cerr << "Warning: More than 10% of taxa have no valid name entry.\n";
 
     return ret;
 }
diff --git a/src/search_algo.hpp b/src/search_algo.hpp
index c00b2cf6e..ffe259643 100644
--- a/src/search_algo.hpp
+++ b/src/search_algo.hpp
@@ -296,7 +296,7 @@ void loadDbIndexFromDisk(
     myPrint(options, 2, " size of search space: ", searchSpaceSize, "\n");
     bool const indexHasSTaxIDs = globalHolder.indexFile.sTaxIds.size() == globalHolder.indexFile.seqs.size();
     myPrint(options, 2, " has taxonomic IDs: ", indexHasSTaxIDs, "\n");
-    bool const indexHasTaxTree = globalHolder.indexFile.taxonNames.size() >= globalHolder.indexFile.seqs.size();
+    bool const indexHasTaxTree = !globalHolder.indexFile.taxonNames.empty();
     myPrint(options, 2, " has taxonomic tree: ", indexHasTaxTree, "\n");
 
     myPrint(options, 2, "Runtime: ", finish, "s \n\n");