From b16b2d06c8263351fbe0755d3447ddfbe224615c Mon Sep 17 00:00:00 2001
From: Hannes Hauswedell
Date: Mon, 8 Jan 2024 14:56:53 +0100
Subject: [PATCH] [fix] check presence of taxtree in index
---
src/mkindex_algo.hpp | 6 +++++-
src/search_algo.hpp | 2 +-
2 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/src/mkindex_algo.hpp b/src/mkindex_algo.hpp
index 5e63190bd..c26851889 100644
--- a/src/mkindex_algo.hpp
+++ b/src/mkindex_algo.hpp
@@ -580,15 +580,19 @@ auto parseAndStoreTaxTree(std::vector & taxIdIsPresent, LambdaIndexerOptio
myPrint(options, 1, "done.\n");
myPrint(options, 2, "Runtime: ", sysTime() - start, "s\n");
- taxonNames[0] = "invalid";
+ taxonNames[0] = "invalid";
+ size_t taxaWithoutNameCount = 0;
for (uint32_t i = 0; i < std::ranges::size(taxonNames); ++i)
{
if (taxIdIsPresentOrParent[i] && empty(taxonNames[i]))
{
std::cerr << "Warning: Taxon with ID " << i << " has no name associated, defaulting to \"n/a\".\n";
taxonNames[i] = "n/a";
+ ++taxaWithoutNameCount;
}
}
+ if (taxaWithoutNameCount * 10 > taxonNames.size())
+ std::cerr << "Warning: More than 10% of taxa have no valid name entry.\n";
return ret;
}
diff --git a/src/search_algo.hpp b/src/search_algo.hpp
index c00b2cf6e..ffe259643 100644
--- a/src/search_algo.hpp
+++ b/src/search_algo.hpp
@@ -296,7 +296,7 @@ void loadDbIndexFromDisk(
myPrint(options, 2, " size of search space: ", searchSpaceSize, "\n");
bool const indexHasSTaxIDs = globalHolder.indexFile.sTaxIds.size() == globalHolder.indexFile.seqs.size();
myPrint(options, 2, " has taxonomic IDs: ", indexHasSTaxIDs, "\n");
- bool const indexHasTaxTree = globalHolder.indexFile.taxonNames.size() >= globalHolder.indexFile.seqs.size();
+ bool const indexHasTaxTree = !globalHolder.indexFile.taxonNames.empty();
myPrint(options, 2, " has taxonomic tree: ", indexHasTaxTree, "\n");
myPrint(options, 2, "Runtime: ", finish, "s \n\n");