diff --git a/src/mkindex_algo.hpp b/src/mkindex_algo.hpp index 5e63190bd..60a44de36 100644 --- a/src/mkindex_algo.hpp +++ b/src/mkindex_algo.hpp @@ -580,15 +580,19 @@ auto parseAndStoreTaxTree(std::vector & taxIdIsPresent, LambdaIndexerOptio myPrint(options, 1, "done.\n"); myPrint(options, 2, "Runtime: ", sysTime() - start, "s\n"); - taxonNames[0] = "invalid"; + taxonNames[0] = "invalid"; + size_t taxaWithoutNameCount = 0; for (uint32_t i = 0; i < std::ranges::size(taxonNames); ++i) { if (taxIdIsPresentOrParent[i] && empty(taxonNames[i])) { std::cerr << "Warning: Taxon with ID " << i << " has no name associated, defaulting to \"n/a\".\n"; taxonNames[i] = "n/a"; + ++taxaWithoutNameCount; } } + if (taxaWithoutNameCount * 100 / taxonNames.size() > 10) + std::cerr << "Warning: More than 10% of taxa have no valid name entry.\n"; return ret; } diff --git a/src/search_algo.hpp b/src/search_algo.hpp index c00b2cf6e..ffe259643 100644 --- a/src/search_algo.hpp +++ b/src/search_algo.hpp @@ -296,7 +296,7 @@ void loadDbIndexFromDisk( myPrint(options, 2, " size of search space: ", searchSpaceSize, "\n"); bool const indexHasSTaxIDs = globalHolder.indexFile.sTaxIds.size() == globalHolder.indexFile.seqs.size(); myPrint(options, 2, " has taxonomic IDs: ", indexHasSTaxIDs, "\n"); - bool const indexHasTaxTree = globalHolder.indexFile.taxonNames.size() >= globalHolder.indexFile.seqs.size(); + bool const indexHasTaxTree = !globalHolder.indexFile.taxonNames.empty(); myPrint(options, 2, " has taxonomic tree: ", indexHasTaxTree, "\n"); myPrint(options, 2, "Runtime: ", finish, "s \n\n");