From 06d207b38a55a41dfb221294c353fee03eb8c428 Mon Sep 17 00:00:00 2001 From: jmcarcell Date: Tue, 19 Nov 2024 18:33:58 +0100 Subject: [PATCH 1/4] Add support for reading several rntuple files --- include/podio/RNTupleReader.h | 10 +++++--- src/RNTupleReader.cc | 46 +++++++++++++++++++++++------------ 2 files changed, 37 insertions(+), 19 deletions(-) diff --git a/include/podio/RNTupleReader.h b/include/podio/RNTupleReader.h index 36248c5ca..97177ee71 100644 --- a/include/podio/RNTupleReader.h +++ b/include/podio/RNTupleReader.h @@ -133,10 +133,10 @@ class RNTupleReader { /** * Read and reconstruct the generic parameters of the Frame */ - GenericParameters readEventMetaData(const std::string& name, unsigned entNum); + GenericParameters readEventMetaData(const std::string& name, unsigned localEntry, unsigned readerIndex); template - void readParams(const std::string& name, unsigned entNum, GenericParameters& params); + void readParams(const std::string& name, unsigned entNum, unsigned readerIndex, GenericParameters& params); std::unique_ptr m_metadata{}; @@ -147,7 +147,11 @@ class RNTupleReader { std::unordered_map> m_metadata_readers{}; std::vector m_filenames{}; - std::unordered_map m_entries{}; + std::unordered_map m_entries{}; + // Map category to a vector that contains at how many entries each reader starts + // For example, if we have 3 readers and the first one has 10 entries, the second one 20 and the third one 30 + // then the vector will be {0, 10, 30, 60} + std::unordered_map> m_readerEntries; std::unordered_map m_totalEntries{}; struct CollectionInfo { diff --git a/src/RNTupleReader.cc b/src/RNTupleReader.cc index 6e0afe446..c2ef67a21 100644 --- a/src/RNTupleReader.cc +++ b/src/RNTupleReader.cc @@ -8,25 +8,27 @@ #include +#include #include namespace podio { template -void RNTupleReader::readParams(const std::string& name, unsigned entNum, GenericParameters& params) { - auto keyView = m_readers[name][0]->GetView>(root_utils::getGPKeyName()); - auto valueView = m_readers[name][0]->GetView>>(root_utils::getGPValueName()); +void RNTupleReader::readParams(const std::string& name, unsigned localEntry, unsigned readerIndex, + GenericParameters& params) { + auto keyView = m_readers[name][readerIndex]->GetView>(root_utils::getGPKeyName()); + auto valueView = m_readers[name][readerIndex]->GetView>>(root_utils::getGPValueName()); - params.loadFrom(keyView(entNum), valueView(entNum)); + params.loadFrom(keyView(localEntry), valueView(localEntry)); } -GenericParameters RNTupleReader::readEventMetaData(const std::string& name, unsigned entNum) { +GenericParameters RNTupleReader::readEventMetaData(const std::string& name, unsigned localEntry, unsigned readerIndex) { GenericParameters params; - readParams(name, entNum, params); - readParams(name, entNum, params); - readParams(name, entNum, params); - readParams(name, entNum, params); + readParams(name, localEntry, readerIndex, params); + readParams(name, localEntry, readerIndex, params); + readParams(name, localEntry, readerIndex, params); + readParams(name, localEntry, readerIndex, params); return params; } @@ -101,15 +103,20 @@ void RNTupleReader::openFiles(const std::vector& filenames) { unsigned RNTupleReader::getEntries(const std::string& name) { if (m_readers.find(name) == m_readers.end()) { + m_readerEntries[name].reserve(m_filenames.size() + 1); + m_readerEntries[name].push_back(0); for (auto& filename : m_filenames) { try { m_readers[name].emplace_back(ROOT::Experimental::RNTupleReader::Open(name, filename)); - } catch (const ROOT::Experimental::RException& e) { + m_readerEntries[name].push_back(m_readerEntries[name].back() + m_readers[name].back()->GetNEntries()); + + } catch (const ROOT::Experimental::RException&) { std::cout << "Category " << name << " not found in file " << filename << std::endl; } } - m_totalEntries[name] = std::accumulate(m_readers[name].begin(), m_readers[name].end(), 0, - [](int total, auto& reader) { return total + reader->GetNEntries(); }); + m_totalEntries[name] = m_readerEntries[name].back(); + // The last entry is not needed since it's the total number of entries + m_readerEntries[name].pop_back(); } return m_totalEntries[name]; } @@ -143,15 +150,22 @@ std::unique_ptr RNTupleReader::readEntry(const std::string& categ m_entries[category] = entNum + 1; + // m_readerEntries contains the accumulated entries for all the readers + // therefore, the first number that is lower or equal to the entry number + // is the index of the reader that contains the entry + auto upper = std::ranges::upper_bound(m_readerEntries[category], entNum); + auto localEntry = entNum - *(upper - 1); + auto readerIndex = upper - m_readerEntries[category].begin() - 1; + ROOTFrameData::BufferMap buffers; #if ROOT_VERSION_CODE >= ROOT_VERSION(6, 31, 0) // We need to create a non-bare entry here, because the entries for the // parameters are not explicitly (re)set and we need them default initialized. // In principle we would only need a bare entry for the collection data, since // we set all the fields there in any case. - auto dentry = m_readers[category][0]->GetModel().CreateEntry(); + auto dentry = m_readers[category][readerIndex]->GetModel().CreateEntry(); #else - auto dentry = m_readers[category][0]->GetModel()->GetDefaultEntry(); + auto dentry = m_readers[category][readerIndex]->GetModel()->GetDefaultEntry(); #endif for (size_t i = 0; i < m_collectionInfo[category].id.size(); ++i) { @@ -211,9 +225,9 @@ std::unique_ptr RNTupleReader::readEntry(const std::string& categ buffers.emplace(m_collectionInfo[category].name[i], std::move(collBuffers)); } - m_readers[category][0]->LoadEntry(entNum, *dentry); + m_readers[category][readerIndex]->LoadEntry(localEntry, *dentry); - auto parameters = readEventMetaData(category, entNum); + auto parameters = readEventMetaData(category, localEntry, readerIndex); return std::make_unique(std::move(buffers), m_idTables[category], std::move(parameters)); } From f5ba504e7fdd1800b349599b14d7543b440d6310 Mon Sep 17 00:00:00 2001 From: jmcarcell Date: Tue, 19 Nov 2024 18:58:25 +0100 Subject: [PATCH 2/4] Change comment and the order of operations to make it more obvious --- src/RNTupleReader.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/RNTupleReader.cc b/src/RNTupleReader.cc index c2ef67a21..db5ab4dee 100644 --- a/src/RNTupleReader.cc +++ b/src/RNTupleReader.cc @@ -152,10 +152,10 @@ std::unique_ptr RNTupleReader::readEntry(const std::string& categ // m_readerEntries contains the accumulated entries for all the readers // therefore, the first number that is lower or equal to the entry number - // is the index of the reader that contains the entry + // is at the index of the reader that contains the entry auto upper = std::ranges::upper_bound(m_readerEntries[category], entNum); auto localEntry = entNum - *(upper - 1); - auto readerIndex = upper - m_readerEntries[category].begin() - 1; + auto readerIndex = upper - 1 - m_readerEntries[category].begin(); ROOTFrameData::BufferMap buffers; #if ROOT_VERSION_CODE >= ROOT_VERSION(6, 31, 0) From 0c3935b5dace47e48999dc4a6df1066033ec9d26 Mon Sep 17 00:00:00 2001 From: jmcarcell Date: Thu, 28 Nov 2024 20:08:58 +0100 Subject: [PATCH 3/4] Initialize m_readerEntries --- include/podio/RNTupleReader.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/podio/RNTupleReader.h b/include/podio/RNTupleReader.h index 97177ee71..d72cb9eb5 100644 --- a/include/podio/RNTupleReader.h +++ b/include/podio/RNTupleReader.h @@ -151,7 +151,7 @@ class RNTupleReader { // Map category to a vector that contains at how many entries each reader starts // For example, if we have 3 readers and the first one has 10 entries, the second one 20 and the third one 30 // then the vector will be {0, 10, 30, 60} - std::unordered_map> m_readerEntries; + std::unordered_map> m_readerEntries{}; std::unordered_map m_totalEntries{}; struct CollectionInfo { From 3be9333f50835157c68b15e6dab9c8e9366b4311 Mon Sep 17 00:00:00 2001 From: jmcarcell Date: Thu, 28 Nov 2024 20:22:21 +0100 Subject: [PATCH 4/4] Improve comment --- include/podio/RNTupleReader.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/podio/RNTupleReader.h b/include/podio/RNTupleReader.h index d72cb9eb5..a6e106176 100644 --- a/include/podio/RNTupleReader.h +++ b/include/podio/RNTupleReader.h @@ -150,7 +150,8 @@ class RNTupleReader { std::unordered_map m_entries{}; // Map category to a vector that contains at how many entries each reader starts // For example, if we have 3 readers and the first one has 10 entries, the second one 20 and the third one 30 - // then the vector will be {0, 10, 30, 60} + // then the vector will be {0, 10, 30} + // 60 is not needed because anything after 30 will be in the last reader std::unordered_map> m_readerEntries{}; std::unordered_map m_totalEntries{};