Skip to content

Commit

Permalink
Merge pull request sxs-collaboration#5207 from aasthabagree21/combine-h5
Browse files Browse the repository at this point in the history
  • Loading branch information
nilsvu authored Aug 23, 2023
2 parents 3d9f3e8 + 3a3a7cd commit 0ffcb78
Show file tree
Hide file tree
Showing 12 changed files with 589 additions and 128 deletions.
132 changes: 4 additions & 128 deletions src/Executables/CombineH5/CombineH5.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "DataStructures/DataVector.hpp"
#include "IO/H5/AccessType.hpp"
#include "IO/H5/CheckH5PropertiesMatch.hpp"
#include "IO/H5/CombineH5.hpp"
#include "IO/H5/File.hpp"
#include "IO/H5/SourceArchive.hpp"
#include "IO/H5/VolumeData.hpp"
Expand All @@ -23,131 +24,6 @@
// main module we just have it be empty
extern "C" void CkRegisterMainModule() {
  // Deliberately empty: this translation unit only needs to provide the
  // symbol, there is nothing to register.
}

namespace {
// Returns all the observation_ids stored in the volume files. Assumes all
// volume files have the same observation ids
std::vector<size_t> get_observation_ids(const std::string& file_prefix,
const std::string& subfile_name) {
const h5::H5File<h5::AccessType::ReadOnly> initial_file(file_prefix + "0.h5",
false);
const auto& initial_volume_file =
initial_file.get<h5::VolumeData>("/" + subfile_name);
return initial_volume_file.list_observation_ids();
}

// Returns total number of elements for an observation id across all volume data
// files
size_t get_number_of_elements(const std::vector<std::string>& input_filenames,
const std::string& subfile_name,
const size_t& observation_id) {
size_t total_elements = 0;
for (const auto& input_filename : input_filenames) {
const h5::H5File<h5::AccessType::ReadOnly> original_file(input_filename,
false);
const auto& original_volume_file =
original_file.get<h5::VolumeData>("/" + subfile_name);
total_elements += original_volume_file.get_extents(observation_id).size();
}
return total_elements;
}

void combine_h5(const std::string& file_prefix, const std::string& subfile_name,
const std::string& output) {
// Parses for and stores all input files to be looped over
const std::vector<std::string>& file_names =
file_system::glob(file_prefix + "[0-9]*.h5");
Parallel::printf("Processing files:\n%s\n",
std::string{MakeString{} << file_names}.c_str());

// Checks that volume data was generated with identical versions of SpECTRE
if (!h5::check_src_files_match(file_names)) {
ERROR(
"One or more of your files were found to have differing src.tar.gz "
"files, meaning that they may be from differing versions of SpECTRE.");
}

// Checks that volume data files contain the same observation ids
if (!h5::check_observation_ids_match(file_names, subfile_name)) {
ERROR(
"One or more of your files were found to have differing observation "
"ids, meaning they may be from different runs of your SpECTRE "
"executable or were corrupted.");
}

// Braces to specify scope for H5 file
{
// Instantiates the output file and the .vol subfile to be filled with the
// combined data later
Parallel::printf("Creating output file: %s0.h5\n", output.c_str());
h5::H5File<h5::AccessType::ReadWrite> new_file(output + "0.h5", true);
new_file.insert<h5::VolumeData>("/" + subfile_name + ".vol");
new_file.close_current_object();
} // End of scope for H5 file

// Obtains list of observation ids to loop over
const std::vector<size_t> observation_ids =
get_observation_ids(file_prefix, subfile_name);

// Loops over observation ids to write volume data by observation id
for (size_t obs_index = 0; obs_index < observation_ids.size(); ++obs_index) {
const size_t obs_id = observation_ids[obs_index];
// Pre-calculates size of vector to store element data and allocates
// corresponding memory
const size_t vector_dim =
get_number_of_elements(file_names, subfile_name, obs_id);
std::vector<ElementVolumeData> element_data;
element_data.reserve(vector_dim);

double obs_val = 0.0;
std::optional<std::vector<char>> serialized_domain{};
std::optional<std::vector<char>> serialized_functions_of_time{};

// Loops over input files to append element data into a single vector to be
// stored in a single H5
bool printed = false;
for (auto const& file_name : file_names) {
const h5::H5File<h5::AccessType::ReadOnly> original_file(file_name,
false);
const auto& original_volume_file =
original_file.get<h5::VolumeData>("/" + subfile_name);
obs_val = original_volume_file.get_observation_value(obs_id);
if (not printed) {
Parallel::printf(
"Processing obsevation ID %lo (%lo/%lo) with value %1.14e\n",
obs_id, obs_index, observation_ids.size(), obs_val);
printed = true;
}
Parallel::printf(" Processing file: %s\n", file_name.c_str());

serialized_domain = original_volume_file.get_domain(obs_id);
serialized_functions_of_time =
original_volume_file.get_functions_of_time(obs_id);

// Get vector of element data for this `obs_id` and `file_name`
std::vector<ElementVolumeData> data_by_element =
std::move(std::get<2>(original_volume_file.get_data_by_element(
obs_val * (1.0 - 4.0 * std::numeric_limits<double>::epsilon()),
obs_val * (1.0 + 4.0 * std::numeric_limits<double>::epsilon()),
std::nullopt)[0]));

// Append vector to total vector of element data for this `obs_id`
element_data.insert(element_data.end(),
std::make_move_iterator(data_by_element.begin()),
std::make_move_iterator(data_by_element.end()));
data_by_element.clear();
original_file.close_current_object();
}

h5::H5File<h5::AccessType::ReadWrite> new_file(output + "0.h5", true);
auto& new_volume_file = new_file.get<h5::VolumeData>("/" + subfile_name);
new_volume_file.write_volume_data(obs_id, obs_val, element_data,
serialized_domain,
serialized_functions_of_time);
new_file.close_current_object();
}
}
} // namespace

/*
* This executable is used for combining a series of HDF5 volume files into one
* continuous dataset to be stored in a single HDF5 volume file.
Expand Down Expand Up @@ -180,7 +56,7 @@ int main(int argc, char** argv) {
return 1;
}

combine_h5(vars["file_prefix"].as<std::string>(),
vars["subfile_name"].as<std::string>(),
vars["output"].as<std::string>());
h5::combine_h5(vars["file_prefix"].as<std::string>(),
vars["subfile_name"].as<std::string>(),
vars["output"].as<std::string>());
}
2 changes: 2 additions & 0 deletions src/IO/H5/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ spectre_target_sources(
PRIVATE
AccessType.cpp
CheckH5PropertiesMatch.cpp
CombineH5.cpp
Dat.cpp
EosTable.cpp
ExtendConnectivityHelpers.cpp
Expand All @@ -32,6 +33,7 @@ spectre_target_headers(
AccessType.hpp
CheckH5.hpp
CheckH5PropertiesMatch.hpp
CombineH5.hpp
Dat.hpp
EosTable.hpp
ExtendConnectivityHelpers.hpp
Expand Down
151 changes: 151 additions & 0 deletions src/IO/H5/CombineH5.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
// Distributed under the MIT License.
// See LICENSE.txt for details.

#include "IO/H5/CombineH5.hpp"

#include <boost/program_options.hpp>
#include <cstddef>
#include <cstdlib>
#include <iterator>
#include <string>
#include <vector>

#include "DataStructures/DataVector.hpp"
#include "IO/H5/AccessType.hpp"
#include "IO/H5/CheckH5PropertiesMatch.hpp"
#include "IO/H5/File.hpp"
#include "IO/H5/SourceArchive.hpp"
#include "IO/H5/VolumeData.hpp"
#include "Parallel/Printf.hpp"
#include "Utilities/FileSystem.hpp"
#include "Utilities/MakeString.hpp"
#include "Utilities/StdHelpers.hpp"

namespace {
// Returns all the observation_ids stored in the volume files. Assumes all
// volume files have the same observation ids
std::vector<size_t> get_observation_ids(const std::string& file_prefix,
const std::string& subfile_name) {
const h5::H5File<h5::AccessType::ReadOnly> initial_file(file_prefix + "0.h5",
false);
const auto& initial_volume_file =
initial_file.get<h5::VolumeData>("/" + subfile_name);
return initial_volume_file.list_observation_ids();
}

// Returns total number of elements for an observation id across all volume data
// files
size_t get_number_of_elements(const std::vector<std::string>& input_filenames,
const std::string& subfile_name,
const size_t& observation_id) {
size_t total_elements = 0;
for (const auto& input_filename : input_filenames) {
const h5::H5File<h5::AccessType::ReadOnly> original_file(input_filename,
false);
const auto& original_volume_file =
original_file.get<h5::VolumeData>("/" + subfile_name);
total_elements += original_volume_file.get_extents(observation_id).size();
}
return total_elements;
}
} //namespace
namespace h5 {

void combine_h5(const std::string& file_prefix, const std::string& subfile_name,
const std::string& output, const bool check_src) {
// Parses for and stores all input files to be looped over
const std::vector<std::string>& file_names =
file_system::glob(file_prefix + "[0-9]*.h5");
Parallel::printf("Processing files:\n%s\n",
std::string{MakeString{} << file_names}.c_str());

// Checks that volume data was generated with identical versions of SpECTRE
if (check_src){
if (!h5::check_src_files_match(file_names)) {
ERROR(
"One or more of your files were found to have differing src.tar.gz "
"files, meaning that they may be from differing versions of SpECTRE.");
}
}

// Checks that volume data files contain the same observation ids
if (!h5::check_observation_ids_match(file_names, subfile_name)) {
ERROR(
"One or more of your files were found to have differing observation "
"ids, meaning they may be from different runs of your SpECTRE "
"executable or were corrupted.");
}

// Braces to specify scope for H5 file
{
// Instantiates the output file and the .vol subfile to be filled with the
// combined data later
Parallel::printf("Creating output file: %s0.h5\n", output.c_str());
h5::H5File<h5::AccessType::ReadWrite> new_file(output + "0.h5", true);
new_file.insert<h5::VolumeData>("/" + subfile_name + ".vol");
new_file.close_current_object();
} // End of scope for H5 file

// Obtains list of observation ids to loop over
const std::vector<size_t> observation_ids =
get_observation_ids(file_prefix, subfile_name);

// Loops over observation ids to write volume data by observation id
for (size_t obs_index = 0; obs_index < observation_ids.size(); ++obs_index) {
const size_t obs_id = observation_ids[obs_index];
// Pre-calculates size of vector to store element data and allocates
// corresponding memory
const size_t vector_dim =
get_number_of_elements(file_names, subfile_name, obs_id);
std::vector<ElementVolumeData> element_data;
element_data.reserve(vector_dim);

double obs_val = 0.0;
std::optional<std::vector<char>> serialized_domain{};
std::optional<std::vector<char>> serialized_functions_of_time{};

// Loops over input files to append element data into a single vector to be
// stored in a single H5
bool printed = false;
for (auto const& file_name : file_names) {
const h5::H5File<h5::AccessType::ReadOnly> original_file(file_name,
false);
const auto& original_volume_file =
original_file.get<h5::VolumeData>("/" + subfile_name);
obs_val = original_volume_file.get_observation_value(obs_id);
if (not printed) {
Parallel::printf(
"Processing obsevation ID %lo (%lo/%lo) with value %1.14e\n",
obs_id, obs_index, observation_ids.size(), obs_val);
printed = true;
}
Parallel::printf(" Processing file: %s\n", file_name.c_str());

serialized_domain = original_volume_file.get_domain(obs_id);
serialized_functions_of_time =
original_volume_file.get_functions_of_time(obs_id);

// Get vector of element data for this `obs_id` and `file_name`
std::vector<ElementVolumeData> data_by_element =
std::move(std::get<2>(original_volume_file.get_data_by_element(
obs_val * (1.0 - 4.0 * std::numeric_limits<double>::epsilon()),
obs_val * (1.0 + 4.0 * std::numeric_limits<double>::epsilon()),
std::nullopt)[0]));

// Append vector to total vector of element data for this `obs_id`
element_data.insert(element_data.end(),
std::make_move_iterator(data_by_element.begin()),
std::make_move_iterator(data_by_element.end()));
data_by_element.clear();
original_file.close_current_object();
}

h5::H5File<h5::AccessType::ReadWrite> new_file(output + "0.h5", true);
auto& new_volume_file = new_file.get<h5::VolumeData>("/" + subfile_name);
new_volume_file.write_volume_data(obs_id, obs_val, element_data,
serialized_domain,
serialized_functions_of_time);
new_file.close_current_object();
}
}
} // namespace h5
13 changes: 13 additions & 0 deletions src/IO/H5/CombineH5.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// Distributed under the MIT License.
// See LICENSE.txt for details.

#pragma once

#include <string>

namespace h5 {

/*!
 * \brief Combine a series of HDF5 volume files into a single HDF5 volume file.
 *
 * All files matching `file_prefix + "[0-9]*.h5"` are read, and for every
 * observation id the element data found in their `subfile_name` volume subfile
 * is concatenated and written to the file `output + "0.h5"`.
 *
 * \param file_prefix common prefix of the input files; the numeric suffix and
 * the `.h5` extension are matched by the glob pattern above.
 * \param subfile_name name of the volume subfile to combine, without the
 * leading `/`.
 * \param output prefix of the output file; `0.h5` is appended to it.
 * \param check_src if `true` (the default), error unless the input files pass
 * `h5::check_src_files_match`, i.e. unless their stored source archives match.
 */
void combine_h5(const std::string& file_prefix, const std::string& subfile_name,
                const std::string& output, bool check_src = true);

}  // namespace h5
2 changes: 2 additions & 0 deletions src/IO/H5/Python/Bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include <pybind11/pybind11.h>

#include "IO/H5/Python/CombineH5.hpp"
#include "IO/H5/Python/Dat.hpp"
#include "IO/H5/Python/File.hpp"
#include "IO/H5/Python/TensorData.hpp"
Expand All @@ -19,4 +20,5 @@ PYBIND11_MODULE(_Pybindings, m) { // NOLINT
py_bindings::bind_h5dat(m);
py_bindings::bind_h5vol(m);
py_bindings::bind_tensordata(m);
py_bindings::bind_h5combine(m);
}
3 changes: 3 additions & 0 deletions src/IO/H5/Python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,14 @@ spectre_python_add_module(
LIBRARY_NAME ${LIBRARY}
SOURCES
Bindings.cpp
CombineH5.cpp
Dat.cpp
File.cpp
TensorData.cpp
VolumeData.cpp
PYTHON_FILES
__init__.py
CombineH5.py
DeleteSubfiles.py
ExtendConnectivityData.py
ExtractDatFromH5.py
Expand All @@ -26,6 +28,7 @@ spectre_python_headers(
${LIBRARY}
INCLUDE_DIRECTORY ${CMAKE_SOURCE_DIR}/src
HEADERS
CombineH5.hpp
Dat.hpp
File.hpp
TensorData.hpp
Expand Down
20 changes: 20 additions & 0 deletions src/IO/H5/Python/CombineH5.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Distributed under the MIT License.
// See LICENSE.txt for details.

#include "IO/H5/Python/CombineH5.hpp"

#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <string>

#include "IO/H5/CombineH5.hpp"

namespace py = pybind11;

namespace py_bindings {
// Exposes `h5::combine_h5` to Python as `combine_h5`, taking the keyword
// arguments `file_prefix`, `subfile_name`, `output` and `check_src`.
void bind_h5combine(py::module& m) {
  // pybind11 cannot see C++ default arguments, so the default of `check_src`
  // is repeated here to match the declaration in IO/H5/CombineH5.hpp.
  m.def("combine_h5", &h5::combine_h5, py::arg("file_prefix"),
        py::arg("subfile_name"), py::arg("output"),
        py::arg("check_src") = true);
}
}  // namespace py_bindings
Loading

0 comments on commit 0ffcb78

Please sign in to comment.