Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/develop' into pgrete/user-output
Browse files Browse the repository at this point in the history
  • Loading branch information
pgrete committed Jul 18, 2024
2 parents 7f00e54 + 8db87fb commit bdb997a
Show file tree
Hide file tree
Showing 147 changed files with 5,478 additions and 1,183 deletions.
76 changes: 76 additions & 0 deletions .github/workflows/ci-extended.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ env:
CMAKE_BUILD_PARALLEL_LEVEL: 5 # num threads for build
MACHINE_CFG: cmake/machinecfg/CI.cmake
OMPI_MCA_mpi_common_cuda_event_max: 1000
# https://github.com/open-mpi/ompi/issues/4948#issuecomment-395468231
OMPI_MCA_btl_vader_single_copy_mechanism: none

jobs:
perf-and-regression:
Expand All @@ -44,10 +46,20 @@ jobs:
path: tst/regression/gold_standard/
key: gold-standard

- name: Set vars based on matrix
id: cmake-vars
run: |
if ${{ matrix.device == 'host' }}; then
echo "enable_asan=ON" >> $GITHUB_OUTPUT
else
echo "enable_asan=OFF" >> $GITHUB_OUTPUT
fi
- name: Configure
run: |
cmake -B build \
-DCMAKE_BUILD_TYPE=Release \
-DENABLE_ASAN=${{ steps.cmake-vars.outputs.enable_asan }} \
-DMACHINE_VARIANT=${{ matrix.device }}-${{ matrix.parallel }}
- name: Build
Expand All @@ -60,6 +72,10 @@ jobs:
cd build
# Pick GPU with most available memory
export CUDA_VISIBLE_DEVICES=$(nvidia-smi --query-gpu=memory.free,index --format=csv,nounits,noheader | sort -nr | head -1 | awk '{ print $NF }')
# Sanitizer options (leak detection is disabled)
export ASAN_OPTIONS=abort_on_error=1:fast_unwind_on_malloc=1
export UBSAN_OPTIONS=print_stacktrace=0
export LSAN_OPTIONS=detect_leaks=0
ctest -L performance -LE perf-reg
# run regression tests
Expand All @@ -68,6 +84,10 @@ jobs:
cd build
# Pick GPU with most available memory
export CUDA_VISIBLE_DEVICES=$(nvidia-smi --query-gpu=memory.free,index --format=csv,nounits,noheader | sort -nr | head -1 | awk '{ print $NF }')
# Sanitizer options (disable leak detection for MPI runs, due to OpenMPI leaks)
export ASAN_OPTIONS=abort_on_error=1:fast_unwind_on_malloc=1
export UBSAN_OPTIONS=print_stacktrace=0
export LSAN_OPTIONS=detect_leaks=0
ctest -L regression -L ${{ matrix.parallel }} -LE perf-reg --timeout 3600
# Test Ascent integration (only most complex setup with MPI and on device)
Expand Down Expand Up @@ -103,3 +123,59 @@ jobs:
example/advection/ascent_render_57.png
retention-days: 3

perf-and-regression-amdgpu:
strategy:
matrix:
parallel: ['serial', 'mpi']
runs-on: [self-hosted, navi1030]
container:
image: ghcr.io/parthenon-hpc-lab/rocm5.4.3-mpi-hdf5
# Map to local user id on CI machine to allow writing to build cache and
# forward device handles to access AMD GPU within container
options: --user 1000 -w /home/ci --device /dev/kfd --device /dev/dri --security-opt seccomp=unconfined
env:
CMAKE_GENERATOR: Ninja
CMAKE_BUILD_PARALLEL_LEVEL: 8 # num threads for build
steps:
- uses: actions/checkout@v3
with:
submodules: 'true'

- name: Setup cache for gold standard
uses: actions/cache@v3
with:
path: tst/regression/gold_standard/
key: gold-standard

- name: Configure
run: |
cmake -B build \
-DMACHINE_CFG=${PWD}/cmake/machinecfg/GitHubActions.cmake \
-DCMAKE_BUILD_TYPE=Release \
-DMACHINE_VARIANT=hip-${{ matrix.parallel }} \
-DCMAKE_CXX_COMPILER=hipcc
- name: Build
run: cmake --build build

# run performance "unit" tests (none use MPI)
- name: Performance tests
if: ${{ matrix.parallel == 'serial' }}
run: |
cd build
ctest -L performance -LE perf-reg
# run regression tests
- name: Regression tests
run: |
cd build
ctest -L regression -L ${{ matrix.parallel }} -LE perf-reg --timeout 3600
- uses: actions/upload-artifact@v3
with:
name: log-and-convergence-${{ matrix.parallel }}
path: |
build/CMakeFiles/CMakeOutput.log
build/tst/regression/outputs/advection_convergence*/advection-errors.dat
build/tst/regression/outputs/advection_convergence*/advection-errors.png
retention-days: 3
43 changes: 43 additions & 0 deletions .github/workflows/ci-short.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ env:
CMAKE_BUILD_PARALLEL_LEVEL: 5 # num threads for build
MACHINE_CFG: cmake/machinecfg/CI.cmake
OMPI_MCA_mpi_common_cuda_event_max: 1000
# https://github.com/open-mpi/ompi/issues/4948#issuecomment-395468231
OMPI_MCA_btl_vader_single_copy_mechanism: none

jobs:
style:
Expand Down Expand Up @@ -130,3 +132,44 @@ jobs:
build/profile.txt
retention-days: 3

integration-amdgpu:
runs-on: [self-hosted, navi1030]
container:
image: ghcr.io/parthenon-hpc-lab/rocm5.4.3-mpi-hdf5
# Map to local user id on CI machine to allow writing to build cache and
# forward device handles to access AMD GPU within container
options: --user 1000 -w /home/ci --device /dev/kfd --device /dev/dri --security-opt seccomp=unconfined
env:
CMAKE_GENERATOR: Ninja
CMAKE_BUILD_PARALLEL_LEVEL: 8 # num threads for build
steps:
- uses: actions/checkout@v3
with:
submodules: 'true'
- name: Configure
run: |
cmake -B build \
-DMACHINE_CFG=${PWD}/cmake/machinecfg/GitHubActions.cmake \
-DCMAKE_BUILD_TYPE=Release \
-DMACHINE_VARIANT=hip-mpi \
-DCMAKE_CXX_COMPILER=hipcc
# Test example with "variables" and output
- name: advection
run: |
cmake --build build -t advection-example
cd build
ctest -R regression_mpi_test:output_hdf5
# Test example with swarms
- name: particle-leapfrog
run: |
cmake --build build -t particle-leapfrog
cd build
ctest -R regression_mpi_test:particle_leapfrog
- uses: actions/upload-artifact@v3
with:
name: configure-log-integration-amdgpu
path: |
build/CMakeFiles/CMakeOutput.log
retention-days: 3

25 changes: 24 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,14 @@
## Current develop

### Added (new features/APIs/variables/...)
- [[PR 1047]](https://github.com/parthenon-hpc-lab/parthenon/pull/1047) General three- and four-valent 2D forests w/ arbitrary orientations.
- [[PR 1130]](https://github.com/parthenon-hpc-lab/parthenon/pull/1130) Enable `parthenon::par_reduce` for MD loops with Kokkos 1D Range
- [[PR 1119]](https://github.com/parthenon-hpc-lab/parthenon/pull/1119) Formalize MeshData partitioning.
- [[PR 1128]](https://github.com/parthenon-hpc-lab/parthenon/pull/1128) Add cycle and nbtotal to hst
- [[PR 1099]](https://github.com/parthenon-hpc-lab/parthenon/pull/1099) Functionality for outputting task graphs in GraphViz format.
- [[PR 1091]](https://github.com/parthenon-hpc-lab/parthenon/pull/1091) Add vector wave equation example.
- [[PR 991]](https://github.com/parthenon-hpc-lab/parthenon/pull/991) Add fine fields.
- [[PR 1106]](https://github.com/parthenon-hpc-lab/parthenon/pull/1106) Add CMake options for turning on ASAN and HWASAN
- [[PR 1100]](https://github.com/parthenon-hpc-lab/parthenon/pull/1100) Custom refinement ops propagated to fluxes
- [[PR 1090]](https://github.com/parthenon-hpc-lab/parthenon/pull/1090) SMR with swarms
- [[PR 1079]](https://github.com/parthenon-hpc-lab/parthenon/pull/1079) Address XDMF/Visit Issues
Expand All @@ -19,11 +27,19 @@
- [[PR 1019]](https://github.com/parthenon-hpc-lab/parthenon/pull/1019) Enable output for non-cell-centered variables

### Changed (changing behavior/API/variables/...)
- [[PR 1105]](https://github.com/parthenon-hpc-lab/parthenon/pull/1105) Refactor parameter input for linear solvers
- [[PR 1078]](https://github.com/parthenon-hpc-lab/parthenon/pull/1078) Add reduction fallback in 1D. Add IndexRange overload for 1D par loops
- [[PR 1024]](https://github.com/parthenon-hpc-lab/parthenon/pull/1024) Add .outN. to history output filenames
- [[PR 1004]](https://github.com/parthenon-hpc-lab/parthenon/pull/1004) Allow parameter modification from an input file for restarts

### Fixed (not changing behavior/API/variables/...)
- [[PR 1131]](https://github.com/parthenon-hpc-lab/parthenon/pull/1131) Make deallocation of fine and sparse fields work
- [[PR 1127]](https://github.com/parthenon-hpc-lab/parthenon/pull/1127) Add WithFluxes to IsRefined check
- [[PR 1111]](https://github.com/parthenon-hpc-lab/parthenon/pull/1111) Fix undefined behavior due to bitshift of negative number in LogicalLocation
- [[PR 1092]](https://github.com/parthenon-hpc-lab/parthenon/pull/1092) Updates to DataCollection and MeshData to remove requirement of predefining MeshBlockData
- [[PR 1113]](https://github.com/parthenon-hpc-lab/parthenon/pull/1113) Prevent division by zero
- [[PR 1112]](https://github.com/parthenon-hpc-lab/parthenon/pull/1112) Remove shared_ptr cycle in forest::Tree
- [[PR 1104]](https://github.com/parthenon-hpc-lab/parthenon/pull/1104) Fix reading restarts due to hidden ghost var
- [[PR 1098]](https://github.com/parthenon-hpc-lab/parthenon/pull/1098) Move to symmetrized logical coordinates and fix SMR bug
- [[PR 1095]](https://github.com/parthenon-hpc-lab/parthenon/pull/1095) Add missing include guards in hdf5 restart
- [[PR 1093]](https://github.com/parthenon-hpc-lab/parthenon/pull/1093) Fix forest size for symmetry dimensions
Expand All @@ -43,6 +59,11 @@
- [[PR 1031]](https://github.com/parthenon-hpc-lab/parthenon/pull/1031) Fix bug in non-cell centered AMR

### Infrastructure (changes irrelevant to downstream codes)
- [[PR 1117]](https://github.com/parthenon-hpc-lab/parthenon/pull/1117) Enable CI pipelines on AMD GPUs with ROCM/HIP
- [[PR 1114]](https://github.com/parthenon-hpc-lab/parthenon/pull/1114) Enable sanitizers for extended CI host build
- [[PR 1123]](https://github.com/parthenon-hpc-lab/parthenon/pull/1123) Default initialize ProResInfo.dir
- [[PR 1121]](https://github.com/parthenon-hpc-lab/parthenon/pull/1121) Default initialize BndInfo.dir
- [[PR 1116]](https://github.com/parthenon-hpc-lab/parthenon/pull/1116) Fix NumPy 2.0 test script breakage
- [[PR 1055]](https://github.com/parthenon-hpc-lab/parthenon/pull/1055) Refactor mesh constructors
- [[PR 1066]](https://github.com/parthenon-hpc-lab/parthenon/pull/1066) Re-introduce default loop patterns and exec spaces
- [[PR 1064]](https://github.com/parthenon-hpc-lab/parthenon/pull/1064) Forbid erroneous edge case when adding MeshData on a partition
Expand All @@ -53,9 +74,11 @@


### Removed (removing behavior/API/variables/...)

- [[PR 1108]](https://github.com/parthenon-hpc-lab/parthenon/pull/1108) Remove NaN payload tags infrastructure

### Incompatibilities (i.e. breaking changes)
- [[PR 1128]](https://github.com/parthenon-hpc-lab/parthenon/pull/1128) Add cycle and nbtotal to hst
- [[PR 1108]](https://github.com/parthenon-hpc-lab/parthenon/pull/1108) Remove NaN payload tags infrastructure
- [[PR 1026]](https://github.com/parthenon-hpc-lab/parthenon/pull/1026) Particle BCs without relocatable device code
- [[PR 1037]](https://github.com/parthenon-hpc-lab/parthenon/pull/1037) Add SwarmPacks
- [[PR 1042]](https://github.com/parthenon-hpc-lab/parthenon/pull/1042) Use Offset class and clean up of NeighborBlock
Expand Down
20 changes: 17 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -57,14 +57,16 @@ option(CHECK_REGISTRY_PRESSURE "Check the registry pressure for Kokkos CUDA kern
option(TEST_INTEL_OPTIMIZATION "Test intel optimization and vectorization" OFF)
option(TEST_ERROR_CHECKING "Enables the error checking unit test. This test will FAIL" OFF)
option(CODE_COVERAGE "Enable code coverage reporting" OFF)
option(ENABLE_ASAN "Turn on ASAN" OFF)
option(ENABLE_HWASAN "Turn on HWASAN (currently ARM-only)" OFF)

include(cmake/Format.cmake)
include(cmake/Lint.cmake)

# regression test reference data
set(REGRESSION_GOLD_STANDARD_VER 23 CACHE STRING "Version of gold standard to download and use")
set(REGRESSION_GOLD_STANDARD_VER 24 CACHE STRING "Version of gold standard to download and use")
set(REGRESSION_GOLD_STANDARD_HASH
"SHA512=bb070f78ae0ecd65bd662f670eee60b4414804770b5041867652d9b5a8e411c59612457499a532068b2584acaa6d120ceb0db96bfde196a9cd129a6246b76fb3"
"SHA512=e220df92a335131131e42ddb52dc221a6dbd6bb56361483b4af0292620eeb82ffb21ef3b95fd9a7c5cc158fb754da0bf1a1015bec98b5bbad05f4bceb1ee99bc"
CACHE STRING "Hash of default gold standard file to download")
option(REGRESSION_GOLD_STANDARD_SYNC "Automatically sync gold standard files." ON)

Expand Down Expand Up @@ -290,7 +292,19 @@ if (Kokkos_ENABLE_CUDA AND "${PARTHENON_ENABLE_GPU_MPI_CHECKS}" )
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/CTestCustom.cmake.in ${CMAKE_BINARY_DIR}/CTestCustom.cmake @ONLY)
endif()


# option to turn on AddressSanitizer for debugging
if(ENABLE_ASAN)
message(STATUS "Compiling with AddressSanitizer and UndefinedBehaviorSanitizer *enabled*")
add_compile_options(-fsanitize=address -fsanitize=undefined -fno-sanitize-recover=all -fsanitize=float-divide-by-zero -fsanitize=float-cast-overflow -fno-sanitize=null -fno-sanitize=alignment)
add_link_options(-fsanitize=address -fsanitize=undefined)
endif(ENABLE_ASAN)

# option to turn on HWAddressSanitizer for debugging
if(ENABLE_HWASAN)
message(STATUS "Compiling with HWAddressSanitizer *enabled*")
add_compile_options(-fsanitize=hwaddress)
add_link_options(-fsanitize=hwaddress)
endif(ENABLE_HWASAN)


# Build Tests and download Catch2
Expand Down
Binary file added MG_grid_hierarchy.pdf
Binary file not shown.
6 changes: 3 additions & 3 deletions benchmarks/burgers/burgers_diff.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python
# ========================================================================================
# (C) (or copyright) 2023. Triad National Security, LLC. All rights reserved.
# (C) (or copyright) 2024. Triad National Security, LLC. All rights reserved.
#
# This program was produced under U.S. Government contract 89233218CNA000001 for Los
# Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC
Expand All @@ -21,7 +21,7 @@
description="Compute difference between two history solvers parthenon VIBE",
)
parser.add_argument("file1", type=str, help="First file in diff")
parser.add_argument("file2", type=str, help="Second fiel in diff")
parser.add_argument("file2", type=str, help="Second file in diff")
parser.add_argument(
"-t", "--tolerance", type=float, default=1e-8, help="Relative tolerance for diff"
)
Expand Down Expand Up @@ -54,4 +54,4 @@ def compare_files(file1, file2, tolerance, print_results=True):

if __name__ == "__main__":
args = parser.parse_args()
sys.exit(compare_files(args.file1, args.file1, args.tolerance, True))
sys.exit(compare_files(args.file1, args.file2, args.tolerance, True))
5 changes: 1 addition & 4 deletions benchmarks/burgers/burgers_package.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//========================================================================================
// (C) (or copyright) 2020-2023. Triad National Security, LLC. All rights reserved.
// (C) (or copyright) 2020-2024. Triad National Security, LLC. All rights reserved.
//
// This program was produced under U.S. Government contract 89233218CNA000001 for Los
// Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC
Expand Down Expand Up @@ -132,7 +132,6 @@ std::shared_ptr<StateDescriptor> Initialize(ParameterInput *pin) {
hst_vars.emplace_back(HstSum, ReduceMass, "MS Mass " + std::to_string(i_octant));
i_octant++;
}
hst_vars.emplace_back(HstSum, MeshCountHistory, "Meshblock count");
pkg->AddParam(parthenon::hist_param_key, hst_vars);

pkg->EstimateTimestepMesh = EstimateTimestepMesh;
Expand Down Expand Up @@ -439,6 +438,4 @@ Real MassHistory(MeshData<Real> *md, const Real x1min, const Real x1max, const R
return result;
}

Real MeshCountHistory(MeshData<Real> *md) { return md->NumBlocks(); }

} // namespace burgers_package
3 changes: 1 addition & 2 deletions benchmarks/burgers/burgers_package.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//========================================================================================
// (C) (or copyright) 2020-2023. Triad National Security, LLC. All rights reserved.
// (C) (or copyright) 2020-2024. Triad National Security, LLC. All rights reserved.
//
// This program was produced under U.S. Government contract 89233218CNA000001 for Los
// Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC
Expand Down Expand Up @@ -27,7 +27,6 @@ Real EstimateTimestepMesh(MeshData<Real> *md);
TaskStatus CalculateFluxes(MeshData<Real> *md);
Real MassHistory(MeshData<Real> *md, const Real x1min, const Real x1max, const Real x2min,
const Real x2max, const Real x3min, const Real x3max);
Real MeshCountHistory(MeshData<Real> *md);

// compute the hll flux for Burgers' equation
KOKKOS_INLINE_FUNCTION
Expand Down
2 changes: 1 addition & 1 deletion cmake/TestSetup.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ function(setup_test_parallel nproc dir arg extra_labels)
list(APPEND labels "${extra_labels}")

if(Kokkos_ENABLE_CUDA OR Kokkos_ENABLE_HIP)
set(PARTHENON_KOKKOS_TEST_ARGS "--kokkos-num-devices=${NUM_GPU_DEVICES_PER_NODE}")
set(PARTHENON_KOKKOS_TEST_ARGS "--kokkos-map-device-id-by=mpi_rank")
list(APPEND labels "cuda")
endif()
if (Kokkos_ENABLE_OPENMP)
Expand Down
5 changes: 3 additions & 2 deletions cmake/machinecfg/GitHubActions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,10 @@ if (${MACHINE_VARIANT} MATCHES "cuda")
set(MACHINE_CXX_FLAGS "${MACHINE_CXX_FLAGS} -Wno-unknown-cuda-version")
endif()
elseif (${MACHINE_VARIANT} MATCHES "hip")
# using an arbitrary arch as GitHub Action runners don't have GPUs
set(Kokkos_ARCH_VEGA908 ON CACHE BOOL "GPU architecture")
# using an arch that matches Hamilton at Hamburg Obs
set(Kokkos_ARCH_NAVI1030 ON CACHE BOOL "GPU architecture")
set(Kokkos_ENABLE_HIP ON CACHE BOOL "Enable HIP")
set(Kokkos_ENABLE_ZEN3 ON CACHE BOOL "Enable Zen3")
else()
set(MACHINE_CXX_FLAGS "${MACHINE_CXX_FLAGS} -fopenmp-simd")
endif()
Expand Down
Binary file added convergence.pdf
Binary file not shown.
Binary file added doc/latex/MG_grid_hierarchy.pdf
Binary file not shown.
Binary file added doc/latex/convergence.pdf
Binary file not shown.
37 changes: 37 additions & 0 deletions doc/latex/coordinate_transform.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
\begin{tikzpicture}
\begin{axis}[grid=both,
ymin=0,
ymax=4.5,
xmax=4.5,
xmin=0,
xticklabel=\empty,
yticklabel=\empty,
minor tick num=1,
axis lines = middle,
xlabel=$x_1$,
ylabel=$x_2$,
label style = {at={(ticklabel cs:1.1)}},
axis equal=true, width=6cm, height=6cm]

\coordinate (t1ll) at (1, 1);
\pic at (t1ll) {ig_tree={t1, $\Omega_1$}};
\pic at (t1ll) {ig_tree_region={{0.8, 0.8}, {1.0, 1.0}, green}};

\coordinate (t2ll) at (3, 1);
\pic at (t2ll) {ig_tree={t2, $\Omega_2$}};
\pic at (t2ll) {ig_tree_region={{-0.2, 0.8}, {0.0, 1.0}, green}};

\coordinate (t3ll) at (3, 3);
\pic at (t3ll) {ig_tree={t3, $\Omega_3$}};
\pic at (t3ll) {ig_tree_region={{-0.2, -0.2}, {0.0, 0.0}, green}};

\path[thick, ->] ([shift={(0.9, 0.85)}]t1ll) edge[bend right] node [below]
{$\tau_{1 \rightarrow 2}$} ([shift={(-0.15, 0.85)}]t2ll);

\path[thick, ->] ([shift={(-0.05, 0.95)}]t2ll) edge[bend right] node [right]
{$\tau_{2 \rightarrow 3}$} ([shift={(-0.05, -0.15)}]t3ll);

\path[thick, ->] ([shift={(0.9, 0.95)}]t1ll) edge node [left]
{$\tau_{1 \rightarrow 3}$} ([shift={(-0.15, -0.05)}]t3ll);
\end{axis}
\end{tikzpicture}
Loading

0 comments on commit bdb997a

Please sign in to comment.