Skip to content

Commit

Permalink
first pass at env config refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
ekemper committed Nov 20, 2024
1 parent 80e5e90 commit d974fb2
Show file tree
Hide file tree
Showing 4 changed files with 251 additions and 166 deletions.
179 changes: 13 additions & 166 deletions cstar/base/environment.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
import io
import os
import platform
import importlib.util
from pathlib import Path
from typing import Optional
from contextlib import redirect_stderr, redirect_stdout
from cstar.base.runtime_env_config import RuntimeEnvConfig

top_level_package_name = __name__.split(".")[0]
spec = importlib.util.find_spec(top_level_package_name)
Expand All @@ -25,170 +23,19 @@
_CSTAR_SYSTEM_MEMGB_PER_NODE: Optional[int]
_CSTAR_SYSTEM_MAX_WALLTIME: Optional[str]

runtimeConfig = RuntimeEnvConfig(_CSTAR_ROOT)

if (platform.system() == "Linux") and ("LMOD_DIR" in list(os.environ)):
# Dynamically load the env_modules_python module using pathlib
module_path = Path(os.environ["LMOD_DIR"]).parent / "init" / "env_modules_python.py"
spec = importlib.util.spec_from_file_location("env_modules_python", module_path)
if (spec is None) or (spec.loader is None):
raise EnvironmentError(
f"Could not find env_modules_python on this machine at {module_path}"
)
env_modules = importlib.util.module_from_spec(spec)
if env_modules is None:
raise EnvironmentError(
f"No module found by importlib corresponding to spec {spec}"
)
spec.loader.exec_module(env_modules)
module = env_modules.module
print(runtimeConfig.envVars)

sysname = os.environ.get("LMOD_SYSHOST") or os.environ.get("LMOD_SYSTEM_NAME")
if not sysname:
raise EnvironmentError(
"unable to find LMOD_SYSHOST or LMOD_SYSTEM_NAME in environment. "
+ "Your system may be unsupported"
)

module_stdout = io.StringIO()
module_stderr = io.StringIO()

# Load Linux Environment Modules for this machine:
with redirect_stdout(module_stdout), redirect_stderr(module_stderr):
module("reset")
with open(f"{_CSTAR_ROOT}/additional_files/lmod_lists/{sysname}.lmod") as F:
lmod_list = F.readlines()
for mod in lmod_list:
module("load", mod)
if any(
keyword in module_stderr.getvalue().casefold() for keyword in ["fail", "error"]
):
raise EnvironmentError(
"Error with linux environment modules: " + module_stderr.getvalue()
)

match sysname:
case "expanse":
_CSTAR_ENVIRONMENT_VARIABLES["NETCDFHOME"] = os.environ[
"NETCDF_FORTRANHOME"
]
_CSTAR_ENVIRONMENT_VARIABLES["MPIHOME"] = os.environ["MVAPICH2HOME"]
_CSTAR_ENVIRONMENT_VARIABLES["NETCDF"] = os.environ["NETCDF_FORTRANHOME"]
_CSTAR_ENVIRONMENT_VARIABLES["MPI_ROOT"] = os.environ["MVAPICH2HOME"]
_CSTAR_COMPILER = "intel"
_CSTAR_SYSTEM = "expanse"
_CSTAR_SCHEDULER = (
"slurm" # can get this with `scontrol show config` or `sinfo --version`
)
_CSTAR_SYSTEM_DEFAULT_PARTITION = "compute"
_CSTAR_SYSTEM_CORES_PER_NODE = (
128 # cpu nodes, can get dynamically node-by-node
)
_CSTAR_SYSTEM_MEMGB_PER_NODE = 256 # with `sinfo -o "%n %c %m %l"`
_CSTAR_SYSTEM_MAX_WALLTIME = "48:00:00" # (hostname/cpus/mem[MB]/walltime)

case "derecho":
_CSTAR_ENVIRONMENT_VARIABLES["MPIHOME"] = (
"/opt/cray/pe/mpich/8.1.25/ofi/intel/19.0/"
)
_CSTAR_ENVIRONMENT_VARIABLES["NETCDFHOME"] = os.environ["NETCDF"]
_CSTAR_ENVIRONMENT_VARIABLES["LD_LIBRARY_PATH"] = (
os.environ.get("LD_LIBRARY_PATH", default="")
+ ":"
+ _CSTAR_ENVIRONMENT_VARIABLES["NETCDFHOME"]
+ "/lib"
)

_CSTAR_COMPILER = "intel"
_CSTAR_SYSTEM = "derecho"
_CSTAR_SCHEDULER = (
"pbs" # can determine dynamically by testing for `qstat --version`
)
_CSTAR_SYSTEM_DEFAULT_PARTITION = "main"
_CSTAR_SYSTEM_CORES_PER_NODE = (
128 # Harder to dynamically get this info on PBS
)
_CSTAR_SYSTEM_MEMGB_PER_NODE = (
256 # Can combine `qstat -Qf` and `pbsnodes -a`
)
_CSTAR_SYSTEM_MAX_WALLTIME = "12:00:00" # with grep or awk

case "perlmutter":
_CSTAR_ENVIRONMENT_VARIABLES["MPIHOME"] = (
"/opt/cray/pe/mpich/8.1.28/ofi/gnu/12.3/"
)
_CSTAR_ENVIRONMENT_VARIABLES["NETCDFHOME"] = (
"/opt/cray/pe/netcdf/4.9.0.9/gnu/12.3/"
)
_CSTAR_ENVIRONMENT_VARIABLES["PATH"] = (
os.environ.get("PATH", default="")
+ ":"
+ _CSTAR_ENVIRONMENT_VARIABLES["NETCDFHOME"]
+ "/bin"
)
_CSTAR_ENVIRONMENT_VARIABLES["LD_LIBRARY_PATH"] = (
os.environ.get("LD_LIBRARY_PATH", default="")
+ ":"
+ _CSTAR_ENVIRONMENT_VARIABLES["NETCDFHOME"]
+ "/lib"
)
_CSTAR_ENVIRONMENT_VARIABLES["LIBRARY_PATH"] = (
os.environ.get("LIBRARY_PATH", default="")
+ ":"
+ _CSTAR_ENVIRONMENT_VARIABLES["NETCDFHOME"]
+ "/lib"
)

_CSTAR_COMPILER = "gnu"
_CSTAR_SYSTEM = "perlmutter"
_CSTAR_SCHEDULER = "slurm"
_CSTAR_SYSTEM_DEFAULT_PARTITION = "regular"
_CSTAR_SYSTEM_CORES_PER_NODE = (
128 # cpu nodes, can get dynamically node-by-node
)
_CSTAR_SYSTEM_MEMGB_PER_NODE = 512 # with `sinfo -o "%n %c %m %l"`
_CSTAR_SYSTEM_MAX_WALLTIME = "24:00:00" # (hostname/cpus/mem[MB]/walltime)


elif (platform.system() == "Darwin") and (platform.machine() == "arm64"):
# if on MacOS arm64 all dependencies should have been installed by conda

_CSTAR_ENVIRONMENT_VARIABLES["MPIHOME"] = os.environ["CONDA_PREFIX"]
_CSTAR_ENVIRONMENT_VARIABLES["NETCDFHOME"] = os.environ["CONDA_PREFIX"]
_CSTAR_ENVIRONMENT_VARIABLES["LD_LIBRARY_PATH"] = (
os.environ.get("LD_LIBRARY_PATH", default="")
+ ":"
+ _CSTAR_ENVIRONMENT_VARIABLES["NETCDFHOME"]
+ "/lib"
)
_CSTAR_COMPILER = "gnu"
_CSTAR_SYSTEM = "osx_arm64"
_CSTAR_SCHEDULER = None
_CSTAR_SYSTEM_DEFAULT_PARTITION = None
_CSTAR_SYSTEM_CORES_PER_NODE = os.cpu_count()
_CSTAR_SYSTEM_MEMGB_PER_NODE = None
_CSTAR_SYSTEM_MAX_WALLTIME = None

elif (
(platform.system() == "Linux")
and (platform.machine() == "x86_64")
and ("LMOD_DIR" not in list(os.environ))
):
_CSTAR_ENVIRONMENT_VARIABLES["MPIHOME"] = os.environ["CONDA_PREFIX"]
_CSTAR_ENVIRONMENT_VARIABLES["NETCDFHOME"] = os.environ["CONDA_PREFIX"]
_CSTAR_ENVIRONMENT_VARIABLES["LD_LIBRARY_PATH"] = (
os.environ.get("LD_LIBRARY_PATH", default="")
+ ":"
+ _CSTAR_ENVIRONMENT_VARIABLES["NETCDFHOME"]
+ "/lib"
)
_CSTAR_COMPILER = "gnu"
_CSTAR_SYSTEM = "linux_x86_64"
_CSTAR_SCHEDULER = None
_CSTAR_SYSTEM_DEFAULT_PARTITION = None
_CSTAR_SYSTEM_CORES_PER_NODE = os.cpu_count()
_CSTAR_SYSTEM_MEMGB_PER_NODE = None
_CSTAR_SYSTEM_MAX_WALLTIME = None
# TODO: lots of this is repeat code, can determine a lot of these vars using functions rather than hardcoding
# Re-export each setting held by runtimeConfig.envVars as a module-level name.
# TODO: replace this one-assignment-per-attribute list with a less repetitive
# form (the equivalent of object destructuring in JavaScript).
_CSTAR_COMPILER = runtimeConfig.envVars._CSTAR_COMPILER
_CSTAR_SYSTEM = runtimeConfig.envVars._CSTAR_SYSTEM
_CSTAR_SCHEDULER = runtimeConfig.envVars._CSTAR_SCHEDULER
_CSTAR_ENVIRONMENT_VARIABLES = runtimeConfig.envVars._CSTAR_ENVIRONMENT_VARIABLES
_CSTAR_SYSTEM_DEFAULT_PARTITION = runtimeConfig.envVars._CSTAR_SYSTEM_DEFAULT_PARTITION
_CSTAR_SYSTEM_CORES_PER_NODE = runtimeConfig.envVars._CSTAR_SYSTEM_CORES_PER_NODE
_CSTAR_SYSTEM_MEMGB_PER_NODE = runtimeConfig.envVars._CSTAR_SYSTEM_MEMGB_PER_NODE
_CSTAR_SYSTEM_MAX_WALLTIME = runtimeConfig.envVars._CSTAR_SYSTEM_MAX_WALLTIME

# Now read the local/custom initialisation file
# This sets variables associated with external codebases that are not installed
Expand All @@ -197,8 +44,8 @@
# Path at which an optional local configuration module is looked for.
_CSTAR_CONFIG_FILE = _CSTAR_ROOT + "/cstar_local_config.py"
if Path(_CSTAR_CONFIG_FILE).exists():
# Imported only when the file exists, so a missing local config is not an error.
from cstar.cstar_local_config import get_user_environment

get_user_environment()

# Copy every entry of _CSTAR_ENVIRONMENT_VARIABLES into this process's environment.
for var, value in _CSTAR_ENVIRONMENT_VARIABLES.items():
os.environ[var] = value

Expand Down
100 changes: 100 additions & 0 deletions cstar/base/hpc_env_var_map.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import os

PERLMUTTER_NCDF_HOME_PATH = "/opt/cray/pe/netcdf/4.9.0.9/gnu/12.3/"


def _appended_search_path(var_name, entry):
    """Return the current value of ``var_name`` with ``entry`` appended after a colon.

    NOTE(review): when the variable is unset or empty the result starts with
    ":"; this is kept to preserve the strings this module already produced.
    Confirm whether the leading empty entry is intended.
    """
    return os.environ.get(var_name, default="") + ":" + entry


def _expanse_values():
    """Build the settings for "expanse".

    Raises:
        KeyError: if NETCDF_FORTRANHOME or MVAPICH2HOME is not set.
    """
    netcdf_home = os.environ["NETCDF_FORTRANHOME"]
    mpi_home = os.environ["MVAPICH2HOME"]
    return {
        "_CSTAR_ENVIRONMENT_VARIABLES": {
            "NETCDFHOME": netcdf_home,
            "MPIHOME": mpi_home,
            "NETCDF": netcdf_home,
            "MPI_ROOT": mpi_home,
        },
        "_CSTAR_COMPILER": "intel",
        "_CSTAR_SYSTEM": "expanse",
        # can get this with `scontrol show config` or `sinfo --version`
        "_CSTAR_SCHEDULER": "slurm",
        "_CSTAR_SYSTEM_DEFAULT_PARTITION": "compute",
        # with `sinfo -o "%n %c %m %l"` (hostname/cpus/mem[MB]/walltime)
        "_CSTAR_SYSTEM_MEMGB_PER_NODE": 256,
        "_CSTAR_SYSTEM_MAX_WALLTIME": "48:00:00",
    }


def _derecho_values():
    """Build the settings for "derecho".

    Raises:
        KeyError: if NETCDF is not set.
    """
    netcdf_home = os.environ["NETCDF"]
    return {
        "_CSTAR_ENVIRONMENT_VARIABLES": {
            "MPIHOME": "/opt/cray/pe/mpich/8.1.25/ofi/intel/19.0/",
            "NETCDFHOME": netcdf_home,
            "LD_LIBRARY_PATH": _appended_search_path(
                "LD_LIBRARY_PATH", netcdf_home + "/lib"
            ),
        },
        "_CSTAR_COMPILER": "intel",
        "_CSTAR_SYSTEM": "derecho",
        # can determine dynamically by testing for `qstat --version`
        "_CSTAR_SCHEDULER": "pbs",
        "_CSTAR_SYSTEM_DEFAULT_PARTITION": "main",
        # Can combine `qstat -Qf` and `pbsnodes -a`
        # (this key was previously misspelled "_CSTAR_SYSTEM_MEMGB_PER_NOD")
        "_CSTAR_SYSTEM_MEMGB_PER_NODE": 256,
        "_CSTAR_SYSTEM_MAX_WALLTIME": "12:00:00",  # with grep or awk
    }


def _perlmutter_values():
    """Build the settings for "perlmutter"; no environment variable is required."""
    return {
        "_CSTAR_ENVIRONMENT_VARIABLES": {
            "MPIHOME": "/opt/cray/pe/mpich/8.1.28/ofi/gnu/12.3/",
            "NETCDFHOME": PERLMUTTER_NCDF_HOME_PATH,
            "PATH": _appended_search_path(
                "PATH", PERLMUTTER_NCDF_HOME_PATH + "/bin"
            ),
            "LD_LIBRARY_PATH": _appended_search_path(
                "LD_LIBRARY_PATH", PERLMUTTER_NCDF_HOME_PATH + "/lib"
            ),
            "LIBRARY_PATH": _appended_search_path(
                "LIBRARY_PATH", PERLMUTTER_NCDF_HOME_PATH + "/lib"
            ),
        },
        "_CSTAR_COMPILER": "gnu",
        "_CSTAR_SYSTEM": "perlmutter",
        "_CSTAR_SCHEDULER": "slurm",
        "_CSTAR_SYSTEM_DEFAULT_PARTITION": "regular",
        # with `sinfo -o "%n %c %m %l"` (hostname/cpus/mem[MB]/walltime)
        "_CSTAR_SYSTEM_MEMGB_PER_NODE": 512,
        "_CSTAR_SYSTEM_MAX_WALLTIME": "24:00:00",
    }


# One builder per supported system. Each builder reads os.environ only when it
# is called, so resolving one system never requires another system's variables.
_SYSTEM_BUILDERS = {
    "expanse": _expanse_values,
    "derecho": _derecho_values,
    "perlmutter": _perlmutter_values,
}


def values():
    """Return the settings of every supported system, keyed by system name.

    Kept for backward compatibility. Every builder runs, so this raises
    KeyError unless the environment variables of *all* systems are set; use
    determineHPCEnvVars() to resolve a single system.
    """
    return {name: build() for name, build in _SYSTEM_BUILDERS.items()}


commonValues = {
    # cpu nodes, can get dynamically node-by-node
    "_CSTAR_SYSTEM_CORES_PER_NODE": 128,
}


def determineHPCEnvVars(sysname):
    """Return the settings for ``sysname`` merged over ``commonValues``.

    Args:
        sysname: system name; one of the keys of the builder table
            ("expanse", "derecho", "perlmutter").

    Returns:
        A new dict holding the entries of ``commonValues`` overridden by the
        system-specific entries.

    Raises:
        EnvironmentError: if ``sysname`` is not a supported system.
        KeyError: if an environment variable required by that system is unset.
    """
    build_system_values = _SYSTEM_BUILDERS.get(sysname)
    if build_system_values is None:
        raise EnvironmentError(
            f"Unable to configure environment variables for system: {sysname}"
        )

    # Only the requested system's builder is evaluated.
    return {**commonValues, **build_system_values()}
Loading

0 comments on commit d974fb2

Please sign in to comment.