diff --git a/CI/docker-compose.yml b/CI/docker-compose.yml index cd533e88..b4efb375 100644 --- a/CI/docker-compose.yml +++ b/CI/docker-compose.yml @@ -1,5 +1,3 @@ -version: '3' - services: controller: image: @container_image@ diff --git a/CI/src/common/util.py b/CI/src/common/util.py index 51a244e6..5c028ace 100644 --- a/CI/src/common/util.py +++ b/CI/src/common/util.py @@ -269,6 +269,14 @@ def assert_sarus_raises_error_containing_text(command, text): 'Expected: {}'.format(sarus_output, text)) +def assert_sarus_raises_error_containing_regex(command, expr): + import re + sarus_output = get_sarus_error_output(command) + assert re.search(expr, sarus_output), ('Sarus generated an error, but it did not contain the expected regex.\n' + 'Generated message: {}\n' + 'Expected: "{}"'.format(sarus_output, expr)) + + def modify_sarus_json(new_parameters): if os.geteuid() == 0: backup_sarus_json() diff --git a/CI/src/integration_tests/test_mpi_hook.py b/CI/src/integration_tests/test_mpi_hook.py index c64f94e7..9396bcf8 100755 --- a/CI/src/integration_tests/test_mpi_hook.py +++ b/CI/src/integration_tests/test_mpi_hook.py @@ -14,162 +14,262 @@ import common.util as util +_OCIHOOK_CONFIG_FILE = os.environ["CMAKE_INSTALL_PREFIX"] + "/etc/hooks.d/mpi_hook.json" + +_SITE_LIBS_PREFIX = tempfile.mkdtemp() + +_HOST_MPI_LIBS_FULL = {"libmpi.so.12.5.5", "libmpich.so.12.5.5"} +_HOST_MPI_LIBS_MAJOR = {"libmpi.so.12", "libmpich.so.12"} +_HOST_MPI_DEPENDENCY_LIBS = {"libdependency0.so", "libdependency1.so"} + +_CI_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +_DUMMY_LIB_PATH = _CI_DIR + "/dummy_libs/lib_dummy_0.so" +_HOST_LIB_HASH = util.generate_file_md5_hash(_DUMMY_LIB_PATH, "md5") + + +def _generate_base_hook_config(): + mpi_dependency_libs = [_SITE_LIBS_PREFIX + "/" + value for value in _HOST_MPI_DEPENDENCY_LIBS] + + hook_config = { + "version": "1.0.0", + "hook": { + "path": os.environ["CMAKE_INSTALL_PREFIX"] + "/bin/mpi_hook", + "env": [ + "LDCONFIG_PATH=" + shutil.which("ldconfig"), + "MPI_DEPENDENCY_LIBS=" + ":".join(mpi_dependency_libs), + ] + }, + "when": { + "annotations": { + "^com.hooks.mpi.enabled$": "^true$" + } + }, + "stages": ["createContainer"] + } + + return hook_config + + +def _get_hook_config_and_libs_default(): + mpi_libs = _HOST_MPI_LIBS_MAJOR + mpi_libs_paths = [_SITE_LIBS_PREFIX + "/" + value for value in mpi_libs] + hook_config = _generate_base_hook_config() + hook_config["hook"]["env"].append("MPI_LIBS=" + ":".join(mpi_libs_paths)) + return hook_config, mpi_libs + + +def _get_hook_config_and_libs_major(): + hook_config, mpi_libs = _get_hook_config_and_libs_default() + hook_config["hook"]["env"].append("MPI_COMPATIBILITY_TYPE=major") + return hook_config, mpi_libs + + +def _get_hook_config_and_libs_full(): + mpi_libs = _HOST_MPI_LIBS_FULL + mpi_libs_paths = [_SITE_LIBS_PREFIX + "/" + value for value in mpi_libs] + hook_config = _generate_base_hook_config() + hook_config["hook"]["env"].append("MPI_LIBS=" + ":".join(mpi_libs_paths)) + hook_config["hook"]["env"].append("MPI_COMPATIBILITY_TYPE=full") + return hook_config, mpi_libs + + +def _get_hook_config_and_libs_strict(): + hook_config, mpi_libs = _get_hook_config_and_libs_full() + hook_config["hook"]["env"].append("MPI_COMPATIBILITY_TYPE=strict") + return hook_config, mpi_libs + + +def assert_sarus_raises_mpi_warning_containing_text(container_image, text, expected_occurrences): + command = ["sarus", "run", "--mpi", container_image, "true"] + output = util.get_sarus_error_output(command, fail_expected=False) + number_of_occurrences = sum(["[WARN]" in line and text in line for line in output.split('\n')]) + assert number_of_occurrences == expected_occurrences, \ + 'Sarus didn\'t generate the expected MPI warnings containing the text "{}".'.format(text) + + +def assert_sarus_raises_mpi_error_containing_regex(container_image, text): + command = ["sarus", "run", "--mpi", container_image, "true"] + util.assert_sarus_raises_error_containing_regex(command, text) + + +def check_exception_message(hook_config, container_image, error_regex): + with util.temporary_hook_files((hook_config, _OCIHOOK_CONFIG_FILE)): + assert_sarus_raises_mpi_error_containing_regex(container_image=container_image, text=error_regex) + + +def get_hashes_of_host_libs_in_container(container_image, mpi_command_line_option=True): + options = [] + if mpi_command_line_option: + options.append("--mpi") + hashes = util.get_hashes_of_host_libs_in_container(is_centralized_repository=False, + image=container_image, + options_of_run_command=options) + return hashes + class TestMPIHook(unittest.TestCase): """ These tests verify that the host MPI libraries are properly brought into the container. """ - _OCIHOOK_CONFIG_FILE = os.environ["CMAKE_INSTALL_PREFIX"] + "/etc/hooks.d/mpi_hook.json" - - _SITE_LIBS_PREFIX = tempfile.mkdtemp() - - _HOST_MPI_LIBS = {"libmpi.so.12.5.5", "libmpich.so.12.5.5"} - _HOST_MPI_DEPENDENCY_LIBS = {"libdependency0.so", "libdependency1.so"} - - _CI_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - _DUMMY_LIB_PATH = _CI_DIR + "/dummy_libs/lib_dummy_0.so" - _HOST_LIB_HASH = util.generate_file_md5_hash(_DUMMY_LIB_PATH, "md5") @classmethod def setUpClass(cls): cls._pull_docker_images() cls._create_site_resources() - cls._enable_hook(cls._generate_hook_config()) @classmethod def tearDownClass(cls): cls._remove_site_resources() - cls._disable_hook() @classmethod def _pull_docker_images(cls): - util.pull_image_if_necessary(is_centralized_repository=False, - image="quay.io/ethcscs/sarus-integration-tests:mpich_compatible") - util.pull_image_if_necessary(is_centralized_repository=False, - image="quay.io/ethcscs/sarus-integration-tests:mpich_compatible_symlink") - util.pull_image_if_necessary(is_centralized_repository=False, - image="quay.io/ethcscs/sarus-integration-tests:mpich_major_incompatible") - util.pull_image_if_necessary(is_centralized_repository=False, - image="quay.io/ethcscs/sarus-integration-tests:mpich_minor_incompatible") - util.pull_image_if_necessary(is_centralized_repository=False, - image="quay.io/ethcscs/sarus-integration-tests:no_mpi_libraries") - util.pull_image_if_necessary(is_centralized_repository=False, - image="quay.io/ethcscs/sarus-integration-tests:nonexisting_ldcache_entry_f35") + for tag in [ + "mpich_compatible", "mpich_compatible_symlink", "mpich_major_incompatible", + "mpich_minor_incompatible", "no_mpi_libraries", "nonexisting_ldcache_entry_f35" + ]: + util.pull_image_if_necessary(is_centralized_repository=False, image=f"quay.io/ethcscs/sarus-integration-tests:{tag}") @classmethod def _create_site_resources(cls): # MPI libs - for value in cls._HOST_MPI_LIBS: - lib_path = cls._SITE_LIBS_PREFIX + "/" + value - subprocess.call(["cp", cls._DUMMY_LIB_PATH, lib_path]) + for value in _HOST_MPI_LIBS_FULL | _HOST_MPI_LIBS_MAJOR: + lib_path = _SITE_LIBS_PREFIX + "/" + value + subprocess.call(["cp", _DUMMY_LIB_PATH, lib_path]) # MPI dependency libs - for value in cls._HOST_MPI_DEPENDENCY_LIBS: - lib_path = cls._SITE_LIBS_PREFIX + "/" + value - subprocess.call(["cp", cls._DUMMY_LIB_PATH, lib_path]) + for value in _HOST_MPI_DEPENDENCY_LIBS: + lib_path = _SITE_LIBS_PREFIX + "/" + value + subprocess.call(["cp", _DUMMY_LIB_PATH, lib_path]) @classmethod def _remove_site_resources(cls): - shutil.rmtree(cls._SITE_LIBS_PREFIX) + shutil.rmtree(_SITE_LIBS_PREFIX) - @classmethod - def _enable_hook(cls, hook_config): - util.create_hook_file(hook_config, cls._OCIHOOK_CONFIG_FILE) + def test_no_mpi_support(self): + hook_config, _ = _get_hook_config_and_libs_default() + with util.temporary_hook_files((hook_config, _OCIHOOK_CONFIG_FILE)): + hashes = get_hashes_of_host_libs_in_container( + "quay.io/ethcscs/sarus-integration-tests:mpich_compatible", + mpi_command_line_option=False + ) + assert not hashes - @classmethod - def _disable_hook(cls): - subprocess.call(["sudo", "rm", cls._OCIHOOK_CONFIG_FILE]) + def test_mpich_compatible_default(self): + self._test_mpich_compatible(*_get_hook_config_and_libs_default()) - @classmethod - def _generate_hook_config(cls): - mpi_libs = [cls._SITE_LIBS_PREFIX + "/" + value for value in cls._HOST_MPI_LIBS] - mpi_dependency_libs = [cls._SITE_LIBS_PREFIX + "/" + value for value in cls._HOST_MPI_DEPENDENCY_LIBS] - - hook_config = { - "version": "1.0.0", - "hook": { - "path": os.environ["CMAKE_INSTALL_PREFIX"] + "/bin/mpi_hook", - "env": [ - "LDCONFIG_PATH=" + shutil.which("ldconfig"), - "MPI_LIBS=" + ":".join(mpi_libs), - "MPI_DEPENDENCY_LIBS=" + ":".join(mpi_dependency_libs), - ] - }, - "when": { - "annotations": { - "^com.hooks.mpi.enabled$": "^true$" - } - }, - "stages": ["createContainer"] - } + def test_mpich_compatible_major(self): + self._test_mpich_compatible(*_get_hook_config_and_libs_major()) - return hook_config + def test_mpich_compatible_full(self): + self._test_mpich_compatible(*_get_hook_config_and_libs_full()) - def setUp(self): - self._mpi_command_line_option = None + def test_mpich_compatible_strict(self): + self._test_mpich_compatible(*_get_hook_config_and_libs_strict()) - def test_no_mpi_support(self): - self._mpi_command_line_option = False - self._container_image = "quay.io/ethcscs/sarus-integration-tests:mpich_compatible" - hashes = self._get_hashes_of_host_libs_in_container() - assert not hashes - - def test_mpich_compatible(self): - self._mpi_command_line_option = True - self._container_image = "quay.io/ethcscs/sarus-integration-tests:mpich_compatible" - hashes = self._get_hashes_of_host_libs_in_container() - number_of_expected_mounts = len(self._HOST_MPI_LIBS) + len(self._HOST_MPI_DEPENDENCY_LIBS) - assert hashes.count(self._HOST_LIB_HASH) == number_of_expected_mounts - - def test_mpich_compatible_symlink(self): - self._mpi_command_line_option = True - self._container_image = "quay.io/ethcscs/sarus-integration-tests:mpich_compatible_symlink" - hashes = self._get_hashes_of_host_libs_in_container() - number_of_expected_mounts = len(self._HOST_MPI_LIBS) + len(self._HOST_MPI_DEPENDENCY_LIBS) - assert hashes.count(self._HOST_LIB_HASH) == number_of_expected_mounts + def _test_mpich_compatible(self, hook_config, mpi_libs): + with util.temporary_hook_files((hook_config, _OCIHOOK_CONFIG_FILE)): + hashes = get_hashes_of_host_libs_in_container("quay.io/ethcscs/sarus-integration-tests:mpich_compatible") + number_of_expected_mounts = len(mpi_libs) + len(_HOST_MPI_DEPENDENCY_LIBS) + assert hashes.count(_HOST_LIB_HASH) == number_of_expected_mounts + + def test_mpich_compatible_symlink_default(self): + self._test_mpich_compatible_symlink(*_get_hook_config_and_libs_default()) + + def test_mpich_compatible_symlink_major(self): + self._test_mpich_compatible_symlink(*_get_hook_config_and_libs_major()) + + def test_mpich_compatible_symlink_full(self): + self._test_mpich_compatible_symlink(*_get_hook_config_and_libs_full()) + + def test_mpich_compatible_symlink_strict(self): + self._test_mpich_compatible_symlink(*_get_hook_config_and_libs_strict()) + + def _test_mpich_compatible_symlink(self, hook_config, mpi_libs): + with util.temporary_hook_files((hook_config, _OCIHOOK_CONFIG_FILE)): + hashes = get_hashes_of_host_libs_in_container("quay.io/ethcscs/sarus-integration-tests:mpich_compatible_symlink") + number_of_expected_mounts = len(mpi_libs) + len(_HOST_MPI_DEPENDENCY_LIBS) + assert hashes.count(_HOST_LIB_HASH) == number_of_expected_mounts + + def test_mpich_minor_incompatible_default(self): + self._test_mpich_minor_incompatible(*_get_hook_config_and_libs_default(), 0, True) + + def test_mpich_minor_incompatible_major(self): + self._test_mpich_minor_incompatible(*_get_hook_config_and_libs_major(), 0, True) @pytest.mark.xfail(reason="Hooks stdout/err are not captured by Pytest after changes for runc 1.1.12") - def test_mpich_minor_incompatible(self): - self._mpi_command_line_option = True - self._container_image = "quay.io/ethcscs/sarus-integration-tests:mpich_minor_incompatible" - self._assert_sarus_raises_mpi_warning_containing_text( - text="Partial ABI compatibility detected", expected_occurrences=2) - hashes = self._get_hashes_of_host_libs_in_container() - number_of_expected_mounts = len(self._HOST_MPI_LIBS) + len(self._HOST_MPI_DEPENDENCY_LIBS) - assert hashes.count(self._HOST_LIB_HASH) == number_of_expected_mounts - - def test_mpich_major_incompatible(self): - self._mpi_command_line_option = True - self._container_image = "quay.io/ethcscs/sarus-integration-tests:mpich_major_incompatible" - self._assert_sarus_raises_mpi_error_containing_text("not ABI compatible with container's MPI library") - - def test_container_without_mpi_libraries(self): - self._mpi_command_line_option = True - self._container_image = "quay.io/ethcscs/sarus-integration-tests:no_mpi_libraries" - self._assert_sarus_raises_mpi_error_containing_text("No MPI libraries found in the container") - - def test_container_without_mpi_libraries_and_nonexisting_ldcache_entry(self): - self._mpi_command_line_option = True - self._container_image = "quay.io/ethcscs/sarus-integration-tests:nonexisting_ldcache_entry_f35" - self._assert_sarus_raises_mpi_error_containing_text("No MPI libraries found in the container") - - def _get_hashes_of_host_libs_in_container(self): - options = [] - if self._mpi_command_line_option: - options.append("--mpi") - hashes = util.get_hashes_of_host_libs_in_container(is_centralized_repository=False, - image=self._container_image, - options_of_run_command=options) - return hashes - - def _assert_sarus_raises_mpi_error_containing_text(self, text): - command = ["sarus", "run", "--mpi", self._container_image, "true"] - util.assert_sarus_raises_error_containing_text(command, text) - - def _assert_sarus_raises_mpi_warning_containing_text(self, text, expected_occurrences): - command = ["sarus", "run", "--mpi", self._container_image, "true"] - output = util.get_sarus_error_output(command, fail_expected=False) - number_of_occurrences = sum(["[WARN]" in line and text in line for line in output.split('\n')]) - assert number_of_occurrences == expected_occurrences, 'Sarus didn\'t generate the expected MPI warnings containing the text "{}".'.format(text) + def test_mpich_minor_incompatible_full(self): + self._test_mpich_minor_incompatible(*_get_hook_config_and_libs_full(), 2, True) + + def test_mpich_minor_incompatible_strict(self): + self._test_mpich_minor_incompatible(*_get_hook_config_and_libs_strict(), 0, False) + + def _test_mpich_minor_incompatible(self, hook_config, mpi_libs, num_warnings, success_expected): + with util.temporary_hook_files((hook_config, _OCIHOOK_CONFIG_FILE)): + if success_expected: + assert_sarus_raises_mpi_warning_containing_text( + container_image="quay.io/ethcscs/sarus-integration-tests:mpich_minor_incompatible", + text="Partial ABI compatibility detected", expected_occurrences=num_warnings) + hashes = get_hashes_of_host_libs_in_container("quay.io/ethcscs/sarus-integration-tests:mpich_minor_incompatible") + number_of_expected_mounts = len(mpi_libs) + len(_HOST_MPI_DEPENDENCY_LIBS) + assert hashes.count(_HOST_LIB_HASH) == number_of_expected_mounts + else: + assert_sarus_raises_mpi_error_containing_regex("quay.io/ethcscs/sarus-integration-tests:mpich_minor_incompatible", r"not[ \w]*ABI compatible with container's MPI library") + + def test_mpich_major_incompatible_default(self): + self._test_mpich_major_incompatible(_get_hook_config_and_libs_default()[0]) + + def test_mpich_major_incompatible_major(self): + self._test_mpich_major_incompatible(_get_hook_config_and_libs_major()[0]) + + def test_mpich_major_incompatible_full(self): + self._test_mpich_major_incompatible(_get_hook_config_and_libs_full()[0]) + + def test_mpich_major_incompatible_strict(self): + self._test_mpich_major_incompatible(_get_hook_config_and_libs_strict()[0]) + + def _test_mpich_major_incompatible(self, hook_config): + check_exception_message( + hook_config=hook_config, + container_image="quay.io/ethcscs/sarus-integration-tests:mpich_major_incompatible", + error_regex=r"not[ \w]*ABI compatible with container's MPI library" + ) + + def test_container_without_mpi_libraries_default(self): + self._test_container_without_mpi_libraries(_get_hook_config_and_libs_default()[0]) + + def test_container_without_mpi_libraries_major(self): + self._test_container_without_mpi_libraries(_get_hook_config_and_libs_major()[0]) + + def test_container_without_mpi_libraries_full(self): + self._test_container_without_mpi_libraries(_get_hook_config_and_libs_full()[0]) + + def test_container_without_mpi_libraries_strict(self): + self._test_container_without_mpi_libraries(_get_hook_config_and_libs_strict()[0]) + + def _test_container_without_mpi_libraries(self, hook_config): + check_exception_message( + hook_config=hook_config, + container_image="quay.io/ethcscs/sarus-integration-tests:no_mpi_libraries", + error_regex="No MPI libraries found in the container" + ) + + def test_container_without_mpi_libraries_and_nonexisting_ldcache_entry_default(self): + self._test_container_without_mpi_libraries_and_nonexisting_ldcache_entry(_get_hook_config_and_libs_default()[0]) + + def test_container_without_mpi_libraries_and_nonexisting_ldcache_entry_major(self): + self._test_container_without_mpi_libraries_and_nonexisting_ldcache_entry(_get_hook_config_and_libs_major()[0]) + + def test_container_without_mpi_libraries_and_nonexisting_ldcache_entry_full(self): + self._test_container_without_mpi_libraries_and_nonexisting_ldcache_entry(_get_hook_config_and_libs_full()[0]) + + def test_container_without_mpi_libraries_and_nonexisting_ldcache_entry_strict(self): + self._test_container_without_mpi_libraries_and_nonexisting_ldcache_entry(_get_hook_config_and_libs_strict()[0]) + + def _test_container_without_mpi_libraries_and_nonexisting_ldcache_entry(self, hook_config): + check_exception_message( + hook_config=hook_config, + container_image="quay.io/ethcscs/sarus-integration-tests:nonexisting_ldcache_entry_f35", + error_regex="No MPI libraries found in the container" + ) @pytest.mark.asroot @@ -186,7 +286,6 @@ def setUpClass(cls): util.pull_image_if_necessary(is_centralized_repository=False, image=cls.CONTAINER_IMAGE) TestMPIHook._create_site_resources() cls._create_device_file() - TestMPIHook._enable_hook(cls._generate_hook_config_with_device()) @classmethod def tearDownClass(cls): @@ -202,13 +301,14 @@ def _create_device_file(cls): @classmethod def _generate_hook_config_with_device(cls): - hook_config = TestMPIHook._generate_hook_config() + hook_config, _ = _get_hook_config_and_libs_default() hook_config["hook"]["env"].append("BIND_MOUNTS=/dev/test0:/var/opt:/var/lib") return hook_config def test_whitelist_device(self): - devices_list = self._get_devices_list_from_cgroup_in_container() - assert "c 511:511 rw" in devices_list + with util.temporary_hook_files((self._generate_hook_config_with_device(), _OCIHOOK_CONFIG_FILE)): + devices_list = self._get_devices_list_from_cgroup_in_container() + assert "c 511:511 rw" in devices_list def _get_devices_list_from_cgroup_in_container(self): return util.run_command_in_container(is_centralized_repository=False, diff --git a/doc/config/mpi-hook.rst b/doc/config/mpi-hook.rst index 941cff25..456d3302 100644 --- a/doc/config/mpi-hook.rst +++ b/doc/config/mpi-hook.rst @@ -34,18 +34,20 @@ Hook configuration The program is meant to be run as a **createContainer** hook and does not accept arguments, but its actions are controlled through a few environment variables: -* ``LDCONFIG_PATH``: Absolute path to a trusted ``ldconfig`` +* ``LDCONFIG_PATH`` (REQUIRED): Absolute path to a trusted ``ldconfig`` program **on the host**. -* ``MPI_LIBS``: Colon separated list of full paths to the host's +* ``MPI_LIBS`` (REQUIRED): Colon separated list of full paths to the host's libraries that will substitute the container's libraries. The ABI - compatibility check is performed by comparing the version numbers specified in + compatibility is checked by comparing the version numbers specified in the libraries' file names according to the specifications selected with the - variable ``MPI_COMPATIBILITY_TYPE`` + variable ``MPI_COMPATIBILITY_TYPE``. -* ``MPI_COMPATIBILITY_TYPE``: Option for the ABI compatibility check, must be one of - ``major``, ``full``, ``strict``. The checks performed for the compatibility in each - case are the following: +* ``MPI_COMPATIBILITY_TYPE`` (OPTIONAL): String determining the logic adopted + to check the ABI compatibility of MPI libraries. + Must be one of ``major``, ``full``, or ``strict``. + If not defined, defaults to ``major``. + The checks performed for compatibility in the different cases are as follows: * ``major`` @@ -57,11 +59,12 @@ arguments, but its actions are controlled through a few environment variables: - The major numbers (first from the left) must be present and equal. - - The host's minor number (second from the left) must be present and greater or equal - to the container's minor number. In case the minor number from the - container is greater than the host's minor number, the hook will print - a warning but will proceed in the attempt to let the container - application run. + - The host's minor number (second from the left) must be present and greater than + or equal to the container's minor number. In case the minor number from the + container is greater than the host's minor number (i.e. the container + library is probably being replaced with an older revision), the hook + will print a verbose log message but will proceed in the attempt to let + the container application run. * ``strict`` @@ -73,11 +76,11 @@ arguments, but its actions are controlled through a few environment variables: This compatibility check is in agreement with the MPICH ABI version number schema. -* ``MPI_DEPENDENCY_LIBS``: Colon separated list of absolute paths to +* ``MPI_DEPENDENCY_LIBS`` (OPTIONAL): Colon separated list of absolute paths to libraries that are dependencies of the ``MPI_LIBS``. These libraries are always bind mounted in the container under ``/usr/lib``. -* ``BIND_MOUNTS``: Colon separated list of absolute paths to generic +* ``BIND_MOUNTS`` (OPTIONAL): Colon separated list of absolute paths to generic files or directories that are required for the correct functionality of the host MPI implementation (e.g. specific device files). These resources will be bind mounted inside the container with the same path they have on the host.