GPU batching (#473)

* remove GPU keyword
* Add GPU check and include it in the memory_manager

Co-authored-by: Samuel Tovey <tovey.samuel@gmail.com>
zincware · Jan 25, 2022 · 8f55d26 · 8f55d26
1 parent 824af77
commit 8f55d26
Show file tree

Hide file tree

Showing 18 changed files with 18 additions and 73 deletions.
diff --git a/CI/unit_tests/utils/test_meta_functions.py b/CI/unit_tests/utils/test_meta_functions.py
@@ -29,12 +29,14 @@
 import unittest
 
 import numpy as np
+import tensorflow as tf
 
 from mdsuite.utils.meta_functions import (
     find_item,
     get_dimensionality,
     get_machine_properties,
     golden_section_search,
+    gpu_available,
     join_path,
     line_counter,
     linear_fitting_function,
@@ -45,6 +47,14 @@
 )
 
 
+def test_gpu_available():
+    """
+    Check that gpu_available() agrees with TensorFlow's list of physical GPUs.
+
+    The expected value is derived from tf.config.list_physical_devices("GPU").
+    Ideally this should be checked against something other than tf.config,
+    but there is currently no better measure.
+
+    NOTE(review): the expected value is True only when more than one GPU is
+    listed (``> 1``). If a single GPU should count as available, this would
+    need to be ``> 0`` -- confirm against the gpu_available implementation.
+    """
+    assert gpu_available() == (len(tf.config.list_physical_devices("GPU")) > 1)
+
+
 class TestMetaFunction(unittest.TestCase):
     """
     A test class for the meta functions module.

diff --git a/mdsuite/calculators/angular_distribution_function.py b/mdsuite/calculators/angular_distribution_function.py
@@ -153,7 +153,6 @@ def __call__(
         species: list = None,
         use_tf_function: bool = False,
         molecules: bool = False,
-        gpu: bool = False,
         plot: bool = True,
         norm_power: int = 4,
         **kwargs,
@@ -188,9 +187,6 @@ def __call__(
             If set to zero no distance normalization will be applied.
         molecules : bool
                 if true, perform the analysis on molecules.
-        gpu : bool
-                if true, scale the memory requirements to that of the biggest
-                GPU on the machine.
         plot : bool
                 If true, plot the result of the analysis.
         """
@@ -212,7 +208,6 @@ def __call__(
         # Parse the user arguments.
         self.use_tf_function = use_tf_function
         self.cutoff = cutoff
-        self.gpu = gpu
         self.plot = plot
         self._batch_size = batch_size  # memory management for all batches
         self.adf_minibatch = (

diff --git a/mdsuite/calculators/calculator.py b/mdsuite/calculators/calculator.py
@@ -160,8 +160,6 @@ class Calculator(CalculatorDatabase):
             List of experiments on which to run the calculator.
     plot : bool
             If true, the results will be plotted.
-    gpu : bool
-            If true, the memory will be scaled down to the best GPU on the system.
     system_property: bool (default = False)
             If the calculator returns a value for the whole system such as ionic
             conductivity or viscosity as opposed to a species-specific number.
@@ -219,7 +217,6 @@ def __init__(
             self.experiments = [self.experiment]
 
         self.plot = False
-        self.gpu = False
 
         # SQL data attributes.
         self.result_keys = None

diff --git a/mdsuite/calculators/einstein_diffusion_coefficients.py b/mdsuite/calculators/einstein_diffusion_coefficients.py
@@ -112,7 +112,6 @@ def __call__(
         atom_selection: np.s_ = np.s_[:],
         molecules: bool = False,
         tau_values: Union[int, List, Any] = np.s_[:],
-        gpu: bool = False,
     ):
         """
 
@@ -132,9 +131,6 @@ def __call__(
                 If true, molecules are used instead of atoms.
         tau_values : Union[int, list, np.s_]
                 Selection of tau values to use in the window sliding.
-        gpu : bool
-                If true, scale the memory requirement down to the amount of
-                the biggest GPU in the system.
 
         Returns
         -------
@@ -154,7 +150,6 @@ def __call__(
             molecules=molecules,
             species=species,
         )
-        self.gpu = gpu
         self.plot = plot
         self.system_property = False
         self.time = self._handle_tau_values()

diff --git a/mdsuite/calculators/einstein_distinct_diffusion_coefficients.py b/mdsuite/calculators/einstein_distinct_diffusion_coefficients.py
@@ -123,7 +123,6 @@ def __call__(
         molecules: bool = False,
         export: bool = False,
         atom_selection: dict = np.s_[:],
-        gpu: bool = False,
     ):
         """
         Parameters
@@ -143,9 +142,6 @@ def __call__(
                 Selection of atoms to use within the HDF5 database.
         export : bool
                 If true, export the data directly into a csv file.
-        gpu : bool
-                If true, scale the memory requirement down to the amount of
-                the biggest GPU in the system.
 
         Returns
         -------
@@ -157,7 +153,6 @@ def __call__(
             species = list(self.experiment.species)
         self.combinations = list(itertools.combinations_with_replacement(species, 2))
 
-        self.gpu = gpu
         self.plot = plot
 
         # set args that will affect the computation result

diff --git a/mdsuite/calculators/einstein_helfand_ionic_conductivity.py b/mdsuite/calculators/einstein_helfand_ionic_conductivity.py
@@ -100,7 +100,6 @@ def __call__(
         data_range=500,
         correlation_time=1,
         tau_values: np.s_ = np.s_[:],
-        gpu: bool = False,
     ):
         """
         Python constructor
@@ -113,9 +112,6 @@ def __call__(
                 Number of configurations to use in each ensemble
         correlation_time : int
                 Correlation time to use in the analysis.
-        gpu : bool
-                If true, reduce memory usage to the maximum GPU capability.
-
         """
 
         # set args that will affect the computation result
@@ -126,7 +122,6 @@ def __call__(
             atom_selection=np.s_[:],
         )
 
-        self.gpu = gpu
         self.plot = plot
         self.time = self._handle_tau_values()
         self.msd_array = np.zeros(self.data_resolution)

diff --git a/mdsuite/calculators/einstein_helfand_thermal_conductivity.py b/mdsuite/calculators/einstein_helfand_thermal_conductivity.py
@@ -110,7 +110,6 @@ def __call__(
         data_range=500,
         correlation_time=1,
         tau_values: np.s_ = np.s_[:],
-        gpu: bool = False,
     ):
         """
         Python constructor
@@ -123,9 +122,6 @@ def __call__(
                 Data range to use in the analysis.
         correlation_time : int
                 Correlation time to use in the window sampling.
-        gpu : bool
-                If true, scale the memory requirement down to the amount of
-                the biggest GPU in the system.
         """
         # set args that will affect the computation result
         self.args = Args(
@@ -134,8 +130,6 @@ def __call__(
             tau_values=tau_values,
             atom_selection=np.s_[:],
         )
-
-        self.gpu = gpu
         self.plot = plot
         self.time = self._handle_tau_values()
         self.msd_array = np.zeros(self.data_resolution)

diff --git a/mdsuite/calculators/einstein_helfand_thermal_kinaci.py b/mdsuite/calculators/einstein_helfand_thermal_kinaci.py
@@ -113,7 +113,6 @@ def __call__(
         data_range=500,
         correlation_time=1,
         tau_values: np.s_ = np.s_[:],
-        gpu: bool = False,
     ):
         """
         Python constructor
@@ -127,9 +126,6 @@ def __call__(
 
         correlation_time : int
                 Correlation time to use in the window sampling.
-        gpu : bool
-                If true, scale the memory requirement down to the amount of
-                the biggest GPU in the system.
         """
         # set args that will affect the computation result
         self.args = Args(
@@ -140,7 +136,6 @@ def __call__(
         )
 
         self.plot = plot
-        self.gpu = gpu
         self.time = self._handle_tau_values()
         self.msd_array = np.zeros(self.data_resolution)
 

diff --git a/mdsuite/calculators/green_kubo_distinct_diffusion_coefficients.py b/mdsuite/calculators/green_kubo_distinct_diffusion_coefficients.py
@@ -118,7 +118,6 @@ def __call__(
         molecules: bool = False,
         export: bool = False,
         atom_selection: dict = np.s_[:],
-        gpu: bool = False,
         integration_range: int = None,
     ):
         """
@@ -142,9 +141,6 @@ def __call__(
                 Selection of atoms to use within the HDF5 database.
         export : bool
                 If true, export the data directly into a csv file.
-        gpu : bool
-                If true, scale the memory requirement down to the amount of
-                the biggest GPU in the system.
         integration_range : int
                 Range over which to perform the integration.
         """
@@ -162,7 +158,6 @@ def __call__(
             integration_range=integration_range,
         )
 
-        self.gpu = gpu
         self.plot = plot
         self.time = self._handle_tau_values()
 

diff --git a/mdsuite/calculators/green_kubo_ionic_conductivity.py b/mdsuite/calculators/green_kubo_ionic_conductivity.py
@@ -116,7 +116,6 @@ def __call__(
         data_range=500,
         correlation_time=1,
         tau_values: np.s_ = np.s_[:],
-        gpu: bool = False,
         integration_range: int = None,
     ):
         """
@@ -129,14 +128,10 @@ def __call__(
                 Data range to use in the analysis.
         correlation_time : int
                 Correlation time to use in the window sampling.
-        gpu : bool
-                If true, scale the memory requirement down to the amount of
-                the biggest GPU in the system.
         integration_range : int
                 Range over which integration should be performed.
         """
 
-        self.gpu = gpu
         self.plot = plot
         self.jacf: np.ndarray
         self.sigma = []

diff --git a/mdsuite/calculators/green_kubo_self_diffusion_coefficients.py b/mdsuite/calculators/green_kubo_self_diffusion_coefficients.py
@@ -118,7 +118,6 @@ def __call__(
         correlation_time: int = 1,
         atom_selection=np.s_[:],
         molecules: bool = False,
-        gpu: bool = False,
         tau_values: Union[int, List, Any] = np.s_[:],
         integration_range: int = None,
     ):
@@ -139,9 +138,6 @@ def __call__(
                 Selection of atoms to use within the HDF5 database.
         molecules : bool
                 If true, molecules are used instead of atoms.
-        gpu : bool
-                If true, scale the memory requirement down to the amount of
-                the biggest GPU in the system.
         integration_range : int
                 Range over which to integrate. Default is to integrate over
                 the full data range.
@@ -165,7 +161,6 @@ def __call__(
             integration_range=integration_range,
         )
 
-        self.gpu = gpu
         self.plot = plot
         self.time = self._handle_tau_values()
         self.vacf = np.zeros(self.data_resolution)

diff --git a/mdsuite/calculators/green_kubo_thermal_conductivity.py b/mdsuite/calculators/green_kubo_thermal_conductivity.py
@@ -105,7 +105,6 @@ def __call__(
         data_range=500,
         tau_values: np.s_ = np.s_[:],
         correlation_time: int = 1,
-        gpu: bool = False,
         integration_range: int = None,
     ):
         """
@@ -119,13 +118,9 @@ def __call__(
                 Data range to use in the analysis.
         correlation_time : int
                 Correlation time to use in the window sampling.
-        gpu : bool
-                If true, scale the memory requirement down to the amount of
-                the biggest GPU in the system.
         integration_range : int
                 Range over which the integration should be performed.
         """
-        self.gpu = gpu
         self.plot = plot
         self.jacf: np.ndarray
         self.prefactor: float

diff --git a/mdsuite/calculators/green_kubo_viscosity.py b/mdsuite/calculators/green_kubo_viscosity.py
@@ -106,7 +106,6 @@ def __call__(
         data_range=500,
         tau_values: np.s_ = np.s_[:],
         correlation_time: int = 1,
-        gpu: bool = False,
         integration_range: int = None,
     ):
         """
@@ -118,8 +117,6 @@ def __call__(
         data_range :
                 Number of configurations to use in each ensemble
         """
-
-        self.gpu = gpu
         self.plot = plot
         self.sigma = []
 

diff --git a/mdsuite/calculators/green_kubo_viscosity_flux.py b/mdsuite/calculators/green_kubo_viscosity_flux.py
@@ -104,7 +104,6 @@ def __call__(
         data_range=500,
         correlation_time=1,
         tau_values: np.s_ = np.s_[:],
-        gpu: bool = False,
         integration_range: int = None,
     ):
         """
@@ -117,8 +116,6 @@ def __call__(
         data_range : int
                 Number of configurations to include in each ensemble
         """
-
-        self.gpu = gpu
         self.plot = plot
         self.sigma = []
 

diff --git a/mdsuite/calculators/radial_distribution_function.py b/mdsuite/calculators/radial_distribution_function.py
@@ -136,8 +136,6 @@ def __init__(self, **kwargs):
         self.key_list = None
         self.rdf = None
 
-        self.correct_minibatch_batching = None
-
     @call
     def __call__(
         self,
@@ -151,7 +149,6 @@ def __call__(
         minibatch: int = -1,
         species: list = None,
         molecules: bool = False,
-        gpu: bool = False,
         **kwargs,
     ):
         """
@@ -184,8 +181,6 @@ def __call__(
             issues. Increase this value for better performance.
         molecules: bool
             If true, the molecules will be analyzed rather than the atoms.
-        gpu: bool
-            Calculate batch size based on GPU memory instead of CPU memory
         kwargs:
             overide_n_batches: int
                     override the automatic batch size calculation
@@ -209,7 +204,6 @@ def __call__(
         # usually performance or plotting
         self.rdf_minibatch = minibatch
         self.plot = plot
-        self.gpu = gpu
 
         # kwargs parsing
         self.use_tf_function = kwargs.pop("use_tf_function", False)
@@ -252,10 +246,6 @@ def check_input(self):
             else:
                 self.args.species = list(self.experiment.species)
 
-        if self.gpu:
-            self.correct_minibatch_batching = 100
-            # 100 seems to be a good value for most systems
-
         self._initialize_rdf_parameters()
 
     def _initialize_rdf_parameters(self):

diff --git a/mdsuite/calculators/trajectory_calculator.py b/mdsuite/calculators/trajectory_calculator.py
@@ -247,7 +247,6 @@ def _prepare_managers(self, data_path: list, correct: bool = False):
             database=self.database,
             memory_fraction=0.8,
             scale_function=self.scale_function,
-            gpu=self.gpu,
         )
         (
             self.batch_size,