diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index dea57ca1..1f835e90 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -15,8 +15,8 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.7, 3.9]
-        torch-version: [1.10.0, 1.11.0]
+        python-version: [3.9]
+        torch-version: [1.11.0, 1.12.1]
 
     steps:
     - uses: actions/checkout@v2
@@ -44,4 +44,4 @@ jobs:
     - name: Test with pytest
       run: |
         # See https://github.com/pytest-dev/pytest/issues/1075
-        PYTHONHASHSEED=0 pytest -n auto --ignore=docs/ .
+        PYTHONHASHSEED=0 pytest -n auto tests/
diff --git a/.github/workflows/tests_develop.yml b/.github/workflows/tests_develop.yml
index bae5795e..2c23350c 100644
--- a/.github/workflows/tests_develop.yml
+++ b/.github/workflows/tests_develop.yml
@@ -16,7 +16,7 @@ jobs:
     strategy:
       matrix:
         python-version: [3.9]
-        torch-version: [1.11.0]
+        torch-version: [1.12.1]
 
     steps:
     - uses: actions/checkout@v2
@@ -44,4 +44,4 @@ jobs:
     - name: Test with pytest
       run: |
         # See https://github.com/pytest-dev/pytest/issues/1075
-        PYTHONHASHSEED=0 pytest -n auto --ignore=docs/ .
+        PYTHONHASHSEED=0 pytest -n auto tests/
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5f13a642..cf50972d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,7 +7,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 Most recent change on the bottom.
 
-## [Unreleased] - 0.5.6
+## [0.5.6] - 2022-12-19
+### Added
+- `nequip-benchmark` and `nequip-train` report number of weights and number of trainable weights
+- `nequip-benchmark --no-compile` and `--verbose` and `--memory-summary`
+- `nequip-benchmark --pdb` for debugging model (builder) errors
+- More information in `nequip-deploy info`
+
+### Changed
+- sklearn dependency removed
+- Minimum e3nn is now 0.4.4
+- `--equivariance-test` now prints much more information, especially when there is a failure
+
+### Fixed
+- Git utilities when installed as ZIPed `.egg` (#264)
 
 ## [0.5.5] - 2022-06-20
 ### Added
diff --git a/configs/full.yaml b/configs/full.yaml
index 1b8a3a2c..2f98164e 100644
--- a/configs/full.yaml
+++ b/configs/full.yaml
@@ -17,7 +17,20 @@ default_dtype: float32
 allow_tf32: false  # whether to use TensorFloat32 if it is available
 # device: cuda  # which device to use. Default: automatically detected cuda or "cpu"
 
-# network
+# == network ==
+
+# `model_builders` defines a series of functions that will be called to construct the model
+# each model builder has the opportunity to update the model, the config, or both
+# model builders from other packages are allowed (see mir-group/allegro for an example); those from `nequip.model` don't require a prefix
+# these are the default model builders:
+model_builders:
+  - SimpleIrrepsConfig  # update the config with all the irreps for the network if using the simplified `l_max` / `num_features` / `parity` syntax
+  - EnergyModel  # build a full NequIP model
+  - PerSpeciesRescale  # add per-atom / per-species scaling and shifting to the NequIP model before the total energy sum
+  - ForceOutput  # wrap the energy model in a module that uses autodifferentiation to compute the forces
+  - RescaleEnergyEtc  # wrap the entire model in the appropriate global rescaling of the energy, forces, etc.
+# ^ global rescaling blocks must always go last!
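+#
+# a hypothetical sketch of overriding the defaults (the package and builder
+# names below are invented for illustration): a downstream package can swap in
+# its own energy model while reusing the rest of the default pipeline
+# model_builders:
+#   - mypkg.model.MyEnergyModel   # external builders use fully qualified names
+#   - PerSpeciesRescale
+#   - ForceOutput
+#   - RescaleEnergyEtc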
+
 r_max: 4.0                                                                       # cutoff radius in length units, here Angstrom, this is an important hyperparameter to scan
 num_layers: 4                                                                    # number of interaction blocks, we find 3-5 to work best
@@ -198,6 +211,8 @@ loss_coeffs:
   total_energy:
     - 1
     - PerAtomMSELoss
+# note that the ratio between the force and energy loss matters for training. One may consider using 1:1 with PerAtomMSELoss. If the energy loss still significantly dominates the total loss in the initial epochs, lowering the energy loss weight helps training considerably.
+
 # # default loss function is MSELoss, the name has to be exactly the same as those in torch.nn.
 # the only supported targets are forces and total_energy
@@ -302,10 +317,10 @@ per_species_rescale_scales: dataset_forces_rms
 # If not provided, defaults to dataset_per_species_force_rms or dataset_per_atom_total_energy_std, depending on whether forces are being trained.
 # per_species_rescale_kwargs:
 #   total_energy:
-#     alpha: 0.1
+#     alpha: 0.001
 #     max_iteration: 20
 #     stride: 100
-# keywords for GP decomposition of per specie energy. Optional. Defaults to 0.1
+# keywords for ridge regression decomposition of per-species energy. Optional. Defaults to 0.001. The value should be in the range of 1e-3 to 1e-2
 # per_species_rescale_arguments_in_dataset_units: True
 # if explicit numbers are given for the shifts/scales, this parameter must specify whether the given numbers are unitless shifts/scales or are in the units of the dataset. If ``True``, any global rescalings will correctly be applied to the per-species values.
@@ -329,9 +344,10 @@ global_rescale_scale_trainable: false
 # global_rescale_shift_trainable: false
 # global_rescale_scale: dataset_forces_rms
 # global_rescale_scale_trainable: false
-# per_species_rescale_trainable: true
-# per_species_rescale_shifts: dataset_per_atom_total_energy_mean
-# per_species_rescale_scales: dataset_per_atom_total_energy_std
+# per_species_rescale_shifts_trainable: false
+# per_species_rescale_scales_trainable: true
+# per_species_rescale_shifts: dataset_per_species_total_energy_mean
+# per_species_rescale_scales: dataset_per_species_forces_rms
 
 # # full block needed for global rescale
 # global_rescale_shift: dataset_total_energy_mean
diff --git a/configs/minimal_toy_emt.yaml b/configs/minimal_toy_emt.yaml
index c9c904d1..38b7f95d 100644
--- a/configs/minimal_toy_emt.yaml
+++ b/configs/minimal_toy_emt.yaml
@@ -6,15 +6,18 @@ dataset_seed: 456
 
 # network
 model_builders:
+  - SimpleIrrepsConfig
   - EnergyModel
   - PerSpeciesRescale
   - StressForceOutput
   - RescaleEnergyEtc
+
 num_basis: 8
 r_max: 4.0
-irreps_edge_sh: 0e + 1o
-conv_to_output_hidden_irreps_out: 16x0e
-feature_irreps_hidden: 16x0o + 16x0e + 16x1o + 16x1e
+l_max: 1
+parity: true
+num_features: 16
+num_layers: 4
 
 # data set
 dataset: EMTTest                                                                       # type of data set, can be npz or ase
@@ -23,10 +26,6 @@ dataset_num_frames: 100
 chemical_symbols:
   - Cu
 
-global_rescale_scale: dataset_total_energy_std
-per_species_rescale_shifts: dataset_per_atom_total_energy_mean
-per_species_rescale_scales: dataset_per_atom_total_energy_std
-
 # logging
 wandb: false
 # verbose: debug
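Editor's note on the hunk above: per the ``SimpleIrrepsConfig`` comment in ``configs/full.yaml``, the simplified keys are expanded into the explicit irreps options; the sketch below pairs the new simplified syntax with the explicit values that the removed lines specified.

.. code:: yaml

  # simplified syntax (the new lines above)
  l_max: 1
  parity: true
  num_features: 16
  # equivalent explicit irreps options (the removed lines above)
  feature_irreps_hidden: 16x0o + 16x0e + 16x1o + 16x1e
  irreps_edge_sh: 0e + 1o
  conv_to_output_hidden_irreps_out: 16x0e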
diff --git a/docs/api/nequip.rst b/docs/api/nequip.rst
index 13bc37ca..6f6250cf 100644
--- a/docs/api/nequip.rst
+++ b/docs/api/nequip.rst
@@ -3,4 +3,5 @@ Python API
 
  .. toctree::
 
-    data
\ No newline at end of file
+    data
+    trainer
diff --git a/docs/api/trainer.rst b/docs/api/trainer.rst
new file mode 100644
index 00000000..983e6f6b
--- /dev/null
+++ b/docs/api/trainer.rst
@@ -0,0 +1,10 @@
+nequip.trainer
+==============
+
+ .. automodule:: nequip.train.trainer
+    :members:
+    :imported-members:
+
+ .. automodule:: nequip.train.trainer_wandb
+    :members:
+    :imported-members:
diff --git a/docs/cite.rst b/docs/cite.rst
new file mode 100644
index 00000000..9f8296cc
--- /dev/null
+++ b/docs/cite.rst
@@ -0,0 +1,3 @@
+Citing NequIP
+=============
+
diff --git a/docs/commandline/commands.rst b/docs/commandline/commands.rst
new file mode 100644
index 00000000..b58c87ab
--- /dev/null
+++ b/docs/commandline/commands.rst
@@ -0,0 +1,132 @@
+Command-line Executables
+========================
+
+``nequip-train``
+----------------
+
+ .. code ::
+
+    usage: nequip-train [-h] [--equivariance-test] [--model-debug-mode] [--grad-anomaly-mode] [--log LOG] config
+
+Train (or restart training of) a NequIP model.
+
+positional arguments:
+  config                YAML file configuring the model, dataset, and other options
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --equivariance-test   test the model's equivariance before training
+  --model-debug-mode    enable model debug mode, which can sometimes give much more useful error messages at the
+                        cost of some speed. Do not use for production training!
+  --grad-anomaly-mode   enable PyTorch autograd anomaly mode to debug NaN gradients. Do not use for production
+                        training!
+  --log LOG             log file to store all the screen logging
+
+``nequip-evaluate``
+-------------------
+
+ .. code ::
+
+    usage: nequip-evaluate [-h] [--train-dir TRAIN_DIR] [--model MODEL] [--dataset-config DATASET_CONFIG]
+                           [--metrics-config METRICS_CONFIG] [--test-indexes TEST_INDEXES] [--batch-size BATCH_SIZE]
+                           [--device DEVICE] [--output OUTPUT] [--log LOG]
+
+Compute the error of a model on a test set using various metrics. The model, metrics, dataset, etc. can be specified
+in individual YAML config files, or a training session can be indicated with ``--train-dir``. In order of priority,
+the global settings (dtype, TensorFloat32, etc.) are taken from: (1) the model config (for a training session), (2)
+the dataset config (for a deployed model), or (3) the defaults. Prints only the final result in ``name = num`` format
+to stdout; all other information is ``logging.debug``ed to stderr. WARNING: Please note that results of CUDA models
+are rarely exactly reproducible, and that even CPU models can be nondeterministic.
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --train-dir TRAIN_DIR
+                        Path to a working directory from a training session.
+  --model MODEL         A deployed or pickled NequIP model to load. If omitted, defaults to `best_model.pth` in
+                        `train_dir`.
+  --dataset-config DATASET_CONFIG
+                        A YAML config file specifying the dataset to load test data from. If omitted, `config.yaml`
+                        in `train_dir` will be used.
+  --metrics-config METRICS_CONFIG
+                        A YAML config file specifying the metrics to compute. If omitted, `config.yaml` in
+                        `train_dir` will be used. If the config does not specify `metrics_components`, the default
+                        is to logging.debug MAEs and RMSEs for all fields given in the loss function. If the
+                        literal string `None`, no metrics will be computed.
+  --test-indexes TEST_INDEXES
+                        Path to a file containing the indexes in the dataset that make up the test set. If omitted,
+                        all data frames *not* used as training or validation data in the training session
+                        `train_dir` will be used.
+  --batch-size BATCH_SIZE
+                        Batch size to use. Larger is usually faster on GPU.
+  --device DEVICE       Device to run the model on. If not provided, defaults to CUDA if available and CPU
+                        otherwise.
+  --output OUTPUT       XYZ file to write out the test set and model predicted forces, energies, etc. to.
+  --log LOG             log file to store all the metrics and screen logging.debug
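+
+As an illustrative sketch only (treat the exact ``metrics_components`` syntax
+as an assumption borrowed from the project's example configs), a minimal
+metrics config passed via ``--metrics-config`` might look like:
+
+ .. code :: yaml
+
+    metrics_components:
+      - - forces        # field to evaluate
+        - mae           # metric
+      - - total_energy
+        - rmse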
+
+``nequip-deploy``
+-----------------
+
+ .. code ::
+
+    usage: nequip-deploy [-h] {info,build} ...
+
+Deploy and view information about previously deployed NequIP models.
+
+optional arguments:
+  -h, --help    show this help message and exit
+
+commands:
+  {info,build}
+    info        Get information from a deployed model file
+    build       Build a deployment model
+
+``nequip-deploy info``
+~~~~~~~~~~~~~~~~~~~~~~
+
+ .. code ::
+
+    usage: nequip-deploy info [-h] model_path
+
+positional arguments:
+  model_path  Path to a deployed model file.
+
+optional arguments:
+  -h, --help  show this help message and exit
+
+
+``nequip-deploy build``
+~~~~~~~~~~~~~~~~~~~~~~~
+
+ .. code ::
+
+    usage: nequip-deploy build [-h] train_dir out_file
+
+positional arguments:
+  train_dir   Path to a working directory from a training session.
+  out_file    Output file for deployed model.
+
+optional arguments:
+  -h, --help  show this help message and exit
+
+
+``nequip-benchmark``
+--------------------
+
+ .. code ::
+
+    usage: nequip-benchmark [-h] [--profile PROFILE] [--device DEVICE] [-n N] [--n-data N_DATA] [--timestep TIMESTEP]
+                            config
+
+Benchmark the approximate MD performance of a given model configuration / dataset pair.
+
+positional arguments:
+  config               configuration file
+
+optional arguments:
+  -h, --help           show this help message and exit
+  --profile PROFILE    Profile instead of timing, creating and outputting a Chrome trace JSON to the given path.
+  --device DEVICE      Device to run the model on. If not provided, defaults to CUDA if available and CPU
+                       otherwise.
+  -n N                 Number of trials.
+  --n-data N_DATA      Number of frames to use.
+  --timestep TIMESTEP  MD timestep for ns/day estimation, in fs. Defaults to 1 fs.
diff --git a/docs/guide/FAQ.rst b/docs/errors/errors.rst
similarity index 56%
rename from docs/guide/FAQ.rst
rename to docs/errors/errors.rst
index 92ac758e..576e553d 100644
--- a/docs/guide/FAQ.rst
+++ b/docs/errors/errors.rst
@@ -1,14 +1,5 @@
-FAQ
-===
-
-How do I...
------------
-
-... continue to train a model that reached a stopping condition?
-    There will be an answer here.
-
-1. Reload the model trained with version 0.3.3 to the code in 0.4.
-    check out the migration note at :ref:`migration_note`.
+Errors
+======
 
 Common errors
 -------------
diff --git a/docs/faq/FAQ.rst b/docs/faq/FAQ.rst
new file mode 100644
index 00000000..411e77c1
--- /dev/null
+++ b/docs/faq/FAQ.rst
@@ -0,0 +1,14 @@
+FAQ
+===
+
+How do I...
+-----------
+
+... continue to train a model that reached a stopping condition?
+    There will be an answer here.
+
+1. Reload the model trained with version 0.3.3 to the code in 0.4.
+    check out the migration note at :ref:`migration_note`.
+
+2. Specify my dataset for `nequip-train` and `nequip-evaluate`: see :ref:`dataset_note`.
+
diff --git a/docs/guide/guide.rst b/docs/guide/guide.rst
deleted file mode 100644
index 6def3859..00000000
--- a/docs/guide/guide.rst
+++ /dev/null
@@ -1,9 +0,0 @@
-NequIP User Guide
-=================
-
- .. toctree::
-
-    intro
-    irreps
-    conventions
-    FAQ
\ No newline at end of file
diff --git a/docs/guide/intro.rst b/docs/guide/intro.rst
deleted file mode 100644
index 7afa4132..00000000
--- a/docs/guide/intro.rst
+++ /dev/null
@@ -1,4 +0,0 @@
-Tutorial: Introduction to NequIP
-================================
-
-TODO
\ No newline at end of file
diff --git a/docs/guide/irreps.rst b/docs/guide/irreps.rst
deleted file mode 100644
index 5f9b2735..00000000
--- a/docs/guide/irreps.rst
+++ /dev/null
@@ -1,9 +0,0 @@
-Irreps
-======
-
-.. _Irreps:
-
-Syntax to specify irreps
-------------------------
-
-TODO: descripe irreps syntax here
\ No newline at end of file
diff --git a/docs/guide/conventions.rst b/docs/howto/conventions.rst
similarity index 100%
rename from docs/guide/conventions.rst
rename to docs/howto/conventions.rst
diff --git a/docs/howto/dataset.rst b/docs/howto/dataset.rst
new file mode 100644
index 00000000..2b5267e7
--- /dev/null
+++ b/docs/howto/dataset.rst
@@ -0,0 +1,156 @@
+.. _dataset_note:
+
+How to prepare a training dataset
+=================================
+
+What NequIP does behind the scenes
+----------------------------------
+
+NequIP uses the ``AtomicDataset`` class to store atomic configurations.
+When an ``AtomicDataset`` object is initialized,
+NequIP reads the atomic structures from the dataset and
+computes the neighbor list and the other data structures needed by the GNN,
+converting the raw data into a list of ``AtomicData`` objects.
+
+The computed results are then cached on disk in the ``root/processed_hashkey`` folder.
+The hash is based on all the metadata provided for the dataset,
+including the file name, the cutoff radius, the floating-point precision, etc.
+When multiple training/evaluation runs use the same dataset,
+the neighbor list is only computed in the first NequIP run;
+later runs load the ``AtomicDataset`` object directly from the cache file to save computation time.
+
+Note: be careful with the cached file. If you update your raw data file but keep using the same filename,
+NequIP will not automatically update the cached data.
+
+Key concepts
+------------
+
+fixed_fields
+~~~~~~~~~~~~
+Fixed fields are quantities that are shared among all the configurations in the dataset.
+For example, if the dataset is a trajectory from an NVT MD simulation, the supercell and the atomic species
+are constant matrices/vectors throughout the whole dataset.
+In this case, instead of repeating the same values many times,
+we specify the cell and species as fixed fields and provide them only once.
+
+yaml interface
+~~~~~~~~~~~~~~
+``nequip-train`` and ``nequip-evaluate`` automatically construct the ``AtomicDataset`` based on the yaml arguments.
+Later sections offer a couple of different examples.
+
+If the training and validation datasets come from different raw files, the arguments for each set
+can be defined with the ``dataset`` prefix and the ``validation_dataset`` prefix, respectively.
+
+For example, ``dataset_file_name`` is used for the training data and ``validation_dataset_file_name``
+for the validation data, as in the sketch below.
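+
+A minimal sketch of this convention (the file names here are placeholders):
+
+.. code:: yaml
+
+  dataset: ase                                   # training set
+  dataset_file_name: train.xyz
+  validation_dataset: ase                        # validation set, same options with the prefix
+  validation_dataset_file_name: validation.xyz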
+
+Python interface
+~~~~~~~~~~~~~~~~
+See ``nequip.data.dataset.AtomicInMemoryDataset``.
+
+Prepare the dataset and specify it in the yaml config
+-----------------------------------------------------
+
+ASE format
+~~~~~~~~~~
+
+NequIP accepts any format that can be parsed by the `ase.io.read` function.
+We recommend `extxyz`.
+
+Example: given atomic data stored in ``H2.extxyz`` that looks like this:
+
+.. code:: extxyz
+
+  2
+  Properties=species:S:1:pos:R:3 energy=-10 user_label=2.0 pbc="F F F"
+  H       0.00000000       0.00000000       0.00000000
+  H       0.00000000       0.00000000       1.02000000
+
+the yaml input should be
+
+.. code:: yaml
+
+  dataset: ase
+  dataset_file_name: H2.extxyz
+  ase_args:
+    format: extxyz
+  include_keys:
+    - user_label
+  key_mapping:
+    user_label: label0
+  chemical_symbol_to_type:
+    H: 0
+
+For formats other than `extxyz`, be careful with the ASE parsers; they may behave differently from the extxyz parser.
+For example, the ASE VASP parser stores the potential energy under `free_energy` instead of `energy`.
+Because our code is optimized for the `extxyz` parser, NequIP cannot load the `total_energy` labels from such files on its own;
+additional keys are needed to tell NequIP about the situation.
+The workaround is to use key mapping (see also the note below). Here's an example for a VASP OUTCAR:
+
+.. code:: yaml
+
+  dataset: ase
+  dataset_file_name: OUTCAR
+  ase_args:
+    format: vasp-out
+  key_mapping:
+    free_energy: total_energy
+  chemical_symbol_to_type:
+    H: 0
+
+NPZ format
+~~~~~~~~~~
+
+If your dataset consists of configurations that always have the same number of atoms, the npz data format can be an option.
+
+In the npz file, the first dimension of every array should equal the number of configurations.
+For example, the force array for 36 atomic configurations of an N-atom system should have the shape (36, N, 3),
+and the total_energy array should have the shape (36,).
+
+Below is an example of the yaml specification.
+
+.. code:: yaml
+
+  dataset: npz
+  dataset_file_name: example.npz
+  include_keys:
+    - user_label1
+    - user_label2
+  npz_fixed_field_keys:
+    - cell
+    - atomic_numbers
+  key_mapping:
+    position: pos
+    force: forces
+    energy: total_energy
+    Z: atomic_numbers
+
+
+Note on key mapping
+~~~~~~~~~~~~~~~~~~~
+
+NequIP has default key names for energy, force, cell, etc. (defined in nequip.data._keys).
+Unlike the ASE format, where this information is parsed automatically,
+in the npz data format the correct key names have to be provided.
+Common key names are: `total_energy`, `forces`, `atomic_numbers`, `pos`, `cell`, `pbc`.
+The `key_mapping` converts user-defined names (keys) to NequIP default names (values).
+
+
+Advanced options
+----------------
+
+skip frames during data processing
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The `include_frames` argument can be specified in the yaml to skip certain frames in the raw data file;
+a short sketch is given at the end of this page.
+It has to be a list or another python iterable.
+
+register user-defined graph, node, and edge fields
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Graph, node, and edge fields are quantities that belong to
+the whole graph, to each atom, and to each edge, respectively.
+Example graph fields include cell, pbc, and total_energy;
+example node fields include pos and forces.
+
+To help NequIP properly assemble batched data, any graph quantity other than
+cell, pbc, and total_energy should be registered.
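+
+A short sketch of frame skipping, as promised above (the file name and the
+frame indices are placeholders):
+
+.. code:: yaml
+
+  dataset: ase
+  dataset_file_name: trajectory.xyz
+  include_frames: [0, 2, 4, 6, 8]   # use only these frames from the raw file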
diff --git a/docs/howto/howto.rst b/docs/howto/howto.rst
new file mode 100644
index 00000000..07e84e84
--- /dev/null
+++ b/docs/howto/howto.rst
@@ -0,0 +1,7 @@
+How-to Tutorials
+================
+
+ .. toctree::
+
+    dataset
+    migrate
diff --git a/docs/guide/migrate.rst b/docs/howto/migrate.rst
similarity index 100%
rename from docs/guide/migrate.rst
rename to docs/howto/migrate.rst
diff --git a/docs/index.rst b/docs/index.rst
index dc6ecd43..d2edd1a6 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -9,12 +9,20 @@ NequIP
 NequIP is an open-source package for creating, training, and using E(3)-equivariant machine learning interatomic potentials.
 
 .. toctree::
-   :maxdepth: 3
+   :maxdepth: 2
    :caption: Contents:
 
-   guide/guide
+   introduction/intro
+   cite
+   installation/install
+   yaml/yaml
+   howto/howto
+   faq/FAQ
+   commandline/commands
+   lammps/all
    options/options
    api/nequip
+   errors/errors
diff --git a/docs/installation/install.rst b/docs/installation/install.rst
new file mode 100644
index 00000000..3e946815
--- /dev/null
+++ b/docs/installation/install.rst
@@ -0,0 +1,39 @@
+Installation
+============
+
+NequIP requires:
+
+ * Python >= 3.6
+ * PyTorch >= 1.8, <=1.12.*. PyTorch can be installed following the `instructions from their documentation <https://pytorch.org/get-started/locally/>`_. Note that neither ``torchvision`` nor ``torchaudio``, included in the default install command, are needed for NequIP.
+
+To install:
+
+ * We use `Weights&Biases <https://wandb.ai>`_ to keep track of experiments. This is not a strict requirement — you can use our package without it — but it may make your life easier. If you want to use it, create an account `here <https://wandb.ai>`_ and install the Python package::
+
+    pip install wandb
+
+ * Install the latest stable NequIP::
+
+    pip install https://github.com/mir-group/nequip/archive/main.zip
+
+To install previous versions of NequIP, please clone the repository from GitHub and check out the appropriate tag (for example ``v0.3.3`` for version 0.3.3).
+
+To install the current **unstable** development version of NequIP, please clone our repository and check out the ``develop`` branch.
+
+Installation Issues
+-------------------
+
+The easiest way to check if your installation is working is to train a *toy* model::
+
+    nequip-train configs/minimal.yaml
+
+If you suspect something is wrong, encounter errors, or just want to confirm that everything is in working order, you can also run the unit tests::
+
+    pip install pytest
+    pytest tests/unit/
+
+To run the full tests, including a set of longer/more intensive integration tests, run::
+
+    pytest tests/
+
+If a GPU is present, the unit tests will use it.
\ No newline at end of file
diff --git a/docs/introduction/intro.rst b/docs/introduction/intro.rst
new file mode 100644
index 00000000..e0dcc32c
--- /dev/null
+++ b/docs/introduction/intro.rst
@@ -0,0 +1,4 @@
+Overview
+========
+
+TODO
diff --git a/docs/lammps/all.rst b/docs/lammps/all.rst
new file mode 100644
index 00000000..9faac07e
--- /dev/null
+++ b/docs/lammps/all.rst
@@ -0,0 +1,7 @@
+Integration with LAMMPS and ASE
+===============================
+
+ .. toctree::
+
+    lammps
+    ase
diff --git a/docs/lammps/ase.rst b/docs/lammps/ase.rst
new file mode 100644
index 00000000..3729cde3
--- /dev/null
+++ b/docs/lammps/ase.rst
@@ -0,0 +1,2 @@
+ASE
+===
diff --git a/docs/lammps/lammps.rst b/docs/lammps/lammps.rst
new file mode 100644
index 00000000..f9d0ba9f
--- /dev/null
+++ b/docs/lammps/lammps.rst
@@ -0,0 +1,2 @@
+LAMMPS
+======
diff --git a/docs/options/dataset.rst b/docs/options/dataset.rst
index 54b39fc9..f3ca194c 100644
--- a/docs/options/dataset.rst
+++ b/docs/options/dataset.rst
@@ -33,7 +33,7 @@ key_mapping
     | Type: dict
     | Default: ``{'positions': 'pos', 'energy': 'total_energy', 'force': 'forces', 'forces': 'forces', 'Z': 'atomic_numbers', 'atomic_number': 'atomic_numbers'}``
 
-npz_keys
-^^^^^^^^
+include_keys
+^^^^^^^^^^^^
     | Type: list
     | Default: ``[]``
@@ -68,5 +68,11 @@ include_frames
     | Type: NoneType
     | Default: ``None``
 
+ase_args
+^^^^^^^^
+    | Type: dict
+    | Default: ``{}``
+
 Advanced
----------
\ No newline at end of file
+--------
+See the tutorial at :ref:`dataset_note`.
diff --git a/docs/yaml/yaml.rst b/docs/yaml/yaml.rst
new file mode 100644
index 00000000..fd804436
--- /dev/null
+++ b/docs/yaml/yaml.rst
@@ -0,0 +1,4 @@
+YAML input
+==========
+
+TODO
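+
+As a stopgap while this page is a TODO, a minimal config sketch
+(editor's illustration; the values are arbitrary and only a few of the many
+available options are shown, see ``configs/full.yaml`` for the full set):
+
+.. code:: yaml
+
+  root: results/toy
+  run_name: example-run
+  seed: 123
+  r_max: 4.0
+  num_layers: 4
+  dataset: ase
+  dataset_file_name: my_data.xyz
+  chemical_symbols:
+    - H
+  wandb: false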
diff --git a/nequip/_version.py b/nequip/_version.py
index 91faf40b..b02164d2 100644
--- a/nequip/_version.py
+++ b/nequip/_version.py
@@ -2,4 +2,4 @@
 # See Python packaging guide
 # https://packaging.python.org/guides/single-sourcing-package-version/
 
-__version__ = "0.5.5"
+__version__ = "0.5.6"
diff --git a/nequip/data/AtomicData.py b/nequip/data/AtomicData.py
index 3f2e348b..728c260b 100644
--- a/nequip/data/AtomicData.py
+++ b/nequip/data/AtomicData.py
@@ -48,6 +48,7 @@
     AtomicDataDict.EDGE_LENGTH_KEY,
     AtomicDataDict.EDGE_ATTRS_KEY,
     AtomicDataDict.EDGE_EMBEDDING_KEY,
+    AtomicDataDict.EDGE_FEATURES_KEY,
 }
 _DEFAULT_GRAPH_FIELDS: Set[str] = {
     AtomicDataDict.TOTAL_ENERGY_KEY,
@@ -773,7 +774,7 @@ def neighbor_list_and_relative_vec(
         keep_edge = ~bad_edge
         if not np.any(keep_edge):
             raise ValueError(
-                "After eliminating self edges, no edges remain in this system."
+                f"Every single atom has no neighbors within the cutoff r_max={r_max} (after eliminating self edges, no edges remain in this system)"
             )
         first_idex = first_idex[keep_edge]
         second_idex = second_idex[keep_edge]
diff --git a/nequip/data/_keys.py b/nequip/data/_keys.py
index c0535edd..54b66ce3 100644
--- a/nequip/data/_keys.py
+++ b/nequip/data/_keys.py
@@ -44,6 +44,7 @@
 EDGE_ATTRS_KEY: Final[str] = "edge_attrs"
 # [n_edge, dim] invariant embedding of the edges
 EDGE_EMBEDDING_KEY: Final[str] = "edge_embedding"
+EDGE_FEATURES_KEY: Final[str] = "edge_features"
 
 NODE_FEATURES_KEY: Final[str] = "node_features"
 NODE_ATTRS_KEY: Final[str] = "node_attrs"
diff --git a/nequip/data/dataset.py b/nequip/data/dataset.py
index 847b3795..c38b8eae 100644
--- a/nequip/data/dataset.py
+++ b/nequip/data/dataset.py
@@ -295,7 +295,13 @@ def process(self):
             # type conversion
             _process_dict(fixed_fields, ignore_fields=["r_max"])
 
-            logging.info(f"Loaded data: {data}")
+            total_MBs = sum(item.numel() * item.element_size() for _, item in data) / (
+                1024 * 1024
+            )
+            logging.info(
+                f"Loaded data: {data}\n    processed data size: ~{total_MBs:.2f} MB"
+            )
+            del total_MBs
 
         # use atomic writes to avoid race conditions between
         # different trainings that use the same dataset
@@ -635,7 +641,7 @@ class NpzDataset(AtomicInMemoryDataset):
     """Load data from an npz file.
 
     To avoid loading unneeded data, keys are ignored by default unless they are in ``key_mapping``, ``include_keys``,
-    ``npz_fixed_fields`` or ``extra_fixed_fields``.
+    ``npz_fixed_field_keys`` or ``extra_fixed_fields``.
 
     Args:
         key_mapping (Dict[str, str]): mapping of npz keys to ``AtomicData`` keys. Optional
diff --git a/nequip/data/transforms.py b/nequip/data/transforms.py
index f2c7ec32..4f6331b7 100644
--- a/nequip/data/transforms.py
+++ b/nequip/data/transforms.py
@@ -121,11 +121,13 @@ def transform(self, atomic_numbers):
                 f"Data included atomic numbers {bad_set} that are not part of the atomic number -> type mapping!"
             )
 
-        return self._Z_to_index[atomic_numbers - self._min_Z]
+        return self._Z_to_index.to(device=atomic_numbers.device)[
+            atomic_numbers - self._min_Z
+        ]
 
     def untransform(self, atom_types):
         """Transform atom types back into atomic numbers"""
-        return self._index_to_Z[atom_types]
+        return self._index_to_Z[atom_types].to(device=atom_types.device)
 
     @property
     def has_chemical_symbols(self) -> bool:
diff --git a/nequip/model/_build.py b/nequip/model/_build.py
index 0fe4e21d..7e1a63fd 100644
--- a/nequip/model/_build.py
+++ b/nequip/model/_build.py
@@ -8,7 +8,10 @@
 
 def model_from_config(
-    config, initialize: bool = False, dataset: Optional[AtomicDataset] = None
+    config,
+    initialize: bool = False,
+    dataset: Optional[AtomicDataset] = None,
+    deploy: bool = False,
 ) -> GraphModuleMixin:
     """Build a model based on `config`.
 
@@ -17,11 +20,13 @@
     - ``model``: the model produced by the previous builder. Cannot be requested by the first builder, must be requested by subsequent ones.
     - ``initialize``: whether to initialize the model
     - ``dataset``: if ``initialize`` is True, the dataset
+    - ``deploy``: whether the model object is for deployment / inference
 
     Args:
         config
-        initialize (bool): if True (default False), ``model_initializers`` will also be run.
+        initialize (bool): whether ``model_builders`` should be instructed to initialize the model
         dataset: dataset for initializers if ``initialize`` is True.
+        deploy (bool): whether ``model_builders`` should be told the model is for deployment / inference
 
     Returns:
         The built model.
@@ -61,6 +66,8 @@
             params = {}
             if "initialize" in pnames:
                 params["initialize"] = initialize
+            if "deploy" in pnames:
+                params["deploy"] = deploy
             if "config" in pnames:
                 params["config"] = config
             if "dataset" in pnames:
diff --git a/nequip/model/_scaling.py b/nequip/model/_scaling.py
index f5554d25..8a7ffa46 100644
--- a/nequip/model/_scaling.py
+++ b/nequip/model/_scaling.py
@@ -12,7 +12,10 @@
 
 def RescaleEnergyEtc(
-    model: GraphModuleMixin, config, dataset: AtomicDataset, initialize: bool
+    model: GraphModuleMixin,
+    config,
+    initialize: bool,
+    dataset: Optional[AtomicDataset] = None,
 ):
     return GlobalRescale(
         model=model,
@@ -34,7 +37,6 @@ def GlobalRescale(
 def GlobalRescale(
     model: GraphModuleMixin,
     config,
-    dataset: AtomicDataset,
     initialize: bool,
     module_prefix: str,
     default_scale: Union[str, float, list],
@@ -43,6 +45,7 @@ def GlobalRescale(
     default_shift_keys: list,
     default_related_scale_keys: list,
     default_related_shift_keys: list,
+    dataset: Optional[AtomicDataset] = None,
 ):
     """Add global rescaling for energy(-based quantities).
@@ -75,11 +78,12 @@ def GlobalRescale( raise ValueError(f"Invalid global scale `{value}`") # = Compute shifts and scales = - computed_stats = _compute_stats( - str_names=str_names, - dataset=dataset, - stride=config.dataset_statistics_stride, - ) + if len(str_names) > 0: + computed_stats = _compute_stats( + str_names=str_names, + dataset=dataset, + stride=config.dataset_statistics_stride, + ) if isinstance(global_scale, str): s = global_scale @@ -129,8 +133,8 @@ def GlobalRescale( def PerSpeciesRescale( model: GraphModuleMixin, config, - dataset: AtomicDataset, initialize: bool, + dataset: Optional[AtomicDataset] = None, ): """Add global rescaling for energy(-based quantities). @@ -199,12 +203,13 @@ def PerSpeciesRescale( ], "Requested to set either the shifts or scales of the per_species_rescale using dataset values, but chose to provide the other in non-dataset units. Please give the explictly specified shifts/scales in dataset units and set per_species_rescale_arguments_in_dataset_units" # = Compute shifts and scales = - computed_stats = _compute_stats( - str_names=str_names, - dataset=dataset, - stride=config.dataset_statistics_stride, - kwargs=config.get(module_prefix + "_kwargs", {}), - ) + if len(str_names) > 0: + computed_stats = _compute_stats( + str_names=str_names, + dataset=dataset, + stride=config.dataset_statistics_stride, + kwargs=config.get(module_prefix + "_kwargs", {}), + ) if isinstance(scales, str): s = scales diff --git a/nequip/nn/_grad_output.py b/nequip/nn/_grad_output.py index ffc13140..673f8ff0 100644 --- a/nequip/nn/_grad_output.py +++ b/nequip/nn/_grad_output.py @@ -315,10 +315,10 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: torch.cross(cell[:, 1, :], cell[:, 2, :], dim=1), ).unsqueeze(-1) stress = virial / volume.view(-1, 1, 1) - data[AtomicDataDict.STRESS_KEY] = stress data[AtomicDataDict.CELL_KEY] = orig_cell else: stress = self._empty # torchscript + data[AtomicDataDict.STRESS_KEY] = stress # see discussion in https://github.com/libAtoms/QUIP/issues/227 about sign convention # they say the standard convention is virial = -stress x volume diff --git a/nequip/scripts/benchmark.py b/nequip/scripts/benchmark.py index 579e60ea..1deb0de2 100644 --- a/nequip/scripts/benchmark.py +++ b/nequip/scripts/benchmark.py @@ -3,6 +3,10 @@ import tempfile import itertools import time +import logging +import sys +import pdb +import traceback import torch from torch.utils.benchmark import Timer, Measurement @@ -11,9 +15,10 @@ from e3nn.util.jit import script from nequip.utils import Config +from nequip.utils.test import assert_AtomicData_equivariant from nequip.data import AtomicData, AtomicDataDict, dataset_from_config from nequip.model import model_from_config -from nequip.scripts.deploy import _compile_for_deploy +from nequip.scripts.deploy import _compile_for_deploy, load_deployed_model from nequip.scripts.train import default_config, check_code_version from nequip.utils._global_options import _set_global_options @@ -25,12 +30,23 @@ def main(args=None): ) ) parser.add_argument("config", help="configuration file") + parser.add_argument( + "--model", + help="A deployed model to load instead of building a new one from `config`. 
", + type=str, + default=None, + ) parser.add_argument( "--profile", help="Profile instead of timing, creating and outputing a Chrome trace JSON to the given path.", type=str, default=None, ) + parser.add_argument( + "--equivariance-test", + help="test the model's equivariance on `--n-data` frames.", + action="store_true", + ) parser.add_argument( "--device", help="Device to run the model on. If not provided, defaults to CUDA if available and CPU otherwise.", @@ -55,11 +71,33 @@ def main(args=None): type=float, default=1, ) - - # TODO: option to show memory use + parser.add_argument( + "--no-compile", + help="Don't compile the model to TorchScript", + action="store_true", + ) + parser.add_argument( + "--memory-summary", + help="Print torch.cuda.memory_summary() after running the model", + action="store_true", + ) + parser.add_argument( + "--verbose", help="Logging verbosity level", type=str, default="error" + ) + parser.add_argument( + "--pdb", + help="Run model builders and model under debugger to easily drop to debugger to investigate errors.", + action="store_true", + ) # Parse the args args = parser.parse_args(args=args) + if args.pdb: + assert args.profile is None + + root_logger = logging.getLogger() + root_logger.setLevel(getattr(logging, args.verbose.upper())) + root_logger.handlers = [logging.StreamHandler(sys.stderr)] if args.device is None: device = torch.device("cuda" if torch.cuda.is_available() else "cpu") @@ -80,12 +118,12 @@ def main(args=None): print(f" loading dataset took {dataset_time:.4f}s") dataset_rng = torch.Generator() dataset_rng.manual_seed(config.get("dataset_seed", config.get("seed", 12345))) - datas = [ + datas_list = [ AtomicData.to_AtomicDataDict(dataset[i].to(device)) for i in torch.randperm(len(dataset), generator=dataset_rng)[: args.n_data] ] - n_atom: int = len(datas[0]["pos"]) - if not all(len(d["pos"]) == n_atom for d in datas): + n_atom: int = len(datas_list[0]["pos"]) + if not all(len(d["pos"]) == n_atom for d in datas_list): raise NotImplementedError( "nequip-benchmark does not currently handle benchmarking on data frames with variable number of atoms" ) @@ -97,7 +135,7 @@ def main(args=None): print(f" number of atoms: {n_atom}") print(f" number of types: {dataset.type_mapper.num_types}") print( - f" avg. num edges: {sum(d[AtomicDataDict.EDGE_INDEX_KEY].shape[1] for d in datas) / len(datas)}" + f" avg. num edges: {sum(d[AtomicDataDict.EDGE_INDEX_KEY].shape[1] for d in datas_list) / len(datas_list)}" ) avg_edges_per_atom = torch.mean( torch.cat( @@ -106,14 +144,14 @@ def main(args=None): d[AtomicDataDict.EDGE_INDEX_KEY][0], minlength=d[AtomicDataDict.POSITIONS_KEY].shape[0], ).float() - for d in datas + for d in datas_list ] ) ).item() print(f" avg. neigh/atom: {avg_edges_per_atom}") # cycle over the datas we loaded - datas = itertools.cycle(datas) + datas = itertools.cycle(datas_list) # short circut if args.n == 0: @@ -121,29 +159,64 @@ def main(args=None): return # Load model: - print("Building model... ") - model_time = time.time() - model = model_from_config(config, initialize=True, dataset=dataset) - model_time = time.time() - model_time - print(f" building model took {model_time:.4f}s") - print("Compile...") - # "Deploy" it + if args.model is None: + print("Building model... 
") + model_time = time.time() + try: + model = model_from_config( + config, initialize=True, dataset=dataset, deploy=True + ) + except: # noqa: E722 + if args.pdb: + traceback.print_exc() + pdb.post_mortem() + else: + raise + model_time = time.time() - model_time + print(f" building model took {model_time:.4f}s") + else: + print("Loading model...") + model, metadata = load_deployed_model(args.model, device=device, freeze=False) + print(" deployed model has metadata:") + print( + "\n".join( + " %s: %s" % e for e in metadata.items() if e[0] != "config" + ) + ) + print(f" model has {sum(p.numel() for p in model.parameters())} weights") + print( + f" model has {sum(p.numel() for p in model.parameters() if p.requires_grad)} trainable weights" + ) + print( + f" model weights and buffers take {sum(p.numel() * p.element_size() for p in itertools.chain(model.parameters(), model.buffers())) / (1024 * 1024):.2f} MB" + ) + model.eval() - compile_time = time.time() - model = script(model) - model = _compile_for_deploy(model) - compile_time = time.time() - compile_time - print(f" compilation took {compile_time:.4f}s") - - # save and reload to avoid bugs - with tempfile.NamedTemporaryFile() as f: - torch.jit.save(model, f.name) - model = torch.jit.load(f.name, map_location=device) - # freeze like in the LAMMPS plugin - model = torch.jit.freeze(model) - # and reload again just to avoid bugs - torch.jit.save(model, f.name) - model = torch.jit.load(f.name, map_location=device) + if args.equivariance_test: + args.no_compile = True + if args.model is not None: + raise RuntimeError("Can't equivariance test a deployed model.") + + if args.no_compile: + model = model.to(device) + else: + print("Compile...") + # "Deploy" it + compile_time = time.time() + model = script(model) + model = _compile_for_deploy(model) + compile_time = time.time() - compile_time + print(f" compilation took {compile_time:.4f}s") + + # save and reload to avoid bugs + with tempfile.NamedTemporaryFile() as f: + torch.jit.save(model, f.name) + model = torch.jit.load(f.name, map_location=device) + # freeze like in the LAMMPS plugin + model = torch.jit.freeze(model) + # and reload again just to avoid bugs + torch.jit.save(model, f.name) + model = torch.jit.load(f.name, map_location=device) # Make sure we're warm past compilation warmup = config["_jit_bailout_depth"] + 4 # just to be safe... 
@@ -154,7 +227,7 @@ def trace_handler(p): p.export_chrome_trace(args.profile) print(f"Wrote profiling trace to `{args.profile}`") - print("Starting...") + print("Starting profiling...") with torch.profiler.profile( activities=[ torch.profiler.ProfilerActivity.CPU, @@ -168,6 +241,34 @@ def trace_handler(p): for _ in range(1 + warmup + args.n): model(next(datas).copy()) p.step() + elif args.pdb: + print("Running model under debugger...") + try: + for _ in range(args.n): + model(next(datas).copy()) + except: # noqa: E722 + traceback.print_exc() + pdb.post_mortem() + print("Done.") + elif args.equivariance_test: + print("Warmup...") + warmup_time = time.time() + for _ in range(warmup): + model(next(datas).copy()) + warmup_time = time.time() - warmup_time + print(f" {warmup} calls of warmup took {warmup_time:.4f}s") + print("Running equivariance test...") + errstr = assert_AtomicData_equivariant(model, datas_list) + print( + " Equivariance test passed; equivariance errors:\n" + " Errors are in real units, where relevant.\n" + " Please note that the large scale of the typical\n" + " shifts to the (atomic) energy can cause\n" + " catastrophic cancellation and give incorrectly\n" + " the equivariance error as zero for those fields.\n" + f"{errstr}" + ) + del errstr else: print("Warmup...") warmup_time = time.time() @@ -176,13 +277,17 @@ def trace_handler(p): warmup_time = time.time() - warmup_time print(f" {warmup} calls of warmup took {warmup_time:.4f}s") - print("Starting...") + print("Benchmarking...") # just time t = Timer( stmt="model(next(datas).copy())", globals={"model": model, "datas": datas} ) perloop: Measurement = t.timeit(args.n) + if args.memory_summary and torch.cuda.is_available(): + print("Memory usage summary:") + print(torch.cuda.memory_summary()) + print(" -- Results --") print( f"PLEASE NOTE: these are speeds for the MODEL, evaluated on --n-data={args.n_data} configurations kept in memory." diff --git a/nequip/scripts/deploy.py b/nequip/scripts/deploy.py index 8185ab75..394c0005 100644 --- a/nequip/scripts/deploy.py +++ b/nequip/scripts/deploy.py @@ -9,6 +9,7 @@ import pathlib import logging import yaml +import itertools # This is a weird hack to avoid Intel MKL issues on the cluster when this is called as a subprocess of a process that has itself initialized PyTorch. # Since numpy gets imported later anyway for dataset stuff, this shouldn't affect performance. @@ -129,7 +130,7 @@ def load_deployed_model( def main(args=None): parser = argparse.ArgumentParser( - description="Create and view information about deployed NequIP potentials." + description="Deploy and view information about previously deployed NequIP models." 
 )
 # backward compat for 3.6
 if sys.version_info[1] > 6:
@@ -146,6 +147,11 @@
         help="Path to a deployed model file.",
         type=pathlib.Path,
     )
+    info_parser.add_argument(
+        "--print-config",
+        help="Print the full config of the model.",
+        action="store_true",
+    )
 
     build_parser = subparsers.add_parser("build", help="Build a deployment model")
     build_parser.add_argument(
@@ -169,13 +175,25 @@
         logging.basicConfig(level=getattr(logging, args.verbose.upper()))
 
     if args.command == "info":
-        model, metadata = load_deployed_model(args.model_path, set_global_options=False)
-        del model
+        model, metadata = load_deployed_model(
+            args.model_path, set_global_options=False, freeze=False
+        )
         config = metadata.pop(CONFIG_KEY)
-        metadata_str = "\n".join("  %s: %s" % e for e in metadata.items())
-        logging.info(f"Loaded TorchScript model with metadata:\n{metadata_str}\n")
-        logging.info("Model was built with config:")
-        print(config)
+        if args.print_config:
+            print(config)
+        else:
+            metadata_str = "\n".join("  %s: %s" % e for e in metadata.items())
+            logging.info(f"Loaded TorchScript model with metadata:\n{metadata_str}\n")
+            logging.info(
+                f"Model has {sum(p.numel() for p in model.parameters())} weights"
+            )
+            logging.info(
+                f"Model has {sum(p.numel() for p in model.parameters() if p.requires_grad)} trainable weights"
+            )
+            logging.info(
+                f"Model weights and buffers take {sum(p.numel() * p.element_size() for p in itertools.chain(model.parameters(), model.buffers())) / (1024 * 1024):.2f} MB"
+            )
+            logging.debug(f"Model had config:\n{config}")
 
     elif args.command == "build":
         if args.model and args.train_dir:
@@ -198,7 +216,7 @@
             args.train_dir, model_name="best_model.pth", device="cpu"
         )
     elif args.model is not None:
-        model = model_from_config(config)
+        model = model_from_config(config, deploy=True)
     else:
         raise AssertionError
diff --git a/nequip/scripts/evaluate.py b/nequip/scripts/evaluate.py
index d67d750f..f7dfa12b 100644
--- a/nequip/scripts/evaluate.py
+++ b/nequip/scripts/evaluate.py
@@ -30,13 +30,13 @@ def main(args=None, running_as_script: bool = True):
         description=textwrap.dedent(
             """Compute the error of a model on a test set using various metrics.
 
-            The model, metrics, dataset, etc. can specified individually, or a training session can be indicated with `--train-dir`.
+            The model, metrics, dataset, etc. can be specified in individual YAML config files, or a training session can be indicated with `--train-dir`.
             In order of priority, the global settings (dtype, TensorFloat32, etc.) are taken from:
-              1. The model config (for a training session)
-              2. The dataset config (for a deployed model)
-              3. The defaults
+            (1) the model config (for a training session),
+            (2) the dataset config (for a deployed model),
+            or (3) the defaults.
 
-            Prints only the final result in `name = num` format to stdout; all other information is logging.debuged to stderr.
+            Prints only the final result in `name = num` format to stdout; all other information is `logging.debug`ed to stderr.
 
             WARNING: Please note that results of CUDA models are rarely exactly reproducible, and that even CPU models can be nondeterministic.
             """
@@ -74,7 +74,7 @@ def main(args=None, running_as_script: bool = True):
     parser.add_argument(
         "--batch-size",
-        help="Batch size to use. Larger is usually faster on GPU. If you run out of memory, lower this.",
+        help="Batch size to use. Larger is usually faster on GPU. If you run out of memory, lower this. You can also try to raise this for faster evaluation.
Default: 50.", type=int, default=50, ) diff --git a/nequip/scripts/train.py b/nequip/scripts/train.py index c6aa7785..88b55f7e 100644 --- a/nequip/scripts/train.py +++ b/nequip/scripts/train.py @@ -81,8 +81,12 @@ def main(args=None, running_as_script: bool = True): def parse_command_line(args=None): - parser = argparse.ArgumentParser(description="Train a NequIP model.") - parser.add_argument("config", help="configuration file") + parser = argparse.ArgumentParser( + description="Train (or restart training of) a NequIP model." + ) + parser.add_argument( + "config", help="YAML file configuring the model, dataset, and other options" + ) parser.add_argument( "--equivariance-test", help="test the model's equivariance before training on n (default 1) random frames from the dataset", diff --git a/nequip/train/trainer.py b/nequip/train/trainer.py index 7ce4d4e1..55efec32 100644 --- a/nequip/train/trainer.py +++ b/nequip/train/trainer.py @@ -706,6 +706,9 @@ def init(self): self.num_weights = sum(p.numel() for p in self.model.parameters()) self.logger.info(f"Number of weights: {self.num_weights}") + self.logger.info( + f"Number of trainable weights: {sum(p.numel() for p in self.model.parameters() if p.requires_grad)}" + ) self.rescale_layers = [] outer_layer = self.model @@ -1177,7 +1180,9 @@ def set_dataset( if self.n_train > len(dataset): raise ValueError("Not enough data in dataset for requested n_train") if self.n_val > len(validation_dataset): - raise ValueError("Not enough data in dataset for requested n_train") + raise ValueError( + "Not enough data in validation dataset for requested n_val" + ) if self.train_val_split == "random": self.train_idcs = torch.randperm( len(dataset), generator=self.dataset_rng diff --git a/nequip/utils/git.py b/nequip/utils/git.py index a78a87fc..a5fbe7f3 100644 --- a/nequip/utils/git.py +++ b/nequip/utils/git.py @@ -8,7 +8,14 @@ def get_commit(module: str) -> Optional[str]: module = import_module(module) - path = str(Path(module.__file__).parents[0] / "..") + package = Path(module.__file__).parents[0] + if package.is_file(): + # We're installed as a ZIP .egg file, + # which means there's no git information + # and looking for the parent would fail anyway + # https://github.com/mir-group/nequip/issues/264 + return None + path = str(package / "..") retcode = subprocess.run( "git show --oneline --abbrev=40 -s".split(), diff --git a/nequip/utils/regressor.py b/nequip/utils/regressor.py index 3d23cf84..76d140bc 100644 --- a/nequip/utils/regressor.py +++ b/nequip/utils/regressor.py @@ -1,181 +1,76 @@ import logging import torch -import numpy as np -from typing import Optional -from sklearn.gaussian_process import GaussianProcessRegressor -from sklearn.gaussian_process.kernels import DotProduct, Kernel, Hyperparameter +from torch import matmul +from torch.linalg import solve, inv +from typing import Optional, Sequence +from opt_einsum import contract -def solver(X, y, regressor: Optional[str] = "NormalizedGaussianProcess", **kwargs): - if regressor == "GaussianProcess": - return gp(X, y, **kwargs) - elif regressor == "NormalizedGaussianProcess": - return normalized_gp(X, y, **kwargs) - else: - raise NotImplementedError(f"{regressor} is not implemented") +def solver(X, y, alpha: Optional[float] = 0.001, stride: Optional[int] = 1, **kwargs): + # results are in the same "units" as y, so same dtype too: + dtype_out = y.dtype + # always solve in float64 for numerical stability + dtype = torch.float64 + X = X[::stride].to(dtype) + y = y[::stride].to(dtype) + + X, y = 
down_sampling_by_composition(X, y) + + X_norm = torch.sum(X) + + X = X / X_norm + y = y / X_norm -def normalized_gp(X, y, **kwargs): - feature_rms = 1.0 / np.sqrt(np.average(X**2, axis=0)) - feature_rms = np.nan_to_num(feature_rms, 1) y_mean = torch.sum(y) / torch.sum(X) - mean, std = base_gp( - X, - y - (torch.sum(X, axis=1) * y_mean).reshape(y.shape), - NormalizedDotProduct, - {"diagonal_elements": feature_rms}, - **kwargs, - ) - return mean + y_mean, std + feature_rms = torch.sqrt(torch.mean(X**2, axis=0)) -def gp(X, y, **kwargs): - return base_gp( - X, y, DotProduct, {"sigma_0": 0, "sigma_0_bounds": "fixed"}, **kwargs - ) + alpha_mat = torch.diag(feature_rms) * alpha * alpha + + A = matmul(X.T, X) + alpha_mat + dy = y - (torch.sum(X, axis=1, keepdim=True) * y_mean).reshape(y.shape) + Xy = matmul(X.T, dy) + + mean = solve(A, Xy) + + sigma2 = torch.var(matmul(X, mean) - dy) + Ainv = inv(A) + cov = torch.sqrt(sigma2 * contract("ij,kj,kl,li->i", Ainv, X, X, Ainv)) + mean = mean + y_mean.reshape([-1]) -def base_gp( - X, - y, - kernel, - kernel_kwargs, - alpha: Optional[float] = 0.1, - max_iteration: int = 20, - stride: Optional[int] = None, + logging.debug(f"Ridge Regression, residue {sigma2}") + + return mean.to(dtype_out), cov.to(dtype_out) + + +def down_sampling_by_composition( + X: torch.Tensor, y: torch.Tensor, percentage: Sequence = [0.25, 0.5, 0.75] ): - if len(y.shape) == 1: - y = y.reshape([-1, 1]) - - if stride is not None: - X = X[::stride] - y = y[::stride] - - not_fit = True - iteration = 0 - mean = None - std = None - while not_fit: - logging.debug(f"GP fitting iteration {iteration} {alpha}") - try: - _kernel = kernel(**kernel_kwargs) - gpr = GaussianProcessRegressor(kernel=_kernel, random_state=0, alpha=alpha) - gpr = gpr.fit(X, y) - - vec = torch.diag(torch.ones(X.shape[1])) - mean, std = gpr.predict(vec, return_std=True) - - mean = torch.as_tensor(mean, dtype=torch.get_default_dtype()).reshape([-1]) - # ignore all the off-diagonal terms - std = torch.as_tensor(std, dtype=torch.get_default_dtype()).reshape([-1]) - likelihood = gpr.log_marginal_likelihood() - - res = torch.sqrt( - torch.square(torch.matmul(X, mean.reshape([-1, 1])) - y).mean() - ) - - logging.debug( - f"GP fitting: alpha {alpha}:\n" - f" residue {res}\n" - f" mean {mean} std {std}\n" - f" log marginal likelihood {likelihood}" - ) - not_fit = False - - except Exception as e: - logging.info(f"GP fitting failed for alpha={alpha} and {e.args}") - if alpha == 0 or alpha is None: - logging.info("try a non-zero alpha") - not_fit = False - raise ValueError( - f"Please set the {alpha} to non-zero value. \n" - "The dataset energy is rank deficient to be solved with GP" - ) - else: - alpha = alpha * 2 - iteration += 1 - logging.debug(f" increase alpha to {alpha}") - - if iteration >= max_iteration or not_fit is False: - raise ValueError( - "Please set the per species shift and scale to zeros and ones. \n" - "The dataset energy is to diverge to be solved with GP" - ) - - return mean, std - - -class NormalizedDotProduct(Kernel): - r"""Dot-Product kernel. - .. math:: - k(x_i, x_j) = x_i \cdot A \cdot x_j - """ - - def __init__(self, diagonal_elements): - # TO DO: check shape - self.diagonal_elements = diagonal_elements - self.A = np.diag(diagonal_elements) - - def __call__(self, X, Y=None, eval_gradient=False): - """Return the kernel k(X, Y) and optionally its gradient. 
- Parameters - ---------- - X : ndarray of shape (n_samples_X, n_features) - Left argument of the returned kernel k(X, Y) - Y : ndarray of shape (n_samples_Y, n_features), default=None - Right argument of the returned kernel k(X, Y). If None, k(X, X) - if evaluated instead. - eval_gradient : bool, default=False - Determines whether the gradient with respect to the log of - the kernel hyperparameter is computed. - Only supported when Y is None. - Returns - ------- - K : ndarray of shape (n_samples_X, n_samples_Y) - Kernel k(X, Y) - K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims),\ - optional - The gradient of the kernel k(X, X) with respect to the log of the - hyperparameter of the kernel. Only returned when `eval_gradient` - is True. - """ - X = np.atleast_2d(X) - if Y is None: - K = (X.dot(self.A)).dot(X.T) - else: - if eval_gradient: - raise ValueError("Gradient can only be evaluated when Y is None.") - K = (X.dot(self.A)).dot(Y.T) - - if eval_gradient: - return K, np.empty((X.shape[0], X.shape[0], 0)) - else: - return K - - def diag(self, X): - """Returns the diagonal of the kernel k(X, X). - The result of this method is identical to np.diag(self(X)); however, - it can be evaluated more efficiently since only the diagonal is - evaluated. - Parameters - ---------- - X : ndarray of shape (n_samples_X, n_features) - Left argument of the returned kernel k(X, Y). - Returns - ------- - K_diag : ndarray of shape (n_samples_X,) - Diagonal of kernel k(X, X). - """ - return np.einsum("ij,ij,jj->i", X, X, self.A) - - def __repr__(self): - return "" - - def is_stationary(self): - """Returns whether the kernel is stationary.""" - return False - - @property - def hyperparameter_diagonal_elements(self): - return Hyperparameter("diagonal_elements", "numeric", "fixed") + unique_comps, comp_ids = torch.unique(X, dim=0, return_inverse=True) + + n_types = torch.max(comp_ids) + 1 + + sort_by = torch.argsort(comp_ids) + + # find out the block for each composition + d_icomp = comp_ids[sort_by] + d_icomp = d_icomp[:-1] - d_icomp[1:] + node_icomp = torch.where(d_icomp != 0)[0] + id_start = torch.cat((torch.as_tensor([0]), node_icomp + 1)) + id_end = torch.cat((node_icomp + 1, torch.as_tensor([len(sort_by)]))) + + n_points = len(percentage) + new_X = torch.zeros( + (n_types * n_points, X.shape[1]), dtype=X.dtype, device=X.device + ) + new_y = torch.zeros((n_types * n_points), dtype=y.dtype, device=y.device) + for i in range(n_types): + ids = sort_by[id_start[i] : id_end[i]] + for j, p in enumerate(percentage): + new_y[i * n_points + j] = torch.quantile(y[ids], p, interpolation="linear") + new_X[i * n_points + j] = unique_comps[i] + + return new_X, new_y diff --git a/nequip/utils/test.py b/nequip/utils/test.py index edf2c1e8..60e68730 100644 --- a/nequip/utils/test.py +++ b/nequip/utils/test.py @@ -28,7 +28,7 @@ def assert_permutation_equivariant( data_in: AtomicDataDict.Type, tolerance: Optional[float] = None, raise_error: bool = True, -): +) -> str: r"""Test the permutation equivariance of ``func``. Standard fields are assumed to be equivariant to node or edge permutations according to their standard interpretions; all other fields are assumed to be invariant to all permutations. Non-standard fields can be registered as node/edge permutation equivariant using ``register_fields``. 
@@ -93,38 +93,42 @@ def assert_permutation_equivariant( out_perm.keys() ), "Permutation changed the set of fields returned by model" - problems = [] + messages = [] + num_problems: int = 0 for k in out_orig.keys(): if k in node_permute_fields: - if not torch.allclose(out_orig[k][node_perm], out_perm[k], atol=atol): - err = (out_orig[k][node_perm] - out_perm[k]).abs().max() - problems.append( - f"node permutation equivariance violated for field {k}; maximum componentwise error: {err:e}" - ) + err = (out_orig[k][node_perm] - out_perm[k]).abs().max() + fail = not torch.allclose(out_orig[k][node_perm], out_perm[k], atol=atol) + if fail: + num_problems += 1 + messages.append( + f" node permutation equivariance of field {k:20} -> max error={err:.3e}{' FAIL' if fail else ''}" + ) elif k in edge_permute_fields: - if not torch.allclose(out_orig[k][edge_perm], out_perm[k], atol=atol): - err = (out_orig[k][edge_perm] - out_perm[k]).abs().max() - problems.append( - f"edge permutation equivariance violated for field {k}; maximum componentwise error: {err:e}" - ) + err = (out_orig[k][edge_perm] - out_perm[k]).abs().max() + fail = not torch.allclose(out_orig[k][edge_perm], out_perm[k], atol=atol) + if fail: + num_problems += 1 + messages.append( + f" edge permutation equivariance of field {k:20} -> max error={err:.3e}{' FAIL' if fail else ''}" + ) elif k == AtomicDataDict.EDGE_INDEX_KEY: pass else: # Assume invariant if out_orig[k].dtype == torch.bool: - if not torch.all(out_orig[k] == out_perm[k]): - problems.append( - f"edge/node permutation invariance violated for field {k} ({k} was assumed to be invariant, should it have been marked as equivariant?)" - ) + err = (out_orig[k] != out_perm[k]).max() else: - if not torch.allclose(out_orig[k], out_perm[k], atol=atol): - err = (out_orig[k] - out_perm[k]).abs().max() - problems.append( - f"edge/node permutation invariance violated for field {k}; maximum componentwise error: {err:e}. (`{k}` was assumed to be invariant, should it have been marked as equivariant?)" - ) - msg = "\n".join(problems) - if len(problems) == 0: - return + err = (out_orig[k] - out_perm[k]).abs().max() + fail = not torch.allclose(out_orig[k], out_perm[k], atol=atol) + if fail: + num_problems += 1 + messages.append( + f" edge & node permutation invariance for field {k:20} -> max error={err:.3e}{' FAIL' if fail else ''}" + ) + msg = "\n".join(messages) + if num_problems == 0: + return msg else: if raise_error: raise AssertionError(msg) @@ -169,7 +173,7 @@ def assert_AtomicData_equivariant( # == Test permutation of graph nodes == # since permutation is discrete and should not be data dependent, run only on one frame. 
- permutation_problems = assert_permutation_equivariant( + permutation_message = assert_permutation_equivariant( func, data_in[0], tolerance=permutation_tolerance, raise_error=False ) @@ -255,53 +259,23 @@ def wrapper(*args): if o3_tolerance is None: o3_tolerance = FLOAT_TOLERANCE[torch.get_default_dtype()] - anerr = next(iter(errs.values())) - if isinstance(anerr, float) or anerr.ndim == 0: - # old e3nn doesn't report which key - problems = {k: v for k, v in errs.items() if v > o3_tolerance} - - def _describe(errors): - return ( - permutation_problems + "\n" if permutation_problems is not None else "" - ) + "\n".join( - "(parity_k={:d}, did_translate={}) -> max error={:.3e}".format( - int(k[0]), - bool(k[1]), - float(v), - ) - for k, v in errors.items() - ) - - if len(problems) > 0 or permutation_problems is not None: - raise AssertionError( - "Equivariance test failed for cases:" + _describe(problems) - ) - - return _describe(errs) - else: - # it's newer and tells us which is which - all_errs = [] - for case, err in errs.items(): - for key, this_err in zip(irreps_out.keys(), err): - all_errs.append(case + (key, this_err)) - problems = [e for e in all_errs if e[-1] > o3_tolerance] - - def _describe(errors): - return ( - permutation_problems + "\n" if permutation_problems is not None else "" - ) + "\n".join( - " (parity_k={:1d}, did_translate={:5}, field={:20}) -> max error={:.3e}".format( - int(k[0]), str(bool(k[1])), str(k[2]), float(k[3]) - ) - for k in errors - ) + all_errs = [] + for case, err in errs.items(): + for key, this_err in zip(irreps_out.keys(), err): + all_errs.append(case + (key, this_err)) + is_problem = [e[-1] > o3_tolerance for e in all_errs] + + message = (permutation_message + "\n") + "\n".join( + " (parity_k={:1d}, did_translate={:5}, field={:20}) -> max error={:.3e}".format( + int(k[0]), str(bool(k[1])), str(k[2]), float(k[3]) + ) + for k, prob in zip(all_errs, is_problem) + ) - if len(problems) > 0 or permutation_problems is not None: - raise AssertionError( - "Equivariance test failed for cases:\n" + _describe(problems) - ) + if sum(is_problem) > 0 or "FAIL" in permutation_message: + raise AssertionError(f"Equivariance test failed for cases:\n{message}") - return _describe(all_errs) + return message _DEBUG_HOOKS = None diff --git a/nequip/utils/unittests/__init__.py b/nequip/utils/unittests/__init__.py new file mode 100644 index 00000000..2309cb02 --- /dev/null +++ b/nequip/utils/unittests/__init__.py @@ -0,0 +1,3 @@ +import pathlib + +CONFTEST_PATH = pathlib.Path(__file__).parent / "conftest.py" diff --git a/nequip/utils/unittests/conftest.py b/nequip/utils/unittests/conftest.py new file mode 100644 index 00000000..4cfa98ff --- /dev/null +++ b/nequip/utils/unittests/conftest.py @@ -0,0 +1,154 @@ +from typing import List, Tuple +import numpy as np +import pathlib +import pytest +import tempfile +import os + +from ase.atoms import Atoms +from ase.build import molecule +from ase.calculators.singlepoint import SinglePointCalculator +from ase.io import write + +import torch + +from nequip.utils.test import set_irreps_debug +from nequip.data import AtomicData, ASEDataset +from nequip.data.transforms import TypeMapper +from nequip.utils.torch_geometric import Batch +from nequip.utils._global_options import _set_global_options +from nequip.utils.misc import dtype_from_name + +if "NEQUIP_NUM_TASKS" not in os.environ: + # Test parallelization, but don't waste time spawning tons of workers if lots of cores available + os.environ["NEQUIP_NUM_TASKS"] = "2" + +# The default 
float tolerance
+FLOAT_TOLERANCE = {
+    t: torch.as_tensor(v, dtype=dtype_from_name(t))
+    for t, v in {"float32": 1e-3, "float64": 1e-10}.items()
+}
+
+
+@pytest.fixture(scope="session", autouse=True, params=["float32", "float64"])
+def float_tolerance(request):
+    """Run all tests with various PyTorch default dtypes.
+
+    This is a session-wide, autouse fixture — you only need to request it explicitly if a test needs to know the tolerance for the current default dtype.
+
+    Returns
+    -------
+    A precision threshold to use for closeness tests.
+    """
+    old_dtype = torch.get_default_dtype()
+    dtype = request.param
+    _set_global_options({"default_dtype": dtype})
+    yield FLOAT_TOLERANCE[dtype]
+    _set_global_options(
+        {
+            "default_dtype": {torch.float32: "float32", torch.float64: "float64"}[
+                old_dtype
+            ]
+        }
+    )
+
+
+# - Ampere and TF32 -
+# Many of the tests for NequIP involve numerically checking
+# algebraic properties: normalization, equivariance,
+# continuity, etc.
+# With the added numerical noise of TF32, some of those tests fail
+# with the current (and usually generous) thresholds.
+#
+# Thus we go on the assumption that PyTorch + NVIDIA got everything
+# right, that this setting DOES NOT AFFECT the model outputs except
+# for increased numerical noise, and only test without it.
+#
+# TODO: consider running tests with and without
+# TODO: check how much thresholds have to be changed to accommodate TF32
+torch.backends.cuda.matmul.allow_tf32 = False
+torch.backends.cudnn.allow_tf32 = False
+
+
+@pytest.fixture(scope="session")
+def BENCHMARK_ROOT():
+    return pathlib.Path(__file__).parent / "../benchmark_data/"
+
+
+@pytest.fixture(scope="session")
+def temp_data(float_tolerance):
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        yield tmpdirname
+
+
+@pytest.fixture(scope="session")
+def CH3CHO(CH3CHO_no_typemap) -> Tuple[Atoms, AtomicData]:
+    atoms, data = CH3CHO_no_typemap
+    tm = TypeMapper(chemical_symbol_to_type={"C": 0, "O": 1, "H": 2})
+    data = tm(data)
+    return atoms, data
+
+
+@pytest.fixture(scope="session")
+def CH3CHO_no_typemap(float_tolerance) -> Tuple[Atoms, AtomicData]:
+    atoms = molecule("CH3CHO")
+    data = AtomicData.from_ase(atoms, r_max=2.0)
+    return atoms, data
+
+
+@pytest.fixture(scope="session")
+def molecules() -> List[Atoms]:
+    atoms_list = []
+    for i in range(8):
+        atoms = molecule("CH3CHO" if i % 2 == 0 else "H2")
+        atoms.rattle()
+        atoms.calc = SinglePointCalculator(
+            energy=np.random.random(),
+            forces=np.random.random((len(atoms), 3)),
+            stress=None,
+            magmoms=None,
+            atoms=atoms,
+        )
+        atoms_list.append(atoms)
+    return atoms_list
+
+
+@pytest.fixture(scope="session")
+def nequip_dataset(molecules, temp_data, float_tolerance):
+    with tempfile.NamedTemporaryFile(suffix=".xyz") as fp:
+        for atoms in molecules:
+            write(fp.name, atoms, format="extxyz", append=True)
+        a = ASEDataset(
+            file_name=fp.name,
+            root=temp_data,
+            extra_fixed_fields={"r_max": 3.0},
+            ase_args=dict(format="extxyz"),
+            type_mapper=TypeMapper(chemical_symbol_to_type={"H": 0, "C": 1, "O": 2}),
+        )
+        yield a
+
+
+@pytest.fixture(scope="session")
+def atomic_batch(nequip_dataset):
+    return Batch.from_data_list([nequip_dataset[0], nequip_dataset[1]])
+
+
+@pytest.fixture(scope="function")
+def per_species_set():
+    dtype = torch.get_default_dtype()
+    rng = torch.Generator().manual_seed(127)
+    mean_min = 1
+    mean_max = 100
+    std = 20
+    n_sample = 1000
+    n_species = 9
+    ref_mean = torch.rand((n_species), generator=rng) * (mean_max - mean_min) + mean_min
+    t_mean = torch.ones((n_sample, 1)) 
* ref_mean.reshape([1, -1]) + ref_std = torch.rand((n_species), generator=rng) * std + t_std = torch.ones((n_sample, 1)) * ref_std.reshape([1, -1]) + E = torch.normal(t_mean, t_std, generator=rng) + return ref_mean.to(dtype), ref_std.to(dtype), E.to(dtype), n_sample, n_species + + +# Use debug mode +set_irreps_debug(True) diff --git a/tests/unit/model/test_eng_force.py b/nequip/utils/unittests/model_tests.py similarity index 56% rename from tests/unit/model/test_eng_force.py rename to nequip/utils/unittests/model_tests.py index 0adcd4c9..2b6a8b63 100644 --- a/tests/unit/model/test_eng_force.py +++ b/nequip/utils/unittests/model_tests.py @@ -1,150 +1,87 @@ import pytest -import logging import tempfile import functools import torch import numpy as np -from e3nn import o3 from e3nn.util.jit import script -from nequip.data import AtomicDataDict, AtomicData, Collater +from nequip.data import ( + AtomicDataDict, + AtomicData, + Collater, + _GRAPH_FIELDS, + _NODE_FIELDS, + _EDGE_FIELDS, +) from nequip.data.transforms import TypeMapper -from nequip.model import model_from_config, uniform_initialize_FCs -from nequip.nn import GraphModuleMixin, AtomwiseLinear +from nequip.model import model_from_config +from nequip.nn import GraphModuleMixin from nequip.utils.test import assert_AtomicData_equivariant -logging.basicConfig(level=logging.DEBUG) - -COMMON_CONFIG = { - "num_types": 3, - "types_names": ["H", "C", "O"], - "avg_num_neighbors": None, -} -r_max = 3 -minimal_config1 = dict( - irreps_edge_sh="0e + 1o", - r_max=4, - feature_irreps_hidden="4x0e + 4x1o", - num_layers=2, - num_basis=8, - PolynomialCutoff_p=6, - nonlinearity_type="norm", - **COMMON_CONFIG -) -minimal_config2 = dict( - irreps_edge_sh="0e + 1o", - r_max=4, - chemical_embedding_irreps_out="8x0e + 8x0o + 8x1e + 8x1o", - irreps_mid_output_block="2x0e", - feature_irreps_hidden="4x0e + 4x1o", - **COMMON_CONFIG -) -minimal_config3 = dict( - irreps_edge_sh="0e + 1o", - r_max=4, - feature_irreps_hidden="4x0e + 4x1o", - num_layers=2, - num_basis=8, - PolynomialCutoff_p=6, - nonlinearity_type="gate", - **COMMON_CONFIG -) -minimal_config4 = dict( - irreps_edge_sh="0e + 1o + 2e", - r_max=4, - feature_irreps_hidden="2x0e + 2x1o + 2x2e", - num_layers=2, - num_basis=3, - PolynomialCutoff_p=6, - nonlinearity_type="gate", - # test custom nonlinearities - nonlinearity_scalars={"e": "silu", "o": "tanh"}, - nonlinearity_gates={"e": "silu", "o": "abs"}, - **COMMON_CONFIG -) - - -@pytest.fixture( - scope="module", - params=[minimal_config1, minimal_config2, minimal_config3, minimal_config4], -) -def config(request): - return request.param +# see https://github.com/pytest-dev/pytest/issues/421#issuecomment-943386533 +# to allow external packages to import tests through subclassing +class BaseModelTests: + @pytest.fixture(scope="class") + def config(self): + """Implemented by subclasses. 
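+
+        The config must at least provide the keys the shared tests read, such as
+        ``model_builders`` and ``r_max``; ``num_types`` and ``types_names`` are
+        filled in by ``make_model``.
+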
+ Return a tuple of config, out_field + """ + raise NotImplementedError -@pytest.fixture( - params=[ - ( - ["EnergyModel", "ForceOutput"], - AtomicDataDict.FORCE_KEY, + @pytest.fixture( + scope="class", + params=( + [torch.device("cuda"), torch.device("cpu")] + if torch.cuda.is_available() + else [torch.device("cpu")] ), - ( - ["EnergyModel"], - AtomicDataDict.TOTAL_ENERGY_KEY, - ), - ( - ["EnergyModel", "StressForceOutput"], - AtomicDataDict.STRESS_KEY, - ), - ] -) -def model(request, config): - torch.manual_seed(0) - np.random.seed(0) - builder, out_field = request.param - config = config.copy() - config["model_builders"] = builder - return model_from_config(config), out_field - - -@pytest.fixture( - scope="module", - params=( - [torch.device("cuda"), torch.device("cpu")] - if torch.cuda.is_available() - else [torch.device("cpu")] - ), -) -def device(request): - return request.param - + ) + def device(self, request): + return request.param + + @staticmethod + def make_model(config, device, initialize: bool = True, deploy: bool = False): + torch.manual_seed(127) + np.random.seed(193) + config = config.copy() + config.update( + { + "num_types": 3, + "types_names": ["H", "C", "O"], + } + ) + model = model_from_config(config, initialize=initialize, deploy=deploy) + model = model.to(device) + return model -class TestWorkflow: - """ - test class methods - """ + @pytest.fixture(scope="class") + def model(self, config, device): + config, out_fields = config + model = self.make_model(config, device=device) + return model, out_fields + # == common tests for all models == def test_init(self, model): instance, _ = model assert isinstance(instance, GraphModuleMixin) - def test_weight_init(self, model, atomic_batch, device): - instance, out_field = model - data = AtomicData.to_AtomicDataDict(atomic_batch.to(device=device)) - instance = instance.to(device=device) - - out_orig = instance(data)[out_field] - - instance = uniform_initialize_FCs(instance, initialize=True) - - out_unif = instance(data)[out_field] - assert not torch.allclose(out_orig, out_unif) - def test_jit(self, model, atomic_batch, device): - instance, out_field = model + instance, out_fields = model data = AtomicData.to_AtomicDataDict(atomic_batch.to(device=device)) instance = instance.to(device=device) model_script = script(instance) - assert torch.allclose( - instance(data)[out_field], - model_script(data)[out_field], - atol=1e-6, - ) + for out_field in out_fields: + assert torch.allclose( + instance(data)[out_field], + model_script(data)[out_field], + atol=1e-6, + ) # - Try saving, loading in another process, and running - with tempfile.TemporaryDirectory() as tmpdir: @@ -163,39 +100,25 @@ def test_jit(self, model, atomic_batch, device): torch.float64: 1e-10, }[torch.get_default_dtype()] - assert torch.allclose( - model_script(data)[out_field], - load_model(load_dat)[out_field], - atol=atol, - ) - - def test_submods(self): - config = minimal_config2.copy() - config["model_builders"] = ["EnergyModel"] - model = model_from_config(config=config, initialize=True) - assert isinstance(model.chemical_embedding, AtomwiseLinear) - true_irreps = o3.Irreps(minimal_config2["chemical_embedding_irreps_out"]) - assert ( - model.chemical_embedding.irreps_out[model.chemical_embedding.out_field] - == true_irreps - ) - # Make sure it propagates - assert ( - model.layer0_convnet.irreps_in[model.chemical_embedding.out_field] - == true_irreps - ) + for out_field in out_fields: + assert torch.allclose( + model_script(data)[out_field], + 
load_model(load_dat)[out_field], + atol=atol, + ) def test_forward(self, model, atomic_batch, device): - instance, out_field = model + instance, out_fields = model instance.to(device) data = atomic_batch.to(device) output = instance(AtomicData.to_AtomicDataDict(data)) - assert out_field in output + for out_field in out_fields: + assert out_field in output def test_batch(self, model, atomic_batch, device, float_tolerance): """Confirm that the results for individual examples are the same regardless of whether they are batched.""" allclose = functools.partial(torch.allclose, atol=float_tolerance) - instance, out_field = model + instance, out_fields = model instance.to(device) data = atomic_batch.to(device) data1 = data.get_example(0) @@ -203,33 +126,186 @@ def test_batch(self, model, atomic_batch, device, float_tolerance): output1 = instance(AtomicData.to_AtomicDataDict(data1)) output2 = instance(AtomicData.to_AtomicDataDict(data2)) output = instance(AtomicData.to_AtomicDataDict(data)) - if out_field in (AtomicDataDict.TOTAL_ENERGY_KEY, AtomicDataDict.STRESS_KEY): - assert allclose( - output1[out_field], - output[out_field][0], - ) - assert allclose( - output2[out_field], - output[out_field][1], - ) - elif out_field in (AtomicDataDict.FORCE_KEY,): - assert allclose( - output1[out_field], - output[out_field][output[AtomicDataDict.BATCH_KEY] == 0], - ) - assert allclose( - output2[out_field], - output[out_field][output[AtomicDataDict.BATCH_KEY] == 1], - ) + for out_field in out_fields: + if out_field in _GRAPH_FIELDS: + assert allclose( + output1[out_field], + output[out_field][0], + ) + assert allclose( + output2[out_field], + output[out_field][1], + ) + elif out_field in _NODE_FIELDS: + assert allclose( + output1[out_field], + output[out_field][output[AtomicDataDict.BATCH_KEY] == 0], + ) + assert allclose( + output2[out_field], + output[out_field][output[AtomicDataDict.BATCH_KEY] == 1], + ) + elif out_field in _EDGE_FIELDS: + assert allclose( + output1[out_field], + output[out_field][ + output[AtomicDataDict.BATCH_KEY][ + output[AtomicDataDict.EDGE_INDEX_KEY][0] + ] + == 0 + ], + ) + assert allclose( + output2[out_field], + output[out_field][ + output[AtomicDataDict.BATCH_KEY][ + output[AtomicDataDict.EDGE_INDEX_KEY][0] + ] + == 1 + ], + ) + else: + raise NotImplementedError + + def test_equivariance(self, model, atomic_batch, device): + instance, out_fields = model + instance = instance.to(device=device) + atomic_batch = atomic_batch.to(device=device) + assert_AtomicData_equivariant(func=instance, data_in=atomic_batch) + + def test_embedding_cutoff(self, model, config, device): + instance, out_fields = model + config, out_fields = config + r_max = config["r_max"] + + # make a synthetic three atom example + data = AtomicData( + atom_types=np.random.choice([0, 1, 2], size=3), + pos=np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0]]), + edge_index=np.array([[0, 1, 0, 2], [1, 0, 2, 0]]), + ) + data = data.to(device) + edge_embed = instance(AtomicData.to_AtomicDataDict(data)) + if AtomicDataDict.EDGE_FEATURES_KEY in edge_embed: + key = AtomicDataDict.EDGE_FEATURES_KEY else: - raise NotImplementedError + key = AtomicDataDict.EDGE_EMBEDDING_KEY + edge_embed = edge_embed[key] + data.pos[2, 1] = r_max # put it past the cutoff + edge_embed2 = instance(AtomicData.to_AtomicDataDict(data))[key] + if key == AtomicDataDict.EDGE_EMBEDDING_KEY: + # we can only check that other edges are unaffected if we know it's an embedding + # For example, an Allegro edge feature is many body so will be affected 
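+            # in this geometry edges 0 and 1 connect atoms 0 and 1, which did not
+            # move, while edges 2 and 3 involve atom 2, which now sits exactly at
+            # the cutoff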
+            assert torch.allclose(edge_embed[:2], edge_embed2[:2])
+        assert edge_embed[2:].abs().sum() > 1e-6  # some nonzero terms
+        assert torch.allclose(edge_embed2[2:], torch.zeros(1, device=device))
+
+        # test gradients
+        in_dict = AtomicData.to_AtomicDataDict(data)
+        in_dict[AtomicDataDict.POSITIONS_KEY].requires_grad_(True)
+
+        with torch.autograd.set_detect_anomaly(True):
+            out = instance(in_dict)
+
+        # is the edge embedding of the cutoff length edge unchanged at the cutoff?
+        grads = torch.autograd.grad(
+            outputs=out[key][2:].sum(),
+            inputs=in_dict[AtomicDataDict.POSITIONS_KEY],
+            retain_graph=True,
+        )[0]
+        assert torch.allclose(grads, torch.zeros(1, device=device))
+
+        if AtomicDataDict.PER_ATOM_ENERGY_KEY in out:
+            # are the first two atoms' energies unaffected by the atom at the cutoff?
+            grads = torch.autograd.grad(
+                outputs=out[AtomicDataDict.PER_ATOM_ENERGY_KEY][:2].sum(),
+                inputs=in_dict[AtomicDataDict.POSITIONS_KEY],
+            )[0]
+            print(grads)
+            # only care about the gradient wrt the moved atom
+            assert grads.shape == (3, 3)
+            assert torch.allclose(grads[2], torch.zeros(1, device=device))
+
+
+class BaseEnergyModelTests(BaseModelTests):
+    def test_large_separation(self, model, config, molecules, device):
+        atol = {torch.float32: 1e-4, torch.float64: 1e-10}[torch.get_default_dtype()]
+        instance, _ = model
+        instance.to(device)
+        config, out_fields = config
+        r_max = config["r_max"]
+        atoms1 = molecules[0].copy()
+        atoms2 = molecules[1].copy()
+        # translate atoms2 far away
+        atoms2.positions += 40.0 + np.random.randn(3)
+        atoms_both = atoms1.copy()
+        atoms_both.extend(atoms2)
+        tm = TypeMapper(chemical_symbols=["H", "C", "O"])
+        data1 = tm(AtomicData.from_ase(atoms1, r_max=r_max).to(device=device))
+        data2 = tm(AtomicData.from_ase(atoms2, r_max=r_max).to(device=device))
+        data_both = tm(AtomicData.from_ase(atoms_both, r_max=r_max).to(device=device))
+        assert (
+            data_both[AtomicDataDict.EDGE_INDEX_KEY].shape[1]
+            == data1[AtomicDataDict.EDGE_INDEX_KEY].shape[1]
+            + data2[AtomicDataDict.EDGE_INDEX_KEY].shape[1]
+        )
+
+        out1 = instance(AtomicData.to_AtomicDataDict(data1))
+        out2 = instance(AtomicData.to_AtomicDataDict(data2))
+        out_both = instance(AtomicData.to_AtomicDataDict(data_both))
+
+        assert torch.allclose(
+            out1[AtomicDataDict.TOTAL_ENERGY_KEY]
+            + out2[AtomicDataDict.TOTAL_ENERGY_KEY],
+            out_both[AtomicDataDict.TOTAL_ENERGY_KEY],
+            atol=atol,
+        )
+
+        atoms_both2 = atoms1.copy()
+        atoms3 = atoms2.copy()
+        atoms3.positions += np.random.randn(3)
+        atoms_both2.extend(atoms3)
+        data_both2 = tm(AtomicData.from_ase(atoms_both2, r_max=r_max).to(device=device))
+        out_both2 = instance(AtomicData.to_AtomicDataDict(data_both2))
+        assert torch.allclose(
+            out_both2[AtomicDataDict.TOTAL_ENERGY_KEY],
+            out_both[AtomicDataDict.TOTAL_ENERGY_KEY],
+            atol=atol,
+        )
+        assert torch.allclose(
+            out_both2[AtomicDataDict.PER_ATOM_ENERGY_KEY],
+            out_both[AtomicDataDict.PER_ATOM_ENERGY_KEY],
+            atol=atol,
+        )
+
+    def test_cross_frame_grad(self, model, device, nequip_dataset):
+        c = Collater.for_dataset(nequip_dataset)
+        batch = c([nequip_dataset[i] for i in range(len(nequip_dataset))])
+        energy_model, out_fields = model
+        energy_model.to(device)
+        data = AtomicData.to_AtomicDataDict(batch.to(device))
+        data[AtomicDataDict.POSITIONS_KEY].requires_grad = True
+
+        output = 
energy_model(data) + grads = torch.autograd.grad( + outputs=output[AtomicDataDict.TOTAL_ENERGY_KEY][-1], + inputs=data[AtomicDataDict.POSITIONS_KEY], + allow_unused=True, + )[0] + + last_frame_n_atom = batch.ptr[-1] - batch.ptr[-2] + + in_frame_grad = grads[-last_frame_n_atom:] + cross_frame_grad = grads[:-last_frame_n_atom] + + assert cross_frame_grad.abs().max().item() == 0 + assert in_frame_grad.abs().max().item() > 0 + + def test_numeric_gradient(self, model, atomic_batch, device): + model, out_fields = model + if AtomicDataDict.FORCE_KEY not in out_fields: + pytest.skip() model.to(device) data = atomic_batch.to(device) output = model(AtomicData.to_AtomicDataDict(data)) @@ -256,16 +332,15 @@ def test_numeric_gradient(self, config, atomic_batch, device, float_tolerance): numeric, analytical, rtol=5e-2 ) - def test_partial_forces(self, atomic_batch, device): - config = minimal_config1.copy() - config["model_builders"] = [ - "EnergyModel", - "ForceOutput", - ] + def test_partial_forces(self, config, atomic_batch, device, strict_locality): + config, out_fields = config + if "ForceOutput" not in config["model_builders"]: + pytest.skip() + config = config.copy() partial_config = config.copy() partial_config["model_builders"] = [ - "EnergyModel", - "PartialForceOutput", + "PartialForceOutput" if b == "ForceOutput" else b + for b in partial_config["model_builders"] ] model = model_from_config(config=config, initialize=True) partial_model = model_from_config(config=partial_config, initialize=True) @@ -284,7 +359,7 @@ def test_partial_forces(self, atomic_batch, device): assert torch.allclose( output[k], output_partial[k], - atol=1e-6 if k == AtomicDataDict.FORCE_KEY else 1e-8, + atol=1e-8 if k == AtomicDataDict.TOTAL_ENERGY_KEY else 1e-6, ) else: assert torch.equal(output[k], output_partial[k]) @@ -293,152 +368,17 @@ def test_partial_forces(self, atomic_batch, device): assert partial_forces.shape == (n_at, n_at, 3) # confirm that sparsity matches graph topology: edge_index = data[AtomicDataDict.EDGE_INDEX_KEY] - adjacency = torch.zeros(n_at, n_at, dtype=torch.bool) - strict_locality = False + adjacency = torch.zeros( + n_at, n_at, dtype=torch.bool, device=partial_forces.device + ) if strict_locality: # only adjacent for nonzero deriv to neighbors adjacency[edge_index[0], edge_index[1]] = True - adjacency[ - torch.arange(n_at), torch.arange(n_at) - ] = True # diagonal is ofc True + arange = torch.arange(n_at, device=partial_forces.device) + adjacency[arange, arange] = True # diagonal is ofc True else: # technically only adjacent to n-th degree neighbor, but in this tiny test system that is same as all-to-all and easier to program adjacency = data[AtomicDataDict.BATCH_KEY].view(-1, 1) == data[ AtomicDataDict.BATCH_KEY ].view(1, -1) assert torch.equal(adjacency, torch.any(partial_forces != 0, dim=-1)) - - -class TestAutoGradient: - def test_cross_frame_grad(self, config, nequip_dataset): - c = Collater.for_dataset(nequip_dataset) - batch = c([nequip_dataset[i] for i in range(len(nequip_dataset))]) - device = "cpu" - config = config.copy() - config["model_builders"] = ["EnergyModel"] - energy_model = model_from_config(config=config, initialize=True) - energy_model.to(device) - data = AtomicData.to_AtomicDataDict(batch.to(device)) - data[AtomicDataDict.POSITIONS_KEY].requires_grad = True - - output = energy_model(data) - grads = torch.autograd.grad( - outputs=output[AtomicDataDict.TOTAL_ENERGY_KEY][-1], - inputs=data[AtomicDataDict.POSITIONS_KEY], - allow_unused=True, - )[0] - - last_frame_n_atom = 
batch.ptr[-1] - batch.ptr[-2] - - in_frame_grad = grads[-last_frame_n_atom:] - cross_frame_grad = grads[:-last_frame_n_atom] - - assert cross_frame_grad.abs().max().item() == 0 - assert in_frame_grad.abs().max().item() > 0 - - -class TestEquivariance: - def test_forward(self, model, atomic_batch, device): - instance, out_field = model - instance = instance.to(device=device) - atomic_batch = atomic_batch.to(device=device) - assert_AtomicData_equivariant(func=instance, data_in=atomic_batch) - - -class TestCutoff: - def test_large_separation(self, model, config, molecules): - atol = {torch.float32: 1e-4, torch.float64: 1e-10}[torch.get_default_dtype()] - instance, _ = model - r_max = config["r_max"] - atoms1 = molecules[0].copy() - atoms2 = molecules[1].copy() - # translate atoms2 far away - atoms2.positions += 40.0 + np.random.randn(3) - atoms_both = atoms1.copy() - atoms_both.extend(atoms2) - tm = TypeMapper(chemical_symbols=["H", "C", "O"]) - data1 = tm(AtomicData.from_ase(atoms1, r_max=r_max)) - data2 = tm(AtomicData.from_ase(atoms2, r_max=r_max)) - data_both = tm(AtomicData.from_ase(atoms_both, r_max=r_max)) - assert ( - data_both[AtomicDataDict.EDGE_INDEX_KEY].shape[1] - == data1[AtomicDataDict.EDGE_INDEX_KEY].shape[1] - + data2[AtomicDataDict.EDGE_INDEX_KEY].shape[1] - ) - - out1 = instance(AtomicData.to_AtomicDataDict(data1)) - out2 = instance(AtomicData.to_AtomicDataDict(data2)) - out_both = instance(AtomicData.to_AtomicDataDict(data_both)) - - assert torch.allclose( - out1[AtomicDataDict.TOTAL_ENERGY_KEY] - + out2[AtomicDataDict.TOTAL_ENERGY_KEY], - out_both[AtomicDataDict.TOTAL_ENERGY_KEY], - atol=atol, - ) - - atoms_both2 = atoms1.copy() - atoms3 = atoms2.copy() - atoms3.positions += np.random.randn(3) - atoms_both2.extend(atoms3) - data_both2 = tm(AtomicData.from_ase(atoms_both2, r_max=r_max)) - out_both2 = instance(AtomicData.to_AtomicDataDict(data_both2)) - assert torch.allclose( - out_both2[AtomicDataDict.TOTAL_ENERGY_KEY], - out_both[AtomicDataDict.TOTAL_ENERGY_KEY], - atol=atol, - ) - assert torch.allclose( - out_both2[AtomicDataDict.PER_ATOM_ENERGY_KEY], - out_both[AtomicDataDict.PER_ATOM_ENERGY_KEY], - atol=atol, - ) - - def test_embedding_cutoff(self, config): - config = config.copy() - config["model_builders"] = ["EnergyModel"] - instance = model_from_config(config=config, initialize=True) - r_max = config["r_max"] - - # make a synthetic three atom example - data = AtomicData( - atom_types=np.random.choice([0, 1, 2], size=3), - pos=np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0]]), - edge_index=np.array([[0, 1, 0, 2], [1, 0, 2, 0]]), - ) - edge_embed = instance(AtomicData.to_AtomicDataDict(data))[ - AtomicDataDict.EDGE_EMBEDDING_KEY - ] - data.pos[2, 1] = r_max # put it past the cutoff - edge_embed2 = instance(AtomicData.to_AtomicDataDict(data))[ - AtomicDataDict.EDGE_EMBEDDING_KEY - ] - - assert torch.allclose(edge_embed[:2], edge_embed2[:2]) - assert edge_embed[2:].abs().sum() > 1e-6 # some nonzero terms - assert torch.allclose(edge_embed2[2:], torch.zeros(1)) - - # test gradients - in_dict = AtomicData.to_AtomicDataDict(data) - in_dict[AtomicDataDict.POSITIONS_KEY].requires_grad_(True) - - with torch.autograd.set_detect_anomaly(True): - out = instance(in_dict) - - # is the edge embedding of the cutoff length edge unchanged at the cutoff? 
- grads = torch.autograd.grad( - outputs=out[AtomicDataDict.EDGE_EMBEDDING_KEY][2:].sum(), - inputs=in_dict[AtomicDataDict.POSITIONS_KEY], - retain_graph=True, - )[0] - assert torch.allclose(grads, torch.zeros(1)) - - # are the first two atom's energies unaffected by atom at the cutoff? - grads = torch.autograd.grad( - outputs=out[AtomicDataDict.PER_ATOM_ENERGY_KEY][:2].sum(), - inputs=in_dict[AtomicDataDict.POSITIONS_KEY], - )[0] - print(grads) - # only care about gradient wrt moved atom - assert grads.shape == (3, 3) - assert torch.allclose(grads[2], torch.zeros(1)) diff --git a/setup.py b/setup.py index 8c977e0a..d7a5b465 100644 --- a/setup.py +++ b/setup.py @@ -29,15 +29,14 @@ "numpy", "ase", "tqdm", - "torch>=1.8,<=1.12,!=1.9.0", # torch.fx added in 1.8 - "e3nn>=0.3.5,<0.6.0", + "torch>=1.10.0,<1.13,!=1.9.0", + "e3nn>=0.4.4,<0.6.0", "pyyaml", "contextlib2;python_version<'3.7'", # backport of nullcontext 'contextvars;python_version<"3.7"', # backport of contextvars for savenload "typing_extensions;python_version<'3.8'", # backport of Final "torch-runstats>=0.2.0", "torch-ema>=0.3.0", - "scikit_learn<=1.0.1", # for GaussianProcess for per-species statistics; 1.0.2 has a bug! ], zip_safe=True, ) diff --git a/tests/conftest.py b/tests/conftest.py index 060e5e7b..e9719bcd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,137 +1,5 @@ -from typing import List, Tuple -import numpy as np -import pathlib -import pytest -import tempfile -import os +from nequip.utils.unittests import CONFTEST_PATH -from ase.atoms import Atoms -from ase.build import molecule -from ase.calculators.singlepoint import SinglePointCalculator -from ase.io import write - -import torch - -from nequip.utils.test import set_irreps_debug -from nequip.data import AtomicData, ASEDataset -from nequip.data.transforms import TypeMapper -from nequip.utils.torch_geometric import Batch -from nequip.utils._global_options import _set_global_options -from nequip.utils.misc import dtype_from_name - -if "NEQUIP_NUM_TASKS" not in os.environ: - # Test parallelization, but don't waste time spawning tons of workers if lots of cores available - os.environ["NEQUIP_NUM_TASKS"] = "2" - -# The default float tolerance -FLOAT_TOLERANCE = { - t: torch.as_tensor(v, dtype=dtype_from_name(t)) - for t, v in {"float32": 1e-3, "float64": 1e-10}.items() -} - - -@pytest.fixture(scope="session", autouse=True, params=["float32", "float64"]) -def float_tolerance(request): - """Run all tests with various PyTorch default dtypes. - - This is a session-wide, autouse fixture — you only need to request it explicitly if a test needs to know the tolerance for the current default dtype. - - Returns - -------- - A precision threshold to use for closeness tests. - """ - old_dtype = torch.get_default_dtype() - dtype = request.param - _set_global_options({"default_dtype": dtype}) - yield FLOAT_TOLERANCE[dtype] - _set_global_options( - { - "default_dtype": {torch.float32: "float32", torch.float64: "float64"}[ - old_dtype - ] - } - ) - - -# - Ampere and TF32 - -# Many of the tests for NequIP involve numerically checking -# algebraic properties— normalization, equivariance, -# continuity, etc. -# With the added numerical noise of TF32, some of those tests fail -# with the current (and usually generous) thresholds. -# -# Thus we go on the assumption that PyTorch + NVIDIA got everything -# right, that this setting DOES NOT AFFECT the model outputs except -# for increased numerical noise, and only test without it. 
-# -# TODO: consider running tests with and without -# TODO: check how much thresholds have to be changed to accomidate TF32 -torch.backends.cuda.matmul.allow_tf32 = False -torch.backends.cudnn.allow_tf32 = False - - -@pytest.fixture(scope="session") -def BENCHMARK_ROOT(): - return pathlib.Path(__file__).parent / "../benchmark_data/" - - -@pytest.fixture(scope="session") -def temp_data(float_tolerance): - with tempfile.TemporaryDirectory() as tmpdirname: - yield tmpdirname - - -@pytest.fixture(scope="session") -def CH3CHO(CH3CHO_no_typemap) -> Tuple[Atoms, AtomicData]: - atoms, data = CH3CHO_no_typemap - tm = TypeMapper(chemical_symbol_to_type={"C": 0, "O": 1, "H": 2}) - data = tm(data) - return atoms, data - - -@pytest.fixture(scope="session") -def CH3CHO_no_typemap(float_tolerance) -> Tuple[Atoms, AtomicData]: - atoms = molecule("CH3CHO") - data = AtomicData.from_ase(atoms, r_max=2.0) - return atoms, data - - -@pytest.fixture(scope="session") -def molecules() -> List[Atoms]: - atoms_list = [] - for i in range(8): - atoms = molecule("CH3CHO" if i % 2 == 0 else "H2") - atoms.rattle() - atoms.calc = SinglePointCalculator( - energy=np.random.random(), - forces=np.random.random((len(atoms), 3)), - stress=None, - magmoms=None, - atoms=atoms, - ) - atoms_list.append(atoms) - return atoms_list - - -@pytest.fixture(scope="session") -def nequip_dataset(molecules, temp_data, float_tolerance): - with tempfile.NamedTemporaryFile(suffix=".xyz") as fp: - for atoms in molecules: - write(fp.name, atoms, format="extxyz", append=True) - a = ASEDataset( - file_name=fp.name, - root=temp_data, - extra_fixed_fields={"r_max": 3.0}, - ase_args=dict(format="extxyz"), - type_mapper=TypeMapper(chemical_symbol_to_type={"H": 0, "C": 1, "O": 2}), - ) - yield a - - -@pytest.fixture(scope="session") -def atomic_batch(nequip_dataset): - return Batch.from_data_list([nequip_dataset[0], nequip_dataset[1]]) - - -# Use debug mode -set_irreps_debug(True) +# like `source` in bash +with open(CONFTEST_PATH) as f: + exec(f.read()) diff --git a/tests/unit/data/test_dataset.py b/tests/unit/data/test_dataset.py index f45e0ca8..95cfe48d 100644 --- a/tests/unit/data/test_dataset.py +++ b/tests/unit/data/test_dataset.py @@ -31,7 +31,7 @@ def ase_file(molecules): MAX_ATOMIC_NUMBER: int = 5 -NATOMS = 3 +NATOMS = 10 @pytest.fixture(scope="function") @@ -231,8 +231,8 @@ def test_per_graph_field(self, npz_dataset, fixed_field, subset, key, dim): if npz_dataset is None: return - torch.manual_seed(0) - E = torch.rand((npz_dataset.len(),) + dim) + rng = torch.Generator().manual_seed(454) + E = torch.rand((npz_dataset.len(),) + dim, generator=rng) ref_mean = torch.mean(E / NATOMS, dim=0) ref_std = torch.std(E / NATOMS, dim=0) @@ -277,16 +277,11 @@ def test_per_node_field(self, npz_dataset, fixed_field, mode, subset): ) print(result) - @pytest.mark.parametrize("alpha", [1e-5, 1e-3, 0.1, 0.5]) + @pytest.mark.parametrize("alpha", [0, 1e-3, 0.01]) @pytest.mark.parametrize("fixed_field", [True, False]) @pytest.mark.parametrize("full_rank", [True, False]) @pytest.mark.parametrize("subset", [True, False]) - @pytest.mark.parametrize( - "regressor", ["NormalizedGaussianProcess", "GaussianProcess"] - ) - def test_per_graph_field( - self, npz_dataset, alpha, fixed_field, full_rank, regressor, subset - ): + def test_per_graph_field(self, npz_dataset, alpha, fixed_field, full_rank, subset): if alpha <= 1e-4 and not full_rank: return @@ -308,10 +303,7 @@ def test_per_graph_field( del n_spec del Ns - if alpha == 1e-5: - ref_mean, ref_std, E = generate_E(N, 
100, 1000, 0.0) - else: - ref_mean, ref_std, E = generate_E(N, 100, 1000, 0.5) + ref_mean, ref_std, E = generate_E(N, 100, 1000, 10) if subset: E_orig_order = torch.zeros_like( @@ -333,7 +325,6 @@ def test_per_graph_field( AtomicDataDict.TOTAL_ENERGY_KEY + "per_species_mean_std": { "alpha": alpha, - "regressor": regressor, "stride": 1, } }, @@ -341,21 +332,18 @@ def test_per_graph_field( res = torch.matmul(N, mean.reshape([-1, 1])) - E.reshape([-1, 1]) res2 = torch.sum(torch.square(res)) - print("residue", alpha, res2 - ref_res2) + print("alpha, residue, actual residue", alpha, res2, ref_res2) print("mean", mean, ref_mean) print("diff in mean", mean - ref_mean) print("std", std, ref_std) + tolerance = torch.max(ref_std) * 4 if full_rank: - if alpha == 1e-5: - assert torch.allclose(mean, ref_mean, rtol=1e-1) - else: - assert torch.allclose(mean, ref_mean, rtol=1) - assert torch.allclose(std, torch.zeros_like(ref_mean), atol=alpha * 100) - elif regressor == "NormalizedGaussianProcess": - assert torch.std(mean).numpy() == 0 + assert torch.allclose(mean, ref_mean, atol=tolerance) + # assert torch.allclose(std, torch.zeros_like(ref_mean), atol=alpha * 100) else: - assert mean[0] == mean[1] * 2 + assert torch.allclose(mean, mean[0], atol=tolerance) + # assert torch.std(mean).numpy() == 0 class TestReload: @@ -449,12 +437,14 @@ def test_from_atoms(self, molecules): def generate_E(N, mean_min, mean_max, std): - torch.manual_seed(0) - ref_mean = torch.rand((N.shape[1])) * (mean_max - mean_min) + mean_min + rng = torch.Generator().manual_seed(568) + ref_mean = ( + torch.rand((N.shape[1]), generator=rng) * (mean_max - mean_min) + mean_min + ) t_mean = torch.ones((N.shape[0], 1)) * ref_mean.reshape([1, -1]) - ref_std = torch.rand((N.shape[1])) * std + ref_std = torch.rand((N.shape[1]), generator=rng) * std t_std = torch.ones((N.shape[0], 1)) * ref_std.reshape([1, -1]) - E = torch.normal(t_mean, t_std) + E = torch.normal(t_mean, t_std, generator=rng) return ref_mean, ref_std, (N * E).sum(axis=-1) diff --git a/tests/unit/model/test_nequip_model.py b/tests/unit/model/test_nequip_model.py new file mode 100644 index 00000000..2aa82e15 --- /dev/null +++ b/tests/unit/model/test_nequip_model.py @@ -0,0 +1,122 @@ +import pytest + +from e3nn import o3 + +from nequip.data import AtomicDataDict +from nequip.model import model_from_config +from nequip.nn import AtomwiseLinear +from nequip.utils.unittests.model_tests import BaseEnergyModelTests + +COMMON_CONFIG = { + "avg_num_neighbors": None, + "num_types": 3, + "types_names": ["H", "C", "O"], +} +r_max = 3 +minimal_config1 = dict( + irreps_edge_sh="0e + 1o", + r_max=4, + feature_irreps_hidden="4x0e + 4x1o", + num_layers=2, + num_basis=8, + PolynomialCutoff_p=6, + nonlinearity_type="norm", + **COMMON_CONFIG +) +minimal_config2 = dict( + irreps_edge_sh="0e + 1o", + r_max=4, + chemical_embedding_irreps_out="8x0e + 8x0o + 8x1e + 8x1o", + irreps_mid_output_block="2x0e", + feature_irreps_hidden="4x0e + 4x1o", + **COMMON_CONFIG +) +minimal_config3 = dict( + irreps_edge_sh="0e + 1o", + r_max=4, + feature_irreps_hidden="4x0e + 4x1o", + num_layers=2, + num_basis=8, + PolynomialCutoff_p=6, + nonlinearity_type="gate", + **COMMON_CONFIG +) +minimal_config4 = dict( + irreps_edge_sh="0e + 1o + 2e", + r_max=4, + feature_irreps_hidden="2x0e + 2x1o + 2x2e", + num_layers=2, + num_basis=3, + PolynomialCutoff_p=6, + nonlinearity_type="gate", + # test custom nonlinearities + nonlinearity_scalars={"e": "silu", "o": "tanh"}, + nonlinearity_gates={"e": "silu", "o": "abs"}, + 
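+    # of the four test configs, only this one exercises l=2 (2e) irreps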
**COMMON_CONFIG +) + + +class TestNequIPModel(BaseEnergyModelTests): + @pytest.fixture + def strict_locality(self): + return False + + @pytest.fixture( + params=[minimal_config1, minimal_config2, minimal_config3, minimal_config4], + scope="class", + ) + def base_config(self, request): + return request.param + + @pytest.fixture( + params=[ + ( + ["EnergyModel", "ForceOutput"], + [ + AtomicDataDict.TOTAL_ENERGY_KEY, + AtomicDataDict.PER_ATOM_ENERGY_KEY, + AtomicDataDict.FORCE_KEY, + ], + ), + ( + ["EnergyModel"], + [ + AtomicDataDict.TOTAL_ENERGY_KEY, + AtomicDataDict.PER_ATOM_ENERGY_KEY, + ], + ), + ( + ["EnergyModel", "StressForceOutput"], + [ + AtomicDataDict.TOTAL_ENERGY_KEY, + AtomicDataDict.PER_ATOM_ENERGY_KEY, + AtomicDataDict.FORCE_KEY, + AtomicDataDict.STRESS_KEY, + AtomicDataDict.VIRIAL_KEY, + ], + ), + ], + scope="class", + ) + def config(self, request, base_config): + config = base_config.copy() + builder, out_fields = request.param + config = config.copy() + config["model_builders"] = builder + return config, out_fields + + def test_submods(self): + config = minimal_config2.copy() + config["model_builders"] = ["EnergyModel"] + model = model_from_config(config=config, initialize=True) + assert isinstance(model.chemical_embedding, AtomwiseLinear) + true_irreps = o3.Irreps(minimal_config2["chemical_embedding_irreps_out"]) + assert ( + model.chemical_embedding.irreps_out[model.chemical_embedding.out_field] + == true_irreps + ) + # Make sure it propagates + assert ( + model.layer0_convnet.irreps_in[model.chemical_embedding.out_field] + == true_irreps + ) diff --git a/tests/unit/trainer/test_trainer.py b/tests/unit/trainer/test_trainer.py index c8169fda..860be357 100644 --- a/tests/unit/trainer/test_trainer.py +++ b/tests/unit/trainer/test_trainer.py @@ -106,26 +106,6 @@ def test_save(self, trainer, format, suffix): assert isfile(file_name), "fail to save to file" assert suffix in file_name - @pytest.mark.parametrize("append", [True]) # , False]) - def test_from_dict(self, trainer, append): - - # torch.save(trainer.model, trainer.best_model_path) - - dictionary = trainer.as_dict(state_dict=True, training_progress=True) - trainer1 = Trainer.from_dict(dictionary, append=append) - - for key in [ - "best_model_path", - "last_model_path", - "logfile", - "epoch_log", - "batch_log", - "workdir", - ]: - v1 = getattr(trainer, key, None) - v2 = getattr(trainer1, key, None) - assert append == (v1 == v2) - @pytest.mark.parametrize("append", [True]) # , False]) def test_from_file(self, trainer, append): diff --git a/tests/unit/utils/test_gp.py b/tests/unit/utils/test_gp.py deleted file mode 100644 index 4792b9d2..00000000 --- a/tests/unit/utils/test_gp.py +++ /dev/null @@ -1,37 +0,0 @@ -import torch -import pytest - -from nequip.utils.regressor import base_gp -from sklearn.gaussian_process.kernels import DotProduct - - -# @pytest.mark.parametrize("full_rank", [True, False]) -@pytest.mark.parametrize("full_rank", [False]) -@pytest.mark.parametrize("alpha", [0, 1e-3, 0.1, 1]) -def test_random(full_rank, alpha): - - if alpha == 0 and not full_rank: - return - - torch.manual_seed(0) - n_samples = 10 - n_dim = 3 - - if full_rank: - X = torch.randint(low=1, high=10, size=(n_samples, n_dim)) - else: - X = torch.randint(low=1, high=10, size=(n_samples, 1)) * torch.ones( - (n_samples, n_dim) - ) - - ref_mean = torch.rand((n_dim, 1)) - y = torch.matmul(X, ref_mean) - - mean, std = base_gp( - X, y, DotProduct, {"sigma_0": 0, "sigma_0_bounds": "fixed"}, alpha=0.1 - ) - - if full_rank: - assert 
torch.allclose(ref_mean, mean, rtol=0.5) - else: - assert torch.allclose(mean, mean[0], rtol=1e-3) diff --git a/tests/unit/utils/test_solver.py b/tests/unit/utils/test_solver.py new file mode 100644 index 00000000..de78cbd8 --- /dev/null +++ b/tests/unit/utils/test_solver.py @@ -0,0 +1,38 @@ +import torch +import pytest + +from nequip.utils.regressor import solver + + +@pytest.mark.parametrize("full_rank", [True, False]) +@pytest.mark.parametrize("alpha", [0, 1e-3, 1e-2]) +def test_random(full_rank, alpha, per_species_set): + + if alpha == 0 and not full_rank: + return + + rng = torch.Generator().manual_seed(343) + + ref_mean, ref_std, E, n_samples, n_dim = per_species_set + + X = torch.randint(low=1, high=10, size=(n_samples, n_dim), generator=rng).to( + torch.get_default_dtype() + ) + if not full_rank: + X[:, n_dim - 2] = X[:, n_dim - 1] * 2 + y = (X * E).sum(axis=-1) + + mean, std = solver(X, y, alpha=alpha) + + tolerance = torch.max(ref_std) + + print("tolerance", tolerance) + print("solution", mean, std) + print("diff", mean - ref_mean) + + if full_rank: + assert torch.allclose(ref_mean, mean, atol=tolerance) + else: + assert torch.allclose(mean[n_dim - 1], mean[n_dim - 2], atol=tolerance) + + assert torch.max(std) < tolerance
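+
+
+# For reference: `solver` (nequip/utils/regressor.py) performs the ridge
+# regression decomposition of per-species energies used for dataset statistics.
+# Given a composition matrix X of shape (n_samples, n_species) and total
+# energies y, it returns the per-species means and an uncertainty estimate.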