diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index dea57ca1..1f835e90 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -15,8 +15,8 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
- python-version: [3.7, 3.9]
- torch-version: [1.10.0, 1.11.0]
+ python-version: [3.9]
+ torch-version: [1.11.0, 1.12.1]
steps:
- uses: actions/checkout@v2
@@ -44,4 +44,4 @@ jobs:
- name: Test with pytest
run: |
# See https://github.com/pytest-dev/pytest/issues/1075
- PYTHONHASHSEED=0 pytest -n auto --ignore=docs/ .
+ PYTHONHASHSEED=0 pytest -n auto tests/
diff --git a/.github/workflows/tests_develop.yml b/.github/workflows/tests_develop.yml
index bae5795e..2c23350c 100644
--- a/.github/workflows/tests_develop.yml
+++ b/.github/workflows/tests_develop.yml
@@ -16,7 +16,7 @@ jobs:
strategy:
matrix:
python-version: [3.9]
- torch-version: [1.11.0]
+ torch-version: [1.12.1]
steps:
- uses: actions/checkout@v2
@@ -44,4 +44,4 @@ jobs:
- name: Test with pytest
run: |
# See https://github.com/pytest-dev/pytest/issues/1075
- PYTHONHASHSEED=0 pytest -n auto --ignore=docs/ .
+ PYTHONHASHSEED=0 pytest -n auto tests/
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5f13a642..cf50972d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,7 +7,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
Most recent change on the bottom.
-## [Unreleased] - 0.5.6
+## [0.5.6] - 2022-12-19
+### Added
+- `nequip-benchmark` and `nequip-train` report the number of weights and the number of trainable weights
+- `nequip-benchmark --no-compile` and `--verbose` and `--memory-summary`
+- `nequip-benchmark --pdb` for debugging model (builder) errors
+- More information in `nequip-deploy info`
+
+### Changed
+- Removed the sklearn dependency
+- Minimum e3nn is now 0.4.4
+- `--equivariance-test` now prints much more information, especially when there is a failure
+
+### Fixed
+- Git utilities when installed as ZIPed `.egg` (#264)
## [0.5.5] - 2022-06-20
### Added
diff --git a/configs/full.yaml b/configs/full.yaml
index 1b8a3a2c..2f98164e 100644
--- a/configs/full.yaml
+++ b/configs/full.yaml
@@ -17,7 +17,20 @@ default_dtype: float32
allow_tf32: false # whether to use TensorFloat32 if it is available
# device: cuda # which device to use. Default: automatically detected cuda or "cpu"
-# network
+# == network ==
+
+# `model_builders` defines a series of functions that will be called to construct the model
+# each model builder has the opportunity to update the model, the config, or both
+# model builders from other packages are allowed (see mir-group/allegro for an example); those from `nequip.model` don't require a prefix
+# these are the default model builders:
+model_builders:
+ - SimpleIrrepsConfig # update the config with all the irreps for the network if using the simplified `l_max` / `num_features` / `parity` syntax
+ - EnergyModel # build a full NequIP model
+ - PerSpeciesRescale # add per-atom / per-species scaling and shifting to the NequIP model before the total energy sum
+  - ForceOutput                                # wrap the energy model in a module that uses autodifferentiation to compute the forces
+ - RescaleEnergyEtc # wrap the entire model in the appropriate global rescaling of the energy, forces, etc.
+# ^ global rescaling blocks must always go last!
+
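+# builders from another package are referenced by module path; for example
+# (a hedged illustration, assuming the `allegro` package from mir-group/allegro is installed):
+# model_builders:
+#   - allegro.model.Allegro
+#   - PerSpeciesRescale
+#   - ForceOutput
+#   - RescaleEnergyEtc
+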
r_max: 4.0                                         # cutoff radius in length units, here Angstrom, this is an important hyperparameter to scan
num_layers: 4 # number of interaction blocks, we find 3-5 to work best
@@ -198,6 +211,8 @@ loss_coeffs:
total_energy:
- 1
- PerAtomMSELoss
+# note that the ratio between the force and energy loss terms matters for training. Consider using 1:1 with PerAtomMSELoss. If the energy loss still dominates the total loss significantly in the initial epochs, lowering the energy loss weight helps training considerably.
+
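+# for example, an explicit 1:1 weighting with the per-atom energy loss looks like
+# (illustrative only; it mirrors the block above):
+# loss_coeffs:
+#   forces: 1.0
+#   total_energy:
+#     - 1.0
+#     - PerAtomMSELoss
+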
# # default loss function is MSELoss, the name has to be exactly the same as those in torch.nn.
# the only supported targets are forces and total_energy
@@ -302,10 +317,10 @@ per_species_rescale_scales: dataset_forces_rms
# If not provided, defaults to dataset_per_species_force_rms or dataset_per_atom_total_energy_std, depending on whether forces are being trained.
# per_species_rescale_kwargs:
# total_energy:
-# alpha: 0.1
+# alpha: 0.001
# max_iteration: 20
# stride: 100
-# keywords for GP decomposition of per specie energy. Optional. Defaults to 0.1
+# keywords for the ridge regression decomposition of the per-species energy. Optional. Defaults to 0.001. The value should be in the range 1e-3 to 1e-2.
# per_species_rescale_arguments_in_dataset_units: True
# if explicit numbers are given for the shifts/scales, this parameter must specify whether the given numbers are unitless shifts/scales or are in the units of the dataset. If ``True``, any global rescalings will correctly be applied to the per-species values.
@@ -329,9 +344,10 @@ global_rescale_scale_trainable: false
# global_rescale_shift_trainable: false
# global_rescale_scale: dataset_forces_rms
# global_rescale_scale_trainable: false
-# per_species_rescale_trainable: true
-# per_species_rescale_shifts: dataset_per_atom_total_energy_mean
-# per_species_rescale_scales: dataset_per_atom_total_energy_std
+# per_species_rescale_shifts_trainable: false
+# per_species_rescale_scales_trainable: true
+# per_species_rescale_shifts: dataset_per_species_total_energy_mean
+# per_species_rescale_scales: dataset_per_species_forces_rms
# # full block needed for global rescale
# global_rescale_shift: dataset_total_energy_mean
diff --git a/configs/minimal_toy_emt.yaml b/configs/minimal_toy_emt.yaml
index c9c904d1..38b7f95d 100644
--- a/configs/minimal_toy_emt.yaml
+++ b/configs/minimal_toy_emt.yaml
@@ -6,15 +6,18 @@ dataset_seed: 456
# network
model_builders:
+ - SimpleIrrepsConfig
- EnergyModel
- PerSpeciesRescale
- StressForceOutput
- RescaleEnergyEtc
+
num_basis: 8
r_max: 4.0
-irreps_edge_sh: 0e + 1o
-conv_to_output_hidden_irreps_out: 16x0e
-feature_irreps_hidden: 16x0o + 16x0e + 16x1o + 16x1e
+l_max: 1
+parity: true
+num_features: 16
+num_layers: 4
# data set
dataset: EMTTest # type of data set, can be npz or ase
@@ -23,10 +26,6 @@ dataset_num_frames: 100
chemical_symbols:
- Cu
-global_rescale_scale: dataset_total_energy_std
-per_species_rescale_shifts: dataset_per_atom_total_energy_mean
-per_species_rescale_scales: dataset_per_atom_total_energy_std
-
# logging
wandb: false
# verbose: debug
diff --git a/docs/api/nequip.rst b/docs/api/nequip.rst
index 13bc37ca..6f6250cf 100644
--- a/docs/api/nequip.rst
+++ b/docs/api/nequip.rst
@@ -3,4 +3,5 @@ Python API
.. toctree::
- data
\ No newline at end of file
+ data
+ trainer
diff --git a/docs/api/trainer.rst b/docs/api/trainer.rst
new file mode 100644
index 00000000..983e6f6b
--- /dev/null
+++ b/docs/api/trainer.rst
@@ -0,0 +1,10 @@
+nequip.trainer
+==============
+
+ .. automodule:: nequip.train.trainer
+ :members:
+ :imported-members:
+
+ .. automodule:: nequip.train.trainer_wandb
+ :members:
+ :imported-members:
diff --git a/docs/cite.rst b/docs/cite.rst
new file mode 100644
index 00000000..9f8296cc
--- /dev/null
+++ b/docs/cite.rst
@@ -0,0 +1,3 @@
+Citing NequIP
+=============
+
diff --git a/docs/commandline/commands.rst b/docs/commandline/commands.rst
new file mode 100644
index 00000000..b58c87ab
--- /dev/null
+++ b/docs/commandline/commands.rst
@@ -0,0 +1,132 @@
+Command-line Executables
+========================
+
+``nequip-train``
+----------------
+
+ .. code ::
+
+ usage: nequip-train [-h] [--equivariance-test] [--model-debug-mode] [--grad-anomaly-mode] [--log LOG] config
+
+Train (or restart training of) a NequIP model.
+
+positional arguments:
+ config YAML file configuring the model, dataset, and other options
+
+optional arguments:
+ -h, --help show this help message and exit
+ --equivariance-test test the model's equivariance before training
+ --model-debug-mode enable model debug mode, which can sometimes give much more useful error messages at the
+ cost of some speed. Do not use for production training!
+ --grad-anomaly-mode enable PyTorch autograd anomaly mode to debug NaN gradients. Do not use for production
+ training!
+ --log LOG log file to store all the screen logging
+
+``nequip-evaluate``
+-------------------
+
+ .. code ::
+
+ usage: nequip-evaluate [-h] [--train-dir TRAIN_DIR] [--model MODEL] [--dataset-config DATASET_CONFIG]
+ [--metrics-config METRICS_CONFIG] [--test-indexes TEST_INDEXES] [--batch-size BATCH_SIZE]
+ [--device DEVICE] [--output OUTPUT] [--log LOG]
+
+Compute the error of a model on a test set using various metrics. The model, metrics, dataset, etc. can be specified
+in individual YAML config files, or a training session can be indicated with ``--train-dir``. In order of priority,
+the global settings (dtype, TensorFloat32, etc.) are taken from: (1) the model config (for a training session), (2)
+the dataset config (for a deployed model), or (3) the defaults. Prints only the final result in ``name = num`` format
+to stdout; all other information is ``logging.debug``ed to stderr. WARNING: Please note that results of CUDA models
+are rarely exactly reproducible, and that even CPU models can be nondeterministic.
+
+optional arguments:
+ -h, --help show this help message and exit
+ --train-dir TRAIN_DIR
+ Path to a working directory from a training session.
+ --model MODEL A deployed or pickled NequIP model to load. If omitted, defaults to `best_model.pth` in
+ `train_dir`.
+ --dataset-config DATASET_CONFIG
+ A YAML config file specifying the dataset to load test data from. If omitted, `config.yaml`
+ in `train_dir` will be used
+ --metrics-config METRICS_CONFIG
+ A YAML config file specifying the metrics to compute. If omitted, `config.yaml` in
+ `train_dir` will be used. If the config does not specify `metrics_components`, the default
+ is to logging.debug MAEs and RMSEs for all fields given in the loss function. If the
+ literal string `None`, no metrics will be computed.
+ --test-indexes TEST_INDEXES
+ Path to a file containing the indexes in the dataset that make up the test set. If omitted,
+ all data frames *not* used as training or validation data in the training session
+ `train_dir` will be used.
+ --batch-size BATCH_SIZE
+ Batch size to use. Larger is usually faster on GPU.
+ --device DEVICE Device to run the model on. If not provided, defaults to CUDA if available and CPU
+ otherwise.
+ --output OUTPUT XYZ file to write out the test set and model predicted forces, energies, etc. to.
+ --log LOG log file to store all the metrics and screen logging.debug
+
+``nequip-deploy``
+-----------------
+
+ .. code ::
+
+ usage: nequip-deploy [-h] {info,build} ...
+
+Deploy and view information about previously deployed NequIP models.
+
+optional arguments:
+ -h, --help show this help message and exit
+
+commands:
+ {info,build}
+ info Get information from a deployed model file
+ build Build a deployment model
+
+``nequip-deploy info``
+~~~~~~~~~~~~~~~~~~~~~~
+
+ .. code ::
+
+ usage: nequip-deploy info [-h] model_path
+
+positional arguments:
+ model_path Path to a deployed model file.
+
+optional arguments:
+ -h, --help show this help message and exit
+
+
+``nequip-deploy build``
+~~~~~~~~~~~~~~~~~~~~~~~
+
+ .. code ::
+
+ usage: nequip-deploy build [-h] train_dir out_file
+
+positional arguments:
+ train_dir Path to a working directory from a training session.
+ out_file Output file for deployed model.
+
+optional arguments:
+ -h, --help show this help message and exit
+
+
+``nequip-benchmark``
+--------------------
+
+ .. code ::
+
+ usage: nequip-benchmark [-h] [--profile PROFILE] [--device DEVICE] [-n N] [--n-data N_DATA] [--timestep TIMESTEP]
+ config
+
+Benchmark the approximate MD performance of a given model configuration / dataset pair.
+
+positional arguments:
+ config configuration file
+
+optional arguments:
+ -h, --help show this help message and exit
+  --profile PROFILE    Profile instead of timing, creating and outputting a Chrome trace JSON to the given path.
+ --device DEVICE Device to run the model on. If not provided, defaults to CUDA if available and CPU
+ otherwise.
+ -n N Number of trials.
+ --n-data N_DATA Number of frames to use.
+  --timestep TIMESTEP  MD timestep for ns/day estimation, in fs. Defaults to 1 fs.
diff --git a/docs/guide/FAQ.rst b/docs/errors/errors.rst
similarity index 56%
rename from docs/guide/FAQ.rst
rename to docs/errors/errors.rst
index 92ac758e..576e553d 100644
--- a/docs/guide/FAQ.rst
+++ b/docs/errors/errors.rst
@@ -1,14 +1,5 @@
-FAQ
-===
-
-How do I...
------------
-
-... continue to train a model that reached a stopping condition?
- There will be an answer here.
-
-1. Reload the model trained with version 0.3.3 to the code in 0.4.
- check out the migration note at :ref:`migration_note`.
+Errors
+======
Common errors
-------------
diff --git a/docs/faq/FAQ.rst b/docs/faq/FAQ.rst
new file mode 100644
index 00000000..411e77c1
--- /dev/null
+++ b/docs/faq/FAQ.rst
@@ -0,0 +1,14 @@
+FAQ
+===
+
+How do I...
+-----------
+
+... continue to train a model that reached a stopping condition?
+ There will be an answer here.
+
+1. Reload a model trained with version 0.3.3 into the 0.4 code?
+   Check out the migration note at :ref:`migration_note`.
+
+2. Specify my dataset for ``nequip-train`` and ``nequip-evaluate``? See :ref:`dataset_note`.
+
diff --git a/docs/guide/guide.rst b/docs/guide/guide.rst
deleted file mode 100644
index 6def3859..00000000
--- a/docs/guide/guide.rst
+++ /dev/null
@@ -1,9 +0,0 @@
-NequIP User Guide
-=================
-
- .. toctree::
-
- intro
- irreps
- conventions
- FAQ
\ No newline at end of file
diff --git a/docs/guide/intro.rst b/docs/guide/intro.rst
deleted file mode 100644
index 7afa4132..00000000
--- a/docs/guide/intro.rst
+++ /dev/null
@@ -1,4 +0,0 @@
-Tutorial: Introduction to NequIP
-================================
-
-TODO
\ No newline at end of file
diff --git a/docs/guide/irreps.rst b/docs/guide/irreps.rst
deleted file mode 100644
index 5f9b2735..00000000
--- a/docs/guide/irreps.rst
+++ /dev/null
@@ -1,9 +0,0 @@
-Irreps
-======
-
-.. _Irreps:
-
-Syntax to specify irreps
-------------------------
-
-TODO: descripe irreps syntax here
\ No newline at end of file
diff --git a/docs/guide/conventions.rst b/docs/howto/conventions.rst
similarity index 100%
rename from docs/guide/conventions.rst
rename to docs/howto/conventions.rst
diff --git a/docs/howto/dataset.rst b/docs/howto/dataset.rst
new file mode 100644
index 00000000..2b5267e7
--- /dev/null
+++ b/docs/howto/dataset.rst
@@ -0,0 +1,156 @@
+.. _dataset_note:
+
+How to prepare a training dataset
+=================================
+
+What NequIP does behind the scenes
+----------------------------------
+
+NequIP uses the ``AtomicDataset`` class to store the atomic configurations.
+During the initialization of an AtomicDataset object,
+NequIP reads the atomic structures from the dataset,
+computes the neighbor list and other data structures needed for the GNN
+by converting raw data to a list of ``AtomicData`` objects.
+
+The computed results are then cached on disk in the ``root/processed_hashkey`` folder.
+The hash is based on all the metadata provided for the dataset,
+which includes the file name, the cutoff radius, the floating-point precision, etc.
+In the case where multiple training/evaluation runs use the same dataset,
+the neighbor list will only be computed in the first NequIP run.
+Later runs load the ``AtomicDataset`` object directly from the cache file to save computation time.
+
+Note: be careful with the cached files. If you update your raw data file but keep using the same filename,
+NequIP will not automatically update the cached data.
+
+Key concepts
+------------
+
+fixed_fields
+~~~~~~~~~~~~
+Fixed fields are quantities that are shared among all the configurations in the dataset.
+For example, if the dataset is a trajectory from an NVT MD simulation, the supercell and the atomic species
+are constant matrices/vectors throughout the whole dataset.
+In this case, instead of repeating the same values many times,
+we specify the cell and species as fixed fields and provide them only once.
+
+yaml interface
+~~~~~~~~~~~~~~
+``nequip-train`` and ``nequip-evaluate`` automatically construct the ``AtomicDataset`` based on the YAML arguments.
+Later sections offer a couple of different examples.
+
+If the training and validation datasets are from different raw files, the arguments for each set
+can be defined with the ``dataset`` and ``validation_dataset`` prefixes, respectively.
+
+For example, ``dataset_file_name`` is used for training data and ``validation_dataset_file_name`` is for validation data.
+
+Python interface
+~~~~~~~~~~~~~~~~
+See ``nequip.data.dataset.AtomicInMemoryDataset``.
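+
+A minimal sketch of the Python route, assuming only names this codebase already
+imports elsewhere (``Config`` from ``nequip.utils``, ``dataset_from_config`` from
+``nequip.data``); ``config.yaml`` is the same file that ``nequip-train`` consumes:
+
+.. code:: python
+
+    from nequip.utils import Config
+    from nequip.data import dataset_from_config
+
+    # parse the same YAML used by `nequip-train`; this builds (and caches)
+    # the AtomicDataset described above
+    config = Config.from_file("config.yaml")
+    dataset = dataset_from_config(config)
+    print(len(dataset))  # number of frames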
+
+Prepare dataset and specify in yaml config
+------------------------------------------
+
+ASE format
+~~~~~~~~~~
+
+NequIP accepts any format that can be parsed by the ``ase.io.read`` function.
+We recommend ``extxyz``.
+
+Example: given atomic data stored in ``H2.extxyz`` that looks like the following:
+
+.. code:: extxyz
+
+ 2
+ Properties=species:S:1:pos:R:3 energy=-10 user_label=2.0 pbc="F F F"
+ H 0.00000000 0.00000000 0.00000000
+ H 0.00000000 0.00000000 1.02000000
+
+The YAML input should be:
+
+.. code:: yaml
+
+ dataset: ase
+ dataset_file_name: H2.extxyz
+ ase_args:
+ format: extxyz
+ include_keys:
+ - user_label
+ key_mapping:
+ user_label: label0
+ chemical_symbol_to_type:
+ H: 0
+
+For formats other than ``extxyz``, be careful with the ASE parsers; they may behave differently from the ``extxyz`` parser.
+For example, the ASE VASP parser stores the potential energy under ``free_energy`` instead of ``energy``.
+Because our code is tailored to the ``extxyz`` parser, NequIP would otherwise not be able to load any ``total_energy`` labels.
+We need some additional keys to help NequIP understand the situation.
+Here's an example for a VASP OUTCAR:
+
+.. code:: yaml
+
+ dataset: ase
+ dataset_file_name: OUTCAR
+ ase_args:
+ format: vasp-out
+ key_mapping:
+ free_energy: total_energy
+ chemical_symbol_to_type:
+ H: 0
+
+The workaround, as shown above, is to use ``key_mapping``; see the note on key mapping below.
+
+NPZ format
+~~~~~~~~~~
+
+If your dataset consists of configurations that always have the same number of atoms, the NPZ data format can be an option.
+
+In the NPZ file, the leading dimension of every (non-fixed) array should equal the number of configurations.
+For example, the force array for 36 configurations of an N-atom system should have the shape (36, N, 3),
+and the total_energy array should have the shape (36,).
+
+Below is an example YAML specification:
+
+.. code:: yaml
+
+ dataset: npz
+ dataset_file_name: example.npz
+ include_keys:
+ - user_label1
+ - user_label2
+ npz_fixed_field_keys:
+ - cell
+ - atomic_numbers
+ key_mapping:
+ position: pos
+ force: forces
+ energy: total_energy
+ Z: atomic_numbers
+
+
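+A hedged sketch of writing such an NPZ file from Python; the array names match
+the ``key_mapping`` above, and ``Z`` and ``cell`` are fixed fields provided once:
+
+.. code:: python
+
+    import numpy as np
+
+    n_frames, n_atoms = 36, 8
+    np.savez(
+        "example.npz",
+        position=np.zeros((n_frames, n_atoms, 3)),
+        force=np.zeros((n_frames, n_atoms, 3)),
+        energy=np.zeros(n_frames),
+        Z=np.ones(n_atoms, dtype=int),  # fixed field: same species for every frame
+        cell=np.eye(3) * 10.0,          # fixed field: one cell for all frames
+    )
+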
+Note on key mapping
+~~~~~~~~~~~~~~~~~~~
+
+NequIP has default key names for energy, forces, cell, etc. (defined in ``nequip.data._keys``).
+Unlike the ASE formats, where this information is parsed automatically,
+the NPZ data format requires the correct key names to be provided.
+The common key names are ``total_energy``, ``forces``, ``atomic_numbers``, ``pos``, ``cell``, and ``pbc``;
+``key_mapping`` converts a user-defined name (the key) to the NequIP default name (the value).
+
+
+Advanced options
+----------------
+
+Skip frames during data processing
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The ``include_frames`` argument can be specified in the YAML to skip certain frames in the raw data file.
+It must be a list or another Python iterable.
+
+Register user-defined graph, node, edge fields
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Graph, node, and edge fields are quantities that belong to
+the whole graph, to each atom, and to each edge, respectively.
+Example graph fields include ``cell``, ``pbc``, and ``total_energy``;
+example node fields include ``pos`` and ``forces``.
+
+To help NequIP assemble batched data properly, any graph quantity other than
+``cell``, ``pbc``, and ``total_energy`` must be registered, as sketched below.
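+
+A hedged sketch using ``nequip.data.register_fields`` (the keyword argument
+names are assumed from this codebase; ``my_label`` is a hypothetical field):
+
+.. code:: python
+
+    from nequip.data import register_fields
+
+    # register `my_label` as a per-graph quantity so batching handles it correctly
+    register_fields(graph_fields=["my_label"])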
diff --git a/docs/howto/howto.rst b/docs/howto/howto.rst
new file mode 100644
index 00000000..07e84e84
--- /dev/null
+++ b/docs/howto/howto.rst
@@ -0,0 +1,7 @@
+How-to Tutorials
+================
+
+ .. toctree::
+
+ dataset
+ migrate
diff --git a/docs/guide/migrate.rst b/docs/howto/migrate.rst
similarity index 100%
rename from docs/guide/migrate.rst
rename to docs/howto/migrate.rst
diff --git a/docs/index.rst b/docs/index.rst
index dc6ecd43..d2edd1a6 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -9,12 +9,20 @@ NequIP
NequIP is an open-source package for creating, training, and using E(3)-equivariant machine learning interatomic potentials.
.. toctree::
- :maxdepth: 3
+ :maxdepth: 2
:caption: Contents:
- guide/guide
+ introduction/intro
+ cite
+ installation/install
+ yaml/yaml
+ howto/howto
+ faq/FAQ
+ commandline/commands
+ lammps/all
options/options
api/nequip
+ errors/errors
diff --git a/docs/installation/install.rst b/docs/installation/install.rst
new file mode 100644
index 00000000..3e946815
--- /dev/null
+++ b/docs/installation/install.rst
@@ -0,0 +1,39 @@
+Installation
+============
+
+NequIP requires:
+
+ * Python >= 3.6
+  * PyTorch >= 1.8, <=1.11.*. PyTorch can be installed following the `instructions from their documentation <https://pytorch.org/get-started/locally/>`_. Note that neither ``torchvision`` nor ``torchaudio``, included in the default install command, are needed for NequIP.
+
+To install:
+
+  * We use `Weights&Biases <https://wandb.ai>`_ to keep track of experiments. This is not a strict requirement — you can use our package without it — but it may make your life easier. If you want to use it, create an account `here <https://wandb.ai>`_ and install the Python package::
+
+ pip install wandb
+
+ * Install the latest stable NequIP::
+
+ pip install https://github.com/mir-group/nequip/archive/main.zip
+
+To install previous versions of NequIP, please clone the repository from GitHub and check out the appropriate tag (for example ``v0.3.3`` for version 0.3.3).
+
+To install the current **unstable** development version of NequIP, please clone our repository and check out the ``develop`` branch.
+
+Installation Issues
+-------------------
+
+The easiest way to check if your installation is working is to train a *toy* model::
+
+ nequip-train configs/minimal.yaml
+
+If you suspect something is wrong, encounter errors, or just want to confirm that everything is in working order, you can also run the unit tests::
+
+ pip install pytest
+ pytest tests/unit/
+
+To run the full tests, including a set of longer/more intensive integration tests, run::
+
+ pytest tests/
+
+If a GPU is present, the unit tests will use it.
\ No newline at end of file
diff --git a/docs/introduction/intro.rst b/docs/introduction/intro.rst
new file mode 100644
index 00000000..e0dcc32c
--- /dev/null
+++ b/docs/introduction/intro.rst
@@ -0,0 +1,4 @@
+Overview
+========
+
+TODO
diff --git a/docs/lammps/all.rst b/docs/lammps/all.rst
new file mode 100644
index 00000000..9faac07e
--- /dev/null
+++ b/docs/lammps/all.rst
@@ -0,0 +1,7 @@
+Integration with LAMMPS and ASE
+===============================
+
+ .. toctree::
+
+ lammps
+ ase
diff --git a/docs/lammps/ase.rst b/docs/lammps/ase.rst
new file mode 100644
index 00000000..3729cde3
--- /dev/null
+++ b/docs/lammps/ase.rst
@@ -0,0 +1,2 @@
+ASE
+===
diff --git a/docs/lammps/lammps.rst b/docs/lammps/lammps.rst
new file mode 100644
index 00000000..f9d0ba9f
--- /dev/null
+++ b/docs/lammps/lammps.rst
@@ -0,0 +1,2 @@
+LAMMPS
+======
diff --git a/docs/options/dataset.rst b/docs/options/dataset.rst
index 54b39fc9..f3ca194c 100644
--- a/docs/options/dataset.rst
+++ b/docs/options/dataset.rst
@@ -33,7 +33,7 @@ key_mapping
| Type: dict
| Default: ``{'positions': 'pos', 'energy': 'total_energy', 'force': 'forces', 'forces': 'forces', 'Z': 'atomic_numbers', 'atomic_number': 'atomic_numbers'}``
-npz_keys
+include_keys
-^^^^^^^^
+^^^^^^^^^^^^
| Type: list
| Default: ``[]``
@@ -68,5 +68,11 @@ include_frames
| Type: NoneType
| Default: ``None``
+ase_args
+^^^^^^^^
+ | Type: dict
+ | Default: ``{}``
+
Advanced
---------
\ No newline at end of file
+--------
+See the tutorial at :ref:`dataset_note`.
diff --git a/docs/yaml/yaml.rst b/docs/yaml/yaml.rst
new file mode 100644
index 00000000..fd804436
--- /dev/null
+++ b/docs/yaml/yaml.rst
@@ -0,0 +1,4 @@
+YAML input
+==========
+
+TODO
diff --git a/nequip/_version.py b/nequip/_version.py
index 91faf40b..b02164d2 100644
--- a/nequip/_version.py
+++ b/nequip/_version.py
@@ -2,4 +2,4 @@
# See Python packaging guide
# https://packaging.python.org/guides/single-sourcing-package-version/
-__version__ = "0.5.5"
+__version__ = "0.5.6"
diff --git a/nequip/data/AtomicData.py b/nequip/data/AtomicData.py
index 3f2e348b..728c260b 100644
--- a/nequip/data/AtomicData.py
+++ b/nequip/data/AtomicData.py
@@ -48,6 +48,7 @@
AtomicDataDict.EDGE_LENGTH_KEY,
AtomicDataDict.EDGE_ATTRS_KEY,
AtomicDataDict.EDGE_EMBEDDING_KEY,
+ AtomicDataDict.EDGE_FEATURES_KEY,
}
_DEFAULT_GRAPH_FIELDS: Set[str] = {
AtomicDataDict.TOTAL_ENERGY_KEY,
@@ -773,7 +774,7 @@ def neighbor_list_and_relative_vec(
keep_edge = ~bad_edge
if not np.any(keep_edge):
raise ValueError(
- "After eliminating self edges, no edges remain in this system."
+ f"Every single atom has no neighbors within the cutoff r_max={r_max} (after eliminating self edges, no edges remain in this system)"
)
first_idex = first_idex[keep_edge]
second_idex = second_idex[keep_edge]
diff --git a/nequip/data/_keys.py b/nequip/data/_keys.py
index c0535edd..54b66ce3 100644
--- a/nequip/data/_keys.py
+++ b/nequip/data/_keys.py
@@ -44,6 +44,7 @@
EDGE_ATTRS_KEY: Final[str] = "edge_attrs"
# [n_edge, dim] invariant embedding of the edges
EDGE_EMBEDDING_KEY: Final[str] = "edge_embedding"
+EDGE_FEATURES_KEY: Final[str] = "edge_features"
NODE_FEATURES_KEY: Final[str] = "node_features"
NODE_ATTRS_KEY: Final[str] = "node_attrs"
diff --git a/nequip/data/dataset.py b/nequip/data/dataset.py
index 847b3795..c38b8eae 100644
--- a/nequip/data/dataset.py
+++ b/nequip/data/dataset.py
@@ -295,7 +295,13 @@ def process(self):
# type conversion
_process_dict(fixed_fields, ignore_fields=["r_max"])
- logging.info(f"Loaded data: {data}")
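+    # estimate the in-memory size of the processed tensors for logging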
+ total_MBs = sum(item.numel() * item.element_size() for _, item in data) / (
+ 1024 * 1024
+ )
+ logging.info(
+ f"Loaded data: {data}\n processed data size: ~{total_MBs:.2f} MB"
+ )
+ del total_MBs
# use atomic writes to avoid race conditions between
# different trainings that use the same dataset
@@ -635,7 +641,7 @@ class NpzDataset(AtomicInMemoryDataset):
"""Load data from an npz file.
To avoid loading unneeded data, keys are ignored by default unless they are in ``key_mapping``, ``include_keys``,
- ``npz_fixed_fields`` or ``extra_fixed_fields``.
+    ``npz_fixed_field_keys`` or ``extra_fixed_fields``.
Args:
key_mapping (Dict[str, str]): mapping of npz keys to ``AtomicData`` keys. Optional
diff --git a/nequip/data/transforms.py b/nequip/data/transforms.py
index f2c7ec32..4f6331b7 100644
--- a/nequip/data/transforms.py
+++ b/nequip/data/transforms.py
@@ -121,11 +121,13 @@ def transform(self, atomic_numbers):
f"Data included atomic numbers {bad_set} that are not part of the atomic number -> type mapping!"
)
- return self._Z_to_index[atomic_numbers - self._min_Z]
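+        # move the lookup table to the input's device before advanced indexing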
+ return self._Z_to_index.to(device=atomic_numbers.device)[
+ atomic_numbers - self._min_Z
+ ]
def untransform(self, atom_types):
"""Transform atom types back into atomic numbers"""
- return self._index_to_Z[atom_types]
+ return self._index_to_Z[atom_types].to(device=atom_types.device)
@property
def has_chemical_symbols(self) -> bool:
diff --git a/nequip/model/_build.py b/nequip/model/_build.py
index 0fe4e21d..7e1a63fd 100644
--- a/nequip/model/_build.py
+++ b/nequip/model/_build.py
@@ -8,7 +8,10 @@
def model_from_config(
- config, initialize: bool = False, dataset: Optional[AtomicDataset] = None
+ config,
+ initialize: bool = False,
+ dataset: Optional[AtomicDataset] = None,
+ deploy: bool = False,
) -> GraphModuleMixin:
"""Build a model based on `config`.
@@ -17,11 +20,13 @@ def model_from_config(
- ``model``: the model produced by the previous builder. Cannot be requested by the first builder, must be requested by subsequent ones.
- ``initialize``: whether to initialize the model
- ``dataset``: if ``initialize`` is True, the dataset
+ - ``deploy``: whether the model object is for deployment / inference
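+
+    A builder requesting these might be declared as (``MyBuilder`` is a
+    hypothetical name)::
+
+        def MyBuilder(config, initialize: bool, deploy: bool, model=None):
+            ...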
Args:
config
- initialize (bool): if True (default False), ``model_initializers`` will also be run.
+ initialize (bool): whether ``model_builders`` should be instructed to initialize the model
dataset: dataset for initializers if ``initialize`` is True.
+ deploy (bool): whether ``model_builders`` should be told the model is for deployment / inference
Returns:
        The built model.
@@ -61,6 +66,8 @@ def model_from_config(
params = {}
if "initialize" in pnames:
params["initialize"] = initialize
+ if "deploy" in pnames:
+ params["deploy"] = deploy
if "config" in pnames:
params["config"] = config
if "dataset" in pnames:
diff --git a/nequip/model/_scaling.py b/nequip/model/_scaling.py
index f5554d25..8a7ffa46 100644
--- a/nequip/model/_scaling.py
+++ b/nequip/model/_scaling.py
@@ -12,7 +12,10 @@
def RescaleEnergyEtc(
- model: GraphModuleMixin, config, dataset: AtomicDataset, initialize: bool
+ model: GraphModuleMixin,
+ config,
+ initialize: bool,
+ dataset: Optional[AtomicDataset] = None,
):
return GlobalRescale(
model=model,
@@ -34,7 +37,6 @@ def RescaleEnergyEtc(
def GlobalRescale(
model: GraphModuleMixin,
config,
- dataset: AtomicDataset,
initialize: bool,
module_prefix: str,
default_scale: Union[str, float, list],
@@ -43,6 +45,7 @@ def GlobalRescale(
default_shift_keys: list,
default_related_scale_keys: list,
default_related_shift_keys: list,
+ dataset: Optional[AtomicDataset] = None,
):
"""Add global rescaling for energy(-based quantities).
@@ -75,11 +78,12 @@ def GlobalRescale(
raise ValueError(f"Invalid global scale `{value}`")
# = Compute shifts and scales =
- computed_stats = _compute_stats(
- str_names=str_names,
- dataset=dataset,
- stride=config.dataset_statistics_stride,
- )
+ if len(str_names) > 0:
+ computed_stats = _compute_stats(
+ str_names=str_names,
+ dataset=dataset,
+ stride=config.dataset_statistics_stride,
+ )
if isinstance(global_scale, str):
s = global_scale
@@ -129,8 +133,8 @@ def GlobalRescale(
def PerSpeciesRescale(
model: GraphModuleMixin,
config,
- dataset: AtomicDataset,
initialize: bool,
+ dataset: Optional[AtomicDataset] = None,
):
"""Add global rescaling for energy(-based quantities).
@@ -199,12 +203,13 @@ def PerSpeciesRescale(
], "Requested to set either the shifts or scales of the per_species_rescale using dataset values, but chose to provide the other in non-dataset units. Please give the explictly specified shifts/scales in dataset units and set per_species_rescale_arguments_in_dataset_units"
# = Compute shifts and scales =
- computed_stats = _compute_stats(
- str_names=str_names,
- dataset=dataset,
- stride=config.dataset_statistics_stride,
- kwargs=config.get(module_prefix + "_kwargs", {}),
- )
+ if len(str_names) > 0:
+ computed_stats = _compute_stats(
+ str_names=str_names,
+ dataset=dataset,
+ stride=config.dataset_statistics_stride,
+ kwargs=config.get(module_prefix + "_kwargs", {}),
+ )
if isinstance(scales, str):
s = scales
diff --git a/nequip/nn/_grad_output.py b/nequip/nn/_grad_output.py
index ffc13140..673f8ff0 100644
--- a/nequip/nn/_grad_output.py
+++ b/nequip/nn/_grad_output.py
@@ -315,10 +315,10 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type:
torch.cross(cell[:, 1, :], cell[:, 2, :], dim=1),
).unsqueeze(-1)
stress = virial / volume.view(-1, 1, 1)
- data[AtomicDataDict.STRESS_KEY] = stress
data[AtomicDataDict.CELL_KEY] = orig_cell
else:
stress = self._empty # torchscript
+ data[AtomicDataDict.STRESS_KEY] = stress
# see discussion in https://github.com/libAtoms/QUIP/issues/227 about sign convention
# they say the standard convention is virial = -stress x volume
diff --git a/nequip/scripts/benchmark.py b/nequip/scripts/benchmark.py
index 579e60ea..1deb0de2 100644
--- a/nequip/scripts/benchmark.py
+++ b/nequip/scripts/benchmark.py
@@ -3,6 +3,10 @@
import tempfile
import itertools
import time
+import logging
+import sys
+import pdb
+import traceback
import torch
from torch.utils.benchmark import Timer, Measurement
@@ -11,9 +15,10 @@
from e3nn.util.jit import script
from nequip.utils import Config
+from nequip.utils.test import assert_AtomicData_equivariant
from nequip.data import AtomicData, AtomicDataDict, dataset_from_config
from nequip.model import model_from_config
-from nequip.scripts.deploy import _compile_for_deploy
+from nequip.scripts.deploy import _compile_for_deploy, load_deployed_model
from nequip.scripts.train import default_config, check_code_version
from nequip.utils._global_options import _set_global_options
@@ -25,12 +30,23 @@ def main(args=None):
)
)
parser.add_argument("config", help="configuration file")
+ parser.add_argument(
+ "--model",
+ help="A deployed model to load instead of building a new one from `config`. ",
+ type=str,
+ default=None,
+ )
parser.add_argument(
"--profile",
help="Profile instead of timing, creating and outputing a Chrome trace JSON to the given path.",
type=str,
default=None,
)
+ parser.add_argument(
+ "--equivariance-test",
+ help="test the model's equivariance on `--n-data` frames.",
+ action="store_true",
+ )
parser.add_argument(
"--device",
help="Device to run the model on. If not provided, defaults to CUDA if available and CPU otherwise.",
@@ -55,11 +71,33 @@ def main(args=None):
type=float,
default=1,
)
-
- # TODO: option to show memory use
+ parser.add_argument(
+ "--no-compile",
+ help="Don't compile the model to TorchScript",
+ action="store_true",
+ )
+ parser.add_argument(
+ "--memory-summary",
+ help="Print torch.cuda.memory_summary() after running the model",
+ action="store_true",
+ )
+ parser.add_argument(
+ "--verbose", help="Logging verbosity level", type=str, default="error"
+ )
+ parser.add_argument(
+ "--pdb",
+ help="Run model builders and model under debugger to easily drop to debugger to investigate errors.",
+ action="store_true",
+ )
# Parse the args
args = parser.parse_args(args=args)
+ if args.pdb:
+ assert args.profile is None
+
+ root_logger = logging.getLogger()
+ root_logger.setLevel(getattr(logging, args.verbose.upper()))
+ root_logger.handlers = [logging.StreamHandler(sys.stderr)]
if args.device is None:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -80,12 +118,12 @@ def main(args=None):
print(f" loading dataset took {dataset_time:.4f}s")
dataset_rng = torch.Generator()
dataset_rng.manual_seed(config.get("dataset_seed", config.get("seed", 12345)))
- datas = [
+ datas_list = [
AtomicData.to_AtomicDataDict(dataset[i].to(device))
for i in torch.randperm(len(dataset), generator=dataset_rng)[: args.n_data]
]
- n_atom: int = len(datas[0]["pos"])
- if not all(len(d["pos"]) == n_atom for d in datas):
+ n_atom: int = len(datas_list[0]["pos"])
+ if not all(len(d["pos"]) == n_atom for d in datas_list):
raise NotImplementedError(
"nequip-benchmark does not currently handle benchmarking on data frames with variable number of atoms"
)
@@ -97,7 +135,7 @@ def main(args=None):
print(f" number of atoms: {n_atom}")
print(f" number of types: {dataset.type_mapper.num_types}")
print(
- f" avg. num edges: {sum(d[AtomicDataDict.EDGE_INDEX_KEY].shape[1] for d in datas) / len(datas)}"
+ f" avg. num edges: {sum(d[AtomicDataDict.EDGE_INDEX_KEY].shape[1] for d in datas_list) / len(datas_list)}"
)
avg_edges_per_atom = torch.mean(
torch.cat(
@@ -106,14 +144,14 @@ def main(args=None):
d[AtomicDataDict.EDGE_INDEX_KEY][0],
minlength=d[AtomicDataDict.POSITIONS_KEY].shape[0],
).float()
- for d in datas
+ for d in datas_list
]
)
).item()
print(f" avg. neigh/atom: {avg_edges_per_atom}")
# cycle over the datas we loaded
- datas = itertools.cycle(datas)
+ datas = itertools.cycle(datas_list)
# short circuit
if args.n == 0:
@@ -121,29 +159,64 @@ def main(args=None):
return
# Load model:
- print("Building model... ")
- model_time = time.time()
- model = model_from_config(config, initialize=True, dataset=dataset)
- model_time = time.time() - model_time
- print(f" building model took {model_time:.4f}s")
- print("Compile...")
- # "Deploy" it
+ if args.model is None:
+ print("Building model... ")
+ model_time = time.time()
+ try:
+ model = model_from_config(
+ config, initialize=True, dataset=dataset, deploy=True
+ )
+ except: # noqa: E722
+ if args.pdb:
+ traceback.print_exc()
+ pdb.post_mortem()
+ else:
+ raise
+ model_time = time.time() - model_time
+ print(f" building model took {model_time:.4f}s")
+ else:
+ print("Loading model...")
+ model, metadata = load_deployed_model(args.model, device=device, freeze=False)
+ print(" deployed model has metadata:")
+ print(
+ "\n".join(
+ " %s: %s" % e for e in metadata.items() if e[0] != "config"
+ )
+ )
+ print(f" model has {sum(p.numel() for p in model.parameters())} weights")
+ print(
+ f" model has {sum(p.numel() for p in model.parameters() if p.requires_grad)} trainable weights"
+ )
+ print(
+ f" model weights and buffers take {sum(p.numel() * p.element_size() for p in itertools.chain(model.parameters(), model.buffers())) / (1024 * 1024):.2f} MB"
+ )
+
model.eval()
- compile_time = time.time()
- model = script(model)
- model = _compile_for_deploy(model)
- compile_time = time.time() - compile_time
- print(f" compilation took {compile_time:.4f}s")
-
- # save and reload to avoid bugs
- with tempfile.NamedTemporaryFile() as f:
- torch.jit.save(model, f.name)
- model = torch.jit.load(f.name, map_location=device)
- # freeze like in the LAMMPS plugin
- model = torch.jit.freeze(model)
- # and reload again just to avoid bugs
- torch.jit.save(model, f.name)
- model = torch.jit.load(f.name, map_location=device)
+ if args.equivariance_test:
+ args.no_compile = True
+ if args.model is not None:
+ raise RuntimeError("Can't equivariance test a deployed model.")
+
+ if args.no_compile:
+ model = model.to(device)
+ else:
+ print("Compile...")
+ # "Deploy" it
+ compile_time = time.time()
+ model = script(model)
+ model = _compile_for_deploy(model)
+ compile_time = time.time() - compile_time
+ print(f" compilation took {compile_time:.4f}s")
+
+ # save and reload to avoid bugs
+ with tempfile.NamedTemporaryFile() as f:
+ torch.jit.save(model, f.name)
+ model = torch.jit.load(f.name, map_location=device)
+ # freeze like in the LAMMPS plugin
+ model = torch.jit.freeze(model)
+ # and reload again just to avoid bugs
+ torch.jit.save(model, f.name)
+ model = torch.jit.load(f.name, map_location=device)
# Make sure we're warm past compilation
warmup = config["_jit_bailout_depth"] + 4 # just to be safe...
@@ -154,7 +227,7 @@ def trace_handler(p):
p.export_chrome_trace(args.profile)
print(f"Wrote profiling trace to `{args.profile}`")
- print("Starting...")
+ print("Starting profiling...")
with torch.profiler.profile(
activities=[
torch.profiler.ProfilerActivity.CPU,
@@ -168,6 +241,34 @@ def trace_handler(p):
for _ in range(1 + warmup + args.n):
model(next(datas).copy())
p.step()
+ elif args.pdb:
+ print("Running model under debugger...")
+ try:
+ for _ in range(args.n):
+ model(next(datas).copy())
+ except: # noqa: E722
+ traceback.print_exc()
+ pdb.post_mortem()
+ print("Done.")
+ elif args.equivariance_test:
+ print("Warmup...")
+ warmup_time = time.time()
+ for _ in range(warmup):
+ model(next(datas).copy())
+ warmup_time = time.time() - warmup_time
+ print(f" {warmup} calls of warmup took {warmup_time:.4f}s")
+ print("Running equivariance test...")
+ errstr = assert_AtomicData_equivariant(model, datas_list)
+ print(
+ " Equivariance test passed; equivariance errors:\n"
+ " Errors are in real units, where relevant.\n"
+ " Please note that the large scale of the typical\n"
+ " shifts to the (atomic) energy can cause\n"
+ " catastrophic cancellation and give incorrectly\n"
+ " the equivariance error as zero for those fields.\n"
+ f"{errstr}"
+ )
+ del errstr
else:
print("Warmup...")
warmup_time = time.time()
@@ -176,13 +277,17 @@ def trace_handler(p):
warmup_time = time.time() - warmup_time
print(f" {warmup} calls of warmup took {warmup_time:.4f}s")
- print("Starting...")
+ print("Benchmarking...")
# just time
t = Timer(
stmt="model(next(datas).copy())", globals={"model": model, "datas": datas}
)
perloop: Measurement = t.timeit(args.n)
+ if args.memory_summary and torch.cuda.is_available():
+ print("Memory usage summary:")
+ print(torch.cuda.memory_summary())
+
print(" -- Results --")
print(
f"PLEASE NOTE: these are speeds for the MODEL, evaluated on --n-data={args.n_data} configurations kept in memory."
diff --git a/nequip/scripts/deploy.py b/nequip/scripts/deploy.py
index 8185ab75..394c0005 100644
--- a/nequip/scripts/deploy.py
+++ b/nequip/scripts/deploy.py
@@ -9,6 +9,7 @@
import pathlib
import logging
import yaml
+import itertools
# This is a weird hack to avoid Intel MKL issues on the cluster when this is called as a subprocess of a process that has itself initialized PyTorch.
# Since numpy gets imported later anyway for dataset stuff, this shouldn't affect performance.
@@ -129,7 +130,7 @@ def load_deployed_model(
def main(args=None):
parser = argparse.ArgumentParser(
- description="Create and view information about deployed NequIP potentials."
+ description="Deploy and view information about previously deployed NequIP models."
)
# backward compat for 3.6
if sys.version_info[1] > 6:
@@ -146,6 +147,11 @@ def main(args=None):
help="Path to a deployed model file.",
type=pathlib.Path,
)
+ info_parser.add_argument(
+ "--print-config",
+ help="Print the full config of the model.",
+ action="store_true",
+ )
build_parser = subparsers.add_parser("build", help="Build a deployment model")
build_parser.add_argument(
@@ -169,13 +175,25 @@ def main(args=None):
logging.basicConfig(level=getattr(logging, args.verbose.upper()))
if args.command == "info":
- model, metadata = load_deployed_model(args.model_path, set_global_options=False)
- del model
+ model, metadata = load_deployed_model(
+ args.model_path, set_global_options=False, freeze=False
+ )
config = metadata.pop(CONFIG_KEY)
- metadata_str = "\n".join(" %s: %s" % e for e in metadata.items())
- logging.info(f"Loaded TorchScript model with metadata:\n{metadata_str}\n")
- logging.info("Model was built with config:")
- print(config)
+ if args.print_config:
+ print(config)
+ else:
+ metadata_str = "\n".join(" %s: %s" % e for e in metadata.items())
+ logging.info(f"Loaded TorchScript model with metadata:\n{metadata_str}\n")
+ logging.info(
+ f"Model has {sum(p.numel() for p in model.parameters())} weights"
+ )
+ logging.info(
+ f"Model has {sum(p.numel() for p in model.parameters() if p.requires_grad)} trainable weights"
+ )
+ logging.info(
+ f"Model weights and buffers take {sum(p.numel() * p.element_size() for p in itertools.chain(model.parameters(), model.buffers())) / (1024 * 1024):.2f} MB"
+ )
+ logging.debug(f"Model had config:\n{config}")
elif args.command == "build":
if args.model and args.train_dir:
@@ -198,7 +216,7 @@ def main(args=None):
args.train_dir, model_name="best_model.pth", device="cpu"
)
elif args.model is not None:
- model = model_from_config(config)
+ model = model_from_config(config, deploy=True)
else:
raise AssertionError
diff --git a/nequip/scripts/evaluate.py b/nequip/scripts/evaluate.py
index d67d750f..f7dfa12b 100644
--- a/nequip/scripts/evaluate.py
+++ b/nequip/scripts/evaluate.py
@@ -30,13 +30,13 @@ def main(args=None, running_as_script: bool = True):
description=textwrap.dedent(
"""Compute the error of a model on a test set using various metrics.
- The model, metrics, dataset, etc. can specified individually, or a training session can be indicated with `--train-dir`.
+ The model, metrics, dataset, etc. can specified in individual YAML config files, or a training session can be indicated with `--train-dir`.
In order of priority, the global settings (dtype, TensorFloat32, etc.) are taken from:
- 1. The model config (for a training session)
- 2. The dataset config (for a deployed model)
- 3. The defaults
+ (1) the model config (for a training session),
+ (2) the dataset config (for a deployed model),
+ or (3) the defaults.
- Prints only the final result in `name = num` format to stdout; all other information is logging.debuged to stderr.
+ Prints only the final result in `name = num` format to stdout; all other information is `logging.debug`ed to stderr.
WARNING: Please note that results of CUDA models are rarely exactly reproducible, and that even CPU models can be nondeterministic.
"""
@@ -74,7 +74,7 @@ def main(args=None, running_as_script: bool = True):
)
parser.add_argument(
"--batch-size",
- help="Batch size to use. Larger is usually faster on GPU. If you run out of memory, lower this.",
+ help="Batch size to use. Larger is usually faster on GPU. If you run out of memory, lower this. You can also try to raise this for faster evaluation. Default: 50.",
type=int,
default=50,
)
diff --git a/nequip/scripts/train.py b/nequip/scripts/train.py
index c6aa7785..88b55f7e 100644
--- a/nequip/scripts/train.py
+++ b/nequip/scripts/train.py
@@ -81,8 +81,12 @@ def main(args=None, running_as_script: bool = True):
def parse_command_line(args=None):
- parser = argparse.ArgumentParser(description="Train a NequIP model.")
- parser.add_argument("config", help="configuration file")
+ parser = argparse.ArgumentParser(
+ description="Train (or restart training of) a NequIP model."
+ )
+ parser.add_argument(
+ "config", help="YAML file configuring the model, dataset, and other options"
+ )
parser.add_argument(
"--equivariance-test",
help="test the model's equivariance before training on n (default 1) random frames from the dataset",
diff --git a/nequip/train/trainer.py b/nequip/train/trainer.py
index 7ce4d4e1..55efec32 100644
--- a/nequip/train/trainer.py
+++ b/nequip/train/trainer.py
@@ -706,6 +706,9 @@ def init(self):
self.num_weights = sum(p.numel() for p in self.model.parameters())
self.logger.info(f"Number of weights: {self.num_weights}")
+ self.logger.info(
+ f"Number of trainable weights: {sum(p.numel() for p in self.model.parameters() if p.requires_grad)}"
+ )
self.rescale_layers = []
outer_layer = self.model
@@ -1177,7 +1180,9 @@ def set_dataset(
if self.n_train > len(dataset):
raise ValueError("Not enough data in dataset for requested n_train")
if self.n_val > len(validation_dataset):
- raise ValueError("Not enough data in dataset for requested n_train")
+ raise ValueError(
+ "Not enough data in validation dataset for requested n_val"
+ )
if self.train_val_split == "random":
self.train_idcs = torch.randperm(
len(dataset), generator=self.dataset_rng
diff --git a/nequip/utils/git.py b/nequip/utils/git.py
index a78a87fc..a5fbe7f3 100644
--- a/nequip/utils/git.py
+++ b/nequip/utils/git.py
@@ -8,7 +8,14 @@
def get_commit(module: str) -> Optional[str]:
module = import_module(module)
- path = str(Path(module.__file__).parents[0] / "..")
+ package = Path(module.__file__).parents[0]
+ if package.is_file():
+ # We're installed as a ZIP .egg file,
+ # which means there's no git information
+ # and looking for the parent would fail anyway
+ # https://github.com/mir-group/nequip/issues/264
+ return None
+ path = str(package / "..")
retcode = subprocess.run(
"git show --oneline --abbrev=40 -s".split(),
diff --git a/nequip/utils/regressor.py b/nequip/utils/regressor.py
index 3d23cf84..76d140bc 100644
--- a/nequip/utils/regressor.py
+++ b/nequip/utils/regressor.py
@@ -1,181 +1,76 @@
import logging
import torch
-import numpy as np
-from typing import Optional
-from sklearn.gaussian_process import GaussianProcessRegressor
-from sklearn.gaussian_process.kernels import DotProduct, Kernel, Hyperparameter
+from torch import matmul
+from torch.linalg import solve, inv
+from typing import Optional, Sequence
+from opt_einsum import contract
-def solver(X, y, regressor: Optional[str] = "NormalizedGaussianProcess", **kwargs):
- if regressor == "GaussianProcess":
- return gp(X, y, **kwargs)
- elif regressor == "NormalizedGaussianProcess":
- return normalized_gp(X, y, **kwargs)
- else:
- raise NotImplementedError(f"{regressor} is not implemented")
+def solver(X, y, alpha: Optional[float] = 0.001, stride: Optional[int] = 1, **kwargs):
+ # results are in the same "units" as y, so same dtype too:
+ dtype_out = y.dtype
+ # always solve in float64 for numerical stability
+ dtype = torch.float64
+ X = X[::stride].to(dtype)
+ y = y[::stride].to(dtype)
+
+ X, y = down_sampling_by_composition(X, y)
+
+ X_norm = torch.sum(X)
+
+ X = X / X_norm
+ y = y / X_norm
-def normalized_gp(X, y, **kwargs):
- feature_rms = 1.0 / np.sqrt(np.average(X**2, axis=0))
- feature_rms = np.nan_to_num(feature_rms, 1)
y_mean = torch.sum(y) / torch.sum(X)
- mean, std = base_gp(
- X,
- y - (torch.sum(X, axis=1) * y_mean).reshape(y.shape),
- NormalizedDotProduct,
- {"diagonal_elements": feature_rms},
- **kwargs,
- )
- return mean + y_mean, std
+ feature_rms = torch.sqrt(torch.mean(X**2, axis=0))
-def gp(X, y, **kwargs):
- return base_gp(
- X, y, DotProduct, {"sigma_0": 0, "sigma_0_bounds": "fixed"}, **kwargs
- )
+ alpha_mat = torch.diag(feature_rms) * alpha * alpha
+
+ A = matmul(X.T, X) + alpha_mat
+ dy = y - (torch.sum(X, axis=1, keepdim=True) * y_mean).reshape(y.shape)
+ Xy = matmul(X.T, dy)
+
+ mean = solve(A, Xy)
+
+ sigma2 = torch.var(matmul(X, mean) - dy)
+ Ainv = inv(A)
+ cov = torch.sqrt(sigma2 * contract("ij,kj,kl,li->i", Ainv, X, X, Ainv))
+ mean = mean + y_mean.reshape([-1])
-def base_gp(
- X,
- y,
- kernel,
- kernel_kwargs,
- alpha: Optional[float] = 0.1,
- max_iteration: int = 20,
- stride: Optional[int] = None,
+ logging.debug(f"Ridge Regression, residue {sigma2}")
+
+ return mean.to(dtype_out), cov.to(dtype_out)
+
+
+def down_sampling_by_composition(
+ X: torch.Tensor, y: torch.Tensor, percentage: Sequence = [0.25, 0.5, 0.75]
):
- if len(y.shape) == 1:
- y = y.reshape([-1, 1])
-
- if stride is not None:
- X = X[::stride]
- y = y[::stride]
-
- not_fit = True
- iteration = 0
- mean = None
- std = None
- while not_fit:
- logging.debug(f"GP fitting iteration {iteration} {alpha}")
- try:
- _kernel = kernel(**kernel_kwargs)
- gpr = GaussianProcessRegressor(kernel=_kernel, random_state=0, alpha=alpha)
- gpr = gpr.fit(X, y)
-
- vec = torch.diag(torch.ones(X.shape[1]))
- mean, std = gpr.predict(vec, return_std=True)
-
- mean = torch.as_tensor(mean, dtype=torch.get_default_dtype()).reshape([-1])
- # ignore all the off-diagonal terms
- std = torch.as_tensor(std, dtype=torch.get_default_dtype()).reshape([-1])
- likelihood = gpr.log_marginal_likelihood()
-
- res = torch.sqrt(
- torch.square(torch.matmul(X, mean.reshape([-1, 1])) - y).mean()
- )
-
- logging.debug(
- f"GP fitting: alpha {alpha}:\n"
- f" residue {res}\n"
- f" mean {mean} std {std}\n"
- f" log marginal likelihood {likelihood}"
- )
- not_fit = False
-
- except Exception as e:
- logging.info(f"GP fitting failed for alpha={alpha} and {e.args}")
- if alpha == 0 or alpha is None:
- logging.info("try a non-zero alpha")
- not_fit = False
- raise ValueError(
- f"Please set the {alpha} to non-zero value. \n"
- "The dataset energy is rank deficient to be solved with GP"
- )
- else:
- alpha = alpha * 2
- iteration += 1
- logging.debug(f" increase alpha to {alpha}")
-
- if iteration >= max_iteration or not_fit is False:
- raise ValueError(
- "Please set the per species shift and scale to zeros and ones. \n"
- "The dataset energy is to diverge to be solved with GP"
- )
-
- return mean, std
-
-
-class NormalizedDotProduct(Kernel):
- r"""Dot-Product kernel.
- .. math::
- k(x_i, x_j) = x_i \cdot A \cdot x_j
- """
-
- def __init__(self, diagonal_elements):
- # TO DO: check shape
- self.diagonal_elements = diagonal_elements
- self.A = np.diag(diagonal_elements)
-
- def __call__(self, X, Y=None, eval_gradient=False):
- """Return the kernel k(X, Y) and optionally its gradient.
- Parameters
- ----------
- X : ndarray of shape (n_samples_X, n_features)
- Left argument of the returned kernel k(X, Y)
- Y : ndarray of shape (n_samples_Y, n_features), default=None
- Right argument of the returned kernel k(X, Y). If None, k(X, X)
- if evaluated instead.
- eval_gradient : bool, default=False
- Determines whether the gradient with respect to the log of
- the kernel hyperparameter is computed.
- Only supported when Y is None.
- Returns
- -------
- K : ndarray of shape (n_samples_X, n_samples_Y)
- Kernel k(X, Y)
- K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims),\
- optional
- The gradient of the kernel k(X, X) with respect to the log of the
- hyperparameter of the kernel. Only returned when `eval_gradient`
- is True.
- """
- X = np.atleast_2d(X)
- if Y is None:
- K = (X.dot(self.A)).dot(X.T)
- else:
- if eval_gradient:
- raise ValueError("Gradient can only be evaluated when Y is None.")
- K = (X.dot(self.A)).dot(Y.T)
-
- if eval_gradient:
- return K, np.empty((X.shape[0], X.shape[0], 0))
- else:
- return K
-
- def diag(self, X):
- """Returns the diagonal of the kernel k(X, X).
- The result of this method is identical to np.diag(self(X)); however,
- it can be evaluated more efficiently since only the diagonal is
- evaluated.
- Parameters
- ----------
- X : ndarray of shape (n_samples_X, n_features)
- Left argument of the returned kernel k(X, Y).
- Returns
- -------
- K_diag : ndarray of shape (n_samples_X,)
- Diagonal of kernel k(X, X).
- """
- return np.einsum("ij,ij,jj->i", X, X, self.A)
-
- def __repr__(self):
- return ""
-
- def is_stationary(self):
- """Returns whether the kernel is stationary."""
- return False
-
- @property
- def hyperparameter_diagonal_elements(self):
- return Hyperparameter("diagonal_elements", "numeric", "fixed")
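+    # summarize each unique composition by a few quantiles of its energies;
+    # this keeps the ridge problem small and robust to outliers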
+ unique_comps, comp_ids = torch.unique(X, dim=0, return_inverse=True)
+
+ n_types = torch.max(comp_ids) + 1
+
+ sort_by = torch.argsort(comp_ids)
+
+ # find out the block for each composition
+ d_icomp = comp_ids[sort_by]
+ d_icomp = d_icomp[:-1] - d_icomp[1:]
+ node_icomp = torch.where(d_icomp != 0)[0]
+ id_start = torch.cat((torch.as_tensor([0]), node_icomp + 1))
+ id_end = torch.cat((node_icomp + 1, torch.as_tensor([len(sort_by)])))
+
+ n_points = len(percentage)
+ new_X = torch.zeros(
+ (n_types * n_points, X.shape[1]), dtype=X.dtype, device=X.device
+ )
+ new_y = torch.zeros((n_types * n_points), dtype=y.dtype, device=y.device)
+ for i in range(n_types):
+ ids = sort_by[id_start[i] : id_end[i]]
+ for j, p in enumerate(percentage):
+ new_y[i * n_points + j] = torch.quantile(y[ids], p, interpolation="linear")
+ new_X[i * n_points + j] = unique_comps[i]
+
+ return new_X, new_y
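+
+
+# Hedged usage sketch (shapes illustrative): `X` is an (n_frames, n_types)
+# composition matrix, `y` the per-frame total energies; `solver` returns the
+# per-type mean energies and their estimated uncertainties:
+#
+#   X = torch.tensor([[2.0, 1.0], [1.0, 2.0], [3.0, 0.0]])
+#   y = torch.tensor([-10.0, -12.0, -9.0])
+#   mean, std = solver(X, y, alpha=0.001)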
diff --git a/nequip/utils/test.py b/nequip/utils/test.py
index edf2c1e8..60e68730 100644
--- a/nequip/utils/test.py
+++ b/nequip/utils/test.py
@@ -28,7 +28,7 @@ def assert_permutation_equivariant(
data_in: AtomicDataDict.Type,
tolerance: Optional[float] = None,
raise_error: bool = True,
-):
+) -> str:
r"""Test the permutation equivariance of ``func``.
Standard fields are assumed to be equivariant to node or edge permutations according to their standard interpretions; all other fields are assumed to be invariant to all permutations. Non-standard fields can be registered as node/edge permutation equivariant using ``register_fields``.
@@ -93,38 +93,42 @@ def assert_permutation_equivariant(
out_perm.keys()
), "Permutation changed the set of fields returned by model"
- problems = []
+ messages = []
+ num_problems: int = 0
for k in out_orig.keys():
if k in node_permute_fields:
- if not torch.allclose(out_orig[k][node_perm], out_perm[k], atol=atol):
- err = (out_orig[k][node_perm] - out_perm[k]).abs().max()
- problems.append(
- f"node permutation equivariance violated for field {k}; maximum componentwise error: {err:e}"
- )
+ err = (out_orig[k][node_perm] - out_perm[k]).abs().max()
+ fail = not torch.allclose(out_orig[k][node_perm], out_perm[k], atol=atol)
+ if fail:
+ num_problems += 1
+ messages.append(
+ f" node permutation equivariance of field {k:20} -> max error={err:.3e}{' FAIL' if fail else ''}"
+ )
elif k in edge_permute_fields:
- if not torch.allclose(out_orig[k][edge_perm], out_perm[k], atol=atol):
- err = (out_orig[k][edge_perm] - out_perm[k]).abs().max()
- problems.append(
- f"edge permutation equivariance violated for field {k}; maximum componentwise error: {err:e}"
- )
+ err = (out_orig[k][edge_perm] - out_perm[k]).abs().max()
+ fail = not torch.allclose(out_orig[k][edge_perm], out_perm[k], atol=atol)
+ if fail:
+ num_problems += 1
+ messages.append(
+ f" edge permutation equivariance of field {k:20} -> max error={err:.3e}{' FAIL' if fail else ''}"
+ )
elif k == AtomicDataDict.EDGE_INDEX_KEY:
pass
else:
# Assume invariant
if out_orig[k].dtype == torch.bool:
- if not torch.all(out_orig[k] == out_perm[k]):
- problems.append(
- f"edge/node permutation invariance violated for field {k} ({k} was assumed to be invariant, should it have been marked as equivariant?)"
- )
+ err = (out_orig[k] != out_perm[k]).max()
else:
- if not torch.allclose(out_orig[k], out_perm[k], atol=atol):
- err = (out_orig[k] - out_perm[k]).abs().max()
- problems.append(
- f"edge/node permutation invariance violated for field {k}; maximum componentwise error: {err:e}. (`{k}` was assumed to be invariant, should it have been marked as equivariant?)"
- )
- msg = "\n".join(problems)
- if len(problems) == 0:
- return
+ err = (out_orig[k] - out_perm[k]).abs().max()
+ fail = not torch.allclose(out_orig[k], out_perm[k], atol=atol)
+ if fail:
+ num_problems += 1
+ messages.append(
+ f" edge & node permutation invariance for field {k:20} -> max error={err:.3e}{' FAIL' if fail else ''}"
+ )
+ msg = "\n".join(messages)
+ if num_problems == 0:
+ return msg
else:
if raise_error:
raise AssertionError(msg)
@@ -169,7 +173,7 @@ def assert_AtomicData_equivariant(
# == Test permutation of graph nodes ==
# since permutation is discrete and should not be data dependent, run only on one frame.
- permutation_problems = assert_permutation_equivariant(
+ permutation_message = assert_permutation_equivariant(
func, data_in[0], tolerance=permutation_tolerance, raise_error=False
)
@@ -255,53 +259,23 @@ def wrapper(*args):
if o3_tolerance is None:
o3_tolerance = FLOAT_TOLERANCE[torch.get_default_dtype()]
- anerr = next(iter(errs.values()))
- if isinstance(anerr, float) or anerr.ndim == 0:
- # old e3nn doesn't report which key
- problems = {k: v for k, v in errs.items() if v > o3_tolerance}
-
- def _describe(errors):
- return (
- permutation_problems + "\n" if permutation_problems is not None else ""
- ) + "\n".join(
- "(parity_k={:d}, did_translate={}) -> max error={:.3e}".format(
- int(k[0]),
- bool(k[1]),
- float(v),
- )
- for k, v in errors.items()
- )
-
- if len(problems) > 0 or permutation_problems is not None:
- raise AssertionError(
- "Equivariance test failed for cases:" + _describe(problems)
- )
-
- return _describe(errs)
- else:
- # it's newer and tells us which is which
- all_errs = []
- for case, err in errs.items():
- for key, this_err in zip(irreps_out.keys(), err):
- all_errs.append(case + (key, this_err))
- problems = [e for e in all_errs if e[-1] > o3_tolerance]
-
- def _describe(errors):
- return (
- permutation_problems + "\n" if permutation_problems is not None else ""
- ) + "\n".join(
- " (parity_k={:1d}, did_translate={:5}, field={:20}) -> max error={:.3e}".format(
- int(k[0]), str(bool(k[1])), str(k[2]), float(k[3])
- )
- for k in errors
- )
+ all_errs = []
+ for case, err in errs.items():
+ for key, this_err in zip(irreps_out.keys(), err):
+ all_errs.append(case + (key, this_err))
+ is_problem = [e[-1] > o3_tolerance for e in all_errs]
+
+ message = (permutation_message + "\n") + "\n".join(
+ " (parity_k={:1d}, did_translate={:5}, field={:20}) -> max error={:.3e}{}".format(
+ int(k[0]), str(bool(k[1])), str(k[2]), float(k[3]), " FAIL" if prob else ""
+ )
+ for k, prob in zip(all_errs, is_problem)
+ )
- if len(problems) > 0 or permutation_problems is not None:
- raise AssertionError(
- "Equivariance test failed for cases:\n" + _describe(problems)
- )
+ if sum(is_problem) > 0 or "FAIL" in permutation_message:
+ raise AssertionError(f"Equivariance test failed for cases:\n{message}")
- return _describe(all_errs)
+ return message
_DEBUG_HOOKS = None
diff --git a/nequip/utils/unittests/__init__.py b/nequip/utils/unittests/__init__.py
new file mode 100644
index 00000000..2309cb02
--- /dev/null
+++ b/nequip/utils/unittests/__init__.py
@@ -0,0 +1,3 @@
+import pathlib
+
+CONFTEST_PATH = pathlib.Path(__file__).parent / "conftest.py"
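+# ^ downstream test suites load the shared fixtures through this path (see tests/conftest.py below)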
diff --git a/nequip/utils/unittests/conftest.py b/nequip/utils/unittests/conftest.py
new file mode 100644
index 00000000..4cfa98ff
--- /dev/null
+++ b/nequip/utils/unittests/conftest.py
@@ -0,0 +1,154 @@
+from typing import List, Tuple
+import numpy as np
+import pathlib
+import pytest
+import tempfile
+import os
+
+from ase.atoms import Atoms
+from ase.build import molecule
+from ase.calculators.singlepoint import SinglePointCalculator
+from ase.io import write
+
+import torch
+
+from nequip.utils.test import set_irreps_debug
+from nequip.data import AtomicData, ASEDataset
+from nequip.data.transforms import TypeMapper
+from nequip.utils.torch_geometric import Batch
+from nequip.utils._global_options import _set_global_options
+from nequip.utils.misc import dtype_from_name
+
+if "NEQUIP_NUM_TASKS" not in os.environ:
+ # Test parallelization, but don't waste time spawning tons of workers if lots of cores are available
+ os.environ["NEQUIP_NUM_TASKS"] = "2"
+
+# The default float tolerance
+FLOAT_TOLERANCE = {
+ t: torch.as_tensor(v, dtype=dtype_from_name(t))
+ for t, v in {"float32": 1e-3, "float64": 1e-10}.items()
+}
+
+
+@pytest.fixture(scope="session", autouse=True, params=["float32", "float64"])
+def float_tolerance(request):
+ """Run all tests with various PyTorch default dtypes.
+
+ This is a session-wide, autouse fixture — you only need to request it explicitly if a test needs to know the tolerance for the current default dtype.
+
+ Returns
+ --------
+ A precision threshold to use for closeness tests.
+ """
+ old_dtype = torch.get_default_dtype()
+ dtype = request.param
+ _set_global_options({"default_dtype": dtype})
+ yield FLOAT_TOLERANCE[dtype]
+ _set_global_options(
+ {
+ "default_dtype": {torch.float32: "float32", torch.float64: "float64"}[
+ old_dtype
+ ]
+ }
+ )
+
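+# An illustrative (hypothetical) use of the fixture in a test:
+#
+# def test_outputs_close(float_tolerance):
+#     assert torch.allclose(out1, out2, atol=float_tolerance)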
+
+# - Ampere and TF32 -
+# Many of the tests for NequIP involve numerically checking
+# algebraic properties— normalization, equivariance,
+# continuity, etc.
+# With the added numerical noise of TF32, some of those tests fail
+# with the current (and usually generous) thresholds.
+#
+# Thus we go on the assumption that PyTorch + NVIDIA got everything
+# right, that this setting DOES NOT AFFECT the model outputs except
+# for increased numerical noise, and only test without it.
+#
+# TODO: consider running tests with and without
+# TODO: check how much thresholds have to be changed to accommodate TF32
+torch.backends.cuda.matmul.allow_tf32 = False
+torch.backends.cudnn.allow_tf32 = False
+
+
+@pytest.fixture(scope="session")
+def BENCHMARK_ROOT():
+ return pathlib.Path(__file__).parent / "../benchmark_data/"
+
+
+@pytest.fixture(scope="session")
+def temp_data(float_tolerance):
+ with tempfile.TemporaryDirectory() as tmpdirname:
+ yield tmpdirname
+
+
+@pytest.fixture(scope="session")
+def CH3CHO(CH3CHO_no_typemap) -> Tuple[Atoms, AtomicData]:
+ atoms, data = CH3CHO_no_typemap
+ tm = TypeMapper(chemical_symbol_to_type={"C": 0, "O": 1, "H": 2})
+ data = tm(data)
+ return atoms, data
+
+
+@pytest.fixture(scope="session")
+def CH3CHO_no_typemap(float_tolerance) -> Tuple[Atoms, AtomicData]:
+ atoms = molecule("CH3CHO")
+ data = AtomicData.from_ase(atoms, r_max=2.0)
+ return atoms, data
+
+
+@pytest.fixture(scope="session")
+def molecules() -> List[Atoms]:
+ atoms_list = []
+ for i in range(8):
+ atoms = molecule("CH3CHO" if i % 2 == 0 else "H2")
+ atoms.rattle()
+ atoms.calc = SinglePointCalculator(
+ energy=np.random.random(),
+ forces=np.random.random((len(atoms), 3)),
+ stress=None,
+ magmoms=None,
+ atoms=atoms,
+ )
+ atoms_list.append(atoms)
+ return atoms_list
+
+
+@pytest.fixture(scope="session")
+def nequip_dataset(molecules, temp_data, float_tolerance):
+ with tempfile.NamedTemporaryFile(suffix=".xyz") as fp:
+ for atoms in molecules:
+ write(fp.name, atoms, format="extxyz", append=True)
+ a = ASEDataset(
+ file_name=fp.name,
+ root=temp_data,
+ extra_fixed_fields={"r_max": 3.0},
+ ase_args=dict(format="extxyz"),
+ type_mapper=TypeMapper(chemical_symbol_to_type={"H": 0, "C": 1, "O": 2}),
+ )
+ yield a
+
+
+@pytest.fixture(scope="session")
+def atomic_batch(nequip_dataset):
+ return Batch.from_data_list([nequip_dataset[0], nequip_dataset[1]])
+
+
+@pytest.fixture(scope="function")
+def per_species_set():
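+ # synthetic per-species energies: for each of n_species species, draw n_sample values
+ # from N(ref_mean[s], ref_std[s]); returns (ref_mean, ref_std, E, n_sample, n_species)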
+ dtype = torch.get_default_dtype()
+ rng = torch.Generator().manual_seed(127)
+ mean_min = 1
+ mean_max = 100
+ std = 20
+ n_sample = 1000
+ n_species = 9
+ ref_mean = torch.rand((n_species), generator=rng) * (mean_max - mean_min) + mean_min
+ t_mean = torch.ones((n_sample, 1)) * ref_mean.reshape([1, -1])
+ ref_std = torch.rand((n_species), generator=rng) * std
+ t_std = torch.ones((n_sample, 1)) * ref_std.reshape([1, -1])
+ E = torch.normal(t_mean, t_std, generator=rng)
+ return ref_mean.to(dtype), ref_std.to(dtype), E.to(dtype), n_sample, n_species
+
+
+# Use debug mode
+set_irreps_debug(True)
diff --git a/tests/unit/model/test_eng_force.py b/nequip/utils/unittests/model_tests.py
similarity index 56%
rename from tests/unit/model/test_eng_force.py
rename to nequip/utils/unittests/model_tests.py
index 0adcd4c9..2b6a8b63 100644
--- a/tests/unit/model/test_eng_force.py
+++ b/nequip/utils/unittests/model_tests.py
@@ -1,150 +1,87 @@
import pytest
-import logging
import tempfile
import functools
import torch
import numpy as np
-from e3nn import o3
from e3nn.util.jit import script
-from nequip.data import AtomicDataDict, AtomicData, Collater
+from nequip.data import (
+ AtomicDataDict,
+ AtomicData,
+ Collater,
+ _GRAPH_FIELDS,
+ _NODE_FIELDS,
+ _EDGE_FIELDS,
+)
from nequip.data.transforms import TypeMapper
-from nequip.model import model_from_config, uniform_initialize_FCs
-from nequip.nn import GraphModuleMixin, AtomwiseLinear
+from nequip.model import model_from_config
+from nequip.nn import GraphModuleMixin
from nequip.utils.test import assert_AtomicData_equivariant
-logging.basicConfig(level=logging.DEBUG)
-
-COMMON_CONFIG = {
- "num_types": 3,
- "types_names": ["H", "C", "O"],
- "avg_num_neighbors": None,
-}
-r_max = 3
-minimal_config1 = dict(
- irreps_edge_sh="0e + 1o",
- r_max=4,
- feature_irreps_hidden="4x0e + 4x1o",
- num_layers=2,
- num_basis=8,
- PolynomialCutoff_p=6,
- nonlinearity_type="norm",
- **COMMON_CONFIG
-)
-minimal_config2 = dict(
- irreps_edge_sh="0e + 1o",
- r_max=4,
- chemical_embedding_irreps_out="8x0e + 8x0o + 8x1e + 8x1o",
- irreps_mid_output_block="2x0e",
- feature_irreps_hidden="4x0e + 4x1o",
- **COMMON_CONFIG
-)
-minimal_config3 = dict(
- irreps_edge_sh="0e + 1o",
- r_max=4,
- feature_irreps_hidden="4x0e + 4x1o",
- num_layers=2,
- num_basis=8,
- PolynomialCutoff_p=6,
- nonlinearity_type="gate",
- **COMMON_CONFIG
-)
-minimal_config4 = dict(
- irreps_edge_sh="0e + 1o + 2e",
- r_max=4,
- feature_irreps_hidden="2x0e + 2x1o + 2x2e",
- num_layers=2,
- num_basis=3,
- PolynomialCutoff_p=6,
- nonlinearity_type="gate",
- # test custom nonlinearities
- nonlinearity_scalars={"e": "silu", "o": "tanh"},
- nonlinearity_gates={"e": "silu", "o": "abs"},
- **COMMON_CONFIG
-)
-
-
-@pytest.fixture(
- scope="module",
- params=[minimal_config1, minimal_config2, minimal_config3, minimal_config4],
-)
-def config(request):
- return request.param
+# see https://github.com/pytest-dev/pytest/issues/421#issuecomment-943386533
+# to allow external packages to import tests through subclassing
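+# (see TestNequIPModel in tests/unit/model/test_nequip_model.py below for a concrete subclass)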
+class BaseModelTests:
+ @pytest.fixture(scope="class")
+ def config(self):
+ """Implemented by subclasses.
+ Return a tuple of config, out_field
+ """
+ raise NotImplementedError
-@pytest.fixture(
- params=[
- (
- ["EnergyModel", "ForceOutput"],
- AtomicDataDict.FORCE_KEY,
+ @pytest.fixture(
+ scope="class",
+ params=(
+ [torch.device("cuda"), torch.device("cpu")]
+ if torch.cuda.is_available()
+ else [torch.device("cpu")]
),
- (
- ["EnergyModel"],
- AtomicDataDict.TOTAL_ENERGY_KEY,
- ),
- (
- ["EnergyModel", "StressForceOutput"],
- AtomicDataDict.STRESS_KEY,
- ),
- ]
-)
-def model(request, config):
- torch.manual_seed(0)
- np.random.seed(0)
- builder, out_field = request.param
- config = config.copy()
- config["model_builders"] = builder
- return model_from_config(config), out_field
-
-
-@pytest.fixture(
- scope="module",
- params=(
- [torch.device("cuda"), torch.device("cpu")]
- if torch.cuda.is_available()
- else [torch.device("cpu")]
- ),
-)
-def device(request):
- return request.param
-
+ )
+ def device(self, request):
+ return request.param
+
+ @staticmethod
+ def make_model(config, device, initialize: bool = True, deploy: bool = False):
+ torch.manual_seed(127)
+ np.random.seed(193)
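+ # fixed seeds keep model initialization reproducible across parametrized runs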
+ config = config.copy()
+ config.update(
+ {
+ "num_types": 3,
+ "types_names": ["H", "C", "O"],
+ }
+ )
+ model = model_from_config(config, initialize=initialize, deploy=deploy)
+ model = model.to(device)
+ return model
-class TestWorkflow:
- """
- test class methods
- """
+ @pytest.fixture(scope="class")
+ def model(self, config, device):
+ config, out_fields = config
+ model = self.make_model(config, device=device)
+ return model, out_fields
+ # == common tests for all models ==
def test_init(self, model):
instance, _ = model
assert isinstance(instance, GraphModuleMixin)
- def test_weight_init(self, model, atomic_batch, device):
- instance, out_field = model
- data = AtomicData.to_AtomicDataDict(atomic_batch.to(device=device))
- instance = instance.to(device=device)
-
- out_orig = instance(data)[out_field]
-
- instance = uniform_initialize_FCs(instance, initialize=True)
-
- out_unif = instance(data)[out_field]
- assert not torch.allclose(out_orig, out_unif)
-
def test_jit(self, model, atomic_batch, device):
- instance, out_field = model
+ instance, out_fields = model
data = AtomicData.to_AtomicDataDict(atomic_batch.to(device=device))
instance = instance.to(device=device)
model_script = script(instance)
- assert torch.allclose(
- instance(data)[out_field],
- model_script(data)[out_field],
- atol=1e-6,
- )
+ for out_field in out_fields:
+ assert torch.allclose(
+ instance(data)[out_field],
+ model_script(data)[out_field],
+ atol=1e-6,
+ )
# - Try saving, loading in another process, and running -
with tempfile.TemporaryDirectory() as tmpdir:
@@ -163,39 +100,25 @@ def test_jit(self, model, atomic_batch, device):
torch.float64: 1e-10,
}[torch.get_default_dtype()]
- assert torch.allclose(
- model_script(data)[out_field],
- load_model(load_dat)[out_field],
- atol=atol,
- )
-
- def test_submods(self):
- config = minimal_config2.copy()
- config["model_builders"] = ["EnergyModel"]
- model = model_from_config(config=config, initialize=True)
- assert isinstance(model.chemical_embedding, AtomwiseLinear)
- true_irreps = o3.Irreps(minimal_config2["chemical_embedding_irreps_out"])
- assert (
- model.chemical_embedding.irreps_out[model.chemical_embedding.out_field]
- == true_irreps
- )
- # Make sure it propagates
- assert (
- model.layer0_convnet.irreps_in[model.chemical_embedding.out_field]
- == true_irreps
- )
+ for out_field in out_fields:
+ assert torch.allclose(
+ model_script(data)[out_field],
+ load_model(load_dat)[out_field],
+ atol=atol,
+ )
def test_forward(self, model, atomic_batch, device):
- instance, out_field = model
+ instance, out_fields = model
instance.to(device)
data = atomic_batch.to(device)
output = instance(AtomicData.to_AtomicDataDict(data))
- assert out_field in output
+ for out_field in out_fields:
+ assert out_field in output
def test_batch(self, model, atomic_batch, device, float_tolerance):
"""Confirm that the results for individual examples are the same regardless of whether they are batched."""
allclose = functools.partial(torch.allclose, atol=float_tolerance)
- instance, out_field = model
+ instance, out_fields = model
instance.to(device)
data = atomic_batch.to(device)
data1 = data.get_example(0)
@@ -203,33 +126,186 @@ def test_batch(self, model, atomic_batch, device, float_tolerance):
output1 = instance(AtomicData.to_AtomicDataDict(data1))
output2 = instance(AtomicData.to_AtomicDataDict(data2))
output = instance(AtomicData.to_AtomicDataDict(data))
- if out_field in (AtomicDataDict.TOTAL_ENERGY_KEY, AtomicDataDict.STRESS_KEY):
- assert allclose(
- output1[out_field],
- output[out_field][0],
- )
- assert allclose(
- output2[out_field],
- output[out_field][1],
- )
- elif out_field in (AtomicDataDict.FORCE_KEY,):
- assert allclose(
- output1[out_field],
- output[out_field][output[AtomicDataDict.BATCH_KEY] == 0],
- )
- assert allclose(
- output2[out_field],
- output[out_field][output[AtomicDataDict.BATCH_KEY] == 1],
- )
+ for out_field in out_fields:
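+ # graph fields have one row per frame; node fields are sliced by the batch vector; edge fields by the batch index of each edge's source node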
+ if out_field in _GRAPH_FIELDS:
+ assert allclose(
+ output1[out_field],
+ output[out_field][0],
+ )
+ assert allclose(
+ output2[out_field],
+ output[out_field][1],
+ )
+ elif out_field in _NODE_FIELDS:
+ assert allclose(
+ output1[out_field],
+ output[out_field][output[AtomicDataDict.BATCH_KEY] == 0],
+ )
+ assert allclose(
+ output2[out_field],
+ output[out_field][output[AtomicDataDict.BATCH_KEY] == 1],
+ )
+ elif out_field in _EDGE_FIELDS:
+ assert allclose(
+ output1[out_field],
+ output[out_field][
+ output[AtomicDataDict.BATCH_KEY][
+ output[AtomicDataDict.EDGE_INDEX_KEY][0]
+ ]
+ == 0
+ ],
+ )
+ assert allclose(
+ output2[out_field],
+ output[out_field][
+ output[AtomicDataDict.BATCH_KEY][
+ output[AtomicDataDict.EDGE_INDEX_KEY][0]
+ ]
+ == 1
+ ],
+ )
+ else:
+ raise NotImplementedError
+
+ def test_equivariance(self, model, atomic_batch, device):
+ instance, out_fields = model
+ instance = instance.to(device=device)
+ atomic_batch = atomic_batch.to(device=device)
+ assert_AtomicData_equivariant(func=instance, data_in=atomic_batch)
+
+ def test_embedding_cutoff(self, model, config, device):
+ instance, out_fields = model
+ config, out_fields = config
+ r_max = config["r_max"]
+
+ # make a synthetic three-atom example
+ data = AtomicData(
+ atom_types=np.random.choice([0, 1, 2], size=3),
+ pos=np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0]]),
+ edge_index=np.array([[0, 1, 0, 2], [1, 0, 2, 0]]),
+ )
+ data = data.to(device)
+ edge_embed = instance(AtomicData.to_AtomicDataDict(data))
+ if AtomicDataDict.EDGE_FEATURES_KEY in edge_embed:
+ key = AtomicDataDict.EDGE_FEATURES_KEY
else:
- raise NotImplementedError
+ key = AtomicDataDict.EDGE_EMBEDDING_KEY
+ edge_embed = edge_embed[key]
+ data.pos[2, 1] = r_max # move the third atom out to exactly the cutoff
+ edge_embed2 = instance(AtomicData.to_AtomicDataDict(data))[key]
+ if key == AtomicDataDict.EDGE_EMBEDDING_KEY:
+ # we can only check that other edges are unaffected if we know it's an embedding
+ # For example, an Allegro edge feature is many-body, so it would be affected
+ assert torch.allclose(edge_embed[:2], edge_embed2[:2])
+ assert edge_embed[2:].abs().sum() > 1e-6 # some nonzero terms
+ assert torch.allclose(edge_embed2[2:], torch.zeros(1, device=device))
-class TestGradient:
- def test_numeric_gradient(self, config, atomic_batch, device, float_tolerance):
- config = config.copy()
- config["model_builders"] = ["EnergyModel", "ForceOutput"]
- model = model_from_config(config=config, initialize=True)
+ # test gradients
+ in_dict = AtomicData.to_AtomicDataDict(data)
+ in_dict[AtomicDataDict.POSITIONS_KEY].requires_grad_(True)
+
+ with torch.autograd.set_detect_anomaly(True):
+ out = instance(in_dict)
+
+ # is the edge embedding of the edge at the cutoff length unchanged (zero gradient) at the cutoff?
+ grads = torch.autograd.grad(
+ outputs=out[key][2:].sum(),
+ inputs=in_dict[AtomicDataDict.POSITIONS_KEY],
+ retain_graph=True,
+ )[0]
+ assert torch.allclose(grads, torch.zeros(1, device=device))
+
+ if AtomicDataDict.PER_ATOM_ENERGY_KEY in out:
+ # are the first two atoms' energies unaffected by the atom at the cutoff?
+ grads = torch.autograd.grad(
+ outputs=out[AtomicDataDict.PER_ATOM_ENERGY_KEY][:2].sum(),
+ inputs=in_dict[AtomicDataDict.POSITIONS_KEY],
+ )[0]
+ print(grads)
+ # only care about gradient wrt moved atom
+ assert grads.shape == (3, 3)
+ assert torch.allclose(grads[2], torch.zeros(1, device=device))
+
+
+class BaseEnergyModelTests(BaseModelTests):
+ def test_large_separation(self, model, config, molecules, device):
+ atol = {torch.float32: 1e-4, torch.float64: 1e-10}[torch.get_default_dtype()]
+ instance, _ = model
+ instance.to(device)
+ config, out_fields = config
+ r_max = config["r_max"]
+ atoms1 = molecules[0].copy()
+ atoms2 = molecules[1].copy()
+ # translate atoms2 far away
+ atoms2.positions += 40.0 + np.random.randn(3)
+ atoms_both = atoms1.copy()
+ atoms_both.extend(atoms2)
+ tm = TypeMapper(chemical_symbols=["H", "C", "O"])
+ data1 = tm(AtomicData.from_ase(atoms1, r_max=r_max).to(device=device))
+ data2 = tm(AtomicData.from_ase(atoms2, r_max=r_max).to(device=device))
+ data_both = tm(AtomicData.from_ase(atoms_both, r_max=r_max).to(device=device))
+ assert (
+ data_both[AtomicDataDict.EDGE_INDEX_KEY].shape[1]
+ == data1[AtomicDataDict.EDGE_INDEX_KEY].shape[1]
+ + data2[AtomicDataDict.EDGE_INDEX_KEY].shape[1]
+ )
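+ # i.e. no edges may cross between the two far-separated molecules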
+
+ out1 = instance(AtomicData.to_AtomicDataDict(data1))
+ out2 = instance(AtomicData.to_AtomicDataDict(data2))
+ out_both = instance(AtomicData.to_AtomicDataDict(data_both))
+
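+ # energies of non-interacting molecules must add up to the energy of the combined system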
+ assert torch.allclose(
+ out1[AtomicDataDict.TOTAL_ENERGY_KEY]
+ + out2[AtomicDataDict.TOTAL_ENERGY_KEY],
+ out_both[AtomicDataDict.TOTAL_ENERGY_KEY],
+ atol=atol,
+ )
+
+ atoms_both2 = atoms1.copy()
+ atoms3 = atoms2.copy()
+ atoms3.positions += np.random.randn(3)
+ atoms_both2.extend(atoms3)
+ data_both2 = tm(AtomicData.from_ase(atoms_both2, r_max=r_max).to(device=device))
+ out_both2 = instance(AtomicData.to_AtomicDataDict(data_both2))
+ assert torch.allclose(
+ out_both2[AtomicDataDict.TOTAL_ENERGY_KEY],
+ out_both[AtomicDataDict.TOTAL_ENERGY_KEY],
+ atol=atol,
+ )
+ assert torch.allclose(
+ out_both2[AtomicDataDict.PER_ATOM_ENERGY_KEY],
+ out_both[AtomicDataDict.PER_ATOM_ENERGY_KEY],
+ atol=atol,
+ )
+
+ def test_cross_frame_grad(self, model, device, nequip_dataset):
+ c = Collater.for_dataset(nequip_dataset)
+ batch = c([nequip_dataset[i] for i in range(len(nequip_dataset))])
+ energy_model, out_fields = model
+ energy_model.to(device)
+ data = AtomicData.to_AtomicDataDict(batch.to(device))
+ data[AtomicDataDict.POSITIONS_KEY].requires_grad = True
+
+ output = energy_model(data)
+ grads = torch.autograd.grad(
+ outputs=output[AtomicDataDict.TOTAL_ENERGY_KEY][-1],
+ inputs=data[AtomicDataDict.POSITIONS_KEY],
+ allow_unused=True,
+ )[0]
+
+ last_frame_n_atom = batch.ptr[-1] - batch.ptr[-2]
+
+ in_frame_grad = grads[-last_frame_n_atom:]
+ cross_frame_grad = grads[:-last_frame_n_atom]
+
+ assert cross_frame_grad.abs().max().item() == 0
+ assert in_frame_grad.abs().max().item() > 0
+
+ def test_numeric_gradient(self, model, atomic_batch, device):
+ model, out_fields = model
+ if AtomicDataDict.FORCE_KEY not in out_fields:
+ pytest.skip()
model.to(device)
data = atomic_batch.to(device)
output = model(AtomicData.to_AtomicDataDict(data))
@@ -256,16 +332,15 @@ def test_numeric_gradient(self, config, atomic_batch, device, float_tolerance):
numeric, analytical, rtol=5e-2
)
- def test_partial_forces(self, atomic_batch, device):
- config = minimal_config1.copy()
- config["model_builders"] = [
- "EnergyModel",
- "ForceOutput",
- ]
+ def test_partial_forces(self, config, atomic_batch, device, strict_locality):
+ config, out_fields = config
+ if "ForceOutput" not in config["model_builders"]:
+ pytest.skip()
+ config = config.copy()
partial_config = config.copy()
partial_config["model_builders"] = [
- "EnergyModel",
- "PartialForceOutput",
+ "PartialForceOutput" if b == "ForceOutput" else b
+ for b in partial_config["model_builders"]
]
model = model_from_config(config=config, initialize=True)
partial_model = model_from_config(config=partial_config, initialize=True)
@@ -284,7 +359,7 @@ def test_partial_forces(self, atomic_batch, device):
assert torch.allclose(
output[k],
output_partial[k],
- atol=1e-6 if k == AtomicDataDict.FORCE_KEY else 1e-8,
+ atol=1e-8 if k == AtomicDataDict.TOTAL_ENERGY_KEY else 1e-6,
)
else:
assert torch.equal(output[k], output_partial[k])
@@ -293,152 +368,17 @@ def test_partial_forces(self, atomic_batch, device):
assert partial_forces.shape == (n_at, n_at, 3)
# confirm that sparsity matches graph topology:
edge_index = data[AtomicDataDict.EDGE_INDEX_KEY]
- adjacency = torch.zeros(n_at, n_at, dtype=torch.bool)
- strict_locality = False
+ adjacency = torch.zeros(
+ n_at, n_at, dtype=torch.bool, device=partial_forces.device
+ )
if strict_locality:
# only adjacent for nonzero deriv to neighbors
adjacency[edge_index[0], edge_index[1]] = True
- adjacency[
- torch.arange(n_at), torch.arange(n_at)
- ] = True # diagonal is ofc True
+ arange = torch.arange(n_at, device=partial_forces.device)
+ adjacency[arange, arange] = True # diagonal is of course True
else:
# technically only adjacent to n-th degree neighbor, but in this tiny test system that is same as all-to-all and easier to program
adjacency = data[AtomicDataDict.BATCH_KEY].view(-1, 1) == data[
AtomicDataDict.BATCH_KEY
].view(1, -1)
assert torch.equal(adjacency, torch.any(partial_forces != 0, dim=-1))
-
-
-class TestAutoGradient:
- def test_cross_frame_grad(self, config, nequip_dataset):
- c = Collater.for_dataset(nequip_dataset)
- batch = c([nequip_dataset[i] for i in range(len(nequip_dataset))])
- device = "cpu"
- config = config.copy()
- config["model_builders"] = ["EnergyModel"]
- energy_model = model_from_config(config=config, initialize=True)
- energy_model.to(device)
- data = AtomicData.to_AtomicDataDict(batch.to(device))
- data[AtomicDataDict.POSITIONS_KEY].requires_grad = True
-
- output = energy_model(data)
- grads = torch.autograd.grad(
- outputs=output[AtomicDataDict.TOTAL_ENERGY_KEY][-1],
- inputs=data[AtomicDataDict.POSITIONS_KEY],
- allow_unused=True,
- )[0]
-
- last_frame_n_atom = batch.ptr[-1] - batch.ptr[-2]
-
- in_frame_grad = grads[-last_frame_n_atom:]
- cross_frame_grad = grads[:-last_frame_n_atom]
-
- assert cross_frame_grad.abs().max().item() == 0
- assert in_frame_grad.abs().max().item() > 0
-
-
-class TestEquivariance:
- def test_forward(self, model, atomic_batch, device):
- instance, out_field = model
- instance = instance.to(device=device)
- atomic_batch = atomic_batch.to(device=device)
- assert_AtomicData_equivariant(func=instance, data_in=atomic_batch)
-
-
-class TestCutoff:
- def test_large_separation(self, model, config, molecules):
- atol = {torch.float32: 1e-4, torch.float64: 1e-10}[torch.get_default_dtype()]
- instance, _ = model
- r_max = config["r_max"]
- atoms1 = molecules[0].copy()
- atoms2 = molecules[1].copy()
- # translate atoms2 far away
- atoms2.positions += 40.0 + np.random.randn(3)
- atoms_both = atoms1.copy()
- atoms_both.extend(atoms2)
- tm = TypeMapper(chemical_symbols=["H", "C", "O"])
- data1 = tm(AtomicData.from_ase(atoms1, r_max=r_max))
- data2 = tm(AtomicData.from_ase(atoms2, r_max=r_max))
- data_both = tm(AtomicData.from_ase(atoms_both, r_max=r_max))
- assert (
- data_both[AtomicDataDict.EDGE_INDEX_KEY].shape[1]
- == data1[AtomicDataDict.EDGE_INDEX_KEY].shape[1]
- + data2[AtomicDataDict.EDGE_INDEX_KEY].shape[1]
- )
-
- out1 = instance(AtomicData.to_AtomicDataDict(data1))
- out2 = instance(AtomicData.to_AtomicDataDict(data2))
- out_both = instance(AtomicData.to_AtomicDataDict(data_both))
-
- assert torch.allclose(
- out1[AtomicDataDict.TOTAL_ENERGY_KEY]
- + out2[AtomicDataDict.TOTAL_ENERGY_KEY],
- out_both[AtomicDataDict.TOTAL_ENERGY_KEY],
- atol=atol,
- )
-
- atoms_both2 = atoms1.copy()
- atoms3 = atoms2.copy()
- atoms3.positions += np.random.randn(3)
- atoms_both2.extend(atoms3)
- data_both2 = tm(AtomicData.from_ase(atoms_both2, r_max=r_max))
- out_both2 = instance(AtomicData.to_AtomicDataDict(data_both2))
- assert torch.allclose(
- out_both2[AtomicDataDict.TOTAL_ENERGY_KEY],
- out_both[AtomicDataDict.TOTAL_ENERGY_KEY],
- atol=atol,
- )
- assert torch.allclose(
- out_both2[AtomicDataDict.PER_ATOM_ENERGY_KEY],
- out_both[AtomicDataDict.PER_ATOM_ENERGY_KEY],
- atol=atol,
- )
-
- def test_embedding_cutoff(self, config):
- config = config.copy()
- config["model_builders"] = ["EnergyModel"]
- instance = model_from_config(config=config, initialize=True)
- r_max = config["r_max"]
-
- # make a synthetic three atom example
- data = AtomicData(
- atom_types=np.random.choice([0, 1, 2], size=3),
- pos=np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0]]),
- edge_index=np.array([[0, 1, 0, 2], [1, 0, 2, 0]]),
- )
- edge_embed = instance(AtomicData.to_AtomicDataDict(data))[
- AtomicDataDict.EDGE_EMBEDDING_KEY
- ]
- data.pos[2, 1] = r_max # put it past the cutoff
- edge_embed2 = instance(AtomicData.to_AtomicDataDict(data))[
- AtomicDataDict.EDGE_EMBEDDING_KEY
- ]
-
- assert torch.allclose(edge_embed[:2], edge_embed2[:2])
- assert edge_embed[2:].abs().sum() > 1e-6 # some nonzero terms
- assert torch.allclose(edge_embed2[2:], torch.zeros(1))
-
- # test gradients
- in_dict = AtomicData.to_AtomicDataDict(data)
- in_dict[AtomicDataDict.POSITIONS_KEY].requires_grad_(True)
-
- with torch.autograd.set_detect_anomaly(True):
- out = instance(in_dict)
-
- # is the edge embedding of the cutoff length edge unchanged at the cutoff?
- grads = torch.autograd.grad(
- outputs=out[AtomicDataDict.EDGE_EMBEDDING_KEY][2:].sum(),
- inputs=in_dict[AtomicDataDict.POSITIONS_KEY],
- retain_graph=True,
- )[0]
- assert torch.allclose(grads, torch.zeros(1))
-
- # are the first two atom's energies unaffected by atom at the cutoff?
- grads = torch.autograd.grad(
- outputs=out[AtomicDataDict.PER_ATOM_ENERGY_KEY][:2].sum(),
- inputs=in_dict[AtomicDataDict.POSITIONS_KEY],
- )[0]
- print(grads)
- # only care about gradient wrt moved atom
- assert grads.shape == (3, 3)
- assert torch.allclose(grads[2], torch.zeros(1))
diff --git a/setup.py b/setup.py
index 8c977e0a..d7a5b465 100644
--- a/setup.py
+++ b/setup.py
@@ -29,15 +29,14 @@
"numpy",
"ase",
"tqdm",
- "torch>=1.8,<=1.12,!=1.9.0", # torch.fx added in 1.8
- "e3nn>=0.3.5,<0.6.0",
+ "torch>=1.10.0,<1.13,!=1.9.0",
+ "e3nn>=0.4.4,<0.6.0",
"pyyaml",
"contextlib2;python_version<'3.7'", # backport of nullcontext
'contextvars;python_version<"3.7"', # backport of contextvars for savenload
"typing_extensions;python_version<'3.8'", # backport of Final
"torch-runstats>=0.2.0",
"torch-ema>=0.3.0",
- "scikit_learn<=1.0.1", # for GaussianProcess for per-species statistics; 1.0.2 has a bug!
],
zip_safe=True,
)
diff --git a/tests/conftest.py b/tests/conftest.py
index 060e5e7b..e9719bcd 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,137 +1,5 @@
-from typing import List, Tuple
-import numpy as np
-import pathlib
-import pytest
-import tempfile
-import os
+from nequip.utils.unittests import CONFTEST_PATH
-from ase.atoms import Atoms
-from ase.build import molecule
-from ase.calculators.singlepoint import SinglePointCalculator
-from ase.io import write
-
-import torch
-
-from nequip.utils.test import set_irreps_debug
-from nequip.data import AtomicData, ASEDataset
-from nequip.data.transforms import TypeMapper
-from nequip.utils.torch_geometric import Batch
-from nequip.utils._global_options import _set_global_options
-from nequip.utils.misc import dtype_from_name
-
-if "NEQUIP_NUM_TASKS" not in os.environ:
- # Test parallelization, but don't waste time spawning tons of workers if lots of cores available
- os.environ["NEQUIP_NUM_TASKS"] = "2"
-
-# The default float tolerance
-FLOAT_TOLERANCE = {
- t: torch.as_tensor(v, dtype=dtype_from_name(t))
- for t, v in {"float32": 1e-3, "float64": 1e-10}.items()
-}
-
-
-@pytest.fixture(scope="session", autouse=True, params=["float32", "float64"])
-def float_tolerance(request):
- """Run all tests with various PyTorch default dtypes.
-
- This is a session-wide, autouse fixture — you only need to request it explicitly if a test needs to know the tolerance for the current default dtype.
-
- Returns
- --------
- A precision threshold to use for closeness tests.
- """
- old_dtype = torch.get_default_dtype()
- dtype = request.param
- _set_global_options({"default_dtype": dtype})
- yield FLOAT_TOLERANCE[dtype]
- _set_global_options(
- {
- "default_dtype": {torch.float32: "float32", torch.float64: "float64"}[
- old_dtype
- ]
- }
- )
-
-
-# - Ampere and TF32 -
-# Many of the tests for NequIP involve numerically checking
-# algebraic properties— normalization, equivariance,
-# continuity, etc.
-# With the added numerical noise of TF32, some of those tests fail
-# with the current (and usually generous) thresholds.
-#
-# Thus we go on the assumption that PyTorch + NVIDIA got everything
-# right, that this setting DOES NOT AFFECT the model outputs except
-# for increased numerical noise, and only test without it.
-#
-# TODO: consider running tests with and without
-# TODO: check how much thresholds have to be changed to accomidate TF32
-torch.backends.cuda.matmul.allow_tf32 = False
-torch.backends.cudnn.allow_tf32 = False
-
-
-@pytest.fixture(scope="session")
-def BENCHMARK_ROOT():
- return pathlib.Path(__file__).parent / "../benchmark_data/"
-
-
-@pytest.fixture(scope="session")
-def temp_data(float_tolerance):
- with tempfile.TemporaryDirectory() as tmpdirname:
- yield tmpdirname
-
-
-@pytest.fixture(scope="session")
-def CH3CHO(CH3CHO_no_typemap) -> Tuple[Atoms, AtomicData]:
- atoms, data = CH3CHO_no_typemap
- tm = TypeMapper(chemical_symbol_to_type={"C": 0, "O": 1, "H": 2})
- data = tm(data)
- return atoms, data
-
-
-@pytest.fixture(scope="session")
-def CH3CHO_no_typemap(float_tolerance) -> Tuple[Atoms, AtomicData]:
- atoms = molecule("CH3CHO")
- data = AtomicData.from_ase(atoms, r_max=2.0)
- return atoms, data
-
-
-@pytest.fixture(scope="session")
-def molecules() -> List[Atoms]:
- atoms_list = []
- for i in range(8):
- atoms = molecule("CH3CHO" if i % 2 == 0 else "H2")
- atoms.rattle()
- atoms.calc = SinglePointCalculator(
- energy=np.random.random(),
- forces=np.random.random((len(atoms), 3)),
- stress=None,
- magmoms=None,
- atoms=atoms,
- )
- atoms_list.append(atoms)
- return atoms_list
-
-
-@pytest.fixture(scope="session")
-def nequip_dataset(molecules, temp_data, float_tolerance):
- with tempfile.NamedTemporaryFile(suffix=".xyz") as fp:
- for atoms in molecules:
- write(fp.name, atoms, format="extxyz", append=True)
- a = ASEDataset(
- file_name=fp.name,
- root=temp_data,
- extra_fixed_fields={"r_max": 3.0},
- ase_args=dict(format="extxyz"),
- type_mapper=TypeMapper(chemical_symbol_to_type={"H": 0, "C": 1, "O": 2}),
- )
- yield a
-
-
-@pytest.fixture(scope="session")
-def atomic_batch(nequip_dataset):
- return Batch.from_data_list([nequip_dataset[0], nequip_dataset[1]])
-
-
-# Use debug mode
-set_irreps_debug(True)
+# like `source` in bash
+with open(CONFTEST_PATH) as f:
+ exec(f.read())
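+# (this pulls the shared fixtures into this module's namespace so pytest discovers them here)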
diff --git a/tests/unit/data/test_dataset.py b/tests/unit/data/test_dataset.py
index f45e0ca8..95cfe48d 100644
--- a/tests/unit/data/test_dataset.py
+++ b/tests/unit/data/test_dataset.py
@@ -31,7 +31,7 @@ def ase_file(molecules):
MAX_ATOMIC_NUMBER: int = 5
-NATOMS = 3
+NATOMS = 10
@pytest.fixture(scope="function")
@@ -231,8 +231,8 @@ def test_per_graph_field(self, npz_dataset, fixed_field, subset, key, dim):
if npz_dataset is None:
return
- torch.manual_seed(0)
- E = torch.rand((npz_dataset.len(),) + dim)
+ rng = torch.Generator().manual_seed(454)
+ E = torch.rand((npz_dataset.len(),) + dim, generator=rng)
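+ # a local Generator keeps this test's randomness independent of torch's global RNG state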
ref_mean = torch.mean(E / NATOMS, dim=0)
ref_std = torch.std(E / NATOMS, dim=0)
@@ -277,16 +277,11 @@ def test_per_node_field(self, npz_dataset, fixed_field, mode, subset):
)
print(result)
- @pytest.mark.parametrize("alpha", [1e-5, 1e-3, 0.1, 0.5])
+ @pytest.mark.parametrize("alpha", [0, 1e-3, 0.01])
@pytest.mark.parametrize("fixed_field", [True, False])
@pytest.mark.parametrize("full_rank", [True, False])
@pytest.mark.parametrize("subset", [True, False])
- @pytest.mark.parametrize(
- "regressor", ["NormalizedGaussianProcess", "GaussianProcess"]
- )
- def test_per_graph_field(
- self, npz_dataset, alpha, fixed_field, full_rank, regressor, subset
- ):
+ def test_per_graph_field(self, npz_dataset, alpha, fixed_field, full_rank, subset):
if alpha <= 1e-4 and not full_rank:
return
@@ -308,10 +303,7 @@ def test_per_graph_field(
del n_spec
del Ns
- if alpha == 1e-5:
- ref_mean, ref_std, E = generate_E(N, 100, 1000, 0.0)
- else:
- ref_mean, ref_std, E = generate_E(N, 100, 1000, 0.5)
+ ref_mean, ref_std, E = generate_E(N, 100, 1000, 10)
if subset:
E_orig_order = torch.zeros_like(
@@ -333,7 +325,6 @@ def test_per_graph_field(
AtomicDataDict.TOTAL_ENERGY_KEY
+ "per_species_mean_std": {
"alpha": alpha,
- "regressor": regressor,
"stride": 1,
}
},
@@ -341,21 +332,18 @@ def test_per_graph_field(
res = torch.matmul(N, mean.reshape([-1, 1])) - E.reshape([-1, 1])
res2 = torch.sum(torch.square(res))
- print("residue", alpha, res2 - ref_res2)
+ print("alpha, residue, actual residue", alpha, res2, ref_res2)
print("mean", mean, ref_mean)
print("diff in mean", mean - ref_mean)
print("std", std, ref_std)
+ tolerance = torch.max(ref_std) * 4
if full_rank:
- if alpha == 1e-5:
- assert torch.allclose(mean, ref_mean, rtol=1e-1)
- else:
- assert torch.allclose(mean, ref_mean, rtol=1)
- assert torch.allclose(std, torch.zeros_like(ref_mean), atol=alpha * 100)
- elif regressor == "NormalizedGaussianProcess":
- assert torch.std(mean).numpy() == 0
+ assert torch.allclose(mean, ref_mean, atol=tolerance)
+ # assert torch.allclose(std, torch.zeros_like(ref_mean), atol=alpha * 100)
else:
- assert mean[0] == mean[1] * 2
+ assert torch.allclose(mean, mean[0], atol=tolerance)
+ # assert torch.std(mean).numpy() == 0
class TestReload:
@@ -449,12 +437,14 @@ def test_from_atoms(self, molecules):
def generate_E(N, mean_min, mean_max, std):
- torch.manual_seed(0)
- ref_mean = torch.rand((N.shape[1])) * (mean_max - mean_min) + mean_min
+ rng = torch.Generator().manual_seed(568)
+ ref_mean = (
+ torch.rand((N.shape[1]), generator=rng) * (mean_max - mean_min) + mean_min
+ )
t_mean = torch.ones((N.shape[0], 1)) * ref_mean.reshape([1, -1])
- ref_std = torch.rand((N.shape[1])) * std
+ ref_std = torch.rand((N.shape[1]), generator=rng) * std
t_std = torch.ones((N.shape[0], 1)) * ref_std.reshape([1, -1])
- E = torch.normal(t_mean, t_std)
+ E = torch.normal(t_mean, t_std, generator=rng)
return ref_mean, ref_std, (N * E).sum(axis=-1)
diff --git a/tests/unit/model/test_nequip_model.py b/tests/unit/model/test_nequip_model.py
new file mode 100644
index 00000000..2aa82e15
--- /dev/null
+++ b/tests/unit/model/test_nequip_model.py
@@ -0,0 +1,122 @@
+import pytest
+
+from e3nn import o3
+
+from nequip.data import AtomicDataDict
+from nequip.model import model_from_config
+from nequip.nn import AtomwiseLinear
+from nequip.utils.unittests.model_tests import BaseEnergyModelTests
+
+COMMON_CONFIG = {
+ "avg_num_neighbors": None,
+ "num_types": 3,
+ "types_names": ["H", "C", "O"],
+}
+r_max = 3
+minimal_config1 = dict(
+ irreps_edge_sh="0e + 1o",
+ r_max=4,
+ feature_irreps_hidden="4x0e + 4x1o",
+ num_layers=2,
+ num_basis=8,
+ PolynomialCutoff_p=6,
+ nonlinearity_type="norm",
+ **COMMON_CONFIG
+)
+minimal_config2 = dict(
+ irreps_edge_sh="0e + 1o",
+ r_max=4,
+ chemical_embedding_irreps_out="8x0e + 8x0o + 8x1e + 8x1o",
+ irreps_mid_output_block="2x0e",
+ feature_irreps_hidden="4x0e + 4x1o",
+ **COMMON_CONFIG
+)
+minimal_config3 = dict(
+ irreps_edge_sh="0e + 1o",
+ r_max=4,
+ feature_irreps_hidden="4x0e + 4x1o",
+ num_layers=2,
+ num_basis=8,
+ PolynomialCutoff_p=6,
+ nonlinearity_type="gate",
+ **COMMON_CONFIG
+)
+minimal_config4 = dict(
+ irreps_edge_sh="0e + 1o + 2e",
+ r_max=4,
+ feature_irreps_hidden="2x0e + 2x1o + 2x2e",
+ num_layers=2,
+ num_basis=3,
+ PolynomialCutoff_p=6,
+ nonlinearity_type="gate",
+ # test custom nonlinearities
+ nonlinearity_scalars={"e": "silu", "o": "tanh"},
+ nonlinearity_gates={"e": "silu", "o": "abs"},
+ **COMMON_CONFIG
+)
+
+
+class TestNequIPModel(BaseEnergyModelTests):
+ @pytest.fixture
+ def strict_locality(self):
+ return False
+
+ @pytest.fixture(
+ params=[minimal_config1, minimal_config2, minimal_config3, minimal_config4],
+ scope="class",
+ )
+ def base_config(self, request):
+ return request.param
+
+ @pytest.fixture(
+ params=[
+ (
+ ["EnergyModel", "ForceOutput"],
+ [
+ AtomicDataDict.TOTAL_ENERGY_KEY,
+ AtomicDataDict.PER_ATOM_ENERGY_KEY,
+ AtomicDataDict.FORCE_KEY,
+ ],
+ ),
+ (
+ ["EnergyModel"],
+ [
+ AtomicDataDict.TOTAL_ENERGY_KEY,
+ AtomicDataDict.PER_ATOM_ENERGY_KEY,
+ ],
+ ),
+ (
+ ["EnergyModel", "StressForceOutput"],
+ [
+ AtomicDataDict.TOTAL_ENERGY_KEY,
+ AtomicDataDict.PER_ATOM_ENERGY_KEY,
+ AtomicDataDict.FORCE_KEY,
+ AtomicDataDict.STRESS_KEY,
+ AtomicDataDict.VIRIAL_KEY,
+ ],
+ ),
+ ],
+ scope="class",
+ )
+ def config(self, request, base_config):
+ config = base_config.copy()
+ builder, out_fields = request.param
+ config["model_builders"] = builder
+ return config, out_fields
+
+ def test_submods(self):
+ config = minimal_config2.copy()
+ config["model_builders"] = ["EnergyModel"]
+ model = model_from_config(config=config, initialize=True)
+ assert isinstance(model.chemical_embedding, AtomwiseLinear)
+ true_irreps = o3.Irreps(minimal_config2["chemical_embedding_irreps_out"])
+ assert (
+ model.chemical_embedding.irreps_out[model.chemical_embedding.out_field]
+ == true_irreps
+ )
+ # Make sure it propagates
+ assert (
+ model.layer0_convnet.irreps_in[model.chemical_embedding.out_field]
+ == true_irreps
+ )
diff --git a/tests/unit/trainer/test_trainer.py b/tests/unit/trainer/test_trainer.py
index c8169fda..860be357 100644
--- a/tests/unit/trainer/test_trainer.py
+++ b/tests/unit/trainer/test_trainer.py
@@ -106,26 +106,6 @@ def test_save(self, trainer, format, suffix):
assert isfile(file_name), "fail to save to file"
assert suffix in file_name
- @pytest.mark.parametrize("append", [True]) # , False])
- def test_from_dict(self, trainer, append):
-
- # torch.save(trainer.model, trainer.best_model_path)
-
- dictionary = trainer.as_dict(state_dict=True, training_progress=True)
- trainer1 = Trainer.from_dict(dictionary, append=append)
-
- for key in [
- "best_model_path",
- "last_model_path",
- "logfile",
- "epoch_log",
- "batch_log",
- "workdir",
- ]:
- v1 = getattr(trainer, key, None)
- v2 = getattr(trainer1, key, None)
- assert append == (v1 == v2)
-
@pytest.mark.parametrize("append", [True]) # , False])
def test_from_file(self, trainer, append):
diff --git a/tests/unit/utils/test_gp.py b/tests/unit/utils/test_gp.py
deleted file mode 100644
index 4792b9d2..00000000
--- a/tests/unit/utils/test_gp.py
+++ /dev/null
@@ -1,37 +0,0 @@
-import torch
-import pytest
-
-from nequip.utils.regressor import base_gp
-from sklearn.gaussian_process.kernels import DotProduct
-
-
-# @pytest.mark.parametrize("full_rank", [True, False])
-@pytest.mark.parametrize("full_rank", [False])
-@pytest.mark.parametrize("alpha", [0, 1e-3, 0.1, 1])
-def test_random(full_rank, alpha):
-
- if alpha == 0 and not full_rank:
- return
-
- torch.manual_seed(0)
- n_samples = 10
- n_dim = 3
-
- if full_rank:
- X = torch.randint(low=1, high=10, size=(n_samples, n_dim))
- else:
- X = torch.randint(low=1, high=10, size=(n_samples, 1)) * torch.ones(
- (n_samples, n_dim)
- )
-
- ref_mean = torch.rand((n_dim, 1))
- y = torch.matmul(X, ref_mean)
-
- mean, std = base_gp(
- X, y, DotProduct, {"sigma_0": 0, "sigma_0_bounds": "fixed"}, alpha=0.1
- )
-
- if full_rank:
- assert torch.allclose(ref_mean, mean, rtol=0.5)
- else:
- assert torch.allclose(mean, mean[0], rtol=1e-3)
diff --git a/tests/unit/utils/test_solver.py b/tests/unit/utils/test_solver.py
new file mode 100644
index 00000000..de78cbd8
--- /dev/null
+++ b/tests/unit/utils/test_solver.py
@@ -0,0 +1,38 @@
+import torch
+import pytest
+
+from nequip.utils.regressor import solver
+
+
+@pytest.mark.parametrize("full_rank", [True, False])
+@pytest.mark.parametrize("alpha", [0, 1e-3, 1e-2])
+def test_random(full_rank, alpha, per_species_set):
+
+ if alpha == 0 and not full_rank:
+ return
+
+ rng = torch.Generator().manual_seed(343)
+
+ ref_mean, ref_std, E, n_samples, n_dim = per_species_set
+
+ X = torch.randint(low=1, high=10, size=(n_samples, n_dim), generator=rng).to(
+ torch.get_default_dtype()
+ )
+ if not full_rank:
+ X[:, n_dim - 2] = X[:, n_dim - 1] * 2
+ y = (X * E).sum(axis=-1)
+
+ mean, std = solver(X, y, alpha=alpha)
+
+ tolerance = torch.max(ref_std)
+
+ print("tolerance", tolerance)
+ print("solution", mean, std)
+ print("diff", mean - ref_mean)
+
+ if full_rank:
+ assert torch.allclose(ref_mean, mean, atol=tolerance)
+ else:
+ assert torch.allclose(mean[n_dim - 1], mean[n_dim - 2], atol=tolerance)
+
+ assert torch.max(std) < tolerance