From d4874f0f67b9957071bf20d6a46a4b30a867eb3e Mon Sep 17 00:00:00 2001 From: Lily Wang <31115101+lilyminium@users.noreply.github.com> Date: Fri, 11 Aug 2023 06:07:25 +1000 Subject: [PATCH] Remove offmolecule and uncomment test (#50) Co-authored-by: Lily Wang --- CHANGELOG.md | 1 + openff/nagl/features/_base.py | 6 +-- openff/nagl/features/_featurizers.py | 4 +- openff/nagl/features/atoms.py | 8 ++-- openff/nagl/molecule/_dgl/molecule.py | 4 +- openff/nagl/tests/testing/utils.py | 8 ++-- openff/nagl/tests/utils/test_openff.py | 59 +++++++++++++------------- 7 files changed, 46 insertions(+), 44 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 18b6d160..cf94c47e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,6 +38,7 @@ The rules for this file: ### Changed - Major refactor to move to using Arrow databases (PR #45, PR #48) +- Removed importing `OFFMolecule` in favour of `Molecule` (PR #50, Issue #13) ### Removed - Old `_app` and `_cli` utilities that were not well tested diff --git a/openff/nagl/features/_base.py b/openff/nagl/features/_base.py index 3933cd61..2e9ca88a 100644 --- a/openff/nagl/features/_base.py +++ b/openff/nagl/features/_base.py @@ -15,7 +15,7 @@ if typing.TYPE_CHECKING: import torch - from openff.toolkit.topology import Molecule as OFFMolecule + from openff.toolkit.topology import Molecule # class FeatureMeta(ModelMetaclass, create_registry_metaclass("feature_name")): @@ -73,7 +73,7 @@ def _with_args(cls, *args): kwargs = dict(zip(cls.__fields__, args)) return cls(**kwargs) - def encode(self, molecule: "OFFMolecule") -> "torch.Tensor": + def encode(self, molecule: "Molecule") -> "torch.Tensor": """ Encode the molecule feature into a tensor. @@ -85,7 +85,7 @@ def encode(self, molecule: "OFFMolecule") -> "torch.Tensor": return self._encode(molecule).reshape(self.tensor_shape) @abc.abstractmethod - def _encode(self, molecule: "OFFMolecule") -> "torch.Tensor": + def _encode(self, molecule: "Molecule") -> "torch.Tensor": """ Encode the molecule feature into a tensor. """ diff --git a/openff/nagl/features/_featurizers.py b/openff/nagl/features/_featurizers.py index 8997e63a..1e1911fb 100644 --- a/openff/nagl/features/_featurizers.py +++ b/openff/nagl/features/_featurizers.py @@ -7,7 +7,7 @@ from .bonds import BondFeature if TYPE_CHECKING: - from openff.toolkit.topology import Molecule as OFFMolecule + from openff.toolkit.topology import Molecule T = TypeVar("T", bound=Feature) @@ -22,7 +22,7 @@ def __init__(self, features: List[T]): feature = feature() self.features.append(feature) - def featurize(self, molecule: "OFFMolecule") -> torch.Tensor: + def featurize(self, molecule: "Molecule") -> torch.Tensor: encoded = [feature.encode(molecule) for feature in self.features] features = torch.hstack(encoded) return features diff --git a/openff/nagl/features/atoms.py b/openff/nagl/features/atoms.py index 1f1b0a79..d1ea8605 100644 --- a/openff/nagl/features/atoms.py +++ b/openff/nagl/features/atoms.py @@ -41,7 +41,7 @@ from pydantic import validator, Field if typing.TYPE_CHECKING: - from openff.toolkit.topology import Molecule as OFFMolecule + from openff.toolkit.topology import Molecule __all__ = [ "AtomFeature", @@ -91,7 +91,7 @@ class AtomicElement(CategoricalMixin, AtomFeature): categories: typing.List[str] = ["H", "C", "N", "O", "F", "Cl", "Br", "S", "P", "I"] """Elements to provide one-hot encodings for.""" - def _encode(self, molecule: "OFFMolecule") -> torch.Tensor: + def _encode(self, molecule: "Molecule") -> torch.Tensor: try: elements = [atom.element for atom in molecule.atoms] except AttributeError: @@ -219,7 +219,7 @@ class AtomInRingOfSize(AtomFeature): ring_size: int """The size of the ring that this feature describes.""" - def _encode(self, molecule: "OFFMolecule") -> torch.Tensor: + def _encode(self, molecule: "Molecule") -> torch.Tensor: from openff.nagl.toolkits.openff import get_atoms_are_in_ring_size in_ring_size = get_atoms_are_in_ring_size(molecule, self.ring_size) @@ -272,7 +272,7 @@ class AtomAverageFormalCharge(AtomFeature): """ name: typing.Literal["atom_average_formal_charge"] = "atom_average_formal_charge" - def _encode(self, molecule: "OFFMolecule") -> torch.Tensor: + def _encode(self, molecule: "Molecule") -> torch.Tensor: from openff.nagl.utils.resonance import enumerate_resonance_forms from openff.nagl.toolkits.openff import normalize_molecule diff --git a/openff/nagl/molecule/_dgl/molecule.py b/openff/nagl/molecule/_dgl/molecule.py index 909aec9c..52985e82 100644 --- a/openff/nagl/molecule/_dgl/molecule.py +++ b/openff/nagl/molecule/_dgl/molecule.py @@ -1,7 +1,7 @@ from typing import ClassVar, Optional, Tuple import torch -from openff.toolkit.topology.molecule import Molecule as OFFMolecule +from openff.toolkit.topology.molecule import Molecule from openff.utilities import requires_package from openff.nagl.features.atoms import AtomFeature @@ -43,7 +43,7 @@ def n_graph_edges(self): @requires_package("dgl") def from_openff( cls, - molecule: OFFMolecule, + molecule: Molecule, atom_features: Tuple[AtomFeature, ...] = tuple(), bond_features: Tuple[BondFeature, ...] = tuple(), atom_feature_tensor: Optional[torch.Tensor] = None, diff --git a/openff/nagl/tests/testing/utils.py b/openff/nagl/tests/testing/utils.py index 2cbe88d3..8d8d9c28 100644 --- a/openff/nagl/tests/testing/utils.py +++ b/openff/nagl/tests/testing/utils.py @@ -1,9 +1,9 @@ -from openff.toolkit.topology.molecule import Molecule as OFFMolecule +from openff.toolkit.topology.molecule import Molecule from openff.nagl.toolkits.openff import capture_toolkit_warnings def rdkit_molecule_to_smiles(rdkit_molecule): - smiles = OFFMolecule.from_rdkit( + smiles = Molecule.from_rdkit( rdkit_molecule, allow_undefined_stereo=True, ).to_smiles() @@ -13,9 +13,9 @@ def rdkit_molecule_to_smiles(rdkit_molecule): def clean_smiles(smiles, mapped=False): with capture_toolkit_warnings(): if mapped: - func = OFFMolecule.from_mapped_smiles + func = Molecule.from_mapped_smiles else: - func = OFFMolecule.from_smiles + func = Molecule.from_smiles return func( smiles, allow_undefined_stereo=True, diff --git a/openff/nagl/tests/utils/test_openff.py b/openff/nagl/tests/utils/test_openff.py index 08ef5448..956e9b0d 100644 --- a/openff/nagl/tests/utils/test_openff.py +++ b/openff/nagl/tests/utils/test_openff.py @@ -1,7 +1,7 @@ import numpy as np import pytest from numpy.testing import assert_allclose -from openff.toolkit.topology.molecule import Molecule as OFFMolecule +from openff.toolkit.topology.molecule import Molecule from openff.toolkit.utils.toolkits import OPENEYE_AVAILABLE, RDKIT_AVAILABLE from openff.units import unit @@ -43,13 +43,13 @@ def test_smiles_to_inchi_key(smiles, expected): ], ) def test_normalize_molecule(expected_smiles, given_smiles): - expected_molecule = OFFMolecule.from_smiles(expected_smiles) + expected_molecule = Molecule.from_smiles(expected_smiles) - molecule = OFFMolecule.from_smiles(given_smiles) - assert not OFFMolecule.are_isomorphic(molecule, expected_molecule)[0] + molecule = Molecule.from_smiles(given_smiles) + assert not Molecule.are_isomorphic(molecule, expected_molecule)[0] output_molecule = normalize_molecule(molecule) - assert OFFMolecule.are_isomorphic(output_molecule, expected_molecule)[0] + assert Molecule.are_isomorphic(output_molecule, expected_molecule)[0] @pytest.mark.parametrize( @@ -73,7 +73,7 @@ def test_map_indexed_smiles(smiles_a, smiles_b, expected): ], ) def test_is_conformer_identical_generated(smiles): - offmol = OFFMolecule.from_smiles(smiles) + offmol = Molecule.from_smiles(smiles) offmol.generate_conformers(n_conformers=1) ordered_conf = offmol.conformers[0].m_as(unit.angstrom) # ordered_conf = get_coordinates_in_angstrom(offmol.conformers[0]) @@ -100,7 +100,7 @@ def test_is_conformer_identical_generated(smiles): def test_is_conformer_identical_linear(): - offmol = OFFMolecule.from_smiles("CCC") + offmol = Molecule.from_smiles("CCC") c_coords = np.array( [ [1, 0, 0], @@ -124,7 +124,7 @@ def test_is_conformer_identical_linear(): def test_not_is_conformer_identical(): smiles = "[C:1]([H:4])([H:5])([H:6])[C:2]([Cl:7])=[O:3]" - offmol = OFFMolecule.from_mapped_smiles(smiles) + offmol = Molecule.from_mapped_smiles(smiles) offmol.generate_conformers(n_conformers=1) conformer = offmol.conformers[0].m_as(unit.angstrom) @@ -150,29 +150,30 @@ def test_not_is_conformer_identical(): def test_calculate_circular_fingerprint_similarity( smiles1, smiles2, radius, similarity ): - mol1 = OFFMolecule.from_smiles(smiles1) - mol2 = OFFMolecule.from_smiles(smiles2) + mol1 = Molecule.from_smiles(smiles1) + mol2 = Molecule.from_smiles(smiles2) dice = calculate_circular_fingerprint_similarity(mol1, mol2, radius=radius) assert_allclose(dice, similarity) -# def test_get_best_rmsd(): -# from rdkit.Chem import rdMolAlign - -# offmol = OFFMolecule.from_smiles("CCC") -# offmol._conformers = [ -# np.random.random((11, 3)) * unit.angstrom, -# np.random.random((11, 3)) * unit.angstrom, -# ] - -# rdmol = offmol.to_rdkit() -# assert rdmol.GetNumConformers() == 2 - -# reference_rmsd = rdMolAlign.GetBestRMS(rdmol, rdmol, 0, 1) -# rmsd = get_best_rmsd( -# offmol, -# offmol.conformers[0].m_as(unit.angstrom), -# offmol.conformers[1].m_as(unit.angstrom), -# ) -# assert_allclose(rmsd, reference_rmsd) +@pytest.mark.skipif(not RDKIT_AVAILABLE, reason="requires rdkit") +def test_get_best_rmsd(): + from rdkit.Chem import rdMolAlign + + offmol = Molecule.from_smiles("CCC") + offmol._conformers = [ + np.random.random((11, 3)) * unit.angstrom, + np.random.random((11, 3)) * unit.angstrom, + ] + + rdmol = offmol.to_rdkit() + assert rdmol.GetNumConformers() == 2 + + reference_rmsd = rdMolAlign.GetBestRMS(rdmol, rdmol, 0, 1) + rmsd = get_best_rmsd( + offmol, + offmol.conformers[0].m_as(unit.angstrom), + offmol.conformers[1].m_as(unit.angstrom), + ) + assert_allclose(rmsd, reference_rmsd)