Skip to content

Commit

Permalink
Pdb Loading Fix (#666)
Browse files Browse the repository at this point in the history
* Improved local pdb loading

Previously, an error would be raised when attempting to find bonds in a PDB file that lacked bond information and contained multiple chain identifiers. Now, the atoms are first loaded and then connected.

* Getting rid of unused imports

* Adding new test for local pdb with no bonds

* attempt to import bonds first

---------

Co-authored-by: Brady Johnston <36021261+BradyAJohnston@users.noreply.github.com>
Co-authored-by: Brady Johnston <brady.johnston@me.com>
  • Loading branch information
3 people authored Nov 26, 2024
1 parent a1fd75f commit 362678f
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 6 deletions.
35 changes: 29 additions & 6 deletions molecularnodes/entities/molecule/pdb.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
import biotite.structure as struc
import numpy as np
from biotite import InvalidFileError
from biotite.structure import BadStructureError, annotate_sse, spread_residue_wise
from biotite.structure import (
BadStructureError,
annotate_sse,
spread_residue_wise,
connect_via_residue_names,
)
from biotite.structure.io import pdb

from .assembly import AssemblyParser
Expand All @@ -20,11 +25,29 @@ def read(self, file_path):

def _get_structure(self):
# TODO: implement entity ID, sec_struct for PDB files
array = pdb.get_structure(
pdb_file=self.file,
extra_fields=["b_factor", "occupancy", "charge", "atom_id"],
include_bonds=True,
)

# Prefer the bond information stored in the file itself. If reading it fails,
# re-read the structure without bonds and try to create the bonds based on
# residue / atom names instead.
try:
    array = pdb.get_structure(
        pdb_file=self.file,
        extra_fields=["b_factor", "occupancy", "charge", "atom_id"],
        include_bonds=True,
    )
except AttributeError as e:
    print(
        f"Unable to get bond information: {e}\nAttempting `connect_via_residue_names()`"
    )
    array = pdb.get_structure(
        pdb_file=self.file,
        extra_fields=["b_factor", "occupancy", "charge", "atom_id"],
        include_bonds=False,
    )
    try:
        array.bonds = connect_via_residue_names(array)
    except AttributeError as e:
        # Best effort: report the failure and carry on without bonds rather
        # than aborting the load. The message must be an f-string so the
        # caught exception is actually shown.
        print(f"Not able to find bonds via residue: {e}")

try:
sec_struct = _get_sec_struct(self.file, array)
Expand Down
21 changes: 21 additions & 0 deletions tests/__snapshots__/test_load.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,27 @@
[ 2.2e-01 5.4e-02 -4.0e-02]
[ 3.3e-01 8.1e-02 9.6e-02]]
# ---
# name: test_pdb_no_bonds
[[ 0.0 0.0 0.0]
[ 0.0 0.0 0.0]
[ 0.0 0.0 0.0]
[ 0.0 0.0 0.0]
[ 0.0 0.0 0.0]
[ 0.1 0.1 0.1]
[ 0.1 0.1 0.1]
[ 0.1 0.1 0.1]
[ 0.1 0.1 0.1]
[-0.0 -0.0 -0.0]
[-0.0 -0.0 -0.0]
[-0.0 -0.0 -0.0]
[-0.0 -0.0 -0.0]
[-0.0 -0.0 -0.0]
[-0.1 -0.1 -0.1]
[-0.1 -0.1 -0.1]]
# ---
# name: test_pdb_no_bonds.1
AttributeError("The selected attribute 'bond_type' does not exist on the mesh.")
# ---
# name: test_rcsb_nmr[False]
[[ 0.1 0.0 -0.1]
[ 0.1 0.0 -0.1]
Expand Down
18 changes: 18 additions & 0 deletions tests/data/no_bonds.pdb
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
REMARK original generated coordinate pdb file
ATOM 0 C1 BGX 1 1.000 1.000 1.000 1.00 0.00 C
ATOM 1 H1 BGX 1 2.000 2.000 2.000 1.00 0.00 H
ATOM 2 C5 BGX 1 3.000 3.000 3.000 1.00 0.00 C
ATOM 3 H5 BGX 1 4.000 4.000 4.000 1.00 0.00 H
ATOM 4 O5 BGX 1 5.000 5.000 5.000 1.00 0.00 O
ATOM 5 C2 BGX 1 6.000 6.000 6.000 1.00 0.00 C
ATOM 6 H2 BGX 1 7.000 7.000 7.000 1.00 0.00 H
ATOM 7 O2 BGX 1 8.000 8.000 8.000 1.00 0.00 O
ATOM 8 HO2 BGX 1 9.000 9.000 9.000 1.00 0.00 H
ATOM 9 C3 BGX 1 -1.000 -1.000 -1.000 1.00 0.00 C
ATOM A H3 BGX 1 -2.000 -2.000 -2.000 1.00 0.00 H
ATOM B O3 BGX 1 -3.000 -3.000 -3.000 1.00 0.00 O
ATOM C HO3 BGX 1 -4.000 -4.000 -4.000 1.00 0.00 H
ATOM D C4 BGX 1 -5.000 -5.000 -5.000 1.00 0.00 C
ATOM E H4 BGX 1 -6.000 -6.000 -6.000 1.00 0.00 H
ATOM F O4 BGX 2 -7.000 -7.000 -7.000 1.00 0.00 O
END
5 changes: 5 additions & 0 deletions tests/test_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,11 @@ def test_local_pdb(snapshot_custom):
for mol in molecules:
assert snapshot_custom == sample_attribute(mol, att, evaluate=False)

def test_pdb_no_bonds(snapshot_custom):
    """Load a local PDB file without bond records and snapshot its attributes."""
    molecule = mn.entities.load_local(data_dir / "no_bonds.pdb", style="spheres")
    # "bond_type" is sampled alongside "position" so the snapshot also records
    # the outcome of requesting bond data for this file.
    for attr in ["position", "bond_type"]:
        assert snapshot_custom == sample_attribute(molecule, attr, evaluate=False)


@pytest.mark.parametrize("del_hydrogen", [True, False])
def test_rcsb_nmr(snapshot_custom, del_hydrogen):
Expand Down

0 comments on commit 362678f

Please sign in to comment.