Add getter to access the strand of a nhmmer domain

althonos · Mar 5, 2024 · 9862cc7 · 9862cc7
1 parent 330e536
commit 9862cc7
Show file tree

Hide file tree

Showing 3 changed files with 31 additions and 1 deletion.
diff --git a/pyhmmer/plan7.pyi b/pyhmmer/plan7.pyi
@@ -37,6 +37,7 @@ WEIGHTING = Literal["pb", "gsc", "blosum", "none", "given"]
 EFFECTIVE = Literal["entropy", "exp", "clust", "none"]
 PRIOR_SCHEME = Literal["laplace", "alphabet"]
 STRAND = Literal["watson", "crick"]
+STRAND_SIGN = Literal["+", "-"]
 HITS_FORMAT = Literal["targets", "domain", "pfam"]
 HITS_MODE = Literal["search", "scan"]
 
@@ -207,6 +208,8 @@ class Domain(object):
     @property
     def env_to(self) -> int: ...
     @property
+    def strand(self) -> typing.Optional[STRAND_SIGN]: ...  # one of the STRAND_SIGN literals ("+" / "-"), or None
+    @property
     def score(self) -> float: ...
     @property
     def bias(self) -> float: ...

diff --git a/pyhmmer/plan7.pyx b/pyhmmer/plan7.pyx
@@ -1496,6 +1496,23 @@ cdef class Domain:
         assert self._dom != NULL
         return self._dom.jenv
 
+    @property
+    def strand(self):
+        """`str` or `None`: The strand where the domain is located.
+
+        When running a search with the `LongTargetsPipeline`, both strands
+        of each target sequence are processed (unless disabled), so the
+        domain may be located on either strand, reported as ``"+"`` or
+        ``"-"``. For default `Pipeline` searches, this is always `None`.
+
+        .. versionadded:: 0.10.8
+
+        """
+        assert self._dom != NULL
+        if not self.hit.hits._pli.long_targets:  # pipeline not in long-targets mode: no strand to report
+            return None
+        return "+" if self._dom.iali < self._dom.jali else "-"  # "+" when iali < jali (presumably alignment start/end -- confirm against HMMER)
+
     @property
     def score(self):
         """`float`: The overall score in bits, *null2*-corrected.

diff --git a/pyhmmer/tests/test_hmmer.py b/pyhmmer/tests/test_hmmer.py
@@ -1,4 +1,5 @@
 import abc
+import collections
 import math
 import io
 import itertools
@@ -682,18 +683,27 @@ def test_rf0001_genome_file(self):
         with SequenceFile(path, "fasta", digital=True, alphabet=alphabet) as seqs_file:
             hits = list(pyhmmer.nhmmer(self.rf00001, seqs_file, cpus=1))[0]
             hits.sort()
+
         self.assertAlmostEqual(hits[0].evalue, 1.4e-14)
         self.assertAlmostEqual(hits[9].evalue, 1.7e-13)
 
+        strands = collections.Counter(hit.best_domain.strand for hit in hits.reported)
+        self.assertEqual(strands["+"], 9)
+        self.assertEqual(strands["-"], 1)
+
     def test_rf0001_genome_file_wlen_3878(self):
         alphabet = Alphabet.rna()
         path = resource_files(__package__).joinpath("data", "seqs", "CP000560.2.fna")
         with SequenceFile(path, "fasta", digital=True, alphabet=alphabet) as seqs_file:
             hits = list(pyhmmer.nhmmer(self.rf00001, seqs_file, cpus=1, window_length=3878))[0]
             hits.sort()
+
         self.assertAlmostEqual(hits[0].evalue, 3.1e-14)
         self.assertAlmostEqual(hits[9].evalue, 3.7e-13)
 
+        strands = collections.Counter(hit.best_domain.strand for hit in hits.reported)  # tally the best-domain strand of each reported hit
+        self.assertEqual(strands["+"], 9)  # expected count of reported hits on the "+" strand
+        self.assertEqual(strands["-"], 1)  # expected count of reported hits on the "-" strand
 
 class TestHmmalign(unittest.TestCase):
     def setUp(self):
@@ -707,7 +717,7 @@ def test_luxc(self):
         hmm_path = resource_files(__package__).joinpath("data", "hmms", "txt", "LuxC.hmm")
         seqs_path = resource_files(__package__).joinpath("data", "seqs", "LuxC.faa")
         ref_path = resource_files(__package__).joinpath("data", "msa", "LuxC.hmmalign.sto")
-            
+
         with HMMFile(hmm_path) as hmm_file:
             hmm = hmm_file.read()
         with SequenceFile(seqs_path, digital=True, alphabet=hmm.alphabet) as seqs_file: