Skip to content

Commit

Permalink
Add getter to access the strand of a nhmmer domain
Browse files Browse the repository at this point in the history
  • Loading branch information
althonos committed Mar 5, 2024
1 parent 330e536 commit 9862cc7
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 1 deletion.
3 changes: 3 additions & 0 deletions pyhmmer/plan7.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ WEIGHTING = Literal["pb", "gsc", "blosum", "none", "given"]
EFFECTIVE = Literal["entropy", "exp", "clust", "none"]
PRIOR_SCHEME = Literal["laplace", "alphabet"]
STRAND = Literal["watson", "crick"]
# Strand expressed as a sign; this is the value type of `Domain.strand`.
STRAND_SIGN = Literal["+", "-"]
HITS_FORMAT = Literal["targets", "domain", "pfam"]
HITS_MODE = Literal["search", "scan"]

Expand Down Expand Up @@ -207,6 +208,8 @@ class Domain(object):
@property
def env_to(self) -> int: ...
# "+" or "-" when the hits come from a long-targets pipeline, `None` otherwise.
@property
def strand(self) -> typing.Optional[STRAND_SIGN]: ...
# Overall score in bits, null2-corrected.
@property
def score(self) -> float: ...
@property
def bias(self) -> float: ...
Expand Down
17 changes: 17 additions & 0 deletions pyhmmer/plan7.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1496,6 +1496,23 @@ cdef class Domain:
assert self._dom != NULL
return self._dom.jenv

@property
def strand(self):
    """`str` or `None`: The strand on which the domain was found.

    A search run with the `LongTargetsPipeline` processes both strands
    of every target sequence (unless disabled), so a domain can lie on
    either of them; the strand is then reported as ``+`` or ``-``.
    With a default `Pipeline` search this property is always `None`.

    .. versionadded:: 0.10.8

    """
    assert self._dom != NULL
    # The pipeline settings are read in place through the owning hits
    # collection rather than copied into a local.
    if self.hit.hits._pli.long_targets:
        # `iali` strictly below `jali` is reported as "+"; every other
        # ordering (including equality) is reported as "-".
        if self._dom.iali < self._dom.jali:
            return "+"
        return "-"
    return None

@property
def score(self):
"""`float`: The overall score in bits, *null2*-corrected.
Expand Down
12 changes: 11 additions & 1 deletion pyhmmer/tests/test_hmmer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import abc
import collections
import math
import io
import itertools
Expand Down Expand Up @@ -682,18 +683,27 @@ def test_rf0001_genome_file(self):
with SequenceFile(path, "fasta", digital=True, alphabet=alphabet) as seqs_file:
hits = list(pyhmmer.nhmmer(self.rf00001, seqs_file, cpus=1))[0]
hits.sort()

self.assertAlmostEqual(hits[0].evalue, 1.4e-14)
self.assertAlmostEqual(hits[9].evalue, 1.7e-13)

strands = collections.Counter(hit.best_domain.strand for hit in hits.reported)
self.assertEqual(strands["+"], 9)
self.assertEqual(strands["-"], 1)

def test_rf0001_genome_file_wlen_3878(self):
    """Run nhmmer with ``window_length=3878`` on the CP000560.2 genome file.

    Checks the E-values of the first and tenth hits after sorting, and
    the strand reported by the best domain of every reported hit.
    """
    alphabet = Alphabet.rna()
    path = resource_files(__package__).joinpath("data", "seqs", "CP000560.2.fna")
    with SequenceFile(path, "fasta", digital=True, alphabet=alphabet) as seqs_file:
        hits = list(pyhmmer.nhmmer(self.rf00001, seqs_file, cpus=1, window_length=3878))[0]
        hits.sort()

    # `assertAlmostEqual` defaults to `places=7`, which is vacuous at this
    # magnitude: any E-value below ~5e-8 would pass. An explicit `delta` of
    # one unit in the last printed digit makes the comparison meaningful.
    self.assertAlmostEqual(hits[0].evalue, 3.1e-14, delta=1e-15)
    self.assertAlmostEqual(hits[9].evalue, 3.7e-13, delta=1e-14)

    # Tally the strand of the best domain of each reported hit.
    strands = collections.Counter(hit.best_domain.strand for hit in hits.reported)
    self.assertEqual(strands["+"], 9)
    self.assertEqual(strands["-"], 1)

class TestHmmalign(unittest.TestCase):
def setUp(self):
Expand All @@ -707,7 +717,7 @@ def test_luxc(self):
hmm_path = resource_files(__package__).joinpath("data", "hmms", "txt", "LuxC.hmm")
seqs_path = resource_files(__package__).joinpath("data", "seqs", "LuxC.faa")
ref_path = resource_files(__package__).joinpath("data", "msa", "LuxC.hmmalign.sto")

with HMMFile(hmm_path) as hmm_file:
hmm = hmm_file.read()
with SequenceFile(seqs_path, digital=True, alphabet=hmm.alphabet) as seqs_file:
Expand Down

0 comments on commit 9862cc7

Please sign in to comment.