Skip to content

Commit

Permalink
Merge branch 'develop' into '262-is-rna-available'
Browse files Browse the repository at this point in the history
# Conflicts:
#   neofox/__init__.py
  • Loading branch information
Pablo Riesgo Ferreiro committed Feb 8, 2023
2 parents 65d7c67 + b23c6c3 commit c5ee1ad
Show file tree
Hide file tree
Showing 7 changed files with 75 additions and 33 deletions.
3 changes: 1 addition & 2 deletions neofox/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.#


VERSION = "1.0.5"

VERSION = "1.1.0b1"

REFERENCE_FOLDER_ENV = "NEOFOX_REFERENCE_FOLDER"
NEOFOX_BLASTP_ENV = "NEOFOX_BLASTP"
Expand Down
2 changes: 1 addition & 1 deletion neofox/annotator/abstract_annotator.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def __init__(
self.priority_score_calculator = PriorityScore()
self.iedb_immunogenicity = IEDBimmunogenicity()
self.amplitude = Amplitude()
self.hex = Hex(runner=self.runner, configuration=configuration, references=references)
self.hex = Hex(references=references)

def get_additional_annotations_neoepitope_mhci(
self, epitope: PredictedEpitope, neoantigen: Neoantigen = None) -> PredictedEpitope:
Expand Down
Binary file removed neofox/published_features/hex/BLOSUM62.rda
Binary file not shown.
19 changes: 4 additions & 15 deletions neofox/published_features/hex/hex.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,33 +18,22 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.#
from typing import List
import os
from neofox.model.neoantigen import Annotation, PredictedEpitope
from neofox.model.factories import AnnotationFactory
from neofox.published_features.hex.pyhex import PyHex
from neofox.references.references import ReferenceFolder


class Hex(object):

def __init__(self, references: ReferenceFolder):
    """
    Prepares the HEX annotator.

    The IEDB FASTA path is read from the reference folder and handed to a
    PyHex instance; no external runner or dependency configuration is used.

    :param references: reference folder providing ``get_iedb_fasta()``
    """
    self.iedb_fasta = references.get_iedb_fasta()
    self.pyhex = PyHex(self.iedb_fasta)

def apply_hex(self, mut_peptide):
    """
    Runs the HEX tool, which analyses the neoepitope candidate sequence for
    molecular mimicry to viral epitopes.

    The computation is delegated to ``self.pyhex``.

    :param mut_peptide: sequence of the mutated peptide
    :return: whatever ``self.pyhex.run`` returns for the peptide
    """
    return self.pyhex.run(mut_peptide)

def get_annotation(
self, mutated_peptide_mhci: PredictedEpitope, mutated_peptide_mhcii: PredictedEpitope) -> List[Annotation]:
Expand Down
51 changes: 51 additions & 0 deletions neofox/published_features/hex/pyhex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from math import ceil, floor

from Bio import SeqIO
from Bio.Align import substitution_matrices
from Bio.Alphabet.IUPAC import ExtendedIUPACProtein


class PyHex:
    """
    Scores a peptide against the same-length sequences of a FASTA file using
    position-weighted BLOSUM62 substitution scores, and reports the best score.
    """

    def __init__(self, iedb_fasta, magic_number=4):
        """
        :param iedb_fasta: path to the FASTA file holding the reference sequences
        :param magic_number: step between consecutive position weights in the
            central part of the peptide (see ``_get_sequence_weights``)
        """
        self.iedb_sequences = self._read_fasta(iedb_fasta)
        self.magic_number = magic_number
        self.blosum = substitution_matrices.load("BLOSUM62")

    @staticmethod
    def _read_fasta(fasta_file):
        """
        Reads the FASTA file and keeps only the records whose residues all
        belong to ``ExtendedIUPACProtein.letters``.

        :param fasta_file: path to the FASTA file
        :return: list of the accepted records
        """
        # NOTE(review): the extended IUPAC alphabet may accept letters that the
        # BLOSUM62 matrix does not index -- confirm that such records cannot
        # reach _align.
        allowed_letters = frozenset(ExtendedIUPACProtein.letters)
        with open(fasta_file, "r") as handle:
            return [
                record
                for record in SeqIO.parse(handle, "fasta")
                if all(aa in allowed_letters for aa in record.seq)
            ]

    def _align(self, sequence, mutated_sequence, weights=None):
        """
        Computes the weighted substitution score of two sequences compared
        position by position (no gaps).

        :param sequence: reference sequence
        :param mutated_sequence: mutated peptide sequence
        :param weights: per-position weights; computed from the mutated
            sequence when not provided
        :return: sum over positions of substitution score times weight
        """
        if weights is None:
            weights = self._get_sequence_weights(mutated_sequence)
        return sum(
            self.blosum[q, t] * w
            for q, t, w in zip(sequence, mutated_sequence, weights)
        )

    def _get_sequence_weights(self, mutated_sequence):
        """
        Builds one weight per position of the mutated sequence.

        Weights first grow from 1 in steps of ``magic_number`` up to the
        centre and mirror back down; then the first and last
        ``floor(length / 3)`` positions are overwritten with the ramps
        1, 2, 3... and ...3, 2, 1. For a 9-mer and ``magic_number=4`` this
        gives ``[1, 2, 3, 13, 17, 13, 3, 2, 1]``.

        :param mutated_sequence: mutated peptide sequence
        :return: list of integer weights, as long as the sequence
        """
        length_mutated_sequence = len(mutated_sequence)

        # symmetric profile peaking at the centre of the peptide
        mid_score = ceil(length_mutated_sequence / 2) * self.magic_number
        weights = list(range(1, mid_score, self.magic_number))
        weights.extend(reversed(weights[0:floor(length_mutated_sequence / 2)]))

        # both flanks are replaced by a unit-step ramp
        top_floor = floor(length_mutated_sequence / 3)
        weights[0:top_floor] = list(range(1, top_floor + 1))
        tail = length_mutated_sequence - top_floor
        weights[tail:length_mutated_sequence] = list(reversed(range(1, top_floor + 1)))

        return weights

    def run(self, mutated_sequence):
        """
        Scores the mutated sequence against every reference sequence of the
        same length and returns the best score.

        :param mutated_sequence: mutated peptide sequence
        :return: the highest alignment score, or None when no reference
            sequence has the same length as the mutated sequence
        """
        target_length = len(mutated_sequence)
        # the weights depend only on the mutated sequence: compute them once
        weights = self._get_sequence_weights(mutated_sequence)
        alignment_scores = [
            self._align(record.seq, mutated_sequence, weights)
            for record in self.iedb_sequences
            # sequences of a different length are excluded
            if len(record.seq) == target_length
        ]
        # max() of an empty list raises ValueError; report "no score" instead
        return max(alignment_scores, default=None)
6 changes: 0 additions & 6 deletions neofox/references/install_r_dependencies.R
Original file line number Diff line number Diff line change
@@ -1,9 +1,3 @@
install.packages("lattice", repo="http://cran.rstudio.com/")
install.packages("ggplot2", repo="http://cran.rstudio.com/")
install.packages("caret", repo="http://cran.rstudio.com/")
install.packages("Peptides", repo="http://cran.rstudio.com/")
install.packages("doParallel", repo="http://cran.rstudio.com/")
install.packages("gbm", repo="http://cran.rstudio.com/")
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("Biostrings")
27 changes: 18 additions & 9 deletions neofox/tests/integration_tests/test_hex.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,23 +22,32 @@
from neofox.helpers.runner import Runner

import neofox.tests.integration_tests.integration_test_tools as integration_test_tools

from neofox.published_features.hex.pyhex import PyHex


class TestHex(TestCase):
    """Integration tests of the HEX score, run against the loaded references."""

    def setUp(self):
        self.references, self.configuration = integration_test_tools.load_references()
        self.runner = Runner()

    def test_hex(self):
        # Hex is built from the references only
        res = Hex(references=self.references).apply_hex(mut_peptide="FGLAIDVDD")
        self.assertEqual(int(res), 148)

    def test_pyhex(self):
        pyhex = PyHex(iedb_fasta=self.references.get_iedb_fasta())
        res = pyhex.run("FGLAIDVDD")
        self.assertEqual(res, 148)

    def test_comparison(self):
        # both scorers are built once: constructing them parses the FASTA
        # file, and that work does not depend on the peptide being scored
        hex_annotator = Hex(references=self.references)
        pyhex = PyHex(iedb_fasta=self.references.get_iedb_fasta())
        for _ in range(10):
            for k in range(9, 30):
                peptide = integration_test_tools.get_random_kmer(k=k)
                logger.info(peptide)
                res = hex_annotator.apply_hex(mut_peptide=peptide)
                res_pyhex = pyhex.run(peptide)
                self.assertEqual(float(res), res_pyhex, "Peptide: {}".format(peptide))



0 comments on commit c5ee1ad

Please sign in to comment.