Skip to content

Commit

Permalink
Merge branch '262-is-rna-available' into 'develop'
Browse files Browse the repository at this point in the history
Resolve "`is_rna_available` is set to False if it is not provided in the patient-data"

See merge request tron/addannot!229
  • Loading branch information
Pablo Riesgo Ferreiro committed Feb 8, 2023
2 parents b23c6c3 + c5ee1ad commit be15532
Show file tree
Hide file tree
Showing 17 changed files with 19 additions and 83 deletions.
1 change: 1 addition & 0 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,5 @@ publish_package:
- python3 setup.py sdist bdist_wheel
- TWINE_PASSWORD=${CI_JOB_TOKEN} TWINE_USERNAME=gitlab-ci-token python -m twine upload --repository-url https://gitlab.rlp.net/api/v4/projects/${CI_PROJECT_ID}/packages/pypi dist/*
only:
# deploys only the develop branch to the private GitLab package repository; the master branch is published on PyPI
- develop
2 changes: 1 addition & 1 deletion docs/source/03_03_usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ neofox --input-file neoantigens_candidates.tsv \
[--output-prefix out_prefix] \
[--organism human|mouse] \
[--rank-mhci-threshold 2.0] \
[--rank-mhcii-threshold 4.0] \
[--rank-mhcii-threshold 5.0] \
[--num-cpus] \
[--config] \
[--patient-id] \
Expand Down
1 change: 0 additions & 1 deletion docs/source/05_models.md
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,6 @@ The metadata required for analysis for a given patient + its patient identifier
| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| identifier | [string](#string) | | Patient identifier |
| isRnaAvailable | [bool](#bool) | | Is RNA expression available? |
| tumorType | [string](#string) | | Tumor entity in TCGA study abbreviation style as described here: https://gdc.cancer.gov/resources-tcga-users/tcga-code-tables/tcga-study-abbreviations |
| mhc1 | [Mhc1](#neoantigen.Mhc1) | repeated | MHC I classic molecules |
| mhc2 | [Mhc2](#neoantigen.Mhc2) | repeated | MHC II classic molecules |
Expand Down
3 changes: 1 addition & 2 deletions neofox/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.#


VERSION = "1.0.6"

VERSION = "1.1.0b1"

REFERENCE_FOLDER_ENV = "NEOFOX_REFERENCE_FOLDER"
NEOFOX_BLASTP_ENV = "NEOFOX_BLASTP"
Expand Down
20 changes: 0 additions & 20 deletions neofox/command_line.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,16 +213,6 @@ def _read_data(input_file, patients_data, mhc_database: MhcDatabase) -> Tuple[Li
else:
raise ValueError('Not supported input file extension: {}'.format(input_file))

patients_dict : Dict[str, Patient]
patients_dict = {p.identifier: p for p in patients}

for n in neoantigens:
patient = patients_dict.get(n.patient_identifier)
if not patient.is_rna_available:
# removes RNA vaf if indicated in patient that this information is no good
# iCam legacy
n.rna_variant_allele_frequency = None

return neoantigens, patients


Expand Down Expand Up @@ -386,16 +376,6 @@ def _read_data_epitopes(
else:
raise ValueError('Not supported input file extension: {}'.format(input_file))

patients_dict : Dict[str, Patient]
patients_dict = {p.identifier: p for p in patients}

for n in neoepitopes:
patient = patients_dict.get(n.patient_identifier)
if patient is not None and not patient.is_rna_available:
# removes RNA vaf if indicated in patient that this information is no good
# iCam legacy
n.rna_variant_allele_frequency = None

return neoepitopes, patients


Expand Down
1 change: 0 additions & 1 deletion neofox/model/conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,6 @@ def parse_patients_file(patients_file: str, mhc_database: MhcDatabase) -> List[P
patient_dict = row.to_dict()
patient = PatientFactory.build_patient(
identifier=patient_dict.get("identifier"),
is_rna_available=patient_dict.get("isRnaAvailable", False),
tumor_type=patient_dict.get("tumorType"),
mhc_alleles=patient_dict.get("mhcIAlleles", []),
mhc2_alleles=patient_dict.get("mhcIIAlleles", []),
Expand Down
3 changes: 1 addition & 2 deletions neofox/model/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,11 +173,10 @@ def build_neoepitope(mutated_peptide=None, wild_type_peptide=None, patient_ident

class PatientFactory(object):
@staticmethod
def build_patient(identifier, is_rna_available=False, tumor_type=None, mhc_alleles: List[str] = [],
def build_patient(identifier, tumor_type=None, mhc_alleles: List[str] = [],
mhc2_alleles: List[str] = [], mhc_database: MhcDatabase =None):
patient = Patient(
identifier=identifier,
is_rna_available=is_rna_available,
tumor_type=tumor_type,
mhc1=MhcFactory.build_mhc1_alleles(mhc_alleles, mhc_database),
mhc2=MhcFactory.build_mhc2_alleles(mhc2_alleles, mhc_database)
Expand Down
1 change: 0 additions & 1 deletion neofox/model/models.md
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,6 @@ The metadata required for analysis for a given patient + its patient identifier
| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| identifier | [string](#string) | | Patient identifier |
| isRnaAvailable | [bool](#bool) | | Is RNA expression available? |
| tumorType | [string](#string) | | Tumor entity in TCGA study abbreviation style as described here: https://gdc.cancer.gov/resources-tcga-users/tcga-code-tables/tcga-study-abbreviations |
| mhc1 | [Mhc1](#neoantigen.Mhc1) | repeated | MHC I classic molecules |
| mhc2 | [Mhc2](#neoantigen.Mhc2) | repeated | MHC II classic molecules |
Expand Down
10 changes: 3 additions & 7 deletions neofox/model/neoantigen.proto
Original file line number Diff line number Diff line change
Expand Up @@ -78,21 +78,17 @@ message Patient {
*/
string identifier = 1;
/**
Is RNA expression available?
*/
bool isRnaAvailable = 2;
/**
Tumor entity in TCGA study abbreviation style as described here: https://gdc.cancer.gov/resources-tcga-users/tcga-code-tables/tcga-study-abbreviations
*/
string tumorType = 3;
string tumorType = 2;
/**
MHC I classic molecules
*/
repeated Mhc1 mhc1 = 4;
repeated Mhc1 mhc1 = 3;
/**
MHC II classic molecules
*/
repeated Mhc2 mhc2 = 5;
repeated Mhc2 mhc2 = 4;
}

/**
Expand Down
8 changes: 3 additions & 5 deletions neofox/model/neoantigen.py

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 1 addition & 5 deletions neofox/neofox.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,10 +110,6 @@ def __init__(
for neoantigen in self.neoantigens:
expression_per_patient[neoantigen.patient_identifier].append(neoantigen.rna_expression)

for patient in self.patients:
self.patients[patient].is_rna_available = all(e is not None for e in
expression_per_patient[self.patients[patient].identifier])

# only performs the expression imputation for humans
if self.reference_folder.organism == ORGANISM_HOMO_SAPIENS:
# impute expression from TCGA, ONLY if isRNAavailable = False for given patient,
Expand All @@ -137,7 +133,7 @@ def _conditional_expression_imputation(self) -> List[Neoantigen]:
gene_expression = expression_annotator.get_gene_expression_annotation(
gene_name=neoantigen.gene, tcga_cohort=patient.tumor_type
)
if not patient.is_rna_available and patient.tumor_type is not None and patient.tumor_type != "":
if expression_value is None and patient.tumor_type is not None and patient.tumor_type != "":
expression_value = gene_expression
neoantigen_transformed.rna_expression = expression_value
neoantigen.imputed_gene_expression = gene_expression
Expand Down
2 changes: 0 additions & 2 deletions neofox/neofox_epitope.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,8 +211,6 @@ def _conditional_expression_imputation(self) -> List[PredictedEpitope]:
neoepitope_transformed = neoepitope
gene_expression = expression_annotator.get_gene_expression_annotation(
gene_name=neoepitope.gene, tcga_cohort=patient.tumor_type)
if not patient.is_rna_available and patient.tumor_type is not None and patient.tumor_type != "":
neoepitope_transformed.rna_expression = gene_expression
neoepitope.imputed_gene_expression = gene_expression
neoepitopes_transformed.append(neoepitope_transformed)
else:
Expand Down
2 changes: 0 additions & 2 deletions neofox/tests/integration_tests/test_neofox.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,8 +345,6 @@ def test_neofox_without_mhc1(self):

def test_gene_expression_imputation(self):
neoantigens, patients = self._get_test_data()
for p in patients:
p.is_rna_available = False
neofox = NeoFox(
neoantigens=neoantigens,
patients=patients,
Expand Down
1 change: 0 additions & 1 deletion neofox/tests/synthetic_data/factories.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ def patient(self) -> Patient:
try:
patient = Patient(
identifier=self.generator.unique.uuid4(),
is_rna_available=True,
tumor_type=self.random_elements(self.available_tumor_types, length=1)[0],
# by setting unique=True we enforce that all patients are heterozygous
mhc1=MhcFactory.build_mhc1_alleles(
Expand Down
6 changes: 0 additions & 6 deletions neofox/tests/unit_tests/test_model_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,6 @@ def test_patients_csv_file2model(self):
self.assertEqual(
9, len([a for m in patients[0].mhc2 for g in m.genes for a in g.alleles])
)
self.assertEqual(patients[0].is_rna_available, False)

def test_patients_without_mhc2(self):
patients_file = pkg_resources.resource_filename(
Expand All @@ -208,7 +207,6 @@ def test_patients_without_mhc2(self):
self.assertEqual(3, len(patients[0].mhc1))
self.assertEqual(6, len([a for m in patients[0].mhc1 for a in m.alleles]))
self.assertEqual(0, len(patients[0].mhc2))
self.assertEqual(patients[0].is_rna_available, False)

def test_patients_csv_file2model_mouse(self):
patients_file = pkg_resources.resource_filename(
Expand All @@ -226,7 +224,6 @@ def test_patients_csv_file2model_mouse(self):
self.assertEqual(
3, len([a for m in patients[0].mhc2 for g in m.genes for a in g.alleles])
)
self.assertEqual(patients[0].is_rna_available, False)

def test_patients_csv_file2model2(self):
patients_file = pkg_resources.resource_filename(
Expand All @@ -244,7 +241,6 @@ def test_patients_csv_file2model2(self):
self.assertEqual(
9, len([a for m in patients[0].mhc2 for g in m.genes for a in g.alleles])
)
self.assertEqual(patients[0].is_rna_available, True)

def test_patients_csv_file2model3(self):
patients_file = pkg_resources.resource_filename(
Expand All @@ -269,7 +265,6 @@ def test_patients_csv_file2model3(self):
"HLA-DQA1*04:01"
in [a.name for m in patients[0].mhc2 for g in m.genes for a in g.alleles]
)
self.assertTrue(patients[0].is_rna_available)

def test_patients_csv_file2model_without_mhc1(self):
patients_file = pkg_resources.resource_filename(
Expand All @@ -287,7 +282,6 @@ def test_patients_csv_file2model_without_mhc1(self):
self.assertEqual(
9, len([a for m in patients[0].mhc2 for g in m.genes for a in g.alleles])
)
self.assertEqual(patients[0].is_rna_available, True)

def test_patients_csv_file2model_without_mhc2(self):
patients_file = pkg_resources.resource_filename(
Expand Down
13 changes: 7 additions & 6 deletions neofox/tests/unit_tests/test_neofox.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,11 +191,12 @@ def test_with_expression_imputation(self):
reference_folder=FakeReferenceFolder(),
configuration=FakeDependenciesConfiguration(),
)
for neoantigen in original_neoantigens:
for neoantigen_imputed in neofox_runner.neoantigens:
self.assertFalse(
neoantigen.rna_expression == neoantigen_imputed.rna_expression
)
for neoantigen, neoantigen_imputed in zip(original_neoantigens, neofox_runner.neoantigens):
self.assertIsNotNone(neoantigen_imputed.imputed_gene_expression)
if neoantigen.rna_expression is None:
self.assertNotEqual(neoantigen.rna_expression, neoantigen_imputed.rna_expression)
else:
self.assertEqual(neoantigen.rna_expression, neoantigen_imputed.rna_expression)

def _get_test_neoantigen(self):
return Neoantigen(
Expand All @@ -207,7 +208,7 @@ def _get_test_neoantigen(self):
)

def _get_test_patient(self):
return Patient(identifier="12345", is_rna_available=True)
return Patient(identifier="12345")


if __name__ == "__main__":
Expand Down
22 changes: 1 addition & 21 deletions neofox/tests/unit_tests/test_validation.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,6 @@ def test_bad_type_raises_exception(self):
Neoantigen(patient_identifier="1234", rna_expression="0.45"),
) # this should be a float)

self.assertRaises(
NeofoxDataValidationException,
ModelValidator.validate,
Patient(identifier="1234", is_rna_available="Richtig"),
) # this should be a boolean)

# TODO: make validation capture these data type errors!
ModelValidator.validate(
Neoantigen(
Expand All @@ -63,7 +57,7 @@ def test_good_data_does_not_raise_exceptions(self):
neoantigen = Neoantigen(patient_identifier="1234", rna_expression=0.45)
ModelValidator.validate(neoantigen)

patient = Patient(identifier="1234", is_rna_available=True)
patient = Patient(identifier="1234")
ModelValidator.validate(patient)

def test_enum_with_wrong_value(self):
Expand Down Expand Up @@ -695,20 +689,6 @@ def test_empty_patient_identifier(self):
NeofoxDataValidationException, ModelValidator.validate_patient, patient, ORGANISM_HOMO_SAPIENS
)

def test_bad_is_rna_available(self):
ModelValidator.validate_patient(
Patient(identifier="123", is_rna_available=True), ORGANISM_HOMO_SAPIENS
)
ModelValidator.validate_patient(
Patient(identifier="123", is_rna_available=False), ORGANISM_HOMO_SAPIENS
)
self.assertRaises(
NeofoxDataValidationException,
ModelValidator.validate_patient,
Patient(identifier="123", is_rna_available="False"),
ORGANISM_HOMO_SAPIENS
)

def test_validate_neoepitope_mhci(self):
neoepitope = PredictedEpitope(
mutated_peptide="DILVTDQTR",
Expand Down

0 comments on commit be15532

Please sign in to comment.