From 11bcff26483b30125b7feb1729a7bbfd3f4db8a6 Mon Sep 17 00:00:00 2001 From: Chris Bielow Date: Mon, 29 Jul 2024 17:08:24 +0200 Subject: [PATCH] updated USI example to mzQC v1.0 (#229) * updated USI example to mzQC v1.0 * Update USI-example.mzQC fix validation issues * Some editing --------- Co-authored-by: Mathias Walzer Co-authored-by: Wout Bittremieux --- .../pages/worked-examples/USI-example.mzQC.md | 56 ---------------- docs/pages/worked-examples/adv_mzqc_usi.md | 65 +++++++++++++++++++ .../{USI-example.mzQC => adv_mzqc_usi.mzQC} | 16 +++-- .../examples/intro_run.mzQC | 5 ++ 4 files changed, 79 insertions(+), 63 deletions(-) delete mode 100644 docs/pages/worked-examples/USI-example.mzQC.md create mode 100644 docs/pages/worked-examples/adv_mzqc_usi.md rename specification_documents/examples/{USI-example.mzQC => adv_mzqc_usi.mzQC} (81%) diff --git a/docs/pages/worked-examples/USI-example.mzQC.md b/docs/pages/worked-examples/USI-example.mzQC.md deleted file mode 100644 index 425dfb3..0000000 --- a/docs/pages/worked-examples/USI-example.mzQC.md +++ /dev/null @@ -1,56 +0,0 @@ ---- -layout: page -title: "USI Example of mzQC" -permalink: /examples/USI-example/ ---- - -Here, we describe a mzQC JSON document to exemplify the use of USI to address the quality of specific spectra by metrics recorded in a table value type. - - -Find the complete example file at the bottom of this document or in the example folder. - -We explained the basic structure of an mzQC in previous examples (e.g. individual-runs.mzQC) and here dive directly into the details. -The metric values are derived from spectra of a particular run in the CPTAC project, acquired on a Thermo Orbitrap Velos instrument. -For purposes of examplification the original run was truncated to the first 10 MS/MS spectra. 
-``` - "inputFiles": [ - { - "location": "ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2014/09/PXD000966/CPTAC_CompRef_00_iTRAQ_01_2Feb12_Cougar_11-10-09.raw", - "name": "CPTAC_CompRef_00_iTRAQ_01_2Feb12_Cougar_11-10-09.trfr.t3.mzML", - "fileFormat": { - "accession": "MS:1000584", - "name": "mzML format" - }, -``` -The metric gauges each considered spectra's TIC in relation to the spectra's peak intensities. -It does so by calculating the (minimal) fraction of peaks neccessary to sum up to half the total intensity of the spectrum itself. -Thus, the metric value will range between 0 and 1, where values closer to 0 will indicate spectra which' TIC is dominated by a few high intensity peaks. -``` - "qualityMetrics": [ - { - "accession": "MS:4000068", - "name": "spectra half-TIC", - "value": { -``` -The values for this metric are recorded in a table value type and consist of a column for the spectrum reference and the fractional value ("UO:0000191"). -The spectrum reference is defined by the metric as either the NativeID and/or preferrably the USI in a separate column. -The latter two columns are optional, however at least one has to be present. In our case, as the originating MS-run is available via proteomexchange, and the USI is a more detailed version of NativeID, no NativeID column is given. 
-``` - "MS:1003063": [ - "mzspec:PXD000966:CPTAC_CompRef_00_iTRAQ_01_2Feb12_Cougar_11-10-09:scan:2","mzspec:PXD000966:CPTAC_CompRef_00_iTRAQ_01_2Feb12_Cougar_11-10-09:scan:7","mzspec:PXD000966:CPTAC_CompRef_00_iTRAQ_01_2Feb12_Cougar_11-10-09:scan:29","mzspec:PXD000966:CPTAC_CompRef_00_iTRAQ_01_2Feb12_Cougar_11-10-09:scan:31","mzspec:PXD000966:CPTAC_CompRef_00_iTRAQ_01_2Feb12_Cougar_11-10-09:scan:34","mzspec:PXD000966:CPTAC_CompRef_00_iTRAQ_01_2Feb12_Cougar_11-10-09:scan:43","mzspec:PXD000966:CPTAC_CompRef_00_iTRAQ_01_2Feb12_Cougar_11-10-09:scan:45","mzspec:PXD000966:CPTAC_CompRef_00_iTRAQ_01_2Feb12_Cougar_11-10-09:scan:48","mzspec:PXD000966:CPTAC_CompRef_00_iTRAQ_01_2Feb12_Cougar_11-10-09:scan:50","mzspec:PXD000966:CPTAC_CompRef_00_iTRAQ_01_2Feb12_Cougar_11-10-09:scan:51" - ], - "UO:0000191": [ - 0.1134,0.1628,0.0536,0.102,0.1042,0.0947,0.0784,0.1239,0.2593,0.1214 - ] - } - } - ], -``` -As you can see from the values in the fractional value column, -most of the first ten spectra cluster around a value of about 10% of the peaks dominating the respective spectras' TIC, -with only a few outliers to the 25% and the 5% range. -Each row represents one spectrum that can be directly looked up, in the case of using USI for spectra reference, -[even directly from the web](https://www.proteomicsdb.org/use/?usi=mzspec:PXD000966:CPTAC_CompRef_00_iTRAQ_01_2Feb12_Cougar_11-10-09:scan:2). 
- -### This is the mzQC file once again, in full: -**[USI-example.mzQC](https://github.com/HUPO-PSI/mzQC/tree/main/specification_documents/examples/USI-example.mzQC)** \ No newline at end of file diff --git a/docs/pages/worked-examples/adv_mzqc_usi.md b/docs/pages/worked-examples/adv_mzqc_usi.md new file mode 100644 index 0000000..ed1a9c6 --- /dev/null +++ b/docs/pages/worked-examples/adv_mzqc_usi.md @@ -0,0 +1,65 @@ +--- +layout: page +title: "Integration of the Universal Spectrum Identifier (USI) in mzQC" +permalink: /examples/adv_mzqc_usi/ +--- + +The mzQC format supports various QC metrics that can operate at the level of MS runs or individual spectra. +One of the powerful features of mzQC is its compatibility with the [Universal Spectrum Identifier (USI)](https://www.psidev.info/usi), also developed by the Proteomics Standards Initiative. +This compatibility allows mzQC to reference individual spectra explicitly and precisely within a larger dataset. + +## Example: Computing the "spectra half-TIC" metric using USI + +In this example, we demonstrate the calculation of the "spectra half-TIC" metric for multiple MS/MS spectra. +The "spectra half-TIC" metric measures the minimal fraction of a spectrum's peaks, taking the most intense peaks first, needed to account for at least half of the total ion current (TIC) of that spectrum. +This metric helps in understanding the peak intensity distribution within a spectrum, where a lower value indicates a concentration of intensity in fewer peaks. + +For this example, the calculation is limited to the first 10 MS/MS spectra. +The results are formatted as a tabular metric where one column lists the USIs of the spectra and the other lists the computed half-TIC peak fractions. +This example emphasizes the utility of USIs for directly linking QC metrics to specific spectra in public databases like ProteomeXchange. 
+ +The JSON representation of the quality metric in the mzQC format includes: + +```json +"qualityMetrics": [ + { + "accession": "MS:4000068", + "name": "spectra half-TIC", + "description": "The minimal proportion of peaks needed to account for at least 50% of the total ion current in each individual spectrum considered, recorded in a mandatory fraction column. Either USI or native spectrum identifier columns must be present as well.", + "value": { + "MS:1003063": [ + "mzspec:PXD000966:CPTAC_CompRef_00_iTRAQ_01_2Feb12_Cougar_11-10-09:scan:2", + "mzspec:PXD000966:CPTAC_CompRef_00_iTRAQ_01_2Feb12_Cougar_11-10-09:scan:7", + "mzspec:PXD000966:CPTAC_CompRef_00_iTRAQ_01_2Feb12_Cougar_11-10-09:scan:29", + "mzspec:PXD000966:CPTAC_CompRef_00_iTRAQ_01_2Feb12_Cougar_11-10-09:scan:31", + "mzspec:PXD000966:CPTAC_CompRef_00_iTRAQ_01_2Feb12_Cougar_11-10-09:scan:34", + "mzspec:PXD000966:CPTAC_CompRef_00_iTRAQ_01_2Feb12_Cougar_11-10-09:scan:43", + "mzspec:PXD000966:CPTAC_CompRef_00_iTRAQ_01_2Feb12_Cougar_11-10-09:scan:45", + "mzspec:PXD000966:CPTAC_CompRef_00_iTRAQ_01_2Feb12_Cougar_11-10-09:scan:48", + "mzspec:PXD000966:CPTAC_CompRef_00_iTRAQ_01_2Feb12_Cougar_11-10-09:scan:50", + "mzspec:PXD000966:CPTAC_CompRef_00_iTRAQ_01_2Feb12_Cougar_11-10-09:scan:51" + ], + "UO:0000191": [ + 0.1134, + 0.1628, + 0.0536, + 0.1020, + 0.1042, + 0.0947, + 0.0784, + 0.1239, + 0.2593, + 0.1214 + ] + } + } +] +``` + +The table pairs each spectrum's USI with its corresponding "spectra half-TIC" value. +This format enables clear, unambiguous links between the QC data and the specific spectra, facilitating straightforward validation and further analysis. +Spectrum references use USIs, which provide a robust method to trace back to the exact source data in public repositories, enhancing transparency and reproducibility in proteomics research. + +Each spectrum and its TIC contribution can be directly accessed and verified online, ensuring that researchers can easily validate and reproduce findings. 
+For instance, the spectrum at USI [`mzspec:PXD000966:CPTAC_CompRef_00_iTRAQ_01_2Feb12_Cougar_11-10-09:scan:2`](https://www.ebi.ac.uk/pride/archive/usi?usi=mzspec:PXD000966:CPTAC_CompRef_00_iTRAQ_01_2Feb12_Cougar_11-10-09:scan:2) can be directly resolved and viewed on PRIDE. +Additionally, the full mzQC file is available [here](https://github.com/HUPO-PSI/mzQC/tree/main/specification_documents/examples/adv_mzqc_usi.mzQC). diff --git a/specification_documents/examples/USI-example.mzQC b/specification_documents/examples/adv_mzqc_usi.mzQC similarity index 81% rename from specification_documents/examples/USI-example.mzQC rename to specification_documents/examples/adv_mzqc_usi.mzQC index 8348792..170d3d4 100644 --- a/specification_documents/examples/USI-example.mzQC +++ b/specification_documents/examples/adv_mzqc_usi.mzQC @@ -4,14 +4,14 @@ "version": "1.0.0", "contactName": "Mathias Walzer", "contactAddress": "walzer@ebi.ac.uk", - "description": "A simple mzQC file containing a metric exemplifying USI incorporation to a metric.", + "description": "A simple mzQC file exemplifying integration with the Universal Spectrum Identifier to refer to individual spectra.", "runQualities": [ { "metadata": { "inputFiles": [ { "location": "ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2014/09/PXD000966/CPTAC_CompRef_00_iTRAQ_01_2Feb12_Cougar_11-10-09.raw", - "name": "CPTAC_CompRef_00_iTRAQ_01_2Feb12_Cougar_11-10-09.trfr.t3.mzML", + "name": "CPTAC_CompRef_00_iTRAQ_01_2Feb12_Cougar_11-10-09", "fileFormat": { "accession": "MS:1000584", "name": "mzML format" @@ -39,6 +39,7 @@ { "accession": "MS:1001058", "name": "quality estimation by manual validation", + "description": "The quality estimation was done manually.", "version": "0", "uri": "https://dx.doi.org/10.1021/pr201071t" } @@ -48,6 +49,7 @@ { "accession": "MS:4000068", "name": "spectra half-TIC", + "description": "The minimal proportion of peaks needed to account for at least 50% of the total ion current in each individual spectrum 
considered, recorded in a mandatory fraction column. Either USI or native spectrum identifier columns must be present as well.", "value": { "MS:1003063": [ "mzspec:PXD000966:CPTAC_CompRef_00_iTRAQ_01_2Feb12_Cougar_11-10-09:scan:2", @@ -65,7 +67,7 @@ 0.1134, 0.1628, 0.0536, - 0.102, + 0.1020, 0.1042, 0.0947, 0.0784, @@ -81,13 +83,13 @@ "controlledVocabularies": [ { "name": "Proteomics Standards Initiative Mass Spectrometry Ontology", - "uri": "https://github.com/HUPO-PSI/psi-ms-CV/blob/master/psi-ms.obo", - "version": "4.1.7" + "uri": "https://github.com/HUPO-PSI/psi-ms-CV/releases/download/v4.1.157/psi-ms.obo", + "version": "4.1.157" }, { "name": "Unit Ontology", - "uri": "https://raw.githubusercontent.com/bio-ontology-research-group/unit-ontology/master/unit.obo", - "version": "f9ff25b" + "uri": "https://raw.githubusercontent.com/bio-ontology-research-group/unit-ontology/v2023-05-23/unit-ontology.obo", + "version": "v2023-05-23" } ] } diff --git a/specification_documents/examples/intro_run.mzQC b/specification_documents/examples/intro_run.mzQC index da6b8e1..6fefa8b 100644 --- a/specification_documents/examples/intro_run.mzQC +++ b/specification_documents/examples/intro_run.mzQC @@ -117,6 +117,11 @@ "name": "Proteomics Standards Initiative Mass Spectrometry Ontology", "uri": "https://github.com/HUPO-PSI/psi-ms-CV/releases/download/v4.1.130/psi-ms.obo", "version": "4.1.130" + }, + { + "name": "Unit Ontology", + "uri": "https://raw.githubusercontent.com/bio-ontology-research-group/unit-ontology/v2023-05-23/unit-ontology.obo", + "version": "v2023-05-23" } ] }