From 540b70b94f8873da1b1b1f34a94ab1af186d2f61 Mon Sep 17 00:00:00 2001 From: Alex Wagner Date: Wed, 2 Mar 2022 23:09:28 -0500 Subject: [PATCH 01/83] update CODEOWNERS --- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index b9e932dc..ad2df34b 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,2 +1,2 @@ # Each line is a file pattern followed by one or more owners. -* @reece @ahwagner @larrybabb +* @andreasprlic @ahwagner @larrybabb From a1d38bce7dcca7b594b25def71178cb0b76df3dd Mon Sep 17 00:00:00 2001 From: Alex Handler Wagner Date: Thu, 3 Mar 2022 09:57:27 -0500 Subject: [PATCH 02/83] absolute copy and relative copy schemas --- ...{CopyNumber.rst => AbsoluteCopyNumber.rst} | 4 +- schema/defs/vrs/RelativeCopyNumber.rst | 34 ++++++++++ schema/ga4gh.yaml | 5 +- schema/vrs-source.yaml | 39 ++++++++++-- schema/vrs.json | 62 +++++++++++++++++-- schema/vrs.yaml | 44 +++++++++++-- 6 files changed, 171 insertions(+), 17 deletions(-) rename schema/defs/vrs/{CopyNumber.rst => AbsoluteCopyNumber.rst} (88%) create mode 100644 schema/defs/vrs/RelativeCopyNumber.rst diff --git a/schema/defs/vrs/CopyNumber.rst b/schema/defs/vrs/AbsoluteCopyNumber.rst similarity index 88% rename from schema/defs/vrs/CopyNumber.rst rename to schema/defs/vrs/AbsoluteCopyNumber.rst index 17a483cb..1b18fa32 100644 --- a/schema/defs/vrs/CopyNumber.rst +++ b/schema/defs/vrs/AbsoluteCopyNumber.rst @@ -4,7 +4,7 @@ The absolute count of discrete copies of a :ref:`MolecularVariation`, :ref:`Feat **Information Model** -Some CopyNumber attributes are inherited from :ref:`Variation`. +Some AbsoluteCopyNumber attributes are inherited from :ref:`Variation`. .. list-table:: :class: clean-wrap @@ -23,7 +23,7 @@ Some CopyNumber attributes are inherited from :ref:`Variation`. 
* - type - string - 1..1 - - MUST be "CopyNumber" + - MUST be "AbsoluteCopyNumber" * - subject - :ref:`MolecularVariation` | :ref:`Feature` | :ref:`SequenceExpression` | :ref:`CURIE` - 1..1 diff --git a/schema/defs/vrs/RelativeCopyNumber.rst b/schema/defs/vrs/RelativeCopyNumber.rst new file mode 100644 index 00000000..65501483 --- /dev/null +++ b/schema/defs/vrs/RelativeCopyNumber.rst @@ -0,0 +1,34 @@ +**Computational Definition** + +The relative copies of a :ref:`MolecularVariation`, :ref:`Feature`, :ref:`SequenceExpression`, or a :ref:`CURIE` reference within an unspecified baseline in a system (e.g. genome, cell, etc.). + +**Information Model** + +Some RelativeCopyNumber attributes are inherited from :ref:`Variation`. + +.. list-table:: + :class: clean-wrap + :header-rows: 1 + :align: left + :widths: auto + + * - Field + - Type + - Limits + - Description + * - _id + - :ref:`CURIE` + - 0..1 + - Variation Id. MUST be unique within document. + * - type + - string + - 1..1 + - MUST be "RelativeCopyNumber" + * - subject + - :ref:`MolecularVariation` | :ref:`Feature` | :ref:`SequenceExpression` | :ref:`CURIE` + - 1..1 + - Subject of the Copy Number object + * - relative_copy_class + - string + - 1..1 + - MUST be one of "complete loss", "partial loss", "copy neutral", "low-level gain" or "high-level gain". diff --git a/schema/ga4gh.yaml b/schema/ga4gh.yaml index 054d8d24..c96d2558 100644 --- a/schema/ga4gh.yaml +++ b/schema/ga4gh.yaml @@ -25,10 +25,9 @@ identifiers: Text: VT # Genotype: VG Haplotype: VH - CopyNumber: VCN - + AbsoluteCopyNumber: VAC + RelativeCopyNumber: VRC SequenceLocation: VSL ChromosomeLocation: VCL - regexp: '^ga4gh:(?P[^.]+)\.(?P.+)$' diff --git a/schema/vrs-source.yaml b/schema/vrs-source.yaml index bcc9f0f0..ebd76a7c 100644 --- a/schema/vrs-source.yaml +++ b/schema/vrs-source.yaml @@ -67,7 +67,8 @@ definitions: A Variation of multiple molecules in the context of a system, e.g. a genome, sample, or homologous chromosomes. 
oneOf: - - $ref: "#/definitions/CopyNumber" + - $ref: "#/definitions/AbsoluteCopyNumber" + - $ref: "#/definitions/RelativeCopyNumber" discriminator: propertyName: type @@ -176,9 +177,10 @@ definitions: # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # SystemicVariation - CopyNumber: + AbsoluteCopyNumber: additionalProperties: false type: object + status: draft description: >- The absolute count of discrete copies of a :ref:`MolecularVariation`, :ref:`Feature`, :ref:`SequenceExpression`, or a :ref:`CURIE` reference @@ -186,9 +188,9 @@ definitions: properties: type: type: string - const: "CopyNumber" + const: "AbsoluteCopyNumber" description: >- - MUST be "CopyNumber" + MUST be "AbsoluteCopyNumber" subject: oneOf: - $ref: "#/definitions/MolecularVariation" @@ -240,6 +242,35 @@ definitions: required: [ "subject", "copies" ] + RelativeCopyNumber: + additionalProperties: false + type: object + status: draft + description: >- + The relative copies of a :ref:`MolecularVariation`, + :ref:`Feature`, :ref:`SequenceExpression`, or a :ref:`CURIE` reference + within an unspecified baseline in a system (e.g. genome, cell, etc.). + properties: + type: + type: string + const: "RelativeCopyNumber" + description: >- + MUST be "RelativeCopyNumber" + subject: + oneOf: + - $ref: "#/definitions/MolecularVariation" + - $ref: "#/definitions/Feature" + - $ref: "#/definitions/SequenceExpression" + - $ref: "#/definitions/CURIE" + description: >- + Subject of the Copy Number object + relative_copy_class: + type: string + enum: [ "complete loss", "partial loss", "copy neutral", "low-level gain", "high-level gain" ] + description: >- + MUST be one of "complete loss", "partial loss", "copy neutral", "low-level gain" or "high-level gain". 
+ required: [ "subject", "relative_copy_class" ] + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Locations diff --git a/schema/vrs.json b/schema/vrs.json index 5c22a730..e718d801 100644 --- a/schema/vrs.json +++ b/schema/vrs.json @@ -52,7 +52,10 @@ "description": "A Variation of multiple molecules in the context of a system, e.g. a genome, sample, or homologous chromosomes.", "oneOf": [ { - "$ref": "#/definitions/CopyNumber" + "$ref": "#/definitions/AbsoluteCopyNumber" + }, + { + "$ref": "#/definitions/RelativeCopyNumber" } ], "discriminator": { @@ -202,9 +205,10 @@ "type" ] }, - "CopyNumber": { + "AbsoluteCopyNumber": { "additionalProperties": false, "type": "object", + "status": "draft", "description": "The absolute count of discrete copies of a MolecularVariation, Feature, SequenceExpression, or a CURIE reference within a system (e.g. genome, cell, etc.).", "properties": { "_id": { @@ -213,8 +217,8 @@ }, "type": { "type": "string", - "const": "CopyNumber", - "description": "MUST be \"CopyNumber\"" + "const": "AbsoluteCopyNumber", + "description": "MUST be \"AbsoluteCopyNumber\"" }, "subject": { "oneOf": [ @@ -319,6 +323,56 @@ "type" ] }, + "RelativeCopyNumber": { + "additionalProperties": false, + "type": "object", + "status": "draft", + "description": "The relative copies of a MolecularVariation, Feature, SequenceExpression, or a CURIE reference within an unspecified baseline in a system (e.g. genome, cell, etc.).", + "properties": { + "_id": { + "$ref": "#/definitions/CURIE", + "description": "Variation Id. MUST be unique within document." 
+ }, + "type": { + "type": "string", + "const": "RelativeCopyNumber", + "description": "MUST be \"RelativeCopyNumber\"" + }, + "subject": { + "oneOf": [ + { + "$ref": "#/definitions/MolecularVariation" + }, + { + "$ref": "#/definitions/Feature" + }, + { + "$ref": "#/definitions/SequenceExpression" + }, + { + "$ref": "#/definitions/CURIE" + } + ], + "description": "Subject of the Copy Number object" + }, + "relative_copy_class": { + "type": "string", + "enum": [ + "complete loss", + "partial loss", + "copy neutral", + "low-level gain", + "high-level gain" + ], + "description": "MUST be one of \"complete loss\", \"partial loss\", \"copy neutral\", \"low-level gain\" or \"high-level gain\"." + } + }, + "required": [ + "relative_copy_class", + "subject", + "type" + ] + }, "Location": { "description": "A contiguous segment of a biological sequence.", "oneOf": [ diff --git a/schema/vrs.yaml b/schema/vrs.yaml index 4bd5e486..f74da7da 100644 --- a/schema/vrs.yaml +++ b/schema/vrs.yaml @@ -30,7 +30,8 @@ definitions: description: A Variation of multiple molecules in the context of a system, e.g. a genome, sample, or homologous chromosomes. oneOf: - - $ref: '#/definitions/CopyNumber' + - $ref: '#/definitions/AbsoluteCopyNumber' + - $ref: '#/definitions/RelativeCopyNumber' discriminator: propertyName: type Allele: @@ -124,9 +125,10 @@ definitions: required: - members - type - CopyNumber: + AbsoluteCopyNumber: additionalProperties: false type: object + status: draft description: The absolute count of discrete copies of a MolecularVariation, Feature, SequenceExpression, or a CURIE reference within a system (e.g. genome, cell, etc.). 
@@ -134,8 +136,8 @@ definitions: _id: *id001 type: type: string - const: CopyNumber - description: MUST be "CopyNumber" + const: AbsoluteCopyNumber + description: MUST be "AbsoluteCopyNumber" subject: oneOf: - $ref: '#/definitions/MolecularVariation' @@ -186,6 +188,40 @@ definitions: - copies - subject - type + RelativeCopyNumber: + additionalProperties: false + type: object + status: draft + description: The relative copies of a MolecularVariation, Feature, SequenceExpression, + or a CURIE reference within an unspecified baseline in a system (e.g. genome, + cell, etc.). + properties: + _id: *id001 + type: + type: string + const: RelativeCopyNumber + description: MUST be "RelativeCopyNumber" + subject: + oneOf: + - $ref: '#/definitions/MolecularVariation' + - $ref: '#/definitions/Feature' + - $ref: '#/definitions/SequenceExpression' + - $ref: '#/definitions/CURIE' + description: Subject of the Copy Number object + relative_copy_class: + type: string + enum: + - complete loss + - partial loss + - copy neutral + - low-level gain + - high-level gain + description: MUST be one of "complete loss", "partial loss", "copy neutral", + "low-level gain" or "high-level gain". + required: + - relative_copy_class + - subject + - type Location: description: A contiguous segment of a biological sequence. oneOf: From 914795233fdea8a9d9bb813a1ed5aa39178c020c Mon Sep 17 00:00:00 2001 From: Alex Handler Wagner Date: Thu, 3 Mar 2022 09:59:30 -0500 Subject: [PATCH 03/83] absolute CN docs --- docs/source/terms_and_model.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/source/terms_and_model.rst b/docs/source/terms_and_model.rst index 65502771..0a97190e 100644 --- a/docs/source/terms_and_model.rst +++ b/docs/source/terms_and_model.rst @@ -371,12 +371,12 @@ Systemic Variation .. include:: defs/SystemicVariation.rst -.. _CopyNumber: +.. 
_AbsoluteCopyNumber: -CopyNumber -$$$$$$$$$$ +AbsoluteCopyNumber +$$$$$$$$$$$$$$$$$$ -*Copy Number Variation* captures the copies of a molecule within a +*Absolute Copy Number Variation* captures the copies of a molecule within a genome, and can be used to express concepts such as amplification and copy loss. Copy Number Variation has conflated meanings in the genomics community, and can mean either (or both) the notion of copy @@ -386,7 +386,7 @@ of :ref:`SystemicVariation` and so describes the number of copies in a genome. The related :ref:`MolecularVariation` concept can be expressed as an :ref:`Allele` with a :ref:`RepeatedSequenceExpression`. -.. include:: defs/CopyNumber.rst +.. include:: defs/AbsoluteCopyNumber.rst **Examples** @@ -404,7 +404,7 @@ Two, three, or four total copies of BRCA1: "gene_id": "ncbigene:348", "type": "Gene" }, - "type": "CopyNumber" + "type": "AbsoluteCopyNumber" } From 8fc1a1fc780dc8d8df9e3ffa451c67a006262a8a Mon Sep 17 00:00:00 2001 From: Reece Hart Date: Sat, 5 Mar 2022 22:40:45 -0800 Subject: [PATCH 04/83] explain ref agree normalization rules --- docs/source/appendices/design_decisions.rst | 46 +++++++-------------- 1 file changed, 16 insertions(+), 30 deletions(-) diff --git a/docs/source/appendices/design_decisions.rst b/docs/source/appendices/design_decisions.rst index c781c046..841873c3 100644 --- a/docs/source/appendices/design_decisions.rst +++ b/docs/source/appendices/design_decisions.rst @@ -32,10 +32,10 @@ Allele Rather than Variant The most primitive sequence assertion in VRS is the :ref:`Allele` entity. Colloquially, the words "allele" and "variant" have similar meanings and they are often used interchangeably. However, the VR -contributors believe that it is essential to distinguish the state of -the sequence from the change between states of a sequence. It is +contributors believe that it is essential to distinguish the *state* of +the sequence from the *change between states* of a sequence. 
It is imperative that precise terms are used when modelling data. Therefore, -within VRS, Allele refers to a state and "variant" refers to the change +within VRS, "Allele" refers to a state and "variant" refers to the change from one Allele to another. The word "variant", which implies change, makes it awkward to refer to @@ -47,42 +47,28 @@ consequence is better associated with an allele than with a variant. .. _should-normalize: -Implementations should normalize -@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +Implementations should normalize Alleles +@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ VRS STRONGLY RECOMMENDS that Alleles be :ref:`normalized ` when generating :ref:`computed identifiers -`. The rationale for recommending, rather than -requiring, normalization is grounded in dual views of Allele objects -with distinct interpretations: +` unless there is compelling reason to do otherwise. +Those reasons are the subject of this section. -* Allele as minimal representation of a change in sequence. In this - view, normalization is a process that makes the representation - minimal and unambiguous. +:ref:`Fully-justified Normalization ` is the process of comparing a span of reference sequence to a sequence state (often the alternative sequence). Normalization consists of two steps: trimming and shuffling. In the trimming step, common flanking prefix and suffix sequences are removed. For example, a CAG-to-CTG Allele would be trimmed to merely A-to-T, with the position adjusted accordingly. There are four cases of the resulting sequences: -* Allele as an assertion of state. In this view, it is reasonable to - want to assert state that may include (or be composed entirely of) - reference bases, for which the normalization process would alter the - intent. + 1. The trimmed sequences are empty: The Allele refers to reference state. + 2. The trimmed sequences are non-empty: The Allele is a substitution (perhaps multi-residue). + 3. 
The reference sequence is empty: The Allele is a net insertion. + 4. The state sequence is empty: The Allele is a net deletion. -Although this rationale applies only to Alleles, it may have have -parallels with other VRS types. In addition, it is desirable for all -VRS types to be treated similarly. +When the Allele refers to a reference state (case 1), trimming would reduce the variant to a null change. However, reduction to a null state would make it impossible to refer to a specific span of reference sequence. In order to permit users to refer to spans of reference sequence, VRS does not require normalizing reference agreement Alleles. -Furthermore, if normalization were required in order to generate -:ref:`computed-identifiers`, but did not apply to certain instances of -VRS Variation, implementations would likely require secondary -identifier mechanisms, which would undermine the intent of a global -computed identifier. +The trimming step applies only when the reference or the state sequences are empty (cases 3 and 4). When these occur in the context of repeating reference sequence that matches the inserted or deleted sequence, the Allele may be shuffled left and right to identify the fully-justified location of the variation. (See :ref:`normalization` for details.) -The primary downside of not requiring normalization is that Variation -objects might be written in non-canonical forms, thereby creating -unintended degeneracy. - -Therefore, normalization of all VRS Variation classes is optional in -order to support the view of Allele as an assertion of state on a -sequence. +In rare cases, data originators might have reason to associate an annotation with a specific repeating unit in the context of repeated sequence. In order to support this case, normalization is not strictly required. +Most users will normalize most Alleles. Normalization should be skipped only when doing so would decrease the intended precision of an Allele. .. 
_fully-justified: From f832a3475b549ac4ce0717149782bbda18fa31b8 Mon Sep 17 00:00:00 2001 From: Reece Hart Date: Sat, 5 Mar 2022 22:43:11 -0800 Subject: [PATCH 05/83] fill paragraphs for consistency --- docs/source/appendices/design_decisions.rst | 51 +++++++++++++++------ 1 file changed, 37 insertions(+), 14 deletions(-) diff --git a/docs/source/appendices/design_decisions.rst b/docs/source/appendices/design_decisions.rst index 841873c3..5513b368 100644 --- a/docs/source/appendices/design_decisions.rst +++ b/docs/source/appendices/design_decisions.rst @@ -52,23 +52,46 @@ Implementations should normalize Alleles VRS STRONGLY RECOMMENDS that Alleles be :ref:`normalized ` when generating :ref:`computed identifiers -` unless there is compelling reason to do otherwise. -Those reasons are the subject of this section. - -:ref:`Fully-justified Normalization ` is the process of comparing a span of reference sequence to a sequence state (often the alternative sequence). Normalization consists of two steps: trimming and shuffling. In the trimming step, common flanking prefix and suffix sequences are removed. For example, a CAG-to-CTG Allele would be trimmed to merely A-to-T, with the position adjusted accordingly. There are four cases of the resulting sequences: - - 1. The trimmed sequences are empty: The Allele refers to reference state. - 2. The trimmed sequences are non-empty: The Allele is a substitution (perhaps multi-residue). +` unless there is compelling reason to do +otherwise. Those reasons are the subject of this section. + +:ref:`Fully-justified Normalization ` is the process of +comparing a span of reference sequence to a sequence state (often the +alternative sequence). Normalization consists of two steps: trimming +and shuffling. In the trimming step, common flanking prefix and +suffix sequences are removed. For example, a CAG-to-CTG Allele would +be trimmed to merely A-to-T, with the position adjusted accordingly. 
+There are four cases of the resulting sequences: + + 1. The trimmed sequences are empty: The Allele refers to reference + state. + 2. The trimmed sequences are non-empty: The Allele is a substitution + (perhaps multi-residue). 3. The reference sequence is empty: The Allele is a net insertion. 4. The state sequence is empty: The Allele is a net deletion. -When the Allele refers to a reference state (case 1), trimming would reduce the variant to a null change. However, reduction to a null state would make it impossible to refer to a specific span of reference sequence. In order to permit users to refer to spans of reference sequence, VRS does not require normalizing reference agreement Alleles. - -The trimming step applies only when the reference or the state sequences are empty (cases 3 and 4). When these occur in the context of repeating reference sequence that matches the inserted or deleted sequence, the Allele may be shuffled left and right to identify the fully-justified location of the variation. (See :ref:`normalization` for details.) - -In rare cases, data originators might have reason to associate an annotation with a specific repeating unit in the context of repeated sequence. In order to support this case, normalization is not strictly required. - -Most users will normalize most Alleles. Normalization should be skipped only when doing so would decrease the intended precision of an Allele. +When the Allele refers to a reference state (case 1), trimming would +reduce the variant to a null change. However, reduction to a null +state would make it impossible to refer to a specific span of +reference sequence. In order to permit users to refer to spans of +reference sequence, VRS does not require normalizing reference +agreement Alleles. + +The trimming step applies only when the reference or the state +sequences are empty (cases 3 and 4). 
When these occur in the context +of repeating reference sequence that matches the inserted or deleted +sequence, the Allele may be shuffled left and right to identify the +fully-justified location of the variation. (See :ref:`normalization` +for details.) + +In rare cases, data originators might have reason to associate an +annotation with a specific repeating unit in the context of repeated +sequence. In order to support this case, normalization is not +strictly required. + +Most users will normalize most Alleles. Normalization should be +skipped only when doing so would decrease the intended precision of an +Allele. .. _fully-justified: From 2a7521fcb1df438b839967d1393f15c8e8f174e1 Mon Sep 17 00:00:00 2001 From: Reece Hart Date: Sat, 5 Mar 2022 22:44:14 -0800 Subject: [PATCH 06/83] swapped order of FJ and normalization rationale design decisions --- docs/source/appendices/design_decisions.rst | 57 +++++++++++---------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/docs/source/appendices/design_decisions.rst b/docs/source/appendices/design_decisions.rst index 5513b368..a2c76306 100644 --- a/docs/source/appendices/design_decisions.rst +++ b/docs/source/appendices/design_decisions.rst @@ -45,6 +45,35 @@ when referring to an unchanged residue. In some cases, such "variants" are even associated with allele frequencies. Similarly, a predicted consequence is better associated with an allele than with a variant. + +.. _fully-justified: + +Alleles are Fully Justified +@@@@@@@@@@@@@@@@@@@@@@@@@@@ + +In order to standardize the representation of sequence variation, +Alleles SHOULD be fully justified from the description of the NCBI +`Variant Overprecision Correction Algorithm (VOCA)`_. Furthermore, +normalization rules are identical for all sequence types (DNA, RNA, +and protein). + +The choice of algorithm was relatively straightforward: VOCA is +published, easily understood, easily implemented, and +covers a wide range of cases. 
+ +The choice to fully justify is a departure from other common variation +formats. The HGVS nomenclature recommendations, originally published in +1998, require that alleles be right normalized `(3' rule)`_ on all sequence +types. The Variant Call Format (VCF), released as a PDF specification +in 2009, made the conflicting choice to write variants `left (5') +normalized`_ and anchored to the previous nucleotide. + +Fully-justified alleles represent an alternate approach. A fully-justified +representation does not make an arbitrary choice of where a variant truly +occurs in a low-complexity region, but rather describes the final and +unambiguous state of the resultant sequence. + + .. _should-normalize: Implementations should normalize Alleles @@ -94,34 +123,6 @@ skipped only when doing so would decrease the intended precision of an Allele. -.. _fully-justified: - -Alleles are Fully Justified -@@@@@@@@@@@@@@@@@@@@@@@@@@@ - -In order to standardize the representation of sequence variation, -Alleles SHOULD be fully justified from the description of the NCBI -`Variant Overprecision Correction Algorithm (VOCA)`_. Furthermore, -normalization rules are identical for all sequence types (DNA, RNA, -and protein). - -The choice of algorithm was relatively straightforward: VOCA is -published, easily understood, easily implemented, and -covers a wide range of cases. - -The choice to fully justify is a departure from other common variation -formats. The HGVS nomenclature recommendations, originally published in -1998, require that alleles be right normalized `(3' rule)`_ on all sequence -types. The Variant Call Format (VCF), released as a PDF specification -in 2009, made the conflicting choice to write variants `left (5') -normalized`_ and anchored to the previous nucleotide. - -Fully-justified alleles represent an alternate approach. 
A fully-justified -representation does not make an arbitrary choice of where a variant truly -occurs in a low-complexity region, but rather describes the final and -unambiguous state of the resultant sequence. - - .. _inter-residue-coordinates-design: Inter-residue Coordinates From d56ddb780d1df566cf4d8014eb17ed2165d75fac Mon Sep 17 00:00:00 2001 From: Alex Handler Wagner Date: Tue, 8 Mar 2022 13:55:51 -0500 Subject: [PATCH 07/83] add docs --- docs/source/terms_and_model.rst | 27 +++++++++++++++++++++++++++ schema/vrs-source.yaml | 6 +++--- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/docs/source/terms_and_model.rst b/docs/source/terms_and_model.rst index 0a97190e..1103ad75 100644 --- a/docs/source/terms_and_model.rst +++ b/docs/source/terms_and_model.rst @@ -407,6 +407,33 @@ Two, three, or four total copies of BRCA1: "type": "AbsoluteCopyNumber" } +.. _RelativeCopyNumber: + +RelativeCopyNumber +$$$$$$$$$$$$$$$$$$ + +*Relative Copy Number Variation* captures a classification of copies +of a molecule within a system, relative to a baseline. These types +of Variation are common outputs from CNV callers, particularly in the +somatic domain where Absolute Copy Counts are difficult to estimate +and less useful in practice than relative statements. + +.. include:: defs/RelativeCopyNumber.rst + +**Examples** + +Low-level copy gain of BRCA1: + +.. parsed-literal:: + + { + "relative_copy_class": "low-level gain", + "subject": { + "gene_id": "ncbigene:348", + "type": "Gene" + }, + "type": "RelativeCopyNumber" + } .. 
_UtilityVariation: diff --git a/schema/vrs-source.yaml b/schema/vrs-source.yaml index ebd76a7c..ce07f891 100644 --- a/schema/vrs-source.yaml +++ b/schema/vrs-source.yaml @@ -180,7 +180,7 @@ definitions: AbsoluteCopyNumber: additionalProperties: false type: object - status: draft + maturity: draft description: >- The absolute count of discrete copies of a :ref:`MolecularVariation`, :ref:`Feature`, :ref:`SequenceExpression`, or a :ref:`CURIE` reference @@ -245,11 +245,11 @@ definitions: RelativeCopyNumber: additionalProperties: false type: object - status: draft + maturity: draft description: >- The relative copies of a :ref:`MolecularVariation`, :ref:`Feature`, :ref:`SequenceExpression`, or a :ref:`CURIE` reference - within an unspecified baseline in a system (e.g. genome, cell, etc.). + against an unspecified baseline in a system (e.g. genome, cell, etc.). properties: type: type: string From d4d3d1da6a0a461915b2b6a95aa08ec44df0a290 Mon Sep 17 00:00:00 2001 From: Alex Handler Wagner Date: Tue, 8 Mar 2022 13:57:35 -0500 Subject: [PATCH 08/83] build artifacts --- schema/defs/vrs/RelativeCopyNumber.rst | 2 +- schema/vrs.json | 6 +++--- schema/vrs.yaml | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/schema/defs/vrs/RelativeCopyNumber.rst b/schema/defs/vrs/RelativeCopyNumber.rst index 65501483..059316f3 100644 --- a/schema/defs/vrs/RelativeCopyNumber.rst +++ b/schema/defs/vrs/RelativeCopyNumber.rst @@ -1,6 +1,6 @@ **Computational Definition** -The relative copies of a :ref:`MolecularVariation`, :ref:`Feature`, :ref:`SequenceExpression`, or a :ref:`CURIE` reference within an unspecified baseline in a system (e.g. genome, cell, etc.). +The relative copies of a :ref:`MolecularVariation`, :ref:`Feature`, :ref:`SequenceExpression`, or a :ref:`CURIE` reference against an unspecified baseline in a system (e.g. genome, cell, etc.). 
**Information Model** diff --git a/schema/vrs.json b/schema/vrs.json index e718d801..ef20bca1 100644 --- a/schema/vrs.json +++ b/schema/vrs.json @@ -208,7 +208,7 @@ "AbsoluteCopyNumber": { "additionalProperties": false, "type": "object", - "status": "draft", + "maturity": "draft", "description": "The absolute count of discrete copies of a MolecularVariation, Feature, SequenceExpression, or a CURIE reference within a system (e.g. genome, cell, etc.).", "properties": { "_id": { @@ -326,8 +326,8 @@ "RelativeCopyNumber": { "additionalProperties": false, "type": "object", - "status": "draft", - "description": "The relative copies of a MolecularVariation, Feature, SequenceExpression, or a CURIE reference within an unspecified baseline in a system (e.g. genome, cell, etc.).", + "maturity": "draft", + "description": "The relative copies of a MolecularVariation, Feature, SequenceExpression, or a CURIE reference against an unspecified baseline in a system (e.g. genome, cell, etc.).", "properties": { "_id": { "$ref": "#/definitions/CURIE", diff --git a/schema/vrs.yaml b/schema/vrs.yaml index f74da7da..f460fb52 100644 --- a/schema/vrs.yaml +++ b/schema/vrs.yaml @@ -128,7 +128,7 @@ definitions: AbsoluteCopyNumber: additionalProperties: false type: object - status: draft + maturity: draft description: The absolute count of discrete copies of a MolecularVariation, Feature, SequenceExpression, or a CURIE reference within a system (e.g. genome, cell, etc.). @@ -191,9 +191,9 @@ definitions: RelativeCopyNumber: additionalProperties: false type: object - status: draft + maturity: draft description: The relative copies of a MolecularVariation, Feature, SequenceExpression, - or a CURIE reference within an unspecified baseline in a system (e.g. genome, + or a CURIE reference against an unspecified baseline in a system (e.g. genome, cell, etc.). 
properties: _id: *id001 From df49a60a4118e7367d701adc78ff811a3a70afd2 Mon Sep 17 00:00:00 2001 From: korikuzma Date: Thu, 10 Mar 2022 20:21:16 -0500 Subject: [PATCH 09/83] Update validation models for Copy Number variation --- validation/models.yaml | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/validation/models.yaml b/validation/models.yaml index 54ec9253..91be8488 100644 --- a/validation/models.yaml +++ b/validation/models.yaml @@ -1,19 +1,19 @@ Number: - - + - in: type: Number value: 55 out: ga4gh_serialize: '{"type":"Number","value":55}' Gene: - - + - in: gene_id: ncbigene:384 type: Gene out: ga4gh_serialize: '{"gene_id":"ncbigene:384","type":"Gene"}' SimpleInterval: - - + - in: end: 44908822 start: 44908821 @@ -262,7 +262,7 @@ Haplotype: ga4gh_digest: i8owCOBHIlRCPtcw_WzRFNTunwJRy99- ga4gh_identify: ga4gh:VH.i8owCOBHIlRCPtcw_WzRFNTunwJRy99- ga4gh_serialize: '{"members":["-kUJh47Pu24Y3Wdsk1rXEDKsXWNY-68x","Z_rYRxpUvwqCLsCBO3YLl70o2uf9_Op1"],"type":"Haplotype"}' -CopyNumber: +AbsoluteCopyNumber: - name: ">=3 copies APOE" in: copies: @@ -270,13 +270,25 @@ CopyNumber: type: IndefiniteRange value: 3 subject: - gene_id: ncbigene:384 + gene_id: ncbigene:348 + type: Gene + type: AbsoluteCopyNumber + out: + ga4gh_digest: 5DNZrhIFslE6Eeo0CsDyQQERR6x7v9OE + ga4gh_identify: ga4gh:VAC.5DNZrhIFslE6Eeo0CsDyQQERR6x7v9OE + ga4gh_serialize: '{"copies":{"comparator":">=","type":"IndefiniteRange","value":3},"subject":{"gene_id":"ncbigene:348","type":"Gene"},"type":"AbsoluteCopyNumber"}' +RelativeCopyNumber: + - name: "Low-level copy gain of BRCA1" + in: + relative_copy_class: low-level gain + subject: + gene_id: ncbigene:348 type: Gene - type: CopyNumber + type: RelativeCopyNumber out: - ga4gh_digest: xksSWn--_z28Qaj-Udlhot4OKqYGkywy - ga4gh_identify: ga4gh:VCN.xksSWn--_z28Qaj-Udlhot4OKqYGkywy - ga4gh_serialize: 
'{"copies":{"comparator":">=","type":"IndefiniteRange","value":3},"subject":{"gene_id":"ncbigene:384","type":"Gene"},"type":"CopyNumber"}' + ga4gh_digest: CUf7xZJd36mapcm0h3cOZR8kjM_Aj4UV + ga4gh_identify: ga4gh:VRC.CUf7xZJd36mapcm0h3cOZR8kjM_Aj4UV + ga4gh_serialize: '{"relative_copy_class":"low-level gain","subject":{"gene_id":"ncbigene:348","type":"Gene"},"type":"RelativeCopyNumber"}' Text: - in: From 857642e1247e22a01a65ccdc2172f306e02c3488 Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Tue, 15 Mar 2022 14:42:45 -0400 Subject: [PATCH 10/83] Move SequenceExpression ahead of SequenceState --- schema/defs/vrs/ComposedSequenceExpression.rst | 4 ++-- schema/vrs-source.yaml | 12 ++++++------ schema/vrs.json | 8 ++++---- schema/vrs.yaml | 12 ++++++------ 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/schema/defs/vrs/ComposedSequenceExpression.rst b/schema/defs/vrs/ComposedSequenceExpression.rst index 88959bae..fc777bd6 100644 --- a/schema/defs/vrs/ComposedSequenceExpression.rst +++ b/schema/defs/vrs/ComposedSequenceExpression.rst @@ -1,6 +1,6 @@ **Computational Definition** -An expression of a sequence composed from multiple other :ref:`Sequence Expressions` objects. MUST have at least one component that is not a ref:`LiteralSequenceExpression`. CANNOT be composed from nested composed sequence expressions. +An expression of a sequence composed from multiple other :ref:`Sequence Expressions` objects. MUST have at least one component that is not a ref:`LiteralSequenceExpression`. CANNOT be composed from nested composed sequence expressions. **Information Model** @@ -23,4 +23,4 @@ Some ComposedSequenceExpression attributes are inherited from :ref:`SequenceExpr * - components - :ref:`LiteralSequenceExpression` | :ref:`RepeatedSequenceExpression` | :ref:`DerivedSequenceExpression` - 2..m - - An ordered list of :ref:`SequenceExpression` components comprising the expression. 
+ - An ordered list of :ref:`SequenceExpression` components comprising the expression. diff --git a/schema/vrs-source.yaml b/schema/vrs-source.yaml index ce07f891..e2899ba0 100644 --- a/schema/vrs-source.yaml +++ b/schema/vrs-source.yaml @@ -99,8 +99,8 @@ definitions: Where Allele is located state: oneOf: - - $ref: "#/definitions/SequenceState" # DEPRECATED; remove in 2.0 - $ref: "#/definitions/SequenceExpression" + - $ref: "#/definitions/SequenceState" # DEPRECATED; remove in 2.0 description: >- An expression of the sequence state deprecated: @@ -602,10 +602,10 @@ definitions: ComposedSequenceExpression: description: >- - An expression of a sequence composed from multiple other - :ref:`Sequence Expressions` - objects. MUST have at least one component that is not a - ref:`LiteralSequenceExpression`. CANNOT be composed from + An expression of a sequence composed from multiple other + :ref:`Sequence Expressions` + objects. MUST have at least one component that is not a + ref:`LiteralSequenceExpression`. CANNOT be composed from nested composed sequence expressions. additionalProperties: false type: object @@ -628,7 +628,7 @@ definitions: - $ref: "#/definitions/RepeatedSequenceExpression" - $ref: "#/definitions/DerivedSequenceExpression" description: >- - An ordered list of :ref:`SequenceExpression` components + An ordered list of :ref:`SequenceExpression` components comprising the expression. 
required: [ "components" ] diff --git a/schema/vrs.json b/schema/vrs.json index ef20bca1..1b2c3923 100644 --- a/schema/vrs.json +++ b/schema/vrs.json @@ -90,10 +90,10 @@ "state": { "oneOf": [ { - "$ref": "#/definitions/SequenceState" + "$ref": "#/definitions/SequenceExpression" }, { - "$ref": "#/definitions/SequenceExpression" + "$ref": "#/definitions/SequenceState" } ], "description": "An expression of the sequence state", @@ -834,7 +834,7 @@ ] }, "ComposedSequenceExpression": { - "description": "An expression of a sequence composed from multiple other Sequence Expressions objects. MUST have at least one component that is not a ref:`LiteralSequenceExpression`. CANNOT be composed from nested composed sequence expressions.", + "description": "An expression of a sequence composed from multiple other Sequence Expressions objects. MUST have at least one component that is not a ref:`LiteralSequenceExpression`. CANNOT be composed from nested composed sequence expressions.", "additionalProperties": false, "type": "object", "properties": { @@ -870,7 +870,7 @@ } ] }, - "description": "An ordered list of SequenceExpression components comprising the expression." + "description": "An ordered list of SequenceExpression components comprising the expression." } }, "required": [ diff --git a/schema/vrs.yaml b/schema/vrs.yaml index f460fb52..543d03db 100644 --- a/schema/vrs.yaml +++ b/schema/vrs.yaml @@ -53,8 +53,8 @@ definitions: description: Where Allele is located state: oneOf: - - $ref: '#/definitions/SequenceState' - $ref: '#/definitions/SequenceExpression' + - $ref: '#/definitions/SequenceState' description: An expression of the sequence state deprecated: - $ref: '#/definitions/SequenceState' @@ -509,9 +509,9 @@ definitions: - seq_expr - type ComposedSequenceExpression: - description: An expression of a sequence composed from multiple other Sequence - Expressions objects. MUST have at least one component that is not a ref:`LiteralSequenceExpression`. 
- CANNOT be composed from nested composed sequence expressions. + description: An expression of a sequence composed from multiple other Sequence + Expressions objects. MUST have at least one component that is not a ref:`LiteralSequenceExpression`. + CANNOT be composed from nested composed sequence expressions. additionalProperties: false type: object properties: @@ -532,8 +532,8 @@ definitions: oneOf: - $ref: '#/definitions/RepeatedSequenceExpression' - $ref: '#/definitions/DerivedSequenceExpression' - description: An ordered list of SequenceExpression components comprising - the expression. + description: An ordered list of SequenceExpression components comprising the + expression. required: - components - type From f9765b44bf5749b1bd6e96fbf91a9ae226db7c3d Mon Sep 17 00:00:00 2001 From: Alex Handler Wagner Date: Wed, 23 Mar 2022 21:09:18 -0400 Subject: [PATCH 11/83] closes 386 --- docs/source/impl-guide/computed_identifiers.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/impl-guide/computed_identifiers.rst b/docs/source/impl-guide/computed_identifiers.rst index 2fb8662b..3aa7f4ae 100644 --- a/docs/source/impl-guide/computed_identifiers.rst +++ b/docs/source/impl-guide/computed_identifiers.rst @@ -193,7 +193,7 @@ Truncated Digest (sha512t24u) The sha512t24u truncated digest algorithm [Hart2020]_ computes an ASCII digest from binary data. The method uses two well-established standard algorithms, the `SHA-512`_ hash function, which generates a binary -digest from binary data, and `Base64`_ URL encoding, which encodes +digest from binary data, and `base64url`_ encoding, which encodes binary data using printable characters. 
Computing the sha512t24u truncated digest for binary data consists of From 15c42b88b213c8368ce4d2f2bfea32036077fae1 Mon Sep 17 00:00:00 2001 From: Alex Handler Wagner Date: Thu, 24 Mar 2022 10:47:00 -0400 Subject: [PATCH 12/83] second revision per https://github.com/ga4gh/vrs/pull/387\#issuecomment-1077616351 --- docs/source/impl-guide/computed_identifiers.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/impl-guide/computed_identifiers.rst b/docs/source/impl-guide/computed_identifiers.rst index 3aa7f4ae..3e93b2a1 100644 --- a/docs/source/impl-guide/computed_identifiers.rst +++ b/docs/source/impl-guide/computed_identifiers.rst @@ -193,7 +193,7 @@ Truncated Digest (sha512t24u) The sha512t24u truncated digest algorithm [Hart2020]_ computes an ASCII digest from binary data. The method uses two well-established standard algorithms, the `SHA-512`_ hash function, which generates a binary -digest from binary data, and `base64url`_ encoding, which encodes +digest from binary data, and a URL-safe variant of `Base64`_ encoding, which encodes binary data using printable characters. 
Computing the sha512t24u truncated digest for binary data consists of From 67c47684eb035cd2c3665930bb309018b04b09ff Mon Sep 17 00:00:00 2001 From: Alex Handler Wagner Date: Thu, 24 Mar 2022 10:57:21 -0400 Subject: [PATCH 13/83] add Sphinx version to requirements --- .requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.requirements.txt b/.requirements.txt index eeb24e5e..9f502d15 100644 --- a/.requirements.txt +++ b/.requirements.txt @@ -3,4 +3,5 @@ python-jsonschema-objects>=0.3,<=0.3.10 jsonschema==3.2.0 ipython pyyaml -ga4gh.gks.metaschema>=0.1.1 \ No newline at end of file +ga4gh.gks.metaschema>=0.1.1 +Sphinx==3.5.4 \ No newline at end of file From 9eae6839e73514ffedc6a3b4cc7c4fb110dd6734 Mon Sep 17 00:00:00 2001 From: Alex Handler Wagner Date: Thu, 24 Mar 2022 11:00:28 -0400 Subject: [PATCH 14/83] update python version --- .readthedocs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 30994c13..f1c77f45 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -4,6 +4,6 @@ sphinx: configuration: docs/source/conf.py python: - version: "3.8" + version: "3.9" install: - requirements: docs/source/requirements.txt \ No newline at end of file From 6994599a66e89e294bad619ab29d2ff5c9335c11 Mon Sep 17 00:00:00 2001 From: Alex Handler Wagner Date: Thu, 24 Mar 2022 11:03:43 -0400 Subject: [PATCH 15/83] revert to 3.8 for RTD support --- .readthedocs.yaml | 2 +- .requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index f1c77f45..30994c13 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -4,6 +4,6 @@ sphinx: configuration: docs/source/conf.py python: - version: "3.9" + version: "3.8" install: - requirements: docs/source/requirements.txt \ No newline at end of file diff --git a/.requirements.txt b/.requirements.txt index 9f502d15..65e0c4fb 100644 --- a/.requirements.txt +++ b/.requirements.txt @@ 
-4,4 +4,4 @@ jsonschema==3.2.0 ipython pyyaml ga4gh.gks.metaschema>=0.1.1 -Sphinx==3.5.4 \ No newline at end of file +sphinx ~= 3.5 \ No newline at end of file From 0b0c8807ecbffcfc45c1d5c2fc94cf379455b18c Mon Sep 17 00:00:00 2001 From: Alex Handler Wagner Date: Thu, 24 Mar 2022 11:14:35 -0400 Subject: [PATCH 16/83] I think jinja2 broke stuff for RTD, fixing version --- docs/source/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/source/requirements.txt b/docs/source/requirements.txt index ea95d523..01d3cb96 100644 --- a/docs/source/requirements.txt +++ b/docs/source/requirements.txt @@ -1 +1,2 @@ -sphinx ~= 3.5 \ No newline at end of file +sphinx ~= 3.5 +jinja2 == 3.0.3 \ No newline at end of file From b12347cabf0584e7a43b1ca7a2bf6807e20ce1aa Mon Sep 17 00:00:00 2001 From: "Alex H. Wagner, PhD" Date: Fri, 8 Apr 2022 20:45:31 -0400 Subject: [PATCH 17/83] Update docs/source/appendices/design_decisions.rst --- docs/source/appendices/design_decisions.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/appendices/design_decisions.rst b/docs/source/appendices/design_decisions.rst index a2c76306..14d6f151 100644 --- a/docs/source/appendices/design_decisions.rst +++ b/docs/source/appendices/design_decisions.rst @@ -32,8 +32,8 @@ Allele Rather than Variant The most primitive sequence assertion in VRS is the :ref:`Allele` entity. Colloquially, the words "allele" and "variant" have similar meanings and they are often used interchangeably. However, the VR -contributors believe that it is essential to distinguish the *state* of -the sequence from the *change between states* of a sequence. It is +contributors assert that it is essential to distinguish between the *state of* +a reference sequence from the *change from* a reference sequence. It is imperative that precise terms are used when modelling data. 
Therefore, within VRS, "Allele" refers to a state and "variant" refers to the change from one Allele to another. From 4bc9ab7c9da2e37cc5b27b6b0730f8bb63033a00 Mon Sep 17 00:00:00 2001 From: "Alex H. Wagner, PhD" Date: Fri, 8 Apr 2022 20:45:44 -0400 Subject: [PATCH 18/83] Update docs/source/appendices/design_decisions.rst --- docs/source/appendices/design_decisions.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/appendices/design_decisions.rst b/docs/source/appendices/design_decisions.rst index 14d6f151..4edef074 100644 --- a/docs/source/appendices/design_decisions.rst +++ b/docs/source/appendices/design_decisions.rst @@ -35,7 +35,7 @@ meanings and they are often used interchangeably. However, the VR contributors assert that it is essential to distinguish between the *state of* a reference sequence from the *change from* a reference sequence. It is imperative that precise terms are used when modelling data. Therefore, -within VRS, "Allele" refers to a state and "variant" refers to the change +within VRS, "allele" refers to a state of a reference sequence and "variant" refers to a change from one Allele to another. The word "variant", which implies change, makes it awkward to refer to From 99e001af6784a098c21e4067a6c8ec7367c64bf5 Mon Sep 17 00:00:00 2001 From: "Alex H. Wagner, PhD" Date: Fri, 8 Apr 2022 20:46:01 -0400 Subject: [PATCH 19/83] Update docs/source/appendices/design_decisions.rst --- docs/source/appendices/design_decisions.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/appendices/design_decisions.rst b/docs/source/appendices/design_decisions.rst index 4edef074..d81c6588 100644 --- a/docs/source/appendices/design_decisions.rst +++ b/docs/source/appendices/design_decisions.rst @@ -36,7 +36,7 @@ contributors assert that it is essential to distinguish between the *state of* a reference sequence from the *change from* a reference sequence. 
It is imperative that precise terms are used when modelling data. Therefore, within VRS, "allele" refers to a state of a reference sequence and "variant" refers to a change -from one Allele to another. +from a reference sequence. The word "variant", which implies change, makes it awkward to refer to the (unchanged) reference allele. Some systems will use an HGVS-like From 8dd547de08e40d367aca93fdbfbce73187ec45c8 Mon Sep 17 00:00:00 2001 From: "Alex H. Wagner, PhD" Date: Fri, 8 Apr 2022 20:46:12 -0400 Subject: [PATCH 20/83] Update docs/source/appendices/design_decisions.rst --- docs/source/appendices/design_decisions.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/appendices/design_decisions.rst b/docs/source/appendices/design_decisions.rst index d81c6588..b437cd24 100644 --- a/docs/source/appendices/design_decisions.rst +++ b/docs/source/appendices/design_decisions.rst @@ -84,7 +84,7 @@ VRS STRONGLY RECOMMENDS that Alleles be :ref:`normalized ` unless there is compelling reason to do otherwise. Those reasons are the subject of this section. -:ref:`Fully-justified Normalization ` is the process of +:ref:`Allele Normalization ` is the process of comparing a span of reference sequence to a sequence state (often the alternative sequence). Normalization consists of two steps: trimming and shuffling. In the trimming step, common flanking prefix and From 4ce25041cf8f2b75cd60668cc4651507439e9215 Mon Sep 17 00:00:00 2001 From: "Alex H. Wagner, PhD" Date: Fri, 8 Apr 2022 20:46:25 -0400 Subject: [PATCH 21/83] Update docs/source/appendices/design_decisions.rst --- docs/source/appendices/design_decisions.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/appendices/design_decisions.rst b/docs/source/appendices/design_decisions.rst index b437cd24..b42a04d5 100644 --- a/docs/source/appendices/design_decisions.rst +++ b/docs/source/appendices/design_decisions.rst @@ -86,7 +86,7 @@ otherwise. 
Those reasons are the subject of this section. :ref:`Allele Normalization ` is the process of comparing a span of reference sequence to a sequence state (often the -alternative sequence). Normalization consists of two steps: trimming +alternative sequence) and resolving that span to an unambiguous form. The fully-justified Allele normalization in VRS consists of two steps: trimming and shuffling. In the trimming step, common flanking prefix and suffix sequences are removed. For example, a CAG-to-CTG Allele would be trimmed to merely A-to-T, with the position adjusted accordingly. From c79382be679fba3e46d734d5ed62340fbfb6d92f Mon Sep 17 00:00:00 2001 From: korikuzma Date: Wed, 13 Apr 2022 16:25:27 -0400 Subject: [PATCH 22/83] Add default value for types + update readme for using smoketests --- README.md | 10 +++++----- schema/vrs-source.yaml | 20 ++++++++++++++++++++ schema/vrs.json | 20 ++++++++++++++++++++ schema/vrs.yaml | 20 ++++++++++++++++++++ 4 files changed, 65 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 97d44d3c..630ee4a0 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ initiatives, and diagnostic testing laboratories. VRS is licensed under the [Apache License 2.0](LICENSE). -> **NOTE:** VRS is under active development. See [VR +> **NOTE:** VRS is under active development. See [VR > Project Roadmap](https://github.com/orgs/ga4gh/projects/5). @@ -67,12 +67,12 @@ To watch for changes and update automatically: The VR specification documentation is written in reStructuredText and located in `docs/source/`. Commits to this repo are built -automatically at `vrs.ga4gh.org`. +automatically at `vrs.ga4gh.org`. To build documentation locally, type: make -C docs clean watch & - + Then, open `docs/build/html/index.html`. The above make command should build docs when source changes. (Some types of changes require recleaning and building.) @@ -95,7 +95,7 @@ The smoketests require python 3.8+. 
This is the recommended setup: $ python3 -m venv venv $ source venv/bin/activate $ pip install -U setuptools pip -$ pip install -r requirements.txt -$ pytest +$ pip install -r .requirements.txt +$ python3 -m pytest ``` diff --git a/schema/vrs-source.yaml b/schema/vrs-source.yaml index e2899ba0..2f8e5c6b 100644 --- a/schema/vrs-source.yaml +++ b/schema/vrs-source.yaml @@ -89,6 +89,7 @@ definitions: type: type: string const: "Allele" + default: "Allele" description: >- MUST be "Allele" location: @@ -116,6 +117,7 @@ definitions: type: type: string const: "Haplotype" + default: "Haplotype" description: >- MUST be "Haplotype" members: @@ -143,6 +145,7 @@ definitions: type: type: string const: "Text" + default: "Text" description: MUST be "Text" definition: type: string @@ -160,6 +163,7 @@ definitions: type: type: string const: "VariationSet" + default: "VariationSet" description: MUST be "VariationSet" members: type: array @@ -189,6 +193,7 @@ definitions: type: type: string const: "AbsoluteCopyNumber" + default: "AbsoluteCopyNumber" description: >- MUST be "AbsoluteCopyNumber" subject: @@ -254,6 +259,7 @@ definitions: type: type: string const: "RelativeCopyNumber" + default: "RelativeCopyNumber" description: >- MUST be "RelativeCopyNumber" subject: @@ -302,6 +308,7 @@ definitions: type: type: string const: "ChromosomeLocation" + default: "ChromosomeLocation" description: MUST be "ChromosomeLocation" species_id: $ref: "#/definitions/CURIE" @@ -330,6 +337,7 @@ definitions: type: type: string const: "SequenceLocation" + default: "SequenceLocation" description: MUST be "SequenceLocation" sequence_id: $ref: "#/definitions/CURIE" @@ -364,6 +372,7 @@ definitions: type: type: string const: "SequenceInterval" + default: "SequenceInterval" description: MUST be "SequenceInterval" start: oneOf: @@ -463,6 +472,7 @@ definitions: type: type: string const: "CytobandInterval" + default: "CytobandInterval" description: MUST be "CytobandInterval" start: $ref: "#/definitions/HumanCytoband" 
@@ -510,6 +520,7 @@ definitions: type: type: string const: "LiteralSequenceExpression" + default: "LiteralSequenceExpression" description: MUST be "LiteralSequenceExpression" sequence: $ref: "#/definitions/Sequence" @@ -530,6 +541,7 @@ definitions: type: type: string const: "DerivedSequenceExpression" + default: "DerivedSequenceExpression" description: MUST be "DerivedSequenceExpression" location: $ref: "#/definitions/SequenceLocation" @@ -551,6 +563,7 @@ definitions: type: type: string const: "RepeatedSequenceExpression" + default: "RepeatedSequenceExpression" description: MUST be "RepeatedSequenceExpression" seq_expr: oneOf: @@ -613,6 +626,7 @@ definitions: type: type: string const: "ComposedSequenceExpression" + default: "ComposedSequenceExpression" description: MUST be "ComposedSequenceExpression" components: type: array @@ -662,6 +676,7 @@ definitions: type: type: string const: "Gene" + default: "Gene" description: MUST be "Gene" gene_id: $ref: "#/definitions/CURIE" @@ -681,6 +696,7 @@ definitions: type: type: string const: "Number" + default: "Number" description: MUST be "Number" value: type: integer @@ -696,6 +712,7 @@ definitions: type: type: string const: "DefiniteRange" + default: "DefiniteRange" description: MUST be "DefiniteRange" min: type: number @@ -717,6 +734,7 @@ definitions: type: type: string const: "IndefiniteRange" + default: "IndefiniteRange" description: MUST be "IndefiniteRange" value: type: number @@ -793,6 +811,7 @@ definitions: type: type: string const: "SequenceState" + default: "SequenceState" description: MUST be "SequenceState" sequence: $ref: "#/definitions/Sequence" @@ -815,6 +834,7 @@ definitions: type: type: string const: "SimpleInterval" + default: "SimpleInterval" description: MUST be "SimpleInterval" start: type: integer diff --git a/schema/vrs.json b/schema/vrs.json index 1b2c3923..653092d0 100644 --- a/schema/vrs.json +++ b/schema/vrs.json @@ -74,6 +74,7 @@ "type": { "type": "string", "const": "Allele", + "default": 
"Allele", "description": "MUST be \"Allele\"" }, "location": { @@ -122,6 +123,7 @@ "type": { "type": "string", "const": "Haplotype", + "default": "Haplotype", "description": "MUST be \"Haplotype\"" }, "members": { @@ -158,6 +160,7 @@ "type": { "type": "string", "const": "Text", + "default": "Text", "description": "MUST be \"Text\"" }, "definition": { @@ -182,6 +185,7 @@ "type": { "type": "string", "const": "VariationSet", + "default": "VariationSet", "description": "MUST be \"VariationSet\"" }, "members": { @@ -218,6 +222,7 @@ "type": { "type": "string", "const": "AbsoluteCopyNumber", + "default": "AbsoluteCopyNumber", "description": "MUST be \"AbsoluteCopyNumber\"" }, "subject": { @@ -336,6 +341,7 @@ "type": { "type": "string", "const": "RelativeCopyNumber", + "default": "RelativeCopyNumber", "description": "MUST be \"RelativeCopyNumber\"" }, "subject": { @@ -399,6 +405,7 @@ "type": { "type": "string", "const": "ChromosomeLocation", + "default": "ChromosomeLocation", "description": "MUST be \"ChromosomeLocation\"" }, "species_id": { @@ -434,6 +441,7 @@ "type": { "type": "string", "const": "SequenceLocation", + "default": "SequenceLocation", "description": "MUST be \"SequenceLocation\"" }, "sequence_id": { @@ -466,6 +474,7 @@ "type": { "type": "string", "const": "SequenceInterval", + "default": "SequenceInterval", "description": "MUST be \"SequenceInterval\"" }, "start": { @@ -639,6 +648,7 @@ "type": { "type": "string", "const": "CytobandInterval", + "default": "CytobandInterval", "description": "MUST be \"CytobandInterval\"" }, "start": { @@ -689,6 +699,7 @@ "type": { "type": "string", "const": "LiteralSequenceExpression", + "default": "LiteralSequenceExpression", "description": "MUST be \"LiteralSequenceExpression\"" }, "sequence": { @@ -709,6 +720,7 @@ "type": { "type": "string", "const": "DerivedSequenceExpression", + "default": "DerivedSequenceExpression", "description": "MUST be \"DerivedSequenceExpression\"" }, "location": { @@ -734,6 +746,7 @@ "type": { 
"type": "string", "const": "RepeatedSequenceExpression", + "default": "RepeatedSequenceExpression", "description": "MUST be \"RepeatedSequenceExpression\"" }, "seq_expr": { @@ -841,6 +854,7 @@ "type": { "type": "string", "const": "ComposedSequenceExpression", + "default": "ComposedSequenceExpression", "description": "MUST be \"ComposedSequenceExpression\"" }, "components": { @@ -897,6 +911,7 @@ "type": { "type": "string", "const": "Gene", + "default": "Gene", "description": "MUST be \"Gene\"" }, "gene_id": { @@ -917,6 +932,7 @@ "type": { "type": "string", "const": "Number", + "default": "Number", "description": "MUST be \"Number\"" }, "value": { @@ -937,6 +953,7 @@ "type": { "type": "string", "const": "DefiniteRange", + "default": "DefiniteRange", "description": "MUST be \"DefiniteRange\"" }, "min": { @@ -962,6 +979,7 @@ "type": { "type": "string", "const": "IndefiniteRange", + "default": "IndefiniteRange", "description": "MUST be \"IndefiniteRange\"" }, "value": { @@ -1018,6 +1036,7 @@ "type": { "type": "string", "const": "SequenceState", + "default": "SequenceState", "description": "MUST be \"SequenceState\"" }, "sequence": { @@ -1043,6 +1062,7 @@ "type": { "type": "string", "const": "SimpleInterval", + "default": "SimpleInterval", "description": "MUST be \"SimpleInterval\"" }, "start": { diff --git a/schema/vrs.yaml b/schema/vrs.yaml index 543d03db..424aa9f6 100644 --- a/schema/vrs.yaml +++ b/schema/vrs.yaml @@ -45,6 +45,7 @@ definitions: type: type: string const: Allele + default: Allele description: MUST be "Allele" location: oneOf: @@ -72,6 +73,7 @@ definitions: type: type: string const: Haplotype + default: Haplotype description: MUST be "Haplotype" members: type: array @@ -95,6 +97,7 @@ definitions: type: type: string const: Text + default: Text description: MUST be "Text" definition: type: string @@ -112,6 +115,7 @@ definitions: type: type: string const: VariationSet + default: VariationSet description: MUST be "VariationSet" members: type: array @@ -137,6 
+141,7 @@ definitions: type: type: string const: AbsoluteCopyNumber + default: AbsoluteCopyNumber description: MUST be "AbsoluteCopyNumber" subject: oneOf: @@ -200,6 +205,7 @@ definitions: type: type: string const: RelativeCopyNumber + default: RelativeCopyNumber description: MUST be "RelativeCopyNumber" subject: oneOf: @@ -240,6 +246,7 @@ definitions: type: type: string const: ChromosomeLocation + default: ChromosomeLocation description: MUST be "ChromosomeLocation" species_id: $ref: '#/definitions/CURIE' @@ -267,6 +274,7 @@ definitions: type: type: string const: SequenceLocation + default: SequenceLocation description: MUST be "SequenceLocation" sequence_id: $ref: '#/definitions/CURIE' @@ -290,6 +298,7 @@ definitions: type: type: string const: SequenceInterval + default: SequenceInterval description: MUST be "SequenceInterval" start: oneOf: @@ -386,6 +395,7 @@ definitions: type: type: string const: CytobandInterval + default: CytobandInterval description: MUST be "CytobandInterval" start: $ref: '#/definitions/HumanCytoband' @@ -420,6 +430,7 @@ definitions: type: type: string const: LiteralSequenceExpression + default: LiteralSequenceExpression description: MUST be "LiteralSequenceExpression" sequence: $ref: '#/definitions/Sequence' @@ -439,6 +450,7 @@ definitions: type: type: string const: DerivedSequenceExpression + default: DerivedSequenceExpression description: MUST be "DerivedSequenceExpression" location: $ref: '#/definitions/SequenceLocation' @@ -459,6 +471,7 @@ definitions: type: type: string const: RepeatedSequenceExpression + default: RepeatedSequenceExpression description: MUST be "RepeatedSequenceExpression" seq_expr: oneOf: @@ -518,6 +531,7 @@ definitions: type: type: string const: ComposedSequenceExpression + default: ComposedSequenceExpression description: MUST be "ComposedSequenceExpression" components: type: array @@ -555,6 +569,7 @@ definitions: type: type: string const: Gene + default: Gene description: MUST be "Gene" gene_id: $ref: 
'#/definitions/CURIE' @@ -570,6 +585,7 @@ definitions: type: type: string const: Number + default: Number description: MUST be "Number" value: type: integer @@ -585,6 +601,7 @@ definitions: type: type: string const: DefiniteRange + default: DefiniteRange description: MUST be "DefiniteRange" min: type: number @@ -607,6 +624,7 @@ definitions: type: type: string const: IndefiniteRange + default: IndefiniteRange description: MUST be "IndefiniteRange" value: type: number @@ -663,6 +681,7 @@ definitions: type: type: string const: SequenceState + default: SequenceState description: MUST be "SequenceState" sequence: $ref: '#/definitions/Sequence' @@ -684,6 +703,7 @@ definitions: type: type: string const: SimpleInterval + default: SimpleInterval description: MUST be "SimpleInterval" start: type: integer From cb3e5d63c9761a60aff8d8d9eb879c70c1b0e6b0 Mon Sep 17 00:00:00 2001 From: Michael Baudis Date: Fri, 1 Jul 2022 12:08:54 +0200 Subject: [PATCH 23/83] C & P error fix in CytobandInterval The description for `end` used `start` ... No code change. --- schema/vrs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/schema/vrs.yaml b/schema/vrs.yaml index 424aa9f6..7d87125f 100644 --- a/schema/vrs.yaml +++ b/schema/vrs.yaml @@ -403,7 +403,7 @@ definitions: end (telomere) of the chromosome p-arm than `end`. end: $ref: '#/definitions/HumanCytoband' - description: The start cytoband region. MUST specify a region nearer the terminal + description: The end cytoband region. MUST specify a region nearer the terminal end (telomere) of the chromosome q-arm than `start`. 
example: type: CytobandInterval From 1645e77b743640efc430504ab9adb0fffafb90fc Mon Sep 17 00:00:00 2001 From: Alex Handler Wagner Date: Mon, 11 Jul 2022 16:39:11 -0500 Subject: [PATCH 24/83] restricting Haplotypes to 2+ members (Tristan) --- schema/vrs-source.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/schema/vrs-source.yaml b/schema/vrs-source.yaml index 2f8e5c6b..9c482af0 100644 --- a/schema/vrs-source.yaml +++ b/schema/vrs-source.yaml @@ -122,7 +122,7 @@ definitions: MUST be "Haplotype" members: type: array - minItems: 1 + minItems: 2 uniqueItems: true items: oneOf: From 7364d0199825da1c2b3f8274e77652bd59f9fb24 Mon Sep 17 00:00:00 2001 From: Alex Handler Wagner Date: Mon, 11 Jul 2022 16:39:11 -0500 Subject: [PATCH 25/83] restricting Haplotypes to 2+ members (Tristan) --- schema/vrs-source.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/schema/vrs-source.yaml b/schema/vrs-source.yaml index 2b450cb9..6bc3e1ae 100644 --- a/schema/vrs-source.yaml +++ b/schema/vrs-source.yaml @@ -119,7 +119,7 @@ definitions: MUST be "Haplotype" members: type: array - minItems: 1 + minItems: 2 uniqueItems: true items: oneOf: From 26dfffc8249bfa8baaf4714721f6d211edb1d152 Mon Sep 17 00:00:00 2001 From: Alex Handler Wagner Date: Mon, 11 Jul 2022 17:31:21 -0500 Subject: [PATCH 26/83] add Genotype --- docs/source/terms_and_model.rst | 11 ++++ schema/defs/vrs/Genotype.rst | 34 ++++++++++++ schema/defs/vrs/GenotypeMember.rst | 28 ++++++++++ schema/defs/vrs/Haplotype.rst | 2 +- schema/vrs-source.yaml | 63 +++++++++++++++++++++++ schema/vrs.json | 83 +++++++++++++++++++++++++++++- schema/vrs.yaml | 56 +++++++++++++++++++- 7 files changed, 274 insertions(+), 3 deletions(-) create mode 100644 schema/defs/vrs/Genotype.rst create mode 100644 schema/defs/vrs/GenotypeMember.rst diff --git a/docs/source/terms_and_model.rst b/docs/source/terms_and_model.rst index 0e51b09d..cb2e1614 100644 --- a/docs/source/terms_and_model.rst +++ 
b/docs/source/terms_and_model.rst @@ -407,6 +407,12 @@ Two, three, or four total copies of BRCA1: "type": "CopyNumber" } +.. _Genotype: + +Genotype +$$$$$$$$ + +.. include:: defs/Genotype.rst .. _UtilityVariation: @@ -1045,6 +1051,11 @@ This value is equivalent to the concept of "equal to or greater than "value": 22 } +GenotypeMember +############## + +.. include:: defs/GenotypeMember.rst + Primitives @@@@@@@@@@ diff --git a/schema/defs/vrs/Genotype.rst b/schema/defs/vrs/Genotype.rst new file mode 100644 index 00000000..3f5a0f1a --- /dev/null +++ b/schema/defs/vrs/Genotype.rst @@ -0,0 +1,34 @@ +**Computational Definition** + +A set of trans-phased :ref:`MolecularVariation` members, with associated copy counts, across a specified number of genomic locus `copies`. + +**Information Model** + +Some Genotype attributes are inherited from :ref:`Variation`. + +.. list-table:: + :class: clean-wrap + :header-rows: 1 + :align: left + :widths: auto + + * - Field + - Type + - Limits + - Description + * - _id + - :ref:`CURIE` + - 0..1 + - Variation Id. MUST be unique within document. + * - type + - string + - 1..1 + - MUST be "Genotype" + * - members + - :ref:`GenotypeMember` + - 1..m + - Each GenotypeMember in `members` describes a :ref:`MolecularVariation` and the count of that variation at the locus. + * - copies + - :ref:`Number` | :ref:`IndefiniteRange` | :ref:`DefiniteRange` + - 1..1 + - The total number of copies of all :ref:`MolecularVariation` at this locus, MUST be greater than or equal to the sum of :ref:`GenotypeMember` copy counts. If greater than the total counts, this implies additional :ref:`MolecularVariation` that are expected to exist but are not explicitly indicated. diff --git a/schema/defs/vrs/GenotypeMember.rst b/schema/defs/vrs/GenotypeMember.rst new file mode 100644 index 00000000..afd50939 --- /dev/null +++ b/schema/defs/vrs/GenotypeMember.rst @@ -0,0 +1,28 @@ +**Computational Definition** + +A class describing a :ref:`Genotype` `member`. 
+ +**Information Model** + +.. list-table:: + :class: clean-wrap + :header-rows: 1 + :align: left + :widths: auto + + * - Field + - Type + - Limits + - Description + * - type + - string + - 0..1 + - MUST be "GenotypeMember". + * - copies + - :ref:`Number` | :ref:`IndefiniteRange` | :ref:`DefiniteRange` + - 1..1 + - The number of copies of the `variation` at a :ref:`Genotype` locus. + * - variation + - :ref:`MolecularVariation` + - 1..1 + - A :ref:`MolecularVariation` at a :ref:`Genotype` locus. diff --git a/schema/defs/vrs/Haplotype.rst b/schema/defs/vrs/Haplotype.rst index c36e22f4..6202690a 100644 --- a/schema/defs/vrs/Haplotype.rst +++ b/schema/defs/vrs/Haplotype.rst @@ -26,5 +26,5 @@ Some Haplotype attributes are inherited from :ref:`Variation`. - MUST be "Haplotype" * - members - :ref:`Allele` | :ref:`CURIE` - - 1..m + - 2..m - List of Alleles, or references to Alleles, that comprise this Haplotype. diff --git a/schema/vrs-source.yaml b/schema/vrs-source.yaml index 6bc3e1ae..3c2d0953 100644 --- a/schema/vrs-source.yaml +++ b/schema/vrs-source.yaml @@ -68,6 +68,7 @@ definitions: a genome, sample, or homologous chromosomes. oneOf: - $ref: "#/definitions/CopyNumber" + - $ref: "#/definitions/Genotype" discriminator: propertyName: type @@ -240,6 +241,42 @@ definitions: required: [ "subject", "copies" ] + Genotype: + description: >- + A set of trans-phased :ref:`MolecularVariation` members, with associated + copy counts, across a specified number of genomic locus `copies`. + additionalProperties: false + type: object + properties: + type: + type: string + const: "Genotype" + description: >- + MUST be "Genotype" + members: + type: array + uniqueItems: true + minItems: 1 + items: + $ref: "#/definitions/GenotypeMember" + description: >- + Each GenotypeMember in `members` describes a :ref:`MolecularVariation` + and the count of that variation at the locus. 
+ copies: + oneOf: + - $ref: "#/definitions/Number" + - $ref: "#/definitions/IndefiniteRange" + - $ref: "#/definitions/DefiniteRange" + description: >- + The total number of copies of all :ref:`MolecularVariation` at this locus, + MUST be greater than or equal to the sum of :ref:`GenotypeMember` copy counts. + If greater than the total counts, this implies additional + :ref:`MolecularVariation` that are expected to exist but are not explicitly + indicated. + required: [ "members", "copies" ] + + + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Locations @@ -568,6 +605,32 @@ definitions: minimum: 0 required: [ "seq_expr", "count" ] + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Nested Classes + + GenotypeMember: + description: >- + A class describing a :ref:`Genotype` `member`. + type: object + additionalProperties: false + properties: + type: + type: string + const: "GenotypeMember" + description: MUST be "GenotypeMember". + copies: + oneOf: + - $ref: "#/definitions/Number" + - $ref: "#/definitions/IndefiniteRange" + - $ref: "#/definitions/DefiniteRange" + description: >- + The number of copies of the `variation` at a :ref:`Genotype` locus. + variation: + $ref: "#/definitions/MolecularVariation" + description: >- + A :ref:`MolecularVariation` at a :ref:`Genotype` locus. 
+ required: [ "copies", "variation" ] + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Feature diff --git a/schema/vrs.json b/schema/vrs.json index e1ee9510..c0076020 100644 --- a/schema/vrs.json +++ b/schema/vrs.json @@ -53,6 +53,9 @@ "oneOf": [ { "$ref": "#/definitions/CopyNumber" + }, + { + "$ref": "#/definitions/Genotype" } ], "discriminator": { @@ -123,7 +126,7 @@ }, "members": { "type": "array", - "minItems": 1, + "minItems": 2, "uniqueItems": true, "items": { "oneOf": [ @@ -319,6 +322,50 @@ "type" ] }, + "Genotype": { + "description": "A set of trans-phased MolecularVariation members, with associated copy counts, across a specified number of genomic locus `copies`.", + "additionalProperties": false, + "type": "object", + "properties": { + "_id": { + "$ref": "#/definitions/CURIE", + "description": "Variation Id. MUST be unique within document." + }, + "type": { + "type": "string", + "const": "Genotype", + "description": "MUST be \"Genotype\"" + }, + "members": { + "type": "array", + "uniqueItems": true, + "minItems": 1, + "items": { + "$ref": "#/definitions/GenotypeMember" + }, + "description": "Each GenotypeMember in `members` describes a MolecularVariation and the count of that variation at the locus." + }, + "copies": { + "oneOf": [ + { + "$ref": "#/definitions/Number" + }, + { + "$ref": "#/definitions/IndefiniteRange" + }, + { + "$ref": "#/definitions/DefiniteRange" + } + ], + "description": "The total number of copies of all MolecularVariation at this locus, MUST be greater than or equal to the sum of GenotypeMember copy counts. If greater than the total counts, this implies additional MolecularVariation that are expected to exist but are not explicitly indicated." 
+ } + }, + "required": [ + "copies", + "members", + "type" + ] + }, "Location": { "description": "A contiguous segment of a biological sequence.", "oneOf": [ @@ -776,6 +823,40 @@ "type" ] }, + "GenotypeMember": { + "description": "A class describing a Genotype `member`.", + "type": "object", + "additionalProperties": false, + "properties": { + "type": { + "type": "string", + "const": "GenotypeMember", + "description": "MUST be \"GenotypeMember\"." + }, + "copies": { + "oneOf": [ + { + "$ref": "#/definitions/Number" + }, + { + "$ref": "#/definitions/IndefiniteRange" + }, + { + "$ref": "#/definitions/DefiniteRange" + } + ], + "description": "The number of copies of the `variation` at a Genotype locus." + }, + "variation": { + "$ref": "#/definitions/MolecularVariation", + "description": "A MolecularVariation at a Genotype locus." + } + }, + "required": [ + "copies", + "variation" + ] + }, "Feature": { "description": "A named entity that can be mapped to a Location. Genes, protein domains, exons, and chromosomes are some examples of common biological entities that may be Features.", "oneOf": [ diff --git a/schema/vrs.yaml b/schema/vrs.yaml index 5b7f0daf..7461b5d9 100644 --- a/schema/vrs.yaml +++ b/schema/vrs.yaml @@ -31,6 +31,7 @@ definitions: a genome, sample, or homologous chromosomes. oneOf: - $ref: '#/definitions/CopyNumber' + - $ref: '#/definitions/Genotype' discriminator: propertyName: type Allele: @@ -74,7 +75,7 @@ definitions: description: MUST be "Haplotype" members: type: array - minItems: 1 + minItems: 2 uniqueItems: true items: oneOf: @@ -186,6 +187,38 @@ definitions: - copies - subject - type + Genotype: + description: A set of trans-phased MolecularVariation members, with associated + copy counts, across a specified number of genomic locus `copies`. 
+ additionalProperties: false + type: object + properties: + _id: *id001 + type: + type: string + const: Genotype + description: MUST be "Genotype" + members: + type: array + uniqueItems: true + minItems: 1 + items: + $ref: '#/definitions/GenotypeMember' + description: Each GenotypeMember in `members` describes a MolecularVariation + and the count of that variation at the locus. + copies: + oneOf: + - $ref: '#/definitions/Number' + - $ref: '#/definitions/IndefiniteRange' + - $ref: '#/definitions/DefiniteRange' + description: The total number of copies of all MolecularVariation at this + locus, MUST be greater than or equal to the sum of GenotypeMember copy counts. + If greater than the total counts, this implies additional MolecularVariation + that are expected to exist but are not explicitly indicated. + required: + - copies + - members + - type Location: description: A contiguous segment of a biological sequence. oneOf: @@ -471,6 +504,27 @@ definitions: - count - seq_expr - type + GenotypeMember: + description: A class describing a Genotype `member`. + type: object + additionalProperties: false + properties: + type: + type: string + const: GenotypeMember + description: MUST be "GenotypeMember". + copies: + oneOf: + - $ref: '#/definitions/Number' + - $ref: '#/definitions/IndefiniteRange' + - $ref: '#/definitions/DefiniteRange' + description: The number of copies of the `variation` at a Genotype locus. + variation: + $ref: '#/definitions/MolecularVariation' + description: A MolecularVariation at a Genotype locus. + required: + - copies + - variation Feature: description: A named entity that can be mapped to a Location. 
Genes, protein domains, exons, and chromosomes are some examples of common biological entities that From c4192bf0d46bacbf926cec0850eb45828fc89acb Mon Sep 17 00:00:00 2001 From: Alex Handler Wagner Date: Tue, 12 Jul 2022 16:46:13 -0500 Subject: [PATCH 27/83] add defaults --- schema/vrs-source.yaml | 18 ++++++++++++++++++ schema/vrs.json | 18 ++++++++++++++++++ schema/vrs.yaml | 18 ++++++++++++++++++ 3 files changed, 54 insertions(+) diff --git a/schema/vrs-source.yaml b/schema/vrs-source.yaml index 3c2d0953..c0168b82 100644 --- a/schema/vrs-source.yaml +++ b/schema/vrs-source.yaml @@ -89,6 +89,7 @@ definitions: type: type: string const: "Allele" + default: "Allele" description: >- MUST be "Allele" location: @@ -116,6 +117,7 @@ definitions: type: type: string const: "Haplotype" + default: "Haplotype" description: >- MUST be "Haplotype" members: @@ -143,6 +145,7 @@ definitions: type: type: string const: "Text" + default: "Text" description: MUST be "Text" definition: type: string @@ -160,6 +163,7 @@ definitions: type: type: string const: "VariationSet" + default: "VariationSet" description: MUST be "VariationSet" members: type: array @@ -188,6 +192,7 @@ definitions: type: type: string const: "CopyNumber" + default: "CopyNumber" description: >- MUST be "CopyNumber" subject: @@ -251,6 +256,7 @@ definitions: type: type: string const: "Genotype" + default: "Genotype" description: >- MUST be "Genotype" members: @@ -308,6 +314,7 @@ definitions: type: type: string const: "ChromosomeLocation" + default: "ChromosomeLocation" description: MUST be "ChromosomeLocation" species_id: $ref: "#/definitions/CURIE" @@ -336,6 +343,7 @@ definitions: type: type: string const: "SequenceLocation" + default: "SequenceLocation" description: MUST be "SequenceLocation" sequence_id: $ref: "#/definitions/CURIE" @@ -370,6 +378,7 @@ definitions: type: type: string const: "SequenceInterval" + default: "SequenceInterval" description: MUST be "SequenceInterval" start: oneOf: @@ -469,6 +478,7 @@ 
definitions: type: type: string const: "CytobandInterval" + default: "CytobandInterval" description: MUST be "CytobandInterval" start: $ref: "#/definitions/HumanCytoband" @@ -515,6 +525,7 @@ definitions: type: type: string const: "LiteralSequenceExpression" + default: "LiteralSequenceExpression" description: MUST be "LiteralSequenceExpression" sequence: $ref: "#/definitions/Sequence" @@ -535,6 +546,7 @@ definitions: type: type: string const: "DerivedSequenceExpression" + default: "DerivedSequenceExpression" description: MUST be "DerivedSequenceExpression" location: $ref: "#/definitions/SequenceLocation" @@ -556,6 +568,7 @@ definitions: type: type: string const: "RepeatedSequenceExpression" + default: "RepeatedSequenceExpression" description: MUST be "RepeatedSequenceExpression" seq_expr: oneOf: @@ -617,6 +630,7 @@ definitions: type: type: string const: "GenotypeMember" + default: "GenotypeMember" description: MUST be "GenotypeMember". copies: oneOf: @@ -661,6 +675,7 @@ definitions: type: type: string const: "Gene" + default: "Gene" description: MUST be "Gene" gene_id: $ref: "#/definitions/CURIE" @@ -680,6 +695,7 @@ definitions: type: type: string const: "Number" + default: "Number" description: MUST be "Number" value: type: integer @@ -695,6 +711,7 @@ definitions: type: type: string const: "DefiniteRange" + default: "DefiniteRange" description: MUST be "DefiniteRange" min: type: number @@ -716,6 +733,7 @@ definitions: type: type: string const: "IndefiniteRange" + default: "IndefiniteRange" description: MUST be "IndefiniteRange" value: type: number diff --git a/schema/vrs.json b/schema/vrs.json index c0076020..d5ecb96e 100644 --- a/schema/vrs.json +++ b/schema/vrs.json @@ -74,6 +74,7 @@ "type": { "type": "string", "const": "Allele", + "default": "Allele", "description": "MUST be \"Allele\"" }, "location": { @@ -122,6 +123,7 @@ "type": { "type": "string", "const": "Haplotype", + "default": "Haplotype", "description": "MUST be \"Haplotype\"" }, "members": { @@ -158,6 
+160,7 @@ "type": { "type": "string", "const": "Text", + "default": "Text", "description": "MUST be \"Text\"" }, "definition": { @@ -182,6 +185,7 @@ "type": { "type": "string", "const": "VariationSet", + "default": "VariationSet", "description": "MUST be \"VariationSet\"" }, "members": { @@ -217,6 +221,7 @@ "type": { "type": "string", "const": "CopyNumber", + "default": "CopyNumber", "description": "MUST be \"CopyNumber\"" }, "subject": { @@ -334,6 +339,7 @@ "type": { "type": "string", "const": "Genotype", + "default": "Genotype", "description": "MUST be \"Genotype\"" }, "members": { @@ -392,6 +398,7 @@ "type": { "type": "string", "const": "ChromosomeLocation", + "default": "ChromosomeLocation", "description": "MUST be \"ChromosomeLocation\"" }, "species_id": { @@ -427,6 +434,7 @@ "type": { "type": "string", "const": "SequenceLocation", + "default": "SequenceLocation", "description": "MUST be \"SequenceLocation\"" }, "sequence_id": { @@ -459,6 +467,7 @@ "type": { "type": "string", "const": "SequenceInterval", + "default": "SequenceInterval", "description": "MUST be \"SequenceInterval\"" }, "start": { @@ -632,6 +641,7 @@ "type": { "type": "string", "const": "CytobandInterval", + "default": "CytobandInterval", "description": "MUST be \"CytobandInterval\"" }, "start": { @@ -679,6 +689,7 @@ "type": { "type": "string", "const": "LiteralSequenceExpression", + "default": "LiteralSequenceExpression", "description": "MUST be \"LiteralSequenceExpression\"" }, "sequence": { @@ -699,6 +710,7 @@ "type": { "type": "string", "const": "DerivedSequenceExpression", + "default": "DerivedSequenceExpression", "description": "MUST be \"DerivedSequenceExpression\"" }, "location": { @@ -724,6 +736,7 @@ "type": { "type": "string", "const": "RepeatedSequenceExpression", + "default": "RepeatedSequenceExpression", "description": "MUST be \"RepeatedSequenceExpression\"" }, "seq_expr": { @@ -831,6 +844,7 @@ "type": { "type": "string", "const": "GenotypeMember", + "default": "GenotypeMember", 
"description": "MUST be \"GenotypeMember\"." }, "copies": { @@ -876,6 +890,7 @@ "type": { "type": "string", "const": "Gene", + "default": "Gene", "description": "MUST be \"Gene\"" }, "gene_id": { @@ -896,6 +911,7 @@ "type": { "type": "string", "const": "Number", + "default": "Number", "description": "MUST be \"Number\"" }, "value": { @@ -916,6 +932,7 @@ "type": { "type": "string", "const": "DefiniteRange", + "default": "DefiniteRange", "description": "MUST be \"DefiniteRange\"" }, "min": { @@ -941,6 +958,7 @@ "type": { "type": "string", "const": "IndefiniteRange", + "default": "IndefiniteRange", "description": "MUST be \"IndefiniteRange\"" }, "value": { diff --git a/schema/vrs.yaml b/schema/vrs.yaml index 7461b5d9..94d972b1 100644 --- a/schema/vrs.yaml +++ b/schema/vrs.yaml @@ -45,6 +45,7 @@ definitions: type: type: string const: Allele + default: Allele description: MUST be "Allele" location: oneOf: @@ -72,6 +73,7 @@ definitions: type: type: string const: Haplotype + default: Haplotype description: MUST be "Haplotype" members: type: array @@ -95,6 +97,7 @@ definitions: type: type: string const: Text + default: Text description: MUST be "Text" definition: type: string @@ -112,6 +115,7 @@ definitions: type: type: string const: VariationSet + default: VariationSet description: MUST be "VariationSet" members: type: array @@ -136,6 +140,7 @@ definitions: type: type: string const: CopyNumber + default: CopyNumber description: MUST be "CopyNumber" subject: oneOf: @@ -197,6 +202,7 @@ definitions: type: type: string const: Genotype + default: Genotype description: MUST be "Genotype" members: type: array @@ -237,6 +243,7 @@ definitions: type: type: string const: ChromosomeLocation + default: ChromosomeLocation description: MUST be "ChromosomeLocation" species_id: $ref: '#/definitions/CURIE' @@ -264,6 +271,7 @@ definitions: type: type: string const: SequenceLocation + default: SequenceLocation description: MUST be "SequenceLocation" sequence_id: $ref: '#/definitions/CURIE' 
@@ -287,6 +295,7 @@ definitions: type: type: string const: SequenceInterval + default: SequenceInterval description: MUST be "SequenceInterval" start: oneOf: @@ -383,6 +392,7 @@ definitions: type: type: string const: CytobandInterval + default: CytobandInterval description: MUST be "CytobandInterval" start: $ref: '#/definitions/HumanCytoband' @@ -416,6 +426,7 @@ definitions: type: type: string const: LiteralSequenceExpression + default: LiteralSequenceExpression description: MUST be "LiteralSequenceExpression" sequence: $ref: '#/definitions/Sequence' @@ -435,6 +446,7 @@ definitions: type: type: string const: DerivedSequenceExpression + default: DerivedSequenceExpression description: MUST be "DerivedSequenceExpression" location: $ref: '#/definitions/SequenceLocation' @@ -455,6 +467,7 @@ definitions: type: type: string const: RepeatedSequenceExpression + default: RepeatedSequenceExpression description: MUST be "RepeatedSequenceExpression" seq_expr: oneOf: @@ -512,6 +525,7 @@ definitions: type: type: string const: GenotypeMember + default: GenotypeMember description: MUST be "GenotypeMember". 
copies: oneOf: @@ -543,6 +557,7 @@ definitions: type: type: string const: Gene + default: Gene description: MUST be "Gene" gene_id: $ref: '#/definitions/CURIE' @@ -558,6 +573,7 @@ definitions: type: type: string const: Number + default: Number description: MUST be "Number" value: type: integer @@ -573,6 +589,7 @@ definitions: type: type: string const: DefiniteRange + default: DefiniteRange description: MUST be "DefiniteRange" min: type: number @@ -595,6 +612,7 @@ definitions: type: type: string const: IndefiniteRange + default: IndefiniteRange description: MUST be "IndefiniteRange" value: type: number From 7d6582c070591e4a5cc263c5f22c6303c65b2580 Mon Sep 17 00:00:00 2001 From: Alex Handler Wagner Date: Tue, 12 Jul 2022 17:11:09 -0500 Subject: [PATCH 28/83] squash that bug --- schema/defs/vrs/GenotypeMember.rst | 2 +- schema/vrs-source.yaml | 4 +++- schema/vrs.json | 9 ++++++++- schema/vrs.yaml | 4 +++- 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/schema/defs/vrs/GenotypeMember.rst b/schema/defs/vrs/GenotypeMember.rst index afd50939..05a0a7a8 100644 --- a/schema/defs/vrs/GenotypeMember.rst +++ b/schema/defs/vrs/GenotypeMember.rst @@ -23,6 +23,6 @@ A class describing a :ref:`Genotype` `member`. - 1..1 - The number of copies of the `variation` at a :ref:`Genotype` locus. * - variation - - :ref:`MolecularVariation` + - :ref:`Allele` | :ref:`Haplotype` - 1..1 - A :ref:`MolecularVariation` at a :ref:`Genotype` locus. diff --git a/schema/vrs-source.yaml b/schema/vrs-source.yaml index c0168b82..0058c3bd 100644 --- a/schema/vrs-source.yaml +++ b/schema/vrs-source.yaml @@ -640,7 +640,9 @@ definitions: description: >- The number of copies of the `variation` at a :ref:`Genotype` locus. variation: - $ref: "#/definitions/MolecularVariation" + oneOf: + - $ref: "#/definitions/Allele" + - $ref: "#/definitions/Haplotype" description: >- A :ref:`MolecularVariation` at a :ref:`Genotype` locus. 
required: [ "copies", "variation" ] diff --git a/schema/vrs.json b/schema/vrs.json index d5ecb96e..122e9a33 100644 --- a/schema/vrs.json +++ b/schema/vrs.json @@ -862,7 +862,14 @@ "description": "The number of copies of the `variation` at a Genotype locus." }, "variation": { - "$ref": "#/definitions/MolecularVariation", + "oneOf": [ + { + "$ref": "#/definitions/Allele" + }, + { + "$ref": "#/definitions/Haplotype" + } + ], "description": "A MolecularVariation at a Genotype locus." } }, diff --git a/schema/vrs.yaml b/schema/vrs.yaml index 94d972b1..195af8e2 100644 --- a/schema/vrs.yaml +++ b/schema/vrs.yaml @@ -534,7 +534,9 @@ definitions: - $ref: '#/definitions/DefiniteRange' description: The number of copies of the `variation` at a Genotype locus. variation: - $ref: '#/definitions/MolecularVariation' + oneOf: + - $ref: '#/definitions/Allele' + - $ref: '#/definitions/Haplotype' description: A MolecularVariation at a Genotype locus. required: - copies From 9dc4b3bd72f6a9705c0d6abb71b05b2f51114466 Mon Sep 17 00:00:00 2001 From: Alex Handler Wagner Date: Tue, 12 Jul 2022 17:11:09 -0500 Subject: [PATCH 29/83] fixed genotype molecularvariation construction error --- schema/defs/vrs/GenotypeMember.rst | 2 +- schema/vrs-source.yaml | 4 +++- schema/vrs.json | 9 ++++++++- schema/vrs.yaml | 4 +++- 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/schema/defs/vrs/GenotypeMember.rst b/schema/defs/vrs/GenotypeMember.rst index afd50939..05a0a7a8 100644 --- a/schema/defs/vrs/GenotypeMember.rst +++ b/schema/defs/vrs/GenotypeMember.rst @@ -23,6 +23,6 @@ A class describing a :ref:`Genotype` `member`. - 1..1 - The number of copies of the `variation` at a :ref:`Genotype` locus. * - variation - - :ref:`MolecularVariation` + - :ref:`Allele` | :ref:`Haplotype` - 1..1 - A :ref:`MolecularVariation` at a :ref:`Genotype` locus. 
diff --git a/schema/vrs-source.yaml b/schema/vrs-source.yaml index c0168b82..0058c3bd 100644 --- a/schema/vrs-source.yaml +++ b/schema/vrs-source.yaml @@ -640,7 +640,9 @@ definitions: description: >- The number of copies of the `variation` at a :ref:`Genotype` locus. variation: - $ref: "#/definitions/MolecularVariation" + oneOf: + - $ref: "#/definitions/Allele" + - $ref: "#/definitions/Haplotype" description: >- A :ref:`MolecularVariation` at a :ref:`Genotype` locus. required: [ "copies", "variation" ] diff --git a/schema/vrs.json b/schema/vrs.json index d5ecb96e..122e9a33 100644 --- a/schema/vrs.json +++ b/schema/vrs.json @@ -862,7 +862,14 @@ "description": "The number of copies of the `variation` at a Genotype locus." }, "variation": { - "$ref": "#/definitions/MolecularVariation", + "oneOf": [ + { + "$ref": "#/definitions/Allele" + }, + { + "$ref": "#/definitions/Haplotype" + } + ], "description": "A MolecularVariation at a Genotype locus." } }, diff --git a/schema/vrs.yaml b/schema/vrs.yaml index 94d972b1..195af8e2 100644 --- a/schema/vrs.yaml +++ b/schema/vrs.yaml @@ -534,7 +534,9 @@ definitions: - $ref: '#/definitions/DefiniteRange' description: The number of copies of the `variation` at a Genotype locus. variation: - $ref: '#/definitions/MolecularVariation' + oneOf: + - $ref: '#/definitions/Allele' + - $ref: '#/definitions/Haplotype' description: A MolecularVariation at a Genotype locus. 
required: - copies From fd7a3b8ad502f2c2e077c68e7de34124156f7465 Mon Sep 17 00:00:00 2001 From: Alex Handler Wagner Date: Tue, 12 Jul 2022 17:16:27 -0500 Subject: [PATCH 30/83] genotype prefix --- schema/ga4gh.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/schema/ga4gh.yaml b/schema/ga4gh.yaml index 054d8d24..21a25220 100644 --- a/schema/ga4gh.yaml +++ b/schema/ga4gh.yaml @@ -23,7 +23,7 @@ identifiers: Allele: VA VariationSet: VS Text: VT - # Genotype: VG + Genotype: GT Haplotype: VH CopyNumber: VCN From 72e0933406139128139717a9f704167a19e96f8b Mon Sep 17 00:00:00 2001 From: Alex Handler Wagner Date: Sat, 23 Jul 2022 22:17:23 -0400 Subject: [PATCH 31/83] use strict mode from gks.metaschema --- schema/vrs-source.yaml | 25 +------------ schema/vrs.json | 84 ++++++++++++++++++++---------------------- schema/vrs.yaml | 44 ++++++++++------------ 3 files changed, 61 insertions(+), 92 deletions(-) diff --git a/schema/vrs-source.yaml b/schema/vrs-source.yaml index 0058c3bd..f22449c5 100644 --- a/schema/vrs-source.yaml +++ b/schema/vrs-source.yaml @@ -11,6 +11,7 @@ $schema: "http://json-schema.org/draft-07/schema" title: "GA4GH-VRS-Definitions" type: object +strict: true definitions: # VRS definitions are presented top-down. Everything rolls up to @@ -83,7 +84,6 @@ definitions: Allele: description: >- The state of a molecule at a :ref:`Location`. - additionalProperties: false type: object properties: type: @@ -111,7 +111,6 @@ definitions: Haplotype: description: >- A set of non-overlapping :ref:`Allele` members that co-occur on the same molecule. - additionalProperties: false type: "object" properties: type: @@ -139,7 +138,6 @@ definitions: Text: description: >- A free-text definition of variation. - additionalProperties: false type: object properties: type: @@ -158,7 +156,6 @@ definitions: description: >- An unconstrained set of Variation members. 
type: object - additionalProperties: false properties: type: type: string @@ -182,7 +179,6 @@ definitions: # SystemicVariation CopyNumber: - additionalProperties: false type: object description: >- The absolute count of discrete copies of a :ref:`MolecularVariation`, @@ -250,7 +246,6 @@ definitions: description: >- A set of trans-phased :ref:`MolecularVariation` members, with associated copy counts, across a specified number of genomic locus `copies`. - additionalProperties: false type: object properties: type: @@ -306,7 +301,6 @@ definitions: propertyName: type ChromosomeLocation: - additionalProperties: false description: >- A Location on a chromosome defined by a species and chromosome name. type: object @@ -335,7 +329,6 @@ definitions: required: [ "species_id", "chr", "interval" ] SequenceLocation: - additionalProperties: false description: >- A :ref:`Location` defined by an interval on a referenced :ref:`Sequence`. type: object @@ -373,7 +366,6 @@ definitions: always represented by contiguous spans using interbase coordinates or coordinate ranges. type: object - additionalProperties: false properties: type: type: string @@ -473,7 +465,6 @@ definitions: The span includes the constituent regions described by the start and end cytobands, as well as any intervening regions. type: object - additionalProperties: false properties: type: type: string @@ -520,7 +511,6 @@ definitions: description: >- An explicit expression of a Sequence. type: object - additionalProperties: false properties: type: type: string @@ -541,7 +531,6 @@ definitions: large regions in contexts where the use of an approximate sequence is inconsequential. type: object - additionalProperties: false properties: type: type: string @@ -562,7 +551,6 @@ definitions: RepeatedSequenceExpression: description: >- An expression of a sequence comprised of a tandem repeating subsequence. 
- additionalProperties: false type: object properties: type: @@ -625,7 +613,6 @@ definitions: description: >- A class describing a :ref:`Genotype` `member`. type: object - additionalProperties: false properties: type: type: string @@ -672,7 +659,6 @@ definitions: the use of `hgnc `_ as the gene authority is RECOMMENDED. type: object - additionalProperties: false properties: type: type: string @@ -692,7 +678,6 @@ definitions: description: >- A simple integer value as a VRS class. type: object - additionalProperties: false properties: type: type: string @@ -708,7 +693,6 @@ definitions: description: >- A bounded, inclusive range of numbers. type: object - additionalProperties: false properties: type: type: string @@ -730,7 +714,6 @@ definitions: '>=' are all numbers greater than and including `value`, '<=' are all numbers less than and including `value`. type: object - additionalProperties: false properties: type: type: string @@ -755,7 +738,6 @@ definitions: # ============================================================================= CURIE: - additionalProperties: false description: >- A `W3C Compact URI `_ formatted string. 
A CURIE string has the structure ``prefix``:``reference``, as defined by @@ -765,7 +747,6 @@ definitions: example: "ensembl:ENSG00000139618" HumanCytoband: - additionalProperties: false description: >- A character string representing cytobands derived from the *International System for Human Cytogenomic Nomenclature* (ISCN) @@ -775,7 +756,6 @@ definitions: example: "q22.3" Residue: - additionalProperties: false description: >- A character representing a specific residue (i.e., molecular species) or groupings of these ("ambiguity codes"), using `one-letter IUPAC @@ -785,7 +765,6 @@ definitions: pattern: '[A-Z*\-]' Sequence: - additionalProperties: false description: >- A character string of :ref:`Residues ` that represents a biological sequence using the conventional sequence order (5’-to-3’ for @@ -806,7 +785,6 @@ definitions: to use for representing "ref-alt" style variation, including SNVs, MNVs, del, ins, and delins. This class is deprecated. Use :ref:`LiteralSequenceExpression` instead. - additionalProperties: false type: object properties: type: @@ -828,7 +806,6 @@ definitions: always represented by contiguous spans using interbase coordinates. This class is deprecated. Use SequenceInterval instead. 
- additionalProperties: false type: object properties: type: diff --git a/schema/vrs.json b/schema/vrs.json index 122e9a33..d101e368 100644 --- a/schema/vrs.json +++ b/schema/vrs.json @@ -64,7 +64,6 @@ }, "Allele": { "description": "The state of a molecule at a Location.", - "additionalProperties": false, "type": "object", "properties": { "_id": { @@ -109,11 +108,11 @@ "location", "state", "type" - ] + ], + "additionalProperties": false }, "Haplotype": { "description": "A set of non-overlapping Allele members that co-occur on the same molecule.", - "additionalProperties": false, "type": "object", "properties": { "_id": { @@ -146,11 +145,11 @@ "required": [ "members", "type" - ] + ], + "additionalProperties": false }, "Text": { "description": "A free-text definition of variation.", - "additionalProperties": false, "type": "object", "properties": { "_id": { @@ -171,12 +170,12 @@ "required": [ "definition", "type" - ] + ], + "additionalProperties": false }, "VariationSet": { "description": "An unconstrained set of Variation members.", "type": "object", - "additionalProperties": false, "properties": { "_id": { "$ref": "#/definitions/CURIE", @@ -207,10 +206,10 @@ "required": [ "members", "type" - ] + ], + "additionalProperties": false }, "CopyNumber": { - "additionalProperties": false, "type": "object", "description": "The absolute count of discrete copies of a MolecularVariation, Feature, SequenceExpression, or a CURIE reference within a system (e.g. 
genome, cell, etc.).", "properties": { @@ -325,11 +324,11 @@ "copies", "subject", "type" - ] + ], + "additionalProperties": false }, "Genotype": { "description": "A set of trans-phased MolecularVariation members, with associated copy counts, across a specified number of genomic locus `copies`.", - "additionalProperties": false, "type": "object", "properties": { "_id": { @@ -370,7 +369,8 @@ "copies", "members", "type" - ] + ], + "additionalProperties": false }, "Location": { "description": "A contiguous segment of a biological sequence.", @@ -387,7 +387,6 @@ } }, "ChromosomeLocation": { - "additionalProperties": false, "description": "A Location on a chromosome defined by a species and chromosome name.", "type": "object", "properties": { @@ -420,10 +419,10 @@ "interval", "species_id", "type" - ] + ], + "additionalProperties": false }, "SequenceLocation": { - "additionalProperties": false, "description": "A Location defined by an interval on a referenced Sequence.", "type": "object", "properties": { @@ -457,12 +456,12 @@ "interval", "sequence_id", "type" - ] + ], + "additionalProperties": false }, "SequenceInterval": { "description": "A SequenceInterval represents a span on a Sequence. Positions are always represented by contiguous spans using interbase coordinates or coordinate ranges.", "type": "object", - "additionalProperties": false, "properties": { "type": { "type": "string", @@ -631,12 +630,12 @@ "end", "start", "type" - ] + ], + "additionalProperties": false }, "CytobandInterval": { "description": "A contiguous span on a chromosome defined by cytoband features. 
The span includes the constituent regions described by the start and end cytobands, as well as any intervening regions.", "type": "object", - "additionalProperties": false, "properties": { "type": { "type": "string", @@ -662,7 +661,8 @@ "end", "start", "type" - ] + ], + "additionalProperties": false }, "SequenceExpression": { "description": "An expression describing a Sequence.", @@ -684,7 +684,6 @@ "LiteralSequenceExpression": { "description": "An explicit expression of a Sequence.", "type": "object", - "additionalProperties": false, "properties": { "type": { "type": "string", @@ -700,12 +699,12 @@ "required": [ "sequence", "type" - ] + ], + "additionalProperties": false }, "DerivedSequenceExpression": { "description": "An approximate expression of a sequence that is derived from a referenced sequence location. Use of this class indicates that the derived sequence is *approximately equivalent* to the reference indicated, and is typically used for describing large regions in contexts where the use of an approximate sequence is inconsequential.", "type": "object", - "additionalProperties": false, "properties": { "type": { "type": "string", @@ -726,11 +725,11 @@ "location", "reverse_complement", "type" - ] + ], + "additionalProperties": false }, "RepeatedSequenceExpression": { "description": "An expression of a sequence comprised of a tandem repeating subsequence.", - "additionalProperties": false, "type": "object", "properties": { "type": { @@ -834,12 +833,12 @@ "count", "seq_expr", "type" - ] + ], + "additionalProperties": false }, "GenotypeMember": { "description": "A class describing a Genotype `member`.", "type": "object", - "additionalProperties": false, "properties": { "type": { "type": "string", @@ -876,7 +875,8 @@ "required": [ "copies", "variation" - ] + ], + "additionalProperties": false }, "Feature": { "description": "A named entity that can be mapped to a Location. 
Genes, protein domains, exons, and chromosomes are some examples of common biological entities that may be Features.", @@ -892,7 +892,6 @@ "Gene": { "description": "A reference to a Gene as defined by an authority. For human genes, the use of [hgnc](https://registry.identifiers.org/registry/hgnc) as the gene authority is RECOMMENDED.", "type": "object", - "additionalProperties": false, "properties": { "type": { "type": "string", @@ -908,12 +907,12 @@ "required": [ "gene_id", "type" - ] + ], + "additionalProperties": false }, "Number": { "description": "A simple integer value as a VRS class.", "type": "object", - "additionalProperties": false, "properties": { "type": { "type": "string", @@ -929,12 +928,12 @@ "required": [ "type", "value" - ] + ], + "additionalProperties": false }, "DefiniteRange": { "description": "A bounded, inclusive range of numbers.", "type": "object", - "additionalProperties": false, "properties": { "type": { "type": "string", @@ -955,12 +954,12 @@ "max", "min", "type" - ] + ], + "additionalProperties": false }, "IndefiniteRange": { "description": "A half-bounded range of numbers represented as a number bound and associated comparator. The bound operator is interpreted as follows: '>=' are all numbers greater than and including `value`, '<=' are all numbers less than and including `value`.", "type": "object", - "additionalProperties": false, "properties": { "type": { "type": "string", @@ -985,30 +984,27 @@ "comparator", "type", "value" - ] + ], + "additionalProperties": false }, "CURIE": { - "additionalProperties": false, "description": "A [W3C Compact URI](https://www.w3.org/TR/curie/) formatted string. 
A CURIE string has the structure ``prefix``:``reference``, as defined by the W3C syntax.", "type": "string", "pattern": "^\\w[^:]*:.+$", "example": "ensembl:ENSG00000139618" }, "HumanCytoband": { - "additionalProperties": false, "description": "A character string representing cytobands derived from the *International System for Human Cytogenomic Nomenclature* (ISCN) [guidelines](http://doi.org/10.1159/isbn.978-3-318-06861-0).", "type": "string", "pattern": "^cen|[pq](ter|([1-9][0-9]*(\\.[1-9][0-9]*)?))$", "example": "q22.3" }, "Residue": { - "additionalProperties": false, "description": "A character representing a specific residue (i.e., molecular species) or groupings of these (\"ambiguity codes\"), using [one-letter IUPAC abbreviations](https://en.wikipedia.org/wiki/International_Union_of_Pure_and_Applied_Chemistry#Amino_acid_and_nucleotide_base_codes) for nucleic acids and amino acids.", "type": "string", "pattern": "[A-Z*\\-]" }, "Sequence": { - "additionalProperties": false, "description": "A character string of Residues that represents a biological sequence using the conventional sequence order (5\u2019-to-3\u2019 for nucleic acid sequences, and amino-to-carboxyl for amino acid sequences). IUPAC ambiguity codes are permitted in Sequences.", "type": "string", "pattern": "^[A-Z*\\-]*$" @@ -1016,7 +1012,6 @@ "SequenceState": { "deprecated": true, "description": "DEPRECATED. A Sequence as a State. This is the State class to use for representing \"ref-alt\" style variation, including SNVs, MNVs, del, ins, and delins. This class is deprecated. Use LiteralSequenceExpression instead.", - "additionalProperties": false, "type": "object", "properties": { "type": { @@ -1036,12 +1031,12 @@ "required": [ "sequence", "type" - ] + ], + "additionalProperties": false }, "SimpleInterval": { "deprecated": true, "description": "DEPRECATED: A SimpleInterval represents a span of sequence. Positions are always represented by contiguous spans using interbase coordinates. 
This class is deprecated. Use SequenceInterval instead.", - "additionalProperties": false, "type": "object", "properties": { "type": { @@ -1067,7 +1062,8 @@ "end", "start", "type" - ] + ], + "additionalProperties": false } } } \ No newline at end of file diff --git a/schema/vrs.yaml b/schema/vrs.yaml index 195af8e2..c91ec0ca 100644 --- a/schema/vrs.yaml +++ b/schema/vrs.yaml @@ -36,7 +36,6 @@ definitions: propertyName: type Allele: description: The state of a molecule at a Location. - additionalProperties: false type: object properties: _id: &id001 @@ -63,10 +62,10 @@ definitions: - location - state - type + additionalProperties: false Haplotype: description: A set of non-overlapping Allele members that co-occur on the same molecule. - additionalProperties: false type: object properties: _id: *id001 @@ -88,9 +87,9 @@ definitions: required: - members - type + additionalProperties: false Text: description: A free-text definition of variation. - additionalProperties: false type: object properties: _id: *id001 @@ -106,10 +105,10 @@ definitions: required: - definition - type + additionalProperties: false VariationSet: description: An unconstrained set of Variation members. type: object - additionalProperties: false properties: _id: *id001 type: @@ -129,8 +128,8 @@ definitions: required: - members - type - CopyNumber: additionalProperties: false + CopyNumber: type: object description: The absolute count of discrete copies of a MolecularVariation, Feature, SequenceExpression, or a CURIE reference within a system (e.g. genome, cell, @@ -192,10 +191,10 @@ definitions: - copies - subject - type + additionalProperties: false Genotype: description: A set of trans-phased MolecularVariation members, with associated copy counts, across a specified number of genomic locus `copies`. 
- additionalProperties: false type: object properties: _id: *id001 @@ -225,6 +224,7 @@ definitions: - copies - members - type + additionalProperties: false Location: description: A contiguous segment of a biological sequence. oneOf: @@ -233,7 +233,6 @@ definitions: discriminator: propertyName: type ChromosomeLocation: - additionalProperties: false description: A Location on a chromosome defined by a species and chromosome name. type: object properties: @@ -262,8 +261,8 @@ definitions: - interval - species_id - type - SequenceLocation: additionalProperties: false + SequenceLocation: description: A Location defined by an interval on a referenced Sequence. type: object properties: @@ -285,12 +284,12 @@ definitions: - interval - sequence_id - type + additionalProperties: false SequenceInterval: description: A SequenceInterval represents a span on a Sequence. Positions are always represented by contiguous spans using interbase coordinates or coordinate ranges. type: object - additionalProperties: false properties: type: type: string @@ -382,12 +381,12 @@ definitions: - end - start - type + additionalProperties: false CytobandInterval: description: A contiguous span on a chromosome defined by cytoband features. The span includes the constituent regions described by the start and end cytobands, as well as any intervening regions. type: object - additionalProperties: false properties: type: type: string @@ -410,6 +409,7 @@ definitions: - end - start - type + additionalProperties: false SequenceExpression: description: An expression describing a Sequence. oneOf: @@ -421,7 +421,6 @@ definitions: LiteralSequenceExpression: description: An explicit expression of a Sequence. type: object - additionalProperties: false properties: type: type: string @@ -434,6 +433,7 @@ definitions: required: - sequence - type + additionalProperties: false DerivedSequenceExpression: description: An approximate expression of a sequence that is derived from a referenced sequence location. 
Use of this class indicates that the derived sequence is @@ -441,7 +441,6 @@ definitions: for describing large regions in contexts where the use of an approximate sequence is inconsequential. type: object - additionalProperties: false properties: type: type: string @@ -459,9 +458,9 @@ definitions: - location - reverse_complement - type + additionalProperties: false RepeatedSequenceExpression: description: An expression of a sequence comprised of a tandem repeating subsequence. - additionalProperties: false type: object properties: type: @@ -517,10 +516,10 @@ definitions: - count - seq_expr - type + additionalProperties: false GenotypeMember: description: A class describing a Genotype `member`. type: object - additionalProperties: false properties: type: type: string @@ -541,6 +540,7 @@ definitions: required: - copies - variation + additionalProperties: false Feature: description: A named entity that can be mapped to a Location. Genes, protein domains, exons, and chromosomes are some examples of common biological entities that @@ -554,7 +554,6 @@ definitions: the use of [hgnc](https://registry.identifiers.org/registry/hgnc) as the gene authority is RECOMMENDED. type: object - additionalProperties: false properties: type: type: string @@ -567,10 +566,10 @@ definitions: required: - gene_id - type + additionalProperties: false Number: description: A simple integer value as a VRS class. type: object - additionalProperties: false properties: type: type: string @@ -583,10 +582,10 @@ definitions: required: - type - value + additionalProperties: false DefiniteRange: description: A bounded, inclusive range of numbers. type: object - additionalProperties: false properties: type: type: string @@ -603,13 +602,13 @@ definitions: - max - min - type + additionalProperties: false IndefiniteRange: description: 'A half-bounded range of numbers represented as a number bound and associated comparator. 
The bound operator is interpreted as follows: ''>='' are all numbers greater than and including `value`, ''<='' are all numbers less than and including `value`.' type: object - additionalProperties: false properties: type: type: string @@ -630,8 +629,8 @@ definitions: - comparator - type - value - CURIE: additionalProperties: false + CURIE: description: A [W3C Compact URI](https://www.w3.org/TR/curie/) formatted string. A CURIE string has the structure ``prefix``:``reference``, as defined by the W3C syntax. @@ -639,21 +638,18 @@ definitions: pattern: ^\w[^:]*:.+$ example: ensembl:ENSG00000139618 HumanCytoband: - additionalProperties: false description: A character string representing cytobands derived from the *International System for Human Cytogenomic Nomenclature* (ISCN) [guidelines](http://doi.org/10.1159/isbn.978-3-318-06861-0). type: string pattern: ^cen|[pq](ter|([1-9][0-9]*(\.[1-9][0-9]*)?))$ example: q22.3 Residue: - additionalProperties: false description: A character representing a specific residue (i.e., molecular species) or groupings of these ("ambiguity codes"), using [one-letter IUPAC abbreviations](https://en.wikipedia.org/wiki/International_Union_of_Pure_and_Applied_Chemistry#Amino_acid_and_nucleotide_base_codes) for nucleic acids and amino acids. type: string pattern: '[A-Z*\-]' Sequence: - additionalProperties: false description: "A character string of Residues that represents a biological sequence\ \ using the conventional sequence order (5\u2019-to-3\u2019 for nucleic acid\ \ sequences, and amino-to-carboxyl for amino acid sequences). IUPAC ambiguity\ @@ -665,7 +661,6 @@ definitions: description: DEPRECATED. A Sequence as a State. This is the State class to use for representing "ref-alt" style variation, including SNVs, MNVs, del, ins, and delins. This class is deprecated. Use LiteralSequenceExpression instead. 
- additionalProperties: false type: object properties: type: @@ -681,12 +676,12 @@ definitions: required: - sequence - type + additionalProperties: false SimpleInterval: deprecated: true description: 'DEPRECATED: A SimpleInterval represents a span of sequence. Positions are always represented by contiguous spans using interbase coordinates. This class is deprecated. Use SequenceInterval instead.' - additionalProperties: false type: object properties: type: @@ -707,3 +702,4 @@ definitions: - end - start - type + additionalProperties: false From 2d2532d2b5b742fe42902690888b176095b48b98 Mon Sep 17 00:00:00 2001 From: Alex Handler Wagner Date: Thu, 28 Jul 2022 23:52:33 -0400 Subject: [PATCH 32/83] update Genotype definition --- schema/defs/vrs/Genotype.rst | 2 +- schema/vrs-source.yaml | 3 +-- schema/vrs.json | 2 +- schema/vrs.yaml | 3 +-- 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/schema/defs/vrs/Genotype.rst b/schema/defs/vrs/Genotype.rst index 3f5a0f1a..0a0dbbf4 100644 --- a/schema/defs/vrs/Genotype.rst +++ b/schema/defs/vrs/Genotype.rst @@ -1,6 +1,6 @@ **Computational Definition** -A set of trans-phased :ref:`MolecularVariation` members, with associated copy counts, across a specified number of genomic locus `copies`. +A quantified set of _in-trans_ :ref:`Molecular Variation` at a genomic locus. **Information Model** diff --git a/schema/vrs-source.yaml b/schema/vrs-source.yaml index f22449c5..59e3f501 100644 --- a/schema/vrs-source.yaml +++ b/schema/vrs-source.yaml @@ -244,8 +244,7 @@ definitions: Genotype: description: >- - A set of trans-phased :ref:`MolecularVariation` members, with associated - copy counts, across a specified number of genomic locus `copies`. + A quantified set of _in-trans_ :ref:`Molecular Variation` at a genomic locus. 
type: object properties: type: diff --git a/schema/vrs.json b/schema/vrs.json index d101e368..d49e9ed0 100644 --- a/schema/vrs.json +++ b/schema/vrs.json @@ -328,7 +328,7 @@ "additionalProperties": false }, "Genotype": { - "description": "A set of trans-phased MolecularVariation members, with associated copy counts, across a specified number of genomic locus `copies`.", + "description": "A quantified set of _in-trans_ Molecular Variation at a genomic locus.", "type": "object", "properties": { "_id": { diff --git a/schema/vrs.yaml b/schema/vrs.yaml index c91ec0ca..9a282057 100644 --- a/schema/vrs.yaml +++ b/schema/vrs.yaml @@ -193,8 +193,7 @@ definitions: - type additionalProperties: false Genotype: - description: A set of trans-phased MolecularVariation members, with associated - copy counts, across a specified number of genomic locus `copies`. + description: A quantified set of _in-trans_ Molecular Variation at a genomic locus. type: object properties: _id: *id001 From 40909a3aef166b68d6e21272ac3fced1d01d59eb Mon Sep 17 00:00:00 2001 From: Alex Handler Wagner Date: Fri, 29 Jul 2022 00:02:07 -0400 Subject: [PATCH 33/83] update genotypemember definition. closees #397 --- docs/source/terms_and_model.rst | 2 ++ schema/defs/vrs/Genotype.rst | 4 ++-- schema/defs/vrs/GenotypeMember.rst | 4 ++-- schema/vrs-source.yaml | 13 +++++++------ schema/vrs.json | 12 ++++++------ schema/vrs.yaml | 13 +++++++------ 6 files changed, 26 insertions(+), 22 deletions(-) diff --git a/docs/source/terms_and_model.rst b/docs/source/terms_and_model.rst index cb2e1614..5a08dd46 100644 --- a/docs/source/terms_and_model.rst +++ b/docs/source/terms_and_model.rst @@ -1051,6 +1051,8 @@ This value is equivalent to the concept of "equal to or greater than "value": 22 } +.. 
_GenotypeMember: + GenotypeMember ############## diff --git a/schema/defs/vrs/Genotype.rst b/schema/defs/vrs/Genotype.rst index 0a0dbbf4..e96e184a 100644 --- a/schema/defs/vrs/Genotype.rst +++ b/schema/defs/vrs/Genotype.rst @@ -1,6 +1,6 @@ **Computational Definition** -A quantified set of _in-trans_ :ref:`Molecular Variation` at a genomic locus. +A quantified set of _in-trans_ :ref:`MolecularVariation` at a genomic locus. **Information Model** @@ -28,7 +28,7 @@ Some Genotype attributes are inherited from :ref:`Variation`. - :ref:`GenotypeMember` - 1..m - Each GenotypeMember in `members` describes a :ref:`MolecularVariation` and the count of that variation at the locus. - * - copies + * - count - :ref:`Number` | :ref:`IndefiniteRange` | :ref:`DefiniteRange` - 1..1 - The total number of copies of all :ref:`MolecularVariation` at this locus, MUST be greater than or equal to the sum of :ref:`GenotypeMember` copy counts. If greater than the total counts, this implies additional :ref:`MolecularVariation` that are expected to exist but are not explicitly indicated. diff --git a/schema/defs/vrs/GenotypeMember.rst b/schema/defs/vrs/GenotypeMember.rst index 05a0a7a8..d3e0383a 100644 --- a/schema/defs/vrs/GenotypeMember.rst +++ b/schema/defs/vrs/GenotypeMember.rst @@ -1,6 +1,6 @@ **Computational Definition** -A class describing a :ref:`Genotype` `member`. +A class for expressing the count of a specific :ref:`MolecularVariation` present _in-trans_ at a genomic locus represented by a :ref:`Genotype`. **Information Model** @@ -18,7 +18,7 @@ A class describing a :ref:`Genotype` `member`. - string - 0..1 - MUST be "GenotypeMember". - * - copies + * - count - :ref:`Number` | :ref:`IndefiniteRange` | :ref:`DefiniteRange` - 1..1 - The number of copies of the `variation` at a :ref:`Genotype` locus. 
diff --git a/schema/vrs-source.yaml b/schema/vrs-source.yaml index 59e3f501..7d176e17 100644 --- a/schema/vrs-source.yaml +++ b/schema/vrs-source.yaml @@ -244,7 +244,7 @@ definitions: Genotype: description: >- - A quantified set of _in-trans_ :ref:`Molecular Variation` at a genomic locus. + A quantified set of _in-trans_ :ref:`MolecularVariation` at a genomic locus. type: object properties: type: @@ -262,7 +262,7 @@ definitions: description: >- Each GenotypeMember in `members` describes a :ref:`MolecularVariation` and the count of that variation at the locus. - copies: + count: oneOf: - $ref: "#/definitions/Number" - $ref: "#/definitions/IndefiniteRange" @@ -273,7 +273,7 @@ definitions: If greater than the total counts, this implies additional :ref:`MolecularVariation` that are expected to exist but are not explicitly indicated. - required: [ "members", "copies" ] + required: [ "members", "count" ] @@ -610,7 +610,8 @@ definitions: GenotypeMember: description: >- - A class describing a :ref:`Genotype` `member`. + A class for expressing the count of a specific :ref:`MolecularVariation` present + _in-trans_ at a genomic locus represented by a :ref:`Genotype`. type: object properties: type: @@ -618,7 +619,7 @@ definitions: const: "GenotypeMember" default: "GenotypeMember" description: MUST be "GenotypeMember". - copies: + count: oneOf: - $ref: "#/definitions/Number" - $ref: "#/definitions/IndefiniteRange" @@ -631,7 +632,7 @@ definitions: - $ref: "#/definitions/Haplotype" description: >- A :ref:`MolecularVariation` at a :ref:`Genotype` locus. 
- required: [ "copies", "variation" ] + required: [ "count", "variation" ] # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Feature diff --git a/schema/vrs.json b/schema/vrs.json index d49e9ed0..71023a28 100644 --- a/schema/vrs.json +++ b/schema/vrs.json @@ -328,7 +328,7 @@ "additionalProperties": false }, "Genotype": { - "description": "A quantified set of _in-trans_ Molecular Variation at a genomic locus.", + "description": "A quantified set of _in-trans_ MolecularVariation at a genomic locus.", "type": "object", "properties": { "_id": { @@ -350,7 +350,7 @@ }, "description": "Each GenotypeMember in `members` describes a MolecularVariation and the count of that variation at the locus." }, - "copies": { + "count": { "oneOf": [ { "$ref": "#/definitions/Number" @@ -366,7 +366,7 @@ } }, "required": [ - "copies", + "count", "members", "type" ], @@ -837,7 +837,7 @@ "additionalProperties": false }, "GenotypeMember": { - "description": "A class describing a Genotype `member`.", + "description": "A class for expressing the count of a specific MolecularVariation present _in-trans_ at a genomic locus represented by a Genotype.", "type": "object", "properties": { "type": { @@ -846,7 +846,7 @@ "default": "GenotypeMember", "description": "MUST be \"GenotypeMember\"." }, - "copies": { + "count": { "oneOf": [ { "$ref": "#/definitions/Number" @@ -873,7 +873,7 @@ } }, "required": [ - "copies", + "count", "variation" ], "additionalProperties": false diff --git a/schema/vrs.yaml b/schema/vrs.yaml index 9a282057..d51ee26b 100644 --- a/schema/vrs.yaml +++ b/schema/vrs.yaml @@ -193,7 +193,7 @@ definitions: - type additionalProperties: false Genotype: - description: A quantified set of _in-trans_ Molecular Variation at a genomic locus. + description: A quantified set of _in-trans_ MolecularVariation at a genomic locus. 
type: object properties: _id: *id001 @@ -210,7 +210,7 @@ definitions: $ref: '#/definitions/GenotypeMember' description: Each GenotypeMember in `members` describes a MolecularVariation and the count of that variation at the locus. - copies: + count: oneOf: - $ref: '#/definitions/Number' - $ref: '#/definitions/IndefiniteRange' @@ -220,7 +220,7 @@ definitions: If greater than the total counts, this implies additional MolecularVariation that are expected to exist but are not explicitly indicated. required: - - copies + - count - members - type additionalProperties: false @@ -517,7 +517,8 @@ definitions: - type additionalProperties: false GenotypeMember: - description: A class describing a Genotype `member`. + description: A class for expressing the count of a specific MolecularVariation + present _in-trans_ at a genomic locus represented by a Genotype. type: object properties: type: @@ -525,7 +526,7 @@ definitions: const: GenotypeMember default: GenotypeMember description: MUST be "GenotypeMember". - copies: + count: oneOf: - $ref: '#/definitions/Number' - $ref: '#/definitions/IndefiniteRange' @@ -537,7 +538,7 @@ definitions: - $ref: '#/definitions/Haplotype' description: A MolecularVariation at a Genotype locus. 
required: - - copies + - count - variation additionalProperties: false Feature: From 2def58c73365272afbd696dcec0c47a2836048de Mon Sep 17 00:00:00 2001 From: Alex Handler Wagner Date: Mon, 12 Sep 2022 10:30:57 -0400 Subject: [PATCH 34/83] update docs --- .requirements.txt | 2 +- docs/source/appendices/future_plans.rst | 123 -------------- docs/source/terms_and_model.rst | 71 +++++++- schema/Makefile | 8 +- schema/defs/vrs/Allele.rst | 8 +- schema/defs/vrs/ChromosomeLocation.rst | 8 +- schema/defs/vrs/CopyNumber.rst | 8 +- schema/defs/vrs/DerivedSequenceExpression.rst | 4 +- schema/defs/vrs/Gene.rst | 4 +- schema/defs/vrs/Genotype.rst | 10 +- schema/defs/vrs/GenotypeMember.rst | 2 +- schema/defs/vrs/Haplotype.rst | 8 +- schema/defs/vrs/LiteralSequenceExpression.rst | 4 +- .../defs/vrs/RepeatedSequenceExpression.rst | 4 +- schema/defs/vrs/SequenceLocation.rst | 8 +- schema/defs/vrs/Text.rst | 8 +- schema/defs/vrs/VariationSet.rst | 8 +- schema/vrs-source.yaml | 7 +- schema/vrs.json | 152 +++++++++--------- schema/vrs.yaml | 80 ++++----- 20 files changed, 200 insertions(+), 327 deletions(-) diff --git a/.requirements.txt b/.requirements.txt index 65e0c4fb..0c406467 100644 --- a/.requirements.txt +++ b/.requirements.txt @@ -3,5 +3,5 @@ python-jsonschema-objects>=0.3,<=0.3.10 jsonschema==3.2.0 ipython pyyaml -ga4gh.gks.metaschema>=0.1.1 +ga4gh.gks.metaschema==0.2.0rc3 sphinx ~= 3.5 \ No newline at end of file diff --git a/docs/source/appendices/future_plans.rst b/docs/source/appendices/future_plans.rst index d73d361c..bf81b2ea 100644 --- a/docs/source/appendices/future_plans.rst +++ b/docs/source/appendices/future_plans.rst @@ -96,129 +96,6 @@ Under consideration. See https://github.com/ga4gh/vrs/issues/28. t(9;22)(q34;q11) in BCR-ABL -.. _genotype: - -Genotype -######## - -The genetic state of an organism, whether complete (defined over the -whole genome) or incomplete (defined over a subset of the genome). - -**Computational definition** - -A list of Haplotypes. 
- -**Information model** - -.. list-table:: - :class: reece-wrap - :header-rows: 1 - :align: left - :widths: auto - - * - Field - - Type - - Limits - - Description - * - _id - - :ref:`CURIE` - - 0..1 - - Variation Id; MUST be unique within document - * - type - - string - - 1..1 - - Variation type; MUST be set to '**Genotype**' - * - completeness - - enum - - 1..1 - - Declaration of completeness of the Haplotype definition. - Values are: - - * UNKNOWN: Other Haplotypes may exist. - * PARTIAL: Other Haplotypes exist but are unspecified. - * COMPLETE: The Genotype declares a complete set of Haplotypes. - - * - members - - :ref:`Haplotype`\[] or :ref:`CURIE`\[] - - 0..* - - List of Haplotypes or Haplotype identifiers; length MUST agree - with ploidy of genomic region - - -**Implementation guidance** - -* Haplotypes in a Genotype MAY occur at different locations or on - different reference sequences. For example, an individual may have - haplotypes on two population-specific references. -* Haplotypes in a Genotype MAY contain differing numbers of Alleles or - Alleles at different Locations. - -**Notes** - -* The term "genotype" has two, related definitions in common use. The - narrower definition is a set of alleles observed at a single - location and with a ploidy of two, such as a pair of single residue - variants on an autosome. The broader, generalized definition is a - set of alleles at multiple locations and/or with ploidy other than - two.The VRS Genotype entity is based on this broader definition. -* The term "diplotype" is often used to refer to two haplotypes. The - VRS Genotype entity subsumes the conventional definition of - diplotype. Therefore, the VRS model does not include an explicit - entity for diplotypes. See :ref:`this note - ` for a - discussion. -* The VRS model makes no assumptions about ploidy of an organism or - individual. The number of Haplotypes in a Genotype is the observed - ploidy of the individual. 
-* In diploid organisms, there are typically two instances of each - autosomal chromosome, and therefore two instances of sequence at a - particular location. Thus, Genotypes will often list two - Haplotypes. In the case of haploid chromosomes or - haploinsufficiency, the Genotype consists of a single Haplotype. -* A consequence of the computational definition is that Haplotypes at - overlapping or adjacent intervals MUST NOT be included in the same - Genotype. However, two or more Alleles MAY always be rewritten as an - equivalent Allele with a common sequence and interval context. -* The rationale for permitting Genotypes with Haplotypes defined on - different reference sequences is to enable the accurate - representation of segments of DNA with the most appropriate - population-specific reference sequence. - -**Sources** - -SO: `Genotype (SO:0001027) -`__ -— A genotype is a variant genome, complete or incomplete. - -.. _genotypes-represent-haplotypes-with-arbitrary-ploidy: - -.. note:: Genotypes represent Haplotypes with arbitrary ploidy - The VRS defines Haplotypes as a list of Alleles, and Genotypes as - a list of Haplotypes. In essence, Haplotypes and Genotypes represent - two distinct dimensions of containment: Haplotypes represent the "in - phase" relationship of Alleles while Genotypes represents sets of - Haplotypes of arbitrary ploidy. - - There are two important consequences of these definitions: There is no - single-location Genotype. Users of SNP data will be familiar with - representations like rs7412 C/C, which indicates the diploid state at - a position. In the VRS, this is merely a special case of a - Genotype with two Haplotypes, each of which is defined with only one - Allele (the same Allele in this case). The VRS does not define a - diplotype type. A diplotype is a special case of a VRS Genotype - with exactly two Haplotypes. 
In practice, software data types that - assume a ploidy of 2 make it very difficult to represent haploid - states, copy number loss, and copy number gain, all of which occur - when representing human data. In addition, assuming ploidy=2 makes - software incompatible with organisms with other ploidy. The VRS - makes no assumptions about "normal" ploidy. - - In other words, the VRS does not represent single-position - Genotypes or diplotypes because both concepts are subsumed by the - Allele, Haplotype, and Genotypes entities. - - - .. _GitHub issue: https://github.com/ga4gh/vrs/issues .. _genetic variation: https://en.wikipedia.org/wiki/Genetic_variation diff --git a/docs/source/terms_and_model.rst b/docs/source/terms_and_model.rst index 5a08dd46..98eaa7c1 100644 --- a/docs/source/terms_and_model.rst +++ b/docs/source/terms_and_model.rst @@ -407,13 +407,80 @@ Two, three, or four total copies of BRCA1: "type": "CopyNumber" } -.. _Genotype: +.. _genotype: Genotype $$$$$$$$ .. include:: defs/Genotype.rst +**Implementation guidance** + +* Haplotypes or Alleles in :ref:`GenotypeMember` objects MAY occur at different locations or on + different reference sequences. For example, an individual may have haplotypes on two + population-specific references. + +**Notes** + +* The term "genotype" has two, related definitions in common use. The + narrower definition is a set of alleles observed at a single + location and often with a ploidy of two, such as a pair of single residue + variants on an autosome. The broader, generalized definition is a + set of alleles at multiple locations and/or with ploidy other than + two. The VRS Genotype entity is based on this broader definition. +* The term "diplotype" is often used to refer to two in-trans haplotypes at a locus. + The VRS Genotype entity subsumes the conventional definition of diplotype. Therefore, + the VRS model does not include an explicit entity for diplotypes. See :ref:`this note + ` for a discussion. 
+* The VRS model makes no assumptions about ploidy of an organism or individual nor any + polysomy affecting a locus. The `genotype.count` attribute explicitly captures the total + count of in-trans molecules at a genomic locus represented by the Genotype. +* In diploid organisms, there are typically two instances of each autosomal chromosome, + and therefore two instances of sequence at a particular location. Thus, Genotypes will + often list two GenotypeMembers each based on a distinct Haplotype or Allele. In the case + of haploid chromosomes or haploinsufficiency, the Genotype consists of a single GenotypeMember. +* A consequence of the computational definition is that Haplotypes at + overlapping or adjacent intervals MUST NOT be included in the same + Genotype. However, two or more Alleles MAY always be rewritten as an + equivalent Allele with a common sequence and interval context. +* The rationale for permitting Genotypes with Haplotypes defined on + different reference sequences is to enable the accurate + representation of segments of DNA with the most appropriate + population-specific reference sequence. + +**Sources** + +SO: `Genotype (SO:0001027) +`__ +— A genotype is a variant genome, complete or incomplete. + +.. _genotypes-represent-haplotypes-with-arbitrary-ploidy: + +.. note:: Genotypes represent Haplotypes with arbitrary ploidy + The VRS defines Haplotypes as a list of Alleles, and Genotypes as + a list of Haplotypes. In essence, Haplotypes and Genotypes represent + two distinct dimensions of containment: Haplotypes represent the "in + phase" relationship of Alleles while Genotypes represents sets of + Haplotypes of arbitrary ploidy. + + There are two important consequences of these definitions: There is no + single-location Genotype. Users of SNP data will be familiar with + representations like rs7412 C/C, which indicates the diploid state at + a position. 
In the VRS, this is merely a special case of a + Genotype with two Haplotypes, each of which is defined with only one + Allele (the same Allele in this case). The VRS does not define a + diplotype type. A diplotype is a special case of a VRS Genotype + with exactly two Haplotypes. In practice, software data types that + assume a ploidy of 2 make it very difficult to represent haploid + states, copy number loss, and copy number gain, all of which occur + when representing human data. In addition, assuming ploidy=2 makes + software incompatible with organisms with other ploidy. The VRS + makes no assumptions about "normal" ploidy. + + In other words, the VRS does not represent single-position + Genotypes or diplotypes because both concepts are subsumed by the + Allele, Haplotype, and Genotypes entities. + .. _UtilityVariation: Utility Variation @@ -1051,7 +1118,7 @@ This value is equivalent to the concept of "equal to or greater than "value": 22 } -.. _GenotypeMember: +.. _genotypemember: GenotypeMember ############## diff --git a/schema/Makefile b/schema/Makefile index 8a0c601c..e163066b 100644 --- a/schema/Makefile +++ b/schema/Makefile @@ -6,13 +6,13 @@ JSYAMLS:=vrs.yaml JSONS:=${JSYAMLS:.yaml=.json} -all: vrs.json defs +all: ${JSONS} defs -vrs.json: vrs.yaml +%.json: %.yaml jsy2js.py <$< >$@ -vrs.yaml: vrs-source.yaml - source2jsy.py <$< >$@ +%.yaml: %-source.yaml + source2jsy.py $< >$@ defs: rm -rf defs diff --git a/schema/defs/vrs/Allele.rst b/schema/defs/vrs/Allele.rst index 1bf84b06..2dc0a247 100644 --- a/schema/defs/vrs/Allele.rst +++ b/schema/defs/vrs/Allele.rst @@ -4,8 +4,6 @@ The state of a molecule at a :ref:`Location`. **Information Model** -Some Allele attributes are inherited from :ref:`Variation`. - .. list-table:: :class: clean-wrap :header-rows: 1 @@ -16,13 +14,9 @@ Some Allele attributes are inherited from :ref:`Variation`. - Type - Limits - Description - * - _id - - :ref:`CURIE` - - 0..1 - - Variation Id. MUST be unique within document. 
* - type - string - - 1..1 + - 0..1 - MUST be "Allele" * - location - :ref:`CURIE` | :ref:`Location` diff --git a/schema/defs/vrs/ChromosomeLocation.rst b/schema/defs/vrs/ChromosomeLocation.rst index 91151861..5e205869 100644 --- a/schema/defs/vrs/ChromosomeLocation.rst +++ b/schema/defs/vrs/ChromosomeLocation.rst @@ -4,8 +4,6 @@ A Location on a chromosome defined by a species and chromosome name. **Information Model** -Some ChromosomeLocation attributes are inherited from :ref:`Location`. - .. list-table:: :class: clean-wrap :header-rows: 1 @@ -16,13 +14,9 @@ Some ChromosomeLocation attributes are inherited from :ref:`Location`. - Type - Limits - Description - * - _id - - :ref:`CURIE` - - 0..1 - - Location Id. MUST be unique within document. * - type - string - - 1..1 + - 0..1 - MUST be "ChromosomeLocation" * - species_id - :ref:`CURIE` diff --git a/schema/defs/vrs/CopyNumber.rst b/schema/defs/vrs/CopyNumber.rst index 17a483cb..110fad38 100644 --- a/schema/defs/vrs/CopyNumber.rst +++ b/schema/defs/vrs/CopyNumber.rst @@ -4,8 +4,6 @@ The absolute count of discrete copies of a :ref:`MolecularVariation`, :ref:`Feat **Information Model** -Some CopyNumber attributes are inherited from :ref:`Variation`. - .. list-table:: :class: clean-wrap :header-rows: 1 @@ -16,13 +14,9 @@ Some CopyNumber attributes are inherited from :ref:`Variation`. - Type - Limits - Description - * - _id - - :ref:`CURIE` - - 0..1 - - Variation Id. MUST be unique within document. 
* - type - string - - 1..1 + - 0..1 - MUST be "CopyNumber" * - subject - :ref:`MolecularVariation` | :ref:`Feature` | :ref:`SequenceExpression` | :ref:`CURIE` diff --git a/schema/defs/vrs/DerivedSequenceExpression.rst b/schema/defs/vrs/DerivedSequenceExpression.rst index 99421f5b..6821f947 100644 --- a/schema/defs/vrs/DerivedSequenceExpression.rst +++ b/schema/defs/vrs/DerivedSequenceExpression.rst @@ -4,8 +4,6 @@ An approximate expression of a sequence that is derived from a referenced sequen **Information Model** -Some DerivedSequenceExpression attributes are inherited from :ref:`SequenceExpression`. - .. list-table:: :class: clean-wrap :header-rows: 1 @@ -18,7 +16,7 @@ Some DerivedSequenceExpression attributes are inherited from :ref:`SequenceExpre - Description * - type - string - - 1..1 + - 0..1 - MUST be "DerivedSequenceExpression" * - location - :ref:`SequenceLocation` diff --git a/schema/defs/vrs/Gene.rst b/schema/defs/vrs/Gene.rst index c598388b..792ae83a 100644 --- a/schema/defs/vrs/Gene.rst +++ b/schema/defs/vrs/Gene.rst @@ -4,8 +4,6 @@ A reference to a Gene as defined by an authority. For human genes, the use of `h **Information Model** -Some Gene attributes are inherited from :ref:`Feature`. - .. list-table:: :class: clean-wrap :header-rows: 1 @@ -18,7 +16,7 @@ Some Gene attributes are inherited from :ref:`Feature`. - Description * - type - string - - 1..1 + - 0..1 - MUST be "Gene" * - gene_id - :ref:`CURIE` diff --git a/schema/defs/vrs/Genotype.rst b/schema/defs/vrs/Genotype.rst index e96e184a..74093a7a 100644 --- a/schema/defs/vrs/Genotype.rst +++ b/schema/defs/vrs/Genotype.rst @@ -1,11 +1,9 @@ **Computational Definition** -A quantified set of _in-trans_ :ref:`MolecularVariation` at a genomic locus. +A quantified set of *in-trans* :ref:`MolecularVariation` at a genomic locus. **Information Model** -Some Genotype attributes are inherited from :ref:`Variation`. - .. 
list-table:: :class: clean-wrap :header-rows: 1 @@ -16,13 +14,9 @@ Some Genotype attributes are inherited from :ref:`Variation`. - Type - Limits - Description - * - _id - - :ref:`CURIE` - - 0..1 - - Variation Id. MUST be unique within document. * - type - string - - 1..1 + - 0..1 - MUST be "Genotype" * - members - :ref:`GenotypeMember` diff --git a/schema/defs/vrs/GenotypeMember.rst b/schema/defs/vrs/GenotypeMember.rst index d3e0383a..49935e2b 100644 --- a/schema/defs/vrs/GenotypeMember.rst +++ b/schema/defs/vrs/GenotypeMember.rst @@ -1,6 +1,6 @@ **Computational Definition** -A class for expressing the count of a specific :ref:`MolecularVariation` present _in-trans_ at a genomic locus represented by a :ref:`Genotype`. +A class for expressing the count of a specific :ref:`MolecularVariation` present *in-trans* at a genomic locus represented by a :ref:`Genotype`. **Information Model** diff --git a/schema/defs/vrs/Haplotype.rst b/schema/defs/vrs/Haplotype.rst index 6202690a..82697cdc 100644 --- a/schema/defs/vrs/Haplotype.rst +++ b/schema/defs/vrs/Haplotype.rst @@ -4,8 +4,6 @@ A set of non-overlapping :ref:`Allele` members that co-occur on the same molecul **Information Model** -Some Haplotype attributes are inherited from :ref:`Variation`. - .. list-table:: :class: clean-wrap :header-rows: 1 @@ -16,13 +14,9 @@ Some Haplotype attributes are inherited from :ref:`Variation`. - Type - Limits - Description - * - _id - - :ref:`CURIE` - - 0..1 - - Variation Id. MUST be unique within document. * - type - string - - 1..1 + - 0..1 - MUST be "Haplotype" * - members - :ref:`Allele` | :ref:`CURIE` diff --git a/schema/defs/vrs/LiteralSequenceExpression.rst b/schema/defs/vrs/LiteralSequenceExpression.rst index fe8347b0..9475c3e6 100644 --- a/schema/defs/vrs/LiteralSequenceExpression.rst +++ b/schema/defs/vrs/LiteralSequenceExpression.rst @@ -4,8 +4,6 @@ An explicit expression of a Sequence. 
**Information Model** -Some LiteralSequenceExpression attributes are inherited from :ref:`SequenceExpression`. - .. list-table:: :class: clean-wrap :header-rows: 1 @@ -18,7 +16,7 @@ Some LiteralSequenceExpression attributes are inherited from :ref:`SequenceExpre - Description * - type - string - - 1..1 + - 0..1 - MUST be "LiteralSequenceExpression" * - sequence - :ref:`Sequence` diff --git a/schema/defs/vrs/RepeatedSequenceExpression.rst b/schema/defs/vrs/RepeatedSequenceExpression.rst index 1ac1c75a..b81c519f 100644 --- a/schema/defs/vrs/RepeatedSequenceExpression.rst +++ b/schema/defs/vrs/RepeatedSequenceExpression.rst @@ -4,8 +4,6 @@ An expression of a sequence comprised of a tandem repeating subsequence. **Information Model** -Some RepeatedSequenceExpression attributes are inherited from :ref:`SequenceExpression`. - .. list-table:: :class: clean-wrap :header-rows: 1 @@ -18,7 +16,7 @@ Some RepeatedSequenceExpression attributes are inherited from :ref:`SequenceExpr - Description * - type - string - - 1..1 + - 0..1 - MUST be "RepeatedSequenceExpression" * - seq_expr - :ref:`LiteralSequenceExpression` | :ref:`DerivedSequenceExpression` diff --git a/schema/defs/vrs/SequenceLocation.rst b/schema/defs/vrs/SequenceLocation.rst index 606bb6f7..501251c3 100644 --- a/schema/defs/vrs/SequenceLocation.rst +++ b/schema/defs/vrs/SequenceLocation.rst @@ -4,8 +4,6 @@ A :ref:`Location` defined by an interval on a referenced :ref:`Sequence`. **Information Model** -Some SequenceLocation attributes are inherited from :ref:`Location`. - .. list-table:: :class: clean-wrap :header-rows: 1 @@ -16,13 +14,9 @@ Some SequenceLocation attributes are inherited from :ref:`Location`. - Type - Limits - Description - * - _id - - :ref:`CURIE` - - 0..1 - - Location Id. MUST be unique within document. 
* - type - string - - 1..1 + - 0..1 - MUST be "SequenceLocation" * - sequence_id - :ref:`CURIE` diff --git a/schema/defs/vrs/Text.rst b/schema/defs/vrs/Text.rst index 31b13b7c..5e2e1f29 100644 --- a/schema/defs/vrs/Text.rst +++ b/schema/defs/vrs/Text.rst @@ -4,8 +4,6 @@ A free-text definition of variation. **Information Model** -Some Text attributes are inherited from :ref:`Variation`. - .. list-table:: :class: clean-wrap :header-rows: 1 @@ -16,13 +14,9 @@ Some Text attributes are inherited from :ref:`Variation`. - Type - Limits - Description - * - _id - - :ref:`CURIE` - - 0..1 - - Variation Id. MUST be unique within document. * - type - string - - 1..1 + - 0..1 - MUST be "Text" * - definition - string diff --git a/schema/defs/vrs/VariationSet.rst b/schema/defs/vrs/VariationSet.rst index 9f365389..15cef5a7 100644 --- a/schema/defs/vrs/VariationSet.rst +++ b/schema/defs/vrs/VariationSet.rst @@ -4,8 +4,6 @@ An unconstrained set of Variation members. **Information Model** -Some VariationSet attributes are inherited from :ref:`Variation`. - .. list-table:: :class: clean-wrap :header-rows: 1 @@ -16,13 +14,9 @@ Some VariationSet attributes are inherited from :ref:`Variation`. - Type - Limits - Description - * - _id - - :ref:`CURIE` - - 0..1 - - Variation Id. MUST be unique within document. * - type - string - - 1..1 + - 0..1 - MUST be "VariationSet" * - members - :ref:`CURIE` | :ref:`Variation` diff --git a/schema/vrs-source.yaml b/schema/vrs-source.yaml index 7d176e17..568d1fc8 100644 --- a/schema/vrs-source.yaml +++ b/schema/vrs-source.yaml @@ -123,6 +123,7 @@ definitions: type: array minItems: 2 uniqueItems: true + ordered: false items: oneOf: - $ref: "#/definitions/Allele" @@ -165,6 +166,7 @@ definitions: members: type: array uniqueItems: true + ordered: false items: oneOf: - $ref: "#/definitions/CURIE" @@ -244,7 +246,7 @@ definitions: Genotype: description: >- - A quantified set of _in-trans_ :ref:`MolecularVariation` at a genomic locus. 
+ A quantified set of *in-trans* :ref:`MolecularVariation` at a genomic locus. type: object properties: type: @@ -257,6 +259,7 @@ definitions: type: array uniqueItems: true minItems: 1 + ordered: false items: $ref: "#/definitions/GenotypeMember" description: >- @@ -611,7 +614,7 @@ definitions: GenotypeMember: description: >- A class for expressing the count of a specific :ref:`MolecularVariation` present - _in-trans_ at a genomic locus represented by a :ref:`Genotype`. + *in-trans* at a genomic locus represented by a :ref:`Genotype`. type: object properties: type: diff --git a/schema/vrs.json b/schema/vrs.json index 71023a28..775553a5 100644 --- a/schema/vrs.json +++ b/schema/vrs.json @@ -66,10 +66,6 @@ "description": "The state of a molecule at a Location.", "type": "object", "properties": { - "_id": { - "$ref": "#/definitions/CURIE", - "description": "Variation Id. MUST be unique within document." - }, "type": { "type": "string", "const": "Allele", @@ -82,7 +78,10 @@ "$ref": "#/definitions/CURIE" }, { - "$ref": "#/definitions/Location" + "$ref": "#/definitions/ChromosomeLocation" + }, + { + "$ref": "#/definitions/SequenceLocation" } ], "description": "Where Allele is located" @@ -90,10 +89,16 @@ "state": { "oneOf": [ { - "$ref": "#/definitions/SequenceState" + "$ref": "#/definitions/DerivedSequenceExpression" }, { - "$ref": "#/definitions/SequenceExpression" + "$ref": "#/definitions/LiteralSequenceExpression" + }, + { + "$ref": "#/definitions/RepeatedSequenceExpression" + }, + { + "$ref": "#/definitions/SequenceState" } ], "description": "An expression of the sequence state", @@ -106,8 +111,7 @@ }, "required": [ "location", - "state", - "type" + "state" ], "additionalProperties": false }, @@ -115,10 +119,6 @@ "description": "A set of non-overlapping Allele members that co-occur on the same molecule.", "type": "object", "properties": { - "_id": { - "$ref": "#/definitions/CURIE", - "description": "Variation Id. MUST be unique within document." 
- }, "type": { "type": "string", "const": "Haplotype", @@ -129,6 +129,7 @@ "type": "array", "minItems": 2, "uniqueItems": true, + "ordered": false, "items": { "oneOf": [ { @@ -143,8 +144,7 @@ } }, "required": [ - "members", - "type" + "members" ], "additionalProperties": false }, @@ -152,10 +152,6 @@ "description": "A free-text definition of variation.", "type": "object", "properties": { - "_id": { - "$ref": "#/definitions/CURIE", - "description": "Variation Id. MUST be unique within document." - }, "type": { "type": "string", "const": "Text", @@ -168,8 +164,7 @@ } }, "required": [ - "definition", - "type" + "definition" ], "additionalProperties": false }, @@ -177,10 +172,6 @@ "description": "An unconstrained set of Variation members.", "type": "object", "properties": { - "_id": { - "$ref": "#/definitions/CURIE", - "description": "Variation Id. MUST be unique within document." - }, "type": { "type": "string", "const": "VariationSet", @@ -190,13 +181,29 @@ "members": { "type": "array", "uniqueItems": true, + "ordered": false, "items": { "oneOf": [ + { + "$ref": "#/definitions/Allele" + }, { "$ref": "#/definitions/CURIE" }, { - "$ref": "#/definitions/Variation" + "$ref": "#/definitions/CopyNumber" + }, + { + "$ref": "#/definitions/Genotype" + }, + { + "$ref": "#/definitions/Haplotype" + }, + { + "$ref": "#/definitions/Text" + }, + { + "$ref": "#/definitions/VariationSet" } ] }, @@ -204,8 +211,7 @@ } }, "required": [ - "members", - "type" + "members" ], "additionalProperties": false }, @@ -213,10 +219,6 @@ "type": "object", "description": "The absolute count of discrete copies of a MolecularVariation, Feature, SequenceExpression, or a CURIE reference within a system (e.g. genome, cell, etc.).", "properties": { - "_id": { - "$ref": "#/definitions/CURIE", - "description": "Variation Id. MUST be unique within document." 
- }, "type": { "type": "string", "const": "CopyNumber", @@ -226,16 +228,25 @@ "subject": { "oneOf": [ { - "$ref": "#/definitions/MolecularVariation" + "$ref": "#/definitions/Allele" }, { - "$ref": "#/definitions/Feature" + "$ref": "#/definitions/CURIE" }, { - "$ref": "#/definitions/SequenceExpression" + "$ref": "#/definitions/DerivedSequenceExpression" }, { - "$ref": "#/definitions/CURIE" + "$ref": "#/definitions/Gene" + }, + { + "$ref": "#/definitions/Haplotype" + }, + { + "$ref": "#/definitions/LiteralSequenceExpression" + }, + { + "$ref": "#/definitions/RepeatedSequenceExpression" } ], "description": "Subject of the Copy Number object" @@ -243,13 +254,13 @@ "copies": { "oneOf": [ { - "$ref": "#/definitions/Number" + "$ref": "#/definitions/DefiniteRange" }, { "$ref": "#/definitions/IndefiniteRange" }, { - "$ref": "#/definitions/DefiniteRange" + "$ref": "#/definitions/Number" } ], "description": "The integral number of copies of the subject in a system" @@ -322,19 +333,14 @@ ], "required": [ "copies", - "subject", - "type" + "subject" ], "additionalProperties": false }, "Genotype": { - "description": "A quantified set of _in-trans_ MolecularVariation at a genomic locus.", + "description": "A quantified set of *in-trans* MolecularVariation at a genomic locus.", "type": "object", "properties": { - "_id": { - "$ref": "#/definitions/CURIE", - "description": "Variation Id. MUST be unique within document." 
- }, "type": { "type": "string", "const": "Genotype", @@ -345,6 +351,7 @@ "type": "array", "uniqueItems": true, "minItems": 1, + "ordered": false, "items": { "$ref": "#/definitions/GenotypeMember" }, @@ -353,13 +360,13 @@ "count": { "oneOf": [ { - "$ref": "#/definitions/Number" + "$ref": "#/definitions/DefiniteRange" }, { "$ref": "#/definitions/IndefiniteRange" }, { - "$ref": "#/definitions/DefiniteRange" + "$ref": "#/definitions/Number" } ], "description": "The total number of copies of all MolecularVariation at this locus, MUST be greater than or equal to the sum of GenotypeMember copy counts. If greater than the total counts, this implies additional MolecularVariation that are expected to exist but are not explicitly indicated." @@ -367,8 +374,7 @@ }, "required": [ "count", - "members", - "type" + "members" ], "additionalProperties": false }, @@ -390,10 +396,6 @@ "description": "A Location on a chromosome defined by a species and chromosome name.", "type": "object", "properties": { - "_id": { - "$ref": "#/definitions/CURIE", - "description": "Location Id. MUST be unique within document." - }, "type": { "type": "string", "const": "ChromosomeLocation", @@ -417,8 +419,7 @@ "required": [ "chr", "interval", - "species_id", - "type" + "species_id" ], "additionalProperties": false }, @@ -426,10 +427,6 @@ "description": "A Location defined by an interval on a referenced Sequence.", "type": "object", "properties": { - "_id": { - "$ref": "#/definitions/CURIE", - "description": "Location Id. MUST be unique within document." 
- }, "type": { "type": "string", "const": "SequenceLocation", @@ -454,8 +451,7 @@ }, "required": [ "interval", - "sequence_id", - "type" + "sequence_id" ], "additionalProperties": false }, @@ -472,13 +468,13 @@ "start": { "oneOf": [ { - "$ref": "#/definitions/Number" + "$ref": "#/definitions/DefiniteRange" }, { "$ref": "#/definitions/IndefiniteRange" }, { - "$ref": "#/definitions/DefiniteRange" + "$ref": "#/definitions/Number" } ], "description": "The start coordinate or range of the interval. The minimum value of this coordinate or range is 0. MUST represent a coordinate or range less than the value of `end`." @@ -486,13 +482,13 @@ "end": { "oneOf": [ { - "$ref": "#/definitions/Number" + "$ref": "#/definitions/DefiniteRange" }, { "$ref": "#/definitions/IndefiniteRange" }, { - "$ref": "#/definitions/DefiniteRange" + "$ref": "#/definitions/Number" } ], "description": "The end coordinate or range of the interval. The minimum value of this coordinate or range is 0. MUST represent a coordinate or range greater than the value of `start`." 
@@ -697,8 +693,7 @@ } }, "required": [ - "sequence", - "type" + "sequence" ], "additionalProperties": false }, @@ -723,8 +718,7 @@ }, "required": [ "location", - "reverse_complement", - "type" + "reverse_complement" ], "additionalProperties": false }, @@ -741,10 +735,10 @@ "seq_expr": { "oneOf": [ { - "$ref": "#/definitions/LiteralSequenceExpression" + "$ref": "#/definitions/DerivedSequenceExpression" }, { - "$ref": "#/definitions/DerivedSequenceExpression" + "$ref": "#/definitions/LiteralSequenceExpression" } ], "description": "An expression of the repeating subsequence" @@ -752,13 +746,13 @@ "count": { "oneOf": [ { - "$ref": "#/definitions/Number" + "$ref": "#/definitions/DefiniteRange" }, { "$ref": "#/definitions/IndefiniteRange" }, { - "$ref": "#/definitions/DefiniteRange" + "$ref": "#/definitions/Number" } ], "description": "The count of repeated units, as an integer or inclusive range" @@ -831,13 +825,12 @@ ], "required": [ "count", - "seq_expr", - "type" + "seq_expr" ], "additionalProperties": false }, "GenotypeMember": { - "description": "A class for expressing the count of a specific MolecularVariation present _in-trans_ at a genomic locus represented by a Genotype.", + "description": "A class for expressing the count of a specific MolecularVariation present *in-trans* at a genomic locus represented by a Genotype.", "type": "object", "properties": { "type": { @@ -849,13 +842,13 @@ "count": { "oneOf": [ { - "$ref": "#/definitions/Number" + "$ref": "#/definitions/DefiniteRange" }, { "$ref": "#/definitions/IndefiniteRange" }, { - "$ref": "#/definitions/DefiniteRange" + "$ref": "#/definitions/Number" } ], "description": "The number of copies of the `variation` at a Genotype locus." 
@@ -905,8 +898,7 @@ } }, "required": [ - "gene_id", - "type" + "gene_id" ], "additionalProperties": false }, diff --git a/schema/vrs.yaml b/schema/vrs.yaml index d51ee26b..1ac58a4f 100644 --- a/schema/vrs.yaml +++ b/schema/vrs.yaml @@ -38,9 +38,6 @@ definitions: description: The state of a molecule at a Location. type: object properties: - _id: &id001 - $ref: '#/definitions/CURIE' - description: Variation Id. MUST be unique within document. type: type: string const: Allele @@ -49,26 +46,27 @@ definitions: location: oneOf: - $ref: '#/definitions/CURIE' - - $ref: '#/definitions/Location' + - $ref: '#/definitions/ChromosomeLocation' + - $ref: '#/definitions/SequenceLocation' description: Where Allele is located state: oneOf: + - $ref: '#/definitions/DerivedSequenceExpression' + - $ref: '#/definitions/LiteralSequenceExpression' + - $ref: '#/definitions/RepeatedSequenceExpression' - $ref: '#/definitions/SequenceState' - - $ref: '#/definitions/SequenceExpression' description: An expression of the sequence state deprecated: - $ref: '#/definitions/SequenceState' required: - location - state - - type additionalProperties: false Haplotype: description: A set of non-overlapping Allele members that co-occur on the same molecule. type: object properties: - _id: *id001 type: type: string const: Haplotype @@ -78,6 +76,7 @@ definitions: type: array minItems: 2 uniqueItems: true + ordered: false items: oneOf: - $ref: '#/definitions/Allele' @@ -86,13 +85,11 @@ definitions: Haplotype. required: - members - - type additionalProperties: false Text: description: A free-text definition of variation. type: object properties: - _id: *id001 type: type: string const: Text @@ -104,13 +101,11 @@ definitions: subclasses of Variation. required: - definition - - type additionalProperties: false VariationSet: description: An unconstrained set of Variation members. 
type: object properties: - _id: *id001 type: type: string const: VariationSet @@ -119,15 +114,20 @@ definitions: members: type: array uniqueItems: true + ordered: false items: oneOf: + - $ref: '#/definitions/Allele' - $ref: '#/definitions/CURIE' - - $ref: '#/definitions/Variation' + - $ref: '#/definitions/CopyNumber' + - $ref: '#/definitions/Genotype' + - $ref: '#/definitions/Haplotype' + - $ref: '#/definitions/Text' + - $ref: '#/definitions/VariationSet' description: List of Variation objects or identifiers. Attribute is required, but MAY be empty. required: - members - - type additionalProperties: false CopyNumber: type: object @@ -135,7 +135,6 @@ definitions: SequenceExpression, or a CURIE reference within a system (e.g. genome, cell, etc.). properties: - _id: *id001 type: type: string const: CopyNumber @@ -143,16 +142,19 @@ definitions: description: MUST be "CopyNumber" subject: oneOf: - - $ref: '#/definitions/MolecularVariation' - - $ref: '#/definitions/Feature' - - $ref: '#/definitions/SequenceExpression' + - $ref: '#/definitions/Allele' - $ref: '#/definitions/CURIE' + - $ref: '#/definitions/DerivedSequenceExpression' + - $ref: '#/definitions/Gene' + - $ref: '#/definitions/Haplotype' + - $ref: '#/definitions/LiteralSequenceExpression' + - $ref: '#/definitions/RepeatedSequenceExpression' description: Subject of the Copy Number object copies: oneOf: - - $ref: '#/definitions/Number' - - $ref: '#/definitions/IndefiniteRange' - $ref: '#/definitions/DefiniteRange' + - $ref: '#/definitions/IndefiniteRange' + - $ref: '#/definitions/Number' description: The integral number of copies of the subject in a system allOf: - if: @@ -190,13 +192,11 @@ definitions: required: - copies - subject - - type additionalProperties: false Genotype: - description: A quantified set of _in-trans_ MolecularVariation at a genomic locus. + description: A quantified set of *in-trans* MolecularVariation at a genomic locus. 
type: object properties: - _id: *id001 type: type: string const: Genotype @@ -206,15 +206,16 @@ definitions: type: array uniqueItems: true minItems: 1 + ordered: false items: $ref: '#/definitions/GenotypeMember' description: Each GenotypeMember in `members` describes a MolecularVariation and the count of that variation at the locus. count: oneOf: - - $ref: '#/definitions/Number' - - $ref: '#/definitions/IndefiniteRange' - $ref: '#/definitions/DefiniteRange' + - $ref: '#/definitions/IndefiniteRange' + - $ref: '#/definitions/Number' description: The total number of copies of all MolecularVariation at this locus, MUST be greater than or equal to the sum of GenotypeMember copy counts. If greater than the total counts, this implies additional MolecularVariation @@ -222,7 +223,6 @@ definitions: required: - count - members - - type additionalProperties: false Location: description: A contiguous segment of a biological sequence. @@ -235,9 +235,6 @@ definitions: description: A Location on a chromosome defined by a species and chromosome name. type: object properties: - _id: &id002 - $ref: '#/definitions/CURIE' - description: Location Id. MUST be unique within document. type: type: string const: ChromosomeLocation @@ -259,13 +256,11 @@ definitions: - chr - interval - species_id - - type additionalProperties: false SequenceLocation: description: A Location defined by an interval on a referenced Sequence. type: object properties: - _id: *id002 type: type: string const: SequenceLocation @@ -282,7 +277,6 @@ definitions: required: - interval - sequence_id - - type additionalProperties: false SequenceInterval: description: A SequenceInterval represents a span on a Sequence. 
Positions are @@ -297,17 +291,17 @@ definitions: description: MUST be "SequenceInterval" start: oneOf: - - $ref: '#/definitions/Number' - - $ref: '#/definitions/IndefiniteRange' - $ref: '#/definitions/DefiniteRange' + - $ref: '#/definitions/IndefiniteRange' + - $ref: '#/definitions/Number' description: The start coordinate or range of the interval. The minimum value of this coordinate or range is 0. MUST represent a coordinate or range less than the value of `end`. end: oneOf: - - $ref: '#/definitions/Number' - - $ref: '#/definitions/IndefiniteRange' - $ref: '#/definitions/DefiniteRange' + - $ref: '#/definitions/IndefiniteRange' + - $ref: '#/definitions/Number' description: The end coordinate or range of the interval. The minimum value of this coordinate or range is 0. MUST represent a coordinate or range greater than the value of `start`. @@ -431,7 +425,6 @@ definitions: description: the literal Sequence expressed required: - sequence - - type additionalProperties: false DerivedSequenceExpression: description: An approximate expression of a sequence that is derived from a referenced @@ -456,7 +449,6 @@ definitions: required: - location - reverse_complement - - type additionalProperties: false RepeatedSequenceExpression: description: An expression of a sequence comprised of a tandem repeating subsequence. 
@@ -469,14 +461,14 @@ definitions: description: MUST be "RepeatedSequenceExpression" seq_expr: oneOf: - - $ref: '#/definitions/LiteralSequenceExpression' - $ref: '#/definitions/DerivedSequenceExpression' + - $ref: '#/definitions/LiteralSequenceExpression' description: An expression of the repeating subsequence count: oneOf: - - $ref: '#/definitions/Number' - - $ref: '#/definitions/IndefiniteRange' - $ref: '#/definitions/DefiniteRange' + - $ref: '#/definitions/IndefiniteRange' + - $ref: '#/definitions/Number' description: The count of repeated units, as an integer or inclusive range allOf: - if: @@ -514,11 +506,10 @@ definitions: required: - count - seq_expr - - type additionalProperties: false GenotypeMember: description: A class for expressing the count of a specific MolecularVariation - present _in-trans_ at a genomic locus represented by a Genotype. + present *in-trans* at a genomic locus represented by a Genotype. type: object properties: type: @@ -528,9 +519,9 @@ definitions: description: MUST be "GenotypeMember". count: oneOf: - - $ref: '#/definitions/Number' - - $ref: '#/definitions/IndefiniteRange' - $ref: '#/definitions/DefiniteRange' + - $ref: '#/definitions/IndefiniteRange' + - $ref: '#/definitions/Number' description: The number of copies of the `variation` at a Genotype locus. variation: oneOf: @@ -565,7 +556,6 @@ definitions: description: A CURIE reference to a Gene concept required: - gene_id - - type additionalProperties: false Number: description: A simple integer value as a VRS class. 
From dc2c85ab4130b7d116ec9c9425e3f3099bf958ce Mon Sep 17 00:00:00 2001 From: Alex Handler Wagner Date: Mon, 12 Sep 2022 10:33:59 -0400 Subject: [PATCH 35/83] update tests --- tests/config.py | 1 + tests/test_basic.py | 7 +++---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/config.py b/tests/config.py index 385f3123..18443c0b 100644 --- a/tests/config.py +++ b/tests/config.py @@ -4,4 +4,5 @@ schema_dir = root_dir / "schema" vrs_yaml_path = schema_dir / "vrs-source.yaml" vrs_json_path = schema_dir / "vrs.json" +vrs_merged_yaml_path = schema_dir / "merged.yaml" diff --git a/tests/test_basic.py b/tests/test_basic.py index fea4e985..1d56ee83 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -5,11 +5,10 @@ from schema.helpers import pjs_filter from ga4gh.gks.metaschema.tools.source_proc import YamlSchemaProcessor -from config import vrs_json_path, vrs_yaml_path +from config import vrs_json_path, vrs_yaml_path, vrs_merged_yaml_path # Are the yaml and json parsable and do they match? -y = yaml.load(open(vrs_yaml_path), Loader=yaml.SafeLoader) -p = YamlSchemaProcessor(y) +p = YamlSchemaProcessor(vrs_yaml_path) j = json.load(open(vrs_json_path)) @@ -19,5 +18,5 @@ def test_json_yaml_match(): # Can pjs handle this schema? 
def test_pjs_smoke(): - ob = pjs.ObjectBuilder(pjs_filter(y)) + ob = pjs.ObjectBuilder(pjs_filter(j)) assert ob.build_classes() # no exception => okay From 969a52b1bb07770eca9befb5b4a7fb51da0688a2 Mon Sep 17 00:00:00 2001 From: Alex Handler Wagner Date: Mon, 12 Sep 2022 10:34:23 -0400 Subject: [PATCH 36/83] remove unused imports --- tests/test_basic.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_basic.py b/tests/test_basic.py index 1d56ee83..febe2904 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -1,11 +1,10 @@ import json import python_jsonschema_objects as pjs -import yaml from schema.helpers import pjs_filter from ga4gh.gks.metaschema.tools.source_proc import YamlSchemaProcessor -from config import vrs_json_path, vrs_yaml_path, vrs_merged_yaml_path +from config import vrs_json_path, vrs_yaml_path # Are the yaml and json parsable and do they match? p = YamlSchemaProcessor(vrs_yaml_path) From 983f8b3cdc6e4ef9cbe4de54cb61b37e363bf1db Mon Sep 17 00:00:00 2001 From: Alex Handler Wagner Date: Mon, 12 Sep 2022 10:58:40 -0400 Subject: [PATCH 37/83] add definition --- docs/source/terms_and_model.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/source/terms_and_model.rst b/docs/source/terms_and_model.rst index 98eaa7c1..41e64624 100644 --- a/docs/source/terms_and_model.rst +++ b/docs/source/terms_and_model.rst @@ -412,6 +412,12 @@ Two, three, or four total copies of BRCA1: Genotype $$$$$$$$ +A *genotype* is a representation of the variants present at a given genomic locus, and may be referred +to either by individual nucleotide representations (e.g. GT representation in VCF files) or symbolically +(e.g. A/B/O blood type reporting). To support these use cases, VRS genotypes enable representation of +genotypes using either :ref:`Allele` objects (as commonly done in VCF records) or larger :ref:`Haplotype` +objects (which would otherwise be represented using symbolic shorthand). + .. 
include:: defs/Genotype.rst **Implementation guidance** From 36b126832f665c1d7b690797db87811f06fa1b78 Mon Sep 17 00:00:00 2001 From: Alex Handler Wagner Date: Mon, 12 Sep 2022 11:58:26 -0400 Subject: [PATCH 38/83] update inheritance model --- schema/defs/vrs/Allele.rst | 8 ++- schema/defs/vrs/ChromosomeLocation.rst | 8 ++- schema/defs/vrs/CopyNumber.rst | 8 ++- schema/defs/vrs/DerivedSequenceExpression.rst | 4 +- schema/defs/vrs/Gene.rst | 4 +- schema/defs/vrs/Genotype.rst | 8 ++- schema/defs/vrs/GenotypeMember.rst | 2 +- schema/defs/vrs/Haplotype.rst | 8 ++- schema/defs/vrs/LiteralSequenceExpression.rst | 4 +- .../defs/vrs/RepeatedSequenceExpression.rst | 4 +- schema/defs/vrs/SequenceLocation.rst | 8 ++- schema/defs/vrs/Text.rst | 8 ++- schema/defs/vrs/VariationSet.rst | 8 ++- schema/vrs-source.yaml | 17 ++++- schema/vrs.json | 69 +++++++++++++++---- schema/vrs.yaml | 37 ++++++++++ 16 files changed, 179 insertions(+), 26 deletions(-) diff --git a/schema/defs/vrs/Allele.rst b/schema/defs/vrs/Allele.rst index 2dc0a247..1bf84b06 100644 --- a/schema/defs/vrs/Allele.rst +++ b/schema/defs/vrs/Allele.rst @@ -4,6 +4,8 @@ The state of a molecule at a :ref:`Location`. **Information Model** +Some Allele attributes are inherited from :ref:`Variation`. + .. list-table:: :class: clean-wrap :header-rows: 1 @@ -14,9 +16,13 @@ The state of a molecule at a :ref:`Location`. - Type - Limits - Description + * - _id + - :ref:`CURIE` + - 0..1 + - Variation Id. MUST be unique within document. * - type - string - - 0..1 + - 1..1 - MUST be "Allele" * - location - :ref:`CURIE` | :ref:`Location` diff --git a/schema/defs/vrs/ChromosomeLocation.rst b/schema/defs/vrs/ChromosomeLocation.rst index 5e205869..91151861 100644 --- a/schema/defs/vrs/ChromosomeLocation.rst +++ b/schema/defs/vrs/ChromosomeLocation.rst @@ -4,6 +4,8 @@ A Location on a chromosome defined by a species and chromosome name. **Information Model** +Some ChromosomeLocation attributes are inherited from :ref:`Location`. + .. 
list-table:: :class: clean-wrap :header-rows: 1 @@ -14,9 +16,13 @@ A Location on a chromosome defined by a species and chromosome name. - Type - Limits - Description + * - _id + - :ref:`CURIE` + - 0..1 + - Location Id. MUST be unique within document. * - type - string - - 0..1 + - 1..1 - MUST be "ChromosomeLocation" * - species_id - :ref:`CURIE` diff --git a/schema/defs/vrs/CopyNumber.rst b/schema/defs/vrs/CopyNumber.rst index 110fad38..17a483cb 100644 --- a/schema/defs/vrs/CopyNumber.rst +++ b/schema/defs/vrs/CopyNumber.rst @@ -4,6 +4,8 @@ The absolute count of discrete copies of a :ref:`MolecularVariation`, :ref:`Feat **Information Model** +Some CopyNumber attributes are inherited from :ref:`Variation`. + .. list-table:: :class: clean-wrap :header-rows: 1 @@ -14,9 +16,13 @@ The absolute count of discrete copies of a :ref:`MolecularVariation`, :ref:`Feat - Type - Limits - Description + * - _id + - :ref:`CURIE` + - 0..1 + - Variation Id. MUST be unique within document. * - type - string - - 0..1 + - 1..1 - MUST be "CopyNumber" * - subject - :ref:`MolecularVariation` | :ref:`Feature` | :ref:`SequenceExpression` | :ref:`CURIE` diff --git a/schema/defs/vrs/DerivedSequenceExpression.rst b/schema/defs/vrs/DerivedSequenceExpression.rst index 6821f947..99421f5b 100644 --- a/schema/defs/vrs/DerivedSequenceExpression.rst +++ b/schema/defs/vrs/DerivedSequenceExpression.rst @@ -4,6 +4,8 @@ An approximate expression of a sequence that is derived from a referenced sequen **Information Model** +Some DerivedSequenceExpression attributes are inherited from :ref:`SequenceExpression`. + .. 
list-table:: :class: clean-wrap :header-rows: 1 @@ -16,7 +18,7 @@ An approximate expression of a sequence that is derived from a referenced sequen - Description * - type - string - - 0..1 + - 1..1 - MUST be "DerivedSequenceExpression" * - location - :ref:`SequenceLocation` diff --git a/schema/defs/vrs/Gene.rst b/schema/defs/vrs/Gene.rst index 792ae83a..c598388b 100644 --- a/schema/defs/vrs/Gene.rst +++ b/schema/defs/vrs/Gene.rst @@ -4,6 +4,8 @@ A reference to a Gene as defined by an authority. For human genes, the use of `h **Information Model** +Some Gene attributes are inherited from :ref:`Feature`. + .. list-table:: :class: clean-wrap :header-rows: 1 @@ -16,7 +18,7 @@ A reference to a Gene as defined by an authority. For human genes, the use of `h - Description * - type - string - - 0..1 + - 1..1 - MUST be "Gene" * - gene_id - :ref:`CURIE` diff --git a/schema/defs/vrs/Genotype.rst b/schema/defs/vrs/Genotype.rst index 74093a7a..9738064d 100644 --- a/schema/defs/vrs/Genotype.rst +++ b/schema/defs/vrs/Genotype.rst @@ -4,6 +4,8 @@ A quantified set of *in-trans* :ref:`MolecularVariation` at a genomic locus. **Information Model** +Some Genotype attributes are inherited from :ref:`Variation`. + .. list-table:: :class: clean-wrap :header-rows: 1 @@ -14,9 +16,13 @@ A quantified set of *in-trans* :ref:`MolecularVariation` at a genomic locus. - Type - Limits - Description + * - _id + - :ref:`CURIE` + - 0..1 + - Variation Id. MUST be unique within document. * - type - string - - 0..1 + - 1..1 - MUST be "Genotype" * - members - :ref:`GenotypeMember` diff --git a/schema/defs/vrs/GenotypeMember.rst b/schema/defs/vrs/GenotypeMember.rst index 49935e2b..39775064 100644 --- a/schema/defs/vrs/GenotypeMember.rst +++ b/schema/defs/vrs/GenotypeMember.rst @@ -16,7 +16,7 @@ A class for expressing the count of a specific :ref:`MolecularVariation` present - Description * - type - string - - 0..1 + - 1..1 - MUST be "GenotypeMember". 
* - count - :ref:`Number` | :ref:`IndefiniteRange` | :ref:`DefiniteRange` diff --git a/schema/defs/vrs/Haplotype.rst b/schema/defs/vrs/Haplotype.rst index 82697cdc..6202690a 100644 --- a/schema/defs/vrs/Haplotype.rst +++ b/schema/defs/vrs/Haplotype.rst @@ -4,6 +4,8 @@ A set of non-overlapping :ref:`Allele` members that co-occur on the same molecul **Information Model** +Some Haplotype attributes are inherited from :ref:`Variation`. + .. list-table:: :class: clean-wrap :header-rows: 1 @@ -14,9 +16,13 @@ A set of non-overlapping :ref:`Allele` members that co-occur on the same molecul - Type - Limits - Description + * - _id + - :ref:`CURIE` + - 0..1 + - Variation Id. MUST be unique within document. * - type - string - - 0..1 + - 1..1 - MUST be "Haplotype" * - members - :ref:`Allele` | :ref:`CURIE` diff --git a/schema/defs/vrs/LiteralSequenceExpression.rst b/schema/defs/vrs/LiteralSequenceExpression.rst index 9475c3e6..fe8347b0 100644 --- a/schema/defs/vrs/LiteralSequenceExpression.rst +++ b/schema/defs/vrs/LiteralSequenceExpression.rst @@ -4,6 +4,8 @@ An explicit expression of a Sequence. **Information Model** +Some LiteralSequenceExpression attributes are inherited from :ref:`SequenceExpression`. + .. list-table:: :class: clean-wrap :header-rows: 1 @@ -16,7 +18,7 @@ An explicit expression of a Sequence. - Description * - type - string - - 0..1 + - 1..1 - MUST be "LiteralSequenceExpression" * - sequence - :ref:`Sequence` diff --git a/schema/defs/vrs/RepeatedSequenceExpression.rst b/schema/defs/vrs/RepeatedSequenceExpression.rst index b81c519f..1ac1c75a 100644 --- a/schema/defs/vrs/RepeatedSequenceExpression.rst +++ b/schema/defs/vrs/RepeatedSequenceExpression.rst @@ -4,6 +4,8 @@ An expression of a sequence comprised of a tandem repeating subsequence. **Information Model** +Some RepeatedSequenceExpression attributes are inherited from :ref:`SequenceExpression`. + .. 
list-table:: :class: clean-wrap :header-rows: 1 @@ -16,7 +18,7 @@ An expression of a sequence comprised of a tandem repeating subsequence. - Description * - type - string - - 0..1 + - 1..1 - MUST be "RepeatedSequenceExpression" * - seq_expr - :ref:`LiteralSequenceExpression` | :ref:`DerivedSequenceExpression` diff --git a/schema/defs/vrs/SequenceLocation.rst b/schema/defs/vrs/SequenceLocation.rst index 501251c3..606bb6f7 100644 --- a/schema/defs/vrs/SequenceLocation.rst +++ b/schema/defs/vrs/SequenceLocation.rst @@ -4,6 +4,8 @@ A :ref:`Location` defined by an interval on a referenced :ref:`Sequence`. **Information Model** +Some SequenceLocation attributes are inherited from :ref:`Location`. + .. list-table:: :class: clean-wrap :header-rows: 1 @@ -14,9 +16,13 @@ A :ref:`Location` defined by an interval on a referenced :ref:`Sequence`. - Type - Limits - Description + * - _id + - :ref:`CURIE` + - 0..1 + - Location Id. MUST be unique within document. * - type - string - - 0..1 + - 1..1 - MUST be "SequenceLocation" * - sequence_id - :ref:`CURIE` diff --git a/schema/defs/vrs/Text.rst b/schema/defs/vrs/Text.rst index 5e2e1f29..31b13b7c 100644 --- a/schema/defs/vrs/Text.rst +++ b/schema/defs/vrs/Text.rst @@ -4,6 +4,8 @@ A free-text definition of variation. **Information Model** +Some Text attributes are inherited from :ref:`Variation`. + .. list-table:: :class: clean-wrap :header-rows: 1 @@ -14,9 +16,13 @@ A free-text definition of variation. - Type - Limits - Description + * - _id + - :ref:`CURIE` + - 0..1 + - Variation Id. MUST be unique within document. * - type - string - - 0..1 + - 1..1 - MUST be "Text" * - definition - string diff --git a/schema/defs/vrs/VariationSet.rst b/schema/defs/vrs/VariationSet.rst index 15cef5a7..9f365389 100644 --- a/schema/defs/vrs/VariationSet.rst +++ b/schema/defs/vrs/VariationSet.rst @@ -4,6 +4,8 @@ An unconstrained set of Variation members. **Information Model** +Some VariationSet attributes are inherited from :ref:`Variation`. + .. 
list-table:: :class: clean-wrap :header-rows: 1 @@ -14,9 +16,13 @@ An unconstrained set of Variation members. - Type - Limits - Description + * - _id + - :ref:`CURIE` + - 0..1 + - Variation Id. MUST be unique within document. * - type - string - - 0..1 + - 1..1 - MUST be "VariationSet" * - members - :ref:`CURIE` | :ref:`Variation` diff --git a/schema/vrs-source.yaml b/schema/vrs-source.yaml index 568d1fc8..54dd9597 100644 --- a/schema/vrs-source.yaml +++ b/schema/vrs-source.yaml @@ -44,6 +44,7 @@ definitions: propertyName: type MolecularVariation: + inherits: Variation description: >- A :ref:`variation` on a contiguous molecule. oneOf: @@ -53,6 +54,7 @@ definitions: propertyName: type UtilityVariation: + inherits: Variation description: >- A collection of :ref:`Variation` subclasses that cannot be constrained to a specific class of biological variation, but @@ -64,6 +66,7 @@ definitions: propertyName: type SystemicVariation: + inherits: Variation description: >- A Variation of multiple molecules in the context of a system, e.g. a genome, sample, or homologous chromosomes. @@ -82,6 +85,7 @@ definitions: # Molecular Variation Allele: + inherits: MolecularVariation description: >- The state of a molecule at a :ref:`Location`. type: object @@ -109,6 +113,7 @@ definitions: required: [ "location", "state" ] Haplotype: + inherits: MolecularVariation description: >- A set of non-overlapping :ref:`Allele` members that co-occur on the same molecule. type: "object" @@ -137,6 +142,7 @@ definitions: # UtilityVariation Text: + inherits: UtilityVariation description: >- A free-text definition of variation. type: object @@ -154,6 +160,7 @@ definitions: required: [ "definition" ] VariationSet: + inherits: UtilityVariation description: >- An unconstrained set of Variation members. 
type: object @@ -181,6 +188,7 @@ definitions: # SystemicVariation CopyNumber: + inherits: SystemicVariation type: object description: >- The absolute count of discrete copies of a :ref:`MolecularVariation`, @@ -245,6 +253,7 @@ definitions: required: [ "subject", "copies" ] Genotype: + inherits: SystemicVariation description: >- A quantified set of *in-trans* :ref:`MolecularVariation` at a genomic locus. type: object @@ -303,6 +312,7 @@ definitions: propertyName: type ChromosomeLocation: + inherits: Location description: >- A Location on a chromosome defined by a species and chromosome name. type: object @@ -331,6 +341,7 @@ definitions: required: [ "species_id", "chr", "interval" ] SequenceLocation: + inherits: Location description: >- A :ref:`Location` defined by an interval on a referenced :ref:`Sequence`. type: object @@ -510,6 +521,7 @@ definitions: heritable_required: ["type"] LiteralSequenceExpression: + inherits: SequenceExpression description: >- An explicit expression of a Sequence. type: object @@ -525,6 +537,7 @@ definitions: required: [ "sequence" ] DerivedSequenceExpression: + inherits: SequenceExpression description: >- An approximate expression of a sequence that is derived from a referenced sequence location. Use of this class @@ -551,6 +564,7 @@ definitions: required: [ "location", "reverse_complement" ] RepeatedSequenceExpression: + inherits: SequenceExpression description: >- An expression of a sequence comprised of a tandem repeating subsequence. type: object @@ -635,7 +649,7 @@ definitions: - $ref: "#/definitions/Haplotype" description: >- A :ref:`MolecularVariation` at a :ref:`Genotype` locus. - required: [ "count", "variation" ] + required: [ "type", "count", "variation" ] # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Feature @@ -657,6 +671,7 @@ definitions: heritable_required: [ "type" ] Gene: + inherits: Feature description: >- A reference to a Gene as defined by an authority. 
For human genes, the use of `hgnc `_ diff --git a/schema/vrs.json b/schema/vrs.json index 775553a5..f3da11f3 100644 --- a/schema/vrs.json +++ b/schema/vrs.json @@ -66,6 +66,10 @@ "description": "The state of a molecule at a Location.", "type": "object", "properties": { + "_id": { + "$ref": "#/definitions/CURIE", + "description": "Variation Id. MUST be unique within document." + }, "type": { "type": "string", "const": "Allele", @@ -111,7 +115,8 @@ }, "required": [ "location", - "state" + "state", + "type" ], "additionalProperties": false }, @@ -119,6 +124,10 @@ "description": "A set of non-overlapping Allele members that co-occur on the same molecule.", "type": "object", "properties": { + "_id": { + "$ref": "#/definitions/CURIE", + "description": "Variation Id. MUST be unique within document." + }, "type": { "type": "string", "const": "Haplotype", @@ -144,7 +153,8 @@ } }, "required": [ - "members" + "members", + "type" ], "additionalProperties": false }, @@ -152,6 +162,10 @@ "description": "A free-text definition of variation.", "type": "object", "properties": { + "_id": { + "$ref": "#/definitions/CURIE", + "description": "Variation Id. MUST be unique within document." + }, "type": { "type": "string", "const": "Text", @@ -164,7 +178,8 @@ } }, "required": [ - "definition" + "definition", + "type" ], "additionalProperties": false }, @@ -172,6 +187,10 @@ "description": "An unconstrained set of Variation members.", "type": "object", "properties": { + "_id": { + "$ref": "#/definitions/CURIE", + "description": "Variation Id. MUST be unique within document." + }, "type": { "type": "string", "const": "VariationSet", @@ -211,7 +230,8 @@ } }, "required": [ - "members" + "members", + "type" ], "additionalProperties": false }, @@ -219,6 +239,10 @@ "type": "object", "description": "The absolute count of discrete copies of a MolecularVariation, Feature, SequenceExpression, or a CURIE reference within a system (e.g. 
genome, cell, etc.).", "properties": { + "_id": { + "$ref": "#/definitions/CURIE", + "description": "Variation Id. MUST be unique within document." + }, "type": { "type": "string", "const": "CopyNumber", @@ -333,7 +357,8 @@ ], "required": [ "copies", - "subject" + "subject", + "type" ], "additionalProperties": false }, @@ -341,6 +366,10 @@ "description": "A quantified set of *in-trans* MolecularVariation at a genomic locus.", "type": "object", "properties": { + "_id": { + "$ref": "#/definitions/CURIE", + "description": "Variation Id. MUST be unique within document." + }, "type": { "type": "string", "const": "Genotype", @@ -374,7 +403,8 @@ }, "required": [ "count", - "members" + "members", + "type" ], "additionalProperties": false }, @@ -396,6 +426,10 @@ "description": "A Location on a chromosome defined by a species and chromosome name.", "type": "object", "properties": { + "_id": { + "$ref": "#/definitions/CURIE", + "description": "Location Id. MUST be unique within document." + }, "type": { "type": "string", "const": "ChromosomeLocation", @@ -419,7 +453,8 @@ "required": [ "chr", "interval", - "species_id" + "species_id", + "type" ], "additionalProperties": false }, @@ -427,6 +462,10 @@ "description": "A Location defined by an interval on a referenced Sequence.", "type": "object", "properties": { + "_id": { + "$ref": "#/definitions/CURIE", + "description": "Location Id. MUST be unique within document." 
+ }, "type": { "type": "string", "const": "SequenceLocation", @@ -451,7 +490,8 @@ }, "required": [ "interval", - "sequence_id" + "sequence_id", + "type" ], "additionalProperties": false }, @@ -693,7 +733,8 @@ } }, "required": [ - "sequence" + "sequence", + "type" ], "additionalProperties": false }, @@ -718,7 +759,8 @@ }, "required": [ "location", - "reverse_complement" + "reverse_complement", + "type" ], "additionalProperties": false }, @@ -825,7 +867,8 @@ ], "required": [ "count", - "seq_expr" + "seq_expr", + "type" ], "additionalProperties": false }, @@ -867,6 +910,7 @@ }, "required": [ "count", + "type", "variation" ], "additionalProperties": false @@ -898,7 +942,8 @@ } }, "required": [ - "gene_id" + "gene_id", + "type" ], "additionalProperties": false }, diff --git a/schema/vrs.yaml b/schema/vrs.yaml index 1ac58a4f..6cf9bd77 100644 --- a/schema/vrs.yaml +++ b/schema/vrs.yaml @@ -38,6 +38,9 @@ definitions: description: The state of a molecule at a Location. type: object properties: + _id: + $ref: '#/definitions/CURIE' + description: Variation Id. MUST be unique within document. type: type: string const: Allele @@ -61,12 +64,16 @@ definitions: required: - location - state + - type additionalProperties: false Haplotype: description: A set of non-overlapping Allele members that co-occur on the same molecule. type: object properties: + _id: + $ref: '#/definitions/CURIE' + description: Variation Id. MUST be unique within document. type: type: string const: Haplotype @@ -85,11 +92,15 @@ definitions: Haplotype. required: - members + - type additionalProperties: false Text: description: A free-text definition of variation. type: object properties: + _id: + $ref: '#/definitions/CURIE' + description: Variation Id. MUST be unique within document. type: type: string const: Text @@ -101,11 +112,15 @@ definitions: subclasses of Variation. required: - definition + - type additionalProperties: false VariationSet: description: An unconstrained set of Variation members. 
type: object properties: + _id: + $ref: '#/definitions/CURIE' + description: Variation Id. MUST be unique within document. type: type: string const: VariationSet @@ -128,6 +143,7 @@ definitions: but MAY be empty. required: - members + - type additionalProperties: false CopyNumber: type: object @@ -135,6 +151,9 @@ definitions: SequenceExpression, or a CURIE reference within a system (e.g. genome, cell, etc.). properties: + _id: + $ref: '#/definitions/CURIE' + description: Variation Id. MUST be unique within document. type: type: string const: CopyNumber @@ -192,11 +211,15 @@ definitions: required: - copies - subject + - type additionalProperties: false Genotype: description: A quantified set of *in-trans* MolecularVariation at a genomic locus. type: object properties: + _id: + $ref: '#/definitions/CURIE' + description: Variation Id. MUST be unique within document. type: type: string const: Genotype @@ -223,6 +246,7 @@ definitions: required: - count - members + - type additionalProperties: false Location: description: A contiguous segment of a biological sequence. @@ -235,6 +259,9 @@ definitions: description: A Location on a chromosome defined by a species and chromosome name. type: object properties: + _id: + $ref: '#/definitions/CURIE' + description: Location Id. MUST be unique within document. type: type: string const: ChromosomeLocation @@ -256,11 +283,15 @@ definitions: - chr - interval - species_id + - type additionalProperties: false SequenceLocation: description: A Location defined by an interval on a referenced Sequence. type: object properties: + _id: + $ref: '#/definitions/CURIE' + description: Location Id. MUST be unique within document. type: type: string const: SequenceLocation @@ -277,6 +308,7 @@ definitions: required: - interval - sequence_id + - type additionalProperties: false SequenceInterval: description: A SequenceInterval represents a span on a Sequence. 
Positions are @@ -425,6 +457,7 @@ definitions: description: the literal Sequence expressed required: - sequence + - type additionalProperties: false DerivedSequenceExpression: description: An approximate expression of a sequence that is derived from a referenced @@ -449,6 +482,7 @@ definitions: required: - location - reverse_complement + - type additionalProperties: false RepeatedSequenceExpression: description: An expression of a sequence comprised of a tandem repeating subsequence. @@ -506,6 +540,7 @@ definitions: required: - count - seq_expr + - type additionalProperties: false GenotypeMember: description: A class for expressing the count of a specific MolecularVariation @@ -530,6 +565,7 @@ definitions: description: A MolecularVariation at a Genotype locus. required: - count + - type - variation additionalProperties: false Feature: @@ -556,6 +592,7 @@ definitions: description: A CURIE reference to a Gene concept required: - gene_id + - type additionalProperties: false Number: description: A simple integer value as a VRS class. From 6edd4e7d89a4a494702b3831d109ad9061a7fa7a Mon Sep 17 00:00:00 2001 From: Alex Handler Wagner Date: Mon, 12 Sep 2022 13:45:46 -0400 Subject: [PATCH 39/83] update note --- docs/source/terms_and_model.rst | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/docs/source/terms_and_model.rst b/docs/source/terms_and_model.rst index 41e64624..e968ae2d 100644 --- a/docs/source/terms_and_model.rst +++ b/docs/source/terms_and_model.rst @@ -463,6 +463,7 @@ SO: `Genotype (SO:0001027) .. _genotypes-represent-haplotypes-with-arbitrary-ploidy: .. note:: Genotypes represent Haplotypes with arbitrary ploidy + The VRS defines Haplotypes as a list of Alleles, and Genotypes as a list of Haplotypes. In essence, Haplotypes and Genotypes represent two distinct dimensions of containment: Haplotypes represent the "in @@ -473,19 +474,16 @@ SO: `Genotype (SO:0001027) single-location Genotype. 
Users of SNP data will be familiar with representations like rs7412 C/C, which indicates the diploid state at a position. In the VRS, this is merely a special case of a - Genotype with two Haplotypes, each of which is defined with only one - Allele (the same Allele in this case). The VRS does not define a - diplotype type. A diplotype is a special case of a VRS Genotype - with exactly two Haplotypes. In practice, software data types that - assume a ploidy of 2 make it very difficult to represent haploid + Genotype with one GenotypeMember, defined by a single Allele with + two copies. The VRS does not define a diplotype class. A diplotype + is a special case of a VRS Genotype with count = 2. In practice, software + data types that assume a ploidy of 2 make it very difficult to represent haploid states, copy number loss, and copy number gain, all of which occur - when representing human data. In addition, assuming ploidy=2 makes + when representing human data. In addition, inferred ploidy = 2 makes software incompatible with organisms with other ploidy. The VRS - makes no assumptions about "normal" ploidy. - - In other words, the VRS does not represent single-position - Genotypes or diplotypes because both concepts are subsumed by the - Allele, Haplotype, and Genotypes entities. + requires explicit definition of the in-trans molecules at a genomic locus + with the `count` attribute, though this count may be inexact (e.g. a + :ref:`DefiniteRange` or :ref:`IndefiniteRange`. .. 
_UtilityVariation: From ed3306afb853821ee678b1e82ffb1920cd701fa0 Mon Sep 17 00:00:00 2001 From: Alex Handler Wagner Date: Mon, 12 Sep 2022 13:50:22 -0400 Subject: [PATCH 40/83] remove note intro --- docs/source/terms_and_model.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/source/terms_and_model.rst b/docs/source/terms_and_model.rst index e968ae2d..53d26ce3 100644 --- a/docs/source/terms_and_model.rst +++ b/docs/source/terms_and_model.rst @@ -462,8 +462,7 @@ SO: `Genotype (SO:0001027) .. _genotypes-represent-haplotypes-with-arbitrary-ploidy: -.. note:: Genotypes represent Haplotypes with arbitrary ploidy - +.. note:: The VRS defines Haplotypes as a list of Alleles, and Genotypes as a list of Haplotypes. In essence, Haplotypes and Genotypes represent two distinct dimensions of containment: Haplotypes represent the "in From 429087e82a1cacb92d088052eb52dc1614f2808f Mon Sep 17 00:00:00 2001 From: Alex Handler Wagner Date: Mon, 12 Sep 2022 13:52:45 -0400 Subject: [PATCH 41/83] reverse the VRS --- docs/source/terms_and_model.rst | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/source/terms_and_model.rst b/docs/source/terms_and_model.rst index 53d26ce3..bb90d9fe 100644 --- a/docs/source/terms_and_model.rst +++ b/docs/source/terms_and_model.rst @@ -433,12 +433,12 @@ objects (which would otherwise be represented using symbolic shorthand). location and often with a ploidy of two, such as a pair of single residue variants on an autosome. The broader, generalized definition is a set of alleles at multiple locations and/or with ploidy other than - two. The VRS Genotype entity is based on this broader definition. + two. VRS Genotype entity is based on this broader definition. * The term "diplotype" is often used to refer to two in-trans haplotypes at a locus. - The VRS Genotype entity subsumes the conventional definition of diplotype. 
Therefore, - the VRS model does not include an explicit entity for diplotypes. See :ref:`this note + VRS Genotype entity subsumes the conventional definition of diplotype. Therefore, + VRS does not include an explicit entity for diplotypes. See :ref:`this note ` for a discussion. -* The VRS model makes no assumptions about ploidy of an organism or individual nor any +* VRS makes no assumptions about ploidy of an organism or individual nor any polysomy affecting a locus. The `genotype.count` attribute explicitly captures the total count of in-trans molecules at a genomic locus represented by the Genotype. * In diploid organisms, there are typically two instances of each autosomal chromosome, @@ -463,7 +463,7 @@ SO: `Genotype (SO:0001027) .. _genotypes-represent-haplotypes-with-arbitrary-ploidy: .. note:: - The VRS defines Haplotypes as a list of Alleles, and Genotypes as + VRS defines Haplotypes as a list of Alleles, and Genotypes as a list of Haplotypes. In essence, Haplotypes and Genotypes represent two distinct dimensions of containment: Haplotypes represent the "in phase" relationship of Alleles while Genotypes represents sets of @@ -472,14 +472,14 @@ SO: `Genotype (SO:0001027) There are two important consequences of these definitions: There is no single-location Genotype. Users of SNP data will be familiar with representations like rs7412 C/C, which indicates the diploid state at - a position. In the VRS, this is merely a special case of a + a position. In VRS, this is merely a special case of a Genotype with one GenotypeMember, defined by a single Allele with - two copies. The VRS does not define a diplotype class. A diplotype + two copies. VRS does not define a diplotype class. A diplotype is a special case of a VRS Genotype with count = 2. In practice, software data types that assume a ploidy of 2 make it very difficult to represent haploid states, copy number loss, and copy number gain, all of which occur when representing human data. 
In addition, inferred ploidy = 2 makes - software incompatible with organisms with other ploidy. The VRS + software incompatible with organisms with other ploidy. VRS requires explicit definition of the in-trans molecules at a genomic locus with the `count` attribute, though this count may be inexact (e.g. a :ref:`DefiniteRange` or :ref:`IndefiniteRange`. @@ -1237,7 +1237,7 @@ derived from the IUPAC one-letter nucleic acid and amino acid codes. to define an :ref:`Allele`. A Sequence that replaces another Sequence is called a "replacement sequence". * In some contexts outside VRS, "reference sequence" may refer - to a member of set of sequences that comprise a genome assembly. In the VRS + to a member of set of sequences that comprise a genome assembly. In VRS specification, any sequence may be a "reference sequence", including those in a genome assembly. * For the purposes of representing sequence variation, it is not From 63d86d1ce0a83d7da7f2479774adfb3c8cd13747 Mon Sep 17 00:00:00 2001 From: Alex Handler Wagner Date: Mon, 12 Sep 2022 22:03:01 -0400 Subject: [PATCH 42/83] addresses https://github.com/ga4gh/vrs/pull/394#discussion_r932147364 --- docs/source/terms_and_model.rst | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/docs/source/terms_and_model.rst b/docs/source/terms_and_model.rst index bb90d9fe..e5ff313b 100644 --- a/docs/source/terms_and_model.rst +++ b/docs/source/terms_and_model.rst @@ -445,10 +445,12 @@ objects (which would otherwise be represented using symbolic shorthand). and therefore two instances of sequence at a particular location. Thus, Genotypes will often list two GenotypeMembers each based on a distinct Haplotype or Allele. In the case of haploid chromosomes or haploinsufficiency, the Genotype consists of a single GenotypeMember. -* A consequence of the computational definition is that Haplotypes at - overlapping or adjacent intervals MUST NOT be included in the same - Genotype. 
However, two or more Alleles MAY always be rewritten as an - equivalent Allele with a common sequence and interval context. +* A consequence of the computational definition is that in-cis Haplotypes at overlapping or + adjacent intervals MUST be merged into a single Haplotype for the same Genotype. +* A `GenotypeMember.variation` value MUST be unique among Genotype Members within a Genotype. + When more than one Genotype Member would have the same `variation` value (e.g. in the case + of a homozygous variant), this would be represented as a Genotype Value with a corresponding + `count` (i.e. for a diploid homozygous variant, `GenotypeMember.count = 2`). * The rationale for permitting Genotypes with Haplotypes defined on different reference sequences is to enable the accurate representation of segments of DNA with the most appropriate @@ -463,8 +465,8 @@ SO: `Genotype (SO:0001027) .. _genotypes-represent-haplotypes-with-arbitrary-ploidy: .. note:: - VRS defines Haplotypes as a list of Alleles, and Genotypes as - a list of Haplotypes. In essence, Haplotypes and Genotypes represent + VRS defines Genotypes as a list of GenotypeMembers defined by Haplotypes + or Alleles. In essence, Haplotypes and Genotypes represent two distinct dimensions of containment: Haplotypes represent the "in phase" relationship of Alleles while Genotypes represents sets of Haplotypes of arbitrary ploidy. 
From 3984b52125a352e1baee8994a257b6117e776777 Mon Sep 17 00:00:00 2001 From: Alex Handler Wagner Date: Mon, 3 Oct 2022 16:59:53 -0400 Subject: [PATCH 43/83] closes #401 --- docs/source/terms_and_model.rst | 33 ++++++++++++++++----------------- schema/defs/vrs/Genotype.rst | 4 ++-- schema/vrs-source.yaml | 11 ++++++----- schema/vrs.json | 4 ++-- schema/vrs.yaml | 10 ++++++---- 5 files changed, 32 insertions(+), 30 deletions(-) diff --git a/docs/source/terms_and_model.rst b/docs/source/terms_and_model.rst index e5ff313b..1f4326ba 100644 --- a/docs/source/terms_and_model.rst +++ b/docs/source/terms_and_model.rst @@ -267,11 +267,8 @@ genetic markers that tend to be transmitted together. * The locations of Alleles within the Haplotype MUST be interpreted independently. Alleles that create a net insertion or deletion of sequence MUST NOT change the location of "downstream" Alleles. -* The `members` attribute is required and MUST contain at least one - Allele. -* Haplotypes with one Allele are intended to be distinct entities from - the Allele by itself. See discussion on :ref:`equivalence`. - +* The `members` attribute is required and MUST contain at least two + Alleles. **Sources** @@ -435,14 +432,15 @@ objects (which would otherwise be represented using symbolic shorthand). set of alleles at multiple locations and/or with ploidy other than two. VRS Genotype entity is based on this broader definition. * The term "diplotype" is often used to refer to two in-trans haplotypes at a locus. - VRS Genotype entity subsumes the conventional definition of diplotype. Therefore, + VRS Genotype entity subsumes the conventional definition of diplotype, though + it describes no explicit in-trans phase relationship. Therefore, VRS does not include an explicit entity for diplotypes. See :ref:`this note ` for a discussion. * VRS makes no assumptions about ploidy of an organism or individual nor any polysomy affecting a locus. 
The `genotype.count` attribute explicitly captures the total - count of in-trans molecules at a genomic locus represented by the Genotype. + count of molecules associated with a genomic locus represented by the Genotype. * In diploid organisms, there are typically two instances of each autosomal chromosome, - and therefore two instances of sequence at a particular location. Thus, Genotypes will + and therefore two instances of sequence at a particular locus. Thus, Genotypes will often list two GenotypeMembers each based on a distinct Haplotype or Allele. In the case of haploid chromosomes or haploinsufficiency, the Genotype consists of a single GenotypeMember. * A consequence of the computational definition is that in-cis Haplotypes at overlapping or @@ -451,10 +449,11 @@ objects (which would otherwise be represented using symbolic shorthand). When more than one Genotype Member would have the same `variation` value (e.g. in the case of a homozygous variant), this would be represented as a Genotype Value with a corresponding `count` (i.e. for a diploid homozygous variant, `GenotypeMember.count = 2`). -* The rationale for permitting Genotypes with Haplotypes defined on - different reference sequences is to enable the accurate - representation of segments of DNA with the most appropriate - population-specific reference sequence. +* The rationale for permitting Genotypes with Haplotypes defined on different reference + sequences is to enable the accurate representation of segments of DNA with the most + appropriate population-specific reference sequence. +* Deletion of sequence at locus would be represented by the presence of Alleles of deleted + sequence, not absence of Alleles; therefore Genotypes MAY NOT have count < 1. **Sources** @@ -465,8 +464,8 @@ SO: `Genotype (SO:0001027) .. _genotypes-represent-haplotypes-with-arbitrary-ploidy: .. note:: - VRS defines Genotypes as a list of GenotypeMembers defined by Haplotypes - or Alleles. 
In essence, Haplotypes and Genotypes represent + VRS defines Genotypes using a list of GenotypeMembers defined by + Haplotypes or Alleles. In essence, Haplotypes and Genotypes represent two distinct dimensions of containment: Haplotypes represent the "in phase" relationship of Alleles while Genotypes represents sets of Haplotypes of arbitrary ploidy. @@ -482,9 +481,9 @@ SO: `Genotype (SO:0001027) states, copy number loss, and copy number gain, all of which occur when representing human data. In addition, inferred ploidy = 2 makes software incompatible with organisms with other ploidy. VRS - requires explicit definition of the in-trans molecules at a genomic locus - with the `count` attribute, though this count may be inexact (e.g. a - :ref:`DefiniteRange` or :ref:`IndefiniteRange`. + requires explicit definition of the count of molecules associated with + a genomic locus using the `count` attribute, though this count may be inexact + (e.g. a :ref:`DefiniteRange` or :ref:`IndefiniteRange`). .. _UtilityVariation: diff --git a/schema/defs/vrs/Genotype.rst b/schema/defs/vrs/Genotype.rst index 9738064d..d599a862 100644 --- a/schema/defs/vrs/Genotype.rst +++ b/schema/defs/vrs/Genotype.rst @@ -1,6 +1,6 @@ **Computational Definition** -A quantified set of *in-trans* :ref:`MolecularVariation` at a genomic locus. +A quantified set of :ref:`MolecularVariation` associated with a genomic locus. **Information Model** @@ -31,4 +31,4 @@ Some Genotype attributes are inherited from :ref:`Variation`. * - count - :ref:`Number` | :ref:`IndefiniteRange` | :ref:`DefiniteRange` - 1..1 - - The total number of copies of all :ref:`MolecularVariation` at this locus, MUST be greater than or equal to the sum of :ref:`GenotypeMember` copy counts. If greater than the total counts, this implies additional :ref:`MolecularVariation` that are expected to exist but are not explicitly indicated. 
+ - The total number of copies of all :ref:`MolecularVariation` at this locus, MUST be greater than or equal to the sum of :ref:`GenotypeMember` copy counts and MUST be greater than or equal to 1. If greater than the total of GenotypeMember counts, this field describes additional :ref:`MolecularVariation` that exist but are not explicitly described. diff --git a/schema/vrs-source.yaml b/schema/vrs-source.yaml index 54dd9597..8f22d0cf 100644 --- a/schema/vrs-source.yaml +++ b/schema/vrs-source.yaml @@ -255,7 +255,7 @@ definitions: Genotype: inherits: SystemicVariation description: >- - A quantified set of *in-trans* :ref:`MolecularVariation` at a genomic locus. + A quantified set of :ref:`MolecularVariation` associated with a genomic locus. type: object properties: type: @@ -281,10 +281,11 @@ definitions: - $ref: "#/definitions/DefiniteRange" description: >- The total number of copies of all :ref:`MolecularVariation` at this locus, - MUST be greater than or equal to the sum of :ref:`GenotypeMember` copy counts. - If greater than the total counts, this implies additional - :ref:`MolecularVariation` that are expected to exist but are not explicitly - indicated. + MUST be greater than or equal to the sum of :ref:`GenotypeMember` copy counts + and MUST be greater than or equal to 1. + If greater than the total of GenotypeMember counts, this field describes + additional :ref:`MolecularVariation` that exist but are not + explicitly described. 
required: [ "members", "count" ] diff --git a/schema/vrs.json b/schema/vrs.json index f3da11f3..2ca4b27b 100644 --- a/schema/vrs.json +++ b/schema/vrs.json @@ -363,7 +363,7 @@ "additionalProperties": false }, "Genotype": { - "description": "A quantified set of *in-trans* MolecularVariation at a genomic locus.", + "description": "A quantified set of MolecularVariation associated with a genomic locus.", "type": "object", "properties": { "_id": { @@ -398,7 +398,7 @@ "$ref": "#/definitions/Number" } ], - "description": "The total number of copies of all MolecularVariation at this locus, MUST be greater than or equal to the sum of GenotypeMember copy counts. If greater than the total counts, this implies additional MolecularVariation that are expected to exist but are not explicitly indicated." + "description": "The total number of copies of all MolecularVariation at this locus, MUST be greater than or equal to the sum of GenotypeMember copy counts and MUST be greater than or equal to 1. If greater than the total of GenotypeMember counts, this field describes additional MolecularVariation that exist but are not explicitly described." } }, "required": [ diff --git a/schema/vrs.yaml b/schema/vrs.yaml index 6cf9bd77..daa2cf8a 100644 --- a/schema/vrs.yaml +++ b/schema/vrs.yaml @@ -214,7 +214,8 @@ definitions: - type additionalProperties: false Genotype: - description: A quantified set of *in-trans* MolecularVariation at a genomic locus. + description: A quantified set of MolecularVariation associated with a genomic + locus. type: object properties: _id: @@ -240,9 +241,10 @@ definitions: - $ref: '#/definitions/IndefiniteRange' - $ref: '#/definitions/Number' description: The total number of copies of all MolecularVariation at this - locus, MUST be greater than or equal to the sum of GenotypeMember copy counts. - If greater than the total counts, this implies additional MolecularVariation - that are expected to exist but are not explicitly indicated. 
+ locus, MUST be greater than or equal to the sum of GenotypeMember copy counts + and MUST be greater than or equal to 1. If greater than the total of GenotypeMember + counts, this field describes additional MolecularVariation that exist but + are not explicitly described. required: - count - members From bd172bfda6082977aa8b6ba126f2c9d2725c1fae Mon Sep 17 00:00:00 2001 From: korikuzma Date: Mon, 3 Oct 2022 17:49:49 -0400 Subject: [PATCH 44/83] fix: get smoketests to pass - update python-jsonschema-objects version to >=0.4.0 - fix typo in CytobandInterval end - Regenerate vrs yaml/json to update members to have 2 min items --- .requirements.txt | 2 +- schema/defs/vrs/CytobandInterval.rst | 2 +- schema/defs/vrs/Haplotype.rst | 2 +- schema/vrs-source.yaml | 2 +- schema/vrs.json | 4 ++-- schema/vrs.yaml | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.requirements.txt b/.requirements.txt index 65e0c4fb..92b4c292 100644 --- a/.requirements.txt +++ b/.requirements.txt @@ -1,5 +1,5 @@ pytest -python-jsonschema-objects>=0.3,<=0.3.10 +python-jsonschema-objects>=0.4.0 jsonschema==3.2.0 ipython pyyaml diff --git a/schema/defs/vrs/CytobandInterval.rst b/schema/defs/vrs/CytobandInterval.rst index 1cd1a7d1..460de65e 100644 --- a/schema/defs/vrs/CytobandInterval.rst +++ b/schema/defs/vrs/CytobandInterval.rst @@ -25,4 +25,4 @@ A contiguous span on a chromosome defined by cytoband features. The span include * - end - :ref:`HumanCytoband` - 1..1 - - The start cytoband region. MUST specify a region nearer the terminal end (telomere) of the chromosome q-arm than `start`. + - The end cytoband region. MUST specify a region nearer the terminal end (telomere) of the chromosome q-arm than `start`. diff --git a/schema/defs/vrs/Haplotype.rst b/schema/defs/vrs/Haplotype.rst index c36e22f4..6202690a 100644 --- a/schema/defs/vrs/Haplotype.rst +++ b/schema/defs/vrs/Haplotype.rst @@ -26,5 +26,5 @@ Some Haplotype attributes are inherited from :ref:`Variation`. 
- MUST be "Haplotype" * - members - :ref:`Allele` | :ref:`CURIE` - - 1..m + - 2..m - List of Alleles, or references to Alleles, that comprise this Haplotype. diff --git a/schema/vrs-source.yaml b/schema/vrs-source.yaml index 9c482af0..025665cb 100644 --- a/schema/vrs-source.yaml +++ b/schema/vrs-source.yaml @@ -482,7 +482,7 @@ definitions: end: $ref: "#/definitions/HumanCytoband" description: >- - The start cytoband region. MUST specify a region nearer the + The end cytoband region. MUST specify a region nearer the terminal end (telomere) of the chromosome q-arm than `start`. example: type: CytobandInterval diff --git a/schema/vrs.json b/schema/vrs.json index 653092d0..e23c0926 100644 --- a/schema/vrs.json +++ b/schema/vrs.json @@ -128,7 +128,7 @@ }, "members": { "type": "array", - "minItems": 1, + "minItems": 2, "uniqueItems": true, "items": { "oneOf": [ @@ -657,7 +657,7 @@ }, "end": { "$ref": "#/definitions/HumanCytoband", - "description": "The start cytoband region. MUST specify a region nearer the terminal end (telomere) of the chromosome q-arm than `start`." + "description": "The end cytoband region. MUST specify a region nearer the terminal end (telomere) of the chromosome q-arm than `start`." 
} }, "example": { diff --git a/schema/vrs.yaml b/schema/vrs.yaml index 7d87125f..302f834c 100644 --- a/schema/vrs.yaml +++ b/schema/vrs.yaml @@ -77,7 +77,7 @@ definitions: description: MUST be "Haplotype" members: type: array - minItems: 1 + minItems: 2 uniqueItems: true items: oneOf: From 40ef2af31ed5667149945a45765081bbe5dc46b1 Mon Sep 17 00:00:00 2001 From: Alex Handler Wagner Date: Mon, 3 Oct 2022 18:49:14 -0400 Subject: [PATCH 45/83] addresses https://github.com/ga4gh/vrs/pull/394#discussion_r986247682 --- docs/source/terms_and_model.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/source/terms_and_model.rst b/docs/source/terms_and_model.rst index 1f4326ba..28e6991c 100644 --- a/docs/source/terms_and_model.rst +++ b/docs/source/terms_and_model.rst @@ -443,6 +443,9 @@ objects (which would otherwise be represented using symbolic shorthand). and therefore two instances of sequence at a particular locus. Thus, Genotypes will often list two GenotypeMembers each based on a distinct Haplotype or Allele. In the case of haploid chromosomes or haploinsufficiency, the Genotype consists of a single GenotypeMember. +* A specific (heterozygous) diplotype SHOULD be represented as a Genotype of two GenotypeMember + instances each containing a constituent :ref:`Haplotype`. A homozygous diplotype SHOULD be + represented as a Genotype of one constituent GenotypeMember (with `GenotypeMember.count=2`). * A consequence of the computational definition is that in-cis Haplotypes at overlapping or adjacent intervals MUST be merged into a single Haplotype for the same Genotype. * A `GenotypeMember.variation` value MUST be unique among Genotype Members within a Genotype. 
From ffdc666a88a0ef7d452c3cb742c5db796510b396 Mon Sep 17 00:00:00 2001 From: Alex Handler Wagner Date: Fri, 7 Oct 2022 10:58:49 -0400 Subject: [PATCH 46/83] update metaschema proc version --- .requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.requirements.txt b/.requirements.txt index 0c406467..2d7e26be 100644 --- a/.requirements.txt +++ b/.requirements.txt @@ -3,5 +3,5 @@ python-jsonschema-objects>=0.3,<=0.3.10 jsonschema==3.2.0 ipython pyyaml -ga4gh.gks.metaschema==0.2.0rc3 +ga4gh.gks.metaschema==0.2.0rc4 sphinx ~= 3.5 \ No newline at end of file From 0f72199544c5249bfdf17d36e993478ac9e98a8a Mon Sep 17 00:00:00 2001 From: Alex Handler Wagner Date: Fri, 7 Oct 2022 11:01:08 -0400 Subject: [PATCH 47/83] enable pre-releases --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index c51c25bf..5a07ad18 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -20,7 +20,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip setuptools - pip install -r .requirements.txt + pip install -r --pre .requirements.txt - name: Test with pytest run: | From 90893a4ad2db2b0c353c8f60ff275c0462999495 Mon Sep 17 00:00:00 2001 From: Alex Handler Wagner Date: Fri, 7 Oct 2022 11:02:28 -0400 Subject: [PATCH 48/83] fix pip install command --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 5a07ad18..31f83610 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -20,7 +20,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip setuptools - pip install -r --pre .requirements.txt + pip install --pre -r .requirements.txt - name: Test with pytest run: | From 2a718ab59aad994282655caf78d982710f445edb Mon Sep 17 00:00:00 2001 From: Alex Handler Wagner Date: 
Fri, 7 Oct 2022 11:06:59 -0400 Subject: [PATCH 49/83] merge gt & rcn docs --- docs/source/terms_and_model.rst | 58 ++++++++++++++++----------------- 1 file changed, 28 insertions(+), 30 deletions(-) diff --git a/docs/source/terms_and_model.rst b/docs/source/terms_and_model.rst index af554648..72bb86c2 100644 --- a/docs/source/terms_and_model.rst +++ b/docs/source/terms_and_model.rst @@ -404,7 +404,34 @@ Two, three, or four total copies of BRCA1: "type": "AbsoluteCopyNumber" } -<<<<<<< HEAD +.. _RelativeCopyNumber: + +RelativeCopyNumber +$$$$$$$$$$$$$$$$$$ + +*Relative Copy Number Variation* captures a classification of copies +of a molecule within a system, relative to a baseline. These types +of Variation are common outputs from CNV callers, particularly in the +somatic domain where Absolute Copy Counts are difficult to estimate +and less useful in practice than relative statements. + +.. include:: defs/RelativeCopyNumber.rst + +**Examples** + +Low-level copy gain of BRCA1: + +.. parsed-literal:: + + { + "relative_copy_class": "low-level gain", + "subject": { + "gene_id": "ncbigene:348", + "type": "Gene" + }, + "type": "RelativeCopyNumber" + } + .. _genotype: Genotype @@ -488,35 +515,6 @@ SO: `Genotype (SO:0001027) requires explicit definition of the count of molecules associated with a genomic locus using the `count` attribute, though this count may be inexact (e.g. a :ref:`DefiniteRange` or :ref:`IndefiniteRange`). -======= -.. _RelativeCopyNumber: - -RelativeCopyNumber -$$$$$$$$$$$$$$$$$$ - -*Relative Copy Number Variation* captures a classification of copies -of a molecule within a system, relative to a baseline. These types -of Variation are common outputs from CNV callers, particularly in the -somatic domain where Absolute Copy Counts are difficult to estimate -and less useful in practice than relative statements. - -.. include:: defs/RelativeCopyNumber.rst - -**Examples** - -Low-level copy gain of BRCA1: - -.. 
parsed-literal:: - - { - "relative_copy_class": "low-level gain", - "subject": { - "gene_id": "ncbigene:348", - "type": "Gene" - }, - "type": "RelativeCopyNumber" - } ->>>>>>> main .. _UtilityVariation: From 92e88fd3d52ac0c091ee84df0a524ac4cdcf9600 Mon Sep 17 00:00:00 2001 From: Alex Handler Wagner Date: Fri, 7 Oct 2022 11:19:47 -0400 Subject: [PATCH 50/83] update vrs.yaml --- schema/vrs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/schema/vrs.yaml b/schema/vrs.yaml index cd7a0c5b..b9cc4723 100644 --- a/schema/vrs.yaml +++ b/schema/vrs.yaml @@ -366,7 +366,7 @@ definitions: end (telomere) of the chromosome p-arm than `end`. end: $ref: '#/definitions/HumanCytoband' - description: The start cytoband region. MUST specify a region nearer the terminal + description: The end cytoband region. MUST specify a region nearer the terminal end (telomere) of the chromosome q-arm than `start`. example: type: CytobandInterval From 8e348eaf8da570f8aadef2c40e224ba97efd82b7 Mon Sep 17 00:00:00 2001 From: korikuzma Date: Mon, 24 Oct 2022 09:00:43 -0400 Subject: [PATCH 51/83] fix: absolute + relative copy number models in models.yaml subject is now a CURIE --- validation/models.yaml | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/validation/models.yaml b/validation/models.yaml index 91be8488..2386d614 100644 --- a/validation/models.yaml +++ b/validation/models.yaml @@ -269,26 +269,22 @@ AbsoluteCopyNumber: comparator: '>=' type: IndefiniteRange value: 3 - subject: - gene_id: ncbigene:348 - type: Gene + subject: ncbigene:348 type: AbsoluteCopyNumber out: - ga4gh_digest: 5DNZrhIFslE6Eeo0CsDyQQERR6x7v9OE - ga4gh_identify: ga4gh:VAC.5DNZrhIFslE6Eeo0CsDyQQERR6x7v9OE - ga4gh_serialize: '{"copies":{"comparator":">=","type":"IndefiniteRange","value":3},"subject":{"gene_id":"ncbigene:348","type":"Gene"},"type":"AbsoluteCopyNumber"}' + ga4gh_digest: jwTswRixKm46exoepUPTnCvKCJIWDO3j + ga4gh_identify: 
ga4gh:VAC.jwTswRixKm46exoepUPTnCvKCJIWDO3j + ga4gh_serialize: '{"copies":{"comparator":">=","type":"IndefiniteRange","value":3},"subject":"ncbigene:348","type":"AbsoluteCopyNumber"}' RelativeCopyNumber: - name: "Low-level copy gain of BRCA1" in: relative_copy_class: low-level gain - subject: - gene_id: ncbigene:348 - type: Gene + subject: ncbigene:348 type: RelativeCopyNumber out: - ga4gh_digest: CUf7xZJd36mapcm0h3cOZR8kjM_Aj4UV - ga4gh_identify: ga4gh:VRC.CUf7xZJd36mapcm0h3cOZR8kjM_Aj4UV - ga4gh_serialize: '{"relative_copy_class":"low-level gain","subject":{"gene_id":"ncbigene:348","type":"Gene"},"type":"RelativeCopyNumber"}' + ga4gh_digest: fdoeFEYsGiCe5jY4pw8xKiGqowvg-F47 + ga4gh_identify: ga4gh:VRC.fdoeFEYsGiCe5jY4pw8xKiGqowvg-F47 + ga4gh_serialize: '{"relative_copy_class":"low-level gain","subject":"ncbigene:348","type":"RelativeCopyNumber"}' Text: - in: From f48077c0d3d3715ad8ed1bcfbd6a887dc41b7aad Mon Sep 17 00:00:00 2001 From: korikuzma Date: Mon, 24 Oct 2022 10:09:49 -0400 Subject: [PATCH 52/83] add GenotypeMember + Genotype models to validation data --- validation/models.yaml | 132 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) diff --git a/validation/models.yaml b/validation/models.yaml index 2386d614..51725cc3 100644 --- a/validation/models.yaml +++ b/validation/models.yaml @@ -343,3 +343,135 @@ VariationSet: ga4gh_digest: QLQXSNSIFlqNYWmQbw-YkfmexPi4NeDE ga4gh_identify: ga4gh:VS.QLQXSNSIFlqNYWmQbw-YkfmexPi4NeDE ga4gh_serialize: '{"members":["-kUJh47Pu24Y3Wdsk1rXEDKsXWNY-68x","Z_rYRxpUvwqCLsCBO3YLl70o2uf9_Op1"],"type":"VariationSet"}' +GenotypeMember: + - name: "GenotypeMember w/ Allele" + in: + count: + value: 1 + type: Number + variation: + location: + interval: + end: + type: Number + value: 94842866 + start: + type: Number + value: 94842865 + type: SequenceInterval + sequence_id: ga4gh:SQ.ss8r_wB0-b9r44TQTMmVTI92884QvBiB + type: SequenceLocation + state: + sequence: G + type: LiteralSequenceExpression + type: Allele + 
type: GenotypeMember + out: + ga4gh_serialize: '{"count":{"type":"Number","value":1},"type":"GenotypeMember","variation":"geQCxa1Enel8UBUAQQ2-rbphDjIR-cq0"}' +GenotypeMember: + - name: "GenotypeMember w/ Haplotype" + in: + count: + value: 1 + type: Number + variation: + members: + - location: + interval: + end: + type: Number + value: 94761900 + start: + type: Number + value: 94761899 + sequence_id: ga4gh:SQ.ss8r_wB0-b9r44TQTMmVTI92884QvBiB + type: SequenceLocation + state: + sequence: T + type: LiteralSequenceExpression + type: Allele + - location: + interval: + end: + type: Number + value: 94842866 + start: + type: Number + value: 94842865 + sequence_id: ga4gh:SQ.ss8r_wB0-b9r44TQTMmVTI92884QvBiB + type: SequenceLocation + state: + sequence: G + type: LiteralSequenceExpression + type: Allele + type: Haplotype + type: GenotypeMember + out: + ga4gh_serialize: '{"count":{"type":"Number","value":1},"type":"GenotypeMember","variation":"746-M4L_EZL-CiduWjqODaYN26lTkEN1"}' +Genotype: + - + in: + members: + - count: + value: 1 + type: Number + variation: + location: + interval: + end: + type: Number + value: 94842866 + start: + type: Number + value: 94842865 + type: SequenceInterval + sequence_id: ga4gh:SQ.ss8r_wB0-b9r44TQTMmVTI92884QvBiB + type: SequenceLocation + state: + sequence: G + type: LiteralSequenceExpression + type: Allele + type: GenotypeMember + - count: + value: 1 + type: Number + variation: + members: + - location: + interval: + end: + type: Number + value: 94761900 + start: + type: Number + value: 94761899 + sequence_id: ga4gh:SQ.ss8r_wB0-b9r44TQTMmVTI92884QvBiB + type: SequenceLocation + state: + sequence: T + type: LiteralSequenceExpression + type: Allele + - location: + interval: + end: + type: Number + value: 94842866 + start: + type: Number + value: 94842865 + sequence_id: ga4gh:SQ.ss8r_wB0-b9r44TQTMmVTI92884QvBiB + type: SequenceLocation + state: + sequence: G + type: LiteralSequenceExpression + type: Allele + type: Haplotype + type: GenotypeMember + 
count: + type: Number + value: 2 + type: Genotype + out: + ga4gh_digest: wuTcSAvkSCHgpCAPOvt88K5VVyT6oEZ5 + ga4gh_identify: ga4gh:GT.wuTcSAvkSCHgpCAPOvt88K5VVyT6oEZ5 + ga4gh_serialize: '{"count":{"type":"Number","value":2},"members":[{"count":{"type":"Number","value":1},"type":"GenotypeMember","variation":"geQCxa1Enel8UBUAQQ2-rbphDjIR-cq0"},{"count":{"type":"Number","value":1},"type":"GenotypeMember","variation":"746-M4L_EZL-CiduWjqODaYN26lTkEN1"}],"type":"Genotype"}' From 057342f2da8694eb57c3b2f046044c6b88a76fac Mon Sep 17 00:00:00 2001 From: korikuzma Date: Mon, 24 Oct 2022 13:49:05 -0400 Subject: [PATCH 53/83] copy number subject is a sequence location --- validation/models.yaml | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/validation/models.yaml b/validation/models.yaml index 51725cc3..f528cbfa 100644 --- a/validation/models.yaml +++ b/validation/models.yaml @@ -269,22 +269,42 @@ AbsoluteCopyNumber: comparator: '>=' type: IndefiniteRange value: 3 - subject: ncbigene:348 + subject: + sequence_id: ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl + interval: + end: + type: Number + value: 44909393 + start: + type: Number + value: 44905795 + type: SequenceInterval + type: SequenceLocation type: AbsoluteCopyNumber out: - ga4gh_digest: jwTswRixKm46exoepUPTnCvKCJIWDO3j - ga4gh_identify: ga4gh:VAC.jwTswRixKm46exoepUPTnCvKCJIWDO3j - ga4gh_serialize: '{"copies":{"comparator":">=","type":"IndefiniteRange","value":3},"subject":"ncbigene:348","type":"AbsoluteCopyNumber"}' + ga4gh_digest: New2SZ7NZU_gBbjzcmA8IwmA-EShG5JI + ga4gh_identify: ga4gh:VAC.New2SZ7NZU_gBbjzcmA8IwmA-EShG5JI + ga4gh_serialize: '{"copies":{"comparator":">=","type":"IndefiniteRange","value":3},"subject":"oz3NEuhtbBep3yqu3wrhqfDKbLPK7vcE","type":"AbsoluteCopyNumber"}' RelativeCopyNumber: - name: "Low-level copy gain of BRCA1" in: relative_copy_class: low-level gain - subject: ncbigene:348 + subject: + sequence_id: ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl 
+ interval: + end: + type: Number + value: 44909393 + start: + type: Number + value: 44905795 + type: SequenceInterval + type: SequenceLocation type: RelativeCopyNumber out: - ga4gh_digest: fdoeFEYsGiCe5jY4pw8xKiGqowvg-F47 - ga4gh_identify: ga4gh:VRC.fdoeFEYsGiCe5jY4pw8xKiGqowvg-F47 - ga4gh_serialize: '{"relative_copy_class":"low-level gain","subject":"ncbigene:348","type":"RelativeCopyNumber"}' + ga4gh_digest: 69x30aZU0KQF0RDqq3CaaVBid_xrgzrI + ga4gh_identify: ga4gh:VRC.69x30aZU0KQF0RDqq3CaaVBid_xrgzrI + ga4gh_serialize: '{"relative_copy_class":"low-level gain","subject":"oz3NEuhtbBep3yqu3wrhqfDKbLPK7vcE","type":"RelativeCopyNumber"}' Text: - in: From d0446fb36a69c43a5760884c08bd4baa86aee4ed Mon Sep 17 00:00:00 2001 From: korikuzma Date: Wed, 2 Nov 2022 08:39:49 -0400 Subject: [PATCH 54/83] fix: models.yaml to use ordered property for digests --- validation/models.yaml | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/validation/models.yaml b/validation/models.yaml index f528cbfa..061b94e0 100644 --- a/validation/models.yaml +++ b/validation/models.yaml @@ -427,7 +427,7 @@ GenotypeMember: type: Haplotype type: GenotypeMember out: - ga4gh_serialize: '{"count":{"type":"Number","value":1},"type":"GenotypeMember","variation":"746-M4L_EZL-CiduWjqODaYN26lTkEN1"}' + ga4gh_serialize: '{"count":{"type":"Number","value":1},"type":"GenotypeMember","variation":"Ow_uE0YaVWHIno4pQfdmYpWmlGPNtXQr"}' Genotype: - in: @@ -459,30 +459,32 @@ Genotype: members: - location: interval: + type: SequenceInterval end: type: Number - value: 94761900 + value: 94842866 start: type: Number - value: 94761899 + value: 94842865 sequence_id: ga4gh:SQ.ss8r_wB0-b9r44TQTMmVTI92884QvBiB type: SequenceLocation state: - sequence: T + sequence: G type: LiteralSequenceExpression type: Allele - location: interval: + type: SequenceInterval end: type: Number - value: 94842866 + value: 94761900 start: type: Number - value: 94842865 + value: 94761899 sequence_id: 
ga4gh:SQ.ss8r_wB0-b9r44TQTMmVTI92884QvBiB type: SequenceLocation state: - sequence: G + sequence: T type: LiteralSequenceExpression type: Allele type: Haplotype @@ -492,6 +494,6 @@ Genotype: value: 2 type: Genotype out: - ga4gh_digest: wuTcSAvkSCHgpCAPOvt88K5VVyT6oEZ5 - ga4gh_identify: ga4gh:GT.wuTcSAvkSCHgpCAPOvt88K5VVyT6oEZ5 - ga4gh_serialize: '{"count":{"type":"Number","value":2},"members":[{"count":{"type":"Number","value":1},"type":"GenotypeMember","variation":"geQCxa1Enel8UBUAQQ2-rbphDjIR-cq0"},{"count":{"type":"Number","value":1},"type":"GenotypeMember","variation":"746-M4L_EZL-CiduWjqODaYN26lTkEN1"}],"type":"Genotype"}' + ga4gh_digest: CP6d8olj838oDLk5h0CbM_7DVK9kYcld + ga4gh_identify: ga4gh:GT.CP6d8olj838oDLk5h0CbM_7DVK9kYcld + ga4gh_serialize: '{"count":{"type":"Number","value":2},"members":[{"count":{"type":"Number","value":1},"type":"GenotypeMember","variation":"Ow_uE0YaVWHIno4pQfdmYpWmlGPNtXQr"},{"count":{"type":"Number","value":1},"type":"GenotypeMember","variation":"geQCxa1Enel8UBUAQQ2-rbphDjIR-cq0"}],"type":"Genotype"}' From 8f521e87d3e6e9328029151eb66ef92aaf98a669 Mon Sep 17 00:00:00 2001 From: korikuzma Date: Tue, 8 Nov 2022 12:52:24 -0500 Subject: [PATCH 55/83] update models.yaml with serialization changes --- validation/models.yaml | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/validation/models.yaml b/validation/models.yaml index 061b94e0..ff56e958 100644 --- a/validation/models.yaml +++ b/validation/models.yaml @@ -459,32 +459,30 @@ Genotype: members: - location: interval: - type: SequenceInterval end: type: Number - value: 94842866 + value: 94761900 start: type: Number - value: 94842865 + value: 94761899 sequence_id: ga4gh:SQ.ss8r_wB0-b9r44TQTMmVTI92884QvBiB type: SequenceLocation state: - sequence: G + sequence: T type: LiteralSequenceExpression type: Allele - location: interval: - type: SequenceInterval end: type: Number - value: 94761900 + value: 94842866 start: type: Number - value: 94761899 + 
value: 94842865 sequence_id: ga4gh:SQ.ss8r_wB0-b9r44TQTMmVTI92884QvBiB type: SequenceLocation state: - sequence: T + sequence: G type: LiteralSequenceExpression type: Allele type: Haplotype @@ -494,6 +492,6 @@ Genotype: value: 2 type: Genotype out: - ga4gh_digest: CP6d8olj838oDLk5h0CbM_7DVK9kYcld - ga4gh_identify: ga4gh:GT.CP6d8olj838oDLk5h0CbM_7DVK9kYcld - ga4gh_serialize: '{"count":{"type":"Number","value":2},"members":[{"count":{"type":"Number","value":1},"type":"GenotypeMember","variation":"Ow_uE0YaVWHIno4pQfdmYpWmlGPNtXQr"},{"count":{"type":"Number","value":1},"type":"GenotypeMember","variation":"geQCxa1Enel8UBUAQQ2-rbphDjIR-cq0"}],"type":"Genotype"}' + ga4gh_digest: fz-TMM88G2hmK6cQ-JwrpVAr8d_3eTVq + ga4gh_identify: ga4gh:GT.fz-TMM88G2hmK6cQ-JwrpVAr8d_3eTVq + ga4gh_serialize: '{"count":{"type":"Number","value":2},"members":["EhA9scQ-F-n1eQdQOJYClDXq613IZLQm","oJg9piBqrJ-_t3PSLA21d4z8f4tJHKqI"],"type":"Genotype"}' From 0698c46fdb52407936bd4d6252413a17d1c12388 Mon Sep 17 00:00:00 2001 From: korikuzma Date: Thu, 10 Nov 2022 09:28:31 -0500 Subject: [PATCH 56/83] Add tests to check schema validation in models.yaml (#406) --- tests/test_basic.py | 17 ++++++++++++++++- validation/models.yaml | 4 ++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/tests/test_basic.py b/tests/test_basic.py index febe2904..75efe0f1 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -1,10 +1,12 @@ import json +import yaml import python_jsonschema_objects as pjs from schema.helpers import pjs_filter from ga4gh.gks.metaschema.tools.source_proc import YamlSchemaProcessor +from jsonschema import validate, RefResolver -from config import vrs_json_path, vrs_yaml_path +from config import vrs_json_path, vrs_yaml_path, root_dir # Are the yaml and json parsable and do they match? 
p = YamlSchemaProcessor(vrs_yaml_path) @@ -19,3 +21,16 @@ def test_json_yaml_match(): def test_pjs_smoke(): ob = pjs.ObjectBuilder(pjs_filter(j)) assert ob.build_classes() # no exception => okay + + +def test_schema_validation(): + """Test that examples in validation/models.yaml are valid""" + resolver = RefResolver.from_schema(j, store={"definitions": j}) + schema_definitions = j["definitions"] + validation_models = root_dir / "validation" / "models.yaml" + validation_tests = yaml.load(open(validation_models), Loader=yaml.SafeLoader) + for cls, tests in validation_tests.items(): + for t in tests: + validate(instance=t["in"], + schema=schema_definitions[cls], + resolver=resolver) diff --git a/validation/models.yaml b/validation/models.yaml index ff56e958..26329c47 100644 --- a/validation/models.yaml +++ b/validation/models.yaml @@ -404,6 +404,7 @@ GenotypeMember: start: type: Number value: 94761899 + type: SequenceInterval sequence_id: ga4gh:SQ.ss8r_wB0-b9r44TQTMmVTI92884QvBiB type: SequenceLocation state: @@ -418,6 +419,7 @@ GenotypeMember: start: type: Number value: 94842865 + type: SequenceInterval sequence_id: ga4gh:SQ.ss8r_wB0-b9r44TQTMmVTI92884QvBiB type: SequenceLocation state: @@ -465,6 +467,7 @@ Genotype: start: type: Number value: 94761899 + type: SequenceInterval sequence_id: ga4gh:SQ.ss8r_wB0-b9r44TQTMmVTI92884QvBiB type: SequenceLocation state: @@ -479,6 +482,7 @@ Genotype: start: type: Number value: 94842865 + type: SequenceInterval sequence_id: ga4gh:SQ.ss8r_wB0-b9r44TQTMmVTI92884QvBiB type: SequenceLocation state: From 55d085935b962ed5156faa13d79622166819e484 Mon Sep 17 00:00:00 2001 From: korikuzma Date: Mon, 14 Nov 2022 09:05:48 -0500 Subject: [PATCH 57/83] Add tests for ComposedSequenceExpression (#408) --- validation/models.yaml | 94 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/validation/models.yaml b/validation/models.yaml index ff56e958..461ba45c 100644 --- a/validation/models.yaml +++ 
b/validation/models.yaml @@ -169,6 +169,38 @@ RepeatedSequenceExpression: type: RepeatedSequenceExpression out: ga4gh_serialize: '{"count":{"comparator":">=","type":"IndefiniteRange","value":6},"seq_expr":{"location":"QrRSuBj-VScAGV_gEdxNgsnh41jYH1Kg","reverse_complement":false,"type":"DerivedSequenceExpression"},"type":"RepeatedSequenceExpression"}' +ComposedSequenceExpression: + - name: "Composed Sequence Expression w/ order 1" + in: + components: + - type: LiteralSequenceExpression + sequence: CGC + - type: RepeatedSequenceExpression + seq_expr: + type: LiteralSequenceExpression + sequence: CGA + count: + type: Number + value: 3 + type: ComposedSequenceExpression + out: + ga4gh_serialize: '{"components":[{"sequence":"CGC","type":"LiteralSequenceExpression"},{"count":{"type":"Number","value":3},"seq_expr":{"sequence":"CGA","type":"LiteralSequenceExpression"},"type":"RepeatedSequenceExpression"}],"type":"ComposedSequenceExpression"}' +ComposedSequenceExpression: + - name: "Composed Sequence Expression w/ order 2" + in: + components: + - type: RepeatedSequenceExpression + seq_expr: + type: LiteralSequenceExpression + sequence: CGA + count: + type: Number + value: 3 + - type: LiteralSequenceExpression + sequence: CGC + type: ComposedSequenceExpression + out: + ga4gh_serialize: '{"components":[{"count":{"type":"Number","value":3},"seq_expr":{"sequence":"CGA","type":"LiteralSequenceExpression"},"type":"RepeatedSequenceExpression"},{"sequence":"CGC","type":"LiteralSequenceExpression"}],"type":"ComposedSequenceExpression"}' Allele: - name: "rs7412@GRCh38>T w/SequenceState" in: @@ -213,6 +245,68 @@ Allele: ga4gh_digest: CxiA_hvYbkD8Vqwjhx5AYuyul4mtlkpD ga4gh_identify: ga4gh:VA.CxiA_hvYbkD8Vqwjhx5AYuyul4mtlkpD ga4gh_serialize: '{"location":"QrRSuBj-VScAGV_gEdxNgsnh41jYH1Kg","state":{"sequence":"T","type":"LiteralSequenceExpression"},"type":"Allele"}' +Allele: + - name: "Allele w/ Composed Sequence Expression w/ order 1" + in: + location: + interval: + end: + type: Number 
+ value: 44908822 + start: + type: Number + value: 44908821 + type: SequenceInterval + sequence_id: ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl + type: SequenceLocation + state: + components: + - type: LiteralSequenceExpression + sequence: CGC + - type: RepeatedSequenceExpression + seq_expr: + type: LiteralSequenceExpression + sequence: CGA + count: + type: Number + value: 3 + type: ComposedSequenceExpression + type: Allele + out: + ga4gh_digest: obWIAB54mfRE2HAwQiIzKZeIx0REPG-8 + ga4gh_identify: ga4gh:VA.obWIAB54mfRE2HAwQiIzKZeIx0REPG-8 + ga4gh_serialize: '{"location":"QrRSuBj-VScAGV_gEdxNgsnh41jYH1Kg","state":{"components":[{"sequence":"CGC","type":"LiteralSequenceExpression"},{"count":{"type":"Number","value":3},"seq_expr":{"sequence":"CGA","type":"LiteralSequenceExpression"},"type":"RepeatedSequenceExpression"}],"type":"ComposedSequenceExpression"},"type":"Allele"}' +Allele: + - name: "Allele w/ Composed Sequence Expression w/ order 2" + in: + location: + interval: + end: + type: Number + value: 44908822 + start: + type: Number + value: 44908821 + type: SequenceInterval + sequence_id: ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl + type: SequenceLocation + state: + components: + - type: RepeatedSequenceExpression + seq_expr: + type: LiteralSequenceExpression + sequence: CGA + count: + type: Number + value: 3 + - type: LiteralSequenceExpression + sequence: CGC + type: ComposedSequenceExpression + type: Allele + out: + ga4gh_digest: KDrbvmR-Y2dccsgckQnpEsQuLMq4p10d + ga4gh_identify: ga4gh:VA.KDrbvmR-Y2dccsgckQnpEsQuLMq4p10d + ga4gh_serialize: '{"location":"QrRSuBj-VScAGV_gEdxNgsnh41jYH1Kg","state":{"components":[{"count":{"type":"Number","value":3},"seq_expr":{"sequence":"CGA","type":"LiteralSequenceExpression"},"type":"RepeatedSequenceExpression"},{"sequence":"CGC","type":"LiteralSequenceExpression"}],"type":"ComposedSequenceExpression"},"type":"Allele"}' Haplotype: - name: "APOE1 on GRCh38, inline" in: From 20afe5aebdb2ed7add3c6719fdeab739ed8b523d Mon Sep 17 00:00:00 
2001 From: "Alex H. Wagner, PhD" Date: Fri, 3 Mar 2023 10:19:23 -0500 Subject: [PATCH 58/83] draft CopyNumberAssessment --- schema/vrs-source.yaml | 65 +++++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 32 deletions(-) diff --git a/schema/vrs-source.yaml b/schema/vrs-source.yaml index 175bf721..9e30dfa8 100644 --- a/schema/vrs-source.yaml +++ b/schema/vrs-source.yaml @@ -72,6 +72,7 @@ definitions: a genome, sample, or homologous chromosomes. oneOf: - $ref: "#/definitions/CopyNumber" + - $ref: "#/definitions/CopyNumberAssessment" - $ref: "#/definitions/Genotype" discriminator: propertyName: type @@ -189,33 +190,24 @@ definitions: CopyNumber: inherits: SystemicVariation - description: >- - The copies of :ref:`Location` in a system, expressed as an absolute integer - quantity (:ref:`AbsoluteCopyNumber`) or a qualitative description of copies - relative to a baseline state (:ref:`RelativeCopyNumber`). - heritable_properties: - subject: - oneOf: - - $ref: "#/definitions/Location" - - $ref: "#/definitions/CURIE" - description: >- - A location for which the number of systemic copies is described. - heritable_required: [ "subject" ] - - AbsoluteCopyNumber: - inherits: CopyNumber type: object - maturity: draft description: >- - The absolute count of discrete copies of a :ref:`Location`, + The absolute count of discrete copies of a :ref:`Location` or :ref:`Feature`, within a system (e.g. genome, cell, etc.). properties: type: type: string - const: "AbsoluteCopyNumber" - default: "AbsoluteCopyNumber" + const: "CopyNumber" + default: "CopyNumber" description: >- - MUST be "AbsoluteCopyNumber" + MUST be "CopyNumber" + subject: + oneOf: + - $ref: "#/definitions/Location" + - $ref: "#/definitions/CURIE" + - $ref: "#/definitions/Feature" + description: >- + A location for which the number of systemic copies is described. 
copies: oneOf: - $ref: "#/definitions/Number" @@ -223,28 +215,37 @@ definitions: - $ref: "#/definitions/DefiniteRange" description: >- The integral number of copies of the subject in a system - required: [ "copies" ] + required: [ "subject", "copies" ] - RelativeCopyNumber: - inherits: CopyNumber + CopyNumberAssessment: + inherits: SystemicVariation type: object maturity: draft description: >- - The copies of a :ref:`Location` within a system (e.g. genome, cell, etc.) - relative to a baseline state. + An assessment of the copy number of a :ref:`Location` or a :ref:`Feature` within a system (e.g. genome, cell, + etc.) relative to a baseline state. properties: type: type: string - const: "RelativeCopyNumber" - default: "RelativeCopyNumber" + const: "CopyNumberAssessment" + default: "CopyNumberAssessment" description: >- - MUST be "RelativeCopyNumber" - relative_copy_class: + MUST be "CopyNumberAssessment" + subject: + oneOf: + - $ref: "#/definitions/Location" + - $ref: "#/definitions/CURIE" + - $ref: "#/definitions/Feature" + description: >- + A location for which the number of systemic copies is described. + copy_assessment: type: string - enum: [ "complete loss", "partial loss", "copy neutral", "low-level gain", "high-level gain" ] + enum: [ "EFO_0030069", "EFO_XXXXXXX", "EFO_0030068", "EFO_0030067", "EFO_0030064", "EFO_0030070", "EFO_0030071", "EFO_0030072" ] description: >- - MUST be one of "complete loss", "partial loss", "copy neutral", "low-level gain" or "high-level gain". - required: [ "relative_copy_class" ] + MUST be one of "EFO_0030069" (complete genomic loss), (high-level loss), + "EFO_0030068" (low-level loss), "EFO_0030067" (loss), "EFO_0030064" (regional base ploidy), + "EFO_0030070" (gain), "EFO_0030071" (low-level gain), "EFO_0030072" (high-level gain). + required: [ "subject", "copy_assessment" ] Genotype: inherits: SystemicVariation From 20fe924dd395585adef3bb3326235fed46fd5415 Mon Sep 17 00:00:00 2001 From: "Alex H. 
Wagner, PhD" Date: Fri, 3 Mar 2023 10:20:30 -0500 Subject: [PATCH 59/83] .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 4ed748bd..a86d83bf 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ __pycache__ archive docs/build venv +pyproject.toml From a747f490056cfb4957fcaa9321063b545927071d Mon Sep 17 00:00:00 2001 From: "Alex H. Wagner, PhD" Date: Fri, 3 Mar 2023 10:29:52 -0500 Subject: [PATCH 60/83] build schema --- schema/defs/vrs/AbsoluteCopyNumber.rst | 34 ----------- schema/defs/vrs/CopyNumber.rst | 33 ++++++++++- schema/defs/vrs/CopyNumberAssessment.rst | 34 +++++++++++ schema/defs/vrs/RelativeCopyNumber.rst | 34 ----------- schema/vrs.json | 74 +++++++++++++----------- schema/vrs.yaml | 62 +++++++++++--------- validation/models.yaml | 10 ++-- 7 files changed, 146 insertions(+), 135 deletions(-) delete mode 100644 schema/defs/vrs/AbsoluteCopyNumber.rst create mode 100644 schema/defs/vrs/CopyNumberAssessment.rst delete mode 100644 schema/defs/vrs/RelativeCopyNumber.rst diff --git a/schema/defs/vrs/AbsoluteCopyNumber.rst b/schema/defs/vrs/AbsoluteCopyNumber.rst deleted file mode 100644 index 9dccb69f..00000000 --- a/schema/defs/vrs/AbsoluteCopyNumber.rst +++ /dev/null @@ -1,34 +0,0 @@ -**Computational Definition** - -The absolute count of discrete copies of a :ref:`Location`, within a system (e.g. genome, cell, etc.). - -**Information Model** - -Some AbsoluteCopyNumber attributes are inherited from :ref:`Variation`. - -.. list-table:: - :class: clean-wrap - :header-rows: 1 - :align: left - :widths: auto - - * - Field - - Type - - Limits - - Description - * - _id - - :ref:`CURIE` - - 0..1 - - Variation Id. MUST be unique within document. - * - type - - string - - 1..1 - - MUST be "AbsoluteCopyNumber" - * - subject - - :ref:`Location` | :ref:`CURIE` - - 1..1 - - A location for which the number of systemic copies is described. 
- * - copies - - :ref:`Number` | :ref:`IndefiniteRange` | :ref:`DefiniteRange` - - 1..1 - - The integral number of copies of the subject in a system diff --git a/schema/defs/vrs/CopyNumber.rst b/schema/defs/vrs/CopyNumber.rst index d6dcafbe..02347004 100644 --- a/schema/defs/vrs/CopyNumber.rst +++ b/schema/defs/vrs/CopyNumber.rst @@ -1,3 +1,34 @@ **Computational Definition** -The copies of :ref:`Location` in a system, expressed as an absolute integer quantity (:ref:`AbsoluteCopyNumber`) or a qualitative description of copies relative to a baseline state (:ref:`RelativeCopyNumber`). +The absolute count of discrete copies of a :ref:`Location` or :ref:`Feature`, within a system (e.g. genome, cell, etc.). + +**Information Model** + +Some CopyNumber attributes are inherited from :ref:`Variation`. + +.. list-table:: + :class: clean-wrap + :header-rows: 1 + :align: left + :widths: auto + + * - Field + - Type + - Limits + - Description + * - _id + - :ref:`CURIE` + - 0..1 + - Variation Id. MUST be unique within document. + * - type + - string + - 1..1 + - MUST be "CopyNumber" + * - subject + - :ref:`Location` | :ref:`CURIE` | :ref:`Feature` + - 1..1 + - A location for which the number of systemic copies is described. + * - copies + - :ref:`Number` | :ref:`IndefiniteRange` | :ref:`DefiniteRange` + - 1..1 + - The integral number of copies of the subject in a system diff --git a/schema/defs/vrs/CopyNumberAssessment.rst b/schema/defs/vrs/CopyNumberAssessment.rst new file mode 100644 index 00000000..52661c96 --- /dev/null +++ b/schema/defs/vrs/CopyNumberAssessment.rst @@ -0,0 +1,34 @@ +**Computational Definition** + +An assessment of the copy number of a :ref:`Location` or a :ref:`Feature` within a system (e.g. genome, cell, etc.) relative to a baseline state. + +**Information Model** + +Some CopyNumberAssessment attributes are inherited from :ref:`Variation`. + +.. 
list-table:: + :class: clean-wrap + :header-rows: 1 + :align: left + :widths: auto + + * - Field + - Type + - Limits + - Description + * - _id + - :ref:`CURIE` + - 0..1 + - Variation Id. MUST be unique within document. + * - type + - string + - 1..1 + - MUST be "CopyNumberAssessment" + * - subject + - :ref:`Location` | :ref:`CURIE` | :ref:`Feature` + - 1..1 + - A location for which the number of systemic copies is described. + * - copy_assessment + - string + - 1..1 + - MUST be one of "EFO_0030069" (complete genomic loss), (high-level loss), "EFO_0030068" (low-level loss), "EFO_0030067" (loss), "EFO_0030064" (regional base ploidy), "EFO_0030070" (gain), "EFO_0030071" (low-level gain), "EFO_0030072" (high-level gain). diff --git a/schema/defs/vrs/RelativeCopyNumber.rst b/schema/defs/vrs/RelativeCopyNumber.rst deleted file mode 100644 index f076339e..00000000 --- a/schema/defs/vrs/RelativeCopyNumber.rst +++ /dev/null @@ -1,34 +0,0 @@ -**Computational Definition** - -The copies of a :ref:`Location` within a system (e.g. genome, cell, etc.) relative to a baseline state. - -**Information Model** - -Some RelativeCopyNumber attributes are inherited from :ref:`Variation`. - -.. list-table:: - :class: clean-wrap - :header-rows: 1 - :align: left - :widths: auto - - * - Field - - Type - - Limits - - Description - * - _id - - :ref:`CURIE` - - 0..1 - - Variation Id. MUST be unique within document. - * - type - - string - - 1..1 - - MUST be "RelativeCopyNumber" - * - subject - - :ref:`Location` | :ref:`CURIE` - - 1..1 - - A location for which the number of systemic copies is described. - * - relative_copy_class - - string - - 1..1 - - MUST be one of "complete loss", "partial loss", "copy neutral", "low-level gain" or "high-level gain". 
diff --git a/schema/vrs.json b/schema/vrs.json index e9f11868..ac8d19be 100644 --- a/schema/vrs.json +++ b/schema/vrs.json @@ -7,19 +7,19 @@ "description": "A representation of the state of one or more biomolecules.", "oneOf": [ { - "$ref": "#/definitions/AbsoluteCopyNumber" + "$ref": "#/definitions/Allele" }, { - "$ref": "#/definitions/Allele" + "$ref": "#/definitions/CopyNumber" }, { - "$ref": "#/definitions/Genotype" + "$ref": "#/definitions/CopyNumberAssessment" }, { - "$ref": "#/definitions/Haplotype" + "$ref": "#/definitions/Genotype" }, { - "$ref": "#/definitions/RelativeCopyNumber" + "$ref": "#/definitions/Haplotype" }, { "$ref": "#/definitions/Text" @@ -64,13 +64,13 @@ "description": "A Variation of multiple molecules in the context of a system, e.g. a genome, sample, or homologous chromosomes.", "oneOf": [ { - "$ref": "#/definitions/AbsoluteCopyNumber" + "$ref": "#/definitions/CopyNumber" }, { - "$ref": "#/definitions/Genotype" + "$ref": "#/definitions/CopyNumberAssessment" }, { - "$ref": "#/definitions/RelativeCopyNumber" + "$ref": "#/definitions/Genotype" } ], "discriminator": { @@ -221,9 +221,6 @@ "ordered": false, "items": { "oneOf": [ - { - "$ref": "#/definitions/AbsoluteCopyNumber" - }, { "$ref": "#/definitions/Allele" }, @@ -231,13 +228,16 @@ "$ref": "#/definitions/CURIE" }, { - "$ref": "#/definitions/Genotype" + "$ref": "#/definitions/CopyNumber" }, { - "$ref": "#/definitions/Haplotype" + "$ref": "#/definitions/CopyNumberAssessment" }, { - "$ref": "#/definitions/RelativeCopyNumber" + "$ref": "#/definitions/Genotype" + }, + { + "$ref": "#/definitions/Haplotype" }, { "$ref": "#/definitions/Text" @@ -256,10 +256,9 @@ ], "additionalProperties": false }, - "AbsoluteCopyNumber": { + "CopyNumber": { "type": "object", - "maturity": "draft", - "description": "The absolute count of discrete copies of a Location, within a system (e.g. 
genome, cell, etc.).", + "description": "The absolute count of discrete copies of a Location or Feature, within a system (e.g. genome, cell, etc.).", "properties": { "_id": { "$ref": "#/definitions/CURIE", @@ -267,9 +266,9 @@ }, "type": { "type": "string", - "const": "AbsoluteCopyNumber", - "default": "AbsoluteCopyNumber", - "description": "MUST be \"AbsoluteCopyNumber\"" + "const": "CopyNumber", + "default": "CopyNumber", + "description": "MUST be \"CopyNumber\"" }, "subject": { "oneOf": [ @@ -279,6 +278,9 @@ { "$ref": "#/definitions/ChromosomeLocation" }, + { + "$ref": "#/definitions/Gene" + }, { "$ref": "#/definitions/SequenceLocation" } @@ -307,10 +309,10 @@ ], "additionalProperties": false }, - "RelativeCopyNumber": { + "CopyNumberAssessment": { "type": "object", "maturity": "draft", - "description": "The copies of a Location within a system (e.g. genome, cell, etc.) relative to a baseline state.", + "description": "An assessment of the copy number of a Location or a Feature within a system (e.g. genome, cell, etc.) relative to a baseline state.", "properties": { "_id": { "$ref": "#/definitions/CURIE", @@ -318,9 +320,9 @@ }, "type": { "type": "string", - "const": "RelativeCopyNumber", - "default": "RelativeCopyNumber", - "description": "MUST be \"RelativeCopyNumber\"" + "const": "CopyNumberAssessment", + "default": "CopyNumberAssessment", + "description": "MUST be \"CopyNumberAssessment\"" }, "subject": { "oneOf": [ @@ -330,26 +332,32 @@ { "$ref": "#/definitions/ChromosomeLocation" }, + { + "$ref": "#/definitions/Gene" + }, { "$ref": "#/definitions/SequenceLocation" } ], "description": "A location for which the number of systemic copies is described." 
}, - "relative_copy_class": { + "copy_assessment": { "type": "string", "enum": [ - "complete loss", - "partial loss", - "copy neutral", - "low-level gain", - "high-level gain" + "EFO_0030069", + "EFO_XXXXXXX", + "EFO_0030068", + "EFO_0030067", + "EFO_0030064", + "EFO_0030070", + "EFO_0030071", + "EFO_0030072" ], - "description": "MUST be one of \"complete loss\", \"partial loss\", \"copy neutral\", \"low-level gain\" or \"high-level gain\"." + "description": "MUST be one of \"EFO_0030069\" (complete genomic loss), (high-level loss), \"EFO_0030068\" (low-level loss), \"EFO_0030067\" (loss), \"EFO_0030064\" (regional base ploidy), \"EFO_0030070\" (gain), \"EFO_0030071\" (low-level gain), \"EFO_0030072\" (high-level gain)." } }, "required": [ - "relative_copy_class", + "copy_assessment", "subject", "type" ], diff --git a/schema/vrs.yaml b/schema/vrs.yaml index b9cc4723..46e625d2 100644 --- a/schema/vrs.yaml +++ b/schema/vrs.yaml @@ -5,11 +5,11 @@ definitions: Variation: description: A representation of the state of one or more biomolecules. oneOf: - - $ref: '#/definitions/AbsoluteCopyNumber' - $ref: '#/definitions/Allele' + - $ref: '#/definitions/CopyNumber' + - $ref: '#/definitions/CopyNumberAssessment' - $ref: '#/definitions/Genotype' - $ref: '#/definitions/Haplotype' - - $ref: '#/definitions/RelativeCopyNumber' - $ref: '#/definitions/Text' - $ref: '#/definitions/VariationSet' discriminator: @@ -34,9 +34,9 @@ definitions: description: A Variation of multiple molecules in the context of a system, e.g. a genome, sample, or homologous chromosomes. 
oneOf: - - $ref: '#/definitions/AbsoluteCopyNumber' + - $ref: '#/definitions/CopyNumber' + - $ref: '#/definitions/CopyNumberAssessment' - $ref: '#/definitions/Genotype' - - $ref: '#/definitions/RelativeCopyNumber' discriminator: propertyName: type Allele: @@ -138,12 +138,12 @@ definitions: ordered: false items: oneOf: - - $ref: '#/definitions/AbsoluteCopyNumber' - $ref: '#/definitions/Allele' - $ref: '#/definitions/CURIE' + - $ref: '#/definitions/CopyNumber' + - $ref: '#/definitions/CopyNumberAssessment' - $ref: '#/definitions/Genotype' - $ref: '#/definitions/Haplotype' - - $ref: '#/definitions/RelativeCopyNumber' - $ref: '#/definitions/Text' - $ref: '#/definitions/VariationSet' description: List of Variation objects or identifiers. Attribute is required, @@ -152,24 +152,24 @@ definitions: - members - type additionalProperties: false - AbsoluteCopyNumber: + CopyNumber: type: object - maturity: draft - description: The absolute count of discrete copies of a Location, within a system - (e.g. genome, cell, etc.). + description: The absolute count of discrete copies of a Location or Feature, within + a system (e.g. genome, cell, etc.). properties: _id: $ref: '#/definitions/CURIE' description: Variation Id. MUST be unique within document. type: type: string - const: AbsoluteCopyNumber - default: AbsoluteCopyNumber - description: MUST be "AbsoluteCopyNumber" + const: CopyNumber + default: CopyNumber + description: MUST be "CopyNumber" subject: oneOf: - $ref: '#/definitions/CURIE' - $ref: '#/definitions/ChromosomeLocation' + - $ref: '#/definitions/Gene' - $ref: '#/definitions/SequenceLocation' description: A location for which the number of systemic copies is described. copies: @@ -183,38 +183,44 @@ definitions: - subject - type additionalProperties: false - RelativeCopyNumber: + CopyNumberAssessment: type: object maturity: draft - description: The copies of a Location within a system (e.g. genome, cell, etc.) - relative to a baseline state. 
+ description: An assessment of the copy number of a Location or a Feature within + a system (e.g. genome, cell, etc.) relative to a baseline state. properties: _id: $ref: '#/definitions/CURIE' description: Variation Id. MUST be unique within document. type: type: string - const: RelativeCopyNumber - default: RelativeCopyNumber - description: MUST be "RelativeCopyNumber" + const: CopyNumberAssessment + default: CopyNumberAssessment + description: MUST be "CopyNumberAssessment" subject: oneOf: - $ref: '#/definitions/CURIE' - $ref: '#/definitions/ChromosomeLocation' + - $ref: '#/definitions/Gene' - $ref: '#/definitions/SequenceLocation' description: A location for which the number of systemic copies is described. - relative_copy_class: + copy_assessment: type: string enum: - - complete loss - - partial loss - - copy neutral - - low-level gain - - high-level gain - description: MUST be one of "complete loss", "partial loss", "copy neutral", - "low-level gain" or "high-level gain". + - EFO_0030069 + - EFO_XXXXXXX + - EFO_0030068 + - EFO_0030067 + - EFO_0030064 + - EFO_0030070 + - EFO_0030071 + - EFO_0030072 + description: MUST be one of "EFO_0030069" (complete genomic loss), + (high-level loss), "EFO_0030068" (low-level loss), "EFO_0030067" (loss), + "EFO_0030064" (regional base ploidy), "EFO_0030070" (gain), "EFO_0030071" + (low-level gain), "EFO_0030072" (high-level gain). 
required: - - relative_copy_class + - copy_assessment - subject - type additionalProperties: false diff --git a/validation/models.yaml b/validation/models.yaml index 1e89dc11..3845579e 100644 --- a/validation/models.yaml +++ b/validation/models.yaml @@ -356,7 +356,7 @@ Haplotype: ga4gh_digest: i8owCOBHIlRCPtcw_WzRFNTunwJRy99- ga4gh_identify: ga4gh:VH.i8owCOBHIlRCPtcw_WzRFNTunwJRy99- ga4gh_serialize: '{"members":["-kUJh47Pu24Y3Wdsk1rXEDKsXWNY-68x","Z_rYRxpUvwqCLsCBO3YLl70o2uf9_Op1"],"type":"Haplotype"}' -AbsoluteCopyNumber: +CopyNumber: - name: ">=3 copies APOE" in: copies: @@ -374,15 +374,15 @@ AbsoluteCopyNumber: value: 44905795 type: SequenceInterval type: SequenceLocation - type: AbsoluteCopyNumber + type: CopyNumber out: ga4gh_digest: New2SZ7NZU_gBbjzcmA8IwmA-EShG5JI ga4gh_identify: ga4gh:VAC.New2SZ7NZU_gBbjzcmA8IwmA-EShG5JI ga4gh_serialize: '{"copies":{"comparator":">=","type":"IndefiniteRange","value":3},"subject":"oz3NEuhtbBep3yqu3wrhqfDKbLPK7vcE","type":"AbsoluteCopyNumber"}' -RelativeCopyNumber: +CopyNumberAssessment: - name: "Low-level copy gain of BRCA1" in: - relative_copy_class: low-level gain + copy_assessment: EFO_0030071 subject: sequence_id: ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl interval: @@ -394,7 +394,7 @@ RelativeCopyNumber: value: 44905795 type: SequenceInterval type: SequenceLocation - type: RelativeCopyNumber + type: CopyNumberAssessment out: ga4gh_digest: 69x30aZU0KQF0RDqq3CaaVBid_xrgzrI ga4gh_identify: ga4gh:VRC.69x30aZU0KQF0RDqq3CaaVBid_xrgzrI From 6a5f9ef619000fa04766e829a8cc41d933a49339 Mon Sep 17 00:00:00 2001 From: "Alex H. 
Wagner, PhD" Date: Fri, 3 Mar 2023 13:28:36 -0500 Subject: [PATCH 61/83] update validation tests --- validation/models.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/validation/models.yaml b/validation/models.yaml index 3845579e..8332ebf3 100644 --- a/validation/models.yaml +++ b/validation/models.yaml @@ -376,9 +376,9 @@ CopyNumber: type: SequenceLocation type: CopyNumber out: - ga4gh_digest: New2SZ7NZU_gBbjzcmA8IwmA-EShG5JI - ga4gh_identify: ga4gh:VAC.New2SZ7NZU_gBbjzcmA8IwmA-EShG5JI - ga4gh_serialize: '{"copies":{"comparator":">=","type":"IndefiniteRange","value":3},"subject":"oz3NEuhtbBep3yqu3wrhqfDKbLPK7vcE","type":"AbsoluteCopyNumber"}' + ga4gh_digest: gwEL4SJx8fTbpWGwymtbPQoQGVYJkhGq + ga4gh_identify: ga4gh:VAC.gwEL4SJx8fTbpWGwymtbPQoQGVYJkhGq + ga4gh_serialize: '{"copies":{"comparator":">=","type":"IndefiniteRange","value":3},"subject":"oz3NEuhtbBep3yqu3wrhqfDKbLPK7vcE","type":"CopyNumber"}' CopyNumberAssessment: - name: "Low-level copy gain of BRCA1" in: @@ -396,9 +396,9 @@ CopyNumberAssessment: type: SequenceLocation type: CopyNumberAssessment out: - ga4gh_digest: 69x30aZU0KQF0RDqq3CaaVBid_xrgzrI - ga4gh_identify: ga4gh:VRC.69x30aZU0KQF0RDqq3CaaVBid_xrgzrI - ga4gh_serialize: '{"relative_copy_class":"low-level gain","subject":"oz3NEuhtbBep3yqu3wrhqfDKbLPK7vcE","type":"RelativeCopyNumber"}' + ga4gh_digest: bz33TOKnjKKcuy_sle4xGZM3mRcazEvJ + ga4gh_identify: ga4gh:VRC.bz33TOKnjKKcuy_sle4xGZM3mRcazEvJ + ga4gh_serialize: '{"copy_assessment":"low-level gain","subject":"oz3NEuhtbBep3yqu3wrhqfDKbLPK7vcE","type":"CopyNumberAssessment"}' Text: - in: From 2d1c6bd0e075350162891aeb3634dcc258bf9541 Mon Sep 17 00:00:00 2001 From: "Alex H. 
Wagner, PhD" Date: Fri, 3 Mar 2023 13:33:55 -0500 Subject: [PATCH 62/83] update type prefixes --- schema/ga4gh.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/schema/ga4gh.yaml b/schema/ga4gh.yaml index 341e2567..abf2e4e2 100644 --- a/schema/ga4gh.yaml +++ b/schema/ga4gh.yaml @@ -25,8 +25,8 @@ identifiers: Text: VT Genotype: GT Haplotype: VH - AbsoluteCopyNumber: VAC - RelativeCopyNumber: VRC + CopyNumber: CNV + CopyNumberAssessment: CNA SequenceLocation: VSL ChromosomeLocation: VCL From 34ce2b1187db554bc6c3cdd817c9e0dcc94fd2ec Mon Sep 17 00:00:00 2001 From: "Alex H. Wagner, PhD" Date: Fri, 3 Mar 2023 13:38:06 -0500 Subject: [PATCH 63/83] update model validation tests --- validation/models.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/validation/models.yaml b/validation/models.yaml index 8332ebf3..95975930 100644 --- a/validation/models.yaml +++ b/validation/models.yaml @@ -377,7 +377,7 @@ CopyNumber: type: CopyNumber out: ga4gh_digest: gwEL4SJx8fTbpWGwymtbPQoQGVYJkhGq - ga4gh_identify: ga4gh:VAC.gwEL4SJx8fTbpWGwymtbPQoQGVYJkhGq + ga4gh_identify: ga4gh:CNV.gwEL4SJx8fTbpWGwymtbPQoQGVYJkhGq ga4gh_serialize: '{"copies":{"comparator":">=","type":"IndefiniteRange","value":3},"subject":"oz3NEuhtbBep3yqu3wrhqfDKbLPK7vcE","type":"CopyNumber"}' CopyNumberAssessment: - name: "Low-level copy gain of BRCA1" @@ -397,8 +397,8 @@ CopyNumberAssessment: type: CopyNumberAssessment out: ga4gh_digest: bz33TOKnjKKcuy_sle4xGZM3mRcazEvJ - ga4gh_identify: ga4gh:VRC.bz33TOKnjKKcuy_sle4xGZM3mRcazEvJ - ga4gh_serialize: '{"copy_assessment":"low-level gain","subject":"oz3NEuhtbBep3yqu3wrhqfDKbLPK7vcE","type":"CopyNumberAssessment"}' + ga4gh_identify: ga4gh:CNA.bz33TOKnjKKcuy_sle4xGZM3mRcazEvJ + ga4gh_serialize: '{"copy_assessment":"EFO_0030071","subject":"oz3NEuhtbBep3yqu3wrhqfDKbLPK7vcE","type":"CopyNumberAssessment"}' Text: - in: From b143463ff3547a0f1f3275ab65ab111bfd96d03f Mon Sep 17 00:00:00 2001 From: "Alex H. 
Wagner, PhD" Date: Mon, 6 Mar 2023 10:38:00 -0500 Subject: [PATCH 64/83] update Sphinx to 4.x --- .readthedocs.yaml | 2 +- .requirements.txt | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 30994c13..e4a83a6e 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -4,6 +4,6 @@ sphinx: configuration: docs/source/conf.py python: - version: "3.8" + version: "3.10" install: - requirements: docs/source/requirements.txt \ No newline at end of file diff --git a/.requirements.txt b/.requirements.txt index 9af96934..c9d4cb68 100644 --- a/.requirements.txt +++ b/.requirements.txt @@ -4,4 +4,5 @@ jsonschema==3.2.0 ipython pyyaml ga4gh.gks.metaschema==0.2.0rc4 -sphinx ~= 3.5 \ No newline at end of file +sphinx ~= 4.5 +sphinx-rtd-theme ~= 1.2 \ No newline at end of file From c83165158324c723bc9f893939f75f3208f21dc8 Mon Sep 17 00:00:00 2001 From: "Alex H. Wagner, PhD" Date: Mon, 6 Mar 2023 10:45:48 -0500 Subject: [PATCH 65/83] CNV and CNA docs --- docs/source/terms_and_model.rst | 35 ++++++++++++++++----------------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/docs/source/terms_and_model.rst b/docs/source/terms_and_model.rst index 72bb86c2..2e0aa46f 100644 --- a/docs/source/terms_and_model.rst +++ b/docs/source/terms_and_model.rst @@ -368,14 +368,13 @@ Systemic Variation .. include:: defs/SystemicVariation.rst -.. _AbsoluteCopyNumber: +.. _CopyNumber: -AbsoluteCopyNumber -$$$$$$$$$$$$$$$$$$ +CopyNumber +$$$$$$$$$$ -*Absolute Copy Number Variation* captures the copies of a molecule within a -genome, and can be used to express concepts such as amplification -and copy loss. Copy Number Variation has conflated meanings in the +*Copy Number Variation* captures the integral copies of a molecule within a +genome. Copy Number Variation has conflated meanings in the genomics community, and can mean either (or both) the notion of copy number *in a genome* or copy number *on a molecule*. 
VRS separates the concerns of these two types of statements; this concept is a type @@ -383,7 +382,7 @@ of :ref:`SystemicVariation` and so describes the number of copies in a genome. The related :ref:`MolecularVariation` concept can be expressed as an :ref:`Allele` with a :ref:`RepeatedSequenceExpression`. -.. include:: defs/AbsoluteCopyNumber.rst +.. include:: defs/CopyNumber.rst **Examples** @@ -401,21 +400,21 @@ Two, three, or four total copies of BRCA1: "gene_id": "ncbigene:348", "type": "Gene" }, - "type": "AbsoluteCopyNumber" + "type": "CopyNumber" } -.. _RelativeCopyNumber: +.. _CopyNumberAssessment: -RelativeCopyNumber -$$$$$$$$$$$$$$$$$$ +CopyNumberAssessment +$$$$$$$$$$$$$$$$$$$$ -*Relative Copy Number Variation* captures a classification of copies +*Copy Number Assessment* captures a categorization of copies of a molecule within a system, relative to a baseline. These types of Variation are common outputs from CNV callers, particularly in the -somatic domain where Absolute Copy Counts are difficult to estimate -and less useful in practice than relative statements. +somatic domain where integral :ref:`CopyNumber` variation are difficult to +estimate and less useful in practice than relative statements. -.. include:: defs/RelativeCopyNumber.rst +.. include:: defs/CopyNumberAssessment.rst **Examples** @@ -424,12 +423,12 @@ Low-level copy gain of BRCA1: .. parsed-literal:: { - "relative_copy_class": "low-level gain", + "relative_copy_class": "EFO_0030071", # low-level gain "subject": { - "gene_id": "ncbigene:348", + "gene_id": "ncbigene:348", # BRCA1 gene "type": "Gene" }, - "type": "RelativeCopyNumber" + "type": "CopyNumberAssessment" } .. _genotype: From 82a7adb89252a3a87e66833a8b98ab223a5aa54d Mon Sep 17 00:00:00 2001 From: "Alex H. 
Wagner, PhD" Date: Tue, 7 Mar 2023 22:44:38 -0500 Subject: [PATCH 66/83] back to 3.8 --- .readthedocs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index e4a83a6e..30994c13 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -4,6 +4,6 @@ sphinx: configuration: docs/source/conf.py python: - version: "3.10" + version: "3.8" install: - requirements: docs/source/requirements.txt \ No newline at end of file From a270654216c7ae16a77d8ceea920258d7b1a220f Mon Sep 17 00:00:00 2001 From: "Alex H. Wagner, PhD" Date: Tue, 7 Mar 2023 22:45:23 -0500 Subject: [PATCH 67/83] Update schema/vrs.json Co-authored-by: Kori Kuzma --- schema/vrs.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/schema/vrs.json b/schema/vrs.json index ac8d19be..fdbb4fbe 100644 --- a/schema/vrs.json +++ b/schema/vrs.json @@ -312,7 +312,7 @@ "CopyNumberAssessment": { "type": "object", "maturity": "draft", - "description": "An assessment of the copy number of a Location or a Feature within a system (e.g. genome, cell, etc.) relative to a baseline state.", + "description": "An assessment of the copy number of a Location or a Feature within a system (e.g. genome, cell, etc.) relative to a baseline state.", "properties": { "_id": { "$ref": "#/definitions/CURIE", From 0b22fdb522f90f27a46c34991b85e8335d54e2c0 Mon Sep 17 00:00:00 2001 From: "Alex H. Wagner, PhD" Date: Tue, 7 Mar 2023 22:49:45 -0500 Subject: [PATCH 68/83] build JSON --- schema/vrs.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/schema/vrs.json b/schema/vrs.json index fdbb4fbe..ac8d19be 100644 --- a/schema/vrs.json +++ b/schema/vrs.json @@ -312,7 +312,7 @@ "CopyNumberAssessment": { "type": "object", "maturity": "draft", - "description": "An assessment of the copy number of a Location or a Feature within a system (e.g. genome, cell, etc.) 
relative to a baseline state.", + "description": "An assessment of the copy number of a Location or a Feature within a system (e.g. genome, cell, etc.) relative to a baseline state.", "properties": { "_id": { "$ref": "#/definitions/CURIE", From 3f293292042801c4e69cd51119ad13025ce18317 Mon Sep 17 00:00:00 2001 From: "Alex H. Wagner, PhD" Date: Fri, 31 Mar 2023 14:35:03 -0400 Subject: [PATCH 69/83] addresses #404 --- docs/source/terms_and_model.rst | 33 ++++++++++--------- ...berAssessment.rst => CopyNumberChange.rst} | 6 ++-- .../{CopyNumber.rst => CopyNumberCount.rst} | 4 +-- schema/ga4gh.yaml | 4 +-- schema/vrs-source.yaml | 24 +++++++------- schema/vrs.json | 32 +++++++++--------- schema/vrs.yaml | 32 +++++++++--------- validation/models.yaml | 20 +++++------ 8 files changed, 79 insertions(+), 76 deletions(-) rename schema/defs/vrs/{CopyNumberAssessment.rst => CopyNumberChange.rst} (64%) rename schema/defs/vrs/{CopyNumber.rst => CopyNumberCount.rst} (88%) diff --git a/docs/source/terms_and_model.rst b/docs/source/terms_and_model.rst index 2e0aa46f..9a016d1d 100644 --- a/docs/source/terms_and_model.rst +++ b/docs/source/terms_and_model.rst @@ -368,13 +368,13 @@ Systemic Variation .. include:: defs/SystemicVariation.rst -.. _CopyNumber: +.. _CopyNumberCount: -CopyNumber -$$$$$$$$$$ +CopyNumberCount +$$$$$$$$$$$$$$$ -*Copy Number Variation* captures the integral copies of a molecule within a -genome. Copy Number Variation has conflated meanings in the +*Copy Number Count* captures the integral copies of a molecule within a +genome. Copy Number Count has conflated meanings in the genomics community, and can mean either (or both) the notion of copy number *in a genome* or copy number *on a molecule*. VRS separates the concerns of these two types of statements; this concept is a type @@ -382,7 +382,7 @@ of :ref:`SystemicVariation` and so describes the number of copies in a genome. 
The related :ref:`MolecularVariation` concept can be expressed as an :ref:`Allele` with a :ref:`RepeatedSequenceExpression`. -.. include:: defs/CopyNumber.rst +.. include:: defs/CopyNumberCount.rst **Examples** @@ -400,21 +400,24 @@ Two, three, or four total copies of BRCA1: "gene_id": "ncbigene:348", "type": "Gene" }, - "type": "CopyNumber" + "type": "CopyNumberCount" } -.. _CopyNumberAssessment: +.. _CopyNumberChange: -CopyNumberAssessment -$$$$$$$$$$$$$$$$$$$$ +CopyNumberChange +$$$$$$$$$$$$$$$$ -*Copy Number Assessment* captures a categorization of copies +*Copy Number Change* captures a categorization of copies of a molecule within a system, relative to a baseline. These types of Variation are common outputs from CNV callers, particularly in the -somatic domain where integral :ref:`CopyNumber` variation are difficult to -estimate and less useful in practice than relative statements. +somatic domain where integral :ref:`CopyNumberCount` are difficult to +estimate and less useful in practice than relative statements. Somatic CNV +callers typically express changes as relative statements, and many HGVS +expressions submitted to express copy number variation are interpreted to be +relative copy changes. -.. include:: defs/CopyNumberAssessment.rst +.. include:: defs/CopyNumberChange.rst **Examples** @@ -428,7 +431,7 @@ Low-level copy gain of BRCA1: "gene_id": "ncbigene:348", # BRCA1 gene "type": "Gene" }, - "type": "CopyNumberAssessment" + "type": "CopyNumberChange" } .. 
_genotype: diff --git a/schema/defs/vrs/CopyNumberAssessment.rst b/schema/defs/vrs/CopyNumberChange.rst similarity index 64% rename from schema/defs/vrs/CopyNumberAssessment.rst rename to schema/defs/vrs/CopyNumberChange.rst index 52661c96..9d7b5366 100644 --- a/schema/defs/vrs/CopyNumberAssessment.rst +++ b/schema/defs/vrs/CopyNumberChange.rst @@ -4,7 +4,7 @@ An assessment of the copy number of a :ref:`Location` or a :ref:`Feature` within **Information Model** -Some CopyNumberAssessment attributes are inherited from :ref:`Variation`. +Some CopyNumberChange attributes are inherited from :ref:`Variation`. .. list-table:: :class: clean-wrap @@ -23,7 +23,7 @@ Some CopyNumberAssessment attributes are inherited from :ref:`Variation`. * - type - string - 1..1 - - MUST be "CopyNumberAssessment" + - MUST be "CopyNumberChange" * - subject - :ref:`Location` | :ref:`CURIE` | :ref:`Feature` - 1..1 @@ -31,4 +31,4 @@ Some CopyNumberAssessment attributes are inherited from :ref:`Variation`. * - copy_assessment - string - 1..1 - - MUST be one of "EFO_0030069" (complete genomic loss), (high-level loss), "EFO_0030068" (low-level loss), "EFO_0030067" (loss), "EFO_0030064" (regional base ploidy), "EFO_0030070" (gain), "EFO_0030071" (low-level gain), "EFO_0030072" (high-level gain). + - MUST be one of "EFO_0030069" (complete genomic loss), "EFO_0020073" (high-level loss), "EFO_0030068" (low-level loss), "EFO_0030067" (loss), "EFO_0030064" (regional base ploidy), "EFO_0030070" (gain), "EFO_0030071" (low-level gain), "EFO_0030072" (high-level gain). 
diff --git a/schema/defs/vrs/CopyNumber.rst b/schema/defs/vrs/CopyNumberCount.rst similarity index 88% rename from schema/defs/vrs/CopyNumber.rst rename to schema/defs/vrs/CopyNumberCount.rst index 02347004..ea8c6f1f 100644 --- a/schema/defs/vrs/CopyNumber.rst +++ b/schema/defs/vrs/CopyNumberCount.rst @@ -4,7 +4,7 @@ The absolute count of discrete copies of a :ref:`Location` or :ref:`Feature`, wi **Information Model** -Some CopyNumber attributes are inherited from :ref:`Variation`. +Some CopyNumberCount attributes are inherited from :ref:`Variation`. .. list-table:: :class: clean-wrap @@ -23,7 +23,7 @@ Some CopyNumber attributes are inherited from :ref:`Variation`. * - type - string - 1..1 - - MUST be "CopyNumber" + - MUST be "CopyNumberCount" * - subject - :ref:`Location` | :ref:`CURIE` | :ref:`Feature` - 1..1 diff --git a/schema/ga4gh.yaml b/schema/ga4gh.yaml index abf2e4e2..7cb574b8 100644 --- a/schema/ga4gh.yaml +++ b/schema/ga4gh.yaml @@ -25,8 +25,8 @@ identifiers: Text: VT Genotype: GT Haplotype: VH - CopyNumber: CNV - CopyNumberAssessment: CNA + CopyNumberCount: CN + CopyNumberChange: CX SequenceLocation: VSL ChromosomeLocation: VCL diff --git a/schema/vrs-source.yaml b/schema/vrs-source.yaml index 9e30dfa8..79758d7e 100644 --- a/schema/vrs-source.yaml +++ b/schema/vrs-source.yaml @@ -71,8 +71,8 @@ definitions: A Variation of multiple molecules in the context of a system, e.g. a genome, sample, or homologous chromosomes. 
oneOf: - - $ref: "#/definitions/CopyNumber" - - $ref: "#/definitions/CopyNumberAssessment" + - $ref: "#/definitions/CopyNumberCount" + - $ref: "#/definitions/CopyNumberChange" - $ref: "#/definitions/Genotype" discriminator: propertyName: type @@ -188,7 +188,7 @@ definitions: # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # SystemicVariation - CopyNumber: + CopyNumberCount: inherits: SystemicVariation type: object description: >- @@ -197,10 +197,10 @@ definitions: properties: type: type: string - const: "CopyNumber" - default: "CopyNumber" + const: "CopyNumberCount" + default: "CopyNumberCount" description: >- - MUST be "CopyNumber" + MUST be "CopyNumberCount" subject: oneOf: - $ref: "#/definitions/Location" @@ -217,7 +217,7 @@ definitions: The integral number of copies of the subject in a system required: [ "subject", "copies" ] - CopyNumberAssessment: + CopyNumberChange: inherits: SystemicVariation type: object maturity: draft @@ -227,10 +227,10 @@ definitions: properties: type: type: string - const: "CopyNumberAssessment" - default: "CopyNumberAssessment" + const: "CopyNumberChange" + default: "CopyNumberChange" description: >- - MUST be "CopyNumberAssessment" + MUST be "CopyNumberChange" subject: oneOf: - $ref: "#/definitions/Location" @@ -240,9 +240,9 @@ definitions: A location for which the number of systemic copies is described. 
copy_assessment: type: string - enum: [ "EFO_0030069", "EFO_XXXXXXX", "EFO_0030068", "EFO_0030067", "EFO_0030064", "EFO_0030070", "EFO_0030071", "EFO_0030072" ] + enum: [ "EFO_0030069", "EFO_0020073", "EFO_0030068", "EFO_0030067", "EFO_0030064", "EFO_0030070", "EFO_0030071", "EFO_0030072" ] description: >- - MUST be one of "EFO_0030069" (complete genomic loss), (high-level loss), + MUST be one of "EFO_0030069" (complete genomic loss), "EFO_0020073" (high-level loss), "EFO_0030068" (low-level loss), "EFO_0030067" (loss), "EFO_0030064" (regional base ploidy), "EFO_0030070" (gain), "EFO_0030071" (low-level gain), "EFO_0030072" (high-level gain). required: [ "subject", "copy_assessment" ] diff --git a/schema/vrs.json b/schema/vrs.json index ac8d19be..f43c5667 100644 --- a/schema/vrs.json +++ b/schema/vrs.json @@ -10,10 +10,10 @@ "$ref": "#/definitions/Allele" }, { - "$ref": "#/definitions/CopyNumber" + "$ref": "#/definitions/CopyNumberChange" }, { - "$ref": "#/definitions/CopyNumberAssessment" + "$ref": "#/definitions/CopyNumberCount" }, { "$ref": "#/definitions/Genotype" @@ -64,10 +64,10 @@ "description": "A Variation of multiple molecules in the context of a system, e.g. a genome, sample, or homologous chromosomes.", "oneOf": [ { - "$ref": "#/definitions/CopyNumber" + "$ref": "#/definitions/CopyNumberChange" }, { - "$ref": "#/definitions/CopyNumberAssessment" + "$ref": "#/definitions/CopyNumberCount" }, { "$ref": "#/definitions/Genotype" @@ -228,10 +228,10 @@ "$ref": "#/definitions/CURIE" }, { - "$ref": "#/definitions/CopyNumber" + "$ref": "#/definitions/CopyNumberChange" }, { - "$ref": "#/definitions/CopyNumberAssessment" + "$ref": "#/definitions/CopyNumberCount" }, { "$ref": "#/definitions/Genotype" @@ -256,7 +256,7 @@ ], "additionalProperties": false }, - "CopyNumber": { + "CopyNumberCount": { "type": "object", "description": "The absolute count of discrete copies of a Location or Feature, within a system (e.g. 
genome, cell, etc.).", "properties": { @@ -266,9 +266,9 @@ }, "type": { "type": "string", - "const": "CopyNumber", - "default": "CopyNumber", - "description": "MUST be \"CopyNumber\"" + "const": "CopyNumberCount", + "default": "CopyNumberCount", + "description": "MUST be \"CopyNumberCount\"" }, "subject": { "oneOf": [ @@ -309,7 +309,7 @@ ], "additionalProperties": false }, - "CopyNumberAssessment": { + "CopyNumberChange": { "type": "object", "maturity": "draft", "description": "An assessment of the copy number of a Location or a Feature within a system (e.g. genome, cell, etc.) relative to a baseline state.", @@ -320,9 +320,9 @@ }, "type": { "type": "string", - "const": "CopyNumberAssessment", - "default": "CopyNumberAssessment", - "description": "MUST be \"CopyNumberAssessment\"" + "const": "CopyNumberChange", + "default": "CopyNumberChange", + "description": "MUST be \"CopyNumberChange\"" }, "subject": { "oneOf": [ @@ -345,7 +345,7 @@ "type": "string", "enum": [ "EFO_0030069", - "EFO_XXXXXXX", + "EFO_0020073", "EFO_0030068", "EFO_0030067", "EFO_0030064", @@ -353,7 +353,7 @@ "EFO_0030071", "EFO_0030072" ], - "description": "MUST be one of \"EFO_0030069\" (complete genomic loss), (high-level loss), \"EFO_0030068\" (low-level loss), \"EFO_0030067\" (loss), \"EFO_0030064\" (regional base ploidy), \"EFO_0030070\" (gain), \"EFO_0030071\" (low-level gain), \"EFO_0030072\" (high-level gain)." + "description": "MUST be one of \"EFO_0030069\" (complete genomic loss), \"EFO_0020073\" (high-level loss), \"EFO_0030068\" (low-level loss), \"EFO_0030067\" (loss), \"EFO_0030064\" (regional base ploidy), \"EFO_0030070\" (gain), \"EFO_0030071\" (low-level gain), \"EFO_0030072\" (high-level gain)." } }, "required": [ diff --git a/schema/vrs.yaml b/schema/vrs.yaml index 46e625d2..d0d42bb5 100644 --- a/schema/vrs.yaml +++ b/schema/vrs.yaml @@ -6,8 +6,8 @@ definitions: description: A representation of the state of one or more biomolecules. 
oneOf: - $ref: '#/definitions/Allele' - - $ref: '#/definitions/CopyNumber' - - $ref: '#/definitions/CopyNumberAssessment' + - $ref: '#/definitions/CopyNumberChange' + - $ref: '#/definitions/CopyNumberCount' - $ref: '#/definitions/Genotype' - $ref: '#/definitions/Haplotype' - $ref: '#/definitions/Text' @@ -34,8 +34,8 @@ definitions: description: A Variation of multiple molecules in the context of a system, e.g. a genome, sample, or homologous chromosomes. oneOf: - - $ref: '#/definitions/CopyNumber' - - $ref: '#/definitions/CopyNumberAssessment' + - $ref: '#/definitions/CopyNumberChange' + - $ref: '#/definitions/CopyNumberCount' - $ref: '#/definitions/Genotype' discriminator: propertyName: type @@ -140,8 +140,8 @@ definitions: oneOf: - $ref: '#/definitions/Allele' - $ref: '#/definitions/CURIE' - - $ref: '#/definitions/CopyNumber' - - $ref: '#/definitions/CopyNumberAssessment' + - $ref: '#/definitions/CopyNumberChange' + - $ref: '#/definitions/CopyNumberCount' - $ref: '#/definitions/Genotype' - $ref: '#/definitions/Haplotype' - $ref: '#/definitions/Text' @@ -152,7 +152,7 @@ definitions: - members - type additionalProperties: false - CopyNumber: + CopyNumberCount: type: object description: The absolute count of discrete copies of a Location or Feature, within a system (e.g. genome, cell, etc.). @@ -162,9 +162,9 @@ definitions: description: Variation Id. MUST be unique within document. type: type: string - const: CopyNumber - default: CopyNumber - description: MUST be "CopyNumber" + const: CopyNumberCount + default: CopyNumberCount + description: MUST be "CopyNumberCount" subject: oneOf: - $ref: '#/definitions/CURIE' @@ -183,7 +183,7 @@ definitions: - subject - type additionalProperties: false - CopyNumberAssessment: + CopyNumberChange: type: object maturity: draft description: An assessment of the copy number of a Location or a Feature within @@ -194,9 +194,9 @@ definitions: description: Variation Id. MUST be unique within document. 
type: type: string - const: CopyNumberAssessment - default: CopyNumberAssessment - description: MUST be "CopyNumberAssessment" + const: CopyNumberChange + default: CopyNumberChange + description: MUST be "CopyNumberChange" subject: oneOf: - $ref: '#/definitions/CURIE' @@ -208,14 +208,14 @@ definitions: type: string enum: - EFO_0030069 - - EFO_XXXXXXX + - EFO_0020073 - EFO_0030068 - EFO_0030067 - EFO_0030064 - EFO_0030070 - EFO_0030071 - EFO_0030072 - description: MUST be one of "EFO_0030069" (complete genomic loss), + description: MUST be one of "EFO_0030069" (complete genomic loss), "EFO_0020073" (high-level loss), "EFO_0030068" (low-level loss), "EFO_0030067" (loss), "EFO_0030064" (regional base ploidy), "EFO_0030070" (gain), "EFO_0030071" (low-level gain), "EFO_0030072" (high-level gain). diff --git a/validation/models.yaml b/validation/models.yaml index 95975930..3ee25b98 100644 --- a/validation/models.yaml +++ b/validation/models.yaml @@ -356,7 +356,7 @@ Haplotype: ga4gh_digest: i8owCOBHIlRCPtcw_WzRFNTunwJRy99- ga4gh_identify: ga4gh:VH.i8owCOBHIlRCPtcw_WzRFNTunwJRy99- ga4gh_serialize: '{"members":["-kUJh47Pu24Y3Wdsk1rXEDKsXWNY-68x","Z_rYRxpUvwqCLsCBO3YLl70o2uf9_Op1"],"type":"Haplotype"}' -CopyNumber: +CopyNumberCount: - name: ">=3 copies APOE" in: copies: @@ -374,12 +374,12 @@ CopyNumber: value: 44905795 type: SequenceInterval type: SequenceLocation - type: CopyNumber + type: CopyNumberCount out: - ga4gh_digest: gwEL4SJx8fTbpWGwymtbPQoQGVYJkhGq - ga4gh_identify: ga4gh:CNV.gwEL4SJx8fTbpWGwymtbPQoQGVYJkhGq - ga4gh_serialize: '{"copies":{"comparator":">=","type":"IndefiniteRange","value":3},"subject":"oz3NEuhtbBep3yqu3wrhqfDKbLPK7vcE","type":"CopyNumber"}' -CopyNumberAssessment: + ga4gh_digest: salZa9yW-GduRxsRFwIGCQvi_YfpjeF4 + ga4gh_identify: ga4gh:CN.salZa9yW-GduRxsRFwIGCQvi_YfpjeF4 + ga4gh_serialize: '{"copies":{"comparator":">=","type":"IndefiniteRange","value":3},"subject":"oz3NEuhtbBep3yqu3wrhqfDKbLPK7vcE","type":"CopyNumberCount"}' +CopyNumberChange: - 
name: "Low-level copy gain of BRCA1" in: copy_assessment: EFO_0030071 @@ -394,11 +394,11 @@ CopyNumberAssessment: value: 44905795 type: SequenceInterval type: SequenceLocation - type: CopyNumberAssessment + type: CopyNumberChange out: - ga4gh_digest: bz33TOKnjKKcuy_sle4xGZM3mRcazEvJ - ga4gh_identify: ga4gh:CNA.bz33TOKnjKKcuy_sle4xGZM3mRcazEvJ - ga4gh_serialize: '{"copy_assessment":"EFO_0030071","subject":"oz3NEuhtbBep3yqu3wrhqfDKbLPK7vcE","type":"CopyNumberAssessment"}' + ga4gh_digest: MLA_TGdelT-_jrlsC6N19S2itmcWqHfj + ga4gh_identify: ga4gh:CX.MLA_TGdelT-_jrlsC6N19S2itmcWqHfj + ga4gh_serialize: '{"copy_assessment":"EFO_0030071","subject":"oz3NEuhtbBep3yqu3wrhqfDKbLPK7vcE","type":"CopyNumberChange"}' Text: - in: From 2329b6236418d71af7a889514c68304f7d943fdc Mon Sep 17 00:00:00 2001 From: "Alex H. Wagner, PhD" Date: Fri, 31 Mar 2023 15:01:28 -0400 Subject: [PATCH 70/83] baseline ploidy as state --- schema/defs/vrs/CopyNumberChange.rst | 2 +- schema/vrs-source.yaml | 2 +- schema/vrs.json | 2 +- schema/vrs.yaml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/schema/defs/vrs/CopyNumberChange.rst b/schema/defs/vrs/CopyNumberChange.rst index 9d7b5366..06feb2f5 100644 --- a/schema/defs/vrs/CopyNumberChange.rst +++ b/schema/defs/vrs/CopyNumberChange.rst @@ -1,6 +1,6 @@ **Computational Definition** -An assessment of the copy number of a :ref:`Location` or a :ref:`Feature` within a system (e.g. genome, cell, etc.) relative to a baseline state. +An assessment of the copy number of a :ref:`Location` or a :ref:`Feature` within a system (e.g. genome, cell, etc.) relative to a baseline ploidy. **Information Model** diff --git a/schema/vrs-source.yaml b/schema/vrs-source.yaml index 79758d7e..4909902c 100644 --- a/schema/vrs-source.yaml +++ b/schema/vrs-source.yaml @@ -223,7 +223,7 @@ definitions: maturity: draft description: >- An assessment of the copy number of a :ref:`Location` or a :ref:`Feature` within a system (e.g. genome, cell, - etc.) 
relative to a baseline state. + etc.) relative to a baseline ploidy. properties: type: type: string diff --git a/schema/vrs.json b/schema/vrs.json index f43c5667..811b1c9b 100644 --- a/schema/vrs.json +++ b/schema/vrs.json @@ -312,7 +312,7 @@ "CopyNumberChange": { "type": "object", "maturity": "draft", - "description": "An assessment of the copy number of a Location or a Feature within a system (e.g. genome, cell, etc.) relative to a baseline state.", + "description": "An assessment of the copy number of a Location or a Feature within a system (e.g. genome, cell, etc.) relative to a baseline ploidy.", "properties": { "_id": { "$ref": "#/definitions/CURIE", diff --git a/schema/vrs.yaml b/schema/vrs.yaml index d0d42bb5..612d04ab 100644 --- a/schema/vrs.yaml +++ b/schema/vrs.yaml @@ -187,7 +187,7 @@ definitions: type: object maturity: draft description: An assessment of the copy number of a Location or a Feature within - a system (e.g. genome, cell, etc.) relative to a baseline state. + a system (e.g. genome, cell, etc.) relative to a baseline ploidy. properties: _id: $ref: '#/definitions/CURIE' From d8e37cf3bba68cd66dc2ef4cca7206fe966d1cfb Mon Sep 17 00:00:00 2001 From: "Alex H. Wagner, PhD" Date: Fri, 31 Mar 2023 17:11:10 -0400 Subject: [PATCH 71/83] CSE initial draft --- docs/source/terms_and_model.rst | 46 +++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/docs/source/terms_and_model.rst b/docs/source/terms_and_model.rst index 9a016d1d..c9309c1c 100644 --- a/docs/source/terms_and_model.rst +++ b/docs/source/terms_and_model.rst @@ -1029,6 +1029,52 @@ large-scale tandem duplications. "type": "RepeatedSequenceExpression" } +ComposedSequenceExpression +########################## + +*Composed Sequence* is a class of sequence expression composed of other sequence expression +types. It is useful, for example, when representing multiple repeating subunits that occur +in tandem. + +.. 
include:: defs/ComposedSequenceExpression.rst + +**Examples** + +.. parsed-literal:: + + { + "type": "Allele", + "location": { + "type": "SequenceLocation", + "sequence_id": "ga4gh:SQ.sH4gymNtL5nxNdTE3evfxzZa4dg3fqDz", + "interval": { + "type": "SequenceInterval", + "start": { "type": "Number", "value": 3 }, + "end": { "type": "Number", "value": 33 } + } + }, + "state": { + "type": "ComposedSequenceExpression", + "components": [ + { + "type": "RepeatedSequenceExpression", + "seq_expr": { "type": "LiteralSequenceExpression", "sequence": "GCG" }, + "count": { "type": "Number", "value": 11 } + }, + { + "type": "RepeatedSequenceExpression", + "seq_expr": { "type": "LiteralSequenceExpression", "sequence": "GCA" }, + "count": { "type": "Number", "value": 3 } + }, + { + "type": "RepeatedSequenceExpression", + "seq_expr": { "type": "LiteralSequenceExpression", "sequence": "GCG" }, + "count": { "type": "Number", "value": 1 } + } + ] + } + } + .. _Feature: Feature From 4599dcddc0891414362d66c47ed8c967fe1198e3 Mon Sep 17 00:00:00 2001 From: "Alex H. Wagner, PhD" Date: Sat, 1 Apr 2023 10:41:46 -0400 Subject: [PATCH 72/83] clarify example --- docs/source/terms_and_model.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/source/terms_and_model.rst b/docs/source/terms_and_model.rst index c9309c1c..971c77a7 100644 --- a/docs/source/terms_and_model.rst +++ b/docs/source/terms_and_model.rst @@ -1034,7 +1034,8 @@ ComposedSequenceExpression *Composed Sequence* is a class of sequence expression composed of other sequence expression types. It is useful, for example, when representing multiple repeating subunits that occur -in tandem. +in tandem, such as in the description of *PABPN1* alleles in the diagnosis of +oculopharyngeal muscular dystrophy (OPMD). .. include:: defs/ComposedSequenceExpression.rst From 981442b03ac3ed385b207c1949fdbb6f13b6067c Mon Sep 17 00:00:00 2001 From: "Alex H. 
Wagner, PhD" Date: Sat, 1 Apr 2023 11:31:09 -0400 Subject: [PATCH 73/83] add 1.3 release notes --- docs/source/releases/1.3.rst | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 docs/source/releases/1.3.rst diff --git a/docs/source/releases/1.3.rst b/docs/source/releases/1.3.rst new file mode 100644 index 00000000..b8d59f3a --- /dev/null +++ b/docs/source/releases/1.3.rst @@ -0,0 +1,30 @@ +1.3 +!!! + +1.3.0 +@@@@@ + + +News +#### + + * A manuscript describing :ref:`Genotype` was published. Please cite + https://www.worldscientific.com/doi/abs/10.1142/9789811270611_0035. + +Major Changes +############# + + * :ref:`CopyNumberChange` introduced for relative copy number calls + * :ref:`CopyNumberCount` replaces `CopyNumber` + * :ref:`Genotype` introduced for describing genotypes + * :ref:`ComposedSequenceExpression` introduced for composing expressions + from multiple other sequence expressions + +Minor Changes +############# + + * Clarifying updates for :ref:`Allele normalization guidance <>` + * :ref:`Haplotype` allele member minimum was revised from 1 to 2 + * Updated metaschema processor version + * Introduced ordered / unordered attribute in array declarations + * Added explicit class inheritance From f62eb59bd89b480a666f7f3fe1a345bf2cb556f4 Mon Sep 17 00:00:00 2001 From: "Alex H. 
Wagner, PhD" Date: Sat, 1 Apr 2023 11:39:34 -0400 Subject: [PATCH 74/83] fix documentation build errors --- docs/source/impl-guide/computed_identifiers.rst | 4 +--- docs/source/releases/1.3.rst | 3 ++- docs/source/releases/index.rst | 1 + docs/source/terms_and_model.rst | 3 +++ 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/docs/source/impl-guide/computed_identifiers.rst b/docs/source/impl-guide/computed_identifiers.rst index 3e93b2a1..59897461 100644 --- a/docs/source/impl-guide/computed_identifiers.rst +++ b/docs/source/impl-guide/computed_identifiers.rst @@ -119,9 +119,7 @@ If the object is an instance of a VRS class, implementations MUST: * ensure that objects are referenced with identifiers in the ``ga4gh`` namespace * replace each nested :term:`identifiable object` with their - corresponding *digests*. (Note: Attributes of some objects, such - as :ref:`CopyNumber`, permit a mix of identifiable and - non-identifiable values.) + corresponding *digests*. * order arrays of digests and ids by Unicode Character Set values * filter out fields that start with underscore (e.g., `_id`) * filter out fields with null values diff --git a/docs/source/releases/1.3.rst b/docs/source/releases/1.3.rst index b8d59f3a..ac31515f 100644 --- a/docs/source/releases/1.3.rst +++ b/docs/source/releases/1.3.rst @@ -23,7 +23,8 @@ Major Changes Minor Changes ############# - * Clarifying updates for :ref:`Allele normalization guidance <>` + * Clarifying updates for :ref:`Allele normalization guidance + ` * :ref:`Haplotype` allele member minimum was revised from 1 to 2 * Updated metaschema processor version * Introduced ordered / unordered attribute in array declarations diff --git a/docs/source/releases/index.rst b/docs/source/releases/index.rst index 194bfcdc..0f0af271 100644 --- a/docs/source/releases/index.rst +++ b/docs/source/releases/index.rst @@ -23,6 +23,7 @@ Releases :maxdepth: 2 :includehidden: + 1.3.rst 1.2.rst 1.1.rst 1.0.rst diff --git 
a/docs/source/terms_and_model.rst b/docs/source/terms_and_model.rst index 971c77a7..266825b3 100644 --- a/docs/source/terms_and_model.rst +++ b/docs/source/terms_and_model.rst @@ -368,6 +368,7 @@ Systemic Variation .. include:: defs/SystemicVariation.rst +.. _CopyNumber: .. _CopyNumberCount: CopyNumberCount @@ -1029,6 +1030,8 @@ large-scale tandem duplications. "type": "RepeatedSequenceExpression" } +.. _ComposedSequenceExpression: + ComposedSequenceExpression ########################## From 842b21354b9d147ddeb1f5333862238d3ae2098d Mon Sep 17 00:00:00 2001 From: "Alex H. Wagner, PhD" Date: Sat, 1 Apr 2023 11:44:52 -0400 Subject: [PATCH 75/83] style bug fix attempt --- docs/source/releases/1.3.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/source/releases/1.3.rst b/docs/source/releases/1.3.rst index ac31515f..23eb1060 100644 --- a/docs/source/releases/1.3.rst +++ b/docs/source/releases/1.3.rst @@ -17,8 +17,7 @@ Major Changes * :ref:`CopyNumberChange` introduced for relative copy number calls * :ref:`CopyNumberCount` replaces `CopyNumber` * :ref:`Genotype` introduced for describing genotypes - * :ref:`ComposedSequenceExpression` introduced for composing expressions - from multiple other sequence expressions + * :ref:`ComposedSequenceExpression` introduced for composing expressions from multiple other sequence expressions Minor Changes ############# From 4bdf9b69a1b8bd8fc401190a8d38c1892f885047 Mon Sep 17 00:00:00 2001 From: "Alex H. 
Wagner, PhD" Date: Sat, 1 Apr 2023 11:47:08 -0400 Subject: [PATCH 76/83] fix markup error --- docs/source/releases/1.3.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/releases/1.3.rst b/docs/source/releases/1.3.rst index 23eb1060..a481f4ea 100644 --- a/docs/source/releases/1.3.rst +++ b/docs/source/releases/1.3.rst @@ -15,7 +15,7 @@ Major Changes ############# * :ref:`CopyNumberChange` introduced for relative copy number calls - * :ref:`CopyNumberCount` replaces `CopyNumber` + * :ref:`CopyNumberCount` replaces ```CopyNumber``` * :ref:`Genotype` introduced for describing genotypes * :ref:`ComposedSequenceExpression` introduced for composing expressions from multiple other sequence expressions From cb2e5461fc7e4776f190f26727d8ab4b9814ca3d Mon Sep 17 00:00:00 2001 From: "Alex H. Wagner, PhD" Date: Sat, 1 Apr 2023 11:48:21 -0400 Subject: [PATCH 77/83] remove extra tildes --- docs/source/releases/1.3.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/releases/1.3.rst b/docs/source/releases/1.3.rst index a481f4ea..185513bd 100644 --- a/docs/source/releases/1.3.rst +++ b/docs/source/releases/1.3.rst @@ -15,7 +15,7 @@ Major Changes ############# * :ref:`CopyNumberChange` introduced for relative copy number calls - * :ref:`CopyNumberCount` replaces ```CopyNumber``` + * :ref:`CopyNumberCount` replaces ``CopyNumber`` * :ref:`Genotype` introduced for describing genotypes * :ref:`ComposedSequenceExpression` introduced for composing expressions from multiple other sequence expressions From 15366b8c68319590aedbcbd556acecf273a7c80c Mon Sep 17 00:00:00 2001 From: "Alex H. 
Wagner, PhD" Date: Sat, 1 Apr 2023 11:50:53 -0400 Subject: [PATCH 78/83] remove redundant descriptive description --- docs/source/releases/1.3.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/releases/1.3.rst b/docs/source/releases/1.3.rst index 185513bd..67a0f955 100644 --- a/docs/source/releases/1.3.rst +++ b/docs/source/releases/1.3.rst @@ -16,7 +16,7 @@ Major Changes * :ref:`CopyNumberChange` introduced for relative copy number calls * :ref:`CopyNumberCount` replaces ``CopyNumber`` - * :ref:`Genotype` introduced for describing genotypes + * :ref:`Genotype` introduced as a new systemic variation concept * :ref:`ComposedSequenceExpression` introduced for composing expressions from multiple other sequence expressions Minor Changes From 3ba3ed0949f82731152ce3fef35109c6c42f7e2a Mon Sep 17 00:00:00 2001 From: "Alex H. Wagner, PhD" Date: Sat, 1 Apr 2023 12:06:35 -0400 Subject: [PATCH 79/83] add v1.2 ref --- docs/source/releases/1.3.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/releases/1.3.rst b/docs/source/releases/1.3.rst index 67a0f955..61d2bf66 100644 --- a/docs/source/releases/1.3.rst +++ b/docs/source/releases/1.3.rst @@ -15,7 +15,7 @@ Major Changes ############# * :ref:`CopyNumberChange` introduced for relative copy number calls - * :ref:`CopyNumberCount` replaces ``CopyNumber`` + * :ref:`CopyNumberCount` replaces `CopyNumber (v1.2) `_ * :ref:`Genotype` introduced as a new systemic variation concept * :ref:`ComposedSequenceExpression` introduced for composing expressions from multiple other sequence expressions From 78235585602a7b9a7d8a812bf279ae372e9972cf Mon Sep 17 00:00:00 2001 From: Michael Baudis Date: Sat, 1 Apr 2023 10:13:18 -0700 Subject: [PATCH 80/83] relative_copy_class -> copy_assessment IMO `relative_copy_class` should be `copy_assessment` in the `CopyNumberChange` example? 
--- docs/source/terms_and_model.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/terms_and_model.rst b/docs/source/terms_and_model.rst index 266825b3..6170399d 100644 --- a/docs/source/terms_and_model.rst +++ b/docs/source/terms_and_model.rst @@ -427,7 +427,7 @@ Low-level copy gain of BRCA1: .. parsed-literal:: { - "relative_copy_class": "EFO_0030071", # low-level gain + "copy_assessment": "EFO_0030071", # low-level gain "subject": { "gene_id": "ncbigene:348", # BRCA1 gene "type": "Gene" From db53f2db815e79d6e964aa4cec4f5d3316e24002 Mon Sep 17 00:00:00 2001 From: "Alex H. Wagner, PhD" Date: Sat, 1 Apr 2023 16:34:31 -0400 Subject: [PATCH 81/83] update change_assertion --- schema/defs/vrs/CopyNumberChange.rst | 2 +- schema/vrs-source.yaml | 4 ++-- schema/vrs.json | 4 ++-- schema/vrs.yaml | 4 ++-- validation/models.yaml | 8 ++++---- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/schema/defs/vrs/CopyNumberChange.rst b/schema/defs/vrs/CopyNumberChange.rst index 06feb2f5..70ae2c5a 100644 --- a/schema/defs/vrs/CopyNumberChange.rst +++ b/schema/defs/vrs/CopyNumberChange.rst @@ -28,7 +28,7 @@ Some CopyNumberChange attributes are inherited from :ref:`Variation`. - :ref:`Location` | :ref:`CURIE` | :ref:`Feature` - 1..1 - A location for which the number of systemic copies is described. - * - copy_assessment + * - copy_change - string - 1..1 - MUST be one of "EFO_0030069" (complete genomic loss), "EFO_0020073" (high-level loss), "EFO_0030068" (low-level loss), "EFO_0030067" (loss), "EFO_0030064" (regional base ploidy), "EFO_0030070" (gain), "EFO_0030071" (low-level gain), "EFO_0030072" (high-level gain). diff --git a/schema/vrs-source.yaml b/schema/vrs-source.yaml index 4909902c..8a478037 100644 --- a/schema/vrs-source.yaml +++ b/schema/vrs-source.yaml @@ -238,14 +238,14 @@ definitions: - $ref: "#/definitions/Feature" description: >- A location for which the number of systemic copies is described. 
- copy_assessment: + copy_change: type: string enum: [ "EFO_0030069", "EFO_0020073", "EFO_0030068", "EFO_0030067", "EFO_0030064", "EFO_0030070", "EFO_0030071", "EFO_0030072" ] description: >- MUST be one of "EFO_0030069" (complete genomic loss), "EFO_0020073" (high-level loss), "EFO_0030068" (low-level loss), "EFO_0030067" (loss), "EFO_0030064" (regional base ploidy), "EFO_0030070" (gain), "EFO_0030071" (low-level gain), "EFO_0030072" (high-level gain). - required: [ "subject", "copy_assessment" ] + required: [ "subject", "copy_change" ] Genotype: inherits: SystemicVariation diff --git a/schema/vrs.json b/schema/vrs.json index 811b1c9b..7b3f79ea 100644 --- a/schema/vrs.json +++ b/schema/vrs.json @@ -341,7 +341,7 @@ ], "description": "A location for which the number of systemic copies is described." }, - "copy_assessment": { + "copy_change": { "type": "string", "enum": [ "EFO_0030069", @@ -357,7 +357,7 @@ } }, "required": [ - "copy_assessment", + "copy_change", "subject", "type" ], diff --git a/schema/vrs.yaml b/schema/vrs.yaml index 612d04ab..472e073e 100644 --- a/schema/vrs.yaml +++ b/schema/vrs.yaml @@ -204,7 +204,7 @@ definitions: - $ref: '#/definitions/Gene' - $ref: '#/definitions/SequenceLocation' description: A location for which the number of systemic copies is described. - copy_assessment: + copy_change: type: string enum: - EFO_0030069 @@ -220,7 +220,7 @@ definitions: "EFO_0030064" (regional base ploidy), "EFO_0030070" (gain), "EFO_0030071" (low-level gain), "EFO_0030072" (high-level gain). 
required: - - copy_assessment + - copy_change - subject - type additionalProperties: false diff --git a/validation/models.yaml b/validation/models.yaml index 3ee25b98..a8973133 100644 --- a/validation/models.yaml +++ b/validation/models.yaml @@ -382,7 +382,7 @@ CopyNumberCount: CopyNumberChange: - name: "Low-level copy gain of BRCA1" in: - copy_assessment: EFO_0030071 + copy_change: EFO_0030071 subject: sequence_id: ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl interval: @@ -396,9 +396,9 @@ CopyNumberChange: type: SequenceLocation type: CopyNumberChange out: - ga4gh_digest: MLA_TGdelT-_jrlsC6N19S2itmcWqHfj - ga4gh_identify: ga4gh:CX.MLA_TGdelT-_jrlsC6N19S2itmcWqHfj - ga4gh_serialize: '{"copy_assessment":"EFO_0030071","subject":"oz3NEuhtbBep3yqu3wrhqfDKbLPK7vcE","type":"CopyNumberChange"}' + ga4gh_digest: W6Mr7EOuc8_oP--jzFqZeAj-ZC7pK9F_ + ga4gh_identify: ga4gh:CX.W6Mr7EOuc8_oP--jzFqZeAj-ZC7pK9F_ + ga4gh_serialize: '{"copy_change":"EFO_0030071","subject":"oz3NEuhtbBep3yqu3wrhqfDKbLPK7vcE","type":"CopyNumberChange"}' Text: - in: From 00d2dcaa674ef611994126e84e6476586d6128e8 Mon Sep 17 00:00:00 2001 From: "Alex H. Wagner, PhD" Date: Sat, 1 Apr 2023 16:36:01 -0400 Subject: [PATCH 82/83] update example --- docs/source/terms_and_model.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/terms_and_model.rst b/docs/source/terms_and_model.rst index 6170399d..896e439a 100644 --- a/docs/source/terms_and_model.rst +++ b/docs/source/terms_and_model.rst @@ -427,7 +427,7 @@ Low-level copy gain of BRCA1: .. parsed-literal:: { - "copy_assessment": "EFO_0030071", # low-level gain + "copy_change": "EFO_0030071", # low-level gain "subject": { "gene_id": "ncbigene:348", # BRCA1 gene "type": "Gene" From c4595ae173e10195e6a7157364a6edb5a7276283 Mon Sep 17 00:00:00 2001 From: "Alex H. 
Wagner, PhD" Date: Thu, 13 Apr 2023 08:50:41 -0400 Subject: [PATCH 83/83] update efo codes to CURIEs --- docs/source/terms_and_model.rst | 2 +- schema/defs/vrs/CopyNumberChange.rst | 2 +- schema/vrs-source.yaml | 8 ++++---- schema/vrs.json | 18 +++++++++--------- schema/vrs.yaml | 24 ++++++++++++------------ validation/models.yaml | 8 ++++---- 6 files changed, 31 insertions(+), 31 deletions(-) diff --git a/docs/source/terms_and_model.rst b/docs/source/terms_and_model.rst index 896e439a..653142d8 100644 --- a/docs/source/terms_and_model.rst +++ b/docs/source/terms_and_model.rst @@ -427,7 +427,7 @@ Low-level copy gain of BRCA1: .. parsed-literal:: { - "copy_change": "EFO_0030071", # low-level gain + "copy_change": "efo:0030071", # low-level gain "subject": { "gene_id": "ncbigene:348", # BRCA1 gene "type": "Gene" diff --git a/schema/defs/vrs/CopyNumberChange.rst b/schema/defs/vrs/CopyNumberChange.rst index 70ae2c5a..64adb97a 100644 --- a/schema/defs/vrs/CopyNumberChange.rst +++ b/schema/defs/vrs/CopyNumberChange.rst @@ -31,4 +31,4 @@ Some CopyNumberChange attributes are inherited from :ref:`Variation`. * - copy_change - string - 1..1 - - MUST be one of "EFO_0030069" (complete genomic loss), "EFO_0020073" (high-level loss), "EFO_0030068" (low-level loss), "EFO_0030067" (loss), "EFO_0030064" (regional base ploidy), "EFO_0030070" (gain), "EFO_0030071" (low-level gain), "EFO_0030072" (high-level gain). + - MUST be one of "efo:0030069" (complete genomic loss), "efo:0020073" (high-level loss), "efo:0030068" (low-level loss), "efo:0030067" (loss), "efo:0030064" (regional base ploidy), "efo:0030070" (gain), "efo:0030071" (low-level gain), "efo:0030072" (high-level gain). diff --git a/schema/vrs-source.yaml b/schema/vrs-source.yaml index 8a478037..1841fdfa 100644 --- a/schema/vrs-source.yaml +++ b/schema/vrs-source.yaml @@ -240,11 +240,11 @@ definitions: A location for which the number of systemic copies is described. 
copy_change: type: string - enum: [ "EFO_0030069", "EFO_0020073", "EFO_0030068", "EFO_0030067", "EFO_0030064", "EFO_0030070", "EFO_0030071", "EFO_0030072" ] + enum: [ "efo:0030069", "efo:0020073", "efo:0030068", "efo:0030067", "efo:0030064", "efo:0030070", "efo:0030071", "efo:0030072" ] description: >- - MUST be one of "EFO_0030069" (complete genomic loss), "EFO_0020073" (high-level loss), - "EFO_0030068" (low-level loss), "EFO_0030067" (loss), "EFO_0030064" (regional base ploidy), - "EFO_0030070" (gain), "EFO_0030071" (low-level gain), "EFO_0030072" (high-level gain). + MUST be one of "efo:0030069" (complete genomic loss), "efo:0020073" (high-level loss), + "efo:0030068" (low-level loss), "efo:0030067" (loss), "efo:0030064" (regional base ploidy), + "efo:0030070" (gain), "efo:0030071" (low-level gain), "efo:0030072" (high-level gain). required: [ "subject", "copy_change" ] Genotype: diff --git a/schema/vrs.json b/schema/vrs.json index 7b3f79ea..e9c99691 100644 --- a/schema/vrs.json +++ b/schema/vrs.json @@ -344,16 +344,16 @@ "copy_change": { "type": "string", "enum": [ - "EFO_0030069", - "EFO_0020073", - "EFO_0030068", - "EFO_0030067", - "EFO_0030064", - "EFO_0030070", - "EFO_0030071", - "EFO_0030072" + "efo:0030069", + "efo:0020073", + "efo:0030068", + "efo:0030067", + "efo:0030064", + "efo:0030070", + "efo:0030071", + "efo:0030072" ], - "description": "MUST be one of \"EFO_0030069\" (complete genomic loss), \"EFO_0020073\" (high-level loss), \"EFO_0030068\" (low-level loss), \"EFO_0030067\" (loss), \"EFO_0030064\" (regional base ploidy), \"EFO_0030070\" (gain), \"EFO_0030071\" (low-level gain), \"EFO_0030072\" (high-level gain)." + "description": "MUST be one of \"efo:0030069\" (complete genomic loss), \"efo:0020073\" (high-level loss), \"efo:0030068\" (low-level loss), \"efo:0030067\" (loss), \"efo:0030064\" (regional base ploidy), \"efo:0030070\" (gain), \"efo:0030071\" (low-level gain), \"efo:0030072\" (high-level gain)." 
} }, "required": [ diff --git a/schema/vrs.yaml b/schema/vrs.yaml index 472e073e..e84f369e 100644 --- a/schema/vrs.yaml +++ b/schema/vrs.yaml @@ -207,18 +207,18 @@ definitions: copy_change: type: string enum: - - EFO_0030069 - - EFO_0020073 - - EFO_0030068 - - EFO_0030067 - - EFO_0030064 - - EFO_0030070 - - EFO_0030071 - - EFO_0030072 - description: MUST be one of "EFO_0030069" (complete genomic loss), "EFO_0020073" - (high-level loss), "EFO_0030068" (low-level loss), "EFO_0030067" (loss), - "EFO_0030064" (regional base ploidy), "EFO_0030070" (gain), "EFO_0030071" - (low-level gain), "EFO_0030072" (high-level gain). + - efo:0030069 + - efo:0020073 + - efo:0030068 + - efo:0030067 + - efo:0030064 + - efo:0030070 + - efo:0030071 + - efo:0030072 + description: MUST be one of "efo:0030069" (complete genomic loss), "efo:0020073" + (high-level loss), "efo:0030068" (low-level loss), "efo:0030067" (loss), + "efo:0030064" (regional base ploidy), "efo:0030070" (gain), "efo:0030071" + (low-level gain), "efo:0030072" (high-level gain). 
required: - copy_change - subject diff --git a/validation/models.yaml b/validation/models.yaml index a8973133..98695c55 100644 --- a/validation/models.yaml +++ b/validation/models.yaml @@ -382,7 +382,7 @@ CopyNumberCount: CopyNumberChange: - name: "Low-level copy gain of BRCA1" in: - copy_change: EFO_0030071 + copy_change: efo:0030071 subject: sequence_id: ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl interval: @@ -396,9 +396,9 @@ CopyNumberChange: type: SequenceLocation type: CopyNumberChange out: - ga4gh_digest: W6Mr7EOuc8_oP--jzFqZeAj-ZC7pK9F_ - ga4gh_identify: ga4gh:CX.W6Mr7EOuc8_oP--jzFqZeAj-ZC7pK9F_ - ga4gh_serialize: '{"copy_change":"EFO_0030071","subject":"oz3NEuhtbBep3yqu3wrhqfDKbLPK7vcE","type":"CopyNumberChange"}' + ga4gh_digest: zRqNmX-TVTU5FOFxfR4y0jwBysw7ztPn + ga4gh_identify: ga4gh:CX.zRqNmX-TVTU5FOFxfR4y0jwBysw7ztPn + ga4gh_serialize: '{"copy_change":"efo:0030071","subject":"oz3NEuhtbBep3yqu3wrhqfDKbLPK7vcE","type":"CopyNumberChange"}' Text: - in: