From db76cce1976b871ea62b4955356ecbe2c78229bd Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Thu, 19 Dec 2024 09:20:55 -0500 Subject: [PATCH 01/13] wip: update submodules to 1.0.0-ballot.2024-11.2 --- .gitmodules | 2 +- submodules/va_spec | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index 9d51a2b..68371cb 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,4 @@ [submodule "submodules/va_spec"] path = submodules/va_spec url = https://github.com/ga4gh/va-spec - branch = 1.x + branch = 1.0.0-ballot.2024-11 diff --git a/submodules/va_spec b/submodules/va_spec index 3261ad7..9f40a87 160000 --- a/submodules/va_spec +++ b/submodules/va_spec @@ -1 +1 @@ -Subproject commit 3261ad79b0c6d03786ae8f14287f1926dc5b45bd +Subproject commit 9f40a874bfe3423a32abf45dcef194acb1838b7f From 691c85fcfd757274ca00a3a230084639bf1b8620 Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Fri, 20 Dec 2024 13:12:37 -0500 Subject: [PATCH 02/13] wip --- pyproject.toml | 2 +- src/ga4gh/va_spec/base/caf_study_result.py | 25 + src/ga4gh/va_spec/base/core.py | 462 ++++++++++++++++++ src/ga4gh/va_spec/base/domain_entities.py | 54 ++ .../experimental_variant_functional_impact.py | 20 + src/ga4gh/va_spec/profiles/__init__.py | 44 -- .../va_spec/profiles/assay_var_effect.py | 134 ----- .../va_spec/profiles/caf_study_result.py | 49 -- src/ga4gh/va_spec/profiles/var_path_stmt.py | 54 -- src/ga4gh/va_spec/profiles/var_study_stmt.py | 208 -------- 10 files changed, 562 insertions(+), 490 deletions(-) create mode 100644 src/ga4gh/va_spec/base/caf_study_result.py create mode 100644 src/ga4gh/va_spec/base/core.py create mode 100644 src/ga4gh/va_spec/base/domain_entities.py create mode 100644 src/ga4gh/va_spec/base/experimental_variant_functional_impact.py delete mode 100644 src/ga4gh/va_spec/profiles/__init__.py delete mode 100644 src/ga4gh/va_spec/profiles/assay_var_effect.py delete mode 100644 src/ga4gh/va_spec/profiles/caf_study_result.py delete mode 100644 src/ga4gh/va_spec/profiles/var_path_stmt.py delete mode 100644 src/ga4gh/va_spec/profiles/var_study_stmt.py diff --git a/pyproject.toml b/pyproject.toml index 5374c81..75112d2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -138,7 +138,7 @@ ignore = [ # B011 - assert-false # N815 - mixed-case-variable-in-class-scope "tests/*" = ["ANN001", "ANN2", "ANN102", "S101", "B011"] -"src/ga4gh/va_spec/profiles/*" = ["ANN102", "N815"] +"src/ga4gh/va_spec/*" = ["ANN102", "N815"] [tool.setuptools.packages.find] where = ["src"] diff --git a/src/ga4gh/va_spec/base/caf_study_result.py b/src/ga4gh/va_spec/base/caf_study_result.py new file mode 100644 index 0000000..909f4d4 --- /dev/null +++ b/src/ga4gh/va_spec/base/caf_study_result.py @@ -0,0 +1,25 @@ +"""Cohort Allele Frequency Study Result Standard Profile""" + + +from typing import Literal + +from pydantic import Field + +from ga4gh.core.models import iriReference +from ga4gh.va_spec.base.core import DataSet, StudyGroup, StudyResult +from ga4gh.vrs.models import Allele + + +class CohortAlleleFrequencyStudyResult(StudyResult): + """A StudyResult that reports measures related to the frequency of an Allele in a cohort""" + + type: Literal["CohortAlleleFrequencyStudyResult"] = Field("CohortAlleleFrequencyStudyResult", description="MUST be 'CohortAlleleFrequencyStudyResult'.") + sourceDataSet: DataSet | None = Field(None, description="The dataset from which the CohortAlleleFrequencyStudyResult was reported.") + focusAllele: Allele | iriReference = Field(..., description="The Allele for which frequency results are reported.") + focusAlleleCount: int = Field(..., description="The number of occurrences of the focusAllele in the cohort.") + locusAlleleCount: int = Field(..., description="The number of occurrences of all alleles at the locus in the cohort.") + focusAlleleFrequency: int = Field(..., description="The frequency of the focusAllele in the cohort.") + cohort: StudyGroup = Field(..., description="The cohort from which the frequency was derived.") + subCohortFrequency: list[CohortAlleleFrequencyStudyResult] | None = Field(None, description="A list of CohortAlleleFrequency objects describing subcohorts of the cohort currently being described. Subcohorts can be further subdivided into more subcohorts. This enables, for example, the description of different ancestry groups and sexes among those ancestry groups.") + ancillaryResults: dict | None = None + qualityMeasures: dict | None = None diff --git a/src/ga4gh/va_spec/base/core.py b/src/ga4gh/va_spec/base/core.py new file mode 100644 index 0000000..509bf65 --- /dev/null +++ b/src/ga4gh/va_spec/base/core.py @@ -0,0 +1,462 @@ +from __future__ import annotations + +from abc import ABC +from datetime import date +from enum import Enum +from typing import Annotated, Literal + +from pydantic import ( + Field, + RootModel, + StringConstraints, +) + +from ga4gh.cat_vrs.models import CategoricalVariant +from ga4gh.core.models import Entity, MappableConcept, iriReference +from ga4gh.va_spec.base.domain_entities import Condition, Therapeutic +from ga4gh.vrs.models import MolecularVariation + +######################################### +# Abstract Core Classes +######################################### + + +class InformationEntity(Entity): + """An abstract (non-physical) entity that represents 'information content' carried by + physical or digital information artifacts such as books, web pages, data sets, or + images. + """ + + specifiedBy: Method | iriReference | None = Field( + None, + description="A specification that describes all or part of the process that led to creation of the Information Entity", + ) + contributions: list[Contribution] | None = Field( + None, + description="Specific actions taken by an Agent toward the creation, modification, validation, or deprecation of an Information Entity.", + ) + reportedIn: list[Document] | iriReference | None = Field( + None, description="A document in which the the Information Entity is reported." + ) + + +class StudyResult(InformationEntity): + """A collection of data items from a single study that pertain to a particular subject + or experimental unit in the study, along with optional provenance information + describing how these data items were generated. + """ + + focus: Entity | MappableConcept | iriReference = Field( + ..., + description="The specific participant, subject or experimental unit in a Study that data included in the StudyResult object is about - e.g. a particular variant in a population allele frequency dataset like ExAC or gnomAD.", + ) + sourceDataSet: DataSet | None = Field( + None, + description="A larger DataSet from which the data included in the StudyResult was taken or derived.", + ) + + +class Proposition(Entity): + """An abstract entity representing a possible fact that may be true or false. As + abstract entities, Propositions capture a 'sharable' piece of meaning whose identify + and existence is independent of space and time, or whether it is ever asserted to be + true by some agent. + """ + + subject: dict = Field( + ..., description="The Entity or concept about which the Proposition is made." + ) + predicate: str = Field( + ..., + description="The relationship declared to hold between the subject and the object of the Proposition.", + ) + object: dict = Field( + ..., + description="An Entity or concept that is related to the subject of a Proposition via its predicate.", + ) + + +class SubjectVariantProposition(RootModel): + """A `Proposition` that has a variant as the subject.""" + + root: ( + ExperimentalVariantFunctionalImpactProposition + | VariantPathogenicityProposition + | VariantDiagnosticProposition + | VariantPrognosticProposition + | VariantOncogenicityProposition + | VariantTherapeuticResponseProposition + ) + + +class _SubjectVariantPropositionBase(Entity, ABC): + subjectVariant: MolecularVariation | CategoricalVariant | iriReference | None = ( + Field(None, description="A variant that is the subject of the Proposition.") + ) + + +class ClinicalVariantProposition(_SubjectVariantPropositionBase): + """A proposition for use in describing the effect of variants in human subjects.""" + + geneContextQualifier: MappableConcept | iriReference | None = Field( + None, + description="Reports a gene impacted by the variant, which may contribute to the association described in the Proposition.", + ) + alleleOriginQualifier: MappableConcept | iriReference | None = Field( + None, + description="Reports whether the Proposition should be interpreted in the context of an inherited (germline) variant, an acquired (somatic) mutation, or another more nuanced concept.", + ) + + +class ExperimentalVariantFunctionalImpactProposition(_SubjectVariantPropositionBase): + """A Proposition describing the impact of a variant on the function sequence feature + (typically a gene or gene product). + """ + + type: Literal["ExperimentalVariantFunctionalImpactProposition"] = Field( + "ExperimentalVariantFunctionalImpactProposition", + description="MUST be 'ExperimentalVariantFunctionalImpactProposition'.", + ) + predicate: str = Field( + "impactsFunctionOf", + description="The relationship the Proposition describes between the subject variant and object sequence feature whose function it may alter.", + ) + objectSequenceFeature: iriReference | MappableConcept = Field( + ..., + description="The sequence feature (typically a gene or gene product) on whose function the impact of the subject variant is reported.", + ) + experimentalContextQualifier: iriReference | Document | dict | None = Field( + None, + description="An assay in which the reported variant functional impact was determined - providing a specific experimental context in which this effect is asserted to hold.", + ) + + +class DiagnosticPredicate(str, Enum): + """Define constraints for diagnostic predicate""" + + INCLUSIVE = "isDiagnosticInclusionCriterionFor" + EXCLUSIVE = "isDiagnosticExclusionCriterionFor" + + +class VariantDiagnosticProposition(ClinicalVariantProposition): + """A Proposition about whether a variant is associated with a disease (a diagnostic + inclusion criterion), or absence of a disease (diagnostic exclusion criterion).""" + + type: Literal["VariantDiagnosticProposition"] = Field( + "VariantDiagnosticProposition", + description="MUST be 'VariantDiagnosticProposition'.", + ) + predicate: DiagnosticPredicate + objectCondition: Condition | iriReference = Field( + ..., description="The disease that is evaluated for diagnosis." + ) + + +class VariantOncogenicityProposition(ClinicalVariantProposition): + """A proposition describing the role of a variant in causing a tumor type.""" + + type: Literal["VariantOncogenicityProposition"] = Field( + "VariantOncogenicityProposition", + description="MUST be 'VariantOncogenicityProposition'.", + ) + predicate: str = "isCausalFor" + objectTumorType: Condition | iriReference = Field( + ..., description="The tumor type for which the variant impact is evaluated." + ) + + +class VariantPathogenicityProposition(ClinicalVariantProposition): + """A proposition describing the role of a variant in causing a heritable condition.""" + + type: Literal["VariantPathogenicityProposition"] = Field( + "VariantPathogenicityProposition", + description="MUST be 'VariantPathogenicityProposition'", + ) + predicate: str = "isCausalFor" + objectCondition: Condition | iriReference = Field( + ..., description="The :ref:`Condition` for which the variant impact is stated." + ) + penetranceQualifier: MappableConcept | None = Field( + None, + description="Reports the penetrance of the pathogenic effect - i.e. the extent to which the variant impact is expressed by individuals carrying it as a measure of the proportion of carriers exhibiting the condition. ", + ) + modeOfInheritanceQualifier: MappableConcept | None = Field( + None, + description="Reports a pattern of inheritance expected for the pathogenic effect of the variant. HPO terms within the hierarchy of 'HP:0000005' (mode of inheritance) are recommended to specify.", + ) + + +class PrognosticPredicate(str, Enum): + """Define constraints for prognostic predicate""" + + BETTER_OUTCOME = "associatedWithBetterOutcomeFor" + WORSE_OUTCOME = "associatedWithWorseOutcomeFor" + + +class VariantPrognosticProposition(ClinicalVariantProposition): + """A Proposition about whether a variant is associated with an improved or worse outcome for a disease.""" + + type: Literal["VariantPrognosticProposition"] = Field( + "VariantPrognosticProposition", + description="MUST be 'VariantPrognosticProposition'.", + ) + predicate: PrognosticPredicate + objectCondition: Condition | iriReference = Field( + ..., description="The disease that is evaluated for outcome." + ) + + +class TherapeuticResponsePredicate(str, Enum): + """Define constraints for therapeutic response predicate""" + + SENSITIVITY = "predictsSensitivityTo" + RESISTANCE = "predictsResistanceTo" + + +class VariantTherapeuticResponseProposition(ClinicalVariantProposition): + """A Proposition about the role of a variant in modulating the response of a neoplasm to drug + administration or other therapeutic procedures.""" + + type: Literal["VariantTherapeuticResponseProposition"] = Field( + "VariantTherapeuticResponseProposition", + description="MUST be 'VariantTherapeuticResponseProposition'.", + ) + predicate: TherapeuticResponsePredicate + objectTherapeutic: Therapeutic | iriReference = Field( + ..., + description="A drug administration or other therapeutic procedure that the neoplasm is intended to respond to.", + ) + conditionQualifier: Condition | iriReference = Field( + ..., + description="Reports the disease context in which the variant's association with therapeutic sensitivity or resistance is evaluated. Note that this is a required qualifier in therapeutic response propositions.", + ) + + +######################################### +# Concrete Core Classes +######################################### + + +class CoreType(str, Enum): + METHOD = "Method" + CONTRIBUTION = "Contribution" + DOCUMENT = "Document" + AGENT = "Agent" + STATEMENT = "Statement" + EVIDENCE_LINE = "EvidenceLine" + DATA_SET = "DataSet" + STUDY_GROUP = "StudyGroup" + + +class Method(Entity): + """A set of instructions that specify how to achieve some objective.""" + + type: Literal["Method"] = Field( + CoreType.METHOD.value, description=f"MUST be '{CoreType.METHOD.value}'." + ) + subtype: MappableConcept | None = Field( + None, + description="A specific type of method that a Method instance represents (e.g. 'Variant Interpretation Guideline', or 'Experimental Protocol').", + ) + reportedIn: Document | iriReference | None = Field( + None, description="A document in which the the Method is reported." + ) + + +class Contribution(Entity): + """An action taken by an agent in contributing to the creation, modification, + assessment, or deprecation of a particular entity (e.g. a Statement, EvidenceLine, + DataSet, Publication, etc.) + """ + + type: Literal["Contribution"] = Field( + CoreType.CONTRIBUTION.value, + description=f"MUST be '{CoreType.CONTRIBUTION.value}'.", + ) + contributor: Agent | None = Field( + None, description="The agent that made the contribution." + ) + activityType: MappableConcept | None = Field( + None, + description="The specific type of activity performed or role played by an agent in making the contribution (e.g. for a publication, agents may contribute as a primary author, editor, figure designer, data generator, etc.). Values of this property may be framed as activities, or as contribution roles (e.g. using terms from the Contribution Role Ontology (CRO)).", + ) + date: date | None + + +class Document(Entity): + """A collection of information, usually in a text-based or graphic human-readable + form, intended to be read and understood together as a whole. + """ + + type: Literal["Document"] = Field( + CoreType.DOCUMENT.value, description=f"Must be '{CoreType.DOCUMENT.value}'." + ) + subtype: MappableConcept | None = Field( + None, + description="A specific type of document that a Document instance represents (e.g. 'publication', 'patent', 'pathology report')", + ) + title: str | None = Field( + None, description="The official title given to the document by its authors." + ) + urls: ( + list[Annotated[str, StringConstraints(pattern=r"^(https?|s?ftp)://")]] | None + ) = Field( + None, + description="One or more URLs from which the content of the Document can be retrieved.", + ) + doi: ( + Annotated[str, StringConstraints(pattern=r"^10\.(\d+)(\.\d+)*\/[\w\-\.]+")] + | None + ) = Field( + None, + description="A `Digital Object Identifier `_ for the document.", + ) + pmid: int | None = Field( + None, + description="A `PubMed unique identifier `_ for the document.", + ) + + +class Agent(Entity): + """An autonomous actor (person, organization, or software agent) that bears some + form of responsibility for an activity taking place, for the existence of an entity, + or for another agent's activity. + """ + + type: Literal["Agent"] = Field( + CoreType.AGENT.value, description=f"MUST be '{CoreType.AGENT.value}'." + ) + name: str | None = Field(None, description="The given name of the Agent.") + subtype: MappableConcept | None = Field( + None, + description="A specific type of agent the Agent object represents. Recommended subtypes include codes for `person`, `organization`, or `software`.", + ) + + +class Direction(str, Enum): + """A term indicating whether the Statement supports, disputes, or remains neutral + w.r.t. the validity of the Proposition it evaluates.""" + + SUPPORTS = "supports" + NEUTRAL = "neutral" + DISPUTES = "disputes" + +class DataSet(Entity): + """A collection of related data items or records that are organized together in a + common format or structure, to enable their computational manipulation as a unit.""" + + type: Literal["DataSet"] = Field( + CoreType.DATA_SET.value, description=f"MUST be '{CoreType.DATA_SET.value}'." + ) + subtype: MappableConcept | None = Field( + None, + description="A specific type of data set the DataSet instance represents (e.g. a 'clinical data set', a 'sequencing data set', a 'gene expression data set', a 'genome annotation data set')", + ) + reportedIn: Document | iriReference | None = Field( + None, description="A document in which the the Method is reported." + ) + releaseDate: date | None = Field( + None, + description="Indicates the date a version of a DataSet was formally released.", + ) + version: str | None = Field( + None, description="The version of the DataSet, as assigned by its creator." + ) + license: MappableConcept | None = Field( + None, + description="A specific license that dictates legal permissions for how a data set can be used (by whom, where, for what purposes, with what additional requirements, etc.)", + ) + + +class EvidenceLine(InformationEntity): + """An independent, evidence-based argument that may support or refute the validity + of a specific Proposition. The strength and direction of this argument is based on + an interpretation of one or more pieces of information as evidence for or against + the target Proposition.""" + + type: Literal["EvidenceLine"] = Field( + CoreType.EVIDENCE_LINE.value, + description=f"MUST be '{CoreType.EVIDENCE_LINE.value}'.", + ) + targetProposition: Proposition | None = Field( + None, + description="The possible fact against which evidence items contained in an Evidence Line were collectively evaluated, in determining the overall strength and direction of support they provide. For example, in an ACMG Guideline-based assessment of variant pathogenicity, the support provided by distinct lines of evidence are assessed against a target proposition that the variant is pathogenic for a specific disease.", + ) + hasEvidenceItems: ( + list[StudyResult | Statement | EvidenceLine | iriReference] | None + ) = Field( + None, + description="An individual piece of information that was evaluated as evidence in building the argument represented by an Evidence Line.", + ) + directionOfEvidenceProvided: Direction = Field( + ..., + description="The direction of support that the Evidence Line is determined to provide toward its target Proposition (supports, disputes, neutral)", + ) + strengthOfEvidenceProvided: MappableConcept | None = Field( + None, + description="The strength of support that an Evidence Line is determined to provide for or against its target Proposition, evaluated relative to the direction indicated by the directionOfEvidenceProvided value.", + ) + scoreOfEvidenceProvided: float | None = Field( + None, + description="A quantitative score indicating the strength of support that an Evidence Line is determined to provide for or against its target Proposition, evaluated relative to the direction indicated by the directionOfEvidenceProvided value.", + ) + evidenceOutcome: MappableConcept | None = Field( + None, + description="A term summarizing the overall outcome of the evidence assessment represented by the Evidence Line, in terms of the direction and strength of support it provides for or against the target Proposition.", + ) + + +class Statement(InformationEntity): + """A claim of purported truth as made by a particular agent, on a particular + occasion. Statements may be used to put forth a possible fact (i.e. a 'Proposition') + as true or false, or to provide a more nuanced assessment of the level of confidence + or evidence supporting a particular Proposition. + """ + + type: Literal["Statement"] = Field( + CoreType.STATEMENT.value, description=f"MUST be '{CoreType.STATEMENT.value}'." + ) + proposition: Proposition = Field( + ..., + description="A possible fact, the validity of which is assessed and reported by the Statement. A Statement can put forth the proposition as being true, false, or uncertain, and may provide an assessment of the level of confidence/evidence supporting this claim. ", + ) + direction: Direction = Field( + ..., + description="A term indicating whether the Statement supports, disputes, or remains neutral w.r.t. the validity of the Proposition it evaluates.", + ) + strength: MappableConcept | None = Field( + None, + description="A term used to report the strength of a Proposition's assessment in the direction indicated (i.e. how strongly supported or disputed the Proposition is believed to be). Implementers may choose to frame a strength assessment in terms of how *confident* an agent is that the Proposition is true or false, or in terms of the *strength of all evidence* they believe supports or disputes it.", + ) + score: float | None = Field( + None, + description="A quantitative score that indicates the strength of a Proposition's assessment in the direction indicated (i.e. how strongly supported or disputed the Proposition is believed to be). Depending on its implementation, a score may reflect how *confident* that agent is that the Proposition is true or false, or the *strength of evidence* they believe supports or disputes it. Instructions for how to interpret the menaing of a given score may be gleaned from the method or document referenced in 'specifiedBy' attribute.", + ) + classification: MappableConcept | None = Field( + None, + description="A single term or phrase summarizing the outcome of direction and strength assessments of a Statement's Proposition, in terms of a classification of its subject.", + ) + hasEvidenceLines: list[EvidenceLine | iriReference] | None = Field( + None, + description="An evidence-based argument that supports or disputes the validity of the proposition that a Statement assesses or puts forth as true. The strength and direction of this argument (whether it supports or disputes the proposition, and how strongly) is based on an interpretation of one or more pieces of information as evidence (i.e. 'Evidence Items).", + ) + + +class StudyGroup(Entity): + """A collection of individuals or specimens from the same taxonomic class, selected + for analysis in a scientific study based on their exhibiting one or more common + characteristics (e.g. species, race, age, gender, disease state, income). May be + referred to as a 'cohort' or 'population' in specific research settings. + """ + + type: Literal["StudyGroup"] = Field( + CoreType.STUDY_GROUP.value, + description=f"Must be '{CoreType.STUDY_GROUP.value}'", + ) + memberCount: int | None = Field( + None, description="The total number of individual members in the StudyGroup." + ) + characteristics: list[MappableConcept] | None = Field( + None, + description="A feature or role shared by all members of the StudyGroup, representing a criterion for membership in the group.", + ) diff --git a/src/ga4gh/va_spec/base/domain_entities.py b/src/ga4gh/va_spec/base/domain_entities.py new file mode 100644 index 0000000..14bedaf --- /dev/null +++ b/src/ga4gh/va_spec/base/domain_entities.py @@ -0,0 +1,54 @@ +"""VA Spec Shared Domain Entity Data Structures""" + + +from pydantic import Field, RootModel + +from ga4gh.core.models import Element, MappableConcept + + +class TraitSet(Element): + """A set of conditions (diseases, phenotypes, traits) that are co-occurring.""" + + traits: list[MappableConcept] | None = Field( + ..., + min_length=2, + description="A list of conditions (diseases, phenotypes, traits) that are co-occurring.", + ) + + +class Condition(RootModel): + """A set of traits (TraitSet) or a single trait (Disease, Phenotype, etc.) that + represents the object of a Variant Pathogenicity statement. + """ + + root: TraitSet | MappableConcept = Field( + ..., + json_schema_extra={ + "description": "A set of traits (TraitSet) or a single trait (Disease, Phenotype, etc.) that represents the object of a Variant Pathogenicity statement." + }, + ) + + +class TherapyGroup(Element): + """A group of therapies that are applied together to treat a condition.""" + + therapies: list[MappableConcept] | None = Field( + ..., + min_length=2, + description="A list of therapies that are applied together to treat a condition.", + ) + groupType: MappableConcept | None = Field( + None, description="The type of the therapy group." + ) + + +class Therapeutic(RootModel): + """A group of therapies (TherapyGroup) or a single therapy (drug, procedure, behavioral intervention, etc.). + """ + + root: TherapyGroup | MappableConcept = Field( + ..., + json_schema_extra={ + "description": "A group of therapies (TherapyGroup) or a single therapy (drug, procedure, behavioral intervention, etc.)." + }, + ) diff --git a/src/ga4gh/va_spec/base/experimental_variant_functional_impact.py b/src/ga4gh/va_spec/base/experimental_variant_functional_impact.py new file mode 100644 index 0000000..ce4f69f --- /dev/null +++ b/src/ga4gh/va_spec/base/experimental_variant_functional_impact.py @@ -0,0 +1,20 @@ +"""Cohort Allele Frequency Study Result Standard Profile""" + + +from typing import Literal + +from pydantic import Field + +from ga4gh.core.models import iriReference +from ga4gh.va_spec.base.core import DataSet, Method, StudyResult +from ga4gh.vrs.models import MolecularVariation + + +class ExperimentalVariantFunctionalImpactStudyResult(StudyResult): + """A StudyResult that reports a functional impact score from a variant functional assay or study.""" + + type: Literal["ExperimentalVariantFunctionalImpactStudyResult"] = Field("ExperimentalVariantFunctionalImpactStudyResult", description="MUST be 'ExperimentalVariantFunctionalImpactStudyResult'.") + focusVariant: MolecularVariation | iriReference | None = Field(None, description="The genetic variant for which a functional impact score is generated.") + functionalImpactScore: float | None = Field(None, description="The score of the variant impact measured in the assay or study.") + specifiedBy: Method | iriReference | None = Field(None, description="The assay that was performed to generate the reported functional impact score.") + sourceDataSet: DataSet | None = Field(None, description="The full data set that provided the reported the functional impact score.") diff --git a/src/ga4gh/va_spec/profiles/__init__.py b/src/ga4gh/va_spec/profiles/__init__.py deleted file mode 100644 index 558a390..0000000 --- a/src/ga4gh/va_spec/profiles/__init__.py +++ /dev/null @@ -1,44 +0,0 @@ -"""Package for VA-Spec Python implementation""" - -from .assay_var_effect import ( - AssayVariantEffectClinicalClassificationStatement, - AssayVariantEffectFunctionalClassificationStatement, - AssayVariantEffectMeasurementStudyResult, - AveClinicalClassification, - AveFunctionalClassification, -) -from .caf_study_result import CohortAlleleFrequencyStudyResult -from .var_path_stmt import PenetranceQualifier, VariantPathogenicityStatement -from .var_study_stmt import ( - AlleleOriginQualifier, - AllelePrevalenceQualifier, - DiagnosticPredicate, - OncogenicPredicate, - PrognosticPredicate, - TherapeuticResponsePredicate, - VariantDiagnosticStudyStatement, - VariantOncogenicityStudyStatement, - VariantPrognosticStudyStatement, - VariantTherapeuticResponseStudyStatement, -) - -__all__ = [ - "AveFunctionalClassification", - "AveClinicalClassification", - "AssayVariantEffectFunctionalClassificationStatement", - "AssayVariantEffectClinicalClassificationStatement", - "AssayVariantEffectMeasurementStudyResult", - "CohortAlleleFrequencyStudyResult", - "PenetranceQualifier", - "VariantPathogenicityStatement", - "AlleleOriginQualifier", - "DiagnosticPredicate", - "OncogenicPredicate", - "PrognosticPredicate", - "TherapeuticResponsePredicate", - "AllelePrevalenceQualifier", - "VariantDiagnosticStudyStatement", - "VariantOncogenicityStudyStatement", - "VariantPrognosticStudyStatement", - "VariantTherapeuticResponseStudyStatement", -] diff --git a/src/ga4gh/va_spec/profiles/assay_var_effect.py b/src/ga4gh/va_spec/profiles/assay_var_effect.py deleted file mode 100644 index 5225f98..0000000 --- a/src/ga4gh/va_spec/profiles/assay_var_effect.py +++ /dev/null @@ -1,134 +0,0 @@ -"""VA Spec Assay Variant Effect statement and study result Profiles""" - -from __future__ import annotations - -from enum import Enum -from typing import Literal - -from ga4gh.cat_vrs.core_models import CategoricalVariant -from ga4gh.core.entity_models import ( - IRI, - Coding, - DataSet, - Method, - StatementBase, - StudyGroup, - StudyResult, - StudyResultBase, -) -from ga4gh.vrs.models import MolecularVariation -from pydantic import ConfigDict, Field - - -class AveFunctionalClassification(str, Enum): - """The functional classification of the variant effect in the assay.""" - - NORMAL = "normal" - INDETERMINATE = "indeterminate" - ABNORMAL = "abnormal" - - -class AveClinicalClassification(str, Enum): - """The clinical strength of evidence of the variant effect in the assay.""" - - PS3_STRONG = "PS3_Strong" - PS3_MODERATE = "PS3_Moderate" - PS3_SUPPORTING = "PS3_Supporting" - BS3_STRONG = "BS3_Strong" - BS3_MODERATE = "BS3_Moderate" - BS3_SUPPORTING = "BS3_Supporting" - - -class AssayVariantEffectFunctionalClassificationStatement(StatementBase): - """A statement that assigns a functional classification to a variant effect from a functional assay.""" - - model_config = ConfigDict(use_enum_values=True) - - type: Literal["AssayVariantEffectFunctionalClassificationStatement"] = Field( - "AssayVariantEffectFunctionalClassificationStatement", - description="MUST be 'AssayVariantEffectFunctionalClassificationStatement'.", - ) - subjectVariant: MolecularVariation | CategoricalVariant | IRI = Field( - ..., - description="A protein or genomic contextual or canonical molecular variant.", - ) - predicate: Literal["hasAssayVariantEffectFor"] = Field( - "hasAssayVariantEffectFor", - description="The relationship declared to hold between the subject and the object of the Statement.", - ) - objectAssay: IRI | Coding = Field( - ..., - description="The assay that is evaluated for the variant effect. (e.g growth in haploid cell culture protein stability in fluorescence assay)", - ) - classification: AveFunctionalClassification = Field( - ..., - description="The functional classification of the variant effect in the assay.", - ) - specifiedBy: Method | IRI | None = Field( - None, - description="The method that specifies the functional classification of the variant effect in the assay.", - ) - - -class AssayVariantEffectClinicalClassificationStatement(StatementBase): - """A statement that assigns a clinical strength of evidence to a variant effect from a functional assay.""" - - model_config = ConfigDict(use_enum_values=True) - - type: Literal["AssayVariantEffectClinicalClassificationStatement"] = Field( - "AssayVariantEffectClinicalClassificationStatement", - description="MUST be 'AssayVariantEffectClinicalClassificationStatement'.", - ) - subjectVariant: MolecularVariation | CategoricalVariant | IRI = Field( - ..., - description="A protein or genomic contextual or canonical molecular variant.", - ) - predicate: Literal["hasAssayVariantEffectFor"] = Field( - "hasAssayVariantEffectFor", - description="The relationship declared to hold between the subject and the object of the Statement.", - ) - objectAssay: IRI | Coding = Field( - ..., - description="The assay that is evaluated for the variant effect. (e.g growth in haploid cell culture protein stability in fluorescence assay)", - ) - classification: AveClinicalClassification = Field( - ..., - description="The clinical strength of evidence of the variant effect in the assay.", - ) - specifiedBy: Method | IRI | None = Field( - None, - description="The method that specifies the clinical strength of evidence of the variant effect in the assay.", - ) - - -class AssayVariantEffectMeasurementStudyResult(StudyResultBase): - """A StudyResult that reports a variant effect score from a functional assay.""" - - model_config = ConfigDict(use_enum_values=True) - - type: Literal["AssayVariantEffectMeasurementStudyResult"] = Field( - "AssayVariantEffectMeasurementStudyResult", - description="MUST be 'AssayVariantEffectMeasurementStudyResult'.", - ) - componentResult: list[StudyResult] | None = Field( - None, - description="Another StudyResult comprised of data items about the same focus as its parent Result, but based on a more narrowly scoped analysis of the foundational data (e.g. an analysis based on data about a subset of the parent Results full study population) .", - ) - studyGroup: StudyGroup | None = Field( - None, - description="A description of a specific group or population of subjects interrogated in the ResearchStudy that produced the data captured in the StudyResult.", - ) - focusVariant: MolecularVariation | IRI | None = Field( - None, - description="The human mapped representation of the variant that is the subject of the Statement.", - ) - score: float | None = Field( - None, description="The score of the variant effect in the assay." - ) - specifiedBy: Method | IRI | None = Field( - None, - description="The assay that was used to measure the variant effect with all the various properties", - ) - sourceDataSet: list[DataSet] | None = Field( - None, description="The full data set that this measurement is a part of" - ) diff --git a/src/ga4gh/va_spec/profiles/caf_study_result.py b/src/ga4gh/va_spec/profiles/caf_study_result.py deleted file mode 100644 index a544d46..0000000 --- a/src/ga4gh/va_spec/profiles/caf_study_result.py +++ /dev/null @@ -1,49 +0,0 @@ -"""VA Spec Cohort Allele Frequency (population frequency) Study Result Standard Profile""" - -from __future__ import annotations - -from typing import Literal - -from ga4gh.core.entity_models import ( - DataSet, - StudyGroup, - StudyResultBase, -) -from ga4gh.vrs.models import Allele -from pydantic import ConfigDict, Field - - -class CohortAlleleFrequencyStudyResult(StudyResultBase): - """A StudyResult that reports measures related to the frequency of an Allele in a cohort""" - - model_config = ConfigDict(use_enum_values=True) - - type: Literal["CohortAlleleFrequencyStudyResult"] = Field( - "CohortAlleleFrequencyStudyResult", - description="MUST be 'CohortAlleleFrequencyStudyResult'.", - ) - sourceDataSet: list[DataSet] | None = Field( - None, - description="The dataset from which the CohortAlleleFrequencyStudyResult was reported.", - ) - focusAllele: Allele | str = Field( - ..., - description="The specific subject or experimental unit in a Study that data in the StudyResult object is about - e.g. a particular variant in a population allele frequency dataset like ExAC or gnomAD.", - ) - focusAlleleCount: int = Field( - ..., description="The number of occurrences of the focusAllele in the cohort." - ) - locusAlleleCount: int = Field( - ..., - description="The number of occurrences of all alleles at the locus in the cohort (sometimes referred to as 'allele number')", - ) - focusAlleleFrequency: float = Field( - ..., description="The frequency of the focusAllele in the cohort." - ) - cohort: list[StudyGroup] = Field( - ..., description="The cohort from which the frequency was derived." - ) - subCohortFrequency: list[CohortAlleleFrequencyStudyResult] | None = Field( - None, - description="A list of CohortAlleleFrequency objects describing subcohorts of the cohort currently being described. This creates a recursive relationship and subcohorts can be further subdivided into more subcohorts. This enables, for example, the description of different ancestry groups and sexes among those ancestry groups.", - ) diff --git a/src/ga4gh/va_spec/profiles/var_path_stmt.py b/src/ga4gh/va_spec/profiles/var_path_stmt.py deleted file mode 100644 index 6bf536d..0000000 --- a/src/ga4gh/va_spec/profiles/var_path_stmt.py +++ /dev/null @@ -1,54 +0,0 @@ -"""VA Spec Variant Pathogenicity Statement Standard Profile""" - -from enum import Enum -from typing import Literal - -from ga4gh.cat_vrs.core_models import CategoricalVariant -from ga4gh.core.domain_models import Condition, Gene -from ga4gh.core.entity_models import IRI, Coding, StatementBase -from ga4gh.vrs.models import Variation -from pydantic import ConfigDict, Field - - -class PenetranceQualifier(str, Enum): - """Reports the penetrance of the pathogenic effect - i.e. the extent to which the - variant impact is expressed by individuals carrying it as a measure of the - proportion of carriers exhibiting the condition. - """ - - HIGH = "high" - LOW = "low" - RISK_ALLELE = "risk allele" - - -class VariantPathogenicityStatement(StatementBase): - """A Statement describing the role of a variant in causing an inherited condition.""" - - model_config = ConfigDict(use_enum_values=True) - - type: Literal["VariantPathogenicityStatement"] = Field( - "VariantPathogenicityStatement", - description="MUST be 'VariantPathogenicityStatement'.", - ) - subjectVariant: Variation | CategoricalVariant | IRI = Field( - ..., description="A variant that is the subject of the Statement." - ) - predicate: Literal["isCausalFor"] = Field( - "isCausalFor", - description="The relationship declared to hold between the subject and the object of the Statement.", - ) - objectCondition: Condition | IRI = Field( - ..., description="The Condition for which the variant impact is stated." - ) - penetranceQualifier: PenetranceQualifier | None = Field( - None, - description="Reports the penetrance of the pathogenic effect - i.e. the extent to which the variant impact is expressed by individuals carrying it as a measure of the proportion of carriers exhibiting the condition.", - ) - modeOfInheritanceQualifier: list[Coding] | None = Field( - None, - description="Reports a pattern of inheritance expected for the pathogenic effect of the variant. Use HPO terms within the hierarchy of 'HP:0000005' (mode of inheritance) to specify.", - ) - geneContextQualifier: Gene | IRI | None = Field( - None, - description="Reports the gene through which the pathogenic effect asserted for the variant is mediated (i.e. it is the variant's impact on this gene that is responsible for causing the condition).", - ) diff --git a/src/ga4gh/va_spec/profiles/var_study_stmt.py b/src/ga4gh/va_spec/profiles/var_study_stmt.py deleted file mode 100644 index bb7c7f9..0000000 --- a/src/ga4gh/va_spec/profiles/var_study_stmt.py +++ /dev/null @@ -1,208 +0,0 @@ -"""VA Spec Variant Study Statement Standard Profiles""" - -from enum import Enum -from typing import Literal - -from ga4gh.cat_vrs.core_models import CategoricalVariant -from ga4gh.core.domain_models import Condition, Gene, TherapeuticProcedure -from ga4gh.core.entity_models import IRI, StatementBase -from ga4gh.vrs.models import Variation -from pydantic import ConfigDict, Field - - -class AlleleOriginQualifier(str, Enum): - """Reports whether the statement should be interpreted in the context of an - inherited (germline) variant, an acquired (somatic) mutation, or both (combined). - """ - - GERMLINE = "germline" - SOMATIC = "somatic" - COMBINED = "combined" - - -class DiagnosticPredicate(str, Enum): - """Define constraints for diagnostic predicate""" - - INCLUSIVE = "isDiagnosticInclusionCriterionFor" - EXCLUSIVE = "isDiagnosticExclusionCriterionFor" - - -class OncogenicPredicate(str, Enum): - """Define constraints for oncogenic predicate""" - - ONCOGENIC = "isOncogenicFor" - PROTECTIVE = "isProtectiveFor" - PREDISPOSING = "isPredisposingFor" - - -class PrognosticPredicate(str, Enum): - """Define constraints for prognostic predicate""" - - BETTER_OUTCOME = "associatedWithBetterOutcomeFor" - WORSE_OUTCOME = "associatedWithWorseOutcomeFor" - - -class TherapeuticResponsePredicate(str, Enum): - """Define constraints for therapeutic response predicate""" - - SENSITIVITY = "predictsSensitivityTo" - RESISTANCE = "predictsResistanceTo" - - -class AllelePrevalenceQualifier(str, Enum): - """Reports whether the statement should be interpreted in the context of the variant - being rare or common. - """ - - RARE = "rare" - COMMON = "common" - - -class VariantDiagnosticStudyStatement(StatementBase): - """A Statement reporting a conclusion from a single study about whether a variant is - associated with a disease (a diagnostic inclusion criterion), or absence of a - disease (diagnostic exclusion criterion) - based on interpretation of the study's - results. - """ - - model_config = ConfigDict(use_enum_values=True) - - type: Literal["VariantDiagnosticStudyStatement"] = Field( - "VariantDiagnosticStudyStatement", - description="MUST be 'VariantDiagnosticStudyStatement'.", - ) - subjectVariant: Variation | CategoricalVariant | IRI = Field( - ..., description="A variant that is the subject of the Statement." - ) - predicate: DiagnosticPredicate = Field( - ..., - description="The relationship declared to hold between the subject and the object of the Statement.", - ) - objectCondition: Condition | IRI = Field( - ..., description="The disease that is evaluated for diagnosis." - ) - alleleOriginQualifier: AlleleOriginQualifier | None = Field( - None, - description="Reports whether the statement should be interpreted in the context of an inherited (germline) variant, an acquired (somatic) mutation, or both (combined).", - ) - allelePrevalenceQualifier: AllelePrevalenceQualifier | None = Field( - None, - description="Reports whether the statement should be interpreted in the context of the variant being rare or common.", - ) - geneContextQualifier: Gene | IRI | None = Field( - None, - description="Reports a gene impacted by the variant, which may contribute to the diagnostic association in the Statement.", - ) - - -class VariantOncogenicityStudyStatement(StatementBase): - """A Statement reporting a conclusion from a single study that supports or refutes a - variant's effect on oncogenesis for a specific tumor type - based on interpretation - of the study's results. - """ - - model_config = ConfigDict(use_enum_values=True) - - type: Literal["VariantOncogenicityStudyStatement"] = Field( - "VariantOncogenicityStudyStatement", - description="MUST be 'VariantOncogenicityStudyStatement'.", - ) - subjectVariant: Variation | CategoricalVariant | IRI = Field( - ..., description="A variant that is the subject of the Statement." - ) - predicate: OncogenicPredicate = Field( - ..., - description="The relationship declared to hold between the subject and the object of the Statement.", - ) - objectTumorType: Condition | IRI = Field( - ..., description="The tumor type for which the variant impact is evaluated." - ) - alleleOriginQualifier: AlleleOriginQualifier | None = Field( - None, - description="Reports whether the statement should be interpreted in the context of an inherited (germline) variant, an acquired (somatic) mutation, or both (combined).", - ) - allelePrevalenceQualifier: AllelePrevalenceQualifier | None = Field( - None, - description="Reports whether the statement should be interpreted in the context of the variant being rare or common.", - ) - geneContextQualifier: Gene | IRI | None = Field( - None, - description="Reports a gene impacted by the variant, which may contribute to the oncogenic role in the Statement.", - ) - - -class VariantPrognosticStudyStatement(StatementBase): - """A Statement reporting a conclusion from a single study about whether a variant is - associated with an improved or worse outcome for a disease - based on interpretation - of the study's results. - """ - - model_config = ConfigDict(use_enum_values=True) - - type: Literal["VariantPrognosticStudyStatement"] = Field( - "VariantPrognosticStudyStatement", - description="MUST be 'VariantPrognosticStudyStatement'.", - ) - subjectVariant: Variation | CategoricalVariant | IRI = Field( - ..., description="A variant that is the subject of the Statement." - ) - predicate: PrognosticPredicate = Field( - ..., - description="The relationship declared to hold between the subject and the object of the Statement.", - ) - objectCondition: Condition | IRI = Field( - ..., description="The disease that is evaluated for outcome." - ) - alleleOriginQualifier: AlleleOriginQualifier | None = Field( - None, - description="Reports whether the statement should be interpreted in the context of an inherited (germline) variant, an acquired (somatic) mutation, or both (combined).", - ) - allelePrevalenceQualifier: AllelePrevalenceQualifier | None = Field( - None, - description="Reports whether the statement should be interpreted in the context of the variant being rare or common.", - ) - geneContextQualifier: Gene | IRI | None = Field( - None, - description="Reports a gene impacted by the variant, which may contribute to the prognostic association in the Statement.", - ) - - -class VariantTherapeuticResponseStudyStatement(StatementBase): - """A Statement reporting a conclusion from a single study about the role of a - variant in modulating the response of a neoplasm to drug administration or other - therapeutic procedures - based on interpretation of the study's results. - """ - - model_config = ConfigDict(use_enum_values=True) - - type: Literal["VariantTherapeuticResponseStudyStatement"] = Field( - "VariantTherapeuticResponseStudyStatement", - description="MUST be 'VariantTherapeuticResponseStudyStatement'.", - ) - subjectVariant: Variation | CategoricalVariant | IRI = Field( - ..., description="A variant that is the subject of the Statement." - ) - predicate: TherapeuticResponsePredicate = Field( - ..., - description="The relationship declared to hold between the subject and the object of the Statement.", - ) - objectTherapeutic: TherapeuticProcedure | IRI = Field( - ..., - description="A drug administration or other therapeutic procedure that the neoplasm is intended to respond to.", - ) - conditionQualifier: Condition | IRI = Field( - ..., - description="Reports the disease context in which the variant's association with therapeutic sensitivity or resistance is evaluated. Note that this is a required qualifier in therapeutic response statements.", - ) - alleleOriginQualifier: AlleleOriginQualifier | None = Field( - None, - description="Reports whether the statement should be interpreted in the context of an inherited (germline) variant, an acquired (somatic) mutation, or both (combined).", - ) - allelePrevalenceQualifier: AllelePrevalenceQualifier | None = Field( - None, - description="Reports whether the statement should be interpreted in the context of the variant being rare or common.", - ) - geneContextQualifier: Gene | IRI | None = Field( - None, - description="Reports a gene impacted by the variant, which may contribute to the therapeutic sensitivity or resistance reported in the Statement. ", - ) From c3d1434cbe16e5717642d4290c9f3c9ab6cdab44 Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Mon, 23 Dec 2024 14:41:21 -0500 Subject: [PATCH 03/13] use enum values --- src/ga4gh/va_spec/base/core.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/ga4gh/va_spec/base/core.py b/src/ga4gh/va_spec/base/core.py index 509bf65..3bb5489 100644 --- a/src/ga4gh/va_spec/base/core.py +++ b/src/ga4gh/va_spec/base/core.py @@ -6,6 +6,7 @@ from typing import Annotated, Literal from pydantic import ( + ConfigDict, Field, RootModel, StringConstraints, @@ -142,6 +143,8 @@ class VariantDiagnosticProposition(ClinicalVariantProposition): """A Proposition about whether a variant is associated with a disease (a diagnostic inclusion criterion), or absence of a disease (diagnostic exclusion criterion).""" + model_config = ConfigDict(use_enum_values=True) + type: Literal["VariantDiagnosticProposition"] = Field( "VariantDiagnosticProposition", description="MUST be 'VariantDiagnosticProposition'.", @@ -196,6 +199,8 @@ class PrognosticPredicate(str, Enum): class VariantPrognosticProposition(ClinicalVariantProposition): """A Proposition about whether a variant is associated with an improved or worse outcome for a disease.""" + model_config = ConfigDict(use_enum_values=True) + type: Literal["VariantPrognosticProposition"] = Field( "VariantPrognosticProposition", description="MUST be 'VariantPrognosticProposition'.", @@ -217,6 +222,8 @@ class VariantTherapeuticResponseProposition(ClinicalVariantProposition): """A Proposition about the role of a variant in modulating the response of a neoplasm to drug administration or other therapeutic procedures.""" + model_config = ConfigDict(use_enum_values=True) + type: Literal["VariantTherapeuticResponseProposition"] = Field( "VariantTherapeuticResponseProposition", description="MUST be 'VariantTherapeuticResponseProposition'.", @@ -374,6 +381,8 @@ class EvidenceLine(InformationEntity): an interpretation of one or more pieces of information as evidence for or against the target Proposition.""" + model_config = ConfigDict(use_enum_values=True) + type: Literal["EvidenceLine"] = Field( CoreType.EVIDENCE_LINE.value, description=f"MUST be '{CoreType.EVIDENCE_LINE.value}'.", @@ -412,6 +421,7 @@ class Statement(InformationEntity): as true or false, or to provide a more nuanced assessment of the level of confidence or evidence supporting a particular Proposition. """ + model_config = ConfigDict(use_enum_values=True) type: Literal["Statement"] = Field( CoreType.STATEMENT.value, description=f"MUST be '{CoreType.STATEMENT.value}'." From 2b89b9c5b30057f0af41b91cebc3bb4404f8c50e Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Mon, 23 Dec 2024 14:41:39 -0500 Subject: [PATCH 04/13] add aac_2017 models --- src/ga4gh/va_spec/aac_2017/models.py | 42 ++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 src/ga4gh/va_spec/aac_2017/models.py diff --git a/src/ga4gh/va_spec/aac_2017/models.py b/src/ga4gh/va_spec/aac_2017/models.py new file mode 100644 index 0000000..0f22dce --- /dev/null +++ b/src/ga4gh/va_spec/aac_2017/models.py @@ -0,0 +1,42 @@ +"""AMP/ASCO/CAP 2017""" + +from pydantic import ( + Field, +) + +from ga4gh.va_spec.base.core import ( + Statement, + VariantDiagnosticProposition, + VariantPrognosticProposition, + VariantTherapeuticResponseProposition, +) + + +class VariantDiagnosticStudyStatement(Statement): + """A statement reporting a conclusion from a single study about whether a variant is + associated with a disease (a diagnostic inclusion criterion), or absence of a + disease (diagnostic exclusion criterion) - based on interpretation of the study's + results. + """ + + proposition: VariantDiagnosticProposition = Field(..., description="A proposition about a diagnostic association between a variant and condition, for which the study provides evidence. The validity of this proposition, and the level of confidence/evidence supporting it, may be assessed and reported by the Statement.") + + + +class VariantPrognosticStudyStatement(Statement): + """A statement reporting a conclusion from a single study about whether a variant is + associated with a disease prognosis - based on interpretation of the study's + results. + """ + + proposition: VariantPrognosticProposition = Field(..., description="A proposition about a prognostic association between a variant and condition, for which the study provides evidence. The validity of this proposition, and the level of confidence/evidence supporting it, may be assessed and reported by the Statement.") + + + +class VariantTherapeuticResponseStudyStatement(Statement): + """A statement reporting a conclusion from a single study about whether a variant is + associated with a therapeutic response (positive or negative) - based on + interpretation of the study's results. + """ + + proposition: VariantTherapeuticResponseProposition = Field(..., description="A proposition about the therapeutic response associated with a variant, for which the study provides evidence. The validity of this proposition, and the level of confidence/evidence supporting it, may be assessed and reported by the Statement.") \ No newline at end of file From f65062fa361c664ce98be70d5637648d656eddbc Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Thu, 16 Jan 2025 10:29:01 -0500 Subject: [PATCH 05/13] wip --- pyproject.toml | 4 +- src/ga4gh/va_spec/aac_2017/models.py | 24 +++++---- src/ga4gh/va_spec/base/caf_study_result.py | 49 +++++++++++++------ src/ga4gh/va_spec/base/core.py | 30 ++++++++---- src/ga4gh/va_spec/base/domain_entities.py | 7 +-- .../experimental_variant_functional_impact.py | 31 ++++++++---- 6 files changed, 96 insertions(+), 49 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 75112d2..11d9f7d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,8 +31,8 @@ keywords = [ requires-python = ">=3.10" dynamic = ["version"] dependencies = [ - "ga4gh.vrs~=2.0.0a12", - "ga4gh.cat_vrs~=0.1.0", + #"ga4gh.vrs~=2.0.0a12", + #"ga4gh.cat_vrs~=0.1.0", "pydantic==2.*" ] diff --git a/src/ga4gh/va_spec/aac_2017/models.py b/src/ga4gh/va_spec/aac_2017/models.py index 0f22dce..f05870e 100644 --- a/src/ga4gh/va_spec/aac_2017/models.py +++ b/src/ga4gh/va_spec/aac_2017/models.py @@ -1,15 +1,14 @@ """AMP/ASCO/CAP 2017""" -from pydantic import ( - Field, -) - from ga4gh.va_spec.base.core import ( Statement, VariantDiagnosticProposition, VariantPrognosticProposition, VariantTherapeuticResponseProposition, ) +from pydantic import ( + Field, +) class VariantDiagnosticStudyStatement(Statement): @@ -19,8 +18,10 @@ class VariantDiagnosticStudyStatement(Statement): results. """ - proposition: VariantDiagnosticProposition = Field(..., description="A proposition about a diagnostic association between a variant and condition, for which the study provides evidence. The validity of this proposition, and the level of confidence/evidence supporting it, may be assessed and reported by the Statement.") - + proposition: VariantDiagnosticProposition = Field( + ..., + description="A proposition about a diagnostic association between a variant and condition, for which the study provides evidence. The validity of this proposition, and the level of confidence/evidence supporting it, may be assessed and reported by the Statement.", + ) class VariantPrognosticStudyStatement(Statement): @@ -29,8 +30,10 @@ class VariantPrognosticStudyStatement(Statement): results. """ - proposition: VariantPrognosticProposition = Field(..., description="A proposition about a prognostic association between a variant and condition, for which the study provides evidence. The validity of this proposition, and the level of confidence/evidence supporting it, may be assessed and reported by the Statement.") - + proposition: VariantPrognosticProposition = Field( + ..., + description="A proposition about a prognostic association between a variant and condition, for which the study provides evidence. The validity of this proposition, and the level of confidence/evidence supporting it, may be assessed and reported by the Statement.", + ) class VariantTherapeuticResponseStudyStatement(Statement): @@ -39,4 +42,7 @@ class VariantTherapeuticResponseStudyStatement(Statement): interpretation of the study's results. """ - proposition: VariantTherapeuticResponseProposition = Field(..., description="A proposition about the therapeutic response associated with a variant, for which the study provides evidence. The validity of this proposition, and the level of confidence/evidence supporting it, may be assessed and reported by the Statement.") \ No newline at end of file + proposition: VariantTherapeuticResponseProposition = Field( + ..., + description="A proposition about the therapeutic response associated with a variant, for which the study provides evidence. The validity of this proposition, and the level of confidence/evidence supporting it, may be assessed and reported by the Statement.", + ) diff --git a/src/ga4gh/va_spec/base/caf_study_result.py b/src/ga4gh/va_spec/base/caf_study_result.py index 909f4d4..9e31245 100644 --- a/src/ga4gh/va_spec/base/caf_study_result.py +++ b/src/ga4gh/va_spec/base/caf_study_result.py @@ -1,25 +1,46 @@ """Cohort Allele Frequency Study Result Standard Profile""" - from typing import Literal -from pydantic import Field - from ga4gh.core.models import iriReference from ga4gh.va_spec.base.core import DataSet, StudyGroup, StudyResult from ga4gh.vrs.models import Allele +from pydantic import Field class CohortAlleleFrequencyStudyResult(StudyResult): - """A StudyResult that reports measures related to the frequency of an Allele in a cohort""" + """A StudyResult that reports measures related to the frequency of an Allele in a cohort""" + + type: Literal["CohortAlleleFrequencyStudyResult"] = Field( + "CohortAlleleFrequencyStudyResult", + description="MUST be 'CohortAlleleFrequencyStudyResult'.", + ) + sourceDataSet: DataSet | None = Field( + None, + description="The dataset from which the CohortAlleleFrequencyStudyResult was reported.", + ) + focusAllele: Allele | iriReference = Field( + ..., description="The Allele for which frequency results are reported." + ) + focusAlleleCount: int = Field( + ..., description="The number of occurrences of the focusAllele in the cohort." + ) + locusAlleleCount: int = Field( + ..., + description="The number of occurrences of all alleles at the locus in the cohort.", + ) + focusAlleleFrequency: int = Field( + ..., description="The frequency of the focusAllele in the cohort." + ) + cohort: StudyGroup = Field( + ..., description="The cohort from which the frequency was derived." + ) + subCohortFrequency: list["CohortAlleleFrequencyStudyResult"] | None = Field( + None, + description="A list of CohortAlleleFrequency objects describing subcohorts of the cohort currently being described. Subcohorts can be further subdivided into more subcohorts. This enables, for example, the description of different ancestry groups and sexes among those ancestry groups.", + ) + ancillaryResults: dict | None = None + qualityMeasures: dict | None = None + - type: Literal["CohortAlleleFrequencyStudyResult"] = Field("CohortAlleleFrequencyStudyResult", description="MUST be 'CohortAlleleFrequencyStudyResult'.") - sourceDataSet: DataSet | None = Field(None, description="The dataset from which the CohortAlleleFrequencyStudyResult was reported.") - focusAllele: Allele | iriReference = Field(..., description="The Allele for which frequency results are reported.") - focusAlleleCount: int = Field(..., description="The number of occurrences of the focusAllele in the cohort.") - locusAlleleCount: int = Field(..., description="The number of occurrences of all alleles at the locus in the cohort.") - focusAlleleFrequency: int = Field(..., description="The frequency of the focusAllele in the cohort.") - cohort: StudyGroup = Field(..., description="The cohort from which the frequency was derived.") - subCohortFrequency: list[CohortAlleleFrequencyStudyResult] | None = Field(None, description="A list of CohortAlleleFrequency objects describing subcohorts of the cohort currently being described. Subcohorts can be further subdivided into more subcohorts. This enables, for example, the description of different ancestry groups and sexes among those ancestry groups.") - ancillaryResults: dict | None = None - qualityMeasures: dict | None = None +CohortAlleleFrequencyStudyResult.model_rebuild() diff --git a/src/ga4gh/va_spec/base/core.py b/src/ga4gh/va_spec/base/core.py index 3bb5489..3e5a3e8 100644 --- a/src/ga4gh/va_spec/base/core.py +++ b/src/ga4gh/va_spec/base/core.py @@ -1,3 +1,5 @@ +"""VA Spec Base Core Models""" + from __future__ import annotations from abc import ABC @@ -5,6 +7,10 @@ from enum import Enum from typing import Annotated, Literal +from ga4gh.cat_vrs.models import CategoricalVariant +from ga4gh.core.models import Entity, MappableConcept, iriReference +from ga4gh.va_spec.base.domain_entities import Condition, Therapeutic +from ga4gh.vrs.models import MolecularVariation from pydantic import ( ConfigDict, Field, @@ -12,11 +18,6 @@ StringConstraints, ) -from ga4gh.cat_vrs.models import CategoricalVariant -from ga4gh.core.models import Entity, MappableConcept, iriReference -from ga4gh.va_spec.base.domain_entities import Condition, Therapeutic -from ga4gh.vrs.models import MolecularVariation - ######################################### # Abstract Core Classes ######################################### @@ -141,7 +142,8 @@ class DiagnosticPredicate(str, Enum): class VariantDiagnosticProposition(ClinicalVariantProposition): """A Proposition about whether a variant is associated with a disease (a diagnostic - inclusion criterion), or absence of a disease (diagnostic exclusion criterion).""" + inclusion criterion), or absence of a disease (diagnostic exclusion criterion). + """ model_config = ConfigDict(use_enum_values=True) @@ -220,7 +222,8 @@ class TherapeuticResponsePredicate(str, Enum): class VariantTherapeuticResponseProposition(ClinicalVariantProposition): """A Proposition about the role of a variant in modulating the response of a neoplasm to drug - administration or other therapeutic procedures.""" + administration or other therapeutic procedures. + """ model_config = ConfigDict(use_enum_values=True) @@ -245,6 +248,8 @@ class VariantTherapeuticResponseProposition(ClinicalVariantProposition): class CoreType(str, Enum): + """Define VA Spec Base Core Types""" + METHOD = "Method" CONTRIBUTION = "Contribution" DOCUMENT = "Document" @@ -342,15 +347,18 @@ class Agent(Entity): class Direction(str, Enum): """A term indicating whether the Statement supports, disputes, or remains neutral - w.r.t. the validity of the Proposition it evaluates.""" + w.r.t. the validity of the Proposition it evaluates. + """ SUPPORTS = "supports" NEUTRAL = "neutral" DISPUTES = "disputes" + class DataSet(Entity): """A collection of related data items or records that are organized together in a - common format or structure, to enable their computational manipulation as a unit.""" + common format or structure, to enable their computational manipulation as a unit. + """ type: Literal["DataSet"] = Field( CoreType.DATA_SET.value, description=f"MUST be '{CoreType.DATA_SET.value}'." @@ -379,7 +387,8 @@ class EvidenceLine(InformationEntity): """An independent, evidence-based argument that may support or refute the validity of a specific Proposition. The strength and direction of this argument is based on an interpretation of one or more pieces of information as evidence for or against - the target Proposition.""" + the target Proposition. + """ model_config = ConfigDict(use_enum_values=True) @@ -421,6 +430,7 @@ class Statement(InformationEntity): as true or false, or to provide a more nuanced assessment of the level of confidence or evidence supporting a particular Proposition. """ + model_config = ConfigDict(use_enum_values=True) type: Literal["Statement"] = Field( diff --git a/src/ga4gh/va_spec/base/domain_entities.py b/src/ga4gh/va_spec/base/domain_entities.py index 14bedaf..5cbdb0e 100644 --- a/src/ga4gh/va_spec/base/domain_entities.py +++ b/src/ga4gh/va_spec/base/domain_entities.py @@ -1,9 +1,7 @@ """VA Spec Shared Domain Entity Data Structures""" - -from pydantic import Field, RootModel - from ga4gh.core.models import Element, MappableConcept +from pydantic import Field, RootModel class TraitSet(Element): @@ -43,8 +41,7 @@ class TherapyGroup(Element): class Therapeutic(RootModel): - """A group of therapies (TherapyGroup) or a single therapy (drug, procedure, behavioral intervention, etc.). - """ + """A group of therapies (TherapyGroup) or a single therapy (drug, procedure, behavioral intervention, etc.).""" root: TherapyGroup | MappableConcept = Field( ..., diff --git a/src/ga4gh/va_spec/base/experimental_variant_functional_impact.py b/src/ga4gh/va_spec/base/experimental_variant_functional_impact.py index ce4f69f..1318a41 100644 --- a/src/ga4gh/va_spec/base/experimental_variant_functional_impact.py +++ b/src/ga4gh/va_spec/base/experimental_variant_functional_impact.py @@ -1,20 +1,33 @@ """Cohort Allele Frequency Study Result Standard Profile""" - from typing import Literal -from pydantic import Field - from ga4gh.core.models import iriReference from ga4gh.va_spec.base.core import DataSet, Method, StudyResult from ga4gh.vrs.models import MolecularVariation +from pydantic import Field class ExperimentalVariantFunctionalImpactStudyResult(StudyResult): - """A StudyResult that reports a functional impact score from a variant functional assay or study.""" + """A StudyResult that reports a functional impact score from a variant functional assay or study.""" - type: Literal["ExperimentalVariantFunctionalImpactStudyResult"] = Field("ExperimentalVariantFunctionalImpactStudyResult", description="MUST be 'ExperimentalVariantFunctionalImpactStudyResult'.") - focusVariant: MolecularVariation | iriReference | None = Field(None, description="The genetic variant for which a functional impact score is generated.") - functionalImpactScore: float | None = Field(None, description="The score of the variant impact measured in the assay or study.") - specifiedBy: Method | iriReference | None = Field(None, description="The assay that was performed to generate the reported functional impact score.") - sourceDataSet: DataSet | None = Field(None, description="The full data set that provided the reported the functional impact score.") + type: Literal["ExperimentalVariantFunctionalImpactStudyResult"] = Field( + "ExperimentalVariantFunctionalImpactStudyResult", + description="MUST be 'ExperimentalVariantFunctionalImpactStudyResult'.", + ) + focusVariant: MolecularVariation | iriReference | None = Field( + None, + description="The genetic variant for which a functional impact score is generated.", + ) + functionalImpactScore: float | None = Field( + None, + description="The score of the variant impact measured in the assay or study.", + ) + specifiedBy: Method | iriReference | None = Field( + None, + description="The assay that was performed to generate the reported functional impact score.", + ) + sourceDataSet: DataSet | None = Field( + None, + description="The full data set that provided the reported the functional impact score.", + ) From 39b9636db12ae351dcc8f7246a5caf568c604366 Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Wed, 22 Jan 2025 08:48:43 -0500 Subject: [PATCH 06/13] update --- pyproject.toml | 2 +- src/ga4gh/va_spec/aac_2017/__init__.py | 13 +++ src/ga4gh/va_spec/base/__init__.py | 66 +++++++++++++ tests/validation/test_va_spec_schema.py | 122 ++++++++++++++++-------- 4 files changed, 163 insertions(+), 40 deletions(-) create mode 100644 src/ga4gh/va_spec/aac_2017/__init__.py create mode 100644 src/ga4gh/va_spec/base/__init__.py diff --git a/pyproject.toml b/pyproject.toml index 11d9f7d..289076b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,7 @@ keywords = [ requires-python = ">=3.10" dynamic = ["version"] dependencies = [ - #"ga4gh.vrs~=2.0.0a12", + "ga4gh.vrs==2.0.0a13", #"ga4gh.cat_vrs~=0.1.0", "pydantic==2.*" ] diff --git a/src/ga4gh/va_spec/aac_2017/__init__.py b/src/ga4gh/va_spec/aac_2017/__init__.py new file mode 100644 index 0000000..7ff1f81 --- /dev/null +++ b/src/ga4gh/va_spec/aac_2017/__init__.py @@ -0,0 +1,13 @@ +"""Module to load and init namespace at package level.""" + +from .models import ( + VariantDiagnosticStudyStatement, + VariantPrognosticStudyStatement, + VariantTherapeuticResponseStudyStatement, +) + +__all__ = [ + "VariantDiagnosticStudyStatement", + "VariantPrognosticStudyStatement", + "VariantTherapeuticResponseStudyStatement", +] diff --git a/src/ga4gh/va_spec/base/__init__.py b/src/ga4gh/va_spec/base/__init__.py new file mode 100644 index 0000000..d92e6df --- /dev/null +++ b/src/ga4gh/va_spec/base/__init__.py @@ -0,0 +1,66 @@ +"""Module to load and init namespace at package level.""" + +from .caf_study_result import CohortAlleleFrequencyStudyResult +from .core import ( + Agent, + ClinicalVariantProposition, + Contribution, + CoreType, + DataSet, + DiagnosticPredicate, + Direction, + Document, + EvidenceLine, + ExperimentalVariantFunctionalImpactProposition, + InformationEntity, + Method, + PrognosticPredicate, + Proposition, + Statement, + StudyGroup, + StudyResult, + SubjectVariantProposition, + TherapeuticResponsePredicate, + VariantDiagnosticProposition, + VariantOncogenicityProposition, + VariantPathogenicityProposition, + VariantPrognosticProposition, + VariantTherapeuticResponseProposition, +) +from .domain_entities import Condition, Therapeutic, TherapyGroup, TraitSet +from .experimental_variant_functional_impact import ( + ExperimentalVariantFunctionalImpactStudyResult, +) + +__all__ = [ + "CohortAlleleFrequencyStudyResult", + "InformationEntity", + "StudyResult", + "Proposition", + "SubjectVariantProposition", + "ClinicalVariantProposition", + "ExperimentalVariantFunctionalImpactProposition", + "DiagnosticPredicate", + "VariantDiagnosticProposition", + "VariantOncogenicityProposition", + "VariantPathogenicityProposition", + "PrognosticPredicate", + "VariantPrognosticProposition", + "TherapeuticResponsePredicate", + "VariantTherapeuticResponseProposition", + "CoreType", + "Method", + "Contribution", + "Document", + "Agent", + "Direction", + "DataSet", + "EvidenceLine", + "Statement", + "StudyGroup", + "TraitSet", + "Condition", + "TherapyGroup", + "Therapeutic", + "ExperimentalVariantFunctionalImpactStudyResult", +] diff --git a/tests/validation/test_va_spec_schema.py b/tests/validation/test_va_spec_schema.py index 1d99d76..cb78a3f 100644 --- a/tests/validation/test_va_spec_schema.py +++ b/tests/validation/test_va_spec_schema.py @@ -1,56 +1,105 @@ -"""Test that VA-Spec Python model structures match VA-Spec Schema""" +"""Test that VA-Spec Python Pydantic models match corresponding JSON schemas""" import json +from enum import Enum from pathlib import Path -from typing import Literal, get_args, get_origin -import ga4gh.va_spec.profiles as va_spec_profiles +import pytest +from ga4gh.va_spec import aac_2017, base +from pydantic import BaseModel -ROOT_DIR = Path(__file__).parents[2] -VA_SPEC_SCHEMA_DIR = ( - ROOT_DIR / "submodules" / "va_spec" / "schema" / "profiles" / "json" -) -VA_SPEC_SCHEMA = {} -VA_SPEC_BASE_CLASSES = set() -VA_SPEC_CONCRETE_CLASSES = set() -VA_SPEC_PRIMITIVES = set() +class VaSpecSchema(str, Enum): + """Enum for VA-Spec schema""" + + AAC_2017 = "aac_2017" + BASE = "base" + + +class VaSpecSchemaMapping(BaseModel): + """Model for representing VA-Spec Schema concrete classes, primitives, and schema""" + + base_classes: set = set() + concrete_classes: set = set() + primitives: set = set() + schema: dict = {} -# Get profile classes -for f in VA_SPEC_SCHEMA_DIR.glob("*"): - with f.open() as rf: +def _update_va_spec_schema_mapping( + f_path: Path, va_spec_schema_mapping: VaSpecSchemaMapping +) -> None: + """Update ``va_spec_schema_mapping`` properties + + :param f_path: Path to JSON Schema file + :param va_spec_schema_mapping: VA-Spec schema mapping to update + """ + with f_path.open() as rf: cls_def = json.load(rf) - va_spec_class = cls_def["title"] - VA_SPEC_SCHEMA[va_spec_class] = cls_def + spec_class = cls_def["title"] + va_spec_schema_mapping.schema[spec_class] = cls_def if "properties" in cls_def: - VA_SPEC_CONCRETE_CLASSES.add(va_spec_class) + va_spec_schema_mapping.concrete_classes.add(spec_class) elif cls_def.get("type") in {"array", "integer", "string"}: - VA_SPEC_PRIMITIVES.add(va_spec_class) + va_spec_schema_mapping.primitives.add(spec_class) else: - VA_SPEC_BASE_CLASSES.add(va_spec_class) + va_spec_schema_mapping.base_classes.add(spec_class) + + +VA_SPEC_SCHEMA_MAPPING = {schema: VaSpecSchemaMapping() for schema in VaSpecSchema} +SUBMODULES_DIR = Path(__file__).parents[2] / "submodules" / "va_spec" / "schema" -def test_schema_models_in_pydantic(): +# Get core + profiles classes +for child in SUBMODULES_DIR.iterdir(): + child_str = str(child) + if child_str.endswith(VaSpecSchema.AAC_2017): + mapping_key = VaSpecSchema.AAC_2017 + elif child_str.endswith(VaSpecSchema.BASE): + mapping_key = VaSpecSchema.BASE + else: + continue + + mapping = VA_SPEC_SCHEMA_MAPPING[mapping_key] + for f in (child / "json").glob("*"): + _update_va_spec_schema_mapping(f, mapping) + + +@pytest.mark.parametrize( + ("va_spec_schema", "pydantic_models"), + [ + (VaSpecSchema.AAC_2017, aac_2017), + (VaSpecSchema.BASE, base), + ], +) +def test_schema_models_in_pydantic(va_spec_schema, pydantic_models): """Ensure that each schema model has corresponding Pydantic model""" - for va_spec_class in ( - VA_SPEC_BASE_CLASSES | VA_SPEC_CONCRETE_CLASSES | VA_SPEC_PRIMITIVES + mapping = VA_SPEC_SCHEMA_MAPPING[va_spec_schema] + for schema_model in ( + mapping.base_classes | mapping.concrete_classes | mapping.primitives ): - assert getattr(va_spec_profiles, va_spec_class, False), va_spec_class + assert getattr(pydantic_models, schema_model, False), schema_model -def test_schema_class_fields(): +@pytest.mark.parametrize( + ("va_spec_schema", "pydantic_models"), + [ + (VaSpecSchema.AAC_2017, aac_2017), + (VaSpecSchema.BASE, base), + ], +) +def test_schema_class_fields(va_spec_schema, pydantic_models): """Check that each schema model properties exist and are required in corresponding Pydantic model, and validate required properties """ - for va_spec_class in VA_SPEC_CONCRETE_CLASSES: - schema_properties = VA_SPEC_SCHEMA[va_spec_class]["properties"] - pydantic_model = getattr(va_spec_profiles, va_spec_class) - assert set(pydantic_model.model_fields) == set(schema_properties), va_spec_class + mapping = VA_SPEC_SCHEMA_MAPPING[va_spec_schema] + for schema_model in mapping.concrete_classes: + schema_properties = mapping.schema[schema_model]["properties"] + pydantic_model = getattr(pydantic_models, schema_model) + assert set(pydantic_model.model_fields) == set(schema_properties), schema_model - required_schema_fields = set(VA_SPEC_SCHEMA[va_spec_class]["required"]) + required_schema_fields = set(mapping.schema[schema_model]["required"]) for prop, property_def in schema_properties.items(): pydantic_model_field_info = pydantic_model.model_fields[prop] @@ -58,21 +107,16 @@ def test_schema_class_fields(): if prop in required_schema_fields: if prop != "type": - if get_origin(pydantic_model_field_info.annotation) is Literal: - assert ( - get_args(pydantic_model_field_info.annotation)[0] - == pydantic_model_field_info.default - ) - else: - assert pydantic_field_required, f"{pydantic_model}.{prop}" + assert pydantic_field_required, f"{pydantic_model}.{prop}" else: assert not pydantic_field_required, f"{pydantic_model}.{prop}" - if property_def.get("description") is not None: - field_descr = pydantic_model_field_info.description or "" + if "description" in property_def: assert property_def["description"].replace( "'", '"' - ) == field_descr.replace("'", '"'), f"{pydantic_model}.{prop}" + ) == pydantic_model_field_info.description.replace( + "'", '"' + ), f"{pydantic_model}.{prop}" else: assert ( pydantic_model_field_info.description is None From 175c9fef9c8b34e98a412131c720e686aa3b1407 Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Wed, 22 Jan 2025 12:22:47 -0500 Subject: [PATCH 07/13] wip --- pyproject.toml | 2 +- src/ga4gh/va_spec/base/caf_study_result.py | 12 +++--- src/ga4gh/va_spec/base/core.py | 42 ++++++++++--------- src/ga4gh/va_spec/base/domain_entities.py | 4 +- .../experimental_variant_functional_impact.py | 10 ++--- tests/validation/test_va_spec_schema.py | 24 +++++++---- 6 files changed, 50 insertions(+), 44 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 289076b..faefe54 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,7 @@ keywords = [ requires-python = ">=3.10" dynamic = ["version"] dependencies = [ - "ga4gh.vrs==2.0.0a13", + #"ga4gh.vrs==2.0.0a13", #"ga4gh.cat_vrs~=0.1.0", "pydantic==2.*" ] diff --git a/src/ga4gh/va_spec/base/caf_study_result.py b/src/ga4gh/va_spec/base/caf_study_result.py index 9e31245..0d4098d 100644 --- a/src/ga4gh/va_spec/base/caf_study_result.py +++ b/src/ga4gh/va_spec/base/caf_study_result.py @@ -1,14 +1,15 @@ """Cohort Allele Frequency Study Result Standard Profile""" +from __future__ import annotations from typing import Literal from ga4gh.core.models import iriReference -from ga4gh.va_spec.base.core import DataSet, StudyGroup, StudyResult +from ga4gh.va_spec.base.core import StudyResultBase, DataSet, StudyGroup, StudyResult from ga4gh.vrs.models import Allele from pydantic import Field -class CohortAlleleFrequencyStudyResult(StudyResult): +class CohortAlleleFrequencyStudyResult(StudyResultBase): """A StudyResult that reports measures related to the frequency of an Allele in a cohort""" type: Literal["CohortAlleleFrequencyStudyResult"] = Field( @@ -35,12 +36,9 @@ class CohortAlleleFrequencyStudyResult(StudyResult): cohort: StudyGroup = Field( ..., description="The cohort from which the frequency was derived." ) - subCohortFrequency: list["CohortAlleleFrequencyStudyResult"] | None = Field( + subCohortFrequency: list[CohortAlleleFrequencyStudyResult] | None = Field( None, - description="A list of CohortAlleleFrequency objects describing subcohorts of the cohort currently being described. Subcohorts can be further subdivided into more subcohorts. This enables, for example, the description of different ancestry groups and sexes among those ancestry groups.", + description="A list of CohortAlleleFrequency objects describing subcohorts of the cohort currently being described. Subcohorts can be further subdivided into more subcohorts. This enables, for example, the description of different ancestry groups and sexes among those ancestry groups.", ) ancillaryResults: dict | None = None qualityMeasures: dict | None = None - - -CohortAlleleFrequencyStudyResult.model_rebuild() diff --git a/src/ga4gh/va_spec/base/core.py b/src/ga4gh/va_spec/base/core.py index 3e5a3e8..dd57659 100644 --- a/src/ga4gh/va_spec/base/core.py +++ b/src/ga4gh/va_spec/base/core.py @@ -8,7 +8,7 @@ from typing import Annotated, Literal from ga4gh.cat_vrs.models import CategoricalVariant -from ga4gh.core.models import Entity, MappableConcept, iriReference +from ga4gh.core.models import Entity, EntityBase, MappableConcept, iriReference from ga4gh.va_spec.base.domain_entities import Condition, Therapeutic from ga4gh.vrs.models import MolecularVariation from pydantic import ( @@ -42,20 +42,22 @@ class InformationEntity(Entity): ) -class StudyResult(InformationEntity): +class StudyResultBase(InformationEntity, ABC): + sourceDataSet: DataSet | None = Field( + None, + description="A larger DataSet from which the data included in the StudyResult was taken or derived.", + ) + + +class StudyResult(StudyResultBase): """A collection of data items from a single study that pertain to a particular subject or experimental unit in the study, along with optional provenance information describing how these data items were generated. """ - focus: Entity | MappableConcept | iriReference = Field( ..., description="The specific participant, subject or experimental unit in a Study that data included in the StudyResult object is about - e.g. a particular variant in a population allele frequency dataset like ExAC or gnomAD.", ) - sourceDataSet: DataSet | None = Field( - None, - description="A larger DataSet from which the data included in the StudyResult was taken or derived.", - ) class Proposition(Entity): @@ -92,8 +94,8 @@ class SubjectVariantProposition(RootModel): class _SubjectVariantPropositionBase(Entity, ABC): - subjectVariant: MolecularVariation | CategoricalVariant | iriReference | None = ( - Field(None, description="A variant that is the subject of the Proposition.") + subjectVariant: MolecularVariation | CategoricalVariant | iriReference = ( + Field(..., description="A variant that is the subject of the Proposition.") ) @@ -102,7 +104,7 @@ class ClinicalVariantProposition(_SubjectVariantPropositionBase): geneContextQualifier: MappableConcept | iriReference | None = Field( None, - description="Reports a gene impacted by the variant, which may contribute to the association described in the Proposition.", + description="Reports a gene impacted by the variant, which may contribute to the association described in the Proposition.", ) alleleOriginQualifier: MappableConcept | iriReference | None = Field( None, @@ -125,11 +127,11 @@ class ExperimentalVariantFunctionalImpactProposition(_SubjectVariantPropositionB ) objectSequenceFeature: iriReference | MappableConcept = Field( ..., - description="The sequence feature (typically a gene or gene product) on whose function the impact of the subject variant is reported.", + description="The sequence feature (typically a gene or gene product) on whose function the impact of the subject variant is reported.", ) experimentalContextQualifier: iriReference | Document | dict | None = Field( None, - description="An assay in which the reported variant functional impact was determined - providing a specific experimental context in which this effect is asserted to hold.", + description="An assay in which the reported variant functional impact was determined - providing a specific experimental context in which this effect is asserted to hold.", ) @@ -175,11 +177,11 @@ class VariantPathogenicityProposition(ClinicalVariantProposition): type: Literal["VariantPathogenicityProposition"] = Field( "VariantPathogenicityProposition", - description="MUST be 'VariantPathogenicityProposition'", + description="Must be 'VariantPathogenicityProposition'", ) predicate: str = "isCausalFor" objectCondition: Condition | iriReference = Field( - ..., description="The :ref:`Condition` for which the variant impact is stated." + ..., description="The Condition for which the variant impact is stated." ) penetranceQualifier: MappableConcept | None = Field( None, @@ -238,7 +240,7 @@ class VariantTherapeuticResponseProposition(ClinicalVariantProposition): ) conditionQualifier: Condition | iriReference = Field( ..., - description="Reports the disease context in which the variant's association with therapeutic sensitivity or resistance is evaluated. Note that this is a required qualifier in therapeutic response propositions.", + description="Reports the disease context in which the variant's association with therapeutic sensitivity or resistance is evaluated. Note that this is a required qualifier in therapeutic response propositions. ", ) @@ -301,7 +303,7 @@ class Document(Entity): """ type: Literal["Document"] = Field( - CoreType.DOCUMENT.value, description=f"Must be '{CoreType.DOCUMENT.value}'." + CoreType.DOCUMENT.value, description=f"Must be '{CoreType.DOCUMENT.value}'" ) subtype: MappableConcept | None = Field( None, @@ -321,15 +323,15 @@ class Document(Entity): | None ) = Field( None, - description="A `Digital Object Identifier `_ for the document.", + description="A [Digital Object Identifier](https://www.doi.org/the-identifier/what-is-a-doi/) for the document.", ) pmid: int | None = Field( None, - description="A `PubMed unique identifier `_ for the document.", + description="A [PubMed unique identifier](https://en.wikipedia.org/wiki/PubMed#PubMed_identifier) for the document.", ) -class Agent(Entity): +class Agent(EntityBase): """An autonomous actor (person, organization, or software agent) that bears some form of responsibility for an activity taking place, for the existence of an entity, or for another agent's activity. @@ -450,7 +452,7 @@ class Statement(InformationEntity): ) score: float | None = Field( None, - description="A quantitative score that indicates the strength of a Proposition's assessment in the direction indicated (i.e. how strongly supported or disputed the Proposition is believed to be). Depending on its implementation, a score may reflect how *confident* that agent is that the Proposition is true or false, or the *strength of evidence* they believe supports or disputes it. Instructions for how to interpret the menaing of a given score may be gleaned from the method or document referenced in 'specifiedBy' attribute.", + description="A quantitative score that indicates the strength of a Proposition's assessment in the direction indicated (i.e. how strongly supported or disputed the Proposition is believed to be). Depending on its implementation, a score may reflect how *confident* that agent is that the Proposition is true or false, or the *strength of evidence* they believe supports or disputes it. Instructions for how to interpret the menaing of a given score may be gleaned from the method or document referenced in 'specifiedBy' attribute. ", ) classification: MappableConcept | None = Field( None, diff --git a/src/ga4gh/va_spec/base/domain_entities.py b/src/ga4gh/va_spec/base/domain_entities.py index 5cbdb0e..030d9a2 100644 --- a/src/ga4gh/va_spec/base/domain_entities.py +++ b/src/ga4gh/va_spec/base/domain_entities.py @@ -8,7 +8,7 @@ class TraitSet(Element): """A set of conditions (diseases, phenotypes, traits) that are co-occurring.""" traits: list[MappableConcept] | None = Field( - ..., + None, min_length=2, description="A list of conditions (diseases, phenotypes, traits) that are co-occurring.", ) @@ -31,7 +31,7 @@ class TherapyGroup(Element): """A group of therapies that are applied together to treat a condition.""" therapies: list[MappableConcept] | None = Field( - ..., + None, min_length=2, description="A list of therapies that are applied together to treat a condition.", ) diff --git a/src/ga4gh/va_spec/base/experimental_variant_functional_impact.py b/src/ga4gh/va_spec/base/experimental_variant_functional_impact.py index 1318a41..19263ca 100644 --- a/src/ga4gh/va_spec/base/experimental_variant_functional_impact.py +++ b/src/ga4gh/va_spec/base/experimental_variant_functional_impact.py @@ -3,20 +3,20 @@ from typing import Literal from ga4gh.core.models import iriReference -from ga4gh.va_spec.base.core import DataSet, Method, StudyResult +from ga4gh.va_spec.base.core import StudyResultBase, DataSet, Method, StudyResult from ga4gh.vrs.models import MolecularVariation from pydantic import Field -class ExperimentalVariantFunctionalImpactStudyResult(StudyResult): +class ExperimentalVariantFunctionalImpactStudyResult(StudyResultBase): """A StudyResult that reports a functional impact score from a variant functional assay or study.""" type: Literal["ExperimentalVariantFunctionalImpactStudyResult"] = Field( "ExperimentalVariantFunctionalImpactStudyResult", description="MUST be 'ExperimentalVariantFunctionalImpactStudyResult'.", ) - focusVariant: MolecularVariation | iriReference | None = Field( - None, + focusVariant: MolecularVariation | iriReference = Field( + ..., description="The genetic variant for which a functional impact score is generated.", ) functionalImpactScore: float | None = Field( @@ -29,5 +29,5 @@ class ExperimentalVariantFunctionalImpactStudyResult(StudyResult): ) sourceDataSet: DataSet | None = Field( None, - description="The full data set that provided the reported the functional impact score.", + description="The full data set that provided the reported the functional impact score. ", ) diff --git a/tests/validation/test_va_spec_schema.py b/tests/validation/test_va_spec_schema.py index cb78a3f..7eb846e 100644 --- a/tests/validation/test_va_spec_schema.py +++ b/tests/validation/test_va_spec_schema.py @@ -12,7 +12,7 @@ class VaSpecSchema(str, Enum): """Enum for VA-Spec schema""" - AAC_2017 = "aac_2017" + AAC_2017 = "aac-2017" BASE = "base" @@ -48,7 +48,7 @@ def _update_va_spec_schema_mapping( VA_SPEC_SCHEMA_MAPPING = {schema: VaSpecSchemaMapping() for schema in VaSpecSchema} -SUBMODULES_DIR = Path(__file__).parents[2] / "submodules" / "va_spec" / "schema" +SUBMODULES_DIR = Path(__file__).parents[2] / "submodules" / "va_spec" / "schema" / "va-spec" # Get core + profiles classes @@ -106,17 +106,23 @@ def test_schema_class_fields(va_spec_schema, pydantic_models): pydantic_field_required = pydantic_model_field_info.is_required() if prop in required_schema_fields: - if prop != "type": + if prop in {"predicate", "type"}: + assert pydantic_model_field_info + else: assert pydantic_field_required, f"{pydantic_model}.{prop}" else: - assert not pydantic_field_required, f"{pydantic_model}.{prop}" + if prop == "date": + assert pydantic_model_field_info + else: + assert not pydantic_field_required, f"{pydantic_model}.{prop}" if "description" in property_def: - assert property_def["description"].replace( - "'", '"' - ) == pydantic_model_field_info.description.replace( - "'", '"' - ), f"{pydantic_model}.{prop}" + if prop not in {"date", "predicate"}: # special exceptions + assert property_def["description"].replace( + "'", '"' + ) == pydantic_model_field_info.description.replace( + "'", '"' + ), f"{pydantic_model}.{prop}" else: assert ( pydantic_model_field_info.description is None From 3190b21184e5dd0ca6e21ce17664e35744cc2e66 Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Wed, 22 Jan 2025 20:28:39 -0500 Subject: [PATCH 08/13] wip: extends --- src/ga4gh/va_spec/base/caf_study_result.py | 28 +++++- src/ga4gh/va_spec/base/core.py | 43 ++++++--- .../experimental_variant_functional_impact.py | 27 +++++- tests/validation/test_va_spec_models.py | 87 +++++++++++++++++++ tests/validation/test_va_spec_schema.py | 4 +- 5 files changed, 169 insertions(+), 20 deletions(-) create mode 100644 tests/validation/test_va_spec_models.py diff --git a/src/ga4gh/va_spec/base/caf_study_result.py b/src/ga4gh/va_spec/base/caf_study_result.py index 0d4098d..33abfec 100644 --- a/src/ga4gh/va_spec/base/caf_study_result.py +++ b/src/ga4gh/va_spec/base/caf_study_result.py @@ -1,15 +1,16 @@ """Cohort Allele Frequency Study Result Standard Profile""" + from __future__ import annotations -from typing import Literal +from typing import Any, Literal from ga4gh.core.models import iriReference -from ga4gh.va_spec.base.core import StudyResultBase, DataSet, StudyGroup, StudyResult +from ga4gh.va_spec.base.core import DataSet, StudyGroup, StudyResult from ga4gh.vrs.models import Allele from pydantic import Field -class CohortAlleleFrequencyStudyResult(StudyResultBase): +class CohortAlleleFrequencyStudyResult(StudyResult): """A StudyResult that reports measures related to the frequency of an Allele in a cohort""" type: Literal["CohortAlleleFrequencyStudyResult"] = Field( @@ -20,6 +21,9 @@ class CohortAlleleFrequencyStudyResult(StudyResultBase): None, description="The dataset from which the CohortAlleleFrequencyStudyResult was reported.", ) + focus: None = Field( + None, exclude=True, repr=False + ) # extends property in JSON Schema. Should not be used focusAllele: Allele | iriReference = Field( ..., description="The Allele for which frequency results are reported." ) @@ -42,3 +46,21 @@ class CohortAlleleFrequencyStudyResult(StudyResultBase): ) ancillaryResults: dict | None = None qualityMeasures: dict | None = None + + def __getattribute__(self, name: str) -> Any: # noqa: ANN401 + """Retrieve the value of the specified attribute + + :param name: Name of attribute being accessed + :return: The value of the specified attribute + :raises ValueError: If the attribute being accessed is not already defined in + CohortAlleleFrequencyStudyResult or the attribute is `focus` + """ + if name == "focus": + err_msg = f"'{type(self).__name__!r}' object has no attribute '{name!r}'" + raise AttributeError(err_msg) + return super().__getattribute__(name) + + +del CohortAlleleFrequencyStudyResult.model_fields[ + "focus" +] # Need to remove inherited property diff --git a/src/ga4gh/va_spec/base/core.py b/src/ga4gh/va_spec/base/core.py index dd57659..f0dbe91 100644 --- a/src/ga4gh/va_spec/base/core.py +++ b/src/ga4gh/va_spec/base/core.py @@ -5,10 +5,10 @@ from abc import ABC from datetime import date from enum import Enum -from typing import Annotated, Literal +from typing import Annotated, Any, Literal from ga4gh.cat_vrs.models import CategoricalVariant -from ga4gh.core.models import Entity, EntityBase, MappableConcept, iriReference +from ga4gh.core.models import Entity, MappableConcept, iriReference from ga4gh.va_spec.base.domain_entities import Condition, Therapeutic from ga4gh.vrs.models import MolecularVariation from pydantic import ( @@ -42,22 +42,20 @@ class InformationEntity(Entity): ) -class StudyResultBase(InformationEntity, ABC): - sourceDataSet: DataSet | None = Field( - None, - description="A larger DataSet from which the data included in the StudyResult was taken or derived.", - ) - - -class StudyResult(StudyResultBase): +class StudyResult(InformationEntity, ABC): """A collection of data items from a single study that pertain to a particular subject or experimental unit in the study, along with optional provenance information describing how these data items were generated. """ + focus: Entity | MappableConcept | iriReference = Field( ..., description="The specific participant, subject or experimental unit in a Study that data included in the StudyResult object is about - e.g. a particular variant in a population allele frequency dataset like ExAC or gnomAD.", ) + sourceDataSet: DataSet | None = Field( + None, + description="A larger DataSet from which the data included in the StudyResult was taken or derived.", + ) class Proposition(Entity): @@ -94,8 +92,8 @@ class SubjectVariantProposition(RootModel): class _SubjectVariantPropositionBase(Entity, ABC): - subjectVariant: MolecularVariation | CategoricalVariant | iriReference = ( - Field(..., description="A variant that is the subject of the Proposition.") + subjectVariant: MolecularVariation | CategoricalVariant | iriReference = Field( + ..., description="A variant that is the subject of the Proposition." ) @@ -331,7 +329,7 @@ class Document(Entity): ) -class Agent(EntityBase): +class Agent(Entity): """An autonomous actor (person, organization, or software agent) that bears some form of responsibility for an activity taking place, for the existence of an entity, or for another agent's activity. @@ -340,12 +338,31 @@ class Agent(EntityBase): type: Literal["Agent"] = Field( CoreType.AGENT.value, description=f"MUST be '{CoreType.AGENT.value}'." ) + label: None = Field( + None, exclude=True, repr=False + ) # extends property in JSON Schema. Should not be used name: str | None = Field(None, description="The given name of the Agent.") subtype: MappableConcept | None = Field( None, description="A specific type of agent the Agent object represents. Recommended subtypes include codes for `person`, `organization`, or `software`.", ) + def __getattribute__(self, name: str) -> Any: # noqa: ANN401 + """Retrieve the value of the specified attribute + + :param name: Name of attribute being accessed + :return: The value of the specified attribute + :raises ValueError: If the attribute being accessed is not already defined in + Agent or the attribute is `label` + """ + if name == "label": + err_msg = f"'{type(self).__name__!r}' object has no attribute '{name!r}'" + raise AttributeError(err_msg) + return super().__getattribute__(name) + + +del Agent.model_fields["label"] # Need to remove inherited property + class Direction(str, Enum): """A term indicating whether the Statement supports, disputes, or remains neutral diff --git a/src/ga4gh/va_spec/base/experimental_variant_functional_impact.py b/src/ga4gh/va_spec/base/experimental_variant_functional_impact.py index 19263ca..77089fc 100644 --- a/src/ga4gh/va_spec/base/experimental_variant_functional_impact.py +++ b/src/ga4gh/va_spec/base/experimental_variant_functional_impact.py @@ -1,20 +1,23 @@ """Cohort Allele Frequency Study Result Standard Profile""" -from typing import Literal +from typing import Any, Literal from ga4gh.core.models import iriReference -from ga4gh.va_spec.base.core import StudyResultBase, DataSet, Method, StudyResult +from ga4gh.va_spec.base.core import DataSet, Method, StudyResult from ga4gh.vrs.models import MolecularVariation from pydantic import Field -class ExperimentalVariantFunctionalImpactStudyResult(StudyResultBase): +class ExperimentalVariantFunctionalImpactStudyResult(StudyResult): """A StudyResult that reports a functional impact score from a variant functional assay or study.""" type: Literal["ExperimentalVariantFunctionalImpactStudyResult"] = Field( "ExperimentalVariantFunctionalImpactStudyResult", description="MUST be 'ExperimentalVariantFunctionalImpactStudyResult'.", ) + focus: None = Field( + None, exclude=True, repr=False + ) # extends property in JSON Schema. Should not be used focusVariant: MolecularVariation | iriReference = Field( ..., description="The genetic variant for which a functional impact score is generated.", @@ -31,3 +34,21 @@ class ExperimentalVariantFunctionalImpactStudyResult(StudyResultBase): None, description="The full data set that provided the reported the functional impact score. ", ) + + def __getattribute__(self, name: str) -> Any: # noqa: ANN401 + """Retrieve the value of the specified attribute + + :param name: Name of attribute being accessed + :return: The value of the specified attribute + :raises ValueError: If the attribute being accessed is not already defined in + ExperimentalVariantFunctionalImpactStudyResult or the attribute is `focus` + """ + if name == "focus": + err_msg = f"'{type(self).__name__!r}' object has no attribute '{name!r}'" + raise AttributeError(err_msg) + return super().__getattribute__(name) + + +del ExperimentalVariantFunctionalImpactStudyResult.model_fields[ + "focus" +] # Need to remove inherited property diff --git a/tests/validation/test_va_spec_models.py b/tests/validation/test_va_spec_models.py new file mode 100644 index 0000000..3049ab4 --- /dev/null +++ b/tests/validation/test_va_spec_models.py @@ -0,0 +1,87 @@ +"""Test VA Spec Pydantic model""" + +import pytest +from ga4gh.va_spec.base import ( + Agent, + CohortAlleleFrequencyStudyResult, + ExperimentalVariantFunctionalImpactStudyResult, +) +from ga4gh.va_spec.base.core import StudyGroup + + +def test_agent(): + """Ensure Agent model works as expected + + Tests that extends property is implemented correctly in the Pydantic models + """ + assert "label" not in Agent.model_fields + + agent = Agent(name="Joe") + assert agent.type == "Agent" + assert agent.name == "Joe" + + with pytest.raises(AttributeError, match="'Agent' object has no attribute 'label'"): + agent.label # noqa: B018 + + with pytest.raises(ValueError, match='"Agent" object has no field "label"'): + agent.label = "This is an agent" + + +def test_caf_study_result(): + """Ensure CohortAlleleFrequencyStudyResult model works as expected + + Tests that extends property is implemented correctly in the Pydantic models + """ + assert "focus" not in CohortAlleleFrequencyStudyResult.model_fields + + caf = CohortAlleleFrequencyStudyResult( + focusAllele="allele.json#/1", + focusAlleleCount=0, + focusAlleleFrequency=0, + locusAlleleCount=34086, + cohort=StudyGroup(id="ALL", label="Overall"), + ) + assert caf.focusAllele.root == "allele.json#/1" + assert caf.focusAlleleCount == 0 + assert caf.focusAlleleFrequency == 0 + assert caf.locusAlleleCount == 34086 + assert caf.cohort.id == "ALL" + assert caf.cohort.label == "Overall" + assert caf.cohort.type == "StudyGroup" + + with pytest.raises( + AttributeError, + match="'CohortAlleleFrequencyStudyResult' object has no attribute 'focus'", + ): + caf.focus # noqa: B018 + + with pytest.raises( + ValueError, + match='"CohortAlleleFrequencyStudyResult" object has no field "focus"', + ): + caf.focus = "focus" + + +def test_experimental_func_impact_study_result(): + """Ensure ExperimentalVariantFunctionalImpactStudyResult model works as expected + + Tests that extends property is implemented correctly in the Pydantic models + """ + assert "focus" not in ExperimentalVariantFunctionalImpactStudyResult.model_fields + + experimental_func_impact_study_result = ( + ExperimentalVariantFunctionalImpactStudyResult(focusVariant="allele.json#/1") + ) + assert experimental_func_impact_study_result.focusVariant.root == "allele.json#/1" + + with pytest.raises( + AttributeError, + match="'ExperimentalVariantFunctionalImpactStudyResult' object has no attribute 'focus'", + ): + experimental_func_impact_study_result.focus # noqa: B018 + + with pytest.raises( + ValueError, + match='"ExperimentalVariantFunctionalImpactStudyResult" object has no field "focus"', + ): + experimental_func_impact_study_result.focus = "focus" diff --git a/tests/validation/test_va_spec_schema.py b/tests/validation/test_va_spec_schema.py index 7eb846e..f92c42f 100644 --- a/tests/validation/test_va_spec_schema.py +++ b/tests/validation/test_va_spec_schema.py @@ -48,7 +48,9 @@ def _update_va_spec_schema_mapping( VA_SPEC_SCHEMA_MAPPING = {schema: VaSpecSchemaMapping() for schema in VaSpecSchema} -SUBMODULES_DIR = Path(__file__).parents[2] / "submodules" / "va_spec" / "schema" / "va-spec" +SUBMODULES_DIR = ( + Path(__file__).parents[2] / "submodules" / "va_spec" / "schema" / "va-spec" +) # Get core + profiles classes From 1564d38d268c0dccf82b5a59a502df05b2e38da8 Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Wed, 22 Jan 2025 20:35:29 -0500 Subject: [PATCH 09/13] add additional checks --- tests/validation/test_va_spec_models.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/validation/test_va_spec_models.py b/tests/validation/test_va_spec_models.py index 3049ab4..3f16ab4 100644 --- a/tests/validation/test_va_spec_models.py +++ b/tests/validation/test_va_spec_models.py @@ -1,5 +1,7 @@ """Test VA Spec Pydantic model""" +import json + import pytest from ga4gh.va_spec.base import ( Agent, @@ -20,6 +22,9 @@ def test_agent(): assert agent.type == "Agent" assert agent.name == "Joe" + assert "label" not in agent.model_dump() + assert "label" not in json.loads(agent.model_dump_json()) + with pytest.raises(AttributeError, match="'Agent' object has no attribute 'label'"): agent.label # noqa: B018 @@ -49,6 +54,9 @@ def test_caf_study_result(): assert caf.cohort.label == "Overall" assert caf.cohort.type == "StudyGroup" + assert "focus" not in caf.model_dump() + assert "focus" not in json.loads(caf.model_dump_json()) + with pytest.raises( AttributeError, match="'CohortAlleleFrequencyStudyResult' object has no attribute 'focus'", @@ -74,6 +82,11 @@ def test_experimental_func_impact_study_result(): ) assert experimental_func_impact_study_result.focusVariant.root == "allele.json#/1" + assert "focus" not in experimental_func_impact_study_result.model_dump() + assert "focus" not in json.loads( + experimental_func_impact_study_result.model_dump_json() + ) + with pytest.raises( AttributeError, match="'ExperimentalVariantFunctionalImpactStudyResult' object has no attribute 'focus'", From dc9270c1f4efa403f169451099374aaab6dc5724 Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Wed, 22 Jan 2025 20:57:30 -0500 Subject: [PATCH 10/13] wip: add field validators --- src/ga4gh/va_spec/base/caf_study_result.py | 7 ++++++- src/ga4gh/va_spec/base/core.py | 8 +++++++- .../base/experimental_variant_functional_impact.py | 7 ++++++- tests/validation/test_va_spec_models.py | 10 ++++++++++ 4 files changed, 29 insertions(+), 3 deletions(-) diff --git a/src/ga4gh/va_spec/base/caf_study_result.py b/src/ga4gh/va_spec/base/caf_study_result.py index 33abfec..fa095f5 100644 --- a/src/ga4gh/va_spec/base/caf_study_result.py +++ b/src/ga4gh/va_spec/base/caf_study_result.py @@ -7,7 +7,7 @@ from ga4gh.core.models import iriReference from ga4gh.va_spec.base.core import DataSet, StudyGroup, StudyResult from ga4gh.vrs.models import Allele -from pydantic import Field +from pydantic import Field, field_validator class CohortAlleleFrequencyStudyResult(StudyResult): @@ -60,6 +60,11 @@ def __getattribute__(self, name: str) -> Any: # noqa: ANN401 raise AttributeError(err_msg) return super().__getattribute__(name) + @field_validator("focus", mode="before") + def set_focus_to_none(cls, v: Any) -> None: # noqa: ANN401, N805 + """Set focus to None""" + return + del CohortAlleleFrequencyStudyResult.model_fields[ "focus" diff --git a/src/ga4gh/va_spec/base/core.py b/src/ga4gh/va_spec/base/core.py index f0dbe91..d09cd71 100644 --- a/src/ga4gh/va_spec/base/core.py +++ b/src/ga4gh/va_spec/base/core.py @@ -16,6 +16,7 @@ Field, RootModel, StringConstraints, + field_validator, ) ######################################### @@ -347,6 +348,11 @@ class Agent(Entity): description="A specific type of agent the Agent object represents. Recommended subtypes include codes for `person`, `organization`, or `software`.", ) + @field_validator("label", mode="before") + def set_label_to_none(cls, v: Any) -> None: # noqa: ANN401, N805 + """Set label to None""" + return + def __getattribute__(self, name: str) -> Any: # noqa: ANN401 """Retrieve the value of the specified attribute @@ -356,7 +362,7 @@ def __getattribute__(self, name: str) -> Any: # noqa: ANN401 Agent or the attribute is `label` """ if name == "label": - err_msg = f"'{type(self).__name__!r}' object has no attribute '{name!r}'" + err_msg = f"'{type(self).__name__!r}' object has no attribute '{name!r}'. Use 'name' instead." raise AttributeError(err_msg) return super().__getattribute__(name) diff --git a/src/ga4gh/va_spec/base/experimental_variant_functional_impact.py b/src/ga4gh/va_spec/base/experimental_variant_functional_impact.py index 77089fc..aeee77f 100644 --- a/src/ga4gh/va_spec/base/experimental_variant_functional_impact.py +++ b/src/ga4gh/va_spec/base/experimental_variant_functional_impact.py @@ -5,7 +5,7 @@ from ga4gh.core.models import iriReference from ga4gh.va_spec.base.core import DataSet, Method, StudyResult from ga4gh.vrs.models import MolecularVariation -from pydantic import Field +from pydantic import Field, field_validator class ExperimentalVariantFunctionalImpactStudyResult(StudyResult): @@ -48,6 +48,11 @@ def __getattribute__(self, name: str) -> Any: # noqa: ANN401 raise AttributeError(err_msg) return super().__getattribute__(name) + @field_validator("focus", mode="before") + def set_focus_to_none(cls, v: Any) -> None: # noqa: ANN401, N805 + """Set focus to None""" + return + del ExperimentalVariantFunctionalImpactStudyResult.model_fields[ "focus" diff --git a/tests/validation/test_va_spec_models.py b/tests/validation/test_va_spec_models.py index 3f16ab4..0e7ce29 100644 --- a/tests/validation/test_va_spec_models.py +++ b/tests/validation/test_va_spec_models.py @@ -31,6 +31,16 @@ def test_agent(): with pytest.raises(ValueError, match='"Agent" object has no field "label"'): agent.label = "This is an agent" + agent = Agent( + **{ # noqa: PIE804 + "name": "Joe", + "label": "Jane" + } + ) + + with pytest.raises(AttributeError, match="'Agent' object has no attribute 'label'"): + agent.label # noqa: B018 + def test_caf_study_result(): """Ensure CohortAlleleFrequencyStudyResult model works as expected From 28b5c15a0bbf288806872ee75d2eb36105c89459 Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Thu, 23 Jan 2025 14:02:37 -0500 Subject: [PATCH 11/13] update error msgs + update ga4gh versions --- pyproject.toml | 4 ++-- src/ga4gh/va_spec/base/core.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index faefe54..54ed989 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,8 +31,8 @@ keywords = [ requires-python = ">=3.10" dynamic = ["version"] dependencies = [ - #"ga4gh.vrs==2.0.0a13", - #"ga4gh.cat_vrs~=0.1.0", + "ga4gh.vrs==2.0.0a13", + "ga4gh.cat_vrs~=0.2.1", "pydantic==2.*" ] diff --git a/src/ga4gh/va_spec/base/core.py b/src/ga4gh/va_spec/base/core.py index d09cd71..c258476 100644 --- a/src/ga4gh/va_spec/base/core.py +++ b/src/ga4gh/va_spec/base/core.py @@ -362,7 +362,7 @@ def __getattribute__(self, name: str) -> Any: # noqa: ANN401 Agent or the attribute is `label` """ if name == "label": - err_msg = f"'{type(self).__name__!r}' object has no attribute '{name!r}'. Use 'name' instead." + err_msg = f"'{type(self).__name__!r}' object has no attribute '{name!r}'" raise AttributeError(err_msg) return super().__getattribute__(name) From 7cb7140554c373cfbaee1ae5cf9c7ee661fd3097 Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Thu, 23 Jan 2025 14:05:00 -0500 Subject: [PATCH 12/13] ruff --- tests/validation/test_va_spec_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/validation/test_va_spec_models.py b/tests/validation/test_va_spec_models.py index 0e7ce29..11776bc 100644 --- a/tests/validation/test_va_spec_models.py +++ b/tests/validation/test_va_spec_models.py @@ -34,7 +34,7 @@ def test_agent(): agent = Agent( **{ # noqa: PIE804 "name": "Joe", - "label": "Jane" + "label": "Jane", } ) From 2ea827ee9179262094e9f4cdba0876a6a389a54c Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Fri, 24 Jan 2025 17:11:35 -0500 Subject: [PATCH 13/13] fix type --- src/ga4gh/va_spec/base/caf_study_result.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ga4gh/va_spec/base/caf_study_result.py b/src/ga4gh/va_spec/base/caf_study_result.py index fa095f5..4364c70 100644 --- a/src/ga4gh/va_spec/base/caf_study_result.py +++ b/src/ga4gh/va_spec/base/caf_study_result.py @@ -34,7 +34,7 @@ class CohortAlleleFrequencyStudyResult(StudyResult): ..., description="The number of occurrences of all alleles at the locus in the cohort.", ) - focusAlleleFrequency: int = Field( + focusAlleleFrequency: float = Field( ..., description="The frequency of the focusAllele in the cohort." ) cohort: StudyGroup = Field(