From 00acf01962432bee48ffb6308d6fb8d70585479d Mon Sep 17 00:00:00 2001 From: Simon Harrer Date: Sun, 20 Oct 2024 22:20:11 +0200 Subject: [PATCH 01/20] Add custom properties in roles --- docs/README.md | 1 + schema/odcs-json-schema-latest.json | 3 +++ schema/odcs-json-schema-v3.0.0.json | 3 +++ 3 files changed, 7 insertions(+) diff --git a/docs/README.md b/docs/README.md index 06a3c82..b393d38 100644 --- a/docs/README.md +++ b/docs/README.md @@ -635,6 +635,7 @@ roles: | roles.access | Access | No | The type of access provided by the IAM role. | | roles.firstLevelApprovers | 1st Level Approvers | No | The name(s) of the first-level approver(s) of the role. | | roles.secondLevelApprovers | 2nd Level Approvers | No | The name(s) of the second-level approver(s) of the role. | +| roles.customProperties | Custom Properties | No | Any custom properties. | ## Service-Level Agreement (SLA) diff --git a/schema/odcs-json-schema-latest.json b/schema/odcs-json-schema-latest.json index 955bffa..357f5c5 100644 --- a/schema/odcs-json-schema-latest.json +++ b/schema/odcs-json-schema-latest.json @@ -2225,6 +2225,9 @@ "secondLevelApprovers": { "type": "string", "description": "The name(s) of the second-level approver(s) of the role." + }, + "customProperties": { + "$ref": "#/$defs/CustomProperties" } }, "required": ["role"] diff --git a/schema/odcs-json-schema-v3.0.0.json b/schema/odcs-json-schema-v3.0.0.json index 955bffa..357f5c5 100644 --- a/schema/odcs-json-schema-v3.0.0.json +++ b/schema/odcs-json-schema-v3.0.0.json @@ -2225,6 +2225,9 @@ "secondLevelApprovers": { "type": "string", "description": "The name(s) of the second-level approver(s) of the role." 
+ }, + "customProperties": { + "$ref": "#/$defs/CustomProperties" } }, "required": ["role"] From 7e6a312a7a20c19c556a28217db7c0e177c75ff3 Mon Sep 17 00:00:00 2001 From: Simon Harrer Date: Mon, 4 Nov 2024 10:21:52 +0100 Subject: [PATCH 02/20] Status is not related to dev/prod --- docs/README.md | 30 +++++++++---------- .../quality/column-completeness.odcs.yaml | 2 +- schema/odcs-json-schema-latest.json | 3 +- schema/odcs-json-schema-v3.0.0.json | 3 -- 4 files changed, 17 insertions(+), 21 deletions(-) diff --git a/docs/README.md b/docs/README.md index 9e7fa2f..f3c1c5a 100644 --- a/docs/README.md +++ b/docs/README.md @@ -64,21 +64,21 @@ tags: null ### Definitions -| Key | UX label | Required | Description | -|-------------------------|------------------|----------|------------------------------------------------------------------------------------------------| -| apiVersion | Standard version | Yes | Version of the standard used to build data contract. Default value is `v3.0.0`. | -| kind | Kind | Yes | The kind of file this is. Valid value is `DataContract`. | -| id | ID | Yes | A unique identifier used to reduce the risk of dataset name collisions, such as a UUID. | -| name | Name | No | Name of the data contract. | -| version | Version | Yes | Current version of the data contract. | -| status | Status | Yes | Current status of the data contract. Valid values are `production`, `test`, or `development`. | -| tenant | Tenant | No | Indicates the property the data is primarily associated with. Value is case insensitive. | -| domain | Domain | No | Name of the logical data domain. | -| dataProduct | Data Product | No | Name of the data product. | -| description | Description | No | Object containing the descriptions. | -| description.purpose | Purpose | No | Intended purpose for the provided data. | -| description.limitations | Limitations | No | Technical, compliance, and legal limitations for data use. 
| -| description.usage | Usage | No | Recommended usage of the data. | +| Key | UX label | Required | Description | +|-------------------------|------------------|----------|-----------------------------------------------------------------------------------------------| +| apiVersion | Standard version | Yes | Version of the standard used to build data contract. Default value is `v3.0.0`. | +| kind | Kind | Yes | The kind of file this is. Valid value is `DataContract`. | +| id | ID | Yes | A unique identifier used to reduce the risk of dataset name collisions, such as a UUID. | +| name | Name | No | Name of the data contract. | +| version | Version | Yes | Current version of the data contract. | +| status | Status | Yes | Current status of the data contract. | +| tenant | Tenant | No | Indicates the property the data is primarily associated with. Value is case insensitive. | +| domain | Domain | No | Name of the logical data domain. | +| dataProduct | Data Product | No | Name of the data product. | +| description | Description | No | Object containing the descriptions. | +| description.purpose | Purpose | No | Intended purpose for the provided data. | +| description.limitations | Limitations | No | Technical, compliance, and legal limitations for data use. | +| description.usage | Usage | No | Recommended usage of the data. 
| ## Schema diff --git a/docs/examples/quality/column-completeness.odcs.yaml b/docs/examples/quality/column-completeness.odcs.yaml index f137d28..abe3289 100644 --- a/docs/examples/quality/column-completeness.odcs.yaml +++ b/docs/examples/quality/column-completeness.odcs.yaml @@ -24,4 +24,4 @@ schema: dimension: completeness severity: error rule: nullCheck - businessImpact: operational \ No newline at end of file + businessImpact: operational diff --git a/schema/odcs-json-schema-latest.json b/schema/odcs-json-schema-latest.json index fe8686c..a450688 100644 --- a/schema/odcs-json-schema-latest.json +++ b/schema/odcs-json-schema-latest.json @@ -37,8 +37,7 @@ }, "status": { "type": "string", - "description": "Current status of the dataset. Valid values are `production`, `test`, or `development`.", - "examples": ["production", "test", "development"] + "description": "Current status of the dataset." }, "servers": { "type": "array", diff --git a/schema/odcs-json-schema-v3.0.0.json b/schema/odcs-json-schema-v3.0.0.json index fe8686c..1ecb880 100644 --- a/schema/odcs-json-schema-v3.0.0.json +++ b/schema/odcs-json-schema-v3.0.0.json @@ -2228,9 +2228,6 @@ "secondLevelApprovers": { "type": "string", "description": "The name(s) of the second-level approver(s) of the role." 
- }, - "customProperties": { - "$ref": "#/$defs/CustomProperties" } }, "required": ["role"] From b2876a7c517a2f651963d9090132eabcf7534d75 Mon Sep 17 00:00:00 2001 From: Simon Harrer Date: Mon, 11 Nov 2024 16:34:40 +0100 Subject: [PATCH 03/20] Add three new fields: - description.customProperties - description.authoritativeDefinitions - authoritativeDefinitions --- docs/README.md | 55 +++++++++++++++-------------- schema/odcs-json-schema-latest.json | 11 +++++- 2 files changed, 39 insertions(+), 27 deletions(-) diff --git a/docs/README.md b/docs/README.md index f3c1c5a..f868f12 100644 --- a/docs/README.md +++ b/docs/README.md @@ -64,21 +64,24 @@ tags: null ### Definitions -| Key | UX label | Required | Description | -|-------------------------|------------------|----------|-----------------------------------------------------------------------------------------------| -| apiVersion | Standard version | Yes | Version of the standard used to build data contract. Default value is `v3.0.0`. | -| kind | Kind | Yes | The kind of file this is. Valid value is `DataContract`. | -| id | ID | Yes | A unique identifier used to reduce the risk of dataset name collisions, such as a UUID. | -| name | Name | No | Name of the data contract. | -| version | Version | Yes | Current version of the data contract. | -| status | Status | Yes | Current status of the data contract. | -| tenant | Tenant | No | Indicates the property the data is primarily associated with. Value is case insensitive. | -| domain | Domain | No | Name of the logical data domain. | -| dataProduct | Data Product | No | Name of the data product. | -| description | Description | No | Object containing the descriptions. | -| description.purpose | Purpose | No | Intended purpose for the provided data. | -| description.limitations | Limitations | No | Technical, compliance, and legal limitations for data use. | -| description.usage | Usage | No | Recommended usage of the data. 
| +| Key | UX label | Required | Description | +|--------------------------------------|---------------------------|----------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| apiVersion | Standard version | Yes | Version of the standard used to build data contract. Default value is `v3.0.0`. | +| kind | Kind | Yes | The kind of file this is. Valid value is `DataContract`. | +| id | ID | Yes | A unique identifier used to reduce the risk of dataset name collisions, such as a UUID. | +| name | Name | No | Name of the data contract. | +| version | Version | Yes | Current version of the data contract. | +| status | Status | Yes | Current status of the data contract. | +| tenant | Tenant | No | Indicates the property the data is primarily associated with. Value is case insensitive. | +| domain | Domain | No | Name of the logical data domain. | +| dataProduct | Data Product | No | Name of the data product. | +| authoritativeDefinitions | Authoritative Definitions | No | List of links to sources that provide more details on the data contract. | +| description | Description | No | Object containing the descriptions. | +| description.purpose | Purpose | No | Intended purpose for the provided data. | +| description.limitations | Limitations | No | Technical, compliance, and legal limitations for data use. | +| description.usage | Usage | No | Recommended usage of the data. | +| description.authoritativeDefinitions | Authoritative Definitions | No | List of links to sources that provide more details on the dataset; examples would be a link to privacy statement, terms and conditions, license agreements, data catalog, or another tool. | +| description.customProperties | Custom Properties | No | Custom properties that are not part of the standard. 
| ## Schema @@ -214,16 +217,16 @@ schema: #### Applicable to Elements (either Objects or Properties) -| Key | UX label | Required | Description | -|--------------------------|------------------------------|----------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| name | Name | Yes | Name of the element. | -| physicalName | Physical Name | No | Physical name. | -| description | Description | No | Description of the element. | -| businessName | Business Name | No | The business name of the element. | -| authoritativeDefinitions | Authoritative Definitions | No | List of links to sources that provide more details on the table; examples would be a link to an external definition, a training video, a GitHub repo, Collibra, or another tool. See `authoritativeDefinitions` below. | -| quality | Quality | No | List of data quality attributes. | -| tags | Tags | No | A list of tags that may be assigned to the elements (object or property); the tags keyword may appear at any level. | -| customProperties | Custom Properties | No | Custom properties that are not part of the standard. | +| Key | UX label | Required | Description | +|--------------------------|------------------------------|----------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| name | Name | Yes | Name of the element. | +| physicalName | Physical Name | No | Physical name. | +| description | Description | No | Description of the element. | +| businessName | Business Name | No | The business name of the element. 
| +| authoritativeDefinitions | Authoritative Definitions | No | List of links to sources that provide more details on the table; examples would be a link to an external definition, a training video, a git repo, data catalog, or another tool. See `authoritativeDefinitions` below. | +| quality | Quality | No | List of data quality attributes. | +| tags | Tags | No | A list of tags that may be assigned to the elements (object or property); the tags keyword may appear at any level. | +| customProperties | Custom Properties | No | Custom properties that are not part of the standard. | #### Applicable to Objects @@ -734,7 +737,7 @@ servers: - **description**: A description of the server. - **environment**: The environment where the server operates (e.g., `prod`, `dev`, `uat`). There are no set values. - **roles**: An optional array of roles that have access to the server. -- **customProperties**: Any additional custom properties specific to the server. +- **customProperties**: Any additional custom properties specific to the server that are not part of the standard. ### Specific Server Properties diff --git a/schema/odcs-json-schema-latest.json b/schema/odcs-json-schema-latest.json index a450688..7ddc003 100644 --- a/schema/odcs-json-schema-latest.json +++ b/schema/odcs-json-schema-latest.json @@ -65,6 +65,12 @@ "limitations": { "type": "string", "description": "Limitations of the dataset." 
+ }, + "authoritativeDefinitions": { + "$ref": "#/$defs/AuthoritativeDefinitions" + }, + "customProperties": { + "$ref": "#/$defs/CustomProperties" } } }, @@ -110,6 +116,9 @@ "$ref": "#/$defs/ServiceLevelAgreementProperty" } }, + "authoritativeDefinitions": { + "$ref": "#/$defs/AuthoritativeDefinitions" + }, "customProperties": { "$ref": "#/$defs/CustomProperties" }, @@ -2105,7 +2114,7 @@ }, "AuthoritativeDefinitions": { "type": "array", - "description": "List of links to sources that provide more details on the dataset; examples would be a link to an external definition, a training video, a GitHub repo, Collibra, or another tool. Authoritative definitions follow the same structure in the standard.", + "description": "List of links to sources that provide more details on the dataset; examples would be a link to an external definition, a training video, a git repo, data catalog, or another tool. Authoritative definitions follow the same structure in the standard.", "items": { "type": "object", "properties": { From 88611d516c7540f1eea39176a4dfbeb94b6d1719 Mon Sep 17 00:00:00 2001 From: Simon Harrer Date: Mon, 11 Nov 2024 16:37:27 +0100 Subject: [PATCH 04/20] Add to full example --- docs/examples/all/full-example.odcs.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/examples/all/full-example.odcs.yaml b/docs/examples/all/full-example.odcs.yaml index 3dfffe2..f6ff684 100644 --- a/docs/examples/all/full-example.odcs.yaml +++ b/docs/examples/all/full-example.odcs.yaml @@ -10,6 +10,9 @@ description: purpose: Views built on top of the seller tables. 
limitations: Data based on seller perspective, no buyer information usage: Predict sales over time + authoritativeDefinitions: + - type: privacy-statement + url: https://example.com/gdpr.pdf tenant: ClimateQuantumInc kind: DataContract From d44786834ec172a70ecbb31202b2d93bc3e260f4 Mon Sep 17 00:00:00 2001 From: Simon Harrer Date: Mon, 11 Nov 2024 16:50:15 +0100 Subject: [PATCH 05/20] Introduce examples for status --- docs/README.md | 32 +++++++++---------- docs/examples/all/full-example.odcs.yaml | 2 +- ...stgresql-adventureworks-contract.odcs.yaml | 4 +-- .../data-types/all-data-types.odcs.yaml | 2 +- .../table-column-description.odcs.yaml | 2 +- .../quality/column-accuracy.odcs.yaml | 2 +- .../quality/column-completeness.odcs.yaml | 2 +- docs/examples/quality/column-custom.odcs.yaml | 2 +- .../quality/column-validity.odcs.yaml | 2 +- .../service-and-operational-roles.odcs.yaml | 4 +-- .../schema/all-schema-types.odcs.yaml | 4 +-- docs/examples/schema/table-column.odcs.yaml | 2 +- .../table-columns-with-partition.odcs.yaml | 2 +- docs/examples/server/azure-server.odcs.yaml | 2 +- docs/examples/server/kafka-server.odcs.yaml | 2 +- .../examples/sla/database-table-sla.odcs.yaml | 2 +- .../stakeholders/basic-four-dpo.odcs.yaml | 2 +- schema/odcs-json-schema-latest.json | 5 ++- 18 files changed, 39 insertions(+), 36 deletions(-) diff --git a/docs/README.md b/docs/README.md index f3c1c5a..a88654a 100644 --- a/docs/README.md +++ b/docs/README.md @@ -49,7 +49,7 @@ kind: DataContract id: 53581432-6c55-4ba2-a65f-72344a91553a name: seller_payments_v1 version: 1.1.0 # Data Contract Version -status: production +status: active domain: seller dataProduct: payments tenant: ClimateQuantumInc @@ -64,21 +64,21 @@ tags: null ### Definitions -| Key | UX label | Required | Description | -|-------------------------|------------------|----------|-----------------------------------------------------------------------------------------------| -| apiVersion | Standard version | Yes | Version
of the standard used to build data contract. Default value is `v3.0.0`. | -| kind | Kind | Yes | The kind of file this is. Valid value is `DataContract`. | -| id | ID | Yes | A unique identifier used to reduce the risk of dataset name collisions, such as a UUID. | -| name | Name | No | Name of the data contract. | -| version | Version | Yes | Current version of the data contract. | -| status | Status | Yes | Current status of the data contract. | -| tenant | Tenant | No | Indicates the property the data is primarily associated with. Value is case insensitive. | -| domain | Domain | No | Name of the logical data domain. | -| dataProduct | Data Product | No | Name of the data product. | -| description | Description | No | Object containing the descriptions. | -| description.purpose | Purpose | No | Intended purpose for the provided data. | -| description.limitations | Limitations | No | Technical, compliance, and legal limitations for data use. | -| description.usage | Usage | No | Recommended usage of the data. | +| Key | UX label | Required | Description | +|-------------------------|------------------|----------|----------------------------------------------------------------------------------------------------------------------------| +| apiVersion | Standard version | Yes | Version of the standard used to build data contract. Default value is `v3.0.0`. | +| kind | Kind | Yes | The kind of file this is. Valid value is `DataContract`. | +| id | ID | Yes | A unique identifier used to reduce the risk of dataset name collisions, such as a UUID. | +| name | Name | No | Name of the data contract. | +| version | Version | Yes | Current version of the data contract. | +| status | Status | Yes | Current status of the data contract. Examples are "proposed", "in development", "active", "deprecated", "retired". | +| tenant | Tenant | No | Indicates the property the data is primarily associated with. Value is case insensitive. 
| +| domain | Domain | No | Name of the logical data domain. | +| dataProduct | Data Product | No | Name of the data product. | +| description | Description | No | Object containing the descriptions. | +| description.purpose | Purpose | No | Intended purpose for the provided data. | +| description.limitations | Limitations | No | Technical, compliance, and legal limitations for data use. | +| description.usage | Usage | No | Recommended usage of the data. | ## Schema diff --git a/docs/examples/all/full-example.odcs.yaml b/docs/examples/all/full-example.odcs.yaml index 3dfffe2..c01647d 100644 --- a/docs/examples/all/full-example.odcs.yaml +++ b/docs/examples/all/full-example.odcs.yaml @@ -2,7 +2,7 @@ domain: seller # Domain dataProduct: my quantum # Data product name version: 1.1.0 # Version (follows semantic versioning) -status: current +status: active id: 53581432-6c55-4ba2-a65f-72344a91553a # Lots of information diff --git a/docs/examples/all/postgresql-adventureworks-contract.odcs.yaml b/docs/examples/all/postgresql-adventureworks-contract.odcs.yaml index 73950a6..922ecb3 100644 --- a/docs/examples/all/postgresql-adventureworks-contract.odcs.yaml +++ b/docs/examples/all/postgresql-adventureworks-contract.odcs.yaml @@ -1,10 +1,10 @@ version: "1.0.0" apiVersion: "v3.0.0" id: "6aeafdc1-ed62-4c8f-bf0a-da1061c98cdb" -status: "Development" +status: active kind: "DataContract" description: {} -schema: +schema: - name: "department" physicalName: "department" physicalType: "table" diff --git a/docs/examples/data-types/all-data-types.odcs.yaml b/docs/examples/data-types/all-data-types.odcs.yaml index beddd6b..e024a14 100644 --- a/docs/examples/data-types/all-data-types.odcs.yaml +++ b/docs/examples/data-types/all-data-types.odcs.yaml @@ -1,7 +1,7 @@ version: 1.0.0 kind: DataContract id: 53581432-6c55-4ba2-a65f-72344a91553a -status: current +status: active name: my_table dataProduct: my_quantum apiVersion: v3.0.0 diff --git 
a/docs/examples/fundamentals/table-column-description.odcs.yaml b/docs/examples/fundamentals/table-column-description.odcs.yaml index 57a1576..f0f1e3e 100644 --- a/docs/examples/fundamentals/table-column-description.odcs.yaml +++ b/docs/examples/fundamentals/table-column-description.odcs.yaml @@ -1,7 +1,7 @@ version: 1.0.0 kind: DataContract id: 53581432-6c55-4ba2-a65f-72344a91553a -status: current +status: active name: my_quantum apiVersion: v3.0.0 schema: diff --git a/docs/examples/quality/column-accuracy.odcs.yaml b/docs/examples/quality/column-accuracy.odcs.yaml index fb0a705..05edc52 100644 --- a/docs/examples/quality/column-accuracy.odcs.yaml +++ b/docs/examples/quality/column-accuracy.odcs.yaml @@ -1,7 +1,7 @@ version: 1.0.0 kind: DataContract id: 53581432-6c55-4ba2-a65f-72344a91553a -status: current +status: active name: my_table dataProduct: my_quantum apiVersion: v3.0.0 diff --git a/docs/examples/quality/column-completeness.odcs.yaml b/docs/examples/quality/column-completeness.odcs.yaml index abe3289..12e4eed 100644 --- a/docs/examples/quality/column-completeness.odcs.yaml +++ b/docs/examples/quality/column-completeness.odcs.yaml @@ -2,7 +2,7 @@ version: 1.0.0 apiVersion: v3.0.0 kind: DataContract id: 53581432-6c55-4ba2-a65f-72344a91553a -status: current +status: active name: my_table dataProduct: my_quantum schema: diff --git a/docs/examples/quality/column-custom.odcs.yaml b/docs/examples/quality/column-custom.odcs.yaml index 32d1fff..ae2e228 100644 --- a/docs/examples/quality/column-custom.odcs.yaml +++ b/docs/examples/quality/column-custom.odcs.yaml @@ -2,7 +2,7 @@ version: 1.0.0 apiVersion: v3.0.0 kind: DataContract id: 53581432-6c55-4ba2-a65f-72344a91553a -status: current +status: active name: my_table dataProduct: my_quantum schema: diff --git a/docs/examples/quality/column-validity.odcs.yaml b/docs/examples/quality/column-validity.odcs.yaml index 99db211..d9031ff 100644 --- a/docs/examples/quality/column-validity.odcs.yaml +++ 
b/docs/examples/quality/column-validity.odcs.yaml @@ -2,7 +2,7 @@ version: 1.0.0 apiVersion: v3.0.0 kind: DataContract id: 53581432-6c55-4ba2-a65f-72344a91553a -status: current +status: active name: my_table dataProduct: my_quantum schema: diff --git a/docs/examples/roles/service-and-operational-roles.odcs.yaml b/docs/examples/roles/service-and-operational-roles.odcs.yaml index b51a0a3..ed18b2f 100644 --- a/docs/examples/roles/service-and-operational-roles.odcs.yaml +++ b/docs/examples/roles/service-and-operational-roles.odcs.yaml @@ -1,7 +1,7 @@ version: 1.0.0 kind: DataContract id: 53581432-6c55-4ba2-a65f-72344a91553a -status: current +status: active name: my_table dataProduct: my_quantum schema: [] @@ -22,4 +22,4 @@ roles: - role: bq_unica_user_opr access: write firstLevelApprovers: Reporting Manager - secondLevelApprovers: 'mickey' \ No newline at end of file + secondLevelApprovers: 'mickey' diff --git a/docs/examples/schema/all-schema-types.odcs.yaml b/docs/examples/schema/all-schema-types.odcs.yaml index 45b02fc..7b9a633 100644 --- a/docs/examples/schema/all-schema-types.odcs.yaml +++ b/docs/examples/schema/all-schema-types.odcs.yaml @@ -1,7 +1,7 @@ version: 1.0.0 kind: DataContract id: 53581432-6c55-4ba2-a65f-72344a91553a -status: current +status: active name: my_quantum apiVersion: v3.0.0 schema: @@ -99,4 +99,4 @@ schema: physicalType: VARCHAR(40) - name: zip logicalType: string - physicalType: VARCHAR(15) \ No newline at end of file + physicalType: VARCHAR(15) diff --git a/docs/examples/schema/table-column.odcs.yaml b/docs/examples/schema/table-column.odcs.yaml index 183807c..f1ada88 100644 --- a/docs/examples/schema/table-column.odcs.yaml +++ b/docs/examples/schema/table-column.odcs.yaml @@ -1,7 +1,7 @@ version: 1.0.0 kind: DataContract id: 53581432-6c55-4ba2-a65f-72344a91553b -status: current +status: active name: my_table dataProduct: my_quantum apiVersion: v3.0.0 diff --git a/docs/examples/schema/table-columns-with-partition.odcs.yaml 
b/docs/examples/schema/table-columns-with-partition.odcs.yaml index 25a0cc1..3f4f9dc 100644 --- a/docs/examples/schema/table-columns-with-partition.odcs.yaml +++ b/docs/examples/schema/table-columns-with-partition.odcs.yaml @@ -1,7 +1,7 @@ version: 1.0.0 kind: DataContract id: 53581432-6c55-4ba2-a65f-72344a91553c -status: current +status: active name: my_table dataProduct: my_quantum apiVersion: v3.0.0 diff --git a/docs/examples/server/azure-server.odcs.yaml b/docs/examples/server/azure-server.odcs.yaml index 9d3c23d..95fb80d 100644 --- a/docs/examples/server/azure-server.odcs.yaml +++ b/docs/examples/server/azure-server.odcs.yaml @@ -2,7 +2,7 @@ version: 1.0.0 apiVersion: v3.0.0 kind: DataContract id: abc123 -status: pending +status: in development servers: - server: my-azure type: azure diff --git a/docs/examples/server/kafka-server.odcs.yaml b/docs/examples/server/kafka-server.odcs.yaml index 0dc94dc..57199b9 100644 --- a/docs/examples/server/kafka-server.odcs.yaml +++ b/docs/examples/server/kafka-server.odcs.yaml @@ -2,7 +2,7 @@ version: 1.0.0 apiVersion: v3.0.0 kind: DataContract id: abc123 -status: pending +status: in development schema: - name: Orders physicalName: orders.avro.v1 diff --git a/docs/examples/sla/database-table-sla.odcs.yaml b/docs/examples/sla/database-table-sla.odcs.yaml index 9348ab2..b756a9a 100644 --- a/docs/examples/sla/database-table-sla.odcs.yaml +++ b/docs/examples/sla/database-table-sla.odcs.yaml @@ -2,7 +2,7 @@ version: 1.0.0 apiVersion: v3.0.0 kind: DataContract id: 53581432-6c55-4ba2-a65f-72344a91553a -status: current +status: active name: my_table dataProduct: my_quantum schema: [] diff --git a/docs/examples/stakeholders/basic-four-dpo.odcs.yaml b/docs/examples/stakeholders/basic-four-dpo.odcs.yaml index cc870ac..a7159ca 100644 --- a/docs/examples/stakeholders/basic-four-dpo.odcs.yaml +++ b/docs/examples/stakeholders/basic-four-dpo.odcs.yaml @@ -2,7 +2,7 @@ version: 1.0.0 apiVersion: v3.0.0 kind: DataContract id: 
53581432-6c55-4ba2-a65f-72344a91553a -status: current +status: active name: my_table dataProduct: my_quantum schema: [ ] diff --git a/schema/odcs-json-schema-latest.json b/schema/odcs-json-schema-latest.json index a450688..2411dca 100644 --- a/schema/odcs-json-schema-latest.json +++ b/schema/odcs-json-schema-latest.json @@ -37,7 +37,10 @@ }, "status": { "type": "string", - "description": "Current status of the dataset." + "description": "Current status of the dataset.", + "examples": [ + "proposed", "in development", "active", "deprecated", "retired" + ] }, "servers": { "type": "array", From 86169848254a0d919235dd564cdb9d78a225bc34 Mon Sep 17 00:00:00 2001 From: "Dr. Simon Harrer" Date: Tue, 12 Nov 2024 08:38:25 +0100 Subject: [PATCH 06/20] Update docs/README.md Co-authored-by: Diego Carvallo --- docs/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/README.md b/docs/README.md index a88654a..24df538 100644 --- a/docs/README.md +++ b/docs/README.md @@ -71,7 +71,7 @@ tags: null | id | ID | Yes | A unique identifier used to reduce the risk of dataset name collisions, such as a UUID. | | name | Name | No | Name of the data contract. | | version | Version | Yes | Current version of the data contract. | -| status | Status | Yes | Current status of the data contract. Examples are "proposed", "in development", "active", "deprecated", "retired". | +| status | Status | Yes | Current status of the data contract. Examples are "proposed", "draft", "active", "deprecated", "retired". | | tenant | Tenant | No | Indicates the property the data is primarily associated with. Value is case insensitive. | | domain | Domain | No | Name of the logical data domain. | | dataProduct | Data Product | No | Name of the data product. | From 67bad529dad5777519ead5488da916b5142f9e1f Mon Sep 17 00:00:00 2001 From: "Dr. 
Simon Harrer" Date: Tue, 12 Nov 2024 08:38:52 +0100 Subject: [PATCH 07/20] Update odcs-json-schema-latest.json --- schema/odcs-json-schema-latest.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/schema/odcs-json-schema-latest.json b/schema/odcs-json-schema-latest.json index 2411dca..44ef4c2 100644 --- a/schema/odcs-json-schema-latest.json +++ b/schema/odcs-json-schema-latest.json @@ -39,7 +39,7 @@ "type": "string", "description": "Current status of the dataset.", "examples": [ - "proposed", "in development", "active", "deprecated", "retired" + "proposed", "draft", "active", "deprecated", "retired" ] }, "servers": { From 2ebe21b3ee8897f9da114fbeb29034a95879812b Mon Sep 17 00:00:00 2001 From: Dirk Van de Poel Date: Thu, 5 Dec 2024 21:21:40 +0100 Subject: [PATCH 08/20] Kafka schema examples Kafka schema examples, one with an inline example and another with an authoritativeDefinitions reference to an external Kafka schema registry. Signed-off-by: Dirk Van de Poel --- docs/examples/schema/kafka-schema.odcs.yaml | 33 +++++++++++++++++++ .../schema/kafka-schemaregistry.odcs.yaml | 20 +++++++++++ 2 files changed, 53 insertions(+) create mode 100644 docs/examples/schema/kafka-schema.odcs.yaml create mode 100644 docs/examples/schema/kafka-schemaregistry.odcs.yaml diff --git a/docs/examples/schema/kafka-schema.odcs.yaml b/docs/examples/schema/kafka-schema.odcs.yaml new file mode 100644 index 0000000..b264dca --- /dev/null +++ b/docs/examples/schema/kafka-schema.odcs.yaml @@ -0,0 +1,33 @@ +apiVersion: v3.0.0 +kind: DataContract +id: orders +status: development +name: Orders Event Stream +version: 0.0.1 +schema: +- name: Orders + physicalName: orders + logicalType: object + physicalType: topic + description: Orders Kafka topic + properties: + - name: cust_id + businessName: Customer ID + logicalType: string + physicalType: string + required: true + - name: prod_id + businessName: Product ID + logicalType: string + physicalType: string + required: true + - 
name: qty + businessName: Quantity + logicalType: integer + physicalType: int + required: true +servers: +- server: my-kafka + type: kafka + format: avro + host: kafkabroker1:9092 \ No newline at end of file diff --git a/docs/examples/schema/kafka-schemaregistry.odcs.yaml b/docs/examples/schema/kafka-schemaregistry.odcs.yaml new file mode 100644 index 0000000..b418954 --- /dev/null +++ b/docs/examples/schema/kafka-schemaregistry.odcs.yaml @@ -0,0 +1,20 @@ +apiVersion: v3.0.0 +kind: DataContract +id: orders +status: production +name: Orders Event Stream +version: 0.0.1 +schema: +- name: Orders + physicalName: orders + logicalType: object + physicalType: topic + description: Orders Kafka topic + authoritativeDefinitions: + - url: https://schema-registry:8081 + type: implementation +servers: +- server: my-kafka + type: kafka + format: avro + host: kafkabroker1:9092 \ No newline at end of file From 12c28ab45003ee4f9235a6ccfe4ba096062aafcf Mon Sep 17 00:00:00 2001 From: msorel-meta Date: Fri, 20 Dec 2024 11:43:37 +0100 Subject: [PATCH 09/20] Add Meta Analysis to list of vendors --- vendors.md | 1 + 1 file changed, 1 insertion(+) diff --git a/vendors.md b/vendors.md index 073c181..5380a00 100644 --- a/vendors.md +++ b/vendors.md @@ -14,6 +14,7 @@ catalogs, data quality platforms, security tools, and more. * [Data Contract Playground](https://data-catering.github.io/data-contract-playground/) - Playground site for creating, exporting and validating data contracts * [DQC.ai | DQ PLATFORM](https://www.dqc.ai/dqc-platform) - [Enhancing Data Quality with ODCS: A Standard Ensured by the DQ Platform ](https://www.dqc.ai/post/enhancing-data-quality-with-odcs-a-standard-ensured-by-the-dq-platform) +* [Meta Analysis](https://www.meta-analysis.fr/en/home/) - Governance repository & Data Catalog, supports ODCS via a [flexible metamodel and open API] (https://www.meta-analysis.fr/en/article-en/open-data-contract-standard-adoption/) . 
## Service providers From ecd40ad4a7051688b3e0594d5cc37138a628d91a Mon Sep 17 00:00:00 2001 From: msorel-meta Date: Fri, 20 Dec 2024 11:44:20 +0100 Subject: [PATCH 10/20] Update vendors.md - Meta Analysis Link --- vendors.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vendors.md b/vendors.md index 5380a00..3acd73d 100644 --- a/vendors.md +++ b/vendors.md @@ -14,7 +14,7 @@ catalogs, data quality platforms, security tools, and more. * [Data Contract Playground](https://data-catering.github.io/data-contract-playground/) - Playground site for creating, exporting and validating data contracts * [DQC.ai | DQ PLATFORM](https://www.dqc.ai/dqc-platform) - [Enhancing Data Quality with ODCS: A Standard Ensured by the DQ Platform ](https://www.dqc.ai/post/enhancing-data-quality-with-odcs-a-standard-ensured-by-the-dq-platform) -* [Meta Analysis](https://www.meta-analysis.fr/en/home/) - Governance repository & Data Catalog, supports ODCS via a [flexible metamodel and open API] (https://www.meta-analysis.fr/en/article-en/open-data-contract-standard-adoption/) . +* [Meta Analysis](https://www.meta-analysis.fr/en/home/) - Governance repository & Data Catalog, supports ODCS via a [flexible metamodel and open API](https://www.meta-analysis.fr/en/article-en/open-data-contract-standard-adoption/) . ## Service providers From 51a73acf74ef93d84803f2672b3ae0dd1177ebfe Mon Sep 17 00:00:00 2001 From: msorel-meta Date: Fri, 20 Dec 2024 11:44:59 +0100 Subject: [PATCH 11/20] Update vendors.md - formating --- vendors.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vendors.md b/vendors.md index 3acd73d..e063d4d 100644 --- a/vendors.md +++ b/vendors.md @@ -14,7 +14,7 @@ catalogs, data quality platforms, security tools, and more. 
* [Data Contract Playground](https://data-catering.github.io/data-contract-playground/) - Playground site for creating, exporting and validating data contracts * [DQC.ai | DQ PLATFORM](https://www.dqc.ai/dqc-platform) - [Enhancing Data Quality with ODCS: A Standard Ensured by the DQ Platform ](https://www.dqc.ai/post/enhancing-data-quality-with-odcs-a-standard-ensured-by-the-dq-platform) -* [Meta Analysis](https://www.meta-analysis.fr/en/home/) - Governance repository & Data Catalog, supports ODCS via a [flexible metamodel and open API](https://www.meta-analysis.fr/en/article-en/open-data-contract-standard-adoption/) . +* [Meta Analysis](https://www.meta-analysis.fr/en/home/) - Governance repository & Data Catalog, supports ODCS via a [flexible metamodel and open API](https://www.meta-analysis.fr/en/article-en/open-data-contract-standard-adoption/) ## Service providers From 5d93d5d883ac9cb8275e2b874c1a8dedf538f4bf Mon Sep 17 00:00:00 2001 From: Flook Peter Date: Sun, 22 Dec 2024 10:17:08 +0800 Subject: [PATCH 12/20] Create new JSON schema for v3.0.1, update changelog, update examples to use v3.0.1 --- CHANGELOG.md | 9 + docs/README.md | 30 +- docs/examples/all/full-example.odcs.yaml | 2 +- .../data-types/all-data-types.odcs.yaml | 2 +- .../table-column-description.odcs.yaml | 2 +- .../quality/column-accuracy.odcs.yaml | 2 +- .../quality/column-completeness.odcs.yaml | 2 +- docs/examples/quality/column-custom.odcs.yaml | 2 +- .../quality/column-validity.odcs.yaml | 2 +- .../service-and-operational-roles.odcs.yaml | 2 +- .../schema/all-schema-types.odcs.yaml | 2 +- docs/examples/schema/kafka-schema.odcs.yaml | 2 +- .../schema/kafka-schemaregistry.odcs.yaml | 2 +- docs/examples/schema/table-column.odcs.yaml | 2 +- .../table-columns-with-partition.odcs.yaml | 2 +- docs/examples/server/azure-server.odcs.yaml | 2 +- docs/examples/server/kafka-server.odcs.yaml | 2 +- .../examples/sla/database-table-sla.odcs.yaml | 2 +- .../stakeholders/basic-four-dpo.odcs.yaml | 2 +- 
schema/odcs-json-schema-latest.json | 6 +- schema/odcs-json-schema-v3.0.1.json | 2361 +++++++++++++++++ 21 files changed, 2405 insertions(+), 35 deletions(-) create mode 100644 schema/odcs-json-schema-v3.0.1.json diff --git a/CHANGELOG.md b/CHANGELOG.md index cc19e4b..5f802b3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,15 @@ image: "https://raw.githubusercontent.com/bitol-io/artwork/main/horizontal/color This document tracks the history and evolution of the **Open Data Contract Standard**. +# v3.0.1 - 2024-12-22 - APPROVED + +* Added field `authoritativeDefinitions` +* Added field `description.customProperties` +* Added field `description.authoritativeDefinitions` +* Added field `role.customProperties` +* Updated `status` field to include examples +* Updated `authoritativeDefinitions` description to be vendor agnostic + # v3.0.0 - 2024-10-21 - APPROVED * **New section**: Support & communication channels. diff --git a/docs/README.md b/docs/README.md index 27f5063..0a55acb 100644 --- a/docs/README.md +++ b/docs/README.md @@ -43,7 +43,7 @@ This section contains general information about the contract. ### Example ```YAML -apiVersion: v3.0.0 # Standard version +apiVersion: v3.0.1 # Standard version kind: DataContract id: 53581432-6c55-4ba2-a65f-72344a91553a @@ -66,16 +66,16 @@ tags: null | Key | UX label | Required | Description | |--------------------------------------|---------------------------|----------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| apiVersion | Standard version | Yes | Version of the standard used to build data contract. Default value is `v3.0.0`. | +| apiVersion | Standard version | Yes | Version of the standard used to build data contract. Default value is `v3.0.1`. | | kind | Kind | Yes | The kind of file this is. Valid value is `DataContract`. 
| | id | ID | Yes | A unique identifier used to reduce the risk of dataset name collisions, such as a UUID. | | name | Name | No | Name of the data contract. | | version | Version | Yes | Current version of the data contract. | -| status | Status | Yes | Current status of the data contract. Examples are "proposed", "draft", "active", "deprecated", "retired". | +| status | Status | Yes | Current status of the data contract. Examples are "proposed", "draft", "active", "deprecated", "retired". | | tenant | Tenant | No | Indicates the property the data is primarily associated with. Value is case insensitive. | | domain | Domain | No | Name of the logical data domain. | | dataProduct | Data Product | No | Name of the data product. | -| authoritativeDefinitions | Authoritative Definitions | No | List of links to sources that provide more details on the data contract. | +| authoritativeDefinitions | Authoritative Definitions | No | List of links to sources that provide more details on the data contract. | | description | Description | No | Object containing the descriptions. | | description.purpose | Purpose | No | Intended purpose for the provided data. | | description.limitations | Limitations | No | Technical, compliance, and legal limitations for data use. | @@ -218,16 +218,16 @@ schema: #### Applicable to Elements (either Objects or Properties) -| Key | UX label | Required | Description | -|--------------------------|------------------------------|----------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| name | Name | Yes | Name of the element. | -| physicalName | Physical Name | No | Physical name. | -| description | Description | No | Description of the element. | -| businessName | Business Name | No | The business name of the element. 
| -| authoritativeDefinitions | Authoritative Definitions | No | List of links to sources that provide more details on the table; examples would be a link to an external definition, a training video, a git repo, data catalog, or another tool. See `authoritativeDefinitions` below. | -| quality | Quality | No | List of data quality attributes. | -| tags | Tags | No | A list of tags that may be assigned to the elements (object or property); the tags keyword may appear at any level. | -| customProperties | Custom Properties | No | Custom properties that are not part of the standard. | +| Key | UX label | Required | Description | +|--------------------------|---------------------------|----------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| name | Name | Yes | Name of the element. | +| physicalName | Physical Name | No | Physical name. | +| description | Description | No | Description of the element. | +| businessName | Business Name | No | The business name of the element. | +| authoritativeDefinitions | Authoritative Definitions | No | List of links to sources that provide more details on the table; examples would be a link to an external definition, a training video, a git repo, data catalog, or another tool. See `authoritativeDefinitions` below. | +| quality | Quality | No | List of data quality attributes. | +| tags | Tags | No | A list of tags that may be assigned to the elements (object or property); the tags keyword may appear at any level. | +| customProperties | Custom Properties | No | Custom properties that are not part of the standard. | #### Applicable to Objects @@ -647,7 +647,7 @@ roles: | roles.access | Access | No | The type of access provided by the IAM role. | | roles.firstLevelApprovers | 1st Level Approvers | No | The name(s) of the first-level approver(s) of the role. 
| | roles.secondLevelApprovers | 2nd Level Approvers | No | The name(s) of the second-level approver(s) of the role. | -| roles.customProperties | Custom Properties | No | Any custom properties. | +| roles.customProperties | Custom Properties | No | Any custom properties. | ## Service-Level Agreement (SLA) diff --git a/docs/examples/all/full-example.odcs.yaml b/docs/examples/all/full-example.odcs.yaml index 9325763..0d74681 100644 --- a/docs/examples/all/full-example.odcs.yaml +++ b/docs/examples/all/full-example.odcs.yaml @@ -16,7 +16,7 @@ description: tenant: ClimateQuantumInc kind: DataContract -apiVersion: v3.0.0 # Standard version (follows semantic versioning) +apiVersion: v3.0.1 # Standard version (follows semantic versioning) # Infrastructure & servers servers: diff --git a/docs/examples/data-types/all-data-types.odcs.yaml b/docs/examples/data-types/all-data-types.odcs.yaml index e024a14..93d7b1f 100644 --- a/docs/examples/data-types/all-data-types.odcs.yaml +++ b/docs/examples/data-types/all-data-types.odcs.yaml @@ -4,7 +4,7 @@ id: 53581432-6c55-4ba2-a65f-72344a91553a status: active name: my_table dataProduct: my_quantum -apiVersion: v3.0.0 +apiVersion: v3.0.1 schema: - name: transactions_tbl description: Provides core payment metrics diff --git a/docs/examples/fundamentals/table-column-description.odcs.yaml b/docs/examples/fundamentals/table-column-description.odcs.yaml index f0f1e3e..aacadbb 100644 --- a/docs/examples/fundamentals/table-column-description.odcs.yaml +++ b/docs/examples/fundamentals/table-column-description.odcs.yaml @@ -3,7 +3,7 @@ kind: DataContract id: 53581432-6c55-4ba2-a65f-72344a91553a status: active name: my_quantum -apiVersion: v3.0.0 +apiVersion: v3.0.1 schema: - name: tbl description: Provides core payment metrics diff --git a/docs/examples/quality/column-accuracy.odcs.yaml b/docs/examples/quality/column-accuracy.odcs.yaml index 05edc52..445f0e3 100644 --- a/docs/examples/quality/column-accuracy.odcs.yaml +++ 
b/docs/examples/quality/column-accuracy.odcs.yaml @@ -4,7 +4,7 @@ id: 53581432-6c55-4ba2-a65f-72344a91553a status: active name: my_table dataProduct: my_quantum -apiVersion: v3.0.0 +apiVersion: v3.0.1 schema: - name: Air_Quality description: Air quality of the city of New York diff --git a/docs/examples/quality/column-completeness.odcs.yaml b/docs/examples/quality/column-completeness.odcs.yaml index 12e4eed..adfda79 100644 --- a/docs/examples/quality/column-completeness.odcs.yaml +++ b/docs/examples/quality/column-completeness.odcs.yaml @@ -1,5 +1,5 @@ version: 1.0.0 -apiVersion: v3.0.0 +apiVersion: v3.0.1 kind: DataContract id: 53581432-6c55-4ba2-a65f-72344a91553a status: active diff --git a/docs/examples/quality/column-custom.odcs.yaml b/docs/examples/quality/column-custom.odcs.yaml index ae2e228..80e68f8 100644 --- a/docs/examples/quality/column-custom.odcs.yaml +++ b/docs/examples/quality/column-custom.odcs.yaml @@ -1,5 +1,5 @@ version: 1.0.0 -apiVersion: v3.0.0 +apiVersion: v3.0.1 kind: DataContract id: 53581432-6c55-4ba2-a65f-72344a91553a status: active diff --git a/docs/examples/quality/column-validity.odcs.yaml b/docs/examples/quality/column-validity.odcs.yaml index d9031ff..406d919 100644 --- a/docs/examples/quality/column-validity.odcs.yaml +++ b/docs/examples/quality/column-validity.odcs.yaml @@ -1,5 +1,5 @@ version: 1.0.0 -apiVersion: v3.0.0 +apiVersion: v3.0.1 kind: DataContract id: 53581432-6c55-4ba2-a65f-72344a91553a status: active diff --git a/docs/examples/roles/service-and-operational-roles.odcs.yaml b/docs/examples/roles/service-and-operational-roles.odcs.yaml index ed18b2f..b0adde2 100644 --- a/docs/examples/roles/service-and-operational-roles.odcs.yaml +++ b/docs/examples/roles/service-and-operational-roles.odcs.yaml @@ -5,7 +5,7 @@ status: active name: my_table dataProduct: my_quantum schema: [] -apiVersion: v3.0.0 +apiVersion: v3.0.1 roles: - role: microstrategy_user_opr access: read diff --git 
a/docs/examples/schema/all-schema-types.odcs.yaml b/docs/examples/schema/all-schema-types.odcs.yaml index 7b9a633..908a19d 100644 --- a/docs/examples/schema/all-schema-types.odcs.yaml +++ b/docs/examples/schema/all-schema-types.odcs.yaml @@ -3,7 +3,7 @@ kind: DataContract id: 53581432-6c55-4ba2-a65f-72344a91553a status: active name: my_quantum -apiVersion: v3.0.0 +apiVersion: v3.0.1 schema: - name: tbl logicalType: object diff --git a/docs/examples/schema/kafka-schema.odcs.yaml b/docs/examples/schema/kafka-schema.odcs.yaml index b264dca..3e48ef1 100644 --- a/docs/examples/schema/kafka-schema.odcs.yaml +++ b/docs/examples/schema/kafka-schema.odcs.yaml @@ -1,4 +1,4 @@ -apiVersion: v3.0.0 +apiVersion: v3.0.1 kind: DataContract id: orders status: development diff --git a/docs/examples/schema/kafka-schemaregistry.odcs.yaml b/docs/examples/schema/kafka-schemaregistry.odcs.yaml index b418954..a5f5033 100644 --- a/docs/examples/schema/kafka-schemaregistry.odcs.yaml +++ b/docs/examples/schema/kafka-schemaregistry.odcs.yaml @@ -1,4 +1,4 @@ -apiVersion: v3.0.0 +apiVersion: v3.0.1 kind: DataContract id: orders status: production diff --git a/docs/examples/schema/table-column.odcs.yaml b/docs/examples/schema/table-column.odcs.yaml index f1ada88..863f15f 100644 --- a/docs/examples/schema/table-column.odcs.yaml +++ b/docs/examples/schema/table-column.odcs.yaml @@ -4,7 +4,7 @@ id: 53581432-6c55-4ba2-a65f-72344a91553b status: active name: my_table dataProduct: my_quantum -apiVersion: v3.0.0 +apiVersion: v3.0.1 schema: - name: tbl physicalType: table diff --git a/docs/examples/schema/table-columns-with-partition.odcs.yaml b/docs/examples/schema/table-columns-with-partition.odcs.yaml index 3f4f9dc..fdb9684 100644 --- a/docs/examples/schema/table-columns-with-partition.odcs.yaml +++ b/docs/examples/schema/table-columns-with-partition.odcs.yaml @@ -4,7 +4,7 @@ id: 53581432-6c55-4ba2-a65f-72344a91553c status: active name: my_table dataProduct: my_quantum -apiVersion: v3.0.0 +apiVersion: 
v3.0.1 schema: - name: tbl physicalType: table diff --git a/docs/examples/server/azure-server.odcs.yaml b/docs/examples/server/azure-server.odcs.yaml index 95fb80d..1c3cdf2 100644 --- a/docs/examples/server/azure-server.odcs.yaml +++ b/docs/examples/server/azure-server.odcs.yaml @@ -1,5 +1,5 @@ version: 1.0.0 -apiVersion: v3.0.0 +apiVersion: v3.0.1 kind: DataContract id: abc123 status: in development diff --git a/docs/examples/server/kafka-server.odcs.yaml b/docs/examples/server/kafka-server.odcs.yaml index 57199b9..e155f14 100644 --- a/docs/examples/server/kafka-server.odcs.yaml +++ b/docs/examples/server/kafka-server.odcs.yaml @@ -1,5 +1,5 @@ version: 1.0.0 -apiVersion: v3.0.0 +apiVersion: v3.0.1 kind: DataContract id: abc123 status: in development diff --git a/docs/examples/sla/database-table-sla.odcs.yaml b/docs/examples/sla/database-table-sla.odcs.yaml index b756a9a..53bd9ac 100644 --- a/docs/examples/sla/database-table-sla.odcs.yaml +++ b/docs/examples/sla/database-table-sla.odcs.yaml @@ -1,5 +1,5 @@ version: 1.0.0 -apiVersion: v3.0.0 +apiVersion: v3.0.1 kind: DataContract id: 53581432-6c55-4ba2-a65f-72344a91553a status: active diff --git a/docs/examples/stakeholders/basic-four-dpo.odcs.yaml b/docs/examples/stakeholders/basic-four-dpo.odcs.yaml index a7159ca..a48e54c 100644 --- a/docs/examples/stakeholders/basic-four-dpo.odcs.yaml +++ b/docs/examples/stakeholders/basic-four-dpo.odcs.yaml @@ -1,5 +1,5 @@ version: 1.0.0 -apiVersion: v3.0.0 +apiVersion: v3.0.1 kind: DataContract id: 53581432-6c55-4ba2-a65f-72344a91553a status: active diff --git a/schema/odcs-json-schema-latest.json b/schema/odcs-json-schema-latest.json index c469302..228115b 100644 --- a/schema/odcs-json-schema-latest.json +++ b/schema/odcs-json-schema-latest.json @@ -16,9 +16,9 @@ }, "apiVersion": { "type": "string", - "default": "v3.0.0", - "description": "Version of the standard used to build data contract. 
Default value is v3.0.0.", - "enum": ["v3.0.0", "v2.2.2", "v2.2.1", "v2.2.0"] + "default": "v3.0.1", + "description": "Version of the standard used to build data contract. Default value is v3.0.1.", + "enum": ["v3.0.1", "v3.0.0", "v2.2.2", "v2.2.1", "v2.2.0"] }, "id": { "type": "string", diff --git a/schema/odcs-json-schema-v3.0.1.json b/schema/odcs-json-schema-v3.0.1.json new file mode 100644 index 0000000..228115b --- /dev/null +++ b/schema/odcs-json-schema-v3.0.1.json @@ -0,0 +1,2361 @@ +{ + "$schema": "https://json-schema.org/draft/2019-09/schema", + "title": "Open Data Contract Standard (ODCS)", + "description": "An open data contract specification to establish agreement between data producers and consumers.", + "type": "object", + "properties": { + "version": { + "type": "string", + "description": "Current version of the data contract." + }, + "kind": { + "type": "string", + "default": "DataContract", + "description": "The kind of file this is. Valid value is `DataContract`.", + "enum": ["DataContract"] + }, + "apiVersion": { + "type": "string", + "default": "v3.0.1", + "description": "Version of the standard used to build data contract. Default value is v3.0.1.", + "enum": ["v3.0.1", "v3.0.0", "v2.2.2", "v2.2.1", "v2.2.0"] + }, + "id": { + "type": "string", + "description": "A unique identifier used to reduce the risk of dataset name collisions, such as a UUID." + }, + "name": { + "type": "string", + "description": "Name of the data contract." + }, + "tenant": { + "type": "string", + "description": "Indicates the property the data is primarily associated with. Value is case insensitive." 
+ }, + "tags": { + "$ref": "#/$defs/Tags" + }, + "status": { + "type": "string", + "description": "Current status of the dataset.", + "examples": [ + "proposed", "draft", "active", "deprecated", "retired" + ] + }, + "servers": { + "type": "array", + "description": "List of servers where the datasets reside.", + "items": { + "$ref": "#/$defs/Server" + } + }, + "dataProduct": { + "type": "string", + "description": "The name of the data product." + }, + "description": { + "type": "object", + "description": "High level description of the dataset.", + "properties": { + "usage": { + "type": "string", + "description": "Intended usage of the dataset." + }, + "purpose": { + "type": "string", + "description": "Purpose of the dataset." + }, + "limitations": { + "type": "string", + "description": "Limitations of the dataset." + }, + "authoritativeDefinitions": { + "$ref": "#/$defs/AuthoritativeDefinitions" + }, + "customProperties": { + "$ref": "#/$defs/CustomProperties" + } + } + }, + "domain": { + "type": "string", + "description": "Name of the logical data domain.", + "examples": ["imdb_ds_aggregate", "receiver_profile_out", "transaction_profile_out"] + }, + "schema": { + "type": "array", + "description": "A list of elements within the schema to be cataloged.", + "items": { + "$ref": "#/$defs/SchemaObject" + } + }, + "support": { + "$ref": "#/$defs/Support" + }, + "price": { + "$ref": "#/$defs/Pricing" + }, + "team": { + "type": "array", + "items": { + "$ref": "#/$defs/Team" + } + }, + "roles": { + "type": "array", + "description": "A list of roles that will provide user access to the dataset.", + "items": { + "$ref": "#/$defs/Role" + } + }, + "slaDefaultElement": { + "type": "string", + "description": "Element (using the element path notation) to do the checks on." + }, + "slaProperties": { + "type": "array", + "description": "A list of key/value pairs for SLA specific properties. 
There is no limit on the type of properties (more details to come).", + "items": { + "$ref": "#/$defs/ServiceLevelAgreementProperty" + } + }, + "authoritativeDefinitions": { + "$ref": "#/$defs/AuthoritativeDefinitions" + }, + "customProperties": { + "$ref": "#/$defs/CustomProperties" + }, + "contractCreatedTs": { + "type": "string", + "format": "date-time", + "description": "Timestamp in UTC of when the data contract was created." + } + }, + "required": ["version", "apiVersion", "kind", "id", "status"], + "additionalProperties": false, + "$defs": { + "Server": { + "type": "object", + "description": "Data source details of where data is physically stored.", + "properties": { + "server": { + "type": "string", + "description": "Identifier of the server." + }, + "type": { + "type": "string", + "description": "Type of the server.", + "enum": [ + "api", "athena", "azure", "bigquery", "clickhouse", "databricks", "denodo", "dremio", + "duckdb", "glue", "cloudsql", "db2", "informix", "kafka", "kinesis", "local", + "mysql", "oracle", "postgresql", "postgres", "presto", "pubsub", + "redshift", "s3", "sftp", "snowflake", "sqlserver", "synapse", "trino", "vertica", "custom" + ] + }, + "description": { + "type": "string", + "description": "Description of the server." 
+ }, + "environment": { + "type": "string", + "description": "Environment of the server.", + "examples": ["prod", "preprod", "dev", "uat"] + }, + "roles": { + "type": "array", + "description": "List of roles that have access to the server.", + "items": { + "$ref": "#/$defs/Role" + } + }, + "customProperties": { + "$ref": "#/$defs/CustomProperties" + } + }, + "allOf": [ + { + "if": { + "properties": { + "type": { + "const": "api" + } + }, + "required": ["type"] + }, + "then": { + "$ref": "#/$defs/ServerSource/ApiServer" + } + }, + { + "if": { + "properties": { + "type": { + "const": "athena" + } + }, + "required": ["type"] + }, + "then": { + "$ref": "#/$defs/ServerSource/AthenaServer" + } + }, + { + "if": { + "properties": { + "type": { + "const": "azure" + } + }, + "required": ["type"] + }, + "then": { + "$ref": "#/$defs/ServerSource/AzureServer" + } + }, + { + "if": { + "properties": { + "type": { + "const": "bigquery" + } + }, + "required": ["type"] + }, + "then": { + "$ref": "#/$defs/ServerSource/BigQueryServer" + } + }, + { + "if": { + "properties": { + "type": { + "const": "clickhouse" + } + }, + "required": ["type"] + }, + "then": { + "$ref": "#/$defs/ServerSource/ClickHouseServer" + } + }, + { + "if": { + "properties": { + "type": { + "const": "databricks" + } + }, + "required": ["type"] + }, + "then": { + "$ref": "#/$defs/ServerSource/DatabricksServer" + } + }, + { + "if": { + "properties": { + "type": { + "const": "denodo" + } + }, + "required": ["type"] + }, + "then": { + "$ref": "#/$defs/ServerSource/DenodoServer" + } + }, + { + "if": { + "properties": { + "type": { + "const": "dremio" + } + }, + "required": ["type"] + }, + "then": { + "$ref": "#/$defs/ServerSource/DremioServer" + } + }, + { + "if": { + "properties": { + "type": { + "const": "duckdb" + } + }, + "required": ["type"] + }, + "then": { + "$ref": "#/$defs/ServerSource/DuckdbServer" + } + }, + { + "if": { + "properties": { + "type": { + "const": "glue" + } + }, + "required": ["type"] + }, + 
"then": { + "$ref": "#/$defs/ServerSource/GlueServer" + } + }, + { + "if": { + "properties": { + "type": { + "const": "cloudsql" + } + }, + "required": ["type"] + }, + "then": { + "$ref": "#/$defs/ServerSource/GoogleCloudSqlServer" + } + }, + { + "if": { + "properties": { + "type": { + "const": "db2" + } + }, + "required": ["type"] + }, + "then": { + "$ref": "#/$defs/ServerSource/IBMDB2Server" + } + }, + { + "if": { + "properties": { + "type": { + "const": "informix" + } + }, + "required": ["type"] + }, + "then": { + "$ref": "#/$defs/ServerSource/InformixServer" + } + }, + + { + "if": { + "properties": { + "type": { + "const": "custom" + } + }, + "required": ["type"] + }, + "then": { + "$ref": "#/$defs/ServerSource/CustomServer" + } + }, + { + "if": { + "properties": { + "type": { + "const": "kafka" + } + }, + "required": ["type"] + }, + "then": { + "$ref": "#/$defs/ServerSource/KafkaServer" + } + }, + { + "if": { + "properties": { + "type": { + "const": "kinesis" + } + }, + "required": ["type"] + }, + "then": { + "$ref": "#/$defs/ServerSource/KinesisServer" + } + }, + { + "if": { + "properties": { + "type": { + "const": "local" + } + }, + "required": ["type"] + }, + "then": { + "$ref": "#/$defs/ServerSource/LocalServer" + } + }, + { + "if": { + "properties": { + "type": { + "const": "mysql" + } + }, + "required": ["type"] + }, + "then": { + "$ref": "#/$defs/ServerSource/MySqlServer" + } + }, + { + "if": { + "properties": { + "type": { + "const": "oracle" + } + }, + "required": ["type"] + }, + "then": { + "$ref": "#/$defs/ServerSource/OracleServer" + } + }, + { + "if": { + "properties": { + "type": { + "const": "postgresql" + } + }, + "required": ["type"] + }, + "then": { + "$ref": "#/$defs/ServerSource/PostgresServer" + } + }, + { + "if": { + "properties": { + "type": { + "const": "postgres" + } + }, + "required": ["type"] + }, + "then": { + "$ref": "#/$defs/ServerSource/PostgresServer" + } + }, + { + "if": { + "properties": { + "type": { + "const": "presto" + } + 
}, + "required": ["type"] + }, + "then": { + "$ref": "#/$defs/ServerSource/PrestoServer" + } + }, + { + "if": { + "properties": { + "type": { + "const": "pubsub" + } + }, + "required": ["type"] + }, + "then": { + "$ref": "#/$defs/ServerSource/PubSubServer" + } + }, + { + "if": { + "properties": { + "type": { + "const": "redshift" + } + }, + "required": ["type"] + }, + "then": { + "$ref": "#/$defs/ServerSource/RedshiftServer" + } + }, + { + "if": { + "properties": { + "type": { + "const": "s3" + } + }, + "required": ["type"] + }, + "then": { + "$ref": "#/$defs/ServerSource/S3Server" + } + }, + { + "if": { + "properties": { + "type": { + "const": "sftp" + } + }, + "required": ["type"] + }, + "then": { + "$ref": "#/$defs/ServerSource/SftpServer" + } + }, + { + "if": { + "properties": { + "type": { + "const": "snowflake" + } + }, + "required": ["type"] + }, + "then": { + "$ref": "#/$defs/ServerSource/SnowflakeServer" + } + }, + { + "if": { + "properties": { + "type": { + "const": "sqlserver" + } + }, + "required": ["type"] + }, + "then": { + "$ref": "#/$defs/ServerSource/SqlserverServer" + } + }, + { + "if": { + "properties": { + "type": { + "const": "synapse" + } + }, + "required": ["type"] + }, + "then": { + "$ref": "#/$defs/ServerSource/SynapseServer" + } + }, + { + "if": { + "properties": { + "type": { + "const": "trino" + } + }, + "required": ["type"] + }, + "then": { + "$ref": "#/$defs/ServerSource/TrinoServer" + } + }, + { + "if": { + "properties": { + "type": { + "const": "vertica" + } + }, + "required": ["type"] + }, + "then": { + "$ref": "#/$defs/ServerSource/VerticaServer" + } + } + ], + "required": ["server", "type"] + }, + "ServerSource": { + "ApiServer": { + "type": "object", + "title": "ApiServer", + "properties": { + "location": { + "type": "string", + "format": "uri", + "description": "The url to the API.", + "examples": [ + "https://api.example.com/v1" + ] + } + }, + "required": [ + "location" + ] + }, + "AthenaServer": { + "type": "object", +
"title": "AthenaServer", + "properties": { + "stagingDir": { + "type": "string", + "format": "uri", + "description": "Amazon Athena automatically stores query results and metadata information for each query that runs in a query result location that you can specify in Amazon S3.", + "examples": [ + "s3://my_storage_account_name/my_container/path" + ] + }, + "schema": { + "type": "string", + "description": "Identify the schema in the data source in which your tables exist." + }, + "catalog": { + "type": "string", + "description": "Identify the name of the Data Source, also referred to as a Catalog.", + "default": "awsdatacatalog" + }, + "regionName": { + "type": "string", + "description": "The region your AWS account uses.", + "examples": ["eu-west-1"] + } + }, + "required": [ + "stagingDir", + "schema" + ] + }, + "AzureServer": { + "type": "object", + "title": "AzureServer", + "properties": { + "location": { + "type": "string", + "format": "uri", + "description": "Fully qualified path to Azure Blob Storage or Azure Data Lake Storage (ADLS), supports globs.", + "examples": [ + "az://my_storage_account_name.blob.core.windows.net/my_container/path/*.parquet", + "abfss://my_storage_account_name.dfs.core.windows.net/my_container_name/path/*.parquet" + ] + }, + "format": { + "type": "string", + "enum": [ + "parquet", + "delta", + "json", + "csv" + ], + "description": "File format." + }, + "delimiter": { + "type": "string", + "enum": [ + "new_line", + "array" + ], + "description": "Only for format = json. How multiple json documents are delimited within one file" + } + }, + "required": [ + "location", + "format" + ] + }, + "BigQueryServer": { + "type": "object", + "title": "BigQueryServer", + "properties": { + "project": { + "type": "string", + "description": "The GCP project name." + }, + "dataset": { + "type": "string", + "description": "The GCP dataset name."
+ } + }, + "required": [ + "project", + "dataset" + ] + }, + "ClickHouseServer": { + "type": "object", + "title": "ClickHouseServer", + "properties": { + "host": { + "type": "string", + "description": "The host of the ClickHouse server." + }, + "port": { + "type": "integer", + "description": "The port to the ClickHouse server." + }, + "database": { + "type": "string", + "description": "The name of the database." + } + }, + "required": [ + "host", + "port", + "database" + ] + }, + "DatabricksServer": { + "type": "object", + "title": "DatabricksServer", + "properties": { + "host": { + "type": "string", + "description": "The Databricks host", + "examples": [ + "dbc-abcdefgh-1234.cloud.databricks.com" + ] + }, + "catalog": { + "type": "string", + "description": "The name of the Hive or Unity catalog" + }, + "schema": { + "type": "string", + "description": "The schema name in the catalog" + } + }, + "required": [ + "catalog", + "schema" + ] + }, + "DenodoServer": { + "type": "object", + "title": "DenodoServer", + "properties": { + "host": { + "type": "string", + "description": "The host of the Denodo server." + }, + "port": { + "type": "integer", + "description": "The port of the Denodo server." + }, + "database": { + "type": "string", + "description": "The name of the database." + } + }, + "required": [ + "host", + "port" + ] + }, + "DremioServer": { + "type": "object", + "title": "DremioServer", + "properties": { + "host": { + "type": "string", + "description": "The host of the Dremio server." + }, + "port": { + "type": "integer", + "description": "The port of the Dremio server." + }, + "schema": { + "type": "string", + "description": "The name of the schema." + } + }, + "required": [ + "host", + "port" + ] + }, + "DuckdbServer": { + "type": "object", + "title": "DuckdbServer", + "properties": { + "database": { + "type": "string", + "description": "Path to duckdb database file." + }, + "schema": { + "type": "string", + "description": "The name of the schema."
+ } + }, + "required": [ + "database" + ] + }, + "GlueServer": { + "type": "object", + "title": "GlueServer", + "properties": { + "account": { + "type": "string", + "description": "The AWS Glue account", + "examples": [ + "1234-5678-9012" + ] + }, + "database": { + "type": "string", + "description": "The AWS Glue database name", + "examples": [ + "my_database" + ] + }, + "location": { + "type": "string", + "format": "uri", + "description": "The AWS S3 path. Must be in the form of a URL.", + "examples": [ + "s3://datacontract-example-orders-latest/data/{model}" + ] + }, + "format": { + "type": "string", + "description": "The format of the files", + "examples": [ + "parquet", + "csv", + "json", + "delta" + ] + } + }, + "required": [ + "account", + "database" + ] + }, + "GoogleCloudSqlServer": { + "type": "object", + "title": "GoogleCloudSqlServer", + "properties": { + "host": { + "type": "string", + "description": "The host of the Google Cloud Sql server." + }, + "port": { + "type": "integer", + "description": "The port of the Google Cloud Sql server." + }, + "database": { + "type": "string", + "description": "The name of the database." + }, + "schema": { + "type": "string", + "description": "The name of the schema." + } + }, + "required": [ + "host", + "port", + "database", + "schema" + ] + }, + "IBMDB2Server": { + "type": "object", + "title": "IBMDB2Server", + "properties": { + "host": { + "type": "string", + "description": "The host of the IBM DB2 server." + }, + "port": { + "type": "integer", + "description": "The port of the IBM DB2 server." + }, + "database": { + "type": "string", + "description": "The name of the database." + }, + "schema": { + "type": "string", + "description": "The name of the schema." + } + }, + "required": [ + "host", + "port", + "database" + ] + }, + "InformixServer": { + "type": "object", + "title": "InformixServer", + "properties": { + "host": { + "type": "string", + "description": "The host to the Informix server. 
" + }, + "port": { + "type": "integer", + "description": "The port to the Informix server. Defaults to 9088." + }, + "database": { + "type": "string", + "description": "The name of the database." + } + }, + "required": [ + "host", + "database" + ] + }, + "CustomServer": { + "type": "object", + "title": "CustomServer", + "properties": { + "account": { + "type": "string", + "description": "Account used by the server." + }, + "catalog": { + "type": "string", + "description": "Name of the catalog." + }, + "database": { + "type": "string", + "description": "Name of the database." + }, + "dataset": { + "type": "string", + "description": "Name of the dataset." + }, + "delimiter": { + "type": "string", + "description": "Delimiter." + }, + "endpointUrl": { + "type": "string", + "description": "Server endpoint.", + "format": "uri" + }, + "format": { + "type": "string", + "description": "File format." + }, + "host": { + "type": "string", + "description": "Host name or IP address." + }, + "location": { + "type": "string", + "description": "A URL to a location.", + "format": "uri" + }, + "path": { + "type": "string", + "description": "Relative or absolute path to the data file(s)." + }, + "port": { + "type": "integer", + "description": "Port to the server. No default value is assumed for custom servers." + }, + "project": { + "type": "string", + "description": "Project name." + }, + "region": { + "type": "string", + "description": "Cloud region." + }, + "regionName": { + "type": "string", + "description": "Region name." + }, + "schema": { + "type": "string", + "description": "Name of the schema." + }, + "serviceName": { + "type": "string", + "description": "Name of the service." + }, + "stagingDir": { + "type": "string", + "description": "Staging directory." + }, + "warehouse": { + "type": "string", + "description": "Name of the cluster or warehouse." 
+ } + } + }, + "KafkaServer": { + "type": "object", + "title": "KafkaServer", + "description": "Kafka Server", + "properties": { + "host": { + "type": "string", + "description": "The bootstrap server of the kafka cluster." + }, + "format": { + "type": "string", + "description": "The format of the messages.", + "examples": ["json", "avro", "protobuf", "xml"], + "default": "json" + } + }, + "required": [ + "host" + ] + }, + "KinesisServer": { + "type": "object", + "title": "KinesisDataStreamsServer", + "description": "Kinesis Data Streams Server", + "properties": { + "region": { + "type": "string", + "description": "AWS region.", + "examples": [ + "eu-west-1" + ] + }, + "format": { + "type": "string", + "description": "The format of the record", + "examples": [ + "json", + "avro", + "protobuf" + ] + } + } + }, + "LocalServer": { + "type": "object", + "title": "LocalServer", + "properties": { + "path": { + "type": "string", + "description": "The relative or absolute path to the data file(s).", + "examples": [ + "./folder/data.parquet", + "./folder/*.parquet" + ] + }, + "format": { + "type": "string", + "description": "The format of the file(s)", + "examples": [ + "json", + "parquet", + "delta", + "csv" + ] + } + }, + "required": [ + "path", + "format" + ] + }, + "MySqlServer": { + "type": "object", + "title": "MySqlServer", + "properties": { + "host": { + "type": "string", + "description": "The host of the MySql server." + }, + "port": { + "type": "integer", + "description": "The port of the MySql server." + }, + "database": { + "type": "string", + "description": "The name of the database." 
+ } + }, + "required": [ + "host", + "port", + "database" + ] + }, + "OracleServer": { + "type": "object", + "title": "OracleServer", + "properties": { + "host": { + "type": "string", + "description": "The host to the oracle server", + "examples": [ + "localhost" + ] + }, + "port": { + "type": "integer", + "description": "The port to the oracle server.", + "examples": [ + 1523 + ] + }, + "serviceName": { + "type": "string", + "description": "The name of the service.", + "examples": [ + "service" + ] + } + }, + "required": [ + "host", + "port", + "serviceName" + ] + }, + "PostgresServer": { + "type": "object", + "title": "PostgresServer", + "properties": { + "host": { + "type": "string", + "description": "The host to the Postgres server" + }, + "port": { + "type": "integer", + "description": "The port to the Postgres server." + }, + "database": { + "type": "string", + "description": "The name of the database." + }, + "schema": { + "type": "string", + "description": "The name of the schema in the database." + } + }, + "required": [ + "host", + "port", + "database", + "schema" + ] + }, + "PrestoServer": { + "type": "object", + "title": "PrestoServer", + "properties": { + "host": { + "type": "string", + "description": "The host to the Presto server", + "examples": [ + "localhost:8080" + ] + }, + "catalog": { + "type": "string", + "description": "The name of the catalog.", + "examples": [ + "postgres" + ] + }, + "schema": { + "type": "string", + "description": "The name of the schema.", + "examples": [ + "public" + ] + } + }, + "required": [ + "host" + ] + }, + "PubSubServer": { + "type": "object", + "title": "PubSubServer", + "properties": { + "project": { + "type": "string", + "description": "The GCP project name." + } + }, + "required": [ + "project" + ] + }, + "RedshiftServer": { + "type": "object", + "title": "RedshiftServer", + "properties": { + "host": { + "type": "string", + "description": "An optional string describing the server." 
+ }, + "database": { + "type": "string", + "description": "The name of the database." + }, + "schema": { + "type": "string", + "description": "The name of the schema." + }, + "region": { + "type": "string", + "description": "AWS region of Redshift server.", + "examples": ["us-east-1"] + }, + "account": { + "type": "string", + "description": "The account used by the server." + } + }, + "required": [ + "database", + "schema" + ] + }, + "S3Server": { + "type": "object", + "title": "S3Server", + "properties": { + "location": { + "type": "string", + "format": "uri", + "description": "S3 URL, starting with `s3://`", + "examples": [ + "s3://datacontract-example-orders-latest/data/{model}/*.json" + ] + }, + "endpointUrl": { + "type": "string", + "format": "uri", + "description": "The server endpoint for S3-compatible servers.", + "examples": ["https://minio.example.com"] + }, + "format": { + "type": "string", + "enum": [ + "parquet", + "delta", + "json", + "csv" + ], + "description": "File format." + }, + "delimiter": { + "type": "string", + "enum": [ + "new_line", + "array" + ], + "description": "Only for format = json. How multiple json documents are delimited within one file" + } + }, + "required": [ + "location" + ] + }, + "SftpServer": { + "type": "object", + "title": "SftpServer", + "properties": { + "location": { + "type": "string", + "format": "uri", + "pattern": "^sftp://.*", + "description": "SFTP URL, starting with `sftp://`", + "examples": [ + "sftp://123.123.12.123/{model}/*.json" + ] + }, + "format": { + "type": "string", + "enum": [ + "parquet", + "delta", + "json", + "csv" + ], + "description": "File format." + }, + "delimiter": { + "type": "string", + "enum": [ + "new_line", + "array" + ], + "description": "Only for format = json. 
How multiple json documents are delimited within one file" + } + }, + "required": [ + "location" + ] + }, + "SnowflakeServer": { + "type": "object", + "title": "SnowflakeServer", + "properties": { + "host": { + "type": "string", + "description": "The host to the Snowflake server" + }, + "port": { + "type": "integer", + "description": "The port to the Snowflake server." + }, + "account": { + "type": "string", + "description": "The Snowflake account used by the server." + }, + "database": { + "type": "string", + "description": "The name of the database." + }, + "schema": { + "type": "string", + "description": "The name of the schema." + }, + "warehouse": { + "type": "string", + "description": "The name of the cluster of resources that is a Snowflake virtual warehouse." + } + }, + "required": [ + "account", + "database", + "schema" + ] + }, + "SqlserverServer": { + "type": "object", + "title": "SqlserverServer", + "properties": { + "host": { + "type": "string", + "description": "The host to the database server", + "examples": [ + "localhost" + ] + }, + "port": { + "type": "integer", + "description": "The port to the database server.", + "default": 1433, + "examples": [ + 1433 + ] + }, + "database": { + "type": "string", + "description": "The name of the database.", + "examples": [ + "database" + ] + }, + "schema": { + "type": "string", + "description": "The name of the schema in the database.", + "examples": [ + "dbo" + ] + } + }, + "required": [ + "host", + "database", + "schema" + ] + }, + "SynapseServer": { + "type": "object", + "title": "SynapseServer", + "properties": { + "host": { + "type": "string", + "description": "The host of the Synapse server." + }, + "port": { + "type": "integer", + "description": "The port of the Synapse server." + }, + "database": { + "type": "string", + "description": "The name of the database." 
+ } + }, + "required": [ + "host", + "port", + "database" + ] + }, + "TrinoServer": { + "type": "object", + "title": "TrinoServer", + "properties": { + "host": { + "type": "string", + "description": "The Trino host URL.", + "examples": [ + "localhost" + ] + }, + "port": { + "type": "integer", + "description": "The Trino port." + }, + "catalog": { + "type": "string", + "description": "The name of the catalog.", + "examples": [ + "hive" + ] + }, + "schema": { + "type": "string", + "description": "The name of the schema in the database.", + "examples": [ + "my_schema" + ] + } + }, + "required": [ + "host", + "port", + "catalog", + "schema" + ] + }, + "VerticaServer": { + "type": "object", + "title": "VerticaServer", + "properties": { + "host": { + "type": "string", + "description": "The host of the Vertica server." + }, + "port": { + "type": "integer", + "description": "The port of the Vertica server." + }, + "database": { + "type": "string", + "description": "The name of the database." + }, + "schema": { + "type": "string", + "description": "The name of the schema." + } + }, + "required": [ + "host", + "port", + "database", + "schema" + ] + } + }, + "SchemaElement": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Name of the element." + }, + "physicalType": { + "type": "string", + "description": "The physical element data type in the data source.", + "examples": ["table", "view", "topic", "file"] + }, + "description": { + "type": "string", + "description": "Description of the element." + }, + "businessName": { + "type": "string", + "description": "The business name of the element." 
+ }, + "authoritativeDefinitions": { + "$ref": "#/$defs/AuthoritativeDefinitions" + }, + "tags": { + "$ref": "#/$defs/Tags" + }, + "customProperties": { + "$ref": "#/$defs/CustomProperties" + } + } + }, + "SchemaObject": { + "type": "object", + "properties": { + "logicalType": { + "type": "string", + "description": "The logical element data type.", + "enum": ["object"] + }, + "physicalName": { + "type": "string", + "description": "Physical name.", + "examples": ["table_1_2_0"] + }, + "dataGranularityDescription": { + "type": "string", + "description": "Granular level of the data in the object.", + "examples": ["Aggregation by country"] + }, + "properties": { + "type": "array", + "description": "A list of properties for the object.", + "items": { + "$ref": "#/$defs/SchemaProperty" + } + }, + "quality": { + "$ref": "#/$defs/DataQualityChecks" + } + }, + "allOf": [ + { + "$ref": "#/$defs/SchemaElement" + } + ], + "required": ["name"], + "unevaluatedProperties": false + }, + "SchemaBaseProperty": { + "type": "object", + "properties": { + "primaryKey": { + "type": "boolean", + "description": "Boolean value specifying whether the element is primary or not. Default is false." + }, + "primaryKeyPosition": { + "type": "integer", + "default": -1, + "description": "If element is a primary key, the position of the primary key element. Starts from 1. Example of `account_id, name` being primary key columns, `account_id` has primaryKeyPosition 1 and `name` primaryKeyPosition 2. Default to -1." + }, + "logicalType": { + "type": "string", + "description": "The logical element data type.", + "enum": ["string", "date", "number", "integer", "object", "array", "boolean"] + }, + "logicalTypeOptions": { + "type": "object", + "description": "Additional optional metadata to describe the logical type." + }, + "physicalType": { + "type": "string", + "description": "The physical element data type in the data source. For example, VARCHAR(2), DOUBLE, INT." 
+ }, + "required": { + "type": "boolean", + "default": false, + "description": "Indicates if the element may contain Null values; possible values are true and false. Default is false." + }, + "unique": { + "type": "boolean", + "default": false, + "description": "Indicates if the element contains unique values; possible values are true and false. Default is false." + }, + "partitioned": { + "type": "boolean", + "default": false, + "description": "Indicates if the element is partitioned; possible values are true and false." + }, + "partitionKeyPosition": { + "type": "integer", + "default": -1, + "description": "If element is used for partitioning, the position of the partition element. Starts from 1. Example of `country, year` being partition columns, `country` has partitionKeyPosition 1 and `year` partitionKeyPosition 2. Default to -1." + }, + "classification": { + "type": "string", + "description": "Can be anything, like confidential, restricted, and public to more advanced categorization. Some companies like PayPal, use data classification indicating the class of data in the element; expected values are 1, 2, 3, 4, or 5.", + "examples": ["confidential", "restricted", "public"] + }, + "encryptedName": { + "type": "string", + "description": "The element name within the dataset that contains the encrypted element value. For example, unencrypted element `email_address` might have an encryptedName of `email_address_encrypt`." + }, + "transformSourceObjects": { + "type": "array", + "description": "List of objects in the data source used in the transformation.", + "items": { + "type": "string" + } + }, + "transformLogic": { + "type": "string", + "description": "Logic used in the element transformation." + }, + "transformDescription": { + "type": "string", + "description": "Describes the transform logic in very simple terms." 
+ }, + "examples": { + "type": "array", + "description": "List of sample element values.", + "items": { + "$ref": "#/$defs/AnyType" + } + }, + "criticalDataElement": { + "type": "boolean", + "default": false, + "description": "True or false indicator; If element is considered a critical data element (CDE) then true else false." + }, + "quality": { + "$ref": "#/$defs/DataQualityChecks" + } + }, + "allOf": [ + { + "$ref": "#/$defs/SchemaElement" + }, + { + "if": { + "properties": { + "logicalType": { + "const": "string" + } + } + }, + "then": { + "properties": { + "logicalTypeOptions": { + "type": "object", + "properties": { + "minLength": { + "type": "integer", + "minimum": 0, + "description": "Minimum length of the string." + }, + "maxLength": { + "type": "integer", + "minimum": 0, + "description": "Maximum length of the string." + }, + "pattern": { + "type": "string", + "description": "Regular expression pattern to define valid value. Follows regular expression syntax from ECMA-262 (https://262.ecma-international.org/5.1/#sec-15.10.1)." + }, + "format": { + "type": "string", + "examples": ["password", "byte", "binary", "email", "uuid", "uri", "hostname", "ipv4", "ipv6"], + "description": "Provides extra context about what format the string follows." + } + }, + "additionalProperties": false + } + } + } + }, + { + "if": { + "properties": { + "logicalType": { + "const": "date" + } + } + }, + "then": { + "properties": { + "logicalTypeOptions": { + "type": "object", + "properties": { + "format": { + "type": "string", + "examples": ["yyyy-MM-dd", "yyyy-MM-dd HH:mm:ss", "HH:mm:ss"], + "description": "Format of the date. Follows the format as prescribed by [JDK DateTimeFormatter](https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html). For example, format 'yyyy-MM-dd'." + }, + "exclusiveMaximum": { + "type": "boolean", + "default": false, + "description": "If set to true, all values are strictly less than the maximum value (values < maximum). 
Otherwise, less than or equal to the maximum value (values <= maximum)." + }, + "maximum": { + "type": "string", + "description": "All date values are less than or equal to this value (values <= maximum)." + }, + "exclusiveMinimum": { + "type": "boolean", + "default": false, + "description": "If set to true, all values are strictly greater than the minimum value (values > minimum). Otherwise, greater than or equal to the minimum value (values >= minimum)." + }, + "minimum": { + "type": "string", + "description": "All date values are greater than or equal to this value (values >= minimum)." + } + }, + "additionalProperties": false + } + } + } + }, + { + "if": { + "anyOf": [ + { + "properties": { + "logicalType": { + "const": "integer" + } + } + } + ] + }, + "then": { + "properties": { + "logicalTypeOptions": { + "type": "object", + "properties": { + "multipleOf": { + "type": "number", + "exclusiveMinimum": 0, + "description": "Values must be multiples of this number. For example, multiple of 5 has valid values 0, 5, 10, -5." + }, + "maximum": { + "type": "number", + "description": "All values are less than or equal to this value (values <= maximum)." + }, + "exclusiveMaximum": { + "type": "boolean", + "default": false, + "description": "If set to true, all values are strictly less than the maximum value (values < maximum). Otherwise, less than or equal to the maximum value (values <= maximum)." + }, + "minimum": { + "type": "number", + "description": "All values are greater than or equal to this value (values >= minimum)." + }, + "exclusiveMinimum": { + "type": "boolean", + "default": false, + "description": "If set to true, all values are strictly greater than the minimum value (values > minimum). Otherwise, greater than or equal to the minimum value (values >= minimum)." 
+ }, + "format": { + "type": "string", + "default": "i32", + "description": "Format of the value in terms of how many bits of space it can use and whether it is signed or unsigned (follows the Rust integer types).", + "enum": ["i8", "i16", "i32", "i64", "i128", "u8", "u16", "u32", "u64", "u128"] + } + }, + "additionalProperties": false + } + } + } + }, + { + "if": { + "anyOf": [ + { + "properties": { + "logicalType": { + "const": "number" + } + } + } + ] + }, + "then": { + "properties": { + "logicalTypeOptions": { + "type": "object", + "properties": { + "multipleOf": { + "type": "number", + "exclusiveMinimum": 0, + "description": "Values must be multiples of this number. For example, multiple of 5 has valid values 0, 5, 10, -5." + }, + "maximum": { + "type": "number", + "description": "All values are less than or equal to this value (values <= maximum)." + }, + "exclusiveMaximum": { + "type": "boolean", + "default": false, + "description": "If set to true, all values are strictly less than the maximum value (values < maximum). Otherwise, less than or equal to the maximum value (values <= maximum)." + }, + "minimum": { + "type": "number", + "description": "All values are greater than or equal to this value (values >= minimum)." + }, + "exclusiveMinimum": { + "type": "boolean", + "default": false, + "description": "If set to true, all values are strictly greater than the minimum value (values > minimum). Otherwise, greater than or equal to the minimum value (values >= minimum)." 
+ }, + "format": { + "type": "string", + "default": "i32", + "description": "Format of the value in terms of how many bits of space it can use (follows the Rust float types).", + "enum": ["f32", "f64"] + } + }, + "additionalProperties": false + } + } + } + }, + { + "if": { + "properties": { + "logicalType": { + "const": "object" + } + } + }, + "then": { + "properties": { + "logicalTypeOptions": { + "type": "object", + "properties": { + "maxProperties": { + "type": "integer", + "minimum": 0, + "description": "Maximum number of properties." + }, + "minProperties": { + "type": "integer", + "minimum": 0, + "default": 0, + "description": "Minimum number of properties." + }, + "required": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1, + "uniqueItems": true, + "description": "Property names that are required to exist in the object." + } + }, + "additionalProperties": false + }, + "properties": { + "type": "array", + "description": "A list of properties for the object.", + "items": { + "$ref": "#/$defs/SchemaProperty" + } + } + } + } + }, + { + "if": { + "properties": { + "logicalType": { + "const": "array" + } + } + }, + "then": { + "properties": { + "logicalTypeOptions": { + "type": "object", + "properties": { + "maxItems": { + "type": "integer", + "minimum": 0, + "description": "Maximum number of items." + }, + "minItems": { + "type": "integer", + "minimum": 0, + "default": 0, + "description": "Minimum number of items" + }, + "uniqueItems": { + "type": "boolean", + "default": false, + "description": "If set to true, all items in the array are unique." + } + }, + "additionalProperties": false + }, + "items": { + "$ref": "#/$defs/SchemaItemProperty", + "description": "List of items in an array (only applicable when `logicalType: array`)." 
+ } + } + } + } + ] + }, + "SchemaProperty": { + "type": "object", + "$ref": "#/$defs/SchemaBaseProperty", + "required": ["name"], + "unevaluatedProperties": false + }, + "SchemaItemProperty": { + "type": "object", + "$ref": "#/$defs/SchemaBaseProperty", + "properties": { + "properties": { + "type": "array", + "description": "A list of properties for the object.", + "items": { + "$ref": "#/$defs/SchemaProperty" + } + } + }, + "unevaluatedProperties": false + }, + "Tags": { + "type": "array", + "description": "A list of tags that may be assigned to the elements (object or property); the tags keyword may appear at any level.", + "items": { + "type": "string" + } + }, + "DataQuality": { + "type": "object", + "properties": { + "authoritativeDefinitions": { + "$ref": "#/$defs/AuthoritativeDefinitions" + }, + "businessImpact": { + "type": "string", + "description": "Consequences of the rule failure.", + "examples": ["operational", "regulatory"] + }, + "customProperties": { + "type": "array", + "description": "Additional properties required for rule execution.", + "items": { + "$ref": "#/$defs/CustomProperty" + } + }, + "description": { + "type": "string", + "description": "Describe the quality check to be completed." + }, + "dimension": { + "type": "string", + "description": "The key performance indicator (KPI) or dimension for data quality.", + "enum": ["accuracy", "completeness", "conformity", "consistency", "coverage", "timeliness", "uniqueness"] + }, + "method": { + "type": "string", + "examples": ["reconciliation"] + }, + "name": { + "type": "string", + "description": "Name of the data quality check." 
+ }, + "schedule": { + "type": "string", + "description": "Rule execution schedule details.", + "examples": ["0 20 * * *"] + }, + "scheduler": { + "type": "string", + "description": "The name or type of scheduler used to start the data quality check.", + "examples": ["cron"] + }, + "severity": { + "type": "string", + "description": "The severity of the quality rule.", + "examples": ["info", "warning", "error"] + }, + "tags": { + "$ref": "#/$defs/Tags" + }, + "type": { + "type": "string", + "description": "The type of quality check. 'text' is human-readable text that describes the quality of the data. 'library' is a set of maintained predefined quality attributes such as row count or unique. 'sql' is an individual SQL query that returns a value that can be compared. 'custom' is quality attributes that are vendor-specific, such as Soda or Great Expectations.", + "enum": ["text", "library", "sql", "custom"], + "default": "library" + }, + "unit": { + "type": "string", + "description": "Unit the rule is using, popular values are `rows` or `percent`, but any value is allowed.", + "examples": ["rows", "percent"] + } + }, + "allOf": [ + { + "if": { + "properties": { + "type": { + "const": "library" + } + } + }, + "then": { + "$ref": "#/$defs/DataQualityLibrary" + } + }, + { + "if": { + "properties": { + "type": { + "const": "sql" + } + }, + "required": ["type"] + }, + "then": { + "$ref": "#/$defs/DataQualitySql" + } + }, + { + "if": { + "properties": { + "type": { + "const": "custom" + } + }, + "required": ["type"] + }, + "then": { + "$ref": "#/$defs/DataQualityCustom" + } + } + ] + }, + "DataQualityChecks": { + "type": "array", + "description": "Data quality rules with all the relevant information for rule setup and execution.", + "items": { + "$ref": "#/$defs/DataQuality" + } + }, + "DataQualityLibrary": { + "type": "object", + "properties": { + "rule": { + "type": "string", + "description": "Define a data quality check based on the predefined rules as per ODCS.", +
"examples": ["duplicateCount", "validValues", "rowCount"] + }, + "mustBe": { + "description": "Must be equal to the value to be valid. When using numbers, it is equivalent to '='." + }, + "mustNotBe": { + "description": "Must not be equal to the value to be valid. When using numbers, it is equivalent to '!='." + }, + "mustBeGreaterThan": { + "type": "number", + "description": "Must be greater than the value to be valid. It is equivalent to '>'." + }, + "mustBeGreaterOrEqualTo": { + "type": "number", + "description": "Must be greater than or equal to the value to be valid. It is equivalent to '>='." + }, + "mustBeLessThan": { + "type": "number", + "description": "Must be less than the value to be valid. It is equivalent to '<'." + }, + "mustBeLessOrEqualTo": { + "type": "number", + "description": "Must be less than or equal to the value to be valid. It is equivalent to '<='." + }, + "mustBeBetween": { + "type": "array", + "description": "Must be between the two numbers to be valid. Smallest number first in the array.", + "minItems": 2, + "maxItems": 2, + "uniqueItems": true, + "items": { + "type": "number" + } + }, + "mustNotBeBetween": { + "type": "array", + "description": "Must not be between the two numbers to be valid. 
Smallest number first in the array.", + "minItems": 2, + "maxItems": 2, + "uniqueItems": true, + "items": { + "type": "number" + } + } + }, + "required": ["rule"] + }, + "DataQualitySql": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "Query string that adheres to the dialect of the provided server.", + "examples": ["SELECT COUNT(*) FROM ${table} WHERE ${column} IS NOT NULL"] + } + }, + "required": ["query"] + }, + "DataQualityCustom": { + "type": "object", + "properties": { + "engine": { + "type": "string", + "description": "Name of the engine which executes the data quality checks.", + "examples": ["soda", "great-expectations", "monte-carlo", "dbt"] + }, + "implementation": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "object" + } + ] + } + }, + "required": ["engine", "implementation"] + }, + "AuthoritativeDefinitions": { + "type": "array", + "description": "List of links to sources that provide more details on the dataset; examples would be a link to an external definition, a training video, a git repo, data catalog, or another tool. Authoritative definitions follow the same structure in the standard.", + "items": { + "type": "object", + "properties": { + "url": { + "type": "string", + "description": "URL to the authority." + }, + "type": { + "type": "string", + "description": "Type of definition for authority: v2.3 adds standard values: `businessDefinition`, `transformationImplementation`, `videoTutorial`, `tutorial`, and `implementation`.", + "examples": ["businessDefinition", "transformationImplementation", "videoTutorial", "tutorial", "implementation"] + } + }, + "required": ["url", "type"] + } + }, + "Support": { + "type": "array", + "description": "Top level for support channels.", + "items": { + "$ref": "#/$defs/SupportItem" + } + }, + "SupportItem": { + "type": "object", + "properties": { + "channel": { + "type": "string", + "description": "Channel name or identifier." 
+ }, + "url": { + "type": "string", + "description": "Access URL using normal [URL scheme](https://en.wikipedia.org/wiki/URL#Syntax) (https, mailto, etc.)." + }, + "description": { + "type": "string", + "description": "Description of the channel, free text." + }, + "tool": { + "type": "string", + "description": "Name of the tool, value can be `email`, `slack`, `teams`, `discord`, `ticket`, or `other`.", + "examples": ["email", "slack", "teams", "discord", "ticket", "other"] + }, + "scope": { + "type": "string", + "description": "Scope can be: `interactive`, `announcements`, `issues`.", + "examples": ["interactive", "announcements", "issues"] + }, + "invitationUrl": { + "type": "string", + "description": "Some tools use an invitation URL for requesting or subscribing. Follows the [URL scheme](https://en.wikipedia.org/wiki/URL#Syntax)." + } + }, + "required": ["channel", "url"] + }, + "Pricing": { + "type": "object", + "properties": { + "priceAmount": { + "type": "number", + "description": "Subscription price per unit of measure in `priceUnit`." + }, + "priceCurrency": { + "type": "string", + "description": "Currency of the subscription price in `price.priceAmount`." + }, + "priceUnit": { + "type": "string", + "description": "The unit of measure for calculating cost. Examples megabyte, gigabyte." + } + } + }, + "Team": { + "type": "object", + "properties": { + "username": { + "type": "string", + "description": "The user's username or email." + }, + "role": { + "type": "string", + "description": "The user's job role; Examples might be owner, data steward. There is no limit on the role." + }, + "dateIn": { + "type": "string", + "format": "date", + "description": "The date when the user joined the team." + }, + "dateOut": { + "type": "string", + "format": "date", + "description": "The date when the user ceased to be part of the team." + }, + "replacedByUsername": { + "type": "string", + "description": "The username of the user who replaced the previous user."
+ } + } + }, + "Role": { + "type": "object", + "properties": { + "role": { + "type": "string", + "description": "Name of the IAM role that provides access to the dataset." + }, + "description": { + "type": "string", + "description": "Description of the IAM role and its permissions." + }, + "access": { + "type": "string", + "description": "The type of access provided by the IAM role." + }, + "firstLevelApprovers": { + "type": "string", + "description": "The name(s) of the first-level approver(s) of the role." + }, + "secondLevelApprovers": { + "type": "string", + "description": "The name(s) of the second-level approver(s) of the role." + }, + "customProperties": { + "$ref": "#/$defs/CustomProperties" + } + }, + "required": ["role"] + }, + "ServiceLevelAgreementProperty": { + "type": "object", + "properties": { + "property": { + "type": "string", + "description": "Specific property in SLA, check the periodic table. May require units (more details to come)." + }, + "value": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "number" + }, + { + "type": "integer" + }, + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "description": "Agreement value. The label will change based on the property itself." + }, + "valueExt": { + "$ref": "#/$defs/AnyNonCollectionType", + "description": "Extended agreement value. The label will change based on the property itself." + }, + "unit": { + "type": "string", + "description": "**d**, day, days for days; **y**, yr, years for years, etc. Units use the ISO standard." + }, + "element": { + "type": "string", + "description": "Element(s) to check on. Multiple elements should be extremely rare and, if so, separated by commas." 
+ }, + "driver": { + "type": "string", + "description": "Describes the importance of the SLA from the list of: `regulatory`, `analytics`, or `operational`.", + "examples": ["regulatory", "analytics", "operational"] + } + }, + "required": ["property", "value"] + }, + "CustomProperties": { + "type": "array", + "description": "A list of key/value pairs for custom properties.", + "items": { + "$ref": "#/$defs/CustomProperty" + } + }, + "CustomProperty": { + "type": "object", + "properties": { + "property": { + "type": "string", + "description": "The name of the key. Names should be in camel case–the same as if they were permanent properties in the contract." + }, + "value": { + "$ref": "#/$defs/AnyType", + "description": "The value of the key." + } + } + }, + "AnyType": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "number" + }, + { + "type": "integer" + }, + { + "type": "boolean" + }, + { + "type": "null" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "AnyNonCollectionType": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "number" + }, + { + "type": "integer" + }, + { + "type": "boolean" + }, + { + "type": "null" + } + ] + } + } +} From f60b9d311cd15d50eb05b2db75b64b490c5e3931 Mon Sep 17 00:00:00 2001 From: Flook Peter Date: Sun, 22 Dec 2024 10:20:08 +0800 Subject: [PATCH 13/20] Run github action check on pull request --- .github/workflows/validate-examples.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/validate-examples.yaml b/.github/workflows/validate-examples.yaml index 9f66220..f4de07b 100644 --- a/.github/workflows/validate-examples.yaml +++ b/.github/workflows/validate-examples.yaml @@ -1,5 +1,7 @@ name: validate-examples on: + pull_request: + branches: ["*"] push: branches: ["*"] From 5886348cce0d56a5e160bc6c33aefac01e400dd4 Mon Sep 17 00:00:00 2001 From: Flook Peter Date: Sun, 22 Dec 2024 10:24:42 +0800 Subject: [PATCH 14/20] Update validate examples script to use version v3.0.1 --- 
src/script/validate-examples.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/script/validate-examples.sh b/src/script/validate-examples.sh index b7ac1ac..74b6a5f 100644 --- a/src/script/validate-examples.sh +++ b/src/script/validate-examples.sh @@ -7,7 +7,7 @@ LIGHT_BLUE='\033[1;34m' NC='\033[0m' script_dir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) -json_schema_version=${JSON_SCHEMA_VERSION:-v3.0.0} +json_schema_version=${JSON_SCHEMA_VERSION:-v3.0.1} num_failed_validation=0 echo "Checking if $json_schema_version JSON schema is valid" From 57ff39e1df9bf154e209b14132e9535eef59d646 Mon Sep 17 00:00:00 2001 From: Flook Peter Date: Sun, 22 Dec 2024 10:26:27 +0800 Subject: [PATCH 15/20] Update full example to use array of authoritativeDefinitions --- docs/examples/all/full-example.odcs.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/examples/all/full-example.odcs.yaml b/docs/examples/all/full-example.odcs.yaml index 0d74681..a8a752b 100644 --- a/docs/examples/all/full-example.odcs.yaml +++ b/docs/examples/all/full-example.odcs.yaml @@ -11,8 +11,8 @@ description: limitations: Data based on seller perspective, no buyer information usage: Predict sales over time authoritativeDefinitions: - type: privacy-statement - url: https://example.com/gdpr.pdf + - type: privacy-statement + url: https://example.com/gdpr.pdf tenant: ClimateQuantumInc kind: DataContract From c07474af42237d7cfef5ed33e9e1328afa468b10 Mon Sep 17 00:00:00 2001 From: Diego Carvallo Date: Mon, 23 Dec 2024 00:02:38 +0000 Subject: [PATCH 16/20] add sample tags in contract readme examples Signed-off-by: Diego Carvallo --- docs/examples/all/full-example.odcs.yaml | 4 ++-- .../examples/all/postgresql-adventureworks-contract.odcs.yaml | 2 ++ docs/examples/quality/column-completeness.odcs.yaml | 4 +++- docs/examples/schema/all-schema-types.odcs.yaml | 2 +- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git 
a/docs/examples/all/full-example.odcs.yaml b/docs/examples/all/full-example.odcs.yaml index 3dfffe2..08961c0 100644 --- a/docs/examples/all/full-example.odcs.yaml +++ b/docs/examples/all/full-example.odcs.yaml @@ -35,7 +35,7 @@ schema: type: businessDefinition - url: https://youtu.be/jbY1BKFj9ec type: videoTutorial - tags: [ ] + tags: [ 'finance', 'payments'] dataGranularityDescription: Aggregation on columns txn_ref_dt, pmt_txn_id properties: - name: txn_ref_dt @@ -74,7 +74,7 @@ schema: partitioned: false partitionKeyPosition: -1 criticalDataElement: false - tags: [ ] + tags: [ 'uid' ] classification: restricted - name: rcvr_cntry_code primaryKey: false diff --git a/docs/examples/all/postgresql-adventureworks-contract.odcs.yaml b/docs/examples/all/postgresql-adventureworks-contract.odcs.yaml index 73950a6..faef388 100644 --- a/docs/examples/all/postgresql-adventureworks-contract.odcs.yaml +++ b/docs/examples/all/postgresql-adventureworks-contract.odcs.yaml @@ -82,6 +82,7 @@ schema: criticalDataElement: false primaryKey: false required: false + tag: ['sensitive'] - name: "loginid" logicalType: "string" physicalType: "varchar[256]" @@ -115,6 +116,7 @@ schema: criticalDataElement: false primaryKey: false required: false + tag: ['sensitive'] - name: "maritalstatus" logicalType: "string" physicalType: "bpchar" diff --git a/docs/examples/quality/column-completeness.odcs.yaml b/docs/examples/quality/column-completeness.odcs.yaml index f137d28..d2c50c9 100644 --- a/docs/examples/quality/column-completeness.odcs.yaml +++ b/docs/examples/quality/column-completeness.odcs.yaml @@ -13,6 +13,7 @@ schema: type: Reference definition on Data.gov dataGranularityDescription: Raw records physicalType: table + tags: ['nyc', 'regional'] properties: - name: UniqueID primaryKey: true @@ -24,4 +25,5 @@ schema: dimension: completeness severity: error rule: nullCheck - businessImpact: operational \ No newline at end of file + businessImpact: operational + \ No newline at end of file diff 
--git a/docs/examples/schema/all-schema-types.odcs.yaml b/docs/examples/schema/all-schema-types.odcs.yaml index 45b02fc..ef06b42 100644 --- a/docs/examples/schema/all-schema-types.odcs.yaml +++ b/docs/examples/schema/all-schema-types.odcs.yaml @@ -15,7 +15,7 @@ schema: type: businessDefinition - url: https://youtu.be/jbY1BKFj9ec type: videoTutorial - tags: [] + tags: ['finance'] dataGranularityDescription: Aggregation on columns txn_ref_dt, pmt_txn_id properties: - name: txn_ref_dt From 708dc46d6fb7fa153452df104c0fe5240a1facc2 Mon Sep 17 00:00:00 2001 From: Diego Carvallo Date: Mon, 23 Dec 2024 00:05:39 +0000 Subject: [PATCH 17/20] add tags to psql example Signed-off-by: Diego Carvallo --- .../examples/all/postgresql-adventureworks-contract.odcs.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/examples/all/postgresql-adventureworks-contract.odcs.yaml b/docs/examples/all/postgresql-adventureworks-contract.odcs.yaml index faef388..f62c62f 100644 --- a/docs/examples/all/postgresql-adventureworks-contract.odcs.yaml +++ b/docs/examples/all/postgresql-adventureworks-contract.odcs.yaml @@ -82,7 +82,7 @@ schema: criticalDataElement: false primaryKey: false required: false - tag: ['sensitive'] + tags: ['sensitive'] - name: "loginid" logicalType: "string" physicalType: "varchar[256]" @@ -116,7 +116,7 @@ schema: criticalDataElement: false primaryKey: false required: false - tag: ['sensitive'] + tags: ['sensitive'] - name: "maritalstatus" logicalType: "string" physicalType: "bpchar" From e6c89c7bc35c2d076ee1c489c6563467335e0bdb Mon Sep 17 00:00:00 2001 From: Diego Carvallo Date: Mon, 23 Dec 2024 00:15:41 +0000 Subject: [PATCH 18/20] add tags to README & update tags description Signed-off-by: Diego Carvallo --- docs/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/README.md b/docs/README.md index 2905046..7e018a3 100644 --- a/docs/README.md +++ b/docs/README.md @@ -59,7 +59,7 @@ description: limitations: null 
usage: null -tags: null +tags: ['finance'] ``` ### Definitions @@ -112,7 +112,7 @@ schema: type: businessDefinition - url: https://youtu.be/jbY1BKFj9ec type: videoTutorial - tags: null + tags: ['finance'] dataGranularityDescription: Aggregation on columns txn_ref_dt, pmt_txn_id properties: - name: txn_ref_dt @@ -222,7 +222,7 @@ schema: | businessName | Business Name | No | The business name of the element. | | authoritativeDefinitions | Authoritative Definitions | No | List of links to sources that provide more details on the table; examples would be a link to an external definition, a training video, a GitHub repo, Collibra, or another tool. See `authoritativeDefinitions` below. | | quality | Quality | No | List of data quality attributes. | -| tags | Tags | No | A list of tags that may be assigned to the elements (object or property); the tags keyword may appear at any level. | +| tags | Tags | No | A list of tags that may be assigned to the elements (object or property); the tags keyword may appear at any level. Tags may be used to better categorize an element. For example, `finance`, `sensitive`, `employee_record`. | | customProperties | Custom Properties | No | Custom properties that are not part of the standard. 
| #### Applicable to Objects From aa687830d52f74cea6efa739b6d5a7d79d5e5606 Mon Sep 17 00:00:00 2001 From: Diego Carvallo Date: Mon, 23 Dec 2024 15:19:36 +0000 Subject: [PATCH 19/20] update tag description to schema Signed-off-by: Diego Carvallo --- schema/odcs-json-schema-latest.json | 2 +- schema/odcs-json-schema-v3.0.0.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/schema/odcs-json-schema-latest.json b/schema/odcs-json-schema-latest.json index c469302..2d0d891 100644 --- a/schema/odcs-json-schema-latest.json +++ b/schema/odcs-json-schema-latest.json @@ -1912,7 +1912,7 @@ }, "Tags": { "type": "array", - "description": "A list of tags that may be assigned to the elements (object or property); the tags keyword may appear at any level.", + "description": "A list of tags that may be assigned to the elements (object or property); the tags keyword may appear at any level. Tags may be used to better categorize an element. For example, `finance`, `sensitive`, `employee_record`.", "items": { "type": "string" } diff --git a/schema/odcs-json-schema-v3.0.0.json b/schema/odcs-json-schema-v3.0.0.json index 1ecb880..65eaf40 100644 --- a/schema/odcs-json-schema-v3.0.0.json +++ b/schema/odcs-json-schema-v3.0.0.json @@ -1901,7 +1901,7 @@ }, "Tags": { "type": "array", - "description": "A list of tags that may be assigned to the elements (object or property); the tags keyword may appear at any level.", + "description": "A list of tags that may be assigned to the elements (object or property); the tags keyword may appear at any level. Tags may be used to better categorize an element. 
For example, `finance`, `sensitive`, `employee_record`.", "items": { "type": "string" } From e375f662b9eeaba0185c0a8448c171cb5d587786 Mon Sep 17 00:00:00 2001 From: Flook Peter Date: Fri, 3 Jan 2025 09:39:08 +0800 Subject: [PATCH 20/20] Updated changelog to reflect changes to JSON schema, moved tags updated description from v3.0.0 to v3.0.1 --- CHANGELOG.md | 9 +++++---- docs/README.md | 2 +- schema/odcs-json-schema-v3.0.0.json | 2 +- schema/odcs-json-schema-v3.0.1.json | 3 ++- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5f802b3..b21aeda 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,12 +8,13 @@ This document tracks the history and evolution of the **Open Data Contract Stand # v3.0.1 - 2024-12-22 - APPROVED -* Added field `authoritativeDefinitions` -* Added field `description.customProperties` -* Added field `description.authoritativeDefinitions` -* Added field `role.customProperties` +* Added field `authoritativeDefinitions` into JSON schema +* Added field `description.customProperties` into JSON schema +* Added field `description.authoritativeDefinitions` into JSON schema +* Added field `role.customProperties` into JSON schema * Updated `status` field to include examples * Updated `authoritativeDefinitions` description to be vendor agnostic +* Updated `tags` description and included examples # v3.0.0 - 2024-10-21 - APPROVED diff --git a/docs/README.md b/docs/README.md index f86aefb..475af3a 100644 --- a/docs/README.md +++ b/docs/README.md @@ -224,7 +224,7 @@ schema: | physicalName | Physical Name | No | Physical name. | | description | Description | No | Description of the element. | | businessName | Business Name | No | The business name of the element. | -| authoritativeDefinitions | Authoritative Definitions | No | List of links to sources that provide more details on the table; examples would be a link to an external definition, a training video, a GitHub repo, Collibra, or another tool. 
See `authoritativeDefinitions` below. | +| authoritativeDefinitions | Authoritative Definitions | No | List of links to sources that provide more details on the element; examples would be a link to privacy statement, terms and conditions, license agreements, data catalog, or another tool. | | quality | Quality | No | List of data quality attributes. | | tags | Tags | No | A list of tags that may be assigned to the elements (object or property); the tags keyword may appear at any level. Tags may be used to better categorize an element. For example, `finance`, `sensitive`, `employee_record`. | | customProperties | Custom Properties | No | Custom properties that are not part of the standard. | diff --git a/schema/odcs-json-schema-v3.0.0.json b/schema/odcs-json-schema-v3.0.0.json index 65eaf40..1ecb880 100644 --- a/schema/odcs-json-schema-v3.0.0.json +++ b/schema/odcs-json-schema-v3.0.0.json @@ -1901,7 +1901,7 @@ }, "Tags": { "type": "array", - "description": "A list of tags that may be assigned to the elements (object or property); the tags keyword may appear at any level. Tags may be used to better categorize an element. For example, `finance`, `sensitive`, `employee_record`.", + "description": "A list of tags that may be assigned to the elements (object or property); the tags keyword may appear at any level.", "items": { "type": "string" } diff --git a/schema/odcs-json-schema-v3.0.1.json b/schema/odcs-json-schema-v3.0.1.json index 228115b..44ff99e 100644 --- a/schema/odcs-json-schema-v3.0.1.json +++ b/schema/odcs-json-schema-v3.0.1.json @@ -1912,7 +1912,8 @@ }, "Tags": { "type": "array", - "description": "A list of tags that may be assigned to the elements (object or property); the tags keyword may appear at any level.", + "description": "A list of tags that may be assigned to the elements (object or property); the tags keyword may appear at any level. Tags may be used to better categorize an element. 
For example, `finance`, `sensitive`, `employee_record`.", + "examples": ["finance", "sensitive", "employee_record"], "items": { "type": "string" }