Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support v4.4 I/O schema #529

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ name: test
types: [test]

env:
KCIDB_IO_URL: kcidb-io@git+https://github.com/kernelci/kcidb-io.git@${{ github.event.client_payload.kcidb_io_ref || 'main' }}
KCIDB_IO_URL: kcidb-io@git+https://github.com/kernelci/kcidb-io.git@${{ github.event.client_payload.kcidb_io_ref || 'add_schema_v4_4' }}

jobs:
check_python:
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

set -e -u -o pipefail

if [ -n "$NO_VERIFY" ]; then
if [ -n "${NO_VERIFY:-}" ]; then
echo "Skipping pre-commit hook" >&2
exit 0
fi
Expand Down
2 changes: 1 addition & 1 deletion kcidb/db/bigquery/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import textwrap
from kcidb.db.schematic import Driver as SchematicDriver
from kcidb.db.bigquery.v04_02 import Schema as LatestSchema
from kcidb.db.bigquery.v04_03 import Schema as LatestSchema


class Driver(SchematicDriver):
Expand Down
15 changes: 15 additions & 0 deletions kcidb/db/bigquery/v04_00.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,6 +504,7 @@ class Schema(AbstractSchema):
checkout="SELECT\n"
" id,\n"
" git_commit_hash,\n"
" NULL AS git_commit_generation,\n"
" patchset_hash,\n"
" origin,\n"
" git_repository_url,\n"
Expand Down Expand Up @@ -584,6 +585,20 @@ class Schema(AbstractSchema):
' "" AS comment,\n'
' "" AS misc\n'
'FROM UNNEST([])',
transition='SELECT\n'
' "" AS id,\n'
' 0 AS version,\n'
' "" AS origin,\n'
' "" AS issue_id,\n'
' 0 AS issue_version,\n'
' FALSE AS appearance,\n'
' "" AS revision_before_git_commit_hash,\n'
' "" AS revision_before_patchset_hash,\n'
' "" AS revision_after_git_commit_hash,\n'
' "" AS revision_after_patchset_hash,\n'
' "" AS comment,\n'
' "" AS misc\n'
'FROM UNNEST([])',
)

@classmethod
Expand Down
6 changes: 5 additions & 1 deletion kcidb/db/bigquery/v04_02.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from google.cloud.bigquery.schema import SchemaField as Field
from google.cloud.bigquery.table import Table
import kcidb.io as io
from kcidb.misc import merge_dicts
from .v04_01 import Schema as PreviousSchema

# Module's logger
Expand Down Expand Up @@ -36,7 +37,10 @@ class Schema(PreviousSchema):
# A map of table names to the dictionary of fields and the names of their
# aggregation function, if any (the default is "ANY_VALUE").
AGGS_MAP = {
name: {TIMESTAMP_FIELD.name: "MAX"}
name: merge_dicts(
PreviousSchema.AGGS_MAP.get(name, {}),
{TIMESTAMP_FIELD.name: "MAX"}
)
for name in TABLE_MAP
}

Expand Down
196 changes: 196 additions & 0 deletions kcidb/db/bigquery/v04_03.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
"""Kernel CI report database - BigQuery schema v4.3"""

import logging
from google.cloud.bigquery.schema import SchemaField as Field
from google.cloud.bigquery.table import Table
from kcidb.misc import merge_dicts
import kcidb.io as io
from .v04_02 import Schema as PreviousSchema, TIMESTAMP_FIELD

# Module's logger
LOGGER = logging.getLogger(__name__)

# Field holding the generation number of a git commit
GIT_COMMIT_GENERATION_FIELD = Field(
    name="git_commit_generation",
    field_type="INTEGER",
    description="The commit generation number",
)

# Fields which together identify a revision; used below as the sub-fields
# of RECORD-typed columns
REVISION_ID_FIELDS = (
    Field(
        name="git_commit_hash",
        field_type="STRING",
        description=(
            "The full commit hash of the checked out base source code"
        ),
    ),
    Field(
        name="patchset_hash",
        field_type="STRING",
        description="The patchset hash",
    ),
)


class Schema(PreviousSchema):
    """BigQuery database schema v4.3"""

    # The schema's version.
    version = (4, 3)
    # The I/O schema the database schema supports
    io = io.schema.V4_4

    # A map of table names to their BigQuery schemas
    TABLE_MAP = merge_dicts(PreviousSchema.TABLE_MAP, {
        # The previous checkouts schema plus the commit generation number
        "checkouts": [
            *PreviousSchema.TABLE_MAP["checkouts"],
            GIT_COMMIT_GENERATION_FIELD,
        ],
        # The table of issue transitions
        "transitions": [
            TIMESTAMP_FIELD,
            Field(
                name="id", field_type="STRING",
                description="Transition ID",
            ),
            Field(
                name="version", field_type="INTEGER",
                description="Transition version number",
            ),
            Field(
                name="origin", field_type="STRING",
                description="The name of the CI system which "
                            "submitted the transition",
            ),
            Field(
                name="issue_id", field_type="STRING",
                description="ID of the transitioning issue",
            ),
            Field(
                name="issue_version", field_type="INTEGER",
                description="Version number of the transitioning issue",
            ),
            Field(
                name="appearance", field_type="BOOL",
                description="True if this is an issue appearance, "
                            "false if disappearance.",
            ),
            Field(
                name="revision_before", field_type="RECORD",
                fields=REVISION_ID_FIELDS,
                description="ID of the last-known revision "
                            "before the transition",
            ),
            Field(
                name="revision_after", field_type="RECORD",
                fields=REVISION_ID_FIELDS,
                description="ID of the first-known revision "
                            "after the transition",
            ),
            Field(
                name="comment", field_type="STRING",
                description="A human-readable comment "
                            "regarding the transition",
            ),
            Field(
                name="misc", field_type="STRING",
                description="Miscellaneous extra data about "
                            "the transition in JSON format",
            ),
        ],
    })

    # A map of table names and their "primary key" fields
    KEYS_MAP = dict(
        **PreviousSchema.KEYS_MAP,
        transitions=("id", "version"),
    )

    # A map of table names to the dictionary of fields and the names of their
    # aggregation function, if any (the default is "ANY_VALUE").
    AGGS_MAP = merge_dicts(
        PreviousSchema.AGGS_MAP,
        {"transitions": {TIMESTAMP_FIELD.name: "MAX"}},
    )

    # Queries for each type of raw object-oriented data
    OO_QUERIES = merge_dicts(
        PreviousSchema.OO_QUERIES,
        checkout=(
            "SELECT\n"
            " id,\n"
            " git_commit_hash,\n"
            # Read the generation number from the checkouts column
            " git_commit_generation,\n"
            " patchset_hash,\n"
            " origin,\n"
            " git_repository_url,\n"
            " git_repository_branch,\n"
            " tree_name,\n"
            " message_id,\n"
            " start_time,\n"
            " log_url,\n"
            " log_excerpt,\n"
            " comment,\n"
            " valid,\n"
            " misc\n"
            "FROM checkouts"
        ),
        transition=(
            # Outer query: flatten the revision records into plain columns
            "SELECT\n"
            " id,\n"
            " version,\n"
            " origin,\n"
            " issue_id,\n"
            " issue_version,\n"
            " appearance,\n"
            " revision_before.git_commit_hash AS "
            "revision_before_git_commit_hash,\n"
            " revision_before.patchset_hash AS "
            "revision_before_patchset_hash,\n"
            " revision_after.git_commit_hash AS "
            "revision_after_git_commit_hash,\n"
            " revision_after.patchset_hash AS "
            "revision_after_patchset_hash,\n"
            " comment,\n"
            " misc\n"
            # Inner query: rank the rows of each transition ID by
            # descending version, so that rank one is the highest version
            "FROM (\n"
            " SELECT\n"
            " id,\n"
            " version,\n"
            " origin,\n"
            " issue_id,\n"
            " issue_version,\n"
            " appearance,\n"
            " revision_before,\n"
            " revision_after,\n"
            " comment,\n"
            " misc,\n"
            " ROW_NUMBER() OVER (\n"
            " PARTITION BY id\n"
            " ORDER BY version DESC\n"
            " ) AS precedence\n"
            " FROM transitions\n"
            ")\n"
            # Keep only the top-ranked row per transition ID
            "WHERE precedence = 1"
        ),
    )

    @classmethod
    def _inherit(cls, conn):
        """
        Inherit the database data from the previous schema version (if any).

        Adds the commit generation column to the "_checkouts" table,
        replaces the query of the "checkouts" view, and creates every table
        listed in this schema's TABLE_MAP but not in the previous schema's.

        Args:
            conn:   Connection to the database to inherit. The database must
                    comply with the previous version of the schema.
        """
        assert isinstance(conn, cls.Connection)

        # Add the "git_commit_generation" field to _checkouts
        field = GIT_COMMIT_GENERATION_FIELD
        statement = (
            "\n"
            "ALTER TABLE `_checkouts`\n"
            "ADD COLUMN IF NOT EXISTS\n"
            f"`{field.name}`\n"
            f"{field.field_type}\n"
            f"OPTIONS(description={field.description!r})\n"
        )
        conn.query_create(statement).result()

        # Update the checkouts view, replacing only its query
        view = Table(conn.dataset_ref.table('checkouts'))
        view.view_query = cls._format_view_query(conn, 'checkouts')
        conn.client.update_table(view, ["view_query"])

        # Create the tables which the previous schema did not have
        new_table_names = [
            name for name in cls.TABLE_MAP
            if name not in PreviousSchema.TABLE_MAP
        ]
        for name in new_table_names:
            cls._create_table(conn, name)
2 changes: 1 addition & 1 deletion kcidb/db/postgresql/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import textwrap
from kcidb.db.schematic import Driver as SchematicDriver
from kcidb.db.postgresql.v04_06 import Schema as LatestSchema
from kcidb.db.postgresql.v04_07 import Schema as LatestSchema


class Driver(SchematicDriver):
Expand Down
32 changes: 32 additions & 0 deletions kcidb/db/postgresql/v04_00.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,7 @@ class Schema(AbstractSchema):
statement="SELECT\n"
" id,\n"
" git_commit_hash,\n"
" NULL AS git_commit_generation,\n"
" patchset_hash,\n"
" origin,\n"
" git_repository_url,\n"
Expand All @@ -320,6 +321,7 @@ class Schema(AbstractSchema):
schema=Table(dict(
id=TextColumn(),
git_commit_hash=TextColumn(),
git_commit_generation=IntegerColumn(),
patchset_hash=TextColumn(),
origin=TextColumn(),
git_repository_url=TextColumn(),
Expand Down Expand Up @@ -478,6 +480,36 @@ class Schema(AbstractSchema):
misc=JSONColumn(),
)),
),
transition=dict(
statement="SELECT\n"
" NULL AS id,\n"
" NULL AS version,\n"
" NULL AS origin,\n"
" NULL AS issue_id,\n"
" NULL AS issue_version,\n"
" NULL AS appearance,\n"
" NULL AS revision_before_git_commit_hash,\n"
" NULL AS revision_before_patchset_hash,\n"
" NULL AS revision_after_git_commit_hash,\n"
" NULL AS revision_after_patchset_hash,\n"
" NULL AS comment,\n"
" NULL AS misc\n"
"WHERE FALSE",
schema=Table(dict(
id=TextColumn(),
version=IntegerColumn(),
origin=TextColumn(),
issue_id=TextColumn(),
issue_version=IntegerColumn(),
appearance=BoolColumn(),
revision_before_git_commit_hash=TextColumn(),
revision_before_patchset_hash=TextColumn(),
revision_after_git_commit_hash=TextColumn(),
revision_after_patchset_hash=TextColumn(),
comment=TextColumn(),
misc=JSONColumn(),
)),
),
)

def init(self):
Expand Down
Loading