Skip to content

Commit

Permalink
db: Support I/O schema v4.4
Browse files Browse the repository at this point in the history
  • Loading branch information
spbnick committed Jun 6, 2024
1 parent f07813d commit 6c9e9e3
Show file tree
Hide file tree
Showing 8 changed files with 767 additions and 6 deletions.
2 changes: 1 addition & 1 deletion kcidb/db/bigquery/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import textwrap
from kcidb.db.schematic import Driver as SchematicDriver
from kcidb.db.bigquery.v04_02 import Schema as LatestSchema
from kcidb.db.bigquery.v04_03 import Schema as LatestSchema


class Driver(SchematicDriver):
Expand Down
190 changes: 190 additions & 0 deletions kcidb/db/bigquery/v04_03.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
"""Kernel CI report database - BigQuery schema v4.3"""

import logging
from google.cloud.bigquery.schema import SchemaField as Field
from google.cloud.bigquery.table import Table
from kcidb.misc import merge_dicts
import kcidb.io as io
from .v04_02 import Schema as PreviousSchema, TIMESTAMP_FIELD

# Logger used by this module
LOGGER = logging.getLogger(__name__)

# Schema of the Git commit generation number field
GIT_COMMIT_GENERATION_FIELD = Field(
    name="git_commit_generation",
    field_type="INTEGER",
    description="The commit generation number",
)

# Schemas of the fields which together make up a revision ID
REVISION_ID_FIELDS = (
    Field(
        name="git_commit_hash",
        field_type="STRING",
        description=(
            "The full commit hash of the checked out base source code"
        ),
    ),
    Field(
        name="patchset_hash",
        field_type="STRING",
        description="The patchset hash",
    ),
)


class Schema(PreviousSchema):
    """
    BigQuery database schema v4.3.

    Compared to the previous schema this one adds the
    "git_commit_generation" field to checkouts, and the "transitions"
    table, and declares support for I/O schema v4.4.
    """

    # The schema's version.
    version = (4, 3)
    # The I/O schema the database schema supports
    io = io.schema.V4_4

    # A map of table names to their BigQuery schemas
    TABLE_MAP = merge_dicts(PreviousSchema.TABLE_MAP, dict(
        # The previous checkouts' fields plus the commit generation number
        checkouts=PreviousSchema.TABLE_MAP["checkouts"] + [
            GIT_COMMIT_GENERATION_FIELD,
        ],
        transitions=[
            TIMESTAMP_FIELD,
            Field(
                "id", "STRING",
                description="Transition ID",
            ),
            Field(
                "version", "INTEGER",
                description="Transition version number",
            ),
            Field(
                "origin", "STRING",
                description="The name of the CI system which submitted "
                            "the transition",
            ),
            Field(
                "issue_id", "STRING",
                description="ID of the transitioning issue",
            ),
            Field(
                "issue_version", "INTEGER",
                description="Version number of the transitioning issue",
            ),
            Field(
                "appearance", "BOOL",
                description="True if this is an issue appearance, "
                            "false if disappearance.",
            ),
            Field(
                "revision_before", "RECORD", fields=REVISION_ID_FIELDS,
                description="ID of the last-known revision before the "
                            "transition"
            ),
            Field(
                "revision_after", "RECORD", fields=REVISION_ID_FIELDS,
                description="ID of the first-known revision after the "
                            "transition",
            ),
            Field(
                "comment", "STRING",
                description="A human-readable comment regarding the "
                            "transition",
            ),
            Field(
                "misc", "STRING",
                description="Miscellaneous extra data about the "
                            "transition in JSON format",
            ),
        ],
    ))

    # A map of table names and their "primary key" fields
    KEYS_MAP = dict(
        **PreviousSchema.KEYS_MAP,
        transitions=("id", "version",),
    )

    # Queries for each type of raw object-oriented data
    OO_QUERIES = merge_dicts(
        PreviousSchema.OO_QUERIES,
        checkout="SELECT\n"
                 " id,\n"
                 " git_commit_hash,\n"
                 # Implement this column
                 " git_commit_generation,\n"
                 " patchset_hash,\n"
                 " origin,\n"
                 " git_repository_url,\n"
                 " git_repository_branch,\n"
                 " tree_name,\n"
                 " message_id,\n"
                 " start_time,\n"
                 " log_url,\n"
                 " log_excerpt,\n"
                 " comment,\n"
                 " valid,\n"
                 " misc\n"
                 "FROM checkouts",
        # Implement transitions
        # The subquery ranks the rows of each transition ID by version
        # (highest first), and the outer query keeps the top-ranked row
        # only, flattening the revision records into separate columns.
        transition="SELECT\n"
                   " id,\n"
                   " version,\n"
                   " origin,\n"
                   " issue_id,\n"
                   " issue_version,\n"
                   " appearance,\n"
                   " revision_before.git_commit_hash AS "
                   "revision_before_git_commit_hash,\n"
                   " revision_before.patchset_hash AS "
                   "revision_before_patchset_hash,\n"
                   " revision_after.git_commit_hash AS "
                   "revision_after_git_commit_hash,\n"
                   " revision_after.patchset_hash AS "
                   "revision_after_patchset_hash,\n"
                   " comment,\n"
                   " misc\n"
                   "FROM (\n"
                   " SELECT\n"
                   " id,\n"
                   # The outer query reads "version", so expose it here
                   " version,\n"
                   " origin,\n"
                   " issue_id,\n"
                   " issue_version,\n"
                   " appearance,\n"
                   # Pass the revision records through whole, so the outer
                   # query can address their sub-fields by record name
                   " revision_before,\n"
                   " revision_after,\n"
                   " comment,\n"
                   " misc,\n"
                   " ROW_NUMBER() OVER (\n"
                   " PARTITION BY id\n"
                   " ORDER BY version DESC\n"
                   " ) AS precedence\n"
                   " FROM transitions\n"
                   ")\n"
                   "WHERE precedence = 1",
    )

    @classmethod
    def _inherit(cls, conn):
        """
        Inherit the database data from the previous schema version (if any).

        Adds the "git_commit_generation" column to the "_checkouts" table,
        updates the query of the "checkouts" view, and creates every table
        which is in this schema's TABLE_MAP, but not in the previous one's.

        Args:
            conn:   Connection to the database to inherit. The database must
                    comply with the previous version of the schema.
        """
        assert isinstance(conn, cls.Connection)
        # Add the "git_commit_generation" field to _checkouts
        conn.query_create(f"""
            ALTER TABLE `_checkouts`
            ADD COLUMN IF NOT EXISTS
            `{GIT_COMMIT_GENERATION_FIELD.name}`
            {GIT_COMMIT_GENERATION_FIELD.field_type}
            OPTIONS(description={GIT_COMMIT_GENERATION_FIELD.description!r})
        """).result()
        # Update the checkouts view
        view_ref = conn.dataset_ref.table('checkouts')
        view = Table(view_ref)
        view.view_query = cls._format_view_query(conn, 'checkouts')
        conn.client.update_table(view, ["view_query"])
        # Create new tables
        for table_name in cls.TABLE_MAP:
            if table_name not in PreviousSchema.TABLE_MAP:
                cls._create_table(conn, table_name)
2 changes: 1 addition & 1 deletion kcidb/db/postgresql/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import textwrap
from kcidb.db.schematic import Driver as SchematicDriver
from kcidb.db.postgresql.v04_06 import Schema as LatestSchema
from kcidb.db.postgresql.v04_07 import Schema as LatestSchema


class Driver(SchematicDriver):
Expand Down
175 changes: 175 additions & 0 deletions kcidb/db/postgresql/v04_07.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
"""Kernel CI report database - PostgreSQL schema v4.7"""

import kcidb.io as io
from kcidb.misc import merge_dicts
from kcidb.db.postgresql.schema import Index
from .v04_02 import TIMESTAMP_COLUMN
from .v04_06 import Schema as PreviousSchema
from .schema import \
Table, Constraint, BoolColumn, IntegerColumn, TextColumn, JSONColumn


# It's OK, pylint: disable=too-many-ancestors
class Schema(PreviousSchema):
    """
    PostgreSQL database schema v4.7.

    Compared to the previous schema this one adds the
    "git_commit_generation" column to checkouts, and the "transitions"
    table with its indexes, and declares support for I/O schema v4.4.
    """

    # The schema's version.
    version = (4, 7)
    # The I/O schema the database schema supports
    io = io.schema.V4_4

    # A map of table names and Table constructor arguments
    # For use by descendants
    TABLES_ARGS = merge_dicts(
        PreviousSchema.TABLES_ARGS,
        # Merge over the previous "checkouts" arguments, so that anything
        # defined there besides "columns" is kept, the same way the
        # OO_QUERIES entries below are merged over their predecessors
        checkouts=merge_dicts(
            PreviousSchema.TABLES_ARGS["checkouts"],
            columns=merge_dicts(
                PreviousSchema.TABLES_ARGS["checkouts"]["columns"],
                git_commit_generation=IntegerColumn(),
            ),
        ),
        # NOTE(review): the dotted column keys appear to correspond to
        # underscore-separated SQL column names (see the indexes and the
        # queries below) - confirm against the Table class
        transitions=dict(
            columns={
                "_timestamp": TIMESTAMP_COLUMN,
                "id": TextColumn(constraint=Constraint.NOT_NULL),
                "version": IntegerColumn(constraint=Constraint.NOT_NULL),
                "origin": TextColumn(constraint=Constraint.NOT_NULL),
                "issue_id": TextColumn(constraint=Constraint.NOT_NULL),
                "issue_version":
                    IntegerColumn(constraint=Constraint.NOT_NULL),
                "appearance": BoolColumn(),
                "revision_before.git_commit_hash": TextColumn(),
                "revision_before.patchset_hash": TextColumn(),
                "revision_after.git_commit_hash": TextColumn(),
                "revision_after.patchset_hash": TextColumn(),
                "comment": TextColumn(),
                "misc": JSONColumn(),
            },
            primary_key=["id", "version"]
        ),
    )

    # A map of table names and schemas
    TABLES = {
        name: Table(**args) for name, args in TABLES_ARGS.items()
    }

    # A map of index names and schemas
    INDEXES = merge_dicts(PreviousSchema.INDEXES, dict(
        checkouts_git_commit_generation=Index(
            "checkouts", ["git_commit_generation"]
        ),
        transitions__timestamp=Index("transitions", ["_timestamp"]),
        transitions_origin=Index("transitions", ["origin"]),
        transitions_issue_id=Index("transitions", ["issue_id"]),
        transitions_revision_before_git_commit_hash_patchset_hash=Index(
            "transitions",
            ["revision_before_git_commit_hash",
             "revision_before_patchset_hash"]
        ),
        transitions_revision_after_git_commit_hash_patchset_hash=Index(
            "transitions",
            ["revision_after_git_commit_hash",
             "revision_after_patchset_hash"]
        ),
    ))

    # Queries and their columns for each type of raw object-oriented data.
    # Both should have columns in the same order.
    OO_QUERIES = merge_dicts(
        PreviousSchema.OO_QUERIES,
        checkout=merge_dicts(
            PreviousSchema.OO_QUERIES["checkout"],
            statement="SELECT\n"
                      " id,\n"
                      " git_commit_hash,\n"
                      # Implement this column
                      " git_commit_generation,\n"
                      " patchset_hash,\n"
                      " origin,\n"
                      " git_repository_url,\n"
                      " git_repository_branch,\n"
                      " tree_name,\n"
                      " message_id,\n"
                      " start_time,\n"
                      " log_url,\n"
                      " log_excerpt,\n"
                      " comment,\n"
                      " valid,\n"
                      " misc\n"
                      "FROM checkouts",
        ),
        # Implement transitions
        # The subquery ranks the rows of each transition ID by version
        # (highest first), and the outer query keeps the top-ranked row only
        transition=merge_dicts(
            PreviousSchema.OO_QUERIES["transition"],
            statement="SELECT\n"
                      " id,\n"
                      " version,\n"
                      " origin,\n"
                      " issue_id,\n"
                      " issue_version,\n"
                      " appearance,\n"
                      " revision_before_git_commit_hash,\n"
                      " revision_before_patchset_hash,\n"
                      " revision_after_git_commit_hash,\n"
                      " revision_after_patchset_hash,\n"
                      " comment,\n"
                      " misc\n"
                      "FROM (\n"
                      " SELECT\n"
                      " id,\n"
                      " version,\n"
                      " origin,\n"
                      " issue_id,\n"
                      " issue_version,\n"
                      " appearance,\n"
                      " revision_before_git_commit_hash,\n"
                      " revision_before_patchset_hash,\n"
                      " revision_after_git_commit_hash,\n"
                      " revision_after_patchset_hash,\n"
                      " comment,\n"
                      " misc,\n"
                      " ROW_NUMBER() OVER (\n"
                      " PARTITION BY id\n"
                      " ORDER BY version DESC\n"
                      " ) AS precedence\n"
                      " FROM transitions\n"
                      ") AS prioritized_transitions\n"
                      "WHERE precedence = 1",
        ),
    )

    @classmethod
    def _inherit(cls, conn):
        """
        Inherit the database data from the previous schema version (if any).

        Adds the "git_commit_generation" column to the "checkouts" table,
        then creates every table and index which is in this schema, but
        not in the previous one.

        Args:
            conn:   Connection to the database to inherit. The database must
                    comply with the previous version of the schema.

        Raises:
            Exception: Creating a new table or index has failed. The
                       original exception is attached as the cause.
        """
        assert isinstance(conn, cls.Connection)
        with conn, conn.cursor() as cursor:
            # Add the git_commit_generation column to checkouts
            column = cls.TABLES["checkouts"].columns["git_commit_generation"]
            cursor.execute(f"""
                ALTER TABLE checkouts ADD COLUMN {column.format_def()}
            """)
            # Create new tables
            for table_name, table_schema in cls.TABLES.items():
                if table_name not in PreviousSchema.TABLES:
                    try:
                        cursor.execute(table_schema.format_create(table_name))
                    except Exception as exc:
                        raise Exception(
                            f"Failed creating table {table_name!r}"
                        ) from exc
            # Create new indexes
            for index_name, index_schema in cls.INDEXES.items():
                if index_name not in PreviousSchema.INDEXES:
                    try:
                        cursor.execute(index_schema.format_create(index_name))
                    except Exception as exc:
                        raise Exception(
                            f"Failed creating index {index_name!r}"
                        ) from exc
2 changes: 1 addition & 1 deletion kcidb/db/sqlite/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import textwrap
from kcidb.db.schematic import Driver as SchematicDriver
from kcidb.db.sqlite.v04_02 import Schema as LatestSchema
from kcidb.db.sqlite.v04_03 import Schema as LatestSchema


class Driver(SchematicDriver):
Expand Down
Loading

0 comments on commit 6c9e9e3

Please sign in to comment.