From 3e4236c54dd05a73fd9364c1ebbf21272594c529 Mon Sep 17 00:00:00 2001 From: Carl Cervone <42869436+ccerv1@users.noreply.github.com> Date: Sun, 3 Nov 2024 13:43:43 -0600 Subject: [PATCH] feat: include dates for first/last events to repo from Github API in code metrics marts (#2346) * chore: updating staging model * chore: update staging schema * feat(dbt): include first/last updates to repo * feat(dbt): add first/last update to code metrics mart * fix: trailing whitespace --- .../metrics/int_code_metrics_by_project.sql | 4 ++++ .../metrics/int_repo_metrics_by_project.sql | 8 +++++++- .../metrics/code_metrics_by_project_v1.sql | 2 ++ .../stg_ossd__current_repositories.sql | 6 +++--- .../oss-directory/stg_ossd__schema.yml | 20 ++++++++++++++++++- 5 files changed, 35 insertions(+), 5 deletions(-) diff --git a/warehouse/dbt/models/intermediate/metrics/int_code_metrics_by_project.sql b/warehouse/dbt/models/intermediate/metrics/int_code_metrics_by_project.sql index c6390401f..91a38565e 100644 --- a/warehouse/dbt/models/intermediate/metrics/int_code_metrics_by_project.sql +++ b/warehouse/dbt/models/intermediate/metrics/int_code_metrics_by_project.sql @@ -130,6 +130,8 @@ repos as ( select project_id, artifact_source as event_source, + MIN(created_at) as first_created_at_date, + MAX(updated_at) as last_updated_at_date, MIN(first_commit_time) as first_commit_date, MAX(last_commit_time) as last_commit_date, COUNT(distinct artifact_id) as repository_count, @@ -172,6 +174,8 @@ select project_metadata.project_name, project_metadata.display_name, project_metadata.event_source, + code_metrics.first_created_at_date, + code_metrics.last_updated_at_date, code_metrics.first_commit_date, code_metrics.last_commit_date, code_metrics.repository_count, diff --git a/warehouse/dbt/models/intermediate/metrics/int_repo_metrics_by_project.sql b/warehouse/dbt/models/intermediate/metrics/int_repo_metrics_by_project.sql index dae01926f..0977b73bc 100644 --- a/warehouse/dbt/models/intermediate/metrics/int_repo_metrics_by_project.sql +++ b/warehouse/dbt/models/intermediate/metrics/int_repo_metrics_by_project.sql @@ -13,6 +13,8 @@ with repo_artifact as ( license_spdx_id, language, watcher_count, + created_at, + updated_at, CAST(id as STRING) as artifact_source_id, LOWER(owner) as artifact_namespace, LOWER(name) as artifact_name @@ -29,7 +31,9 @@ repo_snapshot as ( is_fork, fork_count, star_count, - watcher_count + watcher_count, + created_at, + updated_at from repo_artifact as a ), @@ -75,6 +79,8 @@ select distinct repo_snapshot.watcher_count, repo_snapshot.language, repo_snapshot.license_spdx_id, + repo_snapshot.created_at, + repo_snapshot.updated_at, repo_stats.first_commit_time, repo_stats.last_commit_time, repo_stats.days_with_commits_count, diff --git a/warehouse/dbt/models/marts/metrics/code_metrics_by_project_v1.sql b/warehouse/dbt/models/marts/metrics/code_metrics_by_project_v1.sql index ffff0264e..034cbdd51 100644 --- a/warehouse/dbt/models/marts/metrics/code_metrics_by_project_v1.sql +++ b/warehouse/dbt/models/marts/metrics/code_metrics_by_project_v1.sql @@ -12,6 +12,8 @@ select display_name, event_source, repository_count, + first_created_at_date, + last_updated_at_date, first_commit_date, last_commit_date, star_count, diff --git a/warehouse/dbt/models/staging/oss-directory/stg_ossd__current_repositories.sql b/warehouse/dbt/models/staging/oss-directory/stg_ossd__current_repositories.sql index b8dab5b15..b81e91b93 100644 --- a/warehouse/dbt/models/staging/oss-directory/stg_ossd__current_repositories.sql +++ b/warehouse/dbt/models/staging/oss-directory/stg_ossd__current_repositories.sql @@ -17,9 +17,9 @@ with ranked_repositories as ( license_name, license_spdx_id, language, - ingestion_time, created_at, updated_at, + ingestion_time, ROW_NUMBER() over (partition by node_id order by ingestion_time desc, id asc) as row_num @@ -41,8 +41,8 @@ select license_name, license_spdx_id, language, - ingestion_time, created_at, - updated_at + updated_at, + ingestion_time from ranked_repositories where row_num = 1 diff --git a/warehouse/dbt/models/staging/oss-directory/stg_ossd__schema.yml b/warehouse/dbt/models/staging/oss-directory/stg_ossd__schema.yml index 01e3add75..3a2e77e1e 100644 --- a/warehouse/dbt/models/staging/oss-directory/stg_ossd__schema.yml +++ b/warehouse/dbt/models/staging/oss-directory/stg_ossd__schema.yml @@ -134,6 +134,24 @@ models: - &is_fork name: is_fork description: "is this repo a fork?" + - &license_name + name: license_name + description: "license name" + - &license_spdx_id + name: license_spdx_id + description: "license spdx id" + - &language + name: language + description: "programming language used in the repository" + - &created_at + name: created_at + description: "repository creation timestamp" + - &updated_at + name: updated_at + description: "repository last update timestamp" + - &ingestion_time + name: ingestion_time + description: "timestamp when the data was ingested" - name: stg_ossd__missing_sbom meta: #... @@ -148,4 +166,4 @@ models: description: "artifact source, currently only GITHUB" - *artifact_url - name: snapshot_at - description: "snapshot time" + description: "snapshot time" \ No newline at end of file