diff --git a/.github/workflows/commit-access-review.py b/.github/workflows/commit-access-review.py new file mode 100644 index 0000000000000..97589c138c0b7 --- /dev/null +++ b/.github/workflows/commit-access-review.py @@ -0,0 +1,418 @@ +#!/usr/bin/env python3 +# ===-- commit-access-review.py --------------------------------------------===# +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ===------------------------------------------------------------------------===# +# +# ===------------------------------------------------------------------------===# + +import datetime +import github +import re +import requests +import time +import sys +import re + + +class User: + THRESHOLD = 5 + + def __init__(self, name, triage_list): + self.name = name + self.authored = 0 + self.merged = 0 + self.reviewed = 0 + self.triage_list = triage_list + + def add_authored(self, val=1): + self.authored += val + if self.meets_threshold(): + print(self.name, "meets the threshold with authored commits") + del self.triage_list[self.name] + + def set_authored(self, val): + self.authored = 0 + self.add_authored(val) + + def add_merged(self, val=1): + self.merged += val + if self.meets_threshold(): + print(self.name, "meets the threshold with merged commits") + del self.triage_list[self.name] + + def add_reviewed(self, val=1): + self.reviewed += val + if self.meets_threshold(): + print(self.name, "meets the threshold with reviewed commits") + del self.triage_list[self.name] + + def get_total(self): + return self.authored + self.merged + self.reviewed + + def meets_threshold(self): + return self.get_total() >= self.THRESHOLD + + def __repr__(self): + return "{} : a: {} m: {} r: {}".format( + self.name, self.authored, self.merged, self.reviewed + ) + + +def run_graphql_query( + query: str, variables: dict, token: str, retry: bool = True +) -> dict: + """ + This function submits a graphql query and returns the results as a + dictionary. + """ + s = requests.Session() + retries = requests.adapters.Retry(total=8, backoff_factor=2, status_forcelist=[504]) + s.mount("https://", requests.adapters.HTTPAdapter(max_retries=retries)) + + headers = { + "Authorization": "bearer {}".format(token), + # See + # https://github.blog/2021-11-16-graphql-global-id-migration-update/ + "X-Github-Next-Global-ID": "1", + } + request = s.post( + url="https://api.github.com/graphql", + json={"query": query, "variables": variables}, + headers=headers, + ) + + rate_limit = request.headers.get("X-RateLimit-Remaining") + print(rate_limit) + if rate_limit and int(rate_limit) < 10: + reset_time = int(request.headers["X-RateLimit-Reset"]) + while reset_time - int(time.time()) > 0: + time.sleep(60) + print( + "Waiting until rate limit reset", + reset_time - int(time.time()), + "seconds remaining", + ) + + if request.status_code == 200: + if "data" not in request.json(): + print(request.json()) + sys.exit(1) + return request.json()["data"] + elif retry: + return run_graphql_query(query, variables, token, False) + else: + raise Exception( + "Failed to run graphql query\nquery: {}\nerror: {}".format( + query, request.json() + ) + ) + + +def check_manual_requests(start_date: datetime.datetime, token: str) -> list[str]: + """ + Return a list of users who have been asked since ``start_date`` if they + want to keep their commit access. + """ + query = """ + query ($query: String!) { + search(query: $query, type: ISSUE, first: 100) { + nodes { + ... on Issue { + body + comments (first: 100) { + nodes { + author { + login + } + } + } + } + } + } + } + """ + formatted_start_date = start_date.strftime("%Y-%m-%dT%H:%M:%S") + variables = { + "query": f"type:issue created:>{formatted_start_date} org:llvm repo:llvm-project label:infrastructure:commit-access" + } + + data = run_graphql_query(query, variables, token) + users = [] + for issue in data["search"]["nodes"]: + users.extend([user[1:] for user in re.findall("@[^ ,\n]+", issue["body"])]) + + return users + + +def get_num_commits(user: str, start_date: datetime.datetime, token: str) -> int: + """ + Get number of commits that ``user`` has been made since ``start_date`. + """ + variables = { + "owner": "llvm", + "user": user, + "start_date": start_date.strftime("%Y-%m-%dT%H:%M:%S"), + } + + user_query = """ + query ($user: String!) { + user(login: $user) { + id + } + } + """ + + data = run_graphql_query(user_query, variables, token) + variables["user_id"] = data["user"]["id"] + + query = """ + query ($owner: String!, $user_id: ID!, $start_date: GitTimestamp!){ + organization(login: $owner) { + teams(query: "llvm-committers" first:1) { + nodes { + repositories { + nodes { + ref(qualifiedName: "main") { + target { + ... on Commit { + history(since: $start_date, author: {id: $user_id }) { + totalCount + } + } + } + } + } + } + } + } + } + } + """ + count = 0 + data = run_graphql_query(query, variables, token) + for repo in data["organization"]["teams"]["nodes"][0]["repositories"]["nodes"]: + count += int(repo["ref"]["target"]["history"]["totalCount"]) + if count >= User.THRESHOLD: + break + return count + + +def is_new_committer_query_repo( + user: str, start_date: datetime.datetime, token: str +) -> bool: + """ + Determine if ``user`` is a new committer. A new committer can keep their + commit access even if they don't meet the criteria. + """ + variables = { + "user": user, + } + + user_query = """ + query ($user: String!) { + user(login: $user) { + id + } + } + """ + + data = run_graphql_query(user_query, variables, token) + variables["owner"] = "llvm" + variables["user_id"] = data["user"]["id"] + variables["start_date"] = start_date.strftime("%Y-%m-%dT%H:%M:%S") + + query = """ + query ($owner: String!, $user_id: ID!){ + organization(login: $owner) { + repository(name: "llvm-project") { + ref(qualifiedName: "main") { + target { + ... on Commit { + history(author: {id: $user_id }, first: 5) { + nodes { + committedDate + } + } + } + } + } + } + } + } + """ + + data = run_graphql_query(query, variables, token) + repo = data["organization"]["repository"] + commits = repo["ref"]["target"]["history"]["nodes"] + if len(commits) == 0: + return True + committed_date = commits[-1]["committedDate"] + if datetime.datetime.strptime(committed_date, "%Y-%m-%dT%H:%M:%SZ") < start_date: + return False + return True + + +def is_new_committer(user: str, start_date: datetime.datetime, token: str) -> bool: + """ + Wrapper around is_new_commiter_query_repo to handle exceptions. + """ + try: + return is_new_committer_query_repo(user, start_date, token) + except: + pass + return True + + +def get_review_count(user: str, start_date: datetime.datetime, token: str) -> int: + """ + Return the number of reviews that ``user`` has done since ``start_date``. + """ + query = """ + query ($query: String!) { + search(query: $query, type: ISSUE, first: 5) { + issueCount + } + } + """ + formatted_start_date = start_date.strftime("%Y-%m-%dT%H:%M:%S") + variables = { + "owner": "llvm", + "repo": "llvm-project", + "user": user, + "query": f"type:pr commenter:{user} -author:{user} merged:>{formatted_start_date} org:llvm", + } + + data = run_graphql_query(query, variables, token) + return int(data["search"]["issueCount"]) + + +def count_prs(triage_list: dict, start_date: datetime.datetime, token: str): + """ + Fetch all the merged PRs for the project since ``start_date`` and update + ``triage_list`` with the number of PRs merged for each user. + """ + + query = """ + query ($query: String!, $after: String) { + search(query: $query, type: ISSUE, first: 100, after: $after) { + issueCount, + nodes { + ... on PullRequest { + author { + login + } + mergedBy { + login + } + } + } + pageInfo { + hasNextPage + endCursor + } + } + } + """ + date_begin = start_date + date_end = None + while date_begin < datetime.datetime.now(): + date_end = date_begin + datetime.timedelta(days=7) + formatted_date_begin = date_begin.strftime("%Y-%m-%dT%H:%M:%S") + formatted_date_end = date_end.strftime("%Y-%m-%dT%H:%M:%S") + variables = { + "query": f"type:pr is:merged merged:{formatted_date_begin}..{formatted_date_end} org:llvm", + } + has_next_page = True + while has_next_page: + print(variables) + data = run_graphql_query(query, variables, token) + for pr in data["search"]["nodes"]: + # Users can be None if the user has been deleted. + if not pr["author"]: + continue + author = pr["author"]["login"] + if author in triage_list: + triage_list[author].add_authored() + + if not pr["mergedBy"]: + continue + merger = pr["mergedBy"]["login"] + if author == merger: + continue + if merger not in triage_list: + continue + triage_list[merger].add_merged() + + has_next_page = data["search"]["pageInfo"]["hasNextPage"] + if has_next_page: + variables["after"] = data["search"]["pageInfo"]["endCursor"] + date_begin = date_end + + +def main(): + token = sys.argv[1] + gh = github.Github(login_or_token=token) + org = gh.get_organization("llvm") + repo = org.get_repo("llvm-project") + team = org.get_team_by_slug("llvm-committers") + one_year_ago = datetime.datetime.now() - datetime.timedelta(days=365) + triage_list = {} + for member in team.get_members(): + triage_list[member.login] = User(member.login, triage_list) + + print("Start:", len(triage_list), "triagers") + # Step 0 Check if users have requested commit access in the last year. + for user in check_manual_requests(one_year_ago, token): + if user in triage_list: + print(user, "requested commit access in the last year.") + del triage_list[user] + print("After Request Check:", len(triage_list), "triagers") + + # Step 1 count all PRs authored or merged + count_prs(triage_list, one_year_ago, token) + + print("After PRs:", len(triage_list), "triagers") + + if len(triage_list) == 0: + sys.exit(0) + + # Step 2 check for reviews + for user in list(triage_list.keys()): + review_count = get_review_count(user, one_year_ago, token) + triage_list[user].add_reviewed(review_count) + + print("After Reviews:", len(triage_list), "triagers") + + if len(triage_list) == 0: + sys.exit(0) + + # Step 3 check for number of commits + for user in list(triage_list.keys()): + num_commits = get_num_commits(user, one_year_ago, token) + # Override the total number of commits to not double count commits and + # authored PRs. + triage_list[user].set_authored(num_commits) + + print("After Commits:", len(triage_list), "triagers") + + # Step 4 check for new committers + for user in list(triage_list.keys()): + print("Checking", user) + if is_new_committer(user, one_year_ago, token): + print("Removing new committer: ", user) + del triage_list[user] + + print("Complete:", len(triage_list), "triagers") + + with open("triagers.log", "w") as triagers_log: + for user in triage_list: + print(triage_list[user].__repr__()) + triagers_log.write(user + "\n") + + +if __name__ == "__main__": + main() diff --git a/.github/workflows/commit-access-review.yml b/.github/workflows/commit-access-review.yml new file mode 100644 index 0000000000000..f9195a1863dee --- /dev/null +++ b/.github/workflows/commit-access-review.yml @@ -0,0 +1,34 @@ +name: Commit Access Review + +on: + workflow_dispatch: + schedule: + # * is a special character in YAML so you have to quote this string + - cron: '0 7 1 * *' + +permissions: + contents: read + +jobs: + commit-access-review: + if: github.repository_owner == 'llvm' + runs-on: ubuntu-22.04 + steps: + - name: Fetch LLVM sources + uses: actions/checkout@v4 + + - name: Install dependencies + run: | + pip install --require-hashes -r ./llvm/utils/git/requirements.txt + + - name: Run Script + env: + GITHUB_TOKEN: ${{ secrets.RELEASE_TASKS_USER_TOKEN }} + run: | + python3 .github/workflows/commit-access-review.py $GITHUB_TOKEN + + - name: Upload Triage List + uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8 #v4.3.0 + with: + name: triagers + path: triagers.log