-
Notifications
You must be signed in to change notification settings - Fork 8
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: get repos from graphql #120
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -36,7 +36,10 @@ | |
|
||
METRICS_PREFIX = "services.torngit.github" | ||
|
||
GITHUB_REPO_COUNT_QUERY = """ | ||
|
||
class GitHubGraphQLQueries(object): | ||
_queries = dict( | ||
REPO_TOTALCOUNT=""" | ||
query { | ||
viewer { | ||
repositories( | ||
|
@@ -47,11 +50,61 @@ | |
} | ||
} | ||
} | ||
""" | ||
""", | ||
REPOS_FROM_NODEIDS=""" | ||
query GetReposFromNodeIds($node_ids: [ID!]!) { | ||
nodes(ids: $node_ids) { | ||
__typename | ||
... on Repository { | ||
# databaseId == service_id | ||
databaseId | ||
name | ||
primaryLanguage { | ||
name | ||
} | ||
isPrivate | ||
defaultBranchRef { | ||
name | ||
} | ||
owner { | ||
# This ID is actually the node_id, not the ownerid | ||
id | ||
login | ||
} | ||
Comment on lines
+69
to
+73
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do we need a separate query for getting owner information or could we include databaseId here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would have included if it was possible, but that is not a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. RIP |
||
} | ||
} | ||
} | ||
""", | ||
OWNER_FROM_NODEID=""" | ||
query GetOwnerFromNodeId($node_id: ID!) { | ||
node(id: $node_id) { | ||
__typename | ||
... on Organization { | ||
login | ||
databaseId | ||
} | ||
... on User { | ||
login | ||
databaseId | ||
} | ||
} | ||
} | ||
""", | ||
) | ||
|
||
def get(self, query_name: str) -> Optional[str]:
    """Return the raw GraphQL query text registered under ``query_name``.

    The lookup is done against the class-level ``_queries`` mapping.
    ``None`` is returned when no query is registered under that name.
    """
    known_queries = self._queries
    return known_queries.get(query_name)
|
||
def prepare(self, query_name: str, variables: dict) -> Optional[dict]:
    """Build the request body for the GraphQL query named ``query_name``.

    The query text is resolved through ``self.get``. When the name is known,
    the result is a dict with the keys ``"query"`` (the query text) and
    ``"variables"`` (the ``variables`` argument, passed through untouched).
    When the name is unknown, ``None`` is returned.
    """
    # If Query was an object we could validate the variables
    query_text = self.get(query_name)
    if query_text is None:
        return None
    return {"query": query_text, "variables": variables}
|
||
|
||
class Github(TorngitBaseAdapter): | ||
service = "github" | ||
graphql = GitHubGraphQLQueries() | ||
urls = dict( | ||
repo="{username}/{name}", | ||
owner="{username}", | ||
|
@@ -587,11 +640,92 @@ async def _fetch_number_of_repos(self, client, token): | |
client, | ||
"post", | ||
"/graphql", | ||
body=dict(query=GITHUB_REPO_COUNT_QUERY), | ||
body=dict(query=self.graphql.get("REPO_TOTALCOUNT")), | ||
token=token, | ||
) | ||
return res["data"]["viewer"]["repositories"]["totalCount"] | ||
|
||
async def _get_owner_from_nodeid(self, client, token, owner_node_id: str):
    """Fetch an owner's login and database id from its GraphQL node_id.

    Posts the ``OWNER_FROM_NODEID`` query to ``/graphql`` through ``self.api``
    and reads the ``node`` entry of the response's ``data``.

    Returns a dict with ``username`` (the node's ``login``) and
    ``service_id`` (the node's ``databaseId``).
    """
    request_body = self.graphql.prepare(
        "OWNER_FROM_NODEID", variables={"node_id": owner_node_id}
    )
    response = await self.api(
        client, "post", "/graphql", body=request_body, token=token
    )
    node = response["data"]["node"]
    username, service_id = node["login"], node["databaseId"]
    return {"username": username, "service_id": service_id}
|
||
async def get_repos_from_nodeids_generator(
    self, repo_node_ids: List[str], expected_owner_username, *, token=None
):
    """Yield repo dicts from the GitHub GraphQL API for repos whose node_ids are known.

    Each yielded dict has ``service_id``, ``name``, ``language``, ``private``,
    ``branch`` and ``owner``. The owner entry always carries ``node_id``,
    ``username`` and ``is_expected_owner``. When the owner's username differs
    from ``expected_owner_username`` (the owner we are sure to already have
    info for), the owner info is also fetched from the GraphQL API and merged
    in; each such owner is fetched only once per call.

    Response entries that are ``None`` or are not of type ``Repository`` are
    skipped.

    We couldn't find how to use pagination with this endpoint, so the
    node_ids are sent in chunks from our side, believing that the max number
    of node_ids accepted per request is 100.
    """
    token = self.get_token_by_type_if_none(token, TokenType.read)
    PAGE_SIZE = 100
    # owner node_id -> owner info, so each unexpected owner is fetched once
    owner_cache = {}
    async with self.get_client() as client:
        for start in range(0, len(repo_node_ids), PAGE_SIZE):
            page = repo_node_ids[start : start + PAGE_SIZE]
            request_body = self.graphql.prepare(
                "REPOS_FROM_NODEIDS", variables={"node_ids": page}
            )
            response = await self.api(
                client, "post", "/graphql", body=request_body, token=token
            )
            for node in response["data"]["nodes"]:
                is_repository = (
                    node is not None and node["__typename"] == "Repository"
                )
                if not is_repository:
                    continue

                # Both of these can be null in the response
                language_info = node.get("primaryLanguage")
                branch_info = node.get("defaultBranchRef")
                repo = {
                    "service_id": node["databaseId"],
                    "name": node["name"],
                    "language": self._validate_language(
                        language_info.get("name") if language_info else None
                    ),
                    "private": node["isPrivate"],
                    "branch": branch_info.get("name") if branch_info else None,
                    "owner": {
                        "node_id": node["owner"]["id"],
                        "username": node["owner"]["login"],
                    },
                }

                basic_owner = repo["owner"]
                is_expected_owner = (
                    basic_owner["username"] == expected_owner_username
                )
                if not is_expected_owner:
                    owner_node_id = basic_owner["node_id"]
                    if owner_node_id not in owner_cache:
                        owner_cache[owner_node_id] = (
                            await self._get_owner_from_nodeid(
                                client, token, owner_node_id
                            )
                        )
                    # Merge into a new dict so the cached entry is never mutated
                    repo["owner"] = {**basic_owner, **owner_cache[owner_node_id]}

                repo["owner"]["is_expected_owner"] = is_expected_owner
                yield repo
|
||
async def list_repos_using_installation(self, username=None): | ||
""" | ||
returns list of repositories included in this integration | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
interactions: | ||
- request: | ||
body: '{"query": "\nquery GetReposFromNodeIds($node_ids: [ID!]!) {\n nodes(ids: | ||
$node_ids) {\n __typename \n ... on Repository {\n # | ||
databaseId == service_id\n databaseId\n name\n primaryLanguage | ||
{\n name\n }\n isPrivate\n defaultBranchRef | ||
{\n name\n }\n owner {\n # | ||
This ID is actually the node_id, not the ownerid\n id\n login\n }\n }\n }\n}\n", | ||
matt-codecov marked this conversation as resolved.
Show resolved
Hide resolved
|
||
"variables": {"node_ids": ["R_kgDOHrbKcg", "R_kgDOLEJx2g"]}}' | ||
headers: | ||
accept: | ||
- application/json | ||
accept-encoding: | ||
- gzip, deflate | ||
connection: | ||
- keep-alive | ||
content-length: | ||
- '613' | ||
content-type: | ||
- application/json | ||
host: | ||
- api.github.com | ||
user-agent: | ||
- Default | ||
method: POST | ||
uri: https://api.github.com/graphql | ||
response: | ||
content: '{"data":{"nodes":[{"__typename":"Repository","databaseId":515295858,"name":"example-python","primaryLanguage":{"name":"Shell"},"isPrivate":false,"defaultBranchRef":{"name":"master"},"owner":{"id":"U_kgDOBZOfKw","login":"codecove2e"}},{"__typename":"Repository","databaseId":742552026,"name":"test-no-languages","primaryLanguage":null,"isPrivate":false,"defaultBranchRef":null,"owner":{"id":"U_kgDOBZOfKw","login":"codecove2e"}}]}}' | ||
headers: | ||
Access-Control-Allow-Origin: | ||
- '*' | ||
Access-Control-Expose-Headers: | ||
- ETag, Link, Location, Retry-After, X-GitHub-OTP, X-RateLimit-Limit, X-RateLimit-Remaining, | ||
X-RateLimit-Used, X-RateLimit-Resource, X-RateLimit-Reset, X-OAuth-Scopes, | ||
X-Accepted-OAuth-Scopes, X-Poll-Interval, X-GitHub-Media-Type, X-GitHub-SSO, | ||
X-GitHub-Request-Id, Deprecation, Sunset | ||
Content-Encoding: | ||
- gzip | ||
Content-Security-Policy: | ||
- default-src 'none' | ||
Content-Type: | ||
- application/json; charset=utf-8 | ||
Date: | ||
- Tue, 06 Feb 2024 13:21:07 GMT | ||
Referrer-Policy: | ||
- origin-when-cross-origin, strict-origin-when-cross-origin | ||
Server: | ||
- GitHub.com | ||
Strict-Transport-Security: | ||
- max-age=31536000; includeSubdomains; preload | ||
Transfer-Encoding: | ||
- chunked | ||
Vary: | ||
- Accept-Encoding, Accept, X-Requested-With | ||
X-Accepted-OAuth-Scopes: | ||
- repo | ||
X-Content-Type-Options: | ||
- nosniff | ||
X-Frame-Options: | ||
- deny | ||
X-GitHub-Media-Type: | ||
- github.v4 | ||
X-GitHub-Request-Id: | ||
- C11E:116D76:8B8D4:94D71:65C23242 | ||
X-OAuth-Scopes: | ||
- repo | ||
X-RateLimit-Limit: | ||
- '5000' | ||
X-RateLimit-Remaining: | ||
- '4997' | ||
X-RateLimit-Reset: | ||
- '1707227531' | ||
X-RateLimit-Resource: | ||
- graphql | ||
X-RateLimit-Used: | ||
- '3' | ||
X-XSS-Protection: | ||
- '0' | ||
http_version: HTTP/1.1 | ||
status_code: 200 | ||
version: 1 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
does github let you fetch the whole list without pagination or will it paginate for you with some default page size?
codecov/engineering-team#139 has an example of pagination with github's graphql api if we need to add it
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's a very good question that I probably should have put in the PR description.
I spent quite some time trying to get pagination in this query. The docs indicate that you can specify `first` and `last`, and get `pageInfo` cursors.
But according to the explorer docs, the `nodes` query doesn't include the inputs for `first` and `last` (I tried, it didn't work) AND the `Repository` object doesn't include `pageInfo` as a possible field to query. For that reason I don't think there's pagination? At least I couldn't find a way to paginate the request.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
update: I decided to implement simple pagination from our side just in case.