Skip to content

Commit

Permalink
Experimental deeper checks for self-hosted (SH) runner detection.
Browse files Browse the repository at this point in the history
  • Loading branch information
AdnaneKhan committed Jul 22, 2023
1 parent 80d5ec8 commit 3d43b49
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 17 deletions.
5 changes: 5 additions & 0 deletions gato/enumerate/recommender.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,11 @@ def print_repo_runner_info(repository: Repository):
f"{Output.bright(repository.accessible_runners[0].machine_name)}"
)

for runner in repository.accessible_runners:
if runner.non_ephemeral:
Output.owned("The repository contains a non-ephemeral self-hosted runner!")
break

if repository.runners:
Output.result(
f"The repository has {len(repository.runners)} repo-level"
Expand Down
14 changes: 8 additions & 6 deletions gato/enumerate/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,16 @@ def __perform_runlog_enumeration(self, repository: Repository):
"""
runner_detected = False
wf_runs = self.api.retrieve_run_logs(
repository.name, short_circuit=True
repository.name, short_circuit=False
)

if wf_runs:
runner = Runner(
wf_runs[0]['runner_name'], wf_runs[0]['machine_name']
)
for wf_run in wf_runs:
runner = Runner(
wf_run['runner_name'], wf_run['machine_name'], non_ephemeral=wf_run['non_ephemeral']
)

repository.add_accessible_runner(runner)
repository.add_accessible_runner(runner)
runner_detected = True

return runner_detected
Expand Down Expand Up @@ -79,7 +80,8 @@ def __perform_yml_enumeration(self, repository: Repository):
# At this point we only know the extension, so handle and
# ignore malformed yml files.
except Exception as parse_error:
print(parse_error)

print(f"{wf}: {str(parse_error)}")
logger.warning("Attmpted to parse invalid yaml!")

return runner_wfs
Expand Down
30 changes: 21 additions & 9 deletions gato/github/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ class Api():
rate limiting or network issues.
"""

RUNNER_RE = re.compile(r'Runner name: \'([\w+-]+)\'')
MACHINE_RE = re.compile(r'Machine name: \'([\w+-]+)\'')
RUNNER_RE = re.compile(r'Runner name: \'([\w+-.]+)\'')
MACHINE_RE = re.compile(r'Machine name: \'([\w+-.]+)\'')

def __init__(self, pat: str, version: str = "2022-11-28",
http_proxy: str = None, socks_proxy: str = None,
Expand Down Expand Up @@ -127,12 +127,18 @@ def __process_run_log(self, log_content: bytes, run_info: dict):
matches = Api.MACHINE_RE.search(content)
hostname = matches.group(1) if matches else None

if "Cleaning the repository" in content:
ephemeral = "Yes"
else:
ephemeral = "Unknown"

log_package = {
"setup_log": content,
"runner_name": runner_name,
"machine_name": hostname,
"run_id": run_info["id"],
"run_attempt": run_info["run_attempt"]
"run_attempt": run_info["run_attempt"],
"non_ephemeral": ephemeral
}
return log_package

Expand Down Expand Up @@ -601,30 +607,36 @@ def retrieve_run_logs(self, repo_name: str, short_circuit: str = True):
Returns:
list: List of run logs for runs that ran on self-hosted runners.
"""
runs = self.call_get(f'/repos/{repo_name}/actions/runs')
runs = self.call_get(f'/repos/{repo_name}/actions/runs', params={"per_page": "100"})

run_logs = []
# This is a dictionary so we can de-duplicate runner IDs based on
# the machine_name:runner_name.
run_logs = {}

if runs.status_code == 200:
logger.debug(f'Enumerating runs within {repo_name}')
for run in runs.json()['workflow_runs']:
run_log = self.call_get(
f'/repos/{repo_name}/actions/runs/{run["id"]}/'
f'attempts/{run["run_attempt"]}/logs')

if run_log.status_code == 200:
run_log = self.__process_run_log(run_log.content, run)
if run_log:
run_logs.append(run_log)
key = f"{run_log['machine_name']}:{run_log['runner_name']}"
run_logs[key] = run_log

if short_circuit:
return run_logs
return run_logs.values()
elif run_log.status_code == 410:
print("can't get")
break
else:
logger.debug(
f"Call to retrieve run logs from {repo_name} run "
f"{run['id']} attempt {run['run_attempt']} returned "
f"{run_log.status_code}!")

return run_logs
return run_logs.values()

def parse_workflow_runs(self, repo_name: str):
"""Returns the number of workflow runs associated with the repository.
Expand Down
7 changes: 5 additions & 2 deletions gato/models/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ def __init__(
machine_name=None,
os=None,
status=None,
labels=[]):
labels=[],
non_ephemeral=False):
"""Constructor for runner wrapper object.
Args:
Expand All @@ -27,6 +28,7 @@ def __init__(
self.os = os
self.status = status
self.labels = labels
self.non_ephemeral = non_ephemeral

def toJSON(self):
"""Converts the repository to a Gato JSON representation.
Expand All @@ -37,7 +39,8 @@ def toJSON(self):
else "Unknown",
"os": self.os if self.os else "Unknown",
"status": self.status if self.status else "Unknown",
"labels": [label for label in self.labels]
"labels": [label for label in self.labels],
"non_ephemeral": self.non_ephemeral
}

return representation
56 changes: 56 additions & 0 deletions gato/workflow_parser/workflow_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import yaml
from pathlib import Path
import os
import re

logger = logging.getLogger(__name__)

Expand All @@ -16,6 +17,27 @@ class WorkflowParser():
as the project grows in capability.
"""

# Labels that indicate a GitHub-hosted runner. A job whose 'runs-on'
# labels all appear in this list (or match the larger-runner pattern
# below) is NOT flagged as self-hosted.
GITHUB_HOSTED_LABELS = [
    'ubuntu-latest',
    'macos-latest',
    'macOS-latest',
    'windows-latest',
    'ubuntu-18.04',  # deprecated, but we don't want false positives on older repos.
    'ubuntu-20.04',
    'ubuntu-22.04',
    'windows-2022',
    'windows-2019',
    'windows-2016',  # deprecated, but we don't want false positives on older repos.
    'macos-11',
    'macos-12',  # was listed twice; duplicate removed.
    'macos-13',
    'macos-13-xl',
]

# Matches GitHub "larger runner" labels such as 'ubuntu-22.04-8core-32gb'.
# Dots are escaped so '22.04' matches only the literal version string,
# not arbitrary characters.
LARGER_RUNNER_REGEX_LIST = r'(windows|ubuntu)-(22\.04|20\.04|2019-2022)-(4|8|16|32|64)core-(16|32|64|128|256)gb'
# Extracts the key name from a matrix context reference,
# e.g. '${{ matrix.os }}' -> 'os'.
MATRIX_KEY_EXTRACTION_REGEX = r'{{\s*matrix\.([\w-]+)\s*}}'

def __init__(self, workflow_yml: str, repo_name: str, workflow_name: str):
"""Initialize class with workflow file.
Expand Down Expand Up @@ -59,8 +81,42 @@ def self_hosted(self):
for jobname, job_details in self.parsed_yml['jobs'].items():
if 'runs-on' in job_details:
runs_on = job_details['runs-on']
# Clear cut
if 'self-hosted' in runs_on:
sh_jobs.append((jobname, job_details))
elif 'matrix.' in runs_on:
# We need to check each OS in the matrix strategy.
# Extract the matrix key from the variable
matrix_key = re.search(self.MATRIX_KEY_EXTRACTION_REGEX, runs_on).group(1)
# Check if strategy exists in the yaml file
if 'strategy' in job_details and 'matrix' in job_details['strategy']:
matrix = job_details['strategy']['matrix']

# Use previously acquired key to retrieve list of OSes
os_list = matrix[matrix_key]

# We only need ONE to be self hosted, others can be
# GitHub hosted
for key in os_list:
if key not in self.GITHUB_HOSTED_LABELS and not re.match(self.LARGER_RUNNER_REGEX_LIST, key):
sh_jobs.append((jobname, job_details))
break
pass
else:
if type(runs_on) == list:
for label in runs_on:
if label in self.GITHUB_HOSTED_LABELS:
break
if re.match(self.LARGER_RUNNER_REGEX_LIST, label):
break
else:
sh_jobs.append((jobname, job_details))
elif type(runs_on) == str:
if runs_on in self.GITHUB_HOSTED_LABELS:
break
if re.match(self.LARGER_RUNNER_REGEX_LIST, runs_on):
break
sh_jobs.append((jobname, job_details))

return sh_jobs

Expand Down

0 comments on commit 3d43b49

Please sign in to comment.