Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add option to output a new config file #63

Merged
merged 11 commits into from
Sep 24, 2024
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,15 @@ this returns something like:
omics-run-analyzer: wrote run-1234567.json
```

#### Output optimized configuration
> [!WARNING]
> Currently this feature only supports Nextflow workflows.

The `--write-config` option writes a new configuration file that uses each task's `recommendedCpus` and `recommendedMemoryGiB` values as its resource requirements. If a task runs multiple times with different inputs, the maximum recommended values across those runs are used.

```bash
python -m omics.cli.run_analyzer 123456 --write-config=optimized.config
```
## Security

See [CONTRIBUTING](https://github.com/awslabs/amazon-omics-tools/blob/main/CONTRIBUTING.md#security-issue-notifications) for more information.
Expand Down
28 changes: 26 additions & 2 deletions omics/cli/run_analyzer/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
[--out=<path>]
[--plot=<directory>]
[--headroom=<float>]
[--write-config=<path>]
[--help]

Options:
Expand All @@ -24,6 +25,7 @@
-o, --out=<path> Write output to file
-P, --plot=<directory> Plot a run timeline to a directory
-H, --headroom=<float> Adds a fractional buffer to the size of recommended memory and CPU. Values must be between 0.0 and 1.0.
-c, --write-config=<path> Output a config file with recommended resources (Nextflow only)
-h, --help Show help text

Examples:
Expand Down Expand Up @@ -54,8 +56,8 @@
import dateutil.utils
import docopt
from bokeh.plotting import output_file

from . import timeline # type: ignore
from . import writeconfig

exename = os.path.basename(sys.argv[0])
OMICS_LOG_GROUP = "/aws/omics/WorkflowLog"
Expand Down Expand Up @@ -410,7 +412,6 @@ def get_timeline_event(res, resources):
"running": (time3 - time2).total_seconds(),
}


if __name__ == "__main__":
# Parse command-line options
opts = docopt.docopt(__doc__)
Expand Down Expand Up @@ -522,11 +523,33 @@ def tocsv(val):

writer = csv.writer(out, lineterminator="\n")
writer.writerow(formatted_headers)
config = {}

for res in resources:
add_metrics(res, resources, pricing, headroom)
metrics = res.get("metrics", {})
if res['type'] == 'run':
omics = session.client("omics")
wfid = res['workflow'].split('/')[-1]
engine = omics.get_workflow(id=wfid)['engine']
if res['type'] == 'task':
task_name = writeconfig.get_base_task(engine, res['name'])
if task_name not in config.keys():
config[task_name] ={
'cpus': metrics['recommendedCpus'],
'mem': metrics['recommendedMemoryGiB']
}
else:
config[task_name] ={
'cpus': max(metrics['recommendedCpus'], config[task_name]['cpus']),
'mem': max(metrics['recommendedMemoryGiB'], config[task_name]['mem'])
}
row = [tocsv(metrics.get(h, res.get(h))) for h in hdrs]
writer.writerow(row)

if opts["--write-config"]:
filename = opts['--write-config']
writeconfig.create_config(engine, config, filename)
if opts["--out"]:
sys.stderr.write(f"{exename}: wrote {opts['--out']}\n")
if opts["--plot"]:
Expand Down Expand Up @@ -558,3 +581,4 @@ def tocsv(val):
title = f"arn: {run['arn']}, name: {run.get('name')}"

timeline.plot_timeline(resources, title=title, max_duration_hrs=run_duration_hrs)

32 changes: 32 additions & 0 deletions omics/cli/run_analyzer/writeconfig.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import textwrap

def create_config(engine, task_resources, filename):
    """Write a workflow config file containing recommended task resources.

    Args:
        engine: Workflow engine name; only 'NEXTFLOW' is supported.
        task_resources: Mapping of task name to a dict with 'cpus' and
            'mem' entries ('mem' is assumed to be in GiB, as supplied by
            the run analyzer's recommendedMemoryGiB metric).
        filename: Path of the config file to write.

    Raises:
        ValueError: If the engine is 'CWL' or 'WDL' (not supported yet) or
            is not a known workflow engine.
    """
    if engine == 'NEXTFLOW':
        # withName selectors are only honored inside the `process` scope.
        lines = ["process {"]
        for task, resources in task_resources.items():
            # Quote the selector so names containing ':' or other special
            # characters remain valid config syntax.
            lines.append(f"    withName: '{task}' {{")
            # The Nextflow directive is `cpus`, not `cpu`.
            lines.append(f"        cpus = {resources['cpus']}")
            # Memory needs an explicit unit; a bare number would not be
            # interpreted as gigabytes.
            lines.append(f"        memory = '{resources['mem']} GB'")
            lines.append("    }")
        lines.append("}")
        with open(filename, 'w') as out:
            out.write("\n".join(lines) + "\n")
    elif engine == 'CWL':
        raise ValueError("--write-config does not currently support CWL workflows")
    elif engine == 'WDL':
        raise ValueError("--write-config does not currently support WDL workflows")
    else:
        raise ValueError("Unknown workflow engine")

def get_base_task(engine, task):
    """Return the base task name for the given workflow engine.

    For 'NEXTFLOW' the name is truncated at the first space; for 'CWL'
    and 'WDL' the name is returned unchanged.

    Raises:
        ValueError: If the engine is not one of the known workflow engines.
    """
    if engine not in ('NEXTFLOW', 'CWL', 'WDL'):
        raise ValueError("Unknown workflow engine")
    if engine == 'NEXTFLOW':
        # Keep only the text before the first space.
        base_name, _, _ = task.partition(" ")
        return base_name
    return task
Empty file added tests/run_analyzer/__init__.py
Empty file.
14 changes: 14 additions & 0 deletions tests/run_analyzer/unit/test_writeconfig.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import unittest
from omics.cli.run_analyzer import writeconfig

class TestGetBaseTask(unittest.TestCase):
    """Unit tests for writeconfig.get_base_task."""

    def test_get_base_task_nextflow(self):
        # For Nextflow, everything after the first space is dropped.
        result = writeconfig.get_base_task('NEXTFLOW', 'task1 (sample1)')
        self.assertEqual(result, 'task1')

    def test_get_base_task_cwl(self):
        # CWL task names are returned unchanged; get_base_task does not
        # raise for a known engine.
        result = writeconfig.get_base_task('CWL', 'task1 (sample1)')
        self.assertEqual(result, 'task1 (sample1)')

    def test_get_base_task_unknown_engine(self):
        # assertRaises only checks anything when used as a context manager
        # (or given a callable); an unknown engine must raise ValueError.
        with self.assertRaises(ValueError):
            writeconfig.get_base_task('UNKNOWN', 'task1 (sample1)')

if __name__ == '__main__':
unittest.main()
Loading