Skip to content

Commit

Permalink
Issue/85 (#86)
Browse files Browse the repository at this point in the history
* fixes edgecase where all run tasks have been cached

* update dependencies

* adds tests for timeline functions
markjschreiber authored Jan 7, 2025
1 parent ef14d38 commit 19672e1
Showing 3 changed files with 520 additions and 378 deletions.
33 changes: 18 additions & 15 deletions omics/cli/run_analyzer/timeline.py
Original file line number Diff line number Diff line change
@@ -25,23 +25,25 @@ def _parse_time_str(time_str):
def _get_task_timings_data(tasks, time_units="min"):
time_scale_factor = TIME_SCALE_FACTORS[time_units]

# remove tasks that don't have a creationTime
filtered_tasks = [task for task in tasks if task.get("creationTime")]
# retain only tasks that have creationTime, stopTime and startTime
filtered_tasks = [
task
for task in tasks
if task.get("creationTime") and task.get("stopTime") and task.get("startTime")
]
if not filtered_tasks:
print("No tasks found with timing data, a plot cannot be created", file=sys.stderr)
print(
"No tasks found with timing data, possibly all tasks were cached, a plot cannot be created",
file=sys.stderr,
)
return pd.DataFrame()

tare = min([_parse_time_str(task["creationTime"]) for task in filtered_tasks])

for i, task in enumerate(tasks):
if "creationTime" not in task:
task["creationTime"] = tare
task["startTime"] = task["creationTime"]
task["stopTime"] = task["creationTime"]
else:
task["creationTime"] = _parse_time_str(task["creationTime"])
task["startTime"] = _parse_time_str(task["startTime"])
task["stopTime"] = _parse_time_str(task["stopTime"])
for i, task in enumerate(filtered_tasks):
task["creationTime"] = _parse_time_str(task["creationTime"])
task["startTime"] = _parse_time_str(task["startTime"])
task["stopTime"] = _parse_time_str(task["stopTime"])
task["cpus"] = task.get("cpus", 0)
task["gpus"] = task.get("gpus", 0)
task["memory"] = task.get("memory", 0)
@@ -61,17 +63,18 @@ def _get_task_timings_data(tasks, time_units="min"):
task["label"] = f"({task['arn']}) {task['name']}"
task["text_x"] = (task["stopTime"] - tare).total_seconds() + 30 * time_scale_factor

tasks[i] = task
filtered_tasks[i] = task
task["estimatedUSD"] = task.get("metrics", {}).get("estimatedUSD", 0.0)

return pd.DataFrame.from_records(tasks).sort_values("creationTime")
return pd.DataFrame.from_records(filtered_tasks).sort_values("creationTime")


def plot_timeline(tasks, title="", time_units="min", max_duration_hrs=5, show_plot=True):
"""Plot a time line figure for supplied tasks"""
time_scale_factor = TIME_SCALE_FACTORS[time_units]
data = _get_task_timings_data(tasks, time_units=time_units)

if data.empty:
return None
source = ColumnDataSource(data)

tooltips = [
Loading

0 comments on commit 19672e1

Please sign in to comment.