Skip to content

Commit

Permalink
Catch all exceptions in k8s submit_command() (#926)
Browse files Browse the repository at this point in the history
Creating a task_processing task (and/or submitting one) can raise an
exception. At the moment, this leaves the affected ActionRun stuck in
the Starting state. That state is misleading, because the run has
actually failed, and it means that the normal monitoring/alerting on
failed runs does not kick in.
  • Loading branch information
nemacysts authored Aug 21, 2023
1 parent a6ce6dd commit 9d1d0e8
Showing 1 changed file with 9 additions and 3 deletions.
12 changes: 9 additions & 3 deletions tron/core/actionrun.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

from dataclasses import dataclass
from dataclasses import fields
from pyrsistent import InvariantException
from twisted.internet import reactor

from tron import command_context
Expand Down Expand Up @@ -1047,7 +1046,7 @@ def submit_command(self, attempt: ActionRunAttempt) -> Optional[KubernetesTask]:
service_account_name=attempt.command_config.service_account_name,
ports=attempt.command_config.ports,
)
except InvariantException:
except Exception:
log.exception(f"Unable to create task for ActionRun {self.id}")
self.fail(exitcode.EXIT_KUBERNETES_TASK_INVALID)
return None
Expand All @@ -1061,7 +1060,14 @@ def submit_command(self, attempt: ActionRunAttempt) -> Optional[KubernetesTask]:

# Watch before submitting, in case submit causes a transition
self.watch(task)
k8s_cluster.submit(task)

try:
k8s_cluster.submit(task)
except Exception:
log.exception(f"Unable to submit task for ActionRun {self.id}")
self.fail(exitcode.EXIT_KUBERNETES_TASK_INVALID)
return None

return task

def recover(self) -> Optional[KubernetesTask]:
Expand Down

0 comments on commit 9d1d0e8

Please sign in to comment.