Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add post-observing daily task #38

Merged
merged 3 commits into from
Nov 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

## Next version

### 🚀 New

* [#38](https://github.com/sdss/lvmgort/pull/38) Add a post-observing daily task that runs 30 minutes after sunrise, performs a few checks (makes sure the dome is closed, parks the telescopes, etc.), and retries safe calibrations that failed during the normal sequence.

### 🔧 Fixed

* Prevent the Overwatcher observer from opening the dome while calibrations are ongoing.
Expand All @@ -16,7 +20,7 @@

### 🚀 New

* * [#37](https://github.com/sdss/lvmgort/pull/37) Basic implementation of the `Troubleshooter` class for the Overwatcher. Currently only very broad troubleshooting checks and recipes are implemented.
* [#37](https://github.com/sdss/lvmgort/pull/37) Basic implementation of the `Troubleshooter` class for the Overwatcher. Currently only very broad troubleshooting checks and recipes are implemented.

### 🏷️ Changed

Expand Down
5 changes: 5 additions & 0 deletions codecov.yml
Original file line number Diff line number Diff line change
@@ -1,2 +1,7 @@
coverage:
status:
project: off
patch: off

github_checks:
annotations: false
3 changes: 3 additions & 0 deletions src/gort/etc/calibrations.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
close_dome_after: true
abort_observing: true
priority: 10
allow_post_observing_recovery: false

- name: quick_cals
recipe: quick_cals
Expand All @@ -21,6 +22,7 @@
dome: closed
abort_observing: true
priority: 8
allow_post_observing_recovery: true

- name: bias_sequence
recipe: bias_sequence
Expand All @@ -32,6 +34,7 @@
dome: closed
abort_observing: true
priority: 5
allow_post_observing_recovery: true
#
# - name: twilight_flats_sunrise
# recipe: twilight_flats
Expand Down
8 changes: 8 additions & 0 deletions src/gort/overwatcher/calibration.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,11 @@ class CalibrationModel(BaseModel):
title="The maximum time in seconds to attempt the calibration if it fails. "
"If max_start_time is reached during this period, the calibrations fails.",
)
allow_post_observing_recovery: bool = Field(
default=True,
title="Whether the calibration can be run after observing has finished "
"if it initially failed.",
)

@model_validator(mode="after")
def validate_start_time(self) -> Self:
Expand Down Expand Up @@ -484,6 +489,9 @@ async def reset(self, cals_file: str | pathlib.Path | None = None):
if cals_file is not None:
self.cals_file = cals_file

self._failing_cals = {}
self._ignore_cals = set()

try:
self.schedule.update_schedule(self.cals_file)
except Exception as ee:
Expand Down
146 changes: 141 additions & 5 deletions src/gort/overwatcher/helpers/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@

from sdsstools import get_sjd

from gort.tools import redis_client_sync
from gort.overwatcher.calibration import CalibrationState
from gort.tools import add_night_log_comment, redis_client_sync


if TYPE_CHECKING:
Expand Down Expand Up @@ -103,14 +104,31 @@ async def run(self):
if self.done:
return

if not await self._should_run():
return

await self.overwatcher.notify(f"Running daily task {self.name}.")
self.done = await self._run_internal()
try:
self.done = await self._run_internal()
except Exception as err:
await self.overwatcher.notify(
f"Error running daily task {self.name}: {err}",
level="error",
)
self.done = True
return

if self.done:
await self.overwatcher.notify(f"Task {self.name} has been completed.")
else:
await self.overwatcher.notify(f"Task {self.name} has failed.")

@abc.abstractmethod
async def _should_run(self) -> bool:
    """Returns True if the task should run.

    Subclasses must implement this check. ``run`` awaits it before doing
    any work and returns immediately, without sending notifications or
    calling ``_run_internal``, when the result is false.

    """

    raise NotImplementedError

@abc.abstractmethod
async def _run_internal(self) -> bool:
"""Runs the internal task."""
Expand Down Expand Up @@ -145,12 +163,17 @@ def mark_done(self):


class PreObservingTask(DailyTaskBase):
"""Run the pre-observing tasks."""
"""Run the pre-observing tasks.

This task is run between 30 and 10 minutes before sunset if no calibration is
ongoing and will take a bias and make sure the telescopes are connected and homed.

"""

name = "pre_observing"

async def _run_internal(self) -> bool:
"""Runs the pre-observing tasks."""
async def _should_run(self) -> bool:
"""Returns True if the task should run."""

if self.overwatcher.ephemeris.ephemeris is None:
return False
Expand All @@ -168,6 +191,11 @@ async def _run_internal(self) -> bool:
):
return False

return True

async def _run_internal(self) -> bool:
"""Runs the pre-observing tasks."""

try:
await self.overwatcher.gort.execute_recipe("pre-observing")
except Exception as err:
Expand All @@ -178,3 +206,111 @@ async def _run_internal(self) -> bool:

# Always mark the task complete, even if it failed.
return True


class PostObservingTask(DailyTaskBase):
    """Run the post-observing tasks.

    This task is run 30 minutes after sunrise. It runs the post-observing recipe
    but does not send the email (that is done at 12UT by a cron job for redundancy).

    The recipe checks that the dome is closed, the telescope is parked, guiders
    are off, etc. It also goes over the calibrations and if a calibration is missing
    and has ``allow_post_observing_recovery=true`` it will try to obtain it.

    """

    name = "post_observing"

    async def _should_run(self) -> bool:
        """Returns True if the task should run.

        The task runs only when the ephemeris is available, the current time is
        between 1800 and 2000 seconds after sunrise, and the Overwatcher is
        neither calibrating nor observing.

        """

        ephemeris = self.overwatcher.ephemeris.ephemeris
        if ephemeris is None:
            return False

        # Run this task 30 minutes after sunrise. The upper limit keeps the
        # window short so the task is not started long after that time.
        sunrise = Time(ephemeris.sunrise, format="jd").unix
        elapsed = time.time() - sunrise
        if elapsed < 1800 or elapsed > 2000:
            return False

        state = self.overwatcher.state
        if state.calibrating or state.observing:
            return False

        return True

    async def _run_internal(self) -> bool:
        """Runs the post-observing tasks.

        Executes the ``post-observing`` recipe without sending the night log
        email and then retries every calibration that is not done, allows
        post-observing recovery, and does not require moving the dome. If any
        calibration was attempted the telescopes are parked again.

        Returns
        -------
        done
            Always `True`; the task is marked complete even if it failed.

        """

        notify = self.overwatcher.notify

        try:
            # The keyword must match ``PostObservingRecipe.recipe(send_email=...)``.
            await self.overwatcher.gort.execute_recipe(
                "post-observing",
                send_email=False,
            )
        except Exception as err:
            await notify(
                f"Error running post-observing task: {err}",
                level="critical",
            )
            return True

        calibrations_attempted: bool = False

        for calibration in self.overwatcher.calibrations.schedule.calibrations:
            name = calibration.name

            # Calibration must not be done (any other state is valid).
            if calibration.state == CalibrationState.DONE:
                continue

            # Calibration must allow recovery.
            if not calibration.model.allow_post_observing_recovery:
                continue

            # Calibrations must be allowed.
            if not self.overwatcher.state.allow_calibrations:
                continue

            # Calibration must not require moving the dome (model.dome = None)
            # or asks for the dome to be closed and it actually is.
            # NOTE(review): the current dome state is taken from ``is_closing()``;
            # confirm this is the intended query and not a closed-state check.
            required_dome = calibration.model.dome
            if required_dome is not None:
                current_dome = await self.overwatcher.dome.is_closing()
                if required_dome is True or current_dome != required_dome:
                    continue

            await notify(f"Retrying calibration {name}.")

            try:
                # Set the flag before running so that the telescopes are parked
                # afterwards even if the calibration raises.
                calibrations_attempted = True
                await self.overwatcher.calibrations.run_calibration(calibration)

                if calibration.state != CalibrationState.DONE:
                    await notify(f"Failed to recover calibration {name}.")
                else:
                    await notify(f"Calibration {name} recovered.")

                    # Automatically add a comment to the night log.
                    await add_night_log_comment(
                        f"Calibration {name} initially failed and was retaken "
                        "after observations had been completed. Review the "
                        "data quality since the exposures were taken after "
                        "sunrise.",
                        category="other",
                    )

            except Exception as err:
                await notify(f"Error recovering calibration {name}: {err}")

        # If we have tried a calibration we may have rehomed the telescopes and
        # left them not parked. Make sure they are really parked.
        if calibrations_attempted:
            self.overwatcher.log.info("Parking telescopes after post-observing cals.")
            await self.overwatcher.gort.telescopes.park()

        # Always mark the task complete, even if it failed.
        return True
15 changes: 9 additions & 6 deletions src/gort/recipes/operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

import asyncio

from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, ClassVar

from rich.prompt import Confirm

Expand Down Expand Up @@ -322,7 +322,9 @@ class PostObservingRecipe(BaseRecipe):

name = "post-observing"

async def recipe(self):
email_route: ClassVar[str] = "/logs/night-logs/0/email?only_if_not_sent=1"

async def recipe(self, send_email: bool = True):
"""Runs the post-observing sequence."""

from gort.overwatcher.helpers.notifier import BasicNotifier
Expand All @@ -347,10 +349,11 @@ async def recipe(self):
except Exception as ee:
notifier.log.error(f"Error running post-observing task: {ee}")

notifier.log.info("Sending night log email.")
result = await get_lvmapi_route("/logs/night-logs/0/email?only_if_not_sent=1")
if not result:
notifier.log.warning("Night log had already been sent.")
if send_email:
notifier.log.info("Sending night log email.")
result = await get_lvmapi_route(self.email_route)
if not result:
notifier.log.warning("Night log had already been sent.")

# Disable the overwatcher.
if await overwatcher_is_running():
Expand Down
23 changes: 23 additions & 0 deletions src/gort/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@
"kubernetes_restart_deployment",
"kubernetes_list_deployments",
"get_gort_client",
"add_night_log_comment",
]

AnyPath = str | os.PathLike
Expand Down Expand Up @@ -1003,3 +1004,25 @@ async def get_gort_client(override_overwatcher: bool | None = None):
yield gort

await gort.stop()


async def add_night_log_comment(comment: str, category: str = "other"):
    """Adds a comment to the night log.

    Posts the comment to the ``/night-logs/comments/add`` route of the
    ``lvmapi`` service configured under ``config["services"]["lvmapi"]``,
    tagged with the current SJD at LCO.

    Parameters
    ----------
    comment
        The text of the comment.
    category
        The category of the comment. An empty or null value falls back
        to ``"other"``.

    Raises
    ------
    ValueError
        If the response status code is not 200.

    """

    body = dict(
        mjd=get_sjd("LCO"),
        category=category or "other",
        comment=comment,
    )

    # The service entry is unpacked positionally as (host, port).
    host, port = config["services"]["lvmapi"].values()
    base_url = f"http://{host}:{port}"

    async with httpx.AsyncClient(base_url=base_url, follow_redirects=True) as session:
        reply = await session.post("/night-logs/comments/add", json=body)

    code = reply.status_code
    if code == 200:
        return

    raise ValueError(f"Failed adding night log comment. Code {code}.")
Loading