Skip to content

Commit

Permalink
Merge pull request #18 from A2-ai/cancel_job
Browse files Browse the repository at this point in the history
added ability to cancel a running job
  • Loading branch information
mduncans authored Nov 4, 2024
2 parents 31a8feb + f063bc6 commit cd36276
Show file tree
Hide file tree
Showing 10 changed files with 126 additions and 74 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: slurmtools
Title: slurm tooling
Version: 0.0.0.9001
Version: 0.0.0.9002
Authors@R: c(person("Devin", "Pastoor", , "[email protected]", role = c("aut", "cre")),
person("Jenna", "Elwing", email = "[email protected]", role = "aut"),
person("Matthew", "Smith", email = "[email protected]", role = "aut"))
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ S3method(format_toml,integer)
S3method(format_toml,list)
S3method(format_toml,logical)
S3method(format_toml,name)
export(cancel_job)
export(generate_nmm_config)
export(get_slurm_jobs)
export(get_slurm_partitions)
Expand Down Expand Up @@ -36,6 +37,8 @@ importFrom(purrr,map)
importFrom(purrr,map2)
importFrom(purrr,map2_chr)
importFrom(purrr,map_if)
importFrom(rlang,.data)
importFrom(rlang,.env)
importFrom(rlang,abort)
importFrom(rlang,as_function)
importFrom(rlang,dots_list)
Expand Down
48 changes: 48 additions & 0 deletions R/cancel-job.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#' Cancels a running job
#'
#' Looks the job up in the queue returned by `get_slurm_jobs()` for the
#' relevant user, checks that it is in the `RUNNING` state and then calls
#' `scancel` on it. The matching queue entry is printed before cancelling.
#'
#' @param job_id job id to cancel. Must be a single value.
#' @param user optional if not the current user. When supplied and different
#'   from the current user, a confirmation prompt is shown and the job is
#'   looked up in that user's queue instead.
#' @return Invisibly, the result of the `scancel` call as returned by
#'   `processx::run()`. An error is raised if the job cannot be found, is not
#'   running, or `scancel` exits with a non-zero status.
#' @importFrom rlang .data
#' @importFrom rlang .env
#'
#' @export
#'
#' @examples \dontrun{
#' cancel_job(243)
#' }
cancel_job <- function(job_id, user = NULL) {
  if (length(job_id) != 1) {
    stop("`job_id` must be a single job id.")
  }

  # Sys.getenv() returns "" (never NULL) for an unset variable, so the
  # fallback to Sys.info() has to be triggered by an emptiness check.
  current_user <- Sys.getenv("USER")
  if (!nzchar(current_user)) {
    current_user <- Sys.info()[["user"]]
  }

  if (!is.null(user) && user != current_user) {
    cat("The supplied user does not match the current user.\n")
    cat(paste0("\tsupplied user: ", user, "\n\tcurrent_user: ", current_user, "\n"))
    cat("You might be cancelling someone elses job.")
    continue <- readline("Are you sure you want to cancel this job? (Y/n)\n")
    if (continue == "Y") {
      # From here on the queue lookup is done for the supplied user.
      current_user <- user
    } else if (tolower(continue) == "n") {
      stop(paste0("Not cancelling job: ", job_id))
    } else {
      stop("Please enter Y or n")
    }
  }

  jobs <- get_slurm_jobs(user = current_user)

  if (!job_id %in% jobs$job_id) {
    stop("Please ensure the job id is correct.")
  }

  job_id_filtered <- dplyr::filter(jobs, .data$job_id == .env$job_id)
  print(job_id_filtered)
  # `%in%` + any() keeps the condition a single TRUE/FALSE even when several
  # queue rows share the job id or a state is NA.
  if (!any(job_id_filtered$job_state %in% "RUNNING")) {
    stop(paste0("Job: ", job_id, " is not running"))
  }

  # error_on_status = FALSE so that a failing scancel reaches the reporting
  # code below instead of being turned into a processx error first.
  result <- processx::run(
    "scancel",
    args = as.character(job_id),
    error_on_status = FALSE
  )
  if (result$status != 0) {
    print(paste0("Stdout: ", result$stdout))
    print(paste0("Stderr: ", result$stderr))
    stop(paste0("scancel failed for job: ", job_id, " (exit status ", result$status, ")"))
  }

  invisible(result)
}
5 changes: 5 additions & 0 deletions R/slurmtools.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@
#' that shows status and other information given with `squeue`
#' }
#'
#' @section cancelling jobs:
#' \itemize{
#' \item \code{\link{cancel_job}}: Cancels the specified job
#' }
#'
#' @section slurm partitions:
#' \itemize{
#' \item \code{\link{get_slurm_partitions}}: Gives a vector of available
Expand Down
1 change: 1 addition & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ reference:
- submit_nonmem_model
- generate_nmm_config
- get_slurm_jobs
- cancel_job
- get_slurm_partitions
- slurmtools

Expand Down
21 changes: 21 additions & 0 deletions man/cancel_job.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions man/slurmtools.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 11 additions & 17 deletions vignettes/Running-nonmem.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,6 @@ vignette: >
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
#removing generated files from running this vignette
nonmem <- file.path("model", "nonmem")
unlink(file.path(nonmem, "1001"), recursive = TRUE)
unlink(file.path(nonmem, "1001.yaml"))
unlink(file.path(nonmem, "1001.toml"))
unlink(file.path(nonmem, "submission-log"), recursive = TRUE)
unlink(file.path(nonmem, "in_progress"), recursive = TRUE)
```

```{r, include = FALSE}
knitr::opts_chunk$set(
collapse = TRUE,
Expand Down Expand Up @@ -259,13 +247,19 @@ curl -d "Finished model run: 1001-nonmem-run $JOBID" ntfy.sh/ntfy_demo

To reiterate, this template file is run as a bash shell script so anything you can do in bash you can put into the template and pass the needed arguments and customize the behavior to your liking.

```{r, include = FALSE}
#removing generated files from running this vignette
# nonmem <- file.path("model", "nonmem")
#
```{r cleanup, include = FALSE}
# Cancel any NONMEM jobs that are still running before the generated files
# are removed below.
# NOTE(review): the user name is hard-coded to "matthews"; presumably this
# chunk only finds jobs when the vignette is built by that account - confirm.
jobs <- get_slurm_jobs(user = "matthews") %>%
dplyr::filter(job_state == "RUNNING") %>%
dplyr::pull(job_id)
# cancel_job() is called once per running job id; no `user` is passed, so it
# uses its default (NULL).
for (job in jobs) {
cancel_job(job)
}
unlink(file.path(nonmem, "1001"), recursive = TRUE)
unlink(file.path(nonmem, "1001.yaml"))
unlink(file.path(nonmem, "1001.toml"))
unlink(file.path(nonmem, "submission-log"), recursive = TRUE)
unlink(file.path(nonmem, "submission-log"), recursive = TRUE)
unlink(file.path(nonmem, "in_progress"), recursive = TRUE)
```
50 changes: 19 additions & 31 deletions vignettes/custom-alerts.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,6 @@ editor_options:
wrap: 72
---

```{r, include = FALSE}
#removing generated files from running this vignette
nonmem <- file.path("model", "nonmem")
unlink(file.path(nonmem, "1001"), recursive = TRUE)
unlink(file.path(nonmem, "1001.yaml"))
unlink(file.path(nonmem, "1001.toml"))
unlink(file.path(nonmem, "submission-log"), recursive = TRUE)
unlink(file.path(nonmem, "in_progress"), recursive = TRUE)
```

```{r, include = FALSE}
knitr::opts_chunk$set(
collapse = TRUE,
Expand All @@ -40,12 +29,13 @@ options('slurmtools.submission_root' = file.path(nonmem, "submission-log"))
## Submitting a NONMEM job with nmm

Instead of using bbi we can use `nmm` ([NONMEM
Monitor](https://github.com/A2-ai/nonmem-monitor/releases)) which currently has
some additional functionality of sending notifications about zero
gradients, missing -1E9 lines in ext file, and some very basic control
stream errors. Nonmem-monitor also allows for setting up an alerter to
be better fed these messages - more on that later. To use `nmm` you can
install the latest release from the github repository linked above.
Monitor](https://github.com/A2-ai/nonmem-monitor/releases)) which
currently has some additional functionality of sending notifications
about zero gradients, missing -1E9 lines in ext file, and some very
basic control stream errors. Nonmem-monitor also allows for setting up
an alerter to be better fed these messages - more on that later. To use
`nmm` you can install the latest release from the github repository
linked above.

We can update the template file accordingly:

Expand All @@ -70,7 +60,8 @@ it's not on our path.
The `config.toml` file controls what `nmm` will monitor and where to
look for files and how to alert you. We'll use `generate_nmm_config()`
to create this file. First we can look at the documentation to see what
type of information we should pass to this function. `?generate_nmm_config()`
type of information we should pass to this function.
`?generate_nmm_config()`

```{r}
mod_number <- "1001"
Expand Down Expand Up @@ -239,19 +230,16 @@ This gives us the notifications in a much more digestible format

![nmm ntfy.sh alerts](data/images/nmm_ntfy_alerts.png)

```{r, include = FALSE}
# #cancelling any running nonmem jobs
# state <- slurmtools::get_slurm_jobs(user = "matthews")
#
# if (any(state$job_state %in% c("RUNNING", "CONFIGURING"))) {
# for (job_id in state %>% dplyr::filter(job_state == "RUNNING") %>% dplyr::pull("job_id")) {
# processx::run("scancel", args = paste0(job_id))
# }
# }
#
# #removing generated files from running this vignette
# nonmem <- file.path("model", "nonmem")
#
```{r cleanup, include = FALSE}
# cancelling any running nonmem jobs
jobs <- get_slurm_jobs(user = "matthews") %>%
dplyr::filter(job_state == "RUNNING") %>%
dplyr::pull(job_id)
for (job in jobs) {
cancel_job(job)
}
unlink(file.path(nonmem, "1001"), recursive = TRUE)
unlink(file.path(nonmem, "1001.yaml"))
unlink(file.path(nonmem, "1001.toml"))
Expand Down
35 changes: 10 additions & 25 deletions vignettes/slack-alerts.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,6 @@ editor_options:
markdown:
wrap: 72
---

```{r, include = FALSE}
#removing generated files from running this vignette
nonmem <- file.path("model", "nonmem")
unlink(file.path(nonmem, "1001"), recursive = TRUE)
unlink(file.path(nonmem, "1001.yaml"))
unlink(file.path(nonmem, "1001.toml"))
unlink(file.path(nonmem, "submission-log"), recursive = TRUE)
unlink(file.path(nonmem, "in_progress"), recursive = TRUE)
```

```{r, include = FALSE}
knitr::opts_chunk$set(
collapse = TRUE,
Expand Down Expand Up @@ -151,19 +139,16 @@ slurmtools::get_slurm_jobs(user = 'matthews')

![nmm slack alerts](data/images/nmm_slack_notifications.png)

```{r, include = FALSE}
# #cancelling any running nonmem jobs
# state <- slurmtools::get_slurm_jobs(user = "matthews")
#
# if (any(state$job_state %in% c("RUNNING", "CONFIGURING"))) {
# for (job_id in state %>% dplyr::filter(job_state == "RUNNING") %>% dplyr::pull("job_id")) {
# processx::run("scancel", args = paste0(job_id))
# }
# }
#
# #removing generated files from running this vignette
# nonmem <- file.path("model", "nonmem")
#
```{r cleanup, include = FALSE}
# cancelling any running nonmem jobs
jobs <- get_slurm_jobs(user = "matthews") %>%
dplyr::filter(job_state == "RUNNING") %>%
dplyr::pull(job_id)
for (job in jobs) {
cancel_job(job)
}
unlink(file.path(nonmem, "1001"), recursive = TRUE)
unlink(file.path(nonmem, "1001.yaml"))
unlink(file.path(nonmem, "1001.toml"))
Expand Down

0 comments on commit cd36276

Please sign in to comment.