Add abislurm.py

abinit · May 29, 2024 · 42615f6 · 42615f6
1 parent dfe641e
commit 42615f6
Show file tree

Hide file tree

Showing 3 changed files with 147 additions and 57 deletions.
diff --git a/abipy/flowtk/qutils.py b/abipy/flowtk/qutils.py
@@ -11,7 +11,7 @@
 import os
 #import json
 
-from subprocess import Popen, PIPE
+from subprocess import Popen, PIPE, run
 from monty.string import is_string
 from pymatgen.core.units import Time, Memory
 from abipy.tools.typing import PathLike
@@ -308,6 +308,56 @@ def slurm_sbatch(slurm_filepath: PathLike) -> int:
             raise RuntimeError(f"Error while submitting {slurm_filepath=} with {process.returncode=},\n{out=}\n{err=}")
 
 
+def get_sacct_info():
+    """
+    Run the sacct command to get the job information.
+
+    Return: list of dictionaries, one for each line printed by sacct, mapping the field names
+        (JobID, JobName, Partition, Account, AllocCPUS, State, ExitCode) to strings.
+        The list is empty if sacct prints nothing.
+        None is returned, after printing a message, if sacct cannot be executed
+        or if it exits with a non-zero return code.
+    """
+    fields = ['JobID', 'JobName', 'Partition', 'Account', 'AllocCPUS', 'State', 'ExitCode']
+
+    try:
+        # --parsable2 requests '|'-delimited values so that an empty column
+        # cannot shift the remaining fields, as would happen when splitting on whitespace.
+        # capture_output is used instead of subprocess.PIPE because the visible import
+        # only brings names (Popen, PIPE, run) into scope, not the subprocess module.
+        result = run(['sacct', '--format=' + ','.join(fields), '--noheader', '--parsable2'],
+                     capture_output=True, text=True)
+    except (OSError, ValueError) as exc:
+        # OSError is raised e.g. when the sacct executable cannot be found.
+        print(f"An error occurred: {exc}")
+        return None
+
+    # Check if the command was successful
+    if result.returncode != 0:
+        print(f"Error running sacct: {result.stderr}")
+        return None
+
+    # Process the output. Blank lines are skipped so that an empty output gives an empty list.
+    return [dict(zip(fields, line.split('|'))) for line in result.stdout.splitlines() if line.strip()]
+
+
+def get_completed_job_info(job_id):
+    """
+    Run the sacct command to get accounting information on the job with the given ID.
+
+    Args:
+        job_id: Job identifier passed to `sacct --jobs`. Converted to string before the call.
+
+    Return: list of dictionaries, one for each line printed by sacct, mapping the field names
+        (JobID, JobName, Partition, Account, AllocCPUS, State, ExitCode, Start, End,
+        Elapsed, TotalCPU, MaxRSS) to strings. The list is empty if sacct prints nothing.
+        None is returned, after printing a message, if sacct cannot be executed
+        or if it exits with a non-zero return code.
+    """
+    # Define the fields we want to retrieve
+    fields = "JobID,JobName,Partition,Account,AllocCPUS,State,ExitCode,Start,End,Elapsed,TotalCPU,MaxRSS"
+
+    try:
+        # Run the sacct command with the specified fields for the given job ID.
+        # str() allows callers to pass the job ID as an integer: run() only accepts
+        # strings (or path-like objects) in the argument list.
+        # capture_output is used instead of subprocess.PIPE because the visible import
+        # only brings names (Popen, PIPE, run) into scope, not the subprocess module.
+        result = run(
+            ['sacct', '--jobs', str(job_id), '--format', fields, '--noheader', '--parsable2'],
+            capture_output=True, text=True
+        )
+    except (OSError, ValueError) as exc:
+        # OSError is raised e.g. when the sacct executable cannot be found.
+        print(f"An error occurred: {exc}")
+        return None
+
+    # Check if the command was successful
+    if result.returncode != 0:
+        print(f"Error running sacct: {result.stderr}")
+        return None
+
+    # Process the output. Blank lines are skipped so that an empty output gives an empty list.
+    names = fields.split(',')
+    return [dict(zip(names, line.split('|'))) for line in result.stdout.splitlines() if line.strip()]
+
+
 def get_slurm_template(body: str) -> str:
     """
     Return template for slurm submission that is supposed to be customized by the user.

diff --git a/abipy/scripts/abislurm.py b/abipy/scripts/abislurm.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+"""
+This script retrieves information on Slurm jobs.
+"""
+import sys
+import os
+import argparse
+import abipy.tools.cli_parsers as cli
+import abipy.flowtk.qutils as qu
+
+from abipy.core.release import __version__
+
+
+def get_epilog() -> str:
+    """Return the text with the usage examples of the script."""
+    lines = [
+        "Usage example:\n",
+        "",
+        "    abislurm.py running                => Get info on all the running jobs",
+        "",
+    ]
+    return "\n".join(lines)
+
+
+def get_parser(with_epilog=False):
+    """
+    Build and return the argparse parser of the script.
+
+    Args:
+        with_epilog: True if the usage examples returned by get_epilog should be used as epilog.
+    """
+    # Build the main parser.
+    parser = argparse.ArgumentParser(epilog=get_epilog() if with_epilog else "",
+                                     formatter_class=argparse.RawDescriptionHelpFormatter)
+
+    parser.add_argument('-v', '--verbose', default=0, action='count', # -vv --> verbose=2
+                        help='verbose, can be supplied multiple times to increase verbosity')
+
+    #parser.add_argument('-V', '--version', action='version', version="%(prog)s version " + __version__)
+    #parser.add_argument('--loglevel', default="ERROR", type=str,
+    #                    help="set the loglevel. Possible values: CRITICAL, ERROR (default), WARNING, INFO, DEBUG")
+
+    # Parent parser for common options.
+    copts_parser = argparse.ArgumentParser(add_help=False)
+    copts_parser.add_argument('-v', '--verbose', default=0, action='count', # -vv --> verbose=2
+        help='verbose, can be supplied multiple times to increase verbosity')
+    copts_parser.add_argument('--loglevel', default="ERROR", type=str,
+        help="Set the loglevel. Possible values: CRITICAL, ERROR (default), WARNING, INFO, DEBUG")
+
+    # Create the parsers for the sub-commands
+    subparsers = parser.add_subparsers(dest='command', help='sub-command help',
+        description="Valid subcommands, use command --help for help")
+
+    # Subparser for running command.
+    # The name must be "running": this is the value documented in the usage examples
+    # and the one tested when dispatching on options.command.
+    subparsers.add_parser('running', parents=[copts_parser],
+        help="Check info on all the running jobs.")
+
+    return parser
+
+
+def main():
+    """
+    Main function of the script: parse the command line and execute the requested sub-command.
+
+    Return: 0 if the sub-command completes.
+
+    Raises:
+        NotImplementedError: for the "completed" command, which is not implemented yet.
+        ValueError: if the command is not one of the values handled here.
+    """
+
+    def show_examples_and_exit(err_msg=None, error_code=1):
+        """Display the usage of the script."""
+        sys.stderr.write(get_epilog())
+        if err_msg: sys.stderr.write("Fatal Error\n" + err_msg + "\n")
+        sys.exit(error_code)
+
+    parser = get_parser(with_epilog=True)
+
+    # Parse command line.
+    try:
+        options = parser.parse_args()
+    except Exception:
+        show_examples_and_exit(error_code=1)
+
+    # No sub-command given on the command line.
+    if not options.command:
+        show_examples_and_exit(error_code=1)
+
+    cli.set_loglevel(options.loglevel)
+
+    if options.verbose > 2:
+        print(options)
+
+    if options.command == "running":
+        jobs_dict = qu.slurm_get_jobs()
+        for job_id, dct in jobs_dict.items():
+            print(f"{job_id=}", dct)
+
+    #elif options.command == "running_from_logs":
+
+    elif options.command == "completed":
+        # TODO: call qu.get_completed_job_info(job_id) once the job ID can be given on the command line.
+        raise NotImplementedError("The `completed` command is not implemented yet.")
+
+    #elif options.command == "completed_from_logs":
+
+    else:
+        raise ValueError(f"Unsupported command: {options.command}")
+
+    return 0
+
+
+if __name__ == "__main__":
+    # Use the value returned by main() as exit status of the script.
+    raise SystemExit(main())
diff --git a/dev_scripts/abiq.py b/dev_scripts/abiq.py