From b9ace1a01f2ba3f48595a772d0e26712f1db83a9 Mon Sep 17 00:00:00 2001 From: Stefan Verhoeven Date: Tue, 20 Feb 2024 13:42:14 +0100 Subject: [PATCH 1/8] Create `bartender link` subcommand. Refs https://github.com/i-VRESSE/haddock3-webapp/issues/79 --- src/bartender/__main__.py | 65 +++++++++++++++++++++++++++++++++ src/bartender/link.py | 75 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100644 src/bartender/link.py diff --git a/src/bartender/__main__.py b/src/bartender/__main__.py index 43df36a..20c6bae 100644 --- a/src/bartender/__main__.py +++ b/src/bartender/__main__.py @@ -13,6 +13,7 @@ import uvicorn from bartender.config import build_config +from bartender.link import link_job from bartender.schedulers.arq import ArqSchedulerConfig, run_workers from bartender.settings import settings from bartender.user import generate_token_subcommand @@ -92,6 +93,7 @@ def build_parser() -> ArgumentParser: perform_sp.set_defaults(func=perform) add_generate_token_subcommand(subparsers) + add_link_job_subcommand(subparsers) return parser @@ -175,6 +177,69 @@ def add_generate_token_subcommand( generate_token_sp.set_defaults(func=generate_token_subcommand) +def add_link_job_subcommand(subparsers: Any) -> None: + """ + Add the 'link' subcommand to the given subparsers. + + Args: + subparsers (Any): The subparsers object to add the 'link' subcommand to. + """ + link_job_sp = subparsers.add_parser( + "link", + help="Link external directory as job", + formatter_class=Formatter, + description=dedent( # noqa: WPS462 -- docs + """\ + Link external directory as job. + + The external directory should have same shape + as a completed job for the selected application. + + For haddock3 application, the directory should have: + output/ directory and workflow.cfg, stderr.txt, + stdout.txt, returncode files. + + Example: + ```shell + # Link a directory as job + bartender link-job \\ + --submitter someone \\ + --application haddock3 \\ + /path/to/myjob + # Prints job identifier + # The job in db has + # - name=internal_id=myjob + # - destination=local + # - state=ok + # - created_on=updated_on=now + ``` + """, + ), + ) + link_job_sp.add_argument( + "directory", + type=Path, + help="Directory to link as job", + ) + link_job_sp.add_argument( + "--submitter", + default="someone", + help="Submitter of job", + ) + link_job_sp.add_argument( + "--application", + default="ln", + help="Application of job.", + ) + link_job_sp.add_argument( + "--config", + default=Path("config.yaml"), + type=Path, + help="Configuration with schedulers that need arq workers", + ) + link_job_sp.set_defaults(func=link_job) + + def main(argv: list[str] = sys.argv[1:]) -> None: """Entrypoint of the application. diff --git a/src/bartender/link.py b/src/bartender/link.py new file mode 100644 index 0000000..d2b8dbb --- /dev/null +++ b/src/bartender/link.py @@ -0,0 +1,75 @@ +import asyncio +from os import symlink +from pathlib import Path + +from bartender.config import build_config +from bartender.db.dao.job_dao import JobDAO +from bartender.db.session import make_engine, make_session_factory + + +def link_job( + directory: Path, + submitter: str, + application: str, + config: Path, + destination: str = "local", +) -> None: + """Link external directory as job. + + Args: + directory: Directory to link as job + submitter: Submitter of job + application: Application of job + config: Configuration with schedulers that need arq workers + destination: Destination of job + """ + validated_config = build_config(config) + job_root_dir = validated_config.job_root_dir + name = directory.name + + # Create job in db + job_id = asyncio.run( + create_job_in_db(name, application, submitter, destination), + ) + + # Sym link directory to job directory + job_dir = job_root_dir / str(job_id) + symlink(directory, job_dir) + print(job_id) # noqa: WPS421 -- user feedback + + +async def create_job_in_db( + name: str, + application: str, + submitter: str, + destination: str, +) -> int: + """ + Create a job in the database. + + Args: + name: The name of the job. + application: The application associated with the job. + submitter: The submitter of the job. + destination: The destination of the job. + + Returns: + The ID of the created job. + + Raises: + IndexError: If failed to create a database entry for the job. + """ + engine = make_engine() + factory = make_session_factory(engine) + async with factory() as session: + dao = JobDAO(session) + job_id = await dao.create_job(name, application, submitter) + if job_id is None: + raise IndexError("Failed to create database entry for job") + await dao.update_internal_job_id( + job_id, + internal_job_id=name, + destination=destination, + ) + await dao.update_job_state(job_id, "ok") + return job_id From cec15875e6b429f300c83359db8ce8b7e8edaec0 Mon Sep 17 00:00:00 2001 From: Stefan Verhoeven Date: Fri, 23 Feb 2024 08:03:05 +0100 Subject: [PATCH 2/8] Document bartender link subcommand --- docs/deploy.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/deploy.md b/docs/deploy.md index 051d0bb..278bdaa 100644 --- a/docs/deploy.md +++ b/docs/deploy.md @@ -20,3 +20,13 @@ If you want to generate a token with the `docker compose -f deploy/docker-compose.yml exec api bartender generate-token` command you should uncomment the private key volume bind in `deploy/docker-compose.yml`. See [configuration.md#authentication](configuration.md#authentication). + +## Link external directory as job + +If you have a directory outside the bartender job root directory +that is the output of one the configured applications in bartender +then you might want to make it available as a job in bartender. + +To do this you can create a symlink to the external directory +in the bartender job root directory, +by running the `bartender link` command. From 5b087400dcbdb4bc6e62c0e0abb05b508dacbdf2 Mon Sep 17 00:00:00 2001 From: sverhoeven Date: Fri, 23 Feb 2024 09:53:48 +0100 Subject: [PATCH 3/8] Document permissions needed for a linked job --- src/bartender/__main__.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/bartender/__main__.py b/src/bartender/__main__.py index 20c6bae..01f20a2 100644 --- a/src/bartender/__main__.py +++ b/src/bartender/__main__.py @@ -219,7 +219,13 @@ def add_link_job_subcommand(subparsers: Any) -> None: link_job_sp.add_argument( "directory", type=Path, - help="Directory to link as job", + help=dedent( # noqa: WPS462 -- docs + """Directory to link as job. + Its content should be readable by the user running bartender serve. + To run an interactive application on the linked job, + the directory should be writable by the user running bartender serve. + """, + ), ) link_job_sp.add_argument( "--submitter", @@ -229,7 +235,13 @@ def add_link_job_subcommand(subparsers: Any) -> None: link_job_sp.add_argument( "--application", default="ln", - help="Application of job.", + help=dedent( # noqa: WPS462 -- docs + """Application of job. + To run interative application on the linked job, + the application of the job should match the name of + the `job_application` of the interactive application. + """, + ), ) link_job_sp.add_argument( "--config", From 5acd63e0fee876629ec5e7a99fce7bc59536fcd3 Mon Sep 17 00:00:00 2001 From: sverhoeven Date: Fri, 23 Feb 2024 11:15:42 +0100 Subject: [PATCH 4/8] Return 404 when log files are missing --- src/bartender/web/api/job/views.py | 9 ++++++++- tests/web/test_job.py | 19 +++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/src/bartender/web/api/job/views.py b/src/bartender/web/api/job/views.py index abfdd0b..23d40db 100644 --- a/src/bartender/web/api/job/views.py +++ b/src/bartender/web/api/job/views.py @@ -234,6 +234,7 @@ async def get_completed_logs( Raises: ValueError: When job has no destination. + HTTPException: When a log file is not found. Returns: The standard output and error. @@ -241,7 +242,13 @@ async def get_completed_logs( if not job.destination or not job.internal_id: raise ValueError("Job has no destination") destination = context.destinations[job.destination] - return await destination.scheduler.logs(job.internal_id, job_dir) + try: + return await destination.scheduler.logs(job.internal_id, job_dir) + except FileNotFoundError as exc: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="File not found", + ) from exc CurrentLogs = Annotated[Tuple[str, str], Depends(get_completed_logs)] diff --git a/tests/web/test_job.py b/tests/web/test_job.py index 6b3619e..c0ccded 100644 --- a/tests/web/test_job.py +++ b/tests/web/test_job.py @@ -537,6 +537,25 @@ async def test_stderr( assert response.headers["content-type"] == "text/plain; charset=utf-8" +@pytest.mark.anyio +async def test_stderr_missing( + fastapi_app: FastAPI, + client: AsyncClient, + auth_headers: Dict[str, str], + mock_ok_job: int, + job_root_dir: Path, +) -> None: + job_id = str(mock_ok_job) + fn = job_root_dir / str(job_id) / "stderr.txt" + fn.unlink() + + url = fastapi_app.url_path_for("retrieve_job_stderr", jobid=job_id) + response = await client.get(url, headers=auth_headers) + + assert response.status_code == status.HTTP_404_NOT_FOUND + assert response.json() == {"detail": "File not found"} + + @pytest.mark.anyio async def test_directories( fastapi_app: FastAPI, From 5ca4f4c4ab62f8649d0e0627bb7f5443742308b9 Mon Sep 17 00:00:00 2001 From: sverhoeven Date: Fri, 23 Feb 2024 12:13:31 +0100 Subject: [PATCH 5/8] always symlink absolute dir Linking relative dir gives problems. --- src/bartender/link.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bartender/link.py b/src/bartender/link.py index d2b8dbb..54b7016 100644 --- a/src/bartender/link.py +++ b/src/bartender/link.py @@ -34,7 +34,7 @@ def link_job( # Sym link directory to job directory job_dir = job_root_dir / str(job_id) - symlink(directory, job_dir) + symlink(directory.absolute(), job_dir) print(job_id) # noqa: WPS421 -- user feedback From 14e75ac4707f0ec87bfa10aefd98d9b529e8b72b Mon Sep 17 00:00:00 2001 From: sverhoeven Date: Fri, 23 Feb 2024 12:18:50 +0100 Subject: [PATCH 6/8] Allow listing and file content to be retrieved when job dir is a symlink --- src/bartender/web/api/job/views.py | 18 +++++---- tests/test_walk_dir.py | 24 +++++++++++ tests/web/test_job.py | 64 ++++++++++++++++++++++++++++++ 3 files changed, 99 insertions(+), 7 deletions(-) diff --git a/src/bartender/web/api/job/views.py b/src/bartender/web/api/job/views.py index 23d40db..f80b495 100644 --- a/src/bartender/web/api/job/views.py +++ b/src/bartender/web/api/job/views.py @@ -198,9 +198,7 @@ def retrieve_job_files( The file content. """ try: - full_path = (job_dir / path).expanduser().resolve(strict=True) - if not full_path.is_relative_to(job_dir): - raise FileNotFoundError() + full_path = _resolve_path(path, job_dir) if not full_path.is_file(): raise FileNotFoundError() except FileNotFoundError as exc: @@ -437,11 +435,17 @@ async def retrieve_job_subdirectory_as_archive( # noqa: WPS211 ) +def _resolve_path(path: str, job_dir: Path) -> Path: + resolved_job_dir = job_dir.resolve(strict=True) + subdirectory = (resolved_job_dir / path).resolve(strict=True) + if not subdirectory.is_relative_to(resolved_job_dir): + raise FileNotFoundError() + return subdirectory + + def _parse_subdirectory(path: str, job_dir: Path) -> Path: try: - subdirectory = (job_dir / path).resolve(strict=True) - if not subdirectory.is_relative_to(job_dir): - raise FileNotFoundError() + subdirectory = _resolve_path(path, job_dir) if not subdirectory.is_dir(): raise FileNotFoundError() except FileNotFoundError as exc: @@ -450,7 +454,7 @@ def _parse_subdirectory(path: str, job_dir: Path) -> Path: detail="File not found", ) from exc - return subdirectory + return job_dir / path def get_interactive_app( diff --git a/tests/test_walk_dir.py b/tests/test_walk_dir.py index 0102277..15edd87 100644 --- a/tests/test_walk_dir.py +++ b/tests/test_walk_dir.py @@ -137,3 +137,27 @@ async def test_given_single_dir_with_file_and_dir_as_subdir( ], ) assert result == expected + + @pytest.mark.anyio + async def test_given_symlink_as_job_dir(self, tmp_path: Path) -> None: + (tmp_path / "somedir").mkdir() + (tmp_path / "somedir" / "somefile").write_text("sometext") + (tmp_path / "symlink").symlink_to(tmp_path / "somedir") + + result = await walk_dir(tmp_path / "symlink", tmp_path, max_depth=2) + + expected = DirectoryItem( + name="symlink", + path=Path("symlink"), + is_dir=True, + is_file=False, + children=[ + DirectoryItem( + name="somefile", + path=Path("symlink/somefile"), + is_dir=False, + is_file=True, + ), + ], + ) + assert result == expected diff --git a/tests/web/test_job.py b/tests/web/test_job.py index c0ccded..739c85a 100644 --- a/tests/web/test_job.py +++ b/tests/web/test_job.py @@ -477,6 +477,23 @@ async def test_files_given_path_is_dir( assert response.json() == {"detail": "File not found"} +@pytest.mark.anyio +async def test_files_given_jobdir_is_symlink( + fastapi_app: FastAPI, + client: AsyncClient, + auth_headers: Dict[str, str], + mock_ok_job: int, + job_root_dir: Path, +) -> None: + job_id = create_symlinked_job_dir(mock_ok_job, job_root_dir) + + path = "somefile.txt" + url = fastapi_app.url_path_for("retrieve_job_files", jobid=job_id, path=path) + response = await client.get(url, headers=auth_headers) + + assert response.status_code == status.HTTP_200_OK + + @pytest.mark.parametrize( "test_input", [ @@ -642,6 +659,53 @@ async def test_directories_from_path( assert response.json() == expected +@pytest.mark.anyio +async def test_directories_from_path_linkedjob( + fastapi_app: FastAPI, + client: AsyncClient, + auth_headers: Dict[str, str], + mock_ok_job: int, + job_root_dir: Path, +) -> None: + job_id = create_symlinked_job_dir(mock_ok_job, job_root_dir) + + url = fastapi_app.url_path_for( + "retrieve_job_directories_from_path", + jobid=job_id, + path="somedir", + ) + response = await client.get(url, headers=auth_headers) + + assert response.status_code == status.HTTP_200_OK + expected = { + "name": "somedir", + "path": "somedir", + "is_dir": True, + "is_file": False, + "children": [ + { + "is_dir": False, + "is_file": True, + "name": "somefile1.txt", + "path": "somedir/somefile1.txt", + }, + ], + } + assert response.json() == expected + + +def create_symlinked_job_dir(mock_ok_job: int, job_root_dir: Path) -> str: + job_id = str(mock_ok_job) + job_dir = job_root_dir / str(job_id) + orig_job_dir = job_root_dir / "orig" + job_dir.rename(orig_job_dir) + dir1 = orig_job_dir / "somedir" + dir1.mkdir() + (dir1 / "somefile1.txt").write_text("some text") + job_dir.symlink_to(orig_job_dir) + return job_id + + @pytest.mark.anyio @pytest.mark.parametrize( "archive_format", From 2c11fcbbe230ed4b3f23768648a2a9b77d444ca7 Mon Sep 17 00:00:00 2001 From: sverhoeven Date: Fri, 23 Feb 2024 12:55:47 +0100 Subject: [PATCH 7/8] Rename retrieve_job_files to retrieve_job_file As you only get one file. --- src/bartender/web/api/job/views.py | 15 ++++++++------- tests/web/test_job.py | 10 +++++----- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/bartender/web/api/job/views.py b/src/bartender/web/api/job/views.py index f80b495..4ea7833 100644 --- a/src/bartender/web/api/job/views.py +++ b/src/bartender/web/api/job/views.py @@ -181,21 +181,22 @@ def get_dir_of_completed_job( }, response_class=FileResponse, ) -def retrieve_job_files( +def retrieve_job_file( path: str, job_dir: CurrentCompletedJobDir, ) -> FileResponse: - """Retrieve files from a completed job. + """Retrieve file from a completed job. Args: path: Path to file that job has produced. job_dir: Directory with job output files. Raises: - HTTPException: When file is not found or is outside job directory. + HTTPException: When file is not found or is not a file + or is outside job directory. Returns: - The file content. + The file contents. """ try: full_path = _resolve_path(path, job_dir) @@ -437,10 +438,10 @@ async def retrieve_job_subdirectory_as_archive( # noqa: WPS211 def _resolve_path(path: str, job_dir: Path) -> Path: resolved_job_dir = job_dir.resolve(strict=True) - subdirectory = (resolved_job_dir / path).resolve(strict=True) - if not subdirectory.is_relative_to(resolved_job_dir): + resolved = (resolved_job_dir / path).resolve(strict=True) + if not resolved.is_relative_to(resolved_job_dir): raise FileNotFoundError() - return subdirectory + return resolved def _parse_subdirectory(path: str, job_dir: Path) -> Path: diff --git a/tests/web/test_job.py b/tests/web/test_job.py index 739c85a..677979b 100644 --- a/tests/web/test_job.py +++ b/tests/web/test_job.py @@ -433,7 +433,7 @@ async def test_files_of_noncomplete_job( ) -> None: # mock_db_of_job has state==new url = fastapi_app.url_path_for( - "retrieve_job_files", + "retrieve_job_file", jobid=str(mock_db_of_job), path="README.md", ) @@ -452,7 +452,7 @@ async def test_files_of_completed_job( ) -> None: path = "somefile.txt" job_id = str(mock_ok_job) - url = fastapi_app.url_path_for("retrieve_job_files", jobid=job_id, path=path) + url = fastapi_app.url_path_for("retrieve_job_file", jobid=job_id, path=path) response = await client.get(url, headers=auth_headers) assert response.status_code == status.HTTP_200_OK @@ -470,7 +470,7 @@ async def test_files_given_path_is_dir( ) -> None: path = "" job_id = str(mock_ok_job) - url = fastapi_app.url_path_for("retrieve_job_files", jobid=job_id, path=path) + url = fastapi_app.url_path_for("retrieve_job_file", jobid=job_id, path=path) response = await client.get(url, headers=auth_headers) assert response.status_code == status.HTTP_404_NOT_FOUND @@ -488,7 +488,7 @@ async def test_files_given_jobdir_is_symlink( job_id = create_symlinked_job_dir(mock_ok_job, job_root_dir) path = "somefile.txt" - url = fastapi_app.url_path_for("retrieve_job_files", jobid=job_id, path=path) + url = fastapi_app.url_path_for("retrieve_job_file", jobid=job_id, path=path) response = await client.get(url, headers=auth_headers) assert response.status_code == status.HTTP_200_OK @@ -515,7 +515,7 @@ async def test_files_given_bad_paths( test_input: str, ) -> None: job_id = str(mock_ok_job) - url = fastapi_app.url_path_for("retrieve_job_files", jobid=job_id, path=test_input) + url = fastapi_app.url_path_for("retrieve_job_file", jobid=job_id, path=test_input) response = await client.get(url, headers=auth_headers) assert response.status_code == status.HTTP_404_NOT_FOUND From fe7343de25a5e9935ad6922cbbbbd65691371de4 Mon Sep 17 00:00:00 2001 From: sverhoeven Date: Fri, 23 Feb 2024 12:59:08 +0100 Subject: [PATCH 8/8] Mention bartender link --help --- docs/deploy.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/deploy.md b/docs/deploy.md index 278bdaa..ab80a05 100644 --- a/docs/deploy.md +++ b/docs/deploy.md @@ -30,3 +30,4 @@ then you might want to make it available as a job in bartender. To do this you can create a symlink to the external directory in the bartender job root directory, by running the `bartender link` command. +See `bartender link --help` for more information.