From 96483727fa55d76e1c39854324205a8e34ea0023 Mon Sep 17 00:00:00 2001 From: "David W.H. Swenson" Date: Wed, 19 Apr 2023 18:26:19 -0500 Subject: [PATCH 01/10] Base classes and example for client-side code --- exorcist/example_client.py | 108 +++++++++++++++++++++ exorcist/resultstore.py | 26 +++++ exorcist/taskstore.py | 20 ++++ exorcist/tests/test_example_client.py | 134 ++++++++++++++++++++++++++ 4 files changed, 288 insertions(+) create mode 100644 exorcist/example_client.py create mode 100644 exorcist/resultstore.py create mode 100644 exorcist/taskstore.py create mode 100644 exorcist/tests/test_example_client.py diff --git a/exorcist/example_client.py b/exorcist/example_client.py new file mode 100644 index 0000000..a6c5d83 --- /dev/null +++ b/exorcist/example_client.py @@ -0,0 +1,108 @@ +""" +Simple example of implementing Exorcist in client code. + +This is useful both to illustrate how client code for Exocist can be +written, as well as to be used in our test suite. +""" + + +import dataclasses +import pathlib +import json +import pickle + +from functools import partial + +from .resultstore import ResultStore +from .taskstore import TaskDetailsStore + +from typing import Callable + +@dataclasses.dataclass +class ExampleResult: + """Result class. + + This doesn't have to be a user-defined class; for example, it could be a + dict with an expected structure. + """ + label: str + main_result: float | str + is_failure: bool + + +@dataclasses.dataclass +class ExampleTaskDetails: + label: str + input_result_labels: dict[str, str] + """keys are variable names, values are the label where result is stored + """ + task_func: Callable + + def _extract_main_result(self, resultfile): + with open(resultfile, mode='r') as f: + result = ExampleResult(**json.load(f)) + + return result.main_result + + def run_task(self, directory): + inputs = { + key: self._extract_main_result(directory / f"{inp}_result.json") + for key, inp in self.input_result_labels.items() + } + try: + main_result = self.task_func(**inputs) + is_failure = False + except Exception as e: + main_result = str(e) + is_failure = True + + return ExampleResult( + label=self.label, + main_result=main_result, + is_failure=is_failure + ) + + +class ExampleResultStore(ResultStore): + """Example of a ResultStore + + This stores :class:`.ExampleResult`\ s as JSON files in a given + directory. + """ + def __init__(self, directory): + self.directory = directory + + def is_failure_result(self, result: ExampleResult) -> bool: + return result.is_failure + + def store_result(self, result: ExampleResult, retry: int = 0): + # the idea here is that there is only ever one successful result for + # a given task, but there may be many failures -- we label the + # failures by trial numbers. This way we save both successes and + # failures. + if self.is_failure_result(result): + path = self.directory / f"{result.label}_result_{retry}.json" + else: + path = self.directory / f"{result.label}_result.json" + + with open(path, mode='w') as f: + f.write(json.dumps(dataclasses.asdict(result))) + + +class ExampleTaskDetailsStore(TaskDetailsStore): + """Example of a TaskDetailsStore + + """ + def __init__(self, directory): + self.directory = directory + + def store_task_details(self, taskid: str, + task_details: ExampleTaskDetails): + with open(self.directory / f"{taskid}.p", mode='wb') as f: + pickle.dump(task_details, f) + + def load_task(self, taskid: str) -> Callable[[], ExampleResult]: + with open(self.directory / f"{taskid}.p", mode='rb') as f: + task_details = pickle.load(f) + + return partial(task_details.run_task, directory=self.directory) diff --git a/exorcist/resultstore.py b/exorcist/resultstore.py new file mode 100644 index 0000000..f27415e --- /dev/null +++ b/exorcist/resultstore.py @@ -0,0 +1,26 @@ +import abc + +from typing import Generic +from .models import Result + + +class ResultStore(abc.ABC, Generic[Result]): + """Result storage. + + Client code must provide a storage object that conforms to this abstract + API. + """ + @abc.abstractmethod + def is_failure_result(self, result: Result) -> bool: + """Test whether this result represents a failed run. + + This allows failures (e.g., raising exceptions) to be treated as + first-class results, which can then be introspected by the users. + """ + raise NotImplementedError() + + @abc.abstractmethod + def store_result(self, result: Result, retry: int = 0): + """Store a result to permanent storage. + """ + raise NotImplementedError() diff --git a/exorcist/taskstore.py b/exorcist/taskstore.py new file mode 100644 index 0000000..af1b06f --- /dev/null +++ b/exorcist/taskstore.py @@ -0,0 +1,20 @@ +import abc + +from typing import Generic, Callable +from .models import TaskDetails, Result + +class TaskDetailsStore(abc.ABC, Generic[TaskDetails, Result]): + """Task details storage. + + Client code must provide a storage object that conforms to this abstract + API. + """ + @abc.abstractmethod + def store_task_details(self, taskid: str, task_details: TaskDetails): + """Store the given task details. + """ + raise NotImplementedError() + + @abc.abstractmethod + def load_task(self, taskid: str) -> Callable[[], Result]: + raise NotImplementedError() diff --git a/exorcist/tests/test_example_client.py b/exorcist/tests/test_example_client.py new file mode 100644 index 0000000..d8a39e9 --- /dev/null +++ b/exorcist/tests/test_example_client.py @@ -0,0 +1,134 @@ +import pytest +import json +import pickle + +from exorcist.example_client import ( + ExampleResult, ExampleTaskDetails, ExampleResultStore, + ExampleTaskDetailsStore, +) + +@pytest.fixture +def example_result(): + return ExampleResult( + label="foo", + main_result=3.0, + is_failure=False + ) + +@pytest.fixture +def failure_result(): + return ExampleResult( + label="foo", + main_result="float division by zero", + is_failure=True + ) + + +# simple functions used as our tasks (need to be named functions here +# because pickle doesn't like lambdas) +def incr(x): + return x + 1.0 + +def return_1(): + return 1.0 + +def failure_func(): + return 1.0/0.0 + + +@pytest.fixture +def example_details(): + return ExampleTaskDetails( + label="foo", + input_result_labels={}, + task_func=return_1 + ) + +@pytest.fixture +def result_store(tmp_path): + return ExampleResultStore(tmp_path) + +@pytest.fixture +def task_store(tmp_path): + return ExampleTaskDetailsStore(tmp_path) + + +class TestExampleResultStore: + @pytest.mark.parametrize('fixture', [ + 'example_result', 'failure_result' + ]) + def test_is_failure_result(self, result_store, request, fixture): + result = request.getfixturevalue(fixture) + expected = (fixture == 'failure_result') + assert result_store.is_failure_result(result) == expected + + @pytest.mark.parametrize('fixture', [ + 'example_result', 'failure_result' + ]) + def test_store_result(self, result_store, request, fixture): + result = request.getfixturevalue(fixture) + result_store.store_result(result, retry=5) + filename = { + "example_result": "foo_result.json", + "failure_result": "foo_result_5.json", + }[fixture] + path = result_store.directory / filename + assert path.exists() + with open(path) as f: + dct = json.load(f) + + recreated = ExampleResult(**dct) + assert result == recreated + + +class TestExampleTaskDetailsStore: + def test_store_task_details(self, task_store, example_details): + task_store.store_task_details(example_details.label, + example_details) + path = task_store.directory / f"{example_details.label}.p" + assert path.exists() + with open(path, mode='rb') as f: + reloaded = pickle.load(f) + + assert reloaded == example_details + + + def test_load_task(self, task_store, example_details): + # note that this is dependent on store_task_details working, not + # quite true unit + task_store.store_task_details(example_details.label, + example_details) + task = task_store.load_task(example_details.label) + result = task() + assert not result.is_failure + assert result.main_result == 1.0 + assert result.label == "foo" + + def test_workflow(self, task_store, result_store, example_details): + # depends on store_task_details working + example_details_2 = ExampleTaskDetails( + label="bar", + input_result_labels={'x': "foo"}, + task_func=incr + ) + + # the order of storage shouldn't matter, so we intentionally store + # in the inverted order in this test + task_store.store_task_details(example_details_2.label, + example_details_2) + task_store.store_task_details(example_details.label, + example_details) + + # order in which we load them also doesn't matter -- only the order + # in which we run them does + task2 = task_store.load_task("bar") + task = task_store.load_task("foo") + + # manually do the work of the worker here + result1 = task() + assert not result_store.is_failure_result(result1) + result_store.store_result(result1) + result2 = task2() + assert not result_store.is_failure_result(result2) + + assert result2.main_result == 2.0 From 0e1831d53c4ad62247299bce4256a00983fbbe3e Mon Sep 17 00:00:00 2001 From: "David W.H. Swenson" Date: Wed, 19 Apr 2023 18:24:50 -0500 Subject: [PATCH 02/10] Add __init__, updates to models --- exorcist/__init__.py | 2 ++ exorcist/models.py | 28 +++++++++++++++++----------- 2 files changed, 19 insertions(+), 11 deletions(-) create mode 100644 exorcist/__init__.py diff --git a/exorcist/__init__.py b/exorcist/__init__.py new file mode 100644 index 0000000..5c51fa6 --- /dev/null +++ b/exorcist/__init__.py @@ -0,0 +1,2 @@ +from .taskdb import TaskStatusDB, NoStatusChange +from .models import TaskStatus, Task diff --git a/exorcist/models.py b/exorcist/models.py index 79f0891..d3374fc 100644 --- a/exorcist/models.py +++ b/exorcist/models.py @@ -1,23 +1,29 @@ from enum import Enum -from dataclasses import dataclass -from datetime import datetime +from typing import NamedTuple, TypeVar, Generic + +# generics: the actual types here depend on the client library +Result = TypeVar("Result") +TaskDetails = TypeVar("TaskDetails") class TaskStatus(Enum): + """ + Status of a given task. + """ BLOCKED = 0 AVAILABLE = 1 IN_PROGRESS = 2 RESULTS_READY = 3 COMPLETED = 99 + TOO_MANY_RETRIES = -2 ERROR = -1 -@dataclass -class Task: - taskid: str - taskfile: str - last_modified: datetime - n_retries: int +# TODO: it isn't entirely clear to me that this is needed, or that this is +# the right way to do it. but I wanted to capture the way to handle typing +# of something like this +class Task(NamedTuple, Generic[TaskDetails]): + """Generic to contain taskid and the client-specific TaskDetails. - @classmethod - def from_db_row(row): - ... + """ + taskid: str + task_details: TaskDetails From 6c1ae570569e701d80bf487977033d50f25915cb Mon Sep 17 00:00:00 2001 From: "David W.H. Swenson" Date: Thu, 20 Apr 2023 09:09:21 -0500 Subject: [PATCH 03/10] Add load_task_details --- exorcist/example_client.py | 6 +++++- exorcist/taskstore.py | 5 +++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/exorcist/example_client.py b/exorcist/example_client.py index a6c5d83..e38d606 100644 --- a/exorcist/example_client.py +++ b/exorcist/example_client.py @@ -101,8 +101,12 @@ def store_task_details(self, taskid: str, with open(self.directory / f"{taskid}.p", mode='wb') as f: pickle.dump(task_details, f) - def load_task(self, taskid: str) -> Callable[[], ExampleResult]: + def load_task_details(self, taskid: str) -> ExampleTaskDetails: with open(self.directory / f"{taskid}.p", mode='rb') as f: task_details = pickle.load(f) + return task_details + + def load_task(self, taskid: str) -> Callable[[], ExampleResult]: + task_details = self.load_task_details(taskid) return partial(task_details.run_task, directory=self.directory) diff --git a/exorcist/taskstore.py b/exorcist/taskstore.py index af1b06f..0cc33c8 100644 --- a/exorcist/taskstore.py +++ b/exorcist/taskstore.py @@ -15,6 +15,11 @@ def store_task_details(self, taskid: str, task_details: TaskDetails): """ raise NotImplementedError() + @abc.abstractmethod + def load_task_details(self, taskid: str) -> TaskDetails: + """Load the task details from disk.""" + raise NotImplementedError() + @abc.abstractmethod def load_task(self, taskid: str) -> Callable[[], Result]: raise NotImplementedError() From 62b64bfee4af573a9886a49198410d3fe21fc358 Mon Sep 17 00:00:00 2001 From: "David W.H. Swenson" Date: Thu, 20 Apr 2023 09:12:45 -0500 Subject: [PATCH 04/10] temporarily remove taskdb from main import this is a side-effect of cherry picking a change into here --- exorcist/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exorcist/__init__.py b/exorcist/__init__.py index 5c51fa6..621d53f 100644 --- a/exorcist/__init__.py +++ b/exorcist/__init__.py @@ -1,2 +1,2 @@ -from .taskdb import TaskStatusDB, NoStatusChange +# from .taskdb import TaskStatusDB, NoStatusChange from .models import TaskStatus, Task From 4bf75673127a37103684a487ef137a7480a44d32 Mon Sep 17 00:00:00 2001 From: "David W.H. Swenson" Date: Thu, 20 Apr 2023 09:55:18 -0500 Subject: [PATCH 05/10] switch to run_task model --- exorcist/example_client.py | 5 ++--- exorcist/taskstore.py | 3 ++- exorcist/tests/test_example_client.py | 26 ++++---------------------- 3 files changed, 8 insertions(+), 26 deletions(-) diff --git a/exorcist/example_client.py b/exorcist/example_client.py index e38d606..b0f0fd3 100644 --- a/exorcist/example_client.py +++ b/exorcist/example_client.py @@ -107,6 +107,5 @@ def load_task_details(self, taskid: str) -> ExampleTaskDetails: return task_details - def load_task(self, taskid: str) -> Callable[[], ExampleResult]: - task_details = self.load_task_details(taskid) - return partial(task_details.run_task, directory=self.directory) + def run_task(self, task_details: ExampleTaskDetails) -> ExampleResult: + return task_details.run_task(directory=self.directory) diff --git a/exorcist/taskstore.py b/exorcist/taskstore.py index 0cc33c8..fd563b5 100644 --- a/exorcist/taskstore.py +++ b/exorcist/taskstore.py @@ -21,5 +21,6 @@ def load_task_details(self, taskid: str) -> TaskDetails: raise NotImplementedError() @abc.abstractmethod - def load_task(self, taskid: str) -> Callable[[], Result]: + def run_task(self, task_details: TaskDetails) -> Result: + """Run the task, based on the given details""" raise NotImplementedError() diff --git a/exorcist/tests/test_example_client.py b/exorcist/tests/test_example_client.py index d8a39e9..f2eb6d3 100644 --- a/exorcist/tests/test_example_client.py +++ b/exorcist/tests/test_example_client.py @@ -92,14 +92,8 @@ def test_store_task_details(self, task_store, example_details): assert reloaded == example_details - - def test_load_task(self, task_store, example_details): - # note that this is dependent on store_task_details working, not - # quite true unit - task_store.store_task_details(example_details.label, - example_details) - task = task_store.load_task(example_details.label) - result = task() + def test_run_task(self, task_store, example_details): + result = task_store.run_task(example_details) assert not result.is_failure assert result.main_result == 1.0 assert result.label == "foo" @@ -112,23 +106,11 @@ def test_workflow(self, task_store, result_store, example_details): task_func=incr ) - # the order of storage shouldn't matter, so we intentionally store - # in the inverted order in this test - task_store.store_task_details(example_details_2.label, - example_details_2) - task_store.store_task_details(example_details.label, - example_details) - - # order in which we load them also doesn't matter -- only the order - # in which we run them does - task2 = task_store.load_task("bar") - task = task_store.load_task("foo") - # manually do the work of the worker here - result1 = task() + result1 = task_store.run_task(example_details) assert not result_store.is_failure_result(result1) result_store.store_result(result1) - result2 = task2() + result2 = task_store.run_task(example_details_2) assert not result_store.is_failure_result(result2) assert result2.main_result == 2.0 From 940a673071f3e494a021a50e35b6c619c067bee7 Mon Sep 17 00:00:00 2001 From: "David W.H. Swenson" Date: Thu, 20 Apr 2023 10:46:31 -0500 Subject: [PATCH 06/10] Use dataclass instead of NamedTuple Problem with second superclass for typing with NamedTuple --- exorcist/models.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/exorcist/models.py b/exorcist/models.py index d3374fc..7f1788d 100644 --- a/exorcist/models.py +++ b/exorcist/models.py @@ -1,5 +1,6 @@ from enum import Enum -from typing import NamedTuple, TypeVar, Generic +from typing import TypeVar, Generic +import dataclasses # generics: the actual types here depend on the client library Result = TypeVar("Result") @@ -21,7 +22,8 @@ class TaskStatus(Enum): # TODO: it isn't entirely clear to me that this is needed, or that this is # the right way to do it. but I wanted to capture the way to handle typing # of something like this -class Task(NamedTuple, Generic[TaskDetails]): +@dataclasses.dataclass +class Task(Generic[TaskDetails]): """Generic to contain taskid and the client-specific TaskDetails. """ From badcef8a11012783f1a130c17103dfb32989d61e Mon Sep 17 00:00:00 2001 From: "David W.H. Swenson" Date: Thu, 20 Apr 2023 10:56:39 -0500 Subject: [PATCH 07/10] Not allowed for another year. Wish it was 2024. --- exorcist/example_client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exorcist/example_client.py b/exorcist/example_client.py index b0f0fd3..2f1b3be 100644 --- a/exorcist/example_client.py +++ b/exorcist/example_client.py @@ -16,7 +16,7 @@ from .resultstore import ResultStore from .taskstore import TaskDetailsStore -from typing import Callable +from typing import Callable, Union @dataclasses.dataclass class ExampleResult: @@ -26,7 +26,7 @@ class ExampleResult: dict with an expected structure. """ label: str - main_result: float | str + main_result: Union[float, str] is_failure: bool From 5776a2027ff42d2280bed08cc9f12a657ff4e71d Mon Sep 17 00:00:00 2001 From: "David W.H. Swenson" Date: Thu, 20 Apr 2023 11:50:21 -0500 Subject: [PATCH 08/10] Add coveragerc --- .coveragerc | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .coveragerc diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000..99ca682 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,6 @@ +[report] +exclude_lines = + pragma: no cover + -no-cov- + raise NotImplementedError + \.\.\. From 28dd7c8cd806d044051b7c78fc3817e9a25c5e82 Mon Sep 17 00:00:00 2001 From: "David W.H. Swenson" Date: Thu, 20 Apr 2023 12:03:33 -0500 Subject: [PATCH 09/10] Add a few more tests --- exorcist/tests/test_example_client.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/exorcist/tests/test_example_client.py b/exorcist/tests/test_example_client.py index f2eb6d3..2e57d27 100644 --- a/exorcist/tests/test_example_client.py +++ b/exorcist/tests/test_example_client.py @@ -82,14 +82,13 @@ def test_store_result(self, result_store, request, fixture): class TestExampleTaskDetailsStore: - def test_store_task_details(self, task_store, example_details): + def test_store_load_task_details_cycle(self, task_store, + example_details): task_store.store_task_details(example_details.label, example_details) path = task_store.directory / f"{example_details.label}.p" assert path.exists() - with open(path, mode='rb') as f: - reloaded = pickle.load(f) - + reloaded = task_store.load_task_details(example_details.label) assert reloaded == example_details def test_run_task(self, task_store, example_details): @@ -98,6 +97,17 @@ def test_run_task(self, task_store, example_details): assert result.main_result == 1.0 assert result.label == "foo" + def test_run_task_failing_result(self, task_store): + details = ExampleTaskDetails( + label="baz", + input_result_labels={}, + task_func=failure_func + ) + result = task_store.run_task(details) + assert result.is_failure + assert result.main_result == "float division by zero" + assert result.label == "baz" + def test_workflow(self, task_store, result_store, example_details): # depends on store_task_details working example_details_2 = ExampleTaskDetails( From 4eb36489be7d03336f992fc3882019771089fc74 Mon Sep 17 00:00:00 2001 From: "David W.H. Swenson" Date: Thu, 20 Apr 2023 12:18:20 -0500 Subject: [PATCH 10/10] Remove unneeded pytest placeholder --- exorcist/tests/test_nothing.py | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 exorcist/tests/test_nothing.py diff --git a/exorcist/tests/test_nothing.py b/exorcist/tests/test_nothing.py deleted file mode 100644 index 99bb93d..0000000 --- a/exorcist/tests/test_nothing.py +++ /dev/null @@ -1,4 +0,0 @@ -import exorcist - -def test_temporary_to_make_pytest_pass(): - ...