-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* First commit * doc(readme): add usage docs * fix(cleanup) * feat(fail): added failure injector * Added fault injection, recovery, and decoupled non-atomic states * feat(p): add eDBWrite * feat(safety): expanded safety spec to handle multiple tasks * style(refactor) * feat(p): cleanup --------- Co-authored-by: Gabriel Guerra <[email protected]>
- Loading branch information
Showing
11 changed files
with
643 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
PCheckerOutput/ | ||
PGenerated/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
/***************************************************** | ||
This file defines two P specifications | ||
1. Safety | ||
2. Liveness | ||
*****************************************************/ | ||
|
||
|
||
/**************************************************** | ||
Checks the global invariant that the response to a task request is always correct and there is no error on the | ||
server side with the implementation of the task logic. | ||
2 invariants: | ||
1. A task is completed only once. | ||
2. Only one worker can claim a task at a time and they must claim it with the latest taskId and counter. | ||
****************************************************/ | ||
|
||
type tRecord = (isComplete: bool, electedWorker: any, currCounter: int); | ||
|
||
spec GuaranteedServerCorrectness observes eDBWrite, eTaskTimeOut, eClaimTaskResp, ePromisePending, ePromiseResolved, ePromiseRejected { | ||
var registry: map[int, tRecord]; | ||
|
||
start state Init { | ||
entry { | ||
goto WaitForEvents; | ||
} | ||
} | ||
|
||
state WaitForEvents { | ||
/* Invariant: a task is completed only once. */ | ||
|
||
on ePromisePending do (taskId: int) { | ||
assert (taskId in registry == false); | ||
|
||
registry[taskId] = (isComplete = false, electedWorker = null, currCounter = 0); | ||
} | ||
|
||
on ePromiseResolved do (taskId: int) { | ||
assert (taskId in registry); | ||
assert (registry[taskId].isComplete == false); | ||
|
||
registry[taskId].isComplete = true; | ||
} | ||
|
||
on ePromiseRejected do (taskId: int) { | ||
assert (taskId in registry); | ||
assert (registry[taskId].isComplete == false); | ||
|
||
registry[taskId].isComplete = true; | ||
} | ||
|
||
/* Invariant: only one worker can claim a task. */ | ||
|
||
// Database is the source of truth for the current task id and counter. | ||
on eDBWrite do (req: tDBWrite) { | ||
assert (req.taskId in registry); | ||
|
||
registry[req.taskId].electedWorker = req.worker; | ||
registry[req.taskId].currCounter = req.counter; | ||
} | ||
|
||
// If the task timesout, it should reset the elected worker. | ||
on eTaskTimeOut do (taskId: int) { | ||
registry[taskId].electedWorker = null; | ||
} | ||
|
||
on eClaimTaskResp do (resp: tClaimTaskResp) { | ||
assert (resp.status == CLAIM_SUCCESS || resp.status == CLAIM_ERROR); | ||
|
||
if (resp.status == CLAIM_SUCCESS) { | ||
assert (resp.taskId in registry); | ||
assert (resp.counter == registry[resp.taskId].currCounter); | ||
|
||
if (registry[resp.taskId].electedWorker == null) { | ||
registry[resp.taskId].electedWorker = resp.worker; | ||
} else { | ||
assert (registry[resp.taskId].electedWorker == resp.worker); | ||
} | ||
} | ||
|
||
if (resp.status == CLAIM_ERROR) { | ||
if (resp.taskId in registry && registry[resp.taskId].isComplete == false) { | ||
assert (resp.worker != registry[resp.taskId].electedWorker || resp.counter != registry[resp.taskId].currCounter); | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
||
/************************************************************************** | ||
GuaranteedTaskProgress checks the global liveness (or progress) property that for every | ||
eTaskPending raised a corresponding eTaskResolved or eTaskRejected eventually follows | ||
***************************************************************************/ | ||
spec GuaranteedTaskProgress observes ePromisePending, ePromiseResolved, ePromiseRejected { | ||
start state Init { | ||
on ePromisePending goto Pending; | ||
} | ||
|
||
// Eventually you want to leave the hot state and go to a cold state. | ||
hot state Pending { | ||
on ePromiseResolved goto Resolved; | ||
on ePromiseRejected goto Rejected; | ||
} | ||
|
||
cold state Resolved {} | ||
|
||
cold state Rejected {} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
/* User Defined Events */ | ||
|
||
// event: event sent by the failure injector to delay a node failure | ||
event eDelayNodeFailure; | ||
// event: event sent by the failure injector to shutdown a node | ||
event eShutDown: machine; | ||
|
||
/***************************************************************************************** | ||
The failure injector machine randomly selects a replica machine and enqueues the special event "halt". | ||
******************************************************************************************/ | ||
machine FailureInjector { | ||
var nFailures: int; | ||
var nodes: set[machine]; | ||
|
||
start state Init { | ||
entry (config: (nodes: set[machine], nFailures: int)) { | ||
nFailures = config.nFailures; | ||
nodes = config.nodes; | ||
assert nFailures <= sizeof(nodes); // can be equal since we restart not halt | ||
goto FailOneNode; | ||
} | ||
} | ||
|
||
state FailOneNode { | ||
entry { | ||
var fail: machine; | ||
|
||
if(nFailures == 0) | ||
raise halt; // done with all failures | ||
else | ||
{ | ||
if($) | ||
{ | ||
fail = choose(nodes); | ||
send fail, eShutDown, fail; | ||
nodes -= (fail); | ||
nFailures = nFailures - 1; | ||
} | ||
else { | ||
send this, eDelayNodeFailure; | ||
} | ||
} | ||
} | ||
|
||
on eDelayNodeFailure goto FailOneNode; | ||
} | ||
} | ||
|
||
// Function to create the failure injection. | ||
fun CreateFailureInjector(config: (nodes: set[machine], nFailures: int)) { | ||
new FailureInjector(config); | ||
} |
Oops, something went wrong.