Skip to content

Commit

Permalink
First commit (#1)
Browse files Browse the repository at this point in the history
* First commit

* doc(readme): add usage docs

* fix(cleanup)

* feat(fail): added failure injector

* Added fault injection, recovery, and decoupled non-atomic states

* feat(p): add eDBWrite

* feat(safety): expanded safety spec to handle multiple tasks

* style(refactor)

* feat(p): cleanup

---------

Co-authored-by: Gabriel Guerra <[email protected]>
  • Loading branch information
guergabo and Gabriel Guerra authored Jan 22, 2024
1 parent 8efeba0 commit ad9fda0
Show file tree
Hide file tree
Showing 11 changed files with 643 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
PCheckerOutput/
PGenerated/
107 changes: 107 additions & 0 deletions PSpec/ResonateWorkerCorrect.p
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
/*****************************************************
This file defines two P specifications
1. Safety
2. Liveness
*****************************************************/


/****************************************************
Checks the global invariant that the response to a task request is always correct and there is no error on the
server side with the implementation of the task logic.
2 invariants:
1. A task is completed only once.
2. Only one worker can claim a task at a time and they must claim it with the latest taskId and counter.
****************************************************/

type tRecord = (isComplete: bool, electedWorker: any, currCounter: int);

spec GuaranteedServerCorrectness observes eDBWrite, eTaskTimeOut, eClaimTaskResp, ePromisePending, ePromiseResolved, ePromiseRejected {
var registry: map[int, tRecord];

start state Init {
entry {
goto WaitForEvents;
}
}

state WaitForEvents {
/* Invariant: a task is completed only once. */

on ePromisePending do (taskId: int) {
assert (taskId in registry == false);

registry[taskId] = (isComplete = false, electedWorker = null, currCounter = 0);
}

on ePromiseResolved do (taskId: int) {
assert (taskId in registry);
assert (registry[taskId].isComplete == false);

registry[taskId].isComplete = true;
}

on ePromiseRejected do (taskId: int) {
assert (taskId in registry);
assert (registry[taskId].isComplete == false);

registry[taskId].isComplete = true;
}

/* Invariant: only one worker can claim a task. */

// Database is the source of truth for the current task id and counter.
on eDBWrite do (req: tDBWrite) {
assert (req.taskId in registry);

registry[req.taskId].electedWorker = req.worker;
registry[req.taskId].currCounter = req.counter;
}

// If the task timesout, it should reset the elected worker.
on eTaskTimeOut do (taskId: int) {
registry[taskId].electedWorker = null;
}

on eClaimTaskResp do (resp: tClaimTaskResp) {
assert (resp.status == CLAIM_SUCCESS || resp.status == CLAIM_ERROR);

if (resp.status == CLAIM_SUCCESS) {
assert (resp.taskId in registry);
assert (resp.counter == registry[resp.taskId].currCounter);

if (registry[resp.taskId].electedWorker == null) {
registry[resp.taskId].electedWorker = resp.worker;
} else {
assert (registry[resp.taskId].electedWorker == resp.worker);
}
}

if (resp.status == CLAIM_ERROR) {
if (resp.taskId in registry && registry[resp.taskId].isComplete == false) {
assert (resp.worker != registry[resp.taskId].electedWorker || resp.counter != registry[resp.taskId].currCounter);
}
}
}
}
}

/**************************************************************************
GuaranteedTaskProgress checks the global liveness (or progress) property that for every
eTaskPending raised a corresponding eTaskResolved or eTaskRejected eventually follows
***************************************************************************/
spec GuaranteedTaskProgress observes ePromisePending, ePromiseResolved, ePromiseRejected {
start state Init {
on ePromisePending goto Pending;
}

// Eventually you want to leave the hot state and go to a cold state.
hot state Pending {
on ePromiseResolved goto Resolved;
on ePromiseRejected goto Rejected;
}

cold state Resolved {}

cold state Rejected {}
}
52 changes: 52 additions & 0 deletions PSrc/FailureInjector.p
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/* User Defined Events */

// event: event sent by the failure injector to delay a node failure
event eDelayNodeFailure;
// event: event sent by the failure injector to shutdown a node
event eShutDown: machine;

/*****************************************************************************************
The failure injector machine randomly selects a replica machine and enqueues the special event "halt".
******************************************************************************************/
machine FailureInjector {
var nFailures: int;
var nodes: set[machine];

start state Init {
entry (config: (nodes: set[machine], nFailures: int)) {
nFailures = config.nFailures;
nodes = config.nodes;
assert nFailures <= sizeof(nodes); // can be equal since we restart not halt
goto FailOneNode;
}
}

state FailOneNode {
entry {
var fail: machine;

if(nFailures == 0)
raise halt; // done with all failures
else
{
if($)
{
fail = choose(nodes);
send fail, eShutDown, fail;
nodes -= (fail);
nFailures = nFailures - 1;
}
else {
send this, eDelayNodeFailure;
}
}
}

on eDelayNodeFailure goto FailOneNode;
}
}

// Function to create the failure injection.
fun CreateFailureInjector(config: (nodes: set[machine], nFailures: int)) {
new FailureInjector(config);
}
Loading

0 comments on commit ad9fda0

Please sign in to comment.