Daniel Tisdall


software

Primary Backup Replication

The book Replication, Theory and Practice from 2010 contains lecture notes on state replication. Here is a TLA+ model of what is probably the simplest algorithm from the book used to replicate an object in an asynchronous message passing model with crash stop failures. The communication is FIFO and reliable (up to crashes). A prerequisite for the algorithm is a perfect failure detector. The algorithm is called Primary Backup.

Algorithm

A client writes or queries an object by calling to a primary process. The primary process syncs the call to other (replica) processes, and returns to the client only once responses from all replicas have been received. In the case of failure, the new primary is chosen simply based on next highest ID.

The description in the book uses an object which is a complete history of operations, but we model a one-shot integer write for simplicity.

Code

VARIABLES
(*Meta*)
    \* @type: Str;
    action,
(*Global*)
    \* @type: Int;
    nextGUID,
    \* @type: PID -> Bool;
    crashed,
    \* @type: <<PID, PID>> -> Seq(MSG);
    fifo,
(*Local*)
    \* @type: PID -> Set(PID);
    servers,
    \* @type: PID -> Set(RESPONSE);
    responses,
    \* @type: PID -> Int;
    value,
    \* @type: Int;
    clientPrimary


SERVERS == 1..3
CLIENT == 0

Init == 
    /\ action = "init"
    /\ nextGUID = 0
    /\ crashed = [p \in SERVERS |-> FALSE]
    /\ fifo = [p \in ((SERVERS \cup {CLIENT}) \X (SERVERS \cup {CLIENT})) |-> <<>>]
    /\ servers = [p \in (SERVERS \cup {CLIENT}) |-> SERVERS]
    /\ responses = [p \in (SERVERS \cup {CLIENT}) |-> {}]
    /\ value = [p \in (SERVERS \cup {CLIENT}) |-> NullInt]
    /\ clientPrimary = NullInt

Fail ==
    \E p, pCorrect \in SERVERS: 
    /\ p # pCorrect
    (*Always leave at least 1 correct process remaining*)
    /\ ~crashed[pCorrect]
    /\ ~crashed[p]
    /\ UNCHANGED nextGUID
    /\ crashed' = [crashed EXCEPT ![p] = TRUE]
    /\ UNCHANGED fifo
    /\ UNCHANGED servers
    /\ UNCHANGED responses
    /\ UNCHANGED value
    /\ UNCHANGED clientPrimary

NotifyFail ==
    \E p \in SERVERS \cup {CLIENT}, pCrashed \in SERVERS:
    /\ p # CLIENT => ~crashed[p]
    /\ p # pCrashed
    /\ crashed[pCrashed]
    /\ pCrashed \in servers[p]
    /\ UNCHANGED nextGUID
    /\ UNCHANGED crashed
    /\ UNCHANGED fifo
    /\ servers' = [servers EXCEPT ![p] = @ \ {pCrashed}]
    /\ UNCHANGED responses
    /\ UNCHANGED value
    /\ UNCHANGED clientPrimary

ReceiveResponse == 
    \E p \in SERVERS \cup {CLIENT}, other \in SERVERS:
    /\ p # CLIENT => ~crashed[p]
    /\ p # other
    /\ 0 < Len(fifo[other, p])
    /\ LET m == Head(fifo[other, p]) IN
        /\ m.nature = _response
        /\ UNCHANGED nextGUID
        /\ UNCHANGED crashed
        /\ fifo' = [ fifo EXCEPT ![other, p] = Tail(@)]
        /\ UNCHANGED servers
        /\ responses' = [responses EXCEPT ![p] = @ \cup {[m|->m, src|->other]}]
        /\ UNCHANGED value
        /\ UNCHANGED clientPrimary
    
ReceiveSync ==
    \E p, primary \in SERVERS:
    /\ ~crashed[p]
    /\ p # primary
    /\ 0 < Len(fifo[primary, p])
    /\ LET m == Head(fifo[primary, p]) IN
        /\ m.nature = _sync
        /\ UNCHANGED nextGUID
        /\ UNCHANGED crashed
        /\ fifo' = [
                fifo EXCEPT
                ![primary, p] = Tail(@),
                ![p, primary] = 
                    CASE primary \in servers[p] -> @ \o <<response(m.guid, m.value)>>
                      [] OTHER                  -> @
            ]
        /\ servers' = [servers EXCEPT ![p] = {e \in @ : primary <= e} ]
        /\ UNCHANGED responses
        /\ value' = [value EXCEPT
                ![p] =
                    CASE primary \in servers[p] -> m.value
                      [] OTHER                  -> @
            ]
        /\ UNCHANGED clientPrimary

PrimaryBeginUpdate ==
    \E p \in SERVERS:
    /\ ~crashed[p]
    /\ 0 < Len(fifo[CLIENT, p])
    /\ LET m == Head(fifo[CLIENT, p]) IN
        /\ m.nature = _updateHist
        /\ nextGUID' = nextGUID + 1
        /\ UNCHANGED crashed
        /\ fifo' =
            LET
            v == IF value[p] = NullInt THEN m.value ELSE value[p]
            IN
            [
                pair \in ({p} \X (servers[p] \ {p})) |-> fifo[pair] \o <<sync(nextGUID, v)>>
            ] @@ 
            [fifo EXCEPT ![CLIENT, p] = Tail(@)]
        /\ servers' = [servers EXCEPT ![p] = {e \in @ : p <= e}]
        (*Add own response (optimisation)*)
        /\ responses' = [responses EXCEPT ![p] = @ \cup {[m|->m, src|->p]}]
        /\ UNCHANGED value
        /\ UNCHANGED clientPrimary

PrimaryCompleteUpdate ==
    \E p \in SERVERS:
    \E r \in responses[p]:
    /\ ~crashed[p]
    /\ \A other \in servers[p] : (\E r_ \in responses[p] : r_.src = other /\ r_.m.guid = r.m.guid)
    /\ UNCHANGED nextGUID
    /\ UNCHANGED crashed
    /\ fifo' = [fifo EXCEPT ![p, CLIENT] = @ \o <<response(r.m.guid, r.m.value)>>]
    /\ UNCHANGED servers
    /\ responses' = [responses EXCEPT ![p] = @ \ {e \in @ : e.m.guid = r.m.guid}]
    /\ value' = [value EXCEPT ![p] = r.m.value]
    /\ UNCHANGED clientPrimary

ClientSend == 
    LET primary == CHOOSE e \in servers[CLIENT] : (\A x \in servers[CLIENT] : e <= x) IN
    /\ clientPrimary = NullInt
    /\ UNCHANGED nextGUID
    /\ UNCHANGED crashed
    /\ fifo' = [fifo EXCEPT ![CLIENT, primary] = @ \o <<updateHist(TargetValue)>>]
    /\ UNCHANGED servers
    /\ UNCHANGED responses
    /\ UNCHANGED value
    /\ clientPrimary' = primary

ClientGiveup == 
    /\ clientPrimary # NullInt
    (*We model one shot, if the client has succeeded, it will not restart.*)
    /\ clientPrimary # 777
    /\ clientPrimary \notin servers[CLIENT]
    /\ UNCHANGED nextGUID
    /\ UNCHANGED crashed
    /\ UNCHANGED fifo
    /\ UNCHANGED servers
    /\ UNCHANGED responses
    /\ UNCHANGED value
    /\ clientPrimary' = NullInt

ClientSucceed == 
    \E r \in responses[CLIENT]:
    /\ UNCHANGED nextGUID
    /\ UNCHANGED crashed
    /\ UNCHANGED fifo
    /\ UNCHANGED servers
    /\ UNCHANGED responses
    /\ value' = [value EXCEPT ![CLIENT] = r.m.value]
    /\ clientPrimary' = 777

Next ==
    \/ /\ Fail
       /\ action' = "fail"
    \/ /\ NotifyFail
       /\ action' = "notifyFail"
    \/ /\ ReceiveResponse
       /\ action' = "receiveResponse"
    \/ /\ ReceiveSync
       /\ action' = "receiveSync"
    \/ /\ PrimaryBeginUpdate
       /\ action' = "primaryBeginUpdate"
    \/ /\ PrimaryCompleteUpdate
       /\ action' = "primaryCompleteUpdate"
    \/ /\ ClientSend
       /\ action' = "clientSend"
    \/ /\ ClientGiveup
       /\ action' = "clientGiveup"
    \/ /\ ClientSucceed
       /\ action' = "clientSucceed"

Agreement ==
    clientPrimary = 777 => 
        /\ \A p \in SERVERS: crashed[p] \/ value[p] = TargetValue
        /\ value[CLIENT] = TargetValue