linux/tools/memory-model/linux-kernel.cat
Jonas Oberhauser 762e9357e7 tools/memory-model: Make ppo a subrelation of po
As stated in the documentation and implied by its name, the ppo
(preserved program order) relation is intended to link po-earlier
to po-later instructions under certain conditions.  However, a
corner case currently allows instructions to be linked by ppo that
are not executed by the same thread, i.e., instructions are being
linked that have no po relation.

This happens due to the mb/strong-fence/fence relations, which (as
one case) provide order when locks are passed between threads
followed by an smp_mb__after_unlock_lock() fence.  This is
illustrated in the following litmus test (as can be seen when using
herd7 with `doshow ppo`):

P0(spinlock_t *x, spinlock_t *y)
{
    spin_lock(x);
    spin_unlock(x);
}

P1(spinlock_t *x, spinlock_t *y)
{
    spin_lock(x);
    smp_mb__after_unlock_lock();
    *y = 1;
}

The ppo relation will link P0's spin_lock(x) and P1's *y=1, because
P0 passes a lock to P1 which then uses this fence.

The patch makes ppo a subrelation of po by letting fence contribute
to ppo only in case the fence links events of the same thread.

Signed-off-by: Jonas Oberhauser <jonas.oberhauser@huaweicloud.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Acked-by: Andrea Parri <parri.andrea@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
2023-03-22 12:02:21 -07:00

217 lines
7.8 KiB
Plaintext

// SPDX-License-Identifier: GPL-2.0+
(*
* Copyright (C) 2015 Jade Alglave <j.alglave@ucl.ac.uk>,
* Copyright (C) 2016 Luc Maranget <luc.maranget@inria.fr> for Inria
* Copyright (C) 2017 Alan Stern <stern@rowland.harvard.edu>,
* Andrea Parri <parri.andrea@gmail.com>
*
* An earlier version of this file appeared in the companion webpage for
* "Frightening small children and disconcerting grown-ups: Concurrency
* in the Linux kernel" by Alglave, Maranget, McKenney, Parri, and Stern,
* which appeared in ASPLOS 2018.
*)
"Linux-kernel memory consistency model"
(*
* File "lock.cat" handles locks and is experimental.
* It can be replaced by include "cos.cat" for tests that do not use locks.
*)
include "lock.cat"
(*******************)
(* Basic relations *)
(*******************)
(* Release Acquire *)
let acq-po = [Acquire] ; po ; [M]
let po-rel = [M] ; po ; [Release]
let po-unlock-lock-po = po ; [UL] ; (po|rf) ; [LKR] ; po
(* Fences *)
let R4rmb = R \ Noreturn (* Reads for which rmb works *)
let rmb = [R4rmb] ; fencerel(Rmb) ; [R4rmb]
let wmb = [W] ; fencerel(Wmb) ; [W]
let mb = ([M] ; fencerel(Mb) ; [M]) |
([M] ; fencerel(Before-atomic) ; [RMW] ; po? ; [M]) |
([M] ; po? ; [RMW] ; fencerel(After-atomic) ; [M]) |
([M] ; po? ; [LKW] ; fencerel(After-spinlock) ; [M]) |
(*
* Note: The po-unlock-lock-po relation only passes the lock to the direct
* successor, perhaps giving the impression that the ordering of the
* smp_mb__after_unlock_lock() fence only affects a single lock handover.
* However, in a longer sequence of lock handovers, the implicit
* A-cumulative release fences of lock-release ensure that any stores that
* propagate to one of the involved CPUs before it hands over the lock to
* the next CPU will also propagate to the final CPU handing over the lock
* to the CPU that executes the fence. Therefore, all those stores are
* also affected by the fence.
*)
([M] ; po-unlock-lock-po ;
[After-unlock-lock] ; po ; [M]) |
([M] ; po? ; [Srcu-unlock] ; fencerel(After-srcu-read-unlock) ; [M])
let gp = po ; [Sync-rcu | Sync-srcu] ; po?
let strong-fence = mb | gp
let nonrw-fence = strong-fence | po-rel | acq-po
let fence = nonrw-fence | wmb | rmb
let barrier = fencerel(Barrier | Rmb | Wmb | Mb | Sync-rcu | Sync-srcu |
Before-atomic | After-atomic | Acquire | Release |
Rcu-lock | Rcu-unlock | Srcu-lock | Srcu-unlock) |
(po ; [Release]) | ([Acquire] ; po)
(**********************************)
(* Fundamental coherence ordering *)
(**********************************)
(* Sequential Consistency Per Variable *)
let com = rf | co | fr
acyclic po-loc | com as coherence
(* Atomic Read-Modify-Write *)
empty rmw & (fre ; coe) as atomic
(**********************************)
(* Instruction execution ordering *)
(**********************************)
(* Preserved Program Order *)
let dep = addr | data
let rwdep = (dep | ctrl) ; [W]
let overwrite = co | fr
let to-w = rwdep | (overwrite & int) | (addr ; [Plain] ; wmb)
let to-r = (addr ; [R]) | (dep ; [Marked] ; rfi)
let ppo = to-r | to-w | (fence & int) | (po-unlock-lock-po & int)
(* Propagation: Ordering from release operations and strong fences. *)
let A-cumul(r) = (rfe ; [Marked])? ; r
let rmw-sequence = (rf ; rmw)*
let cumul-fence = [Marked] ; (A-cumul(strong-fence | po-rel) | wmb |
po-unlock-lock-po) ; [Marked] ; rmw-sequence
let prop = [Marked] ; (overwrite & ext)? ; cumul-fence* ;
[Marked] ; rfe? ; [Marked]
(*
* Happens Before: Ordering from the passage of time.
* No fences needed here for prop because relation confined to one process.
*)
let hb = [Marked] ; (ppo | rfe | ((prop \ id) & int)) ; [Marked]
acyclic hb as happens-before
(****************************************)
(* Write and fence propagation ordering *)
(****************************************)
(* Propagation: Each non-rf link needs a strong fence. *)
let pb = prop ; strong-fence ; hb* ; [Marked]
acyclic pb as propagation
(*******)
(* RCU *)
(*******)
(*
* Effects of read-side critical sections proceed from the rcu_read_unlock()
* or srcu_read_unlock() backwards on the one hand, and from the
* rcu_read_lock() or srcu_read_lock() forwards on the other hand.
*
* In the definition of rcu-fence below, the po term at the left-hand side
* of each disjunct and the po? term at the right-hand end have been factored
* out. They have been moved into the definitions of rcu-link and rb.
* This was necessary in order to apply the "& loc" tests correctly.
*)
let rcu-gp = [Sync-rcu] (* Compare with gp *)
let srcu-gp = [Sync-srcu]
let rcu-rscsi = rcu-rscs^-1
let srcu-rscsi = srcu-rscs^-1
(*
* The synchronize_rcu() strong fence is special in that it can order not
* one but two non-rf relations, but only in conjunction with an RCU
* read-side critical section.
*)
let rcu-link = po? ; hb* ; pb* ; prop ; po
(*
* Any sequence containing at least as many grace periods as RCU read-side
* critical sections (joined by rcu-link) induces order like a generalized
* inter-CPU strong fence.
* Likewise for SRCU grace periods and read-side critical sections, provided
* the synchronize_srcu() and srcu_read_[un]lock() calls refer to the same
* struct srcu_struct location.
*)
let rec rcu-order = rcu-gp | srcu-gp |
(rcu-gp ; rcu-link ; rcu-rscsi) |
((srcu-gp ; rcu-link ; srcu-rscsi) & loc) |
(rcu-rscsi ; rcu-link ; rcu-gp) |
((srcu-rscsi ; rcu-link ; srcu-gp) & loc) |
(rcu-gp ; rcu-link ; rcu-order ; rcu-link ; rcu-rscsi) |
((srcu-gp ; rcu-link ; rcu-order ; rcu-link ; srcu-rscsi) & loc) |
(rcu-rscsi ; rcu-link ; rcu-order ; rcu-link ; rcu-gp) |
((srcu-rscsi ; rcu-link ; rcu-order ; rcu-link ; srcu-gp) & loc) |
(rcu-order ; rcu-link ; rcu-order)
let rcu-fence = po ; rcu-order ; po?
let fence = fence | rcu-fence
let strong-fence = strong-fence | rcu-fence
(* rb orders instructions just as pb does *)
let rb = prop ; rcu-fence ; hb* ; pb* ; [Marked]
irreflexive rb as rcu
(*
* The happens-before, propagation, and rcu constraints are all
* expressions of temporal ordering. They could be replaced by
* a single constraint on an "executes-before" relation, xb:
*
* let xb = hb | pb | rb
* acyclic xb as executes-before
*)
(*********************************)
(* Plain accesses and data races *)
(*********************************)
(* Warn about plain writes and marked accesses in the same region *)
let mixed-accesses = ([Plain & W] ; (po-loc \ barrier) ; [Marked]) |
([Marked] ; (po-loc \ barrier) ; [Plain & W])
flag ~empty mixed-accesses as mixed-accesses
(* Executes-before and visibility *)
let xbstar = (hb | pb | rb)*
let vis = cumul-fence* ; rfe? ; [Marked] ;
((strong-fence ; [Marked] ; xbstar) | (xbstar & int))
(* Boundaries for lifetimes of plain accesses *)
let w-pre-bounded = [Marked] ; (addr | fence)?
let r-pre-bounded = [Marked] ; (addr | nonrw-fence |
([R4rmb] ; fencerel(Rmb) ; [~Noreturn]))?
let w-post-bounded = fence? ; [Marked] ; rmw-sequence
let r-post-bounded = (nonrw-fence | ([~Noreturn] ; fencerel(Rmb) ; [R4rmb]))? ;
[Marked]
(* Visibility and executes-before for plain accesses *)
let ww-vis = fence | (strong-fence ; xbstar ; w-pre-bounded) |
(w-post-bounded ; vis ; w-pre-bounded)
let wr-vis = fence | (strong-fence ; xbstar ; r-pre-bounded) |
(w-post-bounded ; vis ; r-pre-bounded)
let rw-xbstar = fence | (r-post-bounded ; xbstar ; w-pre-bounded)
(* Potential races *)
let pre-race = ext & ((Plain * M) | ((M \ IW) * Plain))
(* Coherence requirements for plain accesses *)
let wr-incoh = pre-race & rf & rw-xbstar^-1
let rw-incoh = pre-race & fr & wr-vis^-1
let ww-incoh = pre-race & co & ww-vis^-1
empty (wr-incoh | rw-incoh | ww-incoh) as plain-coherence
(* Actual races *)
let ww-nonrace = ww-vis & ((Marked * W) | rw-xbstar) & ((W * Marked) | wr-vis)
let ww-race = (pre-race & co) \ ww-nonrace
let wr-race = (pre-race & (co? ; rf)) \ wr-vis \ rw-xbstar^-1
let rw-race = (pre-race & fr) \ rw-xbstar
flag ~empty (ww-race | wr-race | rw-race) as data-race