From 10f42d244b4ae621c786ab5d1da9bbd47bb3ad42 Mon Sep 17 00:00:00 2001
From: Matt Macy <mmacy@FreeBSD.org>
Date: Sat, 3 Nov 2018 03:43:32 +0000
Subject: [PATCH] Convert epoch to read / write records per cpu

In discussing D17503 "Run epoch calls sooner and more reliably" with
sbahra@ we came to the conclusion that epoch is currently misusing the
ck_epoch API. It isn't safe to do a "write side" operation
(ck_epoch_call or ck_epoch_poll) in the middle of a "read side"
section. Since, by definition, it's possible to be preempted in the
middle of an EPOCH_PREEMPT section, the GC task might call
ck_epoch_poll, or another thread might call ck_epoch_call, on the same
record. The right solution is ultimately to change the way that
ck_epoch works for this use case. However, as a stopgap for 12 we
agreed to simply have separate records for each use case.

Tested by:	pho@
MFC after:	3 days
---
 sys/kern/subr_epoch.c   |  9 +++++----
 sys/sys/epoch_private.h | 11 ++++++-----
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/sys/kern/subr_epoch.c b/sys/kern/subr_epoch.c
index 3dcc3c197efc..a63f669fea75 100644
--- a/sys/kern/subr_epoch.c
+++ b/sys/kern/subr_epoch.c
@@ -150,7 +150,8 @@ epoch_ctor(epoch_t epoch)
 	CPU_FOREACH(cpu) {
 		er = zpcpu_get_cpu(epoch->e_pcpu_record, cpu);
 		bzero(er, sizeof(*er));
-		ck_epoch_register(&epoch->e_epoch, &er->er_record, NULL);
+		ck_epoch_register(&epoch->e_epoch, &er->er_read_record, NULL);
+		ck_epoch_register(&epoch->e_epoch, &er->er_write_record, NULL);
 		TAILQ_INIT((struct threadlist *)(uintptr_t)&er->er_tdlist);
 		er->er_cpuid = cpu;
 	}
@@ -235,7 +236,7 @@ epoch_block_handler_preempt(struct ck_epoch *global __unused, ck_epoch_record_t
 	int spincount, gen;
 	int locksheld __unused;
 
-	record = __containerof(cr, struct epoch_record, er_record);
+	record = __containerof(cr, struct epoch_record, er_read_record);
 	td = curthread;
 	locksheld = td->td_locks;
 	spincount = 0;
@@ -461,7 +462,7 @@ epoch_call(epoch_t epoch, epoch_context_t ctx, void (*callback) (epoch_context_t
 	critical_enter();
 	*DPCPU_PTR(epoch_cb_count) += 1;
 	er = epoch_currecord(epoch);
-	ck_epoch_call(&er->er_record, cb, (ck_epoch_cb_t *)callback);
+	ck_epoch_call(&er->er_write_record, cb, (ck_epoch_cb_t *)callback);
 	critical_exit();
 	return;
 boottime:
@@ -485,7 +486,7 @@ epoch_call_task(void *arg __unused)
 		if (__predict_false((epoch = allepochs[i]) == NULL))
 			continue;
 		er = epoch_currecord(epoch);
-		record = &er->er_record;
+		record = &er->er_write_record;
 		if ((npending = record->n_pending) == 0)
 			continue;
 		ck_epoch_poll_deferred(record, &cb_stack);
diff --git a/sys/sys/epoch_private.h b/sys/sys/epoch_private.h
index ff16fe7a4983..f475ca3aa200 100644
--- a/sys/sys/epoch_private.h
+++ b/sys/sys/epoch_private.h
@@ -89,7 +89,8 @@ typedef struct epoch_thread {
 TAILQ_HEAD (epoch_tdlist, epoch_thread);
 
 typedef struct epoch_record {
-	ck_epoch_record_t er_record;
+	ck_epoch_record_t er_read_record;
+	ck_epoch_record_t er_write_record;
 	volatile struct epoch_tdlist er_tdlist;
 	volatile uint32_t er_gen;
 	uint32_t er_cpuid;
@@ -138,7 +139,7 @@ epoch_enter_preempt(epoch_t epoch, epoch_tracker_t et)
 	td->td_pre_epoch_prio = td->td_priority;
 	er = epoch_currecord(epoch);
 	TAILQ_INSERT_TAIL(&er->er_tdlist, etd, et_link);
-	ck_epoch_begin(&er->er_record, (ck_epoch_section_t *)&etd->et_section);
+	ck_epoch_begin(&er->er_read_record, (ck_epoch_section_t *)&etd->et_section);
 	critical_exit_sa(td);
 }
 
@@ -155,7 +156,7 @@ epoch_enter(epoch_t epoch)
 	td->td_epochnest++;
 	critical_enter_sa(td);
 	er = epoch_currecord(epoch);
-	ck_epoch_begin(&er->er_record, NULL);
+	ck_epoch_begin(&er->er_read_record, NULL);
 }
 
 static __inline void
@@ -183,7 +184,7 @@ epoch_exit_preempt(epoch_t epoch, epoch_tracker_t et)
 	etd->et_magic_post = 0;
 #endif
 	etd->et_td = (void*)0xDEADBEEF;
-	ck_epoch_end(&er->er_record,
+	ck_epoch_end(&er->er_read_record,
 	    (ck_epoch_section_t *)&etd->et_section);
 	TAILQ_REMOVE(&er->er_tdlist, etd, et_link);
 	er->er_gen++;
@@ -203,7 +204,7 @@ epoch_exit(epoch_t epoch)
 	MPASS(td->td_epochnest);
 	td->td_epochnest--;
 	er = epoch_currecord(epoch);
-	ck_epoch_end(&er->er_record, NULL);
+	ck_epoch_end(&er->er_read_record, NULL);
 	critical_exit_sa(td);
 }
 #endif	/* _KERNEL */
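
As a reference for the pattern the patch adopts, below is a minimal
userspace sketch against the same ck_epoch calls used above: both
records registered up front, ck_epoch_begin/ck_epoch_end issued only on
the read record, and ck_epoch_call/ck_epoch_poll issued only on the
write record. The names (struct obj, obj_free, obj_read, obj_retire,
epoch_setup) are illustrative rather than from the tree; the single
global record pair stands in for the kernel's per-CPU epoch_record (as
in the kernel, a record must only be used by one thread at a time), and
the single-argument ck_epoch_poll signature is assumed from the ck
version this patch builds against.

#include <ck_epoch.h>
#include <stdlib.h>

/*
 * An object retired through the epoch.  The ck_epoch_entry_t is the
 * first member, so the entry pointer handed to the callback is also a
 * pointer to the object itself.
 */
struct obj {
	ck_epoch_entry_t oe_entry;
	int oe_value;
};

static ck_epoch_t epoch;
/* One record per role, mirroring er_read_record / er_write_record. */
static ck_epoch_record_t read_record;
static ck_epoch_record_t write_record;

static void
obj_free(ck_epoch_entry_t *entry)
{

	free(entry);
}

static void
epoch_setup(void)
{

	ck_epoch_init(&epoch);
	ck_epoch_register(&epoch, &read_record, NULL);
	ck_epoch_register(&epoch, &write_record, NULL);
}

static int
obj_read(struct obj *o)
{
	int v;

	/* Read-side section: only ever touches the read record. */
	ck_epoch_begin(&read_record, NULL);
	v = o->oe_value;
	ck_epoch_end(&read_record, NULL);
	return (v);
}

static void
obj_retire(struct obj *o)
{

	/*
	 * Write-side operations go through the separate write record,
	 * so they remain safe even while a preempted thread is parked
	 * inside a read-side section pinned to the read record.
	 */
	ck_epoch_call(&write_record, &o->oe_entry, obj_free);
	(void)ck_epoch_poll(&write_record);
}

Because the deferred-free traffic is isolated on the write record, a
poll or call can never operate on a record that is simultaneously
inside an active read section, which is the invariant the separate
records restore.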