gru: fix prefetch and speculation bugs

Fix several bugs related to prefetch, ordering & speculation:

	- GRU cch_allocate() instruction causes cacheable memory
	  to be created. Add a barriers to prevent speculation
	  from prefetching data before it exists.
	- Add memory barriers before cache-flush instructions to ensure
	  that previously stored data is included in the line flushed to memory.

Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Jack Steiner 2009-12-15 16:48:11 -08:00 committed by Linus Torvalds
parent e006043a4d
commit 67bf04a5c2
5 changed files with 38 additions and 6 deletions

View file

@ -325,6 +325,7 @@ static inline void gru_flush_cache(void *p)
static inline void gru_start_instruction(struct gru_instruction *ins, int op32)
{
gru_ordered_store_int(ins, op32);
mb();
gru_flush_cache(ins);
}

View file

@ -333,6 +333,7 @@ static int gru_try_dropin(struct gru_thread_state *gts,
*/
if (tfh->status != TFHSTATUS_EXCEPTION) {
gru_flush_cache(tfh);
sync_core();
if (tfh->status != TFHSTATUS_EXCEPTION)
goto failnoexception;
STAT(tfh_stale_on_fault);
@ -599,6 +600,7 @@ int gru_get_exception_detail(unsigned long arg)
cbrnum = thread_cbr_number(gts, ucbnum);
cbe = get_cbe_by_index(gts->ts_gru, cbrnum);
gru_flush_cache(cbe); /* CBE not coherent */
sync_core(); /* make sure we are have current data */
excdet.opc = cbe->opccpy;
excdet.exopc = cbe->exopccpy;
excdet.ecause = cbe->ecause;

View file

@ -91,9 +91,18 @@ static int wait_instruction_complete(void *h, enum mcs_op opc)
int cch_allocate(struct gru_context_configuration_handle *cch)
{
int ret;
cch->opc = CCHOP_ALLOCATE;
start_instruction(cch);
return wait_instruction_complete(cch, cchop_allocate);
ret = wait_instruction_complete(cch, cchop_allocate);
/*
* Stop speculation into the GSEG being mapped by the previous ALLOCATE.
* The GSEG memory does not exist until the ALLOCATE completes.
*/
sync_core();
return ret;
}
int cch_start(struct gru_context_configuration_handle *cch)
@ -112,9 +121,18 @@ int cch_interrupt(struct gru_context_configuration_handle *cch)
int cch_deallocate(struct gru_context_configuration_handle *cch)
{
int ret;
cch->opc = CCHOP_DEALLOCATE;
start_instruction(cch);
return wait_instruction_complete(cch, cchop_deallocate);
ret = wait_instruction_complete(cch, cchop_deallocate);
/*
* Stop speculation into the GSEG being unmapped by the previous
* DEALLOCATE.
*/
sync_core();
return ret;
}
int cch_interrupt_sync(struct gru_context_configuration_handle

View file

@ -395,6 +395,7 @@ int gru_get_cb_exception_detail(void *cb,
cbrnum = thread_cbr_number(bs->bs_kgts, get_cb_number(cb));
cbe = get_cbe(GRUBASE(cb), cbrnum);
gru_flush_cache(cbe); /* CBE not coherent */
sync_core();
excdet->opc = cbe->opccpy;
excdet->exopc = cbe->exopccpy;
excdet->ecause = cbe->ecause;
@ -461,9 +462,10 @@ int gru_check_status_proc(void *cb)
int ret;
ret = gen->istatus;
if (ret != CBS_EXCEPTION)
return ret;
return gru_retry_exception(cb);
if (ret == CBS_EXCEPTION)
ret = gru_retry_exception(cb);
rmb();
return ret;
}
@ -475,7 +477,7 @@ int gru_wait_proc(void *cb)
ret = gru_wait_idle_or_exception(gen);
if (ret == CBS_EXCEPTION)
ret = gru_retry_exception(cb);
rmb();
return ret;
}

View file

@ -499,6 +499,9 @@ static void gru_load_context_data(void *save, void *grubase, int ctxnum,
memset(cbe + i * GRU_HANDLE_STRIDE, 0,
GRU_CACHE_LINE_BYTES);
}
/* Flush CBE to hide race in context restart */
mb();
gru_flush_cache(cbe + i * GRU_HANDLE_STRIDE);
cb += GRU_HANDLE_STRIDE;
}
@ -519,6 +522,12 @@ static void gru_unload_context_data(void *save, void *grubase, int ctxnum,
cb = gseg + GRU_CB_BASE;
cbe = grubase + GRU_CBE_BASE;
length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
/* CBEs may not be coherent. Flush them from cache */
for_each_cbr_in_allocation_map(i, &cbrmap, scr)
gru_flush_cache(cbe + i * GRU_HANDLE_STRIDE);
mb(); /* Let the CL flush complete */
gru_prefetch_context(gseg, cb, cbe, cbrmap, length);
for_each_cbr_in_allocation_map(i, &cbrmap, scr) {