amd64: Use __seg_gs to implement per-CPU data accesses.

This makes use of the alternate address space support in both GCC and
clang to access per-CPU data as accesses relative to GS:.  The
original motivation for this is that it quiets verbose warnings from
GCC 12.  However, this version is also much easier to read and
allows the compiler to generate better code (e.g. the compiler can
use a GS: memory operand directly in other instructions such as IMUL
and CMP rather than always MOVing to a temporary register).

The one caveat is that the current approach is very inefficient at -O0
since the compiler expects to load the 0 base offset from a global
variable instead of assuming it is 0 (even with the const).

Reviewed by:	kib
Differential Revision:	https://reviews.freebsd.org/D40647
This commit is contained in:
John Baldwin 2023-07-07 13:06:55 -07:00
parent 1f7e357264
commit 2329393c61

View file

@ -113,18 +113,74 @@ _Static_assert(sizeof(struct monitorbuf) == 128, "2x cache line");
/*
 * Stop-state constants: 0 means running, 1 means stopped.
 * NOTE(review): the field that stores these values is outside this
 * excerpt -- presumably part of struct monitorbuf; confirm.
 */
#define MONITOR_STOPSTATE_RUNNING 0
#define MONITOR_STOPSTATE_STOPPED 1
/*
 * __pcpu_offset(name): byte offset of member `name` inside struct pcpu,
 * as computed by __offsetof.
 */
#define __pcpu_offset(name) __offsetof(struct pcpu, name)
/*
 * __pcpu_type(member): the declared type of struct pcpu's `member`.
 * The null struct pcpu pointer appears only as a __typeof operand to
 * name the member.
 */
#define __pcpu_type(member) __typeof(((struct pcpu *)0)->member)
#ifdef __SEG_GS
/*
 * Evaluates to the pc_prvspace member, read through a __seg_gs-qualified
 * struct pcpu pointer whose value is 0.
 */
#define get_pcpu() __extension__ ({ \
	static struct pcpu __seg_gs *__gsbase = 0; \
	\
	__gsbase->pc_prvspace; \
})
/*
 * Evaluates to a pointer to member `name` of the struct pcpu that
 * get_pcpu() returns.
 */
#define __PCPU_PTR(name) __extension__ ({ \
	struct pcpu *__self = get_pcpu(); \
	\
	&__self->name; \
})
/*
 * Evaluates to the value of member `name`, read through a
 * __seg_gs-qualified struct pcpu pointer whose value is 0.
 */
#define __PCPU_GET(name) __extension__ ({ \
	static struct pcpu __seg_gs *__gsbase = 0; \
	\
	__gsbase->name; \
})
/*
 * Adds val to the per-cpu counter name.  The implementation must be
 * atomic with respect to interrupts.
 *
 * val is evaluated once into a temporary of the member's type.  Members
 * of 1, 2, 4 or 8 bytes are updated in place through the __seg_gs
 * pointer; any other size is updated through __PCPU_PTR().
 */
#define __PCPU_ADD(name, val) do { \
	static struct pcpu __seg_gs *__gsbase = 0; \
	__pcpu_type(name) __delta = (val); \
	\
	if (sizeof(__delta) != 1 && sizeof(__delta) != 2 && \
	    sizeof(__delta) != 4 && sizeof(__delta) != 8) \
		*__PCPU_PTR(name) += __delta; \
	else \
		__gsbase->name += __delta; \
} while (0)
/*
 * Sets the value of the per-cpu variable name to value val.
 *
 * val is evaluated once into a temporary of the member's type.  Members
 * of 1, 2, 4 or 8 bytes are stored directly through the __seg_gs
 * pointer; any other size is stored through __PCPU_PTR().
 *
 * The body is wrapped in do { } while (0) so that the expansion plus the
 * caller's trailing semicolon forms exactly one statement, which keeps
 * the macro usable as the body of an if that has an else.
 */
#define __PCPU_SET(name, val) do { \
	static struct pcpu __seg_gs *__pc = 0; \
	__pcpu_type(name) __val; \
	\
	__val = (val); \
	if (sizeof(__val) == 1 || sizeof(__val) == 2 || \
	    sizeof(__val) == 4 || sizeof(__val) == 8) { \
		__pc->name = __val; \
	} else \
		*__PCPU_PTR(name) = __val; \
} while (0)
#else /* !__SEG_GS */
/*
 * __pcpu_offset(name): byte offset of member `name` inside struct pcpu,
 * as computed by __offsetof.
 */
#define __pcpu_offset(name) __offsetof(struct pcpu, name)
/*
* Evaluates to the address of the per-cpu variable name.
*/
@ -210,6 +266,7 @@ _Static_assert(sizeof(struct monitorbuf) == 128, "2x cache line");
: "m" (*(struct pcpu *)(__pcpu_offset(pc_prvspace)))); \
__pc; \
})
#endif /* !__SEG_GS */
/* PCPU_GET(x): forwards to __PCPU_GET with the member name pc_<x>. */
#define PCPU_GET(field) __PCPU_GET(pc_##field)
/* PCPU_ADD(x, v): forwards to __PCPU_ADD with the member name pc_<x>. */
#define PCPU_ADD(field, amount) __PCPU_ADD(pc_##field, amount)