hw/cxl: QMP based poison injection support

Inject poison using QMP command cxl-inject-poison to add an entry to the
poison list.

For now, the poison is not returned CXL.mem reads, but only via the
mailbox command Get Poison List. So a normal memory read to an address
that is on the poison list will not yet result in a synchronous exception
(and similar for partial cacheline writes).
That is left for a future patch.

See CXL rev 3.0, sec 8.2.9.8.4.1 Get Poison list (Opcode 4300h)

Kernel patches to use this interface here:
https://lore.kernel.org/linux-cxl/cover.1665606782.git.alison.schofield@intel.com/

To inject poison using QMP (telnet to the QMP port)
{ "execute": "qmp_capabilities" }

{ "execute": "cxl-inject-poison",
    "arguments": {
         "path": "/machine/peripheral/cxl-pmem0",
         "start": 2048,
         "length": 256
    }
}

Adjusted to select a device on your machine.

Note that the poison list supported is kept short enough to avoid the
complexity of state machine that is needed to handle the MORE flag.

Reviewed-by: Fan Ni <fan.ni@samsung.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Acked-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Message-Id: <20230526170010.574-3-Jonathan.Cameron@huawei.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
This commit is contained in:
Jonathan Cameron 2023-05-26 18:00:08 +01:00 committed by Michael S. Tsirkin
parent 14180d6221
commit 9547754f40
6 changed files with 194 additions and 0 deletions

View file

@ -62,6 +62,8 @@ enum {
#define GET_PARTITION_INFO 0x0
#define GET_LSA 0x2
#define SET_LSA 0x3
MEDIA_AND_POISON = 0x43,
#define GET_POISON_LIST 0x0
};
/* 8.2.8.4.5.1 Command Return Codes */
@ -295,6 +297,10 @@ static CXLRetCode cmd_identify_memory_device(struct cxl_cmd *cmd,
stq_le_p(&id->persistent_capacity, cxl_dstate->pmem_size / CXL_CAPACITY_MULTIPLIER);
stq_le_p(&id->volatile_capacity, cxl_dstate->vmem_size / CXL_CAPACITY_MULTIPLIER);
stl_le_p(&id->lsa_size, cvc->get_lsa_size(ct3d));
/* 256 poison records */
st24_le_p(id->poison_list_max_mer, 256);
/* No limit - so limited by main poison record limit */
stw_le_p(&id->inject_poison_limit, 0);
*len = sizeof(*id);
return CXL_MBOX_SUCCESS;
@ -384,6 +390,88 @@ static CXLRetCode cmd_ccls_set_lsa(struct cxl_cmd *cmd,
return CXL_MBOX_SUCCESS;
}
/*
* This is very inefficient, but good enough for now!
* Also the payload will always fit, so no need to handle the MORE flag and
* make this stateful. We may want to allow longer poison lists to aid
* testing that kernel functionality.
*/
static CXLRetCode cmd_media_get_poison_list(struct cxl_cmd *cmd,
CXLDeviceState *cxl_dstate,
uint16_t *len)
{
struct get_poison_list_pl {
uint64_t pa;
uint64_t length;
} QEMU_PACKED;
struct get_poison_list_out_pl {
uint8_t flags;
uint8_t rsvd1;
uint64_t overflow_timestamp;
uint16_t count;
uint8_t rsvd2[0x14];
struct {
uint64_t addr;
uint32_t length;
uint32_t resv;
} QEMU_PACKED records[];
} QEMU_PACKED;
struct get_poison_list_pl *in = (void *)cmd->payload;
struct get_poison_list_out_pl *out = (void *)cmd->payload;
CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
uint16_t record_count = 0, i = 0;
uint64_t query_start, query_length;
CXLPoisonList *poison_list = &ct3d->poison_list;
CXLPoison *ent;
uint16_t out_pl_len;
query_start = ldq_le_p(&in->pa);
/* 64 byte alignemnt required */
if (query_start & 0x3f) {
return CXL_MBOX_INVALID_INPUT;
}
query_length = ldq_le_p(&in->length) * CXL_CACHE_LINE_SIZE;
QLIST_FOREACH(ent, poison_list, node) {
/* Check for no overlap */
if (ent->start >= query_start + query_length ||
ent->start + ent->length <= query_start) {
continue;
}
record_count++;
}
out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]);
assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE);
memset(out, 0, out_pl_len);
QLIST_FOREACH(ent, poison_list, node) {
uint64_t start, stop;
/* Check for no overlap */
if (ent->start >= query_start + query_length ||
ent->start + ent->length <= query_start) {
continue;
}
/* Deal with overlap */
start = MAX(ROUND_DOWN(ent->start, 64ull), query_start);
stop = MIN(ROUND_DOWN(ent->start, 64ull) + ent->length,
query_start + query_length);
stq_le_p(&out->records[i].addr, start | (ent->type & 0x7));
stl_le_p(&out->records[i].length, (stop - start) / CXL_CACHE_LINE_SIZE);
i++;
}
if (ct3d->poison_list_overflowed) {
out->flags = (1 << 1);
stq_le_p(&out->overflow_timestamp, ct3d->poison_list_overflow_ts);
}
stw_le_p(&out->count, record_count);
*len = out_pl_len;
return CXL_MBOX_SUCCESS;
}
#define IMMEDIATE_CONFIG_CHANGE (1 << 1)
#define IMMEDIATE_DATA_CHANGE (1 << 2)
#define IMMEDIATE_POLICY_CHANGE (1 << 3)
@ -411,6 +499,8 @@ static struct cxl_cmd cxl_cmd_set[256][256] = {
[CCLS][GET_LSA] = { "CCLS_GET_LSA", cmd_ccls_get_lsa, 8, 0 },
[CCLS][SET_LSA] = { "CCLS_SET_LSA", cmd_ccls_set_lsa,
~0, IMMEDIATE_CONFIG_CHANGE | IMMEDIATE_DATA_CHANGE },
[MEDIA_AND_POISON][GET_POISON_LIST] = { "MEDIA_AND_POISON_GET_POISON_LIST",
cmd_media_get_poison_list, 16, 0 },
};
void cxl_process_mailbox(CXLDeviceState *cxl_dstate)

View file

@ -947,6 +947,62 @@ static void set_lsa(CXLType3Dev *ct3d, const void *buf, uint64_t size,
*/
}
void cxl_set_poison_list_overflowed(CXLType3Dev *ct3d)
{
ct3d->poison_list_overflowed = true;
ct3d->poison_list_overflow_ts =
cxl_device_get_timestamp(&ct3d->cxl_dstate);
}
void qmp_cxl_inject_poison(const char *path, uint64_t start, uint64_t length,
Error **errp)
{
Object *obj = object_resolve_path(path, NULL);
CXLType3Dev *ct3d;
CXLPoison *p;
if (length % 64) {
error_setg(errp, "Poison injection must be in multiples of 64 bytes");
return;
}
if (start % 64) {
error_setg(errp, "Poison start address must be 64 byte aligned");
return;
}
if (!obj) {
error_setg(errp, "Unable to resolve path");
return;
}
if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
error_setg(errp, "Path does not point to a CXL type 3 device");
return;
}
ct3d = CXL_TYPE3(obj);
QLIST_FOREACH(p, &ct3d->poison_list, node) {
if (((start >= p->start) && (start < p->start + p->length)) ||
((start + length > p->start) &&
(start + length <= p->start + p->length))) {
error_setg(errp, "Overlap with existing poisoned region not supported");
return;
}
}
if (ct3d->poison_list_cnt == CXL_POISON_LIST_LIMIT) {
cxl_set_poison_list_overflowed(ct3d);
return;
}
p = g_new0(CXLPoison, 1);
p->length = length;
p->start = start;
p->type = CXL_POISON_TYPE_INTERNAL; /* Different from injected via the mbox */
QLIST_INSERT_HEAD(&ct3d->poison_list, p, node);
ct3d->poison_list_cnt++;
}
/* For uncorrectable errors include support for multiple header recording */
void qmp_cxl_inject_uncorrectable_errors(const char *path,
CXLUncorErrorRecordList *errors,

View file

@ -3,6 +3,12 @@
#include "qapi/error.h"
#include "qapi/qapi-commands-cxl.h"
void qmp_cxl_inject_poison(const char *path, uint64_t start, uint64_t length,
Error **errp)
{
error_setg(errp, "CXL Type 3 support is not compiled in");
}
void qmp_cxl_inject_uncorrectable_errors(const char *path,
CXLUncorErrorRecordList *errors,
Error **errp)

View file

@ -18,6 +18,7 @@
#include "cxl_component.h"
#include "cxl_device.h"
#define CXL_CACHE_LINE_SIZE 64
#define CXL_COMPONENT_REG_BAR_IDX 0
#define CXL_DEVICE_REG_BAR_IDX 2

View file

@ -242,6 +242,18 @@ typedef struct CXLError {
typedef QTAILQ_HEAD(, CXLError) CXLErrorList;
typedef struct CXLPoison {
uint64_t start, length;
uint8_t type;
#define CXL_POISON_TYPE_EXTERNAL 0x1
#define CXL_POISON_TYPE_INTERNAL 0x2
#define CXL_POISON_TYPE_INJECTED 0x3
QLIST_ENTRY(CXLPoison) node;
} CXLPoison;
typedef QLIST_HEAD(, CXLPoison) CXLPoisonList;
#define CXL_POISON_LIST_LIMIT 256
struct CXLType3Dev {
/* Private */
PCIDevice parent_obj;
@ -264,6 +276,12 @@ struct CXLType3Dev {
/* Error injection */
CXLErrorList error_list;
/* Poison Injection - cache */
CXLPoisonList poison_list;
unsigned int poison_list_cnt;
bool poison_list_overflowed;
uint64_t poison_list_overflow_ts;
};
#define TYPE_CXL_TYPE3 "cxl-type3"
@ -289,4 +307,6 @@ MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data,
uint64_t cxl_device_get_timestamp(CXLDeviceState *cxlds);
void cxl_set_poison_list_overflowed(CXLType3Dev *ct3d);
#endif

View file

@ -5,6 +5,27 @@
# = CXL devices
##
##
# @cxl-inject-poison:
#
# Poison records indicate that a CXL memory device knows that a
# particular memory region may be corrupted. This may be because of
# locally detected errors (e.g. ECC failure) or poisoned writes
# received from other components in the system. This injection
# mechanism enables testing of the OS handling of poison records which
# may be queried via the CXL mailbox.
#
# @path: CXL type 3 device canonical QOM path
#
# @start: Start address; must be 64 byte aligned.
#
# @length: Length of poison to inject; must be a multiple of 64 bytes.
#
# Since: 8.1
##
{ 'command': 'cxl-inject-poison',
'data': { 'path': 'str', 'start': 'uint64', 'length': 'size' }}
##
# @CxlUncorErrorType:
#