Add DSM TRIM command support for virtual AHCI disks.

It works only for virtual disks backed by ZVOLs and raw devices supporting
BIO_DELETE.  Virtual disks backed by files won't report this capability.

MFC after:	2 weeks
Relnotes:	yes
This commit is contained in:
Alexander Motin 2015-03-13 16:43:52 +00:00
parent dc9fa19b3b
commit 0b9d25c935
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=279957
3 changed files with 166 additions and 8 deletions

View file

@ -59,7 +59,8 @@ __FBSDID("$FreeBSD$");
enum blockop {
BOP_READ,
BOP_WRITE,
BOP_FLUSH
BOP_FLUSH,
BOP_DELETE
};
enum blockstat {
@ -81,6 +82,7 @@ struct blockif_ctxt {
int bc_magic;
int bc_fd;
int bc_ischr;
int bc_candelete;
int bc_rdonly;
off_t bc_size;
int bc_sectsz;
@ -172,6 +174,7 @@ static void
blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be)
{
struct blockif_req *br;
off_t arg[2];
int err;
br = be->be_req;
@ -197,6 +200,17 @@ blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be)
} else if (fsync(bc->bc_fd))
err = errno;
break;
case BOP_DELETE:
if (!bc->bc_candelete)
err = EOPNOTSUPP;
else if (bc->bc_ischr) {
arg[0] = br->br_offset;
arg[1] = br->br_iov[0].iov_len;
if (ioctl(bc->bc_fd, DIOCGDELETE, arg))
err = errno;
} else
err = EOPNOTSUPP;
break;
default:
err = EINVAL;
break;
@ -276,9 +290,10 @@ blockif_open(const char *optstr, const char *ident)
char *nopt, *xopts;
struct blockif_ctxt *bc;
struct stat sbuf;
struct diocgattr_arg arg;
off_t size, psectsz, psectoff;
int extra, fd, i, sectsz;
int nocache, sync, ro;
int nocache, sync, ro, candelete;
pthread_once(&blockif_once, blockif_init);
@ -332,6 +347,7 @@ blockif_open(const char *optstr, const char *ident)
size = sbuf.st_size;
sectsz = DEV_BSIZE;
psectsz = psectoff = 0;
candelete = 0;
if (S_ISCHR(sbuf.st_mode)) {
if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 ||
ioctl(fd, DIOCGSECTORSIZE, &sectsz)) {
@ -343,6 +359,10 @@ blockif_open(const char *optstr, const char *ident)
assert(sectsz != 0);
if (ioctl(fd, DIOCGSTRIPESIZE, &psectsz) == 0 && psectsz > 0)
ioctl(fd, DIOCGSTRIPEOFFSET, &psectoff);
strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name));
arg.len = sizeof(arg.value.i);
if (ioctl(fd, DIOCGATTR, &arg) == 0)
candelete = arg.value.i;
} else
psectsz = sbuf.st_blksize;
@ -355,6 +375,7 @@ blockif_open(const char *optstr, const char *ident)
bc->bc_magic = BLOCKIF_SIG;
bc->bc_fd = fd;
bc->bc_ischr = S_ISCHR(sbuf.st_mode);
bc->bc_candelete = candelete;
bc->bc_rdonly = ro;
bc->bc_size = size;
bc->bc_sectsz = sectsz;
@ -433,6 +454,14 @@ blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq)
return (blockif_request(bc, breq, BOP_FLUSH));
}
int
blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq)
{
assert(bc->bc_magic == BLOCKIF_SIG);
return (blockif_request(bc, breq, BOP_DELETE));
}
int
blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq)
{
@ -634,3 +663,11 @@ blockif_is_ro(struct blockif_ctxt *bc)
assert(bc->bc_magic == BLOCKIF_SIG);
return (bc->bc_rdonly);
}
int
blockif_candelete(struct blockif_ctxt *bc)
{
assert(bc->bc_magic == BLOCKIF_SIG);
return (bc->bc_candelete);
}

View file

@ -58,9 +58,11 @@ int blockif_sectsz(struct blockif_ctxt *bc);
void blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off);
int blockif_queuesz(struct blockif_ctxt *bc);
int blockif_is_ro(struct blockif_ctxt *bc);
int blockif_candelete(struct blockif_ctxt *bc);
int blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq);
int blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq);
int blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq);
int blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq);
int blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq);
int blockif_close(struct blockif_ctxt *bc);

View file

@ -643,6 +643,100 @@ ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
assert(err == 0);
}
static inline void
read_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
void *buf, int size)
{
struct ahci_cmd_hdr *hdr;
struct ahci_prdt_entry *prdt;
void *to;
int i, len;
hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
len = size;
to = buf;
prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
for (i = 0; i < hdr->prdtl && len; i++) {
uint8_t *ptr;
uint32_t dbcsz;
int sublen;
dbcsz = (prdt->dbc & DBCMASK) + 1;
ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
sublen = len < dbcsz ? len : dbcsz;
memcpy(to, ptr, sublen);
len -= sublen;
to += sublen;
prdt++;
}
}
static void
ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
{
struct ahci_ioreq *aior;
struct blockif_req *breq;
uint8_t *entry;
uint64_t elba;
uint32_t len, elen;
int err;
uint8_t buf[512];
len = (uint16_t)cfis[13] << 8 | cfis[12];
len *= 512;
read_prdt(p, slot, cfis, buf, sizeof(buf));
next:
entry = &buf[done];
elba = ((uint64_t)entry[5] << 40) |
((uint64_t)entry[4] << 32) |
((uint64_t)entry[3] << 24) |
((uint64_t)entry[2] << 16) |
((uint64_t)entry[1] << 8) |
entry[0];
elen = (uint16_t)entry[7] << 8 | entry[6];
done += 8;
if (elen == 0) {
if (done >= len) {
ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
p->pending &= ~(1 << slot);
ahci_check_stopped(p);
return;
}
goto next;
}
/*
* Pull request off free list
*/
aior = STAILQ_FIRST(&p->iofhd);
assert(aior != NULL);
STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
aior->cfis = cfis;
aior->slot = slot;
aior->len = len;
aior->done = done;
aior->prdtl = 0;
breq = &aior->io_req;
breq->br_offset = elba * blockif_sectsz(p->bctx);
breq->br_iovcnt = 1;
breq->br_iov[0].iov_len = elen * blockif_sectsz(p->bctx);
/*
* Mark this command in-flight.
*/
p->pending |= 1 << slot;
/*
* Stuff request onto busy list
*/
TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
err = blockif_delete(p->bctx, breq);
assert(err == 0);
}
static inline void
write_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
void *buf, int size)
@ -684,10 +778,11 @@ handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
} else {
uint16_t buf[256];
uint64_t sectors;
int sectsz, psectsz, psectoff;
int sectsz, psectsz, psectoff, candelete;
uint16_t cyl;
uint8_t sech, heads;
candelete = blockif_candelete(p->bctx);
sectsz = blockif_sectsz(p->bctx);
sectors = blockif_size(p->bctx) / sectsz;
blockif_chs(p->bctx, &cyl, &heads, &sech);
@ -718,6 +813,7 @@ handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
buf[66] = 100;
buf[67] = 100;
buf[68] = 100;
buf[69] = 0;
buf[75] = 31;
buf[76] = (1 << 8 | 1 << 2);
buf[80] = 0x1f0;
@ -736,6 +832,11 @@ handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
buf[101] = (sectors >> 16);
buf[102] = (sectors >> 32);
buf[103] = (sectors >> 48);
if (candelete) {
buf[69] |= ATA_SUPPORT_RZAT | ATA_SUPPORT_DRAT;
buf[105] = 1;
buf[169] = ATA_SUPPORT_DSM_TRIM;
}
buf[106] = 0x4000;
buf[209] = 0x4000;
if (psectsz > sectsz) {
@ -1394,6 +1495,15 @@ ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
case ATA_FLUSHCACHE48:
ahci_handle_flush(p, slot, cfis);
break;
case ATA_DATA_SET_MANAGEMENT:
if (cfis[11] == 0 && cfis[3] == ATA_DSM_TRIM &&
cfis[13] == 0 && cfis[12] == 1) {
ahci_handle_dsm_trim(p, slot, cfis, 0);
break;
}
ahci_write_fis_d2h(p, slot, cfis,
(ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
break;
case ATA_STANDBY_CMD:
break;
case ATA_NOP:
@ -1505,7 +1615,7 @@ ata_ioreq_cb(struct blockif_req *br, int err)
struct pci_ahci_softc *sc;
uint32_t tfd;
uint8_t *cfis;
int pending, slot, ncq;
int pending, slot, ncq, dsm;
DPRINTF("%s %d\n", __func__, err);
@ -1521,6 +1631,8 @@ ata_ioreq_cb(struct blockif_req *br, int err)
if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
cfis[2] == ATA_READ_FPDMA_QUEUED)
ncq = 1;
if (cfis[2] == ATA_DATA_SET_MANAGEMENT)
dsm = 1;
pthread_mutex_lock(&sc->mtx);
@ -1534,10 +1646,17 @@ ata_ioreq_cb(struct blockif_req *br, int err)
*/
STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
if (pending && !err) {
ahci_handle_dma(p, slot, cfis, aior->done,
hdr->prdtl - pending);
goto out;
if (dsm) {
if (aior->done != aior->len && !err) {
ahci_handle_dsm_trim(p, slot, cfis, aior->done);
goto out;
}
} else {
if (pending && !err) {
ahci_handle_dma(p, slot, cfis, aior->done,
hdr->prdtl - pending);
goto out;
}
}
if (!err && aior->done == aior->len) {