diff --git a/usr.sbin/bhyve/block_if.c b/usr.sbin/bhyve/block_if.c index e765987afaf4..de4302046789 100644 --- a/usr.sbin/bhyve/block_if.c +++ b/usr.sbin/bhyve/block_if.c @@ -59,7 +59,8 @@ __FBSDID("$FreeBSD$"); enum blockop { BOP_READ, BOP_WRITE, - BOP_FLUSH + BOP_FLUSH, + BOP_DELETE }; enum blockstat { @@ -81,6 +82,7 @@ struct blockif_ctxt { int bc_magic; int bc_fd; int bc_ischr; + int bc_candelete; int bc_rdonly; off_t bc_size; int bc_sectsz; @@ -172,6 +174,7 @@ static void blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be) { struct blockif_req *br; + off_t arg[2]; int err; br = be->be_req; @@ -197,6 +200,17 @@ blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be) } else if (fsync(bc->bc_fd)) err = errno; break; + case BOP_DELETE: + if (!bc->bc_candelete) + err = EOPNOTSUPP; + else if (bc->bc_ischr) { + arg[0] = br->br_offset; + arg[1] = br->br_iov[0].iov_len; + if (ioctl(bc->bc_fd, DIOCGDELETE, arg)) + err = errno; + } else + err = EOPNOTSUPP; + break; default: err = EINVAL; break; @@ -276,9 +290,10 @@ blockif_open(const char *optstr, const char *ident) char *nopt, *xopts; struct blockif_ctxt *bc; struct stat sbuf; + struct diocgattr_arg arg; off_t size, psectsz, psectoff; int extra, fd, i, sectsz; - int nocache, sync, ro; + int nocache, sync, ro, candelete; pthread_once(&blockif_once, blockif_init); @@ -332,6 +347,7 @@ blockif_open(const char *optstr, const char *ident) size = sbuf.st_size; sectsz = DEV_BSIZE; psectsz = psectoff = 0; + candelete = 0; if (S_ISCHR(sbuf.st_mode)) { if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 || ioctl(fd, DIOCGSECTORSIZE, §sz)) { @@ -343,6 +359,10 @@ blockif_open(const char *optstr, const char *ident) assert(sectsz != 0); if (ioctl(fd, DIOCGSTRIPESIZE, &psectsz) == 0 && psectsz > 0) ioctl(fd, DIOCGSTRIPEOFFSET, &psectoff); + strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name)); + arg.len = sizeof(arg.value.i); + if (ioctl(fd, DIOCGATTR, &arg) == 0) + candelete = arg.value.i; } else psectsz = sbuf.st_blksize; @@ -355,6 +375,7 @@ blockif_open(const char *optstr, const char *ident) bc->bc_magic = BLOCKIF_SIG; bc->bc_fd = fd; bc->bc_ischr = S_ISCHR(sbuf.st_mode); + bc->bc_candelete = candelete; bc->bc_rdonly = ro; bc->bc_size = size; bc->bc_sectsz = sectsz; @@ -433,6 +454,14 @@ blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq) return (blockif_request(bc, breq, BOP_FLUSH)); } +int +blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq) +{ + + assert(bc->bc_magic == BLOCKIF_SIG); + return (blockif_request(bc, breq, BOP_DELETE)); +} + int blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq) { @@ -634,3 +663,11 @@ blockif_is_ro(struct blockif_ctxt *bc) assert(bc->bc_magic == BLOCKIF_SIG); return (bc->bc_rdonly); } + +int +blockif_candelete(struct blockif_ctxt *bc) +{ + + assert(bc->bc_magic == BLOCKIF_SIG); + return (bc->bc_candelete); +} diff --git a/usr.sbin/bhyve/block_if.h b/usr.sbin/bhyve/block_if.h index d1b7695c03f2..4e2c1b4e55e6 100644 --- a/usr.sbin/bhyve/block_if.h +++ b/usr.sbin/bhyve/block_if.h @@ -58,9 +58,11 @@ int blockif_sectsz(struct blockif_ctxt *bc); void blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off); int blockif_queuesz(struct blockif_ctxt *bc); int blockif_is_ro(struct blockif_ctxt *bc); +int blockif_candelete(struct blockif_ctxt *bc); int blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq); int blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq); int blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq); +int blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq); int blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq); int blockif_close(struct blockif_ctxt *bc); diff --git a/usr.sbin/bhyve/pci_ahci.c b/usr.sbin/bhyve/pci_ahci.c index 87ad361c909d..a143072cdd86 100644 --- a/usr.sbin/bhyve/pci_ahci.c +++ b/usr.sbin/bhyve/pci_ahci.c @@ -643,6 +643,100 @@ ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis) assert(err == 0); } +static inline void +read_prdt(struct ahci_port *p, int slot, uint8_t *cfis, + void *buf, int size) +{ + struct ahci_cmd_hdr *hdr; + struct ahci_prdt_entry *prdt; + void *to; + int i, len; + + hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE); + len = size; + to = buf; + prdt = (struct ahci_prdt_entry *)(cfis + 0x80); + for (i = 0; i < hdr->prdtl && len; i++) { + uint8_t *ptr; + uint32_t dbcsz; + int sublen; + + dbcsz = (prdt->dbc & DBCMASK) + 1; + ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz); + sublen = len < dbcsz ? len : dbcsz; + memcpy(to, ptr, sublen); + len -= sublen; + to += sublen; + prdt++; + } +} + +static void +ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done) +{ + struct ahci_ioreq *aior; + struct blockif_req *breq; + uint8_t *entry; + uint64_t elba; + uint32_t len, elen; + int err; + uint8_t buf[512]; + + len = (uint16_t)cfis[13] << 8 | cfis[12]; + len *= 512; + read_prdt(p, slot, cfis, buf, sizeof(buf)); + +next: + entry = &buf[done]; + elba = ((uint64_t)entry[5] << 40) | + ((uint64_t)entry[4] << 32) | + ((uint64_t)entry[3] << 24) | + ((uint64_t)entry[2] << 16) | + ((uint64_t)entry[1] << 8) | + entry[0]; + elen = (uint16_t)entry[7] << 8 | entry[6]; + done += 8; + if (elen == 0) { + if (done >= len) { + ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC); + p->pending &= ~(1 << slot); + ahci_check_stopped(p); + return; + } + goto next; + } + + /* + * Pull request off free list + */ + aior = STAILQ_FIRST(&p->iofhd); + assert(aior != NULL); + STAILQ_REMOVE_HEAD(&p->iofhd, io_flist); + aior->cfis = cfis; + aior->slot = slot; + aior->len = len; + aior->done = done; + aior->prdtl = 0; + + breq = &aior->io_req; + breq->br_offset = elba * blockif_sectsz(p->bctx); + breq->br_iovcnt = 1; + breq->br_iov[0].iov_len = elen * blockif_sectsz(p->bctx); + + /* + * Mark this command in-flight. + */ + p->pending |= 1 << slot; + + /* + * Stuff request onto busy list + */ + TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist); + + err = blockif_delete(p->bctx, breq); + assert(err == 0); +} + static inline void write_prdt(struct ahci_port *p, int slot, uint8_t *cfis, void *buf, int size) @@ -684,10 +778,11 @@ handle_identify(struct ahci_port *p, int slot, uint8_t *cfis) } else { uint16_t buf[256]; uint64_t sectors; - int sectsz, psectsz, psectoff; + int sectsz, psectsz, psectoff, candelete; uint16_t cyl; uint8_t sech, heads; + candelete = blockif_candelete(p->bctx); sectsz = blockif_sectsz(p->bctx); sectors = blockif_size(p->bctx) / sectsz; blockif_chs(p->bctx, &cyl, &heads, &sech); @@ -718,6 +813,7 @@ handle_identify(struct ahci_port *p, int slot, uint8_t *cfis) buf[66] = 100; buf[67] = 100; buf[68] = 100; + buf[69] = 0; buf[75] = 31; buf[76] = (1 << 8 | 1 << 2); buf[80] = 0x1f0; @@ -736,6 +832,11 @@ handle_identify(struct ahci_port *p, int slot, uint8_t *cfis) buf[101] = (sectors >> 16); buf[102] = (sectors >> 32); buf[103] = (sectors >> 48); + if (candelete) { + buf[69] |= ATA_SUPPORT_RZAT | ATA_SUPPORT_DRAT; + buf[105] = 1; + buf[169] = ATA_SUPPORT_DSM_TRIM; + } buf[106] = 0x4000; buf[209] = 0x4000; if (psectsz > sectsz) { @@ -1394,6 +1495,15 @@ ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis) case ATA_FLUSHCACHE48: ahci_handle_flush(p, slot, cfis); break; + case ATA_DATA_SET_MANAGEMENT: + if (cfis[11] == 0 && cfis[3] == ATA_DSM_TRIM && + cfis[13] == 0 && cfis[12] == 1) { + ahci_handle_dsm_trim(p, slot, cfis, 0); + break; + } + ahci_write_fis_d2h(p, slot, cfis, + (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR); + break; case ATA_STANDBY_CMD: break; case ATA_NOP: @@ -1505,7 +1615,7 @@ ata_ioreq_cb(struct blockif_req *br, int err) struct pci_ahci_softc *sc; uint32_t tfd; uint8_t *cfis; - int pending, slot, ncq; + int pending, slot, ncq, dsm; DPRINTF("%s %d\n", __func__, err); @@ -1521,6 +1631,8 @@ ata_ioreq_cb(struct blockif_req *br, int err) if (cfis[2] == ATA_WRITE_FPDMA_QUEUED || cfis[2] == ATA_READ_FPDMA_QUEUED) ncq = 1; + if (cfis[2] == ATA_DATA_SET_MANAGEMENT) + dsm = 1; pthread_mutex_lock(&sc->mtx); @@ -1534,10 +1646,17 @@ ata_ioreq_cb(struct blockif_req *br, int err) */ STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist); - if (pending && !err) { - ahci_handle_dma(p, slot, cfis, aior->done, - hdr->prdtl - pending); - goto out; + if (dsm) { + if (aior->done != aior->len && !err) { + ahci_handle_dsm_trim(p, slot, cfis, aior->done); + goto out; + } + } else { + if (pending && !err) { + ahci_handle_dma(p, slot, cfis, aior->done, + hdr->prdtl - pending); + goto out; + } } if (!err && aior->done == aior->len) {