diff --git a/block/bio.c b/block/bio.c index 2e421c0dad13..fd11614bba4d 100644 --- a/block/bio.c +++ b/block/bio.c @@ -772,6 +772,7 @@ static inline void bio_put_percpu_cache(struct bio *bio) if ((bio->bi_opf & REQ_POLLED) && !WARN_ON_ONCE(in_interrupt())) { bio->bi_next = cache->free_list; + bio->bi_bdev = NULL; cache->free_list = bio; cache->nr++; } else { diff --git a/block/blk-core.c b/block/blk-core.c index 82b5b2c53f1e..9e5e0277a4d9 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -858,10 +858,16 @@ EXPORT_SYMBOL(submit_bio); */ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags) { - struct request_queue *q = bdev_get_queue(bio->bi_bdev); blk_qc_t cookie = READ_ONCE(bio->bi_cookie); + struct block_device *bdev; + struct request_queue *q; int ret = 0; + bdev = READ_ONCE(bio->bi_bdev); + if (!bdev) + return 0; + + q = bdev_get_queue(bdev); if (cookie == BLK_QC_T_NONE || !test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) return 0; @@ -930,7 +936,7 @@ int iocb_bio_iopoll(struct kiocb *kiocb, struct io_comp_batch *iob, */ rcu_read_lock(); bio = READ_ONCE(kiocb->private); - if (bio && bio->bi_bdev) + if (bio) ret = bio_poll(bio, iob, flags); rcu_read_unlock(); diff --git a/block/blk-iocost.c b/block/blk-iocost.c index ff534e9d92dc..4442c7a85112 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -800,7 +800,11 @@ static void ioc_refresh_period_us(struct ioc *ioc) ioc_refresh_margins(ioc); } -static int ioc_autop_idx(struct ioc *ioc) +/* + * ioc->rqos.disk isn't initialized when this function is called from + * the init path. + */ +static int ioc_autop_idx(struct ioc *ioc, struct gendisk *disk) { int idx = ioc->autop_idx; const struct ioc_params *p = &autop[idx]; @@ -808,11 +812,11 @@ static int ioc_autop_idx(struct ioc *ioc) u64 now_ns; /* rotational? 
*/ - if (!blk_queue_nonrot(ioc->rqos.disk->queue)) + if (!blk_queue_nonrot(disk->queue)) return AUTOP_HDD; /* handle SATA SSDs w/ broken NCQ */ - if (blk_queue_depth(ioc->rqos.disk->queue) == 1) + if (blk_queue_depth(disk->queue) == 1) return AUTOP_SSD_QD1; /* use one of the normal ssd sets */ @@ -901,14 +905,19 @@ static void ioc_refresh_lcoefs(struct ioc *ioc) &c[LCOEF_WPAGE], &c[LCOEF_WSEQIO], &c[LCOEF_WRANDIO]); } -static bool ioc_refresh_params(struct ioc *ioc, bool force) +/* + * struct gendisk is required as an argument because ioc->rqos.disk + * is not properly initialized when called from the init path. + */ +static bool ioc_refresh_params_disk(struct ioc *ioc, bool force, + struct gendisk *disk) { const struct ioc_params *p; int idx; lockdep_assert_held(&ioc->lock); - idx = ioc_autop_idx(ioc); + idx = ioc_autop_idx(ioc, disk); p = &autop[idx]; if (idx == ioc->autop_idx && !force) @@ -939,6 +948,11 @@ static bool ioc_refresh_params(struct ioc *ioc, bool force) return true; } +static bool ioc_refresh_params(struct ioc *ioc, bool force) +{ + return ioc_refresh_params_disk(ioc, force, ioc->rqos.disk); +} + /* * When an iocg accumulates too much vtime or gets deactivated, we throw away * some vtime, which lowers the overall device utilization. 
As the exact amount @@ -2880,7 +2894,7 @@ static int blk_iocost_init(struct gendisk *disk) spin_lock_irq(&ioc->lock); ioc->autop_idx = AUTOP_INVALID; - ioc_refresh_params(ioc, true); + ioc_refresh_params_disk(ioc, true, disk); spin_unlock_irq(&ioc->lock); /* diff --git a/block/blk-merge.c b/block/blk-merge.c index 1ac782fdc55c..6460abdb2426 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -587,13 +587,6 @@ int __blk_rq_map_sg(struct request_queue *q, struct request *rq, } EXPORT_SYMBOL(__blk_rq_map_sg); -static inline unsigned int blk_rq_get_max_segments(struct request *rq) -{ - if (req_op(rq) == REQ_OP_DISCARD) - return queue_max_discard_segments(rq->q); - return queue_max_segments(rq->q); -} - static inline unsigned int blk_rq_get_max_sectors(struct request *rq, sector_t offset) { diff --git a/block/blk-mq.c b/block/blk-mq.c index d3494a796ba8..d0cb2ef18fe2 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -3000,6 +3000,7 @@ blk_status_t blk_insert_cloned_request(struct request *rq) { struct request_queue *q = rq->q; unsigned int max_sectors = blk_queue_get_max_sectors(q, req_op(rq)); + unsigned int max_segments = blk_rq_get_max_segments(rq); blk_status_t ret; if (blk_rq_sectors(rq) > max_sectors) { @@ -3026,9 +3027,9 @@ blk_status_t blk_insert_cloned_request(struct request *rq) * original queue. */ rq->nr_phys_segments = blk_recalc_rq_segments(rq); - if (rq->nr_phys_segments > queue_max_segments(q)) { - printk(KERN_ERR "%s: over max segments limit. (%hu > %hu)\n", - __func__, rq->nr_phys_segments, queue_max_segments(q)); + if (rq->nr_phys_segments > max_segments) { + printk(KERN_ERR "%s: over max segments limit. 
(%u > %u)\n", + __func__, rq->nr_phys_segments, max_segments); return BLK_STS_IOERR; } diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 614b575be899..fce9082384d6 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -334,17 +334,12 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, { void __user *argp = (void __user *)arg; struct zone_report_args args; - struct request_queue *q; struct blk_zone_report rep; int ret; if (!argp) return -EINVAL; - q = bdev_get_queue(bdev); - if (!q) - return -ENXIO; - if (!bdev_is_zoned(bdev)) return -ENOTTY; @@ -391,7 +386,6 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { void __user *argp = (void __user *)arg; - struct request_queue *q; struct blk_zone_range zrange; enum req_op op; int ret; @@ -399,10 +393,6 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode, if (!argp) return -EINVAL; - q = bdev_get_queue(bdev); - if (!q) - return -ENXIO; - if (!bdev_is_zoned(bdev)) return -ENOTTY; diff --git a/block/blk.h b/block/blk.h index 4c3b3325219a..cc4e8873dfde 100644 --- a/block/blk.h +++ b/block/blk.h @@ -156,6 +156,13 @@ static inline bool blk_discard_mergable(struct request *req) return false; } +static inline unsigned int blk_rq_get_max_segments(struct request *rq) +{ + if (req_op(rq) == REQ_OP_DISCARD) + return queue_max_discard_segments(rq->q); + return queue_max_segments(rq->q); +} + static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q, enum req_op op) { @@ -427,7 +434,7 @@ int bio_add_hw_page(struct request_queue *q, struct bio *bio, struct request_queue *blk_alloc_queue(int node_id); -int disk_scan_partitions(struct gendisk *disk, fmode_t mode, void *owner); +int disk_scan_partitions(struct gendisk *disk, fmode_t mode); int disk_alloc_events(struct gendisk *disk); void disk_add_events(struct gendisk *disk); diff --git a/block/genhd.c b/block/genhd.c index d09d775c222a..3ee5577e1586 100644 --- 
a/block/genhd.c +++ b/block/genhd.c @@ -356,9 +356,10 @@ void disk_uevent(struct gendisk *disk, enum kobject_action action) } EXPORT_SYMBOL_GPL(disk_uevent); -int disk_scan_partitions(struct gendisk *disk, fmode_t mode, void *owner) +int disk_scan_partitions(struct gendisk *disk, fmode_t mode) { struct block_device *bdev; + int ret = 0; if (disk->flags & (GENHD_FL_NO_PART | GENHD_FL_HIDDEN)) return -EINVAL; @@ -366,16 +367,29 @@ int disk_scan_partitions(struct gendisk *disk, fmode_t mode, void *owner) return -EINVAL; if (disk->open_partitions) return -EBUSY; - /* Someone else has bdev exclusively open? */ - if (disk->part0->bd_holder && disk->part0->bd_holder != owner) - return -EBUSY; set_bit(GD_NEED_PART_SCAN, &disk->state); - bdev = blkdev_get_by_dev(disk_devt(disk), mode, NULL); + /* + * If the device is opened exclusively by current thread already, it's + * safe to scan partitions, otherwise, use bd_prepare_to_claim() to + * synchronize with other exclusive openers and other partition + * scanners. 
+ */ + if (!(mode & FMODE_EXCL)) { + ret = bd_prepare_to_claim(disk->part0, disk_scan_partitions); + if (ret) + return ret; + } + + bdev = blkdev_get_by_dev(disk_devt(disk), mode & ~FMODE_EXCL, NULL); if (IS_ERR(bdev)) - return PTR_ERR(bdev); - blkdev_put(bdev, mode); - return 0; + ret = PTR_ERR(bdev); + else + blkdev_put(bdev, mode); + + if (!(mode & FMODE_EXCL)) + bd_abort_claiming(disk->part0, disk_scan_partitions); + return ret; } /** @@ -497,9 +511,14 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk, if (ret) goto out_unregister_bdi; + /* Make sure the first partition scan will proceed */ + if (get_capacity(disk) && !(disk->flags & GENHD_FL_NO_PART) && + !test_bit(GD_SUPPRESS_PART_SCAN, &disk->state)) + set_bit(GD_NEED_PART_SCAN, &disk->state); + bdev_add(disk->part0, ddev->devt); if (get_capacity(disk)) - disk_scan_partitions(disk, FMODE_READ, NULL); + disk_scan_partitions(disk, FMODE_READ); /* * Announce the disk and partitions after all partitions are diff --git a/block/ioctl.c b/block/ioctl.c index 96617512982e..9c5f637ff153 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -467,10 +467,10 @@ static int blkdev_bszset(struct block_device *bdev, fmode_t mode, * user space. Note the separate arg/argp parameters that are needed * to deal with the compat_ptr() conversion. 
*/ -static int blkdev_common_ioctl(struct file *file, fmode_t mode, unsigned cmd, - unsigned long arg, void __user *argp) +static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg, + void __user *argp) { - struct block_device *bdev = I_BDEV(file->f_mapping->host); unsigned int max_sectors; switch (cmd) { @@ -528,8 +528,7 @@ static int blkdev_common_ioctl(struct file *file, fmode_t mode, unsigned cmd, return -EACCES; if (bdev_is_partition(bdev)) return -EINVAL; - return disk_scan_partitions(bdev->bd_disk, mode & ~FMODE_EXCL, - file); + return disk_scan_partitions(bdev->bd_disk, mode); case BLKTRACESTART: case BLKTRACESTOP: case BLKTRACETEARDOWN: @@ -607,7 +606,7 @@ long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) break; } - ret = blkdev_common_ioctl(file, mode, cmd, arg, argp); + ret = blkdev_common_ioctl(bdev, mode, cmd, arg, argp); if (ret != -ENOIOCTLCMD) return ret; @@ -676,7 +675,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) break; } - ret = blkdev_common_ioctl(file, mode, cmd, arg, argp); + ret = blkdev_common_ioctl(bdev, mode, cmd, arg, argp); if (ret == -ENOIOCTLCMD && disk->fops->compat_ioctl) ret = disk->fops->compat_ioctl(bdev, mode, cmd, arg); diff --git a/block/sed-opal.c b/block/sed-opal.c index 463873f61e01..c320093c14f1 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c @@ -487,6 +487,8 @@ static int opal_discovery0_end(struct opal_dev *dev) break; case FC_SINGLEUSER: single_user = check_sum(body->features); + if (single_user) + dev->flags |= OPAL_FL_SUM_SUPPORTED; break; case FC_GEOMETRY: check_geometry(dev, body); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 5f04235e4ff7..839373451c2b 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -977,13 +977,13 @@ loop_set_status_from_info(struct loop_device *lo, return -EINVAL; } + /* Avoid assigning overflow values */ + if (info->lo_offset > LLONG_MAX || 
info->lo_sizelimit > LLONG_MAX) + return -EOVERFLOW; + lo->lo_offset = info->lo_offset; lo->lo_sizelimit = info->lo_sizelimit; - /* loff_t vars have been assigned __u64 */ - if (lo->lo_offset < 0 || lo->lo_sizelimit < 0) - return -EOVERFLOW; - memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE); lo->lo_file_name[LO_NAME_SIZE-1] = 0; lo->lo_flags = info->lo_flags; diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index b9c759cef00e..d1d1c8d606c8 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1271,9 +1271,6 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) __func__, cmd->cmd_op, ub_cmd->q_id, tag, ub_cmd->result); - if (!(issue_flags & IO_URING_F_SQE128)) - goto out; - if (ub_cmd->q_id >= ub->dev_info.nr_hw_queues) goto out; diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index 901c59145811..ea16a0aba679 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -256,7 +256,7 @@ static int nvme_auth_process_dhchap_challenge(struct nvme_ctrl *ctrl, chap->qid, ret, gid_name); chap->status = NVME_AUTH_DHCHAP_FAILURE_DHGROUP_UNUSABLE; chap->dh_tfm = NULL; - return -ret; + return ret; } dev_dbg(ctrl->device, "qid %d: selected DH group %s\n", chap->qid, gid_name); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 8698410aeb84..c2730b116dc6 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -38,6 +38,7 @@ struct nvme_ns_info { bool is_shared; bool is_readonly; bool is_ready; + bool is_removed; }; unsigned int admin_timeout = 60; @@ -1402,16 +1403,8 @@ static int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid, error = nvme_submit_sync_cmd(ctrl->admin_q, &c, *id, sizeof(**id)); if (error) { dev_warn(ctrl->device, "Identify namespace failed (%d)\n", error); - goto out_free_id; + kfree(*id); } - - error = NVME_SC_INVALID_NS | NVME_SC_DNR; - if ((*id)->ncap == 0) /* namespace not allocated or attached */ - goto 
out_free_id; - return 0; - -out_free_id: - kfree(*id); return error; } @@ -1425,6 +1418,13 @@ static int nvme_ns_info_from_identify(struct nvme_ctrl *ctrl, ret = nvme_identify_ns(ctrl, info->nsid, &id); if (ret) return ret; + + if (id->ncap == 0) { + /* namespace not allocated or attached */ + info->is_removed = true; + return -ENODEV; + } + info->anagrpid = id->anagrpid; info->is_shared = id->nmic & NVME_NS_NMIC_SHARED; info->is_readonly = id->nsattr & NVME_NS_ATTR_RO; @@ -3104,7 +3104,7 @@ static void nvme_init_known_nvm_effects(struct nvme_ctrl *ctrl) * Rather than blindly freezing the IO queues for this effect that * doesn't even apply to IO, mask it off. */ - log->acs[nvme_admin_security_recv] &= ~NVME_CMD_EFFECTS_CSE_MASK; + log->acs[nvme_admin_security_recv] &= cpu_to_le32(~NVME_CMD_EFFECTS_CSE_MASK); log->iocs[nvme_cmd_write] |= cpu_to_le32(NVME_CMD_EFFECTS_LBCC); log->iocs[nvme_cmd_write_zeroes] |= cpu_to_le32(NVME_CMD_EFFECTS_LBCC); @@ -4429,6 +4429,7 @@ static void nvme_scan_ns(struct nvme_ctrl *ctrl, unsigned nsid) { struct nvme_ns_info info = { .nsid = nsid }; struct nvme_ns *ns; + int ret; if (nvme_identify_ns_descs(ctrl, &info)) return; @@ -4445,19 +4446,19 @@ static void nvme_scan_ns(struct nvme_ctrl *ctrl, unsigned nsid) * set up a namespace. If not fall back to the legacy version. */ if ((ctrl->cap & NVME_CAP_CRMS_CRIMS) || - (info.ids.csi != NVME_CSI_NVM && info.ids.csi != NVME_CSI_ZNS)) { - if (nvme_ns_info_from_id_cs_indep(ctrl, &info)) - return; - } else { - if (nvme_ns_info_from_identify(ctrl, &info)) - return; - } + (info.ids.csi != NVME_CSI_NVM && info.ids.csi != NVME_CSI_ZNS)) + ret = nvme_ns_info_from_id_cs_indep(ctrl, &info); + else + ret = nvme_ns_info_from_identify(ctrl, &info); + + if (info.is_removed) + nvme_ns_remove_by_nsid(ctrl, nsid); /* * Ignore the namespace if it is not ready. We will get an AEN once it * becomes ready and restart the scan. 
*/ - if (!info.is_ready) + if (ret || !info.is_ready) return; ns = nvme_find_get_ns(ctrl, nsid); diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index a6e22116e139..dcac3df8a5f7 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -189,7 +189,8 @@ nvmf_ctlr_matches_baseopts(struct nvme_ctrl *ctrl, static inline char *nvmf_ctrl_subsysnqn(struct nvme_ctrl *ctrl) { - if (!ctrl->subsys) + if (!ctrl->subsys || + !strcmp(ctrl->opts->subsysnqn, NVME_DISC_SUBSYS_NAME)) return ctrl->opts->subsysnqn; return ctrl->subsys->subnqn; } diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 1955c0ec209e..7723a4989524 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2492,6 +2492,10 @@ static int nvme_tcp_get_address(struct nvme_ctrl *ctrl, char *buf, int size) len = nvmf_get_address(ctrl, buf, size); + mutex_lock(&queue->queue_lock); + + if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags)) + goto done; ret = kernel_getsockname(queue->sock, (struct sockaddr *)&src_addr); if (ret > 0) { if (len > 0) @@ -2499,6 +2503,8 @@ static int nvme_tcp_get_address(struct nvme_ctrl *ctrl, char *buf, int size) len += scnprintf(buf + len, size - len, "%ssrc_addr=%pISc\n", (len) ? "," : "", &src_addr); } +done: + mutex_unlock(&queue->queue_lock); return len; } diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 779fba613bd0..dd5ce1137f04 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -473,6 +473,7 @@ enum hctx_type { /** * struct blk_mq_tag_set - tag set that can be shared between request queues + * @ops: Pointers to functions that implement block driver behavior. * @map: One or more ctx -> hctx mappings. One map exists for each * hardware queue type (enum hctx_type) that the driver wishes * to support. There are no restrictions on maps being of the @@ -480,7 +481,6 @@ enum hctx_type { * types. * @nr_maps: Number of elements in the @map array. A number in the range * [1, HCTX_MAX_TYPES]. 
- * @ops: Pointers to functions that implement block driver behavior. * @nr_hw_queues: Number of hardware queues supported by the block driver that * owns this data structure. * @queue_depth: Number of tags per hardware queue, reserved tags included. @@ -505,9 +505,9 @@ enum hctx_type { * (BLK_MQ_F_BLOCKING). */ struct blk_mq_tag_set { + const struct blk_mq_ops *ops; struct blk_mq_queue_map map[HCTX_MAX_TYPES]; unsigned int nr_maps; - const struct blk_mq_ops *ops; unsigned int nr_hw_queues; unsigned int queue_depth; unsigned int reserved_tags; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 41a41561b773..d1aee08f8c18 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1283,12 +1283,7 @@ static inline bool bdev_nowait(struct block_device *bdev) static inline enum blk_zoned_model bdev_zoned_model(struct block_device *bdev) { - struct request_queue *q = bdev_get_queue(bdev); - - if (q) - return blk_queue_zoned_model(q); - - return BLK_ZONED_NONE; + return blk_queue_zoned_model(bdev_get_queue(bdev)); } static inline bool bdev_is_zoned(struct block_device *bdev) diff --git a/include/uapi/linux/sed-opal.h b/include/uapi/linux/sed-opal.h index 1fed3c9294fc..d7a1524023db 100644 --- a/include/uapi/linux/sed-opal.h +++ b/include/uapi/linux/sed-opal.h @@ -144,6 +144,7 @@ struct opal_read_write_table { #define OPAL_FL_LOCKED 0x00000008 #define OPAL_FL_MBR_ENABLED 0x00000010 #define OPAL_FL_MBR_DONE 0x00000020 +#define OPAL_FL_SUM_SUPPORTED 0x00000040 struct opal_status { __u32 flags; diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 5743be559415..d5d94510afd3 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -729,14 +729,10 @@ EXPORT_SYMBOL_GPL(blk_trace_startstop); **/ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) { - struct request_queue *q; + struct request_queue *q = bdev_get_queue(bdev); int ret, start = 0; char b[BDEVNAME_SIZE]; - q = bdev_get_queue(bdev); - if 
(!q) - return -ENXIO; - mutex_lock(&q->debugfs_mutex); switch (cmd) {