Some fscrypt-related fixups (sparse reads are used only for encrypted
files) and two cap handling fixes from Xiubo and Rishabh.
 -----BEGIN PGP SIGNATURE-----
 
 iQFHBAABCAAxFiEEydHwtzie9C7TfviiSn/eOAIR84sFAmXGRJATHGlkcnlvbW92
 QGdtYWlsLmNvbQAKCRBKf944AhHzi+/0B/4pEweAm2W0UUaaS59DecNySBFobwed
 m7bBDBGIAQ/I3duN46a13FzsGNclho967TeB0ig1jrQxnoo3HEMiXpZz5xfG9spe
 fyvrIk3R8cSqgd7YsyITnUjGGd2UBvZVrbWOCbWrKofSoflS6IjcGDQF7ZrgEsff
 0KkMaWHvO6poIU2mAToV//UkWUk6RrtAUNlSdjLpizXnUrrAQ+vUA3OU9SSp6Klf
 xmFaIiAiVZC6M8qFpXJtnIf8Ba7PrpW5InAXgCOkxDKciE9fLaPsIu0B3H9lUVKZ
 TJwjEJ0nB+akh0tRO5bZKyM8j0D3lhgxphJwNtUoYjQsV3m7LcGQV+Il
 =u953
 -----END PGP SIGNATURE-----

Merge tag 'ceph-for-6.8-rc4' of https://github.com/ceph/ceph-client

Pull ceph fixes from Ilya Dryomov:
 "Some fscrypt-related fixups (sparse reads are used only for encrypted
  files) and two cap handling fixes from Xiubo and Rishabh"

* tag 'ceph-for-6.8-rc4' of https://github.com/ceph/ceph-client:
  ceph: always check dir caps asynchronously
  ceph: prevent use-after-free in encode_cap_msg()
  ceph: always set initial i_blkbits to CEPH_FSCRYPT_BLOCK_SHIFT
  libceph: just wait for more data to be available on the socket
  libceph: rename read_sparse_msg_*() to read_partial_sparse_msg_*()
  libceph: fail sparse-read if the data length doesn't match
commit e1e3f530a1
Linus Torvalds, 2024-02-09 17:05:02 -08:00

10 changed files with 49 additions and 44 deletions
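
A note on the encode_cap_msg() fix below: __prep_cap() used to stash a bare
pointer to the inode's xattr blob, which could be freed before the message was
encoded. The fix takes a reference when the pointer is stashed and drops it in
__send_cap() after the send. Here is a minimal userspace sketch of that
ownership rule; buf, buf_get() and buf_put() are illustrative stand-ins, not
the kernel's ceph_buffer API.

/*
 * Sketch: a pointer stashed for later use must own a reference for as
 * long as it can be dereferenced.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct buf {
	int refcount;
	char data[32];
};

static struct buf *buf_get(struct buf *b)
{
	b->refcount++;		/* new holder takes its own reference */
	return b;
}

static void buf_put(struct buf *b)
{
	if (b && --b->refcount == 0)
		free(b);
}

struct msg_args {
	struct buf *xattr_buf;	/* owned: holds its own reference */
};

int main(void)
{
	struct buf *blob = calloc(1, sizeof(*blob));
	struct msg_args arg;

	blob->refcount = 1;
	strcpy(blob->data, "xattr blob");

	/* like __prep_cap() after the fix: stash a reference, not a bare pointer */
	arg.xattr_buf = buf_get(blob);

	/* the original owner may drop its reference at any time... */
	buf_put(blob);

	/* ...but encoding still sees live data instead of freed memory */
	printf("encoding \"%s\"\n", arg.xattr_buf->data);

	/* like __send_cap() after the fix: drop the stashed reference */
	buf_put(arg.xattr_buf);
	return 0;
}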

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c

@@ -1452,7 +1452,7 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap,
 	if (flushing & CEPH_CAP_XATTR_EXCL) {
 		arg->old_xattr_buf = __ceph_build_xattrs_blob(ci);
 		arg->xattr_version = ci->i_xattrs.version;
-		arg->xattr_buf = ci->i_xattrs.blob;
+		arg->xattr_buf = ceph_buffer_get(ci->i_xattrs.blob);
 	} else {
 		arg->xattr_buf = NULL;
 		arg->old_xattr_buf = NULL;
@@ -1553,6 +1553,7 @@ static void __send_cap(struct cap_msg_args *arg, struct ceph_inode_info *ci)
 	encode_cap_msg(msg, arg);
 	ceph_con_send(&arg->session->s_con, msg);
 	ceph_buffer_put(arg->old_xattr_buf);
+	ceph_buffer_put(arg->xattr_buf);
 	if (arg->wake)
 		wake_up_all(&ci->i_cap_wq);
 }
@@ -3215,7 +3216,6 @@ static int ceph_try_drop_cap_snap(struct ceph_inode_info *ci,
 
 enum put_cap_refs_mode {
 	PUT_CAP_REFS_SYNC = 0,
-	PUT_CAP_REFS_NO_CHECK,
 	PUT_CAP_REFS_ASYNC,
 };
 
@@ -3331,11 +3331,6 @@ void ceph_put_cap_refs_async(struct ceph_inode_info *ci, int had)
 	__ceph_put_cap_refs(ci, had, PUT_CAP_REFS_ASYNC);
 }
 
-void ceph_put_cap_refs_no_check_caps(struct ceph_inode_info *ci, int had)
-{
-	__ceph_put_cap_refs(ci, had, PUT_CAP_REFS_NO_CHECK);
-}
-
 /*
  * Release @nr WRBUFFER refs on dirty pages for the given @snapc snap
  * context. Adjust per-snap dirty page accounting as appropriate.

diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c

@@ -78,6 +78,8 @@ struct inode *ceph_new_inode(struct inode *dir, struct dentry *dentry,
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
 
+	inode->i_blkbits = CEPH_FSCRYPT_BLOCK_SHIFT;
+
 	if (!S_ISLNK(*mode)) {
 		err = ceph_pre_init_acls(dir, mode, as_ctx);
 		if (err < 0)

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c

@@ -1089,7 +1089,7 @@ void ceph_mdsc_release_request(struct kref *kref)
 	struct ceph_mds_request *req = container_of(kref,
 						    struct ceph_mds_request,
 						    r_kref);
-	ceph_mdsc_release_dir_caps_no_check(req);
+	ceph_mdsc_release_dir_caps_async(req);
 	destroy_reply_info(&req->r_reply_info);
 	if (req->r_request)
 		ceph_msg_put(req->r_request);
@@ -4261,7 +4261,7 @@ void ceph_mdsc_release_dir_caps(struct ceph_mds_request *req)
 	}
 }
 
-void ceph_mdsc_release_dir_caps_no_check(struct ceph_mds_request *req)
+void ceph_mdsc_release_dir_caps_async(struct ceph_mds_request *req)
 {
 	struct ceph_client *cl = req->r_mdsc->fsc->client;
 	int dcaps;
@@ -4269,8 +4269,7 @@ void ceph_mdsc_release_dir_caps_no_check(struct ceph_mds_request *req)
 	dcaps = xchg(&req->r_dir_caps, 0);
 	if (dcaps) {
 		doutc(cl, "releasing r_dir_caps=%s\n", ceph_cap_string(dcaps));
-		ceph_put_cap_refs_no_check_caps(ceph_inode(req->r_parent),
-						dcaps);
+		ceph_put_cap_refs_async(ceph_inode(req->r_parent), dcaps);
 	}
 }
 
@@ -4306,7 +4305,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
 		if (req->r_session->s_mds != session->s_mds)
 			continue;
 
-		ceph_mdsc_release_dir_caps_no_check(req);
+		ceph_mdsc_release_dir_caps_async(req);
 		__send_request(session, req, true);
 	}
 }

diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h

@@ -552,7 +552,7 @@ extern int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
 				struct inode *dir,
 				struct ceph_mds_request *req);
 extern void ceph_mdsc_release_dir_caps(struct ceph_mds_request *req);
-extern void ceph_mdsc_release_dir_caps_no_check(struct ceph_mds_request *req);
+extern void ceph_mdsc_release_dir_caps_async(struct ceph_mds_request *req);
 static inline void ceph_mdsc_get_request(struct ceph_mds_request *req)
 {
 	kref_get(&req->r_kref);

diff --git a/fs/ceph/super.h b/fs/ceph/super.h

@@ -1255,8 +1255,6 @@ extern void ceph_take_cap_refs(struct ceph_inode_info *ci, int caps,
 extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps);
 extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
 extern void ceph_put_cap_refs_async(struct ceph_inode_info *ci, int had);
-extern void ceph_put_cap_refs_no_check_caps(struct ceph_inode_info *ci,
-					    int had);
 extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
 				       struct ceph_snap_context *snapc);
 extern void __ceph_remove_capsnap(struct inode *inode,

diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h

@@ -283,7 +283,7 @@ struct ceph_msg {
 	struct kref kref;
 	bool more_to_follow;
 	bool needs_out_seq;
-	bool sparse_read;
+	u64 sparse_read_total;
 	int front_alloc_len;
 
 	struct ceph_msgpool *pool;

diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h

@@ -45,6 +45,7 @@ enum ceph_sparse_read_state {
 	CEPH_SPARSE_READ_HDR = 0,
 	CEPH_SPARSE_READ_EXTENTS,
 	CEPH_SPARSE_READ_DATA_LEN,
+	CEPH_SPARSE_READ_DATA_PRE,
 	CEPH_SPARSE_READ_DATA,
 };
 
@@ -64,7 +65,7 @@ struct ceph_sparse_read {
 	u64 sr_req_len;    /* orig request length */
 	u64 sr_pos;        /* current pos in buffer */
 	int sr_index;      /* current extent index */
-	__le32 sr_datalen; /* length of actual data */
+	u32 sr_datalen;    /* length of actual data */
 	u32 sr_count;      /* extent count in reply */
 	int sr_ext_len;    /* length of extent array */
 	struct ceph_sparse_extent *sr_extent; /* extent array */

diff --git a/net/ceph/messenger_v1.c b/net/ceph/messenger_v1.c

@@ -160,8 +160,9 @@ static size_t sizeof_footer(struct ceph_connection *con)
 static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
 {
 	/* Initialize data cursor if it's not a sparse read */
-	if (!msg->sparse_read)
-		ceph_msg_data_cursor_init(&msg->cursor, msg, data_len);
+	u64 len = msg->sparse_read_total ? : data_len;
+
+	ceph_msg_data_cursor_init(&msg->cursor, msg, len);
 }
 
 /*
@@ -991,7 +992,7 @@ static inline int read_partial_message_section(struct ceph_connection *con,
 	return read_partial_message_chunk(con, section, sec_len, crc);
 }
 
-static int read_sparse_msg_extent(struct ceph_connection *con, u32 *crc)
+static int read_partial_sparse_msg_extent(struct ceph_connection *con, u32 *crc)
 {
 	struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
 	bool do_bounce = ceph_test_opt(from_msgr(con->msgr), RXBOUNCE);
@@ -1026,7 +1027,7 @@ static int read_sparse_msg_extent(struct ceph_connection *con, u32 *crc)
 	return 1;
 }
 
-static int read_sparse_msg_data(struct ceph_connection *con)
+static int read_partial_sparse_msg_data(struct ceph_connection *con)
 {
 	struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
 	bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
@@ -1036,31 +1037,31 @@ static int read_sparse_msg_data(struct ceph_connection *con)
 	if (do_datacrc)
 		crc = con->in_data_crc;
 
-	do {
+	while (cursor->total_resid) {
 		if (con->v1.in_sr_kvec.iov_base)
 			ret = read_partial_message_chunk(con,
 							 &con->v1.in_sr_kvec,
 							 con->v1.in_sr_len,
 							 &crc);
 		else if (cursor->sr_resid > 0)
-			ret = read_sparse_msg_extent(con, &crc);
-
-		if (ret <= 0) {
-			if (do_datacrc)
-				con->in_data_crc = crc;
-			return ret;
-		}
+			ret = read_partial_sparse_msg_extent(con, &crc);
+		if (ret <= 0)
+			break;
 
 		memset(&con->v1.in_sr_kvec, 0, sizeof(con->v1.in_sr_kvec));
 		ret = con->ops->sparse_read(con, cursor,
 				(char **)&con->v1.in_sr_kvec.iov_base);
+		if (ret <= 0) {
+			ret = ret ? ret : 1; /* must return > 0 to indicate success */
+			break;
+		}
 		con->v1.in_sr_len = ret;
-	} while (ret > 0);
+	}
 
 	if (do_datacrc)
 		con->in_data_crc = crc;
 
-	return ret < 0 ? ret : 1; /* must return > 0 to indicate success */
+	return ret;
 }
 
 static int read_partial_msg_data(struct ceph_connection *con)
@@ -1253,8 +1254,8 @@ static int read_partial_message(struct ceph_connection *con)
 		if (!m->num_data_items)
 			return -EIO;
 
-		if (m->sparse_read)
-			ret = read_sparse_msg_data(con);
+		if (m->sparse_read_total)
+			ret = read_partial_sparse_msg_data(con);
 		else if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE))
 			ret = read_partial_msg_data_bounce(con);
 		else
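
The messenger_v1 rework above leans on the driver's partial-read convention:
each step returns a negative errno on error, 0 when the socket has no more
data yet (the caller just waits and retries later), and a positive value once
the chunk is complete; rather than bailing out mid-loop, the new code breaks
and lets the caller wait for more data. A rough userspace sketch of that
convention, where try_read_chunk() is a hypothetical helper on a nonblocking
socket, not part of the kernel messenger:

#include <errno.h>
#include <unistd.h>

struct chunk {
	char *buf;
	size_t len;	/* total expected */
	size_t got;	/* received so far */
};

static int try_read_chunk(int fd, struct chunk *c)
{
	while (c->got < c->len) {
		ssize_t n = read(fd, c->buf + c->got, c->len - c->got);

		if (n > 0) {
			c->got += n;
			continue;
		}
		if (n == 0)
			return -EIO;	/* peer closed mid-message */
		if (errno == EAGAIN || errno == EWOULDBLOCK)
			return 0;	/* no data yet: just wait for more */
		return -errno;		/* hard error */
	}
	return 1;			/* must return > 0 to indicate success */
}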

diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c

@@ -1128,7 +1128,7 @@ static int decrypt_tail(struct ceph_connection *con)
 	struct sg_table enc_sgt = {};
 	struct sg_table sgt = {};
 	struct page **pages = NULL;
-	bool sparse = con->in_msg->sparse_read;
+	bool sparse = !!con->in_msg->sparse_read_total;
 	int dpos = 0;
 	int tail_len;
 	int ret;
@@ -2060,7 +2060,7 @@ static int prepare_read_tail_plain(struct ceph_connection *con)
 	}
 
 	if (data_len(msg)) {
-		if (msg->sparse_read)
+		if (msg->sparse_read_total)
 			con->v2.in_state = IN_S_PREPARE_SPARSE_DATA;
 		else
 			con->v2.in_state = IN_S_PREPARE_READ_DATA;

diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c

@@ -5510,7 +5510,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
 	}
 
 	m = ceph_msg_get(req->r_reply);
-	m->sparse_read = (bool)srlen;
+	m->sparse_read_total = srlen;
 
 	dout("get_reply tid %lld %p\n", tid, m);
 
@@ -5777,11 +5777,8 @@ static int prep_next_sparse_read(struct ceph_connection *con,
 	}
 
 	if (o->o_sparse_op_idx < 0) {
-		u64 srlen = sparse_data_requested(req);
-
-		dout("%s: [%d] starting new sparse read req. srlen=0x%llx\n",
-		     __func__, o->o_osd, srlen);
-		ceph_msg_data_cursor_init(cursor, con->in_msg, srlen);
+		dout("%s: [%d] starting new sparse read req\n",
+		     __func__, o->o_osd);
 	} else {
 		u64 end;
 
@@ -5857,8 +5854,8 @@ static int osd_sparse_read(struct ceph_connection *con,
 	struct ceph_osd *o = con->private;
 	struct ceph_sparse_read *sr = &o->o_sparse_read;
 	u32 count = sr->sr_count;
-	u64 eoff, elen;
-	int ret;
+	u64 eoff, elen, len = 0;
+	int i, ret;
 
 	switch (sr->sr_state) {
 	case CEPH_SPARSE_READ_HDR:
@@ -5903,8 +5900,20 @@ static int osd_sparse_read(struct ceph_connection *con,
 		convert_extent_map(sr);
 		ret = sizeof(sr->sr_datalen);
 		*pbuf = (char *)&sr->sr_datalen;
-		sr->sr_state = CEPH_SPARSE_READ_DATA;
+		sr->sr_state = CEPH_SPARSE_READ_DATA_PRE;
 		break;
+	case CEPH_SPARSE_READ_DATA_PRE:
+		/* Convert sr_datalen to host-endian */
+		sr->sr_datalen = le32_to_cpu((__force __le32)sr->sr_datalen);
+		for (i = 0; i < count; i++)
+			len += sr->sr_extent[i].len;
+		if (sr->sr_datalen != len) {
+			pr_warn_ratelimited("data len %u != extent len %llu\n",
+					    sr->sr_datalen, len);
+			return -EREMOTEIO;
+		}
+		sr->sr_state = CEPH_SPARSE_READ_DATA;
+		fallthrough;
 	case CEPH_SPARSE_READ_DATA:
 		if (sr->sr_index >= count) {
 			sr->sr_state = CEPH_SPARSE_READ_HDR;
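
The new CEPH_SPARSE_READ_DATA_PRE step above does two things: it converts the
wire-endian length prefix to host order exactly once, and it rejects the reply
with -EREMOTEIO if that length disagrees with the sum of the advertised
extents. A standalone sketch of the same check; the types and names are
illustrative, with glibc's le32toh() standing in for the kernel's
le32_to_cpu():

#include <endian.h>
#include <stdint.h>
#include <stdio.h>

struct extent {
	uint64_t off;
	uint64_t len;
};

/* 0 if the declared length matches the extent map, -1 otherwise */
static int check_sparse_len(uint32_t wire_datalen, const struct extent *ext,
			    int count)
{
	uint32_t datalen = le32toh(wire_datalen);	/* host-endian, once */
	uint64_t len = 0;
	int i;

	for (i = 0; i < count; i++)
		len += ext[i].len;

	if (datalen != len) {
		fprintf(stderr, "data len %u != extent len %llu\n",
			datalen, (unsigned long long)len);
		return -1;	/* the kernel returns -EREMOTEIO here */
	}
	return 0;
}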