NFS client updates for Linux 5.11

Highlights include:
 
 Features:
 - NFSv3: Add emulation of lookupp() to improve open_by_filehandle()
   support.
 - A series of patches to improve readdir performance, particularly with
   large directories.
 - Basic support for using NFS/RDMA with the pNFS files and flexfiles
   drivers.
 - Micro-optimisations for RDMA.
 - RDMA tracing improvements.
 
 Bugfixes:
 - Fix a long standing bug with xs_read_xdr_buf() when receiving partial
   pages (Dan Aloni).
 - Various fixes for getxattr and listxattr, when used over non-TCP
   transports.
 - Fixes for containerised NFS from Sargun Dhillon.
 - switch nfsiod to be an UNBOUND workqueue (Neil Brown).
 - READDIR should not ask for security label information if there is no
   LSM policy. (Olga Kornievskaia)
 - Avoid using interval-based rebinding with TCP in lockd (Calum Mackay).
 - A series of RPC and NFS layer fixes to support the NFSv4.2 READ_PLUS code.
 - A couple of fixes for pnfs/flexfiles read failover
 
 Cleanups:
 - Various cleanups for the SUNRPC xdr code in conjunction with the
   READ_PLUS fixes.
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEESQctxSBg8JpV8KqEZwvnipYKAPIFAl/aiaIACgkQZwvnipYK
 APIOihAAvONscxrFSaGRh2ICNv9I/zXW/A5+R3qnkESPVLTqTPJVphoN7FlINAr1
 B74pg6n4T4viycbvsogU2+kHrlJZO7B8lTkJL7ynm9Wgyw8+2Ga4QEn1bsAoqmuY
 b91p/+LfOLKrYeeojoH31PC73uOYYG1WHXJhjq0l9b5CTgThWpj6O3gDaFEbFvmz
 A7V3yqSp04sV70YxUhwelBHZ5BXdiXIKsPnIwvXXHuY7IcamrE4EA3wGCwtxkBnu
 4dwbOtRXURNSev0r3n6FsH4wZl+/nvp9UpnGdPtVv94F1zm2JKLwkhoJejS/vpjq
 eyKc7ZXBQ0uHbTWI2Yj1YjA61VIUO0R0EDuyTAnRKDeaarID42n5kMG7J8cIglZR
 jQfyx99xm0eSrdwxC09tcRL/lBzYcOfc6pJo5P9BtaFtRvbp9iFIHuFKlrXbULd4
 WrZzDMhiKVYGSTcTpfQyVoK2rCvn6W1Ida4iYeI0gkJ1v9X90UhbtJOyggn/bxyL
 DV/Qy40+l48n7CZfPU2eDv4WXqjKGRibpDoWMBLwUH20dDEX6kKYv3BfApFYGqyO
 /GTPAFUZarCy8BENvzZv/Jb9mt5pDQM5p9ZXpdUOhydLMMA+pauaT/Gr+pAHPIPx
 MPj546Gh2cEaT883xvRrJmQTG0nw/WscPNcHaJcgL5oYltmuwck=
 =IKWG
 -----END PGP SIGNATURE-----

Merge tag 'nfs-for-5.11-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs

Pull NFS client updates from Trond Myklebust:
 "Highlights include:

  Features:

   - NFSv3: Add emulation of lookupp() to improve open_by_filehandle()
     support

   - A series of patches to improve readdir performance, particularly
     with large directories

   - Basic support for using NFS/RDMA with the pNFS files and flexfiles
     drivers

   - Micro-optimisations for RDMA

   - RDMA tracing improvements

  Bugfixes:

   - Fix a long standing bug with xs_read_xdr_buf() when receiving
     partial pages (Dan Aloni)

   - Various fixes for getxattr and listxattr, when used over non-TCP
     transports

   - Fixes for containerised NFS from Sargun Dhillon

   - Switch nfsiod to be an UNBOUND workqueue (Neil Brown)

   - READDIR should not ask for security label information if there is
     no LSM policy (Olga Kornievskaia)

   - Avoid using interval-based rebinding with TCP in lockd (Calum
     Mackay)

   - A series of RPC and NFS layer fixes to support the NFSv4.2
     READ_PLUS code

   - A couple of fixes for pnfs/flexfiles read failover

  Cleanups:

   - Various cleanups for the SUNRPC xdr code in conjunction with the
     READ_PLUS fixes"

* tag 'nfs-for-5.11-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (90 commits)
  NFS/pNFS: Fix a typo in ff_layout_resend_pnfs_read()
  pNFS/flexfiles: Avoid spurious layout returns in ff_layout_choose_ds_for_read
  NFSv4/pnfs: Add tracing for the deviceid cache
  fs/lockd: convert comma to semicolon
  NFSv4.2: fix error return on memory allocation failure
  NFSv4.2/pnfs: Don't use READ_PLUS with pNFS yet
  NFSv4.2: Deal with potential READ_PLUS data extent buffer overflow
  NFSv4.2: Don't error when exiting early on a READ_PLUS buffer overflow
  NFSv4.2: Handle hole lengths that exceed the READ_PLUS read buffer
  NFSv4.2: decode_read_plus_hole() needs to check the extent offset
  NFSv4.2: decode_read_plus_data() must skip padding after data segment
  NFSv4.2: Ensure we always reset the result->count in decode_read_plus()
  SUNRPC: When expanding the buffer, we may need grow the sparse pages
  SUNRPC: Cleanup - constify a number of xdr_buf helpers
  SUNRPC: Clean up open coded setting of the xdr_stream 'nwords' field
  SUNRPC: _copy_to/from_pages() now check for zero length
  SUNRPC: Cleanup xdr_shrink_bufhead()
  SUNRPC: Fix xdr_expand_hole()
  SUNRPC: Fixes for xdr_align_data()
  SUNRPC: _shift_data_left/right_pages should check the shift length
  ...
This commit is contained in:
Linus Torvalds 2020-12-17 12:15:03 -08:00
commit 74f602dc96
41 changed files with 1959 additions and 1251 deletions

View File

@ -163,7 +163,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni,
host->h_nsmhandle = nsm;
host->h_addrbuf = nsm->sm_addrbuf;
host->net = ni->net;
host->h_cred = get_cred(ni->cred),
host->h_cred = get_cred(ni->cred);
strlcpy(host->nodename, utsname()->nodename, sizeof(host->nodename));
out:
@ -439,12 +439,7 @@ nlm_bind_host(struct nlm_host *host)
* RPC rebind is required
*/
if ((clnt = host->h_rpcclnt) != NULL) {
if (time_after_eq(jiffies, host->h_nextrebind)) {
rpc_force_rebind(clnt);
host->h_nextrebind = jiffies + NLM_HOST_REBIND;
dprintk("lockd: next rebind in %lu jiffies\n",
host->h_nextrebind - jiffies);
}
nlm_rebind_host(host);
} else {
unsigned long increment = nlmsvc_timeout;
struct rpc_timeout timeparms = {
@ -494,13 +489,20 @@ nlm_bind_host(struct nlm_host *host)
return clnt;
}
/*
* Force a portmap lookup of the remote lockd port
/**
* nlm_rebind_host - If needed, force a portmap lookup of the peer's lockd port
* @host: NLM host handle for peer
*
* This is not needed when using a connection-oriented protocol, such as TCP.
* The existing autobind mechanism is sufficient to force a rebind when
* required, e.g. on connection state transitions.
*/
void
nlm_rebind_host(struct nlm_host *host)
{
dprintk("lockd: rebind host %s\n", host->h_name);
if (host->h_proto != IPPROTO_UDP)
return;
if (host->h_rpcclnt && time_after_eq(jiffies, host->h_nextrebind)) {
rpc_force_rebind(host->h_rpcclnt);
host->h_nextrebind = jiffies + NLM_HOST_REBIND;

View File

@ -571,7 +571,7 @@ static int nfs_start_lockd(struct nfs_server *server)
1 : 0,
.net = clp->cl_net,
.nlmclnt_ops = clp->cl_nfs_mod->rpc_ops->nlmclnt_ops,
.cred = current_cred(),
.cred = server->cred,
};
if (nlm_init.nfs_version > 3)
@ -781,8 +781,8 @@ static void nfs_server_set_fsinfo(struct nfs_server *server,
server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL);
server->dtsize = nfs_block_size(fsinfo->dtpref, NULL);
if (server->dtsize > PAGE_SIZE * NFS_MAX_READDIR_PAGES)
server->dtsize = PAGE_SIZE * NFS_MAX_READDIR_PAGES;
if (server->dtsize > NFS_MAX_FILE_IO_SIZE)
server->dtsize = NFS_MAX_FILE_IO_SIZE;
if (server->dtsize > server->rsize)
server->dtsize = server->rsize;
@ -985,7 +985,7 @@ struct nfs_server *nfs_create_server(struct fs_context *fc)
if (!server)
return ERR_PTR(-ENOMEM);
server->cred = get_cred(current_cred());
server->cred = get_cred(fc->cred);
error = -ENOMEM;
fattr = nfs_alloc_fattr();

File diff suppressed because it is too large. (Load Diff)

View File

@ -740,16 +740,12 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg,
struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
struct nfs4_ff_layout_mirror *mirror;
struct nfs4_pnfs_ds *ds;
bool fail_return = false;
u32 idx;
/* mirrors are initially sorted by efficiency */
for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) {
if (idx+1 == fls->mirror_array_cnt)
fail_return = !check_device;
mirror = FF_LAYOUT_COMP(lseg, idx);
ds = nfs4_ff_layout_prepare_ds(lseg, mirror, fail_return);
ds = nfs4_ff_layout_prepare_ds(lseg, mirror, false);
if (!ds)
continue;
@ -1056,7 +1052,7 @@ static void ff_layout_resend_pnfs_read(struct nfs_pgio_header *hdr)
u32 idx = hdr->pgio_mirror_idx + 1;
u32 new_idx = 0;
if (ff_layout_choose_any_ds_for_read(hdr->lseg, idx + 1, &new_idx))
if (ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx))
ff_layout_send_layouterror(hdr->lseg);
else
pnfs_error_mark_layout_for_return(hdr->inode, hdr->lseg);
@ -2284,7 +2280,6 @@ ff_layout_encode_netaddr(struct xdr_stream *xdr, struct nfs4_pnfs_ds_addr *da)
struct sockaddr *sap = (struct sockaddr *)&da->da_addr;
char portbuf[RPCBIND_MAXUADDRPLEN];
char addrbuf[RPCBIND_MAXUADDRLEN];
char *netid;
unsigned short port;
int len, netid_len;
__be32 *p;
@ -2294,18 +2289,13 @@ ff_layout_encode_netaddr(struct xdr_stream *xdr, struct nfs4_pnfs_ds_addr *da)
if (ff_layout_ntop4(sap, addrbuf, sizeof(addrbuf)) == 0)
return;
port = ntohs(((struct sockaddr_in *)sap)->sin_port);
netid = "tcp";
netid_len = 3;
break;
case AF_INET6:
if (ff_layout_ntop6_noscopeid(sap, addrbuf, sizeof(addrbuf)) == 0)
return;
port = ntohs(((struct sockaddr_in6 *)sap)->sin6_port);
netid = "tcp6";
netid_len = 4;
break;
default:
/* we only support tcp and tcp6 */
WARN_ON_ONCE(1);
return;
}
@ -2313,8 +2303,9 @@ ff_layout_encode_netaddr(struct xdr_stream *xdr, struct nfs4_pnfs_ds_addr *da)
snprintf(portbuf, sizeof(portbuf), ".%u.%u", port >> 8, port & 0xff);
len = strlcat(addrbuf, portbuf, sizeof(addrbuf));
netid_len = strlen(da->da_netid);
p = xdr_reserve_space(xdr, 4 + netid_len);
xdr_encode_opaque(p, netid, netid_len);
xdr_encode_opaque(p, da->da_netid, netid_len);
p = xdr_reserve_space(xdr, 4 + len);
xdr_encode_opaque(p, addrbuf, len);

View File

@ -510,13 +510,12 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP;
break;
case Opt_tcp:
ctx->flags |= NFS_MOUNT_TCP;
ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP;
break;
case Opt_rdma:
ctx->flags |= NFS_MOUNT_TCP; /* for side protocols */
ctx->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
xprt_load_transport(param->key);
ret = xprt_find_transport_ident(param->key);
if (ret < 0)
goto out_bad_transport;
ctx->nfs_server.protocol = ret;
break;
case Opt_acl:
if (result.negated)
@ -670,11 +669,13 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
case Opt_xprt_rdma:
/* vector side protocols to TCP */
ctx->flags |= NFS_MOUNT_TCP;
ctx->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
xprt_load_transport(param->string);
ret = xprt_find_transport_ident(param->string);
if (ret < 0)
goto out_bad_transport;
ctx->nfs_server.protocol = ret;
break;
default:
return nfs_invalf(fc, "NFS: Unrecognized transport protocol");
goto out_bad_transport;
}
ctx->protofamily = protofamily;
@ -697,7 +698,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
break;
case Opt_xprt_rdma: /* not used for side protocols */
default:
return nfs_invalf(fc, "NFS: Unrecognized transport protocol");
goto out_bad_transport;
}
ctx->mountfamily = mountfamily;
break;
@ -787,6 +788,8 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
return nfs_invalf(fc, "NFS: Bad IP address specified");
out_of_bounds:
return nfs_invalf(fc, "NFS: Value for '%s' out of range", param->key);
out_bad_transport:
return nfs_invalf(fc, "NFS: Unrecognized transport protocol");
}
/*

View File

@ -229,7 +229,6 @@ static void nfs_zap_caches_locked(struct inode *inode)
nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
nfsi->attrtimeo_timestamp = jiffies;
memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf));
if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) {
nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR
| NFS_INO_INVALID_DATA
@ -1237,7 +1236,6 @@ EXPORT_SYMBOL_GPL(nfs_revalidate_inode);
static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping)
{
struct nfs_inode *nfsi = NFS_I(inode);
int ret;
if (mapping->nrpages != 0) {
@ -1250,11 +1248,6 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map
if (ret < 0)
return ret;
}
if (S_ISDIR(inode->i_mode)) {
spin_lock(&inode->i_lock);
memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
spin_unlock(&inode->i_lock);
}
nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE);
nfs_fscache_wait_on_invalidate(inode);
@ -2180,7 +2173,7 @@ static int nfsiod_start(void)
{
struct workqueue_struct *wq;
dprintk("RPC: creating workqueue nfsiod\n");
wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM, 0);
wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
if (wq == NULL)
return -ENOMEM;
nfsiod_workqueue = wq;

View File

@ -56,12 +56,6 @@ static inline bool nfs_lookup_is_soft_revalidate(const struct dentry *dentry)
#define NFS_UNSPEC_RETRANS (UINT_MAX)
#define NFS_UNSPEC_TIMEO (UINT_MAX)
/*
* Maximum number of pages that readdir can use for creating
* a vmapped array of pages.
*/
#define NFS_MAX_READDIR_PAGES 8
struct nfs_client_initdata {
unsigned long init_flags;
const char *hostname; /* Hostname of the server */

View File

@ -34,6 +34,7 @@
* Declare the space requirements for NFS arguments and replies as
* number of 32bit-words
*/
#define NFS_pagepad_sz (1) /* Page padding */
#define NFS_fhandle_sz (8)
#define NFS_sattr_sz (8)
#define NFS_filename_sz (1+(NFS2_MAXNAMLEN>>2))
@ -56,11 +57,11 @@
#define NFS_attrstat_sz (1+NFS_fattr_sz)
#define NFS_diropres_sz (1+NFS_fhandle_sz+NFS_fattr_sz)
#define NFS_readlinkres_sz (2+1)
#define NFS_readres_sz (1+NFS_fattr_sz+1+1)
#define NFS_readlinkres_sz (2+NFS_pagepad_sz)
#define NFS_readres_sz (1+NFS_fattr_sz+1+NFS_pagepad_sz)
#define NFS_writeres_sz (NFS_attrstat_sz)
#define NFS_stat_sz (1)
#define NFS_readdirres_sz (1+1)
#define NFS_readdirres_sz (1+NFS_pagepad_sz)
#define NFS_statfsres_sz (1+NFS_info_sz)
static int nfs_stat_to_errno(enum nfs_stat);
@ -592,8 +593,8 @@ static void nfs2_xdr_enc_readlinkargs(struct rpc_rqst *req,
const struct nfs_readlinkargs *args = data;
encode_fhandle(xdr, args->fh);
rpc_prepare_reply_pages(req, args->pages, args->pgbase,
args->pglen, NFS_readlinkres_sz);
rpc_prepare_reply_pages(req, args->pages, args->pgbase, args->pglen,
NFS_readlinkres_sz - NFS_pagepad_sz);
}
/*
@ -628,8 +629,8 @@ static void nfs2_xdr_enc_readargs(struct rpc_rqst *req,
const struct nfs_pgio_args *args = data;
encode_readargs(xdr, args);
rpc_prepare_reply_pages(req, args->pages, args->pgbase,
args->count, NFS_readres_sz);
rpc_prepare_reply_pages(req, args->pages, args->pgbase, args->count,
NFS_readres_sz - NFS_pagepad_sz);
req->rq_rcv_buf.flags |= XDRBUF_READ;
}
@ -786,8 +787,8 @@ static void nfs2_xdr_enc_readdirargs(struct rpc_rqst *req,
const struct nfs_readdirargs *args = data;
encode_readdirargs(xdr, args);
rpc_prepare_reply_pages(req, args->pages, 0,
args->count, NFS_readdirres_sz);
rpc_prepare_reply_pages(req, args->pages, 0, args->count,
NFS_readdirres_sz - NFS_pagepad_sz);
}
/*

View File

@ -154,14 +154,14 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
}
static int
nfs3_proc_lookup(struct inode *dir, struct dentry *dentry,
struct nfs_fh *fhandle, struct nfs_fattr *fattr,
struct nfs4_label *label)
__nfs3_proc_lookup(struct inode *dir, const char *name, size_t len,
struct nfs_fh *fhandle, struct nfs_fattr *fattr,
unsigned short task_flags)
{
struct nfs3_diropargs arg = {
.fh = NFS_FH(dir),
.name = dentry->d_name.name,
.len = dentry->d_name.len
.name = name,
.len = len
};
struct nfs3_diropres res = {
.fh = fhandle,
@ -173,17 +173,11 @@ nfs3_proc_lookup(struct inode *dir, struct dentry *dentry,
.rpc_resp = &res,
};
int status;
unsigned short task_flags = 0;
/* Is this is an attribute revalidation, subject to softreval? */
if (nfs_lookup_is_soft_revalidate(dentry))
task_flags |= RPC_TASK_TIMEOUT;
res.dir_attr = nfs_alloc_fattr();
if (res.dir_attr == NULL)
return -ENOMEM;
dprintk("NFS call lookup %pd2\n", dentry);
nfs_fattr_init(fattr);
status = rpc_call_sync(NFS_CLIENT(dir), &msg, task_flags);
nfs_refresh_inode(dir, res.dir_attr);
@ -198,6 +192,37 @@ nfs3_proc_lookup(struct inode *dir, struct dentry *dentry,
return status;
}
static int
nfs3_proc_lookup(struct inode *dir, struct dentry *dentry,
struct nfs_fh *fhandle, struct nfs_fattr *fattr,
struct nfs4_label *label)
{
unsigned short task_flags = 0;
/* Is this is an attribute revalidation, subject to softreval? */
if (nfs_lookup_is_soft_revalidate(dentry))
task_flags |= RPC_TASK_TIMEOUT;
dprintk("NFS call lookup %pd2\n", dentry);
return __nfs3_proc_lookup(dir, dentry->d_name.name,
dentry->d_name.len, fhandle, fattr,
task_flags);
}
static int nfs3_proc_lookupp(struct inode *inode, struct nfs_fh *fhandle,
struct nfs_fattr *fattr, struct nfs4_label *label)
{
const char dotdot[] = "..";
const size_t len = strlen(dotdot);
unsigned short task_flags = 0;
if (NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL)
task_flags |= RPC_TASK_TIMEOUT;
return __nfs3_proc_lookup(inode, dotdot, len, fhandle, fattr,
task_flags);
}
static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
{
struct nfs3_accessargs arg = {
@ -637,37 +662,36 @@ nfs3_proc_rmdir(struct inode *dir, const struct qstr *name)
* Also note that this implementation handles both plain readdir and
* readdirplus.
*/
static int
nfs3_proc_readdir(struct dentry *dentry, const struct cred *cred,
u64 cookie, struct page **pages, unsigned int count, bool plus)
static int nfs3_proc_readdir(struct nfs_readdir_arg *nr_arg,
struct nfs_readdir_res *nr_res)
{
struct inode *dir = d_inode(dentry);
__be32 *verf = NFS_I(dir)->cookieverf;
struct inode *dir = d_inode(nr_arg->dentry);
struct nfs3_readdirargs arg = {
.fh = NFS_FH(dir),
.cookie = cookie,
.verf = {verf[0], verf[1]},
.plus = plus,
.count = count,
.pages = pages
.cookie = nr_arg->cookie,
.plus = nr_arg->plus,
.count = nr_arg->page_len,
.pages = nr_arg->pages
};
struct nfs3_readdirres res = {
.verf = verf,
.plus = plus
.verf = nr_res->verf,
.plus = nr_arg->plus,
};
struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[NFS3PROC_READDIR],
.rpc_argp = &arg,
.rpc_resp = &res,
.rpc_cred = cred,
.rpc_cred = nr_arg->cred,
};
int status = -ENOMEM;
if (plus)
if (nr_arg->plus)
msg.rpc_proc = &nfs3_procedures[NFS3PROC_READDIRPLUS];
if (arg.cookie)
memcpy(arg.verf, nr_arg->verf, sizeof(arg.verf));
dprintk("NFS call readdir%s %d\n",
plus? "plus" : "", (unsigned int) cookie);
dprintk("NFS call readdir%s %llu\n", nr_arg->plus ? "plus" : "",
(unsigned long long)nr_arg->cookie);
res.dir_attr = nfs_alloc_fattr();
if (res.dir_attr == NULL)
@ -680,8 +704,8 @@ nfs3_proc_readdir(struct dentry *dentry, const struct cred *cred,
nfs_free_fattr(res.dir_attr);
out:
dprintk("NFS reply readdir%s: %d\n",
plus? "plus" : "", status);
dprintk("NFS reply readdir%s: %d\n", nr_arg->plus ? "plus" : "",
status);
return status;
}
@ -1004,6 +1028,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
.getattr = nfs3_proc_getattr,
.setattr = nfs3_proc_setattr,
.lookup = nfs3_proc_lookup,
.lookupp = nfs3_proc_lookupp,
.access = nfs3_proc_access,
.readlink = nfs3_proc_readlink,
.create = nfs3_proc_create,

View File

@ -33,6 +33,7 @@
* Declare the space requirements for NFS arguments and replies as
* number of 32bit-words
*/
#define NFS3_pagepad_sz (1) /* Page padding */
#define NFS3_fhandle_sz (1+16)
#define NFS3_fh_sz (NFS3_fhandle_sz) /* shorthand */
#define NFS3_sattr_sz (15)
@ -69,13 +70,13 @@
#define NFS3_removeres_sz (NFS3_setattrres_sz)
#define NFS3_lookupres_sz (1+NFS3_fh_sz+(2 * NFS3_post_op_attr_sz))
#define NFS3_accessres_sz (1+NFS3_post_op_attr_sz+1)
#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1+1)
#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3+1)
#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1+NFS3_pagepad_sz)
#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3+NFS3_pagepad_sz)
#define NFS3_writeres_sz (1+NFS3_wcc_data_sz+4)
#define NFS3_createres_sz (1+NFS3_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
#define NFS3_renameres_sz (1+(2 * NFS3_wcc_data_sz))
#define NFS3_linkres_sz (1+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2+1)
#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2+NFS3_pagepad_sz)
#define NFS3_fsstatres_sz (1+NFS3_post_op_attr_sz+13)
#define NFS3_fsinfores_sz (1+NFS3_post_op_attr_sz+12)
#define NFS3_pathconfres_sz (1+NFS3_post_op_attr_sz+6)
@ -85,7 +86,8 @@
#define ACL3_setaclargs_sz (NFS3_fh_sz+1+ \
XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
#define ACL3_getaclres_sz (1+NFS3_post_op_attr_sz+1+ \
XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)+1)
XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)+\
NFS3_pagepad_sz)
#define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz)
static int nfs3_stat_to_errno(enum nfs_stat);
@ -909,8 +911,8 @@ static void nfs3_xdr_enc_readlink3args(struct rpc_rqst *req,
const struct nfs3_readlinkargs *args = data;
encode_nfs_fh3(xdr, args->fh);
rpc_prepare_reply_pages(req, args->pages, args->pgbase,
args->pglen, NFS3_readlinkres_sz);
rpc_prepare_reply_pages(req, args->pages, args->pgbase, args->pglen,
NFS3_readlinkres_sz - NFS3_pagepad_sz);
}
/*
@ -939,7 +941,8 @@ static void nfs3_xdr_enc_read3args(struct rpc_rqst *req,
const void *data)
{
const struct nfs_pgio_args *args = data;
unsigned int replen = args->replen ? args->replen : NFS3_readres_sz;
unsigned int replen = args->replen ? args->replen :
NFS3_readres_sz - NFS3_pagepad_sz;
encode_read3args(xdr, args);
rpc_prepare_reply_pages(req, args->pages, args->pgbase,
@ -1239,8 +1242,8 @@ static void nfs3_xdr_enc_readdir3args(struct rpc_rqst *req,
const struct nfs3_readdirargs *args = data;
encode_readdir3args(xdr, args);
rpc_prepare_reply_pages(req, args->pages, 0,
args->count, NFS3_readdirres_sz);
rpc_prepare_reply_pages(req, args->pages, 0, args->count,
NFS3_readdirres_sz - NFS3_pagepad_sz);
}
/*
@ -1281,8 +1284,8 @@ static void nfs3_xdr_enc_readdirplus3args(struct rpc_rqst *req,
const struct nfs3_readdirargs *args = data;
encode_readdirplus3args(xdr, args);
rpc_prepare_reply_pages(req, args->pages, 0,
args->count, NFS3_readdirres_sz);
rpc_prepare_reply_pages(req, args->pages, 0, args->count,
NFS3_readdirres_sz - NFS3_pagepad_sz);
}
/*
@ -1328,7 +1331,7 @@ static void nfs3_xdr_enc_getacl3args(struct rpc_rqst *req,
if (args->mask & (NFS_ACL | NFS_DFACL)) {
rpc_prepare_reply_pages(req, args->pages, 0,
NFSACL_MAXPAGES << PAGE_SHIFT,
ACL3_getaclres_sz);
ACL3_getaclres_sz - NFS3_pagepad_sz);
req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES;
}
}
@ -1648,7 +1651,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
result->op_status = status;
if (status != NFS3_OK)
goto out_status;
result->replen = 4 + ((xdr_stream_pos(xdr) - pos) >> 2);
result->replen = 3 + ((xdr_stream_pos(xdr) - pos) >> 2);
error = decode_read3resok(xdr, result);
out:
return error;

View File

@ -1173,14 +1173,12 @@ static int _nfs42_proc_setxattr(struct inode *inode, const char *name,
}
static ssize_t _nfs42_proc_getxattr(struct inode *inode, const char *name,
void *buf, size_t buflen)
void *buf, size_t buflen, struct page **pages,
size_t plen)
{
struct nfs_server *server = NFS_SERVER(inode);
struct page *pages[NFS4XATTR_MAXPAGES] = {};
struct nfs42_getxattrargs arg = {
.fh = NFS_FH(inode),
.xattr_pages = pages,
.xattr_len = buflen,
.xattr_name = name,
};
struct nfs42_getxattrres res;
@ -1189,7 +1187,10 @@ static ssize_t _nfs42_proc_getxattr(struct inode *inode, const char *name,
.rpc_argp = &arg,
.rpc_resp = &res,
};
int ret, np;
ssize_t ret;
arg.xattr_len = plen;
arg.xattr_pages = pages;
ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args,
&res.seq_res, 0);
@ -1214,10 +1215,6 @@ static ssize_t _nfs42_proc_getxattr(struct inode *inode, const char *name,
_copy_from_pages(buf, pages, 0, res.xattr_len);
}
np = DIV_ROUND_UP(res.xattr_len, PAGE_SIZE);
while (--np >= 0)
__free_page(pages[np]);
return res.xattr_len;
}
@ -1292,16 +1289,45 @@ ssize_t nfs42_proc_getxattr(struct inode *inode, const char *name,
void *buf, size_t buflen)
{
struct nfs4_exception exception = { };
ssize_t err;
ssize_t err, np, i;
struct page **pages;
np = nfs_page_array_len(0, buflen ?: XATTR_SIZE_MAX);
pages = kmalloc_array(np, sizeof(*pages), GFP_KERNEL);
if (!pages)
return -ENOMEM;
for (i = 0; i < np; i++) {
pages[i] = alloc_page(GFP_KERNEL);
if (!pages[i]) {
np = i + 1;
err = -ENOMEM;
goto out;
}
}
/*
* The GETXATTR op has no length field in the call, and the
* xattr data is at the end of the reply.
*
* There is no downside in using the page-aligned length. It will
* allow receiving and caching xattrs that are too large for the
* caller but still fit in the page-rounded value.
*/
do {
err = _nfs42_proc_getxattr(inode, name, buf, buflen);
err = _nfs42_proc_getxattr(inode, name, buf, buflen,
pages, np * PAGE_SIZE);
if (err >= 0)
break;
err = nfs4_handle_exception(NFS_SERVER(inode), err,
&exception);
} while (exception.retry);
out:
while (--np >= 0)
__free_page(pages[np]);
kfree(pages);
return err;
}

View File

@ -191,7 +191,7 @@
#define encode_getxattr_maxsz (op_encode_hdr_maxsz + 1 + \
nfs4_xattr_name_maxsz)
#define decode_getxattr_maxsz (op_decode_hdr_maxsz + 1 + 1)
#define decode_getxattr_maxsz (op_decode_hdr_maxsz + 1 + pagepad_maxsz)
#define encode_setxattr_maxsz (op_encode_hdr_maxsz + \
1 + nfs4_xattr_name_maxsz + 1)
#define decode_setxattr_maxsz (op_decode_hdr_maxsz + decode_change_info_maxsz)
@ -489,6 +489,12 @@ static int decode_getxattr(struct xdr_stream *xdr,
return -EIO;
len = be32_to_cpup(p);
/*
* Only check against the page length here. The actual
* requested length may be smaller, but that is only
* checked against after possibly caching a valid reply.
*/
if (len > req->rq_rcv_buf.page_len)
return -ERANGE;
@ -1019,56 +1025,80 @@ static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *re
return decode_op_hdr(xdr, OP_DEALLOCATE);
}
static int decode_read_plus_data(struct xdr_stream *xdr, struct nfs_pgio_res *res,
uint32_t *eof)
static int decode_read_plus_data(struct xdr_stream *xdr,
struct nfs_pgio_args *args,
struct nfs_pgio_res *res)
{
uint32_t count, recvd;
uint64_t offset;
__be32 *p;
p = xdr_inline_decode(xdr, 8 + 4);
if (unlikely(!p))
return -EIO;
if (!p)
return 1;
p = xdr_decode_hyper(p, &offset);
count = be32_to_cpup(p);
recvd = xdr_align_data(xdr, res->count, count);
res->count += recvd;
if (count > recvd) {
dprintk("NFS: server cheating in read reply: "
"count %u > recvd %u\n", count, recvd);
*eof = 0;
recvd = xdr_align_data(xdr, res->count, xdr_align_size(count));
if (recvd > count)
recvd = count;
if (res->count + recvd > args->count) {
if (args->count > res->count)
res->count += args->count - res->count;
return 1;
}
res->count += recvd;
if (count > recvd)
return 1;
return 0;
}
static int decode_read_plus_hole(struct xdr_stream *xdr, struct nfs_pgio_res *res,
uint32_t *eof)
static int decode_read_plus_hole(struct xdr_stream *xdr,
struct nfs_pgio_args *args,
struct nfs_pgio_res *res, uint32_t *eof)
{
uint64_t offset, length, recvd;
__be32 *p;
p = xdr_inline_decode(xdr, 8 + 8);
if (unlikely(!p))
return -EIO;
if (!p)
return 1;
p = xdr_decode_hyper(p, &offset);
p = xdr_decode_hyper(p, &length);
if (offset != args->offset + res->count) {
/* Server returned an out-of-sequence extent */
if (offset > args->offset + res->count ||
offset + length < args->offset + res->count) {
dprintk("NFS: server returned out of sequence extent: "
"offset/size = %llu/%llu != expected %llu\n",
(unsigned long long)offset,
(unsigned long long)length,
(unsigned long long)(args->offset +
res->count));
return 1;
}
length -= args->offset + res->count - offset;
}
if (length + res->count > args->count) {
*eof = 0;
if (unlikely(res->count >= args->count))
return 1;
length = args->count - res->count;
}
recvd = xdr_expand_hole(xdr, res->count, length);
res->count += recvd;
if (recvd < length) {
*eof = 0;
if (recvd < length)
return 1;
}
return 0;
}
static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res)
{
struct nfs_pgio_header *hdr =
container_of(res, struct nfs_pgio_header, res);
struct nfs_pgio_args *args = &hdr->args;
uint32_t eof, segments, type;
int status, i;
__be32 *p;
@ -1081,6 +1111,7 @@ static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res)
if (unlikely(!p))
return -EIO;
res->count = 0;
eof = be32_to_cpup(p++);
segments = be32_to_cpup(p++);
if (segments == 0)
@ -1088,26 +1119,31 @@ static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res)
for (i = 0; i < segments; i++) {
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
return -EIO;
if (!p)
goto early_out;
type = be32_to_cpup(p++);
if (type == NFS4_CONTENT_DATA)
status = decode_read_plus_data(xdr, res, &eof);
status = decode_read_plus_data(xdr, args, res);
else if (type == NFS4_CONTENT_HOLE)
status = decode_read_plus_hole(xdr, res, &eof);
status = decode_read_plus_hole(xdr, args, res, &eof);
else
return -EINVAL;
if (status < 0)
return status;
if (status > 0)
break;
goto early_out;
}
out:
res->eof = eof;
return 0;
early_out:
if (unlikely(!i))
return -EIO;
res->eof = 0;
return 0;
}
static int decode_seek(struct xdr_stream *xdr, struct nfs42_seek_res *res)
@ -1476,18 +1512,16 @@ static void nfs4_xdr_enc_getxattr(struct rpc_rqst *req, struct xdr_stream *xdr,
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
size_t plen;
uint32_t replen;
encode_compound_hdr(xdr, req, &hdr);
encode_sequence(xdr, &args->seq_args, &hdr);
encode_putfh(xdr, args->fh, &hdr);
replen = hdr.replen + op_decode_hdr_maxsz + 1;
encode_getxattr(xdr, args->xattr_name, &hdr);
plen = args->xattr_len ? args->xattr_len : XATTR_SIZE_MAX;
rpc_prepare_reply_pages(req, args->xattr_pages, 0, plen,
hdr.replen);
req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES;
rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->xattr_len,
replen);
encode_nops(&hdr);
}
@ -1520,14 +1554,15 @@ static void nfs4_xdr_enc_listxattrs(struct rpc_rqst *req,
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
uint32_t replen;
encode_compound_hdr(xdr, req, &hdr);
encode_sequence(xdr, &args->seq_args, &hdr);
encode_putfh(xdr, args->fh, &hdr);
replen = hdr.replen + op_decode_hdr_maxsz + 2 + 1;
encode_listxattrs(xdr, args, &hdr);
rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->count,
hdr.replen);
rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->count, replen);
encode_nops(&hdr);
}

View File

@ -1153,7 +1153,7 @@ struct nfs_server *nfs4_create_server(struct fs_context *fc)
if (!server)
return ERR_PTR(-ENOMEM);
server->cred = get_cred(current_cred());
server->cred = get_cred(fc->cred);
auth_probe = ctx->auth_info.flavor_len < 1;

View File

@ -184,6 +184,8 @@ static int nfs4_map_errors(int err)
return -EPROTONOSUPPORT;
case -NFS4ERR_FILE_OPEN:
return -EBUSY;
case -NFS4ERR_NOT_SAME:
return -ENOTSYNC;
default:
dprintk("%s could not handle NFSv4 error %d\n",
__func__, -err);
@ -4397,6 +4399,10 @@ static int _nfs4_proc_lookupp(struct inode *inode,
.rpc_argp = &args,
.rpc_resp = &res,
};
unsigned short task_flags = 0;
if (NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL)
task_flags |= RPC_TASK_TIMEOUT;
args.bitmask = nfs4_bitmask(server, label);
@ -4404,7 +4410,7 @@ static int _nfs4_proc_lookupp(struct inode *inode,
dprintk("NFS call lookupp ino=0x%lx\n", inode->i_ino);
status = nfs4_call_sync(clnt, server, &msg, &args.seq_args,
&res.seq_res, 0);
&res.seq_res, task_flags);
dprintk("NFS reply lookupp: %d\n", status);
return status;
}
@ -4957,35 +4963,40 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
return err;
}
static int _nfs4_proc_readdir(struct dentry *dentry, const struct cred *cred,
u64 cookie, struct page **pages, unsigned int count, bool plus)
static int _nfs4_proc_readdir(struct nfs_readdir_arg *nr_arg,
struct nfs_readdir_res *nr_res)
{
struct inode *dir = d_inode(dentry);
struct inode *dir = d_inode(nr_arg->dentry);
struct nfs_server *server = NFS_SERVER(dir);
struct nfs4_readdir_arg args = {
.fh = NFS_FH(dir),
.pages = pages,
.pages = nr_arg->pages,
.pgbase = 0,
.count = count,
.bitmask = NFS_SERVER(d_inode(dentry))->attr_bitmask,
.plus = plus,
.count = nr_arg->page_len,
.plus = nr_arg->plus,
};
struct nfs4_readdir_res res;
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READDIR],
.rpc_argp = &args,
.rpc_resp = &res,
.rpc_cred = cred,
.rpc_cred = nr_arg->cred,
};
int status;
dprintk("%s: dentry = %pd2, cookie = %Lu\n", __func__,
dentry,
(unsigned long long)cookie);
nfs4_setup_readdir(cookie, NFS_I(dir)->cookieverf, dentry, &args);
dprintk("%s: dentry = %pd2, cookie = %llu\n", __func__,
nr_arg->dentry, (unsigned long long)nr_arg->cookie);
if (!(server->caps & NFS_CAP_SECURITY_LABEL))
args.bitmask = server->attr_bitmask_nl;
else
args.bitmask = server->attr_bitmask;
nfs4_setup_readdir(nr_arg->cookie, nr_arg->verf, nr_arg->dentry, &args);
res.pgbase = args.pgbase;
status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0);
status = nfs4_call_sync(server->client, server, &msg, &args.seq_args,
&res.seq_res, 0);
if (status >= 0) {
memcpy(NFS_I(dir)->cookieverf, res.verifier.data, NFS4_VERIFIER_SIZE);
memcpy(nr_res->verf, res.verifier.data, NFS4_VERIFIER_SIZE);
status += args.pgbase;
}
@ -4995,19 +5006,18 @@ static int _nfs4_proc_readdir(struct dentry *dentry, const struct cred *cred,
return status;
}
static int nfs4_proc_readdir(struct dentry *dentry, const struct cred *cred,
u64 cookie, struct page **pages, unsigned int count, bool plus)
static int nfs4_proc_readdir(struct nfs_readdir_arg *arg,
struct nfs_readdir_res *res)
{
struct nfs4_exception exception = {
.interruptible = true,
};
int err;
do {
err = _nfs4_proc_readdir(dentry, cred, cookie,
pages, count, plus);
trace_nfs4_readdir(d_inode(dentry), err);
err = nfs4_handle_exception(NFS_SERVER(d_inode(dentry)), err,
&exception);
err = _nfs4_proc_readdir(arg, res);
trace_nfs4_readdir(d_inode(arg->dentry), err);
err = nfs4_handle_exception(NFS_SERVER(d_inode(arg->dentry)),
err, &exception);
} while (exception.retry);
return err;
}
@ -5310,17 +5320,17 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
}
#if defined CONFIG_NFS_V4_2 && defined CONFIG_NFS_V4_2_READ_PLUS
static void nfs42_read_plus_support(struct nfs_server *server, struct rpc_message *msg)
static void nfs42_read_plus_support(struct nfs_pgio_header *hdr,
struct rpc_message *msg)
{
if (server->caps & NFS_CAP_READ_PLUS)
/* Note: We don't use READ_PLUS with pNFS yet */
if (nfs_server_capable(hdr->inode, NFS_CAP_READ_PLUS) && !hdr->ds_clp)
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS];
else
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
}
#else
static void nfs42_read_plus_support(struct nfs_server *server, struct rpc_message *msg)
static void nfs42_read_plus_support(struct nfs_pgio_header *hdr,
struct rpc_message *msg)
{
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
}
#endif /* CONFIG_NFS_V4_2 */
@ -5330,7 +5340,8 @@ static void nfs4_proc_read_setup(struct nfs_pgio_header *hdr,
hdr->timestamp = jiffies;
if (!hdr->pgio_done_cb)
hdr->pgio_done_cb = nfs4_read_done_cb;
nfs42_read_plus_support(NFS_SERVER(hdr->inode), msg);
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
nfs42_read_plus_support(hdr, msg);
nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0, 0);
}
@ -9651,6 +9662,8 @@ _nfs4_proc_getdeviceinfo(struct nfs_server *server,
if (res.notification != args.notify_types)
pdev->nocache = 1;
trace_nfs4_getdeviceinfo(server, &pdev->dev_id, status);
dprintk("<-- %s status=%d\n", __func__, status);
return status;

View File

@ -34,7 +34,7 @@ enum nfs4_slot_tbl_state {
NFS4_SLOT_TBL_DRAINING,
};
#define SLOT_TABLE_SZ DIV_ROUND_UP(NFS4_MAX_SLOT_TABLE, 8*sizeof(long))
#define SLOT_TABLE_SZ DIV_ROUND_UP(NFS4_MAX_SLOT_TABLE, BITS_PER_LONG)
struct nfs4_slot_table {
struct nfs4_session *session; /* Parent session */
struct nfs4_slot *slots; /* seqid per slot */

View File

@ -67,7 +67,7 @@ static void nfs4_evict_inode(struct inode *inode)
nfs_inode_evict_delegation(inode);
/* Note that above delegreturn would trigger pnfs return-on-close */
pnfs_return_layout(inode);
pnfs_destroy_layout(NFS_I(inode));
pnfs_destroy_layout_final(NFS_I(inode));
/* First call standard NFS clear_inode() code */
nfs_clear_inode(inode);
nfs4_xattr_cache_zap(inode);

View File

@ -2189,6 +2189,81 @@ DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_write_done);
DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_read_pagelist);
DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_write_pagelist);
/*
 * Event class for tracepoints that record a device ID together with the
 * hostname (cl_hostname) of the nfs_client passed in by the caller.
 */
DECLARE_EVENT_CLASS(nfs4_deviceid_event,
TP_PROTO(
const struct nfs_client *clp,
const struct nfs4_deviceid *deviceid
),
TP_ARGS(clp, deviceid),
TP_STRUCT__entry(
__string(dstaddr, clp->cl_hostname)
__array(unsigned char, deviceid, NFS4_DEVICEID4_SIZE)
),
TP_fast_assign(
__assign_str(dstaddr, clp->cl_hostname);
/* Snapshot the raw device ID bytes; they are printed as hex below */
memcpy(__entry->deviceid, deviceid->data,
NFS4_DEVICEID4_SIZE);
),
TP_printk(
"deviceid=%s, dstaddr=%s",
__print_hex(__entry->deviceid, NFS4_DEVICEID4_SIZE),
__get_str(dstaddr)
)
);
/*
 * Define a tracepoint called @name in the nfs4_deviceid_event class.
 * The tracepoint takes (clp, deviceid).
 */
#define DEFINE_PNFS_DEVICEID_EVENT(name) \
DEFINE_EVENT(nfs4_deviceid_event, name, \
TP_PROTO(const struct nfs_client *clp, \
const struct nfs4_deviceid *deviceid \
), \
TP_ARGS(clp, deviceid))
/* Fired by nfs4_put_deviceid_node() right before the node is freed */
DEFINE_PNFS_DEVICEID_EVENT(nfs4_deviceid_free);
/*
 * Event class for tracepoints that report the outcome (status) of an
 * operation on a device ID. Besides the device ID and status it records
 * the server's s_dev and the hostname of the server's nfs_client.
 */
DECLARE_EVENT_CLASS(nfs4_deviceid_status,
TP_PROTO(
const struct nfs_server *server,
const struct nfs4_deviceid *deviceid,
int status
),
TP_ARGS(server, deviceid, status),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(int, status)
__string(dstaddr, server->nfs_client->cl_hostname)
__array(unsigned char, deviceid, NFS4_DEVICEID4_SIZE)
),
TP_fast_assign(
__entry->dev = server->s_dev;
__entry->status = status;
__assign_str(dstaddr, server->nfs_client->cl_hostname);
/* Snapshot the raw device ID bytes; they are printed as hex below */
memcpy(__entry->deviceid, deviceid->data,
NFS4_DEVICEID4_SIZE);
),
TP_printk(
"dev=%02x:%02x: deviceid=%s, dstaddr=%s, status=%d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__print_hex(__entry->deviceid, NFS4_DEVICEID4_SIZE),
__get_str(dstaddr),
__entry->status
)
);
/*
 * Define a tracepoint called @name in the nfs4_deviceid_status class.
 * The tracepoint takes (server, deviceid, status).
 */
#define DEFINE_PNFS_DEVICEID_STATUS(name) \
DEFINE_EVENT(nfs4_deviceid_status, name, \
TP_PROTO(const struct nfs_server *server, \
const struct nfs4_deviceid *deviceid, \
int status \
), \
TP_ARGS(server, deviceid, status))
/* Fired by _nfs4_proc_getdeviceinfo() with the status of the call */
DEFINE_PNFS_DEVICEID_STATUS(nfs4_getdeviceinfo);
/*
 * Fired by nfs4_find_get_deviceid(): status 0 at its "found" label,
 * -ENOENT when nfs4_get_device_info() returned NULL.
 */
DEFINE_PNFS_DEVICEID_STATUS(nfs4_find_deviceid);
DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event,
TP_PROTO(
const struct nfs_pgio_header *hdr

View File

@ -84,6 +84,7 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
/* lock,open owner id:
* we currently use size 2 (u64) out of (NFS4_OPAQUE_LIMIT >> 2)
*/
#define pagepad_maxsz (1) /* possible XDR padding following data read into pages; presumably counted in 32-bit XDR words like the other *_maxsz values - TODO confirm */
#define open_owner_id_maxsz (1 + 2 + 1 + 1 + 2)
#define lock_owner_id_maxsz (1 + 1 + 4)
#define decode_lockowner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
@ -215,14 +216,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
nfs4_fattr_bitmap_maxsz)
#define encode_read_maxsz (op_encode_hdr_maxsz + \
encode_stateid_maxsz + 3)
#define decode_read_maxsz (op_decode_hdr_maxsz + 2 + 1)
#define decode_read_maxsz (op_decode_hdr_maxsz + 2 + pagepad_maxsz)
#define encode_readdir_maxsz (op_encode_hdr_maxsz + \
2 + encode_verifier_maxsz + 5 + \
nfs4_label_maxsz)
#define decode_readdir_maxsz (op_decode_hdr_maxsz + \
decode_verifier_maxsz + 1)
decode_verifier_maxsz + pagepad_maxsz)
#define encode_readlink_maxsz (op_encode_hdr_maxsz)
#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1 + 1)
#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1 + pagepad_maxsz)
#define encode_write_maxsz (op_encode_hdr_maxsz + \
encode_stateid_maxsz + 4)
#define decode_write_maxsz (op_decode_hdr_maxsz + \
@ -284,14 +285,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
#define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
#define encode_getacl_maxsz (encode_getattr_maxsz)
#define decode_getacl_maxsz (op_decode_hdr_maxsz + \
nfs4_fattr_bitmap_maxsz + 1 + 1)
nfs4_fattr_bitmap_maxsz + 1 + pagepad_maxsz)
#define encode_setacl_maxsz (op_encode_hdr_maxsz + \
encode_stateid_maxsz + 3)
#define decode_setacl_maxsz (decode_setattr_maxsz)
#define encode_fs_locations_maxsz \
(encode_getattr_maxsz)
#define decode_fs_locations_maxsz \
(1)
(pagepad_maxsz)
#define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz)
#define decode_secinfo_maxsz (op_decode_hdr_maxsz + 1 + ((NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)) / 4))
@ -393,12 +394,13 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
/* devaddr4 payload is read into page */ \
1 /* notification bitmap length */ + \
1 /* notification bitmap, word 0 */ + \
1 /* possible XDR padding */)
pagepad_maxsz /* possible XDR padding */)
#define encode_layoutget_maxsz (op_encode_hdr_maxsz + 10 + \
encode_stateid_maxsz)
#define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \
decode_stateid_maxsz + \
XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE) + 1)
XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE) + \
pagepad_maxsz)
#define encode_layoutcommit_maxsz (op_encode_hdr_maxsz + \
2 /* offset */ + \
2 /* length */ + \
@ -2342,7 +2344,7 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_layoutget(xdr, args->lg_args, &hdr);
rpc_prepare_reply_pages(req, args->lg_args->layout.pages, 0,
args->lg_args->layout.pglen,
hdr.replen);
hdr.replen - pagepad_maxsz);
}
encode_nops(&hdr);
}
@ -2388,7 +2390,7 @@ static void nfs4_xdr_enc_open_noattr(struct rpc_rqst *req,
encode_layoutget(xdr, args->lg_args, &hdr);
rpc_prepare_reply_pages(req, args->lg_args->layout.pages, 0,
args->lg_args->layout.pglen,
hdr.replen);
hdr.replen - pagepad_maxsz);
}
encode_nops(&hdr);
}
@ -2499,7 +2501,7 @@ static void nfs4_xdr_enc_readlink(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_readlink(xdr, args, req, &hdr);
rpc_prepare_reply_pages(req, args->pages, args->pgbase,
args->pglen, hdr.replen);
args->pglen, hdr.replen - pagepad_maxsz);
encode_nops(&hdr);
}
@ -2520,7 +2522,7 @@ static void nfs4_xdr_enc_readdir(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_readdir(xdr, args, req, &hdr);
rpc_prepare_reply_pages(req, args->pages, args->pgbase,
args->count, hdr.replen);
args->count, hdr.replen - pagepad_maxsz);
encode_nops(&hdr);
}
@ -2541,7 +2543,7 @@ static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_read(xdr, args, &hdr);
rpc_prepare_reply_pages(req, args->pages, args->pgbase,
args->count, hdr.replen);
args->count, hdr.replen - pagepad_maxsz);
req->rq_rcv_buf.flags |= XDRBUF_READ;
encode_nops(&hdr);
}
@ -2588,7 +2590,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
ARRAY_SIZE(nfs4_acl_bitmap), &hdr);
rpc_prepare_reply_pages(req, args->acl_pages, 0,
args->acl_len, replen + 1);
args->acl_len, replen);
encode_nops(&hdr);
}
@ -2810,7 +2812,7 @@ static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req,
}
rpc_prepare_reply_pages(req, (struct page **)&args->page, 0,
PAGE_SIZE, replen + 1);
PAGE_SIZE, replen);
encode_nops(&hdr);
}
@ -3009,15 +3011,19 @@ static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req,
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
uint32_t replen;
encode_compound_hdr(xdr, req, &hdr);
encode_sequence(xdr, &args->seq_args, &hdr);
replen = hdr.replen + op_decode_hdr_maxsz + 2;
encode_getdeviceinfo(xdr, args, &hdr);
/* set up reply kvec. Subtract notification bitmap max size (2)
* so that notification bitmap is put in xdr_buf tail */
/* set up reply kvec. device_addr4 opaque data is read into the
* pages */
rpc_prepare_reply_pages(req, args->pdev->pages, args->pdev->pgbase,
args->pdev->pglen, hdr.replen - 2);
args->pdev->pglen, replen);
encode_nops(&hdr);
}
@ -3039,7 +3045,7 @@ static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req,
encode_layoutget(xdr, args, &hdr);
rpc_prepare_reply_pages(req, args->layout.pages, 0,
args->layout.pglen, hdr.replen);
args->layout.pglen, hdr.replen - pagepad_maxsz);
encode_nops(&hdr);
}
@ -5331,11 +5337,11 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
res->acl_len = attrlen;
/* Check for receive buffer overflow */
if (res->acl_len > (xdr->nwords << 2) ||
if (res->acl_len > xdr_stream_remaining(xdr) ||
res->acl_len + res->acl_data_offset > xdr->buf->page_len) {
res->acl_flags |= NFS4_ACL_TRUNC;
dprintk("NFS: acl reply: attrlen %u > page_len %u\n",
attrlen, xdr->nwords << 2);
dprintk("NFS: acl reply: attrlen %u > page_len %zu\n",
attrlen, xdr_stream_remaining(xdr));
}
} else
status = -EOPNOTSUPP;

View File

@ -294,6 +294,7 @@ void
pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
{
struct inode *inode;
unsigned long i_state;
if (!lo)
return;
@ -304,8 +305,12 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
if (!list_empty(&lo->plh_segs))
WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n");
pnfs_detach_layout_hdr(lo);
i_state = inode->i_state;
spin_unlock(&inode->i_lock);
pnfs_free_layout_hdr(lo);
/* Notify pnfs_destroy_layout_final() that we're done */
if (i_state & (I_FREEING | I_CLEAR))
wake_up_var(lo);
}
}
@ -734,8 +739,7 @@ pnfs_free_lseg_list(struct list_head *free_me)
}
}
void
pnfs_destroy_layout(struct nfs_inode *nfsi)
static struct pnfs_layout_hdr *__pnfs_destroy_layout(struct nfs_inode *nfsi)
{
struct pnfs_layout_hdr *lo;
LIST_HEAD(tmp_list);
@ -753,9 +757,34 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
pnfs_put_layout_hdr(lo);
} else
spin_unlock(&nfsi->vfs_inode.i_lock);
return lo;
}
/**
 * pnfs_destroy_layout - run the layout teardown helper on an NFS inode
 * @nfsi: NFS inode to operate on
 *
 * Exported wrapper around __pnfs_destroy_layout(). The layout header
 * pointer returned by the helper is deliberately discarded.
 */
void pnfs_destroy_layout(struct nfs_inode *nfsi)
{
	(void)__pnfs_destroy_layout(nfsi);
}
EXPORT_SYMBOL_GPL(pnfs_destroy_layout);
static bool pnfs_layout_removed(struct nfs_inode *nfsi,
struct pnfs_layout_hdr *lo)
{
bool ret;
spin_lock(&nfsi->vfs_inode.i_lock);
ret = nfsi->layout != lo;
spin_unlock(&nfsi->vfs_inode.i_lock);
return ret;
}
/**
 * pnfs_destroy_layout_final - destroy the layout and wait for it to go away
 * @nfsi: NFS inode to operate on
 *
 * Calls __pnfs_destroy_layout() and, if it returned a layout header, sleeps
 * in wait_var_event() keyed on that header until pnfs_layout_removed()
 * reports that the inode no longer points at it. The matching wake_up_var()
 * is issued from pnfs_put_layout_hdr().
 */
void pnfs_destroy_layout_final(struct nfs_inode *nfsi)
{
	struct pnfs_layout_hdr *lo;

	lo = __pnfs_destroy_layout(nfsi);
	if (!lo)
		return;

	wait_var_event(lo, pnfs_layout_removed(nfsi, lo));
}
static bool
pnfs_layout_add_bulk_destroy_list(struct inode *inode,
struct list_head *layout_list)

View File

@ -51,6 +51,8 @@ struct nfs4_pnfs_ds_addr {
size_t da_addrlen;
struct list_head da_node; /* nfs4_pnfs_dev_hlist dev_dslist */
char *da_remotestr; /* human readable addr+port */
const char *da_netid;
int da_transport;
};
struct nfs4_pnfs_ds {
@ -266,6 +268,7 @@ struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp);
void pnfs_layoutget_free(struct nfs4_layoutget *lgp);
void pnfs_free_lseg_list(struct list_head *tmp_list);
void pnfs_destroy_layout(struct nfs_inode *);
void pnfs_destroy_layout_final(struct nfs_inode *);
void pnfs_destroy_all_layouts(struct nfs_client *);
int pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
struct nfs_fsid *fsid,
@ -710,6 +713,10 @@ static inline void pnfs_destroy_layout(struct nfs_inode *nfsi)
{
}
/*
 * No-op variant of pnfs_destroy_layout_final().
 * NOTE(review): presumably the stub used when pNFS support is compiled
 * out - confirm against the enclosing #if, which is not visible here.
 */
static inline void pnfs_destroy_layout_final(struct nfs_inode *nfsi)
{
}
static inline struct pnfs_layout_segment *
pnfs_get_lseg(struct pnfs_layout_segment *lseg)
{

View File

@ -34,6 +34,8 @@
#include "internal.h"
#include "pnfs.h"
#include "nfs4trace.h"
#define NFSDBG_FACILITY NFSDBG_PNFS
/*
@ -192,24 +194,28 @@ nfs4_find_get_deviceid(struct nfs_server *server,
d = __nfs4_find_get_deviceid(server, id, hash);
if (d)
return d;
goto found;
new = nfs4_get_device_info(server, id, cred, gfp_mask);
if (!new)
if (!new) {
trace_nfs4_find_deviceid(server, id, -ENOENT);
return new;
}
spin_lock(&nfs4_deviceid_lock);
d = __nfs4_find_get_deviceid(server, id, hash);
if (d) {
spin_unlock(&nfs4_deviceid_lock);
server->pnfs_curr_ld->free_deviceid_node(new);
return d;
} else {
atomic_inc(&new->ref);
hlist_add_head_rcu(&new->node, &nfs4_deviceid_cache[hash]);
spin_unlock(&nfs4_deviceid_lock);
d = new;
}
hlist_add_head_rcu(&new->node, &nfs4_deviceid_cache[hash]);
atomic_inc(&new->ref);
spin_unlock(&nfs4_deviceid_lock);
return new;
found:
trace_nfs4_find_deviceid(server, id, 0);
return d;
}
EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid);
@ -278,6 +284,7 @@ nfs4_put_deviceid_node(struct nfs4_deviceid_node *d)
}
if (!atomic_dec_and_test(&d->ref))
return false;
trace_nfs4_deviceid_free(d->nfs_client, &d->deviceid);
d->ld->free_deviceid_node(d);
return true;
}

View File

@ -661,6 +661,21 @@ _data_server_lookup_locked(const struct list_head *dsaddrs)
return NULL;
}
/*
 * Allocate a zeroed nfs4_pnfs_ds_addr and initialise its da_node list head.
 *
 * @gfp_flags: allocation flags handed to kzalloc()
 *
 * Returns the new entry, or NULL if the allocation failed.
 */
static struct nfs4_pnfs_ds_addr *nfs4_pnfs_ds_addr_alloc(gfp_t gfp_flags)
{
	struct nfs4_pnfs_ds_addr *da;

	da = kzalloc(sizeof(*da), gfp_flags);
	if (!da)
		return NULL;

	INIT_LIST_HEAD(&da->da_node);
	return da;
}
/*
 * Free a DS address entry together with the strings it owns
 * (da_netid and da_remotestr).
 *
 * The entry is not unlinked from any list here; destroy_ds() does
 * list_del_init() on da_node before calling this.
 */
static void nfs4_pnfs_ds_addr_free(struct nfs4_pnfs_ds_addr *da)
{
	/* Owned strings first, then the container itself */
	kfree(da->da_netid);
	kfree(da->da_remotestr);
	kfree(da);
}
static void destroy_ds(struct nfs4_pnfs_ds *ds)
{
struct nfs4_pnfs_ds_addr *da;
@ -676,8 +691,7 @@ static void destroy_ds(struct nfs4_pnfs_ds *ds)
struct nfs4_pnfs_ds_addr,
da_node);
list_del_init(&da->da_node);
kfree(da->da_remotestr);
kfree(da);
nfs4_pnfs_ds_addr_free(da);
}
kfree(ds->ds_remotestr);
@ -854,12 +868,17 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,
if (!IS_ERR(clp)) {
struct xprt_create xprt_args = {
.ident = XPRT_TRANSPORT_TCP,
.ident = da->da_transport,
.net = clp->cl_net,
.dstaddr = (struct sockaddr *)&da->da_addr,
.addrlen = da->da_addrlen,
.servername = clp->cl_hostname,
};
if (da->da_transport != clp->cl_proto)
continue;
if (da->da_addr.ss_family != clp->cl_addr.ss_family)
continue;
/* Add this address as an alias */
rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args,
rpc_clnt_test_and_add_xprt, NULL);
@ -867,7 +886,7 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,
}
clp = get_v3_ds_connect(mds_srv,
(struct sockaddr *)&da->da_addr,
da->da_addrlen, IPPROTO_TCP,
da->da_addrlen, da->da_transport,
timeo, retrans);
if (IS_ERR(clp))
continue;
@ -905,7 +924,7 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
if (!IS_ERR(clp) && clp->cl_mvops->session_trunk) {
struct xprt_create xprt_args = {
.ident = XPRT_TRANSPORT_TCP,
.ident = da->da_transport,
.net = clp->cl_net,
.dstaddr = (struct sockaddr *)&da->da_addr,
.addrlen = da->da_addrlen,
@ -913,17 +932,21 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
};
struct nfs4_add_xprt_data xprtdata = {
.clp = clp,
.cred = nfs4_get_clid_cred(clp),
};
struct rpc_add_xprt_test rpcdata = {
.add_xprt_test = clp->cl_mvops->session_trunk,
.data = &xprtdata,
};
if (da->da_transport != clp->cl_proto)
continue;
if (da->da_addr.ss_family != clp->cl_addr.ss_family)
continue;
/**
* Test this address for session trunking and
* add as an alias
*/
xprtdata.cred = nfs4_get_clid_cred(clp),
rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args,
rpc_clnt_setup_test_and_add_xprt,
&rpcdata);
@ -932,8 +955,9 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
} else {
clp = nfs4_set_ds_client(mds_srv,
(struct sockaddr *)&da->da_addr,
da->da_addrlen, IPPROTO_TCP,
timeo, retrans, minor_version);
da->da_addrlen,
da->da_transport, timeo,
retrans, minor_version);
if (IS_ERR(clp))
continue;
@ -1021,55 +1045,26 @@ nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags)
struct nfs4_pnfs_ds_addr *da = NULL;
char *buf, *portstr;
__be16 port;
int nlen, rlen;
ssize_t nlen, rlen;
int tmp[2];
__be32 *p;
char *netid, *match_netid;
size_t len, match_netid_len;
char *netid;
size_t len;
char *startsep = "";
char *endsep = "";
/* r_netid */
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
nlen = xdr_stream_decode_string_dup(xdr, &netid, XDR_MAX_NETOBJ,
gfp_flags);
if (unlikely(nlen < 0))
goto out_err;
nlen = be32_to_cpup(p++);
p = xdr_inline_decode(xdr, nlen);
if (unlikely(!p))
goto out_err;
netid = kmalloc(nlen+1, gfp_flags);
if (unlikely(!netid))
goto out_err;
netid[nlen] = '\0';
memcpy(netid, p, nlen);
/* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
goto out_free_netid;
rlen = be32_to_cpup(p);
p = xdr_inline_decode(xdr, rlen);
if (unlikely(!p))
goto out_free_netid;
/* port is ".ABC.DEF", 8 chars max */
if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) {
dprintk("%s: Invalid address, length %d\n", __func__,
rlen);
rlen = xdr_stream_decode_string_dup(xdr, &buf, INET6_ADDRSTRLEN +
IPV6_SCOPE_ID_LEN + 8, gfp_flags);
if (unlikely(rlen < 0))
goto out_free_netid;
}
buf = kmalloc(rlen + 1, gfp_flags);
if (!buf) {
dprintk("%s: Not enough memory\n", __func__);
goto out_free_netid;
}
buf[rlen] = '\0';
memcpy(buf, p, rlen);
/* replace port '.' with '-' */
portstr = strrchr(buf, '.');
@ -1089,12 +1084,10 @@ nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags)
}
*portstr = '\0';
da = kzalloc(sizeof(*da), gfp_flags);
da = nfs4_pnfs_ds_addr_alloc(gfp_flags);
if (unlikely(!da))
goto out_free_buf;
INIT_LIST_HEAD(&da->da_node);
if (!rpc_pton(net, buf, portstr-buf, (struct sockaddr *)&da->da_addr,
sizeof(da->da_addr))) {
dprintk("%s: error parsing address %s\n", __func__, buf);
@ -1109,15 +1102,11 @@ nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags)
case AF_INET:
((struct sockaddr_in *)&da->da_addr)->sin_port = port;
da->da_addrlen = sizeof(struct sockaddr_in);
match_netid = "tcp";
match_netid_len = 3;
break;
case AF_INET6:
((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port;
da->da_addrlen = sizeof(struct sockaddr_in6);
match_netid = "tcp6";
match_netid_len = 4;
startsep = "[";
endsep = "]";
break;
@ -1128,12 +1117,15 @@ nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags)
goto out_free_da;
}
if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) {
dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n",
__func__, netid, match_netid);
da->da_transport = xprt_find_transport_ident(netid);
if (da->da_transport < 0) {
dprintk("%s: ERROR: unknown r_netid \"%s\"\n",
__func__, netid);
goto out_free_da;
}
da->da_netid = netid;
/* save human readable address */
len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7;
da->da_remotestr = kzalloc(len, gfp_flags);
@ -1145,7 +1137,6 @@ nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags)
dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr);
kfree(buf);
kfree(netid);
return da;
out_free_da:

View File

@ -499,26 +499,26 @@ nfs_proc_rmdir(struct inode *dir, const struct qstr *name)
* sure it is syntactically correct; the entries itself are decoded
* from nfs_readdir by calling the decode_entry function directly.
*/
static int
nfs_proc_readdir(struct dentry *dentry, const struct cred *cred,
u64 cookie, struct page **pages, unsigned int count, bool plus)
static int nfs_proc_readdir(struct nfs_readdir_arg *nr_arg,
struct nfs_readdir_res *nr_res)
{
struct inode *dir = d_inode(dentry);
struct inode *dir = d_inode(nr_arg->dentry);
struct nfs_readdirargs arg = {
.fh = NFS_FH(dir),
.cookie = cookie,
.count = count,
.pages = pages,
.cookie = nr_arg->cookie,
.count = nr_arg->page_len,
.pages = nr_arg->pages,
};
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_READDIR],
.rpc_argp = &arg,
.rpc_cred = cred,
.rpc_cred = nr_arg->cred,
};
int status;
dprintk("NFS call readdir %d\n", (unsigned int)cookie);
dprintk("NFS call readdir %llu\n", (unsigned long long)nr_arg->cookie);
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nr_res->verf[0] = nr_res->verf[1] = 0;
nfs_invalidate_atime(dir);

View File

@ -45,6 +45,11 @@
*/
#define NFS_RPC_SWAPFLAGS (RPC_TASK_SWAPPER|RPC_TASK_ROOTCREDS)
/*
* Size of the NFS directory verifier
*/
#define NFS_DIR_VERIFIER_SIZE 2
/*
* NFSv3/v4 Access mode cache entry
*/
@ -88,8 +93,8 @@ struct nfs_open_context {
struct nfs_open_dir_context {
struct list_head list;
const struct cred *cred;
unsigned long attr_gencount;
__be32 verf[NFS_DIR_VERIFIER_SIZE];
__u64 dir_cookie;
__u64 dup_cookie;
signed char duped;
@ -157,7 +162,7 @@ struct nfs_inode {
* This is the cookie verifier used for NFSv3 readdir
* operations
*/
__be32 cookieverf[2];
__be32 cookieverf[NFS_DIR_VERIFIER_SIZE];
atomic_long_t nrequests;
struct nfs_mds_commit_info commit_info;

View File

@ -750,6 +750,20 @@ struct nfs_entry {
struct nfs_server * server;
};
/*
 * Arguments handed to the nfs_rpc_ops ->readdir() method.
 */
struct nfs_readdir_arg {
struct dentry *dentry; /* dentry of the directory; d_inode() of it is the inode used for the call */
const struct cred *cred; /* used as rpc_cred of the RPC message */
__be32 *verf; /* verifier supplied by the caller; the NFSv4 path passes it to nfs4_setup_readdir() */
u64 cookie; /* readdir cookie sent in the request */
struct page **pages; /* pages handed to the request as its .pages */
unsigned int page_len; /* used as the request's .count; presumably a byte length - TODO confirm */
bool plus; /* copied to the NFSv4 request's .plus flag */
};
/*
 * Results filled in by the nfs_rpc_ops ->readdir() method.
 */
struct nfs_readdir_res {
__be32 *verf; /* output verifier: the NFSv4 path copies NFS4_VERIFIER_SIZE bytes of the reply verifier here, the NFSv2 path zeroes verf[0] and verf[1] */
};
/*
* The following types are for NFSv2 only.
*/
@ -1744,8 +1758,7 @@ struct nfs_rpc_ops {
unsigned int, struct iattr *);
int (*mkdir) (struct inode *, struct dentry *, struct iattr *);
int (*rmdir) (struct inode *, const struct qstr *);
int (*readdir) (struct dentry *, const struct cred *,
u64, struct page **, unsigned int, bool);
int (*readdir) (struct nfs_readdir_arg *, struct nfs_readdir_res *);
int (*mknod) (struct inode *, struct dentry *, struct iattr *,
dev_t);
int (*statfs) (struct nfs_server *, struct nfs_fh *,

View File

@ -128,8 +128,8 @@ __be32 *xdr_decode_netobj(__be32 *p, struct xdr_netobj *);
void xdr_inline_pages(struct xdr_buf *, unsigned int,
struct page **, unsigned int, unsigned int);
void xdr_terminate_string(struct xdr_buf *, const u32);
size_t xdr_buf_pagecount(struct xdr_buf *buf);
void xdr_terminate_string(const struct xdr_buf *, const u32);
size_t xdr_buf_pagecount(const struct xdr_buf *buf);
int xdr_alloc_bvec(struct xdr_buf *buf, gfp_t gfp);
void xdr_free_bvec(struct xdr_buf *buf);
@ -182,15 +182,14 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p)
* XDR buffer helper functions
*/
extern void xdr_shift_buf(struct xdr_buf *, size_t);
extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *);
extern int xdr_buf_subsegment(const struct xdr_buf *buf, struct xdr_buf *subbuf,
unsigned int base, unsigned int len);
extern void xdr_buf_from_iov(const struct kvec *, struct xdr_buf *);
extern int xdr_buf_subsegment(const struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int);
extern void xdr_buf_trim(struct xdr_buf *, unsigned int);
extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
extern int read_bytes_from_xdr_buf(const struct xdr_buf *, unsigned int, void *, unsigned int);
extern int write_bytes_to_xdr_buf(const struct xdr_buf *, unsigned int, void *, unsigned int);
extern int xdr_encode_word(struct xdr_buf *, unsigned int, u32);
extern int xdr_decode_word(struct xdr_buf *, unsigned int, u32 *);
extern int xdr_encode_word(const struct xdr_buf *, unsigned int, u32);
extern int xdr_decode_word(const struct xdr_buf *, unsigned int, u32 *);
struct xdr_array2_desc;
typedef int (*xdr_xcode_elem_t)(struct xdr_array2_desc *desc, void *elem);
@ -201,9 +200,9 @@ struct xdr_array2_desc {
xdr_xcode_elem_t xcode;
};
extern int xdr_decode_array2(struct xdr_buf *buf, unsigned int base,
extern int xdr_decode_array2(const struct xdr_buf *buf, unsigned int base,
struct xdr_array2_desc *desc);
extern int xdr_encode_array2(struct xdr_buf *buf, unsigned int base,
extern int xdr_encode_array2(const struct xdr_buf *buf, unsigned int base,
struct xdr_array2_desc *desc);
extern void _copy_from_pages(char *p, struct page **pages, size_t pgbase,
size_t len);
@ -251,9 +250,9 @@ extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes);
extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len);
extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len);
extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data);
extern uint64_t xdr_align_data(struct xdr_stream *, uint64_t, uint32_t);
extern uint64_t xdr_expand_hole(struct xdr_stream *, uint64_t, uint64_t);
extern int xdr_process_buf(const struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data);
extern unsigned int xdr_align_data(struct xdr_stream *, unsigned int offset, unsigned int length);
extern unsigned int xdr_expand_hole(struct xdr_stream *, unsigned int offset, unsigned int length);
extern bool xdr_stream_subsegment(struct xdr_stream *xdr, struct xdr_buf *subbuf,
unsigned int len);

View File

@ -330,6 +330,7 @@ struct xprt_class {
struct rpc_xprt * (*setup)(struct xprt_create *);
struct module *owner;
char name[32];
const char * netid[];
};
/*
@ -384,7 +385,7 @@ xprt_disable_swap(struct rpc_xprt *xprt)
*/
int xprt_register_transport(struct xprt_class *type);
int xprt_unregister_transport(struct xprt_class *type);
int xprt_load_transport(const char *);
int xprt_find_transport_ident(const char *);
void xprt_wait_for_reply_request_def(struct rpc_task *task);
void xprt_wait_for_reply_request_rtt(struct rpc_task *task);
void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status);

View File

@ -60,7 +60,7 @@ DECLARE_EVENT_CLASS(rpcrdma_completion_class,
), \
TP_ARGS(wc, cid))
DECLARE_EVENT_CLASS(xprtrdma_reply_event,
DECLARE_EVENT_CLASS(xprtrdma_reply_class,
TP_PROTO(
const struct rpcrdma_rep *rep
),
@ -68,29 +68,30 @@ DECLARE_EVENT_CLASS(xprtrdma_reply_event,
TP_ARGS(rep),
TP_STRUCT__entry(
__field(const void *, rep)
__field(const void *, r_xprt)
__field(u32, xid)
__field(u32, version)
__field(u32, proc)
__string(addr, rpcrdma_addrstr(rep->rr_rxprt))
__string(port, rpcrdma_portstr(rep->rr_rxprt))
),
TP_fast_assign(
__entry->rep = rep;
__entry->r_xprt = rep->rr_rxprt;
__entry->xid = be32_to_cpu(rep->rr_xid);
__entry->version = be32_to_cpu(rep->rr_vers);
__entry->proc = be32_to_cpu(rep->rr_proc);
__assign_str(addr, rpcrdma_addrstr(rep->rr_rxprt));
__assign_str(port, rpcrdma_portstr(rep->rr_rxprt));
),
TP_printk("rxprt %p xid=0x%08x rep=%p: version %u proc %u",
__entry->r_xprt, __entry->xid, __entry->rep,
__entry->version, __entry->proc
TP_printk("peer=[%s]:%s xid=0x%08x version=%u proc=%u",
__get_str(addr), __get_str(port),
__entry->xid, __entry->version, __entry->proc
)
);
#define DEFINE_REPLY_EVENT(name) \
DEFINE_EVENT(xprtrdma_reply_event, name, \
DEFINE_EVENT(xprtrdma_reply_class, \
xprtrdma_reply_##name##_err, \
TP_PROTO( \
const struct rpcrdma_rep *rep \
), \
@ -261,41 +262,6 @@ DECLARE_EVENT_CLASS(xprtrdma_wrch_event,
), \
TP_ARGS(task, mr, nsegs))
DECLARE_EVENT_CLASS(xprtrdma_frwr_done,
TP_PROTO(
const struct ib_wc *wc,
const struct rpcrdma_frwr *frwr
),
TP_ARGS(wc, frwr),
TP_STRUCT__entry(
__field(u32, mr_id)
__field(unsigned int, status)
__field(unsigned int, vendor_err)
),
TP_fast_assign(
__entry->mr_id = frwr->fr_mr->res.id;
__entry->status = wc->status;
__entry->vendor_err = __entry->status ? wc->vendor_err : 0;
),
TP_printk(
"mr.id=%u: %s (%u/0x%x)",
__entry->mr_id, rdma_show_wc_status(__entry->status),
__entry->status, __entry->vendor_err
)
);
#define DEFINE_FRWR_DONE_EVENT(name) \
DEFINE_EVENT(xprtrdma_frwr_done, name, \
TP_PROTO( \
const struct ib_wc *wc, \
const struct rpcrdma_frwr *frwr \
), \
TP_ARGS(wc, frwr))
TRACE_DEFINE_ENUM(DMA_BIDIRECTIONAL);
TRACE_DEFINE_ENUM(DMA_TO_DEVICE);
TRACE_DEFINE_ENUM(DMA_FROM_DEVICE);
@ -308,7 +274,55 @@ TRACE_DEFINE_ENUM(DMA_NONE);
{ DMA_FROM_DEVICE, "FROM_DEVICE" }, \
{ DMA_NONE, "NONE" })
DECLARE_EVENT_CLASS(xprtrdma_mr,
/*
 * Event class recording an rpcrdma_mr (id, nents, handle, length, offset,
 * DMA direction) together with the pid and client id of the RPC task
 * reached via mr->mr_req.
 */
DECLARE_EVENT_CLASS(xprtrdma_mr_class,
TP_PROTO(
const struct rpcrdma_mr *mr
),
TP_ARGS(mr),
TP_STRUCT__entry(
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(u32, mr_id)
__field(int, nents)
__field(u32, handle)
__field(u32, length)
__field(u64, offset)
__field(u32, dir)
),
TP_fast_assign(
/*
 * mr->mr_req is dereferenced unconditionally.
 * NOTE(review): presumably this class is only for MRs attached to a
 * request, with xprtrdma_anonymous_mr_class covering the rest - confirm.
 */
const struct rpcrdma_req *req = mr->mr_req;
const struct rpc_task *task = req->rl_slot.rq_task;
__entry->task_id = task->tk_pid;
__entry->client_id = task->tk_client->cl_clid;
__entry->mr_id = mr->frwr.fr_mr->res.id;
__entry->nents = mr->mr_nents;
__entry->handle = mr->mr_handle;
__entry->length = mr->mr_length;
__entry->offset = mr->mr_offset;
__entry->dir = mr->mr_dir;
),
TP_printk("task:%u@%u mr.id=%u nents=%d %u@0x%016llx:0x%08x (%s)",
__entry->task_id, __entry->client_id,
__entry->mr_id, __entry->nents, __entry->length,
(unsigned long long)__entry->offset, __entry->handle,
xprtrdma_show_direction(__entry->dir)
)
);
/*
 * Define a tracepoint called xprtrdma_mr_<name> in xprtrdma_mr_class.
 * The tracepoint takes a single rpcrdma_mr pointer.
 */
#define DEFINE_MR_EVENT(name) \
DEFINE_EVENT(xprtrdma_mr_class, \
xprtrdma_mr_##name, \
TP_PROTO( \
const struct rpcrdma_mr *mr \
), \
TP_ARGS(mr))
DECLARE_EVENT_CLASS(xprtrdma_anonymous_mr_class,
TP_PROTO(
const struct rpcrdma_mr *mr
),
@ -340,45 +354,47 @@ DECLARE_EVENT_CLASS(xprtrdma_mr,
)
);
#define DEFINE_MR_EVENT(name) \
DEFINE_EVENT(xprtrdma_mr, xprtrdma_mr_##name, \
TP_PROTO( \
const struct rpcrdma_mr *mr \
), \
#define DEFINE_ANON_MR_EVENT(name) \
DEFINE_EVENT(xprtrdma_anonymous_mr_class, \
xprtrdma_mr_##name, \
TP_PROTO( \
const struct rpcrdma_mr *mr \
), \
TP_ARGS(mr))
DECLARE_EVENT_CLASS(xprtrdma_cb_event,
DECLARE_EVENT_CLASS(xprtrdma_callback_class,
TP_PROTO(
const struct rpcrdma_xprt *r_xprt,
const struct rpc_rqst *rqst
),
TP_ARGS(rqst),
TP_ARGS(r_xprt, rqst),
TP_STRUCT__entry(
__field(const void *, rqst)
__field(const void *, rep)
__field(const void *, req)
__field(u32, xid)
__string(addr, rpcrdma_addrstr(r_xprt))
__string(port, rpcrdma_portstr(r_xprt))
),
TP_fast_assign(
__entry->rqst = rqst;
__entry->req = rpcr_to_rdmar(rqst);
__entry->rep = rpcr_to_rdmar(rqst)->rl_reply;
__entry->xid = be32_to_cpu(rqst->rq_xid);
__assign_str(addr, rpcrdma_addrstr(r_xprt));
__assign_str(port, rpcrdma_portstr(r_xprt));
),
TP_printk("xid=0x%08x, rqst=%p req=%p rep=%p",
__entry->xid, __entry->rqst, __entry->req, __entry->rep
TP_printk("peer=[%s]:%s xid=0x%08x",
__get_str(addr), __get_str(port), __entry->xid
)
);
#define DEFINE_CB_EVENT(name) \
DEFINE_EVENT(xprtrdma_cb_event, name, \
#define DEFINE_CALLBACK_EVENT(name) \
DEFINE_EVENT(xprtrdma_callback_class, \
xprtrdma_cb_##name, \
TP_PROTO( \
const struct rpcrdma_xprt *r_xprt, \
const struct rpc_rqst *rqst \
), \
TP_ARGS(rqst))
TP_ARGS(r_xprt, rqst))
/**
** Connection events
@ -549,61 +565,33 @@ TRACE_EVENT(xprtrdma_createmrs,
)
);
TRACE_EVENT(xprtrdma_mr_get,
TRACE_EVENT(xprtrdma_nomrs_err,
TP_PROTO(
const struct rpcrdma_xprt *r_xprt,
const struct rpcrdma_req *req
),
TP_ARGS(req),
TP_ARGS(r_xprt, req),
TP_STRUCT__entry(
__field(const void *, req)
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(u32, xid)
__string(addr, rpcrdma_addrstr(r_xprt))
__string(port, rpcrdma_portstr(r_xprt))
),
TP_fast_assign(
const struct rpc_rqst *rqst = &req->rl_slot;
__entry->req = req;
__entry->task_id = rqst->rq_task->tk_pid;
__entry->client_id = rqst->rq_task->tk_client->cl_clid;
__entry->xid = be32_to_cpu(rqst->rq_xid);
__assign_str(addr, rpcrdma_addrstr(r_xprt));
__assign_str(port, rpcrdma_portstr(r_xprt));
),
TP_printk("task:%u@%u xid=0x%08x req=%p",
__entry->task_id, __entry->client_id, __entry->xid,
__entry->req
)
);
TRACE_EVENT(xprtrdma_nomrs,
TP_PROTO(
const struct rpcrdma_req *req
),
TP_ARGS(req),
TP_STRUCT__entry(
__field(const void *, req)
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(u32, xid)
),
TP_fast_assign(
const struct rpc_rqst *rqst = &req->rl_slot;
__entry->req = req;
__entry->task_id = rqst->rq_task->tk_pid;
__entry->client_id = rqst->rq_task->tk_client->cl_clid;
__entry->xid = be32_to_cpu(rqst->rq_xid);
),
TP_printk("task:%u@%u xid=0x%08x req=%p",
__entry->task_id, __entry->client_id, __entry->xid,
__entry->req
TP_printk("peer=[%s]:%s task:%u@%u",
__get_str(addr), __get_str(port),
__entry->task_id, __entry->client_id
)
);
@ -735,8 +723,8 @@ TRACE_EVENT(xprtrdma_post_send,
TP_ARGS(req),
TP_STRUCT__entry(
__field(const void *, req)
__field(const void *, sc)
__field(u32, cq_id)
__field(int, completion_id)
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(int, num_sge)
@ -745,20 +733,21 @@ TRACE_EVENT(xprtrdma_post_send,
TP_fast_assign(
const struct rpc_rqst *rqst = &req->rl_slot;
const struct rpcrdma_sendctx *sc = req->rl_sendctx;
__entry->cq_id = sc->sc_cid.ci_queue_id;
__entry->completion_id = sc->sc_cid.ci_completion_id;
__entry->task_id = rqst->rq_task->tk_pid;
__entry->client_id = rqst->rq_task->tk_client ?
rqst->rq_task->tk_client->cl_clid : -1;
__entry->req = req;
__entry->sc = req->rl_sendctx;
__entry->num_sge = req->rl_wr.num_sge;
__entry->signaled = req->rl_wr.send_flags & IB_SEND_SIGNALED;
),
TP_printk("task:%u@%u req=%p sc=%p (%d SGE%s) %s",
TP_printk("task:%u@%u cq.id=%u cid=%d (%d SGE%s) %s",
__entry->task_id, __entry->client_id,
__entry->req, __entry->sc, __entry->num_sge,
(__entry->num_sge == 1 ? "" : "s"),
__entry->cq_id, __entry->completion_id,
__entry->num_sge, (__entry->num_sge == 1 ? "" : "s"),
(__entry->signaled ? "signaled" : "")
)
);
@ -771,15 +760,17 @@ TRACE_EVENT(xprtrdma_post_recv,
TP_ARGS(rep),
TP_STRUCT__entry(
__field(const void *, rep)
__field(u32, cq_id)
__field(int, completion_id)
),
TP_fast_assign(
__entry->rep = rep;
__entry->cq_id = rep->rr_cid.ci_queue_id;
__entry->completion_id = rep->rr_cid.ci_completion_id;
),
TP_printk("rep=%p",
__entry->rep
TP_printk("cq.id=%d cid=%d",
__entry->cq_id, __entry->completion_id
)
);
@ -816,7 +807,7 @@ TRACE_EVENT(xprtrdma_post_recvs,
)
);
TRACE_EVENT(xprtrdma_post_linv,
TRACE_EVENT(xprtrdma_post_linv_err,
TP_PROTO(
const struct rpcrdma_req *req,
int status
@ -825,19 +816,21 @@ TRACE_EVENT(xprtrdma_post_linv,
TP_ARGS(req, status),
TP_STRUCT__entry(
__field(const void *, req)
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(int, status)
__field(u32, xid)
),
TP_fast_assign(
__entry->req = req;
const struct rpc_task *task = req->rl_slot.rq_task;
__entry->task_id = task->tk_pid;
__entry->client_id = task->tk_client->cl_clid;
__entry->status = status;
__entry->xid = be32_to_cpu(req->rl_slot.rq_xid);
),
TP_printk("req=%p xid=0x%08x status=%d",
__entry->req, __entry->xid, __entry->status
TP_printk("task:%u@%u status=%d",
__entry->task_id, __entry->client_id, __entry->status
)
);
@ -845,75 +838,12 @@ TRACE_EVENT(xprtrdma_post_linv,
** Completion events
**/
TRACE_EVENT(xprtrdma_wc_send,
TP_PROTO(
const struct rpcrdma_sendctx *sc,
const struct ib_wc *wc
),
TP_ARGS(sc, wc),
TP_STRUCT__entry(
__field(const void *, req)
__field(const void *, sc)
__field(unsigned int, unmap_count)
__field(unsigned int, status)
__field(unsigned int, vendor_err)
),
TP_fast_assign(
__entry->req = sc->sc_req;
__entry->sc = sc;
__entry->unmap_count = sc->sc_unmap_count;
__entry->status = wc->status;
__entry->vendor_err = __entry->status ? wc->vendor_err : 0;
),
TP_printk("req=%p sc=%p unmapped=%u: %s (%u/0x%x)",
__entry->req, __entry->sc, __entry->unmap_count,
rdma_show_wc_status(__entry->status),
__entry->status, __entry->vendor_err
)
);
TRACE_EVENT(xprtrdma_wc_receive,
TP_PROTO(
const struct ib_wc *wc
),
TP_ARGS(wc),
TP_STRUCT__entry(
__field(const void *, rep)
__field(u32, byte_len)
__field(unsigned int, status)
__field(u32, vendor_err)
),
TP_fast_assign(
__entry->rep = container_of(wc->wr_cqe, struct rpcrdma_rep,
rr_cqe);
__entry->status = wc->status;
if (wc->status) {
__entry->byte_len = 0;
__entry->vendor_err = wc->vendor_err;
} else {
__entry->byte_len = wc->byte_len;
__entry->vendor_err = 0;
}
),
TP_printk("rep=%p %u bytes: %s (%u/0x%x)",
__entry->rep, __entry->byte_len,
rdma_show_wc_status(__entry->status),
__entry->status, __entry->vendor_err
)
);
DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_fastreg);
DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li);
DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li_wake);
DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li_done);
DEFINE_COMPLETION_EVENT(xprtrdma_wc_receive);
DEFINE_COMPLETION_EVENT(xprtrdma_wc_send);
DEFINE_COMPLETION_EVENT(xprtrdma_wc_fastreg);
DEFINE_COMPLETION_EVENT(xprtrdma_wc_li);
DEFINE_COMPLETION_EVENT(xprtrdma_wc_li_wake);
DEFINE_COMPLETION_EVENT(xprtrdma_wc_li_done);
TRACE_EVENT(xprtrdma_frwr_alloc,
TP_PROTO(
@ -1036,9 +966,9 @@ TRACE_EVENT(xprtrdma_frwr_maperr,
DEFINE_MR_EVENT(localinv);
DEFINE_MR_EVENT(map);
DEFINE_MR_EVENT(unmap);
DEFINE_MR_EVENT(reminv);
DEFINE_MR_EVENT(recycle);
DEFINE_ANON_MR_EVENT(unmap);
DEFINE_ANON_MR_EVENT(recycle);
TRACE_EVENT(xprtrdma_dma_maperr,
TP_PROTO(
@ -1066,17 +996,14 @@ TRACE_EVENT(xprtrdma_reply,
TP_PROTO(
const struct rpc_task *task,
const struct rpcrdma_rep *rep,
const struct rpcrdma_req *req,
unsigned int credits
),
TP_ARGS(task, rep, req, credits),
TP_ARGS(task, rep, credits),
TP_STRUCT__entry(
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(const void *, rep)
__field(const void *, req)
__field(u32, xid)
__field(unsigned int, credits)
),
@ -1084,49 +1011,102 @@ TRACE_EVENT(xprtrdma_reply,
TP_fast_assign(
__entry->task_id = task->tk_pid;
__entry->client_id = task->tk_client->cl_clid;
__entry->rep = rep;
__entry->req = req;
__entry->xid = be32_to_cpu(rep->rr_xid);
__entry->credits = credits;
),
TP_printk("task:%u@%u xid=0x%08x, %u credits, rep=%p -> req=%p",
TP_printk("task:%u@%u xid=0x%08x credits=%u",
__entry->task_id, __entry->client_id, __entry->xid,
__entry->credits, __entry->rep, __entry->req
__entry->credits
)
);
TRACE_EVENT(xprtrdma_defer_cmp,
DEFINE_REPLY_EVENT(vers);
DEFINE_REPLY_EVENT(rqst);
DEFINE_REPLY_EVENT(short);
DEFINE_REPLY_EVENT(hdr);
TRACE_EVENT(xprtrdma_err_vers,
TP_PROTO(
const struct rpcrdma_rep *rep
const struct rpc_rqst *rqst,
__be32 *min,
__be32 *max
),
TP_ARGS(rep),
TP_ARGS(rqst, min, max),
TP_STRUCT__entry(
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(u32, xid)
__field(u32, min)
__field(u32, max)
),
TP_fast_assign(
__entry->task_id = rqst->rq_task->tk_pid;
__entry->client_id = rqst->rq_task->tk_client->cl_clid;
__entry->xid = be32_to_cpu(rqst->rq_xid);
__entry->min = be32_to_cpup(min);
__entry->max = be32_to_cpup(max);
),
TP_printk("task:%u@%u xid=0x%08x versions=[%u, %u]",
__entry->task_id, __entry->client_id, __entry->xid,
__entry->min, __entry->max
)
);
TRACE_EVENT(xprtrdma_err_chunk,
TP_PROTO(
const struct rpc_rqst *rqst
),
TP_ARGS(rqst),
TP_STRUCT__entry(
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(const void *, rep)
__field(u32, xid)
),
TP_fast_assign(
__entry->task_id = rep->rr_rqst->rq_task->tk_pid;
__entry->client_id = rep->rr_rqst->rq_task->tk_client->cl_clid;
__entry->rep = rep;
__entry->xid = be32_to_cpu(rep->rr_xid);
__entry->task_id = rqst->rq_task->tk_pid;
__entry->client_id = rqst->rq_task->tk_client->cl_clid;
__entry->xid = be32_to_cpu(rqst->rq_xid);
),
TP_printk("task:%u@%u xid=0x%08x rep=%p",
__entry->task_id, __entry->client_id, __entry->xid,
__entry->rep
TP_printk("task:%u@%u xid=0x%08x",
__entry->task_id, __entry->client_id, __entry->xid
)
);
DEFINE_REPLY_EVENT(xprtrdma_reply_vers);
DEFINE_REPLY_EVENT(xprtrdma_reply_rqst);
DEFINE_REPLY_EVENT(xprtrdma_reply_short);
DEFINE_REPLY_EVENT(xprtrdma_reply_hdr);
/*
 * Fired when the server returns an RPC/RDMA error whose code the client
 * does not recognize.
 *
 * @rqst:      the RPC request the error reply belongs to
 * @procedure: pointer to the big-endian value taken from the reply
 *
 * Output: "task:<pid>@<clid> xid=<xid> procedure=<value>".
 */
TRACE_EVENT(xprtrdma_err_unrecognized,
	TP_PROTO(
		const struct rpc_rqst *rqst,
		__be32 *procedure
	),

	TP_ARGS(rqst, procedure),

	TP_STRUCT__entry(
		__field(unsigned int, task_id)
		__field(unsigned int, client_id)
		__field(u32, xid)
		__field(u32, procedure)
	),

	TP_fast_assign(
		__entry->task_id = rqst->rq_task->tk_pid;
		__entry->client_id = rqst->rq_task->tk_client->cl_clid;
		/*
		 * The xid field is printed below, so it must be recorded
		 * here; otherwise the event shows leftover ring-buffer
		 * contents.
		 */
		__entry->xid = be32_to_cpu(rqst->rq_xid);
		__entry->procedure = be32_to_cpup(procedure);
	),

	TP_printk("task:%u@%u xid=0x%08x procedure=%u",
		__entry->task_id, __entry->client_id, __entry->xid,
		__entry->procedure
	)
);
TRACE_EVENT(xprtrdma_fixup,
TP_PROTO(
@ -1187,6 +1167,28 @@ TRACE_EVENT(xprtrdma_decode_seg,
)
);
/*
 * Records the RPC task (pid and client id) on whose behalf MRs are being
 * torn down. task->tk_client is dereferenced without a NULL check.
 *
 * Output: "task:<pid>@<clid>".
 */
TRACE_EVENT(xprtrdma_mrs_zap,
TP_PROTO(
const struct rpc_task *task
),
TP_ARGS(task),
TP_STRUCT__entry(
__field(unsigned int, task_id)
__field(unsigned int, client_id)
),
TP_fast_assign(
__entry->task_id = task->tk_pid;
__entry->client_id = task->tk_client->cl_clid;
),
TP_printk("task:%u@%u",
__entry->task_id, __entry->client_id
)
);
/**
** Callback events
**/
@ -1219,36 +1221,8 @@ TRACE_EVENT(xprtrdma_cb_setup,
)
);
DEFINE_CB_EVENT(xprtrdma_cb_call);
DEFINE_CB_EVENT(xprtrdma_cb_reply);
TRACE_EVENT(xprtrdma_leaked_rep,
TP_PROTO(
const struct rpc_rqst *rqst,
const struct rpcrdma_rep *rep
),
TP_ARGS(rqst, rep),
TP_STRUCT__entry(
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(u32, xid)
__field(const void *, rep)
),
TP_fast_assign(
__entry->task_id = rqst->rq_task->tk_pid;
__entry->client_id = rqst->rq_task->tk_client->cl_clid;
__entry->xid = be32_to_cpu(rqst->rq_xid);
__entry->rep = rep;
),
TP_printk("task:%u@%u xid=0x%08x rep=%p",
__entry->task_id, __entry->client_id, __entry->xid,
__entry->rep
)
);
DEFINE_CALLBACK_EVENT(call);
DEFINE_CALLBACK_EVENT(reply);
/**
** Server-side RPC/RDMA events

View File

@ -1251,10 +1251,7 @@ void rpc_prepare_reply_pages(struct rpc_rqst *req, struct page **pages,
unsigned int base, unsigned int len,
unsigned int hdrsize)
{
/* Subtract one to force an extra word of buffer space for the
* payload's XDR pad to fall into the rcv_buf's tail iovec.
*/
hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_ralign - 1;
hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_ralign;
xdr_inline_pages(&req->rq_rcv_buf, hdrsize << 2, pages, base, len);
trace_rpc_xdr_reply_pages(req->rq_task, &req->rq_rcv_buf);

View File

@ -128,13 +128,13 @@ static int do_xprt_debugfs(struct rpc_clnt *clnt, struct rpc_xprt *xprt, void *n
return 0;
len = snprintf(name, sizeof(name), "../../rpc_xprt/%s",
xprt->debugfs->d_name.name);
if (len > sizeof(name))
if (len >= sizeof(name))
return -1;
if (*nump == 0)
strcpy(link, "xprt");
else {
len = snprintf(link, sizeof(link), "xprt%d", *nump);
if (len > sizeof(link))
if (len >= sizeof(link))
return -1;
}
debugfs_create_symlink(link, clnt->cl_debugfs, name);

View File

@ -675,6 +675,23 @@ struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *queue)
}
EXPORT_SYMBOL_GPL(rpc_wake_up_next);
/**
 * rpc_wake_up_locked - wake up every rpc_task sleeping on a queue
 * @queue: rpc_wait_queue on which the tasks are sleeping
 *
 * Keeps asking __rpc_find_next_queued() for a task and wakes it, until
 * no task is returned. rpc_wake_up() calls this with queue->lock held.
 */
static void rpc_wake_up_locked(struct rpc_wait_queue *queue)
{
	struct rpc_task *next;

	while ((next = __rpc_find_next_queued(queue)) != NULL)
		rpc_wake_up_task_queue_locked(queue, next);
}
/**
* rpc_wake_up - wake up all rpc_tasks
* @queue: rpc_wait_queue on which the tasks are sleeping
@ -683,26 +700,29 @@ EXPORT_SYMBOL_GPL(rpc_wake_up_next);
*/
void rpc_wake_up(struct rpc_wait_queue *queue)
{
struct list_head *head;
spin_lock(&queue->lock);
head = &queue->tasks[queue->maxpriority];
for (;;) {
while (!list_empty(head)) {
struct rpc_task *task;
task = list_first_entry(head,
struct rpc_task,
u.tk_wait.list);
rpc_wake_up_task_queue_locked(queue, task);
}
if (head == &queue->tasks[0])
break;
head--;
}
rpc_wake_up_locked(queue);
spin_unlock(&queue->lock);
}
EXPORT_SYMBOL_GPL(rpc_wake_up);
/**
 * rpc_wake_up_status_locked - wake up every rpc_task and set its status
 * @queue: rpc_wait_queue on which the tasks are sleeping
 * @status: status value handed to each woken task
 *
 * Keeps asking __rpc_find_next_queued() for a task and wakes it with
 * @status, until no task is returned. rpc_wake_up_status() calls this
 * with queue->lock held.
 */
static void rpc_wake_up_status_locked(struct rpc_wait_queue *queue, int status)
{
	struct rpc_task *next;

	while ((next = __rpc_find_next_queued(queue)) != NULL)
		rpc_wake_up_task_queue_set_status_locked(queue, next, status);
}
/**
* rpc_wake_up_status - wake up all rpc_tasks and set their status value.
* @queue: rpc_wait_queue on which the tasks are sleeping
@ -712,23 +732,8 @@ EXPORT_SYMBOL_GPL(rpc_wake_up);
*/
void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
{
struct list_head *head;
spin_lock(&queue->lock);
head = &queue->tasks[queue->maxpriority];
for (;;) {
while (!list_empty(head)) {
struct rpc_task *task;
task = list_first_entry(head,
struct rpc_task,
u.tk_wait.list);
task->tk_status = status;
rpc_wake_up_task_queue_locked(queue, task);
}
if (head == &queue->tasks[0])
break;
head--;
}
rpc_wake_up_status_locked(queue, status);
spin_unlock(&queue->lock);
}
EXPORT_SYMBOL_GPL(rpc_wake_up_status);

File diff suppressed because it is too large Load Diff

View File

@ -151,33 +151,94 @@ int xprt_unregister_transport(struct xprt_class *transport)
}
EXPORT_SYMBOL_GPL(xprt_unregister_transport);
/**
* xprt_load_transport - load a transport implementation
* @transport_name: transport to load
*
* Returns:
* 0: transport successfully loaded
* -ENOENT: transport module not available
*/
int xprt_load_transport(const char *transport_name)
static void
xprt_class_release(const struct xprt_class *t)
{
struct xprt_class *t;
int result;
module_put(t->owner);
}
static const struct xprt_class *
xprt_class_find_by_ident_locked(int ident)
{
const struct xprt_class *t;
result = 0;
spin_lock(&xprt_list_lock);
list_for_each_entry(t, &xprt_list, list) {
if (strcmp(t->name, transport_name) == 0) {
spin_unlock(&xprt_list_lock);
goto out;
if (t->ident != ident)
continue;
if (!try_module_get(t->owner))
continue;
return t;
}
return NULL;
}
/*
 * Look up a registered transport class by its numeric identifier,
 * taking xprt_list_lock around the search. Returns NULL when no class
 * is found. A returned class is released with xprt_class_release()
 * once the caller is done with it.
 */
static const struct xprt_class *
xprt_class_find_by_ident(int ident)
{
	const struct xprt_class *found;

	spin_lock(&xprt_list_lock);
	found = xprt_class_find_by_ident_locked(ident);
	spin_unlock(&xprt_list_lock);

	return found;
}
static const struct xprt_class *
xprt_class_find_by_netid_locked(const char *netid)
{
const struct xprt_class *t;
unsigned int i;
list_for_each_entry(t, &xprt_list, list) {
for (i = 0; t->netid[i][0] != '\0'; i++) {
if (strcmp(t->netid[i], netid) != 0)
continue;
if (!try_module_get(t->owner))
continue;
return t;
}
}
spin_unlock(&xprt_list_lock);
result = request_module("xprt%s", transport_name);
out:
return result;
return NULL;
}
EXPORT_SYMBOL_GPL(xprt_load_transport);
/*
 * Look up a registered transport class by netid. If the first search
 * finds nothing, try to load a module named "rpc<netid>" and search
 * once more. The request_module() result is deliberately ignored: the
 * second lookup decides the outcome. Returns NULL when no class is
 * found.
 */
static const struct xprt_class *
xprt_class_find_by_netid(const char *netid)
{
	const struct xprt_class *match;

	spin_lock(&xprt_list_lock);
	match = xprt_class_find_by_netid_locked(netid);
	spin_unlock(&xprt_list_lock);
	if (match)
		return match;

	/* Module loading is done with the list lock dropped */
	request_module("rpc%s", netid);

	spin_lock(&xprt_list_lock);
	match = xprt_class_find_by_netid_locked(netid);
	spin_unlock(&xprt_list_lock);

	return match;
}
/**
* xprt_find_transport_ident - convert a netid into a transport identifier
* @netid: transport to load
*
* Returns:
* > 0: transport identifier
* -ENOENT: transport module not available
*/
int xprt_find_transport_ident(const char *netid)
{
const struct xprt_class *t;
int ret;
t = xprt_class_find_by_netid(netid);
if (!t)
return -ENOENT;
ret = t->ident;
xprt_class_release(t);
return ret;
}
EXPORT_SYMBOL_GPL(xprt_find_transport_ident);
static void xprt_clear_locked(struct rpc_xprt *xprt)
{
@ -1896,21 +1957,17 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net)
struct rpc_xprt *xprt_create_transport(struct xprt_create *args)
{
struct rpc_xprt *xprt;
struct xprt_class *t;
const struct xprt_class *t;
spin_lock(&xprt_list_lock);
list_for_each_entry(t, &xprt_list, list) {
if (t->ident == args->ident) {
spin_unlock(&xprt_list_lock);
goto found;
}
t = xprt_class_find_by_ident(args->ident);
if (!t) {
dprintk("RPC: transport (%d) not supported\n", args->ident);
return ERR_PTR(-EIO);
}
spin_unlock(&xprt_list_lock);
dprintk("RPC: transport (%d) not supported\n", args->ident);
return ERR_PTR(-EIO);
found:
xprt = t->setup(args);
xprt_class_release(t);
if (IS_ERR(xprt))
goto out;
if (args->flags & XPRT_CREATE_NO_IDLE_TIMEOUT)

View File

@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2015 Oracle. All rights reserved.
* Copyright (c) 2015-2020, Oracle and/or its affiliates.
*
* Support for backward direction RPCs on RPC/RDMA.
*/
@ -82,7 +82,7 @@ static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
&rqst->rq_snd_buf, rpcrdma_noch_pullup))
return -EIO;
trace_xprtrdma_cb_reply(rqst);
trace_xprtrdma_cb_reply(r_xprt, rqst);
return 0;
}
@ -260,7 +260,7 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
*/
req = rpcr_to_rdmar(rqst);
req->rl_reply = rep;
trace_xprtrdma_cb_call(rqst);
trace_xprtrdma_cb_call(r_xprt, rqst);
/* Queue rqst for ULP's callback service */
bc_serv = xprt->bc_serv;

View File

@ -65,18 +65,23 @@ void frwr_release_mr(struct rpcrdma_mr *mr)
kfree(mr);
}
/*
 * DMA-unmap an MR's scatterlist if it is currently mapped.
 *
 * mr->mr_device doubles as the "is mapped" flag: it is non-NULL only
 * while a mapping exists and is cleared here, so calling this again on
 * the same MR is a no-op. @r_xprt is not used by this function.
 */
static void frwr_mr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
{
	if (!mr->mr_device)
		return;

	trace_xprtrdma_mr_unmap(mr);
	ib_dma_unmap_sg(mr->mr_device, mr->mr_sg, mr->mr_nents, mr->mr_dir);
	mr->mr_device = NULL;
}
static void frwr_mr_recycle(struct rpcrdma_mr *mr)
{
struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
trace_xprtrdma_mr_recycle(mr);
if (mr->mr_dir != DMA_NONE) {
trace_xprtrdma_mr_unmap(mr);
ib_dma_unmap_sg(r_xprt->rx_ep->re_id->device,
mr->mr_sg, mr->mr_nents, mr->mr_dir);
mr->mr_dir = DMA_NONE;
}
frwr_mr_unmap(r_xprt, mr);
spin_lock(&r_xprt->rx_buf.rb_lock);
list_del(&mr->mr_all);
@ -86,6 +91,16 @@ static void frwr_mr_recycle(struct rpcrdma_mr *mr)
frwr_release_mr(mr);
}
static void frwr_mr_put(struct rpcrdma_mr *mr)
{
frwr_mr_unmap(mr->mr_xprt, mr);
/* The MR is returned to the req's MR free list instead
* of to the xprt's MR free list. No spinlock is needed.
*/
rpcrdma_mr_push(mr, &mr->mr_req->rl_free_mrs);
}
/* frwr_reset - Place MRs back on the free list
* @req: request to reset
*
@ -101,7 +116,7 @@ void frwr_reset(struct rpcrdma_req *req)
struct rpcrdma_mr *mr;
while ((mr = rpcrdma_mr_pop(&req->rl_registered)))
rpcrdma_mr_put(mr);
frwr_mr_put(mr);
}
/**
@ -130,7 +145,7 @@ int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
mr->mr_xprt = r_xprt;
mr->frwr.fr_mr = frmr;
mr->mr_dir = DMA_NONE;
mr->mr_device = NULL;
INIT_LIST_HEAD(&mr->mr_list);
init_completion(&mr->frwr.fr_linv_done);
@ -315,6 +330,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
mr->mr_dir);
if (!dma_nents)
goto out_dmamap_err;
mr->mr_device = ep->re_id->device;
ibmr = mr->frwr.fr_mr;
n = ib_map_mr_sg(ibmr, mr->mr_sg, dma_nents, NULL, PAGE_SIZE);
@ -341,7 +357,6 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
return seg;
out_dmamap_err:
mr->mr_dir = DMA_NONE;
trace_xprtrdma_frwr_sgerr(mr, i);
return ERR_PTR(-EIO);
@ -363,12 +378,21 @@ static void frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
container_of(cqe, struct rpcrdma_frwr, fr_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */
trace_xprtrdma_wc_fastreg(wc, frwr);
trace_xprtrdma_wc_fastreg(wc, &frwr->fr_cid);
/* The MR will get recycled when the associated req is retransmitted */
rpcrdma_flush_disconnect(cq->cq_context, wc);
}
/*
 * Fill in the completion ID that trace events use to identify this
 * FRWR's work completions: the queue ID is the resource ID of the
 * endpoint's send CQ, the completion ID is the resource ID of the MR.
 */
static void frwr_cid_init(struct rpcrdma_ep *ep,
			  struct rpcrdma_frwr *frwr)
{
	frwr->fr_cid.ci_queue_id = ep->re_attr.send_cq->res.id;
	frwr->fr_cid.ci_completion_id = frwr->fr_mr->res.id;
}
/**
* frwr_send - post Send WRs containing the RPC Call message
* @r_xprt: controlling transport instance
@ -385,6 +409,7 @@ static void frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
*/
int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
struct rpcrdma_ep *ep = r_xprt->rx_ep;
struct ib_send_wr *post_wr;
struct rpcrdma_mr *mr;
@ -395,6 +420,7 @@ int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
frwr = &mr->frwr;
frwr->fr_cqe.done = frwr_wc_fastreg;
frwr_cid_init(ep, frwr);
frwr->fr_regwr.wr.next = post_wr;
frwr->fr_regwr.wr.wr_cqe = &frwr->fr_cqe;
frwr->fr_regwr.wr.num_sge = 0;
@ -404,7 +430,7 @@ int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
post_wr = &frwr->fr_regwr.wr;
}
return ib_post_send(r_xprt->rx_ep->re_id->qp, post_wr, NULL);
return ib_post_send(ep->re_id->qp, post_wr, NULL);
}
/**
@ -420,18 +446,17 @@ void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
list_for_each_entry(mr, mrs, mr_list)
if (mr->mr_handle == rep->rr_inv_rkey) {
list_del_init(&mr->mr_list);
trace_xprtrdma_mr_reminv(mr);
rpcrdma_mr_put(mr);
frwr_mr_put(mr);
break; /* only one invalidated MR per RPC */
}
}
static void __frwr_release_mr(struct ib_wc *wc, struct rpcrdma_mr *mr)
static void frwr_mr_done(struct ib_wc *wc, struct rpcrdma_mr *mr)
{
if (wc->status != IB_WC_SUCCESS)
frwr_mr_recycle(mr);
else
rpcrdma_mr_put(mr);
frwr_mr_put(mr);
}
/**
@ -448,8 +473,8 @@ static void frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
/* WARNING: Only wr_cqe and status are reliable at this point */
trace_xprtrdma_wc_li(wc, frwr);
__frwr_release_mr(wc, mr);
trace_xprtrdma_wc_li(wc, &frwr->fr_cid);
frwr_mr_done(wc, mr);
rpcrdma_flush_disconnect(cq->cq_context, wc);
}
@ -469,8 +494,8 @@ static void frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
/* WARNING: Only wr_cqe and status are reliable at this point */
trace_xprtrdma_wc_li_wake(wc, frwr);
__frwr_release_mr(wc, mr);
trace_xprtrdma_wc_li_wake(wc, &frwr->fr_cid);
frwr_mr_done(wc, mr);
complete(&frwr->fr_linv_done);
rpcrdma_flush_disconnect(cq->cq_context, wc);
@ -490,6 +515,7 @@ static void frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
struct ib_send_wr *first, **prev, *last;
struct rpcrdma_ep *ep = r_xprt->rx_ep;
const struct ib_send_wr *bad_wr;
struct rpcrdma_frwr *frwr;
struct rpcrdma_mr *mr;
@ -509,6 +535,7 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
frwr = &mr->frwr;
frwr->fr_cqe.done = frwr_wc_localinv;
frwr_cid_init(ep, frwr);
last = &frwr->fr_invwr;
last->next = NULL;
last->wr_cqe = &frwr->fr_cqe;
@ -534,7 +561,7 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
* unless re_id->qp is a valid pointer.
*/
bad_wr = NULL;
rc = ib_post_send(r_xprt->rx_ep->re_id->qp, first, &bad_wr);
rc = ib_post_send(ep->re_id->qp, first, &bad_wr);
/* The final LOCAL_INV WR in the chain is supposed to
* do the wake. If it was never posted, the wake will
@ -547,7 +574,7 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
/* Recycle MRs in the LOCAL_INV chain that did not get posted.
*/
trace_xprtrdma_post_linv(req, rc);
trace_xprtrdma_post_linv_err(req, rc);
while (bad_wr) {
frwr = container_of(bad_wr, struct rpcrdma_frwr,
fr_invwr);
@ -574,10 +601,10 @@ static void frwr_wc_localinv_done(struct ib_cq *cq, struct ib_wc *wc)
struct rpcrdma_rep *rep = mr->mr_req->rl_reply;
/* WARNING: Only wr_cqe and status are reliable at this point */
trace_xprtrdma_wc_li_done(wc, frwr);
__frwr_release_mr(wc, mr);
trace_xprtrdma_wc_li_done(wc, &frwr->fr_cid);
frwr_mr_done(wc, mr);
/* Ensure @rep is generated before __frwr_release_mr */
/* Ensure @rep is generated before frwr_mr_done */
smp_rmb();
rpcrdma_complete_rqst(rep);
@ -597,6 +624,7 @@ static void frwr_wc_localinv_done(struct ib_cq *cq, struct ib_wc *wc)
void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
struct ib_send_wr *first, *last, **prev;
struct rpcrdma_ep *ep = r_xprt->rx_ep;
const struct ib_send_wr *bad_wr;
struct rpcrdma_frwr *frwr;
struct rpcrdma_mr *mr;
@ -614,6 +642,7 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
frwr = &mr->frwr;
frwr->fr_cqe.done = frwr_wc_localinv;
frwr_cid_init(ep, frwr);
last = &frwr->fr_invwr;
last->next = NULL;
last->wr_cqe = &frwr->fr_cqe;
@ -639,13 +668,13 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
* unless re_id->qp is a valid pointer.
*/
bad_wr = NULL;
rc = ib_post_send(r_xprt->rx_ep->re_id->qp, first, &bad_wr);
rc = ib_post_send(ep->re_id->qp, first, &bad_wr);
if (!rc)
return;
/* Recycle MRs in the LOCAL_INV chain that did not get posted.
*/
trace_xprtrdma_post_linv(req, rc);
trace_xprtrdma_post_linv_err(req, rc);
while (bad_wr) {
frwr = container_of(bad_wr, struct rpcrdma_frwr, fr_invwr);
mr = container_of(frwr, struct rpcrdma_mr, frwr);

View File

@ -24,6 +24,7 @@ MODULE_DESCRIPTION("RPC/RDMA Transport");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS("svcrdma");
MODULE_ALIAS("xprtrdma");
MODULE_ALIAS("rpcrdma6");
static void __exit rpc_rdma_cleanup(void)
{

View File

@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright (c) 2014-2017 Oracle. All rights reserved.
* Copyright (c) 2014-2020, Oracle and/or its affiliates.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@ -179,6 +179,31 @@ rpcrdma_nonpayload_inline(const struct rpcrdma_xprt *r_xprt,
r_xprt->rx_ep->re_max_inline_recv;
}
/* ACL likes to be lazy in allocating pages. For TCP, these
 * pages can be allocated during receive processing. Not true
 * for RDMA, which must always provision receive buffers
 * up front.
 *
 * Fills every empty slot of @buf's page array that the page data
 * touches. The data starts offset_in_page(@buf->page_base) bytes into
 * its first page, so that offset is added to the byte count; otherwise
 * a payload that does not start on a page boundary could leave its
 * final page unallocated.
 *
 * Returns 0 on success, or -ENOBUFS if a page cannot be allocated.
 * Pages allocated before a failure stay in @buf->pages.
 */
static noinline int
rpcrdma_alloc_sparse_pages(struct xdr_buf *buf)
{
	struct page **ppages;
	long len;

	/* Count bytes from the start of the first page, not from page_base */
	len = (long)offset_in_page(buf->page_base) + buf->page_len;
	ppages = buf->pages + (buf->page_base >> PAGE_SHIFT);
	while (len > 0) {
		/* Non-blocking allocation; fail quietly rather than warn */
		if (!*ppages)
			*ppages = alloc_page(GFP_NOWAIT | __GFP_NOWARN);
		if (!*ppages)
			return -ENOBUFS;
		ppages++;
		len -= PAGE_SIZE;
	}

	return 0;
}
/* Split @vec on page boundaries into SGEs. FMR registers pages, not
* a byte range. Other modes coalesce these SGEs into a single MR
* when they can.
@ -233,15 +258,6 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT);
page_base = offset_in_page(xdrbuf->page_base);
while (len) {
/* ACL likes to be lazy in allocating pages - ACLs
* are small by default but can get huge.
*/
if (unlikely(xdrbuf->flags & XDRBUF_SPARSE_PAGES)) {
if (!*ppages)
*ppages = alloc_page(GFP_NOWAIT | __GFP_NOWARN);
if (!*ppages)
return -ENOBUFS;
}
seg->mr_page = *ppages;
seg->mr_offset = (char *)page_base;
seg->mr_len = min_t(u32, PAGE_SIZE - page_base, len);
@ -315,7 +331,6 @@ static struct rpcrdma_mr_seg *rpcrdma_mr_prepare(struct rpcrdma_xprt *r_xprt,
*mr = rpcrdma_mr_get(r_xprt);
if (!*mr)
goto out_getmr_err;
trace_xprtrdma_mr_get(req);
(*mr)->mr_req = req;
}
@ -323,7 +338,7 @@ static struct rpcrdma_mr_seg *rpcrdma_mr_prepare(struct rpcrdma_xprt *r_xprt,
return frwr_map(r_xprt, seg, nsegs, writing, req->rl_slot.rq_xid, *mr);
out_getmr_err:
trace_xprtrdma_nomrs(req);
trace_xprtrdma_nomrs_err(r_xprt, req);
xprt_wait_for_buffer_space(&r_xprt->rx_xprt);
rpcrdma_mrs_refresh(r_xprt);
return ERR_PTR(-EAGAIN);
@ -867,6 +882,12 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
__be32 *p;
int ret;
if (unlikely(rqst->rq_rcv_buf.flags & XDRBUF_SPARSE_PAGES)) {
ret = rpcrdma_alloc_sparse_pages(&rqst->rq_rcv_buf);
if (ret)
return ret;
}
rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
xdr_init_encode(xdr, &req->rl_hdrbuf, rdmab_data(req->rl_rdmabuf),
rqst);
@ -1322,20 +1343,13 @@ rpcrdma_decode_error(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
p = xdr_inline_decode(xdr, 2 * sizeof(*p));
if (!p)
break;
dprintk("RPC: %s: server reports "
"version error (%u-%u), xid %08x\n", __func__,
be32_to_cpup(p), be32_to_cpu(*(p + 1)),
be32_to_cpu(rep->rr_xid));
trace_xprtrdma_err_vers(rqst, p, p + 1);
break;
case err_chunk:
dprintk("RPC: %s: server reports "
"header decoding error, xid %08x\n", __func__,
be32_to_cpu(rep->rr_xid));
trace_xprtrdma_err_chunk(rqst);
break;
default:
dprintk("RPC: %s: server reports "
"unrecognized error %d, xid %08x\n", __func__,
be32_to_cpup(p), be32_to_cpu(rep->rr_xid));
trace_xprtrdma_err_unrecognized(rqst, p);
}
return -EIO;
@ -1376,7 +1390,7 @@ void rpcrdma_complete_rqst(struct rpcrdma_rep *rep)
return;
out_badheader:
trace_xprtrdma_reply_hdr(rep);
trace_xprtrdma_reply_hdr_err(rep);
r_xprt->rx_stats.bad_reply_count++;
rqst->rq_task->tk_status = status;
status = 0;
@ -1450,14 +1464,12 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
rpcrdma_post_recvs(r_xprt, false);
req = rpcr_to_rdmar(rqst);
if (req->rl_reply) {
trace_xprtrdma_leaked_rep(rqst, req->rl_reply);
if (unlikely(req->rl_reply))
rpcrdma_recv_buffer_put(req->rl_reply);
}
req->rl_reply = rep;
rep->rr_rqst = rqst;
trace_xprtrdma_reply(rqst->rq_task, rep, req, credits);
trace_xprtrdma_reply(rqst->rq_task, rep, credits);
if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE)
frwr_reminv(rep, &req->rl_registered);
@ -1469,16 +1481,16 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
return;
out_badversion:
trace_xprtrdma_reply_vers(rep);
trace_xprtrdma_reply_vers_err(rep);
goto out;
out_norqst:
spin_unlock(&xprt->queue_lock);
trace_xprtrdma_reply_rqst(rep);
trace_xprtrdma_reply_rqst_err(rep);
goto out;
out_shortreply:
trace_xprtrdma_reply_short(rep);
trace_xprtrdma_reply_short_err(rep);
out:
rpcrdma_recv_buffer_put(rep);

View File

@ -599,11 +599,12 @@ static void
xprt_rdma_free(struct rpc_task *task)
{
struct rpc_rqst *rqst = task->tk_rqstp;
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
if (!list_empty(&req->rl_registered))
frwr_unmap_sync(r_xprt, req);
if (unlikely(!list_empty(&req->rl_registered))) {
trace_xprtrdma_mrs_zap(task);
frwr_unmap_sync(rpcx_to_rdmax(rqst->rq_xprt), req);
}
/* XXX: If the RPC is completing because of a signal and
* not because a reply was received, we ought to ensure
@ -768,6 +769,7 @@ static struct xprt_class xprt_rdma = {
.owner = THIS_MODULE,
.ident = XPRT_TRANSPORT_RDMA,
.setup = xprt_setup_rdma,
.netid = { "rdma", "rdma6", "" },
};
void xprt_rdma_cleanup(void)

View File

@ -167,7 +167,7 @@ static void rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
struct rpcrdma_xprt *r_xprt = cq->cq_context;
/* WARNING: Only wr_cqe and status are reliable at this point */
trace_xprtrdma_wc_send(sc, wc);
trace_xprtrdma_wc_send(wc, &sc->sc_cid);
rpcrdma_sendctx_put_locked(r_xprt, sc);
rpcrdma_flush_disconnect(r_xprt, wc);
}
@ -186,7 +186,7 @@ static void rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
struct rpcrdma_xprt *r_xprt = cq->cq_context;
/* WARNING: Only wr_cqe and status are reliable at this point */
trace_xprtrdma_wc_receive(wc);
trace_xprtrdma_wc_receive(wc, &rep->rr_cid);
--r_xprt->rx_ep->re_receive_count;
if (wc->status != IB_WC_SUCCESS)
goto out_flushed;
@ -643,6 +643,9 @@ static struct rpcrdma_sendctx *rpcrdma_sendctx_create(struct rpcrdma_ep *ep)
return NULL;
sc->sc_cqe.done = rpcrdma_wc_send;
sc->sc_cid.ci_queue_id = ep->re_attr.send_cq->res.id;
sc->sc_cid.ci_completion_id =
atomic_inc_return(&ep->re_completion_ids);
return sc;
}
@ -972,6 +975,9 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf))
goto out_free_regbuf;
rep->rr_cid.ci_completion_id =
atomic_inc_return(&r_xprt->rx_ep->re_completion_ids);
xdr_buf_init(&rep->rr_hdrbuf, rdmab_data(rep->rr_rdmabuf),
rdmab_length(rep->rr_rdmabuf));
rep->rr_cqe.done = rpcrdma_wc_receive;
@ -1178,25 +1184,6 @@ rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt)
return mr;
}
/**
 * rpcrdma_mr_put - DMA unmap an MR (if it is mapped) and release it
 * @mr: MR to release
 *
 * When @mr->mr_dir is anything other than DMA_NONE, the MR's scatterlist
 * is unmapped on the device reached through @mr->mr_xprt and mr_dir is
 * reset to DMA_NONE. In every case the MR is then handed to
 * rpcrdma_mr_push() together with the rl_free_mrs list of the request
 * recorded in @mr->mr_req.
 */
void rpcrdma_mr_put(struct rpcrdma_mr *mr)
{
	if (mr->mr_dir != DMA_NONE) {
		struct rpcrdma_xprt *xprt = mr->mr_xprt;

		trace_xprtrdma_mr_unmap(mr);
		ib_dma_unmap_sg(xprt->rx_ep->re_id->device, mr->mr_sg,
				mr->mr_nents, mr->mr_dir);

		/* Mark the MR as no longer DMA mapped. */
		mr->mr_dir = DMA_NONE;
	}

	rpcrdma_mr_push(mr, &mr->mr_req->rl_free_mrs);
}
/**
* rpcrdma_buffer_get - Get a request buffer
* @buffers: Buffer pool from which to obtain a buffer
@ -1411,6 +1398,7 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
if (!rep)
break;
rep->rr_cid.ci_queue_id = ep->re_attr.recv_cq->res.id;
trace_xprtrdma_post_recv(rep);
rep->rr_recv_wr.next = wr;
wr = &rep->rr_recv_wr;

View File

@ -53,6 +53,7 @@
#include <rdma/ib_verbs.h> /* RDMA verbs api */
#include <linux/sunrpc/clnt.h> /* rpc_xprt */
#include <linux/sunrpc/rpc_rdma_cid.h> /* completion IDs */
#include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */
#include <linux/sunrpc/xprtrdma.h> /* xprt parameters */
@ -93,6 +94,8 @@ struct rpcrdma_ep {
unsigned int re_max_requests; /* depends on device */
unsigned int re_inline_send; /* negotiated */
unsigned int re_inline_recv; /* negotiated */
atomic_t re_completion_ids;
};
/* Pre-allocate extra Work Requests for handling backward receives
@ -180,6 +183,8 @@ enum {
struct rpcrdma_rep {
struct ib_cqe rr_cqe;
struct rpc_rdma_cid rr_cid;
__be32 rr_xid;
__be32 rr_vers;
__be32 rr_proc;
@ -211,6 +216,7 @@ enum {
struct rpcrdma_req;
struct rpcrdma_sendctx {
struct ib_cqe sc_cqe;
struct rpc_rdma_cid sc_cid;
struct rpcrdma_req *sc_req;
unsigned int sc_unmap_count;
struct ib_sge sc_sges[];
@ -225,6 +231,7 @@ struct rpcrdma_sendctx {
struct rpcrdma_frwr {
struct ib_mr *fr_mr;
struct ib_cqe fr_cqe;
struct rpc_rdma_cid fr_cid;
struct completion fr_linv_done;
union {
struct ib_reg_wr fr_regwr;
@ -236,6 +243,7 @@ struct rpcrdma_req;
struct rpcrdma_mr {
struct list_head mr_list;
struct rpcrdma_req *mr_req;
struct ib_device *mr_device;
struct scatterlist *mr_sg;
int mr_nents;
enum dma_data_direction mr_dir;
@ -466,7 +474,6 @@ void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_xprt *r_xprt);
struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt);
void rpcrdma_mr_put(struct rpcrdma_mr *mr);
void rpcrdma_mrs_refresh(struct rpcrdma_xprt *r_xprt);
struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);

View File

@ -433,7 +433,8 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
if (ret <= 0)
goto sock_err;
xs_flush_bvec(buf->bvec, ret, seek + buf->page_base);
offset += ret - buf->page_base;
ret -= buf->page_base;
offset += ret;
if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
goto out;
if (ret != want)
@ -3059,6 +3060,7 @@ static struct xprt_class xs_local_transport = {
.owner = THIS_MODULE,
.ident = XPRT_TRANSPORT_LOCAL,
.setup = xs_setup_local,
.netid = { "" },
};
static struct xprt_class xs_udp_transport = {
@ -3067,6 +3069,7 @@ static struct xprt_class xs_udp_transport = {
.owner = THIS_MODULE,
.ident = XPRT_TRANSPORT_UDP,
.setup = xs_setup_udp,
.netid = { "udp", "udp6", "" },
};
static struct xprt_class xs_tcp_transport = {
@ -3075,6 +3078,7 @@ static struct xprt_class xs_tcp_transport = {
.owner = THIS_MODULE,
.ident = XPRT_TRANSPORT_TCP,
.setup = xs_setup_tcp,
.netid = { "tcp", "tcp6", "" },
};
static struct xprt_class xs_bc_tcp_transport = {
@ -3083,6 +3087,7 @@ static struct xprt_class xs_bc_tcp_transport = {
.owner = THIS_MODULE,
.ident = XPRT_TRANSPORT_BC_TCP,
.setup = xs_setup_bc_tcp,
.netid = { "" },
};
/**