linux/fs/nfs/nfs4client.c
Trond Myklebust b326df4a8e NFS: enable nconnect for RDMA
It appears that in certain cases, RDMA capable transports can benefit
from the ability to establish multiple connections to increase their
throughput. This patch therefore enables the use of the "nconnect" mount
option for those use cases.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
2024-03-09 09:14:51 -05:00

1387 lines
36 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*/
#include <linux/module.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/auth.h>
#include <linux/sunrpc/xprt.h>
#include <linux/sunrpc/bc_xprt.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
#include "internal.h"
#include "callback.h"
#include "delegation.h"
#include "nfs4session.h"
#include "nfs4idmap.h"
#include "pnfs.h"
#include "netns.h"
#include "sysfs.h"
#define NFSDBG_FACILITY NFSDBG_CLIENT
/*
* Get a unique NFSv4.0 callback identifier which will be used
* by the V4.0 callback service to lookup the nfs_client struct
*/
static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion)
{
int ret = 0;
struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
if (clp->rpc_ops->version != 4 || minorversion != 0)
return ret;
idr_preload(GFP_KERNEL);
spin_lock(&nn->nfs_client_lock);
ret = idr_alloc(&nn->cb_ident_idr, clp, 1, 0, GFP_NOWAIT);
if (ret >= 0)
clp->cl_cb_ident = ret;
spin_unlock(&nn->nfs_client_lock);
idr_preload_end();
return ret < 0 ? ret : 0;
}
#ifdef CONFIG_NFS_V4_1
/*
* Per auth flavor data server rpc clients
*/
struct nfs4_ds_server {
struct list_head list; /* ds_clp->cl_ds_clients */
struct rpc_clnt *rpc_clnt;
};
/**
* nfs4_find_ds_client - Common lookup case for DS I/O
* @ds_clp: pointer to the DS's nfs_client
* @flavor: rpc auth flavour to match
*/
static struct nfs4_ds_server *
nfs4_find_ds_client(struct nfs_client *ds_clp, rpc_authflavor_t flavor)
{
struct nfs4_ds_server *dss;
rcu_read_lock();
list_for_each_entry_rcu(dss, &ds_clp->cl_ds_clients, list) {
if (dss->rpc_clnt->cl_auth->au_flavor != flavor)
continue;
goto out;
}
dss = NULL;
out:
rcu_read_unlock();
return dss;
}
static struct nfs4_ds_server *
nfs4_add_ds_client(struct nfs_client *ds_clp, rpc_authflavor_t flavor,
struct nfs4_ds_server *new)
{
struct nfs4_ds_server *dss;
spin_lock(&ds_clp->cl_lock);
list_for_each_entry(dss, &ds_clp->cl_ds_clients, list) {
if (dss->rpc_clnt->cl_auth->au_flavor != flavor)
continue;
goto out;
}
if (new)
list_add_rcu(&new->list, &ds_clp->cl_ds_clients);
dss = new;
out:
spin_unlock(&ds_clp->cl_lock); /* need some lock to protect list */
return dss;
}
static struct nfs4_ds_server *
nfs4_alloc_ds_server(struct nfs_client *ds_clp, rpc_authflavor_t flavor)
{
struct nfs4_ds_server *dss;
dss = kmalloc(sizeof(*dss), GFP_NOFS);
if (dss == NULL)
return ERR_PTR(-ENOMEM);
dss->rpc_clnt = rpc_clone_client_set_auth(ds_clp->cl_rpcclient, flavor);
if (IS_ERR(dss->rpc_clnt)) {
int err = PTR_ERR(dss->rpc_clnt);
kfree (dss);
return ERR_PTR(err);
}
INIT_LIST_HEAD(&dss->list);
return dss;
}
static void
nfs4_free_ds_server(struct nfs4_ds_server *dss)
{
rpc_release_client(dss->rpc_clnt);
kfree(dss);
}
/**
* nfs4_find_or_create_ds_client - Find or create a DS rpc client
* @ds_clp: pointer to the DS's nfs_client
* @inode: pointer to the inode
*
* Find or create a DS rpc client with th MDS server rpc client auth flavor
* in the nfs_client cl_ds_clients list.
*/
struct rpc_clnt *
nfs4_find_or_create_ds_client(struct nfs_client *ds_clp, struct inode *inode)
{
struct nfs4_ds_server *dss, *new;
rpc_authflavor_t flavor = NFS_SERVER(inode)->client->cl_auth->au_flavor;
dss = nfs4_find_ds_client(ds_clp, flavor);
if (dss != NULL)
goto out;
new = nfs4_alloc_ds_server(ds_clp, flavor);
if (IS_ERR(new))
return ERR_CAST(new);
dss = nfs4_add_ds_client(ds_clp, flavor, new);
if (dss != new)
nfs4_free_ds_server(new);
out:
return dss->rpc_clnt;
}
EXPORT_SYMBOL_GPL(nfs4_find_or_create_ds_client);
static void
nfs4_shutdown_ds_clients(struct nfs_client *clp)
{
struct nfs4_ds_server *dss;
while (!list_empty(&clp->cl_ds_clients)) {
dss = list_entry(clp->cl_ds_clients.next,
struct nfs4_ds_server, list);
list_del(&dss->list);
rpc_shutdown_client(dss->rpc_clnt);
kfree (dss);
}
}
static void
nfs4_cleanup_callback(struct nfs_client *clp)
{
struct nfs4_copy_state *cp_state;
while (!list_empty(&clp->pending_cb_stateids)) {
cp_state = list_entry(clp->pending_cb_stateids.next,
struct nfs4_copy_state, copies);
list_del(&cp_state->copies);
kfree(cp_state);
}
}
void nfs41_shutdown_client(struct nfs_client *clp)
{
if (nfs4_has_session(clp)) {
nfs4_cleanup_callback(clp);
nfs4_shutdown_ds_clients(clp);
nfs4_destroy_session(clp->cl_session);
nfs4_destroy_clientid(clp);
}
}
#endif /* CONFIG_NFS_V4_1 */
void nfs40_shutdown_client(struct nfs_client *clp)
{
if (clp->cl_slot_tbl) {
nfs4_shutdown_slot_table(clp->cl_slot_tbl);
kfree(clp->cl_slot_tbl);
}
}
struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init)
{
char buf[INET6_ADDRSTRLEN + 1];
const char *ip_addr = cl_init->ip_addr;
struct nfs_client *clp = nfs_alloc_client(cl_init);
int err;
if (IS_ERR(clp))
return clp;
err = nfs_get_cb_ident_idr(clp, cl_init->minorversion);
if (err)
goto error;
if (cl_init->minorversion > NFS4_MAX_MINOR_VERSION) {
err = -EINVAL;
goto error;
}
spin_lock_init(&clp->cl_lock);
INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state);
INIT_LIST_HEAD(&clp->cl_ds_clients);
rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion];
clp->cl_mig_gen = 1;
#if IS_ENABLED(CONFIG_NFS_V4_1)
init_waitqueue_head(&clp->cl_lock_waitq);
#endif
INIT_LIST_HEAD(&clp->pending_cb_stateids);
if (cl_init->minorversion != 0)
__set_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags);
__set_bit(NFS_CS_DISCRTRY, &clp->cl_flags);
__set_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags);
if (test_bit(NFS_CS_DS, &cl_init->init_flags))
__set_bit(NFS_CS_DS, &clp->cl_flags);
/*
* Set up the connection to the server before we add add to the
* global list.
*/
err = nfs_create_rpc_client(clp, cl_init, RPC_AUTH_GSS_KRB5I);
if (err == -EINVAL)
err = nfs_create_rpc_client(clp, cl_init, RPC_AUTH_UNIX);
if (err < 0)
goto error;
/* If no clientaddr= option was specified, find a usable cb address */
if (ip_addr == NULL) {
struct sockaddr_storage cb_addr;
struct sockaddr *sap = (struct sockaddr *)&cb_addr;
err = rpc_localaddr(clp->cl_rpcclient, sap, sizeof(cb_addr));
if (err < 0)
goto error;
err = rpc_ntop(sap, buf, sizeof(buf));
if (err < 0)
goto error;
ip_addr = (const char *)buf;
}
strscpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr));
err = nfs_idmap_new(clp);
if (err < 0) {
dprintk("%s: failed to create idmapper. Error = %d\n",
__func__, err);
goto error;
}
__set_bit(NFS_CS_IDMAP, &clp->cl_res_state);
return clp;
error:
nfs_free_client(clp);
return ERR_PTR(err);
}
/*
* Destroy the NFS4 callback service
*/
static void nfs4_destroy_callback(struct nfs_client *clp)
{
if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
nfs_callback_down(clp->cl_mvops->minor_version, clp->cl_net);
}
static void nfs4_shutdown_client(struct nfs_client *clp)
{
if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state))
nfs4_kill_renewd(clp);
clp->cl_mvops->shutdown_client(clp);
nfs4_destroy_callback(clp);
if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state))
nfs_idmap_delete(clp);
rpc_destroy_wait_queue(&clp->cl_rpcwaitq);
kfree(clp->cl_serverowner);
kfree(clp->cl_serverscope);
kfree(clp->cl_implid);
kfree(clp->cl_owner_id);
}
void nfs4_free_client(struct nfs_client *clp)
{
nfs4_shutdown_client(clp);
nfs_free_client(clp);
}
/*
* Initialize the NFS4 callback service
*/
static int nfs4_init_callback(struct nfs_client *clp)
{
struct rpc_xprt *xprt;
int error;
xprt = rcu_dereference_raw(clp->cl_rpcclient->cl_xprt);
if (nfs4_has_session(clp)) {
error = xprt_setup_backchannel(xprt, NFS41_BC_MIN_CALLBACKS);
if (error < 0)
return error;
}
error = nfs_callback_up(clp->cl_mvops->minor_version, xprt);
if (error < 0) {
dprintk("%s: failed to start callback. Error = %d\n",
__func__, error);
return error;
}
__set_bit(NFS_CS_CALLBACK, &clp->cl_res_state);
return 0;
}
/**
* nfs40_init_client - nfs_client initialization tasks for NFSv4.0
* @clp: nfs_client to initialize
*
* Returns zero on success, or a negative errno if some error occurred.
*/
int nfs40_init_client(struct nfs_client *clp)
{
struct nfs4_slot_table *tbl;
int ret;
tbl = kzalloc(sizeof(*tbl), GFP_NOFS);
if (tbl == NULL)
return -ENOMEM;
ret = nfs4_setup_slot_table(tbl, NFS4_MAX_SLOT_TABLE,
"NFSv4.0 transport Slot table");
if (ret) {
nfs4_shutdown_slot_table(tbl);
kfree(tbl);
return ret;
}
clp->cl_slot_tbl = tbl;
return 0;
}
#if defined(CONFIG_NFS_V4_1)
/**
* nfs41_init_client - nfs_client initialization tasks for NFSv4.1+
* @clp: nfs_client to initialize
*
* Returns zero on success, or a negative errno if some error occurred.
*/
int nfs41_init_client(struct nfs_client *clp)
{
struct nfs4_session *session = NULL;
/*
* Create the session and mark it expired.
* When a SEQUENCE operation encounters the expired session
* it will do session recovery to initialize it.
*/
session = nfs4_alloc_session(clp);
if (!session)
return -ENOMEM;
clp->cl_session = session;
/*
* The create session reply races with the server back
* channel probe. Mark the client NFS_CS_SESSION_INITING
* so that the client back channel can find the
* nfs_client struct
*/
nfs_mark_client_ready(clp, NFS_CS_SESSION_INITING);
return 0;
}
#endif /* CONFIG_NFS_V4_1 */
/*
* Initialize the minor version specific parts of an NFS4 client record
*/
static int nfs4_init_client_minor_version(struct nfs_client *clp)
{
int ret;
ret = clp->cl_mvops->init_client(clp);
if (ret)
return ret;
return nfs4_init_callback(clp);
}
static void nfs4_add_trunk(struct nfs_client *clp, struct nfs_client *old)
{
struct sockaddr_storage clp_addr, old_addr;
struct sockaddr *clp_sap = (struct sockaddr *)&clp_addr;
struct sockaddr *old_sap = (struct sockaddr *)&old_addr;
size_t clp_salen;
struct xprt_create xprt_args = {
.ident = old->cl_proto,
.net = old->cl_net,
.servername = old->cl_hostname,
};
int max_connect = test_bit(NFS_CS_PNFS, &clp->cl_flags) ?
clp->cl_max_connect : old->cl_max_connect;
if (clp->cl_proto != old->cl_proto)
return;
clp_salen = rpc_peeraddr(clp->cl_rpcclient, clp_sap, sizeof(clp_addr));
rpc_peeraddr(old->cl_rpcclient, old_sap, sizeof(old_addr));
if (clp_addr.ss_family != old_addr.ss_family)
return;
xprt_args.dstaddr = clp_sap;
xprt_args.addrlen = clp_salen;
rpc_clnt_add_xprt(old->cl_rpcclient, &xprt_args,
rpc_clnt_test_and_add_xprt, &max_connect);
}
/**
* nfs4_init_client - Initialise an NFS4 client record
*
* @clp: nfs_client to initialise
* @cl_init: pointer to nfs_client_initdata
*
* Returns pointer to an NFS client, or an ERR_PTR value.
*/
struct nfs_client *nfs4_init_client(struct nfs_client *clp,
const struct nfs_client_initdata *cl_init)
{
struct nfs_client *old;
int error;
if (clp->cl_cons_state == NFS_CS_READY)
/* the client is initialised already */
return clp;
error = nfs4_init_client_minor_version(clp);
if (error < 0)
goto error;
error = nfs4_discover_server_trunking(clp, &old);
if (error < 0)
goto error;
if (clp != old) {
clp->cl_preserve_clid = true;
/*
* Mark the client as having failed initialization so other
* processes walking the nfs_client_list in nfs_match_client()
* won't try to use it.
*/
nfs_mark_client_ready(clp, -EPERM);
if (old->cl_mvops->session_trunk)
nfs4_add_trunk(clp, old);
}
clear_bit(NFS_CS_TSM_POSSIBLE, &clp->cl_flags);
nfs_put_client(clp);
return old;
error:
nfs_mark_client_ready(clp, error);
nfs_put_client(clp);
return ERR_PTR(error);
}
/*
* SETCLIENTID just did a callback update with the callback ident in
* "drop," but server trunking discovery claims "drop" and "keep" are
* actually the same server. Swap the callback IDs so that "keep"
* will continue to use the callback ident the server now knows about,
* and so that "keep"'s original callback ident is destroyed when
* "drop" is freed.
*/
static void nfs4_swap_callback_idents(struct nfs_client *keep,
struct nfs_client *drop)
{
struct nfs_net *nn = net_generic(keep->cl_net, nfs_net_id);
unsigned int save = keep->cl_cb_ident;
if (keep->cl_cb_ident == drop->cl_cb_ident)
return;
dprintk("%s: keeping callback ident %u and dropping ident %u\n",
__func__, keep->cl_cb_ident, drop->cl_cb_ident);
spin_lock(&nn->nfs_client_lock);
idr_replace(&nn->cb_ident_idr, keep, drop->cl_cb_ident);
keep->cl_cb_ident = drop->cl_cb_ident;
idr_replace(&nn->cb_ident_idr, drop, save);
drop->cl_cb_ident = save;
spin_unlock(&nn->nfs_client_lock);
}
static bool nfs4_match_client_owner_id(const struct nfs_client *clp1,
const struct nfs_client *clp2)
{
if (clp1->cl_owner_id == NULL || clp2->cl_owner_id == NULL)
return true;
return strcmp(clp1->cl_owner_id, clp2->cl_owner_id) == 0;
}
static bool nfs4_same_verifier(nfs4_verifier *v1, nfs4_verifier *v2)
{
return memcmp(v1->data, v2->data, sizeof(v1->data)) == 0;
}
static int nfs4_match_client(struct nfs_client *pos, struct nfs_client *new,
struct nfs_client **prev, struct nfs_net *nn)
{
int status;
if (pos->rpc_ops != new->rpc_ops)
return 1;
if (pos->cl_minorversion != new->cl_minorversion)
return 1;
/* If "pos" isn't marked ready, we can't trust the
* remaining fields in "pos", especially the client
* ID and serverowner fields. Wait for CREATE_SESSION
* to finish. */
if (pos->cl_cons_state > NFS_CS_READY) {
refcount_inc(&pos->cl_count);
spin_unlock(&nn->nfs_client_lock);
nfs_put_client(*prev);
*prev = pos;
status = nfs_wait_client_init_complete(pos);
spin_lock(&nn->nfs_client_lock);
if (status < 0)
return status;
}
if (pos->cl_cons_state != NFS_CS_READY)
return 1;
if (pos->cl_clientid != new->cl_clientid)
return 1;
/* NFSv4.1 always uses the uniform string, however someone
* might switch the uniquifier string on us.
*/
if (!nfs4_match_client_owner_id(pos, new))
return 1;
return 0;
}
/**
* nfs40_walk_client_list - Find server that recognizes a client ID
*
* @new: nfs_client with client ID to test
* @result: OUT: found nfs_client, or new
* @cred: credential to use for trunking test
*
* Returns zero, a negative errno, or a negative NFS4ERR status.
* If zero is returned, an nfs_client pointer is planted in "result."
*
* NB: nfs40_walk_client_list() relies on the new nfs_client being
* the last nfs_client on the list.
*/
int nfs40_walk_client_list(struct nfs_client *new,
struct nfs_client **result,
const struct cred *cred)
{
struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id);
struct nfs_client *pos, *prev = NULL;
struct nfs4_setclientid_res clid = {
.clientid = new->cl_clientid,
.confirm = new->cl_confirm,
};
int status = -NFS4ERR_STALE_CLIENTID;
spin_lock(&nn->nfs_client_lock);
list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) {
if (pos == new)
goto found;
status = nfs4_match_client(pos, new, &prev, nn);
if (status < 0)
goto out_unlock;
if (status != 0)
continue;
/*
* We just sent a new SETCLIENTID, which should have
* caused the server to return a new cl_confirm. So if
* cl_confirm is the same, then this is a different
* server that just returned the same cl_confirm by
* coincidence:
*/
if ((new != pos) && nfs4_same_verifier(&pos->cl_confirm,
&new->cl_confirm))
continue;
/*
* But if the cl_confirm's are different, then the only
* way that a SETCLIENTID_CONFIRM to pos can succeed is
* if new and pos point to the same server:
*/
found:
refcount_inc(&pos->cl_count);
spin_unlock(&nn->nfs_client_lock);
nfs_put_client(prev);
prev = pos;
status = nfs4_proc_setclientid_confirm(pos, &clid, cred);
switch (status) {
case -NFS4ERR_STALE_CLIENTID:
break;
case 0:
nfs4_swap_callback_idents(pos, new);
pos->cl_confirm = new->cl_confirm;
nfs_mark_client_ready(pos, NFS_CS_READY);
prev = NULL;
*result = pos;
goto out;
case -ERESTARTSYS:
case -ETIMEDOUT:
/* The callback path may have been inadvertently
* changed. Schedule recovery!
*/
nfs4_schedule_path_down_recovery(pos);
goto out;
default:
goto out;
}
spin_lock(&nn->nfs_client_lock);
}
out_unlock:
spin_unlock(&nn->nfs_client_lock);
/* No match found. The server lost our clientid */
out:
nfs_put_client(prev);
return status;
}
#ifdef CONFIG_NFS_V4_1
/*
* Returns true if the server major ids match
*/
bool
nfs4_check_serverowner_major_id(struct nfs41_server_owner *o1,
struct nfs41_server_owner *o2)
{
if (o1->major_id_sz != o2->major_id_sz)
return false;
return memcmp(o1->major_id, o2->major_id, o1->major_id_sz) == 0;
}
/*
* Returns true if the server scopes match
*/
static bool
nfs4_check_server_scope(struct nfs41_server_scope *s1,
struct nfs41_server_scope *s2)
{
if (s1->server_scope_sz != s2->server_scope_sz)
return false;
return memcmp(s1->server_scope, s2->server_scope,
s1->server_scope_sz) == 0;
}
/**
* nfs4_detect_session_trunking - Checks for session trunking.
* @clp: original mount nfs_client
* @res: result structure from an exchange_id using the original mount
* nfs_client with a new multi_addr transport
* @xprt: pointer to the transport to add.
*
* Called after a successful EXCHANGE_ID on a multi-addr connection.
* Upon success, add the transport.
*
* Returns zero on success, otherwise -EINVAL
*
* Note: since the exchange_id for the new multi_addr transport uses the
* same nfs_client from the original mount, the cl_owner_id is reused,
* so eir_clientowner is the same.
*/
int nfs4_detect_session_trunking(struct nfs_client *clp,
struct nfs41_exchange_id_res *res,
struct rpc_xprt *xprt)
{
/* Check eir_clientid */
if (clp->cl_clientid != res->clientid)
goto out_err;
/* Check eir_server_owner so_major_id */
if (!nfs4_check_serverowner_major_id(clp->cl_serverowner,
res->server_owner))
goto out_err;
/* Check eir_server_owner so_minor_id */
if (clp->cl_serverowner->minor_id != res->server_owner->minor_id)
goto out_err;
/* Check eir_server_scope */
if (!nfs4_check_server_scope(clp->cl_serverscope, res->server_scope))
goto out_err;
pr_info("NFS: %s: Session trunking succeeded for %s\n",
clp->cl_hostname,
xprt->address_strings[RPC_DISPLAY_ADDR]);
return 0;
out_err:
pr_info("NFS: %s: Session trunking failed for %s\n", clp->cl_hostname,
xprt->address_strings[RPC_DISPLAY_ADDR]);
return -EINVAL;
}
/**
* nfs41_walk_client_list - Find nfs_client that matches a client/server owner
*
* @new: nfs_client with client ID to test
* @result: OUT: found nfs_client, or new
* @cred: credential to use for trunking test
*
* Returns zero, a negative errno, or a negative NFS4ERR status.
* If zero is returned, an nfs_client pointer is planted in "result."
*
* NB: nfs41_walk_client_list() relies on the new nfs_client being
* the last nfs_client on the list.
*/
int nfs41_walk_client_list(struct nfs_client *new,
struct nfs_client **result,
const struct cred *cred)
{
struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id);
struct nfs_client *pos, *prev = NULL;
int status = -NFS4ERR_STALE_CLIENTID;
spin_lock(&nn->nfs_client_lock);
list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) {
if (pos == new)
goto found;
status = nfs4_match_client(pos, new, &prev, nn);
if (status < 0)
goto out;
if (status != 0)
continue;
/*
* Note that session trunking is just a special subcase of
* client id trunking. In either case, we want to fall back
* to using the existing nfs_client.
*/
if (!nfs4_check_serverowner_major_id(pos->cl_serverowner,
new->cl_serverowner))
continue;
found:
refcount_inc(&pos->cl_count);
*result = pos;
status = 0;
break;
}
out:
spin_unlock(&nn->nfs_client_lock);
nfs_put_client(prev);
return status;
}
#endif /* CONFIG_NFS_V4_1 */
static void nfs4_destroy_server(struct nfs_server *server)
{
LIST_HEAD(freeme);
nfs_server_return_all_delegations(server);
unset_pnfs_layoutdriver(server);
nfs4_purge_state_owners(server, &freeme);
nfs4_free_state_owners(&freeme);
}
/*
* NFSv4.0 callback thread helper
*
* Find a client by callback identifier
*/
struct nfs_client *
nfs4_find_client_ident(struct net *net, int cb_ident)
{
struct nfs_client *clp;
struct nfs_net *nn = net_generic(net, nfs_net_id);
spin_lock(&nn->nfs_client_lock);
clp = idr_find(&nn->cb_ident_idr, cb_ident);
if (clp)
refcount_inc(&clp->cl_count);
spin_unlock(&nn->nfs_client_lock);
return clp;
}
#if defined(CONFIG_NFS_V4_1)
/* Common match routine for v4.0 and v4.1 callback services */
static bool nfs4_cb_match_client(const struct sockaddr *addr,
struct nfs_client *clp, u32 minorversion)
{
struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
/* Don't match clients that failed to initialise */
if (!(clp->cl_cons_state == NFS_CS_READY ||
clp->cl_cons_state == NFS_CS_SESSION_INITING))
return false;
smp_rmb();
/* Match the version and minorversion */
if (clp->rpc_ops->version != 4 ||
clp->cl_minorversion != minorversion)
return false;
/* Match only the IP address, not the port number */
return rpc_cmp_addr(addr, clap);
}
/*
* NFSv4.1 callback thread helper
* For CB_COMPOUND calls, find a client by IP address, protocol version,
* minorversion, and sessionID
*
* Returns NULL if no such client
*/
struct nfs_client *
nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr,
struct nfs4_sessionid *sid, u32 minorversion)
{
struct nfs_client *clp;
struct nfs_net *nn = net_generic(net, nfs_net_id);
spin_lock(&nn->nfs_client_lock);
list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {
if (!nfs4_cb_match_client(addr, clp, minorversion))
continue;
if (!nfs4_has_session(clp))
continue;
/* Match sessionid*/
if (memcmp(clp->cl_session->sess_id.data,
sid->data, NFS4_MAX_SESSIONID_LEN) != 0)
continue;
refcount_inc(&clp->cl_count);
spin_unlock(&nn->nfs_client_lock);
return clp;
}
spin_unlock(&nn->nfs_client_lock);
return NULL;
}
#else /* CONFIG_NFS_V4_1 */
struct nfs_client *
nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr,
struct nfs4_sessionid *sid, u32 minorversion)
{
return NULL;
}
#endif /* CONFIG_NFS_V4_1 */
/*
* Set up an NFS4 client
*/
static int nfs4_set_client(struct nfs_server *server,
const char *hostname,
const struct sockaddr_storage *addr,
const size_t addrlen,
const char *ip_addr,
int proto, const struct rpc_timeout *timeparms,
u32 minorversion, unsigned int nconnect,
unsigned int max_connect,
struct net *net,
struct xprtsec_parms *xprtsec)
{
struct nfs_client_initdata cl_init = {
.hostname = hostname,
.addr = addr,
.addrlen = addrlen,
.ip_addr = ip_addr,
.nfs_mod = &nfs_v4,
.proto = proto,
.minorversion = minorversion,
.net = net,
.timeparms = timeparms,
.cred = server->cred,
.xprtsec = *xprtsec,
};
struct nfs_client *clp;
if (minorversion == 0)
__set_bit(NFS_CS_REUSEPORT, &cl_init.init_flags);
else
cl_init.max_connect = max_connect;
switch (proto) {
case XPRT_TRANSPORT_RDMA:
case XPRT_TRANSPORT_TCP:
case XPRT_TRANSPORT_TCP_TLS:
cl_init.nconnect = nconnect;
}
if (server->flags & NFS_MOUNT_NORESVPORT)
__set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
if (server->options & NFS_OPTION_MIGRATION)
__set_bit(NFS_CS_MIGRATION, &cl_init.init_flags);
if (test_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status))
__set_bit(NFS_CS_TSM_POSSIBLE, &cl_init.init_flags);
server->port = rpc_get_port((struct sockaddr *)addr);
/* Allocate or find a client reference we can use */
clp = nfs_get_client(&cl_init);
if (IS_ERR(clp))
return PTR_ERR(clp);
if (server->nfs_client == clp) {
nfs_put_client(clp);
return -ELOOP;
}
/*
* Query for the lease time on clientid setup or renewal
*
* Note that this will be set on nfs_clients that were created
* only for the DS role and did not set this bit, but now will
* serve a dual role.
*/
set_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state);
server->nfs_client = clp;
nfs_sysfs_add_server(server);
nfs_sysfs_link_rpc_client(server, clp->cl_rpcclient, "_state");
return 0;
}
/*
* Set up a pNFS Data Server client.
*
* Return any existing nfs_client that matches server address,port,version
* and minorversion.
*
* For a new nfs_client, use a soft mount (default), a low retrans and a
* low timeout interval so that if a connection is lost, we retry through
* the MDS.
*/
struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
const struct sockaddr_storage *ds_addr, int ds_addrlen,
int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans,
u32 minor_version)
{
struct rpc_timeout ds_timeout;
struct nfs_client *mds_clp = mds_srv->nfs_client;
struct nfs_client_initdata cl_init = {
.addr = ds_addr,
.addrlen = ds_addrlen,
.nodename = mds_clp->cl_rpcclient->cl_nodename,
.ip_addr = mds_clp->cl_ipaddr,
.nfs_mod = &nfs_v4,
.proto = ds_proto,
.minorversion = minor_version,
.net = mds_clp->cl_net,
.timeparms = &ds_timeout,
.cred = mds_srv->cred,
.xprtsec = mds_srv->nfs_client->cl_xprtsec,
};
char buf[INET6_ADDRSTRLEN + 1];
if (rpc_ntop((struct sockaddr *)ds_addr, buf, sizeof(buf)) <= 0)
return ERR_PTR(-EINVAL);
cl_init.hostname = buf;
switch (ds_proto) {
case XPRT_TRANSPORT_RDMA:
case XPRT_TRANSPORT_TCP:
case XPRT_TRANSPORT_TCP_TLS:
if (mds_clp->cl_nconnect > 1) {
cl_init.nconnect = mds_clp->cl_nconnect;
cl_init.max_connect = NFS_MAX_TRANSPORTS;
}
}
if (mds_srv->flags & NFS_MOUNT_NORESVPORT)
__set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
__set_bit(NFS_CS_DS, &cl_init.init_flags);
__set_bit(NFS_CS_PNFS, &cl_init.init_flags);
cl_init.max_connect = NFS_MAX_TRANSPORTS;
/*
* Set an authflavor equual to the MDS value. Use the MDS nfs_client
* cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS
* (section 13.1 RFC 5661).
*/
nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans);
return nfs_get_client(&cl_init);
}
EXPORT_SYMBOL_GPL(nfs4_set_ds_client);
/*
* Session has been established, and the client marked ready.
* Limit the mount rsize, wsize and dtsize using negotiated fore
* channel attributes.
*/
static void nfs4_session_limit_rwsize(struct nfs_server *server)
{
#ifdef CONFIG_NFS_V4_1
struct nfs4_session *sess;
u32 server_resp_sz;
u32 server_rqst_sz;
if (!nfs4_has_session(server->nfs_client))
return;
sess = server->nfs_client->cl_session;
server_resp_sz = sess->fc_attrs.max_resp_sz - nfs41_maxread_overhead;
server_rqst_sz = sess->fc_attrs.max_rqst_sz - nfs41_maxwrite_overhead;
if (server->dtsize > server_resp_sz)
server->dtsize = server_resp_sz;
if (server->rsize > server_resp_sz)
server->rsize = server_resp_sz;
if (server->wsize > server_rqst_sz)
server->wsize = server_rqst_sz;
#endif /* CONFIG_NFS_V4_1 */
}
/*
* Limit xattr sizes using the channel attributes.
*/
static void nfs4_session_limit_xasize(struct nfs_server *server)
{
#ifdef CONFIG_NFS_V4_2
struct nfs4_session *sess;
u32 server_gxa_sz;
u32 server_sxa_sz;
u32 server_lxa_sz;
if (!nfs4_has_session(server->nfs_client))
return;
sess = server->nfs_client->cl_session;
server_gxa_sz = sess->fc_attrs.max_resp_sz - nfs42_maxgetxattr_overhead;
server_sxa_sz = sess->fc_attrs.max_rqst_sz - nfs42_maxsetxattr_overhead;
server_lxa_sz = sess->fc_attrs.max_resp_sz -
nfs42_maxlistxattrs_overhead;
if (server->gxasize > server_gxa_sz)
server->gxasize = server_gxa_sz;
if (server->sxasize > server_sxa_sz)
server->sxasize = server_sxa_sz;
if (server->lxasize > server_lxa_sz)
server->lxasize = server_lxa_sz;
#endif
}
void nfs4_server_set_init_caps(struct nfs_server *server)
{
/* Set the basic capabilities */
server->caps |= server->nfs_client->cl_mvops->init_caps;
if (server->flags & NFS_MOUNT_NORDIRPLUS)
server->caps &= ~NFS_CAP_READDIRPLUS;
if (server->nfs_client->cl_proto == XPRT_TRANSPORT_RDMA)
server->caps &= ~NFS_CAP_READ_PLUS;
/*
* Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower
* authentication.
*/
if (nfs4_disable_idmapping &&
server->client->cl_auth->au_flavor == RPC_AUTH_UNIX)
server->caps |= NFS_CAP_UIDGID_NOMAP;
}
static int nfs4_server_common_setup(struct nfs_server *server,
struct nfs_fh *mntfh, bool auth_probe)
{
int error;
/* data servers support only a subset of NFSv4.1 */
if (is_ds_only_client(server->nfs_client))
return -EPROTONOSUPPORT;
/* We must ensure the session is initialised first */
error = nfs4_init_session(server->nfs_client);
if (error < 0)
goto out;
nfs4_server_set_init_caps(server);
/* Probe the root fh to retrieve its FSID and filehandle */
error = nfs4_get_rootfh(server, mntfh, auth_probe);
if (error < 0)
goto out;
dprintk("Server FSID: %llx:%llx\n",
(unsigned long long) server->fsid.major,
(unsigned long long) server->fsid.minor);
nfs_display_fhandle(mntfh, "Pseudo-fs root FH");
error = nfs_probe_server(server, mntfh);
if (error < 0)
goto out;
nfs4_session_limit_rwsize(server);
nfs4_session_limit_xasize(server);
if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
server->namelen = NFS4_MAXNAMLEN;
nfs_server_insert_lists(server);
server->mount_time = jiffies;
server->destroy = nfs4_destroy_server;
out:
return error;
}
/*
* Create a version 4 volume record
*/
static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc)
{
struct nfs_fs_context *ctx = nfs_fc2context(fc);
struct rpc_timeout timeparms;
int error;
nfs_init_timeout_values(&timeparms, ctx->nfs_server.protocol,
ctx->timeo, ctx->retrans);
/* Initialise the client representation from the mount data */
server->flags = ctx->flags;
server->options = ctx->options;
server->auth_info = ctx->auth_info;
/* Use the first specified auth flavor. If this flavor isn't
* allowed by the server, use the SECINFO path to try the
* other specified flavors */
if (ctx->auth_info.flavor_len >= 1)
ctx->selected_flavor = ctx->auth_info.flavors[0];
else
ctx->selected_flavor = RPC_AUTH_UNIX;
/* Get a client record */
error = nfs4_set_client(server,
ctx->nfs_server.hostname,
&ctx->nfs_server._address,
ctx->nfs_server.addrlen,
ctx->client_address,
ctx->nfs_server.protocol,
&timeparms,
ctx->minorversion,
ctx->nfs_server.nconnect,
ctx->nfs_server.max_connect,
fc->net_ns,
&ctx->xprtsec);
if (error < 0)
return error;
if (ctx->rsize)
server->rsize = nfs_io_size(ctx->rsize, server->nfs_client->cl_proto);
if (ctx->wsize)
server->wsize = nfs_io_size(ctx->wsize, server->nfs_client->cl_proto);
server->acregmin = ctx->acregmin * HZ;
server->acregmax = ctx->acregmax * HZ;
server->acdirmin = ctx->acdirmin * HZ;
server->acdirmax = ctx->acdirmax * HZ;
server->port = ctx->nfs_server.port;
return nfs_init_server_rpcclient(server, &timeparms,
ctx->selected_flavor);
}
/*
* Create a version 4 volume record
* - keyed on server and FSID
*/
struct nfs_server *nfs4_create_server(struct fs_context *fc)
{
struct nfs_fs_context *ctx = nfs_fc2context(fc);
struct nfs_server *server;
bool auth_probe;
int error;
server = nfs_alloc_server();
if (!server)
return ERR_PTR(-ENOMEM);
server->cred = get_cred(fc->cred);
auth_probe = ctx->auth_info.flavor_len < 1;
/* set up the general RPC client */
error = nfs4_init_server(server, fc);
if (error < 0)
goto error;
error = nfs4_server_common_setup(server, ctx->mntfh, auth_probe);
if (error < 0)
goto error;
return server;
error:
nfs_free_server(server);
return ERR_PTR(error);
}
/*
* Create an NFS4 referral server record
*/
struct nfs_server *nfs4_create_referral_server(struct fs_context *fc)
{
struct nfs_fs_context *ctx = nfs_fc2context(fc);
struct nfs_client *parent_client;
struct nfs_server *server, *parent_server;
int proto, error;
bool auth_probe;
server = nfs_alloc_server();
if (!server)
return ERR_PTR(-ENOMEM);
parent_server = NFS_SB(ctx->clone_data.sb);
parent_client = parent_server->nfs_client;
server->cred = get_cred(parent_server->cred);
/* Initialise the client representation from the parent server */
nfs_server_copy_userdata(server, parent_server);
/* Get a client representation */
#if IS_ENABLED(CONFIG_SUNRPC_XPRT_RDMA)
rpc_set_port(&ctx->nfs_server.address, NFS_RDMA_PORT);
error = nfs4_set_client(server,
ctx->nfs_server.hostname,
&ctx->nfs_server._address,
ctx->nfs_server.addrlen,
parent_client->cl_ipaddr,
XPRT_TRANSPORT_RDMA,
parent_server->client->cl_timeout,
parent_client->cl_mvops->minor_version,
parent_client->cl_nconnect,
parent_client->cl_max_connect,
parent_client->cl_net,
&parent_client->cl_xprtsec);
if (!error)
goto init_server;
#endif /* IS_ENABLED(CONFIG_SUNRPC_XPRT_RDMA) */
proto = XPRT_TRANSPORT_TCP;
if (parent_client->cl_xprtsec.policy != RPC_XPRTSEC_NONE)
proto = XPRT_TRANSPORT_TCP_TLS;
rpc_set_port(&ctx->nfs_server.address, NFS_PORT);
error = nfs4_set_client(server,
ctx->nfs_server.hostname,
&ctx->nfs_server._address,
ctx->nfs_server.addrlen,
parent_client->cl_ipaddr,
proto,
parent_server->client->cl_timeout,
parent_client->cl_mvops->minor_version,
parent_client->cl_nconnect,
parent_client->cl_max_connect,
parent_client->cl_net,
&parent_client->cl_xprtsec);
if (error < 0)
goto error;
#if IS_ENABLED(CONFIG_SUNRPC_XPRT_RDMA)
init_server:
#endif
error = nfs_init_server_rpcclient(server, parent_server->client->cl_timeout,
ctx->selected_flavor);
if (error < 0)
goto error;
auth_probe = parent_server->auth_info.flavor_len < 1;
error = nfs4_server_common_setup(server, ctx->mntfh, auth_probe);
if (error < 0)
goto error;
return server;
error:
nfs_free_server(server);
return ERR_PTR(error);
}
/**
* nfs4_update_server - Move an nfs_server to a different nfs_client
*
* @server: represents FSID to be moved
* @hostname: new end-point's hostname
* @sap: new end-point's socket address
* @salen: size of "sap"
* @net: net namespace
*
* The nfs_server must be quiescent before this function is invoked.
* Either its session is drained (NFSv4.1+), or its transport is
* plugged and drained (NFSv4.0).
*
* Returns zero on success, or a negative errno value.
*/
int nfs4_update_server(struct nfs_server *server, const char *hostname,
struct sockaddr_storage *sap, size_t salen, struct net *net)
{
struct nfs_client *clp = server->nfs_client;
struct rpc_clnt *clnt = server->client;
struct xprt_create xargs = {
.ident = clp->cl_proto,
.net = net,
.dstaddr = (struct sockaddr *)sap,
.addrlen = salen,
.servername = hostname,
/* cel: bleh. We might need to pass TLS parameters here */
};
char buf[INET6_ADDRSTRLEN + 1];
struct sockaddr_storage address;
struct sockaddr *localaddr = (struct sockaddr *)&address;
int error;
error = rpc_switch_client_transport(clnt, &xargs, clnt->cl_timeout);
if (error != 0)
return error;
error = rpc_localaddr(clnt, localaddr, sizeof(address));
if (error != 0)
return error;
if (rpc_ntop(localaddr, buf, sizeof(buf)) == 0)
return -EAFNOSUPPORT;
nfs_server_remove_lists(server);
set_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status);
error = nfs4_set_client(server, hostname, sap, salen, buf,
clp->cl_proto, clnt->cl_timeout,
clp->cl_minorversion,
clp->cl_nconnect, clp->cl_max_connect,
net, &clp->cl_xprtsec);
clear_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status);
if (error != 0) {
nfs_server_insert_lists(server);
return error;
}
nfs_put_client(clp);
if (server->nfs_client->cl_hostname == NULL) {
server->nfs_client->cl_hostname = kstrdup(hostname, GFP_KERNEL);
if (server->nfs_client->cl_hostname == NULL)
return -ENOMEM;
}
nfs_server_insert_lists(server);
return nfs_probe_server(server, NFS_FH(d_inode(server->super->s_root)));
}