Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index e84c187..14a7222 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -164,7 +164,7 @@
If you want your system to mount its root file system via NFS,
choose Y here. This is common practice for managing systems
without local permanent storage. For details, read
- <file:Documentation/filesystems/nfs/nfsroot.txt>.
+ <file:Documentation/admin-guide/nfs/nfsroot.rst>.
Most people say N here.
@@ -196,3 +196,21 @@
depends on NFS_FS && SUNRPC_DEBUG
select CRC32
default y
+
+config NFS_DISABLE_UDP_SUPPORT
+ bool "NFS: Disable NFS UDP protocol support"
+ depends on NFS_FS
+ default y
+ help
+ Choose Y here to disable the use of NFS over UDP. NFS over UDP
+ on modern networks (1Gb+) can lead to data corruption caused by
+ fragmentation during high loads.
+
+config NFS_V4_2_READ_PLUS
+ bool "NFS: Enable support for the NFSv4.2 READ_PLUS operation"
+ depends on NFS_V4_2
+ default n
+ help
+ This is intended for developers only. The READ_PLUS operation has
+ been shown to have issues under specific conditions and should not
+ be used in production.
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 34cdeae..22d11fd 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -9,7 +9,7 @@
nfs-y := client.o dir.o file.o getroot.o inode.o super.o \
io.o direct.o pagelist.o read.o symlink.o unlink.o \
write.o namespace.o mount_clnt.o nfstrace.o \
- export.o sysfs.o
+ export.o sysfs.o fs_context.o
nfs-$(CONFIG_ROOT_NFS) += nfsroot.o
nfs-$(CONFIG_SYSCTL) += sysctl.o
nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o
@@ -30,7 +30,7 @@
nfsv4-$(CONFIG_NFS_USE_LEGACY_DNS) += cache_lib.o
nfsv4-$(CONFIG_SYSCTL) += nfs4sysctl.o
nfsv4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o pnfs_nfs.o
-nfsv4-$(CONFIG_NFS_V4_2) += nfs42proc.o
+nfsv4-$(CONFIG_NFS_V4_2) += nfs42proc.o nfs42xattr.o
obj-$(CONFIG_PNFS_FILE_LAYOUT) += filelayout/
obj-$(CONFIG_PNFS_BLOCK) += blocklayout/
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 6902217..08108b6 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -476,7 +476,7 @@
err = ext_tree_remove(bl, true, 0, LLONG_MAX);
WARN_ON(err);
- kfree(bl);
+ kfree_rcu(bl, bl_layout.plh_rcu);
}
static struct pnfs_layout_hdr *__bl_alloc_layout_hdr(struct inode *inode,
@@ -753,7 +753,7 @@
case -ENODEV:
/* Our extent block devices are unavailable */
set_bit(NFS_LSEG_UNAVAILABLE, &lseg->pls_flags);
- /* Fall through */
+ fallthrough;
case 0:
return lseg;
default:
diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c
index 7a57ff2..8f7cff7 100644
--- a/fs/nfs/blocklayout/extent_tree.c
+++ b/fs/nfs/blocklayout/extent_tree.c
@@ -582,7 +582,7 @@
if (!arg->layoutupdate_pages)
return -ENOMEM;
- start_p = __vmalloc(buffer_size, GFP_NOFS, PAGE_KERNEL);
+ start_p = __vmalloc(buffer_size, GFP_NOFS);
if (!start_p) {
kfree(arg->layoutupdate_pages);
return -ENOMEM;
diff --git a/fs/nfs/blocklayout/rpc_pipefs.c b/fs/nfs/blocklayout/rpc_pipefs.c
index 9fb067a..ef9db13 100644
--- a/fs/nfs/blocklayout/rpc_pipefs.c
+++ b/fs/nfs/blocklayout/rpc_pipefs.c
@@ -79,7 +79,7 @@
goto out_free_data;
bl_msg = msg->data;
- bl_msg->type = BL_DEVICE_MOUNT,
+ bl_msg->type = BL_DEVICE_MOUNT;
bl_msg->totallen = b->simple.len;
nfs4_encode_simple(msg->data + sizeof(*bl_msg), b);
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index 8f34daf..ccd4f24 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -72,8 +72,8 @@
uint32_t bitmap[2];
uint64_t size;
uint64_t change_attr;
- struct timespec ctime;
- struct timespec mtime;
+ struct timespec64 ctime;
+ struct timespec64 mtime;
};
struct cb_recallargs {
@@ -127,7 +127,9 @@
#define RCA4_TYPE_MASK_OBJ_LAYOUT_MAX 9
#define RCA4_TYPE_MASK_OTHER_LAYOUT_MIN 12
#define RCA4_TYPE_MASK_OTHER_LAYOUT_MAX 15
-#define RCA4_TYPE_MASK_ALL 0xf31f
+#define PNFS_FF_RCA4_TYPE_MASK_READ 16
+#define PNFS_FF_RCA4_TYPE_MASK_RW 17
+#define RCA4_TYPE_MASK_ALL 0x3f31f
struct cb_recallanyargs {
uint32_t craa_objs_to_keep;
@@ -168,7 +170,7 @@
};
struct cb_devicenotifyargs {
- int ndevs;
+ uint32_t ndevs;
struct cb_devicenotifyitem *devs;
};
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index fc775b0..b44219c 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -26,7 +26,6 @@
struct cb_getattrargs *args = argp;
struct cb_getattrres *res = resp;
struct nfs_delegation *delegation;
- struct nfs_inode *nfsi;
struct inode *inode;
res->status = htonl(NFS4ERR_OP_NOT_IN_SESSION);
@@ -47,17 +46,16 @@
-ntohl(res->status));
goto out;
}
- nfsi = NFS_I(inode);
rcu_read_lock();
- delegation = rcu_dereference(nfsi->delegation);
+ delegation = nfs4_get_valid_delegation(inode);
if (delegation == NULL || (delegation->type & FMODE_WRITE) == 0)
goto out_iput;
res->size = i_size_read(inode);
res->change_attr = delegation->change_attr;
if (nfs_have_writebacks(inode))
res->change_attr++;
- res->ctime = timespec64_to_timespec(inode->i_ctime);
- res->mtime = timespec64_to_timespec(inode->i_mtime);
+ res->ctime = inode->i_ctime;
+ res->mtime = inode->i_mtime;
res->bitmap[0] = (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE) &
args->bitmap[0];
res->bitmap[1] = (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY) &
@@ -123,33 +121,31 @@
*/
static struct inode *nfs_layout_find_inode_by_stateid(struct nfs_client *clp,
const nfs4_stateid *stateid)
+ __must_hold(RCU)
{
struct nfs_server *server;
struct inode *inode;
struct pnfs_layout_hdr *lo;
+ rcu_read_lock();
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
- list_for_each_entry(lo, &server->layouts, plh_layouts) {
+ list_for_each_entry_rcu(lo, &server->layouts, plh_layouts) {
if (!pnfs_layout_is_valid(lo))
continue;
- if (stateid != NULL &&
- !nfs4_stateid_match_other(stateid, &lo->plh_stateid))
+ if (!nfs4_stateid_match_other(stateid, &lo->plh_stateid))
continue;
- inode = igrab(lo->plh_inode);
- if (!inode)
- return ERR_PTR(-EAGAIN);
- if (!nfs_sb_active(inode->i_sb)) {
- rcu_read_unlock();
- spin_unlock(&clp->cl_lock);
- iput(inode);
- spin_lock(&clp->cl_lock);
- rcu_read_lock();
- return ERR_PTR(-EAGAIN);
- }
- return inode;
+ if (nfs_sb_active(server->super))
+ inode = igrab(lo->plh_inode);
+ else
+ inode = ERR_PTR(-EAGAIN);
+ rcu_read_unlock();
+ if (inode)
+ return inode;
+ nfs_sb_deactive(server->super);
+ return ERR_PTR(-EAGAIN);
}
}
-
+ rcu_read_unlock();
return ERR_PTR(-ENOENT);
}
@@ -167,28 +163,26 @@
struct inode *inode;
struct pnfs_layout_hdr *lo;
+ rcu_read_lock();
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
- list_for_each_entry(lo, &server->layouts, plh_layouts) {
+ list_for_each_entry_rcu(lo, &server->layouts, plh_layouts) {
nfsi = NFS_I(lo->plh_inode);
if (nfs_compare_fh(fh, &nfsi->fh))
continue;
if (nfsi->layout != lo)
continue;
- inode = igrab(lo->plh_inode);
- if (!inode)
- return ERR_PTR(-EAGAIN);
- if (!nfs_sb_active(inode->i_sb)) {
- rcu_read_unlock();
- spin_unlock(&clp->cl_lock);
- iput(inode);
- spin_lock(&clp->cl_lock);
- rcu_read_lock();
- return ERR_PTR(-EAGAIN);
- }
- return inode;
+ if (nfs_sb_active(server->super))
+ inode = igrab(lo->plh_inode);
+ else
+ inode = ERR_PTR(-EAGAIN);
+ rcu_read_unlock();
+ if (inode)
+ return inode;
+ nfs_sb_deactive(server->super);
+ return ERR_PTR(-EAGAIN);
}
}
-
+ rcu_read_unlock();
return ERR_PTR(-ENOENT);
}
@@ -198,14 +192,9 @@
{
struct inode *inode;
- spin_lock(&clp->cl_lock);
- rcu_read_lock();
inode = nfs_layout_find_inode_by_stateid(clp, stateid);
if (inode == ERR_PTR(-ENOENT))
inode = nfs_layout_find_inode_by_fh(clp, fh);
- rcu_read_unlock();
- spin_unlock(&clp->cl_lock);
-
return inode;
}
@@ -284,7 +273,7 @@
goto unlock;
}
- pnfs_set_layout_stateid(lo, &args->cbl_stateid, true);
+ pnfs_set_layout_stateid(lo, &args->cbl_stateid, NULL, true);
switch (pnfs_mark_matching_lsegs_return(lo, &free_me_list,
&args->cbl_range,
be32_to_cpu(args->cbl_stateid.seqid))) {
@@ -364,7 +353,7 @@
struct cb_process_state *cps)
{
struct cb_devicenotifyargs *args = argp;
- int i;
+ uint32_t i;
__be32 res = 0;
struct nfs_client *clp = cps->clp;
struct nfs_server *server = NULL;
@@ -609,6 +598,7 @@
struct cb_recallanyargs *args = argp;
__be32 status;
fmode_t flags = 0;
+ bool schedule_manager = false;
status = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
if (!cps->clp) /* set in cb_sequence */
@@ -631,6 +621,18 @@
if (args->craa_type_mask & BIT(RCA4_TYPE_MASK_FILE_LAYOUT))
pnfs_recall_all_layouts(cps->clp);
+
+ if (args->craa_type_mask & BIT(PNFS_FF_RCA4_TYPE_MASK_READ)) {
+ set_bit(NFS4CLNT_RECALL_ANY_LAYOUT_READ, &cps->clp->cl_state);
+ schedule_manager = true;
+ }
+ if (args->craa_type_mask & BIT(PNFS_FF_RCA4_TYPE_MASK_RW)) {
+ set_bit(NFS4CLNT_RECALL_ANY_LAYOUT_RW, &cps->clp->cl_state);
+ schedule_manager = true;
+ }
+ if (schedule_manager)
+ nfs4_schedule_state_manager(cps->clp);
+
out:
dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
return status;
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 73a5a5e..1725079 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -18,6 +18,7 @@
#include "callback.h"
#include "internal.h"
#include "nfs4session.h"
+#include "nfs4trace.h"
#define CB_OP_TAGLEN_MAXSZ (512)
#define CB_OP_HDR_RES_MAXSZ (2 * 4) // opcode, status
@@ -258,11 +259,9 @@
void *argp)
{
struct cb_devicenotifyargs *args = argp;
+ uint32_t tmp, n, i;
__be32 *p;
__be32 status = 0;
- u32 tmp;
- int n, i;
- args->ndevs = 0;
/* Num of device notifications */
p = xdr_inline_decode(xdr, sizeof(uint32_t));
@@ -271,7 +270,7 @@
goto out;
}
n = ntohl(*p++);
- if (n <= 0)
+ if (n == 0)
goto out;
if (n > ULONG_MAX / sizeof(*args->devs)) {
status = htonl(NFS4ERR_BADXDR);
@@ -330,19 +329,21 @@
dev->cbd_immediate = 0;
}
- args->ndevs++;
-
dprintk("%s: type %d layout 0x%x immediate %d\n",
__func__, dev->cbd_notify_type, dev->cbd_layout_type,
dev->cbd_immediate);
}
+ args->ndevs = n;
+ dprintk("%s: ndevs %d\n", __func__, args->ndevs);
+ return 0;
+err:
+ kfree(args->devs);
out:
+ args->devs = NULL;
+ args->ndevs = 0;
dprintk("%s: status %d ndevs %d\n",
__func__, ntohl(status), args->ndevs);
return status;
-err:
- kfree(args->devs);
- goto out;
}
static __be32 decode_sessionid(struct xdr_stream *xdr,
@@ -627,7 +628,7 @@
return 0;
}
-static __be32 encode_attr_time(struct xdr_stream *xdr, const struct timespec *time)
+static __be32 encode_attr_time(struct xdr_stream *xdr, const struct timespec64 *time)
{
__be32 *p;
@@ -639,14 +640,14 @@
return 0;
}
-static __be32 encode_attr_ctime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec *time)
+static __be32 encode_attr_ctime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec64 *time)
{
if (!(bitmap[1] & FATTR4_WORD1_TIME_METADATA))
return 0;
return encode_attr_time(xdr,time);
}
-static __be32 encode_attr_mtime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec *time)
+static __be32 encode_attr_mtime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec64 *time)
{
if (!(bitmap[1] & FATTR4_WORD1_TIME_MODIFY))
return 0;
@@ -946,9 +947,13 @@
if (hdr_arg.minorversion == 0) {
cps.clp = nfs4_find_client_ident(SVC_NET(rqstp), hdr_arg.cb_ident);
- if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp)) {
- if (cps.clp)
- nfs_put_client(cps.clp);
+ if (!cps.clp) {
+ trace_nfs_cb_no_clp(rqstp->rq_xid, hdr_arg.cb_ident);
+ goto out_invalidcred;
+ }
+ if (!check_gss_callback_principal(cps.clp, rqstp)) {
+ trace_nfs_cb_badprinc(rqstp->rq_xid, hdr_arg.cb_ident);
+ nfs_put_client(cps.clp);
goto out_invalidcred;
}
}
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index af838d1..818ff8b 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -50,6 +50,7 @@
#include "nfs.h"
#include "netns.h"
#include "sysfs.h"
+#include "nfs42.h"
#define NFSDBG_FACILITY NFSDBG_CLIENT
@@ -176,6 +177,7 @@
INIT_LIST_HEAD(&clp->cl_superblocks);
clp->cl_rpcclient = ERR_PTR(-EINVAL);
+ clp->cl_flags = cl_init->init_flags;
clp->cl_proto = cl_init->proto;
clp->cl_nconnect = cl_init->nconnect;
clp->cl_net = get_net(cl_init->net);
@@ -313,6 +315,12 @@
/* Match nfsv4 minorversion */
if (clp->cl_minorversion != data->minorversion)
continue;
+
+ /* Match request for a dedicated DS */
+ if (test_bit(NFS_CS_DS, &data->init_flags) !=
+ test_bit(NFS_CS_DS, &clp->cl_flags))
+ continue;
+
/* Match the full socket address */
if (!rpc_cmp_addr_port(sap, clap))
/* Match all xprt_switch full socket addresses */
@@ -419,7 +427,6 @@
list_add_tail(&new->cl_share_link,
&nn->nfs_client_list);
spin_unlock(&nn->nfs_client_lock);
- new->cl_flags = cl_init->init_flags;
return rpc_ops->init_client(new, cl_init);
}
@@ -469,6 +476,7 @@
to->to_maxval = to->to_initval;
to->to_exponential = 0;
break;
+#ifndef CONFIG_NFS_DISABLE_UDP_SUPPORT
case XPRT_TRANSPORT_UDP:
if (retrans == NFS_UNSPEC_RETRANS)
to->to_retries = NFS_DEF_UDP_RETRANS;
@@ -479,6 +487,7 @@
to->to_maxval = NFS_MAX_UDP_TIMEOUT;
to->to_exponential = 1;
break;
+#endif
default:
BUG();
}
@@ -516,6 +525,10 @@
args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
if (test_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags))
args.flags |= RPC_CLNT_CREATE_INFINITE_SLOTS;
+ if (test_bit(NFS_CS_NOPING, &clp->cl_flags))
+ args.flags |= RPC_CLNT_CREATE_NOPING;
+ if (test_bit(NFS_CS_REUSEPORT, &clp->cl_flags))
+ args.flags |= RPC_CLNT_CREATE_REUSEPORT;
if (!IS_ERR(clp->cl_rpcclient))
return 0;
@@ -571,8 +584,10 @@
default:
nlm_init.protocol = IPPROTO_TCP;
break;
+#ifndef CONFIG_NFS_DISABLE_UDP_SUPPORT
case XPRT_TRANSPORT_UDP:
nlm_init.protocol = IPPROTO_UDP;
+#endif
}
host = nlmclnt_init(&nlm_init);
@@ -649,27 +664,28 @@
* Create a version 2 or 3 client
*/
static int nfs_init_server(struct nfs_server *server,
- const struct nfs_parsed_mount_data *data,
- struct nfs_subversion *nfs_mod)
+ const struct fs_context *fc)
{
+ const struct nfs_fs_context *ctx = nfs_fc2context(fc);
struct rpc_timeout timeparms;
struct nfs_client_initdata cl_init = {
- .hostname = data->nfs_server.hostname,
- .addr = (const struct sockaddr *)&data->nfs_server.address,
- .addrlen = data->nfs_server.addrlen,
- .nfs_mod = nfs_mod,
- .proto = data->nfs_server.protocol,
- .net = data->net,
+ .hostname = ctx->nfs_server.hostname,
+ .addr = (const struct sockaddr *)&ctx->nfs_server.address,
+ .addrlen = ctx->nfs_server.addrlen,
+ .nfs_mod = ctx->nfs_mod,
+ .proto = ctx->nfs_server.protocol,
+ .net = fc->net_ns,
.timeparms = &timeparms,
.cred = server->cred,
- .nconnect = data->nfs_server.nconnect,
+ .nconnect = ctx->nfs_server.nconnect,
+ .init_flags = (1UL << NFS_CS_REUSEPORT),
};
struct nfs_client *clp;
int error;
- nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
- data->timeo, data->retrans);
- if (data->flags & NFS_MOUNT_NORESVPORT)
+ nfs_init_timeout_values(&timeparms, ctx->nfs_server.protocol,
+ ctx->timeo, ctx->retrans);
+ if (ctx->flags & NFS_MOUNT_NORESVPORT)
set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
/* Allocate or find a client reference we can use */
@@ -680,46 +696,46 @@
server->nfs_client = clp;
/* Initialise the client representation from the mount data */
- server->flags = data->flags;
- server->options = data->options;
+ server->flags = ctx->flags;
+ server->options = ctx->options;
server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP|
NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME;
- if (data->rsize)
- server->rsize = nfs_block_size(data->rsize, NULL);
- if (data->wsize)
- server->wsize = nfs_block_size(data->wsize, NULL);
+ if (ctx->rsize)
+ server->rsize = nfs_block_size(ctx->rsize, NULL);
+ if (ctx->wsize)
+ server->wsize = nfs_block_size(ctx->wsize, NULL);
- server->acregmin = data->acregmin * HZ;
- server->acregmax = data->acregmax * HZ;
- server->acdirmin = data->acdirmin * HZ;
- server->acdirmax = data->acdirmax * HZ;
+ server->acregmin = ctx->acregmin * HZ;
+ server->acregmax = ctx->acregmax * HZ;
+ server->acdirmin = ctx->acdirmin * HZ;
+ server->acdirmax = ctx->acdirmax * HZ;
/* Start lockd here, before we might error out */
error = nfs_start_lockd(server);
if (error < 0)
goto error;
- server->port = data->nfs_server.port;
- server->auth_info = data->auth_info;
+ server->port = ctx->nfs_server.port;
+ server->auth_info = ctx->auth_info;
error = nfs_init_server_rpcclient(server, &timeparms,
- data->selected_flavor);
+ ctx->selected_flavor);
if (error < 0)
goto error;
/* Preserve the values of mount_server-related mount options */
- if (data->mount_server.addrlen) {
- memcpy(&server->mountd_address, &data->mount_server.address,
- data->mount_server.addrlen);
- server->mountd_addrlen = data->mount_server.addrlen;
+ if (ctx->mount_server.addrlen) {
+ memcpy(&server->mountd_address, &ctx->mount_server.address,
+ ctx->mount_server.addrlen);
+ server->mountd_addrlen = ctx->mount_server.addrlen;
}
- server->mountd_version = data->mount_server.version;
- server->mountd_port = data->mount_server.port;
- server->mountd_protocol = data->mount_server.protocol;
+ server->mountd_version = ctx->mount_server.version;
+ server->mountd_port = ctx->mount_server.port;
+ server->mountd_protocol = ctx->mount_server.protocol;
- server->namelen = data->namlen;
+ server->namelen = ctx->namlen;
return 0;
error:
@@ -734,7 +750,7 @@
static void nfs_server_set_fsinfo(struct nfs_server *server,
struct nfs_fsinfo *fsinfo)
{
- unsigned long max_rpc_payload;
+ unsigned long max_rpc_payload, raw_max_rpc_payload;
/* Work out a lot of parameters */
if (server->rsize == 0)
@@ -747,7 +763,9 @@
if (fsinfo->wtmax >= 512 && server->wsize > fsinfo->wtmax)
server->wsize = nfs_block_size(fsinfo->wtmax, NULL);
- max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL);
+ raw_max_rpc_payload = rpc_max_payload(server->client);
+ max_rpc_payload = nfs_block_size(raw_max_rpc_payload, NULL);
+
if (server->rsize > max_rpc_payload)
server->rsize = max_rpc_payload;
if (server->rsize > NFS_MAX_FILE_IO_SIZE)
@@ -780,6 +798,21 @@
server->clone_blksize = fsinfo->clone_blksize;
/* We're airborne Set socket buffersize */
rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100);
+
+#ifdef CONFIG_NFS_V4_2
+ /*
+ * Defaults until limited by the session parameters.
+ */
+ server->gxasize = min_t(unsigned int, raw_max_rpc_payload,
+ XATTR_SIZE_MAX);
+ server->sxasize = min_t(unsigned int, raw_max_rpc_payload,
+ XATTR_SIZE_MAX);
+ server->lxasize = min_t(unsigned int, raw_max_rpc_payload,
+ nfs42_listxattr_xdrsize(XATTR_LIST_MAX));
+
+ if (fsinfo->xattr_support)
+ server->caps |= NFS_CAP_XATTR;
+#endif
}
/*
@@ -941,9 +974,9 @@
* Create a version 2 or 3 volume record
* - keyed on server and FSID
*/
-struct nfs_server *nfs_create_server(struct nfs_mount_info *mount_info,
- struct nfs_subversion *nfs_mod)
+struct nfs_server *nfs_create_server(struct fs_context *fc)
{
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
struct nfs_server *server;
struct nfs_fattr *fattr;
int error;
@@ -960,18 +993,18 @@
goto error;
/* Get a client representation */
- error = nfs_init_server(server, mount_info->parsed, nfs_mod);
+ error = nfs_init_server(server, fc);
if (error < 0)
goto error;
/* Probe the root fh to retrieve its FSID */
- error = nfs_probe_fsinfo(server, mount_info->mntfh, fattr);
+ error = nfs_probe_fsinfo(server, ctx->mntfh, fattr);
if (error < 0)
goto error;
if (server->nfs_client->rpc_ops->version == 3) {
if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN)
server->namelen = NFS3_MAXNAMLEN;
- if (!(mount_info->parsed->flags & NFS_MOUNT_NORDIRPLUS))
+ if (!(ctx->flags & NFS_MOUNT_NORDIRPLUS))
server->caps |= NFS_CAP_READDIRPLUS;
} else {
if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN)
@@ -979,8 +1012,8 @@
}
if (!(fattr->valid & NFS_ATTR_FATTR)) {
- error = nfs_mod->rpc_ops->getattr(server, mount_info->mntfh,
- fattr, NULL, NULL);
+ error = ctx->nfs_mod->rpc_ops->getattr(server, ctx->mntfh,
+ fattr, NULL, NULL);
if (error < 0) {
dprintk("nfs_create_server: getattr error = %d\n", -error);
goto error;
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index af549d7..d6ac2c4 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -25,13 +25,46 @@
#include "internal.h"
#include "nfs4trace.h"
-static void nfs_free_delegation(struct nfs_delegation *delegation)
+#define NFS_DEFAULT_DELEGATION_WATERMARK (5000U)
+
+static atomic_long_t nfs_active_delegations;
+static unsigned nfs_delegation_watermark = NFS_DEFAULT_DELEGATION_WATERMARK;
+
+static void __nfs_free_delegation(struct nfs_delegation *delegation)
{
put_cred(delegation->cred);
delegation->cred = NULL;
kfree_rcu(delegation, rcu);
}
+static void nfs_mark_delegation_revoked(struct nfs_delegation *delegation)
+{
+ if (!test_and_set_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) {
+ delegation->stateid.type = NFS4_INVALID_STATEID_TYPE;
+ atomic_long_dec(&nfs_active_delegations);
+ if (!test_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
+ nfs_clear_verifier_delegated(delegation->inode);
+ }
+}
+
+static struct nfs_delegation *nfs_get_delegation(struct nfs_delegation *delegation)
+{
+ refcount_inc(&delegation->refcount);
+ return delegation;
+}
+
+static void nfs_put_delegation(struct nfs_delegation *delegation)
+{
+ if (refcount_dec_and_test(&delegation->refcount))
+ __nfs_free_delegation(delegation);
+}
+
+static void nfs_free_delegation(struct nfs_delegation *delegation)
+{
+ nfs_mark_delegation_revoked(delegation);
+ nfs_put_delegation(delegation);
+}
+
/**
* nfs_mark_delegation_referenced - set delegation's REFERENCED flag
* @delegation: delegation to process
@@ -42,6 +75,13 @@
set_bit(NFS_DELEGATION_REFERENCED, &delegation->flags);
}
+static void nfs_mark_return_delegation(struct nfs_server *server,
+ struct nfs_delegation *delegation)
+{
+ set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
+ set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state);
+}
+
static bool
nfs4_is_valid_delegation(const struct nfs_delegation *delegation,
fmode_t flags)
@@ -199,7 +239,7 @@
delegation = rcu_dereference(NFS_I(inode)->delegation);
if (delegation != NULL) {
spin_lock(&delegation->lock);
- if (delegation->inode != NULL) {
+ if (nfs4_is_valid_delegation(delegation, 0)) {
nfs4_stateid_copy(&delegation->stateid, stateid);
delegation->type = type;
delegation->pagemod_limit = pagemod_limit;
@@ -222,14 +262,18 @@
static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync)
{
+ const struct cred *cred;
int res = 0;
- if (!test_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
- res = nfs4_proc_delegreturn(inode,
- delegation->cred,
+ if (!test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) {
+ spin_lock(&delegation->lock);
+ cred = get_cred(delegation->cred);
+ spin_unlock(&delegation->lock);
+ res = nfs4_proc_delegreturn(inode, cred,
&delegation->stateid,
issync);
- nfs_free_delegation(delegation);
+ put_cred(cred);
+ }
return res;
}
@@ -255,9 +299,14 @@
if (delegation == NULL)
goto out;
spin_lock(&delegation->lock);
- if (!test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
- ret = delegation;
+ if (!test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) {
+ clear_bit(NFS_DELEGATION_RETURN_DELAYED, &delegation->flags);
+ /* Refcount matched in nfs_end_delegation_return() */
+ ret = nfs_get_delegation(delegation);
+ }
spin_unlock(&delegation->lock);
+ if (ret)
+ nfs_clear_verifier_delegated(&nfsi->vfs_inode);
out:
return ret;
}
@@ -273,16 +322,17 @@
return delegation;
}
-static void
-nfs_abort_delegation_return(struct nfs_delegation *delegation,
- struct nfs_client *clp)
+static void nfs_abort_delegation_return(struct nfs_delegation *delegation,
+ struct nfs_client *clp, int err)
{
spin_lock(&delegation->lock);
clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
- set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
+ if (err == -EAGAIN) {
+ set_bit(NFS_DELEGATION_RETURN_DELAYED, &delegation->flags);
+ set_bit(NFS4CLNT_DELEGRETURN_DELAYED, &clp->cl_state);
+ }
spin_unlock(&delegation->lock);
- set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
}
static struct nfs_delegation *
@@ -298,7 +348,10 @@
return NULL;
spin_lock(&delegation->lock);
- set_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
+ if (!delegation->inode) {
+ spin_unlock(&delegation->lock);
+ return NULL;
+ }
list_del_rcu(&delegation->super_list);
delegation->inode = NULL;
rcu_assign_pointer(nfsi->delegation, NULL);
@@ -325,10 +378,24 @@
struct nfs_server *server = NFS_SERVER(inode);
struct nfs_delegation *delegation;
- delegation = nfs_start_delegation_return(nfsi);
- if (delegation == NULL)
- return NULL;
- return nfs_detach_delegation(nfsi, delegation, server);
+ rcu_read_lock();
+ delegation = rcu_dereference(nfsi->delegation);
+ if (delegation != NULL)
+ delegation = nfs_detach_delegation(nfsi, delegation, server);
+ rcu_read_unlock();
+ return delegation;
+}
+
+static void
+nfs_update_delegation_cred(struct nfs_delegation *delegation,
+ const struct cred *cred)
+{
+ const struct cred *old;
+
+ if (cred_fscmp(delegation->cred, cred) != 0) {
+ old = xchg(&delegation->cred, get_cred(cred));
+ put_cred(old);
+ }
}
static void
@@ -339,6 +406,14 @@
delegation->stateid.seqid = update->stateid.seqid;
smp_wmb();
delegation->type = update->type;
+ delegation->pagemod_limit = update->pagemod_limit;
+ if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) {
+ delegation->change_attr = update->change_attr;
+ nfs_update_delegation_cred(delegation, update->cred);
+ /* smp_mb__before_atomic() is implicit due to xchg() */
+ clear_bit(NFS_DELEGATION_REVOKED, &delegation->flags);
+ atomic_long_inc(&nfs_active_delegations);
+ }
}
}
@@ -368,6 +443,7 @@
if (delegation == NULL)
return -ENOMEM;
nfs4_stateid_copy(&delegation->stateid, stateid);
+ refcount_set(&delegation->refcount, 1);
delegation->type = type;
delegation->pagemod_limit = pagemod_limit;
delegation->change_attr = inode_peek_iversion_raw(inode);
@@ -379,14 +455,18 @@
spin_lock(&clp->cl_lock);
old_delegation = rcu_dereference_protected(nfsi->delegation,
lockdep_is_held(&clp->cl_lock));
- if (old_delegation != NULL) {
- /* Is this an update of the existing delegation? */
- if (nfs4_stateid_match_other(&old_delegation->stateid,
- &delegation->stateid)) {
- nfs_update_inplace_delegation(old_delegation,
- delegation);
- goto out;
- }
+ if (old_delegation == NULL)
+ goto add_new;
+ /* Is this an update of the existing delegation? */
+ if (nfs4_stateid_match_other(&old_delegation->stateid,
+ &delegation->stateid)) {
+ spin_lock(&old_delegation->lock);
+ nfs_update_inplace_delegation(old_delegation,
+ delegation);
+ spin_unlock(&old_delegation->lock);
+ goto out;
+ }
+ if (!test_bit(NFS_DELEGATION_REVOKED, &old_delegation->flags)) {
/*
* Deal with broken servers that hand out two
* delegations for the same file.
@@ -405,15 +485,17 @@
if (test_and_set_bit(NFS_DELEGATION_RETURNING,
&old_delegation->flags))
goto out;
- freeme = nfs_detach_delegation_locked(nfsi,
- old_delegation, clp);
- if (freeme == NULL)
- goto out;
}
+ freeme = nfs_detach_delegation_locked(nfsi, old_delegation, clp);
+ if (freeme == NULL)
+ goto out;
+add_new:
list_add_tail_rcu(&delegation->super_list, &server->delegations);
rcu_assign_pointer(nfsi->delegation, delegation);
delegation = NULL;
+ atomic_long_inc(&nfs_active_delegations);
+
trace_nfs4_set_delegation(inode, type);
spin_lock(&inode->i_lock);
@@ -423,9 +505,11 @@
out:
spin_unlock(&clp->cl_lock);
if (delegation != NULL)
- nfs_free_delegation(delegation);
- if (freeme != NULL)
+ __nfs_free_delegation(delegation);
+ if (freeme != NULL) {
nfs_do_return_delegation(inode, freeme, 0);
+ nfs_free_delegation(freeme);
+ }
return status;
}
@@ -435,7 +519,6 @@
static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation *delegation, int issync)
{
struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
- struct nfs_inode *nfsi = NFS_I(inode);
int err = 0;
if (delegation == NULL)
@@ -454,14 +537,14 @@
} while (err == 0);
if (err) {
- nfs_abort_delegation_return(delegation, clp);
+ nfs_abort_delegation_return(delegation, clp, err);
goto out;
}
- if (!nfs_detach_delegation(nfsi, delegation, NFS_SERVER(inode)))
- goto out;
err = nfs_do_return_delegation(inode, delegation, issync);
out:
+ /* Refcount matched in nfs_start_delegation_return_locked() */
+ nfs_put_delegation(delegation);
return err;
}
@@ -469,11 +552,9 @@
{
bool ret = false;
- if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
- goto out;
if (test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags))
ret = true;
- if (test_and_clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags) && !ret) {
+ else if (test_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags)) {
struct inode *inode;
spin_lock(&delegation->lock);
@@ -482,6 +563,117 @@
ret = true;
spin_unlock(&delegation->lock);
}
+ if (ret)
+ clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags);
+ if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags) ||
+ test_bit(NFS_DELEGATION_RETURN_DELAYED, &delegation->flags) ||
+ test_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
+ ret = false;
+
+ return ret;
+}
+
+static int nfs_server_return_marked_delegations(struct nfs_server *server,
+ void __always_unused *data)
+{
+ struct nfs_delegation *delegation;
+ struct nfs_delegation *prev;
+ struct inode *inode;
+ struct inode *place_holder = NULL;
+ struct nfs_delegation *place_holder_deleg = NULL;
+ int err = 0;
+
+restart:
+ /*
+ * To avoid quadratic looping we hold a reference
+ * to an inode place_holder. Each time we restart, we
+ * list delegation in the server from the delegations
+ * of that inode.
+ * prev is an RCU-protected pointer to a delegation which
+ * wasn't marked for return and might be a good choice for
+ * the next place_holder.
+ */
+ prev = NULL;
+ delegation = NULL;
+ rcu_read_lock();
+ if (place_holder)
+ delegation = rcu_dereference(NFS_I(place_holder)->delegation);
+ if (!delegation || delegation != place_holder_deleg)
+ delegation = list_entry_rcu(server->delegations.next,
+ struct nfs_delegation, super_list);
+ list_for_each_entry_from_rcu(delegation, &server->delegations, super_list) {
+ struct inode *to_put = NULL;
+
+ if (test_bit(NFS_DELEGATION_INODE_FREEING, &delegation->flags))
+ continue;
+ if (!nfs_delegation_need_return(delegation)) {
+ if (nfs4_is_valid_delegation(delegation, 0))
+ prev = delegation;
+ continue;
+ }
+
+ if (prev) {
+ struct inode *tmp = nfs_delegation_grab_inode(prev);
+ if (tmp) {
+ to_put = place_holder;
+ place_holder = tmp;
+ place_holder_deleg = prev;
+ }
+ }
+
+ inode = nfs_delegation_grab_inode(delegation);
+ if (inode == NULL) {
+ rcu_read_unlock();
+ iput(to_put);
+ goto restart;
+ }
+ delegation = nfs_start_delegation_return_locked(NFS_I(inode));
+ rcu_read_unlock();
+
+ iput(to_put);
+
+ err = nfs_end_delegation_return(inode, delegation, 0);
+ iput(inode);
+ cond_resched();
+ if (!err)
+ goto restart;
+ set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state);
+ goto out;
+ }
+ rcu_read_unlock();
+out:
+ iput(place_holder);
+ return err;
+}
+
+static bool nfs_server_clear_delayed_delegations(struct nfs_server *server)
+{
+ struct nfs_delegation *d;
+ bool ret = false;
+
+ list_for_each_entry_rcu (d, &server->delegations, super_list) {
+ if (!test_bit(NFS_DELEGATION_RETURN_DELAYED, &d->flags))
+ continue;
+ nfs_mark_return_delegation(server, d);
+ clear_bit(NFS_DELEGATION_RETURN_DELAYED, &d->flags);
+ ret = true;
+ }
+ return ret;
+}
+
+static bool nfs_client_clear_delayed_delegations(struct nfs_client *clp)
+{
+ struct nfs_server *server;
+ bool ret = false;
+
+ if (!test_and_clear_bit(NFS4CLNT_DELEGRETURN_DELAYED, &clp->cl_state))
+ goto out;
+ rcu_read_lock();
+ list_for_each_entry_rcu (server, &clp->cl_superblocks, client_link) {
+ if (nfs_server_clear_delayed_delegations(server))
+ ret = true;
+ }
+ rcu_read_unlock();
out:
return ret;
}
@@ -498,106 +690,35 @@
*/
int nfs_client_return_marked_delegations(struct nfs_client *clp)
{
- struct nfs_delegation *delegation;
- struct nfs_delegation *prev;
- struct nfs_server *server;
- struct inode *inode;
- struct inode *place_holder = NULL;
- struct nfs_delegation *place_holder_deleg = NULL;
- int err = 0;
-
-restart:
- /*
- * To avoid quadratic looping we hold a reference
- * to an inode place_holder. Each time we restart, we
- * list nfs_servers from the server of that inode, and
- * delegation in the server from the delegations of that
- * inode.
- * prev is an RCU-protected pointer to a delegation which
- * wasn't marked for return and might be a good choice for
- * the next place_holder.
- */
- rcu_read_lock();
- prev = NULL;
- if (place_holder)
- server = NFS_SERVER(place_holder);
- else
- server = list_entry_rcu(clp->cl_superblocks.next,
- struct nfs_server, client_link);
- list_for_each_entry_from_rcu(server, &clp->cl_superblocks, client_link) {
- delegation = NULL;
- if (place_holder && server == NFS_SERVER(place_holder))
- delegation = rcu_dereference(NFS_I(place_holder)->delegation);
- if (!delegation || delegation != place_holder_deleg)
- delegation = list_entry_rcu(server->delegations.next,
- struct nfs_delegation, super_list);
- list_for_each_entry_from_rcu(delegation, &server->delegations, super_list) {
- struct inode *to_put = NULL;
-
- if (!nfs_delegation_need_return(delegation)) {
- prev = delegation;
- continue;
- }
- if (!nfs_sb_active(server->super))
- break; /* continue in outer loop */
-
- if (prev) {
- struct inode *tmp;
-
- tmp = nfs_delegation_grab_inode(prev);
- if (tmp) {
- to_put = place_holder;
- place_holder = tmp;
- place_holder_deleg = prev;
- }
- }
-
- inode = nfs_delegation_grab_inode(delegation);
- if (inode == NULL) {
- rcu_read_unlock();
- if (to_put)
- iput(to_put);
- nfs_sb_deactive(server->super);
- goto restart;
- }
- delegation = nfs_start_delegation_return_locked(NFS_I(inode));
- rcu_read_unlock();
-
- if (to_put)
- iput(to_put);
-
- err = nfs_end_delegation_return(inode, delegation, 0);
- iput(inode);
- nfs_sb_deactive(server->super);
- cond_resched();
- if (!err)
- goto restart;
- set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
- if (place_holder)
- iput(place_holder);
- return err;
- }
- }
- rcu_read_unlock();
- if (place_holder)
- iput(place_holder);
+ int err = nfs_client_for_each_server(
+ clp, nfs_server_return_marked_delegations, NULL);
+ if (err)
+ return err;
+ /* If a return was delayed, sleep to prevent hard looping */
+ if (nfs_client_clear_delayed_delegations(clp))
+ ssleep(1);
return 0;
}
/**
- * nfs_inode_return_delegation_noreclaim - return delegation, don't reclaim opens
+ * nfs_inode_evict_delegation - return delegation, don't reclaim opens
* @inode: inode to process
*
* Does not protect against delegation reclaims, therefore really only safe
- * to be called from nfs4_clear_inode().
+ * to be called from nfs4_clear_inode(). Guaranteed to always free
+ * the delegation structure.
*/
-void nfs_inode_return_delegation_noreclaim(struct inode *inode)
+void nfs_inode_evict_delegation(struct inode *inode)
{
struct nfs_delegation *delegation;
delegation = nfs_inode_detach_delegation(inode);
- if (delegation != NULL)
+ if (delegation != NULL) {
+ set_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
+ set_bit(NFS_DELEGATION_INODE_FREEING, &delegation->flags);
nfs_do_return_delegation(inode, delegation, 1);
+ nfs_free_delegation(delegation);
+ }
}
/**
@@ -624,6 +745,43 @@
}
/**
+ * nfs_inode_return_delegation_on_close - asynchronously return a delegation
+ * @inode: inode to process
+ *
+ * This routine is called on file close in order to determine if the
+ * inode delegation needs to be returned immediately.
+ */
+void nfs4_inode_return_delegation_on_close(struct inode *inode)
+{
+ struct nfs_delegation *delegation;
+ struct nfs_delegation *ret = NULL;
+
+ if (!inode)
+ return;
+ rcu_read_lock();
+ delegation = nfs4_get_valid_delegation(inode);
+ if (!delegation)
+ goto out;
+ if (test_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags) ||
+ atomic_long_read(&nfs_active_delegations) >= nfs_delegation_watermark) {
+ spin_lock(&delegation->lock);
+ if (delegation->inode &&
+ list_empty(&NFS_I(inode)->open_files) &&
+ !test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) {
+ clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags);
+ /* Refcount matched in nfs_end_delegation_return() */
+ ret = nfs_get_delegation(delegation);
+ }
+ spin_unlock(&delegation->lock);
+ if (ret)
+ nfs_clear_verifier_delegated(inode);
+ }
+out:
+ rcu_read_unlock();
+ nfs_end_delegation_return(inode, ret, 0);
+}
+
+/**
* nfs4_inode_make_writeable
* @inode: pointer to inode
*
@@ -633,10 +791,18 @@
*/
int nfs4_inode_make_writeable(struct inode *inode)
{
- if (!nfs4_has_session(NFS_SERVER(inode)->nfs_client) ||
- !nfs4_check_delegation(inode, FMODE_WRITE))
- return nfs4_inode_return_delegation(inode);
- return 0;
+ struct nfs_delegation *delegation;
+
+ rcu_read_lock();
+ delegation = nfs4_get_valid_delegation(inode);
+ if (delegation == NULL ||
+ (nfs4_has_session(NFS_SERVER(inode)->nfs_client) &&
+ (delegation->type & FMODE_WRITE))) {
+ rcu_read_unlock();
+ return 0;
+ }
+ rcu_read_unlock();
+ return nfs4_inode_return_delegation(inode);
}
static void nfs_mark_return_if_closed_delegation(struct nfs_server *server,
@@ -646,13 +812,6 @@
set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state);
}
-static void nfs_mark_return_delegation(struct nfs_server *server,
- struct nfs_delegation *delegation)
-{
- set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
- set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state);
-}
-
static bool nfs_server_mark_return_all_delegations(struct nfs_server *server)
{
struct nfs_delegation *delegation;
@@ -739,15 +898,7 @@
rcu_read_unlock();
}
-static void nfs_mark_delegation_revoked(struct nfs_server *server,
- struct nfs_delegation *delegation)
-{
- set_bit(NFS_DELEGATION_REVOKED, &delegation->flags);
- delegation->stateid.type = NFS4_INVALID_STATEID_TYPE;
- nfs_mark_return_delegation(server, delegation);
-}
-
-static bool nfs_revoke_delegation(struct inode *inode,
+static void nfs_revoke_delegation(struct inode *inode,
const nfs4_stateid *stateid)
{
struct nfs_delegation *delegation;
@@ -761,30 +912,70 @@
if (stateid == NULL) {
nfs4_stateid_copy(&tmp, &delegation->stateid);
stateid = &tmp;
- } else if (!nfs4_stateid_match(stateid, &delegation->stateid))
- goto out;
- nfs_mark_delegation_revoked(NFS_SERVER(inode), delegation);
+ } else {
+ if (!nfs4_stateid_match_other(stateid, &delegation->stateid))
+ goto out;
+ spin_lock(&delegation->lock);
+ if (stateid->seqid) {
+ if (nfs4_stateid_is_newer(&delegation->stateid, stateid)) {
+ spin_unlock(&delegation->lock);
+ goto out;
+ }
+ delegation->stateid.seqid = stateid->seqid;
+ }
+ spin_unlock(&delegation->lock);
+ }
+ nfs_mark_delegation_revoked(delegation);
ret = true;
out:
rcu_read_unlock();
if (ret)
nfs_inode_find_state_and_recover(inode, stateid);
- return ret;
}
void nfs_remove_bad_delegation(struct inode *inode,
const nfs4_stateid *stateid)
{
- struct nfs_delegation *delegation;
-
- if (!nfs_revoke_delegation(inode, stateid))
- return;
- delegation = nfs_inode_detach_delegation(inode);
- if (delegation)
- nfs_free_delegation(delegation);
+ nfs_revoke_delegation(inode, stateid);
}
EXPORT_SYMBOL_GPL(nfs_remove_bad_delegation);
+void nfs_delegation_mark_returned(struct inode *inode,
+ const nfs4_stateid *stateid)
+{
+ struct nfs_delegation *delegation;
+
+ if (!inode)
+ return;
+
+ rcu_read_lock();
+ delegation = rcu_dereference(NFS_I(inode)->delegation);
+ if (!delegation)
+ goto out_rcu_unlock;
+
+ spin_lock(&delegation->lock);
+ if (!nfs4_stateid_match_other(stateid, &delegation->stateid))
+ goto out_spin_unlock;
+ if (stateid->seqid) {
+ /* If delegation->stateid is newer, dont mark as returned */
+ if (nfs4_stateid_is_newer(&delegation->stateid, stateid))
+ goto out_clear_returning;
+ if (delegation->stateid.seqid != stateid->seqid)
+ delegation->stateid.seqid = stateid->seqid;
+ }
+
+ nfs_mark_delegation_revoked(delegation);
+
+out_clear_returning:
+ clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
+out_spin_unlock:
+ spin_unlock(&delegation->lock);
+out_rcu_unlock:
+ rcu_read_unlock();
+
+ nfs_inode_find_state_and_recover(inode, stateid);
+}
+
/**
* nfs_expire_unused_delegation_types
* @clp: client to process
@@ -840,7 +1031,7 @@
struct nfs_delegation *delegation;
rcu_read_lock();
- delegation = rcu_dereference(NFS_I(inode)->delegation);
+ delegation = nfs4_get_valid_delegation(inode);
if (delegation == NULL)
goto out_enoent;
if (stateid != NULL &&
@@ -861,21 +1052,24 @@
const struct nfs_fh *fhandle)
{
struct nfs_delegation *delegation;
- struct inode *freeme, *res = NULL;
+ struct super_block *freeme = NULL;
+ struct inode *res = NULL;
list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
spin_lock(&delegation->lock);
if (delegation->inode != NULL &&
+ !test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) &&
nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) {
- freeme = igrab(delegation->inode);
- if (freeme && nfs_sb_active(freeme->i_sb))
- res = freeme;
+ if (nfs_sb_active(server->super)) {
+ freeme = server->super;
+ res = igrab(delegation->inode);
+ }
spin_unlock(&delegation->lock);
if (res != NULL)
return res;
if (freeme) {
rcu_read_unlock();
- iput(freeme);
+ nfs_sb_deactive(freeme);
rcu_read_lock();
}
return ERR_PTR(-EAGAIN);
@@ -941,6 +1135,42 @@
rcu_read_unlock();
}
+static int nfs_server_reap_unclaimed_delegations(struct nfs_server *server,
+ void __always_unused *data)
+{
+ struct nfs_delegation *delegation;
+ struct inode *inode;
+restart:
+ rcu_read_lock();
+restart_locked:
+ list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
+ if (test_bit(NFS_DELEGATION_INODE_FREEING,
+ &delegation->flags) ||
+ test_bit(NFS_DELEGATION_RETURNING,
+ &delegation->flags) ||
+ test_bit(NFS_DELEGATION_NEED_RECLAIM,
+ &delegation->flags) == 0)
+ continue;
+ inode = nfs_delegation_grab_inode(delegation);
+ if (inode == NULL)
+ goto restart_locked;
+ delegation = nfs_start_delegation_return_locked(NFS_I(inode));
+ rcu_read_unlock();
+ if (delegation != NULL) {
+ if (nfs_detach_delegation(NFS_I(inode), delegation,
+ server) != NULL)
+ nfs_free_delegation(delegation);
+ /* Match nfs_start_delegation_return_locked */
+ nfs_put_delegation(delegation);
+ }
+ iput(inode);
+ cond_resched();
+ goto restart;
+ }
+ rcu_read_unlock();
+ return 0;
+}
+
/**
* nfs_delegation_reap_unclaimed - reap unclaimed delegations after reboot recovery is done
* @clp: nfs_client to process
@@ -948,45 +1178,8 @@
*/
void nfs_delegation_reap_unclaimed(struct nfs_client *clp)
{
- struct nfs_delegation *delegation;
- struct nfs_server *server;
- struct inode *inode;
-
-restart:
- rcu_read_lock();
- list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
- list_for_each_entry_rcu(delegation, &server->delegations,
- super_list) {
- if (test_bit(NFS_DELEGATION_INODE_FREEING,
- &delegation->flags) ||
- test_bit(NFS_DELEGATION_RETURNING,
- &delegation->flags) ||
- test_bit(NFS_DELEGATION_NEED_RECLAIM,
- &delegation->flags) == 0)
- continue;
- if (!nfs_sb_active(server->super))
- break; /* continue in outer loop */
- inode = nfs_delegation_grab_inode(delegation);
- if (inode == NULL) {
- rcu_read_unlock();
- nfs_sb_deactive(server->super);
- goto restart;
- }
- delegation = nfs_start_delegation_return_locked(NFS_I(inode));
- rcu_read_unlock();
- if (delegation != NULL) {
- delegation = nfs_detach_delegation(NFS_I(inode),
- delegation, server);
- if (delegation != NULL)
- nfs_free_delegation(delegation);
- }
- iput(inode);
- nfs_sb_deactive(server->super);
- cond_resched();
- goto restart;
- }
- }
- rcu_read_unlock();
+ nfs_client_for_each_server(clp, nfs_server_reap_unclaimed_delegations,
+ NULL);
}
static inline bool nfs4_server_rebooted(const struct nfs_client *clp)
@@ -1072,6 +1265,48 @@
nfs_remove_bad_delegation(inode, stateid);
}
+static int nfs_server_reap_expired_delegations(struct nfs_server *server,
+ void __always_unused *data)
+{
+ struct nfs_delegation *delegation;
+ struct inode *inode;
+ const struct cred *cred;
+ nfs4_stateid stateid;
+restart:
+ rcu_read_lock();
+restart_locked:
+ list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
+ if (test_bit(NFS_DELEGATION_INODE_FREEING,
+ &delegation->flags) ||
+ test_bit(NFS_DELEGATION_RETURNING,
+ &delegation->flags) ||
+ test_bit(NFS_DELEGATION_TEST_EXPIRED,
+ &delegation->flags) == 0)
+ continue;
+ inode = nfs_delegation_grab_inode(delegation);
+ if (inode == NULL)
+ goto restart_locked;
+ spin_lock(&delegation->lock);
+ cred = get_cred_rcu(delegation->cred);
+ nfs4_stateid_copy(&stateid, &delegation->stateid);
+ spin_unlock(&delegation->lock);
+ clear_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags);
+ rcu_read_unlock();
+ nfs_delegation_test_free_expired(inode, &stateid, cred);
+ put_cred(cred);
+ if (!nfs4_server_rebooted(server->nfs_client)) {
+ iput(inode);
+ cond_resched();
+ goto restart;
+ }
+ nfs_inode_mark_test_expired_delegation(server,inode);
+ iput(inode);
+ return -EAGAIN;
+ }
+ rcu_read_unlock();
+ return 0;
+}
+
/**
* nfs_reap_expired_delegations - reap expired delegations
* @clp: nfs_client to process
@@ -1083,51 +1318,8 @@
*/
void nfs_reap_expired_delegations(struct nfs_client *clp)
{
- struct nfs_delegation *delegation;
- struct nfs_server *server;
- struct inode *inode;
- const struct cred *cred;
- nfs4_stateid stateid;
-
-restart:
- rcu_read_lock();
- list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
- list_for_each_entry_rcu(delegation, &server->delegations,
- super_list) {
- if (test_bit(NFS_DELEGATION_INODE_FREEING,
- &delegation->flags) ||
- test_bit(NFS_DELEGATION_RETURNING,
- &delegation->flags) ||
- test_bit(NFS_DELEGATION_TEST_EXPIRED,
- &delegation->flags) == 0)
- continue;
- if (!nfs_sb_active(server->super))
- break; /* continue in outer loop */
- inode = nfs_delegation_grab_inode(delegation);
- if (inode == NULL) {
- rcu_read_unlock();
- nfs_sb_deactive(server->super);
- goto restart;
- }
- cred = get_cred_rcu(delegation->cred);
- nfs4_stateid_copy(&stateid, &delegation->stateid);
- clear_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags);
- rcu_read_unlock();
- nfs_delegation_test_free_expired(inode, &stateid, cred);
- put_cred(cred);
- if (nfs4_server_rebooted(clp)) {
- nfs_inode_mark_test_expired_delegation(server,inode);
- iput(inode);
- nfs_sb_deactive(server->super);
- return;
- }
- iput(inode);
- nfs_sb_deactive(server->super);
- cond_resched();
- goto restart;
- }
- }
- rcu_read_unlock();
+ nfs_client_for_each_server(clp, nfs_server_reap_expired_delegations,
+ NULL);
}
void nfs_inode_find_delegation_state_and_recover(struct inode *inode,
@@ -1140,7 +1332,8 @@
rcu_read_lock();
delegation = rcu_dereference(NFS_I(inode)->delegation);
if (delegation &&
- nfs4_stateid_match_other(&delegation->stateid, stateid)) {
+ nfs4_stateid_match_or_older(&delegation->stateid, stateid) &&
+ !test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) {
nfs_mark_test_expired_delegation(NFS_SERVER(inode), delegation);
found = true;
}
@@ -1189,7 +1382,9 @@
rcu_read_lock();
delegation = rcu_dereference(NFS_I(inode)->delegation);
if (delegation != NULL &&
- nfs4_stateid_match_other(dst, &delegation->stateid)) {
+ nfs4_stateid_match_other(dst, &delegation->stateid) &&
+ nfs4_stateid_is_newer(&delegation->stateid, dst) &&
+ !test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) {
dst->seqid = delegation->stateid.seqid;
ret = true;
}
@@ -1213,11 +1408,14 @@
{
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_delegation *delegation;
- bool ret;
+ bool ret = false;
flags &= FMODE_READ|FMODE_WRITE;
rcu_read_lock();
delegation = rcu_dereference(nfsi->delegation);
+ if (!delegation)
+ goto out;
+ spin_lock(&delegation->lock);
ret = nfs4_is_valid_delegation(delegation, flags);
if (ret) {
nfs4_stateid_copy(dst, &delegation->stateid);
@@ -1225,6 +1423,8 @@
if (cred)
*cred = get_cred(delegation->cred);
}
+ spin_unlock(&delegation->lock);
+out:
rcu_read_unlock();
return ret;
}
@@ -1253,3 +1453,5 @@
rcu_read_unlock();
return ret;
}
+
+module_param_named(delegation_watermark, nfs_delegation_watermark, uint, 0644);
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 8b14d44..26f57a9 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -22,6 +22,7 @@
unsigned long pagemod_limit;
__u64 change_attr;
unsigned long flags;
+ refcount_t refcount;
spinlock_t lock;
struct rcu_head rcu;
};
@@ -35,6 +36,7 @@
NFS_DELEGATION_REVOKED,
NFS_DELEGATION_TEST_EXPIRED,
NFS_DELEGATION_INODE_FREEING,
+ NFS_DELEGATION_RETURN_DELAYED,
};
int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred,
@@ -42,8 +44,9 @@
void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred,
fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit);
int nfs4_inode_return_delegation(struct inode *inode);
+void nfs4_inode_return_delegation_on_close(struct inode *inode);
int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
-void nfs_inode_return_delegation_noreclaim(struct inode *inode);
+void nfs_inode_evict_delegation(struct inode *inode);
struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
void nfs_server_return_all_delegations(struct nfs_server *);
@@ -53,6 +56,7 @@
int nfs_client_return_marked_delegations(struct nfs_client *clp);
int nfs_delegations_present(struct nfs_client *clp);
void nfs_remove_bad_delegation(struct inode *inode, const nfs4_stateid *stateid);
+void nfs_delegation_mark_returned(struct inode *inode, const nfs4_stateid *stateid);
void nfs_delegation_mark_reclaim(struct nfs_client *clp);
void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e7c0790..2ad56ff 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -58,7 +58,7 @@
const struct file_operations nfs_dir_operations = {
.llseek = nfs_llseek_dir,
.read = generic_read_dir,
- .iterate = nfs_readdir,
+ .iterate_shared = nfs_readdir,
.open = nfs_opendir,
.release = nfs_closedir,
.fsync = nfs_fsync_dir,
@@ -141,10 +141,9 @@
int size;
int eof_index;
u64 last_cookie;
- struct nfs_cache_array_entry array[0];
+ struct nfs_cache_array_entry array[];
};
-typedef int (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, bool);
typedef struct {
struct file *file;
struct page *page;
@@ -153,8 +152,9 @@
u64 *dir_cookie;
u64 last_cookie;
loff_t current_index;
- decode_dirent_t decode;
+ loff_t prev_index;
+ unsigned long dir_verifier;
unsigned long timestamp;
unsigned long gencount;
unsigned int cache_entry_index;
@@ -198,7 +198,7 @@
int nfs_readdir_make_qstr(struct qstr *string, const char *name, unsigned int len)
{
string->len = len;
- string->name = kmemdup(name, len, GFP_KERNEL);
+ string->name = kmemdup_nul(name, len, GFP_KERNEL);
if (string->name == NULL)
return -ENOMEM;
/*
@@ -239,6 +239,25 @@
return ret;
}
+static inline
+int is_32bit_api(void)
+{
+#ifdef CONFIG_COMPAT
+ return in_compat_syscall();
+#else
+ return (BITS_PER_LONG == 32);
+#endif
+}
+
+static
+bool nfs_readdir_use_cookie(const struct file *filp)
+{
+ if ((filp->f_mode & FMODE_32BITHASH) ||
+ (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
+ return false;
+ return true;
+}
+
static
int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc)
{
@@ -288,7 +307,7 @@
!nfs_readdir_inode_mapping_valid(nfsi)) {
ctx->duped = 0;
ctx->attr_gencount = nfsi->attr_gencount;
- } else if (new_pos < desc->ctx->pos) {
+ } else if (new_pos < desc->prev_index) {
if (ctx->duped > 0
&& ctx->dup_cookie == *desc->dir_cookie) {
if (printk_ratelimit()) {
@@ -304,7 +323,11 @@
ctx->dup_cookie = *desc->dir_cookie;
ctx->duped = -1;
}
- desc->ctx->pos = new_pos;
+ if (nfs_readdir_use_cookie(desc->file))
+ desc->ctx->pos = *desc->dir_cookie;
+ else
+ desc->ctx->pos = new_pos;
+ desc->prev_index = new_pos;
desc->cache_entry_index = i;
return 0;
}
@@ -353,6 +376,7 @@
again:
timestamp = jiffies;
gencount = nfs_inc_attr_generation_counter();
+ desc->dir_verifier = nfs_save_change_attribute(inode);
error = NFS_PROTO(inode)->readdir(file_dentry(file), cred, entry->cookie, pages,
NFS_SERVER(inode)->dtsize, desc->plus);
if (error < 0) {
@@ -374,9 +398,10 @@
static int xdr_decode(nfs_readdir_descriptor_t *desc,
struct nfs_entry *entry, struct xdr_stream *xdr)
{
+ struct inode *inode = file_inode(desc->file);
int error;
- error = desc->decode(xdr, entry, desc->plus);
+ error = NFS_PROTO(inode)->decode_dirent(xdr, entry, desc->plus);
if (error)
return error;
entry->fattr->time_start = desc->timestamp;
@@ -449,18 +474,19 @@
if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
!list_empty(&nfsi->open_files)) {
set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
- invalidate_mapping_pages(dir->i_mapping, 0, -1);
+ invalidate_mapping_pages(dir->i_mapping,
+ nfsi->page_index + 1, -1);
}
}
static
-void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
+void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry,
+ unsigned long dir_verifier)
{
struct qstr filename = QSTR_INIT(entry->name, entry->len);
DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
struct dentry *dentry;
struct dentry *alias;
- struct inode *dir = d_inode(parent);
struct inode *inode;
int status;
@@ -499,7 +525,7 @@
if (nfs_same_file(dentry, entry)) {
if (!entry->fh->size)
goto out;
- nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+ nfs_set_verifier(dentry, dir_verifier);
status = nfs_refresh_inode(d_inode(dentry), entry->fattr);
if (!status)
nfs_setsecurity(d_inode(dentry), entry->fattr, entry->label);
@@ -525,7 +551,7 @@
dput(dentry);
dentry = alias;
}
- nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+ nfs_set_verifier(dentry, dir_verifier);
out:
dput(dentry);
}
@@ -566,7 +592,8 @@
count++;
if (desc->plus)
- nfs_prime_dcache(file_dentry(desc->file), entry);
+ nfs_prime_dcache(file_dentry(desc->file), entry,
+ desc->dir_verifier);
status = nfs_readdir_add_to_array(entry, page);
if (status != 0)
@@ -723,6 +750,8 @@
static
int find_and_lock_cache_page(nfs_readdir_descriptor_t *desc)
{
+ struct inode *inode = file_inode(desc->file);
+ struct nfs_inode *nfsi = NFS_I(inode);
int res;
desc->page = get_cache_page(desc);
@@ -734,8 +763,10 @@
res = -EAGAIN;
if (desc->page->mapping != NULL) {
res = nfs_readdir_search_array(desc);
- if (res == 0)
+ if (res == 0) {
+ nfsi->page_index = desc->page_index;
return 0;
+ }
}
unlock_page(desc->page);
error:
@@ -751,6 +782,7 @@
if (desc->page_index == 0) {
desc->current_index = 0;
+ desc->prev_index = 0;
desc->last_cookie = 0;
}
do {
@@ -781,11 +813,14 @@
desc->eof = true;
break;
}
- desc->ctx->pos++;
if (i < (array->size-1))
*desc->dir_cookie = array->array[i+1].cookie;
else
*desc->dir_cookie = array->last_cookie;
+ if (nfs_readdir_use_cookie(file))
+ desc->ctx->pos = *desc->dir_cookie;
+ else
+ desc->ctx->pos++;
if (ctx->duped != 0)
ctx->duped = 1;
}
@@ -855,9 +890,14 @@
{
struct dentry *dentry = file_dentry(file);
struct inode *inode = d_inode(dentry);
- nfs_readdir_descriptor_t my_desc,
- *desc = &my_desc;
struct nfs_open_dir_context *dir_ctx = file->private_data;
+ nfs_readdir_descriptor_t my_desc = {
+ .file = file,
+ .ctx = ctx,
+ .dir_cookie = &dir_ctx->dir_cookie,
+ .plus = nfs_use_readdirplus(inode, ctx),
+ },
+ *desc = &my_desc;
int res = 0;
dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n",
@@ -870,14 +910,6 @@
* to either find the entry with the appropriate number or
* revalidate the cookie.
*/
- memset(desc, 0, sizeof(*desc));
-
- desc->file = file;
- desc->ctx = ctx;
- desc->dir_cookie = &dir_ctx->dir_cookie;
- desc->decode = NFS_PROTO(inode)->decode_dirent;
- desc->plus = nfs_use_readdirplus(inode, ctx);
-
if (ctx->pos == 0 || nfs_attribute_cache_expired(inode))
res = nfs_revalidate_mapping(inode, file->f_mapping);
if (res < 0)
@@ -923,7 +955,6 @@
static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
{
- struct inode *inode = file_inode(filp);
struct nfs_open_dir_context *dir_ctx = filp->private_data;
dfprintk(FILE, "NFS: llseek dir(%pD2, %lld, %d)\n",
@@ -935,24 +966,27 @@
case SEEK_SET:
if (offset < 0)
return -EINVAL;
- inode_lock(inode);
+ spin_lock(&filp->f_lock);
break;
case SEEK_CUR:
if (offset == 0)
return filp->f_pos;
- inode_lock(inode);
+ spin_lock(&filp->f_lock);
offset += filp->f_pos;
if (offset < 0) {
- inode_unlock(inode);
+ spin_unlock(&filp->f_lock);
return -EINVAL;
}
}
if (offset != filp->f_pos) {
filp->f_pos = offset;
- dir_ctx->dir_cookie = 0;
+ if (nfs_readdir_use_cookie(filp))
+ dir_ctx->dir_cookie = offset;
+ else
+ dir_ctx->dir_cookie = 0;
dir_ctx->duped = 0;
}
- inode_unlock(inode);
+ spin_unlock(&filp->f_lock);
return offset;
}
@@ -963,13 +997,9 @@
static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end,
int datasync)
{
- struct inode *inode = file_inode(filp);
-
dfprintk(FILE, "NFS: fsync dir(%pD2) datasync %d\n", filp, datasync);
- inode_lock(inode);
- nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
- inode_unlock(inode);
+ nfs_inc_stats(file_inode(filp), NFSIOS_VFSFSYNC);
return 0;
}
@@ -981,14 +1011,112 @@
* full lookup on all child dentries of 'dir' whenever a change occurs
* on the server that might have invalidated our dcache.
*
+ * Note that we reserve bit '0' as a tag to let us know when a dentry
+ * was revalidated while holding a delegation on its inode.
+ *
* The caller should be holding dir->i_lock
*/
void nfs_force_lookup_revalidate(struct inode *dir)
{
- NFS_I(dir)->cache_change_attribute++;
+ NFS_I(dir)->cache_change_attribute += 2;
}
EXPORT_SYMBOL_GPL(nfs_force_lookup_revalidate);
+/**
+ * nfs_verify_change_attribute - Detects NFS remote directory changes
+ * @dir: pointer to parent directory inode
+ * @verf: previously saved change attribute
+ *
+ * Return "false" if the verifiers doesn't match the change attribute.
+ * This would usually indicate that the directory contents have changed on
+ * the server, and that any dentries need revalidating.
+ */
+static bool nfs_verify_change_attribute(struct inode *dir, unsigned long verf)
+{
+ return (verf & ~1UL) == nfs_save_change_attribute(dir);
+}
+
+static void nfs_set_verifier_delegated(unsigned long *verf)
+{
+ *verf |= 1UL;
+}
+
+#if IS_ENABLED(CONFIG_NFS_V4)
+static void nfs_unset_verifier_delegated(unsigned long *verf)
+{
+ *verf &= ~1UL;
+}
+#endif /* IS_ENABLED(CONFIG_NFS_V4) */
+
+static bool nfs_test_verifier_delegated(unsigned long verf)
+{
+ return verf & 1;
+}
+
+static bool nfs_verifier_is_delegated(struct dentry *dentry)
+{
+ return nfs_test_verifier_delegated(dentry->d_time);
+}
+
+static void nfs_set_verifier_locked(struct dentry *dentry, unsigned long verf)
+{
+ struct inode *inode = d_inode(dentry);
+ struct inode *dir = d_inode(dentry->d_parent);
+
+ if (!nfs_verify_change_attribute(dir, verf))
+ return;
+ if (inode && NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
+ nfs_set_verifier_delegated(&verf);
+ dentry->d_time = verf;
+}
+
+/**
+ * nfs_set_verifier - save a parent directory verifier in the dentry
+ * @dentry: pointer to dentry
+ * @verf: verifier to save
+ *
+ * Saves the parent directory verifier in @dentry. If the inode has
+ * a delegation, we also tag the dentry as having been revalidated
+ * while holding a delegation so that we know we don't have to
+ * look it up again after a directory change.
+ */
+void nfs_set_verifier(struct dentry *dentry, unsigned long verf)
+{
+
+ spin_lock(&dentry->d_lock);
+ nfs_set_verifier_locked(dentry, verf);
+ spin_unlock(&dentry->d_lock);
+}
+EXPORT_SYMBOL_GPL(nfs_set_verifier);
+
+#if IS_ENABLED(CONFIG_NFS_V4)
+/**
+ * nfs_clear_verifier_delegated - clear the dir verifier delegation tag
+ * @inode: pointer to inode
+ *
+ * Iterates through the dentries in the inode alias list and clears
+ * the tag used to indicate that the dentry has been revalidated
+ * while holding a delegation.
+ * This function is intended for use when the delegation is being
+ * returned or revoked.
+ */
+void nfs_clear_verifier_delegated(struct inode *inode)
+{
+ struct dentry *alias;
+
+ if (!inode)
+ return;
+ spin_lock(&inode->i_lock);
+ hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
+ spin_lock(&alias->d_lock);
+ nfs_unset_verifier_delegated(&alias->d_time);
+ spin_unlock(&alias->d_lock);
+ }
+ spin_unlock(&inode->i_lock);
+}
+EXPORT_SYMBOL_GPL(nfs_clear_verifier_delegated);
+#endif /* IS_ENABLED(CONFIG_NFS_V4) */
+
/*
* A check for whether or not the parent directory has changed.
* In the case it has, we assume that the dentries are untrustworthy
@@ -1050,7 +1178,7 @@
/* A NFSv4 OPEN will revalidate later */
if (server->caps & NFS_CAP_ATOMIC_OPEN)
goto out;
- /* Fallthrough */
+ fallthrough;
case S_IFDIR:
if (server->flags & NFS_MOUNT_NOCTO)
break;
@@ -1161,6 +1289,7 @@
struct nfs_fh *fhandle;
struct nfs_fattr *fattr;
struct nfs4_label *label;
+ unsigned long dir_verifier;
int ret;
ret = -ENOMEM;
@@ -1170,10 +1299,18 @@
if (fhandle == NULL || fattr == NULL || IS_ERR(label))
goto out;
- ret = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
+ dir_verifier = nfs_save_change_attribute(dir);
+ ret = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr, label);
if (ret < 0) {
- if (ret == -ESTALE || ret == -ENOENT)
+ switch (ret) {
+ case -ESTALE:
+ case -ENOENT:
ret = 0;
+ break;
+ case -ETIMEDOUT:
+ if (NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL)
+ ret = 1;
+ }
goto out;
}
ret = 0;
@@ -1183,7 +1320,7 @@
goto out;
nfs_setsecurity(inode, fattr, label);
- nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+ nfs_set_verifier(dentry, dir_verifier);
/* set a readdirplus hint that we had a cache miss */
nfs_force_use_readdirplus(dir);
@@ -1232,7 +1369,7 @@
goto out_bad;
}
- if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
+ if (nfs_verifier_is_delegated(dentry))
return nfs_lookup_revalidate_delegated(dir, dentry, inode);
/* Force a full look up iff the parent directory has changed */
@@ -1417,6 +1554,7 @@
struct nfs_fh *fhandle = NULL;
struct nfs_fattr *fattr = NULL;
struct nfs4_label *label = NULL;
+ unsigned long dir_verifier;
int error;
dfprintk(VFS, "NFS: lookup(%pd2)\n", dentry);
@@ -1442,8 +1580,9 @@
if (IS_ERR(label))
goto out;
+ dir_verifier = nfs_save_change_attribute(dir);
trace_nfs_lookup_enter(dir, dentry, flags);
- error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
+ error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr, label);
if (error == -ENOENT)
goto no_entry;
if (error < 0) {
@@ -1465,7 +1604,7 @@
goto out_label;
dentry = res;
}
- nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+ nfs_set_verifier(dentry, dir_verifier);
out_label:
trace_nfs_lookup_exit(dir, dentry, flags, error);
nfs4_label_free(label);
@@ -1638,6 +1777,24 @@
no_open:
res = nfs_lookup(dir, dentry, lookup_flags);
+ if (!res) {
+ inode = d_inode(dentry);
+ if ((lookup_flags & LOOKUP_DIRECTORY) && inode &&
+ !(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)))
+ res = ERR_PTR(-ENOTDIR);
+ else if (inode && S_ISREG(inode->i_mode))
+ res = ERR_PTR(-EOPENSTALE);
+ } else if (!IS_ERR(res)) {
+ inode = d_inode(res);
+ if ((lookup_flags & LOOKUP_DIRECTORY) && inode &&
+ !(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) {
+ dput(res);
+ res = ERR_PTR(-ENOTDIR);
+ } else if (inode && S_ISREG(inode->i_mode)) {
+ dput(res);
+ res = ERR_PTR(-EOPENSTALE);
+ }
+ }
if (switched) {
d_lookup_done(dentry);
if (!res)
@@ -1670,7 +1827,7 @@
if (inode == NULL)
goto full_reval;
- if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
+ if (nfs_verifier_is_delegated(dentry))
return nfs_lookup_revalidate_delegated(dir, dentry, inode);
/* NFS only supports OPEN on regular files */
@@ -1718,7 +1875,7 @@
d_drop(dentry);
if (fhandle->size == 0) {
- error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, NULL);
+ error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr, NULL);
if (error)
goto out_error;
}
@@ -2035,6 +2192,8 @@
trace_nfs_link_enter(inode, dir, dentry);
d_drop(dentry);
+ if (S_ISREG(inode->i_mode))
+ nfs_sync_inode(inode);
error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
if (error == 0) {
ihold(inode);
@@ -2123,6 +2282,8 @@
}
}
+ if (S_ISREG(old_inode->i_mode))
+ nfs_sync_inode(old_inode);
task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL);
if (IS_ERR(task)) {
error = PTR_ERR(task);
@@ -2177,7 +2338,7 @@
static LIST_HEAD(nfs_access_lru_list);
static atomic_long_t nfs_access_nr_entries;
-static unsigned long nfs_access_max_cachesize = ULONG_MAX;
+static unsigned long nfs_access_max_cachesize = 4*1024*1024;
module_param(nfs_access_max_cachesize, ulong, 0644);
MODULE_PARM_DESC(nfs_access_max_cachesize, "NFS access maximum total cache length");
@@ -2328,7 +2489,7 @@
return NULL;
}
-static int nfs_access_get_cached(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res, bool may_block)
+static int nfs_access_get_cached_locked(struct inode *inode, const struct cred *cred, u32 *mask, bool may_block)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_access_entry *cache;
@@ -2346,11 +2507,11 @@
/* Found an entry, is our attribute cache valid? */
if (!nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS))
break;
+ if (!retry)
+ break;
err = -ECHILD;
if (!may_block)
goto out;
- if (!retry)
- goto out_zap;
spin_unlock(&inode->i_lock);
err = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
if (err)
@@ -2358,8 +2519,7 @@
spin_lock(&inode->i_lock);
retry = false;
}
- res->cred = cache->cred;
- res->mask = cache->mask;
+ *mask = cache->mask;
list_move_tail(&cache->lru, &nfsi->access_cache_entry_lru);
err = 0;
out:
@@ -2371,7 +2531,7 @@
return -ENOENT;
}
-static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res)
+static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cred, u32 *mask)
{
/* Only check the most recently returned cache entry,
* but do it without locking.
@@ -2384,23 +2544,36 @@
rcu_read_lock();
if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
goto out;
- lh = rcu_dereference(nfsi->access_cache_entry_lru.prev);
+ lh = rcu_dereference(list_tail_rcu(&nfsi->access_cache_entry_lru));
cache = list_entry(lh, struct nfs_access_entry, lru);
if (lh == &nfsi->access_cache_entry_lru ||
- cred != cache->cred)
+ cred_fscmp(cred, cache->cred) != 0)
cache = NULL;
if (cache == NULL)
goto out;
if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS))
goto out;
- res->cred = cache->cred;
- res->mask = cache->mask;
+ *mask = cache->mask;
err = 0;
out:
rcu_read_unlock();
return err;
}
+int nfs_access_get_cached(struct inode *inode, const struct cred *cred,
+ u32 *mask, bool may_block)
+{
+ int status;
+
+ status = nfs_access_get_cached_rcu(inode, cred, mask);
+ if (status != 0)
+ status = nfs_access_get_cached_locked(inode, cred, mask,
+ may_block);
+
+ return status;
+}
+EXPORT_SYMBOL_GPL(nfs_access_get_cached);
+
static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set)
{
struct nfs_inode *nfsi = NFS_I(inode);
@@ -2510,14 +2683,12 @@
{
struct nfs_access_entry cache;
bool may_block = (mask & MAY_NOT_BLOCK) == 0;
- int cache_mask;
+ int cache_mask = -1;
int status;
trace_nfs_access_enter(inode);
- status = nfs_access_get_cached_rcu(inode, cred, &cache);
- if (status != 0)
- status = nfs_access_get_cached(inode, cred, &cache, may_block);
+ status = nfs_access_get_cached(inode, cred, &cache.mask, may_block);
if (status == 0)
goto out_cached;
@@ -2529,6 +2700,10 @@
* Determine which access bits we want to ask for...
*/
cache.mask = NFS_ACCESS_READ | NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND;
+ if (nfs_server_capable(inode, NFS_CAP_XATTR)) {
+ cache.mask |= NFS_ACCESS_XAREAD | NFS_ACCESS_XAWRITE |
+ NFS_ACCESS_XALIST;
+ }
if (S_ISDIR(inode->i_mode))
cache.mask |= NFS_ACCESS_DELETE | NFS_ACCESS_LOOKUP;
else
@@ -2537,9 +2712,10 @@
status = NFS_PROTO(inode)->access(inode, &cache);
if (status != 0) {
if (status == -ESTALE) {
- nfs_zap_caches(inode);
if (!S_ISDIR(inode->i_mode))
- set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
+ nfs_set_inode_stale(inode);
+ else
+ nfs_zap_caches(inode);
}
goto out;
}
@@ -2549,7 +2725,7 @@
if ((mask & ~cache_mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0)
status = -EACCES;
out:
- trace_nfs_access_exit(inode, status);
+ trace_nfs_access_exit(inode, mask, cache_mask, status);
return status;
}
@@ -2627,14 +2803,7 @@
if (!NFS_PROTO(inode)->access)
goto out_notsup;
- /* Always try fast lookups first */
- rcu_read_lock();
- res = nfs_do_access(inode, cred, mask|MAY_NOT_BLOCK);
- rcu_read_unlock();
- if (res == -ECHILD && !(mask & MAY_NOT_BLOCK)) {
- /* Fast lookup failed, try the slow way */
- res = nfs_do_access(inode, cred, mask);
- }
+ res = nfs_do_access(inode, cred, mask);
out:
if (!res && (mask & MAY_EXEC))
res = nfs_execute_ok(inode, mask);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 6b0bf4e..3c0335c 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -94,7 +94,7 @@
#define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */
/* for read */
#define NFS_ODIRECT_SHOULD_DIRTY (3) /* dirty user-space page after read */
- struct nfs_writeverf verf; /* unstable write verifier */
+#define NFS_ODIRECT_DONE INT_MAX /* write verification failed */
};
static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops;
@@ -151,106 +151,6 @@
dreq->count = dreq_len;
}
-/*
- * nfs_direct_select_verf - select the right verifier
- * @dreq - direct request possibly spanning multiple servers
- * @ds_clp - nfs_client of data server or NULL if MDS / non-pnfs
- * @commit_idx - commit bucket index for the DS
- *
- * returns the correct verifier to use given the role of the server
- */
-static struct nfs_writeverf *
-nfs_direct_select_verf(struct nfs_direct_req *dreq,
- struct nfs_client *ds_clp,
- int commit_idx)
-{
- struct nfs_writeverf *verfp = &dreq->verf;
-
-#ifdef CONFIG_NFS_V4_1
- /*
- * pNFS is in use, use the DS verf except commit_through_mds is set
- * for layout segment where nbuckets is zero.
- */
- if (ds_clp && dreq->ds_cinfo.nbuckets > 0) {
- if (commit_idx >= 0 && commit_idx < dreq->ds_cinfo.nbuckets)
- verfp = &dreq->ds_cinfo.buckets[commit_idx].direct_verf;
- else
- WARN_ON_ONCE(1);
- }
-#endif
- return verfp;
-}
-
-
-/*
- * nfs_direct_set_hdr_verf - set the write/commit verifier
- * @dreq - direct request possibly spanning multiple servers
- * @hdr - pageio header to validate against previously seen verfs
- *
- * Set the server's (MDS or DS) "seen" verifier
- */
-static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq,
- struct nfs_pgio_header *hdr)
-{
- struct nfs_writeverf *verfp;
-
- verfp = nfs_direct_select_verf(dreq, hdr->ds_clp, hdr->ds_commit_idx);
- WARN_ON_ONCE(verfp->committed >= 0);
- memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf));
- WARN_ON_ONCE(verfp->committed < 0);
-}
-
-static int nfs_direct_cmp_verf(const struct nfs_writeverf *v1,
- const struct nfs_writeverf *v2)
-{
- return nfs_write_verifier_cmp(&v1->verifier, &v2->verifier);
-}
-
-/*
- * nfs_direct_cmp_hdr_verf - compare verifier for pgio header
- * @dreq - direct request possibly spanning multiple servers
- * @hdr - pageio header to validate against previously seen verf
- *
- * set the server's "seen" verf if not initialized.
- * returns result of comparison between @hdr->verf and the "seen"
- * verf of the server used by @hdr (DS or MDS)
- */
-static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq,
- struct nfs_pgio_header *hdr)
-{
- struct nfs_writeverf *verfp;
-
- verfp = nfs_direct_select_verf(dreq, hdr->ds_clp, hdr->ds_commit_idx);
- if (verfp->committed < 0) {
- nfs_direct_set_hdr_verf(dreq, hdr);
- return 0;
- }
- return nfs_direct_cmp_verf(verfp, &hdr->verf);
-}
-
-/*
- * nfs_direct_cmp_commit_data_verf - compare verifier for commit data
- * @dreq - direct request possibly spanning multiple servers
- * @data - commit data to validate against previously seen verf
- *
- * returns result of comparison between @data->verf and the verf of
- * the server used by @data (DS or MDS)
- */
-static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq,
- struct nfs_commit_data *data)
-{
- struct nfs_writeverf *verfp;
-
- verfp = nfs_direct_select_verf(dreq, data->ds_clp,
- data->ds_commit_index);
-
- /* verifier not set so always fail */
- if (verfp->committed < 0 || data->res.verf->committed <= NFS_UNSTABLE)
- return 1;
-
- return nfs_direct_cmp_verf(verfp, data->res.verf);
-}
-
/**
* nfs_direct_IO - NFS address space operation for direct I/O
* @iocb: target I/O control block
@@ -305,7 +205,7 @@
kref_get(&dreq->kref);
init_completion(&dreq->completion);
INIT_LIST_HEAD(&dreq->mds_cinfo.list);
- dreq->verf.committed = NFS_INVALID_STABLE_HOW; /* not set yet */
+ pnfs_init_ds_commit_info(&dreq->ds_cinfo);
INIT_WORK(&dreq->work, nfs_direct_write_schedule_work);
spin_lock_init(&dreq->lock);
@@ -316,7 +216,7 @@
{
struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref);
- nfs_free_pnfs_ds_cinfo(&dreq->ds_cinfo);
+ pnfs_release_ds_info(&dreq->ds_cinfo, dreq->inode);
if (dreq->l_ctx != NULL)
nfs_put_lock_context(dreq->l_ctx);
if (dreq->ctx != NULL)
@@ -546,7 +446,7 @@
struct inode *inode = mapping->host;
struct nfs_direct_req *dreq;
struct nfs_lock_context *l_ctx;
- ssize_t result = -EINVAL, requested;
+ ssize_t result, requested;
size_t count = iov_iter_count(iter);
nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count);
@@ -606,15 +506,30 @@
}
static void
+nfs_direct_join_group(struct list_head *list, struct inode *inode)
+{
+ struct nfs_page *req, *next;
+
+ list_for_each_entry(req, list, wb_list) {
+ if (req->wb_head != req || req->wb_this_page == req)
+ continue;
+ for (next = req->wb_this_page;
+ next != req->wb_head;
+ next = next->wb_this_page) {
+ nfs_list_remove_request(next);
+ nfs_release_request(next);
+ }
+ nfs_join_page_group(req, inode);
+ }
+}
+
+static void
nfs_direct_write_scan_commit_list(struct inode *inode,
struct list_head *list,
struct nfs_commit_info *cinfo)
{
mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
-#ifdef CONFIG_NFS_V4_1
- if (cinfo->ds != NULL && cinfo->ds->nwritten != 0)
- NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo);
-#endif
+ pnfs_recover_commit_reqs(list, cinfo);
nfs_scan_commit_list(&cinfo->mds->list, list, cinfo, 0);
mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
}
@@ -630,11 +545,12 @@
nfs_init_cinfo_from_dreq(&cinfo, dreq);
nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo);
+ nfs_direct_join_group(&reqs, dreq->inode);
+
dreq->count = 0;
dreq->max_count = 0;
list_for_each_entry(req, &reqs, wb_list)
dreq->max_count += req->wb_bytes;
- dreq->verf.committed = NFS_INVALID_STABLE_HOW;
nfs_clear_pnfs_ds_commit_verifiers(&dreq->ds_cinfo);
get_dreq(dreq);
@@ -671,32 +587,40 @@
static void nfs_direct_commit_complete(struct nfs_commit_data *data)
{
+ const struct nfs_writeverf *verf = data->res.verf;
struct nfs_direct_req *dreq = data->dreq;
struct nfs_commit_info cinfo;
struct nfs_page *req;
int status = data->task.tk_status;
+ if (status < 0) {
+ /* Errors in commit are fatal */
+ dreq->error = status;
+ dreq->max_count = 0;
+ dreq->count = 0;
+ dreq->flags = NFS_ODIRECT_DONE;
+ } else if (dreq->flags == NFS_ODIRECT_DONE)
+ status = dreq->error;
+
nfs_init_cinfo_from_dreq(&cinfo, dreq);
- if (status < 0 || nfs_direct_cmp_commit_data_verf(dreq, data))
- dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
while (!list_empty(&data->pages)) {
req = nfs_list_entry(data->pages.next);
nfs_list_remove_request(req);
- if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) {
+ if (status >= 0 && !nfs_write_match_verf(verf, req)) {
+ dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
/*
* Despite the reboot, the write was successful,
* so reset wb_nio.
*/
req->wb_nio = 0;
- /* Note the rewrite will go through mds */
nfs_mark_request_commit(req, NULL, &cinfo, 0);
- } else
+ } else /* Error or match */
nfs_release_request(req);
nfs_unlock_and_release_request(req);
}
- if (atomic_dec_and_test(&cinfo.mds->rpcs_out))
+ if (nfs_commit_end(cinfo.mds))
nfs_direct_write_complete(dreq);
}
@@ -706,7 +630,8 @@
struct nfs_direct_req *dreq = cinfo->dreq;
spin_lock(&dreq->lock);
- dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+ if (dreq->flags != NFS_ODIRECT_DONE)
+ dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
spin_unlock(&dreq->lock);
nfs_mark_request_commit(req, NULL, cinfo, 0);
}
@@ -729,6 +654,23 @@
nfs_direct_write_reschedule(dreq);
}
+static void nfs_direct_write_clear_reqs(struct nfs_direct_req *dreq)
+{
+ struct nfs_commit_info cinfo;
+ struct nfs_page *req;
+ LIST_HEAD(reqs);
+
+ nfs_init_cinfo_from_dreq(&cinfo, dreq);
+ nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo);
+
+ while (!list_empty(&reqs)) {
+ req = nfs_list_entry(reqs.next);
+ nfs_list_remove_request(req);
+ nfs_release_request(req);
+ nfs_unlock_and_release_request(req);
+ }
+}
+
static void nfs_direct_write_schedule_work(struct work_struct *work)
{
struct nfs_direct_req *dreq = container_of(work, struct nfs_direct_req, work);
@@ -743,6 +685,7 @@
nfs_direct_write_reschedule(dreq);
break;
default:
+ nfs_direct_write_clear_reqs(dreq);
nfs_zap_mapping(dreq->inode, dreq->inode->i_mapping);
nfs_direct_complete(dreq);
}
@@ -757,8 +700,8 @@
{
struct nfs_direct_req *dreq = hdr->dreq;
struct nfs_commit_info cinfo;
- bool request_commit = false;
struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+ int flags = NFS_ODIRECT_DONE;
nfs_init_cinfo_from_dreq(&cinfo, dreq);
@@ -769,21 +712,10 @@
}
nfs_direct_count_bytes(dreq, hdr);
- if (hdr->good_bytes != 0) {
- if (nfs_write_need_commit(hdr)) {
- if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
- request_commit = true;
- else if (dreq->flags == 0) {
- nfs_direct_set_hdr_verf(dreq, hdr);
- request_commit = true;
- dreq->flags = NFS_ODIRECT_DO_COMMIT;
- } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) {
- request_commit = true;
- if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr))
- dreq->flags =
- NFS_ODIRECT_RESCHED_WRITES;
- }
- }
+ if (hdr->good_bytes != 0 && nfs_write_need_commit(hdr)) {
+ if (!dreq->flags)
+ dreq->flags = NFS_ODIRECT_DO_COMMIT;
+ flags = dreq->flags;
}
spin_unlock(&dreq->lock);
@@ -791,10 +723,15 @@
req = nfs_list_entry(hdr->pages.next);
nfs_list_remove_request(req);
- if (request_commit) {
+ if (flags == NFS_ODIRECT_DO_COMMIT) {
kref_get(&req->wb_kref);
+ memcpy(&req->wb_verf, &hdr->verf.verifier,
+ sizeof(req->wb_verf));
nfs_mark_request_commit(req, hdr->lseg, &cinfo,
hdr->ds_commit_idx);
+ } else if (flags == NFS_ODIRECT_RESCHED_WRITES) {
+ kref_get(&req->wb_kref);
+ nfs_mark_request_commit(req, NULL, &cinfo, 0);
}
nfs_unlock_and_release_request(req);
}
@@ -825,7 +762,8 @@
dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
/* fake unstable write to let common nfs resend pages */
hdr->verf.committed = NFS_UNSTABLE;
- hdr->good_bytes = hdr->args.count;
+ hdr->good_bytes = hdr->args.offset + hdr->args.count -
+ hdr->io_start;
}
spin_unlock(&dreq->lock);
}
@@ -955,7 +893,7 @@
*/
ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
{
- ssize_t result = -EINVAL, requested;
+ ssize_t result, requested;
size_t count;
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
@@ -996,6 +934,7 @@
dreq->l_ctx = l_ctx;
if (!is_sync_kiocb(iocb))
dreq->iocb = iocb;
+ pnfs_init_ds_commit_info_ops(&dreq->ds_cinfo, inode);
nfs_start_io_direct(inode);
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index aec769a..e87d500 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -39,7 +39,6 @@
#include <linux/string.h>
#include <linux/kmod.h>
#include <linux/slab.h>
-#include <linux/module.h>
#include <linux/socket.h>
#include <linux/seq_file.h>
#include <linux/inet.h>
@@ -93,7 +92,7 @@
key = container_of(ckey, struct nfs_dns_ent, h);
kfree(new->hostname);
- new->hostname = kstrndup(key->hostname, key->namelen, GFP_KERNEL);
+ new->hostname = kmemdup_nul(key->hostname, key->namelen, GFP_KERNEL);
if (new->hostname) {
new->namelen = key->namelen;
nfs_dns_ent_update(cnew, ckey);
@@ -152,12 +151,13 @@
struct cache_head *ch)
{
struct nfs_dns_ent *key = container_of(ch, struct nfs_dns_ent, h);
- int ret;
- ret = nfs_cache_upcall(cd, key->hostname);
- if (ret)
- ret = sunrpc_cache_pipe_upcall(cd, ch);
- return ret;
+ if (test_and_set_bit(CACHE_PENDING, &ch->flags))
+ return 0;
+ if (!nfs_cache_upcall(cd, key->hostname))
+ return 0;
+ clear_bit(CACHE_PENDING, &ch->flags);
+ return sunrpc_cache_pipe_upcall_timeout(cd, ch);
}
static int nfs_dns_match(struct cache_head *ca,
diff --git a/fs/nfs/export.c b/fs/nfs/export.c
index deecb67..3430d68 100644
--- a/fs/nfs/export.c
+++ b/fs/nfs/export.c
@@ -105,6 +105,7 @@
ret = rpc_ops->getattr(NFS_SB(sb), server_fh, fattr, label, NULL);
if (ret) {
dprintk("%s: getattr failed %d\n", __func__, ret);
+ trace_nfs_fh_to_dentry(sb, server_fh, fattr->fileid, ret);
dentry = ERR_PTR(ret);
goto out_free_label;
}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 387a2cf..63940a7 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -207,44 +207,39 @@
static int
nfs_file_fsync_commit(struct file *file, int datasync)
{
- struct nfs_open_context *ctx = nfs_file_open_context(file);
struct inode *inode = file_inode(file);
- int do_resend, status;
- int ret = 0;
+ int ret;
dprintk("NFS: fsync file(%pD2) datasync %d\n", file, datasync);
nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
- do_resend = test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags);
- status = nfs_commit_inode(inode, FLUSH_SYNC);
- if (status == 0)
- status = file_check_and_advance_wb_err(file);
- if (status < 0) {
- ret = status;
- goto out;
- }
- do_resend |= test_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags);
- if (do_resend)
- ret = -EAGAIN;
-out:
- return ret;
+ ret = nfs_commit_inode(inode, FLUSH_SYNC);
+ if (ret < 0)
+ return ret;
+ return file_check_and_advance_wb_err(file);
}
int
nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
- int ret;
+ struct nfs_open_context *ctx = nfs_file_open_context(file);
struct inode *inode = file_inode(file);
+ int ret;
trace_nfs_fsync_enter(inode);
- do {
+ for (;;) {
ret = file_write_and_wait_range(file, start, end);
if (ret != 0)
break;
ret = nfs_file_fsync_commit(file, datasync);
- if (!ret)
- ret = pnfs_sync_inode(inode, !!datasync);
+ if (ret != 0)
+ break;
+ ret = pnfs_sync_inode(inode, !!datasync);
+ if (ret != 0)
+ break;
+ if (!test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags))
+ break;
/*
* If nfs_file_fsync_commit detected a server reboot, then
* resend all dirty pages that might have been covered by
@@ -252,7 +247,7 @@
*/
start = 0;
end = LLONG_MAX;
- } while (ret == -EAGAIN);
+ }
trace_nfs_fsync_exit(inode, ret);
return ret;
@@ -492,7 +487,19 @@
static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
sector_t *span)
{
+ unsigned long blocks;
+ long long isize;
struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host);
+ struct inode *inode = file->f_mapping->host;
+
+ spin_lock(&inode->i_lock);
+ blocks = inode->i_blocks;
+ isize = inode->i_size;
+ spin_unlock(&inode->i_lock);
+ if (blocks*512 < isize) {
+ pr_warn("swap activate: swapfile has holes\n");
+ return -EINVAL;
+ }
*span = sis->pages;
@@ -658,7 +665,7 @@
out_swapfile:
printk(KERN_INFO "NFS: attempt to write to active swap file!\n");
- return -EBUSY;
+ return -ETXTBSY;
}
EXPORT_SYMBOL_GPL(nfs_file_write);
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 98b74cd..ae5ed3a 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -49,6 +49,7 @@
MODULE_DESCRIPTION("The NFSv4 file layout driver");
#define FILELAYOUT_POLL_RETRY_MAX (15*HZ)
+static const struct pnfs_commit_ops filelayout_commit_ops;
static loff_t
filelayout_get_dense_offset(struct nfs4_filelayout_segment *flseg,
@@ -186,7 +187,7 @@
pnfs_error_mark_layout_for_return(inode, lseg);
pnfs_set_lo_fail(lseg);
rpc_wake_up(&tbl->slot_tbl_waitq);
- /* fall through */
+ fallthrough;
default:
reset:
dprintk("%s Retry through MDS. Error %d\n", __func__,
@@ -750,72 +751,17 @@
/* This assumes a single RW lseg */
if (lseg->pls_range.iomode == IOMODE_RW) {
struct nfs4_filelayout *flo;
+ struct inode *inode;
flo = FILELAYOUT_FROM_HDR(lseg->pls_layout);
- flo->commit_info.nbuckets = 0;
- kfree(flo->commit_info.buckets);
- flo->commit_info.buckets = NULL;
+ inode = flo->generic_hdr.plh_inode;
+ spin_lock(&inode->i_lock);
+ pnfs_generic_ds_cinfo_release_lseg(&flo->commit_info, lseg);
+ spin_unlock(&inode->i_lock);
}
_filelayout_free_lseg(fl);
}
-static int
-filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg,
- struct nfs_commit_info *cinfo,
- gfp_t gfp_flags)
-{
- struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
- struct pnfs_commit_bucket *buckets;
- int size, i;
-
- if (fl->commit_through_mds)
- return 0;
-
- size = (fl->stripe_type == STRIPE_SPARSE) ?
- fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
-
- if (cinfo->ds->nbuckets >= size) {
- /* This assumes there is only one IOMODE_RW lseg. What
- * we really want to do is have a layout_hdr level
- * dictionary of <multipath_list4, fh> keys, each
- * associated with a struct list_head, populated by calls
- * to filelayout_write_pagelist().
- * */
- return 0;
- }
-
- buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket),
- gfp_flags);
- if (!buckets)
- return -ENOMEM;
- for (i = 0; i < size; i++) {
- INIT_LIST_HEAD(&buckets[i].written);
- INIT_LIST_HEAD(&buckets[i].committing);
- /* mark direct verifier as unset */
- buckets[i].direct_verf.committed = NFS_INVALID_STABLE_HOW;
- }
-
- spin_lock(&cinfo->inode->i_lock);
- if (cinfo->ds->nbuckets >= size)
- goto out;
- for (i = 0; i < cinfo->ds->nbuckets; i++) {
- list_splice(&cinfo->ds->buckets[i].written,
- &buckets[i].written);
- list_splice(&cinfo->ds->buckets[i].committing,
- &buckets[i].committing);
- buckets[i].direct_verf.committed =
- cinfo->ds->buckets[i].direct_verf.committed;
- buckets[i].wlseg = cinfo->ds->buckets[i].wlseg;
- buckets[i].clseg = cinfo->ds->buckets[i].clseg;
- }
- swap(cinfo->ds->buckets, buckets);
- cinfo->ds->nbuckets = size;
-out:
- spin_unlock(&cinfo->inode->i_lock);
- kfree(buckets);
- return 0;
-}
-
static struct pnfs_layout_segment *
filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
struct nfs4_layoutget_res *lgr,
@@ -938,9 +884,6 @@
filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req)
{
- struct nfs_commit_info cinfo;
- int status;
-
pnfs_generic_pg_check_layout(pgio);
if (!pgio->pg_lseg) {
pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode,
@@ -959,17 +902,7 @@
/* If no lseg, fall back to write through mds */
if (pgio->pg_lseg == NULL)
- goto out_mds;
- nfs_init_cinfo(&cinfo, pgio->pg_inode, pgio->pg_dreq);
- status = filelayout_alloc_commit_info(pgio->pg_lseg, &cinfo, GFP_NOFS);
- if (status < 0) {
- pnfs_put_lseg(pgio->pg_lseg);
- pgio->pg_lseg = NULL;
- goto out_mds;
- }
- return;
-out_mds:
- nfs_pageio_reset_write_mds(pgio);
+ nfs_pageio_reset_write_mds(pgio);
}
static const struct nfs_pageio_ops filelayout_pg_read_ops = {
@@ -1078,36 +1011,6 @@
return -EAGAIN;
}
-/* filelayout_search_commit_reqs - Search lists in @cinfo for the head reqest
- * for @page
- * @cinfo - commit info for current inode
- * @page - page to search for matching head request
- *
- * Returns a the head request if one is found, otherwise returns NULL.
- */
-static struct nfs_page *
-filelayout_search_commit_reqs(struct nfs_commit_info *cinfo, struct page *page)
-{
- struct nfs_page *freq, *t;
- struct pnfs_commit_bucket *b;
- int i;
-
- /* Linearly search the commit lists for each bucket until a matching
- * request is found */
- for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) {
- list_for_each_entry_safe(freq, t, &b->written, wb_list) {
- if (freq->wb_page == page)
- return freq->wb_head;
- }
- list_for_each_entry_safe(freq, t, &b->committing, wb_list) {
- if (freq->wb_page == page)
- return freq->wb_head;
- }
- }
-
- return NULL;
-}
-
static int
filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
int how, struct nfs_commit_info *cinfo)
@@ -1140,13 +1043,17 @@
struct nfs4_filelayout *flo;
flo = kzalloc(sizeof(*flo), gfp_flags);
- return flo != NULL ? &flo->generic_hdr : NULL;
+ if (flo == NULL)
+ return NULL;
+ pnfs_init_ds_commit_info(&flo->commit_info);
+ flo->commit_info.ops = &filelayout_commit_ops;
+ return &flo->generic_hdr;
}
static void
filelayout_free_layout_hdr(struct pnfs_layout_hdr *lo)
{
- kfree(FILELAYOUT_FROM_HDR(lo));
+ kfree_rcu(FILELAYOUT_FROM_HDR(lo), generic_hdr.plh_rcu);
}
static struct pnfs_ds_commit_info *
@@ -1160,6 +1067,46 @@
return &FILELAYOUT_FROM_HDR(layout)->commit_info;
}
+static void
+filelayout_setup_ds_info(struct pnfs_ds_commit_info *fl_cinfo,
+ struct pnfs_layout_segment *lseg)
+{
+ struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
+ struct inode *inode = lseg->pls_layout->plh_inode;
+ struct pnfs_commit_array *array, *new;
+ unsigned int size = (fl->stripe_type == STRIPE_SPARSE) ?
+ fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
+
+ new = pnfs_alloc_commit_array(size, GFP_NOIO);
+ if (new) {
+ spin_lock(&inode->i_lock);
+ array = pnfs_add_commit_array(fl_cinfo, new, lseg);
+ spin_unlock(&inode->i_lock);
+ if (array != new)
+ pnfs_free_commit_array(new);
+ }
+}
+
+static void
+filelayout_release_ds_info(struct pnfs_ds_commit_info *fl_cinfo,
+ struct inode *inode)
+{
+ spin_lock(&inode->i_lock);
+ pnfs_generic_ds_cinfo_destroy(fl_cinfo);
+ spin_unlock(&inode->i_lock);
+}
+
+static const struct pnfs_commit_ops filelayout_commit_ops = {
+ .setup_ds_info = filelayout_setup_ds_info,
+ .release_ds_info = filelayout_release_ds_info,
+ .mark_request_commit = filelayout_mark_request_commit,
+ .clear_request_commit = pnfs_generic_clear_request_commit,
+ .scan_commit_lists = pnfs_generic_scan_commit_lists,
+ .recover_commit_reqs = pnfs_generic_recover_commit_reqs,
+ .search_commit_reqs = pnfs_generic_search_commit_reqs,
+ .commit_pagelist = filelayout_commit_pagelist,
+};
+
static struct pnfs_layoutdriver_type filelayout_type = {
.id = LAYOUT_NFSV4_1_FILES,
.name = "LAYOUT_NFSV4_1_FILES",
@@ -1173,12 +1120,6 @@
.pg_read_ops = &filelayout_pg_read_ops,
.pg_write_ops = &filelayout_pg_write_ops,
.get_ds_info = &filelayout_get_ds_info,
- .mark_request_commit = filelayout_mark_request_commit,
- .clear_request_commit = pnfs_generic_clear_request_commit,
- .scan_commit_lists = pnfs_generic_scan_commit_lists,
- .recover_commit_reqs = pnfs_generic_recover_commit_reqs,
- .search_commit_reqs = filelayout_search_commit_reqs,
- .commit_pagelist = filelayout_commit_pagelist,
.read_pagelist = filelayout_read_pagelist,
.write_pagelist = filelayout_write_pagelist,
.alloc_deviceid_node = filelayout_alloc_deviceid_node,
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index fa1c920..a8a0208 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -32,6 +32,7 @@
static unsigned short io_maxretrans;
+static const struct pnfs_commit_ops ff_layout_commit_ops;
static void ff_layout_read_record_layoutstats_done(struct rpc_task *task,
struct nfs_pgio_header *hdr);
static int ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo,
@@ -48,9 +49,11 @@
ffl = kzalloc(sizeof(*ffl), gfp_flags);
if (ffl) {
+ pnfs_init_ds_commit_info(&ffl->commit_info);
INIT_LIST_HEAD(&ffl->error_list);
INIT_LIST_HEAD(&ffl->mirrors);
ffl->last_report_time = ktime_get();
+ ffl->commit_info.ops = &ff_layout_commit_ops;
return &ffl->generic_hdr;
} else
return NULL;
@@ -59,14 +62,14 @@
static void
ff_layout_free_layout_hdr(struct pnfs_layout_hdr *lo)
{
+ struct nfs4_flexfile_layout *ffl = FF_LAYOUT_FROM_HDR(lo);
struct nfs4_ff_layout_ds_err *err, *n;
- list_for_each_entry_safe(err, n, &FF_LAYOUT_FROM_HDR(lo)->error_list,
- list) {
+ list_for_each_entry_safe(err, n, &ffl->error_list, list) {
list_del(&err->list);
kfree(err);
}
- kfree(FF_LAYOUT_FROM_HDR(lo));
+ kfree_rcu(ffl, generic_hdr.plh_rcu);
}
static int decode_pnfs_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
@@ -248,36 +251,10 @@
static void ff_layout_free_mirror_array(struct nfs4_ff_layout_segment *fls)
{
- int i;
+ u32 i;
- if (fls->mirror_array) {
- for (i = 0; i < fls->mirror_array_cnt; i++) {
- /* normally mirror_ds is freed in
- * .free_deviceid_node but we still do it here
- * for .alloc_lseg error path */
- ff_layout_put_mirror(fls->mirror_array[i]);
- }
- kfree(fls->mirror_array);
- fls->mirror_array = NULL;
- }
-}
-
-static int ff_layout_check_layout(struct nfs4_layoutget_res *lgr)
-{
- int ret = 0;
-
- dprintk("--> %s\n", __func__);
-
- /* FIXME: remove this check when layout segment support is added */
- if (lgr->range.offset != 0 ||
- lgr->range.length != NFS4_MAX_UINT64) {
- dprintk("%s Only whole file layouts supported. Use MDS i/o\n",
- __func__);
- ret = -EINVAL;
- }
-
- dprintk("--> %s returns %d\n", __func__, ret);
- return ret;
+ for (i = 0; i < fls->mirror_array_cnt; i++)
+ ff_layout_put_mirror(fls->mirror_array[i]);
}
static void _ff_layout_free_lseg(struct nfs4_ff_layout_segment *fls)
@@ -289,6 +266,23 @@
}
static bool
+ff_lseg_match_mirrors(struct pnfs_layout_segment *l1,
+ struct pnfs_layout_segment *l2)
+{
+ const struct nfs4_ff_layout_segment *fl1 = FF_LAYOUT_LSEG(l1);
+ const struct nfs4_ff_layout_segment *fl2 = FF_LAYOUT_LSEG(l1);
+ u32 i;
+
+ if (fl1->mirror_array_cnt != fl2->mirror_array_cnt)
+ return false;
+ for (i = 0; i < fl1->mirror_array_cnt; i++) {
+ if (fl1->mirror_array[i] != fl2->mirror_array[i])
+ return false;
+ }
+ return true;
+}
+
+static bool
ff_lseg_range_is_after(const struct pnfs_layout_range *l1,
const struct pnfs_layout_range *l2)
{
@@ -323,6 +317,8 @@
new->pls_range.length);
if (new_end < old->pls_range.offset)
return false;
+ if (!ff_lseg_match_mirrors(new, old))
+ return false;
/* Mergeable: copy info from 'old' to 'new' */
if (new_end < old_end)
@@ -400,16 +396,13 @@
goto out_err_free;
rc = -ENOMEM;
- fls = kzalloc(sizeof(*fls), gfp_flags);
+ fls = kzalloc(struct_size(fls, mirror_array, mirror_array_cnt),
+ gfp_flags);
if (!fls)
goto out_err_free;
fls->mirror_array_cnt = mirror_array_cnt;
fls->stripe_unit = stripe_unit;
- fls->mirror_array = kcalloc(fls->mirror_array_cnt,
- sizeof(fls->mirror_array[0]), gfp_flags);
- if (fls->mirror_array == NULL)
- goto out_err_free;
for (i = 0; i < fls->mirror_array_cnt; i++) {
struct nfs4_ff_layout_mirror *mirror;
@@ -545,9 +538,6 @@
out_sort_mirrors:
ff_layout_sort_mirrors(fls);
- rc = ff_layout_check_layout(lgr);
- if (rc)
- goto out_err_free;
ret = &fls->generic_hdr;
dprintk("<-- %s (success)\n", __func__);
out_free_page:
@@ -560,17 +550,6 @@
goto out_free_page;
}
-static bool ff_layout_has_rw_segments(struct pnfs_layout_hdr *layout)
-{
- struct pnfs_layout_segment *lseg;
-
- list_for_each_entry(lseg, &layout->plh_segs, pls_list)
- if (lseg->pls_range.iomode == IOMODE_RW)
- return true;
-
- return false;
-}
-
static void
ff_layout_free_lseg(struct pnfs_layout_segment *lseg)
{
@@ -585,23 +564,12 @@
ffl = FF_LAYOUT_FROM_HDR(lseg->pls_layout);
inode = ffl->generic_hdr.plh_inode;
spin_lock(&inode->i_lock);
- if (!ff_layout_has_rw_segments(lseg->pls_layout)) {
- ffl->commit_info.nbuckets = 0;
- kfree(ffl->commit_info.buckets);
- ffl->commit_info.buckets = NULL;
- }
+ pnfs_generic_ds_cinfo_release_lseg(&ffl->commit_info, lseg);
spin_unlock(&inode->i_lock);
}
_ff_layout_free_lseg(fls);
}
-/* Return 1 until we have multiple lsegs support */
-static int
-ff_layout_get_lseg_count(struct nfs4_ff_layout_segment *fls)
-{
- return 1;
-}
-
static void
nfs4_ff_start_busy_timer(struct nfs4_ff_busy_timer *timer, ktime_t now)
{
@@ -746,54 +714,8 @@
spin_unlock(&mirror->lock);
}
-static int
-ff_layout_alloc_commit_info(struct pnfs_layout_segment *lseg,
- struct nfs_commit_info *cinfo,
- gfp_t gfp_flags)
-{
- struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
- struct pnfs_commit_bucket *buckets;
- int size;
-
- if (cinfo->ds->nbuckets != 0) {
- /* This assumes there is only one RW lseg per file.
- * To support multiple lseg per file, we need to
- * change struct pnfs_commit_bucket to allow dynamic
- * increasing nbuckets.
- */
- return 0;
- }
-
- size = ff_layout_get_lseg_count(fls) * FF_LAYOUT_MIRROR_COUNT(lseg);
-
- buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket),
- gfp_flags);
- if (!buckets)
- return -ENOMEM;
- else {
- int i;
-
- spin_lock(&cinfo->inode->i_lock);
- if (cinfo->ds->nbuckets != 0)
- kfree(buckets);
- else {
- cinfo->ds->buckets = buckets;
- cinfo->ds->nbuckets = size;
- for (i = 0; i < size; i++) {
- INIT_LIST_HEAD(&buckets[i].written);
- INIT_LIST_HEAD(&buckets[i].committing);
- /* mark direct verifier as unset */
- buckets[i].direct_verf.committed =
- NFS_INVALID_STABLE_HOW;
- }
- }
- spin_unlock(&cinfo->inode->i_lock);
- return 0;
- }
-}
-
static void
-ff_layout_mark_ds_unreachable(struct pnfs_layout_segment *lseg, int idx)
+ff_layout_mark_ds_unreachable(struct pnfs_layout_segment *lseg, u32 idx)
{
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
@@ -802,7 +724,7 @@
}
static void
-ff_layout_mark_ds_reachable(struct pnfs_layout_segment *lseg, int idx)
+ff_layout_mark_ds_reachable(struct pnfs_layout_segment *lseg, u32 idx)
{
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
@@ -812,14 +734,14 @@
static struct nfs4_pnfs_ds *
ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg,
- int start_idx, int *best_idx,
+ u32 start_idx, u32 *best_idx,
bool check_device)
{
struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
struct nfs4_ff_layout_mirror *mirror;
struct nfs4_pnfs_ds *ds;
bool fail_return = false;
- int idx;
+ u32 idx;
/* mirrors are initially sorted by efficiency */
for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) {
@@ -844,21 +766,21 @@
static struct nfs4_pnfs_ds *
ff_layout_choose_any_ds_for_read(struct pnfs_layout_segment *lseg,
- int start_idx, int *best_idx)
+ u32 start_idx, u32 *best_idx)
{
return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, false);
}
static struct nfs4_pnfs_ds *
ff_layout_choose_valid_ds_for_read(struct pnfs_layout_segment *lseg,
- int start_idx, int *best_idx)
+ u32 start_idx, u32 *best_idx)
{
return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, true);
}
static struct nfs4_pnfs_ds *
ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg,
- int start_idx, int *best_idx)
+ u32 start_idx, u32 *best_idx)
{
struct nfs4_pnfs_ds *ds;
@@ -868,6 +790,20 @@
return ff_layout_choose_any_ds_for_read(lseg, start_idx, best_idx);
}
+static struct nfs4_pnfs_ds *
+ff_layout_get_ds_for_read(struct nfs_pageio_descriptor *pgio,
+ u32 *best_idx)
+{
+ struct pnfs_layout_segment *lseg = pgio->pg_lseg;
+ struct nfs4_pnfs_ds *ds;
+
+ ds = ff_layout_choose_best_ds_for_read(lseg, pgio->pg_mirror_idx,
+ best_idx);
+ if (ds || !pgio->pg_mirror_idx)
+ return ds;
+ return ff_layout_choose_best_ds_for_read(lseg, 0, best_idx);
+}
+
static void
ff_layout_pg_get_read(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req,
@@ -876,8 +812,8 @@
pnfs_put_lseg(pgio->pg_lseg);
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
nfs_req_openctx(req),
- 0,
- NFS4_MAX_UINT64,
+ req_offset(req),
+ req->wb_bytes,
IOMODE_READ,
strict_iomode,
GFP_KERNEL);
@@ -888,16 +824,24 @@
}
static void
+ff_layout_pg_check_layout(struct nfs_pageio_descriptor *pgio,
+ struct nfs_page *req)
+{
+ pnfs_generic_pg_check_layout(pgio);
+ pnfs_generic_pg_check_range(pgio, req);
+}
+
+static void
ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req)
{
struct nfs_pgio_mirror *pgm;
struct nfs4_ff_layout_mirror *mirror;
struct nfs4_pnfs_ds *ds;
- int ds_idx;
+ u32 ds_idx;
retry:
- pnfs_generic_pg_check_layout(pgio);
+ ff_layout_pg_check_layout(pgio, req);
/* Use full layout for now */
if (!pgio->pg_lseg) {
ff_layout_pg_get_read(pgio, req, false);
@@ -910,25 +854,22 @@
goto out_nolseg;
}
- ds = ff_layout_choose_best_ds_for_read(pgio->pg_lseg, 0, &ds_idx);
+ ds = ff_layout_get_ds_for_read(pgio, &ds_idx);
if (!ds) {
if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
goto out_mds;
- pnfs_put_lseg(pgio->pg_lseg);
- pgio->pg_lseg = NULL;
+ pnfs_generic_pg_cleanup(pgio);
/* Sleep for 1 second before retrying */
ssleep(1);
goto retry;
}
mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx);
-
- pgio->pg_mirror_idx = ds_idx;
-
- /* read always uses only one mirror - idx 0 for pgio layer */
pgm = &pgio->pg_mirrors[0];
pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize;
+ pgio->pg_mirror_idx = ds_idx;
+
if (NFS_SERVER(pgio->pg_inode)->flags &
(NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR))
pgio->pg_maxretrans = io_maxretrans;
@@ -941,8 +882,6 @@
0, NFS4_MAX_UINT64, IOMODE_READ,
NFS_I(pgio->pg_inode)->layout,
pgio->pg_lseg);
- pnfs_put_lseg(pgio->pg_lseg);
- pgio->pg_lseg = NULL;
pgio->pg_maxretrans = 0;
nfs_pageio_reset_read_mds(pgio);
}
@@ -953,18 +892,16 @@
{
struct nfs4_ff_layout_mirror *mirror;
struct nfs_pgio_mirror *pgm;
- struct nfs_commit_info cinfo;
struct nfs4_pnfs_ds *ds;
- int i;
- int status;
+ u32 i;
retry:
- pnfs_generic_pg_check_layout(pgio);
+ ff_layout_pg_check_layout(pgio, req);
if (!pgio->pg_lseg) {
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
nfs_req_openctx(req),
- 0,
- NFS4_MAX_UINT64,
+ req_offset(req),
+ req->wb_bytes,
IOMODE_RW,
false,
GFP_NOFS);
@@ -978,11 +915,6 @@
if (pgio->pg_lseg == NULL)
goto out_mds;
- nfs_init_cinfo(&cinfo, pgio->pg_inode, pgio->pg_dreq);
- status = ff_layout_alloc_commit_info(pgio->pg_lseg, &cinfo, GFP_NOFS);
- if (status < 0)
- goto out_mds;
-
/* Use a direct mapping of ds_idx to pgio mirror_idx */
if (pgio->pg_mirror_count != FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg))
goto out_eagain;
@@ -993,8 +925,7 @@
if (!ds) {
if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
goto out_mds;
- pnfs_put_lseg(pgio->pg_lseg);
- pgio->pg_lseg = NULL;
+ pnfs_generic_pg_cleanup(pgio);
/* Sleep for 1 second before retrying */
ssleep(1);
goto retry;
@@ -1016,8 +947,6 @@
0, NFS4_MAX_UINT64, IOMODE_RW,
NFS_I(pgio->pg_inode)->layout,
pgio->pg_lseg);
- pnfs_put_lseg(pgio->pg_lseg);
- pgio->pg_lseg = NULL;
pgio->pg_maxretrans = 0;
nfs_pageio_reset_write_mds(pgio);
pgio->pg_error = -EAGAIN;
@@ -1030,8 +959,8 @@
if (!pgio->pg_lseg) {
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
nfs_req_openctx(req),
- 0,
- NFS4_MAX_UINT64,
+ req_offset(req),
+ req->wb_bytes,
IOMODE_RW,
false,
GFP_NOFS);
@@ -1054,6 +983,21 @@
return 1;
}
+static u32
+ff_layout_pg_set_mirror_write(struct nfs_pageio_descriptor *desc, u32 idx)
+{
+ u32 old = desc->pg_mirror_idx;
+
+ desc->pg_mirror_idx = idx;
+ return old;
+}
+
+static struct nfs_pgio_mirror *
+ff_layout_pg_get_mirror_write(struct nfs_pageio_descriptor *desc, u32 idx)
+{
+ return &desc->pg_mirrors[idx];
+}
+
static const struct nfs_pageio_ops ff_layout_pg_read_ops = {
.pg_init = ff_layout_pg_init_read,
.pg_test = pnfs_generic_pg_test,
@@ -1067,6 +1011,8 @@
.pg_doio = pnfs_generic_pg_writepages,
.pg_get_mirror_count = ff_layout_pg_get_mirror_count_write,
.pg_cleanup = pnfs_generic_pg_cleanup,
+ .pg_get_mirror = ff_layout_pg_get_mirror_write,
+ .pg_set_mirror = ff_layout_pg_set_mirror_write,
};
static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs)
@@ -1105,11 +1051,24 @@
}
}
+static void ff_layout_resend_pnfs_read(struct nfs_pgio_header *hdr)
+{
+ u32 idx = hdr->pgio_mirror_idx + 1;
+ u32 new_idx = 0;
+
+ if (ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx))
+ ff_layout_send_layouterror(hdr->lseg);
+ else
+ pnfs_error_mark_layout_for_return(hdr->inode, hdr->lseg);
+ pnfs_read_resend_pnfs(hdr, new_idx);
+}
+
static void ff_layout_reset_read(struct nfs_pgio_header *hdr)
{
struct rpc_task *task = &hdr->task;
pnfs_layoutcommit_inode(hdr->inode, false);
+ pnfs_error_mark_layout_for_return(hdr->inode, hdr->lseg);
if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
dprintk("%s Reset task %5u for i/o through MDS "
@@ -1132,7 +1091,7 @@
struct nfs4_state *state,
struct nfs_client *clp,
struct pnfs_layout_segment *lseg,
- int idx)
+ u32 idx)
{
struct pnfs_layout_hdr *lo = lseg->pls_layout;
struct inode *inode = lo->plh_inode;
@@ -1190,7 +1149,7 @@
nfs4_delete_deviceid(devid->ld, devid->nfs_client,
&devid->deviceid);
rpc_wake_up(&tbl->slot_tbl_waitq);
- /* fall through */
+ fallthrough;
default:
if (ff_layout_avoid_mds_available_ds(lseg))
return -NFS4ERR_RESET_TO_PNFS;
@@ -1206,7 +1165,7 @@
/* Retry all errors through either pNFS or MDS except for -EJUKEBOX */
static int ff_layout_async_handle_error_v3(struct rpc_task *task,
struct pnfs_layout_segment *lseg,
- int idx)
+ u32 idx)
{
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
@@ -1241,7 +1200,7 @@
struct nfs4_state *state,
struct nfs_client *clp,
struct pnfs_layout_segment *lseg,
- int idx)
+ u32 idx)
{
int vers = clp->cl_nfs_mod->rpc_vers->number;
@@ -1268,10 +1227,11 @@
}
static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
- int idx, u64 offset, u64 length,
- u32 status, int opnum, int error)
+ u32 idx, u64 offset, u64 length,
+ u32 *op_status, int opnum, int error)
{
struct nfs4_ff_layout_mirror *mirror;
+ u32 status = *op_status;
int err;
if (status == 0) {
@@ -1289,31 +1249,39 @@
case -ENOBUFS:
case -EPIPE:
case -EPERM:
- status = NFS4ERR_NXIO;
+ *op_status = status = NFS4ERR_NXIO;
break;
case -EACCES:
- status = NFS4ERR_ACCESS;
+ *op_status = status = NFS4ERR_ACCESS;
break;
default:
return;
}
}
- switch (status) {
- case NFS4ERR_DELAY:
- case NFS4ERR_GRACE:
- return;
- default:
- break;
- }
-
mirror = FF_LAYOUT_COMP(lseg, idx);
err = ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
mirror, offset, length, status, opnum,
GFP_NOIO);
- if (status == NFS4ERR_NXIO)
+
+ switch (status) {
+ case NFS4ERR_DELAY:
+ case NFS4ERR_GRACE:
+ break;
+ case NFS4ERR_NXIO:
ff_layout_mark_ds_unreachable(lseg, idx);
- pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode, lseg);
+ /*
+ * Don't return the layout if this is a read and we still
+ * have layouts to try
+ */
+ if (opnum == OP_READ)
+ break;
+ fallthrough;
+ default:
+ pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode,
+ lseg);
+ }
+
dprintk("%s: err %d op %d status %u\n", __func__, err, opnum, status);
}
@@ -1321,27 +1289,25 @@
static int ff_layout_read_done_cb(struct rpc_task *task,
struct nfs_pgio_header *hdr)
{
- int new_idx = hdr->pgio_mirror_idx;
int err;
- trace_nfs4_pnfs_read(hdr, task->tk_status);
- if (task->tk_status < 0)
+ if (task->tk_status < 0) {
ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx,
hdr->args.offset, hdr->args.count,
- hdr->res.op_status, OP_READ,
+ &hdr->res.op_status, OP_READ,
task->tk_status);
+ trace_ff_layout_read_error(hdr);
+ }
+
err = ff_layout_async_handle_error(task, hdr->args.context->state,
hdr->ds_clp, hdr->lseg,
hdr->pgio_mirror_idx);
+ trace_nfs4_pnfs_read(hdr, err);
clear_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
clear_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags);
switch (err) {
case -NFS4ERR_RESET_TO_PNFS:
- if (ff_layout_choose_best_ds_for_read(hdr->lseg,
- hdr->pgio_mirror_idx + 1,
- &new_idx))
- goto out_layouterror;
set_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
return task->tk_status;
case -NFS4ERR_RESET_TO_MDS:
@@ -1352,10 +1318,6 @@
}
return 0;
-out_layouterror:
- ff_layout_read_record_layoutstats_done(task, hdr);
- ff_layout_send_layouterror(hdr->lseg);
- hdr->pgio_mirror_idx = new_idx;
out_eagain:
rpc_restart_call_prepare(task);
return -EAGAIN;
@@ -1482,10 +1444,9 @@
struct nfs_pgio_header *hdr = data;
ff_layout_read_record_layoutstats_done(&hdr->task, hdr);
- if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags)) {
- ff_layout_send_layouterror(hdr->lseg);
- pnfs_read_resend_pnfs(hdr);
- } else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags))
+ if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags))
+ ff_layout_resend_pnfs_read(hdr);
+ else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags))
ff_layout_reset_read(hdr);
pnfs_generic_rw_release(data);
}
@@ -1497,16 +1458,19 @@
loff_t end_offs = 0;
int err;
- trace_nfs4_pnfs_write(hdr, task->tk_status);
- if (task->tk_status < 0)
+ if (task->tk_status < 0) {
ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx,
hdr->args.offset, hdr->args.count,
- hdr->res.op_status, OP_WRITE,
+ &hdr->res.op_status, OP_WRITE,
task->tk_status);
+ trace_ff_layout_write_error(hdr);
+ }
+
err = ff_layout_async_handle_error(task, hdr->args.context->state,
hdr->ds_clp, hdr->lseg,
hdr->pgio_mirror_idx);
+ trace_nfs4_pnfs_write(hdr, err);
clear_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
clear_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags);
switch (err) {
@@ -1540,15 +1504,18 @@
{
int err;
- trace_nfs4_pnfs_commit_ds(data, task->tk_status);
- if (task->tk_status < 0)
+ if (task->tk_status < 0) {
ff_layout_io_track_ds_error(data->lseg, data->ds_commit_index,
data->args.offset, data->args.count,
- data->res.op_status, OP_COMMIT,
+ &data->res.op_status, OP_COMMIT,
task->tk_status);
+ trace_ff_layout_commit_error(data);
+ }
+
err = ff_layout_async_handle_error(task, NULL, data->ds_clp,
data->lseg, data->ds_commit_index);
+ trace_nfs4_pnfs_commit_ds(data, err);
switch (err) {
case -NFS4ERR_RESET_TO_PNFS:
pnfs_generic_prepare_to_resend_writes(data);
@@ -1858,7 +1825,7 @@
loff_t offset = hdr->args.offset;
int vers;
struct nfs_fh *fh;
- int idx = hdr->pgio_mirror_idx;
+ u32 idx = hdr->pgio_mirror_idx;
mirror = FF_LAYOUT_COMP(lseg, idx);
ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true);
@@ -2005,6 +1972,33 @@
}
static void
+ff_layout_setup_ds_info(struct pnfs_ds_commit_info *fl_cinfo,
+ struct pnfs_layout_segment *lseg)
+{
+ struct nfs4_ff_layout_segment *flseg = FF_LAYOUT_LSEG(lseg);
+ struct inode *inode = lseg->pls_layout->plh_inode;
+ struct pnfs_commit_array *array, *new;
+
+ new = pnfs_alloc_commit_array(flseg->mirror_array_cnt, GFP_NOIO);
+ if (new) {
+ spin_lock(&inode->i_lock);
+ array = pnfs_add_commit_array(fl_cinfo, new, lseg);
+ spin_unlock(&inode->i_lock);
+ if (array != new)
+ pnfs_free_commit_array(new);
+ }
+}
+
+static void
+ff_layout_release_ds_info(struct pnfs_ds_commit_info *fl_cinfo,
+ struct inode *inode)
+{
+ spin_lock(&inode->i_lock);
+ pnfs_generic_ds_cinfo_destroy(fl_cinfo);
+ spin_unlock(&inode->i_lock);
+}
+
+static void
ff_layout_free_deviceid_node(struct nfs4_deviceid_node *d)
{
nfs4_ff_layout_free_deviceid(container_of(d, struct nfs4_ff_layout_ds,
@@ -2489,6 +2483,16 @@
return 0;
}
+static const struct pnfs_commit_ops ff_layout_commit_ops = {
+ .setup_ds_info = ff_layout_setup_ds_info,
+ .release_ds_info = ff_layout_release_ds_info,
+ .mark_request_commit = pnfs_layout_mark_request_commit,
+ .clear_request_commit = pnfs_generic_clear_request_commit,
+ .scan_commit_lists = pnfs_generic_scan_commit_lists,
+ .recover_commit_reqs = pnfs_generic_recover_commit_reqs,
+ .commit_pagelist = ff_layout_commit_pagelist,
+};
+
static struct pnfs_layoutdriver_type flexfilelayout_type = {
.id = LAYOUT_FLEX_FILES,
.name = "LAYOUT_FLEX_FILES",
@@ -2505,11 +2509,6 @@
.pg_write_ops = &ff_layout_pg_write_ops,
.get_ds_info = ff_layout_get_ds_info,
.free_deviceid_node = ff_layout_free_deviceid_node,
- .mark_request_commit = pnfs_layout_mark_request_commit,
- .clear_request_commit = pnfs_generic_clear_request_commit,
- .scan_commit_lists = pnfs_generic_scan_commit_lists,
- .recover_commit_reqs = pnfs_generic_recover_commit_reqs,
- .commit_pagelist = ff_layout_commit_pagelist,
.read_pagelist = ff_layout_read_pagelist,
.write_pagelist = ff_layout_write_pagelist,
.alloc_deviceid_node = ff_layout_alloc_deviceid_node,
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h
index 2f36996..354a031 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.h
+++ b/fs/nfs/flexfilelayout/flexfilelayout.h
@@ -99,7 +99,7 @@
u64 stripe_unit;
u32 flags;
u32 mirror_array_cnt;
- struct nfs4_ff_layout_mirror **mirror_array;
+ struct nfs4_ff_layout_mirror *mirror_array[];
};
struct nfs4_flexfile_layout {
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index 3eda40a..1f12297 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -378,10 +378,10 @@
goto noconnect;
ds = mirror->mirror_ds->ds;
+ if (READ_ONCE(ds->ds_clp))
+ goto out;
/* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */
smp_rmb();
- if (ds->ds_clp)
- goto out;
/* FIXME: For now we assume the server sent only one version of NFS
* to use for the DS.
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
new file mode 100644
index 0000000..05b39e8
--- /dev/null
+++ b/fs/nfs/fs_context.c
@@ -0,0 +1,1520 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * linux/fs/nfs/fs_context.c
+ *
+ * Copyright (C) 1992 Rick Sladkey
+ * Conversion to new mount api Copyright (C) David Howells
+ *
+ * NFS mount handling.
+ *
+ * Split from fs/nfs/super.c by David Howells <dhowells@redhat.com>
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
+#include <linux/nfs4_mount.h>
+#include "nfs.h"
+#include "internal.h"
+
+#define NFSDBG_FACILITY NFSDBG_MOUNT
+
+#if IS_ENABLED(CONFIG_NFS_V3)
+#define NFS_DEFAULT_VERSION 3
+#else
+#define NFS_DEFAULT_VERSION 2
+#endif
+
+#define NFS_MAX_CONNECTIONS 16
+
+enum nfs_param {
+ Opt_ac,
+ Opt_acdirmax,
+ Opt_acdirmin,
+ Opt_acl,
+ Opt_acregmax,
+ Opt_acregmin,
+ Opt_actimeo,
+ Opt_addr,
+ Opt_bg,
+ Opt_bsize,
+ Opt_clientaddr,
+ Opt_cto,
+ Opt_fg,
+ Opt_fscache,
+ Opt_fscache_flag,
+ Opt_hard,
+ Opt_intr,
+ Opt_local_lock,
+ Opt_lock,
+ Opt_lookupcache,
+ Opt_migration,
+ Opt_minorversion,
+ Opt_mountaddr,
+ Opt_mounthost,
+ Opt_mountport,
+ Opt_mountproto,
+ Opt_mountvers,
+ Opt_namelen,
+ Opt_nconnect,
+ Opt_port,
+ Opt_posix,
+ Opt_proto,
+ Opt_rdirplus,
+ Opt_rdma,
+ Opt_resvport,
+ Opt_retrans,
+ Opt_retry,
+ Opt_rsize,
+ Opt_sec,
+ Opt_sharecache,
+ Opt_sloppy,
+ Opt_soft,
+ Opt_softerr,
+ Opt_softreval,
+ Opt_source,
+ Opt_tcp,
+ Opt_timeo,
+ Opt_udp,
+ Opt_v,
+ Opt_vers,
+ Opt_wsize,
+};
+
+enum {
+ Opt_local_lock_all,
+ Opt_local_lock_flock,
+ Opt_local_lock_none,
+ Opt_local_lock_posix,
+};
+
+static const struct constant_table nfs_param_enums_local_lock[] = {
+ { "all", Opt_local_lock_all },
+ { "flock", Opt_local_lock_flock },
+ { "posix", Opt_local_lock_posix },
+ { "none", Opt_local_lock_none },
+ {}
+};
+
+enum {
+ Opt_lookupcache_all,
+ Opt_lookupcache_none,
+ Opt_lookupcache_positive,
+};
+
+static const struct constant_table nfs_param_enums_lookupcache[] = {
+ { "all", Opt_lookupcache_all },
+ { "none", Opt_lookupcache_none },
+ { "pos", Opt_lookupcache_positive },
+ { "positive", Opt_lookupcache_positive },
+ {}
+};
+
+static const struct fs_parameter_spec nfs_fs_parameters[] = {
+ fsparam_flag_no("ac", Opt_ac),
+ fsparam_u32 ("acdirmax", Opt_acdirmax),
+ fsparam_u32 ("acdirmin", Opt_acdirmin),
+ fsparam_flag_no("acl", Opt_acl),
+ fsparam_u32 ("acregmax", Opt_acregmax),
+ fsparam_u32 ("acregmin", Opt_acregmin),
+ fsparam_u32 ("actimeo", Opt_actimeo),
+ fsparam_string("addr", Opt_addr),
+ fsparam_flag ("bg", Opt_bg),
+ fsparam_u32 ("bsize", Opt_bsize),
+ fsparam_string("clientaddr", Opt_clientaddr),
+ fsparam_flag_no("cto", Opt_cto),
+ fsparam_flag ("fg", Opt_fg),
+ fsparam_flag_no("fsc", Opt_fscache_flag),
+ fsparam_string("fsc", Opt_fscache),
+ fsparam_flag ("hard", Opt_hard),
+ __fsparam(NULL, "intr", Opt_intr,
+ fs_param_neg_with_no|fs_param_deprecated, NULL),
+ fsparam_enum ("local_lock", Opt_local_lock, nfs_param_enums_local_lock),
+ fsparam_flag_no("lock", Opt_lock),
+ fsparam_enum ("lookupcache", Opt_lookupcache, nfs_param_enums_lookupcache),
+ fsparam_flag_no("migration", Opt_migration),
+ fsparam_u32 ("minorversion", Opt_minorversion),
+ fsparam_string("mountaddr", Opt_mountaddr),
+ fsparam_string("mounthost", Opt_mounthost),
+ fsparam_u32 ("mountport", Opt_mountport),
+ fsparam_string("mountproto", Opt_mountproto),
+ fsparam_u32 ("mountvers", Opt_mountvers),
+ fsparam_u32 ("namlen", Opt_namelen),
+ fsparam_u32 ("nconnect", Opt_nconnect),
+ fsparam_string("nfsvers", Opt_vers),
+ fsparam_u32 ("port", Opt_port),
+ fsparam_flag_no("posix", Opt_posix),
+ fsparam_string("proto", Opt_proto),
+ fsparam_flag_no("rdirplus", Opt_rdirplus),
+ fsparam_flag ("rdma", Opt_rdma),
+ fsparam_flag_no("resvport", Opt_resvport),
+ fsparam_u32 ("retrans", Opt_retrans),
+ fsparam_string("retry", Opt_retry),
+ fsparam_u32 ("rsize", Opt_rsize),
+ fsparam_string("sec", Opt_sec),
+ fsparam_flag_no("sharecache", Opt_sharecache),
+ fsparam_flag ("sloppy", Opt_sloppy),
+ fsparam_flag ("soft", Opt_soft),
+ fsparam_flag ("softerr", Opt_softerr),
+ fsparam_flag ("softreval", Opt_softreval),
+ fsparam_string("source", Opt_source),
+ fsparam_flag ("tcp", Opt_tcp),
+ fsparam_u32 ("timeo", Opt_timeo),
+ fsparam_flag ("udp", Opt_udp),
+ fsparam_flag ("v2", Opt_v),
+ fsparam_flag ("v3", Opt_v),
+ fsparam_flag ("v4", Opt_v),
+ fsparam_flag ("v4.0", Opt_v),
+ fsparam_flag ("v4.1", Opt_v),
+ fsparam_flag ("v4.2", Opt_v),
+ fsparam_string("vers", Opt_vers),
+ fsparam_u32 ("wsize", Opt_wsize),
+ {}
+};
+
+enum {
+ Opt_vers_2,
+ Opt_vers_3,
+ Opt_vers_4,
+ Opt_vers_4_0,
+ Opt_vers_4_1,
+ Opt_vers_4_2,
+};
+
+static const struct constant_table nfs_vers_tokens[] = {
+ { "2", Opt_vers_2 },
+ { "3", Opt_vers_3 },
+ { "4", Opt_vers_4 },
+ { "4.0", Opt_vers_4_0 },
+ { "4.1", Opt_vers_4_1 },
+ { "4.2", Opt_vers_4_2 },
+ {}
+};
+
+enum {
+ Opt_xprt_rdma,
+ Opt_xprt_rdma6,
+ Opt_xprt_tcp,
+ Opt_xprt_tcp6,
+ Opt_xprt_udp,
+ Opt_xprt_udp6,
+ nr__Opt_xprt
+};
+
+static const struct constant_table nfs_xprt_protocol_tokens[] = {
+ { "rdma", Opt_xprt_rdma },
+ { "rdma6", Opt_xprt_rdma6 },
+ { "tcp", Opt_xprt_tcp },
+ { "tcp6", Opt_xprt_tcp6 },
+ { "udp", Opt_xprt_udp },
+ { "udp6", Opt_xprt_udp6 },
+ {}
+};
+
+enum {
+ Opt_sec_krb5,
+ Opt_sec_krb5i,
+ Opt_sec_krb5p,
+ Opt_sec_lkey,
+ Opt_sec_lkeyi,
+ Opt_sec_lkeyp,
+ Opt_sec_none,
+ Opt_sec_spkm,
+ Opt_sec_spkmi,
+ Opt_sec_spkmp,
+ Opt_sec_sys,
+ nr__Opt_sec
+};
+
+static const struct constant_table nfs_secflavor_tokens[] = {
+ { "krb5", Opt_sec_krb5 },
+ { "krb5i", Opt_sec_krb5i },
+ { "krb5p", Opt_sec_krb5p },
+ { "lkey", Opt_sec_lkey },
+ { "lkeyi", Opt_sec_lkeyi },
+ { "lkeyp", Opt_sec_lkeyp },
+ { "none", Opt_sec_none },
+ { "null", Opt_sec_none },
+ { "spkm3", Opt_sec_spkm },
+ { "spkm3i", Opt_sec_spkmi },
+ { "spkm3p", Opt_sec_spkmp },
+ { "sys", Opt_sec_sys },
+ {}
+};
+
+/*
+ * Sanity-check a server address provided by the mount command.
+ *
+ * Address family must be initialized, and address must not be
+ * the ANY address for that family.
+ */
+static int nfs_verify_server_address(struct sockaddr *addr)
+{
+ switch (addr->sa_family) {
+ case AF_INET: {
+ struct sockaddr_in *sa = (struct sockaddr_in *)addr;
+ return sa->sin_addr.s_addr != htonl(INADDR_ANY);
+ }
+ case AF_INET6: {
+ struct in6_addr *sa = &((struct sockaddr_in6 *)addr)->sin6_addr;
+ return !ipv6_addr_any(sa);
+ }
+ }
+
+ dfprintk(MOUNT, "NFS: Invalid IP address specified\n");
+ return 0;
+}
+
+/*
+ * Sanity check the NFS transport protocol.
+ *
+ */
+static void nfs_validate_transport_protocol(struct nfs_fs_context *ctx)
+{
+ switch (ctx->nfs_server.protocol) {
+ case XPRT_TRANSPORT_UDP:
+ case XPRT_TRANSPORT_TCP:
+ case XPRT_TRANSPORT_RDMA:
+ break;
+ default:
+ ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP;
+ }
+}
+
+/*
+ * For text based NFSv2/v3 mounts, the mount protocol transport default
+ * settings should depend upon the specified NFS transport.
+ */
+static void nfs_set_mount_transport_protocol(struct nfs_fs_context *ctx)
+{
+ nfs_validate_transport_protocol(ctx);
+
+ if (ctx->mount_server.protocol == XPRT_TRANSPORT_UDP ||
+ ctx->mount_server.protocol == XPRT_TRANSPORT_TCP)
+ return;
+ switch (ctx->nfs_server.protocol) {
+ case XPRT_TRANSPORT_UDP:
+ ctx->mount_server.protocol = XPRT_TRANSPORT_UDP;
+ break;
+ case XPRT_TRANSPORT_TCP:
+ case XPRT_TRANSPORT_RDMA:
+ ctx->mount_server.protocol = XPRT_TRANSPORT_TCP;
+ }
+}
+
+/*
+ * Add 'flavor' to 'auth_info' if not already present.
+ * Returns true if 'flavor' ends up in the list, false otherwise
+ */
+static int nfs_auth_info_add(struct fs_context *fc,
+ struct nfs_auth_info *auth_info,
+ rpc_authflavor_t flavor)
+{
+ unsigned int i;
+ unsigned int max_flavor_len = ARRAY_SIZE(auth_info->flavors);
+
+ /* make sure this flavor isn't already in the list */
+ for (i = 0; i < auth_info->flavor_len; i++) {
+ if (flavor == auth_info->flavors[i])
+ return 0;
+ }
+
+ if (auth_info->flavor_len + 1 >= max_flavor_len)
+ return nfs_invalf(fc, "NFS: too many sec= flavors");
+
+ auth_info->flavors[auth_info->flavor_len++] = flavor;
+ return 0;
+}
+
+/*
+ * Parse the value of the 'sec=' option.
+ */
+static int nfs_parse_security_flavors(struct fs_context *fc,
+ struct fs_parameter *param)
+{
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
+ rpc_authflavor_t pseudoflavor;
+ char *string = param->string, *p;
+ int ret;
+
+ dfprintk(MOUNT, "NFS: parsing %s=%s option\n", param->key, param->string);
+
+ while ((p = strsep(&string, ":")) != NULL) {
+ if (!*p)
+ continue;
+ switch (lookup_constant(nfs_secflavor_tokens, p, -1)) {
+ case Opt_sec_none:
+ pseudoflavor = RPC_AUTH_NULL;
+ break;
+ case Opt_sec_sys:
+ pseudoflavor = RPC_AUTH_UNIX;
+ break;
+ case Opt_sec_krb5:
+ pseudoflavor = RPC_AUTH_GSS_KRB5;
+ break;
+ case Opt_sec_krb5i:
+ pseudoflavor = RPC_AUTH_GSS_KRB5I;
+ break;
+ case Opt_sec_krb5p:
+ pseudoflavor = RPC_AUTH_GSS_KRB5P;
+ break;
+ case Opt_sec_lkey:
+ pseudoflavor = RPC_AUTH_GSS_LKEY;
+ break;
+ case Opt_sec_lkeyi:
+ pseudoflavor = RPC_AUTH_GSS_LKEYI;
+ break;
+ case Opt_sec_lkeyp:
+ pseudoflavor = RPC_AUTH_GSS_LKEYP;
+ break;
+ case Opt_sec_spkm:
+ pseudoflavor = RPC_AUTH_GSS_SPKM;
+ break;
+ case Opt_sec_spkmi:
+ pseudoflavor = RPC_AUTH_GSS_SPKMI;
+ break;
+ case Opt_sec_spkmp:
+ pseudoflavor = RPC_AUTH_GSS_SPKMP;
+ break;
+ default:
+ return nfs_invalf(fc, "NFS: sec=%s option not recognized", p);
+ }
+
+ ret = nfs_auth_info_add(fc, &ctx->auth_info, pseudoflavor);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int nfs_parse_version_string(struct fs_context *fc,
+ const char *string)
+{
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
+
+ ctx->flags &= ~NFS_MOUNT_VER3;
+ switch (lookup_constant(nfs_vers_tokens, string, -1)) {
+ case Opt_vers_2:
+ ctx->version = 2;
+ break;
+ case Opt_vers_3:
+ ctx->flags |= NFS_MOUNT_VER3;
+ ctx->version = 3;
+ break;
+ case Opt_vers_4:
+ /* Backward compatibility option. In future,
+ * the mount program should always supply
+ * a NFSv4 minor version number.
+ */
+ ctx->version = 4;
+ break;
+ case Opt_vers_4_0:
+ ctx->version = 4;
+ ctx->minorversion = 0;
+ break;
+ case Opt_vers_4_1:
+ ctx->version = 4;
+ ctx->minorversion = 1;
+ break;
+ case Opt_vers_4_2:
+ ctx->version = 4;
+ ctx->minorversion = 2;
+ break;
+ default:
+ return nfs_invalf(fc, "NFS: Unsupported NFS version");
+ }
+ return 0;
+}
+
+/*
+ * Parse a single mount parameter.
+ */
+static int nfs_fs_context_parse_param(struct fs_context *fc,
+ struct fs_parameter *param)
+{
+ struct fs_parse_result result;
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
+ unsigned short protofamily, mountfamily;
+ unsigned int len;
+ int ret, opt;
+
+ dfprintk(MOUNT, "NFS: parsing nfs mount option '%s'\n", param->key);
+
+ opt = fs_parse(fc, nfs_fs_parameters, param, &result);
+ if (opt < 0)
+ return ctx->sloppy ? 1 : opt;
+
+ switch (opt) {
+ case Opt_source:
+ if (fc->source)
+ return nfs_invalf(fc, "NFS: Multiple sources not supported");
+ fc->source = param->string;
+ param->string = NULL;
+ break;
+
+ /*
+ * boolean options: foo/nofoo
+ */
+ case Opt_soft:
+ ctx->flags |= NFS_MOUNT_SOFT;
+ ctx->flags &= ~NFS_MOUNT_SOFTERR;
+ break;
+ case Opt_softerr:
+ ctx->flags |= NFS_MOUNT_SOFTERR | NFS_MOUNT_SOFTREVAL;
+ ctx->flags &= ~NFS_MOUNT_SOFT;
+ break;
+ case Opt_hard:
+ ctx->flags &= ~(NFS_MOUNT_SOFT |
+ NFS_MOUNT_SOFTERR |
+ NFS_MOUNT_SOFTREVAL);
+ break;
+ case Opt_softreval:
+ if (result.negated)
+ ctx->flags &= ~NFS_MOUNT_SOFTREVAL;
+ else
+ ctx->flags &= NFS_MOUNT_SOFTREVAL;
+ break;
+ case Opt_posix:
+ if (result.negated)
+ ctx->flags &= ~NFS_MOUNT_POSIX;
+ else
+ ctx->flags |= NFS_MOUNT_POSIX;
+ break;
+ case Opt_cto:
+ if (result.negated)
+ ctx->flags |= NFS_MOUNT_NOCTO;
+ else
+ ctx->flags &= ~NFS_MOUNT_NOCTO;
+ break;
+ case Opt_ac:
+ if (result.negated)
+ ctx->flags |= NFS_MOUNT_NOAC;
+ else
+ ctx->flags &= ~NFS_MOUNT_NOAC;
+ break;
+ case Opt_lock:
+ if (result.negated) {
+ ctx->flags |= NFS_MOUNT_NONLM;
+ ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL);
+ } else {
+ ctx->flags &= ~NFS_MOUNT_NONLM;
+ ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL);
+ }
+ break;
+ case Opt_udp:
+ ctx->flags &= ~NFS_MOUNT_TCP;
+ ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP;
+ break;
+ case Opt_tcp:
+ ctx->flags |= NFS_MOUNT_TCP;
+ ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP;
+ break;
+ case Opt_rdma:
+ ctx->flags |= NFS_MOUNT_TCP; /* for side protocols */
+ ctx->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
+ xprt_load_transport(param->key);
+ break;
+ case Opt_acl:
+ if (result.negated)
+ ctx->flags |= NFS_MOUNT_NOACL;
+ else
+ ctx->flags &= ~NFS_MOUNT_NOACL;
+ break;
+ case Opt_rdirplus:
+ if (result.negated)
+ ctx->flags |= NFS_MOUNT_NORDIRPLUS;
+ else
+ ctx->flags &= ~NFS_MOUNT_NORDIRPLUS;
+ break;
+ case Opt_sharecache:
+ if (result.negated)
+ ctx->flags |= NFS_MOUNT_UNSHARED;
+ else
+ ctx->flags &= ~NFS_MOUNT_UNSHARED;
+ break;
+ case Opt_resvport:
+ if (result.negated)
+ ctx->flags |= NFS_MOUNT_NORESVPORT;
+ else
+ ctx->flags &= ~NFS_MOUNT_NORESVPORT;
+ break;
+ case Opt_fscache_flag:
+ if (result.negated)
+ ctx->options &= ~NFS_OPTION_FSCACHE;
+ else
+ ctx->options |= NFS_OPTION_FSCACHE;
+ kfree(ctx->fscache_uniq);
+ ctx->fscache_uniq = NULL;
+ break;
+ case Opt_fscache:
+ ctx->options |= NFS_OPTION_FSCACHE;
+ kfree(ctx->fscache_uniq);
+ ctx->fscache_uniq = param->string;
+ param->string = NULL;
+ break;
+ case Opt_migration:
+ if (result.negated)
+ ctx->options &= ~NFS_OPTION_MIGRATION;
+ else
+ ctx->options |= NFS_OPTION_MIGRATION;
+ break;
+
+ /*
+ * options that take numeric values
+ */
+ case Opt_port:
+ if (result.uint_32 > USHRT_MAX)
+ goto out_of_bounds;
+ ctx->nfs_server.port = result.uint_32;
+ break;
+ case Opt_rsize:
+ ctx->rsize = result.uint_32;
+ break;
+ case Opt_wsize:
+ ctx->wsize = result.uint_32;
+ break;
+ case Opt_bsize:
+ ctx->bsize = result.uint_32;
+ break;
+ case Opt_timeo:
+ if (result.uint_32 < 1 || result.uint_32 > INT_MAX)
+ goto out_of_bounds;
+ ctx->timeo = result.uint_32;
+ break;
+ case Opt_retrans:
+ if (result.uint_32 > INT_MAX)
+ goto out_of_bounds;
+ ctx->retrans = result.uint_32;
+ break;
+ case Opt_acregmin:
+ ctx->acregmin = result.uint_32;
+ break;
+ case Opt_acregmax:
+ ctx->acregmax = result.uint_32;
+ break;
+ case Opt_acdirmin:
+ ctx->acdirmin = result.uint_32;
+ break;
+ case Opt_acdirmax:
+ ctx->acdirmax = result.uint_32;
+ break;
+ case Opt_actimeo:
+ ctx->acregmin = result.uint_32;
+ ctx->acregmax = result.uint_32;
+ ctx->acdirmin = result.uint_32;
+ ctx->acdirmax = result.uint_32;
+ break;
+ case Opt_namelen:
+ ctx->namlen = result.uint_32;
+ break;
+ case Opt_mountport:
+ if (result.uint_32 > USHRT_MAX)
+ goto out_of_bounds;
+ ctx->mount_server.port = result.uint_32;
+ break;
+ case Opt_mountvers:
+ if (result.uint_32 < NFS_MNT_VERSION ||
+ result.uint_32 > NFS_MNT3_VERSION)
+ goto out_of_bounds;
+ ctx->mount_server.version = result.uint_32;
+ break;
+ case Opt_minorversion:
+ if (result.uint_32 > NFS4_MAX_MINOR_VERSION)
+ goto out_of_bounds;
+ ctx->minorversion = result.uint_32;
+ break;
+
+ /*
+ * options that take text values
+ */
+ case Opt_v:
+ ret = nfs_parse_version_string(fc, param->key + 1);
+ if (ret < 0)
+ return ret;
+ break;
+ case Opt_vers:
+ ret = nfs_parse_version_string(fc, param->string);
+ if (ret < 0)
+ return ret;
+ break;
+ case Opt_sec:
+ ret = nfs_parse_security_flavors(fc, param);
+ if (ret < 0)
+ return ret;
+ break;
+
+ case Opt_proto:
+ protofamily = AF_INET;
+ switch (lookup_constant(nfs_xprt_protocol_tokens, param->string, -1)) {
+ case Opt_xprt_udp6:
+ protofamily = AF_INET6;
+ fallthrough;
+ case Opt_xprt_udp:
+ ctx->flags &= ~NFS_MOUNT_TCP;
+ ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP;
+ break;
+ case Opt_xprt_tcp6:
+ protofamily = AF_INET6;
+ fallthrough;
+ case Opt_xprt_tcp:
+ ctx->flags |= NFS_MOUNT_TCP;
+ ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP;
+ break;
+ case Opt_xprt_rdma6:
+ protofamily = AF_INET6;
+ fallthrough;
+ case Opt_xprt_rdma:
+ /* vector side protocols to TCP */
+ ctx->flags |= NFS_MOUNT_TCP;
+ ctx->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
+ xprt_load_transport(param->string);
+ break;
+ default:
+ return nfs_invalf(fc, "NFS: Unrecognized transport protocol");
+ }
+
+ ctx->protofamily = protofamily;
+ break;
+
+ case Opt_mountproto:
+ mountfamily = AF_INET;
+ switch (lookup_constant(nfs_xprt_protocol_tokens, param->string, -1)) {
+ case Opt_xprt_udp6:
+ mountfamily = AF_INET6;
+ fallthrough;
+ case Opt_xprt_udp:
+ ctx->mount_server.protocol = XPRT_TRANSPORT_UDP;
+ break;
+ case Opt_xprt_tcp6:
+ mountfamily = AF_INET6;
+ fallthrough;
+ case Opt_xprt_tcp:
+ ctx->mount_server.protocol = XPRT_TRANSPORT_TCP;
+ break;
+ case Opt_xprt_rdma: /* not used for side protocols */
+ default:
+ return nfs_invalf(fc, "NFS: Unrecognized transport protocol");
+ }
+ ctx->mountfamily = mountfamily;
+ break;
+
+ case Opt_addr:
+ len = rpc_pton(fc->net_ns, param->string, param->size,
+ &ctx->nfs_server.address,
+ sizeof(ctx->nfs_server._address));
+ if (len == 0)
+ goto out_invalid_address;
+ ctx->nfs_server.addrlen = len;
+ break;
+ case Opt_clientaddr:
+ kfree(ctx->client_address);
+ ctx->client_address = param->string;
+ param->string = NULL;
+ break;
+ case Opt_mounthost:
+ kfree(ctx->mount_server.hostname);
+ ctx->mount_server.hostname = param->string;
+ param->string = NULL;
+ break;
+ case Opt_mountaddr:
+ len = rpc_pton(fc->net_ns, param->string, param->size,
+ &ctx->mount_server.address,
+ sizeof(ctx->mount_server._address));
+ if (len == 0)
+ goto out_invalid_address;
+ ctx->mount_server.addrlen = len;
+ break;
+ case Opt_nconnect:
+ if (result.uint_32 < 1 || result.uint_32 > NFS_MAX_CONNECTIONS)
+ goto out_of_bounds;
+ ctx->nfs_server.nconnect = result.uint_32;
+ break;
+ case Opt_lookupcache:
+ switch (result.uint_32) {
+ case Opt_lookupcache_all:
+ ctx->flags &= ~(NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE);
+ break;
+ case Opt_lookupcache_positive:
+ ctx->flags &= ~NFS_MOUNT_LOOKUP_CACHE_NONE;
+ ctx->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG;
+ break;
+ case Opt_lookupcache_none:
+ ctx->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE;
+ break;
+ default:
+ goto out_invalid_value;
+ }
+ break;
+ case Opt_local_lock:
+ switch (result.uint_32) {
+ case Opt_local_lock_all:
+ ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK |
+ NFS_MOUNT_LOCAL_FCNTL);
+ break;
+ case Opt_local_lock_flock:
+ ctx->flags |= NFS_MOUNT_LOCAL_FLOCK;
+ break;
+ case Opt_local_lock_posix:
+ ctx->flags |= NFS_MOUNT_LOCAL_FCNTL;
+ break;
+ case Opt_local_lock_none:
+ ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK |
+ NFS_MOUNT_LOCAL_FCNTL);
+ break;
+ default:
+ goto out_invalid_value;
+ }
+ break;
+
+ /*
+ * Special options
+ */
+ case Opt_sloppy:
+ ctx->sloppy = true;
+ dfprintk(MOUNT, "NFS: relaxing parsing rules\n");
+ break;
+ }
+
+ return 0;
+
+out_invalid_value:
+ return nfs_invalf(fc, "NFS: Bad mount option value specified");
+out_invalid_address:
+ return nfs_invalf(fc, "NFS: Bad IP address specified");
+out_of_bounds:
+ return nfs_invalf(fc, "NFS: Value for '%s' out of range", param->key);
+}
+
+/*
+ * Split fc->source into "hostname:export_path".
+ *
+ * The leftmost colon demarks the split between the server's hostname
+ * and the export path. If the hostname starts with a left square
+ * bracket, then it may contain colons.
+ *
+ * Note: caller frees hostname and export path, even on error.
+ */
+static int nfs_parse_source(struct fs_context *fc,
+ size_t maxnamlen, size_t maxpathlen)
+{
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
+ const char *dev_name = fc->source;
+ size_t len;
+ const char *end;
+
+ if (unlikely(!dev_name || !*dev_name)) {
+ dfprintk(MOUNT, "NFS: device name not specified\n");
+ return -EINVAL;
+ }
+
+ /* Is the host name protected with square brakcets? */
+ if (*dev_name == '[') {
+ end = strchr(++dev_name, ']');
+ if (end == NULL || end[1] != ':')
+ goto out_bad_devname;
+
+ len = end - dev_name;
+ end++;
+ } else {
+ const char *comma;
+
+ end = strchr(dev_name, ':');
+ if (end == NULL)
+ goto out_bad_devname;
+ len = end - dev_name;
+
+ /* kill possible hostname list: not supported */
+ comma = memchr(dev_name, ',', len);
+ if (comma)
+ len = comma - dev_name;
+ }
+
+ if (len > maxnamlen)
+ goto out_hostname;
+
+ kfree(ctx->nfs_server.hostname);
+
+ /* N.B. caller will free nfs_server.hostname in all cases */
+ ctx->nfs_server.hostname = kmemdup_nul(dev_name, len, GFP_KERNEL);
+ if (!ctx->nfs_server.hostname)
+ goto out_nomem;
+ len = strlen(++end);
+ if (len > maxpathlen)
+ goto out_path;
+ ctx->nfs_server.export_path = kmemdup_nul(end, len, GFP_KERNEL);
+ if (!ctx->nfs_server.export_path)
+ goto out_nomem;
+
+ dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", ctx->nfs_server.export_path);
+ return 0;
+
+out_bad_devname:
+ return nfs_invalf(fc, "NFS: device name not in host:path format");
+out_nomem:
+ nfs_errorf(fc, "NFS: not enough memory to parse device name");
+ return -ENOMEM;
+out_hostname:
+ nfs_errorf(fc, "NFS: server hostname too long");
+ return -ENAMETOOLONG;
+out_path:
+ nfs_errorf(fc, "NFS: export pathname too long");
+ return -ENAMETOOLONG;
+}
+
+static inline bool is_remount_fc(struct fs_context *fc)
+{
+ return fc->root != NULL;
+}
+
+/*
+ * Parse monolithic NFS2/NFS3 mount data
+ * - fills in the mount root filehandle
+ *
+ * For option strings, user space handles the following behaviors:
+ *
+ * + DNS: mapping server host name to IP address ("addr=" option)
+ *
+ * + failure mode: how to behave if a mount request can't be handled
+ * immediately ("fg/bg" option)
+ *
+ * + retry: how often to retry a mount request ("retry=" option)
+ *
+ * + breaking back: trying proto=udp after proto=tcp, v2 after v3,
+ * mountproto=tcp after mountproto=udp, and so on
+ */
+static int nfs23_parse_monolithic(struct fs_context *fc,
+ struct nfs_mount_data *data)
+{
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
+ struct nfs_fh *mntfh = ctx->mntfh;
+ struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address;
+ int extra_flags = NFS_MOUNT_LEGACY_INTERFACE;
+
+ if (data == NULL)
+ goto out_no_data;
+
+ ctx->version = NFS_DEFAULT_VERSION;
+ switch (data->version) {
+ case 1:
+ data->namlen = 0;
+ fallthrough;
+ case 2:
+ data->bsize = 0;
+ fallthrough;
+ case 3:
+ if (data->flags & NFS_MOUNT_VER3)
+ goto out_no_v3;
+ data->root.size = NFS2_FHSIZE;
+ memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
+ /* Turn off security negotiation */
+ extra_flags |= NFS_MOUNT_SECFLAVOUR;
+ fallthrough;
+ case 4:
+ if (data->flags & NFS_MOUNT_SECFLAVOUR)
+ goto out_no_sec;
+ fallthrough;
+ case 5:
+ memset(data->context, 0, sizeof(data->context));
+ fallthrough;
+ case 6:
+ if (data->flags & NFS_MOUNT_VER3) {
+ if (data->root.size > NFS3_FHSIZE || data->root.size == 0)
+ goto out_invalid_fh;
+ mntfh->size = data->root.size;
+ ctx->version = 3;
+ } else {
+ mntfh->size = NFS2_FHSIZE;
+ ctx->version = 2;
+ }
+
+
+ memcpy(mntfh->data, data->root.data, mntfh->size);
+ if (mntfh->size < sizeof(mntfh->data))
+ memset(mntfh->data + mntfh->size, 0,
+ sizeof(mntfh->data) - mntfh->size);
+
+ /*
+ * for proto == XPRT_TRANSPORT_UDP, which is what uses
+ * to_exponential, implying shift: limit the shift value
+ * to BITS_PER_LONG (majortimeo is unsigned long)
+ */
+ if (!(data->flags & NFS_MOUNT_TCP)) /* this will be UDP */
+ if (data->retrans >= 64) /* shift value is too large */
+ goto out_invalid_data;
+
+ /*
+ * Translate to nfs_fs_context, which nfs_fill_super
+ * can deal with.
+ */
+ ctx->flags = data->flags & NFS_MOUNT_FLAGMASK;
+ ctx->flags |= extra_flags;
+ ctx->rsize = data->rsize;
+ ctx->wsize = data->wsize;
+ ctx->timeo = data->timeo;
+ ctx->retrans = data->retrans;
+ ctx->acregmin = data->acregmin;
+ ctx->acregmax = data->acregmax;
+ ctx->acdirmin = data->acdirmin;
+ ctx->acdirmax = data->acdirmax;
+ ctx->need_mount = false;
+
+ memcpy(sap, &data->addr, sizeof(data->addr));
+ ctx->nfs_server.addrlen = sizeof(data->addr);
+ ctx->nfs_server.port = ntohs(data->addr.sin_port);
+ if (sap->sa_family != AF_INET ||
+ !nfs_verify_server_address(sap))
+ goto out_no_address;
+
+ if (!(data->flags & NFS_MOUNT_TCP))
+ ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP;
+ /* N.B. caller will free nfs_server.hostname in all cases */
+ ctx->nfs_server.hostname = kstrdup(data->hostname, GFP_KERNEL);
+ if (!ctx->nfs_server.hostname)
+ goto out_nomem;
+
+ ctx->namlen = data->namlen;
+ ctx->bsize = data->bsize;
+
+ if (data->flags & NFS_MOUNT_SECFLAVOUR)
+ ctx->selected_flavor = data->pseudoflavor;
+ else
+ ctx->selected_flavor = RPC_AUTH_UNIX;
+
+ if (!(data->flags & NFS_MOUNT_NONLM))
+ ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK|
+ NFS_MOUNT_LOCAL_FCNTL);
+ else
+ ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK|
+ NFS_MOUNT_LOCAL_FCNTL);
+
+ /*
+ * The legacy version 6 binary mount data from userspace has a
+ * field used only to transport selinux information into the
+ * kernel. To continue to support that functionality we
+ * have a touch of selinux knowledge here in the NFS code. The
+ * userspace code converted context=blah to just blah so we are
+ * converting back to the full string selinux understands.
+ */
+ if (data->context[0]){
+#ifdef CONFIG_SECURITY_SELINUX
+ int ret;
+
+ data->context[NFS_MAX_CONTEXT_LEN] = '\0';
+ ret = vfs_parse_fs_string(fc, "context",
+ data->context, strlen(data->context));
+ if (ret < 0)
+ return ret;
+#else
+ return -EINVAL;
+#endif
+ }
+
+ break;
+ default:
+ goto generic;
+ }
+
+ ctx->skip_reconfig_option_check = true;
+ return 0;
+
+generic:
+ return generic_parse_monolithic(fc, data);
+
+out_no_data:
+ if (is_remount_fc(fc)) {
+ ctx->skip_reconfig_option_check = true;
+ return 0;
+ }
+ return nfs_invalf(fc, "NFS: mount program didn't pass any mount data");
+
+out_no_v3:
+ return nfs_invalf(fc, "NFS: nfs_mount_data version does not support v3");
+
+out_no_sec:
+ return nfs_invalf(fc, "NFS: nfs_mount_data version supports only AUTH_SYS");
+
+out_nomem:
+ dfprintk(MOUNT, "NFS: not enough memory to handle mount options");
+ return -ENOMEM;
+
+out_no_address:
+ return nfs_invalf(fc, "NFS: mount program didn't pass remote address");
+
+out_invalid_fh:
+ return nfs_invalf(fc, "NFS: invalid root filehandle");
+
+out_invalid_data:
+ return nfs_invalf(fc, "NFS: invalid binary mount data");
+}
+
+#if IS_ENABLED(CONFIG_NFS_V4)
+struct compat_nfs_string {
+ compat_uint_t len;
+ compat_uptr_t data;
+};
+
+static inline void compat_nfs_string(struct nfs_string *dst,
+ struct compat_nfs_string *src)
+{
+ dst->data = compat_ptr(src->data);
+ dst->len = src->len;
+}
+
+struct compat_nfs4_mount_data_v1 {
+ compat_int_t version;
+ compat_int_t flags;
+ compat_int_t rsize;
+ compat_int_t wsize;
+ compat_int_t timeo;
+ compat_int_t retrans;
+ compat_int_t acregmin;
+ compat_int_t acregmax;
+ compat_int_t acdirmin;
+ compat_int_t acdirmax;
+ struct compat_nfs_string client_addr;
+ struct compat_nfs_string mnt_path;
+ struct compat_nfs_string hostname;
+ compat_uint_t host_addrlen;
+ compat_uptr_t host_addr;
+ compat_int_t proto;
+ compat_int_t auth_flavourlen;
+ compat_uptr_t auth_flavours;
+};
+
+static void nfs4_compat_mount_data_conv(struct nfs4_mount_data *data)
+{
+ struct compat_nfs4_mount_data_v1 *compat =
+ (struct compat_nfs4_mount_data_v1 *)data;
+
+ /* copy the fields backwards */
+ data->auth_flavours = compat_ptr(compat->auth_flavours);
+ data->auth_flavourlen = compat->auth_flavourlen;
+ data->proto = compat->proto;
+ data->host_addr = compat_ptr(compat->host_addr);
+ data->host_addrlen = compat->host_addrlen;
+ compat_nfs_string(&data->hostname, &compat->hostname);
+ compat_nfs_string(&data->mnt_path, &compat->mnt_path);
+ compat_nfs_string(&data->client_addr, &compat->client_addr);
+ data->acdirmax = compat->acdirmax;
+ data->acdirmin = compat->acdirmin;
+ data->acregmax = compat->acregmax;
+ data->acregmin = compat->acregmin;
+ data->retrans = compat->retrans;
+ data->timeo = compat->timeo;
+ data->wsize = compat->wsize;
+ data->rsize = compat->rsize;
+ data->flags = compat->flags;
+ data->version = compat->version;
+}
+
+/*
+ * Validate NFSv4 mount options
+ */
+static int nfs4_parse_monolithic(struct fs_context *fc,
+ struct nfs4_mount_data *data)
+{
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
+ struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address;
+ char *c;
+
+ if (!data) {
+ if (is_remount_fc(fc))
+ goto done;
+ return nfs_invalf(fc,
+ "NFS4: mount program didn't pass any mount data");
+ }
+
+ ctx->version = 4;
+
+ if (data->version != 1)
+ return generic_parse_monolithic(fc, data);
+
+ if (in_compat_syscall())
+ nfs4_compat_mount_data_conv(data);
+
+ if (data->host_addrlen > sizeof(ctx->nfs_server.address))
+ goto out_no_address;
+ if (data->host_addrlen == 0)
+ goto out_no_address;
+ ctx->nfs_server.addrlen = data->host_addrlen;
+ if (copy_from_user(sap, data->host_addr, data->host_addrlen))
+ return -EFAULT;
+ if (!nfs_verify_server_address(sap))
+ goto out_no_address;
+ ctx->nfs_server.port = ntohs(((struct sockaddr_in *)sap)->sin_port);
+
+ if (data->auth_flavourlen) {
+ rpc_authflavor_t pseudoflavor;
+
+ if (data->auth_flavourlen > 1)
+ goto out_inval_auth;
+ if (copy_from_user(&pseudoflavor, data->auth_flavours,
+ sizeof(pseudoflavor)))
+ return -EFAULT;
+ ctx->selected_flavor = pseudoflavor;
+ } else {
+ ctx->selected_flavor = RPC_AUTH_UNIX;
+ }
+
+ c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN);
+ if (IS_ERR(c))
+ return PTR_ERR(c);
+ ctx->nfs_server.hostname = c;
+
+ c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN);
+ if (IS_ERR(c))
+ return PTR_ERR(c);
+ ctx->nfs_server.export_path = c;
+ dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", c);
+
+ c = strndup_user(data->client_addr.data, 16);
+ if (IS_ERR(c))
+ return PTR_ERR(c);
+ ctx->client_address = c;
+
+ /*
+ * Translate to nfs_fs_context, which nfs_fill_super
+ * can deal with.
+ */
+
+ ctx->flags = data->flags & NFS4_MOUNT_FLAGMASK;
+ ctx->rsize = data->rsize;
+ ctx->wsize = data->wsize;
+ ctx->timeo = data->timeo;
+ ctx->retrans = data->retrans;
+ ctx->acregmin = data->acregmin;
+ ctx->acregmax = data->acregmax;
+ ctx->acdirmin = data->acdirmin;
+ ctx->acdirmax = data->acdirmax;
+ ctx->nfs_server.protocol = data->proto;
+ nfs_validate_transport_protocol(ctx);
+ if (ctx->nfs_server.protocol == XPRT_TRANSPORT_UDP)
+ goto out_invalid_transport_udp;
+done:
+ ctx->skip_reconfig_option_check = true;
+ return 0;
+
+out_inval_auth:
+ return nfs_invalf(fc, "NFS4: Invalid number of RPC auth flavours %d",
+ data->auth_flavourlen);
+
+out_no_address:
+ return nfs_invalf(fc, "NFS4: mount program didn't pass remote address");
+
+out_invalid_transport_udp:
+ return nfs_invalf(fc, "NFS: Unsupported transport protocol udp");
+}
+#endif
+
+/*
+ * Parse a monolithic block of data from sys_mount().
+ */
+static int nfs_fs_context_parse_monolithic(struct fs_context *fc,
+ void *data)
+{
+ if (fc->fs_type == &nfs_fs_type)
+ return nfs23_parse_monolithic(fc, data);
+
+#if IS_ENABLED(CONFIG_NFS_V4)
+ if (fc->fs_type == &nfs4_fs_type)
+ return nfs4_parse_monolithic(fc, data);
+#endif
+
+ return nfs_invalf(fc, "NFS: Unsupported monolithic data version");
+}
+
+/*
+ * Validate the preparsed information in the config.
+ */
+static int nfs_fs_context_validate(struct fs_context *fc)
+{
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
+ struct nfs_subversion *nfs_mod;
+ struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address;
+ int max_namelen = PAGE_SIZE;
+ int max_pathlen = NFS_MAXPATHLEN;
+ int port = 0;
+ int ret;
+
+ if (!fc->source)
+ goto out_no_device_name;
+
+ /* Check for sanity first. */
+ if (ctx->minorversion && ctx->version != 4)
+ goto out_minorversion_mismatch;
+
+ if (ctx->options & NFS_OPTION_MIGRATION &&
+ (ctx->version != 4 || ctx->minorversion != 0))
+ goto out_migration_misuse;
+
+ /* Verify that any proto=/mountproto= options match the address
+ * families in the addr=/mountaddr= options.
+ */
+ if (ctx->protofamily != AF_UNSPEC &&
+ ctx->protofamily != ctx->nfs_server.address.sa_family)
+ goto out_proto_mismatch;
+
+ if (ctx->mountfamily != AF_UNSPEC) {
+ if (ctx->mount_server.addrlen) {
+ if (ctx->mountfamily != ctx->mount_server.address.sa_family)
+ goto out_mountproto_mismatch;
+ } else {
+ if (ctx->mountfamily != ctx->nfs_server.address.sa_family)
+ goto out_mountproto_mismatch;
+ }
+ }
+
+ if (!nfs_verify_server_address(sap))
+ goto out_no_address;
+
+ if (ctx->version == 4) {
+ if (IS_ENABLED(CONFIG_NFS_V4)) {
+ if (ctx->nfs_server.protocol == XPRT_TRANSPORT_RDMA)
+ port = NFS_RDMA_PORT;
+ else
+ port = NFS_PORT;
+ max_namelen = NFS4_MAXNAMLEN;
+ max_pathlen = NFS4_MAXPATHLEN;
+ nfs_validate_transport_protocol(ctx);
+ if (ctx->nfs_server.protocol == XPRT_TRANSPORT_UDP)
+ goto out_invalid_transport_udp;
+ ctx->flags &= ~(NFS_MOUNT_NONLM | NFS_MOUNT_NOACL |
+ NFS_MOUNT_VER3 | NFS_MOUNT_LOCAL_FLOCK |
+ NFS_MOUNT_LOCAL_FCNTL);
+ } else {
+ goto out_v4_not_compiled;
+ }
+ } else {
+ nfs_set_mount_transport_protocol(ctx);
+#ifdef CONFIG_NFS_DISABLE_UDP_SUPPORT
+ if (ctx->nfs_server.protocol == XPRT_TRANSPORT_UDP)
+ goto out_invalid_transport_udp;
+#endif
+ if (ctx->nfs_server.protocol == XPRT_TRANSPORT_RDMA)
+ port = NFS_RDMA_PORT;
+ }
+
+ nfs_set_port(sap, &ctx->nfs_server.port, port);
+
+ ret = nfs_parse_source(fc, max_namelen, max_pathlen);
+ if (ret < 0)
+ return ret;
+
+ /* Load the NFS protocol module if we haven't done so yet */
+ if (!ctx->nfs_mod) {
+ nfs_mod = get_nfs_version(ctx->version);
+ if (IS_ERR(nfs_mod)) {
+ ret = PTR_ERR(nfs_mod);
+ goto out_version_unavailable;
+ }
+ ctx->nfs_mod = nfs_mod;
+ }
+
+ /* Ensure the filesystem context has the correct fs_type */
+ if (fc->fs_type != ctx->nfs_mod->nfs_fs) {
+ module_put(fc->fs_type->owner);
+ __module_get(ctx->nfs_mod->nfs_fs->owner);
+ fc->fs_type = ctx->nfs_mod->nfs_fs;
+ }
+ return 0;
+
+out_no_device_name:
+ return nfs_invalf(fc, "NFS: Device name not specified");
+out_v4_not_compiled:
+ nfs_errorf(fc, "NFS: NFSv4 is not compiled into kernel");
+ return -EPROTONOSUPPORT;
+out_invalid_transport_udp:
+ return nfs_invalf(fc, "NFS: Unsupported transport protocol udp");
+out_no_address:
+ return nfs_invalf(fc, "NFS: mount program didn't pass remote address");
+out_mountproto_mismatch:
+ return nfs_invalf(fc, "NFS: Mount server address does not match mountproto= option");
+out_proto_mismatch:
+ return nfs_invalf(fc, "NFS: Server address does not match proto= option");
+out_minorversion_mismatch:
+ return nfs_invalf(fc, "NFS: Mount option vers=%u does not support minorversion=%u",
+ ctx->version, ctx->minorversion);
+out_migration_misuse:
+ return nfs_invalf(fc, "NFS: 'Migration' not supported for this NFS version");
+out_version_unavailable:
+ nfs_errorf(fc, "NFS: Version unavailable");
+ return ret;
+}
+
+/*
+ * Create an NFS superblock by the appropriate method.
+ */
+static int nfs_get_tree(struct fs_context *fc)
+{
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
+ int err = nfs_fs_context_validate(fc);
+
+ if (err)
+ return err;
+ if (!ctx->internal)
+ return ctx->nfs_mod->rpc_ops->try_get_tree(fc);
+ else
+ return nfs_get_tree_common(fc);
+}
+
+/*
+ * Handle duplication of a configuration. The caller copied *src into *sc, but
+ * it can't deal with resource pointers in the filesystem context, so we have
+ * to do that. We need to clear pointers, copy data or get extra refs as
+ * appropriate.
+ */
+static int nfs_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc)
+{
+ struct nfs_fs_context *src = nfs_fc2context(src_fc), *ctx;
+
+ ctx = kmemdup(src, sizeof(struct nfs_fs_context), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->mntfh = nfs_alloc_fhandle();
+ if (!ctx->mntfh) {
+ kfree(ctx);
+ return -ENOMEM;
+ }
+ nfs_copy_fh(ctx->mntfh, src->mntfh);
+
+ __module_get(ctx->nfs_mod->owner);
+ ctx->client_address = NULL;
+ ctx->mount_server.hostname = NULL;
+ ctx->nfs_server.export_path = NULL;
+ ctx->nfs_server.hostname = NULL;
+ ctx->fscache_uniq = NULL;
+ ctx->clone_data.fattr = NULL;
+ fc->fs_private = ctx;
+ return 0;
+}
+
+static void nfs_fs_context_free(struct fs_context *fc)
+{
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
+
+ if (ctx) {
+ if (ctx->server)
+ nfs_free_server(ctx->server);
+ if (ctx->nfs_mod)
+ put_nfs_version(ctx->nfs_mod);
+ kfree(ctx->client_address);
+ kfree(ctx->mount_server.hostname);
+ kfree(ctx->nfs_server.export_path);
+ kfree(ctx->nfs_server.hostname);
+ kfree(ctx->fscache_uniq);
+ nfs_free_fhandle(ctx->mntfh);
+ nfs_free_fattr(ctx->clone_data.fattr);
+ kfree(ctx);
+ }
+}
+
+static const struct fs_context_operations nfs_fs_context_ops = {
+ .free = nfs_fs_context_free,
+ .dup = nfs_fs_context_dup,
+ .parse_param = nfs_fs_context_parse_param,
+ .parse_monolithic = nfs_fs_context_parse_monolithic,
+ .get_tree = nfs_get_tree,
+ .reconfigure = nfs_reconfigure,
+};
+
+/*
+ * Prepare superblock configuration. We use the namespaces attached to the
+ * context. This may be the current process's namespaces, or it may be a
+ * container's namespaces.
+ */
+static int nfs_init_fs_context(struct fs_context *fc)
+{
+ struct nfs_fs_context *ctx;
+
+ ctx = kzalloc(sizeof(struct nfs_fs_context), GFP_KERNEL);
+ if (unlikely(!ctx))
+ return -ENOMEM;
+
+ ctx->mntfh = nfs_alloc_fhandle();
+ if (unlikely(!ctx->mntfh)) {
+ kfree(ctx);
+ return -ENOMEM;
+ }
+
+ ctx->protofamily = AF_UNSPEC;
+ ctx->mountfamily = AF_UNSPEC;
+ ctx->mount_server.port = NFS_UNSPEC_PORT;
+
+ if (fc->root) {
+ /* reconfigure, start with the current config */
+ struct nfs_server *nfss = fc->root->d_sb->s_fs_info;
+ struct net *net = nfss->nfs_client->cl_net;
+
+ ctx->flags = nfss->flags;
+ ctx->rsize = nfss->rsize;
+ ctx->wsize = nfss->wsize;
+ ctx->retrans = nfss->client->cl_timeout->to_retries;
+ ctx->selected_flavor = nfss->client->cl_auth->au_flavor;
+ ctx->acregmin = nfss->acregmin / HZ;
+ ctx->acregmax = nfss->acregmax / HZ;
+ ctx->acdirmin = nfss->acdirmin / HZ;
+ ctx->acdirmax = nfss->acdirmax / HZ;
+ ctx->timeo = 10U * nfss->client->cl_timeout->to_initval / HZ;
+ ctx->nfs_server.port = nfss->port;
+ ctx->nfs_server.addrlen = nfss->nfs_client->cl_addrlen;
+ ctx->version = nfss->nfs_client->rpc_ops->version;
+ ctx->minorversion = nfss->nfs_client->cl_minorversion;
+
+ memcpy(&ctx->nfs_server.address, &nfss->nfs_client->cl_addr,
+ ctx->nfs_server.addrlen);
+
+ if (fc->net_ns != net) {
+ put_net(fc->net_ns);
+ fc->net_ns = get_net(net);
+ }
+
+ ctx->nfs_mod = nfss->nfs_client->cl_nfs_mod;
+ __module_get(ctx->nfs_mod->owner);
+ } else {
+ /* defaults */
+ ctx->timeo = NFS_UNSPEC_TIMEO;
+ ctx->retrans = NFS_UNSPEC_RETRANS;
+ ctx->acregmin = NFS_DEF_ACREGMIN;
+ ctx->acregmax = NFS_DEF_ACREGMAX;
+ ctx->acdirmin = NFS_DEF_ACDIRMIN;
+ ctx->acdirmax = NFS_DEF_ACDIRMAX;
+ ctx->nfs_server.port = NFS_UNSPEC_PORT;
+ ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP;
+ ctx->selected_flavor = RPC_AUTH_MAXFLAVOR;
+ ctx->minorversion = 0;
+ ctx->need_mount = true;
+ }
+ fc->fs_private = ctx;
+ fc->ops = &nfs_fs_context_ops;
+ return 0;
+}
+
+struct file_system_type nfs_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "nfs",
+ .init_fs_context = nfs_init_fs_context,
+ .parameters = nfs_fs_parameters,
+ .kill_sb = nfs_kill_super,
+ .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
+};
+MODULE_ALIAS_FS("nfs");
+EXPORT_SYMBOL_GPL(nfs_fs_type);
+
+#if IS_ENABLED(CONFIG_NFS_V4)
+struct file_system_type nfs4_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "nfs4",
+ .init_fs_context = nfs_init_fs_context,
+ .parameters = nfs_fs_parameters,
+ .kill_sb = nfs_kill_super,
+ .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
+};
+MODULE_ALIAS_FS("nfs4");
+MODULE_ALIAS("nfs4");
+EXPORT_SYMBOL_GPL(nfs4_fs_type);
+#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index 7d6721e..a60df88 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -118,8 +118,6 @@
nfss->fscache_key = NULL;
nfss->fscache = NULL;
- if (!(nfss->options & NFS_OPTION_FSCACHE))
- return;
if (!uniq) {
uniq = "";
ulen = 1;
@@ -130,7 +128,7 @@
return;
key->nfs_client = nfss->nfs_client;
- key->key.super.s_flags = sb->s_flags & NFS_MS_MASK;
+ key->key.super.s_flags = sb->s_flags & NFS_SB_MASK;
key->key.nfs_server.flags = nfss->flags;
key->key.nfs_server.rsize = nfss->rsize;
key->key.nfs_server.wsize = nfss->wsize;
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 878c4c5..aaeeb46 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -64,66 +64,102 @@
/*
* get an NFS2/NFS3 root dentry from the root filehandle
*/
-struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh,
- const char *devname)
+int nfs_get_root(struct super_block *s, struct fs_context *fc)
{
- struct nfs_server *server = NFS_SB(sb);
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
+ struct nfs_server *server = NFS_SB(s);
struct nfs_fsinfo fsinfo;
- struct dentry *ret;
+ struct dentry *root;
struct inode *inode;
- void *name = kstrdup(devname, GFP_KERNEL);
- int error;
+ char *name;
+ int error = -ENOMEM;
+ unsigned long kflags = 0, kflags_out = 0;
+ name = kstrdup(fc->source, GFP_KERNEL);
if (!name)
- return ERR_PTR(-ENOMEM);
+ goto out;
/* get the actual root for this mount */
fsinfo.fattr = nfs_alloc_fattr();
- if (fsinfo.fattr == NULL) {
- kfree(name);
- return ERR_PTR(-ENOMEM);
- }
+ if (fsinfo.fattr == NULL)
+ goto out_name;
- error = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo);
+ fsinfo.fattr->label = nfs4_label_alloc(server, GFP_KERNEL);
+ if (IS_ERR(fsinfo.fattr->label))
+ goto out_fattr;
+ error = server->nfs_client->rpc_ops->getroot(server, ctx->mntfh, &fsinfo);
if (error < 0) {
dprintk("nfs_get_root: getattr error = %d\n", -error);
- ret = ERR_PTR(error);
- goto out;
+ nfs_errorf(fc, "NFS: Couldn't getattr on root");
+ goto out_label;
}
- inode = nfs_fhget(sb, mntfh, fsinfo.fattr, NULL);
+ inode = nfs_fhget(s, ctx->mntfh, fsinfo.fattr, NULL);
if (IS_ERR(inode)) {
dprintk("nfs_get_root: get root inode failed\n");
- ret = ERR_CAST(inode);
- goto out;
+ error = PTR_ERR(inode);
+ nfs_errorf(fc, "NFS: Couldn't get root inode");
+ goto out_label;
}
- error = nfs_superblock_set_dummy_root(sb, inode);
- if (error != 0) {
- ret = ERR_PTR(error);
- goto out;
- }
+ error = nfs_superblock_set_dummy_root(s, inode);
+ if (error != 0)
+ goto out_label;
/* root dentries normally start off anonymous and get spliced in later
* if the dentry tree reaches them; however if the dentry already
* exists, we'll pick it up at this point and use it as the root
*/
- ret = d_obtain_root(inode);
- if (IS_ERR(ret)) {
+ root = d_obtain_root(inode);
+ if (IS_ERR(root)) {
dprintk("nfs_get_root: get root dentry failed\n");
- goto out;
+ error = PTR_ERR(root);
+ nfs_errorf(fc, "NFS: Couldn't get root dentry");
+ goto out_label;
}
- security_d_instantiate(ret, inode);
- spin_lock(&ret->d_lock);
- if (IS_ROOT(ret) && !ret->d_fsdata &&
- !(ret->d_flags & DCACHE_NFSFS_RENAMED)) {
- ret->d_fsdata = name;
+ security_d_instantiate(root, inode);
+ spin_lock(&root->d_lock);
+ if (IS_ROOT(root) && !root->d_fsdata &&
+ !(root->d_flags & DCACHE_NFSFS_RENAMED)) {
+ root->d_fsdata = name;
name = NULL;
}
- spin_unlock(&ret->d_lock);
-out:
- kfree(name);
+ spin_unlock(&root->d_lock);
+ fc->root = root;
+ if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL)
+ kflags |= SECURITY_LSM_NATIVE_LABELS;
+ if (ctx->clone_data.sb) {
+ if (d_inode(fc->root)->i_fop != &nfs_dir_operations) {
+ error = -ESTALE;
+ goto error_splat_root;
+ }
+ /* clone lsm security options from the parent to the new sb */
+ error = security_sb_clone_mnt_opts(ctx->clone_data.sb,
+ s, kflags, &kflags_out);
+ } else {
+ error = security_sb_set_mnt_opts(s, fc->security,
+ kflags, &kflags_out);
+ }
+ if (error)
+ goto error_splat_root;
+ if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL &&
+ !(kflags_out & SECURITY_LSM_NATIVE_LABELS))
+ NFS_SB(s)->caps &= ~NFS_CAP_SECURITY_LABEL;
+
+ nfs_setsecurity(inode, fsinfo.fattr, fsinfo.fattr->label);
+ error = 0;
+
+out_label:
+ nfs4_label_free(fsinfo.fattr->label);
+out_fattr:
nfs_free_fattr(fsinfo.fattr);
- return ret;
+out_name:
+ kfree(name);
+out:
+ return error;
+error_splat_root:
+ dput(fc->root);
+ fc->root = NULL;
+ goto out_label;
}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 209263c..f27ecc2 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -62,7 +62,6 @@
/* Default is to see 64-bit inode numbers */
static bool enable_ino64 = NFS_64_BIT_INODE_NUMBERS_ENABLED;
-static void nfs_invalidate_inode(struct inode *);
static int nfs_update_inode(struct inode *, struct nfs_fattr *);
static struct kmem_cache * nfs_inode_cachep;
@@ -194,6 +193,19 @@
return nfs_check_cache_invalid_not_delegated(inode, flags);
}
+EXPORT_SYMBOL_GPL(nfs_check_cache_invalid);
+
+#ifdef CONFIG_NFS_V4_2
+static bool nfs_has_xattr_cache(const struct nfs_inode *nfsi)
+{
+ return nfsi->xattr_cache != NULL;
+}
+#else
+static bool nfs_has_xattr_cache(const struct nfs_inode *nfsi)
+{
+ return false;
+}
+#endif
static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags)
{
@@ -205,9 +217,13 @@
flags &= ~NFS_INO_INVALID_OTHER;
flags &= ~(NFS_INO_INVALID_CHANGE
| NFS_INO_INVALID_SIZE
- | NFS_INO_REVAL_PAGECACHE);
- }
+ | NFS_INO_REVAL_PAGECACHE
+ | NFS_INO_INVALID_XATTR);
+ } else if (flags & NFS_INO_REVAL_PAGECACHE)
+ flags |= NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE;
+ if (!nfs_has_xattr_cache(nfsi))
+ flags &= ~NFS_INO_INVALID_XATTR;
if (inode->i_mapping->nrpages == 0)
flags &= ~(NFS_INO_INVALID_DATA|NFS_INO_DATA_INVAL_DEFER);
nfsi->cache_validity |= flags;
@@ -234,11 +250,13 @@
| NFS_INO_INVALID_DATA
| NFS_INO_INVALID_ACCESS
| NFS_INO_INVALID_ACL
+ | NFS_INO_INVALID_XATTR
| NFS_INO_REVAL_PAGECACHE);
} else
nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR
| NFS_INO_INVALID_ACCESS
| NFS_INO_INVALID_ACL
+ | NFS_INO_INVALID_XATTR
| NFS_INO_REVAL_PAGECACHE);
nfs_zap_label_cache_locked(nfsi);
}
@@ -284,10 +302,18 @@
* Invalidate, but do not unhash, the inode.
* NB: must be called with inode->i_lock held!
*/
-static void nfs_invalidate_inode(struct inode *inode)
+static void nfs_set_inode_stale_locked(struct inode *inode)
{
set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
nfs_zap_caches_locked(inode);
+ trace_nfs_set_inode_stale(inode);
+}
+
+void nfs_set_inode_stale(struct inode *inode)
+{
+ spin_lock(&inode->i_lock);
+ nfs_set_inode_stale_locked(inode);
+ spin_unlock(&inode->i_lock);
}
struct nfs_find_desc {
@@ -310,7 +336,7 @@
if (NFS_FILEID(inode) != fattr->fileid)
return 0;
- if ((S_IFMT & inode->i_mode) != (S_IFMT & fattr->mode))
+ if (inode_wrong_type(inode, fattr->mode))
return 0;
if (nfs_compare_fh(NFS_FH(inode), fh))
return 0;
@@ -504,15 +530,15 @@
nfsi->read_cache_jiffies = fattr->time_start;
nfsi->attr_gencount = fattr->gencount;
if (fattr->valid & NFS_ATTR_FATTR_ATIME)
- inode->i_atime = timespec_to_timespec64(fattr->atime);
+ inode->i_atime = fattr->atime;
else if (nfs_server_capable(inode, NFS_CAP_ATIME))
nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME);
if (fattr->valid & NFS_ATTR_FATTR_MTIME)
- inode->i_mtime = timespec_to_timespec64(fattr->mtime);
+ inode->i_mtime = fattr->mtime;
else if (nfs_server_capable(inode, NFS_CAP_MTIME))
nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME);
if (fattr->valid & NFS_ATTR_FATTR_CTIME)
- inode->i_ctime = timespec_to_timespec64(fattr->ctime);
+ inode->i_ctime = fattr->ctime;
else if (nfs_server_capable(inode, NFS_CAP_CTIME))
nfs_set_cache_invalid(inode, NFS_INO_INVALID_CTIME);
if (fattr->valid & NFS_ATTR_FATTR_CHANGE)
@@ -535,6 +561,8 @@
inode->i_gid = fattr->gid;
else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP))
nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
+ if (nfs_server_capable(inode, NFS_CAP_XATTR))
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_XATTR);
if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
inode->i_blocks = fattr->du.nfs2.blocks;
if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
@@ -698,7 +726,7 @@
if ((attr->ia_valid & ATTR_GID) != 0)
inode->i_gid = attr->ia_gid;
if (fattr->valid & NFS_ATTR_FATTR_CTIME)
- inode->i_ctime = timespec_to_timespec64(fattr->ctime);
+ inode->i_ctime = fattr->ctime;
else
nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE
| NFS_INO_INVALID_CTIME);
@@ -709,14 +737,14 @@
NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_ATIME
| NFS_INO_INVALID_CTIME);
if (fattr->valid & NFS_ATTR_FATTR_ATIME)
- inode->i_atime = timespec_to_timespec64(fattr->atime);
+ inode->i_atime = fattr->atime;
else if (attr->ia_valid & ATTR_ATIME_SET)
inode->i_atime = attr->ia_atime;
else
nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME);
if (fattr->valid & NFS_ATTR_FATTR_CTIME)
- inode->i_ctime = timespec_to_timespec64(fattr->ctime);
+ inode->i_ctime = fattr->ctime;
else
nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE
| NFS_INO_INVALID_CTIME);
@@ -725,14 +753,14 @@
NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_MTIME
| NFS_INO_INVALID_CTIME);
if (fattr->valid & NFS_ATTR_FATTR_MTIME)
- inode->i_mtime = timespec_to_timespec64(fattr->mtime);
+ inode->i_mtime = fattr->mtime;
else if (attr->ia_valid & ATTR_MTIME_SET)
inode->i_mtime = attr->ia_mtime;
else
nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME);
if (fattr->valid & NFS_ATTR_FATTR_CTIME)
- inode->i_ctime = timespec_to_timespec64(fattr->ctime);
+ inode->i_ctime = fattr->ctime;
else
nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE
| NFS_INO_INVALID_CTIME);
@@ -787,16 +815,15 @@
trace_nfs_getattr_enter(inode);
- if ((query_flags & AT_STATX_DONT_SYNC) && !force_sync)
+ if ((query_flags & AT_STATX_DONT_SYNC) && !force_sync) {
+ nfs_readdirplus_parent_cache_hit(path->dentry);
goto out_no_update;
+ }
/* Flush out writes to the server in order to update c/mtime. */
- if ((request_mask & (STATX_CTIME|STATX_MTIME)) &&
- S_ISREG(inode->i_mode)) {
- err = filemap_write_and_wait(inode->i_mapping);
- if (err)
- goto out;
- }
+ if ((request_mask & (STATX_CTIME | STATX_MTIME)) &&
+ S_ISREG(inode->i_mode))
+ filemap_write_and_wait(inode->i_mapping);
/*
* We may force a getattr if the user cares about atime.
@@ -1064,7 +1091,7 @@
rcu_read_lock();
list_for_each_entry_rcu(pos, &nfsi->open_files, list) {
- if (cred != NULL && pos->cred != cred)
+ if (cred != NULL && cred_fscmp(pos->cred, cred) != 0)
continue;
if ((pos->mode & (FMODE_READ|FMODE_WRITE)) != mode)
continue;
@@ -1162,10 +1189,17 @@
dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Lu) getattr failed, error=%d\n",
inode->i_sb->s_id,
(unsigned long long)NFS_FILEID(inode), status);
- if (status == -ESTALE) {
- nfs_zap_caches(inode);
+ switch (status) {
+ case -ETIMEDOUT:
+ /* A soft timeout occurred. Use cached information? */
+ if (server->flags & NFS_MOUNT_SOFTREVAL)
+ status = 0;
+ break;
+ case -ESTALE:
if (!S_ISDIR(inode->i_mode))
- set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
+ nfs_set_inode_stale(inode);
+ else
+ nfs_zap_caches(inode);
}
goto err_out;
}
@@ -1357,7 +1391,7 @@
static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
{
- struct timespec ts;
+ struct timespec64 ts;
if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE)
&& (fattr->valid & NFS_ATTR_FATTR_CHANGE)
@@ -1365,20 +1399,22 @@
inode_set_iversion_raw(inode, fattr->change_attr);
if (S_ISDIR(inode->i_mode))
nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA);
+ else if (nfs_server_capable(inode, NFS_CAP_XATTR))
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_XATTR);
}
/* If we have atomic WCC data, we may update some attributes */
- ts = timespec64_to_timespec(inode->i_ctime);
+ ts = inode->i_ctime;
if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME)
&& (fattr->valid & NFS_ATTR_FATTR_CTIME)
- && timespec_equal(&ts, &fattr->pre_ctime)) {
- inode->i_ctime = timespec_to_timespec64(fattr->ctime);
+ && timespec64_equal(&ts, &fattr->pre_ctime)) {
+ inode->i_ctime = fattr->ctime;
}
- ts = timespec64_to_timespec(inode->i_mtime);
+ ts = inode->i_mtime;
if ((fattr->valid & NFS_ATTR_FATTR_PREMTIME)
&& (fattr->valid & NFS_ATTR_FATTR_MTIME)
- && timespec_equal(&ts, &fattr->pre_mtime)) {
- inode->i_mtime = timespec_to_timespec64(fattr->mtime);
+ && timespec64_equal(&ts, &fattr->pre_mtime)) {
+ inode->i_mtime = fattr->mtime;
if (S_ISDIR(inode->i_mode))
nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA);
}
@@ -1404,7 +1440,7 @@
struct nfs_inode *nfsi = NFS_I(inode);
loff_t cur_size, new_isize;
unsigned long invalid = 0;
- struct timespec ts;
+ struct timespec64 ts;
if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
return 0;
@@ -1421,7 +1457,7 @@
return 0;
return -ESTALE;
}
- if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
+ if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && inode_wrong_type(inode, fattr->mode))
return -ESTALE;
@@ -1431,12 +1467,12 @@
invalid |= NFS_INO_INVALID_CHANGE
| NFS_INO_REVAL_PAGECACHE;
- ts = timespec64_to_timespec(inode->i_mtime);
- if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&ts, &fattr->mtime))
+ ts = inode->i_mtime;
+ if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec64_equal(&ts, &fattr->mtime))
invalid |= NFS_INO_INVALID_MTIME;
- ts = timespec64_to_timespec(inode->i_ctime);
- if ((fattr->valid & NFS_ATTR_FATTR_CTIME) && !timespec_equal(&ts, &fattr->ctime))
+ ts = inode->i_ctime;
+ if ((fattr->valid & NFS_ATTR_FATTR_CTIME) && !timespec64_equal(&ts, &fattr->ctime))
invalid |= NFS_INO_INVALID_CTIME;
if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
@@ -1466,8 +1502,8 @@
if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink)
invalid |= NFS_INO_INVALID_OTHER;
- ts = timespec64_to_timespec(inode->i_atime);
- if ((fattr->valid & NFS_ATTR_FATTR_ATIME) && !timespec_equal(&ts, &fattr->atime))
+ ts = inode->i_atime;
+ if ((fattr->valid & NFS_ATTR_FATTR_ATIME) && !timespec64_equal(&ts, &fattr->atime))
invalid |= NFS_INO_INVALID_ATIME;
if (invalid != 0)
@@ -1739,12 +1775,12 @@
}
if ((fattr->valid & NFS_ATTR_FATTR_CTIME) != 0 &&
(fattr->valid & NFS_ATTR_FATTR_PRECTIME) == 0) {
- fattr->pre_ctime = timespec64_to_timespec(inode->i_ctime);
+ fattr->pre_ctime = inode->i_ctime;
fattr->valid |= NFS_ATTR_FATTR_PRECTIME;
}
if ((fattr->valid & NFS_ATTR_FATTR_MTIME) != 0 &&
(fattr->valid & NFS_ATTR_FATTR_PREMTIME) == 0) {
- fattr->pre_mtime = timespec64_to_timespec(inode->i_mtime);
+ fattr->pre_mtime = inode->i_mtime;
fattr->valid |= NFS_ATTR_FATTR_PREMTIME;
}
if ((fattr->valid & NFS_ATTR_FATTR_SIZE) != 0 &&
@@ -1836,7 +1872,7 @@
/*
* Make sure the inode's type hasn't changed.
*/
- if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) {
+ if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && inode_wrong_type(inode, fattr->mode)) {
/*
* Big trouble! The inode has become a different object.
*/
@@ -1882,7 +1918,8 @@
if (!(have_writers || have_delegation)) {
invalid |= NFS_INO_INVALID_DATA
| NFS_INO_INVALID_ACCESS
- | NFS_INO_INVALID_ACL;
+ | NFS_INO_INVALID_ACL
+ | NFS_INO_INVALID_XATTR;
/* Force revalidate of all attributes */
save_cache_validity |= NFS_INO_INVALID_CTIME
| NFS_INO_INVALID_MTIME
@@ -1907,7 +1944,7 @@
}
if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
- inode->i_mtime = timespec_to_timespec64(fattr->mtime);
+ inode->i_mtime = fattr->mtime;
} else if (server->caps & NFS_CAP_MTIME) {
nfsi->cache_validity |= save_cache_validity &
(NFS_INO_INVALID_MTIME
@@ -1916,7 +1953,7 @@
}
if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
- inode->i_ctime = timespec_to_timespec64(fattr->ctime);
+ inode->i_ctime = fattr->ctime;
} else if (server->caps & NFS_CAP_CTIME) {
nfsi->cache_validity |= save_cache_validity &
(NFS_INO_INVALID_CTIME
@@ -1954,7 +1991,7 @@
if (fattr->valid & NFS_ATTR_FATTR_ATIME)
- inode->i_atime = timespec_to_timespec64(fattr->atime);
+ inode->i_atime = fattr->atime;
else if (server->caps & NFS_CAP_ATIME) {
nfsi->cache_validity |= save_cache_validity &
(NFS_INO_INVALID_ATIME
@@ -2070,7 +2107,7 @@
* lookup validation will know that the inode is bad.
* (But we fall through to invalidate the caches.)
*/
- nfs_invalidate_inode(inode);
+ nfs_set_inode_stale_locked(inode);
return -ESTALE;
}
@@ -2085,6 +2122,9 @@
#if IS_ENABLED(CONFIG_NFS_V4)
nfsi->nfs4_acl = NULL;
#endif /* CONFIG_NFS_V4 */
+#ifdef CONFIG_NFS_V4_2
+ nfsi->xattr_cache = NULL;
+#endif
return &nfsi->vfs_inode;
}
EXPORT_SYMBOL_GPL(nfs_alloc_inode);
@@ -2120,6 +2160,7 @@
init_rwsem(&nfsi->rmdir_sem);
mutex_init(&nfsi->commit_mutex);
nfs4_init_once(nfsi);
+ nfsi->cache_change_attribute = 0;
}
static int __init nfs_init_inodecache(void)
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 9e71779..98554dd 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -4,17 +4,19 @@
*/
#include "nfs4_fs.h"
-#include <linux/mount.h>
+#include <linux/fs_context.h>
#include <linux/security.h>
#include <linux/crc32.h>
+#include <linux/sunrpc/addr.h>
#include <linux/nfs_page.h>
#include <linux/wait_bit.h>
-#define NFS_MS_MASK (SB_RDONLY|SB_NOSUID|SB_NODEV|SB_NOEXEC|SB_SYNCHRONOUS)
+#define NFS_SB_MASK (SB_RDONLY|SB_NOSUID|SB_NODEV|SB_NOEXEC|SB_SYNCHRONOUS)
extern const struct export_operations nfs_export_ops;
struct nfs_string;
+struct nfs_pageio_descriptor;
static inline void nfs_attr_check_mountpoint(struct super_block *parent, struct nfs_fattr *fattr)
{
@@ -31,17 +33,14 @@
return 1;
}
-struct nfs_clone_mount {
- const struct super_block *sb;
- const struct dentry *dentry;
- struct nfs_fh *fh;
- struct nfs_fattr *fattr;
- char *hostname;
- char *mnt_path;
- struct sockaddr *addr;
- size_t addrlen;
- rpc_authflavor_t authflavor;
-};
+static inline bool nfs_lookup_is_soft_revalidate(const struct dentry *dentry)
+{
+ if (!(NFS_SB(dentry->d_sb)->flags & NFS_MOUNT_SOFTREVAL))
+ return false;
+ if (!d_is_positive(dentry) || !NFS_FH(d_inode(dentry))->size)
+ return false;
+ return true;
+}
/*
* Note: RFC 1813 doesn't limit the number of auth flavors that
@@ -82,12 +81,16 @@
/*
* In-kernel mount arguments
*/
-struct nfs_parsed_mount_data {
- int flags;
+struct nfs_fs_context {
+ bool internal;
+ bool skip_reconfig_option_check;
+ bool need_mount;
+ bool sloppy;
+ unsigned int flags; /* NFS{,4}_MOUNT_* flags */
unsigned int rsize, wsize;
unsigned int timeo, retrans;
- unsigned int acregmin, acregmax,
- acdirmin, acdirmax;
+ unsigned int acregmin, acregmax;
+ unsigned int acdirmin, acdirmax;
unsigned int namlen;
unsigned int options;
unsigned int bsize;
@@ -97,10 +100,14 @@
unsigned int version;
unsigned int minorversion;
char *fscache_uniq;
- bool need_mount;
+ unsigned short protofamily;
+ unsigned short mountfamily;
struct {
- struct sockaddr_storage address;
+ union {
+ struct sockaddr address;
+ struct sockaddr_storage _address;
+ };
size_t addrlen;
char *hostname;
u32 version;
@@ -109,19 +116,61 @@
} mount_server;
struct {
- struct sockaddr_storage address;
+ union {
+ struct sockaddr address;
+ struct sockaddr_storage _address;
+ };
size_t addrlen;
char *hostname;
char *export_path;
int port;
unsigned short protocol;
unsigned short nconnect;
+ unsigned short export_path_len;
} nfs_server;
- void *lsm_opts;
- struct net *net;
+ struct nfs_fh *mntfh;
+ struct nfs_server *server;
+ struct nfs_subversion *nfs_mod;
+
+ /* Information for a cloned mount. */
+ struct nfs_clone_mount {
+ struct super_block *sb;
+ struct dentry *dentry;
+ struct nfs_fattr *fattr;
+ unsigned int inherited_bsize;
+ } clone_data;
};
+#define nfs_errorf(fc, fmt, ...) ((fc)->log.log ? \
+ errorf(fc, fmt, ## __VA_ARGS__) : \
+ ({ dprintk(fmt "\n", ## __VA_ARGS__); }))
+
+#define nfs_ferrorf(fc, fac, fmt, ...) ((fc)->log.log ? \
+ errorf(fc, fmt, ## __VA_ARGS__) : \
+ ({ dfprintk(fac, fmt "\n", ## __VA_ARGS__); }))
+
+#define nfs_invalf(fc, fmt, ...) ((fc)->log.log ? \
+ invalf(fc, fmt, ## __VA_ARGS__) : \
+ ({ dprintk(fmt "\n", ## __VA_ARGS__); -EINVAL; }))
+
+#define nfs_finvalf(fc, fac, fmt, ...) ((fc)->log.log ? \
+ invalf(fc, fmt, ## __VA_ARGS__) : \
+ ({ dfprintk(fac, fmt "\n", ## __VA_ARGS__); -EINVAL; }))
+
+#define nfs_warnf(fc, fmt, ...) ((fc)->log.log ? \
+ warnf(fc, fmt, ## __VA_ARGS__) : \
+ ({ dprintk(fmt "\n", ## __VA_ARGS__); }))
+
+#define nfs_fwarnf(fc, fac, fmt, ...) ((fc)->log.log ? \
+ warnf(fc, fmt, ## __VA_ARGS__) : \
+ ({ dfprintk(fac, fmt "\n", ## __VA_ARGS__); }))
+
+static inline struct nfs_fs_context *nfs_fc2context(const struct fs_context *fc)
+{
+ return fc->fs_private;
+}
+
/* mount_clnt.c */
struct nfs_mount_request {
struct sockaddr *sap;
@@ -137,14 +186,6 @@
struct net *net;
};
-struct nfs_mount_info {
- void (*fill_super)(struct super_block *, struct nfs_mount_info *);
- int (*set_security)(struct super_block *, struct dentry *, struct nfs_mount_info *);
- struct nfs_parsed_mount_data *parsed;
- struct nfs_clone_mount *cloned;
- struct nfs_fh *mntfh;
-};
-
extern int nfs_mount(struct nfs_mount_request *info);
extern void nfs_umount(const struct nfs_mount_request *info);
@@ -170,13 +211,9 @@
extern struct nfs_client *
nfs4_find_client_sessionid(struct net *, const struct sockaddr *,
struct nfs4_sessionid *, u32);
-extern struct nfs_server *nfs_create_server(struct nfs_mount_info *,
- struct nfs_subversion *);
-extern struct nfs_server *nfs4_create_server(
- struct nfs_mount_info *,
- struct nfs_subversion *);
-extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *,
- struct nfs_fh *);
+extern struct nfs_server *nfs_create_server(struct fs_context *);
+extern struct nfs_server *nfs4_create_server(struct fs_context *);
+extern struct nfs_server *nfs4_create_referral_server(struct fs_context *);
extern int nfs4_update_server(struct nfs_server *server, const char *hostname,
struct sockaddr *sap, size_t salen,
struct net *net);
@@ -227,7 +264,9 @@
extern const struct svc_version nfs4_callback_version1;
extern const struct svc_version nfs4_callback_version4;
-struct nfs_pageio_descriptor;
+/* fs_context.c */
+extern struct file_system_type nfs_fs_type;
+
/* pagelist.c */
extern int __init nfs_init_nfspagecache(void);
extern void nfs_destroy_nfspagecache(void);
@@ -255,12 +294,6 @@
struct nfs_pgio_mirror *
nfs_pgio_current_mirror(struct nfs_pageio_descriptor *desc);
-static inline bool nfs_pgio_has_mirroring(struct nfs_pageio_descriptor *desc)
-{
- WARN_ON_ONCE(desc->pg_mirror_count < 1);
- return desc->pg_mirror_count > 1;
-}
-
static inline bool nfs_match_open_context(const struct nfs_open_context *ctx1,
const struct nfs_open_context *ctx2)
{
@@ -387,23 +420,10 @@
/* super.c */
extern const struct super_operations nfs_sops;
-extern struct file_system_type nfs_fs_type;
-extern struct file_system_type nfs_xdev_fs_type;
-#if IS_ENABLED(CONFIG_NFS_V4)
-extern struct file_system_type nfs4_referral_fs_type;
-#endif
bool nfs_auth_info_match(const struct nfs_auth_info *, rpc_authflavor_t);
-struct dentry *nfs_try_mount(int, const char *, struct nfs_mount_info *,
- struct nfs_subversion *);
-int nfs_set_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *);
-int nfs_clone_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *);
-struct dentry *nfs_fs_mount_common(struct nfs_server *, int, const char *,
- struct nfs_mount_info *, struct nfs_subversion *);
-struct dentry *nfs_fs_mount(struct file_system_type *, int, const char *, void *);
-struct dentry * nfs_xdev_mount_common(struct file_system_type *, int,
- const char *, struct nfs_mount_info *);
+int nfs_try_get_tree(struct fs_context *);
+int nfs_get_tree_common(struct fs_context *);
void nfs_kill_super(struct super_block *);
-void nfs_fill_super(struct super_block *, struct nfs_mount_info *);
extern struct rpc_stat nfs_rpcstat;
@@ -411,7 +431,9 @@
extern void __exit unregister_nfs_fs(void);
extern bool nfs_sb_active(struct super_block *sb);
extern void nfs_sb_deactive(struct super_block *sb);
-
+extern int nfs_client_for_each_server(struct nfs_client *clp,
+ int (*fn)(struct nfs_server *, void *),
+ void *data);
/* io.c */
extern void nfs_start_io_read(struct inode *inode);
extern void nfs_end_io_read(struct inode *inode);
@@ -430,18 +452,12 @@
extern char *nfs_path(char **p, struct dentry *dentry,
char *buffer, ssize_t buflen, unsigned flags);
extern struct vfsmount *nfs_d_automount(struct path *path);
-struct vfsmount *nfs_submount(struct nfs_server *, struct dentry *,
- struct nfs_fh *, struct nfs_fattr *);
-struct vfsmount *nfs_do_submount(struct dentry *, struct nfs_fh *,
- struct nfs_fattr *, rpc_authflavor_t);
+int nfs_submount(struct fs_context *, struct nfs_server *);
+int nfs_do_submount(struct fs_context *);
/* getroot.c */
-extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *,
- const char *);
+extern int nfs_get_root(struct super_block *s, struct fs_context *fc);
#if IS_ENABLED(CONFIG_NFS_V4)
-extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *,
- const char *);
-
extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool);
#endif
@@ -460,7 +476,7 @@
int nfs_show_devname(struct seq_file *, struct dentry *);
int nfs_show_path(struct seq_file *, struct dentry *);
int nfs_show_stats(struct seq_file *, struct dentry *);
-int nfs_remount(struct super_block *sb, int *flags, char *raw_data);
+int nfs_reconfigure(struct fs_context *);
/* write.c */
extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
@@ -515,13 +531,25 @@
loff_t lstart, loff_t lend);
#ifdef CONFIG_NFS_V4_1
+static inline void
+pnfs_bucket_clear_pnfs_ds_commit_verifiers(struct pnfs_commit_bucket *buckets,
+ unsigned int nbuckets)
+{
+ unsigned int i;
+
+ for (i = 0; i < nbuckets; i++)
+ buckets[i].direct_verf.committed = NFS_INVALID_STABLE_HOW;
+}
static inline
void nfs_clear_pnfs_ds_commit_verifiers(struct pnfs_ds_commit_info *cinfo)
{
- int i;
+ struct pnfs_commit_array *array;
- for (i = 0; i < cinfo->nbuckets; i++)
- cinfo->buckets[i].direct_verf.committed = NFS_INVALID_STABLE_HOW;
+ rcu_read_lock();
+ list_for_each_entry_rcu(array, &cinfo->commits, cinfo_list)
+ pnfs_bucket_clear_pnfs_ds_commit_verifiers(array->buckets,
+ array->nbuckets);
+ rcu_read_unlock();
}
#else
static inline
@@ -542,6 +570,14 @@
return memcmp(v1->data, v2->data, sizeof(v1->data));
}
+static inline bool
+nfs_write_match_verf(const struct nfs_writeverf *verf,
+ struct nfs_page *req)
+{
+ return verf->committed > NFS_UNSTABLE &&
+ !nfs_write_verifier_cmp(&req->wb_verf, &verf->verifier);
+}
+
/* unlink.c */
extern struct rpc_task *
nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
@@ -654,7 +690,8 @@
}
/*
- * Record the page as unstable and mark its inode as dirty.
+ * Record the page as unstable (an extra writeback period) and mark its
+ * inode as dirty.
*/
static inline
void nfs_mark_page_unstable(struct page *page, struct nfs_commit_info *cinfo)
@@ -662,8 +699,11 @@
if (!cinfo->dreq) {
struct inode *inode = page_file_mapping(page)->host;
- inc_node_page_state(page, NR_UNSTABLE_NFS);
- inc_wb_stat(&inode_to_bdi(inode)->wb, WB_RECLAIMABLE);
+ /* This page is really still in write-back - just that the
+ * writeback is happening on the server now.
+ */
+ inc_node_page_state(page, NR_WRITEBACK);
+ inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
}
}
@@ -708,14 +748,14 @@
}
/*
- * Convert a struct timespec into a 64-bit change attribute
+ * Convert a struct timespec64 into a 64-bit change attribute
*
- * This does approximately the same thing as timespec_to_ns(),
+ * This does approximately the same thing as timespec64_to_ns(),
* but for calculation efficiency, we multiply the seconds by
* 1024*1024*1024.
*/
static inline
-u64 nfs_timespec_to_change_attr(const struct timespec *ts)
+u64 nfs_timespec_to_change_attr(const struct timespec64 *ts)
{
return ((u64)ts->tv_sec << 30) + ts->tv_nsec;
}
@@ -779,3 +819,16 @@
}
return nfs_error_is_fatal(err);
}
+
+/*
+ * Select between a default port value and a user-specified port value.
+ * If a zero value is set, then autobind will be used.
+ */
+static inline void nfs_set_port(struct sockaddr *sap, int *port,
+ const unsigned short default_port)
+{
+ if (*port == NFS_UNSPEC_PORT)
+ *port = default_port;
+
+ rpc_set_port(sap, *port);
+}
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index a2593b7..dda5c3e 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -29,9 +29,7 @@
*/
#define encode_dirpath_sz (1 + XDR_QUADLEN(MNTPATHLEN))
#define MNT_status_sz (1)
-#define MNT_fhs_status_sz (1)
#define MNT_fhandle_sz XDR_QUADLEN(NFS2_FHSIZE)
-#define MNT_fhandle3_sz (1 + XDR_QUADLEN(NFS3_FHSIZE))
#define MNT_fhandlev3_sz XDR_QUADLEN(NFS3_FHSIZE)
#define MNT_authflav3_sz (1 + NFS_MAX_SECFLAVORS)
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 2db17fd..2bcbe38 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -19,6 +19,7 @@
#include <linux/vfs.h>
#include <linux/sunrpc/gss_api.h>
#include "internal.h"
+#include "nfs.h"
#define NFSDBG_FACILITY NFSDBG_VFS
@@ -143,31 +144,66 @@
*/
struct vfsmount *nfs_d_automount(struct path *path)
{
- struct vfsmount *mnt;
+ struct nfs_fs_context *ctx;
+ struct fs_context *fc;
+ struct vfsmount *mnt = ERR_PTR(-ENOMEM);
struct nfs_server *server = NFS_SERVER(d_inode(path->dentry));
- struct nfs_fh *fh = NULL;
- struct nfs_fattr *fattr = NULL;
+ struct nfs_client *client = server->nfs_client;
+ int timeout = READ_ONCE(nfs_mountpoint_expiry_timeout);
+ int ret;
if (IS_ROOT(path->dentry))
return ERR_PTR(-ESTALE);
- mnt = ERR_PTR(-ENOMEM);
- fh = nfs_alloc_fhandle();
- fattr = nfs_alloc_fattr();
- if (fh == NULL || fattr == NULL)
- goto out;
+ /* Open a new filesystem context, transferring parameters from the
+ * parent superblock, including the network namespace.
+ */
+ fc = fs_context_for_submount(path->mnt->mnt_sb->s_type, path->dentry);
+ if (IS_ERR(fc))
+ return ERR_CAST(fc);
- mnt = server->nfs_client->rpc_ops->submount(server, path->dentry, fh, fattr);
+ ctx = nfs_fc2context(fc);
+ ctx->clone_data.dentry = path->dentry;
+ ctx->clone_data.sb = path->dentry->d_sb;
+ ctx->clone_data.fattr = nfs_alloc_fattr();
+ if (!ctx->clone_data.fattr)
+ goto out_fc;
+
+ if (fc->net_ns != client->cl_net) {
+ put_net(fc->net_ns);
+ fc->net_ns = get_net(client->cl_net);
+ }
+
+ /* for submounts we want the same server; referrals will reassign */
+ memcpy(&ctx->nfs_server.address, &client->cl_addr, client->cl_addrlen);
+ ctx->nfs_server.addrlen = client->cl_addrlen;
+ ctx->nfs_server.port = server->port;
+
+ ctx->version = client->rpc_ops->version;
+ ctx->minorversion = client->cl_minorversion;
+ ctx->nfs_mod = client->cl_nfs_mod;
+ __module_get(ctx->nfs_mod->owner);
+
+ ret = client->rpc_ops->submount(fc, server);
+ if (ret < 0) {
+ mnt = ERR_PTR(ret);
+ goto out_fc;
+ }
+
+ up_write(&fc->root->d_sb->s_umount);
+ mnt = vfs_create_mount(fc);
if (IS_ERR(mnt))
- goto out;
+ goto out_fc;
mntget(mnt); /* prevent immediate expiration */
- mnt_set_expiry(mnt, &nfs_automount_list);
- schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout);
+ if (timeout <= 0)
+ goto out_fc;
-out:
- nfs_free_fattr(fattr);
- nfs_free_fhandle(fh);
+ mnt_set_expiry(mnt, &nfs_automount_list);
+ schedule_delayed_work(&nfs_automount_task, timeout);
+
+out_fc:
+ put_fs_context(fc);
return mnt;
}
@@ -202,10 +238,11 @@
static void nfs_expire_automounts(struct work_struct *work)
{
struct list_head *list = &nfs_automount_list;
+ int timeout = READ_ONCE(nfs_mountpoint_expiry_timeout);
mark_mounts_for_expiry(list);
- if (!list_empty(list))
- schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout);
+ if (!list_empty(list) && timeout > 0)
+ schedule_delayed_work(&nfs_automount_task, timeout);
}
void nfs_release_automount_timer(void)
@@ -214,64 +251,117 @@
cancel_delayed_work(&nfs_automount_task);
}
-/*
- * Clone a mountpoint of the appropriate type
- */
-static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
- const char *devname,
- struct nfs_clone_mount *mountdata)
-{
- return vfs_submount(mountdata->dentry, &nfs_xdev_fs_type, devname, mountdata);
-}
-
/**
* nfs_do_submount - set up mountpoint when crossing a filesystem boundary
- * @dentry: parent directory
- * @fh: filehandle for new root dentry
- * @fattr: attributes for new root inode
- * @authflavor: security flavor to use when performing the mount
+ * @fc: pointer to struct nfs_fs_context
*
*/
-struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh,
- struct nfs_fattr *fattr, rpc_authflavor_t authflavor)
+int nfs_do_submount(struct fs_context *fc)
{
- struct nfs_clone_mount mountdata = {
- .sb = dentry->d_sb,
- .dentry = dentry,
- .fh = fh,
- .fattr = fattr,
- .authflavor = authflavor,
- };
- struct vfsmount *mnt;
- char *page = (char *) __get_free_page(GFP_USER);
- char *devname;
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
+ struct dentry *dentry = ctx->clone_data.dentry;
+ struct nfs_server *server;
+ char *buffer, *p;
+ int ret;
- if (page == NULL)
- return ERR_PTR(-ENOMEM);
+ /* create a new volume representation */
+ server = ctx->nfs_mod->rpc_ops->clone_server(NFS_SB(ctx->clone_data.sb),
+ ctx->mntfh,
+ ctx->clone_data.fattr,
+ ctx->selected_flavor);
- devname = nfs_devname(dentry, page, PAGE_SIZE);
- if (IS_ERR(devname))
- mnt = ERR_CAST(devname);
- else
- mnt = nfs_do_clone_mount(NFS_SB(dentry->d_sb), devname, &mountdata);
+ if (IS_ERR(server))
+ return PTR_ERR(server);
- free_page((unsigned long)page);
- return mnt;
+ ctx->server = server;
+
+ buffer = kmalloc(4096, GFP_USER);
+ if (!buffer)
+ return -ENOMEM;
+
+ ctx->internal = true;
+ ctx->clone_data.inherited_bsize = ctx->clone_data.sb->s_blocksize_bits;
+
+ p = nfs_devname(dentry, buffer, 4096);
+ if (IS_ERR(p)) {
+ nfs_errorf(fc, "NFS: Couldn't determine submount pathname");
+ ret = PTR_ERR(p);
+ } else {
+ ret = vfs_parse_fs_string(fc, "source", p, buffer + 4096 - p);
+ if (!ret)
+ ret = vfs_get_tree(fc);
+ }
+ kfree(buffer);
+ return ret;
}
EXPORT_SYMBOL_GPL(nfs_do_submount);
-struct vfsmount *nfs_submount(struct nfs_server *server, struct dentry *dentry,
- struct nfs_fh *fh, struct nfs_fattr *fattr)
+int nfs_submount(struct fs_context *fc, struct nfs_server *server)
{
- int err;
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
+ struct dentry *dentry = ctx->clone_data.dentry;
struct dentry *parent = dget_parent(dentry);
+ int err;
/* Look it up again to get its attributes */
- err = server->nfs_client->rpc_ops->lookup(d_inode(parent), &dentry->d_name, fh, fattr, NULL);
+ err = server->nfs_client->rpc_ops->lookup(d_inode(parent), dentry,
+ ctx->mntfh, ctx->clone_data.fattr,
+ NULL);
dput(parent);
if (err != 0)
- return ERR_PTR(err);
+ return err;
- return nfs_do_submount(dentry, fh, fattr, server->client->cl_auth->au_flavor);
+ ctx->selected_flavor = server->client->cl_auth->au_flavor;
+ return nfs_do_submount(fc);
}
EXPORT_SYMBOL_GPL(nfs_submount);
+
+static int param_set_nfs_timeout(const char *val, const struct kernel_param *kp)
+{
+ long num;
+ int ret;
+
+ if (!val)
+ return -EINVAL;
+ ret = kstrtol(val, 0, &num);
+ if (ret)
+ return -EINVAL;
+ if (num > 0) {
+ if (num >= INT_MAX / HZ)
+ num = INT_MAX;
+ else
+ num *= HZ;
+ *((int *)kp->arg) = num;
+ if (!list_empty(&nfs_automount_list))
+ mod_delayed_work(system_wq, &nfs_automount_task, num);
+ } else {
+ *((int *)kp->arg) = -1*HZ;
+ cancel_delayed_work(&nfs_automount_task);
+ }
+ return 0;
+}
+
+static int param_get_nfs_timeout(char *buffer, const struct kernel_param *kp)
+{
+ long num = *((int *)kp->arg);
+
+ if (num > 0) {
+ if (num >= INT_MAX - (HZ - 1))
+ num = INT_MAX / HZ;
+ else
+ num = (num + (HZ - 1)) / HZ;
+ } else
+ num = -1;
+ return scnprintf(buffer, PAGE_SIZE, "%li\n", num);
+}
+
+static const struct kernel_param_ops param_ops_nfs_timeout = {
+ .set = param_set_nfs_timeout,
+ .get = param_get_nfs_timeout,
+};
+#define param_check_nfs_timeout(name, p) __param_check(name, p, int);
+
+module_param(nfs_mountpoint_expiry_timeout, nfs_timeout, 0644);
+MODULE_PARM_DESC(nfs_mountpoint_expiry_timeout,
+ "Set the NFS automounted mountpoint timeout value (seconds)."
+ "Values <= 0 turn expiration off.");
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 887f913..f6676af 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -209,9 +209,9 @@
* unsigned int useconds;
* };
*/
-static __be32 *xdr_encode_time(__be32 *p, const struct timespec *timep)
+static __be32 *xdr_encode_time(__be32 *p, const struct timespec64 *timep)
{
- *p++ = cpu_to_be32(timep->tv_sec);
+ *p++ = cpu_to_be32((u32)timep->tv_sec);
if (timep->tv_nsec != 0)
*p++ = cpu_to_be32(timep->tv_nsec / NSEC_PER_USEC);
else
@@ -227,14 +227,14 @@
* Illustrated" by Brent Callaghan, Addison-Wesley, ISBN 0-201-32750-5.
*/
static __be32 *xdr_encode_current_server_time(__be32 *p,
- const struct timespec *timep)
+ const struct timespec64 *timep)
{
*p++ = cpu_to_be32(timep->tv_sec);
*p++ = cpu_to_be32(1000000);
return p;
}
-static __be32 *xdr_decode_time(__be32 *p, struct timespec *timep)
+static __be32 *xdr_decode_time(__be32 *p, struct timespec64 *timep)
{
timep->tv_sec = be32_to_cpup(p++);
timep->tv_nsec = be32_to_cpup(p++) * NSEC_PER_USEC;
@@ -339,7 +339,6 @@
static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr,
struct user_namespace *userns)
{
- struct timespec ts;
__be32 *p;
p = xdr_reserve_space(xdr, NFS_sattr_sz << 2);
@@ -361,21 +360,17 @@
else
*p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
- if (attr->ia_valid & ATTR_ATIME_SET) {
- ts = timespec64_to_timespec(attr->ia_atime);
- p = xdr_encode_time(p, &ts);
- } else if (attr->ia_valid & ATTR_ATIME) {
- ts = timespec64_to_timespec(attr->ia_atime);
- p = xdr_encode_current_server_time(p, &ts);
- } else
+ if (attr->ia_valid & ATTR_ATIME_SET)
+ p = xdr_encode_time(p, &attr->ia_atime);
+ else if (attr->ia_valid & ATTR_ATIME)
+ p = xdr_encode_current_server_time(p, &attr->ia_atime);
+ else
p = xdr_time_not_set(p);
- if (attr->ia_valid & ATTR_MTIME_SET) {
- ts = timespec64_to_timespec(attr->ia_mtime);
- xdr_encode_time(p, &ts);
- } else if (attr->ia_valid & ATTR_MTIME) {
- ts = timespec64_to_timespec(attr->ia_mtime);
- xdr_encode_current_server_time(p, &ts);
- } else
+ if (attr->ia_valid & ATTR_MTIME_SET)
+ xdr_encode_time(p, &attr->ia_mtime);
+ else if (attr->ia_valid & ATTR_MTIME)
+ xdr_encode_current_server_time(p, &attr->ia_mtime);
+ else
xdr_time_not_set(p);
}
diff --git a/fs/nfs/nfs3_fs.h b/fs/nfs/nfs3_fs.h
index f82e11c..1b950b6 100644
--- a/fs/nfs/nfs3_fs.h
+++ b/fs/nfs/nfs3_fs.h
@@ -27,7 +27,7 @@
#endif /* CONFIG_NFS_V3_ACL */
/* nfs3client.c */
-struct nfs_server *nfs3_create_server(struct nfs_mount_info *, struct nfs_subversion *);
+struct nfs_server *nfs3_create_server(struct fs_context *);
struct nfs_server *nfs3_clone_server(struct nfs_server *, struct nfs_fh *,
struct nfs_fattr *, rpc_authflavor_t);
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 26c94b3..c6c8633 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -108,7 +108,7 @@
case -EPROTONOSUPPORT:
dprintk("NFS_V3_ACL extension not supported; disabling\n");
server->caps &= ~NFS_CAP_ACLS;
- /* fall through */
+ fallthrough;
case -ENOTSUPP:
status = -EOPNOTSUPP;
default:
@@ -228,7 +228,7 @@
dprintk("NFS_V3_ACL SETACL RPC not supported"
"(will not retry)\n");
server->caps &= ~NFS_CAP_ACLS;
- /* fall through */
+ fallthrough;
case -ENOTSUPP:
status = -EOPNOTSUPP;
}
diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c
index 148ceb7..5601e47 100644
--- a/fs/nfs/nfs3client.c
+++ b/fs/nfs/nfs3client.c
@@ -46,10 +46,10 @@
}
#endif
-struct nfs_server *nfs3_create_server(struct nfs_mount_info *mount_info,
- struct nfs_subversion *nfs_mod)
+struct nfs_server *nfs3_create_server(struct fs_context *fc)
{
- struct nfs_server *server = nfs_create_server(mount_info, nfs_mod);
+ struct nfs_server *server = nfs_create_server(fc);
+
/* Create a client RPC handle for the NFS v3 ACL management interface */
if (!IS_ERR(server))
nfs_init_server_aclclient(server);
@@ -106,7 +106,10 @@
cl_init.nconnect = mds_clp->cl_nconnect;
if (mds_srv->flags & NFS_MOUNT_NORESVPORT)
- set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
+ __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
+
+ __set_bit(NFS_CS_NOPING, &cl_init.init_flags);
+ __set_bit(NFS_CS_DS, &cl_init.init_flags);
/* Use the MDS nfs_client cl_ipaddr. */
nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 01a03ca..e1491de 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -110,10 +110,15 @@
.rpc_resp = fattr,
};
int status;
+ unsigned short task_flags = 0;
+
+ /* Is this is an attribute revalidation, subject to softreval? */
+ if (inode && (server->flags & NFS_MOUNT_SOFTREVAL))
+ task_flags |= RPC_TASK_TIMEOUT;
dprintk("NFS call getattr\n");
nfs_fattr_init(fattr);
- status = rpc_call_sync(server->client, &msg, 0);
+ status = rpc_call_sync(server->client, &msg, task_flags);
dprintk("NFS reply getattr: %d\n", status);
return status;
}
@@ -140,23 +145,23 @@
nfs_fattr_init(fattr);
status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
if (status == 0) {
+ nfs_setattr_update_inode(inode, sattr, fattr);
if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL)
nfs_zap_acl_cache(inode);
- nfs_setattr_update_inode(inode, sattr, fattr);
}
dprintk("NFS reply setattr: %d\n", status);
return status;
}
static int
-nfs3_proc_lookup(struct inode *dir, const struct qstr *name,
+nfs3_proc_lookup(struct inode *dir, struct dentry *dentry,
struct nfs_fh *fhandle, struct nfs_fattr *fattr,
struct nfs4_label *label)
{
struct nfs3_diropargs arg = {
.fh = NFS_FH(dir),
- .name = name->name,
- .len = name->len
+ .name = dentry->d_name.name,
+ .len = dentry->d_name.len
};
struct nfs3_diropres res = {
.fh = fhandle,
@@ -168,20 +173,25 @@
.rpc_resp = &res,
};
int status;
+ unsigned short task_flags = 0;
- dprintk("NFS call lookup %s\n", name->name);
+ /* Is this is an attribute revalidation, subject to softreval? */
+ if (nfs_lookup_is_soft_revalidate(dentry))
+ task_flags |= RPC_TASK_TIMEOUT;
+
res.dir_attr = nfs_alloc_fattr();
if (res.dir_attr == NULL)
return -ENOMEM;
+ dprintk("NFS call lookup %pd2\n", dentry);
nfs_fattr_init(fattr);
- status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, task_flags);
nfs_refresh_inode(dir, res.dir_attr);
if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) {
msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR];
msg.rpc_argp = fhandle;
msg.rpc_resp = fattr;
- status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, task_flags);
}
nfs_free_fattr(res.dir_attr);
dprintk("NFS reply lookup: %d\n", status);
@@ -990,7 +1000,7 @@
.nlmclnt_ops = &nlmclnt_fl_close_lock_ops,
.getroot = nfs3_proc_get_root,
.submount = nfs_submount,
- .try_mount = nfs_try_mount,
+ .try_get_tree = nfs_try_get_tree,
.getattr = nfs3_proc_getattr,
.setattr = nfs3_proc_setattr,
.lookup = nfs3_proc_lookup,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 23d75cd..dff6b52 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -457,14 +457,14 @@
* uint32 nseconds;
* };
*/
-static __be32 *xdr_encode_nfstime3(__be32 *p, const struct timespec *timep)
+static __be32 *xdr_encode_nfstime3(__be32 *p, const struct timespec64 *timep)
{
- *p++ = cpu_to_be32(timep->tv_sec);
+ *p++ = cpu_to_be32((u32)timep->tv_sec);
*p++ = cpu_to_be32(timep->tv_nsec);
return p;
}
-static __be32 *xdr_decode_nfstime3(__be32 *p, struct timespec *timep)
+static __be32 *xdr_decode_nfstime3(__be32 *p, struct timespec64 *timep)
{
timep->tv_sec = be32_to_cpup(p++);
timep->tv_nsec = be32_to_cpup(p++);
@@ -534,7 +534,6 @@
static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr,
struct user_namespace *userns)
{
- struct timespec ts;
u32 nbytes;
__be32 *p;
@@ -584,10 +583,8 @@
*p++ = xdr_zero;
if (attr->ia_valid & ATTR_ATIME_SET) {
- struct timespec ts;
*p++ = xdr_two;
- ts = timespec64_to_timespec(attr->ia_atime);
- p = xdr_encode_nfstime3(p, &ts);
+ p = xdr_encode_nfstime3(p, &attr->ia_atime);
} else if (attr->ia_valid & ATTR_ATIME) {
*p++ = xdr_one;
} else
@@ -595,8 +592,7 @@
if (attr->ia_valid & ATTR_MTIME_SET) {
*p++ = xdr_two;
- ts = timespec64_to_timespec(attr->ia_mtime);
- xdr_encode_nfstime3(p, &ts);
+ xdr_encode_nfstime3(p, &attr->ia_mtime);
} else if (attr->ia_valid & ATTR_MTIME) {
*p = xdr_one;
} else
diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h
index 901cca7..0fe5aac 100644
--- a/fs/nfs/nfs42.h
+++ b/fs/nfs/nfs42.h
@@ -6,6 +6,8 @@
#ifndef __LINUX_FS_NFS_NFS4_2_H
#define __LINUX_FS_NFS_NFS4_2_H
+#include <linux/xattr.h>
+
/*
* FIXME: four LAYOUTSTATS calls per compound at most! Do we need to support
* more? Need to consider not to pre-alloc too much for a compound.
@@ -13,8 +15,10 @@
#define PNFS_LAYOUTSTATS_MAXDEV (4)
/* nfs4.2proc.c */
+#ifdef CONFIG_NFS_V4_2
int nfs42_proc_allocate(struct file *, loff_t, loff_t);
-ssize_t nfs42_proc_copy(struct file *, loff_t, struct file *, loff_t, size_t);
+ssize_t nfs42_proc_copy(struct file *, loff_t, struct file *, loff_t, size_t,
+ struct nl4_server *, nfs4_stateid *, bool);
int nfs42_proc_deallocate(struct file *, loff_t, loff_t);
loff_t nfs42_proc_llseek(struct file *, loff_t, int);
int nfs42_proc_layoutstats_generic(struct nfs_server *,
@@ -23,5 +27,38 @@
int nfs42_proc_layouterror(struct pnfs_layout_segment *lseg,
const struct nfs42_layout_error *errors,
size_t n);
+int nfs42_proc_copy_notify(struct file *, struct file *,
+ struct nfs42_copy_notify_res *);
+static inline bool nfs42_files_from_same_server(struct file *in,
+ struct file *out)
+{
+ struct nfs_client *c_in = (NFS_SERVER(file_inode(in)))->nfs_client;
+ struct nfs_client *c_out = (NFS_SERVER(file_inode(out)))->nfs_client;
+ return nfs4_check_serverowner_major_id(c_in->cl_serverowner,
+ c_out->cl_serverowner);
+}
+
+ssize_t nfs42_proc_getxattr(struct inode *inode, const char *name,
+ void *buf, size_t buflen);
+int nfs42_proc_setxattr(struct inode *inode, const char *name,
+ const void *buf, size_t buflen, int flags);
+ssize_t nfs42_proc_listxattrs(struct inode *inode, void *buf,
+ size_t buflen, u64 *cookiep, bool *eofp);
+int nfs42_proc_removexattr(struct inode *inode, const char *name);
+
+/*
+ * Maximum XDR buffer size needed for a listxattr buffer of buflen size.
+ *
+ * The upper boundary is a buffer with all 1-byte sized attribute names.
+ * They would be 7 bytes long in the eventual buffer ("user.x\0"), and
+ * 8 bytes long XDR-encoded.
+ *
+ * Include the trailing eof word as well.
+ */
+static inline u32 nfs42_listxattr_xdrsize(u32 buflen)
+{
+ return ((buflen / (XATTR_USER_PREFIX_LEN + 2)) * 8) + 4;
+}
+#endif /* CONFIG_NFS_V4_2 */
#endif /* __LINUX_FS_NFS_NFS4_2_H */
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 6b7c926..2587b1b 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -3,6 +3,7 @@
* Copyright (c) 2014 Anna Schumaker <Anna.Schumaker@Netapp.com>
*/
#include <linux/fs.h>
+#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/sched.h>
#include <linux/nfs.h>
#include <linux/nfs3.h>
@@ -15,20 +16,42 @@
#include "pnfs.h"
#include "nfs4session.h"
#include "internal.h"
+#include "delegation.h"
+#include "nfs4trace.h"
#define NFSDBG_FACILITY NFSDBG_PROC
static int nfs42_do_offload_cancel_async(struct file *dst, nfs4_stateid *std);
+static void nfs42_set_netaddr(struct file *filep, struct nfs42_netaddr *naddr)
+{
+ struct nfs_client *clp = (NFS_SERVER(file_inode(filep)))->nfs_client;
+ unsigned short port = 2049;
+
+ rcu_read_lock();
+ naddr->netid_len = scnprintf(naddr->netid,
+ sizeof(naddr->netid), "%s",
+ rpc_peeraddr2str(clp->cl_rpcclient,
+ RPC_DISPLAY_NETID));
+ naddr->addr_len = scnprintf(naddr->addr,
+ sizeof(naddr->addr),
+ "%s.%u.%u",
+ rpc_peeraddr2str(clp->cl_rpcclient,
+ RPC_DISPLAY_ADDR),
+ port >> 8, port & 255);
+ rcu_read_unlock();
+}
+
static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
struct nfs_lock_context *lock, loff_t offset, loff_t len)
{
struct inode *inode = file_inode(filep);
struct nfs_server *server = NFS_SERVER(inode);
+ u32 bitmask[3];
struct nfs42_falloc_args args = {
.falloc_fh = NFS_FH(inode),
.falloc_offset = offset,
.falloc_length = len,
- .falloc_bitmask = server->cache_consistency_bitmask,
+ .falloc_bitmask = bitmask,
};
struct nfs42_falloc_res res = {
.falloc_server = server,
@@ -40,8 +63,15 @@
status = nfs4_set_rw_stateid(&args.falloc_stateid, lock->open_context,
lock, FMODE_WRITE);
- if (status)
+ if (status) {
+ if (status == -EAGAIN)
+ status = -NFS4ERR_BAD_STATEID;
return status;
+ }
+
+ memcpy(bitmask, server->cache_consistency_bitmask, sizeof(bitmask));
+ if (server->attr_bitmask[1] & FATTR4_WORD1_SPACE_USED)
+ bitmask[1] |= FATTR4_WORD1_SPACE_USED;
res.falloc_fattr = nfs_alloc_fattr();
if (!res.falloc_fattr)
@@ -50,7 +80,8 @@
status = nfs4_call_sync(server->client, server, msg,
&args.seq_args, &res.seq_res, 0);
if (status == 0)
- status = nfs_post_op_update_inode(inode, res.falloc_fattr);
+ status = nfs_post_op_update_inode_force_wcc(inode,
+ res.falloc_fattr);
kfree(res.falloc_fattr);
return status;
@@ -134,22 +165,26 @@
}
static int handle_async_copy(struct nfs42_copy_res *res,
- struct nfs_server *server,
+ struct nfs_server *dst_server,
+ struct nfs_server *src_server,
struct file *src,
struct file *dst,
- nfs4_stateid *src_stateid)
+ nfs4_stateid *src_stateid,
+ bool *restart)
{
struct nfs4_copy_state *copy, *tmp_copy;
int status = NFS4_OK;
bool found_pending = false;
- struct nfs_open_context *ctx = nfs_file_open_context(dst);
+ struct nfs_open_context *dst_ctx = nfs_file_open_context(dst);
+ struct nfs_open_context *src_ctx = nfs_file_open_context(src);
copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS);
if (!copy)
return -ENOMEM;
- spin_lock(&server->nfs_client->cl_lock);
- list_for_each_entry(tmp_copy, &server->nfs_client->pending_cb_stateids,
+ spin_lock(&dst_server->nfs_client->cl_lock);
+ list_for_each_entry(tmp_copy,
+ &dst_server->nfs_client->pending_cb_stateids,
copies) {
if (memcmp(&res->write_res.stateid, &tmp_copy->stateid,
NFS4_STATEID_SIZE))
@@ -159,7 +194,7 @@
break;
}
if (found_pending) {
- spin_unlock(&server->nfs_client->cl_lock);
+ spin_unlock(&dst_server->nfs_client->cl_lock);
kfree(copy);
copy = tmp_copy;
goto out;
@@ -167,19 +202,32 @@
memcpy(©->stateid, &res->write_res.stateid, NFS4_STATEID_SIZE);
init_completion(©->completion);
- copy->parent_state = ctx->state;
+ copy->parent_dst_state = dst_ctx->state;
+ copy->parent_src_state = src_ctx->state;
- list_add_tail(©->copies, &server->ss_copies);
- spin_unlock(&server->nfs_client->cl_lock);
+ list_add_tail(©->copies, &dst_server->ss_copies);
+ spin_unlock(&dst_server->nfs_client->cl_lock);
+
+ if (dst_server != src_server) {
+ spin_lock(&src_server->nfs_client->cl_lock);
+ list_add_tail(©->src_copies, &src_server->ss_copies);
+ spin_unlock(&src_server->nfs_client->cl_lock);
+ }
status = wait_for_completion_interruptible(©->completion);
- spin_lock(&server->nfs_client->cl_lock);
+ spin_lock(&dst_server->nfs_client->cl_lock);
list_del_init(©->copies);
- spin_unlock(&server->nfs_client->cl_lock);
+ spin_unlock(&dst_server->nfs_client->cl_lock);
+ if (dst_server != src_server) {
+ spin_lock(&src_server->nfs_client->cl_lock);
+ list_del_init(©->src_copies);
+ spin_unlock(&src_server->nfs_client->cl_lock);
+ }
if (status == -ERESTARTSYS) {
goto out_cancel;
- } else if (copy->flags) {
+ } else if (copy->flags || copy->error == NFS4ERR_PARTNER_NO_AUTH) {
status = -EAGAIN;
+ *restart = true;
goto out_cancel;
}
out:
@@ -187,12 +235,14 @@
memcpy(&res->write_res.verifier, ©->verf, sizeof(copy->verf));
status = -copy->error;
+out_free:
kfree(copy);
return status;
out_cancel:
nfs42_do_offload_cancel_async(dst, ©->stateid);
- kfree(copy);
- return status;
+ if (!nfs42_files_from_same_server(src, dst))
+ nfs42_do_offload_cancel_async(src, src_stateid);
+ goto out_free;
}
static int process_copy_commit(struct file *dst, loff_t pos_dst,
@@ -224,7 +274,10 @@
struct file *dst,
struct nfs_lock_context *dst_lock,
struct nfs42_copy_args *args,
- struct nfs42_copy_res *res)
+ struct nfs42_copy_res *res,
+ struct nl4_server *nss,
+ nfs4_stateid *cnr_stateid,
+ bool *restart)
{
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COPY],
@@ -232,17 +285,26 @@
.rpc_resp = res,
};
struct inode *dst_inode = file_inode(dst);
- struct nfs_server *server = NFS_SERVER(dst_inode);
+ struct inode *src_inode = file_inode(src);
+ struct nfs_server *dst_server = NFS_SERVER(dst_inode);
+ struct nfs_server *src_server = NFS_SERVER(src_inode);
loff_t pos_src = args->src_pos;
loff_t pos_dst = args->dst_pos;
size_t count = args->count;
ssize_t status;
- status = nfs4_set_rw_stateid(&args->src_stateid, src_lock->open_context,
- src_lock, FMODE_READ);
- if (status)
- return status;
-
+ if (nss) {
+ args->cp_src = nss;
+ nfs4_stateid_copy(&args->src_stateid, cnr_stateid);
+ } else {
+ status = nfs4_set_rw_stateid(&args->src_stateid,
+ src_lock->open_context, src_lock, FMODE_READ);
+ if (status) {
+ if (status == -EAGAIN)
+ status = -NFS4ERR_BAD_STATEID;
+ return status;
+ }
+ }
status = nfs_filemap_write_and_wait_range(file_inode(src)->i_mapping,
pos_src, pos_src + (loff_t)count - 1);
if (status)
@@ -250,8 +312,11 @@
status = nfs4_set_rw_stateid(&args->dst_stateid, dst_lock->open_context,
dst_lock, FMODE_WRITE);
- if (status)
+ if (status) {
+ if (status == -EAGAIN)
+ status = -NFS4ERR_BAD_STATEID;
return status;
+ }
status = nfs_sync_inode(dst_inode);
if (status)
@@ -264,13 +329,15 @@
if (!res->commit_res.verf)
return -ENOMEM;
}
+ set_bit(NFS_CLNT_SRC_SSC_COPY_STATE,
+ &src_lock->open_context->state->flags);
set_bit(NFS_CLNT_DST_SSC_COPY_STATE,
&dst_lock->open_context->state->flags);
- status = nfs4_call_sync(server->client, server, &msg,
+ status = nfs4_call_sync(dst_server->client, dst_server, &msg,
&args->seq_args, &res->seq_res, 0);
if (status == -ENOTSUPP)
- server->caps &= ~NFS_CAP_COPY;
+ dst_server->caps &= ~NFS_CAP_COPY;
if (status)
goto out;
@@ -282,8 +349,8 @@
}
if (!res->synchronous) {
- status = handle_async_copy(res, server, src, dst,
- &args->src_stateid);
+ status = handle_async_copy(res, dst_server, src_server, src,
+ dst, &args->src_stateid, restart);
if (status)
goto out;
}
@@ -295,9 +362,18 @@
goto out;
}
- truncate_pagecache_range(dst_inode, pos_dst,
- pos_dst + res->write_res.count);
-
+ WARN_ON_ONCE(invalidate_inode_pages2_range(dst_inode->i_mapping,
+ pos_dst >> PAGE_SHIFT,
+ (pos_dst + res->write_res.count - 1) >> PAGE_SHIFT));
+ spin_lock(&dst_inode->i_lock);
+ NFS_I(dst_inode)->cache_validity |= (NFS_INO_REVAL_PAGECACHE |
+ NFS_INO_REVAL_FORCED | NFS_INO_INVALID_SIZE |
+ NFS_INO_INVALID_ATTR | NFS_INO_INVALID_DATA);
+ spin_unlock(&dst_inode->i_lock);
+ spin_lock(&src_inode->i_lock);
+ NFS_I(src_inode)->cache_validity |= (NFS_INO_REVAL_PAGECACHE |
+ NFS_INO_REVAL_FORCED | NFS_INO_INVALID_ATIME);
+ spin_unlock(&src_inode->i_lock);
status = res->write_res.count;
out:
if (args->sync)
@@ -306,8 +382,9 @@
}
ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
- struct file *dst, loff_t pos_dst,
- size_t count)
+ struct file *dst, loff_t pos_dst, size_t count,
+ struct nl4_server *nss,
+ nfs4_stateid *cnr_stateid, bool sync)
{
struct nfs_server *server = NFS_SERVER(file_inode(dst));
struct nfs_lock_context *src_lock;
@@ -318,7 +395,7 @@
.dst_fh = NFS_FH(file_inode(dst)),
.dst_pos = pos_dst,
.count = count,
- .sync = false,
+ .sync = sync,
};
struct nfs42_copy_res res;
struct nfs4_exception src_exception = {
@@ -330,6 +407,7 @@
.stateid = &args.dst_stateid,
};
ssize_t err, err2;
+ bool restart = false;
src_lock = nfs_get_lock_context(nfs_file_open_context(src));
if (IS_ERR(src_lock))
@@ -349,21 +427,33 @@
inode_lock(file_inode(dst));
err = _nfs42_proc_copy(src, src_lock,
dst, dst_lock,
- &args, &res);
+ &args, &res,
+ nss, cnr_stateid, &restart);
inode_unlock(file_inode(dst));
if (err >= 0)
break;
- if (err == -ENOTSUPP) {
+ if (err == -ENOTSUPP &&
+ nfs42_files_from_same_server(src, dst)) {
err = -EOPNOTSUPP;
break;
} else if (err == -EAGAIN) {
- dst_exception.retry = 1;
- continue;
+ if (!restart) {
+ dst_exception.retry = 1;
+ continue;
+ }
+ break;
} else if (err == -NFS4ERR_OFFLOAD_NO_REQS && !args.sync) {
args.sync = true;
dst_exception.retry = 1;
continue;
+ } else if ((err == -ESTALE ||
+ err == -NFS4ERR_OFFLOAD_DENIED ||
+ err == -ENOTSUPP) &&
+ !nfs42_files_from_same_server(src, dst)) {
+ nfs42_do_offload_cancel_async(src, &args.src_stateid);
+ err = -EOPNOTSUPP;
+ break;
}
err2 = nfs4_handle_exception(server, err, &src_exception);
@@ -461,6 +551,79 @@
return status;
}
+static int _nfs42_proc_copy_notify(struct file *src, struct file *dst,
+ struct nfs42_copy_notify_args *args,
+ struct nfs42_copy_notify_res *res)
+{
+ struct nfs_server *src_server = NFS_SERVER(file_inode(src));
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COPY_NOTIFY],
+ .rpc_argp = args,
+ .rpc_resp = res,
+ };
+ int status;
+ struct nfs_open_context *ctx;
+ struct nfs_lock_context *l_ctx;
+
+ ctx = get_nfs_open_context(nfs_file_open_context(src));
+ l_ctx = nfs_get_lock_context(ctx);
+ if (IS_ERR(l_ctx))
+ return PTR_ERR(l_ctx);
+
+ status = nfs4_set_rw_stateid(&args->cna_src_stateid, ctx, l_ctx,
+ FMODE_READ);
+ nfs_put_lock_context(l_ctx);
+ if (status) {
+ if (status == -EAGAIN)
+ status = -NFS4ERR_BAD_STATEID;
+ return status;
+ }
+
+ status = nfs4_call_sync(src_server->client, src_server, &msg,
+ &args->cna_seq_args, &res->cnr_seq_res, 0);
+ if (status == -ENOTSUPP)
+ src_server->caps &= ~NFS_CAP_COPY_NOTIFY;
+
+ put_nfs_open_context(nfs_file_open_context(src));
+ return status;
+}
+
+int nfs42_proc_copy_notify(struct file *src, struct file *dst,
+ struct nfs42_copy_notify_res *res)
+{
+ struct nfs_server *src_server = NFS_SERVER(file_inode(src));
+ struct nfs42_copy_notify_args *args;
+ struct nfs4_exception exception = {
+ .inode = file_inode(src),
+ };
+ int status;
+
+ if (!(src_server->caps & NFS_CAP_COPY_NOTIFY))
+ return -EOPNOTSUPP;
+
+ args = kzalloc(sizeof(struct nfs42_copy_notify_args), GFP_NOFS);
+ if (args == NULL)
+ return -ENOMEM;
+
+ args->cna_src_fh = NFS_FH(file_inode(src)),
+ args->cna_dst.nl4_type = NL4_NETADDR;
+ nfs42_set_netaddr(dst, &args->cna_dst.u.nl4_addr);
+ exception.stateid = &args->cna_src_stateid;
+
+ do {
+ status = _nfs42_proc_copy_notify(src, dst, args, res);
+ if (status == -ENOTSUPP) {
+ status = -EOPNOTSUPP;
+ goto out;
+ }
+ status = nfs4_handle_exception(src_server, status, &exception);
+ } while (exception.retry);
+
+out:
+ kfree(args);
+ return status;
+}
+
static loff_t _nfs42_proc_llseek(struct file *filep,
struct nfs_lock_context *lock, loff_t offset, int whence)
{
@@ -485,8 +648,11 @@
status = nfs4_set_rw_stateid(&args.sa_stateid, lock->open_context,
lock, FMODE_READ);
- if (status)
+ if (status) {
+ if (status == -EAGAIN)
+ status = -NFS4ERR_BAD_STATEID;
return status;
+ }
status = nfs_filemap_write_and_wait_range(inode->i_mapping,
offset, LLONG_MAX);
@@ -569,7 +735,7 @@
switch (task->tk_status) {
case 0:
- break;
+ return;
case -NFS4ERR_BADHANDLE:
case -ESTALE:
pnfs_destroy_layout(NFS_I(inode));
@@ -615,6 +781,8 @@
case -EOPNOTSUPP:
NFS_SERVER(inode)->caps &= ~NFS_CAP_LAYOUTSTATS;
}
+
+ trace_nfs4_layoutstats(inode, &data->args.stateid, task->tk_status);
}
static void
@@ -737,7 +905,7 @@
switch (task->tk_status) {
case 0:
- break;
+ return;
case -NFS4ERR_BADHANDLE:
case -ESTALE:
pnfs_destroy_layout(NFS_I(inode));
@@ -781,6 +949,9 @@
case -EOPNOTSUPP:
NFS_SERVER(inode)->caps &= ~NFS_CAP_LAYOUTERROR;
}
+
+ trace_nfs4_layouterror(inode, &data->args.errors[0].stateid,
+ task->tk_status);
}
static void
@@ -864,13 +1035,18 @@
status = nfs4_set_rw_stateid(&args.src_stateid, src_lock->open_context,
src_lock, FMODE_READ);
- if (status)
+ if (status) {
+ if (status == -EAGAIN)
+ status = -NFS4ERR_BAD_STATEID;
return status;
-
+ }
status = nfs4_set_rw_stateid(&args.dst_stateid, dst_lock->open_context,
dst_lock, FMODE_WRITE);
- if (status)
+ if (status) {
+ if (status == -EAGAIN)
+ status = -NFS4ERR_BAD_STATEID;
return status;
+ }
res.dst_fattr = nfs_alloc_fattr();
if (!res.dst_fattr)
@@ -938,3 +1114,256 @@
nfs_put_lock_context(src_lock);
return err;
}
+
+#define NFS4XATTR_MAXPAGES DIV_ROUND_UP(XATTR_SIZE_MAX, PAGE_SIZE)
+
+static int _nfs42_proc_removexattr(struct inode *inode, const char *name)
+{
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct nfs42_removexattrargs args = {
+ .fh = NFS_FH(inode),
+ .xattr_name = name,
+ };
+ struct nfs42_removexattrres res;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVEXATTR],
+ .rpc_argp = &args,
+ .rpc_resp = &res,
+ };
+ int ret;
+ unsigned long timestamp = jiffies;
+
+ ret = nfs4_call_sync(server->client, server, &msg, &args.seq_args,
+ &res.seq_res, 1);
+ if (!ret)
+ nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
+
+ return ret;
+}
+
+static int _nfs42_proc_setxattr(struct inode *inode, const char *name,
+ const void *buf, size_t buflen, int flags)
+{
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct page *pages[NFS4XATTR_MAXPAGES];
+ struct nfs42_setxattrargs arg = {
+ .fh = NFS_FH(inode),
+ .xattr_pages = pages,
+ .xattr_len = buflen,
+ .xattr_name = name,
+ .xattr_flags = flags,
+ };
+ struct nfs42_setxattrres res;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETXATTR],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
+ int ret, np;
+ unsigned long timestamp = jiffies;
+
+ if (buflen > server->sxasize)
+ return -ERANGE;
+
+ if (buflen > 0) {
+ np = nfs4_buf_to_pages_noslab(buf, buflen, arg.xattr_pages);
+ if (np < 0)
+ return np;
+ } else
+ np = 0;
+
+ ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args,
+ &res.seq_res, 1);
+
+ for (; np > 0; np--)
+ put_page(pages[np - 1]);
+
+ if (!ret)
+ nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
+
+ return ret;
+}
+
+static ssize_t _nfs42_proc_getxattr(struct inode *inode, const char *name,
+ void *buf, size_t buflen)
+{
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct page *pages[NFS4XATTR_MAXPAGES] = {};
+ struct nfs42_getxattrargs arg = {
+ .fh = NFS_FH(inode),
+ .xattr_pages = pages,
+ .xattr_len = buflen,
+ .xattr_name = name,
+ };
+ struct nfs42_getxattrres res;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETXATTR],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
+ int ret, np;
+
+ ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args,
+ &res.seq_res, 0);
+ if (ret < 0)
+ return ret;
+
+ /*
+ * Normally, the caching is done one layer up, but for successful
+ * RPCS, always cache the result here, even if the caller was
+ * just querying the length, or if the reply was too big for
+ * the caller. This avoids a second RPC in the case of the
+ * common query-alloc-retrieve cycle for xattrs.
+ *
+ * Note that xattr_len is always capped to XATTR_SIZE_MAX.
+ */
+
+ nfs4_xattr_cache_add(inode, name, NULL, pages, res.xattr_len);
+
+ if (buflen) {
+ if (res.xattr_len > buflen)
+ return -ERANGE;
+ _copy_from_pages(buf, pages, 0, res.xattr_len);
+ }
+
+ np = DIV_ROUND_UP(res.xattr_len, PAGE_SIZE);
+ while (--np >= 0)
+ __free_page(pages[np]);
+
+ return res.xattr_len;
+}
+
+static ssize_t _nfs42_proc_listxattrs(struct inode *inode, void *buf,
+ size_t buflen, u64 *cookiep, bool *eofp)
+{
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct page **pages;
+ struct nfs42_listxattrsargs arg = {
+ .fh = NFS_FH(inode),
+ .cookie = *cookiep,
+ };
+ struct nfs42_listxattrsres res = {
+ .eof = false,
+ .xattr_buf = buf,
+ .xattr_len = buflen,
+ };
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LISTXATTRS],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
+ u32 xdrlen;
+ int ret, np, i;
+
+
+ ret = -ENOMEM;
+ res.scratch = alloc_page(GFP_KERNEL);
+ if (!res.scratch)
+ goto out;
+
+ xdrlen = nfs42_listxattr_xdrsize(buflen);
+ if (xdrlen > server->lxasize)
+ xdrlen = server->lxasize;
+ np = xdrlen / PAGE_SIZE + 1;
+
+ pages = kcalloc(np, sizeof(struct page *), GFP_KERNEL);
+ if (!pages)
+ goto out_free_scratch;
+ for (i = 0; i < np; i++) {
+ pages[i] = alloc_page(GFP_KERNEL);
+ if (!pages[i])
+ goto out_free_pages;
+ }
+
+ arg.xattr_pages = pages;
+ arg.count = xdrlen;
+
+ ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args,
+ &res.seq_res, 0);
+
+ if (ret >= 0) {
+ ret = res.copied;
+ *cookiep = res.cookie;
+ *eofp = res.eof;
+ }
+
+out_free_pages:
+ while (--np >= 0) {
+ if (pages[np])
+ __free_page(pages[np]);
+ }
+ kfree(pages);
+out_free_scratch:
+ __free_page(res.scratch);
+out:
+ return ret;
+
+}
+
+ssize_t nfs42_proc_getxattr(struct inode *inode, const char *name,
+ void *buf, size_t buflen)
+{
+ struct nfs4_exception exception = { };
+ ssize_t err;
+
+ do {
+ err = _nfs42_proc_getxattr(inode, name, buf, buflen);
+ if (err >= 0)
+ break;
+ err = nfs4_handle_exception(NFS_SERVER(inode), err,
+ &exception);
+ } while (exception.retry);
+
+ return err;
+}
+
+int nfs42_proc_setxattr(struct inode *inode, const char *name,
+ const void *buf, size_t buflen, int flags)
+{
+ struct nfs4_exception exception = { };
+ int err;
+
+ do {
+ err = _nfs42_proc_setxattr(inode, name, buf, buflen, flags);
+ if (!err)
+ break;
+ err = nfs4_handle_exception(NFS_SERVER(inode), err,
+ &exception);
+ } while (exception.retry);
+
+ return err;
+}
+
+ssize_t nfs42_proc_listxattrs(struct inode *inode, void *buf,
+ size_t buflen, u64 *cookiep, bool *eofp)
+{
+ struct nfs4_exception exception = { };
+ ssize_t err;
+
+ do {
+ err = _nfs42_proc_listxattrs(inode, buf, buflen,
+ cookiep, eofp);
+ if (err >= 0)
+ break;
+ err = nfs4_handle_exception(NFS_SERVER(inode), err,
+ &exception);
+ } while (exception.retry);
+
+ return err;
+}
+
+int nfs42_proc_removexattr(struct inode *inode, const char *name)
+{
+ struct nfs4_exception exception = { };
+ int err;
+
+ do {
+ err = _nfs42_proc_removexattr(inode, name);
+ if (!err)
+ break;
+ err = nfs4_handle_exception(NFS_SERVER(inode), err,
+ &exception);
+ } while (exception.retry);
+
+ return err;
+}
diff --git a/fs/nfs/nfs42xattr.c b/fs/nfs/nfs42xattr.c
new file mode 100644
index 0000000..6c2ce79
--- /dev/null
+++ b/fs/nfs/nfs42xattr.c
@@ -0,0 +1,1057 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2019, 2020 Amazon.com, Inc. or its affiliates. All rights reserved.
+ *
+ * User extended attribute client side cache functions.
+ *
+ * Author: Frank van der Linden <fllinden@amazon.com>
+ */
+#include <linux/errno.h>
+#include <linux/nfs_fs.h>
+#include <linux/hashtable.h>
+#include <linux/refcount.h>
+#include <uapi/linux/xattr.h>
+
+#include "nfs4_fs.h"
+#include "internal.h"
+
+/*
+ * User extended attributes client side caching is implemented by having
+ * a cache structure attached to NFS inodes. This structure is allocated
+ * when needed, and freed when the cache is zapped.
+ *
+ * The cache structure contains as hash table of entries, and a pointer
+ * to a special-cased entry for the listxattr cache.
+ *
+ * Accessing and allocating / freeing the caches is done via reference
+ * counting. The cache entries use a similar refcounting scheme.
+ *
+ * This makes freeing a cache, both from the shrinker and from the
+ * zap cache path, easy. It also means that, in current use cases,
+ * the large majority of inodes will not waste any memory, as they
+ * will never have any user extended attributes assigned to them.
+ *
+ * Attribute entries are hashed in to a simple hash table. They are
+ * also part of an LRU.
+ *
+ * There are three shrinkers.
+ *
+ * Two shrinkers deal with the cache entries themselves: one for
+ * large entries (> PAGE_SIZE), and one for smaller entries. The
+ * shrinker for the larger entries works more aggressively than
+ * those for the smaller entries.
+ *
+ * The other shrinker frees the cache structures themselves.
+ */
+
+/*
+ * 64 buckets is a good default. There is likely no reasonable
+ * workload that uses more than even 64 user extended attributes.
+ * You can certainly add a lot more - but you get what you ask for
+ * in those circumstances.
+ */
+#define NFS4_XATTR_HASH_SIZE 64
+
+#define NFSDBG_FACILITY NFSDBG_XATTRCACHE
+
+struct nfs4_xattr_cache;
+struct nfs4_xattr_entry;
+
+struct nfs4_xattr_bucket {
+ spinlock_t lock;
+ struct hlist_head hlist;
+ struct nfs4_xattr_cache *cache;
+ bool draining;
+};
+
+struct nfs4_xattr_cache {
+ struct kref ref;
+ struct nfs4_xattr_bucket buckets[NFS4_XATTR_HASH_SIZE];
+ struct list_head lru;
+ struct list_head dispose;
+ atomic_long_t nent;
+ spinlock_t listxattr_lock;
+ struct inode *inode;
+ struct nfs4_xattr_entry *listxattr;
+};
+
+struct nfs4_xattr_entry {
+ struct kref ref;
+ struct hlist_node hnode;
+ struct list_head lru;
+ struct list_head dispose;
+ char *xattr_name;
+ void *xattr_value;
+ size_t xattr_size;
+ struct nfs4_xattr_bucket *bucket;
+ uint32_t flags;
+};
+
+#define NFS4_XATTR_ENTRY_EXTVAL 0x0001
+
+/*
+ * LRU list of NFS inodes that have xattr caches.
+ */
+static struct list_lru nfs4_xattr_cache_lru;
+static struct list_lru nfs4_xattr_entry_lru;
+static struct list_lru nfs4_xattr_large_entry_lru;
+
+static struct kmem_cache *nfs4_xattr_cache_cachep;
+
+/*
+ * Hashing helper functions.
+ */
+static void
+nfs4_xattr_hash_init(struct nfs4_xattr_cache *cache)
+{
+ unsigned int i;
+
+ for (i = 0; i < NFS4_XATTR_HASH_SIZE; i++) {
+ INIT_HLIST_HEAD(&cache->buckets[i].hlist);
+ spin_lock_init(&cache->buckets[i].lock);
+ cache->buckets[i].cache = cache;
+ cache->buckets[i].draining = false;
+ }
+}
+
+/*
+ * Locking order:
+ * 1. inode i_lock or bucket lock
+ * 2. list_lru lock (taken by list_lru_* functions)
+ */
+
+/*
+ * Wrapper functions to add a cache entry to the right LRU.
+ */
+static bool
+nfs4_xattr_entry_lru_add(struct nfs4_xattr_entry *entry)
+{
+ struct list_lru *lru;
+
+ lru = (entry->flags & NFS4_XATTR_ENTRY_EXTVAL) ?
+ &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru;
+
+ return list_lru_add(lru, &entry->lru);
+}
+
+static bool
+nfs4_xattr_entry_lru_del(struct nfs4_xattr_entry *entry)
+{
+ struct list_lru *lru;
+
+ lru = (entry->flags & NFS4_XATTR_ENTRY_EXTVAL) ?
+ &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru;
+
+ return list_lru_del(lru, &entry->lru);
+}
+
+/*
+ * This function allocates cache entries. They are the normal
+ * extended attribute name/value pairs, but may also be a listxattr
+ * cache. Those allocations use the same entry so that they can be
+ * treated as one by the memory shrinker.
+ *
+ * xattr cache entries are allocated together with names. If the
+ * value fits in to one page with the entry structure and the name,
+ * it will also be part of the same allocation (kmalloc). This is
+ * expected to be the vast majority of cases. Larger allocations
+ * have a value pointer that is allocated separately by kvmalloc.
+ *
+ * Parameters:
+ *
+ * @name: Name of the extended attribute. NULL for listxattr cache
+ * entry.
+ * @value: Value of attribute, or listxattr cache. NULL if the
+ * value is to be copied from pages instead.
+ * @pages: Pages to copy the value from, if not NULL. Passed in to
+ * make it easier to copy the value after an RPC, even if
+ * the value will not be passed up to application (e.g.
+ * for a 'query' getxattr with NULL buffer).
+ * @len: Length of the value. Can be 0 for zero-length attribues.
+ * @value and @pages will be NULL if @len is 0.
+ */
+static struct nfs4_xattr_entry *
+nfs4_xattr_alloc_entry(const char *name, const void *value,
+ struct page **pages, size_t len)
+{
+ struct nfs4_xattr_entry *entry;
+ void *valp;
+ char *namep;
+ size_t alloclen, slen;
+ char *buf;
+ uint32_t flags;
+
+ BUILD_BUG_ON(sizeof(struct nfs4_xattr_entry) +
+ XATTR_NAME_MAX + 1 > PAGE_SIZE);
+
+ alloclen = sizeof(struct nfs4_xattr_entry);
+ if (name != NULL) {
+ slen = strlen(name) + 1;
+ alloclen += slen;
+ } else
+ slen = 0;
+
+ if (alloclen + len <= PAGE_SIZE) {
+ alloclen += len;
+ flags = 0;
+ } else {
+ flags = NFS4_XATTR_ENTRY_EXTVAL;
+ }
+
+ buf = kmalloc(alloclen, GFP_KERNEL_ACCOUNT | GFP_NOFS);
+ if (buf == NULL)
+ return NULL;
+ entry = (struct nfs4_xattr_entry *)buf;
+
+ if (name != NULL) {
+ namep = buf + sizeof(struct nfs4_xattr_entry);
+ memcpy(namep, name, slen);
+ } else {
+ namep = NULL;
+ }
+
+
+ if (flags & NFS4_XATTR_ENTRY_EXTVAL) {
+ valp = kvmalloc(len, GFP_KERNEL_ACCOUNT | GFP_NOFS);
+ if (valp == NULL) {
+ kfree(buf);
+ return NULL;
+ }
+ } else if (len != 0) {
+ valp = buf + sizeof(struct nfs4_xattr_entry) + slen;
+ } else
+ valp = NULL;
+
+ if (valp != NULL) {
+ if (value != NULL)
+ memcpy(valp, value, len);
+ else
+ _copy_from_pages(valp, pages, 0, len);
+ }
+
+ entry->flags = flags;
+ entry->xattr_value = valp;
+ kref_init(&entry->ref);
+ entry->xattr_name = namep;
+ entry->xattr_size = len;
+ entry->bucket = NULL;
+ INIT_LIST_HEAD(&entry->lru);
+ INIT_LIST_HEAD(&entry->dispose);
+ INIT_HLIST_NODE(&entry->hnode);
+
+ return entry;
+}
+
+static void
+nfs4_xattr_free_entry(struct nfs4_xattr_entry *entry)
+{
+ if (entry->flags & NFS4_XATTR_ENTRY_EXTVAL)
+ kvfree(entry->xattr_value);
+ kfree(entry);
+}
+
+static void
+nfs4_xattr_free_entry_cb(struct kref *kref)
+{
+ struct nfs4_xattr_entry *entry;
+
+ entry = container_of(kref, struct nfs4_xattr_entry, ref);
+
+ if (WARN_ON(!list_empty(&entry->lru)))
+ return;
+
+ nfs4_xattr_free_entry(entry);
+}
+
+static void
+nfs4_xattr_free_cache_cb(struct kref *kref)
+{
+ struct nfs4_xattr_cache *cache;
+ int i;
+
+ cache = container_of(kref, struct nfs4_xattr_cache, ref);
+
+ for (i = 0; i < NFS4_XATTR_HASH_SIZE; i++) {
+ if (WARN_ON(!hlist_empty(&cache->buckets[i].hlist)))
+ return;
+ cache->buckets[i].draining = false;
+ }
+
+ cache->listxattr = NULL;
+
+ kmem_cache_free(nfs4_xattr_cache_cachep, cache);
+
+}
+
+static struct nfs4_xattr_cache *
+nfs4_xattr_alloc_cache(void)
+{
+ struct nfs4_xattr_cache *cache;
+
+ cache = kmem_cache_alloc(nfs4_xattr_cache_cachep,
+ GFP_KERNEL_ACCOUNT | GFP_NOFS);
+ if (cache == NULL)
+ return NULL;
+
+ kref_init(&cache->ref);
+ atomic_long_set(&cache->nent, 0);
+
+ return cache;
+}
+
+/*
+ * Set the listxattr cache, which is a special-cased cache entry.
+ * The special value ERR_PTR(-ESTALE) is used to indicate that
+ * the cache is being drained - this prevents a new listxattr
+ * cache from being added to what is now a stale cache.
+ */
+static int
+nfs4_xattr_set_listcache(struct nfs4_xattr_cache *cache,
+ struct nfs4_xattr_entry *new)
+{
+ struct nfs4_xattr_entry *old;
+ int ret = 1;
+
+ spin_lock(&cache->listxattr_lock);
+
+ old = cache->listxattr;
+
+ if (old == ERR_PTR(-ESTALE)) {
+ ret = 0;
+ goto out;
+ }
+
+ cache->listxattr = new;
+ if (new != NULL && new != ERR_PTR(-ESTALE))
+ nfs4_xattr_entry_lru_add(new);
+
+ if (old != NULL) {
+ nfs4_xattr_entry_lru_del(old);
+ kref_put(&old->ref, nfs4_xattr_free_entry_cb);
+ }
+out:
+ spin_unlock(&cache->listxattr_lock);
+
+ return ret;
+}
+
+/*
+ * Unlink a cache from its parent inode, clearing out an invalid
+ * cache. Must be called with i_lock held.
+ */
+static struct nfs4_xattr_cache *
+nfs4_xattr_cache_unlink(struct inode *inode)
+{
+ struct nfs_inode *nfsi;
+ struct nfs4_xattr_cache *oldcache;
+
+ nfsi = NFS_I(inode);
+
+ oldcache = nfsi->xattr_cache;
+ if (oldcache != NULL) {
+ list_lru_del(&nfs4_xattr_cache_lru, &oldcache->lru);
+ oldcache->inode = NULL;
+ }
+ nfsi->xattr_cache = NULL;
+ nfsi->cache_validity &= ~NFS_INO_INVALID_XATTR;
+
+ return oldcache;
+
+}
+
+/*
+ * Discard a cache. Called by get_cache() if there was an old,
+ * invalid cache. Can also be called from a shrinker callback.
+ *
+ * The cache is dead, it has already been unlinked from its inode,
+ * and no longer appears on the cache LRU list.
+ *
+ * Mark all buckets as draining, so that no new entries are added. This
+ * could still happen in the unlikely, but possible case that another
+ * thread had grabbed a reference before it was unlinked from the inode,
+ * and is still holding it for an add operation.
+ *
+ * Remove all entries from the LRU lists, so that there is no longer
+ * any way to 'find' this cache. Then, remove the entries from the hash
+ * table.
+ *
+ * At that point, the cache will remain empty and can be freed when the final
+ * reference drops, which is very likely the kref_put at the end of
+ * this function, or the one called immediately afterwards in the
+ * shrinker callback.
+ */
+static void
+nfs4_xattr_discard_cache(struct nfs4_xattr_cache *cache)
+{
+ unsigned int i;
+ struct nfs4_xattr_entry *entry;
+ struct nfs4_xattr_bucket *bucket;
+ struct hlist_node *n;
+
+ nfs4_xattr_set_listcache(cache, ERR_PTR(-ESTALE));
+
+ for (i = 0; i < NFS4_XATTR_HASH_SIZE; i++) {
+ bucket = &cache->buckets[i];
+
+ spin_lock(&bucket->lock);
+ bucket->draining = true;
+ hlist_for_each_entry_safe(entry, n, &bucket->hlist, hnode) {
+ nfs4_xattr_entry_lru_del(entry);
+ hlist_del_init(&entry->hnode);
+ kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
+ }
+ spin_unlock(&bucket->lock);
+ }
+
+ atomic_long_set(&cache->nent, 0);
+
+ kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
+}
+
+/*
+ * Get a referenced copy of the cache structure. Avoid doing allocs
+ * while holding i_lock. Which means that we do some optimistic allocation,
+ * and might have to free the result in rare cases.
+ *
+ * This function only checks the NFS_INO_INVALID_XATTR cache validity bit
+ * and acts accordingly, replacing the cache when needed. For the read case
+ * (!add), this means that the caller must make sure that the cache
+ * is valid before caling this function. getxattr and listxattr call
+ * revalidate_inode to do this. The attribute cache timeout (for the
+ * non-delegated case) is expected to be dealt with in the revalidate
+ * call.
+ */
+
+static struct nfs4_xattr_cache *
+nfs4_xattr_get_cache(struct inode *inode, int add)
+{
+ struct nfs_inode *nfsi;
+ struct nfs4_xattr_cache *cache, *oldcache, *newcache;
+
+ nfsi = NFS_I(inode);
+
+ cache = oldcache = NULL;
+
+ spin_lock(&inode->i_lock);
+
+ if (nfsi->cache_validity & NFS_INO_INVALID_XATTR)
+ oldcache = nfs4_xattr_cache_unlink(inode);
+ else
+ cache = nfsi->xattr_cache;
+
+ if (cache != NULL)
+ kref_get(&cache->ref);
+
+ spin_unlock(&inode->i_lock);
+
+ if (add && cache == NULL) {
+ newcache = NULL;
+
+ cache = nfs4_xattr_alloc_cache();
+ if (cache == NULL)
+ goto out;
+
+ spin_lock(&inode->i_lock);
+ if (nfsi->cache_validity & NFS_INO_INVALID_XATTR) {
+ /*
+ * The cache was invalidated again. Give up,
+ * since what we want to enter is now likely
+ * outdated anyway.
+ */
+ spin_unlock(&inode->i_lock);
+ kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
+ cache = NULL;
+ goto out;
+ }
+
+ /*
+ * Check if someone beat us to it.
+ */
+ if (nfsi->xattr_cache != NULL) {
+ newcache = nfsi->xattr_cache;
+ kref_get(&newcache->ref);
+ } else {
+ kref_get(&cache->ref);
+ nfsi->xattr_cache = cache;
+ cache->inode = inode;
+ list_lru_add(&nfs4_xattr_cache_lru, &cache->lru);
+ }
+
+ spin_unlock(&inode->i_lock);
+
+ /*
+ * If there was a race, throw away the cache we just
+ * allocated, and use the new one allocated by someone
+ * else.
+ */
+ if (newcache != NULL) {
+ kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
+ cache = newcache;
+ }
+ }
+
+out:
+ /*
+ * Discard the now orphaned old cache.
+ */
+ if (oldcache != NULL)
+ nfs4_xattr_discard_cache(oldcache);
+
+ return cache;
+}
+
+static inline struct nfs4_xattr_bucket *
+nfs4_xattr_hash_bucket(struct nfs4_xattr_cache *cache, const char *name)
+{
+ return &cache->buckets[jhash(name, strlen(name), 0) &
+ (ARRAY_SIZE(cache->buckets) - 1)];
+}
+
+static struct nfs4_xattr_entry *
+nfs4_xattr_get_entry(struct nfs4_xattr_bucket *bucket, const char *name)
+{
+ struct nfs4_xattr_entry *entry;
+
+ entry = NULL;
+
+ hlist_for_each_entry(entry, &bucket->hlist, hnode) {
+ if (!strcmp(entry->xattr_name, name))
+ break;
+ }
+
+ return entry;
+}
+
+static int
+nfs4_xattr_hash_add(struct nfs4_xattr_cache *cache,
+ struct nfs4_xattr_entry *entry)
+{
+ struct nfs4_xattr_bucket *bucket;
+ struct nfs4_xattr_entry *oldentry = NULL;
+ int ret = 1;
+
+ bucket = nfs4_xattr_hash_bucket(cache, entry->xattr_name);
+ entry->bucket = bucket;
+
+ spin_lock(&bucket->lock);
+
+ if (bucket->draining) {
+ ret = 0;
+ goto out;
+ }
+
+ oldentry = nfs4_xattr_get_entry(bucket, entry->xattr_name);
+ if (oldentry != NULL) {
+ hlist_del_init(&oldentry->hnode);
+ nfs4_xattr_entry_lru_del(oldentry);
+ } else {
+ atomic_long_inc(&cache->nent);
+ }
+
+ hlist_add_head(&entry->hnode, &bucket->hlist);
+ nfs4_xattr_entry_lru_add(entry);
+
+out:
+ spin_unlock(&bucket->lock);
+
+ if (oldentry != NULL)
+ kref_put(&oldentry->ref, nfs4_xattr_free_entry_cb);
+
+ return ret;
+}
+
+static void
+nfs4_xattr_hash_remove(struct nfs4_xattr_cache *cache, const char *name)
+{
+ struct nfs4_xattr_bucket *bucket;
+ struct nfs4_xattr_entry *entry;
+
+ bucket = nfs4_xattr_hash_bucket(cache, name);
+
+ spin_lock(&bucket->lock);
+
+ entry = nfs4_xattr_get_entry(bucket, name);
+ if (entry != NULL) {
+ hlist_del_init(&entry->hnode);
+ nfs4_xattr_entry_lru_del(entry);
+ atomic_long_dec(&cache->nent);
+ }
+
+ spin_unlock(&bucket->lock);
+
+ if (entry != NULL)
+ kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
+}
+
+static struct nfs4_xattr_entry *
+nfs4_xattr_hash_find(struct nfs4_xattr_cache *cache, const char *name)
+{
+ struct nfs4_xattr_bucket *bucket;
+ struct nfs4_xattr_entry *entry;
+
+ bucket = nfs4_xattr_hash_bucket(cache, name);
+
+ spin_lock(&bucket->lock);
+
+ entry = nfs4_xattr_get_entry(bucket, name);
+ if (entry != NULL)
+ kref_get(&entry->ref);
+
+ spin_unlock(&bucket->lock);
+
+ return entry;
+}
+
+/*
+ * Entry point to retrieve an entry from the cache.
+ */
+ssize_t nfs4_xattr_cache_get(struct inode *inode, const char *name, char *buf,
+ ssize_t buflen)
+{
+ struct nfs4_xattr_cache *cache;
+ struct nfs4_xattr_entry *entry;
+ ssize_t ret;
+
+ cache = nfs4_xattr_get_cache(inode, 0);
+ if (cache == NULL)
+ return -ENOENT;
+
+ ret = 0;
+ entry = nfs4_xattr_hash_find(cache, name);
+
+ if (entry != NULL) {
+ dprintk("%s: cache hit '%s', len %lu\n", __func__,
+ entry->xattr_name, (unsigned long)entry->xattr_size);
+ if (buflen == 0) {
+ /* Length probe only */
+ ret = entry->xattr_size;
+ } else if (buflen < entry->xattr_size)
+ ret = -ERANGE;
+ else {
+ memcpy(buf, entry->xattr_value, entry->xattr_size);
+ ret = entry->xattr_size;
+ }
+ kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
+ } else {
+ dprintk("%s: cache miss '%s'\n", __func__, name);
+ ret = -ENOENT;
+ }
+
+ kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
+
+ return ret;
+}
+
+/*
+ * Retrieve a cached list of xattrs from the cache.
+ */
+ssize_t nfs4_xattr_cache_list(struct inode *inode, char *buf, ssize_t buflen)
+{
+ struct nfs4_xattr_cache *cache;
+ struct nfs4_xattr_entry *entry;
+ ssize_t ret;
+
+ cache = nfs4_xattr_get_cache(inode, 0);
+ if (cache == NULL)
+ return -ENOENT;
+
+ spin_lock(&cache->listxattr_lock);
+
+ entry = cache->listxattr;
+
+ if (entry != NULL && entry != ERR_PTR(-ESTALE)) {
+ if (buflen == 0) {
+ /* Length probe only */
+ ret = entry->xattr_size;
+ } else if (entry->xattr_size > buflen)
+ ret = -ERANGE;
+ else {
+ memcpy(buf, entry->xattr_value, entry->xattr_size);
+ ret = entry->xattr_size;
+ }
+ } else {
+ ret = -ENOENT;
+ }
+
+ spin_unlock(&cache->listxattr_lock);
+
+ kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
+
+ return ret;
+}
+
+/*
+ * Add an xattr to the cache.
+ *
+ * This also invalidates the xattr list cache.
+ */
+void nfs4_xattr_cache_add(struct inode *inode, const char *name,
+ const char *buf, struct page **pages, ssize_t buflen)
+{
+ struct nfs4_xattr_cache *cache;
+ struct nfs4_xattr_entry *entry;
+
+ dprintk("%s: add '%s' len %lu\n", __func__,
+ name, (unsigned long)buflen);
+
+ cache = nfs4_xattr_get_cache(inode, 1);
+ if (cache == NULL)
+ return;
+
+ entry = nfs4_xattr_alloc_entry(name, buf, pages, buflen);
+ if (entry == NULL)
+ goto out;
+
+ (void)nfs4_xattr_set_listcache(cache, NULL);
+
+ if (!nfs4_xattr_hash_add(cache, entry))
+ kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
+
+out:
+ kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
+}
+
+
+/*
+ * Remove an xattr from the cache.
+ *
+ * This also invalidates the xattr list cache.
+ */
+void nfs4_xattr_cache_remove(struct inode *inode, const char *name)
+{
+ struct nfs4_xattr_cache *cache;
+
+ dprintk("%s: remove '%s'\n", __func__, name);
+
+ cache = nfs4_xattr_get_cache(inode, 0);
+ if (cache == NULL)
+ return;
+
+ (void)nfs4_xattr_set_listcache(cache, NULL);
+ nfs4_xattr_hash_remove(cache, name);
+
+ kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
+}
+
+/*
+ * Cache listxattr output, replacing any possible old one.
+ */
+void nfs4_xattr_cache_set_list(struct inode *inode, const char *buf,
+ ssize_t buflen)
+{
+ struct nfs4_xattr_cache *cache;
+ struct nfs4_xattr_entry *entry;
+
+ cache = nfs4_xattr_get_cache(inode, 1);
+ if (cache == NULL)
+ return;
+
+ entry = nfs4_xattr_alloc_entry(NULL, buf, NULL, buflen);
+ if (entry == NULL)
+ goto out;
+
+ /*
+ * This is just there to be able to get to bucket->cache,
+ * which is obviously the same for all buckets, so just
+ * use bucket 0.
+ */
+ entry->bucket = &cache->buckets[0];
+
+ if (!nfs4_xattr_set_listcache(cache, entry))
+ kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
+
+out:
+ kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
+}
+
+/*
+ * Zap the entire cache. Called when an inode is evicted.
+ */
+void nfs4_xattr_cache_zap(struct inode *inode)
+{
+ struct nfs4_xattr_cache *oldcache;
+
+ spin_lock(&inode->i_lock);
+ oldcache = nfs4_xattr_cache_unlink(inode);
+ spin_unlock(&inode->i_lock);
+
+ if (oldcache)
+ nfs4_xattr_discard_cache(oldcache);
+}
+
+/*
+ * The entry LRU is shrunk more aggressively than the cache LRU,
+ * by settings @seeks to 1.
+ *
+ * Cache structures are freed only when they've become empty, after
+ * pruning all but one entry.
+ */
+
+static unsigned long nfs4_xattr_cache_count(struct shrinker *shrink,
+ struct shrink_control *sc);
+static unsigned long nfs4_xattr_entry_count(struct shrinker *shrink,
+ struct shrink_control *sc);
+static unsigned long nfs4_xattr_cache_scan(struct shrinker *shrink,
+ struct shrink_control *sc);
+static unsigned long nfs4_xattr_entry_scan(struct shrinker *shrink,
+ struct shrink_control *sc);
+
+static struct shrinker nfs4_xattr_cache_shrinker = {
+ .count_objects = nfs4_xattr_cache_count,
+ .scan_objects = nfs4_xattr_cache_scan,
+ .seeks = DEFAULT_SEEKS,
+ .flags = SHRINKER_MEMCG_AWARE,
+};
+
+static struct shrinker nfs4_xattr_entry_shrinker = {
+ .count_objects = nfs4_xattr_entry_count,
+ .scan_objects = nfs4_xattr_entry_scan,
+ .seeks = DEFAULT_SEEKS,
+ .batch = 512,
+ .flags = SHRINKER_MEMCG_AWARE,
+};
+
+static struct shrinker nfs4_xattr_large_entry_shrinker = {
+ .count_objects = nfs4_xattr_entry_count,
+ .scan_objects = nfs4_xattr_entry_scan,
+ .seeks = 1,
+ .batch = 512,
+ .flags = SHRINKER_MEMCG_AWARE,
+};
+
+static enum lru_status
+cache_lru_isolate(struct list_head *item,
+ struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
+{
+ struct list_head *dispose = arg;
+ struct inode *inode;
+ struct nfs4_xattr_cache *cache = container_of(item,
+ struct nfs4_xattr_cache, lru);
+
+ if (atomic_long_read(&cache->nent) > 1)
+ return LRU_SKIP;
+
+ /*
+ * If a cache structure is on the LRU list, we know that
+ * its inode is valid. Try to lock it to break the link.
+ * Since we're inverting the lock order here, only try.
+ */
+ inode = cache->inode;
+
+ if (!spin_trylock(&inode->i_lock))
+ return LRU_SKIP;
+
+ kref_get(&cache->ref);
+
+ cache->inode = NULL;
+ NFS_I(inode)->xattr_cache = NULL;
+ NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_XATTR;
+ list_lru_isolate(lru, &cache->lru);
+
+ spin_unlock(&inode->i_lock);
+
+ list_add_tail(&cache->dispose, dispose);
+ return LRU_REMOVED;
+}
+
+static unsigned long
+nfs4_xattr_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+ LIST_HEAD(dispose);
+ unsigned long freed;
+ struct nfs4_xattr_cache *cache;
+
+ freed = list_lru_shrink_walk(&nfs4_xattr_cache_lru, sc,
+ cache_lru_isolate, &dispose);
+ while (!list_empty(&dispose)) {
+ cache = list_first_entry(&dispose, struct nfs4_xattr_cache,
+ dispose);
+ list_del_init(&cache->dispose);
+ nfs4_xattr_discard_cache(cache);
+ kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
+ }
+
+ return freed;
+}
+
+
+static unsigned long
+nfs4_xattr_cache_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+ unsigned long count;
+
+ count = list_lru_shrink_count(&nfs4_xattr_cache_lru, sc);
+ return vfs_pressure_ratio(count);
+}
+
+static enum lru_status
+entry_lru_isolate(struct list_head *item,
+ struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
+{
+ struct list_head *dispose = arg;
+ struct nfs4_xattr_bucket *bucket;
+ struct nfs4_xattr_cache *cache;
+ struct nfs4_xattr_entry *entry = container_of(item,
+ struct nfs4_xattr_entry, lru);
+
+ bucket = entry->bucket;
+ cache = bucket->cache;
+
+ /*
+ * Unhook the entry from its parent (either a cache bucket
+ * or a cache structure if it's a listxattr buf), so that
+ * it's no longer found. Then add it to the isolate list,
+ * to be freed later.
+ *
+ * In both cases, we're reverting lock order, so use
+ * trylock and skip the entry if we can't get the lock.
+ */
+ if (entry->xattr_name != NULL) {
+ /* Regular cache entry */
+ if (!spin_trylock(&bucket->lock))
+ return LRU_SKIP;
+
+ kref_get(&entry->ref);
+
+ hlist_del_init(&entry->hnode);
+ atomic_long_dec(&cache->nent);
+ list_lru_isolate(lru, &entry->lru);
+
+ spin_unlock(&bucket->lock);
+ } else {
+ /* Listxattr cache entry */
+ if (!spin_trylock(&cache->listxattr_lock))
+ return LRU_SKIP;
+
+ kref_get(&entry->ref);
+
+ cache->listxattr = NULL;
+ list_lru_isolate(lru, &entry->lru);
+
+ spin_unlock(&cache->listxattr_lock);
+ }
+
+ list_add_tail(&entry->dispose, dispose);
+ return LRU_REMOVED;
+}
+
+static unsigned long
+nfs4_xattr_entry_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+ LIST_HEAD(dispose);
+ unsigned long freed;
+ struct nfs4_xattr_entry *entry;
+ struct list_lru *lru;
+
+ lru = (shrink == &nfs4_xattr_large_entry_shrinker) ?
+ &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru;
+
+ freed = list_lru_shrink_walk(lru, sc, entry_lru_isolate, &dispose);
+
+ while (!list_empty(&dispose)) {
+ entry = list_first_entry(&dispose, struct nfs4_xattr_entry,
+ dispose);
+ list_del_init(&entry->dispose);
+
+ /*
+ * Drop two references: the one that we just grabbed
+ * in entry_lru_isolate, and the one that was set
+ * when the entry was first allocated.
+ */
+ kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
+ kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
+ }
+
+ return freed;
+}
+
+static unsigned long
+nfs4_xattr_entry_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+ unsigned long count;
+ struct list_lru *lru;
+
+ lru = (shrink == &nfs4_xattr_large_entry_shrinker) ?
+ &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru;
+
+ count = list_lru_shrink_count(lru, sc);
+ return vfs_pressure_ratio(count);
+}
+
+
+static void nfs4_xattr_cache_init_once(void *p)
+{
+ struct nfs4_xattr_cache *cache = (struct nfs4_xattr_cache *)p;
+
+ spin_lock_init(&cache->listxattr_lock);
+ atomic_long_set(&cache->nent, 0);
+ nfs4_xattr_hash_init(cache);
+ cache->listxattr = NULL;
+ INIT_LIST_HEAD(&cache->lru);
+ INIT_LIST_HEAD(&cache->dispose);
+}
+
+int __init nfs4_xattr_cache_init(void)
+{
+ int ret = 0;
+
+ nfs4_xattr_cache_cachep = kmem_cache_create("nfs4_xattr_cache_cache",
+ sizeof(struct nfs4_xattr_cache), 0,
+ (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+ nfs4_xattr_cache_init_once);
+ if (nfs4_xattr_cache_cachep == NULL)
+ return -ENOMEM;
+
+ ret = list_lru_init_memcg(&nfs4_xattr_large_entry_lru,
+ &nfs4_xattr_large_entry_shrinker);
+ if (ret)
+ goto out4;
+
+ ret = list_lru_init_memcg(&nfs4_xattr_entry_lru,
+ &nfs4_xattr_entry_shrinker);
+ if (ret)
+ goto out3;
+
+ ret = list_lru_init_memcg(&nfs4_xattr_cache_lru,
+ &nfs4_xattr_cache_shrinker);
+ if (ret)
+ goto out2;
+
+ ret = register_shrinker(&nfs4_xattr_cache_shrinker);
+ if (ret)
+ goto out1;
+
+ ret = register_shrinker(&nfs4_xattr_entry_shrinker);
+ if (ret)
+ goto out;
+
+ ret = register_shrinker(&nfs4_xattr_large_entry_shrinker);
+ if (!ret)
+ return 0;
+
+ unregister_shrinker(&nfs4_xattr_entry_shrinker);
+out:
+ unregister_shrinker(&nfs4_xattr_cache_shrinker);
+out1:
+ list_lru_destroy(&nfs4_xattr_cache_lru);
+out2:
+ list_lru_destroy(&nfs4_xattr_entry_lru);
+out3:
+ list_lru_destroy(&nfs4_xattr_large_entry_lru);
+out4:
+ kmem_cache_destroy(nfs4_xattr_cache_cachep);
+
+ return ret;
+}
+
+void nfs4_xattr_cache_exit(void)
+{
+ unregister_shrinker(&nfs4_xattr_large_entry_shrinker);
+ unregister_shrinker(&nfs4_xattr_entry_shrinker);
+ unregister_shrinker(&nfs4_xattr_cache_shrinker);
+ list_lru_destroy(&nfs4_xattr_large_entry_lru);
+ list_lru_destroy(&nfs4_xattr_entry_lru);
+ list_lru_destroy(&nfs4_xattr_cache_lru);
+ kmem_cache_destroy(nfs4_xattr_cache_cachep);
+}
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index aed865a..f2248d9 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -21,7 +21,10 @@
#define encode_copy_maxsz (op_encode_hdr_maxsz + \
XDR_QUADLEN(NFS4_STATEID_SIZE) + \
XDR_QUADLEN(NFS4_STATEID_SIZE) + \
- 2 + 2 + 2 + 1 + 1 + 1)
+ 2 + 2 + 2 + 1 + 1 + 1 +\
+ 1 + /* One cnr_source_server */\
+ 1 + /* nl4_type */ \
+ 1 + XDR_QUADLEN(NFS4_OPAQUE_LIMIT))
#define decode_copy_maxsz (op_decode_hdr_maxsz + \
NFS42_WRITE_RES_SIZE + \
1 /* cr_consecutive */ + \
@@ -29,9 +32,28 @@
#define encode_offload_cancel_maxsz (op_encode_hdr_maxsz + \
XDR_QUADLEN(NFS4_STATEID_SIZE))
#define decode_offload_cancel_maxsz (op_decode_hdr_maxsz)
+#define encode_copy_notify_maxsz (op_encode_hdr_maxsz + \
+ XDR_QUADLEN(NFS4_STATEID_SIZE) + \
+ 1 + /* nl4_type */ \
+ 1 + XDR_QUADLEN(NFS4_OPAQUE_LIMIT))
+#define decode_copy_notify_maxsz (op_decode_hdr_maxsz + \
+ 3 + /* cnr_lease_time */\
+ XDR_QUADLEN(NFS4_STATEID_SIZE) + \
+ 1 + /* Support 1 cnr_source_server */\
+ 1 + /* nl4_type */ \
+ 1 + XDR_QUADLEN(NFS4_OPAQUE_LIMIT))
#define encode_deallocate_maxsz (op_encode_hdr_maxsz + \
encode_fallocate_maxsz)
#define decode_deallocate_maxsz (op_decode_hdr_maxsz)
+#define encode_read_plus_maxsz (op_encode_hdr_maxsz + \
+ encode_stateid_maxsz + 3)
+#define NFS42_READ_PLUS_SEGMENT_SIZE (1 /* data_content4 */ + \
+ 2 /* data_info4.di_offset */ + \
+ 2 /* data_info4.di_length */)
+#define decode_read_plus_maxsz (op_decode_hdr_maxsz + \
+ 1 /* rpr_eof */ + \
+ 1 /* rpr_contents count */ + \
+ 2 * NFS42_READ_PLUS_SEGMENT_SIZE)
#define encode_seek_maxsz (op_encode_hdr_maxsz + \
encode_stateid_maxsz + \
2 /* offset */ + \
@@ -99,6 +121,12 @@
decode_sequence_maxsz + \
decode_putfh_maxsz + \
decode_offload_cancel_maxsz)
+#define NFS4_enc_copy_notify_sz (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+ encode_copy_notify_maxsz)
+#define NFS4_dec_copy_notify_sz (compound_decode_hdr_maxsz + \
+ decode_putfh_maxsz + \
+ decode_copy_notify_maxsz)
#define NFS4_enc_deallocate_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
@@ -109,6 +137,14 @@
decode_putfh_maxsz + \
decode_deallocate_maxsz + \
decode_getattr_maxsz)
+#define NFS4_enc_read_plus_sz (compound_encode_hdr_maxsz + \
+ encode_sequence_maxsz + \
+ encode_putfh_maxsz + \
+ encode_read_plus_maxsz)
+#define NFS4_dec_read_plus_sz (compound_decode_hdr_maxsz + \
+ decode_sequence_maxsz + \
+ decode_putfh_maxsz + \
+ decode_read_plus_maxsz)
#define NFS4_enc_seek_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
@@ -150,6 +186,78 @@
decode_clone_maxsz + \
decode_getattr_maxsz)
+/* Not limited by NFS itself, limited by the generic xattr code */
+#define nfs4_xattr_name_maxsz XDR_QUADLEN(XATTR_NAME_MAX)
+
+#define encode_getxattr_maxsz (op_encode_hdr_maxsz + 1 + \
+ nfs4_xattr_name_maxsz)
+#define decode_getxattr_maxsz (op_decode_hdr_maxsz + 1 + 1)
+#define encode_setxattr_maxsz (op_encode_hdr_maxsz + \
+ 1 + nfs4_xattr_name_maxsz + 1)
+#define decode_setxattr_maxsz (op_decode_hdr_maxsz + decode_change_info_maxsz)
+#define encode_listxattrs_maxsz (op_encode_hdr_maxsz + 2 + 1)
+#define decode_listxattrs_maxsz (op_decode_hdr_maxsz + 2 + 1 + 1 + 1)
+#define encode_removexattr_maxsz (op_encode_hdr_maxsz + 1 + \
+ nfs4_xattr_name_maxsz)
+#define decode_removexattr_maxsz (op_decode_hdr_maxsz + \
+ decode_change_info_maxsz)
+
+#define NFS4_enc_getxattr_sz (compound_encode_hdr_maxsz + \
+ encode_sequence_maxsz + \
+ encode_putfh_maxsz + \
+ encode_getxattr_maxsz)
+#define NFS4_dec_getxattr_sz (compound_decode_hdr_maxsz + \
+ decode_sequence_maxsz + \
+ decode_putfh_maxsz + \
+ decode_getxattr_maxsz)
+#define NFS4_enc_setxattr_sz (compound_encode_hdr_maxsz + \
+ encode_sequence_maxsz + \
+ encode_putfh_maxsz + \
+ encode_setxattr_maxsz)
+#define NFS4_dec_setxattr_sz (compound_decode_hdr_maxsz + \
+ decode_sequence_maxsz + \
+ decode_putfh_maxsz + \
+ decode_setxattr_maxsz)
+#define NFS4_enc_listxattrs_sz (compound_encode_hdr_maxsz + \
+ encode_sequence_maxsz + \
+ encode_putfh_maxsz + \
+ encode_listxattrs_maxsz)
+#define NFS4_dec_listxattrs_sz (compound_decode_hdr_maxsz + \
+ decode_sequence_maxsz + \
+ decode_putfh_maxsz + \
+ decode_listxattrs_maxsz)
+#define NFS4_enc_removexattr_sz (compound_encode_hdr_maxsz + \
+ encode_sequence_maxsz + \
+ encode_putfh_maxsz + \
+ encode_removexattr_maxsz)
+#define NFS4_dec_removexattr_sz (compound_decode_hdr_maxsz + \
+ decode_sequence_maxsz + \
+ decode_putfh_maxsz + \
+ decode_removexattr_maxsz)
+
+/*
+ * These values specify the maximum amount of data that is not
+ * associated with the extended attribute name or extended
+ * attribute list in the SETXATTR, GETXATTR and LISTXATTR
+ * respectively.
+ */
+const u32 nfs42_maxsetxattr_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
+ compound_encode_hdr_maxsz +
+ encode_sequence_maxsz +
+ encode_putfh_maxsz + 1 +
+ nfs4_xattr_name_maxsz)
+ * XDR_UNIT);
+
+const u32 nfs42_maxgetxattr_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
+ compound_decode_hdr_maxsz +
+ decode_sequence_maxsz +
+ decode_putfh_maxsz + 1) * XDR_UNIT);
+
+const u32 nfs42_maxlistxattrs_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
+ compound_decode_hdr_maxsz +
+ decode_sequence_maxsz +
+ decode_putfh_maxsz + 3) * XDR_UNIT);
+
static void encode_fallocate(struct xdr_stream *xdr,
const struct nfs42_falloc_args *args)
{
@@ -166,6 +274,26 @@
encode_fallocate(xdr, args);
}
+static void encode_nl4_server(struct xdr_stream *xdr,
+ const struct nl4_server *ns)
+{
+ encode_uint32(xdr, ns->nl4_type);
+ switch (ns->nl4_type) {
+ case NL4_NAME:
+ case NL4_URL:
+ encode_string(xdr, ns->u.nl4_str_sz, ns->u.nl4_str);
+ break;
+ case NL4_NETADDR:
+ encode_string(xdr, ns->u.nl4_addr.netid_len,
+ ns->u.nl4_addr.netid);
+ encode_string(xdr, ns->u.nl4_addr.addr_len,
+ ns->u.nl4_addr.addr);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ }
+}
+
static void encode_copy(struct xdr_stream *xdr,
const struct nfs42_copy_args *args,
struct compound_hdr *hdr)
@@ -180,7 +308,12 @@
encode_uint32(xdr, 1); /* consecutive = true */
encode_uint32(xdr, args->sync);
- encode_uint32(xdr, 0); /* src server list */
+ if (args->cp_src == NULL) { /* intra-ssc */
+ encode_uint32(xdr, 0); /* no src server list */
+ return;
+ }
+ encode_uint32(xdr, 1); /* supporting 1 server */
+ encode_nl4_server(xdr, args->cp_src);
}
static void encode_offload_cancel(struct xdr_stream *xdr,
@@ -191,6 +324,15 @@
encode_nfs4_stateid(xdr, &args->osa_stateid);
}
+static void encode_copy_notify(struct xdr_stream *xdr,
+ const struct nfs42_copy_notify_args *args,
+ struct compound_hdr *hdr)
+{
+ encode_op_hdr(xdr, OP_COPY_NOTIFY, decode_copy_notify_maxsz, hdr);
+ encode_nfs4_stateid(xdr, &args->cna_src_stateid);
+ encode_nl4_server(xdr, &args->cna_dst);
+}
+
static void encode_deallocate(struct xdr_stream *xdr,
const struct nfs42_falloc_args *args,
struct compound_hdr *hdr)
@@ -199,6 +341,16 @@
encode_fallocate(xdr, args);
}
+static void encode_read_plus(struct xdr_stream *xdr,
+ const struct nfs_pgio_args *args,
+ struct compound_hdr *hdr)
+{
+ encode_op_hdr(xdr, OP_READ_PLUS, decode_read_plus_maxsz, hdr);
+ encode_nfs4_stateid(xdr, &args->stateid);
+ encode_uint64(xdr, args->offset);
+ encode_uint32(xdr, args->count);
+}
+
static void encode_seek(struct xdr_stream *xdr,
const struct nfs42_seek_args *args,
struct compound_hdr *hdr)
@@ -280,6 +432,210 @@
encode_device_error(xdr, &args->errors[0]);
}
+static void encode_setxattr(struct xdr_stream *xdr,
+ const struct nfs42_setxattrargs *arg,
+ struct compound_hdr *hdr)
+{
+ __be32 *p;
+
+ BUILD_BUG_ON(XATTR_CREATE != SETXATTR4_CREATE);
+ BUILD_BUG_ON(XATTR_REPLACE != SETXATTR4_REPLACE);
+
+ encode_op_hdr(xdr, OP_SETXATTR, decode_setxattr_maxsz, hdr);
+ p = reserve_space(xdr, 4);
+ *p = cpu_to_be32(arg->xattr_flags);
+ encode_string(xdr, strlen(arg->xattr_name), arg->xattr_name);
+ p = reserve_space(xdr, 4);
+ *p = cpu_to_be32(arg->xattr_len);
+ if (arg->xattr_len)
+ xdr_write_pages(xdr, arg->xattr_pages, 0, arg->xattr_len);
+}
+
+static int decode_setxattr(struct xdr_stream *xdr,
+ struct nfs4_change_info *cinfo)
+{
+ int status;
+
+ status = decode_op_hdr(xdr, OP_SETXATTR);
+ if (status)
+ goto out;
+ status = decode_change_info(xdr, cinfo);
+out:
+ return status;
+}
+
+
+static void encode_getxattr(struct xdr_stream *xdr, const char *name,
+ struct compound_hdr *hdr)
+{
+ encode_op_hdr(xdr, OP_GETXATTR, decode_getxattr_maxsz, hdr);
+ encode_string(xdr, strlen(name), name);
+}
+
+static int decode_getxattr(struct xdr_stream *xdr,
+ struct nfs42_getxattrres *res,
+ struct rpc_rqst *req)
+{
+ int status;
+ __be32 *p;
+ u32 len, rdlen;
+
+ status = decode_op_hdr(xdr, OP_GETXATTR);
+ if (status)
+ return status;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ return -EIO;
+
+ len = be32_to_cpup(p);
+ if (len > req->rq_rcv_buf.page_len)
+ return -ERANGE;
+
+ res->xattr_len = len;
+
+ if (len > 0) {
+ rdlen = xdr_read_pages(xdr, len);
+ if (rdlen < len)
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static void encode_removexattr(struct xdr_stream *xdr, const char *name,
+ struct compound_hdr *hdr)
+{
+ encode_op_hdr(xdr, OP_REMOVEXATTR, decode_removexattr_maxsz, hdr);
+ encode_string(xdr, strlen(name), name);
+}
+
+
+static int decode_removexattr(struct xdr_stream *xdr,
+ struct nfs4_change_info *cinfo)
+{
+ int status;
+
+ status = decode_op_hdr(xdr, OP_REMOVEXATTR);
+ if (status)
+ goto out;
+
+ status = decode_change_info(xdr, cinfo);
+out:
+ return status;
+}
+
+static void encode_listxattrs(struct xdr_stream *xdr,
+ const struct nfs42_listxattrsargs *arg,
+ struct compound_hdr *hdr)
+{
+ __be32 *p;
+
+ encode_op_hdr(xdr, OP_LISTXATTRS, decode_listxattrs_maxsz, hdr);
+
+ p = reserve_space(xdr, 12);
+ if (unlikely(!p))
+ return;
+
+ p = xdr_encode_hyper(p, arg->cookie);
+ /*
+ * RFC 8276 says to specify the full max length of the LISTXATTRS
+ * XDR reply. Count is set to the XDR length of the names array
+ * plus the EOF marker. So, add the cookie and the names count.
+ */
+ *p = cpu_to_be32(arg->count + 8 + 4);
+}
+
+static int decode_listxattrs(struct xdr_stream *xdr,
+ struct nfs42_listxattrsres *res)
+{
+ int status;
+ __be32 *p;
+ u32 count, len, ulen;
+ size_t left, copied;
+ char *buf;
+
+ status = decode_op_hdr(xdr, OP_LISTXATTRS);
+ if (status) {
+ /*
+ * Special case: for LISTXATTRS, NFS4ERR_TOOSMALL
+ * should be translated to ERANGE.
+ */
+ if (status == -ETOOSMALL)
+ status = -ERANGE;
+ goto out;
+ }
+
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ return -EIO;
+
+ xdr_decode_hyper(p, &res->cookie);
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ return -EIO;
+
+ left = res->xattr_len;
+ buf = res->xattr_buf;
+
+ count = be32_to_cpup(p);
+ copied = 0;
+
+ /*
+ * We have asked for enough room to encode the maximum number
+ * of possible attribute names, so everything should fit.
+ *
+ * But, don't rely on that assumption. Just decode entries
+ * until they don't fit anymore, just in case the server did
+ * something odd.
+ */
+ while (count--) {
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ return -EIO;
+
+ len = be32_to_cpup(p);
+ if (len > (XATTR_NAME_MAX - XATTR_USER_PREFIX_LEN)) {
+ status = -ERANGE;
+ goto out;
+ }
+
+ p = xdr_inline_decode(xdr, len);
+ if (unlikely(!p))
+ return -EIO;
+
+ ulen = len + XATTR_USER_PREFIX_LEN + 1;
+ if (buf) {
+ if (ulen > left) {
+ status = -ERANGE;
+ goto out;
+ }
+
+ memcpy(buf, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
+ memcpy(buf + XATTR_USER_PREFIX_LEN, p, len);
+
+ buf[ulen - 1] = 0;
+ buf += ulen;
+ left -= ulen;
+ }
+ copied += ulen;
+ }
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ return -EIO;
+
+ res->eof = be32_to_cpup(p);
+ res->copied = copied;
+
+out:
+ if (status == -ERANGE && res->xattr_len == XATTR_LIST_MAX)
+ status = -E2BIG;
+
+ return status;
+}
+
/*
* Encode ALLOCATE request
*/
@@ -355,6 +711,25 @@
}
/*
+ * Encode COPY_NOTIFY request
+ */
+static void nfs4_xdr_enc_copy_notify(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const void *data)
+{
+ const struct nfs42_copy_notify_args *args = data;
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->cna_seq_args),
+ };
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->cna_seq_args, &hdr);
+ encode_putfh(xdr, args->cna_src_fh, &hdr);
+ encode_copy_notify(xdr, args, &hdr);
+ encode_nops(&hdr);
+}
+
+/*
* Encode DEALLOCATE request
*/
static void nfs4_xdr_enc_deallocate(struct rpc_rqst *req,
@@ -375,6 +750,28 @@
}
/*
+ * Encode READ_PLUS request
+ */
+static void nfs4_xdr_enc_read_plus(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const void *data)
+{
+ const struct nfs_pgio_args *args = data;
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+ };
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_read_plus(xdr, args, &hdr);
+
+ rpc_prepare_reply_pages(req, args->pages, args->pgbase,
+ args->count, hdr.replen);
+ encode_nops(&hdr);
+}
+
+/*
* Encode SEEK request
*/
static void nfs4_xdr_enc_seek(struct rpc_rqst *req,
@@ -490,6 +887,58 @@
return decode_verifier(xdr, &res->verifier.verifier);
}
+static int decode_nl4_server(struct xdr_stream *xdr, struct nl4_server *ns)
+{
+ struct nfs42_netaddr *naddr;
+ uint32_t dummy;
+ char *dummy_str;
+ __be32 *p;
+ int status;
+
+ /* nl_type */
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ return -EIO;
+ ns->nl4_type = be32_to_cpup(p);
+ switch (ns->nl4_type) {
+ case NL4_NAME:
+ case NL4_URL:
+ status = decode_opaque_inline(xdr, &dummy, &dummy_str);
+ if (unlikely(status))
+ return status;
+ if (unlikely(dummy > NFS4_OPAQUE_LIMIT))
+ return -EIO;
+ memcpy(&ns->u.nl4_str, dummy_str, dummy);
+ ns->u.nl4_str_sz = dummy;
+ break;
+ case NL4_NETADDR:
+ naddr = &ns->u.nl4_addr;
+
+ /* netid string */
+ status = decode_opaque_inline(xdr, &dummy, &dummy_str);
+ if (unlikely(status))
+ return status;
+ if (unlikely(dummy > RPCBIND_MAXNETIDLEN))
+ return -EIO;
+ naddr->netid_len = dummy;
+ memcpy(naddr->netid, dummy_str, naddr->netid_len);
+
+ /* uaddr string */
+ status = decode_opaque_inline(xdr, &dummy, &dummy_str);
+ if (unlikely(status))
+ return status;
+ if (unlikely(dummy > RPCBIND_MAXUADDRLEN))
+ return -EIO;
+ naddr->addr_len = dummy;
+ memcpy(naddr->addr, dummy_str, naddr->addr_len);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return -EIO;
+ }
+ return 0;
+}
+
static int decode_copy_requirements(struct xdr_stream *xdr,
struct nfs42_copy_res *res) {
__be32 *p;
@@ -529,11 +978,136 @@
return decode_op_hdr(xdr, OP_OFFLOAD_CANCEL);
}
+static int decode_copy_notify(struct xdr_stream *xdr,
+ struct nfs42_copy_notify_res *res)
+{
+ __be32 *p;
+ int status, count;
+
+ status = decode_op_hdr(xdr, OP_COPY_NOTIFY);
+ if (status)
+ return status;
+ /* cnr_lease_time */
+ p = xdr_inline_decode(xdr, 12);
+ if (unlikely(!p))
+ return -EIO;
+ p = xdr_decode_hyper(p, &res->cnr_lease_time.seconds);
+ res->cnr_lease_time.nseconds = be32_to_cpup(p);
+
+ status = decode_opaque_fixed(xdr, &res->cnr_stateid, NFS4_STATEID_SIZE);
+ if (unlikely(status))
+ return -EIO;
+
+ /* number of source addresses */
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ return -EIO;
+
+ count = be32_to_cpup(p);
+ if (count > 1)
+ pr_warn("NFS: %s: nsvr %d > Supported. Use first servers\n",
+ __func__, count);
+
+ status = decode_nl4_server(xdr, &res->cnr_src);
+ if (unlikely(status))
+ return -EIO;
+ return 0;
+}
+
static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res)
{
return decode_op_hdr(xdr, OP_DEALLOCATE);
}
+static int decode_read_plus_data(struct xdr_stream *xdr,
+ struct nfs_pgio_res *res)
+{
+ uint32_t count, recvd;
+ uint64_t offset;
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 8 + 4);
+ if (!p)
+ return 1;
+
+ p = xdr_decode_hyper(p, &offset);
+ count = be32_to_cpup(p);
+ recvd = xdr_align_data(xdr, res->count, count);
+ res->count += recvd;
+
+ if (count > recvd)
+ return 1;
+ return 0;
+}
+
+static int decode_read_plus_hole(struct xdr_stream *xdr, struct nfs_pgio_res *res,
+ uint32_t *eof)
+{
+ uint64_t offset, length, recvd;
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 8 + 8);
+ if (!p)
+ return 1;
+
+ p = xdr_decode_hyper(p, &offset);
+ p = xdr_decode_hyper(p, &length);
+ recvd = xdr_expand_hole(xdr, res->count, length);
+ res->count += recvd;
+
+ if (recvd < length)
+ return 1;
+ return 0;
+}
+
+static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res)
+{
+ uint32_t eof, segments, type;
+ int status, i;
+ __be32 *p;
+
+ status = decode_op_hdr(xdr, OP_READ_PLUS);
+ if (status)
+ return status;
+
+ p = xdr_inline_decode(xdr, 4 + 4);
+ if (unlikely(!p))
+ return -EIO;
+
+ eof = be32_to_cpup(p++);
+ segments = be32_to_cpup(p++);
+ if (segments == 0)
+ goto out;
+
+ for (i = 0; i < segments; i++) {
+ p = xdr_inline_decode(xdr, 4);
+ if (!p)
+ goto early_out;
+
+ type = be32_to_cpup(p++);
+ if (type == NFS4_CONTENT_DATA)
+ status = decode_read_plus_data(xdr, res);
+ else if (type == NFS4_CONTENT_HOLE)
+ status = decode_read_plus_hole(xdr, res, &eof);
+ else
+ return -EINVAL;
+
+ if (status < 0)
+ return status;
+ if (status > 0)
+ goto early_out;
+ }
+
+out:
+ res->eof = eof;
+ return 0;
+early_out:
+ if (unlikely(!i))
+ return -EIO;
+ res->eof = 0;
+ return 0;
+}
+
static int decode_seek(struct xdr_stream *xdr, struct nfs42_seek_res *res)
{
int status;
@@ -657,6 +1231,32 @@
}
/*
+ * Decode COPY_NOTIFY response
+ */
+static int nfs4_xdr_dec_copy_notify(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ void *data)
+{
+ struct nfs42_copy_notify_res *res = data;
+ struct compound_hdr hdr;
+ int status;
+
+ status = decode_compound_hdr(xdr, &hdr);
+ if (status)
+ goto out;
+ status = decode_sequence(xdr, &res->cnr_seq_res, rqstp);
+ if (status)
+ goto out;
+ status = decode_putfh(xdr);
+ if (status)
+ goto out;
+ status = decode_copy_notify(xdr, res);
+
+out:
+ return status;
+}
+
+/*
* Decode DEALLOCATE request
*/
static int nfs4_xdr_dec_deallocate(struct rpc_rqst *rqstp,
@@ -685,6 +1285,33 @@
}
/*
+ * Decode READ_PLUS request
+ */
+static int nfs4_xdr_dec_read_plus(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ void *data)
+{
+ struct nfs_pgio_res *res = data;
+ struct compound_hdr hdr;
+ int status;
+
+ status = decode_compound_hdr(xdr, &hdr);
+ if (status)
+ goto out;
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
+ if (status)
+ goto out;
+ status = decode_putfh(xdr);
+ if (status)
+ goto out;
+ status = decode_read_plus(xdr, res);
+ if (!status)
+ status = res->count;
+out:
+ return status;
+}
+
+/*
* Decode SEEK request
*/
static int nfs4_xdr_dec_seek(struct rpc_rqst *rqstp,
@@ -769,8 +1396,7 @@
status = decode_clone(xdr);
if (status)
goto out;
- status = decode_getfattr(xdr, res->dst_fattr, res->server);
-
+ decode_getfattr(xdr, res->dst_fattr, res->server);
out:
res->rpc_status = status;
return status;
@@ -802,4 +1428,165 @@
return status;
}
+#ifdef CONFIG_NFS_V4_2
+static void nfs4_xdr_enc_setxattr(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const void *data)
+{
+ const struct nfs42_setxattrargs *args = data;
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+ };
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_setxattr(xdr, args, &hdr);
+ encode_nops(&hdr);
+}
+
+static int nfs4_xdr_dec_setxattr(struct rpc_rqst *req, struct xdr_stream *xdr,
+ void *data)
+{
+ struct nfs42_setxattrres *res = data;
+ struct compound_hdr hdr;
+ int status;
+
+ status = decode_compound_hdr(xdr, &hdr);
+ if (status)
+ goto out;
+ status = decode_sequence(xdr, &res->seq_res, req);
+ if (status)
+ goto out;
+ status = decode_putfh(xdr);
+ if (status)
+ goto out;
+
+ status = decode_setxattr(xdr, &res->cinfo);
+out:
+ return status;
+}
+
+static void nfs4_xdr_enc_getxattr(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const void *data)
+{
+ const struct nfs42_getxattrargs *args = data;
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+ };
+ size_t plen;
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_getxattr(xdr, args->xattr_name, &hdr);
+
+ plen = args->xattr_len ? args->xattr_len : XATTR_SIZE_MAX;
+
+ rpc_prepare_reply_pages(req, args->xattr_pages, 0, plen,
+ hdr.replen);
+ req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES;
+
+ encode_nops(&hdr);
+}
+
+static int nfs4_xdr_dec_getxattr(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr, void *data)
+{
+ struct nfs42_getxattrres *res = data;
+ struct compound_hdr hdr;
+ int status;
+
+ status = decode_compound_hdr(xdr, &hdr);
+ if (status)
+ goto out;
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
+ if (status)
+ goto out;
+ status = decode_putfh(xdr);
+ if (status)
+ goto out;
+ status = decode_getxattr(xdr, res, rqstp);
+out:
+ return status;
+}
+
+static void nfs4_xdr_enc_listxattrs(struct rpc_rqst *req,
+ struct xdr_stream *xdr, const void *data)
+{
+ const struct nfs42_listxattrsargs *args = data;
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+ };
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_listxattrs(xdr, args, &hdr);
+
+ rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->count,
+ hdr.replen);
+
+ encode_nops(&hdr);
+}
+
+static int nfs4_xdr_dec_listxattrs(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr, void *data)
+{
+ struct nfs42_listxattrsres *res = data;
+ struct compound_hdr hdr;
+ int status;
+
+ xdr_set_scratch_buffer(xdr, page_address(res->scratch), PAGE_SIZE);
+
+ status = decode_compound_hdr(xdr, &hdr);
+ if (status)
+ goto out;
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
+ if (status)
+ goto out;
+ status = decode_putfh(xdr);
+ if (status)
+ goto out;
+ status = decode_listxattrs(xdr, res);
+out:
+ return status;
+}
+
+static void nfs4_xdr_enc_removexattr(struct rpc_rqst *req,
+ struct xdr_stream *xdr, const void *data)
+{
+ const struct nfs42_removexattrargs *args = data;
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+ };
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_removexattr(xdr, args->xattr_name, &hdr);
+ encode_nops(&hdr);
+}
+
+static int nfs4_xdr_dec_removexattr(struct rpc_rqst *req,
+ struct xdr_stream *xdr, void *data)
+{
+ struct nfs42_removexattrres *res = data;
+ struct compound_hdr hdr;
+ int status;
+
+ status = decode_compound_hdr(xdr, &hdr);
+ if (status)
+ goto out;
+ status = decode_sequence(xdr, &res->seq_res, req);
+ if (status)
+ goto out;
+ status = decode_putfh(xdr);
+ if (status)
+ goto out;
+
+ status = decode_removexattr(xdr, &res->cinfo);
+out:
+ return status;
+}
+#endif
#endif /* __LINUX_FS_NFS_NFS4_2XDR_H */
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 5708b5a..6d91656 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -42,7 +42,10 @@
NFS4CLNT_LEASE_MOVED,
NFS4CLNT_DELEGATION_EXPIRED,
NFS4CLNT_RUN_MANAGER,
- NFS4CLNT_DELEGRETURN_RUNNING,
+ NFS4CLNT_RECALL_RUNNING,
+ NFS4CLNT_RECALL_ANY_LAYOUT_READ,
+ NFS4CLNT_RECALL_ANY_LAYOUT_RW,
+ NFS4CLNT_DELEGRETURN_DELAYED,
};
#define NFS4_RENEW_TIMEOUT 0x01
@@ -115,7 +118,7 @@
unsigned long so_flags;
struct list_head so_states;
struct nfs_seqid_counter so_seqid;
- seqcount_t so_reclaim_seqcount;
+ seqcount_spinlock_t so_reclaim_seqcount;
struct mutex so_delegreturn_mutex;
};
@@ -166,9 +169,9 @@
NFS_STATE_RECOVERY_FAILED, /* OPEN stateid state recovery failed */
NFS_STATE_MAY_NOTIFY_LOCK, /* server may CB_NOTIFY_LOCK */
NFS_STATE_CHANGE_WAIT, /* A state changing operation is outstanding */
-#ifdef CONFIG_NFS_V4_2
NFS_CLNT_DST_SSC_COPY_STATE, /* dst server open state on client*/
-#endif /* CONFIG_NFS_V4_2 */
+ NFS_CLNT_SRC_SSC_COPY_STATE, /* src server open state on client*/
+ NFS_SRV_SSC_COPY_STATE, /* ssc state on the dst server */
};
struct nfs4_state {
@@ -269,17 +272,17 @@
int nfs_atomic_open(struct inode *, struct dentry *, struct file *,
unsigned, umode_t);
-/* super.c */
+/* fs_context.c */
extern struct file_system_type nfs4_fs_type;
/* nfs4namespace.c */
struct rpc_clnt *nfs4_negotiate_security(struct rpc_clnt *, struct inode *,
const struct qstr *);
-struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *,
- struct nfs_fh *, struct nfs_fattr *);
+int nfs4_submount(struct fs_context *, struct nfs_server *);
int nfs4_replace_transport(struct nfs_server *server,
const struct nfs4_fs_locations *locations);
-
+size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr *sa,
+ size_t salen, struct net *net, int port);
/* nfs4proc.c */
extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception *);
extern int nfs4_async_handle_error(struct rpc_task *task,
@@ -304,17 +307,33 @@
extern int nfs4_proc_get_locations(struct inode *, struct nfs4_fs_locations *,
struct page *page, const struct cred *);
extern int nfs4_proc_fsid_present(struct inode *, const struct cred *);
-extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *, const struct qstr *,
- struct nfs_fh *, struct nfs_fattr *);
+extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *,
+ struct dentry *,
+ struct nfs_fh *,
+ struct nfs_fattr *);
extern int nfs4_proc_secinfo(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *);
extern const struct xattr_handler *nfs4_xattr_handlers[];
extern int nfs4_set_rw_stateid(nfs4_stateid *stateid,
const struct nfs_open_context *ctx,
const struct nfs_lock_context *l_ctx,
fmode_t fmode);
+extern int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
+ struct nfs_fattr *fattr, struct nfs4_label *label,
+ struct inode *inode);
+extern int update_open_stateid(struct nfs4_state *state,
+ const nfs4_stateid *open_stateid,
+ const nfs4_stateid *deleg_stateid,
+ fmode_t fmode);
extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
struct nfs_fsinfo *fsinfo);
+extern void nfs4_update_changeattr(struct inode *dir,
+ struct nfs4_change_info *cinfo,
+ unsigned long timestamp,
+ unsigned long cache_validity);
+extern int nfs4_buf_to_pages_noslab(const void *buf, size_t buflen,
+ struct page **pages);
+
#if defined(CONFIG_NFS_V4_1)
extern int nfs41_sequence_done(struct rpc_task *, struct nfs4_sequence_res *);
extern int nfs4_proc_create_session(struct nfs_client *, const struct cred *);
@@ -444,6 +463,8 @@
/* nfs4state.c */
+extern const nfs4_stateid current_stateid;
+
const struct cred *nfs4_get_clid_cred(struct nfs_client *clp);
const struct cred *nfs4_get_machine_cred(struct nfs_client *clp);
const struct cred *nfs4_get_renew_cred(struct nfs_client *clp);
@@ -456,6 +477,8 @@
struct nfs_client **, const struct cred *);
extern void nfs4_schedule_session_recovery(struct nfs4_session *, int);
extern void nfs41_notify_server(struct nfs_client *);
+bool nfs4_check_serverowner_major_id(struct nfs41_server_owner *o1,
+ struct nfs41_server_owner *o2);
#else
static inline void nfs4_schedule_session_recovery(struct nfs4_session *session, int err)
{
@@ -514,7 +537,6 @@
/* nfs4super.c */
struct nfs_mount_info;
extern struct nfs_subversion nfs_v4;
-struct dentry *nfs4_try_mount(int, const char *, struct nfs_mount_info *, struct nfs_subversion *);
extern bool nfs4_disable_idmapping;
extern unsigned short max_session_slots;
extern unsigned short max_session_cb_slots;
@@ -524,6 +546,9 @@
#define NFS4_CLIENT_ID_UNIQ_LEN (64)
extern char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN];
+extern int nfs4_try_get_tree(struct fs_context *);
+extern int nfs4_get_referral_tree(struct fs_context *);
+
/* nfs4sysctl.c */
#ifdef CONFIG_SYSCTL
int nfs4_register_sysctl(void);
@@ -542,6 +567,12 @@
/* nfs4xdr.c */
extern const struct rpc_procinfo nfs4_procedures[];
+#ifdef CONFIG_NFS_V4_2
+extern const u32 nfs42_maxsetxattr_overhead;
+extern const u32 nfs42_maxgetxattr_overhead;
+extern const u32 nfs42_maxlistxattrs_overhead;
+#endif
+
struct nfs4_mount_data;
/* callback_xdr.c */
@@ -579,6 +610,12 @@
return seq2 == seq1 + 1U || (seq2 == 1U && seq1 == 0xffffffffU);
}
+static inline bool nfs4_stateid_match_or_older(const nfs4_stateid *dst, const nfs4_stateid *src)
+{
+ return nfs4_stateid_match_other(dst, src) &&
+ !(src->seqid && nfs4_stateid_is_newer(dst, src));
+}
+
static inline void nfs4_stateid_seqid_inc(nfs4_stateid *s1)
{
u32 seqid = be32_to_cpu(s1->seqid);
@@ -600,12 +637,34 @@
nfs4_stateid_match_other(&state->open_stateid, stateid);
}
+/* nfs42xattr.c */
+#ifdef CONFIG_NFS_V4_2
+extern int __init nfs4_xattr_cache_init(void);
+extern void nfs4_xattr_cache_exit(void);
+extern void nfs4_xattr_cache_add(struct inode *inode, const char *name,
+ const char *buf, struct page **pages,
+ ssize_t buflen);
+extern void nfs4_xattr_cache_remove(struct inode *inode, const char *name);
+extern ssize_t nfs4_xattr_cache_get(struct inode *inode, const char *name,
+ char *buf, ssize_t buflen);
+extern void nfs4_xattr_cache_set_list(struct inode *inode, const char *buf,
+ ssize_t buflen);
+extern ssize_t nfs4_xattr_cache_list(struct inode *inode, char *buf,
+ ssize_t buflen);
+extern void nfs4_xattr_cache_zap(struct inode *inode);
#else
+static inline void nfs4_xattr_cache_zap(struct inode *inode)
+{
+}
+#endif /* CONFIG_NFS_V4_2 */
+
+#else /* CONFIG_NFS_V4 */
#define nfs4_close_state(a, b) do { } while (0)
#define nfs4_close_sync(a, b) do { } while (0)
#define nfs4_state_protect(a, b, c, d) do { } while (0)
#define nfs4_state_protect_write(a, b, c, d) do { } while (0)
+
#endif /* CONFIG_NFS_V4 */
#endif /* __LINUX_FS_NFS_NFS4_FS.H */
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 8cace83..0e6437b 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -630,7 +630,7 @@
/*
* Returns true if the server major ids match
*/
-static bool
+bool
nfs4_check_serverowner_major_id(struct nfs41_server_owner *o1,
struct nfs41_server_owner *o2)
{
@@ -880,14 +880,17 @@
};
struct nfs_client *clp;
- if (minorversion > 0 && proto == XPRT_TRANSPORT_TCP)
+ if (minorversion == 0)
+ __set_bit(NFS_CS_REUSEPORT, &cl_init.init_flags);
+ if (proto == XPRT_TRANSPORT_TCP)
cl_init.nconnect = nconnect;
+
if (server->flags & NFS_MOUNT_NORESVPORT)
- set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
+ __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
if (server->options & NFS_OPTION_MIGRATION)
- set_bit(NFS_CS_MIGRATION, &cl_init.init_flags);
+ __set_bit(NFS_CS_MIGRATION, &cl_init.init_flags);
if (test_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status))
- set_bit(NFS_CS_TSM_POSSIBLE, &cl_init.init_flags);
+ __set_bit(NFS_CS_TSM_POSSIBLE, &cl_init.init_flags);
server->port = rpc_get_port(addr);
/* Allocate or find a client reference we can use */
@@ -991,6 +994,36 @@
#endif /* CONFIG_NFS_V4_1 */
}
+/*
+ * Limit xattr sizes using the channel attributes.
+ */
+static void nfs4_session_limit_xasize(struct nfs_server *server)
+{
+#ifdef CONFIG_NFS_V4_2
+ struct nfs4_session *sess;
+ u32 server_gxa_sz;
+ u32 server_sxa_sz;
+ u32 server_lxa_sz;
+
+ if (!nfs4_has_session(server->nfs_client))
+ return;
+
+ sess = server->nfs_client->cl_session;
+
+ server_gxa_sz = sess->fc_attrs.max_resp_sz - nfs42_maxgetxattr_overhead;
+ server_sxa_sz = sess->fc_attrs.max_rqst_sz - nfs42_maxsetxattr_overhead;
+ server_lxa_sz = sess->fc_attrs.max_resp_sz -
+ nfs42_maxlistxattrs_overhead;
+
+ if (server->gxasize > server_gxa_sz)
+ server->gxasize = server_gxa_sz;
+ if (server->sxasize > server_sxa_sz)
+ server->sxasize = server_sxa_sz;
+ if (server->lxasize > server_lxa_sz)
+ server->lxasize = server_lxa_sz;
+#endif
+}
+
static int nfs4_server_common_setup(struct nfs_server *server,
struct nfs_fh *mntfh, bool auth_probe)
{
@@ -1014,6 +1047,8 @@
server->caps |= server->nfs_client->cl_mvops->init_caps;
if (server->flags & NFS_MOUNT_NORDIRPLUS)
server->caps &= ~NFS_CAP_READDIRPLUS;
+ if (server->nfs_client->cl_proto == XPRT_TRANSPORT_RDMA)
+ server->caps &= ~NFS_CAP_READ_PLUS;
/*
* Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower
* authentication.
@@ -1038,6 +1073,7 @@
goto out;
nfs4_session_limit_rwsize(server);
+ nfs4_session_limit_xasize(server);
if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
server->namelen = NFS4_MAXNAMLEN;
@@ -1053,66 +1089,64 @@
/*
* Create a version 4 volume record
*/
-static int nfs4_init_server(struct nfs_server *server,
- struct nfs_parsed_mount_data *data)
+static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc)
{
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
struct rpc_timeout timeparms;
int error;
- nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
- data->timeo, data->retrans);
+ nfs_init_timeout_values(&timeparms, ctx->nfs_server.protocol,
+ ctx->timeo, ctx->retrans);
/* Initialise the client representation from the mount data */
- server->flags = data->flags;
- server->options = data->options;
- server->auth_info = data->auth_info;
+ server->flags = ctx->flags;
+ server->options = ctx->options;
+ server->auth_info = ctx->auth_info;
/* Use the first specified auth flavor. If this flavor isn't
* allowed by the server, use the SECINFO path to try the
* other specified flavors */
- if (data->auth_info.flavor_len >= 1)
- data->selected_flavor = data->auth_info.flavors[0];
+ if (ctx->auth_info.flavor_len >= 1)
+ ctx->selected_flavor = ctx->auth_info.flavors[0];
else
- data->selected_flavor = RPC_AUTH_UNIX;
+ ctx->selected_flavor = RPC_AUTH_UNIX;
/* Get a client record */
error = nfs4_set_client(server,
- data->nfs_server.hostname,
- (const struct sockaddr *)&data->nfs_server.address,
- data->nfs_server.addrlen,
- data->client_address,
- data->nfs_server.protocol,
- &timeparms,
- data->minorversion,
- data->nfs_server.nconnect,
- data->net);
+ ctx->nfs_server.hostname,
+ &ctx->nfs_server.address,
+ ctx->nfs_server.addrlen,
+ ctx->client_address,
+ ctx->nfs_server.protocol,
+ &timeparms,
+ ctx->minorversion,
+ ctx->nfs_server.nconnect,
+ fc->net_ns);
if (error < 0)
return error;
- if (data->rsize)
- server->rsize = nfs_block_size(data->rsize, NULL);
- if (data->wsize)
- server->wsize = nfs_block_size(data->wsize, NULL);
+ if (ctx->rsize)
+ server->rsize = nfs_block_size(ctx->rsize, NULL);
+ if (ctx->wsize)
+ server->wsize = nfs_block_size(ctx->wsize, NULL);
- server->acregmin = data->acregmin * HZ;
- server->acregmax = data->acregmax * HZ;
- server->acdirmin = data->acdirmin * HZ;
- server->acdirmax = data->acdirmax * HZ;
- server->port = data->nfs_server.port;
+ server->acregmin = ctx->acregmin * HZ;
+ server->acregmax = ctx->acregmax * HZ;
+ server->acdirmin = ctx->acdirmin * HZ;
+ server->acdirmax = ctx->acdirmax * HZ;
+ server->port = ctx->nfs_server.port;
return nfs_init_server_rpcclient(server, &timeparms,
- data->selected_flavor);
+ ctx->selected_flavor);
}
/*
* Create a version 4 volume record
* - keyed on server and FSID
*/
-/*struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
- struct nfs_fh *mntfh)*/
-struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info,
- struct nfs_subversion *nfs_mod)
+struct nfs_server *nfs4_create_server(struct fs_context *fc)
{
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
struct nfs_server *server;
bool auth_probe;
int error;
@@ -1123,14 +1157,14 @@
server->cred = get_cred(current_cred());
- auth_probe = mount_info->parsed->auth_info.flavor_len < 1;
+ auth_probe = ctx->auth_info.flavor_len < 1;
/* set up the general RPC client */
- error = nfs4_init_server(server, mount_info->parsed);
+ error = nfs4_init_server(server, fc);
if (error < 0)
goto error;
- error = nfs4_server_common_setup(server, mount_info->mntfh, auth_probe);
+ error = nfs4_server_common_setup(server, ctx->mntfh, auth_probe);
if (error < 0)
goto error;
@@ -1144,9 +1178,9 @@
/*
* Create an NFS4 referral server record
*/
-struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
- struct nfs_fh *mntfh)
+struct nfs_server *nfs4_create_referral_server(struct fs_context *fc)
{
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
struct nfs_client *parent_client;
struct nfs_server *server, *parent_server;
bool auth_probe;
@@ -1156,7 +1190,7 @@
if (!server)
return ERR_PTR(-ENOMEM);
- parent_server = NFS_SB(data->sb);
+ parent_server = NFS_SB(ctx->clone_data.sb);
parent_client = parent_server->nfs_client;
server->cred = get_cred(parent_server->cred);
@@ -1166,10 +1200,11 @@
/* Get a client representation */
#if IS_ENABLED(CONFIG_SUNRPC_XPRT_RDMA)
- rpc_set_port(data->addr, NFS_RDMA_PORT);
- error = nfs4_set_client(server, data->hostname,
- data->addr,
- data->addrlen,
+ rpc_set_port(&ctx->nfs_server.address, NFS_RDMA_PORT);
+ error = nfs4_set_client(server,
+ ctx->nfs_server.hostname,
+ &ctx->nfs_server.address,
+ ctx->nfs_server.addrlen,
parent_client->cl_ipaddr,
XPRT_TRANSPORT_RDMA,
parent_server->client->cl_timeout,
@@ -1180,10 +1215,11 @@
goto init_server;
#endif /* IS_ENABLED(CONFIG_SUNRPC_XPRT_RDMA) */
- rpc_set_port(data->addr, NFS_PORT);
- error = nfs4_set_client(server, data->hostname,
- data->addr,
- data->addrlen,
+ rpc_set_port(&ctx->nfs_server.address, NFS_PORT);
+ error = nfs4_set_client(server,
+ ctx->nfs_server.hostname,
+ &ctx->nfs_server.address,
+ ctx->nfs_server.addrlen,
parent_client->cl_ipaddr,
XPRT_TRANSPORT_TCP,
parent_server->client->cl_timeout,
@@ -1196,13 +1232,14 @@
#if IS_ENABLED(CONFIG_SUNRPC_XPRT_RDMA)
init_server:
#endif
- error = nfs_init_server_rpcclient(server, parent_server->client->cl_timeout, data->authflavor);
+ error = nfs_init_server_rpcclient(server, parent_server->client->cl_timeout,
+ ctx->selected_flavor);
if (error < 0)
goto error;
auth_probe = parent_server->auth_info.flavor_len < 1;
- error = nfs4_server_common_setup(server, mntfh, auth_probe);
+ error = nfs4_server_common_setup(server, ctx->mntfh, auth_probe);
if (error < 0)
goto error;
@@ -1293,8 +1330,11 @@
}
nfs_put_client(clp);
- if (server->nfs_client->cl_hostname == NULL)
+ if (server->nfs_client->cl_hostname == NULL) {
server->nfs_client->cl_hostname = kstrdup(hostname, GFP_KERNEL);
+ if (server->nfs_client->cl_hostname == NULL)
+ return -ENOMEM;
+ }
nfs_server_insert_lists(server);
return nfs_probe_destination(server);
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 7c73097..a1e5c6b 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -7,7 +7,9 @@
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/falloc.h>
+#include <linux/mount.h>
#include <linux/nfs_fs.h>
+#include <linux/nfs_ssc.h>
#include "delegation.h"
#include "internal.h"
#include "iostat.h"
@@ -135,15 +137,56 @@
struct file *file_out, loff_t pos_out,
size_t count, unsigned int flags)
{
+ struct nfs42_copy_notify_res *cn_resp = NULL;
+ struct nl4_server *nss = NULL;
+ nfs4_stateid *cnrs = NULL;
+ ssize_t ret;
+ bool sync = false;
+
/* Only offload copy if superblock is the same */
- if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
+ if (file_in->f_op != &nfs4_file_operations)
return -EXDEV;
if (!nfs_server_capable(file_inode(file_out), NFS_CAP_COPY) ||
!nfs_server_capable(file_inode(file_in), NFS_CAP_COPY))
return -EOPNOTSUPP;
if (file_inode(file_in) == file_inode(file_out))
return -EOPNOTSUPP;
- return nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count);
+ /* if the copy size if smaller than 2 RPC payloads, make it
+ * synchronous
+ */
+ if (count <= 2 * NFS_SERVER(file_inode(file_in))->rsize)
+ sync = true;
+retry:
+ if (!nfs42_files_from_same_server(file_in, file_out)) {
+ /* for inter copy, if copy size if smaller than 12 RPC
+ * payloads, fallback to traditional copy. There are
+ * 14 RPCs during an NFSv4.x mount between source/dest
+ * servers.
+ */
+ if (sync ||
+ count <= 14 * NFS_SERVER(file_inode(file_in))->rsize)
+ return -EOPNOTSUPP;
+ cn_resp = kzalloc(sizeof(struct nfs42_copy_notify_res),
+ GFP_NOFS);
+ if (unlikely(cn_resp == NULL))
+ return -ENOMEM;
+
+ ret = nfs42_proc_copy_notify(file_in, file_out, cn_resp);
+ if (ret) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+ nss = &cn_resp->cnr_src;
+ cnrs = &cn_resp->cnr_stateid;
+ }
+ ret = nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count,
+ nss, cnrs, sync);
+out:
+ if (!nfs42_files_from_same_server(file_in, file_out))
+ kfree(cn_resp);
+ if (ret == -EAGAIN)
+ goto retry;
+ return ret;
}
static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
@@ -170,7 +213,7 @@
ret = nfs42_proc_llseek(filep, offset, whence);
if (ret != -EOPNOTSUPP)
return ret;
- /* Fall through */
+ fallthrough;
default:
return nfs_file_llseek(filep, offset, whence);
}
@@ -269,6 +312,127 @@
out:
return ret < 0 ? ret : count;
}
+
+static int read_name_gen = 1;
+#define SSC_READ_NAME_BODY "ssc_read_%d"
+
+static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt,
+ struct nfs_fh *src_fh, nfs4_stateid *stateid)
+{
+ struct nfs_fattr fattr;
+ struct file *filep, *res;
+ struct nfs_server *server;
+ struct inode *r_ino = NULL;
+ struct nfs_open_context *ctx;
+ struct nfs4_state_owner *sp;
+ char *read_name = NULL;
+ int len, status = 0;
+
+ server = NFS_SERVER(ss_mnt->mnt_root->d_inode);
+
+ nfs_fattr_init(&fattr);
+
+ status = nfs4_proc_getattr(server, src_fh, &fattr, NULL, NULL);
+ if (status < 0) {
+ res = ERR_PTR(status);
+ goto out;
+ }
+
+ res = ERR_PTR(-ENOMEM);
+ len = strlen(SSC_READ_NAME_BODY) + 16;
+ read_name = kzalloc(len, GFP_NOFS);
+ if (read_name == NULL)
+ goto out;
+ snprintf(read_name, len, SSC_READ_NAME_BODY, read_name_gen++);
+
+ r_ino = nfs_fhget(ss_mnt->mnt_root->d_inode->i_sb, src_fh, &fattr,
+ NULL);
+ if (IS_ERR(r_ino)) {
+ res = ERR_CAST(r_ino);
+ goto out_free_name;
+ }
+
+ filep = alloc_file_pseudo(r_ino, ss_mnt, read_name, FMODE_READ,
+ r_ino->i_fop);
+ if (IS_ERR(filep)) {
+ res = ERR_CAST(filep);
+ goto out_free_name;
+ }
+ filep->f_mode |= FMODE_READ;
+
+ ctx = alloc_nfs_open_context(filep->f_path.dentry, filep->f_mode,
+ filep);
+ if (IS_ERR(ctx)) {
+ res = ERR_CAST(ctx);
+ goto out_filep;
+ }
+
+ res = ERR_PTR(-EINVAL);
+ sp = nfs4_get_state_owner(server, ctx->cred, GFP_KERNEL);
+ if (sp == NULL)
+ goto out_ctx;
+
+ ctx->state = nfs4_get_open_state(r_ino, sp);
+ if (ctx->state == NULL)
+ goto out_stateowner;
+
+ set_bit(NFS_SRV_SSC_COPY_STATE, &ctx->state->flags);
+ memcpy(&ctx->state->open_stateid.other, &stateid->other,
+ NFS4_STATEID_OTHER_SIZE);
+ update_open_stateid(ctx->state, stateid, NULL, filep->f_mode);
+ set_bit(NFS_OPEN_STATE, &ctx->state->flags);
+
+ nfs_file_set_open_context(filep, ctx);
+ put_nfs_open_context(ctx);
+
+ file_ra_state_init(&filep->f_ra, filep->f_mapping->host->i_mapping);
+ res = filep;
+out_free_name:
+ kfree(read_name);
+out:
+ return res;
+out_stateowner:
+ nfs4_put_state_owner(sp);
+out_ctx:
+ put_nfs_open_context(ctx);
+out_filep:
+ fput(filep);
+ goto out_free_name;
+}
+
+static void __nfs42_ssc_close(struct file *filep)
+{
+ struct nfs_open_context *ctx = nfs_file_open_context(filep);
+
+ ctx->state->flags = 0;
+}
+
+static const struct nfs4_ssc_client_ops nfs4_ssc_clnt_ops_tbl = {
+ .sco_open = __nfs42_ssc_open,
+ .sco_close = __nfs42_ssc_close,
+};
+
+/**
+ * nfs42_ssc_register_ops - Wrapper to register NFS_V4 ops in nfs_common
+ *
+ * Return values:
+ * None
+ */
+void nfs42_ssc_register_ops(void)
+{
+ nfs42_ssc_register(&nfs4_ssc_clnt_ops_tbl);
+}
+
+/**
+ * nfs42_ssc_unregister_ops - wrapper to un-register NFS_V4 ops in nfs_common
+ *
+ * Return values:
+ * None.
+ */
+void nfs42_ssc_unregister_ops(void)
+{
+ nfs42_ssc_unregister(&nfs4_ssc_clnt_ops_tbl);
+}
#endif /* CONFIG_NFS_V4_2 */
const struct file_operations nfs4_file_operations = {
diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c
index 1e72963..f331866 100644
--- a/fs/nfs/nfs4idmap.c
+++ b/fs/nfs/nfs4idmap.c
@@ -46,6 +46,7 @@
#include <keys/user-type.h>
#include <keys/request_key_auth-type.h>
#include <linux/module.h>
+#include <linux/user_namespace.h>
#include "internal.h"
#include "netns.h"
@@ -69,13 +70,13 @@
struct rpc_pipe *idmap_pipe;
struct idmap_legacy_upcalldata *idmap_upcall_data;
struct mutex idmap_mutex;
- const struct cred *cred;
+ struct user_namespace *user_ns;
};
static struct user_namespace *idmap_userns(const struct idmap *idmap)
{
- if (idmap && idmap->cred)
- return idmap->cred->user_ns;
+ if (idmap && idmap->user_ns)
+ return idmap->user_ns;
return &init_user_ns;
}
@@ -286,7 +287,7 @@
if (ret < 0)
return ERR_PTR(ret);
- if (!idmap->cred || idmap->cred->user_ns == &init_user_ns)
+ if (!idmap->user_ns || idmap->user_ns == &init_user_ns)
rkey = request_key(&key_type_id_resolver, desc, "");
if (IS_ERR(rkey)) {
mutex_lock(&idmap->idmap_mutex);
@@ -462,7 +463,7 @@
return -ENOMEM;
mutex_init(&idmap->idmap_mutex);
- idmap->cred = get_cred(clp->cl_rpcclient->cl_cred);
+ idmap->user_ns = get_user_ns(clp->cl_rpcclient->cl_cred->user_ns);
rpc_init_pipe_dir_object(&idmap->idmap_pdo,
&nfs_idmap_pipe_dir_object_ops,
@@ -486,7 +487,7 @@
err_destroy_pipe:
rpc_destroy_pipe_data(idmap->idmap_pipe);
err:
- put_cred(idmap->cred);
+ put_user_ns(idmap->user_ns);
kfree(idmap);
return error;
}
@@ -503,7 +504,7 @@
&clp->cl_rpcclient->cl_pipedir_objects,
&idmap->idmap_pdo);
rpc_destroy_pipe_data(idmap->idmap_pipe);
- put_cred(idmap->cred);
+ put_user_ns(idmap->user_ns);
kfree(idmap);
}
@@ -520,7 +521,7 @@
switch (token) {
case Opt_find_uid:
im->im_type = IDMAP_TYPE_USER;
- /* Fall through */
+ fallthrough;
case Opt_find_gid:
im->im_conv = IDMAP_CONV_NAMETOID;
ret = match_strlcpy(im->im_name, &substr, IDMAP_NAMESZ);
@@ -528,7 +529,7 @@
case Opt_find_user:
im->im_type = IDMAP_TYPE_USER;
- /* Fall through */
+ fallthrough;
case Opt_find_group:
im->im_conv = IDMAP_CONV_IDTONAME;
ret = match_int(&substr, &im->im_id);
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index 2e460c3..3680c8d 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -8,6 +8,7 @@
* NFSv4 namespace
*/
+#include <linux/module.h>
#include <linux/dcache.h>
#include <linux/mount.h>
#include <linux/namei.h>
@@ -21,37 +22,64 @@
#include <linux/inet.h>
#include "internal.h"
#include "nfs4_fs.h"
+#include "nfs.h"
#include "dns_resolve.h"
#define NFSDBG_FACILITY NFSDBG_VFS
/*
- * Convert the NFSv4 pathname components into a standard posix path.
- *
- * Note that the resulting string will be placed at the end of the buffer
+ * Work out the length that an NFSv4 path would render to as a standard posix
+ * path, with a leading slash but no terminating slash.
*/
-static inline char *nfs4_pathname_string(const struct nfs4_pathname *pathname,
- char *buffer, ssize_t buflen)
+static ssize_t nfs4_pathname_len(const struct nfs4_pathname *pathname)
{
- char *end = buffer + buflen;
- int n;
+ ssize_t len = 0;
+ int i;
- *--end = '\0';
- buflen--;
+ for (i = 0; i < pathname->ncomponents; i++) {
+ const struct nfs4_string *component = &pathname->components[i];
- n = pathname->ncomponents;
- while (--n >= 0) {
- const struct nfs4_string *component = &pathname->components[n];
- buflen -= component->len + 1;
- if (buflen < 0)
- goto Elong;
- end -= component->len;
- memcpy(end, component->data, component->len);
- *--end = '/';
+ if (component->len > NAME_MAX)
+ goto too_long;
+ len += 1 + component->len; /* Adding "/foo" */
+ if (len > PATH_MAX)
+ goto too_long;
}
- return end;
-Elong:
- return ERR_PTR(-ENAMETOOLONG);
+ return len;
+
+too_long:
+ return -ENAMETOOLONG;
+}
+
+/*
+ * Convert the NFSv4 pathname components into a standard posix path.
+ */
+static char *nfs4_pathname_string(const struct nfs4_pathname *pathname,
+ unsigned short *_len)
+{
+ ssize_t len;
+ char *buf, *p;
+ int i;
+
+ len = nfs4_pathname_len(pathname);
+ if (len < 0)
+ return ERR_PTR(len);
+ *_len = len;
+
+ p = buf = kmalloc(len + 1, GFP_KERNEL);
+ if (!buf)
+ return ERR_PTR(-ENOMEM);
+
+ for (i = 0; i < pathname->ncomponents; i++) {
+ const struct nfs4_string *component = &pathname->components[i];
+
+ *p++ = '/';
+ memcpy(p, component->data, component->len);
+ p += component->len;
+ }
+
+ *p = 0;
+ return buf;
}
/*
@@ -100,37 +128,57 @@
*/
static int nfs4_validate_fspath(struct dentry *dentry,
const struct nfs4_fs_locations *locations,
- char *page, char *page2)
+ struct nfs_fs_context *ctx)
{
- const char *path, *fs_path;
+ const char *path;
+ char *fs_path;
+ unsigned short len;
+ char *buf;
+ int n;
- path = nfs4_path(dentry, page, PAGE_SIZE);
- if (IS_ERR(path))
+ buf = kmalloc(4096, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ path = nfs4_path(dentry, buf, 4096);
+ if (IS_ERR(path)) {
+ kfree(buf);
return PTR_ERR(path);
+ }
- fs_path = nfs4_pathname_string(&locations->fs_path, page2, PAGE_SIZE);
- if (IS_ERR(fs_path))
+ fs_path = nfs4_pathname_string(&locations->fs_path, &len);
+ if (IS_ERR(fs_path)) {
+ kfree(buf);
return PTR_ERR(fs_path);
+ }
- if (strncmp(path, fs_path, strlen(fs_path)) != 0) {
+ n = strncmp(path, fs_path, len);
+ kfree(buf);
+ kfree(fs_path);
+ if (n != 0) {
dprintk("%s: path %s does not begin with fsroot %s\n",
- __func__, path, fs_path);
+ __func__, path, ctx->nfs_server.export_path);
return -ENOENT;
}
return 0;
}
-static size_t nfs_parse_server_name(char *string, size_t len,
- struct sockaddr *sa, size_t salen, struct net *net)
+size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr *sa,
+ size_t salen, struct net *net, int port)
{
ssize_t ret;
ret = rpc_pton(net, string, len, sa, salen);
if (ret == 0) {
- ret = nfs_dns_resolve_name(net, string, len, sa, salen);
- if (ret < 0)
- ret = 0;
+ ret = rpc_uaddr2sockaddr(net, string, len, sa, salen);
+ if (ret == 0) {
+ ret = nfs_dns_resolve_name(net, string, len, sa, salen);
+ if (ret < 0)
+ ret = 0;
+ }
+ } else if (port) {
+ rpc_set_port(sa, port);
}
return ret;
}
@@ -236,95 +284,103 @@
return new;
}
-static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
- char *page, char *page2,
- const struct nfs4_fs_location *location)
+static int try_location(struct fs_context *fc,
+ const struct nfs4_fs_location *location)
{
- const size_t addr_bufsize = sizeof(struct sockaddr_storage);
- struct net *net = rpc_net_ns(NFS_SB(mountdata->sb)->client);
- struct vfsmount *mnt = ERR_PTR(-ENOENT);
- char *mnt_path;
- unsigned int maxbuflen;
- unsigned int s;
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
+ unsigned int len, s;
+ char *export_path, *source, *p;
+ int ret = -ENOENT;
- mnt_path = nfs4_pathname_string(&location->rootpath, page2, PAGE_SIZE);
- if (IS_ERR(mnt_path))
- return ERR_CAST(mnt_path);
- mountdata->mnt_path = mnt_path;
- maxbuflen = mnt_path - 1 - page2;
-
- mountdata->addr = kmalloc(addr_bufsize, GFP_KERNEL);
- if (mountdata->addr == NULL)
- return ERR_PTR(-ENOMEM);
-
+ /* Allocate a buffer big enough to hold any of the hostnames plus a
+ * terminating char and also a buffer big enough to hold the hostname
+ * plus a colon plus the path.
+ */
+ len = 0;
for (s = 0; s < location->nservers; s++) {
const struct nfs4_string *buf = &location->servers[s];
+ if (buf->len > len)
+ len = buf->len;
+ }
- if (buf->len <= 0 || buf->len >= maxbuflen)
- continue;
+ kfree(ctx->nfs_server.hostname);
+ ctx->nfs_server.hostname = kmalloc(len + 1, GFP_KERNEL);
+ if (!ctx->nfs_server.hostname)
+ return -ENOMEM;
+
+ export_path = nfs4_pathname_string(&location->rootpath,
+ &ctx->nfs_server.export_path_len);
+ if (IS_ERR(export_path))
+ return PTR_ERR(export_path);
+
+ kfree(ctx->nfs_server.export_path);
+ ctx->nfs_server.export_path = export_path;
+
+ source = kmalloc(len + 1 + ctx->nfs_server.export_path_len + 1,
+ GFP_KERNEL);
+ if (!source)
+ return -ENOMEM;
+
+ kfree(fc->source);
+ fc->source = source;
+ for (s = 0; s < location->nservers; s++) {
+ const struct nfs4_string *buf = &location->servers[s];
if (memchr(buf->data, IPV6_SCOPE_DELIMITER, buf->len))
continue;
- mountdata->addrlen = nfs_parse_server_name(buf->data, buf->len,
- mountdata->addr, addr_bufsize, net);
- if (mountdata->addrlen == 0)
+ ctx->nfs_server.addrlen =
+ nfs_parse_server_name(buf->data, buf->len,
+ &ctx->nfs_server.address,
+ sizeof(ctx->nfs_server._address),
+ fc->net_ns, 0);
+ if (ctx->nfs_server.addrlen == 0)
continue;
- memcpy(page2, buf->data, buf->len);
- page2[buf->len] = '\0';
- mountdata->hostname = page2;
+ rpc_set_port(&ctx->nfs_server.address, NFS_PORT);
- snprintf(page, PAGE_SIZE, "%s:%s",
- mountdata->hostname,
- mountdata->mnt_path);
+ memcpy(ctx->nfs_server.hostname, buf->data, buf->len);
+ ctx->nfs_server.hostname[buf->len] = '\0';
- mnt = vfs_submount(mountdata->dentry, &nfs4_referral_fs_type, page, mountdata);
- if (!IS_ERR(mnt))
- break;
+ p = source;
+ memcpy(p, buf->data, buf->len);
+ p += buf->len;
+ *p++ = ':';
+ memcpy(p, ctx->nfs_server.export_path, ctx->nfs_server.export_path_len);
+ p += ctx->nfs_server.export_path_len;
+ *p = 0;
+
+ ret = nfs4_get_referral_tree(fc);
+ if (ret == 0)
+ return 0;
}
- kfree(mountdata->addr);
- return mnt;
+
+ return ret;
}
/**
* nfs_follow_referral - set up mountpoint when hitting a referral on moved error
- * @dentry: parent directory
+ * @fc: pointer to struct nfs_fs_context
* @locations: array of NFSv4 server location information
*
*/
-static struct vfsmount *nfs_follow_referral(struct dentry *dentry,
- const struct nfs4_fs_locations *locations)
+static int nfs_follow_referral(struct fs_context *fc,
+ const struct nfs4_fs_locations *locations)
{
- struct vfsmount *mnt = ERR_PTR(-ENOENT);
- struct nfs_clone_mount mountdata = {
- .sb = dentry->d_sb,
- .dentry = dentry,
- .authflavor = NFS_SB(dentry->d_sb)->client->cl_auth->au_flavor,
- };
- char *page = NULL, *page2 = NULL;
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
int loc, error;
if (locations == NULL || locations->nlocations <= 0)
- goto out;
+ return -ENOENT;
- dprintk("%s: referral at %pd2\n", __func__, dentry);
-
- page = (char *) __get_free_page(GFP_USER);
- if (!page)
- goto out;
-
- page2 = (char *) __get_free_page(GFP_USER);
- if (!page2)
- goto out;
+ dprintk("%s: referral at %pd2\n", __func__, ctx->clone_data.dentry);
/* Ensure fs path is a prefix of current dentry path */
- error = nfs4_validate_fspath(dentry, locations, page, page2);
- if (error < 0) {
- mnt = ERR_PTR(error);
- goto out;
- }
+ error = nfs4_validate_fspath(ctx->clone_data.dentry, locations, ctx);
+ if (error < 0)
+ return error;
+ error = -ENOENT;
for (loc = 0; loc < locations->nlocations; loc++) {
const struct nfs4_fs_location *location = &locations->locations[loc];
@@ -332,15 +388,12 @@
location->rootpath.ncomponents == 0)
continue;
- mnt = try_location(&mountdata, page, page2, location);
- if (!IS_ERR(mnt))
- break;
+ error = try_location(fc, location);
+ if (error == 0)
+ return 0;
}
-out:
- free_page((unsigned long) page);
- free_page((unsigned long) page2);
- return mnt;
+ return error;
}
/*
@@ -348,71 +401,72 @@
* @dentry - dentry of referral
*
*/
-static struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry)
+static int nfs_do_refmount(struct fs_context *fc, struct rpc_clnt *client)
{
- struct vfsmount *mnt = ERR_PTR(-ENOMEM);
- struct dentry *parent;
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
+ struct dentry *dentry, *parent;
struct nfs4_fs_locations *fs_locations = NULL;
struct page *page;
- int err;
+ int err = -ENOMEM;
/* BUG_ON(IS_ROOT(dentry)); */
page = alloc_page(GFP_KERNEL);
- if (page == NULL)
- return mnt;
+ if (!page)
+ return -ENOMEM;
fs_locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL);
- if (fs_locations == NULL)
+ if (!fs_locations)
goto out_free;
/* Get locations */
- mnt = ERR_PTR(-ENOENT);
-
+ dentry = ctx->clone_data.dentry;
parent = dget_parent(dentry);
dprintk("%s: getting locations for %pd2\n",
__func__, dentry);
err = nfs4_proc_fs_locations(client, d_inode(parent), &dentry->d_name, fs_locations, page);
dput(parent);
- if (err != 0 ||
- fs_locations->nlocations <= 0 ||
- fs_locations->fs_path.ncomponents <= 0)
- goto out_free;
+ if (err != 0)
+ goto out_free_2;
- mnt = nfs_follow_referral(dentry, fs_locations);
+ err = -ENOENT;
+ if (fs_locations->nlocations <= 0 ||
+ fs_locations->fs_path.ncomponents <= 0)
+ goto out_free_2;
+
+ err = nfs_follow_referral(fc, fs_locations);
+out_free_2:
+ kfree(fs_locations);
out_free:
__free_page(page);
- kfree(fs_locations);
- return mnt;
+ return err;
}
-struct vfsmount *nfs4_submount(struct nfs_server *server, struct dentry *dentry,
- struct nfs_fh *fh, struct nfs_fattr *fattr)
+int nfs4_submount(struct fs_context *fc, struct nfs_server *server)
{
- rpc_authflavor_t flavor = server->client->cl_auth->au_flavor;
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
+ struct dentry *dentry = ctx->clone_data.dentry;
struct dentry *parent = dget_parent(dentry);
struct inode *dir = d_inode(parent);
- const struct qstr *name = &dentry->d_name;
struct rpc_clnt *client;
- struct vfsmount *mnt;
+ int ret;
/* Look it up again to get its attributes and sec flavor */
- client = nfs4_proc_lookup_mountpoint(dir, name, fh, fattr);
+ client = nfs4_proc_lookup_mountpoint(dir, dentry, ctx->mntfh,
+ ctx->clone_data.fattr);
dput(parent);
if (IS_ERR(client))
- return ERR_CAST(client);
+ return PTR_ERR(client);
- if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) {
- mnt = nfs_do_refmount(client, dentry);
- goto out;
+ ctx->selected_flavor = client->cl_auth->au_flavor;
+ if (ctx->clone_data.fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) {
+ ret = nfs_do_refmount(fc, client);
+ } else {
+ ret = nfs_do_submount(fc);
}
- if (client->cl_auth->au_flavor != flavor)
- flavor = client->cl_auth->au_flavor;
- mnt = nfs_do_submount(dentry, fh, fattr, flavor);
-out:
rpc_shutdown_client(client);
- return mnt;
+ return ret;
}
/*
@@ -447,13 +501,13 @@
continue;
salen = nfs_parse_server_name(buf->data, buf->len,
- sap, addr_bufsize, net);
+ sap, addr_bufsize, net, 0);
if (salen == 0)
continue;
rpc_set_port(sap, NFS_PORT);
error = -ENOMEM;
- hostname = kstrndup(buf->data, buf->len, GFP_KERNEL);
+ hostname = kmemdup_nul(buf->data, buf->len, GFP_KERNEL);
if (hostname == NULL)
break;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 5ecaf7b..d222a98 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -63,12 +63,18 @@
#include "callback.h"
#include "pnfs.h"
#include "netns.h"
+#include "sysfs.h"
#include "nfs4idmap.h"
#include "nfs4session.h"
#include "fscache.h"
+#include "nfs42.h"
#include "nfs4trace.h"
+#ifdef CONFIG_NFS_V4_2
+#include "nfs42.h"
+#endif /* CONFIG_NFS_V4_2 */
+
#define NFSDBG_FACILITY NFSDBG_PROC
#define NFS4_BITMASK_SZ 3
@@ -91,7 +97,6 @@
static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr);
-static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *label, struct inode *inode);
static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label, struct inode *inode);
static int nfs4_do_setattr(struct inode *inode, const struct cred *cred,
struct nfs_fattr *fattr, struct iattr *sattr,
@@ -107,6 +112,10 @@
static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *,
const struct cred *, bool);
#endif
+static void nfs4_bitmask_set(__u32 bitmask[NFS4_BITMASK_SZ],
+ const __u32 *src, struct inode *inode,
+ struct nfs_server *server,
+ struct nfs4_label *label);
#ifdef CONFIG_NFS_V4_SECURITY_LABEL
static inline struct nfs4_label *
@@ -257,6 +266,7 @@
| FATTR4_WORD1_FS_LAYOUT_TYPES,
FATTR4_WORD2_LAYOUT_BLKSIZE
| FATTR4_WORD2_CLONE_BLKSIZE
+ | FATTR4_WORD2_XATTR_SUPPORT
};
const u32 nfs4_fs_locations_bitmap[3] = {
@@ -415,7 +425,7 @@
{
might_sleep();
- freezable_schedule_timeout_interruptible(nfs4_update_delay(timeout));
+ freezable_schedule_timeout_interruptible_unsafe(nfs4_update_delay(timeout));
if (!signal_pending(current))
return 0;
return __fatal_signal_pending(current) ? -EINTR :-ERESTARTSYS;
@@ -476,12 +486,13 @@
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_EXPIRED:
case -NFS4ERR_BAD_STATEID:
+ case -NFS4ERR_PARTNER_NO_AUTH:
if (inode != NULL && stateid != NULL) {
nfs_inode_find_state_and_recover(inode,
stateid);
goto wait_on_recovery;
}
- /* Fall through */
+ fallthrough;
case -NFS4ERR_OPENMODE:
if (inode) {
int err;
@@ -532,10 +543,10 @@
ret = -EBUSY;
break;
}
- /* Fall through */
+ fallthrough;
case -NFS4ERR_DELAY:
nfs_inc_server_stats(server, NFSIOS_DELAY);
- /* Fall through */
+ fallthrough;
case -NFS4ERR_GRACE:
case -NFS4ERR_LAYOUTTRYLATER:
case -NFS4ERR_RECALLCONFLICT:
@@ -1117,11 +1128,12 @@
return ret;
}
-static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
- struct nfs_server *server,
- struct rpc_message *msg,
- struct nfs4_sequence_args *args,
- struct nfs4_sequence_res *res)
+static int nfs4_do_call_sync(struct rpc_clnt *clnt,
+ struct nfs_server *server,
+ struct rpc_message *msg,
+ struct nfs4_sequence_args *args,
+ struct nfs4_sequence_res *res,
+ unsigned short task_flags)
{
struct nfs_client *clp = server->nfs_client;
struct nfs4_call_sync_data data = {
@@ -1133,12 +1145,23 @@
.rpc_client = clnt,
.rpc_message = msg,
.callback_ops = clp->cl_mvops->call_sync_ops,
- .callback_data = &data
+ .callback_data = &data,
+ .flags = task_flags,
};
return nfs4_call_sync_custom(&task_setup);
}
+static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
+ struct nfs_server *server,
+ struct rpc_message *msg,
+ struct nfs4_sequence_args *args,
+ struct nfs4_sequence_res *res)
+{
+ return nfs4_do_call_sync(clnt, server, msg, args, res, 0);
+}
+
+
int nfs4_call_sync(struct rpc_clnt *clnt,
struct nfs_server *server,
struct rpc_message *msg,
@@ -1165,37 +1188,49 @@
}
static void
-update_changeattr_locked(struct inode *dir, struct nfs4_change_info *cinfo,
+nfs4_update_changeattr_locked(struct inode *inode,
+ struct nfs4_change_info *cinfo,
unsigned long timestamp, unsigned long cache_validity)
{
- struct nfs_inode *nfsi = NFS_I(dir);
+ struct nfs_inode *nfsi = NFS_I(inode);
nfsi->cache_validity |= NFS_INO_INVALID_CTIME
| NFS_INO_INVALID_MTIME
- | NFS_INO_INVALID_DATA
| cache_validity;
- if (cinfo->atomic && cinfo->before == inode_peek_iversion_raw(dir)) {
+
+ if (cinfo->atomic && cinfo->before == inode_peek_iversion_raw(inode)) {
nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE;
nfsi->attrtimeo_timestamp = jiffies;
} else {
- nfs_force_lookup_revalidate(dir);
- if (cinfo->before != inode_peek_iversion_raw(dir))
+ if (S_ISDIR(inode->i_mode)) {
+ nfsi->cache_validity |= NFS_INO_INVALID_DATA;
+ nfs_force_lookup_revalidate(inode);
+ } else {
+ if (!NFS_PROTO(inode)->have_delegation(inode,
+ FMODE_READ))
+ nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
+ }
+
+ if (cinfo->before != inode_peek_iversion_raw(inode))
nfsi->cache_validity |= NFS_INO_INVALID_ACCESS |
- NFS_INO_INVALID_ACL;
+ NFS_INO_INVALID_ACL |
+ NFS_INO_INVALID_XATTR;
}
- inode_set_iversion_raw(dir, cinfo->after);
+ inode_set_iversion_raw(inode, cinfo->after);
nfsi->read_cache_jiffies = timestamp;
nfsi->attr_gencount = nfs_inc_attr_generation_counter();
nfsi->cache_validity &= ~NFS_INO_INVALID_CHANGE;
- nfs_fscache_invalidate(dir);
+
+ if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
+ nfs_fscache_invalidate(inode);
}
-static void
-update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo,
+void
+nfs4_update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo,
unsigned long timestamp, unsigned long cache_validity)
{
spin_lock(&dir->i_lock);
- update_changeattr_locked(dir, cinfo, timestamp, cache_validity);
+ nfs4_update_changeattr_locked(dir, cinfo, timestamp, cache_validity);
spin_unlock(&dir->i_lock);
}
@@ -1348,6 +1383,12 @@
NFS4_ACCESS_MODIFY |
NFS4_ACCESS_EXTEND |
NFS4_ACCESS_EXECUTE;
+#ifdef CONFIG_NFS_V4_2
+ if (server->caps & NFS_CAP_XATTR)
+ p->o_arg.access |= NFS4_ACCESS_XAREAD |
+ NFS4_ACCESS_XAWRITE |
+ NFS4_ACCESS_XALIST;
+#endif
}
}
p->o_arg.clientid = server->nfs_client->cl_clientid;
@@ -1477,7 +1518,7 @@
case NFS4_OPEN_CLAIM_PREVIOUS:
if (!test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags))
break;
- /* Fall through */
+ fallthrough;
default:
return 0;
}
@@ -1549,15 +1590,16 @@
{
if (test_bit(NFS_OPEN_STATE, &state->flags)) {
/* The common case - we're updating to a new sequence number */
- if (nfs4_stateid_match_other(stateid, &state->open_stateid) &&
- nfs4_stateid_is_next(&state->open_stateid, stateid)) {
- return true;
+ if (nfs4_stateid_match_other(stateid, &state->open_stateid)) {
+ if (nfs4_stateid_is_next(&state->open_stateid, stateid))
+ return true;
+ return false;
}
- } else {
- /* This is the first OPEN in this generation */
- if (stateid->seqid == cpu_to_be32(1))
- return true;
+ /* The server returned a new stateid */
}
+ /* This is the first OPEN in this generation */
+ if (stateid->seqid == cpu_to_be32(1))
+ return true;
return false;
}
@@ -1732,7 +1774,7 @@
write_sequnlock(&state->seqlock);
}
-static int update_open_stateid(struct nfs4_state *state,
+int update_open_stateid(struct nfs4_state *state,
const nfs4_stateid *open_stateid,
const nfs4_stateid *delegation,
fmode_t fmode)
@@ -1753,7 +1795,7 @@
ret = 1;
}
- deleg_cur = rcu_dereference(nfsi->delegation);
+ deleg_cur = nfs4_get_valid_delegation(state->inode);
if (deleg_cur == NULL)
goto no_delegation;
@@ -1765,7 +1807,7 @@
if (delegation == NULL)
delegation = &deleg_cur->stateid;
- else if (!nfs4_stateid_match(&deleg_cur->stateid, delegation))
+ else if (!nfs4_stateid_match_other(&deleg_cur->stateid, delegation))
goto no_delegation_unlock;
nfs_mark_delegation_referenced(deleg_cur);
@@ -1812,7 +1854,7 @@
fmode &= FMODE_READ|FMODE_WRITE;
rcu_read_lock();
- delegation = rcu_dereference(NFS_I(inode)->delegation);
+ delegation = nfs4_get_valid_delegation(inode);
if (delegation == NULL || (delegation->type & fmode) == fmode) {
rcu_read_unlock();
return;
@@ -2340,7 +2382,7 @@
.callback_ops = &nfs4_open_confirm_ops,
.callback_data = data,
.workqueue = nfsiod_workqueue,
- .flags = RPC_TASK_ASYNC,
+ .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF,
};
int status;
@@ -2397,7 +2439,7 @@
case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
data->o_arg.open_bitmap = &nfs4_open_noattr_bitmap[0];
- /* Fall through */
+ fallthrough;
case NFS4_OPEN_CLAIM_FH:
task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
}
@@ -2505,7 +2547,7 @@
.callback_ops = &nfs4_open_ops,
.callback_data = data,
.workqueue = nfsiod_workqueue,
- .flags = RPC_TASK_ASYNC,
+ .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF,
};
int status;
@@ -2631,8 +2673,9 @@
data->file_created = true;
if (data->file_created ||
inode_peek_iversion_raw(dir) != o_res->cinfo.after)
- update_changeattr(dir, &o_res->cinfo,
- o_res->f_attr->time_start, 0);
+ nfs4_update_changeattr(dir, &o_res->cinfo,
+ o_res->f_attr->time_start,
+ NFS_INO_INVALID_DATA);
}
if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0)
server->caps &= ~NFS_CAP_POSIX_LOCK;
@@ -2784,16 +2827,19 @@
return NFS_OK;
}
+ spin_lock(&delegation->lock);
nfs4_stateid_copy(&stateid, &delegation->stateid);
if (!test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED,
&delegation->flags)) {
+ spin_unlock(&delegation->lock);
rcu_read_unlock();
return NFS_OK;
}
if (delegation->cred)
cred = get_cred(delegation->cred);
+ spin_unlock(&delegation->lock);
rcu_read_unlock();
status = nfs41_test_and_free_expired_stateid(server, &stateid, cred);
trace_nfs4_test_delegation_stateid(state, NULL, status);
@@ -3266,6 +3312,8 @@
nfs_put_lock_context(l_ctx);
if (status == -EIO)
return -EBADF;
+ else if (status == -EAGAIN)
+ goto zero_stateid;
} else {
zero_stateid:
nfs4_stateid_copy(&arg->stateid, &zero_stateid);
@@ -3524,11 +3572,11 @@
nfs4_free_revoked_stateid(server,
&calldata->arg.stateid,
task->tk_msg.rpc_cred);
- /* Fallthrough */
+ fallthrough;
case -NFS4ERR_BAD_STATEID:
if (calldata->arg.fmode == 0)
break;
- /* Fallthrough */
+ fallthrough;
default:
task->tk_status = nfs4_async_handle_exception(task,
server, task->tk_status, &exception);
@@ -3554,6 +3602,7 @@
struct nfs4_closedata *calldata = data;
struct nfs4_state *state = calldata->state;
struct inode *inode = calldata->inode;
+ struct nfs_server *server = NFS_SERVER(inode);
struct pnfs_layout_hdr *lo;
bool is_rdonly, is_wronly, is_rdwr;
int call_close = 0;
@@ -3609,9 +3658,12 @@
if (calldata->arg.fmode == 0 || calldata->arg.fmode == FMODE_READ) {
/* Close-to-open cache consistency revalidation */
- if (!nfs4_have_delegation(inode, FMODE_READ))
- calldata->arg.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask;
- else
+ if (!nfs4_have_delegation(inode, FMODE_READ)) {
+ nfs4_bitmask_set(calldata->arg.bitmask_store,
+ server->cache_consistency_bitmask,
+ inode, server, NULL);
+ calldata->arg.bitmask = calldata->arg.bitmask_store;
+ } else
calldata->arg.bitmask = NULL;
}
@@ -3670,7 +3722,7 @@
.rpc_message = &msg,
.callback_ops = &nfs4_close_ops,
.workqueue = nfsiod_workqueue,
- .flags = RPC_TASK_ASYNC,
+ .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF,
};
int status = -ENOMEM;
@@ -3756,7 +3808,7 @@
#define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL)
#define FATTR4_WORD2_NFS41_MASK (2*FATTR4_WORD2_SUPPATTR_EXCLCREAT - 1UL)
-#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_MODE_UMASK - 1UL)
+#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_XATTR_SUPPORT - 1UL)
static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
{
@@ -4021,7 +4073,7 @@
{
int error;
struct nfs_fattr *fattr = info->fattr;
- struct nfs4_label *label = NULL;
+ struct nfs4_label *label = fattr->label;
error = nfs4_server_capabilities(server, mntfh);
if (error < 0) {
@@ -4029,23 +4081,17 @@
return error;
}
- label = nfs4_label_alloc(server, GFP_KERNEL);
- if (IS_ERR(label))
- return PTR_ERR(label);
-
error = nfs4_proc_getattr(server, mntfh, fattr, label, NULL);
if (error < 0) {
dprintk("nfs4_get_root: getattr error = %d\n", -error);
- goto err_free_label;
+ goto out;
}
if (fattr->valid & NFS_ATTR_FATTR_FSID &&
!nfs_fsid_equal(&server->fsid, &fattr->fsid))
memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid));
-err_free_label:
- nfs4_label_free(label);
-
+out:
return error;
}
@@ -4116,14 +4162,21 @@
.rpc_argp = &args,
.rpc_resp = &res,
};
+ unsigned short task_flags = 0;
+
+ /* Is this is an attribute revalidation, subject to softreval? */
+ if (inode && (server->flags & NFS_MOUNT_SOFTREVAL))
+ task_flags |= RPC_TASK_TIMEOUT;
nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, label), inode);
nfs_fattr_init(fattr);
- return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
+ nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 0);
+ return nfs4_do_call_sync(server->client, server, &msg,
+ &args.seq_args, &res.seq_res, task_flags);
}
-static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
+int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fattr *fattr, struct nfs4_label *label,
struct inode *inode)
{
@@ -4208,7 +4261,7 @@
}
static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir,
- const struct qstr *name, struct nfs_fh *fhandle,
+ struct dentry *dentry, struct nfs_fh *fhandle,
struct nfs_fattr *fattr, struct nfs4_label *label)
{
struct nfs_server *server = NFS_SERVER(dir);
@@ -4216,7 +4269,7 @@
struct nfs4_lookup_arg args = {
.bitmask = server->attr_bitmask,
.dir_fh = NFS_FH(dir),
- .name = name,
+ .name = &dentry->d_name,
};
struct nfs4_lookup_res res = {
.server = server,
@@ -4229,13 +4282,20 @@
.rpc_argp = &args,
.rpc_resp = &res,
};
+ unsigned short task_flags = 0;
+
+ /* Is this is an attribute revalidation, subject to softreval? */
+ if (nfs_lookup_is_soft_revalidate(dentry))
+ task_flags |= RPC_TASK_TIMEOUT;
args.bitmask = nfs4_bitmask(server, label);
nfs_fattr_init(fattr);
- dprintk("NFS call lookup %s\n", name->name);
- status = nfs4_call_sync(clnt, server, &msg, &args.seq_args, &res.seq_res, 0);
+ dprintk("NFS call lookup %pd2\n", dentry);
+ nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 0);
+ status = nfs4_do_call_sync(clnt, server, &msg,
+ &args.seq_args, &res.seq_res, task_flags);
dprintk("NFS reply lookup: %d\n", status);
return status;
}
@@ -4249,16 +4309,17 @@
}
static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir,
- const struct qstr *name, struct nfs_fh *fhandle,
+ struct dentry *dentry, struct nfs_fh *fhandle,
struct nfs_fattr *fattr, struct nfs4_label *label)
{
struct nfs4_exception exception = {
.interruptible = true,
};
struct rpc_clnt *client = *clnt;
+ const struct qstr *name = &dentry->d_name;
int err;
do {
- err = _nfs4_proc_lookup(client, dir, name, fhandle, fattr, label);
+ err = _nfs4_proc_lookup(client, dir, dentry, fhandle, fattr, label);
trace_nfs4_lookup(dir, name, err);
switch (err) {
case -NFS4ERR_BADNAME:
@@ -4293,14 +4354,14 @@
return err;
}
-static int nfs4_proc_lookup(struct inode *dir, const struct qstr *name,
+static int nfs4_proc_lookup(struct inode *dir, struct dentry *dentry,
struct nfs_fh *fhandle, struct nfs_fattr *fattr,
struct nfs4_label *label)
{
int status;
struct rpc_clnt *client = NFS_CLIENT(dir);
- status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr, label);
+ status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr, label);
if (client != NFS_CLIENT(dir)) {
rpc_shutdown_client(client);
nfs_fixup_secinfo_attributes(fattr);
@@ -4309,13 +4370,13 @@
}
struct rpc_clnt *
-nfs4_proc_lookup_mountpoint(struct inode *dir, const struct qstr *name,
+nfs4_proc_lookup_mountpoint(struct inode *dir, struct dentry *dentry,
struct nfs_fh *fhandle, struct nfs_fattr *fattr)
{
struct rpc_clnt *client = NFS_CLIENT(dir);
int status;
- status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr, NULL);
+ status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr, NULL);
if (status < 0)
return ERR_PTR(status);
return (client == NFS_CLIENT(dir)) ? rpc_clone_client(client) : client;
@@ -4531,7 +4592,8 @@
status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1);
if (status == 0) {
spin_lock(&dir->i_lock);
- update_changeattr_locked(dir, &res.cinfo, timestamp, 0);
+ nfs4_update_changeattr_locked(dir, &res.cinfo, timestamp,
+ NFS_INO_INVALID_DATA);
/* Removing a directory decrements nlink in the parent */
if (ftype == NF4DIR && dir->i_nlink > 2)
nfs4_dec_nlink_locked(dir);
@@ -4615,8 +4677,9 @@
&data->timeout) == -EAGAIN)
return 0;
if (task->tk_status == 0)
- update_changeattr(dir, &res->cinfo,
- res->dir_attr->time_start, 0);
+ nfs4_update_changeattr(dir, &res->cinfo,
+ res->dir_attr->time_start,
+ NFS_INO_INVALID_DATA);
return 1;
}
@@ -4660,16 +4723,18 @@
if (task->tk_status == 0) {
if (new_dir != old_dir) {
/* Note: If we moved a directory, nlink will change */
- update_changeattr(old_dir, &res->old_cinfo,
+ nfs4_update_changeattr(old_dir, &res->old_cinfo,
res->old_fattr->time_start,
- NFS_INO_INVALID_OTHER);
- update_changeattr(new_dir, &res->new_cinfo,
+ NFS_INO_INVALID_OTHER |
+ NFS_INO_INVALID_DATA);
+ nfs4_update_changeattr(new_dir, &res->new_cinfo,
res->new_fattr->time_start,
- NFS_INO_INVALID_OTHER);
+ NFS_INO_INVALID_OTHER |
+ NFS_INO_INVALID_DATA);
} else
- update_changeattr(old_dir, &res->old_cinfo,
+ nfs4_update_changeattr(old_dir, &res->old_cinfo,
res->old_fattr->time_start,
- 0);
+ NFS_INO_INVALID_DATA);
}
return 1;
}
@@ -4710,7 +4775,8 @@
status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
if (!status) {
- update_changeattr(dir, &res.cinfo, res.fattr->time_start, 0);
+ nfs4_update_changeattr(dir, &res.cinfo, res.fattr->time_start,
+ NFS_INO_INVALID_DATA);
status = nfs_post_op_update_inode(inode, res.fattr);
if (!status)
nfs_setsecurity(inode, res.fattr, res.label);
@@ -4788,8 +4854,9 @@
&data->arg.seq_args, &data->res.seq_res, 1);
if (status == 0) {
spin_lock(&dir->i_lock);
- update_changeattr_locked(dir, &data->res.dir_cinfo,
- data->res.fattr->time_start, 0);
+ nfs4_update_changeattr_locked(dir, &data->res.dir_cinfo,
+ data->res.fattr->time_start,
+ NFS_INO_INVALID_DATA);
/* Creating a directory bumps nlink in the parent */
if (data->arg.ftype == NF4DIR)
nfs4_inc_nlink_locked(dir);
@@ -5162,12 +5229,12 @@
const struct nfs_lock_context *l_ctx,
fmode_t fmode)
{
- nfs4_stateid current_stateid;
+ nfs4_stateid _current_stateid;
/* If the current stateid represents a lost lock, then exit */
- if (nfs4_set_rw_stateid(¤t_stateid, ctx, l_ctx, fmode) == -EIO)
+ if (nfs4_set_rw_stateid(&_current_stateid, ctx, l_ctx, fmode) == -EIO)
return true;
- return nfs4_stateid_match(stateid, ¤t_stateid);
+ return nfs4_stateid_match(stateid, &_current_stateid);
}
static bool nfs4_error_stateid_expired(int err)
@@ -5223,28 +5290,60 @@
return true;
}
+static bool nfs4_read_plus_not_supported(struct rpc_task *task,
+ struct nfs_pgio_header *hdr)
+{
+ struct nfs_server *server = NFS_SERVER(hdr->inode);
+ struct rpc_message *msg = &task->tk_msg;
+
+ if (msg->rpc_proc == &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS] &&
+ server->caps & NFS_CAP_READ_PLUS && task->tk_status == -ENOTSUPP) {
+ server->caps &= ~NFS_CAP_READ_PLUS;
+ msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
+ rpc_restart_call_prepare(task);
+ return true;
+ }
+ return false;
+}
+
static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
{
-
dprintk("--> %s\n", __func__);
if (!nfs4_sequence_done(task, &hdr->res.seq_res))
return -EAGAIN;
if (nfs4_read_stateid_changed(task, &hdr->args))
return -EAGAIN;
+ if (nfs4_read_plus_not_supported(task, hdr))
+ return -EAGAIN;
if (task->tk_status > 0)
nfs_invalidate_atime(hdr->inode);
return hdr->pgio_done_cb ? hdr->pgio_done_cb(task, hdr) :
nfs4_read_done_cb(task, hdr);
}
+#if defined CONFIG_NFS_V4_2 && defined CONFIG_NFS_V4_2_READ_PLUS
+static void nfs42_read_plus_support(struct nfs_server *server, struct rpc_message *msg)
+{
+ if (server->caps & NFS_CAP_READ_PLUS)
+ msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS];
+ else
+ msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
+}
+#else
+static void nfs42_read_plus_support(struct nfs_server *server, struct rpc_message *msg)
+{
+ msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
+}
+#endif /* CONFIG_NFS_V4_2 */
+
static void nfs4_proc_read_setup(struct nfs_pgio_header *hdr,
struct rpc_message *msg)
{
hdr->timestamp = jiffies;
if (!hdr->pgio_done_cb)
hdr->pgio_done_cb = nfs4_read_done_cb;
- msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
+ nfs42_read_plus_support(NFS_SERVER(hdr->inode), msg);
nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0, 0);
}
@@ -5328,6 +5427,43 @@
return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0;
}
+static void nfs4_bitmask_set(__u32 bitmask[NFS4_BITMASK_SZ], const __u32 *src,
+ struct inode *inode, struct nfs_server *server,
+ struct nfs4_label *label)
+{
+ unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
+ unsigned int i;
+
+ memcpy(bitmask, src, sizeof(*bitmask) * NFS4_BITMASK_SZ);
+
+ if (cache_validity & (NFS_INO_INVALID_CHANGE | NFS_INO_REVAL_PAGECACHE))
+ bitmask[0] |= FATTR4_WORD0_CHANGE;
+ if (cache_validity & NFS_INO_INVALID_ATIME)
+ bitmask[1] |= FATTR4_WORD1_TIME_ACCESS;
+ if (cache_validity & NFS_INO_INVALID_OTHER)
+ bitmask[1] |= FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER |
+ FATTR4_WORD1_OWNER_GROUP |
+ FATTR4_WORD1_NUMLINKS;
+ if (label && label->len && cache_validity & NFS_INO_INVALID_LABEL)
+ bitmask[2] |= FATTR4_WORD2_SECURITY_LABEL;
+ if (cache_validity & NFS_INO_INVALID_CTIME)
+ bitmask[1] |= FATTR4_WORD1_TIME_METADATA;
+ if (cache_validity & NFS_INO_INVALID_MTIME)
+ bitmask[1] |= FATTR4_WORD1_TIME_MODIFY;
+ if (cache_validity & NFS_INO_INVALID_BLOCKS)
+ bitmask[1] |= FATTR4_WORD1_SPACE_USED;
+
+ if (nfs4_have_delegation(inode, FMODE_READ) &&
+ !(cache_validity & NFS_INO_REVAL_FORCED))
+ bitmask[0] &= ~FATTR4_WORD0_SIZE;
+ else if (cache_validity &
+ (NFS_INO_INVALID_SIZE | NFS_INO_REVAL_PAGECACHE))
+ bitmask[0] |= FATTR4_WORD0_SIZE;
+
+ for (i = 0; i < NFS4_BITMASK_SZ; i++)
+ bitmask[i] &= server->attr_bitmask[i];
+}
+
static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr,
struct rpc_message *msg,
struct rpc_clnt **clnt)
@@ -5337,8 +5473,12 @@
if (!nfs4_write_need_cache_consistency_data(hdr)) {
hdr->args.bitmask = NULL;
hdr->res.fattr = NULL;
- } else
- hdr->args.bitmask = server->cache_consistency_bitmask;
+ } else {
+ nfs4_bitmask_set(hdr->args.bitmask_store,
+ server->cache_consistency_bitmask,
+ hdr->inode, server, NULL);
+ hdr->args.bitmask = hdr->args.bitmask_store;
+ }
if (!hdr->pgio_done_cb)
hdr->pgio_done_cb = nfs4_write_done_cb;
@@ -5528,7 +5668,7 @@
*/
#define NFS4ACL_MAXPAGES DIV_ROUND_UP(XATTR_SIZE_MAX, PAGE_SIZE)
-static int buf_to_pages_noslab(const void *buf, size_t buflen,
+int nfs4_buf_to_pages_noslab(const void *buf, size_t buflen,
struct page **pages)
{
struct page *newpage, **spages;
@@ -5560,7 +5700,7 @@
struct nfs4_cached_acl {
int cached;
size_t len;
- char data[0];
+ char data[];
};
static void nfs4_set_cached_acl(struct inode *inode, struct nfs4_cached_acl *acl)
@@ -5637,10 +5777,9 @@
*/
static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
{
- struct page *pages[NFS4ACL_MAXPAGES + 1] = {NULL, };
+ struct page **pages;
struct nfs_getaclargs args = {
.fh = NFS_FH(inode),
- .acl_pages = pages,
.acl_len = buflen,
};
struct nfs_getaclres res = {
@@ -5651,11 +5790,19 @@
.rpc_argp = &args,
.rpc_resp = &res,
};
- unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE) + 1;
+ unsigned int npages;
int ret = -ENOMEM, i;
+ struct nfs_server *server = NFS_SERVER(inode);
- if (npages > ARRAY_SIZE(pages))
- return -ERANGE;
+ if (buflen == 0)
+ buflen = server->rsize;
+
+ npages = DIV_ROUND_UP(buflen, PAGE_SIZE) + 1;
+ pages = kmalloc_array(npages, sizeof(struct page *), GFP_NOFS);
+ if (!pages)
+ return -ENOMEM;
+
+ args.acl_pages = pages;
for (i = 0; i < npages; i++) {
pages[i] = alloc_page(GFP_KERNEL);
@@ -5701,6 +5848,7 @@
__free_page(pages[i]);
if (res.acl_scratch)
__free_page(res.acl_scratch);
+ kfree(pages);
return ret;
}
@@ -5765,7 +5913,7 @@
return -EOPNOTSUPP;
if (npages > ARRAY_SIZE(pages))
return -ERANGE;
- i = buf_to_pages_noslab(buf, buflen, arg.acl_pages);
+ i = nfs4_buf_to_pages_noslab(buf, buflen, arg.acl_pages);
if (i < 0)
return i;
nfs4_inode_make_writeable(inode);
@@ -5977,9 +6125,34 @@
memcpy(bootverf->data, verf, sizeof(bootverf->data));
}
+static size_t
+nfs4_get_uniquifier(struct nfs_client *clp, char *buf, size_t buflen)
+{
+ struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
+ struct nfs_netns_client *nn_clp = nn->nfs_client;
+ const char *id;
+
+ buf[0] = '\0';
+
+ if (nn_clp) {
+ rcu_read_lock();
+ id = rcu_dereference(nn_clp->identifier);
+ if (id)
+ strscpy(buf, id, buflen);
+ rcu_read_unlock();
+ }
+
+ if (nfs4_client_id_uniquifier[0] != '\0' && buf[0] == '\0')
+ strscpy(buf, nfs4_client_id_uniquifier, buflen);
+
+ return strlen(buf);
+}
+
static int
nfs4_init_nonuniform_client_string(struct nfs_client *clp)
{
+ char buf[NFS4_CLIENT_ID_UNIQ_LEN];
+ size_t buflen;
size_t len;
char *str;
@@ -5993,8 +6166,11 @@
strlen(rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)) +
1;
rcu_read_unlock();
- if (nfs4_client_id_uniquifier[0] != '\0')
- len += strlen(nfs4_client_id_uniquifier) + 1;
+
+ buflen = nfs4_get_uniquifier(clp, buf, sizeof(buf));
+ if (buflen)
+ len += buflen + 1;
+
if (len > NFS4_OPAQUE_LIMIT + 1)
return -EINVAL;
@@ -6008,10 +6184,9 @@
return -ENOMEM;
rcu_read_lock();
- if (nfs4_client_id_uniquifier[0] != '\0')
+ if (buflen)
scnprintf(str, len, "Linux NFSv4.0 %s/%s/%s",
- clp->cl_rpcclient->cl_nodename,
- nfs4_client_id_uniquifier,
+ clp->cl_rpcclient->cl_nodename, buf,
rpc_peeraddr2str(clp->cl_rpcclient,
RPC_DISPLAY_ADDR));
else
@@ -6026,50 +6201,23 @@
}
static int
-nfs4_init_uniquifier_client_string(struct nfs_client *clp)
-{
- size_t len;
- char *str;
-
- len = 10 + 10 + 1 + 10 + 1 +
- strlen(nfs4_client_id_uniquifier) + 1 +
- strlen(clp->cl_rpcclient->cl_nodename) + 1;
-
- if (len > NFS4_OPAQUE_LIMIT + 1)
- return -EINVAL;
-
- /*
- * Since this string is allocated at mount time, and held until the
- * nfs_client is destroyed, we can use GFP_KERNEL here w/o worrying
- * about a memory-reclaim deadlock.
- */
- str = kmalloc(len, GFP_KERNEL);
- if (!str)
- return -ENOMEM;
-
- scnprintf(str, len, "Linux NFSv%u.%u %s/%s",
- clp->rpc_ops->version, clp->cl_minorversion,
- nfs4_client_id_uniquifier,
- clp->cl_rpcclient->cl_nodename);
- clp->cl_owner_id = str;
- return 0;
-}
-
-static int
nfs4_init_uniform_client_string(struct nfs_client *clp)
{
+ char buf[NFS4_CLIENT_ID_UNIQ_LEN];
+ size_t buflen;
size_t len;
char *str;
if (clp->cl_owner_id != NULL)
return 0;
- if (nfs4_client_id_uniquifier[0] != '\0')
- return nfs4_init_uniquifier_client_string(clp);
-
len = 10 + 10 + 1 + 10 + 1 +
strlen(clp->cl_rpcclient->cl_nodename) + 1;
+ buflen = nfs4_get_uniquifier(clp, buf, sizeof(buf));
+ if (buflen)
+ len += buflen + 1;
+
if (len > NFS4_OPAQUE_LIMIT + 1)
return -EINVAL;
@@ -6082,9 +6230,14 @@
if (!str)
return -ENOMEM;
- scnprintf(str, len, "Linux NFSv%u.%u %s",
- clp->rpc_ops->version, clp->cl_minorversion,
- clp->cl_rpcclient->cl_nodename);
+ if (buflen)
+ scnprintf(str, len, "Linux NFSv%u.%u %s/%s",
+ clp->rpc_ops->version, clp->cl_minorversion,
+ buf, clp->cl_rpcclient->cl_nodename);
+ else
+ scnprintf(str, len, "Linux NFSv%u.%u %s",
+ clp->rpc_ops->version, clp->cl_minorversion,
+ clp->cl_rpcclient->cl_nodename);
clp->cl_owner_id = str;
return 0;
}
@@ -6266,23 +6419,27 @@
nfs4_free_revoked_stateid(data->res.server,
data->args.stateid,
task->tk_msg.rpc_cred);
- /* Fallthrough */
+ fallthrough;
case -NFS4ERR_BAD_STATEID:
case -NFS4ERR_STALE_STATEID:
+ case -ETIMEDOUT:
task->tk_status = 0;
break;
case -NFS4ERR_OLD_STATEID:
- if (nfs4_refresh_delegation_stateid(&data->stateid, data->inode))
- goto out_restart;
- task->tk_status = 0;
- break;
+ if (!nfs4_refresh_delegation_stateid(&data->stateid, data->inode))
+ nfs4_stateid_seqid_inc(&data->stateid);
+ if (data->args.bitmask) {
+ data->args.bitmask = NULL;
+ data->res.fattr = NULL;
+ }
+ goto out_restart;
case -NFS4ERR_ACCESS:
if (data->args.bitmask) {
data->args.bitmask = NULL;
data->res.fattr = NULL;
goto out_restart;
}
- /* Fallthrough */
+ fallthrough;
default:
task->tk_status = nfs4_async_handle_exception(task,
data->res.server, task->tk_status,
@@ -6290,6 +6447,7 @@
if (exception.retry)
goto out_restart;
}
+ nfs_delegation_mark_returned(data->inode, data->args.stateid);
data->rpc_status = task->tk_status;
return;
out_restart:
@@ -6355,7 +6513,7 @@
.rpc_client = server->client,
.rpc_message = &msg,
.callback_ops = &nfs4_delegreturn_ops,
- .flags = RPC_TASK_ASYNC,
+ .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT,
};
int status = 0;
@@ -6369,7 +6527,10 @@
data->args.fhandle = &data->fh;
data->args.stateid = &data->stateid;
- data->args.bitmask = server->cache_consistency_bitmask;
+ nfs4_bitmask_set(data->args.bitmask_store,
+ server->cache_consistency_bitmask, inode, server,
+ NULL);
+ data->args.bitmask = data->args.bitmask_store;
nfs_copy_fh(&data->fh, NFS_FH(inode));
nfs4_stateid_copy(&data->stateid, stateid);
data->res.fattr = &data->fattr;
@@ -6592,13 +6753,13 @@
if (nfs4_update_lock_stateid(calldata->lsp,
&calldata->res.stateid))
break;
- /* Fall through */
+ fallthrough;
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_EXPIRED:
nfs4_free_revoked_stateid(calldata->server,
&calldata->arg.stateid,
task->tk_msg.rpc_cred);
- /* Fall through */
+ fallthrough;
case -NFS4ERR_BAD_STATEID:
case -NFS4ERR_STALE_STATEID:
if (nfs4_sync_lock_stateid(&calldata->arg.stateid,
@@ -6925,7 +7086,7 @@
case -NFS4ERR_STALE_STATEID:
lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
nfs4_schedule_lease_recovery(server->nfs_client);
- };
+ }
}
static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *fl, int recovery_type)
@@ -6941,7 +7102,7 @@
.rpc_message = &msg,
.callback_ops = &nfs4_lock_ops,
.workqueue = nfsiod_workqueue,
- .flags = RPC_TASK_ASYNC,
+ .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF,
};
int ret;
@@ -7408,7 +7569,7 @@
if (nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL)) {
len = security_inode_listsecurity(inode, list, list_len);
- if (list_len && len > list_len)
+ if (len >= 0 && list_len && len > list_len)
return -ERANGE;
}
return len;
@@ -7430,6 +7591,133 @@
#endif
+#ifdef CONFIG_NFS_V4_2
+static int nfs4_xattr_set_nfs4_user(const struct xattr_handler *handler,
+ struct dentry *unused, struct inode *inode,
+ const char *key, const void *buf,
+ size_t buflen, int flags)
+{
+ u32 mask;
+ int ret;
+
+ if (!nfs_server_capable(inode, NFS_CAP_XATTR))
+ return -EOPNOTSUPP;
+
+ /*
+ * There is no mapping from the MAY_* flags to the NFS_ACCESS_XA*
+ * flags right now. Handling of xattr operations use the normal
+ * file read/write permissions.
+ *
+ * Just in case the server has other ideas (which RFC 8276 allows),
+ * do a cached access check for the XA* flags to possibly avoid
+ * doing an RPC and getting EACCES back.
+ */
+ if (!nfs_access_get_cached(inode, current_cred(), &mask, true)) {
+ if (!(mask & NFS_ACCESS_XAWRITE))
+ return -EACCES;
+ }
+
+ if (buf == NULL) {
+ ret = nfs42_proc_removexattr(inode, key);
+ if (!ret)
+ nfs4_xattr_cache_remove(inode, key);
+ } else {
+ ret = nfs42_proc_setxattr(inode, key, buf, buflen, flags);
+ if (!ret)
+ nfs4_xattr_cache_add(inode, key, buf, NULL, buflen);
+ }
+
+ return ret;
+}
+
+static int nfs4_xattr_get_nfs4_user(const struct xattr_handler *handler,
+ struct dentry *unused, struct inode *inode,
+ const char *key, void *buf, size_t buflen)
+{
+ u32 mask;
+ ssize_t ret;
+
+ if (!nfs_server_capable(inode, NFS_CAP_XATTR))
+ return -EOPNOTSUPP;
+
+ if (!nfs_access_get_cached(inode, current_cred(), &mask, true)) {
+ if (!(mask & NFS_ACCESS_XAREAD))
+ return -EACCES;
+ }
+
+ ret = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ if (ret)
+ return ret;
+
+ ret = nfs4_xattr_cache_get(inode, key, buf, buflen);
+ if (ret >= 0 || (ret < 0 && ret != -ENOENT))
+ return ret;
+
+ ret = nfs42_proc_getxattr(inode, key, buf, buflen);
+
+ return ret;
+}
+
+static ssize_t
+nfs4_listxattr_nfs4_user(struct inode *inode, char *list, size_t list_len)
+{
+ u64 cookie;
+ bool eof;
+ ssize_t ret, size;
+ char *buf;
+ size_t buflen;
+ u32 mask;
+
+ if (!nfs_server_capable(inode, NFS_CAP_XATTR))
+ return 0;
+
+ if (!nfs_access_get_cached(inode, current_cred(), &mask, true)) {
+ if (!(mask & NFS_ACCESS_XALIST))
+ return 0;
+ }
+
+ ret = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ if (ret)
+ return ret;
+
+ ret = nfs4_xattr_cache_list(inode, list, list_len);
+ if (ret >= 0 || (ret < 0 && ret != -ENOENT))
+ return ret;
+
+ cookie = 0;
+ eof = false;
+ buflen = list_len ? list_len : XATTR_LIST_MAX;
+ buf = list_len ? list : NULL;
+ size = 0;
+
+ while (!eof) {
+ ret = nfs42_proc_listxattrs(inode, buf, buflen,
+ &cookie, &eof);
+ if (ret < 0)
+ return ret;
+
+ if (list_len) {
+ buf += ret;
+ buflen -= ret;
+ }
+ size += ret;
+ }
+
+ if (list_len)
+ nfs4_xattr_cache_set_list(inode, list, size);
+
+ return size;
+}
+
+#else
+
+static ssize_t
+nfs4_listxattr_nfs4_user(struct inode *inode, char *list, size_t list_len)
+{
+ return 0;
+}
+#endif /* CONFIG_NFS_V4_2 */
+
/*
* nfs_fhget will use either the mounted_on_fileid or the fileid
*/
@@ -8516,7 +8804,7 @@
dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status);
rpc_delay(task, NFS4_POLL_RETRY_MIN);
task->tk_status = 0;
- /* fall through */
+ fallthrough;
case -NFS4ERR_RETRY_UNCACHED_REP:
rpc_restart_call_prepare(task);
return;
@@ -8711,7 +8999,7 @@
case -EACCES:
case -EAGAIN:
goto out;
- };
+ }
clp->cl_seqid++;
if (!status) {
@@ -8964,13 +9252,13 @@
switch(task->tk_status) {
case 0:
wake_up_all(&clp->cl_lock_waitq);
- /* Fallthrough */
+ fallthrough;
case -NFS4ERR_COMPLETE_ALREADY:
case -NFS4ERR_WRONG_CRED: /* What to do here? */
break;
case -NFS4ERR_DELAY:
rpc_delay(task, NFS4_POLL_RETRY_MAX);
- /* fall through */
+ fallthrough;
case -NFS4ERR_RETRY_UNCACHED_REP:
return -EAGAIN;
case -NFS4ERR_BADSESSION:
@@ -9201,7 +9489,7 @@
.rpc_message = &msg,
.callback_ops = &nfs4_layoutget_call_ops,
.callback_data = lgp,
- .flags = RPC_TASK_ASYNC,
+ .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF,
};
struct pnfs_layout_segment *lseg = NULL;
struct nfs4_exception exception = {
@@ -9218,8 +9506,7 @@
nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0, 0);
task = rpc_run_task(&task_setup_data);
- if (IS_ERR(task))
- return ERR_CAST(task);
+
status = rpc_wait_for_completion_task(task);
if (status != 0)
goto out;
@@ -9286,10 +9573,10 @@
&lrp->args.range,
lrp->args.inode))
goto out_restart;
- /* Fallthrough */
+ fallthrough;
default:
task->tk_status = 0;
- /* Fallthrough */
+ fallthrough;
case 0:
break;
case -NFS4ERR_DELAY:
@@ -9318,6 +9605,7 @@
lrp->ld_private.ops->free(&lrp->ld_private);
pnfs_put_layout_hdr(lrp->args.layout);
nfs_iput_and_deactive(lrp->inode);
+ put_cred(lrp->cred);
kfree(calldata);
dprintk("<-- %s\n", __func__);
}
@@ -9542,7 +9830,6 @@
.rpc_argp = &args,
.rpc_resp = &res,
};
- struct rpc_clnt *clnt = server->client;
struct nfs4_call_sync_data data = {
.seq_server = server,
.seq_args = &args.seq_args,
@@ -9559,8 +9846,7 @@
int status;
if (use_integrity) {
- clnt = server->nfs_client->cl_rpcclient;
- task_setup.rpc_client = clnt;
+ task_setup.rpc_client = server->nfs_client->cl_rpcclient;
cred = nfs4_get_clid_cred(server->nfs_client);
msg.rpc_cred = cred;
@@ -10009,11 +10295,13 @@
| NFS_CAP_ALLOCATE
| NFS_CAP_COPY
| NFS_CAP_OFFLOAD_CANCEL
+ | NFS_CAP_COPY_NOTIFY
| NFS_CAP_DEALLOCATE
| NFS_CAP_SEEK
| NFS_CAP_LAYOUTSTATS
| NFS_CAP_CLONE
- | NFS_CAP_LAYOUTERROR,
+ | NFS_CAP_LAYOUTERROR
+ | NFS_CAP_READ_PLUS,
.init_client = nfs41_init_client,
.shutdown_client = nfs41_shutdown_client,
.match_stateid = nfs41_match_stateid,
@@ -10042,7 +10330,7 @@
static ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size)
{
- ssize_t error, error2;
+ ssize_t error, error2, error3;
error = generic_listxattr(dentry, list, size);
if (error < 0)
@@ -10055,7 +10343,17 @@
error2 = nfs4_listxattr_nfs4_label(d_inode(dentry), list, size);
if (error2 < 0)
return error2;
- return error + error2;
+
+ if (list) {
+ list += error2;
+ size -= error2;
+ }
+
+ error3 = nfs4_listxattr_nfs4_user(d_inode(dentry), list, size);
+ if (error3 < 0)
+ return error3;
+
+ return error + error2 + error3;
}
static const struct inode_operations nfs4_dir_inode_operations = {
@@ -10090,7 +10388,7 @@
.file_ops = &nfs4_file_operations,
.getroot = nfs4_proc_get_root,
.submount = nfs4_submount,
- .try_mount = nfs4_try_mount,
+ .try_get_tree = nfs4_try_get_tree,
.getattr = nfs4_proc_getattr,
.setattr = nfs4_proc_setattr,
.lookup = nfs4_proc_lookup,
@@ -10143,11 +10441,22 @@
.set = nfs4_xattr_set_nfs4_acl,
};
+#ifdef CONFIG_NFS_V4_2
+static const struct xattr_handler nfs4_xattr_nfs4_user_handler = {
+ .prefix = XATTR_USER_PREFIX,
+ .get = nfs4_xattr_get_nfs4_user,
+ .set = nfs4_xattr_set_nfs4_user,
+};
+#endif
+
const struct xattr_handler *nfs4_xattr_handlers[] = {
&nfs4_xattr_nfs4_acl_handler,
#ifdef CONFIG_NFS_V4_SECURITY_LABEL
&nfs4_xattr_nfs4_label_handler,
#endif
+#ifdef CONFIG_NFS_V4_2
+ &nfs4_xattr_nfs4_user_handler,
+#endif
NULL
};
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index ea680f6..cbeec29 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -60,6 +60,7 @@
#include "nfs4session.h"
#include "pnfs.h"
#include "netns.h"
+#include "nfs4trace.h"
#define NFSDBG_FACILITY NFSDBG_STATE
@@ -508,7 +509,7 @@
nfs4_init_seqid_counter(&sp->so_seqid);
atomic_set(&sp->so_count, 1);
INIT_LIST_HEAD(&sp->so_lru);
- seqcount_init(&sp->so_reclaim_seqcount);
+ seqcount_spinlock_init(&sp->so_reclaim_seqcount, &sp->so_lock);
mutex_init(&sp->so_delegreturn_mutex);
return sp;
}
@@ -763,6 +764,7 @@
list_del(&state->open_states);
spin_unlock(&inode->i_lock);
spin_unlock(&owner->so_lock);
+ nfs4_inode_return_delegation_on_close(inode);
iput(inode);
nfs4_free_open_state(state);
nfs4_put_state_owner(owner);
@@ -1132,7 +1134,7 @@
case -NFS4ERR_MOVED:
/* Non-seqid mutating errors */
return;
- };
+ }
/*
* Note: no locking needed as we are guaranteed to be first
* on the sequence list
@@ -1405,7 +1407,7 @@
list_for_each_entry(pos, &state->lock_states, ls_locks) {
if (!test_bit(NFS_LOCK_INITIALIZED, &pos->ls_flags))
continue;
- if (nfs4_stateid_match_other(&pos->ls_stateid, stateid))
+ if (nfs4_stateid_match_or_older(&pos->ls_stateid, stateid))
return pos;
}
return NULL;
@@ -1439,12 +1441,13 @@
state = ctx->state;
if (state == NULL)
continue;
- if (nfs4_stateid_match_other(&state->stateid, stateid) &&
+ if (nfs4_stateid_match_or_older(&state->stateid, stateid) &&
nfs4_state_mark_reclaim_nograce(clp, state)) {
found = true;
continue;
}
- if (nfs4_stateid_match_other(&state->open_stateid, stateid) &&
+ if (test_bit(NFS_OPEN_STATE, &state->flags) &&
+ nfs4_stateid_match_or_older(&state->open_stateid, stateid) &&
nfs4_state_mark_reclaim_nograce(clp, state)) {
found = true;
continue;
@@ -1527,7 +1530,7 @@
default:
pr_err("NFS: %s: unhandled error %d\n",
__func__, status);
- /* Fall through */
+ fallthrough;
case -ENOMEM:
case -NFS4ERR_DENIED:
case -NFS4ERR_RECLAIM_BAD:
@@ -1554,16 +1557,32 @@
{
struct nfs4_copy_state *copy;
- if (!test_bit(NFS_CLNT_DST_SSC_COPY_STATE, &state->flags))
+ if (!test_bit(NFS_CLNT_DST_SSC_COPY_STATE, &state->flags) &&
+ !test_bit(NFS_CLNT_SRC_SSC_COPY_STATE, &state->flags))
return;
spin_lock(&sp->so_server->nfs_client->cl_lock);
list_for_each_entry(copy, &sp->so_server->ss_copies, copies) {
- if (!nfs4_stateid_match_other(&state->stateid, ©->parent_state->stateid))
- continue;
+ if ((test_bit(NFS_CLNT_DST_SSC_COPY_STATE, &state->flags) &&
+ !nfs4_stateid_match_other(&state->stateid,
+ ©->parent_dst_state->stateid)))
+ continue;
copy->flags = 1;
- complete(©->completion);
- break;
+ if (test_and_clear_bit(NFS_CLNT_DST_SSC_COPY_STATE,
+ &state->flags)) {
+ clear_bit(NFS_CLNT_SRC_SSC_COPY_STATE, &state->flags);
+ complete(©->completion);
+ }
+ }
+ list_for_each_entry(copy, &sp->so_server->ss_copies, src_copies) {
+ if ((test_bit(NFS_CLNT_SRC_SSC_COPY_STATE, &state->flags) &&
+ !nfs4_stateid_match_other(&state->stateid,
+ ©->parent_src_state->stateid)))
+ continue;
+ copy->flags = 1;
+ if (test_and_clear_bit(NFS_CLNT_DST_SSC_COPY_STATE,
+ &state->flags))
+ complete(©->completion);
}
spin_unlock(&sp->so_server->nfs_client->cl_lock);
}
@@ -1591,6 +1610,7 @@
if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) {
spin_lock(&state->state_lock);
list_for_each_entry(lock, &state->lock_states, ls_locks) {
+ trace_nfs4_state_lock_reclaim(state, lock);
if (!test_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags))
pr_warn_ratelimited("NFS: %s: Lock reclaim failed!\n", __func__);
}
@@ -1607,6 +1627,9 @@
struct nfs4_state *state;
unsigned int loop = 0;
int status = 0;
+#ifdef CONFIG_NFS_V4_2
+ bool found_ssc_copy_state = false;
+#endif /* CONFIG_NFS_V4_2 */
/* Note: we rely on the sp->so_states list being ordered
* so that we always reclaim open(O_RDWR) and/or open(O_WRITE)
@@ -1626,6 +1649,13 @@
continue;
if (state->state == 0)
continue;
+#ifdef CONFIG_NFS_V4_2
+ if (test_bit(NFS_SRV_SSC_COPY_STATE, &state->flags)) {
+ nfs4_state_mark_recovery_failed(state, -EIO);
+ found_ssc_copy_state = true;
+ continue;
+ }
+#endif /* CONFIG_NFS_V4_2 */
refcount_inc(&state->count);
spin_unlock(&sp->so_lock);
status = __nfs4_reclaim_open_state(sp, state, ops);
@@ -1637,7 +1667,7 @@
break;
}
printk(KERN_ERR "NFS: %s: unhandled error %d\n", __func__, status);
- /* Fall through */
+ fallthrough;
case -ENOENT:
case -ENOMEM:
case -EACCES:
@@ -1653,7 +1683,7 @@
set_bit(ops->state_flag_bit, &state->flags);
break;
}
- /* Fall through */
+ fallthrough;
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_OLD_STATEID:
@@ -1665,7 +1695,7 @@
case -NFS4ERR_EXPIRED:
case -NFS4ERR_NO_GRACE:
nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
- /* Fall through */
+ fallthrough;
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
@@ -1680,6 +1710,10 @@
}
raw_write_seqcount_end(&sp->so_reclaim_seqcount);
spin_unlock(&sp->so_lock);
+#ifdef CONFIG_NFS_V4_2
+ if (found_ssc_copy_state)
+ return -EIO;
+#endif /* CONFIG_NFS_V4_2 */
return 0;
out_err:
nfs4_put_open_state(state);
@@ -2070,6 +2104,9 @@
}
result = -NFS4ERR_NXIO;
+ if (!locations->nlocations)
+ goto out;
+
if (!(locations->fattr.valid & NFS_ATTR_FATTR_V4_LOCATIONS)) {
dprintk("<-- %s: No fs_locations data, migration skipped\n",
__func__);
@@ -2239,11 +2276,11 @@
case -ETIMEDOUT:
if (clnt->cl_softrtry)
break;
- /* Fall through */
+ fallthrough;
case -NFS4ERR_DELAY:
case -EAGAIN:
ssleep(1);
- /* Fall through */
+ fallthrough;
case -NFS4ERR_STALE_CLIENTID:
dprintk("NFS: %s after status %d, retrying\n",
__func__, status);
@@ -2255,7 +2292,7 @@
}
if (clnt->cl_auth->au_flavor == RPC_AUTH_UNIX)
break;
- /* Fall through */
+ fallthrough;
case -NFS4ERR_CLID_INUSE:
case -NFS4ERR_WRONGSEC:
/* No point in retrying if we already used RPC_AUTH_UNIX */
@@ -2490,6 +2527,21 @@
}
return 0;
}
+
+static void nfs4_layoutreturn_any_run(struct nfs_client *clp)
+{
+ int iomode = 0;
+
+ if (test_and_clear_bit(NFS4CLNT_RECALL_ANY_LAYOUT_READ, &clp->cl_state))
+ iomode += IOMODE_READ;
+ if (test_and_clear_bit(NFS4CLNT_RECALL_ANY_LAYOUT_RW, &clp->cl_state))
+ iomode += IOMODE_RW;
+ /* Note: IOMODE_READ + IOMODE_RW == IOMODE_ANY */
+ if (iomode) {
+ pnfs_layout_return_unused_byclid(clp, iomode);
+ set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
+ }
+}
#else /* CONFIG_NFS_V4_1 */
static int nfs4_reset_session(struct nfs_client *clp) { return 0; }
@@ -2497,6 +2549,10 @@
{
return 0;
}
+
+static void nfs4_layoutreturn_any_run(struct nfs_client *clp)
+{
+}
#endif /* CONFIG_NFS_V4_1 */
static void nfs4_state_manager(struct nfs_client *clp)
@@ -2506,6 +2562,7 @@
/* Ensure exclusive access to NFSv4 state */
do {
+ trace_nfs4_state_mgr(clp);
clear_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) {
section = "purge state";
@@ -2600,12 +2657,13 @@
nfs4_end_drain_session(clp);
nfs4_clear_state_manager_bit(clp);
- if (!test_and_set_bit(NFS4CLNT_DELEGRETURN_RUNNING, &clp->cl_state)) {
+ if (!test_and_set_bit(NFS4CLNT_RECALL_RUNNING, &clp->cl_state)) {
if (test_and_clear_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) {
nfs_client_return_marked_delegations(clp);
set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
}
- clear_bit(NFS4CLNT_DELEGRETURN_RUNNING, &clp->cl_state);
+ nfs4_layoutreturn_any_run(clp);
+ clear_bit(NFS4CLNT_RECALL_RUNNING, &clp->cl_state);
}
/* Did we race with an attempt to give us more work? */
@@ -2619,6 +2677,7 @@
out_error:
if (strlen(section))
section_sep = ": ";
+ trace_nfs4_state_mgr_failed(clp, section, status);
pr_warn_ratelimited("NFS: state manager%s%s failed on NFSv4 server %s"
" with error %d\n", section_sep, section,
clp->cl_hostname, -status);
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c
index b90642b..d09bcfd 100644
--- a/fs/nfs/nfs4super.c
+++ b/fs/nfs/nfs4super.c
@@ -4,8 +4,10 @@
*/
#include <linux/init.h>
#include <linux/module.h>
+#include <linux/mount.h>
#include <linux/nfs4_mount.h>
#include <linux/nfs_fs.h>
+#include <linux/nfs_ssc.h>
#include "delegation.h"
#include "internal.h"
#include "nfs4_fs.h"
@@ -18,36 +20,6 @@
static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc);
static void nfs4_evict_inode(struct inode *inode);
-static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data);
-static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data);
-static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data);
-
-static struct file_system_type nfs4_remote_fs_type = {
- .owner = THIS_MODULE,
- .name = "nfs4",
- .mount = nfs4_remote_mount,
- .kill_sb = nfs_kill_super,
- .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
-};
-
-static struct file_system_type nfs4_remote_referral_fs_type = {
- .owner = THIS_MODULE,
- .name = "nfs4",
- .mount = nfs4_remote_referral_mount,
- .kill_sb = nfs_kill_super,
- .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
-};
-
-struct file_system_type nfs4_referral_fs_type = {
- .owner = THIS_MODULE,
- .name = "nfs4",
- .mount = nfs4_referral_mount,
- .kill_sb = nfs_kill_super,
- .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
-};
static const struct super_operations nfs4_sops = {
.alloc_inode = nfs_alloc_inode,
@@ -61,16 +33,15 @@
.show_devname = nfs_show_devname,
.show_path = nfs_show_path,
.show_stats = nfs_show_stats,
- .remount_fs = nfs_remount,
};
struct nfs_subversion nfs_v4 = {
- .owner = THIS_MODULE,
- .nfs_fs = &nfs4_fs_type,
- .rpc_vers = &nfs_version4,
- .rpc_ops = &nfs_v4_clientops,
- .sops = &nfs4_sops,
- .xattr = nfs4_xattr_handlers,
+ .owner = THIS_MODULE,
+ .nfs_fs = &nfs4_fs_type,
+ .rpc_vers = &nfs_version4,
+ .rpc_ops = &nfs_v4_clientops,
+ .sops = &nfs4_sops,
+ .xattr = nfs4_xattr_handlers,
};
static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc)
@@ -92,60 +63,14 @@
{
truncate_inode_pages_final(&inode->i_data);
clear_inode(inode);
- /* If we are holding a delegation, return it! */
- nfs_inode_return_delegation_noreclaim(inode);
+ /* If we are holding a delegation, return and free it */
+ nfs_inode_evict_delegation(inode);
/* Note that above delegreturn would trigger pnfs return-on-close */
pnfs_return_layout(inode);
pnfs_destroy_layout_final(NFS_I(inode));
/* First call standard NFS clear_inode() code */
nfs_clear_inode(inode);
-}
-
-/*
- * Get the superblock for the NFS4 root partition
- */
-static struct dentry *
-nfs4_remote_mount(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *info)
-{
- struct nfs_mount_info *mount_info = info;
- struct nfs_server *server;
- struct dentry *mntroot = ERR_PTR(-ENOMEM);
-
- mount_info->set_security = nfs_set_sb_security;
-
- /* Get a volume representation */
- server = nfs4_create_server(mount_info, &nfs_v4);
- if (IS_ERR(server)) {
- mntroot = ERR_CAST(server);
- goto out;
- }
-
- mntroot = nfs_fs_mount_common(server, flags, dev_name, mount_info, &nfs_v4);
-
-out:
- return mntroot;
-}
-
-static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type,
- int flags, void *data, const char *hostname)
-{
- struct vfsmount *root_mnt;
- char *root_devname;
- size_t len;
-
- len = strlen(hostname) + 5;
- root_devname = kmalloc(len, GFP_KERNEL);
- if (root_devname == NULL)
- return ERR_PTR(-ENOMEM);
- /* Does hostname needs to be enclosed in brackets? */
- if (strchr(hostname, ':'))
- snprintf(root_devname, len, "[%s]:/", hostname);
- else
- snprintf(root_devname, len, "%s:/", hostname);
- root_mnt = vfs_kern_mount(fs_type, flags, root_devname, data);
- kfree(root_devname);
- return root_mnt;
+ nfs4_xattr_cache_zap(inode);
}
struct nfs_referral_count {
@@ -214,111 +139,125 @@
kfree(p);
}
-static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt,
- const char *export_path)
+static int do_nfs4_mount(struct nfs_server *server,
+ struct fs_context *fc,
+ const char *hostname,
+ const char *export_path)
{
+ struct nfs_fs_context *root_ctx;
+ struct fs_context *root_fc;
+ struct vfsmount *root_mnt;
struct dentry *dentry;
- int err;
+ size_t len;
+ int ret;
+
+ struct fs_parameter param = {
+ .key = "source",
+ .type = fs_value_is_string,
+ .dirfd = -1,
+ };
+
+ if (IS_ERR(server))
+ return PTR_ERR(server);
+
+ root_fc = vfs_dup_fs_context(fc);
+ if (IS_ERR(root_fc)) {
+ nfs_free_server(server);
+ return PTR_ERR(root_fc);
+ }
+ kfree(root_fc->source);
+ root_fc->source = NULL;
+
+ root_ctx = nfs_fc2context(root_fc);
+ root_ctx->internal = true;
+ root_ctx->server = server;
+ /* We leave export_path unset as it's not used to find the root. */
+
+ len = strlen(hostname) + 5;
+ param.string = kmalloc(len, GFP_KERNEL);
+ if (param.string == NULL) {
+ put_fs_context(root_fc);
+ return -ENOMEM;
+ }
+
+ /* Does hostname needs to be enclosed in brackets? */
+ if (strchr(hostname, ':'))
+ param.size = snprintf(param.string, len, "[%s]:/", hostname);
+ else
+ param.size = snprintf(param.string, len, "%s:/", hostname);
+ ret = vfs_parse_fs_param(root_fc, ¶m);
+ kfree(param.string);
+ if (ret < 0) {
+ put_fs_context(root_fc);
+ return ret;
+ }
+ root_mnt = fc_mount(root_fc);
+ put_fs_context(root_fc);
if (IS_ERR(root_mnt))
- return ERR_CAST(root_mnt);
+ return PTR_ERR(root_mnt);
- err = nfs_referral_loop_protect();
- if (err) {
+ ret = nfs_referral_loop_protect();
+ if (ret) {
mntput(root_mnt);
- return ERR_PTR(err);
+ return ret;
}
dentry = mount_subtree(root_mnt, export_path);
nfs_referral_loop_unprotect();
- return dentry;
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+
+ fc->root = dentry;
+ return 0;
}
-struct dentry *nfs4_try_mount(int flags, const char *dev_name,
- struct nfs_mount_info *mount_info,
- struct nfs_subversion *nfs_mod)
+int nfs4_try_get_tree(struct fs_context *fc)
{
- char *export_path;
- struct vfsmount *root_mnt;
- struct dentry *res;
- struct nfs_parsed_mount_data *data = mount_info->parsed;
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
+ int err;
- dfprintk(MOUNT, "--> nfs4_try_mount()\n");
+ dfprintk(MOUNT, "--> nfs4_try_get_tree()\n");
- export_path = data->nfs_server.export_path;
- data->nfs_server.export_path = "/";
- root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info,
- data->nfs_server.hostname);
- data->nfs_server.export_path = export_path;
-
- res = nfs_follow_remote_path(root_mnt, export_path);
-
- dfprintk(MOUNT, "<-- nfs4_try_mount() = %d%s\n",
- PTR_ERR_OR_ZERO(res),
- IS_ERR(res) ? " [error]" : "");
- return res;
-}
-
-static struct dentry *
-nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *raw_data)
-{
- struct nfs_mount_info mount_info = {
- .fill_super = nfs_fill_super,
- .set_security = nfs_clone_sb_security,
- .cloned = raw_data,
- };
- struct nfs_server *server;
- struct dentry *mntroot = ERR_PTR(-ENOMEM);
-
- dprintk("--> nfs4_referral_get_sb()\n");
-
- mount_info.mntfh = nfs_alloc_fhandle();
- if (mount_info.cloned == NULL || mount_info.mntfh == NULL)
- goto out;
-
- /* create a new volume representation */
- server = nfs4_create_referral_server(mount_info.cloned, mount_info.mntfh);
- if (IS_ERR(server)) {
- mntroot = ERR_CAST(server);
- goto out;
+ /* We create a mount for the server's root, walk to the requested
+ * location and then create another mount for that.
+ */
+ err= do_nfs4_mount(nfs4_create_server(fc),
+ fc, ctx->nfs_server.hostname,
+ ctx->nfs_server.export_path);
+ if (err) {
+ nfs_ferrorf(fc, MOUNT, "NFS4: Couldn't follow remote path");
+ dfprintk(MOUNT, "<-- nfs4_try_get_tree() = %d [error]\n", err);
+ } else {
+ dfprintk(MOUNT, "<-- nfs4_try_get_tree() = 0\n");
}
-
- mntroot = nfs_fs_mount_common(server, flags, dev_name, &mount_info, &nfs_v4);
-out:
- nfs_free_fhandle(mount_info.mntfh);
- return mntroot;
+ return err;
}
/*
* Create an NFS4 server record on referral traversal
*/
-static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data)
+int nfs4_get_referral_tree(struct fs_context *fc)
{
- struct nfs_clone_mount *data = raw_data;
- char *export_path;
- struct vfsmount *root_mnt;
- struct dentry *res;
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
+ int err;
dprintk("--> nfs4_referral_mount()\n");
- export_path = data->mnt_path;
- data->mnt_path = "/";
-
- root_mnt = nfs_do_root_mount(&nfs4_remote_referral_fs_type,
- flags, data, data->hostname);
- data->mnt_path = export_path;
-
- res = nfs_follow_remote_path(root_mnt, export_path);
- dprintk("<-- nfs4_referral_mount() = %d%s\n",
- PTR_ERR_OR_ZERO(res),
- IS_ERR(res) ? " [error]" : "");
- return res;
+ /* create a new volume representation */
+ err = do_nfs4_mount(nfs4_create_referral_server(fc),
+ fc, ctx->nfs_server.hostname,
+ ctx->nfs_server.export_path);
+ if (err) {
+ nfs_ferrorf(fc, MOUNT, "NFS4: Couldn't follow remote path");
+ dfprintk(MOUNT, "<-- nfs4_get_referral_tree() = %d [error]\n", err);
+ } else {
+ dfprintk(MOUNT, "<-- nfs4_get_referral_tree() = 0\n");
+ }
+ return err;
}
-
static int __init init_nfs_v4(void)
{
int err;
@@ -331,10 +270,19 @@
if (err)
goto out1;
+#ifdef CONFIG_NFS_V4_2
+ err = nfs4_xattr_cache_init();
+ if (err)
+ goto out2;
+#endif
+
err = nfs4_register_sysctl();
if (err)
goto out2;
+#ifdef CONFIG_NFS_V4_2
+ nfs42_ssc_register_ops();
+#endif
register_nfs_version(&nfs_v4);
return 0;
out2:
@@ -351,6 +299,10 @@
nfs4_pnfs_v3_ds_connect_unload();
unregister_nfs_version(&nfs_v4);
+#ifdef CONFIG_NFS_V4_2
+ nfs4_xattr_cache_exit();
+ nfs42_ssc_unregister_ops();
+#endif
nfs4_unregister_sysctl();
nfs_idmap_quit();
nfs_dns_resolver_destroy();
diff --git a/fs/nfs/nfs4trace.c b/fs/nfs/nfs4trace.c
index 1a8f376..d9ac556 100644
--- a/fs/nfs/nfs4trace.c
+++ b/fs/nfs/nfs4trace.c
@@ -24,4 +24,8 @@
EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_write_done);
EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_read_pagelist);
EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_write_pagelist);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_read_error);
+EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_write_error);
+EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_commit_error);
#endif
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 2295a93..484c1da 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -155,6 +155,9 @@
TRACE_DEFINE_ENUM(NFS4ERR_WRONG_TYPE);
TRACE_DEFINE_ENUM(NFS4ERR_XDEV);
+TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_MDS);
+TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_PNFS);
+
#define show_nfsv4_errors(error) \
__print_symbolic(error, \
{ NFS4_OK, "OK" }, \
@@ -305,7 +308,10 @@
{ NFS4ERR_WRONGSEC, "WRONGSEC" }, \
{ NFS4ERR_WRONG_CRED, "WRONG_CRED" }, \
{ NFS4ERR_WRONG_TYPE, "WRONG_TYPE" }, \
- { NFS4ERR_XDEV, "XDEV" })
+ { NFS4ERR_XDEV, "XDEV" }, \
+ /* ***** Internal to Linux NFS client ***** */ \
+ { NFS4ERR_RESET_TO_MDS, "RESET_TO_MDS" }, \
+ { NFS4ERR_RESET_TO_PNFS, "RESET_TO_PNFS" })
#define show_open_flags(flags) \
__print_flags(flags, "|", \
@@ -563,6 +569,103 @@
)
);
+TRACE_DEFINE_ENUM(NFS4CLNT_MANAGER_RUNNING);
+TRACE_DEFINE_ENUM(NFS4CLNT_CHECK_LEASE);
+TRACE_DEFINE_ENUM(NFS4CLNT_LEASE_EXPIRED);
+TRACE_DEFINE_ENUM(NFS4CLNT_RECLAIM_REBOOT);
+TRACE_DEFINE_ENUM(NFS4CLNT_RECLAIM_NOGRACE);
+TRACE_DEFINE_ENUM(NFS4CLNT_DELEGRETURN);
+TRACE_DEFINE_ENUM(NFS4CLNT_SESSION_RESET);
+TRACE_DEFINE_ENUM(NFS4CLNT_LEASE_CONFIRM);
+TRACE_DEFINE_ENUM(NFS4CLNT_SERVER_SCOPE_MISMATCH);
+TRACE_DEFINE_ENUM(NFS4CLNT_PURGE_STATE);
+TRACE_DEFINE_ENUM(NFS4CLNT_BIND_CONN_TO_SESSION);
+TRACE_DEFINE_ENUM(NFS4CLNT_MOVED);
+TRACE_DEFINE_ENUM(NFS4CLNT_LEASE_MOVED);
+TRACE_DEFINE_ENUM(NFS4CLNT_DELEGATION_EXPIRED);
+TRACE_DEFINE_ENUM(NFS4CLNT_RUN_MANAGER);
+TRACE_DEFINE_ENUM(NFS4CLNT_RECALL_RUNNING);
+TRACE_DEFINE_ENUM(NFS4CLNT_RECALL_ANY_LAYOUT_READ);
+TRACE_DEFINE_ENUM(NFS4CLNT_RECALL_ANY_LAYOUT_RW);
+
+#define show_nfs4_clp_state(state) \
+ __print_flags(state, "|", \
+ { NFS4CLNT_MANAGER_RUNNING, "MANAGER_RUNNING" }, \
+ { NFS4CLNT_CHECK_LEASE, "CHECK_LEASE" }, \
+ { NFS4CLNT_LEASE_EXPIRED, "LEASE_EXPIRED" }, \
+ { NFS4CLNT_RECLAIM_REBOOT, "RECLAIM_REBOOT" }, \
+ { NFS4CLNT_RECLAIM_NOGRACE, "RECLAIM_NOGRACE" }, \
+ { NFS4CLNT_DELEGRETURN, "DELEGRETURN" }, \
+ { NFS4CLNT_SESSION_RESET, "SESSION_RESET" }, \
+ { NFS4CLNT_LEASE_CONFIRM, "LEASE_CONFIRM" }, \
+ { NFS4CLNT_SERVER_SCOPE_MISMATCH, \
+ "SERVER_SCOPE_MISMATCH" }, \
+ { NFS4CLNT_PURGE_STATE, "PURGE_STATE" }, \
+ { NFS4CLNT_BIND_CONN_TO_SESSION, \
+ "BIND_CONN_TO_SESSION" }, \
+ { NFS4CLNT_MOVED, "MOVED" }, \
+ { NFS4CLNT_LEASE_MOVED, "LEASE_MOVED" }, \
+ { NFS4CLNT_DELEGATION_EXPIRED, "DELEGATION_EXPIRED" }, \
+ { NFS4CLNT_RUN_MANAGER, "RUN_MANAGER" }, \
+ { NFS4CLNT_RECALL_RUNNING, "RECALL_RUNNING" }, \
+ { NFS4CLNT_RECALL_ANY_LAYOUT_READ, "RECALL_ANY_LAYOUT_READ" }, \
+ { NFS4CLNT_RECALL_ANY_LAYOUT_RW, "RECALL_ANY_LAYOUT_RW" })
+
+TRACE_EVENT(nfs4_state_mgr,
+ TP_PROTO(
+ const struct nfs_client *clp
+ ),
+
+ TP_ARGS(clp),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, state)
+ __string(hostname, clp->cl_hostname)
+ ),
+
+ TP_fast_assign(
+ __entry->state = clp->cl_state;
+ __assign_str(hostname, clp->cl_hostname)
+ ),
+
+ TP_printk(
+ "hostname=%s clp state=%s", __get_str(hostname),
+ show_nfs4_clp_state(__entry->state)
+ )
+)
+
+TRACE_EVENT(nfs4_state_mgr_failed,
+ TP_PROTO(
+ const struct nfs_client *clp,
+ const char *section,
+ int status
+ ),
+
+ TP_ARGS(clp, section, status),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, error)
+ __field(unsigned long, state)
+ __string(hostname, clp->cl_hostname)
+ __string(section, section)
+ ),
+
+ TP_fast_assign(
+ __entry->error = status < 0 ? -status : 0;
+ __entry->state = clp->cl_state;
+ __assign_str(hostname, clp->cl_hostname);
+ __assign_str(section, section);
+ ),
+
+ TP_printk(
+ "hostname=%s clp state=%s error=%ld (%s) section=%s",
+ __get_str(hostname),
+ show_nfs4_clp_state(__entry->state), -__entry->error,
+ show_nfsv4_errors(__entry->error), __get_str(section)
+
+ )
+)
+
TRACE_EVENT(nfs4_xdr_status,
TP_PROTO(
const struct xdr_stream *xdr,
@@ -599,6 +702,41 @@
)
);
+DECLARE_EVENT_CLASS(nfs4_cb_error_class,
+ TP_PROTO(
+ __be32 xid,
+ u32 cb_ident
+ ),
+
+ TP_ARGS(xid, cb_ident),
+
+ TP_STRUCT__entry(
+ __field(u32, xid)
+ __field(u32, cbident)
+ ),
+
+ TP_fast_assign(
+ __entry->xid = be32_to_cpu(xid);
+ __entry->cbident = cb_ident;
+ ),
+
+ TP_printk(
+ "xid=0x%08x cb_ident=0x%08x",
+ __entry->xid, __entry->cbident
+ )
+);
+
+#define DEFINE_CB_ERROR_EVENT(name) \
+ DEFINE_EVENT(nfs4_cb_error_class, nfs_cb_##name, \
+ TP_PROTO( \
+ __be32 xid, \
+ u32 cb_ident \
+ ), \
+ TP_ARGS(xid, cb_ident))
+
+DEFINE_CB_ERROR_EVENT(no_clp);
+DEFINE_CB_ERROR_EVENT(badprinc);
+
DECLARE_EVENT_CLASS(nfs4_open_event,
TP_PROTO(
const struct nfs_open_context *ctx,
@@ -930,6 +1068,88 @@
)
);
+TRACE_DEFINE_ENUM(LK_STATE_IN_USE);
+TRACE_DEFINE_ENUM(NFS_DELEGATED_STATE);
+TRACE_DEFINE_ENUM(NFS_OPEN_STATE);
+TRACE_DEFINE_ENUM(NFS_O_RDONLY_STATE);
+TRACE_DEFINE_ENUM(NFS_O_WRONLY_STATE);
+TRACE_DEFINE_ENUM(NFS_O_RDWR_STATE);
+TRACE_DEFINE_ENUM(NFS_STATE_RECLAIM_REBOOT);
+TRACE_DEFINE_ENUM(NFS_STATE_RECLAIM_NOGRACE);
+TRACE_DEFINE_ENUM(NFS_STATE_POSIX_LOCKS);
+TRACE_DEFINE_ENUM(NFS_STATE_RECOVERY_FAILED);
+TRACE_DEFINE_ENUM(NFS_STATE_MAY_NOTIFY_LOCK);
+TRACE_DEFINE_ENUM(NFS_STATE_CHANGE_WAIT);
+TRACE_DEFINE_ENUM(NFS_CLNT_DST_SSC_COPY_STATE);
+TRACE_DEFINE_ENUM(NFS_CLNT_SRC_SSC_COPY_STATE);
+TRACE_DEFINE_ENUM(NFS_SRV_SSC_COPY_STATE);
+
+#define show_nfs4_state_flags(flags) \
+ __print_flags(flags, "|", \
+ { LK_STATE_IN_USE, "IN_USE" }, \
+ { NFS_DELEGATED_STATE, "DELEGATED" }, \
+ { NFS_OPEN_STATE, "OPEN" }, \
+ { NFS_O_RDONLY_STATE, "O_RDONLY" }, \
+ { NFS_O_WRONLY_STATE, "O_WRONLY" }, \
+ { NFS_O_RDWR_STATE, "O_RDWR" }, \
+ { NFS_STATE_RECLAIM_REBOOT, "RECLAIM_REBOOT" }, \
+ { NFS_STATE_RECLAIM_NOGRACE, "RECLAIM_NOGRACE" }, \
+ { NFS_STATE_POSIX_LOCKS, "POSIX_LOCKS" }, \
+ { NFS_STATE_RECOVERY_FAILED, "RECOVERY_FAILED" }, \
+ { NFS_STATE_MAY_NOTIFY_LOCK, "MAY_NOTIFY_LOCK" }, \
+ { NFS_STATE_CHANGE_WAIT, "CHANGE_WAIT" }, \
+ { NFS_CLNT_DST_SSC_COPY_STATE, "CLNT_DST_SSC_COPY" }, \
+ { NFS_CLNT_SRC_SSC_COPY_STATE, "CLNT_SRC_SSC_COPY" }, \
+ { NFS_SRV_SSC_COPY_STATE, "SRV_SSC_COPY" })
+
+#define show_nfs4_lock_flags(flags) \
+ __print_flags(flags, "|", \
+ { BIT(NFS_LOCK_INITIALIZED), "INITIALIZED" }, \
+ { BIT(NFS_LOCK_LOST), "LOST" })
+
+TRACE_EVENT(nfs4_state_lock_reclaim,
+ TP_PROTO(
+ const struct nfs4_state *state,
+ const struct nfs4_lock_state *lock
+ ),
+
+ TP_ARGS(state, lock),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(u32, fhandle)
+ __field(u64, fileid)
+ __field(unsigned long, state_flags)
+ __field(unsigned long, lock_flags)
+ __field(int, stateid_seq)
+ __field(u32, stateid_hash)
+ ),
+
+ TP_fast_assign(
+ const struct inode *inode = state->inode;
+
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->fileid = NFS_FILEID(inode);
+ __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+ __entry->state_flags = state->flags;
+ __entry->lock_flags = lock->ls_flags;
+ __entry->stateid_seq =
+ be32_to_cpu(state->stateid.seqid);
+ __entry->stateid_hash =
+ nfs_stateid_hash(&state->stateid);
+ ),
+
+ TP_printk(
+ "fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "stateid=%d:0x%08x state_flags=%s lock_flags=%s",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid, __entry->fhandle,
+ __entry->stateid_seq, __entry->stateid_hash,
+ show_nfs4_state_flags(__entry->state_flags),
+ show_nfs4_lock_flags(__entry->lock_flags)
+ )
+)
+
DECLARE_EVENT_CLASS(nfs4_set_delegation_event,
TP_PROTO(
const struct inode *inode,
@@ -1508,6 +1728,13 @@
DEFINE_NFS4_IDMAP_EVENT(nfs4_map_uid_to_name);
DEFINE_NFS4_IDMAP_EVENT(nfs4_map_gid_to_group);
+#ifdef CONFIG_NFS_V4_1
+#define NFS4_LSEG_LAYOUT_STATEID_HASH(lseg) \
+ (lseg ? nfs_stateid_hash(&lseg->pls_layout->plh_stateid) : 0)
+#else
+#define NFS4_LSEG_LAYOUT_STATEID_HASH(lseg) (0)
+#endif
+
DECLARE_EVENT_CLASS(nfs4_read_event,
TP_PROTO(
const struct nfs_pgio_header *hdr,
@@ -1521,39 +1748,53 @@
__field(u32, fhandle)
__field(u64, fileid)
__field(loff_t, offset)
- __field(size_t, count)
+ __field(u32, arg_count)
+ __field(u32, res_count)
__field(unsigned long, error)
__field(int, stateid_seq)
__field(u32, stateid_hash)
+ __field(int, layoutstateid_seq)
+ __field(u32, layoutstateid_hash)
),
TP_fast_assign(
const struct inode *inode = hdr->inode;
+ const struct nfs_inode *nfsi = NFS_I(inode);
+ const struct nfs_fh *fh = hdr->args.fh ?
+ hdr->args.fh : &nfsi->fh;
const struct nfs4_state *state =
hdr->args.context->state;
+ const struct pnfs_layout_segment *lseg = hdr->lseg;
+
__entry->dev = inode->i_sb->s_dev;
- __entry->fileid = NFS_FILEID(inode);
- __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+ __entry->fileid = nfsi->fileid;
+ __entry->fhandle = nfs_fhandle_hash(fh);
__entry->offset = hdr->args.offset;
- __entry->count = hdr->args.count;
+ __entry->arg_count = hdr->args.count;
+ __entry->res_count = hdr->res.count;
__entry->error = error < 0 ? -error : 0;
__entry->stateid_seq =
be32_to_cpu(state->stateid.seqid);
__entry->stateid_hash =
nfs_stateid_hash(&state->stateid);
+ __entry->layoutstateid_seq = lseg ? lseg->pls_seq : 0;
+ __entry->layoutstateid_hash =
+ NFS4_LSEG_LAYOUT_STATEID_HASH(lseg);
),
TP_printk(
"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
- "offset=%lld count=%zu stateid=%d:0x%08x",
+ "offset=%lld count=%u res=%u stateid=%d:0x%08x "
+ "layoutstateid=%d:0x%08x",
-__entry->error,
show_nfsv4_errors(__entry->error),
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle,
(long long)__entry->offset,
- __entry->count,
- __entry->stateid_seq, __entry->stateid_hash
+ __entry->arg_count, __entry->res_count,
+ __entry->stateid_seq, __entry->stateid_hash,
+ __entry->layoutstateid_seq, __entry->layoutstateid_hash
)
);
#define DEFINE_NFS4_READ_EVENT(name) \
@@ -1581,39 +1822,53 @@
__field(u32, fhandle)
__field(u64, fileid)
__field(loff_t, offset)
- __field(size_t, count)
+ __field(u32, arg_count)
+ __field(u32, res_count)
__field(unsigned long, error)
__field(int, stateid_seq)
__field(u32, stateid_hash)
+ __field(int, layoutstateid_seq)
+ __field(u32, layoutstateid_hash)
),
TP_fast_assign(
const struct inode *inode = hdr->inode;
+ const struct nfs_inode *nfsi = NFS_I(inode);
+ const struct nfs_fh *fh = hdr->args.fh ?
+ hdr->args.fh : &nfsi->fh;
const struct nfs4_state *state =
hdr->args.context->state;
+ const struct pnfs_layout_segment *lseg = hdr->lseg;
+
__entry->dev = inode->i_sb->s_dev;
- __entry->fileid = NFS_FILEID(inode);
- __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+ __entry->fileid = nfsi->fileid;
+ __entry->fhandle = nfs_fhandle_hash(fh);
__entry->offset = hdr->args.offset;
- __entry->count = hdr->args.count;
+ __entry->arg_count = hdr->args.count;
+ __entry->res_count = hdr->res.count;
__entry->error = error < 0 ? -error : 0;
__entry->stateid_seq =
be32_to_cpu(state->stateid.seqid);
__entry->stateid_hash =
nfs_stateid_hash(&state->stateid);
+ __entry->layoutstateid_seq = lseg ? lseg->pls_seq : 0;
+ __entry->layoutstateid_hash =
+ NFS4_LSEG_LAYOUT_STATEID_HASH(lseg);
),
TP_printk(
"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
- "offset=%lld count=%zu stateid=%d:0x%08x",
+ "offset=%lld count=%u res=%u stateid=%d:0x%08x "
+ "layoutstateid=%d:0x%08x",
-__entry->error,
show_nfsv4_errors(__entry->error),
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle,
(long long)__entry->offset,
- __entry->count,
- __entry->stateid_seq, __entry->stateid_hash
+ __entry->arg_count, __entry->res_count,
+ __entry->stateid_seq, __entry->stateid_hash,
+ __entry->layoutstateid_seq, __entry->layoutstateid_hash
)
);
@@ -1641,31 +1896,42 @@
__field(dev_t, dev)
__field(u32, fhandle)
__field(u64, fileid)
- __field(loff_t, offset)
- __field(size_t, count)
__field(unsigned long, error)
+ __field(loff_t, offset)
+ __field(u32, count)
+ __field(int, layoutstateid_seq)
+ __field(u32, layoutstateid_hash)
),
TP_fast_assign(
const struct inode *inode = data->inode;
+ const struct nfs_inode *nfsi = NFS_I(inode);
+ const struct nfs_fh *fh = data->args.fh ?
+ data->args.fh : &nfsi->fh;
+ const struct pnfs_layout_segment *lseg = data->lseg;
+
__entry->dev = inode->i_sb->s_dev;
- __entry->fileid = NFS_FILEID(inode);
- __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+ __entry->fileid = nfsi->fileid;
+ __entry->fhandle = nfs_fhandle_hash(fh);
__entry->offset = data->args.offset;
__entry->count = data->args.count;
- __entry->error = error;
+ __entry->error = error < 0 ? -error : 0;
+ __entry->layoutstateid_seq = lseg ? lseg->pls_seq : 0;
+ __entry->layoutstateid_hash =
+ NFS4_LSEG_LAYOUT_STATEID_HASH(lseg);
),
TP_printk(
"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
- "offset=%lld count=%zu",
+ "offset=%lld count=%u layoutstateid=%d:0x%08x",
-__entry->error,
show_nfsv4_errors(__entry->error),
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle,
(long long)__entry->offset,
- __entry->count
+ __entry->count,
+ __entry->layoutstateid_seq, __entry->layoutstateid_hash
)
);
#define DEFINE_NFS4_COMMIT_EVENT(name) \
@@ -1758,7 +2024,9 @@
DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_layoutcommit);
DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_layoutreturn);
-DEFINE_NFS4_INODE_EVENT(nfs4_layoutreturn_on_close);
+DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_layoutreturn_on_close);
+DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_layouterror);
+DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_layoutstats);
TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_UNKNOWN);
TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_NO_PNFS);
@@ -1921,6 +2189,115 @@
DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_read_pagelist);
DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_write_pagelist);
+DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event,
+ TP_PROTO(
+ const struct nfs_pgio_header *hdr
+ ),
+
+ TP_ARGS(hdr),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, error)
+ __field(dev_t, dev)
+ __field(u32, fhandle)
+ __field(u64, fileid)
+ __field(loff_t, offset)
+ __field(u32, count)
+ __field(int, stateid_seq)
+ __field(u32, stateid_hash)
+ __string(dstaddr, hdr->ds_clp ?
+ rpc_peeraddr2str(hdr->ds_clp->cl_rpcclient,
+ RPC_DISPLAY_ADDR) : "unknown")
+ ),
+
+ TP_fast_assign(
+ const struct inode *inode = hdr->inode;
+
+ __entry->error = hdr->res.op_status;
+ __entry->fhandle = nfs_fhandle_hash(hdr->args.fh);
+ __entry->fileid = NFS_FILEID(inode);
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->offset = hdr->args.offset;
+ __entry->count = hdr->args.count;
+ __entry->stateid_seq =
+ be32_to_cpu(hdr->args.stateid.seqid);
+ __entry->stateid_hash =
+ nfs_stateid_hash(&hdr->args.stateid);
+ __assign_str(dstaddr, hdr->ds_clp ?
+ rpc_peeraddr2str(hdr->ds_clp->cl_rpcclient,
+ RPC_DISPLAY_ADDR) : "unknown");
+ ),
+
+ TP_printk(
+ "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "offset=%llu count=%u stateid=%d:0x%08x dstaddr=%s",
+ -__entry->error,
+ show_nfsv4_errors(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+ __entry->offset, __entry->count,
+ __entry->stateid_seq, __entry->stateid_hash,
+ __get_str(dstaddr)
+ )
+);
+
+#define DEFINE_NFS4_FLEXFILES_IO_EVENT(name) \
+ DEFINE_EVENT(nfs4_flexfiles_io_event, name, \
+ TP_PROTO( \
+ const struct nfs_pgio_header *hdr \
+ ), \
+ TP_ARGS(hdr))
+DEFINE_NFS4_FLEXFILES_IO_EVENT(ff_layout_read_error);
+DEFINE_NFS4_FLEXFILES_IO_EVENT(ff_layout_write_error);
+
+TRACE_EVENT(ff_layout_commit_error,
+ TP_PROTO(
+ const struct nfs_commit_data *data
+ ),
+
+ TP_ARGS(data),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, error)
+ __field(dev_t, dev)
+ __field(u32, fhandle)
+ __field(u64, fileid)
+ __field(loff_t, offset)
+ __field(u32, count)
+ __string(dstaddr, data->ds_clp ?
+ rpc_peeraddr2str(data->ds_clp->cl_rpcclient,
+ RPC_DISPLAY_ADDR) : "unknown")
+ ),
+
+ TP_fast_assign(
+ const struct inode *inode = data->inode;
+
+ __entry->error = data->res.op_status;
+ __entry->fhandle = nfs_fhandle_hash(data->args.fh);
+ __entry->fileid = NFS_FILEID(inode);
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->offset = data->args.offset;
+ __entry->count = data->args.count;
+ __assign_str(dstaddr, data->ds_clp ?
+ rpc_peeraddr2str(data->ds_clp->cl_rpcclient,
+ RPC_DISPLAY_ADDR) : "unknown");
+ ),
+
+ TP_printk(
+ "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "offset=%llu count=%u dstaddr=%s",
+ -__entry->error,
+ show_nfsv4_errors(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+ __entry->offset, __entry->count,
+ __get_str(dstaddr)
+ )
+);
+
+
#endif /* CONFIG_NFS_V4_1 */
#endif /* _TRACE_NFS4_H */
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 9a022a4..e2f0e34 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1059,9 +1059,9 @@
}
static __be32 *
-xdr_encode_nfstime4(__be32 *p, const struct timespec *t)
+xdr_encode_nfstime4(__be32 *p, const struct timespec64 *t)
{
- p = xdr_encode_hyper(p, (__s64)t->tv_sec);
+ p = xdr_encode_hyper(p, t->tv_sec);
*p++ = cpu_to_be32(t->tv_nsec);
return p;
}
@@ -1072,7 +1072,6 @@
const struct nfs_server *server,
const uint32_t attrmask[])
{
- struct timespec ts;
char owner_name[IDMAP_NAMESZ];
char owner_group[IDMAP_NAMESZ];
int owner_namelen = 0;
@@ -1161,16 +1160,14 @@
if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) {
if (iap->ia_valid & ATTR_ATIME_SET) {
*p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
- ts = timespec64_to_timespec(iap->ia_atime);
- p = xdr_encode_nfstime4(p, &ts);
+ p = xdr_encode_nfstime4(p, &iap->ia_atime);
} else
*p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME);
}
if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) {
if (iap->ia_valid & ATTR_MTIME_SET) {
*p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
- ts = timespec64_to_timespec(iap->ia_mtime);
- p = xdr_encode_nfstime4(p, &ts);
+ p = xdr_encode_nfstime4(p, &iap->ia_mtime);
} else
*p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME);
}
@@ -3683,8 +3680,6 @@
if (unlikely(!p))
goto out_eio;
n = be32_to_cpup(p);
- if (n <= 0)
- goto out_eio;
for (res->nlocations = 0; res->nlocations < n; res->nlocations++) {
u32 m;
struct nfs4_fs_location *loc;
@@ -4069,17 +4064,17 @@
}
static __be32 *
-xdr_decode_nfstime4(__be32 *p, struct timespec *t)
+xdr_decode_nfstime4(__be32 *p, struct timespec64 *t)
{
__u64 sec;
p = xdr_decode_hyper(p, &sec);
- t-> tv_sec = (time_t)sec;
+ t-> tv_sec = sec;
t->tv_nsec = be32_to_cpup(p++);
return p;
}
-static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time)
+static int decode_attr_time(struct xdr_stream *xdr, struct timespec64 *time)
{
__be32 *p;
@@ -4090,7 +4085,7 @@
return 0;
}
-static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time)
+static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec64 *time)
{
int status = 0;
@@ -4104,11 +4099,11 @@
status = NFS_ATTR_FATTR_ATIME;
bitmap[1] &= ~FATTR4_WORD1_TIME_ACCESS;
}
- dprintk("%s: atime=%ld\n", __func__, (long)time->tv_sec);
+ dprintk("%s: atime=%lld\n", __func__, time->tv_sec);
return status;
}
-static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time)
+static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec64 *time)
{
int status = 0;
@@ -4122,12 +4117,12 @@
status = NFS_ATTR_FATTR_CTIME;
bitmap[1] &= ~FATTR4_WORD1_TIME_METADATA;
}
- dprintk("%s: ctime=%ld\n", __func__, (long)time->tv_sec);
+ dprintk("%s: ctime=%lld\n", __func__, time->tv_sec);
return status;
}
static int decode_attr_time_delta(struct xdr_stream *xdr, uint32_t *bitmap,
- struct timespec *time)
+ struct timespec64 *time)
{
int status = 0;
@@ -4139,8 +4134,8 @@
status = decode_attr_time(xdr, time);
bitmap[1] &= ~FATTR4_WORD1_TIME_DELTA;
}
- dprintk("%s: time_delta=%ld %ld\n", __func__, (long)time->tv_sec,
- (long)time->tv_nsec);
+ dprintk("%s: time_delta=%lld %ld\n", __func__, time->tv_sec,
+ time->tv_nsec);
return status;
}
@@ -4187,14 +4182,15 @@
} else
printk(KERN_WARNING "%s: label too long (%u)!\n",
__func__, len);
+ if (label && label->label)
+ dprintk("%s: label=%.*s, len=%d, PI=%d, LFS=%d\n",
+ __func__, label->len, (char *)label->label,
+ label->len, label->pi, label->lfs);
}
- if (label && label->label)
- dprintk("%s: label=%s, len=%d, PI=%d, LFS=%d\n", __func__,
- (char *)label->label, label->len, label->pi, label->lfs);
return status;
}
-static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time)
+static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec64 *time)
{
int status = 0;
@@ -4208,10 +4204,30 @@
status = NFS_ATTR_FATTR_MTIME;
bitmap[1] &= ~FATTR4_WORD1_TIME_MODIFY;
}
- dprintk("%s: mtime=%ld\n", __func__, (long)time->tv_sec);
+ dprintk("%s: mtime=%lld\n", __func__, time->tv_sec);
return status;
}
+static int decode_attr_xattrsupport(struct xdr_stream *xdr, uint32_t *bitmap,
+ uint32_t *res)
+{
+ __be32 *p;
+
+ *res = 0;
+ if (unlikely(bitmap[2] & (FATTR4_WORD2_XATTR_SUPPORT - 1U)))
+ return -EIO;
+ if (likely(bitmap[2] & FATTR4_WORD2_XATTR_SUPPORT)) {
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ return -EIO;
+ *res = be32_to_cpup(p);
+ bitmap[2] &= ~FATTR4_WORD2_XATTR_SUPPORT;
+ }
+ dprintk("%s: XATTR support=%s\n", __func__,
+ *res == 0 ? "false" : "true");
+ return 0;
+}
+
static int verify_attr_len(struct xdr_stream *xdr, unsigned int savep, uint32_t attrlen)
{
unsigned int attrwords = XDR_QUADLEN(attrlen);
@@ -4866,6 +4882,11 @@
if (status)
goto xdr_error;
+ status = decode_attr_xattrsupport(xdr, bitmap,
+ &fsinfo->xattr_support);
+ if (status)
+ goto xdr_error;
+
status = verify_attr_len(xdr, savep, attrlen);
xdr_error:
dprintk("%s: xdr returned %d!\n", __func__, -status);
@@ -5238,7 +5259,7 @@
* The XDR encode routine has set things up so that
* the link text will be copied directly into the
* buffer. We just have to do overflow-checking,
- * and and null-terminate the text (the VFS expects
+ * and null-terminate the text (the VFS expects
* null-termination).
*/
xdr_terminate_string(rcvbuf, len);
@@ -5290,7 +5311,6 @@
uint32_t attrlen,
bitmap[3] = {0};
int status;
- unsigned int pg_offset;
res->acl_len = 0;
if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
@@ -5298,9 +5318,6 @@
xdr_enter_page(xdr, xdr->buf->page_len);
- /* Calculate the offset of the page data */
- pg_offset = xdr->buf->head[0].iov_len;
-
if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
goto out;
if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
@@ -5313,7 +5330,7 @@
/* The bitmap (xdr len + bitmaps) and the attr xdr len words
* are stored with the acl data to handle the problem of
* variable length bitmaps.*/
- res->acl_data_offset = xdr_stream_pos(xdr) - pg_offset;
+ res->acl_data_offset = xdr_page_pos(xdr);
res->acl_len = attrlen;
/* Check for receive buffer overflow */
@@ -7467,6 +7484,8 @@
{ NFS4ERR_SYMLINK, -ELOOP },
{ NFS4ERR_OP_ILLEGAL, -EOPNOTSUPP },
{ NFS4ERR_DEADLOCK, -EDEADLK },
+ { NFS4ERR_NOXATTR, -ENODATA },
+ { NFS4ERR_XATTR2BIG, -E2BIG },
{ -1, -EIO }
};
@@ -7592,8 +7611,14 @@
PROC42(CLONE, enc_clone, dec_clone),
PROC42(COPY, enc_copy, dec_copy),
PROC42(OFFLOAD_CANCEL, enc_offload_cancel, dec_offload_cancel),
+ PROC42(COPY_NOTIFY, enc_copy_notify, dec_copy_notify),
PROC(LOOKUPP, enc_lookupp, dec_lookupp),
PROC42(LAYOUTERROR, enc_layouterror, dec_layouterror),
+ PROC42(GETXATTR, enc_getxattr, dec_getxattr),
+ PROC42(SETXATTR, enc_setxattr, dec_setxattr),
+ PROC42(LISTXATTRS, enc_listxattrs, dec_listxattrs),
+ PROC42(REMOVEXATTR, enc_removexattr, dec_removexattr),
+ PROC42(READ_PLUS, enc_read_plus, dec_read_plus),
};
static unsigned int nfs_version4_counts[ARRAY_SIZE(nfs4_procedures)];
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index effaa42..fa14830 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -88,7 +88,13 @@
#define NFS_ROOT "/tftpboot/%s"
/* Default NFSROOT mount options. */
-#define NFS_DEF_OPTIONS "vers=2,udp,rsize=4096,wsize=4096"
+#if defined(CONFIG_NFS_V2)
+#define NFS_DEF_OPTIONS "vers=2,tcp,rsize=4096,wsize=4096"
+#elif defined(CONFIG_NFS_V3)
+#define NFS_DEF_OPTIONS "vers=3,tcp,rsize=4096,wsize=4096"
+#else
+#define NFS_DEF_OPTIONS "vers=4,tcp,rsize=4096,wsize=4096"
+#endif
/* Parameters passed from the kernel command line */
static char nfs_root_parms[NFS_MAXPATHLEN + 1] __initdata = "";
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index c8081d2..5a59dcd 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -59,7 +59,8 @@
{ NFS_INO_INVALID_CTIME, "INVALID_CTIME" }, \
{ NFS_INO_INVALID_MTIME, "INVALID_MTIME" }, \
{ NFS_INO_INVALID_SIZE, "INVALID_SIZE" }, \
- { NFS_INO_INVALID_OTHER, "INVALID_OTHER" })
+ { NFS_INO_INVALID_OTHER, "INVALID_OTHER" }, \
+ { NFS_INO_INVALID_XATTR, "INVALID_XATTR" })
TRACE_DEFINE_ENUM(NFS_INO_ADVISE_RDPLUS);
TRACE_DEFINE_ENUM(NFS_INO_STALE);
@@ -181,6 +182,7 @@
int error \
), \
TP_ARGS(inode, error))
+DEFINE_NFS_INODE_EVENT(nfs_set_inode_stale);
DEFINE_NFS_INODE_EVENT(nfs_refresh_inode_enter);
DEFINE_NFS_INODE_EVENT_DONE(nfs_refresh_inode_exit);
DEFINE_NFS_INODE_EVENT(nfs_revalidate_inode_enter);
@@ -198,7 +200,66 @@
DEFINE_NFS_INODE_EVENT(nfs_fsync_enter);
DEFINE_NFS_INODE_EVENT_DONE(nfs_fsync_exit);
DEFINE_NFS_INODE_EVENT(nfs_access_enter);
-DEFINE_NFS_INODE_EVENT_DONE(nfs_access_exit);
+
+TRACE_EVENT(nfs_access_exit,
+ TP_PROTO(
+ const struct inode *inode,
+ unsigned int mask,
+ unsigned int permitted,
+ int error
+ ),
+
+ TP_ARGS(inode, mask, permitted, error),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, error)
+ __field(dev_t, dev)
+ __field(u32, fhandle)
+ __field(unsigned char, type)
+ __field(u64, fileid)
+ __field(u64, version)
+ __field(loff_t, size)
+ __field(unsigned long, nfsi_flags)
+ __field(unsigned long, cache_validity)
+ __field(unsigned int, mask)
+ __field(unsigned int, permitted)
+ ),
+
+ TP_fast_assign(
+ const struct nfs_inode *nfsi = NFS_I(inode);
+ __entry->error = error < 0 ? -error : 0;
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->fileid = nfsi->fileid;
+ __entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
+ __entry->type = nfs_umode_to_dtype(inode->i_mode);
+ __entry->version = inode_peek_iversion_raw(inode);
+ __entry->size = i_size_read(inode);
+ __entry->nfsi_flags = nfsi->flags;
+ __entry->cache_validity = nfsi->cache_validity;
+ __entry->mask = mask;
+ __entry->permitted = permitted;
+ ),
+
+ TP_printk(
+ "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "type=%u (%s) version=%llu size=%lld "
+ "cache_validity=0x%lx (%s) nfs_flags=0x%lx (%s) "
+ "mask=0x%x permitted=0x%x",
+ -__entry->error, nfs_show_status(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+ __entry->type,
+ nfs_show_file_type(__entry->type),
+ (unsigned long long)__entry->version,
+ (long long)__entry->size,
+ __entry->cache_validity,
+ nfs_show_cache_validity(__entry->cache_validity),
+ __entry->nfsi_flags,
+ nfs_show_nfsi_flags(__entry->nfsi_flags),
+ __entry->mask, __entry->permitted
+ )
+);
TRACE_DEFINE_ENUM(LOOKUP_FOLLOW);
TRACE_DEFINE_ENUM(LOOKUP_DIRECTORY);
@@ -206,7 +267,6 @@
TRACE_DEFINE_ENUM(LOOKUP_PARENT);
TRACE_DEFINE_ENUM(LOOKUP_REVAL);
TRACE_DEFINE_ENUM(LOOKUP_RCU);
-TRACE_DEFINE_ENUM(LOOKUP_NO_REVAL);
TRACE_DEFINE_ENUM(LOOKUP_OPEN);
TRACE_DEFINE_ENUM(LOOKUP_CREATE);
TRACE_DEFINE_ENUM(LOOKUP_EXCL);
@@ -224,7 +284,6 @@
{ LOOKUP_PARENT, "PARENT" }, \
{ LOOKUP_REVAL, "REVAL" }, \
{ LOOKUP_RCU, "RCU" }, \
- { LOOKUP_NO_REVAL, "NO_REVAL" }, \
{ LOOKUP_OPEN, "OPEN" }, \
{ LOOKUP_CREATE, "CREATE" }, \
{ LOOKUP_EXCL, "EXCL" }, \
@@ -820,79 +879,180 @@
TRACE_EVENT(nfs_initiate_read,
TP_PROTO(
- const struct inode *inode,
- loff_t offset, unsigned long count
+ const struct nfs_pgio_header *hdr
),
- TP_ARGS(inode, offset, count),
+ TP_ARGS(hdr),
TP_STRUCT__entry(
- __field(loff_t, offset)
- __field(unsigned long, count)
__field(dev_t, dev)
__field(u32, fhandle)
__field(u64, fileid)
+ __field(loff_t, offset)
+ __field(u32, count)
),
TP_fast_assign(
+ const struct inode *inode = hdr->inode;
const struct nfs_inode *nfsi = NFS_I(inode);
+ const struct nfs_fh *fh = hdr->args.fh ?
+ hdr->args.fh : &nfsi->fh;
- __entry->offset = offset;
- __entry->count = count;
+ __entry->offset = hdr->args.offset;
+ __entry->count = hdr->args.count;
__entry->dev = inode->i_sb->s_dev;
__entry->fileid = nfsi->fileid;
- __entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
+ __entry->fhandle = nfs_fhandle_hash(fh);
),
TP_printk(
"fileid=%02x:%02x:%llu fhandle=0x%08x "
- "offset=%lld count=%lu",
+ "offset=%lld count=%u",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle,
- __entry->offset, __entry->count
+ (long long)__entry->offset, __entry->count
)
);
TRACE_EVENT(nfs_readpage_done,
TP_PROTO(
- const struct inode *inode,
- int status, loff_t offset, bool eof
+ const struct rpc_task *task,
+ const struct nfs_pgio_header *hdr
),
- TP_ARGS(inode, status, offset, eof),
+ TP_ARGS(task, hdr),
TP_STRUCT__entry(
- __field(int, status)
- __field(loff_t, offset)
- __field(bool, eof)
__field(dev_t, dev)
__field(u32, fhandle)
__field(u64, fileid)
+ __field(loff_t, offset)
+ __field(u32, arg_count)
+ __field(u32, res_count)
+ __field(bool, eof)
+ __field(int, status)
),
TP_fast_assign(
+ const struct inode *inode = hdr->inode;
const struct nfs_inode *nfsi = NFS_I(inode);
+ const struct nfs_fh *fh = hdr->args.fh ?
+ hdr->args.fh : &nfsi->fh;
- __entry->status = status;
- __entry->offset = offset;
- __entry->eof = eof;
+ __entry->status = task->tk_status;
+ __entry->offset = hdr->args.offset;
+ __entry->arg_count = hdr->args.count;
+ __entry->res_count = hdr->res.count;
+ __entry->eof = hdr->res.eof;
__entry->dev = inode->i_sb->s_dev;
__entry->fileid = nfsi->fileid;
- __entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
+ __entry->fhandle = nfs_fhandle_hash(fh);
),
TP_printk(
"fileid=%02x:%02x:%llu fhandle=0x%08x "
- "offset=%lld status=%d%s",
+ "offset=%lld count=%u res=%u status=%d%s",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle,
- __entry->offset, __entry->status,
+ (long long)__entry->offset, __entry->arg_count,
+ __entry->res_count, __entry->status,
__entry->eof ? " eof" : ""
)
);
+TRACE_EVENT(nfs_readpage_short,
+ TP_PROTO(
+ const struct rpc_task *task,
+ const struct nfs_pgio_header *hdr
+ ),
+
+ TP_ARGS(task, hdr),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(u32, fhandle)
+ __field(u64, fileid)
+ __field(loff_t, offset)
+ __field(u32, arg_count)
+ __field(u32, res_count)
+ __field(bool, eof)
+ __field(int, status)
+ ),
+
+ TP_fast_assign(
+ const struct inode *inode = hdr->inode;
+ const struct nfs_inode *nfsi = NFS_I(inode);
+ const struct nfs_fh *fh = hdr->args.fh ?
+ hdr->args.fh : &nfsi->fh;
+
+ __entry->status = task->tk_status;
+ __entry->offset = hdr->args.offset;
+ __entry->arg_count = hdr->args.count;
+ __entry->res_count = hdr->res.count;
+ __entry->eof = hdr->res.eof;
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->fileid = nfsi->fileid;
+ __entry->fhandle = nfs_fhandle_hash(fh);
+ ),
+
+ TP_printk(
+ "fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "offset=%lld count=%u res=%u status=%d%s",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+ (long long)__entry->offset, __entry->arg_count,
+ __entry->res_count, __entry->status,
+ __entry->eof ? " eof" : ""
+ )
+);
+
+TRACE_EVENT(nfs_pgio_error,
+ TP_PROTO(
+ const struct nfs_pgio_header *hdr,
+ int error,
+ loff_t pos
+ ),
+
+ TP_ARGS(hdr, error, pos),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(u32, fhandle)
+ __field(u64, fileid)
+ __field(loff_t, offset)
+ __field(u32, arg_count)
+ __field(u32, res_count)
+ __field(loff_t, pos)
+ __field(int, status)
+ ),
+
+ TP_fast_assign(
+ const struct inode *inode = hdr->inode;
+ const struct nfs_inode *nfsi = NFS_I(inode);
+ const struct nfs_fh *fh = hdr->args.fh ?
+ hdr->args.fh : &nfsi->fh;
+
+ __entry->status = error;
+ __entry->offset = hdr->args.offset;
+ __entry->arg_count = hdr->args.count;
+ __entry->res_count = hdr->res.count;
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->fileid = nfsi->fileid;
+ __entry->fhandle = nfs_fhandle_hash(fh);
+ ),
+
+ TP_printk("fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "offset=%lld count=%u res=%u pos=%llu status=%d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid, __entry->fhandle,
+ (long long)__entry->offset, __entry->arg_count, __entry->res_count,
+ __entry->pos, __entry->status
+ )
+);
+
TRACE_DEFINE_ENUM(NFS_UNSTABLE);
TRACE_DEFINE_ENUM(NFS_DATA_SYNC);
TRACE_DEFINE_ENUM(NFS_FILE_SYNC);
@@ -905,90 +1065,144 @@
TRACE_EVENT(nfs_initiate_write,
TP_PROTO(
- const struct inode *inode,
- loff_t offset, unsigned long count,
- enum nfs3_stable_how stable
+ const struct nfs_pgio_header *hdr
),
- TP_ARGS(inode, offset, count, stable),
+ TP_ARGS(hdr),
TP_STRUCT__entry(
- __field(loff_t, offset)
- __field(unsigned long, count)
- __field(enum nfs3_stable_how, stable)
__field(dev_t, dev)
__field(u32, fhandle)
__field(u64, fileid)
+ __field(loff_t, offset)
+ __field(u32, count)
+ __field(enum nfs3_stable_how, stable)
),
TP_fast_assign(
+ const struct inode *inode = hdr->inode;
const struct nfs_inode *nfsi = NFS_I(inode);
+ const struct nfs_fh *fh = hdr->args.fh ?
+ hdr->args.fh : &nfsi->fh;
- __entry->offset = offset;
- __entry->count = count;
- __entry->stable = stable;
+ __entry->offset = hdr->args.offset;
+ __entry->count = hdr->args.count;
+ __entry->stable = hdr->args.stable;
__entry->dev = inode->i_sb->s_dev;
__entry->fileid = nfsi->fileid;
- __entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
+ __entry->fhandle = nfs_fhandle_hash(fh);
),
TP_printk(
"fileid=%02x:%02x:%llu fhandle=0x%08x "
- "offset=%lld count=%lu stable=%s",
+ "offset=%lld count=%u stable=%s",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle,
- __entry->offset, __entry->count,
+ (long long)__entry->offset, __entry->count,
nfs_show_stable(__entry->stable)
)
);
TRACE_EVENT(nfs_writeback_done,
TP_PROTO(
- const struct inode *inode,
- int status,
- loff_t offset,
- struct nfs_writeverf *writeverf
+ const struct rpc_task *task,
+ const struct nfs_pgio_header *hdr
),
- TP_ARGS(inode, status, offset, writeverf),
+ TP_ARGS(task, hdr),
TP_STRUCT__entry(
- __field(int, status)
- __field(loff_t, offset)
- __field(enum nfs3_stable_how, stable)
- __field(unsigned long long, verifier)
__field(dev_t, dev)
__field(u32, fhandle)
__field(u64, fileid)
+ __field(loff_t, offset)
+ __field(u32, arg_count)
+ __field(u32, res_count)
+ __field(int, status)
+ __field(enum nfs3_stable_how, stable)
+ __array(char, verifier, NFS4_VERIFIER_SIZE)
),
TP_fast_assign(
+ const struct inode *inode = hdr->inode;
const struct nfs_inode *nfsi = NFS_I(inode);
+ const struct nfs_fh *fh = hdr->args.fh ?
+ hdr->args.fh : &nfsi->fh;
+ const struct nfs_writeverf *verf = hdr->res.verf;
- __entry->status = status;
- __entry->offset = offset;
- __entry->stable = writeverf->committed;
- memcpy(&__entry->verifier, &writeverf->verifier,
- sizeof(__entry->verifier));
+ __entry->status = task->tk_status;
+ __entry->offset = hdr->args.offset;
+ __entry->arg_count = hdr->args.count;
+ __entry->res_count = hdr->res.count;
+ __entry->stable = verf->committed;
+ memcpy(__entry->verifier,
+ &verf->verifier,
+ NFS4_VERIFIER_SIZE);
__entry->dev = inode->i_sb->s_dev;
__entry->fileid = nfsi->fileid;
- __entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
+ __entry->fhandle = nfs_fhandle_hash(fh);
),
TP_printk(
"fileid=%02x:%02x:%llu fhandle=0x%08x "
- "offset=%lld status=%d stable=%s "
- "verifier 0x%016llx",
+ "offset=%lld count=%u res=%u status=%d stable=%s "
+ "verifier=%s",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle,
- __entry->offset, __entry->status,
+ (long long)__entry->offset, __entry->arg_count,
+ __entry->res_count, __entry->status,
nfs_show_stable(__entry->stable),
- __entry->verifier
+ __print_hex_str(__entry->verifier, NFS4_VERIFIER_SIZE)
)
);
+DECLARE_EVENT_CLASS(nfs_page_error_class,
+ TP_PROTO(
+ const struct nfs_page *req,
+ int error
+ ),
+
+ TP_ARGS(req, error),
+
+ TP_STRUCT__entry(
+ __field(const void *, req)
+ __field(pgoff_t, index)
+ __field(unsigned int, offset)
+ __field(unsigned int, pgbase)
+ __field(unsigned int, bytes)
+ __field(int, error)
+ ),
+
+ TP_fast_assign(
+ __entry->req = req;
+ __entry->index = req->wb_index;
+ __entry->offset = req->wb_offset;
+ __entry->pgbase = req->wb_pgbase;
+ __entry->bytes = req->wb_bytes;
+ __entry->error = error;
+ ),
+
+ TP_printk(
+ "req=%p index=%lu offset=%u pgbase=%u bytes=%u error=%d",
+ __entry->req, __entry->index, __entry->offset,
+ __entry->pgbase, __entry->bytes, __entry->error
+ )
+);
+
+#define DEFINE_NFS_PAGEERR_EVENT(name) \
+ DEFINE_EVENT(nfs_page_error_class, name, \
+ TP_PROTO( \
+ const struct nfs_page *req, \
+ int error \
+ ), \
+ TP_ARGS(req, error))
+
+DEFINE_NFS_PAGEERR_EVENT(nfs_write_error);
+DEFINE_NFS_PAGEERR_EVENT(nfs_comp_error);
+DEFINE_NFS_PAGEERR_EVENT(nfs_commit_error);
+
TRACE_EVENT(nfs_initiate_commit,
TP_PROTO(
const struct nfs_commit_data *data
@@ -997,71 +1211,114 @@
TP_ARGS(data),
TP_STRUCT__entry(
- __field(loff_t, offset)
- __field(unsigned long, count)
__field(dev_t, dev)
__field(u32, fhandle)
__field(u64, fileid)
+ __field(loff_t, offset)
+ __field(u32, count)
),
TP_fast_assign(
const struct inode *inode = data->inode;
const struct nfs_inode *nfsi = NFS_I(inode);
+ const struct nfs_fh *fh = data->args.fh ?
+ data->args.fh : &nfsi->fh;
__entry->offset = data->args.offset;
__entry->count = data->args.count;
__entry->dev = inode->i_sb->s_dev;
__entry->fileid = nfsi->fileid;
- __entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
+ __entry->fhandle = nfs_fhandle_hash(fh);
),
TP_printk(
"fileid=%02x:%02x:%llu fhandle=0x%08x "
- "offset=%lld count=%lu",
+ "offset=%lld count=%u",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle,
- __entry->offset, __entry->count
+ (long long)__entry->offset, __entry->count
)
);
TRACE_EVENT(nfs_commit_done,
TP_PROTO(
+ const struct rpc_task *task,
const struct nfs_commit_data *data
),
- TP_ARGS(data),
+ TP_ARGS(task, data),
TP_STRUCT__entry(
- __field(int, status)
+ __field(dev_t, dev)
+ __field(u32, fhandle)
+ __field(u64, fileid)
__field(loff_t, offset)
- __field(unsigned long long, verifier)
+ __field(int, status)
+ __field(enum nfs3_stable_how, stable)
+ __array(char, verifier, NFS4_VERIFIER_SIZE)
+ ),
+
+ TP_fast_assign(
+ const struct inode *inode = data->inode;
+ const struct nfs_inode *nfsi = NFS_I(inode);
+ const struct nfs_fh *fh = data->args.fh ?
+ data->args.fh : &nfsi->fh;
+ const struct nfs_writeverf *verf = data->res.verf;
+
+ __entry->status = task->tk_status;
+ __entry->offset = data->args.offset;
+ __entry->stable = verf->committed;
+ memcpy(__entry->verifier,
+ &verf->verifier,
+ NFS4_VERIFIER_SIZE);
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->fileid = nfsi->fileid;
+ __entry->fhandle = nfs_fhandle_hash(fh);
+ ),
+
+ TP_printk(
+ "fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "offset=%lld status=%d stable=%s verifier=%s",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+ (long long)__entry->offset, __entry->status,
+ nfs_show_stable(__entry->stable),
+ __print_hex_str(__entry->verifier, NFS4_VERIFIER_SIZE)
+ )
+);
+
+TRACE_EVENT(nfs_fh_to_dentry,
+ TP_PROTO(
+ const struct super_block *sb,
+ const struct nfs_fh *fh,
+ u64 fileid,
+ int error
+ ),
+
+ TP_ARGS(sb, fh, fileid, error),
+
+ TP_STRUCT__entry(
+ __field(int, error)
__field(dev_t, dev)
__field(u32, fhandle)
__field(u64, fileid)
),
TP_fast_assign(
- const struct inode *inode = data->inode;
- const struct nfs_inode *nfsi = NFS_I(inode);
-
- __entry->status = data->res.op_status;
- __entry->offset = data->args.offset;
- memcpy(&__entry->verifier, &data->verf.verifier,
- sizeof(__entry->verifier));
- __entry->dev = inode->i_sb->s_dev;
- __entry->fileid = nfsi->fileid;
- __entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
+ __entry->error = error;
+ __entry->dev = sb->s_dev;
+ __entry->fileid = fileid;
+ __entry->fhandle = nfs_fhandle_hash(fh);
),
TP_printk(
- "fileid=%02x:%02x:%llu fhandle=0x%08x "
- "offset=%lld status=%d verifier 0x%016llx",
+ "error=%d fileid=%02x:%02x:%llu fhandle=0x%08x ",
+ __entry->error,
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
- __entry->fhandle,
- __entry->offset, __entry->status,
- __entry->verifier
+ __entry->fhandle
)
);
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index e3b85bf..98b9c1e 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -24,21 +24,36 @@
#include "internal.h"
#include "pnfs.h"
+#include "nfstrace.h"
#define NFSDBG_FACILITY NFSDBG_PAGECACHE
static struct kmem_cache *nfs_page_cachep;
static const struct rpc_call_ops nfs_pgio_common_ops;
+static struct nfs_pgio_mirror *
+nfs_pgio_get_mirror(struct nfs_pageio_descriptor *desc, u32 idx)
+{
+ if (desc->pg_ops->pg_get_mirror)
+ return desc->pg_ops->pg_get_mirror(desc, idx);
+ return &desc->pg_mirrors[0];
+}
+
struct nfs_pgio_mirror *
nfs_pgio_current_mirror(struct nfs_pageio_descriptor *desc)
{
- return nfs_pgio_has_mirroring(desc) ?
- &desc->pg_mirrors[desc->pg_mirror_idx] :
- &desc->pg_mirrors[0];
+ return nfs_pgio_get_mirror(desc, desc->pg_mirror_idx);
}
EXPORT_SYMBOL_GPL(nfs_pgio_current_mirror);
+static u32
+nfs_pgio_set_current_mirror(struct nfs_pageio_descriptor *desc, u32 idx)
+{
+ if (desc->pg_ops->pg_set_mirror)
+ return desc->pg_ops->pg_set_mirror(desc, idx);
+ return desc->pg_mirror_idx;
+}
+
void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
struct nfs_pgio_header *hdr,
void (*release)(struct nfs_pgio_header *hdr))
@@ -66,6 +81,7 @@
{
unsigned int new = pos - hdr->io_start;
+ trace_nfs_pgio_error(hdr, error, pos);
if (hdr->good_bytes > new) {
hdr->good_bytes = new;
clear_bit(NFS_IOHDR_EOF, &hdr->flags);
@@ -133,6 +149,102 @@
EXPORT_SYMBOL_GPL(nfs_async_iocounter_wait);
/*
+ * nfs_page_lock_head_request - page lock the head of the page group
+ * @req: any member of the page group
+ */
+struct nfs_page *
+nfs_page_group_lock_head(struct nfs_page *req)
+{
+ struct nfs_page *head = req->wb_head;
+
+ while (!nfs_lock_request(head)) {
+ int ret = nfs_wait_on_request(head);
+ if (ret < 0)
+ return ERR_PTR(ret);
+ }
+ if (head != req)
+ kref_get(&head->wb_kref);
+ return head;
+}
+
+/*
+ * nfs_unroll_locks - unlock all newly locked reqs and wait on @req
+ * @head: head request of page group, must be holding head lock
+ * @req: request that couldn't lock and needs to wait on the req bit lock
+ *
+ * This is a helper function for nfs_lock_and_join_requests
+ * returns 0 on success, < 0 on error.
+ */
+static void
+nfs_unroll_locks(struct nfs_page *head, struct nfs_page *req)
+{
+ struct nfs_page *tmp;
+
+ /* relinquish all the locks successfully grabbed this run */
+ for (tmp = head->wb_this_page ; tmp != req; tmp = tmp->wb_this_page) {
+ if (!kref_read(&tmp->wb_kref))
+ continue;
+ nfs_unlock_and_release_request(tmp);
+ }
+}
+
+/*
+ * nfs_page_group_lock_subreq - try to lock a subrequest
+ * @head: head request of page group
+ * @subreq: request to lock
+ *
+ * This is a helper function for nfs_lock_and_join_requests which
+ * must be called with the head request and page group both locked.
+ * On error, it returns with the page group unlocked.
+ */
+static int
+nfs_page_group_lock_subreq(struct nfs_page *head, struct nfs_page *subreq)
+{
+ int ret;
+
+ if (!kref_get_unless_zero(&subreq->wb_kref))
+ return 0;
+ while (!nfs_lock_request(subreq)) {
+ nfs_page_group_unlock(head);
+ ret = nfs_wait_on_request(subreq);
+ if (!ret)
+ ret = nfs_page_group_lock(head);
+ if (ret < 0) {
+ nfs_unroll_locks(head, subreq);
+ nfs_release_request(subreq);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+/*
+ * nfs_page_group_lock_subrequests - try to lock the subrequests
+ * @head: head request of page group
+ *
+ * This is a helper function for nfs_lock_and_join_requests which
+ * must be called with the head request locked.
+ */
+int nfs_page_group_lock_subrequests(struct nfs_page *head)
+{
+ struct nfs_page *subreq;
+ int ret;
+
+ ret = nfs_page_group_lock(head);
+ if (ret < 0)
+ return ret;
+ /* lock each request in the page group */
+ for (subreq = head->wb_this_page; subreq != head;
+ subreq = subreq->wb_this_page) {
+ ret = nfs_page_group_lock_subreq(head, subreq);
+ if (ret < 0)
+ return ret;
+ }
+ nfs_page_group_unlock(head);
+ return 0;
+}
+
+/*
* nfs_page_set_headlock - set the request PG_HEADLOCK
* @req: request that is to be locked
*
@@ -382,15 +494,23 @@
}
static struct nfs_page *
-nfs_create_subreq(struct nfs_page *req, struct nfs_page *last,
- unsigned int pgbase, unsigned int offset,
+nfs_create_subreq(struct nfs_page *req,
+ unsigned int pgbase,
+ unsigned int offset,
unsigned int count)
{
+ struct nfs_page *last;
struct nfs_page *ret;
ret = __nfs_create_request(req->wb_lock_context, req->wb_page,
pgbase, offset, count);
if (!IS_ERR(ret)) {
+ /* find the last request */
+ for (last = req->wb_head;
+ last->wb_this_page != req->wb_head;
+ last = last->wb_this_page)
+ ;
+
nfs_lock_request(ret);
ret->wb_index = req->wb_index;
nfs_page_group_init(ret, last);
@@ -607,7 +727,7 @@
case FLUSH_COND_STABLE:
if (nfs_reqs_to_commit(cinfo))
break;
- /* fall through */
+ fallthrough;
default:
hdr->args.stable = NFS_FILE_SYNC;
}
@@ -652,7 +772,6 @@
.workqueue = nfsiod_workqueue,
.flags = RPC_TASK_ASYNC | flags,
};
- int ret = 0;
hdr->rw_ops->rw_initiate(hdr, &msg, rpc_ops, &task_setup_data, how);
@@ -664,18 +783,10 @@
(unsigned long long)hdr->args.offset);
task = rpc_run_task(&task_setup_data);
- if (IS_ERR(task)) {
- ret = PTR_ERR(task);
- goto out;
- }
- if (how & FLUSH_SYNC) {
- ret = rpc_wait_for_completion_task(task);
- if (ret == 0)
- ret = task->tk_status;
- }
+ if (IS_ERR(task))
+ return PTR_ERR(task);
rpc_put_task(task);
-out:
- return ret;
+ return 0;
}
EXPORT_SYMBOL_GPL(nfs_initiate_pgio);
@@ -857,7 +968,8 @@
hdr->cred,
NFS_PROTO(hdr->inode),
desc->pg_rpc_callops,
- desc->pg_ioflags, 0);
+ desc->pg_ioflags,
+ RPC_TASK_CRED_NOREF);
return ret;
}
@@ -925,7 +1037,7 @@
}
/**
- * nfs_can_coalesce_requests - test two requests for compatibility
+ * nfs_coalesce_size - test two requests for compatibility
* @prev: pointer to nfs_page
* @req: pointer to nfs_page
* @pgio: pointer to nfs_pagio_descriptor
@@ -934,41 +1046,36 @@
* page data area they describe is contiguous, and that their RPC
* credentials, NFSv4 open state, and lockowners are the same.
*
- * Return 'true' if this is the case, else return 'false'.
+ * Returns size of the request that can be coalesced
*/
-static bool nfs_can_coalesce_requests(struct nfs_page *prev,
+static unsigned int nfs_coalesce_size(struct nfs_page *prev,
struct nfs_page *req,
struct nfs_pageio_descriptor *pgio)
{
- size_t size;
struct file_lock_context *flctx;
if (prev) {
if (!nfs_match_open_context(nfs_req_openctx(req), nfs_req_openctx(prev)))
- return false;
+ return 0;
flctx = d_inode(nfs_req_openctx(req)->dentry)->i_flctx;
if (flctx != NULL &&
!(list_empty_careful(&flctx->flc_posix) &&
list_empty_careful(&flctx->flc_flock)) &&
!nfs_match_lock_context(req->wb_lock_context,
prev->wb_lock_context))
- return false;
+ return 0;
if (req_offset(req) != req_offset(prev) + prev->wb_bytes)
- return false;
+ return 0;
if (req->wb_page == prev->wb_page) {
if (req->wb_pgbase != prev->wb_pgbase + prev->wb_bytes)
- return false;
+ return 0;
} else {
if (req->wb_pgbase != 0 ||
prev->wb_pgbase + prev->wb_bytes != PAGE_SIZE)
- return false;
+ return 0;
}
}
- size = pgio->pg_ops->pg_test(pgio, prev, req);
- WARN_ON_ONCE(size > req->wb_bytes);
- if (size && size < req->wb_bytes)
- req->wb_bytes = size;
- return size > 0;
+ return pgio->pg_ops->pg_test(pgio, prev, req);
}
/**
@@ -976,15 +1083,16 @@
* @desc: destination io descriptor
* @req: request
*
- * Returns true if the request 'req' was successfully coalesced into the
- * existing list of pages 'desc'.
+ * If the request 'req' was successfully coalesced into the existing list
+ * of pages 'desc', it returns the size of req.
*/
-static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
- struct nfs_page *req)
+static unsigned int
+nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
+ struct nfs_page *req)
{
struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
-
struct nfs_page *prev = NULL;
+ unsigned int size;
if (list_empty(&mirror->pg_list)) {
if (desc->pg_ops->pg_init)
@@ -1005,11 +1113,12 @@
return 0;
}
- if (!nfs_can_coalesce_requests(prev, req, desc))
- return 0;
+ size = nfs_coalesce_size(prev, req, desc);
+ if (size < req->wb_bytes)
+ return size;
nfs_list_move_request(req, &mirror->pg_list);
mirror->pg_count += req->wb_bytes;
- return 1;
+ return req->wb_bytes;
}
/*
@@ -1048,7 +1157,8 @@
* @req: request
*
* This may split a request into subrequests which are all part of the
- * same page group.
+ * same page group. If so, it will submit @req as the last one, to ensure
+ * the pointer to @req is still valid in case of failure.
*
* Returns true if the request 'req' was successfully coalesced into the
* existing list of pages 'desc'.
@@ -1057,51 +1167,50 @@
struct nfs_page *req)
{
struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
-
struct nfs_page *subreq;
- unsigned int bytes_left = 0;
- unsigned int offset, pgbase;
+ unsigned int size, subreq_size;
nfs_page_group_lock(req);
subreq = req;
- bytes_left = subreq->wb_bytes;
- offset = subreq->wb_offset;
- pgbase = subreq->wb_pgbase;
-
- do {
- if (!nfs_pageio_do_add_request(desc, subreq)) {
- /* make sure pg_test call(s) did nothing */
- WARN_ON_ONCE(subreq->wb_bytes != bytes_left);
- WARN_ON_ONCE(subreq->wb_offset != offset);
- WARN_ON_ONCE(subreq->wb_pgbase != pgbase);
-
+ subreq_size = subreq->wb_bytes;
+ for(;;) {
+ size = nfs_pageio_do_add_request(desc, subreq);
+ if (size == subreq_size) {
+ /* We successfully submitted a request */
+ if (subreq == req)
+ break;
+ req->wb_pgbase += size;
+ req->wb_bytes -= size;
+ req->wb_offset += size;
+ subreq_size = req->wb_bytes;
+ subreq = req;
+ continue;
+ }
+ if (WARN_ON_ONCE(subreq != req)) {
+ nfs_page_group_unlock(req);
+ nfs_pageio_cleanup_request(desc, subreq);
+ subreq = req;
+ subreq_size = req->wb_bytes;
+ nfs_page_group_lock(req);
+ }
+ if (!size) {
+ /* Can't coalesce any more, so do I/O */
nfs_page_group_unlock(req);
desc->pg_moreio = 1;
nfs_pageio_doio(desc);
if (desc->pg_error < 0 || mirror->pg_recoalesce)
- goto out_cleanup_subreq;
+ return 0;
/* retry add_request for this subreq */
nfs_page_group_lock(req);
continue;
}
-
- /* check for buggy pg_test call(s) */
- WARN_ON_ONCE(subreq->wb_bytes + subreq->wb_pgbase > PAGE_SIZE);
- WARN_ON_ONCE(subreq->wb_bytes > bytes_left);
- WARN_ON_ONCE(subreq->wb_bytes == 0);
-
- bytes_left -= subreq->wb_bytes;
- offset += subreq->wb_bytes;
- pgbase += subreq->wb_bytes;
-
- if (bytes_left) {
- subreq = nfs_create_subreq(req, subreq, pgbase,
- offset, bytes_left);
- if (IS_ERR(subreq))
- goto err_ptr;
- }
- } while (bytes_left > 0);
+ subreq = nfs_create_subreq(req, req->wb_pgbase,
+ req->wb_offset, size);
+ if (IS_ERR(subreq))
+ goto err_ptr;
+ subreq_size = size;
+ }
nfs_page_group_unlock(req);
return 1;
@@ -1109,10 +1218,6 @@
desc->pg_error = PTR_ERR(subreq);
nfs_page_group_unlock(req);
return 0;
-out_cleanup_subreq:
- if (req != subreq)
- nfs_pageio_cleanup_request(desc, subreq);
- return 0;
}
static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
@@ -1169,7 +1274,7 @@
return;
for (midx = 0; midx < desc->pg_mirror_count; midx++) {
- mirror = &desc->pg_mirrors[midx];
+ mirror = nfs_pgio_get_mirror(desc, midx);
desc->pg_completion_ops->error_cleanup(&mirror->pg_list,
desc->pg_error);
}
@@ -1180,7 +1285,7 @@
{
u32 midx;
unsigned int pgbase, offset, bytes;
- struct nfs_page *dupreq, *lastreq;
+ struct nfs_page *dupreq;
pgbase = req->wb_pgbase;
offset = req->wb_offset;
@@ -1194,13 +1299,7 @@
for (midx = 1; midx < desc->pg_mirror_count; midx++) {
nfs_page_group_lock(req);
- /* find the last request */
- for (lastreq = req->wb_head;
- lastreq->wb_this_page != req->wb_head;
- lastreq = lastreq->wb_this_page)
- ;
-
- dupreq = nfs_create_subreq(req, lastreq,
+ dupreq = nfs_create_subreq(req,
pgbase, offset, bytes);
nfs_page_group_unlock(req);
@@ -1209,12 +1308,12 @@
goto out_failed;
}
- desc->pg_mirror_idx = midx;
+ nfs_pgio_set_current_mirror(desc, midx);
if (!nfs_pageio_add_request_mirror(desc, dupreq))
goto out_cleanup_subreq;
}
- desc->pg_mirror_idx = 0;
+ nfs_pgio_set_current_mirror(desc, 0);
if (!nfs_pageio_add_request_mirror(desc, req))
goto out_failed;
@@ -1236,11 +1335,12 @@
static void nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc,
u32 mirror_idx)
{
- struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[mirror_idx];
- u32 restore_idx = desc->pg_mirror_idx;
+ struct nfs_pgio_mirror *mirror;
+ u32 restore_idx;
- if (nfs_pgio_has_mirroring(desc))
- desc->pg_mirror_idx = mirror_idx;
+ restore_idx = nfs_pgio_set_current_mirror(desc, mirror_idx);
+ mirror = nfs_pgio_current_mirror(desc);
+
for (;;) {
nfs_pageio_doio(desc);
if (desc->pg_error < 0 || !mirror->pg_recoalesce)
@@ -1248,7 +1348,7 @@
if (!nfs_do_recoalesce(desc))
break;
}
- desc->pg_mirror_idx = restore_idx;
+ nfs_pgio_set_current_mirror(desc, restore_idx);
}
/*
@@ -1322,7 +1422,7 @@
u32 midx;
for (midx = 0; midx < desc->pg_mirror_count; midx++) {
- mirror = &desc->pg_mirrors[midx];
+ mirror = nfs_pgio_get_mirror(desc, midx);
if (!list_empty(&mirror->pg_list)) {
prev = nfs_list_entry(mirror->pg_list.prev);
if (index != prev->wb_index + 1) {
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 1b512df..5370e08 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -268,11 +268,11 @@
struct nfs_server *server = NFS_SERVER(lo->plh_inode);
struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
- if (!list_empty(&lo->plh_layouts)) {
+ if (test_and_clear_bit(NFS_LAYOUT_HASHED, &lo->plh_flags)) {
struct nfs_client *clp = server->nfs_client;
spin_lock(&clp->cl_lock);
- list_del_init(&lo->plh_layouts);
+ list_del_rcu(&lo->plh_layouts);
spin_unlock(&clp->cl_lock);
}
put_cred(lo->plh_lc_cred);
@@ -314,6 +314,31 @@
}
}
+static struct inode *
+pnfs_grab_inode_layout_hdr(struct pnfs_layout_hdr *lo)
+{
+ struct inode *inode = igrab(lo->plh_inode);
+ if (inode)
+ return inode;
+ set_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags);
+ return NULL;
+}
+
+/*
+ * Compare 2 layout stateid sequence ids, to see which is newer,
+ * taking into account wraparound issues.
+ */
+static bool pnfs_seqid_is_newer(u32 s1, u32 s2)
+{
+ return (s32)(s1 - s2) > 0;
+}
+
+static void pnfs_barrier_update(struct pnfs_layout_hdr *lo, u32 newseq)
+{
+ if (pnfs_seqid_is_newer(newseq, lo->plh_barrier) || !lo->plh_barrier)
+ lo->plh_barrier = newseq;
+}
+
static void
pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode,
u32 seq)
@@ -322,10 +347,15 @@
iomode = IOMODE_ANY;
lo->plh_return_iomode = iomode;
set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
- if (seq != 0) {
- WARN_ON_ONCE(lo->plh_return_seq != 0 && lo->plh_return_seq != seq);
+ /*
+ * We must set lo->plh_return_seq to avoid livelocks with
+ * pnfs_layout_need_return()
+ */
+ if (seq == 0)
+ seq = be32_to_cpu(lo->plh_stateid.seqid);
+ if (!lo->plh_return_seq || pnfs_seqid_is_newer(seq, lo->plh_return_seq))
lo->plh_return_seq = seq;
- }
+ pnfs_barrier_update(lo, seq);
}
static void
@@ -501,6 +531,7 @@
{
INIT_LIST_HEAD(&lseg->pls_list);
INIT_LIST_HEAD(&lseg->pls_lc_list);
+ INIT_LIST_HEAD(&lseg->pls_commits);
refcount_set(&lseg->pls_refcount, 1);
set_bit(NFS_LSEG_VALID, &lseg->pls_flags);
lseg->pls_layout = lo;
@@ -628,15 +659,6 @@
return rv;
}
-/*
- * Compare 2 layout stateid sequence ids, to see which is newer,
- * taking into account wraparound issues.
- */
-static bool pnfs_seqid_is_newer(u32 s1, u32 s2)
-{
- return (s32)(s1 - s2) > 0;
-}
-
static bool
pnfs_should_free_range(const struct pnfs_layout_range *lseg_range,
const struct pnfs_layout_range *recall_range)
@@ -811,9 +833,10 @@
/* If the sb is being destroyed, just bail */
if (!nfs_sb_active(server->super))
break;
- inode = igrab(lo->plh_inode);
+ inode = pnfs_grab_inode_layout_hdr(lo);
if (inode != NULL) {
- list_del_init(&lo->plh_layouts);
+ if (test_and_clear_bit(NFS_LAYOUT_HASHED, &lo->plh_flags))
+ list_del_rcu(&lo->plh_layouts);
if (pnfs_layout_add_bulk_destroy_list(inode,
layout_list))
continue;
@@ -823,7 +846,6 @@
} else {
rcu_read_unlock();
spin_unlock(&clp->cl_lock);
- set_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags);
}
nfs_sb_deactive(server->super);
spin_lock(&clp->cl_lock);
@@ -920,7 +942,7 @@
}
/*
- * Called by the state manger to remove all layouts established under an
+ * Called by the state manager to remove all layouts established under an
* expired lease.
*/
void
@@ -932,37 +954,48 @@
pnfs_destroy_layouts_byclid(clp, false);
}
+static void
+pnfs_set_layout_cred(struct pnfs_layout_hdr *lo, const struct cred *cred)
+{
+ const struct cred *old;
+
+ if (cred && cred_fscmp(lo->plh_lc_cred, cred) != 0) {
+ old = xchg(&lo->plh_lc_cred, get_cred(cred));
+ put_cred(old);
+ }
+}
+
/* update lo->plh_stateid with new if is more recent */
void
pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
- bool update_barrier)
+ const struct cred *cred, bool update_barrier)
{
- u32 oldseq, newseq, new_barrier = 0;
-
- oldseq = be32_to_cpu(lo->plh_stateid.seqid);
- newseq = be32_to_cpu(new->seqid);
+ u32 oldseq = be32_to_cpu(lo->plh_stateid.seqid);
+ u32 newseq = be32_to_cpu(new->seqid);
if (!pnfs_layout_is_valid(lo)) {
+ pnfs_set_layout_cred(lo, cred);
nfs4_stateid_copy(&lo->plh_stateid, new);
lo->plh_barrier = newseq;
pnfs_clear_layoutreturn_info(lo);
clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
return;
}
- if (pnfs_seqid_is_newer(newseq, oldseq)) {
+
+ if (pnfs_seqid_is_newer(newseq, oldseq))
nfs4_stateid_copy(&lo->plh_stateid, new);
- /*
- * Because of wraparound, we want to keep the barrier
- * "close" to the current seqids.
- */
- new_barrier = newseq - atomic_read(&lo->plh_outstanding);
- }
- if (update_barrier)
- new_barrier = be32_to_cpu(new->seqid);
- else if (new_barrier == 0)
+
+ if (update_barrier) {
+ pnfs_barrier_update(lo, newseq);
return;
- if (pnfs_seqid_is_newer(new_barrier, lo->plh_barrier))
- lo->plh_barrier = new_barrier;
+ }
+ /*
+ * Because of wraparound, we want to keep the barrier
+ * "close" to the current seqids. We really only want to
+ * get here from a layoutget call.
+ */
+ if (atomic_read(&lo->plh_outstanding) == 1)
+ pnfs_barrier_update(lo, be32_to_cpu(lo->plh_stateid.seqid));
}
static bool
@@ -971,7 +1004,7 @@
{
u32 seqid = be32_to_cpu(stateid->seqid);
- return !pnfs_seqid_is_newer(seqid, lo->plh_barrier);
+ return lo->plh_barrier && pnfs_seqid_is_newer(lo->plh_barrier, seqid);
}
/* lget is set to 1 if called from inside send_layoutget call chain */
@@ -1090,7 +1123,7 @@
lgp->args.ctx = get_nfs_open_context(ctx);
nfs4_stateid_copy(&lgp->args.stateid, stateid);
lgp->gfp_flags = gfp_flags;
- lgp->cred = get_cred(ctx->cred);
+ lgp->cred = ctx->cred;
return lgp;
}
@@ -1101,7 +1134,6 @@
nfs4_free_pages(lgp->args.layout.pages, max_pages);
if (lgp->args.inode)
pnfs_put_layout_hdr(NFS_I(lgp->args.inode)->layout);
- put_cred(lgp->cred);
put_nfs_open_context(lgp->args.ctx);
kfree(lgp);
}
@@ -1138,7 +1170,7 @@
pnfs_mark_matching_lsegs_invalid(lo, &freeme, range, seq);
pnfs_free_returned_lsegs(lo, &freeme, range, seq);
- pnfs_set_layout_stateid(lo, stateid, true);
+ pnfs_set_layout_stateid(lo, stateid, NULL, true);
} else
pnfs_mark_layout_stateid_invalid(lo, &freeme);
out_unlock:
@@ -1151,6 +1183,7 @@
static bool
pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo,
nfs4_stateid *stateid,
+ const struct cred **cred,
enum pnfs_iomode *iomode)
{
/* Serialise LAYOUTGET/LAYOUTRETURN */
@@ -1160,21 +1193,17 @@
return false;
set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags);
pnfs_get_layout_hdr(lo);
+ nfs4_stateid_copy(stateid, &lo->plh_stateid);
+ *cred = get_cred(lo->plh_lc_cred);
if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) {
- if (stateid != NULL) {
- nfs4_stateid_copy(stateid, &lo->plh_stateid);
- if (lo->plh_return_seq != 0)
- stateid->seqid = cpu_to_be32(lo->plh_return_seq);
- }
+ if (lo->plh_return_seq != 0)
+ stateid->seqid = cpu_to_be32(lo->plh_return_seq);
if (iomode != NULL)
*iomode = lo->plh_return_iomode;
pnfs_clear_layoutreturn_info(lo);
- return true;
- }
- if (stateid != NULL)
- nfs4_stateid_copy(stateid, &lo->plh_stateid);
- if (iomode != NULL)
+ } else if (iomode != NULL)
*iomode = IOMODE_ANY;
+ pnfs_barrier_update(lo, be32_to_cpu(stateid->seqid));
return true;
}
@@ -1196,20 +1225,26 @@
}
static int
-pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid,
- enum pnfs_iomode iomode, bool sync)
+pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo,
+ const nfs4_stateid *stateid,
+ const struct cred **pcred,
+ enum pnfs_iomode iomode,
+ bool sync)
{
struct inode *ino = lo->plh_inode;
struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
struct nfs4_layoutreturn *lrp;
+ const struct cred *cred = *pcred;
int status = 0;
+ *pcred = NULL;
lrp = kzalloc(sizeof(*lrp), GFP_NOFS);
if (unlikely(lrp == NULL)) {
status = -ENOMEM;
spin_lock(&ino->i_lock);
pnfs_clear_layoutreturn_waitbit(lo);
spin_unlock(&ino->i_lock);
+ put_cred(cred);
pnfs_put_layout_hdr(lo);
goto out;
}
@@ -1217,7 +1252,7 @@
pnfs_init_layoutreturn_args(&lrp->args, lo, stateid, iomode);
lrp->args.ld_private = &lrp->ld_private;
lrp->clp = NFS_SERVER(ino)->nfs_client;
- lrp->cred = lo->plh_lc_cred;
+ lrp->cred = cred;
if (ld->prepare_layoutreturn)
ld->prepare_layoutreturn(&lrp->args);
@@ -1258,15 +1293,16 @@
return;
spin_lock(&inode->i_lock);
if (pnfs_layout_need_return(lo)) {
+ const struct cred *cred;
nfs4_stateid stateid;
enum pnfs_iomode iomode;
bool send;
- send = pnfs_prepare_layoutreturn(lo, &stateid, &iomode);
+ send = pnfs_prepare_layoutreturn(lo, &stateid, &cred, &iomode);
spin_unlock(&inode->i_lock);
if (send) {
/* Send an async layoutreturn so we dont deadlock */
- pnfs_send_layoutreturn(lo, &stateid, iomode, false);
+ pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false);
}
} else
spin_unlock(&inode->i_lock);
@@ -1291,6 +1327,7 @@
.length = NFS4_MAX_UINT64,
};
LIST_HEAD(tmp_list);
+ const struct cred *cred;
nfs4_stateid stateid;
int status = 0;
bool send, valid_layout;
@@ -1326,13 +1363,15 @@
!valid_layout) {
spin_unlock(&ino->i_lock);
dprintk("NFS: %s no layout segments to return\n", __func__);
- goto out_put_layout_hdr;
+ goto out_wait_layoutreturn;
}
- send = pnfs_prepare_layoutreturn(lo, &stateid, NULL);
+ send = pnfs_prepare_layoutreturn(lo, &stateid, &cred, NULL);
spin_unlock(&ino->i_lock);
if (send)
- status = pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true);
+ status = pnfs_send_layoutreturn(lo, &stateid, &cred, IOMODE_ANY, true);
+out_wait_layoutreturn:
+ wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN, TASK_UNINTERRUPTIBLE);
out_put_layout_hdr:
pnfs_free_lseg_list(&tmp_list);
pnfs_put_layout_hdr(lo);
@@ -1378,6 +1417,7 @@
struct nfs4_state *state;
struct pnfs_layout_hdr *lo;
struct pnfs_layout_segment *lseg, *next;
+ const struct cred *lc_cred;
nfs4_stateid stateid;
enum pnfs_iomode iomode = 0;
bool layoutreturn = false, roc = false;
@@ -1447,15 +1487,16 @@
* 2. we don't send layoutreturn
*/
/* lo ref dropped in pnfs_roc_release() */
- layoutreturn = pnfs_prepare_layoutreturn(lo, &stateid, &iomode);
+ layoutreturn = pnfs_prepare_layoutreturn(lo, &stateid, &lc_cred, &iomode);
/* If the creds don't match, we can't compound the layoutreturn */
- if (!layoutreturn || cred_fscmp(cred, lo->plh_lc_cred) != 0)
+ if (!layoutreturn || cred_fscmp(cred, lc_cred) != 0)
goto out_noroc;
roc = layoutreturn;
pnfs_init_layoutreturn_args(args, lo, &stateid, iomode);
res->lrs_present = 0;
layoutreturn = false;
+ put_cred(lc_cred);
out_noroc:
spin_unlock(&ino->i_lock);
@@ -1469,7 +1510,7 @@
return true;
}
if (layoutreturn)
- pnfs_send_layoutreturn(lo, &stateid, iomode, true);
+ pnfs_send_layoutreturn(lo, &stateid, &lc_cred, iomode, true);
pnfs_put_layout_hdr(lo);
return false;
}
@@ -1539,16 +1580,16 @@
case 0:
if (res->lrs_present)
res_stateid = &res->stateid;
- /* Fallthrough */
+ fallthrough;
default:
arg_stateid = &args->stateid;
}
+ trace_nfs4_layoutreturn_on_close(args->inode, &args->stateid, ret);
pnfs_layoutreturn_free_lsegs(lo, arg_stateid, &args->range,
res_stateid);
if (ld_private && ld_private->ops && ld_private->ops->free)
ld_private->ops->free(ld_private);
pnfs_put_layout_hdr(lo);
- trace_nfs4_layoutreturn_on_close(args->inode, 0);
}
bool pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task)
@@ -1875,6 +1916,11 @@
wake_up_var(&lo->plh_outstanding);
}
+static bool pnfs_is_first_layoutget(struct pnfs_layout_hdr *lo)
+{
+ return test_bit(NFS_LAYOUT_FIRST_LAYOUTGET, &lo->plh_flags);
+}
+
static void pnfs_clear_first_layoutget(struct pnfs_layout_hdr *lo)
{
unsigned long *bitlock = &lo->plh_flags;
@@ -1887,15 +1933,14 @@
static void _add_to_server_list(struct pnfs_layout_hdr *lo,
struct nfs_server *server)
{
- if (list_empty(&lo->plh_layouts)) {
+ if (!test_and_set_bit(NFS_LAYOUT_HASHED, &lo->plh_flags)) {
struct nfs_client *clp = server->nfs_client;
/* The lo must be on the clp list if there is any
* chance of a CB_LAYOUTRECALL(FILE) coming in.
*/
spin_lock(&clp->cl_lock);
- if (list_empty(&lo->plh_layouts))
- list_add_tail(&lo->plh_layouts, &server->layouts);
+ list_add_tail_rcu(&lo->plh_layouts, &server->layouts);
spin_unlock(&clp->cl_lock);
}
}
@@ -1974,7 +2019,7 @@
* If the layout segment list is empty, but there are outstanding
* layoutget calls, then they might be subject to a layoutrecall.
*/
- if (list_empty(&lo->plh_segs) &&
+ if ((list_empty(&lo->plh_segs) || !pnfs_layout_is_valid(lo)) &&
atomic_read(&lo->plh_outstanding) != 0) {
spin_unlock(&ino->i_lock);
lseg = ERR_PTR(wait_var_event_killable(&lo->plh_outstanding,
@@ -2038,6 +2083,7 @@
goto lookup_again;
}
+ spin_unlock(&ino->i_lock);
first = true;
status = nfs4_select_rw_stateid(ctx->state,
iomode == IOMODE_RW ? FMODE_WRITE : FMODE_READ,
@@ -2047,14 +2093,12 @@
trace_pnfs_update_layout(ino, pos, count,
iomode, lo, lseg,
PNFS_UPDATE_LAYOUT_INVALID_OPEN);
- if (status != -EAGAIN)
- goto out_unlock;
- spin_unlock(&ino->i_lock);
nfs4_schedule_stateid_recovery(server, ctx->state);
pnfs_clear_first_layoutget(lo);
pnfs_put_layout_hdr(lo);
goto lookup_again;
}
+ spin_lock(&ino->i_lock);
} else {
nfs4_stateid_copy(&stateid, &lo->plh_stateid);
}
@@ -2187,8 +2231,6 @@
return NULL;
}
-extern const nfs4_stateid current_stateid;
-
static void _lgopen_prepare_attached(struct nfs4_opendata *data,
struct nfs_open_context *ctx)
{
@@ -2353,17 +2395,19 @@
goto out_forget;
}
- if (!pnfs_layout_is_valid(lo)) {
- /* We have a completely new layout */
- pnfs_set_layout_stateid(lo, &res->stateid, true);
- } else if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) {
+ if (!pnfs_layout_is_valid(lo) && !pnfs_is_first_layoutget(lo))
+ goto out_forget;
+
+ if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) {
/* existing state ID, make sure the sequence number matches. */
if (pnfs_layout_stateid_blocked(lo, &res->stateid)) {
+ if (!pnfs_layout_is_valid(lo))
+ lo->plh_barrier = 0;
dprintk("%s forget reply due to sequence\n", __func__);
goto out_forget;
}
- pnfs_set_layout_stateid(lo, &res->stateid, false);
- } else {
+ pnfs_set_layout_stateid(lo, &res->stateid, lgp->cred, false);
+ } else if (pnfs_layout_is_valid(lo)) {
/*
* We got an entirely new state ID. Mark all segments for the
* inode invalid, and retry the layoutget
@@ -2376,6 +2420,9 @@
pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs,
&range, 0);
goto out_forget;
+ } else {
+ /* We have a completely new layout */
+ pnfs_set_layout_stateid(lo, &res->stateid, lgp->cred, true);
}
pnfs_get_lseg(lseg);
@@ -2457,43 +2504,159 @@
return -ENOENT;
}
-void pnfs_error_mark_layout_for_return(struct inode *inode,
- struct pnfs_layout_segment *lseg)
+static void
+pnfs_mark_layout_for_return(struct inode *inode,
+ const struct pnfs_layout_range *range)
{
- struct pnfs_layout_hdr *lo = NFS_I(inode)->layout;
- struct pnfs_layout_range range = {
- .iomode = lseg->pls_range.iomode,
- .offset = 0,
- .length = NFS4_MAX_UINT64,
- };
+ struct pnfs_layout_hdr *lo;
bool return_now = false;
spin_lock(&inode->i_lock);
+ lo = NFS_I(inode)->layout;
if (!pnfs_layout_is_valid(lo)) {
spin_unlock(&inode->i_lock);
return;
}
- pnfs_set_plh_return_info(lo, range.iomode, 0);
+ pnfs_set_plh_return_info(lo, range->iomode, 0);
/*
* mark all matching lsegs so that we are sure to have no live
* segments at hand when sending layoutreturn. See pnfs_put_lseg()
* for how it works.
*/
- if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, &range, 0) != -EBUSY) {
+ if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, range, 0) != -EBUSY) {
+ const struct cred *cred;
nfs4_stateid stateid;
enum pnfs_iomode iomode;
- return_now = pnfs_prepare_layoutreturn(lo, &stateid, &iomode);
+ return_now = pnfs_prepare_layoutreturn(lo, &stateid, &cred, &iomode);
spin_unlock(&inode->i_lock);
if (return_now)
- pnfs_send_layoutreturn(lo, &stateid, iomode, false);
+ pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false);
} else {
spin_unlock(&inode->i_lock);
nfs_commit_inode(inode, 0);
}
}
+
+void pnfs_error_mark_layout_for_return(struct inode *inode,
+ struct pnfs_layout_segment *lseg)
+{
+ struct pnfs_layout_range range = {
+ .iomode = lseg->pls_range.iomode,
+ .offset = 0,
+ .length = NFS4_MAX_UINT64,
+ };
+
+ pnfs_mark_layout_for_return(inode, &range);
+}
EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return);
+static bool
+pnfs_layout_can_be_returned(struct pnfs_layout_hdr *lo)
+{
+ return pnfs_layout_is_valid(lo) &&
+ !test_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags) &&
+ !test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags);
+}
+
+static struct pnfs_layout_segment *
+pnfs_find_first_lseg(struct pnfs_layout_hdr *lo,
+ const struct pnfs_layout_range *range,
+ enum pnfs_iomode iomode)
+{
+ struct pnfs_layout_segment *lseg;
+
+ list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
+ if (!test_bit(NFS_LSEG_VALID, &lseg->pls_flags))
+ continue;
+ if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
+ continue;
+ if (lseg->pls_range.iomode != iomode && iomode != IOMODE_ANY)
+ continue;
+ if (pnfs_lseg_range_intersecting(&lseg->pls_range, range))
+ return lseg;
+ }
+ return NULL;
+}
+
+/* Find open file states whose mode matches that of the range */
+static bool
+pnfs_should_return_unused_layout(struct pnfs_layout_hdr *lo,
+ const struct pnfs_layout_range *range)
+{
+ struct list_head *head;
+ struct nfs_open_context *ctx;
+ fmode_t mode = 0;
+
+ if (!pnfs_layout_can_be_returned(lo) ||
+ !pnfs_find_first_lseg(lo, range, range->iomode))
+ return false;
+
+ head = &NFS_I(lo->plh_inode)->open_files;
+ list_for_each_entry_rcu(ctx, head, list) {
+ if (ctx->state)
+ mode |= ctx->state->state & (FMODE_READ|FMODE_WRITE);
+ }
+
+ switch (range->iomode) {
+ default:
+ break;
+ case IOMODE_READ:
+ mode &= ~FMODE_WRITE;
+ break;
+ case IOMODE_RW:
+ if (pnfs_find_first_lseg(lo, range, IOMODE_READ))
+ mode &= ~FMODE_READ;
+ }
+ return mode == 0;
+}
+
+static int
+pnfs_layout_return_unused_byserver(struct nfs_server *server, void *data)
+{
+ const struct pnfs_layout_range *range = data;
+ struct pnfs_layout_hdr *lo;
+ struct inode *inode;
+restart:
+ rcu_read_lock();
+ list_for_each_entry_rcu(lo, &server->layouts, plh_layouts) {
+ if (!pnfs_layout_can_be_returned(lo) ||
+ test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
+ continue;
+ inode = lo->plh_inode;
+ spin_lock(&inode->i_lock);
+ if (!pnfs_should_return_unused_layout(lo, range)) {
+ spin_unlock(&inode->i_lock);
+ continue;
+ }
+ spin_unlock(&inode->i_lock);
+ inode = pnfs_grab_inode_layout_hdr(lo);
+ if (!inode)
+ continue;
+ rcu_read_unlock();
+ pnfs_mark_layout_for_return(inode, range);
+ iput(inode);
+ cond_resched();
+ goto restart;
+ }
+ rcu_read_unlock();
+ return 0;
+}
+
+void
+pnfs_layout_return_unused_byclid(struct nfs_client *clp,
+ enum pnfs_iomode iomode)
+{
+ struct pnfs_layout_range range = {
+ .iomode = iomode,
+ .offset = 0,
+ .length = NFS4_MAX_UINT64,
+ };
+
+ nfs_client_for_each_server(clp, pnfs_layout_return_unused_byserver,
+ &range);
+}
+
void
pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio)
{
@@ -2509,7 +2672,7 @@
* Check for any intersection between the request and the pgio->pg_lseg,
* and if none, put this pgio->pg_lseg away.
*/
-static void
+void
pnfs_generic_pg_check_range(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
{
if (pgio->pg_lseg && !pnfs_lseg_request_intersecting(pgio->pg_lseg, req)) {
@@ -2517,6 +2680,7 @@
pgio->pg_lseg = NULL;
}
}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_check_range);
void
pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
@@ -2836,7 +3000,8 @@
}
/* Resend all requests through pnfs. */
-void pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr)
+void pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr,
+ unsigned int mirror_idx)
{
struct nfs_pageio_descriptor pgio;
@@ -2847,6 +3012,7 @@
nfs_pageio_init_read(&pgio, hdr->inode, false,
hdr->completion_ops);
+ pgio.pg_mirror_idx = mirror_idx;
hdr->task.tk_status = nfs_pageio_resend(&pgio, hdr);
}
}
@@ -3034,10 +3200,10 @@
end_pos = nfsi->layout->plh_lwb;
nfs4_stateid_copy(&data->args.stateid, &nfsi->layout->plh_stateid);
+ data->cred = get_cred(nfsi->layout->plh_lc_cred);
spin_unlock(&inode->i_lock);
data->args.inode = inode;
- data->cred = get_cred(nfsi->layout->plh_lc_cred);
nfs_fattr_init(&data->fattr);
data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask;
data->res.fattr = &data->fattr;
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 3d55edd..0212fe3 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -66,6 +66,7 @@
struct pnfs_layout_segment {
struct list_head pls_list;
struct list_head pls_lc_list;
+ struct list_head pls_commits;
struct pnfs_layout_range pls_range;
refcount_t pls_refcount;
u32 pls_seq;
@@ -79,6 +80,10 @@
PNFS_TRY_AGAIN = 2,
};
+/* error codes for internal use */
+#define NFS4ERR_RESET_TO_MDS 12001
+#define NFS4ERR_RESET_TO_PNFS 12002
+
#ifdef CONFIG_NFS_V4_1
#define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4"
@@ -91,10 +96,6 @@
#define NFS4_DEF_DS_RETRANS 5
#define PNFS_DEVICE_RETRY_TIMEOUT (120*HZ)
-/* error codes for internal use */
-#define NFS4ERR_RESET_TO_MDS 12001
-#define NFS4ERR_RESET_TO_PNFS 12002
-
enum {
NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */
NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */
@@ -105,6 +106,7 @@
NFS_LAYOUT_INVALID_STID, /* layout stateid id is invalid */
NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first layoutget */
NFS_LAYOUT_INODE_FREEING, /* The inode is being freed */
+ NFS_LAYOUT_HASHED, /* The layout visible */
};
enum layoutdriver_policy_flags {
@@ -148,22 +150,6 @@
const struct nfs_pageio_ops *pg_write_ops;
struct pnfs_ds_commit_info *(*get_ds_info) (struct inode *inode);
- void (*mark_request_commit) (struct nfs_page *req,
- struct pnfs_layout_segment *lseg,
- struct nfs_commit_info *cinfo,
- u32 ds_commit_idx);
- void (*clear_request_commit) (struct nfs_page *req,
- struct nfs_commit_info *cinfo);
- int (*scan_commit_lists) (struct nfs_commit_info *cinfo,
- int max);
- void (*recover_commit_reqs) (struct list_head *list,
- struct nfs_commit_info *cinfo);
- struct nfs_page * (*search_commit_reqs)(struct nfs_commit_info *cinfo,
- struct page *page);
- int (*commit_pagelist)(struct inode *inode,
- struct list_head *mds_pages,
- int how,
- struct nfs_commit_info *cinfo);
int (*sync)(struct inode *inode, bool datasync);
@@ -186,6 +172,29 @@
int (*prepare_layoutstats) (struct nfs42_layoutstat_args *args);
};
+struct pnfs_commit_ops {
+ void (*setup_ds_info)(struct pnfs_ds_commit_info *,
+ struct pnfs_layout_segment *);
+ void (*release_ds_info)(struct pnfs_ds_commit_info *,
+ struct inode *inode);
+ int (*commit_pagelist)(struct inode *inode,
+ struct list_head *mds_pages,
+ int how,
+ struct nfs_commit_info *cinfo);
+ void (*mark_request_commit) (struct nfs_page *req,
+ struct pnfs_layout_segment *lseg,
+ struct nfs_commit_info *cinfo,
+ u32 ds_commit_idx);
+ void (*clear_request_commit) (struct nfs_page *req,
+ struct nfs_commit_info *cinfo);
+ int (*scan_commit_lists) (struct nfs_commit_info *cinfo,
+ int max);
+ void (*recover_commit_reqs) (struct list_head *list,
+ struct nfs_commit_info *cinfo);
+ struct nfs_page * (*search_commit_reqs)(struct nfs_commit_info *cinfo,
+ struct page *page);
+};
+
struct pnfs_layout_hdr {
refcount_t plh_refcount;
atomic_t plh_outstanding; /* number of RPCs out */
@@ -203,6 +212,7 @@
loff_t plh_lwb; /* last write byte for layoutcommit */
const struct cred *plh_lc_cred; /* layoutcommit cred */
struct inode *plh_inode;
+ struct rcu_head plh_rcu;
};
struct pnfs_device {
@@ -242,6 +252,7 @@
void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, struct nfs_fsinfo *);
void unset_pnfs_layoutdriver(struct nfs_server *);
void pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio);
+void pnfs_generic_pg_check_range(struct nfs_pageio_descriptor *pgio, struct nfs_page *req);
void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *);
int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
@@ -268,6 +279,7 @@
void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo);
void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
const nfs4_stateid *new,
+ const struct cred *cred,
bool update_barrier);
int pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
struct list_head *tmp_list,
@@ -298,7 +310,7 @@
int pnfs_commit_and_return_layout(struct inode *);
void pnfs_ld_write_done(struct nfs_pgio_header *);
void pnfs_ld_read_done(struct nfs_pgio_header *);
-void pnfs_read_resend_pnfs(struct nfs_pgio_header *);
+void pnfs_read_resend_pnfs(struct nfs_pgio_header *, unsigned int mirror_idx);
struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
struct nfs_open_context *ctx,
loff_t pos,
@@ -325,6 +337,9 @@
struct nfs4_threshold *pnfs_mdsthreshold_alloc(void);
void pnfs_error_mark_layout_for_return(struct inode *inode,
struct pnfs_layout_segment *lseg);
+void pnfs_layout_return_unused_byclid(struct nfs_client *clp,
+ enum pnfs_iomode iomode);
+
/* nfs4_deviceid_flags */
enum {
NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */
@@ -359,6 +374,16 @@
void nfs4_deviceid_purge_client(const struct nfs_client *);
/* pnfs_nfs.c */
+struct pnfs_commit_array *pnfs_alloc_commit_array(size_t n, gfp_t gfp_flags);
+void pnfs_free_commit_array(struct pnfs_commit_array *p);
+struct pnfs_commit_array *pnfs_add_commit_array(struct pnfs_ds_commit_info *,
+ struct pnfs_commit_array *,
+ struct pnfs_layout_segment *);
+
+void pnfs_generic_ds_cinfo_release_lseg(struct pnfs_ds_commit_info *fl_cinfo,
+ struct pnfs_layout_segment *lseg);
+void pnfs_generic_ds_cinfo_destroy(struct pnfs_ds_commit_info *fl_cinfo);
+
void pnfs_generic_clear_request_commit(struct nfs_page *req,
struct nfs_commit_info *cinfo);
void pnfs_generic_commit_release(void *calldata);
@@ -366,6 +391,8 @@
void pnfs_generic_rw_release(void *data);
void pnfs_generic_recover_commit_reqs(struct list_head *dst,
struct nfs_commit_info *cinfo);
+struct nfs_page *pnfs_generic_search_commit_reqs(struct nfs_commit_info *cinfo,
+ struct page *page);
int pnfs_generic_commit_pagelist(struct inode *inode,
struct list_head *mds_pages,
int how,
@@ -437,9 +464,11 @@
pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how,
struct nfs_commit_info *cinfo)
{
- if (cinfo->ds == NULL || cinfo->ds->ncommitting == 0)
+ struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
+
+ if (fl_cinfo == NULL || fl_cinfo->ncommitting == 0)
return PNFS_NOT_ATTEMPTED;
- return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how, cinfo);
+ return fl_cinfo->ops->commit_pagelist(inode, mds_pages, how, cinfo);
}
static inline struct pnfs_ds_commit_info *
@@ -453,6 +482,28 @@
}
static inline void
+pnfs_init_ds_commit_info_ops(struct pnfs_ds_commit_info *fl_cinfo, struct inode *inode)
+{
+ struct pnfs_ds_commit_info *inode_cinfo = pnfs_get_ds_info(inode);
+ if (inode_cinfo != NULL)
+ fl_cinfo->ops = inode_cinfo->ops;
+}
+
+static inline void
+pnfs_init_ds_commit_info(struct pnfs_ds_commit_info *fl_cinfo)
+{
+ INIT_LIST_HEAD(&fl_cinfo->commits);
+ fl_cinfo->ops = NULL;
+}
+
+static inline void
+pnfs_release_ds_info(struct pnfs_ds_commit_info *fl_cinfo, struct inode *inode)
+{
+ if (fl_cinfo->ops != NULL && fl_cinfo->ops->release_ds_info != NULL)
+ fl_cinfo->ops->release_ds_info(fl_cinfo, inode);
+}
+
+static inline void
pnfs_generic_mark_devid_invalid(struct nfs4_deviceid_node *node)
{
set_bit(NFS_DEVICEID_INVALID, &node->flags);
@@ -462,24 +513,22 @@
pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
struct nfs_commit_info *cinfo, u32 ds_commit_idx)
{
- struct inode *inode = d_inode(nfs_req_openctx(req)->dentry);
- struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
+ struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
- if (lseg == NULL || ld->mark_request_commit == NULL)
+ if (!lseg || !fl_cinfo->ops || !fl_cinfo->ops->mark_request_commit)
return false;
- ld->mark_request_commit(req, lseg, cinfo, ds_commit_idx);
+ fl_cinfo->ops->mark_request_commit(req, lseg, cinfo, ds_commit_idx);
return true;
}
static inline bool
pnfs_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo)
{
- struct inode *inode = d_inode(nfs_req_openctx(req)->dentry);
- struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
+ struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
- if (ld == NULL || ld->clear_request_commit == NULL)
+ if (!fl_cinfo || !fl_cinfo->ops || !fl_cinfo->ops->clear_request_commit)
return false;
- ld->clear_request_commit(req, cinfo);
+ fl_cinfo->ops->clear_request_commit(req, cinfo);
return true;
}
@@ -487,21 +536,31 @@
pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo,
int max)
{
- if (cinfo->ds == NULL || cinfo->ds->nwritten == 0)
+ struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
+
+ if (!fl_cinfo || fl_cinfo->nwritten == 0)
return 0;
- else
- return NFS_SERVER(inode)->pnfs_curr_ld->scan_commit_lists(cinfo, max);
+ return fl_cinfo->ops->scan_commit_lists(cinfo, max);
+}
+
+static inline void
+pnfs_recover_commit_reqs(struct list_head *head, struct nfs_commit_info *cinfo)
+{
+ struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
+
+ if (fl_cinfo && fl_cinfo->nwritten != 0)
+ fl_cinfo->ops->recover_commit_reqs(head, cinfo);
}
static inline struct nfs_page *
pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo,
struct page *page)
{
- struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
+ struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
- if (ld == NULL || ld->search_commit_reqs == NULL)
+ if (!fl_cinfo->ops || !fl_cinfo->ops->search_commit_reqs)
return NULL;
- return ld->search_commit_reqs(cinfo, page);
+ return fl_cinfo->ops->search_commit_reqs(cinfo, page);
}
/* Should the pNFS client commit and return the layout upon a setattr */
@@ -753,6 +812,21 @@
return NULL;
}
+static inline void
+pnfs_init_ds_commit_info_ops(struct pnfs_ds_commit_info *fl_cinfo, struct inode *inode)
+{
+}
+
+static inline void
+pnfs_init_ds_commit_info(struct pnfs_ds_commit_info *fl_cinfo)
+{
+}
+
+static inline void
+pnfs_release_ds_info(struct pnfs_ds_commit_info *fl_cinfo, struct inode *inode)
+{
+}
+
static inline bool
pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
struct nfs_commit_info *cinfo, u32 ds_commit_idx)
@@ -773,6 +847,11 @@
return 0;
}
+static inline void
+pnfs_recover_commit_reqs(struct list_head *head, struct nfs_commit_info *cinfo)
+{
+}
+
static inline struct nfs_page *
pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo,
struct page *page)
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 249cf90..7b9d701 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -59,6 +59,17 @@
}
EXPORT_SYMBOL_GPL(pnfs_generic_commit_release);
+static struct pnfs_layout_segment *
+pnfs_free_bucket_lseg(struct pnfs_commit_bucket *bucket)
+{
+ if (list_empty(&bucket->committing) && list_empty(&bucket->written)) {
+ struct pnfs_layout_segment *freeme = bucket->lseg;
+ bucket->lseg = NULL;
+ return freeme;
+ }
+ return NULL;
+}
+
/* The generic layer is about to remove the req from the commit list.
* If this will make the bucket empty, it will need to put the lseg reference.
* Note this must be called holding nfsi->commit_mutex
@@ -67,30 +78,169 @@
pnfs_generic_clear_request_commit(struct nfs_page *req,
struct nfs_commit_info *cinfo)
{
- struct pnfs_layout_segment *freeme = NULL;
+ struct pnfs_commit_bucket *bucket = NULL;
if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags))
goto out;
cinfo->ds->nwritten--;
- if (list_is_singular(&req->wb_list)) {
- struct pnfs_commit_bucket *bucket;
-
+ if (list_is_singular(&req->wb_list))
bucket = list_first_entry(&req->wb_list,
- struct pnfs_commit_bucket,
- written);
- freeme = bucket->wlseg;
- bucket->wlseg = NULL;
- }
+ struct pnfs_commit_bucket, written);
out:
nfs_request_remove_commit_list(req, cinfo);
- pnfs_put_lseg(freeme);
+ if (bucket)
+ pnfs_put_lseg(pnfs_free_bucket_lseg(bucket));
}
EXPORT_SYMBOL_GPL(pnfs_generic_clear_request_commit);
+struct pnfs_commit_array *
+pnfs_alloc_commit_array(size_t n, gfp_t gfp_flags)
+{
+ struct pnfs_commit_array *p;
+ struct pnfs_commit_bucket *b;
+
+ p = kmalloc(struct_size(p, buckets, n), gfp_flags);
+ if (!p)
+ return NULL;
+ p->nbuckets = n;
+ INIT_LIST_HEAD(&p->cinfo_list);
+ INIT_LIST_HEAD(&p->lseg_list);
+ p->lseg = NULL;
+ for (b = &p->buckets[0]; n != 0; b++, n--) {
+ INIT_LIST_HEAD(&b->written);
+ INIT_LIST_HEAD(&b->committing);
+ b->lseg = NULL;
+ b->direct_verf.committed = NFS_INVALID_STABLE_HOW;
+ }
+ return p;
+}
+EXPORT_SYMBOL_GPL(pnfs_alloc_commit_array);
+
+void
+pnfs_free_commit_array(struct pnfs_commit_array *p)
+{
+ kfree_rcu(p, rcu);
+}
+EXPORT_SYMBOL_GPL(pnfs_free_commit_array);
+
+static struct pnfs_commit_array *
+pnfs_find_commit_array_by_lseg(struct pnfs_ds_commit_info *fl_cinfo,
+ struct pnfs_layout_segment *lseg)
+{
+ struct pnfs_commit_array *array;
+
+ list_for_each_entry_rcu(array, &fl_cinfo->commits, cinfo_list) {
+ if (array->lseg == lseg)
+ return array;
+ }
+ return NULL;
+}
+
+struct pnfs_commit_array *
+pnfs_add_commit_array(struct pnfs_ds_commit_info *fl_cinfo,
+ struct pnfs_commit_array *new,
+ struct pnfs_layout_segment *lseg)
+{
+ struct pnfs_commit_array *array;
+
+ array = pnfs_find_commit_array_by_lseg(fl_cinfo, lseg);
+ if (array)
+ return array;
+ new->lseg = lseg;
+ refcount_set(&new->refcount, 1);
+ list_add_rcu(&new->cinfo_list, &fl_cinfo->commits);
+ list_add(&new->lseg_list, &lseg->pls_commits);
+ return new;
+}
+EXPORT_SYMBOL_GPL(pnfs_add_commit_array);
+
+static struct pnfs_commit_array *
+pnfs_lookup_commit_array(struct pnfs_ds_commit_info *fl_cinfo,
+ struct pnfs_layout_segment *lseg)
+{
+ struct pnfs_commit_array *array;
+
+ rcu_read_lock();
+ array = pnfs_find_commit_array_by_lseg(fl_cinfo, lseg);
+ if (!array) {
+ rcu_read_unlock();
+ fl_cinfo->ops->setup_ds_info(fl_cinfo, lseg);
+ rcu_read_lock();
+ array = pnfs_find_commit_array_by_lseg(fl_cinfo, lseg);
+ }
+ rcu_read_unlock();
+ return array;
+}
+
+static void
+pnfs_release_commit_array_locked(struct pnfs_commit_array *array)
+{
+ list_del_rcu(&array->cinfo_list);
+ list_del(&array->lseg_list);
+ pnfs_free_commit_array(array);
+}
+
+static void
+pnfs_put_commit_array_locked(struct pnfs_commit_array *array)
+{
+ if (refcount_dec_and_test(&array->refcount))
+ pnfs_release_commit_array_locked(array);
+}
+
+static void
+pnfs_put_commit_array(struct pnfs_commit_array *array, struct inode *inode)
+{
+ if (refcount_dec_and_lock(&array->refcount, &inode->i_lock)) {
+ pnfs_release_commit_array_locked(array);
+ spin_unlock(&inode->i_lock);
+ }
+}
+
+static struct pnfs_commit_array *
+pnfs_get_commit_array(struct pnfs_commit_array *array)
+{
+ if (refcount_inc_not_zero(&array->refcount))
+ return array;
+ return NULL;
+}
+
+static void
+pnfs_remove_and_free_commit_array(struct pnfs_commit_array *array)
+{
+ array->lseg = NULL;
+ list_del_init(&array->lseg_list);
+ pnfs_put_commit_array_locked(array);
+}
+
+void
+pnfs_generic_ds_cinfo_release_lseg(struct pnfs_ds_commit_info *fl_cinfo,
+ struct pnfs_layout_segment *lseg)
+{
+ struct pnfs_commit_array *array, *tmp;
+
+ list_for_each_entry_safe(array, tmp, &lseg->pls_commits, lseg_list)
+ pnfs_remove_and_free_commit_array(array);
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_ds_cinfo_release_lseg);
+
+void
+pnfs_generic_ds_cinfo_destroy(struct pnfs_ds_commit_info *fl_cinfo)
+{
+ struct pnfs_commit_array *array, *tmp;
+
+ list_for_each_entry_safe(array, tmp, &fl_cinfo->commits, cinfo_list)
+ pnfs_remove_and_free_commit_array(array);
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_ds_cinfo_destroy);
+
+/*
+ * Locks the nfs_page requests for commit and moves them to
+ * @bucket->committing.
+ */
static int
-pnfs_generic_scan_ds_commit_list(struct pnfs_commit_bucket *bucket,
- struct nfs_commit_info *cinfo,
- int max)
+pnfs_bucket_scan_ds_commit_list(struct pnfs_commit_bucket *bucket,
+ struct nfs_commit_info *cinfo,
+ int max)
{
struct list_head *src = &bucket->written;
struct list_head *dst = &bucket->committing;
@@ -101,158 +251,255 @@
if (ret) {
cinfo->ds->nwritten -= ret;
cinfo->ds->ncommitting += ret;
- if (bucket->clseg == NULL)
- bucket->clseg = pnfs_get_lseg(bucket->wlseg);
- if (list_empty(src)) {
- pnfs_put_lseg(bucket->wlseg);
- bucket->wlseg = NULL;
- }
}
return ret;
}
+static int pnfs_bucket_scan_array(struct nfs_commit_info *cinfo,
+ struct pnfs_commit_bucket *buckets,
+ unsigned int nbuckets,
+ int max)
+{
+ unsigned int i;
+ int rv = 0, cnt;
+
+ for (i = 0; i < nbuckets && max != 0; i++) {
+ cnt = pnfs_bucket_scan_ds_commit_list(&buckets[i], cinfo, max);
+ rv += cnt;
+ max -= cnt;
+ }
+ return rv;
+}
+
/* Move reqs from written to committing lists, returning count
* of number moved.
*/
-int pnfs_generic_scan_commit_lists(struct nfs_commit_info *cinfo,
- int max)
+int pnfs_generic_scan_commit_lists(struct nfs_commit_info *cinfo, int max)
{
- int i, rv = 0, cnt;
+ struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
+ struct pnfs_commit_array *array;
+ int rv = 0, cnt;
- lockdep_assert_held(&NFS_I(cinfo->inode)->commit_mutex);
- for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) {
- cnt = pnfs_generic_scan_ds_commit_list(&cinfo->ds->buckets[i],
- cinfo, max);
- max -= cnt;
+ rcu_read_lock();
+ list_for_each_entry_rcu(array, &fl_cinfo->commits, cinfo_list) {
+ if (!array->lseg || !pnfs_get_commit_array(array))
+ continue;
+ rcu_read_unlock();
+ cnt = pnfs_bucket_scan_array(cinfo, array->buckets,
+ array->nbuckets, max);
+ rcu_read_lock();
+ pnfs_put_commit_array(array, cinfo->inode);
rv += cnt;
+ max -= cnt;
+ if (!max)
+ break;
}
+ rcu_read_unlock();
return rv;
}
EXPORT_SYMBOL_GPL(pnfs_generic_scan_commit_lists);
+static unsigned int
+pnfs_bucket_recover_commit_reqs(struct list_head *dst,
+ struct pnfs_commit_bucket *buckets,
+ unsigned int nbuckets,
+ struct nfs_commit_info *cinfo)
+{
+ struct pnfs_commit_bucket *b;
+ struct pnfs_layout_segment *freeme;
+ unsigned int nwritten, ret = 0;
+ unsigned int i;
+
+restart:
+ for (i = 0, b = buckets; i < nbuckets; i++, b++) {
+ nwritten = nfs_scan_commit_list(&b->written, dst, cinfo, 0);
+ if (!nwritten)
+ continue;
+ ret += nwritten;
+ freeme = pnfs_free_bucket_lseg(b);
+ if (freeme) {
+ pnfs_put_lseg(freeme);
+ goto restart;
+ }
+ }
+ return ret;
+}
+
/* Pull everything off the committing lists and dump into @dst. */
void pnfs_generic_recover_commit_reqs(struct list_head *dst,
struct nfs_commit_info *cinfo)
{
- struct pnfs_commit_bucket *b;
- struct pnfs_layout_segment *freeme;
- int nwritten;
- int i;
+ struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
+ struct pnfs_commit_array *array;
+ unsigned int nwritten;
lockdep_assert_held(&NFS_I(cinfo->inode)->commit_mutex);
-restart:
- for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) {
- nwritten = nfs_scan_commit_list(&b->written, dst, cinfo, 0);
- if (!nwritten)
+ rcu_read_lock();
+ list_for_each_entry_rcu(array, &fl_cinfo->commits, cinfo_list) {
+ if (!array->lseg || !pnfs_get_commit_array(array))
continue;
- cinfo->ds->nwritten -= nwritten;
- if (list_empty(&b->written)) {
- freeme = b->wlseg;
- b->wlseg = NULL;
- pnfs_put_lseg(freeme);
- goto restart;
- }
+ rcu_read_unlock();
+ nwritten = pnfs_bucket_recover_commit_reqs(dst,
+ array->buckets,
+ array->nbuckets,
+ cinfo);
+ rcu_read_lock();
+ pnfs_put_commit_array(array, cinfo->inode);
+ fl_cinfo->nwritten -= nwritten;
}
+ rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(pnfs_generic_recover_commit_reqs);
-static void pnfs_generic_retry_commit(struct nfs_commit_info *cinfo, int idx)
+static struct nfs_page *
+pnfs_bucket_search_commit_reqs(struct pnfs_commit_bucket *buckets,
+ unsigned int nbuckets, struct page *page)
+{
+ struct nfs_page *req;
+ struct pnfs_commit_bucket *b;
+ unsigned int i;
+
+ /* Linearly search the commit lists for each bucket until a matching
+ * request is found */
+ for (i = 0, b = buckets; i < nbuckets; i++, b++) {
+ list_for_each_entry(req, &b->written, wb_list) {
+ if (req->wb_page == page)
+ return req->wb_head;
+ }
+ list_for_each_entry(req, &b->committing, wb_list) {
+ if (req->wb_page == page)
+ return req->wb_head;
+ }
+ }
+ return NULL;
+}
+
+/* pnfs_generic_search_commit_reqs - Search lists in @cinfo for the head reqest
+ * for @page
+ * @cinfo - commit info for current inode
+ * @page - page to search for matching head request
+ *
+ * Returns a the head request if one is found, otherwise returns NULL.
+ */
+struct nfs_page *
+pnfs_generic_search_commit_reqs(struct nfs_commit_info *cinfo, struct page *page)
{
struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
+ struct pnfs_commit_array *array;
+ struct nfs_page *req;
+
+ list_for_each_entry(array, &fl_cinfo->commits, cinfo_list) {
+ req = pnfs_bucket_search_commit_reqs(array->buckets,
+ array->nbuckets, page);
+ if (req)
+ return req;
+ }
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_search_commit_reqs);
+
+static struct pnfs_layout_segment *
+pnfs_bucket_get_committing(struct list_head *head,
+ struct pnfs_commit_bucket *bucket,
+ struct nfs_commit_info *cinfo)
+{
+ struct pnfs_layout_segment *lseg;
+ struct list_head *pos;
+
+ list_for_each(pos, &bucket->committing)
+ cinfo->ds->ncommitting--;
+ list_splice_init(&bucket->committing, head);
+ lseg = pnfs_free_bucket_lseg(bucket);
+ if (!lseg)
+ lseg = pnfs_get_lseg(bucket->lseg);
+ return lseg;
+}
+
+static struct nfs_commit_data *
+pnfs_bucket_fetch_commitdata(struct pnfs_commit_bucket *bucket,
+ struct nfs_commit_info *cinfo)
+{
+ struct nfs_commit_data *data = nfs_commitdata_alloc(false);
+
+ if (!data)
+ return NULL;
+ data->lseg = pnfs_bucket_get_committing(&data->pages, bucket, cinfo);
+ return data;
+}
+
+static void pnfs_generic_retry_commit(struct pnfs_commit_bucket *buckets,
+ unsigned int nbuckets,
+ struct nfs_commit_info *cinfo,
+ unsigned int idx)
+{
struct pnfs_commit_bucket *bucket;
struct pnfs_layout_segment *freeme;
- struct list_head *pos;
LIST_HEAD(pages);
- int i;
- mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
- for (i = idx; i < fl_cinfo->nbuckets; i++) {
- bucket = &fl_cinfo->buckets[i];
+ for (bucket = buckets; idx < nbuckets; bucket++, idx++) {
if (list_empty(&bucket->committing))
continue;
- freeme = bucket->clseg;
- bucket->clseg = NULL;
- list_for_each(pos, &bucket->committing)
- cinfo->ds->ncommitting--;
- list_splice_init(&bucket->committing, &pages);
- mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
- nfs_retry_commit(&pages, freeme, cinfo, i);
- pnfs_put_lseg(freeme);
mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
+ freeme = pnfs_bucket_get_committing(&pages, bucket, cinfo);
+ mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
+ nfs_retry_commit(&pages, freeme, cinfo, idx);
+ pnfs_put_lseg(freeme);
}
- mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
}
static unsigned int
-pnfs_generic_alloc_ds_commits(struct nfs_commit_info *cinfo,
- struct list_head *list)
+pnfs_bucket_alloc_ds_commits(struct list_head *list,
+ struct pnfs_commit_bucket *buckets,
+ unsigned int nbuckets,
+ struct nfs_commit_info *cinfo)
{
- struct pnfs_ds_commit_info *fl_cinfo;
struct pnfs_commit_bucket *bucket;
struct nfs_commit_data *data;
- int i;
+ unsigned int i;
unsigned int nreq = 0;
- fl_cinfo = cinfo->ds;
- bucket = fl_cinfo->buckets;
- for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) {
+ for (i = 0, bucket = buckets; i < nbuckets; i++, bucket++) {
if (list_empty(&bucket->committing))
continue;
- data = nfs_commitdata_alloc(false);
- if (!data)
- break;
- data->ds_commit_index = i;
- list_add(&data->pages, list);
- nreq++;
+ mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
+ if (!list_empty(&bucket->committing)) {
+ data = pnfs_bucket_fetch_commitdata(bucket, cinfo);
+ if (!data)
+ goto out_error;
+ data->ds_commit_index = i;
+ list_add_tail(&data->list, list);
+ nreq++;
+ }
+ mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
}
-
+ return nreq;
+out_error:
+ mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
/* Clean up on error */
- pnfs_generic_retry_commit(cinfo, i);
+ pnfs_generic_retry_commit(buckets, nbuckets, cinfo, i);
return nreq;
}
-static inline
-void pnfs_fetch_commit_bucket_list(struct list_head *pages,
- struct nfs_commit_data *data,
- struct nfs_commit_info *cinfo)
+static unsigned int
+pnfs_alloc_ds_commits_list(struct list_head *list,
+ struct pnfs_ds_commit_info *fl_cinfo,
+ struct nfs_commit_info *cinfo)
{
- struct pnfs_commit_bucket *bucket;
- struct list_head *pos;
+ struct pnfs_commit_array *array;
+ unsigned int ret = 0;
- bucket = &cinfo->ds->buckets[data->ds_commit_index];
- mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
- list_for_each(pos, &bucket->committing)
- cinfo->ds->ncommitting--;
- list_splice_init(&bucket->committing, pages);
- data->lseg = bucket->clseg;
- bucket->clseg = NULL;
- mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
-
-}
-
-/* Helper function for pnfs_generic_commit_pagelist to catch an empty
- * page list. This can happen when two commits race.
- *
- * This must be called instead of nfs_init_commit - call one or the other, but
- * not both!
- */
-static bool
-pnfs_generic_commit_cancel_empty_pagelist(struct list_head *pages,
- struct nfs_commit_data *data,
- struct nfs_commit_info *cinfo)
-{
- if (list_empty(pages)) {
- if (atomic_dec_and_test(&cinfo->mds->rpcs_out))
- wake_up_var(&cinfo->mds->rpcs_out);
- /* don't call nfs_commitdata_release - it tries to put
- * the open_context which is not acquired until nfs_init_commit
- * which has not been called on @data */
- WARN_ON_ONCE(data->context);
- nfs_commit_free(data);
- return true;
+ rcu_read_lock();
+ list_for_each_entry_rcu(array, &fl_cinfo->commits, cinfo_list) {
+ if (!array->lseg || !pnfs_get_commit_array(array))
+ continue;
+ rcu_read_unlock();
+ ret += pnfs_bucket_alloc_ds_commits(list, array->buckets,
+ array->nbuckets, cinfo);
+ rcu_read_lock();
+ pnfs_put_commit_array(array, cinfo->inode);
}
-
- return false;
+ rcu_read_unlock();
+ return ret;
}
/* This follows nfs_commit_list pretty closely */
@@ -262,6 +509,7 @@
int (*initiate_commit)(struct nfs_commit_data *data,
int how))
{
+ struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
struct nfs_commit_data *data, *tmp;
LIST_HEAD(list);
unsigned int nreq = 0;
@@ -269,40 +517,25 @@
if (!list_empty(mds_pages)) {
data = nfs_commitdata_alloc(true);
data->ds_commit_index = -1;
- list_add(&data->pages, &list);
+ list_splice_init(mds_pages, &data->pages);
+ list_add_tail(&data->list, &list);
nreq++;
}
- nreq += pnfs_generic_alloc_ds_commits(cinfo, &list);
-
+ nreq += pnfs_alloc_ds_commits_list(&list, fl_cinfo, cinfo);
if (nreq == 0)
goto out;
- atomic_add(nreq, &cinfo->mds->rpcs_out);
-
- list_for_each_entry_safe(data, tmp, &list, pages) {
- list_del_init(&data->pages);
+ list_for_each_entry_safe(data, tmp, &list, list) {
+ list_del(&data->list);
if (data->ds_commit_index < 0) {
- /* another commit raced with us */
- if (pnfs_generic_commit_cancel_empty_pagelist(mds_pages,
- data, cinfo))
- continue;
-
- nfs_init_commit(data, mds_pages, NULL, cinfo);
+ nfs_init_commit(data, NULL, NULL, cinfo);
nfs_initiate_commit(NFS_CLIENT(inode), data,
NFS_PROTO(data->inode),
- data->mds_ops, how, 0);
+ data->mds_ops, how,
+ RPC_TASK_CRED_NOREF);
} else {
- LIST_HEAD(pages);
-
- pnfs_fetch_commit_bucket_list(&pages, data, cinfo);
-
- /* another commit raced with us */
- if (pnfs_generic_commit_cancel_empty_pagelist(&pages,
- data, cinfo))
- continue;
-
- nfs_init_commit(data, &pages, data->lseg, cinfo);
+ nfs_init_commit(data, NULL, data->lseg, cinfo);
initiate_commit(data, how);
}
}
@@ -641,7 +874,7 @@
}
smp_wmb();
- ds->ds_clp = clp;
+ WRITE_ONCE(ds->ds_clp, clp);
dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
out:
return status;
@@ -714,7 +947,7 @@
}
smp_wmb();
- ds->ds_clp = clp;
+ WRITE_ONCE(ds->ds_clp, clp);
dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
out:
return status;
@@ -930,32 +1163,33 @@
u32 ds_commit_idx)
{
struct list_head *list;
- struct pnfs_commit_bucket *buckets;
+ struct pnfs_commit_array *array;
+ struct pnfs_commit_bucket *bucket;
mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
- buckets = cinfo->ds->buckets;
- list = &buckets[ds_commit_idx].written;
- if (list_empty(list)) {
- if (!pnfs_is_valid_lseg(lseg)) {
- mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
- cinfo->completion_ops->resched_write(cinfo, req);
- return;
- }
- /* Non-empty buckets hold a reference on the lseg. That ref
- * is normally transferred to the COMMIT call and released
- * there. It could also be released if the last req is pulled
- * off due to a rewrite, in which case it will be done in
- * pnfs_common_clear_request_commit
- */
- WARN_ON_ONCE(buckets[ds_commit_idx].wlseg != NULL);
- buckets[ds_commit_idx].wlseg = pnfs_get_lseg(lseg);
- }
+ array = pnfs_lookup_commit_array(cinfo->ds, lseg);
+ if (!array || !pnfs_is_valid_lseg(lseg))
+ goto out_resched;
+ bucket = &array->buckets[ds_commit_idx];
+ list = &bucket->written;
+ /* Non-empty buckets hold a reference on the lseg. That ref
+ * is normally transferred to the COMMIT call and released
+ * there. It could also be released if the last req is pulled
+ * off due to a rewrite, in which case it will be done in
+ * pnfs_common_clear_request_commit
+ */
+ if (!bucket->lseg)
+ bucket->lseg = pnfs_get_lseg(lseg);
set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
cinfo->ds->nwritten++;
nfs_request_add_commit_list_locked(req, list, cinfo);
mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
nfs_mark_page_unstable(req->wb_page, cinfo);
+ return;
+out_resched:
+ mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
+ cinfo->completion_ops->resched_write(cinfo, req);
}
EXPORT_SYMBOL_GPL(pnfs_layout_mark_request_commit);
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 0f7288b..15c865c 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -108,10 +108,15 @@
.rpc_resp = fattr,
};
int status;
+ unsigned short task_flags = 0;
+
+ /* Is this is an attribute revalidation, subject to softreval? */
+ if (inode && (server->flags & NFS_MOUNT_SOFTREVAL))
+ task_flags |= RPC_TASK_TIMEOUT;
dprintk("NFS call getattr\n");
nfs_fattr_init(fattr);
- status = rpc_call_sync(server->client, &msg, 0);
+ status = rpc_call_sync(server->client, &msg, task_flags);
dprintk("NFS reply getattr: %d\n", status);
return status;
}
@@ -147,14 +152,14 @@
}
static int
-nfs_proc_lookup(struct inode *dir, const struct qstr *name,
+nfs_proc_lookup(struct inode *dir, struct dentry *dentry,
struct nfs_fh *fhandle, struct nfs_fattr *fattr,
struct nfs4_label *label)
{
struct nfs_diropargs arg = {
.fh = NFS_FH(dir),
- .name = name->name,
- .len = name->len
+ .name = dentry->d_name.name,
+ .len = dentry->d_name.len
};
struct nfs_diropok res = {
.fh = fhandle,
@@ -166,10 +171,15 @@
.rpc_resp = &res,
};
int status;
+ unsigned short task_flags = 0;
- dprintk("NFS call lookup %s\n", name->name);
+ /* Is this is an attribute revalidation, subject to softreval? */
+ if (nfs_lookup_is_soft_revalidate(dentry))
+ task_flags |= RPC_TASK_TIMEOUT;
+
+ dprintk("NFS call lookup %pd2\n", dentry);
nfs_fattr_init(fattr);
- status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, task_flags);
dprintk("NFS reply lookup: %d\n", status);
return status;
}
@@ -710,7 +720,7 @@
.file_ops = &nfs_file_operations,
.getroot = nfs_proc_get_root,
.submount = nfs_submount,
- .try_mount = nfs_try_mount,
+ .try_get_tree = nfs_try_get_tree,
.getattr = nfs_proc_getattr,
.setattr = nfs_proc_setattr,
.lookup = nfs_proc_lookup,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index cfe0b58..eb854f1 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -214,7 +214,7 @@
task_setup_data->flags |= swap_flags;
rpc_ops->read_setup(hdr, msg);
- trace_nfs_initiate_read(inode, hdr->io_start, hdr->good_bytes);
+ trace_nfs_initiate_read(hdr);
}
static void
@@ -247,11 +247,10 @@
return status;
nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, hdr->res.count);
- trace_nfs_readpage_done(inode, task->tk_status,
- hdr->args.offset, hdr->res.eof);
+ trace_nfs_readpage_done(task, hdr);
if (task->tk_status == -ESTALE) {
- set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
+ nfs_set_inode_stale(inode);
nfs_mark_for_revalidate(inode);
}
return 0;
@@ -265,6 +264,8 @@
/* This is a short read! */
nfs_inc_stats(hdr->inode, NFSIOS_SHORTREAD);
+ trace_nfs_readpage_short(task, hdr);
+
/* Has the server at least made some progress? */
if (resp->count == 0) {
nfs_set_pgio_error(hdr, -EIO, argp->offset);
@@ -282,6 +283,8 @@
argp->offset += resp->count;
argp->pgbase += resp->count;
argp->count -= resp->count;
+ resp->count = 0;
+ resp->eof = 0;
rpc_restart_call_prepare(task);
}
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index a84df7d..4034102 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -57,6 +57,7 @@
#include <linux/rcupdate.h>
#include <linux/uaccess.h>
+#include <linux/nfs_ssc.h>
#include "nfs4_fs.h"
#include "callback.h"
@@ -69,250 +70,6 @@
#include "nfs.h"
#define NFSDBG_FACILITY NFSDBG_VFS
-#define NFS_TEXT_DATA 1
-
-#if IS_ENABLED(CONFIG_NFS_V3)
-#define NFS_DEFAULT_VERSION 3
-#else
-#define NFS_DEFAULT_VERSION 2
-#endif
-
-#define NFS_MAX_CONNECTIONS 16
-
-enum {
- /* Mount options that take no arguments */
- Opt_soft, Opt_softerr, Opt_hard,
- Opt_posix, Opt_noposix,
- Opt_cto, Opt_nocto,
- Opt_ac, Opt_noac,
- Opt_lock, Opt_nolock,
- Opt_udp, Opt_tcp, Opt_rdma,
- Opt_acl, Opt_noacl,
- Opt_rdirplus, Opt_nordirplus,
- Opt_sharecache, Opt_nosharecache,
- Opt_resvport, Opt_noresvport,
- Opt_fscache, Opt_nofscache,
- Opt_migration, Opt_nomigration,
-
- /* Mount options that take integer arguments */
- Opt_port,
- Opt_rsize, Opt_wsize, Opt_bsize,
- Opt_timeo, Opt_retrans,
- Opt_acregmin, Opt_acregmax,
- Opt_acdirmin, Opt_acdirmax,
- Opt_actimeo,
- Opt_namelen,
- Opt_mountport,
- Opt_mountvers,
- Opt_minorversion,
-
- /* Mount options that take string arguments */
- Opt_nfsvers,
- Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost,
- Opt_addr, Opt_mountaddr, Opt_clientaddr,
- Opt_nconnect,
- Opt_lookupcache,
- Opt_fscache_uniq,
- Opt_local_lock,
-
- /* Special mount options */
- Opt_userspace, Opt_deprecated, Opt_sloppy,
-
- Opt_err
-};
-
-static const match_table_t nfs_mount_option_tokens = {
- { Opt_userspace, "bg" },
- { Opt_userspace, "fg" },
- { Opt_userspace, "retry=%s" },
-
- { Opt_sloppy, "sloppy" },
-
- { Opt_soft, "soft" },
- { Opt_softerr, "softerr" },
- { Opt_hard, "hard" },
- { Opt_deprecated, "intr" },
- { Opt_deprecated, "nointr" },
- { Opt_posix, "posix" },
- { Opt_noposix, "noposix" },
- { Opt_cto, "cto" },
- { Opt_nocto, "nocto" },
- { Opt_ac, "ac" },
- { Opt_noac, "noac" },
- { Opt_lock, "lock" },
- { Opt_nolock, "nolock" },
- { Opt_udp, "udp" },
- { Opt_tcp, "tcp" },
- { Opt_rdma, "rdma" },
- { Opt_acl, "acl" },
- { Opt_noacl, "noacl" },
- { Opt_rdirplus, "rdirplus" },
- { Opt_nordirplus, "nordirplus" },
- { Opt_sharecache, "sharecache" },
- { Opt_nosharecache, "nosharecache" },
- { Opt_resvport, "resvport" },
- { Opt_noresvport, "noresvport" },
- { Opt_fscache, "fsc" },
- { Opt_nofscache, "nofsc" },
- { Opt_migration, "migration" },
- { Opt_nomigration, "nomigration" },
-
- { Opt_port, "port=%s" },
- { Opt_rsize, "rsize=%s" },
- { Opt_wsize, "wsize=%s" },
- { Opt_bsize, "bsize=%s" },
- { Opt_timeo, "timeo=%s" },
- { Opt_retrans, "retrans=%s" },
- { Opt_acregmin, "acregmin=%s" },
- { Opt_acregmax, "acregmax=%s" },
- { Opt_acdirmin, "acdirmin=%s" },
- { Opt_acdirmax, "acdirmax=%s" },
- { Opt_actimeo, "actimeo=%s" },
- { Opt_namelen, "namlen=%s" },
- { Opt_mountport, "mountport=%s" },
- { Opt_mountvers, "mountvers=%s" },
- { Opt_minorversion, "minorversion=%s" },
-
- { Opt_nfsvers, "nfsvers=%s" },
- { Opt_nfsvers, "vers=%s" },
-
- { Opt_sec, "sec=%s" },
- { Opt_proto, "proto=%s" },
- { Opt_mountproto, "mountproto=%s" },
- { Opt_addr, "addr=%s" },
- { Opt_clientaddr, "clientaddr=%s" },
- { Opt_mounthost, "mounthost=%s" },
- { Opt_mountaddr, "mountaddr=%s" },
-
- { Opt_nconnect, "nconnect=%s" },
-
- { Opt_lookupcache, "lookupcache=%s" },
- { Opt_fscache_uniq, "fsc=%s" },
- { Opt_local_lock, "local_lock=%s" },
-
- /* The following needs to be listed after all other options */
- { Opt_nfsvers, "v%s" },
-
- { Opt_err, NULL }
-};
-
-enum {
- Opt_xprt_udp, Opt_xprt_udp6, Opt_xprt_tcp, Opt_xprt_tcp6, Opt_xprt_rdma,
- Opt_xprt_rdma6,
-
- Opt_xprt_err
-};
-
-static const match_table_t nfs_xprt_protocol_tokens = {
- { Opt_xprt_udp, "udp" },
- { Opt_xprt_udp6, "udp6" },
- { Opt_xprt_tcp, "tcp" },
- { Opt_xprt_tcp6, "tcp6" },
- { Opt_xprt_rdma, "rdma" },
- { Opt_xprt_rdma6, "rdma6" },
-
- { Opt_xprt_err, NULL }
-};
-
-enum {
- Opt_sec_none, Opt_sec_sys,
- Opt_sec_krb5, Opt_sec_krb5i, Opt_sec_krb5p,
- Opt_sec_lkey, Opt_sec_lkeyi, Opt_sec_lkeyp,
- Opt_sec_spkm, Opt_sec_spkmi, Opt_sec_spkmp,
-
- Opt_sec_err
-};
-
-static const match_table_t nfs_secflavor_tokens = {
- { Opt_sec_none, "none" },
- { Opt_sec_none, "null" },
- { Opt_sec_sys, "sys" },
-
- { Opt_sec_krb5, "krb5" },
- { Opt_sec_krb5i, "krb5i" },
- { Opt_sec_krb5p, "krb5p" },
-
- { Opt_sec_lkey, "lkey" },
- { Opt_sec_lkeyi, "lkeyi" },
- { Opt_sec_lkeyp, "lkeyp" },
-
- { Opt_sec_spkm, "spkm3" },
- { Opt_sec_spkmi, "spkm3i" },
- { Opt_sec_spkmp, "spkm3p" },
-
- { Opt_sec_err, NULL }
-};
-
-enum {
- Opt_lookupcache_all, Opt_lookupcache_positive,
- Opt_lookupcache_none,
-
- Opt_lookupcache_err
-};
-
-static match_table_t nfs_lookupcache_tokens = {
- { Opt_lookupcache_all, "all" },
- { Opt_lookupcache_positive, "pos" },
- { Opt_lookupcache_positive, "positive" },
- { Opt_lookupcache_none, "none" },
-
- { Opt_lookupcache_err, NULL }
-};
-
-enum {
- Opt_local_lock_all, Opt_local_lock_flock, Opt_local_lock_posix,
- Opt_local_lock_none,
-
- Opt_local_lock_err
-};
-
-static match_table_t nfs_local_lock_tokens = {
- { Opt_local_lock_all, "all" },
- { Opt_local_lock_flock, "flock" },
- { Opt_local_lock_posix, "posix" },
- { Opt_local_lock_none, "none" },
-
- { Opt_local_lock_err, NULL }
-};
-
-enum {
- Opt_vers_2, Opt_vers_3, Opt_vers_4, Opt_vers_4_0,
- Opt_vers_4_1, Opt_vers_4_2,
-
- Opt_vers_err
-};
-
-static match_table_t nfs_vers_tokens = {
- { Opt_vers_2, "2" },
- { Opt_vers_3, "3" },
- { Opt_vers_4, "4" },
- { Opt_vers_4_0, "4.0" },
- { Opt_vers_4_1, "4.1" },
- { Opt_vers_4_2, "4.2" },
-
- { Opt_vers_err, NULL }
-};
-
-static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data);
-
-struct file_system_type nfs_fs_type = {
- .owner = THIS_MODULE,
- .name = "nfs",
- .mount = nfs_fs_mount,
- .kill_sb = nfs_kill_super,
- .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
-};
-MODULE_ALIAS_FS("nfs");
-EXPORT_SYMBOL_GPL(nfs_fs_type);
-
-struct file_system_type nfs_xdev_fs_type = {
- .owner = THIS_MODULE,
- .name = "nfs",
- .mount = nfs_xdev_mount,
- .kill_sb = nfs_kill_super,
- .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
-};
const struct super_operations nfs_sops = {
.alloc_inode = nfs_alloc_inode,
@@ -326,26 +83,14 @@
.show_devname = nfs_show_devname,
.show_path = nfs_show_path,
.show_stats = nfs_show_stats,
- .remount_fs = nfs_remount,
};
EXPORT_SYMBOL_GPL(nfs_sops);
-#if IS_ENABLED(CONFIG_NFS_V4)
-static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *);
-static int nfs4_validate_mount_data(void *options,
- struct nfs_parsed_mount_data *args, const char *dev_name);
-
-struct file_system_type nfs4_fs_type = {
- .owner = THIS_MODULE,
- .name = "nfs4",
- .mount = nfs_fs_mount,
- .kill_sb = nfs_kill_super,
- .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
+static const struct nfs_ssc_client_ops nfs_ssc_clnt_ops_tbl = {
+ .sco_sb_deactive = nfs_sb_deactive,
};
-MODULE_ALIAS_FS("nfs4");
-MODULE_ALIAS("nfs4");
-EXPORT_SYMBOL_GPL(nfs4_fs_type);
+#if IS_ENABLED(CONFIG_NFS_V4)
static int __init register_nfs4_fs(void)
{
return register_filesystem(&nfs4_fs_type);
@@ -366,6 +111,16 @@
}
#endif
+static void nfs_ssc_register_ops(void)
+{
+ nfs_ssc_register(&nfs_ssc_clnt_ops_tbl);
+}
+
+static void nfs_ssc_unregister_ops(void)
+{
+ nfs_ssc_unregister(&nfs_ssc_clnt_ops_tbl);
+}
+
static struct shrinker acl_shrinker = {
.count_objects = nfs_access_cache_count,
.scan_objects = nfs_access_cache_scan,
@@ -393,6 +148,7 @@
ret = register_shrinker(&acl_shrinker);
if (ret < 0)
goto error_3;
+ nfs_ssc_register_ops();
return 0;
error_3:
nfs_unregister_sysctl();
@@ -412,6 +168,7 @@
unregister_shrinker(&acl_shrinker);
nfs_unregister_sysctl();
unregister_nfs4_fs();
+ nfs_ssc_unregister_ops();
unregister_filesystem(&nfs_fs_type);
}
@@ -436,6 +193,41 @@
}
EXPORT_SYMBOL_GPL(nfs_sb_deactive);
+static int __nfs_list_for_each_server(struct list_head *head,
+ int (*fn)(struct nfs_server *, void *),
+ void *data)
+{
+ struct nfs_server *server, *last = NULL;
+ int ret = 0;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, head, client_link) {
+ if (!(server->super && nfs_sb_active(server->super)))
+ continue;
+ rcu_read_unlock();
+ if (last)
+ nfs_sb_deactive(last->super);
+ last = server;
+ ret = fn(server, data);
+ if (ret)
+ goto out;
+ rcu_read_lock();
+ }
+ rcu_read_unlock();
+out:
+ if (last)
+ nfs_sb_deactive(last->super);
+ return ret;
+}
+
+int nfs_client_for_each_server(struct nfs_client *clp,
+ int (*fn)(struct nfs_server *, void *),
+ void *data)
+{
+ return __nfs_list_for_each_server(&clp->cl_superblocks, fn, data);
+}
+EXPORT_SYMBOL_GPL(nfs_client_for_each_server);
+
/*
* Deliver file system statistics to userspace
*/
@@ -635,6 +427,7 @@
} nfs_info[] = {
{ NFS_MOUNT_SOFT, ",soft", "" },
{ NFS_MOUNT_SOFTERR, ",softerr", "" },
+ { NFS_MOUNT_SOFTREVAL, ",softreval", "" },
{ NFS_MOUNT_POSIX, ",posix", "" },
{ NFS_MOUNT_NOCTO, ",nocto", "" },
{ NFS_MOUNT_NOAC, ",noac", "" },
@@ -931,141 +724,6 @@
}
EXPORT_SYMBOL_GPL(nfs_umount_begin);
-static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void)
-{
- struct nfs_parsed_mount_data *data;
-
- data = kzalloc(sizeof(*data), GFP_KERNEL);
- if (data) {
- data->timeo = NFS_UNSPEC_TIMEO;
- data->retrans = NFS_UNSPEC_RETRANS;
- data->acregmin = NFS_DEF_ACREGMIN;
- data->acregmax = NFS_DEF_ACREGMAX;
- data->acdirmin = NFS_DEF_ACDIRMIN;
- data->acdirmax = NFS_DEF_ACDIRMAX;
- data->mount_server.port = NFS_UNSPEC_PORT;
- data->nfs_server.port = NFS_UNSPEC_PORT;
- data->nfs_server.protocol = XPRT_TRANSPORT_TCP;
- data->selected_flavor = RPC_AUTH_MAXFLAVOR;
- data->minorversion = 0;
- data->need_mount = true;
- data->net = current->nsproxy->net_ns;
- data->lsm_opts = NULL;
- }
- return data;
-}
-
-static void nfs_free_parsed_mount_data(struct nfs_parsed_mount_data *data)
-{
- if (data) {
- kfree(data->client_address);
- kfree(data->mount_server.hostname);
- kfree(data->nfs_server.export_path);
- kfree(data->nfs_server.hostname);
- kfree(data->fscache_uniq);
- security_free_mnt_opts(&data->lsm_opts);
- kfree(data);
- }
-}
-
-/*
- * Sanity-check a server address provided by the mount command.
- *
- * Address family must be initialized, and address must not be
- * the ANY address for that family.
- */
-static int nfs_verify_server_address(struct sockaddr *addr)
-{
- switch (addr->sa_family) {
- case AF_INET: {
- struct sockaddr_in *sa = (struct sockaddr_in *)addr;
- return sa->sin_addr.s_addr != htonl(INADDR_ANY);
- }
- case AF_INET6: {
- struct in6_addr *sa = &((struct sockaddr_in6 *)addr)->sin6_addr;
- return !ipv6_addr_any(sa);
- }
- }
-
- dfprintk(MOUNT, "NFS: Invalid IP address specified\n");
- return 0;
-}
-
-/*
- * Select between a default port value and a user-specified port value.
- * If a zero value is set, then autobind will be used.
- */
-static void nfs_set_port(struct sockaddr *sap, int *port,
- const unsigned short default_port)
-{
- if (*port == NFS_UNSPEC_PORT)
- *port = default_port;
-
- rpc_set_port(sap, *port);
-}
-
-/*
- * Sanity check the NFS transport protocol.
- *
- */
-static void nfs_validate_transport_protocol(struct nfs_parsed_mount_data *mnt)
-{
- switch (mnt->nfs_server.protocol) {
- case XPRT_TRANSPORT_UDP:
- case XPRT_TRANSPORT_TCP:
- case XPRT_TRANSPORT_RDMA:
- break;
- default:
- mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP;
- }
-}
-
-/*
- * For text based NFSv2/v3 mounts, the mount protocol transport default
- * settings should depend upon the specified NFS transport.
- */
-static void nfs_set_mount_transport_protocol(struct nfs_parsed_mount_data *mnt)
-{
- nfs_validate_transport_protocol(mnt);
-
- if (mnt->mount_server.protocol == XPRT_TRANSPORT_UDP ||
- mnt->mount_server.protocol == XPRT_TRANSPORT_TCP)
- return;
- switch (mnt->nfs_server.protocol) {
- case XPRT_TRANSPORT_UDP:
- mnt->mount_server.protocol = XPRT_TRANSPORT_UDP;
- break;
- case XPRT_TRANSPORT_TCP:
- case XPRT_TRANSPORT_RDMA:
- mnt->mount_server.protocol = XPRT_TRANSPORT_TCP;
- }
-}
-
-/*
- * Add 'flavor' to 'auth_info' if not already present.
- * Returns true if 'flavor' ends up in the list, false otherwise
- */
-static bool nfs_auth_info_add(struct nfs_auth_info *auth_info,
- rpc_authflavor_t flavor)
-{
- unsigned int i;
- unsigned int max_flavor_len = ARRAY_SIZE(auth_info->flavors);
-
- /* make sure this flavor isn't already in the list */
- for (i = 0; i < auth_info->flavor_len; i++) {
- if (flavor == auth_info->flavors[i])
- return true;
- }
-
- if (auth_info->flavor_len + 1 >= max_flavor_len) {
- dfprintk(MOUNT, "NFS: too many sec= flavors\n");
- return false;
- }
-
- auth_info->flavors[auth_info->flavor_len++] = flavor;
- return true;
-}
-
/*
* Return true if 'match' is in auth_info or auth_info is empty.
* Return false otherwise.
@@ -1087,633 +745,13 @@
EXPORT_SYMBOL_GPL(nfs_auth_info_match);
/*
- * Parse the value of the 'sec=' option.
- */
-static int nfs_parse_security_flavors(char *value,
- struct nfs_parsed_mount_data *mnt)
-{
- substring_t args[MAX_OPT_ARGS];
- rpc_authflavor_t pseudoflavor;
- char *p;
-
- dfprintk(MOUNT, "NFS: parsing sec=%s option\n", value);
-
- while ((p = strsep(&value, ":")) != NULL) {
- switch (match_token(p, nfs_secflavor_tokens, args)) {
- case Opt_sec_none:
- pseudoflavor = RPC_AUTH_NULL;
- break;
- case Opt_sec_sys:
- pseudoflavor = RPC_AUTH_UNIX;
- break;
- case Opt_sec_krb5:
- pseudoflavor = RPC_AUTH_GSS_KRB5;
- break;
- case Opt_sec_krb5i:
- pseudoflavor = RPC_AUTH_GSS_KRB5I;
- break;
- case Opt_sec_krb5p:
- pseudoflavor = RPC_AUTH_GSS_KRB5P;
- break;
- case Opt_sec_lkey:
- pseudoflavor = RPC_AUTH_GSS_LKEY;
- break;
- case Opt_sec_lkeyi:
- pseudoflavor = RPC_AUTH_GSS_LKEYI;
- break;
- case Opt_sec_lkeyp:
- pseudoflavor = RPC_AUTH_GSS_LKEYP;
- break;
- case Opt_sec_spkm:
- pseudoflavor = RPC_AUTH_GSS_SPKM;
- break;
- case Opt_sec_spkmi:
- pseudoflavor = RPC_AUTH_GSS_SPKMI;
- break;
- case Opt_sec_spkmp:
- pseudoflavor = RPC_AUTH_GSS_SPKMP;
- break;
- default:
- dfprintk(MOUNT,
- "NFS: sec= option '%s' not recognized\n", p);
- return 0;
- }
-
- if (!nfs_auth_info_add(&mnt->auth_info, pseudoflavor))
- return 0;
- }
-
- return 1;
-}
-
-static int nfs_parse_version_string(char *string,
- struct nfs_parsed_mount_data *mnt,
- substring_t *args)
-{
- mnt->flags &= ~NFS_MOUNT_VER3;
- switch (match_token(string, nfs_vers_tokens, args)) {
- case Opt_vers_2:
- mnt->version = 2;
- break;
- case Opt_vers_3:
- mnt->flags |= NFS_MOUNT_VER3;
- mnt->version = 3;
- break;
- case Opt_vers_4:
- /* Backward compatibility option. In future,
- * the mount program should always supply
- * a NFSv4 minor version number.
- */
- mnt->version = 4;
- break;
- case Opt_vers_4_0:
- mnt->version = 4;
- mnt->minorversion = 0;
- break;
- case Opt_vers_4_1:
- mnt->version = 4;
- mnt->minorversion = 1;
- break;
- case Opt_vers_4_2:
- mnt->version = 4;
- mnt->minorversion = 2;
- break;
- default:
- return 0;
- }
- return 1;
-}
-
-static int nfs_get_option_str(substring_t args[], char **option)
-{
- kfree(*option);
- *option = match_strdup(args);
- return !*option;
-}
-
-static int nfs_get_option_ul(substring_t args[], unsigned long *option)
-{
- int rc;
- char *string;
-
- string = match_strdup(args);
- if (string == NULL)
- return -ENOMEM;
- rc = kstrtoul(string, 10, option);
- kfree(string);
-
- return rc;
-}
-
-static int nfs_get_option_ul_bound(substring_t args[], unsigned long *option,
- unsigned long l_bound, unsigned long u_bound)
-{
- int ret;
-
- ret = nfs_get_option_ul(args, option);
- if (ret != 0)
- return ret;
- if (*option < l_bound || *option > u_bound)
- return -ERANGE;
- return 0;
-}
-
-/*
- * Error-check and convert a string of mount options from user space into
- * a data structure. The whole mount string is processed; bad options are
- * skipped as they are encountered. If there were no errors, return 1;
- * otherwise return 0 (zero).
- */
-static int nfs_parse_mount_options(char *raw,
- struct nfs_parsed_mount_data *mnt)
-{
- char *p, *string;
- int rc, sloppy = 0, invalid_option = 0;
- unsigned short protofamily = AF_UNSPEC;
- unsigned short mountfamily = AF_UNSPEC;
-
- if (!raw) {
- dfprintk(MOUNT, "NFS: mount options string was NULL.\n");
- return 1;
- }
- dfprintk(MOUNT, "NFS: nfs mount opts='%s'\n", raw);
-
- rc = security_sb_eat_lsm_opts(raw, &mnt->lsm_opts);
- if (rc)
- goto out_security_failure;
-
- while ((p = strsep(&raw, ",")) != NULL) {
- substring_t args[MAX_OPT_ARGS];
- unsigned long option;
- int token;
-
- if (!*p)
- continue;
-
- dfprintk(MOUNT, "NFS: parsing nfs mount option '%s'\n", p);
-
- token = match_token(p, nfs_mount_option_tokens, args);
- switch (token) {
-
- /*
- * boolean options: foo/nofoo
- */
- case Opt_soft:
- mnt->flags |= NFS_MOUNT_SOFT;
- mnt->flags &= ~NFS_MOUNT_SOFTERR;
- break;
- case Opt_softerr:
- mnt->flags |= NFS_MOUNT_SOFTERR;
- mnt->flags &= ~NFS_MOUNT_SOFT;
- break;
- case Opt_hard:
- mnt->flags &= ~(NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR);
- break;
- case Opt_posix:
- mnt->flags |= NFS_MOUNT_POSIX;
- break;
- case Opt_noposix:
- mnt->flags &= ~NFS_MOUNT_POSIX;
- break;
- case Opt_cto:
- mnt->flags &= ~NFS_MOUNT_NOCTO;
- break;
- case Opt_nocto:
- mnt->flags |= NFS_MOUNT_NOCTO;
- break;
- case Opt_ac:
- mnt->flags &= ~NFS_MOUNT_NOAC;
- break;
- case Opt_noac:
- mnt->flags |= NFS_MOUNT_NOAC;
- break;
- case Opt_lock:
- mnt->flags &= ~NFS_MOUNT_NONLM;
- mnt->flags &= ~(NFS_MOUNT_LOCAL_FLOCK |
- NFS_MOUNT_LOCAL_FCNTL);
- break;
- case Opt_nolock:
- mnt->flags |= NFS_MOUNT_NONLM;
- mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK |
- NFS_MOUNT_LOCAL_FCNTL);
- break;
- case Opt_udp:
- mnt->flags &= ~NFS_MOUNT_TCP;
- mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
- break;
- case Opt_tcp:
- mnt->flags |= NFS_MOUNT_TCP;
- mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP;
- break;
- case Opt_rdma:
- mnt->flags |= NFS_MOUNT_TCP; /* for side protocols */
- mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
- xprt_load_transport(p);
- break;
- case Opt_acl:
- mnt->flags &= ~NFS_MOUNT_NOACL;
- break;
- case Opt_noacl:
- mnt->flags |= NFS_MOUNT_NOACL;
- break;
- case Opt_rdirplus:
- mnt->flags &= ~NFS_MOUNT_NORDIRPLUS;
- break;
- case Opt_nordirplus:
- mnt->flags |= NFS_MOUNT_NORDIRPLUS;
- break;
- case Opt_sharecache:
- mnt->flags &= ~NFS_MOUNT_UNSHARED;
- break;
- case Opt_nosharecache:
- mnt->flags |= NFS_MOUNT_UNSHARED;
- break;
- case Opt_resvport:
- mnt->flags &= ~NFS_MOUNT_NORESVPORT;
- break;
- case Opt_noresvport:
- mnt->flags |= NFS_MOUNT_NORESVPORT;
- break;
- case Opt_fscache:
- mnt->options |= NFS_OPTION_FSCACHE;
- kfree(mnt->fscache_uniq);
- mnt->fscache_uniq = NULL;
- break;
- case Opt_nofscache:
- mnt->options &= ~NFS_OPTION_FSCACHE;
- kfree(mnt->fscache_uniq);
- mnt->fscache_uniq = NULL;
- break;
- case Opt_migration:
- mnt->options |= NFS_OPTION_MIGRATION;
- break;
- case Opt_nomigration:
- mnt->options &= ~NFS_OPTION_MIGRATION;
- break;
-
- /*
- * options that take numeric values
- */
- case Opt_port:
- if (nfs_get_option_ul(args, &option) ||
- option > USHRT_MAX)
- goto out_invalid_value;
- mnt->nfs_server.port = option;
- break;
- case Opt_rsize:
- if (nfs_get_option_ul(args, &option))
- goto out_invalid_value;
- mnt->rsize = option;
- break;
- case Opt_wsize:
- if (nfs_get_option_ul(args, &option))
- goto out_invalid_value;
- mnt->wsize = option;
- break;
- case Opt_bsize:
- if (nfs_get_option_ul(args, &option))
- goto out_invalid_value;
- mnt->bsize = option;
- break;
- case Opt_timeo:
- if (nfs_get_option_ul_bound(args, &option, 1, INT_MAX))
- goto out_invalid_value;
- mnt->timeo = option;
- break;
- case Opt_retrans:
- if (nfs_get_option_ul_bound(args, &option, 0, INT_MAX))
- goto out_invalid_value;
- mnt->retrans = option;
- break;
- case Opt_acregmin:
- if (nfs_get_option_ul(args, &option))
- goto out_invalid_value;
- mnt->acregmin = option;
- break;
- case Opt_acregmax:
- if (nfs_get_option_ul(args, &option))
- goto out_invalid_value;
- mnt->acregmax = option;
- break;
- case Opt_acdirmin:
- if (nfs_get_option_ul(args, &option))
- goto out_invalid_value;
- mnt->acdirmin = option;
- break;
- case Opt_acdirmax:
- if (nfs_get_option_ul(args, &option))
- goto out_invalid_value;
- mnt->acdirmax = option;
- break;
- case Opt_actimeo:
- if (nfs_get_option_ul(args, &option))
- goto out_invalid_value;
- mnt->acregmin = mnt->acregmax =
- mnt->acdirmin = mnt->acdirmax = option;
- break;
- case Opt_namelen:
- if (nfs_get_option_ul(args, &option))
- goto out_invalid_value;
- mnt->namlen = option;
- break;
- case Opt_mountport:
- if (nfs_get_option_ul(args, &option) ||
- option > USHRT_MAX)
- goto out_invalid_value;
- mnt->mount_server.port = option;
- break;
- case Opt_mountvers:
- if (nfs_get_option_ul(args, &option) ||
- option < NFS_MNT_VERSION ||
- option > NFS_MNT3_VERSION)
- goto out_invalid_value;
- mnt->mount_server.version = option;
- break;
- case Opt_minorversion:
- if (nfs_get_option_ul(args, &option))
- goto out_invalid_value;
- if (option > NFS4_MAX_MINOR_VERSION)
- goto out_invalid_value;
- mnt->minorversion = option;
- break;
-
- /*
- * options that take text values
- */
- case Opt_nfsvers:
- string = match_strdup(args);
- if (string == NULL)
- goto out_nomem;
- rc = nfs_parse_version_string(string, mnt, args);
- kfree(string);
- if (!rc)
- goto out_invalid_value;
- break;
- case Opt_sec:
- string = match_strdup(args);
- if (string == NULL)
- goto out_nomem;
- rc = nfs_parse_security_flavors(string, mnt);
- kfree(string);
- if (!rc) {
- dfprintk(MOUNT, "NFS: unrecognized "
- "security flavor\n");
- return 0;
- }
- break;
- case Opt_proto:
- string = match_strdup(args);
- if (string == NULL)
- goto out_nomem;
- token = match_token(string,
- nfs_xprt_protocol_tokens, args);
-
- protofamily = AF_INET;
- switch (token) {
- case Opt_xprt_udp6:
- protofamily = AF_INET6;
- /* fall through */
- case Opt_xprt_udp:
- mnt->flags &= ~NFS_MOUNT_TCP;
- mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
- break;
- case Opt_xprt_tcp6:
- protofamily = AF_INET6;
- /* fall through */
- case Opt_xprt_tcp:
- mnt->flags |= NFS_MOUNT_TCP;
- mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP;
- break;
- case Opt_xprt_rdma6:
- protofamily = AF_INET6;
- /* fall through */
- case Opt_xprt_rdma:
- /* vector side protocols to TCP */
- mnt->flags |= NFS_MOUNT_TCP;
- mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
- xprt_load_transport(string);
- break;
- default:
- dfprintk(MOUNT, "NFS: unrecognized "
- "transport protocol\n");
- kfree(string);
- return 0;
- }
- kfree(string);
- break;
- case Opt_mountproto:
- string = match_strdup(args);
- if (string == NULL)
- goto out_nomem;
- token = match_token(string,
- nfs_xprt_protocol_tokens, args);
- kfree(string);
-
- mountfamily = AF_INET;
- switch (token) {
- case Opt_xprt_udp6:
- mountfamily = AF_INET6;
- /* fall through */
- case Opt_xprt_udp:
- mnt->mount_server.protocol = XPRT_TRANSPORT_UDP;
- break;
- case Opt_xprt_tcp6:
- mountfamily = AF_INET6;
- /* fall through */
- case Opt_xprt_tcp:
- mnt->mount_server.protocol = XPRT_TRANSPORT_TCP;
- break;
- case Opt_xprt_rdma: /* not used for side protocols */
- default:
- dfprintk(MOUNT, "NFS: unrecognized "
- "transport protocol\n");
- return 0;
- }
- break;
- case Opt_addr:
- string = match_strdup(args);
- if (string == NULL)
- goto out_nomem;
- mnt->nfs_server.addrlen =
- rpc_pton(mnt->net, string, strlen(string),
- (struct sockaddr *)
- &mnt->nfs_server.address,
- sizeof(mnt->nfs_server.address));
- kfree(string);
- if (mnt->nfs_server.addrlen == 0)
- goto out_invalid_address;
- break;
- case Opt_clientaddr:
- if (nfs_get_option_str(args, &mnt->client_address))
- goto out_nomem;
- break;
- case Opt_mounthost:
- if (nfs_get_option_str(args,
- &mnt->mount_server.hostname))
- goto out_nomem;
- break;
- case Opt_mountaddr:
- string = match_strdup(args);
- if (string == NULL)
- goto out_nomem;
- mnt->mount_server.addrlen =
- rpc_pton(mnt->net, string, strlen(string),
- (struct sockaddr *)
- &mnt->mount_server.address,
- sizeof(mnt->mount_server.address));
- kfree(string);
- if (mnt->mount_server.addrlen == 0)
- goto out_invalid_address;
- break;
- case Opt_nconnect:
- if (nfs_get_option_ul_bound(args, &option, 1, NFS_MAX_CONNECTIONS))
- goto out_invalid_value;
- mnt->nfs_server.nconnect = option;
- break;
- case Opt_lookupcache:
- string = match_strdup(args);
- if (string == NULL)
- goto out_nomem;
- token = match_token(string,
- nfs_lookupcache_tokens, args);
- kfree(string);
- switch (token) {
- case Opt_lookupcache_all:
- mnt->flags &= ~(NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE);
- break;
- case Opt_lookupcache_positive:
- mnt->flags &= ~NFS_MOUNT_LOOKUP_CACHE_NONE;
- mnt->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG;
- break;
- case Opt_lookupcache_none:
- mnt->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE;
- break;
- default:
- dfprintk(MOUNT, "NFS: invalid "
- "lookupcache argument\n");
- return 0;
- };
- break;
- case Opt_fscache_uniq:
- if (nfs_get_option_str(args, &mnt->fscache_uniq))
- goto out_nomem;
- mnt->options |= NFS_OPTION_FSCACHE;
- break;
- case Opt_local_lock:
- string = match_strdup(args);
- if (string == NULL)
- goto out_nomem;
- token = match_token(string, nfs_local_lock_tokens,
- args);
- kfree(string);
- switch (token) {
- case Opt_local_lock_all:
- mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK |
- NFS_MOUNT_LOCAL_FCNTL);
- break;
- case Opt_local_lock_flock:
- mnt->flags |= NFS_MOUNT_LOCAL_FLOCK;
- break;
- case Opt_local_lock_posix:
- mnt->flags |= NFS_MOUNT_LOCAL_FCNTL;
- break;
- case Opt_local_lock_none:
- mnt->flags &= ~(NFS_MOUNT_LOCAL_FLOCK |
- NFS_MOUNT_LOCAL_FCNTL);
- break;
- default:
- dfprintk(MOUNT, "NFS: invalid "
- "local_lock argument\n");
- return 0;
- };
- break;
-
- /*
- * Special options
- */
- case Opt_sloppy:
- sloppy = 1;
- dfprintk(MOUNT, "NFS: relaxing parsing rules\n");
- break;
- case Opt_userspace:
- case Opt_deprecated:
- dfprintk(MOUNT, "NFS: ignoring mount option "
- "'%s'\n", p);
- break;
-
- default:
- invalid_option = 1;
- dfprintk(MOUNT, "NFS: unrecognized mount option "
- "'%s'\n", p);
- }
- }
-
- if (!sloppy && invalid_option)
- return 0;
-
- if (mnt->minorversion && mnt->version != 4)
- goto out_minorversion_mismatch;
-
- if (mnt->options & NFS_OPTION_MIGRATION &&
- (mnt->version != 4 || mnt->minorversion != 0))
- goto out_migration_misuse;
-
- /*
- * verify that any proto=/mountproto= options match the address
- * families in the addr=/mountaddr= options.
- */
- if (protofamily != AF_UNSPEC &&
- protofamily != mnt->nfs_server.address.ss_family)
- goto out_proto_mismatch;
-
- if (mountfamily != AF_UNSPEC) {
- if (mnt->mount_server.addrlen) {
- if (mountfamily != mnt->mount_server.address.ss_family)
- goto out_mountproto_mismatch;
- } else {
- if (mountfamily != mnt->nfs_server.address.ss_family)
- goto out_mountproto_mismatch;
- }
- }
-
- return 1;
-
-out_mountproto_mismatch:
- printk(KERN_INFO "NFS: mount server address does not match mountproto= "
- "option\n");
- return 0;
-out_proto_mismatch:
- printk(KERN_INFO "NFS: server address does not match proto= option\n");
- return 0;
-out_invalid_address:
- printk(KERN_INFO "NFS: bad IP address specified: %s\n", p);
- return 0;
-out_invalid_value:
- printk(KERN_INFO "NFS: bad mount option value specified: %s\n", p);
- return 0;
-out_minorversion_mismatch:
- printk(KERN_INFO "NFS: mount option vers=%u does not support "
- "minorversion=%u\n", mnt->version, mnt->minorversion);
- return 0;
-out_migration_misuse:
- printk(KERN_INFO
- "NFS: 'migration' not supported for this NFS version\n");
- return 0;
-out_nomem:
- printk(KERN_INFO "NFS: not enough memory to parse option\n");
- return 0;
-out_security_failure:
- printk(KERN_INFO "NFS: security options invalid: %d\n", rc);
- return 0;
-}
-
-/*
- * Ensure that a specified authtype in args->auth_info is supported by
- * the server. Returns 0 and sets args->selected_flavor if it's ok, and
+ * Ensure that a specified authtype in ctx->auth_info is supported by
+ * the server. Returns 0 and sets ctx->selected_flavor if it's ok, and
* -EACCES if not.
*/
-static int nfs_verify_authflavors(struct nfs_parsed_mount_data *args,
- rpc_authflavor_t *server_authlist, unsigned int count)
+static int nfs_verify_authflavors(struct nfs_fs_context *ctx,
+ rpc_authflavor_t *server_authlist,
+ unsigned int count)
{
rpc_authflavor_t flavor = RPC_AUTH_MAXFLAVOR;
bool found_auth_null = false;
@@ -1734,7 +772,7 @@
for (i = 0; i < count; i++) {
flavor = server_authlist[i];
- if (nfs_auth_info_match(&args->auth_info, flavor))
+ if (nfs_auth_info_match(&ctx->auth_info, flavor))
goto out;
if (flavor == RPC_AUTH_NULL)
@@ -1742,7 +780,7 @@
}
if (found_auth_null) {
- flavor = args->auth_info.flavors[0];
+ flavor = ctx->auth_info.flavors[0];
goto out;
}
@@ -1751,8 +789,8 @@
return -EACCES;
out:
- args->selected_flavor = flavor;
- dfprintk(MOUNT, "NFS: using auth flavor %u\n", args->selected_flavor);
+ ctx->selected_flavor = flavor;
+ dfprintk(MOUNT, "NFS: using auth flavor %u\n", ctx->selected_flavor);
return 0;
}
@@ -1760,50 +798,51 @@
* Use the remote server's MOUNT service to request the NFS file handle
* corresponding to the provided path.
*/
-static int nfs_request_mount(struct nfs_parsed_mount_data *args,
+static int nfs_request_mount(struct fs_context *fc,
struct nfs_fh *root_fh,
rpc_authflavor_t *server_authlist,
unsigned int *server_authlist_len)
{
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
struct nfs_mount_request request = {
.sap = (struct sockaddr *)
- &args->mount_server.address,
- .dirpath = args->nfs_server.export_path,
- .protocol = args->mount_server.protocol,
+ &ctx->mount_server.address,
+ .dirpath = ctx->nfs_server.export_path,
+ .protocol = ctx->mount_server.protocol,
.fh = root_fh,
- .noresvport = args->flags & NFS_MOUNT_NORESVPORT,
+ .noresvport = ctx->flags & NFS_MOUNT_NORESVPORT,
.auth_flav_len = server_authlist_len,
.auth_flavs = server_authlist,
- .net = args->net,
+ .net = fc->net_ns,
};
int status;
- if (args->mount_server.version == 0) {
- switch (args->version) {
+ if (ctx->mount_server.version == 0) {
+ switch (ctx->version) {
default:
- args->mount_server.version = NFS_MNT3_VERSION;
+ ctx->mount_server.version = NFS_MNT3_VERSION;
break;
case 2:
- args->mount_server.version = NFS_MNT_VERSION;
+ ctx->mount_server.version = NFS_MNT_VERSION;
}
}
- request.version = args->mount_server.version;
+ request.version = ctx->mount_server.version;
- if (args->mount_server.hostname)
- request.hostname = args->mount_server.hostname;
+ if (ctx->mount_server.hostname)
+ request.hostname = ctx->mount_server.hostname;
else
- request.hostname = args->nfs_server.hostname;
+ request.hostname = ctx->nfs_server.hostname;
/*
* Construct the mount server's address.
*/
- if (args->mount_server.address.ss_family == AF_UNSPEC) {
- memcpy(request.sap, &args->nfs_server.address,
- args->nfs_server.addrlen);
- args->mount_server.addrlen = args->nfs_server.addrlen;
+ if (ctx->mount_server.address.sa_family == AF_UNSPEC) {
+ memcpy(request.sap, &ctx->nfs_server.address,
+ ctx->nfs_server.addrlen);
+ ctx->mount_server.addrlen = ctx->nfs_server.addrlen;
}
- request.salen = args->mount_server.addrlen;
- nfs_set_port(request.sap, &args->mount_server.port, 0);
+ request.salen = ctx->mount_server.addrlen;
+ nfs_set_port(request.sap, &ctx->mount_server.port, 0);
/*
* Now ask the mount server to map our export path
@@ -1819,20 +858,18 @@
return 0;
}
-static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_info,
- struct nfs_subversion *nfs_mod)
+static struct nfs_server *nfs_try_mount_request(struct fs_context *fc)
{
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
int status;
unsigned int i;
bool tried_auth_unix = false;
bool auth_null_in_list = false;
struct nfs_server *server = ERR_PTR(-EACCES);
- struct nfs_parsed_mount_data *args = mount_info->parsed;
rpc_authflavor_t authlist[NFS_MAX_SECFLAVORS];
unsigned int authlist_len = ARRAY_SIZE(authlist);
- status = nfs_request_mount(args, mount_info->mntfh, authlist,
- &authlist_len);
+ status = nfs_request_mount(fc, ctx->mntfh, authlist, &authlist_len);
if (status)
return ERR_PTR(status);
@@ -1840,13 +877,13 @@
* Was a sec= authflavor specified in the options? First, verify
* whether the server supports it, and then just try to use it if so.
*/
- if (args->auth_info.flavor_len > 0) {
- status = nfs_verify_authflavors(args, authlist, authlist_len);
+ if (ctx->auth_info.flavor_len > 0) {
+ status = nfs_verify_authflavors(ctx, authlist, authlist_len);
dfprintk(MOUNT, "NFS: using auth flavor %u\n",
- args->selected_flavor);
+ ctx->selected_flavor);
if (status)
return ERR_PTR(status);
- return nfs_mod->rpc_ops->create_server(mount_info, nfs_mod);
+ return ctx->nfs_mod->rpc_ops->create_server(fc);
}
/*
@@ -1869,11 +906,11 @@
default:
if (rpcauth_get_gssinfo(flavor, &info) != 0)
continue;
- /* Fallthrough */
+ break;
}
dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", flavor);
- args->selected_flavor = flavor;
- server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod);
+ ctx->selected_flavor = flavor;
+ server = ctx->nfs_mod->rpc_ops->create_server(fc);
if (!IS_ERR(server))
return server;
}
@@ -1888,348 +925,23 @@
/* Last chance! Try AUTH_UNIX */
dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", RPC_AUTH_UNIX);
- args->selected_flavor = RPC_AUTH_UNIX;
- return nfs_mod->rpc_ops->create_server(mount_info, nfs_mod);
+ ctx->selected_flavor = RPC_AUTH_UNIX;
+ return ctx->nfs_mod->rpc_ops->create_server(fc);
}
-struct dentry *nfs_try_mount(int flags, const char *dev_name,
- struct nfs_mount_info *mount_info,
- struct nfs_subversion *nfs_mod)
+int nfs_try_get_tree(struct fs_context *fc)
{
- struct nfs_server *server;
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
- if (mount_info->parsed->need_mount)
- server = nfs_try_mount_request(mount_info, nfs_mod);
+ if (ctx->need_mount)
+ ctx->server = nfs_try_mount_request(fc);
else
- server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod);
+ ctx->server = ctx->nfs_mod->rpc_ops->create_server(fc);
- if (IS_ERR(server))
- return ERR_CAST(server);
-
- return nfs_fs_mount_common(server, flags, dev_name, mount_info, nfs_mod);
+ return nfs_get_tree_common(fc);
}
-EXPORT_SYMBOL_GPL(nfs_try_mount);
+EXPORT_SYMBOL_GPL(nfs_try_get_tree);
-/*
- * Split "dev_name" into "hostname:export_path".
- *
- * The leftmost colon demarks the split between the server's hostname
- * and the export path. If the hostname starts with a left square
- * bracket, then it may contain colons.
- *
- * Note: caller frees hostname and export path, even on error.
- */
-static int nfs_parse_devname(const char *dev_name,
- char **hostname, size_t maxnamlen,
- char **export_path, size_t maxpathlen)
-{
- size_t len;
- char *end;
-
- if (unlikely(!dev_name || !*dev_name)) {
- dfprintk(MOUNT, "NFS: device name not specified\n");
- return -EINVAL;
- }
-
- /* Is the host name protected with square brakcets? */
- if (*dev_name == '[') {
- end = strchr(++dev_name, ']');
- if (end == NULL || end[1] != ':')
- goto out_bad_devname;
-
- len = end - dev_name;
- end++;
- } else {
- char *comma;
-
- end = strchr(dev_name, ':');
- if (end == NULL)
- goto out_bad_devname;
- len = end - dev_name;
-
- /* kill possible hostname list: not supported */
- comma = strchr(dev_name, ',');
- if (comma != NULL && comma < end)
- len = comma - dev_name;
- }
-
- if (len > maxnamlen)
- goto out_hostname;
-
- /* N.B. caller will free nfs_server.hostname in all cases */
- *hostname = kstrndup(dev_name, len, GFP_KERNEL);
- if (*hostname == NULL)
- goto out_nomem;
- len = strlen(++end);
- if (len > maxpathlen)
- goto out_path;
- *export_path = kstrndup(end, len, GFP_KERNEL);
- if (!*export_path)
- goto out_nomem;
-
- dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *export_path);
- return 0;
-
-out_bad_devname:
- dfprintk(MOUNT, "NFS: device name not in host:path format\n");
- return -EINVAL;
-
-out_nomem:
- dfprintk(MOUNT, "NFS: not enough memory to parse device name\n");
- return -ENOMEM;
-
-out_hostname:
- dfprintk(MOUNT, "NFS: server hostname too long\n");
- return -ENAMETOOLONG;
-
-out_path:
- dfprintk(MOUNT, "NFS: export pathname too long\n");
- return -ENAMETOOLONG;
-}
-
-/*
- * Validate the NFS2/NFS3 mount data
- * - fills in the mount root filehandle
- *
- * For option strings, user space handles the following behaviors:
- *
- * + DNS: mapping server host name to IP address ("addr=" option)
- *
- * + failure mode: how to behave if a mount request can't be handled
- * immediately ("fg/bg" option)
- *
- * + retry: how often to retry a mount request ("retry=" option)
- *
- * + breaking back: trying proto=udp after proto=tcp, v2 after v3,
- * mountproto=tcp after mountproto=udp, and so on
- */
-static int nfs23_validate_mount_data(void *options,
- struct nfs_parsed_mount_data *args,
- struct nfs_fh *mntfh,
- const char *dev_name)
-{
- struct nfs_mount_data *data = (struct nfs_mount_data *)options;
- struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address;
- int extra_flags = NFS_MOUNT_LEGACY_INTERFACE;
-
- if (data == NULL)
- goto out_no_data;
-
- args->version = NFS_DEFAULT_VERSION;
- switch (data->version) {
- case 1:
- data->namlen = 0; /* fall through */
- case 2:
- data->bsize = 0; /* fall through */
- case 3:
- if (data->flags & NFS_MOUNT_VER3)
- goto out_no_v3;
- data->root.size = NFS2_FHSIZE;
- memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
- /* Turn off security negotiation */
- extra_flags |= NFS_MOUNT_SECFLAVOUR;
- /* fall through */
- case 4:
- if (data->flags & NFS_MOUNT_SECFLAVOUR)
- goto out_no_sec;
- /* fall through */
- case 5:
- memset(data->context, 0, sizeof(data->context));
- /* fall through */
- case 6:
- if (data->flags & NFS_MOUNT_VER3) {
- if (data->root.size > NFS3_FHSIZE || data->root.size == 0)
- goto out_invalid_fh;
- mntfh->size = data->root.size;
- args->version = 3;
- } else {
- mntfh->size = NFS2_FHSIZE;
- args->version = 2;
- }
-
-
- memcpy(mntfh->data, data->root.data, mntfh->size);
- if (mntfh->size < sizeof(mntfh->data))
- memset(mntfh->data + mntfh->size, 0,
- sizeof(mntfh->data) - mntfh->size);
-
- /*
- * Translate to nfs_parsed_mount_data, which nfs_fill_super
- * can deal with.
- */
- args->flags = data->flags & NFS_MOUNT_FLAGMASK;
- args->flags |= extra_flags;
- args->rsize = data->rsize;
- args->wsize = data->wsize;
- args->timeo = data->timeo;
- args->retrans = data->retrans;
- args->acregmin = data->acregmin;
- args->acregmax = data->acregmax;
- args->acdirmin = data->acdirmin;
- args->acdirmax = data->acdirmax;
- args->need_mount = false;
-
- memcpy(sap, &data->addr, sizeof(data->addr));
- args->nfs_server.addrlen = sizeof(data->addr);
- args->nfs_server.port = ntohs(data->addr.sin_port);
- if (sap->sa_family != AF_INET ||
- !nfs_verify_server_address(sap))
- goto out_no_address;
-
- if (!(data->flags & NFS_MOUNT_TCP))
- args->nfs_server.protocol = XPRT_TRANSPORT_UDP;
- /* N.B. caller will free nfs_server.hostname in all cases */
- args->nfs_server.hostname = kstrdup(data->hostname, GFP_KERNEL);
- args->namlen = data->namlen;
- args->bsize = data->bsize;
-
- if (data->flags & NFS_MOUNT_SECFLAVOUR)
- args->selected_flavor = data->pseudoflavor;
- else
- args->selected_flavor = RPC_AUTH_UNIX;
- if (!args->nfs_server.hostname)
- goto out_nomem;
-
- if (!(data->flags & NFS_MOUNT_NONLM))
- args->flags &= ~(NFS_MOUNT_LOCAL_FLOCK|
- NFS_MOUNT_LOCAL_FCNTL);
- else
- args->flags |= (NFS_MOUNT_LOCAL_FLOCK|
- NFS_MOUNT_LOCAL_FCNTL);
- /*
- * The legacy version 6 binary mount data from userspace has a
- * field used only to transport selinux information into the
- * the kernel. To continue to support that functionality we
- * have a touch of selinux knowledge here in the NFS code. The
- * userspace code converted context=blah to just blah so we are
- * converting back to the full string selinux understands.
- */
- if (data->context[0]){
-#ifdef CONFIG_SECURITY_SELINUX
- int rc;
- data->context[NFS_MAX_CONTEXT_LEN] = '\0';
- rc = security_add_mnt_opt("context", data->context,
- strlen(data->context), &args->lsm_opts);
- if (rc)
- return rc;
-#else
- return -EINVAL;
-#endif
- }
-
- break;
- default:
- return NFS_TEXT_DATA;
- }
-
- return 0;
-
-out_no_data:
- dfprintk(MOUNT, "NFS: mount program didn't pass any mount data\n");
- return -EINVAL;
-
-out_no_v3:
- dfprintk(MOUNT, "NFS: nfs_mount_data version %d does not support v3\n",
- data->version);
- return -EINVAL;
-
-out_no_sec:
- dfprintk(MOUNT, "NFS: nfs_mount_data version supports only AUTH_SYS\n");
- return -EINVAL;
-
-out_nomem:
- dfprintk(MOUNT, "NFS: not enough memory to handle mount options\n");
- return -ENOMEM;
-
-out_no_address:
- dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n");
- return -EINVAL;
-
-out_invalid_fh:
- dfprintk(MOUNT, "NFS: invalid root filehandle\n");
- return -EINVAL;
-}
-
-#if IS_ENABLED(CONFIG_NFS_V4)
-static int nfs_validate_mount_data(struct file_system_type *fs_type,
- void *options,
- struct nfs_parsed_mount_data *args,
- struct nfs_fh *mntfh,
- const char *dev_name)
-{
- if (fs_type == &nfs_fs_type)
- return nfs23_validate_mount_data(options, args, mntfh, dev_name);
- return nfs4_validate_mount_data(options, args, dev_name);
-}
-#else
-static int nfs_validate_mount_data(struct file_system_type *fs_type,
- void *options,
- struct nfs_parsed_mount_data *args,
- struct nfs_fh *mntfh,
- const char *dev_name)
-{
- return nfs23_validate_mount_data(options, args, mntfh, dev_name);
-}
-#endif
-
-static int nfs_validate_text_mount_data(void *options,
- struct nfs_parsed_mount_data *args,
- const char *dev_name)
-{
- int port = 0;
- int max_namelen = PAGE_SIZE;
- int max_pathlen = NFS_MAXPATHLEN;
- struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address;
-
- if (nfs_parse_mount_options((char *)options, args) == 0)
- return -EINVAL;
-
- if (!nfs_verify_server_address(sap))
- goto out_no_address;
-
- if (args->version == 4) {
-#if IS_ENABLED(CONFIG_NFS_V4)
- if (args->nfs_server.protocol == XPRT_TRANSPORT_RDMA)
- port = NFS_RDMA_PORT;
- else
- port = NFS_PORT;
- max_namelen = NFS4_MAXNAMLEN;
- max_pathlen = NFS4_MAXPATHLEN;
- nfs_validate_transport_protocol(args);
- if (args->nfs_server.protocol == XPRT_TRANSPORT_UDP)
- goto out_invalid_transport_udp;
- nfs4_validate_mount_flags(args);
-#else
- goto out_v4_not_compiled;
-#endif /* CONFIG_NFS_V4 */
- } else {
- nfs_set_mount_transport_protocol(args);
- if (args->nfs_server.protocol == XPRT_TRANSPORT_RDMA)
- port = NFS_RDMA_PORT;
- }
-
- nfs_set_port(sap, &args->nfs_server.port, port);
-
- return nfs_parse_devname(dev_name,
- &args->nfs_server.hostname,
- max_namelen,
- &args->nfs_server.export_path,
- max_pathlen);
-
-#if !IS_ENABLED(CONFIG_NFS_V4)
-out_v4_not_compiled:
- dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n");
- return -EPROTONOSUPPORT;
-#else
-out_invalid_transport_udp:
- dfprintk(MOUNT, "NFSv4: Unsupported transport protocol udp\n");
- return -EINVAL;
-#endif /* !CONFIG_NFS_V4 */
-
-out_no_address:
- dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n");
- return -EINVAL;
-}
#define NFS_REMOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \
| NFS_MOUNT_SECURE \
@@ -2246,39 +958,35 @@
static int
nfs_compare_remount_data(struct nfs_server *nfss,
- struct nfs_parsed_mount_data *data)
+ struct nfs_fs_context *ctx)
{
- if ((data->flags ^ nfss->flags) & NFS_REMOUNT_CMP_FLAGMASK ||
- data->rsize != nfss->rsize ||
- data->wsize != nfss->wsize ||
- data->version != nfss->nfs_client->rpc_ops->version ||
- data->minorversion != nfss->nfs_client->cl_minorversion ||
- data->retrans != nfss->client->cl_timeout->to_retries ||
- !nfs_auth_info_match(&data->auth_info, nfss->client->cl_auth->au_flavor) ||
- data->acregmin != nfss->acregmin / HZ ||
- data->acregmax != nfss->acregmax / HZ ||
- data->acdirmin != nfss->acdirmin / HZ ||
- data->acdirmax != nfss->acdirmax / HZ ||
- data->timeo != (10U * nfss->client->cl_timeout->to_initval / HZ) ||
- (data->options & NFS_OPTION_FSCACHE) != (nfss->options & NFS_OPTION_FSCACHE) ||
- data->nfs_server.port != nfss->port ||
- data->nfs_server.addrlen != nfss->nfs_client->cl_addrlen ||
- !rpc_cmp_addr((struct sockaddr *)&data->nfs_server.address,
+ if ((ctx->flags ^ nfss->flags) & NFS_REMOUNT_CMP_FLAGMASK ||
+ ctx->rsize != nfss->rsize ||
+ ctx->wsize != nfss->wsize ||
+ ctx->version != nfss->nfs_client->rpc_ops->version ||
+ ctx->minorversion != nfss->nfs_client->cl_minorversion ||
+ ctx->retrans != nfss->client->cl_timeout->to_retries ||
+ !nfs_auth_info_match(&ctx->auth_info, nfss->client->cl_auth->au_flavor) ||
+ ctx->acregmin != nfss->acregmin / HZ ||
+ ctx->acregmax != nfss->acregmax / HZ ||
+ ctx->acdirmin != nfss->acdirmin / HZ ||
+ ctx->acdirmax != nfss->acdirmax / HZ ||
+ ctx->timeo != (10U * nfss->client->cl_timeout->to_initval / HZ) ||
+ (ctx->options & NFS_OPTION_FSCACHE) != (nfss->options & NFS_OPTION_FSCACHE) ||
+ ctx->nfs_server.port != nfss->port ||
+ ctx->nfs_server.addrlen != nfss->nfs_client->cl_addrlen ||
+ !rpc_cmp_addr((struct sockaddr *)&ctx->nfs_server.address,
(struct sockaddr *)&nfss->nfs_client->cl_addr))
return -EINVAL;
return 0;
}
-int
-nfs_remount(struct super_block *sb, int *flags, char *raw_data)
+int nfs_reconfigure(struct fs_context *fc)
{
- int error;
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
+ struct super_block *sb = fc->root->d_sb;
struct nfs_server *nfss = sb->s_fs_info;
- struct nfs_parsed_mount_data *data;
- struct nfs_mount_data *options = (struct nfs_mount_data *)raw_data;
- struct nfs4_mount_data *options4 = (struct nfs4_mount_data *)raw_data;
- u32 nfsvers = nfss->nfs_client->rpc_ops->version;
sync_filesystem(sb);
@@ -2288,92 +996,38 @@
* ones were explicitly specified. Fall back to legacy behavior and
* just return success.
*/
- if ((nfsvers == 4 && (!options4 || options4->version == 1)) ||
- (nfsvers <= 3 && (!options || (options->version >= 1 &&
- options->version <= 6))))
+ if (ctx->skip_reconfig_option_check)
return 0;
- data = nfs_alloc_parsed_mount_data();
- if (data == NULL)
- return -ENOMEM;
-
- /* fill out struct with values from existing mount */
- data->flags = nfss->flags;
- data->rsize = nfss->rsize;
- data->wsize = nfss->wsize;
- data->retrans = nfss->client->cl_timeout->to_retries;
- data->selected_flavor = nfss->client->cl_auth->au_flavor;
- data->acregmin = nfss->acregmin / HZ;
- data->acregmax = nfss->acregmax / HZ;
- data->acdirmin = nfss->acdirmin / HZ;
- data->acdirmax = nfss->acdirmax / HZ;
- data->timeo = 10U * nfss->client->cl_timeout->to_initval / HZ;
- data->nfs_server.port = nfss->port;
- data->nfs_server.addrlen = nfss->nfs_client->cl_addrlen;
- data->version = nfsvers;
- data->minorversion = nfss->nfs_client->cl_minorversion;
- data->net = current->nsproxy->net_ns;
- memcpy(&data->nfs_server.address, &nfss->nfs_client->cl_addr,
- data->nfs_server.addrlen);
-
- /* overwrite those values with any that were specified */
- error = -EINVAL;
- if (!nfs_parse_mount_options((char *)options, data))
- goto out;
-
/*
* noac is a special case. It implies -o sync, but that's not
- * necessarily reflected in the mtab options. do_remount_sb
+ * necessarily reflected in the mtab options. reconfigure_super
* will clear SB_SYNCHRONOUS if -o sync wasn't specified in the
* remount options, so we have to explicitly reset it.
*/
- if (data->flags & NFS_MOUNT_NOAC)
- *flags |= SB_SYNCHRONOUS;
+ if (ctx->flags & NFS_MOUNT_NOAC) {
+ fc->sb_flags |= SB_SYNCHRONOUS;
+ fc->sb_flags_mask |= SB_SYNCHRONOUS;
+ }
/* compare new mount options with old ones */
- error = nfs_compare_remount_data(nfss, data);
- if (!error)
- error = security_sb_remount(sb, data->lsm_opts);
-out:
- nfs_free_parsed_mount_data(data);
- return error;
+ return nfs_compare_remount_data(nfss, ctx);
}
-EXPORT_SYMBOL_GPL(nfs_remount);
+EXPORT_SYMBOL_GPL(nfs_reconfigure);
/*
- * Initialise the common bits of the superblock
+ * Finish setting up an NFS superblock
*/
-static void nfs_initialise_sb(struct super_block *sb)
+static void nfs_fill_super(struct super_block *sb, struct nfs_fs_context *ctx)
{
struct nfs_server *server = NFS_SB(sb);
- sb->s_magic = NFS_SUPER_MAGIC;
-
- /* We probably want something more informative here */
- snprintf(sb->s_id, sizeof(sb->s_id),
- "%u:%u", MAJOR(sb->s_dev), MINOR(sb->s_dev));
-
- if (sb->s_blocksize == 0)
- sb->s_blocksize = nfs_block_bits(server->wsize,
- &sb->s_blocksize_bits);
-
- nfs_super_set_maxbytes(sb, server->maxfilesize);
-}
-
-/*
- * Finish setting up an NFS2/3 superblock
- */
-void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info)
-{
- struct nfs_parsed_mount_data *data = mount_info->parsed;
- struct nfs_server *server = NFS_SB(sb);
-
sb->s_blocksize_bits = 0;
sb->s_blocksize = 0;
sb->s_xattr = server->nfs_client->cl_nfs_mod->xattr;
sb->s_op = server->nfs_client->cl_nfs_mod->sops;
- if (data && data->bsize)
- sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits);
+ if (ctx && ctx->bsize)
+ sb->s_blocksize = nfs_block_size(ctx->bsize, &sb->s_blocksize_bits);
if (server->nfs_client->rpc_ops->version != 2) {
/* The VFS shouldn't apply the umask to mode bits. We will do
@@ -2393,53 +1047,27 @@
sb->s_time_max = S64_MAX;
}
- nfs_initialise_sb(sb);
-}
-EXPORT_SYMBOL_GPL(nfs_fill_super);
+ sb->s_magic = NFS_SUPER_MAGIC;
-/*
- * Finish setting up a cloned NFS2/3/4 superblock
- */
-static void nfs_clone_super(struct super_block *sb,
- struct nfs_mount_info *mount_info)
-{
- const struct super_block *old_sb = mount_info->cloned->sb;
- struct nfs_server *server = NFS_SB(sb);
+ /* We probably want something more informative here */
+ snprintf(sb->s_id, sizeof(sb->s_id),
+ "%u:%u", MAJOR(sb->s_dev), MINOR(sb->s_dev));
- sb->s_blocksize_bits = old_sb->s_blocksize_bits;
- sb->s_blocksize = old_sb->s_blocksize;
- sb->s_maxbytes = old_sb->s_maxbytes;
- sb->s_xattr = old_sb->s_xattr;
- sb->s_op = old_sb->s_op;
- sb->s_export_op = old_sb->s_export_op;
+ if (sb->s_blocksize == 0)
+ sb->s_blocksize = nfs_block_bits(server->wsize,
+ &sb->s_blocksize_bits);
- if (server->nfs_client->rpc_ops->version != 2) {
- /* The VFS shouldn't apply the umask to mode bits. We will do
- * so ourselves when necessary.
- */
- sb->s_flags |= SB_POSIXACL;
- sb->s_time_gran = 1;
- } else
- sb->s_time_gran = 1000;
-
- if (server->nfs_client->rpc_ops->version != 4) {
- sb->s_time_min = 0;
- sb->s_time_max = U32_MAX;
- } else {
- sb->s_time_min = S64_MIN;
- sb->s_time_max = S64_MAX;
- }
-
- nfs_initialise_sb(sb);
+ nfs_super_set_maxbytes(sb, server->maxfilesize);
}
-static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags)
+static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b,
+ const struct fs_context *fc)
{
const struct nfs_server *a = s->s_fs_info;
const struct rpc_clnt *clnt_a = a->client;
const struct rpc_clnt *clnt_b = b->client;
- if ((s->s_flags & NFS_MS_MASK) != (flags & NFS_MS_MASK))
+ if ((s->s_flags & NFS_SB_MASK) != (fc->sb_flags & NFS_SB_MASK))
goto Ebusy;
if (a->nfs_client != b->nfs_client)
goto Ebusy;
@@ -2464,19 +1092,11 @@
return 0;
}
-struct nfs_sb_mountdata {
- struct nfs_server *server;
- int mntflags;
-};
-
-static int nfs_set_super(struct super_block *s, void *data)
+static int nfs_set_super(struct super_block *s, struct fs_context *fc)
{
- struct nfs_sb_mountdata *sb_mntdata = data;
- struct nfs_server *server = sb_mntdata->server;
+ struct nfs_server *server = fc->s_fs_info;
int ret;
- s->s_flags = sb_mntdata->mntflags;
- s->s_fs_info = server;
s->s_d_op = server->nfs_client->rpc_ops->dentry_ops;
ret = set_anon_super(s, server);
if (ret == 0)
@@ -2541,11 +1161,9 @@
return 1;
}
-static int nfs_compare_super(struct super_block *sb, void *data)
+static int nfs_compare_super(struct super_block *sb, struct fs_context *fc)
{
- struct nfs_sb_mountdata *sb_mntdata = data;
- struct nfs_server *server = sb_mntdata->server, *old = NFS_SB(sb);
- int mntflags = sb_mntdata->mntflags;
+ struct nfs_server *server = fc->s_fs_info, *old = NFS_SB(sb);
if (!nfs_compare_super_address(old, server))
return 0;
@@ -2556,13 +1174,12 @@
return 0;
if (!nfs_compare_userns(old, server))
return 0;
- return nfs_compare_mount_options(sb, server, mntflags);
+ return nfs_compare_mount_options(sb, server, fc);
}
#ifdef CONFIG_NFS_FSCACHE
static void nfs_get_cache_cookie(struct super_block *sb,
- struct nfs_parsed_mount_data *parsed,
- struct nfs_clone_mount *cloned)
+ struct nfs_fs_context *ctx)
{
struct nfs_server *nfss = NFS_SB(sb);
char *uniq = NULL;
@@ -2571,116 +1188,68 @@
nfss->fscache_key = NULL;
nfss->fscache = NULL;
- if (parsed) {
- if (!(parsed->options & NFS_OPTION_FSCACHE))
- return;
- if (parsed->fscache_uniq) {
- uniq = parsed->fscache_uniq;
- ulen = strlen(parsed->fscache_uniq);
- }
- } else if (cloned) {
- struct nfs_server *mnt_s = NFS_SB(cloned->sb);
+ if (!ctx)
+ return;
+
+ if (ctx->clone_data.sb) {
+ struct nfs_server *mnt_s = NFS_SB(ctx->clone_data.sb);
if (!(mnt_s->options & NFS_OPTION_FSCACHE))
return;
if (mnt_s->fscache_key) {
uniq = mnt_s->fscache_key->key.uniquifier;
ulen = mnt_s->fscache_key->key.uniq_len;
- };
- } else
- return;
+ }
+ } else {
+ if (!(ctx->options & NFS_OPTION_FSCACHE))
+ return;
+ if (ctx->fscache_uniq) {
+ uniq = ctx->fscache_uniq;
+ ulen = strlen(ctx->fscache_uniq);
+ }
+ }
nfs_fscache_get_super_cookie(sb, uniq, ulen);
}
#else
static void nfs_get_cache_cookie(struct super_block *sb,
- struct nfs_parsed_mount_data *parsed,
- struct nfs_clone_mount *cloned)
+ struct nfs_fs_context *ctx)
{
}
#endif
-int nfs_set_sb_security(struct super_block *s, struct dentry *mntroot,
- struct nfs_mount_info *mount_info)
+int nfs_get_tree_common(struct fs_context *fc)
{
- int error;
- unsigned long kflags = 0, kflags_out = 0;
- if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL)
- kflags |= SECURITY_LSM_NATIVE_LABELS;
-
- error = security_sb_set_mnt_opts(s, mount_info->parsed->lsm_opts,
- kflags, &kflags_out);
- if (error)
- goto err;
-
- if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL &&
- !(kflags_out & SECURITY_LSM_NATIVE_LABELS))
- NFS_SB(s)->caps &= ~NFS_CAP_SECURITY_LABEL;
-err:
- return error;
-}
-EXPORT_SYMBOL_GPL(nfs_set_sb_security);
-
-int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot,
- struct nfs_mount_info *mount_info)
-{
- int error;
- unsigned long kflags = 0, kflags_out = 0;
-
- /* clone any lsm security options from the parent to the new sb */
- if (d_inode(mntroot)->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops)
- return -ESTALE;
-
- if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL)
- kflags |= SECURITY_LSM_NATIVE_LABELS;
-
- error = security_sb_clone_mnt_opts(mount_info->cloned->sb, s, kflags,
- &kflags_out);
- if (error)
- return error;
-
- if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL &&
- !(kflags_out & SECURITY_LSM_NATIVE_LABELS))
- NFS_SB(s)->caps &= ~NFS_CAP_SECURITY_LABEL;
- return 0;
-}
-EXPORT_SYMBOL_GPL(nfs_clone_sb_security);
-
-static void nfs_set_readahead(struct backing_dev_info *bdi,
- unsigned long iomax_pages)
-{
- bdi->ra_pages = VM_READAHEAD_PAGES;
- bdi->io_pages = iomax_pages;
-}
-
-struct dentry *nfs_fs_mount_common(struct nfs_server *server,
- int flags, const char *dev_name,
- struct nfs_mount_info *mount_info,
- struct nfs_subversion *nfs_mod)
-{
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
struct super_block *s;
- struct dentry *mntroot = ERR_PTR(-ENOMEM);
- int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
- struct nfs_sb_mountdata sb_mntdata = {
- .mntflags = flags,
- .server = server,
- };
+ int (*compare_super)(struct super_block *, struct fs_context *) = nfs_compare_super;
+ struct nfs_server *server = ctx->server;
int error;
+ ctx->server = NULL;
+ if (IS_ERR(server))
+ return PTR_ERR(server);
+
if (server->flags & NFS_MOUNT_UNSHARED)
compare_super = NULL;
/* -o noac implies -o sync */
if (server->flags & NFS_MOUNT_NOAC)
- sb_mntdata.mntflags |= SB_SYNCHRONOUS;
+ fc->sb_flags |= SB_SYNCHRONOUS;
- if (mount_info->cloned != NULL && mount_info->cloned->sb != NULL)
- if (mount_info->cloned->sb->s_flags & SB_SYNCHRONOUS)
- sb_mntdata.mntflags |= SB_SYNCHRONOUS;
+ if (ctx->clone_data.sb)
+ if (ctx->clone_data.sb->s_flags & SB_SYNCHRONOUS)
+ fc->sb_flags |= SB_SYNCHRONOUS;
+
+ if (server->caps & NFS_CAP_SECURITY_LABEL)
+ fc->lsm_flags |= SECURITY_LSM_NATIVE_LABELS;
/* Get a superblock - note that we may end up sharing one that already exists */
- s = sget(nfs_mod->nfs_fs, compare_super, nfs_set_super, flags, &sb_mntdata);
+ fc->s_fs_info = server;
+ s = sget_fc(fc, compare_super, nfs_set_super);
+ fc->s_fs_info = NULL;
if (IS_ERR(s)) {
- mntroot = ERR_CAST(s);
+ error = PTR_ERR(s);
+ nfs_errorf(fc, "NFS: Couldn't get superblock");
goto out_err_nosb;
}
@@ -2690,88 +1259,42 @@
} else {
error = super_setup_bdi_name(s, "%u:%u", MAJOR(server->s_dev),
MINOR(server->s_dev));
- if (error) {
- mntroot = ERR_PTR(error);
+ if (error)
goto error_splat_super;
- }
- nfs_set_readahead(s->s_bdi, server->rpages);
+ s->s_bdi->io_pages = server->rpages;
server->super = s;
}
if (!s->s_root) {
+ unsigned bsize = ctx->clone_data.inherited_bsize;
/* initial superblock/root creation */
- mount_info->fill_super(s, mount_info);
- nfs_get_cache_cookie(s, mount_info->parsed, mount_info->cloned);
- if (!(server->flags & NFS_MOUNT_UNSHARED))
- s->s_iflags |= SB_I_MULTIROOT;
+ nfs_fill_super(s, ctx);
+ if (bsize) {
+ s->s_blocksize_bits = bsize;
+ s->s_blocksize = 1U << bsize;
+ }
+ nfs_get_cache_cookie(s, ctx);
}
- mntroot = nfs_get_root(s, mount_info->mntfh, dev_name);
- if (IS_ERR(mntroot))
+ error = nfs_get_root(s, fc);
+ if (error < 0) {
+ nfs_errorf(fc, "NFS: Couldn't get root dentry");
goto error_splat_super;
-
- error = mount_info->set_security(s, mntroot, mount_info);
- if (error)
- goto error_splat_root;
+ }
s->s_flags |= SB_ACTIVE;
+ error = 0;
out:
- return mntroot;
+ return error;
out_err_nosb:
nfs_free_server(server);
goto out;
-
-error_splat_root:
- dput(mntroot);
- mntroot = ERR_PTR(error);
error_splat_super:
deactivate_locked_super(s);
goto out;
}
-EXPORT_SYMBOL_GPL(nfs_fs_mount_common);
-
-struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data)
-{
- struct nfs_mount_info mount_info = {
- .fill_super = nfs_fill_super,
- .set_security = nfs_set_sb_security,
- };
- struct dentry *mntroot = ERR_PTR(-ENOMEM);
- struct nfs_subversion *nfs_mod;
- int error;
-
- mount_info.parsed = nfs_alloc_parsed_mount_data();
- mount_info.mntfh = nfs_alloc_fhandle();
- if (mount_info.parsed == NULL || mount_info.mntfh == NULL)
- goto out;
-
- /* Validate the mount data */
- error = nfs_validate_mount_data(fs_type, raw_data, mount_info.parsed, mount_info.mntfh, dev_name);
- if (error == NFS_TEXT_DATA)
- error = nfs_validate_text_mount_data(raw_data, mount_info.parsed, dev_name);
- if (error < 0) {
- mntroot = ERR_PTR(error);
- goto out;
- }
-
- nfs_mod = get_nfs_version(mount_info.parsed->version);
- if (IS_ERR(nfs_mod)) {
- mntroot = ERR_CAST(nfs_mod);
- goto out;
- }
-
- mntroot = nfs_mod->rpc_ops->try_mount(flags, dev_name, &mount_info, nfs_mod);
-
- put_nfs_version(nfs_mod);
-out:
- nfs_free_parsed_mount_data(mount_info.parsed);
- nfs_free_fhandle(mount_info.mntfh);
- return mntroot;
-}
-EXPORT_SYMBOL_GPL(nfs_fs_mount);
/*
* Destroy an NFS2/3 superblock
@@ -2790,150 +1313,8 @@
}
EXPORT_SYMBOL_GPL(nfs_kill_super);
-/*
- * Clone an NFS2/3/4 server record on xdev traversal (FSID-change)
- */
-static struct dentry *
-nfs_xdev_mount(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *raw_data)
-{
- struct nfs_clone_mount *data = raw_data;
- struct nfs_mount_info mount_info = {
- .fill_super = nfs_clone_super,
- .set_security = nfs_clone_sb_security,
- .cloned = data,
- };
- struct nfs_server *server;
- struct dentry *mntroot = ERR_PTR(-ENOMEM);
- struct nfs_subversion *nfs_mod = NFS_SB(data->sb)->nfs_client->cl_nfs_mod;
-
- dprintk("--> nfs_xdev_mount()\n");
-
- mount_info.mntfh = mount_info.cloned->fh;
-
- /* create a new volume representation */
- server = nfs_mod->rpc_ops->clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor);
-
- if (IS_ERR(server))
- mntroot = ERR_CAST(server);
- else
- mntroot = nfs_fs_mount_common(server, flags,
- dev_name, &mount_info, nfs_mod);
-
- dprintk("<-- nfs_xdev_mount() = %ld\n",
- IS_ERR(mntroot) ? PTR_ERR(mntroot) : 0L);
- return mntroot;
-}
-
#if IS_ENABLED(CONFIG_NFS_V4)
-static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args)
-{
- args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3|
- NFS_MOUNT_LOCAL_FLOCK|NFS_MOUNT_LOCAL_FCNTL);
-}
-
-/*
- * Validate NFSv4 mount options
- */
-static int nfs4_validate_mount_data(void *options,
- struct nfs_parsed_mount_data *args,
- const char *dev_name)
-{
- struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address;
- struct nfs4_mount_data *data = (struct nfs4_mount_data *)options;
- char *c;
-
- if (data == NULL)
- goto out_no_data;
-
- args->version = 4;
-
- switch (data->version) {
- case 1:
- if (data->host_addrlen > sizeof(args->nfs_server.address))
- goto out_no_address;
- if (data->host_addrlen == 0)
- goto out_no_address;
- args->nfs_server.addrlen = data->host_addrlen;
- if (copy_from_user(sap, data->host_addr, data->host_addrlen))
- return -EFAULT;
- if (!nfs_verify_server_address(sap))
- goto out_no_address;
- args->nfs_server.port = ntohs(((struct sockaddr_in *)sap)->sin_port);
-
- if (data->auth_flavourlen) {
- rpc_authflavor_t pseudoflavor;
- if (data->auth_flavourlen > 1)
- goto out_inval_auth;
- if (copy_from_user(&pseudoflavor,
- data->auth_flavours,
- sizeof(pseudoflavor)))
- return -EFAULT;
- args->selected_flavor = pseudoflavor;
- } else
- args->selected_flavor = RPC_AUTH_UNIX;
-
- c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN);
- if (IS_ERR(c))
- return PTR_ERR(c);
- args->nfs_server.hostname = c;
-
- c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN);
- if (IS_ERR(c))
- return PTR_ERR(c);
- args->nfs_server.export_path = c;
- dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", c);
-
- c = strndup_user(data->client_addr.data, 16);
- if (IS_ERR(c))
- return PTR_ERR(c);
- args->client_address = c;
-
- /*
- * Translate to nfs_parsed_mount_data, which nfs4_fill_super
- * can deal with.
- */
-
- args->flags = data->flags & NFS4_MOUNT_FLAGMASK;
- args->rsize = data->rsize;
- args->wsize = data->wsize;
- args->timeo = data->timeo;
- args->retrans = data->retrans;
- args->acregmin = data->acregmin;
- args->acregmax = data->acregmax;
- args->acdirmin = data->acdirmin;
- args->acdirmax = data->acdirmax;
- args->nfs_server.protocol = data->proto;
- nfs_validate_transport_protocol(args);
- if (args->nfs_server.protocol == XPRT_TRANSPORT_UDP)
- goto out_invalid_transport_udp;
-
- break;
- default:
- return NFS_TEXT_DATA;
- }
-
- return 0;
-
-out_no_data:
- dfprintk(MOUNT, "NFS4: mount program didn't pass any mount data\n");
- return -EINVAL;
-
-out_inval_auth:
- dfprintk(MOUNT, "NFS4: Invalid number of RPC auth flavours %d\n",
- data->auth_flavourlen);
- return -EINVAL;
-
-out_no_address:
- dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n");
- return -EINVAL;
-
-out_invalid_transport_udp:
- dfprintk(MOUNT, "NFSv4: Unsupported transport protocol udp\n");
- return -EINVAL;
-}
-
/*
* NFS v4 module parameters need to stay in the
* NFS client for backwards compatibility
diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c
index 4f3390b..8cb7075 100644
--- a/fs/nfs/sysfs.c
+++ b/fs/nfs/sysfs.c
@@ -79,7 +79,12 @@
struct nfs_netns_client *c = container_of(kobj,
struct nfs_netns_client,
kobject);
- return scnprintf(buf, PAGE_SIZE, "%s\n", c->identifier);
+ ssize_t ret;
+
+ rcu_read_lock();
+ ret = scnprintf(buf, PAGE_SIZE, "%s\n", rcu_dereference(c->identifier));
+ rcu_read_unlock();
+ return ret;
}
/* Strip trailing '\n' */
@@ -107,7 +112,7 @@
p = kmemdup_nul(buf, len, GFP_KERNEL);
if (!p)
return -ENOMEM;
- old = xchg(&c->identifier, p);
+ old = rcu_dereference_protected(xchg(&c->identifier, (char __rcu *)p), 1);
if (old) {
synchronize_rcu();
kfree(old);
@@ -121,8 +126,7 @@
struct nfs_netns_client,
kobject);
- if (c->identifier)
- kfree(c->identifier);
+ kfree(rcu_dereference_raw(c->identifier));
kfree(c);
}
diff --git a/fs/nfs/sysfs.h b/fs/nfs/sysfs.h
index f1b2741..5501ef5 100644
--- a/fs/nfs/sysfs.h
+++ b/fs/nfs/sysfs.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2019 Hammerspace Inc
*/
@@ -11,7 +11,7 @@
struct nfs_netns_client {
struct kobject kobject;
struct net *net;
- const char *identifier;
+ const char __rcu *identifier;
};
extern struct kobject *nfs_client_kobj;
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 0effeee..b27ebdc 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -98,7 +98,7 @@
.callback_ops = &nfs_unlink_ops,
.callback_data = data,
.workqueue = nfsiod_workqueue,
- .flags = RPC_TASK_ASYNC,
+ .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF,
};
struct rpc_task *task;
struct inode *dir = d_inode(data->dentry->d_parent);
@@ -341,7 +341,7 @@
.callback_ops = &nfs_rename_ops,
.workqueue = nfsiod_workqueue,
.rpc_client = NFS_CLIENT(old_dir),
- .flags = RPC_TASK_ASYNC,
+ .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF,
};
data = kzalloc(sizeof(*data), GFP_KERNEL);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 613c3ef..bde4c36 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -149,6 +149,31 @@
kref_put(&ioc->refcount, nfs_io_completion_release);
}
+static void
+nfs_page_set_inode_ref(struct nfs_page *req, struct inode *inode)
+{
+ if (!test_and_set_bit(PG_INODE_REF, &req->wb_flags)) {
+ kref_get(&req->wb_kref);
+ atomic_long_inc(&NFS_I(inode)->nrequests);
+ }
+}
+
+static int
+nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode)
+{
+ int ret;
+
+ if (!test_bit(PG_REMOVE, &req->wb_flags))
+ return 0;
+ ret = nfs_page_group_lock(req);
+ if (ret)
+ return ret;
+ if (test_and_clear_bit(PG_REMOVE, &req->wb_flags))
+ nfs_page_set_inode_ref(req, inode);
+ nfs_page_group_unlock(req);
+ return 0;
+}
+
static struct nfs_page *
nfs_page_private_request(struct page *page)
{
@@ -218,6 +243,36 @@
return req;
}
+static struct nfs_page *nfs_find_and_lock_page_request(struct page *page)
+{
+ struct inode *inode = page_file_mapping(page)->host;
+ struct nfs_page *req, *head;
+ int ret;
+
+ for (;;) {
+ req = nfs_page_find_head_request(page);
+ if (!req)
+ return req;
+ head = nfs_page_group_lock_head(req);
+ if (head != req)
+ nfs_release_request(req);
+ if (IS_ERR(head))
+ return head;
+ ret = nfs_cancel_remove_inode(head, inode);
+ if (ret < 0) {
+ nfs_unlock_and_release_request(head);
+ return ERR_PTR(ret);
+ }
+ /* Ensure that nobody removed the request before we locked it */
+ if (head == nfs_page_private_request(page))
+ break;
+ if (PageSwapCache(page))
+ break;
+ nfs_unlock_and_release_request(head);
+ }
+ return head;
+}
+
/* Adjust the file length if we're writing beyond the end */
static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
{
@@ -256,8 +311,11 @@
static void nfs_mapping_set_error(struct page *page, int error)
{
+ struct address_space *mapping = page_file_mapping(page);
+
SetPageError(page);
- mapping_set_error(page_file_mapping(page), error);
+ mapping_set_error(mapping, error);
+ nfs_set_pageerror(mapping);
}
/*
@@ -377,34 +435,6 @@
}
/*
- * nfs_unroll_locks_and_wait - unlock all newly locked reqs and wait on @req
- *
- * this is a helper function for nfs_lock_and_join_requests
- *
- * @inode - inode associated with request page group, must be holding inode lock
- * @head - head request of page group, must be holding head lock
- * @req - request that couldn't lock and needs to wait on the req bit lock
- *
- * NOTE: this must be called holding page_group bit lock
- * which will be released before returning.
- *
- * returns 0 on success, < 0 on error.
- */
-static void
-nfs_unroll_locks(struct inode *inode, struct nfs_page *head,
- struct nfs_page *req)
-{
- struct nfs_page *tmp;
-
- /* relinquish all the locks successfully grabbed this run */
- for (tmp = head->wb_this_page ; tmp != req; tmp = tmp->wb_this_page) {
- if (!kref_read(&tmp->wb_kref))
- continue;
- nfs_unlock_and_release_request(tmp);
- }
-}
-
-/*
* nfs_destroy_unlinked_subrequests - destroy recently unlinked subrequests
*
* @destroy_list - request list (using wb_this_page) terminated by @old_head
@@ -461,11 +491,64 @@
}
/*
- * nfs_lock_and_join_requests - join all subreqs to the head req and return
- * a locked reference, cancelling any pending
- * operations for this page.
+ * nfs_join_page_group - destroy subrequests of the head req
+ * @head: the page used to lookup the "page group" of nfs_page structures
+ * @inode: Inode to which the request belongs.
*
- * @page - the page used to lookup the "page group" of nfs_page structures
+ * This function joins all sub requests to the head request by first
+ * locking all requests in the group, cancelling any pending operations
+ * and finally updating the head request to cover the whole range covered by
+ * the (former) group. All subrequests are removed from any write or commit
+ * lists, unlinked from the group and destroyed.
+ */
+void
+nfs_join_page_group(struct nfs_page *head, struct inode *inode)
+{
+ struct nfs_page *subreq;
+ struct nfs_page *destroy_list = NULL;
+ unsigned int pgbase, off, bytes;
+
+ pgbase = head->wb_pgbase;
+ bytes = head->wb_bytes;
+ off = head->wb_offset;
+ for (subreq = head->wb_this_page; subreq != head;
+ subreq = subreq->wb_this_page) {
+ /* Subrequests should always form a contiguous range */
+ if (pgbase > subreq->wb_pgbase) {
+ off -= pgbase - subreq->wb_pgbase;
+ bytes += pgbase - subreq->wb_pgbase;
+ pgbase = subreq->wb_pgbase;
+ }
+ bytes = max(subreq->wb_pgbase + subreq->wb_bytes
+ - pgbase, bytes);
+ }
+
+ /* Set the head request's range to cover the former page group */
+ head->wb_pgbase = pgbase;
+ head->wb_bytes = bytes;
+ head->wb_offset = off;
+
+ /* Now that all requests are locked, make sure they aren't on any list.
+ * Commit list removal accounting is done after locks are dropped */
+ subreq = head;
+ do {
+ nfs_clear_request_commit(subreq);
+ subreq = subreq->wb_this_page;
+ } while (subreq != head);
+
+ /* unlink subrequests from head, destroy them later */
+ if (head->wb_this_page != head) {
+ /* destroy list will be terminated by head */
+ destroy_list = head->wb_this_page;
+ head->wb_this_page = head;
+ }
+
+ nfs_destroy_unlinked_subrequests(destroy_list, head, inode);
+}
+
+/*
+ * nfs_lock_and_join_requests - join all subreqs to the head req
+ * @page: the page used to lookup the "page group" of nfs_page structures
*
* This function joins all sub requests to the head request by first
* locking all requests in the group, cancelling any pending operations
@@ -482,132 +565,33 @@
nfs_lock_and_join_requests(struct page *page)
{
struct inode *inode = page_file_mapping(page)->host;
- struct nfs_page *head, *subreq;
- struct nfs_page *destroy_list = NULL;
- unsigned int total_bytes;
+ struct nfs_page *head;
int ret;
-try_again:
/*
* A reference is taken only on the head request which acts as a
* reference to the whole page group - the group will not be destroyed
* until the head reference is released.
*/
- head = nfs_page_find_head_request(page);
- if (!head)
- return NULL;
-
- /* lock the page head first in order to avoid an ABBA inefficiency */
- if (!nfs_lock_request(head)) {
- ret = nfs_wait_on_request(head);
- nfs_release_request(head);
- if (ret < 0)
- return ERR_PTR(ret);
- goto try_again;
- }
-
- /* Ensure that nobody removed the request before we locked it */
- if (head != nfs_page_private_request(page) && !PageSwapCache(page)) {
- nfs_unlock_and_release_request(head);
- goto try_again;
- }
-
- ret = nfs_page_group_lock(head);
- if (ret < 0)
- goto release_request;
+ head = nfs_find_and_lock_page_request(page);
+ if (IS_ERR_OR_NULL(head))
+ return head;
/* lock each request in the page group */
- total_bytes = head->wb_bytes;
- for (subreq = head->wb_this_page; subreq != head;
- subreq = subreq->wb_this_page) {
-
- if (!kref_get_unless_zero(&subreq->wb_kref)) {
- if (subreq->wb_offset == head->wb_offset + total_bytes)
- total_bytes += subreq->wb_bytes;
- continue;
- }
-
- while (!nfs_lock_request(subreq)) {
- /*
- * Unlock page to allow nfs_page_group_sync_on_bit()
- * to succeed
- */
- nfs_page_group_unlock(head);
- ret = nfs_wait_on_request(subreq);
- if (!ret)
- ret = nfs_page_group_lock(head);
- if (ret < 0) {
- nfs_unroll_locks(inode, head, subreq);
- nfs_release_request(subreq);
- goto release_request;
- }
- }
- /*
- * Subrequests are always contiguous, non overlapping
- * and in order - but may be repeated (mirrored writes).
- */
- if (subreq->wb_offset == (head->wb_offset + total_bytes)) {
- /* keep track of how many bytes this group covers */
- total_bytes += subreq->wb_bytes;
- } else if (WARN_ON_ONCE(subreq->wb_offset < head->wb_offset ||
- ((subreq->wb_offset + subreq->wb_bytes) >
- (head->wb_offset + total_bytes)))) {
- nfs_page_group_unlock(head);
- nfs_unroll_locks(inode, head, subreq);
- nfs_unlock_and_release_request(subreq);
- ret = -EIO;
- goto release_request;
- }
- }
-
- /* Now that all requests are locked, make sure they aren't on any list.
- * Commit list removal accounting is done after locks are dropped */
- subreq = head;
- do {
- nfs_clear_request_commit(subreq);
- subreq = subreq->wb_this_page;
- } while (subreq != head);
-
- /* unlink subrequests from head, destroy them later */
- if (head->wb_this_page != head) {
- /* destroy list will be terminated by head */
- destroy_list = head->wb_this_page;
- head->wb_this_page = head;
-
- /* change head request to cover whole range that
- * the former page group covered */
- head->wb_bytes = total_bytes;
- }
-
- /* Postpone destruction of this request */
- if (test_and_clear_bit(PG_REMOVE, &head->wb_flags)) {
- set_bit(PG_INODE_REF, &head->wb_flags);
- kref_get(&head->wb_kref);
- atomic_long_inc(&NFS_I(inode)->nrequests);
- }
-
- nfs_page_group_unlock(head);
-
- nfs_destroy_unlinked_subrequests(destroy_list, head, inode);
-
- /* Did we lose a race with nfs_inode_remove_request()? */
- if (!(PagePrivate(page) || PageSwapCache(page))) {
+ ret = nfs_page_group_lock_subrequests(head);
+ if (ret < 0) {
nfs_unlock_and_release_request(head);
- return NULL;
+ return ERR_PTR(ret);
}
- /* still holds ref on head from nfs_page_find_head_request
- * and still has lock on head from lock loop */
- return head;
+ nfs_join_page_group(head, inode);
-release_request:
- nfs_unlock_and_release_request(head);
- return ERR_PTR(ret);
+ return head;
}
static void nfs_write_error(struct nfs_page *req, int error)
{
- nfs_set_pageerror(page_file_mapping(req->wb_page));
+ trace_nfs_write_error(req, error);
nfs_mapping_set_error(req->wb_page, error);
nfs_inode_remove_request(req);
nfs_end_page_writeback(req);
@@ -962,9 +946,9 @@
static void
nfs_clear_page_commit(struct page *page)
{
- dec_node_page_state(page, NR_UNSTABLE_NFS);
+ dec_node_page_state(page, NR_WRITEBACK);
dec_wb_stat(&inode_to_bdi(page_file_mapping(page)->host)->wb,
- WB_RECLAIMABLE);
+ WB_WRITEBACK);
}
/* Called holding the request lock on @req */
@@ -1013,7 +997,7 @@
nfs_list_remove_request(req);
if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) &&
(hdr->good_bytes < bytes)) {
- nfs_set_pageerror(page_file_mapping(req->wb_page));
+ trace_nfs_comp_error(req, hdr->error);
nfs_mapping_set_error(req->wb_page, hdr->error);
goto remove_req;
}
@@ -1050,25 +1034,11 @@
struct nfs_page *req, *tmp;
int ret = 0;
-restart:
list_for_each_entry_safe(req, tmp, src, wb_list) {
kref_get(&req->wb_kref);
if (!nfs_lock_request(req)) {
- int status;
-
- /* Prevent deadlock with nfs_lock_and_join_requests */
- if (!list_empty(dst)) {
- nfs_release_request(req);
- continue;
- }
- /* Ensure we make progress to prevent livelock */
- mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
- status = nfs_wait_on_request(req);
nfs_release_request(req);
- mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
- if (status < 0)
- break;
- goto restart;
+ continue;
}
nfs_request_remove_commit_list(req, cinfo);
clear_bit(PG_COMMIT_TO_DS, &req->wb_flags);
@@ -1418,8 +1388,7 @@
task_setup_data->priority = priority;
rpc_ops->write_setup(hdr, msg, &task_setup_data->rpc_client);
- trace_nfs_initiate_write(hdr->inode, hdr->io_start, hdr->good_bytes,
- hdr->args.stable);
+ trace_nfs_initiate_write(hdr);
}
/* If a nfs_flush_* function fails, it should remove reqs from @head and
@@ -1583,8 +1552,7 @@
return status;
nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count);
- trace_nfs_writeback_done(inode, task->tk_status,
- hdr->args.offset, hdr->res.verf);
+ trace_nfs_writeback_done(task, hdr);
if (hdr->res.verf->committed < hdr->args.stable &&
task->tk_status >= 0) {
@@ -1664,6 +1632,8 @@
*/
argp->stable = NFS_FILE_SYNC;
}
+ resp->count = 0;
+ resp->verf->committed = 0;
rpc_restart_call_prepare(task);
}
}
@@ -1679,10 +1649,13 @@
atomic_inc(&cinfo->rpcs_out);
}
-static void nfs_commit_end(struct nfs_mds_commit_info *cinfo)
+bool nfs_commit_end(struct nfs_mds_commit_info *cinfo)
{
- if (atomic_dec_and_test(&cinfo->rpcs_out))
+ if (atomic_dec_and_test(&cinfo->rpcs_out)) {
wake_up_var(&cinfo->rpcs_out);
+ return true;
+ }
+ return false;
}
void nfs_commitdata_release(struct nfs_commit_data *data)
@@ -1750,14 +1723,19 @@
struct pnfs_layout_segment *lseg,
struct nfs_commit_info *cinfo)
{
- struct nfs_page *first = nfs_list_entry(head->next);
- struct nfs_open_context *ctx = nfs_req_openctx(first);
- struct inode *inode = d_inode(ctx->dentry);
+ struct nfs_page *first;
+ struct nfs_open_context *ctx;
+ struct inode *inode;
/* Set up the RPC argument and reply structs
* NB: take care not to mess about with data->commit et al. */
- list_splice_init(head, &data->pages);
+ if (head)
+ list_splice_init(head, &data->pages);
+
+ first = nfs_list_entry(data->pages.next);
+ ctx = nfs_req_openctx(first);
+ inode = d_inode(ctx->dentry);
data->inode = inode;
data->cred = ctx->cred;
@@ -1777,6 +1755,7 @@
data->res.fattr = &data->fattr;
data->res.verf = &data->verf;
nfs_fattr_init(&data->fattr);
+ nfs_commit_begin(cinfo->mds);
}
EXPORT_SYMBOL_GPL(nfs_init_commit);
@@ -1822,9 +1801,8 @@
/* Set up the argument struct */
nfs_init_commit(data, head, NULL, cinfo);
- atomic_inc(&cinfo->mds->rpcs_out);
return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode),
- data->mds_ops, how, 0);
+ data->mds_ops, how, RPC_TASK_CRED_NOREF);
}
/*
@@ -1839,7 +1817,7 @@
/* Call the NFS version-specific code */
NFS_PROTO(data->inode)->commit_done(task, data);
- trace_nfs_commit_done(data);
+ trace_nfs_commit_done(task, data);
}
static void nfs_commit_release_pages(struct nfs_commit_data *data)
@@ -1863,6 +1841,7 @@
(long long)req_offset(req));
if (status < 0) {
if (req->wb_page) {
+ trace_nfs_commit_error(req, status);
nfs_mapping_set_error(req->wb_page, status);
nfs_inode_remove_request(req);
}
@@ -1872,8 +1851,7 @@
/* Okay, COMMIT succeeded, apparently. Check the verifier
* returned by the server against all stored verfs. */
- if (verf->committed > NFS_UNSTABLE &&
- !nfs_write_verifier_cmp(&req->wb_verf, &verf->verifier)) {
+ if (nfs_write_match_verf(verf, req)) {
/* We have a match */
if (req->wb_page)
nfs_inode_remove_request(req);
@@ -1935,6 +1913,7 @@
int may_wait = how & FLUSH_SYNC;
int ret, nscan;
+ how &= ~FLUSH_SYNC;
nfs_init_cinfo_from_inode(&cinfo, inode);
nfs_commit_begin(cinfo.mds);
for (;;) {