Update Linux to v5.4.2
Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 20b1c17..10cefb0 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -1,7 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0-only
config NFSD
tristate "NFS server support"
depends on INET
depends on FILE_LOCKING
+ depends on FSNOTIFY
select LOCKD
select SUNRPC
select EXPORTFS
@@ -146,7 +148,7 @@
config NFSD_FAULT_INJECTION
bool "NFS server manual fault injection"
- depends on NFSD_V4 && DEBUG_KERNEL && DEBUG_FS
+ depends on NFSD_V4 && DEBUG_KERNEL && DEBUG_FS && BROKEN
help
This option enables support for manually injecting faults
into the NFS server. This is intended to be used for
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
index 2bfb58e..6a40b1a 100644
--- a/fs/nfsd/Makefile
+++ b/fs/nfsd/Makefile
@@ -11,7 +11,8 @@
nfsd-y += trace.o
nfsd-y += nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \
- export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o
+ export.o auth.o lockd.o nfscache.o nfsxdr.o \
+ stats.o filecache.o
nfsd-$(CONFIG_NFSD_FAULT_INJECTION) += fault_inject.o
nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o
nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o
diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h
index 4cd7c69..ba14d2f 100644
--- a/fs/nfsd/acl.h
+++ b/fs/nfsd/acl.h
@@ -39,14 +39,6 @@
struct svc_fh;
struct svc_rqst;
-/*
- * Maximum ACL we'll accept from a client; chosen (somewhat
- * arbitrarily) so that kmalloc'ing the ACL shouldn't require a
- * high-order allocation. This allows 204 ACEs on x86_64:
- */
-#define NFS4_ACL_MAX ((PAGE_SIZE - sizeof(struct nfs4_acl)) \
- / sizeof(struct nfs4_ace))
-
int nfs4_acl_bytes(int entries);
int nfs4_acl_get_whotype(char *, u32);
__be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who);
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index 4fb1f72..9bbaa67 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -15,6 +15,7 @@
#include "blocklayoutxdr.h"
#include "pnfs.h"
+#include "filecache.h"
#define NFSDDBG_FACILITY NFSDDBG_PNFS
@@ -121,15 +122,13 @@
{
loff_t new_size = lcp->lc_last_wr + 1;
struct iattr iattr = { .ia_valid = 0 };
- struct timespec ts;
int error;
- ts = timespec64_to_timespec(inode->i_mtime);
if (lcp->lc_mtime.tv_nsec == UTIME_NOW ||
- timespec_compare(&lcp->lc_mtime, &ts) < 0)
- lcp->lc_mtime = timespec64_to_timespec(current_time(inode));
+ timespec64_compare(&lcp->lc_mtime, &inode->i_mtime) < 0)
+ lcp->lc_mtime = current_time(inode);
iattr.ia_valid |= ATTR_ATIME | ATTR_CTIME | ATTR_MTIME;
- iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = timespec_to_timespec64(lcp->lc_mtime);
+ iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = lcp->lc_mtime;
if (new_size > i_size_read(inode)) {
iattr.ia_valid |= ATTR_SIZE;
@@ -406,7 +405,7 @@
nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls)
{
struct nfs4_client *clp = ls->ls_stid.sc_client;
- struct block_device *bdev = ls->ls_file->f_path.mnt->mnt_sb->s_bdev;
+ struct block_device *bdev = ls->ls_file->nf_file->f_path.mnt->mnt_sb->s_bdev;
bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY,
nfsd4_scsi_pr_key(clp), 0, true);
diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h
index b7559c6..10ec5ec 100644
--- a/fs/nfsd/cache.h
+++ b/fs/nfsd/cache.h
@@ -10,6 +10,7 @@
#define NFSCACHE_H
#include <linux/sunrpc/svc.h>
+#include "netns.h"
/*
* Representation of a reply cache entry.
@@ -19,18 +20,22 @@
* is much larger than a sockaddr_in6.
*/
struct svc_cacherep {
- struct list_head c_lru;
+ struct {
+ /* Keep often-read xid, csum in the same cache line: */
+ __be32 k_xid;
+ __wsum k_csum;
+ u32 k_proc;
+ u32 k_prot;
+ u32 k_vers;
+ unsigned int k_len;
+ struct sockaddr_in6 k_addr;
+ } c_key;
+ struct rb_node c_node;
+ struct list_head c_lru;
unsigned char c_state, /* unused, inprog, done */
c_type, /* status, buffer */
c_secure : 1; /* req came from port < 1024 */
- struct sockaddr_in6 c_addr;
- __be32 c_xid;
- u32 c_prot;
- u32 c_proc;
- u32 c_vers;
- unsigned int c_len;
- __wsum c_csum;
unsigned long c_timestamp;
union {
struct kvec u_vec;
@@ -73,8 +78,8 @@
/* Checksum this amount of the request */
#define RC_CSUMLEN (256U)
-int nfsd_reply_cache_init(void);
-void nfsd_reply_cache_shutdown(void);
+int nfsd_reply_cache_init(struct nfsd_net *);
+void nfsd_reply_cache_shutdown(struct nfsd_net *);
int nfsd_cache_lookup(struct svc_rqst *);
void nfsd_cache_update(struct svc_rqst *, int, __be32 *);
int nfsd_reply_cache_stats_open(struct inode *, struct file *);
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index a1143f7..15422c9 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -22,6 +22,7 @@
#include "nfsfh.h"
#include "netns.h"
#include "pnfs.h"
+#include "filecache.h"
#define NFSDDBG_FACILITY NFSDDBG_EXPORT
@@ -46,7 +47,7 @@
!test_bit(CACHE_NEGATIVE, &key->h.flags))
path_put(&key->ek_path);
auth_domain_put(key->ek_client);
- kfree(key);
+ kfree_rcu(key, ek_rcu);
}
static void expkey_request(struct cache_detail *cd,
@@ -232,6 +233,17 @@
return NULL;
}
+static void expkey_flush(void)
+{
+ /*
+ * Take the nfsd_mutex here to ensure that the file cache is not
+ * destroyed while we're in the middle of flushing.
+ */
+ mutex_lock(&nfsd_mutex);
+ nfsd_file_cache_purge(current->nsproxy->net_ns);
+ mutex_unlock(&nfsd_mutex);
+}
+
static const struct cache_detail svc_expkey_cache_template = {
.owner = THIS_MODULE,
.hash_size = EXPKEY_HASHMAX,
@@ -244,6 +256,7 @@
.init = expkey_init,
.update = expkey_update,
.alloc = expkey_alloc,
+ .flush = expkey_flush,
};
static int
@@ -265,7 +278,7 @@
struct cache_head *ch;
int hash = svc_expkey_hash(item);
- ch = sunrpc_cache_lookup(cd, &item->h, hash);
+ ch = sunrpc_cache_lookup_rcu(cd, &item->h, hash);
if (ch)
return container_of(ch, struct svc_expkey, h);
else
@@ -314,7 +327,7 @@
auth_domain_put(exp->ex_client);
nfsd4_fslocs_free(&exp->ex_fslocs);
kfree(exp->ex_uuid);
- kfree(exp);
+ kfree_rcu(exp, ex_rcu);
}
static void svc_export_request(struct cache_detail *cd,
@@ -570,13 +583,13 @@
err = get_int(&mesg, &an_int);
if (err)
goto out3;
- exp.ex_anon_uid= make_kuid(&init_user_ns, an_int);
+ exp.ex_anon_uid= make_kuid(current_user_ns(), an_int);
/* anon gid */
err = get_int(&mesg, &an_int);
if (err)
goto out3;
- exp.ex_anon_gid= make_kgid(&init_user_ns, an_int);
+ exp.ex_anon_gid= make_kgid(current_user_ns(), an_int);
/* fsid */
err = get_int(&mesg, &an_int);
@@ -780,7 +793,7 @@
struct cache_head *ch;
int hash = svc_export_hash(exp);
- ch = sunrpc_cache_lookup(exp->cd, &exp->h, hash);
+ ch = sunrpc_cache_lookup_rcu(exp->cd, &exp->h, hash);
if (ch)
return container_of(ch, struct svc_export, h);
else
@@ -1170,15 +1183,17 @@
static void exp_flags(struct seq_file *m, int flag, int fsid,
kuid_t anonu, kgid_t anong, struct nfsd4_fs_locations *fsloc)
{
+ struct user_namespace *userns = m->file->f_cred->user_ns;
+
show_expflags(m, flag, NFSEXP_ALLFLAGS);
if (flag & NFSEXP_FSID)
seq_printf(m, ",fsid=%d", fsid);
- if (!uid_eq(anonu, make_kuid(&init_user_ns, (uid_t)-2)) &&
- !uid_eq(anonu, make_kuid(&init_user_ns, 0x10000-2)))
- seq_printf(m, ",anonuid=%u", from_kuid(&init_user_ns, anonu));
- if (!gid_eq(anong, make_kgid(&init_user_ns, (gid_t)-2)) &&
- !gid_eq(anong, make_kgid(&init_user_ns, 0x10000-2)))
- seq_printf(m, ",anongid=%u", from_kgid(&init_user_ns, anong));
+ if (!uid_eq(anonu, make_kuid(userns, (uid_t)-2)) &&
+ !uid_eq(anonu, make_kuid(userns, 0x10000-2)))
+ seq_printf(m, ",anonuid=%u", from_kuid_munged(userns, anonu));
+ if (!gid_eq(anong, make_kgid(userns, (gid_t)-2)) &&
+ !gid_eq(anong, make_kgid(userns, 0x10000-2)))
+ seq_printf(m, ",anongid=%u", from_kgid_munged(userns, anong));
if (fsloc && fsloc->locations_count > 0) {
char *loctype = (fsloc->migrated) ? "refer" : "replicas";
int i;
@@ -1216,9 +1231,9 @@
}
const struct seq_operations nfs_exports_op = {
- .start = cache_seq_start,
- .next = cache_seq_next,
- .stop = cache_seq_stop,
+ .start = cache_seq_start_rcu,
+ .next = cache_seq_next_rcu,
+ .stop = cache_seq_stop_rcu,
.show = e_show,
};
diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h
index c8b7412..e7daa1f 100644
--- a/fs/nfsd/export.h
+++ b/fs/nfsd/export.h
@@ -61,6 +61,7 @@
u32 ex_layout_types;
struct nfsd4_deviceid_map *ex_devid_map;
struct cache_detail *cd;
+ struct rcu_head ex_rcu;
};
/* an "export key" (expkey) maps a filehandlefragement to an
@@ -75,6 +76,7 @@
u32 ek_fsid[6];
struct path ek_path;
+ struct rcu_head ek_rcu;
};
#define EX_ISSYNC(exp) (!((exp)->ex_flags & NFSEXP_ASYNC))
diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c
index 8483125..76bee0a 100644
--- a/fs/nfsd/fault_inject.c
+++ b/fs/nfsd/fault_inject.c
@@ -127,24 +127,16 @@
},
};
-int nfsd_fault_inject_init(void)
+void nfsd_fault_inject_init(void)
{
unsigned int i;
struct nfsd_fault_inject_op *op;
umode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
debug_dir = debugfs_create_dir("nfsd", NULL);
- if (!debug_dir)
- goto fail;
for (i = 0; i < ARRAY_SIZE(inject_ops); i++) {
op = &inject_ops[i];
- if (!debugfs_create_file(op->file, mode, debug_dir, op, &fops_nfsd))
- goto fail;
+ debugfs_create_file(op->file, mode, debug_dir, op, &fops_nfsd);
}
- return 0;
-
-fail:
- nfsd_fault_inject_cleanup();
- return -ENOMEM;
}
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
new file mode 100644
index 0000000..ef55e9b
--- /dev/null
+++ b/fs/nfsd/filecache.c
@@ -0,0 +1,934 @@
+/*
+ * Open file cache.
+ *
+ * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com>
+ */
+
+#include <linux/hash.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/sched.h>
+#include <linux/list_lru.h>
+#include <linux/fsnotify_backend.h>
+#include <linux/fsnotify.h>
+#include <linux/seq_file.h>
+
+#include "vfs.h"
+#include "nfsd.h"
+#include "nfsfh.h"
+#include "netns.h"
+#include "filecache.h"
+#include "trace.h"
+
+#define NFSDDBG_FACILITY NFSDDBG_FH
+
+/* FIXME: dynamically size this for the machine somehow? */
+#define NFSD_FILE_HASH_BITS 12
+#define NFSD_FILE_HASH_SIZE (1 << NFSD_FILE_HASH_BITS)
+#define NFSD_LAUNDRETTE_DELAY (2 * HZ)
+
+#define NFSD_FILE_LRU_RESCAN (0)
+#define NFSD_FILE_SHUTDOWN (1)
+#define NFSD_FILE_LRU_THRESHOLD (4096UL)
+#define NFSD_FILE_LRU_LIMIT (NFSD_FILE_LRU_THRESHOLD << 2)
+
+/* We only care about NFSD_MAY_READ/WRITE for this cache */
+#define NFSD_FILE_MAY_MASK (NFSD_MAY_READ|NFSD_MAY_WRITE)
+
+struct nfsd_fcache_bucket {
+ struct hlist_head nfb_head;
+ spinlock_t nfb_lock;
+ unsigned int nfb_count;
+ unsigned int nfb_maxcount;
+};
+
+static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);
+
+static struct kmem_cache *nfsd_file_slab;
+static struct kmem_cache *nfsd_file_mark_slab;
+static struct nfsd_fcache_bucket *nfsd_file_hashtbl;
+static struct list_lru nfsd_file_lru;
+static long nfsd_file_lru_flags;
+static struct fsnotify_group *nfsd_file_fsnotify_group;
+static atomic_long_t nfsd_filecache_count;
+static struct delayed_work nfsd_filecache_laundrette;
+
+enum nfsd_file_laundrette_ctl {
+ NFSD_FILE_LAUNDRETTE_NOFLUSH = 0,
+ NFSD_FILE_LAUNDRETTE_MAY_FLUSH
+};
+
+static void
+nfsd_file_schedule_laundrette(enum nfsd_file_laundrette_ctl ctl)
+{
+ long count = atomic_long_read(&nfsd_filecache_count);
+
+ if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags))
+ return;
+
+ /* Be more aggressive about scanning if over the threshold */
+ if (count > NFSD_FILE_LRU_THRESHOLD)
+ mod_delayed_work(system_wq, &nfsd_filecache_laundrette, 0);
+ else
+ schedule_delayed_work(&nfsd_filecache_laundrette, NFSD_LAUNDRETTE_DELAY);
+
+ if (ctl == NFSD_FILE_LAUNDRETTE_NOFLUSH)
+ return;
+
+ /* ...and don't delay flushing if we're out of control */
+ if (count >= NFSD_FILE_LRU_LIMIT)
+ flush_delayed_work(&nfsd_filecache_laundrette);
+}
+
+static void
+nfsd_file_slab_free(struct rcu_head *rcu)
+{
+ struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu);
+
+ put_cred(nf->nf_cred);
+ kmem_cache_free(nfsd_file_slab, nf);
+}
+
+static void
+nfsd_file_mark_free(struct fsnotify_mark *mark)
+{
+ struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark,
+ nfm_mark);
+
+ kmem_cache_free(nfsd_file_mark_slab, nfm);
+}
+
+static struct nfsd_file_mark *
+nfsd_file_mark_get(struct nfsd_file_mark *nfm)
+{
+ if (!atomic_inc_not_zero(&nfm->nfm_ref))
+ return NULL;
+ return nfm;
+}
+
+static void
+nfsd_file_mark_put(struct nfsd_file_mark *nfm)
+{
+ if (atomic_dec_and_test(&nfm->nfm_ref)) {
+
+ fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group);
+ fsnotify_put_mark(&nfm->nfm_mark);
+ }
+}
+
+static struct nfsd_file_mark *
+nfsd_file_mark_find_or_create(struct nfsd_file *nf)
+{
+ int err;
+ struct fsnotify_mark *mark;
+ struct nfsd_file_mark *nfm = NULL, *new;
+ struct inode *inode = nf->nf_inode;
+
+ do {
+ mutex_lock(&nfsd_file_fsnotify_group->mark_mutex);
+ mark = fsnotify_find_mark(&inode->i_fsnotify_marks,
+ nfsd_file_fsnotify_group);
+ if (mark) {
+ nfm = nfsd_file_mark_get(container_of(mark,
+ struct nfsd_file_mark,
+ nfm_mark));
+ mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
+ fsnotify_put_mark(mark);
+ if (likely(nfm))
+ break;
+ } else
+ mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
+
+ /* allocate a new nfm */
+ new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL);
+ if (!new)
+ return NULL;
+ fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group);
+ new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF;
+ atomic_set(&new->nfm_ref, 1);
+
+ err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0);
+
+ /*
+ * If the add was successful, then return the object.
+ * Otherwise, we need to put the reference we hold on the
+ * nfm_mark. The fsnotify code will take a reference and put
+ * it on failure, so we can't just free it directly. It's also
+ * not safe to call fsnotify_destroy_mark on it as the
+ * mark->group will be NULL. Thus, we can't let the nfm_ref
+ * counter drive the destruction at this point.
+ */
+ if (likely(!err))
+ nfm = new;
+ else
+ fsnotify_put_mark(&new->nfm_mark);
+ } while (unlikely(err == -EEXIST));
+
+ return nfm;
+}
+
+static struct nfsd_file *
+nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
+ struct net *net)
+{
+ struct nfsd_file *nf;
+
+ nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
+ if (nf) {
+ INIT_HLIST_NODE(&nf->nf_node);
+ INIT_LIST_HEAD(&nf->nf_lru);
+ nf->nf_file = NULL;
+ nf->nf_cred = get_current_cred();
+ nf->nf_net = net;
+ nf->nf_flags = 0;
+ nf->nf_inode = inode;
+ nf->nf_hashval = hashval;
+ atomic_set(&nf->nf_ref, 1);
+ nf->nf_may = may & NFSD_FILE_MAY_MASK;
+ if (may & NFSD_MAY_NOT_BREAK_LEASE) {
+ if (may & NFSD_MAY_WRITE)
+ __set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags);
+ if (may & NFSD_MAY_READ)
+ __set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
+ }
+ nf->nf_mark = NULL;
+ trace_nfsd_file_alloc(nf);
+ }
+ return nf;
+}
+
+static bool
+nfsd_file_free(struct nfsd_file *nf)
+{
+ bool flush = false;
+
+ trace_nfsd_file_put_final(nf);
+ if (nf->nf_mark)
+ nfsd_file_mark_put(nf->nf_mark);
+ if (nf->nf_file) {
+ get_file(nf->nf_file);
+ filp_close(nf->nf_file, NULL);
+ fput(nf->nf_file);
+ flush = true;
+ }
+ call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
+ return flush;
+}
+
+static bool
+nfsd_file_check_writeback(struct nfsd_file *nf)
+{
+ struct file *file = nf->nf_file;
+ struct address_space *mapping;
+
+ if (!file || !(file->f_mode & FMODE_WRITE))
+ return false;
+ mapping = file->f_mapping;
+ return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) ||
+ mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
+}
+
+static int
+nfsd_file_check_write_error(struct nfsd_file *nf)
+{
+ struct file *file = nf->nf_file;
+
+ if (!file || !(file->f_mode & FMODE_WRITE))
+ return 0;
+ return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
+}
+
+static bool
+nfsd_file_in_use(struct nfsd_file *nf)
+{
+ return nfsd_file_check_writeback(nf) ||
+ nfsd_file_check_write_error(nf);
+}
+
+static void
+nfsd_file_do_unhash(struct nfsd_file *nf)
+{
+ lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
+
+ trace_nfsd_file_unhash(nf);
+
+ if (nfsd_file_check_write_error(nf))
+ nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id));
+ --nfsd_file_hashtbl[nf->nf_hashval].nfb_count;
+ hlist_del_rcu(&nf->nf_node);
+ if (!list_empty(&nf->nf_lru))
+ list_lru_del(&nfsd_file_lru, &nf->nf_lru);
+ atomic_long_dec(&nfsd_filecache_count);
+}
+
+static bool
+nfsd_file_unhash(struct nfsd_file *nf)
+{
+ if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
+ nfsd_file_do_unhash(nf);
+ return true;
+ }
+ return false;
+}
+
+/*
+ * Return true if the file was unhashed.
+ */
+static bool
+nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose)
+{
+ lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
+
+ trace_nfsd_file_unhash_and_release_locked(nf);
+ if (!nfsd_file_unhash(nf))
+ return false;
+ /* keep final reference for nfsd_file_lru_dispose */
+ if (atomic_add_unless(&nf->nf_ref, -1, 1))
+ return true;
+
+ list_add(&nf->nf_lru, dispose);
+ return true;
+}
+
+static int
+nfsd_file_put_noref(struct nfsd_file *nf)
+{
+ int count;
+ trace_nfsd_file_put(nf);
+
+ count = atomic_dec_return(&nf->nf_ref);
+ if (!count) {
+ WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
+ nfsd_file_free(nf);
+ }
+ return count;
+}
+
+void
+nfsd_file_put(struct nfsd_file *nf)
+{
+ bool is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0;
+ bool unused = !nfsd_file_in_use(nf);
+
+ set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
+ if (nfsd_file_put_noref(nf) == 1 && is_hashed && unused)
+ nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_MAY_FLUSH);
+}
+
+struct nfsd_file *
+nfsd_file_get(struct nfsd_file *nf)
+{
+ if (likely(atomic_inc_not_zero(&nf->nf_ref)))
+ return nf;
+ return NULL;
+}
+
+static void
+nfsd_file_dispose_list(struct list_head *dispose)
+{
+ struct nfsd_file *nf;
+
+ while(!list_empty(dispose)) {
+ nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
+ list_del(&nf->nf_lru);
+ nfsd_file_put_noref(nf);
+ }
+}
+
+static void
+nfsd_file_dispose_list_sync(struct list_head *dispose)
+{
+ bool flush = false;
+ struct nfsd_file *nf;
+
+ while(!list_empty(dispose)) {
+ nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
+ list_del(&nf->nf_lru);
+ if (!atomic_dec_and_test(&nf->nf_ref))
+ continue;
+ if (nfsd_file_free(nf))
+ flush = true;
+ }
+ if (flush)
+ flush_delayed_fput();
+}
+
+/*
+ * Note this can deadlock with nfsd_file_cache_purge.
+ */
+static enum lru_status
+nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
+ spinlock_t *lock, void *arg)
+ __releases(lock)
+ __acquires(lock)
+{
+ struct list_head *head = arg;
+ struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);
+
+ /*
+ * Do a lockless refcount check. The hashtable holds one reference, so
+ * we look to see if anything else has a reference, or if any have
+ * been put since the shrinker last ran. Those don't get unhashed and
+ * released.
+ *
+ * Note that in the put path, we set the flag and then decrement the
+ * counter. Here we check the counter and then test and clear the flag.
+ * That order is deliberate to ensure that we can do this locklessly.
+ */
+ if (atomic_read(&nf->nf_ref) > 1)
+ goto out_skip;
+
+ /*
+ * Don't throw out files that are still undergoing I/O or
+ * that have uncleared errors pending.
+ */
+ if (nfsd_file_check_writeback(nf))
+ goto out_skip;
+
+ if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags))
+ goto out_rescan;
+
+ if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags))
+ goto out_skip;
+
+ list_lru_isolate_move(lru, &nf->nf_lru, head);
+ return LRU_REMOVED;
+out_rescan:
+ set_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags);
+out_skip:
+ return LRU_SKIP;
+}
+
+static void
+nfsd_file_lru_dispose(struct list_head *head)
+{
+ while(!list_empty(head)) {
+ struct nfsd_file *nf = list_first_entry(head,
+ struct nfsd_file, nf_lru);
+ list_del_init(&nf->nf_lru);
+ spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
+ nfsd_file_do_unhash(nf);
+ spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
+ nfsd_file_put_noref(nf);
+ }
+}
+
+static unsigned long
+nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
+{
+ return list_lru_count(&nfsd_file_lru);
+}
+
+static unsigned long
+nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
+{
+ LIST_HEAD(head);
+ unsigned long ret;
+
+ ret = list_lru_shrink_walk(&nfsd_file_lru, sc, nfsd_file_lru_cb, &head);
+ nfsd_file_lru_dispose(&head);
+ return ret;
+}
+
+static struct shrinker nfsd_file_shrinker = {
+ .scan_objects = nfsd_file_lru_scan,
+ .count_objects = nfsd_file_lru_count,
+ .seeks = 1,
+};
+
+static void
+__nfsd_file_close_inode(struct inode *inode, unsigned int hashval,
+ struct list_head *dispose)
+{
+ struct nfsd_file *nf;
+ struct hlist_node *tmp;
+
+ spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
+ hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) {
+ if (inode == nf->nf_inode)
+ nfsd_file_unhash_and_release_locked(nf, dispose);
+ }
+ spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
+}
+
+/**
+ * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
+ * @inode: inode of the file to attempt to remove
+ *
+ * Walk the whole hash bucket, looking for any files that correspond to "inode".
+ * If any do, then unhash them and put the hashtable reference to them and
+ * destroy any that had their last reference put. Also ensure that any of the
+ * fputs also have their final __fput done as well.
+ */
+void
+nfsd_file_close_inode_sync(struct inode *inode)
+{
+ unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
+ NFSD_FILE_HASH_BITS);
+ LIST_HEAD(dispose);
+
+ __nfsd_file_close_inode(inode, hashval, &dispose);
+ trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose));
+ nfsd_file_dispose_list_sync(&dispose);
+}
+
+/**
+ * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
+ * @inode: inode of the file to attempt to remove
+ *
+ * Walk the whole hash bucket, looking for any files that correspond to "inode".
+ * If any do, then unhash them and put the hashtable reference to them and
+ * destroy any that had their last reference put.
+ */
+static void
+nfsd_file_close_inode(struct inode *inode)
+{
+ unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
+ NFSD_FILE_HASH_BITS);
+ LIST_HEAD(dispose);
+
+ __nfsd_file_close_inode(inode, hashval, &dispose);
+ trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose));
+ nfsd_file_dispose_list(&dispose);
+}
+
+/**
+ * nfsd_file_delayed_close - close unused nfsd_files
+ * @work: dummy
+ *
+ * Walk the LRU list and close any entries that have not been used since
+ * the last scan.
+ *
+ * Note this can deadlock with nfsd_file_cache_purge.
+ */
+static void
+nfsd_file_delayed_close(struct work_struct *work)
+{
+ LIST_HEAD(head);
+
+ list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, &head, LONG_MAX);
+
+ if (test_and_clear_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags))
+ nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_NOFLUSH);
+
+ if (!list_empty(&head)) {
+ nfsd_file_lru_dispose(&head);
+ flush_delayed_fput();
+ }
+}
+
+static int
+nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg,
+ void *data)
+{
+ struct file_lock *fl = data;
+
+ /* Only close files for F_SETLEASE leases */
+ if (fl->fl_flags & FL_LEASE)
+ nfsd_file_close_inode_sync(file_inode(fl->fl_file));
+ return 0;
+}
+
+static struct notifier_block nfsd_file_lease_notifier = {
+ .notifier_call = nfsd_file_lease_notifier_call,
+};
+
+static int
+nfsd_file_fsnotify_handle_event(struct fsnotify_group *group,
+ struct inode *inode,
+ u32 mask, const void *data, int data_type,
+ const struct qstr *file_name, u32 cookie,
+ struct fsnotify_iter_info *iter_info)
+{
+ trace_nfsd_file_fsnotify_handle_event(inode, mask);
+
+ /* Should be no marks on non-regular files */
+ if (!S_ISREG(inode->i_mode)) {
+ WARN_ON_ONCE(1);
+ return 0;
+ }
+
+ /* don't close files if this was not the last link */
+ if (mask & FS_ATTRIB) {
+ if (inode->i_nlink)
+ return 0;
+ }
+
+ nfsd_file_close_inode(inode);
+ return 0;
+}
+
+
+static const struct fsnotify_ops nfsd_file_fsnotify_ops = {
+ .handle_event = nfsd_file_fsnotify_handle_event,
+ .free_mark = nfsd_file_mark_free,
+};
+
+int
+nfsd_file_cache_init(void)
+{
+ int ret = -ENOMEM;
+ unsigned int i;
+
+ clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);
+
+ if (nfsd_file_hashtbl)
+ return 0;
+
+ nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE,
+ sizeof(*nfsd_file_hashtbl), GFP_KERNEL);
+ if (!nfsd_file_hashtbl) {
+ pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n");
+ goto out_err;
+ }
+
+ nfsd_file_slab = kmem_cache_create("nfsd_file",
+ sizeof(struct nfsd_file), 0, 0, NULL);
+ if (!nfsd_file_slab) {
+ pr_err("nfsd: unable to create nfsd_file_slab\n");
+ goto out_err;
+ }
+
+ nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark",
+ sizeof(struct nfsd_file_mark), 0, 0, NULL);
+ if (!nfsd_file_mark_slab) {
+ pr_err("nfsd: unable to create nfsd_file_mark_slab\n");
+ goto out_err;
+ }
+
+
+ ret = list_lru_init(&nfsd_file_lru);
+ if (ret) {
+ pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret);
+ goto out_err;
+ }
+
+ ret = register_shrinker(&nfsd_file_shrinker);
+ if (ret) {
+ pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret);
+ goto out_lru;
+ }
+
+ ret = lease_register_notifier(&nfsd_file_lease_notifier);
+ if (ret) {
+ pr_err("nfsd: unable to register lease notifier: %d\n", ret);
+ goto out_shrinker;
+ }
+
+ nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops);
+ if (IS_ERR(nfsd_file_fsnotify_group)) {
+ pr_err("nfsd: unable to create fsnotify group: %ld\n",
+ PTR_ERR(nfsd_file_fsnotify_group));
+ nfsd_file_fsnotify_group = NULL;
+ goto out_notifier;
+ }
+
+ for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
+ INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head);
+ spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock);
+ }
+
+ INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_delayed_close);
+out:
+ return ret;
+out_notifier:
+ lease_unregister_notifier(&nfsd_file_lease_notifier);
+out_shrinker:
+ unregister_shrinker(&nfsd_file_shrinker);
+out_lru:
+ list_lru_destroy(&nfsd_file_lru);
+out_err:
+ kmem_cache_destroy(nfsd_file_slab);
+ nfsd_file_slab = NULL;
+ kmem_cache_destroy(nfsd_file_mark_slab);
+ nfsd_file_mark_slab = NULL;
+ kfree(nfsd_file_hashtbl);
+ nfsd_file_hashtbl = NULL;
+ goto out;
+}
+
+/*
+ * Note this can deadlock with nfsd_file_lru_cb.
+ */
+void
+nfsd_file_cache_purge(struct net *net)
+{
+ unsigned int i;
+ struct nfsd_file *nf;
+ struct hlist_node *next;
+ LIST_HEAD(dispose);
+ bool del;
+
+ if (!nfsd_file_hashtbl)
+ return;
+
+ for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
+ struct nfsd_fcache_bucket *nfb = &nfsd_file_hashtbl[i];
+
+ spin_lock(&nfb->nfb_lock);
+ hlist_for_each_entry_safe(nf, next, &nfb->nfb_head, nf_node) {
+ if (net && nf->nf_net != net)
+ continue;
+ del = nfsd_file_unhash_and_release_locked(nf, &dispose);
+
+ /*
+ * Deadlock detected! Something marked this entry as
+ * unhased, but hasn't removed it from the hash list.
+ */
+ WARN_ON_ONCE(!del);
+ }
+ spin_unlock(&nfb->nfb_lock);
+ nfsd_file_dispose_list(&dispose);
+ }
+}
+
+void
+nfsd_file_cache_shutdown(void)
+{
+ LIST_HEAD(dispose);
+
+ set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);
+
+ lease_unregister_notifier(&nfsd_file_lease_notifier);
+ unregister_shrinker(&nfsd_file_shrinker);
+ /*
+ * make sure all callers of nfsd_file_lru_cb are done before
+ * calling nfsd_file_cache_purge
+ */
+ cancel_delayed_work_sync(&nfsd_filecache_laundrette);
+ nfsd_file_cache_purge(NULL);
+ list_lru_destroy(&nfsd_file_lru);
+ rcu_barrier();
+ fsnotify_put_group(nfsd_file_fsnotify_group);
+ nfsd_file_fsnotify_group = NULL;
+ kmem_cache_destroy(nfsd_file_slab);
+ nfsd_file_slab = NULL;
+ fsnotify_wait_marks_destroyed();
+ kmem_cache_destroy(nfsd_file_mark_slab);
+ nfsd_file_mark_slab = NULL;
+ kfree(nfsd_file_hashtbl);
+ nfsd_file_hashtbl = NULL;
+}
+
+static bool
+nfsd_match_cred(const struct cred *c1, const struct cred *c2)
+{
+ int i;
+
+ if (!uid_eq(c1->fsuid, c2->fsuid))
+ return false;
+ if (!gid_eq(c1->fsgid, c2->fsgid))
+ return false;
+ if (c1->group_info == NULL || c2->group_info == NULL)
+ return c1->group_info == c2->group_info;
+ if (c1->group_info->ngroups != c2->group_info->ngroups)
+ return false;
+ for (i = 0; i < c1->group_info->ngroups; i++) {
+ if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i]))
+ return false;
+ }
+ return true;
+}
+
+static struct nfsd_file *
+nfsd_file_find_locked(struct inode *inode, unsigned int may_flags,
+ unsigned int hashval, struct net *net)
+{
+ struct nfsd_file *nf;
+ unsigned char need = may_flags & NFSD_FILE_MAY_MASK;
+
+ hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
+ nf_node) {
+ if ((need & nf->nf_may) != need)
+ continue;
+ if (nf->nf_inode != inode)
+ continue;
+ if (nf->nf_net != net)
+ continue;
+ if (!nfsd_match_cred(nf->nf_cred, current_cred()))
+ continue;
+ if (nfsd_file_get(nf) != NULL)
+ return nf;
+ }
+ return NULL;
+}
+
+/**
+ * nfsd_file_is_cached - are there any cached open files for this fh?
+ * @inode: inode of the file to check
+ *
+ * Scan the hashtable for open files that match this fh. Returns true if there
+ * are any, and false if not.
+ */
+bool
+nfsd_file_is_cached(struct inode *inode)
+{
+ bool ret = false;
+ struct nfsd_file *nf;
+ unsigned int hashval;
+
+ hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
+ nf_node) {
+ if (inode == nf->nf_inode) {
+ ret = true;
+ break;
+ }
+ }
+ rcu_read_unlock();
+ trace_nfsd_file_is_cached(inode, hashval, (int)ret);
+ return ret;
+}
+
+__be32
+nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct nfsd_file **pnf)
+{
+ __be32 status;
+ struct net *net = SVC_NET(rqstp);
+ struct nfsd_file *nf, *new;
+ struct inode *inode;
+ unsigned int hashval;
+
+ /* FIXME: skip this if fh_dentry is already set? */
+ status = fh_verify(rqstp, fhp, S_IFREG,
+ may_flags|NFSD_MAY_OWNER_OVERRIDE);
+ if (status != nfs_ok)
+ return status;
+
+ inode = d_inode(fhp->fh_dentry);
+ hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
+retry:
+ rcu_read_lock();
+ nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
+ rcu_read_unlock();
+ if (nf)
+ goto wait_for_construction;
+
+ new = nfsd_file_alloc(inode, may_flags, hashval, net);
+ if (!new) {
+ trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags,
+ NULL, nfserr_jukebox);
+ return nfserr_jukebox;
+ }
+
+ spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
+ nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
+ if (nf == NULL)
+ goto open_file;
+ spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
+ nfsd_file_slab_free(&new->nf_rcu);
+
+wait_for_construction:
+ wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);
+
+ /* Did construction of this file fail? */
+ if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
+ nfsd_file_put_noref(nf);
+ goto retry;
+ }
+
+ this_cpu_inc(nfsd_file_cache_hits);
+
+ if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) {
+ bool write = (may_flags & NFSD_MAY_WRITE);
+
+ if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) ||
+ (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) {
+ status = nfserrno(nfsd_open_break_lease(
+ file_inode(nf->nf_file), may_flags));
+ if (status == nfs_ok) {
+ clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
+ if (write)
+ clear_bit(NFSD_FILE_BREAK_WRITE,
+ &nf->nf_flags);
+ }
+ }
+ }
+out:
+ if (status == nfs_ok) {
+ *pnf = nf;
+ } else {
+ nfsd_file_put(nf);
+ nf = NULL;
+ }
+
+ trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, nf, status);
+ return status;
+open_file:
+ nf = new;
+ /* Take reference for the hashtable */
+ atomic_inc(&nf->nf_ref);
+ __set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
+ __set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
+ list_lru_add(&nfsd_file_lru, &nf->nf_lru);
+ hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head);
+ ++nfsd_file_hashtbl[hashval].nfb_count;
+ nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount,
+ nfsd_file_hashtbl[hashval].nfb_count);
+ spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
+ atomic_long_inc(&nfsd_filecache_count);
+
+ nf->nf_mark = nfsd_file_mark_find_or_create(nf);
+ if (nf->nf_mark)
+ status = nfsd_open_verified(rqstp, fhp, S_IFREG,
+ may_flags, &nf->nf_file);
+ else
+ status = nfserr_jukebox;
+ /*
+ * If construction failed, or we raced with a call to unlink()
+ * then unhash.
+ */
+ if (status != nfs_ok || inode->i_nlink == 0) {
+ bool do_free;
+ spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
+ do_free = nfsd_file_unhash(nf);
+ spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
+ if (do_free)
+ nfsd_file_put_noref(nf);
+ }
+ clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
+ smp_mb__after_atomic();
+ wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
+ goto out;
+}
+
+/*
+ * Note that fields may be added, removed or reordered in the future. Programs
+ * scraping this file for info should test the labels to ensure they're
+ * getting the correct field.
+ */
+static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+{
+ unsigned int i, count = 0, longest = 0;
+ unsigned long hits = 0;
+
+ /*
+ * No need for spinlocks here since we're not terribly interested in
+ * accuracy. We do take the nfsd_mutex simply to ensure that we
+ * don't end up racing with server shutdown
+ */
+ mutex_lock(&nfsd_mutex);
+ if (nfsd_file_hashtbl) {
+ for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
+ count += nfsd_file_hashtbl[i].nfb_count;
+ longest = max(longest, nfsd_file_hashtbl[i].nfb_count);
+ }
+ }
+ mutex_unlock(&nfsd_mutex);
+
+ for_each_possible_cpu(i)
+ hits += per_cpu(nfsd_file_cache_hits, i);
+
+ seq_printf(m, "total entries: %u\n", count);
+ seq_printf(m, "longest chain: %u\n", longest);
+ seq_printf(m, "cache hits: %lu\n", hits);
+ return 0;
+}
+
+int nfsd_file_cache_stats_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, nfsd_file_cache_stats_show, NULL);
+}
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
new file mode 100644
index 0000000..851d9ab
--- /dev/null
+++ b/fs/nfsd/filecache.h
@@ -0,0 +1,61 @@
+#ifndef _FS_NFSD_FILECACHE_H
+#define _FS_NFSD_FILECACHE_H
+
+#include <linux/fsnotify_backend.h>
+
+/*
+ * This is the fsnotify_mark container that nfsd attaches to the files that it
+ * is holding open. Note that we have a separate refcount here aside from the
+ * one in the fsnotify_mark. We only want a single fsnotify_mark attached to
+ * the inode, and for each nfsd_file to hold a reference to it.
+ *
+ * The fsnotify_mark is itself refcounted, but that's not sufficient to tell us
+ * how to put that reference. If there are still outstanding nfsd_files that
+ * reference the mark, then we would want to call fsnotify_put_mark on it.
+ * If there were not, then we'd need to call fsnotify_destroy_mark. Since we
+ * can't really tell the difference, we use the nfm_mark to keep track of how
+ * many nfsd_files hold references to the mark. When that counter goes to zero
+ * then we know to call fsnotify_destroy_mark on it.
+ */
+struct nfsd_file_mark {
+ struct fsnotify_mark nfm_mark;
+ atomic_t nfm_ref;
+};
+
+/*
+ * A representation of a file that has been opened by knfsd. These are hashed
+ * in the hashtable by inode pointer value. Note that this object doesn't
+ * hold a reference to the inode by itself, so the nf_inode pointer should
+ * never be dereferenced, only used for comparison.
+ */
+struct nfsd_file {
+ struct hlist_node nf_node;
+ struct list_head nf_lru;
+ struct rcu_head nf_rcu;
+ struct file *nf_file;
+ const struct cred *nf_cred;
+ struct net *nf_net;
+#define NFSD_FILE_HASHED (0)
+#define NFSD_FILE_PENDING (1)
+#define NFSD_FILE_BREAK_READ (2)
+#define NFSD_FILE_BREAK_WRITE (3)
+#define NFSD_FILE_REFERENCED (4)
+ unsigned long nf_flags;
+ struct inode *nf_inode;
+ unsigned int nf_hashval;
+ atomic_t nf_ref;
+ unsigned char nf_may;
+ struct nfsd_file_mark *nf_mark;
+};
+
+int nfsd_file_cache_init(void);
+void nfsd_file_cache_purge(struct net *);
+void nfsd_file_cache_shutdown(void);
+void nfsd_file_put(struct nfsd_file *nf);
+struct nfsd_file *nfsd_file_get(struct nfsd_file *nf);
+void nfsd_file_close_inode_sync(struct inode *inode);
+bool nfsd_file_is_cached(struct inode *inode);
+__be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct nfsd_file **nfp);
+int nfsd_file_cache_stats_open(struct inode *, struct file *);
+#endif /* _FS_NFSD_FILECACHE_H */
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 426f550..9a4ef81 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -1,21 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* per net namespace data structures for nfsd
*
* Copyright (C) 2012, Jeff Layton <jlayton@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 51
- * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#ifndef __NFSD_NETNS_H__
@@ -55,6 +42,11 @@
bool grace_ended;
time_t boot_time;
+ /* internal mount of the "nfsd" pseudofilesystem: */
+ struct vfsmount *nfsd_mnt;
+
+ struct dentry *nfsd_client_dir;
+
/*
* reclaim_str_hashtbl[] holds known client info from previous reset/reboot
* used in reboot/reset lease grace period processing
@@ -104,11 +96,15 @@
time_t nfsd4_grace;
bool somebody_reclaimed;
+ bool track_reclaim_completes;
+ atomic_t nr_reclaim_complete;
+
bool nfsd_net_up;
bool lockd_up;
/* Time of server startup */
struct timespec64 nfssvc_boot;
+ seqlock_t boot_lock;
/*
* Max number of connections this nfsd container will allow. Defaults
@@ -116,6 +112,7 @@
*/
unsigned int max_connections;
+ u32 clientid_base;
u32 clientid_counter;
u32 clverifier_counter;
@@ -123,10 +120,67 @@
wait_queue_head_t ntf_wq;
atomic_t ntf_refcnt;
+
+ /*
+ * clientid and stateid data for construction of net unique COPY
+ * stateids.
+ */
+ u32 s2s_cp_cl_id;
+ struct idr s2s_cp_stateids;
+ spinlock_t s2s_cp_lock;
+
+ /*
+ * Version information
+ */
+ bool *nfsd_versions;
+ bool *nfsd4_minorversions;
+
+ /*
+ * Duplicate reply cache
+ */
+ struct nfsd_drc_bucket *drc_hashtbl;
+ struct kmem_cache *drc_slab;
+
+ /* max number of entries allowed in the cache */
+ unsigned int max_drc_entries;
+
+ /* number of significant bits in the hash value */
+ unsigned int maskbits;
+ unsigned int drc_hashsize;
+
+ /*
+ * Stats and other tracking of on the duplicate reply cache.
+ * These fields and the "rc" fields in nfsdstats are modified
+ * with only the per-bucket cache lock, which isn't really safe
+ * and should be fixed if we want the statistics to be
+ * completely accurate.
+ */
+
+ /* total number of entries */
+ atomic_t num_drc_entries;
+
+ /* cache misses due only to checksum comparison failures */
+ unsigned int payload_misses;
+
+ /* amount of memory (in bytes) currently consumed by the DRC */
+ unsigned int drc_mem_usage;
+
+ /* longest hash chain seen */
+ unsigned int longest_chain;
+
+ /* size of cache when we saw the longest hash chain */
+ unsigned int longest_chain_cachesize;
+
+ struct shrinker nfsd_reply_cache_shrinker;
};
/* Simple check to find out if a given net was properly initialized */
#define nfsd_netns_ready(nn) ((nn)->sessionid_hashtbl)
+extern void nfsd_netns_free_versions(struct nfsd_net *nn);
+
extern unsigned int nfsd_net_id;
+
+void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn);
+void nfsd_reset_boot_verifier(struct nfsd_net *nn);
#endif /* __NFSD_NETNS_H__ */
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 9eb8086..cea68d8 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -172,13 +172,8 @@
nfserr = nfsd_read(rqstp, &resp->fh,
argp->offset,
rqstp->rq_vec, argp->vlen,
- &resp->count);
- if (nfserr == 0) {
- struct inode *inode = d_inode(resp->fh.fh_dentry);
- resp->eof = nfsd_eof_on_read(cnt, resp->count, argp->offset,
- inode->i_size);
- }
-
+ &resp->count,
+ &resp->eof);
RETURN_STATUS(nfserr);
}
@@ -442,7 +437,9 @@
struct nfsd3_readdirargs *argp = rqstp->rq_argp;
struct nfsd3_readdirres *resp = rqstp->rq_resp;
__be32 nfserr;
- int count;
+ int count = 0;
+ struct page **p;
+ caddr_t page_addr = NULL;
dprintk("nfsd: READDIR(3) %s %d bytes at %d\n",
SVCFH_fmt(&argp->fh),
@@ -462,9 +459,31 @@
nfserr = nfsd_readdir(rqstp, &resp->fh, (loff_t*) &argp->cookie,
&resp->common, nfs3svc_encode_entry);
memcpy(resp->verf, argp->verf, 8);
- resp->count = resp->buffer - argp->buffer;
- if (resp->offset)
- xdr_encode_hyper(resp->offset, argp->cookie);
+ count = 0;
+ for (p = rqstp->rq_respages + 1; p < rqstp->rq_next_page; p++) {
+ page_addr = page_address(*p);
+
+ if (((caddr_t)resp->buffer >= page_addr) &&
+ ((caddr_t)resp->buffer < page_addr + PAGE_SIZE)) {
+ count += (caddr_t)resp->buffer - page_addr;
+ break;
+ }
+ count += PAGE_SIZE;
+ }
+ resp->count = count >> 2;
+ if (resp->offset) {
+ loff_t offset = argp->cookie;
+
+ if (unlikely(resp->offset1)) {
+ /* we ended up with offset on a page boundary */
+ *resp->offset = htonl(offset >> 32);
+ *resp->offset1 = htonl(offset & 0xffffffff);
+ resp->offset1 = NULL;
+ } else {
+ xdr_encode_hyper(resp->offset, offset);
+ }
+ resp->offset = NULL;
+ }
RETURN_STATUS(nfserr);
}
@@ -533,6 +552,7 @@
} else {
xdr_encode_hyper(resp->offset, offset);
}
+ resp->offset = NULL;
}
RETURN_STATUS(nfserr);
@@ -576,7 +596,7 @@
resp->f_wtmax = max_blocksize;
resp->f_wtpref = max_blocksize;
resp->f_wtmult = PAGE_SIZE;
- resp->f_dtpref = PAGE_SIZE;
+ resp->f_dtpref = max_blocksize;
resp->f_maxfilesize = ~(u32) 0;
resp->f_properties = NFS3_FSF_DEFAULT;
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 9b973f4..86e5658 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -27,6 +27,7 @@
NF3SOCK, NF3BAD, NF3LNK, NF3BAD,
};
+
/*
* XDR functions for basic NFS types
*/
@@ -96,7 +97,7 @@
}
static __be32 *
-decode_sattr3(__be32 *p, struct iattr *iap)
+decode_sattr3(__be32 *p, struct iattr *iap, struct user_namespace *userns)
{
u32 tmp;
@@ -107,12 +108,12 @@
iap->ia_mode = ntohl(*p++);
}
if (*p++) {
- iap->ia_uid = make_kuid(&init_user_ns, ntohl(*p++));
+ iap->ia_uid = make_kuid(userns, ntohl(*p++));
if (uid_valid(iap->ia_uid))
iap->ia_valid |= ATTR_UID;
}
if (*p++) {
- iap->ia_gid = make_kgid(&init_user_ns, ntohl(*p++));
+ iap->ia_gid = make_kgid(userns, ntohl(*p++));
if (gid_valid(iap->ia_gid))
iap->ia_valid |= ATTR_GID;
}
@@ -165,12 +166,13 @@
encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
struct kstat *stat)
{
+ struct user_namespace *userns = nfsd_user_namespace(rqstp);
struct timespec ts;
*p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]);
*p++ = htonl((u32) (stat->mode & S_IALLUGO));
*p++ = htonl((u32) stat->nlink);
- *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid));
- *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid));
+ *p++ = htonl((u32) from_kuid_munged(userns, stat->uid));
+ *p++ = htonl((u32) from_kgid_munged(userns, stat->gid));
if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) {
p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN);
} else {
@@ -325,7 +327,7 @@
p = decode_fh(p, &args->fh);
if (!p)
return 0;
- p = decode_sattr3(p, &args->attrs);
+ p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp));
if ((args->check_guard = ntohl(*p++)) != 0) {
struct timespec time;
@@ -455,7 +457,7 @@
switch (args->createmode = ntohl(*p++)) {
case NFS3_CREATE_UNCHECKED:
case NFS3_CREATE_GUARDED:
- p = decode_sattr3(p, &args->attrs);
+ p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp));
break;
case NFS3_CREATE_EXCLUSIVE:
args->verf = p;
@@ -476,7 +478,7 @@
if (!(p = decode_fh(p, &args->fh)) ||
!(p = decode_filename(p, &args->name, &args->len)))
return 0;
- p = decode_sattr3(p, &args->attrs);
+ p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp));
return xdr_argsize_check(rqstp, p);
}
@@ -491,7 +493,7 @@
if (!(p = decode_fh(p, &args->ffh)) ||
!(p = decode_filename(p, &args->fname, &args->flen)))
return 0;
- p = decode_sattr3(p, &args->attrs);
+ p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp));
args->tlen = ntohl(*p++);
@@ -519,7 +521,7 @@
if (args->ftype == NF3BLK || args->ftype == NF3CHR
|| args->ftype == NF3SOCK || args->ftype == NF3FIFO)
- p = decode_sattr3(p, &args->attrs);
+ p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp));
if (args->ftype == NF3BLK || args->ftype == NF3CHR) {
args->major = ntohl(*p++);
@@ -573,6 +575,9 @@
nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
{
struct nfsd3_readdirargs *args = rqstp->rq_argp;
+ int len;
+ u32 max_blocksize = svc_max_payload(rqstp);
+
p = decode_fh(p, &args->fh);
if (!p)
return 0;
@@ -580,8 +585,14 @@
args->verf = p; p += 2;
args->dircount = ~0;
args->count = ntohl(*p++);
- args->count = min_t(u32, args->count, PAGE_SIZE);
- args->buffer = page_address(*(rqstp->rq_next_page++));
+ len = args->count = min_t(u32, args->count, max_blocksize);
+
+ while (len > 0) {
+ struct page *p = *(rqstp->rq_next_page++);
+ if (!args->buffer)
+ args->buffer = page_address(p);
+ len -= PAGE_SIZE;
+ }
return xdr_argsize_check(rqstp, p);
}
@@ -741,14 +752,16 @@
{
struct nfsd3_writeres *resp = rqstp->rq_resp;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ __be32 verf[2];
p = encode_wcc_data(rqstp, p, &resp->fh);
if (resp->status == 0) {
*p++ = htonl(resp->count);
*p++ = htonl(resp->committed);
/* unique identifier, y2038 overflow can be ignored */
- *p++ = htonl((u32)nn->nfssvc_boot.tv_sec);
- *p++ = htonl(nn->nfssvc_boot.tv_nsec);
+ nfsd_copy_boot_verifier(verf, nn);
+ *p++ = verf[0];
+ *p++ = verf[1];
}
return xdr_ressize_check(rqstp, p);
}
@@ -921,6 +934,7 @@
} else {
xdr_encode_hyper(cd->offset, offset64);
}
+ cd->offset = NULL;
}
/*
@@ -1114,13 +1128,15 @@
{
struct nfsd3_commitres *resp = rqstp->rq_resp;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ __be32 verf[2];
p = encode_wcc_data(rqstp, p, &resp->fh);
/* Write verifier */
if (resp->status == 0) {
/* unique identifier, y2038 overflow can be ignored */
- *p++ = htonl((u32)nn->nfssvc_boot.tv_sec);
- *p++ = htonl(nn->nfssvc_boot.tv_nsec);
+ nfsd_copy_boot_verifier(verf, nn);
+ *p++ = verf[0];
+ *p++ = verf[1];
}
return xdr_ressize_check(rqstp, p);
}
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 601bf33..5241114 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -39,6 +39,7 @@
#include "state.h"
#include "netns.h"
#include "xdr4cb.h"
+#include "xdr4.h"
#define NFSDDBG_FACILITY NFSDDBG_PROC
@@ -59,16 +60,6 @@
int status;
};
-/*
- * Handle decode buffer overflows out-of-line.
- */
-static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
-{
- dprintk("NFS: %s prematurely hit the end of our receive buffer. "
- "Remaining buffer length is %tu words.\n",
- func, xdr->end - xdr->p);
-}
-
static __be32 *xdr_encode_empty_array(__be32 *p)
{
*p++ = xdr_zero;
@@ -105,6 +96,7 @@
OP_CB_WANTS_CANCELLED = 12,
OP_CB_NOTIFY_LOCK = 13,
OP_CB_NOTIFY_DEVICEID = 14,
+ OP_CB_OFFLOAD = 15,
OP_CB_ILLEGAL = 10044
};
@@ -238,7 +230,6 @@
*status = nfs_cb_stat_to_errno(be32_to_cpup(p));
return 0;
out_overflow:
- print_overflow_msg(__func__, xdr);
return -EIO;
out_unexpected:
dprintk("NFSD: Callback server returned operation %d but "
@@ -307,7 +298,6 @@
hdr->nops = be32_to_cpup(p);
return 0;
out_overflow:
- print_overflow_msg(__func__, xdr);
return -EIO;
}
@@ -435,7 +425,6 @@
cb->cb_seq_status = status;
return status;
out_overflow:
- print_overflow_msg(__func__, xdr);
status = -EIO;
goto out;
}
@@ -523,11 +512,9 @@
if (unlikely(status))
return status;
- if (cb != NULL) {
- status = decode_cb_sequence4res(xdr, cb);
- if (unlikely(status || cb->cb_seq_status))
- return status;
- }
+ status = decode_cb_sequence4res(xdr, cb);
+ if (unlikely(status || cb->cb_seq_status))
+ return status;
return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
}
@@ -615,11 +602,10 @@
if (unlikely(status))
return status;
- if (cb) {
- status = decode_cb_sequence4res(xdr, cb);
- if (unlikely(status || cb->cb_seq_status))
- return status;
- }
+ status = decode_cb_sequence4res(xdr, cb);
+ if (unlikely(status || cb->cb_seq_status))
+ return status;
+
return decode_cb_op_status(xdr, OP_CB_LAYOUTRECALL, &cb->cb_status);
}
#endif /* CONFIG_NFSD_PNFS */
@@ -674,15 +660,108 @@
if (unlikely(status))
return status;
- if (cb) {
- status = decode_cb_sequence4res(xdr, cb);
- if (unlikely(status || cb->cb_seq_status))
- return status;
- }
+ status = decode_cb_sequence4res(xdr, cb);
+ if (unlikely(status || cb->cb_seq_status))
+ return status;
+
return decode_cb_op_status(xdr, OP_CB_NOTIFY_LOCK, &cb->cb_status);
}
/*
+ * struct write_response4 {
+ * stateid4 wr_callback_id<1>;
+ * length4 wr_count;
+ * stable_how4 wr_committed;
+ * verifier4 wr_writeverf;
+ * };
+ * union offload_info4 switch (nfsstat4 coa_status) {
+ * case NFS4_OK:
+ * write_response4 coa_resok4;
+ * default:
+ * length4 coa_bytes_copied;
+ * };
+ * struct CB_OFFLOAD4args {
+ * nfs_fh4 coa_fh;
+ * stateid4 coa_stateid;
+ * offload_info4 coa_offload_info;
+ * };
+ */
+static void encode_offload_info4(struct xdr_stream *xdr,
+ __be32 nfserr,
+ const struct nfsd4_copy *cp)
+{
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, 4);
+ *p++ = nfserr;
+ if (!nfserr) {
+ p = xdr_reserve_space(xdr, 4 + 8 + 4 + NFS4_VERIFIER_SIZE);
+ p = xdr_encode_empty_array(p);
+ p = xdr_encode_hyper(p, cp->cp_res.wr_bytes_written);
+ *p++ = cpu_to_be32(cp->cp_res.wr_stable_how);
+ p = xdr_encode_opaque_fixed(p, cp->cp_res.wr_verifier.data,
+ NFS4_VERIFIER_SIZE);
+ } else {
+ p = xdr_reserve_space(xdr, 8);
+ /* We always return success if bytes were written */
+ p = xdr_encode_hyper(p, 0);
+ }
+}
+
+static void encode_cb_offload4args(struct xdr_stream *xdr,
+ __be32 nfserr,
+ const struct knfsd_fh *fh,
+ const struct nfsd4_copy *cp,
+ struct nfs4_cb_compound_hdr *hdr)
+{
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, 4);
+ *p++ = cpu_to_be32(OP_CB_OFFLOAD);
+ encode_nfs_fh4(xdr, fh);
+ encode_stateid4(xdr, &cp->cp_res.cb_stateid);
+ encode_offload_info4(xdr, nfserr, cp);
+
+ hdr->nops++;
+}
+
+static void nfs4_xdr_enc_cb_offload(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const void *data)
+{
+ const struct nfsd4_callback *cb = data;
+ const struct nfsd4_copy *cp =
+ container_of(cb, struct nfsd4_copy, cp_cb);
+ struct nfs4_cb_compound_hdr hdr = {
+ .ident = 0,
+ .minorversion = cb->cb_clp->cl_minorversion,
+ };
+
+ encode_cb_compound4args(xdr, &hdr);
+ encode_cb_sequence4args(xdr, cb, &hdr);
+ encode_cb_offload4args(xdr, cp->nfserr, &cp->fh, cp, &hdr);
+ encode_cb_nops(&hdr);
+}
+
+static int nfs4_xdr_dec_cb_offload(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ void *data)
+{
+ struct nfsd4_callback *cb = data;
+ struct nfs4_cb_compound_hdr hdr;
+ int status;
+
+ status = decode_cb_compound4res(xdr, &hdr);
+ if (unlikely(status))
+ return status;
+
+ status = decode_cb_sequence4res(xdr, cb);
+ if (unlikely(status || cb->cb_seq_status))
+ return status;
+
+ return decode_cb_op_status(xdr, OP_CB_OFFLOAD, &cb->cb_status);
+}
+/*
* RPC procedure tables
*/
#define PROC(proc, call, argtype, restype) \
@@ -703,6 +782,7 @@
PROC(CB_LAYOUT, COMPOUND, cb_layout, cb_layout),
#endif
PROC(CB_NOTIFY_LOCK, COMPOUND, cb_notify_lock, cb_notify_lock),
+ PROC(CB_OFFLOAD, COMPOUND, cb_offload, cb_offload),
};
static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
@@ -746,24 +826,23 @@
return max(nn->nfsd4_lease/10, (time_t)1) * HZ;
}
-static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses)
+static const struct cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses)
{
if (clp->cl_minorversion == 0) {
- char *principal = clp->cl_cred.cr_targ_princ ?
- clp->cl_cred.cr_targ_princ : "nfs";
- struct rpc_cred *cred;
+ client->cl_principal = clp->cl_cred.cr_targ_princ ?
+ clp->cl_cred.cr_targ_princ : "nfs";
- cred = rpc_lookup_machine_cred(principal);
- if (!IS_ERR(cred))
- get_rpccred(cred);
- return cred;
+ return get_cred(rpc_machine_cred());
} else {
- struct rpc_auth *auth = client->cl_auth;
- struct auth_cred acred = {};
+ struct cred *kcred;
- acred.uid = ses->se_cb_sec.uid;
- acred.gid = ses->se_cb_sec.gid;
- return auth->au_ops->lookup_cred(client->cl_auth, &acred, 0);
+ kcred = prepare_kernel_cred(NULL);
+ if (!kcred)
+ return NULL;
+
+ kcred->uid = ses->se_cb_sec.uid;
+ kcred->gid = ses->se_cb_sec.gid;
+ return kcred;
}
}
@@ -784,9 +863,10 @@
.program = &cb_program,
.version = 1,
.flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
+ .cred = current_cred(),
};
struct rpc_clnt *client;
- struct rpc_cred *cred;
+ const struct cred *cred;
if (clp->cl_minorversion == 0) {
if (!clp->cl_cred.cr_principal &&
@@ -816,9 +896,9 @@
return PTR_ERR(client);
}
cred = get_backchannel_cred(clp, client, ses);
- if (IS_ERR(cred)) {
+ if (!cred) {
rpc_shutdown_client(client);
- return PTR_ERR(cred);
+ return -ENOMEM;
}
clp->cl_cb_client = client;
clp->cl_cb_cred = cred;
@@ -926,8 +1006,9 @@
cb->cb_seq_status = 1;
cb->cb_status = 0;
if (minorversion) {
- if (!nfsd41_cb_get_slot(clp, task))
+ if (!cb->cb_holds_slot && !nfsd41_cb_get_slot(clp, task))
return;
+ cb->cb_holds_slot = true;
}
rpc_call_start(task);
}
@@ -948,12 +1029,15 @@
* the submission code will error out, so we don't need to
* handle that case here.
*/
- if (task->tk_flags & RPC_TASK_KILLED)
+ if (RPC_SIGNALLED(task))
goto need_restart;
return true;
}
+ if (!cb->cb_holds_slot)
+ goto need_restart;
+
switch (cb->cb_seq_status) {
case 0:
/*
@@ -992,12 +1076,13 @@
cb->cb_seq_status);
}
+ cb->cb_holds_slot = false;
clear_bit(0, &clp->cl_cb_slot_busy);
rpc_wake_up_next(&clp->cl_cb_waitq);
dprintk("%s: freed slot, new seqid=%d\n", __func__,
clp->cl_cb_session->se_cb_seq_nr);
- if (task->tk_flags & RPC_TASK_KILLED)
+ if (RPC_SIGNALLED(task))
goto need_restart;
out:
return ret;
@@ -1033,10 +1118,11 @@
rpc_restart_call_prepare(task);
return;
case 1:
- break;
- case -1:
- /* Network partition? */
- nfsd4_mark_cb_down(clp, task->tk_status);
+ switch (task->tk_status) {
+ case -EIO:
+ case -ETIMEDOUT:
+ nfsd4_mark_cb_down(clp, task->tk_status);
+ }
break;
default:
BUG();
@@ -1116,7 +1202,7 @@
if (clp->cl_cb_client) {
rpc_shutdown_client(clp->cl_cb_client);
clp->cl_cb_client = NULL;
- put_rpccred(clp->cl_cb_cred);
+ put_cred(clp->cl_cb_cred);
clp->cl_cb_cred = NULL;
}
if (clp->cl_cb_conn.cb_xprt) {
@@ -1199,6 +1285,7 @@
cb->cb_seq_status = 1;
cb->cb_status = 0;
cb->cb_need_restart = false;
+ cb->cb_holds_slot = false;
}
void nfsd4_run_cb(struct nfsd4_callback *cb)
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index a5bb765..d1f2852 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -65,6 +65,7 @@
u32 id;
char name[IDMAP_NAMESZ];
char authname[IDMAP_NAMESZ];
+ struct rcu_head rcu_head;
};
/* Common entry handling */
@@ -82,14 +83,14 @@
new->type = itm->type;
strlcpy(new->name, itm->name, sizeof(new->name));
- strlcpy(new->authname, itm->authname, sizeof(new->name));
+ strlcpy(new->authname, itm->authname, sizeof(new->authname));
}
static void
ent_put(struct kref *ref)
{
struct ent *map = container_of(ref, struct ent, h.ref);
- kfree(map);
+ kfree_rcu(map, rcu_head);
}
static struct cache_head *
@@ -264,8 +265,8 @@
static struct ent *
idtoname_lookup(struct cache_detail *cd, struct ent *item)
{
- struct cache_head *ch = sunrpc_cache_lookup(cd, &item->h,
- idtoname_hash(item));
+ struct cache_head *ch = sunrpc_cache_lookup_rcu(cd, &item->h,
+ idtoname_hash(item));
if (ch)
return container_of(ch, struct ent, h);
else
@@ -422,8 +423,8 @@
static struct ent *
nametoid_lookup(struct cache_detail *cd, struct ent *item)
{
- struct cache_head *ch = sunrpc_cache_lookup(cd, &item->h,
- nametoid_hash(item));
+ struct cache_head *ch = sunrpc_cache_lookup_rcu(cd, &item->h,
+ nametoid_hash(item));
if (ch)
return container_of(ch, struct ent, h);
else
@@ -633,7 +634,7 @@
return nfserr_inval;
status = do_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, &id);
- *uid = make_kuid(&init_user_ns, id);
+ *uid = make_kuid(nfsd_user_namespace(rqstp), id);
if (!uid_valid(*uid))
status = nfserr_badowner;
return status;
@@ -650,7 +651,7 @@
return nfserr_inval;
status = do_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, &id);
- *gid = make_kgid(&init_user_ns, id);
+ *gid = make_kgid(nfsd_user_namespace(rqstp), id);
if (!gid_valid(*gid))
status = nfserr_badowner;
return status;
@@ -659,13 +660,13 @@
__be32 nfsd4_encode_user(struct xdr_stream *xdr, struct svc_rqst *rqstp,
kuid_t uid)
{
- u32 id = from_kuid(&init_user_ns, uid);
+ u32 id = from_kuid_munged(nfsd_user_namespace(rqstp), uid);
return encode_name_from_id(xdr, rqstp, IDMAP_TYPE_USER, id);
}
__be32 nfsd4_encode_group(struct xdr_stream *xdr, struct svc_rqst *rqstp,
kgid_t gid)
{
- u32 id = from_kgid(&init_user_ns, gid);
+ u32 id = from_kgid_munged(nfsd_user_namespace(rqstp), gid);
return encode_name_from_id(xdr, rqstp, IDMAP_TYPE_GROUP, id);
}
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 2b36aa0..2681c70 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -169,8 +169,8 @@
spin_unlock(&fp->fi_lock);
if (!nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls)
- vfs_setlease(ls->ls_file, F_UNLCK, NULL, (void **)&ls);
- fput(ls->ls_file);
+ vfs_setlease(ls->ls_file->nf_file, F_UNLCK, NULL, (void **)&ls);
+ nfsd_file_put(ls->ls_file);
if (ls->ls_recalled)
atomic_dec(&ls->ls_stid.sc_file->fi_lo_recalls);
@@ -197,7 +197,7 @@
fl->fl_end = OFFSET_MAX;
fl->fl_owner = ls;
fl->fl_pid = current->tgid;
- fl->fl_file = ls->ls_file;
+ fl->fl_file = ls->ls_file->nf_file;
status = vfs_setlease(fl->fl_file, fl->fl_type, &fl, NULL);
if (status) {
@@ -236,13 +236,13 @@
NFSPROC4_CLNT_CB_LAYOUT);
if (parent->sc_type == NFS4_DELEG_STID)
- ls->ls_file = get_file(fp->fi_deleg_file);
+ ls->ls_file = nfsd_file_get(fp->fi_deleg_file);
else
ls->ls_file = find_any_file(fp);
BUG_ON(!ls->ls_file);
if (nfsd4_layout_setlease(ls)) {
- fput(ls->ls_file);
+ nfsd_file_put(ls->ls_file);
put_nfs4_file(fp);
kmem_cache_free(nfs4_layout_stateid_cache, ls);
return NULL;
@@ -626,7 +626,7 @@
argv[0] = (char *)nfsd_recall_failed;
argv[1] = addr_str;
- argv[2] = ls->ls_file->f_path.mnt->mnt_sb->s_id;
+ argv[2] = ls->ls_file->nf_file->f_path.mnt->mnt_sb->s_id;
argv[3] = NULL;
error = call_usermodehelper(nfsd_recall_failed, argv, envp,
@@ -656,7 +656,6 @@
struct nfsd_net *nn;
ktime_t now, cutoff;
const struct nfsd4_layout_ops *ops;
- LIST_HEAD(reaplist);
switch (task->tk_status) {
@@ -694,7 +693,7 @@
ops->fence_client(ls);
else
nfsd4_cb_layout_fail(ls);
- return -1;
+ return 1;
case -NFS4ERR_NOMATCHING_LAYOUT:
trace_nfsd_layout_recall_done(&ls->ls_stid.sc_stateid);
task->tk_status = 0;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 9d6b4f0..4e3e77b 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -36,6 +36,7 @@
#include <linux/file.h>
#include <linux/falloc.h>
#include <linux/slab.h>
+#include <linux/kthread.h>
#include "idmap.h"
#include "cache.h"
@@ -426,6 +427,7 @@
goto out;
open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
reclaim = true;
+ /* fall through */
case NFS4_OPEN_CLAIM_FH:
case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
status = do_open_fhandle(rqstp, cstate, open);
@@ -566,17 +568,11 @@
static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net)
{
- __be32 verf[2];
- struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ __be32 *verf = (__be32 *)verifier->data;
- /*
- * This is opaque to client, so no need to byte-swap. Use
- * __force to keep sparse happy. y2038 time_t overflow is
- * irrelevant in this usage.
- */
- verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec;
- verf[1] = (__force __be32)nn->nfssvc_boot.tv_nsec;
- memcpy(verifier->data, verf, sizeof(verifier->data));
+ BUILD_BUG_ON(2*sizeof(*verf) != sizeof(verifier->data));
+
+ nfsd_copy_boot_verifier(verf, net_generic(net, nfsd_net_id));
}
static __be32
@@ -759,7 +755,7 @@
struct nfsd4_read *read = &u->read;
__be32 status;
- read->rd_filp = NULL;
+ read->rd_nf = NULL;
if (read->rd_offset >= OFFSET_MAX)
return nfserr_inval;
@@ -780,7 +776,7 @@
/* check stateid */
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
&read->rd_stateid, RD_STATE,
- &read->rd_filp, &read->rd_tmp_file);
+ &read->rd_nf);
if (status) {
dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
goto out;
@@ -796,8 +792,8 @@
static void
nfsd4_read_release(union nfsd4_op_u *u)
{
- if (u->read.rd_filp)
- fput(u->read.rd_filp);
+ if (u->read.rd_nf)
+ nfsd_file_put(u->read.rd_nf);
trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp,
u->read.rd_offset, u->read.rd_length);
}
@@ -862,8 +858,7 @@
struct nfsd4_rename *rename = &u->rename;
__be32 status;
- if (opens_in_grace(SVC_NET(rqstp)) &&
- !(cstate->save_fh.fh_export->ex_flags & NFSEXP_NOSUBTREECHECK))
+ if (opens_in_grace(SVC_NET(rqstp)))
return nfserr_grace;
status = nfsd_rename(rqstp, &cstate->save_fh, rename->rn_sname,
rename->rn_snamelen, &cstate->current_fh,
@@ -953,7 +948,7 @@
if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
status = nfs4_preprocess_stateid_op(rqstp, cstate,
&cstate->current_fh, &setattr->sa_stateid,
- WR_STATE, NULL, NULL);
+ WR_STATE, NULL);
if (status) {
dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
return status;
@@ -992,7 +987,7 @@
{
struct nfsd4_write *write = &u->write;
stateid_t *stateid = &write->wr_stateid;
- struct file *filp = NULL;
+ struct nfsd_file *nf = NULL;
__be32 status = nfs_ok;
unsigned long cnt;
int nvecs;
@@ -1004,7 +999,7 @@
trace_nfsd_write_start(rqstp, &cstate->current_fh,
write->wr_offset, cnt);
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
- stateid, WR_STATE, &filp, NULL);
+ stateid, WR_STATE, &nf);
if (status) {
dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
return status;
@@ -1015,14 +1010,12 @@
nvecs = svc_fill_write_vector(rqstp, write->wr_pagelist,
&write->wr_head, write->wr_buflen);
- if (!nvecs)
- return nfserr_io;
WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec));
- status = nfsd_vfs_write(rqstp, &cstate->current_fh, filp,
+ status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf->nf_file,
write->wr_offset, rqstp->rq_vec, nvecs, &cnt,
write->wr_how_written);
- fput(filp);
+ nfsd_file_put(nf);
write->wr_bytes_written = cnt;
trace_nfsd_write_done(rqstp, &cstate->current_fh,
@@ -1032,8 +1025,8 @@
static __be32
nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
- stateid_t *src_stateid, struct file **src,
- stateid_t *dst_stateid, struct file **dst)
+ stateid_t *src_stateid, struct nfsd_file **src,
+ stateid_t *dst_stateid, struct nfsd_file **dst)
{
__be32 status;
@@ -1041,22 +1034,22 @@
return nfserr_nofilehandle;
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh,
- src_stateid, RD_STATE, src, NULL);
+ src_stateid, RD_STATE, src);
if (status) {
dprintk("NFSD: %s: couldn't process src stateid!\n", __func__);
goto out;
}
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
- dst_stateid, WR_STATE, dst, NULL);
+ dst_stateid, WR_STATE, dst);
if (status) {
dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__);
goto out_put_src;
}
/* fix up for NFS-specific error code */
- if (!S_ISREG(file_inode(*src)->i_mode) ||
- !S_ISREG(file_inode(*dst)->i_mode)) {
+ if (!S_ISREG(file_inode((*src)->nf_file)->i_mode) ||
+ !S_ISREG(file_inode((*dst)->nf_file)->i_mode)) {
status = nfserr_wrong_type;
goto out_put_dst;
}
@@ -1064,9 +1057,9 @@
out:
return status;
out_put_dst:
- fput(*dst);
+ nfsd_file_put(*dst);
out_put_src:
- fput(*src);
+ nfsd_file_put(*src);
goto out;
}
@@ -1075,7 +1068,7 @@
union nfsd4_op_u *u)
{
struct nfsd4_clone *clone = &u->clone;
- struct file *src, *dst;
+ struct nfsd_file *src, *dst;
__be32 status;
status = nfsd4_verify_copy(rqstp, cstate, &clone->cl_src_stateid, &src,
@@ -1083,68 +1076,306 @@
if (status)
goto out;
- status = nfsd4_clone_file_range(src, clone->cl_src_pos,
- dst, clone->cl_dst_pos, clone->cl_count);
+ status = nfsd4_clone_file_range(src->nf_file, clone->cl_src_pos,
+ dst->nf_file, clone->cl_dst_pos, clone->cl_count);
- fput(dst);
- fput(src);
+ nfsd_file_put(dst);
+ nfsd_file_put(src);
out:
return status;
}
+void nfs4_put_copy(struct nfsd4_copy *copy)
+{
+ if (!refcount_dec_and_test(©->refcount))
+ return;
+ kfree(copy);
+}
+
+static bool
+check_and_set_stop_copy(struct nfsd4_copy *copy)
+{
+ bool value;
+
+ spin_lock(©->cp_clp->async_lock);
+ value = copy->stopped;
+ if (!copy->stopped)
+ copy->stopped = true;
+ spin_unlock(©->cp_clp->async_lock);
+ return value;
+}
+
+static void nfsd4_stop_copy(struct nfsd4_copy *copy)
+{
+ /* only 1 thread should stop the copy */
+ if (!check_and_set_stop_copy(copy))
+ kthread_stop(copy->copy_task);
+ nfs4_put_copy(copy);
+}
+
+static struct nfsd4_copy *nfsd4_get_copy(struct nfs4_client *clp)
+{
+ struct nfsd4_copy *copy = NULL;
+
+ spin_lock(&clp->async_lock);
+ if (!list_empty(&clp->async_copies)) {
+ copy = list_first_entry(&clp->async_copies, struct nfsd4_copy,
+ copies);
+ refcount_inc(©->refcount);
+ }
+ spin_unlock(&clp->async_lock);
+ return copy;
+}
+
+void nfsd4_shutdown_copy(struct nfs4_client *clp)
+{
+ struct nfsd4_copy *copy;
+
+ while ((copy = nfsd4_get_copy(clp)) != NULL)
+ nfsd4_stop_copy(copy);
+}
+
+static void nfsd4_cb_offload_release(struct nfsd4_callback *cb)
+{
+ struct nfsd4_copy *copy = container_of(cb, struct nfsd4_copy, cp_cb);
+
+ nfs4_put_copy(copy);
+}
+
+static int nfsd4_cb_offload_done(struct nfsd4_callback *cb,
+ struct rpc_task *task)
+{
+ return 1;
+}
+
+static const struct nfsd4_callback_ops nfsd4_cb_offload_ops = {
+ .release = nfsd4_cb_offload_release,
+ .done = nfsd4_cb_offload_done
+};
+
+static void nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync)
+{
+ copy->cp_res.wr_stable_how = NFS_UNSTABLE;
+ copy->cp_synchronous = sync;
+ gen_boot_verifier(©->cp_res.wr_verifier, copy->cp_clp->net);
+}
+
+static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy)
+{
+ ssize_t bytes_copied = 0;
+ size_t bytes_total = copy->cp_count;
+ u64 src_pos = copy->cp_src_pos;
+ u64 dst_pos = copy->cp_dst_pos;
+
+ do {
+ if (kthread_should_stop())
+ break;
+ bytes_copied = nfsd_copy_file_range(copy->nf_src->nf_file,
+ src_pos, copy->nf_dst->nf_file, dst_pos,
+ bytes_total);
+ if (bytes_copied <= 0)
+ break;
+ bytes_total -= bytes_copied;
+ copy->cp_res.wr_bytes_written += bytes_copied;
+ src_pos += bytes_copied;
+ dst_pos += bytes_copied;
+ } while (bytes_total > 0 && !copy->cp_synchronous);
+ return bytes_copied;
+}
+
+static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, bool sync)
+{
+ __be32 status;
+ ssize_t bytes;
+
+ bytes = _nfsd_copy_file_range(copy);
+ /* for async copy, we ignore the error, client can always retry
+ * to get the error
+ */
+ if (bytes < 0 && !copy->cp_res.wr_bytes_written)
+ status = nfserrno(bytes);
+ else {
+ nfsd4_init_copy_res(copy, sync);
+ status = nfs_ok;
+ }
+
+ nfsd_file_put(copy->nf_src);
+ nfsd_file_put(copy->nf_dst);
+ return status;
+}
+
+static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst)
+{
+ dst->cp_src_pos = src->cp_src_pos;
+ dst->cp_dst_pos = src->cp_dst_pos;
+ dst->cp_count = src->cp_count;
+ dst->cp_synchronous = src->cp_synchronous;
+ memcpy(&dst->cp_res, &src->cp_res, sizeof(src->cp_res));
+ memcpy(&dst->fh, &src->fh, sizeof(src->fh));
+ dst->cp_clp = src->cp_clp;
+ dst->nf_dst = nfsd_file_get(src->nf_dst);
+ dst->nf_src = nfsd_file_get(src->nf_src);
+ memcpy(&dst->cp_stateid, &src->cp_stateid, sizeof(src->cp_stateid));
+}
+
+static void cleanup_async_copy(struct nfsd4_copy *copy)
+{
+ nfs4_free_cp_state(copy);
+ nfsd_file_put(copy->nf_dst);
+ nfsd_file_put(copy->nf_src);
+ spin_lock(©->cp_clp->async_lock);
+ list_del(©->copies);
+ spin_unlock(©->cp_clp->async_lock);
+ nfs4_put_copy(copy);
+}
+
+static int nfsd4_do_async_copy(void *data)
+{
+ struct nfsd4_copy *copy = (struct nfsd4_copy *)data;
+ struct nfsd4_copy *cb_copy;
+
+ copy->nfserr = nfsd4_do_copy(copy, 0);
+ cb_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
+ if (!cb_copy)
+ goto out;
+ memcpy(&cb_copy->cp_res, ©->cp_res, sizeof(copy->cp_res));
+ cb_copy->cp_clp = copy->cp_clp;
+ cb_copy->nfserr = copy->nfserr;
+ memcpy(&cb_copy->fh, ©->fh, sizeof(copy->fh));
+ nfsd4_init_cb(&cb_copy->cp_cb, cb_copy->cp_clp,
+ &nfsd4_cb_offload_ops, NFSPROC4_CLNT_CB_OFFLOAD);
+ nfsd4_run_cb(&cb_copy->cp_cb);
+out:
+ cleanup_async_copy(copy);
+ return 0;
+}
+
static __be32
nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
{
struct nfsd4_copy *copy = &u->copy;
- struct file *src, *dst;
__be32 status;
- ssize_t bytes;
+ struct nfsd4_copy *async_copy = NULL;
- status = nfsd4_verify_copy(rqstp, cstate, ©->cp_src_stateid, &src,
- ©->cp_dst_stateid, &dst);
+ status = nfsd4_verify_copy(rqstp, cstate, ©->cp_src_stateid,
+ ©->nf_src, ©->cp_dst_stateid,
+ ©->nf_dst);
if (status)
goto out;
- bytes = nfsd_copy_file_range(src, copy->cp_src_pos,
- dst, copy->cp_dst_pos, copy->cp_count);
+ copy->cp_clp = cstate->clp;
+ memcpy(©->fh, &cstate->current_fh.fh_handle,
+ sizeof(struct knfsd_fh));
+ if (!copy->cp_synchronous) {
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
- if (bytes < 0)
- status = nfserrno(bytes);
- else {
- copy->cp_res.wr_bytes_written = bytes;
- copy->cp_res.wr_stable_how = NFS_UNSTABLE;
- copy->cp_synchronous = 1;
- gen_boot_verifier(©->cp_res.wr_verifier, SVC_NET(rqstp));
+ status = nfserrno(-ENOMEM);
+ async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
+ if (!async_copy)
+ goto out;
+ if (!nfs4_init_cp_state(nn, copy)) {
+ kfree(async_copy);
+ goto out;
+ }
+ refcount_set(&async_copy->refcount, 1);
+ memcpy(©->cp_res.cb_stateid, ©->cp_stateid,
+ sizeof(copy->cp_stateid));
+ dup_copy_fields(copy, async_copy);
+ async_copy->copy_task = kthread_create(nfsd4_do_async_copy,
+ async_copy, "%s", "copy thread");
+ if (IS_ERR(async_copy->copy_task))
+ goto out_err;
+ spin_lock(&async_copy->cp_clp->async_lock);
+ list_add(&async_copy->copies,
+ &async_copy->cp_clp->async_copies);
+ spin_unlock(&async_copy->cp_clp->async_lock);
+ wake_up_process(async_copy->copy_task);
status = nfs_ok;
- }
-
- fput(src);
- fput(dst);
+ } else
+ status = nfsd4_do_copy(copy, 1);
out:
return status;
+out_err:
+ cleanup_async_copy(async_copy);
+ goto out;
+}
+
+struct nfsd4_copy *
+find_async_copy(struct nfs4_client *clp, stateid_t *stateid)
+{
+ struct nfsd4_copy *copy;
+
+ spin_lock(&clp->async_lock);
+ list_for_each_entry(copy, &clp->async_copies, copies) {
+ if (memcmp(©->cp_stateid, stateid, NFS4_STATEID_SIZE))
+ continue;
+ refcount_inc(©->refcount);
+ spin_unlock(&clp->async_lock);
+ return copy;
+ }
+ spin_unlock(&clp->async_lock);
+ return NULL;
+}
+
+static __be32
+nfsd4_offload_cancel(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *cstate,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_offload_status *os = &u->offload_status;
+ __be32 status = 0;
+ struct nfsd4_copy *copy;
+ struct nfs4_client *clp = cstate->clp;
+
+ copy = find_async_copy(clp, &os->stateid);
+ if (copy)
+ nfsd4_stop_copy(copy);
+ else
+ status = nfserr_bad_stateid;
+
+ return status;
}
static __be32
nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_fallocate *fallocate, int flags)
{
- __be32 status = nfserr_notsupp;
- struct file *file;
+ __be32 status;
+ struct nfsd_file *nf;
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
&fallocate->falloc_stateid,
- WR_STATE, &file, NULL);
+ WR_STATE, &nf);
if (status != nfs_ok) {
dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n");
return status;
}
- status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, file,
+ status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, nf->nf_file,
fallocate->falloc_offset,
fallocate->falloc_length,
flags);
- fput(file);
+ nfsd_file_put(nf);
+ return status;
+}
+static __be32
+nfsd4_offload_status(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *cstate,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_offload_status *os = &u->offload_status;
+ __be32 status = 0;
+ struct nfsd4_copy *copy;
+ struct nfs4_client *clp = cstate->clp;
+
+ copy = find_async_copy(clp, &os->stateid);
+ if (copy) {
+ os->count = copy->cp_res.wr_bytes_written;
+ nfs4_put_copy(copy);
+ } else
+ status = nfserr_bad_stateid;
+
return status;
}
@@ -1170,11 +1401,11 @@
struct nfsd4_seek *seek = &u->seek;
int whence;
__be32 status;
- struct file *file;
+ struct nfsd_file *nf;
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
&seek->seek_stateid,
- RD_STATE, &file, NULL);
+ RD_STATE, &nf);
if (status) {
dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n");
return status;
@@ -1196,14 +1427,14 @@
* Note: This call does change file->f_pos, but nothing in NFSD
* should ever file->f_pos.
*/
- seek->seek_pos = vfs_llseek(file, seek->seek_offset, whence);
+ seek->seek_pos = vfs_llseek(nf->nf_file, seek->seek_offset, whence);
if (seek->seek_pos < 0)
status = nfserrno(seek->seek_pos);
- else if (seek->seek_pos >= i_size_read(file_inode(file)))
+ else if (seek->seek_pos >= i_size_read(file_inode(nf->nf_file)))
seek->seek_eof = true;
out:
- fput(file);
+ nfsd_file_put(nf);
return status;
}
@@ -1691,6 +1922,7 @@
struct nfsd4_compound_state *cstate = &resp->cstate;
struct svc_fh *current_fh = &cstate->current_fh;
struct svc_fh *save_fh = &cstate->save_fh;
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
__be32 status;
svcxdr_init_encode(rqstp, resp);
@@ -1713,7 +1945,7 @@
* According to RFC3010, this takes precedence over all other errors.
*/
status = nfserr_minor_vers_mismatch;
- if (nfsd_minorversion(args->minorversion, NFSD_TEST) <= 0)
+ if (nfsd_minorversion(nn, args->minorversion, NFSD_TEST) <= 0)
goto out;
status = nfserr_resource;
if (args->opcnt > NFSD_MAX_OPS_PER_COMPOUND)
@@ -2050,6 +2282,14 @@
1 /* cr_synchronous */) * sizeof(__be32);
}
+static inline u32 nfsd4_offload_status_rsize(struct svc_rqst *rqstp,
+ struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size +
+ 2 /* osr_count */ +
+ 1 /* osr_complete<1> optional 0 for now */) * sizeof(__be32);
+}
+
#ifdef CONFIG_NFSD_PNFS
static inline u32 nfsd4_getdeviceinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
@@ -2436,25 +2676,25 @@
/* NFSv4.2 operations */
[OP_ALLOCATE] = {
.op_func = nfsd4_allocate,
- .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
+ .op_flags = OP_MODIFIES_SOMETHING,
.op_name = "OP_ALLOCATE",
.op_rsize_bop = nfsd4_only_status_rsize,
},
[OP_DEALLOCATE] = {
.op_func = nfsd4_deallocate,
- .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
+ .op_flags = OP_MODIFIES_SOMETHING,
.op_name = "OP_DEALLOCATE",
.op_rsize_bop = nfsd4_only_status_rsize,
},
[OP_CLONE] = {
.op_func = nfsd4_clone,
- .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
+ .op_flags = OP_MODIFIES_SOMETHING,
.op_name = "OP_CLONE",
.op_rsize_bop = nfsd4_only_status_rsize,
},
[OP_COPY] = {
.op_func = nfsd4_copy,
- .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
+ .op_flags = OP_MODIFIES_SOMETHING,
.op_name = "OP_COPY",
.op_rsize_bop = nfsd4_copy_rsize,
},
@@ -2463,6 +2703,17 @@
.op_name = "OP_SEEK",
.op_rsize_bop = nfsd4_seek_rsize,
},
+ [OP_OFFLOAD_STATUS] = {
+ .op_func = nfsd4_offload_status,
+ .op_name = "OP_OFFLOAD_STATUS",
+ .op_rsize_bop = nfsd4_offload_status_rsize,
+ },
+ [OP_OFFLOAD_CANCEL] = {
+ .op_func = nfsd4_offload_cancel,
+ .op_flags = OP_MODIFIES_SOMETHING,
+ .op_name = "OP_OFFLOAD_CANCEL",
+ .op_rsize_bop = nfsd4_only_status_rsize,
+ },
};
/**
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 9c247fa..cdc75ad 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -59,8 +59,13 @@
void (*remove)(struct nfs4_client *);
int (*check)(struct nfs4_client *);
void (*grace_done)(struct nfsd_net *);
+ uint8_t version;
+ size_t msglen;
};
+static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops;
+static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v2;
+
/* Globals */
static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
@@ -126,7 +131,6 @@
SHASH_DESC_ON_STACK(desc, tfm);
desc->tfm = tfm;
- desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
status = crypto_shash_digest(desc, clname->data, clname->len,
cksum.data);
@@ -170,12 +174,34 @@
}
static void
+__nfsd4_create_reclaim_record_grace(struct nfs4_client *clp,
+ const char *dname, int len, struct nfsd_net *nn)
+{
+ struct xdr_netobj name;
+ struct xdr_netobj princhash = { .len = 0, .data = NULL };
+ struct nfs4_client_reclaim *crp;
+
+ name.data = kmemdup(dname, len, GFP_KERNEL);
+ if (!name.data) {
+ dprintk("%s: failed to allocate memory for name.data!\n",
+ __func__);
+ return;
+ }
+ name.len = len;
+ crp = nfs4_client_to_reclaim(name, princhash, nn);
+ if (!crp) {
+ kfree(name.data);
+ return;
+ }
+ crp->cr_clp = clp;
+}
+
+static void
nfsd4_create_clid_dir(struct nfs4_client *clp)
{
const struct cred *original_cred;
char dname[HEXDIR_LEN];
struct dentry *dir, *dentry;
- struct nfs4_client_reclaim *crp;
int status;
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
@@ -221,11 +247,9 @@
out_unlock:
inode_unlock(d_inode(dir));
if (status == 0) {
- if (nn->in_grace) {
- crp = nfs4_client_to_reclaim(dname, nn);
- if (crp)
- crp->cr_clp = clp;
- }
+ if (nn->in_grace)
+ __nfsd4_create_reclaim_record_grace(clp, dname,
+ HEXDIR_LEN, nn);
vfs_fsync(nn->rec_file, 0);
} else {
printk(KERN_ERR "NFSD: failed to write recovery record"
@@ -346,10 +370,29 @@
}
static void
+__nfsd4_remove_reclaim_record_grace(const char *dname, int len,
+ struct nfsd_net *nn)
+{
+ struct xdr_netobj name;
+ struct nfs4_client_reclaim *crp;
+
+ name.data = kmemdup(dname, len, GFP_KERNEL);
+ if (!name.data) {
+ dprintk("%s: failed to allocate memory for name.data!\n",
+ __func__);
+ return;
+ }
+ name.len = len;
+ crp = nfsd4_find_reclaim_client(name, nn);
+ kfree(name.data);
+ if (crp)
+ nfs4_remove_reclaim_record(crp, nn);
+}
+
+static void
nfsd4_remove_clid_dir(struct nfs4_client *clp)
{
const struct cred *original_cred;
- struct nfs4_client_reclaim *crp;
char dname[HEXDIR_LEN];
int status;
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
@@ -374,12 +417,9 @@
nfs4_reset_creds(original_cred);
if (status == 0) {
vfs_fsync(nn->rec_file, 0);
- if (nn->in_grace) {
- /* remove reclaim record */
- crp = nfsd4_find_reclaim_client(dname, nn);
- if (crp)
- nfs4_remove_reclaim_record(crp, nn);
- }
+ if (nn->in_grace)
+ __nfsd4_remove_reclaim_record_grace(dname,
+ HEXDIR_LEN, nn);
}
out_drop_write:
mnt_drop_write_file(nn->rec_file);
@@ -393,14 +433,31 @@
purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
{
int status;
+ struct xdr_netobj name;
- if (nfs4_has_reclaimed_state(child->d_name.name, nn))
+ if (child->d_name.len != HEXDIR_LEN - 1) {
+ printk("%s: illegal name %pd in recovery directory\n",
+ __func__, child);
+ /* Keep trying; maybe the others are OK: */
return 0;
+ }
+ name.data = kmemdup_nul(child->d_name.name, child->d_name.len, GFP_KERNEL);
+ if (!name.data) {
+ dprintk("%s: failed to allocate memory for name.data!\n",
+ __func__);
+ goto out;
+ }
+ name.len = HEXDIR_LEN;
+ if (nfs4_has_reclaimed_state(name, nn))
+ goto out_free;
status = vfs_rmdir(d_inode(parent), child);
if (status)
printk("failed to remove client recovery directory %pd\n",
child);
+out_free:
+ kfree(name.data);
+out:
/* Keep trying, success or failure: */
return 0;
}
@@ -430,13 +487,25 @@
static int
load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
{
+ struct xdr_netobj name;
+ struct xdr_netobj princhash = { .len = 0, .data = NULL };
+
if (child->d_name.len != HEXDIR_LEN - 1) {
- printk("nfsd4: illegal name %pd in recovery directory\n",
- child);
+ printk("%s: illegal name %pd in recovery directory\n",
+ __func__, child);
/* Keep trying; maybe the others are OK: */
return 0;
}
- nfs4_client_to_reclaim(child->d_name.name, nn);
+ name.data = kmemdup_nul(child->d_name.name, child->d_name.len, GFP_KERNEL);
+ if (!name.data) {
+ dprintk("%s: failed to allocate memory for name.data!\n",
+ __func__);
+ goto out;
+ }
+ name.len = HEXDIR_LEN;
+ if (!nfs4_client_to_reclaim(name, princhash, nn))
+ kfree(name.data);
+out:
return 0;
}
@@ -565,6 +634,7 @@
status = nfsd4_load_reboot_recovery_data(net);
if (status)
goto err;
+ printk("NFSD: Using legacy client tracking operations.\n");
return 0;
err:
@@ -616,6 +686,7 @@
char dname[HEXDIR_LEN];
struct nfs4_client_reclaim *crp;
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+ struct xdr_netobj name;
/* did we already find that this client is stable? */
if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
@@ -628,13 +699,22 @@
}
/* look for it in the reclaim hashtable otherwise */
- crp = nfsd4_find_reclaim_client(dname, nn);
+ name.data = kmemdup(dname, HEXDIR_LEN, GFP_KERNEL);
+ if (!name.data) {
+ dprintk("%s: failed to allocate memory for name.data!\n",
+ __func__);
+ goto out_enoent;
+ }
+ name.len = HEXDIR_LEN;
+ crp = nfsd4_find_reclaim_client(name, nn);
+ kfree(name.data);
if (crp) {
set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
crp->cr_clp = clp;
return 0;
}
+out_enoent:
return -ENOENT;
}
@@ -645,6 +725,8 @@
.remove = nfsd4_remove_clid_dir,
.check = nfsd4_check_legacy_client,
.grace_done = nfsd4_recdir_purge_old,
+ .version = 1,
+ .msglen = 0,
};
/* Globals */
@@ -657,37 +739,40 @@
spinlock_t cn_lock;
struct list_head cn_list;
unsigned int cn_xid;
+ bool cn_has_legacy;
+ struct crypto_shash *cn_tfm;
};
struct cld_upcall {
struct list_head cu_list;
struct cld_net *cu_net;
- struct task_struct *cu_task;
- struct cld_msg cu_msg;
+ struct completion cu_done;
+ union {
+ struct cld_msg_hdr cu_hdr;
+ struct cld_msg cu_msg;
+ struct cld_msg_v2 cu_msg_v2;
+ } cu_u;
};
static int
-__cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg)
+__cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg)
{
int ret;
struct rpc_pipe_msg msg;
+ struct cld_upcall *cup = container_of(cmsg, struct cld_upcall, cu_u);
+ struct nfsd_net *nn = net_generic(pipe->dentry->d_sb->s_fs_info,
+ nfsd_net_id);
memset(&msg, 0, sizeof(msg));
msg.data = cmsg;
- msg.len = sizeof(*cmsg);
+ msg.len = nn->client_tracking_ops->msglen;
- /*
- * Set task state before we queue the upcall. That prevents
- * wake_up_process in the downcall from racing with schedule.
- */
- set_current_state(TASK_UNINTERRUPTIBLE);
ret = rpc_queue_upcall(pipe, &msg);
if (ret < 0) {
- set_current_state(TASK_RUNNING);
goto out;
}
- schedule();
+ wait_for_completion(&cup->cu_done);
if (msg.errno < 0)
ret = msg.errno;
@@ -696,7 +781,7 @@
}
static int
-cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg)
+cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg)
{
int ret;
@@ -712,34 +797,107 @@
}
static ssize_t
+__cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg,
+ struct nfsd_net *nn)
+{
+ uint8_t cmd, princhashlen;
+ struct xdr_netobj name, princhash = { .len = 0, .data = NULL };
+ uint16_t namelen;
+ struct cld_net *cn = nn->cld_net;
+
+ if (get_user(cmd, &cmsg->cm_cmd)) {
+ dprintk("%s: error when copying cmd from userspace", __func__);
+ return -EFAULT;
+ }
+ if (cmd == Cld_GraceStart) {
+ if (nn->client_tracking_ops->version >= 2) {
+ const struct cld_clntinfo __user *ci;
+
+ ci = &cmsg->cm_u.cm_clntinfo;
+ if (get_user(namelen, &ci->cc_name.cn_len))
+ return -EFAULT;
+ name.data = memdup_user(&ci->cc_name.cn_id, namelen);
+ if (IS_ERR_OR_NULL(name.data))
+ return -EFAULT;
+ name.len = namelen;
+ get_user(princhashlen, &ci->cc_princhash.cp_len);
+ if (princhashlen > 0) {
+ princhash.data = memdup_user(
+ &ci->cc_princhash.cp_data,
+ princhashlen);
+ if (IS_ERR_OR_NULL(princhash.data))
+ return -EFAULT;
+ princhash.len = princhashlen;
+ } else
+ princhash.len = 0;
+ } else {
+ const struct cld_name __user *cnm;
+
+ cnm = &cmsg->cm_u.cm_name;
+ if (get_user(namelen, &cnm->cn_len))
+ return -EFAULT;
+ name.data = memdup_user(&cnm->cn_id, namelen);
+ if (IS_ERR_OR_NULL(name.data))
+ return -EFAULT;
+ name.len = namelen;
+ }
+ if (name.len > 5 && memcmp(name.data, "hash:", 5) == 0) {
+ name.len = name.len - 5;
+ memmove(name.data, name.data + 5, name.len);
+ cn->cn_has_legacy = true;
+ }
+ if (!nfs4_client_to_reclaim(name, princhash, nn)) {
+ kfree(name.data);
+ kfree(princhash.data);
+ return -EFAULT;
+ }
+ return nn->client_tracking_ops->msglen;
+ }
+ return -EFAULT;
+}
+
+static ssize_t
cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
{
struct cld_upcall *tmp, *cup;
- struct cld_msg __user *cmsg = (struct cld_msg __user *)src;
+ struct cld_msg_hdr __user *hdr = (struct cld_msg_hdr __user *)src;
+ struct cld_msg_v2 __user *cmsg = (struct cld_msg_v2 __user *)src;
uint32_t xid;
struct nfsd_net *nn = net_generic(file_inode(filp)->i_sb->s_fs_info,
nfsd_net_id);
struct cld_net *cn = nn->cld_net;
+ int16_t status;
- if (mlen != sizeof(*cmsg)) {
+ if (mlen != nn->client_tracking_ops->msglen) {
dprintk("%s: got %zu bytes, expected %zu\n", __func__, mlen,
- sizeof(*cmsg));
+ nn->client_tracking_ops->msglen);
return -EINVAL;
}
/* copy just the xid so we can try to find that */
- if (copy_from_user(&xid, &cmsg->cm_xid, sizeof(xid)) != 0) {
+ if (copy_from_user(&xid, &hdr->cm_xid, sizeof(xid)) != 0) {
dprintk("%s: error when copying xid from userspace", __func__);
return -EFAULT;
}
+ /*
+ * copy the status so we know whether to remove the upcall from the
+ * list (for -EINPROGRESS, we just want to make sure the xid is
+ * valid, not remove the upcall from the list)
+ */
+ if (get_user(status, &hdr->cm_status)) {
+ dprintk("%s: error when copying status from userspace", __func__);
+ return -EFAULT;
+ }
+
/* walk the list and find corresponding xid */
cup = NULL;
spin_lock(&cn->cn_lock);
list_for_each_entry(tmp, &cn->cn_list, cu_list) {
- if (get_unaligned(&tmp->cu_msg.cm_xid) == xid) {
+ if (get_unaligned(&tmp->cu_u.cu_hdr.cm_xid) == xid) {
cup = tmp;
- list_del_init(&cup->cu_list);
+ if (status != -EINPROGRESS)
+ list_del_init(&cup->cu_list);
break;
}
}
@@ -751,10 +909,13 @@
return -EINVAL;
}
- if (copy_from_user(&cup->cu_msg, src, mlen) != 0)
+ if (status == -EINPROGRESS)
+ return __cld_pipe_inprogress_downcall(cmsg, nn);
+
+ if (copy_from_user(&cup->cu_u.cu_msg_v2, src, mlen) != 0)
return -EFAULT;
- wake_up_process(cup->cu_task);
+ complete(&cup->cu_done);
return mlen;
}
@@ -763,13 +924,13 @@
{
struct cld_msg *cmsg = msg->data;
struct cld_upcall *cup = container_of(cmsg, struct cld_upcall,
- cu_msg);
+ cu_u.cu_msg);
/* errno >= 0 means we got a downcall */
if (msg->errno >= 0)
return;
- wake_up_process(cup->cu_task);
+ complete(&cup->cu_done);
}
static const struct rpc_pipe_ops cld_upcall_ops = {
@@ -826,7 +987,7 @@
/* Initialize rpc_pipefs pipe for communication with client tracking daemon */
static int
-nfsd4_init_cld_pipe(struct net *net)
+__nfsd4_init_cld_pipe(struct net *net)
{
int ret;
struct dentry *dentry;
@@ -857,6 +1018,7 @@
}
cn->cn_pipe->dentry = dentry;
+ cn->cn_has_legacy = false;
nn->cld_net = cn;
return 0;
@@ -869,6 +1031,17 @@
return ret;
}
+static int
+nfsd4_init_cld_pipe(struct net *net)
+{
+ int status;
+
+ status = __nfsd4_init_cld_pipe(net);
+ if (!status)
+ printk("NFSD: Using old nfsdcld client tracking operations.\n");
+ return status;
+}
+
static void
nfsd4_remove_cld_pipe(struct net *net)
{
@@ -877,14 +1050,17 @@
nfsd4_cld_unregister_net(net, cn->cn_pipe);
rpc_destroy_pipe_data(cn->cn_pipe);
+ if (cn->cn_tfm)
+ crypto_free_shash(cn->cn_tfm);
kfree(nn->cld_net);
nn->cld_net = NULL;
}
static struct cld_upcall *
-alloc_cld_upcall(struct cld_net *cn)
+alloc_cld_upcall(struct nfsd_net *nn)
{
struct cld_upcall *new, *tmp;
+ struct cld_net *cn = nn->cld_net;
new = kzalloc(sizeof(*new), GFP_KERNEL);
if (!new)
@@ -894,20 +1070,20 @@
restart_search:
spin_lock(&cn->cn_lock);
list_for_each_entry(tmp, &cn->cn_list, cu_list) {
- if (tmp->cu_msg.cm_xid == cn->cn_xid) {
+ if (tmp->cu_u.cu_msg.cm_xid == cn->cn_xid) {
cn->cn_xid++;
spin_unlock(&cn->cn_lock);
goto restart_search;
}
}
- new->cu_task = current;
- new->cu_msg.cm_vers = CLD_UPCALL_VERSION;
- put_unaligned(cn->cn_xid++, &new->cu_msg.cm_xid);
+ init_completion(&new->cu_done);
+ new->cu_u.cu_msg.cm_vers = nn->client_tracking_ops->version;
+ put_unaligned(cn->cn_xid++, &new->cu_u.cu_msg.cm_xid);
new->cu_net = cn;
list_add(&new->cu_list, &cn->cn_list);
spin_unlock(&cn->cn_lock);
- dprintk("%s: allocated xid %u\n", __func__, new->cu_msg.cm_xid);
+ dprintk("%s: allocated xid %u\n", __func__, new->cu_u.cu_msg.cm_xid);
return new;
}
@@ -936,20 +1112,20 @@
if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
return;
- cup = alloc_cld_upcall(cn);
+ cup = alloc_cld_upcall(nn);
if (!cup) {
ret = -ENOMEM;
goto out_err;
}
- cup->cu_msg.cm_cmd = Cld_Create;
- cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
- memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
+ cup->cu_u.cu_msg.cm_cmd = Cld_Create;
+ cup->cu_u.cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
+ memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
clp->cl_name.len);
- ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+ ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
if (!ret) {
- ret = cup->cu_msg.cm_status;
+ ret = cup->cu_u.cu_msg.cm_status;
set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
}
@@ -962,6 +1138,75 @@
/* Ask daemon to create a new record */
static void
+nfsd4_cld_create_v2(struct nfs4_client *clp)
+{
+ int ret;
+ struct cld_upcall *cup;
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+ struct cld_net *cn = nn->cld_net;
+ struct cld_msg_v2 *cmsg;
+ struct crypto_shash *tfm = cn->cn_tfm;
+ struct xdr_netobj cksum;
+ char *principal = NULL;
+ SHASH_DESC_ON_STACK(desc, tfm);
+
+ /* Don't upcall if it's already stored */
+ if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
+ return;
+
+ cup = alloc_cld_upcall(nn);
+ if (!cup) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+
+ cmsg = &cup->cu_u.cu_msg_v2;
+ cmsg->cm_cmd = Cld_Create;
+ cmsg->cm_u.cm_clntinfo.cc_name.cn_len = clp->cl_name.len;
+ memcpy(cmsg->cm_u.cm_clntinfo.cc_name.cn_id, clp->cl_name.data,
+ clp->cl_name.len);
+ if (clp->cl_cred.cr_raw_principal)
+ principal = clp->cl_cred.cr_raw_principal;
+ else if (clp->cl_cred.cr_principal)
+ principal = clp->cl_cred.cr_principal;
+ if (principal) {
+ desc->tfm = tfm;
+ cksum.len = crypto_shash_digestsize(tfm);
+ cksum.data = kmalloc(cksum.len, GFP_KERNEL);
+ if (cksum.data == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ ret = crypto_shash_digest(desc, principal, strlen(principal),
+ cksum.data);
+ shash_desc_zero(desc);
+ if (ret) {
+ kfree(cksum.data);
+ goto out;
+ }
+ cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = cksum.len;
+ memcpy(cmsg->cm_u.cm_clntinfo.cc_princhash.cp_data,
+ cksum.data, cksum.len);
+ kfree(cksum.data);
+ } else
+ cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = 0;
+
+ ret = cld_pipe_upcall(cn->cn_pipe, cmsg);
+ if (!ret) {
+ ret = cmsg->cm_status;
+ set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
+ }
+
+out:
+ free_cld_upcall(cup);
+out_err:
+ if (ret)
+ pr_err("NFSD: Unable to create client record on stable storage: %d\n",
+ ret);
+}
+
+/* Ask daemon to create a new record */
+static void
nfsd4_cld_remove(struct nfs4_client *clp)
{
int ret;
@@ -973,20 +1218,20 @@
if (!test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
return;
- cup = alloc_cld_upcall(cn);
+ cup = alloc_cld_upcall(nn);
if (!cup) {
ret = -ENOMEM;
goto out_err;
}
- cup->cu_msg.cm_cmd = Cld_Remove;
- cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
- memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
+ cup->cu_u.cu_msg.cm_cmd = Cld_Remove;
+ cup->cu_u.cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
+ memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
clp->cl_name.len);
- ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+ ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
if (!ret) {
- ret = cup->cu_msg.cm_status;
+ ret = cup->cu_u.cu_msg.cm_status;
clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
}
@@ -997,9 +1242,14 @@
"record from stable storage: %d\n", ret);
}
-/* Check for presence of a record, and update its timestamp */
+/*
+ * For older nfsdcld's that do not allow us to "slurp" the clients
+ * from the tracking database during startup.
+ *
+ * Check for presence of a record, and update its timestamp
+ */
static int
-nfsd4_cld_check(struct nfs4_client *clp)
+nfsd4_cld_check_v0(struct nfs4_client *clp)
{
int ret;
struct cld_upcall *cup;
@@ -1010,21 +1260,21 @@
if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
return 0;
- cup = alloc_cld_upcall(cn);
+ cup = alloc_cld_upcall(nn);
if (!cup) {
printk(KERN_ERR "NFSD: Unable to check client record on "
"stable storage: %d\n", -ENOMEM);
return -ENOMEM;
}
- cup->cu_msg.cm_cmd = Cld_Check;
- cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
- memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
+ cup->cu_u.cu_msg.cm_cmd = Cld_Check;
+ cup->cu_u.cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
+ memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
clp->cl_name.len);
- ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+ ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
if (!ret) {
- ret = cup->cu_msg.cm_status;
+ ret = cup->cu_u.cu_msg.cm_status;
set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
}
@@ -1032,24 +1282,173 @@
return ret;
}
-static void
-nfsd4_cld_grace_done(struct nfsd_net *nn)
+/*
+ * For newer nfsdcld's that allow us to "slurp" the clients
+ * from the tracking database during startup.
+ *
+ * Check for presence of a record in the reclaim_str_hashtbl
+ */
+static int
+nfsd4_cld_check(struct nfs4_client *clp)
+{
+ struct nfs4_client_reclaim *crp;
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+ struct cld_net *cn = nn->cld_net;
+ int status;
+ char dname[HEXDIR_LEN];
+ struct xdr_netobj name;
+
+ /* did we already find that this client is stable? */
+ if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
+ return 0;
+
+ /* look for it in the reclaim hashtable otherwise */
+ crp = nfsd4_find_reclaim_client(clp->cl_name, nn);
+ if (crp)
+ goto found;
+
+ if (cn->cn_has_legacy) {
+ status = nfs4_make_rec_clidname(dname, &clp->cl_name);
+ if (status)
+ return -ENOENT;
+
+ name.data = kmemdup(dname, HEXDIR_LEN, GFP_KERNEL);
+ if (!name.data) {
+ dprintk("%s: failed to allocate memory for name.data!\n",
+ __func__);
+ return -ENOENT;
+ }
+ name.len = HEXDIR_LEN;
+ crp = nfsd4_find_reclaim_client(name, nn);
+ kfree(name.data);
+ if (crp)
+ goto found;
+
+ }
+ return -ENOENT;
+found:
+ crp->cr_clp = clp;
+ return 0;
+}
+
+static int
+nfsd4_cld_check_v2(struct nfs4_client *clp)
+{
+ struct nfs4_client_reclaim *crp;
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+ struct cld_net *cn = nn->cld_net;
+ int status;
+ char dname[HEXDIR_LEN];
+ struct xdr_netobj name;
+ struct crypto_shash *tfm = cn->cn_tfm;
+ struct xdr_netobj cksum;
+ char *principal = NULL;
+ SHASH_DESC_ON_STACK(desc, tfm);
+
+ /* did we already find that this client is stable? */
+ if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
+ return 0;
+
+ /* look for it in the reclaim hashtable otherwise */
+ crp = nfsd4_find_reclaim_client(clp->cl_name, nn);
+ if (crp)
+ goto found;
+
+ if (cn->cn_has_legacy) {
+ status = nfs4_make_rec_clidname(dname, &clp->cl_name);
+ if (status)
+ return -ENOENT;
+
+ name.data = kmemdup(dname, HEXDIR_LEN, GFP_KERNEL);
+ if (!name.data) {
+ dprintk("%s: failed to allocate memory for name.data\n",
+ __func__);
+ return -ENOENT;
+ }
+ name.len = HEXDIR_LEN;
+ crp = nfsd4_find_reclaim_client(name, nn);
+ kfree(name.data);
+ if (crp)
+ goto found;
+
+ }
+ return -ENOENT;
+found:
+ if (crp->cr_princhash.len) {
+ if (clp->cl_cred.cr_raw_principal)
+ principal = clp->cl_cred.cr_raw_principal;
+ else if (clp->cl_cred.cr_principal)
+ principal = clp->cl_cred.cr_principal;
+ if (principal == NULL)
+ return -ENOENT;
+ desc->tfm = tfm;
+ cksum.len = crypto_shash_digestsize(tfm);
+ cksum.data = kmalloc(cksum.len, GFP_KERNEL);
+ if (cksum.data == NULL)
+ return -ENOENT;
+ status = crypto_shash_digest(desc, principal, strlen(principal),
+ cksum.data);
+ shash_desc_zero(desc);
+ if (status) {
+ kfree(cksum.data);
+ return -ENOENT;
+ }
+ if (memcmp(crp->cr_princhash.data, cksum.data,
+ crp->cr_princhash.len)) {
+ kfree(cksum.data);
+ return -ENOENT;
+ }
+ kfree(cksum.data);
+ }
+ crp->cr_clp = clp;
+ return 0;
+}
+
+static int
+nfsd4_cld_grace_start(struct nfsd_net *nn)
{
int ret;
struct cld_upcall *cup;
struct cld_net *cn = nn->cld_net;
- cup = alloc_cld_upcall(cn);
+ cup = alloc_cld_upcall(nn);
if (!cup) {
ret = -ENOMEM;
goto out_err;
}
- cup->cu_msg.cm_cmd = Cld_GraceDone;
- cup->cu_msg.cm_u.cm_gracetime = (int64_t)nn->boot_time;
- ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+ cup->cu_u.cu_msg.cm_cmd = Cld_GraceStart;
+ ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
if (!ret)
- ret = cup->cu_msg.cm_status;
+ ret = cup->cu_u.cu_msg.cm_status;
+
+ free_cld_upcall(cup);
+out_err:
+ if (ret)
+ dprintk("%s: Unable to get clients from userspace: %d\n",
+ __func__, ret);
+ return ret;
+}
+
+/* For older nfsdcld's that need cm_gracetime */
+static void
+nfsd4_cld_grace_done_v0(struct nfsd_net *nn)
+{
+ int ret;
+ struct cld_upcall *cup;
+ struct cld_net *cn = nn->cld_net;
+
+ cup = alloc_cld_upcall(nn);
+ if (!cup) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+
+ cup->cu_u.cu_msg.cm_cmd = Cld_GraceDone;
+ cup->cu_u.cu_msg.cm_u.cm_gracetime = (int64_t)nn->boot_time;
+ ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
+ if (!ret)
+ ret = cup->cu_u.cu_msg.cm_status;
free_cld_upcall(cup);
out_err:
@@ -1057,13 +1456,223 @@
printk(KERN_ERR "NFSD: Unable to end grace period: %d\n", ret);
}
-static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = {
+/*
+ * For newer nfsdcld's that do not need cm_gracetime. We also need to call
+ * nfs4_release_reclaim() to clear out the reclaim_str_hashtbl.
+ */
+static void
+nfsd4_cld_grace_done(struct nfsd_net *nn)
+{
+ int ret;
+ struct cld_upcall *cup;
+ struct cld_net *cn = nn->cld_net;
+
+ cup = alloc_cld_upcall(nn);
+ if (!cup) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+
+ cup->cu_u.cu_msg.cm_cmd = Cld_GraceDone;
+ ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
+ if (!ret)
+ ret = cup->cu_u.cu_msg.cm_status;
+
+ free_cld_upcall(cup);
+out_err:
+ nfs4_release_reclaim(nn);
+ if (ret)
+ printk(KERN_ERR "NFSD: Unable to end grace period: %d\n", ret);
+}
+
+static int
+nfs4_cld_state_init(struct net *net)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ int i;
+
+ nn->reclaim_str_hashtbl = kmalloc_array(CLIENT_HASH_SIZE,
+ sizeof(struct list_head),
+ GFP_KERNEL);
+ if (!nn->reclaim_str_hashtbl)
+ return -ENOMEM;
+
+ for (i = 0; i < CLIENT_HASH_SIZE; i++)
+ INIT_LIST_HEAD(&nn->reclaim_str_hashtbl[i]);
+ nn->reclaim_str_hashtbl_size = 0;
+ nn->track_reclaim_completes = true;
+ atomic_set(&nn->nr_reclaim_complete, 0);
+
+ return 0;
+}
+
+static void
+nfs4_cld_state_shutdown(struct net *net)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ nn->track_reclaim_completes = false;
+ kfree(nn->reclaim_str_hashtbl);
+}
+
+static bool
+cld_running(struct nfsd_net *nn)
+{
+ struct cld_net *cn = nn->cld_net;
+ struct rpc_pipe *pipe = cn->cn_pipe;
+
+ return pipe->nreaders || pipe->nwriters;
+}
+
+static int
+nfsd4_cld_get_version(struct nfsd_net *nn)
+{
+ int ret = 0;
+ struct cld_upcall *cup;
+ struct cld_net *cn = nn->cld_net;
+ uint8_t version;
+
+ cup = alloc_cld_upcall(nn);
+ if (!cup) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+ cup->cu_u.cu_msg.cm_cmd = Cld_GetVersion;
+ ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
+ if (!ret) {
+ ret = cup->cu_u.cu_msg.cm_status;
+ if (ret)
+ goto out_free;
+ version = cup->cu_u.cu_msg.cm_u.cm_version;
+ dprintk("%s: userspace returned version %u\n",
+ __func__, version);
+ if (version < 1)
+ version = 1;
+ else if (version > CLD_UPCALL_VERSION)
+ version = CLD_UPCALL_VERSION;
+
+ switch (version) {
+ case 1:
+ nn->client_tracking_ops = &nfsd4_cld_tracking_ops;
+ break;
+ case 2:
+ nn->client_tracking_ops = &nfsd4_cld_tracking_ops_v2;
+ break;
+ default:
+ break;
+ }
+ }
+out_free:
+ free_cld_upcall(cup);
+out_err:
+ if (ret)
+ dprintk("%s: Unable to get version from userspace: %d\n",
+ __func__, ret);
+ return ret;
+}
+
+static int
+nfsd4_cld_tracking_init(struct net *net)
+{
+ int status;
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ bool running;
+ int retries = 10;
+
+ status = nfs4_cld_state_init(net);
+ if (status)
+ return status;
+
+ status = __nfsd4_init_cld_pipe(net);
+ if (status)
+ goto err_shutdown;
+ nn->cld_net->cn_tfm = crypto_alloc_shash("sha256", 0, 0);
+ if (IS_ERR(nn->cld_net->cn_tfm)) {
+ status = PTR_ERR(nn->cld_net->cn_tfm);
+ goto err_remove;
+ }
+
+ /*
+ * rpc pipe upcalls take 30 seconds to time out, so we don't want to
+ * queue an upcall unless we know that nfsdcld is running (because we
+ * want this to fail fast so that nfsd4_client_tracking_init() can try
+ * the next client tracking method). nfsdcld should already be running
+ * before nfsd is started, so the wait here is for nfsdcld to open the
+ * pipefs file we just created.
+ */
+ while (!(running = cld_running(nn)) && retries--)
+ msleep(100);
+
+ if (!running) {
+ status = -ETIMEDOUT;
+ goto err_remove;
+ }
+
+ status = nfsd4_cld_get_version(nn);
+ if (status == -EOPNOTSUPP)
+ pr_warn("NFSD: nfsdcld GetVersion upcall failed. Please upgrade nfsdcld.\n");
+
+ status = nfsd4_cld_grace_start(nn);
+ if (status) {
+ if (status == -EOPNOTSUPP)
+ pr_warn("NFSD: nfsdcld GraceStart upcall failed. Please upgrade nfsdcld.\n");
+ nfs4_release_reclaim(nn);
+ goto err_remove;
+ } else
+ printk("NFSD: Using nfsdcld client tracking operations.\n");
+ return 0;
+
+err_remove:
+ nfsd4_remove_cld_pipe(net);
+err_shutdown:
+ nfs4_cld_state_shutdown(net);
+ return status;
+}
+
+static void
+nfsd4_cld_tracking_exit(struct net *net)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ nfs4_release_reclaim(nn);
+ nfsd4_remove_cld_pipe(net);
+ nfs4_cld_state_shutdown(net);
+}
+
+/* For older nfsdcld's */
+static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v0 = {
.init = nfsd4_init_cld_pipe,
.exit = nfsd4_remove_cld_pipe,
.create = nfsd4_cld_create,
.remove = nfsd4_cld_remove,
+ .check = nfsd4_cld_check_v0,
+ .grace_done = nfsd4_cld_grace_done_v0,
+ .version = 1,
+ .msglen = sizeof(struct cld_msg),
+};
+
+/* For newer nfsdcld's */
+static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = {
+ .init = nfsd4_cld_tracking_init,
+ .exit = nfsd4_cld_tracking_exit,
+ .create = nfsd4_cld_create,
+ .remove = nfsd4_cld_remove,
.check = nfsd4_cld_check,
.grace_done = nfsd4_cld_grace_done,
+ .version = 1,
+ .msglen = sizeof(struct cld_msg),
+};
+
+/* v2 create/check ops include the principal, if available */
+static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v2 = {
+ .init = nfsd4_cld_tracking_init,
+ .exit = nfsd4_cld_tracking_exit,
+ .create = nfsd4_cld_create_v2,
+ .remove = nfsd4_cld_remove,
+ .check = nfsd4_cld_check_v2,
+ .grace_done = nfsd4_cld_grace_done,
+ .version = 2,
+ .msglen = sizeof(struct cld_msg_v2),
};
/* upcall via usermodehelper */
@@ -1273,6 +1882,8 @@
ret = nfsd4_umh_cltrack_upcall("init", NULL, grace_start, NULL);
kfree(grace_start);
+ if (!ret)
+ printk("NFSD: Using UMH upcall client tracking operations.\n");
return ret;
}
@@ -1409,6 +2020,8 @@
.remove = nfsd4_umh_cltrack_remove,
.check = nfsd4_umh_cltrack_check,
.grace_done = nfsd4_umh_cltrack_grace_done,
+ .version = 1,
+ .msglen = 0,
};
int
@@ -1422,9 +2035,20 @@
if (nn->client_tracking_ops)
goto do_init;
+ /* First, try to use nfsdcld */
+ nn->client_tracking_ops = &nfsd4_cld_tracking_ops;
+ status = nn->client_tracking_ops->init(net);
+ if (!status)
+ return status;
+ if (status != -ETIMEDOUT) {
+ nn->client_tracking_ops = &nfsd4_cld_tracking_ops_v0;
+ status = nn->client_tracking_ops->init(net);
+ if (!status)
+ return status;
+ }
+
/*
- * First, try a UMH upcall. It should succeed or fail quickly, so
- * there's little harm in trying that first.
+ * Next, try the UMH upcall.
*/
nn->client_tracking_ops = &nfsd4_umh_tracking_ops;
status = nn->client_tracking_ops->init(net);
@@ -1432,25 +2056,23 @@
return status;
/*
- * See if the recoverydir exists and is a directory. If it is,
- * then use the legacy ops.
+ * Finally, See if the recoverydir exists and is a directory.
+ * If it is, then use the legacy ops.
*/
nn->client_tracking_ops = &nfsd4_legacy_tracking_ops;
status = kern_path(nfs4_recoverydir(), LOOKUP_FOLLOW, &path);
if (!status) {
status = d_is_dir(path.dentry);
path_put(&path);
- if (status)
- goto do_init;
+ if (!status) {
+ status = -EINVAL;
+ goto out;
+ }
}
- /* Finally, try to use nfsdcld */
- nn->client_tracking_ops = &nfsd4_cld_tracking_ops;
- printk(KERN_WARNING "NFSD: the nfsdcld client tracking upcall will be "
- "removed in 3.10. Please transition to using "
- "nfsdcltrack.\n");
do_init:
status = nn->client_tracking_ops->init(net);
+out:
if (status) {
printk(KERN_WARNING "NFSD: Unable to initialize client "
"recovery tracking! (%d)\n", status);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 9c6d1d5..c65aeaa 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -42,6 +42,7 @@
#include <linux/sunrpc/svcauth_gss.h>
#include <linux/sunrpc/addr.h>
#include <linux/jhash.h>
+#include <linux/string_helpers.h>
#include "xdr4.h"
#include "xdr4cb.h"
#include "vfs.h"
@@ -49,6 +50,7 @@
#include "netns.h"
#include "pnfs.h"
+#include "filecache.h"
#define NFSDDBG_FACILITY NFSDDBG_PROC
@@ -77,6 +79,7 @@
/* forward declarations */
static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner);
static void nfs4_free_ol_stateid(struct nfs4_stid *stid);
+void nfsd4_end_grace(struct nfsd_net *nn);
/* Locking: */
@@ -98,6 +101,13 @@
*/
static DECLARE_WAIT_QUEUE_HEAD(close_wq);
+/*
+ * A waitqueue where a writer to clients/#/ctl destroying a client can
+ * wait for cl_rpc_users to drop to 0 and then for the client to be
+ * unhashed.
+ */
+static DECLARE_WAIT_QUEUE_HEAD(expiry_wq);
+
static struct kmem_cache *client_slab;
static struct kmem_cache *openowner_slab;
static struct kmem_cache *lockowner_slab;
@@ -137,7 +147,7 @@
if (is_client_expired(clp))
return nfserr_expired;
- atomic_inc(&clp->cl_refcount);
+ atomic_inc(&clp->cl_rpc_users);
return nfs_ok;
}
@@ -169,20 +179,24 @@
lockdep_assert_held(&nn->client_lock);
- if (!atomic_dec_and_test(&clp->cl_refcount))
+ if (!atomic_dec_and_test(&clp->cl_rpc_users))
return;
if (!is_client_expired(clp))
renew_client_locked(clp);
+ else
+ wake_up_all(&expiry_wq);
}
static void put_client_renew(struct nfs4_client *clp)
{
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
- if (!atomic_dec_and_lock(&clp->cl_refcount, &nn->client_lock))
+ if (!atomic_dec_and_lock(&clp->cl_rpc_users, &nn->client_lock))
return;
if (!is_client_expired(clp))
renew_client_locked(clp);
+ else
+ wake_up_all(&expiry_wq);
spin_unlock(&nn->client_lock);
}
@@ -238,7 +252,7 @@
}
spin_unlock(&nn->blocked_locks_lock);
if (found)
- posix_unblock_lock(&found->nbl_lock);
+ locks_delete_block(&found->nbl_lock);
return found;
}
@@ -265,6 +279,7 @@
static void
free_blocked_lock(struct nfsd4_blocked_lock *nbl)
{
+ locks_delete_block(&nbl->nbl_lock);
locks_release_private(&nbl->nbl_lock);
kfree(nbl);
}
@@ -293,11 +308,18 @@
nbl = list_first_entry(&reaplist, struct nfsd4_blocked_lock,
nbl_lru);
list_del_init(&nbl->nbl_lru);
- posix_unblock_lock(&nbl->nbl_lock);
free_blocked_lock(nbl);
}
}
+static void
+nfsd4_cb_notify_lock_prepare(struct nfsd4_callback *cb)
+{
+ struct nfsd4_blocked_lock *nbl = container_of(cb,
+ struct nfsd4_blocked_lock, nbl_cb);
+ locks_delete_block(&nbl->nbl_lock);
+}
+
static int
nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task)
{
@@ -325,6 +347,7 @@
}
static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = {
+ .prepare = nfsd4_cb_notify_lock_prepare,
.done = nfsd4_cb_notify_lock_done,
.release = nfsd4_cb_notify_lock_release,
};
@@ -407,18 +430,18 @@
}
}
-static struct file *
+static struct nfsd_file *
__nfs4_get_fd(struct nfs4_file *f, int oflag)
{
if (f->fi_fds[oflag])
- return get_file(f->fi_fds[oflag]);
+ return nfsd_file_get(f->fi_fds[oflag]);
return NULL;
}
-static struct file *
+static struct nfsd_file *
find_writeable_file_locked(struct nfs4_file *f)
{
- struct file *ret;
+ struct nfsd_file *ret;
lockdep_assert_held(&f->fi_lock);
@@ -428,10 +451,10 @@
return ret;
}
-static struct file *
+static struct nfsd_file *
find_writeable_file(struct nfs4_file *f)
{
- struct file *ret;
+ struct nfsd_file *ret;
spin_lock(&f->fi_lock);
ret = find_writeable_file_locked(f);
@@ -440,9 +463,10 @@
return ret;
}
-static struct file *find_readable_file_locked(struct nfs4_file *f)
+static struct nfsd_file *
+find_readable_file_locked(struct nfs4_file *f)
{
- struct file *ret;
+ struct nfsd_file *ret;
lockdep_assert_held(&f->fi_lock);
@@ -452,10 +476,10 @@
return ret;
}
-static struct file *
+static struct nfsd_file *
find_readable_file(struct nfs4_file *f)
{
- struct file *ret;
+ struct nfsd_file *ret;
spin_lock(&f->fi_lock);
ret = find_readable_file_locked(f);
@@ -464,10 +488,10 @@
return ret;
}
-struct file *
+struct nfsd_file *
find_any_file(struct nfs4_file *f)
{
- struct file *ret;
+ struct nfsd_file *ret;
spin_lock(&f->fi_lock);
ret = __nfs4_get_fd(f, O_RDWR);
@@ -568,17 +592,17 @@
might_lock(&fp->fi_lock);
if (atomic_dec_and_lock(&fp->fi_access[oflag], &fp->fi_lock)) {
- struct file *f1 = NULL;
- struct file *f2 = NULL;
+ struct nfsd_file *f1 = NULL;
+ struct nfsd_file *f2 = NULL;
swap(f1, fp->fi_fds[oflag]);
if (atomic_read(&fp->fi_access[1 - oflag]) == 0)
swap(f2, fp->fi_fds[O_RDWR]);
spin_unlock(&fp->fi_lock);
if (f1)
- fput(f1);
+ nfsd_file_put(f1);
if (f2)
- fput(f2);
+ nfsd_file_put(f2);
}
}
@@ -684,7 +708,8 @@
idr_preload(GFP_KERNEL);
spin_lock(&cl->cl_lock);
- new_id = idr_alloc_cyclic(&cl->cl_stateids, stid, 0, 0, GFP_NOWAIT);
+ /* Reserving 0 for start of file in nfsdfs "states" file: */
+ new_id = idr_alloc_cyclic(&cl->cl_stateids, stid, 1, 0, GFP_NOWAIT);
spin_unlock(&cl->cl_lock);
idr_preload_end();
if (new_id < 0)
@@ -713,6 +738,36 @@
return NULL;
}
+/*
+ * Create a unique stateid_t to represent each COPY.
+ */
+int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy)
+{
+ int new_id;
+
+ idr_preload(GFP_KERNEL);
+ spin_lock(&nn->s2s_cp_lock);
+ new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, copy, 0, 0, GFP_NOWAIT);
+ spin_unlock(&nn->s2s_cp_lock);
+ idr_preload_end();
+ if (new_id < 0)
+ return 0;
+ copy->cp_stateid.si_opaque.so_id = new_id;
+ copy->cp_stateid.si_opaque.so_clid.cl_boot = nn->boot_time;
+ copy->cp_stateid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
+ return 1;
+}
+
+void nfs4_free_cp_state(struct nfsd4_copy *copy)
+{
+ struct nfsd_net *nn;
+
+ nn = net_generic(copy->cp_clp->net, nfsd_net_id);
+ spin_lock(&nn->s2s_cp_lock);
+ idr_remove(&nn->s2s_cp_stateids, copy->cp_stateid.si_opaque.so_id);
+ spin_unlock(&nn->s2s_cp_lock);
+}
+
static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp)
{
struct nfs4_stid *stid;
@@ -880,25 +935,25 @@
static void put_deleg_file(struct nfs4_file *fp)
{
- struct file *filp = NULL;
+ struct nfsd_file *nf = NULL;
spin_lock(&fp->fi_lock);
if (--fp->fi_delegees == 0)
- swap(filp, fp->fi_deleg_file);
+ swap(nf, fp->fi_deleg_file);
spin_unlock(&fp->fi_lock);
- if (filp)
- fput(filp);
+ if (nf)
+ nfsd_file_put(nf);
}
static void nfs4_unlock_deleg_lease(struct nfs4_delegation *dp)
{
struct nfs4_file *fp = dp->dl_stid.sc_file;
- struct file *filp = fp->fi_deleg_file;
+ struct nfsd_file *nf = fp->fi_deleg_file;
WARN_ON_ONCE(!fp->fi_delegees);
- vfs_setlease(filp, F_UNLCK, NULL, (void **)&dp);
+ vfs_setlease(nf->nf_file, F_UNLCK, NULL, (void **)&dp);
put_deleg_file(fp);
}
@@ -1028,9 +1083,9 @@
return id & CLIENT_HASH_MASK;
}
-static unsigned int clientstr_hashval(const char *name)
+static unsigned int clientstr_hashval(struct xdr_netobj name)
{
- return opaque_hashval(name, 8) & CLIENT_HASH_MASK;
+ return opaque_hashval(name.data, 8) & CLIENT_HASH_MASK;
}
/*
@@ -1236,11 +1291,14 @@
{
struct nfs4_ol_stateid *stp = openlockstateid(stid);
struct nfs4_lockowner *lo = lockowner(stp->st_stateowner);
- struct file *file;
+ struct nfsd_file *nf;
- file = find_any_file(stp->st_stid.sc_file);
- if (file)
- filp_close(file, (fl_owner_t)lo);
+ nf = find_any_file(stp->st_stid.sc_file);
+ if (nf) {
+ get_file(nf->nf_file);
+ filp_close(nf->nf_file, (fl_owner_t)lo);
+ nfsd_file_put(nf);
+ }
nfs4_free_ol_stateid(stid);
}
@@ -1510,21 +1568,39 @@
* re-negotiate active sessions and reduce their slot usage to make
* room for new connections. For now we just fail the create session.
*/
-static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca)
+static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca, struct nfsd_net *nn)
{
u32 slotsize = slot_bytes(ca);
u32 num = ca->maxreqs;
- int avail;
+ unsigned long avail, total_avail;
+ unsigned int scale_factor;
spin_lock(&nfsd_drc_lock);
- avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION,
- nfsd_drc_max_mem - nfsd_drc_mem_used);
+ if (nfsd_drc_max_mem > nfsd_drc_mem_used)
+ total_avail = nfsd_drc_max_mem - nfsd_drc_mem_used;
+ else
+ /* We have handed out more space than we chose in
+ * set_max_drc() to allow. That isn't really a
+ * problem as long as that doesn't make us think we
+ * have lots more due to integer overflow.
+ */
+ total_avail = 0;
+ avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION, total_avail);
/*
- * Never use more than a third of the remaining memory,
- * unless it's the only way to give this client a slot:
+ * Never use more than a fraction of the remaining memory,
+ * unless it's the only way to give this client a slot.
+ * The chosen fraction is either 1/8 or 1/number of threads,
+ * whichever is smaller. This ensures there are adequate
+ * slots to support multiple clients per thread.
+ * Give the client one slot even if that would require
+ * over-allocation--it is better than failure.
*/
- avail = clamp_t(int, avail, slotsize, avail/3);
+ scale_factor = max_t(unsigned int, 8, nn->nfsd_serv->sv_nrthreads);
+
+ avail = clamp_t(unsigned long, avail, slotsize,
+ total_avail/scale_factor);
num = min_t(int, num, avail / slotsize);
+ num = max_t(int, num, 1);
nfsd_drc_mem_used += num * slotsize;
spin_unlock(&nfsd_drc_lock);
@@ -1804,7 +1880,7 @@
clp = kmem_cache_zalloc(client_slab, GFP_KERNEL);
if (clp == NULL)
return NULL;
- clp->cl_name.data = kmemdup(name.data, name.len, GFP_KERNEL);
+ xdr_netobj_dup(&clp->cl_name, &name, GFP_KERNEL);
if (clp->cl_name.data == NULL)
goto err_no_name;
clp->cl_ownerstr_hashtbl = kmalloc_array(OWNER_HASH_SIZE,
@@ -1814,10 +1890,9 @@
goto err_no_hashtbl;
for (i = 0; i < OWNER_HASH_SIZE; i++)
INIT_LIST_HEAD(&clp->cl_ownerstr_hashtbl[i]);
- clp->cl_name.len = name.len;
INIT_LIST_HEAD(&clp->cl_sessions);
idr_init(&clp->cl_stateids);
- atomic_set(&clp->cl_refcount, 0);
+ atomic_set(&clp->cl_rpc_users, 0);
clp->cl_cb_state = NFSD4_CB_UNKNOWN;
INIT_LIST_HEAD(&clp->cl_idhash);
INIT_LIST_HEAD(&clp->cl_openowners);
@@ -1827,6 +1902,8 @@
#ifdef CONFIG_NFSD_PNFS
INIT_LIST_HEAD(&clp->cl_lo_states);
#endif
+ INIT_LIST_HEAD(&clp->async_copies);
+ spin_lock_init(&clp->async_lock);
spin_lock_init(&clp->cl_lock);
rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
return clp;
@@ -1837,6 +1914,25 @@
return NULL;
}
+static void __free_client(struct kref *k)
+{
+ struct nfsdfs_client *c = container_of(k, struct nfsdfs_client, cl_ref);
+ struct nfs4_client *clp = container_of(c, struct nfs4_client, cl_nfsdfs);
+
+ free_svc_cred(&clp->cl_cred);
+ kfree(clp->cl_ownerstr_hashtbl);
+ kfree(clp->cl_name.data);
+ kfree(clp->cl_nii_domain.data);
+ kfree(clp->cl_nii_name.data);
+ idr_destroy(&clp->cl_stateids);
+ kmem_cache_free(client_slab, clp);
+}
+
+static void drop_client(struct nfs4_client *clp)
+{
+ kref_put(&clp->cl_nfsdfs.cl_ref, __free_client);
+}
+
static void
free_client(struct nfs4_client *clp)
{
@@ -1849,11 +1945,12 @@
free_session(ses);
}
rpc_destroy_wait_queue(&clp->cl_cb_waitq);
- free_svc_cred(&clp->cl_cred);
- kfree(clp->cl_ownerstr_hashtbl);
- kfree(clp->cl_name.data);
- idr_destroy(&clp->cl_stateids);
- kmem_cache_free(client_slab, clp);
+ if (clp->cl_nfsd_dentry) {
+ nfsd_client_rmdir(clp->cl_nfsd_dentry);
+ clp->cl_nfsd_dentry = NULL;
+ wake_up_all(&expiry_wq);
+ }
+ drop_client(clp);
}
/* must be called under the client_lock */
@@ -1894,7 +1991,7 @@
static __be32 mark_client_expired_locked(struct nfs4_client *clp)
{
- if (atomic_read(&clp->cl_refcount))
+ if (atomic_read(&clp->cl_rpc_users))
return nfserr_jukebox;
unhash_client_locked(clp);
return nfs_ok;
@@ -1942,10 +2039,12 @@
}
}
nfsd4_return_all_client_layouts(clp);
+ nfsd4_shutdown_copy(clp);
nfsd4_shutdown_callback(clp);
if (clp->cl_cb_conn.cb_xprt)
svc_xprt_put(clp->cl_cb_conn.cb_xprt);
free_client(clp);
+ wake_up_all(&expiry_wq);
}
static void
@@ -1955,6 +2054,22 @@
__destroy_client(clp);
}
+static void inc_reclaim_complete(struct nfs4_client *clp)
+{
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
+ if (!nn->track_reclaim_completes)
+ return;
+ if (!nfsd4_find_reclaim_client(clp->cl_name, nn))
+ return;
+ if (atomic_inc_return(&nn->nr_reclaim_complete) ==
+ nn->reclaim_str_hashtbl_size) {
+ printk(KERN_INFO "NFSD: all clients done reclaiming, ending NFSv4 grace period (net %x)\n",
+ clp->net->ns.inum);
+ nfsd4_end_grace(nn);
+ }
+}
+
static void expire_client(struct nfs4_client *clp)
{
unhash_client(clp);
@@ -2006,11 +2121,6 @@
return memcmp(o1->data, o2->data, o1->len);
}
-static int same_name(const char *n1, const char *n2)
-{
- return 0 == memcmp(n1, n2, HEXDIR_LEN);
-}
-
static int
same_verf(nfs4_verifier *v1, nfs4_verifier *v2)
{
@@ -2145,6 +2255,342 @@
return s;
}
+static struct nfs4_client *get_nfsdfs_clp(struct inode *inode)
+{
+ struct nfsdfs_client *nc;
+ nc = get_nfsdfs_client(inode);
+ if (!nc)
+ return NULL;
+ return container_of(nc, struct nfs4_client, cl_nfsdfs);
+}
+
+static void seq_quote_mem(struct seq_file *m, char *data, int len)
+{
+ seq_printf(m, "\"");
+ seq_escape_mem_ascii(m, data, len);
+ seq_printf(m, "\"");
+}
+
+static int client_info_show(struct seq_file *m, void *v)
+{
+ struct inode *inode = m->private;
+ struct nfs4_client *clp;
+ u64 clid;
+
+ clp = get_nfsdfs_clp(inode);
+ if (!clp)
+ return -ENXIO;
+ memcpy(&clid, &clp->cl_clientid, sizeof(clid));
+ seq_printf(m, "clientid: 0x%llx\n", clid);
+ seq_printf(m, "address: \"%pISpc\"\n", (struct sockaddr *)&clp->cl_addr);
+ seq_printf(m, "name: ");
+ seq_quote_mem(m, clp->cl_name.data, clp->cl_name.len);
+ seq_printf(m, "\nminor version: %d\n", clp->cl_minorversion);
+ if (clp->cl_nii_domain.data) {
+ seq_printf(m, "Implementation domain: ");
+ seq_quote_mem(m, clp->cl_nii_domain.data,
+ clp->cl_nii_domain.len);
+ seq_printf(m, "\nImplementation name: ");
+ seq_quote_mem(m, clp->cl_nii_name.data, clp->cl_nii_name.len);
+ seq_printf(m, "\nImplementation time: [%ld, %ld]\n",
+ clp->cl_nii_time.tv_sec, clp->cl_nii_time.tv_nsec);
+ }
+ drop_client(clp);
+
+ return 0;
+}
+
+static int client_info_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, client_info_show, inode);
+}
+
+static const struct file_operations client_info_fops = {
+ .open = client_info_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static void *states_start(struct seq_file *s, loff_t *pos)
+ __acquires(&clp->cl_lock)
+{
+ struct nfs4_client *clp = s->private;
+ unsigned long id = *pos;
+ void *ret;
+
+ spin_lock(&clp->cl_lock);
+ ret = idr_get_next_ul(&clp->cl_stateids, &id);
+ *pos = id;
+ return ret;
+}
+
+static void *states_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct nfs4_client *clp = s->private;
+ unsigned long id = *pos;
+ void *ret;
+
+ id = *pos;
+ id++;
+ ret = idr_get_next_ul(&clp->cl_stateids, &id);
+ *pos = id;
+ return ret;
+}
+
+static void states_stop(struct seq_file *s, void *v)
+ __releases(&clp->cl_lock)
+{
+ struct nfs4_client *clp = s->private;
+
+ spin_unlock(&clp->cl_lock);
+}
+
+static void nfs4_show_superblock(struct seq_file *s, struct nfsd_file *f)
+{
+ struct inode *inode = f->nf_inode;
+
+ seq_printf(s, "superblock: \"%02x:%02x:%ld\"",
+ MAJOR(inode->i_sb->s_dev),
+ MINOR(inode->i_sb->s_dev),
+ inode->i_ino);
+}
+
+static void nfs4_show_owner(struct seq_file *s, struct nfs4_stateowner *oo)
+{
+ seq_printf(s, "owner: ");
+ seq_quote_mem(s, oo->so_owner.data, oo->so_owner.len);
+}
+
+static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st)
+{
+ struct nfs4_ol_stateid *ols;
+ struct nfs4_file *nf;
+ struct nfsd_file *file;
+ struct nfs4_stateowner *oo;
+ unsigned int access, deny;
+
+ if (st->sc_type != NFS4_OPEN_STID && st->sc_type != NFS4_LOCK_STID)
+ return 0; /* XXX: or SEQ_SKIP? */
+ ols = openlockstateid(st);
+ oo = ols->st_stateowner;
+ nf = st->sc_file;
+ file = find_any_file(nf);
+
+ seq_printf(s, "- 0x%16phN: { type: open, ", &st->sc_stateid);
+
+ access = bmap_to_share_mode(ols->st_access_bmap);
+ deny = bmap_to_share_mode(ols->st_deny_bmap);
+
+ seq_printf(s, "access: \%s\%s, ",
+ access & NFS4_SHARE_ACCESS_READ ? "r" : "-",
+ access & NFS4_SHARE_ACCESS_WRITE ? "w" : "-");
+ seq_printf(s, "deny: \%s\%s, ",
+ deny & NFS4_SHARE_ACCESS_READ ? "r" : "-",
+ deny & NFS4_SHARE_ACCESS_WRITE ? "w" : "-");
+
+ nfs4_show_superblock(s, file);
+ seq_printf(s, ", ");
+ nfs4_show_owner(s, oo);
+ seq_printf(s, " }\n");
+ nfsd_file_put(file);
+
+ return 0;
+}
+
+static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st)
+{
+ struct nfs4_ol_stateid *ols;
+ struct nfs4_file *nf;
+ struct nfsd_file *file;
+ struct nfs4_stateowner *oo;
+
+ ols = openlockstateid(st);
+ oo = ols->st_stateowner;
+ nf = st->sc_file;
+ file = find_any_file(nf);
+
+ seq_printf(s, "- 0x%16phN: { type: lock, ", &st->sc_stateid);
+
+ /*
+ * Note: a lock stateid isn't really the same thing as a lock,
+ * it's the locking state held by one owner on a file, and there
+ * may be multiple (or no) lock ranges associated with it.
+ * (Same for the matter is true of open stateids.)
+ */
+
+ nfs4_show_superblock(s, file);
+ /* XXX: open stateid? */
+ seq_printf(s, ", ");
+ nfs4_show_owner(s, oo);
+ seq_printf(s, " }\n");
+ nfsd_file_put(file);
+
+ return 0;
+}
+
+static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
+{
+ struct nfs4_delegation *ds;
+ struct nfs4_file *nf;
+ struct nfsd_file *file;
+
+ ds = delegstateid(st);
+ nf = st->sc_file;
+ file = nf->fi_deleg_file;
+
+ seq_printf(s, "- 0x%16phN: { type: deleg, ", &st->sc_stateid);
+
+ /* Kinda dead code as long as we only support read delegs: */
+ seq_printf(s, "access: %s, ",
+ ds->dl_type == NFS4_OPEN_DELEGATE_READ ? "r" : "w");
+
+ /* XXX: lease time, whether it's being recalled. */
+
+ nfs4_show_superblock(s, file);
+ seq_printf(s, " }\n");
+
+ return 0;
+}
+
+static int nfs4_show_layout(struct seq_file *s, struct nfs4_stid *st)
+{
+ struct nfs4_layout_stateid *ls;
+ struct nfsd_file *file;
+
+ ls = container_of(st, struct nfs4_layout_stateid, ls_stid);
+ file = ls->ls_file;
+
+ seq_printf(s, "- 0x%16phN: { type: layout, ", &st->sc_stateid);
+
+ /* XXX: What else would be useful? */
+
+ nfs4_show_superblock(s, file);
+ seq_printf(s, " }\n");
+
+ return 0;
+}
+
+static int states_show(struct seq_file *s, void *v)
+{
+ struct nfs4_stid *st = v;
+
+ switch (st->sc_type) {
+ case NFS4_OPEN_STID:
+ return nfs4_show_open(s, st);
+ case NFS4_LOCK_STID:
+ return nfs4_show_lock(s, st);
+ case NFS4_DELEG_STID:
+ return nfs4_show_deleg(s, st);
+ case NFS4_LAYOUT_STID:
+ return nfs4_show_layout(s, st);
+ default:
+ return 0; /* XXX: or SEQ_SKIP? */
+ }
+ /* XXX: copy stateids? */
+}
+
+static struct seq_operations states_seq_ops = {
+ .start = states_start,
+ .next = states_next,
+ .stop = states_stop,
+ .show = states_show
+};
+
+static int client_states_open(struct inode *inode, struct file *file)
+{
+ struct seq_file *s;
+ struct nfs4_client *clp;
+ int ret;
+
+ clp = get_nfsdfs_clp(inode);
+ if (!clp)
+ return -ENXIO;
+
+ ret = seq_open(file, &states_seq_ops);
+ if (ret)
+ return ret;
+ s = file->private_data;
+ s->private = clp;
+ return 0;
+}
+
+static int client_opens_release(struct inode *inode, struct file *file)
+{
+ struct seq_file *m = file->private_data;
+ struct nfs4_client *clp = m->private;
+
+ /* XXX: alternatively, we could get/drop in seq start/stop */
+ drop_client(clp);
+ return 0;
+}
+
+static const struct file_operations client_states_fops = {
+ .open = client_states_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = client_opens_release,
+};
+
+/*
+ * Normally we refuse to destroy clients that are in use, but here the
+ * administrator is telling us to just do it. We also want to wait
+ * so the caller has a guarantee that the client's locks are gone by
+ * the time the write returns:
+ */
+static void force_expire_client(struct nfs4_client *clp)
+{
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+ bool already_expired;
+
+ spin_lock(&clp->cl_lock);
+ clp->cl_time = 0;
+ spin_unlock(&clp->cl_lock);
+
+ wait_event(expiry_wq, atomic_read(&clp->cl_rpc_users) == 0);
+ spin_lock(&nn->client_lock);
+ already_expired = list_empty(&clp->cl_lru);
+ if (!already_expired)
+ unhash_client_locked(clp);
+ spin_unlock(&nn->client_lock);
+
+ if (!already_expired)
+ expire_client(clp);
+ else
+ wait_event(expiry_wq, clp->cl_nfsd_dentry == NULL);
+}
+
+static ssize_t client_ctl_write(struct file *file, const char __user *buf,
+ size_t size, loff_t *pos)
+{
+ char *data;
+ struct nfs4_client *clp;
+
+ data = simple_transaction_get(file, buf, size);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+ if (size != 7 || 0 != memcmp(data, "expire\n", 7))
+ return -EINVAL;
+ clp = get_nfsdfs_clp(file_inode(file));
+ if (!clp)
+ return -ENXIO;
+ force_expire_client(clp);
+ drop_client(clp);
+ return 7;
+}
+
+static const struct file_operations client_ctl_fops = {
+ .write = client_ctl_write,
+ .release = simple_transaction_release,
+};
+
+static const struct tree_descr client_files[] = {
+ [0] = {"info", &client_info_fops, S_IRUSR},
+ [1] = {"states", &client_states_fops, S_IRUSR},
+ [2] = {"ctl", &client_ctl_fops, S_IRUSR|S_IWUSR},
+ [3] = {""},
+};
+
static struct nfs4_client *create_client(struct xdr_netobj name,
struct svc_rqst *rqstp, nfs4_verifier *verf)
{
@@ -2152,6 +2598,7 @@
struct sockaddr *sa = svc_addr(rqstp);
int ret;
struct net *net = SVC_NET(rqstp);
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
clp = alloc_client(name);
if (clp == NULL)
@@ -2162,13 +2609,22 @@
free_client(clp);
return NULL;
}
+ gen_clid(clp, nn);
+ kref_init(&clp->cl_nfsdfs.cl_ref);
nfsd4_init_cb(&clp->cl_cb_null, clp, NULL, NFSPROC4_CLNT_CB_NULL);
clp->cl_time = get_seconds();
clear_bit(0, &clp->cl_cb_slot_busy);
copy_verf(clp, verf);
- rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa);
+ memcpy(&clp->cl_addr, sa, sizeof(struct sockaddr_storage));
clp->cl_cb_session = NULL;
clp->net = net;
+ clp->cl_nfsd_dentry = nfsd_client_mkdir(nn, &clp->cl_nfsdfs,
+ clp->cl_clientid.cl_id - nn->clientid_base,
+ client_files);
+ if (!clp->cl_nfsd_dentry) {
+ free_client(clp);
+ return NULL;
+ }
return clp;
}
@@ -2475,7 +2931,24 @@
|| !list_empty(&clp->cl_lo_states)
#endif
|| !list_empty(&clp->cl_delegations)
- || !list_empty(&clp->cl_sessions);
+ || !list_empty(&clp->cl_sessions)
+ || !list_empty(&clp->async_copies);
+}
+
+static __be32 copy_impl_id(struct nfs4_client *clp,
+ struct nfsd4_exchange_id *exid)
+{
+ if (!exid->nii_domain.data)
+ return 0;
+ xdr_netobj_dup(&clp->cl_nii_domain, &exid->nii_domain, GFP_KERNEL);
+ if (!clp->cl_nii_domain.data)
+ return nfserr_jukebox;
+ xdr_netobj_dup(&clp->cl_nii_name, &exid->nii_name, GFP_KERNEL);
+ if (!clp->cl_nii_name.data)
+ return nfserr_jukebox;
+ clp->cl_nii_time.tv_sec = exid->nii_time.tv_sec;
+ clp->cl_nii_time.tv_nsec = exid->nii_time.tv_nsec;
+ return 0;
}
__be32
@@ -2504,6 +2977,9 @@
new = create_client(exid->clname, rqstp, &verf);
if (new == NULL)
return nfserr_jukebox;
+ status = copy_impl_id(new, exid);
+ if (status)
+ goto out_nolock;
switch (exid->spa_how) {
case SP4_MACH_CRED:
@@ -2542,6 +3018,7 @@
break;
default: /* checked by xdr code */
WARN_ON_ONCE(1);
+ /* fall through */
case SP4_SSV:
status = nfserr_encr_alg_unsupp;
goto out_nolock;
@@ -2611,7 +3088,6 @@
new->cl_spo_must_allow.u.words[0] = exid->spo_must_allow[0];
new->cl_spo_must_allow.u.words[1] = exid->spo_must_allow[1];
- gen_clid(new, nn);
add_to_unconfirmed(new);
swap(new, conf);
out_copy:
@@ -2716,10 +3192,10 @@
* performance. When short on memory we therefore prefer to
* decrease number of slots instead of their size. Clients that
* request larger slots than they need will get poor results:
+ * Note that we always allow at least one slot, because our
+ * accounting is soft and provides no guarantees either way.
*/
- ca->maxreqs = nfsd4_get_drc_mem(ca);
- if (!ca->maxreqs)
- return nfserr_jukebox;
+ ca->maxreqs = nfsd4_get_drc_mem(ca, nn);
return nfs_ok;
}
@@ -3310,6 +3786,7 @@
status = nfs_ok;
nfsd4_client_record_create(cstate->session->se_client);
+ inc_reclaim_complete(cstate->session->se_client);
out:
return status;
}
@@ -3354,7 +3831,7 @@
copy_clid(new, conf);
gen_confirm(new, nn);
} else /* case 4 (new client) or cases 2, 3 (client reboot): */
- gen_clid(new, nn);
+ ;
new->cl_minorversion = 0;
gen_callback(new, setclid, rqstp);
add_to_unconfirmed(new);
@@ -3575,12 +4052,11 @@
if (!sop)
return NULL;
- sop->so_owner.data = kmemdup(owner->data, owner->len, GFP_KERNEL);
+ xdr_netobj_dup(&sop->so_owner, owner, GFP_KERNEL);
if (!sop->so_owner.data) {
kmem_cache_free(slab, sop);
return NULL;
}
- sop->so_owner.len = owner->len;
INIT_LIST_HEAD(&sop->so_stateids);
sop->so_client = clp;
@@ -3914,6 +4390,9 @@
switch (task->tk_status) {
case 0:
return 1;
+ case -NFS4ERR_DELAY:
+ rpc_delay(task, 2 * HZ);
+ return 0;
case -EBADHANDLE:
case -NFS4ERR_BAD_STATEID:
/*
@@ -3926,7 +4405,7 @@
}
/*FALLTHRU*/
default:
- return -1;
+ return 1;
}
}
@@ -4032,7 +4511,7 @@
spin_unlock(&nn->client_lock);
return nfserr_expired;
}
- atomic_inc(&found->cl_refcount);
+ atomic_inc(&found->cl_rpc_users);
spin_unlock(&nn->client_lock);
/* Cache the nfs4_client in cstate! */
@@ -4195,7 +4674,7 @@
struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp,
struct nfsd4_open *open)
{
- struct file *filp = NULL;
+ struct nfsd_file *nf = NULL;
__be32 status;
int oflag = nfs4_access_to_omode(open->op_share_access);
int access = nfs4_access_to_access(open->op_share_access);
@@ -4231,18 +4710,18 @@
if (!fp->fi_fds[oflag]) {
spin_unlock(&fp->fi_lock);
- status = nfsd_open(rqstp, cur_fh, S_IFREG, access, &filp);
+ status = nfsd_file_acquire(rqstp, cur_fh, access, &nf);
if (status)
goto out_put_access;
spin_lock(&fp->fi_lock);
if (!fp->fi_fds[oflag]) {
- fp->fi_fds[oflag] = filp;
- filp = NULL;
+ fp->fi_fds[oflag] = nf;
+ nf = NULL;
}
}
spin_unlock(&fp->fi_lock);
- if (filp)
- fput(filp);
+ if (nf)
+ nfsd_file_put(nf);
status = nfsd4_truncate(rqstp, cur_fh, open);
if (status)
@@ -4311,7 +4790,7 @@
fl->fl_end = OFFSET_MAX;
fl->fl_owner = (fl_owner_t)dp;
fl->fl_pid = current->tgid;
- fl->fl_file = dp->dl_stid.sc_file->fi_deleg_file;
+ fl->fl_file = dp->dl_stid.sc_file->fi_deleg_file->nf_file;
return fl;
}
@@ -4321,7 +4800,7 @@
{
int status = 0;
struct nfs4_delegation *dp;
- struct file *filp;
+ struct nfsd_file *nf;
struct file_lock *fl;
/*
@@ -4332,8 +4811,8 @@
if (fp->fi_had_conflict)
return ERR_PTR(-EAGAIN);
- filp = find_readable_file(fp);
- if (!filp) {
+ nf = find_readable_file(fp);
+ if (!nf) {
/* We should always have a readable file here */
WARN_ON_ONCE(1);
return ERR_PTR(-EBADF);
@@ -4343,17 +4822,17 @@
if (nfs4_delegation_exists(clp, fp))
status = -EAGAIN;
else if (!fp->fi_deleg_file) {
- fp->fi_deleg_file = filp;
+ fp->fi_deleg_file = nf;
/* increment early to prevent fi_deleg_file from being
* cleared */
fp->fi_delegees = 1;
- filp = NULL;
+ nf = NULL;
} else
fp->fi_delegees++;
spin_unlock(&fp->fi_lock);
spin_unlock(&state_lock);
- if (filp)
- fput(filp);
+ if (nf)
+ nfsd_file_put(nf);
if (status)
return ERR_PTR(status);
@@ -4366,7 +4845,7 @@
if (!fl)
goto out_clnt_odstate;
- status = vfs_setlease(fp->fi_deleg_file, fl->fl_type, &fl, NULL);
+ status = vfs_setlease(fp->fi_deleg_file->nf_file, fl->fl_type, &fl, NULL);
if (fl)
locks_free_lock(fl);
if (status)
@@ -4386,7 +4865,7 @@
return dp;
out_unlock:
- vfs_setlease(fp->fi_deleg_file, F_UNLCK, NULL, (void **)&dp);
+ vfs_setlease(fp->fi_deleg_file->nf_file, F_UNLCK, NULL, (void **)&dp);
out_clnt_odstate:
put_clnt_odstate(dp->dl_clnt_odstate);
nfs4_put_stid(&dp->dl_stid);
@@ -4669,7 +5148,6 @@
if (nn->grace_ended)
return;
- dprintk("NFSD: end of grace period\n");
nn->grace_ended = true;
/*
* If the server goes down again right now, an NFSv4
@@ -4705,6 +5183,10 @@
unsigned long double_grace_period_end = nn->boot_time +
2 * nn->nfsd4_lease;
+ if (nn->track_reclaim_completes &&
+ atomic_read(&nn->nr_reclaim_complete) ==
+ nn->reclaim_str_hashtbl_size)
+ return false;
if (!nn->somebody_reclaimed)
return false;
nn->somebody_reclaimed = false;
@@ -4735,6 +5217,7 @@
new_timeo = 0;
goto out;
}
+ dprintk("NFSD: end of grace period\n");
nfsd4_end_grace(nn);
INIT_LIST_HEAD(&reaplist);
spin_lock(&nn->client_lock);
@@ -4829,7 +5312,6 @@
nbl = list_first_entry(&reaplist,
struct nfsd4_blocked_lock, nbl_lru);
list_del_init(&nbl->nbl_lru);
- posix_unblock_lock(&nbl->nbl_lock);
free_blocked_lock(nbl);
}
out:
@@ -5054,7 +5536,7 @@
return nfs_ok;
}
-static struct file *
+static struct nfsd_file *
nfs4_find_file(struct nfs4_stid *s, int flags)
{
if (!s)
@@ -5064,7 +5546,7 @@
case NFS4_DELEG_STID:
if (WARN_ON_ONCE(!s->sc_file->fi_deleg_file))
return NULL;
- return get_file(s->sc_file->fi_deleg_file);
+ return nfsd_file_get(s->sc_file->fi_deleg_file);
case NFS4_OPEN_STID:
case NFS4_LOCK_STID:
if (flags & RD_STATE)
@@ -5078,7 +5560,7 @@
}
static __be32
-nfs4_check_olstateid(struct svc_fh *fhp, struct nfs4_ol_stateid *ols, int flags)
+nfs4_check_olstateid(struct nfs4_ol_stateid *ols, int flags)
{
__be32 status;
@@ -5090,32 +5572,28 @@
static __be32
nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s,
- struct file **filpp, bool *tmp_file, int flags)
+ struct nfsd_file **nfp, int flags)
{
int acc = (flags & RD_STATE) ? NFSD_MAY_READ : NFSD_MAY_WRITE;
- struct file *file;
+ struct nfsd_file *nf;
__be32 status;
- file = nfs4_find_file(s, flags);
- if (file) {
+ nf = nfs4_find_file(s, flags);
+ if (nf) {
status = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
acc | NFSD_MAY_OWNER_OVERRIDE);
if (status) {
- fput(file);
- return status;
+ nfsd_file_put(nf);
+ goto out;
}
-
- *filpp = file;
} else {
- status = nfsd_open(rqstp, fhp, S_IFREG, acc, filpp);
+ status = nfsd_file_acquire(rqstp, fhp, acc, &nf);
if (status)
return status;
-
- if (tmp_file)
- *tmp_file = true;
}
-
- return 0;
+ *nfp = nf;
+out:
+ return status;
}
/*
@@ -5124,7 +5602,7 @@
__be32
nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate, struct svc_fh *fhp,
- stateid_t *stateid, int flags, struct file **filpp, bool *tmp_file)
+ stateid_t *stateid, int flags, struct nfsd_file **nfp)
{
struct inode *ino = d_inode(fhp->fh_dentry);
struct net *net = SVC_NET(rqstp);
@@ -5132,10 +5610,8 @@
struct nfs4_stid *s = NULL;
__be32 status;
- if (filpp)
- *filpp = NULL;
- if (tmp_file)
- *tmp_file = false;
+ if (nfp)
+ *nfp = NULL;
if (grace_disallows_io(net, ino))
return nfserr_grace;
@@ -5161,7 +5637,7 @@
break;
case NFS4_OPEN_STID:
case NFS4_LOCK_STID:
- status = nfs4_check_olstateid(fhp, openlockstateid(s), flags);
+ status = nfs4_check_olstateid(openlockstateid(s), flags);
break;
default:
status = nfserr_bad_stateid;
@@ -5172,8 +5648,8 @@
status = nfs4_check_fh(fhp, s);
done:
- if (!status && filpp)
- status = nfs4_check_file(rqstp, fhp, s, filpp, tmp_file, flags);
+ if (status == nfs_ok && nfp)
+ status = nfs4_check_file(rqstp, fhp, s, nfp, flags);
out:
if (s)
nfs4_put_stid(s);
@@ -5662,12 +6138,11 @@
if (fl->fl_lmops == &nfsd_posix_mng_ops) {
lo = (struct nfs4_lockowner *) fl->fl_owner;
- deny->ld_owner.data = kmemdup(lo->lo_owner.so_owner.data,
- lo->lo_owner.so_owner.len, GFP_KERNEL);
+ xdr_netobj_dup(&deny->ld_owner, &lo->lo_owner.so_owner,
+ GFP_KERNEL);
if (!deny->ld_owner.data)
/* We just don't care that much */
goto nevermind;
- deny->ld_owner.len = lo->lo_owner.so_owner.len;
deny->ld_clientid = lo->lo_owner.so_client->cl_clientid;
} else {
nevermind:
@@ -5934,7 +6409,7 @@
struct nfs4_ol_stateid *lock_stp = NULL;
struct nfs4_ol_stateid *open_stp = NULL;
struct nfs4_file *fp;
- struct file *filp = NULL;
+ struct nfsd_file *nf = NULL;
struct nfsd4_blocked_lock *nbl = NULL;
struct file_lock *file_lock = NULL;
struct file_lock *conflock = NULL;
@@ -6016,8 +6491,8 @@
/* Fallthrough */
case NFS4_READ_LT:
spin_lock(&fp->fi_lock);
- filp = find_readable_file_locked(fp);
- if (filp)
+ nf = find_readable_file_locked(fp);
+ if (nf)
get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ);
spin_unlock(&fp->fi_lock);
fl_type = F_RDLCK;
@@ -6028,8 +6503,8 @@
/* Fallthrough */
case NFS4_WRITE_LT:
spin_lock(&fp->fi_lock);
- filp = find_writeable_file_locked(fp);
- if (filp)
+ nf = find_writeable_file_locked(fp);
+ if (nf)
get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE);
spin_unlock(&fp->fi_lock);
fl_type = F_WRLCK;
@@ -6039,7 +6514,7 @@
goto out;
}
- if (!filp) {
+ if (!nf) {
status = nfserr_openmode;
goto out;
}
@@ -6055,7 +6530,7 @@
file_lock->fl_type = fl_type;
file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(&lock_sop->lo_owner));
file_lock->fl_pid = current->tgid;
- file_lock->fl_file = filp;
+ file_lock->fl_file = nf->nf_file;
file_lock->fl_flags = fl_flags;
file_lock->fl_lmops = &nfsd_posix_mng_ops;
file_lock->fl_start = lock->lk_offset;
@@ -6077,7 +6552,7 @@
spin_unlock(&nn->blocked_locks_lock);
}
- err = vfs_lock_file(filp, F_SETLK, file_lock, conflock);
+ err = vfs_lock_file(nf->nf_file, F_SETLK, file_lock, conflock);
switch (err) {
case 0: /* success! */
nfs4_inc_and_copy_stateid(&lock->lk_resp_stateid, &lock_stp->st_stid);
@@ -6112,8 +6587,8 @@
}
free_blocked_lock(nbl);
}
- if (filp)
- fput(filp);
+ if (nf)
+ nfsd_file_put(nf);
if (lock_stp) {
/* Bump seqid manually if the 4.0 replay owner is openowner */
if (cstate->replay_owner &&
@@ -6148,11 +6623,11 @@
*/
static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock)
{
- struct file *file;
- __be32 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
+ struct nfsd_file *nf;
+ __be32 err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf);
if (!err) {
- err = nfserrno(vfs_test_lock(file, lock));
- fput(file);
+ err = nfserrno(vfs_test_lock(nf->nf_file, lock));
+ nfsd_file_put(nf);
}
return err;
}
@@ -6196,15 +6671,15 @@
case NFS4_READ_LT:
case NFS4_READW_LT:
file_lock->fl_type = F_RDLCK;
- break;
+ break;
case NFS4_WRITE_LT:
case NFS4_WRITEW_LT:
file_lock->fl_type = F_WRLCK;
- break;
+ break;
default:
dprintk("NFSD: nfs4_lockt: bad lock type!\n");
status = nfserr_inval;
- goto out;
+ goto out;
}
lo = find_lockowner_str(cstate->clp, &lockt->lt_owner);
@@ -6240,7 +6715,7 @@
{
struct nfsd4_locku *locku = &u->locku;
struct nfs4_ol_stateid *stp;
- struct file *filp = NULL;
+ struct nfsd_file *nf = NULL;
struct file_lock *file_lock = NULL;
__be32 status;
int err;
@@ -6258,8 +6733,8 @@
&stp, nn);
if (status)
goto out;
- filp = find_any_file(stp->st_stid.sc_file);
- if (!filp) {
+ nf = find_any_file(stp->st_stid.sc_file);
+ if (!nf) {
status = nfserr_lock_range;
goto put_stateid;
}
@@ -6267,13 +6742,13 @@
if (!file_lock) {
dprintk("NFSD: %s: unable to allocate lock!\n", __func__);
status = nfserr_jukebox;
- goto fput;
+ goto put_file;
}
file_lock->fl_type = F_UNLCK;
file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(stp->st_stateowner));
file_lock->fl_pid = current->tgid;
- file_lock->fl_file = filp;
+ file_lock->fl_file = nf->nf_file;
file_lock->fl_flags = FL_POSIX;
file_lock->fl_lmops = &nfsd_posix_mng_ops;
file_lock->fl_start = locku->lu_offset;
@@ -6282,14 +6757,14 @@
locku->lu_length);
nfs4_transform_lock_offset(file_lock);
- err = vfs_lock_file(filp, F_SETLK, file_lock, NULL);
+ err = vfs_lock_file(nf->nf_file, F_SETLK, file_lock, NULL);
if (err) {
dprintk("NFSD: nfs4_locku: vfs_lock_file failed!\n");
goto out_nfserr;
}
nfs4_inc_and_copy_stateid(&locku->lu_stateid, &stp->st_stid);
-fput:
- fput(filp);
+put_file:
+ nfsd_file_put(nf);
put_stateid:
mutex_unlock(&stp->st_mutex);
nfs4_put_stid(&stp->st_stid);
@@ -6301,7 +6776,7 @@
out_nfserr:
status = nfserrno(err);
- goto fput;
+ goto put_file;
}
/*
@@ -6314,17 +6789,17 @@
{
struct file_lock *fl;
int status = false;
- struct file *filp = find_any_file(fp);
+ struct nfsd_file *nf = find_any_file(fp);
struct inode *inode;
struct file_lock_context *flctx;
- if (!filp) {
+ if (!nf) {
/* Any valid lock stateid should have some sort of access */
WARN_ON_ONCE(1);
return status;
}
- inode = locks_inode(filp);
+ inode = locks_inode(nf->nf_file);
flctx = inode->i_flctx;
if (flctx && !list_empty_careful(&flctx->flc_posix)) {
@@ -6337,7 +6812,7 @@
}
spin_unlock(&flctx->flc_lock);
}
- fput(filp);
+ nfsd_file_put(nf);
return status;
}
@@ -6415,7 +6890,7 @@
}
bool
-nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn)
+nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn)
{
struct nfs4_client_reclaim *crp;
@@ -6425,20 +6900,27 @@
/*
* failure => all reset bets are off, nfserr_no_grace...
+ *
+ * The caller is responsible for freeing name.data if NULL is returned (it
+ * will be freed in nfs4_remove_reclaim_record in the normal case).
*/
struct nfs4_client_reclaim *
-nfs4_client_to_reclaim(const char *name, struct nfsd_net *nn)
+nfs4_client_to_reclaim(struct xdr_netobj name, struct xdr_netobj princhash,
+ struct nfsd_net *nn)
{
unsigned int strhashval;
struct nfs4_client_reclaim *crp;
- dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", HEXDIR_LEN, name);
+ dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", name.len, name.data);
crp = alloc_reclaim();
if (crp) {
strhashval = clientstr_hashval(name);
INIT_LIST_HEAD(&crp->cr_strhash);
list_add(&crp->cr_strhash, &nn->reclaim_str_hashtbl[strhashval]);
- memcpy(crp->cr_recdir, name, HEXDIR_LEN);
+ crp->cr_name.data = name.data;
+ crp->cr_name.len = name.len;
+ crp->cr_princhash.data = princhash.data;
+ crp->cr_princhash.len = princhash.len;
crp->cr_clp = NULL;
nn->reclaim_str_hashtbl_size++;
}
@@ -6449,6 +6931,8 @@
nfs4_remove_reclaim_record(struct nfs4_client_reclaim *crp, struct nfsd_net *nn)
{
list_del(&crp->cr_strhash);
+ kfree(crp->cr_name.data);
+ kfree(crp->cr_princhash.data);
kfree(crp);
nn->reclaim_str_hashtbl_size--;
}
@@ -6472,16 +6956,16 @@
/*
* called from OPEN, CLAIM_PREVIOUS with a new clientid. */
struct nfs4_client_reclaim *
-nfsd4_find_reclaim_client(const char *recdir, struct nfsd_net *nn)
+nfsd4_find_reclaim_client(struct xdr_netobj name, struct nfsd_net *nn)
{
unsigned int strhashval;
struct nfs4_client_reclaim *crp = NULL;
- dprintk("NFSD: nfs4_find_reclaim_client for recdir %s\n", recdir);
+ dprintk("NFSD: nfs4_find_reclaim_client for name %.*s\n", name.len, name.data);
- strhashval = clientstr_hashval(recdir);
+ strhashval = clientstr_hashval(name);
list_for_each_entry(crp, &nn->reclaim_str_hashtbl[strhashval], cr_strhash) {
- if (same_name(crp->cr_recdir, recdir)) {
+ if (compare_blob(&crp->cr_name, &name) == 0) {
return crp;
}
}
@@ -6516,7 +7000,7 @@
static inline void
put_client(struct nfs4_client *clp)
{
- atomic_dec(&clp->cl_refcount);
+ atomic_dec(&clp->cl_rpc_users);
}
static struct nfs4_client *
@@ -6634,7 +7118,7 @@
return;
lockdep_assert_held(&nn->client_lock);
- atomic_inc(&clp->cl_refcount);
+ atomic_inc(&clp->cl_rpc_users);
list_add(&lst->st_locks, collect);
}
@@ -6663,7 +7147,7 @@
* Despite the fact that these functions deal
* with 64-bit integers for "count", we must
* ensure that it doesn't blow up the
- * clp->cl_refcount. Throw a warning if we
+ * clp->cl_rpc_users. Throw a warning if we
* start to approach INT_MAX here.
*/
WARN_ON_ONCE(count == (INT_MAX / 2));
@@ -6787,7 +7271,7 @@
if (func) {
func(oop);
if (collect) {
- atomic_inc(&clp->cl_refcount);
+ atomic_inc(&clp->cl_rpc_users);
list_add(&oop->oo_perclient, collect);
}
}
@@ -6795,7 +7279,7 @@
/*
* Despite the fact that these functions deal with
* 64-bit integers for "count", we must ensure that
- * it doesn't blow up the clp->cl_refcount. Throw a
+ * it doesn't blow up the clp->cl_rpc_users. Throw a
* warning if we start to approach INT_MAX here.
*/
WARN_ON_ONCE(count == (INT_MAX / 2));
@@ -6925,7 +7409,7 @@
if (dp->dl_time != 0)
continue;
- atomic_inc(&clp->cl_refcount);
+ atomic_inc(&clp->cl_rpc_users);
WARN_ON(!unhash_delegation_locked(dp));
list_add(&dp->dl_recall_lru, victims);
}
@@ -6933,7 +7417,7 @@
/*
* Despite the fact that these functions deal with
* 64-bit integers for "count", we must ensure that
- * it doesn't blow up the clp->cl_refcount. Throw a
+ * it doesn't blow up the clp->cl_rpc_users. Throw a
* warning if we start to approach INT_MAX here.
*/
WARN_ON_ONCE(count == (INT_MAX / 2));
@@ -7160,6 +7644,8 @@
INIT_LIST_HEAD(&nn->close_lru);
INIT_LIST_HEAD(&nn->del_recall_lru);
spin_lock_init(&nn->client_lock);
+ spin_lock_init(&nn->s2s_cp_lock);
+ idr_init(&nn->s2s_cp_stateids);
spin_lock_init(&nn->blocked_locks_lock);
INIT_LIST_HEAD(&nn->blocked_locks_lru);
@@ -7217,10 +7703,19 @@
return ret;
locks_start_grace(net, &nn->nfsd4_manager);
nfsd4_client_tracking_init(net);
+ if (nn->track_reclaim_completes && nn->reclaim_str_hashtbl_size == 0)
+ goto skip_grace;
printk(KERN_INFO "NFSD: starting %ld-second grace period (net %x)\n",
nn->nfsd4_grace, net->ns.inum);
queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ);
return 0;
+
+skip_grace:
+ printk(KERN_INFO "NFSD: no clients to reclaim, skipping NFSv4 grace period (net %x)\n",
+ net->ns.inum);
+ queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_lease * HZ);
+ nfsd4_end_grace(nn);
+ return 0;
}
/* initialization to perform when the nfsd service is started: */
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 418fa9c..533d0fc 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -49,6 +49,7 @@
#include "cache.h"
#include "netns.h"
#include "pnfs.h"
+#include "filecache.h"
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
#include <linux/security.h>
@@ -203,6 +204,13 @@
return p;
}
+static unsigned int compoundargs_bytes_left(struct nfsd4_compoundargs *argp)
+{
+ unsigned int this = (char *)argp->end - (char *)argp->p;
+
+ return this + argp->pagelen;
+}
+
static int zero_clientid(clientid_t *clid)
{
return (clid->cl_boot == 0) && (clid->cl_id == 0);
@@ -211,10 +219,10 @@
/**
* svcxdr_tmpalloc - allocate memory to be freed after compound processing
* @argp: NFSv4 compound argument structure
- * @p: pointer to be freed (with kfree())
+ * @len: length of buffer to allocate
*
- * Marks @p to be freed when processing the compound operation
- * described in @argp finishes.
+ * Allocates a buffer of size @len to be freed when processing the compound
+ * operation described in @argp finishes.
*/
static void *
svcxdr_tmpalloc(struct nfsd4_compoundargs *argp, u32 len)
@@ -269,19 +277,13 @@
return ret;
}
-/*
- * We require the high 32 bits of 'seconds' to be 0, and
- * we ignore all 32 bits of 'nseconds'.
- */
static __be32
-nfsd4_decode_time(struct nfsd4_compoundargs *argp, struct timespec *tv)
+nfsd4_decode_time(struct nfsd4_compoundargs *argp, struct timespec64 *tv)
{
DECODE_HEAD;
- u64 sec;
READ_BUF(12);
- p = xdr_decode_hyper(p, &sec);
- tv->tv_sec = sec;
+ p = xdr_decode_hyper(p, &tv->tv_sec);
tv->tv_nsec = be32_to_cpup(p++);
if (tv->tv_nsec >= (u32)1000000000)
return nfserr_inval;
@@ -320,7 +322,6 @@
struct iattr *iattr, struct nfs4_acl **acl,
struct xdr_netobj *label, int *umask)
{
- struct timespec ts;
int expected_len, len = 0;
u32 dummy32;
char *buf;
@@ -354,7 +355,12 @@
READ_BUF(4); len += 4;
nace = be32_to_cpup(p++);
- if (nace > NFS4_ACL_MAX)
+ if (nace > compoundargs_bytes_left(argp)/20)
+ /*
+ * Even with 4-byte names there wouldn't be
+ * space for that many aces; something fishy is
+ * going on:
+ */
return nfserr_fbig;
*acl = svcxdr_tmpalloc(argp, nfs4_acl_bytes(nace));
@@ -422,8 +428,7 @@
switch (dummy32) {
case NFS4_SET_TO_CLIENT_TIME:
len += 12;
- status = nfsd4_decode_time(argp, &ts);
- iattr->ia_atime = timespec_to_timespec64(ts);
+ status = nfsd4_decode_time(argp, &iattr->ia_atime);
if (status)
return status;
iattr->ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET);
@@ -442,8 +447,7 @@
switch (dummy32) {
case NFS4_SET_TO_CLIENT_TIME:
len += 12;
- status = nfsd4_decode_time(argp, &ts);
- iattr->ia_mtime = timespec_to_timespec64(ts);
+ status = nfsd4_decode_time(argp, &iattr->ia_mtime);
if (status)
return status;
iattr->ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET);
@@ -521,6 +525,7 @@
static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_cb_sec *cbs)
{
DECODE_HEAD;
+ struct user_namespace *userns = nfsd_user_namespace(argp->rqstp);
u32 dummy, uid, gid;
char *machine_name;
int i;
@@ -563,8 +568,8 @@
dummy = be32_to_cpup(p++);
READ_BUF(dummy * 4);
if (cbs->flavor == (u32)(-1)) {
- kuid_t kuid = make_kuid(&init_user_ns, uid);
- kgid_t kgid = make_kgid(&init_user_ns, gid);
+ kuid_t kuid = make_kuid(userns, uid);
+ kgid_t kgid = make_kgid(userns, gid);
if (uid_valid(kuid) && gid_valid(kgid)) {
cbs->uid = kuid;
cbs->gid = kgid;
@@ -1397,7 +1402,6 @@
goto xdr_error;
}
- /* Ignore Implementation ID */
READ_BUF(4); /* nfs_impl_id4 array length */
dummy = be32_to_cpup(p++);
@@ -1405,21 +1409,19 @@
goto xdr_error;
if (dummy == 1) {
- /* nii_domain */
- READ_BUF(4);
- dummy = be32_to_cpup(p++);
- READ_BUF(dummy);
- p += XDR_QUADLEN(dummy);
+ status = nfsd4_decode_opaque(argp, &exid->nii_domain);
+ if (status)
+ goto xdr_error;
/* nii_name */
- READ_BUF(4);
- dummy = be32_to_cpup(p++);
- READ_BUF(dummy);
- p += XDR_QUADLEN(dummy);
+ status = nfsd4_decode_opaque(argp, &exid->nii_name);
+ if (status)
+ goto xdr_error;
/* nii_date */
- READ_BUF(12);
- p += 3;
+ status = nfsd4_decode_time(argp, &exid->nii_time);
+ if (status)
+ goto xdr_error;
}
DECODE_TAIL;
}
@@ -1429,7 +1431,6 @@
struct nfsd4_create_session *sess)
{
DECODE_HEAD;
- u32 dummy;
READ_BUF(16);
COPYMEM(&sess->clientid, 8);
@@ -1438,7 +1439,7 @@
/* Fore channel attrs */
READ_BUF(28);
- dummy = be32_to_cpup(p++); /* headerpadsz is always 0 */
+ p++; /* headerpadsz is always 0 */
sess->fore_channel.maxreq_sz = be32_to_cpup(p++);
sess->fore_channel.maxresp_sz = be32_to_cpup(p++);
sess->fore_channel.maxresp_cached = be32_to_cpup(p++);
@@ -1455,7 +1456,7 @@
/* Back channel attrs */
READ_BUF(28);
- dummy = be32_to_cpup(p++); /* headerpadsz is always 0 */
+ p++; /* headerpadsz is always 0 */
sess->back_channel.maxreq_sz = be32_to_cpup(p++);
sess->back_channel.maxresp_sz = be32_to_cpup(p++);
sess->back_channel.maxresp_cached = be32_to_cpup(p++);
@@ -1747,7 +1748,6 @@
nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
{
DECODE_HEAD;
- unsigned int tmp;
status = nfsd4_decode_stateid(argp, ©->cp_src_stateid);
if (status)
@@ -1762,12 +1762,19 @@
p = xdr_decode_hyper(p, ©->cp_count);
p++; /* ca_consecutive: we always do consecutive copies */
copy->cp_synchronous = be32_to_cpup(p++);
- tmp = be32_to_cpup(p); /* Source server list not supported */
+ /* tmp = be32_to_cpup(p); Source server list not supported */
DECODE_TAIL;
}
static __be32
+nfsd4_decode_offload_status(struct nfsd4_compoundargs *argp,
+ struct nfsd4_offload_status *os)
+{
+ return nfsd4_decode_stateid(argp, &os->stateid);
+}
+
+static __be32
nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek)
{
DECODE_HEAD;
@@ -1873,8 +1880,8 @@
[OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_LAYOUTERROR] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_LAYOUTSTATS] = (nfsd4_dec)nfsd4_decode_notsupp,
- [OP_OFFLOAD_CANCEL] = (nfsd4_dec)nfsd4_decode_notsupp,
- [OP_OFFLOAD_STATUS] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_OFFLOAD_CANCEL] = (nfsd4_dec)nfsd4_decode_offload_status,
+ [OP_OFFLOAD_STATUS] = (nfsd4_dec)nfsd4_decode_offload_status,
[OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek,
[OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp,
@@ -2413,8 +2420,10 @@
__be32 status;
int err;
struct nfs4_acl *acl = NULL;
+#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
void *context = NULL;
int contextlen;
+#endif
bool contextsupport = false;
struct nfsd4_compoundres *resp = rqstp->rq_resp;
u32 minorversion = resp->cstate.minorversion;
@@ -2899,12 +2908,14 @@
*p++ = cpu_to_be32(NFS4_CHANGE_TYPE_IS_TIME_METADATA);
}
+#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) {
status = nfsd4_encode_security_label(xdr, rqstp, context,
contextlen);
if (status)
goto out;
}
+#endif
attrlen = htonl(xdr->buf->len - attrlen_offset - 4);
write_bytes_to_xdr_buf(xdr->buf, attrlen_offset, &attrlen, 4);
@@ -3217,9 +3228,8 @@
if (!p)
return nfserr_resource;
encode_cinfo(p, &create->cr_cinfo);
- nfserr = nfsd4_encode_bitmap(xdr, create->cr_bmval[0],
+ return nfsd4_encode_bitmap(xdr, create->cr_bmval[0],
create->cr_bmval[1], create->cr_bmval[2]);
- return 0;
}
static __be32
@@ -3462,7 +3472,7 @@
len = maxcount;
nfserr = nfsd_splice_read(read->rd_rqstp, read->rd_fhp,
- file, read->rd_offset, &maxcount);
+ file, read->rd_offset, &maxcount, &eof);
read->rd_length = maxcount;
if (nfserr) {
/*
@@ -3474,9 +3484,6 @@
return nfserr;
}
- eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
- d_inode(read->rd_fhp->fh_dentry)->i_size);
-
*(p++) = htonl(eof);
*(p++) = htonl(maxcount);
@@ -3547,15 +3554,13 @@
len = maxcount;
nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
- resp->rqstp->rq_vec, read->rd_vlen, &maxcount);
+ resp->rqstp->rq_vec, read->rd_vlen, &maxcount,
+ &eof);
read->rd_length = maxcount;
if (nfserr)
return nfserr;
xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3));
- eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
- d_inode(read->rd_fhp->fh_dentry)->i_size);
-
tmp = htonl(eof);
write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4);
tmp = htonl(maxcount);
@@ -3574,11 +3579,14 @@
{
unsigned long maxcount;
struct xdr_stream *xdr = &resp->xdr;
- struct file *file = read->rd_filp;
+ struct file *file;
int starting_len = xdr->buf->len;
- struct raparms *ra = NULL;
__be32 *p;
+ if (nfserr)
+ return nfserr;
+ file = read->rd_nf->nf_file;
+
p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */
if (!p) {
WARN_ON_ONCE(test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags));
@@ -3596,18 +3604,12 @@
(xdr->buf->buflen - xdr->buf->len));
maxcount = min_t(unsigned long, maxcount, read->rd_length);
- if (read->rd_tmp_file)
- ra = nfsd_init_raparms(file);
-
if (file->f_op->splice_read &&
test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
else
nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
- if (ra)
- nfsd_put_raparams(file, ra);
-
if (nfserr)
xdr_truncate_encode(xdr, starting_len);
@@ -4224,15 +4226,27 @@
#endif /* CONFIG_NFSD_PNFS */
static __be32
-nfsd42_encode_write_res(struct nfsd4_compoundres *resp, struct nfsd42_write_res *write)
+nfsd42_encode_write_res(struct nfsd4_compoundres *resp,
+ struct nfsd42_write_res *write, bool sync)
{
__be32 *p;
-
- p = xdr_reserve_space(&resp->xdr, 4 + 8 + 4 + NFS4_VERIFIER_SIZE);
+ p = xdr_reserve_space(&resp->xdr, 4);
if (!p)
return nfserr_resource;
- *p++ = cpu_to_be32(0);
+ if (sync)
+ *p++ = cpu_to_be32(0);
+ else {
+ __be32 nfserr;
+ *p++ = cpu_to_be32(1);
+ nfserr = nfsd4_encode_stateid(&resp->xdr, &write->cb_stateid);
+ if (nfserr)
+ return nfserr;
+ }
+ p = xdr_reserve_space(&resp->xdr, 8 + 4 + NFS4_VERIFIER_SIZE);
+ if (!p)
+ return nfserr_resource;
+
p = xdr_encode_hyper(p, write->wr_bytes_written);
*p++ = cpu_to_be32(write->wr_stable_how);
p = xdr_encode_opaque_fixed(p, write->wr_verifier.data,
@@ -4246,7 +4260,8 @@
{
__be32 *p;
- nfserr = nfsd42_encode_write_res(resp, ©->cp_res);
+ nfserr = nfsd42_encode_write_res(resp, ©->cp_res,
+ copy->cp_synchronous);
if (nfserr)
return nfserr;
@@ -4257,6 +4272,22 @@
}
static __be32
+nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr,
+ struct nfsd4_offload_status *os)
+{
+ struct xdr_stream *xdr = &resp->xdr;
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, 8 + 4);
+ if (!p)
+ return nfserr_resource;
+ p = xdr_encode_hyper(p, os->count);
+ *p++ = cpu_to_be32(0);
+
+ return nfserr;
+}
+
+static __be32
nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
struct nfsd4_seek *seek)
{
@@ -4359,7 +4390,7 @@
[OP_LAYOUTERROR] = (nfsd4_enc)nfsd4_encode_noop,
[OP_LAYOUTSTATS] = (nfsd4_enc)nfsd4_encode_noop,
[OP_OFFLOAD_CANCEL] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_OFFLOAD_STATUS] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_OFFLOAD_STATUS] = (nfsd4_enc)nfsd4_encode_offload_status,
[OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_noop,
[OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek,
[OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop,
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index dbdeb9d..96352ab 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -9,6 +9,7 @@
* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
*/
+#include <linux/sunrpc/svc_xprt.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/sunrpc/addr.h>
@@ -30,52 +31,17 @@
#define TARGET_BUCKET_SIZE 64
struct nfsd_drc_bucket {
+ struct rb_root rb_head;
struct list_head lru_head;
spinlock_t cache_lock;
};
-static struct nfsd_drc_bucket *drc_hashtbl;
-static struct kmem_cache *drc_slab;
-
-/* max number of entries allowed in the cache */
-static unsigned int max_drc_entries;
-
-/* number of significant bits in the hash value */
-static unsigned int maskbits;
-static unsigned int drc_hashsize;
-
-/*
- * Stats and other tracking of on the duplicate reply cache. All of these and
- * the "rc" fields in nfsdstats are protected by the cache_lock
- */
-
-/* total number of entries */
-static atomic_t num_drc_entries;
-
-/* cache misses due only to checksum comparison failures */
-static unsigned int payload_misses;
-
-/* amount of memory (in bytes) currently consumed by the DRC */
-static unsigned int drc_mem_usage;
-
-/* longest hash chain seen */
-static unsigned int longest_chain;
-
-/* size of cache when we saw the longest hash chain */
-static unsigned int longest_chain_cachesize;
-
static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec);
static unsigned long nfsd_reply_cache_count(struct shrinker *shrink,
struct shrink_control *sc);
static unsigned long nfsd_reply_cache_scan(struct shrinker *shrink,
struct shrink_control *sc);
-static struct shrinker nfsd_reply_cache_shrinker = {
- .scan_objects = nfsd_reply_cache_scan,
- .count_objects = nfsd_reply_cache_count,
- .seeks = 1,
-};
-
/*
* Put a cap on the size of the DRC based on the amount of available
* low memory in the machine.
@@ -93,12 +59,15 @@
* ...with a hard cap of 256k entries. In the worst case, each entry will be
* ~1k, so the above numbers should give a rough max of the amount of memory
* used in k.
+ *
+ * XXX: these limits are per-container, so memory used will increase
+ * linearly with number of containers. Maybe that's OK.
*/
static unsigned int
nfsd_cache_size_limit(void)
{
unsigned int limit;
- unsigned long low_pages = totalram_pages - totalhigh_pages;
+ unsigned long low_pages = totalram_pages() - totalhigh_pages();
limit = (16 * int_sqrt(low_pages)) << (PAGE_SHIFT-10);
return min_t(unsigned int, limit, 256*1024);
@@ -115,108 +84,133 @@
}
static u32
-nfsd_cache_hash(__be32 xid)
+nfsd_cache_hash(__be32 xid, struct nfsd_net *nn)
{
- return hash_32(be32_to_cpu(xid), maskbits);
+ return hash_32(be32_to_cpu(xid), nn->maskbits);
}
static struct svc_cacherep *
-nfsd_reply_cache_alloc(void)
+nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum,
+ struct nfsd_net *nn)
{
struct svc_cacherep *rp;
- rp = kmem_cache_alloc(drc_slab, GFP_KERNEL);
+ rp = kmem_cache_alloc(nn->drc_slab, GFP_KERNEL);
if (rp) {
rp->c_state = RC_UNUSED;
rp->c_type = RC_NOCACHE;
+ RB_CLEAR_NODE(&rp->c_node);
INIT_LIST_HEAD(&rp->c_lru);
+
+ memset(&rp->c_key, 0, sizeof(rp->c_key));
+ rp->c_key.k_xid = rqstp->rq_xid;
+ rp->c_key.k_proc = rqstp->rq_proc;
+ rpc_copy_addr((struct sockaddr *)&rp->c_key.k_addr, svc_addr(rqstp));
+ rpc_set_port((struct sockaddr *)&rp->c_key.k_addr, rpc_get_port(svc_addr(rqstp)));
+ rp->c_key.k_prot = rqstp->rq_prot;
+ rp->c_key.k_vers = rqstp->rq_vers;
+ rp->c_key.k_len = rqstp->rq_arg.len;
+ rp->c_key.k_csum = csum;
}
return rp;
}
static void
-nfsd_reply_cache_free_locked(struct svc_cacherep *rp)
+nfsd_reply_cache_free_locked(struct nfsd_drc_bucket *b, struct svc_cacherep *rp,
+ struct nfsd_net *nn)
{
if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) {
- drc_mem_usage -= rp->c_replvec.iov_len;
+ nn->drc_mem_usage -= rp->c_replvec.iov_len;
kfree(rp->c_replvec.iov_base);
}
- list_del(&rp->c_lru);
- atomic_dec(&num_drc_entries);
- drc_mem_usage -= sizeof(*rp);
- kmem_cache_free(drc_slab, rp);
+ if (rp->c_state != RC_UNUSED) {
+ rb_erase(&rp->c_node, &b->rb_head);
+ list_del(&rp->c_lru);
+ atomic_dec(&nn->num_drc_entries);
+ nn->drc_mem_usage -= sizeof(*rp);
+ }
+ kmem_cache_free(nn->drc_slab, rp);
}
static void
-nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep *rp)
+nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep *rp,
+ struct nfsd_net *nn)
{
spin_lock(&b->cache_lock);
- nfsd_reply_cache_free_locked(rp);
+ nfsd_reply_cache_free_locked(b, rp, nn);
spin_unlock(&b->cache_lock);
}
-int nfsd_reply_cache_init(void)
+int nfsd_reply_cache_init(struct nfsd_net *nn)
{
unsigned int hashsize;
unsigned int i;
int status = 0;
- max_drc_entries = nfsd_cache_size_limit();
- atomic_set(&num_drc_entries, 0);
- hashsize = nfsd_hashsize(max_drc_entries);
- maskbits = ilog2(hashsize);
+ nn->max_drc_entries = nfsd_cache_size_limit();
+ atomic_set(&nn->num_drc_entries, 0);
+ hashsize = nfsd_hashsize(nn->max_drc_entries);
+ nn->maskbits = ilog2(hashsize);
- status = register_shrinker(&nfsd_reply_cache_shrinker);
+ nn->nfsd_reply_cache_shrinker.scan_objects = nfsd_reply_cache_scan;
+ nn->nfsd_reply_cache_shrinker.count_objects = nfsd_reply_cache_count;
+ nn->nfsd_reply_cache_shrinker.seeks = 1;
+ status = register_shrinker(&nn->nfsd_reply_cache_shrinker);
if (status)
- return status;
-
- drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep),
- 0, 0, NULL);
- if (!drc_slab)
goto out_nomem;
- drc_hashtbl = kcalloc(hashsize, sizeof(*drc_hashtbl), GFP_KERNEL);
- if (!drc_hashtbl) {
- drc_hashtbl = vzalloc(array_size(hashsize,
- sizeof(*drc_hashtbl)));
- if (!drc_hashtbl)
- goto out_nomem;
+ nn->drc_slab = kmem_cache_create("nfsd_drc",
+ sizeof(struct svc_cacherep), 0, 0, NULL);
+ if (!nn->drc_slab)
+ goto out_shrinker;
+
+ nn->drc_hashtbl = kcalloc(hashsize,
+ sizeof(*nn->drc_hashtbl), GFP_KERNEL);
+ if (!nn->drc_hashtbl) {
+ nn->drc_hashtbl = vzalloc(array_size(hashsize,
+ sizeof(*nn->drc_hashtbl)));
+ if (!nn->drc_hashtbl)
+ goto out_slab;
}
for (i = 0; i < hashsize; i++) {
- INIT_LIST_HEAD(&drc_hashtbl[i].lru_head);
- spin_lock_init(&drc_hashtbl[i].cache_lock);
+ INIT_LIST_HEAD(&nn->drc_hashtbl[i].lru_head);
+ spin_lock_init(&nn->drc_hashtbl[i].cache_lock);
}
- drc_hashsize = hashsize;
+ nn->drc_hashsize = hashsize;
return 0;
+out_slab:
+ kmem_cache_destroy(nn->drc_slab);
+out_shrinker:
+ unregister_shrinker(&nn->nfsd_reply_cache_shrinker);
out_nomem:
printk(KERN_ERR "nfsd: failed to allocate reply cache\n");
- nfsd_reply_cache_shutdown();
return -ENOMEM;
}
-void nfsd_reply_cache_shutdown(void)
+void nfsd_reply_cache_shutdown(struct nfsd_net *nn)
{
struct svc_cacherep *rp;
unsigned int i;
- unregister_shrinker(&nfsd_reply_cache_shrinker);
+ unregister_shrinker(&nn->nfsd_reply_cache_shrinker);
- for (i = 0; i < drc_hashsize; i++) {
- struct list_head *head = &drc_hashtbl[i].lru_head;
+ for (i = 0; i < nn->drc_hashsize; i++) {
+ struct list_head *head = &nn->drc_hashtbl[i].lru_head;
while (!list_empty(head)) {
rp = list_first_entry(head, struct svc_cacherep, c_lru);
- nfsd_reply_cache_free_locked(rp);
+ nfsd_reply_cache_free_locked(&nn->drc_hashtbl[i],
+ rp, nn);
}
}
- kvfree(drc_hashtbl);
- drc_hashtbl = NULL;
- drc_hashsize = 0;
+ kvfree(nn->drc_hashtbl);
+ nn->drc_hashtbl = NULL;
+ nn->drc_hashsize = 0;
- kmem_cache_destroy(drc_slab);
- drc_slab = NULL;
+ kmem_cache_destroy(nn->drc_slab);
+ nn->drc_slab = NULL;
}
/*
@@ -231,7 +225,7 @@
}
static long
-prune_bucket(struct nfsd_drc_bucket *b)
+prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn)
{
struct svc_cacherep *rp, *tmp;
long freed = 0;
@@ -243,10 +237,10 @@
*/
if (rp->c_state == RC_INPROG)
continue;
- if (atomic_read(&num_drc_entries) <= max_drc_entries &&
+ if (atomic_read(&nn->num_drc_entries) <= nn->max_drc_entries &&
time_before(jiffies, rp->c_timestamp + RC_EXPIRE))
break;
- nfsd_reply_cache_free_locked(rp);
+ nfsd_reply_cache_free_locked(b, rp, nn);
freed++;
}
return freed;
@@ -257,18 +251,18 @@
* Also prune the oldest ones when the total exceeds the max number of entries.
*/
static long
-prune_cache_entries(void)
+prune_cache_entries(struct nfsd_net *nn)
{
unsigned int i;
long freed = 0;
- for (i = 0; i < drc_hashsize; i++) {
- struct nfsd_drc_bucket *b = &drc_hashtbl[i];
+ for (i = 0; i < nn->drc_hashsize; i++) {
+ struct nfsd_drc_bucket *b = &nn->drc_hashtbl[i];
if (list_empty(&b->lru_head))
continue;
spin_lock(&b->cache_lock);
- freed += prune_bucket(b);
+ freed += prune_bucket(b, nn);
spin_unlock(&b->cache_lock);
}
return freed;
@@ -277,13 +271,19 @@
static unsigned long
nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc)
{
- return atomic_read(&num_drc_entries);
+ struct nfsd_net *nn = container_of(shrink,
+ struct nfsd_net, nfsd_reply_cache_shrinker);
+
+ return atomic_read(&nn->num_drc_entries);
}
static unsigned long
nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
{
- return prune_cache_entries();
+ struct nfsd_net *nn = container_of(shrink,
+ struct nfsd_net, nfsd_reply_cache_shrinker);
+
+ return prune_cache_entries(nn);
}
/*
* Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes
@@ -318,62 +318,62 @@
return csum;
}
-static bool
-nfsd_cache_match(struct svc_rqst *rqstp, __wsum csum, struct svc_cacherep *rp)
+static int
+nfsd_cache_key_cmp(const struct svc_cacherep *key,
+ const struct svc_cacherep *rp, struct nfsd_net *nn)
{
- /* Check RPC XID first */
- if (rqstp->rq_xid != rp->c_xid)
- return false;
- /* compare checksum of NFS data */
- if (csum != rp->c_csum) {
- ++payload_misses;
- return false;
- }
+ if (key->c_key.k_xid == rp->c_key.k_xid &&
+ key->c_key.k_csum != rp->c_key.k_csum)
+ ++nn->payload_misses;
- /* Other discriminators */
- if (rqstp->rq_proc != rp->c_proc ||
- rqstp->rq_prot != rp->c_prot ||
- rqstp->rq_vers != rp->c_vers ||
- rqstp->rq_arg.len != rp->c_len ||
- !rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) ||
- rpc_get_port(svc_addr(rqstp)) != rpc_get_port((struct sockaddr *)&rp->c_addr))
- return false;
-
- return true;
+ return memcmp(&key->c_key, &rp->c_key, sizeof(key->c_key));
}
/*
* Search the request hash for an entry that matches the given rqstp.
* Must be called with cache_lock held. Returns the found entry or
- * NULL on failure.
+ * inserts an empty key on failure.
*/
static struct svc_cacherep *
-nfsd_cache_search(struct nfsd_drc_bucket *b, struct svc_rqst *rqstp,
- __wsum csum)
+nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key,
+ struct nfsd_net *nn)
{
- struct svc_cacherep *rp, *ret = NULL;
- struct list_head *rh = &b->lru_head;
+ struct svc_cacherep *rp, *ret = key;
+ struct rb_node **p = &b->rb_head.rb_node,
+ *parent = NULL;
unsigned int entries = 0;
+ int cmp;
- list_for_each_entry(rp, rh, c_lru) {
+ while (*p != NULL) {
++entries;
- if (nfsd_cache_match(rqstp, csum, rp)) {
+ parent = *p;
+ rp = rb_entry(parent, struct svc_cacherep, c_node);
+
+ cmp = nfsd_cache_key_cmp(key, rp, nn);
+ if (cmp < 0)
+ p = &parent->rb_left;
+ else if (cmp > 0)
+ p = &parent->rb_right;
+ else {
ret = rp;
- break;
+ goto out;
}
}
-
+ rb_link_node(&key->c_node, parent, p);
+ rb_insert_color(&key->c_node, &b->rb_head);
+out:
/* tally hash chain length stats */
- if (entries > longest_chain) {
- longest_chain = entries;
- longest_chain_cachesize = atomic_read(&num_drc_entries);
- } else if (entries == longest_chain) {
+ if (entries > nn->longest_chain) {
+ nn->longest_chain = entries;
+ nn->longest_chain_cachesize = atomic_read(&nn->num_drc_entries);
+ } else if (entries == nn->longest_chain) {
/* prefer to keep the smallest cachesize possible here */
- longest_chain_cachesize = min_t(unsigned int,
- longest_chain_cachesize,
- atomic_read(&num_drc_entries));
+ nn->longest_chain_cachesize = min_t(unsigned int,
+ nn->longest_chain_cachesize,
+ atomic_read(&nn->num_drc_entries));
}
+ lru_put_end(b, ret);
return ret;
}
@@ -387,14 +387,12 @@
int
nfsd_cache_lookup(struct svc_rqst *rqstp)
{
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct svc_cacherep *rp, *found;
__be32 xid = rqstp->rq_xid;
- u32 proto = rqstp->rq_prot,
- vers = rqstp->rq_vers,
- proc = rqstp->rq_proc;
__wsum csum;
- u32 hash = nfsd_cache_hash(xid);
- struct nfsd_drc_bucket *b = &drc_hashtbl[hash];
+ u32 hash = nfsd_cache_hash(xid, nn);
+ struct nfsd_drc_bucket *b = &nn->drc_hashtbl[hash];
int type = rqstp->rq_cachetype;
int rtn = RC_DOIT;
@@ -410,60 +408,38 @@
* Since the common case is a cache miss followed by an insert,
* preallocate an entry.
*/
- rp = nfsd_reply_cache_alloc();
- spin_lock(&b->cache_lock);
- if (likely(rp)) {
- atomic_inc(&num_drc_entries);
- drc_mem_usage += sizeof(*rp);
- }
-
- /* go ahead and prune the cache */
- prune_bucket(b);
-
- found = nfsd_cache_search(b, rqstp, csum);
- if (found) {
- if (likely(rp))
- nfsd_reply_cache_free_locked(rp);
- rp = found;
- goto found_entry;
- }
-
+ rp = nfsd_reply_cache_alloc(rqstp, csum, nn);
if (!rp) {
dprintk("nfsd: unable to allocate DRC entry!\n");
- goto out;
+ return rtn;
+ }
+
+ spin_lock(&b->cache_lock);
+ found = nfsd_cache_insert(b, rp, nn);
+ if (found != rp) {
+ nfsd_reply_cache_free_locked(NULL, rp, nn);
+ rp = found;
+ goto found_entry;
}
nfsdstats.rcmisses++;
rqstp->rq_cacherep = rp;
rp->c_state = RC_INPROG;
- rp->c_xid = xid;
- rp->c_proc = proc;
- rpc_copy_addr((struct sockaddr *)&rp->c_addr, svc_addr(rqstp));
- rpc_set_port((struct sockaddr *)&rp->c_addr, rpc_get_port(svc_addr(rqstp)));
- rp->c_prot = proto;
- rp->c_vers = vers;
- rp->c_len = rqstp->rq_arg.len;
- rp->c_csum = csum;
- lru_put_end(b, rp);
+ atomic_inc(&nn->num_drc_entries);
+ nn->drc_mem_usage += sizeof(*rp);
- /* release any buffer */
- if (rp->c_type == RC_REPLBUFF) {
- drc_mem_usage -= rp->c_replvec.iov_len;
- kfree(rp->c_replvec.iov_base);
- rp->c_replvec.iov_base = NULL;
- }
- rp->c_type = RC_NOCACHE;
+ /* go ahead and prune the cache */
+ prune_bucket(b, nn);
out:
spin_unlock(&b->cache_lock);
return rtn;
found_entry:
- nfsdstats.rchits++;
/* We found a matching entry which is either in progress or done. */
- lru_put_end(b, rp);
-
+ nfsdstats.rchits++;
rtn = RC_DROPIT;
+
/* Request being processed */
if (rp->c_state == RC_INPROG)
goto out;
@@ -489,7 +465,7 @@
break;
default:
printk(KERN_WARNING "nfsd: bad repcache type %d\n", rp->c_type);
- nfsd_reply_cache_free_locked(rp);
+ nfsd_reply_cache_free_locked(b, rp, nn);
}
goto out;
@@ -514,6 +490,7 @@
void
nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
{
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct svc_cacherep *rp = rqstp->rq_cacherep;
struct kvec *resv = &rqstp->rq_res.head[0], *cachv;
u32 hash;
@@ -524,15 +501,15 @@
if (!rp)
return;
- hash = nfsd_cache_hash(rp->c_xid);
- b = &drc_hashtbl[hash];
+ hash = nfsd_cache_hash(rp->c_key.k_xid, nn);
+ b = &nn->drc_hashtbl[hash];
len = resv->iov_len - ((char*)statp - (char*)resv->iov_base);
len >>= 2;
/* Don't cache excessive amounts of data and XDR failures */
if (!statp || len > (256 >> 2)) {
- nfsd_reply_cache_free(b, rp);
+ nfsd_reply_cache_free(b, rp, nn);
return;
}
@@ -547,18 +524,18 @@
bufsize = len << 2;
cachv->iov_base = kmalloc(bufsize, GFP_KERNEL);
if (!cachv->iov_base) {
- nfsd_reply_cache_free(b, rp);
+ nfsd_reply_cache_free(b, rp, nn);
return;
}
cachv->iov_len = bufsize;
memcpy(cachv->iov_base, statp, bufsize);
break;
case RC_NOCACHE:
- nfsd_reply_cache_free(b, rp);
+ nfsd_reply_cache_free(b, rp, nn);
return;
}
spin_lock(&b->cache_lock);
- drc_mem_usage += bufsize;
+ nn->drc_mem_usage += bufsize;
lru_put_end(b, rp);
rp->c_secure = test_bit(RQ_SECURE, &rqstp->rq_flags);
rp->c_type = cachetype;
@@ -594,21 +571,26 @@
*/
static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v)
{
- seq_printf(m, "max entries: %u\n", max_drc_entries);
+ struct nfsd_net *nn = m->private;
+
+ seq_printf(m, "max entries: %u\n", nn->max_drc_entries);
seq_printf(m, "num entries: %u\n",
- atomic_read(&num_drc_entries));
- seq_printf(m, "hash buckets: %u\n", 1 << maskbits);
- seq_printf(m, "mem usage: %u\n", drc_mem_usage);
+ atomic_read(&nn->num_drc_entries));
+ seq_printf(m, "hash buckets: %u\n", 1 << nn->maskbits);
+ seq_printf(m, "mem usage: %u\n", nn->drc_mem_usage);
seq_printf(m, "cache hits: %u\n", nfsdstats.rchits);
seq_printf(m, "cache misses: %u\n", nfsdstats.rcmisses);
seq_printf(m, "not cached: %u\n", nfsdstats.rcnocache);
- seq_printf(m, "payload misses: %u\n", payload_misses);
- seq_printf(m, "longest chain len: %u\n", longest_chain);
- seq_printf(m, "cachesize at longest: %u\n", longest_chain_cachesize);
+ seq_printf(m, "payload misses: %u\n", nn->payload_misses);
+ seq_printf(m, "longest chain len: %u\n", nn->longest_chain);
+ seq_printf(m, "cachesize at longest: %u\n", nn->longest_chain_cachesize);
return 0;
}
int nfsd_reply_cache_stats_open(struct inode *inode, struct file *file)
{
- return single_open(file, nfsd_reply_cache_stats_show, NULL);
+ struct nfsd_net *nn = net_generic(file_inode(file)->i_sb->s_fs_info,
+ nfsd_net_id);
+
+ return single_open(file, nfsd_reply_cache_stats_show, nn);
}
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 7fb9f7c..11b42c5 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Syscall interface to knfsd.
*
@@ -7,6 +8,7 @@
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/ctype.h>
+#include <linux/fs_context.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/lockd/lockd.h>
@@ -15,6 +17,7 @@
#include <linux/sunrpc/gss_krb5_enctypes.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
#include <linux/module.h>
+#include <linux/fsnotify.h>
#include "idmap.h"
#include "nfsd.h"
@@ -52,6 +55,7 @@
NFSD_RecoveryDir,
NFSD_V4EndGrace,
#endif
+ NFSD_MaxReserved
};
/*
@@ -439,7 +443,7 @@
return rv;
if (newthreads < 0)
return -EINVAL;
- rv = nfsd_svc(newthreads, net);
+ rv = nfsd_svc(newthreads, net, file->f_cred);
if (rv < 0)
return rv;
} else
@@ -537,14 +541,14 @@
}
static ssize_t
-nfsd_print_version_support(char *buf, int remaining, const char *sep,
- unsigned vers, int minor)
+nfsd_print_version_support(struct nfsd_net *nn, char *buf, int remaining,
+ const char *sep, unsigned vers, int minor)
{
const char *format = minor < 0 ? "%s%c%u" : "%s%c%u.%u";
- bool supported = !!nfsd_vers(vers, NFSD_TEST);
+ bool supported = !!nfsd_vers(nn, vers, NFSD_TEST);
if (vers == 4 && minor >= 0 &&
- !nfsd_minorversion(minor, NFSD_TEST))
+ !nfsd_minorversion(nn, minor, NFSD_TEST))
supported = false;
if (minor == 0 && supported)
/*
@@ -599,20 +603,20 @@
switch(num) {
case 2:
case 3:
- nfsd_vers(num, cmd);
+ nfsd_vers(nn, num, cmd);
break;
case 4:
if (*minorp == '.') {
- if (nfsd_minorversion(minor, cmd) < 0)
+ if (nfsd_minorversion(nn, minor, cmd) < 0)
return -EINVAL;
- } else if ((cmd == NFSD_SET) != nfsd_vers(num, NFSD_TEST)) {
+ } else if ((cmd == NFSD_SET) != nfsd_vers(nn, num, NFSD_TEST)) {
/*
* Either we have +4 and no minors are enabled,
* or we have -4 and at least one minor is enabled.
* In either case, propagate 'cmd' to all minors.
*/
minor = 0;
- while (nfsd_minorversion(minor, cmd) >= 0)
+ while (nfsd_minorversion(nn, minor, cmd) >= 0)
minor++;
}
break;
@@ -624,7 +628,7 @@
/* If all get turned off, turn them back on, as
* having no versions is BAD
*/
- nfsd_reset_versions();
+ nfsd_reset_versions(nn);
}
/* Now write current state into reply buffer */
@@ -633,12 +637,12 @@
remaining = SIMPLE_TRANSACTION_LIMIT;
for (num=2 ; num <= 4 ; num++) {
int minor;
- if (!nfsd_vers(num, NFSD_AVAIL))
+ if (!nfsd_vers(nn, num, NFSD_AVAIL))
continue;
minor = -1;
do {
- len = nfsd_print_version_support(buf, remaining,
+ len = nfsd_print_version_support(nn, buf, remaining,
sep, num, minor);
if (len >= remaining)
goto out;
@@ -717,7 +721,7 @@
* a socket of a supported family/protocol, and we use it as an
* nfsd listener.
*/
-static ssize_t __write_ports_addfd(char *buf, struct net *net)
+static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred *cred)
{
char *mesg = buf;
int fd, err;
@@ -736,7 +740,7 @@
if (err != 0)
return err;
- err = svc_addsock(nn->nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT);
+ err = svc_addsock(nn->nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred);
if (err < 0) {
nfsd_destroy(net);
return err;
@@ -751,7 +755,7 @@
* A transport listener is added by writing it's transport name and
* a port number.
*/
-static ssize_t __write_ports_addxprt(char *buf, struct net *net)
+static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cred *cred)
{
char transport[16];
struct svc_xprt *xprt;
@@ -769,12 +773,12 @@
return err;
err = svc_create_xprt(nn->nfsd_serv, transport, net,
- PF_INET, port, SVC_SOCK_ANONYMOUS);
+ PF_INET, port, SVC_SOCK_ANONYMOUS, cred);
if (err < 0)
goto out_err;
err = svc_create_xprt(nn->nfsd_serv, transport, net,
- PF_INET6, port, SVC_SOCK_ANONYMOUS);
+ PF_INET6, port, SVC_SOCK_ANONYMOUS, cred);
if (err < 0 && err != -EAFNOSUPPORT)
goto out_close;
@@ -799,10 +803,10 @@
return __write_ports_names(buf, net);
if (isdigit(buf[0]))
- return __write_ports_addfd(buf, net);
+ return __write_ports_addfd(buf, net, file->f_cred);
if (isalpha(buf[0]))
- return __write_ports_addxprt(buf, net);
+ return __write_ports_addxprt(buf, net, file->f_cred);
return -EINVAL;
}
@@ -1126,6 +1130,8 @@
case 'Y':
case 'y':
case '1':
+ if (!nn->nfsd_serv)
+ return -EBUSY;
nfsd4_end_grace(nn);
break;
default:
@@ -1144,8 +1150,200 @@
* populating the filesystem.
*/
-static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
+/* Basically copying rpc_get_inode. */
+static struct inode *nfsd_get_inode(struct super_block *sb, umode_t mode)
{
+ struct inode *inode = new_inode(sb);
+ if (!inode)
+ return NULL;
+ /* Following advice from simple_fill_super documentation: */
+ inode->i_ino = iunique(sb, NFSD_MaxReserved);
+ inode->i_mode = mode;
+ inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+ switch (mode & S_IFMT) {
+ case S_IFDIR:
+ inode->i_fop = &simple_dir_operations;
+ inode->i_op = &simple_dir_inode_operations;
+ inc_nlink(inode);
+ default:
+ break;
+ }
+ return inode;
+}
+
+static int __nfsd_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode, struct nfsdfs_client *ncl)
+{
+ struct inode *inode;
+
+ inode = nfsd_get_inode(dir->i_sb, mode);
+ if (!inode)
+ return -ENOMEM;
+ if (ncl) {
+ inode->i_private = ncl;
+ kref_get(&ncl->cl_ref);
+ }
+ d_add(dentry, inode);
+ inc_nlink(dir);
+ fsnotify_mkdir(dir, dentry);
+ return 0;
+}
+
+static struct dentry *nfsd_mkdir(struct dentry *parent, struct nfsdfs_client *ncl, char *name)
+{
+ struct inode *dir = parent->d_inode;
+ struct dentry *dentry;
+ int ret = -ENOMEM;
+
+ inode_lock(dir);
+ dentry = d_alloc_name(parent, name);
+ if (!dentry)
+ goto out_err;
+ ret = __nfsd_mkdir(d_inode(parent), dentry, S_IFDIR | 0600, ncl);
+ if (ret)
+ goto out_err;
+out:
+ inode_unlock(dir);
+ return dentry;
+out_err:
+ dput(dentry);
+ dentry = ERR_PTR(ret);
+ goto out;
+}
+
+static void clear_ncl(struct inode *inode)
+{
+ struct nfsdfs_client *ncl = inode->i_private;
+
+ inode->i_private = NULL;
+ kref_put(&ncl->cl_ref, ncl->cl_release);
+}
+
+static struct nfsdfs_client *__get_nfsdfs_client(struct inode *inode)
+{
+ struct nfsdfs_client *nc = inode->i_private;
+
+ if (nc)
+ kref_get(&nc->cl_ref);
+ return nc;
+}
+
+struct nfsdfs_client *get_nfsdfs_client(struct inode *inode)
+{
+ struct nfsdfs_client *nc;
+
+ inode_lock_shared(inode);
+ nc = __get_nfsdfs_client(inode);
+ inode_unlock_shared(inode);
+ return nc;
+}
+/* from __rpc_unlink */
+static void nfsdfs_remove_file(struct inode *dir, struct dentry *dentry)
+{
+ int ret;
+
+ clear_ncl(d_inode(dentry));
+ dget(dentry);
+ ret = simple_unlink(dir, dentry);
+ d_delete(dentry);
+ dput(dentry);
+ WARN_ON_ONCE(ret);
+}
+
+static void nfsdfs_remove_files(struct dentry *root)
+{
+ struct dentry *dentry, *tmp;
+
+ list_for_each_entry_safe(dentry, tmp, &root->d_subdirs, d_child) {
+ if (!simple_positive(dentry)) {
+ WARN_ON_ONCE(1); /* I think this can't happen? */
+ continue;
+ }
+ nfsdfs_remove_file(d_inode(root), dentry);
+ }
+}
+
+/* XXX: cut'n'paste from simple_fill_super; figure out if we could share
+ * code instead. */
+static int nfsdfs_create_files(struct dentry *root,
+ const struct tree_descr *files)
+{
+ struct inode *dir = d_inode(root);
+ struct inode *inode;
+ struct dentry *dentry;
+ int i;
+
+ inode_lock(dir);
+ for (i = 0; files->name && files->name[0]; i++, files++) {
+ if (!files->name)
+ continue;
+ dentry = d_alloc_name(root, files->name);
+ if (!dentry)
+ goto out;
+ inode = nfsd_get_inode(d_inode(root)->i_sb,
+ S_IFREG | files->mode);
+ if (!inode) {
+ dput(dentry);
+ goto out;
+ }
+ inode->i_fop = files->ops;
+ inode->i_private = __get_nfsdfs_client(dir);
+ d_add(dentry, inode);
+ fsnotify_create(dir, dentry);
+ }
+ inode_unlock(dir);
+ return 0;
+out:
+ nfsdfs_remove_files(root);
+ inode_unlock(dir);
+ return -ENOMEM;
+}
+
+/* on success, returns positive number unique to that client. */
+struct dentry *nfsd_client_mkdir(struct nfsd_net *nn,
+ struct nfsdfs_client *ncl, u32 id,
+ const struct tree_descr *files)
+{
+ struct dentry *dentry;
+ char name[11];
+ int ret;
+
+ sprintf(name, "%u", id);
+
+ dentry = nfsd_mkdir(nn->nfsd_client_dir, ncl, name);
+ if (IS_ERR(dentry)) /* XXX: tossing errors? */
+ return NULL;
+ ret = nfsdfs_create_files(dentry, files);
+ if (ret) {
+ nfsd_client_rmdir(dentry);
+ return NULL;
+ }
+ return dentry;
+}
+
+/* Taken from __rpc_rmdir: */
+void nfsd_client_rmdir(struct dentry *dentry)
+{
+ struct inode *dir = d_inode(dentry->d_parent);
+ struct inode *inode = d_inode(dentry);
+ int ret;
+
+ inode_lock(dir);
+ nfsdfs_remove_files(dentry);
+ clear_ncl(inode);
+ dget(dentry);
+ ret = simple_rmdir(dir, dentry);
+ WARN_ON_ONCE(ret);
+ d_delete(dentry);
+ inode_unlock(dir);
+}
+
+static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc)
+{
+ struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
+ nfsd_net_id);
+ struct dentry *dentry;
+ int ret;
+
static const struct tree_descr nfsd_files[] = {
[NFSD_List] = {"exports", &exports_nfsd_operations, S_IRUGO},
[NFSD_Export_features] = {"export_features",
@@ -1174,15 +1372,39 @@
#endif
/* last one */ {""}
};
- get_net(sb->s_fs_info);
- return simple_fill_super(sb, 0x6e667364, nfsd_files);
+
+ ret = simple_fill_super(sb, 0x6e667364, nfsd_files);
+ if (ret)
+ return ret;
+ dentry = nfsd_mkdir(sb->s_root, NULL, "clients");
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+ nn->nfsd_client_dir = dentry;
+ return 0;
}
-static struct dentry *nfsd_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+static int nfsd_fs_get_tree(struct fs_context *fc)
{
- struct net *net = current->nsproxy->net_ns;
- return mount_ns(fs_type, flags, data, net, net->user_ns, nfsd_fill_super);
+ return get_tree_keyed(fc, nfsd_fill_super, get_net(fc->net_ns));
+}
+
+static void nfsd_fs_free_fc(struct fs_context *fc)
+{
+ if (fc->s_fs_info)
+ put_net(fc->s_fs_info);
+}
+
+static const struct fs_context_operations nfsd_fs_context_ops = {
+ .free = nfsd_fs_free_fc,
+ .get_tree = nfsd_fs_get_tree,
+};
+
+static int nfsd_init_fs_context(struct fs_context *fc)
+{
+ put_user_ns(fc->user_ns);
+ fc->user_ns = get_user_ns(fc->net_ns->user_ns);
+ fc->ops = &nfsd_fs_context_ops;
+ return 0;
}
static void nfsd_umount(struct super_block *sb)
@@ -1196,7 +1418,7 @@
static struct file_system_type nfsd_fs_type = {
.owner = THIS_MODULE,
.name = "nfsd",
- .mount = nfsd_mount,
+ .init_fs_context = nfsd_init_fs_context,
.kill_sb = nfsd_umount,
};
MODULE_ALIAS_FS("nfsd");
@@ -1229,6 +1451,7 @@
static __net_init int nfsd_init_net(struct net *net)
{
int retval;
+ struct vfsmount *mnt;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
retval = nfsd_export_init(net);
@@ -1237,16 +1460,36 @@
retval = nfsd_idmap_init(net);
if (retval)
goto out_idmap_error;
- nn->nfsd4_lease = 45; /* default lease time */
- nn->nfsd4_grace = 45;
+ nn->nfsd_versions = NULL;
+ nn->nfsd4_minorversions = NULL;
+ retval = nfsd_reply_cache_init(nn);
+ if (retval)
+ goto out_drc_error;
+ nn->nfsd4_lease = 90; /* default lease time */
+ nn->nfsd4_grace = 90;
nn->somebody_reclaimed = false;
+ nn->track_reclaim_completes = false;
nn->clverifier_counter = prandom_u32();
- nn->clientid_counter = prandom_u32();
+ nn->clientid_base = prandom_u32();
+ nn->clientid_counter = nn->clientid_base + 1;
+ nn->s2s_cp_cl_id = nn->clientid_counter++;
atomic_set(&nn->ntf_refcnt, 0);
init_waitqueue_head(&nn->ntf_wq);
+ seqlock_init(&nn->boot_lock);
+
+ mnt = vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", NULL);
+ if (IS_ERR(mnt)) {
+ retval = PTR_ERR(mnt);
+ goto out_mount_err;
+ }
+ nn->nfsd_mnt = mnt;
return 0;
+out_mount_err:
+ nfsd_reply_cache_shutdown(nn);
+out_drc_error:
+ nfsd_idmap_shutdown(net);
out_idmap_error:
nfsd_export_shutdown(net);
out_export_error:
@@ -1255,8 +1498,13 @@
static __net_exit void nfsd_exit_net(struct net *net)
{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ mntput(nn->nfsd_mnt);
+ nfsd_reply_cache_shutdown(nn);
nfsd_idmap_shutdown(net);
nfsd_export_shutdown(net);
+ nfsd_netns_free_versions(net_generic(net, nfsd_net_id));
}
static struct pernet_operations nfsd_net_ops = {
@@ -1283,13 +1531,8 @@
retval = nfsd4_init_pnfs();
if (retval)
goto out_free_slabs;
- retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */
- if (retval)
- goto out_exit_pnfs;
+ nfsd_fault_inject_init(); /* nfsd fault injection controls */
nfsd_stat_init(); /* Statistics */
- retval = nfsd_reply_cache_init();
- if (retval)
- goto out_free_stat;
nfsd_lockd_init(); /* lockd->nfsd callbacks */
retval = create_proc_exports_entry();
if (retval)
@@ -1303,11 +1546,8 @@
remove_proc_entry("fs/nfs", NULL);
out_free_lockd:
nfsd_lockd_shutdown();
- nfsd_reply_cache_shutdown();
-out_free_stat:
nfsd_stat_shutdown();
nfsd_fault_inject_cleanup();
-out_exit_pnfs:
nfsd4_exit_pnfs();
out_free_slabs:
nfsd4_free_slabs();
@@ -1320,7 +1560,6 @@
static void __exit exit_nfsd(void)
{
- nfsd_reply_cache_shutdown();
remove_proc_entry("fs/nfs/exports", NULL);
remove_proc_entry("fs/nfs", NULL);
nfsd_stat_shutdown();
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 0668999..af29475 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -17,10 +17,12 @@
#include <linux/nfs3.h>
#include <linux/nfs4.h>
#include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/svc_xprt.h>
#include <linux/sunrpc/msg_prot.h>
#include <uapi/linux/nfsd/debug.h>
+#include "netns.h"
#include "stats.h"
#include "export.h"
@@ -73,7 +75,7 @@
/*
* Function prototypes.
*/
-int nfsd_svc(int nrservs, struct net *net);
+int nfsd_svc(int nrservs, struct net *net, const struct cred *cred);
int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp);
int nfsd_nrthreads(struct net *);
@@ -85,6 +87,16 @@
void nfsd_destroy(struct net *net);
+struct nfsdfs_client {
+ struct kref cl_ref;
+ void (*cl_release)(struct kref *kref);
+};
+
+struct nfsdfs_client *get_nfsdfs_client(struct inode *);
+struct dentry *nfsd_client_mkdir(struct nfsd_net *nn,
+ struct nfsdfs_client *ncl, u32 id, const struct tree_descr *);
+void nfsd_client_rmdir(struct dentry *dentry);
+
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
#ifdef CONFIG_NFSD_V2_ACL
extern const struct svc_version nfsd_acl_version2;
@@ -98,10 +110,12 @@
#endif
#endif
+struct nfsd_net;
+
enum vers_op {NFSD_SET, NFSD_CLEAR, NFSD_TEST, NFSD_AVAIL };
-int nfsd_vers(int vers, enum vers_op change);
-int nfsd_minorversion(u32 minorversion, enum vers_op change);
-void nfsd_reset_versions(void);
+int nfsd_vers(struct nfsd_net *nn, int vers, enum vers_op change);
+int nfsd_minorversion(struct nfsd_net *nn, u32 minorversion, enum vers_op change);
+void nfsd_reset_versions(struct nfsd_net *nn);
int nfsd_create_serv(struct net *net);
extern int nfsd_max_blksize;
@@ -110,6 +124,12 @@
{
return rq->rq_prog == NFS_PROGRAM && rq->rq_vers == 4;
}
+static inline struct user_namespace *
+nfsd_user_namespace(const struct svc_rqst *rqstp)
+{
+ const struct cred *cred = rqstp->rq_xprt->xpt_cred;
+ return cred ? cred->user_ns : &init_user_ns;
+}
/*
* NFSv4 State
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 0d20fd1..c83ddac 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -172,6 +172,7 @@
struct nfsd_readargs *argp = rqstp->rq_argp;
struct nfsd_readres *resp = rqstp->rq_resp;
__be32 nfserr;
+ u32 eof;
dprintk("nfsd: READ %s %d bytes at %d\n",
SVCFH_fmt(&argp->fh),
@@ -195,7 +196,8 @@
nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh),
argp->offset,
rqstp->rq_vec, argp->vlen,
- &resp->count);
+ &resp->count,
+ &eof);
if (nfserr) return nfserr;
return fh_getattr(&resp->fh, &resp->stat);
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 89cb484..fdf7ed4 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -27,11 +27,30 @@
#include "cache.h"
#include "vfs.h"
#include "netns.h"
+#include "filecache.h"
#define NFSDDBG_FACILITY NFSDDBG_SVC
extern struct svc_program nfsd_program;
static int nfsd(void *vrqstp);
+#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+static int nfsd_acl_rpcbind_set(struct net *,
+ const struct svc_program *,
+ u32, int,
+ unsigned short,
+ unsigned short);
+static __be32 nfsd_acl_init_request(struct svc_rqst *,
+ const struct svc_program *,
+ struct svc_process_info *);
+#endif
+static int nfsd_rpcbind_set(struct net *,
+ const struct svc_program *,
+ u32, int,
+ unsigned short,
+ unsigned short);
+static __be32 nfsd_init_request(struct svc_rqst *,
+ const struct svc_program *,
+ struct svc_process_info *);
/*
* nfsd_mutex protects nn->nfsd_serv -- both the pointer itself and the members
@@ -86,6 +105,8 @@
.pg_class = "nfsd",
.pg_stats = &nfsd_acl_svcstats,
.pg_authenticate = &svc_set_client,
+ .pg_init_request = nfsd_acl_init_request,
+ .pg_rpcbind_set = nfsd_acl_rpcbind_set,
};
static struct svc_stat nfsd_acl_svcstats = {
@@ -105,7 +126,6 @@
#define NFSD_MINVERS 2
#define NFSD_NRVERS ARRAY_SIZE(nfsd_version)
-static const struct svc_version *nfsd_versions[NFSD_NRVERS];
struct svc_program nfsd_program = {
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
@@ -113,77 +133,136 @@
#endif
.pg_prog = NFS_PROGRAM, /* program number */
.pg_nvers = NFSD_NRVERS, /* nr of entries in nfsd_version */
- .pg_vers = nfsd_versions, /* version table */
+ .pg_vers = nfsd_version, /* version table */
.pg_name = "nfsd", /* program name */
.pg_class = "nfsd", /* authentication class */
.pg_stats = &nfsd_svcstats, /* version table */
.pg_authenticate = &svc_set_client, /* export authentication */
-
+ .pg_init_request = nfsd_init_request,
+ .pg_rpcbind_set = nfsd_rpcbind_set,
};
-static bool nfsd_supported_minorversions[NFSD_SUPPORTED_MINOR_VERSION + 1] = {
- [0] = 1,
- [1] = 1,
- [2] = 1,
-};
+static bool
+nfsd_support_version(int vers)
+{
+ if (vers >= NFSD_MINVERS && vers < NFSD_NRVERS)
+ return nfsd_version[vers] != NULL;
+ return false;
+}
-int nfsd_vers(int vers, enum vers_op change)
+static bool *
+nfsd_alloc_versions(void)
+{
+ bool *vers = kmalloc_array(NFSD_NRVERS, sizeof(bool), GFP_KERNEL);
+ unsigned i;
+
+ if (vers) {
+ /* All compiled versions are enabled by default */
+ for (i = 0; i < NFSD_NRVERS; i++)
+ vers[i] = nfsd_support_version(i);
+ }
+ return vers;
+}
+
+static bool *
+nfsd_alloc_minorversions(void)
+{
+ bool *vers = kmalloc_array(NFSD_SUPPORTED_MINOR_VERSION + 1,
+ sizeof(bool), GFP_KERNEL);
+ unsigned i;
+
+ if (vers) {
+ /* All minor versions are enabled by default */
+ for (i = 0; i <= NFSD_SUPPORTED_MINOR_VERSION; i++)
+ vers[i] = nfsd_support_version(4);
+ }
+ return vers;
+}
+
+void
+nfsd_netns_free_versions(struct nfsd_net *nn)
+{
+ kfree(nn->nfsd_versions);
+ kfree(nn->nfsd4_minorversions);
+ nn->nfsd_versions = NULL;
+ nn->nfsd4_minorversions = NULL;
+}
+
+static void
+nfsd_netns_init_versions(struct nfsd_net *nn)
+{
+ if (!nn->nfsd_versions) {
+ nn->nfsd_versions = nfsd_alloc_versions();
+ nn->nfsd4_minorversions = nfsd_alloc_minorversions();
+ if (!nn->nfsd_versions || !nn->nfsd4_minorversions)
+ nfsd_netns_free_versions(nn);
+ }
+}
+
+int nfsd_vers(struct nfsd_net *nn, int vers, enum vers_op change)
{
if (vers < NFSD_MINVERS || vers >= NFSD_NRVERS)
return 0;
switch(change) {
case NFSD_SET:
- nfsd_versions[vers] = nfsd_version[vers];
-#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
- if (vers < NFSD_ACL_NRVERS)
- nfsd_acl_versions[vers] = nfsd_acl_version[vers];
-#endif
+ if (nn->nfsd_versions)
+ nn->nfsd_versions[vers] = nfsd_support_version(vers);
break;
case NFSD_CLEAR:
- nfsd_versions[vers] = NULL;
-#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
- if (vers < NFSD_ACL_NRVERS)
- nfsd_acl_versions[vers] = NULL;
-#endif
+ nfsd_netns_init_versions(nn);
+ if (nn->nfsd_versions)
+ nn->nfsd_versions[vers] = false;
break;
case NFSD_TEST:
- return nfsd_versions[vers] != NULL;
+ if (nn->nfsd_versions)
+ return nn->nfsd_versions[vers];
+ /* Fallthrough */
case NFSD_AVAIL:
- return nfsd_version[vers] != NULL;
+ return nfsd_support_version(vers);
}
return 0;
}
static void
-nfsd_adjust_nfsd_versions4(void)
+nfsd_adjust_nfsd_versions4(struct nfsd_net *nn)
{
unsigned i;
for (i = 0; i <= NFSD_SUPPORTED_MINOR_VERSION; i++) {
- if (nfsd_supported_minorversions[i])
+ if (nn->nfsd4_minorversions[i])
return;
}
- nfsd_vers(4, NFSD_CLEAR);
+ nfsd_vers(nn, 4, NFSD_CLEAR);
}
-int nfsd_minorversion(u32 minorversion, enum vers_op change)
+int nfsd_minorversion(struct nfsd_net *nn, u32 minorversion, enum vers_op change)
{
if (minorversion > NFSD_SUPPORTED_MINOR_VERSION &&
change != NFSD_AVAIL)
return -1;
+
switch(change) {
case NFSD_SET:
- nfsd_supported_minorversions[minorversion] = true;
- nfsd_vers(4, NFSD_SET);
+ if (nn->nfsd4_minorversions) {
+ nfsd_vers(nn, 4, NFSD_SET);
+ nn->nfsd4_minorversions[minorversion] =
+ nfsd_vers(nn, 4, NFSD_TEST);
+ }
break;
case NFSD_CLEAR:
- nfsd_supported_minorversions[minorversion] = false;
- nfsd_adjust_nfsd_versions4();
+ nfsd_netns_init_versions(nn);
+ if (nn->nfsd4_minorversions) {
+ nn->nfsd4_minorversions[minorversion] = false;
+ nfsd_adjust_nfsd_versions4(nn);
+ }
break;
case NFSD_TEST:
- return nfsd_supported_minorversions[minorversion];
+ if (nn->nfsd4_minorversions)
+ return nn->nfsd4_minorversions[minorversion];
+ return nfsd_vers(nn, 4, NFSD_TEST);
case NFSD_AVAIL:
- return minorversion <= NFSD_SUPPORTED_MINOR_VERSION;
+ return minorversion <= NFSD_SUPPORTED_MINOR_VERSION &&
+ nfsd_vers(nn, 4, NFSD_AVAIL);
}
return 0;
}
@@ -205,7 +284,7 @@
return rv;
}
-static int nfsd_init_socks(struct net *net)
+static int nfsd_init_socks(struct net *net, const struct cred *cred)
{
int error;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
@@ -214,12 +293,12 @@
return 0;
error = svc_create_xprt(nn->nfsd_serv, "udp", net, PF_INET, NFS_PORT,
- SVC_SOCK_DEFAULTS);
+ SVC_SOCK_DEFAULTS, cred);
if (error < 0)
return error;
error = svc_create_xprt(nn->nfsd_serv, "tcp", net, PF_INET, NFS_PORT,
- SVC_SOCK_DEFAULTS);
+ SVC_SOCK_DEFAULTS, cred);
if (error < 0)
return error;
@@ -235,22 +314,17 @@
if (nfsd_users++)
return 0;
- /*
- * Readahead param cache - will no-op if it already exists.
- * (Note therefore results will be suboptimal if number of
- * threads is modified after nfsd start.)
- */
- ret = nfsd_racache_init(2*nrservs);
+ ret = nfsd_file_cache_init();
if (ret)
goto dec_users;
ret = nfs4_state_start();
if (ret)
- goto out_racache;
+ goto out_file_cache;
return 0;
-out_racache:
- nfsd_racache_shutdown();
+out_file_cache:
+ nfsd_file_cache_shutdown();
dec_users:
nfsd_users--;
return ret;
@@ -262,19 +336,44 @@
return;
nfs4_state_shutdown();
- nfsd_racache_shutdown();
+ nfsd_file_cache_shutdown();
}
-static bool nfsd_needs_lockd(void)
+static bool nfsd_needs_lockd(struct nfsd_net *nn)
{
-#if defined(CONFIG_NFSD_V3)
- return (nfsd_versions[2] != NULL) || (nfsd_versions[3] != NULL);
-#else
- return (nfsd_versions[2] != NULL);
-#endif
+ return nfsd_vers(nn, 2, NFSD_TEST) || nfsd_vers(nn, 3, NFSD_TEST);
}
-static int nfsd_startup_net(int nrservs, struct net *net)
+void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn)
+{
+ int seq = 0;
+
+ do {
+ read_seqbegin_or_lock(&nn->boot_lock, &seq);
+ /*
+ * This is opaque to client, so no need to byte-swap. Use
+ * __force to keep sparse happy. y2038 time_t overflow is
+ * irrelevant in this usage
+ */
+ verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec;
+ verf[1] = (__force __be32)nn->nfssvc_boot.tv_nsec;
+ } while (need_seqretry(&nn->boot_lock, seq));
+ done_seqretry(&nn->boot_lock, seq);
+}
+
+static void nfsd_reset_boot_verifier_locked(struct nfsd_net *nn)
+{
+ ktime_get_real_ts64(&nn->nfssvc_boot);
+}
+
+void nfsd_reset_boot_verifier(struct nfsd_net *nn)
+{
+ write_seqlock(&nn->boot_lock);
+ nfsd_reset_boot_verifier_locked(nn);
+ write_sequnlock(&nn->boot_lock);
+}
+
+static int nfsd_startup_net(int nrservs, struct net *net, const struct cred *cred)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
int ret;
@@ -285,12 +384,12 @@
ret = nfsd_startup_generic(nrservs);
if (ret)
return ret;
- ret = nfsd_init_socks(net);
+ ret = nfsd_init_socks(net, cred);
if (ret)
goto out_socks;
- if (nfsd_needs_lockd() && !nn->lockd_up) {
- ret = lockd_up(net);
+ if (nfsd_needs_lockd(nn) && !nn->lockd_up) {
+ ret = lockd_up(net, cred);
if (ret)
goto out_socks;
nn->lockd_up = 1;
@@ -317,6 +416,7 @@
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ nfsd_file_cache_purge(net);
nfs4_state_shutdown_net(net);
if (nn->lockd_up) {
lockd_down(net);
@@ -422,20 +522,20 @@
nfsd_export_flush(net);
}
-void nfsd_reset_versions(void)
+void nfsd_reset_versions(struct nfsd_net *nn)
{
int i;
for (i = 0; i < NFSD_NRVERS; i++)
- if (nfsd_vers(i, NFSD_TEST))
+ if (nfsd_vers(nn, i, NFSD_TEST))
return;
for (i = 0; i < NFSD_NRVERS; i++)
if (i != 4)
- nfsd_vers(i, NFSD_SET);
+ nfsd_vers(nn, i, NFSD_SET);
else {
int minor = 0;
- while (nfsd_minorversion(minor, NFSD_SET) >= 0)
+ while (nfsd_minorversion(nn, minor, NFSD_SET) >= 0)
minor++;
}
}
@@ -503,7 +603,7 @@
}
if (nfsd_max_blksize == 0)
nfsd_max_blksize = nfsd_get_default_max_blksize();
- nfsd_reset_versions();
+ nfsd_reset_versions(nn);
nn->nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
&nfsd_thread_sv_ops);
if (nn->nfsd_serv == NULL)
@@ -525,7 +625,7 @@
#endif
}
atomic_inc(&nn->ntf_refcnt);
- ktime_get_real_ts64(&nn->nfssvc_boot); /* record boot time */
+ nfsd_reset_boot_verifier(nn);
return 0;
}
@@ -623,7 +723,7 @@
* this is the first time nrservs is nonzero.
*/
int
-nfsd_svc(int nrservs, struct net *net)
+nfsd_svc(int nrservs, struct net *net, const struct cred *cred)
{
int error;
bool nfsd_up_before;
@@ -645,7 +745,7 @@
nfsd_up_before = nn->nfsd_net_up;
- error = nfsd_startup_net(nrservs, net);
+ error = nfsd_startup_net(nrservs, net, cred);
if (error)
goto out_destroy;
error = nn->nfsd_serv->sv_ops->svo_setup(nn->nfsd_serv,
@@ -667,6 +767,101 @@
return error;
}
+#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+static bool
+nfsd_support_acl_version(int vers)
+{
+ if (vers >= NFSD_ACL_MINVERS && vers < NFSD_ACL_NRVERS)
+ return nfsd_acl_version[vers] != NULL;
+ return false;
+}
+
+static int
+nfsd_acl_rpcbind_set(struct net *net, const struct svc_program *progp,
+ u32 version, int family, unsigned short proto,
+ unsigned short port)
+{
+ if (!nfsd_support_acl_version(version) ||
+ !nfsd_vers(net_generic(net, nfsd_net_id), version, NFSD_TEST))
+ return 0;
+ return svc_generic_rpcbind_set(net, progp, version, family,
+ proto, port);
+}
+
+static __be32
+nfsd_acl_init_request(struct svc_rqst *rqstp,
+ const struct svc_program *progp,
+ struct svc_process_info *ret)
+{
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ int i;
+
+ if (likely(nfsd_support_acl_version(rqstp->rq_vers) &&
+ nfsd_vers(nn, rqstp->rq_vers, NFSD_TEST)))
+ return svc_generic_init_request(rqstp, progp, ret);
+
+ ret->mismatch.lovers = NFSD_ACL_NRVERS;
+ for (i = NFSD_ACL_MINVERS; i < NFSD_ACL_NRVERS; i++) {
+ if (nfsd_support_acl_version(rqstp->rq_vers) &&
+ nfsd_vers(nn, i, NFSD_TEST)) {
+ ret->mismatch.lovers = i;
+ break;
+ }
+ }
+ if (ret->mismatch.lovers == NFSD_ACL_NRVERS)
+ return rpc_prog_unavail;
+ ret->mismatch.hivers = NFSD_ACL_MINVERS;
+ for (i = NFSD_ACL_NRVERS - 1; i >= NFSD_ACL_MINVERS; i--) {
+ if (nfsd_support_acl_version(rqstp->rq_vers) &&
+ nfsd_vers(nn, i, NFSD_TEST)) {
+ ret->mismatch.hivers = i;
+ break;
+ }
+ }
+ return rpc_prog_mismatch;
+}
+#endif
+
+static int
+nfsd_rpcbind_set(struct net *net, const struct svc_program *progp,
+ u32 version, int family, unsigned short proto,
+ unsigned short port)
+{
+ if (!nfsd_vers(net_generic(net, nfsd_net_id), version, NFSD_TEST))
+ return 0;
+ return svc_generic_rpcbind_set(net, progp, version, family,
+ proto, port);
+}
+
+static __be32
+nfsd_init_request(struct svc_rqst *rqstp,
+ const struct svc_program *progp,
+ struct svc_process_info *ret)
+{
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ int i;
+
+ if (likely(nfsd_vers(nn, rqstp->rq_vers, NFSD_TEST)))
+ return svc_generic_init_request(rqstp, progp, ret);
+
+ ret->mismatch.lovers = NFSD_NRVERS;
+ for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++) {
+ if (nfsd_vers(nn, i, NFSD_TEST)) {
+ ret->mismatch.lovers = i;
+ break;
+ }
+ }
+ if (ret->mismatch.lovers == NFSD_NRVERS)
+ return rpc_prog_unavail;
+ ret->mismatch.hivers = NFSD_MINVERS;
+ for (i = NFSD_NRVERS - 1; i >= NFSD_MINVERS; i--) {
+ if (nfsd_vers(nn, i, NFSD_TEST)) {
+ ret->mismatch.hivers = i;
+ break;
+ }
+ }
+ return rpc_prog_mismatch;
+}
/*
* This is the NFS server kernel thread
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 6b2e8b7..b51fe51 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -71,7 +71,7 @@
}
static __be32 *
-decode_sattr(__be32 *p, struct iattr *iap)
+decode_sattr(__be32 *p, struct iattr *iap, struct user_namespace *userns)
{
u32 tmp, tmp1;
@@ -86,12 +86,12 @@
iap->ia_mode = tmp;
}
if ((tmp = ntohl(*p++)) != (u32)-1) {
- iap->ia_uid = make_kuid(&init_user_ns, tmp);
+ iap->ia_uid = make_kuid(userns, tmp);
if (uid_valid(iap->ia_uid))
iap->ia_valid |= ATTR_UID;
}
if ((tmp = ntohl(*p++)) != (u32)-1) {
- iap->ia_gid = make_kgid(&init_user_ns, tmp);
+ iap->ia_gid = make_kgid(userns, tmp);
if (gid_valid(iap->ia_gid))
iap->ia_valid |= ATTR_GID;
}
@@ -129,6 +129,7 @@
encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
struct kstat *stat)
{
+ struct user_namespace *userns = nfsd_user_namespace(rqstp);
struct dentry *dentry = fhp->fh_dentry;
int type;
struct timespec64 time;
@@ -139,8 +140,8 @@
*p++ = htonl(nfs_ftypes[type >> 12]);
*p++ = htonl((u32) stat->mode);
*p++ = htonl((u32) stat->nlink);
- *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid));
- *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid));
+ *p++ = htonl((u32) from_kuid_munged(userns, stat->uid));
+ *p++ = htonl((u32) from_kgid_munged(userns, stat->gid));
if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) {
*p++ = htonl(NFS_MAXPATHLEN);
@@ -216,7 +217,7 @@
p = decode_fh(p, &args->fh);
if (!p)
return 0;
- p = decode_sattr(p, &args->attrs);
+ p = decode_sattr(p, &args->attrs, nfsd_user_namespace(rqstp));
return xdr_argsize_check(rqstp, p);
}
@@ -319,7 +320,7 @@
if ( !(p = decode_fh(p, &args->fh))
|| !(p = decode_filename(p, &args->name, &args->len)))
return 0;
- p = decode_sattr(p, &args->attrs);
+ p = decode_sattr(p, &args->attrs, nfsd_user_namespace(rqstp));
return xdr_argsize_check(rqstp, p);
}
@@ -398,7 +399,7 @@
return 0;
p += xdrlen;
}
- decode_sattr(p, &args->attrs);
+ decode_sattr(p, &args->attrs, nfsd_user_namespace(rqstp));
return 1;
}
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 0b15dac..46f56af 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -39,6 +39,7 @@
#include <linux/refcount.h>
#include <linux/sunrpc/svc_xprt.h>
#include "nfsfh.h"
+#include "nfsd.h"
typedef struct {
u32 cl_boot;
@@ -70,6 +71,7 @@
int cb_seq_status;
int cb_status;
bool cb_need_restart;
+ bool cb_holds_slot;
};
struct nfsd4_callback_ops {
@@ -315,6 +317,10 @@
clientid_t cl_clientid; /* generated by server */
nfs4_verifier cl_confirm; /* generated by server */
u32 cl_minorversion;
+ /* NFSv4.1 client implementation id: */
+ struct xdr_netobj cl_nii_domain;
+ struct xdr_netobj cl_nii_name;
+ struct timespec cl_nii_time;
/* for v4.0 and v4.1 callbacks: */
struct nfs4_cb_conn cl_cb_conn;
@@ -327,7 +333,7 @@
#define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \
1 << NFSD4_CLIENT_CB_KILL)
unsigned long cl_flags;
- struct rpc_cred *cl_cb_cred;
+ const struct cred *cl_cb_cred;
struct rpc_clnt *cl_cb_client;
u32 cl_cb_ident;
#define NFSD4_CB_UP 0
@@ -346,15 +352,21 @@
struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */
u32 cl_exchange_flags;
/* number of rpc's in progress over an associated session: */
- atomic_t cl_refcount;
+ atomic_t cl_rpc_users;
+ struct nfsdfs_client cl_nfsdfs;
struct nfs4_op_map cl_spo_must_allow;
+ /* debugging info directory under nfsd/clients/ : */
+ struct dentry *cl_nfsd_dentry;
+
/* for nfs41 callbacks */
/* We currently support a single back channel with a single slot */
unsigned long cl_cb_slot_busy;
struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */
/* wait here for slots */
struct net *net;
+ struct list_head async_copies; /* list of async copies */
+ spinlock_t async_lock; /* lock for async copies */
};
/* struct nfs4_client_reset
@@ -365,7 +377,8 @@
struct nfs4_client_reclaim {
struct list_head cr_strhash; /* hash by cr_name */
struct nfs4_client *cr_clp; /* pointer to associated clp */
- char cr_recdir[HEXDIR_LEN]; /* recover dir */
+ struct xdr_netobj cr_name; /* recovery dir name */
+ struct xdr_netobj cr_princhash;
};
/* A reasonable value for REPLAY_ISIZE was estimated as follows:
@@ -494,7 +507,7 @@
};
struct list_head fi_clnt_odstate;
/* One each for O_RDONLY, O_WRONLY, O_RDWR: */
- struct file * fi_fds[3];
+ struct nfsd_file *fi_fds[3];
/*
* Each open or lock stateid contributes 0-4 to the counts
* below depending on which bits are set in st_access_bitmap:
@@ -504,7 +517,7 @@
*/
atomic_t fi_access[2];
u32 fi_share_deny;
- struct file *fi_deleg_file;
+ struct nfsd_file *fi_deleg_file;
int fi_delegees;
struct knfsd_fh fi_fhandle;
bool fi_had_conflict;
@@ -553,7 +566,7 @@
spinlock_t ls_lock;
struct list_head ls_layouts;
u32 ls_layout_type;
- struct file *ls_file;
+ struct nfsd_file *ls_file;
struct nfsd4_callback ls_recall;
stateid_t ls_recall_sid;
bool ls_recalled;
@@ -573,6 +586,7 @@
NFSPROC4_CLNT_CB_NULL = 0,
NFSPROC4_CLNT_CB_RECALL,
NFSPROC4_CLNT_CB_LAYOUT,
+ NFSPROC4_CLNT_CB_OFFLOAD,
NFSPROC4_CLNT_CB_SEQUENCE,
NFSPROC4_CLNT_CB_NOTIFY_LOCK,
};
@@ -599,21 +613,24 @@
struct nfsd4_compound_state;
struct nfsd_net;
+struct nfsd4_copy;
extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate, struct svc_fh *fhp,
- stateid_t *stateid, int flags, struct file **filp, bool *tmp_file);
+ stateid_t *stateid, int flags, struct nfsd_file **filp);
__be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
stateid_t *stateid, unsigned char typemask,
struct nfs4_stid **s, struct nfsd_net *nn);
struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab,
void (*sc_free)(struct nfs4_stid *));
+int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy);
+void nfs4_free_cp_state(struct nfsd4_copy *copy);
void nfs4_unhash_stid(struct nfs4_stid *s);
void nfs4_put_stid(struct nfs4_stid *s);
void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid);
void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *);
extern void nfs4_release_reclaim(struct nfsd_net *);
-extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir,
+extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(struct xdr_netobj name,
struct nfsd_net *nn);
extern __be32 nfs4_check_open_reclaim(clientid_t *clid,
struct nfsd4_compound_state *cstate, struct nfsd_net *nn);
@@ -626,18 +643,22 @@
extern int nfsd4_create_callback_queue(void);
extern void nfsd4_destroy_callback_queue(void);
extern void nfsd4_shutdown_callback(struct nfs4_client *);
+extern void nfsd4_shutdown_copy(struct nfs4_client *clp);
extern void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp);
-extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name,
- struct nfsd_net *nn);
-extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn);
+extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name,
+ struct xdr_netobj princhash, struct nfsd_net *nn);
+extern bool nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn);
struct nfs4_file *find_file(struct knfsd_fh *fh);
void put_nfs4_file(struct nfs4_file *fi);
+extern void nfs4_put_copy(struct nfsd4_copy *copy);
+extern struct nfsd4_copy *
+find_async_copy(struct nfs4_client *clp, stateid_t *staetid);
static inline void get_nfs4_file(struct nfs4_file *fi)
{
refcount_inc(&fi->fi_ref);
}
-struct file *find_any_file(struct nfs4_file *f);
+struct nfsd_file *find_any_file(struct nfs4_file *f);
/* grace period management */
void nfsd4_end_grace(struct nfsd_net *nn);
@@ -652,7 +673,7 @@
/* nfs fault injection functions */
#ifdef CONFIG_NFSD_FAULT_INJECTION
-int nfsd_fault_inject_init(void);
+void nfsd_fault_inject_init(void);
void nfsd_fault_inject_cleanup(void);
u64 nfsd_inject_print_clients(void);
@@ -673,7 +694,7 @@
u64 nfsd_inject_recall_client_delegations(struct sockaddr_storage *, size_t);
u64 nfsd_inject_recall_delegations(u64);
#else /* CONFIG_NFSD_FAULT_INJECTION */
-static inline int nfsd_fault_inject_init(void) { return 0; }
+static inline void nfsd_fault_inject_init(void) {}
static inline void nfsd_fault_inject_cleanup(void) {}
#endif /* CONFIG_NFSD_FAULT_INJECTION */
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 80933e4..ffc78a0 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -126,6 +126,8 @@
DEFINE_NFSD_ERR_EVENT(write_err);
#include "state.h"
+#include "filecache.h"
+#include "vfs.h"
DECLARE_EVENT_CLASS(nfsd_stateid_class,
TP_PROTO(stateid_t *stp),
@@ -164,6 +166,144 @@
DEFINE_STATEID_EVENT(layout_recall_fail);
DEFINE_STATEID_EVENT(layout_recall_release);
+#define show_nf_flags(val) \
+ __print_flags(val, "|", \
+ { 1 << NFSD_FILE_HASHED, "HASHED" }, \
+ { 1 << NFSD_FILE_PENDING, "PENDING" }, \
+ { 1 << NFSD_FILE_BREAK_READ, "BREAK_READ" }, \
+ { 1 << NFSD_FILE_BREAK_WRITE, "BREAK_WRITE" }, \
+ { 1 << NFSD_FILE_REFERENCED, "REFERENCED"})
+
+/* FIXME: This should probably be fleshed out in the future. */
+#define show_nf_may(val) \
+ __print_flags(val, "|", \
+ { NFSD_MAY_READ, "READ" }, \
+ { NFSD_MAY_WRITE, "WRITE" }, \
+ { NFSD_MAY_NOT_BREAK_LEASE, "NOT_BREAK_LEASE" })
+
+DECLARE_EVENT_CLASS(nfsd_file_class,
+ TP_PROTO(struct nfsd_file *nf),
+ TP_ARGS(nf),
+ TP_STRUCT__entry(
+ __field(unsigned int, nf_hashval)
+ __field(void *, nf_inode)
+ __field(int, nf_ref)
+ __field(unsigned long, nf_flags)
+ __field(unsigned char, nf_may)
+ __field(struct file *, nf_file)
+ ),
+ TP_fast_assign(
+ __entry->nf_hashval = nf->nf_hashval;
+ __entry->nf_inode = nf->nf_inode;
+ __entry->nf_ref = atomic_read(&nf->nf_ref);
+ __entry->nf_flags = nf->nf_flags;
+ __entry->nf_may = nf->nf_may;
+ __entry->nf_file = nf->nf_file;
+ ),
+ TP_printk("hash=0x%x inode=0x%p ref=%d flags=%s may=%s file=%p",
+ __entry->nf_hashval,
+ __entry->nf_inode,
+ __entry->nf_ref,
+ show_nf_flags(__entry->nf_flags),
+ show_nf_may(__entry->nf_may),
+ __entry->nf_file)
+)
+
+#define DEFINE_NFSD_FILE_EVENT(name) \
+DEFINE_EVENT(nfsd_file_class, name, \
+ TP_PROTO(struct nfsd_file *nf), \
+ TP_ARGS(nf))
+
+DEFINE_NFSD_FILE_EVENT(nfsd_file_alloc);
+DEFINE_NFSD_FILE_EVENT(nfsd_file_put_final);
+DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash);
+DEFINE_NFSD_FILE_EVENT(nfsd_file_put);
+DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_release_locked);
+
+TRACE_EVENT(nfsd_file_acquire,
+ TP_PROTO(struct svc_rqst *rqstp, unsigned int hash,
+ struct inode *inode, unsigned int may_flags,
+ struct nfsd_file *nf, __be32 status),
+
+ TP_ARGS(rqstp, hash, inode, may_flags, nf, status),
+
+ TP_STRUCT__entry(
+ __field(__be32, xid)
+ __field(unsigned int, hash)
+ __field(void *, inode)
+ __field(unsigned int, may_flags)
+ __field(int, nf_ref)
+ __field(unsigned long, nf_flags)
+ __field(unsigned char, nf_may)
+ __field(struct file *, nf_file)
+ __field(__be32, status)
+ ),
+
+ TP_fast_assign(
+ __entry->xid = rqstp->rq_xid;
+ __entry->hash = hash;
+ __entry->inode = inode;
+ __entry->may_flags = may_flags;
+ __entry->nf_ref = nf ? atomic_read(&nf->nf_ref) : 0;
+ __entry->nf_flags = nf ? nf->nf_flags : 0;
+ __entry->nf_may = nf ? nf->nf_may : 0;
+ __entry->nf_file = nf ? nf->nf_file : NULL;
+ __entry->status = status;
+ ),
+
+ TP_printk("xid=0x%x hash=0x%x inode=0x%p may_flags=%s ref=%d nf_flags=%s nf_may=%s nf_file=0x%p status=%u",
+ be32_to_cpu(__entry->xid), __entry->hash, __entry->inode,
+ show_nf_may(__entry->may_flags), __entry->nf_ref,
+ show_nf_flags(__entry->nf_flags),
+ show_nf_may(__entry->nf_may), __entry->nf_file,
+ be32_to_cpu(__entry->status))
+);
+
+DECLARE_EVENT_CLASS(nfsd_file_search_class,
+ TP_PROTO(struct inode *inode, unsigned int hash, int found),
+ TP_ARGS(inode, hash, found),
+ TP_STRUCT__entry(
+ __field(struct inode *, inode)
+ __field(unsigned int, hash)
+ __field(int, found)
+ ),
+ TP_fast_assign(
+ __entry->inode = inode;
+ __entry->hash = hash;
+ __entry->found = found;
+ ),
+ TP_printk("hash=0x%x inode=0x%p found=%d", __entry->hash,
+ __entry->inode, __entry->found)
+);
+
+#define DEFINE_NFSD_FILE_SEARCH_EVENT(name) \
+DEFINE_EVENT(nfsd_file_search_class, name, \
+ TP_PROTO(struct inode *inode, unsigned int hash, int found), \
+ TP_ARGS(inode, hash, found))
+
+DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode_sync);
+DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode);
+DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_is_cached);
+
+TRACE_EVENT(nfsd_file_fsnotify_handle_event,
+ TP_PROTO(struct inode *inode, u32 mask),
+ TP_ARGS(inode, mask),
+ TP_STRUCT__entry(
+ __field(struct inode *, inode)
+ __field(unsigned int, nlink)
+ __field(umode_t, mode)
+ __field(u32, mask)
+ ),
+ TP_fast_assign(
+ __entry->inode = inode;
+ __entry->nlink = inode->i_nlink;
+ __entry->mode = inode->i_mode;
+ __entry->mask = mask;
+ ),
+ TP_printk("inode=0x%p nlink=%u mode=0%ho mask=0x%x", __entry->inode,
+ __entry->nlink, __entry->mode, __entry->mask)
+);
+
#endif /* _NFSD_TRACE_H */
#undef TRACE_INCLUDE_PATH
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index b53e763..bd0a385 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -44,38 +44,11 @@
#include "nfsd.h"
#include "vfs.h"
+#include "filecache.h"
#include "trace.h"
#define NFSDDBG_FACILITY NFSDDBG_FILEOP
-
-/*
- * This is a cache of readahead params that help us choose the proper
- * readahead strategy. Initially, we set all readahead parameters to 0
- * and let the VFS handle things.
- * If you increase the number of cached files very much, you'll need to
- * add a hash table here.
- */
-struct raparms {
- struct raparms *p_next;
- unsigned int p_count;
- ino_t p_ino;
- dev_t p_dev;
- int p_set;
- struct file_ra_state p_ra;
- unsigned int p_hindex;
-};
-
-struct raparm_hbucket {
- struct raparms *pb_head;
- spinlock_t pb_lock;
-} ____cacheline_aligned_in_smp;
-
-#define RAPARM_HASH_BITS 4
-#define RAPARM_HASH_SIZE (1<<RAPARM_HASH_BITS)
-#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1)
-static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE];
-
/*
* Called from nfsd_lookup and encode_dirent. Check if we have crossed
* a mount point.
@@ -396,10 +369,23 @@
bool get_write_count;
bool size_change = (iap->ia_valid & ATTR_SIZE);
- if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
+ if (iap->ia_valid & ATTR_SIZE) {
accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
- if (iap->ia_valid & ATTR_SIZE)
ftype = S_IFREG;
+ }
+
+ /*
+ * If utimes(2) and friends are called with times not NULL, we should
+ * not set NFSD_MAY_WRITE bit. Otherwise fh_verify->nfsd_permission
+ * will return EACCES, when the caller's effective UID does not match
+ * the owner of the file, and the caller is not privileged. In this
+ * situation, we should return EPERM(notify_change will return this).
+ */
+ if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME)) {
+ accmode |= NFSD_MAY_OWNER_OVERRIDE;
+ if (!(iap->ia_valid & (ATTR_ATIME_SET | ATTR_MTIME_SET)))
+ accmode |= NFSD_MAY_WRITE;
+ }
/* Callers that do fh_verify should do the fh_want_write: */
get_write_count = !fhp->fh_dentry;
@@ -541,8 +527,14 @@
__be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst,
u64 dst_pos, u64 count)
{
- return nfserrno(vfs_clone_file_range(src, src_pos, dst, dst_pos,
- count));
+ loff_t cloned;
+
+ cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0);
+ if (cloned < 0)
+ return nfserrno(cloned);
+ if (count && cloned != count)
+ return nfserrno(-EINVAL);
+ return 0;
}
ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
@@ -680,7 +672,7 @@
}
#endif /* CONFIG_NFSD_V3 */
-static int nfsd_open_break_lease(struct inode *inode, int access)
+int nfsd_open_break_lease(struct inode *inode, int access)
{
unsigned int mode;
@@ -696,8 +688,8 @@
* and additional flags.
* N.B. After this call fhp needs an fh_put
*/
-__be32
-nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
+static __be32
+__nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
int may_flags, struct file **filp)
{
struct path path;
@@ -707,25 +699,6 @@
__be32 err;
int host_err = 0;
- validate_process_creds();
-
- /*
- * If we get here, then the client has already done an "open",
- * and (hopefully) checked permission - so allow OWNER_OVERRIDE
- * in case a chmod has now revoked permission.
- *
- * Arguably we should also allow the owner override for
- * directories, but we never have and it doesn't seem to have
- * caused anyone a problem. If we were to change this, note
- * also that our filldir callbacks would need a variant of
- * lookup_one_len that doesn't check permissions.
- */
- if (type == S_IFREG)
- may_flags |= NFSD_MAY_OWNER_OVERRIDE;
- err = fh_verify(rqstp, fhp, type, may_flags);
- if (err)
- goto out;
-
path.mnt = fhp->fh_export->ex_path.mnt;
path.dentry = fhp->fh_dentry;
inode = d_inode(path.dentry);
@@ -779,67 +752,46 @@
out_nfserr:
err = nfserrno(host_err);
out:
+ return err;
+}
+
+__be32
+nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
+ int may_flags, struct file **filp)
+{
+ __be32 err;
+
+ validate_process_creds();
+ /*
+ * If we get here, then the client has already done an "open",
+ * and (hopefully) checked permission - so allow OWNER_OVERRIDE
+ * in case a chmod has now revoked permission.
+ *
+ * Arguably we should also allow the owner override for
+ * directories, but we never have and it doesn't seem to have
+ * caused anyone a problem. If we were to change this, note
+ * also that our filldir callbacks would need a variant of
+ * lookup_one_len that doesn't check permissions.
+ */
+ if (type == S_IFREG)
+ may_flags |= NFSD_MAY_OWNER_OVERRIDE;
+ err = fh_verify(rqstp, fhp, type, may_flags);
+ if (!err)
+ err = __nfsd_open(rqstp, fhp, type, may_flags, filp);
validate_process_creds();
return err;
}
-struct raparms *
-nfsd_init_raparms(struct file *file)
+__be32
+nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
+ int may_flags, struct file **filp)
{
- struct inode *inode = file_inode(file);
- dev_t dev = inode->i_sb->s_dev;
- ino_t ino = inode->i_ino;
- struct raparms *ra, **rap, **frap = NULL;
- int depth = 0;
- unsigned int hash;
- struct raparm_hbucket *rab;
+ __be32 err;
- hash = jhash_2words(dev, ino, 0xfeedbeef) & RAPARM_HASH_MASK;
- rab = &raparm_hash[hash];
-
- spin_lock(&rab->pb_lock);
- for (rap = &rab->pb_head; (ra = *rap); rap = &ra->p_next) {
- if (ra->p_ino == ino && ra->p_dev == dev)
- goto found;
- depth++;
- if (ra->p_count == 0)
- frap = rap;
- }
- depth = nfsdstats.ra_size;
- if (!frap) {
- spin_unlock(&rab->pb_lock);
- return NULL;
- }
- rap = frap;
- ra = *frap;
- ra->p_dev = dev;
- ra->p_ino = ino;
- ra->p_set = 0;
- ra->p_hindex = hash;
-found:
- if (rap != &rab->pb_head) {
- *rap = ra->p_next;
- ra->p_next = rab->pb_head;
- rab->pb_head = ra;
- }
- ra->p_count++;
- nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
- spin_unlock(&rab->pb_lock);
-
- if (ra->p_set)
- file->f_ra = ra->p_ra;
- return ra;
-}
-
-void nfsd_put_raparams(struct file *file, struct raparms *ra)
-{
- struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
-
- spin_lock(&rab->pb_lock);
- ra->p_ra = file->f_ra;
- ra->p_set = 1;
- ra->p_count--;
- spin_unlock(&rab->pb_lock);
+ validate_process_creds();
+ err = __nfsd_open(rqstp, fhp, type, may_flags, filp);
+ validate_process_creds();
+ return err;
}
/*
@@ -882,12 +834,23 @@
return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
}
+static u32 nfsd_eof_on_read(struct file *file, loff_t offset, ssize_t len,
+ size_t expected)
+{
+ if (expected != 0 && len == 0)
+ return 1;
+ if (offset+len >= i_size_read(file_inode(file)))
+ return 1;
+ return 0;
+}
+
static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset,
- unsigned long *count, int host_err)
+ unsigned long *count, u32 *eof, ssize_t host_err)
{
if (host_err >= 0) {
nfsdstats.io_read += host_err;
+ *eof = nfsd_eof_on_read(file, offset, host_err, *count);
*count = host_err;
fsnotify_access(file);
trace_nfsd_read_io_done(rqstp, fhp, offset, *count);
@@ -899,7 +862,8 @@
}
__be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
- struct file *file, loff_t offset, unsigned long *count)
+ struct file *file, loff_t offset, unsigned long *count,
+ u32 *eof)
{
struct splice_desc sd = {
.len = 0,
@@ -907,25 +871,27 @@
.pos = offset,
.u.data = rqstp,
};
- int host_err;
+ ssize_t host_err;
trace_nfsd_read_splice(rqstp, fhp, offset, *count);
rqstp->rq_next_page = rqstp->rq_respages + 1;
host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
- return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err);
+ return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
}
__be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset,
- struct kvec *vec, int vlen, unsigned long *count)
+ struct kvec *vec, int vlen, unsigned long *count,
+ u32 *eof)
{
struct iov_iter iter;
- int host_err;
+ loff_t ppos = offset;
+ ssize_t host_err;
trace_nfsd_read_vector(rqstp, fhp, offset, *count);
- iov_iter_kvec(&iter, READ | ITER_KVEC, vec, vlen, *count);
- host_err = vfs_iter_read(file, &iter, &offset, 0);
- return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err);
+ iov_iter_kvec(&iter, READ, vec, vlen, *count);
+ host_err = vfs_iter_read(file, &iter, &ppos, 0);
+ return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
}
/*
@@ -999,15 +965,19 @@
if (stable && !use_wgather)
flags |= RWF_SYNC;
- iov_iter_kvec(&iter, WRITE | ITER_KVEC, vec, vlen, *cnt);
+ iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt);
host_err = vfs_iter_write(file, &iter, &pos, flags);
if (host_err < 0)
goto out_nfserr;
nfsdstats.io_write += *cnt;
fsnotify_modify(file);
- if (stable && use_wgather)
+ if (stable && use_wgather) {
host_err = wait_for_concurrent_writes(file);
+ if (host_err < 0)
+ nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
+ nfsd_net_id));
+ }
out_nfserr:
if (host_err >= 0) {
@@ -1028,27 +998,25 @@
* N.B. After this call fhp needs an fh_put
*/
__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
- loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
+ loff_t offset, struct kvec *vec, int vlen, unsigned long *count,
+ u32 *eof)
{
+ struct nfsd_file *nf;
struct file *file;
- struct raparms *ra;
__be32 err;
trace_nfsd_read_start(rqstp, fhp, offset, *count);
- err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
+ err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf);
if (err)
return err;
- ra = nfsd_init_raparms(file);
-
+ file = nf->nf_file;
if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &rqstp->rq_flags))
- err = nfsd_splice_read(rqstp, fhp, file, offset, count);
+ err = nfsd_splice_read(rqstp, fhp, file, offset, count, eof);
else
- err = nfsd_readv(rqstp, fhp, file, offset, vec, vlen, count);
+ err = nfsd_readv(rqstp, fhp, file, offset, vec, vlen, count, eof);
- if (ra)
- nfsd_put_raparams(file, ra);
- fput(file);
+ nfsd_file_put(nf);
trace_nfsd_read_done(rqstp, fhp, offset, *count);
@@ -1064,17 +1032,18 @@
nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
struct kvec *vec, int vlen, unsigned long *cnt, int stable)
{
- struct file *file = NULL;
- __be32 err = 0;
+ struct nfsd_file *nf;
+ __be32 err;
trace_nfsd_write_start(rqstp, fhp, offset, *cnt);
- err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
+ err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_WRITE, &nf);
if (err)
goto out;
- err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stable);
- fput(file);
+ err = nfsd_vfs_write(rqstp, fhp, nf->nf_file, offset, vec,
+ vlen, cnt, stable);
+ nfsd_file_put(nf);
out:
trace_nfsd_write_done(rqstp, fhp, offset, *cnt);
return err;
@@ -1094,9 +1063,9 @@
nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
loff_t offset, unsigned long count)
{
- struct file *file;
- loff_t end = LLONG_MAX;
- __be32 err = nfserr_inval;
+ struct nfsd_file *nf;
+ loff_t end = LLONG_MAX;
+ __be32 err = nfserr_inval;
if (offset < 0)
goto out;
@@ -1106,20 +1075,27 @@
goto out;
}
- err = nfsd_open(rqstp, fhp, S_IFREG,
- NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &file);
+ err = nfsd_file_acquire(rqstp, fhp,
+ NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &nf);
if (err)
goto out;
if (EX_ISSYNC(fhp->fh_export)) {
- int err2 = vfs_fsync_range(file, offset, end, 0);
+ int err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
- if (err2 != -EINVAL)
- err = nfserrno(err2);
- else
+ switch (err2) {
+ case 0:
+ break;
+ case -EINVAL:
err = nfserr_notsupp;
+ break;
+ default:
+ err = nfserrno(err2);
+ nfsd_reset_boot_verifier(net_generic(nf->nf_net,
+ nfsd_net_id));
+ }
}
- fput(file);
+ nfsd_file_put(nf);
out:
return err;
}
@@ -1276,7 +1252,6 @@
int type, dev_t rdev, struct svc_fh *resfhp)
{
struct dentry *dentry, *dchild = NULL;
- struct inode *dirp;
__be32 err;
int host_err;
@@ -1288,7 +1263,6 @@
return err;
dentry = fhp->fh_dentry;
- dirp = d_inode(dentry);
host_err = fh_want_write(fhp);
if (host_err)
@@ -1409,6 +1383,7 @@
*created = 1;
break;
}
+ /* fall through */
case NFS4_CREATE_EXCLUSIVE4_1:
if ( d_inode(dchild)->i_mtime.tv_sec == v_mtime
&& d_inode(dchild)->i_atime.tv_sec == v_atime
@@ -1417,7 +1392,7 @@
*created = 1;
goto set_attr;
}
- /* fallthru */
+ /* fall through */
case NFS3_CREATE_GUARDED:
err = nfserr_exist;
}
@@ -1641,6 +1616,26 @@
goto out_unlock;
}
+static void
+nfsd_close_cached_files(struct dentry *dentry)
+{
+ struct inode *inode = d_inode(dentry);
+
+ if (inode && S_ISREG(inode->i_mode))
+ nfsd_file_close_inode_sync(inode);
+}
+
+static bool
+nfsd_has_cached_files(struct dentry *dentry)
+{
+ bool ret = false;
+ struct inode *inode = d_inode(dentry);
+
+ if (inode && S_ISREG(inode->i_mode))
+ ret = nfsd_file_is_cached(inode);
+ return ret;
+}
+
/*
* Rename a file
* N.B. After this call _both_ ffhp and tfhp need an fh_put
@@ -1653,6 +1648,7 @@
struct inode *fdir, *tdir;
__be32 err;
int host_err;
+ bool has_cached = false;
err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE);
if (err)
@@ -1671,6 +1667,7 @@
if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen))
goto out;
+retry:
host_err = fh_want_write(ffhp);
if (host_err) {
err = nfserrno(host_err);
@@ -1710,11 +1707,16 @@
if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry)
goto out_dput_new;
- host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0);
- if (!host_err) {
- host_err = commit_metadata(tfhp);
- if (!host_err)
- host_err = commit_metadata(ffhp);
+ if (nfsd_has_cached_files(ndentry)) {
+ has_cached = true;
+ goto out_dput_old;
+ } else {
+ host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0);
+ if (!host_err) {
+ host_err = commit_metadata(tfhp);
+ if (!host_err)
+ host_err = commit_metadata(ffhp);
+ }
}
out_dput_new:
dput(ndentry);
@@ -1727,12 +1729,26 @@
* as that would do the wrong thing if the two directories
* were the same, so again we do it by hand.
*/
- fill_post_wcc(ffhp);
- fill_post_wcc(tfhp);
+ if (!has_cached) {
+ fill_post_wcc(ffhp);
+ fill_post_wcc(tfhp);
+ }
unlock_rename(tdentry, fdentry);
ffhp->fh_locked = tfhp->fh_locked = false;
fh_drop_write(ffhp);
+ /*
+ * If the target dentry has cached open files, then we need to try to
+ * close them prior to doing the rename. Flushing delayed fput
+ * shouldn't be done with locks held however, so we delay it until this
+ * point and then reattempt the whole shebang.
+ */
+ if (has_cached) {
+ has_cached = false;
+ nfsd_close_cached_files(ndentry);
+ dput(ndentry);
+ goto retry;
+ }
out:
return err;
}
@@ -1768,25 +1784,30 @@
rdentry = lookup_one_len(fname, dentry, flen);
host_err = PTR_ERR(rdentry);
if (IS_ERR(rdentry))
- goto out_nfserr;
+ goto out_drop_write;
if (d_really_is_negative(rdentry)) {
dput(rdentry);
- err = nfserr_noent;
- goto out;
+ host_err = -ENOENT;
+ goto out_drop_write;
}
if (!type)
type = d_inode(rdentry)->i_mode & S_IFMT;
- if (type != S_IFDIR)
+ if (type != S_IFDIR) {
+ nfsd_close_cached_files(rdentry);
host_err = vfs_unlink(dirp, rdentry, NULL);
- else
+ } else {
host_err = vfs_rmdir(dirp, rdentry);
+ }
+
if (!host_err)
host_err = commit_metadata(fhp);
dput(rdentry);
+out_drop_write:
+ fh_drop_write(fhp);
out_nfserr:
err = nfserrno(host_err);
out:
@@ -2054,63 +2075,3 @@
return err? nfserrno(err) : 0;
}
-
-void
-nfsd_racache_shutdown(void)
-{
- struct raparms *raparm, *last_raparm;
- unsigned int i;
-
- dprintk("nfsd: freeing readahead buffers.\n");
-
- for (i = 0; i < RAPARM_HASH_SIZE; i++) {
- raparm = raparm_hash[i].pb_head;
- while(raparm) {
- last_raparm = raparm;
- raparm = raparm->p_next;
- kfree(last_raparm);
- }
- raparm_hash[i].pb_head = NULL;
- }
-}
-/*
- * Initialize readahead param cache
- */
-int
-nfsd_racache_init(int cache_size)
-{
- int i;
- int j = 0;
- int nperbucket;
- struct raparms **raparm = NULL;
-
-
- if (raparm_hash[0].pb_head)
- return 0;
- nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE);
- nperbucket = max(2, nperbucket);
- cache_size = nperbucket * RAPARM_HASH_SIZE;
-
- dprintk("nfsd: allocating %d readahead buffers.\n", cache_size);
-
- for (i = 0; i < RAPARM_HASH_SIZE; i++) {
- spin_lock_init(&raparm_hash[i].pb_lock);
-
- raparm = &raparm_hash[i].pb_head;
- for (j = 0; j < nperbucket; j++) {
- *raparm = kzalloc(sizeof(struct raparms), GFP_KERNEL);
- if (!*raparm)
- goto out_nomem;
- raparm = &(*raparm)->p_next;
- }
- *raparm = NULL;
- }
-
- nfsdstats.ra_size = cache_size;
- return 0;
-
-out_nomem:
- dprintk("nfsd: kmalloc failed, freeing readahead buffers\n");
- nfsd_racache_shutdown();
- return -ENOMEM;
-}
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index a7e1073..a13fd9d 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -40,8 +40,6 @@
typedef int (*nfsd_filldir_t)(void *, const char *, int, loff_t, u64, unsigned);
/* nfsd/vfs.c */
-int nfsd_racache_init(int);
-void nfsd_racache_shutdown(void);
int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
struct svc_export **expp);
__be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *,
@@ -75,18 +73,23 @@
__be32 nfsd_commit(struct svc_rqst *, struct svc_fh *,
loff_t, unsigned long);
#endif /* CONFIG_NFSD_V3 */
+int nfsd_open_break_lease(struct inode *, int);
__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t,
int, struct file **);
-struct raparms;
+__be32 nfsd_open_verified(struct svc_rqst *, struct svc_fh *, umode_t,
+ int, struct file **);
__be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset,
- unsigned long *count);
+ unsigned long *count,
+ u32 *eof);
__be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset,
struct kvec *vec, int vlen,
- unsigned long *count);
+ unsigned long *count,
+ u32 *eof);
__be32 nfsd_read(struct svc_rqst *, struct svc_fh *,
- loff_t, struct kvec *, int, unsigned long *);
+ loff_t, struct kvec *, int, unsigned long *,
+ u32 *eof);
__be32 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t,
struct kvec *, int, unsigned long *, int);
__be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
@@ -115,13 +118,13 @@
__be32 nfsd_permission(struct svc_rqst *, struct svc_export *,
struct dentry *, int);
-struct raparms *nfsd_init_raparms(struct file *file);
-void nfsd_put_raparams(struct file *file, struct raparms *ra);
-
static inline int fh_want_write(struct svc_fh *fh)
{
- int ret = mnt_want_write(fh->fh_export->ex_path.mnt);
+ int ret;
+ if (fh->fh_want_write)
+ return 0;
+ ret = mnt_want_write(fh->fh_export->ex_path.mnt);
if (!ret)
fh->fh_want_write = true;
return ret;
@@ -149,23 +152,4 @@
|| createmode == NFS4_CREATE_EXCLUSIVE4_1;
}
-static inline bool nfsd_eof_on_read(long requested, long read,
- loff_t offset, loff_t size)
-{
- /* We assume a short read means eof: */
- if (requested > read)
- return true;
- /*
- * A non-short read might also reach end of file. The spec
- * still requires us to set eof in that case.
- *
- * Further operations may have modified the file size since
- * the read, so the following check is not atomic with the read.
- * We've only seen that cause a problem for a client in the case
- * where the read returned a count of 0 without setting eof.
- * That case was fixed by the addition of the above check.
- */
- return (offset + read >= size);
-}
-
#endif /* LINUX_NFSD_VFS_H */
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
index 2cb29e9..99ff9f4 100644
--- a/fs/nfsd/xdr3.h
+++ b/fs/nfsd/xdr3.h
@@ -151,7 +151,7 @@
__be32 status;
struct svc_fh fh;
unsigned long count;
- int eof;
+ __u32 eof;
};
struct nfsd3_writeres {
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 17c453a..f4737d6 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -273,15 +273,14 @@
struct nfsd4_read {
- stateid_t rd_stateid; /* request */
- u64 rd_offset; /* request */
- u32 rd_length; /* request */
- int rd_vlen;
- struct file *rd_filp;
- bool rd_tmp_file;
+ stateid_t rd_stateid; /* request */
+ u64 rd_offset; /* request */
+ u32 rd_length; /* request */
+ int rd_vlen;
+ struct nfsd_file *rd_nf;
- struct svc_rqst *rd_rqstp; /* response */
- struct svc_fh * rd_fhp; /* response */
+ struct svc_rqst *rd_rqstp; /* response */
+ struct svc_fh *rd_fhp; /* response */
};
struct nfsd4_readdir {
@@ -410,6 +409,9 @@
int spa_how;
u32 spo_must_enforce[3];
u32 spo_must_allow[3];
+ struct xdr_netobj nii_domain;
+ struct xdr_netobj nii_name;
+ struct timespec64 nii_time;
};
struct nfsd4_sequence {
@@ -472,7 +474,7 @@
u32 lc_reclaim; /* request */
u32 lc_newoffset; /* request */
u64 lc_last_wr; /* request */
- struct timespec lc_mtime; /* request */
+ struct timespec64 lc_mtime; /* request */
u32 lc_layout_type; /* request */
u32 lc_up_len; /* layout length */
void *lc_up_layout; /* decoded by callback */
@@ -511,6 +513,7 @@
u64 wr_bytes_written;
u32 wr_stable_how;
nfs4_verifier wr_verifier;
+ stateid_t cb_stateid;
};
struct nfsd4_copy {
@@ -526,6 +529,23 @@
/* response */
struct nfsd42_write_res cp_res;
+
+ /* for cb_offload */
+ struct nfsd4_callback cp_cb;
+ __be32 nfserr;
+ struct knfsd_fh fh;
+
+ struct nfs4_client *cp_clp;
+
+ struct nfsd_file *nf_src;
+ struct nfsd_file *nf_dst;
+
+ stateid_t cp_stateid;
+
+ struct list_head copies;
+ struct task_struct *copy_task;
+ refcount_t refcount;
+ bool stopped;
};
struct nfsd4_seek {
@@ -539,6 +559,15 @@
loff_t seek_pos;
};
+struct nfsd4_offload_status {
+ /* request */
+ stateid_t stateid;
+
+ /* response */
+ u64 count;
+ u32 status;
+};
+
struct nfsd4_op {
int opnum;
const struct nfsd4_operation * opdesc;
@@ -597,6 +626,7 @@
struct nfsd4_fallocate deallocate;
struct nfsd4_clone clone;
struct nfsd4_copy copy;
+ struct nfsd4_offload_status offload_status;
struct nfsd4_seek seek;
} u;
struct nfs4_replay * replay;
diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
index 517239a..547cf07 100644
--- a/fs/nfsd/xdr4cb.h
+++ b/fs/nfsd/xdr4cb.h
@@ -38,3 +38,13 @@
#define NFS4_dec_cb_notify_lock_sz (cb_compound_dec_hdr_sz + \
cb_sequence_dec_sz + \
op_dec_sz)
+#define enc_cb_offload_info_sz (1 + 1 + 2 + 1 + \
+ XDR_QUADLEN(NFS4_VERIFIER_SIZE))
+#define NFS4_enc_cb_offload_sz (cb_compound_enc_hdr_sz + \
+ cb_sequence_enc_sz + \
+ enc_nfs4_fh_sz + \
+ enc_stateid_sz + \
+ enc_cb_offload_info_sz)
+#define NFS4_dec_cb_offload_sz (cb_compound_dec_hdr_sz + \
+ cb_sequence_dec_sz + \
+ op_dec_sz)