Update Linux to v5.4.2
Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/fs/nfs/flexfilelayout/Makefile b/fs/nfs/flexfilelayout/Makefile
index 1d2c9f6..49f0342 100644
--- a/fs/nfs/flexfilelayout/Makefile
+++ b/fs/nfs/flexfilelayout/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
#
# Makefile for the pNFS Flexfile Layout Driver kernel module
#
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index e0fe9a0..5657b7f 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Module for pnfs flexfile layout driver.
*
@@ -7,8 +8,10 @@
*/
#include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
#include <linux/nfs_page.h>
#include <linux/module.h>
+#include <linux/sched/mm.h>
#include <linux/sunrpc/metrics.h>
@@ -27,8 +30,7 @@
#define FF_LAYOUT_POLL_RETRY_MAX (15*HZ)
#define FF_LAYOUTRETURN_MAXERR 20
-
-static struct group_info *ff_zero_group;
+static unsigned short io_maxretrans;
static void ff_layout_read_record_layoutstats_done(struct rpc_task *task,
struct nfs_pgio_header *hdr);
@@ -226,16 +228,14 @@
static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror)
{
- struct rpc_cred *cred;
+ const struct cred *cred;
ff_layout_remove_mirror(mirror);
kfree(mirror->fh_versions);
cred = rcu_access_pointer(mirror->ro_cred);
- if (cred)
- put_rpccred(cred);
+ put_cred(cred);
cred = rcu_access_pointer(mirror->rw_cred);
- if (cred)
- put_rpccred(cred);
+ put_cred(cred);
nfs4_ff_layout_put_deviceid(mirror->mirror_ds);
kfree(mirror);
}
@@ -413,8 +413,10 @@
for (i = 0; i < fls->mirror_array_cnt; i++) {
struct nfs4_ff_layout_mirror *mirror;
- struct auth_cred acred = { .group_info = ff_zero_group };
- struct rpc_cred __rcu *cred;
+ struct cred *kcred;
+ const struct cred __rcu *cred;
+ kuid_t uid;
+ kgid_t gid;
u32 ds_count, fh_count, id;
int j;
@@ -482,21 +484,28 @@
if (rc)
goto out_err_free;
- acred.uid = make_kuid(&init_user_ns, id);
+ uid = make_kuid(&init_user_ns, id);
/* group */
rc = decode_name(&stream, &id);
if (rc)
goto out_err_free;
- acred.gid = make_kgid(&init_user_ns, id);
+ gid = make_kgid(&init_user_ns, id);
- /* find the cred for it */
- rcu_assign_pointer(cred, rpc_lookup_generic_cred(&acred, 0, gfp_flags));
- if (IS_ERR(cred)) {
- rc = PTR_ERR(cred);
- goto out_err_free;
+ if (gfp_flags & __GFP_FS)
+ kcred = prepare_kernel_cred(NULL);
+ else {
+ unsigned int nofs_flags = memalloc_nofs_save();
+ kcred = prepare_kernel_cred(NULL);
+ memalloc_nofs_restore(nofs_flags);
}
+ rc = -ENOMEM;
+ if (!kcred)
+ goto out_err_free;
+ kcred->fsuid = uid;
+ kcred->fsgid = gid;
+ cred = RCU_INITIALIZER(kcred);
if (lgr->range.iomode == IOMODE_READ)
rcu_assign_pointer(fls->mirror_array[i]->ro_cred, cred);
@@ -519,8 +528,8 @@
dprintk("%s: iomode %s uid %u gid %u\n", __func__,
lgr->range.iomode == IOMODE_READ ? "READ" : "RW",
- from_kuid(&init_user_ns, acred.uid),
- from_kgid(&init_user_ns, acred.gid));
+ from_kuid(&init_user_ns, uid),
+ from_kgid(&init_user_ns, gid));
}
p = xdr_inline_decode(&stream, 4);
@@ -783,30 +792,82 @@
}
}
+static void
+ff_layout_mark_ds_unreachable(struct pnfs_layout_segment *lseg, int idx)
+{
+ struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
+
+ if (devid)
+ nfs4_mark_deviceid_unavailable(devid);
+}
+
+static void
+ff_layout_mark_ds_reachable(struct pnfs_layout_segment *lseg, int idx)
+{
+ struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
+
+ if (devid)
+ nfs4_mark_deviceid_available(devid);
+}
+
static struct nfs4_pnfs_ds *
-ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg,
- int start_idx,
- int *best_idx)
+ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg,
+ int start_idx, int *best_idx,
+ bool check_device)
{
struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
+ struct nfs4_ff_layout_mirror *mirror;
struct nfs4_pnfs_ds *ds;
bool fail_return = false;
int idx;
- /* mirrors are sorted by efficiency */
+ /* mirrors are initially sorted by efficiency */
for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) {
if (idx+1 == fls->mirror_array_cnt)
- fail_return = true;
- ds = nfs4_ff_layout_prepare_ds(lseg, idx, fail_return);
- if (ds) {
- *best_idx = idx;
- return ds;
- }
+ fail_return = !check_device;
+
+ mirror = FF_LAYOUT_COMP(lseg, idx);
+ ds = nfs4_ff_layout_prepare_ds(lseg, mirror, fail_return);
+ if (!ds)
+ continue;
+
+ if (check_device &&
+ nfs4_test_deviceid_unavailable(&mirror->mirror_ds->id_node))
+ continue;
+
+ *best_idx = idx;
+ return ds;
}
return NULL;
}
+static struct nfs4_pnfs_ds *
+ff_layout_choose_any_ds_for_read(struct pnfs_layout_segment *lseg,
+ int start_idx, int *best_idx)
+{
+ return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, false);
+}
+
+static struct nfs4_pnfs_ds *
+ff_layout_choose_valid_ds_for_read(struct pnfs_layout_segment *lseg,
+ int start_idx, int *best_idx)
+{
+ return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, true);
+}
+
+static struct nfs4_pnfs_ds *
+ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg,
+ int start_idx, int *best_idx)
+{
+ struct nfs4_pnfs_ds *ds;
+
+ ds = ff_layout_choose_valid_ds_for_read(lseg, start_idx, best_idx);
+ if (ds)
+ return ds;
+ return ff_layout_choose_any_ds_for_read(lseg, start_idx, best_idx);
+}
+
static void
ff_layout_pg_get_read(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req,
@@ -814,7 +875,7 @@
{
pnfs_put_lseg(pgio->pg_lseg);
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
- req->wb_context,
+ nfs_req_openctx(req),
0,
NFS4_MAX_UINT64,
IOMODE_READ,
@@ -868,13 +929,21 @@
pgm = &pgio->pg_mirrors[0];
pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize;
+ if (NFS_SERVER(pgio->pg_inode)->flags &
+ (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR))
+ pgio->pg_maxretrans = io_maxretrans;
return;
out_nolseg:
if (pgio->pg_error < 0)
return;
out_mds:
+ trace_pnfs_mds_fallback_pg_init_read(pgio->pg_inode,
+ 0, NFS4_MAX_UINT64, IOMODE_READ,
+ NFS_I(pgio->pg_inode)->layout,
+ pgio->pg_lseg);
pnfs_put_lseg(pgio->pg_lseg);
pgio->pg_lseg = NULL;
+ pgio->pg_maxretrans = 0;
nfs_pageio_reset_read_mds(pgio);
}
@@ -893,7 +962,7 @@
pnfs_generic_pg_check_layout(pgio);
if (!pgio->pg_lseg) {
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
- req->wb_context,
+ nfs_req_openctx(req),
0,
NFS4_MAX_UINT64,
IOMODE_RW,
@@ -920,7 +989,8 @@
goto out_mds;
for (i = 0; i < pgio->pg_mirror_count; i++) {
- ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, i, true);
+ mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i);
+ ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, mirror, true);
if (!ds) {
if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
goto out_mds;
@@ -931,15 +1001,22 @@
goto retry;
}
pgm = &pgio->pg_mirrors[i];
- mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i);
pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].wsize;
}
+ if (NFS_SERVER(pgio->pg_inode)->flags &
+ (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR))
+ pgio->pg_maxretrans = io_maxretrans;
return;
out_mds:
+ trace_pnfs_mds_fallback_pg_init_write(pgio->pg_inode,
+ 0, NFS4_MAX_UINT64, IOMODE_RW,
+ NFS_I(pgio->pg_inode)->layout,
+ pgio->pg_lseg);
pnfs_put_lseg(pgio->pg_lseg);
pgio->pg_lseg = NULL;
+ pgio->pg_maxretrans = 0;
nfs_pageio_reset_write_mds(pgio);
}
@@ -949,7 +1026,7 @@
{
if (!pgio->pg_lseg) {
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
- req->wb_context,
+ nfs_req_openctx(req),
0,
NFS4_MAX_UINT64,
IOMODE_RW,
@@ -964,6 +1041,10 @@
if (pgio->pg_lseg)
return FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg);
+ trace_pnfs_mds_fallback_pg_get_mirror_count(pgio->pg_inode,
+ 0, NFS4_MAX_UINT64, IOMODE_RW,
+ NFS_I(pgio->pg_inode)->layout,
+ pgio->pg_lseg);
/* no lseg means that pnfs is not in use, so no mirroring here */
nfs_pageio_reset_write_mds(pgio);
out:
@@ -1013,6 +1094,10 @@
hdr->args.count,
(unsigned long long)hdr->args.offset);
+ trace_pnfs_mds_fallback_write_done(hdr->inode,
+ hdr->args.offset, hdr->args.count,
+ IOMODE_RW, NFS_I(hdr->inode)->layout,
+ hdr->lseg);
task->tk_status = pnfs_write_done_resend_to_mds(hdr);
}
}
@@ -1032,6 +1117,10 @@
hdr->args.count,
(unsigned long long)hdr->args.offset);
+ trace_pnfs_mds_fallback_read_done(hdr->inode,
+ hdr->args.offset, hdr->args.count,
+ IOMODE_READ, NFS_I(hdr->inode)->layout,
+ hdr->lseg);
task->tk_status = pnfs_read_done_resend_to_mds(hdr);
}
}
@@ -1153,8 +1242,10 @@
{
int vers = clp->cl_nfs_mod->rpc_vers->number;
- if (task->tk_status >= 0)
+ if (task->tk_status >= 0) {
+ ff_layout_mark_ds_reachable(lseg, idx);
return 0;
+ }
/* Handle the case of an invalid layout segment */
if (!pnfs_is_valid_lseg(lseg))
@@ -1217,6 +1308,8 @@
err = ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
mirror, offset, length, status, opnum,
GFP_NOIO);
+ if (status == NFS4ERR_NXIO)
+ ff_layout_mark_ds_unreachable(lseg, idx);
pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode, lseg);
dprintk("%s: err %d op %d status %u\n", __func__, err, opnum, status);
}
@@ -1225,6 +1318,7 @@
static int ff_layout_read_done_cb(struct rpc_task *task,
struct nfs_pgio_header *hdr)
{
+ int new_idx = hdr->pgio_mirror_idx;
int err;
trace_nfs4_pnfs_read(hdr, task->tk_status);
@@ -1243,8 +1337,8 @@
case -NFS4ERR_RESET_TO_PNFS:
if (ff_layout_choose_best_ds_for_read(hdr->lseg,
hdr->pgio_mirror_idx + 1,
- &hdr->pgio_mirror_idx))
- goto out_eagain;
+ &new_idx))
+ goto out_layouterror;
set_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
return task->tk_status;
case -NFS4ERR_RESET_TO_MDS:
@@ -1255,6 +1349,10 @@
}
return 0;
+out_layouterror:
+ ff_layout_read_record_layoutstats_done(task, hdr);
+ ff_layout_send_layouterror(hdr->lseg);
+ hdr->pgio_mirror_idx = new_idx;
out_eagain:
rpc_restart_call_prepare(task);
return -EAGAIN;
@@ -1288,15 +1386,6 @@
(unsigned long long) NFS_I(inode)->layout->plh_lwb);
}
-static bool
-ff_layout_device_unavailable(struct pnfs_layout_segment *lseg, int idx)
-{
- /* No mirroring for now */
- struct nfs4_deviceid_node *node = FF_LAYOUT_DEVID_NODE(lseg, idx);
-
- return ff_layout_test_devid_unavailable(node);
-}
-
static void ff_layout_read_record_layoutstats_start(struct rpc_task *task,
struct nfs_pgio_header *hdr)
{
@@ -1327,10 +1416,6 @@
rpc_exit(task, -EIO);
return -EIO;
}
- if (ff_layout_device_unavailable(hdr->lseg, hdr->pgio_mirror_idx)) {
- rpc_exit(task, -EHOSTDOWN);
- return -EAGAIN;
- }
ff_layout_read_record_layoutstats_start(task, hdr);
return 0;
@@ -1394,9 +1479,10 @@
struct nfs_pgio_header *hdr = data;
ff_layout_read_record_layoutstats_done(&hdr->task, hdr);
- if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags))
+ if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags)) {
+ ff_layout_send_layouterror(hdr->lseg);
pnfs_read_resend_pnfs(hdr);
- else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags))
+ } else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags))
ff_layout_reset_read(hdr);
pnfs_generic_rw_release(data);
}
@@ -1508,11 +1594,6 @@
return -EIO;
}
- if (ff_layout_device_unavailable(hdr->lseg, hdr->pgio_mirror_idx)) {
- rpc_exit(task, -EHOSTDOWN);
- return -EAGAIN;
- }
-
ff_layout_write_record_layoutstats_start(task, hdr);
return 0;
}
@@ -1568,9 +1649,10 @@
struct nfs_pgio_header *hdr = data;
ff_layout_write_record_layoutstats_done(&hdr->task, hdr);
- if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags))
+ if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags)) {
+ ff_layout_send_layouterror(hdr->lseg);
ff_layout_reset_write(hdr, true);
- else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags))
+ } else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags))
ff_layout_reset_write(hdr, false);
pnfs_generic_rw_release(data);
}
@@ -1698,7 +1780,8 @@
struct pnfs_layout_segment *lseg = hdr->lseg;
struct nfs4_pnfs_ds *ds;
struct rpc_clnt *ds_clnt;
- struct rpc_cred *ds_cred;
+ struct nfs4_ff_layout_mirror *mirror;
+ const struct cred *ds_cred;
loff_t offset = hdr->args.offset;
u32 idx = hdr->pgio_mirror_idx;
int vers;
@@ -1708,20 +1791,21 @@
__func__, hdr->inode->i_ino,
hdr->args.pgbase, (size_t)hdr->args.count, offset);
- ds = nfs4_ff_layout_prepare_ds(lseg, idx, false);
+ mirror = FF_LAYOUT_COMP(lseg, idx);
+ ds = nfs4_ff_layout_prepare_ds(lseg, mirror, false);
if (!ds)
goto out_failed;
- ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp,
+ ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
hdr->inode);
if (IS_ERR(ds_clnt))
goto out_failed;
- ds_cred = ff_layout_get_ds_cred(lseg, idx, hdr->cred);
+ ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, hdr->cred);
if (!ds_cred)
goto out_failed;
- vers = nfs4_ff_layout_ds_version(lseg, idx);
+ vers = nfs4_ff_layout_ds_version(mirror);
dprintk("%s USE DS: %s cl_count %d vers %d\n", __func__,
ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count), vers);
@@ -1729,13 +1813,11 @@
hdr->pgio_done_cb = ff_layout_read_done_cb;
refcount_inc(&ds->ds_clp->cl_count);
hdr->ds_clp = ds->ds_clp;
- fh = nfs4_ff_layout_select_ds_fh(lseg, idx);
+ fh = nfs4_ff_layout_select_ds_fh(mirror);
if (fh)
hdr->args.fh = fh;
- if (vers == 4 &&
- !nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid))
- goto out_failed;
+ nfs4_ff_layout_select_ds_stateid(mirror, &hdr->args.stateid);
/*
* Note that if we ever decide to split across DSes,
@@ -1749,12 +1831,15 @@
vers == 3 ? &ff_layout_read_call_ops_v3 :
&ff_layout_read_call_ops_v4,
0, RPC_TASK_SOFTCONN);
- put_rpccred(ds_cred);
+ put_cred(ds_cred);
return PNFS_ATTEMPTED;
out_failed:
if (ff_layout_avoid_mds_available_ds(lseg))
return PNFS_TRY_AGAIN;
+ trace_pnfs_mds_fallback_read_pagelist(hdr->inode,
+ hdr->args.offset, hdr->args.count,
+ IOMODE_READ, NFS_I(hdr->inode)->layout, lseg);
return PNFS_NOT_ATTEMPTED;
}
@@ -1765,26 +1850,28 @@
struct pnfs_layout_segment *lseg = hdr->lseg;
struct nfs4_pnfs_ds *ds;
struct rpc_clnt *ds_clnt;
- struct rpc_cred *ds_cred;
+ struct nfs4_ff_layout_mirror *mirror;
+ const struct cred *ds_cred;
loff_t offset = hdr->args.offset;
int vers;
struct nfs_fh *fh;
int idx = hdr->pgio_mirror_idx;
- ds = nfs4_ff_layout_prepare_ds(lseg, idx, true);
+ mirror = FF_LAYOUT_COMP(lseg, idx);
+ ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true);
if (!ds)
goto out_failed;
- ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp,
+ ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
hdr->inode);
if (IS_ERR(ds_clnt))
goto out_failed;
- ds_cred = ff_layout_get_ds_cred(lseg, idx, hdr->cred);
+ ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, hdr->cred);
if (!ds_cred)
goto out_failed;
- vers = nfs4_ff_layout_ds_version(lseg, idx);
+ vers = nfs4_ff_layout_ds_version(mirror);
dprintk("%s ino %lu sync %d req %zu@%llu DS: %s cl_count %d vers %d\n",
__func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count,
@@ -1795,13 +1882,11 @@
refcount_inc(&ds->ds_clp->cl_count);
hdr->ds_clp = ds->ds_clp;
hdr->ds_commit_idx = idx;
- fh = nfs4_ff_layout_select_ds_fh(lseg, idx);
+ fh = nfs4_ff_layout_select_ds_fh(mirror);
if (fh)
hdr->args.fh = fh;
- if (vers == 4 &&
- !nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid))
- goto out_failed;
+ nfs4_ff_layout_select_ds_stateid(mirror, &hdr->args.stateid);
/*
* Note that if we ever decide to split across DSes,
@@ -1814,12 +1899,15 @@
vers == 3 ? &ff_layout_write_call_ops_v3 :
&ff_layout_write_call_ops_v4,
sync, RPC_TASK_SOFTCONN);
- put_rpccred(ds_cred);
+ put_cred(ds_cred);
return PNFS_ATTEMPTED;
out_failed:
if (ff_layout_avoid_mds_available_ds(lseg))
return PNFS_TRY_AGAIN;
+ trace_pnfs_mds_fallback_write_pagelist(hdr->inode,
+ hdr->args.offset, hdr->args.count,
+ IOMODE_RW, NFS_I(hdr->inode)->layout, lseg);
return PNFS_NOT_ATTEMPTED;
}
@@ -1844,7 +1932,8 @@
struct pnfs_layout_segment *lseg = data->lseg;
struct nfs4_pnfs_ds *ds;
struct rpc_clnt *ds_clnt;
- struct rpc_cred *ds_cred;
+ struct nfs4_ff_layout_mirror *mirror;
+ const struct cred *ds_cred;
u32 idx;
int vers, ret;
struct nfs_fh *fh;
@@ -1854,20 +1943,21 @@
goto out_err;
idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
- ds = nfs4_ff_layout_prepare_ds(lseg, idx, true);
+ mirror = FF_LAYOUT_COMP(lseg, idx);
+ ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true);
if (!ds)
goto out_err;
- ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp,
+ ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
data->inode);
if (IS_ERR(ds_clnt))
goto out_err;
- ds_cred = ff_layout_get_ds_cred(lseg, idx, data->cred);
+ ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, data->cred);
if (!ds_cred)
goto out_err;
- vers = nfs4_ff_layout_ds_version(lseg, idx);
+ vers = nfs4_ff_layout_ds_version(mirror);
dprintk("%s ino %lu, how %d cl_count %d vers %d\n", __func__,
data->inode->i_ino, how, refcount_read(&ds->ds_clp->cl_count),
@@ -1884,7 +1974,7 @@
vers == 3 ? &ff_layout_commit_call_ops_v3 :
&ff_layout_commit_call_ops_v4,
how, RPC_TASK_SOFTCONN);
- put_rpccred(ds_cred);
+ put_cred(ds_cred);
return ret;
out_err:
pnfs_generic_prepare_to_resend_writes(data);
@@ -2031,7 +2121,7 @@
dprintk("%s: Begin\n", __func__);
- xdr_init_encode(&tmp_xdr, &tmp_buf, NULL);
+ xdr_init_encode(&tmp_xdr, &tmp_buf, NULL, NULL);
ff_layout_encode_ioerr(&tmp_xdr, args, ff_args);
ff_layout_encode_iostats_array(&tmp_xdr, args, ff_args);
@@ -2097,6 +2187,52 @@
return -ENOMEM;
}
+#ifdef CONFIG_NFS_V4_2
+void
+ff_layout_send_layouterror(struct pnfs_layout_segment *lseg)
+{
+ struct pnfs_layout_hdr *lo = lseg->pls_layout;
+ struct nfs42_layout_error *errors;
+ LIST_HEAD(head);
+
+ if (!nfs_server_capable(lo->plh_inode, NFS_CAP_LAYOUTERROR))
+ return;
+ ff_layout_fetch_ds_ioerr(lo, &lseg->pls_range, &head, -1);
+ if (list_empty(&head))
+ return;
+
+ errors = kmalloc_array(NFS42_LAYOUTERROR_MAX,
+ sizeof(*errors), GFP_NOFS);
+ if (errors != NULL) {
+ const struct nfs4_ff_layout_ds_err *pos;
+ size_t n = 0;
+
+ list_for_each_entry(pos, &head, list) {
+ errors[n].offset = pos->offset;
+ errors[n].length = pos->length;
+ nfs4_stateid_copy(&errors[n].stateid, &pos->stateid);
+ errors[n].errors[0].dev_id = pos->deviceid;
+ errors[n].errors[0].status = pos->status;
+ errors[n].errors[0].opnum = pos->opnum;
+ n++;
+ if (!list_is_last(&pos->list, &head) &&
+ n < NFS42_LAYOUTERROR_MAX)
+ continue;
+ if (nfs42_proc_layouterror(lseg, errors, n) < 0)
+ break;
+ n = 0;
+ }
+ kfree(errors);
+ }
+ ff_layout_free_ds_ioerr(&head);
+}
+#else
+void
+ff_layout_send_layouterror(struct pnfs_layout_segment *lseg)
+{
+}
+#endif
+
static int
ff_layout_ntop4(const struct sockaddr *sap, char *buf, const size_t buflen)
{
@@ -2355,6 +2491,7 @@
.name = "LAYOUT_FLEX_FILES",
.owner = THIS_MODULE,
.flags = PNFS_LAYOUTGET_ON_OPEN,
+ .max_layoutget_response = 4096, /* 1 page or so... */
.set_layoutdriver = ff_layout_set_layoutdriver,
.alloc_layout_hdr = ff_layout_alloc_layout_hdr,
.free_layout_hdr = ff_layout_free_layout_hdr,
@@ -2382,11 +2519,6 @@
{
printk(KERN_INFO "%s: NFSv4 Flexfile Layout Driver Registering...\n",
__func__);
- if (!ff_zero_group) {
- ff_zero_group = groups_alloc(0);
- if (!ff_zero_group)
- return -ENOMEM;
- }
return pnfs_register_layoutdriver(&flexfilelayout_type);
}
@@ -2395,10 +2527,6 @@
printk(KERN_INFO "%s: NFSv4 Flexfile Layout Driver Unregistering...\n",
__func__);
pnfs_unregister_layoutdriver(&flexfilelayout_type);
- if (ff_zero_group) {
- put_group_info(ff_zero_group);
- ff_zero_group = NULL;
- }
}
MODULE_ALIAS("nfs-layouttype4-4");
@@ -2408,3 +2536,7 @@
module_init(nfs4flexfilelayout_init);
module_exit(nfs4flexfilelayout_exit);
+
+module_param(io_maxretrans, ushort, 0644);
+MODULE_PARM_DESC(io_maxretrans, "The number of times the NFSv4.1 client "
+ "retries an I/O request before returning an error. ");
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h
index de50a34..2f36996 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.h
+++ b/fs/nfs/flexfilelayout/flexfilelayout.h
@@ -81,8 +81,8 @@
u32 fh_versions_cnt;
struct nfs_fh *fh_versions;
nfs4_stateid stateid;
- struct rpc_cred __rcu *ro_cred;
- struct rpc_cred __rcu *rw_cred;
+ const struct cred __rcu *ro_cred;
+ const struct cred __rcu *rw_cred;
refcount_t ref;
spinlock_t lock;
unsigned long flags;
@@ -132,16 +132,6 @@
generic_hdr);
}
-static inline struct nfs4_deviceid_node *
-FF_LAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg, u32 idx)
-{
- if (idx >= FF_LAYOUT_LSEG(lseg)->mirror_array_cnt ||
- FF_LAYOUT_LSEG(lseg)->mirror_array[idx] == NULL ||
- FF_LAYOUT_LSEG(lseg)->mirror_array[idx]->mirror_ds == NULL)
- return NULL;
- return &FF_LAYOUT_LSEG(lseg)->mirror_array[idx]->mirror_ds->id_node;
-}
-
static inline struct nfs4_ff_layout_ds *
FF_LAYOUT_MIRROR_DS(struct nfs4_deviceid_node *node)
{
@@ -151,9 +141,25 @@
static inline struct nfs4_ff_layout_mirror *
FF_LAYOUT_COMP(struct pnfs_layout_segment *lseg, u32 idx)
{
- if (idx >= FF_LAYOUT_LSEG(lseg)->mirror_array_cnt)
- return NULL;
- return FF_LAYOUT_LSEG(lseg)->mirror_array[idx];
+ struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
+
+ if (idx < fls->mirror_array_cnt)
+ return fls->mirror_array[idx];
+ return NULL;
+}
+
+static inline struct nfs4_deviceid_node *
+FF_LAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg, u32 idx)
+{
+ struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, idx);
+
+ if (mirror != NULL) {
+ struct nfs4_ff_layout_ds *mirror_ds = mirror->mirror_ds;
+
+ if (!IS_ERR_OR_NULL(mirror_ds))
+ return &mirror_ds->id_node;
+ }
+ return NULL;
}
static inline u32
@@ -174,28 +180,10 @@
return FF_LAYOUT_LSEG(lseg)->flags & FF_FLAGS_NO_READ_IO;
}
-static inline bool
-ff_layout_test_devid_unavailable(struct nfs4_deviceid_node *node)
-{
- /*
- * Flexfiles should never mark a DS unavailable, but if it does
- * print a (ratelimited) warning as this can affect performance.
- */
- if (nfs4_test_deviceid_unavailable(node)) {
- u32 *p = (u32 *)node->deviceid.data;
-
- pr_warn_ratelimited("NFS: flexfiles layout referencing an "
- "unavailable device [%x%x%x%x]\n",
- p[0], p[1], p[2], p[3]);
- return true;
- }
- return false;
-}
-
static inline int
-nfs4_ff_layout_ds_version(struct pnfs_layout_segment *lseg, u32 ds_idx)
+nfs4_ff_layout_ds_version(const struct nfs4_ff_layout_mirror *mirror)
{
- return FF_LAYOUT_COMP(lseg, ds_idx)->mirror_ds->ds_versions[0].version;
+ return mirror->mirror_ds->ds_versions[0].version;
}
struct nfs4_ff_layout_ds *
@@ -207,6 +195,7 @@
struct nfs4_ff_layout_mirror *mirror, u64 offset,
u64 length, int status, enum nfs_opnum4 opnum,
gfp_t gfp_flags);
+void ff_layout_send_layouterror(struct pnfs_layout_segment *lseg);
int ff_layout_encode_ds_ioerr(struct xdr_stream *xdr, const struct list_head *head);
void ff_layout_free_ds_ioerr(struct list_head *head);
unsigned int ff_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo,
@@ -214,23 +203,23 @@
struct list_head *head,
unsigned int maxnum);
struct nfs_fh *
-nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx);
-int
-nfs4_ff_layout_select_ds_stateid(struct pnfs_layout_segment *lseg,
- u32 mirror_idx,
- nfs4_stateid *stateid);
+nfs4_ff_layout_select_ds_fh(struct nfs4_ff_layout_mirror *mirror);
+void
+nfs4_ff_layout_select_ds_stateid(const struct nfs4_ff_layout_mirror *mirror,
+ nfs4_stateid *stateid);
struct nfs4_pnfs_ds *
-nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
+nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg,
+ struct nfs4_ff_layout_mirror *mirror,
bool fail_return);
struct rpc_clnt *
-nfs4_ff_find_or_create_ds_client(struct pnfs_layout_segment *lseg,
- u32 ds_idx,
+nfs4_ff_find_or_create_ds_client(struct nfs4_ff_layout_mirror *mirror,
struct nfs_client *ds_clp,
struct inode *inode);
-struct rpc_cred *ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg,
- u32 ds_idx, struct rpc_cred *mdscred);
+const struct cred *ff_layout_get_ds_cred(struct nfs4_ff_layout_mirror *mirror,
+ const struct pnfs_layout_range *range,
+ const struct cred *mdscred);
bool ff_layout_avoid_mds_available_ds(struct pnfs_layout_segment *lseg);
bool ff_layout_avoid_read_on_rw(struct pnfs_layout_segment *lseg);
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index a8df2f4..3eda40a 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -18,7 +18,7 @@
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
-static unsigned int dataserver_timeo = NFS_DEF_TCP_RETRANS;
+static unsigned int dataserver_timeo = NFS_DEF_TCP_TIMEO;
static unsigned int dataserver_retrans;
static bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg);
@@ -183,56 +183,6 @@
return NULL;
}
-static void ff_layout_mark_devid_invalid(struct pnfs_layout_segment *lseg,
- struct nfs4_deviceid_node *devid)
-{
- nfs4_delete_deviceid(devid->ld, devid->nfs_client, &devid->deviceid);
- if (!ff_layout_has_available_ds(lseg))
- pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode,
- lseg);
-}
-
-static bool ff_layout_mirror_valid(struct pnfs_layout_segment *lseg,
- struct nfs4_ff_layout_mirror *mirror,
- bool create)
-{
- if (mirror == NULL || IS_ERR(mirror->mirror_ds))
- goto outerr;
- if (mirror->mirror_ds == NULL) {
- if (create) {
- struct nfs4_deviceid_node *node;
- struct pnfs_layout_hdr *lh = lseg->pls_layout;
- struct nfs4_ff_layout_ds *mirror_ds = ERR_PTR(-ENODEV);
-
- node = nfs4_find_get_deviceid(NFS_SERVER(lh->plh_inode),
- &mirror->devid, lh->plh_lc_cred,
- GFP_KERNEL);
- if (node)
- mirror_ds = FF_LAYOUT_MIRROR_DS(node);
-
- /* check for race with another call to this function */
- if (cmpxchg(&mirror->mirror_ds, NULL, mirror_ds) &&
- mirror_ds != ERR_PTR(-ENODEV))
- nfs4_put_deviceid_node(node);
- } else
- goto outerr;
- }
-
- if (IS_ERR(mirror->mirror_ds))
- goto outerr;
-
- if (mirror->mirror_ds->ds == NULL) {
- struct nfs4_deviceid_node *devid;
- devid = &mirror->mirror_ds->id_node;
- ff_layout_mark_devid_invalid(lseg, devid);
- return false;
- }
- return true;
-outerr:
- pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode, lseg);
- return false;
-}
-
static void extend_ds_error(struct nfs4_ff_layout_ds_err *err,
u64 offset, u64 length)
{
@@ -307,7 +257,7 @@
if (status == 0)
return 0;
- if (mirror->mirror_ds == NULL)
+ if (IS_ERR_OR_NULL(mirror->mirror_ds))
return -EINVAL;
dserr = kmalloc(sizeof(*dserr), gfp_flags);
@@ -326,14 +276,13 @@
spin_lock(&flo->generic_hdr.plh_inode->i_lock);
ff_layout_add_ds_error_locked(flo, dserr);
spin_unlock(&flo->generic_hdr.plh_inode->i_lock);
-
return 0;
}
-static struct rpc_cred *
+static const struct cred *
ff_layout_get_mirror_cred(struct nfs4_ff_layout_mirror *mirror, u32 iomode)
{
- struct rpc_cred *cred, __rcu **pcred;
+ const struct cred *cred, __rcu **pcred;
if (iomode == IOMODE_READ)
pcred = &mirror->ro_cred;
@@ -346,53 +295,61 @@
if (!cred)
break;
- cred = get_rpccred_rcu(cred);
+ cred = get_cred_rcu(cred);
} while(!cred);
rcu_read_unlock();
return cred;
}
struct nfs_fh *
-nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx)
+nfs4_ff_layout_select_ds_fh(struct nfs4_ff_layout_mirror *mirror)
{
- struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, mirror_idx);
- struct nfs_fh *fh = NULL;
-
- if (!ff_layout_mirror_valid(lseg, mirror, false)) {
- pr_err_ratelimited("NFS: %s: No data server for mirror offset index %d\n",
- __func__, mirror_idx);
- goto out;
- }
-
/* FIXME: For now assume there is only 1 version available for the DS */
- fh = &mirror->fh_versions[0];
-out:
- return fh;
+ return &mirror->fh_versions[0];
}
-int
-nfs4_ff_layout_select_ds_stateid(struct pnfs_layout_segment *lseg,
- u32 mirror_idx,
- nfs4_stateid *stateid)
+void
+nfs4_ff_layout_select_ds_stateid(const struct nfs4_ff_layout_mirror *mirror,
+ nfs4_stateid *stateid)
{
- struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, mirror_idx);
+ if (nfs4_ff_layout_ds_version(mirror) == 4)
+ nfs4_stateid_copy(stateid, &mirror->stateid);
+}
- if (!ff_layout_mirror_valid(lseg, mirror, false)) {
- pr_err_ratelimited("NFS: %s: No data server for mirror offset index %d\n",
- __func__, mirror_idx);
- goto out;
+static bool
+ff_layout_init_mirror_ds(struct pnfs_layout_hdr *lo,
+ struct nfs4_ff_layout_mirror *mirror)
+{
+ if (mirror == NULL)
+ goto outerr;
+ if (mirror->mirror_ds == NULL) {
+ struct nfs4_deviceid_node *node;
+ struct nfs4_ff_layout_ds *mirror_ds = ERR_PTR(-ENODEV);
+
+ node = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode),
+ &mirror->devid, lo->plh_lc_cred,
+ GFP_KERNEL);
+ if (node)
+ mirror_ds = FF_LAYOUT_MIRROR_DS(node);
+
+ /* check for race with another call to this function */
+ if (cmpxchg(&mirror->mirror_ds, NULL, mirror_ds) &&
+ mirror_ds != ERR_PTR(-ENODEV))
+ nfs4_put_deviceid_node(node);
}
- nfs4_stateid_copy(stateid, &mirror->stateid);
- return 1;
-out:
- return 0;
+ if (IS_ERR(mirror->mirror_ds))
+ goto outerr;
+
+ return true;
+outerr:
+ return false;
}
/**
* nfs4_ff_layout_prepare_ds - prepare a DS connection for an RPC call
* @lseg: the layout segment we're operating on
- * @ds_idx: index of the DS to use
+ * @mirror: layout mirror describing the DS to use
* @fail_return: return layout on connect failure?
*
* Try to prepare a DS connection to accept an RPC call. This involves
@@ -407,26 +364,18 @@
* Returns a pointer to a connected DS object on success or NULL on failure.
*/
struct nfs4_pnfs_ds *
-nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
+nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg,
+ struct nfs4_ff_layout_mirror *mirror,
bool fail_return)
{
- struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx);
struct nfs4_pnfs_ds *ds = NULL;
- struct nfs4_deviceid_node *devid;
struct inode *ino = lseg->pls_layout->plh_inode;
struct nfs_server *s = NFS_SERVER(ino);
unsigned int max_payload;
int status;
- if (!ff_layout_mirror_valid(lseg, mirror, true)) {
- pr_err_ratelimited("NFS: %s: No data server for offset index %d\n",
- __func__, ds_idx);
- goto out;
- }
-
- devid = &mirror->mirror_ds->id_node;
- if (ff_layout_test_devid_unavailable(devid))
- goto out_fail;
+ if (!ff_layout_init_mirror_ds(lseg->pls_layout, mirror))
+ goto noconnect;
ds = mirror->mirror_ds->ds;
/* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */
@@ -437,8 +386,8 @@
/* FIXME: For now we assume the server sent only one version of NFS
* to use for the DS.
*/
- status = nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo,
- dataserver_retrans,
+ status = nfs4_pnfs_ds_connect(s, ds, &mirror->mirror_ds->id_node,
+ dataserver_timeo, dataserver_retrans,
mirror->mirror_ds->ds_versions[0].version,
mirror->mirror_ds->ds_versions[0].minor_version);
@@ -453,11 +402,12 @@
mirror->mirror_ds->ds_versions[0].wsize = max_payload;
goto out;
}
-out_fail:
+noconnect:
ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
mirror, lseg->pls_range.offset,
lseg->pls_range.length, NFS4ERR_NXIO,
OP_ILLEGAL, GFP_NOIO);
+ ff_layout_send_layouterror(lseg);
if (fail_return || !ff_layout_has_available_ds(lseg))
pnfs_error_mark_layout_for_return(ino, lseg);
ds = NULL;
@@ -465,33 +415,36 @@
return ds;
}
-struct rpc_cred *
-ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg, u32 ds_idx,
- struct rpc_cred *mdscred)
+const struct cred *
+ff_layout_get_ds_cred(struct nfs4_ff_layout_mirror *mirror,
+ const struct pnfs_layout_range *range,
+ const struct cred *mdscred)
{
- struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx);
- struct rpc_cred *cred;
+ const struct cred *cred;
- if (mirror) {
- cred = ff_layout_get_mirror_cred(mirror, lseg->pls_range.iomode);
+ if (mirror && !mirror->mirror_ds->ds_versions[0].tightly_coupled) {
+ cred = ff_layout_get_mirror_cred(mirror, range->iomode);
if (!cred)
- cred = get_rpccred(mdscred);
+ cred = get_cred(mdscred);
} else {
- cred = get_rpccred(mdscred);
+ cred = get_cred(mdscred);
}
return cred;
}
/**
-* Find or create a DS rpc client with th MDS server rpc client auth flavor
-* in the nfs_client cl_ds_clients list.
-*/
+ * nfs4_ff_find_or_create_ds_client - Find or create a DS rpc client
+ * @mirror: pointer to the mirror
+ * @ds_clp: nfs_client for the DS
+ * @inode: pointer to inode
+ *
+ * Find or create a DS rpc client with th MDS server rpc client auth flavor
+ * in the nfs_client cl_ds_clients list.
+ */
struct rpc_clnt *
-nfs4_ff_find_or_create_ds_client(struct pnfs_layout_segment *lseg, u32 ds_idx,
+nfs4_ff_find_or_create_ds_client(struct nfs4_ff_layout_mirror *mirror,
struct nfs_client *ds_clp, struct inode *inode)
{
- struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx);
-
switch (mirror->mirror_ds->ds_versions[0].version) {
case 3:
/* For NFSv3 DS, flavor is set when creating DS connections */
@@ -608,7 +561,7 @@
if (IS_ERR(mirror->mirror_ds))
continue;
devid = &mirror->mirror_ds->id_node;
- if (!ff_layout_test_devid_unavailable(devid))
+ if (!nfs4_test_deviceid_unavailable(devid))
return true;
}
}
@@ -629,7 +582,7 @@
if (!mirror->mirror_ds)
continue;
devid = &mirror->mirror_ds->id_node;
- if (ff_layout_test_devid_unavailable(devid))
+ if (nfs4_test_deviceid_unavailable(devid))
return false;
}