NFSv4.1: filelayout async error handler
author: Andy Adamson <andros@netapp.com>
Tue, 1 Mar 2011 01:34:20 +0000 (01:34 +0000)
committer: Trond Myklebust <Trond.Myklebust@netapp.com>
Fri, 11 Mar 2011 20:38:43 +0000 (15:38 -0500)
Use a filelayout-specific async error handler for reads sent to a data server (DS).
For DS errors the handler does not recover from, mark the layout as failed and
retry the i/o through the MDS.

Update the mds_offset in nfs_readpage_retry so that a failed short-read retry
to a DS gets correctly resent through the MDS.

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

fs/nfs/internal.h
fs/nfs/nfs4filelayout.c
fs/nfs/nfs4proc.c
fs/nfs/nfs4state.c
fs/nfs/read.c
include/linux/nfs_xdr.h
include/linux/sunrpc/clnt.h
net/sunrpc/clnt.c

index 5e9df99..1a3228e 100644 (file)
@@ -285,6 +285,7 @@ extern int nfs_migrate_page(struct address_space *,
 #endif
 
 /* nfs4proc.c */
+extern void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data);
 extern int nfs4_init_client(struct nfs_client *clp,
                            const struct rpc_timeout *timeparms,
                            const char *ip_addr,
index 3608411..6a424c1 100644 (file)
@@ -40,6 +40,8 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Dean Hildebrand <dhildebz@umich.edu>");
 MODULE_DESCRIPTION("The NFSv4 file layout driver");
 
+#define FILELAYOUT_POLL_RETRY_MAX     (15*HZ)
+
 static int
 filelayout_set_layoutdriver(struct nfs_server *nfss)
 {
@@ -100,6 +102,83 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
        BUG();
 }
 
+/* For data server errors we don't recover from */
+static void
+filelayout_set_lo_fail(struct pnfs_layout_segment *lseg)
+{
+       if (lseg->pls_range.iomode == IOMODE_RW) {
+               dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__);
+               set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
+       } else {
+               dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__);
+               set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
+       }
+}
+
+static int filelayout_async_handle_error(struct rpc_task *task,
+                                        struct nfs4_state *state,
+                                        struct nfs_client *clp,
+                                        int *reset)
+{
+       if (task->tk_status >= 0)
+               return 0;
+
+       *reset = 0;
+
+       switch (task->tk_status) {
+       case -NFS4ERR_BADSESSION:
+       case -NFS4ERR_BADSLOT:
+       case -NFS4ERR_BAD_HIGH_SLOT:
+       case -NFS4ERR_DEADSESSION:
+       case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+       case -NFS4ERR_SEQ_FALSE_RETRY:
+       case -NFS4ERR_SEQ_MISORDERED:
+               dprintk("%s ERROR %d, Reset session. Exchangeid "
+                       "flags 0x%x\n", __func__, task->tk_status,
+                       clp->cl_exchange_flags);
+               nfs4_schedule_session_recovery(clp->cl_session);
+               break;
+       case -NFS4ERR_DELAY:
+       case -NFS4ERR_GRACE:
+       case -EKEYEXPIRED:
+               rpc_delay(task, FILELAYOUT_POLL_RETRY_MAX);
+               break;
+       default:
+               dprintk("%s DS error. Retry through MDS %d\n", __func__,
+                       task->tk_status);
+               *reset = 1;
+               break;
+       }
+       task->tk_status = 0;
+       return -EAGAIN;
+}
+
+/* NFS_PROTO call done callback routines */
+
+static int filelayout_read_done_cb(struct rpc_task *task,
+                               struct nfs_read_data *data)
+{
+       struct nfs_client *clp = data->ds_clp;
+       int reset = 0;
+
+       dprintk("%s DS read\n", __func__);
+
+       if (filelayout_async_handle_error(task, data->args.context->state,
+                                         data->ds_clp, &reset) == -EAGAIN) {
+               dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
+                       __func__, data->ds_clp, data->ds_clp->cl_session);
+               if (reset) {
+                       filelayout_set_lo_fail(data->lseg);
+                       nfs4_reset_read(task, data);
+                       clp = NFS_SERVER(data->inode)->nfs_client;
+               }
+               nfs_restart_rpc(task, clp);
+               return -EAGAIN;
+       }
+
+       return 0;
+}
+
 /*
  * Call ops for the async read/write cases
  * In the case of dense layouts, the offset needs to be reset to its
@@ -109,6 +188,8 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data)
 {
        struct nfs_read_data *rdata = (struct nfs_read_data *)data;
 
+       rdata->read_done_cb = filelayout_read_done_cb;
+
        if (nfs41_setup_sequence(rdata->ds_clp->cl_session,
                                &rdata->args.seq_args, &rdata->res.seq_res,
                                0, task))
index d096239..1dc8090 100644 (file)
@@ -3074,15 +3074,10 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
        return err;
 }
 
-static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
 {
        struct nfs_server *server = NFS_SERVER(data->inode);
 
-       dprintk("--> %s\n", __func__);
-
-       if (!nfs4_sequence_done(task, &data->res.seq_res))
-               return -EAGAIN;
-
        if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
                nfs_restart_rpc(task, server->nfs_client);
                return -EAGAIN;
@@ -3094,12 +3089,40 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
        return 0;
 }
 
+static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
+{
+
+       dprintk("--> %s\n", __func__);
+
+       if (!nfs4_sequence_done(task, &data->res.seq_res))
+               return -EAGAIN;
+
+       return data->read_done_cb(task, data);
+}
+
 static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
 {
        data->timestamp   = jiffies;
+       data->read_done_cb = nfs4_read_done_cb;
        msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
 }
 
+/* Reset the nfs_read_data to send the read to the MDS. */
+void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data)
+{
+       dprintk("%s Reset task for i/o through\n", __func__);
+       put_lseg(data->lseg);
+       data->lseg = NULL;
+       /* offsets will differ in the dense stripe case */
+       data->args.offset = data->mds_offset;
+       data->ds_clp = NULL;
+       data->args.fh     = NFS_FH(data->inode);
+       data->read_done_cb = nfs4_read_done_cb;
+       task->tk_ops = data->mds_ops;
+       rpc_task_reset_client(task, NFS_CLIENT(data->inode));
+}
+EXPORT_SYMBOL_GPL(nfs4_reset_read);
+
 static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
 {
        struct inode *inode = data->inode;
index 69c8363..ab1bf5b 100644 (file)
@@ -1453,6 +1453,7 @@ void nfs4_schedule_session_recovery(struct nfs4_session *session)
 {
        nfs4_schedule_lease_recovery(session->clp);
 }
+EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery);
 
 void nfs41_handle_recall_slot(struct nfs_client *clp)
 {
index f4d0fcf..f40c7f4 100644 (file)
@@ -391,6 +391,7 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
                return;
 
        /* Yes, so retry the read at the end of the data */
+       data->mds_offset += resp->count;
        argp->offset += resp->count;
        argp->pgbase += resp->count;
        argp->count -= resp->count;
index b63faef..eb0e870 100644 (file)
@@ -1020,6 +1020,7 @@ struct nfs_read_data {
        struct pnfs_layout_segment *lseg;
        struct nfs_client       *ds_clp;        /* pNFS data server */
        const struct rpc_call_ops *mds_ops;
+       int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data);
        __u64                   mds_offset;
        struct page             *page_array[NFS_PAGEVEC_SIZE];
 };
index ef9476a..db7bcaf 100644 (file)
@@ -129,6 +129,7 @@ struct rpc_create_args {
 struct rpc_clnt *rpc_create(struct rpc_create_args *args);
 struct rpc_clnt        *rpc_bind_new_program(struct rpc_clnt *,
                                struct rpc_program *, u32);
+void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt);
 struct rpc_clnt *rpc_clone_client(struct rpc_clnt *);
 void           rpc_shutdown_client(struct rpc_clnt *);
 void           rpc_release_client(struct rpc_clnt *);
index 8b5a6b4..edaf56e 100644 (file)
@@ -597,6 +597,14 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
        }
 }
 
+void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt)
+{
+       rpc_task_release_client(task);
+       rpc_task_set_client(task, clnt);
+}
+EXPORT_SYMBOL_GPL(rpc_task_reset_client);
+
+
 static void
 rpc_task_set_rpc_message(struct rpc_task *task, const struct rpc_message *msg)
 {