libceph: fix msgr backoff
authorSage Weil <sage@newdream.net>
Fri, 4 Mar 2011 20:24:28 +0000 (12:24 -0800)
committerSage Weil <sage@newdream.net>
Fri, 4 Mar 2011 20:24:28 +0000 (12:24 -0800)
With commit f363e45f we replaced a bunch of hacky workqueue mutual
exclusion logic with the WQ_NON_REENTRANT flag.  One pieces of fallout is
that the exponential backoff breaks in certain cases:

 * con_work attempts to connect.
 * we get an immediate failure, and the socket state change handler queues
   immediate work.
 * con_work calls con_fault, we decide to back off, but can't queue delayed
   work.

In this case, we add a BACKOFF bit to make con_work reschedule delayed work
next time it runs (which should be immediately).

Signed-off-by: Sage Weil <sage@newdream.net>

include/linux/ceph/messenger.h
net/ceph/messenger.c

index c3011be..eb31e10 100644 (file)
@@ -123,6 +123,7 @@ struct ceph_msg_pos {
 #define SOCK_CLOSED    11 /* socket state changed to closed */
 #define OPENING         13 /* open connection w/ (possibly new) peer */
 #define DEAD            14 /* dead, about to kfree */
+#define BACKOFF         15
 
 /*
  * A single connection with another host.
index 6bd5025..46fbc42 100644 (file)
@@ -1949,6 +1949,19 @@ static void con_work(struct work_struct *work)
                                                   work.work);
 
        mutex_lock(&con->mutex);
+       if (test_and_clear_bit(BACKOFF, &con->state)) {
+               dout("con_work %p backing off\n", con);
+               if (queue_delayed_work(ceph_msgr_wq, &con->work,
+                                      round_jiffies_relative(con->delay))) {
+                       dout("con_work %p backoff %lu\n", con, con->delay);
+                       mutex_unlock(&con->mutex);
+                       return;
+               } else {
+                       con->ops->put(con);
+                       dout("con_work %p FAILED to back off %lu\n", con,
+                            con->delay);
+               }
+       }
 
        if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */
                dout("con_work CLOSED\n");
@@ -2017,11 +2030,24 @@ static void ceph_fault(struct ceph_connection *con)
                        con->delay = BASE_DELAY_INTERVAL;
                else if (con->delay < MAX_DELAY_INTERVAL)
                        con->delay *= 2;
-               dout("fault queueing %p delay %lu\n", con, con->delay);
                con->ops->get(con);
                if (queue_delayed_work(ceph_msgr_wq, &con->work,
-                                      round_jiffies_relative(con->delay)) == 0)
+                                      round_jiffies_relative(con->delay))) {
+                       dout("fault queued %p delay %lu\n", con, con->delay);
+               } else {
                        con->ops->put(con);
+                       dout("fault failed to queue %p delay %lu, backoff\n",
+                            con, con->delay);
+                       /*
+                        * In many cases we see a socket state change
+                        * while con_work is running and end up
+                        * queuing (non-delayed) work, such that we
+                        * can't backoff with a delay.  Set a flag so
+                        * that when con_work restarts we schedule the
+                        * delay then.
+                        */
+                       set_bit(BACKOFF, &con->state);
+               }
        }
 
 out_unlock: