nbd: Fix using zero offset and length for REQ_FLUSH properly
[nbd-module.git] / nbd.c
diff --git a/nbd.c b/nbd.c
index e6fc716..f665f54 100644 (file)
--- a/nbd.c
+++ b/nbd.c
 #include <asm/system.h>
 #include <asm/types.h>
 
+#ifdef NBD_OUTOFTREE
+#include "nbd.h"
+#else
 #include <linux/nbd.h>
+#endif
 
 #define LO_MAGIC 0x68797548
 
@@ -85,6 +89,8 @@ static const char *ioctl_cmd_to_ascii(int cmd)
        case NBD_PRINT_DEBUG: return "print-debug";
        case NBD_SET_SIZE_BLOCKS: return "set-size-blocks";
        case NBD_DISCONNECT: return "disconnect";
+       case NBD_SET_TIMEOUT: return "set-timeout";
+       case NBD_SET_FLAGS: return "set-flags";
        case BLKROSET: return "set-read-only";
        case BLKFLSBUF: return "flush-buffer-cache";
        }
@@ -94,9 +100,10 @@ static const char *ioctl_cmd_to_ascii(int cmd)
 static const char *nbdcmd_to_ascii(int cmd)
 {
        switch (cmd) {
-       case  NBD_CMD_READ: return "read";
+       case NBD_CMD_READ: return "read";
        case NBD_CMD_WRITE: return "write";
-       case  NBD_CMD_DISC: return "disconnect";
+       case NBD_CMD_DISC: return "disconnect";
+       case NBD_CMD_FLUSH: return "flush";
        }
        return "invalid";
 }
@@ -236,9 +243,18 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req)
        unsigned long size = blk_rq_bytes(req);
 
        request.magic = htonl(NBD_REQUEST_MAGIC);
-       request.type = htonl(nbd_cmd(req));
-       request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
-       request.len = htonl(size);
+       /* If FUA is set in the request, and we are told to send FUA, then OR in NBD_CMD_FLAG_FUA */
+       request.type = htonl(nbd_cmd(req) |
+                            (( (req->cmd_flags & REQ_FUA) && (lo->flags & NBD_FLAG_SEND_FUA)) ?
+                             NBD_CMD_FLAG_FUA : 0));
+       /* Send from & len as zero on FLUSH - other values reserved per protocol */
+       if (nbd_cmd(req) == NBD_CMD_FLUSH) {
+               request.from = 0;
+               request.len = 0;
+       } else {
+               request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
+               request.len = htonl(size);
+       }
        memcpy(request.handle, &req, sizeof(req));
 
        dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%uB)\n",
@@ -455,13 +471,34 @@ static void nbd_handle_req(struct nbd_device *lo, struct request *req)
        nbd_cmd(req) = NBD_CMD_READ;
        if (rq_data_dir(req) == WRITE) {
                nbd_cmd(req) = NBD_CMD_WRITE;
-               if (lo->flags & NBD_READ_ONLY) {
+               if (lo->flags & NBD_FLAG_READ_ONLY) {
                        printk(KERN_ERR "%s: Write on read-only\n",
                                        lo->disk->disk_name);
                        goto error_out;
                }
        }
 
+       if (req->cmd_flags & REQ_FLUSH) {
+               if (unlikely(blk_rq_sectors(req))) {
+                       /* The elevator is meant to guarantee that a request with
+                        * REQ_FLUSH set is split into an empty REQ_FLUSH request
+                        * followed by the rest of the content (if any). If not, log
+                        * an error, then handle the content without a flush.
+                        */
+                       printk(KERN_ERR "%s: nbd passed non-empty flush request\n",
+                              lo->disk->disk_name);
+
+               } else {
+                       if (lo->flags & NBD_FLAG_SEND_FLUSH)
+                               nbd_cmd(req) = NBD_CMD_FLUSH;
+                       else {
+                               /* Ignore flush that we don't need */
+                               nbd_end_request(req);
+                               return;
+                       }
+               }
+       }
+
        req->errors = 0;
 
        mutex_lock(&lo->tx_lock);
@@ -638,6 +675,18 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
                lo->xmit_timeout = arg * HZ;
                return 0;
 
+       case NBD_SET_FLAGS:
+               lo->flags = arg;
+               if (lo->disk)
+               {
+                       if (lo->flags & NBD_FLAG_ROTATIONAL)
+                               queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, lo->disk->queue);
+                       else
+                               queue_flag_set_unlocked(QUEUE_FLAG_NONROT, lo->disk->queue);
+
+               }
+               return 0;
+
        case NBD_SET_SIZE_BLOCKS:
                lo->bytesize = ((u64) arg) * lo->blksize;
                bdev->bd_inode->i_size = lo->bytesize;
@@ -771,6 +820,11 @@ static int __init nbd_init(void)
                        put_disk(disk);
                        goto out;
                }
+               /* So as not to confuse the elevator, always advertise
+                * FLUSH and FUA support. They are only sent to the server
+                * when the corresponding flag bit is set.
+                */
+               blk_queue_flush(disk->queue, REQ_FLUSH | REQ_FUA);
                /*
                 * Tell the block layer that we are not a rotational device
                 */