Merge branch 'master' of git://nbd.git.sourceforge.net/gitroot/nbd/nbd
author: Alex Bligh <alex@alex.org.uk>
Sun, 22 May 2011 10:29:31 +0000 (11:29 +0100)
committer: Alex Bligh <alex@alex.org.uk>
Sun, 22 May 2011 10:29:31 +0000 (11:29 +0100)
12 files changed:
Makefile.am
cliserv.h
configure.ac
doc/proto.txt
lfs.h
maketr [new file with mode: 0755]
man/nbd-server.5.in.sgml
nbd-client.c
nbd-server.c
nbd-tester-client.c
nbd-tester-client.tr [new file with mode: 0644]
simple_test

index a99eb37..c9d8a9a 100644 (file)
@@ -3,7 +3,7 @@ bin_PROGRAMS = nbd-server
 sbin_PROGRAMS = @NBD_CLIENT_NAME@
 EXTRA_PROGRAMS = nbd-client knbd-client
 TESTS_ENVIRONMENT=$(srcdir)/simple_test
-TESTS = cmd cfg1 cfgmulti cfgnew cfgsize write flush
+TESTS = cmd cfg1 cfgmulti cfgnew cfgsize write flush integrity
 check_PROGRAMS = nbd-tester-client
 knbd_client_SOURCES = nbd-client.c cliserv.h
 nbd_client_SOURCES = nbd-client.c cliserv.h
@@ -23,3 +23,5 @@ cfgnew:
 cfgsize:
 write:
 flush:
+integrity:
+
index b32626f..51b1a9e 100644 (file)
--- a/cliserv.h
+++ b/cliserv.h
@@ -40,7 +40,12 @@ typedef unsigned long long u64;
 #include "nbd.h"
 
 #if NBD_LFS==1
+/* /usr/include/features.h (included from /usr/include/sys/types.h)
+   defines this when _GNU_SOURCE is defined
+ */
+#ifndef _LARGEFILE_SOURCE
 #define _LARGEFILE_SOURCE
+#endif
 #define _FILE_OFFSET_BITS 64
 #endif
 
index f50193f..cff4928 100644 (file)
@@ -93,7 +93,10 @@ AC_CHECK_SIZEOF(unsigned short int)
 AC_CHECK_SIZEOF(unsigned int)
 AC_CHECK_SIZEOF(unsigned long int)
 AC_CHECK_SIZEOF(unsigned long long int)
-AC_CHECK_FUNCS([llseek alarm gethostbyname inet_ntoa memset socket strerror strstr])
+AC_CHECK_FUNCS([llseek alarm gethostbyname inet_ntoa memset socket strerror strstr mkstemp])
+AC_CHECK_FUNC([sync_file_range],
+       [AC_DEFINE([HAVE_SYNC_FILE_RANGE], [1], [Define to 1 if sync_file_range(2) is supported])],
+        [])
 AC_FUNC_FORK
 AC_FUNC_SETVBUF_REVERSED
 AC_MSG_CHECKING(whether client should be built)
index fe5e819..bd26657 100644 (file)
@@ -26,15 +26,17 @@ server during the handshake.
 There are two message types in the data pushing phase: the request, and
 the response.
 
-There are three request types in the data pushing phase: NBD_CMD_READ,
-NBD_CMD_WRITE, and NBD_CMD_DISC (disconnect).
+There are four request types in the data pushing phase: NBD_CMD_READ,
+NBD_CMD_WRITE, NBD_CMD_DISC (disconnect), and NBD_CMD_FLUSH.
 
 The request is sent by the client; the response by the server. A request
 header consists a 32 bit magic number (magic), a 32 bit field denoting
 the request type (see below; 'type'), a 64 bit handle ('handle'), a 64
 bit data offset ('from'), and a 32 bit length ('len'). In case of a
 write request, the header is immediately followed by 'len' bytes of
-data.
+data. In the case of NBD_CMD_FLUSH, the offset and length should
+be zero (meaning "flush entire device"); other values are reserved
+for future use (e.g. for flushing specific areas without a write).
 
 The reply contains three fields: a 32 bit magic number ('magic'), a 32
 bit error code ('error'; 0, unless an error occurred in which case it is
@@ -50,6 +52,16 @@ we change that to asynchronous handling, handling the disconnect request
 will probably be postponed until there are no other outstanding
 requests.
 
+A flush request will not be sent unless NBD_FLAG_SEND_FLUSH is set,
+and indicates the backing file should be fdatasync()'d to disk.
+
+The top 16 bits of the request are flags. NBD_CMD_FLAG_FUA implies
+a force unit access, and can currently only be usefully combined
+with NBD_CMD_WRITE. This is implemented using sync_file_range
+if present, else by fdatasync() of that file (note not all files
+in a multifile environment). NBD_CMD_FLAG_FUA will not be set
+unless NBD_FLAG_SEND_FUA is set.
+
 There are two versions of the negotiation: the 'old' style (nbd <=
 2.9.16) and the 'new' style (nbd >= 2.9.17, though due to a bug it does
 not work with anything below 2.9.18). What follows is a description of
diff --git a/lfs.h b/lfs.h
index 929ce08..480d6bf 100644 (file)
--- a/lfs.h
+++ b/lfs.h
@@ -4,7 +4,13 @@
 #include "config.h"
 #if NBD_LFS
 #define _FILE_OFFSET_BITS 64
+#ifndef _LARGEFILE_SOURCE
 #define _LARGEFILE_SOURCE
+#endif
+#ifdef HAVE_SYNC_FILE_RANGE
+#define USE_SYNC_FILE_RANGE
+#define _GNU_SOURCE
+#endif /* HAVE_SYNC_FILE_RANGE */
 #endif /* NBD_LFS */
 
 #endif /* LFS_H */
diff --git a/maketr b/maketr
new file mode 100755 (executable)
index 0000000..f124837
--- /dev/null
+++ b/maketr
@@ -0,0 +1,51 @@
+#!/bin/sh
+#
+# Example script to make a transaction log file
+# Must be run as root. Remember to chown the file afterwards
+
+# Insert the name of a tarfile here
+tarfile=/home/amb/iptables/iptables_1.4.4.orig.tar.gz
+tmpnam=`mktemp`
+conffile=${tmpnam}.conf
+pidfile=${tmpnam}.pid
+output=`pwd`/output.tr
+
+ulimit -c unlimited
+
+cat >${conffile} <<EOF
+[generic]
+[export1]
+        exportname = $tmpnam
+       transactionlog = $output
+        flush = true
+        fua = true
+        rotational = true
+EOF
+./nbd-server -C ${conffile} -p ${pidfile} &
+PID=$!
+sleep 1
+dd if=/dev/zero of=${tmpnam} bs=1M count=50
+./nbd-client -N export1 127.0.0.1 /dev/nbd0
+mkfs.ext3 /dev/nbd0
+mount -t ext3 -odata=journal,barrier=1 /dev/nbd0 /mnt
+(cd /mnt ; tar xvzf ${tarfile} ; sync) 2>&1 >/dev/null
+umount /mnt
+mount -t ext3 -odata=journal,barrier=1 /dev/nbd0 /mnt
+(cd /mnt ; tar cvzf /dev/null . ; sync) 2>&1 >/dev/null
+dbench -D /mnt 1 &
+sleep 10
+killall dbench
+sleep 2
+killall -KILL dbench
+sync
+umount /mnt
+./nbd-client -d /dev/nbd0
+if [ -f ${pidfile} ]
+then
+        kill `cat ${pidfile}`
+        rm -f ${pidfile}
+else
+        kill $PID
+fi
+rm -f $tmpnam ${conffile}
+ls -la ${output}
index 9fb2eff..358b1c3 100644 (file)
@@ -410,6 +410,64 @@ manpage.1: manpage.sgml
        </listitem>
       </varlistentry>
       <varlistentry>
+        <term><option>flush</option></term>
+       <listitem>
+         <para>Optional; boolean.</para>
+         <para>When this option is enabled,
+           <command>nbd-server</command> will inform the client that it
+           supports and desires to be sent flush requests when the
+           elevator layer receives them. Receipt of a flush request
+           will cause an fdatasync() (or, if the sync option is set,
+           an fsync()) on the backend storage. This increases
+           reliability in the case of an unclean shutdown at
+           the expense of a degradation of performance. The default
+           state is disabled. This option will have no effect unless
+           supported by the client.
+         </para>
+       </listitem>
+      </varlistentry>
+      <varlistentry>
+        <term><option>fua</option></term>
+       <listitem>
+         <para>Optional; boolean.</para>
+         <para>When this option is enabled,
+           <command>nbd-server</command> will inform the client that it
+           supports and desires to be sent fua (force unit access) commands
+           when the elevator layer receives them. Receipt of a force unit
+           access command will cause the specified command to be synced
+           to backend storage using sync_file_range() if supported, or
+           fdatasync() otherwise. This increases
+           reliability in the case of an unclean shutdown at
+           the expense of a degradation of performance. The default
+           state is disabled. This option will have no effect unless
+           supported by the client.
+         </para>
+       </listitem>
+      </varlistentry>
+      <varlistentry>
+        <term><option>rotational</option></term>
+       <listitem>
+         <para>Optional; boolean.</para>
+         <para>When this option is enabled,
+           <command>nbd-server</command> will inform the client that it
+           would prefer it to send requests in elevator order, perhaps
+           because it has a backing store and no local elevator. By
+           default, the client uses QUEUE_FLAG_NONROT, which effectively
+           restricts the function of the elevator to block merges. By
+           specifying this flag on the server, the client will not use
+           QUEUE_FLAG_NONROT, meaning the client elevator will perform
+           normal elevator ordering of I/O requests. Note that even when
+           the backing store is on rotating media, it is not normally
+           necessary to specify this flag, as the server's elevator
+           algorithm will be used. This flag is only required where
+           the server will not be using an elevator algorithm or where
+           the elevator algorithm is effectively neutered (e.g. with
+           the sync option set). This option will have no effect unless
+           supported by the client.
+         </para>
+       </listitem>
+      </varlistentry>
+      <varlistentry>
        <term><option>sparse_cow</option></term>
        <listitem>
          <para>Optional; boolean.</para>
@@ -605,6 +663,30 @@ manpage.1: manpage.sgml
          </para>
        </listitem>
       </varlistentry>
+      <varlistentry>
+       <term><option>transactionlog</option></term>
+       <listitem>
+         <para>Optional; string</para>
+         <para>
+           If specified, then this pathname is used to generate a transaction
+           log. A transaction log is a binary file consisting of the requests
+           sent to and the replies received by the server, but excluding any
+           data (so, for a write command, it records the offset and length
+           of the write but not the data written). It is therefore relatively
+           safe to distribute to a third party. Note that the transaction log
+           does not include the negotiation sequence. Transaction logs are
+           mainly useful for debugging. The program
+           <emphasis>nbd-tester-client</emphasis> distributed with the source
+           to this program can replay a transaction log against a server and
+           perform a data integrity test. Note that the transaction log is
+           written to for every client opened. If it is necessary to maintain
+           separate transaction logs for each client, the
+           <emphasis>prerun</emphasis> script should rename the transaction log
+           (which will just have been opened) in order to avoid transaction logs
+           overwriting each other. This action should be race-free.
+         </para>
+       </listitem>
+      </varlistentry>
     </variablelist>
     
   </refsect1>
index 0fc9c34..78e62f9 100644 (file)
@@ -150,7 +150,7 @@ void negotiate(int sock, u64 *rsize64, u32 *flags, char* name) {
                if(read(sock, &tmp, sizeof(uint16_t)) < 0) {
                        err("Failed reading flags: %m");
                }
-               *flags = ((u32)ntohs(tmp)) << 16;
+               *flags = ((u32)ntohs(tmp));
 
                /* reserved for future use*/
                if (write(sock, &reserved, sizeof(reserved)) < 0)
@@ -240,6 +240,9 @@ void setsizes(int nbd, u64 size64, int blocksize, u32 flags) {
 
        ioctl(nbd, NBD_CLEAR_SOCK);
 
+       /* ignore error as kernel may not support */
+       ioctl(nbd, NBD_SET_FLAGS, (unsigned long) flags);
+
        if (ioctl(nbd, BLKROSET, (unsigned long) &read_only) < 0)
                err("Unable to set read-only attribute for device");
 }
index cd584f0..6c3953d 100644 (file)
@@ -137,11 +137,13 @@ int dontfork = 0;
 #define DEBUG2( a,b ) printf( a,b )
 #define DEBUG3( a,b,c ) printf( a,b,c )
 #define DEBUG4( a,b,c,d ) printf( a,b,c,d )
+#define DEBUG5( a,b,c,d,e ) printf( a,b,c,d,e )
 #else
 #define DEBUG( a )
 #define DEBUG2( a,b ) 
 #define DEBUG3( a,b,c ) 
 #define DEBUG4( a,b,c,d ) 
+#define DEBUG5( a,b,c,d,e ) 
 #endif
 #ifndef PACKAGE_VERSION
 #define PACKAGE_VERSION ""
@@ -163,6 +165,9 @@ int dontfork = 0;
 #define F_SPARSE 16      /**< flag to tell us copyronwrite should use a sparse file */
 #define F_SDP 32         /**< flag to tell us the export should be done using the Socket Direct Protocol for RDMA */
 #define F_SYNC 64        /**< Whether to fsync() after a write */
+#define F_FLUSH 128      /**< Whether server wants FLUSH to be sent by the client */
+#define F_FUA 256        /**< Whether server wants FUA to be sent by the client */
+#define F_ROTATIONAL 512  /**< Whether server wants the client to implement the elevator algorithm */
 GHashTable *children;
 char pidfname[256]; /**< name of our PID file */
 char pidftemplate[256]; /**< template to be used for the filename of the PID file */
@@ -208,6 +213,7 @@ typedef struct {
                                  disconnects */
        gchar* servename;    /**< name of the export as selected by nbd-client */
        int max_connections; /**< maximum number of opened connections */
+       gchar* transactionlog;/**< filename for transaction log */
 } SERVER;
 
 /**
@@ -234,6 +240,7 @@ typedef struct {
        u32 difffilelen;     /**< number of pages in difffile */
        u32 *difmap;         /**< see comment on the global difmap for this one */
        gboolean modern;     /**< client was negotiated using modern negotiation protocol */
+       int transactionlogfd;/**< fd for transaction log */
 } CLIENT;
 
 /**
@@ -577,6 +584,8 @@ void remove_server(gpointer s) {
                g_free(server->prerun);
        if(server->postrun)
                g_free(server->postrun);
+       if(server->transactionlog)
+               g_free(server->transactionlog);
        g_free(server);
 }
 
@@ -616,6 +625,9 @@ SERVER* dup_serve(SERVER *s) {
 
        if(s->postrun)
                serve->postrun = g_strdup(s->postrun);
+
+       if(s->transactionlog)
+               serve->transactionlog = g_strdup(s->transactionlog);
        
        if(s->servename)
                serve->servename = g_strdup(s->servename);
@@ -714,12 +726,16 @@ GArray* parse_cfile(gchar* f, GError** e) {
                { "virtstyle",  FALSE,  PARAM_STRING,   &(virtstyle),           0 },
                { "prerun",     FALSE,  PARAM_STRING,   &(s.prerun),            0 },
                { "postrun",    FALSE,  PARAM_STRING,   &(s.postrun),           0 },
+               { "transactionlog", FALSE, PARAM_STRING, &(s.transactionlog),   0 },
                { "readonly",   FALSE,  PARAM_BOOL,     &(s.flags),             F_READONLY },
                { "multifile",  FALSE,  PARAM_BOOL,     &(s.flags),             F_MULTIFILE },
                { "copyonwrite", FALSE, PARAM_BOOL,     &(s.flags),             F_COPYONWRITE },
                { "sparse_cow", FALSE,  PARAM_BOOL,     &(s.flags),             F_SPARSE },
                { "sdp",        FALSE,  PARAM_BOOL,     &(s.flags),             F_SDP },
                { "sync",       FALSE,  PARAM_BOOL,     &(s.flags),             F_SYNC },
+               { "flush",      FALSE,  PARAM_BOOL,     &(s.flags),             F_FLUSH },
+               { "fua",        FALSE,  PARAM_BOOL,     &(s.flags),             F_FUA },
+               { "rotational", FALSE,  PARAM_BOOL,     &(s.flags),             F_ROTATIONAL },
                { "listenaddr", FALSE,  PARAM_STRING,   &(s.listenaddr),        0 },
                { "maxconnections", FALSE, PARAM_INT,   &(s.max_connections),   0 },
        };
@@ -1055,7 +1071,7 @@ void myseek(int handle,off_t a) {
  * @param client The client we're serving for
  * @return The number of bytes actually written, or -1 in case of an error
  **/
-ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client) {
+ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
        int fhandle;
        off_t foffset;
        size_t maxbytes;
@@ -1066,12 +1082,20 @@ ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client) {
        if(maxbytes && len > maxbytes)
                len = maxbytes;
 
-       DEBUG4("(WRITE to fd %d offset %llu len %u), ", fhandle, foffset, len);
+       DEBUG5("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle, foffset, len, fua);
 
        myseek(fhandle, foffset);
        retval = write(fhandle, buf, len);
        if(client->server->flags & F_SYNC) {
                fsync(fhandle);
+       } else if (fua) {
+#ifdef USE_SYNC_FILE_RANGE
+               sync_file_range(fhandle, foffset, len,
+                               SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE |
+                               SYNC_FILE_RANGE_WAIT_AFTER);
+#else
+               fdatasync(fhandle);
+#endif
        }
        return retval;
 }
@@ -1080,10 +1104,10 @@ ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client) {
  * Call rawexpwrite repeatedly until all data has been written.
  * @return 0 on success, nonzero on failure
  **/
-int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client) {
+int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
        ssize_t ret=0;
 
-       while(len > 0 && (ret=rawexpwrite(a, buf, len, client)) > 0 ) {
+       while(len > 0 && (ret=rawexpwrite(a, buf, len, client, fua)) > 0 ) {
                a += ret;
                buf += ret;
                len -= ret;
@@ -1184,7 +1208,7 @@ int expread(off_t a, char *buf, size_t len, CLIENT *client) {
  * @param client The client we're going to write for.
  * @return 0 on success, nonzero on failure
  **/
-int expwrite(off_t a, char *buf, size_t len, CLIENT *client) {
+int expwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
        char pagebuf[DIFFPAGESIZE];
        off_t mapcnt,mapl,maph;
        off_t wrlen,rdlen; 
@@ -1192,7 +1216,7 @@ int expwrite(off_t a, char *buf, size_t len, CLIENT *client) {
        off_t offset;
 
        if (!(client->server->flags & F_COPYONWRITE))
-               return(rawexpwrite_fully(a, buf, len, client)); 
+               return(rawexpwrite_fully(a, buf, len, client, fua)); 
        DEBUG3("Asked to write %d bytes at %llu.\n", len, (unsigned long long)a);
 
        mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
@@ -1225,6 +1249,33 @@ int expwrite(off_t a, char *buf, size_t len, CLIENT *client) {
                }                                                   
                len-=wrlen ; a+=wrlen ; buf+=wrlen ;
        }
+       if (client->server->flags & F_SYNC) {
+               fsync(client->difffile);
+       } else if (fua) {
+               /* open question: would it be cheaper to do multiple sync_file_ranges?
+                  as we iterate through the above?
+                */
+               fdatasync(client->difffile);
+       }
+       return 0;
+}
+
+/** Sync the client's backing storage; returns 0 on success, nonzero if an fsync() fails. */
+int expflush(CLIENT *client) {
+       guint i;
+
+       /* copy-on-write: sync the diff file, the same fd expwrite() syncs */
+       if (client->server->flags & F_COPYONWRITE) {
+               return fsync(client->difffile);
+       }
+
+       /* otherwise sync every file in the export array, stopping at the first failure */
+       for (i = 0; i < client->export->len; i++) {
+               FILE_INFO fi = g_array_index(client->export, FILE_INFO, i);
+               if (fsync(fi.fhandle) < 0)
+                       return -1;
+       }
+
+       return 0;
+}
 
@@ -1304,6 +1355,7 @@ CLIENT* negotiate(int net, CLIENT *client, GArray* servers) {
                                client->exportsize = OFFT_MAX;
                                client->net = net;
                                client->modern = TRUE;
+                               client->transactionlogfd = -1;
                                free(name);
                                return client;
                        }
@@ -1317,6 +1369,12 @@ CLIENT* negotiate(int net, CLIENT *client, GArray* servers) {
                err("Negotiation failed: %m");
        if (client->server->flags & F_READONLY)
                flags |= NBD_FLAG_READ_ONLY;
+       if (client->server->flags & F_FLUSH)
+               flags |= NBD_FLAG_SEND_FLUSH;
+       if (client->server->flags & F_FUA)
+               flags |= NBD_FLAG_SEND_FUA;
+       if (client->server->flags & F_ROTATIONAL)
+               flags |= NBD_FLAG_ROTATIONAL;
        if (!client->modern) {
                /* oldstyle */
                flags = htonl(flags);
@@ -1337,7 +1395,9 @@ CLIENT* negotiate(int net, CLIENT *client, GArray* servers) {
 }
 
 /** sending macro. */
-#define SEND(net,reply) writeit( net, &reply, sizeof( reply ));
+#define SEND(net,reply) { writeit( net, &reply, sizeof( reply )); \
+       if (client->transactionlogfd != -1) \
+               writeit(client->transactionlogfd, &reply, sizeof(reply)); }
 /** error macro. */
 #define ERROR(client,reply,errcode) { reply.error = htonl(errcode); SEND(client->net,reply); reply.error = 0; }
 /**
@@ -1366,15 +1426,20 @@ int mainloop(CLIENT *client) {
                size_t len;
                size_t currlen;
                size_t writelen;
+               uint16_t command;
 #ifdef DODBG
                i++;
                printf("%d: ", i);
 #endif
                readit(client->net, &request, sizeof(request));
+               if (client->transactionlogfd != -1)
+                       writeit(client->transactionlogfd, &request, sizeof(request));
+
                request.from = ntohll(request.from);
                request.type = ntohl(request.type);
+               command = request.type & NBD_CMD_MASK_COMMAND;
 
-               if (request.type==NBD_CMD_DISC) {
+               if (command==NBD_CMD_DISC) {
                        msg2(LOG_INFO, "Disconnect request received.");
                        if (client->server->flags & F_COPYONWRITE) { 
                                if (client->difmap) g_free(client->difmap) ;
@@ -1397,24 +1462,27 @@ int mainloop(CLIENT *client) {
                        currlen = len;
                }
 #ifdef DODBG
-               printf("%s from %llu (%llu) len %d, ", request.type ? "WRITE" :
+               printf("%s from %llu (%llu) len %d, ", command ? "WRITE" :
                                "READ", (unsigned long long)request.from,
                                (unsigned long long)request.from / 512, len);
 #endif
                memcpy(reply.handle, request.handle, sizeof(reply.handle));
-               if ((request.from + len) > (OFFT_MAX)) {
-                       DEBUG("[Number too large!]");
-                       ERROR(client, reply, EINVAL);
-                       continue;
-               }
 
-               if (((ssize_t)((off_t)request.from + len) > client->exportsize)) {
-                       DEBUG("[RANGE!]");
-                       ERROR(client, reply, EINVAL);
-                       continue;
+               if ((command==NBD_CMD_WRITE) || (command==NBD_CMD_READ)) {
+                       if ((request.from + len) > (OFFT_MAX)) {
+                               DEBUG("[Number too large!]");
+                               ERROR(client, reply, EINVAL);
+                               continue;
+                       }
+
+                       if (((ssize_t)((off_t)request.from + len) > client->exportsize)) {
+                               DEBUG("[RANGE!]");
+                               ERROR(client, reply, EINVAL);
+                               continue;
+                       }
                }
 
-               if (request.type==NBD_CMD_WRITE) {
+               if (command==NBD_CMD_WRITE) {
                        DEBUG("wr: net->buf, ");
                        while(len > 0) {
                                readit(client->net, buf, currlen);
@@ -1425,7 +1493,8 @@ int mainloop(CLIENT *client) {
                                        ERROR(client, reply, EPERM);
                                        continue;
                                }
-                               if (expwrite(request.from, buf, len, client)) {
+                               if (expwrite(request.from, buf, len, client,
+                                            request.type & NBD_CMD_FLAG_FUA)) {
                                        DEBUG("Write failed: %m" );
                                        ERROR(client, reply, errno);
                                        continue;
@@ -1437,27 +1506,46 @@ int mainloop(CLIENT *client) {
                        }
                        continue;
                }
-               /* READ */
-
-               DEBUG("exp->buf, ");
-               memcpy(buf, &reply, sizeof(struct nbd_reply));
-               p = buf + sizeof(struct nbd_reply);
-               writelen = currlen + sizeof(struct nbd_reply);
-               while(len > 0) {
-                       if (expread(request.from, p, currlen, client)) {
-                               DEBUG("Read failed: %m");
+
+               if (command==NBD_CMD_FLUSH) {
+                       DEBUG("fl: ");
+                       if (expflush(client)) {
+                               DEBUG("Flush failed: %m");
                                ERROR(client, reply, errno);
                                continue;
                        }
+                       SEND(client->net, reply);
+                       DEBUG("OK!\n");
+                       continue;
+               }
 
-                       DEBUG("buf->net, ");
-                       writeit(client->net, buf, writelen);
-                       len -= currlen;
-                       currlen = (len < BUFSIZE) ? len : BUFSIZE;
-                       p = buf;
-                       writelen = currlen;
+               if (command==NBD_CMD_READ) {
+                       DEBUG("exp->buf, ");
+                       memcpy(buf, &reply, sizeof(struct nbd_reply));
+                       if (client->transactionlogfd != -1)
+                               writeit(client->transactionlogfd, &reply, sizeof(reply));
+                       p = buf + sizeof(struct nbd_reply);
+                       writelen = currlen + sizeof(struct nbd_reply);
+                       while(len > 0) {
+                               if (expread(request.from, p, currlen, client)) {
+                                       DEBUG("Read failed: %m");
+                                       ERROR(client, reply, errno);
+                                       continue;
+                               }
+                               
+                               DEBUG("buf->net, ");
+                               writeit(client->net, buf, writelen);
+                               len -= currlen;
+                               request.from += currlen;
+                               currlen = (len < BUFSIZE) ? len : BUFSIZE;
+                               p = buf;
+                               writelen = currlen;
+                       }
+                       DEBUG("OK!\n");
+                       continue;
                }
-               DEBUG("OK!\n");
+
+               DEBUG ("Ignoring unknown command\n");
        }
        return 0;
 }
@@ -1587,6 +1675,15 @@ int do_run(gchar* command, gchar* file) {
  * @param client a connected client
  **/
 void serveconnection(CLIENT *client) {
+       if (client->server->transactionlog && (client->transactionlogfd == -1))
+       {
+               if (-1 == (client->transactionlogfd = open(client->server->transactionlog,
+                                                          O_WRONLY | O_CREAT,
+                                                          S_IRUSR | S_IWUSR)))
+                       g_warning("Could not open transaction log %s",
+                                 client->server->transactionlog);
+       }
+
        if(do_run(client->server->prerun, client->exportname)) {
                exit(EXIT_FAILURE);
        }
@@ -1600,6 +1697,12 @@ void serveconnection(CLIENT *client) {
 
        mainloop(client);
        do_run(client->server->postrun, client->exportname);
+
+       if (-1 != client->transactionlogfd)
+       {
+               close(client->transactionlogfd);
+               client->transactionlogfd = -1;
+       }
 }
 
 /**
@@ -1785,6 +1888,7 @@ int serveloop(GArray* servers) {
                                        client->server=serve;
                                        client->exportsize=OFFT_MAX;
                                        client->net=net;
+                                       client->transactionlogfd = -1;
                                }
                                set_peername(net, client);
                                if (!authorized_client(client)) {
index 1b7ec52..662c6f7 100644 (file)
@@ -28,6 +28,9 @@
 #include <sys/time.h>
 #include <sys/types.h>
 #include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <fcntl.h>
 #include <syslog.h>
 #include <unistd.h>
 #include "config.h"
@@ -43,6 +46,8 @@ const static int errstr_len=1024;
 
 static uint64_t size;
 
+static gchar * transactionlog = "nbd-tester-client.tr";
+
 typedef enum {
        CONNECTION_TYPE_NONE,
        CONNECTION_TYPE_CONNECT,
@@ -56,6 +61,64 @@ typedef enum {
        CONNECTION_CLOSE_FAST,
 } CLOSE_TYPE;
 
+struct reqcontext {                 /* one node of a doubly linked struct rclist */
+       uint64_t seq;               /* sequence number; presumably issue order -- TODO confirm */
+       struct nbd_request req;     /* the NBD request carried by this node */
+       struct reqcontext * next;   /* following node, NULL at the tail */
+       struct reqcontext * prev;   /* preceding node, NULL at the head */
+};
+
+struct rclist {                     /* doubly linked list of struct reqcontext nodes */
+       struct reqcontext * head;   /* first node, NULL when the list is empty */
+       struct reqcontext * tail;   /* last node, NULL when the list is empty */
+       int numitems;               /* node count, kept by rclist_addtail/rclist_unlink */
+};
+
+/* Detach p from list l and clear its links; does nothing if either is NULL */
+void rclist_unlink(struct rclist * l, struct reqcontext * p) {
+       struct reqcontext * before;
+       struct reqcontext * after;
+
+       if (!p || !l)
+               return;
+       before = p->prev;
+       after = p->next;
+       /* predecessor (or the list head) now points past p */
+       if (before != NULL) before->next = after;
+       else l->head = after;
+
+       /* successor (or the list tail) now points back past p */
+       if (after != NULL) after->prev = before;
+       else l->tail = before;
+
+       p->prev = NULL;
+       p->next = NULL;
+       l->numitems--;
+}
+
+/* Append p to the end of list l; a NULL list or a NULL item is ignored */
+void rclist_addtail(struct rclist * l, struct reqcontext * p)
+{
+       struct reqcontext * last;
+
+       if (!(p && l))
+               return;
+       last = l->tail;
+       if (last) {
+               if (last->next)
+                       g_warning("addtail found list tail has a next pointer");
+               last->next = p;
+       } else {
+               if (l->head)
+                       g_warning("addtail found no list tail but a list head");
+               l->head = p;
+       }
+       p->next = NULL;
+       p->prev = last;
+       l->tail = p;
+       l->numitems++;
+}
+
 #define TEST_WRITE (1<<0)
 #define TEST_FLUSH (1<<1)
 
@@ -91,6 +154,8 @@ static inline int read_all(int f, void *buf, size_t len) {
 
        while(len>0) {
                if((res=read(f, buf, len)) <=0) {
+                       if (!res)
+                               errno=EAGAIN;
                        snprintf(errstr, errstr_len, "Read failed: %s", strerror(errno));
                        return -1;
                }
@@ -107,6 +172,8 @@ static inline int write_all(int f, void *buf, size_t len) {
 
        while(len>0) {
                if((res=write(f, buf, len)) <=0) {
+                       if (!res)
+                               errno=EAGAIN;
                        snprintf(errstr, errstr_len, "Write failed: %s", strerror(errno));
                        return -1;
                }
@@ -197,7 +264,6 @@ int setup_connection(gchar *hostname, int port, gchar* name, CONNECTION_TYPE cty
        READ_ALL_ERRCHK(sock, &flags, sizeof(uint16_t), err_open, "Could not read flags: %s", strerror(errno));
        flags = ntohs(flags);
        *serverflags = flags;
-       g_warning("Server flags are: %08x", flags);
        READ_ALL_ERRCHK(sock, buf, 124, err_open, "Could not read reserved zeroes: %s", strerror(errno));
        goto end;
 err_open:
@@ -356,7 +422,7 @@ int throughput_test(gchar* hostname, int port, char* name, int sock,
        if (!(testflags & TEST_WRITE))
                testflags &= ~TEST_FLUSH;
 
-       memset (writebuf, 'X', sizeof(1024));
+       memset (writebuf, 'X', 1024);
        size=0;
        if(!sock_is_open) {
                if((sock=setup_connection(hostname, port, name, CONNECTION_TYPE_FULL, &serverflags))<0) {
@@ -380,8 +446,8 @@ int throughput_test(gchar* hostname, int port, char* name, int sock,
        }
        for(i=0;i+1024<=size;i+=1024) {
                if(do_write) {
-                       int sendfua = (testflags & TEST_FLUSH) && ((i & 15) == 3);
-                       int sendflush = (testflags & TEST_FLUSH) && ((i & 15) == 11);
+                       int sendfua = (testflags & TEST_FLUSH) && (((i>>10) & 15) == 3);
+                       int sendflush = (testflags & TEST_FLUSH) && (((i>>10) & 15) == 11);
                        req.type=htonl((testflags & TEST_WRITE)?NBD_CMD_WRITE:NBD_CMD_READ);
                        if (sendfua)
                                req.type = htonl(NBD_CMD_WRITE | NBD_CMD_FLAG_FUA);
@@ -473,13 +539,471 @@ int throughput_test(gchar* hostname, int port, char* name, int sock,
                speed=speed/1024.0;
                speedchar[0]='G';
        }
-       g_message("%d: Throughput %s test complete. Took %.3f seconds to complete, %.3f%sib/s", (int)getpid(), (testflags & TEST_WRITE)?"write":"read", timespan, speed, speedchar);
+       g_message("%d: Throughput %s test (%s flushes) complete. Took %.3f seconds to complete, %.3f%sib/s", (int)getpid(), (testflags & TEST_WRITE)?"write":"read", (testflags & TEST_FLUSH)?"with":"without", timespan, speed, speedchar);
+
+err_open:
+       if(close_sock) {
+               close_connection(sock, CONNECTION_CLOSE_PROPERLY);
+       }
+err:
+       return retval;
+}
+
+/*
+ * Fill the 512 byte buffer 'buf' with a deterministic pattern derived only
+ * from seq and blknum; the first 64-bit word is blknum XORed with seq rotated
+ * by 32 bits. A seq of 0 yields an all-zero (blank) block.
+ */
+static inline void makebuf(char *buf, uint64_t seq, uint64_t blknum) {
+       uint64_t x = ((uint64_t)blknum) ^ (seq << 32) ^ (seq >> 32);
+       size_t i;
+       if (!seq) {
+               memset(buf, 0, 512);
+               return;
+       }
+       for (i = 0; i<512/sizeof(uint64_t); i++) {
+               int s;
+               /* memcpy: buf need not be 8-byte aligned; &63 avoids a shift by 64 */
+               memcpy(buf+i*sizeof(x), &x, sizeof(x));
+               x+=0xFEEDA1ECDEADBEEFULL+i+(((uint64_t)i)<<56);
+               s = x & 63;
+               x = x ^ (x<<s) ^ (x>>((64-s)&63)) ^ 0xAA55AA55AA55AA55ULL ^ seq;
+       }
+}
+               
+static inline int checkbuf(char *buf, uint64_t seq, uint64_t blknum) {
+       char expected[512];     /* the block makebuf() generates for seq/blknum */
+       makebuf(expected, seq, blknum);
+       return (0 == memcmp(expected, buf, 512)) ? 0 : -1;      /* 0 on match */
+}
+
+/* Debug trace of one NBD command word; a no-op unless DEBUG_COMMANDS is defined */
+static inline void dumpcommand(char * text, uint32_t command)
+{
+#ifdef DEBUG_COMMANDS
+       /* convert from network byte order once; everything below uses host order */
+       uint32_t hostcmd = ntohl(command);
+       const char * cmdname = "UNKNOWN";
+       switch (hostcmd & NBD_CMD_MASK_COMMAND) {
+       case NBD_CMD_READ:
+               cmdname = "NBD_CMD_READ";
+               break;
+       case NBD_CMD_WRITE:
+               cmdname = "NBD_CMD_WRITE";
+               break;
+       case NBD_CMD_DISC:
+               cmdname = "NBD_CMD_DISC";
+               break;
+       case NBD_CMD_FLUSH:
+               cmdname = "NBD_CMD_FLUSH";
+               break;
+       default:        /* keep "UNKNOWN" */
+               break;
+       }
+       printf("%s: %s [%s] (0x%08x)\n",
+              text, cmdname,
+              (hostcmd & NBD_CMD_FLAG_FUA) ? "FUA" : "NONE",
+              hostcmd);
+#endif
+}
+
+/*
+ * Replay the requests recorded in the transaction log against the server:
+ * writes carry makebuf() data tagged with the request's sequence number and
+ * each block read back is checked against the last write acknowledged for it.
+ * Returns retval, which is 0 unless a failure path has set it to -1.
+ */
+int integrity_test(gchar* hostname, int port, char* name, int sock,
+                  char sock_is_open, char close_sock, int testflags) {
+       struct nbd_reply rep;
+       fd_set rset;
+       fd_set wset;
+       struct timeval tv;
+       struct timeval start;
+       struct timeval stop;
+       double timespan;
+       double speed;
+       char speedchar[2] = { '\0', '\0' };
+       int retval=0;
+       int serverflags = 0;
+       pid_t mypid = getpid();
+       int blkhashfd = -1;
+       char *blkhashname=NULL;
+       uint32_t *blkhash = NULL;
+       int logfd=-1;
+       uint64_t seq=1;         /* starts at 1: makebuf() treats seq 0 as a blank block */
+       uint64_t processed=0;
+       uint64_t printer=0;
+       int readtransactionfile = 1;
+       struct rclist txqueue={NULL, NULL, 0};  /* read from the log, not yet sent */
+       struct rclist inflight={NULL, NULL, 0}; /* sent, awaiting the server's reply */
+
+       size=0;
+       if(!sock_is_open) {
+               if((sock=setup_connection(hostname, port, name, CONNECTION_TYPE_FULL, &serverflags))<0) {
+                       g_warning("Could not open socket: %s", errstr);
+                       retval=-1;
+                       goto err;
+               }
+       }
+
+       if ((serverflags & (NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA))
+           != (NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA))
+               g_warning("Server flags do not support FLUSH and FUA - these may error");
+
+#ifdef HAVE_MKSTEMP
+       blkhashname=strdup("/tmp/blkarray-XXXXXX");
+       if (!blkhashname || (-1 == (blkhashfd = mkstemp(blkhashname)))) {
+               g_warning("Could not open temp file: %s", strerror(errno));
+               retval=-1;
+               goto err_open;
+       }
+#else
+       /* use tmpnam here to avoid further feature test nightmare */
+       if (-1 == (blkhashfd = open(blkhashname=strdup(tmpnam(NULL)),
+                                   O_CREAT | O_EXCL | O_RDWR,
+                                   S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH))) {
+               g_warning("Could not open temp file: %s", strerror(errno));
+               retval=-1;
+               goto err_open;
+       }
+#endif
+       /* Ensure space freed if we die */
+       if (-1 == unlink(blkhashname)) {
+               g_warning("Could not unlink temp file: %s", strerror(errno));
+               retval=-1;
+               goto err_open;
+       }
+
+       if (-1 == lseek(blkhashfd, (off_t)((size>>9)<<2), SEEK_SET)) {
+               g_warning("Could not llseek temp file: %s", strerror(errno));
+               retval=-1;
+               goto err_open;
+       }
+
+       if (-1 == write(blkhashfd, "\0", 1)) {
+               g_warning("Could not write temp file: %s", strerror(errno));
+               retval=-1;
+               goto err_open;
+       }
+
+       /* blkhash maps the (unlinked, zero-filled) temp file: one uint32_t
+        * per 512-byte block, holding the seq last written to that block */
+       blkhash = mmap(NULL, (size>>9)<<2, PROT_READ | PROT_WRITE,
+                      MAP_SHARED, blkhashfd, 0);
+       if (MAP_FAILED == blkhash) {
+               blkhash = NULL; /* keep the cleanup at err: from unmapping it */
+               g_warning("Could not mmap temp file: %s", strerror(errno));
+               retval=-1;
+               goto err_open;
+       }
+
+       if (-1 == (logfd = open(transactionlog, O_RDONLY)))
+       {
+               g_warning("Could not open log file: %s", strerror(errno));
+               retval=-1;
+               goto err_open;
+       }
+
+       if(gettimeofday(&start, NULL)<0) {
+               retval=-1;
+               snprintf(errstr, errstr_len, "Could not measure start time: %s", strerror(errno));
+               goto err_open;
+       }
+
+       while (readtransactionfile || txqueue.numitems || inflight.numitems) {
+               int ret;
+
+               uint32_t magic;
+               uint32_t command;
+               uint64_t from;
+               uint32_t len;
+               struct reqcontext * prc;
+
+               *errstr=0;
+
+               FD_ZERO(&wset);
+               FD_ZERO(&rset);
+               if (readtransactionfile)
+                       FD_SET(logfd, &rset);
+               if (txqueue.numitems)
+                       FD_SET(sock, &wset);
+               if (inflight.numitems)
+                       FD_SET(sock, &rset);
+               tv.tv_sec=5;
+               tv.tv_usec=0;
+               ret = select(1+((sock>logfd)?sock:logfd), &rset, &wset, NULL, &tv);
+               if (ret == 0) {
+                       retval=-1;
+                       snprintf(errstr, errstr_len, "Timeout reading from socket");
+                       goto err_open;
+               } else if (ret<0) {
+                       g_warning("Could not select: %s", strerror(errno));
+                       retval=-1;
+                       goto err_open;
+               }
+               /* We know we've got at least one thing to do here then */
+
+               /* Get a command from the transaction log */
+               if (FD_ISSET(logfd, &rset)) {
+                       /* NOTE(review): confirm READ_ALL_ERRCHK sets retval on failure */
+                       /* Read a request or reply from the transaction file */
+                       READ_ALL_ERRCHK(logfd,
+                                       &magic,
+                                       sizeof(magic),
+                                       err_open,
+                                       "Could not read transaction log: %s",
+                                       strerror(errno));
+                       magic = ntohl(magic);
+                       switch (magic) {
+                       case NBD_REQUEST_MAGIC:
+                               if (NULL == (prc = calloc(1, sizeof(struct reqcontext)))) {
+                                       retval=-1;
+                                       snprintf(errstr, errstr_len, "Could not allocate request");
+                                       goto err_open;
+                               }
+                               READ_ALL_ERRCHK(logfd,
+                                               sizeof(magic)+(char *)&(prc->req),
+                                               sizeof(struct nbd_request)-sizeof(magic),
+                                               err_open,
+                                               "Could not read transaction log: %s",
+                                               strerror(errno));
+                               prc->req.magic = htonl(NBD_REQUEST_MAGIC);
+                               prc->seq=seq++;
+                               if ((ntohl(prc->req.type) & NBD_CMD_MASK_COMMAND) == NBD_CMD_DISC) {
+                                       /* no more to read; don't enqueue as no reply
+                                        * we will disconnect manually at the end
+                                        */
+                                       readtransactionfile = 0;
+                                       free (prc);
+                               } else {
+                                       dumpcommand("Enqueuing command", prc->req.type);
+                                       rclist_addtail(&txqueue, prc);
+                               }
+                               prc = NULL;
+                               break;
+                       case NBD_REPLY_MAGIC:
+                               READ_ALL_ERRCHK(logfd,
+                                               sizeof(magic)+(char *)(&rep),
+                                               sizeof(struct nbd_reply)-sizeof(magic),
+                                               err_open,
+                                               "Could not read transaction log: %s",
+                                               strerror(errno));
+
+                               if (rep.error) {
+                                       retval=-1;
+                                       snprintf(errstr, errstr_len, "Transaction log file contained errored transaction");
+                                       goto err_open;
+                               }
+
+                               /* We do not need to consume data on a read reply as there is
+                                * none in the log */
+                               break;
+                       default:
+                               retval=-1;
+                               snprintf(errstr, errstr_len, "Bad magic in transaction log: %08x", magic);
+                               goto err_open;
+                       }
+               }
+
+               /* See if we have a write we can do */
+               if (FD_ISSET(sock, &wset))
+               {
+                       prc = txqueue.head;
+                       if (!prc)
+                               g_warning("Socket write FD set but we shouldn't have been interested");
+                       else
+                       {
+                               rclist_unlink(&txqueue, prc);
+                               rclist_addtail(&inflight, prc);
+
+                               if (ntohl(prc->req.magic) != NBD_REQUEST_MAGIC) {
+                                       retval=-1;
+                                       g_warning("Asked to write a request without a magic number");
+                                       goto err_open;
+                               }
+
+                               dumpcommand("Sending command", prc->req.type);
+                               command = ntohl(prc->req.type);
+                               from = ntohll(prc->req.from);
+                               len = ntohl(prc->req.len);
+                               /* we rewrite the handle as they otherwise may not be unique */
+                               *((uint64_t*)(prc->req.handle))=htonll((uint64_t)prc);
+                               WRITE_ALL_ERRCHK(sock,
+                                                &(prc->req),
+                                                sizeof(struct nbd_request),
+                                                err_open,
+                                                "Could not write command: %s",
+                                                strerror(errno));
+                               switch (command & NBD_CMD_MASK_COMMAND) {
+                               case NBD_CMD_WRITE:
+                                       while (len > 0) {
+                                               uint64_t blknum = from>>9;
+                                               char dbuf[512];
+                                               if (from>=size) {
+                                                       retval=-1;
+                                                       snprintf(errstr, errstr_len, "offset %llx beyond size %llx", (long long int) from, (long long int)size);
+                                                       goto err_open;
+                                               }
+                                               /* work out what we should be writing */
+                                               makebuf(dbuf, prc->seq, blknum);
+                                               WRITE_ALL_ERRCHK(sock,
+                                                                dbuf,
+                                                                512,
+                                                                err_open,
+                                                                "Could not write data: %s",
+                                                                strerror(errno));
+                                               from += 512;
+                                               len -= 512;
+                                       }
+                                       break;
+                               case NBD_CMD_DISC:
+                               case NBD_CMD_READ:
+                               case NBD_CMD_FLUSH:
+                                       break;
+                               default:
+                                       retval=-1;
+                                       snprintf(errstr, errstr_len, "Incomprehensible command: %08x", command);
+                                       goto err_open;
+                                       break;
+                               }
+                               prc = NULL;
+                       }
+               }
+
+               /* See if there is a reply to be processed from the socket */
+               if(FD_ISSET(sock, &rset)) {
+                       /* Okay, there's something ready for
+                        * reading here */
+
+                       READ_ALL_ERRCHK(sock,
+                                       &rep,
+                                       sizeof(struct nbd_reply),
+                                       err_open,
+                                       "Could not read from server socket: %s",
+                                       strerror(errno));
+
+                       if (rep.magic != htonl(NBD_REPLY_MAGIC)) {
+                               retval=-1;
+                               snprintf(errstr, errstr_len, "Bad magic from server");
+                               goto err_open;
+                       }
+
+                       if (rep.error) {
+                               retval=-1;
+                               snprintf(errstr, errstr_len, "Server errored a transaction");
+                               goto err_open;
+                       }
+                       /* the handle is the reqcontext pointer stored when the request was sent */
+                       prc=(struct reqcontext *)ntohll(*((uint64_t *)rep.handle));
+                       if (prc->req.magic != htonl(NBD_REQUEST_MAGIC)) {
+                               retval=-1;
+                               snprintf(errstr, errstr_len, "Bad magic in inflight data: %08x", prc->req.magic);
+                               goto err_open;
+                       }
+
+                       dumpcommand("Processing reply to command", prc->req.type);
+                       command = ntohl(prc->req.type);
+                       from = ntohll(prc->req.from);
+                       len = ntohl(prc->req.len);
+
+                       switch (command & NBD_CMD_MASK_COMMAND) {
+                       case NBD_CMD_READ:
+                               while (len > 0) {
+                                       uint64_t blknum = from>>9;
+                                       char dbuf[512];
+                                       if (from>=size) {
+                                               retval=-1;
+                                               snprintf(errstr, errstr_len, "offset %llx beyond size %llx", (long long int) from, (long long int)size);
+                                               goto err_open;
+                                       }
+                                       READ_ALL_ERRCHK(sock,
+                                                       dbuf,
+                                                       512,
+                                                       err_open,
+                                                       "Could not read data: %s",
+                                                       strerror(errno));
+                                       /* compare with what was last written to this block */
+                                       if (checkbuf(dbuf, blkhash[blknum], blknum))
+                                       {
+                                               retval=-1;
+                                               snprintf(errstr, errstr_len, "Bad reply data: seq %08x", blkhash[blknum]);
+                                               goto err_open;
+                                       }
+                                       from += 512;
+                                       len -= 512;
+                               }
+                               break;
+                       case NBD_CMD_WRITE:
+                               /* subsequent reads should get data with this seq*/
+                               while (len > 0) {
+                                       uint64_t blknum = from>>9;
+                                       blkhash[blknum]=(uint32_t)(prc->seq);
+                                       from += 512;
+                                       len -= 512;
+                               }
+                               break;
+                       default:
+                               break;
+                       }
+
+                       processed++;
+                       rclist_unlink(&inflight, prc);
+                       prc->req.magic=0; /* so a duplicate reply is detected */
+                       free(prc);
+               }
+
+               if (!(printer++ % 10000) || !(readtransactionfile || txqueue.numitems || inflight.numitems) )
+                       printf("%d: Seq %08lld Queued: %08d Inflight: %08d Done: %08lld\n",
+                              (int)mypid,
+                              (long long int) seq,
+                              txqueue.numitems,
+                              inflight.numitems,
+                              (long long int) processed);
+
+       }
+
+       if (gettimeofday(&stop, NULL)<0) {
+               retval=-1;
+               snprintf(errstr, errstr_len, "Could not measure end time: %s", strerror(errno));
+               goto err_open;
+       }
+       timespan=timeval_diff_to_double(&stop, &start);
+       speed=size/timespan;
+       if(speed>1024) {
+               speed=speed/1024.0;
+               speedchar[0]='K';
+       }
+       if(speed>1024) {
+               speed=speed/1024.0;
+               speedchar[0]='M';
+       }
+       if(speed>1024) {
+               speed=speed/1024.0;
+               speedchar[0]='G';
+       }
+       g_message("%d: Integrity %s test complete. Took %.3f seconds to complete, %.3f%sib/s", (int)getpid(), (testflags & TEST_WRITE)?"write":"read", timespan, speed, speedchar);
 
 err_open:
        if(close_sock) {
                close_connection(sock, CONNECTION_CLOSE_PROPERLY);
        }
 err:
+       if (size && blkhash)
+               munmap(blkhash, (size>>9)<<2);
+
+       if (blkhashfd != -1)
+               close (blkhashfd);
+
+       if (logfd != -1)
+               close (logfd);
+
+       if (blkhashname)
+               free(blkhashname);
+
+       if (*errstr)
+               g_warning("%s",errstr);
+
        return retval;
 }
 
@@ -503,7 +1027,7 @@ int main(int argc, char**argv) {
                exit(EXIT_FAILURE);
        }
        logging();
-       while((c=getopt(argc, argv, "-N:owf"))>=0) {
+       while((c=getopt(argc, argv, "-N:t:owfi"))>=0) {
                switch(c) {
                        case 1:
                                switch(nonopt) {
@@ -526,6 +1050,9 @@ int main(int argc, char**argv) {
                                p = 10809;
                                want_port = false;
                                break;
+                       case 't':
+                               transactionlog=g_strdup(optarg);
+                               break;
                        case 'o':
                                test=oversize_test;
                                break;
@@ -535,6 +1062,9 @@ int main(int argc, char**argv) {
                        case 'f':
                                testflags|=TEST_FLUSH;
                                break;
+                       case 'i':
+                               test=integrity_test;
+                               break;
                }
        }
 
diff --git a/nbd-tester-client.tr b/nbd-tester-client.tr
new file mode 100644 (file)
index 0000000..e65a50c
Binary files /dev/null and b/nbd-tester-client.tr differ
index a01d3dc..a05a6a5 100755 (executable)
@@ -2,6 +2,8 @@
 # Yes, that's POSIX sh, not bash!
 
 tmpnam=`mktemp`
+conffile=${tmpnam}.conf
+pidfile=${tmpnam}.pid
 
 ulimit -c unlimited
 
@@ -13,7 +15,7 @@ echo $1
 case $1 in
        */cmd)
                # Test with export specified on command line
-               ./nbd-server -C /dev/null -p `pwd`/nbd-server.pid 11111 $tmpnam &
+               ./nbd-server -C /dev/null -p ${pidfile} 11111 $tmpnam &
                # -p only works if nbd-server wasn't compiled with -DNOFORK or
                # -DNODAEMON, which I sometimes do for testing and debugging.
                PID=$!
@@ -23,7 +25,7 @@ case $1 in
        ;;
        */cfgsize)
                # Test oversized requests
-               ./nbd-server -C /dev/null -p `pwd`/nbd-server.pid 11111 $tmpnam &
+               ./nbd-server -C /dev/null -p ${pidfile} 11111 $tmpnam &
                # -p only works if nbd-server wasn't compiled with -DNOFORK or
                # -DNODAEMON, which I sometimes do for testing and debugging.
                PID=$!
@@ -33,14 +35,14 @@ case $1 in
        ;;
        */cfg1)
                # Test with export specified in config file
-               cat > nbd-server.conf <<EOF
+               cat > ${conffile} <<EOF
 [generic]
        oldstyle = true
 [export]
        exportname = $tmpnam
        port = 11112
 EOF
-               ./nbd-server -C nbd-server.conf -p `pwd`/nbd-server.pid &
+               ./nbd-server -C ${conffile} -p ${pidfile} &
                PID=$!
                sleep 1
                ./nbd-tester-client 127.0.0.1 11112
@@ -49,7 +51,7 @@ EOF
        */cfgmulti)
                # Test with multiple exports specified in config file, and
                # testing more options too
-               cat >nbd-server.conf <<EOF
+               cat >${conffile} <<EOF
 [generic]
        oldstyle = true
 [export1]
@@ -63,23 +65,23 @@ EOF
        readonly = true
        listenaddr = 127.0.0.1
 EOF
-               ./nbd-server -C nbd-server.conf -p `pwd`/nbd-server.pid &
+               ./nbd-server -C ${conffile} -p ${pidfile} &
                PID=$!
                sleep 1
                ./nbd-tester-client localhost 11113
                retval=$?
                if [ $retval -ne 0 ]
                then
-                       if [ -f nbd-server.pid ]
+                       if [ -f ${pidfile} ]
                        then
-                               kill `cat nbd-server.pid`
-                               rm -f nbd-server.pid
+                               kill `cat ${pidfile}`
+                               rm -f ${pidfile}
                        else
                                kill $PID
                        fi
                        if [ -z "$2" ]
                        then
-                               rm -f $tmpnam nbd-server.conf
+                               rm -f $tmpnam ${conffile}
                        fi
                        exit $retval
                fi
@@ -88,58 +90,79 @@ EOF
        ;;
        */cfgnew)
                # Test new-style exports
-               cat >nbd-server.conf <<EOF
+               cat >${conffile} <<EOF
 [generic]
 [export1]
        exportname = $tmpnam
 EOF
-               ./nbd-server -C nbd-server.conf -p `pwd`/nbd-server.pid &
+               ./nbd-server -C ${conffile} -p ${pidfile} &
                PID=$!
                sleep 1
                ./nbd-tester-client localhost -N export1
                retval=$?
        ;;
        */write)
-               # Test new-style exports
-               cat >nbd-server.conf <<EOF
+               # Test writing
+               cat >${conffile} <<EOF
 [generic]
 [export1]
        exportname = $tmpnam
 EOF
-               ./nbd-server -C nbd-server.conf -p `pwd`/nbd-server.pid &
+               ./nbd-server -C ${conffile} -p ${pidfile} &
                PID=$!
                sleep 1
                ./nbd-tester-client localhost -N export1 -w
                retval=$?
        ;;
        */flush)
-               # Test new-style exports
-               cat >nbd-server.conf <<EOF
+               # Test writes with flush
+               cat >${conffile} <<EOF
 [generic]
 [export1]
        exportname = $tmpnam
+       flush = true
+       fua = true
+       rotational = true
 EOF
-               ./nbd-server -C nbd-server.conf -p `pwd`/nbd-server.pid &
+               ./nbd-server -C ${conffile} -p ${pidfile} &
                PID=$!
                sleep 1
                ./nbd-tester-client localhost -N export1 -w -f
                retval=$?
        ;;
+       */integrity)
+               # Integrity test
+               cat >${conffile} <<EOF
+[generic]
+[export1]
+       exportname = $tmpnam
+       flush = true
+       fua = true
+       rotational = true
+EOF
+               # we need a bigger disk
+               dd if=/dev/zero of=$tmpnam bs=1M count=50 >/dev/null 2>&1
+               ./nbd-server -C ${conffile} -p ${pidfile} &
+               PID=$!
+               sleep 1
+               ./nbd-tester-client localhost -N export1 -i
+               retval=$?
+       ;;
        *)
                echo "E: unknown test $1"
                exit 1
        ;;
 esac
-if [ -f nbd-server.pid ]
+if [ -f ${pidfile} ]
 then
-       kill `cat nbd-server.pid`
-       rm -f nbd-server.pid
+       kill `cat ${pidfile}`
+       rm -f ${pidfile}
 else
        kill $PID
 fi
 if [ -z "$2" ]
 then
-       rm -f $tmpnam nbd-server.conf
+       rm -f $tmpnam ${conffile}
 fi
 if [ $retval -ne 0 ]
 then