sbin_PROGRAMS = @NBD_CLIENT_NAME@
EXTRA_PROGRAMS = nbd-client knbd-client
TESTS_ENVIRONMENT=$(srcdir)/simple_test
-TESTS = cmd cfg1 cfgmulti cfgnew cfgsize write flush
+TESTS = cmd cfg1 cfgmulti cfgnew cfgsize write flush integrity
check_PROGRAMS = nbd-tester-client
knbd_client_SOURCES = nbd-client.c cliserv.h
nbd_client_SOURCES = nbd-client.c cliserv.h
cfgsize:
write:
flush:
+integrity:
+
#include "nbd.h"
#if NBD_LFS==1
+/* /usr/include/features.h (included from /usr/include/sys/types.h)
+ defines this when _GNU_SOURCE is defined
+ */
+#ifndef _LARGEFILE_SOURCE
#define _LARGEFILE_SOURCE
+#endif
#define _FILE_OFFSET_BITS 64
#endif
AC_CHECK_SIZEOF(unsigned int)
AC_CHECK_SIZEOF(unsigned long int)
AC_CHECK_SIZEOF(unsigned long long int)
-AC_CHECK_FUNCS([llseek alarm gethostbyname inet_ntoa memset socket strerror strstr])
+AC_CHECK_FUNCS([llseek alarm gethostbyname inet_ntoa memset socket strerror strstr mkstemp])
+dnl Define HAVE_SYNC_FILE_RANGE to 1 when sync_file_range(2) is available.
+dnl The second argument of AC_DEFINE is the macro's value and the third its
+dnl description for config.h.
+AC_CHECK_FUNC([sync_file_range],
+ [AC_DEFINE([HAVE_SYNC_FILE_RANGE], [1], [Define to 1 if sync_file_range(2) is supported])],
+ [])
AC_FUNC_FORK
AC_FUNC_SETVBUF_REVERSED
AC_MSG_CHECKING(whether client should be built)
There are two message types in the data pushing phase: the request, and
the response.
-There are three request types in the data pushing phase: NBD_CMD_READ,
-NBD_CMD_WRITE, and NBD_CMD_DISC (disconnect).
+There are four request types in the data pushing phase: NBD_CMD_READ,
+NBD_CMD_WRITE, NBD_CMD_DISC (disconnect), and NBD_CMD_FLUSH.
The request is sent by the client; the response by the server. A request
header consists a 32 bit magic number (magic), a 32 bit field denoting
the request type (see below; 'type'), a 64 bit handle ('handle'), a 64
bit data offset ('from'), and a 32 bit length ('len'). In case of a
write request, the header is immediately followed by 'len' bytes of
-data.
+data. In the case of NBD_CMD_FLUSH, the offset and length should
+be zero (meaning "flush entire device"); other values are reserved
+for future use (e.g. for flushing specific areas without a write).
The reply contains three fields: a 32 bit magic number ('magic'), a 32
bit error code ('error'; 0, unless an error occurred in which case it is
will probably be postponed until there are no other outstanding
requests.
+A flush request will not be sent unless NBD_FLAG_SEND_FLUSH is set,
+and indicates the backing file should be fdatasync()'d to disk.
+
+The top 16 bits of the request type field are flags. NBD_CMD_FLAG_FUA
+implies a force unit access, and can currently only be usefully combined
+with NBD_CMD_WRITE. This is implemented using sync_file_range()
+if present, else by fdatasync() of that file (note: not of all files
+in a multifile environment). NBD_CMD_FLAG_FUA will not be set
+unless NBD_FLAG_SEND_FUA is set.
+
There are two versions of the negotiation: the 'old' style (nbd <=
2.9.16) and the 'new' style (nbd >= 2.9.17, though due to a bug it does
not work with anything below 2.9.18). What follows is a description of
#include "config.h"
#if NBD_LFS
#define _FILE_OFFSET_BITS 64
+#ifndef _LARGEFILE_SOURCE
#define _LARGEFILE_SOURCE
+#endif
+#ifdef HAVE_SYNC_FILE_RANGE
+#define USE_SYNC_FILE_RANGE
+#define _GNU_SOURCE
+#endif /* HAVE_SYNC_FILE_RANGE */
#endif /* NBD_LFS */
#endif /* LFS_H */
--- /dev/null
+#!/bin/sh
+#
+# Example script to make a transaction log file
+# Must be run as root. Remember to chown the file afterwards
+#
+# Outline: start nbd-server on a temporary backing file with the
+# transactionlog option set, attach it as /dev/nbd0, generate ext3
+# filesystem traffic on it, then tear everything down and list the log.
+
+# Insert the name of a tarfile here
+tarfile=/home/amb/iptables/iptables_1.4.4.orig.tar.gz
+# Temporary backing file for the export; the config file and pid file
+# names are derived from it.
+tmpnam=`mktemp`
+conffile=${tmpnam}.conf
+pidfile=${tmpnam}.pid
+# The transaction log is written to the current directory.
+output=`pwd`/output.tr
+
+ulimit -c unlimited
+
+# Server configuration: one export with flush, fua and rotational enabled
+# and the transaction log pointed at $output.
+cat >${conffile} <<EOF
+[generic]
+[export1]
+ exportname = $tmpnam
+ transactionlog = $output
+ flush = true
+ fua = true
+ rotational = true
+EOF
+# Start the server in the background and remember the shell's child PID
+# as a fallback in case the pid file does not appear.
+./nbd-server -C ${conffile} -p ${pidfile} &
+PID=$!
+sleep 1
+# Fill the backing file with 50 blocks of 1M of zeroes.
+dd if=/dev/zero of=${tmpnam} bs=1M count=50
+./nbd-client -N export1 127.0.0.1 /dev/nbd0
+mkfs.ext3 /dev/nbd0
+# First pass: unpack the tarfile onto the device (write traffic).
+# NOTE(review): "2>&1 >/dev/null" sends stderr to the original stdout and
+# only stdout to /dev/null - confirm this ordering is intended.
+mount -t ext3 -odata=journal,barrier=1 /dev/nbd0 /mnt
+(cd /mnt ; tar xvzf ${tarfile} ; sync) 2>&1 >/dev/null
+umount /mnt
+# Second pass: remount and read everything back (read traffic), then run
+# dbench for about ten seconds before killing it.
+mount -t ext3 -odata=journal,barrier=1 /dev/nbd0 /mnt
+(cd /mnt ; tar cvzf /dev/null . ; sync) 2>&1 >/dev/null
+dbench -D /mnt 1 &
+sleep 10
+killall dbench
+sleep 2
+killall -KILL dbench
+sync
+umount /mnt
+# Disconnect the client, then stop the server using the pid file if it
+# exists, else the PID recorded when the server was started.
+./nbd-client -d /dev/nbd0
+if [ -f ${pidfile} ]
+then
+ kill `cat ${pidfile}`
+ rm -f ${pidfile}
+else
+ kill $PID
+fi
+# Remove the temporary files and show the resulting transaction log.
+rm -f $tmpnam ${conffile}
+ls -la ${output}
</listitem>
</varlistentry>
<varlistentry>
+ <term><option>flush</option></term>
+ <listitem>
+ <para>Optional; boolean.</para>
+ <para>When this option is enabled,
+ <command>nbd-server</command> will inform the client that it
+ supports and desires to be sent flush requests when the
+ elevator layer receives them. Receipt of a flush request
+ will cause an fdatasync() (or, if the sync option is set,
+ an fsync()) on the backend storage. This increases
+ reliability in the case of an unclean shutdown at
+ the expense of a degradation of performance. The default
+ state is disabled. This option will have no effect unless
+ supported by the client.
+ </para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><option>fua</option></term>
+ <listitem>
+ <para>Optional; boolean.</para>
+ <para>When this option is enabled,
+ <command>nbd-server</command> will inform the client that it
+ supports and desires to be sent fua (force unit access) commands
+ when the elevator layer receives them. Receipt of a force unit
+ access command will cause the specified command to be synced
+ to backend storage using sync_file_range() if supported, or
+ fdatasync() otherwise. This increases
+ reliability in the case of an unclean shutdown at
+ the expense of a degradation of performance. The default
+ state is disabled. This option will have no effect unless
+ supported by the client.
+ </para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><option>rotational</option></term>
+ <listitem>
+ <para>Optional; boolean.</para>
+ <para>When this option is enabled,
+ <command>nbd-server</command> will inform the client that it
+ would prefer the client to send requests in elevator order, perhaps
+ because it has a backing store and no local elevator. By
+ default, the client uses QUEUE_FLAG_NONROT, which effectively
+ restricts the function of the elevator to block merges. By
+ specifying this flag on the server, the client will not use
+ QUEUE_FLAG_NONROT, meaning the client elevator will perform
+ normal elevator ordering of I/O requests. Note that even when
+ the backing store is on rotating media, it is not normally
+ necessary to specify this flag, as the server's elevator
+ algorithm will be used. This flag is only required where
+ the server will not be using an elevator algorithm or where
+ the elevator algorithm is effectively neutered (e.g. with
+ the sync option set). This option will have no effect unless
+ supported by the client.
+ </para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
<term><option>sparse_cow</option></term>
<listitem>
<para>Optional; boolean.</para>
</para>
</listitem>
</varlistentry>
+ <varlistentry>
+ <term><option>transactionlog</option></term>
+ <listitem>
+ <para>Optional; string</para>
+ <para>
+ If specified, then this pathname is used to generate a transaction
+ log. A transaction log is a binary file consisting of the requests
+ sent to and the replies received by the server, but excluding any
+ data (so, for a write command, it records the offset and length
+ of the write but not the data written). It is therefore relatively
+ safe to distribute to a third party. Note that the transaction log
+ does not include the negotiation sequence. Transaction logs are
+ mainly useful for debugging. The program
+ <emphasis>nbd-tester-client</emphasis> distributed with the source
+ to this program can replay a transaction log against a server and
+ perform a data integrity test. Note that the transaction log is
+ written to for every client opened. If it is necessary to maintain
+ separate transaction logs for each client, the
+ <emphasis>prerun</emphasis> script should rename the transaction log
+ (which will just have been opened) in order to avoid transaction logs
+ overwriting each other. This action should be race-free.
+ </para>
+ </listitem>
+ </varlistentry>
</variablelist>
</refsect1>
if(read(sock, &tmp, sizeof(uint16_t)) < 0) {
err("Failed reading flags: %m");
}
- *flags = ((u32)ntohs(tmp)) << 16;
+ *flags = ((u32)ntohs(tmp));
/* reserved for future use*/
if (write(sock, &reserved, sizeof(reserved)) < 0)
ioctl(nbd, NBD_CLEAR_SOCK);
+ /* ignore error as kernel may not support */
+ ioctl(nbd, NBD_SET_FLAGS, (unsigned long) flags);
+
if (ioctl(nbd, BLKROSET, (unsigned long) &read_only) < 0)
err("Unable to set read-only attribute for device");
}
#define DEBUG2( a,b ) printf( a,b )
#define DEBUG3( a,b,c ) printf( a,b,c )
#define DEBUG4( a,b,c,d ) printf( a,b,c,d )
+#define DEBUG5( a,b,c,d,e ) printf( a,b,c,d,e )
#else
#define DEBUG( a )
#define DEBUG2( a,b )
#define DEBUG3( a,b,c )
#define DEBUG4( a,b,c,d )
+#define DEBUG5( a,b,c,d,e )
#endif
#ifndef PACKAGE_VERSION
#define PACKAGE_VERSION ""
#define F_SPARSE 16 /**< flag to tell us copyronwrite should use a sparse file */
#define F_SDP 32 /**< flag to tell us the export should be done using the Socket Direct Protocol for RDMA */
#define F_SYNC 64 /**< Whether to fsync() after a write */
+#define F_FLUSH 128 /**< Whether server wants FLUSH to be sent by the client */
+#define F_FUA 256 /**< Whether server wants FUA to be sent by the client */
+#define F_ROTATIONAL 512 /**< Whether server wants the client to implement the elevator algorithm */
GHashTable *children;
char pidfname[256]; /**< name of our PID file */
char pidftemplate[256]; /**< template to be used for the filename of the PID file */
disconnects */
gchar* servename; /**< name of the export as selected by nbd-client */
int max_connections; /**< maximum number of opened connections */
+ gchar* transactionlog;/**< filename for transaction log */
} SERVER;
/**
u32 difffilelen; /**< number of pages in difffile */
u32 *difmap; /**< see comment on the global difmap for this one */
gboolean modern; /**< client was negotiated using modern negotiation protocol */
+ int transactionlogfd;/**< fd for transaction log */
} CLIENT;
/**
g_free(server->prerun);
if(server->postrun)
g_free(server->postrun);
+ if(server->transactionlog)
+ g_free(server->transactionlog);
g_free(server);
}
if(s->postrun)
serve->postrun = g_strdup(s->postrun);
+
+ if(s->transactionlog)
+ serve->transactionlog = g_strdup(s->transactionlog);
if(s->servename)
serve->servename = g_strdup(s->servename);
{ "virtstyle", FALSE, PARAM_STRING, &(virtstyle), 0 },
{ "prerun", FALSE, PARAM_STRING, &(s.prerun), 0 },
{ "postrun", FALSE, PARAM_STRING, &(s.postrun), 0 },
+ { "transactionlog", FALSE, PARAM_STRING, &(s.transactionlog), 0 },
{ "readonly", FALSE, PARAM_BOOL, &(s.flags), F_READONLY },
{ "multifile", FALSE, PARAM_BOOL, &(s.flags), F_MULTIFILE },
{ "copyonwrite", FALSE, PARAM_BOOL, &(s.flags), F_COPYONWRITE },
{ "sparse_cow", FALSE, PARAM_BOOL, &(s.flags), F_SPARSE },
{ "sdp", FALSE, PARAM_BOOL, &(s.flags), F_SDP },
{ "sync", FALSE, PARAM_BOOL, &(s.flags), F_SYNC },
+ { "flush", FALSE, PARAM_BOOL, &(s.flags), F_FLUSH },
+ { "fua", FALSE, PARAM_BOOL, &(s.flags), F_FUA },
+ { "rotational", FALSE, PARAM_BOOL, &(s.flags), F_ROTATIONAL },
{ "listenaddr", FALSE, PARAM_STRING, &(s.listenaddr), 0 },
{ "maxconnections", FALSE, PARAM_INT, &(s.max_connections), 0 },
};
* @param client The client we're serving for
* @return The number of bytes actually written, or -1 in case of an error
**/
-ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client) {
+ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
int fhandle;
off_t foffset;
size_t maxbytes;
if(maxbytes && len > maxbytes)
len = maxbytes;
- DEBUG4("(WRITE to fd %d offset %llu len %u), ", fhandle, foffset, len);
+ DEBUG5("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle, foffset, len, fua);
myseek(fhandle, foffset);
retval = write(fhandle, buf, len);
if(client->server->flags & F_SYNC) {
fsync(fhandle);
+ } else if (fua) {
+#ifdef USE_SYNC_FILE_RANGE
+ sync_file_range(fhandle, foffset, len,
+ SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE |
+ SYNC_FILE_RANGE_WAIT_AFTER);
+#else
+ fdatasync(fhandle);
+#endif
}
return retval;
}
* Call rawexpwrite repeatedly until all data has been written.
* @return 0 on success, nonzero on failure
**/
-int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client) {
+int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
ssize_t ret=0;
- while(len > 0 && (ret=rawexpwrite(a, buf, len, client)) > 0 ) {
+ while(len > 0 && (ret=rawexpwrite(a, buf, len, client, fua)) > 0 ) {
a += ret;
buf += ret;
len -= ret;
* @param client The client we're going to write for.
* @return 0 on success, nonzero on failure
**/
-int expwrite(off_t a, char *buf, size_t len, CLIENT *client) {
+int expwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
char pagebuf[DIFFPAGESIZE];
off_t mapcnt,mapl,maph;
off_t wrlen,rdlen;
off_t offset;
if (!(client->server->flags & F_COPYONWRITE))
- return(rawexpwrite_fully(a, buf, len, client));
+ return(rawexpwrite_fully(a, buf, len, client, fua));
DEBUG3("Asked to write %d bytes at %llu.\n", len, (unsigned long long)a);
mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
}
len-=wrlen ; a+=wrlen ; buf+=wrlen ;
}
+ if (client->server->flags & F_SYNC) {
+ fsync(client->difffile);
+ } else if (fua) {
+ /* open question: would it be cheaper to do multiple sync_file_ranges?
+ as we iterate through the above?
+ */
+ fdatasync(client->difffile);
+ }
+ return 0;
+}
+
+/**
+ * Flush data written on behalf of a client to stable storage.
+ *
+ * In copy-on-write mode only the client's diff file is synced; otherwise
+ * every file in the client's export array is fsync()ed in turn.
+ *
+ * @param client The client whose backing store should be flushed.
+ * @return 0 on success; in copy-on-write mode the result of fsync() on the
+ * diff file, otherwise -1 as soon as one fsync() fails.
+ **/
+int expflush(CLIENT *client) {
+	guint i;
+
+	if (client->server->flags & F_COPYONWRITE) {
+		return fsync(client->difffile);
+	}
+
+	for (i = 0; i < client->export->len; i++) {
+		FILE_INFO fi = g_array_index(client->export, FILE_INFO, i);
+		if (fsync(fi.fhandle) < 0)
+			return -1;
+	}
+
 return 0;
}
client->exportsize = OFFT_MAX;
client->net = net;
client->modern = TRUE;
+ client->transactionlogfd = -1;
free(name);
return client;
}
err("Negotiation failed: %m");
if (client->server->flags & F_READONLY)
flags |= NBD_FLAG_READ_ONLY;
+ if (client->server->flags & F_FLUSH)
+ flags |= NBD_FLAG_SEND_FLUSH;
+ if (client->server->flags & F_FUA)
+ flags |= NBD_FLAG_SEND_FUA;
+ if (client->server->flags & F_ROTATIONAL)
+ flags |= NBD_FLAG_ROTATIONAL;
if (!client->modern) {
/* oldstyle */
flags = htonl(flags);
}
/** sending macro. */
-#define SEND(net,reply) writeit( net, &reply, sizeof( reply ));
+#define SEND(net,reply) { writeit( net, &reply, sizeof( reply )); \
+ if (client->transactionlogfd != -1) \
+ writeit(client->transactionlogfd, &reply, sizeof(reply)); }
/** error macro. */
#define ERROR(client,reply,errcode) { reply.error = htonl(errcode); SEND(client->net,reply); reply.error = 0; }
/**
size_t len;
size_t currlen;
size_t writelen;
+ uint16_t command;
#ifdef DODBG
i++;
printf("%d: ", i);
#endif
readit(client->net, &request, sizeof(request));
+ if (client->transactionlogfd != -1)
+ writeit(client->transactionlogfd, &request, sizeof(request));
+
request.from = ntohll(request.from);
request.type = ntohl(request.type);
+ command = request.type & NBD_CMD_MASK_COMMAND;
- if (request.type==NBD_CMD_DISC) {
+ if (command==NBD_CMD_DISC) {
msg2(LOG_INFO, "Disconnect request received.");
if (client->server->flags & F_COPYONWRITE) {
if (client->difmap) g_free(client->difmap) ;
currlen = len;
}
#ifdef DODBG
- printf("%s from %llu (%llu) len %d, ", request.type ? "WRITE" :
+ printf("%s from %llu (%llu) len %d, ", command ? "WRITE" :
"READ", (unsigned long long)request.from,
(unsigned long long)request.from / 512, len);
#endif
memcpy(reply.handle, request.handle, sizeof(reply.handle));
- if ((request.from + len) > (OFFT_MAX)) {
- DEBUG("[Number too large!]");
- ERROR(client, reply, EINVAL);
- continue;
- }
- if (((ssize_t)((off_t)request.from + len) > client->exportsize)) {
- DEBUG("[RANGE!]");
- ERROR(client, reply, EINVAL);
- continue;
+ if ((command==NBD_CMD_WRITE) || (command==NBD_CMD_READ)) {
+ if ((request.from + len) > (OFFT_MAX)) {
+ DEBUG("[Number too large!]");
+ ERROR(client, reply, EINVAL);
+ continue;
+ }
+
+ if (((ssize_t)((off_t)request.from + len) > client->exportsize)) {
+ DEBUG("[RANGE!]");
+ ERROR(client, reply, EINVAL);
+ continue;
+ }
}
- if (request.type==NBD_CMD_WRITE) {
+ if (command==NBD_CMD_WRITE) {
DEBUG("wr: net->buf, ");
while(len > 0) {
readit(client->net, buf, currlen);
ERROR(client, reply, EPERM);
continue;
}
- if (expwrite(request.from, buf, len, client)) {
+ if (expwrite(request.from, buf, len, client,
+ request.type & NBD_CMD_FLAG_FUA)) {
DEBUG("Write failed: %m" );
ERROR(client, reply, errno);
continue;
}
continue;
}
- /* READ */
-
- DEBUG("exp->buf, ");
- memcpy(buf, &reply, sizeof(struct nbd_reply));
- p = buf + sizeof(struct nbd_reply);
- writelen = currlen + sizeof(struct nbd_reply);
- while(len > 0) {
- if (expread(request.from, p, currlen, client)) {
- DEBUG("Read failed: %m");
+
+ if (command==NBD_CMD_FLUSH) {
+ DEBUG("fl: ");
+ if (expflush(client)) {
+ DEBUG("Flush failed: %m");
ERROR(client, reply, errno);
continue;
}
+ SEND(client->net, reply);
+ DEBUG("OK!\n");
+ continue;
+ }
- DEBUG("buf->net, ");
- writeit(client->net, buf, writelen);
- len -= currlen;
- currlen = (len < BUFSIZE) ? len : BUFSIZE;
- p = buf;
- writelen = currlen;
+ if (command==NBD_CMD_READ) {
+ DEBUG("exp->buf, ");
+ memcpy(buf, &reply, sizeof(struct nbd_reply));
+ if (client->transactionlogfd != -1)
+ writeit(client->transactionlogfd, &reply, sizeof(reply));
+ p = buf + sizeof(struct nbd_reply);
+ writelen = currlen + sizeof(struct nbd_reply);
+ while(len > 0) {
+ if (expread(request.from, p, currlen, client)) {
+ DEBUG("Read failed: %m");
+ ERROR(client, reply, errno);
+ continue;
+ }
+
+ DEBUG("buf->net, ");
+ writeit(client->net, buf, writelen);
+ len -= currlen;
+ request.from += currlen;
+ currlen = (len < BUFSIZE) ? len : BUFSIZE;
+ p = buf;
+ writelen = currlen;
+ }
+ DEBUG("OK!\n");
+ continue;
}
- DEBUG("OK!\n");
+
+ DEBUG ("Ignoring unknown command\n");
}
return 0;
}
* @param client a connected client
**/
void serveconnection(CLIENT *client) {
+ if (client->server->transactionlog && (client->transactionlogfd == -1))
+ {
+ if (-1 == (client->transactionlogfd = open(client->server->transactionlog,
+ O_WRONLY | O_CREAT,
+ S_IRUSR | S_IWUSR)))
+ g_warning("Could not open transaction log %s",
+ client->server->transactionlog);
+ }
+
if(do_run(client->server->prerun, client->exportname)) {
exit(EXIT_FAILURE);
}
mainloop(client);
do_run(client->server->postrun, client->exportname);
+
+ if (-1 != client->transactionlogfd)
+ {
+ close(client->transactionlogfd);
+ client->transactionlogfd = -1;
+ }
}
/**
client->server=serve;
client->exportsize=OFFT_MAX;
client->net=net;
+ client->transactionlogfd = -1;
}
set_peername(net, client);
if (!authorized_client(client)) {
#include <sys/time.h>
#include <sys/types.h>
#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <fcntl.h>
#include <syslog.h>
#include <unistd.h>
#include "config.h"
static uint64_t size;
+static gchar * transactionlog = "nbd-tester-client.tr";
+
typedef enum {
CONNECTION_TYPE_NONE,
CONNECTION_TYPE_CONNECT,
CONNECTION_CLOSE_FAST,
} CLOSE_TYPE;
+/* One request taken from the transaction log, kept on a doubly linked list. */
+struct reqcontext {
+ uint64_t seq; /* sequence number assigned when the request was read from the log */
+ struct nbd_request req; /* the request header as read from the log */
+ struct reqcontext * next; /* following list item, or NULL */
+ struct reqcontext * prev; /* preceding list item, or NULL */
+};
+
+/* Doubly linked list of reqcontext items together with an item count. */
+struct rclist {
+ struct reqcontext * head; /* first item, or NULL when the list is empty */
+ struct reqcontext * tail; /* last item, or NULL when the list is empty */
+ int numitems; /* number of items currently linked in */
+};
+
+/* Detach item p from list l; does nothing when either pointer is NULL. */
+void rclist_unlink(struct rclist * l, struct reqcontext * p) {
+	struct reqcontext * before;
+	struct reqcontext * after;
+
+	if (!p || !l)
+		return;
+
+	before = p->prev;
+	after = p->next;
+
+	/* Bypass p in the forward direction (or move the head) */
+	*(before ? &before->next : &l->head) = after;
+
+	/* Bypass p in the backward direction (or move the tail) */
+	*(after ? &after->prev : &l->tail) = before;
+
+	p->next = NULL;
+	p->prev = NULL;
+	l->numitems--;
+}
+
+/* Append item p at the end of list l; does nothing when either is NULL. */
+void rclist_addtail(struct rclist * l, struct reqcontext * p)
+{
+	struct reqcontext * oldtail;
+
+	if (!p || !l)
+		return;
+
+	oldtail = l->tail;
+	if (oldtail) {
+		/* Non-empty list: hang p off the current tail */
+		if (oldtail->next)
+			g_warning("addtail found list tail has a next pointer");
+		oldtail->next = p;
+	} else {
+		/* Empty list: p becomes the head as well */
+		if (l->head)
+			g_warning("addtail found no list tail but a list head");
+		l->head = p;
+	}
+	p->next = NULL;
+	p->prev = oldtail;
+	l->tail = p;
+	l->numitems++;
+}
+
#define TEST_WRITE (1<<0)
#define TEST_FLUSH (1<<1)
while(len>0) {
if((res=read(f, buf, len)) <=0) {
+ if (!res)
+ errno=EAGAIN;
snprintf(errstr, errstr_len, "Read failed: %s", strerror(errno));
return -1;
}
while(len>0) {
if((res=write(f, buf, len)) <=0) {
+ if (!res)
+ errno=EAGAIN;
snprintf(errstr, errstr_len, "Write failed: %s", strerror(errno));
return -1;
}
READ_ALL_ERRCHK(sock, &flags, sizeof(uint16_t), err_open, "Could not read flags: %s", strerror(errno));
flags = ntohs(flags);
*serverflags = flags;
- g_warning("Server flags are: %08x", flags);
READ_ALL_ERRCHK(sock, buf, 124, err_open, "Could not read reserved zeroes: %s", strerror(errno));
goto end;
err_open:
if (!(testflags & TEST_WRITE))
testflags &= ~TEST_FLUSH;
- memset (writebuf, 'X', sizeof(1024));
+ memset (writebuf, 'X', 1024);
size=0;
if(!sock_is_open) {
if((sock=setup_connection(hostname, port, name, CONNECTION_TYPE_FULL, &serverflags))<0) {
}
for(i=0;i+1024<=size;i+=1024) {
if(do_write) {
- int sendfua = (testflags & TEST_FLUSH) && ((i & 15) == 3);
- int sendflush = (testflags & TEST_FLUSH) && ((i & 15) == 11);
+ int sendfua = (testflags & TEST_FLUSH) && (((i>>10) & 15) == 3);
+ int sendflush = (testflags & TEST_FLUSH) && (((i>>10) & 15) == 11);
req.type=htonl((testflags & TEST_WRITE)?NBD_CMD_WRITE:NBD_CMD_READ);
if (sendfua)
req.type = htonl(NBD_CMD_WRITE | NBD_CMD_FLAG_FUA);
speed=speed/1024.0;
speedchar[0]='G';
}
- g_message("%d: Throughput %s test complete. Took %.3f seconds to complete, %.3f%sib/s", (int)getpid(), (testflags & TEST_WRITE)?"write":"read", timespan, speed, speedchar);
+ g_message("%d: Throughput %s test (%s flushes) complete. Took %.3f seconds to complete, %.3f%sib/s", (int)getpid(), (testflags & TEST_WRITE)?"write":"read", (testflags & TEST_FLUSH)?"with":"without", timespan, speed, speedchar);
+
+err_open:
+ if(close_sock) {
+ close_connection(sock, CONNECTION_CLOSE_PROPERLY);
+ }
+err:
+ return retval;
+}
+
+/*
+ * Fill the 512 byte buffer 'buf' with a deterministic pattern derived only
+ * from the sequence number 'seq' and the block number 'blknum'.
+ *
+ * The first 64-bit word is blknum ^ (seq << 32) ^ (seq >> 32); each later
+ * word is a mix of the previous one, the word index and seq.
+ * A seq of 0 means "never written" and yields an all-zero block.
+ *
+ * Words are stored with memcpy() so that 'buf' needs no particular
+ * alignment, and the rotate-style shift is masked so that a shift count of
+ * 64 (undefined behaviour in C) can never occur.
+ */
+static inline void makebuf(char *buf, uint64_t seq, uint64_t blknum) {
+	uint64_t x = ((uint64_t)blknum) ^ (seq << 32) ^ (seq >> 32);
+	unsigned int i;
+	if (!seq) {
+		memset(buf, 0, 512);
+		return;
+	}
+	for (i = 0; i<512/sizeof(uint64_t); i++) {
+		int s;
+		memcpy(buf + i*sizeof(x), &x, sizeof(x));
+		x+=0xFEEDA1ECDEADBEEFULL+i+(((uint64_t)i)<<56);
+		s = x & 63;
+		/* (64-s) & 63 maps the s == 0 case to a shift by 0 instead of 64 */
+		x = x ^ (x<<s) ^ (x>>((64-s) & 63)) ^ 0xAA55AA55AA55AA55ULL ^ seq;
+	}
+}
+
+/*
+ * Compare the 512 byte block 'buf' with the pattern makebuf() generates for
+ * (seq, blknum). Returns 0 when they match and -1 otherwise.
+ */
+static inline int checkbuf(char *buf, uint64_t seq, uint64_t blknum) {
+	char expected[512];
+
+	makebuf(expected, seq, blknum);
+	if (memcmp(expected, buf, 512) != 0)
+		return -1;
+	return 0;
+}
+
+/*
+ * Debug helper: when DEBUG_COMMANDS is defined, print 'text' followed by the
+ * symbolic name of the request type 'command' (given in network byte order),
+ * whether the FUA flag is set, and the raw value. Otherwise does nothing.
+ */
+static inline void dumpcommand(char * text, uint32_t command)
+{
+#ifdef DEBUG_COMMANDS
+	uint32_t hostcmd = ntohl(command);
+	const char * cmdname = "UNKNOWN";
+
+	switch (hostcmd & NBD_CMD_MASK_COMMAND) {
+	case NBD_CMD_READ:  cmdname = "NBD_CMD_READ";  break;
+	case NBD_CMD_WRITE: cmdname = "NBD_CMD_WRITE"; break;
+	case NBD_CMD_DISC:  cmdname = "NBD_CMD_DISC";  break;
+	case NBD_CMD_FLUSH: cmdname = "NBD_CMD_FLUSH"; break;
+	default: break;
+	}
+	printf("%s: %s [%s] (0x%08x)\n",
+	       text,
+	       cmdname,
+	       (hostcmd & NBD_CMD_FLAG_FUA) ? "FUA" : "NONE",
+	       hostcmd);
+#endif
+}
+
+/*
+ * Replay the transaction log named by the global 'transactionlog' against a
+ * server and check that data read back matches what was written.
+ *
+ * Requests are read from the log into 'txqueue', sent to the server (moving
+ * them to 'inflight') and retired when the reply arrives. Write payloads are
+ * generated by makebuf() from the request's sequence number; the sequence
+ * number last written to each 512 byte block is kept in 'blkhash', an mmap()ed
+ * temporary file, and used by checkbuf() to verify read replies.
+ *
+ * hostname, port, name: passed to setup_connection() when no socket is open.
+ * sock, sock_is_open:    an already connected socket and whether to use it.
+ * close_sock:            whether to close the connection before returning.
+ * testflags:             only used to word the completion message.
+ *
+ * Returns 0 on success and -1 when a failure was detected.
+ */
+int integrity_test(gchar* hostname, int port, char* name, int sock,
+		char sock_is_open, char close_sock, int testflags) {
+	struct nbd_request req;
+	struct nbd_reply rep;
+	fd_set rset;
+	fd_set wset;
+	struct timeval tv;
+	struct timeval start;
+	struct timeval stop;
+	double timespan;
+	double speed;
+	char speedchar[2] = { '\0', '\0' };
+	int retval=0;
+	int serverflags = 0;
+	pid_t mypid = getpid();
+	int blkhashfd = -1;
+	char *blkhashname=NULL;
+	uint32_t *blkhash = NULL;
+	int logfd=-1;
+	uint64_t seq=1;
+	uint64_t processed=0;
+	uint64_t printer=0;
+	int readtransactionfile = 1;
+	struct rclist txqueue={NULL, NULL, 0};
+	struct rclist inflight={NULL, NULL, 0};
+
+	size=0;
+	if(!sock_is_open) {
+		if((sock=setup_connection(hostname, port, name, CONNECTION_TYPE_FULL, &serverflags))<0) {
+			g_warning("Could not open socket: %s", errstr);
+			retval=-1;
+			goto err;
+		}
+	}
+
+	if ((serverflags & (NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA))
+	    != (NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA))
+		g_warning("Server flags do not support FLUSH and FUA - these may error");
+
+	/* From here on the connection is usable, so failures leave through
+	 * err_open, which closes it when close_sock is set. */
+#ifdef HAVE_MKSTEMP
+	blkhashname=strdup("/tmp/blkarray-XXXXXX");
+	if (!blkhashname || (-1 == (blkhashfd = mkstemp(blkhashname)))) {
+		g_warning("Could not open temp file: %s", strerror(errno));
+		retval=-1;
+		goto err_open;
+	}
+#else
+	/* use tmpnam here to avoid further feature test nightmare */
+	if (-1 == (blkhashfd = open(blkhashname=strdup(tmpnam(NULL)),
+				    O_CREAT | O_RDWR,
+				    S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH))) {
+		g_warning("Could not open temp file: %s", strerror(errno));
+		retval=-1;
+		goto err_open;
+	}
+#endif
+	/* Ensure space freed if we die */
+	if (-1 == unlink(blkhashname)) {
+		g_warning("Could not unlink temp file: %s", strerror(errno));
+		retval=-1;
+		goto err_open;
+	}
+
+	/* Extend the temp file to hold one uint32_t per 512 byte block */
+	if (-1 == lseek(blkhashfd, (off_t)((size>>9)<<2), SEEK_SET)) {
+		g_warning("Could not llseek temp file: %s", strerror(errno));
+		retval=-1;
+		goto err_open;
+	}
+
+	if (-1 == write(blkhashfd, "\0", 1)) {
+		g_warning("Could not write temp file: %s", strerror(errno));
+		retval=-1;
+		goto err_open;
+	}
+
+	blkhash = mmap(NULL,
+		       (size>>9)<<2,
+		       PROT_READ | PROT_WRITE,
+		       MAP_SHARED,
+		       blkhashfd,
+		       0);
+	/* mmap() reports failure with MAP_FAILED, not NULL */
+	if (MAP_FAILED == blkhash) {
+		g_warning("Could not mmap temp file: %s", strerror(errno));
+		blkhash = NULL; /* so the cleanup below does not munmap it */
+		retval=-1;
+		goto err_open;
+	}
+
+	if (-1 == (logfd = open(transactionlog, O_RDONLY)))
+	{
+		g_warning("Could not open log file: %s", strerror(errno));
+		retval=-1;
+		goto err_open;
+	}
+
+	if(gettimeofday(&start, NULL)<0) {
+		retval=-1;
+		snprintf(errstr, errstr_len, "Could not measure start time: %s", strerror(errno));
+		goto err_open;
+	}
+
+	while (readtransactionfile || txqueue.numitems || inflight.numitems) {
+		int ret;
+
+		uint32_t magic;
+		uint32_t command;
+		uint64_t from;
+		uint32_t len;
+		struct reqcontext * prc;
+
+		*errstr=0;
+
+		/* Only wait for the events we can currently act on */
+		FD_ZERO(&wset);
+		FD_ZERO(&rset);
+		if (readtransactionfile)
+			FD_SET(logfd, &rset);
+		if (txqueue.numitems)
+			FD_SET(sock, &wset);
+		if (inflight.numitems)
+			FD_SET(sock, &rset);
+		tv.tv_sec=5;
+		tv.tv_usec=0;
+		ret = select(1+((sock>logfd)?sock:logfd), &rset, &wset, NULL, &tv);
+		if (ret == 0) {
+			retval=-1;
+			snprintf(errstr, errstr_len, "Timeout reading from socket");
+			goto err_open;
+		} else if (ret<0) {
+			g_warning("select() failed: %s", strerror(errno));
+			retval=-1;
+			goto err_open;
+		}
+		/* We know we've got at least one thing to do here then */
+
+		/* Get a command from the transaction log */
+		if (FD_ISSET(logfd, &rset)) {
+
+			/* Read a request or reply from the transaction file */
+			READ_ALL_ERRCHK(logfd,
+					&magic,
+					sizeof(magic),
+					err_open,
+					"Could not read transaction log: %s",
+					strerror(errno));
+			magic = ntohl(magic);
+			switch (magic) {
+			case NBD_REQUEST_MAGIC:
+				if (NULL == (prc = calloc(1, sizeof(struct reqcontext)))) {
+					retval=-1;
+					snprintf(errstr, errstr_len, "Could not allocate request");
+					goto err_open;
+				}
+				READ_ALL_ERRCHK(logfd,
+						sizeof(magic)+(char *)&(prc->req),
+						sizeof(struct nbd_request)-sizeof(magic),
+						err_open,
+						"Could not read transaction log: %s",
+						strerror(errno));
+				prc->req.magic = htonl(NBD_REQUEST_MAGIC);
+				prc->seq=seq++;
+				if ((ntohl(prc->req.type) & NBD_CMD_MASK_COMMAND) == NBD_CMD_DISC) {
+					/* no more to read; don't enqueue as no reply
+					 * we will disconnect manually at the end
+					 */
+					readtransactionfile = 0;
+					free (prc);
+				} else {
+					dumpcommand("Enqueuing command", prc->req.type);
+					rclist_addtail(&txqueue, prc);
+				}
+				prc = NULL;
+				break;
+			case NBD_REPLY_MAGIC:
+				READ_ALL_ERRCHK(logfd,
+						sizeof(magic)+(char *)(&rep),
+						sizeof(struct nbd_reply)-sizeof(magic),
+						err_open,
+						"Could not read transaction log: %s",
+						strerror(errno));
+
+				if (rep.error) {
+					retval=-1;
+					snprintf(errstr, errstr_len, "Transaction log file contained errored transaction");
+					goto err_open;
+				}
+
+				/* We do not need to consume data on a read reply as there is
+				 * none in the log */
+				break;
+			default:
+				retval=-1;
+				snprintf(errstr, errstr_len, "Unknown magic in transaction log: %08x", magic);
+				goto err_open;
+			}
+		}
+
+		/* See if we have a write we can do */
+		if (FD_ISSET(sock, &wset))
+		{
+			prc = txqueue.head;
+			if (!prc)
+				g_warning("Socket write FD set but we shouldn't have been interested");
+			else
+			{
+
+				rclist_unlink(&txqueue, prc);
+				rclist_addtail(&inflight, prc);
+
+				if (ntohl(prc->req.magic) != NBD_REQUEST_MAGIC) {
+					retval=-1;
+					g_warning("Asked to write a reply without a magic number");
+					goto err_open;
+				}
+
+				dumpcommand("Sending command", prc->req.type);
+				command = ntohl(prc->req.type);
+				from = ntohll(prc->req.from);
+				len = ntohl(prc->req.len);
+				/* we rewrite the handle as they otherwise may not be unique */
+				*((uint64_t*)(prc->req.handle))=htonll((uint64_t)prc);
+				WRITE_ALL_ERRCHK(sock,
+						 &(prc->req),
+						 sizeof(struct nbd_request),
+						 err_open,
+						 "Could not write command: %s",
+						 strerror(errno));
+				switch (command & NBD_CMD_MASK_COMMAND) {
+				case NBD_CMD_WRITE:
+					while (len > 0) {
+						uint64_t blknum = from>>9;
+						char dbuf[512];
+						if (from>=size) {
+							/* report the failure instead of returning success */
+							retval=-1;
+							snprintf(errstr, errstr_len, "offset %llx beyond size %llx",
+								 (long long int) from, (long long int)size);
+							goto err_open;
+						}
+						/* work out what we should be writing */
+						makebuf(dbuf, prc->seq, blknum);
+						WRITE_ALL_ERRCHK(sock,
+								 dbuf,
+								 512,
+								 err_open,
+								 "Could not write data: %s",
+								 strerror(errno));
+						from += 512;
+						len -= 512;
+					}
+					/* fall through: nothing more to send */
+
+				case NBD_CMD_DISC:
+				case NBD_CMD_READ:
+				case NBD_CMD_FLUSH:
+					break;
+				default:
+					retval=-1;
+					snprintf(errstr, errstr_len, "Incomprehensible command: %08x", command);
+					goto err_open;
+					break;
+				}
+
+				prc = NULL;
+			}
+
+		}
+
+		/* See if there is a reply to be processed from the socket */
+		if(FD_ISSET(sock, &rset)) {
+			/* Okay, there's something ready for
+			 * reading here */
+
+			READ_ALL_ERRCHK(sock,
+					&rep,
+					sizeof(struct nbd_reply),
+					err_open,
+					"Could not read from server socket: %s",
+					strerror(errno));
+
+			if (rep.magic != htonl(NBD_REPLY_MAGIC)) {
+				retval=-1;
+				snprintf(errstr, errstr_len, "Bad magic from server");
+				goto err_open;
+			}
+
+			if (rep.error) {
+				retval=-1;
+				snprintf(errstr, errstr_len, "Server errored a transaction");
+				goto err_open;
+			}
+
+			/* NOTE(review): the handle echoed by the server is trusted as a
+			 * pointer and dereferenced before it is validated - confirm this
+			 * is acceptable for a test tool. */
+			prc=(struct reqcontext *)ntohll(*((uint64_t *)rep.handle));
+			if (prc->req.magic != htonl(NBD_REQUEST_MAGIC)) {
+				retval=-1;
+				snprintf(errstr, errstr_len, "Bad magic in inflight data: %08x", prc->req.magic);
+				goto err_open;
+			}
+
+			dumpcommand("Processing reply to command", prc->req.type);
+			command = ntohl(prc->req.type);
+			from = ntohll(prc->req.from);
+			len = ntohl(prc->req.len);
+
+			switch (command & NBD_CMD_MASK_COMMAND) {
+			case NBD_CMD_READ:
+				while (len > 0) {
+					uint64_t blknum = from>>9;
+					char dbuf[512];
+					if (from>=size) {
+						/* report the failure instead of returning success */
+						retval=-1;
+						snprintf(errstr, errstr_len, "offset %llx beyond size %llx",
+							 (long long int) from, (long long int)size);
+						goto err_open;
+					}
+					READ_ALL_ERRCHK(sock,
+							dbuf,
+							512,
+							err_open,
+							"Could not read data: %s",
+							strerror(errno));
+					/* work out what was written */
+					if (checkbuf(dbuf, blkhash[blknum], blknum))
+					{
+						retval=-1;
+						snprintf(errstr, errstr_len, "Bad reply data: seq %08x", blkhash[blknum]);
+						goto err_open;
+
+					}
+					from += 512;
+					len -= 512;
+				}
+				break;
+			case NBD_CMD_WRITE:
+				/* subsequent reads should get data with this seq*/
+				while (len > 0) {
+					uint64_t blknum = from>>9;
+					blkhash[blknum]=(uint32_t)(prc->seq);
+					from += 512;
+					len -= 512;
+				}
+				break;
+			default:
+				break;
+			}
+
+			processed++;
+			rclist_unlink(&inflight, prc);
+			prc->req.magic=0; /* so a duplicate reply is detected */
+			free(prc);
+		}
+
+		/* Progress report every 10000 iterations and once when all is done */
+		if (!(printer++ % 10000) || !(readtransactionfile || txqueue.numitems || inflight.numitems) )
+			printf("%d: Seq %08lld Queued: %08d Inflight: %08d Done: %08lld\n",
+			       (int)mypid,
+			       (long long int) seq,
+			       txqueue.numitems,
+			       inflight.numitems,
+			       (long long int) processed);
+
+	}
+
+	if (gettimeofday(&stop, NULL)<0) {
+		retval=-1;
+		snprintf(errstr, errstr_len, "Could not measure end time: %s", strerror(errno));
+		goto err_open;
+	}
+	timespan=timeval_diff_to_double(&stop, &start);
+	speed=size/timespan;
+	if(speed>1024) {
+		speed=speed/1024.0;
+		speedchar[0]='K';
+	}
+	if(speed>1024) {
+		speed=speed/1024.0;
+		speedchar[0]='M';
+	}
+	if(speed>1024) {
+		speed=speed/1024.0;
+		speedchar[0]='G';
+	}
+	g_message("%d: Integrity %s test complete. Took %.3f seconds to complete, %.3f%sib/s", (int)getpid(), (testflags & TEST_WRITE)?"write":"read", timespan, speed, speedchar);
err_open:
 if(close_sock) {
 close_connection(sock, CONNECTION_CLOSE_PROPERLY);
 }
err:
+	if (size && blkhash)
+		munmap(blkhash, (size>>9)<<2);
+
+	if (blkhashfd != -1)
+		close (blkhashfd);
+
+	if (logfd != -1)
+		close (logfd);
+
+	if (blkhashname)
+		free(blkhashname);
+
+	if (*errstr)
+		g_warning("%s",errstr);
+
 return retval;
}
exit(EXIT_FAILURE);
}
logging();
- while((c=getopt(argc, argv, "-N:owf"))>=0) {
+ while((c=getopt(argc, argv, "-N:t:owfi"))>=0) {
switch(c) {
case 1:
switch(nonopt) {
p = 10809;
want_port = false;
break;
+ case 't':
+ transactionlog=g_strdup(optarg);
+ break;
case 'o':
test=oversize_test;
break;
case 'f':
testflags|=TEST_FLUSH;
break;
+ case 'i':
+ test=integrity_test;
+ break;
}
}
# Yes, that's POSIX sh, not bash!
tmpnam=`mktemp`
+conffile=${tmpnam}.conf
+pidfile=${tmpnam}.pid
ulimit -c unlimited
case $1 in
*/cmd)
# Test with export specified on command line
- ./nbd-server -C /dev/null -p `pwd`/nbd-server.pid 11111 $tmpnam &
+ ./nbd-server -C /dev/null -p ${pidfile} 11111 $tmpnam &
# -p only works if nbd-server wasn't compiled with -DNOFORK or
# -DNODAEMON, which I sometimes do for testing and debugging.
PID=$!
;;
*/cfgsize)
# Test oversized requests
- ./nbd-server -C /dev/null -p `pwd`/nbd-server.pid 11111 $tmpnam &
+ ./nbd-server -C /dev/null -p ${pidfile} 11111 $tmpnam &
# -p only works if nbd-server wasn't compiled with -DNOFORK or
# -DNODAEMON, which I sometimes do for testing and debugging.
PID=$!
;;
*/cfg1)
# Test with export specified in config file
- cat > nbd-server.conf <<EOF
+ cat > ${conffile} <<EOF
[generic]
oldstyle = true
[export]
exportname = $tmpnam
port = 11112
EOF
- ./nbd-server -C nbd-server.conf -p `pwd`/nbd-server.pid &
+ ./nbd-server -C ${conffile} -p ${pidfile} &
PID=$!
sleep 1
./nbd-tester-client 127.0.0.1 11112
*/cfgmulti)
# Test with multiple exports specified in config file, and
# testing more options too
- cat >nbd-server.conf <<EOF
+ cat >${conffile} <<EOF
[generic]
oldstyle = true
[export1]
readonly = true
listenaddr = 127.0.0.1
EOF
- ./nbd-server -C nbd-server.conf -p `pwd`/nbd-server.pid &
+ ./nbd-server -C ${conffile} -p ${pidfile} &
PID=$!
sleep 1
./nbd-tester-client localhost 11113
retval=$?
if [ $retval -ne 0 ]
then
- if [ -f nbd-server.pid ]
+ if [ -f ${pidfile} ]
then
- kill `cat nbd-server.pid`
- rm -f nbd-server.pid
+ kill `cat ${pidfile}`
+ rm -f ${pidfile}
else
kill $PID
fi
if [ -z "$2" ]
then
- rm -f $tmpnam nbd-server.conf
+ rm -f $tmpnam ${conffile}
fi
exit $retval
fi
;;
*/cfgnew)
# Test new-style exports
- cat >nbd-server.conf <<EOF
+ cat >${conffile} <<EOF
[generic]
[export1]
exportname = $tmpnam
EOF
- ./nbd-server -C nbd-server.conf -p `pwd`/nbd-server.pid &
+ ./nbd-server -C ${conffile} -p ${pidfile} &
PID=$!
sleep 1
./nbd-tester-client localhost -N export1
retval=$?
;;
*/write)
- # Test new-style exports
- cat >nbd-server.conf <<EOF
+ # Test writing
+ cat >${conffile} <<EOF
[generic]
[export1]
exportname = $tmpnam
EOF
- ./nbd-server -C nbd-server.conf -p `pwd`/nbd-server.pid &
+ ./nbd-server -C ${conffile} -p ${pidfile} &
PID=$!
sleep 1
./nbd-tester-client localhost -N export1 -w
retval=$?
;;
*/flush)
- # Test new-style exports
- cat >nbd-server.conf <<EOF
+ # Test writes with flush
+ cat >${conffile} <<EOF
[generic]
[export1]
exportname = $tmpnam
+ flush = true
+ fua = true
+ rotational = true
EOF
- ./nbd-server -C nbd-server.conf -p `pwd`/nbd-server.pid &
+ ./nbd-server -C ${conffile} -p ${pidfile} &
PID=$!
sleep 1
./nbd-tester-client localhost -N export1 -w -f
retval=$?
;;
+ */integrity)
+ # Integrity test
+ cat >${conffile} <<EOF
+[generic]
+[export1]
+ exportname = $tmpnam
+ flush = true
+ fua = true
+ rotational = true
+EOF
+ # we need a bigger disk
+ dd if=/dev/zero of=$tmpnam bs=1M count=50 >/dev/null 2>&1
+ ./nbd-server -C ${conffile} -p ${pidfile} &
+ PID=$!
+ sleep 1
+ ./nbd-tester-client localhost -N export1 -i
+ retval=$?
+ ;;
*)
echo "E: unknown test $1"
exit 1
;;
esac
-if [ -f nbd-server.pid ]
+if [ -f ${pidfile} ]
then
- kill `cat nbd-server.pid`
- rm -f nbd-server.pid
+ kill `cat ${pidfile}`
+ rm -f ${pidfile}
else
kill $PID
fi
if [ -z "$2" ]
then
- rm -f $tmpnam nbd-server.conf
+ rm -f $tmpnam ${conffile}
fi
if [ $retval -ne 0 ]
then