/* Debugging macros */
//#define DODBG
#ifdef DODBG
-#define DEBUG( a ) printf( a )
-#define DEBUG2( a,b ) printf( a,b )
-#define DEBUG3( a,b,c ) printf( a,b,c )
-#define DEBUG4( a,b,c,d ) printf( a,b,c,d )
-#define DEBUG5( a,b,c,d,e ) printf( a,b,c,d,e )
+#define DEBUG(...) printf(__VA_ARGS__)
#else
-#define DEBUG( a )
-#define DEBUG2( a,b )
-#define DEBUG3( a,b,c )
-#define DEBUG4( a,b,c,d )
-#define DEBUG5( a,b,c,d,e )
+#define DEBUG(...)
#endif
#ifndef PACKAGE_VERSION
#define PACKAGE_VERSION ""
disconnects */
gchar* servename; /**< name of the export as selected by nbd-client */
int max_connections; /**< maximum number of opened connections */
+ gchar* transactionlog;/**< filename for transaction log */
} SERVER;
/**
u32 difffilelen; /**< number of pages in difffile */
u32 *difmap; /**< see comment on the global difmap for this one */
gboolean modern; /**< client was negotiated using modern negotiation protocol */
+ int transactionlogfd;/**< fd for transaction log */
} CLIENT;
/**
g_free(server->prerun);
if(server->postrun)
g_free(server->postrun);
+ if(server->transactionlog)
+ g_free(server->transactionlog);
g_free(server);
}
if(s->postrun)
serve->postrun = g_strdup(s->postrun);
+
+ if(s->transactionlog)
+ serve->transactionlog = g_strdup(s->transactionlog);
if(s->servename)
serve->servename = g_strdup(s->servename);
{ "virtstyle", FALSE, PARAM_STRING, &(virtstyle), 0 },
{ "prerun", FALSE, PARAM_STRING, &(s.prerun), 0 },
{ "postrun", FALSE, PARAM_STRING, &(s.postrun), 0 },
+ { "transactionlog", FALSE, PARAM_STRING, &(s.transactionlog), 0 },
{ "readonly", FALSE, PARAM_BOOL, &(s.flags), F_READONLY },
{ "multifile", FALSE, PARAM_BOOL, &(s.flags), F_MULTIFILE },
{ "copyonwrite", FALSE, PARAM_BOOL, &(s.flags), F_COPYONWRITE },
if(!i) {
msg3(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid);
} else {
- DEBUG2("Removing %d from the list of children", pid);
+ DEBUG("Removing %d from the list of children", pid);
g_hash_table_remove(children, &pid);
}
}
if (es > ((off_t)0)) {
return es;
} else {
- DEBUG2("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4)));
+ DEBUG("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4)));
}
err("Could not find size of exported block device: %m");
if(maxbytes && len > maxbytes)
len = maxbytes;
- DEBUG5("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle, foffset, len, fua);
+ DEBUG("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle, (long long unsigned)foffset, (unsigned int)len, fua);
myseek(fhandle, foffset);
retval = write(fhandle, buf, len);
if(client->server->flags & F_SYNC) {
fsync(fhandle);
} else if (fua) {
-#ifdef USE_SYNC_FILE_RANGE
+
+ /* This is where we would do the following
+ * #ifdef USE_SYNC_FILE_RANGE
+ * However, we don't, for the reasons set out below
+ * by Christoph Hellwig <hch@infradead.org>
+ *
+ * [BEGINS]
+ * fdatasync is equivalent to fsync except that it does not flush
+ * non-essential metadata (basically just timestamps in practice), but it
+ * does flush metadata required to find the data again, e.g. allocation
+ * information and extent maps. sync_file_range does nothing but flush
+ * out pagecache content - it means you basically won't get your data
+ * back in case of a crash if you either:
+ *
+ * a) have a volatile write cache in your disk (e.g. any normal SATA disk)
+ * b) are using a sparse file on a filesystem
+ * c) are using a fallocate-preallocated file on a filesystem
+ * d) use any file on a COW filesystem like btrfs
+ *
+ * e.g. it only does anything useful for you if you do not have a volatile
+ * write cache, and either use a raw block device node, or just overwrite
+ * an already fully allocated (and not preallocated) file on a non-COW
+ * filesystem.
+ * [ENDS]
+ *
+ * What we should do is open a second FD with O_DSYNC set, then write to
+ * that when appropriate. However, with a Linux client, every REQ_FUA
+ * immediately follows a REQ_FLUSH, so fdatasync does not cause performance
+ * problems.
+ *
+ */
+#if 0
sync_file_range(fhandle, foffset, len,
SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE |
SYNC_FILE_RANGE_WAIT_AFTER);
if(maxbytes && len > maxbytes)
len = maxbytes;
- DEBUG4("(READ from fd %d offset %llu len %u), ", fhandle, foffset, len);
+ DEBUG("(READ from fd %d offset %llu len %u), ", fhandle, (long long unsigned int)foffset, (unsigned int)len);
myseek(fhandle, foffset);
return read(fhandle, buf, len);
if (!(client->server->flags & F_COPYONWRITE))
return(rawexpread_fully(a, buf, len, client));
- DEBUG3("Asked to read %d bytes at %llu.\n", len, (unsigned long long)a);
+ DEBUG("Asked to read %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
len : (size_t)DIFFPAGESIZE-offset;
if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
- DEBUG3("Page %llu is at %lu\n", (unsigned long long)mapcnt,
+ DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
(unsigned long)(client->difmap[mapcnt]));
myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset);
if (read(client->difffile, buf, rdlen) != rdlen) return -1;
} else { /* the block is not there */
- DEBUG2("Page %llu is not here, we read the original one\n",
+ DEBUG("Page %llu is not here, we read the original one\n",
(unsigned long long)mapcnt);
if(rawexpread_fully(a, buf, rdlen, client)) return -1;
}
if (!(client->server->flags & F_COPYONWRITE))
return(rawexpwrite_fully(a, buf, len, client, fua));
- DEBUG3("Asked to write %d bytes at %llu.\n", len, (unsigned long long)a);
+ DEBUG("Asked to write %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
len : (size_t)DIFFPAGESIZE-offset;
if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
- DEBUG3("Page %llu is at %lu\n", (unsigned long long)mapcnt,
+ DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
(unsigned long)(client->difmap[mapcnt])) ;
myseek(client->difffile,
client->difmap[mapcnt]*DIFFPAGESIZE+offset);
} else { /* the block is not there */
myseek(client->difffile,client->difffilelen*DIFFPAGESIZE) ;
client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++;
- DEBUG3("Page %llu is not here, we put it at %lu\n",
+ DEBUG("Page %llu is not here, we put it at %lu\n",
(unsigned long long)mapcnt,
(unsigned long)(client->difmap[mapcnt]));
rdlen=DIFFPAGESIZE ;
}
int expflush(CLIENT *client) {
- int fhandle;
- off_t foffset;
- size_t maxbytes;
gint i;
if (client->server->flags & F_COPYONWRITE) {
client->exportsize = OFFT_MAX;
client->net = net;
client->modern = TRUE;
+ client->transactionlogfd = -1;
free(name);
return client;
}
}
/** sending macro. */
-#define SEND(net,reply) writeit( net, &reply, sizeof( reply ));
+#define SEND(net,reply) { writeit( net, &reply, sizeof( reply )); \
+ if (client->transactionlogfd != -1) \
+ writeit(client->transactionlogfd, &reply, sizeof(reply)); }
/** error macro. */
#define ERROR(client,reply,errcode) { reply.error = htonl(errcode); SEND(client->net,reply); reply.error = 0; }
/**
printf("%d: ", i);
#endif
readit(client->net, &request, sizeof(request));
+ if (client->transactionlogfd != -1)
+ writeit(client->transactionlogfd, &request, sizeof(request));
+
request.from = ntohll(request.from);
request.type = ntohl(request.type);
command = request.type & NBD_CMD_MASK_COMMAND;
} else {
currlen = len;
}
-#ifdef DODBG
- printf("%s from %llu (%llu) len %d, ", command ? "WRITE" :
+ DEBUG("%s from %llu (%llu) len %d, ", command ? "WRITE" :
"READ", (unsigned long long)request.from,
- (unsigned long long)request.from / 512, len);
-#endif
+ (unsigned long long)request.from / 512, (unsigned int)len);
memcpy(reply.handle, request.handle, sizeof(reply.handle));
- if ((request.from + len) > (OFFT_MAX)) {
- DEBUG("[Number too large!]");
- ERROR(client, reply, EINVAL);
- continue;
- }
- if (((ssize_t)((off_t)request.from + len) > client->exportsize)) {
- DEBUG("[RANGE!]");
- ERROR(client, reply, EINVAL);
- continue;
+ if ((command==NBD_CMD_WRITE) || (command==NBD_CMD_READ)) {
+ if ((request.from + len) > (OFFT_MAX)) {
+ DEBUG("[Number too large!]");
+ ERROR(client, reply, EINVAL);
+ continue;
+ }
+
+ if (((ssize_t)((off_t)request.from + len) > client->exportsize)) {
+ DEBUG("[RANGE!]");
+ ERROR(client, reply, EINVAL);
+ continue;
+ }
}
if (command==NBD_CMD_WRITE) {
if (command==NBD_CMD_READ) {
DEBUG("exp->buf, ");
memcpy(buf, &reply, sizeof(struct nbd_reply));
+ if (client->transactionlogfd != -1)
+ writeit(client->transactionlogfd, &reply, sizeof(reply));
p = buf + sizeof(struct nbd_reply);
writelen = currlen + sizeof(struct nbd_reply);
while(len > 0) {
DEBUG("buf->net, ");
writeit(client->net, buf, writelen);
len -= currlen;
+ request.from += currlen;
currlen = (len < BUFSIZE) ? len : BUFSIZE;
p = buf;
writelen = currlen;
} else {
tmpname=g_strdup(client->exportname);
}
- DEBUG2( "Opening %s\n", tmpname );
+ DEBUG( "Opening %s\n", tmpname );
fi.fhandle = open(tmpname, mode);
if(fi.fhandle == -1 && mode == O_RDWR) {
/* Try again because maybe media was read-only */
* @param client a connected client
**/
void serveconnection(CLIENT *client) {
+ if (client->server->transactionlog && (client->transactionlogfd == -1))
+ {
+ if (-1 == (client->transactionlogfd = open(client->server->transactionlog,
+ O_WRONLY | O_CREAT,
+ S_IRUSR | S_IWUSR)))
+ g_warning("Could not open transaction log %s",
+ client->server->transactionlog);
+ }
+
if(do_run(client->server->prerun, client->exportname)) {
exit(EXIT_FAILURE);
}
mainloop(client);
do_run(client->server->postrun, client->exportname);
+
+ if (-1 != client->transactionlogfd)
+ {
+ close(client->transactionlogfd);
+ client->transactionlogfd = -1;
+ }
}
/**
client->server=serve;
client->exportsize=OFFT_MAX;
client->net=net;
+ client->transactionlogfd = -1;
}
set_peername(net, client);
if (!authorized_client(client)) {