X-Git-Url: http://git.alex.org.uk diff --git a/nbd-server.c b/nbd-server.c index 4122cef..3343d9d 100644 --- a/nbd-server.c +++ b/nbd-server.c @@ -133,17 +133,9 @@ int dontfork = 0; /* Debugging macros */ //#define DODBG #ifdef DODBG -#define DEBUG( a ) printf( a ) -#define DEBUG2( a,b ) printf( a,b ) -#define DEBUG3( a,b,c ) printf( a,b,c ) -#define DEBUG4( a,b,c,d ) printf( a,b,c,d ) -#define DEBUG5( a,b,c,d,e ) printf( a,b,c,d,e ) +#define DEBUG(...) printf(__VA_ARGS__) #else -#define DEBUG( a ) -#define DEBUG2( a,b ) -#define DEBUG3( a,b,c ) -#define DEBUG4( a,b,c,d ) -#define DEBUG5( a,b,c,d,e ) +#define DEBUG(...) #endif #ifndef PACKAGE_VERSION #define PACKAGE_VERSION "" @@ -213,6 +205,7 @@ typedef struct { disconnects */ gchar* servename; /**< name of the export as selected by nbd-client */ int max_connections; /**< maximum number of opened connections */ + gchar* transactionlog;/**< filename for transaction log */ } SERVER; /** @@ -239,6 +232,7 @@ typedef struct { u32 difffilelen; /**< number of pages in difffile */ u32 *difmap; /**< see comment on the global difmap for this one */ gboolean modern; /**< client was negotiated using modern negotiation protocol */ + int transactionlogfd;/**< fd for transaction log */ } CLIENT; /** @@ -582,6 +576,8 @@ void remove_server(gpointer s) { g_free(server->prerun); if(server->postrun) g_free(server->postrun); + if(server->transactionlog) + g_free(server->transactionlog); g_free(server); } @@ -621,6 +617,9 @@ SERVER* dup_serve(SERVER *s) { if(s->postrun) serve->postrun = g_strdup(s->postrun); + + if(s->transactionlog) + serve->transactionlog = g_strdup(s->transactionlog); if(s->servename) serve->servename = g_strdup(s->servename); @@ -719,6 +718,7 @@ GArray* parse_cfile(gchar* f, GError** e) { { "virtstyle", FALSE, PARAM_STRING, &(virtstyle), 0 }, { "prerun", FALSE, PARAM_STRING, &(s.prerun), 0 }, { "postrun", FALSE, PARAM_STRING, &(s.postrun), 0 }, + { "transactionlog", FALSE, PARAM_STRING, &(s.transactionlog), 0 }, { 
"readonly", FALSE, PARAM_BOOL, &(s.flags), F_READONLY }, { "multifile", FALSE, PARAM_BOOL, &(s.flags), F_MULTIFILE }, { "copyonwrite", FALSE, PARAM_BOOL, &(s.flags), F_COPYONWRITE }, @@ -906,7 +906,7 @@ void sigchld_handler(int s) { if(!i) { msg3(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid); } else { - DEBUG2("Removing %d from the list of children", pid); + DEBUG("Removing %d from the list of children", pid); g_hash_table_remove(children, &pid); } } @@ -984,7 +984,7 @@ off_t size_autodetect(int fhandle) { if (es > ((off_t)0)) { return es; } else { - DEBUG2("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4))); + DEBUG("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4))); } err("Could not find size of exported block device: %m"); @@ -1074,14 +1074,45 @@ ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) { if(maxbytes && len > maxbytes) len = maxbytes; - DEBUG5("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle, foffset, len, fua); + DEBUG("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle, (long long unsigned)foffset, (unsigned int)len, fua); myseek(fhandle, foffset); retval = write(fhandle, buf, len); if(client->server->flags & F_SYNC) { fsync(fhandle); } else if (fua) { -#ifdef USE_SYNC_FILE_RANGE + + /* This is where we would do the following + * #ifdef USE_SYNC_FILE_RANGE + * However, we don't, for the reasons set out below + * by Christoph Hellwig + * + * [BEGINS] + * fdatasync is equivalent to fsync except that it does not flush + * non-essential metadata (basically just timestamps in practice), but it + * does flush metadata required to find the data again, e.g. allocation + * information and extent maps. sync_file_range does nothing but flush + * out pagecache content - it means you basically won't get your data + * back in case of a crash if you either: + * + * a) have a volatile write cache in your disk (e.g. 
any normal SATA disk) + * b) are using a sparse file on a filesystem + * c) are using a fallocate-preallocated file on a filesystem + * d) use any file on a COW filesystem like btrfs + * + * e.g. it only does anything useful for you if you do not have a volatile + * write cache, and either use a raw block device node, or just overwrite + * an already fully allocated (and not preallocated) file on a non-COW + * filesystem. + * [ENDS] + * + * What we should do is open a second FD with O_DSYNC set, then write to + * that when appropriate. However, with a Linux client, every REQ_FUA + * immediately follows a REQ_FLUSH, so fdatasync does not cause performance + * problems. + * + */ +#if 0 sync_file_range(fhandle, foffset, len, SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_AFTER); @@ -1128,7 +1159,7 @@ ssize_t rawexpread(off_t a, char *buf, size_t len, CLIENT *client) { if(maxbytes && len > maxbytes) len = maxbytes; - DEBUG4("(READ from fd %d offset %llu len %u), ", fhandle, foffset, len); + DEBUG("(READ from fd %d offset %llu len %u), ", fhandle, (long long unsigned int)foffset, (unsigned int)len); myseek(fhandle, foffset); return read(fhandle, buf, len); @@ -1165,7 +1196,7 @@ int expread(off_t a, char *buf, size_t len, CLIENT *client) { if (!(client->server->flags & F_COPYONWRITE)) return(rawexpread_fully(a, buf, len, client)); - DEBUG3("Asked to read %d bytes at %llu.\n", len, (unsigned long long)a); + DEBUG("Asked to read %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a); mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE; @@ -1175,12 +1206,12 @@ int expread(off_t a, char *buf, size_t len, CLIENT *client) { rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ? len : (size_t)DIFFPAGESIZE-offset; if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */ - DEBUG3("Page %llu is at %lu\n", (unsigned long long)mapcnt, + DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt, (unsigned long)(client->difmap[mapcnt])); myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset); if 
(read(client->difffile, buf, rdlen) != rdlen) return -1; } else { /* the block is not there */ - DEBUG2("Page %llu is not here, we read the original one\n", + DEBUG("Page %llu is not here, we read the original one\n", (unsigned long long)mapcnt); if(rawexpread_fully(a, buf, rdlen, client)) return -1; } @@ -1209,7 +1240,7 @@ int expwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) { if (!(client->server->flags & F_COPYONWRITE)) return(rawexpwrite_fully(a, buf, len, client, fua)); - DEBUG3("Asked to write %d bytes at %llu.\n", len, (unsigned long long)a); + DEBUG("Asked to write %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a); mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ; @@ -1220,7 +1251,7 @@ int expwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) { len : (size_t)DIFFPAGESIZE-offset; if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */ - DEBUG3("Page %llu is at %lu\n", (unsigned long long)mapcnt, + DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt, (unsigned long)(client->difmap[mapcnt])) ; myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset); @@ -1228,7 +1259,7 @@ int expwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) { } else { /* the block is not there */ myseek(client->difffile,client->difffilelen*DIFFPAGESIZE) ; client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++; - DEBUG3("Page %llu is not here, we put it at %lu\n", + DEBUG("Page %llu is not here, we put it at %lu\n", (unsigned long long)mapcnt, (unsigned long)(client->difmap[mapcnt])); rdlen=DIFFPAGESIZE ; @@ -1253,9 +1284,6 @@ int expwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) { } int expflush(CLIENT *client) { - int fhandle; - off_t foffset; - size_t maxbytes; gint i; if (client->server->flags & F_COPYONWRITE) { @@ -1347,6 +1375,7 @@ CLIENT* negotiate(int net, CLIENT *client, GArray* servers) { client->exportsize = OFFT_MAX; client->net = net; 
client->modern = TRUE; + client->transactionlogfd = -1; free(name); return client; } @@ -1386,7 +1415,9 @@ CLIENT* negotiate(int net, CLIENT *client, GArray* servers) { } /** sending macro. */ -#define SEND(net,reply) writeit( net, &reply, sizeof( reply )); +#define SEND(net,reply) { writeit( net, &reply, sizeof( reply )); \ + if (client->transactionlogfd != -1) \ + writeit(client->transactionlogfd, &reply, sizeof(reply)); } /** error macro. */ #define ERROR(client,reply,errcode) { reply.error = htonl(errcode); SEND(client->net,reply); reply.error = 0; } /** @@ -1421,6 +1452,9 @@ int mainloop(CLIENT *client) { printf("%d: ", i); #endif readit(client->net, &request, sizeof(request)); + if (client->transactionlogfd != -1) + writeit(client->transactionlogfd, &request, sizeof(request)); + request.from = ntohll(request.from); request.type = ntohl(request.type); command = request.type & NBD_CMD_MASK_COMMAND; @@ -1447,11 +1481,9 @@ int mainloop(CLIENT *client) { } else { currlen = len; } -#ifdef DODBG - printf("%s from %llu (%llu) len %d, ", command ? "WRITE" : + DEBUG("%s from %llu (%llu) len %d, ", command ? "WRITE" : "READ", (unsigned long long)request.from, - (unsigned long long)request.from / 512, len); -#endif + (unsigned long long)request.from / 512, (unsigned int)len); memcpy(reply.handle, request.handle, sizeof(reply.handle)); if ((command==NBD_CMD_WRITE) || (command==NBD_CMD_READ)) { @@ -1508,6 +1540,8 @@ int mainloop(CLIENT *client) { if (command==NBD_CMD_READ) { DEBUG("exp->buf, "); memcpy(buf, &reply, sizeof(struct nbd_reply)); + if (client->transactionlogfd != -1) + writeit(client->transactionlogfd, &reply, sizeof(reply)); p = buf + sizeof(struct nbd_reply); writelen = currlen + sizeof(struct nbd_reply); while(len > 0) { @@ -1520,6 +1554,7 @@ int mainloop(CLIENT *client) { DEBUG("buf->net, "); writeit(client->net, buf, writelen); len -= currlen; + request.from += currlen; currlen = (len < BUFSIZE) ? 
len : BUFSIZE; p = buf; writelen = currlen; @@ -1559,7 +1594,7 @@ void setupexport(CLIENT* client) { } else { tmpname=g_strdup(client->exportname); } - DEBUG2( "Opening %s\n", tmpname ); + DEBUG( "Opening %s\n", tmpname ); fi.fhandle = open(tmpname, mode); if(fi.fhandle == -1 && mode == O_RDWR) { /* Try again because maybe media was read-only */ @@ -1658,6 +1693,15 @@ int do_run(gchar* command, gchar* file) { * @param client a connected client **/ void serveconnection(CLIENT *client) { + if (client->server->transactionlog && (client->transactionlogfd == -1)) + { + if (-1 == (client->transactionlogfd = open(client->server->transactionlog, + O_WRONLY | O_CREAT, + S_IRUSR | S_IWUSR))) + g_warning("Could not open transaction log %s", + client->server->transactionlog); + } + if(do_run(client->server->prerun, client->exportname)) { exit(EXIT_FAILURE); } @@ -1671,6 +1715,12 @@ void serveconnection(CLIENT *client) { mainloop(client); do_run(client->server->postrun, client->exportname); + + if (-1 != client->transactionlogfd) + { + close(client->transactionlogfd); + client->transactionlogfd = -1; + } } /** @@ -1856,6 +1906,7 @@ int serveloop(GArray* servers) { client->server=serve; client->exportsize=OFFT_MAX; client->net=net; + client->transactionlogfd = -1; } set_peername(net, client); if (!authorized_client(client)) {