X-Git-Url: http://git.alex.org.uk diff --git a/nbd-server.c b/nbd-server.c index 6c3953d..6d734b3 100644 --- a/nbd-server.c +++ b/nbd-server.c @@ -133,17 +133,9 @@ int dontfork = 0; /* Debugging macros */ //#define DODBG #ifdef DODBG -#define DEBUG( a ) printf( a ) -#define DEBUG2( a,b ) printf( a,b ) -#define DEBUG3( a,b,c ) printf( a,b,c ) -#define DEBUG4( a,b,c,d ) printf( a,b,c,d ) -#define DEBUG5( a,b,c,d,e ) printf( a,b,c,d,e ) +#define DEBUG(...) printf(__VA_ARGS__) #else -#define DEBUG( a ) -#define DEBUG2( a,b ) -#define DEBUG3( a,b,c ) -#define DEBUG4( a,b,c,d ) -#define DEBUG5( a,b,c,d,e ) +#define DEBUG(...) #endif #ifndef PACKAGE_VERSION #define PACKAGE_VERSION "" @@ -269,6 +261,22 @@ typedef struct { is PARAM_BOOL. */ } PARAM; +static inline const char * getcommandname(uint64_t command) { + switch (command) { + case NBD_CMD_READ: + return "NBD_CMD_READ"; + case NBD_CMD_WRITE: + return "NBD_CMD_WRITE"; + case NBD_CMD_DISC: + return "NBD_CMD_DISC"; + case NBD_CMD_FLUSH: + return "NBD_CMD_FLUSH"; + default: + break; + } + return "UNKNOWN"; +} + /** * Check whether a client is allowed to connect. Works with an authorization * file which contains one line per machine, no wildcards. 
@@ -914,7 +922,7 @@ void sigchld_handler(int s) { if(!i) { msg3(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid); } else { - DEBUG2("Removing %d from the list of children", pid); + DEBUG("Removing %d from the list of children", pid); g_hash_table_remove(children, &pid); } } @@ -992,7 +1000,7 @@ off_t size_autodetect(int fhandle) { if (es > ((off_t)0)) { return es; } else { - DEBUG2("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4))); + DEBUG("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4))); } err("Could not find size of exported block device: %m"); @@ -1082,14 +1090,45 @@ ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) { if(maxbytes && len > maxbytes) len = maxbytes; - DEBUG5("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle, foffset, len, fua); + DEBUG("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle, (long long unsigned)foffset, (unsigned int)len, fua); myseek(fhandle, foffset); retval = write(fhandle, buf, len); if(client->server->flags & F_SYNC) { fsync(fhandle); } else if (fua) { -#ifdef USE_SYNC_FILE_RANGE + + /* This is where we would do the following + * #ifdef USE_SYNC_FILE_RANGE + * However, we don't, for the reasons set out below + * by Christoph Hellwig + * + * [BEGINS] + * fdatasync is equivalent to fsync except that it does not flush + * non-essential metadata (basically just timestamps in practice), but it + * does flush metadata required to find the data again, e.g. allocation + * information and extent maps. sync_file_range does nothing but flush + * out pagecache content - it means you basically won't get your data + * back in case of a crash if you either: + * + * a) have a volatile write cache in your disk (e.g. any normal SATA disk) + * b) are using a sparse file on a filesystem + * c) are using a fallocate-preallocated file on a filesystem + * d) use any file on a COW filesystem like btrfs + * + * e.g. 
it only does anything useful for you if you do not have a volatile + * write cache, and either use a raw block device node, or just overwrite + * an already fully allocated (and not preallocated) file on a non-COW + * filesystem. + * [ENDS] + * + * What we should do is open a second FD with O_DSYNC set, then write to + * that when appropriate. However, with a Linux client, every REQ_FUA + * immediately follows a REQ_FLUSH, so fdatasync does not cause performance + * problems. + * + */ +#if 0 sync_file_range(fhandle, foffset, len, SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_AFTER); @@ -1136,7 +1175,7 @@ ssize_t rawexpread(off_t a, char *buf, size_t len, CLIENT *client) { if(maxbytes && len > maxbytes) len = maxbytes; - DEBUG4("(READ from fd %d offset %llu len %u), ", fhandle, foffset, len); + DEBUG("(READ from fd %d offset %llu len %u), ", fhandle, (long long unsigned int)foffset, (unsigned int)len); myseek(fhandle, foffset); return read(fhandle, buf, len); @@ -1173,7 +1212,7 @@ int expread(off_t a, char *buf, size_t len, CLIENT *client) { if (!(client->server->flags & F_COPYONWRITE)) return(rawexpread_fully(a, buf, len, client)); - DEBUG3("Asked to read %d bytes at %llu.\n", len, (unsigned long long)a); + DEBUG("Asked to read %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a); mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE; @@ -1183,12 +1222,12 @@ int expread(off_t a, char *buf, size_t len, CLIENT *client) { rdlen=(0<DIFFPAGESIZE-offset && len<(DIFFPAGESIZE-offset)) ? len : (size_t)DIFFPAGESIZE-offset; if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */ - DEBUG3("Page %llu is at %lu\n", (unsigned long long)mapcnt, + DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt, (unsigned long)(client->difmap[mapcnt])); myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset); if (read(client->difffile, buf, rdlen) != rdlen) return -1; } else { /* the block is not there */ - DEBUG2("Page %llu is not here, we read the original one\n", + DEBUG("Page %llu is not here, we read the original 
one\n", (unsigned long long)mapcnt); if(rawexpread_fully(a, buf, rdlen, client)) return -1; } @@ -1217,7 +1256,7 @@ int expwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) { if (!(client->server->flags & F_COPYONWRITE)) return(rawexpwrite_fully(a, buf, len, client, fua)); - DEBUG3("Asked to write %d bytes at %llu.\n", len, (unsigned long long)a); + DEBUG("Asked to write %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a); mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ; @@ -1228,7 +1267,7 @@ int expwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) { len : (size_t)DIFFPAGESIZE-offset; if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */ - DEBUG3("Page %llu is at %lu\n", (unsigned long long)mapcnt, + DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt, (unsigned long)(client->difmap[mapcnt])) ; myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset); @@ -1236,7 +1275,7 @@ int expwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) { } else { /* the block is not there */ myseek(client->difffile,client->difffilelen*DIFFPAGESIZE) ; client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++; - DEBUG3("Page %llu is not here, we put it at %lu\n", + DEBUG("Page %llu is not here, we put it at %lu\n", (unsigned long long)mapcnt, (unsigned long)(client->difmap[mapcnt])); rdlen=DIFFPAGESIZE ; @@ -1261,9 +1300,6 @@ int expwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) { } int expflush(CLIENT *client) { - int fhandle; - off_t foffset; - size_t maxbytes; gint i; if (client->server->flags & F_COPYONWRITE) { @@ -1438,34 +1474,15 @@ int mainloop(CLIENT *client) { request.from = ntohll(request.from); request.type = ntohl(request.type); command = request.type & NBD_CMD_MASK_COMMAND; - - if (command==NBD_CMD_DISC) { - msg2(LOG_INFO, "Disconnect request received."); - if (client->server->flags & F_COPYONWRITE) { - if (client->difmap) g_free(client->difmap) 
; - close(client->difffile); - unlink(client->difffilename); - free(client->difffilename); - } - go_on=FALSE; - continue; - } - len = ntohl(request.len); + DEBUG("%s from %llu (%llu) len %d, ", getcommandname(command), + (unsigned long long)request.from, + (unsigned long long)request.from / 512, (unsigned int)len); + if (request.magic != htonl(NBD_REQUEST_MAGIC)) err("Not enough magic."); - if (len > BUFSIZE - sizeof(struct nbd_reply)) { - currlen = BUFSIZE - sizeof(struct nbd_reply); - msg2(LOG_INFO, "oversized request (this is not a problem)"); - } else { - currlen = len; - } -#ifdef DODBG - printf("%s from %llu (%llu) len %d, ", command ? "WRITE" : - "READ", (unsigned long long)request.from, - (unsigned long long)request.from / 512, len); -#endif + memcpy(reply.handle, request.handle, sizeof(reply.handle)); if ((command==NBD_CMD_WRITE) || (command==NBD_CMD_READ)) { @@ -1480,9 +1497,28 @@ int mainloop(CLIENT *client) { ERROR(client, reply, EINVAL); continue; } + + currlen = len; + if (currlen > BUFSIZE - sizeof(struct nbd_reply)) { + currlen = BUFSIZE - sizeof(struct nbd_reply); + msg2(LOG_INFO, "oversized request (this is not a problem)"); + } } - if (command==NBD_CMD_WRITE) { + switch (command) { + + case NBD_CMD_DISC: + msg2(LOG_INFO, "Disconnect request received."); + if (client->server->flags & F_COPYONWRITE) { + if (client->difmap) g_free(client->difmap) ; + close(client->difffile); + unlink(client->difffilename); + free(client->difffilename); + } + go_on=FALSE; + continue; + + case NBD_CMD_WRITE: DEBUG("wr: net->buf, "); while(len > 0) { readit(client->net, buf, currlen); @@ -1499,15 +1535,14 @@ int mainloop(CLIENT *client) { ERROR(client, reply, errno); continue; } - SEND(client->net, reply); - DEBUG("OK!\n"); len -= currlen; currlen = (len < BUFSIZE) ? 
len : BUFSIZE; } + SEND(client->net, reply); + DEBUG("OK!\n"); continue; - } - if (command==NBD_CMD_FLUSH) { + case NBD_CMD_FLUSH: DEBUG("fl: "); if (expflush(client)) { DEBUG("Flush failed: %m"); @@ -1517,9 +1552,8 @@ int mainloop(CLIENT *client) { SEND(client->net, reply); DEBUG("OK!\n"); continue; - } - if (command==NBD_CMD_READ) { + case NBD_CMD_READ: DEBUG("exp->buf, "); memcpy(buf, &reply, sizeof(struct nbd_reply)); if (client->transactionlogfd != -1) @@ -1543,9 +1577,11 @@ int mainloop(CLIENT *client) { } DEBUG("OK!\n"); continue; - } - DEBUG ("Ignoring unknown command\n"); + default: + DEBUG ("Ignoring unknown command\n"); + continue; + } } return 0; } @@ -1576,7 +1612,7 @@ void setupexport(CLIENT* client) { } else { tmpname=g_strdup(client->exportname); } - DEBUG2( "Opening %s\n", tmpname ); + DEBUG( "Opening %s\n", tmpname ); fi.fhandle = open(tmpname, mode); if(fi.fhandle == -1 && mode == O_RDWR) { /* Try again because maybe media was read-only */