X-Git-Url: http://git.alex.org.uk diff --git a/nbd-server.c b/nbd-server.c index ea446db..3343d9d 100644 --- a/nbd-server.c +++ b/nbd-server.c @@ -96,6 +96,10 @@ #define MY_NAME "nbd_server" #include "cliserv.h" +#ifdef WITH_SDP +#include +#endif + /** Default position of the config file */ #ifndef SYSCONFDIR #define SYSCONFDIR "/etc" @@ -112,6 +116,9 @@ gchar* rungroup=NULL; /** whether to export using the old negotiation protocol (port-based) */ gboolean do_oldstyle=FALSE; +/* Whether we should avoid forking */ +int dontfork = 0; + /** Logging macros, now nothing goes to syslog unless you say ISSERVER */ #ifdef ISSERVER #define msg2(a,b) syslog(a,b) @@ -126,15 +133,9 @@ gboolean do_oldstyle=FALSE; /* Debugging macros */ //#define DODBG #ifdef DODBG -#define DEBUG( a ) printf( a ) -#define DEBUG2( a,b ) printf( a,b ) -#define DEBUG3( a,b,c ) printf( a,b,c ) -#define DEBUG4( a,b,c,d ) printf( a,b,c,d ) +#define DEBUG(...) printf(__VA_ARGS__) #else -#define DEBUG( a ) -#define DEBUG2( a,b ) -#define DEBUG3( a,b,c ) -#define DEBUG4( a,b,c,d ) +#define DEBUG(...) #endif #ifndef PACKAGE_VERSION #define PACKAGE_VERSION "" @@ -146,7 +147,7 @@ gboolean do_oldstyle=FALSE; #define OFFT_MAX ~((off_t)1<<(sizeof(off_t)*8-1)) #define LINELEN 256 /**< Size of static buffer used to read the authorization file (yuck) */ -#define BUFSIZE (1024*1024) /**< Size of buffer that can hold requests */ +#define BUFSIZE ((1024*1024)+sizeof(struct nbd_reply)) /**< Size of buffer that can hold requests */ #define DIFFPAGESIZE 4096 /**< diff file uses those chunks */ #define F_READONLY 1 /**< flag to tell us a file is readonly */ #define F_MULTIFILE 2 /**< flag to tell us a file is exported using -m */ @@ -156,6 +157,9 @@ gboolean do_oldstyle=FALSE; #define F_SPARSE 16 /**< flag to tell us copyronwrite should use a sparse file */ #define F_SDP 32 /**< flag to tell us the export should be done using the Socket Direct Protocol for RDMA */ #define F_SYNC 64 /**< Whether to fsync() after a write */ +#define F_FLUSH 128 /**< Whether server wants FLUSH to be sent by the client */ +#define F_FUA 256 /**< Whether server wants FUA to be sent by the client */ +#define F_ROTATIONAL 512 /**< Whether server wants the client to implement the elevator algorithm */ GHashTable *children; char pidfname[256]; /**< name of our PID file */ char pidftemplate[256]; /**< template to be used for the filename of the PID file */ @@ -200,6 +204,8 @@ typedef struct { gchar* postrun; /**< command that will be ran after the client disconnects */ gchar* servename; /**< name of the export as selected by nbd-client */ + int max_connections; /**< maximum number of opened connections */ + gchar* transactionlog;/**< filename for transaction log */ } SERVER; /** @@ -226,6 +232,7 @@ typedef struct { u32 difffilelen; /**< number of pages in difffile */ u32 *difmap; /**< see comment on the global difmap for this one */ gboolean modern; /**< client was negotiated using modern negotiation protocol */ + int transactionlogfd;/**< fd for transaction log */ } CLIENT; /** @@ -315,7 +322,7 @@ int authorized_client(CLIENT *opts) { * @param buf a buffer * @param len the number of bytes to be read **/ -inline void readit(int f, void *buf, size_t len) { +static inline void readit(int f, void *buf, size_t len) { ssize_t res; while (len > 0) { DEBUG("*"); @@ -337,7 +344,7 @@ inline void readit(int f, void *buf, size_t len) { * @param buf a buffer containing data * @param len the number of bytes to be written **/ -inline void writeit(int f, void *buf, size_t len) { +static inline void writeit(int f, void *buf, size_t len) { ssize_t res; while (len > 0) { DEBUG("+"); @@ -354,14 +361,15 @@ inline void writeit(int f, void *buf, size_t len) { */ void usage() { printf("This is nbd-server version " VERSION "\n"); - printf("Usage: [ip:|ip6@]port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-C configuration file] [-p PID file name] [-o section name]\n" + printf("Usage: [ip:|ip6@]port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-C configuration file] [-p PID file name] [-o section name] [-M max connections]\n" "\t-r|--read-only\t\tread only\n" "\t-m|--multi-file\t\tmultiple file\n" "\t-c|--copy-on-write\tcopy on write\n" "\t-C|--config-file\tspecify an alternate configuration file\n" "\t-l|--authorize-file\tfile with list of hosts that are allowed to\n\t\t\t\tconnect.\n" "\t-p|--pid-file\t\tspecify a filename to write our PID to\n" - "\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n\n" + "\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n" + "\t-M|--max-connections\tspecify the maximum number of opened connections\n\n" "\tif port is set to 0, stdin is used (for running from inetd)\n" "\tif file_to_export contains '%%s', it is substituted with the IP\n" "\t\taddress of the machine trying to connect\n" @@ -407,10 +415,12 @@ SERVER* cmdline(int argc, char *argv[]) { {"read-only", no_argument, NULL, 'r'}, {"multi-file", no_argument, NULL, 'm'}, {"copy-on-write", no_argument, NULL, 'c'}, + {"dont-fork", no_argument, NULL, 'd'}, {"authorize-file", required_argument, NULL, 'l'}, {"config-file", required_argument, NULL, 'C'}, {"pid-file", required_argument, NULL, 'p'}, {"output-config", required_argument, NULL, 'o'}, + {"max-connection", required_argument, NULL, 'M'}, {0,0,0,0} }; SERVER *serve; @@ -427,7 +437,7 @@ SERVER* cmdline(int argc, char *argv[]) { serve=g_new0(SERVER, 1); serve->authname = g_strdup(default_authname); serve->virtstyle=VIRT_IPLIT; - while((c=getopt_long(argc, argv, "-C:cl:mo:rp:", long_options, &i))>=0) { + while((c=getopt_long(argc, argv, "-C:cdl:mo:rp:M:", long_options, &i))>=0) { switch (c) { case 1: /* non-option argument */ @@ -496,6 +506,9 @@ SERVER* cmdline(int argc, char *argv[]) { case 'c': serve->flags |=F_COPYONWRITE; break; + case 'd': + dontfork = 1; + break; case 'C': g_free(config_file_pos); config_file_pos=g_strdup(optarg); @@ -504,6 +517,9 @@ SERVER* cmdline(int argc, char *argv[]) { g_free(serve->authname); serve->authname=g_strdup(optarg); break; + case 'M': + serve->max_connections = strtol(optarg, NULL, 0); + break; default: usage(); exit(EXIT_FAILURE); @@ -560,6 +576,8 @@ void remove_server(gpointer s) { g_free(server->prerun); if(server->postrun) g_free(server->postrun); + if(server->transactionlog) + g_free(server->transactionlog); g_free(server); } @@ -589,8 +607,9 @@ SERVER* dup_serve(SERVER *s) { serve->authname = strdup(s->authname); serve->flags = s->flags; - serve->socket = serve->socket; - serve->socket_family = serve->socket_family; + serve->socket = s->socket; + serve->socket_family = s->socket_family; + serve->virtstyle = s->virtstyle; serve->cidrlen = s->cidrlen; if(s->prerun) @@ -598,10 +617,15 @@ SERVER* dup_serve(SERVER *s) { if(s->postrun) serve->postrun = g_strdup(s->postrun); + + if(s->transactionlog) + serve->transactionlog = g_strdup(s->transactionlog); if(s->servename) serve->servename = g_strdup(s->servename); + serve->max_connections = s->max_connections; + return serve; } @@ -687,20 +711,25 @@ GArray* parse_cfile(gchar* f, GError** e) { SERVER s; gchar *virtstyle=NULL; PARAM lp[] = { - { "exportname", TRUE, PARAM_STRING, NULL, 0 }, - { "port", TRUE, PARAM_INT, NULL, 0 }, - { "authfile", FALSE, PARAM_STRING, NULL, 0 }, - { "filesize", FALSE, PARAM_INT, NULL, 0 }, - { "virtstyle", FALSE, PARAM_STRING, NULL, 0 }, - { "prerun", FALSE, PARAM_STRING, NULL, 0 }, - { "postrun", FALSE, PARAM_STRING, NULL, 0 }, - { "readonly", FALSE, PARAM_BOOL, NULL, F_READONLY }, - { "multifile", FALSE, PARAM_BOOL, NULL, F_MULTIFILE }, - { "copyonwrite", FALSE, PARAM_BOOL, NULL, F_COPYONWRITE }, - { "sparse_cow", FALSE, PARAM_BOOL, NULL, F_SPARSE }, - { "sdp", FALSE, PARAM_BOOL, NULL, F_SDP }, - { "sync", FALSE, PARAM_BOOL, NULL, F_SYNC }, - { "listenaddr", FALSE, PARAM_STRING, NULL, 0 }, + { "exportname", TRUE, PARAM_STRING, &(s.exportname), 0 }, + { "port", TRUE, PARAM_INT, &(s.port), 0 }, + { "authfile", FALSE, PARAM_STRING, &(s.authname), 0 }, + { "filesize", FALSE, PARAM_INT, &(s.expected_size), 0 }, + { "virtstyle", FALSE, PARAM_STRING, &(virtstyle), 0 }, + { "prerun", FALSE, PARAM_STRING, &(s.prerun), 0 }, + { "postrun", FALSE, PARAM_STRING, &(s.postrun), 0 }, + { "transactionlog", FALSE, PARAM_STRING, &(s.transactionlog), 0 }, + { "readonly", FALSE, PARAM_BOOL, &(s.flags), F_READONLY }, + { "multifile", FALSE, PARAM_BOOL, &(s.flags), F_MULTIFILE }, + { "copyonwrite", FALSE, PARAM_BOOL, &(s.flags), F_COPYONWRITE }, + { "sparse_cow", FALSE, PARAM_BOOL, &(s.flags), F_SPARSE }, + { "sdp", FALSE, PARAM_BOOL, &(s.flags), F_SDP }, + { "sync", FALSE, PARAM_BOOL, &(s.flags), F_SYNC }, + { "flush", FALSE, PARAM_BOOL, &(s.flags), F_FLUSH }, + { "fua", FALSE, PARAM_BOOL, &(s.flags), F_FUA }, + { "rotational", FALSE, PARAM_BOOL, &(s.flags), F_ROTATIONAL }, + { "listenaddr", FALSE, PARAM_STRING, &(s.listenaddr), 0 }, + { "maxconnections", FALSE, PARAM_INT, &(s.max_connections), 0 }, }; const int lp_size=sizeof(lp)/sizeof(PARAM); PARAM gp[] = { @@ -740,17 +769,6 @@ GArray* parse_cfile(gchar* f, GError** e) { groups = g_key_file_get_groups(cfile, NULL); for(i=0;groups[i];i++) { memset(&s, '\0', sizeof(SERVER)); - lp[0].target=&(s.exportname); - lp[1].target=&(s.port); - lp[2].target=&(s.authname); - lp[3].target=&(s.expected_size); - lp[4].target=&(virtstyle); - lp[5].target=&(s.prerun); - lp[6].target=&(s.postrun); - lp[7].target=lp[8].target=lp[9].target= - lp[10].target=lp[11].target= - lp[12].target=&(s.flags); - lp[13].target=&(s.listenaddr); /* After the [generic] group, start parsing exports */ if(i==1) { @@ -888,7 +906,7 @@ void sigchld_handler(int s) { if(!i) { msg3(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid); } else { - DEBUG2("Removing %d from the list of children", pid); + DEBUG("Removing %d from the list of children", pid); g_hash_table_remove(children, &pid); } } @@ -966,7 +984,7 @@ off_t size_autodetect(int fhandle) { if (es > ((off_t)0)) { return es; } else { - DEBUG2("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4))); + DEBUG("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4))); } err("Could not find size of exported block device: %m"); @@ -1045,7 +1063,7 @@ void myseek(int handle,off_t a) { * @param client The client we're serving for * @return The number of bytes actually written, or -1 in case of an error **/ -ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client) { +ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) { int fhandle; off_t foffset; size_t maxbytes; @@ -1056,12 +1074,51 @@ ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client) { if(maxbytes && len > maxbytes) len = maxbytes; - DEBUG4("(WRITE to fd %d offset %llu len %u), ", fhandle, foffset, len); + DEBUG("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle, (long long unsigned)foffset, (unsigned int)len, fua); myseek(fhandle, foffset); retval = write(fhandle, buf, len); if(client->server->flags & F_SYNC) { fsync(fhandle); + } else if (fua) { + + /* This is where we would do the following + * #ifdef USE_SYNC_FILE_RANGE + * However, we don't, for the reasons set out below + * by Christoph Hellwig + * + * [BEGINS] + * fdatasync is equivalent to fsync except that it does not flush + * non-essential metadata (basically just timestamps in practice), but it + * does flush metadata requried to find the data again, e.g. allocation + * information and extent maps. sync_file_range does nothing but flush + * out pagecache content - it means you basically won't get your data + * back in case of a crash if you either: + * + * a) have a volatile write cache in your disk (e.g. any normal SATA disk) + * b) are using a sparse file on a filesystem + * c) are using a fallocate-preallocated file on a filesystem + * d) use any file on a COW filesystem like btrfs + * + * e.g. it only does anything useful for you if you do not have a volatile + * write cache, and either use a raw block device node, or just overwrite + * an already fully allocated (and not preallocated) file on a non-COW + * filesystem. + * [ENDS] + * + * What we should do is open a second FD with O_DSYNC set, then write to + * that when appropriate. However, with a Linux client, every REQ_FUA + * immediately follows a REQ_FLUSH, so fdatasync does not cause performance + * problems. + * + */ +#if 0 + sync_file_range(fhandle, foffset, len, + SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE | + SYNC_FILE_RANGE_WAIT_AFTER); +#else + fdatasync(fhandle); +#endif } return retval; } @@ -1070,10 +1127,10 @@ ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client) { * Call rawexpwrite repeatedly until all data has been written. * @return 0 on success, nonzero on failure **/ -int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client) { +int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client, int fua) { ssize_t ret=0; - while(len > 0 && (ret=rawexpwrite(a, buf, len, client)) > 0 ) { + while(len > 0 && (ret=rawexpwrite(a, buf, len, client, fua)) > 0 ) { a += ret; buf += ret; len -= ret; @@ -1102,7 +1159,7 @@ ssize_t rawexpread(off_t a, char *buf, size_t len, CLIENT *client) { if(maxbytes && len > maxbytes) len = maxbytes; - DEBUG4("(READ from fd %d offset %llu len %u), ", fhandle, foffset, len); + DEBUG("(READ from fd %d offset %llu len %u), ", fhandle, (long long unsigned int)foffset, (unsigned int)len); myseek(fhandle, foffset); return read(fhandle, buf, len); @@ -1139,7 +1196,7 @@ int expread(off_t a, char *buf, size_t len, CLIENT *client) { if (!(client->server->flags & F_COPYONWRITE)) return(rawexpread_fully(a, buf, len, client)); - DEBUG3("Asked to read %d bytes at %llu.\n", len, (unsigned long long)a); + DEBUG("Asked to read %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a); mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE; @@ -1149,12 +1206,12 @@ int expread(off_t a, char *buf, size_t len, CLIENT *client) { rdlen=(0difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */ - DEBUG3("Page %llu is at %lu\n", (unsigned long long)mapcnt, + DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt, (unsigned long)(client->difmap[mapcnt])); myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset); if (read(client->difffile, buf, rdlen) != rdlen) return -1; } else { /* the block is not there */ - DEBUG2("Page %llu is not here, we read the original one\n", + DEBUG("Page %llu is not here, we read the original one\n", (unsigned long long)mapcnt); if(rawexpread_fully(a, buf, rdlen, client)) return -1; } @@ -1174,7 +1231,7 @@ int expread(off_t a, char *buf, size_t len, CLIENT *client) { * @param client The client we're going to write for. * @return 0 on success, nonzero on failure **/ -int expwrite(off_t a, char *buf, size_t len, CLIENT *client) { +int expwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) { char pagebuf[DIFFPAGESIZE]; off_t mapcnt,mapl,maph; off_t wrlen,rdlen; @@ -1182,8 +1239,8 @@ int expwrite(off_t a, char *buf, size_t len, CLIENT *client) { off_t offset; if (!(client->server->flags & F_COPYONWRITE)) - return(rawexpwrite_fully(a, buf, len, client)); - DEBUG3("Asked to write %d bytes at %llu.\n", len, (unsigned long long)a); + return(rawexpwrite_fully(a, buf, len, client, fua)); + DEBUG("Asked to write %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a); mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ; @@ -1194,7 +1251,7 @@ int expwrite(off_t a, char *buf, size_t len, CLIENT *client) { len : (size_t)DIFFPAGESIZE-offset; if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */ - DEBUG3("Page %llu is at %lu\n", (unsigned long long)mapcnt, + DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt, (unsigned long)(client->difmap[mapcnt])) ; myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset); @@ -1202,7 +1259,7 @@ int expwrite(off_t a, char *buf, size_t len, CLIENT *client) { } else { /* the block is not there */ myseek(client->difffile,client->difffilelen*DIFFPAGESIZE) ; client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++; - DEBUG3("Page %llu is not here, we put it at %lu\n", + DEBUG("Page %llu is not here, we put it at %lu\n", (unsigned long long)mapcnt, (unsigned long)(client->difmap[mapcnt])); rdlen=DIFFPAGESIZE ; @@ -1215,6 +1272,30 @@ int expwrite(off_t a, char *buf, size_t len, CLIENT *client) { } len-=wrlen ; a+=wrlen ; buf+=wrlen ; } + if (client->server->flags & F_SYNC) { + fsync(client->difffile); + } else if (fua) { + /* open question: would it be cheaper to do multiple sync_file_ranges? + as we iterate through the above? + */ + fdatasync(client->difffile); + } + return 0; +} + +int expflush(CLIENT *client) { + gint i; + + if (client->server->flags & F_COPYONWRITE) { + return fsync(client->difffile); + } + + for (i = 0; i < client->export->len; i++) { + FILE_INFO fi = g_array_index(client->export, FILE_INFO, i); + if (fsync(fi.fhandle) < 0) + return -1; + } + return 0; } @@ -1232,14 +1313,17 @@ CLIENT* negotiate(int net, CLIENT *client, GArray* servers) { memset(zeros, '\0', sizeof(zeros)); if(!client || !client->modern) { + /* common */ if (write(net, INIT_PASSWD, 8) < 0) { err_nonfatal("Negotiation failed: %m"); if(client) exit(EXIT_FAILURE); } - if(client && client->modern) { + if(!client || client->modern) { + /* modern */ magic = htonll(opts_magic); } else { + /* oldstyle */ magic = htonll(cliserv_magic); } if (write(net, &magic, sizeof(magic)) < 0) { @@ -1249,6 +1333,7 @@ CLIENT* negotiate(int net, CLIENT *client, GArray* servers) { } } if(!client) { + /* modern */ uint32_t reserved; uint32_t opt; uint32_t namelen; @@ -1257,25 +1342,31 @@ CLIENT* negotiate(int net, CLIENT *client, GArray* servers) { if(!servers) err("programmer error"); - write(net, &smallflags, sizeof(uint16_t)); - read(net, &reserved, sizeof(reserved)); - read(net, &magic, sizeof(magic)); + if (write(net, &smallflags, sizeof(uint16_t)) < 0) + err("Negotiation failed: %m"); + if (read(net, &reserved, sizeof(reserved)) < 0) + err("Negotiation failed: %m"); + if (read(net, &magic, sizeof(magic)) < 0) + err("Negotiation failed: %m"); magic = ntohll(magic); if(magic != opts_magic) { close(net); return NULL; } - read(net, &opt, sizeof(opt)); + if (read(net, &opt, sizeof(opt)) < 0) + err("Negotiation failed: %m"); opt = ntohl(opt); if(opt != NBD_OPT_EXPORT_NAME) { close(net); return NULL; } - read(net, &namelen, sizeof(namelen)); + if (read(net, &namelen, sizeof(namelen)) < 0) + err("Negotiation failed: %m"); namelen = ntohl(namelen); name = malloc(namelen+1); - name[namelen+1]=0; - read(net, name, namelen); + name[namelen]=0; + if (read(net, name, namelen) < 0) + err("Negotiation failed: %m"); for(i=0; ilen; i++) { SERVER* serve = &(g_array_index(servers, SERVER, i)); if(!strcmp(serve->servename, name)) { @@ -1284,33 +1375,49 @@ CLIENT* negotiate(int net, CLIENT *client, GArray* servers) { client->exportsize = OFFT_MAX; client->net = net; client->modern = TRUE; + client->transactionlogfd = -1; + free(name); return client; } } + free(name); + return NULL; } + /* common */ size_host = htonll((u64)(client->exportsize)); if (write(net, &size_host, 8) < 0) err("Negotiation failed: %m"); if (client->server->flags & F_READONLY) flags |= NBD_FLAG_READ_ONLY; + if (client->server->flags & F_FLUSH) + flags |= NBD_FLAG_SEND_FLUSH; + if (client->server->flags & F_FUA) + flags |= NBD_FLAG_SEND_FUA; + if (client->server->flags & F_ROTATIONAL) + flags |= NBD_FLAG_ROTATIONAL; if (!client->modern) { + /* oldstyle */ flags = htonl(flags); if (write(client->net, &flags, 4) < 0) err("Negotiation failed: %m"); } else { + /* modern */ smallflags = (uint16_t)(flags & ~((uint16_t)0)); smallflags = htons(smallflags); if (write(client->net, &smallflags, sizeof(smallflags)) < 0) { err("Negotiation failed: %m"); } } + /* common */ if (write(client->net, zeros, 124) < 0) err("Negotiation failed: %m"); return NULL; } /** sending macro. */ -#define SEND(net,reply) writeit( net, &reply, sizeof( reply )); +#define SEND(net,reply) { writeit( net, &reply, sizeof( reply )); \ + if (client->transactionlogfd != -1) \ + writeit(client->transactionlogfd, &reply, sizeof(reply)); } /** error macro. */ #define ERROR(client,reply,errcode) { reply.error = htonl(errcode); SEND(client->net,reply); reply.error = 0; } /** @@ -1335,16 +1442,24 @@ int mainloop(CLIENT *client) { reply.error = 0; while (go_on) { char buf[BUFSIZE]; + char* p; size_t len; + size_t currlen; + size_t writelen; + uint16_t command; #ifdef DODBG i++; printf("%d: ", i); #endif readit(client->net, &request, sizeof(request)); + if (client->transactionlogfd != -1) + writeit(client->transactionlogfd, &request, sizeof(request)); + request.from = ntohll(request.from); request.type = ntohl(request.type); + command = request.type & NBD_CMD_MASK_COMMAND; - if (request.type==NBD_CMD_DISC) { + if (command==NBD_CMD_DISC) { msg2(LOG_INFO, "Disconnect request received."); if (client->server->flags & F_COPYONWRITE) { if (client->difmap) g_free(client->difmap) ; @@ -1360,38 +1475,60 @@ int mainloop(CLIENT *client) { if (request.magic != htonl(NBD_REQUEST_MAGIC)) err("Not enough magic."); - if (len > BUFSIZE + sizeof(struct nbd_reply)) - err("Request too big!"); -#ifdef DODBG - printf("%s from %llu (%llu) len %d, ", request.type ? "WRITE" : + if (len > BUFSIZE - sizeof(struct nbd_reply)) { + currlen = BUFSIZE - sizeof(struct nbd_reply); + msg2(LOG_INFO, "oversized request (this is not a problem)"); + } else { + currlen = len; + } + DEBUG("%s from %llu (%llu) len %d, ", command ? "WRITE" : "READ", (unsigned long long)request.from, - (unsigned long long)request.from / 512, len); -#endif + (unsigned long long)request.from / 512, (unsigned int)len); memcpy(reply.handle, request.handle, sizeof(reply.handle)); - if ((request.from + len) > (OFFT_MAX)) { - DEBUG("[Number too large!]"); - ERROR(client, reply, EINVAL); - continue; - } - if (((ssize_t)((off_t)request.from + len) > client->exportsize)) { - DEBUG("[RANGE!]"); - ERROR(client, reply, EINVAL); - continue; + if ((command==NBD_CMD_WRITE) || (command==NBD_CMD_READ)) { + if ((request.from + len) > (OFFT_MAX)) { + DEBUG("[Number too large!]"); + ERROR(client, reply, EINVAL); + continue; + } + + if (((ssize_t)((off_t)request.from + len) > client->exportsize)) { + DEBUG("[RANGE!]"); + ERROR(client, reply, EINVAL); + continue; + } } - if (request.type==NBD_CMD_WRITE) { + if (command==NBD_CMD_WRITE) { DEBUG("wr: net->buf, "); - readit(client->net, buf, len); - DEBUG("buf->exp, "); - if ((client->server->flags & F_READONLY) || - (client->server->flags & F_AUTOREADONLY)) { - DEBUG("[WRITE to READONLY!]"); - ERROR(client, reply, EPERM); - continue; + while(len > 0) { + readit(client->net, buf, currlen); + DEBUG("buf->exp, "); + if ((client->server->flags & F_READONLY) || + (client->server->flags & F_AUTOREADONLY)) { + DEBUG("[WRITE to READONLY!]"); + ERROR(client, reply, EPERM); + continue; + } + if (expwrite(request.from, buf, len, client, + request.type & NBD_CMD_FLAG_FUA)) { + DEBUG("Write failed: %m" ); + ERROR(client, reply, errno); + continue; + } + SEND(client->net, reply); + DEBUG("OK!\n"); + len -= currlen; + currlen = (len < BUFSIZE) ? len : BUFSIZE; } - if (expwrite(request.from, buf, len, client)) { - DEBUG("Write failed: %m" ); + continue; + } + + if (command==NBD_CMD_FLUSH) { + DEBUG("fl: "); + if (expflush(client)) { + DEBUG("Flush failed: %m"); ERROR(client, reply, errno); continue; } @@ -1399,19 +1536,34 @@ int mainloop(CLIENT *client) { DEBUG("OK!\n"); continue; } - /* READ */ - DEBUG("exp->buf, "); - if (expread(request.from, buf + sizeof(struct nbd_reply), len, client)) { - DEBUG("Read failed: %m"); - ERROR(client, reply, errno); + if (command==NBD_CMD_READ) { + DEBUG("exp->buf, "); + memcpy(buf, &reply, sizeof(struct nbd_reply)); + if (client->transactionlogfd != -1) + writeit(client->transactionlogfd, &reply, sizeof(reply)); + p = buf + sizeof(struct nbd_reply); + writelen = currlen + sizeof(struct nbd_reply); + while(len > 0) { + if (expread(request.from, p, currlen, client)) { + DEBUG("Read failed: %m"); + ERROR(client, reply, errno); + continue; + } + + DEBUG("buf->net, "); + writeit(client->net, buf, writelen); + len -= currlen; + request.from += currlen; + currlen = (len < BUFSIZE) ? len : BUFSIZE; + p = buf; + writelen = currlen; + } + DEBUG("OK!\n"); continue; } - DEBUG("buf->net, "); - memcpy(buf, &reply, sizeof(struct nbd_reply)); - writeit(client->net, buf, len + sizeof(struct nbd_reply)); - DEBUG("OK!\n"); + DEBUG ("Ignoring unknown command\n"); } return 0; } @@ -1442,7 +1594,7 @@ void setupexport(CLIENT* client) { } else { tmpname=g_strdup(client->exportname); } - DEBUG2( "Opening %s\n", tmpname ); + DEBUG( "Opening %s\n", tmpname ); fi.fhandle = open(tmpname, mode); if(fi.fhandle == -1 && mode == O_RDWR) { /* Try again because maybe media was read-only */ @@ -1541,6 +1693,15 @@ int do_run(gchar* command, gchar* file) { * @param client a connected client **/ void serveconnection(CLIENT *client) { + if (client->server->transactionlog && (client->transactionlogfd == -1)) + { + if (-1 == (client->transactionlogfd = open(client->server->transactionlog, + O_WRONLY | O_CREAT, + S_IRUSR | S_IWUSR))) + g_warning("Could not open transaction log %s", + client->server->transactionlog); + } + if(do_run(client->server->prerun, client->exportname)) { exit(EXIT_FAILURE); } @@ -1554,6 +1715,12 @@ void serveconnection(CLIENT *client) { mainloop(client); do_run(client->server->postrun, client->exportname); + + if (-1 != client->transactionlogfd) + { + close(client->transactionlogfd); + client->transactionlogfd = -1; + } } /** @@ -1697,7 +1864,7 @@ int serveloop(GArray* servers) { memcpy(&rset, &mset, sizeof(fd_set)); if(select(max+1, &rset, NULL, NULL, NULL)>0) { int net = 0; - SERVER* serve; + SERVER* serve=NULL; DEBUG("accept, "); if(FD_ISSET(modernsock, &rset)) { @@ -1707,7 +1874,10 @@ int serveloop(GArray* servers) { if(!client) { err_nonfatal("negotiation failed"); close(net); + net=0; + continue; } + serve = client->server; } for(i=0;ilen && !net;i++) { serve=&(g_array_index(servers, SERVER, i)); @@ -1719,6 +1889,12 @@ int serveloop(GArray* servers) { if(net) { int sock_flags; + if(serve->max_connections > 0 && + g_hash_table_size(children) >= serve->max_connections) { + msg2(LOG_INFO, "Max connections reached"); + close(net); + continue; + } if((sock_flags = fcntl(net, F_GETFL, 0))==-1) { err("fcntl F_GETFL"); } @@ -1730,6 +1906,7 @@ int serveloop(GArray* servers) { client->server=serve; client->exportsize=OFFT_MAX; client->net=net; + client->transactionlogfd = -1; } set_peername(net, client); if (!authorized_client(client)) { @@ -1739,31 +1916,33 @@ int serveloop(GArray* servers) { } msg2(LOG_INFO,"Authorized client") ; pid=g_malloc(sizeof(pid_t)); -#ifndef NOFORK - if ((*pid=fork())<0) { - msg3(LOG_INFO,"Could not fork (%s)",strerror(errno)) ; - close(net); - continue; - } - if (*pid>0) { /* parent */ - close(net); - g_hash_table_insert(children, pid, pid); - continue; - } - /* child */ - g_hash_table_destroy(children); - for(i=0;ilen;i++) { - serve=&g_array_index(servers, SERVER, i); - close(serve->socket); + + if (!dontfork) { + if ((*pid=fork())<0) { + msg3(LOG_INFO,"Could not fork (%s)",strerror(errno)) ; + close(net); + continue; + } + if (*pid>0) { /* parent */ + close(net); + g_hash_table_insert(children, pid, pid); + continue; + } + /* child */ + g_hash_table_destroy(children); + for(i=0;ilen;i++) { + serve=&g_array_index(servers, SERVER, i); + close(serve->socket); + } + /* FALSE does not free the + actual data. This is required, + because the client has a + direct reference into that + data, and otherwise we get a + segfault... */ + g_array_free(servers, FALSE); } - /* FALSE does not free the - actual data. This is required, - because the client has a - direct reference into that - data, and otherwise we get a - segfault... */ - g_array_free(servers, FALSE); -#endif // NOFORK + msg2(LOG_INFO,"Starting to serve"); serveconnection(client); exit(EXIT_SUCCESS); @@ -1930,7 +2109,7 @@ void setup_servers(GArray* servers) { * is only used to create a PID file of the form * /var/run/nbd-server.<port>.pid; it's not modified in any way. **/ -#if !defined(NODAEMON) && !defined(NOFORK) +#if !defined(NODAEMON) void daemonize(SERVER* serve) { FILE*pidf; @@ -1959,7 +2138,7 @@ void daemonize(SERVER* serve) { } #else #define daemonize(serve) -#endif /* !defined(NODAEMON) && !defined(NOFORK) */ +#endif /* !defined(NODAEMON) */ /* * Everything beyond this point (in the file) is run in non-daemon mode. @@ -2030,7 +2209,7 @@ void glib_message_syslog_redirect(const gchar *log_domain, default: level=LOG_ERR; } - syslog(level, message); + syslog(level, "%s", message); } #endif @@ -2083,8 +2262,11 @@ int main(int argc, char *argv[]) { } if(!servers || !servers->len) { - g_warning("Could not parse config file: %s", - err ? err->message : "Unknown error"); + if(err && !(err->domain == g_quark_from_string("parse_cfile") + && err->code == CFILE_NOTFOUND)) { + g_warning("Could not parse config file: %s", + err ? err->message : "Unknown error"); + } } if(serve) { g_warning("Specifying an export on the command line is deprecated."); @@ -2092,10 +2274,11 @@ int main(int argc, char *argv[]) { } if((!serve) && (!servers||!servers->len)) { - g_message("Nothing to do! Bye!"); + g_message("No configured exports; quitting."); exit(EXIT_FAILURE); } - daemonize(serve); + if (!dontfork) + daemonize(serve); setup_servers(servers); dousers(); serveloop(servers);