/** whether to export using the old negotiation protocol (port-based) */
gboolean do_oldstyle=FALSE;
+/* Whether we should avoid forking */
+int dontfork = 0;
+
/** Logging macros, now nothing goes to syslog unless you say ISSERVER */
#ifdef ISSERVER
#define msg2(a,b) syslog(a,b)
/* Debugging macros */
//#define DODBG
#ifdef DODBG
-#define DEBUG( a ) printf( a )
-#define DEBUG2( a,b ) printf( a,b )
-#define DEBUG3( a,b,c ) printf( a,b,c )
-#define DEBUG4( a,b,c,d ) printf( a,b,c,d )
+#define DEBUG(...) printf(__VA_ARGS__)
#else
-#define DEBUG( a )
-#define DEBUG2( a,b )
-#define DEBUG3( a,b,c )
-#define DEBUG4( a,b,c,d )
+#define DEBUG(...)
#endif
#ifndef PACKAGE_VERSION
#define PACKAGE_VERSION ""
#define F_SPARSE 16 /**< flag to tell us copyronwrite should use a sparse file */
#define F_SDP 32 /**< flag to tell us the export should be done using the Socket Direct Protocol for RDMA */
#define F_SYNC 64 /**< Whether to fsync() after a write */
+#define F_FLUSH 128 /**< Whether server wants FLUSH to be sent by the client */
+#define F_FUA 256 /**< Whether server wants FUA to be sent by the client */
+#define F_ROTATIONAL 512 /**< Whether server wants the client to implement the elevator algorithm */
GHashTable *children;
char pidfname[256]; /**< name of our PID file */
char pidftemplate[256]; /**< template to be used for the filename of the PID file */
char default_authname[] = SYSCONFDIR "/nbd-server/allow"; /**< default name of allow file */
+#define NEG_INIT (1 << 0) /**< negotiate() phase bit: send the initial INIT_PASSWD greeting */
+#define NEG_OLD (1 << 1) /**< negotiate() phase bit: take the oldstyle branch when sending the flags */
+#define NEG_MODERN (1 << 2) /**< negotiate() phase bit: run the modern (new-style) negotiation branches */
+
int modernsock=0; /**< Socket for the modern handler. Not used
if a client was only specified on the
command line; only port used if
oldstyle is set to false (and then the
command-line client isn't used, gna gna) */
char* modern_listen; /**< listenaddr value for modernsock */
+char* modernport=NBD_DEFAULT_PORT; /**< Port number on which to listen for
+ new-style nbd-client connections */
/**
* Types of virtuatlization
disconnects */
gchar* servename; /**< name of the export as selected by nbd-client */
int max_connections; /**< maximum number of opened connections */
+ gchar* transactionlog;/**< filename for transaction log */
} SERVER;
/**
u32 difffilelen; /**< number of pages in difffile */
u32 *difmap; /**< see comment on the global difmap for this one */
gboolean modern; /**< client was negotiated using modern negotiation protocol */
+ int transactionlogfd;/**< fd for transaction log */
} CLIENT;
/**
} PARAM;
/**
+ * Translate a command name into human readable form
+ *
+ * @param command The command number (after applying NBD_CMD_MASK_COMMAND)
+ * @return pointer to the command name
+ **/
+static inline const char * getcommandname(uint64_t command) {
+	/* Compare against each request code this server can name. */
+	if (command == NBD_CMD_READ)
+		return "NBD_CMD_READ";
+	if (command == NBD_CMD_WRITE)
+		return "NBD_CMD_WRITE";
+	if (command == NBD_CMD_DISC)
+		return "NBD_CMD_DISC";
+	if (command == NBD_CMD_FLUSH)
+		return "NBD_CMD_FLUSH";
+
+	/* No match: report a fixed placeholder rather than NULL, so the
+	 * result can always be handed straight to a "%s" format. */
+	return "UNKNOWN";
+}
+
+/**
* Check whether a client is allowed to connect. Works with an authorization
* file which contains one line per machine, no wildcards.
*
* @param buf a buffer
* @param len the number of bytes to be read
**/
-inline void readit(int f, void *buf, size_t len) {
+static inline void readit(int f, void *buf, size_t len) {
ssize_t res;
while (len > 0) {
DEBUG("*");
}
/**
+ * Read and discard unwanted data from an FD
+ *
+ * @param f a file descriptor
+ * @param buf a buffer
+ * @param len the number of bytes to consume
+ * @param bufsiz the size of the buffer
+ **/
+static inline void consume(int f, void * buf, size_t len, size_t bufsiz) {
+	size_t step;
+
+	/* Pull the unwanted bytes through buf in slices of at most bufsiz
+	 * bytes; each slice overwrites the previous one, so nothing read
+	 * here is kept. */
+	for (; len > 0; len -= step) {
+		step = (bufsiz < len) ? bufsiz : len;
+		readit(f, buf, step);
+	}
+}
+
+
+/**
* Write data from a buffer into a filedescriptor
*
* @param f a file descriptor
* @param buf a buffer containing data
* @param len the number of bytes to be written
**/
-inline void writeit(int f, void *buf, size_t len) {
+static inline void writeit(int f, void *buf, size_t len) {
ssize_t res;
while (len > 0) {
DEBUG("+");
"\t-p|--pid-file\t\tspecify a filename to write our PID to\n"
"\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n"
"\t-M|--max-connections\tspecify the maximum number of opened connections\n\n"
- "\tif port is set to 0, stdin is used (for running from inetd)\n"
+ "\tif port is set to 0, stdin is used (for running from inetd).\n"
"\tif file_to_export contains '%%s', it is substituted with the IP\n"
"\t\taddress of the machine trying to connect\n"
"\tif ip is set, it contains the local IP address on which we're listening.\n\tif not, the server will listen on all local IP addresses\n");
{"read-only", no_argument, NULL, 'r'},
{"multi-file", no_argument, NULL, 'm'},
{"copy-on-write", no_argument, NULL, 'c'},
+ {"dont-fork", no_argument, NULL, 'd'},
{"authorize-file", required_argument, NULL, 'l'},
{"config-file", required_argument, NULL, 'C'},
{"pid-file", required_argument, NULL, 'p'},
serve=g_new0(SERVER, 1);
serve->authname = g_strdup(default_authname);
serve->virtstyle=VIRT_IPLIT;
- while((c=getopt_long(argc, argv, "-C:cl:mo:rp:M:", long_options, &i))>=0) {
+ while((c=getopt_long(argc, argv, "-C:cdl:mo:rp:M:", long_options, &i))>=0) {
switch (c) {
case 1:
/* non-option argument */
case 'c':
serve->flags |=F_COPYONWRITE;
break;
+ case 'd':
+ dontfork = 1;
+ break;
case 'C':
g_free(config_file_pos);
config_file_pos=g_strdup(optarg);
g_free(server->prerun);
if(server->postrun)
g_free(server->postrun);
+ if(server->transactionlog)
+ g_free(server->transactionlog);
g_free(server);
}
serve->authname = strdup(s->authname);
serve->flags = s->flags;
- serve->socket = serve->socket;
- serve->socket_family = serve->socket_family;
+ serve->socket = s->socket;
+ serve->socket_family = s->socket_family;
+ serve->virtstyle = s->virtstyle;
serve->cidrlen = s->cidrlen;
if(s->prerun)
if(s->postrun)
serve->postrun = g_strdup(s->postrun);
+
+ if(s->transactionlog)
+ serve->transactionlog = g_strdup(s->transactionlog);
if(s->servename)
serve->servename = g_strdup(s->servename);
SERVER s;
gchar *virtstyle=NULL;
PARAM lp[] = {
- { "exportname", TRUE, PARAM_STRING, NULL, 0 },
- { "port", TRUE, PARAM_INT, NULL, 0 },
- { "authfile", FALSE, PARAM_STRING, NULL, 0 },
- { "filesize", FALSE, PARAM_INT, NULL, 0 },
- { "virtstyle", FALSE, PARAM_STRING, NULL, 0 },
- { "prerun", FALSE, PARAM_STRING, NULL, 0 },
- { "postrun", FALSE, PARAM_STRING, NULL, 0 },
- { "readonly", FALSE, PARAM_BOOL, NULL, F_READONLY },
- { "multifile", FALSE, PARAM_BOOL, NULL, F_MULTIFILE },
- { "copyonwrite", FALSE, PARAM_BOOL, NULL, F_COPYONWRITE },
- { "sparse_cow", FALSE, PARAM_BOOL, NULL, F_SPARSE },
- { "sdp", FALSE, PARAM_BOOL, NULL, F_SDP },
- { "sync", FALSE, PARAM_BOOL, NULL, F_SYNC },
- { "listenaddr", FALSE, PARAM_STRING, NULL, 0 },
- { "maxconnections", FALSE, PARAM_INT, NULL, 0 },
+ { "exportname", TRUE, PARAM_STRING, &(s.exportname), 0 },
+ { "port", TRUE, PARAM_INT, &(s.port), 0 },
+ { "authfile", FALSE, PARAM_STRING, &(s.authname), 0 },
+ { "filesize", FALSE, PARAM_INT, &(s.expected_size), 0 },
+ { "virtstyle", FALSE, PARAM_STRING, &(virtstyle), 0 },
+ { "prerun", FALSE, PARAM_STRING, &(s.prerun), 0 },
+ { "postrun", FALSE, PARAM_STRING, &(s.postrun), 0 },
+ { "transactionlog", FALSE, PARAM_STRING, &(s.transactionlog), 0 },
+ { "readonly", FALSE, PARAM_BOOL, &(s.flags), F_READONLY },
+ { "multifile", FALSE, PARAM_BOOL, &(s.flags), F_MULTIFILE },
+ { "copyonwrite", FALSE, PARAM_BOOL, &(s.flags), F_COPYONWRITE },
+ { "sparse_cow", FALSE, PARAM_BOOL, &(s.flags), F_SPARSE },
+ { "sdp", FALSE, PARAM_BOOL, &(s.flags), F_SDP },
+ { "sync", FALSE, PARAM_BOOL, &(s.flags), F_SYNC },
+ { "flush", FALSE, PARAM_BOOL, &(s.flags), F_FLUSH },
+ { "fua", FALSE, PARAM_BOOL, &(s.flags), F_FUA },
+ { "rotational", FALSE, PARAM_BOOL, &(s.flags), F_ROTATIONAL },
+ { "listenaddr", FALSE, PARAM_STRING, &(s.listenaddr), 0 },
+ { "maxconnections", FALSE, PARAM_INT, &(s.max_connections), 0 },
};
const int lp_size=sizeof(lp)/sizeof(PARAM);
PARAM gp[] = {
{ "group", FALSE, PARAM_STRING, &rungroup, 0 },
{ "oldstyle", FALSE, PARAM_BOOL, &do_oldstyle, 1 },
{ "listenaddr", FALSE, PARAM_STRING, &modern_listen, 0 },
+ { "port", FALSE, PARAM_STRING, &modernport, 0 },
};
PARAM* p=gp;
int p_size=sizeof(gp)/sizeof(PARAM);
groups = g_key_file_get_groups(cfile, NULL);
for(i=0;groups[i];i++) {
memset(&s, '\0', sizeof(SERVER));
- lp[0].target=&(s.exportname);
- lp[1].target=&(s.port);
- lp[2].target=&(s.authname);
- lp[3].target=&(s.expected_size);
- lp[4].target=&(virtstyle);
- lp[5].target=&(s.prerun);
- lp[6].target=&(s.postrun);
- lp[7].target=lp[8].target=lp[9].target=
- lp[10].target=lp[11].target=
- lp[12].target=&(s.flags);
- lp[13].target=&(s.listenaddr);
- lp[14].target=&(s.max_connections);
/* After the [generic] group, start parsing exports */
if(i==1) {
}
break;
}
- if(!strcmp(p[j].paramname, "port") && !strcmp(p[j].target, NBD_DEFAULT_PORT)) {
- g_set_error(e, errdomain, CFILE_INCORRECT_PORT, "Config file specifies default port for oldstyle export");
+ if(!strcmp(p[j].paramname, "port") && !strcmp(p[j].target, modernport)) {
+ g_set_error(e, errdomain, CFILE_INCORRECT_PORT, "Config file specifies new-style port for oldstyle export");
g_key_file_free(cfile);
return NULL;
}
if(!i) {
msg3(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid);
} else {
- DEBUG2("Removing %d from the list of children", pid);
+ DEBUG("Removing %d from the list of children", pid);
g_hash_table_remove(children, &pid);
}
}
if (es > ((off_t)0)) {
return es;
} else {
- DEBUG2("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4)));
+ DEBUG("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4)));
}
err("Could not find size of exported block device: %m");
* @param buf The buffer to write from
* @param len The length of buf
* @param client The client we're serving for
+ * @param fua Flag to indicate 'Force Unit Access'
* @return The number of bytes actually written, or -1 in case of an error
**/
-ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client) {
+ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
int fhandle;
off_t foffset;
size_t maxbytes;
if(maxbytes && len > maxbytes)
len = maxbytes;
- DEBUG4("(WRITE to fd %d offset %llu len %u), ", fhandle, foffset, len);
+ DEBUG("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle, (long long unsigned)foffset, (unsigned int)len, fua);
myseek(fhandle, foffset);
retval = write(fhandle, buf, len);
if(client->server->flags & F_SYNC) {
fsync(fhandle);
+ } else if (fua) {
+
+ /* This is where we would do the following
+ * #ifdef USE_SYNC_FILE_RANGE
+ * However, we don't, for the reasons set out below
+ * by Christoph Hellwig <hch@infradead.org>
+ *
+ * [BEGINS]
+ * fdatasync is equivalent to fsync except that it does not flush
+ * non-essential metadata (basically just timestamps in practice), but it
+ * does flush metadata required to find the data again, e.g. allocation
+ * information and extent maps. sync_file_range does nothing but flush
+ * out pagecache content - it means you basically won't get your data
+ * back in case of a crash if you either:
+ *
+ * a) have a volatile write cache in your disk (e.g. any normal SATA disk)
+ * b) are using a sparse file on a filesystem
+ * c) are using a fallocate-preallocated file on a filesystem
+ * d) use any file on a COW filesystem like btrfs
+ *
+ * e.g. it only does anything useful for you if you do not have a volatile
+ * write cache, and either use a raw block device node, or just overwrite
+ * an already fully allocated (and not preallocated) file on a non-COW
+ * filesystem.
+ * [ENDS]
+ *
+ * What we should do is open a second FD with O_DSYNC set, then write to
+ * that when appropriate. However, with a Linux client, every REQ_FUA
+ * immediately follows a REQ_FLUSH, so fdatasync does not cause performance
+ * problems.
+ *
+ */
+#if 0
+ sync_file_range(fhandle, foffset, len,
+ SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE |
+ SYNC_FILE_RANGE_WAIT_AFTER);
+#else
+ fdatasync(fhandle);
+#endif
}
return retval;
}
/**
* Call rawexpwrite repeatedly until all data has been written.
+ *
+ * @param a The offset where the write should start
+ * @param buf The buffer to write from
+ * @param len The length of buf
+ * @param client The client we're serving for
+ * @param fua Flag to indicate 'Force Unit Access'
* @return 0 on success, nonzero on failure
**/
-int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client) {
+int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
ssize_t ret=0;
- while(len > 0 && (ret=rawexpwrite(a, buf, len, client)) > 0 ) {
+ while(len > 0 && (ret=rawexpwrite(a, buf, len, client, fua)) > 0 ) {
a += ret;
buf += ret;
len -= ret;
if(maxbytes && len > maxbytes)
len = maxbytes;
- DEBUG4("(READ from fd %d offset %llu len %u), ", fhandle, foffset, len);
+ DEBUG("(READ from fd %d offset %llu len %u), ", fhandle, (long long unsigned int)foffset, (unsigned int)len);
myseek(fhandle, foffset);
return read(fhandle, buf, len);
if (!(client->server->flags & F_COPYONWRITE))
return(rawexpread_fully(a, buf, len, client));
- DEBUG3("Asked to read %d bytes at %llu.\n", len, (unsigned long long)a);
+ DEBUG("Asked to read %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
len : (size_t)DIFFPAGESIZE-offset;
if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
- DEBUG3("Page %llu is at %lu\n", (unsigned long long)mapcnt,
+ DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
(unsigned long)(client->difmap[mapcnt]));
myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset);
if (read(client->difffile, buf, rdlen) != rdlen) return -1;
} else { /* the block is not there */
- DEBUG2("Page %llu is not here, we read the original one\n",
+ DEBUG("Page %llu is not here, we read the original one\n",
(unsigned long long)mapcnt);
if(rawexpread_fully(a, buf, rdlen, client)) return -1;
}
* @param buf The buffer to write from
* @param len The length of buf
* @param client The client we're going to write for.
+ * @param fua Flag to indicate 'Force Unit Access'
* @return 0 on success, nonzero on failure
**/
-int expwrite(off_t a, char *buf, size_t len, CLIENT *client) {
+int expwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
char pagebuf[DIFFPAGESIZE];
off_t mapcnt,mapl,maph;
off_t wrlen,rdlen;
off_t offset;
if (!(client->server->flags & F_COPYONWRITE))
- return(rawexpwrite_fully(a, buf, len, client));
- DEBUG3("Asked to write %d bytes at %llu.\n", len, (unsigned long long)a);
+ return(rawexpwrite_fully(a, buf, len, client, fua));
+ DEBUG("Asked to write %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
len : (size_t)DIFFPAGESIZE-offset;
if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
- DEBUG3("Page %llu is at %lu\n", (unsigned long long)mapcnt,
+ DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
(unsigned long)(client->difmap[mapcnt])) ;
myseek(client->difffile,
client->difmap[mapcnt]*DIFFPAGESIZE+offset);
} else { /* the block is not there */
myseek(client->difffile,client->difffilelen*DIFFPAGESIZE) ;
client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++;
- DEBUG3("Page %llu is not here, we put it at %lu\n",
+ DEBUG("Page %llu is not here, we put it at %lu\n",
(unsigned long long)mapcnt,
(unsigned long)(client->difmap[mapcnt]));
rdlen=DIFFPAGESIZE ;
}
len-=wrlen ; a+=wrlen ; buf+=wrlen ;
}
+ if (client->server->flags & F_SYNC) {
+ fsync(client->difffile);
+ } else if (fua) {
+ /* open question: would it be cheaper to do multiple sync_file_ranges?
+ as we iterate through the above?
+ */
+ fdatasync(client->difffile);
+ }
+ return 0;
+}
+
+/**
+ * Flush a client's backing file(s) with fsync().
+ *
+ * When the export is copy-on-write, only the client's diff file is
+ * synced and the result of that fsync() is returned directly. Otherwise
+ * every file handle in the client's export array is synced in order,
+ * stopping at the first one that fails.
+ *
+ * @param client The client whose backing file(s) should be flushed.
+ * @return 0 on success, nonzero on failure
+ **/
+int expflush(CLIENT *client) {
+	gint idx = 0;
+
+	if (!(client->server->flags & F_COPYONWRITE)) {
+		/* Plain export: walk all files that make up the export. */
+		while (idx < client->export->len) {
+			FILE_INFO entry = g_array_index(client->export, FILE_INFO, idx);
+			if (fsync(entry.fhandle) < 0)
+				return -1;
+			idx++;
+		}
+	} else {
+		/* Copy-on-write: only the diff file is synced here. */
+		return fsync(client->difffile);
+	}
+
return 0;
}
*
* @param client The client we're negotiating with.
**/
-CLIENT* negotiate(int net, CLIENT *client, GArray* servers) {
+CLIENT* negotiate(int net, CLIENT *client, GArray* servers, int phase) {
char zeros[128];
uint64_t size_host;
uint32_t flags = NBD_FLAG_HAS_FLAGS;
uint64_t magic;
memset(zeros, '\0', sizeof(zeros));
- if(!client || !client->modern) {
+ if(phase & NEG_INIT) {
/* common */
if (write(net, INIT_PASSWD, 8) < 0) {
err_nonfatal("Negotiation failed: %m");
if(client)
exit(EXIT_FAILURE);
}
- if(!client || client->modern) {
+ if(phase & NEG_MODERN) {
/* modern */
magic = htonll(opts_magic);
} else {
exit(EXIT_FAILURE);
}
}
- if(!client) {
+ if(phase & NEG_MODERN) {
/* modern */
uint32_t reserved;
uint32_t opt;
client->exportsize = OFFT_MAX;
client->net = net;
client->modern = TRUE;
+ client->transactionlogfd = -1;
+ free(name);
return client;
}
}
+ free(name);
return NULL;
}
/* common */
err("Negotiation failed: %m");
if (client->server->flags & F_READONLY)
flags |= NBD_FLAG_READ_ONLY;
- if (!client->modern) {
+ if (client->server->flags & F_FLUSH)
+ flags |= NBD_FLAG_SEND_FLUSH;
+ if (client->server->flags & F_FUA)
+ flags |= NBD_FLAG_SEND_FUA;
+ if (client->server->flags & F_ROTATIONAL)
+ flags |= NBD_FLAG_ROTATIONAL;
+ if (phase & NEG_OLD) {
/* oldstyle */
flags = htonl(flags);
if (write(client->net, &flags, 4) < 0)
}
/** sending macro. */
-#define SEND(net,reply) writeit( net, &reply, sizeof( reply ));
+/* Write `reply` to `net` and, if the client has a transaction log open
+ * (transactionlogfd != -1), write the same bytes to that log as well.
+ * The body is wrapped in do { } while (0) so that `SEND(...);` is exactly
+ * one statement and stays correct inside an unbraced if/else.
+ * Relies on a variable named `client` being in scope at the call site. */
+#define SEND(net,reply) do { writeit( (net), &(reply), sizeof( reply )); \
+	if (client->transactionlogfd != -1) \
+		writeit(client->transactionlogfd, &(reply), sizeof(reply)); } while (0)
/** error macro. */
#define ERROR(client,reply,errcode) { reply.error = htonl(errcode); SEND(client->net,reply); reply.error = 0; }
/**
#ifdef DODBG
int i = 0;
#endif
- negotiate(client->net, client, NULL);
+ negotiate(client->net, client, NULL, client->modern ? NEG_MODERN : (NEG_OLD | NEG_INIT));
DEBUG("Entering request loop!\n");
reply.magic = htonl(NBD_REPLY_MAGIC);
reply.error = 0;
size_t len;
size_t currlen;
size_t writelen;
+ uint16_t command;
#ifdef DODBG
i++;
printf("%d: ", i);
#endif
readit(client->net, &request, sizeof(request));
+ if (client->transactionlogfd != -1)
+ writeit(client->transactionlogfd, &request, sizeof(request));
+
request.from = ntohll(request.from);
request.type = ntohl(request.type);
+ command = request.type & NBD_CMD_MASK_COMMAND;
+ len = ntohl(request.len);
+
+ DEBUG("%s from %llu (%llu) len %d, ", getcommandname(command),
+ (unsigned long long)request.from,
+ (unsigned long long)request.from / 512, (unsigned int)len);
+
+ if (request.magic != htonl(NBD_REQUEST_MAGIC))
+ err("Not enough magic.");
+
+ memcpy(reply.handle, request.handle, sizeof(reply.handle));
+
+ if ((command==NBD_CMD_WRITE) || (command==NBD_CMD_READ)) {
+ if ((request.from + len) > (OFFT_MAX)) {
+ DEBUG("[Number too large!]");
+ ERROR(client, reply, EINVAL);
+ continue;
+ }
+
+ if (((ssize_t)((off_t)request.from + len) > client->exportsize)) {
+ DEBUG("[RANGE!]");
+ ERROR(client, reply, EINVAL);
+ continue;
+ }
- if (request.type==NBD_CMD_DISC) {
+ currlen = len;
+ if (currlen > BUFSIZE - sizeof(struct nbd_reply)) {
+ currlen = BUFSIZE - sizeof(struct nbd_reply);
+ msg2(LOG_INFO, "oversized request (this is not a problem)");
+ }
+ }
+
+ switch (command) {
+
+ case NBD_CMD_DISC:
msg2(LOG_INFO, "Disconnect request received.");
if (client->server->flags & F_COPYONWRITE) {
if (client->difmap) g_free(client->difmap) ;
}
go_on=FALSE;
continue;
- }
-
- len = ntohl(request.len);
- if (request.magic != htonl(NBD_REQUEST_MAGIC))
- err("Not enough magic.");
- if (len > BUFSIZE - sizeof(struct nbd_reply)) {
- currlen = BUFSIZE - sizeof(struct nbd_reply);
- msg("INFO: oversized request (this is not a problem)");
- } else {
- currlen = len;
- }
-#ifdef DODBG
- printf("%s from %llu (%llu) len %d, ", request.type ? "WRITE" :
- "READ", (unsigned long long)request.from,
- (unsigned long long)request.from / 512, len);
-#endif
- memcpy(reply.handle, request.handle, sizeof(reply.handle));
- if ((request.from + len) > (OFFT_MAX)) {
- DEBUG("[Number too large!]");
- ERROR(client, reply, EINVAL);
- continue;
- }
-
- if (((ssize_t)((off_t)request.from + len) > client->exportsize)) {
- DEBUG("[RANGE!]");
- ERROR(client, reply, EINVAL);
- continue;
- }
-
- if (request.type==NBD_CMD_WRITE) {
+ case NBD_CMD_WRITE:
DEBUG("wr: net->buf, ");
while(len > 0) {
readit(client->net, buf, currlen);
(client->server->flags & F_AUTOREADONLY)) {
DEBUG("[WRITE to READONLY!]");
ERROR(client, reply, EPERM);
+ consume(client->net, buf, len-currlen, BUFSIZE);
continue;
}
- if (expwrite(request.from, buf, len, client)) {
+ if (expwrite(request.from, buf, currlen, client,
+ request.type & NBD_CMD_FLAG_FUA)) {
DEBUG("Write failed: %m" );
ERROR(client, reply, errno);
+ consume(client->net, buf, len-currlen, BUFSIZE);
continue;
}
- SEND(client->net, reply);
- DEBUG("OK!\n");
len -= currlen;
+ request.from += currlen;
currlen = (len < BUFSIZE) ? len : BUFSIZE;
}
+ SEND(client->net, reply);
+ DEBUG("OK!\n");
continue;
- }
- /* READ */
-
- DEBUG("exp->buf, ");
- memcpy(buf, &reply, sizeof(struct nbd_reply));
- p = buf + sizeof(struct nbd_reply);
- writelen = currlen + sizeof(struct nbd_reply);
- while(len > 0) {
- if (expread(request.from, p, currlen, client)) {
- DEBUG("Read failed: %m");
+
+ case NBD_CMD_FLUSH:
+ DEBUG("fl: ");
+ if (expflush(client)) {
+ DEBUG("Flush failed: %m");
ERROR(client, reply, errno);
continue;
}
+ SEND(client->net, reply);
+ DEBUG("OK!\n");
+ continue;
+
+ case NBD_CMD_READ:
+ DEBUG("exp->buf, ");
+ memcpy(buf, &reply, sizeof(struct nbd_reply));
+ if (client->transactionlogfd != -1)
+ writeit(client->transactionlogfd, &reply, sizeof(reply));
+ p = buf + sizeof(struct nbd_reply);
+ writelen = currlen + sizeof(struct nbd_reply);
+ while(len > 0) {
+ if (expread(request.from, p, currlen, client)) {
+ DEBUG("Read failed: %m");
+ ERROR(client, reply, errno);
+ continue;
+ }
+
+ DEBUG("buf->net, ");
+ writeit(client->net, buf, writelen);
+ len -= currlen;
+ request.from += currlen;
+ currlen = (len < BUFSIZE) ? len : BUFSIZE;
+ p = buf;
+ writelen = currlen;
+ }
+ DEBUG("OK!\n");
+ continue;
- DEBUG("buf->net, ");
- writeit(client->net, buf, writelen);
- len -= currlen;
- currlen = (len < BUFSIZE) ? len : BUFSIZE;
- p = buf;
- writelen = currlen;
+ default:
+ DEBUG ("Ignoring unknown command\n");
+ continue;
}
- DEBUG("OK!\n");
}
return 0;
}
} else {
tmpname=g_strdup(client->exportname);
}
- DEBUG2( "Opening %s\n", tmpname );
+ DEBUG( "Opening %s\n", tmpname );
fi.fhandle = open(tmpname, mode);
if(fi.fhandle == -1 && mode == O_RDWR) {
/* Try again because maybe media was read-only */
* @param client a connected client
**/
void serveconnection(CLIENT *client) {
+ if (client->server->transactionlog && (client->transactionlogfd == -1))
+ {
+ if (-1 == (client->transactionlogfd = open(client->server->transactionlog,
+ O_WRONLY | O_CREAT,
+ S_IRUSR | S_IWUSR)))
+ g_warning("Could not open transaction log %s",
+ client->server->transactionlog);
+ }
+
if(do_run(client->server->prerun, client->exportname)) {
exit(EXIT_FAILURE);
}
mainloop(client);
do_run(client->server->postrun, client->exportname);
+
+ if (-1 != client->transactionlogfd)
+ {
+ close(client->transactionlogfd);
+ client->transactionlogfd = -1;
+ }
}
/**
memcpy(&rset, &mset, sizeof(fd_set));
if(select(max+1, &rset, NULL, NULL, NULL)>0) {
int net = 0;
- SERVER* serve;
+ SERVER* serve=NULL;
DEBUG("accept, ");
if(FD_ISSET(modernsock, &rset)) {
if((net=accept(modernsock, (struct sockaddr *) &addrin, &addrinlen)) < 0)
err("accept: %m");
- client = negotiate(net, NULL, servers);
+ client = negotiate(net, NULL, servers, NEG_INIT | NEG_MODERN);
if(!client) {
err_nonfatal("negotiation failed");
close(net);
net=0;
+ continue;
}
+ serve = client->server;
}
for(i=0;i<servers->len && !net;i++) {
serve=&(g_array_index(servers, SERVER, i));
client->server=serve;
client->exportsize=OFFT_MAX;
client->net=net;
+ client->transactionlogfd = -1;
}
set_peername(net, client);
if (!authorized_client(client)) {
}
msg2(LOG_INFO,"Authorized client") ;
pid=g_malloc(sizeof(pid_t));
-#ifndef NOFORK
- if ((*pid=fork())<0) {
- msg3(LOG_INFO,"Could not fork (%s)",strerror(errno)) ;
- close(net);
- continue;
- }
- if (*pid>0) { /* parent */
- close(net);
- g_hash_table_insert(children, pid, pid);
- continue;
- }
- /* child */
- g_hash_table_destroy(children);
- for(i=0;i<servers->len;i++) {
- serve=&g_array_index(servers, SERVER, i);
- close(serve->socket);
+
+ if (!dontfork) {
+ if ((*pid=fork())<0) {
+ msg3(LOG_INFO,"Could not fork (%s)",strerror(errno)) ;
+ close(net);
+ continue;
+ }
+ if (*pid>0) { /* parent */
+ close(net);
+ g_hash_table_insert(children, pid, pid);
+ continue;
+ }
+ /* child */
+ g_hash_table_destroy(children);
+ for(i=0;i<servers->len;i++) {
+ serve=&g_array_index(servers, SERVER, i);
+ close(serve->socket);
+ }
+ /* FALSE does not free the
+ actual data. This is required,
+ because the client has a
+ direct reference into that
+ data, and otherwise we get a
+ segfault... */
+ g_array_free(servers, FALSE);
}
- /* FALSE does not free the
- actual data. This is required,
- because the client has a
- direct reference into that
- data, and otherwise we get a
- segfault... */
- g_array_free(servers, FALSE);
-#endif // NOFORK
+
msg2(LOG_INFO,"Starting to serve");
serveconnection(client);
exit(EXIT_SUCCESS);
hints.ai_socktype = SOCK_STREAM;
hints.ai_family = AF_UNSPEC;
hints.ai_protocol = IPPROTO_TCP;
- e = getaddrinfo(modern_listen, NBD_DEFAULT_PORT, &hints, &ai);
+ e = getaddrinfo(modern_listen, modernport, &hints, &ai);
if(e != 0) {
fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
exit(EXIT_FAILURE);
* is only used to create a PID file of the form
* /var/run/nbd-server.<port>.pid; it's not modified in any way.
**/
-#if !defined(NODAEMON) && !defined(NOFORK)
+#if !defined(NODAEMON)
void daemonize(SERVER* serve) {
FILE*pidf;
}
#else
#define daemonize(serve)
-#endif /* !defined(NODAEMON) && !defined(NOFORK) */
+#endif /* !defined(NODAEMON) */
/*
* Everything beyond this point (in the file) is run in non-daemon mode.
g_message("No configured exports; quitting.");
exit(EXIT_FAILURE);
}
- daemonize(serve);
+ if (!dontfork)
+ daemonize(serve);
setup_servers(servers);
dousers();
serveloop(servers);