/** whether to export using the old negotiation protocol (port-based) */
gboolean do_oldstyle=FALSE;
+/**
+ * Whether we should avoid forking. Set to 1 by the -d / --dont-fork
+ * command line option; when nonzero, daemonize() is skipped and an
+ * accepted client is served without calling fork().
+ **/
+int dontfork = 0;
+
/** Logging macros, now nothing goes to syslog unless you say ISSERVER */
#ifdef ISSERVER
#define msg2(a,b) syslog(a,b)
#define DEBUG2( a,b ) printf( a,b )
#define DEBUG3( a,b,c ) printf( a,b,c )
#define DEBUG4( a,b,c,d ) printf( a,b,c,d )
+#define DEBUG5( a,b,c,d,e ) printf( a,b,c,d,e )
#else
#define DEBUG( a )
#define DEBUG2( a,b )
#define DEBUG3( a,b,c )
#define DEBUG4( a,b,c,d )
+#define DEBUG5( a,b,c,d,e )
#endif
#ifndef PACKAGE_VERSION
#define PACKAGE_VERSION ""
#define F_SPARSE 16 /**< flag to tell us copyronwrite should use a sparse file */
#define F_SDP 32 /**< flag to tell us the export should be done using the Socket Direct Protocol for RDMA */
#define F_SYNC 64 /**< Whether to fsync() after a write */
+#define F_FLUSH 128 /**< Whether server wants FLUSH to be sent by the client */
+#define F_FUA 256 /**< Whether server wants FUA to be sent by the client */
+#define F_ROTATIONAL 512 /**< Whether server wants the client to implement the elevator algorithm */
GHashTable *children;
char pidfname[256]; /**< name of our PID file */
char pidftemplate[256]; /**< template to be used for the filename of the PID file */
{"read-only", no_argument, NULL, 'r'},
{"multi-file", no_argument, NULL, 'm'},
{"copy-on-write", no_argument, NULL, 'c'},
+ {"dont-fork", no_argument, NULL, 'd'},
{"authorize-file", required_argument, NULL, 'l'},
{"config-file", required_argument, NULL, 'C'},
{"pid-file", required_argument, NULL, 'p'},
serve=g_new0(SERVER, 1);
serve->authname = g_strdup(default_authname);
serve->virtstyle=VIRT_IPLIT;
- while((c=getopt_long(argc, argv, "-C:cl:mo:rp:M:", long_options, &i))>=0) {
+ while((c=getopt_long(argc, argv, "-C:cdl:mo:rp:M:", long_options, &i))>=0) {
switch (c) {
case 1:
/* non-option argument */
case 'c':
serve->flags |=F_COPYONWRITE;
break;
+ case 'd':
+ dontfork = 1;
+ break;
case 'C':
g_free(config_file_pos);
config_file_pos=g_strdup(optarg);
SERVER s;
gchar *virtstyle=NULL;
PARAM lp[] = {
- { "exportname", TRUE, PARAM_STRING, NULL, 0 },
- { "port", TRUE, PARAM_INT, NULL, 0 },
- { "authfile", FALSE, PARAM_STRING, NULL, 0 },
- { "filesize", FALSE, PARAM_INT, NULL, 0 },
- { "virtstyle", FALSE, PARAM_STRING, NULL, 0 },
- { "prerun", FALSE, PARAM_STRING, NULL, 0 },
- { "postrun", FALSE, PARAM_STRING, NULL, 0 },
- { "readonly", FALSE, PARAM_BOOL, NULL, F_READONLY },
- { "multifile", FALSE, PARAM_BOOL, NULL, F_MULTIFILE },
- { "copyonwrite", FALSE, PARAM_BOOL, NULL, F_COPYONWRITE },
- { "sparse_cow", FALSE, PARAM_BOOL, NULL, F_SPARSE },
- { "sdp", FALSE, PARAM_BOOL, NULL, F_SDP },
- { "sync", FALSE, PARAM_BOOL, NULL, F_SYNC },
- { "listenaddr", FALSE, PARAM_STRING, NULL, 0 },
- { "maxconnections", FALSE, PARAM_INT, NULL, 0 },
+ { "exportname", TRUE, PARAM_STRING, &(s.exportname), 0 },
+ { "port", TRUE, PARAM_INT, &(s.port), 0 },
+ { "authfile", FALSE, PARAM_STRING, &(s.authname), 0 },
+ { "filesize", FALSE, PARAM_INT, &(s.expected_size), 0 },
+ { "virtstyle", FALSE, PARAM_STRING, &(virtstyle), 0 },
+ { "prerun", FALSE, PARAM_STRING, &(s.prerun), 0 },
+ { "postrun", FALSE, PARAM_STRING, &(s.postrun), 0 },
+ { "readonly", FALSE, PARAM_BOOL, &(s.flags), F_READONLY },
+ { "multifile", FALSE, PARAM_BOOL, &(s.flags), F_MULTIFILE },
+ { "copyonwrite", FALSE, PARAM_BOOL, &(s.flags), F_COPYONWRITE },
+ { "sparse_cow", FALSE, PARAM_BOOL, &(s.flags), F_SPARSE },
+ { "sdp", FALSE, PARAM_BOOL, &(s.flags), F_SDP },
+ { "sync", FALSE, PARAM_BOOL, &(s.flags), F_SYNC },
+ { "flush", FALSE, PARAM_BOOL, &(s.flags), F_FLUSH },
+ { "fua", FALSE, PARAM_BOOL, &(s.flags), F_FUA },
+ { "rotational", FALSE, PARAM_BOOL, &(s.flags), F_ROTATIONAL },
+ { "listenaddr", FALSE, PARAM_STRING, &(s.listenaddr), 0 },
+ { "maxconnections", FALSE, PARAM_INT, &(s.max_connections), 0 },
};
const int lp_size=sizeof(lp)/sizeof(PARAM);
PARAM gp[] = {
groups = g_key_file_get_groups(cfile, NULL);
for(i=0;groups[i];i++) {
memset(&s, '\0', sizeof(SERVER));
- lp[0].target=&(s.exportname);
- lp[1].target=&(s.port);
- lp[2].target=&(s.authname);
- lp[3].target=&(s.expected_size);
- lp[4].target=&(virtstyle);
- lp[5].target=&(s.prerun);
- lp[6].target=&(s.postrun);
- lp[7].target=lp[8].target=lp[9].target=
- lp[10].target=lp[11].target=
- lp[12].target=&(s.flags);
- lp[13].target=&(s.listenaddr);
- lp[14].target=&(s.max_connections);
/* After the [generic] group, start parsing exports */
if(i==1) {
* @param client The client we're serving for
* @return The number of bytes actually written, or -1 in case of an error
**/
-ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client) {
+ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
int fhandle;
off_t foffset;
size_t maxbytes;
if(maxbytes && len > maxbytes)
len = maxbytes;
- DEBUG4("(WRITE to fd %d offset %llu len %u), ", fhandle, foffset, len);
+ DEBUG5("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle, foffset, len, fua);
myseek(fhandle, foffset);
retval = write(fhandle, buf, len);
if(client->server->flags & F_SYNC) {
fsync(fhandle);
+ } else if (fua) {
+#ifdef USE_SYNC_FILE_RANGE
+ sync_file_range(fhandle, foffset, len,
+ SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE |
+ SYNC_FILE_RANGE_WAIT_AFTER);
+#else
+ fdatasync(fhandle);
+#endif
}
return retval;
}
* Call rawexpwrite repeatedly until all data has been written.
* @return 0 on success, nonzero on failure
**/
-int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client) {
+int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
ssize_t ret=0;
- while(len > 0 && (ret=rawexpwrite(a, buf, len, client)) > 0 ) {
+ while(len > 0 && (ret=rawexpwrite(a, buf, len, client, fua)) > 0 ) {
a += ret;
buf += ret;
len -= ret;
* @param client The client we're going to write for.
* @return 0 on success, nonzero on failure
**/
-int expwrite(off_t a, char *buf, size_t len, CLIENT *client) {
+int expwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
char pagebuf[DIFFPAGESIZE];
off_t mapcnt,mapl,maph;
off_t wrlen,rdlen;
off_t offset;
if (!(client->server->flags & F_COPYONWRITE))
- return(rawexpwrite_fully(a, buf, len, client));
+ return(rawexpwrite_fully(a, buf, len, client, fua));
DEBUG3("Asked to write %d bytes at %llu.\n", len, (unsigned long long)a);
mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
}
len-=wrlen ; a+=wrlen ; buf+=wrlen ;
}
+ if (client->server->flags & F_SYNC) {
+ fsync(client->difffile);
+ } else if (fua) {
+ /* open question: would it be cheaper to do multiple sync_file_ranges?
+ as we iterate through the above?
+ */
+ fdatasync(client->difffile);
+ }
+ return 0;
+}
+
+/**
+ * Flush data previously written for a client to stable storage.
+ *
+ * If the export has F_COPYONWRITE set, only the client's diff file is
+ * fsync()ed; otherwise every file handle in the client's export array
+ * is fsync()ed in turn, stopping at the first failure.
+ *
+ * @param client The client whose backing file(s) should be flushed
+ * @return 0 on success, -1 if an fsync() call failed
+ **/
+int expflush(CLIENT *client) {
+	guint i;	/* unsigned, to match the type of GArray's len field */
+
+	if (client->server->flags & F_COPYONWRITE) {
+		return fsync(client->difffile);
+	}
+
+	for (i = 0; i < client->export->len; i++) {
+		FILE_INFO fi = g_array_index(client->export, FILE_INFO, i);
+		if (fsync(fi.fhandle) < 0)
+			return -1;
+	}
+
return 0;
}
err("Negotiation failed: %m");
if (client->server->flags & F_READONLY)
flags |= NBD_FLAG_READ_ONLY;
+ if (client->server->flags & F_FLUSH)
+ flags |= NBD_FLAG_SEND_FLUSH;
+ if (client->server->flags & F_FUA)
+ flags |= NBD_FLAG_SEND_FUA;
+ if (client->server->flags & F_ROTATIONAL)
+ flags |= NBD_FLAG_ROTATIONAL;
if (!client->modern) {
/* oldstyle */
flags = htonl(flags);
size_t len;
size_t currlen;
size_t writelen;
+ uint16_t command;
#ifdef DODBG
i++;
printf("%d: ", i);
readit(client->net, &request, sizeof(request));
request.from = ntohll(request.from);
request.type = ntohl(request.type);
+ command = request.type & NBD_CMD_MASK_COMMAND;
- if (request.type==NBD_CMD_DISC) {
+ if (command==NBD_CMD_DISC) {
msg2(LOG_INFO, "Disconnect request received.");
if (client->server->flags & F_COPYONWRITE) {
if (client->difmap) g_free(client->difmap) ;
currlen = len;
}
#ifdef DODBG
- printf("%s from %llu (%llu) len %d, ", request.type ? "WRITE" :
+ printf("%s from %llu (%llu) len %d, ", command ? "WRITE" :
"READ", (unsigned long long)request.from,
(unsigned long long)request.from / 512, len);
#endif
memcpy(reply.handle, request.handle, sizeof(reply.handle));
- if ((request.from + len) > (OFFT_MAX)) {
- DEBUG("[Number too large!]");
- ERROR(client, reply, EINVAL);
- continue;
- }
- if (((ssize_t)((off_t)request.from + len) > client->exportsize)) {
- DEBUG("[RANGE!]");
- ERROR(client, reply, EINVAL);
- continue;
+ if ((command==NBD_CMD_WRITE) || (command==NBD_CMD_READ)) {
+ if ((request.from + len) > (OFFT_MAX)) {
+ DEBUG("[Number too large!]");
+ ERROR(client, reply, EINVAL);
+ continue;
+ }
+
+ if (((ssize_t)((off_t)request.from + len) > client->exportsize)) {
+ DEBUG("[RANGE!]");
+ ERROR(client, reply, EINVAL);
+ continue;
+ }
}
- if (request.type==NBD_CMD_WRITE) {
+ if (command==NBD_CMD_WRITE) {
DEBUG("wr: net->buf, ");
while(len > 0) {
readit(client->net, buf, currlen);
ERROR(client, reply, EPERM);
continue;
}
- if (expwrite(request.from, buf, len, client)) {
+ if (expwrite(request.from, buf, len, client,
+ request.type & NBD_CMD_FLAG_FUA)) {
DEBUG("Write failed: %m" );
ERROR(client, reply, errno);
continue;
}
continue;
}
- /* READ */
-
- DEBUG("exp->buf, ");
- memcpy(buf, &reply, sizeof(struct nbd_reply));
- p = buf + sizeof(struct nbd_reply);
- writelen = currlen + sizeof(struct nbd_reply);
- while(len > 0) {
- if (expread(request.from, p, currlen, client)) {
- DEBUG("Read failed: %m");
+
+ if (command==NBD_CMD_FLUSH) {
+ DEBUG("fl: ");
+ if (expflush(client)) {
+ DEBUG("Flush failed: %m");
ERROR(client, reply, errno);
continue;
}
+ SEND(client->net, reply);
+ DEBUG("OK!\n");
+ continue;
+ }
- DEBUG("buf->net, ");
- writeit(client->net, buf, writelen);
- len -= currlen;
- currlen = (len < BUFSIZE) ? len : BUFSIZE;
- p = buf;
- writelen = currlen;
+ if (command==NBD_CMD_READ) {
+ DEBUG("exp->buf, ");
+ memcpy(buf, &reply, sizeof(struct nbd_reply));
+ p = buf + sizeof(struct nbd_reply);
+ writelen = currlen + sizeof(struct nbd_reply);
+ while(len > 0) {
+ if (expread(request.from, p, currlen, client)) {
+ DEBUG("Read failed: %m");
+ ERROR(client, reply, errno);
+ continue;
+ }
+
+ DEBUG("buf->net, ");
+ writeit(client->net, buf, writelen);
+ len -= currlen;
+ currlen = (len < BUFSIZE) ? len : BUFSIZE;
+ p = buf;
+ writelen = currlen;
+ }
+ DEBUG("OK!\n");
+ continue;
}
- DEBUG("OK!\n");
+
+ DEBUG ("Ignoring unknown command\n");
}
return 0;
}
err_nonfatal("negotiation failed");
close(net);
net=0;
+ continue;
}
serve = client->server;
}
}
msg2(LOG_INFO,"Authorized client") ;
pid=g_malloc(sizeof(pid_t));
-#ifndef NOFORK
- if ((*pid=fork())<0) {
- msg3(LOG_INFO,"Could not fork (%s)",strerror(errno)) ;
- close(net);
- continue;
- }
- if (*pid>0) { /* parent */
- close(net);
- g_hash_table_insert(children, pid, pid);
- continue;
- }
- /* child */
- g_hash_table_destroy(children);
- for(i=0;i<servers->len;i++) {
- serve=&g_array_index(servers, SERVER, i);
- close(serve->socket);
+
+ if (!dontfork) {
+ if ((*pid=fork())<0) {
+ msg3(LOG_INFO,"Could not fork (%s)",strerror(errno)) ;
+ close(net);
+ continue;
+ }
+ if (*pid>0) { /* parent */
+ close(net);
+ g_hash_table_insert(children, pid, pid);
+ continue;
+ }
+ /* child */
+ g_hash_table_destroy(children);
+ for(i=0;i<servers->len;i++) {
+ serve=&g_array_index(servers, SERVER, i);
+ close(serve->socket);
+ }
+ /* FALSE does not free the
+ actual data. This is required,
+ because the client has a
+ direct reference into that
+ data, and otherwise we get a
+ segfault... */
+ g_array_free(servers, FALSE);
}
- /* FALSE does not free the
- actual data. This is required,
- because the client has a
- direct reference into that
- data, and otherwise we get a
- segfault... */
- g_array_free(servers, FALSE);
-#endif // NOFORK
+
msg2(LOG_INFO,"Starting to serve");
serveconnection(client);
exit(EXIT_SUCCESS);
* is only used to create a PID file of the form
* /var/run/nbd-server.<port>.pid; it's not modified in any way.
**/
-#if !defined(NODAEMON) && !defined(NOFORK)
+#if !defined(NODAEMON)
void daemonize(SERVER* serve) {
FILE*pidf;
}
#else
#define daemonize(serve)
-#endif /* !defined(NODAEMON) && !defined(NOFORK) */
+#endif /* !defined(NODAEMON) */
/*
* Everything beyond this point (in the file) is run in non-daemon mode.
g_message("No configured exports; quitting.");
exit(EXIT_FAILURE);
}
- daemonize(serve);
+ if (!dontfork)
+ daemonize(serve);
setup_servers(servers);
dousers();
serveloop(servers);