/* Debugging macros */
//#define DODBG
#ifdef DODBG
-#define DEBUG( a ) printf( a )
-#define DEBUG2( a,b ) printf( a,b )
-#define DEBUG3( a,b,c ) printf( a,b,c )
-#define DEBUG4( a,b,c,d ) printf( a,b,c,d )
-#define DEBUG5( a,b,c,d,e ) printf( a,b,c,d,e )
+#define DEBUG(...) printf(__VA_ARGS__) /* DODBG defined: hand every argument straight to printf() */
#else
-#define DEBUG( a )
-#define DEBUG2( a,b )
-#define DEBUG3( a,b,c )
-#define DEBUG4( a,b,c,d )
-#define DEBUG5( a,b,c,d,e )
+#define DEBUG(...) /* DODBG not defined: expands to nothing, so the arguments are never evaluated */
#endif
#ifndef PACKAGE_VERSION
#define PACKAGE_VERSION ""
oldstyle is set to false (and then the
command-line client isn't used, gna gna) */
char* modern_listen; /**< listenaddr value for modernsock */
+char* modernport=NBD_DEFAULT_PORT; /**< Port number on which to listen for
+ new-style nbd-client connections */
/**
* Types of virtuatlization
} PARAM;
/**
+ * Translate a command name into human readable form
+ *
+ * @param command The command number (after applying NBD_CMD_MASK_COMMAND)
+ * @return pointer to the command name
+ **/
+static inline const char * getcommandname(uint64_t command) {
+	/* Test each known request type in turn and hand back its symbolic
+	 * name; any value that matches none of them gets the catch-all. */
+	if (command == NBD_CMD_READ) {
+		return "NBD_CMD_READ";
+	}
+	if (command == NBD_CMD_WRITE) {
+		return "NBD_CMD_WRITE";
+	}
+	if (command == NBD_CMD_DISC) {
+		return "NBD_CMD_DISC";
+	}
+	if (command == NBD_CMD_FLUSH) {
+		return "NBD_CMD_FLUSH";
+	}
+	return "UNKNOWN";
+}
+
+/**
* Check whether a client is allowed to connect. Works with an authorization
* file which contains one line per machine, no wildcards.
*
}
/**
+ * Consume data from an FD that we don't want
+ *
+ * @param f a file descriptor
+ * @param buf a buffer
+ * @param len the number of bytes to consume
+ * @param bufsiz the size of the buffer
+ **/
+static inline void consume(int f, void * buf, size_t len, size_t bufsiz) {
+	size_t chunk;
+
+	/* Issue readit() calls of at most bufsiz bytes each until len bytes
+	 * in total have been requested; this function never looks at what
+	 * ends up in buf. */
+	for (; len != 0; len -= chunk) {
+		chunk = len;
+		if (chunk > bufsiz) {
+			chunk = bufsiz;
+		}
+		readit(f, buf, chunk);
+	}
+}
+
+
+/**
* Write data from a buffer into a filedescriptor
*
* @param f a file descriptor
"\t-p|--pid-file\t\tspecify a filename to write our PID to\n"
"\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n"
"\t-M|--max-connections\tspecify the maximum number of opened connections\n\n"
- "\tif port is set to 0, stdin is used (for running from inetd)\n"
+ "\tif port is set to 0, stdin is used (for running from inetd).\n"
"\tif file_to_export contains '%%s', it is substituted with the IP\n"
"\t\taddress of the machine trying to connect\n"
"\tif ip is set, it contains the local IP address on which we're listening.\n\tif not, the server will listen on all local IP addresses\n");
{ "group", FALSE, PARAM_STRING, &rungroup, 0 },
{ "oldstyle", FALSE, PARAM_BOOL, &do_oldstyle, 1 },
{ "listenaddr", FALSE, PARAM_STRING, &modern_listen, 0 },
+ { "port", FALSE, PARAM_STRING, &modernport, 0 },
};
PARAM* p=gp;
int p_size=sizeof(gp)/sizeof(PARAM);
}
break;
}
- if(!strcmp(p[j].paramname, "port") && !strcmp(p[j].target, NBD_DEFAULT_PORT)) {
- g_set_error(e, errdomain, CFILE_INCORRECT_PORT, "Config file specifies default port for oldstyle export");
+ if(!strcmp(p[j].paramname, "port") && !strcmp(p[j].target, modernport)) {
+ g_set_error(e, errdomain, CFILE_INCORRECT_PORT, "Config file specifies new-style port for oldstyle export");
g_key_file_free(cfile);
return NULL;
}
if(!i) {
msg3(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid);
} else {
- DEBUG2("Removing %d from the list of children", pid);
+ DEBUG("Removing %d from the list of children", pid);
g_hash_table_remove(children, &pid);
}
}
if (es > ((off_t)0)) {
return es;
} else {
- DEBUG2("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4)));
+ DEBUG("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4)));
}
err("Could not find size of exported block device: %m");
* @param buf The buffer to write from
* @param len The length of buf
* @param client The client we're serving for
+ * @param fua Flag to indicate 'Force Unit Access'
* @return The number of bytes actually written, or -1 in case of an error
**/
ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
if(maxbytes && len > maxbytes)
len = maxbytes;
- DEBUG5("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle, foffset, len, fua);
+ DEBUG("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle, (long long unsigned)foffset, (unsigned int)len, fua);
myseek(fhandle, foffset);
retval = write(fhandle, buf, len);
if(client->server->flags & F_SYNC) {
fsync(fhandle);
} else if (fua) {
-#ifdef USE_SYNC_FILE_RANGE
+
+ /* This is where we would do the following
+ * #ifdef USE_SYNC_FILE_RANGE
+ * However, we don't, for the reasons set out below
+ * by Christoph Hellwig <hch@infradead.org>
+ *
+ * [BEGINS]
+ * fdatasync is equivalent to fsync except that it does not flush
+ * non-essential metadata (basically just timestamps in practice), but it
+ * does flush metadata required to find the data again, e.g. allocation
+ * information and extent maps. sync_file_range does nothing but flush
+ * out pagecache content - it means you basically won't get your data
+ * back in case of a crash if you either:
+ *
+ * a) have a volatile write cache in your disk (e.g. any normal SATA disk)
+ * b) are using a sparse file on a filesystem
+ * c) are using a fallocate-preallocated file on a filesystem
+ * d) use any file on a COW filesystem like btrfs
+ *
+ * e.g. it only does anything useful for you if you do not have a volatile
+ * write cache, and either use a raw block device node, or just overwrite
+ * an already fully allocated (and not preallocated) file on a non-COW
+ * filesystem.
+ * [ENDS]
+ *
+ * What we should do is open a second FD with O_DSYNC set, then write to
+ * that when appropriate. However, with a Linux client, every REQ_FUA
+ * immediately follows a REQ_FLUSH, so fdatasync does not cause performance
+ * problems.
+ *
+ */
+#if 0
sync_file_range(fhandle, foffset, len,
SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE |
SYNC_FILE_RANGE_WAIT_AFTER);
/**
* Call rawexpwrite repeatedly until all data has been written.
+ *
+ * @param a The offset where the write should start
+ * @param buf The buffer to write from
+ * @param len The length of buf
+ * @param client The client we're serving for
+ * @param fua Flag to indicate 'Force Unit Access'
* @return 0 on success, nonzero on failure
**/
int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
if(maxbytes && len > maxbytes)
len = maxbytes;
- DEBUG4("(READ from fd %d offset %llu len %u), ", fhandle, foffset, len);
+ DEBUG("(READ from fd %d offset %llu len %u), ", fhandle, (long long unsigned int)foffset, (unsigned int)len);
myseek(fhandle, foffset);
return read(fhandle, buf, len);
if (!(client->server->flags & F_COPYONWRITE))
return(rawexpread_fully(a, buf, len, client));
- DEBUG3("Asked to read %d bytes at %llu.\n", len, (unsigned long long)a);
+ DEBUG("Asked to read %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
len : (size_t)DIFFPAGESIZE-offset;
if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
- DEBUG3("Page %llu is at %lu\n", (unsigned long long)mapcnt,
+ DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
(unsigned long)(client->difmap[mapcnt]));
myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset);
if (read(client->difffile, buf, rdlen) != rdlen) return -1;
} else { /* the block is not there */
- DEBUG2("Page %llu is not here, we read the original one\n",
+ DEBUG("Page %llu is not here, we read the original one\n",
(unsigned long long)mapcnt);
if(rawexpread_fully(a, buf, rdlen, client)) return -1;
}
* @param buf The buffer to write from
* @param len The length of buf
* @param client The client we're going to write for.
+ * @param fua Flag to indicate 'Force Unit Access'
* @return 0 on success, nonzero on failure
**/
int expwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
if (!(client->server->flags & F_COPYONWRITE))
return(rawexpwrite_fully(a, buf, len, client, fua));
- DEBUG3("Asked to write %d bytes at %llu.\n", len, (unsigned long long)a);
+ DEBUG("Asked to write %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
len : (size_t)DIFFPAGESIZE-offset;
if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
- DEBUG3("Page %llu is at %lu\n", (unsigned long long)mapcnt,
+ DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
(unsigned long)(client->difmap[mapcnt])) ;
myseek(client->difffile,
client->difmap[mapcnt]*DIFFPAGESIZE+offset);
} else { /* the block is not there */
myseek(client->difffile,client->difffilelen*DIFFPAGESIZE) ;
client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++;
- DEBUG3("Page %llu is not here, we put it at %lu\n",
+ DEBUG("Page %llu is not here, we put it at %lu\n",
(unsigned long long)mapcnt,
(unsigned long)(client->difmap[mapcnt]));
rdlen=DIFFPAGESIZE ;
return 0;
}
+/**
+ * Flush data to a client
+ *
+ * @param client The client we're going to write for.
+ * @return 0 on success, nonzero on failure
+ **/
int expflush(CLIENT *client) {
- int fhandle;
- off_t foffset;
- size_t maxbytes;
gint i;
if (client->server->flags & F_COPYONWRITE) {
request.from = ntohll(request.from);
request.type = ntohl(request.type);
command = request.type & NBD_CMD_MASK_COMMAND;
-
- if (command==NBD_CMD_DISC) {
- msg2(LOG_INFO, "Disconnect request received.");
- if (client->server->flags & F_COPYONWRITE) {
- if (client->difmap) g_free(client->difmap) ;
- close(client->difffile);
- unlink(client->difffilename);
- free(client->difffilename);
- }
- go_on=FALSE;
- continue;
- }
-
len = ntohl(request.len);
+ DEBUG("%s from %llu (%llu) len %d, ", getcommandname(command),
+ (unsigned long long)request.from,
+ (unsigned long long)request.from / 512, (unsigned int)len);
+
if (request.magic != htonl(NBD_REQUEST_MAGIC))
err("Not enough magic.");
- if (len > BUFSIZE - sizeof(struct nbd_reply)) {
- currlen = BUFSIZE - sizeof(struct nbd_reply);
- msg2(LOG_INFO, "oversized request (this is not a problem)");
- } else {
- currlen = len;
- }
-#ifdef DODBG
- printf("%s from %llu (%llu) len %d, ", command ? "WRITE" :
- "READ", (unsigned long long)request.from,
- (unsigned long long)request.from / 512, len);
-#endif
+
memcpy(reply.handle, request.handle, sizeof(reply.handle));
if ((command==NBD_CMD_WRITE) || (command==NBD_CMD_READ)) {
ERROR(client, reply, EINVAL);
continue;
}
+
+ currlen = len;
+ if (currlen > BUFSIZE - sizeof(struct nbd_reply)) {
+ currlen = BUFSIZE - sizeof(struct nbd_reply);
+ msg2(LOG_INFO, "oversized request (this is not a problem)");
+ }
}
- if (command==NBD_CMD_WRITE) {
+ switch (command) {
+
+ case NBD_CMD_DISC:
+ msg2(LOG_INFO, "Disconnect request received.");
+ if (client->server->flags & F_COPYONWRITE) {
+ if (client->difmap) g_free(client->difmap) ;
+ close(client->difffile);
+ unlink(client->difffilename);
+ free(client->difffilename);
+ }
+ go_on=FALSE;
+ continue;
+
+ case NBD_CMD_WRITE:
DEBUG("wr: net->buf, ");
while(len > 0) {
readit(client->net, buf, currlen);
(client->server->flags & F_AUTOREADONLY)) {
DEBUG("[WRITE to READONLY!]");
ERROR(client, reply, EPERM);
+ consume(client->net, buf, len-currlen, BUFSIZE);
continue;
}
- if (expwrite(request.from, buf, len, client,
+ if (expwrite(request.from, buf, currlen, client,
request.type & NBD_CMD_FLAG_FUA)) {
DEBUG("Write failed: %m" );
ERROR(client, reply, errno);
+ consume(client->net, buf, len-currlen, BUFSIZE);
continue;
}
- SEND(client->net, reply);
- DEBUG("OK!\n");
len -= currlen;
+ request.from += currlen;
currlen = (len < BUFSIZE) ? len : BUFSIZE;
}
+ SEND(client->net, reply);
+ DEBUG("OK!\n");
continue;
- }
- if (command==NBD_CMD_FLUSH) {
+ case NBD_CMD_FLUSH:
DEBUG("fl: ");
if (expflush(client)) {
DEBUG("Flush failed: %m");
SEND(client->net, reply);
DEBUG("OK!\n");
continue;
- }
- if (command==NBD_CMD_READ) {
+ case NBD_CMD_READ:
DEBUG("exp->buf, ");
memcpy(buf, &reply, sizeof(struct nbd_reply));
if (client->transactionlogfd != -1)
}
DEBUG("OK!\n");
continue;
- }
- DEBUG ("Ignoring unknown command\n");
+ default:
+ DEBUG ("Ignoring unknown command\n");
+ continue;
+ }
}
return 0;
}
} else {
tmpname=g_strdup(client->exportname);
}
- DEBUG2( "Opening %s\n", tmpname );
+ DEBUG( "Opening %s\n", tmpname );
fi.fhandle = open(tmpname, mode);
if(fi.fhandle == -1 && mode == O_RDWR) {
/* Try again because maybe media was read-only */
hints.ai_socktype = SOCK_STREAM;
hints.ai_family = AF_UNSPEC;
hints.ai_protocol = IPPROTO_TCP;
- e = getaddrinfo(modern_listen, NBD_DEFAULT_PORT, &hints, &ai);
+ e = getaddrinfo(modern_listen, modernport, &hints, &ai);
if(e != 0) {
fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
exit(EXIT_FAILURE);