/* Debugging macros */
//#define DODBG
#ifdef DODBG
-#define DEBUG( a ) printf( a )
-#define DEBUG2( a,b ) printf( a,b )
-#define DEBUG3( a,b,c ) printf( a,b,c )
-#define DEBUG4( a,b,c,d ) printf( a,b,c,d )
-#define DEBUG5( a,b,c,d,e ) printf( a,b,c,d,e )
+#define DEBUG(...) printf(__VA_ARGS__)
#else
-#define DEBUG( a )
-#define DEBUG2( a,b )
-#define DEBUG3( a,b,c )
-#define DEBUG4( a,b,c,d )
-#define DEBUG5( a,b,c,d,e )
+#define DEBUG(...)
#endif
#ifndef PACKAGE_VERSION
#define PACKAGE_VERSION ""
is PARAM_BOOL. */
} PARAM;
+/**
+ * Map an NBD request command code to its symbolic name, for use in
+ * debugging output.
+ *
+ * @param command the command code to translate
+ * @return a string literal naming the command; "UNKNOWN" when the code is
+ * not one of NBD_CMD_READ, NBD_CMD_WRITE, NBD_CMD_DISC or NBD_CMD_FLUSH.
+ * The returned string must not be freed or modified.
+ **/
+static inline const char * getcommandname(uint64_t command) {
+	/* Start from the fallback and overwrite it on a match. */
+	const char *name = "UNKNOWN";
+
+	if (command == NBD_CMD_READ) {
+		name = "NBD_CMD_READ";
+	} else if (command == NBD_CMD_WRITE) {
+		name = "NBD_CMD_WRITE";
+	} else if (command == NBD_CMD_DISC) {
+		name = "NBD_CMD_DISC";
+	} else if (command == NBD_CMD_FLUSH) {
+		name = "NBD_CMD_FLUSH";
+	}
+	return name;
+}
+
/**
* Check whether a client is allowed to connect. Works with an authorization
* file which contains one line per machine, no wildcards.
if(!i) {
msg3(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid);
} else {
- DEBUG2("Removing %d from the list of children", pid);
+ DEBUG("Removing %d from the list of children", pid);
g_hash_table_remove(children, &pid);
}
}
if (es > ((off_t)0)) {
return es;
} else {
- DEBUG2("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4)));
+ DEBUG("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4)));
}
err("Could not find size of exported block device: %m");
if(maxbytes && len > maxbytes)
len = maxbytes;
- DEBUG5("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle, foffset, len, fua);
+ DEBUG("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle, (long long unsigned)foffset, (unsigned int)len, fua);
myseek(fhandle, foffset);
retval = write(fhandle, buf, len);
if(client->server->flags & F_SYNC) {
fsync(fhandle);
} else if (fua) {
-#ifdef USE_SYNC_FILE_RANGE
+
+ /* This is where we would do the following
+ * #ifdef USE_SYNC_FILE_RANGE
+ * However, we don't, for the reasons set out below
+ * by Christoph Hellwig <hch@infradead.org>
+ *
+ * [BEGINS]
+ * fdatasync is equivalent to fsync except that it does not flush
+ * non-essential metadata (basically just timestamps in practice), but it
+ * does flush metadata required to find the data again, e.g. allocation
+ * information and extent maps. sync_file_range does nothing but flush
+ * out pagecache content - it means you basically won't get your data
+ * back in case of a crash if you either:
+ *
+ * a) have a volatile write cache in your disk (e.g. any normal SATA disk)
+ * b) are using a sparse file on a filesystem
+ * c) are using a fallocate-preallocated file on a filesystem
+ * d) use any file on a COW filesystem like btrfs
+ *
+ * e.g. it only does anything useful for you if you do not have a volatile
+ * write cache, and either use a raw block device node, or just overwrite
+ * an already fully allocated (and not preallocated) file on a non-COW
+ * filesystem.
+ * [ENDS]
+ *
+ * What we should do is open a second FD with O_DSYNC set, then write to
+ * that when appropriate. However, with a Linux client, every REQ_FUA
+ * immediately follows a REQ_FLUSH, so fdatasync does not cause performance
+ * problems.
+ *
+ */
+#if 0
sync_file_range(fhandle, foffset, len,
SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE |
SYNC_FILE_RANGE_WAIT_AFTER);
if(maxbytes && len > maxbytes)
len = maxbytes;
- DEBUG4("(READ from fd %d offset %llu len %u), ", fhandle, foffset, len);
+ DEBUG("(READ from fd %d offset %llu len %u), ", fhandle, (long long unsigned int)foffset, (unsigned int)len);
myseek(fhandle, foffset);
return read(fhandle, buf, len);
if (!(client->server->flags & F_COPYONWRITE))
return(rawexpread_fully(a, buf, len, client));
- DEBUG3("Asked to read %d bytes at %llu.\n", len, (unsigned long long)a);
+ DEBUG("Asked to read %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
len : (size_t)DIFFPAGESIZE-offset;
if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
- DEBUG3("Page %llu is at %lu\n", (unsigned long long)mapcnt,
+ DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
(unsigned long)(client->difmap[mapcnt]));
myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset);
if (read(client->difffile, buf, rdlen) != rdlen) return -1;
} else { /* the block is not there */
- DEBUG2("Page %llu is not here, we read the original one\n",
+ DEBUG("Page %llu is not here, we read the original one\n",
(unsigned long long)mapcnt);
if(rawexpread_fully(a, buf, rdlen, client)) return -1;
}
if (!(client->server->flags & F_COPYONWRITE))
return(rawexpwrite_fully(a, buf, len, client, fua));
- DEBUG3("Asked to write %d bytes at %llu.\n", len, (unsigned long long)a);
+ DEBUG("Asked to write %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
len : (size_t)DIFFPAGESIZE-offset;
if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
- DEBUG3("Page %llu is at %lu\n", (unsigned long long)mapcnt,
+ DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
(unsigned long)(client->difmap[mapcnt])) ;
myseek(client->difffile,
client->difmap[mapcnt]*DIFFPAGESIZE+offset);
} else { /* the block is not there */
myseek(client->difffile,client->difffilelen*DIFFPAGESIZE) ;
client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++;
- DEBUG3("Page %llu is not here, we put it at %lu\n",
+ DEBUG("Page %llu is not here, we put it at %lu\n",
(unsigned long long)mapcnt,
(unsigned long)(client->difmap[mapcnt]));
rdlen=DIFFPAGESIZE ;
}
int expflush(CLIENT *client) {
- int fhandle;
- off_t foffset;
- size_t maxbytes;
gint i;
if (client->server->flags & F_COPYONWRITE) {
request.from = ntohll(request.from);
request.type = ntohl(request.type);
command = request.type & NBD_CMD_MASK_COMMAND;
-
- if (command==NBD_CMD_DISC) {
- msg2(LOG_INFO, "Disconnect request received.");
- if (client->server->flags & F_COPYONWRITE) {
- if (client->difmap) g_free(client->difmap) ;
- close(client->difffile);
- unlink(client->difffilename);
- free(client->difffilename);
- }
- go_on=FALSE;
- continue;
- }
-
len = ntohl(request.len);
+ DEBUG("%s from %llu (%llu) len %d, ", getcommandname(command),
+ (unsigned long long)request.from,
+ (unsigned long long)request.from / 512, (unsigned int)len);
+
if (request.magic != htonl(NBD_REQUEST_MAGIC))
err("Not enough magic.");
- if (len > BUFSIZE - sizeof(struct nbd_reply)) {
- currlen = BUFSIZE - sizeof(struct nbd_reply);
- msg2(LOG_INFO, "oversized request (this is not a problem)");
- } else {
- currlen = len;
- }
-#ifdef DODBG
- printf("%s from %llu (%llu) len %d, ", command ? "WRITE" :
- "READ", (unsigned long long)request.from,
- (unsigned long long)request.from / 512, len);
-#endif
+
memcpy(reply.handle, request.handle, sizeof(reply.handle));
if ((command==NBD_CMD_WRITE) || (command==NBD_CMD_READ)) {
ERROR(client, reply, EINVAL);
continue;
}
+
+ currlen = len;
+ if (currlen > BUFSIZE - sizeof(struct nbd_reply)) {
+ currlen = BUFSIZE - sizeof(struct nbd_reply);
+ msg2(LOG_INFO, "oversized request (this is not a problem)");
+ }
}
- if (command==NBD_CMD_WRITE) {
+ switch (command) {
+
+ case NBD_CMD_DISC:
+ msg2(LOG_INFO, "Disconnect request received.");
+ if (client->server->flags & F_COPYONWRITE) {
+ if (client->difmap) g_free(client->difmap) ;
+ close(client->difffile);
+ unlink(client->difffilename);
+ free(client->difffilename);
+ }
+ go_on=FALSE;
+ continue;
+
+ case NBD_CMD_WRITE:
DEBUG("wr: net->buf, ");
while(len > 0) {
readit(client->net, buf, currlen);
ERROR(client, reply, errno);
continue;
}
- SEND(client->net, reply);
- DEBUG("OK!\n");
len -= currlen;
currlen = (len < BUFSIZE) ? len : BUFSIZE;
}
+ SEND(client->net, reply);
+ DEBUG("OK!\n");
continue;
- }
- if (command==NBD_CMD_FLUSH) {
+ case NBD_CMD_FLUSH:
DEBUG("fl: ");
if (expflush(client)) {
DEBUG("Flush failed: %m");
SEND(client->net, reply);
DEBUG("OK!\n");
continue;
- }
- if (command==NBD_CMD_READ) {
+ case NBD_CMD_READ:
DEBUG("exp->buf, ");
memcpy(buf, &reply, sizeof(struct nbd_reply));
if (client->transactionlogfd != -1)
}
DEBUG("OK!\n");
continue;
- }
- DEBUG ("Ignoring unknown command\n");
+ default:
+ DEBUG ("Ignoring unknown command\n");
+ continue;
+ }
}
return 0;
}
} else {
tmpname=g_strdup(client->exportname);
}
- DEBUG2( "Opening %s\n", tmpname );
+ DEBUG( "Opening %s\n", tmpname );
fi.fhandle = open(tmpname, mode);
if(fi.fhandle == -1 && mode == O_RDWR) {
/* Try again because maybe media was read-only */