Add missing break
[nbd.git] / nbd-server.c
index 52f777d..aea5d2f 100644 (file)
 #define MY_NAME "nbd_server"
 #include "cliserv.h"
 
+#ifdef WITH_SDP
+#include <sdp_inet.h>
+#endif
+
 /** Default position of the config file */
 #ifndef SYSCONFDIR
 #define SYSCONFDIR "/etc"
@@ -109,6 +113,11 @@ gchar* config_file_pos;
 gchar* runuser=NULL;
 /** What group we're running as */
 gchar* rungroup=NULL;
+/** whether to export using the old negotiation protocol (port-based) */
+gboolean do_oldstyle=FALSE;
+
+/* Whether we should avoid forking */
+int dontfork = 0;
 
 /** Logging macros, now nothing goes to syslog unless you say ISSERVER */
 #ifdef ISSERVER
@@ -124,15 +133,9 @@ gchar* rungroup=NULL;
 /* Debugging macros */
 //#define DODBG
 #ifdef DODBG
-#define DEBUG( a ) printf( a )
-#define DEBUG2( a,b ) printf( a,b )
-#define DEBUG3( a,b,c ) printf( a,b,c )
-#define DEBUG4( a,b,c,d ) printf( a,b,c,d )
+#define DEBUG(...) printf(__VA_ARGS__)
 #else
-#define DEBUG( a )
-#define DEBUG2( a,b ) 
-#define DEBUG3( a,b,c ) 
-#define DEBUG4( a,b,c,d ) 
+#define DEBUG(...)
 #endif
 #ifndef PACKAGE_VERSION
 #define PACKAGE_VERSION ""
@@ -144,7 +147,7 @@ gchar* rungroup=NULL;
 #define OFFT_MAX ~((off_t)1<<(sizeof(off_t)*8-1))
 #define LINELEN 256      /**< Size of static buffer used to read the
                               authorization file (yuck) */
-#define BUFSIZE (1024*1024) /**< Size of buffer that can hold requests */
+#define BUFSIZE ((1024*1024)+sizeof(struct nbd_reply)) /**< Size of buffer that can hold requests */
 #define DIFFPAGESIZE 4096 /**< diff file uses those chunks */
 #define F_READONLY 1      /**< flag to tell us a file is readonly */
 #define F_MULTIFILE 2    /**< flag to tell us a file is exported using -m */
@@ -154,11 +157,27 @@ gchar* rungroup=NULL;
 #define F_SPARSE 16      /**< flag to tell us copyronwrite should use a sparse file */
 #define F_SDP 32         /**< flag to tell us the export should be done using the Socket Direct Protocol for RDMA */
 #define F_SYNC 64        /**< Whether to fsync() after a write */
+#define F_FLUSH 128      /**< Whether server wants FLUSH to be sent by the client */
+#define F_FUA 256        /**< Whether server wants FUA to be sent by the client */
+#define F_ROTATIONAL 512  /**< Whether server wants the client to implement the elevator algorithm */
 GHashTable *children;
 char pidfname[256]; /**< name of our PID file */
 char pidftemplate[256]; /**< template to be used for the filename of the PID file */
 char default_authname[] = SYSCONFDIR "/nbd-server/allow"; /**< default name of allow file */
 
+#define NEG_INIT       (1 << 0)
+#define NEG_OLD                (1 << 1)
+#define NEG_MODERN     (1 << 2)
+
+int modernsock=0;        /**< Socket for the modern handler. Not used
+                              if a client was only specified on the
+                              command line; only port used if
+                              oldstyle is set to false (and then the
+                              command-line client isn't used, gna gna) */
+char* modern_listen;     /**< listenaddr value for modernsock */
+char* modernport=NBD_DEFAULT_PORT; /**< Port number on which to listen for
+                                     new-style nbd-client connections */
+
 /**
  * Types of virtuatlization
  **/
@@ -190,6 +209,9 @@ typedef struct {
                                  but before starting to serve */
        gchar* postrun;      /**< command that will be ran after the client
                                  disconnects */
+       gchar* servename;    /**< name of the export as selected by nbd-client */
+       int max_connections; /**< maximum number of opened connections */
+       gchar* transactionlog;/**< filename for transaction log */
 } SERVER;
 
 /**
@@ -215,6 +237,8 @@ typedef struct {
                               make -m and -c mutually exclusive */
        u32 difffilelen;     /**< number of pages in difffile */
        u32 *difmap;         /**< see comment on the global difmap for this one */
+       gboolean modern;     /**< client was negotiated using modern negotiation protocol */
+       int transactionlogfd;/**< fd for transaction log */
 } CLIENT;
 
 /**
@@ -244,6 +268,28 @@ typedef struct {
 } PARAM;
 
 /**
+ * Translate an NBD command number into its human-readable name.
+ *
+ * Only NBD_CMD_READ, NBD_CMD_WRITE, NBD_CMD_DISC and NBD_CMD_FLUSH are
+ * recognised; every other value yields the string UNKNOWN.
+ *
+ * @param command The command number (after applying NBD_CMD_MASK_COMMAND)
+ * @return pointer to a constant string literal naming the command; it must
+ * not be modified or freed by the caller
+ **/
+static inline const char * getcommandname(uint64_t command) {
+       switch (command) {
+       case NBD_CMD_READ:
+               return "NBD_CMD_READ";
+       case NBD_CMD_WRITE:
+               return "NBD_CMD_WRITE";
+       case NBD_CMD_DISC:
+               return "NBD_CMD_DISC";
+       case NBD_CMD_FLUSH:
+               return "NBD_CMD_FLUSH";
+       default: /* any other value leaves the switch and gets the UNKNOWN return below */
+               break;
+       }
+       return "UNKNOWN";
+}
+
+/**
  * Check whether a client is allowed to connect. Works with an authorization
  * file which contains one line per machine, no wildcards.
  *
@@ -274,7 +320,7 @@ int authorized_client(CLIENT *opts) {
                                return 0;
                        }
                        *(tmp++)=0;
-                       if(inet_aton(line,&addr)) {
+                       if(!inet_aton(line,&addr)) {
                                msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
                                return 0;
                        }
@@ -304,7 +350,7 @@ int authorized_client(CLIENT *opts) {
  * @param buf a buffer
  * @param len the number of bytes to be read
  **/
-inline void readit(int f, void *buf, size_t len) {
+static inline void readit(int f, void *buf, size_t len) {
        ssize_t res;
        while (len > 0) {
                DEBUG("*");
@@ -320,13 +366,31 @@ inline void readit(int f, void *buf, size_t len) {
 }
 
 /**
+ * Read and discard data from a file descriptor.
+ *
+ * The data is pulled through readit() in chunks of at most bufsiz bytes;
+ * every chunk is read into the start of buf, so nothing is kept.
+ *
+ * NOTE(review): if bufsiz is 0 while len is nonzero, curlen is always 0 and
+ * len never shrinks, so the loop would not end on its own -- confirm that
+ * all callers pass a non-empty buffer.
+ *
+ * @param f a file descriptor to read from
+ * @param buf scratch buffer that receives (and is overwritten by) each chunk
+ * @param len the total number of bytes to consume
+ * @param bufsiz the size of buf in bytes
+ **/
+static inline void consume(int f, void * buf, size_t len, size_t bufsiz) {
+       size_t curlen;
+       while (len>0) {
+               curlen = (len>bufsiz)?bufsiz:len; /* never ask for more than buf can hold */
+               readit(f, buf, curlen);
+               len -= curlen;
+       }
+}
+
+
+/**
  * Write data from a buffer into a filedescriptor
  *
  * @param f a file descriptor
  * @param buf a buffer containing data
  * @param len the number of bytes to be written
  **/
-inline void writeit(int f, void *buf, size_t len) {
+static inline void writeit(int f, void *buf, size_t len) {
        ssize_t res;
        while (len > 0) {
                DEBUG("+");
@@ -343,15 +407,16 @@ inline void writeit(int f, void *buf, size_t len) {
  */
 void usage() {
        printf("This is nbd-server version " VERSION "\n");
-       printf("Usage: [ip:|ip6@]port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-C configuration file] [-p PID file name] [-o section name]\n"
+       printf("Usage: [ip:|ip6@]port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-C configuration file] [-p PID file name] [-o section name] [-M max connections]\n"
               "\t-r|--read-only\t\tread only\n"
               "\t-m|--multi-file\t\tmultiple file\n"
               "\t-c|--copy-on-write\tcopy on write\n"
               "\t-C|--config-file\tspecify an alternate configuration file\n"
               "\t-l|--authorize-file\tfile with list of hosts that are allowed to\n\t\t\t\tconnect.\n"
               "\t-p|--pid-file\t\tspecify a filename to write our PID to\n"
-              "\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n\n"
-              "\tif port is set to 0, stdin is used (for running from inetd)\n"
+              "\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n"
+              "\t-M|--max-connections\tspecify the maximum number of opened connections\n\n"
+              "\tif port is set to 0, stdin is used (for running from inetd).\n"
               "\tif file_to_export contains '%%s', it is substituted with the IP\n"
               "\t\taddress of the machine trying to connect\n" 
               "\tif ip is set, it contains the local IP address on which we're listening.\n\tif not, the server will listen on all local IP addresses\n");
@@ -396,10 +461,12 @@ SERVER* cmdline(int argc, char *argv[]) {
                {"read-only", no_argument, NULL, 'r'},
                {"multi-file", no_argument, NULL, 'm'},
                {"copy-on-write", no_argument, NULL, 'c'},
+               {"dont-fork", no_argument, NULL, 'd'},
                {"authorize-file", required_argument, NULL, 'l'},
                {"config-file", required_argument, NULL, 'C'},
                {"pid-file", required_argument, NULL, 'p'},
                {"output-config", required_argument, NULL, 'o'},
+               {"max-connection", required_argument, NULL, 'M'},
                {0,0,0,0}
        };
        SERVER *serve;
@@ -416,7 +483,7 @@ SERVER* cmdline(int argc, char *argv[]) {
        serve=g_new0(SERVER, 1);
        serve->authname = g_strdup(default_authname);
        serve->virtstyle=VIRT_IPLIT;
-       while((c=getopt_long(argc, argv, "-C:cl:mo:rp:", long_options, &i))>=0) {
+       while((c=getopt_long(argc, argv, "-C:cdl:mo:rp:M:", long_options, &i))>=0) {
                switch (c) {
                case 1:
                        /* non-option argument */
@@ -485,6 +552,9 @@ SERVER* cmdline(int argc, char *argv[]) {
                case 'c': 
                        serve->flags |=F_COPYONWRITE;
                        break;
+               case 'd': 
+                       dontfork = 1;
+                       break;
                case 'C':
                        g_free(config_file_pos);
                        config_file_pos=g_strdup(optarg);
@@ -493,6 +563,9 @@ SERVER* cmdline(int argc, char *argv[]) {
                        g_free(serve->authname);
                        serve->authname=g_strdup(optarg);
                        break;
+               case 'M':
+                       serve->max_connections = strtol(optarg, NULL, 0);
+                       break;
                default:
                        usage();
                        exit(EXIT_FAILURE);
@@ -504,6 +577,8 @@ SERVER* cmdline(int argc, char *argv[]) {
        if(nonspecial<2) {
                g_free(serve);
                serve=NULL;
+       } else {
+               do_oldstyle = TRUE;
        }
        if(do_output) {
                if(!serve) {
@@ -525,8 +600,10 @@ typedef enum {
        CFILE_VALUE_INVALID,    /**< A value is syntactically invalid */
        CFILE_VALUE_UNSUPPORTED,/**< A value is not supported in this build */
        CFILE_PROGERR,          /**< Programmer error */
-       CFILE_NO_EXPORTS        /**< A config file was specified that does not
+       CFILE_NO_EXPORTS,       /**< A config file was specified that does not
                                     define any exports */
+       CFILE_INCORRECT_PORT,   /**< The reserved port was specified for an
+                                    old-style export. */
 } CFILE_ERRORS;
 
 /**
@@ -545,6 +622,8 @@ void remove_server(gpointer s) {
                g_free(server->prerun);
        if(server->postrun)
                g_free(server->postrun);
+       if(server->transactionlog)
+               g_free(server->transactionlog);
        g_free(server);
 }
 
@@ -557,33 +636,42 @@ SERVER* dup_serve(SERVER *s) {
        SERVER *serve = NULL;
 
        serve=g_new0(SERVER, 1);
-       if (serve == NULL)
+       if(serve == NULL)
                return NULL;
 
-       if (s->exportname)
+       if(s->exportname)
                serve->exportname = g_strdup(s->exportname);
 
        serve->expected_size = s->expected_size;
 
-       if (s->listenaddr)
+       if(s->listenaddr)
                serve->listenaddr = g_strdup(s->listenaddr);
 
        serve->port = s->port;
 
-       if (s->authname)
+       if(s->authname)
                serve->authname = strdup(s->authname);
 
        serve->flags = s->flags;
-       serve->socket = serve->socket;
-       serve->socket_family = serve->socket_family;
+       serve->socket = s->socket;
+       serve->socket_family = s->socket_family;
+       serve->virtstyle = s->virtstyle;
        serve->cidrlen = s->cidrlen;
 
-       if (s->prerun)
+       if(s->prerun)
                serve->prerun = g_strdup(s->prerun);
 
-       if (s->postrun)
+       if(s->postrun)
                serve->postrun = g_strdup(s->postrun);
 
+       if(s->transactionlog)
+               serve->transactionlog = g_strdup(s->transactionlog);
+       
+       if(s->servename)
+               serve->servename = g_strdup(s->servename);
+
+       serve->max_connections = s->max_connections;
+
        return serve;
 }
 
@@ -593,8 +681,7 @@ SERVER* dup_serve(SERVER *s) {
  * @param a server array
  * @return 0 success, -1 error
  */
-int append_serve(SERVER *s, GArray *a)
-{
+int append_serve(SERVER *s, GArray *a) {
        SERVER *ns = NULL;
        struct addrinfo hints;
        struct addrinfo *ai = NULL;
@@ -670,25 +757,33 @@ GArray* parse_cfile(gchar* f, GError** e) {
        SERVER s;
        gchar *virtstyle=NULL;
        PARAM lp[] = {
-               { "exportname", TRUE,   PARAM_STRING,   NULL, 0 },
-               { "port",       TRUE,   PARAM_INT,      NULL, 0 },
-               { "authfile",   FALSE,  PARAM_STRING,   NULL, 0 },
-               { "filesize",   FALSE,  PARAM_INT,      NULL, 0 },
-               { "virtstyle",  FALSE,  PARAM_STRING,   NULL, 0 },
-               { "prerun",     FALSE,  PARAM_STRING,   NULL, 0 },
-               { "postrun",    FALSE,  PARAM_STRING,   NULL, 0 },
-               { "readonly",   FALSE,  PARAM_BOOL,     NULL, F_READONLY },
-               { "multifile",  FALSE,  PARAM_BOOL,     NULL, F_MULTIFILE },
-               { "copyonwrite", FALSE, PARAM_BOOL,     NULL, F_COPYONWRITE },
-               { "sparse_cow", FALSE,  PARAM_BOOL,     NULL, F_SPARSE },
-               { "sdp",        FALSE,  PARAM_BOOL,     NULL, F_SDP },
-               { "sync",       FALSE,  PARAM_BOOL,     NULL, F_SYNC },
-               { "listenaddr", FALSE,  PARAM_STRING,   NULL, 0 },
+               { "exportname", TRUE,   PARAM_STRING,   &(s.exportname),        0 },
+               { "port",       TRUE,   PARAM_INT,      &(s.port),              0 },
+               { "authfile",   FALSE,  PARAM_STRING,   &(s.authname),          0 },
+               { "filesize",   FALSE,  PARAM_INT,      &(s.expected_size),     0 },
+               { "virtstyle",  FALSE,  PARAM_STRING,   &(virtstyle),           0 },
+               { "prerun",     FALSE,  PARAM_STRING,   &(s.prerun),            0 },
+               { "postrun",    FALSE,  PARAM_STRING,   &(s.postrun),           0 },
+               { "transactionlog", FALSE, PARAM_STRING, &(s.transactionlog),   0 },
+               { "readonly",   FALSE,  PARAM_BOOL,     &(s.flags),             F_READONLY },
+               { "multifile",  FALSE,  PARAM_BOOL,     &(s.flags),             F_MULTIFILE },
+               { "copyonwrite", FALSE, PARAM_BOOL,     &(s.flags),             F_COPYONWRITE },
+               { "sparse_cow", FALSE,  PARAM_BOOL,     &(s.flags),             F_SPARSE },
+               { "sdp",        FALSE,  PARAM_BOOL,     &(s.flags),             F_SDP },
+               { "sync",       FALSE,  PARAM_BOOL,     &(s.flags),             F_SYNC },
+               { "flush",      FALSE,  PARAM_BOOL,     &(s.flags),             F_FLUSH },
+               { "fua",        FALSE,  PARAM_BOOL,     &(s.flags),             F_FUA },
+               { "rotational", FALSE,  PARAM_BOOL,     &(s.flags),             F_ROTATIONAL },
+               { "listenaddr", FALSE,  PARAM_STRING,   &(s.listenaddr),        0 },
+               { "maxconnections", FALSE, PARAM_INT,   &(s.max_connections),   0 },
        };
        const int lp_size=sizeof(lp)/sizeof(PARAM);
        PARAM gp[] = {
                { "user",       FALSE, PARAM_STRING,    &runuser,       0 },
                { "group",      FALSE, PARAM_STRING,    &rungroup,      0 },
+               { "oldstyle",   FALSE, PARAM_BOOL,      &do_oldstyle,   1 },
+               { "listenaddr", FALSE, PARAM_STRING,    &modern_listen, 0 },
+               { "port",       FALSE, PARAM_STRING,    &modernport,    0 },
        };
        PARAM* p=gp;
        int p_size=sizeof(gp)/sizeof(PARAM);
@@ -698,7 +793,9 @@ GArray* parse_cfile(gchar* f, GError** e) {
        GQuark errdomain;
        GArray *retval=NULL;
        gchar **groups;
-       gboolean value;
+       gboolean bval;
+       gint ival;
+       gchar* sval;
        gchar* startgroup;
        gint i;
        gint j;
@@ -721,17 +818,6 @@ GArray* parse_cfile(gchar* f, GError** e) {
        groups = g_key_file_get_groups(cfile, NULL);
        for(i=0;groups[i];i++) {
                memset(&s, '\0', sizeof(SERVER));
-               lp[0].target=&(s.exportname);
-               lp[1].target=&(s.port);
-               lp[2].target=&(s.authname);
-               lp[3].target=&(s.expected_size);
-               lp[4].target=&(virtstyle);
-               lp[5].target=&(s.prerun);
-               lp[6].target=&(s.postrun);
-               lp[7].target=lp[8].target=lp[9].target=
-                               lp[10].target=lp[11].target=
-                               lp[12].target=&(s.flags);
-               lp[13].target=&(s.listenaddr);
 
                /* After the [generic] group, start parsing exports */
                if(i==1) {
@@ -743,25 +829,29 @@ GArray* parse_cfile(gchar* f, GError** e) {
                        g_assert(p[j].ptype==PARAM_INT||p[j].ptype==PARAM_STRING||p[j].ptype==PARAM_BOOL);
                        switch(p[j].ptype) {
                                case PARAM_INT:
-                                       *((gint*)p[j].target) =
-                                               g_key_file_get_integer(cfile,
+                                       ival = g_key_file_get_integer(cfile,
                                                                groups[i],
                                                                p[j].paramname,
                                                                &err);
+                                       if(!err) {
+                                               *((gint*)p[j].target) = ival;
+                                       }
                                        break;
                                case PARAM_STRING:
-                                       *((gchar**)p[j].target) =
-                                               g_key_file_get_string(cfile,
+                                       sval = g_key_file_get_string(cfile,
                                                                groups[i],
                                                                p[j].paramname,
                                                                &err);
+                                       if(!err) {
+                                               *((gchar**)p[j].target) = sval;
+                                       }
                                        break;
                                case PARAM_BOOL:
-                                       value = g_key_file_get_boolean(cfile,
+                                       bval = g_key_file_get_boolean(cfile,
                                                        groups[i],
                                                        p[j].paramname, &err);
                                        if(!err) {
-                                               if(value) {
+                                               if(bval) {
                                                        *((gint*)p[j].target) |= p[j].flagval;
                                                } else {
                                                        *((gint*)p[j].target) &= ~(p[j].flagval);
@@ -810,6 +900,10 @@ GArray* parse_cfile(gchar* f, GError** e) {
                                g_key_file_free(cfile);
                                return NULL;
                        }
+                       if(s.port && !do_oldstyle) {
+                               g_warning("A port was specified, but oldstyle exports were not requested. This may not do what you expect.");
+                               g_warning("Please read 'man 5 nbd-server' and search for oldstyle for more info");
+                       }
                } else {
                        s.virtstyle=VIRT_IPLIT;
                }
@@ -818,8 +912,13 @@ GArray* parse_cfile(gchar* f, GError** e) {
                /* Don't append values for the [generic] group */
                if(i>0) {
                        s.socket_family = AF_UNSPEC;
+                       s.servename = groups[i];
 
                        append_serve(&s, retval);
+               } else {
+                       if(!do_oldstyle) {
+                               lp[1].required = 0;
+                       }
                }
 #ifndef WITH_SDP
                if(s.flags & F_SDP) {
@@ -855,7 +954,7 @@ void sigchld_handler(int s) {
                if(!i) {
                        msg3(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid);
                } else {
-                       DEBUG2("Removing %d from the list of children", pid);
+                       DEBUG("Removing %d from the list of children", pid);
                        g_hash_table_remove(children, &pid);
                }
        }
@@ -911,7 +1010,7 @@ off_t size_autodetect(int fhandle) {
 #ifdef HAVE_SYS_IOCTL_H
 #ifdef BLKGETSIZE64
        DEBUG("looking for export size with ioctl BLKGETSIZE64\n");
-       if (!ioctl(fhandle, BLKGETSIZE64, bytes) && bytes) {
+       if (!ioctl(fhandle, BLKGETSIZE64, &bytes) && bytes) {
                return (off_t)bytes;
        }
 #endif /* BLKGETSIZE64 */
@@ -933,7 +1032,7 @@ off_t size_autodetect(int fhandle) {
        if (es > ((off_t)0)) {
                return es;
         } else {
-                DEBUG2("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4)));
+                DEBUG("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4)));
         }
 
        err("Could not find size of exported block device: %m");
@@ -1010,9 +1109,10 @@ void myseek(int handle,off_t a) {
  * @param buf The buffer to write from
  * @param len The length of buf
  * @param client The client we're serving for
+ * @param fua Flag to indicate 'Force Unit Access'
  * @return The number of bytes actually written, or -1 in case of an error
  **/
-ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client) {
+ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
        int fhandle;
        off_t foffset;
        size_t maxbytes;
@@ -1023,24 +1123,69 @@ ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client) {
        if(maxbytes && len > maxbytes)
                len = maxbytes;
 
-       DEBUG4("(WRITE to fd %d offset %llu len %u), ", fhandle, foffset, len);
+       DEBUG("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle, (long long unsigned)foffset, (unsigned int)len, fua);
 
        myseek(fhandle, foffset);
        retval = write(fhandle, buf, len);
        if(client->server->flags & F_SYNC) {
                fsync(fhandle);
+       } else if (fua) {
+
+         /* This is where we would do the following
+          *   #ifdef USE_SYNC_FILE_RANGE
+          * However, we don't, for the reasons set out below
+          * by Christoph Hellwig <hch@infradead.org>
+          *
+          * [BEGINS] 
+          * fdatasync is equivalent to fsync except that it does not flush
+          * non-essential metadata (basically just timestamps in practice), but it
+          * does flush metadata required to find the data again, e.g. allocation
+          * information and extent maps.  sync_file_range does nothing but flush
+          * out pagecache content - it means you basically won't get your data
+          * back in case of a crash if you either:
+          * 
+          *  a) have a volatile write cache in your disk (e.g. any normal SATA disk)
+          *  b) are using a sparse file on a filesystem
+          *  c) are using a fallocate-preallocated file on a filesystem
+          *  d) use any file on a COW filesystem like btrfs
+          * 
+          * e.g. it only does anything useful for you if you do not have a volatile
+          * write cache, and either use a raw block device node, or just overwrite
+          * an already fully allocated (and not preallocated) file on a non-COW
+          * filesystem.
+          * [ENDS]
+          *
+          * What we should do is open a second FD with O_DSYNC set, then write to
+          * that when appropriate. However, with a Linux client, every REQ_FUA
+          * immediately follows a REQ_FLUSH, so fdatasync does not cause performance
+          * problems.
+          *
+          */
+#if 0
+               sync_file_range(fhandle, foffset, len,
+                               SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE |
+                               SYNC_FILE_RANGE_WAIT_AFTER);
+#else
+               fdatasync(fhandle);
+#endif
        }
        return retval;
 }
 
 /**
  * Call rawexpwrite repeatedly until all data has been written.
+ *
+ * @param a The offset where the write should start
+ * @param buf The buffer to write from
+ * @param len The length of buf
+ * @param client The client we're serving for
+ * @param fua Flag to indicate 'Force Unit Access'
  * @return 0 on success, nonzero on failure
  **/
-int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client) {
+int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
        ssize_t ret=0;
 
-       while(len > 0 && (ret=rawexpwrite(a, buf, len, client)) > 0 ) {
+       while(len > 0 && (ret=rawexpwrite(a, buf, len, client, fua)) > 0 ) {
                a += ret;
                buf += ret;
                len -= ret;
@@ -1069,7 +1214,7 @@ ssize_t rawexpread(off_t a, char *buf, size_t len, CLIENT *client) {
        if(maxbytes && len > maxbytes)
                len = maxbytes;
 
-       DEBUG4("(READ from fd %d offset %llu len %u), ", fhandle, foffset, len);
+       DEBUG("(READ from fd %d offset %llu len %u), ", fhandle, (long long unsigned int)foffset, (unsigned int)len);
 
        myseek(fhandle, foffset);
        return read(fhandle, buf, len);
@@ -1106,7 +1251,7 @@ int expread(off_t a, char *buf, size_t len, CLIENT *client) {
 
        if (!(client->server->flags & F_COPYONWRITE))
                return(rawexpread_fully(a, buf, len, client));
-       DEBUG3("Asked to read %d bytes at %llu.\n", len, (unsigned long long)a);
+       DEBUG("Asked to read %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
 
        mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
 
@@ -1116,12 +1261,12 @@ int expread(off_t a, char *buf, size_t len, CLIENT *client) {
                rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
                        len : (size_t)DIFFPAGESIZE-offset;
                if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
-                       DEBUG3("Page %llu is at %lu\n", (unsigned long long)mapcnt,
+                       DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
                               (unsigned long)(client->difmap[mapcnt]));
                        myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset);
                        if (read(client->difffile, buf, rdlen) != rdlen) return -1;
                } else { /* the block is not there */
-                       DEBUG2("Page %llu is not here, we read the original one\n",
+                       DEBUG("Page %llu is not here, we read the original one\n",
                               (unsigned long long)mapcnt);
                        if(rawexpread_fully(a, buf, rdlen, client)) return -1;
                }
@@ -1139,9 +1284,10 @@ int expread(off_t a, char *buf, size_t len, CLIENT *client) {
  * @param buf The buffer to write from
  * @param len The length of buf
  * @param client The client we're going to write for.
+ * @param fua Flag to indicate 'Force Unit Access'
  * @return 0 on success, nonzero on failure
  **/
-int expwrite(off_t a, char *buf, size_t len, CLIENT *client) {
+int expwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
        char pagebuf[DIFFPAGESIZE];
        off_t mapcnt,mapl,maph;
        off_t wrlen,rdlen; 
@@ -1149,8 +1295,8 @@ int expwrite(off_t a, char *buf, size_t len, CLIENT *client) {
        off_t offset;
 
        if (!(client->server->flags & F_COPYONWRITE))
-               return(rawexpwrite_fully(a, buf, len, client)); 
-       DEBUG3("Asked to write %d bytes at %llu.\n", len, (unsigned long long)a);
+               return(rawexpwrite_fully(a, buf, len, client, fua)); 
+       DEBUG("Asked to write %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
 
        mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
 
@@ -1161,7 +1307,7 @@ int expwrite(off_t a, char *buf, size_t len, CLIENT *client) {
                        len : (size_t)DIFFPAGESIZE-offset;
 
                if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
-                       DEBUG3("Page %llu is at %lu\n", (unsigned long long)mapcnt,
+                       DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
                               (unsigned long)(client->difmap[mapcnt])) ;
                        myseek(client->difffile,
                                        client->difmap[mapcnt]*DIFFPAGESIZE+offset);
@@ -1169,7 +1315,7 @@ int expwrite(off_t a, char *buf, size_t len, CLIENT *client) {
                } else { /* the block is not there */
                        myseek(client->difffile,client->difffilelen*DIFFPAGESIZE) ;
                        client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++;
-                       DEBUG3("Page %llu is not here, we put it at %lu\n",
+                       DEBUG("Page %llu is not here, we put it at %lu\n",
                               (unsigned long long)mapcnt,
                               (unsigned long)(client->difmap[mapcnt]));
                        rdlen=DIFFPAGESIZE ;
@@ -1182,6 +1328,36 @@ int expwrite(off_t a, char *buf, size_t len, CLIENT *client) {
                }                                                   
                len-=wrlen ; a+=wrlen ; buf+=wrlen ;
        }
+       if (client->server->flags & F_SYNC) {
+               fsync(client->difffile);
+       } else if (fua) {
+               /* open question: would it be cheaper to do multiple
+                  sync_file_range() calls as we iterate through the above?
+                */
+               fdatasync(client->difffile);
+       }
+       return 0;
+}
+
+/**
+ * Flush a client's export to disk with fsync().
+ *
+ * For a copy-on-write export only the diff file is synced, and the result
+ * of that fsync() call is returned directly. Otherwise every file handle
+ * in client->export is synced in turn, stopping at the first failure.
+ *
+ * @param client The client whose export should be flushed.
+ * @return 0 on success, nonzero on failure
+ **/
+int expflush(CLIENT *client) {
+       gint i;
+
+        if (client->server->flags & F_COPYONWRITE) {
+               return fsync(client->difffile);
+       }
+       
+       for (i = 0; i < client->export->len; i++) {
+               FILE_INFO fi = g_array_index(client->export, FILE_INFO, i);
+               if (fsync(fi.fhandle) < 0) /* give up at the first file that fails to sync */
+                       return -1;
+       }
+       
        return 0;
 }
 
@@ -1190,31 +1366,120 @@ int expwrite(off_t a, char *buf, size_t len, CLIENT *client) {
  *
  * @param client The client we're negotiating with.
  **/
-void negotiate(CLIENT *client) {
+CLIENT* negotiate(int net, CLIENT *client, GArray* servers, int phase) {
        char zeros[128];
-       u64 size_host;
-       u32 flags = NBD_FLAG_HAS_FLAGS;
+       uint64_t size_host;
+       uint32_t flags = NBD_FLAG_HAS_FLAGS;
+       uint16_t smallflags = 0;
+       uint64_t magic;
 
        memset(zeros, '\0', sizeof(zeros));
-       if (write(client->net, INIT_PASSWD, 8) < 0)
-               err("Negotiation failed: %m");
-       cliserv_magic = htonll(cliserv_magic);
-       if (write(client->net, &cliserv_magic, sizeof(cliserv_magic)) < 0)
-               err("Negotiation failed: %m");
+       if(phase & NEG_INIT) {
+               /* common */
+               if (write(net, INIT_PASSWD, 8) < 0) {
+                       err_nonfatal("Negotiation failed: %m");
+                       if(client)
+                               exit(EXIT_FAILURE);
+               }
+               if(phase & NEG_MODERN) {
+                       /* modern */
+                       magic = htonll(opts_magic);
+               } else {
+                       /* oldstyle */
+                       magic = htonll(cliserv_magic);
+               }
+               if (write(net, &magic, sizeof(magic)) < 0) {
+                       err_nonfatal("Negotiation failed: %m");
+                       if(phase & NEG_OLD)
+                               exit(EXIT_FAILURE);
+               }
+       }
+       if ((phase & NEG_MODERN) && (phase & NEG_INIT)) {
+               /* modern */
+               uint32_t reserved;
+               uint32_t opt;
+               uint32_t namelen;
+               char* name;
+               int i;
+
+               if(!servers)
+                       err("programmer error");
+               if (write(net, &smallflags, sizeof(uint16_t)) < 0)
+                       err("Negotiation failed: %m");
+               if (read(net, &reserved, sizeof(reserved)) < 0)
+                       err("Negotiation failed: %m");
+               if (read(net, &magic, sizeof(magic)) < 0)
+                       err("Negotiation failed: %m");
+               magic = ntohll(magic);
+               if(magic != opts_magic) {
+                       close(net);
+                       return NULL;
+               }
+               if (read(net, &opt, sizeof(opt)) < 0)
+                       err("Negotiation failed: %m");
+               opt = ntohl(opt);
+               if(opt != NBD_OPT_EXPORT_NAME) {
+                       close(net);
+                       return NULL;
+               }
+               if (read(net, &namelen, sizeof(namelen)) < 0)
+                       err("Negotiation failed: %m");
+               namelen = ntohl(namelen);
+               name = malloc(namelen+1);
+               name[namelen]=0;
+               if (read(net, name, namelen) < 0)
+                       err("Negotiation failed: %m");
+               for(i=0; i<servers->len; i++) {
+                       SERVER* serve = &(g_array_index(servers, SERVER, i));
+                       if(!strcmp(serve->servename, name)) {
+                               CLIENT* client = g_new0(CLIENT, 1);
+                               client->server = serve;
+                               client->exportsize = OFFT_MAX;
+                               client->net = net;
+                               client->modern = TRUE;
+                               client->transactionlogfd = -1;
+                               free(name);
+                               return client;
+                       }
+               }
+               free(name);
+               return NULL;
+       }
+       /* common */
        size_host = htonll((u64)(client->exportsize));
-       if (write(client->net, &size_host, 8) < 0)
+       if (write(net, &size_host, 8) < 0)
                err("Negotiation failed: %m");
        if (client->server->flags & F_READONLY)
                flags |= NBD_FLAG_READ_ONLY;
-       flags = htonl(flags);
-       if (write(client->net, &flags, 4) < 0)
-               err("Negotiation failed: %m");
+       if (client->server->flags & F_FLUSH)
+               flags |= NBD_FLAG_SEND_FLUSH;
+       if (client->server->flags & F_FUA)
+               flags |= NBD_FLAG_SEND_FUA;
+       if (client->server->flags & F_ROTATIONAL)
+               flags |= NBD_FLAG_ROTATIONAL;
+       if (phase & NEG_OLD) {
+               /* oldstyle */
+               flags = htonl(flags);
+               if (write(client->net, &flags, 4) < 0)
+                       err("Negotiation failed: %m");
+       } else {
+               /* modern */
+               smallflags = (uint16_t)(flags & ~((uint16_t)0));
+               smallflags = htons(smallflags);
+               if (write(client->net, &smallflags, sizeof(smallflags)) < 0) {
+                       err("Negotiation failed: %m");
+               }
+       }
+       /* common */
        if (write(client->net, zeros, 124) < 0)
                err("Negotiation failed: %m");
+       return NULL;
 }
 
 /** sending macro. */
-#define SEND(net,reply) writeit( net, &reply, sizeof( reply ));
+#define SEND(net,reply) { writeit( net, &reply, sizeof( reply )); \
+       if (client->transactionlogfd != -1) \
+               writeit(client->transactionlogfd, &reply, sizeof(reply)); }
 /** error macro. */
 #define ERROR(client,reply,errcode) { reply.error = htonl(errcode); SEND(client->net,reply); reply.error = 0; }
 /**
@@ -1233,22 +1498,62 @@ int mainloop(CLIENT *client) {
 #ifdef DODBG
        int i = 0;
 #endif
-       negotiate(client);
+       negotiate(client->net, client, NULL, client->modern ? NEG_MODERN : (NEG_OLD | NEG_INIT));
        DEBUG("Entering request loop!\n");
        reply.magic = htonl(NBD_REPLY_MAGIC);
        reply.error = 0;
        while (go_on) {
                char buf[BUFSIZE];
+               char* p;
                size_t len;
+               size_t currlen;
+               size_t writelen;
+               uint16_t command;
 #ifdef DODBG
                i++;
                printf("%d: ", i);
 #endif
                readit(client->net, &request, sizeof(request));
+               if (client->transactionlogfd != -1)
+                       writeit(client->transactionlogfd, &request, sizeof(request));
+
                request.from = ntohll(request.from);
                request.type = ntohl(request.type);
+               command = request.type & NBD_CMD_MASK_COMMAND;
+               len = ntohl(request.len);
+
+               DEBUG("%s from %llu (%llu) len %d, ", getcommandname(command),
+                               (unsigned long long)request.from,
+                               (unsigned long long)request.from / 512, (unsigned int)len);
+
+               if (request.magic != htonl(NBD_REQUEST_MAGIC))
+                       err("Not enough magic.");
 
-               if (request.type==NBD_CMD_DISC) {
+               memcpy(reply.handle, request.handle, sizeof(reply.handle));
+
+               if ((command==NBD_CMD_WRITE) || (command==NBD_CMD_READ)) {
+                       if ((request.from + len) > (OFFT_MAX)) {
+                               DEBUG("[Number too large!]");
+                               ERROR(client, reply, EINVAL);
+                               continue;
+                       }
+
+                       if (((ssize_t)((off_t)request.from + len) > client->exportsize)) {
+                               DEBUG("[RANGE!]");
+                               ERROR(client, reply, EINVAL);
+                               continue;
+                       }
+
+                       currlen = len;
+                       if (currlen > BUFSIZE - sizeof(struct nbd_reply)) {
+                               currlen = BUFSIZE - sizeof(struct nbd_reply);
+                               msg2(LOG_INFO, "oversized request (this is not a problem)");
+                       }
+               }
+
+               switch (command) {
+
+               case NBD_CMD_DISC:
                        msg2(LOG_INFO, "Disconnect request received.");
                        if (client->server->flags & F_COPYONWRITE) { 
                                if (client->difmap) g_free(client->difmap) ;
@@ -1258,64 +1563,74 @@ int mainloop(CLIENT *client) {
                        }
                        go_on=FALSE;
                        continue;
-               }
-
-               len = ntohl(request.len);
 
-               if (request.magic != htonl(NBD_REQUEST_MAGIC))
-                       err("Not enough magic.");
-               if (len > BUFSIZE + sizeof(struct nbd_reply))
-                       err("Request too big!");
-#ifdef DODBG
-               printf("%s from %llu (%llu) len %d, ", request.type ? "WRITE" :
-                               "READ", (unsigned long long)request.from,
-                               (unsigned long long)request.from / 512, len);
-#endif
-               memcpy(reply.handle, request.handle, sizeof(reply.handle));
-               if ((request.from + len) > (OFFT_MAX)) {
-                       DEBUG("[Number too large!]");
-                       ERROR(client, reply, EINVAL);
-                       continue;
-               }
-
-               if (((ssize_t)((off_t)request.from + len) > client->exportsize)) {
-                       DEBUG("[RANGE!]");
-                       ERROR(client, reply, EINVAL);
-                       continue;
-               }
-
-               if (request.type==NBD_CMD_WRITE) {
+               case NBD_CMD_WRITE:
                        DEBUG("wr: net->buf, ");
-                       readit(client->net, buf, len);
-                       DEBUG("buf->exp, ");
-                       if ((client->server->flags & F_READONLY) ||
-                           (client->server->flags & F_AUTOREADONLY)) {
-                               DEBUG("[WRITE to READONLY!]");
-                               ERROR(client, reply, EPERM);
-                               continue;
+                       while(len > 0) {
+                               readit(client->net, buf, currlen);
+                               DEBUG("buf->exp, ");
+                               if ((client->server->flags & F_READONLY) ||
+                                   (client->server->flags & F_AUTOREADONLY)) {
+                                       DEBUG("[WRITE to READONLY!]");
+                                       ERROR(client, reply, EPERM);
+                                       consume(client->net, buf, len-currlen, BUFSIZE);
+                                       continue;
+                               }
+                               if (expwrite(request.from, buf, currlen, client,
+                                            request.type & NBD_CMD_FLAG_FUA)) {
+                                       DEBUG("Write failed: %m" );
+                                       ERROR(client, reply, errno);
+                                       consume(client->net, buf, len-currlen, BUFSIZE);
+                                       continue;
+                               }
+                               len -= currlen;
+                               request.from += currlen;
+                               currlen = (len < BUFSIZE) ? len : BUFSIZE;
                        }
-                       if (expwrite(request.from, buf, len, client)) {
-                               DEBUG("Write failed: %m" );
+                       SEND(client->net, reply);
+                       DEBUG("OK!\n");
+                       continue;
+
+               case NBD_CMD_FLUSH:
+                       DEBUG("fl: ");
+                       if (expflush(client)) {
+                               DEBUG("Flush failed: %m");
                                ERROR(client, reply, errno);
                                continue;
                        }
                        SEND(client->net, reply);
                        DEBUG("OK!\n");
                        continue;
-               }
-               /* READ */
 
-               DEBUG("exp->buf, ");
-               if (expread(request.from, buf + sizeof(struct nbd_reply), len, client)) {
-                       DEBUG("Read failed: %m");
-                       ERROR(client, reply, errno);
+               case NBD_CMD_READ:
+                       DEBUG("exp->buf, ");
+                       memcpy(buf, &reply, sizeof(struct nbd_reply));
+                       if (client->transactionlogfd != -1)
+                               writeit(client->transactionlogfd, &reply, sizeof(reply));
+                       p = buf + sizeof(struct nbd_reply);
+                       writelen = currlen + sizeof(struct nbd_reply);
+                       while(len > 0) {
+                               if (expread(request.from, p, currlen, client)) {
+                                       DEBUG("Read failed: %m");
+                                       ERROR(client, reply, errno);
+                                       continue;
+                               }
+                               
+                               DEBUG("buf->net, ");
+                               writeit(client->net, buf, writelen);
+                               len -= currlen;
+                               request.from += currlen;
+                               currlen = (len < BUFSIZE) ? len : BUFSIZE;
+                               p = buf;
+                               writelen = currlen;
+                       }
+                       DEBUG("OK!\n");
+                       continue;
+
+               default:
+                       DEBUG ("Ignoring unknown command\n");
                        continue;
                }
-
-               DEBUG("buf->net, ");
-               memcpy(buf, &reply, sizeof(struct nbd_reply));
-               writeit(client->net, buf, len + sizeof(struct nbd_reply));
-               DEBUG("OK!\n");
        }
        return 0;
 }
@@ -1346,7 +1661,7 @@ void setupexport(CLIENT* client) {
                } else {
                        tmpname=g_strdup(client->exportname);
                }
-               DEBUG2( "Opening %s\n", tmpname );
+               DEBUG( "Opening %s\n", tmpname );
                fi.fhandle = open(tmpname, mode);
                if(fi.fhandle == -1 && mode == O_RDWR) {
                        /* Try again because maybe media was read-only */
@@ -1445,6 +1760,15 @@ int do_run(gchar* command, gchar* file) {
  * @param client a connected client
  **/
 void serveconnection(CLIENT *client) {
+       if (client->server->transactionlog && (client->transactionlogfd == -1))
+       {
+               if (-1 == (client->transactionlogfd = open(client->server->transactionlog,
+                                                          O_WRONLY | O_CREAT,
+                                                          S_IRUSR | S_IWUSR)))
+                       g_warning("Could not open transaction log %s",
+                                 client->server->transactionlog);
+       }
+
        if(do_run(client->server->prerun, client->exportname)) {
                exit(EXIT_FAILURE);
        }
@@ -1458,6 +1782,12 @@ void serveconnection(CLIENT *client) {
 
        mainloop(client);
        do_run(client->server->postrun, client->exportname);
+
+       if (-1 != client->transactionlogfd)
+       {
+               close(client->transactionlogfd);
+               client->transactionlogfd = -1;
+       }
 }
 
 /**
@@ -1569,7 +1899,6 @@ void destroy_pid_t(gpointer data) {
 int serveloop(GArray* servers) {
        struct sockaddr_storage addrin;
        socklen_t addrinlen=sizeof(addrin);
-       SERVER *serve;
        int i;
        int max;
        int sock;
@@ -1586,44 +1915,76 @@ int serveloop(GArray* servers) {
        max=0;
        FD_ZERO(&mset);
        for(i=0;i<servers->len;i++) {
-               sock=(g_array_index(servers, SERVER, i)).socket;
-               FD_SET(sock, &mset);
-               max=sock>max?sock:max;
+               if((sock=(g_array_index(servers, SERVER, i)).socket)) {
+                       FD_SET(sock, &mset);
+                       max=sock>max?sock:max;
+               }
+       }
+       if(modernsock) {
+               FD_SET(modernsock, &mset);
+               max=modernsock>max?modernsock:max;
        }
        for(;;) {
-               CLIENT *client;
-               int net;
+               CLIENT *client = NULL;
                pid_t *pid;
 
                memcpy(&rset, &mset, sizeof(fd_set));
                if(select(max+1, &rset, NULL, NULL, NULL)>0) {
+                       int net = 0;
+                       SERVER* serve=NULL;
+
                        DEBUG("accept, ");
-                       for(i=0;i<servers->len;i++) {
+                       if(FD_ISSET(modernsock, &rset)) {
+                               if((net=accept(modernsock, (struct sockaddr *) &addrin, &addrinlen)) < 0)
+                                       err("accept: %m");
+                               client = negotiate(net, NULL, servers, NEG_INIT | NEG_MODERN);
+                               if(!client) {
+                                       err_nonfatal("negotiation failed");
+                                       close(net);
+                                       net=0;
+                                       continue;
+                               }
+                               serve = client->server;
+                       }
+                       for(i=0;i<servers->len && !net;i++) {
                                serve=&(g_array_index(servers, SERVER, i));
                                if(FD_ISSET(serve->socket, &rset)) {
-                                       int sock_flags;
                                        if ((net=accept(serve->socket, (struct sockaddr *) &addrin, &addrinlen)) < 0)
                                                err("accept: %m");
-
-                                       if((sock_flags = fcntl(net, F_GETFL, 0))==-1) {
-                                               err("fcntl F_GETFL");
-                                       }
-                                       if(fcntl(net, F_SETFL, sock_flags &~O_NONBLOCK)==-1) {
-                                               err("fcntl F_SETFL ~O_NONBLOCK");
-                                       }
-                                       client = g_malloc(sizeof(CLIENT));
+                               }
+                       }
+                       if(net) {
+                               int sock_flags;
+
+                               if(serve->max_connections > 0 &&
+                                  g_hash_table_size(children) >= serve->max_connections) {
+                                       msg2(LOG_INFO, "Max connections reached");
+                                       close(net);
+                                       continue;
+                               }
+                               if((sock_flags = fcntl(net, F_GETFL, 0))==-1) {
+                                       err("fcntl F_GETFL");
+                               }
+                               if(fcntl(net, F_SETFL, sock_flags &~O_NONBLOCK)==-1) {
+                                       err("fcntl F_SETFL ~O_NONBLOCK");
+                               }
+                               if(!client) {
+                                       client = g_new0(CLIENT, 1);
                                        client->server=serve;
                                        client->exportsize=OFFT_MAX;
                                        client->net=net;
-                                       set_peername(net, client);
-                                       if (!authorized_client(client)) {
-                                               msg2(LOG_INFO,"Unauthorized client") ;
-                                               close(net);
-                                               continue;
-                                       }
-                                       msg2(LOG_INFO,"Authorized client") ;
-                                       pid=g_malloc(sizeof(pid_t));
-#ifndef NOFORK
+                                       client->transactionlogfd = -1;
+                               }
+                               set_peername(net, client);
+                               if (!authorized_client(client)) {
+                                       msg2(LOG_INFO,"Unauthorized client") ;
+                                       close(net);
+                                       continue;
+                               }
+                               msg2(LOG_INFO,"Authorized client") ;
+                               pid=g_malloc(sizeof(pid_t));
+
+                               if (!dontfork) {
                                        if ((*pid=fork())<0) {
                                                msg3(LOG_INFO,"Could not fork (%s)",strerror(errno)) ;
                                                close(net);
@@ -1641,42 +2002,61 @@ int serveloop(GArray* servers) {
                                                close(serve->socket);
                                        }
                                        /* FALSE does not free the
-                                       actual data. This is required,
-                                       because the client has a
-                                       direct reference into that
-                                       data, and otherwise we get a
-                                       segfault... */
+                                          actual data. This is required,
+                                          because the client has a
+                                          direct reference into that
+                                          data, and otherwise we get a
+                                          segfault... */
                                        g_array_free(servers, FALSE);
-#endif // NOFORK
-                                       msg2(LOG_INFO,"Starting to serve");
-                                       serveconnection(client);
-                                       exit(EXIT_SUCCESS);
                                }
+
+                               msg2(LOG_INFO,"Starting to serve");
+                               serveconnection(client);
+                               exit(EXIT_SUCCESS);
                        }
                }
        }
 }
 
+void dosockopts(int socket) {
+#ifndef sun
+       int yes=1;
+#else
+       char yes='1';
+#endif /* sun */
+       int sock_flags;
+
+       /* lose the pesky "Address already in use" error message */
+       if (setsockopt(socket,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) {
+               err("setsockopt SO_REUSEADDR");
+       }
+       if (setsockopt(socket,SOL_SOCKET,SO_KEEPALIVE,&yes,sizeof(int)) == -1) {
+               err("setsockopt SO_KEEPALIVE");
+       }
+
+       /* make the listening socket non-blocking */
+       if ((sock_flags = fcntl(socket, F_GETFL, 0)) == -1) {
+               err("fcntl F_GETFL");
+       }
+       if (fcntl(socket, F_SETFL, sock_flags | O_NONBLOCK) == -1) {
+               err("fcntl F_SETFL O_NONBLOCK");
+       }
+}
+
 /**
  * Connect a server's socket.
  *
  * @param serve the server we want to connect.
  **/
-void setup_serve(SERVER *serve) {
-       struct sockaddr_storage addrin;
+int setup_serve(SERVER *serve) {
        struct addrinfo hints;
        struct addrinfo *ai = NULL;
-       struct sigaction sa;
-       int addrinlen = sizeof(addrin);
-       int sock_flags;
-#ifndef sun
-       int yes=1;
-#else
-       char yes='1';
-#endif /* sun */
        gchar *port = NULL;
        int e;
 
+       if(!do_oldstyle) {
+               return serve->servename ? 1 : 0;
+       }
        memset(&hints,'\0',sizeof(hints));
        hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG | AI_NUMERICSERV;
        hints.ai_socktype = SOCK_STREAM;
@@ -1684,7 +2064,7 @@ void setup_serve(SERVER *serve) {
 
        port = g_strdup_printf ("%d", serve->port);
        if (port == NULL)
-               return;
+               return 0;
 
        e = getaddrinfo(serve->listenaddr,port,&hints,&ai);
 
@@ -1711,21 +2091,7 @@ void setup_serve(SERVER *serve) {
        if ((serve->socket = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol)) < 0)
                err("socket: %m");
 
-       /* lose the pesky "Address already in use" error message */
-       if (setsockopt(serve->socket,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) {
-               err("setsockopt SO_REUSEADDR");
-       }
-       if (setsockopt(serve->socket,SOL_SOCKET,SO_KEEPALIVE,&yes,sizeof(int)) == -1) {
-               err("setsockopt SO_KEEPALIVE");
-       }
-
-       /* make the listening socket non-blocking */
-       if ((sock_flags = fcntl(serve->socket, F_GETFL, 0)) == -1) {
-               err("fcntl F_GETFL");
-       }
-       if (fcntl(serve->socket, F_SETFL, sock_flags | O_NONBLOCK) == -1) {
-               err("fcntl F_SETFL O_NONBLOCK");
-       }
+       dosockopts(serve->socket);
 
        DEBUG("Waiting for connections... bind, ");
        e = bind(serve->socket, ai->ai_addr, ai->ai_addrlen);
@@ -1736,17 +2102,43 @@ void setup_serve(SERVER *serve) {
                err("listen: %m");
 
        freeaddrinfo (ai);
+       if(serve->servename) {
+               return 1;
+       } else {
+               return 0;
+       }
+}
 
-       sa.sa_handler = sigchld_handler;
-       sigemptyset(&sa.sa_mask);
-       sa.sa_flags = SA_RESTART;
-       if(sigaction(SIGCHLD, &sa, NULL) == -1)
-               err("sigaction: %m");
-       sa.sa_handler = sigterm_handler;
-       sigemptyset(&sa.sa_mask);
-       sa.sa_flags = SA_RESTART;
-       if(sigaction(SIGTERM, &sa, NULL) == -1)
-               err("sigaction: %m");
+void open_modern(void) {
+       struct addrinfo hints;
+       struct addrinfo* ai = NULL;
+       struct sock_flags;
+       int e;
+
+       memset(&hints, '\0', sizeof(hints));
+       hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG;
+       hints.ai_socktype = SOCK_STREAM;
+       hints.ai_family = AF_UNSPEC;
+       hints.ai_protocol = IPPROTO_TCP;
+       e = getaddrinfo(modern_listen, modernport, &hints, &ai);
+       if(e != 0) {
+               fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
+               exit(EXIT_FAILURE);
+       }
+       if((modernsock = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol))<0) {
+               err("socket: %m");
+       }
+
+       dosockopts(modernsock);
+
+       if(bind(modernsock, ai->ai_addr, ai->ai_addrlen)) {
+               err("bind: %m");
+       }
+       if(listen(modernsock, 10) <0) {
+               err("listen: %m");
+       }
+
+       freeaddrinfo(ai);
 }
 
 /**
@@ -1754,11 +2146,27 @@ void setup_serve(SERVER *serve) {
  **/
 void setup_servers(GArray* servers) {
        int i;
+       struct sigaction sa;
+       int want_modern=0;
 
        for(i=0;i<servers->len;i++) {
-               setup_serve(&(g_array_index(servers, SERVER, i)));
+               want_modern |= setup_serve(&(g_array_index(servers, SERVER, i)));
+       }
+       if(want_modern) {
+               open_modern();
        }
        children=g_hash_table_new_full(g_int_hash, g_int_equal, NULL, destroy_pid_t);
+
+       sa.sa_handler = sigchld_handler;
+       sigemptyset(&sa.sa_mask);
+       sa.sa_flags = SA_RESTART;
+       if(sigaction(SIGCHLD, &sa, NULL) == -1)
+               err("sigaction: %m");
+       sa.sa_handler = sigterm_handler;
+       sigemptyset(&sa.sa_mask);
+       sa.sa_flags = SA_RESTART;
+       if(sigaction(SIGTERM, &sa, NULL) == -1)
+               err("sigaction: %m");
 }
 
 /**
@@ -1768,7 +2176,7 @@ void setup_servers(GArray* servers) {
  *     is only used to create a PID file of the form
  *     /var/run/nbd-server.&lt;port&gt;.pid; it's not modified in any way.
  **/
-#if !defined(NODAEMON) && !defined(NOFORK)
+#if !defined(NODAEMON)
 void daemonize(SERVER* serve) {
        FILE*pidf;
 
@@ -1797,7 +2205,7 @@ void daemonize(SERVER* serve) {
 }
 #else
 #define daemonize(serve)
-#endif /* !defined(NODAEMON) && !defined(NOFORK) */
+#endif /* !defined(NODAEMON) */
 
 /*
  * Everything beyond this point (in the file) is run in non-daemon mode.
@@ -1865,10 +2273,11 @@ void glib_message_syslog_redirect(const gchar *log_domain,
         break;
       case G_LOG_LEVEL_DEBUG:
         level=LOG_DEBUG;
+       break;
       default:
         level=LOG_ERR;
     }
-    syslog(level, message);
+    syslog(level, "%s", message);
 }
 #endif
 
@@ -1921,8 +2330,11 @@ int main(int argc, char *argv[]) {
        }
     
        if(!servers || !servers->len) {
-               g_warning("Could not parse config file: %s", 
-                               err ? err->message : "Unknown error");
+               if(err && !(err->domain == g_quark_from_string("parse_cfile")
+                               && err->code == CFILE_NOTFOUND)) {
+                       g_warning("Could not parse config file: %s", 
+                                       err ? err->message : "Unknown error");
+               }
        }
        if(serve) {
                g_warning("Specifying an export on the command line is deprecated.");
@@ -1930,10 +2342,11 @@ int main(int argc, char *argv[]) {
        }
 
        if((!serve) && (!servers||!servers->len)) {
-               g_message("Nothing to do! Bye!");
+               g_message("No configured exports; quitting.");
                exit(EXIT_FAILURE);
        }
-       daemonize(serve);
+       if (!dontfork)
+               daemonize(serve);
        setup_servers(servers);
        dousers();
        serveloop(servers);