fix documentation of NBD_CMD_FLUSH
[nbd.git] / nbd-server.c
1 /*
2  * Network Block Device - server
3  *
4  * Copyright 1996-1998 Pavel Machek, distribute under GPL
5  *  <pavel@atrey.karlin.mff.cuni.cz>
6  * Copyright 2001-2004 Wouter Verhelst <wouter@debian.org>
7  * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk>
8  *
9  * Version 1.0 - hopefully 64-bit-clean
10  * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au>
11  * Version 1.2 - autodetect size of block devices, thanks to "Peter T. Breuer" <ptb@it.uc3m.es>
12  * Version 1.5 - can compile on Unix systems that don't have 64 bit integer
13  *      type, or don't have 64 bit file offsets by defining FS_32BIT
14  *      in compile options for nbd-server *only*. This can be done
15  *      with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the
16  *      original autoconf input file, or I would make it a configure
17  *      option.) Ken Yap <ken@nlc.net.au>.
18  * Version 1.6 - fix autodetection of block device size and really make 64 bit
19  *      clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk>
20  * Version 2.0 - Version synchronised with client
21  * Version 2.1 - Reap zombie client processes when they exit. Removed
22  *      (uncommented) the _IO magic, it's no longer necessary. Wouter
23  *      Verhelst <wouter@debian.org>
24  * Version 2.2 - Auto switch to read-only mode (useful for floppies).
25  * Version 2.3 - Fixed code so that Large File Support works. This
26  *      removes the FS_32BIT compile-time directive; define
27  *      _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be
28  *      using FS_32BIT. This will allow you to use files >2GB instead of
29  *      having to use the -m option. Wouter Verhelst <wouter@debian.org>
30  * Version 2.4 - Added code to keep track of children, so that we can
31  *      properly kill them from initscripts. Add a call to daemon(),
32  *      so that processes don't think they have to wait for us, which is
33  *      interesting for initscripts as well. Wouter Verhelst
34  *      <wouter@debian.org>
35  * Version 2.5 - Bugfix release: forgot to reset child_arraysize to
36  *      zero after fork()ing, resulting in nbd-server going berserk
37  *      when it receives a signal with at least one child open. Wouter
38  *      Verhelst <wouter@debian.org>
39  * 10/10/2003 - Added socket option SO_KEEPALIVE (sf.net bug 819235);
40  *      rectified type of mainloop::size_host (sf.net bugs 814435 and
41  *      817385); close the PID file after writing to it, so that the
42  *      daemon can actually be found. Wouter Verhelst
43  *      <wouter@debian.org>
44  * 10/10/2003 - Size of the data "size_host" was wrong and so was not
45  *      correctly put in network endianness. Many types were corrected
46  *      (size_t and off_t instead of int).  <vspaceg@sourceforge.net>
47  * Version 2.6 - Some code cleanup.
48  * Version 2.7 - Better build system.
49  * 11/02/2004 - Doxygenified the source, modularized it a bit. Needs a 
50  *      lot more work, but this is a start. Wouter Verhelst
51  *      <wouter@debian.org>
52  * 16/03/2010 - Add IPv6 support.
53  *      Kitt Tientanopajai <kitt@kitty.in.th>
54  *      Neutron Soutmun <neo.neutron@gmail.com>
55  *      Suriya Soutmun <darksolar@gmail.com>
56  */
57
58 /* Includes LFS defines, which defines behaviours of some of the following
59  * headers, so must come before those */
60 #include "lfs.h"
61
62 #include <sys/types.h>
63 #include <sys/socket.h>
64 #include <sys/stat.h>
65 #include <sys/select.h>         /* select */
66 #include <sys/wait.h>           /* wait */
67 #ifdef HAVE_SYS_IOCTL_H
68 #include <sys/ioctl.h>
69 #endif
70 #include <sys/param.h>
71 #ifdef HAVE_SYS_MOUNT_H
72 #include <sys/mount.h>          /* For BLKGETSIZE */
73 #endif
74 #include <signal.h>             /* sigaction */
75 #include <errno.h>
76 #include <netinet/tcp.h>
77 #include <netinet/in.h>
78 #include <netdb.h>
79 #include <syslog.h>
80 #include <unistd.h>
81 #include <stdio.h>
82 #include <stdlib.h>
83 #include <string.h>
84 #include <fcntl.h>
85 #include <arpa/inet.h>
86 #include <strings.h>
87 #include <dirent.h>
88 #include <unistd.h>
89 #include <getopt.h>
90 #include <pwd.h>
91 #include <grp.h>
92
93 #include <glib.h>
94
95 /* used in cliserv.h, so must come first */
96 #define MY_NAME "nbd_server"
97 #include "cliserv.h"
98
99 #ifdef WITH_SDP
100 #include <sdp_inet.h>
101 #endif
102
103 /** Default position of the config file; SYSCONFDIR falls back to "/etc" when the build does not define it */
104 #ifndef SYSCONFDIR
105 #define SYSCONFDIR "/etc"
106 #endif
107 #define CFILE SYSCONFDIR "/nbd-server/config"
108
109 /** Where our config file actually is (replaced by the -C command line option) */
110 gchar* config_file_pos;
111
112 /** What user we're running as (filled in from the "user" config file parameter) */
113 gchar* runuser=NULL;
114 /** What group we're running as (filled in from the "group" config file parameter) */
115 gchar* rungroup=NULL;
116 /** Whether to export using the old negotiation protocol (port-based); cmdline() turns this on for an export given on the command line */
117 gboolean do_oldstyle=FALSE;
118
119 /** Whether we should avoid forking (set by the -d / --dont-fork option) */
120 int dontfork = 0;
121
122 /** Logging macros msgN(priority, format, ...): with ISSERVER defined they call syslog(); otherwise the priority is dropped and the message goes to g_message() */
123 #ifdef ISSERVER
124 #define msg2(a,b) syslog(a,b)
125 #define msg3(a,b,c) syslog(a,b,c)
126 #define msg4(a,b,c,d) syslog(a,b,c,d)
127 #else
128 #define msg2(a,b) g_message(b)
129 #define msg3(a,b,c) g_message(b,c)
130 #define msg4(a,b,c,d) g_message(b,c,d)
131 #endif
132
133 /* Debugging macros: DEBUGn() passes its n arguments to printf() when DODBG is defined, and expands to nothing otherwise */
134 //#define DODBG
135 #ifdef DODBG
136 #define DEBUG( a ) printf( a )
137 #define DEBUG2( a,b ) printf( a,b )
138 #define DEBUG3( a,b,c ) printf( a,b,c )
139 #define DEBUG4( a,b,c,d ) printf( a,b,c,d )
140 #define DEBUG5( a,b,c,d,e ) printf( a,b,c,d,e )
141 #else
142 #define DEBUG( a )
143 #define DEBUG2( a,b ) 
144 #define DEBUG3( a,b,c ) 
145 #define DEBUG4( a,b,c,d ) 
146 #define DEBUG5( a,b,c,d,e ) 
147 #endif
148 #ifndef PACKAGE_VERSION /* fall back to an empty version string when the build defines none */
149 #define PACKAGE_VERSION ""
150 #endif
151 /**
152  * The highest value a variable of type off_t can reach. This is a signed
153  * integer, so set all bits except for the leftmost one.
154  * NOTE(review): the expansion is not parenthesised and shifts a 1 into the sign bit of a signed type; use with care inside larger expressions.
155  **/
155 #define OFFT_MAX ~((off_t)1<<(sizeof(off_t)*8-1))
156 #define LINELEN 256       /**< Size of static buffer used to read the
157                                authorization file (yuck) */
158 #define BUFSIZE ((1024*1024)+sizeof(struct nbd_reply)) /**< Size of buffer that can hold requests */
159 #define DIFFPAGESIZE 4096 /**< diff file uses those chunks */
160 #define F_READONLY 1      /**< flag to tell us a file is readonly */
161 #define F_MULTIFILE 2     /**< flag to tell us a file is exported using -m */
162 #define F_COPYONWRITE 4   /**< flag to tell us a file is exported using
163                             copyonwrite */
164 #define F_AUTOREADONLY 8  /**< flag to tell us a file is set to autoreadonly */
165 #define F_SPARSE 16       /**< flag to tell us copyonwrite should use a sparse file */
166 #define F_SDP 32          /**< flag to tell us the export should be done using the Socket Direct Protocol for RDMA */
167 #define F_SYNC 64         /**< Whether to fsync() after a write */
168 #define F_FLUSH 128       /**< Whether server wants FLUSH to be sent by the client */
169 #define F_FUA 256         /**< Whether server wants FUA to be sent by the client */
170 #define F_ROTATIONAL 512  /**< Whether server wants the client to implement the elevator algorithm */
171 GHashTable *children; /**< NOTE(review): presumably the table of forked child processes; its users are outside this excerpt - confirm */
172 char pidfname[256]; /**< name of our PID file */
173 char pidftemplate[256]; /**< template to be used for the filename of the PID file (set by the -p option) */
174 char default_authname[] = SYSCONFDIR "/nbd-server/allow"; /**< default name of allow file */
175
176 int modernsock=0;         /**< Socket for the modern handler. Not used
177                                if a client was only specified on the
178                                command line; only port used if
179                                oldstyle is set to false (and then the
180                                command-line client isn't used, gna gna) */
181 char* modern_listen;      /**< listenaddr value for modernsock (the "listenaddr" parameter of the global config section) */
182
183 /**
184  * Types of virtualization, i.e. how the client's address is worked into the name of the exported file
185  **/
186 typedef enum {
187         VIRT_NONE=0,    /**< No virtualization */
188         VIRT_IPLIT,     /**< Literal IP address as part of the filename */
189         VIRT_IPHASH,    /**< Replacing all dots in an ip address by a / before
190                              doing the same as in IPLIT */
191         VIRT_CIDR,      /**< Every subnet in its own directory */
192 } VIRT_STYLE;
193
194 /**
195  * Variables associated with a server, i.e. with one configured export.
196  **/
197 typedef struct {
198         gchar* exportname;    /**< (unprocessed) filename of the file we're exporting */
199         off_t expected_size; /**< size of the exported file as it was told to
200                                us through configuration */
201         gchar* listenaddr;   /**< The IP address we're listening on; NULL when none was given */
202         unsigned int port;   /**< port we're exporting this file at */
203         char* authname;      /**< filename of the authorization file */
204         int flags;           /**< flags associated with this exported file (a combination of the F_* values) */
205         int socket;          /**< The socket of this server. */
206         int socket_family;   /**< family of the socket */
207         VIRT_STYLE virtstyle;/**< The style of virtualization, if any */
208         uint8_t cidrlen;     /**< The length of the mask when we use
209                                   CIDR-style virtualization */
210         gchar* prerun;       /**< command to be run after connecting a client,
211                                   but before starting to serve */
212         gchar* postrun;      /**< command that will be run after the client
213                                   disconnects */
214         gchar* servename;    /**< name of the export as selected by nbd-client */
215         int max_connections; /**< maximum number of opened connections */
216 } SERVER;
217
218 /**
219  * Variables associated with one file backing an export (see CLIENT::export).
220  **/
221 typedef struct {
222         int fhandle;      /**< file descriptor */
223         off_t startoff;   /**< starting offset of this file */
224 } FILE_INFO;
225 /** Variables associated with a client socket. */
226 typedef struct {
227         off_t exportsize;    /**< size of the file we're exporting */
228         char *clientname;    /**< peer */
229         char *exportname;    /**< (processed) filename of the file we're exporting */
230         GArray *export;    /**< array of FILE_INFO of exported files;
231                                array size is always 1 unless we're
232                                doing the multiple file option */
233         int net;             /**< The actual client socket */
234         SERVER *server;      /**< The server this client is getting data from */
235         char* difffilename;  /**< filename of the copy-on-write file, if any */
236         int difffile;        /**< filedescriptor of copyonwrite file. @todo
237                                shouldn't this be an array too? (cfr export) Or
238                                make -m and -c mutually exclusive */
239         u32 difffilelen;     /**< number of pages in difffile */
240         u32 *difmap;         /**< see comment on the global difmap for this one */
241         gboolean modern;     /**< client was negotiated using modern negotiation protocol */
242 } CLIENT;
243
244 /**
245  * Type of configuration file values; selects how a PARAM's target is written
246  **/
247 typedef enum {
248         PARAM_INT,              /**< This parameter is an integer */
249         PARAM_STRING,           /**< This parameter is a string */
250         PARAM_BOOL,             /**< This parameter is a boolean */
251 } PARAM_TYPE;
252
253 /**
254  * Configuration file values: one entry describes one key that may appear in a config file group
255  **/
256 typedef struct {
257         gchar *paramname;       /**< Name of the parameter, as it appears in
258                                   the config file */
259         gboolean required;      /**< Whether this is a required (as opposed to
260                                   optional) parameter */
261         PARAM_TYPE ptype;       /**< Type of the parameter. */
262         gpointer target;        /**< Pointer to where the data of this
263                                   parameter should be written. If ptype is
264                                   PARAM_BOOL, the data is or'ed rather than
265                                   overwritten. */
266         gint flagval;           /**< Flag mask for this parameter in case ptype
267                                   is PARAM_BOOL. */
268 } PARAM;
269
270 /**
271  * Check whether a client is allowed to connect. Works with an authorization
272  * file which contains one line per machine, no wildcards.
273  *
274  * @param opts The client who's trying to connect.
275  * @return 0 - authorization refused, 1 - OK
276  **/
277 int authorized_client(CLIENT *opts) {
278         const char *ERRMSG="Invalid entry '%s' in authfile '%s', so, refusing all connections.";
279         FILE *f ;
280         char line[LINELEN]; 
281         char *tmp;
282         struct in_addr addr;
283         struct in_addr client;
284         struct in_addr cltemp;
285         int len;
286
287         if ((f=fopen(opts->server->authname,"r"))==NULL) {
288                 msg4(LOG_INFO,"Can't open authorization file %s (%s).",
289                      opts->server->authname,strerror(errno)) ;
290                 return 1 ; 
291         }
292   
293         inet_aton(opts->clientname, &client);
294         while (fgets(line,LINELEN,f)!=NULL) {
295                 if((tmp=index(line, '/'))) {
296                         if(strlen(line)<=tmp-line) {
297                                 msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
298                                 return 0;
299                         }
300                         *(tmp++)=0;
301                         if(!inet_aton(line,&addr)) {
302                                 msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
303                                 return 0;
304                         }
305                         len=strtol(tmp, NULL, 0);
306                         addr.s_addr>>=32-len;
307                         addr.s_addr<<=32-len;
308                         memcpy(&cltemp,&client,sizeof(client));
309                         cltemp.s_addr>>=32-len;
310                         cltemp.s_addr<<=32-len;
311                         if(addr.s_addr == cltemp.s_addr) {
312                                 return 1;
313                         }
314                 }
315                 if (strncmp(line,opts->clientname,strlen(opts->clientname))==0) {
316                         fclose(f);
317                         return 1;
318                 }
319         }
320         fclose(f);
321         return 0;
322 }
323
/**
 * Read data from a file descriptor into a buffer. Keeps reading until
 * exactly len bytes have arrived; EAGAIN and EINTR are retried.
 *
 * End of file before len bytes were read, and any other read error, are
 * reported through err() (declared outside this file section; presumably
 * it does not return).
 *
 * @param f a file descriptor
 * @param buf a buffer
 * @param len the number of bytes to be read
 **/
static inline void readit(int f, void *buf, size_t len) {
	char *pos = buf;	/* byte pointer: arithmetic on void* is not ISO C */
	ssize_t res;

	while (len > 0) {
		DEBUG("*");
		res = read(f, pos, len);
		if (res > 0) {
			len -= res;
			pos += res;
		} else if (res == 0) {
			/* read() does not set errno at end of file, so do not
			 * judge this case by a stale errno value */
			err("Read failed: End of file");
		} else if (errno != EAGAIN && errno != EINTR) {
			err("Read failed: %m");
		}
	}
}
345
/**
 * Write data from a buffer into a filedescriptor. Keeps writing until all
 * len bytes have been sent; a write interrupted by a signal is retried.
 *
 * Any other failure is reported through err() (declared outside this file
 * section; presumably it does not return).
 *
 * @param f a file descriptor
 * @param buf a buffer containing data
 * @param len the number of bytes to be written
 **/
static inline void writeit(int f, void *buf, size_t len) {
	const char *pos = buf;	/* byte pointer: arithmetic on void* is not ISO C */
	ssize_t res;

	while (len > 0) {
		DEBUG("+");
		res = write(f, pos, len);
		if (res > 0) {
			len -= res;
			pos += res;
		} else if (res < 0 && errno == EINTR) {
			/* interrupted before anything was written: try again */
			continue;
		} else {
			/* never adjust len/pos with a non-positive result */
			err("Send failed: %m");
		}
	}
}
363
364 /**
365  * Print out a message about how to use nbd-server. Split out to a separate
366  * function so that we can call it from multiple places
367  */
368 void usage() {
369         printf("This is nbd-server version " VERSION "\n");
370         printf("Usage: [ip:|ip6@]port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-C configuration file] [-p PID file name] [-o section name] [-M max connections]\n"
371                "\t-r|--read-only\t\tread only\n"
372                "\t-m|--multi-file\t\tmultiple file\n"
373                "\t-c|--copy-on-write\tcopy on write\n"
374                "\t-C|--config-file\tspecify an alternate configuration file\n"
375                "\t-l|--authorize-file\tfile with list of hosts that are allowed to\n\t\t\t\tconnect.\n"
376                "\t-p|--pid-file\t\tspecify a filename to write our PID to\n"
377                "\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n"
378                "\t-M|--max-connections\tspecify the maximum number of opened connections\n\n"
379                "\tif port is set to 0, stdin is used (for running from inetd)\n"
380                "\tif file_to_export contains '%%s', it is substituted with the IP\n"
381                "\t\taddress of the machine trying to connect\n" 
382                "\tif ip is set, it contains the local IP address on which we're listening.\n\tif not, the server will listen on all local IP addresses\n");
383         printf("Using configuration file %s\n", CFILE);
384 }
385
386 /* Dumps a config file section of the given SERVER*, and exits. */
387 void dump_section(SERVER* serve, gchar* section_header) {
388         printf("[%s]\n", section_header);
389         printf("\texportname = %s\n", serve->exportname);
390         printf("\tlistenaddr = %s\n", serve->listenaddr);
391         printf("\tport = %d\n", serve->port);
392         if(serve->flags & F_READONLY) {
393                 printf("\treadonly = true\n");
394         }
395         if(serve->flags & F_MULTIFILE) {
396                 printf("\tmultifile = true\n");
397         }
398         if(serve->flags & F_COPYONWRITE) {
399                 printf("\tcopyonwrite = true\n");
400         }
401         if(serve->expected_size) {
402                 printf("\tfilesize = %lld\n", (long long int)serve->expected_size);
403         }
404         if(serve->authname) {
405                 printf("\tauthfile = %s\n", serve->authname);
406         }
407         exit(EXIT_SUCCESS);
408 }
409
410 /**
411  * Parse the command line.
412  *
413  * @param argc the argc argument to main()
414  * @param argv the argv argument to main()
415  **/
416 SERVER* cmdline(int argc, char *argv[]) {
417         int i=0;
418         int nonspecial=0;
419         int c;
420         struct option long_options[] = {
421                 {"read-only", no_argument, NULL, 'r'},
422                 {"multi-file", no_argument, NULL, 'm'},
423                 {"copy-on-write", no_argument, NULL, 'c'},
424                 {"dont-fork", no_argument, NULL, 'd'},
425                 {"authorize-file", required_argument, NULL, 'l'},
426                 {"config-file", required_argument, NULL, 'C'},
427                 {"pid-file", required_argument, NULL, 'p'},
428                 {"output-config", required_argument, NULL, 'o'},
429                 {"max-connection", required_argument, NULL, 'M'},
430                 {0,0,0,0}
431         };
432         SERVER *serve;
433         off_t es;
434         size_t last;
435         char suffix;
436         gboolean do_output=FALSE;
437         gchar* section_header="";
438         gchar** addr_port;
439
440         if(argc==1) {
441                 return NULL;
442         }
443         serve=g_new0(SERVER, 1);
444         serve->authname = g_strdup(default_authname);
445         serve->virtstyle=VIRT_IPLIT;
446         while((c=getopt_long(argc, argv, "-C:cdl:mo:rp:M:", long_options, &i))>=0) {
447                 switch (c) {
448                 case 1:
449                         /* non-option argument */
450                         switch(nonspecial++) {
451                         case 0:
452                                 if(strchr(optarg, ':') == strrchr(optarg, ':')) {
453                                         addr_port=g_strsplit(optarg, ":", 2);
454
455                                         /* Check for "@" - maybe user using this separator
456                                                  for IPv4 address */
457                                         if(!addr_port[1]) {
458                                                 g_strfreev(addr_port);
459                                                 addr_port=g_strsplit(optarg, "@", 2);
460                                         }
461                                 } else {
462                                         addr_port=g_strsplit(optarg, "@", 2);
463                                 }
464
465                                 if(addr_port[1]) {
466                                         serve->port=strtol(addr_port[1], NULL, 0);
467                                         serve->listenaddr=g_strdup(addr_port[0]);
468                                 } else {
469                                         serve->listenaddr=NULL;
470                                         serve->port=strtol(addr_port[0], NULL, 0);
471                                 }
472                                 g_strfreev(addr_port);
473                                 break;
474                         case 1:
475                                 serve->exportname = g_strdup(optarg);
476                                 if(serve->exportname[0] != '/') {
477                                         fprintf(stderr, "E: The to be exported file needs to be an absolute filename!\n");
478                                         exit(EXIT_FAILURE);
479                                 }
480                                 break;
481                         case 2:
482                                 last=strlen(optarg)-1;
483                                 suffix=optarg[last];
484                                 if (suffix == 'k' || suffix == 'K' ||
485                                     suffix == 'm' || suffix == 'M')
486                                         optarg[last] = '\0';
487                                 es = (off_t)atoll(optarg);
488                                 switch (suffix) {
489                                         case 'm':
490                                         case 'M':  es <<= 10;
491                                         case 'k':
492                                         case 'K':  es <<= 10;
493                                         default :  break;
494                                 }
495                                 serve->expected_size = es;
496                                 break;
497                         }
498                         break;
499                 case 'r':
500                         serve->flags |= F_READONLY;
501                         break;
502                 case 'm':
503                         serve->flags |= F_MULTIFILE;
504                         break;
505                 case 'o':
506                         do_output = TRUE;
507                         section_header = g_strdup(optarg);
508                         break;
509                 case 'p':
510                         strncpy(pidftemplate, optarg, 256);
511                         break;
512                 case 'c': 
513                         serve->flags |=F_COPYONWRITE;
514                         break;
515                 case 'd': 
516                         dontfork = 1;
517                         break;
518                 case 'C':
519                         g_free(config_file_pos);
520                         config_file_pos=g_strdup(optarg);
521                         break;
522                 case 'l':
523                         g_free(serve->authname);
524                         serve->authname=g_strdup(optarg);
525                         break;
526                 case 'M':
527                         serve->max_connections = strtol(optarg, NULL, 0);
528                         break;
529                 default:
530                         usage();
531                         exit(EXIT_FAILURE);
532                         break;
533                 }
534         }
535         /* What's left: the port to export, the name of the to be exported
536          * file, and, optionally, the size of the file, in that order. */
537         if(nonspecial<2) {
538                 g_free(serve);
539                 serve=NULL;
540         } else {
541                 do_oldstyle = TRUE;
542         }
543         if(do_output) {
544                 if(!serve) {
545                         g_critical("Need a complete configuration on the command line to output a config file section!");
546                         exit(EXIT_FAILURE);
547                 }
548                 dump_section(serve, section_header);
549         }
550         return serve;
551 }
552
553 /**
554  * Error codes for config file parsing; used as the code of the GError set by parse_cfile()
555  **/
556 typedef enum {
557         CFILE_NOTFOUND,         /**< The configuration file is not found */
558         CFILE_MISSING_GENERIC,  /**< The (required) group "generic" is missing */
559         CFILE_KEY_MISSING,      /**< A (required) key is missing */
560         CFILE_VALUE_INVALID,    /**< A value is syntactically invalid */
561         CFILE_VALUE_UNSUPPORTED,/**< A value is not supported in this build */
562         CFILE_PROGERR,          /**< Programmer error */
563         CFILE_NO_EXPORTS,       /**< A config file was specified that does not
564                                      define any exports */
565         CFILE_INCORRECT_PORT,   /**< The reserved port was specified for an
566                                      old-style export. */
567 } CFILE_ERRORS;
568
569 /**
570  * Remove a SERVER from memory. Used from the hash table
571  **/
572 void remove_server(gpointer s) {
573         SERVER *server;
574
575         server=(SERVER*)s;
576         g_free(server->exportname);
577         if(server->authname)
578                 g_free(server->authname);
579         if(server->listenaddr)
580                 g_free(server->listenaddr);
581         if(server->prerun)
582                 g_free(server->prerun);
583         if(server->postrun)
584                 g_free(server->postrun);
585         g_free(server);
586 }
587
588 /**
589  * duplicate server
590  * @param s the old server we want to duplicate
591  * @return new duplicated server
592  **/
593 SERVER* dup_serve(SERVER *s) {
594         SERVER *serve = NULL;
595
596         serve=g_new0(SERVER, 1);
597         if(serve == NULL)
598                 return NULL;
599
600         if(s->exportname)
601                 serve->exportname = g_strdup(s->exportname);
602
603         serve->expected_size = s->expected_size;
604
605         if(s->listenaddr)
606                 serve->listenaddr = g_strdup(s->listenaddr);
607
608         serve->port = s->port;
609
610         if(s->authname)
611                 serve->authname = strdup(s->authname);
612
613         serve->flags = s->flags;
614         serve->socket = s->socket;
615         serve->socket_family = s->socket_family;
616         serve->virtstyle = s->virtstyle;
617         serve->cidrlen = s->cidrlen;
618
619         if(s->prerun)
620                 serve->prerun = g_strdup(s->prerun);
621
622         if(s->postrun)
623                 serve->postrun = g_strdup(s->postrun);
624         
625         if(s->servename)
626                 serve->servename = g_strdup(s->servename);
627
628         serve->max_connections = s->max_connections;
629
630         return serve;
631 }
632
633 /**
634  * append new server to array
635  * @param s server
636  * @param a server array
637  * @return 0 success, -1 error
638  */
639 int append_serve(SERVER *s, GArray *a) {
640         SERVER *ns = NULL;
641         struct addrinfo hints;
642         struct addrinfo *ai = NULL;
643         struct addrinfo *rp = NULL;
644         char   host[NI_MAXHOST];
645         gchar  *port = NULL;
646         int e;
647         int ret;
648
649         if(!s) {
650                 err("Invalid parsing server");
651                 return -1;
652         }
653
654         port = g_strdup_printf("%d", s->port);
655
656         memset(&hints,'\0',sizeof(hints));
657         hints.ai_family = AF_UNSPEC;
658         hints.ai_socktype = SOCK_STREAM;
659         hints.ai_flags = AI_ADDRCONFIG | AI_PASSIVE;
660         hints.ai_protocol = IPPROTO_TCP;
661
662         e = getaddrinfo(s->listenaddr, port, &hints, &ai);
663
664         if (port)
665                 g_free(port);
666
667         if(e == 0) {
668                 for (rp = ai; rp != NULL; rp = rp->ai_next) {
669                         e = getnameinfo(rp->ai_addr, rp->ai_addrlen, host, sizeof(host), NULL, 0, NI_NUMERICHOST);
670
671                         if (e != 0) { // error
672                                 fprintf(stderr, "getnameinfo: %s\n", gai_strerror(e));
673                                 continue;
674                         }
675
676                         // duplicate server and set listenaddr to resolved IP address
677                         ns = dup_serve (s);
678                         if (ns) {
679                                 ns->listenaddr = g_strdup(host);
680                                 ns->socket_family = rp->ai_family;
681                                 g_array_append_val(a, *ns);
682                                 free(ns);
683                                 ns = NULL;
684                         }
685                 }
686
687                 ret = 0;
688         } else {
689                 fprintf(stderr, "getaddrinfo failed on listen host/address: %s (%s)\n", s->listenaddr ? s->listenaddr : "any", gai_strerror(e));
690                 ret = -1;
691         }
692
693         if (ai)
694                 freeaddrinfo(ai);
695
696         return ret;
697 }
698
699 /**
700  * Parse the config file.
701  *
702  * @param f the name of the config file
703  * @param e a GError. @see CFILE_ERRORS for what error values this function can
704  *      return.
705  * @return a GArray of SERVER structures. If the config file is empty or does not
706  *      exist, returns an empty GArray; if the config file contains an
707  *      error, returns NULL, and e is set appropriately
708  **/
709 GArray* parse_cfile(gchar* f, GError** e) {
710         const char* DEFAULT_ERROR = "Could not parse %s in group %s: %s";
711         const char* MISSING_REQUIRED_ERROR = "Could not find required value %s in group %s: %s";
712         SERVER s;
713         gchar *virtstyle=NULL;
714         PARAM lp[] = {
715                 { "exportname", TRUE,   PARAM_STRING,   &(s.exportname),        0 },
716                 { "port",       TRUE,   PARAM_INT,      &(s.port),              0 },
717                 { "authfile",   FALSE,  PARAM_STRING,   &(s.authname),          0 },
718                 { "filesize",   FALSE,  PARAM_INT,      &(s.expected_size),     0 },
719                 { "virtstyle",  FALSE,  PARAM_STRING,   &(virtstyle),           0 },
720                 { "prerun",     FALSE,  PARAM_STRING,   &(s.prerun),            0 },
721                 { "postrun",    FALSE,  PARAM_STRING,   &(s.postrun),           0 },
722                 { "readonly",   FALSE,  PARAM_BOOL,     &(s.flags),             F_READONLY },
723                 { "multifile",  FALSE,  PARAM_BOOL,     &(s.flags),             F_MULTIFILE },
724                 { "copyonwrite", FALSE, PARAM_BOOL,     &(s.flags),             F_COPYONWRITE },
725                 { "sparse_cow", FALSE,  PARAM_BOOL,     &(s.flags),             F_SPARSE },
726                 { "sdp",        FALSE,  PARAM_BOOL,     &(s.flags),             F_SDP },
727                 { "sync",       FALSE,  PARAM_BOOL,     &(s.flags),             F_SYNC },
728                 { "flush",      FALSE,  PARAM_BOOL,     &(s.flags),             F_FLUSH },
729                 { "fua",        FALSE,  PARAM_BOOL,     &(s.flags),             F_FUA },
730                 { "rotational", FALSE,  PARAM_BOOL,     &(s.flags),             F_ROTATIONAL },
731                 { "listenaddr", FALSE,  PARAM_STRING,   &(s.listenaddr),        0 },
732                 { "maxconnections", FALSE, PARAM_INT,   &(s.max_connections),   0 },
733         };
734         const int lp_size=sizeof(lp)/sizeof(PARAM);
735         PARAM gp[] = {
736                 { "user",       FALSE, PARAM_STRING,    &runuser,       0 },
737                 { "group",      FALSE, PARAM_STRING,    &rungroup,      0 },
738                 { "oldstyle",   FALSE, PARAM_BOOL,      &do_oldstyle,   1 },
739                 { "listenaddr", FALSE, PARAM_STRING,    &modern_listen, 0 },
740         };
741         PARAM* p=gp;
742         int p_size=sizeof(gp)/sizeof(PARAM);
743         GKeyFile *cfile;
744         GError *err = NULL;
745         const char *err_msg=NULL;
746         GQuark errdomain;
747         GArray *retval=NULL;
748         gchar **groups;
749         gboolean value;
750         gchar* startgroup;
751         gint i;
752         gint j;
753
754         errdomain = g_quark_from_string("parse_cfile");
755         cfile = g_key_file_new();
756         retval = g_array_new(FALSE, TRUE, sizeof(SERVER));
757         if(!g_key_file_load_from_file(cfile, f, G_KEY_FILE_KEEP_COMMENTS |
758                         G_KEY_FILE_KEEP_TRANSLATIONS, &err)) {
759                 g_set_error(e, errdomain, CFILE_NOTFOUND, "Could not open config file %s.", f);
760                 g_key_file_free(cfile);
761                 return retval;
762         }
763         startgroup = g_key_file_get_start_group(cfile);
764         if(!startgroup || strcmp(startgroup, "generic")) {
765                 g_set_error(e, errdomain, CFILE_MISSING_GENERIC, "Config file does not contain the [generic] group!");
766                 g_key_file_free(cfile);
767                 return NULL;
768         }
769         groups = g_key_file_get_groups(cfile, NULL);
770         for(i=0;groups[i];i++) {
771                 memset(&s, '\0', sizeof(SERVER));
772
773                 /* After the [generic] group, start parsing exports */
774                 if(i==1) {
775                         p=lp;
776                         p_size=lp_size;
777                 } 
778                 for(j=0;j<p_size;j++) {
779                         g_assert(p[j].target != NULL);
780                         g_assert(p[j].ptype==PARAM_INT||p[j].ptype==PARAM_STRING||p[j].ptype==PARAM_BOOL);
781                         switch(p[j].ptype) {
782                                 case PARAM_INT:
783                                         *((gint*)p[j].target) =
784                                                 g_key_file_get_integer(cfile,
785                                                                 groups[i],
786                                                                 p[j].paramname,
787                                                                 &err);
788                                         break;
789                                 case PARAM_STRING:
790                                         *((gchar**)p[j].target) =
791                                                 g_key_file_get_string(cfile,
792                                                                 groups[i],
793                                                                 p[j].paramname,
794                                                                 &err);
795                                         break;
796                                 case PARAM_BOOL:
797                                         value = g_key_file_get_boolean(cfile,
798                                                         groups[i],
799                                                         p[j].paramname, &err);
800                                         if(!err) {
801                                                 if(value) {
802                                                         *((gint*)p[j].target) |= p[j].flagval;
803                                                 } else {
804                                                         *((gint*)p[j].target) &= ~(p[j].flagval);
805                                                 }
806                                         }
807                                         break;
808                         }
809                         if(!strcmp(p[j].paramname, "port") && !strcmp(p[j].target, NBD_DEFAULT_PORT)) {
810                                 g_set_error(e, errdomain, CFILE_INCORRECT_PORT, "Config file specifies default port for oldstyle export");
811                                 g_key_file_free(cfile);
812                                 return NULL;
813                         }
814                         if(err) {
815                                 if(err->code == G_KEY_FILE_ERROR_KEY_NOT_FOUND) {
816                                         if(!p[j].required) {
817                                                 /* Ignore not-found error for optional values */
818                                                 g_clear_error(&err);
819                                                 continue;
820                                         } else {
821                                                 err_msg = MISSING_REQUIRED_ERROR;
822                                         }
823                                 } else {
824                                         err_msg = DEFAULT_ERROR;
825                                 }
826                                 g_set_error(e, errdomain, CFILE_VALUE_INVALID, err_msg, p[j].paramname, groups[i], err->message);
827                                 g_array_free(retval, TRUE);
828                                 g_error_free(err);
829                                 g_key_file_free(cfile);
830                                 return NULL;
831                         }
832                 }
833                 if(virtstyle) {
834                         if(!strncmp(virtstyle, "none", 4)) {
835                                 s.virtstyle=VIRT_NONE;
836                         } else if(!strncmp(virtstyle, "ipliteral", 9)) {
837                                 s.virtstyle=VIRT_IPLIT;
838                         } else if(!strncmp(virtstyle, "iphash", 6)) {
839                                 s.virtstyle=VIRT_IPHASH;
840                         } else if(!strncmp(virtstyle, "cidrhash", 8)) {
841                                 s.virtstyle=VIRT_CIDR;
842                                 if(strlen(virtstyle)<10) {
843                                         g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s: missing length", virtstyle, groups[i]);
844                                         g_array_free(retval, TRUE);
845                                         g_key_file_free(cfile);
846                                         return NULL;
847                                 }
848                                 s.cidrlen=strtol(virtstyle+8, NULL, 0);
849                         } else {
850                                 g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s", virtstyle, groups[i]);
851                                 g_array_free(retval, TRUE);
852                                 g_key_file_free(cfile);
853                                 return NULL;
854                         }
855                         if(s.port && !do_oldstyle) {
856                                 g_warning("A port was specified, but oldstyle exports were not requested. This may not do what you expect.");
857                                 g_warning("Please read 'man 5 nbd-server' and search for oldstyle for more info");
858                         }
859                 } else {
860                         s.virtstyle=VIRT_IPLIT;
861                 }
862                 /* Don't need to free this, it's not our string */
863                 virtstyle=NULL;
864                 /* Don't append values for the [generic] group */
865                 if(i>0) {
866                         s.socket_family = AF_UNSPEC;
867                         s.servename = groups[i];
868
869                         append_serve(&s, retval);
870                 } else {
871                         if(!do_oldstyle) {
872                                 lp[1].required = 0;
873                         }
874                 }
875 #ifndef WITH_SDP
876                 if(s.flags & F_SDP) {
877                         g_set_error(e, errdomain, CFILE_VALUE_UNSUPPORTED, "This nbd-server was built without support for SDP, yet group %s uses it", groups[i]);
878                         g_array_free(retval, TRUE);
879                         g_key_file_free(cfile);
880                         return NULL;
881                 }
882 #endif
883         }
884         if(i==1) {
885                 g_set_error(e, errdomain, CFILE_NO_EXPORTS, "The config file does not specify any exports");
886         }
887         g_key_file_free(cfile);
888         return retval;
889 }
890
891 /**
892  * Signal handler for SIGCHLD
893  * @param s the signal we're handling (must be SIGCHLD, or something
894  * is severely wrong)
895  **/
896 void sigchld_handler(int s) {
897         int status;
898         int* i;
899         pid_t pid;
900
901         while((pid=waitpid(-1, &status, WNOHANG)) > 0) {
902                 if(WIFEXITED(status)) {
903                         msg3(LOG_INFO, "Child exited with %d", WEXITSTATUS(status));
904                 }
905                 i=g_hash_table_lookup(children, &pid);
906                 if(!i) {
907                         msg3(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid);
908                 } else {
909                         DEBUG2("Removing %d from the list of children", pid);
910                         g_hash_table_remove(children, &pid);
911                 }
912         }
913 }
914
915 /**
916  * Kill a child. Called from sigterm_handler::g_hash_table_foreach.
917  *
918  * @param key the key
919  * @param value the value corresponding to the above key
920  * @param user_data a pointer which we always set to 1, so that we know what
921  * will happen next.
922  **/
923 void killchild(gpointer key, gpointer value, gpointer user_data) {
924         pid_t *pid=value;
925         int *parent=user_data;
926
927         kill(*pid, SIGTERM);
928         *parent=1;
929 }
930
931 /**
932  * Handle SIGTERM and dispatch it to our children
933  * @param s the signal we're handling (must be SIGTERM, or something
934  * is severely wrong).
935  **/
936 void sigterm_handler(int s) {
937         int parent=0;
938
939         g_hash_table_foreach(children, killchild, &parent);
940
941         if(parent) {
942                 unlink(pidfname);
943         }
944
945         exit(EXIT_SUCCESS);
946 }
947
948 /**
949  * Detect the size of a file.
950  *
951  * @param fhandle An open filedescriptor
952  * @return the size of the file, or OFFT_MAX if detection was
953  * impossible.
954  **/
955 off_t size_autodetect(int fhandle) {
956         off_t es;
957         u64 bytes;
958         struct stat stat_buf;
959         int error;
960
961 #ifdef HAVE_SYS_MOUNT_H
962 #ifdef HAVE_SYS_IOCTL_H
963 #ifdef BLKGETSIZE64
964         DEBUG("looking for export size with ioctl BLKGETSIZE64\n");
965         if (!ioctl(fhandle, BLKGETSIZE64, &bytes) && bytes) {
966                 return (off_t)bytes;
967         }
968 #endif /* BLKGETSIZE64 */
969 #endif /* HAVE_SYS_IOCTL_H */
970 #endif /* HAVE_SYS_MOUNT_H */
971
972         DEBUG("looking for fhandle size with fstat\n");
973         stat_buf.st_size = 0;
974         error = fstat(fhandle, &stat_buf);
975         if (!error) {
976                 if(stat_buf.st_size > 0)
977                         return (off_t)stat_buf.st_size;
978         } else {
979                 err("fstat failed: %m");
980         }
981
982         DEBUG("looking for fhandle size with lseek SEEK_END\n");
983         es = lseek(fhandle, (off_t)0, SEEK_END);
984         if (es > ((off_t)0)) {
985                 return es;
986         } else {
987                 DEBUG2("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4)));
988         }
989
990         err("Could not find size of exported block device: %m");
991         return OFFT_MAX;
992 }
993
994 /**
995  * Get the file handle and offset, given an export offset.
996  *
997  * @param export An array of export files
998  * @param a The offset to get corresponding file/offset for
999  * @param fhandle [out] File descriptor
1000  * @param foffset [out] Offset into fhandle
1001  * @param maxbytes [out] Tells how many bytes can be read/written
1002  * from fhandle starting at foffset (0 if there is no limit)
1003  * @return 0 on success, -1 on failure
1004  **/
1005 int get_filepos(GArray* export, off_t a, int* fhandle, off_t* foffset, size_t* maxbytes ) {
1006         /* Negative offset not allowed */
1007         if(a < 0)
1008                 return -1;
1009
1010         /* Binary search for last file with starting offset <= a */
1011         FILE_INFO fi;
1012         int start = 0;
1013         int end = export->len - 1;
1014         while( start <= end ) {
1015                 int mid = (start + end) / 2;
1016                 fi = g_array_index(export, FILE_INFO, mid);
1017                 if( fi.startoff < a ) {
1018                         start = mid + 1;
1019                 } else if( fi.startoff > a ) {
1020                         end = mid - 1;
1021                 } else {
1022                         start = end = mid;
1023                         break;
1024                 }
1025         }
1026
1027         /* end should never go negative, since first startoff is 0 and a >= 0 */
1028         g_assert(end >= 0);
1029
1030         fi = g_array_index(export, FILE_INFO, end);
1031         *fhandle = fi.fhandle;
1032         *foffset = a - fi.startoff;
1033         *maxbytes = 0;
1034         if( end+1 < export->len ) {
1035                 FILE_INFO fi_next = g_array_index(export, FILE_INFO, end+1);
1036                 *maxbytes = fi_next.startoff - a;
1037         }
1038
1039         return 0;
1040 }
1041
/**
 * Move a file descriptor to an absolute position, reporting failure
 * through err().
 * @param handle a filedescriptor
 * @param a position to seek to (measured from the start of the file)
 * @todo get rid of this; lastpoint is a global variable right now, but it
 * shouldn't be. If we pass it on as a parameter, that makes things a *lot*
 * easier.
 **/
void myseek(int handle,off_t a) {
	const off_t reached = lseek(handle, a, SEEK_SET);

	if (reached < 0)
		err("Can not seek locally!\n");
}
1055
1056 /**
1057  * Write an amount of bytes at a given offset to the right file. This
1058  * abstracts the write-side of the multiple file option.
1059  *
1060  * @param a The offset where the write should start
1061  * @param buf The buffer to write from
1062  * @param len The length of buf
1063  * @param client The client we're serving for
1064  * @return The number of bytes actually written, or -1 in case of an error
1065  **/
1066 ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
1067         int fhandle;
1068         off_t foffset;
1069         size_t maxbytes;
1070         ssize_t retval;
1071
1072         if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
1073                 return -1;
1074         if(maxbytes && len > maxbytes)
1075                 len = maxbytes;
1076
1077         DEBUG5("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle, foffset, len, fua);
1078
1079         myseek(fhandle, foffset);
1080         retval = write(fhandle, buf, len);
1081         if(client->server->flags & F_SYNC) {
1082                 fsync(fhandle);
1083         } else if (fua) {
1084 #ifdef USE_SYNC_FILE_RANGE
1085                 sync_file_range(fhandle, foffset, len,
1086                                 SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE |
1087                                 SYNC_FILE_RANGE_WAIT_AFTER);
1088 #else
1089                 fdatasync(fhandle);
1090 #endif
1091         }
1092         return retval;
1093 }
1094
1095 /**
1096  * Call rawexpwrite repeatedly until all data has been written.
1097  * @return 0 on success, nonzero on failure
1098  **/
1099 int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
1100         ssize_t ret=0;
1101
1102         while(len > 0 && (ret=rawexpwrite(a, buf, len, client, fua)) > 0 ) {
1103                 a += ret;
1104                 buf += ret;
1105                 len -= ret;
1106         }
1107         return (ret < 0 || len != 0);
1108 }
1109
1110 /**
1111  * Read an amount of bytes at a given offset from the right file. This
1112  * abstracts the read-side of the multiple files option.
1113  *
1114  * @param a The offset where the read should start
1115  * @param buf A buffer to read into
1116  * @param len The size of buf
1117  * @param client The client we're serving for
1118  * @return The number of bytes actually read, or -1 in case of an
1119  * error.
1120  **/
1121 ssize_t rawexpread(off_t a, char *buf, size_t len, CLIENT *client) {
1122         int fhandle;
1123         off_t foffset;
1124         size_t maxbytes;
1125
1126         if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
1127                 return -1;
1128         if(maxbytes && len > maxbytes)
1129                 len = maxbytes;
1130
1131         DEBUG4("(READ from fd %d offset %llu len %u), ", fhandle, foffset, len);
1132
1133         myseek(fhandle, foffset);
1134         return read(fhandle, buf, len);
1135 }
1136
1137 /**
1138  * Call rawexpread repeatedly until all data has been read.
1139  * @return 0 on success, nonzero on failure
1140  **/
1141 int rawexpread_fully(off_t a, char *buf, size_t len, CLIENT *client) {
1142         ssize_t ret=0;
1143
1144         while(len > 0 && (ret=rawexpread(a, buf, len, client)) > 0 ) {
1145                 a += ret;
1146                 buf += ret;
1147                 len -= ret;
1148         }
1149         return (ret < 0 || len != 0);
1150 }
1151
1152 /**
1153  * Read an amount of bytes at a given offset from the right file. This
1154  * abstracts the read-side of the copyonwrite stuff, and calls
1155  * rawexpread() with the right parameters to do the actual work.
1156  * @param a The offset where the read should start
1157  * @param buf A buffer to read into
1158  * @param len The size of buf
1159  * @param client The client we're going to read for
1160  * @return 0 on success, nonzero on failure
1161  **/
1162 int expread(off_t a, char *buf, size_t len, CLIENT *client) {
1163         off_t rdlen, offset;
1164         off_t mapcnt, mapl, maph, pagestart;
1165
1166         if (!(client->server->flags & F_COPYONWRITE))
1167                 return(rawexpread_fully(a, buf, len, client));
1168         DEBUG3("Asked to read %d bytes at %llu.\n", len, (unsigned long long)a);
1169
1170         mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
1171
1172         for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
1173                 pagestart=mapcnt*DIFFPAGESIZE;
1174                 offset=a-pagestart;
1175                 rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
1176                         len : (size_t)DIFFPAGESIZE-offset;
1177                 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1178                         DEBUG3("Page %llu is at %lu\n", (unsigned long long)mapcnt,
1179                                (unsigned long)(client->difmap[mapcnt]));
1180                         myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset);
1181                         if (read(client->difffile, buf, rdlen) != rdlen) return -1;
1182                 } else { /* the block is not there */
1183                         DEBUG2("Page %llu is not here, we read the original one\n",
1184                                (unsigned long long)mapcnt);
1185                         if(rawexpread_fully(a, buf, rdlen, client)) return -1;
1186                 }
1187                 len-=rdlen; a+=rdlen; buf+=rdlen;
1188         }
1189         return 0;
1190 }
1191
1192 /**
1193  * Write an amount of bytes at a given offset to the right file. This
1194  * abstracts the write-side of the copyonwrite option, and calls
1195  * rawexpwrite() with the right parameters to do the actual work.
1196  *
1197  * @param a The offset where the write should start
1198  * @param buf The buffer to write from
1199  * @param len The length of buf
1200  * @param client The client we're going to write for.
1201  * @return 0 on success, nonzero on failure
1202  **/
1203 int expwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
1204         char pagebuf[DIFFPAGESIZE];
1205         off_t mapcnt,mapl,maph;
1206         off_t wrlen,rdlen; 
1207         off_t pagestart;
1208         off_t offset;
1209
1210         if (!(client->server->flags & F_COPYONWRITE))
1211                 return(rawexpwrite_fully(a, buf, len, client, fua)); 
1212         DEBUG3("Asked to write %d bytes at %llu.\n", len, (unsigned long long)a);
1213
1214         mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
1215
1216         for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
1217                 pagestart=mapcnt*DIFFPAGESIZE ;
1218                 offset=a-pagestart ;
1219                 wrlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
1220                         len : (size_t)DIFFPAGESIZE-offset;
1221
1222                 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1223                         DEBUG3("Page %llu is at %lu\n", (unsigned long long)mapcnt,
1224                                (unsigned long)(client->difmap[mapcnt])) ;
1225                         myseek(client->difffile,
1226                                         client->difmap[mapcnt]*DIFFPAGESIZE+offset);
1227                         if (write(client->difffile, buf, wrlen) != wrlen) return -1 ;
1228                 } else { /* the block is not there */
1229                         myseek(client->difffile,client->difffilelen*DIFFPAGESIZE) ;
1230                         client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++;
1231                         DEBUG3("Page %llu is not here, we put it at %lu\n",
1232                                (unsigned long long)mapcnt,
1233                                (unsigned long)(client->difmap[mapcnt]));
1234                         rdlen=DIFFPAGESIZE ;
1235                         if (rawexpread_fully(pagestart, pagebuf, rdlen, client))
1236                                 return -1;
1237                         memcpy(pagebuf+offset,buf,wrlen) ;
1238                         if (write(client->difffile, pagebuf, DIFFPAGESIZE) !=
1239                                         DIFFPAGESIZE)
1240                                 return -1;
1241                 }                                                   
1242                 len-=wrlen ; a+=wrlen ; buf+=wrlen ;
1243         }
1244         if (client->server->flags & F_SYNC) {
1245                 fsync(client->difffile);
1246         } else if (fua) {
1247                 /* open question: would it be cheaper to do multiple sync_file_ranges?
1248                    as we iterate through the above?
1249                  */
1250                 fdatasync(client->difffile);
1251         }
1252         return 0;
1253 }
1254
1255 int expflush(CLIENT *client) {
1256         int fhandle;
1257         off_t foffset;
1258         size_t maxbytes;
1259         gint i;
1260
1261         if (client->server->flags & F_COPYONWRITE) {
1262                 return fsync(client->difffile);
1263         }
1264         
1265         for (i = 0; i < client->export->len; i++) {
1266                 FILE_INFO fi = g_array_index(client->export, FILE_INFO, i);
1267                 if (fsync(fi.fhandle) < 0)
1268                         return -1;
1269         }
1270         
1271         return 0;
1272 }
1273
1274 /**
1275  * Do the initial negotiation.
1276  *
1277  * @param client The client we're negotiating with.
1278  **/
1279 CLIENT* negotiate(int net, CLIENT *client, GArray* servers) {
1280         char zeros[128];
1281         uint64_t size_host;
1282         uint32_t flags = NBD_FLAG_HAS_FLAGS;
1283         uint16_t smallflags = 0;
1284         uint64_t magic;
1285
1286         memset(zeros, '\0', sizeof(zeros));
1287         if(!client || !client->modern) {
1288                 /* common */
1289                 if (write(net, INIT_PASSWD, 8) < 0) {
1290                         err_nonfatal("Negotiation failed: %m");
1291                         if(client)
1292                                 exit(EXIT_FAILURE);
1293                 }
1294                 if(!client || client->modern) {
1295                         /* modern */
1296                         magic = htonll(opts_magic);
1297                 } else {
1298                         /* oldstyle */
1299                         magic = htonll(cliserv_magic);
1300                 }
1301                 if (write(net, &magic, sizeof(magic)) < 0) {
1302                         err_nonfatal("Negotiation failed: %m");
1303                         if(client)
1304                                 exit(EXIT_FAILURE);
1305                 }
1306         }
1307         if(!client) {
1308                 /* modern */
1309                 uint32_t reserved;
1310                 uint32_t opt;
1311                 uint32_t namelen;
1312                 char* name;
1313                 int i;
1314
1315                 if(!servers)
1316                         err("programmer error");
1317                 if (write(net, &smallflags, sizeof(uint16_t)) < 0)
1318                         err("Negotiation failed: %m");
1319                 if (read(net, &reserved, sizeof(reserved)) < 0)
1320                         err("Negotiation failed: %m");
1321                 if (read(net, &magic, sizeof(magic)) < 0)
1322                         err("Negotiation failed: %m");
1323                 magic = ntohll(magic);
1324                 if(magic != opts_magic) {
1325                         close(net);
1326                         return NULL;
1327                 }
1328                 if (read(net, &opt, sizeof(opt)) < 0)
1329                         err("Negotiation failed: %m");
1330                 opt = ntohl(opt);
1331                 if(opt != NBD_OPT_EXPORT_NAME) {
1332                         close(net);
1333                         return NULL;
1334                 }
1335                 if (read(net, &namelen, sizeof(namelen)) < 0)
1336                         err("Negotiation failed: %m");
1337                 namelen = ntohl(namelen);
1338                 name = malloc(namelen+1);
1339                 name[namelen]=0;
1340                 if (read(net, name, namelen) < 0)
1341                         err("Negotiation failed: %m");
1342                 for(i=0; i<servers->len; i++) {
1343                         SERVER* serve = &(g_array_index(servers, SERVER, i));
1344                         if(!strcmp(serve->servename, name)) {
1345                                 CLIENT* client = g_new0(CLIENT, 1);
1346                                 client->server = serve;
1347                                 client->exportsize = OFFT_MAX;
1348                                 client->net = net;
1349                                 client->modern = TRUE;
1350                                 free(name);
1351                                 return client;
1352                         }
1353                 }
1354                 free(name);
1355                 return NULL;
1356         }
1357         /* common */
1358         size_host = htonll((u64)(client->exportsize));
1359         if (write(net, &size_host, 8) < 0)
1360                 err("Negotiation failed: %m");
1361         if (client->server->flags & F_READONLY)
1362                 flags |= NBD_FLAG_READ_ONLY;
1363         if (client->server->flags & F_FLUSH)
1364                 flags |= NBD_FLAG_SEND_FLUSH;
1365         if (client->server->flags & F_FUA)
1366                 flags |= NBD_FLAG_SEND_FUA;
1367         if (client->server->flags & F_ROTATIONAL)
1368                 flags |= NBD_FLAG_ROTATIONAL;
1369         if (!client->modern) {
1370                 /* oldstyle */
1371                 flags = htonl(flags);
1372                 if (write(client->net, &flags, 4) < 0)
1373                         err("Negotiation failed: %m");
1374         } else {
1375                 /* modern */
1376                 smallflags = (uint16_t)(flags & ~((uint16_t)0));
1377                 smallflags = htons(smallflags);
1378                 if (write(client->net, &smallflags, sizeof(smallflags)) < 0) {
1379                         err("Negotiation failed: %m");
1380                 }
1381         }
1382         /* common */
1383         if (write(client->net, zeros, 124) < 0)
1384                 err("Negotiation failed: %m");
1385         return NULL;
1386 }
1387
/**
 * sending macro: writes the whole of reply to net. The expansion ends in
 * a semicolon of its own, so it can only be used as a full statement.
 */
#define SEND(net,reply) writeit( (net), &(reply), sizeof( reply ));
/**
 * error macro: sends reply with its error field set to errcode (in
 * network byte order), then resets the field for the next reply.
 */
#define ERROR(client,reply,errcode) { (reply).error = htonl(errcode); SEND((client)->net,reply); (reply).error = 0; }
1392 /**
1393  * Serve a file to a single client.
1394  *
1395  * @todo This beast needs to be split up in many tiny little manageable
1396  * pieces. Preferably with a chainsaw.
1397  *
1398  * @param client The client we're going to serve to.
1399  * @return when the client disconnects
1400  **/
1401 int mainloop(CLIENT *client) {
1402         struct nbd_request request;
1403         struct nbd_reply reply;
1404         gboolean go_on=TRUE;
1405 #ifdef DODBG
1406         int i = 0;
1407 #endif
1408         negotiate(client->net, client, NULL);
1409         DEBUG("Entering request loop!\n");
1410         reply.magic = htonl(NBD_REPLY_MAGIC);
1411         reply.error = 0;
1412         while (go_on) {
1413                 char buf[BUFSIZE];
1414                 char* p;
1415                 size_t len;
1416                 size_t currlen;
1417                 size_t writelen;
1418                 uint16_t command;
1419 #ifdef DODBG
1420                 i++;
1421                 printf("%d: ", i);
1422 #endif
1423                 readit(client->net, &request, sizeof(request));
1424                 request.from = ntohll(request.from);
1425                 request.type = ntohl(request.type);
1426                 command = request.type & NBD_CMD_MASK_COMMAND;
1427
1428                 if (command==NBD_CMD_DISC) {
1429                         msg2(LOG_INFO, "Disconnect request received.");
1430                         if (client->server->flags & F_COPYONWRITE) { 
1431                                 if (client->difmap) g_free(client->difmap) ;
1432                                 close(client->difffile);
1433                                 unlink(client->difffilename);
1434                                 free(client->difffilename);
1435                         }
1436                         go_on=FALSE;
1437                         continue;
1438                 }
1439
1440                 len = ntohl(request.len);
1441
1442                 if (request.magic != htonl(NBD_REQUEST_MAGIC))
1443                         err("Not enough magic.");
1444                 if (len > BUFSIZE - sizeof(struct nbd_reply)) {
1445                         currlen = BUFSIZE - sizeof(struct nbd_reply);
1446                         msg2(LOG_INFO, "oversized request (this is not a problem)");
1447                 } else {
1448                         currlen = len;
1449                 }
1450 #ifdef DODBG
1451                 printf("%s from %llu (%llu) len %d, ", command ? "WRITE" :
1452                                 "READ", (unsigned long long)request.from,
1453                                 (unsigned long long)request.from / 512, len);
1454 #endif
1455                 memcpy(reply.handle, request.handle, sizeof(reply.handle));
1456
1457                 if ((command==NBD_CMD_WRITE) || (command==NBD_CMD_READ)) {
1458                         if ((request.from + len) > (OFFT_MAX)) {
1459                                 DEBUG("[Number too large!]");
1460                                 ERROR(client, reply, EINVAL);
1461                                 continue;
1462                         }
1463
1464                         if (((ssize_t)((off_t)request.from + len) > client->exportsize)) {
1465                                 DEBUG("[RANGE!]");
1466                                 ERROR(client, reply, EINVAL);
1467                                 continue;
1468                         }
1469                 }
1470
1471                 if (command==NBD_CMD_WRITE) {
1472                         DEBUG("wr: net->buf, ");
1473                         while(len > 0) {
1474                                 readit(client->net, buf, currlen);
1475                                 DEBUG("buf->exp, ");
1476                                 if ((client->server->flags & F_READONLY) ||
1477                                     (client->server->flags & F_AUTOREADONLY)) {
1478                                         DEBUG("[WRITE to READONLY!]");
1479                                         ERROR(client, reply, EPERM);
1480                                         continue;
1481                                 }
1482                                 if (expwrite(request.from, buf, len, client,
1483                                              request.type & NBD_CMD_FLAG_FUA)) {
1484                                         DEBUG("Write failed: %m" );
1485                                         ERROR(client, reply, errno);
1486                                         continue;
1487                                 }
1488                                 SEND(client->net, reply);
1489                                 DEBUG("OK!\n");
1490                                 len -= currlen;
1491                                 currlen = (len < BUFSIZE) ? len : BUFSIZE;
1492                         }
1493                         continue;
1494                 }
1495
1496                 if (command==NBD_CMD_FLUSH) {
1497                         DEBUG("fl: ");
1498                         if (expflush(client)) {
1499                                 DEBUG("Flush failed: %m");
1500                                 ERROR(client, reply, errno);
1501                                 continue;
1502                         }
1503                         SEND(client->net, reply);
1504                         DEBUG("OK!\n");
1505                         continue;
1506                 }
1507
1508                 if (command==NBD_CMD_READ) {
1509                         DEBUG("exp->buf, ");
1510                         memcpy(buf, &reply, sizeof(struct nbd_reply));
1511                         p = buf + sizeof(struct nbd_reply);
1512                         writelen = currlen + sizeof(struct nbd_reply);
1513                         while(len > 0) {
1514                                 if (expread(request.from, p, currlen, client)) {
1515                                         DEBUG("Read failed: %m");
1516                                         ERROR(client, reply, errno);
1517                                         continue;
1518                                 }
1519                                 
1520                                 DEBUG("buf->net, ");
1521                                 writeit(client->net, buf, writelen);
1522                                 len -= currlen;
1523                                 currlen = (len < BUFSIZE) ? len : BUFSIZE;
1524                                 p = buf;
1525                                 writelen = currlen;
1526                         }
1527                         DEBUG("OK!\n");
1528                         continue;
1529                 }
1530
1531                 DEBUG ("Ignoring unknown command\n");
1532         }
1533         return 0;
1534 }
1535
1536 /**
1537  * Set up client export array, which is an array of FILE_INFO.
1538  * Also, split a single exportfile into multiple ones, if that was asked.
1539  * @param client information on the client which we want to setup export for
1540  **/
1541 void setupexport(CLIENT* client) {
1542         int i;
1543         off_t laststartoff = 0, lastsize = 0;
1544         int multifile = (client->server->flags & F_MULTIFILE);
1545
1546         client->export = g_array_new(TRUE, TRUE, sizeof(FILE_INFO));
1547
1548         /* If multi-file, open as many files as we can.
1549          * If not, open exactly one file.
1550          * Calculate file sizes as we go to get total size. */
1551         for(i=0; ; i++) {
1552                 FILE_INFO fi;
1553                 gchar *tmpname;
1554                 gchar* error_string;
1555                 mode_t mode = (client->server->flags & F_READONLY) ? O_RDONLY : O_RDWR;
1556
1557                 if(multifile) {
1558                         tmpname=g_strdup_printf("%s.%d", client->exportname, i);
1559                 } else {
1560                         tmpname=g_strdup(client->exportname);
1561                 }
1562                 DEBUG2( "Opening %s\n", tmpname );
1563                 fi.fhandle = open(tmpname, mode);
1564                 if(fi.fhandle == -1 && mode == O_RDWR) {
1565                         /* Try again because maybe media was read-only */
1566                         fi.fhandle = open(tmpname, O_RDONLY);
1567                         if(fi.fhandle != -1) {
1568                                 /* Opening the base file in copyonwrite mode is
1569                                  * okay */
1570                                 if(!(client->server->flags & F_COPYONWRITE)) {
1571                                         client->server->flags |= F_AUTOREADONLY;
1572                                         client->server->flags |= F_READONLY;
1573                                 }
1574                         }
1575                 }
1576                 if(fi.fhandle == -1) {
1577                         if(multifile && i>0)
1578                                 break;
1579                         error_string=g_strdup_printf(
1580                                 "Could not open exported file %s: %%m",
1581                                 tmpname);
1582                         err(error_string);
1583                 }
1584                 fi.startoff = laststartoff + lastsize;
1585                 g_array_append_val(client->export, fi);
1586                 g_free(tmpname);
1587
1588                 /* Starting offset and size of this file will be used to
1589                  * calculate starting offset of next file */
1590                 laststartoff = fi.startoff;
1591                 lastsize = size_autodetect(fi.fhandle);
1592
1593                 if(!multifile)
1594                         break;
1595         }
1596
1597         /* Set export size to total calculated size */
1598         client->exportsize = laststartoff + lastsize;
1599
1600         /* Export size may be overridden */
1601         if(client->server->expected_size) {
1602                 /* desired size must be <= total calculated size */
1603                 if(client->server->expected_size > client->exportsize) {
1604                         err("Size of exported file is too big\n");
1605                 }
1606
1607                 client->exportsize = client->server->expected_size;
1608         }
1609
1610         msg3(LOG_INFO, "Size of exported file/device is %llu", (unsigned long long)client->exportsize);
1611         if(multifile) {
1612                 msg3(LOG_INFO, "Total number of files: %d", i);
1613         }
1614 }
1615
1616 int copyonwrite_prepare(CLIENT* client) {
1617         off_t i;
1618         if ((client->difffilename = malloc(1024))==NULL)
1619                 err("Failed to allocate string for diff file name");
1620         snprintf(client->difffilename, 1024, "%s-%s-%d.diff",client->exportname,client->clientname,
1621                 (int)getpid()) ;
1622         client->difffilename[1023]='\0';
1623         msg3(LOG_INFO,"About to create map and diff file %s",client->difffilename) ;
1624         client->difffile=open(client->difffilename,O_RDWR | O_CREAT | O_TRUNC,0600) ;
1625         if (client->difffile<0) err("Could not create diff file (%m)") ;
1626         if ((client->difmap=calloc(client->exportsize/DIFFPAGESIZE,sizeof(u32)))==NULL)
1627                 err("Could not allocate memory") ;
1628         for (i=0;i<client->exportsize/DIFFPAGESIZE;i++) client->difmap[i]=(u32)-1 ;
1629
1630         return 0;
1631 }
1632
1633 /**
1634  * Run a command. This is used for the ``prerun'' and ``postrun'' config file
1635  * options
1636  *
1637  * @param command the command to be ran. Read from the config file
1638  * @param file the file name we're about to export
1639  **/
1640 int do_run(gchar* command, gchar* file) {
1641         gchar* cmd;
1642         int retval=0;
1643
1644         if(command && *command) {
1645                 cmd = g_strdup_printf(command, file);
1646                 retval=system(cmd);
1647                 g_free(cmd);
1648         }
1649         return retval;
1650 }
1651
1652 /**
1653  * Serve a connection. 
1654  *
1655  * @todo allow for multithreading, perhaps use libevent. Not just yet, though;
1656  * follow the road map.
1657  *
1658  * @param client a connected client
1659  **/
1660 void serveconnection(CLIENT *client) {
1661         if(do_run(client->server->prerun, client->exportname)) {
1662                 exit(EXIT_FAILURE);
1663         }
1664         setupexport(client);
1665
1666         if (client->server->flags & F_COPYONWRITE) {
1667                 copyonwrite_prepare(client);
1668         }
1669
1670         setmysockopt(client->net);
1671
1672         mainloop(client);
1673         do_run(client->server->postrun, client->exportname);
1674 }
1675
1676 /**
1677  * Find the name of the file we have to serve. This will use g_strdup_printf
1678  * to put the IP address of the client inside a filename containing
1679  * "%s" (in the form as specified by the "virtstyle" option). That name
1680  * is then written to client->exportname.
1681  *
1682  * @param net A socket connected to an nbd client
1683  * @param client information about the client. The IP address in human-readable
1684  * format will be written to a new char* buffer, the address of which will be
1685  * stored in client->clientname.
1686  **/
1687 void set_peername(int net, CLIENT *client) {
1688         struct sockaddr_storage addrin;
1689         struct sockaddr_storage netaddr;
1690         struct sockaddr_in  *netaddr4 = NULL;
1691         struct sockaddr_in6 *netaddr6 = NULL;
1692         size_t addrinlen = sizeof( addrin );
1693         struct addrinfo hints;
1694         struct addrinfo *ai = NULL;
1695         char peername[NI_MAXHOST];
1696         char netname[NI_MAXHOST];
1697         char *tmp = NULL;
1698         int i;
1699         int e;
1700         int shift;
1701
1702         if (getpeername(net, (struct sockaddr *) &addrin, (socklen_t *)&addrinlen) < 0)
1703                 err("getsockname failed: %m");
1704
1705         getnameinfo((struct sockaddr *)&addrin, (socklen_t)addrinlen,
1706                 peername, sizeof (peername), NULL, 0, NI_NUMERICHOST);
1707
1708         memset(&hints, '\0', sizeof (hints));
1709         hints.ai_flags = AI_ADDRCONFIG;
1710         e = getaddrinfo(peername, NULL, &hints, &ai);
1711
1712         if(e != 0) {
1713                 fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
1714                 freeaddrinfo(ai);
1715                 return;
1716         }
1717
1718         switch(client->server->virtstyle) {
1719                 case VIRT_NONE:
1720                         client->exportname=g_strdup(client->server->exportname);
1721                         break;
1722                 case VIRT_IPHASH:
1723                         for(i=0;i<strlen(peername);i++) {
1724                                 if(peername[i]=='.') {
1725                                         peername[i]='/';
1726                                 }
1727                         }
1728                 case VIRT_IPLIT:
1729                         client->exportname=g_strdup_printf(client->server->exportname, peername);
1730                         break;
1731                 case VIRT_CIDR:
1732                         memcpy(&netaddr, &addrin, addrinlen);
1733                         if(ai->ai_family == AF_INET) {
1734                                 netaddr4 = (struct sockaddr_in *)&netaddr;
1735                                 (netaddr4->sin_addr).s_addr>>=32-(client->server->cidrlen);
1736                                 (netaddr4->sin_addr).s_addr<<=32-(client->server->cidrlen);
1737
1738                                 getnameinfo((struct sockaddr *) netaddr4, (socklen_t) addrinlen,
1739                                                         netname, sizeof (netname), NULL, 0, NI_NUMERICHOST);
1740                                 tmp=g_strdup_printf("%s/%s", netname, peername);
1741                         }else if(ai->ai_family == AF_INET6) {
1742                                 netaddr6 = (struct sockaddr_in6 *)&netaddr;
1743
1744                                 shift = 128-(client->server->cidrlen);
1745                                 i = 3;
1746                                 while(shift >= 32) {
1747                                         ((netaddr6->sin6_addr).s6_addr32[i])=0;
1748                                         shift-=32;
1749                                         i--;
1750                                 }
1751                                 (netaddr6->sin6_addr).s6_addr32[i]>>=shift;
1752                                 (netaddr6->sin6_addr).s6_addr32[i]<<=shift;
1753
1754                                 getnameinfo((struct sockaddr *)netaddr6, (socklen_t)addrinlen,
1755                                             netname, sizeof(netname), NULL, 0, NI_NUMERICHOST);
1756                                 tmp=g_strdup_printf("%s/%s", netname, peername);
1757                         }
1758
1759                         if(tmp != NULL)
1760                           client->exportname=g_strdup_printf(client->server->exportname, tmp);
1761
1762                         break;
1763         }
1764
1765         freeaddrinfo(ai);
1766         msg4(LOG_INFO, "connect from %s, assigned file is %s", 
1767              peername, client->exportname);
1768         client->clientname=g_strdup(peername);
1769 }
1770
1771 /**
1772  * Destroy a pid_t*
1773  * @param data a pointer to pid_t which should be freed
1774  **/
1775 void destroy_pid_t(gpointer data) {
1776         g_free(data);
1777 }
1778
1779 /**
1780  * Loop through the available servers, and serve them. Never returns.
1781  **/
1782 int serveloop(GArray* servers) {
1783         struct sockaddr_storage addrin;
1784         socklen_t addrinlen=sizeof(addrin);
1785         int i;
1786         int max;
1787         int sock;
1788         fd_set mset;
1789         fd_set rset;
1790
1791         /* 
1792          * Set up the master fd_set. The set of descriptors we need
1793          * to select() for never changes anyway and it buys us a *lot*
1794          * of time to only build this once. However, if we ever choose
1795          * to not fork() for clients anymore, we may have to revisit
1796          * this.
1797          */
1798         max=0;
1799         FD_ZERO(&mset);
1800         for(i=0;i<servers->len;i++) {
1801                 if((sock=(g_array_index(servers, SERVER, i)).socket)) {
1802                         FD_SET(sock, &mset);
1803                         max=sock>max?sock:max;
1804                 }
1805         }
1806         if(modernsock) {
1807                 FD_SET(modernsock, &mset);
1808                 max=modernsock>max?modernsock:max;
1809         }
1810         for(;;) {
1811                 CLIENT *client = NULL;
1812                 pid_t *pid;
1813
1814                 memcpy(&rset, &mset, sizeof(fd_set));
1815                 if(select(max+1, &rset, NULL, NULL, NULL)>0) {
1816                         int net = 0;
1817                         SERVER* serve=NULL;
1818
1819                         DEBUG("accept, ");
1820                         if(FD_ISSET(modernsock, &rset)) {
1821                                 if((net=accept(modernsock, (struct sockaddr *) &addrin, &addrinlen)) < 0)
1822                                         err("accept: %m");
1823                                 client = negotiate(net, NULL, servers);
1824                                 if(!client) {
1825                                         err_nonfatal("negotiation failed");
1826                                         close(net);
1827                                         net=0;
1828                                         continue;
1829                                 }
1830                                 serve = client->server;
1831                         }
1832                         for(i=0;i<servers->len && !net;i++) {
1833                                 serve=&(g_array_index(servers, SERVER, i));
1834                                 if(FD_ISSET(serve->socket, &rset)) {
1835                                         if ((net=accept(serve->socket, (struct sockaddr *) &addrin, &addrinlen)) < 0)
1836                                                 err("accept: %m");
1837                                 }
1838                         }
1839                         if(net) {
1840                                 int sock_flags;
1841
1842                                 if(serve->max_connections > 0 &&
1843                                    g_hash_table_size(children) >= serve->max_connections) {
1844                                         msg2(LOG_INFO, "Max connections reached");
1845                                         close(net);
1846                                         continue;
1847                                 }
1848                                 if((sock_flags = fcntl(net, F_GETFL, 0))==-1) {
1849                                         err("fcntl F_GETFL");
1850                                 }
1851                                 if(fcntl(net, F_SETFL, sock_flags &~O_NONBLOCK)==-1) {
1852                                         err("fcntl F_SETFL ~O_NONBLOCK");
1853                                 }
1854                                 if(!client) {
1855                                         client = g_new0(CLIENT, 1);
1856                                         client->server=serve;
1857                                         client->exportsize=OFFT_MAX;
1858                                         client->net=net;
1859                                 }
1860                                 set_peername(net, client);
1861                                 if (!authorized_client(client)) {
1862                                         msg2(LOG_INFO,"Unauthorized client") ;
1863                                         close(net);
1864                                         continue;
1865                                 }
1866                                 msg2(LOG_INFO,"Authorized client") ;
1867                                 pid=g_malloc(sizeof(pid_t));
1868
1869                                 if (!dontfork) {
1870                                         if ((*pid=fork())<0) {
1871                                                 msg3(LOG_INFO,"Could not fork (%s)",strerror(errno)) ;
1872                                                 close(net);
1873                                                 continue;
1874                                         }
1875                                         if (*pid>0) { /* parent */
1876                                                 close(net);
1877                                                 g_hash_table_insert(children, pid, pid);
1878                                                 continue;
1879                                         }
1880                                         /* child */
1881                                         g_hash_table_destroy(children);
1882                                         for(i=0;i<servers->len;i++) {
1883                                                 serve=&g_array_index(servers, SERVER, i);
1884                                                 close(serve->socket);
1885                                         }
1886                                         /* FALSE does not free the
1887                                            actual data. This is required,
1888                                            because the client has a
1889                                            direct reference into that
1890                                            data, and otherwise we get a
1891                                            segfault... */
1892                                         g_array_free(servers, FALSE);
1893                                 }
1894
1895                                 msg2(LOG_INFO,"Starting to serve");
1896                                 serveconnection(client);
1897                                 exit(EXIT_SUCCESS);
1898                         }
1899                 }
1900         }
1901 }
1902
/**
 * Apply the options every listening socket needs: SO_REUSEADDR,
 * SO_KEEPALIVE and non-blocking mode. Any failure is fatal (err()).
 *
 * @param socket the listening socket to configure
 **/
void dosockopts(int socket) {
#ifndef sun
	int yes=1;
#else
	char yes='1';
#endif /* sun */
	int sock_flags;

	/* lose the pesky "Address already in use" error message */
	/* the length must match the actual type of 'yes', which differs
	 * between the two branches above */
	if (setsockopt(socket,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(yes)) == -1) {
		err("setsockopt SO_REUSEADDR");
	}
	if (setsockopt(socket,SOL_SOCKET,SO_KEEPALIVE,&yes,sizeof(yes)) == -1) {
		err("setsockopt SO_KEEPALIVE");
	}

	/* make the listening socket non-blocking */
	if ((sock_flags = fcntl(socket, F_GETFL, 0)) == -1) {
		err("fcntl F_GETFL");
	}
	if (fcntl(socket, F_SETFL, sock_flags | O_NONBLOCK) == -1) {
		err("fcntl F_SETFL O_NONBLOCK");
	}
}
1927
1928 /**
1929  * Connect a server's socket.
1930  *
1931  * @param serve the server we want to connect.
1932  **/
1933 int setup_serve(SERVER *serve) {
1934         struct addrinfo hints;
1935         struct addrinfo *ai = NULL;
1936         gchar *port = NULL;
1937         int e;
1938
1939         if(!do_oldstyle) {
1940                 return serve->servename ? 1 : 0;
1941         }
1942         memset(&hints,'\0',sizeof(hints));
1943         hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG | AI_NUMERICSERV;
1944         hints.ai_socktype = SOCK_STREAM;
1945         hints.ai_family = serve->socket_family;
1946
1947         port = g_strdup_printf ("%d", serve->port);
1948         if (port == NULL)
1949                 return 0;
1950
1951         e = getaddrinfo(serve->listenaddr,port,&hints,&ai);
1952
1953         g_free(port);
1954
1955         if(e != 0) {
1956                 fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
1957                 serve->socket = -1;
1958                 freeaddrinfo(ai);
1959                 exit(EXIT_FAILURE);
1960         }
1961
1962         if(serve->socket_family == AF_UNSPEC)
1963                 serve->socket_family = ai->ai_family;
1964
1965 #ifdef WITH_SDP
1966         if ((serve->flags) && F_SDP) {
1967                 if (ai->ai_family == AF_INET)
1968                         ai->ai_family = AF_INET_SDP;
1969                 else (ai->ai_family == AF_INET6)
1970                         ai->ai_family = AF_INET6_SDP;
1971         }
1972 #endif
1973         if ((serve->socket = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol)) < 0)
1974                 err("socket: %m");
1975
1976         dosockopts(serve->socket);
1977
1978         DEBUG("Waiting for connections... bind, ");
1979         e = bind(serve->socket, ai->ai_addr, ai->ai_addrlen);
1980         if (e != 0 && errno != EADDRINUSE)
1981                 err("bind: %m");
1982         DEBUG("listen, ");
1983         if (listen(serve->socket, 1) < 0)
1984                 err("listen: %m");
1985
1986         freeaddrinfo (ai);
1987         if(serve->servename) {
1988                 return 1;
1989         } else {
1990                 return 0;
1991         }
1992 }
1993
/**
 * Open the shared listening socket and store it in modernsock.
 *
 * The socket is a TCP socket bound to modern_listen on NBD_DEFAULT_PORT,
 * configured by dosockopts(). Every failure is fatal.
 **/
void open_modern(void) {
	struct addrinfo hints;
	struct addrinfo* ai = NULL;
	int e;

	memset(&hints, '\0', sizeof(hints));
	hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_family = AF_UNSPEC;
	hints.ai_protocol = IPPROTO_TCP;
	e = getaddrinfo(modern_listen, NBD_DEFAULT_PORT, &hints, &ai);
	if(e != 0) {
		fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
		exit(EXIT_FAILURE);
	}
	/* only the first returned address is used */
	if((modernsock = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol))<0) {
		err("socket: %m");
	}

	dosockopts(modernsock);

	if(bind(modernsock, ai->ai_addr, ai->ai_addrlen)) {
		err("bind: %m");
	}
	if(listen(modernsock, 10) <0) {
		err("listen: %m");
	}

	freeaddrinfo(ai);
}
2025
2026 /**
2027  * Connect our servers.
2028  **/
2029 void setup_servers(GArray* servers) {
2030         int i;
2031         struct sigaction sa;
2032         int want_modern=0;
2033
2034         for(i=0;i<servers->len;i++) {
2035                 want_modern |= setup_serve(&(g_array_index(servers, SERVER, i)));
2036         }
2037         if(want_modern) {
2038                 open_modern();
2039         }
2040         children=g_hash_table_new_full(g_int_hash, g_int_equal, NULL, destroy_pid_t);
2041
2042         sa.sa_handler = sigchld_handler;
2043         sigemptyset(&sa.sa_mask);
2044         sa.sa_flags = SA_RESTART;
2045         if(sigaction(SIGCHLD, &sa, NULL) == -1)
2046                 err("sigaction: %m");
2047         sa.sa_handler = sigterm_handler;
2048         sigemptyset(&sa.sa_mask);
2049         sa.sa_flags = SA_RESTART;
2050         if(sigaction(SIGTERM, &sa, NULL) == -1)
2051                 err("sigaction: %m");
2052 }
2053
2054 /**
2055  * Go daemon (unless we specified at compile time that we didn't want this)
2056  * @param serve the first server of our configuration. If its port is zero,
2057  *      then do not daemonize, because we're doing inetd then. This parameter
2058  *      is only used to create a PID file of the form
2059  *      /var/run/nbd-server.&lt;port&gt;.pid; it's not modified in any way.
2060  **/
2061 #if !defined(NODAEMON)
2062 void daemonize(SERVER* serve) {
2063         FILE*pidf;
2064
2065         if(serve && !(serve->port)) {
2066                 return;
2067         }
2068         if(daemon(0,0)<0) {
2069                 err("daemon");
2070         }
2071         if(!*pidftemplate) {
2072                 if(serve) {
2073                         strncpy(pidftemplate, "/var/run/nbd-server.%d.pid", 255);
2074                 } else {
2075                         strncpy(pidftemplate, "/var/run/nbd-server.pid", 255);
2076                 }
2077         }
2078         snprintf(pidfname, 255, pidftemplate, serve ? serve->port : 0);
2079         pidf=fopen(pidfname, "w");
2080         if(pidf) {
2081                 fprintf(pidf,"%d\n", (int)getpid());
2082                 fclose(pidf);
2083         } else {
2084                 perror("fopen");
2085                 fprintf(stderr, "Not fatal; continuing");
2086         }
2087 }
2088 #else
2089 #define daemonize(serve)
2090 #endif /* !defined(NODAEMON) */
2091
2092 /*
2093  * Everything beyond this point (in the file) is run in non-daemon mode.
2094  * The stuff above daemonize() isn't.
2095  */
2096
2097 void serve_err(SERVER* serve, const char* msg) G_GNUC_NORETURN;
2098
2099 void serve_err(SERVER* serve, const char* msg) {
2100         g_message("Export of %s on port %d failed:", serve->exportname,
2101                         serve->port);
2102         err(msg);
2103 }
2104
2105 /**
2106  * Set up user-ID and/or group-ID
2107  **/
2108 void dousers(void) {
2109         struct passwd *pw;
2110         struct group *gr;
2111         gchar* str;
2112         if(rungroup) {
2113                 gr=getgrnam(rungroup);
2114                 if(!gr) {
2115                         str = g_strdup_printf("Invalid group name: %s", rungroup);
2116                         err(str);
2117                 }
2118                 if(setgid(gr->gr_gid)<0) {
2119                         err("Could not set GID: %m"); 
2120                 }
2121         }
2122         if(runuser) {
2123                 pw=getpwnam(runuser);
2124                 if(!pw) {
2125                         str = g_strdup_printf("Invalid user name: %s", runuser);
2126                         err(str);
2127                 }
2128                 if(setuid(pw->pw_uid)<0) {
2129                         err("Could not set UID: %m");
2130                 }
2131         }
2132 }
2133
2134 #ifndef ISSERVER
2135 void glib_message_syslog_redirect(const gchar *log_domain,
2136                                   GLogLevelFlags log_level,
2137                                   const gchar *message,
2138                                   gpointer user_data)
2139 {
2140     int level=LOG_DEBUG;
2141     
2142     switch( log_level )
2143     {
2144       case G_LOG_FLAG_FATAL:
2145       case G_LOG_LEVEL_CRITICAL:
2146       case G_LOG_LEVEL_ERROR:    
2147         level=LOG_ERR; 
2148         break;
2149       case G_LOG_LEVEL_WARNING:
2150         level=LOG_WARNING;
2151         break;
2152       case G_LOG_LEVEL_MESSAGE:
2153       case G_LOG_LEVEL_INFO:
2154         level=LOG_INFO;
2155         break;
2156       case G_LOG_LEVEL_DEBUG:
2157         level=LOG_DEBUG;
2158       default:
2159         level=LOG_ERR;
2160     }
2161     syslog(level, "%s", message);
2162 }
2163 #endif
2164
2165 /**
2166  * Main entry point...
2167  **/
2168 int main(int argc, char *argv[]) {
2169         SERVER *serve;
2170         GArray *servers;
2171         GError *err=NULL;
2172
2173         if (sizeof( struct nbd_request )!=28) {
2174                 fprintf(stderr,"Bad size of structure. Alignment problems?\n");
2175                 exit(EXIT_FAILURE) ;
2176         }
2177
2178         memset(pidftemplate, '\0', 256);
2179
2180         logging();
2181         config_file_pos = g_strdup(CFILE);
2182         serve=cmdline(argc, argv);
2183         servers = parse_cfile(config_file_pos, &err);
2184         
2185         if(serve) {
2186                 serve->socket_family = AF_UNSPEC;
2187
2188                 append_serve(serve, servers);
2189      
2190                 if (!(serve->port)) {
2191                         CLIENT *client;
2192 #ifndef ISSERVER
2193                         /* You really should define ISSERVER if you're going to use
2194                          * inetd mode, but if you don't, closing stdout and stderr
2195                          * (which inetd had connected to the client socket) will let it
2196                          * work. */
2197                         close(1);
2198                         close(2);
2199                         open("/dev/null", O_WRONLY);
2200                         open("/dev/null", O_WRONLY);
2201                         g_log_set_default_handler( glib_message_syslog_redirect, NULL );
2202 #endif
2203                         client=g_malloc(sizeof(CLIENT));
2204                         client->server=serve;
2205                         client->net=0;
2206                         client->exportsize=OFFT_MAX;
2207                         set_peername(0,client);
2208                         serveconnection(client);
2209                         return 0;
2210                 }
2211         }
2212     
2213         if(!servers || !servers->len) {
2214                 if(err && !(err->domain == g_quark_from_string("parse_cfile")
2215                                 && err->code == CFILE_NOTFOUND)) {
2216                         g_warning("Could not parse config file: %s", 
2217                                         err ? err->message : "Unknown error");
2218                 }
2219         }
2220         if(serve) {
2221                 g_warning("Specifying an export on the command line is deprecated.");
2222                 g_warning("Please use a configuration file instead.");
2223         }
2224
2225         if((!serve) && (!servers||!servers->len)) {
2226                 g_message("No configured exports; quitting.");
2227                 exit(EXIT_FAILURE);
2228         }
2229         if (!dontfork)
2230                 daemonize(serve);
2231         setup_servers(servers);
2232         dousers();
2233         serveloop(servers);
2234         return 0 ;
2235 }