ea446dbb560ed78005f37120b249e61a4c999153
[nbd.git] / nbd-server.c
1 /*
2  * Network Block Device - server
3  *
4  * Copyright 1996-1998 Pavel Machek, distribute under GPL
5  *  <pavel@atrey.karlin.mff.cuni.cz>
6  * Copyright 2001-2004 Wouter Verhelst <wouter@debian.org>
7  * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk>
8  *
9  * Version 1.0 - hopefully 64-bit-clean
10  * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au>
11  * Version 1.2 - autodetect size of block devices, thanx to Peter T. Breuer" <ptb@it.uc3m.es>
12  * Version 1.5 - can compile on Unix systems that don't have 64 bit integer
13  *      type, or don't have 64 bit file offsets by defining FS_32BIT
14  *      in compile options for nbd-server *only*. This can be done
15  *      with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the
16  *      original autoconf input file, or I would make it a configure
17  *      option.) Ken Yap <ken@nlc.net.au>.
18  * Version 1.6 - fix autodetection of block device size and really make 64 bit
19  *      clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk>
20  * Version 2.0 - Version synchronised with client
21  * Version 2.1 - Reap zombie client processes when they exit. Removed
22  *      (uncommented) the _IO magic, it's no longer necessary. Wouter
23  *      Verhelst <wouter@debian.org>
24  * Version 2.2 - Auto switch to read-only mode (useful for floppies).
25  * Version 2.3 - Fixed code so that Large File Support works. This
26  *      removes the FS_32BIT compile-time directive; define
27  *      _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be
28  *      using FS_32BIT. This will allow you to use files >2GB instead of
29  *      having to use the -m option. Wouter Verhelst <wouter@debian.org>
30  * Version 2.4 - Added code to keep track of children, so that we can
31  *      properly kill them from initscripts. Add a call to daemon(),
32  *      so that processes don't think they have to wait for us, which is
33  *      interesting for initscripts as well. Wouter Verhelst
34  *      <wouter@debian.org>
35  * Version 2.5 - Bugfix release: forgot to reset child_arraysize to
36  *      zero after fork()ing, resulting in nbd-server going berserk
37  *      when it receives a signal with at least one child open. Wouter
38  *      Verhelst <wouter@debian.org>
39  * 10/10/2003 - Added socket option SO_KEEPALIVE (sf.net bug 819235);
40  *      rectified type of mainloop::size_host (sf.net bugs 814435 and
41  *      817385); close the PID file after writing to it, so that the
42  *      daemon can actually be found. Wouter Verhelst
43  *      <wouter@debian.org>
44  * 10/10/2003 - Size of the data "size_host" was wrong and so was not
45  *      correctly put in network endianness. Many types were corrected
46  *      (size_t and off_t instead of int).  <vspaceg@sourceforge.net>
47  * Version 2.6 - Some code cleanup.
48  * Version 2.7 - Better build system.
49  * 11/02/2004 - Doxygenified the source, modularized it a bit. Needs a 
50  *      lot more work, but this is a start. Wouter Verhelst
51  *      <wouter@debian.org>
52  * 16/03/2010 - Add IPv6 support.
53  *      Kitt Tientanopajai <kitt@kitty.in.th>
54  *      Neutron Soutmun <neo.neutron@gmail.com>
55  *      Suriya Soutmun <darksolar@gmail.com>
56  */
57
58 /* Includes LFS defines, which defines behaviours of some of the following
59  * headers, so must come before those */
60 #include "lfs.h"
61
62 #include <sys/types.h>
63 #include <sys/socket.h>
64 #include <sys/stat.h>
65 #include <sys/select.h>         /* select */
66 #include <sys/wait.h>           /* wait */
67 #ifdef HAVE_SYS_IOCTL_H
68 #include <sys/ioctl.h>
69 #endif
70 #include <sys/param.h>
71 #ifdef HAVE_SYS_MOUNT_H
72 #include <sys/mount.h>          /* For BLKGETSIZE */
73 #endif
74 #include <signal.h>             /* sigaction */
75 #include <errno.h>
76 #include <netinet/tcp.h>
77 #include <netinet/in.h>
78 #include <netdb.h>
79 #include <syslog.h>
80 #include <unistd.h>
81 #include <stdio.h>
82 #include <stdlib.h>
83 #include <string.h>
84 #include <fcntl.h>
85 #include <arpa/inet.h>
86 #include <strings.h>
87 #include <dirent.h>
88 #include <unistd.h>
89 #include <getopt.h>
90 #include <pwd.h>
91 #include <grp.h>
92
93 #include <glib.h>
94
95 /* used in cliserv.h, so must come first */
96 #define MY_NAME "nbd_server"
97 #include "cliserv.h"
98
99 /** Default position of the config file */
100 #ifndef SYSCONFDIR
101 #define SYSCONFDIR "/etc"
102 #endif
103 #define CFILE SYSCONFDIR "/nbd-server/config"
104
105 /** Where our config file actually is */
106 gchar* config_file_pos;
107
108 /** What user we're running as */
109 gchar* runuser=NULL;
110 /** What group we're running as */
111 gchar* rungroup=NULL;
112 /** whether to export using the old negotiation protocol (port-based) */
113 gboolean do_oldstyle=FALSE;
114
/**
 * Logging macros. Nothing goes to syslog unless ISSERVER is defined; in
 * every other build the priority argument is dropped and the message is
 * handed to g_message() instead.
 */
#if defined(ISSERVER)
#define msg2(a,b)       syslog(a,b)
#define msg3(a,b,c)     syslog(a,b,c)
#define msg4(a,b,c,d)   syslog(a,b,c,d)
#else
#define msg2(a,b)       g_message(b)
#define msg3(a,b,c)     g_message(b,c)
#define msg4(a,b,c,d)   g_message(b,c,d)
#endif

/*
 * Debugging macros: they forward to printf() when DODBG is defined and
 * expand to nothing otherwise.
 */
//#define DODBG
#if defined(DODBG)
#define DEBUG( a )              printf( a )
#define DEBUG2( a,b )           printf( a,b )
#define DEBUG3( a,b,c )         printf( a,b,c )
#define DEBUG4( a,b,c,d )       printf( a,b,c,d )
#else
#define DEBUG( a )
#define DEBUG2( a,b )
#define DEBUG3( a,b,c )
#define DEBUG4( a,b,c,d )
#endif

/* Fall back to an empty version string when the build supplies none */
#if !defined(PACKAGE_VERSION)
#define PACKAGE_VERSION ""
#endif
/**
 * The highest value a variable of type off_t can reach. off_t is a signed
 * integer, so this is the value with all bits set except for the leftmost
 * one.
 *
 * The value is built as (2^(bits-2) - 1) * 2 + 1 so that no intermediate
 * result shifts into, or overflows past, the sign bit (either would be
 * undefined behaviour for a signed type). The expansion is fully
 * parenthesised so it can be used inside any expression.
 **/
#define OFFT_MAX ((off_t)(((((off_t)1) << (sizeof(off_t)*8-2)) - 1) * 2 + 1))
/** Size of static buffer used to read the authorization file (yuck) */
#define LINELEN 256
/** Size of buffer that can hold requests */
#define BUFSIZE (1024*1024)
/** diff file uses those chunks */
#define DIFFPAGESIZE 4096

/* Per-export flag bits, stored in SERVER::flags */
/** flag to tell us a file is readonly */
#define F_READONLY 0x01
/** flag to tell us a file is exported using -m */
#define F_MULTIFILE 0x02
/** flag to tell us a file is exported using copyonwrite */
#define F_COPYONWRITE 0x04
/** flag to tell us a file is set to autoreadonly */
#define F_AUTOREADONLY 0x08
/** flag to tell us copyonwrite should use a sparse file */
#define F_SPARSE 0x10
/** flag to tell us the export should be done using the Socket Direct Protocol for RDMA */
#define F_SDP 0x20
/** Whether to fsync() after a write */
#define F_SYNC 0x40
159 GHashTable *children;
160 char pidfname[256]; /**< name of our PID file */
161 char pidftemplate[256]; /**< template to be used for the filename of the PID file */
162 char default_authname[] = SYSCONFDIR "/nbd-server/allow"; /**< default name of allow file */
163
164 int modernsock=0;         /**< Socket for the modern handler. Not used
165                                if a client was only specified on the
166                                command line; only port used if
167                                oldstyle is set to false (and then the
168                                command-line client isn't used, gna gna) */
169 char* modern_listen;      /**< listenaddr value for modernsock */
170
/**
 * Types of virtualization
 **/
typedef enum {
        /** No virtualization */
        VIRT_NONE = 0,
        /** Literal IP address as part of the filename */
        VIRT_IPLIT = 1,
        /** Replacing all dots in an ip address by a / before doing the
         *  same as in IPLIT */
        VIRT_IPHASH = 2,
        /** Every subnet in its own directory */
        VIRT_CIDR = 3,
} VIRT_STYLE;
181
182 /**
183  * Variables associated with a server.
184  **/
185 typedef struct {
186         gchar* exportname;    /**< (unprocessed) filename of the file we're exporting */
187         off_t expected_size; /**< size of the exported file as it was told to
188                                us through configuration */
189         gchar* listenaddr;   /**< The IP address we're listening on */
190         unsigned int port;   /**< port we're exporting this file at */
191         char* authname;      /**< filename of the authorization file */
192         int flags;           /**< flags associated with this exported file */
193         int socket;          /**< The socket of this server. */
194         int socket_family;   /**< family of the socket */
195         VIRT_STYLE virtstyle;/**< The style of virtualization, if any */
196         uint8_t cidrlen;     /**< The length of the mask when we use
197                                   CIDR-style virtualization */
198         gchar* prerun;       /**< command to be ran after connecting a client,
199                                   but before starting to serve */
200         gchar* postrun;      /**< command that will be ran after the client
201                                   disconnects */
202         gchar* servename;    /**< name of the export as selected by nbd-client */
203 } SERVER;
204
/**
 * One exported file: its file descriptor together with the offset at
 * which the file starts.
 **/
typedef struct {
        /** file descriptor */
        int fhandle;
        /** starting offset of this file */
        off_t startoff;
} FILE_INFO;
212
213 typedef struct {
214         off_t exportsize;    /**< size of the file we're exporting */
215         char *clientname;    /**< peer */
216         char *exportname;    /**< (processed) filename of the file we're exporting */
217         GArray *export;    /**< array of FILE_INFO of exported files;
218                                array size is always 1 unless we're
219                                doing the multiple file option */
220         int net;             /**< The actual client socket */
221         SERVER *server;      /**< The server this client is getting data from */
222         char* difffilename;  /**< filename of the copy-on-write file, if any */
223         int difffile;        /**< filedescriptor of copyonwrite file. @todo
224                                shouldn't this be an array too? (cfr export) Or
225                                make -m and -c mutually exclusive */
226         u32 difffilelen;     /**< number of pages in difffile */
227         u32 *difmap;         /**< see comment on the global difmap for this one */
228         gboolean modern;     /**< client was negotiated using modern negotiation protocol */
229 } CLIENT;
230
/**
 * Type of configuration file values
 **/
typedef enum {
        /** This parameter is an integer */
        PARAM_INT = 0,
        /** This parameter is a string */
        PARAM_STRING = 1,
        /** This parameter is a boolean */
        PARAM_BOOL = 2,
} PARAM_TYPE;
239
240 /**
241  * Configuration file values
242  **/
243 typedef struct {
244         gchar *paramname;       /**< Name of the parameter, as it appears in
245                                   the config file */
246         gboolean required;      /**< Whether this is a required (as opposed to
247                                   optional) parameter */
248         PARAM_TYPE ptype;       /**< Type of the parameter. */
249         gpointer target;        /**< Pointer to where the data of this
250                                   parameter should be written. If ptype is
251                                   PARAM_BOOL, the data is or'ed rather than
252                                   overwritten. */
253         gint flagval;           /**< Flag mask for this parameter in case ptype
254                                   is PARAM_BOOL. */
255 } PARAM;
256
257 /**
258  * Check whether a client is allowed to connect. Works with an authorization
259  * file which contains one line per machine, no wildcards.
260  *
261  * @param opts The client who's trying to connect.
262  * @return 0 - authorization refused, 1 - OK
263  **/
264 int authorized_client(CLIENT *opts) {
265         const char *ERRMSG="Invalid entry '%s' in authfile '%s', so, refusing all connections.";
266         FILE *f ;
267         char line[LINELEN]; 
268         char *tmp;
269         struct in_addr addr;
270         struct in_addr client;
271         struct in_addr cltemp;
272         int len;
273
274         if ((f=fopen(opts->server->authname,"r"))==NULL) {
275                 msg4(LOG_INFO,"Can't open authorization file %s (%s).",
276                      opts->server->authname,strerror(errno)) ;
277                 return 1 ; 
278         }
279   
280         inet_aton(opts->clientname, &client);
281         while (fgets(line,LINELEN,f)!=NULL) {
282                 if((tmp=index(line, '/'))) {
283                         if(strlen(line)<=tmp-line) {
284                                 msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
285                                 return 0;
286                         }
287                         *(tmp++)=0;
288                         if(!inet_aton(line,&addr)) {
289                                 msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
290                                 return 0;
291                         }
292                         len=strtol(tmp, NULL, 0);
293                         addr.s_addr>>=32-len;
294                         addr.s_addr<<=32-len;
295                         memcpy(&cltemp,&client,sizeof(client));
296                         cltemp.s_addr>>=32-len;
297                         cltemp.s_addr<<=32-len;
298                         if(addr.s_addr == cltemp.s_addr) {
299                                 return 1;
300                         }
301                 }
302                 if (strncmp(line,opts->clientname,strlen(opts->clientname))==0) {
303                         fclose(f);
304                         return 1;
305                 }
306         }
307         fclose(f);
308         return 0;
309 }
310
/**
 * Read data from a file descriptor into a buffer. Keeps calling read()
 * until exactly len bytes have been stored.
 *
 * A read that fails with EAGAIN or EINTR is retried. Any other failure,
 * and end-of-file before len bytes have arrived, is reported through
 * err().
 *
 * Declared static so that the inline definition does not depend on a
 * separate external definition being available at link time.
 *
 * @param f a file descriptor
 * @param buf a buffer
 * @param len the number of bytes to be read
 **/
static inline void readit(int f, void *buf, size_t len) {
        char *dest = buf;       /* byte pointer: no arithmetic on void* */

        while (len > 0) {
                ssize_t res;

                DEBUG("*");
                res = read(f, dest, len);
                if (res > 0) {
                        len -= res;
                        dest += res;
                        continue;
                }
                if (res == 0) {
                        /* EOF does not set errno, so do not look at it */
                        err("Read failed: End of file");
                        return;
                }
                if (errno != EAGAIN && errno != EINTR) {
                        /* err() is expected not to return (TODO confirm);
                         * bail out regardless rather than spin */
                        err("Read failed: %m");
                        return;
                }
        }
}
332
/**
 * Write data from a buffer into a filedescriptor. Keeps calling write()
 * until all len bytes have been handed over.
 *
 * A write that fails with EAGAIN or EINTR is retried, matching what
 * readit() does for reads. Any other failure, and a write() that reports
 * zero bytes, is reported through err().
 *
 * Declared static so that the inline definition does not depend on a
 * separate external definition being available at link time.
 *
 * @param f a file descriptor
 * @param buf a buffer containing data
 * @param len the number of bytes to be written
 **/
static inline void writeit(int f, void *buf, size_t len) {
        const char *src = buf;  /* byte pointer: no arithmetic on void* */

        while (len > 0) {
                ssize_t res;

                DEBUG("+");
                res = write(f, src, len);
                if (res > 0) {
                        len -= res;
                        src += res;
                        continue;
                }
                if (res < 0 && (errno == EAGAIN || errno == EINTR)) {
                        continue;
                }
                /* err() is expected not to return (TODO confirm); never
                 * fold a non-positive result into len or the pointer */
                err("Send failed: %m");
                return;
        }
}
350
351 /**
352  * Print out a message about how to use nbd-server. Split out to a separate
353  * function so that we can call it from multiple places
354  */
355 void usage() {
356         printf("This is nbd-server version " VERSION "\n");
357         printf("Usage: [ip:|ip6@]port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-C configuration file] [-p PID file name] [-o section name]\n"
358                "\t-r|--read-only\t\tread only\n"
359                "\t-m|--multi-file\t\tmultiple file\n"
360                "\t-c|--copy-on-write\tcopy on write\n"
361                "\t-C|--config-file\tspecify an alternate configuration file\n"
362                "\t-l|--authorize-file\tfile with list of hosts that are allowed to\n\t\t\t\tconnect.\n"
363                "\t-p|--pid-file\t\tspecify a filename to write our PID to\n"
364                "\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n\n"
365                "\tif port is set to 0, stdin is used (for running from inetd)\n"
366                "\tif file_to_export contains '%%s', it is substituted with the IP\n"
367                "\t\taddress of the machine trying to connect\n" 
368                "\tif ip is set, it contains the local IP address on which we're listening.\n\tif not, the server will listen on all local IP addresses\n");
369         printf("Using configuration file %s\n", CFILE);
370 }
371
372 /* Dumps a config file section of the given SERVER*, and exits. */
373 void dump_section(SERVER* serve, gchar* section_header) {
374         printf("[%s]\n", section_header);
375         printf("\texportname = %s\n", serve->exportname);
376         printf("\tlistenaddr = %s\n", serve->listenaddr);
377         printf("\tport = %d\n", serve->port);
378         if(serve->flags & F_READONLY) {
379                 printf("\treadonly = true\n");
380         }
381         if(serve->flags & F_MULTIFILE) {
382                 printf("\tmultifile = true\n");
383         }
384         if(serve->flags & F_COPYONWRITE) {
385                 printf("\tcopyonwrite = true\n");
386         }
387         if(serve->expected_size) {
388                 printf("\tfilesize = %lld\n", (long long int)serve->expected_size);
389         }
390         if(serve->authname) {
391                 printf("\tauthfile = %s\n", serve->authname);
392         }
393         exit(EXIT_SUCCESS);
394 }
395
396 /**
397  * Parse the command line.
398  *
399  * @param argc the argc argument to main()
400  * @param argv the argv argument to main()
401  **/
402 SERVER* cmdline(int argc, char *argv[]) {
403         int i=0;
404         int nonspecial=0;
405         int c;
406         struct option long_options[] = {
407                 {"read-only", no_argument, NULL, 'r'},
408                 {"multi-file", no_argument, NULL, 'm'},
409                 {"copy-on-write", no_argument, NULL, 'c'},
410                 {"authorize-file", required_argument, NULL, 'l'},
411                 {"config-file", required_argument, NULL, 'C'},
412                 {"pid-file", required_argument, NULL, 'p'},
413                 {"output-config", required_argument, NULL, 'o'},
414                 {0,0,0,0}
415         };
416         SERVER *serve;
417         off_t es;
418         size_t last;
419         char suffix;
420         gboolean do_output=FALSE;
421         gchar* section_header="";
422         gchar** addr_port;
423
424         if(argc==1) {
425                 return NULL;
426         }
427         serve=g_new0(SERVER, 1);
428         serve->authname = g_strdup(default_authname);
429         serve->virtstyle=VIRT_IPLIT;
430         while((c=getopt_long(argc, argv, "-C:cl:mo:rp:", long_options, &i))>=0) {
431                 switch (c) {
432                 case 1:
433                         /* non-option argument */
434                         switch(nonspecial++) {
435                         case 0:
436                                 if(strchr(optarg, ':') == strrchr(optarg, ':')) {
437                                         addr_port=g_strsplit(optarg, ":", 2);
438
439                                         /* Check for "@" - maybe user using this separator
440                                                  for IPv4 address */
441                                         if(!addr_port[1]) {
442                                                 g_strfreev(addr_port);
443                                                 addr_port=g_strsplit(optarg, "@", 2);
444                                         }
445                                 } else {
446                                         addr_port=g_strsplit(optarg, "@", 2);
447                                 }
448
449                                 if(addr_port[1]) {
450                                         serve->port=strtol(addr_port[1], NULL, 0);
451                                         serve->listenaddr=g_strdup(addr_port[0]);
452                                 } else {
453                                         serve->listenaddr=NULL;
454                                         serve->port=strtol(addr_port[0], NULL, 0);
455                                 }
456                                 g_strfreev(addr_port);
457                                 break;
458                         case 1:
459                                 serve->exportname = g_strdup(optarg);
460                                 if(serve->exportname[0] != '/') {
461                                         fprintf(stderr, "E: The to be exported file needs to be an absolute filename!\n");
462                                         exit(EXIT_FAILURE);
463                                 }
464                                 break;
465                         case 2:
466                                 last=strlen(optarg)-1;
467                                 suffix=optarg[last];
468                                 if (suffix == 'k' || suffix == 'K' ||
469                                     suffix == 'm' || suffix == 'M')
470                                         optarg[last] = '\0';
471                                 es = (off_t)atoll(optarg);
472                                 switch (suffix) {
473                                         case 'm':
474                                         case 'M':  es <<= 10;
475                                         case 'k':
476                                         case 'K':  es <<= 10;
477                                         default :  break;
478                                 }
479                                 serve->expected_size = es;
480                                 break;
481                         }
482                         break;
483                 case 'r':
484                         serve->flags |= F_READONLY;
485                         break;
486                 case 'm':
487                         serve->flags |= F_MULTIFILE;
488                         break;
489                 case 'o':
490                         do_output = TRUE;
491                         section_header = g_strdup(optarg);
492                         break;
493                 case 'p':
494                         strncpy(pidftemplate, optarg, 256);
495                         break;
496                 case 'c': 
497                         serve->flags |=F_COPYONWRITE;
498                         break;
499                 case 'C':
500                         g_free(config_file_pos);
501                         config_file_pos=g_strdup(optarg);
502                         break;
503                 case 'l':
504                         g_free(serve->authname);
505                         serve->authname=g_strdup(optarg);
506                         break;
507                 default:
508                         usage();
509                         exit(EXIT_FAILURE);
510                         break;
511                 }
512         }
513         /* What's left: the port to export, the name of the to be exported
514          * file, and, optionally, the size of the file, in that order. */
515         if(nonspecial<2) {
516                 g_free(serve);
517                 serve=NULL;
518         } else {
519                 do_oldstyle = TRUE;
520         }
521         if(do_output) {
522                 if(!serve) {
523                         g_critical("Need a complete configuration on the command line to output a config file section!");
524                         exit(EXIT_FAILURE);
525                 }
526                 dump_section(serve, section_header);
527         }
528         return serve;
529 }
530
/**
 * Error codes for config file parsing
 **/
typedef enum {
        /** The configuration file is not found */
        CFILE_NOTFOUND = 0,
        /** The (required) group "generic" is missing */
        CFILE_MISSING_GENERIC = 1,
        /** A (required) key is missing */
        CFILE_KEY_MISSING = 2,
        /** A value is syntactically invalid */
        CFILE_VALUE_INVALID = 3,
        /** A value is not supported in this build */
        CFILE_VALUE_UNSUPPORTED = 4,
        /** Programmer error */
        CFILE_PROGERR = 5,
        /** A config file was specified that does not define any exports */
        CFILE_NO_EXPORTS = 6,
        /** The reserved port was specified for an old-style export. */
        CFILE_INCORRECT_PORT = 7,
} CFILE_ERRORS;
546
547 /**
548  * Remove a SERVER from memory. Used from the hash table
549  **/
550 void remove_server(gpointer s) {
551         SERVER *server;
552
553         server=(SERVER*)s;
554         g_free(server->exportname);
555         if(server->authname)
556                 g_free(server->authname);
557         if(server->listenaddr)
558                 g_free(server->listenaddr);
559         if(server->prerun)
560                 g_free(server->prerun);
561         if(server->postrun)
562                 g_free(server->postrun);
563         g_free(server);
564 }
565
566 /**
567  * duplicate server
568  * @param s the old server we want to duplicate
569  * @return new duplicated server
570  **/
571 SERVER* dup_serve(SERVER *s) {
572         SERVER *serve = NULL;
573
574         serve=g_new0(SERVER, 1);
575         if(serve == NULL)
576                 return NULL;
577
578         if(s->exportname)
579                 serve->exportname = g_strdup(s->exportname);
580
581         serve->expected_size = s->expected_size;
582
583         if(s->listenaddr)
584                 serve->listenaddr = g_strdup(s->listenaddr);
585
586         serve->port = s->port;
587
588         if(s->authname)
589                 serve->authname = strdup(s->authname);
590
591         serve->flags = s->flags;
592         serve->socket = serve->socket;
593         serve->socket_family = serve->socket_family;
594         serve->cidrlen = s->cidrlen;
595
596         if(s->prerun)
597                 serve->prerun = g_strdup(s->prerun);
598
599         if(s->postrun)
600                 serve->postrun = g_strdup(s->postrun);
601         
602         if(s->servename)
603                 serve->servename = g_strdup(s->servename);
604
605         return serve;
606 }
607
608 /**
609  * append new server to array
610  * @param s server
611  * @param a server array
612  * @return 0 success, -1 error
613  */
614 int append_serve(SERVER *s, GArray *a) {
615         SERVER *ns = NULL;
616         struct addrinfo hints;
617         struct addrinfo *ai = NULL;
618         struct addrinfo *rp = NULL;
619         char   host[NI_MAXHOST];
620         gchar  *port = NULL;
621         int e;
622         int ret;
623
624         if(!s) {
625                 err("Invalid parsing server");
626                 return -1;
627         }
628
629         port = g_strdup_printf("%d", s->port);
630
631         memset(&hints,'\0',sizeof(hints));
632         hints.ai_family = AF_UNSPEC;
633         hints.ai_socktype = SOCK_STREAM;
634         hints.ai_flags = AI_ADDRCONFIG | AI_PASSIVE;
635         hints.ai_protocol = IPPROTO_TCP;
636
637         e = getaddrinfo(s->listenaddr, port, &hints, &ai);
638
639         if (port)
640                 g_free(port);
641
642         if(e == 0) {
643                 for (rp = ai; rp != NULL; rp = rp->ai_next) {
644                         e = getnameinfo(rp->ai_addr, rp->ai_addrlen, host, sizeof(host), NULL, 0, NI_NUMERICHOST);
645
646                         if (e != 0) { // error
647                                 fprintf(stderr, "getnameinfo: %s\n", gai_strerror(e));
648                                 continue;
649                         }
650
651                         // duplicate server and set listenaddr to resolved IP address
652                         ns = dup_serve (s);
653                         if (ns) {
654                                 ns->listenaddr = g_strdup(host);
655                                 ns->socket_family = rp->ai_family;
656                                 g_array_append_val(a, *ns);
657                                 free(ns);
658                                 ns = NULL;
659                         }
660                 }
661
662                 ret = 0;
663         } else {
664                 fprintf(stderr, "getaddrinfo failed on listen host/address: %s (%s)\n", s->listenaddr ? s->listenaddr : "any", gai_strerror(e));
665                 ret = -1;
666         }
667
668         if (ai)
669                 freeaddrinfo(ai);
670
671         return ret;
672 }
673
674 /**
675  * Parse the config file.
676  *
677  * @param f the name of the config file
678  * @param e a GError. @see CFILE_ERRORS for what error values this function can
679  *      return.
680  * @return a Array of SERVER* pointers, If the config file is empty or does not
681  *      exist, returns an empty GHashTable; if the config file contains an
682  *      error, returns NULL, and e is set appropriately
683  **/
684 GArray* parse_cfile(gchar* f, GError** e) {
685         const char* DEFAULT_ERROR = "Could not parse %s in group %s: %s";
686         const char* MISSING_REQUIRED_ERROR = "Could not find required value %s in group %s: %s";
687         SERVER s;
688         gchar *virtstyle=NULL;
689         PARAM lp[] = {
690                 { "exportname", TRUE,   PARAM_STRING,   NULL, 0 },
691                 { "port",       TRUE,   PARAM_INT,      NULL, 0 },
692                 { "authfile",   FALSE,  PARAM_STRING,   NULL, 0 },
693                 { "filesize",   FALSE,  PARAM_INT,      NULL, 0 },
694                 { "virtstyle",  FALSE,  PARAM_STRING,   NULL, 0 },
695                 { "prerun",     FALSE,  PARAM_STRING,   NULL, 0 },
696                 { "postrun",    FALSE,  PARAM_STRING,   NULL, 0 },
697                 { "readonly",   FALSE,  PARAM_BOOL,     NULL, F_READONLY },
698                 { "multifile",  FALSE,  PARAM_BOOL,     NULL, F_MULTIFILE },
699                 { "copyonwrite", FALSE, PARAM_BOOL,     NULL, F_COPYONWRITE },
700                 { "sparse_cow", FALSE,  PARAM_BOOL,     NULL, F_SPARSE },
701                 { "sdp",        FALSE,  PARAM_BOOL,     NULL, F_SDP },
702                 { "sync",       FALSE,  PARAM_BOOL,     NULL, F_SYNC },
703                 { "listenaddr", FALSE,  PARAM_STRING,   NULL, 0 },
704         };
705         const int lp_size=sizeof(lp)/sizeof(PARAM);
706         PARAM gp[] = {
707                 { "user",       FALSE, PARAM_STRING,    &runuser,       0 },
708                 { "group",      FALSE, PARAM_STRING,    &rungroup,      0 },
709                 { "oldstyle",   FALSE, PARAM_BOOL,      &do_oldstyle,   1 },
710                 { "listenaddr", FALSE, PARAM_STRING,    &modern_listen, 0 },
711         };
712         PARAM* p=gp;
713         int p_size=sizeof(gp)/sizeof(PARAM);
714         GKeyFile *cfile;
715         GError *err = NULL;
716         const char *err_msg=NULL;
717         GQuark errdomain;
718         GArray *retval=NULL;
719         gchar **groups;
720         gboolean value;
721         gchar* startgroup;
722         gint i;
723         gint j;
724
725         errdomain = g_quark_from_string("parse_cfile");
726         cfile = g_key_file_new();
727         retval = g_array_new(FALSE, TRUE, sizeof(SERVER));
728         if(!g_key_file_load_from_file(cfile, f, G_KEY_FILE_KEEP_COMMENTS |
729                         G_KEY_FILE_KEEP_TRANSLATIONS, &err)) {
730                 g_set_error(e, errdomain, CFILE_NOTFOUND, "Could not open config file %s.", f);
731                 g_key_file_free(cfile);
732                 return retval;
733         }
734         startgroup = g_key_file_get_start_group(cfile);
735         if(!startgroup || strcmp(startgroup, "generic")) {
736                 g_set_error(e, errdomain, CFILE_MISSING_GENERIC, "Config file does not contain the [generic] group!");
737                 g_key_file_free(cfile);
738                 return NULL;
739         }
740         groups = g_key_file_get_groups(cfile, NULL);
741         for(i=0;groups[i];i++) {
742                 memset(&s, '\0', sizeof(SERVER));
743                 lp[0].target=&(s.exportname);
744                 lp[1].target=&(s.port);
745                 lp[2].target=&(s.authname);
746                 lp[3].target=&(s.expected_size);
747                 lp[4].target=&(virtstyle);
748                 lp[5].target=&(s.prerun);
749                 lp[6].target=&(s.postrun);
750                 lp[7].target=lp[8].target=lp[9].target=
751                                 lp[10].target=lp[11].target=
752                                 lp[12].target=&(s.flags);
753                 lp[13].target=&(s.listenaddr);
754
755                 /* After the [generic] group, start parsing exports */
756                 if(i==1) {
757                         p=lp;
758                         p_size=lp_size;
759                 } 
760                 for(j=0;j<p_size;j++) {
761                         g_assert(p[j].target != NULL);
762                         g_assert(p[j].ptype==PARAM_INT||p[j].ptype==PARAM_STRING||p[j].ptype==PARAM_BOOL);
763                         switch(p[j].ptype) {
764                                 case PARAM_INT:
765                                         *((gint*)p[j].target) =
766                                                 g_key_file_get_integer(cfile,
767                                                                 groups[i],
768                                                                 p[j].paramname,
769                                                                 &err);
770                                         break;
771                                 case PARAM_STRING:
772                                         *((gchar**)p[j].target) =
773                                                 g_key_file_get_string(cfile,
774                                                                 groups[i],
775                                                                 p[j].paramname,
776                                                                 &err);
777                                         break;
778                                 case PARAM_BOOL:
779                                         value = g_key_file_get_boolean(cfile,
780                                                         groups[i],
781                                                         p[j].paramname, &err);
782                                         if(!err) {
783                                                 if(value) {
784                                                         *((gint*)p[j].target) |= p[j].flagval;
785                                                 } else {
786                                                         *((gint*)p[j].target) &= ~(p[j].flagval);
787                                                 }
788                                         }
789                                         break;
790                         }
791                         if(!strcmp(p[j].paramname, "port") && !strcmp(p[j].target, NBD_DEFAULT_PORT)) {
792                                 g_set_error(e, errdomain, CFILE_INCORRECT_PORT, "Config file specifies default port for oldstyle export");
793                                 g_key_file_free(cfile);
794                                 return NULL;
795                         }
796                         if(err) {
797                                 if(err->code == G_KEY_FILE_ERROR_KEY_NOT_FOUND) {
798                                         if(!p[j].required) {
799                                                 /* Ignore not-found error for optional values */
800                                                 g_clear_error(&err);
801                                                 continue;
802                                         } else {
803                                                 err_msg = MISSING_REQUIRED_ERROR;
804                                         }
805                                 } else {
806                                         err_msg = DEFAULT_ERROR;
807                                 }
808                                 g_set_error(e, errdomain, CFILE_VALUE_INVALID, err_msg, p[j].paramname, groups[i], err->message);
809                                 g_array_free(retval, TRUE);
810                                 g_error_free(err);
811                                 g_key_file_free(cfile);
812                                 return NULL;
813                         }
814                 }
815                 if(virtstyle) {
816                         if(!strncmp(virtstyle, "none", 4)) {
817                                 s.virtstyle=VIRT_NONE;
818                         } else if(!strncmp(virtstyle, "ipliteral", 9)) {
819                                 s.virtstyle=VIRT_IPLIT;
820                         } else if(!strncmp(virtstyle, "iphash", 6)) {
821                                 s.virtstyle=VIRT_IPHASH;
822                         } else if(!strncmp(virtstyle, "cidrhash", 8)) {
823                                 s.virtstyle=VIRT_CIDR;
824                                 if(strlen(virtstyle)<10) {
825                                         g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s: missing length", virtstyle, groups[i]);
826                                         g_array_free(retval, TRUE);
827                                         g_key_file_free(cfile);
828                                         return NULL;
829                                 }
830                                 s.cidrlen=strtol(virtstyle+8, NULL, 0);
831                         } else {
832                                 g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s", virtstyle, groups[i]);
833                                 g_array_free(retval, TRUE);
834                                 g_key_file_free(cfile);
835                                 return NULL;
836                         }
837                         if(s.port && !do_oldstyle) {
838                                 g_warning("A port was specified, but oldstyle exports were not requested. This may not do what you expect.");
839                                 g_warning("Please read 'man 5 nbd-server' and search for oldstyle for more info");
840                         }
841                 } else {
842                         s.virtstyle=VIRT_IPLIT;
843                 }
844                 /* Don't need to free this, it's not our string */
845                 virtstyle=NULL;
846                 /* Don't append values for the [generic] group */
847                 if(i>0) {
848                         s.socket_family = AF_UNSPEC;
849                         s.servename = groups[i];
850
851                         append_serve(&s, retval);
852                 } else {
853                         if(!do_oldstyle) {
854                                 lp[1].required = 0;
855                         }
856                 }
857 #ifndef WITH_SDP
858                 if(s.flags & F_SDP) {
859                         g_set_error(e, errdomain, CFILE_VALUE_UNSUPPORTED, "This nbd-server was built without support for SDP, yet group %s uses it", groups[i]);
860                         g_array_free(retval, TRUE);
861                         g_key_file_free(cfile);
862                         return NULL;
863                 }
864 #endif
865         }
866         if(i==1) {
867                 g_set_error(e, errdomain, CFILE_NO_EXPORTS, "The config file does not specify any exports");
868         }
869         g_key_file_free(cfile);
870         return retval;
871 }
872
873 /**
874  * Signal handler for SIGCHLD
875  * @param s the signal we're handling (must be SIGCHLD, or something
876  * is severely wrong)
877  **/
878 void sigchld_handler(int s) {
879         int status;
880         int* i;
881         pid_t pid;
882
883         while((pid=waitpid(-1, &status, WNOHANG)) > 0) {
884                 if(WIFEXITED(status)) {
885                         msg3(LOG_INFO, "Child exited with %d", WEXITSTATUS(status));
886                 }
887                 i=g_hash_table_lookup(children, &pid);
888                 if(!i) {
889                         msg3(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid);
890                 } else {
891                         DEBUG2("Removing %d from the list of children", pid);
892                         g_hash_table_remove(children, &pid);
893                 }
894         }
895 }
896
897 /**
898  * Kill a child. Called from sigterm_handler::g_hash_table_foreach.
899  *
900  * @param key the key
901  * @param value the value corresponding to the above key
902  * @param user_data a pointer which we always set to 1, so that we know what
903  * will happen next.
904  **/
905 void killchild(gpointer key, gpointer value, gpointer user_data) {
906         pid_t *pid=value;
907         int *parent=user_data;
908
909         kill(*pid, SIGTERM);
910         *parent=1;
911 }
912
913 /**
914  * Handle SIGTERM and dispatch it to our children
915  * @param s the signal we're handling (must be SIGTERM, or something
916  * is severely wrong).
917  **/
918 void sigterm_handler(int s) {
919         int parent=0;
920
921         g_hash_table_foreach(children, killchild, &parent);
922
923         if(parent) {
924                 unlink(pidfname);
925         }
926
927         exit(EXIT_SUCCESS);
928 }
929
930 /**
931  * Detect the size of a file.
932  *
933  * @param fhandle An open filedescriptor
934  * @return the size of the file, or OFFT_MAX if detection was
935  * impossible.
936  **/
937 off_t size_autodetect(int fhandle) {
938         off_t es;
939         u64 bytes;
940         struct stat stat_buf;
941         int error;
942
943 #ifdef HAVE_SYS_MOUNT_H
944 #ifdef HAVE_SYS_IOCTL_H
945 #ifdef BLKGETSIZE64
946         DEBUG("looking for export size with ioctl BLKGETSIZE64\n");
947         if (!ioctl(fhandle, BLKGETSIZE64, &bytes) && bytes) {
948                 return (off_t)bytes;
949         }
950 #endif /* BLKGETSIZE64 */
951 #endif /* HAVE_SYS_IOCTL_H */
952 #endif /* HAVE_SYS_MOUNT_H */
953
954         DEBUG("looking for fhandle size with fstat\n");
955         stat_buf.st_size = 0;
956         error = fstat(fhandle, &stat_buf);
957         if (!error) {
958                 if(stat_buf.st_size > 0)
959                         return (off_t)stat_buf.st_size;
960         } else {
961                 err("fstat failed: %m");
962         }
963
964         DEBUG("looking for fhandle size with lseek SEEK_END\n");
965         es = lseek(fhandle, (off_t)0, SEEK_END);
966         if (es > ((off_t)0)) {
967                 return es;
968         } else {
969                 DEBUG2("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4)));
970         }
971
972         err("Could not find size of exported block device: %m");
973         return OFFT_MAX;
974 }
975
976 /**
977  * Get the file handle and offset, given an export offset.
978  *
979  * @param export An array of export files
980  * @param a The offset to get corresponding file/offset for
981  * @param fhandle [out] File descriptor
982  * @param foffset [out] Offset into fhandle
983  * @param maxbytes [out] Tells how many bytes can be read/written
984  * from fhandle starting at foffset (0 if there is no limit)
985  * @return 0 on success, -1 on failure
986  **/
987 int get_filepos(GArray* export, off_t a, int* fhandle, off_t* foffset, size_t* maxbytes ) {
988         /* Negative offset not allowed */
989         if(a < 0)
990                 return -1;
991
992         /* Binary search for last file with starting offset <= a */
993         FILE_INFO fi;
994         int start = 0;
995         int end = export->len - 1;
996         while( start <= end ) {
997                 int mid = (start + end) / 2;
998                 fi = g_array_index(export, FILE_INFO, mid);
999                 if( fi.startoff < a ) {
1000                         start = mid + 1;
1001                 } else if( fi.startoff > a ) {
1002                         end = mid - 1;
1003                 } else {
1004                         start = end = mid;
1005                         break;
1006                 }
1007         }
1008
1009         /* end should never go negative, since first startoff is 0 and a >= 0 */
1010         g_assert(end >= 0);
1011
1012         fi = g_array_index(export, FILE_INFO, end);
1013         *fhandle = fi.fhandle;
1014         *foffset = a - fi.startoff;
1015         *maxbytes = 0;
1016         if( end+1 < export->len ) {
1017                 FILE_INFO fi_next = g_array_index(export, FILE_INFO, end+1);
1018                 *maxbytes = fi_next.startoff - a;
1019         }
1020
1021         return 0;
1022 }
1023
/**
 * Move the file position of a descriptor to an absolute offset, and
 * report a failure of lseek() through err().
 *
 * @param handle a filedescriptor
 * @param a position to seek to (absolute, SEEK_SET)
 * @todo inherited note: this wrapper was meant to go away together with
 * the global variable "lastpoint". This function does not reference
 * that variable; revisit whether the wrapper is still wanted.
 **/
void myseek(int handle,off_t a) {
	const off_t where = lseek(handle, a, SEEK_SET);

	if (where < 0) {
		err("Can not seek locally!\n");
	}
}
1037
1038 /**
1039  * Write an amount of bytes at a given offset to the right file. This
1040  * abstracts the write-side of the multiple file option.
1041  *
1042  * @param a The offset where the write should start
1043  * @param buf The buffer to write from
1044  * @param len The length of buf
1045  * @param client The client we're serving for
1046  * @return The number of bytes actually written, or -1 in case of an error
1047  **/
1048 ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client) {
1049         int fhandle;
1050         off_t foffset;
1051         size_t maxbytes;
1052         ssize_t retval;
1053
1054         if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
1055                 return -1;
1056         if(maxbytes && len > maxbytes)
1057                 len = maxbytes;
1058
1059         DEBUG4("(WRITE to fd %d offset %llu len %u), ", fhandle, foffset, len);
1060
1061         myseek(fhandle, foffset);
1062         retval = write(fhandle, buf, len);
1063         if(client->server->flags & F_SYNC) {
1064                 fsync(fhandle);
1065         }
1066         return retval;
1067 }
1068
1069 /**
1070  * Call rawexpwrite repeatedly until all data has been written.
1071  * @return 0 on success, nonzero on failure
1072  **/
1073 int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client) {
1074         ssize_t ret=0;
1075
1076         while(len > 0 && (ret=rawexpwrite(a, buf, len, client)) > 0 ) {
1077                 a += ret;
1078                 buf += ret;
1079                 len -= ret;
1080         }
1081         return (ret < 0 || len != 0);
1082 }
1083
1084 /**
1085  * Read an amount of bytes at a given offset from the right file. This
1086  * abstracts the read-side of the multiple files option.
1087  *
1088  * @param a The offset where the read should start
1089  * @param buf A buffer to read into
1090  * @param len The size of buf
1091  * @param client The client we're serving for
1092  * @return The number of bytes actually read, or -1 in case of an
1093  * error.
1094  **/
1095 ssize_t rawexpread(off_t a, char *buf, size_t len, CLIENT *client) {
1096         int fhandle;
1097         off_t foffset;
1098         size_t maxbytes;
1099
1100         if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
1101                 return -1;
1102         if(maxbytes && len > maxbytes)
1103                 len = maxbytes;
1104
1105         DEBUG4("(READ from fd %d offset %llu len %u), ", fhandle, foffset, len);
1106
1107         myseek(fhandle, foffset);
1108         return read(fhandle, buf, len);
1109 }
1110
1111 /**
1112  * Call rawexpread repeatedly until all data has been read.
1113  * @return 0 on success, nonzero on failure
1114  **/
1115 int rawexpread_fully(off_t a, char *buf, size_t len, CLIENT *client) {
1116         ssize_t ret=0;
1117
1118         while(len > 0 && (ret=rawexpread(a, buf, len, client)) > 0 ) {
1119                 a += ret;
1120                 buf += ret;
1121                 len -= ret;
1122         }
1123         return (ret < 0 || len != 0);
1124 }
1125
1126 /**
1127  * Read an amount of bytes at a given offset from the right file. This
1128  * abstracts the read-side of the copyonwrite stuff, and calls
1129  * rawexpread() with the right parameters to do the actual work.
1130  * @param a The offset where the read should start
1131  * @param buf A buffer to read into
1132  * @param len The size of buf
1133  * @param client The client we're going to read for
1134  * @return 0 on success, nonzero on failure
1135  **/
1136 int expread(off_t a, char *buf, size_t len, CLIENT *client) {
1137         off_t rdlen, offset;
1138         off_t mapcnt, mapl, maph, pagestart;
1139
1140         if (!(client->server->flags & F_COPYONWRITE))
1141                 return(rawexpread_fully(a, buf, len, client));
1142         DEBUG3("Asked to read %d bytes at %llu.\n", len, (unsigned long long)a);
1143
1144         mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
1145
1146         for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
1147                 pagestart=mapcnt*DIFFPAGESIZE;
1148                 offset=a-pagestart;
1149                 rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
1150                         len : (size_t)DIFFPAGESIZE-offset;
1151                 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1152                         DEBUG3("Page %llu is at %lu\n", (unsigned long long)mapcnt,
1153                                (unsigned long)(client->difmap[mapcnt]));
1154                         myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset);
1155                         if (read(client->difffile, buf, rdlen) != rdlen) return -1;
1156                 } else { /* the block is not there */
1157                         DEBUG2("Page %llu is not here, we read the original one\n",
1158                                (unsigned long long)mapcnt);
1159                         if(rawexpread_fully(a, buf, rdlen, client)) return -1;
1160                 }
1161                 len-=rdlen; a+=rdlen; buf+=rdlen;
1162         }
1163         return 0;
1164 }
1165
1166 /**
1167  * Write an amount of bytes at a given offset to the right file. This
1168  * abstracts the write-side of the copyonwrite option, and calls
1169  * rawexpwrite() with the right parameters to do the actual work.
1170  *
1171  * @param a The offset where the write should start
1172  * @param buf The buffer to write from
1173  * @param len The length of buf
1174  * @param client The client we're going to write for.
1175  * @return 0 on success, nonzero on failure
1176  **/
1177 int expwrite(off_t a, char *buf, size_t len, CLIENT *client) {
1178         char pagebuf[DIFFPAGESIZE];
1179         off_t mapcnt,mapl,maph;
1180         off_t wrlen,rdlen; 
1181         off_t pagestart;
1182         off_t offset;
1183
1184         if (!(client->server->flags & F_COPYONWRITE))
1185                 return(rawexpwrite_fully(a, buf, len, client)); 
1186         DEBUG3("Asked to write %d bytes at %llu.\n", len, (unsigned long long)a);
1187
1188         mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
1189
1190         for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
1191                 pagestart=mapcnt*DIFFPAGESIZE ;
1192                 offset=a-pagestart ;
1193                 wrlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
1194                         len : (size_t)DIFFPAGESIZE-offset;
1195
1196                 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1197                         DEBUG3("Page %llu is at %lu\n", (unsigned long long)mapcnt,
1198                                (unsigned long)(client->difmap[mapcnt])) ;
1199                         myseek(client->difffile,
1200                                         client->difmap[mapcnt]*DIFFPAGESIZE+offset);
1201                         if (write(client->difffile, buf, wrlen) != wrlen) return -1 ;
1202                 } else { /* the block is not there */
1203                         myseek(client->difffile,client->difffilelen*DIFFPAGESIZE) ;
1204                         client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++;
1205                         DEBUG3("Page %llu is not here, we put it at %lu\n",
1206                                (unsigned long long)mapcnt,
1207                                (unsigned long)(client->difmap[mapcnt]));
1208                         rdlen=DIFFPAGESIZE ;
1209                         if (rawexpread_fully(pagestart, pagebuf, rdlen, client))
1210                                 return -1;
1211                         memcpy(pagebuf+offset,buf,wrlen) ;
1212                         if (write(client->difffile, pagebuf, DIFFPAGESIZE) !=
1213                                         DIFFPAGESIZE)
1214                                 return -1;
1215                 }                                                   
1216                 len-=wrlen ; a+=wrlen ; buf+=wrlen ;
1217         }
1218         return 0;
1219 }
1220
1221 /**
1222  * Do the initial negotiation.
1223  *
1224  * @param client The client we're negotiating with.
1225  **/
1226 CLIENT* negotiate(int net, CLIENT *client, GArray* servers) {
1227         char zeros[128];
1228         uint64_t size_host;
1229         uint32_t flags = NBD_FLAG_HAS_FLAGS;
1230         uint16_t smallflags = 0;
1231         uint64_t magic;
1232
1233         memset(zeros, '\0', sizeof(zeros));
1234         if(!client || !client->modern) {
1235                 if (write(net, INIT_PASSWD, 8) < 0) {
1236                         err_nonfatal("Negotiation failed: %m");
1237                         if(client)
1238                                 exit(EXIT_FAILURE);
1239                 }
1240                 if(client && client->modern) {
1241                         magic = htonll(opts_magic);
1242                 } else {
1243                         magic = htonll(cliserv_magic);
1244                 }
1245                 if (write(net, &magic, sizeof(magic)) < 0) {
1246                         err_nonfatal("Negotiation failed: %m");
1247                         if(client)
1248                                 exit(EXIT_FAILURE);
1249                 }
1250         }
1251         if(!client) {
1252                 uint32_t reserved;
1253                 uint32_t opt;
1254                 uint32_t namelen;
1255                 char* name;
1256                 int i;
1257
1258                 if(!servers)
1259                         err("programmer error");
1260                 write(net, &smallflags, sizeof(uint16_t));
1261                 read(net, &reserved, sizeof(reserved));
1262                 read(net, &magic, sizeof(magic));
1263                 magic = ntohll(magic);
1264                 if(magic != opts_magic) {
1265                         close(net);
1266                         return NULL;
1267                 }
1268                 read(net, &opt, sizeof(opt));
1269                 opt = ntohl(opt);
1270                 if(opt != NBD_OPT_EXPORT_NAME) {
1271                         close(net);
1272                         return NULL;
1273                 }
1274                 read(net, &namelen, sizeof(namelen));
1275                 namelen = ntohl(namelen);
1276                 name = malloc(namelen+1);
1277                 name[namelen+1]=0;
1278                 read(net, name, namelen);
1279                 for(i=0; i<servers->len; i++) {
1280                         SERVER* serve = &(g_array_index(servers, SERVER, i));
1281                         if(!strcmp(serve->servename, name)) {
1282                                 CLIENT* client = g_new0(CLIENT, 1);
1283                                 client->server = serve;
1284                                 client->exportsize = OFFT_MAX;
1285                                 client->net = net;
1286                                 client->modern = TRUE;
1287                                 return client;
1288                         }
1289                 }
1290         }
1291         size_host = htonll((u64)(client->exportsize));
1292         if (write(net, &size_host, 8) < 0)
1293                 err("Negotiation failed: %m");
1294         if (client->server->flags & F_READONLY)
1295                 flags |= NBD_FLAG_READ_ONLY;
1296         if (!client->modern) {
1297                 flags = htonl(flags);
1298                 if (write(client->net, &flags, 4) < 0)
1299                         err("Negotiation failed: %m");
1300         } else {
1301                 smallflags = (uint16_t)(flags & ~((uint16_t)0));
1302                 smallflags = htons(smallflags);
1303                 if (write(client->net, &smallflags, sizeof(smallflags)) < 0) {
1304                         err("Negotiation failed: %m");
1305                 }
1306         }
1307         if (write(client->net, zeros, 124) < 0)
1308                 err("Negotiation failed: %m");
1309         return NULL;
1310 }
1311
/**
 * Sending macro: hands the object reply (its address and its size) to
 * writeit() for the socket net. The expansion carries its own trailing
 * semicolon.
 **/
#define SEND(net,reply) writeit( (net), &(reply), sizeof(reply) );
/**
 * Error macro: stores errcode (converted with htonl()) in reply.error,
 * sends the reply to client->net, then resets reply.error to 0 so the
 * same reply object can be reused. Expands to a brace-enclosed block.
 **/
#define ERROR(client,reply,errcode) { (reply).error = htonl(errcode); SEND((client)->net,(reply)); (reply).error = 0; }
1316 /**
1317  * Serve a file to a single client.
1318  *
1319  * @todo This beast needs to be split up in many tiny little manageable
1320  * pieces. Preferably with a chainsaw.
1321  *
1322  * @param client The client we're going to serve to.
1323  * @return when the client disconnects
1324  **/
1325 int mainloop(CLIENT *client) {
1326         struct nbd_request request;
1327         struct nbd_reply reply;
1328         gboolean go_on=TRUE;
1329 #ifdef DODBG
1330         int i = 0;
1331 #endif
1332         negotiate(client->net, client, NULL);
1333         DEBUG("Entering request loop!\n");
1334         reply.magic = htonl(NBD_REPLY_MAGIC);
1335         reply.error = 0;
1336         while (go_on) {
1337                 char buf[BUFSIZE];
1338                 size_t len;
1339 #ifdef DODBG
1340                 i++;
1341                 printf("%d: ", i);
1342 #endif
1343                 readit(client->net, &request, sizeof(request));
1344                 request.from = ntohll(request.from);
1345                 request.type = ntohl(request.type);
1346
1347                 if (request.type==NBD_CMD_DISC) {
1348                         msg2(LOG_INFO, "Disconnect request received.");
1349                         if (client->server->flags & F_COPYONWRITE) { 
1350                                 if (client->difmap) g_free(client->difmap) ;
1351                                 close(client->difffile);
1352                                 unlink(client->difffilename);
1353                                 free(client->difffilename);
1354                         }
1355                         go_on=FALSE;
1356                         continue;
1357                 }
1358
1359                 len = ntohl(request.len);
1360
1361                 if (request.magic != htonl(NBD_REQUEST_MAGIC))
1362                         err("Not enough magic.");
1363                 if (len > BUFSIZE + sizeof(struct nbd_reply))
1364                         err("Request too big!");
1365 #ifdef DODBG
1366                 printf("%s from %llu (%llu) len %d, ", request.type ? "WRITE" :
1367                                 "READ", (unsigned long long)request.from,
1368                                 (unsigned long long)request.from / 512, len);
1369 #endif
1370                 memcpy(reply.handle, request.handle, sizeof(reply.handle));
1371                 if ((request.from + len) > (OFFT_MAX)) {
1372                         DEBUG("[Number too large!]");
1373                         ERROR(client, reply, EINVAL);
1374                         continue;
1375                 }
1376
1377                 if (((ssize_t)((off_t)request.from + len) > client->exportsize)) {
1378                         DEBUG("[RANGE!]");
1379                         ERROR(client, reply, EINVAL);
1380                         continue;
1381                 }
1382
1383                 if (request.type==NBD_CMD_WRITE) {
1384                         DEBUG("wr: net->buf, ");
1385                         readit(client->net, buf, len);
1386                         DEBUG("buf->exp, ");
1387                         if ((client->server->flags & F_READONLY) ||
1388                             (client->server->flags & F_AUTOREADONLY)) {
1389                                 DEBUG("[WRITE to READONLY!]");
1390                                 ERROR(client, reply, EPERM);
1391                                 continue;
1392                         }
1393                         if (expwrite(request.from, buf, len, client)) {
1394                                 DEBUG("Write failed: %m" );
1395                                 ERROR(client, reply, errno);
1396                                 continue;
1397                         }
1398                         SEND(client->net, reply);
1399                         DEBUG("OK!\n");
1400                         continue;
1401                 }
1402                 /* READ */
1403
1404                 DEBUG("exp->buf, ");
1405                 if (expread(request.from, buf + sizeof(struct nbd_reply), len, client)) {
1406                         DEBUG("Read failed: %m");
1407                         ERROR(client, reply, errno);
1408                         continue;
1409                 }
1410
1411                 DEBUG("buf->net, ");
1412                 memcpy(buf, &reply, sizeof(struct nbd_reply));
1413                 writeit(client->net, buf, len + sizeof(struct nbd_reply));
1414                 DEBUG("OK!\n");
1415         }
1416         return 0;
1417 }
1418
1419 /**
1420  * Set up client export array, which is an array of FILE_INFO.
1421  * Also, split a single exportfile into multiple ones, if that was asked.
1422  * @param client information on the client which we want to setup export for
1423  **/
1424 void setupexport(CLIENT* client) {
1425         int i;
1426         off_t laststartoff = 0, lastsize = 0;
1427         int multifile = (client->server->flags & F_MULTIFILE);
1428
1429         client->export = g_array_new(TRUE, TRUE, sizeof(FILE_INFO));
1430
1431         /* If multi-file, open as many files as we can.
1432          * If not, open exactly one file.
1433          * Calculate file sizes as we go to get total size. */
1434         for(i=0; ; i++) {
1435                 FILE_INFO fi;
1436                 gchar *tmpname;
1437                 gchar* error_string;
1438                 mode_t mode = (client->server->flags & F_READONLY) ? O_RDONLY : O_RDWR;
1439
1440                 if(multifile) {
1441                         tmpname=g_strdup_printf("%s.%d", client->exportname, i);
1442                 } else {
1443                         tmpname=g_strdup(client->exportname);
1444                 }
1445                 DEBUG2( "Opening %s\n", tmpname );
1446                 fi.fhandle = open(tmpname, mode);
1447                 if(fi.fhandle == -1 && mode == O_RDWR) {
1448                         /* Try again because maybe media was read-only */
1449                         fi.fhandle = open(tmpname, O_RDONLY);
1450                         if(fi.fhandle != -1) {
1451                                 /* Opening the base file in copyonwrite mode is
1452                                  * okay */
1453                                 if(!(client->server->flags & F_COPYONWRITE)) {
1454                                         client->server->flags |= F_AUTOREADONLY;
1455                                         client->server->flags |= F_READONLY;
1456                                 }
1457                         }
1458                 }
1459                 if(fi.fhandle == -1) {
1460                         if(multifile && i>0)
1461                                 break;
1462                         error_string=g_strdup_printf(
1463                                 "Could not open exported file %s: %%m",
1464                                 tmpname);
1465                         err(error_string);
1466                 }
1467                 fi.startoff = laststartoff + lastsize;
1468                 g_array_append_val(client->export, fi);
1469                 g_free(tmpname);
1470
1471                 /* Starting offset and size of this file will be used to
1472                  * calculate starting offset of next file */
1473                 laststartoff = fi.startoff;
1474                 lastsize = size_autodetect(fi.fhandle);
1475
1476                 if(!multifile)
1477                         break;
1478         }
1479
1480         /* Set export size to total calculated size */
1481         client->exportsize = laststartoff + lastsize;
1482
1483         /* Export size may be overridden */
1484         if(client->server->expected_size) {
1485                 /* desired size must be <= total calculated size */
1486                 if(client->server->expected_size > client->exportsize) {
1487                         err("Size of exported file is too big\n");
1488                 }
1489
1490                 client->exportsize = client->server->expected_size;
1491         }
1492
1493         msg3(LOG_INFO, "Size of exported file/device is %llu", (unsigned long long)client->exportsize);
1494         if(multifile) {
1495                 msg3(LOG_INFO, "Total number of files: %d", i);
1496         }
1497 }
1498
1499 int copyonwrite_prepare(CLIENT* client) {
1500         off_t i;
1501         if ((client->difffilename = malloc(1024))==NULL)
1502                 err("Failed to allocate string for diff file name");
1503         snprintf(client->difffilename, 1024, "%s-%s-%d.diff",client->exportname,client->clientname,
1504                 (int)getpid()) ;
1505         client->difffilename[1023]='\0';
1506         msg3(LOG_INFO,"About to create map and diff file %s",client->difffilename) ;
1507         client->difffile=open(client->difffilename,O_RDWR | O_CREAT | O_TRUNC,0600) ;
1508         if (client->difffile<0) err("Could not create diff file (%m)") ;
1509         if ((client->difmap=calloc(client->exportsize/DIFFPAGESIZE,sizeof(u32)))==NULL)
1510                 err("Could not allocate memory") ;
1511         for (i=0;i<client->exportsize/DIFFPAGESIZE;i++) client->difmap[i]=(u32)-1 ;
1512
1513         return 0;
1514 }
1515
1516 /**
1517  * Run a command. This is used for the ``prerun'' and ``postrun'' config file
1518  * options
1519  *
1520  * @param command the command to be ran. Read from the config file
1521  * @param file the file name we're about to export
1522  **/
1523 int do_run(gchar* command, gchar* file) {
1524         gchar* cmd;
1525         int retval=0;
1526
1527         if(command && *command) {
1528                 cmd = g_strdup_printf(command, file);
1529                 retval=system(cmd);
1530                 g_free(cmd);
1531         }
1532         return retval;
1533 }
1534
1535 /**
1536  * Serve a connection. 
1537  *
1538  * @todo allow for multithreading, perhaps use libevent. Not just yet, though;
1539  * follow the road map.
1540  *
1541  * @param client a connected client
1542  **/
1543 void serveconnection(CLIENT *client) {
1544         if(do_run(client->server->prerun, client->exportname)) {
1545                 exit(EXIT_FAILURE);
1546         }
1547         setupexport(client);
1548
1549         if (client->server->flags & F_COPYONWRITE) {
1550                 copyonwrite_prepare(client);
1551         }
1552
1553         setmysockopt(client->net);
1554
1555         mainloop(client);
1556         do_run(client->server->postrun, client->exportname);
1557 }
1558
1559 /**
1560  * Find the name of the file we have to serve. This will use g_strdup_printf
1561  * to put the IP address of the client inside a filename containing
1562  * "%s" (in the form as specified by the "virtstyle" option). That name
1563  * is then written to client->exportname.
1564  *
1565  * @param net A socket connected to an nbd client
1566  * @param client information about the client. The IP address in human-readable
1567  * format will be written to a new char* buffer, the address of which will be
1568  * stored in client->clientname.
1569  **/
1570 void set_peername(int net, CLIENT *client) {
1571         struct sockaddr_storage addrin;
1572         struct sockaddr_storage netaddr;
1573         struct sockaddr_in  *netaddr4 = NULL;
1574         struct sockaddr_in6 *netaddr6 = NULL;
1575         size_t addrinlen = sizeof( addrin );
1576         struct addrinfo hints;
1577         struct addrinfo *ai = NULL;
1578         char peername[NI_MAXHOST];
1579         char netname[NI_MAXHOST];
1580         char *tmp = NULL;
1581         int i;
1582         int e;
1583         int shift;
1584
1585         if (getpeername(net, (struct sockaddr *) &addrin, (socklen_t *)&addrinlen) < 0)
1586                 err("getsockname failed: %m");
1587
1588         getnameinfo((struct sockaddr *)&addrin, (socklen_t)addrinlen,
1589                 peername, sizeof (peername), NULL, 0, NI_NUMERICHOST);
1590
1591         memset(&hints, '\0', sizeof (hints));
1592         hints.ai_flags = AI_ADDRCONFIG;
1593         e = getaddrinfo(peername, NULL, &hints, &ai);
1594
1595         if(e != 0) {
1596                 fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
1597                 freeaddrinfo(ai);
1598                 return;
1599         }
1600
1601         switch(client->server->virtstyle) {
1602                 case VIRT_NONE:
1603                         client->exportname=g_strdup(client->server->exportname);
1604                         break;
1605                 case VIRT_IPHASH:
1606                         for(i=0;i<strlen(peername);i++) {
1607                                 if(peername[i]=='.') {
1608                                         peername[i]='/';
1609                                 }
1610                         }
1611                 case VIRT_IPLIT:
1612                         client->exportname=g_strdup_printf(client->server->exportname, peername);
1613                         break;
1614                 case VIRT_CIDR:
1615                         memcpy(&netaddr, &addrin, addrinlen);
1616                         if(ai->ai_family == AF_INET) {
1617                                 netaddr4 = (struct sockaddr_in *)&netaddr;
1618                                 (netaddr4->sin_addr).s_addr>>=32-(client->server->cidrlen);
1619                                 (netaddr4->sin_addr).s_addr<<=32-(client->server->cidrlen);
1620
1621                                 getnameinfo((struct sockaddr *) netaddr4, (socklen_t) addrinlen,
1622                                                         netname, sizeof (netname), NULL, 0, NI_NUMERICHOST);
1623                                 tmp=g_strdup_printf("%s/%s", netname, peername);
1624                         }else if(ai->ai_family == AF_INET6) {
1625                                 netaddr6 = (struct sockaddr_in6 *)&netaddr;
1626
1627                                 shift = 128-(client->server->cidrlen);
1628                                 i = 3;
1629                                 while(shift >= 32) {
1630                                         ((netaddr6->sin6_addr).s6_addr32[i])=0;
1631                                         shift-=32;
1632                                         i--;
1633                                 }
1634                                 (netaddr6->sin6_addr).s6_addr32[i]>>=shift;
1635                                 (netaddr6->sin6_addr).s6_addr32[i]<<=shift;
1636
1637                                 getnameinfo((struct sockaddr *)netaddr6, (socklen_t)addrinlen,
1638                                             netname, sizeof(netname), NULL, 0, NI_NUMERICHOST);
1639                                 tmp=g_strdup_printf("%s/%s", netname, peername);
1640                         }
1641
1642                         if(tmp != NULL)
1643                           client->exportname=g_strdup_printf(client->server->exportname, tmp);
1644
1645                         break;
1646         }
1647
1648         freeaddrinfo(ai);
1649         msg4(LOG_INFO, "connect from %s, assigned file is %s", 
1650              peername, client->exportname);
1651         client->clientname=g_strdup(peername);
1652 }
1653
1654 /**
1655  * Destroy a pid_t*
1656  * @param data a pointer to pid_t which should be freed
1657  **/
1658 void destroy_pid_t(gpointer data) {
1659         g_free(data);
1660 }
1661
1662 /**
1663  * Loop through the available servers, and serve them. Never returns.
1664  **/
1665 int serveloop(GArray* servers) {
1666         struct sockaddr_storage addrin;
1667         socklen_t addrinlen=sizeof(addrin);
1668         int i;
1669         int max;
1670         int sock;
1671         fd_set mset;
1672         fd_set rset;
1673
1674         /* 
1675          * Set up the master fd_set. The set of descriptors we need
1676          * to select() for never changes anyway and it buys us a *lot*
1677          * of time to only build this once. However, if we ever choose
1678          * to not fork() for clients anymore, we may have to revisit
1679          * this.
1680          */
1681         max=0;
1682         FD_ZERO(&mset);
1683         for(i=0;i<servers->len;i++) {
1684                 if((sock=(g_array_index(servers, SERVER, i)).socket)) {
1685                         FD_SET(sock, &mset);
1686                         max=sock>max?sock:max;
1687                 }
1688         }
1689         if(modernsock) {
1690                 FD_SET(modernsock, &mset);
1691                 max=modernsock>max?modernsock:max;
1692         }
1693         for(;;) {
1694                 CLIENT *client = NULL;
1695                 pid_t *pid;
1696
1697                 memcpy(&rset, &mset, sizeof(fd_set));
1698                 if(select(max+1, &rset, NULL, NULL, NULL)>0) {
1699                         int net = 0;
1700                         SERVER* serve;
1701
1702                         DEBUG("accept, ");
1703                         if(FD_ISSET(modernsock, &rset)) {
1704                                 if((net=accept(modernsock, (struct sockaddr *) &addrin, &addrinlen)) < 0)
1705                                         err("accept: %m");
1706                                 client = negotiate(net, NULL, servers);
1707                                 if(!client) {
1708                                         err_nonfatal("negotiation failed");
1709                                         close(net);
1710                                 }
1711                         }
1712                         for(i=0;i<servers->len && !net;i++) {
1713                                 serve=&(g_array_index(servers, SERVER, i));
1714                                 if(FD_ISSET(serve->socket, &rset)) {
1715                                         if ((net=accept(serve->socket, (struct sockaddr *) &addrin, &addrinlen)) < 0)
1716                                                 err("accept: %m");
1717                                 }
1718                         }
1719                         if(net) {
1720                                 int sock_flags;
1721
1722                                 if((sock_flags = fcntl(net, F_GETFL, 0))==-1) {
1723                                         err("fcntl F_GETFL");
1724                                 }
1725                                 if(fcntl(net, F_SETFL, sock_flags &~O_NONBLOCK)==-1) {
1726                                         err("fcntl F_SETFL ~O_NONBLOCK");
1727                                 }
1728                                 if(!client) {
1729                                         client = g_new0(CLIENT, 1);
1730                                         client->server=serve;
1731                                         client->exportsize=OFFT_MAX;
1732                                         client->net=net;
1733                                 }
1734                                 set_peername(net, client);
1735                                 if (!authorized_client(client)) {
1736                                         msg2(LOG_INFO,"Unauthorized client") ;
1737                                         close(net);
1738                                         continue;
1739                                 }
1740                                 msg2(LOG_INFO,"Authorized client") ;
1741                                 pid=g_malloc(sizeof(pid_t));
1742 #ifndef NOFORK
1743                                 if ((*pid=fork())<0) {
1744                                         msg3(LOG_INFO,"Could not fork (%s)",strerror(errno)) ;
1745                                         close(net);
1746                                         continue;
1747                                 }
1748                                 if (*pid>0) { /* parent */
1749                                         close(net);
1750                                         g_hash_table_insert(children, pid, pid);
1751                                         continue;
1752                                 }
1753                                 /* child */
1754                                 g_hash_table_destroy(children);
1755                                 for(i=0;i<servers->len;i++) {
1756                                         serve=&g_array_index(servers, SERVER, i);
1757                                         close(serve->socket);
1758                                 }
1759                                 /* FALSE does not free the
1760                                 actual data. This is required,
1761                                 because the client has a
1762                                 direct reference into that
1763                                 data, and otherwise we get a
1764                                 segfault... */
1765                                 g_array_free(servers, FALSE);
1766 #endif // NOFORK
1767                                 msg2(LOG_INFO,"Starting to serve");
1768                                 serveconnection(client);
1769                                 exit(EXIT_SUCCESS);
1770                         }
1771                 }
1772         }
1773 }
1774
/**
 * Apply the standard options to a listening socket: SO_REUSEADDR,
 * SO_KEEPALIVE and non-blocking mode. Any failure is reported through
 * err().
 *
 * @param socket the socket to configure
 **/
void dosockopts(int socket) {
#ifndef sun
	int yes=1;
#else
	char yes='1';
#endif /* sun */
	int current_flags;
	int rc;

	/* lose the pesky "Address already in use" error message */
	rc = setsockopt(socket, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(int));
	if (rc == -1) {
		err("setsockopt SO_REUSEADDR");
	}

	rc = setsockopt(socket, SOL_SOCKET, SO_KEEPALIVE, &yes, sizeof(int));
	if (rc == -1) {
		err("setsockopt SO_KEEPALIVE");
	}

	/* make the listening socket non-blocking: read the current file
	 * status flags first so that only O_NONBLOCK is added */
	current_flags = fcntl(socket, F_GETFL, 0);
	if (current_flags == -1) {
		err("fcntl F_GETFL");
	}

	rc = fcntl(socket, F_SETFL, current_flags | O_NONBLOCK);
	if (rc == -1) {
		err("fcntl F_SETFL O_NONBLOCK");
	}
}
1799
1800 /**
1801  * Connect a server's socket.
1802  *
1803  * @param serve the server we want to connect.
1804  **/
1805 int setup_serve(SERVER *serve) {
1806         struct addrinfo hints;
1807         struct addrinfo *ai = NULL;
1808         gchar *port = NULL;
1809         int e;
1810
1811         if(!do_oldstyle) {
1812                 return serve->servename ? 1 : 0;
1813         }
1814         memset(&hints,'\0',sizeof(hints));
1815         hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG | AI_NUMERICSERV;
1816         hints.ai_socktype = SOCK_STREAM;
1817         hints.ai_family = serve->socket_family;
1818
1819         port = g_strdup_printf ("%d", serve->port);
1820         if (port == NULL)
1821                 return 0;
1822
1823         e = getaddrinfo(serve->listenaddr,port,&hints,&ai);
1824
1825         g_free(port);
1826
1827         if(e != 0) {
1828                 fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
1829                 serve->socket = -1;
1830                 freeaddrinfo(ai);
1831                 exit(EXIT_FAILURE);
1832         }
1833
1834         if(serve->socket_family == AF_UNSPEC)
1835                 serve->socket_family = ai->ai_family;
1836
1837 #ifdef WITH_SDP
1838         if ((serve->flags) && F_SDP) {
1839                 if (ai->ai_family == AF_INET)
1840                         ai->ai_family = AF_INET_SDP;
1841                 else (ai->ai_family == AF_INET6)
1842                         ai->ai_family = AF_INET6_SDP;
1843         }
1844 #endif
1845         if ((serve->socket = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol)) < 0)
1846                 err("socket: %m");
1847
1848         dosockopts(serve->socket);
1849
1850         DEBUG("Waiting for connections... bind, ");
1851         e = bind(serve->socket, ai->ai_addr, ai->ai_addrlen);
1852         if (e != 0 && errno != EADDRINUSE)
1853                 err("bind: %m");
1854         DEBUG("listen, ");
1855         if (listen(serve->socket, 1) < 0)
1856                 err("listen: %m");
1857
1858         freeaddrinfo (ai);
1859         if(serve->servename) {
1860                 return 1;
1861         } else {
1862                 return 0;
1863         }
1864 }
1865
/**
 * Open the socket used for the modern-style handshake.
 *
 * Resolves modern_listen / NBD_DEFAULT_PORT for a passive TCP socket,
 * creates the socket in modernsock, applies the standard socket options,
 * binds it and starts listening with a backlog of 10. A failing
 * getaddrinfo() terminates the program; failures of socket(), bind() and
 * listen() are reported through err().
 **/
void open_modern(void) {
	struct addrinfo hints;
	struct addrinfo* ai = NULL;
	int e;

	memset(&hints, '\0', sizeof(hints));
	hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_family = AF_UNSPEC;
	hints.ai_protocol = IPPROTO_TCP;
	e = getaddrinfo(modern_listen, NBD_DEFAULT_PORT, &hints, &ai);
	if(e != 0) {
		fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
		exit(EXIT_FAILURE);
	}
	/* Only the first address returned by the resolver is used */
	if((modernsock = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol))<0) {
		err("socket: %m");
	}

	dosockopts(modernsock);

	if(bind(modernsock, ai->ai_addr, ai->ai_addrlen)) {
		err("bind: %m");
	}
	if(listen(modernsock, 10) <0) {
		err("listen: %m");
	}

	freeaddrinfo(ai);
}
1897
1898 /**
1899  * Connect our servers.
1900  **/
1901 void setup_servers(GArray* servers) {
1902         int i;
1903         struct sigaction sa;
1904         int want_modern=0;
1905
1906         for(i=0;i<servers->len;i++) {
1907                 want_modern |= setup_serve(&(g_array_index(servers, SERVER, i)));
1908         }
1909         if(want_modern) {
1910                 open_modern();
1911         }
1912         children=g_hash_table_new_full(g_int_hash, g_int_equal, NULL, destroy_pid_t);
1913
1914         sa.sa_handler = sigchld_handler;
1915         sigemptyset(&sa.sa_mask);
1916         sa.sa_flags = SA_RESTART;
1917         if(sigaction(SIGCHLD, &sa, NULL) == -1)
1918                 err("sigaction: %m");
1919         sa.sa_handler = sigterm_handler;
1920         sigemptyset(&sa.sa_mask);
1921         sa.sa_flags = SA_RESTART;
1922         if(sigaction(SIGTERM, &sa, NULL) == -1)
1923                 err("sigaction: %m");
1924 }
1925
1926 /**
1927  * Go daemon (unless we specified at compile time that we didn't want this)
1928  * @param serve the first server of our configuration. If its port is zero,
1929  *      then do not daemonize, because we're doing inetd then. This parameter
1930  *      is only used to create a PID file of the form
1931  *      /var/run/nbd-server.&lt;port&gt;.pid; it's not modified in any way.
1932  **/
1933 #if !defined(NODAEMON) && !defined(NOFORK)
1934 void daemonize(SERVER* serve) {
1935         FILE*pidf;
1936
1937         if(serve && !(serve->port)) {
1938                 return;
1939         }
1940         if(daemon(0,0)<0) {
1941                 err("daemon");
1942         }
1943         if(!*pidftemplate) {
1944                 if(serve) {
1945                         strncpy(pidftemplate, "/var/run/nbd-server.%d.pid", 255);
1946                 } else {
1947                         strncpy(pidftemplate, "/var/run/nbd-server.pid", 255);
1948                 }
1949         }
1950         snprintf(pidfname, 255, pidftemplate, serve ? serve->port : 0);
1951         pidf=fopen(pidfname, "w");
1952         if(pidf) {
1953                 fprintf(pidf,"%d\n", (int)getpid());
1954                 fclose(pidf);
1955         } else {
1956                 perror("fopen");
1957                 fprintf(stderr, "Not fatal; continuing");
1958         }
1959 }
1960 #else
1961 #define daemonize(serve)
1962 #endif /* !defined(NODAEMON) && !defined(NOFORK) */
1963
1964 /*
1965  * Everything beyond this point (in the file) is run in non-daemon mode.
1966  * The stuff above daemonize() isn't.
1967  */
1968
1969 void serve_err(SERVER* serve, const char* msg) G_GNUC_NORETURN;
1970
1971 void serve_err(SERVER* serve, const char* msg) {
1972         g_message("Export of %s on port %d failed:", serve->exportname,
1973                         serve->port);
1974         err(msg);
1975 }
1976
1977 /**
1978  * Set up user-ID and/or group-ID
1979  **/
1980 void dousers(void) {
1981         struct passwd *pw;
1982         struct group *gr;
1983         gchar* str;
1984         if(rungroup) {
1985                 gr=getgrnam(rungroup);
1986                 if(!gr) {
1987                         str = g_strdup_printf("Invalid group name: %s", rungroup);
1988                         err(str);
1989                 }
1990                 if(setgid(gr->gr_gid)<0) {
1991                         err("Could not set GID: %m"); 
1992                 }
1993         }
1994         if(runuser) {
1995                 pw=getpwnam(runuser);
1996                 if(!pw) {
1997                         str = g_strdup_printf("Invalid user name: %s", runuser);
1998                         err(str);
1999                 }
2000                 if(setuid(pw->pw_uid)<0) {
2001                         err("Could not set UID: %m");
2002                 }
2003         }
2004 }
2005
2006 #ifndef ISSERVER
2007 void glib_message_syslog_redirect(const gchar *log_domain,
2008                                   GLogLevelFlags log_level,
2009                                   const gchar *message,
2010                                   gpointer user_data)
2011 {
2012     int level=LOG_DEBUG;
2013     
2014     switch( log_level )
2015     {
2016       case G_LOG_FLAG_FATAL:
2017       case G_LOG_LEVEL_CRITICAL:
2018       case G_LOG_LEVEL_ERROR:    
2019         level=LOG_ERR; 
2020         break;
2021       case G_LOG_LEVEL_WARNING:
2022         level=LOG_WARNING;
2023         break;
2024       case G_LOG_LEVEL_MESSAGE:
2025       case G_LOG_LEVEL_INFO:
2026         level=LOG_INFO;
2027         break;
2028       case G_LOG_LEVEL_DEBUG:
2029         level=LOG_DEBUG;
2030       default:
2031         level=LOG_ERR;
2032     }
2033     syslog(level, message);
2034 }
2035 #endif
2036
2037 /**
2038  * Main entry point...
2039  **/
2040 int main(int argc, char *argv[]) {
2041         SERVER *serve;
2042         GArray *servers;
2043         GError *err=NULL;
2044
2045         if (sizeof( struct nbd_request )!=28) {
2046                 fprintf(stderr,"Bad size of structure. Alignment problems?\n");
2047                 exit(EXIT_FAILURE) ;
2048         }
2049
2050         memset(pidftemplate, '\0', 256);
2051
2052         logging();
2053         config_file_pos = g_strdup(CFILE);
2054         serve=cmdline(argc, argv);
2055         servers = parse_cfile(config_file_pos, &err);
2056         
2057         if(serve) {
2058                 serve->socket_family = AF_UNSPEC;
2059
2060                 append_serve(serve, servers);
2061      
2062                 if (!(serve->port)) {
2063                         CLIENT *client;
2064 #ifndef ISSERVER
2065                         /* You really should define ISSERVER if you're going to use
2066                          * inetd mode, but if you don't, closing stdout and stderr
2067                          * (which inetd had connected to the client socket) will let it
2068                          * work. */
2069                         close(1);
2070                         close(2);
2071                         open("/dev/null", O_WRONLY);
2072                         open("/dev/null", O_WRONLY);
2073                         g_log_set_default_handler( glib_message_syslog_redirect, NULL );
2074 #endif
2075                         client=g_malloc(sizeof(CLIENT));
2076                         client->server=serve;
2077                         client->net=0;
2078                         client->exportsize=OFFT_MAX;
2079                         set_peername(0,client);
2080                         serveconnection(client);
2081                         return 0;
2082                 }
2083         }
2084     
2085         if(!servers || !servers->len) {
2086                 g_warning("Could not parse config file: %s", 
2087                                 err ? err->message : "Unknown error");
2088         }
2089         if(serve) {
2090                 g_warning("Specifying an export on the command line is deprecated.");
2091                 g_warning("Please use a configuration file instead.");
2092         }
2093
2094         if((!serve) && (!servers||!servers->len)) {
2095                 g_message("Nothing to do! Bye!");
2096                 exit(EXIT_FAILURE);
2097         }
2098         daemonize(serve);
2099         setup_servers(servers);
2100         dousers();
2101         serveloop(servers);
2102         return 0 ;
2103 }