Assign a value to serve when using modern protocol
[nbd.git] / nbd-server.c
1 /*
2  * Network Block Device - server
3  *
4  * Copyright 1996-1998 Pavel Machek, distribute under GPL
5  *  <pavel@atrey.karlin.mff.cuni.cz>
6  * Copyright 2001-2004 Wouter Verhelst <wouter@debian.org>
7  * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk>
8  *
9  * Version 1.0 - hopefully 64-bit-clean
10  * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au>
11  * Version 1.2 - autodetect size of block devices, thanx to Peter T. Breuer" <ptb@it.uc3m.es>
12  * Version 1.5 - can compile on Unix systems that don't have 64 bit integer
13  *      type, or don't have 64 bit file offsets by defining FS_32BIT
14  *      in compile options for nbd-server *only*. This can be done
15  *      with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the
16  *      original autoconf input file, or I would make it a configure
17  *      option.) Ken Yap <ken@nlc.net.au>.
18  * Version 1.6 - fix autodetection of block device size and really make 64 bit
19  *      clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk>
20  * Version 2.0 - Version synchronised with client
21  * Version 2.1 - Reap zombie client processes when they exit. Removed
22  *      (uncommented) the _IO magic, it's no longer necessary. Wouter
23  *      Verhelst <wouter@debian.org>
 * Version 2.2 - Auto switch to read-only mode (useful for floppies).
25  * Version 2.3 - Fixed code so that Large File Support works. This
26  *      removes the FS_32BIT compile-time directive; define
27  *      _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be
28  *      using FS_32BIT. This will allow you to use files >2GB instead of
29  *      having to use the -m option. Wouter Verhelst <wouter@debian.org>
30  * Version 2.4 - Added code to keep track of children, so that we can
31  *      properly kill them from initscripts. Add a call to daemon(),
32  *      so that processes don't think they have to wait for us, which is
33  *      interesting for initscripts as well. Wouter Verhelst
34  *      <wouter@debian.org>
35  * Version 2.5 - Bugfix release: forgot to reset child_arraysize to
36  *      zero after fork()ing, resulting in nbd-server going berserk
37  *      when it receives a signal with at least one child open. Wouter
38  *      Verhelst <wouter@debian.org>
39  * 10/10/2003 - Added socket option SO_KEEPALIVE (sf.net bug 819235);
40  *      rectified type of mainloop::size_host (sf.net bugs 814435 and
41  *      817385); close the PID file after writing to it, so that the
42  *      daemon can actually be found. Wouter Verhelst
43  *      <wouter@debian.org>
44  * 10/10/2003 - Size of the data "size_host" was wrong and so was not
45  *      correctly put in network endianness. Many types were corrected
46  *      (size_t and off_t instead of int).  <vspaceg@sourceforge.net>
47  * Version 2.6 - Some code cleanup.
48  * Version 2.7 - Better build system.
49  * 11/02/2004 - Doxygenified the source, modularized it a bit. Needs a 
50  *      lot more work, but this is a start. Wouter Verhelst
51  *      <wouter@debian.org>
52  * 16/03/2010 - Add IPv6 support.
53  *      Kitt Tientanopajai <kitt@kitty.in.th>
54  *      Neutron Soutmun <neo.neutron@gmail.com>
55  *      Suriya Soutmun <darksolar@gmail.com>
56  */
57
58 /* Includes LFS defines, which defines behaviours of some of the following
59  * headers, so must come before those */
60 #include "lfs.h"
61
62 #include <sys/types.h>
63 #include <sys/socket.h>
64 #include <sys/stat.h>
65 #include <sys/select.h>         /* select */
66 #include <sys/wait.h>           /* wait */
67 #ifdef HAVE_SYS_IOCTL_H
68 #include <sys/ioctl.h>
69 #endif
70 #include <sys/param.h>
71 #ifdef HAVE_SYS_MOUNT_H
72 #include <sys/mount.h>          /* For BLKGETSIZE */
73 #endif
74 #include <signal.h>             /* sigaction */
75 #include <errno.h>
76 #include <netinet/tcp.h>
77 #include <netinet/in.h>
78 #include <netdb.h>
79 #include <syslog.h>
80 #include <unistd.h>
81 #include <stdio.h>
82 #include <stdlib.h>
83 #include <string.h>
84 #include <fcntl.h>
85 #include <arpa/inet.h>
86 #include <strings.h>
87 #include <dirent.h>
88 #include <unistd.h>
89 #include <getopt.h>
90 #include <pwd.h>
91 #include <grp.h>
92
93 #include <glib.h>
94
95 /* used in cliserv.h, so must come first */
96 #define MY_NAME "nbd_server"
97 #include "cliserv.h"
98
99 #ifdef WITH_SDP
100 #include <sdp_inet.h>
101 #endif
102
103 /** Default position of the config file */
104 #ifndef SYSCONFDIR
105 #define SYSCONFDIR "/etc"
106 #endif
107 #define CFILE SYSCONFDIR "/nbd-server/config"
108
109 /** Where our config file actually is */
110 gchar* config_file_pos;
111
112 /** What user we're running as */
113 gchar* runuser=NULL;
114 /** What group we're running as */
115 gchar* rungroup=NULL;
116 /** whether to export using the old negotiation protocol (port-based) */
117 gboolean do_oldstyle=FALSE;
118
/**
 * Logging macros. Each takes a syslog-style priority, a format string and
 * zero (msg2), one (msg3) or two (msg4) format arguments. Messages only go
 * to syslog when ISSERVER is defined; otherwise the priority is dropped and
 * the message is handed to g_message().
 */
#ifdef ISSERVER
#define msg2(prio,fmt) syslog(prio,fmt)
#define msg3(prio,fmt,x) syslog(prio,fmt,x)
#define msg4(prio,fmt,x,y) syslog(prio,fmt,x,y)
#else
#define msg2(prio,fmt) g_message(fmt)
#define msg3(prio,fmt,x) g_message(fmt,x)
#define msg4(prio,fmt,x,y) g_message(fmt,x,y)
#endif

/* Debugging macros: thin printf() wrappers that expand to nothing unless
 * DODBG is defined (uncomment the line below to enable them). */
//#define DODBG
#ifdef DODBG
#define DEBUG( fmt ) printf( fmt )
#define DEBUG2( fmt,x ) printf( fmt,x )
#define DEBUG3( fmt,x,y ) printf( fmt,x,y )
#define DEBUG4( fmt,x,y,z ) printf( fmt,x,y,z )
#else
#define DEBUG( fmt )
#define DEBUG2( fmt,x )
#define DEBUG3( fmt,x,y )
#define DEBUG4( fmt,x,y,z )
#endif
/* Fall back to an empty version string when the build does not provide one */
#ifndef PACKAGE_VERSION
#define PACKAGE_VERSION ""
#endif
/**
 * The highest value a variable of type off_t can reach. off_t is a signed
 * integer, so this is every bit set except for the leftmost (sign) one.
 *
 * The shift is performed on an unsigned type, because shifting a one into
 * the sign bit of a signed type is undefined behaviour. The expansion is
 * fully parenthesized so it can be used safely inside larger expressions.
 **/
#define OFFT_MAX ((off_t)((1ULL<<(sizeof(off_t)*8-1))-1))
#define LINELEN 256       /**< Size of static buffer used to read the
                               authorization file (yuck) */
#define BUFSIZE ((1024*1024)+sizeof(struct nbd_reply)) /**< Size of buffer that can hold requests */
#define DIFFPAGESIZE 4096 /**< diff file uses those chunks */
#define F_READONLY 1      /**< flag to tell us a file is readonly */
#define F_MULTIFILE 2     /**< flag to tell us a file is exported using -m */
#define F_COPYONWRITE 4   /**< flag to tell us a file is exported using
                            copyonwrite */
#define F_AUTOREADONLY 8  /**< flag to tell us a file is set to autoreadonly */
#define F_SPARSE 16       /**< flag to tell us copyonwrite should use a sparse file */
#define F_SDP 32          /**< flag to tell us the export should be done using the Socket Direct Protocol for RDMA */
#define F_SYNC 64         /**< Whether to fsync() after a write */
163 GHashTable *children;
164 char pidfname[256]; /**< name of our PID file */
165 char pidftemplate[256]; /**< template to be used for the filename of the PID file */
166 char default_authname[] = SYSCONFDIR "/nbd-server/allow"; /**< default name of allow file */
167
168 int modernsock=0;         /**< Socket for the modern handler. Not used
169                                if a client was only specified on the
170                                command line; only port used if
171                                oldstyle is set to false (and then the
172                                command-line client isn't used, gna gna) */
173 char* modern_listen;      /**< listenaddr value for modernsock */
174
/**
 * Types of virtualization
 **/
typedef enum {
        /** No virtualization */
        VIRT_NONE = 0,
        /** Literal IP address as part of the filename */
        VIRT_IPLIT,
        /** Replacing all dots in an ip address by a / before doing the same
         *  as in IPLIT */
        VIRT_IPHASH,
        /** Every subnet in its own directory */
        VIRT_CIDR
} VIRT_STYLE;
185
186 /**
187  * Variables associated with a server.
188  **/
189 typedef struct {
190         gchar* exportname;    /**< (unprocessed) filename of the file we're exporting */
191         off_t expected_size; /**< size of the exported file as it was told to
192                                us through configuration */
193         gchar* listenaddr;   /**< The IP address we're listening on */
194         unsigned int port;   /**< port we're exporting this file at */
195         char* authname;      /**< filename of the authorization file */
196         int flags;           /**< flags associated with this exported file */
197         int socket;          /**< The socket of this server. */
198         int socket_family;   /**< family of the socket */
199         VIRT_STYLE virtstyle;/**< The style of virtualization, if any */
200         uint8_t cidrlen;     /**< The length of the mask when we use
201                                   CIDR-style virtualization */
202         gchar* prerun;       /**< command to be ran after connecting a client,
203                                   but before starting to serve */
204         gchar* postrun;      /**< command that will be ran after the client
205                                   disconnects */
206         gchar* servename;    /**< name of the export as selected by nbd-client */
207         int max_connections; /**< maximum number of opened connections */
208 } SERVER;
209
/**
 * Description of a single exported file: its file descriptor and the
 * offset at which it starts.
 **/
typedef struct {
        /** file descriptor */
        int fhandle;
        /** starting offset of this file */
        off_t startoff;
} FILE_INFO;
217
218 typedef struct {
219         off_t exportsize;    /**< size of the file we're exporting */
220         char *clientname;    /**< peer */
221         char *exportname;    /**< (processed) filename of the file we're exporting */
222         GArray *export;    /**< array of FILE_INFO of exported files;
223                                array size is always 1 unless we're
224                                doing the multiple file option */
225         int net;             /**< The actual client socket */
226         SERVER *server;      /**< The server this client is getting data from */
227         char* difffilename;  /**< filename of the copy-on-write file, if any */
228         int difffile;        /**< filedescriptor of copyonwrite file. @todo
229                                shouldn't this be an array too? (cfr export) Or
230                                make -m and -c mutually exclusive */
231         u32 difffilelen;     /**< number of pages in difffile */
232         u32 *difmap;         /**< see comment on the global difmap for this one */
233         gboolean modern;     /**< client was negotiated using modern negotiation protocol */
234 } CLIENT;
235
/**
 * Type of configuration file values
 **/
typedef enum {
        /** This parameter is an integer */
        PARAM_INT = 0,
        /** This parameter is a string */
        PARAM_STRING,
        /** This parameter is a boolean */
        PARAM_BOOL
} PARAM_TYPE;
244
245 /**
246  * Configuration file values
247  **/
248 typedef struct {
249         gchar *paramname;       /**< Name of the parameter, as it appears in
250                                   the config file */
251         gboolean required;      /**< Whether this is a required (as opposed to
252                                   optional) parameter */
253         PARAM_TYPE ptype;       /**< Type of the parameter. */
254         gpointer target;        /**< Pointer to where the data of this
255                                   parameter should be written. If ptype is
256                                   PARAM_BOOL, the data is or'ed rather than
257                                   overwritten. */
258         gint flagval;           /**< Flag mask for this parameter in case ptype
259                                   is PARAM_BOOL. */
260 } PARAM;
261
262 /**
263  * Check whether a client is allowed to connect. Works with an authorization
264  * file which contains one line per machine, no wildcards.
265  *
266  * @param opts The client who's trying to connect.
267  * @return 0 - authorization refused, 1 - OK
268  **/
269 int authorized_client(CLIENT *opts) {
270         const char *ERRMSG="Invalid entry '%s' in authfile '%s', so, refusing all connections.";
271         FILE *f ;
272         char line[LINELEN]; 
273         char *tmp;
274         struct in_addr addr;
275         struct in_addr client;
276         struct in_addr cltemp;
277         int len;
278
279         if ((f=fopen(opts->server->authname,"r"))==NULL) {
280                 msg4(LOG_INFO,"Can't open authorization file %s (%s).",
281                      opts->server->authname,strerror(errno)) ;
282                 return 1 ; 
283         }
284   
285         inet_aton(opts->clientname, &client);
286         while (fgets(line,LINELEN,f)!=NULL) {
287                 if((tmp=index(line, '/'))) {
288                         if(strlen(line)<=tmp-line) {
289                                 msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
290                                 return 0;
291                         }
292                         *(tmp++)=0;
293                         if(!inet_aton(line,&addr)) {
294                                 msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
295                                 return 0;
296                         }
297                         len=strtol(tmp, NULL, 0);
298                         addr.s_addr>>=32-len;
299                         addr.s_addr<<=32-len;
300                         memcpy(&cltemp,&client,sizeof(client));
301                         cltemp.s_addr>>=32-len;
302                         cltemp.s_addr<<=32-len;
303                         if(addr.s_addr == cltemp.s_addr) {
304                                 return 1;
305                         }
306                 }
307                 if (strncmp(line,opts->clientname,strlen(opts->clientname))==0) {
308                         fclose(f);
309                         return 1;
310                 }
311         }
312         fclose(f);
313         return 0;
314 }
315
/**
 * Read data from a file descriptor into a buffer. Keeps reading until
 * exactly len bytes have been received; a read that fails with EAGAIN is
 * simply retried.
 *
 * End-of-file (read() returning 0) is reported as an error of its own:
 * read() does not set errno in that case, so looking at errno there would
 * test a stale value.
 *
 * NOTE(review): err() is declared outside this file section; this code
 * presumes it does not return.
 *
 * @param f a file descriptor
 * @param buf a buffer
 * @param len the number of bytes to be read
 **/
inline void readit(int f, void *buf, size_t len) {
        /* arithmetic on void* is a GNU extension; walk a char* instead */
        char *pos = buf;
        ssize_t res;
        while (len > 0) {
                DEBUG("*");
                res = read(f, pos, len);
                if (res > 0) {
                        len -= res;
                        pos += res;
                } else if (res == 0) {
                        err("Read failed: End of file");
                } else if (errno != EAGAIN) {
                        err("Read failed: %m");
                }
        }
}
337
/**
 * Write data from a buffer into a filedescriptor, repeating short writes
 * until all of the buffer has been sent.
 *
 * NOTE(review): err() is declared outside this file section; this code
 * presumes it does not return.
 *
 * @param f a file descriptor
 * @param buf a buffer containing data
 * @param len the number of bytes to be written
 **/
inline void writeit(int f, void *buf, size_t len) {
        char *cursor = buf;
        size_t remaining = len;

        while (remaining != 0) {
                ssize_t sent;

                DEBUG("+");
                sent = write(f, cursor, remaining);
                if (sent <= 0)
                        err("Send failed: %m");
                remaining -= sent;
                cursor += sent;
        }
}
355
356 /**
357  * Print out a message about how to use nbd-server. Split out to a separate
358  * function so that we can call it from multiple places
359  */
360 void usage() {
361         printf("This is nbd-server version " VERSION "\n");
362         printf("Usage: [ip:|ip6@]port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-C configuration file] [-p PID file name] [-o section name] [-M max connections]\n"
363                "\t-r|--read-only\t\tread only\n"
364                "\t-m|--multi-file\t\tmultiple file\n"
365                "\t-c|--copy-on-write\tcopy on write\n"
366                "\t-C|--config-file\tspecify an alternate configuration file\n"
367                "\t-l|--authorize-file\tfile with list of hosts that are allowed to\n\t\t\t\tconnect.\n"
368                "\t-p|--pid-file\t\tspecify a filename to write our PID to\n"
369                "\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n"
370                "\t-M|--max-connections\tspecify the maximum number of opened connections\n\n"
371                "\tif port is set to 0, stdin is used (for running from inetd)\n"
372                "\tif file_to_export contains '%%s', it is substituted with the IP\n"
373                "\t\taddress of the machine trying to connect\n" 
374                "\tif ip is set, it contains the local IP address on which we're listening.\n\tif not, the server will listen on all local IP addresses\n");
375         printf("Using configuration file %s\n", CFILE);
376 }
377
378 /* Dumps a config file section of the given SERVER*, and exits. */
379 void dump_section(SERVER* serve, gchar* section_header) {
380         printf("[%s]\n", section_header);
381         printf("\texportname = %s\n", serve->exportname);
382         printf("\tlistenaddr = %s\n", serve->listenaddr);
383         printf("\tport = %d\n", serve->port);
384         if(serve->flags & F_READONLY) {
385                 printf("\treadonly = true\n");
386         }
387         if(serve->flags & F_MULTIFILE) {
388                 printf("\tmultifile = true\n");
389         }
390         if(serve->flags & F_COPYONWRITE) {
391                 printf("\tcopyonwrite = true\n");
392         }
393         if(serve->expected_size) {
394                 printf("\tfilesize = %lld\n", (long long int)serve->expected_size);
395         }
396         if(serve->authname) {
397                 printf("\tauthfile = %s\n", serve->authname);
398         }
399         exit(EXIT_SUCCESS);
400 }
401
402 /**
403  * Parse the command line.
404  *
405  * @param argc the argc argument to main()
406  * @param argv the argv argument to main()
407  **/
408 SERVER* cmdline(int argc, char *argv[]) {
409         int i=0;
410         int nonspecial=0;
411         int c;
412         struct option long_options[] = {
413                 {"read-only", no_argument, NULL, 'r'},
414                 {"multi-file", no_argument, NULL, 'm'},
415                 {"copy-on-write", no_argument, NULL, 'c'},
416                 {"authorize-file", required_argument, NULL, 'l'},
417                 {"config-file", required_argument, NULL, 'C'},
418                 {"pid-file", required_argument, NULL, 'p'},
419                 {"output-config", required_argument, NULL, 'o'},
420                 {"max-connection", required_argument, NULL, 'M'},
421                 {0,0,0,0}
422         };
423         SERVER *serve;
424         off_t es;
425         size_t last;
426         char suffix;
427         gboolean do_output=FALSE;
428         gchar* section_header="";
429         gchar** addr_port;
430
431         if(argc==1) {
432                 return NULL;
433         }
434         serve=g_new0(SERVER, 1);
435         serve->authname = g_strdup(default_authname);
436         serve->virtstyle=VIRT_IPLIT;
437         while((c=getopt_long(argc, argv, "-C:cl:mo:rp:M:", long_options, &i))>=0) {
438                 switch (c) {
439                 case 1:
440                         /* non-option argument */
441                         switch(nonspecial++) {
442                         case 0:
443                                 if(strchr(optarg, ':') == strrchr(optarg, ':')) {
444                                         addr_port=g_strsplit(optarg, ":", 2);
445
446                                         /* Check for "@" - maybe user using this separator
447                                                  for IPv4 address */
448                                         if(!addr_port[1]) {
449                                                 g_strfreev(addr_port);
450                                                 addr_port=g_strsplit(optarg, "@", 2);
451                                         }
452                                 } else {
453                                         addr_port=g_strsplit(optarg, "@", 2);
454                                 }
455
456                                 if(addr_port[1]) {
457                                         serve->port=strtol(addr_port[1], NULL, 0);
458                                         serve->listenaddr=g_strdup(addr_port[0]);
459                                 } else {
460                                         serve->listenaddr=NULL;
461                                         serve->port=strtol(addr_port[0], NULL, 0);
462                                 }
463                                 g_strfreev(addr_port);
464                                 break;
465                         case 1:
466                                 serve->exportname = g_strdup(optarg);
467                                 if(serve->exportname[0] != '/') {
468                                         fprintf(stderr, "E: The to be exported file needs to be an absolute filename!\n");
469                                         exit(EXIT_FAILURE);
470                                 }
471                                 break;
472                         case 2:
473                                 last=strlen(optarg)-1;
474                                 suffix=optarg[last];
475                                 if (suffix == 'k' || suffix == 'K' ||
476                                     suffix == 'm' || suffix == 'M')
477                                         optarg[last] = '\0';
478                                 es = (off_t)atoll(optarg);
479                                 switch (suffix) {
480                                         case 'm':
481                                         case 'M':  es <<= 10;
482                                         case 'k':
483                                         case 'K':  es <<= 10;
484                                         default :  break;
485                                 }
486                                 serve->expected_size = es;
487                                 break;
488                         }
489                         break;
490                 case 'r':
491                         serve->flags |= F_READONLY;
492                         break;
493                 case 'm':
494                         serve->flags |= F_MULTIFILE;
495                         break;
496                 case 'o':
497                         do_output = TRUE;
498                         section_header = g_strdup(optarg);
499                         break;
500                 case 'p':
501                         strncpy(pidftemplate, optarg, 256);
502                         break;
503                 case 'c': 
504                         serve->flags |=F_COPYONWRITE;
505                         break;
506                 case 'C':
507                         g_free(config_file_pos);
508                         config_file_pos=g_strdup(optarg);
509                         break;
510                 case 'l':
511                         g_free(serve->authname);
512                         serve->authname=g_strdup(optarg);
513                         break;
514                 case 'M':
515                         serve->max_connections = strtol(optarg, NULL, 0);
516                         break;
517                 default:
518                         usage();
519                         exit(EXIT_FAILURE);
520                         break;
521                 }
522         }
523         /* What's left: the port to export, the name of the to be exported
524          * file, and, optionally, the size of the file, in that order. */
525         if(nonspecial<2) {
526                 g_free(serve);
527                 serve=NULL;
528         } else {
529                 do_oldstyle = TRUE;
530         }
531         if(do_output) {
532                 if(!serve) {
533                         g_critical("Need a complete configuration on the command line to output a config file section!");
534                         exit(EXIT_FAILURE);
535                 }
536                 dump_section(serve, section_header);
537         }
538         return serve;
539 }
540
/**
 * Error codes for config file parsing
 **/
typedef enum {
        /** The configuration file is not found */
        CFILE_NOTFOUND = 0,
        /** The (required) group "generic" is missing */
        CFILE_MISSING_GENERIC,
        /** A (required) key is missing */
        CFILE_KEY_MISSING,
        /** A value is syntactically invalid */
        CFILE_VALUE_INVALID,
        /** A value is not supported in this build */
        CFILE_VALUE_UNSUPPORTED,
        /** Programmer error */
        CFILE_PROGERR,
        /** A config file was specified that does not define any exports */
        CFILE_NO_EXPORTS,
        /** The reserved port was specified for an old-style export. */
        CFILE_INCORRECT_PORT
} CFILE_ERRORS;
556
557 /**
558  * Remove a SERVER from memory. Used from the hash table
559  **/
560 void remove_server(gpointer s) {
561         SERVER *server;
562
563         server=(SERVER*)s;
564         g_free(server->exportname);
565         if(server->authname)
566                 g_free(server->authname);
567         if(server->listenaddr)
568                 g_free(server->listenaddr);
569         if(server->prerun)
570                 g_free(server->prerun);
571         if(server->postrun)
572                 g_free(server->postrun);
573         g_free(server);
574 }
575
576 /**
577  * duplicate server
578  * @param s the old server we want to duplicate
579  * @return new duplicated server
580  **/
581 SERVER* dup_serve(SERVER *s) {
582         SERVER *serve = NULL;
583
584         serve=g_new0(SERVER, 1);
585         if(serve == NULL)
586                 return NULL;
587
588         if(s->exportname)
589                 serve->exportname = g_strdup(s->exportname);
590
591         serve->expected_size = s->expected_size;
592
593         if(s->listenaddr)
594                 serve->listenaddr = g_strdup(s->listenaddr);
595
596         serve->port = s->port;
597
598         if(s->authname)
599                 serve->authname = strdup(s->authname);
600
601         serve->flags = s->flags;
602         serve->socket = serve->socket;
603         serve->socket_family = serve->socket_family;
604         serve->cidrlen = s->cidrlen;
605
606         if(s->prerun)
607                 serve->prerun = g_strdup(s->prerun);
608
609         if(s->postrun)
610                 serve->postrun = g_strdup(s->postrun);
611         
612         if(s->servename)
613                 serve->servename = g_strdup(s->servename);
614
615         serve->max_connections = s->max_connections;
616
617         return serve;
618 }
619
620 /**
621  * append new server to array
622  * @param s server
623  * @param a server array
624  * @return 0 success, -1 error
625  */
626 int append_serve(SERVER *s, GArray *a) {
627         SERVER *ns = NULL;
628         struct addrinfo hints;
629         struct addrinfo *ai = NULL;
630         struct addrinfo *rp = NULL;
631         char   host[NI_MAXHOST];
632         gchar  *port = NULL;
633         int e;
634         int ret;
635
636         if(!s) {
637                 err("Invalid parsing server");
638                 return -1;
639         }
640
641         port = g_strdup_printf("%d", s->port);
642
643         memset(&hints,'\0',sizeof(hints));
644         hints.ai_family = AF_UNSPEC;
645         hints.ai_socktype = SOCK_STREAM;
646         hints.ai_flags = AI_ADDRCONFIG | AI_PASSIVE;
647         hints.ai_protocol = IPPROTO_TCP;
648
649         e = getaddrinfo(s->listenaddr, port, &hints, &ai);
650
651         if (port)
652                 g_free(port);
653
654         if(e == 0) {
655                 for (rp = ai; rp != NULL; rp = rp->ai_next) {
656                         e = getnameinfo(rp->ai_addr, rp->ai_addrlen, host, sizeof(host), NULL, 0, NI_NUMERICHOST);
657
658                         if (e != 0) { // error
659                                 fprintf(stderr, "getnameinfo: %s\n", gai_strerror(e));
660                                 continue;
661                         }
662
663                         // duplicate server and set listenaddr to resolved IP address
664                         ns = dup_serve (s);
665                         if (ns) {
666                                 ns->listenaddr = g_strdup(host);
667                                 ns->socket_family = rp->ai_family;
668                                 g_array_append_val(a, *ns);
669                                 free(ns);
670                                 ns = NULL;
671                         }
672                 }
673
674                 ret = 0;
675         } else {
676                 fprintf(stderr, "getaddrinfo failed on listen host/address: %s (%s)\n", s->listenaddr ? s->listenaddr : "any", gai_strerror(e));
677                 ret = -1;
678         }
679
680         if (ai)
681                 freeaddrinfo(ai);
682
683         return ret;
684 }
685
686 /**
687  * Parse the config file.
688  *
689  * @param f the name of the config file
690  * @param e a GError. @see CFILE_ERRORS for what error values this function can
691  *      return.
 * @return a GArray of SERVER structures. If the config file could not be
 *      loaded, returns an empty GArray (and e is set to CFILE_NOTFOUND); if
 *      the config file contains an error, returns NULL, and e is set
 *      appropriately
695  **/
696 GArray* parse_cfile(gchar* f, GError** e) {
697         const char* DEFAULT_ERROR = "Could not parse %s in group %s: %s";
698         const char* MISSING_REQUIRED_ERROR = "Could not find required value %s in group %s: %s";
699         SERVER s;
700         gchar *virtstyle=NULL;
701         PARAM lp[] = {
702                 { "exportname", TRUE,   PARAM_STRING,   NULL, 0 },
703                 { "port",       TRUE,   PARAM_INT,      NULL, 0 },
704                 { "authfile",   FALSE,  PARAM_STRING,   NULL, 0 },
705                 { "filesize",   FALSE,  PARAM_INT,      NULL, 0 },
706                 { "virtstyle",  FALSE,  PARAM_STRING,   NULL, 0 },
707                 { "prerun",     FALSE,  PARAM_STRING,   NULL, 0 },
708                 { "postrun",    FALSE,  PARAM_STRING,   NULL, 0 },
709                 { "readonly",   FALSE,  PARAM_BOOL,     NULL, F_READONLY },
710                 { "multifile",  FALSE,  PARAM_BOOL,     NULL, F_MULTIFILE },
711                 { "copyonwrite", FALSE, PARAM_BOOL,     NULL, F_COPYONWRITE },
712                 { "sparse_cow", FALSE,  PARAM_BOOL,     NULL, F_SPARSE },
713                 { "sdp",        FALSE,  PARAM_BOOL,     NULL, F_SDP },
714                 { "sync",       FALSE,  PARAM_BOOL,     NULL, F_SYNC },
715                 { "listenaddr", FALSE,  PARAM_STRING,   NULL, 0 },
716                 { "maxconnections", FALSE, PARAM_INT,   NULL, 0 },
717         };
718         const int lp_size=sizeof(lp)/sizeof(PARAM);
719         PARAM gp[] = {
720                 { "user",       FALSE, PARAM_STRING,    &runuser,       0 },
721                 { "group",      FALSE, PARAM_STRING,    &rungroup,      0 },
722                 { "oldstyle",   FALSE, PARAM_BOOL,      &do_oldstyle,   1 },
723                 { "listenaddr", FALSE, PARAM_STRING,    &modern_listen, 0 },
724         };
725         PARAM* p=gp;
726         int p_size=sizeof(gp)/sizeof(PARAM);
727         GKeyFile *cfile;
728         GError *err = NULL;
729         const char *err_msg=NULL;
730         GQuark errdomain;
731         GArray *retval=NULL;
732         gchar **groups;
733         gboolean value;
734         gchar* startgroup;
735         gint i;
736         gint j;
737
738         errdomain = g_quark_from_string("parse_cfile");
739         cfile = g_key_file_new();
740         retval = g_array_new(FALSE, TRUE, sizeof(SERVER));
741         if(!g_key_file_load_from_file(cfile, f, G_KEY_FILE_KEEP_COMMENTS |
742                         G_KEY_FILE_KEEP_TRANSLATIONS, &err)) {
743                 g_set_error(e, errdomain, CFILE_NOTFOUND, "Could not open config file %s.", f);
744                 g_key_file_free(cfile);
745                 return retval;
746         }
747         startgroup = g_key_file_get_start_group(cfile);
748         if(!startgroup || strcmp(startgroup, "generic")) {
749                 g_set_error(e, errdomain, CFILE_MISSING_GENERIC, "Config file does not contain the [generic] group!");
750                 g_key_file_free(cfile);
751                 return NULL;
752         }
753         groups = g_key_file_get_groups(cfile, NULL);
754         for(i=0;groups[i];i++) {
755                 memset(&s, '\0', sizeof(SERVER));
756                 lp[0].target=&(s.exportname);
757                 lp[1].target=&(s.port);
758                 lp[2].target=&(s.authname);
759                 lp[3].target=&(s.expected_size);
760                 lp[4].target=&(virtstyle);
761                 lp[5].target=&(s.prerun);
762                 lp[6].target=&(s.postrun);
763                 lp[7].target=lp[8].target=lp[9].target=
764                                 lp[10].target=lp[11].target=
765                                 lp[12].target=&(s.flags);
766                 lp[13].target=&(s.listenaddr);
767                 lp[14].target=&(s.max_connections);
768
769                 /* After the [generic] group, start parsing exports */
770                 if(i==1) {
771                         p=lp;
772                         p_size=lp_size;
773                 } 
774                 for(j=0;j<p_size;j++) {
775                         g_assert(p[j].target != NULL);
776                         g_assert(p[j].ptype==PARAM_INT||p[j].ptype==PARAM_STRING||p[j].ptype==PARAM_BOOL);
777                         switch(p[j].ptype) {
778                                 case PARAM_INT:
779                                         *((gint*)p[j].target) =
780                                                 g_key_file_get_integer(cfile,
781                                                                 groups[i],
782                                                                 p[j].paramname,
783                                                                 &err);
784                                         break;
785                                 case PARAM_STRING:
786                                         *((gchar**)p[j].target) =
787                                                 g_key_file_get_string(cfile,
788                                                                 groups[i],
789                                                                 p[j].paramname,
790                                                                 &err);
791                                         break;
792                                 case PARAM_BOOL:
793                                         value = g_key_file_get_boolean(cfile,
794                                                         groups[i],
795                                                         p[j].paramname, &err);
796                                         if(!err) {
797                                                 if(value) {
798                                                         *((gint*)p[j].target) |= p[j].flagval;
799                                                 } else {
800                                                         *((gint*)p[j].target) &= ~(p[j].flagval);
801                                                 }
802                                         }
803                                         break;
804                         }
805                         if(!strcmp(p[j].paramname, "port") && !strcmp(p[j].target, NBD_DEFAULT_PORT)) {
806                                 g_set_error(e, errdomain, CFILE_INCORRECT_PORT, "Config file specifies default port for oldstyle export");
807                                 g_key_file_free(cfile);
808                                 return NULL;
809                         }
810                         if(err) {
811                                 if(err->code == G_KEY_FILE_ERROR_KEY_NOT_FOUND) {
812                                         if(!p[j].required) {
813                                                 /* Ignore not-found error for optional values */
814                                                 g_clear_error(&err);
815                                                 continue;
816                                         } else {
817                                                 err_msg = MISSING_REQUIRED_ERROR;
818                                         }
819                                 } else {
820                                         err_msg = DEFAULT_ERROR;
821                                 }
822                                 g_set_error(e, errdomain, CFILE_VALUE_INVALID, err_msg, p[j].paramname, groups[i], err->message);
823                                 g_array_free(retval, TRUE);
824                                 g_error_free(err);
825                                 g_key_file_free(cfile);
826                                 return NULL;
827                         }
828                 }
829                 if(virtstyle) {
830                         if(!strncmp(virtstyle, "none", 4)) {
831                                 s.virtstyle=VIRT_NONE;
832                         } else if(!strncmp(virtstyle, "ipliteral", 9)) {
833                                 s.virtstyle=VIRT_IPLIT;
834                         } else if(!strncmp(virtstyle, "iphash", 6)) {
835                                 s.virtstyle=VIRT_IPHASH;
836                         } else if(!strncmp(virtstyle, "cidrhash", 8)) {
837                                 s.virtstyle=VIRT_CIDR;
838                                 if(strlen(virtstyle)<10) {
839                                         g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s: missing length", virtstyle, groups[i]);
840                                         g_array_free(retval, TRUE);
841                                         g_key_file_free(cfile);
842                                         return NULL;
843                                 }
844                                 s.cidrlen=strtol(virtstyle+8, NULL, 0);
845                         } else {
846                                 g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s", virtstyle, groups[i]);
847                                 g_array_free(retval, TRUE);
848                                 g_key_file_free(cfile);
849                                 return NULL;
850                         }
851                         if(s.port && !do_oldstyle) {
852                                 g_warning("A port was specified, but oldstyle exports were not requested. This may not do what you expect.");
853                                 g_warning("Please read 'man 5 nbd-server' and search for oldstyle for more info");
854                         }
855                 } else {
856                         s.virtstyle=VIRT_IPLIT;
857                 }
858                 /* Don't need to free this, it's not our string */
859                 virtstyle=NULL;
860                 /* Don't append values for the [generic] group */
861                 if(i>0) {
862                         s.socket_family = AF_UNSPEC;
863                         s.servename = groups[i];
864
865                         append_serve(&s, retval);
866                 } else {
867                         if(!do_oldstyle) {
868                                 lp[1].required = 0;
869                         }
870                 }
871 #ifndef WITH_SDP
872                 if(s.flags & F_SDP) {
873                         g_set_error(e, errdomain, CFILE_VALUE_UNSUPPORTED, "This nbd-server was built without support for SDP, yet group %s uses it", groups[i]);
874                         g_array_free(retval, TRUE);
875                         g_key_file_free(cfile);
876                         return NULL;
877                 }
878 #endif
879         }
880         if(i==1) {
881                 g_set_error(e, errdomain, CFILE_NO_EXPORTS, "The config file does not specify any exports");
882         }
883         g_key_file_free(cfile);
884         return retval;
885 }
886
887 /**
888  * Signal handler for SIGCHLD
889  * @param s the signal we're handling (must be SIGCHLD, or something
890  * is severely wrong)
891  **/
892 void sigchld_handler(int s) {
893         int status;
894         int* i;
895         pid_t pid;
896
897         while((pid=waitpid(-1, &status, WNOHANG)) > 0) {
898                 if(WIFEXITED(status)) {
899                         msg3(LOG_INFO, "Child exited with %d", WEXITSTATUS(status));
900                 }
901                 i=g_hash_table_lookup(children, &pid);
902                 if(!i) {
903                         msg3(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid);
904                 } else {
905                         DEBUG2("Removing %d from the list of children", pid);
906                         g_hash_table_remove(children, &pid);
907                 }
908         }
909 }
910
911 /**
912  * Kill a child. Called from sigterm_handler::g_hash_table_foreach.
913  *
914  * @param key the key
915  * @param value the value corresponding to the above key
916  * @param user_data a pointer which we always set to 1, so that we know what
917  * will happen next.
918  **/
919 void killchild(gpointer key, gpointer value, gpointer user_data) {
920         pid_t *pid=value;
921         int *parent=user_data;
922
923         kill(*pid, SIGTERM);
924         *parent=1;
925 }
926
927 /**
928  * Handle SIGTERM and dispatch it to our children
929  * @param s the signal we're handling (must be SIGTERM, or something
930  * is severely wrong).
931  **/
932 void sigterm_handler(int s) {
933         int parent=0;
934
935         g_hash_table_foreach(children, killchild, &parent);
936
937         if(parent) {
938                 unlink(pidfname);
939         }
940
941         exit(EXIT_SUCCESS);
942 }
943
944 /**
945  * Detect the size of a file.
946  *
947  * @param fhandle An open filedescriptor
948  * @return the size of the file, or OFFT_MAX if detection was
949  * impossible.
950  **/
951 off_t size_autodetect(int fhandle) {
952         off_t es;
953         u64 bytes;
954         struct stat stat_buf;
955         int error;
956
957 #ifdef HAVE_SYS_MOUNT_H
958 #ifdef HAVE_SYS_IOCTL_H
959 #ifdef BLKGETSIZE64
960         DEBUG("looking for export size with ioctl BLKGETSIZE64\n");
961         if (!ioctl(fhandle, BLKGETSIZE64, &bytes) && bytes) {
962                 return (off_t)bytes;
963         }
964 #endif /* BLKGETSIZE64 */
965 #endif /* HAVE_SYS_IOCTL_H */
966 #endif /* HAVE_SYS_MOUNT_H */
967
968         DEBUG("looking for fhandle size with fstat\n");
969         stat_buf.st_size = 0;
970         error = fstat(fhandle, &stat_buf);
971         if (!error) {
972                 if(stat_buf.st_size > 0)
973                         return (off_t)stat_buf.st_size;
974         } else {
975                 err("fstat failed: %m");
976         }
977
978         DEBUG("looking for fhandle size with lseek SEEK_END\n");
979         es = lseek(fhandle, (off_t)0, SEEK_END);
980         if (es > ((off_t)0)) {
981                 return es;
982         } else {
983                 DEBUG2("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4)));
984         }
985
986         err("Could not find size of exported block device: %m");
987         return OFFT_MAX;
988 }
989
990 /**
991  * Get the file handle and offset, given an export offset.
992  *
993  * @param export An array of export files
994  * @param a The offset to get corresponding file/offset for
995  * @param fhandle [out] File descriptor
996  * @param foffset [out] Offset into fhandle
997  * @param maxbytes [out] Tells how many bytes can be read/written
998  * from fhandle starting at foffset (0 if there is no limit)
999  * @return 0 on success, -1 on failure
1000  **/
1001 int get_filepos(GArray* export, off_t a, int* fhandle, off_t* foffset, size_t* maxbytes ) {
1002         /* Negative offset not allowed */
1003         if(a < 0)
1004                 return -1;
1005
1006         /* Binary search for last file with starting offset <= a */
1007         FILE_INFO fi;
1008         int start = 0;
1009         int end = export->len - 1;
1010         while( start <= end ) {
1011                 int mid = (start + end) / 2;
1012                 fi = g_array_index(export, FILE_INFO, mid);
1013                 if( fi.startoff < a ) {
1014                         start = mid + 1;
1015                 } else if( fi.startoff > a ) {
1016                         end = mid - 1;
1017                 } else {
1018                         start = end = mid;
1019                         break;
1020                 }
1021         }
1022
1023         /* end should never go negative, since first startoff is 0 and a >= 0 */
1024         g_assert(end >= 0);
1025
1026         fi = g_array_index(export, FILE_INFO, end);
1027         *fhandle = fi.fhandle;
1028         *foffset = a - fi.startoff;
1029         *maxbytes = 0;
1030         if( end+1 < export->len ) {
1031                 FILE_INFO fi_next = g_array_index(export, FILE_INFO, end+1);
1032                 *maxbytes = fi_next.startoff - a;
1033         }
1034
1035         return 0;
1036 }
1037
/**
 * Position a file descriptor at an absolute offset, reporting failure
 * through err().
 *
 * @param handle a filedescriptor
 * @param a absolute position to seek to
 * @todo The earlier note on this helper asked for it to be removed once
 * `lastpoint` is no longer a global; `lastpoint` is not referenced here.
 **/
void myseek(int handle,off_t a) {
	const off_t where = lseek(handle, a, SEEK_SET);

	if (where < 0)
		err("Can not seek locally!\n");
}
1051
1052 /**
1053  * Write an amount of bytes at a given offset to the right file. This
1054  * abstracts the write-side of the multiple file option.
1055  *
1056  * @param a The offset where the write should start
1057  * @param buf The buffer to write from
1058  * @param len The length of buf
1059  * @param client The client we're serving for
1060  * @return The number of bytes actually written, or -1 in case of an error
1061  **/
1062 ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client) {
1063         int fhandle;
1064         off_t foffset;
1065         size_t maxbytes;
1066         ssize_t retval;
1067
1068         if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
1069                 return -1;
1070         if(maxbytes && len > maxbytes)
1071                 len = maxbytes;
1072
1073         DEBUG4("(WRITE to fd %d offset %llu len %u), ", fhandle, foffset, len);
1074
1075         myseek(fhandle, foffset);
1076         retval = write(fhandle, buf, len);
1077         if(client->server->flags & F_SYNC) {
1078                 fsync(fhandle);
1079         }
1080         return retval;
1081 }
1082
1083 /**
1084  * Call rawexpwrite repeatedly until all data has been written.
1085  * @return 0 on success, nonzero on failure
1086  **/
1087 int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client) {
1088         ssize_t ret=0;
1089
1090         while(len > 0 && (ret=rawexpwrite(a, buf, len, client)) > 0 ) {
1091                 a += ret;
1092                 buf += ret;
1093                 len -= ret;
1094         }
1095         return (ret < 0 || len != 0);
1096 }
1097
1098 /**
1099  * Read an amount of bytes at a given offset from the right file. This
1100  * abstracts the read-side of the multiple files option.
1101  *
1102  * @param a The offset where the read should start
1103  * @param buf A buffer to read into
1104  * @param len The size of buf
1105  * @param client The client we're serving for
1106  * @return The number of bytes actually read, or -1 in case of an
1107  * error.
1108  **/
1109 ssize_t rawexpread(off_t a, char *buf, size_t len, CLIENT *client) {
1110         int fhandle;
1111         off_t foffset;
1112         size_t maxbytes;
1113
1114         if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
1115                 return -1;
1116         if(maxbytes && len > maxbytes)
1117                 len = maxbytes;
1118
1119         DEBUG4("(READ from fd %d offset %llu len %u), ", fhandle, foffset, len);
1120
1121         myseek(fhandle, foffset);
1122         return read(fhandle, buf, len);
1123 }
1124
1125 /**
1126  * Call rawexpread repeatedly until all data has been read.
1127  * @return 0 on success, nonzero on failure
1128  **/
1129 int rawexpread_fully(off_t a, char *buf, size_t len, CLIENT *client) {
1130         ssize_t ret=0;
1131
1132         while(len > 0 && (ret=rawexpread(a, buf, len, client)) > 0 ) {
1133                 a += ret;
1134                 buf += ret;
1135                 len -= ret;
1136         }
1137         return (ret < 0 || len != 0);
1138 }
1139
1140 /**
1141  * Read an amount of bytes at a given offset from the right file. This
1142  * abstracts the read-side of the copyonwrite stuff, and calls
1143  * rawexpread() with the right parameters to do the actual work.
1144  * @param a The offset where the read should start
1145  * @param buf A buffer to read into
1146  * @param len The size of buf
1147  * @param client The client we're going to read for
1148  * @return 0 on success, nonzero on failure
1149  **/
1150 int expread(off_t a, char *buf, size_t len, CLIENT *client) {
1151         off_t rdlen, offset;
1152         off_t mapcnt, mapl, maph, pagestart;
1153
1154         if (!(client->server->flags & F_COPYONWRITE))
1155                 return(rawexpread_fully(a, buf, len, client));
1156         DEBUG3("Asked to read %d bytes at %llu.\n", len, (unsigned long long)a);
1157
1158         mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
1159
1160         for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
1161                 pagestart=mapcnt*DIFFPAGESIZE;
1162                 offset=a-pagestart;
1163                 rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
1164                         len : (size_t)DIFFPAGESIZE-offset;
1165                 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1166                         DEBUG3("Page %llu is at %lu\n", (unsigned long long)mapcnt,
1167                                (unsigned long)(client->difmap[mapcnt]));
1168                         myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset);
1169                         if (read(client->difffile, buf, rdlen) != rdlen) return -1;
1170                 } else { /* the block is not there */
1171                         DEBUG2("Page %llu is not here, we read the original one\n",
1172                                (unsigned long long)mapcnt);
1173                         if(rawexpread_fully(a, buf, rdlen, client)) return -1;
1174                 }
1175                 len-=rdlen; a+=rdlen; buf+=rdlen;
1176         }
1177         return 0;
1178 }
1179
1180 /**
1181  * Write an amount of bytes at a given offset to the right file. This
1182  * abstracts the write-side of the copyonwrite option, and calls
1183  * rawexpwrite() with the right parameters to do the actual work.
1184  *
1185  * @param a The offset where the write should start
1186  * @param buf The buffer to write from
1187  * @param len The length of buf
1188  * @param client The client we're going to write for.
1189  * @return 0 on success, nonzero on failure
1190  **/
1191 int expwrite(off_t a, char *buf, size_t len, CLIENT *client) {
1192         char pagebuf[DIFFPAGESIZE];
1193         off_t mapcnt,mapl,maph;
1194         off_t wrlen,rdlen; 
1195         off_t pagestart;
1196         off_t offset;
1197
1198         if (!(client->server->flags & F_COPYONWRITE))
1199                 return(rawexpwrite_fully(a, buf, len, client)); 
1200         DEBUG3("Asked to write %d bytes at %llu.\n", len, (unsigned long long)a);
1201
1202         mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
1203
1204         for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
1205                 pagestart=mapcnt*DIFFPAGESIZE ;
1206                 offset=a-pagestart ;
1207                 wrlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
1208                         len : (size_t)DIFFPAGESIZE-offset;
1209
1210                 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1211                         DEBUG3("Page %llu is at %lu\n", (unsigned long long)mapcnt,
1212                                (unsigned long)(client->difmap[mapcnt])) ;
1213                         myseek(client->difffile,
1214                                         client->difmap[mapcnt]*DIFFPAGESIZE+offset);
1215                         if (write(client->difffile, buf, wrlen) != wrlen) return -1 ;
1216                 } else { /* the block is not there */
1217                         myseek(client->difffile,client->difffilelen*DIFFPAGESIZE) ;
1218                         client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++;
1219                         DEBUG3("Page %llu is not here, we put it at %lu\n",
1220                                (unsigned long long)mapcnt,
1221                                (unsigned long)(client->difmap[mapcnt]));
1222                         rdlen=DIFFPAGESIZE ;
1223                         if (rawexpread_fully(pagestart, pagebuf, rdlen, client))
1224                                 return -1;
1225                         memcpy(pagebuf+offset,buf,wrlen) ;
1226                         if (write(client->difffile, pagebuf, DIFFPAGESIZE) !=
1227                                         DIFFPAGESIZE)
1228                                 return -1;
1229                 }                                                   
1230                 len-=wrlen ; a+=wrlen ; buf+=wrlen ;
1231         }
1232         return 0;
1233 }
1234
1235 /**
1236  * Do the initial negotiation.
1237  *
1238  * @param client The client we're negotiating with.
1239  **/
1240 CLIENT* negotiate(int net, CLIENT *client, GArray* servers) {
1241         char zeros[128];
1242         uint64_t size_host;
1243         uint32_t flags = NBD_FLAG_HAS_FLAGS;
1244         uint16_t smallflags = 0;
1245         uint64_t magic;
1246
1247         memset(zeros, '\0', sizeof(zeros));
1248         if(!client || !client->modern) {
1249                 /* common */
1250                 if (write(net, INIT_PASSWD, 8) < 0) {
1251                         err_nonfatal("Negotiation failed: %m");
1252                         if(client)
1253                                 exit(EXIT_FAILURE);
1254                 }
1255                 if(!client || client->modern) {
1256                         /* modern */
1257                         magic = htonll(opts_magic);
1258                 } else {
1259                         /* oldstyle */
1260                         magic = htonll(cliserv_magic);
1261                 }
1262                 if (write(net, &magic, sizeof(magic)) < 0) {
1263                         err_nonfatal("Negotiation failed: %m");
1264                         if(client)
1265                                 exit(EXIT_FAILURE);
1266                 }
1267         }
1268         if(!client) {
1269                 /* modern */
1270                 uint32_t reserved;
1271                 uint32_t opt;
1272                 uint32_t namelen;
1273                 char* name;
1274                 int i;
1275
1276                 if(!servers)
1277                         err("programmer error");
1278                 if (write(net, &smallflags, sizeof(uint16_t)) < 0)
1279                         err("Negotiation failed: %m");
1280                 if (read(net, &reserved, sizeof(reserved)) < 0)
1281                         err("Negotiation failed: %m");
1282                 if (read(net, &magic, sizeof(magic)) < 0)
1283                         err("Negotiation failed: %m");
1284                 magic = ntohll(magic);
1285                 if(magic != opts_magic) {
1286                         close(net);
1287                         return NULL;
1288                 }
1289                 if (read(net, &opt, sizeof(opt)) < 0)
1290                         err("Negotiation failed: %m");
1291                 opt = ntohl(opt);
1292                 if(opt != NBD_OPT_EXPORT_NAME) {
1293                         close(net);
1294                         return NULL;
1295                 }
1296                 if (read(net, &namelen, sizeof(namelen)) < 0)
1297                         err("Negotiation failed: %m");
1298                 namelen = ntohl(namelen);
1299                 name = malloc(namelen+1);
1300                 name[namelen]=0;
1301                 if (read(net, name, namelen) < 0)
1302                         err("Negotiation failed: %m");
1303                 for(i=0; i<servers->len; i++) {
1304                         SERVER* serve = &(g_array_index(servers, SERVER, i));
1305                         if(!strcmp(serve->servename, name)) {
1306                                 CLIENT* client = g_new0(CLIENT, 1);
1307                                 client->server = serve;
1308                                 client->exportsize = OFFT_MAX;
1309                                 client->net = net;
1310                                 client->modern = TRUE;
1311                                 return client;
1312                         }
1313                 }
1314                 return NULL;
1315         }
1316         /* common */
1317         size_host = htonll((u64)(client->exportsize));
1318         if (write(net, &size_host, 8) < 0)
1319                 err("Negotiation failed: %m");
1320         if (client->server->flags & F_READONLY)
1321                 flags |= NBD_FLAG_READ_ONLY;
1322         if (!client->modern) {
1323                 /* oldstyle */
1324                 flags = htonl(flags);
1325                 if (write(client->net, &flags, 4) < 0)
1326                         err("Negotiation failed: %m");
1327         } else {
1328                 /* modern */
1329                 smallflags = (uint16_t)(flags & ~((uint16_t)0));
1330                 smallflags = htons(smallflags);
1331                 if (write(client->net, &smallflags, sizeof(smallflags)) < 0) {
1332                         err("Negotiation failed: %m");
1333                 }
1334         }
1335         /* common */
1336         if (write(client->net, zeros, 124) < 0)
1337                 err("Negotiation failed: %m");
1338         return NULL;
1339 }
1340
/** Write the object `reply` in full to `net` via writeit(). The expansion
 * already ends in a semicolon. */
#define SEND(net,reply) \
	writeit( net, &reply, sizeof( reply ));
/** Send `reply` to client->net with its error field set to `errcode` (in
 * network byte order), then reset the field to 0 for the next reply. */
#define ERROR(client,reply,errcode) \
	{ \
		reply.error = htonl(errcode); \
		SEND(client->net,reply); \
		reply.error = 0; \
	}
1345 /**
1346  * Serve a file to a single client.
1347  *
1348  * @todo This beast needs to be split up in many tiny little manageable
1349  * pieces. Preferably with a chainsaw.
1350  *
1351  * @param client The client we're going to serve to.
1352  * @return when the client disconnects
1353  **/
1354 int mainloop(CLIENT *client) {
1355         struct nbd_request request;
1356         struct nbd_reply reply;
1357         gboolean go_on=TRUE;
1358 #ifdef DODBG
1359         int i = 0;
1360 #endif
1361         negotiate(client->net, client, NULL);
1362         DEBUG("Entering request loop!\n");
1363         reply.magic = htonl(NBD_REPLY_MAGIC);
1364         reply.error = 0;
1365         while (go_on) {
1366                 char buf[BUFSIZE];
1367                 char* p;
1368                 size_t len;
1369                 size_t currlen;
1370                 size_t writelen;
1371 #ifdef DODBG
1372                 i++;
1373                 printf("%d: ", i);
1374 #endif
1375                 readit(client->net, &request, sizeof(request));
1376                 request.from = ntohll(request.from);
1377                 request.type = ntohl(request.type);
1378
1379                 if (request.type==NBD_CMD_DISC) {
1380                         msg2(LOG_INFO, "Disconnect request received.");
1381                         if (client->server->flags & F_COPYONWRITE) { 
1382                                 if (client->difmap) g_free(client->difmap) ;
1383                                 close(client->difffile);
1384                                 unlink(client->difffilename);
1385                                 free(client->difffilename);
1386                         }
1387                         go_on=FALSE;
1388                         continue;
1389                 }
1390
1391                 len = ntohl(request.len);
1392
1393                 if (request.magic != htonl(NBD_REQUEST_MAGIC))
1394                         err("Not enough magic.");
1395                 if (len > BUFSIZE - sizeof(struct nbd_reply)) {
1396                         currlen = BUFSIZE - sizeof(struct nbd_reply);
1397                         msg2(LOG_INFO, "oversized request (this is not a problem)");
1398                 } else {
1399                         currlen = len;
1400                 }
1401 #ifdef DODBG
1402                 printf("%s from %llu (%llu) len %d, ", request.type ? "WRITE" :
1403                                 "READ", (unsigned long long)request.from,
1404                                 (unsigned long long)request.from / 512, len);
1405 #endif
1406                 memcpy(reply.handle, request.handle, sizeof(reply.handle));
1407                 if ((request.from + len) > (OFFT_MAX)) {
1408                         DEBUG("[Number too large!]");
1409                         ERROR(client, reply, EINVAL);
1410                         continue;
1411                 }
1412
1413                 if (((ssize_t)((off_t)request.from + len) > client->exportsize)) {
1414                         DEBUG("[RANGE!]");
1415                         ERROR(client, reply, EINVAL);
1416                         continue;
1417                 }
1418
1419                 if (request.type==NBD_CMD_WRITE) {
1420                         DEBUG("wr: net->buf, ");
1421                         while(len > 0) {
1422                                 readit(client->net, buf, currlen);
1423                                 DEBUG("buf->exp, ");
1424                                 if ((client->server->flags & F_READONLY) ||
1425                                     (client->server->flags & F_AUTOREADONLY)) {
1426                                         DEBUG("[WRITE to READONLY!]");
1427                                         ERROR(client, reply, EPERM);
1428                                         continue;
1429                                 }
1430                                 if (expwrite(request.from, buf, len, client)) {
1431                                         DEBUG("Write failed: %m" );
1432                                         ERROR(client, reply, errno);
1433                                         continue;
1434                                 }
1435                                 SEND(client->net, reply);
1436                                 DEBUG("OK!\n");
1437                                 len -= currlen;
1438                                 currlen = (len < BUFSIZE) ? len : BUFSIZE;
1439                         }
1440                         continue;
1441                 }
1442                 /* READ */
1443
1444                 DEBUG("exp->buf, ");
1445                 memcpy(buf, &reply, sizeof(struct nbd_reply));
1446                 p = buf + sizeof(struct nbd_reply);
1447                 writelen = currlen + sizeof(struct nbd_reply);
1448                 while(len > 0) {
1449                         if (expread(request.from, p, currlen, client)) {
1450                                 DEBUG("Read failed: %m");
1451                                 ERROR(client, reply, errno);
1452                                 continue;
1453                         }
1454
1455                         DEBUG("buf->net, ");
1456                         writeit(client->net, buf, writelen);
1457                         len -= currlen;
1458                         currlen = (len < BUFSIZE) ? len : BUFSIZE;
1459                         p = buf;
1460                         writelen = currlen;
1461                 }
1462                 DEBUG("OK!\n");
1463         }
1464         return 0;
1465 }
1466
1467 /**
1468  * Set up client export array, which is an array of FILE_INFO.
1469  * Also, split a single exportfile into multiple ones, if that was asked.
1470  * @param client information on the client which we want to setup export for
1471  **/
1472 void setupexport(CLIENT* client) {
1473         int i;
1474         off_t laststartoff = 0, lastsize = 0;
1475         int multifile = (client->server->flags & F_MULTIFILE);
1476
1477         client->export = g_array_new(TRUE, TRUE, sizeof(FILE_INFO));
1478
1479         /* If multi-file, open as many files as we can.
1480          * If not, open exactly one file.
1481          * Calculate file sizes as we go to get total size. */
1482         for(i=0; ; i++) {
1483                 FILE_INFO fi;
1484                 gchar *tmpname;
1485                 gchar* error_string;
1486                 mode_t mode = (client->server->flags & F_READONLY) ? O_RDONLY : O_RDWR;
1487
1488                 if(multifile) {
1489                         tmpname=g_strdup_printf("%s.%d", client->exportname, i);
1490                 } else {
1491                         tmpname=g_strdup(client->exportname);
1492                 }
1493                 DEBUG2( "Opening %s\n", tmpname );
1494                 fi.fhandle = open(tmpname, mode);
1495                 if(fi.fhandle == -1 && mode == O_RDWR) {
1496                         /* Try again because maybe media was read-only */
1497                         fi.fhandle = open(tmpname, O_RDONLY);
1498                         if(fi.fhandle != -1) {
1499                                 /* Opening the base file in copyonwrite mode is
1500                                  * okay */
1501                                 if(!(client->server->flags & F_COPYONWRITE)) {
1502                                         client->server->flags |= F_AUTOREADONLY;
1503                                         client->server->flags |= F_READONLY;
1504                                 }
1505                         }
1506                 }
1507                 if(fi.fhandle == -1) {
1508                         if(multifile && i>0)
1509                                 break;
1510                         error_string=g_strdup_printf(
1511                                 "Could not open exported file %s: %%m",
1512                                 tmpname);
1513                         err(error_string);
1514                 }
1515                 fi.startoff = laststartoff + lastsize;
1516                 g_array_append_val(client->export, fi);
1517                 g_free(tmpname);
1518
1519                 /* Starting offset and size of this file will be used to
1520                  * calculate starting offset of next file */
1521                 laststartoff = fi.startoff;
1522                 lastsize = size_autodetect(fi.fhandle);
1523
1524                 if(!multifile)
1525                         break;
1526         }
1527
1528         /* Set export size to total calculated size */
1529         client->exportsize = laststartoff + lastsize;
1530
1531         /* Export size may be overridden */
1532         if(client->server->expected_size) {
1533                 /* desired size must be <= total calculated size */
1534                 if(client->server->expected_size > client->exportsize) {
1535                         err("Size of exported file is too big\n");
1536                 }
1537
1538                 client->exportsize = client->server->expected_size;
1539         }
1540
1541         msg3(LOG_INFO, "Size of exported file/device is %llu", (unsigned long long)client->exportsize);
1542         if(multifile) {
1543                 msg3(LOG_INFO, "Total number of files: %d", i);
1544         }
1545 }
1546
1547 int copyonwrite_prepare(CLIENT* client) {
1548         off_t i;
1549         if ((client->difffilename = malloc(1024))==NULL)
1550                 err("Failed to allocate string for diff file name");
1551         snprintf(client->difffilename, 1024, "%s-%s-%d.diff",client->exportname,client->clientname,
1552                 (int)getpid()) ;
1553         client->difffilename[1023]='\0';
1554         msg3(LOG_INFO,"About to create map and diff file %s",client->difffilename) ;
1555         client->difffile=open(client->difffilename,O_RDWR | O_CREAT | O_TRUNC,0600) ;
1556         if (client->difffile<0) err("Could not create diff file (%m)") ;
1557         if ((client->difmap=calloc(client->exportsize/DIFFPAGESIZE,sizeof(u32)))==NULL)
1558                 err("Could not allocate memory") ;
1559         for (i=0;i<client->exportsize/DIFFPAGESIZE;i++) client->difmap[i]=(u32)-1 ;
1560
1561         return 0;
1562 }
1563
1564 /**
1565  * Run a command. This is used for the ``prerun'' and ``postrun'' config file
1566  * options
1567  *
1568  * @param command the command to be ran. Read from the config file
1569  * @param file the file name we're about to export
1570  **/
1571 int do_run(gchar* command, gchar* file) {
1572         gchar* cmd;
1573         int retval=0;
1574
1575         if(command && *command) {
1576                 cmd = g_strdup_printf(command, file);
1577                 retval=system(cmd);
1578                 g_free(cmd);
1579         }
1580         return retval;
1581 }
1582
1583 /**
1584  * Serve a connection. 
1585  *
1586  * @todo allow for multithreading, perhaps use libevent. Not just yet, though;
1587  * follow the road map.
1588  *
1589  * @param client a connected client
1590  **/
1591 void serveconnection(CLIENT *client) {
1592         if(do_run(client->server->prerun, client->exportname)) {
1593                 exit(EXIT_FAILURE);
1594         }
1595         setupexport(client);
1596
1597         if (client->server->flags & F_COPYONWRITE) {
1598                 copyonwrite_prepare(client);
1599         }
1600
1601         setmysockopt(client->net);
1602
1603         mainloop(client);
1604         do_run(client->server->postrun, client->exportname);
1605 }
1606
1607 /**
1608  * Find the name of the file we have to serve. This will use g_strdup_printf
1609  * to put the IP address of the client inside a filename containing
1610  * "%s" (in the form as specified by the "virtstyle" option). That name
1611  * is then written to client->exportname.
1612  *
1613  * @param net A socket connected to an nbd client
1614  * @param client information about the client. The IP address in human-readable
1615  * format will be written to a new char* buffer, the address of which will be
1616  * stored in client->clientname.
1617  **/
1618 void set_peername(int net, CLIENT *client) {
1619         struct sockaddr_storage addrin;
1620         struct sockaddr_storage netaddr;
1621         struct sockaddr_in  *netaddr4 = NULL;
1622         struct sockaddr_in6 *netaddr6 = NULL;
1623         size_t addrinlen = sizeof( addrin );
1624         struct addrinfo hints;
1625         struct addrinfo *ai = NULL;
1626         char peername[NI_MAXHOST];
1627         char netname[NI_MAXHOST];
1628         char *tmp = NULL;
1629         int i;
1630         int e;
1631         int shift;
1632
1633         if (getpeername(net, (struct sockaddr *) &addrin, (socklen_t *)&addrinlen) < 0)
1634                 err("getsockname failed: %m");
1635
1636         getnameinfo((struct sockaddr *)&addrin, (socklen_t)addrinlen,
1637                 peername, sizeof (peername), NULL, 0, NI_NUMERICHOST);
1638
1639         memset(&hints, '\0', sizeof (hints));
1640         hints.ai_flags = AI_ADDRCONFIG;
1641         e = getaddrinfo(peername, NULL, &hints, &ai);
1642
1643         if(e != 0) {
1644                 fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
1645                 freeaddrinfo(ai);
1646                 return;
1647         }
1648
1649         switch(client->server->virtstyle) {
1650                 case VIRT_NONE:
1651                         client->exportname=g_strdup(client->server->exportname);
1652                         break;
1653                 case VIRT_IPHASH:
1654                         for(i=0;i<strlen(peername);i++) {
1655                                 if(peername[i]=='.') {
1656                                         peername[i]='/';
1657                                 }
1658                         }
1659                 case VIRT_IPLIT:
1660                         client->exportname=g_strdup_printf(client->server->exportname, peername);
1661                         break;
1662                 case VIRT_CIDR:
1663                         memcpy(&netaddr, &addrin, addrinlen);
1664                         if(ai->ai_family == AF_INET) {
1665                                 netaddr4 = (struct sockaddr_in *)&netaddr;
1666                                 (netaddr4->sin_addr).s_addr>>=32-(client->server->cidrlen);
1667                                 (netaddr4->sin_addr).s_addr<<=32-(client->server->cidrlen);
1668
1669                                 getnameinfo((struct sockaddr *) netaddr4, (socklen_t) addrinlen,
1670                                                         netname, sizeof (netname), NULL, 0, NI_NUMERICHOST);
1671                                 tmp=g_strdup_printf("%s/%s", netname, peername);
1672                         }else if(ai->ai_family == AF_INET6) {
1673                                 netaddr6 = (struct sockaddr_in6 *)&netaddr;
1674
1675                                 shift = 128-(client->server->cidrlen);
1676                                 i = 3;
1677                                 while(shift >= 32) {
1678                                         ((netaddr6->sin6_addr).s6_addr32[i])=0;
1679                                         shift-=32;
1680                                         i--;
1681                                 }
1682                                 (netaddr6->sin6_addr).s6_addr32[i]>>=shift;
1683                                 (netaddr6->sin6_addr).s6_addr32[i]<<=shift;
1684
1685                                 getnameinfo((struct sockaddr *)netaddr6, (socklen_t)addrinlen,
1686                                             netname, sizeof(netname), NULL, 0, NI_NUMERICHOST);
1687                                 tmp=g_strdup_printf("%s/%s", netname, peername);
1688                         }
1689
1690                         if(tmp != NULL)
1691                           client->exportname=g_strdup_printf(client->server->exportname, tmp);
1692
1693                         break;
1694         }
1695
1696         freeaddrinfo(ai);
1697         msg4(LOG_INFO, "connect from %s, assigned file is %s", 
1698              peername, client->exportname);
1699         client->clientname=g_strdup(peername);
1700 }
1701
1702 /**
1703  * Destroy a pid_t*
1704  * @param data a pointer to pid_t which should be freed
1705  **/
1706 void destroy_pid_t(gpointer data) {
1707         g_free(data);
1708 }
1709
1710 /**
1711  * Loop through the available servers, and serve them. Never returns.
1712  **/
1713 int serveloop(GArray* servers) {
1714         struct sockaddr_storage addrin;
1715         socklen_t addrinlen=sizeof(addrin);
1716         int i;
1717         int max;
1718         int sock;
1719         fd_set mset;
1720         fd_set rset;
1721
1722         /* 
1723          * Set up the master fd_set. The set of descriptors we need
1724          * to select() for never changes anyway and it buys us a *lot*
1725          * of time to only build this once. However, if we ever choose
1726          * to not fork() for clients anymore, we may have to revisit
1727          * this.
1728          */
1729         max=0;
1730         FD_ZERO(&mset);
1731         for(i=0;i<servers->len;i++) {
1732                 if((sock=(g_array_index(servers, SERVER, i)).socket)) {
1733                         FD_SET(sock, &mset);
1734                         max=sock>max?sock:max;
1735                 }
1736         }
1737         if(modernsock) {
1738                 FD_SET(modernsock, &mset);
1739                 max=modernsock>max?modernsock:max;
1740         }
1741         for(;;) {
1742                 CLIENT *client = NULL;
1743                 pid_t *pid;
1744
1745                 memcpy(&rset, &mset, sizeof(fd_set));
1746                 if(select(max+1, &rset, NULL, NULL, NULL)>0) {
1747                         int net = 0;
1748                         SERVER* serve=NULL;
1749
1750                         DEBUG("accept, ");
1751                         if(FD_ISSET(modernsock, &rset)) {
1752                                 if((net=accept(modernsock, (struct sockaddr *) &addrin, &addrinlen)) < 0)
1753                                         err("accept: %m");
1754                                 client = negotiate(net, NULL, servers);
1755                                 if(!client) {
1756                                         err_nonfatal("negotiation failed");
1757                                         close(net);
1758                                         net=0;
1759                                 }
1760                                 serve = client->server;
1761                         }
1762                         for(i=0;i<servers->len && !net;i++) {
1763                                 serve=&(g_array_index(servers, SERVER, i));
1764                                 if(FD_ISSET(serve->socket, &rset)) {
1765                                         if ((net=accept(serve->socket, (struct sockaddr *) &addrin, &addrinlen)) < 0)
1766                                                 err("accept: %m");
1767                                 }
1768                         }
1769                         if(net) {
1770                                 int sock_flags;
1771
1772                                 if(serve->max_connections > 0 &&
1773                                    g_hash_table_size(children) >= serve->max_connections) {
1774                                         msg2(LOG_INFO, "Max connections reached");
1775                                         close(net);
1776                                         continue;
1777                                 }
1778                                 if((sock_flags = fcntl(net, F_GETFL, 0))==-1) {
1779                                         err("fcntl F_GETFL");
1780                                 }
1781                                 if(fcntl(net, F_SETFL, sock_flags &~O_NONBLOCK)==-1) {
1782                                         err("fcntl F_SETFL ~O_NONBLOCK");
1783                                 }
1784                                 if(!client) {
1785                                         client = g_new0(CLIENT, 1);
1786                                         client->server=serve;
1787                                         client->exportsize=OFFT_MAX;
1788                                         client->net=net;
1789                                 }
1790                                 set_peername(net, client);
1791                                 if (!authorized_client(client)) {
1792                                         msg2(LOG_INFO,"Unauthorized client") ;
1793                                         close(net);
1794                                         continue;
1795                                 }
1796                                 msg2(LOG_INFO,"Authorized client") ;
1797                                 pid=g_malloc(sizeof(pid_t));
1798 #ifndef NOFORK
1799                                 if ((*pid=fork())<0) {
1800                                         msg3(LOG_INFO,"Could not fork (%s)",strerror(errno)) ;
1801                                         close(net);
1802                                         continue;
1803                                 }
1804                                 if (*pid>0) { /* parent */
1805                                         close(net);
1806                                         g_hash_table_insert(children, pid, pid);
1807                                         continue;
1808                                 }
1809                                 /* child */
1810                                 g_hash_table_destroy(children);
1811                                 for(i=0;i<servers->len;i++) {
1812                                         serve=&g_array_index(servers, SERVER, i);
1813                                         close(serve->socket);
1814                                 }
1815                                 /* FALSE does not free the
1816                                 actual data. This is required,
1817                                 because the client has a
1818                                 direct reference into that
1819                                 data, and otherwise we get a
1820                                 segfault... */
1821                                 g_array_free(servers, FALSE);
1822 #endif // NOFORK
1823                                 msg2(LOG_INFO,"Starting to serve");
1824                                 serveconnection(client);
1825                                 exit(EXIT_SUCCESS);
1826                         }
1827                 }
1828         }
1829 }
1830
/**
 * Apply the options every listening socket needs: SO_REUSEADDR,
 * SO_KEEPALIVE, and O_NONBLOCK. Any failure is reported through err().
 *
 * @param socket the listening socket to configure
 **/
void dosockopts(int socket) {
#ifndef sun
        int yes=1;
#else
        char yes='1';
#endif /* sun */
        int sock_flags;

        /* lose the pesky "Address already in use" error message.
         * The option length must match the variable actually passed: on sun
         * builds `yes' is a char, so sizeof(int) would overstate it. */
        if (setsockopt(socket,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(yes)) == -1) {
                err("setsockopt SO_REUSEADDR");
        }
        if (setsockopt(socket,SOL_SOCKET,SO_KEEPALIVE,&yes,sizeof(yes)) == -1) {
                err("setsockopt SO_KEEPALIVE");
        }

        /* make the listening socket non-blocking */
        if ((sock_flags = fcntl(socket, F_GETFL, 0)) == -1) {
                err("fcntl F_GETFL");
        }
        if (fcntl(socket, F_SETFL, sock_flags | O_NONBLOCK) == -1) {
                err("fcntl F_SETFL O_NONBLOCK");
        }
}
1855
1856 /**
1857  * Connect a server's socket.
1858  *
1859  * @param serve the server we want to connect.
1860  **/
1861 int setup_serve(SERVER *serve) {
1862         struct addrinfo hints;
1863         struct addrinfo *ai = NULL;
1864         gchar *port = NULL;
1865         int e;
1866
1867         if(!do_oldstyle) {
1868                 return serve->servename ? 1 : 0;
1869         }
1870         memset(&hints,'\0',sizeof(hints));
1871         hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG | AI_NUMERICSERV;
1872         hints.ai_socktype = SOCK_STREAM;
1873         hints.ai_family = serve->socket_family;
1874
1875         port = g_strdup_printf ("%d", serve->port);
1876         if (port == NULL)
1877                 return 0;
1878
1879         e = getaddrinfo(serve->listenaddr,port,&hints,&ai);
1880
1881         g_free(port);
1882
1883         if(e != 0) {
1884                 fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
1885                 serve->socket = -1;
1886                 freeaddrinfo(ai);
1887                 exit(EXIT_FAILURE);
1888         }
1889
1890         if(serve->socket_family == AF_UNSPEC)
1891                 serve->socket_family = ai->ai_family;
1892
1893 #ifdef WITH_SDP
1894         if ((serve->flags) && F_SDP) {
1895                 if (ai->ai_family == AF_INET)
1896                         ai->ai_family = AF_INET_SDP;
1897                 else (ai->ai_family == AF_INET6)
1898                         ai->ai_family = AF_INET6_SDP;
1899         }
1900 #endif
1901         if ((serve->socket = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol)) < 0)
1902                 err("socket: %m");
1903
1904         dosockopts(serve->socket);
1905
1906         DEBUG("Waiting for connections... bind, ");
1907         e = bind(serve->socket, ai->ai_addr, ai->ai_addrlen);
1908         if (e != 0 && errno != EADDRINUSE)
1909                 err("bind: %m");
1910         DEBUG("listen, ");
1911         if (listen(serve->socket, 1) < 0)
1912                 err("listen: %m");
1913
1914         freeaddrinfo (ai);
1915         if(serve->servename) {
1916                 return 1;
1917         } else {
1918                 return 0;
1919         }
1920 }
1921
/**
 * Open the listening socket used for the modern protocol.
 *
 * Resolves modern_listen with service NBD_DEFAULT_PORT, creates a TCP socket,
 * applies dosockopts() to it, binds it and starts listening. The descriptor
 * is stored in the global modernsock. A getaddrinfo() failure terminates the
 * process; other failures are reported through err().
 **/
void open_modern(void) {
        struct addrinfo hints;
        struct addrinfo* ai = NULL;
        int e;

        memset(&hints, '\0', sizeof(hints));
        hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG;
        hints.ai_socktype = SOCK_STREAM;
        hints.ai_family = AF_UNSPEC;
        hints.ai_protocol = IPPROTO_TCP;
        e = getaddrinfo(modern_listen, NBD_DEFAULT_PORT, &hints, &ai);
        if(e != 0) {
                fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
                exit(EXIT_FAILURE);
        }
        if((modernsock = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol))<0) {
                err("socket: %m");
        }

        dosockopts(modernsock);

        if(bind(modernsock, ai->ai_addr, ai->ai_addrlen)) {
                err("bind: %m");
        }
        if(listen(modernsock, 10) <0) {
                err("listen: %m");
        }

        freeaddrinfo(ai);
}
1953
1954 /**
1955  * Connect our servers.
1956  **/
1957 void setup_servers(GArray* servers) {
1958         int i;
1959         struct sigaction sa;
1960         int want_modern=0;
1961
1962         for(i=0;i<servers->len;i++) {
1963                 want_modern |= setup_serve(&(g_array_index(servers, SERVER, i)));
1964         }
1965         if(want_modern) {
1966                 open_modern();
1967         }
1968         children=g_hash_table_new_full(g_int_hash, g_int_equal, NULL, destroy_pid_t);
1969
1970         sa.sa_handler = sigchld_handler;
1971         sigemptyset(&sa.sa_mask);
1972         sa.sa_flags = SA_RESTART;
1973         if(sigaction(SIGCHLD, &sa, NULL) == -1)
1974                 err("sigaction: %m");
1975         sa.sa_handler = sigterm_handler;
1976         sigemptyset(&sa.sa_mask);
1977         sa.sa_flags = SA_RESTART;
1978         if(sigaction(SIGTERM, &sa, NULL) == -1)
1979                 err("sigaction: %m");
1980 }
1981
1982 /**
1983  * Go daemon (unless we specified at compile time that we didn't want this)
1984  * @param serve the first server of our configuration. If its port is zero,
1985  *      then do not daemonize, because we're doing inetd then. This parameter
1986  *      is only used to create a PID file of the form
1987  *      /var/run/nbd-server.&lt;port&gt;.pid; it's not modified in any way.
1988  **/
1989 #if !defined(NODAEMON) && !defined(NOFORK)
1990 void daemonize(SERVER* serve) {
1991         FILE*pidf;
1992
1993         if(serve && !(serve->port)) {
1994                 return;
1995         }
1996         if(daemon(0,0)<0) {
1997                 err("daemon");
1998         }
1999         if(!*pidftemplate) {
2000                 if(serve) {
2001                         strncpy(pidftemplate, "/var/run/nbd-server.%d.pid", 255);
2002                 } else {
2003                         strncpy(pidftemplate, "/var/run/nbd-server.pid", 255);
2004                 }
2005         }
2006         snprintf(pidfname, 255, pidftemplate, serve ? serve->port : 0);
2007         pidf=fopen(pidfname, "w");
2008         if(pidf) {
2009                 fprintf(pidf,"%d\n", (int)getpid());
2010                 fclose(pidf);
2011         } else {
2012                 perror("fopen");
2013                 fprintf(stderr, "Not fatal; continuing");
2014         }
2015 }
2016 #else
2017 #define daemonize(serve)
2018 #endif /* !defined(NODAEMON) && !defined(NOFORK) */
2019
2020 /*
2021  * Everything beyond this point (in the file) is run in non-daemon mode.
2022  * The stuff above daemonize() isn't.
2023  */
2024
2025 void serve_err(SERVER* serve, const char* msg) G_GNUC_NORETURN;
2026
2027 void serve_err(SERVER* serve, const char* msg) {
2028         g_message("Export of %s on port %d failed:", serve->exportname,
2029                         serve->port);
2030         err(msg);
2031 }
2032
2033 /**
2034  * Set up user-ID and/or group-ID
2035  **/
2036 void dousers(void) {
2037         struct passwd *pw;
2038         struct group *gr;
2039         gchar* str;
2040         if(rungroup) {
2041                 gr=getgrnam(rungroup);
2042                 if(!gr) {
2043                         str = g_strdup_printf("Invalid group name: %s", rungroup);
2044                         err(str);
2045                 }
2046                 if(setgid(gr->gr_gid)<0) {
2047                         err("Could not set GID: %m"); 
2048                 }
2049         }
2050         if(runuser) {
2051                 pw=getpwnam(runuser);
2052                 if(!pw) {
2053                         str = g_strdup_printf("Invalid user name: %s", runuser);
2054                         err(str);
2055                 }
2056                 if(setuid(pw->pw_uid)<0) {
2057                         err("Could not set UID: %m");
2058                 }
2059         }
2060 }
2061
2062 #ifndef ISSERVER
2063 void glib_message_syslog_redirect(const gchar *log_domain,
2064                                   GLogLevelFlags log_level,
2065                                   const gchar *message,
2066                                   gpointer user_data)
2067 {
2068     int level=LOG_DEBUG;
2069     
2070     switch( log_level )
2071     {
2072       case G_LOG_FLAG_FATAL:
2073       case G_LOG_LEVEL_CRITICAL:
2074       case G_LOG_LEVEL_ERROR:    
2075         level=LOG_ERR; 
2076         break;
2077       case G_LOG_LEVEL_WARNING:
2078         level=LOG_WARNING;
2079         break;
2080       case G_LOG_LEVEL_MESSAGE:
2081       case G_LOG_LEVEL_INFO:
2082         level=LOG_INFO;
2083         break;
2084       case G_LOG_LEVEL_DEBUG:
2085         level=LOG_DEBUG;
2086       default:
2087         level=LOG_ERR;
2088     }
2089     syslog(level, "%s", message);
2090 }
2091 #endif
2092
2093 /**
2094  * Main entry point...
2095  **/
2096 int main(int argc, char *argv[]) {
2097         SERVER *serve;
2098         GArray *servers;
2099         GError *err=NULL;
2100
2101         if (sizeof( struct nbd_request )!=28) {
2102                 fprintf(stderr,"Bad size of structure. Alignment problems?\n");
2103                 exit(EXIT_FAILURE) ;
2104         }
2105
2106         memset(pidftemplate, '\0', 256);
2107
2108         logging();
2109         config_file_pos = g_strdup(CFILE);
2110         serve=cmdline(argc, argv);
2111         servers = parse_cfile(config_file_pos, &err);
2112         
2113         if(serve) {
2114                 serve->socket_family = AF_UNSPEC;
2115
2116                 append_serve(serve, servers);
2117      
2118                 if (!(serve->port)) {
2119                         CLIENT *client;
2120 #ifndef ISSERVER
2121                         /* You really should define ISSERVER if you're going to use
2122                          * inetd mode, but if you don't, closing stdout and stderr
2123                          * (which inetd had connected to the client socket) will let it
2124                          * work. */
2125                         close(1);
2126                         close(2);
2127                         open("/dev/null", O_WRONLY);
2128                         open("/dev/null", O_WRONLY);
2129                         g_log_set_default_handler( glib_message_syslog_redirect, NULL );
2130 #endif
2131                         client=g_malloc(sizeof(CLIENT));
2132                         client->server=serve;
2133                         client->net=0;
2134                         client->exportsize=OFFT_MAX;
2135                         set_peername(0,client);
2136                         serveconnection(client);
2137                         return 0;
2138                 }
2139         }
2140     
2141         if(!servers || !servers->len) {
2142                 if(err && !(err->domain == g_quark_from_string("parse_cfile")
2143                                 && err->code == CFILE_NOTFOUND)) {
2144                         g_warning("Could not parse config file: %s", 
2145                                         err ? err->message : "Unknown error");
2146                 }
2147         }
2148         if(serve) {
2149                 g_warning("Specifying an export on the command line is deprecated.");
2150                 g_warning("Please use a configuration file instead.");
2151         }
2152
2153         if((!serve) && (!servers||!servers->len)) {
2154                 g_message("No configured exports; quitting.");
2155                 exit(EXIT_FAILURE);
2156         }
2157         daemonize(serve);
2158         setup_servers(servers);
2159         dousers();
2160         serveloop(servers);
2161         return 0 ;
2162 }