a11e37ad887efb1c77560b0dcacbd049a35afd16
[nbd.git] / nbd-server.c
1 /*
2  * Network Block Device - server
3  *
4  * Copyright 1996-1998 Pavel Machek, distribute under GPL
5  *  <pavel@atrey.karlin.mff.cuni.cz>
6  * Copyright 2001-2004 Wouter Verhelst <wouter@debian.org>
7  * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk>
8  *
9  * Version 1.0 - hopefully 64-bit-clean
10  * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au>
11  * Version 1.2 - autodetect size of block devices, thanx to Peter T. Breuer" <ptb@it.uc3m.es>
12  * Version 1.5 - can compile on Unix systems that don't have 64 bit integer
13  *      type, or don't have 64 bit file offsets by defining FS_32BIT
14  *      in compile options for nbd-server *only*. This can be done
15  *      with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the
16  *      original autoconf input file, or I would make it a configure
17  *      option.) Ken Yap <ken@nlc.net.au>.
18  * Version 1.6 - fix autodetection of block device size and really make 64 bit
19  *      clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk>
20  * Version 2.0 - Version synchronised with client
21  * Version 2.1 - Reap zombie client processes when they exit. Removed
22  *      (uncommented) the _IO magic, it's no longer necessary. Wouter
23  *      Verhelst <wouter@debian.org>
24  * Version 2.2 - Auto switch to read-only mode (useful for floppies).
25  * Version 2.3 - Fixed code so that Large File Support works. This
26  *      removes the FS_32BIT compile-time directive; define
27  *      _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be
28  *      using FS_32BIT. This will allow you to use files >2GB instead of
29  *      having to use the -m option. Wouter Verhelst <wouter@debian.org>
30  * Version 2.4 - Added code to keep track of children, so that we can
31  *      properly kill them from initscripts. Add a call to daemon(),
32  *      so that processes don't think they have to wait for us, which is
33  *      interesting for initscripts as well. Wouter Verhelst
34  *      <wouter@debian.org>
35  * Version 2.5 - Bugfix release: forgot to reset child_arraysize to
36  *      zero after fork()ing, resulting in nbd-server going berserk
37  *      when it receives a signal with at least one child open. Wouter
38  *      Verhelst <wouter@debian.org>
39  * 10/10/2003 - Added socket option SO_KEEPALIVE (sf.net bug 819235);
40  *      rectified type of mainloop::size_host (sf.net bugs 814435 and
41  *      817385); close the PID file after writing to it, so that the
42  *      daemon can actually be found. Wouter Verhelst
43  *      <wouter@debian.org>
44  * 10/10/2003 - Size of the data "size_host" was wrong and so was not
45  *      correctly put in network endianness. Many types were corrected
46  *      (size_t and off_t instead of int).  <vspaceg@sourceforge.net>
47  * Version 2.6 - Some code cleanup.
48  * Version 2.7 - Better build system.
49  * 11/02/2004 - Doxygenified the source, modularized it a bit. Needs a 
50  *      lot more work, but this is a start. Wouter Verhelst
51  *      <wouter@debian.org>
52  * 16/03/2010 - Add IPv6 support.
53  *      Kitt Tientanopajai <kitt@kitty.in.th>
54  *      Neutron Soutmun <neo.neutron@gmail.com>
55  *      Suriya Soutmun <darksolar@gmail.com>
56  */
57
58 /* Includes LFS defines, which defines behaviours of some of the following
59  * headers, so must come before those */
60 #include "lfs.h"
61
62 #include <sys/types.h>
63 #include <sys/socket.h>
64 #include <sys/stat.h>
65 #include <sys/select.h>         /* select */
66 #include <sys/wait.h>           /* wait */
67 #ifdef HAVE_SYS_IOCTL_H
68 #include <sys/ioctl.h>
69 #endif
70 #include <sys/param.h>
71 #ifdef HAVE_SYS_MOUNT_H
72 #include <sys/mount.h>          /* For BLKGETSIZE */
73 #endif
74 #include <signal.h>             /* sigaction */
75 #include <errno.h>
76 #include <netinet/tcp.h>
77 #include <netinet/in.h>
78 #include <netdb.h>
79 #include <syslog.h>
80 #include <unistd.h>
81 #include <stdio.h>
82 #include <stdlib.h>
83 #include <string.h>
84 #include <fcntl.h>
85 #include <arpa/inet.h>
86 #include <strings.h>
87 #include <dirent.h>
88 #include <unistd.h>
89 #include <getopt.h>
90 #include <pwd.h>
91 #include <grp.h>
92
93 #include <glib.h>
94
95 /* used in cliserv.h, so must come first */
96 #define MY_NAME "nbd_server"
97 #include "cliserv.h"
98
99 #ifdef WITH_SDP
100 #include <sdp_inet.h>
101 #endif
102
103 /** Default position of the config file */
104 #ifndef SYSCONFDIR
105 #define SYSCONFDIR "/etc"
106 #endif
107 #define CFILE SYSCONFDIR "/nbd-server/config"
108
109 /** Where our config file actually is */
110 gchar* config_file_pos;
111
112 /** What user we're running as */
113 gchar* runuser=NULL;
114 /** What group we're running as */
115 gchar* rungroup=NULL;
116 /** whether to export using the old negotiation protocol (port-based) */
117 gboolean do_oldstyle=FALSE;
118
/**
 * Logging macros. Each takes a syslog priority, a format string and zero
 * (msg2), one (msg3) or two (msg4) format arguments. The message only goes
 * to syslog when ISSERVER is defined; otherwise the priority is dropped and
 * the rest is handed to g_message().
 */
#if defined(ISSERVER)
#define msg2(prio,fmt) syslog(prio,fmt)
#define msg3(prio,fmt,arg1) syslog(prio,fmt,arg1)
#define msg4(prio,fmt,arg1,arg2) syslog(prio,fmt,arg1,arg2)
#else
#define msg2(prio,fmt) g_message(fmt)
#define msg3(prio,fmt,arg1) g_message(fmt,arg1)
#define msg4(prio,fmt,arg1,arg2) g_message(fmt,arg1,arg2)
#endif
129
/*
 * Debugging macros: DEBUG takes a format string only, DEBUG2..DEBUG4 take a
 * format string plus one to three arguments. They forward to printf() when
 * DODBG is defined and expand to nothing otherwise.
 */
//#define DODBG
#if defined(DODBG)
#define DEBUG( fmt ) printf( fmt )
#define DEBUG2( fmt,a1 ) printf( fmt,a1 )
#define DEBUG3( fmt,a1,a2 ) printf( fmt,a1,a2 )
#define DEBUG4( fmt,a1,a2,a3 ) printf( fmt,a1,a2,a3 )
#else
#define DEBUG( fmt )
#define DEBUG2( fmt,a1 )
#define DEBUG3( fmt,a1,a2 )
#define DEBUG4( fmt,a1,a2,a3 )
#endif

/* Fall back to an empty version string when none was defined earlier */
#if !defined(PACKAGE_VERSION)
#define PACKAGE_VERSION ""
#endif
/**
 * The highest value a variable of type off_t can reach. off_t is a signed
 * integer, so this is the value with all bits set except the leftmost one.
 *
 * It is computed as (2^(N-2) - 1) * 2 + 1 rather than by complementing
 * 1 << (N-1): shifting a signed 1 into the sign bit is undefined behaviour
 * in C. The expansion is fully parenthesised so it can be used safely
 * inside larger expressions.
 **/
#define OFFT_MAX ((off_t)(((((off_t)1 << (sizeof(off_t)*8-2)) - 1) * 2) + 1))
#define LINELEN 256       /**< Size of static buffer used to read the
                               authorization file (yuck) */
#define BUFSIZE (1024*1024) /**< Size of buffer that can hold requests */
#define DIFFPAGESIZE 4096 /**< diff file uses those chunks */
#define F_READONLY 1      /**< flag to tell us a file is readonly */
#define F_MULTIFILE 2     /**< flag to tell us a file is exported using -m */
#define F_COPYONWRITE 4   /**< flag to tell us a file is exported using
                            copyonwrite */
#define F_AUTOREADONLY 8  /**< flag to tell us a file is set to autoreadonly */
#define F_SPARSE 16       /**< flag to tell us copyonwrite should use a sparse file */
#define F_SDP 32          /**< flag to tell us the export should be done using the Socket Direct Protocol for RDMA */
#define F_SYNC 64         /**< Whether to fsync() after a write */
163 GHashTable *children;
164 char pidfname[256]; /**< name of our PID file */
165 char pidftemplate[256]; /**< template to be used for the filename of the PID file */
166 char default_authname[] = SYSCONFDIR "/nbd-server/allow"; /**< default name of allow file */
167
168 int modernsock=0;         /**< Socket for the modern handler. Not used
169                                if a client was only specified on the
170                                command line; only port used if
171                                oldstyle is set to false (and then the
172                                command-line client isn't used, gna gna) */
173 char* modern_listen;      /**< listenaddr value for modernsock */
174
/**
 * Types of virtualization
 **/
typedef enum {
	/** No virtualization */
	VIRT_NONE = 0,
	/** Literal IP address as part of the filename */
	VIRT_IPLIT,
	/** Replacing all dots in an ip address by a / before doing the same
	 *  as in IPLIT */
	VIRT_IPHASH,
	/** Every subnet in its own directory */
	VIRT_CIDR,
} VIRT_STYLE;
185
186 /**
187  * Variables associated with a server.
188  **/
189 typedef struct {
190         gchar* exportname;    /**< (unprocessed) filename of the file we're exporting */
191         off_t expected_size; /**< size of the exported file as it was told to
192                                us through configuration */
193         gchar* listenaddr;   /**< The IP address we're listening on */
194         unsigned int port;   /**< port we're exporting this file at */
195         char* authname;      /**< filename of the authorization file */
196         int flags;           /**< flags associated with this exported file */
197         int socket;          /**< The socket of this server. */
198         int socket_family;   /**< family of the socket */
199         VIRT_STYLE virtstyle;/**< The style of virtualization, if any */
200         uint8_t cidrlen;     /**< The length of the mask when we use
201                                   CIDR-style virtualization */
202         gchar* prerun;       /**< command to be ran after connecting a client,
203                                   but before starting to serve */
204         gchar* postrun;      /**< command that will be ran after the client
205                                   disconnects */
206         gchar* servename;    /**< name of the export as selected by nbd-client */
207 } SERVER;
208
/**
 * One exported file: its descriptor and the offset at which it starts.
 **/
typedef struct {
	/** file descriptor */
	int fhandle;
	/** starting offset of this file */
	off_t startoff;
} FILE_INFO;
216
217 typedef struct {
218         off_t exportsize;    /**< size of the file we're exporting */
219         char *clientname;    /**< peer */
220         char *exportname;    /**< (processed) filename of the file we're exporting */
221         GArray *export;    /**< array of FILE_INFO of exported files;
222                                array size is always 1 unless we're
223                                doing the multiple file option */
224         int net;             /**< The actual client socket */
225         SERVER *server;      /**< The server this client is getting data from */
226         char* difffilename;  /**< filename of the copy-on-write file, if any */
227         int difffile;        /**< filedescriptor of copyonwrite file. @todo
228                                shouldn't this be an array too? (cfr export) Or
229                                make -m and -c mutually exclusive */
230         u32 difffilelen;     /**< number of pages in difffile */
231         u32 *difmap;         /**< see comment on the global difmap for this one */
232         gboolean modern;     /**< client was negotiated using modern negotiation protocol */
233 } CLIENT;
234
/**
 * Type of configuration file values
 **/
typedef enum {
	/** This parameter is an integer */
	PARAM_INT,
	/** This parameter is a string */
	PARAM_STRING,
	/** This parameter is a boolean */
	PARAM_BOOL,
} PARAM_TYPE;
243
244 /**
245  * Configuration file values
246  **/
247 typedef struct {
248         gchar *paramname;       /**< Name of the parameter, as it appears in
249                                   the config file */
250         gboolean required;      /**< Whether this is a required (as opposed to
251                                   optional) parameter */
252         PARAM_TYPE ptype;       /**< Type of the parameter. */
253         gpointer target;        /**< Pointer to where the data of this
254                                   parameter should be written. If ptype is
255                                   PARAM_BOOL, the data is or'ed rather than
256                                   overwritten. */
257         gint flagval;           /**< Flag mask for this parameter in case ptype
258                                   is PARAM_BOOL. */
259 } PARAM;
260
261 /**
262  * Check whether a client is allowed to connect. Works with an authorization
263  * file which contains one line per machine, no wildcards.
264  *
265  * @param opts The client who's trying to connect.
266  * @return 0 - authorization refused, 1 - OK
267  **/
268 int authorized_client(CLIENT *opts) {
269         const char *ERRMSG="Invalid entry '%s' in authfile '%s', so, refusing all connections.";
270         FILE *f ;
271         char line[LINELEN]; 
272         char *tmp;
273         struct in_addr addr;
274         struct in_addr client;
275         struct in_addr cltemp;
276         int len;
277
278         if ((f=fopen(opts->server->authname,"r"))==NULL) {
279                 msg4(LOG_INFO,"Can't open authorization file %s (%s).",
280                      opts->server->authname,strerror(errno)) ;
281                 return 1 ; 
282         }
283   
284         inet_aton(opts->clientname, &client);
285         while (fgets(line,LINELEN,f)!=NULL) {
286                 if((tmp=index(line, '/'))) {
287                         if(strlen(line)<=tmp-line) {
288                                 msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
289                                 return 0;
290                         }
291                         *(tmp++)=0;
292                         if(!inet_aton(line,&addr)) {
293                                 msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
294                                 return 0;
295                         }
296                         len=strtol(tmp, NULL, 0);
297                         addr.s_addr>>=32-len;
298                         addr.s_addr<<=32-len;
299                         memcpy(&cltemp,&client,sizeof(client));
300                         cltemp.s_addr>>=32-len;
301                         cltemp.s_addr<<=32-len;
302                         if(addr.s_addr == cltemp.s_addr) {
303                                 return 1;
304                         }
305                 }
306                 if (strncmp(line,opts->clientname,strlen(opts->clientname))==0) {
307                         fclose(f);
308                         return 1;
309                 }
310         }
311         fclose(f);
312         return 0;
313 }
314
/**
 * Read data from a file descriptor into a buffer. Keeps reading until
 * exactly len bytes have been received.
 *
 * A read that fails with EAGAIN is retried. Any other failure, and an end
 * of file before len bytes arrived, is reported through err().
 * NOTE(review): this assumes err() does not return -- confirm in cliserv.h.
 *
 * @param f a file descriptor
 * @param buf a buffer
 * @param len the number of bytes to be read
 **/
inline void readit(int f, void *buf, size_t len) {
	char *cursor = buf; /* arithmetic on void* is not standard C */
	ssize_t res;
	while (len > 0) {
		DEBUG("*");
		res = read(f, cursor, len);
		if (res > 0) {
			len -= res;
			cursor += res;
		} else if (res == 0) {
			/* read() does not set errno at end of file, so the
			 * value left by an earlier call must not be
			 * inspected or reported here. */
			err("Read failed: End of file");
		} else if (errno != EAGAIN) {
			err("Read failed: %m");
		}
	}
}
336
/**
 * Write data from a buffer into a filedescriptor. Loops until all len bytes
 * have been handed to write(); a write() result of zero or less is reported
 * through err().
 *
 * @param f a file descriptor
 * @param buf a buffer containing data
 * @param len the number of bytes to be written
 **/
inline void writeit(int f, void *buf, size_t len) {
	char *cursor = buf;
	size_t remaining = len;
	ssize_t written;

	for (; remaining > 0; remaining -= written, cursor += written) {
		DEBUG("+");
		written = write(f, cursor, remaining);
		if (written <= 0)
			err("Send failed: %m");
	}
}
354
355 /**
356  * Print out a message about how to use nbd-server. Split out to a separate
357  * function so that we can call it from multiple places
358  */
359 void usage() {
360         printf("This is nbd-server version " VERSION "\n");
361         printf("Usage: [ip:|ip6@]port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-C configuration file] [-p PID file name] [-o section name]\n"
362                "\t-r|--read-only\t\tread only\n"
363                "\t-m|--multi-file\t\tmultiple file\n"
364                "\t-c|--copy-on-write\tcopy on write\n"
365                "\t-C|--config-file\tspecify an alternate configuration file\n"
366                "\t-l|--authorize-file\tfile with list of hosts that are allowed to\n\t\t\t\tconnect.\n"
367                "\t-p|--pid-file\t\tspecify a filename to write our PID to\n"
368                "\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n\n"
369                "\tif port is set to 0, stdin is used (for running from inetd)\n"
370                "\tif file_to_export contains '%%s', it is substituted with the IP\n"
371                "\t\taddress of the machine trying to connect\n" 
372                "\tif ip is set, it contains the local IP address on which we're listening.\n\tif not, the server will listen on all local IP addresses\n");
373         printf("Using configuration file %s\n", CFILE);
374 }
375
376 /* Dumps a config file section of the given SERVER*, and exits. */
377 void dump_section(SERVER* serve, gchar* section_header) {
378         printf("[%s]\n", section_header);
379         printf("\texportname = %s\n", serve->exportname);
380         printf("\tlistenaddr = %s\n", serve->listenaddr);
381         printf("\tport = %d\n", serve->port);
382         if(serve->flags & F_READONLY) {
383                 printf("\treadonly = true\n");
384         }
385         if(serve->flags & F_MULTIFILE) {
386                 printf("\tmultifile = true\n");
387         }
388         if(serve->flags & F_COPYONWRITE) {
389                 printf("\tcopyonwrite = true\n");
390         }
391         if(serve->expected_size) {
392                 printf("\tfilesize = %lld\n", (long long int)serve->expected_size);
393         }
394         if(serve->authname) {
395                 printf("\tauthfile = %s\n", serve->authname);
396         }
397         exit(EXIT_SUCCESS);
398 }
399
400 /**
401  * Parse the command line.
402  *
403  * @param argc the argc argument to main()
404  * @param argv the argv argument to main()
405  **/
406 SERVER* cmdline(int argc, char *argv[]) {
407         int i=0;
408         int nonspecial=0;
409         int c;
410         struct option long_options[] = {
411                 {"read-only", no_argument, NULL, 'r'},
412                 {"multi-file", no_argument, NULL, 'm'},
413                 {"copy-on-write", no_argument, NULL, 'c'},
414                 {"authorize-file", required_argument, NULL, 'l'},
415                 {"config-file", required_argument, NULL, 'C'},
416                 {"pid-file", required_argument, NULL, 'p'},
417                 {"output-config", required_argument, NULL, 'o'},
418                 {0,0,0,0}
419         };
420         SERVER *serve;
421         off_t es;
422         size_t last;
423         char suffix;
424         gboolean do_output=FALSE;
425         gchar* section_header="";
426         gchar** addr_port;
427
428         if(argc==1) {
429                 return NULL;
430         }
431         serve=g_new0(SERVER, 1);
432         serve->authname = g_strdup(default_authname);
433         serve->virtstyle=VIRT_IPLIT;
434         while((c=getopt_long(argc, argv, "-C:cl:mo:rp:", long_options, &i))>=0) {
435                 switch (c) {
436                 case 1:
437                         /* non-option argument */
438                         switch(nonspecial++) {
439                         case 0:
440                                 if(strchr(optarg, ':') == strrchr(optarg, ':')) {
441                                         addr_port=g_strsplit(optarg, ":", 2);
442
443                                         /* Check for "@" - maybe user using this separator
444                                                  for IPv4 address */
445                                         if(!addr_port[1]) {
446                                                 g_strfreev(addr_port);
447                                                 addr_port=g_strsplit(optarg, "@", 2);
448                                         }
449                                 } else {
450                                         addr_port=g_strsplit(optarg, "@", 2);
451                                 }
452
453                                 if(addr_port[1]) {
454                                         serve->port=strtol(addr_port[1], NULL, 0);
455                                         serve->listenaddr=g_strdup(addr_port[0]);
456                                 } else {
457                                         serve->listenaddr=NULL;
458                                         serve->port=strtol(addr_port[0], NULL, 0);
459                                 }
460                                 g_strfreev(addr_port);
461                                 break;
462                         case 1:
463                                 serve->exportname = g_strdup(optarg);
464                                 if(serve->exportname[0] != '/') {
465                                         fprintf(stderr, "E: The to be exported file needs to be an absolute filename!\n");
466                                         exit(EXIT_FAILURE);
467                                 }
468                                 break;
469                         case 2:
470                                 last=strlen(optarg)-1;
471                                 suffix=optarg[last];
472                                 if (suffix == 'k' || suffix == 'K' ||
473                                     suffix == 'm' || suffix == 'M')
474                                         optarg[last] = '\0';
475                                 es = (off_t)atoll(optarg);
476                                 switch (suffix) {
477                                         case 'm':
478                                         case 'M':  es <<= 10;
479                                         case 'k':
480                                         case 'K':  es <<= 10;
481                                         default :  break;
482                                 }
483                                 serve->expected_size = es;
484                                 break;
485                         }
486                         break;
487                 case 'r':
488                         serve->flags |= F_READONLY;
489                         break;
490                 case 'm':
491                         serve->flags |= F_MULTIFILE;
492                         break;
493                 case 'o':
494                         do_output = TRUE;
495                         section_header = g_strdup(optarg);
496                         break;
497                 case 'p':
498                         strncpy(pidftemplate, optarg, 256);
499                         break;
500                 case 'c': 
501                         serve->flags |=F_COPYONWRITE;
502                         break;
503                 case 'C':
504                         g_free(config_file_pos);
505                         config_file_pos=g_strdup(optarg);
506                         break;
507                 case 'l':
508                         g_free(serve->authname);
509                         serve->authname=g_strdup(optarg);
510                         break;
511                 default:
512                         usage();
513                         exit(EXIT_FAILURE);
514                         break;
515                 }
516         }
517         /* What's left: the port to export, the name of the to be exported
518          * file, and, optionally, the size of the file, in that order. */
519         if(nonspecial<2) {
520                 g_free(serve);
521                 serve=NULL;
522         } else {
523                 do_oldstyle = TRUE;
524         }
525         if(do_output) {
526                 if(!serve) {
527                         g_critical("Need a complete configuration on the command line to output a config file section!");
528                         exit(EXIT_FAILURE);
529                 }
530                 dump_section(serve, section_header);
531         }
532         return serve;
533 }
534
/**
 * Error codes for config file parsing
 **/
typedef enum {
	/** The configuration file is not found */
	CFILE_NOTFOUND,
	/** The (required) group "generic" is missing */
	CFILE_MISSING_GENERIC,
	/** A (required) key is missing */
	CFILE_KEY_MISSING,
	/** A value is syntactically invalid */
	CFILE_VALUE_INVALID,
	/** A value is not supported in this build */
	CFILE_VALUE_UNSUPPORTED,
	/** Programmer error */
	CFILE_PROGERR,
	/** A config file was specified that does not define any exports */
	CFILE_NO_EXPORTS,
	/** The reserved port was specified for an old-style export. */
	CFILE_INCORRECT_PORT,
} CFILE_ERRORS;
550
551 /**
552  * Remove a SERVER from memory. Used from the hash table
553  **/
554 void remove_server(gpointer s) {
555         SERVER *server;
556
557         server=(SERVER*)s;
558         g_free(server->exportname);
559         if(server->authname)
560                 g_free(server->authname);
561         if(server->listenaddr)
562                 g_free(server->listenaddr);
563         if(server->prerun)
564                 g_free(server->prerun);
565         if(server->postrun)
566                 g_free(server->postrun);
567         g_free(server);
568 }
569
570 /**
571  * duplicate server
572  * @param s the old server we want to duplicate
573  * @return new duplicated server
574  **/
575 SERVER* dup_serve(SERVER *s) {
576         SERVER *serve = NULL;
577
578         serve=g_new0(SERVER, 1);
579         if(serve == NULL)
580                 return NULL;
581
582         if(s->exportname)
583                 serve->exportname = g_strdup(s->exportname);
584
585         serve->expected_size = s->expected_size;
586
587         if(s->listenaddr)
588                 serve->listenaddr = g_strdup(s->listenaddr);
589
590         serve->port = s->port;
591
592         if(s->authname)
593                 serve->authname = strdup(s->authname);
594
595         serve->flags = s->flags;
596         serve->socket = serve->socket;
597         serve->socket_family = serve->socket_family;
598         serve->cidrlen = s->cidrlen;
599
600         if(s->prerun)
601                 serve->prerun = g_strdup(s->prerun);
602
603         if(s->postrun)
604                 serve->postrun = g_strdup(s->postrun);
605         
606         if(s->servename)
607                 serve->servename = g_strdup(s->servename);
608
609         return serve;
610 }
611
612 /**
613  * append new server to array
614  * @param s server
615  * @param a server array
616  * @return 0 success, -1 error
617  */
618 int append_serve(SERVER *s, GArray *a) {
619         SERVER *ns = NULL;
620         struct addrinfo hints;
621         struct addrinfo *ai = NULL;
622         struct addrinfo *rp = NULL;
623         char   host[NI_MAXHOST];
624         gchar  *port = NULL;
625         int e;
626         int ret;
627
628         if(!s) {
629                 err("Invalid parsing server");
630                 return -1;
631         }
632
633         port = g_strdup_printf("%d", s->port);
634
635         memset(&hints,'\0',sizeof(hints));
636         hints.ai_family = AF_UNSPEC;
637         hints.ai_socktype = SOCK_STREAM;
638         hints.ai_flags = AI_ADDRCONFIG | AI_PASSIVE;
639         hints.ai_protocol = IPPROTO_TCP;
640
641         e = getaddrinfo(s->listenaddr, port, &hints, &ai);
642
643         if (port)
644                 g_free(port);
645
646         if(e == 0) {
647                 for (rp = ai; rp != NULL; rp = rp->ai_next) {
648                         e = getnameinfo(rp->ai_addr, rp->ai_addrlen, host, sizeof(host), NULL, 0, NI_NUMERICHOST);
649
650                         if (e != 0) { // error
651                                 fprintf(stderr, "getnameinfo: %s\n", gai_strerror(e));
652                                 continue;
653                         }
654
655                         // duplicate server and set listenaddr to resolved IP address
656                         ns = dup_serve (s);
657                         if (ns) {
658                                 ns->listenaddr = g_strdup(host);
659                                 ns->socket_family = rp->ai_family;
660                                 g_array_append_val(a, *ns);
661                                 free(ns);
662                                 ns = NULL;
663                         }
664                 }
665
666                 ret = 0;
667         } else {
668                 fprintf(stderr, "getaddrinfo failed on listen host/address: %s (%s)\n", s->listenaddr ? s->listenaddr : "any", gai_strerror(e));
669                 ret = -1;
670         }
671
672         if (ai)
673                 freeaddrinfo(ai);
674
675         return ret;
676 }
677
678 /**
679  * Parse the config file.
680  *
681  * @param f the name of the config file
682  * @param e a GError. @see CFILE_ERRORS for what error values this function can
683  *      return.
684  * @return a Array of SERVER* pointers, If the config file is empty or does not
685  *      exist, returns an empty GHashTable; if the config file contains an
686  *      error, returns NULL, and e is set appropriately
687  **/
688 GArray* parse_cfile(gchar* f, GError** e) {
689         const char* DEFAULT_ERROR = "Could not parse %s in group %s: %s";
690         const char* MISSING_REQUIRED_ERROR = "Could not find required value %s in group %s: %s";
691         SERVER s;
692         gchar *virtstyle=NULL;
693         PARAM lp[] = {
694                 { "exportname", TRUE,   PARAM_STRING,   NULL, 0 },
695                 { "port",       TRUE,   PARAM_INT,      NULL, 0 },
696                 { "authfile",   FALSE,  PARAM_STRING,   NULL, 0 },
697                 { "filesize",   FALSE,  PARAM_INT,      NULL, 0 },
698                 { "virtstyle",  FALSE,  PARAM_STRING,   NULL, 0 },
699                 { "prerun",     FALSE,  PARAM_STRING,   NULL, 0 },
700                 { "postrun",    FALSE,  PARAM_STRING,   NULL, 0 },
701                 { "readonly",   FALSE,  PARAM_BOOL,     NULL, F_READONLY },
702                 { "multifile",  FALSE,  PARAM_BOOL,     NULL, F_MULTIFILE },
703                 { "copyonwrite", FALSE, PARAM_BOOL,     NULL, F_COPYONWRITE },
704                 { "sparse_cow", FALSE,  PARAM_BOOL,     NULL, F_SPARSE },
705                 { "sdp",        FALSE,  PARAM_BOOL,     NULL, F_SDP },
706                 { "sync",       FALSE,  PARAM_BOOL,     NULL, F_SYNC },
707                 { "listenaddr", FALSE,  PARAM_STRING,   NULL, 0 },
708         };
709         const int lp_size=sizeof(lp)/sizeof(PARAM);
710         PARAM gp[] = {
711                 { "user",       FALSE, PARAM_STRING,    &runuser,       0 },
712                 { "group",      FALSE, PARAM_STRING,    &rungroup,      0 },
713                 { "oldstyle",   FALSE, PARAM_BOOL,      &do_oldstyle,   1 },
714                 { "listenaddr", FALSE, PARAM_STRING,    &modern_listen, 0 },
715         };
716         PARAM* p=gp;
717         int p_size=sizeof(gp)/sizeof(PARAM);
718         GKeyFile *cfile;
719         GError *err = NULL;
720         const char *err_msg=NULL;
721         GQuark errdomain;
722         GArray *retval=NULL;
723         gchar **groups;
724         gboolean value;
725         gchar* startgroup;
726         gint i;
727         gint j;
728
729         errdomain = g_quark_from_string("parse_cfile");
730         cfile = g_key_file_new();
731         retval = g_array_new(FALSE, TRUE, sizeof(SERVER));
732         if(!g_key_file_load_from_file(cfile, f, G_KEY_FILE_KEEP_COMMENTS |
733                         G_KEY_FILE_KEEP_TRANSLATIONS, &err)) {
734                 g_set_error(e, errdomain, CFILE_NOTFOUND, "Could not open config file %s.", f);
735                 g_key_file_free(cfile);
736                 return retval;
737         }
738         startgroup = g_key_file_get_start_group(cfile);
739         if(!startgroup || strcmp(startgroup, "generic")) {
740                 g_set_error(e, errdomain, CFILE_MISSING_GENERIC, "Config file does not contain the [generic] group!");
741                 g_key_file_free(cfile);
742                 return NULL;
743         }
744         groups = g_key_file_get_groups(cfile, NULL);
745         for(i=0;groups[i];i++) {
746                 memset(&s, '\0', sizeof(SERVER));
747                 lp[0].target=&(s.exportname);
748                 lp[1].target=&(s.port);
749                 lp[2].target=&(s.authname);
750                 lp[3].target=&(s.expected_size);
751                 lp[4].target=&(virtstyle);
752                 lp[5].target=&(s.prerun);
753                 lp[6].target=&(s.postrun);
754                 lp[7].target=lp[8].target=lp[9].target=
755                                 lp[10].target=lp[11].target=
756                                 lp[12].target=&(s.flags);
757                 lp[13].target=&(s.listenaddr);
758
759                 /* After the [generic] group, start parsing exports */
760                 if(i==1) {
761                         p=lp;
762                         p_size=lp_size;
763                 } 
764                 for(j=0;j<p_size;j++) {
765                         g_assert(p[j].target != NULL);
766                         g_assert(p[j].ptype==PARAM_INT||p[j].ptype==PARAM_STRING||p[j].ptype==PARAM_BOOL);
767                         switch(p[j].ptype) {
768                                 case PARAM_INT:
769                                         *((gint*)p[j].target) =
770                                                 g_key_file_get_integer(cfile,
771                                                                 groups[i],
772                                                                 p[j].paramname,
773                                                                 &err);
774                                         break;
775                                 case PARAM_STRING:
776                                         *((gchar**)p[j].target) =
777                                                 g_key_file_get_string(cfile,
778                                                                 groups[i],
779                                                                 p[j].paramname,
780                                                                 &err);
781                                         break;
782                                 case PARAM_BOOL:
783                                         value = g_key_file_get_boolean(cfile,
784                                                         groups[i],
785                                                         p[j].paramname, &err);
786                                         if(!err) {
787                                                 if(value) {
788                                                         *((gint*)p[j].target) |= p[j].flagval;
789                                                 } else {
790                                                         *((gint*)p[j].target) &= ~(p[j].flagval);
791                                                 }
792                                         }
793                                         break;
794                         }
795                         if(!strcmp(p[j].paramname, "port") && !strcmp(p[j].target, NBD_DEFAULT_PORT)) {
796                                 g_set_error(e, errdomain, CFILE_INCORRECT_PORT, "Config file specifies default port for oldstyle export");
797                                 g_key_file_free(cfile);
798                                 return NULL;
799                         }
800                         if(err) {
801                                 if(err->code == G_KEY_FILE_ERROR_KEY_NOT_FOUND) {
802                                         if(!p[j].required) {
803                                                 /* Ignore not-found error for optional values */
804                                                 g_clear_error(&err);
805                                                 continue;
806                                         } else {
807                                                 err_msg = MISSING_REQUIRED_ERROR;
808                                         }
809                                 } else {
810                                         err_msg = DEFAULT_ERROR;
811                                 }
812                                 g_set_error(e, errdomain, CFILE_VALUE_INVALID, err_msg, p[j].paramname, groups[i], err->message);
813                                 g_array_free(retval, TRUE);
814                                 g_error_free(err);
815                                 g_key_file_free(cfile);
816                                 return NULL;
817                         }
818                 }
819                 if(virtstyle) {
820                         if(!strncmp(virtstyle, "none", 4)) {
821                                 s.virtstyle=VIRT_NONE;
822                         } else if(!strncmp(virtstyle, "ipliteral", 9)) {
823                                 s.virtstyle=VIRT_IPLIT;
824                         } else if(!strncmp(virtstyle, "iphash", 6)) {
825                                 s.virtstyle=VIRT_IPHASH;
826                         } else if(!strncmp(virtstyle, "cidrhash", 8)) {
827                                 s.virtstyle=VIRT_CIDR;
828                                 if(strlen(virtstyle)<10) {
829                                         g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s: missing length", virtstyle, groups[i]);
830                                         g_array_free(retval, TRUE);
831                                         g_key_file_free(cfile);
832                                         return NULL;
833                                 }
834                                 s.cidrlen=strtol(virtstyle+8, NULL, 0);
835                         } else {
836                                 g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s", virtstyle, groups[i]);
837                                 g_array_free(retval, TRUE);
838                                 g_key_file_free(cfile);
839                                 return NULL;
840                         }
841                         if(s.port && !do_oldstyle) {
842                                 g_warning("A port was specified, but oldstyle exports were not requested. This may not do what you expect.");
843                                 g_warning("Please read 'man 5 nbd-server' and search for oldstyle for more info");
844                         }
845                 } else {
846                         s.virtstyle=VIRT_IPLIT;
847                 }
848                 /* Don't need to free this, it's not our string */
849                 virtstyle=NULL;
850                 /* Don't append values for the [generic] group */
851                 if(i>0) {
852                         s.socket_family = AF_UNSPEC;
853                         s.servename = groups[i];
854
855                         append_serve(&s, retval);
856                 } else {
857                         if(!do_oldstyle) {
858                                 lp[1].required = 0;
859                         }
860                 }
861 #ifndef WITH_SDP
862                 if(s.flags & F_SDP) {
863                         g_set_error(e, errdomain, CFILE_VALUE_UNSUPPORTED, "This nbd-server was built without support for SDP, yet group %s uses it", groups[i]);
864                         g_array_free(retval, TRUE);
865                         g_key_file_free(cfile);
866                         return NULL;
867                 }
868 #endif
869         }
870         if(i==1) {
871                 g_set_error(e, errdomain, CFILE_NO_EXPORTS, "The config file does not specify any exports");
872         }
873         g_key_file_free(cfile);
874         return retval;
875 }
876
877 /**
878  * Signal handler for SIGCHLD
879  * @param s the signal we're handling (must be SIGCHLD, or something
880  * is severely wrong)
881  **/
882 void sigchld_handler(int s) {
883         int status;
884         int* i;
885         pid_t pid;
886
887         while((pid=waitpid(-1, &status, WNOHANG)) > 0) {
888                 if(WIFEXITED(status)) {
889                         msg3(LOG_INFO, "Child exited with %d", WEXITSTATUS(status));
890                 }
891                 i=g_hash_table_lookup(children, &pid);
892                 if(!i) {
893                         msg3(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid);
894                 } else {
895                         DEBUG2("Removing %d from the list of children", pid);
896                         g_hash_table_remove(children, &pid);
897                 }
898         }
899 }
900
901 /**
902  * Kill a child. Called from sigterm_handler::g_hash_table_foreach.
903  *
904  * @param key the key
905  * @param value the value corresponding to the above key
906  * @param user_data a pointer which we always set to 1, so that we know what
907  * will happen next.
908  **/
909 void killchild(gpointer key, gpointer value, gpointer user_data) {
910         pid_t *pid=value;
911         int *parent=user_data;
912
913         kill(*pid, SIGTERM);
914         *parent=1;
915 }
916
917 /**
918  * Handle SIGTERM and dispatch it to our children
919  * @param s the signal we're handling (must be SIGTERM, or something
920  * is severely wrong).
921  **/
922 void sigterm_handler(int s) {
923         int parent=0;
924
925         g_hash_table_foreach(children, killchild, &parent);
926
927         if(parent) {
928                 unlink(pidfname);
929         }
930
931         exit(EXIT_SUCCESS);
932 }
933
934 /**
935  * Detect the size of a file.
936  *
937  * @param fhandle An open filedescriptor
938  * @return the size of the file, or OFFT_MAX if detection was
939  * impossible.
940  **/
941 off_t size_autodetect(int fhandle) {
942         off_t es;
943         u64 bytes;
944         struct stat stat_buf;
945         int error;
946
947 #ifdef HAVE_SYS_MOUNT_H
948 #ifdef HAVE_SYS_IOCTL_H
949 #ifdef BLKGETSIZE64
950         DEBUG("looking for export size with ioctl BLKGETSIZE64\n");
951         if (!ioctl(fhandle, BLKGETSIZE64, &bytes) && bytes) {
952                 return (off_t)bytes;
953         }
954 #endif /* BLKGETSIZE64 */
955 #endif /* HAVE_SYS_IOCTL_H */
956 #endif /* HAVE_SYS_MOUNT_H */
957
958         DEBUG("looking for fhandle size with fstat\n");
959         stat_buf.st_size = 0;
960         error = fstat(fhandle, &stat_buf);
961         if (!error) {
962                 if(stat_buf.st_size > 0)
963                         return (off_t)stat_buf.st_size;
964         } else {
965                 err("fstat failed: %m");
966         }
967
968         DEBUG("looking for fhandle size with lseek SEEK_END\n");
969         es = lseek(fhandle, (off_t)0, SEEK_END);
970         if (es > ((off_t)0)) {
971                 return es;
972         } else {
973                 DEBUG2("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4)));
974         }
975
976         err("Could not find size of exported block device: %m");
977         return OFFT_MAX;
978 }
979
980 /**
981  * Get the file handle and offset, given an export offset.
982  *
983  * @param export An array of export files
984  * @param a The offset to get corresponding file/offset for
985  * @param fhandle [out] File descriptor
986  * @param foffset [out] Offset into fhandle
987  * @param maxbytes [out] Tells how many bytes can be read/written
988  * from fhandle starting at foffset (0 if there is no limit)
989  * @return 0 on success, -1 on failure
990  **/
991 int get_filepos(GArray* export, off_t a, int* fhandle, off_t* foffset, size_t* maxbytes ) {
992         /* Negative offset not allowed */
993         if(a < 0)
994                 return -1;
995
996         /* Binary search for last file with starting offset <= a */
997         FILE_INFO fi;
998         int start = 0;
999         int end = export->len - 1;
1000         while( start <= end ) {
1001                 int mid = (start + end) / 2;
1002                 fi = g_array_index(export, FILE_INFO, mid);
1003                 if( fi.startoff < a ) {
1004                         start = mid + 1;
1005                 } else if( fi.startoff > a ) {
1006                         end = mid - 1;
1007                 } else {
1008                         start = end = mid;
1009                         break;
1010                 }
1011         }
1012
1013         /* end should never go negative, since first startoff is 0 and a >= 0 */
1014         g_assert(end >= 0);
1015
1016         fi = g_array_index(export, FILE_INFO, end);
1017         *fhandle = fi.fhandle;
1018         *foffset = a - fi.startoff;
1019         *maxbytes = 0;
1020         if( end+1 < export->len ) {
1021                 FILE_INFO fi_next = g_array_index(export, FILE_INFO, end+1);
1022                 *maxbytes = fi_next.startoff - a;
1023         }
1024
1025         return 0;
1026 }
1027
/**
 * Position a file descriptor at an absolute offset, reporting failure
 * through err().
 *
 * @param handle a filedescriptor
 * @param a position to seek to (measured from the start of the file)
 * @todo get rid of this; lastpoint is a global variable right now, but it
 * shouldn't be. If we pass it on as a parameter, that makes things a *lot*
 * easier.
 **/
void myseek(int handle,off_t a) {
	off_t where = lseek(handle, a, SEEK_SET);

	if (where < 0)
		err("Can not seek locally!\n");
}
1041
1042 /**
1043  * Write an amount of bytes at a given offset to the right file. This
1044  * abstracts the write-side of the multiple file option.
1045  *
1046  * @param a The offset where the write should start
1047  * @param buf The buffer to write from
1048  * @param len The length of buf
1049  * @param client The client we're serving for
1050  * @return The number of bytes actually written, or -1 in case of an error
1051  **/
1052 ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client) {
1053         int fhandle;
1054         off_t foffset;
1055         size_t maxbytes;
1056         ssize_t retval;
1057
1058         if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
1059                 return -1;
1060         if(maxbytes && len > maxbytes)
1061                 len = maxbytes;
1062
1063         DEBUG4("(WRITE to fd %d offset %llu len %u), ", fhandle, foffset, len);
1064
1065         myseek(fhandle, foffset);
1066         retval = write(fhandle, buf, len);
1067         if(client->server->flags & F_SYNC) {
1068                 fsync(fhandle);
1069         }
1070         return retval;
1071 }
1072
1073 /**
1074  * Call rawexpwrite repeatedly until all data has been written.
1075  * @return 0 on success, nonzero on failure
1076  **/
1077 int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client) {
1078         ssize_t ret=0;
1079
1080         while(len > 0 && (ret=rawexpwrite(a, buf, len, client)) > 0 ) {
1081                 a += ret;
1082                 buf += ret;
1083                 len -= ret;
1084         }
1085         return (ret < 0 || len != 0);
1086 }
1087
1088 /**
1089  * Read an amount of bytes at a given offset from the right file. This
1090  * abstracts the read-side of the multiple files option.
1091  *
1092  * @param a The offset where the read should start
1093  * @param buf A buffer to read into
1094  * @param len The size of buf
1095  * @param client The client we're serving for
1096  * @return The number of bytes actually read, or -1 in case of an
1097  * error.
1098  **/
1099 ssize_t rawexpread(off_t a, char *buf, size_t len, CLIENT *client) {
1100         int fhandle;
1101         off_t foffset;
1102         size_t maxbytes;
1103
1104         if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
1105                 return -1;
1106         if(maxbytes && len > maxbytes)
1107                 len = maxbytes;
1108
1109         DEBUG4("(READ from fd %d offset %llu len %u), ", fhandle, foffset, len);
1110
1111         myseek(fhandle, foffset);
1112         return read(fhandle, buf, len);
1113 }
1114
1115 /**
1116  * Call rawexpread repeatedly until all data has been read.
1117  * @return 0 on success, nonzero on failure
1118  **/
1119 int rawexpread_fully(off_t a, char *buf, size_t len, CLIENT *client) {
1120         ssize_t ret=0;
1121
1122         while(len > 0 && (ret=rawexpread(a, buf, len, client)) > 0 ) {
1123                 a += ret;
1124                 buf += ret;
1125                 len -= ret;
1126         }
1127         return (ret < 0 || len != 0);
1128 }
1129
1130 /**
1131  * Read an amount of bytes at a given offset from the right file. This
1132  * abstracts the read-side of the copyonwrite stuff, and calls
1133  * rawexpread() with the right parameters to do the actual work.
1134  * @param a The offset where the read should start
1135  * @param buf A buffer to read into
1136  * @param len The size of buf
1137  * @param client The client we're going to read for
1138  * @return 0 on success, nonzero on failure
1139  **/
1140 int expread(off_t a, char *buf, size_t len, CLIENT *client) {
1141         off_t rdlen, offset;
1142         off_t mapcnt, mapl, maph, pagestart;
1143
1144         if (!(client->server->flags & F_COPYONWRITE))
1145                 return(rawexpread_fully(a, buf, len, client));
1146         DEBUG3("Asked to read %d bytes at %llu.\n", len, (unsigned long long)a);
1147
1148         mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
1149
1150         for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
1151                 pagestart=mapcnt*DIFFPAGESIZE;
1152                 offset=a-pagestart;
1153                 rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
1154                         len : (size_t)DIFFPAGESIZE-offset;
1155                 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1156                         DEBUG3("Page %llu is at %lu\n", (unsigned long long)mapcnt,
1157                                (unsigned long)(client->difmap[mapcnt]));
1158                         myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset);
1159                         if (read(client->difffile, buf, rdlen) != rdlen) return -1;
1160                 } else { /* the block is not there */
1161                         DEBUG2("Page %llu is not here, we read the original one\n",
1162                                (unsigned long long)mapcnt);
1163                         if(rawexpread_fully(a, buf, rdlen, client)) return -1;
1164                 }
1165                 len-=rdlen; a+=rdlen; buf+=rdlen;
1166         }
1167         return 0;
1168 }
1169
1170 /**
1171  * Write an amount of bytes at a given offset to the right file. This
1172  * abstracts the write-side of the copyonwrite option, and calls
1173  * rawexpwrite() with the right parameters to do the actual work.
1174  *
1175  * @param a The offset where the write should start
1176  * @param buf The buffer to write from
1177  * @param len The length of buf
1178  * @param client The client we're going to write for.
1179  * @return 0 on success, nonzero on failure
1180  **/
1181 int expwrite(off_t a, char *buf, size_t len, CLIENT *client) {
1182         char pagebuf[DIFFPAGESIZE];
1183         off_t mapcnt,mapl,maph;
1184         off_t wrlen,rdlen; 
1185         off_t pagestart;
1186         off_t offset;
1187
1188         if (!(client->server->flags & F_COPYONWRITE))
1189                 return(rawexpwrite_fully(a, buf, len, client)); 
1190         DEBUG3("Asked to write %d bytes at %llu.\n", len, (unsigned long long)a);
1191
1192         mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
1193
1194         for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
1195                 pagestart=mapcnt*DIFFPAGESIZE ;
1196                 offset=a-pagestart ;
1197                 wrlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
1198                         len : (size_t)DIFFPAGESIZE-offset;
1199
1200                 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1201                         DEBUG3("Page %llu is at %lu\n", (unsigned long long)mapcnt,
1202                                (unsigned long)(client->difmap[mapcnt])) ;
1203                         myseek(client->difffile,
1204                                         client->difmap[mapcnt]*DIFFPAGESIZE+offset);
1205                         if (write(client->difffile, buf, wrlen) != wrlen) return -1 ;
1206                 } else { /* the block is not there */
1207                         myseek(client->difffile,client->difffilelen*DIFFPAGESIZE) ;
1208                         client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++;
1209                         DEBUG3("Page %llu is not here, we put it at %lu\n",
1210                                (unsigned long long)mapcnt,
1211                                (unsigned long)(client->difmap[mapcnt]));
1212                         rdlen=DIFFPAGESIZE ;
1213                         if (rawexpread_fully(pagestart, pagebuf, rdlen, client))
1214                                 return -1;
1215                         memcpy(pagebuf+offset,buf,wrlen) ;
1216                         if (write(client->difffile, pagebuf, DIFFPAGESIZE) !=
1217                                         DIFFPAGESIZE)
1218                                 return -1;
1219                 }                                                   
1220                 len-=wrlen ; a+=wrlen ; buf+=wrlen ;
1221         }
1222         return 0;
1223 }
1224
1225 /**
1226  * Do the initial negotiation.
1227  *
1228  * @param client The client we're negotiating with.
1229  **/
1230 CLIENT* negotiate(int net, CLIENT *client, GArray* servers) {
1231         char zeros[128];
1232         uint64_t size_host;
1233         uint32_t flags = NBD_FLAG_HAS_FLAGS;
1234         uint16_t smallflags = 0;
1235         uint64_t magic;
1236
1237         memset(zeros, '\0', sizeof(zeros));
1238         if(!client || !client->modern) {
1239                 /* common */
1240                 if (write(net, INIT_PASSWD, 8) < 0) {
1241                         err_nonfatal("Negotiation failed: %m");
1242                         if(client)
1243                                 exit(EXIT_FAILURE);
1244                 }
1245                 if(!client || client->modern) {
1246                         /* modern */
1247                         magic = htonll(opts_magic);
1248                 } else {
1249                         /* oldstyle */
1250                         magic = htonll(cliserv_magic);
1251                 }
1252                 if (write(net, &magic, sizeof(magic)) < 0) {
1253                         err_nonfatal("Negotiation failed: %m");
1254                         if(client)
1255                                 exit(EXIT_FAILURE);
1256                 }
1257         }
1258         if(!client) {
1259                 /* modern */
1260                 uint32_t reserved;
1261                 uint32_t opt;
1262                 uint32_t namelen;
1263                 char* name;
1264                 int i;
1265
1266                 if(!servers)
1267                         err("programmer error");
1268                 if (write(net, &smallflags, sizeof(uint16_t)) < 0)
1269                         err("Negotiation failed: %m");
1270                 if (read(net, &reserved, sizeof(reserved)) < 0)
1271                         err("Negotiation failed: %m");
1272                 if (read(net, &magic, sizeof(magic)) < 0)
1273                         err("Negotiation failed: %m");
1274                 magic = ntohll(magic);
1275                 if(magic != opts_magic) {
1276                         close(net);
1277                         return NULL;
1278                 }
1279                 if (read(net, &opt, sizeof(opt)) < 0)
1280                         err("Negotiation failed: %m");
1281                 opt = ntohl(opt);
1282                 if(opt != NBD_OPT_EXPORT_NAME) {
1283                         close(net);
1284                         return NULL;
1285                 }
1286                 if (read(net, &namelen, sizeof(namelen)) < 0)
1287                         err("Negotiation failed: %m");
1288                 namelen = ntohl(namelen);
1289                 name = malloc(namelen+1);
1290                 name[namelen]=0;
1291                 if (read(net, name, namelen) < 0)
1292                         err("Negotiation failed: %m");
1293                 for(i=0; i<servers->len; i++) {
1294                         SERVER* serve = &(g_array_index(servers, SERVER, i));
1295                         if(!strcmp(serve->servename, name)) {
1296                                 CLIENT* client = g_new0(CLIENT, 1);
1297                                 client->server = serve;
1298                                 client->exportsize = OFFT_MAX;
1299                                 client->net = net;
1300                                 client->modern = TRUE;
1301                                 return client;
1302                         }
1303                 }
1304                 return NULL;
1305         }
1306         /* common */
1307         size_host = htonll((u64)(client->exportsize));
1308         if (write(net, &size_host, 8) < 0)
1309                 err("Negotiation failed: %m");
1310         if (client->server->flags & F_READONLY)
1311                 flags |= NBD_FLAG_READ_ONLY;
1312         if (!client->modern) {
1313                 /* oldstyle */
1314                 flags = htonl(flags);
1315                 if (write(client->net, &flags, 4) < 0)
1316                         err("Negotiation failed: %m");
1317         } else {
1318                 /* modern */
1319                 smallflags = (uint16_t)(flags & ~((uint16_t)0));
1320                 smallflags = htons(smallflags);
1321                 if (write(client->net, &smallflags, sizeof(smallflags)) < 0) {
1322                         err("Negotiation failed: %m");
1323                 }
1324         }
1325         /* common */
1326         if (write(client->net, zeros, 124) < 0)
1327                 err("Negotiation failed: %m");
1328         return NULL;
1329 }
1330
/**
 * sending macro: pass the whole of reply to writeit() on net.
 * The expansion already ends in a semicolon.
 */
#define SEND(net,reply) writeit( (net), &(reply), sizeof( reply ));
/**
 * error macro: send reply with its error field set to errcode (converted
 * with htonl()), then reset the error field to 0 for the next reply.
 */
#define ERROR(client,reply,errcode) { (reply).error = htonl(errcode); SEND((client)->net,(reply)); (reply).error = 0; }
1335 /**
1336  * Serve a file to a single client.
1337  *
1338  * @todo This beast needs to be split up in many tiny little manageable
1339  * pieces. Preferably with a chainsaw.
1340  *
1341  * @param client The client we're going to serve to.
1342  * @return when the client disconnects
1343  **/
1344 int mainloop(CLIENT *client) {
1345         struct nbd_request request;
1346         struct nbd_reply reply;
1347         gboolean go_on=TRUE;
1348 #ifdef DODBG
1349         int i = 0;
1350 #endif
1351         negotiate(client->net, client, NULL);
1352         DEBUG("Entering request loop!\n");
1353         reply.magic = htonl(NBD_REPLY_MAGIC);
1354         reply.error = 0;
1355         while (go_on) {
1356                 char buf[BUFSIZE];
1357                 size_t len;
1358 #ifdef DODBG
1359                 i++;
1360                 printf("%d: ", i);
1361 #endif
1362                 readit(client->net, &request, sizeof(request));
1363                 request.from = ntohll(request.from);
1364                 request.type = ntohl(request.type);
1365
1366                 if (request.type==NBD_CMD_DISC) {
1367                         msg2(LOG_INFO, "Disconnect request received.");
1368                         if (client->server->flags & F_COPYONWRITE) { 
1369                                 if (client->difmap) g_free(client->difmap) ;
1370                                 close(client->difffile);
1371                                 unlink(client->difffilename);
1372                                 free(client->difffilename);
1373                         }
1374                         go_on=FALSE;
1375                         continue;
1376                 }
1377
1378                 len = ntohl(request.len);
1379
1380                 if (request.magic != htonl(NBD_REQUEST_MAGIC))
1381                         err("Not enough magic.");
1382                 if (len > BUFSIZE + sizeof(struct nbd_reply))
1383                         err("Request too big!");
1384 #ifdef DODBG
1385                 printf("%s from %llu (%llu) len %d, ", request.type ? "WRITE" :
1386                                 "READ", (unsigned long long)request.from,
1387                                 (unsigned long long)request.from / 512, len);
1388 #endif
1389                 memcpy(reply.handle, request.handle, sizeof(reply.handle));
1390                 if ((request.from + len) > (OFFT_MAX)) {
1391                         DEBUG("[Number too large!]");
1392                         ERROR(client, reply, EINVAL);
1393                         continue;
1394                 }
1395
1396                 if (((ssize_t)((off_t)request.from + len) > client->exportsize)) {
1397                         DEBUG("[RANGE!]");
1398                         ERROR(client, reply, EINVAL);
1399                         continue;
1400                 }
1401
1402                 if (request.type==NBD_CMD_WRITE) {
1403                         DEBUG("wr: net->buf, ");
1404                         readit(client->net, buf, len);
1405                         DEBUG("buf->exp, ");
1406                         if ((client->server->flags & F_READONLY) ||
1407                             (client->server->flags & F_AUTOREADONLY)) {
1408                                 DEBUG("[WRITE to READONLY!]");
1409                                 ERROR(client, reply, EPERM);
1410                                 continue;
1411                         }
1412                         if (expwrite(request.from, buf, len, client)) {
1413                                 DEBUG("Write failed: %m" );
1414                                 ERROR(client, reply, errno);
1415                                 continue;
1416                         }
1417                         SEND(client->net, reply);
1418                         DEBUG("OK!\n");
1419                         continue;
1420                 }
1421                 /* READ */
1422
1423                 DEBUG("exp->buf, ");
1424                 if (expread(request.from, buf + sizeof(struct nbd_reply), len, client)) {
1425                         DEBUG("Read failed: %m");
1426                         ERROR(client, reply, errno);
1427                         continue;
1428                 }
1429
1430                 DEBUG("buf->net, ");
1431                 memcpy(buf, &reply, sizeof(struct nbd_reply));
1432                 writeit(client->net, buf, len + sizeof(struct nbd_reply));
1433                 DEBUG("OK!\n");
1434         }
1435         return 0;
1436 }
1437
1438 /**
1439  * Set up client export array, which is an array of FILE_INFO.
1440  * Also, split a single exportfile into multiple ones, if that was asked.
1441  * @param client information on the client which we want to setup export for
1442  **/
1443 void setupexport(CLIENT* client) {
1444         int i;
1445         off_t laststartoff = 0, lastsize = 0;
1446         int multifile = (client->server->flags & F_MULTIFILE);
1447
1448         client->export = g_array_new(TRUE, TRUE, sizeof(FILE_INFO));
1449
1450         /* If multi-file, open as many files as we can.
1451          * If not, open exactly one file.
1452          * Calculate file sizes as we go to get total size. */
1453         for(i=0; ; i++) {
1454                 FILE_INFO fi;
1455                 gchar *tmpname;
1456                 gchar* error_string;
1457                 mode_t mode = (client->server->flags & F_READONLY) ? O_RDONLY : O_RDWR;
1458
1459                 if(multifile) {
1460                         tmpname=g_strdup_printf("%s.%d", client->exportname, i);
1461                 } else {
1462                         tmpname=g_strdup(client->exportname);
1463                 }
1464                 DEBUG2( "Opening %s\n", tmpname );
1465                 fi.fhandle = open(tmpname, mode);
1466                 if(fi.fhandle == -1 && mode == O_RDWR) {
1467                         /* Try again because maybe media was read-only */
1468                         fi.fhandle = open(tmpname, O_RDONLY);
1469                         if(fi.fhandle != -1) {
1470                                 /* Opening the base file in copyonwrite mode is
1471                                  * okay */
1472                                 if(!(client->server->flags & F_COPYONWRITE)) {
1473                                         client->server->flags |= F_AUTOREADONLY;
1474                                         client->server->flags |= F_READONLY;
1475                                 }
1476                         }
1477                 }
1478                 if(fi.fhandle == -1) {
1479                         if(multifile && i>0)
1480                                 break;
1481                         error_string=g_strdup_printf(
1482                                 "Could not open exported file %s: %%m",
1483                                 tmpname);
1484                         err(error_string);
1485                 }
1486                 fi.startoff = laststartoff + lastsize;
1487                 g_array_append_val(client->export, fi);
1488                 g_free(tmpname);
1489
1490                 /* Starting offset and size of this file will be used to
1491                  * calculate starting offset of next file */
1492                 laststartoff = fi.startoff;
1493                 lastsize = size_autodetect(fi.fhandle);
1494
1495                 if(!multifile)
1496                         break;
1497         }
1498
1499         /* Set export size to total calculated size */
1500         client->exportsize = laststartoff + lastsize;
1501
1502         /* Export size may be overridden */
1503         if(client->server->expected_size) {
1504                 /* desired size must be <= total calculated size */
1505                 if(client->server->expected_size > client->exportsize) {
1506                         err("Size of exported file is too big\n");
1507                 }
1508
1509                 client->exportsize = client->server->expected_size;
1510         }
1511
1512         msg3(LOG_INFO, "Size of exported file/device is %llu", (unsigned long long)client->exportsize);
1513         if(multifile) {
1514                 msg3(LOG_INFO, "Total number of files: %d", i);
1515         }
1516 }
1517
1518 int copyonwrite_prepare(CLIENT* client) {
1519         off_t i;
1520         if ((client->difffilename = malloc(1024))==NULL)
1521                 err("Failed to allocate string for diff file name");
1522         snprintf(client->difffilename, 1024, "%s-%s-%d.diff",client->exportname,client->clientname,
1523                 (int)getpid()) ;
1524         client->difffilename[1023]='\0';
1525         msg3(LOG_INFO,"About to create map and diff file %s",client->difffilename) ;
1526         client->difffile=open(client->difffilename,O_RDWR | O_CREAT | O_TRUNC,0600) ;
1527         if (client->difffile<0) err("Could not create diff file (%m)") ;
1528         if ((client->difmap=calloc(client->exportsize/DIFFPAGESIZE,sizeof(u32)))==NULL)
1529                 err("Could not allocate memory") ;
1530         for (i=0;i<client->exportsize/DIFFPAGESIZE;i++) client->difmap[i]=(u32)-1 ;
1531
1532         return 0;
1533 }
1534
1535 /**
1536  * Run a command. This is used for the ``prerun'' and ``postrun'' config file
1537  * options
1538  *
1539  * @param command the command to be ran. Read from the config file
1540  * @param file the file name we're about to export
1541  **/
1542 int do_run(gchar* command, gchar* file) {
1543         gchar* cmd;
1544         int retval=0;
1545
1546         if(command && *command) {
1547                 cmd = g_strdup_printf(command, file);
1548                 retval=system(cmd);
1549                 g_free(cmd);
1550         }
1551         return retval;
1552 }
1553
1554 /**
1555  * Serve a connection. 
1556  *
1557  * @todo allow for multithreading, perhaps use libevent. Not just yet, though;
1558  * follow the road map.
1559  *
1560  * @param client a connected client
1561  **/
1562 void serveconnection(CLIENT *client) {
1563         if(do_run(client->server->prerun, client->exportname)) {
1564                 exit(EXIT_FAILURE);
1565         }
1566         setupexport(client);
1567
1568         if (client->server->flags & F_COPYONWRITE) {
1569                 copyonwrite_prepare(client);
1570         }
1571
1572         setmysockopt(client->net);
1573
1574         mainloop(client);
1575         do_run(client->server->postrun, client->exportname);
1576 }
1577
1578 /**
1579  * Find the name of the file we have to serve. This will use g_strdup_printf
1580  * to put the IP address of the client inside a filename containing
1581  * "%s" (in the form as specified by the "virtstyle" option). That name
1582  * is then written to client->exportname.
1583  *
1584  * @param net A socket connected to an nbd client
1585  * @param client information about the client. The IP address in human-readable
1586  * format will be written to a new char* buffer, the address of which will be
1587  * stored in client->clientname.
1588  **/
1589 void set_peername(int net, CLIENT *client) {
1590         struct sockaddr_storage addrin;
1591         struct sockaddr_storage netaddr;
1592         struct sockaddr_in  *netaddr4 = NULL;
1593         struct sockaddr_in6 *netaddr6 = NULL;
1594         size_t addrinlen = sizeof( addrin );
1595         struct addrinfo hints;
1596         struct addrinfo *ai = NULL;
1597         char peername[NI_MAXHOST];
1598         char netname[NI_MAXHOST];
1599         char *tmp = NULL;
1600         int i;
1601         int e;
1602         int shift;
1603
1604         if (getpeername(net, (struct sockaddr *) &addrin, (socklen_t *)&addrinlen) < 0)
1605                 err("getsockname failed: %m");
1606
1607         getnameinfo((struct sockaddr *)&addrin, (socklen_t)addrinlen,
1608                 peername, sizeof (peername), NULL, 0, NI_NUMERICHOST);
1609
1610         memset(&hints, '\0', sizeof (hints));
1611         hints.ai_flags = AI_ADDRCONFIG;
1612         e = getaddrinfo(peername, NULL, &hints, &ai);
1613
1614         if(e != 0) {
1615                 fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
1616                 freeaddrinfo(ai);
1617                 return;
1618         }
1619
1620         switch(client->server->virtstyle) {
1621                 case VIRT_NONE:
1622                         client->exportname=g_strdup(client->server->exportname);
1623                         break;
1624                 case VIRT_IPHASH:
1625                         for(i=0;i<strlen(peername);i++) {
1626                                 if(peername[i]=='.') {
1627                                         peername[i]='/';
1628                                 }
1629                         }
1630                 case VIRT_IPLIT:
1631                         client->exportname=g_strdup_printf(client->server->exportname, peername);
1632                         break;
1633                 case VIRT_CIDR:
1634                         memcpy(&netaddr, &addrin, addrinlen);
1635                         if(ai->ai_family == AF_INET) {
1636                                 netaddr4 = (struct sockaddr_in *)&netaddr;
1637                                 (netaddr4->sin_addr).s_addr>>=32-(client->server->cidrlen);
1638                                 (netaddr4->sin_addr).s_addr<<=32-(client->server->cidrlen);
1639
1640                                 getnameinfo((struct sockaddr *) netaddr4, (socklen_t) addrinlen,
1641                                                         netname, sizeof (netname), NULL, 0, NI_NUMERICHOST);
1642                                 tmp=g_strdup_printf("%s/%s", netname, peername);
1643                         }else if(ai->ai_family == AF_INET6) {
1644                                 netaddr6 = (struct sockaddr_in6 *)&netaddr;
1645
1646                                 shift = 128-(client->server->cidrlen);
1647                                 i = 3;
1648                                 while(shift >= 32) {
1649                                         ((netaddr6->sin6_addr).s6_addr32[i])=0;
1650                                         shift-=32;
1651                                         i--;
1652                                 }
1653                                 (netaddr6->sin6_addr).s6_addr32[i]>>=shift;
1654                                 (netaddr6->sin6_addr).s6_addr32[i]<<=shift;
1655
1656                                 getnameinfo((struct sockaddr *)netaddr6, (socklen_t)addrinlen,
1657                                             netname, sizeof(netname), NULL, 0, NI_NUMERICHOST);
1658                                 tmp=g_strdup_printf("%s/%s", netname, peername);
1659                         }
1660
1661                         if(tmp != NULL)
1662                           client->exportname=g_strdup_printf(client->server->exportname, tmp);
1663
1664                         break;
1665         }
1666
1667         freeaddrinfo(ai);
1668         msg4(LOG_INFO, "connect from %s, assigned file is %s", 
1669              peername, client->exportname);
1670         client->clientname=g_strdup(peername);
1671 }
1672
1673 /**
1674  * Destroy a pid_t*
1675  * @param data a pointer to pid_t which should be freed
1676  **/
1677 void destroy_pid_t(gpointer data) {
1678         g_free(data);
1679 }
1680
1681 /**
1682  * Loop through the available servers, and serve them. Never returns.
1683  **/
1684 int serveloop(GArray* servers) {
1685         struct sockaddr_storage addrin;
1686         socklen_t addrinlen=sizeof(addrin);
1687         int i;
1688         int max;
1689         int sock;
1690         fd_set mset;
1691         fd_set rset;
1692
1693         /* 
1694          * Set up the master fd_set. The set of descriptors we need
1695          * to select() for never changes anyway and it buys us a *lot*
1696          * of time to only build this once. However, if we ever choose
1697          * to not fork() for clients anymore, we may have to revisit
1698          * this.
1699          */
1700         max=0;
1701         FD_ZERO(&mset);
1702         for(i=0;i<servers->len;i++) {
1703                 if((sock=(g_array_index(servers, SERVER, i)).socket)) {
1704                         FD_SET(sock, &mset);
1705                         max=sock>max?sock:max;
1706                 }
1707         }
1708         if(modernsock) {
1709                 FD_SET(modernsock, &mset);
1710                 max=modernsock>max?modernsock:max;
1711         }
1712         for(;;) {
1713                 CLIENT *client = NULL;
1714                 pid_t *pid;
1715
1716                 memcpy(&rset, &mset, sizeof(fd_set));
1717                 if(select(max+1, &rset, NULL, NULL, NULL)>0) {
1718                         int net = 0;
1719                         SERVER* serve;
1720
1721                         DEBUG("accept, ");
1722                         if(FD_ISSET(modernsock, &rset)) {
1723                                 if((net=accept(modernsock, (struct sockaddr *) &addrin, &addrinlen)) < 0)
1724                                         err("accept: %m");
1725                                 client = negotiate(net, NULL, servers);
1726                                 if(!client) {
1727                                         err_nonfatal("negotiation failed");
1728                                         close(net);
1729                                         net=0;
1730                                 }
1731                         }
1732                         for(i=0;i<servers->len && !net;i++) {
1733                                 serve=&(g_array_index(servers, SERVER, i));
1734                                 if(FD_ISSET(serve->socket, &rset)) {
1735                                         if ((net=accept(serve->socket, (struct sockaddr *) &addrin, &addrinlen)) < 0)
1736                                                 err("accept: %m");
1737                                 }
1738                         }
1739                         if(net) {
1740                                 int sock_flags;
1741
1742                                 if((sock_flags = fcntl(net, F_GETFL, 0))==-1) {
1743                                         err("fcntl F_GETFL");
1744                                 }
1745                                 if(fcntl(net, F_SETFL, sock_flags &~O_NONBLOCK)==-1) {
1746                                         err("fcntl F_SETFL ~O_NONBLOCK");
1747                                 }
1748                                 if(!client) {
1749                                         client = g_new0(CLIENT, 1);
1750                                         client->server=serve;
1751                                         client->exportsize=OFFT_MAX;
1752                                         client->net=net;
1753                                 }
1754                                 set_peername(net, client);
1755                                 if (!authorized_client(client)) {
1756                                         msg2(LOG_INFO,"Unauthorized client") ;
1757                                         close(net);
1758                                         continue;
1759                                 }
1760                                 msg2(LOG_INFO,"Authorized client") ;
1761                                 pid=g_malloc(sizeof(pid_t));
1762 #ifndef NOFORK
1763                                 if ((*pid=fork())<0) {
1764                                         msg3(LOG_INFO,"Could not fork (%s)",strerror(errno)) ;
1765                                         close(net);
1766                                         continue;
1767                                 }
1768                                 if (*pid>0) { /* parent */
1769                                         close(net);
1770                                         g_hash_table_insert(children, pid, pid);
1771                                         continue;
1772                                 }
1773                                 /* child */
1774                                 g_hash_table_destroy(children);
1775                                 for(i=0;i<servers->len;i++) {
1776                                         serve=&g_array_index(servers, SERVER, i);
1777                                         close(serve->socket);
1778                                 }
1779                                 /* FALSE does not free the
1780                                 actual data. This is required,
1781                                 because the client has a
1782                                 direct reference into that
1783                                 data, and otherwise we get a
1784                                 segfault... */
1785                                 g_array_free(servers, FALSE);
1786 #endif // NOFORK
1787                                 msg2(LOG_INFO,"Starting to serve");
1788                                 serveconnection(client);
1789                                 exit(EXIT_SUCCESS);
1790                         }
1791                 }
1792         }
1793 }
1794
/**
 * Apply the options every listening socket needs: SO_REUSEADDR,
 * SO_KEEPALIVE and non-blocking mode. Any failure is fatal (reported
 * through err()).
 *
 * @param socket the listening socket to configure
 **/
void dosockopts(int socket) {
        /* Both options are boolean ints. The value used to be a single char
         * on sun while the length passed was still sizeof(int), which made
         * the call read past the end of the variable; a real int with a
         * matching length is correct everywhere. */
        int yes=1;
        int sock_flags;

        /* lose the pesky "Address already in use" error message */
        if (setsockopt(socket,SOL_SOCKET,SO_REUSEADDR,(const void *)&yes,sizeof(yes)) == -1) {
                err("setsockopt SO_REUSEADDR");
        }
        if (setsockopt(socket,SOL_SOCKET,SO_KEEPALIVE,(const void *)&yes,sizeof(yes)) == -1) {
                err("setsockopt SO_KEEPALIVE");
        }

        /* make the listening socket non-blocking */
        if ((sock_flags = fcntl(socket, F_GETFL, 0)) == -1) {
                err("fcntl F_GETFL");
        }
        if (fcntl(socket, F_SETFL, sock_flags | O_NONBLOCK) == -1) {
                err("fcntl F_SETFL O_NONBLOCK");
        }
}
1819
1820 /**
1821  * Connect a server's socket.
1822  *
1823  * @param serve the server we want to connect.
1824  **/
1825 int setup_serve(SERVER *serve) {
1826         struct addrinfo hints;
1827         struct addrinfo *ai = NULL;
1828         gchar *port = NULL;
1829         int e;
1830
1831         if(!do_oldstyle) {
1832                 return serve->servename ? 1 : 0;
1833         }
1834         memset(&hints,'\0',sizeof(hints));
1835         hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG | AI_NUMERICSERV;
1836         hints.ai_socktype = SOCK_STREAM;
1837         hints.ai_family = serve->socket_family;
1838
1839         port = g_strdup_printf ("%d", serve->port);
1840         if (port == NULL)
1841                 return 0;
1842
1843         e = getaddrinfo(serve->listenaddr,port,&hints,&ai);
1844
1845         g_free(port);
1846
1847         if(e != 0) {
1848                 fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
1849                 serve->socket = -1;
1850                 freeaddrinfo(ai);
1851                 exit(EXIT_FAILURE);
1852         }
1853
1854         if(serve->socket_family == AF_UNSPEC)
1855                 serve->socket_family = ai->ai_family;
1856
1857 #ifdef WITH_SDP
1858         if ((serve->flags) && F_SDP) {
1859                 if (ai->ai_family == AF_INET)
1860                         ai->ai_family = AF_INET_SDP;
1861                 else (ai->ai_family == AF_INET6)
1862                         ai->ai_family = AF_INET6_SDP;
1863         }
1864 #endif
1865         if ((serve->socket = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol)) < 0)
1866                 err("socket: %m");
1867
1868         dosockopts(serve->socket);
1869
1870         DEBUG("Waiting for connections... bind, ");
1871         e = bind(serve->socket, ai->ai_addr, ai->ai_addrlen);
1872         if (e != 0 && errno != EADDRINUSE)
1873                 err("bind: %m");
1874         DEBUG("listen, ");
1875         if (listen(serve->socket, 1) < 0)
1876                 err("listen: %m");
1877
1878         freeaddrinfo (ai);
1879         if(serve->servename) {
1880                 return 1;
1881         } else {
1882                 return 0;
1883         }
1884 }
1885
/**
 * Open the listening socket for "modern" connections.
 *
 * Resolves modern_listen on NBD_DEFAULT_PORT, creates a TCP socket from the
 * first result, applies the common socket options, binds it and starts
 * listening. The descriptor is stored in the global modernsock. A failed
 * lookup terminates the process; socket, bind and listen failures are fatal
 * via err().
 **/
void open_modern(void) {
        struct addrinfo hints;
        struct addrinfo* ai = NULL;
        int e;

        memset(&hints, '\0', sizeof(hints));
        hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG;
        hints.ai_socktype = SOCK_STREAM;
        hints.ai_family = AF_UNSPEC;
        hints.ai_protocol = IPPROTO_TCP;
        e = getaddrinfo(modern_listen, NBD_DEFAULT_PORT, &hints, &ai);
        if(e != 0) {
                fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
                exit(EXIT_FAILURE);
        }
        if((modernsock = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol))<0) {
                err("socket: %m");
        }

        dosockopts(modernsock);

        if(bind(modernsock, ai->ai_addr, ai->ai_addrlen)) {
                err("bind: %m");
        }
        if(listen(modernsock, 10) <0) {
                err("listen: %m");
        }

        freeaddrinfo(ai);
}
1917
1918 /**
1919  * Connect our servers.
1920  **/
1921 void setup_servers(GArray* servers) {
1922         int i;
1923         struct sigaction sa;
1924         int want_modern=0;
1925
1926         for(i=0;i<servers->len;i++) {
1927                 want_modern |= setup_serve(&(g_array_index(servers, SERVER, i)));
1928         }
1929         if(want_modern) {
1930                 open_modern();
1931         }
1932         children=g_hash_table_new_full(g_int_hash, g_int_equal, NULL, destroy_pid_t);
1933
1934         sa.sa_handler = sigchld_handler;
1935         sigemptyset(&sa.sa_mask);
1936         sa.sa_flags = SA_RESTART;
1937         if(sigaction(SIGCHLD, &sa, NULL) == -1)
1938                 err("sigaction: %m");
1939         sa.sa_handler = sigterm_handler;
1940         sigemptyset(&sa.sa_mask);
1941         sa.sa_flags = SA_RESTART;
1942         if(sigaction(SIGTERM, &sa, NULL) == -1)
1943                 err("sigaction: %m");
1944 }
1945
1946 /**
1947  * Go daemon (unless we specified at compile time that we didn't want this)
1948  * @param serve the first server of our configuration. If its port is zero,
1949  *      then do not daemonize, because we're doing inetd then. This parameter
1950  *      is only used to create a PID file of the form
1951  *      /var/run/nbd-server.&lt;port&gt;.pid; it's not modified in any way.
1952  **/
1953 #if !defined(NODAEMON) && !defined(NOFORK)
1954 void daemonize(SERVER* serve) {
1955         FILE*pidf;
1956
1957         if(serve && !(serve->port)) {
1958                 return;
1959         }
1960         if(daemon(0,0)<0) {
1961                 err("daemon");
1962         }
1963         if(!*pidftemplate) {
1964                 if(serve) {
1965                         strncpy(pidftemplate, "/var/run/nbd-server.%d.pid", 255);
1966                 } else {
1967                         strncpy(pidftemplate, "/var/run/nbd-server.pid", 255);
1968                 }
1969         }
1970         snprintf(pidfname, 255, pidftemplate, serve ? serve->port : 0);
1971         pidf=fopen(pidfname, "w");
1972         if(pidf) {
1973                 fprintf(pidf,"%d\n", (int)getpid());
1974                 fclose(pidf);
1975         } else {
1976                 perror("fopen");
1977                 fprintf(stderr, "Not fatal; continuing");
1978         }
1979 }
1980 #else
1981 #define daemonize(serve)
1982 #endif /* !defined(NODAEMON) && !defined(NOFORK) */
1983
1984 /*
1985  * Everything beyond this point (in the file) is run in non-daemon mode.
1986  * The stuff above daemonize() isn't.
1987  */
1988
1989 void serve_err(SERVER* serve, const char* msg) G_GNUC_NORETURN;
1990
1991 void serve_err(SERVER* serve, const char* msg) {
1992         g_message("Export of %s on port %d failed:", serve->exportname,
1993                         serve->port);
1994         err(msg);
1995 }
1996
1997 /**
1998  * Set up user-ID and/or group-ID
1999  **/
2000 void dousers(void) {
2001         struct passwd *pw;
2002         struct group *gr;
2003         gchar* str;
2004         if(rungroup) {
2005                 gr=getgrnam(rungroup);
2006                 if(!gr) {
2007                         str = g_strdup_printf("Invalid group name: %s", rungroup);
2008                         err(str);
2009                 }
2010                 if(setgid(gr->gr_gid)<0) {
2011                         err("Could not set GID: %m"); 
2012                 }
2013         }
2014         if(runuser) {
2015                 pw=getpwnam(runuser);
2016                 if(!pw) {
2017                         str = g_strdup_printf("Invalid user name: %s", runuser);
2018                         err(str);
2019                 }
2020                 if(setuid(pw->pw_uid)<0) {
2021                         err("Could not set UID: %m");
2022                 }
2023         }
2024 }
2025
2026 #ifndef ISSERVER
2027 void glib_message_syslog_redirect(const gchar *log_domain,
2028                                   GLogLevelFlags log_level,
2029                                   const gchar *message,
2030                                   gpointer user_data)
2031 {
2032     int level=LOG_DEBUG;
2033     
2034     switch( log_level )
2035     {
2036       case G_LOG_FLAG_FATAL:
2037       case G_LOG_LEVEL_CRITICAL:
2038       case G_LOG_LEVEL_ERROR:    
2039         level=LOG_ERR; 
2040         break;
2041       case G_LOG_LEVEL_WARNING:
2042         level=LOG_WARNING;
2043         break;
2044       case G_LOG_LEVEL_MESSAGE:
2045       case G_LOG_LEVEL_INFO:
2046         level=LOG_INFO;
2047         break;
2048       case G_LOG_LEVEL_DEBUG:
2049         level=LOG_DEBUG;
2050       default:
2051         level=LOG_ERR;
2052     }
2053     syslog(level, "%s", message);
2054 }
2055 #endif
2056
2057 /**
2058  * Main entry point...
2059  **/
2060 int main(int argc, char *argv[]) {
2061         SERVER *serve;
2062         GArray *servers;
2063         GError *err=NULL;
2064
2065         if (sizeof( struct nbd_request )!=28) {
2066                 fprintf(stderr,"Bad size of structure. Alignment problems?\n");
2067                 exit(EXIT_FAILURE) ;
2068         }
2069
2070         memset(pidftemplate, '\0', 256);
2071
2072         logging();
2073         config_file_pos = g_strdup(CFILE);
2074         serve=cmdline(argc, argv);
2075         servers = parse_cfile(config_file_pos, &err);
2076         
2077         if(serve) {
2078                 serve->socket_family = AF_UNSPEC;
2079
2080                 append_serve(serve, servers);
2081      
2082                 if (!(serve->port)) {
2083                         CLIENT *client;
2084 #ifndef ISSERVER
2085                         /* You really should define ISSERVER if you're going to use
2086                          * inetd mode, but if you don't, closing stdout and stderr
2087                          * (which inetd had connected to the client socket) will let it
2088                          * work. */
2089                         close(1);
2090                         close(2);
2091                         open("/dev/null", O_WRONLY);
2092                         open("/dev/null", O_WRONLY);
2093                         g_log_set_default_handler( glib_message_syslog_redirect, NULL );
2094 #endif
2095                         client=g_malloc(sizeof(CLIENT));
2096                         client->server=serve;
2097                         client->net=0;
2098                         client->exportsize=OFFT_MAX;
2099                         set_peername(0,client);
2100                         serveconnection(client);
2101                         return 0;
2102                 }
2103         }
2104     
2105         if(!servers || !servers->len) {
2106                 if(err && !(err->domain == g_quark_from_string("parse_cfile")
2107                                 && err->code == CFILE_NOTFOUND)) {
2108                         g_warning("Could not parse config file: %s", 
2109                                         err ? err->message : "Unknown error");
2110                 }
2111         }
2112         if(serve) {
2113                 g_warning("Specifying an export on the command line is deprecated.");
2114                 g_warning("Please use a configuration file instead.");
2115         }
2116
2117         if((!serve) && (!servers||!servers->len)) {
2118                 g_message("No configured exports; quitting.");
2119                 exit(EXIT_FAILURE);
2120         }
2121         daemonize(serve);
2122         setup_servers(servers);
2123         dousers();
2124         serveloop(servers);
2125         return 0 ;
2126 }