2 * Network Block Device - server
4 * Copyright 1996-1998 Pavel Machek, distribute under GPL
5 * <pavel@atrey.karlin.mff.cuni.cz>
6 * Copyright 2001-2004 Wouter Verhelst <wouter@debian.org>
7 * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk>
9 * Version 1.0 - hopefully 64-bit-clean
10 * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au>
11 * Version 1.2 - autodetect size of block devices, thanks to Peter T. Breuer <ptb@it.uc3m.es>
12 * Version 1.5 - can compile on Unix systems that don't have 64 bit integer
13 * type, or don't have 64 bit file offsets by defining FS_32BIT
14 * in compile options for nbd-server *only*. This can be done
15 * with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the
16 * original autoconf input file, or I would make it a configure
17 * option.) Ken Yap <ken@nlc.net.au>.
18 * Version 1.6 - fix autodetection of block device size and really make 64 bit
19 * clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk>
20 * Version 2.0 - Version synchronised with client
21 * Version 2.1 - Reap zombie client processes when they exit. Removed
22 * (uncommented) the _IO magic, it's no longer necessary. Wouter
23 * Verhelst <wouter@debian.org>
24 * Version 2.2 - Auto switch to read-only mode (useful for floppies).
25 * Version 2.3 - Fixed code so that Large File Support works. This
26 * removes the FS_32BIT compile-time directive; define
27 * _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be
28 * using FS_32BIT. This will allow you to use files >2GB instead of
29 * having to use the -m option. Wouter Verhelst <wouter@debian.org>
30 * Version 2.4 - Added code to keep track of children, so that we can
31 * properly kill them from initscripts. Add a call to daemon(),
32 * so that processes don't think they have to wait for us, which is
33 * interesting for initscripts as well. Wouter Verhelst
35 * Version 2.5 - Bugfix release: forgot to reset child_arraysize to
36 * zero after fork()ing, resulting in nbd-server going berserk
37 * when it receives a signal with at least one child open. Wouter
38 * Verhelst <wouter@debian.org>
39 * 10/10/2003 - Added socket option SO_KEEPALIVE (sf.net bug 819235);
40 * rectified type of mainloop::size_host (sf.net bugs 814435 and
41 * 817385); close the PID file after writing to it, so that the
42 * daemon can actually be found. Wouter Verhelst
44 * 10/10/2003 - Size of the data "size_host" was wrong and so was not
45 * correctly put in network endianness. Many types were corrected
46 * (size_t and off_t instead of int). <vspaceg@sourceforge.net>
47 * Version 2.6 - Some code cleanup.
48 * Version 2.7 - Better build system.
49 * 11/02/2004 - Doxygenified the source, modularized it a bit. Needs a
50 * lot more work, but this is a start. Wouter Verhelst
52 * 16/03/2010 - Add IPv6 support.
53 * Kitt Tientanopajai <kitt@kitty.in.th>
54 * Neutron Soutmun <neo.neutron@gmail.com>
55 * Suriya Soutmun <darksolar@gmail.com>
58 /* Includes LFS defines, which defines behaviours of some of the following
59 * headers, so must come before those */
62 #include <sys/types.h>
63 #include <sys/socket.h>
65 #include <sys/select.h> /* select */
66 #include <sys/wait.h> /* wait */
67 #ifdef HAVE_SYS_IOCTL_H
68 #include <sys/ioctl.h>
70 #include <sys/param.h>
71 #ifdef HAVE_SYS_MOUNT_H
72 #include <sys/mount.h> /* For BLKGETSIZE */
74 #include <signal.h> /* sigaction */
76 #include <netinet/tcp.h>
77 #include <netinet/in.h>
85 #include <arpa/inet.h>
95 /* used in cliserv.h, so must come first */
96 #define MY_NAME "nbd_server"
100 #include <sdp_inet.h>
103 /** Default position of the config file */
105 #define SYSCONFDIR "/etc"
107 #define CFILE SYSCONFDIR "/nbd-server/config"
109 /** Where our config file actually is */
110 gchar* config_file_pos;
112 /** What user we're running as */
114 /** What group we're running as */
115 gchar* rungroup=NULL;
116 /** whether to export using the old negotiation protocol (port-based) */
117 gboolean do_oldstyle=FALSE;
119 /* Whether we should avoid forking */
122 /** Logging macros, now nothing goes to syslog unless you say ISSERVER */
124 #define msg2(a,b) syslog(a,b)
125 #define msg3(a,b,c) syslog(a,b,c)
126 #define msg4(a,b,c,d) syslog(a,b,c,d)
128 #define msg2(a,b) g_message(b)
129 #define msg3(a,b,c) g_message(b,c)
130 #define msg4(a,b,c,d) g_message(b,c,d)
133 /* Debugging macros */
136 #define DEBUG(...) printf(__VA_ARGS__)
140 #ifndef PACKAGE_VERSION
141 #define PACKAGE_VERSION ""
/**
 * The highest value a variable of type off_t can reach. off_t is a signed
 * integer, so this is every bit set except for the leftmost (sign) bit.
 *
 * The value is built as (2^(N-2) - 1) * 2 + 1 instead of inverting
 * (off_t)1 << (N-1): shifting a 1 into the sign bit of a signed type is
 * undefined behaviour in C. The expansion is fully parenthesized so that it
 * can be dropped into any larger expression.
 */
#define OFFT_MAX ((off_t)(((((off_t)1) << (sizeof(off_t)*8-2)) - 1) * 2 + 1))
148 #define LINELEN 256 /**< Size of static buffer used to read the
149 authorization file (yuck) */
150 #define BUFSIZE ((1024*1024)+sizeof(struct nbd_reply)) /**< Size of buffer that can hold requests */
151 #define DIFFPAGESIZE 4096 /**< diff file uses those chunks */
152 #define F_READONLY 1 /**< flag to tell us a file is readonly */
153 #define F_MULTIFILE 2 /**< flag to tell us a file is exported using -m */
154 #define F_COPYONWRITE 4 /**< flag to tell us a file is exported using
156 #define F_AUTOREADONLY 8 /**< flag to tell us a file is set to autoreadonly */
157 #define F_SPARSE 16 /**< flag to tell us copyonwrite should use a sparse file */
158 #define F_SDP 32 /**< flag to tell us the export should be done using the Socket Direct Protocol for RDMA */
159 #define F_SYNC 64 /**< Whether to fsync() after a write */
160 #define F_FLUSH 128 /**< Whether server wants FLUSH to be sent by the client */
161 #define F_FUA 256 /**< Whether server wants FUA to be sent by the client */
162 #define F_ROTATIONAL 512 /**< Whether server wants the client to implement the elevator algorithm */
163 GHashTable *children;
164 char pidfname[256]; /**< name of our PID file */
165 char pidftemplate[256]; /**< template to be used for the filename of the PID file */
166 char default_authname[] = SYSCONFDIR "/nbd-server/allow"; /**< default name of allow file */
168 int modernsock=0; /**< Socket for the modern handler. Not used
169 if a client was only specified on the
170 command line; only port used if
171 oldstyle is set to false (and then the
172 command-line client isn't used, gna gna) */
173 char* modern_listen; /**< listenaddr value for modernsock */
176 * Types of virtualization
179 VIRT_NONE=0, /**< No virtualization */
180 VIRT_IPLIT, /**< Literal IP address as part of the filename */
181 VIRT_IPHASH, /**< Replacing all dots in an ip address by a / before
182 doing the same as in IPLIT */
183 VIRT_CIDR, /**< Every subnet in its own directory */
187 * Variables associated with a server.
190 gchar* exportname; /**< (unprocessed) filename of the file we're exporting */
191 off_t expected_size; /**< size of the exported file as it was told to
192 us through configuration */
193 gchar* listenaddr; /**< The IP address we're listening on */
194 unsigned int port; /**< port we're exporting this file at */
195 char* authname; /**< filename of the authorization file */
196 int flags; /**< flags associated with this exported file */
197 int socket; /**< The socket of this server. */
198 int socket_family; /**< family of the socket */
199 VIRT_STYLE virtstyle;/**< The style of virtualization, if any */
200 uint8_t cidrlen; /**< The length of the mask when we use
201 CIDR-style virtualization */
202 gchar* prerun; /**< command to be ran after connecting a client,
203 but before starting to serve */
204 gchar* postrun; /**< command that will be ran after the client
206 gchar* servename; /**< name of the export as selected by nbd-client */
207 int max_connections; /**< maximum number of opened connections */
208 gchar* transactionlog;/**< filename for transaction log */
212 * Variables associated with a client socket.
215 int fhandle; /**< file descriptor */
216 off_t startoff; /**< starting offset of this file */
220 off_t exportsize; /**< size of the file we're exporting */
221 char *clientname; /**< peer */
222 char *exportname; /**< (processed) filename of the file we're exporting */
223 GArray *export; /**< array of FILE_INFO of exported files;
224 array size is always 1 unless we're
225 doing the multiple file option */
226 int net; /**< The actual client socket */
227 SERVER *server; /**< The server this client is getting data from */
228 char* difffilename; /**< filename of the copy-on-write file, if any */
229 int difffile; /**< filedescriptor of copyonwrite file. @todo
230 shouldn't this be an array too? (cfr export) Or
231 make -m and -c mutually exclusive */
232 u32 difffilelen; /**< number of pages in difffile */
233 u32 *difmap; /**< see comment on the global difmap for this one */
234 gboolean modern; /**< client was negotiated using modern negotiation protocol */
235 int transactionlogfd;/**< fd for transaction log */
239 * Type of configuration file values
242 PARAM_INT, /**< This parameter is an integer */
243 PARAM_STRING, /**< This parameter is a string */
244 PARAM_BOOL, /**< This parameter is a boolean */
248 * Configuration file values
251 gchar *paramname; /**< Name of the parameter, as it appears in
253 gboolean required; /**< Whether this is a required (as opposed to
254 optional) parameter */
255 PARAM_TYPE ptype; /**< Type of the parameter. */
256 gpointer target; /**< Pointer to where the data of this
257 parameter should be written. If ptype is
258 PARAM_BOOL, the data is or'ed rather than
260 gint flagval; /**< Flag mask for this parameter in case ptype
265 * Check whether a client is allowed to connect. Works with an authorization
266 * file which contains one line per machine, no wildcards.
268 * @param opts The client who's trying to connect.
269 * @return 0 - authorization refused, 1 - OK
271 int authorized_client(CLIENT *opts) {
272 const char *ERRMSG="Invalid entry '%s' in authfile '%s', so, refusing all connections.";
277 struct in_addr client;
278 struct in_addr cltemp;
281 if ((f=fopen(opts->server->authname,"r"))==NULL) {
282 msg4(LOG_INFO,"Can't open authorization file %s (%s).",
283 opts->server->authname,strerror(errno)) ;
287 inet_aton(opts->clientname, &client);
288 while (fgets(line,LINELEN,f)!=NULL) {
289 if((tmp=index(line, '/'))) {
290 if(strlen(line)<=tmp-line) {
291 msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
295 if(!inet_aton(line,&addr)) {
296 msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
299 len=strtol(tmp, NULL, 0);
300 addr.s_addr>>=32-len;
301 addr.s_addr<<=32-len;
302 memcpy(&cltemp,&client,sizeof(client));
303 cltemp.s_addr>>=32-len;
304 cltemp.s_addr<<=32-len;
305 if(addr.s_addr == cltemp.s_addr) {
309 if (strncmp(line,opts->clientname,strlen(opts->clientname))==0) {
319 * Read data from a file descriptor into a buffer
321 * @param f a file descriptor
322 * @param buf a buffer
323 * @param len the number of bytes to be read
325 static inline void readit(int f, void *buf, size_t len) {
329 if ((res = read(f, buf, len)) <= 0) {
330 if(errno != EAGAIN) {
331 err("Read failed: %m");
341 * Write data from a buffer into a filedescriptor
343 * @param f a file descriptor
344 * @param buf a buffer containing data
345 * @param len the number of bytes to be written
347 static inline void writeit(int f, void *buf, size_t len) {
351 if ((res = write(f, buf, len)) <= 0)
352 err("Send failed: %m");
359 * Print out a message about how to use nbd-server. Split out to a separate
360 * function so that we can call it from multiple places
363 printf("This is nbd-server version " VERSION "\n");
364 printf("Usage: [ip:|ip6@]port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-C configuration file] [-p PID file name] [-o section name] [-M max connections]\n"
365 "\t-r|--read-only\t\tread only\n"
366 "\t-m|--multi-file\t\tmultiple file\n"
367 "\t-c|--copy-on-write\tcopy on write\n"
368 "\t-C|--config-file\tspecify an alternate configuration file\n"
369 "\t-l|--authorize-file\tfile with list of hosts that are allowed to\n\t\t\t\tconnect.\n"
370 "\t-p|--pid-file\t\tspecify a filename to write our PID to\n"
371 "\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n"
372 "\t-M|--max-connections\tspecify the maximum number of opened connections\n\n"
373 "\tif port is set to 0, stdin is used (for running from inetd)\n"
374 "\tif file_to_export contains '%%s', it is substituted with the IP\n"
375 "\t\taddress of the machine trying to connect\n"
376 "\tif ip is set, it contains the local IP address on which we're listening.\n\tif not, the server will listen on all local IP addresses\n");
377 printf("Using configuration file %s\n", CFILE);
380 /* Dumps a config file section of the given SERVER*, and exits. */
381 void dump_section(SERVER* serve, gchar* section_header) {
382 printf("[%s]\n", section_header);
383 printf("\texportname = %s\n", serve->exportname);
384 printf("\tlistenaddr = %s\n", serve->listenaddr);
385 printf("\tport = %d\n", serve->port);
386 if(serve->flags & F_READONLY) {
387 printf("\treadonly = true\n");
389 if(serve->flags & F_MULTIFILE) {
390 printf("\tmultifile = true\n");
392 if(serve->flags & F_COPYONWRITE) {
393 printf("\tcopyonwrite = true\n");
395 if(serve->expected_size) {
396 printf("\tfilesize = %lld\n", (long long int)serve->expected_size);
398 if(serve->authname) {
399 printf("\tauthfile = %s\n", serve->authname);
405 * Parse the command line.
407 * @param argc the argc argument to main()
408 * @param argv the argv argument to main()
410 SERVER* cmdline(int argc, char *argv[]) {
414 struct option long_options[] = {
415 {"read-only", no_argument, NULL, 'r'},
416 {"multi-file", no_argument, NULL, 'm'},
417 {"copy-on-write", no_argument, NULL, 'c'},
418 {"dont-fork", no_argument, NULL, 'd'},
419 {"authorize-file", required_argument, NULL, 'l'},
420 {"config-file", required_argument, NULL, 'C'},
421 {"pid-file", required_argument, NULL, 'p'},
422 {"output-config", required_argument, NULL, 'o'},
423 {"max-connection", required_argument, NULL, 'M'},
430 gboolean do_output=FALSE;
431 gchar* section_header="";
437 serve=g_new0(SERVER, 1);
438 serve->authname = g_strdup(default_authname);
439 serve->virtstyle=VIRT_IPLIT;
440 while((c=getopt_long(argc, argv, "-C:cdl:mo:rp:M:", long_options, &i))>=0) {
443 /* non-option argument */
444 switch(nonspecial++) {
446 if(strchr(optarg, ':') == strrchr(optarg, ':')) {
447 addr_port=g_strsplit(optarg, ":", 2);
449 /* Check for "@" - maybe user using this separator
452 g_strfreev(addr_port);
453 addr_port=g_strsplit(optarg, "@", 2);
456 addr_port=g_strsplit(optarg, "@", 2);
460 serve->port=strtol(addr_port[1], NULL, 0);
461 serve->listenaddr=g_strdup(addr_port[0]);
463 serve->listenaddr=NULL;
464 serve->port=strtol(addr_port[0], NULL, 0);
466 g_strfreev(addr_port);
469 serve->exportname = g_strdup(optarg);
470 if(serve->exportname[0] != '/') {
471 fprintf(stderr, "E: The to be exported file needs to be an absolute filename!\n");
476 last=strlen(optarg)-1;
478 if (suffix == 'k' || suffix == 'K' ||
479 suffix == 'm' || suffix == 'M')
481 es = (off_t)atoll(optarg);
489 serve->expected_size = es;
494 serve->flags |= F_READONLY;
497 serve->flags |= F_MULTIFILE;
501 section_header = g_strdup(optarg);
504 strncpy(pidftemplate, optarg, 256);
507 serve->flags |=F_COPYONWRITE;
513 g_free(config_file_pos);
514 config_file_pos=g_strdup(optarg);
517 g_free(serve->authname);
518 serve->authname=g_strdup(optarg);
521 serve->max_connections = strtol(optarg, NULL, 0);
529 /* What's left: the port to export, the name of the to be exported
530 * file, and, optionally, the size of the file, in that order. */
539 g_critical("Need a complete configuration on the command line to output a config file section!");
542 dump_section(serve, section_header);
548 * Error codes for config file parsing
551 CFILE_NOTFOUND, /**< The configuration file is not found */
552 CFILE_MISSING_GENERIC, /**< The (required) group "generic" is missing */
553 CFILE_KEY_MISSING, /**< A (required) key is missing */
554 CFILE_VALUE_INVALID, /**< A value is syntactically invalid */
555 CFILE_VALUE_UNSUPPORTED,/**< A value is not supported in this build */
556 CFILE_PROGERR, /**< Programmer error */
557 CFILE_NO_EXPORTS, /**< A config file was specified that does not
558 define any exports */
559 CFILE_INCORRECT_PORT, /**< The reserved port was specified for an
564 * Remove a SERVER from memory. Used from the hash table
566 void remove_server(gpointer s) {
570 g_free(server->exportname);
572 g_free(server->authname);
573 if(server->listenaddr)
574 g_free(server->listenaddr);
576 g_free(server->prerun);
578 g_free(server->postrun);
579 if(server->transactionlog)
580 g_free(server->transactionlog);
586 * @param s the old server we want to duplicate
587 * @return new duplicated server
589 SERVER* dup_serve(SERVER *s) {
590 SERVER *serve = NULL;
592 serve=g_new0(SERVER, 1);
597 serve->exportname = g_strdup(s->exportname);
599 serve->expected_size = s->expected_size;
602 serve->listenaddr = g_strdup(s->listenaddr);
604 serve->port = s->port;
607 serve->authname = strdup(s->authname);
609 serve->flags = s->flags;
610 serve->socket = s->socket;
611 serve->socket_family = s->socket_family;
612 serve->virtstyle = s->virtstyle;
613 serve->cidrlen = s->cidrlen;
616 serve->prerun = g_strdup(s->prerun);
619 serve->postrun = g_strdup(s->postrun);
621 if(s->transactionlog)
622 serve->transactionlog = g_strdup(s->transactionlog);
625 serve->servename = g_strdup(s->servename);
627 serve->max_connections = s->max_connections;
633 * append new server to array
635 * @param a server array
636 * @return 0 success, -1 error
638 int append_serve(SERVER *s, GArray *a) {
640 struct addrinfo hints;
641 struct addrinfo *ai = NULL;
642 struct addrinfo *rp = NULL;
643 char host[NI_MAXHOST];
649 err("Invalid parsing server");
653 port = g_strdup_printf("%d", s->port);
655 memset(&hints,'\0',sizeof(hints));
656 hints.ai_family = AF_UNSPEC;
657 hints.ai_socktype = SOCK_STREAM;
658 hints.ai_flags = AI_ADDRCONFIG | AI_PASSIVE;
659 hints.ai_protocol = IPPROTO_TCP;
661 e = getaddrinfo(s->listenaddr, port, &hints, &ai);
667 for (rp = ai; rp != NULL; rp = rp->ai_next) {
668 e = getnameinfo(rp->ai_addr, rp->ai_addrlen, host, sizeof(host), NULL, 0, NI_NUMERICHOST);
670 if (e != 0) { // error
671 fprintf(stderr, "getnameinfo: %s\n", gai_strerror(e));
675 // duplicate server and set listenaddr to resolved IP address
678 ns->listenaddr = g_strdup(host);
679 ns->socket_family = rp->ai_family;
680 g_array_append_val(a, *ns);
688 fprintf(stderr, "getaddrinfo failed on listen host/address: %s (%s)\n", s->listenaddr ? s->listenaddr : "any", gai_strerror(e));
699 * Parse the config file.
701 * @param f the name of the config file
702 * @param e a GError. @see CFILE_ERRORS for what error values this function can
704 * @return a GArray of SERVER structures. If the config file is empty or does not
705 * exist, returns an empty GArray; if the config file contains an
706 * error, returns NULL, and e is set appropriately
708 GArray* parse_cfile(gchar* f, GError** e) {
709 const char* DEFAULT_ERROR = "Could not parse %s in group %s: %s";
710 const char* MISSING_REQUIRED_ERROR = "Could not find required value %s in group %s: %s";
712 gchar *virtstyle=NULL;
714 { "exportname", TRUE, PARAM_STRING, &(s.exportname), 0 },
715 { "port", TRUE, PARAM_INT, &(s.port), 0 },
716 { "authfile", FALSE, PARAM_STRING, &(s.authname), 0 },
717 { "filesize", FALSE, PARAM_INT, &(s.expected_size), 0 },
718 { "virtstyle", FALSE, PARAM_STRING, &(virtstyle), 0 },
719 { "prerun", FALSE, PARAM_STRING, &(s.prerun), 0 },
720 { "postrun", FALSE, PARAM_STRING, &(s.postrun), 0 },
721 { "transactionlog", FALSE, PARAM_STRING, &(s.transactionlog), 0 },
722 { "readonly", FALSE, PARAM_BOOL, &(s.flags), F_READONLY },
723 { "multifile", FALSE, PARAM_BOOL, &(s.flags), F_MULTIFILE },
724 { "copyonwrite", FALSE, PARAM_BOOL, &(s.flags), F_COPYONWRITE },
725 { "sparse_cow", FALSE, PARAM_BOOL, &(s.flags), F_SPARSE },
726 { "sdp", FALSE, PARAM_BOOL, &(s.flags), F_SDP },
727 { "sync", FALSE, PARAM_BOOL, &(s.flags), F_SYNC },
728 { "flush", FALSE, PARAM_BOOL, &(s.flags), F_FLUSH },
729 { "fua", FALSE, PARAM_BOOL, &(s.flags), F_FUA },
730 { "rotational", FALSE, PARAM_BOOL, &(s.flags), F_ROTATIONAL },
731 { "listenaddr", FALSE, PARAM_STRING, &(s.listenaddr), 0 },
732 { "maxconnections", FALSE, PARAM_INT, &(s.max_connections), 0 },
734 const int lp_size=sizeof(lp)/sizeof(PARAM);
736 { "user", FALSE, PARAM_STRING, &runuser, 0 },
737 { "group", FALSE, PARAM_STRING, &rungroup, 0 },
738 { "oldstyle", FALSE, PARAM_BOOL, &do_oldstyle, 1 },
739 { "listenaddr", FALSE, PARAM_STRING, &modern_listen, 0 },
742 int p_size=sizeof(gp)/sizeof(PARAM);
745 const char *err_msg=NULL;
754 errdomain = g_quark_from_string("parse_cfile");
755 cfile = g_key_file_new();
756 retval = g_array_new(FALSE, TRUE, sizeof(SERVER));
757 if(!g_key_file_load_from_file(cfile, f, G_KEY_FILE_KEEP_COMMENTS |
758 G_KEY_FILE_KEEP_TRANSLATIONS, &err)) {
759 g_set_error(e, errdomain, CFILE_NOTFOUND, "Could not open config file %s.", f);
760 g_key_file_free(cfile);
763 startgroup = g_key_file_get_start_group(cfile);
764 if(!startgroup || strcmp(startgroup, "generic")) {
765 g_set_error(e, errdomain, CFILE_MISSING_GENERIC, "Config file does not contain the [generic] group!");
766 g_key_file_free(cfile);
769 groups = g_key_file_get_groups(cfile, NULL);
770 for(i=0;groups[i];i++) {
771 memset(&s, '\0', sizeof(SERVER));
773 /* After the [generic] group, start parsing exports */
778 for(j=0;j<p_size;j++) {
779 g_assert(p[j].target != NULL);
780 g_assert(p[j].ptype==PARAM_INT||p[j].ptype==PARAM_STRING||p[j].ptype==PARAM_BOOL);
783 *((gint*)p[j].target) =
784 g_key_file_get_integer(cfile,
790 *((gchar**)p[j].target) =
791 g_key_file_get_string(cfile,
797 value = g_key_file_get_boolean(cfile,
799 p[j].paramname, &err);
802 *((gint*)p[j].target) |= p[j].flagval;
804 *((gint*)p[j].target) &= ~(p[j].flagval);
809 if(!strcmp(p[j].paramname, "port") && !strcmp(p[j].target, NBD_DEFAULT_PORT)) {
810 g_set_error(e, errdomain, CFILE_INCORRECT_PORT, "Config file specifies default port for oldstyle export");
811 g_key_file_free(cfile);
815 if(err->code == G_KEY_FILE_ERROR_KEY_NOT_FOUND) {
817 /* Ignore not-found error for optional values */
821 err_msg = MISSING_REQUIRED_ERROR;
824 err_msg = DEFAULT_ERROR;
826 g_set_error(e, errdomain, CFILE_VALUE_INVALID, err_msg, p[j].paramname, groups[i], err->message);
827 g_array_free(retval, TRUE);
829 g_key_file_free(cfile);
834 if(!strncmp(virtstyle, "none", 4)) {
835 s.virtstyle=VIRT_NONE;
836 } else if(!strncmp(virtstyle, "ipliteral", 9)) {
837 s.virtstyle=VIRT_IPLIT;
838 } else if(!strncmp(virtstyle, "iphash", 6)) {
839 s.virtstyle=VIRT_IPHASH;
840 } else if(!strncmp(virtstyle, "cidrhash", 8)) {
841 s.virtstyle=VIRT_CIDR;
842 if(strlen(virtstyle)<10) {
843 g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s: missing length", virtstyle, groups[i]);
844 g_array_free(retval, TRUE);
845 g_key_file_free(cfile);
848 s.cidrlen=strtol(virtstyle+8, NULL, 0);
850 g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s", virtstyle, groups[i]);
851 g_array_free(retval, TRUE);
852 g_key_file_free(cfile);
855 if(s.port && !do_oldstyle) {
856 g_warning("A port was specified, but oldstyle exports were not requested. This may not do what you expect.");
857 g_warning("Please read 'man 5 nbd-server' and search for oldstyle for more info");
860 s.virtstyle=VIRT_IPLIT;
862 /* Don't need to free this, it's not our string */
864 /* Don't append values for the [generic] group */
866 s.socket_family = AF_UNSPEC;
867 s.servename = groups[i];
869 append_serve(&s, retval);
876 if(s.flags & F_SDP) {
877 g_set_error(e, errdomain, CFILE_VALUE_UNSUPPORTED, "This nbd-server was built without support for SDP, yet group %s uses it", groups[i]);
878 g_array_free(retval, TRUE);
879 g_key_file_free(cfile);
885 g_set_error(e, errdomain, CFILE_NO_EXPORTS, "The config file does not specify any exports");
887 g_key_file_free(cfile);
892 * Signal handler for SIGCHLD
893 * @param s the signal we're handling (must be SIGCHLD, or something
896 void sigchld_handler(int s) {
901 while((pid=waitpid(-1, &status, WNOHANG)) > 0) {
902 if(WIFEXITED(status)) {
903 msg3(LOG_INFO, "Child exited with %d", WEXITSTATUS(status));
905 i=g_hash_table_lookup(children, &pid);
907 msg3(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid);
909 DEBUG("Removing %d from the list of children", pid);
910 g_hash_table_remove(children, &pid);
916 * Kill a child. Called from sigterm_handler::g_hash_table_foreach.
919 * @param value the value corresponding to the above key
920 * @param user_data a pointer which we always set to 1, so that we know what
923 void killchild(gpointer key, gpointer value, gpointer user_data) {
925 int *parent=user_data;
932 * Handle SIGTERM and dispatch it to our children
933 * @param s the signal we're handling (must be SIGTERM, or something
934 * is severely wrong).
936 void sigterm_handler(int s) {
939 g_hash_table_foreach(children, killchild, &parent);
949 * Detect the size of a file.
951 * @param fhandle An open filedescriptor
952 * @return the size of the file, or OFFT_MAX if detection was
955 off_t size_autodetect(int fhandle) {
958 struct stat stat_buf;
961 #ifdef HAVE_SYS_MOUNT_H
962 #ifdef HAVE_SYS_IOCTL_H
964 DEBUG("looking for export size with ioctl BLKGETSIZE64\n");
965 if (!ioctl(fhandle, BLKGETSIZE64, &bytes) && bytes) {
968 #endif /* BLKGETSIZE64 */
969 #endif /* HAVE_SYS_IOCTL_H */
970 #endif /* HAVE_SYS_MOUNT_H */
972 DEBUG("looking for fhandle size with fstat\n");
973 stat_buf.st_size = 0;
974 error = fstat(fhandle, &stat_buf);
976 if(stat_buf.st_size > 0)
977 return (off_t)stat_buf.st_size;
979 err("fstat failed: %m");
982 DEBUG("looking for fhandle size with lseek SEEK_END\n");
983 es = lseek(fhandle, (off_t)0, SEEK_END);
984 if (es > ((off_t)0)) {
987 DEBUG("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4)));
990 err("Could not find size of exported block device: %m");
995 * Get the file handle and offset, given an export offset.
997 * @param export An array of export files
998 * @param a The offset to get corresponding file/offset for
999 * @param fhandle [out] File descriptor
1000 * @param foffset [out] Offset into fhandle
1001 * @param maxbytes [out] Tells how many bytes can be read/written
1002 * from fhandle starting at foffset (0 if there is no limit)
1003 * @return 0 on success, -1 on failure
1005 int get_filepos(GArray* export, off_t a, int* fhandle, off_t* foffset, size_t* maxbytes ) {
1006 /* Negative offset not allowed */
1010 /* Binary search for last file with starting offset <= a */
1013 int end = export->len - 1;
1014 while( start <= end ) {
1015 int mid = (start + end) / 2;
1016 fi = g_array_index(export, FILE_INFO, mid);
1017 if( fi.startoff < a ) {
1019 } else if( fi.startoff > a ) {
1027 /* end should never go negative, since first startoff is 0 and a >= 0 */
1030 fi = g_array_index(export, FILE_INFO, end);
1031 *fhandle = fi.fhandle;
1032 *foffset = a - fi.startoff;
1034 if( end+1 < export->len ) {
1035 FILE_INFO fi_next = g_array_index(export, FILE_INFO, end+1);
1036 *maxbytes = fi_next.startoff - a;
1043 * seek to a position in a file, with error handling.
1044 * @param handle a filedescriptor
1045 * @param a position to seek to
1046 * @todo get rid of this; lastpoint is a global variable right now, but it
1047 * shouldn't be. If we pass it on as a parameter, that makes things a *lot*
1050 void myseek(int handle,off_t a) {
1051 if (lseek(handle, a, SEEK_SET) < 0) {
1052 err("Can not seek locally!\n");
1057 * Write an amount of bytes at a given offset to the right file. This
1058 * abstracts the write-side of the multiple file option.
1060 * @param a The offset where the write should start
1061 * @param buf The buffer to write from
1062 * @param len The length of buf
1063 * @param client The client we're serving for
1064 * @return The number of bytes actually written, or -1 in case of an error
1066 ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
1072 if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
1074 if(maxbytes && len > maxbytes)
1077 DEBUG("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle, (long long unsigned)foffset, (unsigned int)len, fua);
1079 myseek(fhandle, foffset);
1080 retval = write(fhandle, buf, len);
1081 if(client->server->flags & F_SYNC) {
1085 /* This is where we would do the following
1086 * #ifdef USE_SYNC_FILE_RANGE
1087 * However, we don't, for the reasons set out below
1088 * by Christoph Hellwig <hch@infradead.org>
1091 * fdatasync is equivalent to fsync except that it does not flush
1092 * non-essential metadata (basically just timestamps in practice), but it
1093 * does flush metadata required to find the data again, e.g. allocation
1094 * information and extent maps. sync_file_range does nothing but flush
1095 * out pagecache content - it means you basically won't get your data
1096 * back in case of a crash if you either:
1098 * a) have a volatile write cache in your disk (e.g. any normal SATA disk)
1099 * b) are using a sparse file on a filesystem
1100 * c) are using a fallocate-preallocated file on a filesystem
1101 * d) use any file on a COW filesystem like btrfs
1103 * e.g. it only does anything useful for you if you do not have a volatile
1104 * write cache, and either use a raw block device node, or just overwrite
1105 * an already fully allocated (and not preallocated) file on a non-COW
1109 * What we should do is open a second FD with O_DSYNC set, then write to
1110 * that when appropriate. However, with a Linux client, every REQ_FUA
1111 * immediately follows a REQ_FLUSH, so fdatasync does not cause performance
1116 sync_file_range(fhandle, foffset, len,
1117 SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE |
1118 SYNC_FILE_RANGE_WAIT_AFTER);
1127 * Call rawexpwrite repeatedly until all data has been written.
1128 * @return 0 on success, nonzero on failure
1130 int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
1133 while(len > 0 && (ret=rawexpwrite(a, buf, len, client, fua)) > 0 ) {
1138 return (ret < 0 || len != 0);
1142 * Read an amount of bytes at a given offset from the right file. This
1143 * abstracts the read-side of the multiple files option.
1145 * @param a The offset where the read should start
1146 * @param buf A buffer to read into
1147 * @param len The size of buf
1148 * @param client The client we're serving for
1149 * @return The number of bytes actually read, or -1 in case of an
1152 ssize_t rawexpread(off_t a, char *buf, size_t len, CLIENT *client) {
1157 if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
1159 if(maxbytes && len > maxbytes)
1162 DEBUG("(READ from fd %d offset %llu len %u), ", fhandle, (long long unsigned int)foffset, (unsigned int)len);
1164 myseek(fhandle, foffset);
1165 return read(fhandle, buf, len);
1169 * Call rawexpread repeatedly until all data has been read.
1170 * @return 0 on success, nonzero on failure
1172 int rawexpread_fully(off_t a, char *buf, size_t len, CLIENT *client) {
1175 while(len > 0 && (ret=rawexpread(a, buf, len, client)) > 0 ) {
1180 return (ret < 0 || len != 0);
1184 * Read an amount of bytes at a given offset from the right file. This
1185 * abstracts the read-side of the copyonwrite stuff, and calls
1186 * rawexpread() with the right parameters to do the actual work.
1187 * @param a The offset where the read should start
1188 * @param buf A buffer to read into
1189 * @param len The size of buf
1190 * @param client The client we're going to read for
1191 * @return 0 on success, nonzero on failure
/*
 * Read len bytes at export offset a into buf.
 * Without F_COPYONWRITE the request goes straight to
 * rawexpread_fully().  Otherwise the range is walked one
 * DIFFPAGESIZE-sized page at a time: a page whose difmap entry differs
 * from (u32)-1 is read from the diff file, any other page is read from
 * the original export.
 * Returns -1 as soon as one chunk cannot be read in full.  The return
 * statement for the success case is not visible in this excerpt.
 */
1193 int expread(off_t a, char *buf, size_t len, CLIENT *client) {
1194 off_t rdlen, offset;
1195 off_t mapcnt, mapl, maph, pagestart;
1197 if (!(client->server->flags & F_COPYONWRITE))
1198 return(rawexpread_fully(a, buf, len, client));
1199 DEBUG("Asked to read %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
/* Index of the first and of the last page touched by the request. */
1201 mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
/* NOTE(review): difmap[mapcnt] is used below without a visible bounds check. */
1203 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
1204 pagestart=mapcnt*DIFFPAGESIZE;
/*
 * Chunk length: the remaining len or the rest of the current page,
 * whichever is smaller.  offset is assigned on a line that is not
 * visible in this excerpt.
 */
1206 rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
1207 len : (size_t)DIFFPAGESIZE-offset;
1208 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1209 DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
1210 (unsigned long)(client->difmap[mapcnt]));
/*
 * NOTE(review): if difmap entries are 32 bits wide (they are compared
 * with (u32)-1) and DIFFPAGESIZE is an int-sized constant, the product
 * below is computed in 32 bits -- confirm the types.
 */
1211 myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset);
/* A short read from the diff file counts as a failure. */
1212 if (read(client->difffile, buf, rdlen) != rdlen) return -1;
1213 } else { /* the block is not there */
1214 DEBUG("Page %llu is not here, we read the original one\n",
1215 (unsigned long long)mapcnt);
1216 if(rawexpread_fully(a, buf, rdlen, client)) return -1;
/* Advance past the chunk that was just handled. */
1218 len-=rdlen; a+=rdlen; buf+=rdlen;
1224 * Write an amount of bytes at a given offset to the right file. This
1225 * abstracts the write-side of the copyonwrite option, and calls
1226 * rawexpwrite() with the right parameters to do the actual work.
1228 * @param a The offset where the write should start
1229 * @param buf The buffer to write from
1230 * @param len The length of buf
1231 * @param client The client we're going to write for.
1232 * @return 0 on success, nonzero on failure
/*
 * Write len bytes from buf at export offset a.
 * Without F_COPYONWRITE the data is handed to rawexpwrite_fully().
 * With it, the visible lines only ever write to the diff file: a page
 * that already has a difmap entry is updated in place, while a new page
 * is first read in full from the export into pagebuf, patched with the
 * data from buf and then written out as a whole page.
 * Returns -1 when a write to the diff file is short.  Other return
 * statements are not visible in this excerpt.
 */
1234 int expwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
1235 char pagebuf[DIFFPAGESIZE];
1236 off_t mapcnt,mapl,maph;
1241 if (!(client->server->flags & F_COPYONWRITE))
1242 return(rawexpwrite_fully(a, buf, len, client, fua));
1243 DEBUG("Asked to write %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
/* Index of the first and of the last page touched by the request. */
1245 mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
/* NOTE(review): difmap[mapcnt] is used below without a visible bounds check. */
1247 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
1248 pagestart=mapcnt*DIFFPAGESIZE ;
1249 offset=a-pagestart ;
/* Chunk length: the remaining len or the rest of this page, whichever is smaller. */
1250 wrlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
1251 len : (size_t)DIFFPAGESIZE-offset;
1253 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1254 DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
1255 (unsigned long)(client->difmap[mapcnt])) ;
1256 myseek(client->difffile,
1257 client->difmap[mapcnt]*DIFFPAGESIZE+offset);
1258 if (write(client->difffile, buf, wrlen) != wrlen) return -1 ;
1259 } else { /* the block is not there */
/*
 * NOTE(review): the seek target is derived from difffilelen, but with
 * F_SPARSE the map entry becomes mapcnt and difffilelen is not advanced.
 * Later accesses seek to difmap[mapcnt]*DIFFPAGESIZE, so check that the
 * position written here and the position recorded in the map agree.
 */
1260 myseek(client->difffile,client->difffilelen*DIFFPAGESIZE) ;
1261 client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++;
1262 DEBUG("Page %llu is not here, we put it at %lu\n",
1263 (unsigned long long)mapcnt,
1264 (unsigned long)(client->difmap[mapcnt]));
/* Fetch the complete original page so that a partial write can be merged into it. */
1265 rdlen=DIFFPAGESIZE ;
1266 if (rawexpread_fully(pagestart, pagebuf, rdlen, client))
1268 memcpy(pagebuf+offset,buf,wrlen) ;
1269 if (write(client->difffile, pagebuf, DIFFPAGESIZE) !=
/* Advance past the chunk that was just handled. */
1273 len-=wrlen ; a+=wrlen ; buf+=wrlen ;
/*
 * Durability: the diff file is fsync()ed when the export has F_SYNC
 * set.  The condition guarding the fdatasync() call further down is not
 * visible in this excerpt.
 */
1275 if (client->server->flags & F_SYNC) {
1276 fsync(client->difffile);
1278 /* open question: would it be cheaper to do multiple sync_file_ranges?
1279 as we iterate through the above?
1281 fdatasync(client->difffile);
/*
 * Flush pending writes of an export to stable storage.
 * A copy-on-write export only fsync()s its diff file and returns the
 * result of that call.  Otherwise fsync() is called on the descriptor
 * of every FILE_INFO stored in client->export.
 * NOTE(review): the statement executed when one of those fsync() calls
 * fails and the final return are not visible in this excerpt.
 */
1286 int expflush(CLIENT *client) {
1289 if (client->server->flags & F_COPYONWRITE) {
1290 return fsync(client->difffile);
1293 for (i = 0; i < client->export->len; i++) {
/* The array element is copied by value; only its fhandle is used. */
1294 FILE_INFO fi = g_array_index(client->export, FILE_INFO, i);
1295 if (fsync(fi.fhandle) < 0)
1303 * Do the initial negotiation.
1305 * @param client The client we're negotiating with.
/*
 * Perform the opening handshake on socket net.
 * The visible lines send INIT_PASSWD and a magic number (opts_magic or
 * cliserv_magic), read an option header and an export name from the
 * peer, look that name up among servers by comparing it with each
 * servename, and finally send the export size, the flag word and zero
 * padding.
 * NOTE(review): several conditions and return statements are missing
 * from this excerpt.  The export name lookup is presumably only reached
 * when client is NULL, and the return value is presumably the CLIENT
 * that was passed in or created here -- confirm against the full file.
 */
1307 CLIENT* negotiate(int net, CLIENT *client, GArray* servers) {
1310 uint32_t flags = NBD_FLAG_HAS_FLAGS;
1311 uint16_t smallflags = 0;
1314 memset(zeros, '\0', sizeof(zeros));
1315 if(!client || !client->modern) {
1317 if (write(net, INIT_PASSWD, 8) < 0) {
1318 err_nonfatal("Negotiation failed: %m");
/* opts_magic is selected when there is no client yet or the client is flagged modern. */
1322 if(!client || client->modern) {
1324 magic = htonll(opts_magic);
1327 magic = htonll(cliserv_magic);
1329 if (write(net, &magic, sizeof(magic)) < 0) {
1330 err_nonfatal("Negotiation failed: %m");
1344 err("programmer error");
1345 if (write(net, &smallflags, sizeof(uint16_t)) < 0)
1346 err("Negotiation failed: %m");
/*
 * NOTE(review): every read() below is only tested for a negative
 * result.  A short read or end of file (return value 0) is accepted and
 * leaves the destination partly or wholly unset.
 */
1347 if (read(net, &reserved, sizeof(reserved)) < 0)
1348 err("Negotiation failed: %m");
1349 if (read(net, &magic, sizeof(magic)) < 0)
1350 err("Negotiation failed: %m");
1351 magic = ntohll(magic);
1352 if(magic != opts_magic) {
1356 if (read(net, &opt, sizeof(opt)) < 0)
1357 err("Negotiation failed: %m");
/* Only the export name option is recognised at this point. */
1359 if(opt != NBD_OPT_EXPORT_NAME) {
1363 if (read(net, &namelen, sizeof(namelen)) < 0)
1364 err("Negotiation failed: %m");
1365 namelen = ntohl(namelen);
/*
 * NOTE(review): namelen is supplied by the peer and no upper bound is
 * visible here.  If namelen is 32 bits wide, namelen+1 wraps to 0 for
 * the largest value while the read below still asks for namelen bytes.
 * The malloc() result check and the terminating NUL, if any, are on
 * lines that are not visible in this excerpt.
 */
1366 name = malloc(namelen+1);
1368 if (read(net, name, namelen) < 0)
1369 err("Negotiation failed: %m");
1370 for(i=0; i<servers->len; i++) {
1371 SERVER* serve = &(g_array_index(servers, SERVER, i));
1372 if(!strcmp(serve->servename, name)) {
/* This declaration shadows the function parameter of the same name. */
1373 CLIENT* client = g_new0(CLIENT, 1);
1374 client->server = serve;
1375 client->exportsize = OFFT_MAX;
1377 client->modern = TRUE;
1378 client->transactionlogfd = -1;
/* The export size is converted with htonll() and sent as 8 bytes. */
1387 size_host = htonll((u64)(client->exportsize));
1388 if (write(net, &size_host, 8) < 0)
1389 err("Negotiation failed: %m");
/* Translate the export configuration bits into protocol flag bits. */
1390 if (client->server->flags & F_READONLY)
1391 flags |= NBD_FLAG_READ_ONLY;
1392 if (client->server->flags & F_FLUSH)
1393 flags |= NBD_FLAG_SEND_FLUSH;
1394 if (client->server->flags & F_FUA)
1395 flags |= NBD_FLAG_SEND_FUA;
1396 if (client->server->flags & F_ROTATIONAL)
1397 flags |= NBD_FLAG_ROTATIONAL;
/* A client not flagged modern gets the flags as a 4-byte value. */
1398 if (!client->modern) {
1400 flags = htonl(flags);
1401 if (write(client->net, &flags, 4) < 0)
1402 err("Negotiation failed: %m");
/*
 * The flags are sent as 16 bits here; presumably this is the else
 * branch of the test above.
 * NOTE(review): (uint16_t)0 is promoted to int before the complement,
 * so the mask is all ones and the narrowing is done by the cast alone.
 */
1405 smallflags = (uint16_t)(flags & ~((uint16_t)0));
1406 smallflags = htons(smallflags);
1407 if (write(client->net, &smallflags, sizeof(smallflags)) < 0) {
1408 err("Negotiation failed: %m");
/* 124 bytes taken from the zero-filled array are sent as padding. */
1412 if (write(client->net, zeros, 124) < 0)
1413 err("Negotiation failed: %m");
/*
 * SEND writes reply to net with writeit() and, when a transaction log
 * is open (transactionlogfd is not -1), writes it to that descriptor as
 * well.  client is not a macro argument: it is picked up from the scope
 * in which the macro is expanded.
 * Both macros expand to a bare brace block rather than to a
 * do-while(0) statement, so an invocation followed by a semicolon
 * cannot be used as the unbraced body of an if that has an else.
 */
1417 /** sending macro. */
1418 #define SEND(net,reply) { writeit( net, &reply, sizeof( reply )); \
1419 if (client->transactionlogfd != -1) \
1420 writeit(client->transactionlogfd, &reply, sizeof(reply)); }
/*
 * ERROR stores errcode, converted with htonl(), in reply.error, sends
 * the reply through SEND and then resets reply.error to 0 so that the
 * same reply structure can be reused.
 */
1422 #define ERROR(client,reply,errcode) { reply.error = htonl(errcode); SEND(client->net,reply); reply.error = 0; }
1424 * Serve a file to a single client.
1426 * @todo This beast needs to be split up in many tiny little manageable
1427 * pieces. Preferably with a chainsaw.
1429 * @param client The client we're going to serve to.
1430 * @return when the client disconnects
/*
 * Request loop for one client.
 * After negotiate() the visible lines read one struct nbd_request at a
 * time from client->net, convert its fields from network byte order and
 * dispatch on the command: disconnect, write, flush or read.  Anything
 * else is reported as an unknown command.  Replies reuse a single
 * struct nbd_reply whose magic is set once before the loop.
 * NOTE(review): loop headers, several return or continue statements and
 * the declarations of buf, len, currlen, p and writelen are not visible
 * in this excerpt.
 */
1432 int mainloop(CLIENT *client) {
1433 struct nbd_request request;
1434 struct nbd_reply reply;
1435 gboolean go_on=TRUE;
1439 negotiate(client->net, client, NULL);
1440 DEBUG("Entering request loop!\n");
1441 reply.magic = htonl(NBD_REPLY_MAGIC);
/* Fetch one request header and mirror it into the transaction log when one is open. */
1454 readit(client->net, &request, sizeof(request));
1455 if (client->transactionlogfd != -1)
1456 writeit(client->transactionlogfd, &request, sizeof(request));
1458 request.from = ntohll(request.from);
1459 request.type = ntohl(request.type);
/* The command is the masked part of type; the remaining bits carry flags such as FUA. */
1460 command = request.type & NBD_CMD_MASK_COMMAND;
/* On disconnect a copy-on-write export drops its map and removes its diff file. */
1462 if (command==NBD_CMD_DISC) {
1463 msg2(LOG_INFO, "Disconnect request received.");
1464 if (client->server->flags & F_COPYONWRITE) {
1465 if (client->difmap) g_free(client->difmap) ;
1466 close(client->difffile);
1467 unlink(client->difffilename);
1468 free(client->difffilename);
1474 len = ntohl(request.len);
/* NOTE(review): the request magic is only verified after a disconnect has already been acted on above. */
1476 if (request.magic != htonl(NBD_REQUEST_MAGIC))
1477 err("Not enough magic.");
/* A request that does not fit into the buffer next to a reply header is handled in currlen-sized pieces. */
1478 if (len > BUFSIZE - sizeof(struct nbd_reply)) {
1479 currlen = BUFSIZE - sizeof(struct nbd_reply);
1480 msg2(LOG_INFO, "oversized request (this is not a problem)");
1484 DEBUG("%s from %llu (%llu) len %d, ", command ? "WRITE" :
1485 "READ", (unsigned long long)request.from,
1486 (unsigned long long)request.from / 512, (unsigned int)len);
/* The reply echoes the handle of the request it answers. */
1487 memcpy(reply.handle, request.handle, sizeof(reply.handle));
1489 if ((command==NBD_CMD_WRITE) || (command==NBD_CMD_READ)) {
/* NOTE(review): if request.from is an unsigned 64-bit field, the sum below can wrap before it is compared -- confirm. */
1490 if ((request.from + len) > (OFFT_MAX)) {
1491 DEBUG("[Number too large!]");
1492 ERROR(client, reply, EINVAL);
/* Reject requests that end beyond the export. */
1496 if (((ssize_t)((off_t)request.from + len) > client->exportsize)) {
1498 ERROR(client, reply, EINVAL);
1503 if (command==NBD_CMD_WRITE) {
1504 DEBUG("wr: net->buf, ");
/* The payload is read from the socket before the read-only test, so it is consumed even when the write is refused. */
1506 readit(client->net, buf, currlen);
1507 DEBUG("buf->exp, ");
1508 if ((client->server->flags & F_READONLY) ||
1509 (client->server->flags & F_AUTOREADONLY)) {
1510 DEBUG("[WRITE to READONLY!]");
1511 ERROR(client, reply, EPERM);
/*
 * NOTE(review): only currlen bytes were read into buf above, yet
 * expwrite() is given len.  For an oversized request the two differ --
 * confirm against the full loop.
 */
1514 if (expwrite(request.from, buf, len, client,
1515 request.type & NBD_CMD_FLAG_FUA)) {
1516 DEBUG("Write failed: %m" );
1517 ERROR(client, reply, errno);
1520 SEND(client->net, reply);
1523 currlen = (len < BUFSIZE) ? len : BUFSIZE;
1528 if (command==NBD_CMD_FLUSH) {
1530 if (expflush(client)) {
1531 DEBUG("Flush failed: %m");
1532 ERROR(client, reply, errno);
1535 SEND(client->net, reply);
1540 if (command==NBD_CMD_READ) {
1541 DEBUG("exp->buf, ");
/* The reply header is placed at the start of buf so that header and data leave in a single writeit() call. */
1542 memcpy(buf, &reply, sizeof(struct nbd_reply));
1543 if (client->transactionlogfd != -1)
1544 writeit(client->transactionlogfd, &reply, sizeof(reply));
1545 p = buf + sizeof(struct nbd_reply);
1546 writelen = currlen + sizeof(struct nbd_reply);
1548 if (expread(request.from, p, currlen, client)) {
1549 DEBUG("Read failed: %m");
1550 ERROR(client, reply, errno);
1554 DEBUG("buf->net, ");
1555 writeit(client->net, buf, writelen);
/* Move on to the next piece of the request. */
1557 request.from += currlen;
1558 currlen = (len < BUFSIZE) ? len : BUFSIZE;
1566 DEBUG ("Ignoring unknown command\n");
1572 * Set up client export array, which is an array of FILE_INFO.
1573 * Also, split a single exportfile into multiple ones, if that was asked.
1574 * @param client information on the client which we want to setup export for
/*
 * Open the file or files behind an export and compute the export size.
 * Each opened descriptor is appended to client->export as a FILE_INFO
 * whose startoff is the end of the previous file.  With F_MULTIFILE the
 * file names are exportname followed by a dot and a running number.
 * The export size becomes the start offset plus the size of the last
 * file, unless the server configuration supplies expected_size.
 * NOTE(review): the loop header, the declarations of fi, tmpname and i,
 * and the code that reports open failures are only partly visible in
 * this excerpt.
 */
1576 void setupexport(CLIENT* client) {
1578 off_t laststartoff = 0, lastsize = 0;
1579 int multifile = (client->server->flags & F_MULTIFILE);
/* The array is created zero-terminated and cleared (first two arguments TRUE). */
1581 client->export = g_array_new(TRUE, TRUE, sizeof(FILE_INFO));
1583 /* If multi-file, open as many files as we can.
1584 * If not, open exactly one file.
1585 * Calculate file sizes as we go to get total size. */
1589 gchar* error_string;
/* NOTE(review): the open() flags are kept in a mode_t here; open() takes them as an int. */
1590 mode_t mode = (client->server->flags & F_READONLY) ? O_RDONLY : O_RDWR;
1593 tmpname=g_strdup_printf("%s.%d", client->exportname, i);
1595 tmpname=g_strdup(client->exportname);
1597 DEBUG( "Opening %s\n", tmpname );
1598 fi.fhandle = open(tmpname, mode);
1599 if(fi.fhandle == -1 && mode == O_RDWR) {
1600 /* Try again because maybe media was read-only */
1601 fi.fhandle = open(tmpname, O_RDONLY);
1602 if(fi.fhandle != -1) {
1603 /* Opening the base file in copyonwrite mode is
1605 if(!(client->server->flags & F_COPYONWRITE)) {
1606 client->server->flags |= F_AUTOREADONLY;
1607 client->server->flags |= F_READONLY;
1611 if(fi.fhandle == -1) {
1612 if(multifile && i>0)
1614 error_string=g_strdup_printf(
1615 "Could not open exported file %s: %%m",
1619 fi.startoff = laststartoff + lastsize;
1620 g_array_append_val(client->export, fi);
1623 /* Starting offset and size of this file will be used to
1624 * calculate starting offset of next file */
1625 laststartoff = fi.startoff;
1626 lastsize = size_autodetect(fi.fhandle);
1632 /* Set export size to total calculated size */
1633 client->exportsize = laststartoff + lastsize;
1635 /* Export size may be overridden */
1636 if(client->server->expected_size) {
1637 /* desired size must be <= total calculated size */
1638 if(client->server->expected_size > client->exportsize) {
/* NOTE(review): this branch is taken when the configured size exceeds the detected size; the wording of the message reads the other way round. */
1639 err("Size of exported file is too big\n");
1642 client->exportsize = client->server->expected_size;
1645 msg3(LOG_INFO, "Size of exported file/device is %llu", (unsigned long long)client->exportsize);
1647 msg3(LOG_INFO, "Total number of files: %d", i);
/*
 * Prepare the copy-on-write state of a client.
 * Builds the diff file name from the export name, the client name and
 * one further integer (its argument is on a line not visible here),
 * creates that file truncated and with mode 0600, and allocates difmap
 * with one u32 entry per DIFFPAGESIZE-sized page, each set to (u32)-1.
 * expread() and expwrite() treat that value as "page not in the diff
 * file".
 * NOTE(review): the return statement is not visible in this excerpt.
 */
1651 int copyonwrite_prepare(CLIENT* client) {
1653 if ((client->difffilename = malloc(1024))==NULL)
1654 err("Failed to allocate string for diff file name");
1655 snprintf(client->difffilename, 1024, "%s-%s-%d.diff",client->exportname,client->clientname,
/* snprintf() already terminates the string within 1024 bytes; this assignment is a belt-and-braces repeat. */
1657 client->difffilename[1023]='\0';
1658 msg3(LOG_INFO,"About to create map and diff file %s",client->difffilename) ;
1659 client->difffile=open(client->difffilename,O_RDWR | O_CREAT | O_TRUNC,0600) ;
1660 if (client->difffile<0) err("Could not create diff file (%m)") ;
/*
 * NOTE(review): the entry count is exportsize/DIFFPAGESIZE rounded
 * down.  expread() and expwrite() index difmap with
 * (a+len-1)/DIFFPAGESIZE, so a trailing partial page would use an index
 * equal to the entry count -- confirm that the export size is always a
 * multiple of DIFFPAGESIZE or round the count up.
 */
1661 if ((client->difmap=calloc(client->exportsize/DIFFPAGESIZE,sizeof(u32)))==NULL)
1662 err("Could not allocate memory") ;
1663 for (i=0;i<client->exportsize/DIFFPAGESIZE;i++) client->difmap[i]=(u32)-1 ;
1669 * Run a command. This is used for the ``prerun'' and ``postrun'' config file options.
1672 * @param command the command to be run. Read from the config file
1673 * @param file the file name we're about to export
/*
 * Build the command line for a prerun or postrun hook.
 * command is used as a printf-style template: g_strdup_printf() is
 * called with command as the format string and file as its only
 * argument.  A NULL or empty command skips this step.
 * NOTE(review): because command is the format string, any conversion
 * other than a single string conversion would read arguments that were
 * never passed; presumably acceptable because the template comes from
 * the configuration file.
 * NOTE(review): how cmd is executed and what the function returns are
 * not visible in this excerpt.
 */
1675 int do_run(gchar* command, gchar* file) {
1679 if(command && *command) {
1680 cmd = g_strdup_printf(command, file);
1688 * Serve a connection.
1690 * @todo allow for multithreading, perhaps use libevent. Not just yet, though;
1691 * follow the road map.
1693 * @param client a connected client
/*
 * Serve one connected client from start to finish.
 * Visible steps, in order: open the transaction log when one is
 * configured and not yet open, run the prerun hook, set up the export,
 * prepare copy-on-write state when F_COPYONWRITE is set, apply socket
 * options, run the postrun hook and close the transaction log.
 * NOTE(review): the call that actually serves requests and the action
 * taken when the prerun hook returns nonzero are not among the lines
 * visible in this excerpt.
 */
1695 void serveconnection(CLIENT *client) {
1696 if (client->server->transactionlog && (client->transactionlogfd == -1))
1698 if (-1 == (client->transactionlogfd = open(client->server->transactionlog,
1700 S_IRUSR | S_IWUSR)))
1701 g_warning("Could not open transaction log %s",
1702 client->server->transactionlog);
/* A nonzero result from the prerun hook takes the branch below. */
1705 if(do_run(client->server->prerun, client->exportname)) {
1708 setupexport(client);
1710 if (client->server->flags & F_COPYONWRITE) {
1711 copyonwrite_prepare(client);
1714 setmysockopt(client->net);
/* The result of the postrun hook is ignored. */
1717 do_run(client->server->postrun, client->exportname);
/* Close the transaction log and mark it closed again. */
1719 if (-1 != client->transactionlogfd)
1721 close(client->transactionlogfd);
1722 client->transactionlogfd = -1;
1727 * Find the name of the file we have to serve. This will use g_strdup_printf
1728 * to put the IP address of the client inside a filename containing
1729 * "%s" (in the form as specified by the "virtstyle" option). That name
1730 * is then written to client->exportname.
1732 * @param net A socket connected to an nbd client
1733 * @param client information about the client. The IP address in human-readable
1734 * format will be written to a new char* buffer, the address of which will be
1735 * stored in client->clientname.
/*
 * Determine the peer address of socket net and derive the export file
 * name from it.
 * The numeric peer address is obtained with getpeername() and
 * getnameinfo(NI_NUMERICHOST).  Depending on the virtstyle of the
 * server, client->exportname becomes a plain copy of the configured
 * name, or the configured name used as a format with the peer address,
 * or the configured name used as a format with a network/address pair
 * computed from cidrlen.  client->clientname receives a copy of the
 * numeric peer address.
 * NOTE(review): the case labels of the switch and the handling of tmp
 * and ai after use are not visible in this excerpt.
 */
1737 void set_peername(int net, CLIENT *client) {
1738 struct sockaddr_storage addrin;
1739 struct sockaddr_storage netaddr;
1740 struct sockaddr_in *netaddr4 = NULL;
1741 struct sockaddr_in6 *netaddr6 = NULL;
/* NOTE(review): addrinlen is a size_t but is passed below through a cast to a socklen_t pointer; the two types differ in size on common 64-bit platforms. */
1742 size_t addrinlen = sizeof( addrin );
1743 struct addrinfo hints;
1744 struct addrinfo *ai = NULL;
1745 char peername[NI_MAXHOST];
1746 char netname[NI_MAXHOST];
/* NOTE(review): the message names getsockname although the call that failed is getpeername. */
1752 if (getpeername(net, (struct sockaddr *) &addrin, (socklen_t *)&addrinlen) < 0)
1753 err("getsockname failed: %m");
/* NOTE(review): the result of getnameinfo() is not checked, so peername may be unset on failure. */
1755 getnameinfo((struct sockaddr *)&addrin, (socklen_t)addrinlen,
1756 peername, sizeof (peername), NULL, 0, NI_NUMERICHOST);
/* Resolve the numeric address again; ai->ai_family is used below to tell IPv4 from IPv6. */
1758 memset(&hints, '\0', sizeof (hints));
1759 hints.ai_flags = AI_ADDRCONFIG;
1760 e = getaddrinfo(peername, NULL, &hints, &ai);
1763 fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
1768 switch(client->server->virtstyle) {
1770 client->exportname=g_strdup(client->server->exportname);
/* Dots in the peer address are rewritten in this loop; the replacement is on a line not visible here. */
1773 for(i=0;i<strlen(peername);i++) {
1774 if(peername[i]=='.') {
1779 client->exportname=g_strdup_printf(client->server->exportname, peername);
1782 memcpy(&netaddr, &addrin, addrinlen);
1783 if(ai->ai_family == AF_INET) {
1784 netaddr4 = (struct sockaddr_in *)&netaddr;
/*
 * Clear the low 32-cidrlen bits of the address word.
 * NOTE(review): s_addr is kept in network byte order, so on a
 * little-endian host these shifts act on the wrong end of the address;
 * a cidrlen of 0 would also shift by the full width of the type.
 */
1785 (netaddr4->sin_addr).s_addr>>=32-(client->server->cidrlen);
1786 (netaddr4->sin_addr).s_addr<<=32-(client->server->cidrlen);
1788 getnameinfo((struct sockaddr *) netaddr4, (socklen_t) addrinlen,
1789 netname, sizeof (netname), NULL, 0, NI_NUMERICHOST);
1790 tmp=g_strdup_printf("%s/%s", netname, peername);
1791 }else if(ai->ai_family == AF_INET6) {
1792 netaddr6 = (struct sockaddr_in6 *)&netaddr;
/* Same masking for IPv6: whole 32-bit words are zeroed while at least 32 bits remain, then the rest is shifted out of one word. */
1794 shift = 128-(client->server->cidrlen);
1796 while(shift >= 32) {
1797 ((netaddr6->sin6_addr).s6_addr32[i])=0;
1801 (netaddr6->sin6_addr).s6_addr32[i]>>=shift;
1802 (netaddr6->sin6_addr).s6_addr32[i]<<=shift;
1804 getnameinfo((struct sockaddr *)netaddr6, (socklen_t)addrinlen,
1805 netname, sizeof(netname), NULL, 0, NI_NUMERICHOST);
1806 tmp=g_strdup_printf("%s/%s", netname, peername);
/* The configured export name is the format string; tmp is its only argument. */
1810 client->exportname=g_strdup_printf(client->server->exportname, tmp);
1816 msg4(LOG_INFO, "connect from %s, assigned file is %s",
1817 peername, client->exportname);
1818 client->clientname=g_strdup(peername);
1823 * @param data a pointer to pid_t which should be freed
/*
 * Destroy callback for entries of the children hash table:
 * setup_servers() passes this function to g_hash_table_new_full() as
 * the value destroy function.
 * NOTE(review): the body is not visible in this excerpt; per the header
 * comment it frees the pid_t that data points to.
 */
1825 void destroy_pid_t(gpointer data) {
1830 * Loop through the available servers, and serve them. Never returns.
/*
 * Accept loop of the server.
 * A master descriptor set is built once from the listening socket of
 * every configured server plus modernsock.  Each round copies that set,
 * waits in select() and accepts a connection either on modernsock,
 * where negotiate() picks the export, or on the socket of one export.
 * The accepted socket is switched to blocking mode, the client is
 * checked with authorized_client() and a child process is forked.  The
 * parent records the pid in the children table; the child destroys its
 * copy of that table, closes the listening sockets and calls
 * serveconnection().
 * NOTE(review): loop headers, error branches and the cleanup paths are
 * only partly visible in this excerpt.
 */
1832 int serveloop(GArray* servers) {
1833 struct sockaddr_storage addrin;
/* NOTE(review): accept() updates addrinlen on return and no reset before later calls is visible here. */
1834 socklen_t addrinlen=sizeof(addrin);
1842 * Set up the master fd_set. The set of descriptors we need
1843 * to select() for never changes anyway and it buys us a *lot*
1844 * of time to only build this once. However, if we ever choose
1845 * to not fork() for clients anymore, we may have to revisit
1850 for(i=0;i<servers->len;i++) {
/* A socket value of 0 is skipped by this test. */
1851 if((sock=(g_array_index(servers, SERVER, i)).socket)) {
1852 FD_SET(sock, &mset);
1853 max=sock>max?sock:max;
1857 FD_SET(modernsock, &mset);
1858 max=modernsock>max?modernsock:max;
1861 CLIENT *client = NULL;
/* select() modifies the set it is given, so it works on a copy of the master set. */
1864 memcpy(&rset, &mset, sizeof(fd_set));
1865 if(select(max+1, &rset, NULL, NULL, NULL)>0) {
1870 if(FD_ISSET(modernsock, &rset)) {
1871 if((net=accept(modernsock, (struct sockaddr *) &addrin, &addrinlen)) < 0)
/* With a NULL client, negotiate() reads the export name from the peer and returns the matching client. */
1873 client = negotiate(net, NULL, servers);
1875 err_nonfatal("negotiation failed");
1880 serve = client->server;
/* Stop scanning as soon as one connection has been accepted (net becomes nonzero). */
1882 for(i=0;i<servers->len && !net;i++) {
1883 serve=&(g_array_index(servers, SERVER, i));
1884 if(FD_ISSET(serve->socket, &rset)) {
1885 if ((net=accept(serve->socket, (struct sockaddr *) &addrin, &addrinlen)) < 0)
/* NOTE(review): the size of the single children table is compared with the limit of one export; presumably the table holds the children of all exports -- confirm. */
1892 if(serve->max_connections > 0 &&
1893 g_hash_table_size(children) >= serve->max_connections) {
1894 msg2(LOG_INFO, "Max connections reached");
/* Clear O_NONBLOCK on the accepted socket. */
1898 if((sock_flags = fcntl(net, F_GETFL, 0))==-1) {
1899 err("fcntl F_GETFL");
1901 if(fcntl(net, F_SETFL, sock_flags &~O_NONBLOCK)==-1) {
1902 err("fcntl F_SETFL ~O_NONBLOCK");
1905 client = g_new0(CLIENT, 1);
1906 client->server=serve;
1907 client->exportsize=OFFT_MAX;
1909 client->transactionlogfd = -1;
1911 set_peername(net, client);
1912 if (!authorized_client(client)) {
1913 msg2(LOG_INFO,"Unauthorized client") ;
1917 msg2(LOG_INFO,"Authorized client") ;
/* The pid lives on the heap because it is stored in the children table as both key and value. */
1918 pid=g_malloc(sizeof(pid_t));
1921 if ((*pid=fork())<0) {
1922 msg3(LOG_INFO,"Could not fork (%s)",strerror(errno)) ;
1926 if (*pid>0) { /* parent */
1928 g_hash_table_insert(children, pid, pid);
/* From here on the lines belong to the child: it drops its copy of the table and the listening sockets. */
1932 g_hash_table_destroy(children);
1933 for(i=0;i<servers->len;i++) {
1934 serve=&g_array_index(servers, SERVER, i);
1935 close(serve->socket);
1937 /* FALSE does not free the
1938 actual data. This is required,
1939 because the client has a
1940 direct reference into that
1941 data, and otherwise we get a
1943 g_array_free(servers, FALSE);
1946 msg2(LOG_INFO,"Starting to serve");
1947 serveconnection(client);
/*
 * Apply the standard options to a listening socket: SO_REUSEADDR,
 * SO_KEEPALIVE and non-blocking mode.  Each failing call is reported
 * through err().
 * NOTE(review): the declaration of yes and sock_flags is not visible in
 * this excerpt.
 */
1954 void dosockopts(int socket) {
1962 /* lose the pesky "Address already in use" error message */
1963 if (setsockopt(socket,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) {
1964 err("setsockopt SO_REUSEADDR");
1966 if (setsockopt(socket,SOL_SOCKET,SO_KEEPALIVE,&yes,sizeof(int)) == -1) {
1967 err("setsockopt SO_KEEPALIVE");
1970 /* make the listening socket non-blocking */
/* Read the current flags first so that only O_NONBLOCK is added. */
1971 if ((sock_flags = fcntl(socket, F_GETFL, 0)) == -1) {
1972 err("fcntl F_GETFL");
1974 if (fcntl(socket, F_SETFL, sock_flags | O_NONBLOCK) == -1) {
1975 err("fcntl F_SETFL O_NONBLOCK");
1980 * Connect a server's socket.
1982 * @param serve the server we want to connect.
/*
 * Create, bind and listen on the socket of one export.
 * The listen address and port of the server are resolved with
 * getaddrinfo(); the resulting address is used for socket(), bind()
 * and listen().  When the configured family is AF_UNSPEC it is replaced
 * by the family that was resolved.
 * The early return yields 1 when the export has a name and 0
 * otherwise; its guarding condition is not visible in this excerpt.
 * NOTE(review): declarations of port and e, the error branches and the
 * closing lines are not visible in this excerpt and are left untouched.
 */
1984 int setup_serve(SERVER *serve) {
1985 struct addrinfo hints;
1986 struct addrinfo *ai = NULL;
1991 return serve->servename ? 1 : 0;
1993 memset(&hints,'\0',sizeof(hints));
1994 hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG | AI_NUMERICSERV;
1995 hints.ai_socktype = SOCK_STREAM;
1996 hints.ai_family = serve->socket_family;
/* getaddrinfo() takes the service as a string, so the port is formatted first. */
1998 port = g_strdup_printf ("%d", serve->port);
2002 e = getaddrinfo(serve->listenaddr,port,&hints,&ai);
2007 fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
2013 if(serve->socket_family == AF_UNSPEC)
2014 serve->socket_family = ai->ai_family;
/*
 * F_SDP is one bit of the flag word and has to be tested with a bitwise
 * AND.  The logical AND used before was true whenever any flag was set.
 */
2017 if ((serve->flags) & F_SDP) {
2018 if (ai->ai_family == AF_INET)
2019 ai->ai_family = AF_INET_SDP;
/* The keyword if was missing after else, which does not parse. */
2020 else if (ai->ai_family == AF_INET6)
2021 ai->ai_family = AF_INET6_SDP;
2024 if ((serve->socket = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol)) < 0)
2027 dosockopts(serve->socket);
2029 DEBUG("Waiting for connections... bind, ");
2030 e = bind(serve->socket, ai->ai_addr, ai->ai_addrlen);
/* A bind() failure with EADDRINUSE is deliberately not treated as an error by this test. */
2031 if (e != 0 && errno != EADDRINUSE)
2034 if (listen(serve->socket, 1) < 0)
2038 if(serve->servename) {
/*
 * Open the listening socket for the modern handshake and store it in
 * modernsock.
 * The address is resolved from modern_listen and NBD_DEFAULT_PORT for
 * TCP stream sockets of any address family; the socket then gets the
 * options from dosockopts(), is bound and listens with a backlog of 10.
 * NOTE(review): the error branches and the release of ai are not
 * visible in this excerpt.
 */
2045 void open_modern(void) {
2046 struct addrinfo hints;
2047 struct addrinfo* ai = NULL;
2051 memset(&hints, '\0', sizeof(hints));
2052 hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG;
2053 hints.ai_socktype = SOCK_STREAM;
2054 hints.ai_family = AF_UNSPEC;
2055 hints.ai_protocol = IPPROTO_TCP;
2056 e = getaddrinfo(modern_listen, NBD_DEFAULT_PORT, &hints, &ai);
2058 fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
/* Only the first address returned by getaddrinfo() is used. */
2061 if((modernsock = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol))<0) {
2065 dosockopts(modernsock);
2067 if(bind(modernsock, ai->ai_addr, ai->ai_addrlen)) {
2070 if(listen(modernsock, 10) <0) {
2078 * Connect our servers.
/*
 * Set up every configured server and the process-wide bookkeeping.
 * setup_serve() is called for each entry of servers and its results are
 * OR-ed into want_modern.  The children hash table is created and the
 * handlers for SIGCHLD and SIGTERM are installed with SA_RESTART.
 * NOTE(review): what is done with want_modern afterwards is not visible
 * in this excerpt.
 */
2080 void setup_servers(GArray* servers) {
2082 struct sigaction sa;
2085 for(i=0;i<servers->len;i++) {
2086 want_modern |= setup_serve(&(g_array_index(servers, SERVER, i)));
/*
 * Keys are hashed and compared as ints.  No key destroy function is
 * given; destroy_pid_t is the value destroy function.  serveloop()
 * inserts the same pointer as key and value, so each entry is released
 * once.
 */
2091 children=g_hash_table_new_full(g_int_hash, g_int_equal, NULL, destroy_pid_t);
2093 sa.sa_handler = sigchld_handler;
2094 sigemptyset(&sa.sa_mask);
2095 sa.sa_flags = SA_RESTART;
2096 if(sigaction(SIGCHLD, &sa, NULL) == -1)
2097 err("sigaction: %m");
2098 sa.sa_handler = sigterm_handler;
2099 sigemptyset(&sa.sa_mask);
2100 sa.sa_flags = SA_RESTART;
2101 if(sigaction(SIGTERM, &sa, NULL) == -1)
2102 err("sigaction: %m");
2106 * Go daemon (unless we specified at compile time that we didn't want this)
2107 * @param serve the first server of our configuration. If its port is zero,
2108 * then do not daemonize, because we're doing inetd then. This parameter
2109 * is only used to create a PID file of the form
2110 * /var/run/nbd-server.<port>.pid; it's not modified in any way.
/*
 * daemonize() exists as a function only when NODAEMON is not defined;
 * otherwise it is defined as an empty macro.
 * The visible lines choose a PID file template when none is set yet,
 * expand it with the port of the server (0 when serve is NULL) and
 * write the process id into that file.
 * NOTE(review): the call that detaches the process, the branch taken
 * for a server whose port is zero and the condition choosing between
 * the two default templates are not visible in this excerpt.
 */
2112 #if !defined(NODAEMON)
2113 void daemonize(SERVER* serve) {
2116 if(serve && !(serve->port)) {
/* An empty template means none was configured, so a default is filled in. */
2122 if(!*pidftemplate) {
/* At most 255 bytes are copied; main() zero-fills 256 bytes of pidftemplate beforehand. */
2124 strncpy(pidftemplate, "/var/run/nbd-server.%d.pid", 255);
2126 strncpy(pidftemplate, "/var/run/nbd-server.pid", 255);
/* NOTE(review): pidftemplate is used as the format string; a template from elsewhere must contain at most one integer conversion. */
2129 snprintf(pidfname, 255, pidftemplate, serve ? serve->port : 0);
2130 pidf=fopen(pidfname, "w");
2132 fprintf(pidf,"%d\n", (int)getpid());
/* Failing to write the PID file is reported but does not stop the server. */
2136 fprintf(stderr, "Not fatal; continuing");
2140 #define daemonize(serve)
2141 #endif /* !defined(NODAEMON) */
2144 * Everything beyond this point (in the file) is run in non-daemon mode.
2145 * The stuff above daemonize() isn't.
/*
 * Report that exporting a file failed.  The prototype carries
 * G_GNUC_NORETURN, so callers may rely on this function not returning.
 * The visible line logs the export name and a port number through
 * g_message().
 * NOTE(review): the rest of the body, including the use of msg and the
 * call that ends the process, is not visible in this excerpt.
 */
2148 void serve_err(SERVER* serve, const char* msg) G_GNUC_NORETURN;
2150 void serve_err(SERVER* serve, const char* msg) {
2151 g_message("Export of %s on port %d failed:", serve->exportname,
2157 * Set up user-ID and/or group-ID
/*
 * Switch to the configured group and user.
 * rungroup is looked up with getgrnam() and applied with setgid();
 * runuser is looked up with getpwnam() and applied with setuid().  The
 * group is handled before the user in the visible lines.
 * NOTE(review): the conditions around the two lookups and the handling
 * of the two message strings are not visible in this excerpt.
 */
2159 void dousers(void) {
2164 gr=getgrnam(rungroup);
2166 str = g_strdup_printf("Invalid group name: %s", rungroup);
2169 if(setgid(gr->gr_gid)<0) {
2170 err("Could not set GID: %m");
2174 pw=getpwnam(runuser);
2176 str = g_strdup_printf("Invalid user name: %s", runuser);
2179 if(setuid(pw->pw_uid)<0) {
2180 err("Could not set UID: %m");
/*
 * GLib log handler that forwards messages to syslog().
 * The syslog level starts out as LOG_DEBUG and is chosen by a switch
 * over the GLib log level; main() installs this handler with
 * g_log_set_default_handler() in its inetd branch.
 * NOTE(review): the level assigned in each case group is not visible in
 * this excerpt.
 */
2186 void glib_message_syslog_redirect(const gchar *log_domain,
2187 GLogLevelFlags log_level,
2188 const gchar *message,
2191 int level=LOG_DEBUG;
2195 case G_LOG_FLAG_FATAL:
2196 case G_LOG_LEVEL_CRITICAL:
2197 case G_LOG_LEVEL_ERROR:
2200 case G_LOG_LEVEL_WARNING:
2203 case G_LOG_LEVEL_MESSAGE:
2204 case G_LOG_LEVEL_INFO:
2207 case G_LOG_LEVEL_DEBUG:
/* The message is passed as an argument, not as the format, so conversion characters inside it are printed literally. */
2212 syslog(level, "%s", message);
2217 * Main entry point...
2219 int main(int argc, char *argv[]) {
2224 if (sizeof( struct nbd_request )!=28) {
2225 fprintf(stderr,"Bad size of structure. Alignment problems?\n");
2226 exit(EXIT_FAILURE) ;
2229 memset(pidftemplate, '\0', 256);
2232 config_file_pos = g_strdup(CFILE);
2233 serve=cmdline(argc, argv);
2234 servers = parse_cfile(config_file_pos, &err);
2237 serve->socket_family = AF_UNSPEC;
2239 append_serve(serve, servers);
2241 if (!(serve->port)) {
2244 /* You really should define ISSERVER if you're going to use
2245 * inetd mode, but if you don't, closing stdout and stderr
2246 * (which inetd had connected to the client socket) will let it
2250 open("/dev/null", O_WRONLY);
2251 open("/dev/null", O_WRONLY);
2252 g_log_set_default_handler( glib_message_syslog_redirect, NULL );
2254 client=g_malloc(sizeof(CLIENT));
2255 client->server=serve;
2257 client->exportsize=OFFT_MAX;
2258 set_peername(0,client);
2259 serveconnection(client);
2264 if(!servers || !servers->len) {
2265 if(err && !(err->domain == g_quark_from_string("parse_cfile")
2266 && err->code == CFILE_NOTFOUND)) {
2267 g_warning("Could not parse config file: %s",
2268 err ? err->message : "Unknown error");
2272 g_warning("Specifying an export on the command line is deprecated.");
2273 g_warning("Please use a configuration file instead.");
2276 if((!serve) && (!servers||!servers->len)) {
2277 g_message("No configured exports; quitting.");
2282 setup_servers(servers);