2 * Network Block Device - server
4 * Copyright 1996-1998 Pavel Machek, distribute under GPL
5 * <pavel@atrey.karlin.mff.cuni.cz>
6 * Copyright 2001-2004 Wouter Verhelst <wouter@debian.org>
7 * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk>
9 * Version 1.0 - hopefully 64-bit-clean
10 * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au>
11 * Version 1.2 - autodetect size of block devices, thanks to "Peter T. Breuer" <ptb@it.uc3m.es>
12 * Version 1.5 - can compile on Unix systems that don't have 64 bit integer
13 * type, or don't have 64 bit file offsets by defining FS_32BIT
14 * in compile options for nbd-server *only*. This can be done
15 * with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the
16 * original autoconf input file, or I would make it a configure
17 * option.) Ken Yap <ken@nlc.net.au>.
18 * Version 1.6 - fix autodetection of block device size and really make 64 bit
19 * clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk>
20 * Version 2.0 - Version synchronised with client
21 * Version 2.1 - Reap zombie client processes when they exit. Removed
22 * (uncommented) the _IO magic, it's no longer necessary. Wouter
23 * Verhelst <wouter@debian.org>
24 * Version 2.2 - Auto switch to read-only mode (useful for floppies).
25 * Version 2.3 - Fixed code so that Large File Support works. This
26 * removes the FS_32BIT compile-time directive; define
27 * _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be
28 * using FS_32BIT. This will allow you to use files >2GB instead of
29 * having to use the -m option. Wouter Verhelst <wouter@debian.org>
30 * Version 2.4 - Added code to keep track of children, so that we can
31 * properly kill them from initscripts. Add a call to daemon(),
32 * so that processes don't think they have to wait for us, which is
33 * interesting for initscripts as well. Wouter Verhelst
35 * Version 2.5 - Bugfix release: forgot to reset child_arraysize to
36 * zero after fork()ing, resulting in nbd-server going berserk
37 * when it receives a signal with at least one child open. Wouter
38 * Verhelst <wouter@debian.org>
39 * 10/10/2003 - Added socket option SO_KEEPALIVE (sf.net bug 819235);
40 * rectified type of mainloop::size_host (sf.net bugs 814435 and
41 * 817385); close the PID file after writing to it, so that the
42 * daemon can actually be found. Wouter Verhelst
44 * 10/10/2003 - Size of the data "size_host" was wrong and so was not
45 * correctly put in network endianness. Many types were corrected
46 * (size_t and off_t instead of int). <vspaceg@sourceforge.net>
47 * Version 2.6 - Some code cleanup.
48 * Version 2.7 - Better build system.
49 * 11/02/2004 - Doxygenified the source, modularized it a bit. Needs a
50 * lot more work, but this is a start. Wouter Verhelst
52 * 16/03/2010 - Add IPv6 support.
53 * Kitt Tientanopajai <kitt@kitty.in.th>
54 * Neutron Soutmun <neo.neutron@gmail.com>
55 * Suriya Soutmun <darksolar@gmail.com>
58 /* Includes LFS defines, which defines behaviours of some of the following
59 * headers, so must come before those */
62 #include <sys/types.h>
63 #include <sys/socket.h>
65 #include <sys/select.h> /* select */
66 #include <sys/wait.h> /* wait */
67 #ifdef HAVE_SYS_IOCTL_H
68 #include <sys/ioctl.h>
70 #include <sys/param.h>
71 #ifdef HAVE_SYS_MOUNT_H
72 #include <sys/mount.h> /* For BLKGETSIZE */
74 #include <signal.h> /* sigaction */
76 #include <netinet/tcp.h>
77 #include <netinet/in.h>
85 #include <arpa/inet.h>
95 /* used in cliserv.h, so must come first */
// File-level identification, configuration-path macros, run-time settings and
// logging/debug macros.
// NOTE(review): preprocessor conditionals and some declarations of this region are
// not visible in this extract; the comments below describe only what is shown.
96 #define MY_NAME "nbd_server"
100 #include <sdp_inet.h>
103 /** Default position of the config file */
105 #define SYSCONFDIR "/etc"
// CFILE is formed by string-literal concatenation onto SYSCONFDIR.
107 #define CFILE SYSCONFDIR "/nbd-server/config"
109 /** Where our config file actually is */
110 gchar* config_file_pos;
112 /** What user we're running as */
// NOTE(review): the declaration documented by the comment above is not visible here.
114 /** What group we're running as */
115 gchar* rungroup=NULL;
116 /** whether to export using the old negotiation protocol (port-based) */
117 gboolean do_oldstyle=FALSE;
119 /* Whether we should avoid forking */
// NOTE(review): the variable documented by the comment above is not visible here.
122 /** Logging macros, now nothing goes to syslog unless you say ISSERVER */
// Two alternative definitions of msg2/msg3/msg4 follow. The first set forwards all
// arguments to syslog(); the second set drops the priority argument and calls
// g_message(). The conditional that selects between them is not visible here.
124 #define msg2(a,b) syslog(a,b)
125 #define msg3(a,b,c) syslog(a,b,c)
126 #define msg4(a,b,c,d) syslog(a,b,c,d)
128 #define msg2(a,b) g_message(b)
129 #define msg3(a,b,c) g_message(b,c)
130 #define msg4(a,b,c,d) g_message(b,c,d)
133 /* Debugging macros */
// DEBUG forwards its arguments to printf(); any guard around it is not visible here.
136 #define DEBUG(...) printf(__VA_ARGS__)
// Fall back to an empty version string when the build does not provide one.
140 #ifndef PACKAGE_VERSION
141 #define PACKAGE_VERSION ""
/**
 * The highest value a variable of type off_t can reach. off_t is a signed
 * integer, so this is every bit set except the leftmost (sign) bit.
 *
 * The value is computed in unsigned arithmetic and only then converted to
 * off_t: shifting a signed 1 into the sign bit is undefined behaviour in C.
 * The expansion is fully parenthesised so it is safe inside any expression.
 */
#define OFFT_MAX ((off_t)((1ULL<<(sizeof(off_t)*8-1))-1ULL))
// Buffer sizing constants, per-export flag bits and process-wide state.
// The F_* values are distinct powers of two, so they can be OR-ed into the flags
// field of the server settings and tested with a bitwise AND.
148 #define LINELEN 256 /**< Size of static buffer used to read the
149 authorization file (yuck) */
150 #define BUFSIZE ((1024*1024)+sizeof(struct nbd_reply)) /**< Size of buffer that can hold requests */
151 #define DIFFPAGESIZE 4096 /**< diff file uses those chunks */
152 #define F_READONLY 1 /**< flag to tell us a file is readonly */
153 #define F_MULTIFILE 2 /**< flag to tell us a file is exported using -m */
// NOTE(review): the comment on the next line is not terminated on its own line in
// this extract (the continuation line is missing).
154 #define F_COPYONWRITE 4 /**< flag to tell us a file is exported using
156 #define F_AUTOREADONLY 8 /**< flag to tell us a file is set to autoreadonly */
157 #define F_SPARSE 16 /**< flag to tell us copyonwrite should use a sparse file */
158 #define F_SDP 32 /**< flag to tell us the export should be done using the Socket Direct Protocol for RDMA */
159 #define F_SYNC 64 /**< Whether to fsync() after a write */
160 #define F_FLUSH 128 /**< Whether server wants FLUSH to be sent by the client */
161 #define F_FUA 256 /**< Whether server wants FUA to be sent by the client */
162 #define F_ROTATIONAL 512 /**< Whether server wants the client to implement the elevator algorithm */
// Table of child processes; the SIGCHLD and SIGTERM handlers in this file look up,
// remove and iterate its entries.
163 GHashTable *children;
164 char pidfname[256]; /**< name of our PID file */
165 char pidftemplate[256]; /**< template to be used for the filename of the PID file */
166 char default_authname[] = SYSCONFDIR "/nbd-server/allow"; /**< default name of allow file */
168 int modernsock=0; /**< Socket for the modern handler. Not used
169 if a client was only specified on the
170 command line; only port used if
171 oldstyle is set to false (and then the
172 command-line client isn't used, gna gna) */
173 char* modern_listen; /**< listenaddr value for modernsock */
// modernport starts out pointing at the NBD_DEFAULT_PORT string; the config parser
// in this file may store a newly read string into it.
174 char* modernport=NBD_DEFAULT_PORT; /**< Port number on which to listen for
175 new-style nbd-client connections */
178 * Types of virtualization
// Enumerators of the virtualization style. The type name VIRT_STYLE is taken from
// its use in the server settings structure; the enclosing typedef lines are not
// visible in this extract.
181 VIRT_NONE=0, /**< No virtualization */
182 VIRT_IPLIT, /**< Literal IP address as part of the filename */
183 VIRT_IPHASH, /**< Replacing all dots in an ip address by a / before
184 doing the same as in IPLIT */
185 VIRT_CIDR, /**< Every subnet in its own directory */
189 * Variables associated with a server.
// Fields of the per-export settings structure (used as SERVER throughout this file).
// The typedef opening and closing lines are not visible in this extract.
// String members are released with g_free() in remove_server and copied in dup_serve.
192 gchar* exportname; /**< (unprocessed) filename of the file we're exporting */
193 off_t expected_size; /**< size of the exported file as it was told to
194 us through configuration */
195 gchar* listenaddr; /**< The IP address we're listening on */
196 unsigned int port; /**< port we're exporting this file at */
197 char* authname; /**< filename of the authorization file */
// Bitwise OR of the F_* flag macros.
198 int flags; /**< flags associated with this exported file */
199 int socket; /**< The socket of this server. */
200 int socket_family; /**< family of the socket */
201 VIRT_STYLE virtstyle;/**< The style of virtualization, if any */
202 uint8_t cidrlen; /**< The length of the mask when we use
203 CIDR-style virtualization */
204 gchar* prerun; /**< command to be ran after connecting a client,
205 but before starting to serve */
206 gchar* postrun; /**< command that will be ran after the client
208 gchar* servename; /**< name of the export as selected by nbd-client */
209 int max_connections; /**< maximum number of opened connections */
210 gchar* transactionlog;/**< filename for transaction log */
214 * Variables associated with a client socket.
// Fields of one backing-file record (used as FILE_INFO by get_filepos): an open
// descriptor plus the export offset at which that file begins.
// The typedef opening and closing lines are not visible in this extract.
217 int fhandle; /**< file descriptor */
218 off_t startoff; /**< starting offset of this file */
// Fields of the per-connection state structure (used as CLIENT throughout this file).
// The typedef opening and closing lines are not visible in this extract.
222 off_t exportsize; /**< size of the file we're exporting */
223 char *clientname; /**< peer */
224 char *exportname; /**< (processed) filename of the file we're exporting */
225 GArray *export; /**< array of FILE_INFO of exported files;
226 array size is always 1 unless we're
227 doing the multiple file option */
228 int net; /**< The actual client socket */
229 SERVER *server; /**< The server this client is getting data from */
230 char* difffilename; /**< filename of the copy-on-write file, if any */
231 int difffile; /**< filedescriptor of copyonwrite file. @todo
232 shouldn't this be an array too? (cfr export) Or
233 make -m and -c mutually exclusive */
234 u32 difffilelen; /**< number of pages in difffile */
// expread treats an entry equal to (u32)(-1) as a page that is not in the diff file.
235 u32 *difmap; /**< see comment on the global difmap for this one */
236 gboolean modern; /**< client was negotiated using modern negotiation protocol */
237 int transactionlogfd;/**< fd for transaction log */
241 * Type of configuration file values
// Enumerators of PARAM_TYPE: the value kinds the config parser knows how to read.
// parse_cfile asserts that every table entry uses one of these three.
// The enclosing typedef lines are not visible in this extract.
244 PARAM_INT, /**< This parameter is an integer */
245 PARAM_STRING, /**< This parameter is a string */
246 PARAM_BOOL, /**< This parameter is a boolean */
250 * Configuration file values
// Fields of one config-table entry (used as PARAM by parse_cfile): key name,
// whether it is mandatory, its value kind, where to store it, and the flag bit to
// set or clear for boolean keys.
// NOTE(review): several comment continuation lines and the typedef opening and
// closing lines are missing from this extract.
253 gchar *paramname; /**< Name of the parameter, as it appears in
255 gboolean required; /**< Whether this is a required (as opposed to
256 optional) parameter */
257 PARAM_TYPE ptype; /**< Type of the parameter. */
258 gpointer target; /**< Pointer to where the data of this
259 parameter should be written. If ptype is
260 PARAM_BOOL, the data is or'ed rather than
262 gint flagval; /**< Flag mask for this parameter in case ptype
267 * Translate a command name into human readable form
269 * @param command The command number (after applying NBD_CMD_MASK_COMMAND)
270 * @return pointer to the command name
// Map a command number to the matching NBD_CMD_* name as a constant string.
// NOTE(review): the switch statement, its case labels and whatever is returned for
// an unrecognised command are not visible in this extract.
272 static inline const char * getcommandname(uint64_t command) {
275 return "NBD_CMD_READ";
277 return "NBD_CMD_WRITE";
279 return "NBD_CMD_DISC";
281 return "NBD_CMD_FLUSH";
289 * Check whether a client is allowed to connect. Works with an authorization
290 * file which contains one line per machine, no wildcards.
292 * @param opts The client who's trying to connect.
293 * @return 0 - authorization refused, 1 - OK
// Decide whether the connecting client may use this export by scanning the
// authorization file named in opts->server->authname, one line at a time.
// NOTE(review): some local declarations, every return statement and all closing
// braces are missing from this extract, so the result of each branch is not shown.
295 int authorized_client(CLIENT *opts) {
296 const char *ERRMSG="Invalid entry '%s' in authfile '%s', so, refusing all connections.";
301 struct in_addr client;
302 struct in_addr cltemp;
// Failure to open the authorization file is reported at LOG_INFO level only.
305 if ((f=fopen(opts->server->authname,"r"))==NULL) {
306 msg4(LOG_INFO,"Can't open authorization file %s (%s).",
307 opts->server->authname,strerror(errno)) ;
// NOTE(review): the inet_aton() result is ignored here, so client stays unset when
// clientname is not an IPv4 dotted-quad string.
311 inet_aton(opts->clientname, &client);
312 while (fgets(line,LINELEN,f)!=NULL) {
// Lines containing a slash are treated as address/prefix-length entries.
313 if((tmp=index(line, '/'))) {
// NOTE(review): tmp points inside line, so tmp-line is always smaller than
// strlen(line); this check presumably can never be true - confirm.
314 if(strlen(line)<=tmp-line) {
315 msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
319 if(!inet_aton(line,&addr)) {
320 msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
// NOTE(review): len is taken from strtol() without a range check. A value of 0 or
// above 32 gives an invalid shift count for a 32-bit s_addr. The shifts also act
// on s_addr as stored by inet_aton (network byte order) - confirm this selects the
// intended prefix bits on little-endian hosts.
323 len=strtol(tmp, NULL, 0);
324 addr.s_addr>>=32-len;
325 addr.s_addr<<=32-len;
// The same shift pair is applied to a copy of the client address before comparing.
326 memcpy(&cltemp,&client,sizeof(client));
327 cltemp.s_addr>>=32-len;
328 cltemp.s_addr<<=32-len;
329 if(addr.s_addr == cltemp.s_addr) {
// Plain entries: only the first strlen(clientname) characters are compared.
// NOTE(review): this is a prefix comparison, so a file line that merely begins with
// the client address (for example a longer address) also matches - confirm intended.
333 if (strncmp(line,opts->clientname,strlen(opts->clientname))==0) {
343 * Read data from a file descriptor into a buffer
345 * @param f a file descriptor
346 * @param buf a buffer
347 * @param len the number of bytes to be read
// Read from descriptor f into buf, calling err() when read() fails with anything
// other than EAGAIN.
// NOTE(review): the loop, the bookkeeping of len and buf, and the closing braces are
// missing from this extract.
349 static inline void readit(int f, void *buf, size_t len) {
// NOTE(review): a return of 0 (end of file) also enters this branch, and read() does
// not set errno in that case, so the errno test below sees a stale value.
353 if ((res = read(f, buf, len)) <= 0) {
354 if(errno != EAGAIN) {
355 err("Read failed: %m");
365 * Consume data from an FD that we don't want
367 * @param f a file descriptor
368 * @param buf a buffer
369 * @param len the number of bytes to consume
370 * @param bufsiz the size of the buffer
// Discard unwanted input from f by reading it into buf in pieces of at most bufsiz
// bytes via readit().
// NOTE(review): the loop and the update of len are missing from this extract.
372 static inline void consume(int f, void * buf, size_t len, size_t bufsiz) {
// Clamp the next read to the buffer size.
375 curlen = (len>bufsiz)?bufsiz:len;
376 readit(f, buf, curlen);
383 * Write data from a buffer into a filedescriptor
385 * @param f a file descriptor
386 * @param buf a buffer containing data
387 * @param len the number of bytes to be written
// Write from buf to descriptor f, calling err() when write() returns 0 or less.
// NOTE(review): the loop and the bookkeeping of len and buf are missing from this
// extract. Unlike readit, no errno value is exempted before err() is called.
389 static inline void writeit(int f, void *buf, size_t len) {
393 if ((res = write(f, buf, len)) <= 0)
394 err("Send failed: %m");
401 * Print out a message about how to use nbd-server. Split out to a separate
402 * function so that we can call it from multiple places
// Body of the help printer: writes the version banner, the option summary and the
// default configuration file path to standard output.
// NOTE(review): the function signature line is missing from this extract. The text
// advertises --max-connections, while the option table in cmdline registers the
// long name max-connection, and the dont-fork option is not listed - confirm.
405 printf("This is nbd-server version " VERSION "\n");
406 printf("Usage: [ip:|ip6@]port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-C configuration file] [-p PID file name] [-o section name] [-M max connections]\n"
407 "\t-r|--read-only\t\tread only\n"
408 "\t-m|--multi-file\t\tmultiple file\n"
409 "\t-c|--copy-on-write\tcopy on write\n"
410 "\t-C|--config-file\tspecify an alternate configuration file\n"
411 "\t-l|--authorize-file\tfile with list of hosts that are allowed to\n\t\t\t\tconnect.\n"
412 "\t-p|--pid-file\t\tspecify a filename to write our PID to\n"
413 "\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n"
414 "\t-M|--max-connections\tspecify the maximum number of opened connections\n\n"
415 "\tif port is set to 0, stdin is used (for running from inetd).\n"
416 "\tif file_to_export contains '%%s', it is substituted with the IP\n"
417 "\t\taddress of the machine trying to connect\n"
418 "\tif ip is set, it contains the local IP address on which we're listening.\n\tif not, the server will listen on all local IP addresses\n");
// Always reports the compile-time default path CFILE.
419 printf("Using configuration file %s\n", CFILE);
422 /* Dumps a config file section of the given SERVER*, and exits. */
// Print the settings held in serve to standard output as a config-file section
// headed by section_header. Optional keys are printed only when set.
// NOTE(review): closing braces and the end of the function are missing from this
// extract.
423 void dump_section(SERVER* serve, gchar* section_header) {
424 printf("[%s]\n", section_header);
425 printf("\texportname = %s\n", serve->exportname);
// NOTE(review): cmdline can leave listenaddr as NULL, which is then passed to a
// string conversion here; port is unsigned but printed with a signed conversion.
426 printf("\tlistenaddr = %s\n", serve->listenaddr);
427 printf("\tport = %d\n", serve->port);
428 if(serve->flags & F_READONLY) {
429 printf("\treadonly = true\n");
431 if(serve->flags & F_MULTIFILE) {
432 printf("\tmultifile = true\n");
434 if(serve->flags & F_COPYONWRITE) {
435 printf("\tcopyonwrite = true\n");
437 if(serve->expected_size) {
438 printf("\tfilesize = %lld\n", (long long int)serve->expected_size);
440 if(serve->authname) {
441 printf("\tauthfile = %s\n", serve->authname);
447 * Parse the command line.
449 * @param argc the argc argument to main()
450 * @param argv the argv argument to main()
// Build a SERVER from the command-line arguments using getopt_long().
// NOTE(review): case labels, several declarations, the size-suffix arithmetic, the
// return statements and all closing braces are missing from this extract.
452 SERVER* cmdline(int argc, char *argv[]) {
456 struct option long_options[] = {
457 {"read-only", no_argument, NULL, 'r'},
458 {"multi-file", no_argument, NULL, 'm'},
459 {"copy-on-write", no_argument, NULL, 'c'},
460 {"dont-fork", no_argument, NULL, 'd'},
461 {"authorize-file", required_argument, NULL, 'l'},
462 {"config-file", required_argument, NULL, 'C'},
463 {"pid-file", required_argument, NULL, 'p'},
464 {"output-config", required_argument, NULL, 'o'},
// NOTE(review): registered as max-connection, but the usage text advertises
// --max-connections; getopt_long accepts abbreviations, not longer spellings.
465 {"max-connection", required_argument, NULL, 'M'},
472 gboolean do_output=FALSE;
473 gchar* section_header="";
// Defaults: zero-filled SERVER, the default authorization file, IP-literal style.
479 serve=g_new0(SERVER, 1);
480 serve->authname = g_strdup(default_authname);
481 serve->virtstyle=VIRT_IPLIT;
// The leading dash in the option string asks GNU getopt to hand back every
// non-option argument in order; nonspecial counts how many have been seen.
482 while((c=getopt_long(argc, argv, "-C:cdl:mo:rp:M:", long_options, &i))>=0) {
485 /* non-option argument */
486 switch(nonspecial++) {
// First positional argument: optional listen address plus port. A single colon
// means address:port; otherwise the at-sign is used as the separator.
488 if(strchr(optarg, ':') == strrchr(optarg, ':')) {
489 addr_port=g_strsplit(optarg, ":", 2);
491 /* Check for "@" - maybe user using this separator
494 g_strfreev(addr_port);
495 addr_port=g_strsplit(optarg, "@", 2);
498 addr_port=g_strsplit(optarg, "@", 2);
// NOTE(review): strtol() results are stored without error or range checks.
502 serve->port=strtol(addr_port[1], NULL, 0);
503 serve->listenaddr=g_strdup(addr_port[0]);
505 serve->listenaddr=NULL;
506 serve->port=strtol(addr_port[0], NULL, 0);
508 g_strfreev(addr_port);
// Second positional argument: the export path, which must start with a slash.
511 serve->exportname = g_strdup(optarg);
512 if(serve->exportname[0] != '/') {
513 fprintf(stderr, "E: The to be exported file needs to be an absolute filename!\n");
// Third positional argument: size with an optional k/K/m/M suffix.
// NOTE(review): atoll() gives no way to detect malformed input.
518 last=strlen(optarg)-1;
520 if (suffix == 'k' || suffix == 'K' ||
521 suffix == 'm' || suffix == 'M')
523 es = (off_t)atoll(optarg);
531 serve->expected_size = es;
536 serve->flags |= F_READONLY;
539 serve->flags |= F_MULTIFILE;
543 section_header = g_strdup(optarg);
// NOTE(review): strncpy() leaves pidftemplate without a terminating NUL when optarg
// is 256 characters or longer.
546 strncpy(pidftemplate, optarg, 256);
549 serve->flags |=F_COPYONWRITE;
555 g_free(config_file_pos);
556 config_file_pos=g_strdup(optarg);
559 g_free(serve->authname);
560 serve->authname=g_strdup(optarg);
563 serve->max_connections = strtol(optarg, NULL, 0);
571 /* What's left: the port to export, the name of the to be exported
572 * file, and, optionally, the size of the file, in that order. */
581 g_critical("Need a complete configuration on the command line to output a config file section!");
584 dump_section(serve, section_header);
590 * Error codes for config file parsing
// Enumerators of CFILE_ERRORS: the codes parse_cfile passes to g_set_error().
// NOTE(review): the typedef lines and the end of the last comment are missing from
// this extract. CFILE_KEY_MISSING and CFILE_PROGERR are not used by the visible
// part of parse_cfile.
593 CFILE_NOTFOUND, /**< The configuration file is not found */
594 CFILE_MISSING_GENERIC, /**< The (required) group "generic" is missing */
595 CFILE_KEY_MISSING, /**< A (required) key is missing */
596 CFILE_VALUE_INVALID, /**< A value is syntactically invalid */
597 CFILE_VALUE_UNSUPPORTED,/**< A value is not supported in this build */
598 CFILE_PROGERR, /**< Programmer error */
599 CFILE_NO_EXPORTS, /**< A config file was specified that does not
600 define any exports */
601 CFILE_INCORRECT_PORT, /**< The reserved port was specified for an
606 * Remove a SERVER from memory. Used from the hash table
// Release the heap-allocated string members of a SERVER passed as a generic pointer.
// NOTE(review): the local declaration, the cast of s, some guards and the end of the
// function are missing from this extract. g_free() accepts NULL, so the visible
// NULL guards are presumably redundant. authname is released with g_free() here
// although dup_serve allocates it with strdup().
608 void remove_server(gpointer s) {
612 g_free(server->exportname);
614 g_free(server->authname);
615 if(server->listenaddr)
616 g_free(server->listenaddr);
618 g_free(server->prerun);
620 g_free(server->postrun);
621 if(server->transactionlog)
622 g_free(server->transactionlog);
628 * @param s the old server we want to duplicate
629 * @return new duplicated server
// Create a new SERVER holding copies of the fields of s: scalar members are
// assigned, string members are duplicated.
// NOTE(review): the guards around most string copies, the return statement and the
// closing braces are missing from this extract.
631 SERVER* dup_serve(SERVER *s) {
632 SERVER *serve = NULL;
634 serve=g_new0(SERVER, 1);
639 serve->exportname = g_strdup(s->exportname);
641 serve->expected_size = s->expected_size;
644 serve->listenaddr = g_strdup(s->listenaddr);
646 serve->port = s->port;
// NOTE(review): this is the only copy made with strdup() instead of g_strdup();
// remove_server later frees it with g_free(). strdup() must not be given NULL, and
// any guard for that is not visible here.
649 serve->authname = strdup(s->authname);
651 serve->flags = s->flags;
652 serve->socket = s->socket;
653 serve->socket_family = s->socket_family;
654 serve->virtstyle = s->virtstyle;
655 serve->cidrlen = s->cidrlen;
658 serve->prerun = g_strdup(s->prerun);
661 serve->postrun = g_strdup(s->postrun);
663 if(s->transactionlog)
664 serve->transactionlog = g_strdup(s->transactionlog);
667 serve->servename = g_strdup(s->servename);
669 serve->max_connections = s->max_connections;
675 * append new server to array
677 * @param a server array
678 * @return 0 success, -1 error
// Resolve the listen address of s with getaddrinfo() and append one SERVER per
// resolved address to the array a, each carrying the numeric host string and the
// address family of that result.
// NOTE(review): several declarations, the creation of ns, error branches, cleanup of
// ai and port, and the return statements are missing from this extract.
680 int append_serve(SERVER *s, GArray *a) {
682 struct addrinfo hints;
683 struct addrinfo *ai = NULL;
684 struct addrinfo *rp = NULL;
685 char host[NI_MAXHOST];
691 err("Invalid parsing server");
// getaddrinfo() takes the service as a string, so the port number is formatted.
// NOTE(review): s->port is unsigned but formatted with a signed conversion.
695 port = g_strdup_printf("%d", s->port);
697 memset(&hints,'\0',sizeof(hints));
698 hints.ai_family = AF_UNSPEC;
699 hints.ai_socktype = SOCK_STREAM;
700 hints.ai_flags = AI_ADDRCONFIG | AI_PASSIVE;
701 hints.ai_protocol = IPPROTO_TCP;
703 e = getaddrinfo(s->listenaddr, port, &hints, &ai);
709 for (rp = ai; rp != NULL; rp = rp->ai_next) {
// Convert each result back to a numeric host string.
710 e = getnameinfo(rp->ai_addr, rp->ai_addrlen, host, sizeof(host), NULL, 0, NI_NUMERICHOST);
712 if (e != 0) { // error
713 fprintf(stderr, "getnameinfo: %s\n", gai_strerror(e));
717 // duplicate server and set listenaddr to resolved IP address
// NOTE(review): if ns comes from dup_serve, its previous listenaddr copy is
// overwritten here without being freed - confirm against the missing lines.
720 ns->listenaddr = g_strdup(host);
721 ns->socket_family = rp->ai_family;
// The structure is appended by value.
722 g_array_append_val(a, *ns);
730 fprintf(stderr, "getaddrinfo failed on listen host/address: %s (%s)\n", s->listenaddr ? s->listenaddr : "any", gai_strerror(e));
741 * Parse the config file.
743 * @param f the name of the config file
744 * @param e a GError. @see CFILE_ERRORS for what error values this function can
746 * @return a GArray of SERVER structures. If the config file is empty or does not
747 * exist, returns an empty GArray; if the config file contains an
748 * error, returns NULL, and e is set appropriately
// Load the key file f with GLib GKeyFile, read the global settings from the first
// group (which must be named generic) and turn every other group into SERVER
// entries appended to the returned array. Problems are reported through e using
// the CFILE_* codes.
// NOTE(review): many lines (declarations, the switch over ptype, return statements,
// preprocessor conditionals and closing braces) are missing from this extract.
750 GArray* parse_cfile(gchar* f, GError** e) {
751 const char* DEFAULT_ERROR = "Could not parse %s in group %s: %s";
752 const char* MISSING_REQUIRED_ERROR = "Could not find required value %s in group %s: %s";
754 gchar *virtstyle=NULL;
// Per-export keys. Each entry stores into a member of the scratch SERVER s, or into
// the local virtstyle string; boolean entries set or clear one F_* bit of s.flags.
756 { "exportname", TRUE, PARAM_STRING, &(s.exportname), 0 },
757 { "port", TRUE, PARAM_INT, &(s.port), 0 },
758 { "authfile", FALSE, PARAM_STRING, &(s.authname), 0 },
// NOTE(review): expected_size is an off_t, but PARAM_INT values are stored through a
// gint pointer below, so only gint-sized data is written - confirm large sizes work.
759 { "filesize", FALSE, PARAM_INT, &(s.expected_size), 0 },
760 { "virtstyle", FALSE, PARAM_STRING, &(virtstyle), 0 },
761 { "prerun", FALSE, PARAM_STRING, &(s.prerun), 0 },
762 { "postrun", FALSE, PARAM_STRING, &(s.postrun), 0 },
763 { "transactionlog", FALSE, PARAM_STRING, &(s.transactionlog), 0 },
764 { "readonly", FALSE, PARAM_BOOL, &(s.flags), F_READONLY },
765 { "multifile", FALSE, PARAM_BOOL, &(s.flags), F_MULTIFILE },
766 { "copyonwrite", FALSE, PARAM_BOOL, &(s.flags), F_COPYONWRITE },
767 { "sparse_cow", FALSE, PARAM_BOOL, &(s.flags), F_SPARSE },
768 { "sdp", FALSE, PARAM_BOOL, &(s.flags), F_SDP },
769 { "sync", FALSE, PARAM_BOOL, &(s.flags), F_SYNC },
770 { "flush", FALSE, PARAM_BOOL, &(s.flags), F_FLUSH },
771 { "fua", FALSE, PARAM_BOOL, &(s.flags), F_FUA },
772 { "rotational", FALSE, PARAM_BOOL, &(s.flags), F_ROTATIONAL },
773 { "listenaddr", FALSE, PARAM_STRING, &(s.listenaddr), 0 },
774 { "maxconnections", FALSE, PARAM_INT, &(s.max_connections), 0 },
776 const int lp_size=sizeof(lp)/sizeof(PARAM);
// Keys of the generic group; these store into file-level globals.
778 { "user", FALSE, PARAM_STRING, &runuser, 0 },
779 { "group", FALSE, PARAM_STRING, &rungroup, 0 },
780 { "oldstyle", FALSE, PARAM_BOOL, &do_oldstyle, 1 },
781 { "listenaddr", FALSE, PARAM_STRING, &modern_listen, 0 },
782 { "port", FALSE, PARAM_STRING, &modernport, 0 },
785 int p_size=sizeof(gp)/sizeof(PARAM);
788 const char *err_msg=NULL;
797 errdomain = g_quark_from_string("parse_cfile");
798 cfile = g_key_file_new();
// The result array holds SERVER structures by value and zero-fills new elements.
799 retval = g_array_new(FALSE, TRUE, sizeof(SERVER));
800 if(!g_key_file_load_from_file(cfile, f, G_KEY_FILE_KEEP_COMMENTS |
801 G_KEY_FILE_KEEP_TRANSLATIONS, &err)) {
802 g_set_error(e, errdomain, CFILE_NOTFOUND, "Could not open config file %s.", f);
803 g_key_file_free(cfile);
// The first group of the file has to be the generic group.
806 startgroup = g_key_file_get_start_group(cfile);
807 if(!startgroup || strcmp(startgroup, "generic")) {
808 g_set_error(e, errdomain, CFILE_MISSING_GENERIC, "Config file does not contain the [generic] group!");
809 g_key_file_free(cfile);
812 groups = g_key_file_get_groups(cfile, NULL);
813 for(i=0;groups[i];i++) {
// Start every group from a zeroed scratch SERVER.
814 memset(&s, '\0', sizeof(SERVER));
816 /* After the [generic] group, start parsing exports */
821 for(j=0;j<p_size;j++) {
822 g_assert(p[j].target != NULL);
823 g_assert(p[j].ptype==PARAM_INT||p[j].ptype==PARAM_STRING||p[j].ptype==PARAM_BOOL);
// Integer keys are written through a gint pointer regardless of the target type.
826 *((gint*)p[j].target) =
827 g_key_file_get_integer(cfile,
// String keys store the newly allocated string returned by GLib.
833 *((gchar**)p[j].target) =
834 g_key_file_get_string(cfile,
// Boolean keys set the flag bit when true and clear it when false.
840 value = g_key_file_get_boolean(cfile,
842 p[j].paramname, &err);
845 *((gint*)p[j].target) |= p[j].flagval;
847 *((gint*)p[j].target) &= ~(p[j].flagval);
// NOTE(review): target is passed to strcmp() as if it were a string. For the
// per-export port entry it is the address of an unsigned int, and for the generic
// port entry it is the address of a char pointer - confirm this comparison.
852 if(!strcmp(p[j].paramname, "port") && !strcmp(p[j].target, modernport)) {
853 g_set_error(e, errdomain, CFILE_INCORRECT_PORT, "Config file specifies new-style port for oldstyle export");
854 g_key_file_free(cfile);
// Error handling for the value just read; the guard that makes err non-NULL before
// this dereference is not visible in this extract.
858 if(err->code == G_KEY_FILE_ERROR_KEY_NOT_FOUND) {
860 /* Ignore not-found error for optional values */
864 err_msg = MISSING_REQUIRED_ERROR;
867 err_msg = DEFAULT_ERROR;
869 g_set_error(e, errdomain, CFILE_VALUE_INVALID, err_msg, p[j].paramname, groups[i], err->message);
870 g_array_free(retval, TRUE);
872 g_key_file_free(cfile);
// Translate the virtstyle string. These are prefix comparisons, so trailing text
// after the keyword is accepted; cidrhash needs a length suffix after the keyword.
877 if(!strncmp(virtstyle, "none", 4)) {
878 s.virtstyle=VIRT_NONE;
879 } else if(!strncmp(virtstyle, "ipliteral", 9)) {
880 s.virtstyle=VIRT_IPLIT;
881 } else if(!strncmp(virtstyle, "iphash", 6)) {
882 s.virtstyle=VIRT_IPHASH;
883 } else if(!strncmp(virtstyle, "cidrhash", 8)) {
884 s.virtstyle=VIRT_CIDR;
885 if(strlen(virtstyle)<10) {
886 g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s: missing length", virtstyle, groups[i]);
887 g_array_free(retval, TRUE);
888 g_key_file_free(cfile);
// NOTE(review): the strtol() result is narrowed to the uint8_t cidrlen unchecked.
891 s.cidrlen=strtol(virtstyle+8, NULL, 0);
893 g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s", virtstyle, groups[i]);
894 g_array_free(retval, TRUE);
895 g_key_file_free(cfile);
898 if(s.port && !do_oldstyle) {
899 g_warning("A port was specified, but oldstyle exports were not requested. This may not do what you expect.");
900 g_warning("Please read 'man 5 nbd-server' and search for oldstyle for more info");
// Default style when the key is absent.
903 s.virtstyle=VIRT_IPLIT;
905 /* Don't need to free this, it's not our string */
907 /* Don't append values for the [generic] group */
// The export takes its name from the group; append_serve resolves the address.
// NOTE(review): the return value of append_serve() is not checked here.
909 s.socket_family = AF_UNSPEC;
910 s.servename = groups[i];
912 append_serve(&s, retval);
919 if(s.flags & F_SDP) {
920 g_set_error(e, errdomain, CFILE_VALUE_UNSUPPORTED, "This nbd-server was built without support for SDP, yet group %s uses it", groups[i]);
921 g_array_free(retval, TRUE);
922 g_key_file_free(cfile);
928 g_set_error(e, errdomain, CFILE_NO_EXPORTS, "The config file does not specify any exports");
930 g_key_file_free(cfile);
935 * Signal handler for SIGCHLD
936 * @param s the signal we're handling (must be SIGCHLD, or something
// SIGCHLD handler: reap every child that has exited without blocking, log its exit
// status and drop its PID from the children table.
// NOTE(review): declarations, branch structure and closing braces are missing from
// this extract. The handler calls logging and GLib hash-table functions, which are
// presumably not async-signal-safe - confirm.
939 void sigchld_handler(int s) {
// WNOHANG makes waitpid() return at once when no further child has exited.
944 while((pid=waitpid(-1, &status, WNOHANG)) > 0) {
945 if(WIFEXITED(status)) {
946 msg3(LOG_INFO, "Child exited with %d", WEXITSTATUS(status));
// The table is keyed by a pointer to the PID value.
948 i=g_hash_table_lookup(children, &pid);
950 msg3(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid);
952 DEBUG("Removing %d from the list of children", pid);
953 g_hash_table_remove(children, &pid);
959 * Kill a child. Called from sigterm_handler::g_hash_table_foreach.
962 * @param value the value corresponding to the above key
963 * @param user_data a pointer which we always set to 1, so that we know what
// Callback with the GHFunc signature, passed to g_hash_table_foreach() by
// sigterm_handler; user_data is interpreted as a pointer to int.
// NOTE(review): the statements that act on the child are missing from this extract.
966 void killchild(gpointer key, gpointer value, gpointer user_data) {
968 int *parent=user_data;
975 * Handle SIGTERM and dispatch it to our children
976 * @param s the signal we're handling (must be SIGTERM, or something
977 * is severely wrong).
// SIGTERM handler: runs killchild over every entry of the children table, passing
// the address of a local named parent as user data.
// NOTE(review): the declaration of parent and the rest of the handler are missing
// from this extract.
979 void sigterm_handler(int s) {
982 g_hash_table_foreach(children, killchild, &parent);
992 * Detect the size of a file.
994 * @param fhandle An open filedescriptor
995 * @return the size of the file, or OFFT_MAX if detection was
// Determine the size of the object behind fhandle by trying, in order: the
// BLKGETSIZE64 ioctl (when the build has the headers), fstat(), and lseek() to the
// end of the file. err() is called when nothing yields a size.
// NOTE(review): declarations, some conditionals, return statements and closing
// braces are missing from this extract.
998 off_t size_autodetect(int fhandle) {
1001 struct stat stat_buf;
1004 #ifdef HAVE_SYS_MOUNT_H
1005 #ifdef HAVE_SYS_IOCTL_H
1007 DEBUG("looking for export size with ioctl BLKGETSIZE64\n");
// Accepted only when the ioctl succeeds and reports a non-zero size.
1008 if (!ioctl(fhandle, BLKGETSIZE64, &bytes) && bytes) {
1009 return (off_t)bytes;
1011 #endif /* BLKGETSIZE64 */
1012 #endif /* HAVE_SYS_IOCTL_H */
1013 #endif /* HAVE_SYS_MOUNT_H */
1015 DEBUG("looking for fhandle size with fstat\n");
// st_size is cleared first so a failed fstat() cannot leave a stale value.
1016 stat_buf.st_size = 0;
1017 error = fstat(fhandle, &stat_buf);
// A zero st_size is not accepted and falls through to the next method.
1019 if(stat_buf.st_size > 0)
1020 return (off_t)stat_buf.st_size;
1022 err("fstat failed: %m");
1025 DEBUG("looking for fhandle size with lseek SEEK_END\n");
// NOTE(review): this moves the file offset to the end; later I/O in this file seeks
// explicitly through myseek() first.
1026 es = lseek(fhandle, (off_t)0, SEEK_END);
1027 if (es > ((off_t)0)) {
// The debug output encodes errno as 1 (EBADF), 2 (ESPIPE), 3 (EINVAL) or 4 (other).
1030 DEBUG("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4)));
1033 err("Could not find size of exported block device: %m");
1038 * Get the file handle and offset, given an export offset.
1040 * @param export An array of export files
1041 * @param a The offset to get corresponding file/offset for
1042 * @param fhandle [out] File descriptor
1043 * @param foffset [out] Offset into fhandle
1044 * @param maxbytes [out] Tells how many bytes can be read/written
1045 * from fhandle starting at foffset (0 if there is no limit)
1046 * @return 0 on success, -1 on failure
// Translate an export offset a into the backing file that contains it: binary
// search the FILE_INFO array for the last entry whose startoff is not above a,
// then report its descriptor, the offset inside that file and how many bytes
// remain before the next file starts.
// NOTE(review): the negative-offset check, the search index updates, the final
// branch for the last file and the return statements are missing from this extract.
1048 int get_filepos(GArray* export, off_t a, int* fhandle, off_t* foffset, size_t* maxbytes ) {
1049 /* Negative offset not allowed */
1053 /* Binary search for last file with starting offset <= a */
1056 int end = export->len - 1;
1057 while( start <= end ) {
1058 int mid = (start + end) / 2;
1059 fi = g_array_index(export, FILE_INFO, mid);
1060 if( fi.startoff < a ) {
1062 } else if( fi.startoff > a ) {
1070 /* end should never go negative, since first startoff is 0 and a >= 0 */
// After the search, end indexes the file containing offset a.
1073 fi = g_array_index(export, FILE_INFO, end);
1074 *fhandle = fi.fhandle;
1075 *foffset = a - fi.startoff;
// When another file follows, the usable span ends where that file begins.
// NOTE(review): this compares a signed int with the unsigned array length.
1077 if( end+1 < export->len ) {
1078 FILE_INFO fi_next = g_array_index(export, FILE_INFO, end+1);
1079 *maxbytes = fi_next.startoff - a;
1086 * seek to a position in a file, with error handling.
1087 * @param handle a filedescriptor
1088 * @param a position to seek to
1089 * @todo get rid of this; lastpoint is a global variable right now, but it
1090 * shouldn't be. If we pass it on as a parameter, that makes things a *lot*
/**
 * Position a file descriptor at an absolute offset.
 *
 * @param handle the file descriptor to reposition
 * @param a the absolute offset, measured from the start of the file
 *
 * A failing lseek() is reported through err(); nothing is returned to the
 * caller either way.
 */
void myseek(int handle,off_t a) {
	const off_t reached = lseek(handle, a, SEEK_SET);

	/* lseek() reports failure with a negative result. */
	if (reached >= 0)
		return;

	err("Can not seek locally!\n");
}
1100 * Write an amount of bytes at a given offset to the right file. This
1101 * abstracts the write-side of the multiple file option.
1103 * @param a The offset where the write should start
1104 * @param buf The buffer to write from
1105 * @param len The length of buf
1106 * @param client The client we're serving for
1107 * @param fua Flag to indicate 'Force Unit Access'
1108 * @return The number of bytes actually written, or -1 in case of an error
// Write up to len bytes from buf at export offset a: locate the backing file with
// get_filepos(), seek there with myseek() and issue a single write().
// NOTE(review): declarations, the clamp of len to maxbytes, the sync calls, the
// return statements and the closing of the long comment are missing from this
// extract, so what happens for F_SYNC and fua is only partly visible.
1110 ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
1116 if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
// A maxbytes of zero means the file imposes no limit (see get_filepos contract).
1118 if(maxbytes && len > maxbytes)
1121 DEBUG("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle, (long long unsigned)foffset, (unsigned int)len, fua);
1123 myseek(fhandle, foffset);
// A single write() may transfer fewer than len bytes; rawexpwrite_fully loops.
1124 retval = write(fhandle, buf, len);
1125 if(client->server->flags & F_SYNC) {
1129 /* This is where we would do the following
1130 * #ifdef USE_SYNC_FILE_RANGE
1131 * However, we don't, for the reasons set out below
1132 * by Christoph Hellwig <hch@infradead.org>
1135 * fdatasync is equivalent to fsync except that it does not flush
1136 * non-essential metadata (basically just timestamps in practice), but it
1137 * does flush metadata requried to find the data again, e.g. allocation
1138 * information and extent maps. sync_file_range does nothing but flush
1139 * out pagecache content - it means you basically won't get your data
1140 * back in case of a crash if you either:
1142 * a) have a volatile write cache in your disk (e.g. any normal SATA disk)
1143 * b) are using a sparse file on a filesystem
1144 * c) are using a fallocate-preallocated file on a filesystem
1145 * d) use any file on a COW filesystem like btrfs
1147 * e.g. it only does anything useful for you if you do not have a volatile
1148 * write cache, and either use a raw block device node, or just overwrite
1149 * an already fully allocated (and not preallocated) file on a non-COW
1153 * What we should do is open a second FD with O_DSYNC set, then write to
1154 * that when appropriate. However, with a Linux client, every REQ_FUA
1155 * immediately follows a REQ_FLUSH, so fdatasync does not cause performance
1160 sync_file_range(fhandle, foffset, len,
1161 SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE |
1162 SYNC_FILE_RANGE_WAIT_AFTER);
1171 * Call rawexpwrite repeatedly until all data has been written.
1173 * @param a The offset where the write should start
1174 * @param buf The buffer to write from
1175 * @param len The length of buf
1176 * @param client The client we're serving for
1177 * @param fua Flag to indicate 'Force Unit Access'
1178 * @return 0 on success, nonzero on failure
// Repeat rawexpwrite() while data remains and each call reports progress.
// Returns non-zero when the last call failed or some bytes are still unwritten.
// NOTE(review): the declaration of ret and the loop body are missing from this
// extract.
1180 int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
// The loop stops on error (negative) and also when a call writes nothing (zero).
1183 while(len > 0 && (ret=rawexpwrite(a, buf, len, client, fua)) > 0 ) {
1188 return (ret < 0 || len != 0);
1192 * Read an amount of bytes at a given offset from the right file. This
1193 * abstracts the read-side of the multiple files option.
1195 * @param a The offset where the read should start
1196 * @param buf A buffer to read into
1197 * @param len The size of buf
1198 * @param client The client we're serving for
1199 * @return The number of bytes actually read, or -1 in case of an
// Read up to len bytes into buf from export offset a: locate the backing file with
// get_filepos(), seek there with myseek() and return the result of one read().
// NOTE(review): declarations, the failure return and the clamp of len to maxbytes
// are missing from this extract.
1202 ssize_t rawexpread(off_t a, char *buf, size_t len, CLIENT *client) {
1207 if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
// A maxbytes of zero means the file imposes no limit (see get_filepos contract).
1209 if(maxbytes && len > maxbytes)
1212 DEBUG("(READ from fd %d offset %llu len %u), ", fhandle, (long long unsigned int)foffset, (unsigned int)len);
1214 myseek(fhandle, foffset);
// A single read() may return fewer than len bytes; rawexpread_fully loops.
1215 return read(fhandle, buf, len);
1219 * Call rawexpread repeatedly until all data has been read.
1220 * @return 0 on success, nonzero on failure
// Drives rawexpread() in a loop so that one short read does not end the transfer early.
1222 int rawexpread_fully(off_t a, char *buf, size_t len, CLIENT *client) {
// Keep going while bytes remain and the previous rawexpread() call returned a positive count.
// NOTE(review): the loop body is not visible here; presumably it advances a and buf and shrinks len by ret.
1225 while(len > 0 && (ret=rawexpread(a, buf, len, client)) > 0 ) {
// Evaluates to 1 when the last call returned a negative value or bytes are still outstanding, otherwise 0.
1230 return (ret < 0 || len != 0);
1234 * Read an amount of bytes at a given offset from the right file. This
1235 * abstracts the read-side of the copyonwrite stuff, and calls
1236 * rawexpread() with the right parameters to do the actual work.
1237 * @param a The offset where the read should start
1238 * @param buf A buffer to read into
1239 * @param len The size of buf
1240 * @param client The client we're going to read for
1241 * @return 0 on success, nonzero on failure
// Reads len bytes at export offset a into buf.
// Without F_COPYONWRITE this is a plain rawexpread_fully(); with it, each DIFFPAGESIZE page is
// taken from the diff file when difmap has an entry for it and from the export otherwise.
1243 int expread(off_t a, char *buf, size_t len, CLIENT *client) {
1244 off_t rdlen, offset;
1245 off_t mapcnt, mapl, maph, pagestart;
// Fast path: no copy-on-write layer configured for this server.
1247 if (!(client->server->flags & F_COPYONWRITE))
1248 return(rawexpread_fully(a, buf, len, client));
1249 DEBUG("Asked to read %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
// Indices of the first and last diff page touched by the byte range starting at a.
1251 mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
1253 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
1254 pagestart=mapcnt*DIFFPAGESIZE;
// rdlen is the smaller of the remaining request and the space left in this page.
// NOTE(review): the assignment to offset is not visible here; presumably offset = a - pagestart as in expwrite().
1256 rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
1257 len : (size_t)DIFFPAGESIZE-offset;
// A difmap entry of (u32)-1 marks a page that has no copy in the diff file.
1258 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1259 DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
1260 (unsigned long)(client->difmap[mapcnt]));
// The difmap entry is the page slot inside the diff file; a short read() is treated as failure.
1261 myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset);
1262 if (read(client->difffile, buf, rdlen) != rdlen) return -1;
1263 } else { /* the block is not there */
1264 DEBUG("Page %llu is not here, we read the original one\n",
1265 (unsigned long long)mapcnt);
1266 if(rawexpread_fully(a, buf, rdlen, client)) return -1;
// Advance past the part that was just served.
1268 len-=rdlen; a+=rdlen; buf+=rdlen;
1274 * Write an amount of bytes at a given offset to the right file. This
1275 * abstracts the write-side of the copyonwrite option, and calls
1276 * rawexpwrite() with the right parameters to do the actual work.
1278 * @param a The offset where the write should start
1279 * @param buf The buffer to write from
1280 * @param len The length of buf
1281 * @param client The client we're going to write for.
1282 * @param fua Flag to indicate 'Force Unit Access'
1283 * @return 0 on success, nonzero on failure
// Writes len bytes from buf at export offset a.
// Without F_COPYONWRITE this is a plain rawexpwrite_fully(). With it, the export is never touched:
// each affected DIFFPAGESIZE page is updated in the diff file, after first being copied from the
// export when difmap has no entry for it yet. Returns 0 on success and nonzero on failure.
1285 int expwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
1286 char pagebuf[DIFFPAGESIZE];
1287 off_t mapcnt,mapl,maph;
// Fast path: no copy-on-write layer configured for this server.
1292 if (!(client->server->flags & F_COPYONWRITE))
1293 return(rawexpwrite_fully(a, buf, len, client, fua));
1294 DEBUG("Asked to write %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
// Indices of the first and last diff page touched by the byte range starting at a.
1296 mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
1298 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
1299 pagestart=mapcnt*DIFFPAGESIZE ;
1300 offset=a-pagestart ;
// wrlen is the smaller of the remaining request and the space left in this page.
1301 wrlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
1302 len : (size_t)DIFFPAGESIZE-offset;
// A difmap entry of (u32)-1 marks a page that has no copy in the diff file.
1304 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1305 DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
1306 (unsigned long)(client->difmap[mapcnt])) ;
1307 myseek(client->difffile,
1308 client->difmap[mapcnt]*DIFFPAGESIZE+offset);
1309 if (write(client->difffile, buf, wrlen) != wrlen) return -1 ;
1310 } else { /* the block is not there */
// Record the slot first, then seek to that same slot. Seeking to difffilelen*DIFFPAGESIZE
// before the assignment put F_SPARSE pages at a position that does not match the index
// stored in difmap, which is where the branch above and expread() look for them.
// For the non-sparse case the slot is the old difffilelen, so the seek target is unchanged.
1311 client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++;
1312 myseek(client->difffile,client->difmap[mapcnt]*DIFFPAGESIZE) ;
1313 DEBUG("Page %llu is not here, we put it at %lu\n",
1314 (unsigned long long)mapcnt,
1315 (unsigned long)(client->difmap[mapcnt]));
// Fetch the whole original page, overlay the new bytes, and store the full page.
1316 rdlen=DIFFPAGESIZE ;
1317 if (rawexpread_fully(pagestart, pagebuf, rdlen, client))
1319 memcpy(pagebuf+offset,buf,wrlen) ;
1320 if (write(client->difffile, pagebuf, DIFFPAGESIZE) !=
// Advance past the part that was just written.
1324 len-=wrlen ; a+=wrlen ; buf+=wrlen ;
// F_SYNC forces the diff file to disk with fsync().
// NOTE(review): the condition guarding the fdatasync() below is not visible here; presumably it is the fua flag.
1326 if (client->server->flags & F_SYNC) {
1327 fsync(client->difffile);
1329 /* open question: would it be cheaper to do multiple sync_file_ranges?
1330 as we iterate through the above?
1332 fdatasync(client->difffile);
1338 * Flush a client's exported data to disk (fsync)
1340 * @param client The client we're going to write for.
1341 * @return 0 on success, nonzero on failure
// Forces pending writes for this client to disk with fsync().
1343 int expflush(CLIENT *client) {
// In copy-on-write mode only the diff file is synced and the fsync() result is returned directly.
1346 if (client->server->flags & F_COPYONWRITE) {
1347 return fsync(client->difffile);
// Otherwise every FILE_INFO in the export array has its descriptor synced in turn.
1350 for (i = 0; i < client->export->len; i++) {
1351 FILE_INFO fi = g_array_index(client->export, FILE_INFO, i);
// NOTE(review): the statement taken on fsync() failure is not visible here.
1352 if (fsync(fi.fhandle) < 0)
1360 * Do the initial negotiation.
1362 * @param client The client we're negotiating with.
// Performs the opening handshake on socket net.
// When client is NULL the export is chosen by the name the peer sends, matched against servers,
// and a new CLIENT is allocated for it; otherwise the given client is used.
1364 CLIENT* negotiate(int net, CLIENT *client, GArray* servers) {
1367 uint32_t flags = NBD_FLAG_HAS_FLAGS;
1368 uint16_t smallflags = 0;
1371 memset(zeros, '\0', sizeof(zeros));
// This branch runs when no client exists yet or the client is not marked modern.
1372 if(!client || !client->modern) {
// The 8-byte INIT_PASSWD greeting goes out first.
1374 if (write(net, INIT_PASSWD, 8) < 0) {
1375 err_nonfatal("Negotiation failed: %m");
// Two magic values are prepared below: opts_magic and cliserv_magic.
// NOTE(review): the else line between them is not visible; presumably cliserv_magic is the non-modern case.
1379 if(!client || client->modern) {
1381 magic = htonll(opts_magic);
1384 magic = htonll(cliserv_magic);
1386 if (write(net, &magic, sizeof(magic)) < 0) {
1387 err_nonfatal("Negotiation failed: %m");
// NOTE(review): the condition leading to the programmer error below is not visible here.
1401 err("programmer error");
// Option exchange: send the 16-bit flags, then read the reserved field and the option magic.
// NOTE(review): each read() result is only checked for < 0, so a short read goes unnoticed.
1402 if (write(net, &smallflags, sizeof(uint16_t)) < 0)
1403 err("Negotiation failed: %m");
1404 if (read(net, &reserved, sizeof(reserved)) < 0)
1405 err("Negotiation failed: %m");
1406 if (read(net, &magic, sizeof(magic)) < 0)
1407 err("Negotiation failed: %m");
1408 magic = ntohll(magic);
1409 if(magic != opts_magic) {
1413 if (read(net, &opt, sizeof(opt)) < 0)
1414 err("Negotiation failed: %m");
// NBD_OPT_EXPORT_NAME is the only option value compared against here.
1416 if(opt != NBD_OPT_EXPORT_NAME) {
1420 if (read(net, &namelen, sizeof(namelen)) < 0)
1421 err("Negotiation failed: %m");
1422 namelen = ntohl(namelen);
// NOTE(review): namelen comes straight from the peer; namelen+1 is not checked for wrap-around
// and the malloc() result is not checked on any visible line.
1423 name = malloc(namelen+1);
1425 if (read(net, name, namelen) < 0)
1426 err("Negotiation failed: %m");
// Look for a configured server whose servename equals the requested name.
1427 for(i=0; i<servers->len; i++) {
1428 SERVER* serve = &(g_array_index(servers, SERVER, i));
1429 if(!strcmp(serve->servename, name)) {
// This declaration shadows the client parameter inside the loop body.
1430 CLIENT* client = g_new0(CLIENT, 1);
1431 client->server = serve;
1432 client->exportsize = OFFT_MAX;
1434 client->modern = TRUE;
1435 client->transactionlogfd = -1;
// Announce the export size as a 64-bit big-endian value.
1444 size_host = htonll((u64)(client->exportsize));
1445 if (write(net, &size_host, 8) < 0)
1446 err("Negotiation failed: %m");
// Translate server configuration flags into the protocol flag bits.
1447 if (client->server->flags & F_READONLY)
1448 flags |= NBD_FLAG_READ_ONLY;
1449 if (client->server->flags & F_FLUSH)
1450 flags |= NBD_FLAG_SEND_FLUSH;
1451 if (client->server->flags & F_FUA)
1452 flags |= NBD_FLAG_SEND_FUA;
1453 if (client->server->flags & F_ROTATIONAL)
1454 flags |= NBD_FLAG_ROTATIONAL;
// Non-modern clients get the flags as 32 bits; the 16-bit form below is for the other case.
1455 if (!client->modern) {
1457 flags = htonl(flags);
1458 if (write(client->net, &flags, 4) < 0)
1459 err("Negotiation failed: %m");
// The mask operand is promoted to int before the complement, so it keeps every bit;
// the narrowing cast is what actually reduces flags to its low 16 bits.
1462 smallflags = (uint16_t)(flags & ~((uint16_t)0));
1463 smallflags = htons(smallflags);
1464 if (write(client->net, &smallflags, sizeof(smallflags)) < 0) {
1465 err("Negotiation failed: %m");
// 124 bytes taken from the zeroed buffer close the handshake.
1469 if (write(client->net, zeros, 124) < 0)
1470 err("Negotiation failed: %m");
1474 /** sending macro. */
// SEND writes reply to net and, when a transaction log is open, to that log as well.
// It reads client->transactionlogfd, so a variable named client must be in scope at the call site.
// The expansion is a bare brace block rather than a do-while(0) statement.
1475 #define SEND(net,reply) { writeit( net, &reply, sizeof( reply )); \
1476 if (client->transactionlogfd != -1) \
1477 writeit(client->transactionlogfd, &reply, sizeof(reply)); }
// ERROR stores errcode in network byte order in reply.error, sends the reply, then resets the field to 0.
1479 #define ERROR(client,reply,errcode) { reply.error = htonl(errcode); SEND(client->net,reply); reply.error = 0; }
1481 * Serve a file to a single client.
1483 * @todo This beast needs to be split up in many tiny little manageable
1484 * pieces. Preferably with a chainsaw.
1486 * @param client The client we're going to serve to.
1487 * @return when the client disconnects
// Request loop for one connected client: negotiate, then read requests and answer them.
// NOTE(review): the loop header and the command dispatch lines are not visible here, so the
// grouping of the sections below by command is inferred from their DEBUG and log strings.
1489 int mainloop(CLIENT *client) {
1490 struct nbd_request request;
1491 struct nbd_reply reply;
1492 gboolean go_on=TRUE;
// servers is passed as NULL because the client already exists.
1496 negotiate(client->net, client, NULL);
1497 DEBUG("Entering request loop!\n");
// The reply magic is set once and reused for every reply.
1498 reply.magic = htonl(NBD_REPLY_MAGIC);
// Read one request header and mirror it to the transaction log when one is open.
1511 readit(client->net, &request, sizeof(request));
1512 if (client->transactionlogfd != -1)
1513 writeit(client->transactionlogfd, &request, sizeof(request));
// Convert the header fields from network byte order; the command is the masked part of type.
1515 request.from = ntohll(request.from);
1516 request.type = ntohl(request.type);
1517 command = request.type & NBD_CMD_MASK_COMMAND;
1518 len = ntohl(request.len);
1520 DEBUG("%s from %llu (%llu) len %d, ", getcommandname(command),
1521 (unsigned long long)request.from,
1522 (unsigned long long)request.from / 512, (unsigned int)len);
// request.magic is still in network byte order, hence the comparison against htonl().
1524 if (request.magic != htonl(NBD_REQUEST_MAGIC))
1525 err("Not enough magic.");
// The reply echoes the handle of the request.
1527 memcpy(reply.handle, request.handle, sizeof(reply.handle));
// Range checks apply to reads and writes only; both failures answer with EINVAL.
1529 if ((command==NBD_CMD_WRITE) || (command==NBD_CMD_READ)) {
1530 if ((request.from + len) > (OFFT_MAX)) {
1531 DEBUG("[Number too large!]");
1532 ERROR(client, reply, EINVAL);
1536 if (((ssize_t)((off_t)request.from + len) > client->exportsize)) {
1538 ERROR(client, reply, EINVAL);
// Requests larger than the buffer (minus room for a reply header) are handled in pieces of currlen.
1543 if (currlen > BUFSIZE - sizeof(struct nbd_reply)) {
1544 currlen = BUFSIZE - sizeof(struct nbd_reply);
1545 msg2(LOG_INFO, "oversized request (this is not a problem)");
// Disconnect: in copy-on-write mode the map is freed and the diff file is closed and unlinked.
1552 msg2(LOG_INFO, "Disconnect request received.");
1553 if (client->server->flags & F_COPYONWRITE) {
1554 if (client->difmap) g_free(client->difmap) ;
1555 close(client->difffile);
1556 unlink(client->difffilename);
1557 free(client->difffilename);
// Write path: pull currlen bytes of payload from the socket into buf.
1563 DEBUG("wr: net->buf, ");
1565 readit(client->net, buf, currlen);
1566 DEBUG("buf->exp, ");
// Read-only exports answer EPERM; consume() is given the rest of the payload (len-currlen).
1567 if ((client->server->flags & F_READONLY) ||
1568 (client->server->flags & F_AUTOREADONLY)) {
1569 DEBUG("[WRITE to READONLY!]");
1570 ERROR(client, reply, EPERM);
1571 consume(client->net, buf, len-currlen, BUFSIZE);
// The FUA bit of the request type is forwarded to expwrite().
1574 if (expwrite(request.from, buf, currlen, client,
1575 request.type & NBD_CMD_FLAG_FUA)) {
// NOTE(review): errno is read after DEBUG() has run; confirm DEBUG() cannot modify it.
1576 DEBUG("Write failed: %m" );
1577 ERROR(client, reply, errno);
1578 consume(client->net, buf, len-currlen, BUFSIZE);
// Move on to the next piece of an oversized write.
1582 request.from += currlen;
1583 currlen = (len < BUFSIZE) ? len : BUFSIZE;
1585 SEND(client->net, reply);
// Flush path: expflush() failure is reported with errno, success with a plain reply.
1591 if (expflush(client)) {
1592 DEBUG("Flush failed: %m");
1593 ERROR(client, reply, errno);
1596 SEND(client->net, reply);
// Read path: the reply header is placed at the start of buf and the data is read in right behind it,
// so header and payload leave in a single writeit().
1601 DEBUG("exp->buf, ");
1602 memcpy(buf, &reply, sizeof(struct nbd_reply));
1603 if (client->transactionlogfd != -1)
1604 writeit(client->transactionlogfd, &reply, sizeof(reply));
1605 p = buf + sizeof(struct nbd_reply);
1606 writelen = currlen + sizeof(struct nbd_reply);
1608 if (expread(request.from, p, currlen, client)) {
1609 DEBUG("Read failed: %m");
1610 ERROR(client, reply, errno);
1614 DEBUG("buf->net, ");
1615 writeit(client->net, buf, writelen);
// Move on to the next piece of an oversized read.
1617 request.from += currlen;
1618 currlen = (len < BUFSIZE) ? len : BUFSIZE;
1626 DEBUG ("Ignoring unknown command\n");
1634 * Set up client export array, which is an array of FILE_INFO.
1635 * Also, split a single exportfile into multiple ones, if that was asked.
1636 * @param client information on the client for which we want to set up the export
// Opens the file or files behind client->exportname, records them as FILE_INFO entries in
// client->export, and sets client->exportsize from their accumulated sizes.
1638 void setupexport(CLIENT* client) {
1640 off_t laststartoff = 0, lastsize = 0;
1641 int multifile = (client->server->flags & F_MULTIFILE);
1643 client->export = g_array_new(TRUE, TRUE, sizeof(FILE_INFO));
1645 /* If multi-file, open as many files as we can.
1646 * If not, open exactly one file.
1647 * Calculate file sizes as we go to get total size. */
1651 gchar* error_string;
// Read-only servers open with O_RDONLY, everything else with O_RDWR.
1652 mode_t mode = (client->server->flags & F_READONLY) ? O_RDONLY : O_RDWR;
// Two candidate names: exportname.i and the bare exportname.
// NOTE(review): the selecting condition is not visible; presumably the suffixed form is the multifile case.
1655 tmpname=g_strdup_printf("%s.%d", client->exportname, i);
1657 tmpname=g_strdup(client->exportname);
1659 DEBUG( "Opening %s\n", tmpname );
1660 fi.fhandle = open(tmpname, mode);
1661 if(fi.fhandle == -1 && mode == O_RDWR) {
1662 /* Try again because maybe media was read-only */
1663 fi.fhandle = open(tmpname, O_RDONLY);
1664 if(fi.fhandle != -1) {
1665 /* Opening the base file in copyonwrite mode is
// The read-only fallback marks the server F_AUTOREADONLY and F_READONLY, except in copy-on-write mode.
1667 if(!(client->server->flags & F_COPYONWRITE)) {
1668 client->server->flags |= F_AUTOREADONLY;
1669 client->server->flags |= F_READONLY;
// Open failure: the multifile and i>0 test selects a statement that is not visible here;
// otherwise an error string naming the file is built.
1673 if(fi.fhandle == -1) {
1674 if(multifile && i>0)
1676 error_string=g_strdup_printf(
1677 "Could not open exported file %s: %%m",
// Each file starts where the previous one ended.
1681 fi.startoff = laststartoff + lastsize;
1682 g_array_append_val(client->export, fi);
1685 /* Starting offset and size of this file will be used to
1686 * calculate starting offset of next file */
1687 laststartoff = fi.startoff;
1688 lastsize = size_autodetect(fi.fhandle);
1694 /* Set export size to total calculated size */
1695 client->exportsize = laststartoff + lastsize;
1697 /* Export size may be overridden */
1698 if(client->server->expected_size) {
1699 /* desired size must be <= total calculated size */
// The error fires when the configured expected_size is larger than the detected total.
1700 if(client->server->expected_size > client->exportsize) {
1701 err("Size of exported file is too big\n");
1704 client->exportsize = client->server->expected_size;
1707 msg3(LOG_INFO, "Size of exported file/device is %llu", (unsigned long long)client->exportsize);
1709 msg3(LOG_INFO, "Total number of files: %d", i);
// Creates the per-client diff file and the page map used by the copy-on-write layer.
// The diff file name is built from exportname, clientname and a third value supplied on a line
// that is not visible here. Failures are reported through err().
1713 int copyonwrite_prepare(CLIENT* client) {
1715 if ((client->difffilename = malloc(1024))==NULL)
1716 err("Failed to allocate string for diff file name");
1717 snprintf(client->difffilename, 1024, "%s-%s-%d.diff",client->exportname,client->clientname,
1719 client->difffilename[1023]='\0';
1720 msg3(LOG_INFO,"About to create map and diff file %s",client->difffilename) ;
// The diff file is created fresh (O_TRUNC) with owner-only permissions.
1721 client->difffile=open(client->difffilename,O_RDWR | O_CREAT | O_TRUNC,0600) ;
1722 if (client->difffile<0) err("Could not create diff file (%m)") ;
// The page count is rounded up. expread() and expwrite() index difmap with
// (a+len-1)/DIFFPAGESIZE, which reaches a trailing partial page whenever exportsize is not a
// multiple of DIFFPAGESIZE; rounding down left that index outside the array.
1723 if ((client->difmap=calloc((client->exportsize+DIFFPAGESIZE-1)/DIFFPAGESIZE,sizeof(u32)))==NULL)
1724 err("Could not allocate memory") ;
// (u32)-1 marks a page that has no copy in the diff file yet.
1725 for (i=0;i<(client->exportsize+DIFFPAGESIZE-1)/DIFFPAGESIZE;i++) client->difmap[i]=(u32)-1 ;
1731 * Run a command. This is used for the ``prerun'' and ``postrun'' config file
1734 * @param command the command to be run. Read from the config file
1735 * @param file the file name we're about to export
// Builds a command line from command and file; used for the prerun and postrun hooks.
// NOTE(review): how cmd is executed and what is returned is not visible here.
1737 int do_run(gchar* command, gchar* file) {
// Nothing is built for a NULL or empty command.
1741 if(command && *command) {
// command itself serves as the printf-style format and file as its single argument,
// so the configured string is expected to contain at most one string conversion.
1742 cmd = g_strdup_printf(command, file);
1750 * Serve a connection.
1752 * @todo allow for multithreading, perhaps use libevent. Not just yet, though;
1753 * follow the road map.
1755 * @param client a connected client
// Serves one client: opens the transaction log if configured, runs the prerun hook, sets up the
// export (and the copy-on-write layer when enabled), then runs the postrun hook and closes the log.
// NOTE(review): the call that runs the request loop is not visible here.
1757 void serveconnection(CLIENT *client) {
// Open the log only when one is configured and none is open yet.
1758 if (client->server->transactionlog && (client->transactionlogfd == -1))
1760 if (-1 == (client->transactionlogfd = open(client->server->transactionlog,
1762 S_IRUSR | S_IWUSR)))
// A log that cannot be opened only produces a warning.
1763 g_warning("Could not open transaction log %s",
1764 client->server->transactionlog);
// NOTE(review): what happens when the prerun hook returns nonzero is not visible here.
1767 if(do_run(client->server->prerun, client->exportname)) {
1770 setupexport(client);
1772 if (client->server->flags & F_COPYONWRITE) {
1773 copyonwrite_prepare(client);
1776 setmysockopt(client->net);
// The postrun hook result is ignored.
1779 do_run(client->server->postrun, client->exportname);
1781 if (-1 != client->transactionlogfd)
1783 close(client->transactionlogfd);
1784 client->transactionlogfd = -1;
1789 * Find the name of the file we have to serve. This will use g_strdup_printf
1790 * to put the IP address of the client inside a filename containing
1791 * "%s" (in the form as specified by the "virtstyle" option). That name
1792 * is then written to client->exportname.
1794 * @param net A socket connected to an nbd client
1795 * @param client information about the client. The IP address in human-readable
1796 * format will be written to a new char* buffer, the address of which will be
1797 * stored in client->clientname.
// Looks up the address of the peer on socket net, derives client->exportname from the server
// export name according to the virtstyle setting, and stores the numeric peer address in
// client->clientname.
// NOTE(review): the case labels of the switch are not visible here, so which virtstyle value
// selects which of the sections below cannot be stated.
1799 void set_peername(int net, CLIENT *client) {
1800 struct sockaddr_storage addrin;
1801 struct sockaddr_storage netaddr;
1802 struct sockaddr_in *netaddr4 = NULL;
1803 struct sockaddr_in6 *netaddr6 = NULL;
// socklen_t is the type getpeername() writes through; a size_t accessed through a cast pointer
// is only partly updated where the two types differ in width.
1804 socklen_t addrinlen = sizeof( addrin );
1805 struct addrinfo hints;
1806 struct addrinfo *ai = NULL;
1807 char peername[NI_MAXHOST];
1808 char netname[NI_MAXHOST];
// The message names the call that actually failed.
1814 if (getpeername(net, (struct sockaddr *) &addrin, &addrinlen) < 0)
1815 err("getpeername failed: %m");
// NI_NUMERICHOST yields the numeric form of the peer address, without a name lookup.
1817 getnameinfo((struct sockaddr *)&addrin, (socklen_t)addrinlen,
1818 peername, sizeof (peername), NULL, 0, NI_NUMERICHOST);
// The numeric string is resolved again only to learn the address family (ai->ai_family).
1820 memset(&hints, '\0', sizeof (hints));
1821 hints.ai_flags = AI_ADDRCONFIG;
1822 e = getaddrinfo(peername, NULL, &hints, &ai);
1825 fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
1830 switch(client->server->virtstyle) {
// Plain copy of the configured export name.
1832 client->exportname=g_strdup(client->server->exportname);
// Scan peername for dots; the action taken on a dot is not visible here.
1835 for(i=0;i<strlen(peername);i++) {
1836 if(peername[i]=='.') {
// The configured export name is used as a format with peername as its argument.
1841 client->exportname=g_strdup_printf(client->server->exportname, peername);
// CIDR handling: mask a copy of the peer address down to cidrlen bits and format it as netname/peername.
1844 memcpy(&netaddr, &addrin, addrinlen);
1845 if(ai->ai_family == AF_INET) {
1846 netaddr4 = (struct sockaddr_in *)&netaddr;
// NOTE(review): s_addr is held in network byte order, so shifting it directly may not isolate
// the leading cidrlen bits on little-endian hosts, and a cidrlen of 0 shifts by the full
// width -- confirm.
1847 (netaddr4->sin_addr).s_addr>>=32-(client->server->cidrlen);
1848 (netaddr4->sin_addr).s_addr<<=32-(client->server->cidrlen);
1850 getnameinfo((struct sockaddr *) netaddr4, (socklen_t) addrinlen,
1851 netname, sizeof (netname), NULL, 0, NI_NUMERICHOST);
1852 tmp=g_strdup_printf("%s/%s", netname, peername);
1853 }else if(ai->ai_family == AF_INET6) {
1854 netaddr6 = (struct sockaddr_in6 *)&netaddr;
// Whole 32-bit words beyond the prefix are zeroed; the remaining word is masked by shifting.
1856 shift = 128-(client->server->cidrlen);
1858 while(shift >= 32) {
1859 ((netaddr6->sin6_addr).s6_addr32[i])=0;
1863 (netaddr6->sin6_addr).s6_addr32[i]>>=shift;
1864 (netaddr6->sin6_addr).s6_addr32[i]<<=shift;
1866 getnameinfo((struct sockaddr *)netaddr6, (socklen_t)addrinlen,
1867 netname, sizeof(netname), NULL, 0, NI_NUMERICHOST);
1868 tmp=g_strdup_printf("%s/%s", netname, peername);
1872 client->exportname=g_strdup_printf(client->server->exportname, tmp);
1878 msg4(LOG_INFO, "connect from %s, assigned file is %s",
1879 peername, client->exportname);
1880 client->clientname=g_strdup(peername);
1885 * @param data a pointer to pid_t which should be freed
// Passed to g_hash_table_new_full() in setup_servers() as the value-destroy callback of the children table.
// NOTE(review): the body is not visible here.
1887 void destroy_pid_t(gpointer data) {
1892 * Loop through the available servers, and serve them. Never returns.
// Accept loop: waits with select() on every listening socket, accepts a connection,
// checks limits and authorization, and forks a child that serves the client.
1894 int serveloop(GArray* servers) {
1895 struct sockaddr_storage addrin;
// NOTE(review): accept() updates addrinlen; no visible line resets it before the next accept().
1896 socklen_t addrinlen=sizeof(addrin);
1904 * Set up the master fd_set. The set of descriptors we need
1905 * to select() for never changes anyway and it buys us a *lot*
1906 * of time to only build this once. However, if we ever choose
1907 * to not fork() for clients anymore, we may have to revisit
// A server whose socket field is 0 is left out of the set.
1912 for(i=0;i<servers->len;i++) {
1913 if((sock=(g_array_index(servers, SERVER, i)).socket)) {
1914 FD_SET(sock, &mset);
1915 max=sock>max?sock:max;
// NOTE(review): the condition guarding the modernsock registration is not visible here.
1919 FD_SET(modernsock, &mset);
1920 max=modernsock>max?modernsock:max;
1923 CLIENT *client = NULL;
// select() modifies its set, so a fresh copy of the master set is used on every pass.
1926 memcpy(&rset, &mset, sizeof(fd_set));
1927 if(select(max+1, &rset, NULL, NULL, NULL)>0) {
// Connections on modernsock negotiate first; the export is chosen by name during negotiation.
1932 if(FD_ISSET(modernsock, &rset)) {
1933 if((net=accept(modernsock, (struct sockaddr *) &addrin, &addrinlen)) < 0)
1935 client = negotiate(net, NULL, servers);
1937 err_nonfatal("negotiation failed");
1942 serve = client->server;
// Otherwise find the first per-export socket that is readable; the loop stops once net is set.
1944 for(i=0;i<servers->len && !net;i++) {
1945 serve=&(g_array_index(servers, SERVER, i));
1946 if(FD_ISSET(serve->socket, &rset)) {
1947 if ((net=accept(serve->socket, (struct sockaddr *) &addrin, &addrinlen)) < 0)
// The limit is compared against the size of the whole children table.
1954 if(serve->max_connections > 0 &&
1955 g_hash_table_size(children) >= serve->max_connections) {
1956 msg2(LOG_INFO, "Max connections reached");
// The accepted socket is switched to blocking mode.
1960 if((sock_flags = fcntl(net, F_GETFL, 0))==-1) {
1961 err("fcntl F_GETFL");
1963 if(fcntl(net, F_SETFL, sock_flags &~O_NONBLOCK)==-1) {
1964 err("fcntl F_SETFL ~O_NONBLOCK");
// NOTE(review): the condition under which a new CLIENT is allocated here is not visible;
// presumably it is skipped when negotiate() already produced one.
1967 client = g_new0(CLIENT, 1);
1968 client->server=serve;
1969 client->exportsize=OFFT_MAX;
1971 client->transactionlogfd = -1;
1973 set_peername(net, client);
1974 if (!authorized_client(client)) {
1975 msg2(LOG_INFO,"Unauthorized client") ;
1979 msg2(LOG_INFO,"Authorized client") ;
// The pid is heap-allocated because it is stored in the children table.
1980 pid=g_malloc(sizeof(pid_t));
1983 if ((*pid=fork())<0) {
1984 msg3(LOG_INFO,"Could not fork (%s)",strerror(errno)) ;
1988 if (*pid>0) { /* parent */
// The same pointer serves as both key and value.
1990 g_hash_table_insert(children, pid, pid);
// Child side: drop the inherited children table and close every listening socket.
1994 g_hash_table_destroy(children);
1995 for(i=0;i<servers->len;i++) {
1996 serve=&g_array_index(servers, SERVER, i);
1997 close(serve->socket);
1999 /* FALSE does not free the
2000 actual data. This is required,
2001 because the client has a
2002 direct reference into that
2003 data, and otherwise we get a
2005 g_array_free(servers, FALSE);
2008 msg2(LOG_INFO,"Starting to serve");
2009 serveconnection(client);
// Applies the standard options to a listening socket: SO_REUSEADDR, SO_KEEPALIVE and O_NONBLOCK.
// Every failure is reported through err().
2016 void dosockopts(int socket) {
2024 /* lose the pesky "Address already in use" error message */
2025 if (setsockopt(socket,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) {
2026 err("setsockopt SO_REUSEADDR");
2028 if (setsockopt(socket,SOL_SOCKET,SO_KEEPALIVE,&yes,sizeof(int)) == -1) {
2029 err("setsockopt SO_KEEPALIVE");
2032 /* make the listening socket non-blocking */
// The current flags are fetched first so that O_NONBLOCK is added without clearing the others.
2033 if ((sock_flags = fcntl(socket, F_GETFL, 0)) == -1) {
2034 err("fcntl F_GETFL");
2036 if (fcntl(socket, F_SETFL, sock_flags | O_NONBLOCK) == -1) {
2037 err("fcntl F_SETFL O_NONBLOCK");
</full_update_placeholder_removed>
2042 * Connect a server's socket.
2044 * @param serve the server we want to connect.
// Creates, binds and listens on the socket for one configured server.
// NOTE(review): the final return statement is not visible here; setup_servers() ORs the result
// into want_modern.
2046 int setup_serve(SERVER *serve) {
2047 struct addrinfo hints;
2048 struct addrinfo *ai = NULL;
// NOTE(review): the condition guarding this early return is not visible here.
// It yields 1 when the server has a servename and 0 otherwise.
2053 return serve->servename ? 1 : 0;
2055 memset(&hints,'\0',sizeof(hints));
// AI_PASSIVE asks for an address suitable for bind(); AI_NUMERICSERV because port is a number string.
2056 hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG | AI_NUMERICSERV;
2057 hints.ai_socktype = SOCK_STREAM;
2058 hints.ai_family = serve->socket_family;
2060 port = g_strdup_printf ("%d", serve->port);
2064 e = getaddrinfo(serve->listenaddr,port,&hints,&ai);
2069 fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
// Remember the family that getaddrinfo() picked when none was configured.
2075 if(serve->socket_family == AF_UNSPEC)
2076 serve->socket_family = ai->ai_family;
// Bit test for F_SDP. A logical AND here was true for any nonzero flags value,
// whether or not F_SDP was set.
2079 if ((serve->flags) & F_SDP) {
2080 if (ai->ai_family == AF_INET)
2081 ai->ai_family = AF_INET_SDP;
// The second family check needs its own if; a bare else followed by a parenthesised
// expression does not parse as a condition.
2082 else if (ai->ai_family == AF_INET6)
2083 ai->ai_family = AF_INET6_SDP;
2086 if ((serve->socket = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol)) < 0)
2089 dosockopts(serve->socket);
2091 DEBUG("Waiting for connections... bind, ");
2092 e = bind(serve->socket, ai->ai_addr, ai->ai_addrlen);
// A bind() failure with EADDRINUSE does not take the error branch on the next (not visible) line.
2093 if (e != 0 && errno != EADDRINUSE)
// The listen backlog is 1.
2096 if (listen(serve->socket, 1) < 0)
2100 if(serve->servename) {
// Creates the shared listening socket modernsock from modern_listen and modernport.
// NOTE(review): the error handling behind each failing call is not visible here.
2107 void open_modern(void) {
2108 struct addrinfo hints;
2109 struct addrinfo* ai = NULL;
2113 memset(&hints, '\0', sizeof(hints));
// Passive TCP stream socket, any address family.
2114 hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG;
2115 hints.ai_socktype = SOCK_STREAM;
2116 hints.ai_family = AF_UNSPEC;
2117 hints.ai_protocol = IPPROTO_TCP;
2118 e = getaddrinfo(modern_listen, modernport, &hints, &ai);
2120 fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
// Only the first result returned by getaddrinfo() is used.
2123 if((modernsock = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol))<0) {
2127 dosockopts(modernsock);
2129 if(bind(modernsock, ai->ai_addr, ai->ai_addrlen)) {
// The listen backlog is 10.
2132 if(listen(modernsock, 10) <0) {
2140 * Connect our servers.
// Sets up every configured server, creates the children table and installs the
// SIGCHLD and SIGTERM handlers.
2142 void setup_servers(GArray* servers) {
2144 struct sigaction sa;
// want_modern accumulates the results of setup_serve() with a bitwise OR.
// NOTE(review): where want_modern is consumed is not visible here.
2147 for(i=0;i<servers->len;i++) {
2148 want_modern |= setup_serve(&(g_array_index(servers, SERVER, i)));
// Keys are hashed and compared as ints; no key destructor is given, and destroy_pid_t is the value destructor.
2153 children=g_hash_table_new_full(g_int_hash, g_int_equal, NULL, destroy_pid_t);
// SA_RESTART is set on both handlers.
2155 sa.sa_handler = sigchld_handler;
2156 sigemptyset(&sa.sa_mask);
2157 sa.sa_flags = SA_RESTART;
2158 if(sigaction(SIGCHLD, &sa, NULL) == -1)
2159 err("sigaction: %m");
2160 sa.sa_handler = sigterm_handler;
2161 sigemptyset(&sa.sa_mask);
2162 sa.sa_flags = SA_RESTART;
2163 if(sigaction(SIGTERM, &sa, NULL) == -1)
2164 err("sigaction: %m");
2168 * Go daemon (unless we specified at compile time that we didn't want this)
2169 * @param serve the first server of our configuration. If its port is zero,
2170 * then do not daemonize, because we're doing inetd then. This parameter
2171 * is only used to create a PID file of the form
2172 * /var/run/nbd-server.<port>.pid; it's not modified in any way.
// daemonize() is compiled only when NODAEMON is not defined; otherwise the macro further down
// makes daemonize(serve) expand to nothing.
2174 #if !defined(NODAEMON)
// Writes the process id to a pid file whose name comes from pidftemplate.
// NOTE(review): the call that detaches the process is not visible here.
2175 void daemonize(SERVER* serve) {
// NOTE(review): the body of this test (a server with port 0) is not visible here.
2178 if(serve && !(serve->port)) {
// An empty template gets one of two defaults, with or without a port placeholder.
// NOTE(review): the selecting condition is not visible; presumably it is whether serve is non-NULL.
2184 if(!*pidftemplate) {
2186 strncpy(pidftemplate, "/var/run/nbd-server.%d.pid", 255);
2188 strncpy(pidftemplate, "/var/run/nbd-server.pid", 255);
// pidftemplate is used as the format string; the port (or 0) is its only argument.
2191 snprintf(pidfname, 255, pidftemplate, serve ? serve->port : 0);
2192 pidf=fopen(pidfname, "w");
2194 fprintf(pidf,"%d\n", (int)getpid());
// The message text indicates that a pid file failure is not treated as fatal.
2198 fprintf(stderr, "Not fatal; continuing");
2202 #define daemonize(serve)
2203 #endif /* !defined(NODAEMON) */
2206 * Everything beyond this point (in the file) is run in non-daemon mode.
2207 * The stuff above daemonize() isn't.
// The prototype carries G_GNUC_NORETURN, so callers may assume this function does not return.
2210 void serve_err(SERVER* serve, const char* msg) G_GNUC_NORETURN;
// Logs which export failed, starting with its exportname.
// NOTE(review): the remaining arguments and the rest of the body are not visible here.
2212 void serve_err(SERVER* serve, const char* msg) {
2213 g_message("Export of %s on port %d failed:", serve->exportname,
2219 * Set up user-ID and/or group-ID
// Switches to the configured group (rungroup) and user (runuser).
// The group is changed before the user, as the order of the setgid() and setuid() calls shows.
// NOTE(review): the guards around each half and the handling of a failed lookup are not visible here.
2221 void dousers(void) {
2226 gr=getgrnam(rungroup);
2228 str = g_strdup_printf("Invalid group name: %s", rungroup);
2231 if(setgid(gr->gr_gid)<0) {
2232 err("Could not set GID: %m");
2236 pw=getpwnam(runuser);
2238 str = g_strdup_printf("Invalid user name: %s", runuser);
2241 if(setuid(pw->pw_uid)<0) {
2242 err("Could not set UID: %m");
// GLib log handler that forwards each message to syslog(); main() installs it with g_log_set_default_handler().
// The GLib log level is mapped to a syslog level, starting from LOG_DEBUG.
// NOTE(review): the switch header and the level assigned in each case group are not visible here.
2248 void glib_message_syslog_redirect(const gchar *log_domain,
2249 GLogLevelFlags log_level,
2250 const gchar *message,
2253 int level=LOG_DEBUG;
2257 case G_LOG_FLAG_FATAL:
2258 case G_LOG_LEVEL_CRITICAL:
2259 case G_LOG_LEVEL_ERROR:
2262 case G_LOG_LEVEL_WARNING:
2265 case G_LOG_LEVEL_MESSAGE:
2266 case G_LOG_LEVEL_INFO:
2269 case G_LOG_LEVEL_DEBUG:
// The message is passed as an argument to a fixed format, never as the format itself.
2274 syslog(level, "%s", message);
2279 * Main entry point...
2281 int main(int argc, char *argv[]) {
2286 if (sizeof( struct nbd_request )!=28) {
2287 fprintf(stderr,"Bad size of structure. Alignment problems?\n");
2288 exit(EXIT_FAILURE) ;
2291 memset(pidftemplate, '\0', 256);
2294 config_file_pos = g_strdup(CFILE);
2295 serve=cmdline(argc, argv);
2296 servers = parse_cfile(config_file_pos, &err);
2299 serve->socket_family = AF_UNSPEC;
2301 append_serve(serve, servers);
2303 if (!(serve->port)) {
2306 /* You really should define ISSERVER if you're going to use
2307 * inetd mode, but if you don't, closing stdout and stderr
2308 * (which inetd had connected to the client socket) will let it
2312 open("/dev/null", O_WRONLY);
2313 open("/dev/null", O_WRONLY);
2314 g_log_set_default_handler( glib_message_syslog_redirect, NULL );
2316 client=g_malloc(sizeof(CLIENT));
2317 client->server=serve;
2319 client->exportsize=OFFT_MAX;
2320 set_peername(0,client);
2321 serveconnection(client);
2326 if(!servers || !servers->len) {
2327 if(err && !(err->domain == g_quark_from_string("parse_cfile")
2328 && err->code == CFILE_NOTFOUND)) {
2329 g_warning("Could not parse config file: %s",
2330 err ? err->message : "Unknown error");
2334 g_warning("Specifying an export on the command line is deprecated.");
2335 g_warning("Please use a configuration file instead.");
2338 if((!serve) && (!servers||!servers->len)) {
2339 g_message("No configured exports; quitting.");
2344 setup_servers(servers);