2 * Network Block Device - server
4 * Copyright 1996-1998 Pavel Machek, distribute under GPL
5 * <pavel@atrey.karlin.mff.cuni.cz>
6 * Copyright 2001-2004 Wouter Verhelst <wouter@debian.org>
7 * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk>
9 * Version 1.0 - hopefully 64-bit-clean
10 * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au>
11 * Version 1.2 - autodetect size of block devices, thanks to Peter T. Breuer <ptb@it.uc3m.es>
12 * Version 1.5 - can compile on Unix systems that don't have 64 bit integer
13 * type, or don't have 64 bit file offsets by defining FS_32BIT
14 * in compile options for nbd-server *only*. This can be done
15 * with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the
16 * original autoconf input file, or I would make it a configure
17 * option.) Ken Yap <ken@nlc.net.au>.
18 * Version 1.6 - fix autodetection of block device size and really make 64 bit
19 * clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk>
20 * Version 2.0 - Version synchronised with client
21 * Version 2.1 - Reap zombie client processes when they exit. Removed
22 * (uncommented) the _IO magic, it's no longer necessary. Wouter
23 * Verhelst <wouter@debian.org>
24 * Version 2.2 - Auto switch to read-only mode (useful for floppies).
25 * Version 2.3 - Fixed code so that Large File Support works. This
26 * removes the FS_32BIT compile-time directive; define
27 * _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be
28 * using FS_32BIT. This will allow you to use files >2GB instead of
29 * having to use the -m option. Wouter Verhelst <wouter@debian.org>
30 * Version 2.4 - Added code to keep track of children, so that we can
31 * properly kill them from initscripts. Add a call to daemon(),
32 * so that processes don't think they have to wait for us, which is
33 * interesting for initscripts as well. Wouter Verhelst
35 * Version 2.5 - Bugfix release: forgot to reset child_arraysize to
36 * zero after fork()ing, resulting in nbd-server going berserk
37 * when it receives a signal with at least one child open. Wouter
38 * Verhelst <wouter@debian.org>
39 * 10/10/2003 - Added socket option SO_KEEPALIVE (sf.net bug 819235);
40 * rectified type of mainloop::size_host (sf.net bugs 814435 and
41 * 817385); close the PID file after writing to it, so that the
42 * daemon can actually be found. Wouter Verhelst
44 * 10/10/2003 - Size of the data "size_host" was wrong and so was not
45 * correctly put in network endianness. Many types were corrected
46 * (size_t and off_t instead of int). <vspaceg@sourceforge.net>
47 * Version 2.6 - Some code cleanup.
48 * Version 2.7 - Better build system.
49 * 11/02/2004 - Doxygenified the source, modularized it a bit. Needs a
50 * lot more work, but this is a start. Wouter Verhelst
52 * 16/03/2010 - Add IPv6 support.
53 * Kitt Tientanopajai <kitt@kitty.in.th>
54 * Neutron Soutmun <neo.neutron@gmail.com>
55 * Suriya Soutmun <darksolar@gmail.com>
58 /* Includes LFS defines, which defines behaviours of some of the following
59 * headers, so must come before those */
62 #include <sys/types.h>
63 #include <sys/socket.h>
65 #include <sys/select.h> /* select */
66 #include <sys/wait.h> /* wait */
67 #ifdef HAVE_SYS_IOCTL_H
68 #include <sys/ioctl.h>
70 #include <sys/param.h>
71 #ifdef HAVE_SYS_MOUNT_H
72 #include <sys/mount.h> /* For BLKGETSIZE */
74 #include <signal.h> /* sigaction */
76 #include <netinet/tcp.h>
77 #include <netinet/in.h>
85 #include <arpa/inet.h>
95 /* used in cliserv.h, so must come first */
96 #define MY_NAME "nbd_server"
100 #include <sdp_inet.h>
103 /** Default position of the config file */
105 #define SYSCONFDIR "/etc"
107 #define CFILE SYSCONFDIR "/nbd-server/config"
109 /** Where our config file actually is */
110 gchar* config_file_pos;
112 /** What user we're running as */
114 /** What group we're running as */
115 gchar* rungroup=NULL;
116 /** whether to export using the old negotiation protocol (port-based) */
117 gboolean do_oldstyle=FALSE;
119 /* Whether we should avoid forking */
122 /** Logging macros, now nothing goes to syslog unless you say ISSERVER */
124 #define msg2(a,b) syslog(a,b)
125 #define msg3(a,b,c) syslog(a,b,c)
126 #define msg4(a,b,c,d) syslog(a,b,c,d)
128 #define msg2(a,b) g_message(b)
129 #define msg3(a,b,c) g_message(b,c)
130 #define msg4(a,b,c,d) g_message(b,c,d)
133 /* Debugging macros */
136 #define DEBUG(...) printf(__VA_ARGS__)
140 #ifndef PACKAGE_VERSION
141 #define PACKAGE_VERSION ""
144 * The highest value a variable of type off_t can reach. This is a signed
145 * integer, so set all bits except for the leftmost one.
/* Largest value of the signed type off_t (all bits set except the sign bit).
 * The mask is built in unsigned arithmetic because left-shifting a signed 1
 * into the sign bit is undefined behaviour, and the expansion is fully
 * parenthesised so it is safe inside any surrounding expression.
 * Assumes 8-bit bytes and an off_t no wider than unsigned long long. */
#define OFFT_MAX ((off_t)((1ULL << (sizeof(off_t)*8-1)) - 1ULL))
148 #define LINELEN 256 /**< Size of static buffer used to read the
149 authorization file (yuck) */
150 #define BUFSIZE ((1024*1024)+sizeof(struct nbd_reply)) /**< Size of buffer that can hold requests */
151 #define DIFFPAGESIZE 4096 /**< diff file uses those chunks */
152 #define F_READONLY 1 /**< flag to tell us a file is readonly */
153 #define F_MULTIFILE 2 /**< flag to tell us a file is exported using -m */
154 #define F_COPYONWRITE 4 /**< flag to tell us a file is exported using
156 #define F_AUTOREADONLY 8 /**< flag to tell us a file is set to autoreadonly */
157 #define F_SPARSE 16 /**< flag to tell us copy-on-write should use a sparse file */
158 #define F_SDP 32 /**< flag to tell us the export should be done using the Socket Direct Protocol for RDMA */
159 #define F_SYNC 64 /**< Whether to fsync() after a write */
160 #define F_FLUSH 128 /**< Whether server wants FLUSH to be sent by the client */
161 #define F_FUA 256 /**< Whether server wants FUA to be sent by the client */
162 #define F_ROTATIONAL 512 /**< Whether server wants the client to implement the elevator algorithm */
163 GHashTable *children;
164 char pidfname[256]; /**< name of our PID file */
165 char pidftemplate[256]; /**< template to be used for the filename of the PID file */
166 char default_authname[] = SYSCONFDIR "/nbd-server/allow"; /**< default name of allow file */
168 int modernsock=0; /**< Socket for the modern handler. Not used
169 if a client was only specified on the
170 command line; only port used if
171 oldstyle is set to false (and then the
172 command-line client isn't used, gna gna) */
173 char* modern_listen; /**< listenaddr value for modernsock */
176 * Types of virtualization
179 VIRT_NONE=0, /**< No virtualization */
180 VIRT_IPLIT, /**< Literal IP address as part of the filename */
181 VIRT_IPHASH, /**< Replacing all dots in an ip address by a / before
182 doing the same as in IPLIT */
183 VIRT_CIDR, /**< Every subnet in its own directory */
187 * Variables associated with a server.
190 gchar* exportname; /**< (unprocessed) filename of the file we're exporting */
191 off_t expected_size; /**< size of the exported file as it was told to
192 us through configuration */
193 gchar* listenaddr; /**< The IP address we're listening on */
194 unsigned int port; /**< port we're exporting this file at */
195 char* authname; /**< filename of the authorization file */
196 int flags; /**< flags associated with this exported file */
197 int socket; /**< The socket of this server. */
198 int socket_family; /**< family of the socket */
199 VIRT_STYLE virtstyle;/**< The style of virtualization, if any */
200 uint8_t cidrlen; /**< The length of the mask when we use
201 CIDR-style virtualization */
202 gchar* prerun; /**< command to be ran after connecting a client,
203 but before starting to serve */
204 gchar* postrun; /**< command that will be ran after the client
206 gchar* servename; /**< name of the export as selected by nbd-client */
207 int max_connections; /**< maximum number of opened connections */
208 gchar* transactionlog;/**< filename for transaction log */
212 * Variables associated with a client socket.
215 int fhandle; /**< file descriptor */
216 off_t startoff; /**< starting offset of this file */
220 off_t exportsize; /**< size of the file we're exporting */
221 char *clientname; /**< peer */
222 char *exportname; /**< (processed) filename of the file we're exporting */
223 GArray *export; /**< array of FILE_INFO of exported files;
224 array size is always 1 unless we're
225 doing the multiple file option */
226 int net; /**< The actual client socket */
227 SERVER *server; /**< The server this client is getting data from */
228 char* difffilename; /**< filename of the copy-on-write file, if any */
229 int difffile; /**< filedescriptor of copyonwrite file. @todo
230 shouldn't this be an array too? (cfr export) Or
231 make -m and -c mutually exclusive */
232 u32 difffilelen; /**< number of pages in difffile */
233 u32 *difmap; /**< see comment on the global difmap for this one */
234 gboolean modern; /**< client was negotiated using modern negotiation protocol */
235 int transactionlogfd;/**< fd for transaction log */
239 * Type of configuration file values
242 PARAM_INT, /**< This parameter is an integer */
243 PARAM_STRING, /**< This parameter is a string */
244 PARAM_BOOL, /**< This parameter is a boolean */
248 * Configuration file values
251 gchar *paramname; /**< Name of the parameter, as it appears in
253 gboolean required; /**< Whether this is a required (as opposed to
254 optional) parameter */
255 PARAM_TYPE ptype; /**< Type of the parameter. */
256 gpointer target; /**< Pointer to where the data of this
257 parameter should be written. If ptype is
258 PARAM_BOOL, the data is or'ed rather than
260 gint flagval; /**< Flag mask for this parameter in case ptype
/*
 * Translate an NBD command code into a printable name. The names visible
 * here are "NBD_CMD_READ", "NBD_CMD_WRITE", "NBD_CMD_DISC" and
 * "NBD_CMD_FLUSH"; each is a string literal, so the caller must not modify
 * or free the result.
 * NOTE(review): confirm what is returned for a command code that matches
 * none of these before dereferencing the result unconditionally.
 */
264 static inline const char * getcommandname(uint64_t command) {
267 return "NBD_CMD_READ";
269 return "NBD_CMD_WRITE";
271 return "NBD_CMD_DISC";
273 return "NBD_CMD_FLUSH";
281 * Check whether a client is allowed to connect. Works with an authorization
282 * file which contains one line per machine, no wildcards.
284 * @param opts The client who's trying to connect.
285 * @return 0 - authorization refused, 1 - OK
/*
 * Decide whether the peer named by opts->clientname may connect, by reading
 * the file opts->server->authname with fgets() (at most LINELEN bytes per
 * call). A line containing '/' is handled as an address plus prefix length:
 * both the entry and the client address are shifted right and then left by
 * (32 - len) and compared. Any other line is compared with the client name
 * as text. A failed fopen() is logged at LOG_INFO; malformed entries are
 * logged at LOG_CRIT with ERRMSG.
 *
 * NOTE(review): points to confirm or fix:
 *  - the return value of inet_aton(opts->clientname, &client) is ignored, so
 *    `client` is used even if clientname is not a dotted-quad IPv4 address;
 *  - `len` comes straight from strtol() with no range check. A value of 0
 *    makes (32 - len) a shift by the full width of a 32-bit s_addr, which is
 *    undefined, and a value above 32 gives a negative shift count;
 *  - inet_aton() stores addresses in network byte order, so on little-endian
 *    hosts these shifts presumably discard the leading octets rather than
 *    the trailing ones; verify, e.g. by masking after ntohl();
 *  - strncmp() compares only strlen(opts->clientname) characters, so a line
 *    that merely begins with the client address (entry "10.0.0.10", client
 *    "10.0.0.1") is accepted;
 *  - confirm that `f` is fclose()d on every return path, including the
 *    invalid-entry ones.
 */
287 int authorized_client(CLIENT *opts) {
288 const char *ERRMSG="Invalid entry '%s' in authfile '%s', so, refusing all connections.";
293 struct in_addr client;
294 struct in_addr cltemp;
297 if ((f=fopen(opts->server->authname,"r"))==NULL) {
298 msg4(LOG_INFO,"Can't open authorization file %s (%s).",
299 opts->server->authname,strerror(errno)) ;
303 inet_aton(opts->clientname, &client);
304 while (fgets(line,LINELEN,f)!=NULL) {
305 if((tmp=index(line, '/'))) {
306 if(strlen(line)<=tmp-line) {
307 msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
311 if(!inet_aton(line,&addr)) {
312 msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
315 len=strtol(tmp, NULL, 0);
316 addr.s_addr>>=32-len;
317 addr.s_addr<<=32-len;
318 memcpy(&cltemp,&client,sizeof(client));
319 cltemp.s_addr>>=32-len;
320 cltemp.s_addr<<=32-len;
321 if(addr.s_addr == cltemp.s_addr) {
325 if (strncmp(line,opts->clientname,strlen(opts->clientname))==0) {
335 * Read data from a file descriptor into a buffer
337 * @param f a file descriptor
338 * @param buf a buffer
339 * @param len the number of bytes to be read
/*
 * Read from descriptor f into buf. A read() result <= 0 is reported through
 * err("Read failed: %m") unless errno is EAGAIN.
 * NOTE(review): the `<= 0` test also catches read() returning 0 (end of
 * file / peer closed). read() does not set errno in that case, so the
 * EAGAIN comparison looks at a stale value left by an earlier call. Handle
 * a 0 result separately from -1.
 */
341 static inline void readit(int f, void *buf, size_t len) {
345 if ((res = read(f, buf, len)) <= 0) {
346 if(errno != EAGAIN) {
347 err("Read failed: %m");
357 * Write data from a buffer into a filedescriptor
359 * @param f a file descriptor
360 * @param buf a buffer containing data
361 * @param len the number of bytes to be written
/*
 * Write from buf to descriptor f. A write() result <= 0 is reported through
 * err("Send failed: %m").
 * NOTE(review): unlike readit(), no errno value is exempted here (EAGAIN is
 * treated as a failure); confirm that the asymmetry is intended.
 */
363 static inline void writeit(int f, void *buf, size_t len) {
367 if ((res = write(f, buf, len)) <= 0)
368 err("Send failed: %m");
375 * Print out a message about how to use nbd-server. Split out to a separate
376 * function so that we can call it from multiple places
/*
 * Help text: version banner, the option summary, then the compile-time
 * default configuration path CFILE.
 * NOTE(review): the text advertises --max-connections, whereas the
 * long_options table in cmdline() spells the option "max-connection".
 * getopt_long() accepts unique abbreviations of a long option name but not
 * longer spellings, so the documented form is presumably rejected; align
 * the two. The -d|--dont-fork option accepted by cmdline() is not listed.
 */
379 printf("This is nbd-server version " VERSION "\n");
380 printf("Usage: [ip:|ip6@]port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-C configuration file] [-p PID file name] [-o section name] [-M max connections]\n"
381 "\t-r|--read-only\t\tread only\n"
382 "\t-m|--multi-file\t\tmultiple file\n"
383 "\t-c|--copy-on-write\tcopy on write\n"
384 "\t-C|--config-file\tspecify an alternate configuration file\n"
385 "\t-l|--authorize-file\tfile with list of hosts that are allowed to\n\t\t\t\tconnect.\n"
386 "\t-p|--pid-file\t\tspecify a filename to write our PID to\n"
387 "\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n"
388 "\t-M|--max-connections\tspecify the maximum number of opened connections\n\n"
389 "\tif port is set to 0, stdin is used (for running from inetd)\n"
390 "\tif file_to_export contains '%%s', it is substituted with the IP\n"
391 "\t\taddress of the machine trying to connect\n"
392 "\tif ip is set, it contains the local IP address on which we're listening.\n\tif not, the server will listen on all local IP addresses\n");
393 printf("Using configuration file %s\n", CFILE);
396 /* Dumps a config file section of the given SERVER*, and exits. */
/*
 * Print `serve` to stdout as a configuration-file section headed by
 * [section_header]. exportname, listenaddr and port are always printed;
 * readonly, multifile, copyonwrite, filesize and authfile only when the
 * corresponding flag or field is set.
 * NOTE(review): serve->listenaddr is passed to %s unconditionally, but
 * cmdline() stores NULL there when no listen address was given; passing
 * NULL to %s is undefined and at best emits a bogus "listenaddr" value.
 * serve->port is an unsigned int but is printed with %d.
 */
397 void dump_section(SERVER* serve, gchar* section_header) {
398 printf("[%s]\n", section_header);
399 printf("\texportname = %s\n", serve->exportname);
400 printf("\tlistenaddr = %s\n", serve->listenaddr);
401 printf("\tport = %d\n", serve->port);
402 if(serve->flags & F_READONLY) {
403 printf("\treadonly = true\n");
405 if(serve->flags & F_MULTIFILE) {
406 printf("\tmultifile = true\n");
408 if(serve->flags & F_COPYONWRITE) {
409 printf("\tcopyonwrite = true\n");
411 if(serve->expected_size) {
412 printf("\tfilesize = %lld\n", (long long int)serve->expected_size);
414 if(serve->authname) {
415 printf("\tauthfile = %s\n", serve->authname);
421 * Parse the command line.
423 * @param argc the argc argument to main()
424 * @param argv the argv argument to main()
/*
 * Build a SERVER from the command line using getopt_long(). The SERVER is
 * allocated zeroed with g_new0(), given default_authname as its authfile and
 * VIRT_IPLIT as its virtualization style. The leading '-' in the option
 * string makes getopt_long() hand back non-option arguments in order; they
 * are counted with `nonspecial` and interpreted positionally (listen
 * address/port, export file name, size with optional k/K/m/M suffix).
 * The address/port argument is split on ':' when it contains at most one
 * colon, and on '@' otherwise.
 *
 * NOTE(review):
 *  - strncpy(pidftemplate, optarg, 256) leaves pidftemplate without a NUL
 *    terminator when optarg is 256 characters or longer;
 *  - the long option is spelled "max-connection" here while the usage text
 *    documents --max-connections; getopt_long() does not accept the longer
 *    spelling of a shorter option name;
 *  - the strtol() results stored in serve->port and serve->max_connections
 *    are not validated (no end-pointer or range check);
 *  - section_header starts as the literal "" and is later replaced by a
 *    g_strdup() copy, so it must not be freed unconditionally.
 */
426 SERVER* cmdline(int argc, char *argv[]) {
430 struct option long_options[] = {
431 {"read-only", no_argument, NULL, 'r'},
432 {"multi-file", no_argument, NULL, 'm'},
433 {"copy-on-write", no_argument, NULL, 'c'},
434 {"dont-fork", no_argument, NULL, 'd'},
435 {"authorize-file", required_argument, NULL, 'l'},
436 {"config-file", required_argument, NULL, 'C'},
437 {"pid-file", required_argument, NULL, 'p'},
438 {"output-config", required_argument, NULL, 'o'},
439 {"max-connection", required_argument, NULL, 'M'},
446 gboolean do_output=FALSE;
447 gchar* section_header="";
453 serve=g_new0(SERVER, 1);
454 serve->authname = g_strdup(default_authname);
455 serve->virtstyle=VIRT_IPLIT;
456 while((c=getopt_long(argc, argv, "-C:cdl:mo:rp:M:", long_options, &i))>=0) {
459 /* non-option argument */
460 switch(nonspecial++) {
462 if(strchr(optarg, ':') == strrchr(optarg, ':')) {
463 addr_port=g_strsplit(optarg, ":", 2);
465 /* Check for "@" - maybe user using this separator
468 g_strfreev(addr_port);
469 addr_port=g_strsplit(optarg, "@", 2);
472 addr_port=g_strsplit(optarg, "@", 2);
476 serve->port=strtol(addr_port[1], NULL, 0);
477 serve->listenaddr=g_strdup(addr_port[0]);
479 serve->listenaddr=NULL;
480 serve->port=strtol(addr_port[0], NULL, 0);
482 g_strfreev(addr_port);
485 serve->exportname = g_strdup(optarg);
486 if(serve->exportname[0] != '/') {
487 fprintf(stderr, "E: The to be exported file needs to be an absolute filename!\n");
492 last=strlen(optarg)-1;
494 if (suffix == 'k' || suffix == 'K' ||
495 suffix == 'm' || suffix == 'M')
497 es = (off_t)atoll(optarg);
505 serve->expected_size = es;
510 serve->flags |= F_READONLY;
513 serve->flags |= F_MULTIFILE;
517 section_header = g_strdup(optarg);
520 strncpy(pidftemplate, optarg, 256);
523 serve->flags |=F_COPYONWRITE;
529 g_free(config_file_pos);
530 config_file_pos=g_strdup(optarg);
533 g_free(serve->authname);
534 serve->authname=g_strdup(optarg);
537 serve->max_connections = strtol(optarg, NULL, 0);
545 /* What's left: the port to export, the name of the to be exported
546 * file, and, optionally, the size of the file, in that order. */
555 g_critical("Need a complete configuration on the command line to output a config file section!");
558 dump_section(serve, section_header);
564 * Error codes for config file parsing
567 CFILE_NOTFOUND, /**< The configuration file is not found */
568 CFILE_MISSING_GENERIC, /**< The (required) group "generic" is missing */
569 CFILE_KEY_MISSING, /**< A (required) key is missing */
570 CFILE_VALUE_INVALID, /**< A value is syntactically invalid */
571 CFILE_VALUE_UNSUPPORTED,/**< A value is not supported in this build */
572 CFILE_PROGERR, /**< Programmer error */
573 CFILE_NO_EXPORTS, /**< A config file was specified that does not
574 define any exports */
575 CFILE_INCORRECT_PORT, /**< The reserved port was specified for an
580 * Remove a SERVER from memory. Used from the hash table
/*
 * Release the strings owned by a SERVER with g_free(): exportname, authname,
 * listenaddr, prerun, postrun and transactionlog.
 * NOTE(review): g_free() accepts NULL, so the `if(...)` guards are
 * redundant. servename is not among the fields freed here; confirm who owns
 * it (dup_serve() makes a g_strdup() copy of it).
 */
582 void remove_server(gpointer s) {
586 g_free(server->exportname);
588 g_free(server->authname);
589 if(server->listenaddr)
590 g_free(server->listenaddr);
592 g_free(server->prerun);
594 g_free(server->postrun);
595 if(server->transactionlog)
596 g_free(server->transactionlog);
602 * @param s the old server we want to duplicate
603 * @return new duplicated server
/*
 * Copy *s into a SERVER freshly allocated with g_new0(). Scalar fields
 * (expected_size, port, flags, socket, socket_family, virtstyle, cidrlen,
 * max_connections) are copied by value; string fields are duplicated so the
 * copy does not share storage with the original.
 * NOTE(review): authname is duplicated with strdup() while every other
 * string uses g_strdup(), and remove_server() releases it with g_free().
 * Use g_strdup() here so allocator and deallocator match.
 */
605 SERVER* dup_serve(SERVER *s) {
606 SERVER *serve = NULL;
608 serve=g_new0(SERVER, 1);
613 serve->exportname = g_strdup(s->exportname);
615 serve->expected_size = s->expected_size;
618 serve->listenaddr = g_strdup(s->listenaddr);
620 serve->port = s->port;
623 serve->authname = strdup(s->authname);
625 serve->flags = s->flags;
626 serve->socket = s->socket;
627 serve->socket_family = s->socket_family;
628 serve->virtstyle = s->virtstyle;
629 serve->cidrlen = s->cidrlen;
632 serve->prerun = g_strdup(s->prerun);
635 serve->postrun = g_strdup(s->postrun);
637 if(s->transactionlog)
638 serve->transactionlog = g_strdup(s->transactionlog);
641 serve->servename = g_strdup(s->servename);
643 serve->max_connections = s->max_connections;
649 * append new server to array
651 * @param a server array
652 * @return 0 success, -1 error
/*
 * Resolve s->listenaddr and s->port with getaddrinfo() (AF_UNSPEC,
 * SOCK_STREAM, IPPROTO_TCP, AI_ADDRCONFIG | AI_PASSIVE). For each address
 * returned, append to `a` a SERVER whose listenaddr is the numeric form of
 * that address (getnameinfo() with NI_NUMERICHOST) and whose socket_family
 * is the result's ai_family. Resolution errors are printed to stderr with
 * gai_strerror().
 * NOTE(review): g_array_append_val(a, *ns) stores a copy of the structure,
 * so check that the `ns` allocation itself, the `port` string from
 * g_strdup_printf() and the `ai` list are released on every path, including
 * the getnameinfo() failure path. s->port is an unsigned int but is
 * formatted with "%d".
 */
654 int append_serve(SERVER *s, GArray *a) {
656 struct addrinfo hints;
657 struct addrinfo *ai = NULL;
658 struct addrinfo *rp = NULL;
659 char host[NI_MAXHOST];
665 err("Invalid parsing server");
669 port = g_strdup_printf("%d", s->port);
671 memset(&hints,'\0',sizeof(hints));
672 hints.ai_family = AF_UNSPEC;
673 hints.ai_socktype = SOCK_STREAM;
674 hints.ai_flags = AI_ADDRCONFIG | AI_PASSIVE;
675 hints.ai_protocol = IPPROTO_TCP;
677 e = getaddrinfo(s->listenaddr, port, &hints, &ai);
683 for (rp = ai; rp != NULL; rp = rp->ai_next) {
684 e = getnameinfo(rp->ai_addr, rp->ai_addrlen, host, sizeof(host), NULL, 0, NI_NUMERICHOST);
686 if (e != 0) { // error
687 fprintf(stderr, "getnameinfo: %s\n", gai_strerror(e));
691 // duplicate server and set listenaddr to resolved IP address
694 ns->listenaddr = g_strdup(host);
695 ns->socket_family = rp->ai_family;
696 g_array_append_val(a, *ns);
704 fprintf(stderr, "getaddrinfo failed on listen host/address: %s (%s)\n", s->listenaddr ? s->listenaddr : "any", gai_strerror(e));
715 * Parse the config file.
717 * @param f the name of the config file
718 * @param e a GError. @see CFILE_ERRORS for what error values this function can
720 * @return a GArray of SERVER structures. If the config file is empty or does not
721 * exist, returns an empty GArray; if the config file contains an
722 * error, returns NULL, and e is set appropriately
/*
 * Load configuration file f with GKeyFile and collect the exports it
 * defines. The first group must be "generic". Each group is read through a
 * table of PARAM descriptors (global keys: user, group, oldstyle,
 * listenaddr; per-export keys: exportname, port, authfile, ...) whose
 * `target` pointers say where the parsed value is stored. PARAM_BOOL values
 * set or clear `flagval` in the target. Exports are added with
 * append_serve() to an array created with element size sizeof(SERVER).
 * Failures are reported through *e in the "parse_cfile" error domain.
 *
 * NOTE(review):
 *  - `!strcmp(p[j].target, NBD_DEFAULT_PORT)`: for the per-export "port" row
 *    the target is &s.port, an unsigned int, so strcmp() reads the bytes of
 *    an integer as if they were a string. Compare the parsed number instead;
 *  - PARAM_INT stores through a gint pointer, but "filesize" targets
 *    s.expected_size, an off_t. Where off_t is wider than gint only part of
 *    the field is written and larger sizes cannot be expressed;
 *  - virtstyle is matched with strncmp() prefixes, so values that merely
 *    start with "none", "ipliteral" or "iphash" are accepted;
 *  - s.cidrlen takes the strtol() result without a range check;
 *  - s.servename = groups[i] stores a pointer into the array returned by
 *    g_key_file_get_groups(); confirm that array outlives the SERVER copies;
 *  - the first two error paths free cfile only, although retval has already
 *    been created; confirm retval, groups and err are released everywhere.
 */
724 GArray* parse_cfile(gchar* f, GError** e) {
725 const char* DEFAULT_ERROR = "Could not parse %s in group %s: %s";
726 const char* MISSING_REQUIRED_ERROR = "Could not find required value %s in group %s: %s";
728 gchar *virtstyle=NULL;
730 { "exportname", TRUE, PARAM_STRING, &(s.exportname), 0 },
731 { "port", TRUE, PARAM_INT, &(s.port), 0 },
732 { "authfile", FALSE, PARAM_STRING, &(s.authname), 0 },
733 { "filesize", FALSE, PARAM_INT, &(s.expected_size), 0 },
734 { "virtstyle", FALSE, PARAM_STRING, &(virtstyle), 0 },
735 { "prerun", FALSE, PARAM_STRING, &(s.prerun), 0 },
736 { "postrun", FALSE, PARAM_STRING, &(s.postrun), 0 },
737 { "transactionlog", FALSE, PARAM_STRING, &(s.transactionlog), 0 },
738 { "readonly", FALSE, PARAM_BOOL, &(s.flags), F_READONLY },
739 { "multifile", FALSE, PARAM_BOOL, &(s.flags), F_MULTIFILE },
740 { "copyonwrite", FALSE, PARAM_BOOL, &(s.flags), F_COPYONWRITE },
741 { "sparse_cow", FALSE, PARAM_BOOL, &(s.flags), F_SPARSE },
742 { "sdp", FALSE, PARAM_BOOL, &(s.flags), F_SDP },
743 { "sync", FALSE, PARAM_BOOL, &(s.flags), F_SYNC },
744 { "flush", FALSE, PARAM_BOOL, &(s.flags), F_FLUSH },
745 { "fua", FALSE, PARAM_BOOL, &(s.flags), F_FUA },
746 { "rotational", FALSE, PARAM_BOOL, &(s.flags), F_ROTATIONAL },
747 { "listenaddr", FALSE, PARAM_STRING, &(s.listenaddr), 0 },
748 { "maxconnections", FALSE, PARAM_INT, &(s.max_connections), 0 },
750 const int lp_size=sizeof(lp)/sizeof(PARAM);
752 { "user", FALSE, PARAM_STRING, &runuser, 0 },
753 { "group", FALSE, PARAM_STRING, &rungroup, 0 },
754 { "oldstyle", FALSE, PARAM_BOOL, &do_oldstyle, 1 },
755 { "listenaddr", FALSE, PARAM_STRING, &modern_listen, 0 },
758 int p_size=sizeof(gp)/sizeof(PARAM);
761 const char *err_msg=NULL;
770 errdomain = g_quark_from_string("parse_cfile");
771 cfile = g_key_file_new();
772 retval = g_array_new(FALSE, TRUE, sizeof(SERVER));
773 if(!g_key_file_load_from_file(cfile, f, G_KEY_FILE_KEEP_COMMENTS |
774 G_KEY_FILE_KEEP_TRANSLATIONS, &err)) {
775 g_set_error(e, errdomain, CFILE_NOTFOUND, "Could not open config file %s.", f);
776 g_key_file_free(cfile);
779 startgroup = g_key_file_get_start_group(cfile);
780 if(!startgroup || strcmp(startgroup, "generic")) {
781 g_set_error(e, errdomain, CFILE_MISSING_GENERIC, "Config file does not contain the [generic] group!");
782 g_key_file_free(cfile);
785 groups = g_key_file_get_groups(cfile, NULL);
786 for(i=0;groups[i];i++) {
787 memset(&s, '\0', sizeof(SERVER));
789 /* After the [generic] group, start parsing exports */
794 for(j=0;j<p_size;j++) {
795 g_assert(p[j].target != NULL);
796 g_assert(p[j].ptype==PARAM_INT||p[j].ptype==PARAM_STRING||p[j].ptype==PARAM_BOOL);
799 *((gint*)p[j].target) =
800 g_key_file_get_integer(cfile,
806 *((gchar**)p[j].target) =
807 g_key_file_get_string(cfile,
813 value = g_key_file_get_boolean(cfile,
815 p[j].paramname, &err);
818 *((gint*)p[j].target) |= p[j].flagval;
820 *((gint*)p[j].target) &= ~(p[j].flagval);
825 if(!strcmp(p[j].paramname, "port") && !strcmp(p[j].target, NBD_DEFAULT_PORT)) {
826 g_set_error(e, errdomain, CFILE_INCORRECT_PORT, "Config file specifies default port for oldstyle export");
827 g_key_file_free(cfile);
831 if(err->code == G_KEY_FILE_ERROR_KEY_NOT_FOUND) {
833 /* Ignore not-found error for optional values */
837 err_msg = MISSING_REQUIRED_ERROR;
840 err_msg = DEFAULT_ERROR;
842 g_set_error(e, errdomain, CFILE_VALUE_INVALID, err_msg, p[j].paramname, groups[i], err->message);
843 g_array_free(retval, TRUE);
845 g_key_file_free(cfile);
850 if(!strncmp(virtstyle, "none", 4)) {
851 s.virtstyle=VIRT_NONE;
852 } else if(!strncmp(virtstyle, "ipliteral", 9)) {
853 s.virtstyle=VIRT_IPLIT;
854 } else if(!strncmp(virtstyle, "iphash", 6)) {
855 s.virtstyle=VIRT_IPHASH;
856 } else if(!strncmp(virtstyle, "cidrhash", 8)) {
857 s.virtstyle=VIRT_CIDR;
858 if(strlen(virtstyle)<10) {
859 g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s: missing length", virtstyle, groups[i]);
860 g_array_free(retval, TRUE);
861 g_key_file_free(cfile);
864 s.cidrlen=strtol(virtstyle+8, NULL, 0);
866 g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s", virtstyle, groups[i]);
867 g_array_free(retval, TRUE);
868 g_key_file_free(cfile);
871 if(s.port && !do_oldstyle) {
872 g_warning("A port was specified, but oldstyle exports were not requested. This may not do what you expect.");
873 g_warning("Please read 'man 5 nbd-server' and search for oldstyle for more info");
876 s.virtstyle=VIRT_IPLIT;
878 /* Don't need to free this, it's not our string */
880 /* Don't append values for the [generic] group */
882 s.socket_family = AF_UNSPEC;
883 s.servename = groups[i];
885 append_serve(&s, retval);
892 if(s.flags & F_SDP) {
893 g_set_error(e, errdomain, CFILE_VALUE_UNSUPPORTED, "This nbd-server was built without support for SDP, yet group %s uses it", groups[i]);
894 g_array_free(retval, TRUE);
895 g_key_file_free(cfile);
901 g_set_error(e, errdomain, CFILE_NO_EXPORTS, "The config file does not specify any exports");
903 g_key_file_free(cfile);
908 * Signal handler for SIGCHLD
909 * @param s the signal we're handling (must be SIGCHLD, or something
/*
 * Reap every child that has exited, without blocking (waitpid() with
 * WNOHANG in a loop), log its exit status, and remove its PID from the
 * global `children` table. A PID that is not in the table is logged as an
 * unknown child.
 * NOTE(review): this runs in signal context yet calls msg3() (syslog() or
 * g_message()), DEBUG (printf() when enabled) and GHashTable functions,
 * none of which are async-signal-safe. Consider recording the event in the
 * handler and doing the bookkeeping from the main loop.
 */
912 void sigchld_handler(int s) {
917 while((pid=waitpid(-1, &status, WNOHANG)) > 0) {
918 if(WIFEXITED(status)) {
919 msg3(LOG_INFO, "Child exited with %d", WEXITSTATUS(status));
921 i=g_hash_table_lookup(children, &pid);
923 msg3(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid);
925 DEBUG("Removing %d from the list of children", pid);
926 g_hash_table_remove(children, &pid);
932 * Kill a child. Called from sigterm_handler::g_hash_table_foreach.
935 * @param value the value corresponding to the above key
936 * @param user_data a pointer which we always set to 1, so that we know what
/*
 * Per-entry callback for g_hash_table_foreach() over `children`, as used by
 * sigterm_handler(), which passes the address of its local `parent` as
 * user_data.
 */
939 void killchild(gpointer key, gpointer value, gpointer user_data) {
941 int *parent=user_data;
948 * Handle SIGTERM and dispatch it to our children
949 * @param s the signal we're handling (must be SIGTERM, or something
950 * is severely wrong).
/*
 * SIGTERM handler: visits every entry of the global `children` table with
 * killchild(), passing &parent as user data.
 * NOTE(review): g_hash_table_foreach() is not async-signal-safe; the same
 * concern applies as for sigchld_handler().
 */
952 void sigterm_handler(int s) {
955 g_hash_table_foreach(children, killchild, &parent);
965 * Detect the size of a file.
967 * @param fhandle An open filedescriptor
968 * @return the size of the file, or OFFT_MAX if detection was
/*
 * Determine the size of the object behind fhandle by trying, in order:
 *  1. the BLKGETSIZE64 ioctl (only when both HAVE_SYS_MOUNT_H and
 *     HAVE_SYS_IOCTL_H are defined), accepted when it succeeds with a
 *     non-zero byte count;
 *  2. fstat(), accepted when st_size is greater than zero;
 *  3. lseek() to SEEK_END, accepted when the resulting offset is positive.
 * If none succeeds the failure is reported through err().
 * NOTE(review): the lseek() probe leaves the file offset at end of file, so
 * callers must not assume the offset is unchanged. A genuinely empty file
 * (size 0) falls through all three probes.
 */
971 off_t size_autodetect(int fhandle) {
974 struct stat stat_buf;
977 #ifdef HAVE_SYS_MOUNT_H
978 #ifdef HAVE_SYS_IOCTL_H
980 DEBUG("looking for export size with ioctl BLKGETSIZE64\n");
981 if (!ioctl(fhandle, BLKGETSIZE64, &bytes) && bytes) {
984 #endif /* BLKGETSIZE64 */
985 #endif /* HAVE_SYS_IOCTL_H */
986 #endif /* HAVE_SYS_MOUNT_H */
988 DEBUG("looking for fhandle size with fstat\n");
989 stat_buf.st_size = 0;
990 error = fstat(fhandle, &stat_buf);
992 if(stat_buf.st_size > 0)
993 return (off_t)stat_buf.st_size;
995 err("fstat failed: %m");
998 DEBUG("looking for fhandle size with lseek SEEK_END\n");
999 es = lseek(fhandle, (off_t)0, SEEK_END);
1000 if (es > ((off_t)0)) {
1003 DEBUG("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4)));
1006 err("Could not find size of exported block device: %m");
1011 * Get the file handle and offset, given an export offset.
1013 * @param export An array of export files
1014 * @param a The offset to get corresponding file/offset for
1015 * @param fhandle [out] File descriptor
1016 * @param foffset [out] Offset into fhandle
1017 * @param maxbytes [out] Tells how many bytes can be read/written
1018 * from fhandle starting at foffset (0 if there is no limit)
1019 * @return 0 on success, -1 on failure
/*
 * Translate export offset `a` into a backing file and an offset within it.
 * `export` holds FILE_INFO entries and is searched by bisection on
 * startoff, which relies on the entries being sorted by ascending startoff
 * with the first one at 0. On success *fhandle and *foffset describe the
 * position; when a further file follows, *maxbytes is the distance from `a`
 * to that file's startoff.
 * NOTE(review): presumably *maxbytes is set to 0 ("no limit") when the
 * selected file is the last one; confirm that every path initialises it.
 */
1021 int get_filepos(GArray* export, off_t a, int* fhandle, off_t* foffset, size_t* maxbytes ) {
1022 /* Negative offset not allowed */
1026 /* Binary search for last file with starting offset <= a */
1029 int end = export->len - 1;
1030 while( start <= end ) {
1031 int mid = (start + end) / 2;
1032 fi = g_array_index(export, FILE_INFO, mid);
1033 if( fi.startoff < a ) {
1035 } else if( fi.startoff > a ) {
1043 /* end should never go negative, since first startoff is 0 and a >= 0 */
1046 fi = g_array_index(export, FILE_INFO, end);
1047 *fhandle = fi.fhandle;
1048 *foffset = a - fi.startoff;
1050 if( end+1 < export->len ) {
1051 FILE_INFO fi_next = g_array_index(export, FILE_INFO, end+1);
1052 *maxbytes = fi_next.startoff - a;
1059 * seek to a position in a file, with error handling.
1060 * @param handle a filedescriptor
1061 * @param a position to seek to
1062 * @todo get rid of this; lastpoint is a global variable right now, but it
1063 * shouldn't be. If we pass it on as a parameter, that makes things a *lot*
/*
 * Position descriptor `handle` at absolute offset `a` (SEEK_SET). A failed
 * lseek() is reported through err().
 * NOTE(review): the message carries no errno text (no %m), unlike the other
 * err() calls in this file, so the cause of a failed seek is lost.
 */
1066 void myseek(int handle,off_t a) {
1067 if (lseek(handle, a, SEEK_SET) < 0) {
1068 err("Can not seek locally!\n");
1073 * Write an amount of bytes at a given offset to the right file. This
1074 * abstracts the write-side of the multiple file option.
1076 * @param a The offset where the write should start
1077 * @param buf The buffer to write from
1078 * @param len The length of buf
1079 * @param client The client we're serving for
1080 * @return The number of bytes actually written, or -1 in case of an error
/*
 * Write to the export at offset `a`: get_filepos() selects the backing file
 * and the offset inside it, then the data is written with myseek() followed
 * by write(). `maxbytes` is the room left in the selected file; a request
 * longer than that is presumably shortened to fit, which is why callers use
 * rawexpwrite_fully() to loop over the remainder. The F_SYNC server flag is
 * tested after the write.
 * NOTE(review): the write() result is stored in retval before the sync
 * handling; confirm that a failed write is not masked by what follows.
 */
1082 ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
1088 if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
1090 if(maxbytes && len > maxbytes)
1093 DEBUG("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle, (long long unsigned)foffset, (unsigned int)len, fua);
1095 myseek(fhandle, foffset);
1096 retval = write(fhandle, buf, len);
1097 if(client->server->flags & F_SYNC) {
1101 /* This is where we would do the following
1102 * #ifdef USE_SYNC_FILE_RANGE
1103 * However, we don't, for the reasons set out below
1104 * by Christoph Hellwig <hch@infradead.org>
1107 * fdatasync is equivalent to fsync except that it does not flush
1108 * non-essential metadata (basically just timestamps in practice), but it
1109 * does flush metadata requried to find the data again, e.g. allocation
1110 * information and extent maps. sync_file_range does nothing but flush
1111 * out pagecache content - it means you basically won't get your data
1112 * back in case of a crash if you either:
1114 * a) have a volatile write cache in your disk (e.g. any normal SATA disk)
1115 * b) are using a sparse file on a filesystem
1116 * c) are using a fallocate-preallocated file on a filesystem
1117 * d) use any file on a COW filesystem like btrfs
1119 * e.g. it only does anything useful for you if you do not have a volatile
1120 * write cache, and either use a raw block device node, or just overwrite
1121 * an already fully allocated (and not preallocated) file on a non-COW
1125 * What we should do is open a second FD with O_DSYNC set, then write to
1126 * that when appropriate. However, with a Linux client, every REQ_FUA
1127 * immediately follows a REQ_FLUSH, so fdatasync does not cause performance
1132 sync_file_range(fhandle, foffset, len,
1133 SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE |
1134 SYNC_FILE_RANGE_WAIT_AFTER);
1143 * Call rawexpwrite repeatedly until all data has been written.
1144 * @return 0 on success, nonzero on failure
/*
 * Keep calling rawexpwrite() while bytes remain and each call returns more
 * than zero. The result is nonzero when the last call returned a negative
 * value or when bytes are still outstanding, and zero otherwise.
 */
1146 int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
1149 while(len > 0 && (ret=rawexpwrite(a, buf, len, client, fua)) > 0 ) {
1154 return (ret < 0 || len != 0);
1158 * Read an amount of bytes at a given offset from the right file. This
1159 * abstracts the read-side of the multiple files option.
1161 * @param a The offset where the read should start
1162 * @param buf A buffer to read into
1163 * @param len The size of buf
1164 * @param client The client we're serving for
1165 * @return The number of bytes actually read, or -1 in case of an
/*
 * Read from the export at offset `a`: get_filepos() selects the backing
 * file and the offset inside it, then the data is fetched with myseek()
 * followed by read(), whose result is returned unchanged (so short reads
 * are possible). `maxbytes` is the room left in the selected file; a longer
 * request is presumably shortened to fit, which is why callers use
 * rawexpread_fully() to loop over the remainder.
 */
1168 ssize_t rawexpread(off_t a, char *buf, size_t len, CLIENT *client) {
1173 if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
1175 if(maxbytes && len > maxbytes)
1178 DEBUG("(READ from fd %d offset %llu len %u), ", fhandle, (long long unsigned int)foffset, (unsigned int)len);
1180 myseek(fhandle, foffset);
1181 return read(fhandle, buf, len);
1185 * Call rawexpread repeatedly until all data has been read.
1186 * @return 0 on success, nonzero on failure
/*
 * Keep calling rawexpread() while bytes remain and each call returns more
 * than zero. The result is nonzero when the last call returned a negative
 * value or when bytes are still outstanding (for example end of file
 * reached early), and zero otherwise.
 */
1188 int rawexpread_fully(off_t a, char *buf, size_t len, CLIENT *client) {
1191 while(len > 0 && (ret=rawexpread(a, buf, len, client)) > 0 ) {
1196 return (ret < 0 || len != 0);
1200 * Read an amount of bytes at a given offset from the right file. This
1201 * abstracts the read-side of the copyonwrite stuff, and calls
1202 * rawexpread() with the right parameters to do the actual work.
1203 * @param a The offset where the read should start
1204 * @param buf A buffer to read into
1205 * @param len The size of buf
1206 * @param client The client we're going to read for
1207 * @return 0 on success, nonzero on failure
1209 int expread(off_t a, char *buf, size_t len, CLIENT *client) {
1210 off_t rdlen, offset;
1211 off_t mapcnt, mapl, maph, pagestart;
1213 if (!(client->server->flags & F_COPYONWRITE))
1214 return(rawexpread_fully(a, buf, len, client));
1215 DEBUG("Asked to read %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
1217 mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
1219 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
1220 pagestart=mapcnt*DIFFPAGESIZE;
1222 rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
1223 len : (size_t)DIFFPAGESIZE-offset;
1224 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1225 DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
1226 (unsigned long)(client->difmap[mapcnt]));
1227 myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset);
1228 if (read(client->difffile, buf, rdlen) != rdlen) return -1;
1229 } else { /* the block is not there */
1230 DEBUG("Page %llu is not here, we read the original one\n",
1231 (unsigned long long)mapcnt);
1232 if(rawexpread_fully(a, buf, rdlen, client)) return -1;
1234 len-=rdlen; a+=rdlen; buf+=rdlen;
1240 * Write an amount of bytes at a given offset to the right file. This
1241 * abstracts the write-side of the copyonwrite option, and calls
1242 * rawexpwrite() with the right parameters to do the actual work.
1244 * @param a The offset where the write should start
1245 * @param buf The buffer to write from
1246 * @param len The length of buf
1247 * @param client The client we're going to write for.
1248 * @return 0 on success, nonzero on failure
1250 int expwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
1251 char pagebuf[DIFFPAGESIZE];
1252 off_t mapcnt,mapl,maph;
1257 if (!(client->server->flags & F_COPYONWRITE))
1258 return(rawexpwrite_fully(a, buf, len, client, fua));
1259 DEBUG("Asked to write %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
1261 mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
1263 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
1264 pagestart=mapcnt*DIFFPAGESIZE ;
1265 offset=a-pagestart ;
1266 wrlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
1267 len : (size_t)DIFFPAGESIZE-offset;
1269 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1270 DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
1271 (unsigned long)(client->difmap[mapcnt])) ;
1272 myseek(client->difffile,
1273 client->difmap[mapcnt]*DIFFPAGESIZE+offset);
1274 if (write(client->difffile, buf, wrlen) != wrlen) return -1 ;
1275 } else { /* the block is not there */
1276 myseek(client->difffile,client->difffilelen*DIFFPAGESIZE) ;
1277 client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++;
1278 DEBUG("Page %llu is not here, we put it at %lu\n",
1279 (unsigned long long)mapcnt,
1280 (unsigned long)(client->difmap[mapcnt]));
1281 rdlen=DIFFPAGESIZE ;
1282 if (rawexpread_fully(pagestart, pagebuf, rdlen, client))
1284 memcpy(pagebuf+offset,buf,wrlen) ;
1285 if (write(client->difffile, pagebuf, DIFFPAGESIZE) !=
1289 len-=wrlen ; a+=wrlen ; buf+=wrlen ;
1291 if (client->server->flags & F_SYNC) {
1292 fsync(client->difffile);
1294 /* open question: would it be cheaper to do multiple sync_file_ranges?
1295 as we iterate through the above?
1297 fdatasync(client->difffile);
1302 int expflush(CLIENT *client) {
1305 if (client->server->flags & F_COPYONWRITE) {
1306 return fsync(client->difffile);
1309 for (i = 0; i < client->export->len; i++) {
1310 FILE_INFO fi = g_array_index(client->export, FILE_INFO, i);
1311 if (fsync(fi.fhandle) < 0)
1319 * Do the initial negotiation.
1321 * @param client The client we're negotiating with.
1323 CLIENT* negotiate(int net, CLIENT *client, GArray* servers) {
1326 uint32_t flags = NBD_FLAG_HAS_FLAGS;
1327 uint16_t smallflags = 0;
1330 memset(zeros, '\0', sizeof(zeros));
1331 if(!client || !client->modern) {
1333 if (write(net, INIT_PASSWD, 8) < 0) {
1334 err_nonfatal("Negotiation failed: %m");
1338 if(!client || client->modern) {
1340 magic = htonll(opts_magic);
1343 magic = htonll(cliserv_magic);
1345 if (write(net, &magic, sizeof(magic)) < 0) {
1346 err_nonfatal("Negotiation failed: %m");
1360 err("programmer error");
1361 if (write(net, &smallflags, sizeof(uint16_t)) < 0)
1362 err("Negotiation failed: %m");
1363 if (read(net, &reserved, sizeof(reserved)) < 0)
1364 err("Negotiation failed: %m");
1365 if (read(net, &magic, sizeof(magic)) < 0)
1366 err("Negotiation failed: %m");
1367 magic = ntohll(magic);
1368 if(magic != opts_magic) {
1372 if (read(net, &opt, sizeof(opt)) < 0)
1373 err("Negotiation failed: %m");
1375 if(opt != NBD_OPT_EXPORT_NAME) {
1379 if (read(net, &namelen, sizeof(namelen)) < 0)
1380 err("Negotiation failed: %m");
1381 namelen = ntohl(namelen);
1382 name = malloc(namelen+1);
1384 if (read(net, name, namelen) < 0)
1385 err("Negotiation failed: %m");
1386 for(i=0; i<servers->len; i++) {
1387 SERVER* serve = &(g_array_index(servers, SERVER, i));
1388 if(!strcmp(serve->servename, name)) {
1389 CLIENT* client = g_new0(CLIENT, 1);
1390 client->server = serve;
1391 client->exportsize = OFFT_MAX;
1393 client->modern = TRUE;
1394 client->transactionlogfd = -1;
1403 size_host = htonll((u64)(client->exportsize));
1404 if (write(net, &size_host, 8) < 0)
1405 err("Negotiation failed: %m");
1406 if (client->server->flags & F_READONLY)
1407 flags |= NBD_FLAG_READ_ONLY;
1408 if (client->server->flags & F_FLUSH)
1409 flags |= NBD_FLAG_SEND_FLUSH;
1410 if (client->server->flags & F_FUA)
1411 flags |= NBD_FLAG_SEND_FUA;
1412 if (client->server->flags & F_ROTATIONAL)
1413 flags |= NBD_FLAG_ROTATIONAL;
1414 if (!client->modern) {
1416 flags = htonl(flags);
1417 if (write(client->net, &flags, 4) < 0)
1418 err("Negotiation failed: %m");
1421 smallflags = (uint16_t)(flags & ~((uint16_t)0));
1422 smallflags = htons(smallflags);
1423 if (write(client->net, &smallflags, sizeof(smallflags)) < 0) {
1424 err("Negotiation failed: %m");
1428 if (write(client->net, zeros, 124) < 0)
1429 err("Negotiation failed: %m");
/**
 * Sending macro: write a reply structure to the socket and, when the client
 * has a transaction log open (transactionlogfd != -1), to that log as well.
 *
 * NOTE: the expansion refers to a variable named `client`, which must be in
 * scope wherever the macro is used.
 *
 * Wrapped in do { } while (0) so that `SEND(...);` is a single statement and
 * can be used safely in an unbraced if/else.
 */
#define SEND(net,reply) do { \
	writeit((net), &(reply), sizeof(reply)); \
	if (client->transactionlogfd != -1) \
		writeit(client->transactionlogfd, &(reply), sizeof(reply)); \
} while (0)

/**
 * Error macro: send the reply with its error field set to errcode (converted
 * with htonl()), then reset the field to 0 so the reply can be reused.
 */
#define ERROR(client,reply,errcode) do { \
	(reply).error = htonl(errcode); \
	SEND((client)->net, reply); \
	(reply).error = 0; \
} while (0)
1440 * Serve a file to a single client.
1442 * @todo This beast needs to be split up in many tiny little manageable
1443 * pieces. Preferably with a chainsaw.
1445 * @param client The client we're going to serve to.
1446 * @return when the client disconnects
1448 int mainloop(CLIENT *client) {
1449 struct nbd_request request;
1450 struct nbd_reply reply;
1451 gboolean go_on=TRUE;
1455 negotiate(client->net, client, NULL);
1456 DEBUG("Entering request loop!\n");
1457 reply.magic = htonl(NBD_REPLY_MAGIC);
1470 readit(client->net, &request, sizeof(request));
1471 if (client->transactionlogfd != -1)
1472 writeit(client->transactionlogfd, &request, sizeof(request));
1474 request.from = ntohll(request.from);
1475 request.type = ntohl(request.type);
1476 command = request.type & NBD_CMD_MASK_COMMAND;
1477 len = ntohl(request.len);
1479 DEBUG("%s from %llu (%llu) len %d, ", getcommandname(command),
1480 (unsigned long long)request.from,
1481 (unsigned long long)request.from / 512, (unsigned int)len);
1483 if (request.magic != htonl(NBD_REQUEST_MAGIC))
1484 err("Not enough magic.");
1486 memcpy(reply.handle, request.handle, sizeof(reply.handle));
1488 if ((command==NBD_CMD_WRITE) || (command==NBD_CMD_READ)) {
1489 if ((request.from + len) > (OFFT_MAX)) {
1490 DEBUG("[Number too large!]");
1491 ERROR(client, reply, EINVAL);
1495 if (((ssize_t)((off_t)request.from + len) > client->exportsize)) {
1497 ERROR(client, reply, EINVAL);
1502 if (currlen > BUFSIZE - sizeof(struct nbd_reply)) {
1503 currlen = BUFSIZE - sizeof(struct nbd_reply);
1504 msg2(LOG_INFO, "oversized request (this is not a problem)");
1511 msg2(LOG_INFO, "Disconnect request received.");
1512 if (client->server->flags & F_COPYONWRITE) {
1513 if (client->difmap) g_free(client->difmap) ;
1514 close(client->difffile);
1515 unlink(client->difffilename);
1516 free(client->difffilename);
1522 DEBUG("wr: net->buf, ");
1524 readit(client->net, buf, currlen);
1525 DEBUG("buf->exp, ");
1526 if ((client->server->flags & F_READONLY) ||
1527 (client->server->flags & F_AUTOREADONLY)) {
1528 DEBUG("[WRITE to READONLY!]");
1529 ERROR(client, reply, EPERM);
1532 if (expwrite(request.from, buf, len, client,
1533 request.type & NBD_CMD_FLAG_FUA)) {
1534 DEBUG("Write failed: %m" );
1535 ERROR(client, reply, errno);
1539 currlen = (len < BUFSIZE) ? len : BUFSIZE;
1541 SEND(client->net, reply);
1547 if (expflush(client)) {
1548 DEBUG("Flush failed: %m");
1549 ERROR(client, reply, errno);
1552 SEND(client->net, reply);
1557 DEBUG("exp->buf, ");
1558 memcpy(buf, &reply, sizeof(struct nbd_reply));
1559 if (client->transactionlogfd != -1)
1560 writeit(client->transactionlogfd, &reply, sizeof(reply));
1561 p = buf + sizeof(struct nbd_reply);
1562 writelen = currlen + sizeof(struct nbd_reply);
1564 if (expread(request.from, p, currlen, client)) {
1565 DEBUG("Read failed: %m");
1566 ERROR(client, reply, errno);
1570 DEBUG("buf->net, ");
1571 writeit(client->net, buf, writelen);
1573 request.from += currlen;
1574 currlen = (len < BUFSIZE) ? len : BUFSIZE;
1582 DEBUG ("Ignoring unknown command\n");
1590 * Set up client export array, which is an array of FILE_INFO.
1591 * Also, split a single exportfile into multiple ones, if that was asked.
1592 * @param client information on the client which we want to setup export for
1594 void setupexport(CLIENT* client) {
1596 off_t laststartoff = 0, lastsize = 0;
1597 int multifile = (client->server->flags & F_MULTIFILE);
1599 client->export = g_array_new(TRUE, TRUE, sizeof(FILE_INFO));
1601 /* If multi-file, open as many files as we can.
1602 * If not, open exactly one file.
1603 * Calculate file sizes as we go to get total size. */
1607 gchar* error_string;
1608 mode_t mode = (client->server->flags & F_READONLY) ? O_RDONLY : O_RDWR;
1611 tmpname=g_strdup_printf("%s.%d", client->exportname, i);
1613 tmpname=g_strdup(client->exportname);
1615 DEBUG( "Opening %s\n", tmpname );
1616 fi.fhandle = open(tmpname, mode);
1617 if(fi.fhandle == -1 && mode == O_RDWR) {
1618 /* Try again because maybe media was read-only */
1619 fi.fhandle = open(tmpname, O_RDONLY);
1620 if(fi.fhandle != -1) {
1621 /* Opening the base file in copyonwrite mode is
1623 if(!(client->server->flags & F_COPYONWRITE)) {
1624 client->server->flags |= F_AUTOREADONLY;
1625 client->server->flags |= F_READONLY;
1629 if(fi.fhandle == -1) {
1630 if(multifile && i>0)
1632 error_string=g_strdup_printf(
1633 "Could not open exported file %s: %%m",
1637 fi.startoff = laststartoff + lastsize;
1638 g_array_append_val(client->export, fi);
1641 /* Starting offset and size of this file will be used to
1642 * calculate starting offset of next file */
1643 laststartoff = fi.startoff;
1644 lastsize = size_autodetect(fi.fhandle);
1650 /* Set export size to total calculated size */
1651 client->exportsize = laststartoff + lastsize;
1653 /* Export size may be overridden */
1654 if(client->server->expected_size) {
1655 /* desired size must be <= total calculated size */
1656 if(client->server->expected_size > client->exportsize) {
1657 err("Size of exported file is too big\n");
1660 client->exportsize = client->server->expected_size;
1663 msg3(LOG_INFO, "Size of exported file/device is %llu", (unsigned long long)client->exportsize);
1665 msg3(LOG_INFO, "Total number of files: %d", i);
1669 int copyonwrite_prepare(CLIENT* client) {
1671 if ((client->difffilename = malloc(1024))==NULL)
1672 err("Failed to allocate string for diff file name");
1673 snprintf(client->difffilename, 1024, "%s-%s-%d.diff",client->exportname,client->clientname,
1675 client->difffilename[1023]='\0';
1676 msg3(LOG_INFO,"About to create map and diff file %s",client->difffilename) ;
1677 client->difffile=open(client->difffilename,O_RDWR | O_CREAT | O_TRUNC,0600) ;
1678 if (client->difffile<0) err("Could not create diff file (%m)") ;
1679 if ((client->difmap=calloc(client->exportsize/DIFFPAGESIZE,sizeof(u32)))==NULL)
1680 err("Could not allocate memory") ;
1681 for (i=0;i<client->exportsize/DIFFPAGESIZE;i++) client->difmap[i]=(u32)-1 ;
1687 * Run a command. This is used for the ``prerun'' and ``postrun'' config file
1690 * @param command the command to be ran. Read from the config file
1691 * @param file the file name we're about to export
1693 int do_run(gchar* command, gchar* file) {
1697 if(command && *command) {
1698 cmd = g_strdup_printf(command, file);
1706 * Serve a connection.
1708 * @todo allow for multithreading, perhaps use libevent. Not just yet, though;
1709 * follow the road map.
1711 * @param client a connected client
1713 void serveconnection(CLIENT *client) {
1714 if (client->server->transactionlog && (client->transactionlogfd == -1))
1716 if (-1 == (client->transactionlogfd = open(client->server->transactionlog,
1718 S_IRUSR | S_IWUSR)))
1719 g_warning("Could not open transaction log %s",
1720 client->server->transactionlog);
1723 if(do_run(client->server->prerun, client->exportname)) {
1726 setupexport(client);
1728 if (client->server->flags & F_COPYONWRITE) {
1729 copyonwrite_prepare(client);
1732 setmysockopt(client->net);
1735 do_run(client->server->postrun, client->exportname);
1737 if (-1 != client->transactionlogfd)
1739 close(client->transactionlogfd);
1740 client->transactionlogfd = -1;
1745 * Find the name of the file we have to serve. This will use g_strdup_printf
1746 * to put the IP address of the client inside a filename containing
1747 * "%s" (in the form as specified by the "virtstyle" option). That name
1748 * is then written to client->exportname.
1750 * @param net A socket connected to an nbd client
1751 * @param client information about the client. The IP address in human-readable
1752 * format will be written to a new char* buffer, the address of which will be
1753 * stored in client->clientname.
1755 void set_peername(int net, CLIENT *client) {
1756 struct sockaddr_storage addrin;
1757 struct sockaddr_storage netaddr;
1758 struct sockaddr_in *netaddr4 = NULL;
1759 struct sockaddr_in6 *netaddr6 = NULL;
1760 size_t addrinlen = sizeof( addrin );
1761 struct addrinfo hints;
1762 struct addrinfo *ai = NULL;
1763 char peername[NI_MAXHOST];
1764 char netname[NI_MAXHOST];
1770 if (getpeername(net, (struct sockaddr *) &addrin, (socklen_t *)&addrinlen) < 0)
1771 err("getsockname failed: %m");
1773 getnameinfo((struct sockaddr *)&addrin, (socklen_t)addrinlen,
1774 peername, sizeof (peername), NULL, 0, NI_NUMERICHOST);
1776 memset(&hints, '\0', sizeof (hints));
1777 hints.ai_flags = AI_ADDRCONFIG;
1778 e = getaddrinfo(peername, NULL, &hints, &ai);
1781 fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
1786 switch(client->server->virtstyle) {
1788 client->exportname=g_strdup(client->server->exportname);
1791 for(i=0;i<strlen(peername);i++) {
1792 if(peername[i]=='.') {
1797 client->exportname=g_strdup_printf(client->server->exportname, peername);
1800 memcpy(&netaddr, &addrin, addrinlen);
1801 if(ai->ai_family == AF_INET) {
1802 netaddr4 = (struct sockaddr_in *)&netaddr;
1803 (netaddr4->sin_addr).s_addr>>=32-(client->server->cidrlen);
1804 (netaddr4->sin_addr).s_addr<<=32-(client->server->cidrlen);
1806 getnameinfo((struct sockaddr *) netaddr4, (socklen_t) addrinlen,
1807 netname, sizeof (netname), NULL, 0, NI_NUMERICHOST);
1808 tmp=g_strdup_printf("%s/%s", netname, peername);
1809 }else if(ai->ai_family == AF_INET6) {
1810 netaddr6 = (struct sockaddr_in6 *)&netaddr;
1812 shift = 128-(client->server->cidrlen);
1814 while(shift >= 32) {
1815 ((netaddr6->sin6_addr).s6_addr32[i])=0;
1819 (netaddr6->sin6_addr).s6_addr32[i]>>=shift;
1820 (netaddr6->sin6_addr).s6_addr32[i]<<=shift;
1822 getnameinfo((struct sockaddr *)netaddr6, (socklen_t)addrinlen,
1823 netname, sizeof(netname), NULL, 0, NI_NUMERICHOST);
1824 tmp=g_strdup_printf("%s/%s", netname, peername);
1828 client->exportname=g_strdup_printf(client->server->exportname, tmp);
1834 msg4(LOG_INFO, "connect from %s, assigned file is %s",
1835 peername, client->exportname);
1836 client->clientname=g_strdup(peername);
1841 * @param data a pointer to pid_t which should be freed
1843 void destroy_pid_t(gpointer data) {
1848 * Loop through the available servers, and serve them. Never returns.
1850 int serveloop(GArray* servers) {
1851 struct sockaddr_storage addrin;
1852 socklen_t addrinlen=sizeof(addrin);
1860 * Set up the master fd_set. The set of descriptors we need
1861 * to select() for never changes anyway and it buys us a *lot*
1862 * of time to only build this once. However, if we ever choose
1863 * to not fork() for clients anymore, we may have to revisit
1868 for(i=0;i<servers->len;i++) {
1869 if((sock=(g_array_index(servers, SERVER, i)).socket)) {
1870 FD_SET(sock, &mset);
1871 max=sock>max?sock:max;
1875 FD_SET(modernsock, &mset);
1876 max=modernsock>max?modernsock:max;
1879 CLIENT *client = NULL;
1882 memcpy(&rset, &mset, sizeof(fd_set));
1883 if(select(max+1, &rset, NULL, NULL, NULL)>0) {
1888 if(FD_ISSET(modernsock, &rset)) {
1889 if((net=accept(modernsock, (struct sockaddr *) &addrin, &addrinlen)) < 0)
1891 client = negotiate(net, NULL, servers);
1893 err_nonfatal("negotiation failed");
1898 serve = client->server;
1900 for(i=0;i<servers->len && !net;i++) {
1901 serve=&(g_array_index(servers, SERVER, i));
1902 if(FD_ISSET(serve->socket, &rset)) {
1903 if ((net=accept(serve->socket, (struct sockaddr *) &addrin, &addrinlen)) < 0)
1910 if(serve->max_connections > 0 &&
1911 g_hash_table_size(children) >= serve->max_connections) {
1912 msg2(LOG_INFO, "Max connections reached");
1916 if((sock_flags = fcntl(net, F_GETFL, 0))==-1) {
1917 err("fcntl F_GETFL");
1919 if(fcntl(net, F_SETFL, sock_flags &~O_NONBLOCK)==-1) {
1920 err("fcntl F_SETFL ~O_NONBLOCK");
1923 client = g_new0(CLIENT, 1);
1924 client->server=serve;
1925 client->exportsize=OFFT_MAX;
1927 client->transactionlogfd = -1;
1929 set_peername(net, client);
1930 if (!authorized_client(client)) {
1931 msg2(LOG_INFO,"Unauthorized client") ;
1935 msg2(LOG_INFO,"Authorized client") ;
1936 pid=g_malloc(sizeof(pid_t));
1939 if ((*pid=fork())<0) {
1940 msg3(LOG_INFO,"Could not fork (%s)",strerror(errno)) ;
1944 if (*pid>0) { /* parent */
1946 g_hash_table_insert(children, pid, pid);
1950 g_hash_table_destroy(children);
1951 for(i=0;i<servers->len;i++) {
1952 serve=&g_array_index(servers, SERVER, i);
1953 close(serve->socket);
1955 /* FALSE does not free the
1956 actual data. This is required,
1957 because the client has a
1958 direct reference into that
1959 data, and otherwise we get a
1961 g_array_free(servers, FALSE);
1964 msg2(LOG_INFO,"Starting to serve");
1965 serveconnection(client);
1972 void dosockopts(int socket) {
1980 /* lose the pesky "Address already in use" error message */
1981 if (setsockopt(socket,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) {
1982 err("setsockopt SO_REUSEADDR");
1984 if (setsockopt(socket,SOL_SOCKET,SO_KEEPALIVE,&yes,sizeof(int)) == -1) {
1985 err("setsockopt SO_KEEPALIVE");
1988 /* make the listening socket non-blocking */
1989 if ((sock_flags = fcntl(socket, F_GETFL, 0)) == -1) {
1990 err("fcntl F_GETFL");
1992 if (fcntl(socket, F_SETFL, sock_flags | O_NONBLOCK) == -1) {
1993 err("fcntl F_SETFL O_NONBLOCK");
1998 * Connect a server's socket.
2000 * @param serve the server we want to connect.
2002 int setup_serve(SERVER *serve) {
2003 struct addrinfo hints;
2004 struct addrinfo *ai = NULL;
2009 return serve->servename ? 1 : 0;
2011 memset(&hints,'\0',sizeof(hints));
2012 hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG | AI_NUMERICSERV;
2013 hints.ai_socktype = SOCK_STREAM;
2014 hints.ai_family = serve->socket_family;
2016 port = g_strdup_printf ("%d", serve->port);
2020 e = getaddrinfo(serve->listenaddr,port,&hints,&ai);
2025 fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
2031 if(serve->socket_family == AF_UNSPEC)
2032 serve->socket_family = ai->ai_family;
2035 if ((serve->flags) && F_SDP) {
2036 if (ai->ai_family == AF_INET)
2037 ai->ai_family = AF_INET_SDP;
2038 else (ai->ai_family == AF_INET6)
2039 ai->ai_family = AF_INET6_SDP;
2042 if ((serve->socket = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol)) < 0)
2045 dosockopts(serve->socket);
2047 DEBUG("Waiting for connections... bind, ");
2048 e = bind(serve->socket, ai->ai_addr, ai->ai_addrlen);
2049 if (e != 0 && errno != EADDRINUSE)
2052 if (listen(serve->socket, 1) < 0)
2056 if(serve->servename) {
2063 void open_modern(void) {
2064 struct addrinfo hints;
2065 struct addrinfo* ai = NULL;
2069 memset(&hints, '\0', sizeof(hints));
2070 hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG;
2071 hints.ai_socktype = SOCK_STREAM;
2072 hints.ai_family = AF_UNSPEC;
2073 hints.ai_protocol = IPPROTO_TCP;
2074 e = getaddrinfo(modern_listen, NBD_DEFAULT_PORT, &hints, &ai);
2076 fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
2079 if((modernsock = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol))<0) {
2083 dosockopts(modernsock);
2085 if(bind(modernsock, ai->ai_addr, ai->ai_addrlen)) {
2088 if(listen(modernsock, 10) <0) {
2096 * Connect our servers.
2098 void setup_servers(GArray* servers) {
2100 struct sigaction sa;
2103 for(i=0;i<servers->len;i++) {
2104 want_modern |= setup_serve(&(g_array_index(servers, SERVER, i)));
2109 children=g_hash_table_new_full(g_int_hash, g_int_equal, NULL, destroy_pid_t);
2111 sa.sa_handler = sigchld_handler;
2112 sigemptyset(&sa.sa_mask);
2113 sa.sa_flags = SA_RESTART;
2114 if(sigaction(SIGCHLD, &sa, NULL) == -1)
2115 err("sigaction: %m");
2116 sa.sa_handler = sigterm_handler;
2117 sigemptyset(&sa.sa_mask);
2118 sa.sa_flags = SA_RESTART;
2119 if(sigaction(SIGTERM, &sa, NULL) == -1)
2120 err("sigaction: %m");
2124 * Go daemon (unless we specified at compile time that we didn't want this)
2125 * @param serve the first server of our configuration. If its port is zero,
2126 * then do not daemonize, because we're doing inetd then. This parameter
2127 * is only used to create a PID file of the form
2128 * /var/run/nbd-server.<port>.pid; it's not modified in any way.
2130 #if !defined(NODAEMON)
2131 void daemonize(SERVER* serve) {
2134 if(serve && !(serve->port)) {
2140 if(!*pidftemplate) {
2142 strncpy(pidftemplate, "/var/run/nbd-server.%d.pid", 255);
2144 strncpy(pidftemplate, "/var/run/nbd-server.pid", 255);
2147 snprintf(pidfname, 255, pidftemplate, serve ? serve->port : 0);
2148 pidf=fopen(pidfname, "w");
2150 fprintf(pidf,"%d\n", (int)getpid());
2154 fprintf(stderr, "Not fatal; continuing");
2158 #define daemonize(serve)
2159 #endif /* !defined(NODAEMON) */
2162 * Everything beyond this point (in the file) is run in non-daemon mode.
2163 * The stuff above daemonize() isn't.
2166 void serve_err(SERVER* serve, const char* msg) G_GNUC_NORETURN;
2168 void serve_err(SERVER* serve, const char* msg) {
2169 g_message("Export of %s on port %d failed:", serve->exportname,
2175 * Set up user-ID and/or group-ID
2177 void dousers(void) {
2182 gr=getgrnam(rungroup);
2184 str = g_strdup_printf("Invalid group name: %s", rungroup);
2187 if(setgid(gr->gr_gid)<0) {
2188 err("Could not set GID: %m");
2192 pw=getpwnam(runuser);
2194 str = g_strdup_printf("Invalid user name: %s", runuser);
2197 if(setuid(pw->pw_uid)<0) {
2198 err("Could not set UID: %m");
2204 void glib_message_syslog_redirect(const gchar *log_domain,
2205 GLogLevelFlags log_level,
2206 const gchar *message,
2209 int level=LOG_DEBUG;
2213 case G_LOG_FLAG_FATAL:
2214 case G_LOG_LEVEL_CRITICAL:
2215 case G_LOG_LEVEL_ERROR:
2218 case G_LOG_LEVEL_WARNING:
2221 case G_LOG_LEVEL_MESSAGE:
2222 case G_LOG_LEVEL_INFO:
2225 case G_LOG_LEVEL_DEBUG:
2230 syslog(level, "%s", message);
2235 * Main entry point...
2237 int main(int argc, char *argv[]) {
2242 if (sizeof( struct nbd_request )!=28) {
2243 fprintf(stderr,"Bad size of structure. Alignment problems?\n");
2244 exit(EXIT_FAILURE) ;
2247 memset(pidftemplate, '\0', 256);
2250 config_file_pos = g_strdup(CFILE);
2251 serve=cmdline(argc, argv);
2252 servers = parse_cfile(config_file_pos, &err);
2255 serve->socket_family = AF_UNSPEC;
2257 append_serve(serve, servers);
2259 if (!(serve->port)) {
2262 /* You really should define ISSERVER if you're going to use
2263 * inetd mode, but if you don't, closing stdout and stderr
2264 * (which inetd had connected to the client socket) will let it
2268 open("/dev/null", O_WRONLY);
2269 open("/dev/null", O_WRONLY);
2270 g_log_set_default_handler( glib_message_syslog_redirect, NULL );
2272 client=g_malloc(sizeof(CLIENT));
2273 client->server=serve;
2275 client->exportsize=OFFT_MAX;
2276 set_peername(0,client);
2277 serveconnection(client);
2282 if(!servers || !servers->len) {
2283 if(err && !(err->domain == g_quark_from_string("parse_cfile")
2284 && err->code == CFILE_NOTFOUND)) {
2285 g_warning("Could not parse config file: %s",
2286 err ? err->message : "Unknown error");
2290 g_warning("Specifying an export on the command line is deprecated.");
2291 g_warning("Please use a configuration file instead.");
2294 if((!serve) && (!servers||!servers->len)) {
2295 g_message("No configured exports; quitting.");
2300 setup_servers(servers);