2 * Network Block Device - server
4 * Copyright 1996-1998 Pavel Machek, distribute under GPL
5 * <pavel@atrey.karlin.mff.cuni.cz>
6 * Copyright 2001-2004 Wouter Verhelst <wouter@debian.org>
7 * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk>
9 * Version 1.0 - hopefully 64-bit-clean
10 * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au>
11 * Version 1.2 - autodetect size of block devices, thanks to "Peter T. Breuer" <ptb@it.uc3m.es>
12 * Version 1.5 - can compile on Unix systems that don't have 64 bit integer
13 * type, or don't have 64 bit file offsets by defining FS_32BIT
14 * in compile options for nbd-server *only*. This can be done
15 * with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the
16 * original autoconf input file, or I would make it a configure
17 * option.) Ken Yap <ken@nlc.net.au>.
18 * Version 1.6 - fix autodetection of block device size and really make 64 bit
19 * clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk>
20 * Version 2.0 - Version synchronised with client
21 * Version 2.1 - Reap zombie client processes when they exit. Removed
22 * (uncommented) the _IO magic, it's no longer necessary. Wouter
23 * Verhelst <wouter@debian.org>
24 * Version 2.2 - Auto switch to read-only mode (useful for floppies).
25 * Version 2.3 - Fixed code so that Large File Support works. This
26 * removes the FS_32BIT compile-time directive; define
27 * _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be
28 * using FS_32BIT. This will allow you to use files >2GB instead of
29 * having to use the -m option. Wouter Verhelst <wouter@debian.org>
30 * Version 2.4 - Added code to keep track of children, so that we can
31 * properly kill them from initscripts. Add a call to daemon(),
32 * so that processes don't think they have to wait for us, which is
33 * interesting for initscripts as well. Wouter Verhelst
35 * Version 2.5 - Bugfix release: forgot to reset child_arraysize to
36 * zero after fork()ing, resulting in nbd-server going berserk
37 * when it receives a signal with at least one child open. Wouter
38 * Verhelst <wouter@debian.org>
39 * 10/10/2003 - Added socket option SO_KEEPALIVE (sf.net bug 819235);
40 * rectified type of mainloop::size_host (sf.net bugs 814435 and
41 * 817385); close the PID file after writing to it, so that the
42 * daemon can actually be found. Wouter Verhelst
44 * 10/10/2003 - Size of the data "size_host" was wrong and so was not
45 * correctly put in network endianness. Many types were corrected
46 * (size_t and off_t instead of int). <vspaceg@sourceforge.net>
47 * Version 2.6 - Some code cleanup.
48 * Version 2.7 - Better build system.
49 * 11/02/2004 - Doxygenified the source, modularized it a bit. Needs a
50 * lot more work, but this is a start. Wouter Verhelst
52 * 16/03/2010 - Add IPv6 support.
53 * Kitt Tientanopajai <kitt@kitty.in.th>
54 * Neutron Soutmun <neo.neutron@gmail.com>
55 * Suriya Soutmun <darksolar@gmail.com>
58 /* Includes LFS defines, which defines behaviours of some of the following
59 * headers, so must come before those */
62 #include <sys/types.h>
63 #include <sys/socket.h>
65 #include <sys/select.h> /* select */
66 #include <sys/wait.h> /* wait */
67 #ifdef HAVE_SYS_IOCTL_H
68 #include <sys/ioctl.h>
70 #include <sys/param.h>
71 #ifdef HAVE_SYS_MOUNT_H
72 #include <sys/mount.h> /* For BLKGETSIZE */
74 #include <signal.h> /* sigaction */
76 #include <netinet/tcp.h>
77 #include <netinet/in.h>
85 #include <arpa/inet.h>
95 /* used in cliserv.h, so must come first */
96 #define MY_NAME "nbd_server"
100 #include <sdp_inet.h>
103 /** Default position of the config file */
105 #define SYSCONFDIR "/etc"
107 #define CFILE SYSCONFDIR "/nbd-server/config"
109 /** Where our config file actually is */
110 gchar* config_file_pos;
112 /** What user we're running as */
114 /** What group we're running as */
115 gchar* rungroup=NULL;
116 /** whether to export using the old negotiation protocol (port-based) */
117 gboolean do_oldstyle=FALSE;
119 /* Whether we should avoid forking */
122 /** Logging macros, now nothing goes to syslog unless you say ISSERVER */
124 #define msg2(a,b) syslog(a,b)
125 #define msg3(a,b,c) syslog(a,b,c)
126 #define msg4(a,b,c,d) syslog(a,b,c,d)
128 #define msg2(a,b) g_message(b)
129 #define msg3(a,b,c) g_message(b,c)
130 #define msg4(a,b,c,d) g_message(b,c,d)
133 /* Debugging macros */
136 #define DEBUG( a ) printf( a )
137 #define DEBUG2( a,b ) printf( a,b )
138 #define DEBUG3( a,b,c ) printf( a,b,c )
139 #define DEBUG4( a,b,c,d ) printf( a,b,c,d )
140 #define DEBUG5( a,b,c,d,e ) printf( a,b,c,d,e )
143 #define DEBUG2( a,b )
144 #define DEBUG3( a,b,c )
145 #define DEBUG4( a,b,c,d )
146 #define DEBUG5( a,b,c,d,e )
148 #ifndef PACKAGE_VERSION
149 #define PACKAGE_VERSION ""
152 * The highest value a variable of type off_t can reach. This is a signed
153 * integer, so set all bits except for the leftmost one.
// Size limits and export flag bits used throughout the server.
// NOTE(review): the OFFT_MAX expansion has no outer parentheses, and it shifts a signed 1 into the sign bit of off_t, which the C standard leaves undefined - confirm on the supported compilers.
155 #define OFFT_MAX ~((off_t)1<<(sizeof(off_t)*8-1))
156 #define LINELEN 256 /**< Size of static buffer used to read the
157 authorization file (yuck) */
158 #define BUFSIZE ((1024*1024)+sizeof(struct nbd_reply)) /**< Size of buffer that can hold requests */
159 #define DIFFPAGESIZE 4096 /**< diff file uses those chunks */
160 #define F_READONLY 1 /**< flag to tell us a file is readonly */
161 #define F_MULTIFILE 2 /**< flag to tell us a file is exported using -m */
162 #define F_COPYONWRITE 4 /**< flag to tell us a file is exported using
164 #define F_AUTOREADONLY 8 /**< flag to tell us a file is set to autoreadonly */
165 #define F_SPARSE 16 /**< flag to tell us copyonwrite should use a sparse file */
166 #define F_SDP 32 /**< flag to tell us the export should be done using the Socket Direct Protocol for RDMA */
167 #define F_SYNC 64 /**< Whether to fsync() after a write */
168 #define F_FLUSH 128 /**< Whether server wants FLUSH to be sent by the client */
169 #define F_FUA 256 /**< Whether server wants FUA to be sent by the client */
170 #define F_ROTATIONAL 512 /**< Whether server wants the client to implement the elevator algorithm */
171 GHashTable *children;
172 char pidfname[256]; /**< name of our PID file */
173 char pidftemplate[256]; /**< template to be used for the filename of the PID file */
174 char default_authname[] = SYSCONFDIR "/nbd-server/allow"; /**< default name of allow file */
176 int modernsock=0; /**< Socket for the modern handler. Not used
177 if a client was only specified on the
178 command line; only port used if
179 oldstyle is set to false (and then the
180 command-line client isn't used, gna gna) */
181 char* modern_listen; /**< listenaddr value for modernsock */
184 * Types of virtualization
187 VIRT_NONE=0, /**< No virtualization */
188 VIRT_IPLIT, /**< Literal IP address as part of the filename */
189 VIRT_IPHASH, /**< Replacing all dots in an ip address by a / before
190 doing the same as in IPLIT */
191 VIRT_CIDR, /**< Every subnet in its own directory */
195 * Variables associated with a server.
198 gchar* exportname; /**< (unprocessed) filename of the file we're exporting */
199 off_t expected_size; /**< size of the exported file as it was told to
200 us through configuration */
201 gchar* listenaddr; /**< The IP address we're listening on */
202 unsigned int port; /**< port we're exporting this file at */
203 char* authname; /**< filename of the authorization file */
204 int flags; /**< flags associated with this exported file */
205 int socket; /**< The socket of this server. */
206 int socket_family; /**< family of the socket */
207 VIRT_STYLE virtstyle;/**< The style of virtualization, if any */
208 uint8_t cidrlen; /**< The length of the mask when we use
209 CIDR-style virtualization */
210 gchar* prerun; /**< command to be ran after connecting a client,
211 but before starting to serve */
212 gchar* postrun; /**< command that will be ran after the client
214 gchar* servename; /**< name of the export as selected by nbd-client */
215 int max_connections; /**< maximum number of opened connections */
216 gchar* transactionlog;/**< filename for transaction log */
220 * Variables associated with a client socket.
223 int fhandle; /**< file descriptor */
224 off_t startoff; /**< starting offset of this file */
228 off_t exportsize; /**< size of the file we're exporting */
229 char *clientname; /**< peer */
230 char *exportname; /**< (processed) filename of the file we're exporting */
231 GArray *export; /**< array of FILE_INFO of exported files;
232 array size is always 1 unless we're
233 doing the multiple file option */
234 int net; /**< The actual client socket */
235 SERVER *server; /**< The server this client is getting data from */
236 char* difffilename; /**< filename of the copy-on-write file, if any */
237 int difffile; /**< filedescriptor of copyonwrite file. @todo
238 shouldn't this be an array too? (cfr export) Or
239 make -m and -c mutually exclusive */
240 u32 difffilelen; /**< number of pages in difffile */
241 u32 *difmap; /**< see comment on the global difmap for this one */
242 gboolean modern; /**< client was negotiated using modern negotiation protocol */
243 int transactionlogfd;/**< fd for transaction log */
247 * Type of configuration file values
250 PARAM_INT, /**< This parameter is an integer */
251 PARAM_STRING, /**< This parameter is a string */
252 PARAM_BOOL, /**< This parameter is a boolean */
256 * Configuration file values
259 gchar *paramname; /**< Name of the parameter, as it appears in
261 gboolean required; /**< Whether this is a required (as opposed to
262 optional) parameter */
263 PARAM_TYPE ptype; /**< Type of the parameter. */
264 gpointer target; /**< Pointer to where the data of this
265 parameter should be written. If ptype is
266 PARAM_BOOL, the data is or'ed rather than
268 gint flagval; /**< Flag mask for this parameter in case ptype
273 * Check whether a client is allowed to connect. Works with an authorization
274 * file which contains one entry per line: a single machine address, or a network written as address/prefix-length.
276 * @param opts The client who's trying to connect.
277 * @return 0 - authorization refused, 1 - OK
// Check the connecting client (opts->clientname) against the entries of the authorization file named by opts->server->authname.
279 int authorized_client(CLIENT *opts) {
280 const char *ERRMSG="Invalid entry '%s' in authfile '%s', so, refusing all connections.";
285 struct in_addr client;
286 struct in_addr cltemp;
// A file that cannot be opened is only logged at LOG_INFO level.
289 if ((f=fopen(opts->server->authname,"r"))==NULL) {
290 msg4(LOG_INFO,"Can't open authorization file %s (%s).",
291 opts->server->authname,strerror(errno)) ;
// Binary form of the peer address, used for the network comparison below.
// NOTE(review): the result of inet_aton() is not checked here.
295 inet_aton(opts->clientname, &client);
296 while (fgets(line,LINELEN,f)!=NULL) {
// A slash in the line marks an address/prefix-length entry.
297 if((tmp=index(line, '/'))) {
// NOTE(review): tmp points inside line, so tmp-line is always smaller than strlen(line); this test looks like it can never be true - confirm intent.
298 if(strlen(line)<=tmp-line) {
299 msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
// The address part must parse as a dotted-quad address, otherwise the entry is reported as invalid.
303 if(!inet_aton(line,&addr)) {
304 msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
// len is the prefix length; both addresses are shifted right and then left by 32-len bits and compared.
// NOTE(review): inet_aton() yields the address in network byte order, so these shifts keep the leading bits only on big-endian hosts; a shift count of 32 (len == 0) or a negative count is undefined - confirm len is range-checked.
307 len=strtol(tmp, NULL, 0);
308 addr.s_addr>>=32-len;
309 addr.s_addr<<=32-len;
310 memcpy(&cltemp,&client,sizeof(client));
311 cltemp.s_addr>>=32-len;
312 cltemp.s_addr<<=32-len;
313 if(addr.s_addr == cltemp.s_addr) {
// Plain entries are compared as text over strlen(opts->clientname) characters only.
// NOTE(review): this is a prefix match, so an entry 10.0.0.10 also matches the client 10.0.0.1 - confirm whether that is intended.
317 if (strncmp(line,opts->clientname,strlen(opts->clientname))==0) {
327 * Read data from a file descriptor into a buffer
329 * @param f a file descriptor
330 * @param buf a buffer
331 * @param len the number of bytes to be read
// Read from descriptor f into buf; a read() result <= 0 with errno other than EAGAIN is reported through err().
333 static inline void readit(int f, void *buf, size_t len) {
337 if ((res = read(f, buf, len)) <= 0) {
// NOTE(review): read() returning 0 (end of file) does not set errno, so this test then looks at a stale errno value - confirm EOF handling.
338 if(errno != EAGAIN) {
339 err("Read failed: %m");
349 * Write data from a buffer into a filedescriptor
351 * @param f a file descriptor
352 * @param buf a buffer containing data
353 * @param len the number of bytes to be written
// Write from buf to descriptor f; any write() result <= 0 is reported through err().
355 static inline void writeit(int f, void *buf, size_t len) {
359 if ((res = write(f, buf, len)) <= 0)
360 err("Send failed: %m");
367 * Print out a message about how to use nbd-server. Split out to a separate
368 * function so that we can call it from multiple places
// Print the version banner, the command-line synopsis with its option descriptions, and the default configuration file path.
// NOTE(review): the option table of cmdline() also accepts d / dont-fork, which this text does not mention.
// NOTE(review): the last line prints the compiled-in CFILE, not config_file_pos - confirm that is the intended value.
371 printf("This is nbd-server version " VERSION "\n");
372 printf("Usage: [ip:|ip6@]port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-C configuration file] [-p PID file name] [-o section name] [-M max connections]\n"
373 "\t-r|--read-only\t\tread only\n"
374 "\t-m|--multi-file\t\tmultiple file\n"
375 "\t-c|--copy-on-write\tcopy on write\n"
376 "\t-C|--config-file\tspecify an alternate configuration file\n"
377 "\t-l|--authorize-file\tfile with list of hosts that are allowed to\n\t\t\t\tconnect.\n"
378 "\t-p|--pid-file\t\tspecify a filename to write our PID to\n"
379 "\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n"
380 "\t-M|--max-connections\tspecify the maximum number of opened connections\n\n"
381 "\tif port is set to 0, stdin is used (for running from inetd)\n"
382 "\tif file_to_export contains '%%s', it is substituted with the IP\n"
383 "\t\taddress of the machine trying to connect\n"
384 "\tif ip is set, it contains the local IP address on which we're listening.\n\tif not, the server will listen on all local IP addresses\n");
385 printf("Using configuration file %s\n", CFILE);
388 /* Dumps a config file section of the given SERVER*, and exits. */
// Print the settings of serve to stdout as a configuration file section headed by [section_header].
389 void dump_section(SERVER* serve, gchar* section_header) {
390 printf("[%s]\n", section_header);
391 printf("\texportname = %s\n", serve->exportname);
// NOTE(review): listenaddr is printed with %s without a NULL check, while cmdline() can set it to NULL - confirm.
392 printf("\tlistenaddr = %s\n", serve->listenaddr);
// NOTE(review): port is declared unsigned int but printed with %d.
393 printf("\tport = %d\n", serve->port);
// Boolean options are only written when the matching flag bit is set.
394 if(serve->flags & F_READONLY) {
395 printf("\treadonly = true\n");
397 if(serve->flags & F_MULTIFILE) {
398 printf("\tmultifile = true\n");
400 if(serve->flags & F_COPYONWRITE) {
401 printf("\tcopyonwrite = true\n");
// The size is only written when a non-zero expected_size was given.
403 if(serve->expected_size) {
404 printf("\tfilesize = %lld\n", (long long int)serve->expected_size);
406 if(serve->authname) {
407 printf("\tauthfile = %s\n", serve->authname);
413 * Parse the command line.
415 * @param argc the argc argument to main()
416 * @param argv the argv argument to main()
// Build a SERVER description from the command-line arguments with getopt_long().
418 SERVER* cmdline(int argc, char *argv[]) {
422 struct option long_options[] = {
423 {"read-only", no_argument, NULL, 'r'},
424 {"multi-file", no_argument, NULL, 'm'},
425 {"copy-on-write", no_argument, NULL, 'c'},
426 {"dont-fork", no_argument, NULL, 'd'},
427 {"authorize-file", required_argument, NULL, 'l'},
428 {"config-file", required_argument, NULL, 'C'},
429 {"pid-file", required_argument, NULL, 'p'},
430 {"output-config", required_argument, NULL, 'o'},
// NOTE(review): the long option is spelled max-connection here, while the usage text advertises --max-connections - confirm which spelling is intended.
431 {"max-connection", required_argument, NULL, 'M'},
438 gboolean do_output=FALSE;
439 gchar* section_header="";
// Defaults: the stock authorization file name and VIRT_IPLIT virtualization.
445 serve=g_new0(SERVER, 1);
446 serve->authname = g_strdup(default_authname);
447 serve->virtstyle=VIRT_IPLIT;
// The leading dash in the option string makes getopt_long() hand back non-option arguments in order; they are told apart by the nonspecial counter.
448 while((c=getopt_long(argc, argv, "-C:cdl:mo:rp:M:", long_options, &i))>=0) {
451 /* non-option argument */
452 switch(nonspecial++) {
454 if(strchr(optarg, ':') == strrchr(optarg, ':')) {
455 addr_port=g_strsplit(optarg, ":", 2);
457 /* Check for "@" - maybe user using this separator
460 g_strfreev(addr_port);
461 addr_port=g_strsplit(optarg, "@", 2);
464 addr_port=g_strsplit(optarg, "@", 2);
468 serve->port=strtol(addr_port[1], NULL, 0);
469 serve->listenaddr=g_strdup(addr_port[0]);
471 serve->listenaddr=NULL;
472 serve->port=strtol(addr_port[0], NULL, 0);
474 g_strfreev(addr_port);
477 serve->exportname = g_strdup(optarg);
478 if(serve->exportname[0] != '/') {
479 fprintf(stderr, "E: The to be exported file needs to be an absolute filename!\n");
484 last=strlen(optarg)-1;
486 if (suffix == 'k' || suffix == 'K' ||
487 suffix == 'm' || suffix == 'M')
489 es = (off_t)atoll(optarg);
497 serve->expected_size = es;
502 serve->flags |= F_READONLY;
505 serve->flags |= F_MULTIFILE;
509 section_header = g_strdup(optarg);
// NOTE(review): pidftemplate holds 256 bytes and strncpy() leaves it without a terminating NUL when optarg is 256 bytes or longer.
512 strncpy(pidftemplate, optarg, 256);
515 serve->flags |=F_COPYONWRITE;
521 g_free(config_file_pos);
522 config_file_pos=g_strdup(optarg);
525 g_free(serve->authname);
526 serve->authname=g_strdup(optarg);
529 serve->max_connections = strtol(optarg, NULL, 0);
537 /* What's left: the port to export, the name of the to be exported
538 * file, and, optionally, the size of the file, in that order. */
547 g_critical("Need a complete configuration on the command line to output a config file section!");
550 dump_section(serve, section_header);
556 * Error codes for config file parsing
559 CFILE_NOTFOUND, /**< The configuration file is not found */
560 CFILE_MISSING_GENERIC, /**< The (required) group "generic" is missing */
561 CFILE_KEY_MISSING, /**< A (required) key is missing */
562 CFILE_VALUE_INVALID, /**< A value is syntactically invalid */
563 CFILE_VALUE_UNSUPPORTED,/**< A value is not supported in this build */
564 CFILE_PROGERR, /**< Programmer error */
565 CFILE_NO_EXPORTS, /**< A config file was specified that does not
566 define any exports */
567 CFILE_INCORRECT_PORT, /**< The reserved port was specified for an
572 * Remove a SERVER from memory. Used from the hash table
// Release the string members of a SERVER with g_free().
// NOTE(review): no release of servename appears among these calls, although dup_serve() duplicates it - confirm it is freed.
// g_free() accepts NULL, so the pointer checks in front of it are not required.
574 void remove_server(gpointer s) {
578 g_free(server->exportname);
580 g_free(server->authname);
581 if(server->listenaddr)
582 g_free(server->listenaddr);
584 g_free(server->prerun);
586 g_free(server->postrun);
587 if(server->transactionlog)
588 g_free(server->transactionlog);
594 * @param s the old server we want to duplicate
595 * @return new duplicated server
// Allocate a zeroed SERVER and copy the fields of s into it; string members are duplicated, scalar members are assigned.
597 SERVER* dup_serve(SERVER *s) {
598 SERVER *serve = NULL;
600 serve=g_new0(SERVER, 1);
605 serve->exportname = g_strdup(s->exportname);
607 serve->expected_size = s->expected_size;
610 serve->listenaddr = g_strdup(s->listenaddr);
612 serve->port = s->port;
// NOTE(review): authname is copied with strdup() while the other strings use g_strdup() and remove_server() releases it with g_free() - consider g_strdup() for consistency.
615 serve->authname = strdup(s->authname);
617 serve->flags = s->flags;
618 serve->socket = s->socket;
619 serve->socket_family = s->socket_family;
620 serve->virtstyle = s->virtstyle;
621 serve->cidrlen = s->cidrlen;
624 serve->prerun = g_strdup(s->prerun);
627 serve->postrun = g_strdup(s->postrun);
629 if(s->transactionlog)
630 serve->transactionlog = g_strdup(s->transactionlog);
633 serve->servename = g_strdup(s->servename);
635 serve->max_connections = s->max_connections;
641 * append new server to array
643 * @param a server array
644 * @return 0 success, -1 error
// Resolve s->listenaddr and s->port with getaddrinfo() and append one SERVER entry per resolved address to the array a.
646 int append_serve(SERVER *s, GArray *a) {
648 struct addrinfo hints;
649 struct addrinfo *ai = NULL;
650 struct addrinfo *rp = NULL;
651 char host[NI_MAXHOST];
657 err("Invalid parsing server");
// getaddrinfo() takes the service as text, so the port number is formatted first.
661 port = g_strdup_printf("%d", s->port);
// Ask for passive TCP stream addresses of any address family.
663 memset(&hints,'\0',sizeof(hints));
664 hints.ai_family = AF_UNSPEC;
665 hints.ai_socktype = SOCK_STREAM;
666 hints.ai_flags = AI_ADDRCONFIG | AI_PASSIVE;
667 hints.ai_protocol = IPPROTO_TCP;
669 e = getaddrinfo(s->listenaddr, port, &hints, &ai);
675 for (rp = ai; rp != NULL; rp = rp->ai_next) {
// Turn each result back into numeric address text.
676 e = getnameinfo(rp->ai_addr, rp->ai_addrlen, host, sizeof(host), NULL, 0, NI_NUMERICHOST);
678 if (e != 0) { // error
679 fprintf(stderr, "getnameinfo: %s\n", gai_strerror(e));
683 // duplicate server and set listenaddr to resolved IP address
// NOTE(review): if ns comes from dup_serve(), its listenaddr was already duplicated and is overwritten here without being freed - confirm.
686 ns->listenaddr = g_strdup(host);
687 ns->socket_family = rp->ai_family;
// The structure is copied by value into the array.
688 g_array_append_val(a, *ns);
696 fprintf(stderr, "getaddrinfo failed on listen host/address: %s (%s)\n", s->listenaddr ? s->listenaddr : "any", gai_strerror(e));
707 * Parse the config file.
709 * @param f the name of the config file
710 * @param e a GError. @see CFILE_ERRORS for what error values this function can
712 * @return a GArray of SERVER structures. If the config file is empty or does not
713 * exist, returns an empty GArray; if the config file contains an
714 * error, returns NULL, and e is set appropriately
// Load the key file f with GLib and turn its groups into SERVER entries collected in a GArray.
716 GArray* parse_cfile(gchar* f, GError** e) {
717 const char* DEFAULT_ERROR = "Could not parse %s in group %s: %s";
718 const char* MISSING_REQUIRED_ERROR = "Could not find required value %s in group %s: %s";
720 gchar *virtstyle=NULL;
// Per-export parameters: name, required flag, type, destination and flag mask.
722 { "exportname", TRUE, PARAM_STRING, &(s.exportname), 0 },
723 { "port", TRUE, PARAM_INT, &(s.port), 0 },
724 { "authfile", FALSE, PARAM_STRING, &(s.authname), 0 },
// NOTE(review): PARAM_INT values are stored through a gint pointer below, but expected_size is an off_t - confirm the width mismatch is handled (large sizes, big-endian hosts).
725 { "filesize", FALSE, PARAM_INT, &(s.expected_size), 0 },
726 { "virtstyle", FALSE, PARAM_STRING, &(virtstyle), 0 },
727 { "prerun", FALSE, PARAM_STRING, &(s.prerun), 0 },
728 { "postrun", FALSE, PARAM_STRING, &(s.postrun), 0 },
729 { "transactionlog", FALSE, PARAM_STRING, &(s.transactionlog), 0 },
730 { "readonly", FALSE, PARAM_BOOL, &(s.flags), F_READONLY },
731 { "multifile", FALSE, PARAM_BOOL, &(s.flags), F_MULTIFILE },
732 { "copyonwrite", FALSE, PARAM_BOOL, &(s.flags), F_COPYONWRITE },
733 { "sparse_cow", FALSE, PARAM_BOOL, &(s.flags), F_SPARSE },
734 { "sdp", FALSE, PARAM_BOOL, &(s.flags), F_SDP },
735 { "sync", FALSE, PARAM_BOOL, &(s.flags), F_SYNC },
736 { "flush", FALSE, PARAM_BOOL, &(s.flags), F_FLUSH },
737 { "fua", FALSE, PARAM_BOOL, &(s.flags), F_FUA },
738 { "rotational", FALSE, PARAM_BOOL, &(s.flags), F_ROTATIONAL },
739 { "listenaddr", FALSE, PARAM_STRING, &(s.listenaddr), 0 },
740 { "maxconnections", FALSE, PARAM_INT, &(s.max_connections), 0 },
742 const int lp_size=sizeof(lp)/sizeof(PARAM);
// Parameters that are stored in global variables rather than in a SERVER.
744 { "user", FALSE, PARAM_STRING, &runuser, 0 },
745 { "group", FALSE, PARAM_STRING, &rungroup, 0 },
746 { "oldstyle", FALSE, PARAM_BOOL, &do_oldstyle, 1 },
747 { "listenaddr", FALSE, PARAM_STRING, &modern_listen, 0 },
750 int p_size=sizeof(gp)/sizeof(PARAM);
753 const char *err_msg=NULL;
762 errdomain = g_quark_from_string("parse_cfile");
763 cfile = g_key_file_new();
// The result array stores SERVER structures by value.
764 retval = g_array_new(FALSE, TRUE, sizeof(SERVER));
// A file that cannot be loaded is reported as CFILE_NOTFOUND.
765 if(!g_key_file_load_from_file(cfile, f, G_KEY_FILE_KEEP_COMMENTS |
766 G_KEY_FILE_KEEP_TRANSLATIONS, &err)) {
767 g_set_error(e, errdomain, CFILE_NOTFOUND, "Could not open config file %s.", f);
768 g_key_file_free(cfile);
// The first group of the file has to be named generic.
771 startgroup = g_key_file_get_start_group(cfile);
772 if(!startgroup || strcmp(startgroup, "generic")) {
773 g_set_error(e, errdomain, CFILE_MISSING_GENERIC, "Config file does not contain the [generic] group!");
774 g_key_file_free(cfile);
777 groups = g_key_file_get_groups(cfile, NULL);
778 for(i=0;groups[i];i++) {
// Every group starts from a zeroed SERVER.
779 memset(&s, '\0', sizeof(SERVER));
781 /* After the [generic] group, start parsing exports */
786 for(j=0;j<p_size;j++) {
787 g_assert(p[j].target != NULL);
788 g_assert(p[j].ptype==PARAM_INT||p[j].ptype==PARAM_STRING||p[j].ptype==PARAM_BOOL);
// Integers are written through a gint pointer, strings through a gchar pointer-to-pointer, booleans set or clear flagval in the target.
791 *((gint*)p[j].target) =
792 g_key_file_get_integer(cfile,
798 *((gchar**)p[j].target) =
799 g_key_file_get_string(cfile,
805 value = g_key_file_get_boolean(cfile,
807 p[j].paramname, &err);
810 *((gint*)p[j].target) |= p[j].flagval;
812 *((gint*)p[j].target) &= ~(p[j].flagval);
// NOTE(review): for the port entry p[j].target points at the parsed integer, not at a string, so passing it to strcmp() against NBD_DEFAULT_PORT looks wrong - confirm the intended comparison.
817 if(!strcmp(p[j].paramname, "port") && !strcmp(p[j].target, NBD_DEFAULT_PORT)) {
818 g_set_error(e, errdomain, CFILE_INCORRECT_PORT, "Config file specifies default port for oldstyle export");
819 g_key_file_free(cfile);
823 if(err->code == G_KEY_FILE_ERROR_KEY_NOT_FOUND) {
825 /* Ignore not-found error for optional values */
829 err_msg = MISSING_REQUIRED_ERROR;
832 err_msg = DEFAULT_ERROR;
834 g_set_error(e, errdomain, CFILE_VALUE_INVALID, err_msg, p[j].paramname, groups[i], err->message);
835 g_array_free(retval, TRUE);
837 g_key_file_free(cfile);
// virtstyle is matched by prefix; cidrhash carries its mask length as a numeric suffix that is read from offset 8.
842 if(!strncmp(virtstyle, "none", 4)) {
843 s.virtstyle=VIRT_NONE;
844 } else if(!strncmp(virtstyle, "ipliteral", 9)) {
845 s.virtstyle=VIRT_IPLIT;
846 } else if(!strncmp(virtstyle, "iphash", 6)) {
847 s.virtstyle=VIRT_IPHASH;
848 } else if(!strncmp(virtstyle, "cidrhash", 8)) {
849 s.virtstyle=VIRT_CIDR;
850 if(strlen(virtstyle)<10) {
851 g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s: missing length", virtstyle, groups[i]);
852 g_array_free(retval, TRUE);
853 g_key_file_free(cfile);
856 s.cidrlen=strtol(virtstyle+8, NULL, 0);
858 g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s", virtstyle, groups[i]);
859 g_array_free(retval, TRUE);
860 g_key_file_free(cfile);
863 if(s.port && !do_oldstyle) {
864 g_warning("A port was specified, but oldstyle exports were not requested. This may not do what you expect.");
865 g_warning("Please read 'man 5 nbd-server' and search for oldstyle for more info");
868 s.virtstyle=VIRT_IPLIT;
870 /* Don't need to free this, it's not our string */
872 /* Don't append values for the [generic] group */
874 s.socket_family = AF_UNSPEC;
875 s.servename = groups[i];
// NOTE(review): the int result of append_serve() is ignored here.
877 append_serve(&s, retval);
// An export that requests SDP is rejected with CFILE_VALUE_UNSUPPORTED.
884 if(s.flags & F_SDP) {
885 g_set_error(e, errdomain, CFILE_VALUE_UNSUPPORTED, "This nbd-server was built without support for SDP, yet group %s uses it", groups[i]);
886 g_array_free(retval, TRUE);
887 g_key_file_free(cfile);
893 g_set_error(e, errdomain, CFILE_NO_EXPORTS, "The config file does not specify any exports");
895 g_key_file_free(cfile);
900 * Signal handler for SIGCHLD
901 * @param s the signal we're handling (must be SIGCHLD, or something
// Reap every terminated child without blocking and drop its PID from the children table.
// NOTE(review): the logging macros and the GHashTable calls are not async-signal-safe; confirm this handler cannot interrupt code that uses the same table.
904 void sigchld_handler(int s) {
909 while((pid=waitpid(-1, &status, WNOHANG)) > 0) {
910 if(WIFEXITED(status)) {
911 msg3(LOG_INFO, "Child exited with %d", WEXITSTATUS(status));
// The table is keyed by a pointer to the PID value.
913 i=g_hash_table_lookup(children, &pid);
915 msg3(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid);
917 DEBUG2("Removing %d from the list of children", pid);
918 g_hash_table_remove(children, &pid);
924 * Kill a child. Called from sigterm_handler::g_hash_table_foreach.
927 * @param value the value corresponding to the above key
928 * @param user_data a pointer which we always set to 1, so that we know what
// Callback handed to g_hash_table_foreach() by sigterm_handler(); user_data is interpreted as a pointer to int.
931 void killchild(gpointer key, gpointer value, gpointer user_data) {
933 int *parent=user_data;
940 * Handle SIGTERM and dispatch it to our children
941 * @param s the signal we're handling (must be SIGTERM, or something
942 * is severely wrong).
// Run killchild() over every entry of the children table, passing the address of parent as user data.
944 void sigterm_handler(int s) {
947 g_hash_table_foreach(children, killchild, &parent);
957 * Detect the size of a file.
959 * @param fhandle An open filedescriptor
960 * @return the size of the file, or OFFT_MAX if detection was
// Determine the size behind an open descriptor, trying the BLKGETSIZE64 ioctl (when compiled in), then fstat(), then lseek() to the end.
963 off_t size_autodetect(int fhandle) {
966 struct stat stat_buf;
969 #ifdef HAVE_SYS_MOUNT_H
970 #ifdef HAVE_SYS_IOCTL_H
972 DEBUG("looking for export size with ioctl BLKGETSIZE64\n");
// The ioctl result is only accepted when the call succeeds and reports a non-zero size.
973 if (!ioctl(fhandle, BLKGETSIZE64, &bytes) && bytes) {
976 #endif /* BLKGETSIZE64 */
977 #endif /* HAVE_SYS_IOCTL_H */
978 #endif /* HAVE_SYS_MOUNT_H */
980 DEBUG("looking for fhandle size with fstat\n");
981 stat_buf.st_size = 0;
982 error = fstat(fhandle, &stat_buf);
// A positive st_size is returned directly.
984 if(stat_buf.st_size > 0)
985 return (off_t)stat_buf.st_size;
987 err("fstat failed: %m");
990 DEBUG("looking for fhandle size with lseek SEEK_END\n");
// NOTE(review): seeking to SEEK_END moves the file position of fhandle.
991 es = lseek(fhandle, (off_t)0, SEEK_END);
992 if (es > ((off_t)0)) {
995 DEBUG2("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4)));
998 err("Could not find size of exported block device: %m");
1003 * Get the file handle and offset, given an export offset.
1005 * @param export An array of export files
1006 * @param a The offset to get corresponding file/offset for
1007 * @param fhandle [out] File descriptor
1008 * @param foffset [out] Offset into fhandle
1009 * @param maxbytes [out] Tells how many bytes can be read/written
1010 * from fhandle starting at foffset (0 if there is no limit)
1011 * @return 0 on success, -1 on failure
// Map export offset a to the entry of the export array that contains it: its descriptor, the offset inside that file and the bytes left before the next file starts.
1013 int get_filepos(GArray* export, off_t a, int* fhandle, off_t* foffset, size_t* maxbytes ) {
1014 /* Negative offset not allowed */
1018 /* Binary search for last file with starting offset <= a */
1021 int end = export->len - 1;
1022 while( start <= end ) {
1023 int mid = (start + end) / 2;
1024 fi = g_array_index(export, FILE_INFO, mid);
1025 if( fi.startoff < a ) {
1027 } else if( fi.startoff > a ) {
1035 /* end should never go negative, since first startoff is 0 and a >= 0 */
// The entry at index end supplies the descriptor and the base offset.
1038 fi = g_array_index(export, FILE_INFO, end);
1039 *fhandle = fi.fhandle;
1040 *foffset = a - fi.startoff;
// When a later file exists, the usable length is capped at the start offset of that file.
1042 if( end+1 < export->len ) {
1043 FILE_INFO fi_next = g_array_index(export, FILE_INFO, end+1);
1044 *maxbytes = fi_next.startoff - a;
1051 * seek to a position in a file, with error handling.
1052 * @param handle a filedescriptor
1053 * @param a position to seek to
1054 * @todo get rid of this; lastpoint is a global variable right now, but it
1055 * shouldn't be. If we pass it on as a parameter, that makes things a *lot*
// Position handle at absolute offset a with lseek(SEEK_SET); a negative result is reported through err().
1058 void myseek(int handle,off_t a) {
1059 if (lseek(handle, a, SEEK_SET) < 0) {
1060 err("Can not seek locally!\n");
1065 * Write an amount of bytes at a given offset to the right file. This
1066 * abstracts the write-side of the multiple file option.
1068 * @param a The offset where the write should start
1069 * @param buf The buffer to write from
1070 * @param len The length of buf
1071 * @param client The client we're serving for
1072 * @return The number of bytes actually written, or -1 in case of an error
// Write up to len bytes from buf at export offset a into the file that contains that offset.
1074 ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
1080 if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
// A maxbytes of 0 means no limit; otherwise the request is compared with the room left in this file.
1082 if(maxbytes && len > maxbytes)
// NOTE(review): foffset is an off_t and len a size_t, but they are printed with %llu and %u - confirm the debug format.
1085 DEBUG5("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle, foffset, len, fua);
// Seek and write are separate calls; the write() result is kept in retval and may be short.
1087 myseek(fhandle, foffset);
1088 retval = write(fhandle, buf, len);
1089 if(client->server->flags & F_SYNC) {
1093 /* This is where we would do the following
1094 * #ifdef USE_SYNC_FILE_RANGE
1095 * However, we don't, for the reasons set out below
1096 * by Christoph Hellwig <hch@infradead.org>
1099 * fdatasync is equivalent to fsync except that it does not flush
1100 * non-essential metadata (basically just timestamps in practice), but it
1101 * does flush metadata requried to find the data again, e.g. allocation
1102 * information and extent maps. sync_file_range does nothing but flush
1103 * out pagecache content - it means you basically won't get your data
1104 * back in case of a crash if you either:
1106 * a) have a volatile write cache in your disk (e.g. any normal SATA disk)
1107 * b) are using a sparse file on a filesystem
1108 * c) are using a fallocate-preallocated file on a filesystem
1109 * d) use any file on a COW filesystem like btrfs
1111 * e.g. it only does anything useful for you if you do not have a volatile
1112 * write cache, and either use a raw block device node, or just overwrite
1113 * an already fully allocated (and not preallocated) file on a non-COW
1117 * What we should do is open a second FD with O_DSYNC set, then write to
1118 * that when appropriate. However, with a Linux client, every REQ_FUA
1119 * immediately follows a REQ_FLUSH, so fdatasync does not cause performance
1124 sync_file_range(fhandle, foffset, len,
1125 SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE |
1126 SYNC_FILE_RANGE_WAIT_AFTER);
1135 * Call rawexpwrite repeatedly until all data has been written.
1136 * @return 0 on success, nonzero on failure
// Call rawexpwrite() while data remains and it keeps reporting progress; the result is non-zero when a call failed or bytes are left over.
1138 int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
1141 while(len > 0 && (ret=rawexpwrite(a, buf, len, client, fua)) > 0 ) {
1146 return (ret < 0 || len != 0);
1150 * Read an amount of bytes at a given offset from the right file. This
1151 * abstracts the read-side of the multiple files option.
1153 * @param a The offset where the read should start
1154 * @param buf A buffer to read into
1155 * @param len The size of buf
1156 * @param client The client we're serving for
1157 * @return The number of bytes actually read, or -1 in case of an
// Read up to len bytes into buf from export offset a, taken from the file that contains that offset.
1160 ssize_t rawexpread(off_t a, char *buf, size_t len, CLIENT *client) {
1165 if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
// A maxbytes of 0 means no limit; otherwise the request is compared with the room left in this file.
1167 if(maxbytes && len > maxbytes)
// NOTE(review): foffset is an off_t and len a size_t, but they are printed with %llu and %u - confirm the debug format.
1170 DEBUG4("(READ from fd %d offset %llu len %u), ", fhandle, foffset, len);
// The read() result is returned as is and may be short.
1172 myseek(fhandle, foffset);
1173 return read(fhandle, buf, len);
1177 * Call rawexpread repeatedly until all data has been read.
1178 * @return 0 on success, nonzero on failure
// Call rawexpread() while data remains and it keeps reporting progress; the result is non-zero when a call failed or bytes are left over.
1180 int rawexpread_fully(off_t a, char *buf, size_t len, CLIENT *client) {
1183 while(len > 0 && (ret=rawexpread(a, buf, len, client)) > 0 ) {
1188 return (ret < 0 || len != 0);
1192 * Read an amount of bytes at a given offset from the right file. This
1193 * abstracts the read-side of the copyonwrite stuff, and calls
1194 * rawexpread() with the right parameters to do the actual work.
1195 * @param a The offset where the read should start
1196 * @param buf A buffer to read into
1197 * @param len The size of buf
1198 * @param client The client we're going to read for
1199 * @return 0 on success, nonzero on failure
1201 int expread(off_t a, char *buf, size_t len, CLIENT *client) {
1202 off_t rdlen, offset;
1203 off_t mapcnt, mapl, maph, pagestart;
1205 if (!(client->server->flags & F_COPYONWRITE))
1206 return(rawexpread_fully(a, buf, len, client));
1207 DEBUG3("Asked to read %d bytes at %llu.\n", len, (unsigned long long)a);
1209 mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
1211 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
1212 pagestart=mapcnt*DIFFPAGESIZE;
1214 rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
1215 len : (size_t)DIFFPAGESIZE-offset;
1216 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1217 DEBUG3("Page %llu is at %lu\n", (unsigned long long)mapcnt,
1218 (unsigned long)(client->difmap[mapcnt]));
1219 myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset);
1220 if (read(client->difffile, buf, rdlen) != rdlen) return -1;
1221 } else { /* the block is not there */
1222 DEBUG2("Page %llu is not here, we read the original one\n",
1223 (unsigned long long)mapcnt);
1224 if(rawexpread_fully(a, buf, rdlen, client)) return -1;
1226 len-=rdlen; a+=rdlen; buf+=rdlen;
1232 * Write an amount of bytes at a given offset to the right file. This
1233 * abstracts the write-side of the copyonwrite option, and calls
1234 * rawexpwrite() with the right parameters to do the actual work.
1236 * @param a The offset where the write should start
1237 * @param buf The buffer to write from
1238 * @param len The length of buf
1239 * @param client The client we're going to write for.
1240 * @return 0 on success, nonzero on failure
1242 int expwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
1243 char pagebuf[DIFFPAGESIZE];
1244 off_t mapcnt,mapl,maph;
1249 if (!(client->server->flags & F_COPYONWRITE))
1250 return(rawexpwrite_fully(a, buf, len, client, fua));
1251 DEBUG3("Asked to write %d bytes at %llu.\n", len, (unsigned long long)a);
1253 mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
1255 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
1256 pagestart=mapcnt*DIFFPAGESIZE ;
1257 offset=a-pagestart ;
1258 wrlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
1259 len : (size_t)DIFFPAGESIZE-offset;
1261 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1262 DEBUG3("Page %llu is at %lu\n", (unsigned long long)mapcnt,
1263 (unsigned long)(client->difmap[mapcnt])) ;
1264 myseek(client->difffile,
1265 client->difmap[mapcnt]*DIFFPAGESIZE+offset);
1266 if (write(client->difffile, buf, wrlen) != wrlen) return -1 ;
1267 } else { /* the block is not there */
1268 myseek(client->difffile,client->difffilelen*DIFFPAGESIZE) ;
1269 client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++;
1270 DEBUG3("Page %llu is not here, we put it at %lu\n",
1271 (unsigned long long)mapcnt,
1272 (unsigned long)(client->difmap[mapcnt]));
1273 rdlen=DIFFPAGESIZE ;
1274 if (rawexpread_fully(pagestart, pagebuf, rdlen, client))
1276 memcpy(pagebuf+offset,buf,wrlen) ;
1277 if (write(client->difffile, pagebuf, DIFFPAGESIZE) !=
1281 len-=wrlen ; a+=wrlen ; buf+=wrlen ;
1283 if (client->server->flags & F_SYNC) {
1284 fsync(client->difffile);
1286 /* open question: would it be cheaper to do multiple sync_file_ranges?
1287 as we iterate through the above?
1289 fdatasync(client->difffile);
1294 int expflush(CLIENT *client) {
1300 if (client->server->flags & F_COPYONWRITE) {
1301 return fsync(client->difffile);
1304 for (i = 0; i < client->export->len; i++) {
1305 FILE_INFO fi = g_array_index(client->export, FILE_INFO, i);
1306 if (fsync(fi.fhandle) < 0)
1314 * Do the initial negotiation.
1316 * @param client The client we're negotiating with.
/*
 * Handshake with a freshly connected peer on socket `net`.
 *
 * Visible behaviour: INIT_PASSWD and a magic number are written first
 * (opts_magic or cliserv_magic, depending on `client` / client->modern).
 * On the path that reads an option from the peer, an export name is read
 * and compared against each entry of `servers`; a match yields a newly
 * allocated CLIENT. Finally the export size, the flags and 124 zero bytes
 * are written.
 *
 * Several lines of this function are not visible in this listing, so the
 * exact branch structure and return statements must be checked against
 * the full source.
 */
1318 CLIENT* negotiate(int net, CLIENT *client, GArray* servers) {
1321 uint32_t flags = NBD_FLAG_HAS_FLAGS;
1322 uint16_t smallflags = 0;
1325 memset(zeros, '\0', sizeof(zeros));
/* greeting: sent when there is no client yet or the client is not "modern" */
1326 if(!client || !client->modern) {
1328 if (write(net, INIT_PASSWD, 8) < 0) {
1329 err_nonfatal("Negotiation failed: %m");
/* choice of magic; both values go out in network byte order */
1333 if(!client || client->modern) {
1335 magic = htonll(opts_magic);
1338 magic = htonll(cliserv_magic);
1340 if (write(net, &magic, sizeof(magic)) < 0) {
1341 err_nonfatal("Negotiation failed: %m");
1355 err("programmer error");
/* NOTE(review): each read() below is only checked for < 0; a short read
 * would leave part of the destination unset -- confirm whether a
 * read-until-complete helper should be used here. */
1356 if (write(net, &smallflags, sizeof(uint16_t)) < 0)
1357 err("Negotiation failed: %m");
1358 if (read(net, &reserved, sizeof(reserved)) < 0)
1359 err("Negotiation failed: %m");
1360 if (read(net, &magic, sizeof(magic)) < 0)
1361 err("Negotiation failed: %m");
1362 magic = ntohll(magic);
1363 if(magic != opts_magic) {
1367 if (read(net, &opt, sizeof(opt)) < 0)
1368 err("Negotiation failed: %m");
1370 if(opt != NBD_OPT_EXPORT_NAME) {
/* NOTE(review): namelen comes straight from the peer and is used without
 * an upper bound for both malloc(namelen+1) and read(). If namelen is a
 * 32-bit unsigned type, the maximum value makes namelen+1 wrap to 0.
 * No check of the malloc() result is visible here -- confirm. */
1374 if (read(net, &namelen, sizeof(namelen)) < 0)
1375 err("Negotiation failed: %m");
1376 namelen = ntohl(namelen);
1377 name = malloc(namelen+1);
1379 if (read(net, name, namelen) < 0)
1380 err("Negotiation failed: %m");
/* look the requested name up among the configured servers */
1381 for(i=0; i<servers->len; i++) {
1382 SERVER* serve = &(g_array_index(servers, SERVER, i));
1383 if(!strcmp(serve->servename, name)) {
/* this local `client` shadows the function parameter of the same name */
1384 CLIENT* client = g_new0(CLIENT, 1);
1385 client->server = serve;
1386 client->exportsize = OFFT_MAX;
1388 client->modern = TRUE;
1389 client->transactionlogfd = -1;
/* export size is sent as 8 bytes in network byte order */
1398 size_host = htonll((u64)(client->exportsize));
1399 if (write(net, &size_host, 8) < 0)
1400 err("Negotiation failed: %m");
/* translate server configuration flags into protocol flags */
1401 if (client->server->flags & F_READONLY)
1402 flags |= NBD_FLAG_READ_ONLY;
1403 if (client->server->flags & F_FLUSH)
1404 flags |= NBD_FLAG_SEND_FLUSH;
1405 if (client->server->flags & F_FUA)
1406 flags |= NBD_FLAG_SEND_FUA;
1407 if (client->server->flags & F_ROTATIONAL)
1408 flags |= NBD_FLAG_ROTATIONAL;
/* non-modern clients get the flags as 4 bytes, the other path as 16 bits */
1409 if (!client->modern) {
1411 flags = htonl(flags);
1412 if (write(client->net, &flags, 4) < 0)
1413 err("Negotiation failed: %m");
/* ~((uint16_t)0) is promoted to int before the complement, so the mask is
 * all ones; the narrowing is done by the (uint16_t) cast of the result */
1416 smallflags = (uint16_t)(flags & ~((uint16_t)0));
1417 smallflags = htons(smallflags);
1418 if (write(client->net, &smallflags, sizeof(smallflags)) < 0) {
1419 err("Negotiation failed: %m");
/* trailing padding of 124 zero bytes */
1423 if (write(client->net, zeros, 124) < 0)
1424 err("Negotiation failed: %m");
/**
 * Send a reply structure to the peer and, when a transaction log is open,
 * mirror the same bytes into it.
 *
 * The transaction-log half uses a variable named `client` that must be in
 * scope at the call site; the `net` argument only selects the socket.
 * Wrapped in do { } while (0) so the macro behaves as one statement,
 * including in an unbraced if/else.
 */
#define SEND(net,reply) do { \
	writeit((net), &(reply), sizeof(reply)); \
	if (client->transactionlogfd != -1) \
		writeit(client->transactionlogfd, &(reply), sizeof(reply)); \
} while (0)

/**
 * Send `reply` with its error field set to errcode (network byte order),
 * then reset the field to 0 so the structure can be reused.
 */
#define ERROR(client,reply,errcode) do { \
	(reply).error = htonl(errcode); \
	SEND((client)->net, reply); \
	(reply).error = 0; \
} while (0)
1435 * Serve a file to a single client.
1437 * @todo This beast needs to be split up in many tiny little manageable
1438 * pieces. Preferably with a chainsaw.
1440 * @param client The client we're going to serve to.
1441 * @return when the client disconnects
/*
 * Request loop for one connected client.
 *
 * Visible behaviour: after negotiate(), requests are read from
 * client->net (and mirrored to the transaction log when one is open),
 * converted from network byte order, and dispatched on the command:
 * disconnect, write, flush or read. Replies are sent with SEND()/ERROR().
 *
 * Many lines (loop headers, continue/break statements, closing braces)
 * are not visible in this listing; notes below are limited to what the
 * visible lines show.
 */
1443 int mainloop(CLIENT *client) {
1444 struct nbd_request request;
1445 struct nbd_reply reply;
1446 gboolean go_on=TRUE;
/* servers is NULL here: the client structure already exists */
1450 negotiate(client->net, client, NULL);
1451 DEBUG("Entering request loop!\n");
1452 reply.magic = htonl(NBD_REPLY_MAGIC);
/* one request header per iteration; the log gets the raw bytes as read */
1465 readit(client->net, &request, sizeof(request));
1466 if (client->transactionlogfd != -1)
1467 writeit(client->transactionlogfd, &request, sizeof(request));
1469 request.from = ntohll(request.from);
1470 request.type = ntohl(request.type);
1471 command = request.type & NBD_CMD_MASK_COMMAND;
/* disconnect: in copy-on-write mode the diff map and diff file are discarded */
1473 if (command==NBD_CMD_DISC) {
1474 msg2(LOG_INFO, "Disconnect request received.");
1475 if (client->server->flags & F_COPYONWRITE) {
1476 if (client->difmap) g_free(client->difmap) ;
1477 close(client->difffile);
1478 unlink(client->difffilename);
1479 free(client->difffilename);
1485 len = ntohl(request.len);
/* request.magic was not byte-swapped above, so compare in network order */
1487 if (request.magic != htonl(NBD_REQUEST_MAGIC))
1488 err("Not enough magic.");
/* requests that do not fit the buffer next to a reply header are handled
 * in pieces of currlen bytes */
1489 if (len > BUFSIZE - sizeof(struct nbd_reply)) {
1490 currlen = BUFSIZE - sizeof(struct nbd_reply);
1491 msg2(LOG_INFO, "oversized request (this is not a problem)");
1496 printf("%s from %llu (%llu) len %d, ", command ? "WRITE" :
1497 "READ", (unsigned long long)request.from,
1498 (unsigned long long)request.from / 512, len);
/* the reply carries the handle of the request it answers */
1500 memcpy(reply.handle, request.handle, sizeof(reply.handle));
/* NOTE(review): both range checks below add request.from and len before
 * comparing; if request.from is an unsigned 64-bit field the first sum can
 * wrap, and the second casts an off_t sum to ssize_t, which may be
 * narrower -- confirm the field types. */
1502 if ((command==NBD_CMD_WRITE) || (command==NBD_CMD_READ)) {
1503 if ((request.from + len) > (OFFT_MAX)) {
1504 DEBUG("[Number too large!]");
1505 ERROR(client, reply, EINVAL);
1509 if (((ssize_t)((off_t)request.from + len) > client->exportsize)) {
1511 ERROR(client, reply, EINVAL);
1516 if (command==NBD_CMD_WRITE) {
1517 DEBUG("wr: net->buf, ");
1519 readit(client->net, buf, currlen);
1520 DEBUG("buf->exp, ");
/* writes are refused with EPERM on read-only exports */
1521 if ((client->server->flags & F_READONLY) ||
1522 (client->server->flags & F_AUTOREADONLY)) {
1523 DEBUG("[WRITE to READONLY!]");
1524 ERROR(client, reply, EPERM);
/* NOTE(review): currlen bytes were read into buf above, but expwrite() is
 * handed len. For a request larger than the buffer those differ, which
 * looks like a read past the data just received -- confirm whether
 * currlen is intended here. */
1527 if (expwrite(request.from, buf, len, client,
1528 request.type & NBD_CMD_FLAG_FUA)) {
1529 DEBUG("Write failed: %m" );
1530 ERROR(client, reply, errno);
1533 SEND(client->net, reply);
1536 currlen = (len < BUFSIZE) ? len : BUFSIZE;
1541 if (command==NBD_CMD_FLUSH) {
1543 if (expflush(client)) {
1544 DEBUG("Flush failed: %m");
1545 ERROR(client, reply, errno);
1548 SEND(client->net, reply);
1553 if (command==NBD_CMD_READ) {
1554 DEBUG("exp->buf, ");
/* the reply header is placed at the front of buf so header and data can
 * go out in a single writeit() */
1555 memcpy(buf, &reply, sizeof(struct nbd_reply));
1556 if (client->transactionlogfd != -1)
1557 writeit(client->transactionlogfd, &reply, sizeof(reply));
1558 p = buf + sizeof(struct nbd_reply);
1559 writelen = currlen + sizeof(struct nbd_reply);
1561 if (expread(request.from, p, currlen, client)) {
1562 DEBUG("Read failed: %m");
1563 ERROR(client, reply, errno);
1567 DEBUG("buf->net, ");
1568 writeit(client->net, buf, writelen);
/* advance to the next piece of an oversized read */
1570 request.from += currlen;
1571 currlen = (len < BUFSIZE) ? len : BUFSIZE;
1579 DEBUG ("Ignoring unknown command\n");
1585 * Set up client export array, which is an array of FILE_INFO.
1586 * Also, split a single exportfile into multiple ones, if that was asked.
1587 * @param client information on the client which we want to setup export for
/*
 * Build client->export, an array of FILE_INFO entries, by opening the
 * export file (or, with F_MULTIFILE, files named "<exportname>.<i>").
 * Each entry records the file handle and its starting offset; the total
 * of the detected sizes becomes client->exportsize unless the server's
 * expected_size overrides it.
 *
 * Some lines (loop header, closing braces, short statements) are not
 * visible in this listing.
 */
1589 void setupexport(CLIENT* client) {
1591 off_t laststartoff = 0, lastsize = 0;
1592 int multifile = (client->server->flags & F_MULTIFILE);
1594 client->export = g_array_new(TRUE, TRUE, sizeof(FILE_INFO));
1596 /* If multi-file, open as many files as we can.
1597 * If not, open exactly one file.
1598 * Calculate file sizes as we go to get total size. */
1602 gchar* error_string;
/* NOTE(review): `mode` holds open() access flags (O_RDONLY / O_RDWR) but
 * is declared mode_t, the type of permission bits -- int would match
 * open()'s second parameter. */
1603 mode_t mode = (client->server->flags & F_READONLY) ? O_RDONLY : O_RDWR;
1606 tmpname=g_strdup_printf("%s.%d", client->exportname, i);
1608 tmpname=g_strdup(client->exportname);
1610 DEBUG2( "Opening %s\n", tmpname );
1611 fi.fhandle = open(tmpname, mode);
/* If a read-write open fails, a read-only open is tried; when that works
 * and the export is not copy-on-write, the server is flagged
 * F_AUTOREADONLY and F_READONLY (see the assignments that follow). */
1612 if(fi.fhandle == -1 && mode == O_RDWR) {
1613 /* Try again because maybe media was read-only */
1614 fi.fhandle = open(tmpname, O_RDONLY);
1615 if(fi.fhandle != -1) {
1616 /* Opening the base file in copyonwrite mode is
1618 if(!(client->server->flags & F_COPYONWRITE)) {
1619 client->server->flags |= F_AUTOREADONLY;
1620 client->server->flags |= F_READONLY;
1624 if(fi.fhandle == -1) {
1625 if(multifile && i>0)
1627 error_string=g_strdup_printf(
1628 "Could not open exported file %s: %%m",
1632 fi.startoff = laststartoff + lastsize;
1633 g_array_append_val(client->export, fi);
1636 /* Starting offset and size of this file will be used to
1637 * calculate starting offset of next file */
1638 laststartoff = fi.startoff;
1639 lastsize = size_autodetect(fi.fhandle);
1645 /* Set export size to total calculated size */
1646 client->exportsize = laststartoff + lastsize;
1648 /* Export size may be overridden */
1649 if(client->server->expected_size) {
1650 /* desired size must be <= total calculated size */
1651 if(client->server->expected_size > client->exportsize) {
1652 err("Size of exported file is too big\n");
1655 client->exportsize = client->server->expected_size;
/* final size and file count are logged at LOG_INFO */
1658 msg3(LOG_INFO, "Size of exported file/device is %llu", (unsigned long long)client->exportsize);
1660 msg3(LOG_INFO, "Total number of files: %d", i);
1664 int copyonwrite_prepare(CLIENT* client) {
1666 if ((client->difffilename = malloc(1024))==NULL)
1667 err("Failed to allocate string for diff file name");
1668 snprintf(client->difffilename, 1024, "%s-%s-%d.diff",client->exportname,client->clientname,
1670 client->difffilename[1023]='\0';
1671 msg3(LOG_INFO,"About to create map and diff file %s",client->difffilename) ;
1672 client->difffile=open(client->difffilename,O_RDWR | O_CREAT | O_TRUNC,0600) ;
1673 if (client->difffile<0) err("Could not create diff file (%m)") ;
1674 if ((client->difmap=calloc(client->exportsize/DIFFPAGESIZE,sizeof(u32)))==NULL)
1675 err("Could not allocate memory") ;
1676 for (i=0;i<client->exportsize/DIFFPAGESIZE;i++) client->difmap[i]=(u32)-1 ;
1682 * Run a command. This is used for the ``prerun'' and ``postrun'' config file
1685 * @param command the command to be run. Read from the config file
1686 * @param file the file name we're about to export
1688 int do_run(gchar* command, gchar* file) {
1692 if(command && *command) {
1693 cmd = g_strdup_printf(command, file);
1701 * Serve a connection.
1703 * @todo allow for multithreading, perhaps use libevent. Not just yet, though;
1704 * follow the road map.
1706 * @param client a connected client
1708 void serveconnection(CLIENT *client) {
1709 if (client->server->transactionlog && (client->transactionlogfd == -1))
1711 if (-1 == (client->transactionlogfd = open(client->server->transactionlog,
1713 S_IRUSR | S_IWUSR)))
1714 g_warning("Could not open transaction log %s",
1715 client->server->transactionlog);
1718 if(do_run(client->server->prerun, client->exportname)) {
1721 setupexport(client);
1723 if (client->server->flags & F_COPYONWRITE) {
1724 copyonwrite_prepare(client);
1727 setmysockopt(client->net);
1730 do_run(client->server->postrun, client->exportname);
1732 if (-1 != client->transactionlogfd)
1734 close(client->transactionlogfd);
1735 client->transactionlogfd = -1;
1740 * Find the name of the file we have to serve. This will use g_strdup_printf
1741 * to put the IP address of the client inside a filename containing
1742 * "%s" (in the form as specified by the "virtstyle" option). That name
1743 * is then written to client->exportname.
1745 * @param net A socket connected to an nbd client
1746 * @param client information about the client. The IP address in human-readable
1747 * format will be written to a new char* buffer, the address of which will be
1748 * stored in client->clientname.
/*
 * Work out who is connected on `net` and which file to export to them.
 *
 * Visible behaviour: the peer address is obtained with getpeername() and
 * rendered numerically into `peername`; depending on the server's
 * virtstyle, client->exportname is either a copy of the configured name
 * or the configured name used as a printf-style template with the peer
 * address (or a "<network>/<peer>" string) substituted.
 * client->clientname receives a copy of the peer address.
 *
 * The case labels and several short statements are not visible in this
 * listing.
 */
1750 void set_peername(int net, CLIENT *client) {
1751 struct sockaddr_storage addrin;
1752 struct sockaddr_storage netaddr;
1753 struct sockaddr_in *netaddr4 = NULL;
1754 struct sockaddr_in6 *netaddr6 = NULL;
/* NOTE(review): declared size_t but passed to getpeername() below through
 * a (socklen_t *) cast; where the two types differ in width only part of
 * the object is read and written -- a socklen_t variable would avoid the
 * cast. */
1755 size_t addrinlen = sizeof( addrin );
1756 struct addrinfo hints;
1757 struct addrinfo *ai = NULL;
1758 char peername[NI_MAXHOST];
1759 char netname[NI_MAXHOST];
/* NOTE(review): the message names getsockname, the call is getpeername */
1765 if (getpeername(net, (struct sockaddr *) &addrin, (socklen_t *)&addrinlen) < 0)
1766 err("getsockname failed: %m");
/* numeric form of the peer address; the return value is not checked */
1768 getnameinfo((struct sockaddr *)&addrin, (socklen_t)addrinlen,
1769 peername, sizeof (peername), NULL, 0, NI_NUMERICHOST);
/* resolve the numeric string again; ai->ai_family is used further down to
 * tell IPv4 from IPv6 */
1771 memset(&hints, '\0', sizeof (hints));
1772 hints.ai_flags = AI_ADDRCONFIG;
1773 e = getaddrinfo(peername, NULL, &hints, &ai);
1776 fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
1781 switch(client->server->virtstyle) {
1783 client->exportname=g_strdup(client->server->exportname);
/* scan peername for '.' characters (replacement statement not visible) */
1786 for(i=0;i<strlen(peername);i++) {
1787 if(peername[i]=='.') {
/* the configured export name is the format string, peername its argument */
1792 client->exportname=g_strdup_printf(client->server->exportname, peername);
1795 memcpy(&netaddr, &addrin, addrinlen);
1796 if(ai->ai_family == AF_INET) {
1797 netaddr4 = (struct sockaddr_in *)&netaddr;
/* NOTE(review): the shifts clear low-order bits of s_addr as stored, which
 * is network byte order per POSIX, so on a little-endian host this may not
 * mask the host part; also 32 - cidrlen is 32 when cidrlen is 0, and
 * shifting a 32-bit value by 32 is undefined -- confirm. */
1798 (netaddr4->sin_addr).s_addr>>=32-(client->server->cidrlen);
1799 (netaddr4->sin_addr).s_addr<<=32-(client->server->cidrlen);
1801 getnameinfo((struct sockaddr *) netaddr4, (socklen_t) addrinlen,
1802 netname, sizeof (netname), NULL, 0, NI_NUMERICHOST);
1803 tmp=g_strdup_printf("%s/%s", netname, peername);
1804 }else if(ai->ai_family == AF_INET6) {
1805 netaddr6 = (struct sockaddr_in6 *)&netaddr;
/* IPv6: whole 32-bit words are zeroed while at least 32 bits remain to be
 * cleared, then the remaining bits are shifted out of one word */
1807 shift = 128-(client->server->cidrlen);
1809 while(shift >= 32) {
1810 ((netaddr6->sin6_addr).s6_addr32[i])=0;
1814 (netaddr6->sin6_addr).s6_addr32[i]>>=shift;
1815 (netaddr6->sin6_addr).s6_addr32[i]<<=shift;
1817 getnameinfo((struct sockaddr *)netaddr6, (socklen_t)addrinlen,
1818 netname, sizeof(netname), NULL, 0, NI_NUMERICHOST);
1819 tmp=g_strdup_printf("%s/%s", netname, peername);
1823 client->exportname=g_strdup_printf(client->server->exportname, tmp);
1829 msg4(LOG_INFO, "connect from %s, assigned file is %s",
1830 peername, client->exportname);
1831 client->clientname=g_strdup(peername);
1836 * @param data a pointer to pid_t which should be freed
/*
 * Destroy callback for the `children` hash table: setup_servers()
 * registers it as the value-destroy function in g_hash_table_new_full().
 * The body is not visible in this listing; presumably it releases the
 * heap-allocated pid_t that serveloop() stores -- confirm.
 */
1838 void destroy_pid_t(gpointer data) {
1843 * Loop through the available servers, and serve them. Never returns.
/*
 * Accept loop of the parent process.
 *
 * Visible behaviour: a master fd_set is built once from every server
 * socket plus modernsock; each round select() waits on a copy of it. A
 * connection on modernsock is negotiated immediately, connections on the
 * other sockets are matched to their SERVER. After the connection limit
 * and authorisation checks the process forks: the parent records the
 * child's pid in `children`, the child closes the listening sockets and
 * calls serveconnection().
 *
 * Loop headers, continue/exit statements and closing braces are not
 * visible in this listing.
 */
1845 int serveloop(GArray* servers) {
1846 struct sockaddr_storage addrin;
/* NOTE(review): initialised once here; accept() overwrites it with the
 * actual address length, and no reset before later accept() calls is
 * visible -- confirm. */
1847 socklen_t addrinlen=sizeof(addrin);
1855 * Set up the master fd_set. The set of descriptors we need
1856 * to select() for never changes anyway and it buys us a *lot*
1857 * of time to only build this once. However, if we ever choose
1858 * to not fork() for clients anymore, we may have to revisit
1863 for(i=0;i<servers->len;i++) {
/* NOTE(review): the socket value is tested for truth, so a descriptor of 0
 * would be skipped while a negative (unset) value would be passed to
 * FD_SET -- confirm how unused sockets are marked. */
1864 if((sock=(g_array_index(servers, SERVER, i)).socket)) {
1865 FD_SET(sock, &mset);
1866 max=sock>max?sock:max;
1870 FD_SET(modernsock, &mset);
1871 max=modernsock>max?modernsock:max;
1874 CLIENT *client = NULL;
/* select() modifies its set, so it works on a copy of the master set */
1877 memcpy(&rset, &mset, sizeof(fd_set));
1878 if(select(max+1, &rset, NULL, NULL, NULL)>0) {
1883 if(FD_ISSET(modernsock, &rset)) {
1884 if((net=accept(modernsock, (struct sockaddr *) &addrin, &addrinlen)) < 0)
/* no client yet: negotiate() picks the export from `servers` */
1886 client = negotiate(net, NULL, servers);
1888 err_nonfatal("negotiation failed");
1893 serve = client->server;
/* otherwise find the first server socket with a pending connection */
1895 for(i=0;i<servers->len && !net;i++) {
1896 serve=&(g_array_index(servers, SERVER, i));
1897 if(FD_ISSET(serve->socket, &rset)) {
1898 if ((net=accept(serve->socket, (struct sockaddr *) &addrin, &addrinlen)) < 0)
/* the limit is compared against the number of entries in `children` */
1905 if(serve->max_connections > 0 &&
1906 g_hash_table_size(children) >= serve->max_connections) {
1907 msg2(LOG_INFO, "Max connections reached");
/* the accepted socket is switched back to blocking mode */
1911 if((sock_flags = fcntl(net, F_GETFL, 0))==-1) {
1912 err("fcntl F_GETFL");
1914 if(fcntl(net, F_SETFL, sock_flags &~O_NONBLOCK)==-1) {
1915 err("fcntl F_SETFL ~O_NONBLOCK");
1918 client = g_new0(CLIENT, 1);
1919 client->server=serve;
1920 client->exportsize=OFFT_MAX;
1922 client->transactionlogfd = -1;
1924 set_peername(net, client);
1925 if (!authorized_client(client)) {
1926 msg2(LOG_INFO,"Unauthorized client") ;
1930 msg2(LOG_INFO,"Authorized client") ;
/* the pid lives on the heap because the hash table keeps a pointer to it */
1931 pid=g_malloc(sizeof(pid_t));
1934 if ((*pid=fork())<0) {
1935 msg3(LOG_INFO,"Could not fork (%s)",strerror(errno)) ;
1939 if (*pid>0) { /* parent */
/* the same pointer serves as key and as value */
1941 g_hash_table_insert(children, pid, pid);
/* child: drop the parent's bookkeeping and listening sockets */
1945 g_hash_table_destroy(children);
1946 for(i=0;i<servers->len;i++) {
1947 serve=&g_array_index(servers, SERVER, i);
1948 close(serve->socket);
1950 /* FALSE does not free the
1951 actual data. This is required,
1952 because the client has a
1953 direct reference into that
1954 data, and otherwise we get a
1956 g_array_free(servers, FALSE);
1959 msg2(LOG_INFO,"Starting to serve");
1960 serveconnection(client);
/*
 * Apply the standard options to a listening socket: SO_REUSEADDR,
 * SO_KEEPALIVE, and non-blocking mode. Any failure is reported via err().
 *
 * NOTE(review): the declaration of `yes` is not visible in this listing;
 * both setsockopt() calls pass sizeof(int) as the option length, so `yes`
 * needs to be at least int-sized -- confirm.
 */
1967 void dosockopts(int socket) {
1975 /* lose the pesky "Address already in use" error message */
1976 if (setsockopt(socket,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) {
1977 err("setsockopt SO_REUSEADDR");
1979 if (setsockopt(socket,SOL_SOCKET,SO_KEEPALIVE,&yes,sizeof(int)) == -1) {
1980 err("setsockopt SO_KEEPALIVE");
1983 /* make the listening socket non-blocking */
/* read-modify-write so the other file status flags are preserved */
1984 if ((sock_flags = fcntl(socket, F_GETFL, 0)) == -1) {
1985 err("fcntl F_GETFL");
1987 if (fcntl(socket, F_SETFL, sock_flags | O_NONBLOCK) == -1) {
1988 err("fcntl F_SETFL O_NONBLOCK");
1993 * Connect a server's socket.
1995 * @param serve the server we want to connect.
1997 int setup_serve(SERVER *serve) {
1998 struct addrinfo hints;
1999 struct addrinfo *ai = NULL;
2004 return serve->servename ? 1 : 0;
2006 memset(&hints,'\0',sizeof(hints));
2007 hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG | AI_NUMERICSERV;
2008 hints.ai_socktype = SOCK_STREAM;
2009 hints.ai_family = serve->socket_family;
2011 port = g_strdup_printf ("%d", serve->port);
2015 e = getaddrinfo(serve->listenaddr,port,&hints,&ai);
2020 fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
2026 if(serve->socket_family == AF_UNSPEC)
2027 serve->socket_family = ai->ai_family;
2030 if ((serve->flags) && F_SDP) {
2031 if (ai->ai_family == AF_INET)
2032 ai->ai_family = AF_INET_SDP;
2033 else (ai->ai_family == AF_INET6)
2034 ai->ai_family = AF_INET6_SDP;
2037 if ((serve->socket = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol)) < 0)
2040 dosockopts(serve->socket);
2042 DEBUG("Waiting for connections... bind, ");
2043 e = bind(serve->socket, ai->ai_addr, ai->ai_addrlen);
2044 if (e != 0 && errno != EADDRINUSE)
2047 if (listen(serve->socket, 1) < 0)
2051 if(serve->servename) {
/**
 * Open the shared listening socket used for name-based negotiation.
 *
 * Resolves modern_listen on NBD_DEFAULT_PORT, then creates, configures,
 * binds and listens on the socket, storing it in modernsock. Resolution
 * failure exits the process; socket failures go through err().
 */
void open_modern(void) {
	struct addrinfo *res = NULL;
	struct addrinfo hints;
	int rc;

	memset(&hints, '\0', sizeof(hints));
	hints.ai_family = AF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_protocol = IPPROTO_TCP;
	hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG;

	rc = getaddrinfo(modern_listen, NBD_DEFAULT_PORT, &hints, &res);
	if (rc != 0) {
		fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(rc));
		exit(EXIT_FAILURE);
	}

	modernsock = socket(res->ai_family, res->ai_socktype, res->ai_protocol);
	if (modernsock < 0) {
		err("socket: %m");
	}

	dosockopts(modernsock);

	if (bind(modernsock, res->ai_addr, res->ai_addrlen) != 0) {
		err("bind: %m");
	}
	if (listen(modernsock, 10) < 0) {
		err("listen: %m");
	}

	freeaddrinfo(res);
}
2091 * Connect our servers.
2093 void setup_servers(GArray* servers) {
2095 struct sigaction sa;
2098 for(i=0;i<servers->len;i++) {
2099 want_modern |= setup_serve(&(g_array_index(servers, SERVER, i)));
2104 children=g_hash_table_new_full(g_int_hash, g_int_equal, NULL, destroy_pid_t);
2106 sa.sa_handler = sigchld_handler;
2107 sigemptyset(&sa.sa_mask);
2108 sa.sa_flags = SA_RESTART;
2109 if(sigaction(SIGCHLD, &sa, NULL) == -1)
2110 err("sigaction: %m");
2111 sa.sa_handler = sigterm_handler;
2112 sigemptyset(&sa.sa_mask);
2113 sa.sa_flags = SA_RESTART;
2114 if(sigaction(SIGTERM, &sa, NULL) == -1)
2115 err("sigaction: %m");
2119 * Go daemon (unless we specified at compile time that we didn't want this)
2120 * @param serve the first server of our configuration. If its port is zero,
2121 * then do not daemonize, because we're doing inetd then. This parameter
2122 * is only used to create a PID file of the form
2123 * /var/run/nbd-server.<port>.pid; it's not modified in any way.
/*
 * daemonize(): when NODAEMON is not defined this is a real function that
 * also writes a PID file; otherwise it is an empty macro.
 *
 * Visible behaviour of the function: it tests for a server whose port is
 * zero first, fills pidftemplate with a default path if it is still
 * empty, formats the PID file name from the template and writes the
 * current process id to it.
 */
2125 #if !defined(NODAEMON)
2126 void daemonize(SERVER* serve) {
/* a server with port 0 is special-cased (the action taken is not visible
 * in this listing) */
2129 if(serve && !(serve->port)) {
2135 if(!*pidftemplate) {
/* 255 leaves room for a terminating NUL, assuming pidftemplate holds at
 * least 256 bytes and was zero-filled beforehand -- confirm at its
 * definition */
2137 strncpy(pidftemplate, "/var/run/nbd-server.%d.pid", 255);
2139 strncpy(pidftemplate, "/var/run/nbd-server.pid", 255);
/* NOTE(review): pidftemplate is used as the format string; if it can come
 * from configuration, any conversion other than a single %d would
 * misbehave -- confirm where it is set. */
2142 snprintf(pidfname, 255, pidftemplate, serve ? serve->port : 0);
2143 pidf=fopen(pidfname, "w");
2145 fprintf(pidf,"%d\n", (int)getpid());
/* NOTE(review): this message has no trailing newline */
2149 fprintf(stderr, "Not fatal; continuing");
2153 #define daemonize(serve)
2154 #endif /* !defined(NODAEMON) */
2157 * Everything beyond this point (in the file) is run in non-daemon mode.
2158 * The stuff above daemonize() isn't.
/*
 * Report a failure that concerns one particular export. The prototype
 * marks the function as never returning (G_GNUC_NORETURN).
 *
 * Visible behaviour: a message naming the export and its port is logged
 * with g_message(); the rest of the body is not visible in this listing.
 */
2161 void serve_err(SERVER* serve, const char* msg) G_GNUC_NORETURN;
2163 void serve_err(SERVER* serve, const char* msg) {
2164 g_message("Export of %s on port %d failed:", serve->exportname,
2170 * Set up user-ID and/or group-ID
2172 void dousers(void) {
2177 gr=getgrnam(rungroup);
2179 str = g_strdup_printf("Invalid group name: %s", rungroup);
2182 if(setgid(gr->gr_gid)<0) {
2183 err("Could not set GID: %m");
2187 pw=getpwnam(runuser);
2189 str = g_strdup_printf("Invalid user name: %s", runuser);
2192 if(setuid(pw->pw_uid)<0) {
2193 err("Could not set UID: %m");
/*
 * GLib log handler that forwards messages to syslog(). main() installs it
 * with g_log_set_default_handler().
 *
 * The GLib log level selects a syslog priority (starting from LOG_DEBUG)
 * and the message is passed to syslog() through a "%s" format.
 *
 * NOTE(review): the statements between the case labels are not visible in
 * this listing -- confirm that every group, including the last one, ends
 * in a break. Also, the labels are single flag values; if log_level can
 * carry several flags at once, none of the labels would match -- confirm.
 */
2199 void glib_message_syslog_redirect(const gchar *log_domain,
2200 GLogLevelFlags log_level,
2201 const gchar *message,
2204 int level=LOG_DEBUG;
2208 case G_LOG_FLAG_FATAL:
2209 case G_LOG_LEVEL_CRITICAL:
2210 case G_LOG_LEVEL_ERROR:
2213 case G_LOG_LEVEL_WARNING:
2216 case G_LOG_LEVEL_MESSAGE:
2217 case G_LOG_LEVEL_INFO:
2220 case G_LOG_LEVEL_DEBUG:
2225 syslog(level, "%s", message);
2230 * Main entry point...
2232 int main(int argc, char *argv[]) {
2237 if (sizeof( struct nbd_request )!=28) {
2238 fprintf(stderr,"Bad size of structure. Alignment problems?\n");
2239 exit(EXIT_FAILURE) ;
2242 memset(pidftemplate, '\0', 256);
2245 config_file_pos = g_strdup(CFILE);
2246 serve=cmdline(argc, argv);
2247 servers = parse_cfile(config_file_pos, &err);
2250 serve->socket_family = AF_UNSPEC;
2252 append_serve(serve, servers);
2254 if (!(serve->port)) {
2257 /* You really should define ISSERVER if you're going to use
2258 * inetd mode, but if you don't, closing stdout and stderr
2259 * (which inetd had connected to the client socket) will let it
2263 open("/dev/null", O_WRONLY);
2264 open("/dev/null", O_WRONLY);
2265 g_log_set_default_handler( glib_message_syslog_redirect, NULL );
2267 client=g_malloc(sizeof(CLIENT));
2268 client->server=serve;
2270 client->exportsize=OFFT_MAX;
2271 set_peername(0,client);
2272 serveconnection(client);
2277 if(!servers || !servers->len) {
2278 if(err && !(err->domain == g_quark_from_string("parse_cfile")
2279 && err->code == CFILE_NOTFOUND)) {
2280 g_warning("Could not parse config file: %s",
2281 err ? err->message : "Unknown error");
2285 g_warning("Specifying an export on the command line is deprecated.");
2286 g_warning("Please use a configuration file instead.");
2289 if((!serve) && (!servers||!servers->len)) {
2290 g_message("No configured exports; quitting.");
2295 setup_servers(servers);