2 * Network Block Device - server
4 * Copyright 1996-1998 Pavel Machek, distribute under GPL
5 * <pavel@atrey.karlin.mff.cuni.cz>
6 * Copyright 2001-2004 Wouter Verhelst <wouter@debian.org>
7 * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk>
9 * Version 1.0 - hopefully 64-bit-clean
10 * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au>
11 * Version 1.2 - autodetect size of block devices, thanks to Peter T. Breuer <ptb@it.uc3m.es>
12 * Version 1.5 - can compile on Unix systems that don't have 64 bit integer
13 * type, or don't have 64 bit file offsets by defining FS_32BIT
14 * in compile options for nbd-server *only*. This can be done
15 * with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the
16 * original autoconf input file, or I would make it a configure
17 * option.) Ken Yap <ken@nlc.net.au>.
18 * Version 1.6 - fix autodetection of block device size and really make 64 bit
19 * clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk>
20 * Version 2.0 - Version synchronised with client
21 * Version 2.1 - Reap zombie client processes when they exit. Removed
22 * (uncommented) the _IO magic, it's no longer necessary. Wouter
23 * Verhelst <wouter@debian.org>
24 * Version 2.2 - Auto switch to read-only mode (useful for floppies).
25 * Version 2.3 - Fixed code so that Large File Support works. This
26 * removes the FS_32BIT compile-time directive; define
27 * _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be
28 * using FS_32BIT. This will allow you to use files >2GB instead of
29 * having to use the -m option. Wouter Verhelst <wouter@debian.org>
30 * Version 2.4 - Added code to keep track of children, so that we can
31 * properly kill them from initscripts. Add a call to daemon(),
32 * so that processes don't think they have to wait for us, which is
33 * interesting for initscripts as well. Wouter Verhelst
35 * Version 2.5 - Bugfix release: forgot to reset child_arraysize to
36 * zero after fork()ing, resulting in nbd-server going berserk
37 * when it receives a signal with at least one child open. Wouter
38 * Verhelst <wouter@debian.org>
39 * 10/10/2003 - Added socket option SO_KEEPALIVE (sf.net bug 819235);
40 * rectified type of mainloop::size_host (sf.net bugs 814435 and
41 * 817385); close the PID file after writing to it, so that the
42 * daemon can actually be found. Wouter Verhelst
44 * 10/10/2003 - Size of the data "size_host" was wrong and so was not
45 * correctly put in network endianness. Many types were corrected
46 * (size_t and off_t instead of int). <vspaceg@sourceforge.net>
47 * Version 2.6 - Some code cleanup.
48 * Version 2.7 - Better build system.
49 * 11/02/2004 - Doxygenified the source, modularized it a bit. Needs a
50 * lot more work, but this is a start. Wouter Verhelst
52 * 16/03/2010 - Add IPv6 support.
53 * Kitt Tientanopajai <kitt@kitty.in.th>
54 * Neutron Soutmun <neo.neutron@gmail.com>
55 * Suriya Soutmun <darksolar@gmail.com>
58 /* Includes LFS defines, which define behaviours of some of the following
59 * headers, so must come before those */
62 #include <sys/types.h>
63 #include <sys/socket.h>
65 #include <sys/select.h> /* select */
66 #include <sys/wait.h> /* wait */
67 #ifdef HAVE_SYS_IOCTL_H
68 #include <sys/ioctl.h>
70 #include <sys/param.h>
71 #ifdef HAVE_SYS_MOUNT_H
72 #include <sys/mount.h> /* For BLKGETSIZE */
74 #include <signal.h> /* sigaction */
76 #include <netinet/tcp.h>
77 #include <netinet/in.h>
85 #include <arpa/inet.h>
95 /* used in cliserv.h, so must come first */
96 #define MY_NAME "nbd_server"
100 #include <sdp_inet.h>
103 /** Default position of the config file */
105 #define SYSCONFDIR "/etc"
107 #define CFILE SYSCONFDIR "/nbd-server/config"
109 /** Where our config file actually is */
110 gchar* config_file_pos;
112 /** What user we're running as */
114 /** What group we're running as */
115 gchar* rungroup=NULL;
116 /** whether to export using the old negotiation protocol (port-based) */
117 gboolean do_oldstyle=FALSE;
119 /* Whether we should avoid forking */
122 /** Logging macros, now nothing goes to syslog unless you say ISSERVER */
124 #define msg2(a,b) syslog(a,b)
125 #define msg3(a,b,c) syslog(a,b,c)
126 #define msg4(a,b,c,d) syslog(a,b,c,d)
128 #define msg2(a,b) g_message(b)
129 #define msg3(a,b,c) g_message(b,c)
130 #define msg4(a,b,c,d) g_message(b,c,d)
133 /* Debugging macros */
136 #define DEBUG(...) printf(__VA_ARGS__)
140 #ifndef PACKAGE_VERSION
141 #define PACKAGE_VERSION ""
144 * The highest value a variable of type off_t can reach. This is a signed
145 * integer, so set all bits except for the leftmost one.
147 #define OFFT_MAX ~((off_t)1<<(sizeof(off_t)*8-1))
148 #define LINELEN 256 /**< Size of static buffer used to read the
149 authorization file (yuck) */
150 #define BUFSIZE ((1024*1024)+sizeof(struct nbd_reply)) /**< Size of buffer that can hold requests */
151 #define DIFFPAGESIZE 4096 /**< diff file uses those chunks */
152 #define F_READONLY 1 /**< flag to tell us a file is readonly */
153 #define F_MULTIFILE 2 /**< flag to tell us a file is exported using -m */
154 #define F_COPYONWRITE 4 /**< flag to tell us a file is exported using
156 #define F_AUTOREADONLY 8 /**< flag to tell us a file is set to autoreadonly */
/* Export option bits kept in SERVER.flags; parse_cfile() sets or clears each
 * one from the boolean config key sparse_cow, sdp, sync, flush, fua or
 * rotational respectively. */
157 #define F_SPARSE 16 /**< flag to tell us copy-on-write should use a sparse file */
158 #define F_SDP 32 /**< flag to tell us the export should be done using the Socket Direct Protocol for RDMA */
159 #define F_SYNC 64 /**< Whether to fsync() after a write */
160 #define F_FLUSH 128 /**< Whether server wants FLUSH to be sent by the client */
161 #define F_FUA 256 /**< Whether server wants FUA to be sent by the client */
162 #define F_ROTATIONAL 512 /**< Whether server wants the client to implement the elevator algorithm */
163 GHashTable *children;
164 char pidfname[256]; /**< name of our PID file */
165 char pidftemplate[256]; /**< template to be used for the filename of the PID file */
166 char default_authname[] = SYSCONFDIR "/nbd-server/allow"; /**< default name of allow file */
168 int modernsock=0; /**< Socket for the modern handler. Not used
169 if a client was only specified on the
170 command line; only port used if
171 oldstyle is set to false (and then the
172 command-line client isn't used, gna gna) */
173 char* modern_listen; /**< listenaddr value for modernsock */
176 * Types of virtualization
179 VIRT_NONE=0, /**< No virtualization */
180 VIRT_IPLIT, /**< Literal IP address as part of the filename */
181 VIRT_IPHASH, /**< Replacing all dots in an ip address by a / before
182 doing the same as in IPLIT */
183 VIRT_CIDR, /**< Every subnet in its own directory */
187 * Variables associated with a server.
190 gchar* exportname; /**< (unprocessed) filename of the file we're exporting */
191 off_t expected_size; /**< size of the exported file as it was told to
192 us through configuration */
193 gchar* listenaddr; /**< The IP address we're listening on */
194 unsigned int port; /**< port we're exporting this file at */
195 char* authname; /**< filename of the authorization file */
196 int flags; /**< flags associated with this exported file */
197 int socket; /**< The socket of this server. */
198 int socket_family; /**< family of the socket */
199 VIRT_STYLE virtstyle;/**< The style of virtualization, if any */
200 uint8_t cidrlen; /**< The length of the mask when we use
201 CIDR-style virtualization */
202 gchar* prerun; /**< command to be ran after connecting a client,
203 but before starting to serve */
204 gchar* postrun; /**< command that will be ran after the client
206 gchar* servename; /**< name of the export as selected by nbd-client */
207 int max_connections; /**< maximum number of opened connections */
208 gchar* transactionlog;/**< filename for transaction log */
212 * Variables associated with a client socket.
215 int fhandle; /**< file descriptor */
216 off_t startoff; /**< starting offset of this file */
220 off_t exportsize; /**< size of the file we're exporting */
221 char *clientname; /**< peer */
222 char *exportname; /**< (processed) filename of the file we're exporting */
223 GArray *export; /**< array of FILE_INFO of exported files;
224 array size is always 1 unless we're
225 doing the multiple file option */
226 int net; /**< The actual client socket */
227 SERVER *server; /**< The server this client is getting data from */
228 char* difffilename; /**< filename of the copy-on-write file, if any */
229 int difffile; /**< filedescriptor of copyonwrite file. @todo
230 shouldn't this be an array too? (cfr export) Or
231 make -m and -c mutually exclusive */
232 u32 difffilelen; /**< number of pages in difffile */
233 u32 *difmap; /**< see comment on the global difmap for this one */
234 gboolean modern; /**< client was negotiated using modern negotiation protocol */
235 int transactionlogfd;/**< fd for transaction log */
239 * Type of configuration file values
242 PARAM_INT, /**< This parameter is an integer */
243 PARAM_STRING, /**< This parameter is a string */
244 PARAM_BOOL, /**< This parameter is a boolean */
248 * Configuration file values
251 gchar *paramname; /**< Name of the parameter, as it appears in
253 gboolean required; /**< Whether this is a required (as opposed to
254 optional) parameter */
255 PARAM_TYPE ptype; /**< Type of the parameter. */
256 gpointer target; /**< Pointer to where the data of this
257 parameter should be written. If ptype is
258 PARAM_BOOL, the data is or'ed rather than
260 gint flagval; /**< Flag mask for this parameter in case ptype
265 * Translate a command name into human readable form
267 * @param command The command number (after applying NBD_CMD_MASK_COMMAND)
268 * @return pointer to the command name
270 static inline const char * getcommandname(uint64_t command) {
273 return "NBD_CMD_READ";
275 return "NBD_CMD_WRITE";
277 return "NBD_CMD_DISC";
279 return "NBD_CMD_FLUSH";
287 * Check whether a client is allowed to connect. Works with an authorization
288 * file which contains one line per machine, no wildcards.
290 * @param opts The client who's trying to connect.
291 * @return 0 - authorization refused, 1 - OK
293 int authorized_client(CLIENT *opts) {
294 const char *ERRMSG="Invalid entry '%s' in authfile '%s', so, refusing all connections.";
299 struct in_addr client;
300 struct in_addr cltemp;
303 if ((f=fopen(opts->server->authname,"r"))==NULL) {
304 msg4(LOG_INFO,"Can't open authorization file %s (%s).",
305 opts->server->authname,strerror(errno)) ;
309 inet_aton(opts->clientname, &client);
310 while (fgets(line,LINELEN,f)!=NULL) {
311 if((tmp=index(line, '/'))) {
312 if(strlen(line)<=tmp-line) {
313 msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
317 if(!inet_aton(line,&addr)) {
318 msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
321 len=strtol(tmp, NULL, 0);
322 addr.s_addr>>=32-len;
323 addr.s_addr<<=32-len;
324 memcpy(&cltemp,&client,sizeof(client));
325 cltemp.s_addr>>=32-len;
326 cltemp.s_addr<<=32-len;
327 if(addr.s_addr == cltemp.s_addr) {
331 if (strncmp(line,opts->clientname,strlen(opts->clientname))==0) {
341 * Read data from a file descriptor into a buffer
343 * @param f a file descriptor
344 * @param buf a buffer
345 * @param len the number of bytes to be read
347 static inline void readit(int f, void *buf, size_t len) {
351 if ((res = read(f, buf, len)) <= 0) {
352 if(errno != EAGAIN) {
353 err("Read failed: %m");
363 * Write data from a buffer into a filedescriptor
365 * @param f a file descriptor
366 * @param buf a buffer containing data
367 * @param len the number of bytes to be written
369 static inline void writeit(int f, void *buf, size_t len) {
373 if ((res = write(f, buf, len)) <= 0)
374 err("Send failed: %m");
381 * Print out a message about how to use nbd-server. Split out to a separate
382 * function so that we can call it from multiple places
385 printf("This is nbd-server version " VERSION "\n");
386 printf("Usage: [ip:|ip6@]port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-C configuration file] [-p PID file name] [-o section name] [-M max connections]\n"
387 "\t-r|--read-only\t\tread only\n"
388 "\t-m|--multi-file\t\tmultiple file\n"
389 "\t-c|--copy-on-write\tcopy on write\n"
390 "\t-C|--config-file\tspecify an alternate configuration file\n"
391 "\t-l|--authorize-file\tfile with list of hosts that are allowed to\n\t\t\t\tconnect.\n"
392 "\t-p|--pid-file\t\tspecify a filename to write our PID to\n"
393 "\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n"
394 "\t-M|--max-connections\tspecify the maximum number of opened connections\n\n"
395 "\tif port is set to 0, stdin is used (for running from inetd)\n"
396 "\tif file_to_export contains '%%s', it is substituted with the IP\n"
397 "\t\taddress of the machine trying to connect\n"
398 "\tif ip is set, it contains the local IP address on which we're listening.\n\tif not, the server will listen on all local IP addresses\n");
399 printf("Using configuration file %s\n", CFILE);
402 /* Dumps a config file section of the given SERVER*, and exits. */
403 void dump_section(SERVER* serve, gchar* section_header) {
404 printf("[%s]\n", section_header);
405 printf("\texportname = %s\n", serve->exportname);
406 printf("\tlistenaddr = %s\n", serve->listenaddr);
407 printf("\tport = %d\n", serve->port);
408 if(serve->flags & F_READONLY) {
409 printf("\treadonly = true\n");
411 if(serve->flags & F_MULTIFILE) {
412 printf("\tmultifile = true\n");
414 if(serve->flags & F_COPYONWRITE) {
415 printf("\tcopyonwrite = true\n");
417 if(serve->expected_size) {
418 printf("\tfilesize = %lld\n", (long long int)serve->expected_size);
420 if(serve->authname) {
421 printf("\tauthfile = %s\n", serve->authname);
427 * Parse the command line.
429 * @param argc the argc argument to main()
430 * @param argv the argv argument to main()
432 SERVER* cmdline(int argc, char *argv[]) {
436 struct option long_options[] = {
437 {"read-only", no_argument, NULL, 'r'},
438 {"multi-file", no_argument, NULL, 'm'},
439 {"copy-on-write", no_argument, NULL, 'c'},
440 {"dont-fork", no_argument, NULL, 'd'},
441 {"authorize-file", required_argument, NULL, 'l'},
442 {"config-file", required_argument, NULL, 'C'},
443 {"pid-file", required_argument, NULL, 'p'},
444 {"output-config", required_argument, NULL, 'o'},
445 {"max-connection", required_argument, NULL, 'M'},
452 gboolean do_output=FALSE;
453 gchar* section_header="";
459 serve=g_new0(SERVER, 1);
460 serve->authname = g_strdup(default_authname);
461 serve->virtstyle=VIRT_IPLIT;
462 while((c=getopt_long(argc, argv, "-C:cdl:mo:rp:M:", long_options, &i))>=0) {
465 /* non-option argument */
466 switch(nonspecial++) {
468 if(strchr(optarg, ':') == strrchr(optarg, ':')) {
469 addr_port=g_strsplit(optarg, ":", 2);
471 /* Check for "@" - maybe user using this separator
474 g_strfreev(addr_port);
475 addr_port=g_strsplit(optarg, "@", 2);
478 addr_port=g_strsplit(optarg, "@", 2);
482 serve->port=strtol(addr_port[1], NULL, 0);
483 serve->listenaddr=g_strdup(addr_port[0]);
485 serve->listenaddr=NULL;
486 serve->port=strtol(addr_port[0], NULL, 0);
488 g_strfreev(addr_port);
491 serve->exportname = g_strdup(optarg);
492 if(serve->exportname[0] != '/') {
493 fprintf(stderr, "E: The to be exported file needs to be an absolute filename!\n");
498 last=strlen(optarg)-1;
500 if (suffix == 'k' || suffix == 'K' ||
501 suffix == 'm' || suffix == 'M')
503 es = (off_t)atoll(optarg);
511 serve->expected_size = es;
516 serve->flags |= F_READONLY;
519 serve->flags |= F_MULTIFILE;
523 section_header = g_strdup(optarg);
526 strncpy(pidftemplate, optarg, 256);
529 serve->flags |=F_COPYONWRITE;
535 g_free(config_file_pos);
536 config_file_pos=g_strdup(optarg);
539 g_free(serve->authname);
540 serve->authname=g_strdup(optarg);
543 serve->max_connections = strtol(optarg, NULL, 0);
551 /* What's left: the port to export, the name of the to be exported
552 * file, and, optionally, the size of the file, in that order. */
561 g_critical("Need a complete configuration on the command line to output a config file section!");
564 dump_section(serve, section_header);
570 * Error codes for config file parsing
573 CFILE_NOTFOUND, /**< The configuration file is not found */
574 CFILE_MISSING_GENERIC, /**< The (required) group "generic" is missing */
575 CFILE_KEY_MISSING, /**< A (required) key is missing */
576 CFILE_VALUE_INVALID, /**< A value is syntactically invalid */
577 CFILE_VALUE_UNSUPPORTED,/**< A value is not supported in this build */
578 CFILE_PROGERR, /**< Programmer error */
579 CFILE_NO_EXPORTS, /**< A config file was specified that does not
580 define any exports */
581 CFILE_INCORRECT_PORT, /**< The reserved port was specified for an
586 * Remove a SERVER from memory. Used from the hash table
588 void remove_server(gpointer s) {
592 g_free(server->exportname);
594 g_free(server->authname);
595 if(server->listenaddr)
596 g_free(server->listenaddr);
598 g_free(server->prerun);
600 g_free(server->postrun);
601 if(server->transactionlog)
602 g_free(server->transactionlog);
608 * @param s the old server we want to duplicate
609 * @return new duplicated server
611 SERVER* dup_serve(SERVER *s) {
612 SERVER *serve = NULL;
614 serve=g_new0(SERVER, 1);
619 serve->exportname = g_strdup(s->exportname);
621 serve->expected_size = s->expected_size;
624 serve->listenaddr = g_strdup(s->listenaddr);
626 serve->port = s->port;
629 serve->authname = strdup(s->authname);
631 serve->flags = s->flags;
632 serve->socket = s->socket;
633 serve->socket_family = s->socket_family;
634 serve->virtstyle = s->virtstyle;
635 serve->cidrlen = s->cidrlen;
638 serve->prerun = g_strdup(s->prerun);
641 serve->postrun = g_strdup(s->postrun);
643 if(s->transactionlog)
644 serve->transactionlog = g_strdup(s->transactionlog);
647 serve->servename = g_strdup(s->servename);
649 serve->max_connections = s->max_connections;
655 * append new server to array
657 * @param a server array
658 * @return 0 success, -1 error
660 int append_serve(SERVER *s, GArray *a) {
662 struct addrinfo hints;
663 struct addrinfo *ai = NULL;
664 struct addrinfo *rp = NULL;
665 char host[NI_MAXHOST];
671 err("Invalid parsing server");
675 port = g_strdup_printf("%d", s->port);
677 memset(&hints,'\0',sizeof(hints));
678 hints.ai_family = AF_UNSPEC;
679 hints.ai_socktype = SOCK_STREAM;
680 hints.ai_flags = AI_ADDRCONFIG | AI_PASSIVE;
681 hints.ai_protocol = IPPROTO_TCP;
683 e = getaddrinfo(s->listenaddr, port, &hints, &ai);
689 for (rp = ai; rp != NULL; rp = rp->ai_next) {
690 e = getnameinfo(rp->ai_addr, rp->ai_addrlen, host, sizeof(host), NULL, 0, NI_NUMERICHOST);
692 if (e != 0) { // error
693 fprintf(stderr, "getnameinfo: %s\n", gai_strerror(e));
697 // duplicate server and set listenaddr to resolved IP address
700 ns->listenaddr = g_strdup(host);
701 ns->socket_family = rp->ai_family;
702 g_array_append_val(a, *ns);
710 fprintf(stderr, "getaddrinfo failed on listen host/address: %s (%s)\n", s->listenaddr ? s->listenaddr : "any", gai_strerror(e));
721 * Parse the config file.
723 * @param f the name of the config file
724 * @param e a GError. @see CFILE_ERRORS for what error values this function can
726 * @return a GArray of SERVER structures. If the config file is empty or does not
727 * exist, returns an empty GArray; if the config file contains an
728 * error, returns NULL, and e is set appropriately
730 GArray* parse_cfile(gchar* f, GError** e) {
731 const char* DEFAULT_ERROR = "Could not parse %s in group %s: %s";
732 const char* MISSING_REQUIRED_ERROR = "Could not find required value %s in group %s: %s";
734 gchar *virtstyle=NULL;
736 { "exportname", TRUE, PARAM_STRING, &(s.exportname), 0 },
737 { "port", TRUE, PARAM_INT, &(s.port), 0 },
738 { "authfile", FALSE, PARAM_STRING, &(s.authname), 0 },
739 { "filesize", FALSE, PARAM_INT, &(s.expected_size), 0 },
740 { "virtstyle", FALSE, PARAM_STRING, &(virtstyle), 0 },
741 { "prerun", FALSE, PARAM_STRING, &(s.prerun), 0 },
742 { "postrun", FALSE, PARAM_STRING, &(s.postrun), 0 },
743 { "transactionlog", FALSE, PARAM_STRING, &(s.transactionlog), 0 },
744 { "readonly", FALSE, PARAM_BOOL, &(s.flags), F_READONLY },
745 { "multifile", FALSE, PARAM_BOOL, &(s.flags), F_MULTIFILE },
746 { "copyonwrite", FALSE, PARAM_BOOL, &(s.flags), F_COPYONWRITE },
747 { "sparse_cow", FALSE, PARAM_BOOL, &(s.flags), F_SPARSE },
748 { "sdp", FALSE, PARAM_BOOL, &(s.flags), F_SDP },
749 { "sync", FALSE, PARAM_BOOL, &(s.flags), F_SYNC },
750 { "flush", FALSE, PARAM_BOOL, &(s.flags), F_FLUSH },
751 { "fua", FALSE, PARAM_BOOL, &(s.flags), F_FUA },
752 { "rotational", FALSE, PARAM_BOOL, &(s.flags), F_ROTATIONAL },
753 { "listenaddr", FALSE, PARAM_STRING, &(s.listenaddr), 0 },
754 { "maxconnections", FALSE, PARAM_INT, &(s.max_connections), 0 },
756 const int lp_size=sizeof(lp)/sizeof(PARAM);
758 { "user", FALSE, PARAM_STRING, &runuser, 0 },
759 { "group", FALSE, PARAM_STRING, &rungroup, 0 },
760 { "oldstyle", FALSE, PARAM_BOOL, &do_oldstyle, 1 },
761 { "listenaddr", FALSE, PARAM_STRING, &modern_listen, 0 },
764 int p_size=sizeof(gp)/sizeof(PARAM);
767 const char *err_msg=NULL;
776 errdomain = g_quark_from_string("parse_cfile");
777 cfile = g_key_file_new();
778 retval = g_array_new(FALSE, TRUE, sizeof(SERVER));
779 if(!g_key_file_load_from_file(cfile, f, G_KEY_FILE_KEEP_COMMENTS |
780 G_KEY_FILE_KEEP_TRANSLATIONS, &err)) {
781 g_set_error(e, errdomain, CFILE_NOTFOUND, "Could not open config file %s.", f);
782 g_key_file_free(cfile);
785 startgroup = g_key_file_get_start_group(cfile);
786 if(!startgroup || strcmp(startgroup, "generic")) {
787 g_set_error(e, errdomain, CFILE_MISSING_GENERIC, "Config file does not contain the [generic] group!");
788 g_key_file_free(cfile);
791 groups = g_key_file_get_groups(cfile, NULL);
792 for(i=0;groups[i];i++) {
793 memset(&s, '\0', sizeof(SERVER));
795 /* After the [generic] group, start parsing exports */
800 for(j=0;j<p_size;j++) {
801 g_assert(p[j].target != NULL);
802 g_assert(p[j].ptype==PARAM_INT||p[j].ptype==PARAM_STRING||p[j].ptype==PARAM_BOOL);
805 *((gint*)p[j].target) =
806 g_key_file_get_integer(cfile,
812 *((gchar**)p[j].target) =
813 g_key_file_get_string(cfile,
819 value = g_key_file_get_boolean(cfile,
821 p[j].paramname, &err);
824 *((gint*)p[j].target) |= p[j].flagval;
826 *((gint*)p[j].target) &= ~(p[j].flagval);
831 if(!strcmp(p[j].paramname, "port") && !strcmp(p[j].target, NBD_DEFAULT_PORT)) {
832 g_set_error(e, errdomain, CFILE_INCORRECT_PORT, "Config file specifies default port for oldstyle export");
833 g_key_file_free(cfile);
837 if(err->code == G_KEY_FILE_ERROR_KEY_NOT_FOUND) {
839 /* Ignore not-found error for optional values */
843 err_msg = MISSING_REQUIRED_ERROR;
846 err_msg = DEFAULT_ERROR;
848 g_set_error(e, errdomain, CFILE_VALUE_INVALID, err_msg, p[j].paramname, groups[i], err->message);
849 g_array_free(retval, TRUE);
851 g_key_file_free(cfile);
856 if(!strncmp(virtstyle, "none", 4)) {
857 s.virtstyle=VIRT_NONE;
858 } else if(!strncmp(virtstyle, "ipliteral", 9)) {
859 s.virtstyle=VIRT_IPLIT;
860 } else if(!strncmp(virtstyle, "iphash", 6)) {
861 s.virtstyle=VIRT_IPHASH;
862 } else if(!strncmp(virtstyle, "cidrhash", 8)) {
863 s.virtstyle=VIRT_CIDR;
864 if(strlen(virtstyle)<10) {
865 g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s: missing length", virtstyle, groups[i]);
866 g_array_free(retval, TRUE);
867 g_key_file_free(cfile);
870 s.cidrlen=strtol(virtstyle+8, NULL, 0);
872 g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s", virtstyle, groups[i]);
873 g_array_free(retval, TRUE);
874 g_key_file_free(cfile);
877 if(s.port && !do_oldstyle) {
878 g_warning("A port was specified, but oldstyle exports were not requested. This may not do what you expect.");
879 g_warning("Please read 'man 5 nbd-server' and search for oldstyle for more info");
882 s.virtstyle=VIRT_IPLIT;
884 /* Don't need to free this, it's not our string */
886 /* Don't append values for the [generic] group */
888 s.socket_family = AF_UNSPEC;
889 s.servename = groups[i];
891 append_serve(&s, retval);
898 if(s.flags & F_SDP) {
899 g_set_error(e, errdomain, CFILE_VALUE_UNSUPPORTED, "This nbd-server was built without support for SDP, yet group %s uses it", groups[i]);
900 g_array_free(retval, TRUE);
901 g_key_file_free(cfile);
907 g_set_error(e, errdomain, CFILE_NO_EXPORTS, "The config file does not specify any exports");
909 g_key_file_free(cfile);
914 * Signal handler for SIGCHLD
915 * @param s the signal we're handling (must be SIGCHLD, or something
918 void sigchld_handler(int s) {
923 while((pid=waitpid(-1, &status, WNOHANG)) > 0) {
924 if(WIFEXITED(status)) {
925 msg3(LOG_INFO, "Child exited with %d", WEXITSTATUS(status));
927 i=g_hash_table_lookup(children, &pid);
929 msg3(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid);
931 DEBUG("Removing %d from the list of children", pid);
932 g_hash_table_remove(children, &pid);
938 * Kill a child. Called from sigterm_handler::g_hash_table_foreach.
941 * @param value the value corresponding to the above key
942 * @param user_data a pointer which we always set to 1, so that we know what
945 void killchild(gpointer key, gpointer value, gpointer user_data) {
947 int *parent=user_data;
954 * Handle SIGTERM and dispatch it to our children
955 * @param s the signal we're handling (must be SIGTERM, or something
956 * is severely wrong).
958 void sigterm_handler(int s) {
961 g_hash_table_foreach(children, killchild, &parent);
971 * Detect the size of a file.
973 * @param fhandle An open filedescriptor
974 * @return the size of the file, or OFFT_MAX if detection was
977 off_t size_autodetect(int fhandle) {
980 struct stat stat_buf;
983 #ifdef HAVE_SYS_MOUNT_H
984 #ifdef HAVE_SYS_IOCTL_H
986 DEBUG("looking for export size with ioctl BLKGETSIZE64\n");
987 if (!ioctl(fhandle, BLKGETSIZE64, &bytes) && bytes) {
990 #endif /* BLKGETSIZE64 */
991 #endif /* HAVE_SYS_IOCTL_H */
992 #endif /* HAVE_SYS_MOUNT_H */
994 DEBUG("looking for fhandle size with fstat\n");
995 stat_buf.st_size = 0;
996 error = fstat(fhandle, &stat_buf);
998 if(stat_buf.st_size > 0)
999 return (off_t)stat_buf.st_size;
1001 err("fstat failed: %m");
1004 DEBUG("looking for fhandle size with lseek SEEK_END\n");
1005 es = lseek(fhandle, (off_t)0, SEEK_END);
1006 if (es > ((off_t)0)) {
1009 DEBUG("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4)));
1012 err("Could not find size of exported block device: %m");
1017 * Get the file handle and offset, given an export offset.
1019 * @param export An array of export files
1020 * @param a The offset to get corresponding file/offset for
1021 * @param fhandle [out] File descriptor
1022 * @param foffset [out] Offset into fhandle
1023 * @param maxbytes [out] Tells how many bytes can be read/written
1024 * from fhandle starting at foffset (0 if there is no limit)
1025 * @return 0 on success, -1 on failure
1027 int get_filepos(GArray* export, off_t a, int* fhandle, off_t* foffset, size_t* maxbytes ) {
1028 /* Negative offset not allowed */
1032 /* Binary search for last file with starting offset <= a */
1035 int end = export->len - 1;
1036 while( start <= end ) {
1037 int mid = (start + end) / 2;
1038 fi = g_array_index(export, FILE_INFO, mid);
1039 if( fi.startoff < a ) {
1041 } else if( fi.startoff > a ) {
1049 /* end should never go negative, since first startoff is 0 and a >= 0 */
1052 fi = g_array_index(export, FILE_INFO, end);
1053 *fhandle = fi.fhandle;
1054 *foffset = a - fi.startoff;
1056 if( end+1 < export->len ) {
1057 FILE_INFO fi_next = g_array_index(export, FILE_INFO, end+1);
1058 *maxbytes = fi_next.startoff - a;
1065 * seek to a position in a file, with error handling.
1066 * @param handle a filedescriptor
1067 * @param a position to seek to
1068 * @todo get rid of this; lastpoint is a global variable right now, but it
1069 * shouldn't be. If we pass it on as a parameter, that makes things a *lot*
1072 void myseek(int handle,off_t a) {
1073 if (lseek(handle, a, SEEK_SET) < 0) {
1074 err("Can not seek locally!\n");
1079 * Write an amount of bytes at a given offset to the right file. This
1080 * abstracts the write-side of the multiple file option.
1082 * @param a The offset where the write should start
1083 * @param buf The buffer to write from
1084 * @param len The length of buf
1085 * @param client The client we're serving for
1086 * @param fua Flag to indicate 'Force Unit Access'
1087 * @return The number of bytes actually written, or -1 in case of an error
1089 ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
1095 if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
1097 if(maxbytes && len > maxbytes)
1100 DEBUG("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle, (long long unsigned)foffset, (unsigned int)len, fua);
1102 myseek(fhandle, foffset);
1103 retval = write(fhandle, buf, len);
1104 if(client->server->flags & F_SYNC) {
1108 /* This is where we would do the following
1109 * #ifdef USE_SYNC_FILE_RANGE
1110 * However, we don't, for the reasons set out below
1111 * by Christoph Hellwig <hch@infradead.org>
1114 * fdatasync is equivalent to fsync except that it does not flush
1115 * non-essential metadata (basically just timestamps in practice), but it
1116 * does flush metadata required to find the data again, e.g. allocation
1117 * information and extent maps. sync_file_range does nothing but flush
1118 * out pagecache content - it means you basically won't get your data
1119 * back in case of a crash if you either:
1121 * a) have a volatile write cache in your disk (e.g. any normal SATA disk)
1122 * b) are using a sparse file on a filesystem
1123 * c) are using a fallocate-preallocated file on a filesystem
1124 * d) use any file on a COW filesystem like btrfs
1126 * e.g. it only does anything useful for you if you do not have a volatile
1127 * write cache, and either use a raw block device node, or just overwrite
1128 * an already fully allocated (and not preallocated) file on a non-COW
1132 * What we should do is open a second FD with O_DSYNC set, then write to
1133 * that when appropriate. However, with a Linux client, every REQ_FUA
1134 * immediately follows a REQ_FLUSH, so fdatasync does not cause performance
1139 sync_file_range(fhandle, foffset, len,
1140 SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE |
1141 SYNC_FILE_RANGE_WAIT_AFTER);
1150 * Call rawexpwrite repeatedly until all data has been written.
1152 * @param a The offset where the write should start
1153 * @param buf The buffer to write from
1154 * @param len The length of buf
1155 * @param client The client we're serving for
1156 * @param fua Flag to indicate 'Force Unit Access'
1157 * @return 0 on success, nonzero on failure
1159 int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
1162 while(len > 0 && (ret=rawexpwrite(a, buf, len, client, fua)) > 0 ) {
1167 return (ret < 0 || len != 0);
1171 * Read an amount of bytes at a given offset from the right file. This
1172 * abstracts the read-side of the multiple files option.
1174 * @param a The offset where the read should start
1175 * @param buf A buffer to read into
1176 * @param len The size of buf
1177 * @param client The client we're serving for
1178 * @return The number of bytes actually read, or -1 in case of an
1181 ssize_t rawexpread(off_t a, char *buf, size_t len, CLIENT *client) {
1186 if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
1188 if(maxbytes && len > maxbytes)
1191 DEBUG("(READ from fd %d offset %llu len %u), ", fhandle, (long long unsigned int)foffset, (unsigned int)len);
1193 myseek(fhandle, foffset);
1194 return read(fhandle, buf, len);
1198 * Call rawexpread repeatedly until all data has been read.
1199 * @return 0 on success, nonzero on failure
1201 int rawexpread_fully(off_t a, char *buf, size_t len, CLIENT *client) {
1204 while(len > 0 && (ret=rawexpread(a, buf, len, client)) > 0 ) {
1209 return (ret < 0 || len != 0);
1213 * Read an amount of bytes at a given offset from the right file. This
1214 * abstracts the read-side of the copyonwrite stuff, and calls
1215 * rawexpread() with the right parameters to do the actual work.
1216 * @param a The offset where the read should start
1217 * @param buf A buffer to read into
1218 * @param len The size of buf
1219 * @param client The client we're going to read for
1220 * @return 0 on success, nonzero on failure
1222 int expread(off_t a, char *buf, size_t len, CLIENT *client) {
1223 off_t rdlen, offset;
1224 off_t mapcnt, mapl, maph, pagestart;
1226 if (!(client->server->flags & F_COPYONWRITE))
1227 return(rawexpread_fully(a, buf, len, client));
1228 DEBUG("Asked to read %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
1230 mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
1232 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
1233 pagestart=mapcnt*DIFFPAGESIZE;
1235 rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
1236 len : (size_t)DIFFPAGESIZE-offset;
1237 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1238 DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
1239 (unsigned long)(client->difmap[mapcnt]));
1240 myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset);
1241 if (read(client->difffile, buf, rdlen) != rdlen) return -1;
1242 } else { /* the block is not there */
1243 DEBUG("Page %llu is not here, we read the original one\n",
1244 (unsigned long long)mapcnt);
1245 if(rawexpread_fully(a, buf, rdlen, client)) return -1;
1247 len-=rdlen; a+=rdlen; buf+=rdlen;
1253 * Write an amount of bytes at a given offset to the right file. This
1254 * abstracts the write-side of the copyonwrite option, and calls
1255 * rawexpwrite() with the right parameters to do the actual work.
1257 * @param a The offset where the write should start
1258 * @param buf The buffer to write from
1259 * @param len The length of buf
1260 * @param client The client we're going to write for.
1261 * @param fua Flag to indicate 'Force Unit Access'
1262 * @return 0 on success, nonzero on failure
1264 int expwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
1265 char pagebuf[DIFFPAGESIZE];
1266 off_t mapcnt,mapl,maph;
1271 if (!(client->server->flags & F_COPYONWRITE))
1272 return(rawexpwrite_fully(a, buf, len, client, fua));
1273 DEBUG("Asked to write %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
1275 mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
1277 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
1278 pagestart=mapcnt*DIFFPAGESIZE ;
1279 offset=a-pagestart ;
1280 wrlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
1281 len : (size_t)DIFFPAGESIZE-offset;
1283 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1284 DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
1285 (unsigned long)(client->difmap[mapcnt])) ;
1286 myseek(client->difffile,
1287 client->difmap[mapcnt]*DIFFPAGESIZE+offset);
1288 if (write(client->difffile, buf, wrlen) != wrlen) return -1 ;
1289 } else { /* the block is not there */
1290 myseek(client->difffile,client->difffilelen*DIFFPAGESIZE) ;
1291 client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++;
1292 DEBUG("Page %llu is not here, we put it at %lu\n",
1293 (unsigned long long)mapcnt,
1294 (unsigned long)(client->difmap[mapcnt]));
1295 rdlen=DIFFPAGESIZE ;
1296 if (rawexpread_fully(pagestart, pagebuf, rdlen, client))
1298 memcpy(pagebuf+offset,buf,wrlen) ;
1299 if (write(client->difffile, pagebuf, DIFFPAGESIZE) !=
1303 len-=wrlen ; a+=wrlen ; buf+=wrlen ;
1305 if (client->server->flags & F_SYNC) {
1306 fsync(client->difffile);
1308 /* open question: would it be cheaper to do multiple sync_file_ranges?
1309 as we iterate through the above?
1311 fdatasync(client->difffile);
1317 * Flush data to a client
1319 * @param client The client we're going to write for.
1320 * @return 0 on success, nonzero on failure
1322 int expflush(CLIENT *client) {
1325 if (client->server->flags & F_COPYONWRITE) {
1326 return fsync(client->difffile);
1329 for (i = 0; i < client->export->len; i++) {
1330 FILE_INFO fi = g_array_index(client->export, FILE_INFO, i);
1331 if (fsync(fi.fhandle) < 0)
1339 * Do the initial negotiation.
1341 * @param client The client we're negotiating with.
/*
 * Handshake with the peer connected on socket `net`.
 * Call shapes visible in this file: serveloop() passes client == NULL
 * together with the list of servers; mainloop() passes an existing client
 * and servers == NULL.
 */
1343 CLIENT* negotiate(int net, CLIENT *client, GArray* servers) {
1346 uint32_t flags = NBD_FLAG_HAS_FLAGS;
1347 uint16_t smallflags = 0;
1350 memset(zeros, '\0', sizeof(zeros));
/* No client yet, or a client not marked modern: send the 8-byte INIT_PASSWD. */
1351 if(!client || !client->modern) {
1353 if (write(net, INIT_PASSWD, 8) < 0) {
1354 err_nonfatal("Negotiation failed: %m");
/* The magic that follows depends on the handshake flavour. */
1358 if(!client || client->modern) {
1360 magic = htonll(opts_magic);
1363 magic = htonll(cliserv_magic);
1365 if (write(net, &magic, sizeof(magic)) < 0) {
1366 err_nonfatal("Negotiation failed: %m");
1380 err("programmer error");
1381 if (write(net, &smallflags, sizeof(uint16_t)) < 0)
1382 err("Negotiation failed: %m");
/* NOTE(review): the read() calls below are only checked for < 0; a short
 * read would leave the variable partly filled. Consider a read-fully helper. */
1383 if (read(net, &reserved, sizeof(reserved)) < 0)
1384 err("Negotiation failed: %m");
1385 if (read(net, &magic, sizeof(magic)) < 0)
1386 err("Negotiation failed: %m");
1387 magic = ntohll(magic);
1388 if(magic != opts_magic) {
1392 if (read(net, &opt, sizeof(opt)) < 0)
1393 err("Negotiation failed: %m");
1395 if(opt != NBD_OPT_EXPORT_NAME) {
/* namelen is supplied by the peer and sizes the allocation below.
 * NOTE(review): no upper bound is visible in this excerpt - confirm. */
1399 if (read(net, &namelen, sizeof(namelen)) < 0)
1400 err("Negotiation failed: %m");
1401 namelen = ntohl(namelen);
1402 name = malloc(namelen+1);
1404 if (read(net, name, namelen) < 0)
1405 err("Negotiation failed: %m");
/* Look the requested export name up among the configured servers. */
1406 for(i=0; i<servers->len; i++) {
1407 SERVER* serve = &(g_array_index(servers, SERVER, i));
1408 if(!strcmp(serve->servename, name)) {
/* This local `client` shadows the function parameter of the same name. */
1409 CLIENT* client = g_new0(CLIENT, 1);
1410 client->server = serve;
1411 client->exportsize = OFFT_MAX;
1413 client->modern = TRUE;
1414 client->transactionlogfd = -1;
/* Send the export size (64 bit, network order), then the flags. */
1423 size_host = htonll((u64)(client->exportsize));
1424 if (write(net, &size_host, 8) < 0)
1425 err("Negotiation failed: %m");
1426 if (client->server->flags & F_READONLY)
1427 flags |= NBD_FLAG_READ_ONLY;
1428 if (client->server->flags & F_FLUSH)
1429 flags |= NBD_FLAG_SEND_FLUSH;
1430 if (client->server->flags & F_FUA)
1431 flags |= NBD_FLAG_SEND_FUA;
1432 if (client->server->flags & F_ROTATIONAL)
1433 flags |= NBD_FLAG_ROTATIONAL;
/* Non-modern clients get the flags as 32 bits, otherwise 16 bits are sent. */
1434 if (!client->modern) {
1436 flags = htonl(flags);
1437 if (write(client->net, &flags, 4) < 0)
1438 err("Negotiation failed: %m");
/* ~((uint16_t)0) is promoted to int before the complement, so the mask is
 * all ones and this is simply a truncation of flags to 16 bits. */
1441 smallflags = (uint16_t)(flags & ~((uint16_t)0));
1442 smallflags = htons(smallflags);
1443 if (write(client->net, &smallflags, sizeof(smallflags)) < 0) {
1444 err("Negotiation failed: %m");
/* 124 zero bytes are written after the flags. */
1448 if (write(client->net, zeros, 124) < 0)
1449 err("Negotiation failed: %m");
/** sending macro: writes the reply to the peer and, if a transaction log is
 * open, mirrors it there. Expects a variable named `client` to be in scope
 * where it is expanded. Wrapped in do/while(0) so it acts as one statement. */
#define SEND(net,reply) do { writeit((net), &(reply), sizeof(reply)); \
	if (client->transactionlogfd != -1) \
		writeit(client->transactionlogfd, &(reply), sizeof(reply)); } while(0)
/** error macro: sends the reply with the given error code, then clears the
 * error field so the reply structure can be reused. */
#define ERROR(client,reply,errcode) do { (reply).error = htonl(errcode); SEND((client)->net,reply); (reply).error = 0; } while(0)
1460 * Serve a file to a single client.
1462 * @todo This beast needs to be split up in many tiny little manageable
1463 * pieces. Preferably with a chainsaw.
1465 * @param client The client we're going to serve to.
1466 * @return when the client disconnects
/*
 * Request loop for one connected client: negotiates, then reads
 * nbd_request structures from client->net and answers them until the
 * client disconnects.
 */
1468 int mainloop(CLIENT *client) {
1469 struct nbd_request request;
1470 struct nbd_reply reply;
1471 gboolean go_on=TRUE;
1475 negotiate(client->net, client, NULL);
1476 DEBUG("Entering request loop!\n");
1477 reply.magic = htonl(NBD_REPLY_MAGIC);
/* Every request is also copied to the transaction log when one is open. */
1490 readit(client->net, &request, sizeof(request));
1491 if (client->transactionlogfd != -1)
1492 writeit(client->transactionlogfd, &request, sizeof(request));
/* Convert the request fields from network to host byte order. */
1494 request.from = ntohll(request.from);
1495 request.type = ntohl(request.type);
1496 command = request.type & NBD_CMD_MASK_COMMAND;
1497 len = ntohl(request.len);
1499 DEBUG("%s from %llu (%llu) len %d, ", getcommandname(command),
1500 (unsigned long long)request.from,
1501 (unsigned long long)request.from / 512, (unsigned int)len);
/* The magic is compared in network order (it was not byte-swapped above). */
1503 if (request.magic != htonl(NBD_REQUEST_MAGIC))
1504 err("Not enough magic.");
/* The handle is echoed back so the client can match reply to request. */
1506 memcpy(reply.handle, request.handle, sizeof(reply.handle));
/* Range checks for reads and writes: reject ranges beyond OFFT_MAX or
 * beyond the export size with EINVAL. */
1508 if ((command==NBD_CMD_WRITE) || (command==NBD_CMD_READ)) {
1509 if ((request.from + len) > (OFFT_MAX)) {
1510 DEBUG("[Number too large!]");
1511 ERROR(client, reply, EINVAL);
1515 if (((ssize_t)((off_t)request.from + len) > client->exportsize)) {
1517 ERROR(client, reply, EINVAL);
/* Requests larger than the buffer are handled in chunks of currlen bytes. */
1522 if (currlen > BUFSIZE - sizeof(struct nbd_reply)) {
1523 currlen = BUFSIZE - sizeof(struct nbd_reply);
1524 msg2(LOG_INFO, "oversized request (this is not a problem)");
/* Disconnect: in copy-on-write mode the map and the diff file are discarded. */
1531 msg2(LOG_INFO, "Disconnect request received.");
1532 if (client->server->flags & F_COPYONWRITE) {
1533 if (client->difmap) g_free(client->difmap) ;
1534 close(client->difffile);
1535 unlink(client->difffilename);
1536 free(client->difffilename);
1542 DEBUG("wr: net->buf, ");
1544 readit(client->net, buf, currlen);
1545 DEBUG("buf->exp, ");
1546 if ((client->server->flags & F_READONLY) ||
1547 (client->server->flags & F_AUTOREADONLY)) {
1548 DEBUG("[WRITE to READONLY!]");
1549 ERROR(client, reply, EPERM);
/* NOTE(review): only currlen bytes were read into buf just above, yet
 * expwrite() is handed len. For an oversized request (len > currlen) this
 * looks like it writes data that was never received - confirm whether
 * currlen was intended here. */
1552 if (expwrite(request.from, buf, len, client,
1553 request.type & NBD_CMD_FLAG_FUA)) {
1554 DEBUG("Write failed: %m" );
1555 ERROR(client, reply, errno);
1559 currlen = (len < BUFSIZE) ? len : BUFSIZE;
1561 SEND(client->net, reply);
1567 if (expflush(client)) {
1568 DEBUG("Flush failed: %m");
1569 ERROR(client, reply, errno);
1572 SEND(client->net, reply);
/* Read: the reply header is placed at the front of buf and the data is read
 * in right behind it (p), so header and payload go out in one writeit(). */
1577 DEBUG("exp->buf, ");
1578 memcpy(buf, &reply, sizeof(struct nbd_reply));
1579 if (client->transactionlogfd != -1)
1580 writeit(client->transactionlogfd, &reply, sizeof(reply));
1581 p = buf + sizeof(struct nbd_reply);
1582 writelen = currlen + sizeof(struct nbd_reply);
1584 if (expread(request.from, p, currlen, client)) {
1585 DEBUG("Read failed: %m");
1586 ERROR(client, reply, errno);
1590 DEBUG("buf->net, ");
1591 writeit(client->net, buf, writelen);
1593 request.from += currlen;
1594 currlen = (len < BUFSIZE) ? len : BUFSIZE;
1602 DEBUG ("Ignoring unknown command\n");
1610 * Set up client export array, which is an array of FILE_INFO.
1611 * Also, split a single exportfile into multiple ones, if that was asked.
1612 * @param client information on the client which we want to setup export for
/*
 * Open the file(s) behind client->exportname, record each one as a
 * FILE_INFO in client->export, and compute client->exportsize.
 */
1614 void setupexport(CLIENT* client) {
1616 off_t laststartoff = 0, lastsize = 0;
1617 int multifile = (client->server->flags & F_MULTIFILE);
1619 client->export = g_array_new(TRUE, TRUE, sizeof(FILE_INFO));
1621 /* If multi-file, open as many files as we can.
1622 * If not, open exactly one file.
1623 * Calculate file sizes as we go to get total size. */
1627 gchar* error_string;
/* NOTE(review): `mode` holds open(2) flags (O_RDONLY / O_RDWR), not a
 * permission mode, although it is declared mode_t. */
1628 mode_t mode = (client->server->flags & F_READONLY) ? O_RDONLY : O_RDWR;
/* Two naming schemes are visible: "<exportname>.<i>" and plain exportname. */
1631 tmpname=g_strdup_printf("%s.%d", client->exportname, i);
1633 tmpname=g_strdup(client->exportname);
1635 DEBUG( "Opening %s\n", tmpname );
1636 fi.fhandle = open(tmpname, mode);
1637 if(fi.fhandle == -1 && mode == O_RDWR) {
1638 /* Try again because maybe media was read-only */
1639 fi.fhandle = open(tmpname, O_RDONLY);
1640 if(fi.fhandle != -1) {
1641 /* Opening the base file in copyonwrite mode is
1643 if(!(client->server->flags & F_COPYONWRITE)) {
1644 client->server->flags |= F_AUTOREADONLY;
1645 client->server->flags |= F_READONLY;
1649 if(fi.fhandle == -1) {
1650 if(multifile && i>0)
1652 error_string=g_strdup_printf(
1653 "Could not open exported file %s: %%m",
1657 fi.startoff = laststartoff + lastsize;
1658 g_array_append_val(client->export, fi);
1661 /* Starting offset and size of this file will be used to
1662 * calculate starting offset of next file */
1663 laststartoff = fi.startoff;
1664 lastsize = size_autodetect(fi.fhandle);
1670 /* Set export size to total calculated size */
1671 client->exportsize = laststartoff + lastsize;
1673 /* Export size may be overridden */
1674 if(client->server->expected_size) {
1675 /* desired size must be <= total calculated size */
1676 if(client->server->expected_size > client->exportsize) {
1677 err("Size of exported file is too big\n");
1680 client->exportsize = client->server->expected_size;
1683 msg3(LOG_INFO, "Size of exported file/device is %llu", (unsigned long long)client->exportsize);
1685 msg3(LOG_INFO, "Total number of files: %d", i);
1689 int copyonwrite_prepare(CLIENT* client) {
1691 if ((client->difffilename = malloc(1024))==NULL)
1692 err("Failed to allocate string for diff file name");
1693 snprintf(client->difffilename, 1024, "%s-%s-%d.diff",client->exportname,client->clientname,
1695 client->difffilename[1023]='\0';
1696 msg3(LOG_INFO,"About to create map and diff file %s",client->difffilename) ;
1697 client->difffile=open(client->difffilename,O_RDWR | O_CREAT | O_TRUNC,0600) ;
1698 if (client->difffile<0) err("Could not create diff file (%m)") ;
1699 if ((client->difmap=calloc(client->exportsize/DIFFPAGESIZE,sizeof(u32)))==NULL)
1700 err("Could not allocate memory") ;
1701 for (i=0;i<client->exportsize/DIFFPAGESIZE;i++) client->difmap[i]=(u32)-1 ;
1707 * Run a command. This is used for the ``prerun'' and ``postrun'' config file
1710 * @param command the command to be run. Read from the config file
1711 * @param file the file name we're about to export
/*
 * Build a command line from `command` and `file`; nothing is done when
 * command is NULL or empty.
 */
1713 int do_run(gchar* command, gchar* file) {
1717 if(command && *command) {
/* `command` itself is the printf-style format and `file` its only argument.
 * NOTE(review): this relies on the configured command containing at most one
 * %s conversion - confirm that the config parser enforces this. */
1718 cmd = g_strdup_printf(command, file);
1726 * Serve a connection.
1728 * @todo allow for multithreading, perhaps use libevent. Not just yet, though;
1729 * follow the road map.
1731 * @param client a connected client
/*
 * Serve one connected client: open the transaction log if configured, run
 * the prerun command, set up the export (plus copy-on-write state when
 * enabled), and afterwards run the postrun command and close the log.
 */
1733 void serveconnection(CLIENT *client) {
/* Open the transaction log only if one is configured and not already open. */
1734 if (client->server->transactionlog && (client->transactionlogfd == -1))
1736 if (-1 == (client->transactionlogfd = open(client->server->transactionlog,
1738 S_IRUSR | S_IWUSR)))
/* Failure to open the log is reported as a warning only. */
1739 g_warning("Could not open transaction log %s",
1740 client->server->transactionlog);
/* A nonzero result from the prerun command is treated specially here. */
1743 if(do_run(client->server->prerun, client->exportname)) {
1746 setupexport(client);
1748 if (client->server->flags & F_COPYONWRITE) {
1749 copyonwrite_prepare(client);
1752 setmysockopt(client->net);
/* The return value of the postrun command is not used. */
1755 do_run(client->server->postrun, client->exportname);
1757 if (-1 != client->transactionlogfd)
1759 close(client->transactionlogfd);
1760 client->transactionlogfd = -1;
1765 * Find the name of the file we have to serve. This will use g_strdup_printf
1766 * to put the IP address of the client inside a filename containing
1767 * "%s" (in the form as specified by the "virtstyle" option). That name
1768 * is then written to client->exportname.
1770 * @param net A socket connected to an nbd client
1771 * @param client information about the client. The IP address in human-readable
1772 * format will be written to a new char* buffer, the address of which will be
1773 * stored in client->clientname.
/*
 * Determine the peer address of socket `net`, derive client->exportname
 * from the server's exportname according to its virtstyle, and store a copy
 * of the numeric peer address in client->clientname.
 */
1775 void set_peername(int net, CLIENT *client) {
1776 struct sockaddr_storage addrin;
1777 struct sockaddr_storage netaddr;
1778 struct sockaddr_in *netaddr4 = NULL;
1779 struct sockaddr_in6 *netaddr6 = NULL;
1780 size_t addrinlen = sizeof( addrin );
1781 struct addrinfo hints;
1782 struct addrinfo *ai = NULL;
1783 char peername[NI_MAXHOST];
1784 char netname[NI_MAXHOST];
/* NOTE(review): addrinlen is a size_t whose address is cast to socklen_t*;
 * the two types can differ in width - confirm this is safe on all targets.
 * Also, the message below says "getsockname" while the failing call is
 * getpeername(). */
1790 if (getpeername(net, (struct sockaddr *) &addrin, (socklen_t *)&addrinlen) < 0)
1791 err("getsockname failed: %m");
/* Numeric form of the peer address (NI_NUMERICHOST: no reverse lookup). */
1793 getnameinfo((struct sockaddr *)&addrin, (socklen_t)addrinlen,
1794 peername, sizeof (peername), NULL, 0, NI_NUMERICHOST);
/* The numeric string is resolved again; below only ai->ai_family is used. */
1796 memset(&hints, '\0', sizeof (hints));
1797 hints.ai_flags = AI_ADDRCONFIG;
1798 e = getaddrinfo(peername, NULL, &hints, &ai);
1801 fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
1806 switch(client->server->virtstyle) {
1808 client->exportname=g_strdup(client->server->exportname);
/* This style scans peername for '.' characters. */
1811 for(i=0;i<strlen(peername);i++) {
1812 if(peername[i]=='.') {
1817 client->exportname=g_strdup_printf(client->server->exportname, peername);
/* CIDR style: clear the host bits of a copy of the peer address so that
 * netname becomes the network address, then use "<netname>/<peername>". */
1820 memcpy(&netaddr, &addrin, addrinlen);
1821 if(ai->ai_family == AF_INET) {
1822 netaddr4 = (struct sockaddr_in *)&netaddr;
/* NOTE(review): s_addr is kept in network byte order, so shifting it as a
 * host integer may clear the wrong bits on little-endian hosts; a cidrlen
 * of 0 would also shift by 32, which is undefined for a 32-bit value.
 * Verify. */
1823 (netaddr4->sin_addr).s_addr>>=32-(client->server->cidrlen);
1824 (netaddr4->sin_addr).s_addr<<=32-(client->server->cidrlen);
1826 getnameinfo((struct sockaddr *) netaddr4, (socklen_t) addrinlen,
1827 netname, sizeof (netname), NULL, 0, NI_NUMERICHOST);
1828 tmp=g_strdup_printf("%s/%s", netname, peername);
1829 }else if(ai->ai_family == AF_INET6) {
1830 netaddr6 = (struct sockaddr_in6 *)&netaddr;
/* IPv6: whole 32-bit words are zeroed while at least 32 bits remain to be
 * cleared; the remaining bits are cleared by shifting. */
1832 shift = 128-(client->server->cidrlen);
1834 while(shift >= 32) {
1835 ((netaddr6->sin6_addr).s6_addr32[i])=0;
1839 (netaddr6->sin6_addr).s6_addr32[i]>>=shift;
1840 (netaddr6->sin6_addr).s6_addr32[i]<<=shift;
1842 getnameinfo((struct sockaddr *)netaddr6, (socklen_t)addrinlen,
1843 netname, sizeof(netname), NULL, 0, NI_NUMERICHOST);
1844 tmp=g_strdup_printf("%s/%s", netname, peername);
1848 client->exportname=g_strdup_printf(client->server->exportname, tmp);
1854 msg4(LOG_INFO, "connect from %s, assigned file is %s",
1855 peername, client->exportname);
1856 client->clientname=g_strdup(peername);
1861 * @param data a pointer to pid_t which should be freed
1863 void destroy_pid_t(gpointer data) {
1868 * Loop through the available servers, and serve them. Never returns.
/*
 * Accept loop: waits on all listening sockets with select(), accepts a
 * connection, builds a CLIENT for it, and forks a child that serves it.
 */
1870 int serveloop(GArray* servers) {
1871 struct sockaddr_storage addrin;
1872 socklen_t addrinlen=sizeof(addrin);
1880 * Set up the master fd_set. The set of descriptors we need
1881 * to select() for never changes anyway and it buys us a *lot*
1882 * of time to only build this once. However, if we ever choose
1883 * to not fork() for clients anymore, we may have to revisit
/* Only servers with a nonzero socket are added to the master set. */
1888 for(i=0;i<servers->len;i++) {
1889 if((sock=(g_array_index(servers, SERVER, i)).socket)) {
1890 FD_SET(sock, &mset);
1891 max=sock>max?sock:max;
1895 FD_SET(modernsock, &mset);
1896 max=modernsock>max?modernsock:max;
1899 CLIENT *client = NULL;
/* select() modifies the set it is given, so work on a copy of the master. */
1902 memcpy(&rset, &mset, sizeof(fd_set));
1903 if(select(max+1, &rset, NULL, NULL, NULL)>0) {
/* Connection on the shared socket: the export is chosen during negotiation.
 * NOTE(review): addrinlen is an in/out argument of accept() but is
 * initialised only once at the top of the function - confirm it is reset. */
1908 if(FD_ISSET(modernsock, &rset)) {
1909 if((net=accept(modernsock, (struct sockaddr *) &addrin, &addrinlen)) < 0)
1911 client = negotiate(net, NULL, servers);
1913 err_nonfatal("negotiation failed");
1918 serve = client->server;
/* Otherwise look for the per-export socket that became readable. */
1920 for(i=0;i<servers->len && !net;i++) {
1921 serve=&(g_array_index(servers, SERVER, i));
1922 if(FD_ISSET(serve->socket, &rset)) {
1923 if ((net=accept(serve->socket, (struct sockaddr *) &addrin, &addrinlen)) < 0)
/* The limit is compared against the size of the single `children` table. */
1930 if(serve->max_connections > 0 &&
1931 g_hash_table_size(children) >= serve->max_connections) {
1932 msg2(LOG_INFO, "Max connections reached");
/* The accepted socket is switched back to blocking mode. */
1936 if((sock_flags = fcntl(net, F_GETFL, 0))==-1) {
1937 err("fcntl F_GETFL");
1939 if(fcntl(net, F_SETFL, sock_flags &~O_NONBLOCK)==-1) {
1940 err("fcntl F_SETFL ~O_NONBLOCK");
1943 client = g_new0(CLIENT, 1);
1944 client->server=serve;
1945 client->exportsize=OFFT_MAX;
1947 client->transactionlogfd = -1;
1949 set_peername(net, client);
1950 if (!authorized_client(client)) {
1951 msg2(LOG_INFO,"Unauthorized client") ;
1955 msg2(LOG_INFO,"Authorized client") ;
/* The pid is heap-allocated because it is stored in the children table,
 * where the same pointer serves as both key and value. */
1956 pid=g_malloc(sizeof(pid_t));
1959 if ((*pid=fork())<0) {
1960 msg3(LOG_INFO,"Could not fork (%s)",strerror(errno)) ;
1964 if (*pid>0) { /* parent */
1966 g_hash_table_insert(children, pid, pid);
/* Child side: drop the children table and close the listening sockets. */
1970 g_hash_table_destroy(children);
1971 for(i=0;i<servers->len;i++) {
1972 serve=&g_array_index(servers, SERVER, i);
1973 close(serve->socket);
1975 /* FALSE does not free the
1976 actual data. This is required,
1977 because the client has a
1978 direct reference into that
1979 data, and otherwise we get a
1981 g_array_free(servers, FALSE);
1984 msg2(LOG_INFO,"Starting to serve");
1985 serveconnection(client);
/**
 * Apply the socket options used for listening sockets: SO_REUSEADDR,
 * SO_KEEPALIVE, and non-blocking mode. Any failure is reported via err().
 *
 * @param socket The socket to configure
 **/
void dosockopts(int socket) {
	int yes = 1;
	int sock_flags;

	/* lose the pesky "Address already in use" error message */
	if (setsockopt(socket,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof yes) == -1) {
		err("setsockopt SO_REUSEADDR");
	}
	if (setsockopt(socket,SOL_SOCKET,SO_KEEPALIVE,&yes,sizeof yes) == -1) {
		err("setsockopt SO_KEEPALIVE");
	}

	/* make the listening socket non-blocking */
	if ((sock_flags = fcntl(socket, F_GETFL, 0)) == -1) {
		err("fcntl F_GETFL");
	}
	if (fcntl(socket, F_SETFL, sock_flags | O_NONBLOCK) == -1) {
		err("fcntl F_SETFL O_NONBLOCK");
	}
}
2018 * Connect a server's socket.
2020 * @param serve the server we want to connect.
/*
 * Create, bind and listen on the socket for one configured export.
 * The return value is OR-ed into want_modern by setup_servers().
 */
2022 int setup_serve(SERVER *serve) {
2023 struct addrinfo hints;
2024 struct addrinfo *ai = NULL;
/* Early exit: returns 1 when the export has a name, 0 otherwise. */
2029 return serve->servename ? 1 : 0;
2031 memset(&hints,'\0',sizeof(hints));
2032 hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG | AI_NUMERICSERV;
2033 hints.ai_socktype = SOCK_STREAM;
2034 hints.ai_family = serve->socket_family;
/* getaddrinfo() takes the service as a string, hence the formatted port. */
2036 port = g_strdup_printf ("%d", serve->port);
2040 e = getaddrinfo(serve->listenaddr,port,&hints,&ai);
2045 fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
/* Remember the address family that was actually resolved. */
2051 if(serve->socket_family == AF_UNSPEC)
2052 serve->socket_family = ai->ai_family;
/* NOTE(review): `(serve->flags) && F_SDP` is a logical AND, so it is true for
 * any nonzero flags; a bitwise test `serve->flags & F_SDP` looks intended.
 * The `else (ai->ai_family == AF_INET6)` below also lacks an `if`, so the
 * condition is evaluated as a statement and the assignment after it always
 * runs. Both need fixing. */
2055 if ((serve->flags) && F_SDP) {
2056 if (ai->ai_family == AF_INET)
2057 ai->ai_family = AF_INET_SDP;
2058 else (ai->ai_family == AF_INET6)
2059 ai->ai_family = AF_INET6_SDP;
2062 if ((serve->socket = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol)) < 0)
2065 dosockopts(serve->socket);
2067 DEBUG("Waiting for connections... bind, ");
/* A bind() failure with EADDRINUSE is deliberately not handled here. */
2068 e = bind(serve->socket, ai->ai_addr, ai->ai_addrlen);
2069 if (e != 0 && errno != EADDRINUSE)
2072 if (listen(serve->socket, 1) < 0)
2076 if(serve->servename) {
/*
 * Open the shared listening socket `modernsock` on modern_listen at
 * NBD_DEFAULT_PORT (TCP, any address family).
 */
2083 void open_modern(void) {
2084 struct addrinfo hints;
2085 struct addrinfo* ai = NULL;
2089 memset(&hints, '\0', sizeof(hints));
2090 hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG;
2091 hints.ai_socktype = SOCK_STREAM;
2092 hints.ai_family = AF_UNSPEC;
2093 hints.ai_protocol = IPPROTO_TCP;
2094 e = getaddrinfo(modern_listen, NBD_DEFAULT_PORT, &hints, &ai);
2096 fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
/* Only the first address returned by getaddrinfo() is used. */
2099 if((modernsock = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol))<0) {
2103 dosockopts(modernsock);
2105 if(bind(modernsock, ai->ai_addr, ai->ai_addrlen)) {
/* Listen with a backlog of 10. */
2108 if(listen(modernsock, 10) <0) {
2116 * Connect our servers.
/*
 * Set up the socket of every configured server, create the table that tracks
 * child processes, and install the SIGCHLD and SIGTERM handlers.
 */
2118 void setup_servers(GArray* servers) {
2120 struct sigaction sa;
/* want_modern accumulates whether any setup_serve() call returned nonzero. */
2123 for(i=0;i<servers->len;i++) {
2124 want_modern |= setup_serve(&(g_array_index(servers, SERVER, i)));
/* Keys are hashed and compared as ints; only values get a destroy callback.
 * NOTE(review): serveloop() inserts pid_t pointers as keys - this assumes
 * pid_t is int-sized; confirm. */
2129 children=g_hash_table_new_full(g_int_hash, g_int_equal, NULL, destroy_pid_t);
/* SA_RESTART: system calls interrupted by these signals are restarted. */
2131 sa.sa_handler = sigchld_handler;
2132 sigemptyset(&sa.sa_mask);
2133 sa.sa_flags = SA_RESTART;
2134 if(sigaction(SIGCHLD, &sa, NULL) == -1)
2135 err("sigaction: %m");
2136 sa.sa_handler = sigterm_handler;
2137 sigemptyset(&sa.sa_mask);
2138 sa.sa_flags = SA_RESTART;
2139 if(sigaction(SIGTERM, &sa, NULL) == -1)
2140 err("sigaction: %m");
2144 * Go daemon (unless we specified at compile time that we didn't want this)
2145 * @param serve the first server of our configuration. If its port is zero,
2146 * then do not daemonize, because we're doing inetd then. This parameter
2147 * is only used to create a PID file of the form
2148 * /var/run/nbd-server.<port>.pid; it's not modified in any way.
/* daemonize() is a real function unless NODAEMON is defined, in which case
 * it is an empty macro. */
2150 #if !defined(NODAEMON)
2151 void daemonize(SERVER* serve) {
/* A server whose port is zero gets special treatment here. */
2154 if(serve && !(serve->port)) {
/* If no PID file template has been set, fall back to one of two defaults. */
2160 if(!*pidftemplate) {
2162 strncpy(pidftemplate, "/var/run/nbd-server.%d.pid", 255);
2164 strncpy(pidftemplate, "/var/run/nbd-server.pid", 255);
/* pidftemplate is used as the format string; the port (or 0 when there is
 * no serve) is its only argument. */
2167 snprintf(pidfname, 255, pidftemplate, serve ? serve->port : 0);
2168 pidf=fopen(pidfname, "w");
2170 fprintf(pidf,"%d\n", (int)getpid());
2174 fprintf(stderr, "Not fatal; continuing");
2178 #define daemonize(serve)
2179 #endif /* !defined(NODAEMON) */
2182 * Everything beyond this point (in the file) is run in non-daemon mode.
2183 * The stuff above daemonize() isn't.
/* Report a failure to export `serve`. The prototype marks the function as
 * never returning (G_GNUC_NORETURN). */
2186 void serve_err(SERVER* serve, const char* msg) G_GNUC_NORETURN;
2188 void serve_err(SERVER* serve, const char* msg) {
2189 g_message("Export of %s on port %d failed:", serve->exportname,
2195 * Set up user-ID and/or group-ID
/*
 * Switch to the configured group (rungroup) and user (runuser).
 * The group is changed first, then the user.
 */
2197 void dousers(void) {
2202 gr=getgrnam(rungroup);
2204 str = g_strdup_printf("Invalid group name: %s", rungroup);
2207 if(setgid(gr->gr_gid)<0) {
2208 err("Could not set GID: %m");
/* NOTE(review): no setgroups()/initgroups() call is visible in this excerpt;
 * confirm whether supplementary groups need to be dropped as well. */
2212 pw=getpwnam(runuser);
2214 str = g_strdup_printf("Invalid user name: %s", runuser);
2217 if(setuid(pw->pw_uid)<0) {
2218 err("Could not set UID: %m");
/*
 * GLib log handler that forwards messages to syslog (installed through
 * g_log_set_default_handler() in main()). The GLib log level selects the
 * syslog level, which defaults to LOG_DEBUG.
 */
2224 void glib_message_syslog_redirect(const gchar *log_domain,
2225 GLogLevelFlags log_level,
2226 const gchar *message,
2229 int level=LOG_DEBUG;
2233 case G_LOG_FLAG_FATAL:
2234 case G_LOG_LEVEL_CRITICAL:
2235 case G_LOG_LEVEL_ERROR:
2238 case G_LOG_LEVEL_WARNING:
2241 case G_LOG_LEVEL_MESSAGE:
2242 case G_LOG_LEVEL_INFO:
2245 case G_LOG_LEVEL_DEBUG:
/* "%s" keeps the message from being interpreted as a format string. */
2250 syslog(level, "%s", message);
2255 * Main entry point...
2257 int main(int argc, char *argv[]) {
2262 if (sizeof( struct nbd_request )!=28) {
2263 fprintf(stderr,"Bad size of structure. Alignment problems?\n");
2264 exit(EXIT_FAILURE) ;
2267 memset(pidftemplate, '\0', 256);
2270 config_file_pos = g_strdup(CFILE);
2271 serve=cmdline(argc, argv);
2272 servers = parse_cfile(config_file_pos, &err);
2275 serve->socket_family = AF_UNSPEC;
2277 append_serve(serve, servers);
2279 if (!(serve->port)) {
2282 /* You really should define ISSERVER if you're going to use
2283 * inetd mode, but if you don't, closing stdout and stderr
2284 * (which inetd had connected to the client socket) will let it
2288 open("/dev/null", O_WRONLY);
2289 open("/dev/null", O_WRONLY);
2290 g_log_set_default_handler( glib_message_syslog_redirect, NULL );
2292 client=g_malloc(sizeof(CLIENT));
2293 client->server=serve;
2295 client->exportsize=OFFT_MAX;
2296 set_peername(0,client);
2297 serveconnection(client);
2302 if(!servers || !servers->len) {
2303 if(err && !(err->domain == g_quark_from_string("parse_cfile")
2304 && err->code == CFILE_NOTFOUND)) {
2305 g_warning("Could not parse config file: %s",
2306 err ? err->message : "Unknown error");
2310 g_warning("Specifying an export on the command line is deprecated.");
2311 g_warning("Please use a configuration file instead.");
2314 if((!serve) && (!servers||!servers->len)) {
2315 g_message("No configured exports; quitting.");
2320 setup_servers(servers);