2 * Network Block Device - server
4 * Copyright 1996-1998 Pavel Machek, distribute under GPL
5 * <pavel@atrey.karlin.mff.cuni.cz>
6 * Copyright 2001-2004 Wouter Verhelst <wouter@debian.org>
7 * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk>
9 * Version 1.0 - hopefully 64-bit-clean
10 * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au>
11 * Version 1.2 - autodetect size of block devices, thanx to Peter T. Breuer" <ptb@it.uc3m.es>
12 * Version 1.5 - can compile on Unix systems that don't have 64 bit integer
13 * type, or don't have 64 bit file offsets by defining FS_32BIT
14 * in compile options for nbd-server *only*. This can be done
15 * with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the
16 * original autoconf input file, or I would make it a configure
17 * option.) Ken Yap <ken@nlc.net.au>.
18 * Version 1.6 - fix autodetection of block device size and really make 64 bit
19 * clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk>
20 * Version 2.0 - Version synchronised with client
21 * Version 2.1 - Reap zombie client processes when they exit. Removed
22 * (uncommented) the _IO magic, it's no longer necessary. Wouter
23 * Verhelst <wouter@debian.org>
24 * Version 2.2 - Auto switch to read-only mode (useful for floppies).
25 * Version 2.3 - Fixed code so that Large File Support works. This
26 * removes the FS_32BIT compile-time directive; define
27 * _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be
28 * using FS_32BIT. This will allow you to use files >2GB instead of
29 * having to use the -m option. Wouter Verhelst <wouter@debian.org>
30 * Version 2.4 - Added code to keep track of children, so that we can
31 * properly kill them from initscripts. Add a call to daemon(),
32 * so that processes don't think they have to wait for us, which is
33 * interesting for initscripts as well. Wouter Verhelst
35 * Version 2.5 - Bugfix release: forgot to reset child_arraysize to
36 * zero after fork()ing, resulting in nbd-server going berserk
37 * when it receives a signal with at least one child open. Wouter
38 * Verhelst <wouter@debian.org>
39 * 10/10/2003 - Added socket option SO_KEEPALIVE (sf.net bug 819235);
40 * rectified type of mainloop::size_host (sf.net bugs 814435 and
41 * 817385); close the PID file after writing to it, so that the
42 * daemon can actually be found. Wouter Verhelst
44 * 10/10/2003 - Size of the data "size_host" was wrong and so was not
45 * correctly put in network endianness. Many types were corrected
46 * (size_t and off_t instead of int). <vspaceg@sourceforge.net>
47 * Version 2.6 - Some code cleanup.
48 * Version 2.7 - Better build system.
49 * 11/02/2004 - Doxygenified the source, modularized it a bit. Needs a
50 * lot more work, but this is a start. Wouter Verhelst
52 * 16/03/2010 - Add IPv6 support.
53 * Kitt Tientanopajai <kitt@kitty.in.th>
54 * Neutron Soutmun <neo.neutron@gmail.com>
55 * Suriya Soutmun <darksolar@gmail.com>
58 /* Includes LFS defines, which defines behaviours of some of the following
59 * headers, so must come before those */
62 #include <sys/types.h>
63 #include <sys/socket.h>
65 #include <sys/select.h> /* select */
66 #include <sys/wait.h> /* wait */
67 #ifdef HAVE_SYS_IOCTL_H
68 #include <sys/ioctl.h>
70 #include <sys/param.h>
71 #ifdef HAVE_SYS_MOUNT_H
72 #include <sys/mount.h> /* For BLKGETSIZE */
74 #include <signal.h> /* sigaction */
76 #include <netinet/tcp.h>
77 #include <netinet/in.h>
85 #include <arpa/inet.h>
95 /* used in cliserv.h, so must come first */
96 #define MY_NAME "nbd_server"
100 #include <sdp_inet.h>
103 /** Default position of the config file */
105 #define SYSCONFDIR "/etc"
107 #define CFILE SYSCONFDIR "/nbd-server/config"
109 /** Where our config file actually is */
110 gchar* config_file_pos;
112 /** What user we're running as */
114 /** What group we're running as */
115 gchar* rungroup=NULL;
116 /** whether to export using the old negotiation protocol (port-based) */
117 gboolean do_oldstyle=FALSE;
119 /* Whether we should avoid forking */
122 /** Logging macros, now nothing goes to syslog unless you say ISSERVER */
124 #define msg2(a,b) syslog(a,b)
125 #define msg3(a,b,c) syslog(a,b,c)
126 #define msg4(a,b,c,d) syslog(a,b,c,d)
128 #define msg2(a,b) g_message(b)
129 #define msg3(a,b,c) g_message(b,c)
130 #define msg4(a,b,c,d) g_message(b,c,d)
133 /* Debugging macros */
136 #define DEBUG(...) printf(__VA_ARGS__)
140 #ifndef PACKAGE_VERSION
141 #define PACKAGE_VERSION ""
/**
 * The highest value a variable of type off_t can reach. off_t is a signed
 * integer, so this is every bit set except for the leftmost (sign) bit.
 *
 * The value is built in unsigned arithmetic and then converted: shifting a
 * signed 1 into the sign bit is undefined behaviour in C. The expansion is
 * fully parenthesised so it can be used inside any expression.
 */
#define OFFT_MAX ((off_t)((1ULL<<(sizeof(off_t)*8-1))-1ULL))
/* Buffer sizes and per-export flag bits. Each F_* value is a distinct power
 * of two, so several of them can be OR-ed together in one flags word. */
148 #define LINELEN 256 /**< Size of static buffer used to read the
149 authorization file (yuck) */
150 #define BUFSIZE ((1024*1024)+sizeof(struct nbd_reply)) /**< Size of buffer that can hold requests */
151 #define DIFFPAGESIZE 4096 /**< diff file uses those chunks */
152 #define F_READONLY 1 /**< flag to tell us a file is readonly */
153 #define F_MULTIFILE 2 /**< flag to tell us a file is exported using -m */
154 #define F_COPYONWRITE 4 /**< flag to tell us a file is exported using
156 #define F_AUTOREADONLY 8 /**< flag to tell us a file is set to autoreadonly */
157 #define F_SPARSE 16 /**< flag to tell us copyonwrite should use a sparse file */
158 #define F_SDP 32 /**< flag to tell us the export should be done using the Socket Direct Protocol for RDMA */
159 #define F_SYNC 64 /**< Whether to fsync() after a write */
160 #define F_FLUSH 128 /**< Whether server wants FLUSH to be sent by the client */
161 #define F_FUA 256 /**< Whether server wants FUA to be sent by the client */
162 #define F_ROTATIONAL 512 /**< Whether server wants the client to implement the elevator algorithm */
163 GHashTable *children;
164 char pidfname[256]; /**< name of our PID file */
165 char pidftemplate[256]; /**< template to be used for the filename of the PID file */
166 char default_authname[] = SYSCONFDIR "/nbd-server/allow"; /**< default name of allow file */
168 int modernsock=0; /**< Socket for the modern handler. Not used
169 if a client was only specified on the
170 command line; only port used if
171 oldstyle is set to false (and then the
172 command-line client isn't used, gna gna) */
173 char* modern_listen; /**< listenaddr value for modernsock */
176 * Types of virtualization
179 VIRT_NONE=0, /**< No virtualization */
180 VIRT_IPLIT, /**< Literal IP address as part of the filename */
181 VIRT_IPHASH, /**< Replacing all dots in an ip address by a / before
182 doing the same as in IPLIT */
183 VIRT_CIDR, /**< Every subnet in its own directory */
187 * Variables associated with a server.
190 gchar* exportname; /**< (unprocessed) filename of the file we're exporting */
191 off_t expected_size; /**< size of the exported file as it was told to
192 us through configuration */
193 gchar* listenaddr; /**< The IP address we're listening on */
194 unsigned int port; /**< port we're exporting this file at */
195 char* authname; /**< filename of the authorization file */
196 int flags; /**< flags associated with this exported file */
197 int socket; /**< The socket of this server. */
198 int socket_family; /**< family of the socket */
199 VIRT_STYLE virtstyle;/**< The style of virtualization, if any */
200 uint8_t cidrlen; /**< The length of the mask when we use
201 CIDR-style virtualization */
202 gchar* prerun; /**< command to be ran after connecting a client,
203 but before starting to serve */
204 gchar* postrun; /**< command that will be ran after the client
206 gchar* servename; /**< name of the export as selected by nbd-client */
207 int max_connections; /**< maximum number of opened connections */
208 gchar* transactionlog;/**< filename for transaction log */
212 * Variables associated with a client socket.
215 int fhandle; /**< file descriptor */
216 off_t startoff; /**< starting offset of this file */
220 off_t exportsize; /**< size of the file we're exporting */
221 char *clientname; /**< peer */
222 char *exportname; /**< (processed) filename of the file we're exporting */
223 GArray *export; /**< array of FILE_INFO of exported files;
224 array size is always 1 unless we're
225 doing the multiple file option */
226 int net; /**< The actual client socket */
227 SERVER *server; /**< The server this client is getting data from */
228 char* difffilename; /**< filename of the copy-on-write file, if any */
229 int difffile; /**< filedescriptor of copyonwrite file. @todo
230 shouldn't this be an array too? (cfr export) Or
231 make -m and -c mutually exclusive */
232 u32 difffilelen; /**< number of pages in difffile */
233 u32 *difmap; /**< see comment on the global difmap for this one */
234 gboolean modern; /**< client was negotiated using modern negotiation protocol */
235 int transactionlogfd;/**< fd for transaction log */
239 * Type of configuration file values
242 PARAM_INT, /**< This parameter is an integer */
243 PARAM_STRING, /**< This parameter is a string */
244 PARAM_BOOL, /**< This parameter is a boolean */
248 * Configuration file values
251 gchar *paramname; /**< Name of the parameter, as it appears in
253 gboolean required; /**< Whether this is a required (as opposed to
254 optional) parameter */
255 PARAM_TYPE ptype; /**< Type of the parameter. */
256 gpointer target; /**< Pointer to where the data of this
257 parameter should be written. If ptype is
258 PARAM_BOOL, the data is or'ed rather than
260 gint flagval; /**< Flag mask for this parameter in case ptype
265 * Translate a command name into human readable form
267 * @param command The command number (after applying NBD_CMD_MASK_COMMAND)
268 * @return pointer to the command name
270 static inline const char * getcommandname(uint64_t command) {
/* Each return below yields the literal spelling of one NBD command constant.
 * NOTE(review): the switch/case labels that select between these returns, and
 * whatever is returned for an unrecognised command, are not visible in this
 * listing -- confirm against the complete file. */
273 return "NBD_CMD_READ";
275 return "NBD_CMD_WRITE";
277 return "NBD_CMD_DISC";
279 return "NBD_CMD_FLUSH";
287 * Check whether a client is allowed to connect. Works with an authorization
288 * file which contains one line per machine, no wildcards.
290 * @param opts The client who's trying to connect.
291 * @return 0 - authorization refused, 1 - OK
293 int authorized_client(CLIENT *opts) {
294 const char *ERRMSG="Invalid entry '%s' in authfile '%s', so, refusing all connections.";
299 struct in_addr client;
300 struct in_addr cltemp;
303 if ((f=fopen(opts->server->authname,"r"))==NULL) {
304 msg4(LOG_INFO,"Can't open authorization file %s (%s).",
305 opts->server->authname,strerror(errno)) ;
309 inet_aton(opts->clientname, &client);
310 while (fgets(line,LINELEN,f)!=NULL) {
311 if((tmp=index(line, '/'))) {
312 if(strlen(line)<=tmp-line) {
313 msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
317 if(!inet_aton(line,&addr)) {
318 msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
321 len=strtol(tmp, NULL, 0);
322 addr.s_addr>>=32-len;
323 addr.s_addr<<=32-len;
324 memcpy(&cltemp,&client,sizeof(client));
325 cltemp.s_addr>>=32-len;
326 cltemp.s_addr<<=32-len;
327 if(addr.s_addr == cltemp.s_addr) {
331 if (strncmp(line,opts->clientname,strlen(opts->clientname))==0) {
341 * Read data from a file descriptor into a buffer
343 * @param f a file descriptor
344 * @param buf a buffer
345 * @param len the number of bytes to be read
/**
 * Read exactly len bytes from a file descriptor into a buffer.
 *
 * Short reads are continued until the buffer is full. EAGAIN and EINTR are
 * retried; end-of-file and any other error are reported through err().
 *
 * @param f a file descriptor
 * @param buf a buffer
 * @param len the number of bytes to be read
 */
static inline void readit(int f, void *buf, size_t len) {
	char *pos = buf;	/* byte pointer: arithmetic on void* is not standard C */
	ssize_t res;

	while (len > 0) {
		DEBUG("*");
		res = read(f, pos, len);
		if (res > 0) {
			len -= (size_t)res;
			pos += res;
		} else if (res == 0) {
			/* End of file: errno is not set by read() here, so it
			 * must not be consulted (a stale EAGAIN would loop
			 * forever). */
			err("Read failed: End of file");
		} else if (errno != EAGAIN && errno != EINTR) {
			err("Read failed: %m");
		}
	}
}
363 * Consume data from an FD that we don't want
365 * @param f a file descriptor
366 * @param buf a buffer
367 * @param len the number of bytes to consume
368 * @param bufsiz the size of the buffer
370 static inline void consume(int f, void * buf, size_t len, size_t bufsiz) {
/* Read in pieces no larger than the scratch buffer: each piece is
 * min(len, bufsiz) bytes and is read to the start of buf, so earlier data is
 * overwritten (it is unwanted anyway). */
373 curlen = (len>bufsiz)?bufsiz:len;
374 readit(f, buf, curlen);
381 * Write data from a buffer into a filedescriptor
383 * @param f a file descriptor
384 * @param buf a buffer containing data
385 * @param len the number of bytes to be written
/**
 * Write exactly len bytes from a buffer to a file descriptor.
 *
 * Short writes are continued until everything has been sent. A write that is
 * interrupted by a signal before transferring data is retried; any other
 * failure is reported through err().
 *
 * @param f a file descriptor
 * @param buf a buffer containing data
 * @param len the number of bytes to be written
 */
static inline void writeit(int f, void *buf, size_t len) {
	const char *pos = buf;	/* byte pointer: arithmetic on void* is not standard C */
	ssize_t res;

	while (len > 0) {
		DEBUG("+");
		res = write(f, pos, len);
		if (res > 0) {
			len -= (size_t)res;
			pos += res;
		} else if (res < 0 && errno == EINTR) {
			continue;
		} else {
			err("Send failed: %m");
		}
	}
}
399 * Print out a message about how to use nbd-server. Split out to a separate
400 * function so that we can call it from multiple places
403 printf("This is nbd-server version " VERSION "\n");
404 printf("Usage: [ip:|ip6@]port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-C configuration file] [-p PID file name] [-o section name] [-M max connections]\n"
405 "\t-r|--read-only\t\tread only\n"
406 "\t-m|--multi-file\t\tmultiple file\n"
407 "\t-c|--copy-on-write\tcopy on write\n"
408 "\t-C|--config-file\tspecify an alternate configuration file\n"
409 "\t-l|--authorize-file\tfile with list of hosts that are allowed to\n\t\t\t\tconnect.\n"
410 "\t-p|--pid-file\t\tspecify a filename to write our PID to\n"
411 "\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n"
412 "\t-M|--max-connections\tspecify the maximum number of opened connections\n\n"
413 "\tif port is set to 0, stdin is used (for running from inetd)\n"
414 "\tif file_to_export contains '%%s', it is substituted with the IP\n"
415 "\t\taddress of the machine trying to connect\n"
416 "\tif ip is set, it contains the local IP address on which we're listening.\n\tif not, the server will listen on all local IP addresses\n");
417 printf("Using configuration file %s\n", CFILE);
420 /* Dumps a config file section of the given SERVER*, and exits. */
421 void dump_section(SERVER* serve, gchar* section_header) {
422 printf("[%s]\n", section_header);
423 printf("\texportname = %s\n", serve->exportname);
424 printf("\tlistenaddr = %s\n", serve->listenaddr);
425 printf("\tport = %d\n", serve->port);
426 if(serve->flags & F_READONLY) {
427 printf("\treadonly = true\n");
429 if(serve->flags & F_MULTIFILE) {
430 printf("\tmultifile = true\n");
432 if(serve->flags & F_COPYONWRITE) {
433 printf("\tcopyonwrite = true\n");
435 if(serve->expected_size) {
436 printf("\tfilesize = %lld\n", (long long int)serve->expected_size);
438 if(serve->authname) {
439 printf("\tauthfile = %s\n", serve->authname);
445 * Parse the command line.
447 * @param argc the argc argument to main()
448 * @param argv the argv argument to main()
450 SERVER* cmdline(int argc, char *argv[]) {
/* Long-option table; every entry maps onto a short option letter handled by
 * the getopt_long() loop below.
 * NOTE(review): the long name here is "max-connection" (singular) while the
 * usage text in this file prints "--max-connections" -- confirm which one is
 * intended. */
454 struct option long_options[] = {
455 {"read-only", no_argument, NULL, 'r'},
456 {"multi-file", no_argument, NULL, 'm'},
457 {"copy-on-write", no_argument, NULL, 'c'},
458 {"dont-fork", no_argument, NULL, 'd'},
459 {"authorize-file", required_argument, NULL, 'l'},
460 {"config-file", required_argument, NULL, 'C'},
461 {"pid-file", required_argument, NULL, 'p'},
462 {"output-config", required_argument, NULL, 'o'},
463 {"max-connection", required_argument, NULL, 'M'},
470 gboolean do_output=FALSE;
471 gchar* section_header="";
/* Start from a zero-filled SERVER that uses the default allow file and
 * literal-IP virtualization; the options below override these. */
477 serve=g_new0(SERVER, 1);
478 serve->authname = g_strdup(default_authname);
479 serve->virtstyle=VIRT_IPLIT;
/* The leading '-' in the option string makes GNU getopt_long() hand back
 * non-option arguments in order rather than permuting argv. */
480 while((c=getopt_long(argc, argv, "-C:cdl:mo:rp:M:", long_options, &i))>=0) {
483 /* non-option argument */
/* nonspecial counts the positional arguments seen so far and selects how the
 * current one is interpreted. */
484 switch(nonspecial++) {
/* strchr()==strrchr() holds when the argument has at most one ':'; it is then
 * split as addr:port. With more than one ':' the argument is split on '@'
 * instead (see the usage text's "ip6@" form). */
486 if(strchr(optarg, ':') == strrchr(optarg, ':')) {
487 addr_port=g_strsplit(optarg, ":", 2);
489 /* Check for "@" - maybe user using this separator
492 g_strfreev(addr_port);
493 addr_port=g_strsplit(optarg, "@", 2);
496 addr_port=g_strsplit(optarg, "@", 2);
/* Two parts: address then port. One part: port only, listen address unset. */
500 serve->port=strtol(addr_port[1], NULL, 0);
501 serve->listenaddr=g_strdup(addr_port[0]);
503 serve->listenaddr=NULL;
504 serve->port=strtol(addr_port[0], NULL, 0);
506 g_strfreev(addr_port);
509 serve->exportname = g_strdup(optarg);
510 if(serve->exportname[0] != '/') {
511 fprintf(stderr, "E: The to be exported file needs to be an absolute filename!\n");
/* Size argument: the last character may be a k/K/m/M suffix.
 * NOTE(review): the code that scales the value by the suffix is not visible
 * in this listing. */
516 last=strlen(optarg)-1;
518 if (suffix == 'k' || suffix == 'K' ||
519 suffix == 'm' || suffix == 'M')
521 es = (off_t)atoll(optarg);
529 serve->expected_size = es;
534 serve->flags |= F_READONLY;
537 serve->flags |= F_MULTIFILE;
541 section_header = g_strdup(optarg);
/* NOTE(review): pidftemplate is declared as char[256]; strncpy() leaves it
 * without a terminating NUL when optarg is 256 bytes or longer. */
544 strncpy(pidftemplate, optarg, 256);
547 serve->flags |=F_COPYONWRITE;
553 g_free(config_file_pos);
554 config_file_pos=g_strdup(optarg);
557 g_free(serve->authname);
558 serve->authname=g_strdup(optarg);
561 serve->max_connections = strtol(optarg, NULL, 0);
569 /* What's left: the port to export, the name of the to be exported
570 * file, and, optionally, the size of the file, in that order. */
579 g_critical("Need a complete configuration on the command line to output a config file section!");
582 dump_section(serve, section_header);
588 * Error codes for config file parsing
591 CFILE_NOTFOUND, /**< The configuration file is not found */
592 CFILE_MISSING_GENERIC, /**< The (required) group "generic" is missing */
593 CFILE_KEY_MISSING, /**< A (required) key is missing */
594 CFILE_VALUE_INVALID, /**< A value is syntactically invalid */
595 CFILE_VALUE_UNSUPPORTED,/**< A value is not supported in this build */
596 CFILE_PROGERR, /**< Programmer error */
597 CFILE_NO_EXPORTS, /**< A config file was specified that does not
598 define any exports */
599 CFILE_INCORRECT_PORT, /**< The reserved port was specified for an
604 * Remove a SERVER from memory. Used from the hash table
606 void remove_server(gpointer s) {
/* Release the strings owned by the SERVER with g_free().
 * NOTE(review): the visible lines free exportname, authname, listenaddr,
 * prerun, postrun and transactionlog; whether servename and the SERVER itself
 * are released is not visible in this listing. */
610 g_free(server->exportname);
612 g_free(server->authname);
613 if(server->listenaddr)
614 g_free(server->listenaddr);
616 g_free(server->prerun);
618 g_free(server->postrun);
619 if(server->transactionlog)
620 g_free(server->transactionlog);
626 * @param s the old server we want to duplicate
627 * @return new duplicated server
629 SERVER* dup_serve(SERVER *s) {
630 SERVER *serve = NULL;
632 serve=g_new0(SERVER, 1);
637 serve->exportname = g_strdup(s->exportname);
639 serve->expected_size = s->expected_size;
642 serve->listenaddr = g_strdup(s->listenaddr);
644 serve->port = s->port;
647 serve->authname = strdup(s->authname);
649 serve->flags = s->flags;
650 serve->socket = s->socket;
651 serve->socket_family = s->socket_family;
652 serve->virtstyle = s->virtstyle;
653 serve->cidrlen = s->cidrlen;
656 serve->prerun = g_strdup(s->prerun);
659 serve->postrun = g_strdup(s->postrun);
661 if(s->transactionlog)
662 serve->transactionlog = g_strdup(s->transactionlog);
665 serve->servename = g_strdup(s->servename);
667 serve->max_connections = s->max_connections;
673 * append new server to array
675 * @param a server array
676 * @return 0 success, -1 error
678 int append_serve(SERVER *s, GArray *a) {
680 struct addrinfo hints;
681 struct addrinfo *ai = NULL;
682 struct addrinfo *rp = NULL;
683 char host[NI_MAXHOST];
689 err("Invalid parsing server");
/* getaddrinfo() takes the service as a string, so format the numeric port. */
693 port = g_strdup_printf("%d", s->port);
/* Ask for passive (listening) TCP stream addresses of any family that is
 * configured on this host. */
695 memset(&hints,'\0',sizeof(hints));
696 hints.ai_family = AF_UNSPEC;
697 hints.ai_socktype = SOCK_STREAM;
698 hints.ai_flags = AI_ADDRCONFIG | AI_PASSIVE;
699 hints.ai_protocol = IPPROTO_TCP;
701 e = getaddrinfo(s->listenaddr, port, &hints, &ai);
/* One array entry is appended per resolved address; NI_NUMERICHOST turns the
 * address back into its numeric text form. */
707 for (rp = ai; rp != NULL; rp = rp->ai_next) {
708 e = getnameinfo(rp->ai_addr, rp->ai_addrlen, host, sizeof(host), NULL, 0, NI_NUMERICHOST);
710 if (e != 0) { // error
711 fprintf(stderr, "getnameinfo: %s\n", gai_strerror(e));
715 // duplicate server and set listenaddr to resolved IP address
/* NOTE(review): if ns comes from dup_serve(), its listenaddr already holds a
 * duplicated string that is overwritten here without being freed -- confirm. */
718 ns->listenaddr = g_strdup(host);
719 ns->socket_family = rp->ai_family;
/* The SERVER is copied by value into the array. */
720 g_array_append_val(a, *ns);
728 fprintf(stderr, "getaddrinfo failed on listen host/address: %s (%s)\n", s->listenaddr ? s->listenaddr : "any", gai_strerror(e));
739 * Parse the config file.
741 * @param f the name of the config file
742 * @param e a GError. @see CFILE_ERRORS for what error values this function can
744 * @return a GArray of SERVER structures. If the config file is empty or does not
745 * exist, returns an empty GArray; if the config file contains an
746 * error, returns NULL, and e is set appropriately
748 GArray* parse_cfile(gchar* f, GError** e) {
749 const char* DEFAULT_ERROR = "Could not parse %s in group %s: %s";
750 const char* MISSING_REQUIRED_ERROR = "Could not find required value %s in group %s: %s";
752 gchar *virtstyle=NULL;
/* Per-export parameters. Each row is: key name, required?, value type,
 * address to store the value at, and (for booleans) the flag bit to set or
 * clear in s.flags. */
754 { "exportname", TRUE, PARAM_STRING, &(s.exportname), 0 },
755 { "port", TRUE, PARAM_INT, &(s.port), 0 },
756 { "authfile", FALSE, PARAM_STRING, &(s.authname), 0 },
/* NOTE(review): PARAM_INT values are stored through a gint* below, but
 * expected_size is declared off_t -- confirm the store covers the whole
 * field on every target. */
757 { "filesize", FALSE, PARAM_INT, &(s.expected_size), 0 },
758 { "virtstyle", FALSE, PARAM_STRING, &(virtstyle), 0 },
759 { "prerun", FALSE, PARAM_STRING, &(s.prerun), 0 },
760 { "postrun", FALSE, PARAM_STRING, &(s.postrun), 0 },
761 { "transactionlog", FALSE, PARAM_STRING, &(s.transactionlog), 0 },
762 { "readonly", FALSE, PARAM_BOOL, &(s.flags), F_READONLY },
763 { "multifile", FALSE, PARAM_BOOL, &(s.flags), F_MULTIFILE },
764 { "copyonwrite", FALSE, PARAM_BOOL, &(s.flags), F_COPYONWRITE },
765 { "sparse_cow", FALSE, PARAM_BOOL, &(s.flags), F_SPARSE },
766 { "sdp", FALSE, PARAM_BOOL, &(s.flags), F_SDP },
767 { "sync", FALSE, PARAM_BOOL, &(s.flags), F_SYNC },
768 { "flush", FALSE, PARAM_BOOL, &(s.flags), F_FLUSH },
769 { "fua", FALSE, PARAM_BOOL, &(s.flags), F_FUA },
770 { "rotational", FALSE, PARAM_BOOL, &(s.flags), F_ROTATIONAL },
771 { "listenaddr", FALSE, PARAM_STRING, &(s.listenaddr), 0 },
772 { "maxconnections", FALSE, PARAM_INT, &(s.max_connections), 0 },
774 const int lp_size=sizeof(lp)/sizeof(PARAM);
/* Parameters of the [generic] group; these write straight into globals. */
776 { "user", FALSE, PARAM_STRING, &runuser, 0 },
777 { "group", FALSE, PARAM_STRING, &rungroup, 0 },
778 { "oldstyle", FALSE, PARAM_BOOL, &do_oldstyle, 1 },
779 { "listenaddr", FALSE, PARAM_STRING, &modern_listen, 0 },
782 int p_size=sizeof(gp)/sizeof(PARAM);
785 const char *err_msg=NULL;
794 errdomain = g_quark_from_string("parse_cfile");
795 cfile = g_key_file_new();
/* The result array stores SERVER structures by value (element size is
 * sizeof(SERVER)) and is created with zeroed new elements. */
796 retval = g_array_new(FALSE, TRUE, sizeof(SERVER));
797 if(!g_key_file_load_from_file(cfile, f, G_KEY_FILE_KEEP_COMMENTS |
798 G_KEY_FILE_KEEP_TRANSLATIONS, &err)) {
799 g_set_error(e, errdomain, CFILE_NOTFOUND, "Could not open config file %s.", f);
800 g_key_file_free(cfile);
/* The first group in the file must be [generic]. */
803 startgroup = g_key_file_get_start_group(cfile);
804 if(!startgroup || strcmp(startgroup, "generic")) {
805 g_set_error(e, errdomain, CFILE_MISSING_GENERIC, "Config file does not contain the [generic] group!");
806 g_key_file_free(cfile);
/* Walk every group; the scratch SERVER s is cleared before each one. */
809 groups = g_key_file_get_groups(cfile, NULL);
810 for(i=0;groups[i];i++) {
811 memset(&s, '\0', sizeof(SERVER));
813 /* After the [generic] group, start parsing exports */
/* Fetch each known parameter of the current table with the GKeyFile getter
 * that matches its declared type. */
818 for(j=0;j<p_size;j++) {
819 g_assert(p[j].target != NULL);
820 g_assert(p[j].ptype==PARAM_INT||p[j].ptype==PARAM_STRING||p[j].ptype==PARAM_BOOL);
823 *((gint*)p[j].target) =
824 g_key_file_get_integer(cfile,
830 *((gchar**)p[j].target) =
831 g_key_file_get_string(cfile,
837 value = g_key_file_get_boolean(cfile,
839 p[j].paramname, &err);
/* Booleans set or clear their flag bit in the target word rather than
 * overwriting it. */
842 *((gint*)p[j].target) |= p[j].flagval;
844 *((gint*)p[j].target) &= ~(p[j].flagval);
/* NOTE(review): p[j].target for "port" points at an integer, yet it is passed
 * to strcmp() as if it were a string -- confirm this comparison does what is
 * intended. */
849 if(!strcmp(p[j].paramname, "port") && !strcmp(p[j].target, NBD_DEFAULT_PORT)) {
850 g_set_error(e, errdomain, CFILE_INCORRECT_PORT, "Config file specifies default port for oldstyle export");
851 g_key_file_free(cfile);
/* A missing key is only an error for required parameters; any other GKeyFile
 * error is reported with the generic message. */
855 if(err->code == G_KEY_FILE_ERROR_KEY_NOT_FOUND) {
857 /* Ignore not-found error for optional values */
861 err_msg = MISSING_REQUIRED_ERROR;
864 err_msg = DEFAULT_ERROR;
866 g_set_error(e, errdomain, CFILE_VALUE_INVALID, err_msg, p[j].paramname, groups[i], err->message);
867 g_array_free(retval, TRUE);
869 g_key_file_free(cfile);
/* Translate the textual virtstyle into the enum. Matching is by prefix
 * (strncmp), and "cidrhash" must be followed by a mask length, which is
 * parsed from the text after the first 8 characters. */
874 if(!strncmp(virtstyle, "none", 4)) {
875 s.virtstyle=VIRT_NONE;
876 } else if(!strncmp(virtstyle, "ipliteral", 9)) {
877 s.virtstyle=VIRT_IPLIT;
878 } else if(!strncmp(virtstyle, "iphash", 6)) {
879 s.virtstyle=VIRT_IPHASH;
880 } else if(!strncmp(virtstyle, "cidrhash", 8)) {
881 s.virtstyle=VIRT_CIDR;
882 if(strlen(virtstyle)<10) {
883 g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s: missing length", virtstyle, groups[i]);
884 g_array_free(retval, TRUE);
885 g_key_file_free(cfile);
888 s.cidrlen=strtol(virtstyle+8, NULL, 0);
890 g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s", virtstyle, groups[i]);
891 g_array_free(retval, TRUE);
892 g_key_file_free(cfile);
895 if(s.port && !do_oldstyle) {
896 g_warning("A port was specified, but oldstyle exports were not requested. This may not do what you expect.");
897 g_warning("Please read 'man 5 nbd-server' and search for oldstyle for more info");
/* Literal-IP virtualization is the value used when no virtstyle was given. */
900 s.virtstyle=VIRT_IPLIT;
902 /* Don't need to free this, it's not our string */
904 /* Don't append values for the [generic] group */
/* The export is named after its config group; append_serve() resolves the
 * listen address and adds the resulting entries to retval. */
906 s.socket_family = AF_UNSPEC;
907 s.servename = groups[i];
909 append_serve(&s, retval);
916 if(s.flags & F_SDP) {
917 g_set_error(e, errdomain, CFILE_VALUE_UNSUPPORTED, "This nbd-server was built without support for SDP, yet group %s uses it", groups[i]);
918 g_array_free(retval, TRUE);
919 g_key_file_free(cfile);
925 g_set_error(e, errdomain, CFILE_NO_EXPORTS, "The config file does not specify any exports");
927 g_key_file_free(cfile);
932 * Signal handler for SIGCHLD
933 * @param s the signal we're handling (must be SIGCHLD, or something
936 void sigchld_handler(int s) {
/* Reap every child that has already exited; WNOHANG keeps the loop from
 * blocking once no further child is waiting.
 * NOTE(review): msg3() expands to syslog() or g_message(), and the GHashTable
 * calls below also run in signal context; none of these are on the POSIX
 * async-signal-safe list -- confirm this is acceptable. */
941 while((pid=waitpid(-1, &status, WNOHANG)) > 0) {
942 if(WIFEXITED(status)) {
943 msg3(LOG_INFO, "Child exited with %d", WEXITSTATUS(status));
/* Look the PID up in the global children table and drop its entry. */
945 i=g_hash_table_lookup(children, &pid);
947 msg3(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid);
949 DEBUG("Removing %d from the list of children", pid);
950 g_hash_table_remove(children, &pid);
956 * Kill a child. Called from sigterm_handler::g_hash_table_foreach.
959 * @param value the value corresponding to the above key
960 * @param user_data a pointer which we always set to 1, so that we know what
963 void killchild(gpointer key, gpointer value, gpointer user_data) {
/* user_data is the address of the caller's int marker (sigterm_handler passes
 * &parent).
 * NOTE(review): the statements that use key/value to signal the child are not
 * visible in this listing. */
965 int *parent=user_data;
972 * Handle SIGTERM and dispatch it to our children
973 * @param s the signal we're handling (must be SIGTERM, or something
974 * is severely wrong).
976 void sigterm_handler(int s) {
/* Invoke killchild() once for every entry of the global children table,
 * passing the address of the local parent marker as user data. */
979 g_hash_table_foreach(children, killchild, &parent);
989 * Detect the size of a file.
991 * @param fhandle An open filedescriptor
992 * @return the size of the file, or OFFT_MAX if detection was
995 off_t size_autodetect(int fhandle) {
/* Three probes are tried in order -- the BLKGETSIZE64 ioctl (when the
 * platform headers provide it), fstat(), then lseek() to the end -- and the
 * first one that yields a positive size wins. */
998 struct stat stat_buf;
1001 #ifdef HAVE_SYS_MOUNT_H
1002 #ifdef HAVE_SYS_IOCTL_H
1004 DEBUG("looking for export size with ioctl BLKGETSIZE64\n");
/* ioctl() returns 0 on success; a reported size of 0 is treated as "unknown"
 * and falls through to the next probe. */
1005 if (!ioctl(fhandle, BLKGETSIZE64, &bytes) && bytes) {
1006 return (off_t)bytes;
1008 #endif /* BLKGETSIZE64 */
1009 #endif /* HAVE_SYS_IOCTL_H */
1010 #endif /* HAVE_SYS_MOUNT_H */
1012 DEBUG("looking for fhandle size with fstat\n");
/* Pre-clear st_size so a failed fstat() cannot leave a stale value behind. */
1013 stat_buf.st_size = 0;
1014 error = fstat(fhandle, &stat_buf);
1016 if(stat_buf.st_size > 0)
1017 return (off_t)stat_buf.st_size;
1019 err("fstat failed: %m");
1022 DEBUG("looking for fhandle size with lseek SEEK_END\n");
/* Seeking to the end returns the resulting offset, i.e. the size; note that
 * this moves the file position of fhandle. */
1023 es = lseek(fhandle, (off_t)0, SEEK_END);
1024 if (es > ((off_t)0)) {
/* The debug code is 1 for EBADF, 2 for ESPIPE, 3 for EINVAL, 4 otherwise. */
1027 DEBUG("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4)));
1030 err("Could not find size of exported block device: %m");
1035 * Get the file handle and offset, given an export offset.
1037 * @param export An array of export files
1038 * @param a The offset to get corresponding file/offset for
1039 * @param fhandle [out] File descriptor
1040 * @param foffset [out] Offset into fhandle
1041 * @param maxbytes [out] Tells how many bytes can be read/written
1042 * from fhandle starting at foffset (0 if there is no limit)
1043 * @return 0 on success, -1 on failure
1045 int get_filepos(GArray* export, off_t a, int* fhandle, off_t* foffset, size_t* maxbytes ) {
1046 /* Negative offset not allowed */
1050 /* Binary search for last file with starting offset <= a */
/* The search relies on the FILE_INFO entries being ordered by ascending
 * startoff. */
1053 int end = export->len - 1;
1054 while( start <= end ) {
1055 int mid = (start + end) / 2;
1056 fi = g_array_index(export, FILE_INFO, mid);
1057 if( fi.startoff < a ) {
1059 } else if( fi.startoff > a ) {
1067 /* end should never go negative, since first startoff is 0 and a >= 0 */
/* Entry `end` is the file containing offset a; the offset inside that file is
 * a minus the file's starting offset. */
1070 fi = g_array_index(export, FILE_INFO, end);
1071 *fhandle = fi.fhandle;
1072 *foffset = a - fi.startoff;
/* When another file follows, at most the bytes up to that file's start may be
 * transferred through this handle. */
1074 if( end+1 < export->len ) {
1075 FILE_INFO fi_next = g_array_index(export, FILE_INFO, end+1);
1076 *maxbytes = fi_next.startoff - a;
1083 * seek to a position in a file, with error handling.
1084 * @param handle a filedescriptor
1085 * @param a position to seek to
1086 * @todo get rid of this; lastpoint is a global variable right now, but it
1087 * shouldn't be. If we pass it on as a parameter, that makes things a *lot*
1090 void myseek(int handle,off_t a) {
/* Position the descriptor at absolute offset a; a negative lseek() result is
 * reported through err(). */
1091 if (lseek(handle, a, SEEK_SET) < 0) {
1092 err("Can not seek locally!\n");
1097 * Write an amount of bytes at a given offset to the right file. This
1098 * abstracts the write-side of the multiple file option.
1100 * @param a The offset where the write should start
1101 * @param buf The buffer to write from
1102 * @param len The length of buf
1103 * @param client The client we're serving for
1104 * @param fua Flag to indicate 'Force Unit Access'
1105 * @return The number of bytes actually written, or -1 in case of an error
1107 ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
/* Translate the export offset into a (file descriptor, file offset) pair and
 * learn how many bytes fit before the next file begins. */
1113 if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
/* A non-zero maxbytes caps this write; callers that need everything written
 * use rawexpwrite_fully(), which loops. */
1115 if(maxbytes && len > maxbytes)
1118 DEBUG("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle, (long long unsigned)foffset, (unsigned int)len, fua);
1120 myseek(fhandle, foffset);
1121 retval = write(fhandle, buf, len);
1122 if(client->server->flags & F_SYNC) {
1126 /* This is where we would do the following
1127 * #ifdef USE_SYNC_FILE_RANGE
1128 * However, we don't, for the reasons set out below
1129 * by Christoph Hellwig <hch@infradead.org>
1132 * fdatasync is equivalent to fsync except that it does not flush
1133 * non-essential metadata (basically just timestamps in practice), but it
1134 * does flush metadata required to find the data again, e.g. allocation
1135 * information and extent maps. sync_file_range does nothing but flush
1136 * out pagecache content - it means you basically won't get your data
1137 * back in case of a crash if you either:
1139 * a) have a volatile write cache in your disk (e.g. any normal SATA disk)
1140 * b) are using a sparse file on a filesystem
1141 * c) are using a fallocate-preallocated file on a filesystem
1142 * d) use any file on a COW filesystem like btrfs
1144 * e.g. it only does anything useful for you if you do not have a volatile
1145 * write cache, and either use a raw block device node, or just overwrite
1146 * an already fully allocated (and not preallocated) file on a non-COW
1150 * What we should do is open a second FD with O_DSYNC set, then write to
1151 * that when appropriate. However, with a Linux client, every REQ_FUA
1152 * immediately follows a REQ_FLUSH, so fdatasync does not cause performance
1157 sync_file_range(fhandle, foffset, len,
1158 SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE |
1159 SYNC_FILE_RANGE_WAIT_AFTER);
1168 * Call rawexpwrite repeatedly until all data has been written.
1170 * @param a The offset where the write should start
1171 * @param buf The buffer to write from
1172 * @param len The length of buf
1173 * @param client The client we're serving for
1174 * @param fua Flag to indicate 'Force Unit Access'
1175 * @return 0 on success, nonzero on failure
1177 int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
/* Keep calling rawexpwrite() while data remains and it reports progress
 * (a positive byte count). */
1180 while(len > 0 && (ret=rawexpwrite(a, buf, len, client, fua)) > 0 ) {
/* Failure means either an error return or bytes left unwritten. */
1185 return (ret < 0 || len != 0);
1189 * Read an amount of bytes at a given offset from the right file. This
1190 * abstracts the read-side of the multiple files option.
1192 * @param a The offset where the read should start
1193 * @param buf A buffer to read into
1194 * @param len The size of buf
1195 * @param client The client we're serving for
1196 * @return The number of bytes actually read, or -1 in case of an
1199 ssize_t rawexpread(off_t a, char *buf, size_t len, CLIENT *client) {
/* Translate the export offset into a (file descriptor, file offset) pair and
 * learn how many bytes are available before the next file begins. */
1204 if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
/* A non-zero maxbytes caps this read, so fewer than len bytes may be
 * returned; rawexpread_fully() loops to get the rest. */
1206 if(maxbytes && len > maxbytes)
1209 DEBUG("(READ from fd %d offset %llu len %u), ", fhandle, (long long unsigned int)foffset, (unsigned int)len);
1211 myseek(fhandle, foffset);
1212 return read(fhandle, buf, len);
1216 * Call rawexpread repeatedly until all data has been read.
1217 * @return 0 on success, nonzero on failure
1219 int rawexpread_fully(off_t a, char *buf, size_t len, CLIENT *client) {
1222 while(len > 0 && (ret=rawexpread(a, buf, len, client)) > 0 ) {
1227 return (ret < 0 || len != 0);
1231 * Read an amount of bytes at a given offset from the right file. This
1232 * abstracts the read-side of the copyonwrite stuff, and calls
1233 * rawexpread() with the right parameters to do the actual work.
1234 * @param a The offset where the read should start
1235 * @param buf A buffer to read into
1236 * @param len The size of buf
1237 * @param client The client we're going to read for
1238 * @return 0 on success, nonzero on failure
1240 int expread(off_t a, char *buf, size_t len, CLIENT *client) {
1241 off_t rdlen, offset;
1242 off_t mapcnt, mapl, maph, pagestart;
1244 if (!(client->server->flags & F_COPYONWRITE))
1245 return(rawexpread_fully(a, buf, len, client));
1246 DEBUG("Asked to read %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
1248 mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
1250 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
1251 pagestart=mapcnt*DIFFPAGESIZE;
1253 rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
1254 len : (size_t)DIFFPAGESIZE-offset;
1255 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1256 DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
1257 (unsigned long)(client->difmap[mapcnt]));
1258 myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset);
1259 if (read(client->difffile, buf, rdlen) != rdlen) return -1;
1260 } else { /* the block is not there */
1261 DEBUG("Page %llu is not here, we read the original one\n",
1262 (unsigned long long)mapcnt);
1263 if(rawexpread_fully(a, buf, rdlen, client)) return -1;
1265 len-=rdlen; a+=rdlen; buf+=rdlen;
1271 * Write an amount of bytes at a given offset to the right file. This
1272 * abstracts the write-side of the copyonwrite option, and calls
1273 * rawexpwrite() with the right parameters to do the actual work.
1275 * @param a The offset where the write should start
1276 * @param buf The buffer to write from
1277 * @param len The length of buf
1278 * @param client The client we're going to write for.
1279 * @param fua Flag to indicate 'Force Unit Access'
1280 * @return 0 on success, nonzero on failure
1282 int expwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
1283 char pagebuf[DIFFPAGESIZE];
1284 off_t mapcnt,mapl,maph;
1289 if (!(client->server->flags & F_COPYONWRITE))
1290 return(rawexpwrite_fully(a, buf, len, client, fua));
1291 DEBUG("Asked to write %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
1293 mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
1295 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
1296 pagestart=mapcnt*DIFFPAGESIZE ;
1297 offset=a-pagestart ;
1298 wrlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
1299 len : (size_t)DIFFPAGESIZE-offset;
1301 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1302 DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
1303 (unsigned long)(client->difmap[mapcnt])) ;
1304 myseek(client->difffile,
1305 client->difmap[mapcnt]*DIFFPAGESIZE+offset);
1306 if (write(client->difffile, buf, wrlen) != wrlen) return -1 ;
1307 } else { /* the block is not there */
1308 myseek(client->difffile,client->difffilelen*DIFFPAGESIZE) ;
1309 client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++;
1310 DEBUG("Page %llu is not here, we put it at %lu\n",
1311 (unsigned long long)mapcnt,
1312 (unsigned long)(client->difmap[mapcnt]));
1313 rdlen=DIFFPAGESIZE ;
1314 if (rawexpread_fully(pagestart, pagebuf, rdlen, client))
1316 memcpy(pagebuf+offset,buf,wrlen) ;
1317 if (write(client->difffile, pagebuf, DIFFPAGESIZE) !=
1321 len-=wrlen ; a+=wrlen ; buf+=wrlen ;
1323 if (client->server->flags & F_SYNC) {
1324 fsync(client->difffile);
1326 /* open question: would it be cheaper to do multiple sync_file_ranges?
1327 as we iterate through the above?
1329 fdatasync(client->difffile);
1335 * Flush data to a client
1337 * @param client The client we're going to write for.
1338 * @return 0 on success, nonzero on failure
1340 int expflush(CLIENT *client) {
1343 if (client->server->flags & F_COPYONWRITE) {
1344 return fsync(client->difffile);
1347 for (i = 0; i < client->export->len; i++) {
1348 FILE_INFO fi = g_array_index(client->export, FILE_INFO, i);
1349 if (fsync(fi.fhandle) < 0)
1357 * Do the initial negotiation.
1359 * @param client The client we're negotiating with.
1361 CLIENT* negotiate(int net, CLIENT *client, GArray* servers) {
1364 uint32_t flags = NBD_FLAG_HAS_FLAGS;
1365 uint16_t smallflags = 0;
1368 memset(zeros, '\0', sizeof(zeros));
1369 if(!client || !client->modern) {
1371 if (write(net, INIT_PASSWD, 8) < 0) {
1372 err_nonfatal("Negotiation failed: %m");
1376 if(!client || client->modern) {
1378 magic = htonll(opts_magic);
1381 magic = htonll(cliserv_magic);
1383 if (write(net, &magic, sizeof(magic)) < 0) {
1384 err_nonfatal("Negotiation failed: %m");
1398 err("programmer error");
1399 if (write(net, &smallflags, sizeof(uint16_t)) < 0)
1400 err("Negotiation failed: %m");
1401 if (read(net, &reserved, sizeof(reserved)) < 0)
1402 err("Negotiation failed: %m");
1403 if (read(net, &magic, sizeof(magic)) < 0)
1404 err("Negotiation failed: %m");
1405 magic = ntohll(magic);
1406 if(magic != opts_magic) {
1410 if (read(net, &opt, sizeof(opt)) < 0)
1411 err("Negotiation failed: %m");
1413 if(opt != NBD_OPT_EXPORT_NAME) {
1417 if (read(net, &namelen, sizeof(namelen)) < 0)
1418 err("Negotiation failed: %m");
1419 namelen = ntohl(namelen);
1420 name = malloc(namelen+1);
1422 if (read(net, name, namelen) < 0)
1423 err("Negotiation failed: %m");
1424 for(i=0; i<servers->len; i++) {
1425 SERVER* serve = &(g_array_index(servers, SERVER, i));
1426 if(!strcmp(serve->servename, name)) {
1427 CLIENT* client = g_new0(CLIENT, 1);
1428 client->server = serve;
1429 client->exportsize = OFFT_MAX;
1431 client->modern = TRUE;
1432 client->transactionlogfd = -1;
1441 size_host = htonll((u64)(client->exportsize));
1442 if (write(net, &size_host, 8) < 0)
1443 err("Negotiation failed: %m");
1444 if (client->server->flags & F_READONLY)
1445 flags |= NBD_FLAG_READ_ONLY;
1446 if (client->server->flags & F_FLUSH)
1447 flags |= NBD_FLAG_SEND_FLUSH;
1448 if (client->server->flags & F_FUA)
1449 flags |= NBD_FLAG_SEND_FUA;
1450 if (client->server->flags & F_ROTATIONAL)
1451 flags |= NBD_FLAG_ROTATIONAL;
1452 if (!client->modern) {
1454 flags = htonl(flags);
1455 if (write(client->net, &flags, 4) < 0)
1456 err("Negotiation failed: %m");
1459 smallflags = (uint16_t)(flags & ~((uint16_t)0));
1460 smallflags = htons(smallflags);
1461 if (write(client->net, &smallflags, sizeof(smallflags)) < 0) {
1462 err("Negotiation failed: %m");
1466 if (write(client->net, zeros, 124) < 0)
1467 err("Negotiation failed: %m");
1471 /** sending macro. */
1472 #define SEND(net,reply) { writeit( net, &reply, sizeof( reply )); \
1473 if (client->transactionlogfd != -1) \
1474 writeit(client->transactionlogfd, &reply, sizeof(reply)); }
1476 #define ERROR(client,reply,errcode) { reply.error = htonl(errcode); SEND(client->net,reply); reply.error = 0; }
1478 * Serve a file to a single client.
1480 * @todo This beast needs to be split up in many tiny little manageable
1481 * pieces. Preferably with a chainsaw.
1483 * @param client The client we're going to serve to.
1484 * @return when the client disconnects
1486 int mainloop(CLIENT *client) {
1487 struct nbd_request request;
1488 struct nbd_reply reply;
1489 gboolean go_on=TRUE;
1493 negotiate(client->net, client, NULL);
1494 DEBUG("Entering request loop!\n");
1495 reply.magic = htonl(NBD_REPLY_MAGIC);
1508 readit(client->net, &request, sizeof(request));
1509 if (client->transactionlogfd != -1)
1510 writeit(client->transactionlogfd, &request, sizeof(request));
1512 request.from = ntohll(request.from);
1513 request.type = ntohl(request.type);
1514 command = request.type & NBD_CMD_MASK_COMMAND;
1515 len = ntohl(request.len);
1517 DEBUG("%s from %llu (%llu) len %d, ", getcommandname(command),
1518 (unsigned long long)request.from,
1519 (unsigned long long)request.from / 512, (unsigned int)len);
1521 if (request.magic != htonl(NBD_REQUEST_MAGIC))
1522 err("Not enough magic.");
1524 memcpy(reply.handle, request.handle, sizeof(reply.handle));
1526 if ((command==NBD_CMD_WRITE) || (command==NBD_CMD_READ)) {
1527 if ((request.from + len) > (OFFT_MAX)) {
1528 DEBUG("[Number too large!]");
1529 ERROR(client, reply, EINVAL);
1533 if (((ssize_t)((off_t)request.from + len) > client->exportsize)) {
1535 ERROR(client, reply, EINVAL);
1540 if (currlen > BUFSIZE - sizeof(struct nbd_reply)) {
1541 currlen = BUFSIZE - sizeof(struct nbd_reply);
1542 msg2(LOG_INFO, "oversized request (this is not a problem)");
1549 msg2(LOG_INFO, "Disconnect request received.");
1550 if (client->server->flags & F_COPYONWRITE) {
1551 if (client->difmap) g_free(client->difmap) ;
1552 close(client->difffile);
1553 unlink(client->difffilename);
1554 free(client->difffilename);
1560 DEBUG("wr: net->buf, ");
1562 readit(client->net, buf, currlen);
1563 DEBUG("buf->exp, ");
1564 if ((client->server->flags & F_READONLY) ||
1565 (client->server->flags & F_AUTOREADONLY)) {
1566 DEBUG("[WRITE to READONLY!]");
1567 ERROR(client, reply, EPERM);
1568 consume(client->net, buf, len-currlen, BUFSIZE);
1571 if (expwrite(request.from, buf, currlen, client,
1572 request.type & NBD_CMD_FLAG_FUA)) {
1573 DEBUG("Write failed: %m" );
1574 ERROR(client, reply, errno);
1575 consume(client->net, buf, len-currlen, BUFSIZE);
1579 request.from += currlen;
1580 currlen = (len < BUFSIZE) ? len : BUFSIZE;
1582 SEND(client->net, reply);
1588 if (expflush(client)) {
1589 DEBUG("Flush failed: %m");
1590 ERROR(client, reply, errno);
1593 SEND(client->net, reply);
1598 DEBUG("exp->buf, ");
1599 memcpy(buf, &reply, sizeof(struct nbd_reply));
1600 if (client->transactionlogfd != -1)
1601 writeit(client->transactionlogfd, &reply, sizeof(reply));
1602 p = buf + sizeof(struct nbd_reply);
1603 writelen = currlen + sizeof(struct nbd_reply);
1605 if (expread(request.from, p, currlen, client)) {
1606 DEBUG("Read failed: %m");
1607 ERROR(client, reply, errno);
1611 DEBUG("buf->net, ");
1612 writeit(client->net, buf, writelen);
1614 request.from += currlen;
1615 currlen = (len < BUFSIZE) ? len : BUFSIZE;
1623 DEBUG ("Ignoring unknown command\n");
1631 * Set up client export array, which is an array of FILE_INFO.
1632 * Also, split a single exportfile into multiple ones, if that was asked.
1633 * @param client information on the client which we want to setup export for
1635 void setupexport(CLIENT* client) {
1637 off_t laststartoff = 0, lastsize = 0;
1638 int multifile = (client->server->flags & F_MULTIFILE);
1640 client->export = g_array_new(TRUE, TRUE, sizeof(FILE_INFO));
1642 /* If multi-file, open as many files as we can.
1643 * If not, open exactly one file.
1644 * Calculate file sizes as we go to get total size. */
1648 gchar* error_string;
1649 mode_t mode = (client->server->flags & F_READONLY) ? O_RDONLY : O_RDWR;
1652 tmpname=g_strdup_printf("%s.%d", client->exportname, i);
1654 tmpname=g_strdup(client->exportname);
1656 DEBUG( "Opening %s\n", tmpname );
1657 fi.fhandle = open(tmpname, mode);
1658 if(fi.fhandle == -1 && mode == O_RDWR) {
1659 /* Try again because maybe media was read-only */
1660 fi.fhandle = open(tmpname, O_RDONLY);
1661 if(fi.fhandle != -1) {
1662 /* Opening the base file in copyonwrite mode is
1664 if(!(client->server->flags & F_COPYONWRITE)) {
1665 client->server->flags |= F_AUTOREADONLY;
1666 client->server->flags |= F_READONLY;
1670 if(fi.fhandle == -1) {
1671 if(multifile && i>0)
1673 error_string=g_strdup_printf(
1674 "Could not open exported file %s: %%m",
1678 fi.startoff = laststartoff + lastsize;
1679 g_array_append_val(client->export, fi);
1682 /* Starting offset and size of this file will be used to
1683 * calculate starting offset of next file */
1684 laststartoff = fi.startoff;
1685 lastsize = size_autodetect(fi.fhandle);
1691 /* Set export size to total calculated size */
1692 client->exportsize = laststartoff + lastsize;
1694 /* Export size may be overridden */
1695 if(client->server->expected_size) {
1696 /* desired size must be <= total calculated size */
1697 if(client->server->expected_size > client->exportsize) {
1698 err("Size of exported file is too big\n");
1701 client->exportsize = client->server->expected_size;
1704 msg3(LOG_INFO, "Size of exported file/device is %llu", (unsigned long long)client->exportsize);
1706 msg3(LOG_INFO, "Total number of files: %d", i);
1710 int copyonwrite_prepare(CLIENT* client) {
1712 if ((client->difffilename = malloc(1024))==NULL)
1713 err("Failed to allocate string for diff file name");
1714 snprintf(client->difffilename, 1024, "%s-%s-%d.diff",client->exportname,client->clientname,
1716 client->difffilename[1023]='\0';
1717 msg3(LOG_INFO,"About to create map and diff file %s",client->difffilename) ;
1718 client->difffile=open(client->difffilename,O_RDWR | O_CREAT | O_TRUNC,0600) ;
1719 if (client->difffile<0) err("Could not create diff file (%m)") ;
1720 if ((client->difmap=calloc(client->exportsize/DIFFPAGESIZE,sizeof(u32)))==NULL)
1721 err("Could not allocate memory") ;
1722 for (i=0;i<client->exportsize/DIFFPAGESIZE;i++) client->difmap[i]=(u32)-1 ;
1728 * Run a command. This is used for the ``prerun'' and ``postrun'' config file
1731 * @param command the command to be run. Read from the config file
1732 * @param file the file name we're about to export
1734 int do_run(gchar* command, gchar* file) {
1738 if(command && *command) {
1739 cmd = g_strdup_printf(command, file);
1747 * Serve a connection.
1749 * @todo allow for multithreading, perhaps use libevent. Not just yet, though;
1750 * follow the road map.
1752 * @param client a connected client
1754 void serveconnection(CLIENT *client) {
1755 if (client->server->transactionlog && (client->transactionlogfd == -1))
1757 if (-1 == (client->transactionlogfd = open(client->server->transactionlog,
1759 S_IRUSR | S_IWUSR)))
1760 g_warning("Could not open transaction log %s",
1761 client->server->transactionlog);
1764 if(do_run(client->server->prerun, client->exportname)) {
1767 setupexport(client);
1769 if (client->server->flags & F_COPYONWRITE) {
1770 copyonwrite_prepare(client);
1773 setmysockopt(client->net);
1776 do_run(client->server->postrun, client->exportname);
1778 if (-1 != client->transactionlogfd)
1780 close(client->transactionlogfd);
1781 client->transactionlogfd = -1;
1786 * Find the name of the file we have to serve. This will use g_strdup_printf
1787 * to put the IP address of the client inside a filename containing
1788 * "%s" (in the form as specified by the "virtstyle" option). That name
1789 * is then written to client->exportname.
1791 * @param net A socket connected to an nbd client
1792 * @param client information about the client. The IP address in human-readable
1793 * format will be written to a new char* buffer, the address of which will be
1794 * stored in client->clientname.
1796 void set_peername(int net, CLIENT *client) {
1797 struct sockaddr_storage addrin;
1798 struct sockaddr_storage netaddr;
1799 struct sockaddr_in *netaddr4 = NULL;
1800 struct sockaddr_in6 *netaddr6 = NULL;
1801 size_t addrinlen = sizeof( addrin );
1802 struct addrinfo hints;
1803 struct addrinfo *ai = NULL;
1804 char peername[NI_MAXHOST];
1805 char netname[NI_MAXHOST];
1811 if (getpeername(net, (struct sockaddr *) &addrin, (socklen_t *)&addrinlen) < 0)
1812 err("getsockname failed: %m");
1814 getnameinfo((struct sockaddr *)&addrin, (socklen_t)addrinlen,
1815 peername, sizeof (peername), NULL, 0, NI_NUMERICHOST);
1817 memset(&hints, '\0', sizeof (hints));
1818 hints.ai_flags = AI_ADDRCONFIG;
1819 e = getaddrinfo(peername, NULL, &hints, &ai);
1822 fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
1827 switch(client->server->virtstyle) {
1829 client->exportname=g_strdup(client->server->exportname);
1832 for(i=0;i<strlen(peername);i++) {
1833 if(peername[i]=='.') {
1838 client->exportname=g_strdup_printf(client->server->exportname, peername);
1841 memcpy(&netaddr, &addrin, addrinlen);
1842 if(ai->ai_family == AF_INET) {
1843 netaddr4 = (struct sockaddr_in *)&netaddr;
1844 (netaddr4->sin_addr).s_addr>>=32-(client->server->cidrlen);
1845 (netaddr4->sin_addr).s_addr<<=32-(client->server->cidrlen);
1847 getnameinfo((struct sockaddr *) netaddr4, (socklen_t) addrinlen,
1848 netname, sizeof (netname), NULL, 0, NI_NUMERICHOST);
1849 tmp=g_strdup_printf("%s/%s", netname, peername);
1850 }else if(ai->ai_family == AF_INET6) {
1851 netaddr6 = (struct sockaddr_in6 *)&netaddr;
1853 shift = 128-(client->server->cidrlen);
1855 while(shift >= 32) {
1856 ((netaddr6->sin6_addr).s6_addr32[i])=0;
1860 (netaddr6->sin6_addr).s6_addr32[i]>>=shift;
1861 (netaddr6->sin6_addr).s6_addr32[i]<<=shift;
1863 getnameinfo((struct sockaddr *)netaddr6, (socklen_t)addrinlen,
1864 netname, sizeof(netname), NULL, 0, NI_NUMERICHOST);
1865 tmp=g_strdup_printf("%s/%s", netname, peername);
1869 client->exportname=g_strdup_printf(client->server->exportname, tmp);
1875 msg4(LOG_INFO, "connect from %s, assigned file is %s",
1876 peername, client->exportname);
1877 client->clientname=g_strdup(peername);
1882 * @param data a pointer to pid_t which should be freed
1884 void destroy_pid_t(gpointer data) {
1889 * Loop through the available servers, and serve them. Never returns.
1891 int serveloop(GArray* servers) {
1892 struct sockaddr_storage addrin;
1893 socklen_t addrinlen=sizeof(addrin);
1901 * Set up the master fd_set. The set of descriptors we need
1902 * to select() for never changes anyway and it buys us a *lot*
1903 * of time to only build this once. However, if we ever choose
1904 * to not fork() for clients anymore, we may have to revisit
1909 for(i=0;i<servers->len;i++) {
1910 if((sock=(g_array_index(servers, SERVER, i)).socket)) {
1911 FD_SET(sock, &mset);
1912 max=sock>max?sock:max;
1916 FD_SET(modernsock, &mset);
1917 max=modernsock>max?modernsock:max;
1920 CLIENT *client = NULL;
1923 memcpy(&rset, &mset, sizeof(fd_set));
1924 if(select(max+1, &rset, NULL, NULL, NULL)>0) {
1929 if(FD_ISSET(modernsock, &rset)) {
1930 if((net=accept(modernsock, (struct sockaddr *) &addrin, &addrinlen)) < 0)
1932 client = negotiate(net, NULL, servers);
1934 err_nonfatal("negotiation failed");
1939 serve = client->server;
1941 for(i=0;i<servers->len && !net;i++) {
1942 serve=&(g_array_index(servers, SERVER, i));
1943 if(FD_ISSET(serve->socket, &rset)) {
1944 if ((net=accept(serve->socket, (struct sockaddr *) &addrin, &addrinlen)) < 0)
1951 if(serve->max_connections > 0 &&
1952 g_hash_table_size(children) >= serve->max_connections) {
1953 msg2(LOG_INFO, "Max connections reached");
1957 if((sock_flags = fcntl(net, F_GETFL, 0))==-1) {
1958 err("fcntl F_GETFL");
1960 if(fcntl(net, F_SETFL, sock_flags &~O_NONBLOCK)==-1) {
1961 err("fcntl F_SETFL ~O_NONBLOCK");
1964 client = g_new0(CLIENT, 1);
1965 client->server=serve;
1966 client->exportsize=OFFT_MAX;
1968 client->transactionlogfd = -1;
1970 set_peername(net, client);
1971 if (!authorized_client(client)) {
1972 msg2(LOG_INFO,"Unauthorized client") ;
1976 msg2(LOG_INFO,"Authorized client") ;
1977 pid=g_malloc(sizeof(pid_t));
1980 if ((*pid=fork())<0) {
1981 msg3(LOG_INFO,"Could not fork (%s)",strerror(errno)) ;
1985 if (*pid>0) { /* parent */
1987 g_hash_table_insert(children, pid, pid);
1991 g_hash_table_destroy(children);
1992 for(i=0;i<servers->len;i++) {
1993 serve=&g_array_index(servers, SERVER, i);
1994 close(serve->socket);
1996 /* FALSE does not free the
1997 actual data. This is required,
1998 because the client has a
1999 direct reference into that
2000 data, and otherwise we get a
2002 g_array_free(servers, FALSE);
2005 msg2(LOG_INFO,"Starting to serve");
2006 serveconnection(client);
2013 void dosockopts(int socket) {
2021 /* lose the pesky "Address already in use" error message */
2022 if (setsockopt(socket,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) {
2023 err("setsockopt SO_REUSEADDR");
2025 if (setsockopt(socket,SOL_SOCKET,SO_KEEPALIVE,&yes,sizeof(int)) == -1) {
2026 err("setsockopt SO_KEEPALIVE");
2029 /* make the listening socket non-blocking */
2030 if ((sock_flags = fcntl(socket, F_GETFL, 0)) == -1) {
2031 err("fcntl F_GETFL");
2033 if (fcntl(socket, F_SETFL, sock_flags | O_NONBLOCK) == -1) {
2034 err("fcntl F_SETFL O_NONBLOCK");
2039 * Connect a server's socket.
2041 * @param serve the server we want to connect.
2043 int setup_serve(SERVER *serve) {
2044 struct addrinfo hints;
2045 struct addrinfo *ai = NULL;
2050 return serve->servename ? 1 : 0;
2052 memset(&hints,'\0',sizeof(hints));
2053 hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG | AI_NUMERICSERV;
2054 hints.ai_socktype = SOCK_STREAM;
2055 hints.ai_family = serve->socket_family;
2057 port = g_strdup_printf ("%d", serve->port);
2061 e = getaddrinfo(serve->listenaddr,port,&hints,&ai);
2066 fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
2072 if(serve->socket_family == AF_UNSPEC)
2073 serve->socket_family = ai->ai_family;
2076 if ((serve->flags) && F_SDP) {
2077 if (ai->ai_family == AF_INET)
2078 ai->ai_family = AF_INET_SDP;
2079 else (ai->ai_family == AF_INET6)
2080 ai->ai_family = AF_INET6_SDP;
2083 if ((serve->socket = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol)) < 0)
2086 dosockopts(serve->socket);
2088 DEBUG("Waiting for connections... bind, ");
2089 e = bind(serve->socket, ai->ai_addr, ai->ai_addrlen);
2090 if (e != 0 && errno != EADDRINUSE)
2093 if (listen(serve->socket, 1) < 0)
2097 if(serve->servename) {
2104 void open_modern(void) {
2105 struct addrinfo hints;
2106 struct addrinfo* ai = NULL;
2110 memset(&hints, '\0', sizeof(hints));
2111 hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG;
2112 hints.ai_socktype = SOCK_STREAM;
2113 hints.ai_family = AF_UNSPEC;
2114 hints.ai_protocol = IPPROTO_TCP;
2115 e = getaddrinfo(modern_listen, NBD_DEFAULT_PORT, &hints, &ai);
2117 fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
2120 if((modernsock = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol))<0) {
2124 dosockopts(modernsock);
2126 if(bind(modernsock, ai->ai_addr, ai->ai_addrlen)) {
2129 if(listen(modernsock, 10) <0) {
2137 * Connect our servers.
2139 void setup_servers(GArray* servers) {
2141 struct sigaction sa;
2144 for(i=0;i<servers->len;i++) {
2145 want_modern |= setup_serve(&(g_array_index(servers, SERVER, i)));
2150 children=g_hash_table_new_full(g_int_hash, g_int_equal, NULL, destroy_pid_t);
2152 sa.sa_handler = sigchld_handler;
2153 sigemptyset(&sa.sa_mask);
2154 sa.sa_flags = SA_RESTART;
2155 if(sigaction(SIGCHLD, &sa, NULL) == -1)
2156 err("sigaction: %m");
2157 sa.sa_handler = sigterm_handler;
2158 sigemptyset(&sa.sa_mask);
2159 sa.sa_flags = SA_RESTART;
2160 if(sigaction(SIGTERM, &sa, NULL) == -1)
2161 err("sigaction: %m");
2165 * Go daemon (unless we specified at compile time that we didn't want this)
2166 * @param serve the first server of our configuration. If its port is zero,
2167 * then do not daemonize, because we're doing inetd then. This parameter
2168 * is only used to create a PID file of the form
2169 * /var/run/nbd-server.<port>.pid; it's not modified in any way.
2171 #if !defined(NODAEMON)
2172 void daemonize(SERVER* serve) {
2175 if(serve && !(serve->port)) {
2181 if(!*pidftemplate) {
2183 strncpy(pidftemplate, "/var/run/nbd-server.%d.pid", 255);
2185 strncpy(pidftemplate, "/var/run/nbd-server.pid", 255);
2188 snprintf(pidfname, 255, pidftemplate, serve ? serve->port : 0);
2189 pidf=fopen(pidfname, "w");
2191 fprintf(pidf,"%d\n", (int)getpid());
2195 fprintf(stderr, "Not fatal; continuing");
2199 #define daemonize(serve)
2200 #endif /* !defined(NODAEMON) */
2203 * Everything beyond this point (in the file) is run in non-daemon mode.
2204 * The stuff above daemonize() isn't.
2207 void serve_err(SERVER* serve, const char* msg) G_GNUC_NORETURN;
2209 void serve_err(SERVER* serve, const char* msg) {
2210 g_message("Export of %s on port %d failed:", serve->exportname,
2216 * Set up user-ID and/or group-ID
2218 void dousers(void) {
2223 gr=getgrnam(rungroup);
2225 str = g_strdup_printf("Invalid group name: %s", rungroup);
2228 if(setgid(gr->gr_gid)<0) {
2229 err("Could not set GID: %m");
2233 pw=getpwnam(runuser);
2235 str = g_strdup_printf("Invalid user name: %s", runuser);
2238 if(setuid(pw->pw_uid)<0) {
2239 err("Could not set UID: %m");
2245 void glib_message_syslog_redirect(const gchar *log_domain,
2246 GLogLevelFlags log_level,
2247 const gchar *message,
2250 int level=LOG_DEBUG;
2254 case G_LOG_FLAG_FATAL:
2255 case G_LOG_LEVEL_CRITICAL:
2256 case G_LOG_LEVEL_ERROR:
2259 case G_LOG_LEVEL_WARNING:
2262 case G_LOG_LEVEL_MESSAGE:
2263 case G_LOG_LEVEL_INFO:
2266 case G_LOG_LEVEL_DEBUG:
2271 syslog(level, "%s", message);
2276 * Main entry point...
2278 int main(int argc, char *argv[]) {
2283 if (sizeof( struct nbd_request )!=28) {
2284 fprintf(stderr,"Bad size of structure. Alignment problems?\n");
2285 exit(EXIT_FAILURE) ;
2288 memset(pidftemplate, '\0', 256);
2291 config_file_pos = g_strdup(CFILE);
2292 serve=cmdline(argc, argv);
2293 servers = parse_cfile(config_file_pos, &err);
2296 serve->socket_family = AF_UNSPEC;
2298 append_serve(serve, servers);
2300 if (!(serve->port)) {
2303 /* You really should define ISSERVER if you're going to use
2304 * inetd mode, but if you don't, closing stdout and stderr
2305 * (which inetd had connected to the client socket) will let it
2309 open("/dev/null", O_WRONLY);
2310 open("/dev/null", O_WRONLY);
2311 g_log_set_default_handler( glib_message_syslog_redirect, NULL );
2313 client=g_malloc(sizeof(CLIENT));
2314 client->server=serve;
2316 client->exportsize=OFFT_MAX;
2317 set_peername(0,client);
2318 serveconnection(client);
2323 if(!servers || !servers->len) {
2324 if(err && !(err->domain == g_quark_from_string("parse_cfile")
2325 && err->code == CFILE_NOTFOUND)) {
2326 g_warning("Could not parse config file: %s",
2327 err ? err->message : "Unknown error");
2331 g_warning("Specifying an export on the command line is deprecated.");
2332 g_warning("Please use a configuration file instead.");
2335 if((!serve) && (!servers||!servers->len)) {
2336 g_message("No configured exports; quitting.");
2341 setup_servers(servers);