2 * Network Block Device - server
4 * Copyright 1996-1998 Pavel Machek, distribute under GPL
5 * <pavel@atrey.karlin.mff.cuni.cz>
6 * Copyright 2001-2004 Wouter Verhelst <wouter@debian.org>
7 * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk>
9 * Version 1.0 - hopefully 64-bit-clean
10 * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au>
11 * Version 1.2 - autodetect size of block devices, thanks to Peter T. Breuer <ptb@it.uc3m.es>
12 * Version 1.5 - can compile on Unix systems that don't have 64 bit integer
13 * type, or don't have 64 bit file offsets by defining FS_32BIT
14 * in compile options for nbd-server *only*. This can be done
15 * with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the
16 * original autoconf input file, or I would make it a configure
17 * option.) Ken Yap <ken@nlc.net.au>.
18 * Version 1.6 - fix autodetection of block device size and really make 64 bit
19 * clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk>
20 * Version 2.0 - Version synchronised with client
21 * Version 2.1 - Reap zombie client processes when they exit. Removed
22 * (uncommented) the _IO magic, it's no longer necessary. Wouter
23 * Verhelst <wouter@debian.org>
24 * Version 2.2 - Auto switch to read-only mode (useful for floppies).
25 * Version 2.3 - Fixed code so that Large File Support works. This
26 * removes the FS_32BIT compile-time directive; define
27 * _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be
28 * using FS_32BIT. This will allow you to use files >2GB instead of
29 * having to use the -m option. Wouter Verhelst <wouter@debian.org>
30 * Version 2.4 - Added code to keep track of children, so that we can
31 * properly kill them from initscripts. Add a call to daemon(),
32 * so that processes don't think they have to wait for us, which is
33 * interesting for initscripts as well. Wouter Verhelst
35 * Version 2.5 - Bugfix release: forgot to reset child_arraysize to
36 * zero after fork()ing, resulting in nbd-server going berserk
37 * when it receives a signal with at least one child open. Wouter
38 * Verhelst <wouter@debian.org>
39 * 10/10/2003 - Added socket option SO_KEEPALIVE (sf.net bug 819235);
40 * rectified type of mainloop::size_host (sf.net bugs 814435 and
41 * 817385); close the PID file after writing to it, so that the
42 * daemon can actually be found. Wouter Verhelst
44 * 10/10/2003 - Size of the data "size_host" was wrong and so was not
45 * correctly put in network endianness. Many types were corrected
46 * (size_t and off_t instead of int). <vspaceg@sourceforge.net>
47 * Version 2.6 - Some code cleanup.
48 * Version 2.7 - Better build system.
49 * 11/02/2004 - Doxygenified the source, modularized it a bit. Needs a
50 * lot more work, but this is a start. Wouter Verhelst
52 * 16/03/2010 - Add IPv6 support.
53 * Kitt Tientanopajai <kitt@kitty.in.th>
54 * Neutron Soutmun <neo.neutron@gmail.com>
55 * Suriya Soutmun <darksolar@gmail.com>
58 /* Includes LFS defines, which defines behaviours of some of the following
59 * headers, so must come before those */
62 #include <sys/types.h>
63 #include <sys/socket.h>
65 #include <sys/select.h> /* select */
66 #include <sys/wait.h> /* wait */
67 #ifdef HAVE_SYS_IOCTL_H
68 #include <sys/ioctl.h>
70 #include <sys/param.h>
71 #ifdef HAVE_SYS_MOUNT_H
72 #include <sys/mount.h> /* For BLKGETSIZE */
74 #include <signal.h> /* sigaction */
76 #include <netinet/tcp.h>
77 #include <netinet/in.h>
85 #include <arpa/inet.h>
95 /* used in cliserv.h, so must come first */
// Program identity, configuration file location, run-as settings and the
// logging/debugging macros used throughout this file.
// NOTE(review): conditional-compilation directives and some declarations are
// not visible in this view, so the pairs of definitions below cannot be
// matched to their guards here - confirm against the full file.
96 #define MY_NAME "nbd_server"
100 #include <sdp_inet.h>
103 /** Default position of the config file */
105 #define SYSCONFDIR "/etc"
// CFILE is built by compile-time concatenation of adjacent string literals.
107 #define CFILE SYSCONFDIR "/nbd-server/config"
109 /** Where our config file actually is */
110 gchar* config_file_pos;
112 /** What user we're running as */
114 /** What group we're running as */
115 gchar* rungroup=NULL;
116 /** whether to export using the old negotiation protocol (port-based) */
117 gboolean do_oldstyle=FALSE;
119 /* Whether we should avoid forking */
122 /** Logging macros, now nothing goes to syslog unless you say ISSERVER */
// First set: forward the priority and the message arguments to syslog.
124 #define msg2(a,b) syslog(a,b)
125 #define msg3(a,b,c) syslog(a,b,c)
126 #define msg4(a,b,c,d) syslog(a,b,c,d)
// Second set: drop the priority argument and hand the rest to g_message.
// Presumably these are the alternative when ISSERVER is not defined - TODO
// confirm, the selecting directive is not shown.
128 #define msg2(a,b) g_message(b)
129 #define msg3(a,b,c) g_message(b,c)
130 #define msg4(a,b,c,d) g_message(b,c,d)
133 /* Debugging macros */
// As shown, DEBUG expands to a plain printf of its arguments.
136 #define DEBUG(...) printf(__VA_ARGS__)
// Fallback so that PACKAGE_VERSION always expands to a string.
140 #ifndef PACKAGE_VERSION
141 #define PACKAGE_VERSION ""
144 * The highest value a variable of type off_t can reach. This is a signed
145 * integer, so set all bits except for the leftmost one.
// NOTE(review): the expansion is not wrapped in outer parentheses, and it
// shifts a 1 into the sign bit of a signed off_t before complementing it -
// confirm this is acceptable for the supported compilers.
147 #define OFFT_MAX ~((off_t)1<<(sizeof(off_t)*8-1))
// Buffer sizes, per-export flag bits, negotiation bits and process-wide
// globals. The F_ values are distinct powers of two; elsewhere in this file
// they are or-ed into and tested against the flags member of a SERVER.
148 #define LINELEN 256 /**< Size of static buffer used to read the
149 authorization file (yuck) */
150 #define BUFSIZE ((1024*1024)+sizeof(struct nbd_reply)) /**< Size of buffer that can hold requests */
151 #define DIFFPAGESIZE 4096 /**< diff file uses those chunks */
152 #define F_READONLY 1 /**< flag to tell us a file is readonly */
153 #define F_MULTIFILE 2 /**< flag to tell us a file is exported using -m */
154 #define F_COPYONWRITE 4 /**< flag to tell us a file is exported using
156 #define F_AUTOREADONLY 8 /**< flag to tell us a file is set to autoreadonly */
157 #define F_SPARSE 16 /**< flag to tell us copyonwrite should use a sparse file */
158 #define F_SDP 32 /**< flag to tell us the export should be done using the Socket Direct Protocol for RDMA */
159 #define F_SYNC 64 /**< Whether to fsync() after a write */
160 #define F_FLUSH 128 /**< Whether server wants FLUSH to be sent by the client */
161 #define F_FUA 256 /**< Whether server wants FUA to be sent by the client */
162 #define F_ROTATIONAL 512 /**< Whether server wants the client to implement the elevator algorithm */
// Table of child processes; the signal handlers in this file look up and
// remove entries using the address of a pid as the key.
163 GHashTable *children;
// NOTE(review): cmdline copies into pidftemplate with a literal length of 256
// rather than sizeof - keep the two in sync if this size changes.
164 char pidfname[256]; /**< name of our PID file */
165 char pidftemplate[256]; /**< template to be used for the filename of the PID file */
166 char default_authname[] = SYSCONFDIR "/nbd-server/allow"; /**< default name of allow file */
// Bit values for negotiation; their users are not visible in this view.
168 #define NEG_INIT (1 << 0)
169 #define NEG_OLD (1 << 1)
170 #define NEG_MODERN (1 << 2)
172 int modernsock=0; /**< Socket for the modern handler. Not used
173 if a client was only specified on the
174 command line; only port used if
175 oldstyle is set to false (and then the
176 command-line client isn't used, gna gna) */
177 char* modern_listen; /**< listenaddr value for modernsock */
// NBD_DEFAULT_PORT is not defined in the visible part of this file; since it
// initialises a char pointer it is presumably a string - TODO confirm.
178 char* modernport=NBD_DEFAULT_PORT; /**< Port number on which to listen for
179 new-style nbd-client connections */
182 * Types of virtualization
185 VIRT_NONE=0, /**< No virtualization */
186 VIRT_IPLIT, /**< Literal IP address as part of the filename */
187 VIRT_IPHASH, /**< Replacing all dots in an ip address by a / before
188 doing the same as in IPLIT */
189 VIRT_CIDR, /**< Every subnet in its own directory */
193 * Variables associated with a server.
196 gchar* exportname; /**< (unprocessed) filename of the file we're exporting */
197 off_t expected_size; /**< size of the exported file as it was told to
198 us through configuration */
199 gchar* listenaddr; /**< The IP address we're listening on */
200 unsigned int port; /**< port we're exporting this file at */
201 char* authname; /**< filename of the authorization file */
202 int flags; /**< flags associated with this exported file */
203 int socket; /**< The socket of this server. */
204 int socket_family; /**< family of the socket */
205 VIRT_STYLE virtstyle;/**< The style of virtualization, if any */
206 uint8_t cidrlen; /**< The length of the mask when we use
207 CIDR-style virtualization */
208 gchar* prerun; /**< command to be ran after connecting a client,
209 but before starting to serve */
210 gchar* postrun; /**< command that will be ran after the client
212 gchar* servename; /**< name of the export as selected by nbd-client */
213 int max_connections; /**< maximum number of opened connections */
214 gchar* transactionlog;/**< filename for transaction log */
218 * Variables associated with a client socket.
221 int fhandle; /**< file descriptor */
222 off_t startoff; /**< starting offset of this file */
226 off_t exportsize; /**< size of the file we're exporting */
227 char *clientname; /**< peer */
228 char *exportname; /**< (processed) filename of the file we're exporting */
229 GArray *export; /**< array of FILE_INFO of exported files;
230 array size is always 1 unless we're
231 doing the multiple file option */
232 int net; /**< The actual client socket */
233 SERVER *server; /**< The server this client is getting data from */
234 char* difffilename; /**< filename of the copy-on-write file, if any */
235 int difffile; /**< filedescriptor of copyonwrite file. @todo
236 shouldn't this be an array too? (cfr export) Or
237 make -m and -c mutually exclusive */
238 u32 difffilelen; /**< number of pages in difffile */
239 u32 *difmap; /**< see comment on the global difmap for this one */
240 gboolean modern; /**< client was negotiated using modern negotiation protocol */
241 int transactionlogfd;/**< fd for transaction log */
245 * Type of configuration file values
248 PARAM_INT, /**< This parameter is an integer */
249 PARAM_STRING, /**< This parameter is a string */
250 PARAM_BOOL, /**< This parameter is a boolean */
254 * Configuration file values
257 gchar *paramname; /**< Name of the parameter, as it appears in
259 gboolean required; /**< Whether this is a required (as opposed to
260 optional) parameter */
261 PARAM_TYPE ptype; /**< Type of the parameter. */
262 gpointer target; /**< Pointer to where the data of this
263 parameter should be written. If ptype is
264 PARAM_BOOL, the data is or'ed rather than
266 gint flagval; /**< Flag mask for this parameter in case ptype
271 * Translate a command name into human readable form
273 * @param command The command number (after applying NBD_CMD_MASK_COMMAND)
274 * @return pointer to the command name
// Map an NBD command number to a constant, statically allocated name string
// for use in log and debug output.
// NOTE(review): the selecting statement and its labels are not visible in
// this view, so which number yields which name, and what is returned for an
// unknown command, cannot be confirmed from here.
276 static inline const char * getcommandname(uint64_t command) {
279 return "NBD_CMD_READ";
281 return "NBD_CMD_WRITE";
283 return "NBD_CMD_DISC";
285 return "NBD_CMD_FLUSH";
293 * Check whether a client is allowed to connect. Works with an authorization
294 * file which contains one line per machine, no wildcards.
296 * @param opts The client who's trying to connect.
297 * @return 0 - authorization refused, 1 - OK
// Decide whether the connecting client may use this export by scanning the
// authorization file named in opts->server->authname one line at a time.
// NOTE(review): several short lines of this function (local declarations,
// return statements, closing braces, any fclose) are not visible in this
// view; the notes below describe only the statements that are shown.
299 int authorized_client(CLIENT *opts) {
300 const char *ERRMSG="Invalid entry '%s' in authfile '%s', so, refusing all connections.";
305 struct in_addr client;
306 struct in_addr cltemp;
// Failure to open the file is reported at LOG_INFO level together with the
// strerror text for errno.
309 if ((f=fopen(opts->server->authname,"r"))==NULL) {
310 msg4(LOG_INFO,"Can't open authorization file %s (%s).",
311 opts->server->authname,strerror(errno)) ;
// NOTE(review): the result of inet_aton is not checked, and client has no
// initialiser, so an unparsable clientname may leave it indeterminate.
315 inet_aton(opts->clientname, &client);
// Each line is read into a buffer of LINELEN bytes.
316 while (fgets(line,LINELEN,f)!=NULL) {
// A line containing a slash is handled as an address/prefix-length entry.
317 if((tmp=index(line, '/'))) {
318 if(strlen(line)<=tmp-line) {
319 msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
323 if(!inet_aton(line,&addr)) {
324 msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
// NOTE(review): len is taken from strtol without a range check. A value of 0
// or above 32 pushes the shift count outside 0..31, which is undefined for a
// 32-bit operand. s_addr is also stored in network byte order by inet_aton,
// so confirm that these shifts mask the intended bits on little-endian hosts.
327 len=strtol(tmp, NULL, 0);
328 addr.s_addr>>=32-len;
329 addr.s_addr<<=32-len;
// The client address is copied so that the masking does not modify it.
330 memcpy(&cltemp,&client,sizeof(client));
331 cltemp.s_addr>>=32-len;
332 cltemp.s_addr<<=32-len;
333 if(addr.s_addr == cltemp.s_addr) {
// NOTE(review): only strlen(clientname) characters are compared, so a line
// that merely starts with the client name also matches (10.0.0.1 would match
// an entry of 10.0.0.10).
337 if (strncmp(line,opts->clientname,strlen(opts->clientname))==0) {
347 * Read data from a file descriptor into a buffer
349 * @param f a file descriptor
350 * @param buf a buffer
351 * @param len the number of bytes to be read
// Read from file descriptor f into buf, treating failures as fatal via err.
// NOTE(review): the surrounding loop and the pointer/length bookkeeping are
// not visible in this view.
353 static inline void readit(int f, void *buf, size_t len) {
// A result of zero or less is an error unless errno is EAGAIN.
// NOTE(review): read returns 0 at end of file without setting errno, so this
// test may then look at a stale errno value - confirm the intended handling
// of a peer that closes the connection.
357 if ((res = read(f, buf, len)) <= 0) {
358 if(errno != EAGAIN) {
359 err("Read failed: %m");
369 * Consume data from an FD that we don't want
371 * @param f a file descriptor
372 * @param buf a buffer
373 * @param len the number of bytes to consume
374 * @param bufsiz the size of the buffer
// Read and discard len bytes from f, using buf (of size bufsiz) as scratch
// space.
// NOTE(review): the loop around these statements is not visible in this view.
376 static inline void consume(int f, void * buf, size_t len, size_t bufsiz) {
// Never request more than the scratch buffer can hold in a single readit.
379 curlen = (len>bufsiz)?bufsiz:len;
380 readit(f, buf, curlen);
387 * Write data from a buffer into a filedescriptor
389 * @param f a file descriptor
390 * @param buf a buffer containing data
391 * @param len the number of bytes to be written
// Write from buf to file descriptor f, treating failures as fatal via err.
// NOTE(review): the surrounding loop and the pointer/length bookkeeping are
// not visible in this view.
393 static inline void writeit(int f, void *buf, size_t len) {
// Any result of zero or less from write is reported through err; unlike
// readit, no errno value is exempted here.
397 if ((res = write(f, buf, len)) <= 0)
398 err("Send failed: %m");
405 * Print out a message about how to use nbd-server. Split out to a separate
406 * function so that we can call it from multiple places
409 printf("This is nbd-server version " VERSION "\n");
410 printf("Usage: [ip:|ip6@]port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-C configuration file] [-p PID file name] [-o section name] [-M max connections]\n"
411 "\t-r|--read-only\t\tread only\n"
412 "\t-m|--multi-file\t\tmultiple file\n"
413 "\t-c|--copy-on-write\tcopy on write\n"
414 "\t-C|--config-file\tspecify an alternate configuration file\n"
415 "\t-l|--authorize-file\tfile with list of hosts that are allowed to\n\t\t\t\tconnect.\n"
416 "\t-p|--pid-file\t\tspecify a filename to write our PID to\n"
417 "\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n"
418 "\t-M|--max-connections\tspecify the maximum number of opened connections\n\n"
419 "\tif port is set to 0, stdin is used (for running from inetd).\n"
420 "\tif file_to_export contains '%%s', it is substituted with the IP\n"
421 "\t\taddress of the machine trying to connect\n"
422 "\tif ip is set, it contains the local IP address on which we're listening.\n\tif not, the server will listen on all local IP addresses\n");
423 printf("Using configuration file %s\n", CFILE);
426 /* Dumps a config file section of the given SERVER*, and exits. */
// Print the given SERVER to stdout in config-file syntax, as a section named
// section_header. Optional keys are printed only when the matching flag bit
// or member is set.
// NOTE(review): the comment above this function says it also exits; the
// statement doing so is not visible in this view.
427 void dump_section(SERVER* serve, gchar* section_header) {
428 printf("[%s]\n", section_header);
429 printf("\texportname = %s\n", serve->exportname);
// NOTE(review): cmdline sets listenaddr to NULL when no address is given, and
// passing NULL for a %s conversion is undefined - confirm a guard is wanted.
430 printf("\tlistenaddr = %s\n", serve->listenaddr);
// NOTE(review): port is declared unsigned int in SERVER but printed with %d.
431 printf("\tport = %d\n", serve->port);
432 if(serve->flags & F_READONLY) {
433 printf("\treadonly = true\n");
435 if(serve->flags & F_MULTIFILE) {
436 printf("\tmultifile = true\n");
438 if(serve->flags & F_COPYONWRITE) {
439 printf("\tcopyonwrite = true\n");
// The size is cast to long long so that the format matches whatever width
// off_t has on this platform.
441 if(serve->expected_size) {
442 printf("\tfilesize = %lld\n", (long long int)serve->expected_size);
444 if(serve->authname) {
445 printf("\tauthfile = %s\n", serve->authname);
451 * Parse the command line.
453 * @param argc the argc argument to main()
454 * @param argv the argv argument to main()
// Build a SERVER from the command-line arguments using getopt_long.
// NOTE(review): the case labels, several declarations and the return
// statements are not visible in this view; which option letter reaches which
// statement below is inferred from the option table and should be confirmed
// against the full file.
456 SERVER* cmdline(int argc, char *argv[]) {
460 struct option long_options[] = {
461 {"read-only", no_argument, NULL, 'r'},
462 {"multi-file", no_argument, NULL, 'm'},
463 {"copy-on-write", no_argument, NULL, 'c'},
464 {"dont-fork", no_argument, NULL, 'd'},
465 {"authorize-file", required_argument, NULL, 'l'},
466 {"config-file", required_argument, NULL, 'C'},
467 {"pid-file", required_argument, NULL, 'p'},
468 {"output-config", required_argument, NULL, 'o'},
// NOTE(review): the usage text in this file advertises --max-connections
// (plural), but the name registered here is max-connection. getopt_long only
// accepts exact names or unambiguous prefixes, so the advertised spelling
// would not match - confirm which spelling is intended.
469 {"max-connection", required_argument, NULL, 'M'},
476 gboolean do_output=FALSE;
477 gchar* section_header="";
// The SERVER starts zeroed, with the default authorization file and
// literal-IP virtualization.
483 serve=g_new0(SERVER, 1);
484 serve->authname = g_strdup(default_authname);
485 serve->virtstyle=VIRT_IPLIT;
// The leading dash in the option string asks getopt_long to hand back
// non-option arguments in order instead of permuting them to the end.
486 while((c=getopt_long(argc, argv, "-C:cdl:mo:rp:M:", long_options, &i))>=0) {
489 /* non-option argument */
// nonspecial counts the positional arguments seen so far and selects how
// the current one is interpreted.
490 switch(nonspecial++) {
// First positional argument: an optional listen address plus the port. When
// the argument holds at most one colon it is split at the colon; the other
// visible branches split at an at-sign instead.
492 if(strchr(optarg, ':') == strrchr(optarg, ':')) {
493 addr_port=g_strsplit(optarg, ":", 2);
495 /* Check for "@" - maybe user using this separator
498 g_strfreev(addr_port);
499 addr_port=g_strsplit(optarg, "@", 2);
502 addr_port=g_strsplit(optarg, "@", 2);
// NOTE(review): strtol results are stored without validation or range check.
506 serve->port=strtol(addr_port[1], NULL, 0);
507 serve->listenaddr=g_strdup(addr_port[0]);
509 serve->listenaddr=NULL;
510 serve->port=strtol(addr_port[0], NULL, 0);
512 g_strfreev(addr_port);
// Export file name: it must begin with a slash.
515 serve->exportname = g_strdup(optarg);
516 if(serve->exportname[0] != '/') {
517 fprintf(stderr, "E: The to be exported file needs to be an absolute filename!\n");
// Size argument: a number optionally followed by a k/K/m/M suffix. The
// scaling by suffix is not visible in this view.
522 last=strlen(optarg)-1;
524 if (suffix == 'k' || suffix == 'K' ||
525 suffix == 'm' || suffix == 'M')
527 es = (off_t)atoll(optarg);
535 serve->expected_size = es;
540 serve->flags |= F_READONLY;
543 serve->flags |= F_MULTIFILE;
547 section_header = g_strdup(optarg);
// NOTE(review): strncpy does not write a terminating NUL when optarg is 256
// characters or longer, which would leave pidftemplate unterminated.
550 strncpy(pidftemplate, optarg, 256);
553 serve->flags |=F_COPYONWRITE;
// A config-file path given on the command line replaces any earlier value.
559 g_free(config_file_pos);
560 config_file_pos=g_strdup(optarg);
// Likewise the default authname allocated above is freed before replacement.
563 g_free(serve->authname);
564 serve->authname=g_strdup(optarg);
567 serve->max_connections = strtol(optarg, NULL, 0);
575 /* What's left: the port to export, the name of the to be exported
576 * file, and, optionally, the size of the file, in that order. */
585 g_critical("Need a complete configuration on the command line to output a config file section!");
588 dump_section(serve, section_header);
594 * Error codes for config file parsing
597 CFILE_NOTFOUND, /**< The configuration file is not found */
598 CFILE_MISSING_GENERIC, /**< The (required) group "generic" is missing */
599 CFILE_KEY_MISSING, /**< A (required) key is missing */
600 CFILE_VALUE_INVALID, /**< A value is syntactically invalid */
601 CFILE_VALUE_UNSUPPORTED,/**< A value is not supported in this build */
602 CFILE_PROGERR, /**< Programmer error */
603 CFILE_NO_EXPORTS, /**< A config file was specified that does not
604 define any exports */
605 CFILE_INCORRECT_PORT, /**< The reserved port was specified for an
610 * Remove a SERVER from memory. Used from the hash table
// Release the strings owned by a SERVER, received as an untyped pointer so
// the function can serve as a destroy callback.
// NOTE(review): the cast of s to SERVER, some of the guarding conditions and
// any release of the SERVER itself are not visible in this view.
612 void remove_server(gpointer s) {
616 g_free(server->exportname);
// NOTE(review): dup_serve in this file allocates authname with plain strdup
// while it is released here with g_free - confirm the allocators may be
// mixed on every supported GLib version.
618 g_free(server->authname);
// g_free accepts NULL, so the explicit checks below are not strictly needed.
619 if(server->listenaddr)
620 g_free(server->listenaddr);
622 g_free(server->prerun);
624 g_free(server->postrun);
625 if(server->transactionlog)
626 g_free(server->transactionlog);
632 * @param s the old server we want to duplicate
633 * @return new duplicated server
// Create a deep copy of a SERVER: scalar members are assigned and string
// members are duplicated, so the copy owns its own strings.
// NOTE(review): the guards around most string copies and the return
// statement are not visible in this view.
635 SERVER* dup_serve(SERVER *s) {
636 SERVER *serve = NULL;
// g_new0 zero-fills, so members that are not copied below stay 0/NULL.
638 serve=g_new0(SERVER, 1);
643 serve->exportname = g_strdup(s->exportname);
645 serve->expected_size = s->expected_size;
648 serve->listenaddr = g_strdup(s->listenaddr);
650 serve->port = s->port;
// NOTE(review): this is the only string copied with strdup instead of
// g_strdup, yet remove_server releases it with g_free. strdup also does not
// accept NULL, unlike g_strdup - confirm the (not visible) guard.
653 serve->authname = strdup(s->authname);
655 serve->flags = s->flags;
// The socket descriptor is copied as a plain number, not duplicated.
656 serve->socket = s->socket;
657 serve->socket_family = s->socket_family;
658 serve->virtstyle = s->virtstyle;
659 serve->cidrlen = s->cidrlen;
662 serve->prerun = g_strdup(s->prerun);
665 serve->postrun = g_strdup(s->postrun);
667 if(s->transactionlog)
668 serve->transactionlog = g_strdup(s->transactionlog);
671 serve->servename = g_strdup(s->servename);
673 serve->max_connections = s->max_connections;
679 * append new server to array
681 * @param a server array
682 * @return 0 success, -1 error
// Resolve the listen address and port of s with getaddrinfo and append one
// SERVER entry to the array a for each address returned.
// NOTE(review): the statement that creates ns, the error branches, the
// release of port/ai and the return statements are not visible in this view.
684 int append_serve(SERVER *s, GArray *a) {
686 struct addrinfo hints;
687 struct addrinfo *ai = NULL;
688 struct addrinfo *rp = NULL;
689 char host[NI_MAXHOST];
695 err("Invalid parsing server");
// getaddrinfo takes the service as a string, so the numeric port is
// formatted into a freshly allocated string first.
699 port = g_strdup_printf("%d", s->port);
// Request TCP stream addresses of any family suitable for binding a
// listening socket (AI_PASSIVE), limited to families configured on this
// host (AI_ADDRCONFIG).
701 memset(&hints,'\0',sizeof(hints));
702 hints.ai_family = AF_UNSPEC;
703 hints.ai_socktype = SOCK_STREAM;
704 hints.ai_flags = AI_ADDRCONFIG | AI_PASSIVE;
705 hints.ai_protocol = IPPROTO_TCP;
707 e = getaddrinfo(s->listenaddr, port, &hints, &ai);
713 for (rp = ai; rp != NULL; rp = rp->ai_next) {
// NI_NUMERICHOST yields the numeric form of the address without a reverse
// name lookup.
714 e = getnameinfo(rp->ai_addr, rp->ai_addrlen, host, sizeof(host), NULL, 0, NI_NUMERICHOST);
716 if (e != 0) { // error
717 fprintf(stderr, "getnameinfo: %s\n", gai_strerror(e));
721 // duplicate server and set listenaddr to resolved IP address
// NOTE(review): if ns comes from dup_serve it already owns a copy of
// listenaddr, which this assignment would overwrite without freeing. The
// array append below copies the struct by value, so the ns allocation itself
// also needs releasing - confirm both against the lines not shown.
724 ns->listenaddr = g_strdup(host);
725 ns->socket_family = rp->ai_family;
726 g_array_append_val(a, *ns);
734 fprintf(stderr, "getaddrinfo failed on listen host/address: %s (%s)\n", s->listenaddr ? s->listenaddr : "any", gai_strerror(e));
745 * Parse the config file.
747 * @param f the name of the config file
748 * @param e a GError. @see CFILE_ERRORS for what error values this function can
750 * @return a GArray of SERVER structures. If the config file is empty or does not
751 * exist, returns an empty array; if the config file contains an
752 * error, returns NULL, and e is set appropriately
// Load the key file f with GLib GKeyFile and turn every group after the
// leading generic group into SERVER entries of the returned array. The two
// tables below drive the parsing: each row names a key, says whether it is
// required, gives its type, the address to store into and, for booleans, the
// flag bit to set or clear.
// NOTE(review): many short lines (declarations, the table headers, case
// labels, returns) are not visible in this view; the notes below describe
// only the statements that are shown.
754 GArray* parse_cfile(gchar* f, GError** e) {
755 const char* DEFAULT_ERROR = "Could not parse %s in group %s: %s";
756 const char* MISSING_REQUIRED_ERROR = "Could not find required value %s in group %s: %s";
758 gchar *virtstyle=NULL;
// Keys accepted in export groups; the targets point into the scratch SERVER s.
760 { "exportname", TRUE, PARAM_STRING, &(s.exportname), 0 },
761 { "port", TRUE, PARAM_INT, &(s.port), 0 },
762 { "authfile", FALSE, PARAM_STRING, &(s.authname), 0 },
// NOTE(review): PARAM_INT values are stored through a gint pointer further
// down, but expected_size is declared off_t. Where off_t is wider than gint
// only part of the member is written - confirm this is intended.
763 { "filesize", FALSE, PARAM_INT, &(s.expected_size), 0 },
764 { "virtstyle", FALSE, PARAM_STRING, &(virtstyle), 0 },
765 { "prerun", FALSE, PARAM_STRING, &(s.prerun), 0 },
766 { "postrun", FALSE, PARAM_STRING, &(s.postrun), 0 },
767 { "transactionlog", FALSE, PARAM_STRING, &(s.transactionlog), 0 },
768 { "readonly", FALSE, PARAM_BOOL, &(s.flags), F_READONLY },
769 { "multifile", FALSE, PARAM_BOOL, &(s.flags), F_MULTIFILE },
770 { "copyonwrite", FALSE, PARAM_BOOL, &(s.flags), F_COPYONWRITE },
771 { "sparse_cow", FALSE, PARAM_BOOL, &(s.flags), F_SPARSE },
772 { "sdp", FALSE, PARAM_BOOL, &(s.flags), F_SDP },
773 { "sync", FALSE, PARAM_BOOL, &(s.flags), F_SYNC },
774 { "flush", FALSE, PARAM_BOOL, &(s.flags), F_FLUSH },
775 { "fua", FALSE, PARAM_BOOL, &(s.flags), F_FUA },
776 { "rotational", FALSE, PARAM_BOOL, &(s.flags), F_ROTATIONAL },
777 { "listenaddr", FALSE, PARAM_STRING, &(s.listenaddr), 0 },
778 { "maxconnections", FALSE, PARAM_INT, &(s.max_connections), 0 },
780 const int lp_size=sizeof(lp)/sizeof(PARAM);
// Keys accepted in the generic group; the targets are file-level globals.
// Note that port is a string here but an integer in the export table above.
782 { "user", FALSE, PARAM_STRING, &runuser, 0 },
783 { "group", FALSE, PARAM_STRING, &rungroup, 0 },
784 { "oldstyle", FALSE, PARAM_BOOL, &do_oldstyle, 1 },
785 { "listenaddr", FALSE, PARAM_STRING, &modern_listen, 0 },
786 { "port", FALSE, PARAM_STRING, &modernport, 0 },
789 int p_size=sizeof(gp)/sizeof(PARAM);
792 const char *err_msg=NULL;
801 errdomain = g_quark_from_string("parse_cfile");
802 cfile = g_key_file_new();
// The result array stores SERVER structures by value; the second argument
// asks GLib to zero newly allocated elements.
803 retval = g_array_new(FALSE, TRUE, sizeof(SERVER));
804 if(!g_key_file_load_from_file(cfile, f, G_KEY_FILE_KEEP_COMMENTS |
805 G_KEY_FILE_KEEP_TRANSLATIONS, &err)) {
806 g_set_error(e, errdomain, CFILE_NOTFOUND, "Could not open config file %s.", f);
807 g_key_file_free(cfile);
// The first group in the file has to be the one named generic.
810 startgroup = g_key_file_get_start_group(cfile);
811 if(!startgroup || strcmp(startgroup, "generic")) {
812 g_set_error(e, errdomain, CFILE_MISSING_GENERIC, "Config file does not contain the [generic] group!");
813 g_key_file_free(cfile);
816 groups = g_key_file_get_groups(cfile, NULL);
817 for(i=0;groups[i];i++) {
// The scratch SERVER is cleared for every group so that no value leaks from
// one export to the next.
818 memset(&s, '\0', sizeof(SERVER));
820 /* After the [generic] group, start parsing exports */
825 for(j=0;j<p_size;j++) {
826 g_assert(p[j].target != NULL);
827 g_assert(p[j].ptype==PARAM_INT||p[j].ptype==PARAM_STRING||p[j].ptype==PARAM_BOOL);
// Integer keys are written through a gint pointer.
830 *((gint*)p[j].target) =
831 g_key_file_get_integer(cfile,
// String keys store the newly allocated string returned by GLib.
837 *((gchar**)p[j].target) =
838 g_key_file_get_string(cfile,
// Boolean keys set or clear the row flag bit in the integer at target.
844 value = g_key_file_get_boolean(cfile,
846 p[j].paramname, &err);
849 *((gint*)p[j].target) |= p[j].flagval;
851 *((gint*)p[j].target) &= ~(p[j].flagval);
// NOTE(review): p[j].target is the address of the storage, not the stored
// value. For the export-group port row it points at an integer, so handing it
// to strcmp as a C string looks wrong - confirm the intended comparison with
// modernport.
856 if(!strcmp(p[j].paramname, "port") && !strcmp(p[j].target, modernport)) {
857 g_set_error(e, errdomain, CFILE_INCORRECT_PORT, "Config file specifies new-style port for oldstyle export");
858 g_key_file_free(cfile);
// A key that is simply absent is only an error for required parameters; any
// other GLib error is reported with the generic message.
862 if(err->code == G_KEY_FILE_ERROR_KEY_NOT_FOUND) {
864 /* Ignore not-found error for optional values */
868 err_msg = MISSING_REQUIRED_ERROR;
871 err_msg = DEFAULT_ERROR;
873 g_set_error(e, errdomain, CFILE_VALUE_INVALID, err_msg, p[j].paramname, groups[i], err->message);
874 g_array_free(retval, TRUE);
876 g_key_file_free(cfile);
// The virtstyle value is matched by prefix. For cidrhash the text following
// those eight characters is parsed as the mask length, which is why a total
// length below ten is rejected as missing the length.
881 if(!strncmp(virtstyle, "none", 4)) {
882 s.virtstyle=VIRT_NONE;
883 } else if(!strncmp(virtstyle, "ipliteral", 9)) {
884 s.virtstyle=VIRT_IPLIT;
885 } else if(!strncmp(virtstyle, "iphash", 6)) {
886 s.virtstyle=VIRT_IPHASH;
887 } else if(!strncmp(virtstyle, "cidrhash", 8)) {
888 s.virtstyle=VIRT_CIDR;
889 if(strlen(virtstyle)<10) {
890 g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s: missing length", virtstyle, groups[i]);
891 g_array_free(retval, TRUE);
892 g_key_file_free(cfile);
// NOTE(review): the strtol result is stored in the 8-bit cidrlen without a
// range check.
895 s.cidrlen=strtol(virtstyle+8, NULL, 0);
897 g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s", virtstyle, groups[i]);
898 g_array_free(retval, TRUE);
899 g_key_file_free(cfile);
902 if(s.port && !do_oldstyle) {
903 g_warning("A port was specified, but oldstyle exports were not requested. This may not do what you expect.");
904 g_warning("Please read 'man 5 nbd-server' and search for oldstyle for more info");
// Default used when the group gives no virtstyle key (the condition is not
// visible in this view).
907 s.virtstyle=VIRT_IPLIT;
909 /* Don't need to free this, it's not our string */
911 /* Don't append values for the [generic] group */
913 s.socket_family = AF_UNSPEC;
// servename borrows the group name from the groups vector; dup_serve, which
// is defined in this file, duplicates servename when it copies a SERVER.
914 s.servename = groups[i];
// NOTE(review): the int result of append_serve is not examined here, so a
// failed address lookup for this export goes unnoticed at this level.
916 append_serve(&s, retval);
// Presumably compiled only into builds without SDP support - TODO confirm,
// the guarding directive is not visible.
923 if(s.flags & F_SDP) {
924 g_set_error(e, errdomain, CFILE_VALUE_UNSUPPORTED, "This nbd-server was built without support for SDP, yet group %s uses it", groups[i]);
925 g_array_free(retval, TRUE);
926 g_key_file_free(cfile);
932 g_set_error(e, errdomain, CFILE_NO_EXPORTS, "The config file does not specify any exports");
934 g_key_file_free(cfile);
939 * Signal handler for SIGCHLD
940 * @param s the signal we're handling (must be SIGCHLD, or something
// Reap every child that has terminated and drop it from the children table.
// NOTE(review): the logging macros, printf-based DEBUG and the GLib hash
// table calls used below are not async-signal-safe, which matters if this
// runs directly as a signal handler as its name and header comment say.
943 void sigchld_handler(int s) {
// WNOHANG makes waitpid return instead of blocking once no further child
// has changed state, which ends the loop.
948 while((pid=waitpid(-1, &status, WNOHANG)) > 0) {
949 if(WIFEXITED(status)) {
950 msg3(LOG_INFO, "Child exited with %d", WEXITSTATUS(status));
// The table is looked up using the address of the local pid as the key.
952 i=g_hash_table_lookup(children, &pid);
954 msg3(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid);
// NOTE(review): pid is passed to a %d conversion without a cast (the log
// line above casts it to long), and this message has no trailing newline.
956 DEBUG("Removing %d from the list of children", pid);
957 g_hash_table_remove(children, &pid);
963 * Kill a child. Called from sigterm_handler::g_hash_table_foreach.
966 * @param value the value corresponding to the above key
967 * @param user_data a pointer which we always set to 1, so that we know what
// Per-entry callback with the GHFunc signature; sigterm_handler passes it to
// g_hash_table_foreach over the children table.
// NOTE(review): the statements that act on key and value are not visible in
// this view.
970 void killchild(gpointer key, gpointer value, gpointer user_data) {
// user_data is the address of an int supplied by the caller.
972 int *parent=user_data;
979 * Handle SIGTERM and dispatch it to our children
980 * @param s the signal we're handling (must be SIGTERM, or something
981 * is severely wrong).
// Walk the children table, invoking killchild once for every entry.
// NOTE(review): the declaration of parent and whatever follows the iteration
// are not visible in this view.
983 void sigterm_handler(int s) {
// The address of the local parent is handed to each killchild call as its
// user_data argument.
986 g_hash_table_foreach(children, killchild, &parent);
996 * Detect the size of a file.
998 * @param fhandle An open filedescriptor
999 * @return the size of the file, or OFFT_MAX if detection was
// Determine the size in bytes of the object behind fhandle by trying, in
// order: the BLKGETSIZE64 ioctl, fstat, and seeking to the end.
// NOTE(review): some declarations, conditions and return statements are not
// visible in this view. The header comment promises OFFT_MAX on failure,
// while the last visible statement is a call to err - confirm which applies.
1002 off_t size_autodetect(int fhandle) {
1005 struct stat stat_buf;
// Block-device path, compiled only when the needed headers are available.
1008 #ifdef HAVE_SYS_MOUNT_H
1009 #ifdef HAVE_SYS_IOCTL_H
1011 DEBUG("looking for export size with ioctl BLKGETSIZE64\n");
// A successful ioctl reporting zero bytes is treated like a failure and
// falls through to the next method.
1012 if (!ioctl(fhandle, BLKGETSIZE64, &bytes) && bytes) {
1013 return (off_t)bytes;
1015 #endif /* BLKGETSIZE64 */
1016 #endif /* HAVE_SYS_IOCTL_H */
1017 #endif /* HAVE_SYS_MOUNT_H */
1019 DEBUG("looking for fhandle size with fstat\n");
// st_size is zeroed beforehand so that the later test sees a defined value.
1020 stat_buf.st_size = 0;
1021 error = fstat(fhandle, &stat_buf);
// Only a strictly positive st_size is accepted as the answer.
1023 if(stat_buf.st_size > 0)
1024 return (off_t)stat_buf.st_size;
1026 err("fstat failed: %m");
1029 DEBUG("looking for fhandle size with lseek SEEK_END\n");
// Last resort: the offset of the end of the file. This moves the file
// position of fhandle.
1030 es = lseek(fhandle, (off_t)0, SEEK_END);
1031 if (es > ((off_t)0)) {
// The debug output encodes errno as 1 (EBADF), 2 (ESPIPE), 3 (EINVAL) or 4
// (anything else).
1034 DEBUG("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4)));
1037 err("Could not find size of exported block device: %m");
1042 * Get the file handle and offset, given an export offset.
1044 * @param export An array of export files
1045 * @param a The offset to get corresponding file/offset for
1046 * @param fhandle [out] File descriptor
1047 * @param foffset [out] Offset into fhandle
1048 * @param maxbytes [out] Tells how many bytes can be read/written
1049 * from fhandle starting at foffset (0 if there is no limit)
1050 * @return 0 on success, -1 on failure
// Translate the export offset a into the file that contains it: the file
// descriptor, the offset inside that file, and how many bytes remain before
// the next file begins.
// NOTE(review): the range check, the updates of start and end inside the
// search and the return statements are not visible in this view.
1052 int get_filepos(GArray* export, off_t a, int* fhandle, off_t* foffset, size_t* maxbytes ) {
1053 /* Negative offset not allowed */
1057 /* Binary search for last file with starting offset <= a */
1060 int end = export->len - 1;
1061 while( start <= end ) {
1062 int mid = (start + end) / 2;
// g_array_index yields the element itself, so fi is a by-value copy.
1063 fi = g_array_index(export, FILE_INFO, mid);
1064 if( fi.startoff < a ) {
1066 } else if( fi.startoff > a ) {
1074 /* end should never go negative, since first startoff is 0 and a >= 0 */
// After the search, end indexes the file whose range contains a.
1077 fi = g_array_index(export, FILE_INFO, end);
1078 *fhandle = fi.fhandle;
1079 *foffset = a - fi.startoff;
// When another file follows, the caller may only touch the bytes up to
// where that file starts.
1081 if( end+1 < export->len ) {
1082 FILE_INFO fi_next = g_array_index(export, FILE_INFO, end+1);
1083 *maxbytes = fi_next.startoff - a;
1090 * seek to a position in a file, with error handling.
1091 * @param handle a filedescriptor
1092 * @param a position to seek to
1093 * @todo get rid of this; lastpoint is a global variable right now, but it
1094 * shouldn't be. If we pass it on as a parameter, that makes things a *lot*
// Position the file offset of handle at the absolute offset a (SEEK_SET).
// A negative result from lseek is reported through err.
1097 void myseek(int handle,off_t a) {
1098 if (lseek(handle, a, SEEK_SET) < 0) {
1099 err("Can not seek locally!\n");
1104 * Write an amount of bytes at a given offset to the right file. This
1105 * abstracts the write-side of the multiple file option.
1107 * @param a The offset where the write should start
1108 * @param buf The buffer to write from
1109 * @param len The length of buf
1110 * @param client The client we're serving for
1111 * @param fua Flag to indicate 'Force Unit Access'
1112 * @return The number of bytes actually written, or -1 in case of an error
// Write up to len bytes from buf at export offset a. get_filepos picks the
// backing file and the offset inside it; the data is then written with a
// seek followed by a single write call.
// NOTE(review): some declarations, the length clamp, the sync calls and the
// return statements are not visible in this view.
1114 ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
// A nonzero result from get_filepos means a could not be mapped to a file.
1120 if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
// A maxbytes of zero means no limit; otherwise the request is presumably cut
// down to what fits in this file (the assignment is not visible), which is
// why fewer bytes than requested can be written.
1122 if(maxbytes && len > maxbytes)
1125 DEBUG("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle, (long long unsigned)foffset, (unsigned int)len, fua);
1127 myseek(fhandle, foffset);
1128 retval = write(fhandle, buf, len);
// Exports flagged F_SYNC get extra flushing after the write; the call itself
// is not visible in this view.
1129 if(client->server->flags & F_SYNC) {
1133 /* This is where we would do the following
1134 * #ifdef USE_SYNC_FILE_RANGE
1135 * However, we don't, for the reasons set out below
1136 * by Christoph Hellwig <hch@infradead.org>
1139 * fdatasync is equivalent to fsync except that it does not flush
1140 * non-essential metadata (basically just timestamps in practice), but it
1141 * does flush metadata requried to find the data again, e.g. allocation
1142 * information and extent maps. sync_file_range does nothing but flush
1143 * out pagecache content - it means you basically won't get your data
1144 * back in case of a crash if you either:
1146 * a) have a volatile write cache in your disk (e.g. any normal SATA disk)
1147 * b) are using a sparse file on a filesystem
1148 * c) are using a fallocate-preallocated file on a filesystem
1149 * d) use any file on a COW filesystem like btrfs
1151 * e.g. it only does anything useful for you if you do not have a volatile
1152 * write cache, and either use a raw block device node, or just overwrite
1153 * an already fully allocated (and not preallocated) file on a non-COW
1157 * What we should do is open a second FD with O_DSYNC set, then write to
1158 * that when appropriate. However, with a Linux client, every REQ_FUA
1159 * immediately follows a REQ_FLUSH, so fdatasync does not cause performance
// NOTE(review): going by the explanation above, this sync_file_range call is
// presumably compiled out in favour of fdatasync - the directives around it
// are not visible, so confirm.
1164 sync_file_range(fhandle, foffset, len,
1165 SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE |
1166 SYNC_FILE_RANGE_WAIT_AFTER);
1175 * Call rawexpwrite repeatedly until all data has been written.
1177 * @param a The offset where the write should start
1178 * @param buf The buffer to write from
1179 * @param len The length of buf
1180 * @param client The client we're serving for
1181 * @param fua Flag to indicate 'Force Unit Access'
1182 * @return 0 on success, nonzero on failure
// Keep calling rawexpwrite until len bytes have been written or a call
// returns zero or a negative value.
// NOTE(review): the declaration of ret and the loop body, which presumably
// advances a and buf and reduces len by ret, are not visible in this view.
1184 int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
1187 while(len > 0 && (ret=rawexpwrite(a, buf, len, client, fua)) > 0 ) {
// Result is 0 only when no call failed and nothing is left to write.
1192 return (ret < 0 || len != 0);
1196 * Read an amount of bytes at a given offset from the right file. This
1197 * abstracts the read-side of the multiple files option.
1199 * @param a The offset where the read should start
1200 * @param buf A buffer to read into
1201 * @param len The size of buf
1202 * @param client The client we're serving for
1203 * @return The number of bytes actually read, or -1 in case of an
/*
 * One read attempt against the backing file that contains export offset a.
 * get_filepos() is asked for the descriptor (fhandle), the offset within that
 * file (foffset) and a byte limit (maxbytes) for the given export offset.
 */
1206 ssize_t rawexpread(off_t a, char *buf, size_t len, CLIENT *client) {
/* A nonzero result from get_filepos() takes the branch below (its body is not visible here). */
1211 if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
/* Only a nonzero maxbytes that is smaller than len triggers this branch. */
1213 if(maxbytes && len > maxbytes)
1216 DEBUG("(READ from fd %d offset %llu len %u), ", fhandle, (long long unsigned int)foffset, (unsigned int)len);
/* Seek, then issue a single read(); its result (possibly a short count) is returned unchanged. */
1218 myseek(fhandle, foffset);
1219 return read(fhandle, buf, len);
1223 * Call rawexpread repeatedly until all data has been read.
1224 * @return 0 on success, nonzero on failure
/*
 * Keeps calling rawexpread() while bytes remain (len > 0) and the previous
 * call returned a positive count; a return of 0 or a negative value ends the loop.
 * NOTE(review): the loop body is not visible in this extract; presumably it
 * advances a/buf and reduces len by ret -- confirm against the full file.
 */
1226 int rawexpread_fully(off_t a, char *buf, size_t len, CLIENT *client) {
1229 while(len > 0 && (ret=rawexpread(a, buf, len, client)) > 0 ) {
/* Failure (nonzero) when the last call returned an error or bytes are still outstanding. */
1234 return (ret < 0 || len != 0);
1238 * Read an amount of bytes at a given offset from the right file. This
1239 * abstracts the read-side of the copyonwrite stuff, and calls
1240 * rawexpread() with the right parameters to do the actual work.
1241 * @param a The offset where the read should start
1242 * @param buf A buffer to read into
1243 * @param len The size of buf
1244 * @param client The client we're going to read for
1245 * @return 0 on success, nonzero on failure
/*
 * Read entry point used by the request loop. Without F_COPYONWRITE the call is
 * forwarded to rawexpread_fully(). With it, the range is walked one
 * DIFFPAGESIZE page at a time and each page is read either from the diff file
 * (when client->difmap has an entry for it) or from the original export.
 */
1247 int expread(off_t a, char *buf, size_t len, CLIENT *client) {
1248 off_t rdlen, offset;
1249 off_t mapcnt, mapl, maph, pagestart;
1251 if (!(client->server->flags & F_COPYONWRITE))
1252 return(rawexpread_fully(a, buf, len, client));
1253 DEBUG("Asked to read %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
/* First and last page index touched by [a, a+len). */
1255 mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
1257 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
1258 pagestart=mapcnt*DIFFPAGESIZE;
/*
 * rdlen is the smaller of the bytes still wanted and the bytes left in this
 * page after 'offset'.
 * NOTE(review): the assignment to 'offset' is not visible in this extract;
 * expwrite() uses offset=a-pagestart at the same point -- confirm.
 */
1260 rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
1261 len : (size_t)DIFFPAGESIZE-offset;
/* (u32)(-1) in difmap marks a page that has no copy in the diff file. */
1262 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1263 DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
1264 (unsigned long)(client->difmap[mapcnt]));
/*
 * NOTE(review): difmap[] is u32; whether this product is computed in 32 bits
 * depends on the type of DIFFPAGESIZE, which is not visible here -- confirm
 * it cannot overflow for large diff files.
 */
1265 myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset);
/* A short read from the diff file is treated as an error. */
1266 if (read(client->difffile, buf, rdlen) != rdlen) return -1;
1267 } else { /* the block is not there */
1268 DEBUG("Page %llu is not here, we read the original one\n",
1269 (unsigned long long)mapcnt);
1270 if(rawexpread_fully(a, buf, rdlen, client)) return -1;
/* Advance past the piece just read. */
1272 len-=rdlen; a+=rdlen; buf+=rdlen;
1278 * Write an amount of bytes at a given offset to the right file. This
1279 * abstracts the write-side of the copyonwrite option, and calls
1280 * rawexpwrite() with the right parameters to do the actual work.
1282 * @param a The offset where the write should start
1283 * @param buf The buffer to write from
1284 * @param len The length of buf
1285 * @param client The client we're going to write for.
1286 * @param fua Flag to indicate 'Force Unit Access'
1287 * @return 0 on success, nonzero on failure
/*
 * Write entry point used by the request loop. Without F_COPYONWRITE the call
 * is forwarded to rawexpwrite_fully(). With it, nothing is written to the
 * original export: the range is walked one DIFFPAGESIZE page at a time and
 * each page is written into client->difffile, with client->difmap recording
 * where each page lives in that file.
 */
1289 int expwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
/* Scratch space for one full page, used when a page is copied into the diff file for the first time. */
1290 char pagebuf[DIFFPAGESIZE];
1291 off_t mapcnt,mapl,maph;
1296 if (!(client->server->flags & F_COPYONWRITE))
1297 return(rawexpwrite_fully(a, buf, len, client, fua));
1298 DEBUG("Asked to write %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
/* First and last page index touched by [a, a+len). */
1300 mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
1302 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
1303 pagestart=mapcnt*DIFFPAGESIZE ;
1304 offset=a-pagestart ;
/* wrlen is the smaller of the bytes still to write and the room left in this page. */
1305 wrlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
1306 len : (size_t)DIFFPAGESIZE-offset;
/* (u32)(-1) in difmap marks a page that has no copy in the diff file yet. */
1308 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1309 DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
1310 (unsigned long)(client->difmap[mapcnt])) ;
1311 myseek(client->difffile,
1312 client->difmap[mapcnt]*DIFFPAGESIZE+offset);
/* A short write to the diff file is treated as an error. */
1313 if (write(client->difffile, buf, wrlen) != wrlen) return -1 ;
1314 } else { /* the block is not there */
/*
 * The seek targets the current end of the diff file (difffilelen pages).
 * The slot recorded in difmap is the page's own index when F_SPARSE is set,
 * otherwise the next free slot (difffilelen, then incremented).
 * NOTE(review): with F_SPARSE the seek position and the recorded slot differ
 * as far as the visible lines show -- confirm against the full file.
 */
1315 myseek(client->difffile,client->difffilelen*DIFFPAGESIZE) ;
1316 client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++;
1317 DEBUG("Page %llu is not here, we put it at %lu\n",
1318 (unsigned long long)mapcnt,
1319 (unsigned long)(client->difmap[mapcnt]));
/* Fetch the whole original page, overlay the new bytes, then store the full page. */
1320 rdlen=DIFFPAGESIZE ;
1321 if (rawexpread_fully(pagestart, pagebuf, rdlen, client))
1323 memcpy(pagebuf+offset,buf,wrlen) ;
1324 if (write(client->difffile, pagebuf, DIFFPAGESIZE) !=
/* Advance past the piece just written. */
1328 len-=wrlen ; a+=wrlen ; buf+=wrlen ;
/* F_SYNC requests an fsync() of the diff file; the fdatasync() call further down belongs to a branch whose condition is not visible in this extract. */
1330 if (client->server->flags & F_SYNC) {
1331 fsync(client->difffile);
1333 /* open question: would it be cheaper to do multiple sync_file_ranges?
1334 as we iterate through the above?
1336 fdatasync(client->difffile);
1342 * Flush data to a client
1344 * @param client The client we're going to write for.
1345 * @return 0 on success, nonzero on failure
/*
 * Flush handler: in copy-on-write mode only the diff file is synced; otherwise
 * every FILE_INFO entry in client->export is fsync()ed in turn.
 */
1347 int expflush(CLIENT *client) {
1350 if (client->server->flags & F_COPYONWRITE) {
/* The fsync() result is handed straight back to the caller. */
1351 return fsync(client->difffile);
1354 for (i = 0; i < client->export->len; i++) {
/* g_array_index yields the element by value; only its fhandle is used. */
1355 FILE_INFO fi = g_array_index(client->export, FILE_INFO, i);
1356 if (fsync(fi.fhandle) < 0)
1364 * Do the initial negotiation.
1366 * @param client The client we're negotiating with.
/*
 * Handshake with a client on socket 'net'. 'phase' is a bit mask: NEG_INIT
 * sends the password and magic, NEG_MODERN runs the export-name exchange and
 * looks the export up in 'servers', NEG_OLD selects the 32-bit flags field.
 * In the NEG_MODERN path a new CLIENT is allocated for the matching export.
 *
 * NOTE(review): every read()/write() below is only checked for a negative
 * result; short transfers are not detected in the visible lines.
 */
1368 CLIENT* negotiate(int net, CLIENT *client, GArray* servers, int phase) {
1371 uint32_t flags = NBD_FLAG_HAS_FLAGS;
1372 uint16_t smallflags = 0;
1375 memset(zeros, '\0', sizeof(zeros));
1376 if(phase & NEG_INIT) {
1378 if (write(net, INIT_PASSWD, 8) < 0) {
1379 err_nonfatal("Negotiation failed: %m");
/* The magic that follows the password differs between the two handshake styles. */
1383 if(phase & NEG_MODERN) {
1385 magic = htonll(opts_magic);
1388 magic = htonll(cliserv_magic);
1390 if (write(net, &magic, sizeof(magic)) < 0) {
1391 err_nonfatal("Negotiation failed: %m");
1396 if(phase & NEG_MODERN) {
1405 err("programmer error");
/* Send the 16-bit flags, then read the client's reserved field, magic, option and export name. */
1406 if (write(net, &smallflags, sizeof(uint16_t)) < 0)
1407 err("Negotiation failed: %m");
1408 if (read(net, &reserved, sizeof(reserved)) < 0)
1409 err("Negotiation failed: %m");
1410 if (read(net, &magic, sizeof(magic)) < 0)
1411 err("Negotiation failed: %m");
1412 magic = ntohll(magic);
1413 if(magic != opts_magic) {
1417 if (read(net, &opt, sizeof(opt)) < 0)
1418 err("Negotiation failed: %m");
/* Only NBD_OPT_EXPORT_NAME is accepted here. */
1420 if(opt != NBD_OPT_EXPORT_NAME) {
1424 if (read(net, &namelen, sizeof(namelen)) < 0)
1425 err("Negotiation failed: %m");
1426 namelen = ntohl(namelen);
/*
 * NOTE(review): namelen comes from the peer and is used for the allocation
 * size without an upper bound in the visible lines; the malloc() result check
 * and the NUL termination of 'name' are also not visible here -- confirm.
 */
1427 name = malloc(namelen+1);
1429 if (read(net, name, namelen) < 0)
1430 err("Negotiation failed: %m");
/* Look for an export whose servename equals the requested name. */
1431 for(i=0; i<servers->len; i++) {
1432 SERVER* serve = &(g_array_index(servers, SERVER, i));
1433 if(!strcmp(serve->servename, name)) {
/* This inner 'client' shadows the function parameter of the same name. */
1434 CLIENT* client = g_new0(CLIENT, 1);
1435 client->server = serve;
1436 client->exportsize = OFFT_MAX;
1438 client->modern = TRUE;
1439 client->transactionlogfd = -1;
/* Export size goes out as a 64-bit big-endian value. */
1448 size_host = htonll((u64)(client->exportsize));
1449 if (write(net, &size_host, 8) < 0)
1450 err("Negotiation failed: %m");
/* Translate the server configuration flags into protocol flags. */
1451 if (client->server->flags & F_READONLY)
1452 flags |= NBD_FLAG_READ_ONLY;
1453 if (client->server->flags & F_FLUSH)
1454 flags |= NBD_FLAG_SEND_FLUSH;
1455 if (client->server->flags & F_FUA)
1456 flags |= NBD_FLAG_SEND_FUA;
1457 if (client->server->flags & F_ROTATIONAL)
1458 flags |= NBD_FLAG_ROTATIONAL;
/* NEG_OLD sends the flags as 32 bits; the other visible path sends the low 16 bits. */
1459 if (phase & NEG_OLD) {
1461 flags = htonl(flags);
1462 if (write(client->net, &flags, 4) < 0)
1463 err("Negotiation failed: %m");
/* The mask expression keeps all bits of flags; the cast to uint16_t performs the truncation. */
1466 smallflags = (uint16_t)(flags & ~((uint16_t)0));
1467 smallflags = htons(smallflags);
1468 if (write(client->net, &smallflags, sizeof(smallflags)) < 0) {
1469 err("Negotiation failed: %m");
/* 124 zero bytes of padding close the handshake. */
1473 if (write(client->net, zeros, 124) < 0)
1474 err("Negotiation failed: %m");
1478 /** sending macro. */
/*
 * SEND writes 'reply' to 'net' with writeit() and, when a transaction log is
 * open, writes the same bytes to it. It refers to a variable named 'client'
 * that must be in scope at the point of use; 'reply' is expanded more than
 * once and must be an lvalue.
 */
1479 #define SEND(net,reply) { writeit( net, &reply, sizeof( reply )); \
1480 if (client->transactionlogfd != -1) \
1481 writeit(client->transactionlogfd, &reply, sizeof(reply)); }
/* ERROR stores the error code in network byte order, sends the reply, then resets reply.error to 0 for reuse. */
1483 #define ERROR(client,reply,errcode) { reply.error = htonl(errcode); SEND(client->net,reply); reply.error = 0; }
1485 * Serve a file to a single client.
1487 * @todo This beast needs to be split up in many tiny little manageable
1488 * pieces. Preferably with a chainsaw.
1490 * @param client The client we're going to serve to.
1491 * @return when the client disconnects
/*
 * Per-client request loop: runs the handshake, then reads nbd_request
 * structures from client->net, validates them and dispatches on the command
 * (write, flush, read and disconnect paths are visible below). Requests and
 * replies are mirrored to client->transactionlogfd when it is not -1.
 */
1493 int mainloop(CLIENT *client) {
1494 struct nbd_request request;
1495 struct nbd_reply reply;
1496 gboolean go_on=TRUE;
/* Modern clients get the NEG_MODERN phase here; others get the full old-style handshake. */
1500 negotiate(client->net, client, NULL, client->modern ? NEG_MODERN : (NEG_OLD | NEG_INIT));
1501 DEBUG("Entering request loop!\n");
/* The reply magic is set once and reused for every reply. */
1502 reply.magic = htonl(NBD_REPLY_MAGIC);
1515 readit(client->net, &request, sizeof(request));
/* The request is logged exactly as received, before any byte-order conversion. */
1516 if (client->transactionlogfd != -1)
1517 writeit(client->transactionlogfd, &request, sizeof(request));
1519 request.from = ntohll(request.from);
1520 request.type = ntohl(request.type);
1521 command = request.type & NBD_CMD_MASK_COMMAND;
1522 len = ntohl(request.len);
1524 DEBUG("%s from %llu (%llu) len %d, ", getcommandname(command),
1525 (unsigned long long)request.from,
1526 (unsigned long long)request.from / 512, (unsigned int)len);
/* request.magic is compared in network byte order, hence htonl() on the constant. */
1528 if (request.magic != htonl(NBD_REQUEST_MAGIC))
1529 err("Not enough magic.");
/* The handle is echoed back so the client can match the reply to its request. */
1531 memcpy(reply.handle, request.handle, sizeof(reply.handle));
/* Range checks apply to reads and writes only. */
1533 if ((command==NBD_CMD_WRITE) || (command==NBD_CMD_READ)) {
1534 if ((request.from + len) > (OFFT_MAX)) {
1535 DEBUG("[Number too large!]");
1536 ERROR(client, reply, EINVAL);
/* NOTE(review): the sum is cast to ssize_t before the comparison; where ssize_t is narrower than off_t this could truncate -- confirm. */
1540 if (((ssize_t)((off_t)request.from + len) > client->exportsize)) {
1542 ERROR(client, reply, EINVAL);
/* Requests larger than the buffer (minus room for a reply header) are handled in pieces of at most that size. */
1547 if (currlen > BUFSIZE - sizeof(struct nbd_reply)) {
1548 currlen = BUFSIZE - sizeof(struct nbd_reply);
1549 msg2(LOG_INFO, "oversized request (this is not a problem)");
/* Disconnect: in copy-on-write mode the diff map is freed and the diff file is closed and removed. */
1556 msg2(LOG_INFO, "Disconnect request received.");
1557 if (client->server->flags & F_COPYONWRITE) {
1558 if (client->difmap) g_free(client->difmap) ;
1559 close(client->difffile);
1560 unlink(client->difffilename);
1561 free(client->difffilename);
/* Write path: pull one piece of payload from the socket, then store it. */
1567 DEBUG("wr: net->buf, ");
1569 readit(client->net, buf, currlen);
1570 DEBUG("buf->exp, ");
/* On a read-only export the write is refused with EPERM and the rest of the payload is drained from the socket. */
1571 if ((client->server->flags & F_READONLY) ||
1572 (client->server->flags & F_AUTOREADONLY)) {
1573 DEBUG("[WRITE to READONLY!]");
1574 ERROR(client, reply, EPERM);
1575 consume(client->net, buf, len-currlen, BUFSIZE);
/* The FUA bit of the request type is passed through as the fua argument. */
1578 if (expwrite(request.from, buf, currlen, client,
1579 request.type & NBD_CMD_FLAG_FUA)) {
/* NOTE(review): errno is read after DEBUG(); if DEBUG expands to code that can alter errno, the reported value may be wrong -- confirm. */
1580 DEBUG("Write failed: %m" );
1581 ERROR(client, reply, errno);
1582 consume(client->net, buf, len-currlen, BUFSIZE);
1586 request.from += currlen;
1587 currlen = (len < BUFSIZE) ? len : BUFSIZE;
1589 SEND(client->net, reply);
/* Flush path. */
1595 if (expflush(client)) {
1596 DEBUG("Flush failed: %m");
1597 ERROR(client, reply, errno);
1600 SEND(client->net, reply);
/* Read path: the reply header is placed at the start of buf and the data is read in right after it, so both go out in one writeit() call. */
1605 DEBUG("exp->buf, ");
1606 memcpy(buf, &reply, sizeof(struct nbd_reply));
1607 if (client->transactionlogfd != -1)
1608 writeit(client->transactionlogfd, &reply, sizeof(reply));
1609 p = buf + sizeof(struct nbd_reply);
1610 writelen = currlen + sizeof(struct nbd_reply);
1612 if (expread(request.from, p, currlen, client)) {
1613 DEBUG("Read failed: %m");
1614 ERROR(client, reply, errno);
1618 DEBUG("buf->net, ");
1619 writeit(client->net, buf, writelen);
1621 request.from += currlen;
1622 currlen = (len < BUFSIZE) ? len : BUFSIZE;
1630 DEBUG ("Ignoring unknown command\n");
1638 * Set up client export array, which is an array of FILE_INFO.
1639 * Also, split a single exportfile into multiple ones, if that was asked.
1640 * @param client information on the client which we want to setup export for
/*
 * Opens the backing file(s) for a client and fills client->export with one
 * FILE_INFO per file. In multi-file mode the names are "<exportname>.<i>".
 * The running start offset plus the size of the last file gives the total
 * export size, which server->expected_size may then reduce.
 */
1642 void setupexport(CLIENT* client) {
1644 off_t laststartoff = 0, lastsize = 0;
1645 int multifile = (client->server->flags & F_MULTIFILE);
1647 client->export = g_array_new(TRUE, TRUE, sizeof(FILE_INFO));
1649 /* If multi-file, open as many files as we can.
1650 * If not, open exactly one file.
1651 * Calculate file sizes as we go to get total size. */
1655 gchar* error_string;
/* NOTE(review): open(2) flags are stored in a mode_t variable; it works as an integer but the type is misleading. */
1656 mode_t mode = (client->server->flags & F_READONLY) ? O_RDONLY : O_RDWR;
1659 tmpname=g_strdup_printf("%s.%d", client->exportname, i);
1661 tmpname=g_strdup(client->exportname);
1663 DEBUG( "Opening %s\n", tmpname );
1664 fi.fhandle = open(tmpname, mode);
/* A failed read-write open is retried read-only. */
1665 if(fi.fhandle == -1 && mode == O_RDWR) {
1666 /* Try again because maybe media was read-only */
1667 fi.fhandle = open(tmpname, O_RDONLY);
1668 if(fi.fhandle != -1) {
1669 /* Opening the base file in copyonwrite mode is
1671 if(!(client->server->flags & F_COPYONWRITE)) {
1672 client->server->flags |= F_AUTOREADONLY;
1673 client->server->flags |= F_READONLY;
1677 if(fi.fhandle == -1) {
1678 if(multifile && i>0)
1680 error_string=g_strdup_printf(
1681 "Could not open exported file %s: %%m",
1685 fi.startoff = laststartoff + lastsize;
1686 g_array_append_val(client->export, fi);
1689 /* Starting offset and size of this file will be used to
1690 * calculate starting offset of next file */
1691 laststartoff = fi.startoff;
1692 lastsize = size_autodetect(fi.fhandle);
1698 /* Set export size to total calculated size */
1699 client->exportsize = laststartoff + lastsize;
1701 /* Export size may be overridden */
1702 if(client->server->expected_size) {
1703 /* desired size must be <= total calculated size */
1704 if(client->server->expected_size > client->exportsize) {
1705 err("Size of exported file is too big\n");
1708 client->exportsize = client->server->expected_size;
1711 msg3(LOG_INFO, "Size of exported file/device is %llu", (unsigned long long)client->exportsize);
1713 msg3(LOG_INFO, "Total number of files: %d", i);
/*
 * Prepares copy-on-write state for a client: builds the diff file name
 * "<exportname>-<clientname>-<n>.diff" in a 1024-byte buffer, creates (or
 * truncates) that file with mode 0600, and allocates client->difmap with one
 * u32 slot per DIFFPAGESIZE page of the export. Every slot starts as
 * (u32)-1, meaning "page not present in the diff file".
 *
 * The page count is rounded up: expread()/expwrite() index difmap with
 * (a+len-1)/DIFFPAGESIZE, so an export whose size is not a multiple of
 * DIFFPAGESIZE needs a slot for its trailing partial page. The count is
 * written as quotient plus a remainder test so it cannot overflow off_t.
 *
 * Any failure is reported through err().
 */
1717 int copyonwrite_prepare(CLIENT* client) {
1719 if ((client->difffilename = malloc(1024))==NULL)
1720 err("Failed to allocate string for diff file name");
1721 snprintf(client->difffilename, 1024, "%s-%s-%d.diff",client->exportname,client->clientname,
1723 client->difffilename[1023]='\0';
1724 msg3(LOG_INFO,"About to create map and diff file %s",client->difffilename) ;
1725 client->difffile=open(client->difffilename,O_RDWR | O_CREAT | O_TRUNC,0600) ;
1726 if (client->difffile<0) err("Could not create diff file (%m)") ;
1727 if ((client->difmap=calloc(client->exportsize/DIFFPAGESIZE+(client->exportsize%DIFFPAGESIZE!=0),sizeof(u32)))==NULL)
1728 err("Could not allocate memory") ;
1729 for (i=0;i<client->exportsize/DIFFPAGESIZE+(client->exportsize%DIFFPAGESIZE!=0);i++) client->difmap[i]=(u32)-1 ;
1735 * Run a command. This is used for the ``prerun'' and ``postrun'' config file
1738 * @param command the command to be run. Read from the config file
1739 * @param file the file name we're about to export
/*
 * Builds a command line from 'command' and 'file' when 'command' is non-NULL
 * and non-empty. What happens to 'cmd' afterwards is not visible in this
 * extract.
 * NOTE(review): 'command' is used as the printf-style format string, so any
 * conversion other than a single %s in the configured value is unsafe.
 */
1741 int do_run(gchar* command, gchar* file) {
1745 if(command && *command) {
1746 cmd = g_strdup_printf(command, file);
1754 * Serve a connection.
1756 * @todo allow for multithreading, perhaps use libevent. Not just yet, though;
1757 * follow the road map.
1759 * @param client a connected client
/*
 * Handles one connected client: opens the transaction log if one is
 * configured and not yet open, runs the prerun command, sets up the export
 * (and the copy-on-write state when enabled), adjusts socket options, runs
 * the postrun command and finally closes the transaction log.
 * The call that enters the request loop is not visible in this extract.
 */
1761 void serveconnection(CLIENT *client) {
1762 if (client->server->transactionlog && (client->transactionlogfd == -1))
/* Failure to open the log is only a warning; serving continues. */
1764 if (-1 == (client->transactionlogfd = open(client->server->transactionlog,
1766 S_IRUSR | S_IWUSR)))
1767 g_warning("Could not open transaction log %s",
1768 client->server->transactionlog);
/* A nonzero result from the prerun command takes the branch below (body not visible here). */
1771 if(do_run(client->server->prerun, client->exportname)) {
1774 setupexport(client);
1776 if (client->server->flags & F_COPYONWRITE) {
1777 copyonwrite_prepare(client);
1780 setmysockopt(client->net);
/* The postrun command's result is ignored. */
1783 do_run(client->server->postrun, client->exportname);
1785 if (-1 != client->transactionlogfd)
1787 close(client->transactionlogfd);
1788 client->transactionlogfd = -1;
1793 * Find the name of the file we have to serve. This will use g_strdup_printf
1794 * to put the IP address of the client inside a filename containing
1795 * "%s" (in the form as specified by the "virtstyle" option). That name
1796 * is then written to client->exportname.
1798 * @param net A socket connected to an nbd client
1799 * @param client information about the client. The IP address in human-readable
1800 * format will be written to a new char* buffer, the address of which will be
1801 * stored in client->clientname.
/*
 * Determines the peer address of socket 'net', converts it to numeric text
 * and derives client->exportname from the server's export name according to
 * server->virtstyle (plain copy, or with the peer address / CIDR network
 * substituted into the template). The numeric peer address is duplicated
 * into client->clientname.
 *
 * The address length is kept in a socklen_t, the type getpeername() and
 * getnameinfo() actually take; handing them a size_t through a pointer cast
 * breaks on platforms where the two types differ in width. The failure
 * message names the call that is really made.
 *
 * NOTE(review): in the IPv4 CIDR branch the shifts are applied directly to
 * s_addr, which POSIX defines as network byte order, and the shift count is
 * 32 when cidrlen is 0 -- verify the result on little-endian hosts.
 */
1803 void set_peername(int net, CLIENT *client) {
1804 struct sockaddr_storage addrin;
1805 struct sockaddr_storage netaddr;
1806 struct sockaddr_in *netaddr4 = NULL;
1807 struct sockaddr_in6 *netaddr6 = NULL;
1808 socklen_t addrinlen = sizeof( addrin );
1809 struct addrinfo hints;
1810 struct addrinfo *ai = NULL;
1811 char peername[NI_MAXHOST];
1812 char netname[NI_MAXHOST];
1818 if (getpeername(net, (struct sockaddr *) &addrin, &addrinlen) < 0)
1819 err("getpeername failed: %m");
/* Numeric form only (NI_NUMERICHOST): no reverse DNS lookup is done. */
1821 getnameinfo((struct sockaddr *)&addrin, (socklen_t)addrinlen,
1822 peername, sizeof (peername), NULL, 0, NI_NUMERICHOST);
/* The numeric text is resolved again; the result's ai_family is used further down to choose the IPv4 or IPv6 branch. */
1824 memset(&hints, '\0', sizeof (hints));
1825 hints.ai_flags = AI_ADDRCONFIG;
1826 e = getaddrinfo(peername, NULL, &hints, &ai);
1829 fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
1834 switch(client->server->virtstyle) {
1836 client->exportname=g_strdup(client->server->exportname);
/* This loop inspects each '.' in the textual address; what it does with it is not visible in this extract. */
1839 for(i=0;i<strlen(peername);i++) {
1840 if(peername[i]=='.') {
1845 client->exportname=g_strdup_printf(client->server->exportname, peername);
/* CIDR style: mask a copy of the peer address down to cidrlen bits and format it as "<network>/<peer>". */
1848 memcpy(&netaddr, &addrin, addrinlen);
1849 if(ai->ai_family == AF_INET) {
1850 netaddr4 = (struct sockaddr_in *)&netaddr;
1851 (netaddr4->sin_addr).s_addr>>=32-(client->server->cidrlen);
1852 (netaddr4->sin_addr).s_addr<<=32-(client->server->cidrlen);
1854 getnameinfo((struct sockaddr *) netaddr4, (socklen_t) addrinlen,
1855 netname, sizeof (netname), NULL, 0, NI_NUMERICHOST);
1856 tmp=g_strdup_printf("%s/%s", netname, peername);
1857 }else if(ai->ai_family == AF_INET6) {
1858 netaddr6 = (struct sockaddr_in6 *)&netaddr;
/* Whole 32-bit words are zeroed while at least 32 bits remain to clear; the remainder is cleared by shifting. */
1860 shift = 128-(client->server->cidrlen);
1862 while(shift >= 32) {
1863 ((netaddr6->sin6_addr).s6_addr32[i])=0;
1867 (netaddr6->sin6_addr).s6_addr32[i]>>=shift;
1868 (netaddr6->sin6_addr).s6_addr32[i]<<=shift;
1870 getnameinfo((struct sockaddr *)netaddr6, (socklen_t)addrinlen,
1871 netname, sizeof(netname), NULL, 0, NI_NUMERICHOST);
1872 tmp=g_strdup_printf("%s/%s", netname, peername);
1876 client->exportname=g_strdup_printf(client->server->exportname, tmp);
1882 msg4(LOG_INFO, "connect from %s, assigned file is %s",
1883 peername, client->exportname);
1884 client->clientname=g_strdup(peername);
1889 * @param data a pointer to pid_t which should be freed
/*
 * Destroy callback with the gpointer-taking signature GLib expects;
 * setup_servers() registers it as the value-destroy function of the
 * 'children' hash table. The body is not visible in this extract.
 */
1891 void destroy_pid_t(gpointer data) {
1896 * Loop through the available servers, and serve them. Never returns.
/*
 * Accept loop of the parent process. A master fd_set holding every export
 * socket (plus the modern socket) is built once; each iteration copies it,
 * waits in select(), accepts a connection, applies the connection limit and
 * the authorization check, and forks a child that serves the client.
 */
1898 int serveloop(GArray* servers) {
1899 struct sockaddr_storage addrin;
1900 socklen_t addrinlen=sizeof(addrin);
1908 * Set up the master fd_set. The set of descriptors we need
1909 * to select() for never changes anyway and it buys us a *lot*
1910 * of time to only build this once. However, if we ever choose
1911 * to not fork() for clients anymore, we may have to revisit
1916 for(i=0;i<servers->len;i++) {
/* A socket value of 0 is treated as "no socket for this export". */
1917 if((sock=(g_array_index(servers, SERVER, i)).socket)) {
1918 FD_SET(sock, &mset);
1919 max=sock>max?sock:max;
1923 FD_SET(modernsock, &mset);
1924 max=modernsock>max?modernsock:max;
1927 CLIENT *client = NULL;
/* select() modifies its set, so it works on a fresh copy of the master set. */
1930 memcpy(&rset, &mset, sizeof(fd_set));
1931 if(select(max+1, &rset, NULL, NULL, NULL)>0) {
/* Connections on the modern socket are negotiated before forking, because the export is only known after the client names it. */
1936 if(FD_ISSET(modernsock, &rset)) {
1937 if((net=accept(modernsock, (struct sockaddr *) &addrin, &addrinlen)) < 0)
1939 client = negotiate(net, NULL, servers, NEG_INIT | NEG_MODERN);
1941 err_nonfatal("negotiation failed");
1946 serve = client->server;
/* Old-style sockets: take the first export whose socket is readable; the loop ends once 'net' is nonzero. */
1948 for(i=0;i<servers->len && !net;i++) {
1949 serve=&(g_array_index(servers, SERVER, i));
1950 if(FD_ISSET(serve->socket, &rset)) {
1951 if ((net=accept(serve->socket, (struct sockaddr *) &addrin, &addrinlen)) < 0)
/* The limit is compared against the size of the global 'children' table. */
1958 if(serve->max_connections > 0 &&
1959 g_hash_table_size(children) >= serve->max_connections) {
1960 msg2(LOG_INFO, "Max connections reached");
/* The accepted socket is switched to blocking mode by clearing O_NONBLOCK. */
1964 if((sock_flags = fcntl(net, F_GETFL, 0))==-1) {
1965 err("fcntl F_GETFL");
1967 if(fcntl(net, F_SETFL, sock_flags &~O_NONBLOCK)==-1) {
1968 err("fcntl F_SETFL ~O_NONBLOCK");
1971 client = g_new0(CLIENT, 1);
1972 client->server=serve;
1973 client->exportsize=OFFT_MAX;
1975 client->transactionlogfd = -1;
1977 set_peername(net, client);
1978 if (!authorized_client(client)) {
1979 msg2(LOG_INFO,"Unauthorized client") ;
1983 msg2(LOG_INFO,"Authorized client") ;
/* The pid is heap-allocated because it is stored in the 'children' table as both key and value. */
1984 pid=g_malloc(sizeof(pid_t));
1987 if ((*pid=fork())<0) {
1988 msg3(LOG_INFO,"Could not fork (%s)",strerror(errno)) ;
1992 if (*pid>0) { /* parent */
1994 g_hash_table_insert(children, pid, pid);
/* Child: drop the parent's bookkeeping and close every listening socket before serving. */
1998 g_hash_table_destroy(children);
1999 for(i=0;i<servers->len;i++) {
2000 serve=&g_array_index(servers, SERVER, i);
2001 close(serve->socket);
2003 /* FALSE does not free the
2004 actual data. This is required,
2005 because the client has a
2006 direct reference into that
2007 data, and otherwise we get a
2009 g_array_free(servers, FALSE);
2012 msg2(LOG_INFO,"Starting to serve");
2013 serveconnection(client);
/*
 * Applies the options used on listening sockets: SO_REUSEADDR, SO_KEEPALIVE
 * and non-blocking mode. Each failure is reported through err().
 */
2020 void dosockopts(int socket) {
2028 /* lose the pesky "Address already in use" error message */
2029 if (setsockopt(socket,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) {
2030 err("setsockopt SO_REUSEADDR");
2032 if (setsockopt(socket,SOL_SOCKET,SO_KEEPALIVE,&yes,sizeof(int)) == -1) {
2033 err("setsockopt SO_KEEPALIVE");
2036 /* make the listening socket non-blocking */
/* The current flags are read first so that only O_NONBLOCK is added. */
2037 if ((sock_flags = fcntl(socket, F_GETFL, 0)) == -1) {
2038 err("fcntl F_GETFL");
2040 if (fcntl(socket, F_SETFL, sock_flags | O_NONBLOCK) == -1) {
2041 err("fcntl F_SETFL O_NONBLOCK");
2046 * Connect a server's socket.
2048 * @param serve the server we want to connect.
/*
 * Creates, binds and listens on the socket for one export. The address is
 * resolved with getaddrinfo() from serve->listenaddr and serve->port; when
 * the configured family is AF_UNSPEC the family of the resolved address is
 * recorded in serve->socket_family. The return value tells the caller
 * whether this export has a name (see the early return below and the
 * servename test at the end).
 *
 * SDP handling: F_SDP is a bit in serve->flags, so it is tested with a
 * bitwise AND (a logical && is true for any nonzero flags word), and the
 * IPv6 case is a proper "else if".
 */
2050 int setup_serve(SERVER *serve) {
2051 struct addrinfo hints;
2052 struct addrinfo *ai = NULL;
/* Early exit on a condition that is not visible in this extract: 1 for a named export, 0 otherwise. */
2057 return serve->servename ? 1 : 0;
2059 memset(&hints,'\0',sizeof(hints));
2060 hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG | AI_NUMERICSERV;
2061 hints.ai_socktype = SOCK_STREAM;
2062 hints.ai_family = serve->socket_family;
/* getaddrinfo() takes the service as text; AI_NUMERICSERV marks it as a port number. */
2064 port = g_strdup_printf ("%d", serve->port);
2068 e = getaddrinfo(serve->listenaddr,port,&hints,&ai);
2073 fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
2079 if(serve->socket_family == AF_UNSPEC)
2080 serve->socket_family = ai->ai_family;
2083 if ((serve->flags) & F_SDP) {
2084 if (ai->ai_family == AF_INET)
2085 ai->ai_family = AF_INET_SDP;
2086 else if (ai->ai_family == AF_INET6)
2087 ai->ai_family = AF_INET6_SDP;
2090 if ((serve->socket = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol)) < 0)
2093 dosockopts(serve->socket);
2095 DEBUG("Waiting for connections... bind, ");
2096 e = bind(serve->socket, ai->ai_addr, ai->ai_addrlen);
/* EADDRINUSE from bind() is deliberately not treated as an error by this test. */
2097 if (e != 0 && errno != EADDRINUSE)
2100 if (listen(serve->socket, 1) < 0)
2104 if(serve->servename) {
/*
 * Opens the single listening socket used for the modern (named-export)
 * handshake: resolves modern_listen/modernport for TCP on any address
 * family, then creates the socket, applies dosockopts(), binds and listens
 * with a backlog of 10. The result is stored in the global 'modernsock'.
 */
2111 void open_modern(void) {
2112 struct addrinfo hints;
2113 struct addrinfo* ai = NULL;
2117 memset(&hints, '\0', sizeof(hints));
2118 hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG;
2119 hints.ai_socktype = SOCK_STREAM;
2120 hints.ai_family = AF_UNSPEC;
2121 hints.ai_protocol = IPPROTO_TCP;
2122 e = getaddrinfo(modern_listen, modernport, &hints, &ai);
2124 fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
/* Only the first address returned by getaddrinfo() is used in the visible lines. */
2127 if((modernsock = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol))<0) {
2131 dosockopts(modernsock);
2133 if(bind(modernsock, ai->ai_addr, ai->ai_addrlen)) {
2136 if(listen(modernsock, 10) <0) {
2144 * Connect our servers.
/*
 * Brings up every configured export via setup_serve(), creates the table
 * that tracks child processes, and installs the SIGCHLD and SIGTERM handlers.
 */
2146 void setup_servers(GArray* servers) {
2148 struct sigaction sa;
2151 for(i=0;i<servers->len;i++) {
/* setup_serve() results are OR-ed together, so want_modern ends up nonzero if any call returned nonzero. */
2152 want_modern |= setup_serve(&(g_array_index(servers, SERVER, i)));
/* Keys are not freed by the table (NULL key destructor); values are released through destroy_pid_t. */
2157 children=g_hash_table_new_full(g_int_hash, g_int_equal, NULL, destroy_pid_t);
/* Both handlers use SA_RESTART and an empty signal mask. */
2159 sa.sa_handler = sigchld_handler;
2160 sigemptyset(&sa.sa_mask);
2161 sa.sa_flags = SA_RESTART;
2162 if(sigaction(SIGCHLD, &sa, NULL) == -1)
2163 err("sigaction: %m");
2164 sa.sa_handler = sigterm_handler;
2165 sigemptyset(&sa.sa_mask);
2166 sa.sa_flags = SA_RESTART;
2167 if(sigaction(SIGTERM, &sa, NULL) == -1)
2168 err("sigaction: %m");
2172 * Go daemon (unless we specified at compile time that we didn't want this)
2173 * @param serve the first server of our configuration. If its port is zero,
2174 * then do not daemonize, because we're doing inetd then. This parameter
2175 * is only used to create a PID file of the form
2176 * /var/run/nbd-server.<port>.pid; it's not modified in any way.
/*
 * daemonize() exists as a real function only when NODAEMON is not defined;
 * otherwise the macro near the end of this block makes calls to it expand
 * to nothing.
 */
2178 #if !defined(NODAEMON)
2179 void daemonize(SERVER* serve) {
/* A server entry with port 0 takes the branch below (body not visible in this extract). */
2182 if(serve && !(serve->port)) {
/* An empty pidftemplate is filled with one of two default paths; which condition selects between them is not visible here. */
2188 if(!*pidftemplate) {
/* NOTE(review): strncpy with 255 relies on pidftemplate being a 256-byte buffer that was zeroed beforehand (main() does a memset of 256 bytes) to stay NUL-terminated. */
2190 strncpy(pidftemplate, "/var/run/nbd-server.%d.pid", 255);
2192 strncpy(pidftemplate, "/var/run/nbd-server.pid", 255);
/* The template is used as a printf-style format; the port (or 0 without a server entry) is its only argument. */
2195 snprintf(pidfname, 255, pidftemplate, serve ? serve->port : 0);
2196 pidf=fopen(pidfname, "w");
2198 fprintf(pidf,"%d\n", (int)getpid());
2202 fprintf(stderr, "Not fatal; continuing");
2206 #define daemonize(serve)
2207 #endif /* !defined(NODAEMON) */
2210 * Everything beyond this point (in the file) is run in non-daemon mode.
2211 * The stuff above daemonize() isn't.
/*
 * Reports a failed export. The prototype marks the function G_GNUC_NORETURN,
 * so callers may assume it does not return; the part of the body that ends
 * the process is not visible in this extract.
 */
2214 void serve_err(SERVER* serve, const char* msg) G_GNUC_NORETURN;
2216 void serve_err(SERVER* serve, const char* msg) {
2217 g_message("Export of %s on port %d failed:", serve->exportname,
2223 * Set up user-ID and/or group-ID
/*
 * Switches the process to the configured group and user. The group is
 * handled before the user in the visible lines. Names are looked up with
 * getgrnam()/getpwnam(); a failed setgid()/setuid() is reported through err().
 * The conditions guarding each half are not visible in this extract.
 */
2225 void dousers(void) {
2230 gr=getgrnam(rungroup);
2232 str = g_strdup_printf("Invalid group name: %s", rungroup);
2235 if(setgid(gr->gr_gid)<0) {
2236 err("Could not set GID: %m");
2240 pw=getpwnam(runuser);
2242 str = g_strdup_printf("Invalid user name: %s", runuser);
2245 if(setuid(pw->pw_uid)<0) {
2246 err("Could not set UID: %m");
/*
 * GLib log handler that forwards messages to syslog. The GLib level is
 * mapped to a syslog priority by the switch below (the priorities assigned
 * in each case are not visible in this extract); the default is LOG_DEBUG.
 */
2252 void glib_message_syslog_redirect(const gchar *log_domain,
2253 GLogLevelFlags log_level,
2254 const gchar *message,
2257 int level=LOG_DEBUG;
2261 case G_LOG_FLAG_FATAL:
2262 case G_LOG_LEVEL_CRITICAL:
2263 case G_LOG_LEVEL_ERROR:
2266 case G_LOG_LEVEL_WARNING:
2269 case G_LOG_LEVEL_MESSAGE:
2270 case G_LOG_LEVEL_INFO:
2273 case G_LOG_LEVEL_DEBUG:
/* The message is passed as an argument to a fixed "%s" format, never as the format itself. */
2278 syslog(level, "%s", message);
2283 * Main entry point...
2285 int main(int argc, char *argv[]) {
2290 if (sizeof( struct nbd_request )!=28) {
2291 fprintf(stderr,"Bad size of structure. Alignment problems?\n");
2292 exit(EXIT_FAILURE) ;
2295 memset(pidftemplate, '\0', 256);
2298 config_file_pos = g_strdup(CFILE);
2299 serve=cmdline(argc, argv);
2300 servers = parse_cfile(config_file_pos, &err);
2303 serve->socket_family = AF_UNSPEC;
2305 append_serve(serve, servers);
2307 if (!(serve->port)) {
2310 /* You really should define ISSERVER if you're going to use
2311 * inetd mode, but if you don't, closing stdout and stderr
2312 * (which inetd had connected to the client socket) will let it
2316 open("/dev/null", O_WRONLY);
2317 open("/dev/null", O_WRONLY);
2318 g_log_set_default_handler( glib_message_syslog_redirect, NULL );
2320 client=g_malloc(sizeof(CLIENT));
2321 client->server=serve;
2323 client->exportsize=OFFT_MAX;
2324 set_peername(0,client);
2325 serveconnection(client);
2330 if(!servers || !servers->len) {
2331 if(err && !(err->domain == g_quark_from_string("parse_cfile")
2332 && err->code == CFILE_NOTFOUND)) {
2333 g_warning("Could not parse config file: %s",
2334 err ? err->message : "Unknown error");
2338 g_warning("Specifying an export on the command line is deprecated.");
2339 g_warning("Please use a configuration file instead.");
2342 if((!serve) && (!servers||!servers->len)) {
2343 g_message("No configured exports; quitting.");
2348 setup_servers(servers);