2 * Network Block Device - server
4 * Copyright 1996-1998 Pavel Machek, distribute under GPL
5 * <pavel@atrey.karlin.mff.cuni.cz>
6 * Copyright 2001-2004 Wouter Verhelst <wouter@debian.org>
7 * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk>
9 * Version 1.0 - hopefully 64-bit-clean
10 * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au>
11 * Version 1.2 - autodetect size of block devices, thanks to Peter T. Breuer <ptb@it.uc3m.es>
12 * Version 1.5 - can compile on Unix systems that don't have 64 bit integer
13 * type, or don't have 64 bit file offsets by defining FS_32BIT
14 * in compile options for nbd-server *only*. This can be done
15 * with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the
16 * original autoconf input file, or I would make it a configure
17 * option.) Ken Yap <ken@nlc.net.au>.
18 * Version 1.6 - fix autodetection of block device size and really make 64 bit
19 * clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk>
20 * Version 2.0 - Version synchronised with client
21 * Version 2.1 - Reap zombie client processes when they exit. Removed
22 * (uncommented) the _IO magic, it's no longer necessary. Wouter
23 * Verhelst <wouter@debian.org>
24 * Version 2.2 - Auto switch to read-only mode (useful for floppies).
25 * Version 2.3 - Fixed code so that Large File Support works. This
26 * removes the FS_32BIT compile-time directive; define
27 * _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be
28 * using FS_32BIT. This will allow you to use files >2GB instead of
29 * having to use the -m option. Wouter Verhelst <wouter@debian.org>
30 * Version 2.4 - Added code to keep track of children, so that we can
31 * properly kill them from initscripts. Add a call to daemon(),
32 * so that processes don't think they have to wait for us, which is
33 * interesting for initscripts as well. Wouter Verhelst
35 * Version 2.5 - Bugfix release: forgot to reset child_arraysize to
36 * zero after fork()ing, resulting in nbd-server going berserk
37 * when it receives a signal with at least one child open. Wouter
38 * Verhelst <wouter@debian.org>
39 * 10/10/2003 - Added socket option SO_KEEPALIVE (sf.net bug 819235);
40 * rectified type of mainloop::size_host (sf.net bugs 814435 and
41 * 817385); close the PID file after writing to it, so that the
42 * daemon can actually be found. Wouter Verhelst
44 * 10/10/2003 - Size of the data "size_host" was wrong and so was not
45 * correctly put in network endianness. Many types were corrected
46 * (size_t and off_t instead of int). <vspaceg@sourceforge.net>
47 * Version 2.6 - Some code cleanup.
48 * Version 2.7 - Better build system.
49 * 11/02/2004 - Doxygenified the source, modularized it a bit. Needs a
50 * lot more work, but this is a start. Wouter Verhelst
52 * 16/03/2010 - Add IPv6 support.
53 * Kitt Tientanopajai <kitt@kitty.in.th>
54 * Neutron Soutmun <neo.neutron@gmail.com>
55 * Suriya Soutmun <darksolar@gmail.com>
58 /* Includes LFS defines, which defines behaviours of some of the following
59 * headers, so must come before those */
62 #include <sys/types.h>
63 #include <sys/socket.h>
65 #include <sys/select.h> /* select */
66 #include <sys/wait.h> /* wait */
67 #ifdef HAVE_SYS_IOCTL_H
68 #include <sys/ioctl.h>
70 #include <sys/param.h>
71 #ifdef HAVE_SYS_MOUNT_H
72 #include <sys/mount.h> /* For BLKGETSIZE */
74 #include <signal.h> /* sigaction */
76 #include <netinet/tcp.h>
77 #include <netinet/in.h>
85 #include <arpa/inet.h>
95 /* used in cliserv.h, so must come first */
96 #define MY_NAME "nbd_server"
100 #include <sdp_inet.h>
103 /** Default position of the config file */
105 #define SYSCONFDIR "/etc"
107 #define CFILE SYSCONFDIR "/nbd-server/config"
109 /** Where our config file actually is */
110 gchar* config_file_pos;
112 /** What user we're running as */
114 /** What group we're running as */
115 gchar* rungroup=NULL;
116 /** whether to export using the old negotiation protocol (port-based) */
117 gboolean do_oldstyle=FALSE;
119 /* Whether we should avoid forking */
122 /** Logging macros, now nothing goes to syslog unless you say ISSERVER */
124 #define msg2(a,b) syslog(a,b)
125 #define msg3(a,b,c) syslog(a,b,c)
126 #define msg4(a,b,c,d) syslog(a,b,c,d)
128 #define msg2(a,b) g_message(b)
129 #define msg3(a,b,c) g_message(b,c)
130 #define msg4(a,b,c,d) g_message(b,c,d)
133 /* Debugging macros */
136 #define DEBUG(...) printf(__VA_ARGS__)
140 #ifndef PACKAGE_VERSION
141 #define PACKAGE_VERSION ""
144 * The highest value a variable of type off_t can reach. This is a signed
145 * integer, so set all bits except for the leftmost one.
147 #define OFFT_MAX ~((off_t)1<<(sizeof(off_t)*8-1))
148 #define LINELEN 256 /**< Size of static buffer used to read the
149 authorization file (yuck) */
150 #define BUFSIZE ((1024*1024)+sizeof(struct nbd_reply)) /**< Size of buffer that can hold requests */
151 #define DIFFPAGESIZE 4096 /**< diff file uses those chunks */
152 #define F_READONLY 1 /**< flag to tell us a file is readonly */
153 #define F_MULTIFILE 2 /**< flag to tell us a file is exported using -m */
154 #define F_COPYONWRITE 4 /**< flag to tell us a file is exported using
156 #define F_AUTOREADONLY 8 /**< flag to tell us a file is set to autoreadonly */
157 #define F_SPARSE 16 /**< flag to tell us copyronwrite should use a sparse file */
158 #define F_SDP 32 /**< flag to tell us the export should be done using the Socket Direct Protocol for RDMA */
159 #define F_SYNC 64 /**< Whether to fsync() after a write */
160 #define F_FLUSH 128 /**< Whether server wants FLUSH to be sent by the client */
161 #define F_FUA 256 /**< Whether server wants FUA to be sent by the client */
162 #define F_ROTATIONAL 512 /**< Whether server wants the client to implement the elevator algorithm */
163 #define F_TEMPORARY 1024 /**< Whether the backing file is temporary and should be created then unlinked */
164 GHashTable *children;
165 char pidfname[256]; /**< name of our PID file */
166 char pidftemplate[256]; /**< template to be used for the filename of the PID file */
167 char default_authname[] = SYSCONFDIR "/nbd-server/allow"; /**< default name of allow file */
169 #define NEG_INIT (1 << 0)
170 #define NEG_OLD (1 << 1)
171 #define NEG_MODERN (1 << 2)
173 int modernsock=0; /**< Socket for the modern handler. Not used
174 if a client was only specified on the
175 command line; only port used if
176 oldstyle is set to false (and then the
177 command-line client isn't used, gna gna) */
178 char* modern_listen; /**< listenaddr value for modernsock */
179 char* modernport=NBD_DEFAULT_PORT; /**< Port number on which to listen for
180 new-style nbd-client connections */
183 * Types of virtualization
186 VIRT_NONE=0, /**< No virtualization */
187 VIRT_IPLIT, /**< Literal IP address as part of the filename */
188 VIRT_IPHASH, /**< Replacing all dots in an ip address by a / before
189 doing the same as in IPLIT */
190 VIRT_CIDR, /**< Every subnet in its own directory */
194 * Variables associated with a server.
197 gchar* exportname; /**< (unprocessed) filename of the file we're exporting */
198 off_t expected_size; /**< size of the exported file as it was told to
199 us through configuration */
200 gchar* listenaddr; /**< The IP address we're listening on */
201 unsigned int port; /**< port we're exporting this file at */
202 char* authname; /**< filename of the authorization file */
203 int flags; /**< flags associated with this exported file */
204 int socket; /**< The socket of this server. */
205 int socket_family; /**< family of the socket */
206 VIRT_STYLE virtstyle;/**< The style of virtualization, if any */
207 uint8_t cidrlen; /**< The length of the mask when we use
208 CIDR-style virtualization */
209 gchar* prerun; /**< command to be ran after connecting a client,
210 but before starting to serve */
211 gchar* postrun; /**< command that will be ran after the client
213 gchar* servename; /**< name of the export as selected by nbd-client */
214 int max_connections; /**< maximum number of opened connections */
215 gchar* transactionlog;/**< filename for transaction log */
219 * Variables associated with a client socket.
222 int fhandle; /**< file descriptor */
223 off_t startoff; /**< starting offset of this file */
227 off_t exportsize; /**< size of the file we're exporting */
228 char *clientname; /**< peer */
229 char *exportname; /**< (processed) filename of the file we're exporting */
230 GArray *export; /**< array of FILE_INFO of exported files;
231 array size is always 1 unless we're
232 doing the multiple file option */
233 int net; /**< The actual client socket */
234 SERVER *server; /**< The server this client is getting data from */
235 char* difffilename; /**< filename of the copy-on-write file, if any */
236 int difffile; /**< filedescriptor of copyonwrite file. @todo
237 shouldn't this be an array too? (cfr export) Or
238 make -m and -c mutually exclusive */
239 u32 difffilelen; /**< number of pages in difffile */
240 u32 *difmap; /**< see comment on the global difmap for this one */
241 gboolean modern; /**< client was negotiated using modern negotiation protocol */
242 int transactionlogfd;/**< fd for transaction log */
246 * Type of configuration file values
249 PARAM_INT, /**< This parameter is an integer */
250 PARAM_STRING, /**< This parameter is a string */
251 PARAM_BOOL, /**< This parameter is a boolean */
255 * Configuration file values
258 gchar *paramname; /**< Name of the parameter, as it appears in
260 gboolean required; /**< Whether this is a required (as opposed to
261 optional) parameter */
262 PARAM_TYPE ptype; /**< Type of the parameter. */
263 gpointer target; /**< Pointer to where the data of this
264 parameter should be written. If ptype is
265 PARAM_BOOL, the data is or'ed rather than
267 gint flagval; /**< Flag mask for this parameter in case ptype
272 * Translate a command name into human readable form
274 * @param command The command number (after applying NBD_CMD_MASK_COMMAND)
275 * @return pointer to the command name
// Maps an NBD command number to a human-readable name for log output.
// Every visible return hands back a string literal, so the result has static
// storage: callers must neither free nor modify it.
277 static inline const char * getcommandname(uint64_t command) {
280 return "NBD_CMD_READ";
282 return "NBD_CMD_WRITE";
284 return "NBD_CMD_DISC";
286 return "NBD_CMD_FLUSH";
294 * Check whether a client is allowed to connect. Works with an authorization
295 * file which contains one line per machine, no wildcards.
297 * @param opts The client who's trying to connect.
298 * @return 0 - authorization refused, 1 - OK
// Decides whether the peer named in opts->clientname may use this export by
// scanning the authorization file opts->server->authname one line at a time.
// A line containing '/' is handled as an address/prefix-length entry and is
// compared after masking both addresses; any other line is compared textually
// against the client name.
300 int authorized_client(CLIENT *opts) {
301 const char *ERRMSG="Invalid entry '%s' in authfile '%s', so, refusing all connections.";
306 struct in_addr client;
307 struct in_addr cltemp;
310 if ((f=fopen(opts->server->authname,"r"))==NULL) {
311 msg4(LOG_INFO,"Can't open authorization file %s (%s).",
312 opts->server->authname,strerror(errno)) ;
// NOTE(review): inet_aton() only understands IPv4 dotted notation and its
// result is ignored here. For any other clientname (the changelog mentions
// IPv6 support) `client` keeps whatever was on the stack, and that value is
// what the masked comparison below uses.
316 inet_aton(opts->clientname, &client);
317 while (fgets(line,LINELEN,f)!=NULL) {
318 if((tmp=index(line, '/'))) {
319 if(strlen(line)<=tmp-line) {
320 msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
324 if(!inet_aton(line,&addr)) {
325 msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
328 len=strtol(tmp, NULL, 0);
// NOTE(review): len is taken from the file with no range check in view. A
// shift count of 32-len is undefined for a 32-bit s_addr when len <= 0
// (count >= 32) or len > 32 (negative count).
// NOTE(review): s_addr is stored in network byte order; shifting it as a host
// integer presumably masks the wrong end of the address on little-endian
// machines - confirm, and convert with ntohl() first if so.
329 addr.s_addr>>=32-len;
330 addr.s_addr<<=32-len;
331 memcpy(&cltemp,&client,sizeof(client));
332 cltemp.s_addr>>=32-len;
333 cltemp.s_addr<<=32-len;
334 if(addr.s_addr == cltemp.s_addr) {
// NOTE(review): only strlen(clientname) characters are compared, so this is a
// prefix test: a client named 10.0.0.1 also matches a line reading 10.0.0.12.
338 if (strncmp(line,opts->clientname,strlen(opts->clientname))==0) {
348 * Read data from a file descriptor into a buffer
350 * @param f a file descriptor
351 * @param buf a buffer
352 * @param len the number of bytes to be read
// Reads from descriptor f into buf using read(). A result <= 0 is reported
// through err() unless errno is EAGAIN.
354 static inline void readit(int f, void *buf, size_t len) {
// NOTE(review): read() returning 0 (end of file) also lands in this branch,
// but read() does not set errno in that case, so the EAGAIN test below looks
// at a stale value left by some earlier call.
358 if ((res = read(f, buf, len)) <= 0) {
359 if(errno != EAGAIN) {
360 err("Read failed: %m");
370 * Consume data from an FD that we don't want
372 * @param f a file descriptor
373 * @param buf a buffer
374 * @param len the number of bytes to consume
375 * @param bufsiz the size of the buffer
// Discards unwanted bytes from descriptor f by reading them into buf, which
// serves purely as scratch space of bufsiz bytes.
377 static inline void consume(int f, void * buf, size_t len, size_t bufsiz) {
// Never ask readit() for more than the scratch buffer can hold.
380 curlen = (len>bufsiz)?bufsiz:len;
381 readit(f, buf, curlen);
388 * Write data from a buffer into a filedescriptor
390 * @param f a file descriptor
391 * @param buf a buffer containing data
392 * @param len the number of bytes to be written
// Writes from buf to descriptor f using write(). Any result <= 0 is reported
// through err() with the message "Send failed".
394 static inline void writeit(int f, void *buf, size_t len) {
398 if ((res = write(f, buf, len)) <= 0)
399 err("Send failed: %m");
406 * Print out a message about how to use nbd-server. Split out to a separate
407 * function so that we can call it from multiple places
410 printf("This is nbd-server version " VERSION "\n");
411 printf("Usage: [ip:|ip6@]port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-C configuration file] [-p PID file name] [-o section name] [-M max connections]\n"
412 "\t-r|--read-only\t\tread only\n"
413 "\t-m|--multi-file\t\tmultiple file\n"
414 "\t-c|--copy-on-write\tcopy on write\n"
415 "\t-C|--config-file\tspecify an alternate configuration file\n"
416 "\t-l|--authorize-file\tfile with list of hosts that are allowed to\n\t\t\t\tconnect.\n"
417 "\t-p|--pid-file\t\tspecify a filename to write our PID to\n"
418 "\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n"
419 "\t-M|--max-connections\tspecify the maximum number of opened connections\n\n"
420 "\tif port is set to 0, stdin is used (for running from inetd).\n"
421 "\tif file_to_export contains '%%s', it is substituted with the IP\n"
422 "\t\taddress of the machine trying to connect\n"
423 "\tif ip is set, it contains the local IP address on which we're listening.\n\tif not, the server will listen on all local IP addresses\n");
424 printf("Using configuration file %s\n", CFILE);
427 /* Dumps a config file section of the given SERVER*, and exits. */
// Prints serve to stdout as a configuration-file section headed by
// [section_header]. exportname, listenaddr and port are always printed; the
// readonly, multifile and copyonwrite keys appear only when the matching flag
// is set, and filesize / authfile only when they are non-zero / non-NULL.
428 void dump_section(SERVER* serve, gchar* section_header) {
429 printf("[%s]\n", section_header);
430 printf("\texportname = %s\n", serve->exportname);
// NOTE(review): cmdline() stores NULL in listenaddr when no address was given
// on the command line; passing NULL for %s is undefined behaviour. Unlike
// authname below, there is no guard here.
431 printf("\tlistenaddr = %s\n", serve->listenaddr);
// NOTE(review): SERVER.port is declared unsigned int, so %u is the matching
// conversion.
432 printf("\tport = %d\n", serve->port);
433 if(serve->flags & F_READONLY) {
434 printf("\treadonly = true\n");
436 if(serve->flags & F_MULTIFILE) {
437 printf("\tmultifile = true\n");
439 if(serve->flags & F_COPYONWRITE) {
440 printf("\tcopyonwrite = true\n");
442 if(serve->expected_size) {
443 printf("\tfilesize = %lld\n", (long long int)serve->expected_size);
445 if(serve->authname) {
446 printf("\tauthfile = %s\n", serve->authname);
452 * Parse the command line.
454 * @param argc the argc argument to main()
455 * @param argv the argv argument to main()
// Builds a SERVER description from the command line with getopt_long().
// The option string starts with '-', which makes getopt_long() return
// non-option arguments in place; the nonspecial counter then tells them apart
// by position: first the [address]port, then the export file name (which must
// be absolute), then the optional size with a k/K/m/M suffix.
457 SERVER* cmdline(int argc, char *argv[]) {
461 struct option long_options[] = {
462 {"read-only", no_argument, NULL, 'r'},
463 {"multi-file", no_argument, NULL, 'm'},
464 {"copy-on-write", no_argument, NULL, 'c'},
465 {"dont-fork", no_argument, NULL, 'd'},
466 {"authorize-file", required_argument, NULL, 'l'},
467 {"config-file", required_argument, NULL, 'C'},
468 {"pid-file", required_argument, NULL, 'p'},
469 {"output-config", required_argument, NULL, 'o'},
// NOTE(review): the usage text advertises --max-connections (plural) while
// this table registers "max-connection"; one of the two should be aligned.
470 {"max-connection", required_argument, NULL, 'M'},
477 gboolean do_output=FALSE;
478 gchar* section_header="";
484 serve=g_new0(SERVER, 1);
485 serve->authname = g_strdup(default_authname);
486 serve->virtstyle=VIRT_IPLIT;
487 while((c=getopt_long(argc, argv, "-C:cdl:mo:rp:M:", long_options, &i))>=0) {
490 /* non-option argument */
491 switch(nonspecial++) {
// First and last ':' coincide when the argument holds at most one colon;
// that form is split on ':'. Arguments with several colons are split on '@'.
493 if(strchr(optarg, ':') == strrchr(optarg, ':')) {
494 addr_port=g_strsplit(optarg, ":", 2);
496 /* Check for "@" - maybe user using this separator
499 g_strfreev(addr_port);
500 addr_port=g_strsplit(optarg, "@", 2);
503 addr_port=g_strsplit(optarg, "@", 2);
// NOTE(review): the strtol() results for the port are stored without any
// validation of the end pointer or range.
507 serve->port=strtol(addr_port[1], NULL, 0);
508 serve->listenaddr=g_strdup(addr_port[0]);
510 serve->listenaddr=NULL;
511 serve->port=strtol(addr_port[0], NULL, 0);
513 g_strfreev(addr_port);
516 serve->exportname = g_strdup(optarg);
517 if(serve->exportname[0] != '/') {
518 fprintf(stderr, "E: The to be exported file needs to be an absolute filename!\n");
523 last=strlen(optarg)-1;
525 if (suffix == 'k' || suffix == 'K' ||
526 suffix == 'm' || suffix == 'M')
528 es = (off_t)atoll(optarg);
536 serve->expected_size = es;
541 serve->flags |= F_READONLY;
544 serve->flags |= F_MULTIFILE;
548 section_header = g_strdup(optarg);
// NOTE(review): pidftemplate is a 256-byte array and strncpy() does not write
// a terminating NUL when optarg is 256 characters or longer.
551 strncpy(pidftemplate, optarg, 256);
554 serve->flags |=F_COPYONWRITE;
560 g_free(config_file_pos);
561 config_file_pos=g_strdup(optarg);
564 g_free(serve->authname);
565 serve->authname=g_strdup(optarg);
568 serve->max_connections = strtol(optarg, NULL, 0);
576 /* What's left: the port to export, the name of the to be exported
577 * file, and, optionally, the size of the file, in that order. */
586 g_critical("Need a complete configuration on the command line to output a config file section!");
589 dump_section(serve, section_header);
595 * Error codes for config file parsing
598 CFILE_NOTFOUND, /**< The configuration file is not found */
599 CFILE_MISSING_GENERIC, /**< The (required) group "generic" is missing */
600 CFILE_KEY_MISSING, /**< A (required) key is missing */
601 CFILE_VALUE_INVALID, /**< A value is syntactically invalid */
602 CFILE_VALUE_UNSUPPORTED,/**< A value is not supported in this build */
603 CFILE_PROGERR, /**< Programmer error */
604 CFILE_NO_EXPORTS, /**< A config file was specified that does not
605 define any exports */
606 CFILE_INCORRECT_PORT, /**< The reserved port was specified for an
611 * Remove a SERVER from memory. Used from the hash table
// Releases the strings owned by a SERVER. The gpointer parameter gives it the
// signature GLib expects for a destroy callback.
// g_free() ignores NULL, so the explicit NULL tests on listenaddr and
// transactionlog are not required for safety.
613 void remove_server(gpointer s) {
617 g_free(server->exportname);
// NOTE(review): dup_serve() allocates authname with strdup(), not g_strdup(),
// while it is released with g_free() here; the pairing should be consistent.
619 g_free(server->authname);
620 if(server->listenaddr)
621 g_free(server->listenaddr);
623 g_free(server->prerun);
625 g_free(server->postrun);
626 if(server->transactionlog)
627 g_free(server->transactionlog);
633 * @param s the old server we want to duplicate
634 * @return new duplicated server
// Creates a copy of *s in a freshly zeroed SERVER: string members are
// duplicated so the copy owns its own storage, scalar members are copied by
// value.
636 SERVER* dup_serve(SERVER *s) {
637 SERVER *serve = NULL;
639 serve=g_new0(SERVER, 1);
644 serve->exportname = g_strdup(s->exportname);
646 serve->expected_size = s->expected_size;
649 serve->listenaddr = g_strdup(s->listenaddr);
651 serve->port = s->port;
// NOTE(review): every other string here is copied with g_strdup(), and
// remove_server() releases authname with g_free(). Plain strdup() is the odd
// one out and, unlike g_strdup(), must not be given a NULL pointer.
654 serve->authname = strdup(s->authname);
656 serve->flags = s->flags;
657 serve->socket = s->socket;
658 serve->socket_family = s->socket_family;
659 serve->virtstyle = s->virtstyle;
660 serve->cidrlen = s->cidrlen;
663 serve->prerun = g_strdup(s->prerun);
666 serve->postrun = g_strdup(s->postrun);
668 if(s->transactionlog)
669 serve->transactionlog = g_strdup(s->transactionlog);
672 serve->servename = g_strdup(s->servename);
674 serve->max_connections = s->max_connections;
680 * append new server to array
682 * @param a server array
683 * @return 0 success, -1 error
// Resolves s->listenaddr and s->port with getaddrinfo() (any address family,
// passive TCP stream sockets) and, for each address returned, appends a SERVER
// to array a whose listenaddr is the numeric host string and whose
// socket_family is that address's family. One configured export can therefore
// produce several array entries.
685 int append_serve(SERVER *s, GArray *a) {
687 struct addrinfo hints;
688 struct addrinfo *ai = NULL;
689 struct addrinfo *rp = NULL;
690 char host[NI_MAXHOST];
696 err("Invalid parsing server");
// NOTE(review): s->port is unsigned int; %u is the matching conversion.
700 port = g_strdup_printf("%d", s->port);
702 memset(&hints,'\0',sizeof(hints));
703 hints.ai_family = AF_UNSPEC;
704 hints.ai_socktype = SOCK_STREAM;
705 hints.ai_flags = AI_ADDRCONFIG | AI_PASSIVE;
706 hints.ai_protocol = IPPROTO_TCP;
708 e = getaddrinfo(s->listenaddr, port, &hints, &ai);
714 for (rp = ai; rp != NULL; rp = rp->ai_next) {
715 e = getnameinfo(rp->ai_addr, rp->ai_addrlen, host, sizeof(host), NULL, 0, NI_NUMERICHOST);
717 if (e != 0) { // error
718 fprintf(stderr, "getnameinfo: %s\n", gai_strerror(e));
722 // duplicate server and set listenaddr to resolved IP address
// NOTE(review): presumably ns is the dup_serve() copy of s; if so it already
// owns a duplicated listenaddr that is overwritten here - confirm it is freed
// first.
725 ns->listenaddr = g_strdup(host);
726 ns->socket_family = rp->ai_family;
// The structure is copied by value into the array.
727 g_array_append_val(a, *ns);
735 fprintf(stderr, "getaddrinfo failed on listen host/address: %s (%s)\n", s->listenaddr ? s->listenaddr : "any", gai_strerror(e));
746 * Parse the config file.
748 * @param f the name of the config file
749 * @param e a GError. @see CFILE_ERRORS for what error values this function can
751 * @return a GArray of SERVER structures. If the config file is empty or does not
752 * exist, returns an empty GArray; if the config file contains an
753 * error, returns NULL, and e is set appropriately
// Loads configuration file f with GLib's GKeyFile parser. The first group must
// be [generic]; its keys are written to the process-wide settings (runuser,
// rungroup, do_oldstyle, modern_listen, modernport). Each later group is parsed
// into a scratch SERVER and handed to append_serve(), which adds it to the
// returned GArray. Problems are reported through *e using the CFILE_* codes.
// The two PARAM tables below drive the parsing: each row names a key, whether
// it is required, its type, where the value is stored and, for booleans, which
// flag bit to set or clear.
755 GArray* parse_cfile(gchar* f, GError** e) {
756 const char* DEFAULT_ERROR = "Could not parse %s in group %s: %s";
757 const char* MISSING_REQUIRED_ERROR = "Could not find required value %s in group %s: %s";
759 gchar *virtstyle=NULL;
761 { "exportname", TRUE, PARAM_STRING, &(s.exportname), 0 },
762 { "port", TRUE, PARAM_INT, &(s.port), 0 },
763 { "authfile", FALSE, PARAM_STRING, &(s.authname), 0 },
764 { "filesize", FALSE, PARAM_INT, &(s.expected_size), 0 },
765 { "virtstyle", FALSE, PARAM_STRING, &(virtstyle), 0 },
766 { "prerun", FALSE, PARAM_STRING, &(s.prerun), 0 },
767 { "postrun", FALSE, PARAM_STRING, &(s.postrun), 0 },
768 { "transactionlog", FALSE, PARAM_STRING, &(s.transactionlog), 0 },
769 { "readonly", FALSE, PARAM_BOOL, &(s.flags), F_READONLY },
770 { "multifile", FALSE, PARAM_BOOL, &(s.flags), F_MULTIFILE },
771 { "copyonwrite", FALSE, PARAM_BOOL, &(s.flags), F_COPYONWRITE },
772 { "sparse_cow", FALSE, PARAM_BOOL, &(s.flags), F_SPARSE },
773 { "sdp", FALSE, PARAM_BOOL, &(s.flags), F_SDP },
774 { "sync", FALSE, PARAM_BOOL, &(s.flags), F_SYNC },
775 { "flush", FALSE, PARAM_BOOL, &(s.flags), F_FLUSH },
776 { "fua", FALSE, PARAM_BOOL, &(s.flags), F_FUA },
777 { "rotational", FALSE, PARAM_BOOL, &(s.flags), F_ROTATIONAL },
778 { "temporary", FALSE, PARAM_BOOL, &(s.flags), F_TEMPORARY },
779 { "listenaddr", FALSE, PARAM_STRING, &(s.listenaddr), 0 },
780 { "maxconnections", FALSE, PARAM_INT, &(s.max_connections), 0 },
782 const int lp_size=sizeof(lp)/sizeof(PARAM);
784 { "user", FALSE, PARAM_STRING, &runuser, 0 },
785 { "group", FALSE, PARAM_STRING, &rungroup, 0 },
786 { "oldstyle", FALSE, PARAM_BOOL, &do_oldstyle, 1 },
787 { "listenaddr", FALSE, PARAM_STRING, &modern_listen, 0 },
788 { "port", FALSE, PARAM_STRING, &modernport, 0 },
791 int p_size=sizeof(gp)/sizeof(PARAM);
794 const char *err_msg=NULL;
805 errdomain = g_quark_from_string("parse_cfile");
806 cfile = g_key_file_new();
// Elements are whole SERVER structures, not pointers to them.
807 retval = g_array_new(FALSE, TRUE, sizeof(SERVER));
808 if(!g_key_file_load_from_file(cfile, f, G_KEY_FILE_KEEP_COMMENTS |
809 G_KEY_FILE_KEEP_TRANSLATIONS, &err)) {
810 g_set_error(e, errdomain, CFILE_NOTFOUND, "Could not open config file %s.", f);
811 g_key_file_free(cfile);
814 startgroup = g_key_file_get_start_group(cfile);
815 if(!startgroup || strcmp(startgroup, "generic")) {
816 g_set_error(e, errdomain, CFILE_MISSING_GENERIC, "Config file does not contain the [generic] group!");
817 g_key_file_free(cfile);
820 groups = g_key_file_get_groups(cfile, NULL);
821 for(i=0;groups[i];i++) {
// The scratch SERVER is cleared for every group so no value leaks from the
// previous one.
822 memset(&s, '\0', sizeof(SERVER));
824 /* After the [generic] group, start parsing exports */
829 for(j=0;j<p_size;j++) {
830 g_assert(p[j].target != NULL);
831 g_assert(p[j].ptype==PARAM_INT||p[j].ptype==PARAM_STRING||p[j].ptype==PARAM_BOOL);
834 ival = g_key_file_get_integer(cfile,
// NOTE(review): the store goes through a gint pointer, so every PARAM_INT
// target has to be exactly gint-sized. "filesize" points at s.expected_size,
// an off_t; where off_t is 64 bits only part of it is written and the value is
// limited to the gint range. Confirm, and consider a 64-bit getter.
839 *((gint*)p[j].target) = ival;
843 sval = g_key_file_get_string(cfile,
848 *((gchar**)p[j].target) = sval;
852 bval = g_key_file_get_boolean(cfile,
854 p[j].paramname, &err);
857 *((gint*)p[j].target) |= p[j].flagval;
859 *((gint*)p[j].target) &= ~(p[j].flagval);
865 if(err->code == G_KEY_FILE_ERROR_KEY_NOT_FOUND) {
867 /* Ignore not-found error for optional values */
871 err_msg = MISSING_REQUIRED_ERROR;
874 err_msg = DEFAULT_ERROR;
876 g_set_error(e, errdomain, CFILE_VALUE_INVALID, err_msg, p[j].paramname, groups[i], err->message);
877 g_array_free(retval, TRUE);
879 g_key_file_free(cfile);
// NOTE(review): these are prefix comparisons, so a value with trailing text
// after a known keyword (for example "nonexistent") is accepted as that
// keyword. For cidrhash the trailing text is intentional: it carries the
// prefix length.
884 if(!strncmp(virtstyle, "none", 4)) {
885 s.virtstyle=VIRT_NONE;
886 } else if(!strncmp(virtstyle, "ipliteral", 9)) {
887 s.virtstyle=VIRT_IPLIT;
888 } else if(!strncmp(virtstyle, "iphash", 6)) {
889 s.virtstyle=VIRT_IPHASH;
890 } else if(!strncmp(virtstyle, "cidrhash", 8)) {
891 s.virtstyle=VIRT_CIDR;
892 if(strlen(virtstyle)<10) {
893 g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s: missing length", virtstyle, groups[i]);
894 g_array_free(retval, TRUE);
895 g_key_file_free(cfile);
// NOTE(review): cidrlen is a uint8_t; the strtol() result is stored without a
// range or syntax check, so out-of-range input is silently truncated.
898 s.cidrlen=strtol(virtstyle+8, NULL, 0);
900 g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s", virtstyle, groups[i]);
901 g_array_free(retval, TRUE);
902 g_key_file_free(cfile);
905 if(s.port && !do_oldstyle) {
906 g_warning("A port was specified, but oldstyle exports were not requested. This may not do what you expect.");
907 g_warning("Please read 'man 5 nbd-server' and search for oldstyle for more info");
910 s.virtstyle=VIRT_IPLIT;
912 /* Don't need to free this, it's not our string */
914 /* Don't append values for the [generic] group */
916 s.socket_family = AF_UNSPEC;
// servename aliases the group-name string owned by the groups array.
917 s.servename = groups[i];
// NOTE(review): append_serve() returns an int status that is ignored here.
919 append_serve(&s, retval);
926 if(s.flags & F_SDP) {
927 g_set_error(e, errdomain, CFILE_VALUE_UNSUPPORTED, "This nbd-server was built without support for SDP, yet group %s uses it", groups[i]);
928 g_array_free(retval, TRUE);
929 g_key_file_free(cfile);
935 g_set_error(e, errdomain, CFILE_NO_EXPORTS, "The config file does not specify any exports");
937 g_key_file_free(cfile);
942 * Signal handler for SIGCHLD
943 * @param s the signal we're handling (must be SIGCHLD, or something
// SIGCHLD handler: reaps every finished child without blocking (waitpid with
// WNOHANG in a loop), logs the exit status of children that exited normally,
// and drops the child's PID from the children table.
// NOTE(review): the logging macros (syslog or g_message), the DEBUG printf and
// the GHashTable calls are not async-signal-safe; running them inside a signal
// handler can corrupt state if the signal interrupts the same facilities in
// the main flow.
946 void sigchld_handler(int s) {
951 while((pid=waitpid(-1, &status, WNOHANG)) > 0) {
952 if(WIFEXITED(status)) {
953 msg3(LOG_INFO, "Child exited with %d", WEXITSTATUS(status));
// The key passed is the address of pid. This assumes the table was created
// with hash and equality functions that dereference the key - TODO confirm.
955 i=g_hash_table_lookup(children, &pid);
957 msg3(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid);
959 DEBUG("Removing %d from the list of children", pid);
960 g_hash_table_remove(children, &pid);
966 * Kill a child. Called from sigterm_handler::g_hash_table_foreach.
969 * @param value the value corresponding to the above key
970 * @param user_data a pointer which we always set to 1, so that we know what
// Per-entry callback handed to g_hash_table_foreach() by sigterm_handler().
// user_data is interpreted as a pointer to int.
973 void killchild(gpointer key, gpointer value, gpointer user_data) {
975 int *parent=user_data;
982 * Handle SIGTERM and dispatch it to our children
983 * @param s the signal we're handling (must be SIGTERM, or something
984 * is severely wrong).
// SIGTERM handler: walks the children table and runs killchild() on every
// entry, passing the address of parent as user_data.
// NOTE(review): iterating a GHashTable from a signal handler is not
// async-signal-safe.
986 void sigterm_handler(int s) {
989 g_hash_table_foreach(children, killchild, &parent);
999 * Detect the size of a file.
1001 * @param fhandle An open filedescriptor
1002 * @return the size of the file, or OFFT_MAX if detection was
// Determines the size of the object behind fhandle by trying, in order:
// the BLKGETSIZE64 ioctl (only when the required headers are available),
// fstat(), and finally lseek() to the end of the file. err() is called when
// no probe yields a size.
1005 off_t size_autodetect(int fhandle) {
1008 struct stat stat_buf;
1011 #ifdef HAVE_SYS_MOUNT_H
1012 #ifdef HAVE_SYS_IOCTL_H
1014 DEBUG("looking for export size with ioctl BLKGETSIZE64\n");
// A successful ioctl that reports zero bytes is not trusted; the later probes
// get a chance instead.
1015 if (!ioctl(fhandle, BLKGETSIZE64, &bytes) && bytes) {
1016 return (off_t)bytes;
1018 #endif /* BLKGETSIZE64 */
1019 #endif /* HAVE_SYS_IOCTL_H */
1020 #endif /* HAVE_SYS_MOUNT_H */
1022 DEBUG("looking for fhandle size with fstat\n");
1023 stat_buf.st_size = 0;
1024 error = fstat(fhandle, &stat_buf);
1026 /* always believe stat if a regular file as it might really
// For anything that is not a regular file a zero st_size is not believed.
1028 if (S_ISREG(stat_buf.st_mode) || (stat_buf.st_size > 0))
1029 return (off_t)stat_buf.st_size;
1031 err("fstat failed: %m");
1034 DEBUG("looking for fhandle size with lseek SEEK_END\n");
// Note that this probe moves the descriptor's file offset to the end.
1035 es = lseek(fhandle, (off_t)0, SEEK_END);
1036 if (es > ((off_t)0)) {
1039 DEBUG("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4)));
1042 err("Could not find size of exported block device: %m");
1047 * Get the file handle and offset, given an export offset.
1049 * @param export An array of export files
1050 * @param a The offset to get corresponding file/offset for
1051 * @param fhandle [out] File descriptor
1052 * @param foffset [out] Offset into fhandle
1053 * @param maxbytes [out] Tells how many bytes can be read/written
1054 * from fhandle starting at foffset (0 if there is no limit)
1055 * @return 0 on success, -1 on failure
// Translates export offset a into a (file descriptor, offset within that file)
// pair for an export that may span several files. export holds FILE_INFO
// entries; the binary search below relies on them being ordered by startoff.
// On return *maxbytes is the distance from a to the start of the next file,
// when there is a next file.
1057 int get_filepos(GArray* export, off_t a, int* fhandle, off_t* foffset, size_t* maxbytes ) {
1058 /* Negative offset not allowed */
1062 /* Binary search for last file with starting offset <= a */
1065 int end = export->len - 1;
1066 while( start <= end ) {
1067 int mid = (start + end) / 2;
1068 fi = g_array_index(export, FILE_INFO, mid);
1069 if( fi.startoff < a ) {
1071 } else if( fi.startoff > a ) {
1079 /* end should never go negative, since first startoff is 0 and a >= 0 */
1082 fi = g_array_index(export, FILE_INFO, end);
1083 *fhandle = fi.fhandle;
1084 *foffset = a - fi.startoff;
// export->len is unsigned while end is int, so this comparison is done in
// unsigned arithmetic; it is only correct because end is non-negative here.
1086 if( end+1 < export->len ) {
1087 FILE_INFO fi_next = g_array_index(export, FILE_INFO, end+1);
1088 *maxbytes = fi_next.startoff - a;
1095 * seek to a position in a file, with error handling.
1096 * @param handle a filedescriptor
1097 * @param a position to seek to
1098 * @todo get rid of this; lastpoint is a global variable right now, but it
1099 * shouldn't be. If we pass it on as a parameter, that makes things a *lot*
// Positions descriptor handle at absolute offset a (SEEK_SET) and reports a
// failed seek through err().
1102 void myseek(int handle,off_t a) {
1103 if (lseek(handle, a, SEEK_SET) < 0) {
1104 err("Can not seek locally!\n");
1109 * Write an amount of bytes at a given offset to the right file. This
1110 * abstracts the write-side of the multiple file option.
1112 * @param a The offset where the write should start
1113 * @param buf The buffer to write from
1114 * @param len The length of buf
1115 * @param client The client we're serving for
1116 * @param fua Flag to indicate 'Force Unit Access'
1117 * @return The number of bytes actually written, or -1 in case of an error
// Performs one write of up to len bytes from buf at export offset a.
// get_filepos() picks the backing file and the offset inside it; when it
// reports a byte limit, len is checked against that limit so one call does not
// run past the end of the current backing file. The data is written with a
// seek followed by write(), so the result may be a short count; use
// rawexpwrite_fully() to write everything.
1119 ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
1125 if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
1127 if(maxbytes && len > maxbytes)
1130 DEBUG("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle, (long long unsigned)foffset, (unsigned int)len, fua);
1132 myseek(fhandle, foffset);
1133 retval = write(fhandle, buf, len);
// Exports configured with the sync option get extra flushing after the write.
1134 if(client->server->flags & F_SYNC) {
1138 /* This is where we would do the following
1139 * #ifdef USE_SYNC_FILE_RANGE
1140 * However, we don't, for the reasons set out below
1141 * by Christoph Hellwig <hch@infradead.org>
1144 * fdatasync is equivalent to fsync except that it does not flush
1145 * non-essential metadata (basically just timestamps in practice), but it
1146 * does flush metadata requried to find the data again, e.g. allocation
1147 * information and extent maps. sync_file_range does nothing but flush
1148 * out pagecache content - it means you basically won't get your data
1149 * back in case of a crash if you either:
1151 * a) have a volatile write cache in your disk (e.g. any normal SATA disk)
1152 * b) are using a sparse file on a filesystem
1153 * c) are using a fallocate-preallocated file on a filesystem
1154 * d) use any file on a COW filesystem like btrfs
1156 * e.g. it only does anything useful for you if you do not have a volatile
1157 * write cache, and either use a raw block device node, or just overwrite
1158 * an already fully allocated (and not preallocated) file on a non-COW
1162 * What we should do is open a second FD with O_DSYNC set, then write to
1163 * that when appropriate. However, with a Linux client, every REQ_FUA
1164 * immediately follows a REQ_FLUSH, so fdatasync does not cause performance
// NOTE(review): the explanation above says sync_file_range is deliberately not
// used, so the call below is presumably compiled out - confirm.
1169 sync_file_range(fhandle, foffset, len,
1170 SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE |
1171 SYNC_FILE_RANGE_WAIT_AFTER);
1180 * Call rawexpwrite repeatedly until all data has been written.
1182 * @param a The offset where the write should start
1183 * @param buf The buffer to write from
1184 * @param len The length of buf
1185 * @param client The client we're serving for
1186 * @param fua Flag to indicate 'Force Unit Access'
1187 * @return 0 on success, nonzero on failure
// Keeps calling rawexpwrite() while bytes remain and the previous call made
// progress. The result is non-zero when the last call failed or when some
// bytes are still unwritten.
// NOTE(review): when len is 0 on entry the loop never assigns ret, so ret must
// have an initial value for the return expression to be defined - confirm.
1189 int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
1192 while(len > 0 && (ret=rawexpwrite(a, buf, len, client, fua)) > 0 ) {
1197 return (ret < 0 || len != 0);
1201 * Read an amount of bytes at a given offset from the right file. This
1202 * abstracts the read-side of the multiple files option.
1204 * @param a The offset where the read should start
1205 * @param buf A buffer to read into
1206 * @param len The size of buf
1207 * @param client The client we're serving for
1208 * @return The number of bytes actually read, or -1 in case of an
1211 ssize_t rawexpread(off_t a, char *buf, size_t len, CLIENT *client) {
1216 if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
1218 if(maxbytes && len > maxbytes)
1221 DEBUG("(READ from fd %d offset %llu len %u), ", fhandle, (long long unsigned int)foffset, (unsigned int)len);
1223 myseek(fhandle, foffset);
1224 return read(fhandle, buf, len);
1228 * Call rawexpread repeatedly until all data has been read.
1229 * @return 0 on success, nonzero on failure
1231 int rawexpread_fully(off_t a, char *buf, size_t len, CLIENT *client) {
1234 while(len > 0 && (ret=rawexpread(a, buf, len, client)) > 0 ) {
1239 return (ret < 0 || len != 0);
1243 * Read an amount of bytes at a given offset from the right file. This
1244 * abstracts the read-side of the copyonwrite stuff, and calls
1245 * rawexpread() with the right parameters to do the actual work.
1246 * @param a The offset where the read should start
1247 * @param buf A buffer to read into
1248 * @param len The size of buf
1249 * @param client The client we're going to read for
1250 * @return 0 on success, nonzero on failure
1252 int expread(off_t a, char *buf, size_t len, CLIENT *client) {
1253 off_t rdlen, offset;
1254 off_t mapcnt, mapl, maph, pagestart;
1256 if (!(client->server->flags & F_COPYONWRITE))
1257 return(rawexpread_fully(a, buf, len, client));
1258 DEBUG("Asked to read %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
1260 mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
1262 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
1263 pagestart=mapcnt*DIFFPAGESIZE;
1265 rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
1266 len : (size_t)DIFFPAGESIZE-offset;
1267 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1268 DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
1269 (unsigned long)(client->difmap[mapcnt]));
1270 myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset);
1271 if (read(client->difffile, buf, rdlen) != rdlen) return -1;
1272 } else { /* the block is not there */
1273 DEBUG("Page %llu is not here, we read the original one\n",
1274 (unsigned long long)mapcnt);
1275 if(rawexpread_fully(a, buf, rdlen, client)) return -1;
1277 len-=rdlen; a+=rdlen; buf+=rdlen;
1283 * Write an amount of bytes at a given offset to the right file. This
1284 * abstracts the write-side of the copyonwrite option, and calls
1285 * rawexpwrite() with the right parameters to do the actual work.
1287 * @param a The offset where the write should start
1288 * @param buf The buffer to write from
1289 * @param len The length of buf
1290 * @param client The client we're going to write for.
1291 * @param fua Flag to indicate 'Force Unit Access'
1292 * @return 0 on success, nonzero on failure
1294 int expwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
1295 char pagebuf[DIFFPAGESIZE];
1296 off_t mapcnt,mapl,maph;
1301 if (!(client->server->flags & F_COPYONWRITE))
1302 return(rawexpwrite_fully(a, buf, len, client, fua));
1303 DEBUG("Asked to write %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
1305 mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
1307 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
1308 pagestart=mapcnt*DIFFPAGESIZE ;
1309 offset=a-pagestart ;
1310 wrlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
1311 len : (size_t)DIFFPAGESIZE-offset;
1313 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1314 DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
1315 (unsigned long)(client->difmap[mapcnt])) ;
1316 myseek(client->difffile,
1317 client->difmap[mapcnt]*DIFFPAGESIZE+offset);
1318 if (write(client->difffile, buf, wrlen) != wrlen) return -1 ;
1319 } else { /* the block is not there */
1320 myseek(client->difffile,client->difffilelen*DIFFPAGESIZE) ;
1321 client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++;
1322 DEBUG("Page %llu is not here, we put it at %lu\n",
1323 (unsigned long long)mapcnt,
1324 (unsigned long)(client->difmap[mapcnt]));
1325 rdlen=DIFFPAGESIZE ;
1326 if (rawexpread_fully(pagestart, pagebuf, rdlen, client))
1328 memcpy(pagebuf+offset,buf,wrlen) ;
1329 if (write(client->difffile, pagebuf, DIFFPAGESIZE) !=
1333 len-=wrlen ; a+=wrlen ; buf+=wrlen ;
1335 if (client->server->flags & F_SYNC) {
1336 fsync(client->difffile);
1338 /* open question: would it be cheaper to do multiple sync_file_ranges?
1339 as we iterate through the above?
1341 fdatasync(client->difffile);
1347 * Flush data to a client
1349 * @param client The client we're going to write for.
1350 * @return 0 on success, nonzero on failure
1352 int expflush(CLIENT *client) {
1355 if (client->server->flags & F_COPYONWRITE) {
1356 return fsync(client->difffile);
1359 for (i = 0; i < client->export->len; i++) {
1360 FILE_INFO fi = g_array_index(client->export, FILE_INFO, i);
1361 if (fsync(fi.fhandle) < 0)
1369 * Do the initial negotiation.
1371 * @param client The client we're negotiating with.
/*
 * negotiate: run the NBD handshake on socket `net`; which parts run is
 * selected by the bits in `phase` (NEG_INIT, NEG_MODERN, NEG_OLD).
 *  - NEG_INIT: write INIT_PASSWD followed by a 64-bit magic number.
 *  - NEG_MODERN together with NEG_INIT: read the client's option header and
 *    export name, look that name up in `servers` by servename and allocate
 *    a fresh CLIENT for the matching entry.
 *  - afterwards: write the export size, the flags and 124 zero bytes.
 * NOTE(review): short lines are missing from this listing, so the return
 * statements and several branch bodies are not visible; confirm them
 * against the complete file.
 */
1373 CLIENT* negotiate(int net, CLIENT *client, GArray* servers, int phase) {
1376 uint32_t flags = NBD_FLAG_HAS_FLAGS;
1377 uint16_t smallflags = 0;
1380 memset(zeros, '\0', sizeof(zeros));
1381 if(phase & NEG_INIT) {
1383 if (write(net, INIT_PASSWD, 8) < 0) {
1384 err_nonfatal("Negotiation failed: %m");
/* the magic that follows INIT_PASSWD depends on the handshake style;
 * presumably an else separates the two assignments below -- confirm */
1388 if(phase & NEG_MODERN) {
1390 magic = htonll(opts_magic);
1393 magic = htonll(cliserv_magic);
1395 if (write(net, &magic, sizeof(magic)) < 0) {
1396 err_nonfatal("Negotiation failed: %m");
1401 if ((phase & NEG_MODERN) && (phase & NEG_INIT)) {
1410 err("programmer error");
1411 if (write(net, &smallflags, sizeof(uint16_t)) < 0)
1412 err("Negotiation failed: %m");
/* NOTE(review): every read() below is only checked for < 0, so a short
 * read would go unnoticed */
1413 if (read(net, &reserved, sizeof(reserved)) < 0)
1414 err("Negotiation failed: %m");
1415 if (read(net, &magic, sizeof(magic)) < 0)
1416 err("Negotiation failed: %m");
1417 magic = ntohll(magic);
1418 if(magic != opts_magic) {
1422 if (read(net, &opt, sizeof(opt)) < 0)
1423 err("Negotiation failed: %m");
1425 if(opt != NBD_OPT_EXPORT_NAME) {
1429 if (read(net, &namelen, sizeof(namelen)) < 0)
1430 err("Negotiation failed: %m");
/* NOTE(review): namelen is supplied by the peer; no upper bound and no
 * check of the malloc() result are visible here -- confirm */
1431 namelen = ntohl(namelen);
1432 name = malloc(namelen+1);
1434 if (read(net, name, namelen) < 0)
1435 err("Negotiation failed: %m");
1436 for(i=0; i<servers->len; i++) {
1437 SERVER* serve = &(g_array_index(servers, SERVER, i));
1438 if(!strcmp(serve->servename, name)) {
/* this local `client` shadows the function parameter of the same name */
1439 CLIENT* client = g_new0(CLIENT, 1);
1440 client->server = serve;
1441 client->exportsize = OFFT_MAX;
1443 client->modern = TRUE;
1444 client->transactionlogfd = -1;
/* export size goes out as a 64-bit value in network byte order */
1453 size_host = htonll((u64)(client->exportsize));
1454 if (write(net, &size_host, 8) < 0)
1455 err("Negotiation failed: %m");
/* translate the server's configuration flags into protocol flags */
1456 if (client->server->flags & F_READONLY)
1457 flags |= NBD_FLAG_READ_ONLY;
1458 if (client->server->flags & F_FLUSH)
1459 flags |= NBD_FLAG_SEND_FLUSH;
1460 if (client->server->flags & F_FUA)
1461 flags |= NBD_FLAG_SEND_FUA;
1462 if (client->server->flags & F_ROTATIONAL)
1463 flags |= NBD_FLAG_ROTATIONAL;
1464 if (phase & NEG_OLD) {
1466 flags = htonl(flags);
1467 if (write(client->net, &flags, 4) < 0)
1468 err("Negotiation failed: %m");
/* NOTE(review): ~((uint16_t)0) is promoted to int, so this mask keeps
 * every bit; it is the narrowing cast that truncates flags to 16 bits */
1471 smallflags = (uint16_t)(flags & ~((uint16_t)0));
1472 smallflags = htons(smallflags);
1473 if (write(client->net, &smallflags, sizeof(smallflags)) < 0) {
1474 err("Negotiation failed: %m");
1478 if (write(client->net, zeros, 124) < 0)
1479 err("Negotiation failed: %m");
/**
 * sending macro: write `reply` to `net` and, when a transaction log is open,
 * to the log as well. Relies on a variable named `client` being in scope at
 * the point of use. Wrapped in do { } while (0) so that it behaves as one
 * statement, e.g. in an unbraced if/else.
 */
#define SEND(net,reply) do { writeit( (net), &(reply), sizeof( reply )); \
	if (client->transactionlogfd != -1) \
		writeit(client->transactionlogfd, &(reply), sizeof(reply)); } while (0)
/**
 * error macro: send `reply` with its error field set to `errcode` (converted
 * to network byte order), then reset the field to 0 for the next reply.
 */
#define ERROR(client,reply,errcode) do { (reply).error = htonl(errcode); SEND((client)->net,reply); (reply).error = 0; } while (0)
1490 * Serve a file to a single client.
1492 * @todo This beast needs to be split up in many tiny little manageable
1493 * pieces. Preferably with a chainsaw.
1495 * @param client The client we're going to serve to.
1496 * @return when the client disconnects
/*
 * mainloop: request loop for one connected client. After negotiate() it
 * repeatedly reads an nbd_request from client->net, validates it and then
 * handles disconnect, write, flush or read, answering with an nbd_reply.
 * Requests and replies are mirrored to client->transactionlogfd when that
 * descriptor is not -1.
 * NOTE(review): short lines (loop header, switch/case labels, continue and
 * return statements) are missing from this listing; the control flow
 * between the fragments below has to be confirmed against the full file.
 */
1498 int mainloop(CLIENT *client) {
1499 struct nbd_request request;
1500 struct nbd_reply reply;
1501 gboolean go_on=TRUE;
1505 negotiate(client->net, client, NULL, client->modern ? NEG_MODERN : (NEG_OLD | NEG_INIT));
1506 DEBUG("Entering request loop!\n");
1507 reply.magic = htonl(NBD_REPLY_MAGIC);
1520 readit(client->net, &request, sizeof(request));
1521 if (client->transactionlogfd != -1)
1522 writeit(client->transactionlogfd, &request, sizeof(request));
/* convert the header fields we use from network to host byte order */
1524 request.from = ntohll(request.from);
1525 request.type = ntohl(request.type);
1526 command = request.type & NBD_CMD_MASK_COMMAND;
1527 len = ntohl(request.len);
1529 DEBUG("%s from %llu (%llu) len %d, ", getcommandname(command),
1530 (unsigned long long)request.from,
1531 (unsigned long long)request.from / 512, (unsigned int)len);
/* request.magic was not converted above, hence htonl() on the constant */
1533 if (request.magic != htonl(NBD_REQUEST_MAGIC))
1534 err("Not enough magic.");
1536 memcpy(reply.handle, request.handle, sizeof(reply.handle));
/* range checks for data transfers: reject with EINVAL when the request
 * would exceed OFFT_MAX or the export size */
1538 if ((command==NBD_CMD_WRITE) || (command==NBD_CMD_READ)) {
1539 if ((request.from + len) > (OFFT_MAX)) {
1540 DEBUG("[Number too large!]");
1541 ERROR(client, reply, EINVAL);
1545 if (((ssize_t)((off_t)request.from + len) > client->exportsize)) {
1547 ERROR(client, reply, EINVAL);
/* a single pass moves at most one buffer's worth of data (minus room for
 * the reply header) */
1552 if (currlen > BUFSIZE - sizeof(struct nbd_reply)) {
1553 currlen = BUFSIZE - sizeof(struct nbd_reply);
1554 msg2(LOG_INFO, "oversized request (this is not a problem)");
/* disconnect: in copy-on-write mode drop the map and remove the diff file */
1561 msg2(LOG_INFO, "Disconnect request received.");
1562 if (client->server->flags & F_COPYONWRITE) {
1563 if (client->difmap) g_free(client->difmap) ;
1564 close(client->difffile);
1565 unlink(client->difffilename);
1566 free(client->difffilename);
/* write: pull the payload off the socket first, then store it */
1572 DEBUG("wr: net->buf, ");
1574 readit(client->net, buf, currlen);
1575 DEBUG("buf->exp, ");
1576 if ((client->server->flags & F_READONLY) ||
1577 (client->server->flags & F_AUTOREADONLY)) {
1578 DEBUG("[WRITE to READONLY!]");
1579 ERROR(client, reply, EPERM);
/* the rest of the payload is still consumed from the socket after an error */
1580 consume(client->net, buf, len-currlen, BUFSIZE);
1583 if (expwrite(request.from, buf, currlen, client,
1584 request.type & NBD_CMD_FLAG_FUA)) {
1585 DEBUG("Write failed: %m" );
1586 ERROR(client, reply, errno);
1587 consume(client->net, buf, len-currlen, BUFSIZE);
1591 request.from += currlen;
1592 currlen = (len < BUFSIZE) ? len : BUFSIZE;
1594 SEND(client->net, reply);
/* flush */
1600 if (expflush(client)) {
1601 DEBUG("Flush failed: %m");
1602 ERROR(client, reply, errno);
1605 SEND(client->net, reply);
/* read: the reply header is copied to the front of buf so that header and
 * data can go out through one writeit() call */
1610 DEBUG("exp->buf, ");
1611 memcpy(buf, &reply, sizeof(struct nbd_reply));
1612 if (client->transactionlogfd != -1)
1613 writeit(client->transactionlogfd, &reply, sizeof(reply));
1614 p = buf + sizeof(struct nbd_reply);
1615 writelen = currlen + sizeof(struct nbd_reply);
1617 if (expread(request.from, p, currlen, client)) {
1618 DEBUG("Read failed: %m");
1619 ERROR(client, reply, errno);
1623 DEBUG("buf->net, ");
1624 writeit(client->net, buf, writelen);
1626 request.from += currlen;
1627 currlen = (len < BUFSIZE) ? len : BUFSIZE;
1635 DEBUG ("Ignoring unknown command\n");
1643 * Set up client export array, which is an array of FILE_INFO.
1644 * Also, split a single exportfile into multiple ones, if that was asked.
1645 * @param client information on the client which we want to setup export for
1647 void setupexport(CLIENT* client) {
1649 off_t laststartoff = 0, lastsize = 0;
1650 int multifile = (client->server->flags & F_MULTIFILE);
1651 int temporary = (client->server->flags & F_TEMPORARY) && !multifile;
1652 int cancreate = (client->server->expected_size) && !multifile;
1654 client->export = g_array_new(TRUE, TRUE, sizeof(FILE_INFO));
1656 /* If multi-file, open as many files as we can.
1657 * If not, open exactly one file.
1658 * Calculate file sizes as we go to get total size. */
1662 gchar* error_string;
1666 /* if expected_size is specified, and this is the first file, we can create the file */
1667 mode_t mode = (client->server->flags & F_READONLY) ?
1668 O_RDONLY : (O_RDWR | (cancreate?O_CREAT:0));
1671 tmpname=g_strdup_printf("%s.%d-XXXXXX", client->exportname, i);
1672 DEBUG( "Opening %s\n", tmpname );
1673 fi.fhandle = mkstemp(tmpname);
1676 tmpname=g_strdup_printf("%s.%d", client->exportname, i);
1678 tmpname=g_strdup(client->exportname);
1680 DEBUG( "Opening %s\n", tmpname );
1681 fi.fhandle = open(tmpname, mode, 0x600);
1682 if(fi.fhandle == -1 && mode == O_RDWR) {
1683 /* Try again because maybe media was read-only */
1684 fi.fhandle = open(tmpname, O_RDONLY);
1685 if(fi.fhandle != -1) {
1686 /* Opening the base file in copyonwrite mode is
1688 if(!(client->server->flags & F_COPYONWRITE)) {
1689 client->server->flags |= F_AUTOREADONLY;
1690 client->server->flags |= F_READONLY;
1695 if(fi.fhandle == -1) {
1696 if(multifile && i>0)
1698 error_string=g_strdup_printf(
1699 "Could not open exported file %s: %%m",
1705 unlink(tmpname); /* File will stick around whilst FD open */
1707 fi.startoff = laststartoff + lastsize;
1708 g_array_append_val(client->export, fi);
1711 /* Starting offset and size of this file will be used to
1712 * calculate starting offset of next file */
1713 laststartoff = fi.startoff;
1714 lastsize = size_autodetect(fi.fhandle);
1716 /* If we created the file, it will be length zero */
1717 if (!lastsize && cancreate) {
1718 /* we can ignore errors as we recalculate the size */
1719 ftruncate (fi.fhandle, client->server->expected_size);
1720 lastsize = size_autodetect(fi.fhandle);
1721 if (lastsize != client->server->expected_size)
1722 err("Could not expand file");
1723 break; /* don't look for any more files */
1726 if(!multifile || temporary)
1730 /* Set export size to total calculated size */
1731 client->exportsize = laststartoff + lastsize;
1733 /* Export size may be overridden */
1734 if(client->server->expected_size) {
1735 /* desired size must be <= total calculated size */
1736 if(client->server->expected_size > client->exportsize) {
1737 err("Size of exported file is too big\n");
1740 client->exportsize = client->server->expected_size;
1743 msg3(LOG_INFO, "Size of exported file/device is %llu", (unsigned long long)client->exportsize);
1745 msg3(LOG_INFO, "Total number of files: %d", i);
1749 int copyonwrite_prepare(CLIENT* client) {
1751 if ((client->difffilename = malloc(1024))==NULL)
1752 err("Failed to allocate string for diff file name");
1753 snprintf(client->difffilename, 1024, "%s-%s-%d.diff",client->exportname,client->clientname,
1755 client->difffilename[1023]='\0';
1756 msg3(LOG_INFO,"About to create map and diff file %s",client->difffilename) ;
1757 client->difffile=open(client->difffilename,O_RDWR | O_CREAT | O_TRUNC,0600) ;
1758 if (client->difffile<0) err("Could not create diff file (%m)") ;
1759 if ((client->difmap=calloc(client->exportsize/DIFFPAGESIZE,sizeof(u32)))==NULL)
1760 err("Could not allocate memory") ;
1761 for (i=0;i<client->exportsize/DIFFPAGESIZE;i++) client->difmap[i]=(u32)-1 ;
1767 * Run a command. This is used for the ``prerun'' and ``postrun'' config file
1770 * @param command the command to be ran. Read from the config file
1771 * @param file the file name we're about to export
1773 int do_run(gchar* command, gchar* file) {
1777 if(command && *command) {
1778 cmd = g_strdup_printf(command, file);
1786 * Serve a connection.
1788 * @todo allow for multithreading, perhaps use libevent. Not just yet, though;
1789 * follow the road map.
1791 * @param client a connected client
/*
 * serveconnection: drive one client connection from start to finish.
 * Opens the transaction log when one is configured and not yet open, runs
 * the prerun command, sets up the export (plus the copy-on-write state when
 * F_COPYONWRITE is set), applies socket options, and finally runs the
 * postrun command and closes the transaction log.
 * NOTE(review): short lines are missing from this listing (among them the
 * open() flags, the action taken when prerun fails and, presumably, the
 * call into the request loop); confirm against the full file.
 */
1793 void serveconnection(CLIENT *client) {
1794 if (client->server->transactionlog && (client->transactionlogfd == -1))
1796 if (-1 == (client->transactionlogfd = open(client->server->transactionlog,
1798 S_IRUSR | S_IWUSR)))
/* failing to open the log only produces a warning */
1799 g_warning("Could not open transaction log %s",
1800 client->server->transactionlog);
/* a nonzero result from the prerun command is treated specially */
1803 if(do_run(client->server->prerun, client->exportname)) {
1806 setupexport(client);
1808 if (client->server->flags & F_COPYONWRITE) {
1809 copyonwrite_prepare(client);
1812 setmysockopt(client->net);
/* the postrun command's result is ignored */
1815 do_run(client->server->postrun, client->exportname);
1817 if (-1 != client->transactionlogfd)
1819 close(client->transactionlogfd);
1820 client->transactionlogfd = -1;
1825 * Find the name of the file we have to serve. This will use g_strdup_printf
1826 * to put the IP address of the client inside a filename containing
1827 * "%s" (in the form as specified by the "virtstyle" option). That name
1828 * is then written to client->exportname.
1830 * @param net A socket connected to an nbd client
1831 * @param client information about the client. The IP address in human-readable
1832 * format will be written to a new char* buffer, the address of which will be
1833 * stored in client->clientname.
/*
 * set_peername: determine the peer address of socket `net` in numeric form
 * and derive client->exportname from the server's exportname according to
 * client->server->virtstyle. The numeric peer address is stored (as a copy)
 * in client->clientname.
 * NOTE(review): short lines (case labels, break statements, some
 * assignments) are missing from this listing; which virtstyle value selects
 * which branch has to be confirmed against the full file.
 */
1835 void set_peername(int net, CLIENT *client) {
1836 struct sockaddr_storage addrin;
1837 struct sockaddr_storage netaddr;
1838 struct sockaddr_in *netaddr4 = NULL;
1839 struct sockaddr_in6 *netaddr6 = NULL;
1840 size_t addrinlen = sizeof( addrin );
1841 struct addrinfo hints;
1842 struct addrinfo *ai = NULL;
1843 char peername[NI_MAXHOST];
1844 char netname[NI_MAXHOST];
/* NOTE(review): addrinlen is a size_t whose address is cast to socklen_t*;
 * that only works where both types have the same layout. The error text
 * below also names getsockname although getpeername is what was called. */
1850 if (getpeername(net, (struct sockaddr *) &addrin, (socklen_t *)&addrinlen) < 0)
1851 err("getsockname failed: %m");
/* numeric text form of the peer address */
1853 getnameinfo((struct sockaddr *)&addrin, (socklen_t)addrinlen,
1854 peername, sizeof (peername), NULL, 0, NI_NUMERICHOST);
/* resolve the text form again; the branches below use ai->ai_family */
1856 memset(&hints, '\0', sizeof (hints));
1857 hints.ai_flags = AI_ADDRCONFIG;
1858 e = getaddrinfo(peername, NULL, &hints, &ai);
1861 fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
1866 switch(client->server->virtstyle) {
/* exportname used unchanged */
1868 client->exportname=g_strdup(client->server->exportname);
/* peername is scanned for '.' characters before being substituted into the
 * exportname template (what replaces them is not visible here) */
1871 for(i=0;i<strlen(peername);i++) {
1872 if(peername[i]=='.') {
1877 client->exportname=g_strdup_printf(client->server->exportname, peername);
/* network-based naming: clear the host part of a copy of the address, keep
 * cidrlen leading bits, and substitute "<network>/<peer>" */
1880 memcpy(&netaddr, &addrin, addrinlen);
1881 if(ai->ai_family == AF_INET) {
1882 netaddr4 = (struct sockaddr_in *)&netaddr;
/* NOTE(review): s_addr is shifted as stored, without a visible byte-order
 * conversion -- confirm this masks the intended bits */
1883 (netaddr4->sin_addr).s_addr>>=32-(client->server->cidrlen);
1884 (netaddr4->sin_addr).s_addr<<=32-(client->server->cidrlen);
1886 getnameinfo((struct sockaddr *) netaddr4, (socklen_t) addrinlen,
1887 netname, sizeof (netname), NULL, 0, NI_NUMERICHOST);
1888 tmp=g_strdup_printf("%s/%s", netname, peername);
1889 }else if(ai->ai_family == AF_INET6) {
1890 netaddr6 = (struct sockaddr_in6 *)&netaddr;
1892 shift = 128-(client->server->cidrlen);
/* whole 32-bit words are zeroed first, then the remaining bits are shifted
 * out of the next word */
1894 while(shift >= 32) {
1895 ((netaddr6->sin6_addr).s6_addr32[i])=0;
1899 (netaddr6->sin6_addr).s6_addr32[i]>>=shift;
1900 (netaddr6->sin6_addr).s6_addr32[i]<<=shift;
1902 getnameinfo((struct sockaddr *)netaddr6, (socklen_t)addrinlen,
1903 netname, sizeof(netname), NULL, 0, NI_NUMERICHOST);
1904 tmp=g_strdup_printf("%s/%s", netname, peername);
1908 client->exportname=g_strdup_printf(client->server->exportname, tmp);
/* NOTE(review): no freeaddrinfo(ai) is visible in this listing -- confirm */
1914 msg4(LOG_INFO, "connect from %s, assigned file is %s",
1915 peername, client->exportname);
1916 client->clientname=g_strdup(peername);
1921 * @param data a pointer to pid_t which should be freed
1923 void destroy_pid_t(gpointer data) {
1928 * Loop through the available servers, and serve them. Never returns.
/*
 * serveloop: accept loop of the parent process. Builds one fd_set holding
 * the listening socket of every configured server plus modernsock, waits in
 * select(), accepts a connection, creates a CLIENT for it, checks the
 * connection limit and authorization, and forks a child that serves the
 * connection. Child PIDs are tracked in the `children` hash table.
 * NOTE(review): short lines (loop headers, continue/close/exit statements,
 * else branches) are missing from this listing; confirm the control flow
 * against the full file.
 */
1930 int serveloop(GArray* servers) {
1931 struct sockaddr_storage addrin;
1932 socklen_t addrinlen=sizeof(addrin);
1940 * Set up the master fd_set. The set of descriptors we need
1941 * to select() for never changes anyway and it buys us a *lot*
1942 * of time to only build this once. However, if we ever choose
1943 * to not fork() for clients anymore, we may have to revisit
/* servers whose socket member is 0 are left out of the set */
1948 for(i=0;i<servers->len;i++) {
1949 if((sock=(g_array_index(servers, SERVER, i)).socket)) {
1950 FD_SET(sock, &mset);
1951 max=sock>max?sock:max;
1955 FD_SET(modernsock, &mset);
1956 max=modernsock>max?modernsock:max;
1959 CLIENT *client = NULL;
/* select() modifies its set, so work on a copy of the master set */
1962 memcpy(&rset, &mset, sizeof(fd_set));
1963 if(select(max+1, &rset, NULL, NULL, NULL)>0) {
/* connection on the modern-style socket: the export is chosen during
 * negotiation, which also creates the CLIENT */
1968 if(FD_ISSET(modernsock, &rset)) {
/* NOTE(review): addrinlen is a value-result argument of accept(); no reset
 * before each call is visible here -- confirm */
1969 if((net=accept(modernsock, (struct sockaddr *) &addrin, &addrinlen)) < 0)
1971 client = negotiate(net, NULL, servers, NEG_INIT | NEG_MODERN);
1973 err_nonfatal("negotiation failed");
1978 serve = client->server;
/* otherwise look for the first per-server socket that is readable */
1980 for(i=0;i<servers->len && !net;i++) {
1981 serve=&(g_array_index(servers, SERVER, i));
1982 if(FD_ISSET(serve->socket, &rset)) {
1983 if ((net=accept(serve->socket, (struct sockaddr *) &addrin, &addrinlen)) < 0)
/* the limit is compared against the number of all tracked children */
1990 if(serve->max_connections > 0 &&
1991 g_hash_table_size(children) >= serve->max_connections) {
1992 msg2(LOG_INFO, "Max connections reached");
/* the accepted socket is switched to blocking mode */
1996 if((sock_flags = fcntl(net, F_GETFL, 0))==-1) {
1997 err("fcntl F_GETFL");
1999 if(fcntl(net, F_SETFL, sock_flags &~O_NONBLOCK)==-1) {
2000 err("fcntl F_SETFL ~O_NONBLOCK");
2003 client = g_new0(CLIENT, 1);
2004 client->server=serve;
2005 client->exportsize=OFFT_MAX;
2007 client->transactionlogfd = -1;
2009 set_peername(net, client);
2010 if (!authorized_client(client)) {
2011 msg2(LOG_INFO,"Unauthorized client") ;
2015 msg2(LOG_INFO,"Authorized client") ;
/* the pid lives on the heap because the hash table keeps the pointer as
 * both key and value */
2016 pid=g_malloc(sizeof(pid_t));
2019 if ((*pid=fork())<0) {
2020 msg3(LOG_INFO,"Could not fork (%s)",strerror(errno)) ;
2024 if (*pid>0) { /* parent */
2026 g_hash_table_insert(children, pid, pid);
/* child: drop the parent's bookkeeping and listening sockets */
2030 g_hash_table_destroy(children);
2031 for(i=0;i<servers->len;i++) {
2032 serve=&g_array_index(servers, SERVER, i);
2033 close(serve->socket);
2035 /* FALSE does not free the
2036 actual data. This is required,
2037 because the client has a
2038 direct reference into that
2039 data, and otherwise we get a
2041 g_array_free(servers, FALSE);
2044 msg2(LOG_INFO,"Starting to serve");
2045 serveconnection(client);
/*
 * dosockopts: configure a listening socket. Enables SO_REUSEADDR and
 * SO_KEEPALIVE and puts the descriptor into non-blocking mode; each failure
 * is reported through err().
 * NOTE(review): the declarations of `yes` and `sock_flags` are not visible
 * in this listing.
 */
2052 void dosockopts(int socket) {
2060 /* lose the pesky "Address already in use" error message */
2061 if (setsockopt(socket,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) {
2062 err("setsockopt SO_REUSEADDR");
2064 if (setsockopt(socket,SOL_SOCKET,SO_KEEPALIVE,&yes,sizeof(int)) == -1) {
2065 err("setsockopt SO_KEEPALIVE");
2068 /* make the listening socket non-blocking */
/* read the current flags first so that only O_NONBLOCK is added */
2069 if ((sock_flags = fcntl(socket, F_GETFL, 0)) == -1) {
2070 err("fcntl F_GETFL");
2072 if (fcntl(socket, F_SETFL, sock_flags | O_NONBLOCK) == -1) {
2073 err("fcntl F_SETFL O_NONBLOCK");
2078 * Connect a server's socket.
2080 * @param serve the server we want to connect.
/*
 * setup_serve: create, configure, bind and listen on the socket for one
 * server entry. The address comes from getaddrinfo() on serve->listenaddr
 * and the decimal form of serve->port; when serve->socket_family is
 * AF_UNSPEC it is replaced by the family getaddrinfo() returned.
 * The visible return statement yields 1 when the server has a servename and
 * 0 otherwise.
 * NOTE(review): short lines (guards, error branches, returns) are missing
 * from this listing; confirm against the full file.
 */
2082 int setup_serve(SERVER *serve) {
2083 struct addrinfo hints;
2084 struct addrinfo *ai = NULL;
2089 return serve->servename ? 1 : 0;
2091 memset(&hints,'\0',sizeof(hints));
2092 hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG | AI_NUMERICSERV;
2093 hints.ai_socktype = SOCK_STREAM;
2094 hints.ai_family = serve->socket_family;
/* getaddrinfo() wants the port as a string */
2096 port = g_strdup_printf ("%d", serve->port);
2100 e = getaddrinfo(serve->listenaddr,port,&hints,&ai);
2105 fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
2111 if(serve->socket_family == AF_UNSPEC)
2112 serve->socket_family = ai->ai_family;
/* NOTE(review): two defects in the SDP block below.
 * (1) "(serve->flags) && F_SDP" is a logical AND, true whenever any flag is
 *     set; testing the F_SDP bit needs a bitwise "&".
 * (2) "else (ai->ai_family == AF_INET6)" lacks "if"; as written it is an
 *     expression statement missing its semicolon, so it cannot compile. */
2115 if ((serve->flags) && F_SDP) {
2116 if (ai->ai_family == AF_INET)
2117 ai->ai_family = AF_INET_SDP;
2118 else (ai->ai_family == AF_INET6)
2119 ai->ai_family = AF_INET6_SDP;
2122 if ((serve->socket = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol)) < 0)
2125 dosockopts(serve->socket);
2127 DEBUG("Waiting for connections... bind, ");
2128 e = bind(serve->socket, ai->ai_addr, ai->ai_addrlen);
/* a bind() failure with EADDRINUSE is not caught by this test */
2129 if (e != 0 && errno != EADDRINUSE)
2132 if (listen(serve->socket, 1) < 0)
2136 if(serve->servename) {
/*
 * open_modern: create the global listening socket `modernsock`. The address
 * is resolved from modern_listen and modernport for TCP over any address
 * family; the socket then gets the common options from dosockopts() and is
 * bound and put into listening state with a backlog of 10.
 * NOTE(review): the error branches are not visible in this listing.
 */
2143 void open_modern(void) {
2144 struct addrinfo hints;
2145 struct addrinfo* ai = NULL;
2149 memset(&hints, '\0', sizeof(hints));
2150 hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG;
2151 hints.ai_socktype = SOCK_STREAM;
2152 hints.ai_family = AF_UNSPEC;
2153 hints.ai_protocol = IPPROTO_TCP;
2154 e = getaddrinfo(modern_listen, modernport, &hints, &ai);
2156 fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
/* only the first address returned by getaddrinfo() is used */
2159 if((modernsock = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol))<0) {
2163 dosockopts(modernsock);
2165 if(bind(modernsock, ai->ai_addr, ai->ai_addrlen)) {
2168 if(listen(modernsock, 10) <0) {
2176 * Connect our servers.
2178 void setup_servers(GArray* servers) {
2180 struct sigaction sa;
2183 for(i=0;i<servers->len;i++) {
2184 want_modern |= setup_serve(&(g_array_index(servers, SERVER, i)));
2189 children=g_hash_table_new_full(g_int_hash, g_int_equal, NULL, destroy_pid_t);
2191 sa.sa_handler = sigchld_handler;
2192 sigemptyset(&sa.sa_mask);
2193 sa.sa_flags = SA_RESTART;
2194 if(sigaction(SIGCHLD, &sa, NULL) == -1)
2195 err("sigaction: %m");
2196 sa.sa_handler = sigterm_handler;
2197 sigemptyset(&sa.sa_mask);
2198 sa.sa_flags = SA_RESTART;
2199 if(sigaction(SIGTERM, &sa, NULL) == -1)
2200 err("sigaction: %m");
2204 * Go daemon (unless we specified at compile time that we didn't want this)
2205 * @param serve the first server of our configuration. If its port is zero,
2206 * then do not daemonize, because we're doing inetd then. This parameter
2207 * is only used to create a PID file of the form
2208 * /var/run/nbd-server.<port>.pid; it's not modified in any way.
2210 #if !defined(NODAEMON)
/*
 * daemonize: writes our PID to a PID file. When pidftemplate is still empty
 * a default template under /var/run is filled in first; the file name is
 * then built from the template and the server's port (0 when serve is
 * NULL).
 * NOTE(review): short lines are missing from this listing, including the
 * step that actually detaches the process and the conditions choosing
 * between the two default templates; confirm against the full file.
 */
2211 void daemonize(SERVER* serve) {
/* a server with port 0 gets special treatment (body not visible here) */
2214 if(serve && !(serve->port)) {
2220 if(!*pidftemplate) {
2222 strncpy(pidftemplate, "/var/run/nbd-server.%d.pid", 255);
2224 strncpy(pidftemplate, "/var/run/nbd-server.pid", 255);
/* pidftemplate is used as the format string here */
2227 snprintf(pidfname, 255, pidftemplate, serve ? serve->port : 0);
2228 pidf=fopen(pidfname, "w");
2230 fprintf(pidf,"%d\n", (int)getpid());
2234 fprintf(stderr, "Not fatal; continuing");
/* with NODAEMON defined, daemonize() expands to nothing */
2238 #define daemonize(serve)
2239 #endif /* !defined(NODAEMON) */
2242 * Everything beyond this point (in the file) is run in non-daemon mode.
2243 * The stuff above daemonize() isn't.
/*
 * serve_err: report a failure concerning one export. Logs the export name
 * and port through g_message(); the prototype is marked G_GNUC_NORETURN,
 * so the function is declared never to return to its caller.
 * NOTE(review): the rest of the body (how `msg` is used and how the
 * function terminates) is not visible in this listing.
 */
2246 void serve_err(SERVER* serve, const char* msg) G_GNUC_NORETURN;
2248 void serve_err(SERVER* serve, const char* msg) {
2249 g_message("Export of %s on port %d failed:", serve->exportname,
2255 * Set up user-ID and/or group-ID
2257 void dousers(void) {
2262 gr=getgrnam(rungroup);
2264 str = g_strdup_printf("Invalid group name: %s", rungroup);
2267 if(setgid(gr->gr_gid)<0) {
2268 err("Could not set GID: %m");
2272 pw=getpwnam(runuser);
2274 str = g_strdup_printf("Invalid user name: %s", runuser);
2277 if(setuid(pw->pw_uid)<0) {
2278 err("Could not set UID: %m");
/*
 * glib_message_syslog_redirect: GLib log handler that forwards a message to
 * syslog(). The GLib log level is translated into a syslog priority, which
 * starts out as LOG_DEBUG.
 * NOTE(review): the switch header and the priority assigned in each group
 * of cases are not visible in this listing; confirm against the full file.
 */
2284 void glib_message_syslog_redirect(const gchar *log_domain,
2285 GLogLevelFlags log_level,
2286 const gchar *message,
2289 int level=LOG_DEBUG;
2293 case G_LOG_FLAG_FATAL:
2294 case G_LOG_LEVEL_CRITICAL:
2295 case G_LOG_LEVEL_ERROR:
2298 case G_LOG_LEVEL_WARNING:
2301 case G_LOG_LEVEL_MESSAGE:
2302 case G_LOG_LEVEL_INFO:
2305 case G_LOG_LEVEL_DEBUG:
/* the message is passed as an argument, never as the format string */
2311 syslog(level, "%s", message);
2316 * Main entry point...
2318 int main(int argc, char *argv[]) {
2323 if (sizeof( struct nbd_request )!=28) {
2324 fprintf(stderr,"Bad size of structure. Alignment problems?\n");
2325 exit(EXIT_FAILURE) ;
2328 memset(pidftemplate, '\0', 256);
2331 config_file_pos = g_strdup(CFILE);
2332 serve=cmdline(argc, argv);
2333 servers = parse_cfile(config_file_pos, &err);
2336 serve->socket_family = AF_UNSPEC;
2338 append_serve(serve, servers);
2340 if (!(serve->port)) {
2343 /* You really should define ISSERVER if you're going to use
2344 * inetd mode, but if you don't, closing stdout and stderr
2345 * (which inetd had connected to the client socket) will let it
2349 open("/dev/null", O_WRONLY);
2350 open("/dev/null", O_WRONLY);
2351 g_log_set_default_handler( glib_message_syslog_redirect, NULL );
2353 client=g_malloc(sizeof(CLIENT));
2354 client->server=serve;
2356 client->exportsize=OFFT_MAX;
2357 set_peername(0,client);
2358 serveconnection(client);
2363 if(!servers || !servers->len) {
2364 if(err && !(err->domain == g_quark_from_string("parse_cfile")
2365 && err->code == CFILE_NOTFOUND)) {
2366 g_warning("Could not parse config file: %s",
2367 err ? err->message : "Unknown error");
2371 g_warning("Specifying an export on the command line is deprecated.");
2372 g_warning("Please use a configuration file instead.");
2375 if((!serve) && (!servers||!servers->len)) {
2376 g_message("No configured exports; quitting.");
2381 setup_servers(servers);