2 * Network Block Device - server
4 * Copyright 1996-1998 Pavel Machek, distribute under GPL
5 * <pavel@atrey.karlin.mff.cuni.cz>
6 * Copyright 2001-2004 Wouter Verhelst <wouter@debian.org>
7 * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk>
9 * Version 1.0 - hopefully 64-bit-clean
10 * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au>
11 * Version 1.2 - autodetect size of block devices, thanks to "Peter T. Breuer" <ptb@it.uc3m.es>
12 * Version 1.5 - can compile on Unix systems that don't have 64 bit integer
13 * type, or don't have 64 bit file offsets by defining FS_32BIT
14 * in compile options for nbd-server *only*. This can be done
15 * with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the
16 * original autoconf input file, or I would make it a configure
17 * option.) Ken Yap <ken@nlc.net.au>.
18 * Version 1.6 - fix autodetection of block device size and really make 64 bit
19 * clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk>
20 * Version 2.0 - Version synchronised with client
21 * Version 2.1 - Reap zombie client processes when they exit. Removed
22 * (uncommented) the _IO magic, it's no longer necessary. Wouter
23 * Verhelst <wouter@debian.org>
24 * Version 2.2 - Auto switch to read-only mode (useful for floppies).
25 * Version 2.3 - Fixed code so that Large File Support works. This
26 * removes the FS_32BIT compile-time directive; define
27 * _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be
28 * using FS_32BIT. This will allow you to use files >2GB instead of
29 * having to use the -m option. Wouter Verhelst <wouter@debian.org>
30 * Version 2.4 - Added code to keep track of children, so that we can
31 * properly kill them from initscripts. Add a call to daemon(),
32 * so that processes don't think they have to wait for us, which is
33 * interesting for initscripts as well. Wouter Verhelst
35 * Version 2.5 - Bugfix release: forgot to reset child_arraysize to
36 * zero after fork()ing, resulting in nbd-server going berserk
37 * when it receives a signal with at least one child open. Wouter
38 * Verhelst <wouter@debian.org>
39 * 10/10/2003 - Added socket option SO_KEEPALIVE (sf.net bug 819235);
40 * rectified type of mainloop::size_host (sf.net bugs 814435 and
41 * 817385); close the PID file after writing to it, so that the
42 * daemon can actually be found. Wouter Verhelst
44 * 10/10/2003 - Size of the data "size_host" was wrong and so was not
45 * correctly put in network endianness. Many types were corrected
46 * (size_t and off_t instead of int). <vspaceg@sourceforge.net>
47 * Version 2.6 - Some code cleanup.
48 * Version 2.7 - Better build system.
49 * 11/02/2004 - Doxygenified the source, modularized it a bit. Needs a
50 * lot more work, but this is a start. Wouter Verhelst
54 /* Includes LFS defines, which defines behaviours of some of the following
55 * headers, so must come before those */
58 #include <sys/types.h>
59 #include <sys/socket.h>
61 #include <sys/select.h> /* select */
62 #include <sys/wait.h> /* wait */
63 #ifdef HAVE_SYS_IOCTL_H
64 #include <sys/ioctl.h>
66 #include <sys/param.h>
67 #ifdef HAVE_SYS_MOUNT_H
68 #include <sys/mount.h> /* For BLKGETSIZE */
70 #include <signal.h> /* sigaction */
72 #include <netinet/tcp.h>
73 #include <netinet/in.h> /* sockaddr_in, htons, in_addr */
74 #include <netdb.h> /* hostent, gethostby*, getservby* */
81 #include <arpa/inet.h>
91 /* used in cliserv.h, so must come first */
92 #define MY_NAME "nbd_server"
95 /** Default position of the config file */
97 #define SYSCONFDIR "/etc"
99 #define CFILE SYSCONFDIR "/nbd-server/config"
101 /** Where our config file actually is */
102 gchar* config_file_pos;
104 /** What user we're running as */
106 /** What group we're running as */
107 gchar* rungroup=NULL;
109 /** Logging macros, now nothing goes to syslog unless you say ISSERVER */
/* NOTE(review): msg2/msg3/msg4 are defined twice below; the preprocessor
 * conditional that selects one set is not visible in this listing. */
/* syslog-backed set: `a` is forwarded as the first syslog() argument. */
111 #define msg2(a,b) syslog(a,b)
112 #define msg3(a,b,c) syslog(a,b,c)
113 #define msg4(a,b,c,d) syslog(a,b,c,d)
/* g_message-backed set: the first argument `a` is dropped, the rest is
 * forwarded as format string and arguments. */
115 #define msg2(a,b) g_message(b)
116 #define msg3(a,b,c) g_message(b,c)
117 #define msg4(a,b,c,d) g_message(b,c,d)
120 /* Debugging macros */
/* printf-backed set: arguments are handed to printf() unchanged, so the
 * first argument is used as the format string. */
123 #define DEBUG( a ) printf( a )
124 #define DEBUG2( a,b ) printf( a,b )
125 #define DEBUG3( a,b,c ) printf( a,b,c )
126 #define DEBUG4( a,b,c,d ) printf( a,b,c,d )
/* Empty set: these expand to nothing, so their arguments are never
 * evaluated. NOTE(review): no empty DEBUG( a ) is visible next to them --
 * confirm it exists in the full file. */
129 #define DEBUG2( a,b )
130 #define DEBUG3( a,b,c )
131 #define DEBUG4( a,b,c,d )
133 #ifndef PACKAGE_VERSION
134 #define PACKAGE_VERSION ""
137 * The highest value a variable of type off_t can reach. This is a signed
138 * integer, so set all bits except for the leftmost one.
/* Largest value an off_t can hold. Built as (2^(bits-2) - 1) * 2 + 1 so that
 * no 1 is ever shifted into the sign bit of the signed type, and fully
 * parenthesised so it can be used inside larger expressions. */
#define OFFT_MAX ((off_t)(((((off_t)1) << (sizeof(off_t) * 8 - 2)) - 1) * 2 + 1))
#define LINELEN 256 /**< Size of static buffer used to read the
                         authorization file (yuck) */
#define BUFSIZE (1024*1024) /**< Size of buffer that can hold requests */
#define DIFFPAGESIZE 4096 /**< diff file uses those chunks */
/* Bit flags stored in SERVER::flags; every flag must be a distinct bit. */
#define F_READONLY 1 /**< flag to tell us a file is readonly */
#define F_MULTIFILE 2 /**< flag to tell us a file is exported using -m */
#define F_COPYONWRITE 4 /**< flag to tell us a file is exported using
                             copy-on-write */
#define F_AUTOREADONLY 8 /**< flag to tell us a file is set to autoreadonly */
#define F_SPARSE 16 /**< flag to tell us copyonwrite should use a sparse file */
#define F_SDP 32 /**< flag to tell us the export should be done using the Socket Direct Protocol for RDMA */
/* Table of child processes; the signal handlers in this file look entries
 * up and remove them using a pointer to the child's pid as key. */
152 GHashTable *children;
153 char pidfname[256]; /**< name of our PID file */
154 char pidftemplate[256]; /**< template to be used for the filename of the PID file */
155 char default_authname[] = SYSCONFDIR "/nbd-server/allow"; /**< default name of allow file */
158 * Types of virtualization
161 VIRT_NONE=0, /**< No virtualization */
162 VIRT_IPLIT, /**< Literal IP address as part of the filename */
163 VIRT_IPHASH, /**< Replacing all dots in an ip address by a / before
164 doing the same as in IPLIT */
165 VIRT_CIDR, /**< Every subnet in its own directory */
169 * Variables associated with a server.
172 gchar* exportname; /**< (unprocessed) filename of the file we're exporting */
173 off_t expected_size; /**< size of the exported file as it was told to
174 us through configuration */
175 unsigned int port; /**< port we're exporting this file at */
176 char* authname; /**< filename of the authorization file */
177 int flags; /**< flags associated with this exported file */
178 unsigned int timeout;/**< how long a connection may be idle
180 int socket; /**< The socket of this server. */
181 VIRT_STYLE virtstyle;/**< The style of virtualization, if any */
182 uint8_t cidrlen; /**< The length of the mask when we use
183 CIDR-style virtualization */
184 gchar* prerun; /**< command to be ran after connecting a client,
185 but before starting to serve */
186 gchar* postrun; /**< command that will be ran after the client
191 * Variables associated with a client socket.
194 int fhandle; /**< file descriptor */
195 off_t startoff; /**< starting offset of this file */
199 off_t exportsize; /**< size of the file we're exporting */
200 char *clientname; /**< peer */
201 char *exportname; /**< (processed) filename of the file we're exporting */
202 GArray *export; /**< array of FILE_INFO of exported files;
203 array size is always 1 unless we're
204 doing the multiple file option */
205 int net; /**< The actual client socket */
206 SERVER *server; /**< The server this client is getting data from */
207 char* difffilename; /**< filename of the copy-on-write file, if any */
208 int difffile; /**< filedescriptor of copyonwrite file. @todo
209 shouldn't this be an array too? (cfr export) Or
210 make -m and -c mutually exclusive */
211 u32 difffilelen; /**< number of pages in difffile */
212 u32 *difmap; /**< see comment on the global difmap for this one */
216 * Type of configuration file values
219 PARAM_INT, /**< This parameter is an integer */
220 PARAM_STRING, /**< This parameter is a string */
221 PARAM_BOOL, /**< This parameter is a boolean */
225 * Configuration file values
228 gchar *paramname; /**< Name of the parameter, as it appears in
230 gboolean required; /**< Whether this is a required (as opposed to
231 optional) parameter */
232 PARAM_TYPE ptype; /**< Type of the parameter. */
233 gpointer target; /**< Pointer to where the data of this
234 parameter should be written. If ptype is
235 PARAM_BOOL, the data is or'ed rather than
237 gint flagval; /**< Flag mask for this parameter in case ptype
242 * Check whether a client is allowed to connect. Works with an authorization
243 * file which contains one line per machine, no wildcards.
245 * @param opts The client who's trying to connect.
246 * @return 0 - authorization refused, 1 - OK
/* Scans the file named by opts->server->authname line by line and compares
 * each entry against opts->clientname. Two entry forms are handled in the
 * visible code: "address/length" (a '/' is present) and a plain address. */
248 int authorized_client(CLIENT *opts) {
249 const char *ERRMSG="Invalid entry '%s' in authfile '%s', so, refusing all connections.";
254 struct in_addr client;
255 struct in_addr cltemp;
/* A missing or unreadable authorization file is only logged at LOG_INFO. */
258 if ((f=fopen(opts->server->authname,"r"))==NULL) {
259 msg4(LOG_INFO,"Can't open authorization file %s (%s).",
260 opts->server->authname,strerror(errno)) ;
/* The peer address is parsed once; the result of inet_aton() is not checked. */
264 inet_aton(opts->clientname, &client);
265 while (fgets(line,LINELEN,f)!=NULL) {
/* A '/' marks an "address/length" entry. */
266 if((tmp=index(line, '/'))) {
267 if(strlen(line)<=tmp-line) {
268 msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
/* NOTE(review): inet_aton() returns nonzero when the address is valid, so
 * this branch reports "Invalid entry" for a well-formed address; the test
 * looks inverted -- confirm. */
272 if(inet_aton(line,&addr)) {
273 msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
/* Both addresses are reduced by shifting right and then left by (32-len).
 * NOTE(review): s_addr is kept in network byte order, so on a little-endian
 * host this clears bits from the wrong end; len is not range-checked, and a
 * len of 0 or above 32 gives an out-of-range shift count -- confirm. */
276 len=strtol(tmp, NULL, 0);
277 addr.s_addr>>=32-len;
278 addr.s_addr<<=32-len;
279 memcpy(&cltemp,&client,sizeof(client));
280 cltemp.s_addr>>=32-len;
281 cltemp.s_addr<<=32-len;
282 if(addr.s_addr == cltemp.s_addr) {
/* Plain entry: only the first strlen(clientname) characters are compared,
 * so a line that merely starts with the client name also matches. */
286 if (strncmp(line,opts->clientname,strlen(opts->clientname))==0) {
296 * Read data from a file descriptor into a buffer
298 * @param f a file descriptor
299 * @param buf a buffer
300 * @param len the number of bytes to be read
/* Reads from descriptor f into buf. A read() result of zero or less (end of
 * file as well as an error) is reported through err(). */
302 inline void readit(int f, void *buf, size_t len) {
306 if ((res = read(f, buf, len)) <= 0)
307 err("Read failed: %m");
314 * Write data from a buffer into a filedescriptor
316 * @param f a file descriptor
317 * @param buf a buffer containing data
318 * @param len the number of bytes to be written
/* Writes from buf to descriptor f. A write() result of zero or less is
 * reported through err(). */
320 inline void writeit(int f, void *buf, size_t len) {
324 if ((res = write(f, buf, len)) <= 0)
325 err("Send failed: %m");
332 * Print out a message about how to use nbd-server. Split out to a separate
333 * function so that we can call it from multiple places
/* Prints the version banner, the command-line synopsis with one line per
 * option, and the compiled-in default configuration file path (CFILE).
 * NOTE(review): the banner uses VERSION while this file only provides a
 * fallback for PACKAGE_VERSION -- confirm VERSION is defined elsewhere. */
336 printf("This is nbd-server version " VERSION "\n");
337 printf("Usage: port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-a timeout_sec] [-C configuration file] [-p PID file name] [-o section name]\n"
338 "\t-r|--read-only\t\tread only\n"
339 "\t-m|--multi-file\t\tmultiple file\n"
340 "\t-c|--copy-on-write\tcopy on write\n"
341 "\t-C|--config-file\tspecify an alternate configuration file\n"
342 "\t-l|--authorize-file\tfile with list of hosts that are allowed to\n\t\t\t\tconnect.\n"
343 "\t-a|--idle-time\t\tmaximum idle seconds; server terminates when\n\t\t\t\tidle time exceeded\n"
344 "\t-p|--pid-file\t\tspecify a filename to write our PID to\n"
345 "\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n\n"
346 "\tif port is set to 0, stdin is used (for running from inetd)\n"
347 "\tif file_to_export contains '%%s', it is substituted with the IP\n"
348 "\t\taddress of the machine trying to connect\n" );
/* This prints the compile-time default path, not config_file_pos. */
349 printf("Using configuration file %s\n", CFILE);
352 /* Dumps a config file section of the given SERVER*, and exits. */
/* Writes serve's settings to stdout as a config-file section headed by
 * [section_header]: exportname and port always, the boolean keys only when
 * the matching flag bit is set, filesize and authfile only when non-zero /
 * non-NULL.
 * NOTE(review): port and timeout are declared unsigned int but printed with
 * %d, and %Ld is not a standard conversion for long long (%lld is). */
353 void dump_section(SERVER* serve, gchar* section_header) {
354 printf("[%s]\n", section_header);
355 printf("\texportname = %s\n", serve->exportname);
356 printf("\tport = %d\n", serve->port);
357 if(serve->flags & F_READONLY) {
358 printf("\treadonly = true\n");
360 if(serve->flags & F_MULTIFILE) {
361 printf("\tmultifile = true\n");
363 if(serve->flags & F_COPYONWRITE) {
364 printf("\tcopyonwrite = true\n");
366 if(serve->expected_size) {
367 printf("\tfilesize = %Ld\n", (long long int)serve->expected_size);
369 if(serve->authname) {
370 printf("\tauthfile = %s\n", serve->authname);
373 printf("\ttimeout = %d\n", serve->timeout);
379 * Parse the command line.
381 * @param argc the argc argument to main()
382 * @param argv the argv argument to main()
/* Builds a zero-initialised SERVER from the command line. Positional
 * arguments are taken in order (port, export file name, optional size);
 * options set flags, the authorization file, the idle timeout, the config
 * file position, the PID file template or the section name to dump. */
384 SERVER* cmdline(int argc, char *argv[]) {
388 struct option long_options[] = {
389 {"read-only", no_argument, NULL, 'r'},
390 {"multi-file", no_argument, NULL, 'm'},
391 {"copy-on-write", no_argument, NULL, 'c'},
392 {"authorize-file", required_argument, NULL, 'l'},
393 {"idle-time", required_argument, NULL, 'a'},
394 {"config-file", required_argument, NULL, 'C'},
395 {"pid-file", required_argument, NULL, 'p'},
396 {"output-config", required_argument, NULL, 'o'},
403 gboolean do_output=FALSE;
/* NOTE(review): section_header has no initialiser; the only visible
 * assignment is in the output-config case below. */
404 gchar* section_header;
/* Defaults: the default allow file and literal-IP virtualization. */
409 serve=g_new0(SERVER, 1);
410 serve->authname = g_strdup(default_authname);
411 serve->virtstyle=VIRT_IPLIT;
412 while((c=getopt_long(argc, argv, "-a:C:cl:mo:rp:", long_options, &i))>=0) {
415 /* non-option argument */
/* nonspecial counts the positional arguments seen so far. */
416 switch(nonspecial++) {
/* strtol() is called without an end pointer, so trailing garbage and
 * out-of-range values are not detected here. */
418 serve->port=strtol(optarg, NULL, 0);
421 serve->exportname = g_strdup(optarg);
422 if(serve->exportname[0] != '/') {
423 fprintf(stderr, "E: The to be exported file needs to be an absolute filename!\n");
/* Size argument: the last character may be a k/K/m/M suffix.
 * NOTE(review): an empty optarg makes strlen(optarg)-1 wrap or go negative,
 * and atol() reports neither errors nor overflow -- confirm handling. */
428 last=strlen(optarg)-1;
430 if (suffix == 'k' || suffix == 'K' ||
431 suffix == 'm' || suffix == 'M')
433 es = (off_t)atol(optarg);
441 serve->expected_size = es;
446 serve->flags |= F_READONLY;
449 serve->flags |= F_MULTIFILE;
453 section_header = g_strdup(optarg);
/* NOTE(review): strncpy() does not NUL-terminate when optarg is 256 bytes
 * or longer, which would leave pidftemplate (char[256]) unterminated. */
456 strncpy(pidftemplate, optarg, 256);
459 serve->flags |=F_COPYONWRITE;
462 g_free(config_file_pos);
463 config_file_pos=g_strdup(optarg);
/* The default allow-file name allocated above is released before being
 * replaced. */
466 g_free(serve->authname);
467 serve->authname=g_strdup(optarg);
470 serve->timeout=strtol(optarg, NULL, 0);
478 /* What's left: the port to export, the name of the to be exported
479 * file, and, optionally, the size of the file, in that order. */
486 g_critical("Need a complete configuration on the command line to output a config file section!");
489 dump_section(serve, section_header);
495 * Error codes for config file parsing
498 CFILE_NOTFOUND, /**< The configuration file is not found */
499 CFILE_MISSING_GENERIC, /**< The (required) group "generic" is missing */
500 CFILE_KEY_MISSING, /**< A (required) key is missing */
501 CFILE_VALUE_INVALID, /**< A value is syntactically invalid */
502 CFILE_PROGERR /**< Programmer error */
506 * Remove a SERVER from memory. Used from the hash table
/* Releases strings owned by a SERVER passed as an untyped pointer.
 * NOTE(review): only exportname and authname are freed in the visible
 * lines; the prerun/postrun strings are not -- confirm against the full
 * body. */
508 void remove_server(gpointer s) {
512 g_free(server->exportname);
514 g_free(server->authname);
519 * Parse the config file.
521 * @param f the name of the config file
522 * @param e a GError. @see CFILE_ERRORS for what error values this function can
524 * @return a GArray of SERVER structures. If the config file is empty or does
525 * not exist, returns an empty GArray; if the config file contains an
526 * error, returns NULL, and e is set appropriately
/* Loads the key file f with GKeyFile and walks every group. For each group
 * a SERVER is filled in through a table of parameter descriptors whose
 * target pointers are re-aimed at the local SERVER `s`; the result array
 * holds SERVER structures by value. Errors are reported through e. */
528 GArray* parse_cfile(gchar* f, GError** e) {
529 const char* DEFAULT_ERROR = "Could not parse %s in group %s: %s";
530 const char* MISSING_REQUIRED_ERROR = "Could not find required value %s in group %s: %s";
532 gchar *virtstyle=NULL;
/* Per-export parameters. The order of these entries must stay in step with
 * the lp[N].target assignments further down. */
534 { "exportname", TRUE, PARAM_STRING, NULL, 0 },
535 { "port", TRUE, PARAM_INT, NULL, 0 },
536 { "authfile", FALSE, PARAM_STRING, NULL, 0 },
537 { "timeout", FALSE, PARAM_INT, NULL, 0 },
538 { "filesize", FALSE, PARAM_INT, NULL, 0 },
539 { "virtstyle", FALSE, PARAM_STRING, NULL, 0 },
540 { "prerun", FALSE, PARAM_STRING, NULL, 0 },
541 { "postrun", FALSE, PARAM_STRING, NULL, 0 },
542 { "readonly", FALSE, PARAM_BOOL, NULL, F_READONLY },
543 { "multifile", FALSE, PARAM_BOOL, NULL, F_MULTIFILE },
544 { "copyonwrite", FALSE, PARAM_BOOL, NULL, F_COPYONWRITE },
545 { "autoreadonly", FALSE, PARAM_BOOL, NULL, F_AUTOREADONLY },
546 { "sparse_cow", FALSE, PARAM_BOOL, NULL, F_SPARSE },
547 { "sdp", FALSE, PARAM_BOOL, NULL, F_SDP },
/* Hand-maintained count of the 14 entries above. */
549 const int lp_size=14;
/* Parameters that write straight into file-level variables. */
551 { "user", FALSE, PARAM_STRING, &runuser, 0 },
552 { "group", FALSE, PARAM_STRING, &rungroup, 0 },
558 const char *err_msg=NULL;
566 errdomain = g_quark_from_string("parse_cfile");
567 cfile = g_key_file_new();
/* Elements are whole SERVER structs and new elements are zero-filled. */
568 retval = g_array_new(FALSE, TRUE, sizeof(SERVER));
569 if(!g_key_file_load_from_file(cfile, f, G_KEY_FILE_KEEP_COMMENTS |
570 G_KEY_FILE_KEEP_TRANSLATIONS, &err)) {
571 g_set_error(e, errdomain, CFILE_NOTFOUND, "Could not open config file.");
572 g_key_file_free(cfile);
/* NOTE(review): the start-group result goes straight into strcmp();
 * confirm it cannot be NULL for an empty file. */
575 if(strcmp(g_key_file_get_start_group(cfile), "generic")) {
576 g_set_error(e, errdomain, CFILE_MISSING_GENERIC, "Config file does not contain the [generic] group!");
577 g_key_file_free(cfile);
580 groups = g_key_file_get_groups(cfile, NULL);
581 for(i=0;groups[i];i++) {
/* Start each group from an all-zero SERVER and point the descriptors at
 * its fields; all six boolean descriptors share s.flags. */
582 memset(&s, '\0', sizeof(SERVER));
583 lp[0].target=&(s.exportname);
584 lp[1].target=&(s.port);
585 lp[2].target=&(s.authname);
586 lp[3].target=&(s.timeout);
587 lp[4].target=&(s.expected_size);
588 lp[5].target=&(virtstyle);
589 lp[6].target=&(s.prerun);
590 lp[7].target=&(s.postrun);
591 lp[8].target=lp[9].target=lp[10].target=
592 lp[11].target=lp[12].target=
593 lp[13].target=&(s.flags);
595 /* After the [generic] group, start parsing exports */
600 for(j=0;j<p_size;j++) {
601 g_assert(p[j].target != NULL);
602 g_assert(p[j].ptype==PARAM_INT||p[j].ptype==PARAM_STRING||p[j].ptype==PARAM_BOOL);
/* Integer values are stored through a gint pointer.
 * NOTE(review): "filesize" targets s.expected_size, which is an off_t; only
 * sizeof(gint) bytes of it are written here, and values beyond the gint
 * range cannot be expressed -- confirm this is intended. */
605 *((gint*)p[j].target) =
606 g_key_file_get_integer(cfile,
612 *((gchar**)p[j].target) =
613 g_key_file_get_string(cfile,
619 value = g_key_file_get_boolean(cfile,
621 p[j].paramname, &err);
/* Booleans set or clear the descriptor's flag bit in the target word. */
624 *((gint*)p[j].target) |= p[j].flagval;
626 *((gint*)p[j].target) &= ~(p[j].flagval);
632 if(err->code == G_KEY_FILE_ERROR_KEY_NOT_FOUND) {
634 /* Ignore not-found error for optional values */
638 err_msg = MISSING_REQUIRED_ERROR;
641 err_msg = DEFAULT_ERROR;
643 g_set_error(e, errdomain, CFILE_VALUE_INVALID, err_msg, p[j].paramname, groups[i], err->message);
644 g_array_free(retval, TRUE);
646 g_key_file_free(cfile);
/* virtstyle is matched by prefix: "none", "ipliteral", "iphash" or
 * "cidrhash" followed by a length. */
651 if(!strncmp(virtstyle, "none", 4)) {
652 s.virtstyle=VIRT_NONE;
653 } else if(!strncmp(virtstyle, "ipliteral", 9)) {
654 s.virtstyle=VIRT_IPLIT;
655 } else if(!strncmp(virtstyle, "iphash", 6)) {
656 s.virtstyle=VIRT_IPHASH;
657 } else if(!strncmp(virtstyle, "cidrhash", 8)) {
658 s.virtstyle=VIRT_CIDR;
659 if(strlen(virtstyle)<10) {
660 g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s: missing length", virtstyle, groups[i]);
661 g_array_free(retval, TRUE);
662 g_key_file_free(cfile);
/* NOTE(review): parsing starts at offset 8, directly after "cidrhash",
 * while the length check above demands at least 10 characters; if a
 * separator sits at offset 8, strtol() yields 0 -- confirm the expected
 * syntax. */
665 s.cidrlen=strtol(virtstyle+8, NULL, 0);
667 g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s", virtstyle, groups[i]);
668 g_array_free(retval, TRUE);
669 g_key_file_free(cfile);
673 s.virtstyle=VIRT_IPLIT;
675 /* Don't need to free this, it's not our string */
677 /* Don't append values for the [generic] group */
679 g_array_append_val(retval, s);
/* NOTE(review): `&&` is a logical AND, so this is true whenever any flag is
 * set (given a non-zero F_SDP); a bitwise `s.flags & F_SDP` looks intended.
 * CFILE_VALUE_UNSUPPORTED is also not among the CFILE_* enumerators visible
 * in this file -- confirm it is declared. */
682 if(s.flags && F_SDP) {
683 g_set_error(e, errdomain, CFILE_VALUE_UNSUPPORTED, "This nbd-server was built without support for SDP, yet group %s uses it", groups[i]);
684 g_array_free(retval, TRUE);
685 g_key_file_free(cfile);
694 * Signal handler for SIGCHLD
695 * @param s the signal we're handling (must be SIGCHLD, or something
/* Reaps every child that has already terminated (WNOHANG, so it never
 * blocks), logs normal exit codes and drops the child's entry from the
 * `children` table.
 * NOTE(review): the logging macros and the GHashTable calls run inside a
 * signal handler; confirm they are safe to call in that context. */
698 void sigchld_handler(int s) {
703 while((pid=waitpid(-1, &status, WNOHANG)) > 0) {
704 if(WIFEXITED(status)) {
705 msg3(LOG_INFO, "Child exited with %d", WEXITSTATUS(status));
/* The table is keyed by a pointer to the pid. */
707 i=g_hash_table_lookup(children, &pid);
709 msg3(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid);
711 DEBUG2("Removing %d from the list of children", pid);
712 g_hash_table_remove(children, &pid);
718 * Kill a child. Called from sigterm_handler::g_hash_table_foreach.
721 * @param value the value corresponding to the above key
722 * @param user_data a pointer which we always set to 1, so that we know what
/* GHFunc-shaped callback (key, value, user_data) handed to
 * g_hash_table_foreach() by the SIGTERM handler; user_data is read as a
 * pointer to int. The rest of the body is not visible in this listing. */
725 void killchild(gpointer key, gpointer value, gpointer user_data) {
727 int *parent=user_data;
734 * Handle SIGTERM and dispatch it to our children
735 * @param s the signal we're handling (must be SIGTERM, or something
736 * is severely wrong).
/* Visits every entry of the `children` table with killchild(), passing the
 * address of a local `parent` variable as user data. */
738 void sigterm_handler(int s) {
741 g_hash_table_foreach(children, killchild, &parent);
751 * Detect the size of a file.
753 * @param fhandle An open filedescriptor
754 * @return the size of the file, or OFFT_MAX if detection was
757 off_t size_autodetect(int fhandle) {
759 unsigned long sectors;
760 struct stat stat_buf;
763 #ifdef HAVE_SYS_MOUNT_H
764 #ifdef HAVE_SYS_IOCTL_H
766 DEBUG("looking for export size with ioctl BLKGETSIZE\n");
767 if (!ioctl(fhandle, BLKGETSIZE, §ors) && sectors) {
768 es = (off_t)sectors * (off_t)512;
771 #endif /* BLKGETSIZE */
772 #endif /* HAVE_SYS_IOCTL_H */
773 #endif /* HAVE_SYS_MOUNT_H */
775 DEBUG("looking for fhandle size with fstat\n");
776 stat_buf.st_size = 0;
777 error = fstat(fhandle, &stat_buf);
779 if(stat_buf.st_size > 0)
780 return (off_t)stat_buf.st_size;
782 err("fstat failed: %m");
785 DEBUG("looking for fhandle size with lseek SEEK_END\n");
786 es = lseek(fhandle, (off_t)0, SEEK_END);
787 if (es > ((off_t)0)) {
790 DEBUG2("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4)));
793 err("Could not find size of exported block device: %m");
798 * Get the file handle and offset, given an export offset.
800 * @param export An array of export files
801 * @param a The offset to get corresponding file/offset for
802 * @param fhandle [out] File descriptor
803 * @param foffset [out] Offset into fhandle
804 * @param maxbytes [out] Tells how many bytes can be read/written
805 * from fhandle starting at foffset (0 if there is no limit)
806 * @return 0 on success, -1 on failure
/* Maps the export-wide offset a onto one element of the FILE_INFO array:
 * finds the last element whose startoff is <= a, then reports its
 * descriptor and the offset relative to that element's start. */
808 int get_filepos(GArray* export, off_t a, int* fhandle, off_t* foffset, size_t* maxbytes ) {
809 /* Negative offset not allowed */
813 /* Binary search for last file with starting offset <= a */
816 int end = export->len - 1;
817 while( start <= end ) {
818 int mid = (start + end) / 2;
819 fi = g_array_index(export, FILE_INFO, mid);
820 if( fi.startoff < a ) {
822 } else if( fi.startoff > a ) {
830 /* end should never go negative, since first startoff is 0 and a >= 0 */
833 fi = g_array_index(export, FILE_INFO, end);
834 *fhandle = fi.fhandle;
835 *foffset = a - fi.startoff;
/* When another element follows, the caller may only transfer up to the
 * start of that next element. */
837 if( end+1 < export->len ) {
838 FILE_INFO fi_next = g_array_index(export, FILE_INFO, end+1);
839 *maxbytes = fi_next.startoff - a;
846 * seek to a position in a file, with error handling.
847 * @param handle a filedescriptor
848 * @param a position to seek to
849 * @todo get rid of this; lastpoint is a global variable right now, but it
850 * shouldn't be. If we pass it on as a parameter, that makes things a *lot*
/**
 * Move a descriptor to an absolute position, reporting failure via err().
 *
 * @param handle a filedescriptor
 * @param a absolute position (SEEK_SET) to move to
 **/
void myseek(int handle,off_t a) {
	off_t landed = lseek(handle, a, SEEK_SET);

	if (landed >= 0)
		return;
	err("Can not seek locally!\n");
}
860 * Write an amount of bytes at a given offset to the right file. This
861 * abstracts the write-side of the multiple file option.
863 * @param a The offset where the write should start
864 * @param buf The buffer to write from
865 * @param len The length of buf
866 * @param client The client we're serving for
867 * @return The number of bytes actually written, or -1 in case of an error
/* Resolves export offset a to a descriptor and file offset through
 * get_filepos(), seeks there and issues a single write(); the return value
 * is whatever that write() returns, so it may be a short count.
 * NOTE(review): the debug format uses %Lu for an off_t and %u for a size_t
 * -- confirm the conversions match the argument types. */
869 ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client) {
874 if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
/* A non-zero maxbytes is the limit reported by get_filepos(). */
876 if(maxbytes && len > maxbytes)
879 DEBUG4("(WRITE to fd %d offset %Lu len %u), ", fhandle, foffset, len);
881 myseek(fhandle, foffset);
882 return write(fhandle, buf, len);
886 * Call rawexpwrite repeatedly until all data has been written.
887 * @return 0 on success, nonzero on failure
/* Repeats rawexpwrite() while bytes remain and each call makes progress.
 * The result is non-zero when the last call failed or bytes are left over.
 * NOTE(review): the declaration of ret is not visible; if it has no
 * initialiser, a call with len == 0 reads it unset -- confirm. */
889 int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client) {
892 while(len > 0 && (ret=rawexpwrite(a, buf, len, client)) > 0 ) {
897 return (ret < 0 || len != 0);
901 * Read an amount of bytes at a given offset from the right file. This
902 * abstracts the read-side of the multiple files option.
904 * @param a The offset where the read should start
905 * @param buf A buffer to read into
906 * @param len The size of buf
907 * @param client The client we're serving for
908 * @return The number of bytes actually read, or -1 in case of an
/* Resolves export offset a to a descriptor and file offset through
 * get_filepos(), seeks there and issues a single read(); the return value
 * is whatever that read() returns, so it may be a short count.
 * NOTE(review): the debug format uses %Lu for an off_t and %u for a size_t
 * -- confirm the conversions match the argument types. */
911 ssize_t rawexpread(off_t a, char *buf, size_t len, CLIENT *client) {
916 if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
/* A non-zero maxbytes is the limit reported by get_filepos(). */
918 if(maxbytes && len > maxbytes)
921 DEBUG4("(READ from fd %d offset %Lu len %u), ", fhandle, foffset, len);
923 myseek(fhandle, foffset);
924 return read(fhandle, buf, len);
928 * Call rawexpread repeatedly until all data has been read.
929 * @return 0 on success, nonzero on failure
/* Repeats rawexpread() while bytes remain and each call makes progress.
 * The result is non-zero when the last call failed or bytes are left over
 * (which includes hitting end of file early).
 * NOTE(review): the declaration of ret is not visible; if it has no
 * initialiser, a call with len == 0 reads it unset -- confirm. */
931 int rawexpread_fully(off_t a, char *buf, size_t len, CLIENT *client) {
934 while(len > 0 && (ret=rawexpread(a, buf, len, client)) > 0 ) {
939 return (ret < 0 || len != 0);
943 * Read an amount of bytes at a given offset from the right file. This
944 * abstracts the read-side of the copyonwrite stuff, and calls
945 * rawexpread() with the right parameters to do the actual work.
946 * @param a The offset where the read should start
947 * @param buf A buffer to read into
948 * @param len The size of buf
949 * @param client The client we're going to read for
950 * @return 0 on success, nonzero on failure
/* Read path with optional copy-on-write. Without F_COPYONWRITE the request
 * goes straight to rawexpread_fully(). Otherwise the range is walked one
 * DIFFPAGESIZE page at a time: a page with a difmap entry other than
 * (u32)-1 is read from the diff file, any other page from the export. */
952 int expread(off_t a, char *buf, size_t len, CLIENT *client) {
954 off_t mapcnt, mapl, maph, pagestart;
956 if (!(client->server->flags & F_COPYONWRITE))
957 return(rawexpread_fully(a, buf, len, client));
958 DEBUG3("Asked to read %d bytes at %Lu.\n", len, (unsigned long long)a);
/* First and last page index touched by [a, a+len). */
960 mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
962 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
963 pagestart=mapcnt*DIFFPAGESIZE;
/* rdlen: the remaining length, capped at the end of the current page. */
965 rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
966 len : (size_t)DIFFPAGESIZE-offset;
967 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
968 DEBUG3("Page %Lu is at %lu\n", (unsigned long long)mapcnt,
969 (unsigned long)(client->difmap[mapcnt]));
/* A single read() is issued, so a short read counts as failure.
 * NOTE(review): difmap entries are u32; confirm the multiplication by
 * DIFFPAGESIZE is done in a type wide enough for large diff files. */
970 myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset);
971 if (read(client->difffile, buf, rdlen) != rdlen) return -1;
972 } else { /* the block is not there */
973 DEBUG2("Page %Lu is not here, we read the original one\n",
974 (unsigned long long)mapcnt);
975 if(rawexpread_fully(a, buf, rdlen, client)) return -1;
/* Advance past the bytes just handled. */
977 len-=rdlen; a+=rdlen; buf+=rdlen;
983 * Write an amount of bytes at a given offset to the right file. This
984 * abstracts the write-side of the copyonwrite option, and calls
985 * rawexpwrite() with the right parameters to do the actual work.
987 * @param a The offset where the write should start
988 * @param buf The buffer to write from
989 * @param len The length of buf
990 * @param client The client we're going to write for.
991 * @return 0 on success, nonzero on failure
/* Write path with optional copy-on-write. Without F_COPYONWRITE the request
 * goes straight to rawexpwrite_fully(). Otherwise the range is walked one
 * DIFFPAGESIZE page at a time: a page already in the diff file is updated
 * in place; a new page is first read in full from the export, patched with
 * the caller's bytes, and written to the diff file as a whole page. */
993 int expwrite(off_t a, char *buf, size_t len, CLIENT *client) {
994 char pagebuf[DIFFPAGESIZE];
995 off_t mapcnt,mapl,maph;
1000 if (!(client->server->flags & F_COPYONWRITE))
1001 return(rawexpwrite_fully(a, buf, len, client));
1002 DEBUG3("Asked to write %d bytes at %Lu.\n", len, (unsigned long long)a);
/* First and last page index touched by [a, a+len). */
1004 mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
1006 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
1007 pagestart=mapcnt*DIFFPAGESIZE ;
1008 offset=a-pagestart ;
/* wrlen: the remaining length, capped at the end of the current page. */
1009 wrlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
1010 len : (size_t)DIFFPAGESIZE-offset;
1012 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1013 DEBUG3("Page %Lu is at %lu\n", (unsigned long long)mapcnt,
1014 (unsigned long)(client->difmap[mapcnt])) ;
1015 myseek(client->difffile,
1016 client->difmap[mapcnt]*DIFFPAGESIZE+offset);
1017 if (write(client->difffile, buf, wrlen) != wrlen) return -1 ;
1018 } else { /* the block is not there */
/* NOTE(review): the diff file is positioned at difffilelen*DIFFPAGESIZE
 * before the map entry is chosen. With F_SPARSE the entry becomes mapcnt
 * and difffilelen is not advanced, yet later accesses seek to
 * difmap[mapcnt]*DIFFPAGESIZE -- the page appears to be written at one
 * place and looked up at another. Confirm. */
1019 myseek(client->difffile,client->difffilelen*DIFFPAGESIZE) ;
1020 client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++;
1021 DEBUG3("Page %Lu is not here, we put it at %lu\n",
1022 (unsigned long long)mapcnt,
1023 (unsigned long)(client->difmap[mapcnt]));
/* Fetch the whole original page, overlay the new bytes, store the page. */
1024 rdlen=DIFFPAGESIZE ;
1025 if (rawexpread_fully(pagestart, pagebuf, rdlen, client))
1027 memcpy(pagebuf+offset,buf,wrlen) ;
1028 if (write(client->difffile, pagebuf, DIFFPAGESIZE) !=
/* Advance past the bytes just handled. */
1032 len-=wrlen ; a+=wrlen ; buf+=wrlen ;
1038 * Do the initial negotiation.
1040 * @param client The client we're negotiating with.
/* Sends the greeting to client->net in this order: the 8-byte INIT_PASSWD,
 * the magic number, the export size as a 64-bit value, a 32-bit flags word
 * and 124 zero bytes. The integers pass through htonll()/htonl() first.
 * Each write() is only checked for a negative result, so a short write goes
 * unnoticed. */
1042 void negotiate(CLIENT *client) {
1045 u32 flags = NBD_FLAG_HAS_FLAGS;
1047 memset(zeros, '\0', sizeof(zeros));
1048 if (write(client->net, INIT_PASSWD, 8) < 0)
1049 err("Negotiation failed: %m");
/* NOTE(review): cliserv_magic is overwritten with its converted value; if
 * it is a shared variable, a second call in the same process would convert
 * it again -- confirm. */
1050 cliserv_magic = htonll(cliserv_magic);
1051 if (write(client->net, &cliserv_magic, sizeof(cliserv_magic)) < 0)
1052 err("Negotiation failed: %m");
1053 size_host = htonll((u64)(client->exportsize));
1054 if (write(client->net, &size_host, 8) < 0)
1055 err("Negotiation failed: %m");
/* The read-only bit is advertised only for F_READONLY exports. */
1056 if (client->server->flags & F_READONLY)
1057 flags |= NBD_FLAG_READ_ONLY;
1058 flags = htonl(flags);
1059 if (write(client->net, &flags, 4) < 0)
1060 err("Negotiation failed: %m");
/* Assumes zeros holds at least 124 bytes; its declaration is not visible. */
1061 if (write(client->net, zeros, 124) < 0)
1062 err("Negotiation failed: %m");
/** sending macro: writes the whole `reply` object to descriptor `net`.
 * Expands to a single expression, so the caller supplies the semicolon. */
#define SEND(net,reply) writeit((net), &(reply), sizeof(reply))
/** error macro: sends `reply` with its error field set to `errcode`
 * (converted with htonl), then resets the field to 0 so that the same reply
 * object can be reused. Wrapped in do/while(0) so that it behaves as one
 * statement, including in an unbraced if/else. */
#define ERROR(client,reply,errcode) \
	do { \
		(reply).error = htonl(errcode); \
		SEND((client)->net, reply); \
		(reply).error = 0; \
	} while (0)
1070 * Serve a file to a single client.
1072 * @todo This beast needs to be split up in many tiny little manageable
1073 * pieces. Preferably with a chainsaw.
1075 * @param client The client we're going to serve to.
1076 * @return when the client disconnects
/* Request loop for one client. Each request header is read from
 * client->net and converted to host byte order. A disconnect request
 * releases the copy-on-write resources; a write request has its payload
 * read from the socket and handed to expwrite(); any other request is
 * served by expread() and answered with the reply header and the data in a
 * single writeit() call. Failures are reported to the client through
 * ERROR(). */
1078 int mainloop(CLIENT *client) {
1079 struct nbd_request request;
1080 struct nbd_reply reply;
1081 gboolean go_on=TRUE;
1086 DEBUG("Entering request loop!\n");
1087 reply.magic = htonl(NBD_REPLY_MAGIC);
/* Re-arm the idle alarm before each blocking read of a request. */
1096 if (client->server->timeout)
1097 alarm(client->server->timeout);
1098 readit(client->net, &request, sizeof(request));
1099 request.from = ntohll(request.from);
1100 request.type = ntohl(request.type);
/* NOTE(review): the disconnect request is acted on before request.magic is
 * validated further down. */
1102 if (request.type==NBD_CMD_DISC) {
1103 msg2(LOG_INFO, "Disconnect request received.");
1104 if (client->server->flags & F_COPYONWRITE) {
1105 if (client->difmap) g_free(client->difmap) ;
1106 close(client->difffile);
1107 unlink(client->difffilename);
1108 free(client->difffilename);
1114 len = ntohl(request.len);
1116 if (request.magic != htonl(NBD_REQUEST_MAGIC))
1117 err("Not enough magic.");
/* NOTE(review): this accepts len up to BUFSIZE + sizeof(struct nbd_reply).
 * Below, a write request reads len bytes into buf, and a read request
 * stores len bytes at buf + sizeof(struct nbd_reply). If buf holds BUFSIZE
 * bytes (its declaration is not visible), both can run past its end; the
 * bound that keeps the read reply inside the buffer is
 * BUFSIZE - sizeof(struct nbd_reply). Confirm and fix. */
1118 if (len > BUFSIZE + sizeof(struct nbd_reply))
1119 err("Request too big!");
1121 printf("%s from %Lu (%Lu) len %d, ", request.type ? "WRITE" :
1122 "READ", (unsigned long long)request.from,
1123 (unsigned long long)request.from / 512, len);
/* The handle is echoed back so the client can match the reply. */
1125 memcpy(reply.handle, request.handle, sizeof(reply.handle));
1126 if ((request.from + len) > (OFFT_MAX)) {
1127 DEBUG("[Number too large!]");
1128 ERROR(client, reply, EINVAL);
/* NOTE(review): the sum is cast to ssize_t before the comparison with
 * exportsize (an off_t); where ssize_t is narrower than off_t the cast can
 * truncate and let an out-of-range request through -- confirm. */
1132 if (((ssize_t)((off_t)request.from + len) > client->exportsize)) {
1134 ERROR(client, reply, EINVAL);
1138 if (request.type==NBD_CMD_WRITE) {
1139 DEBUG("wr: net->buf, ");
/* The payload is consumed from the socket before the read-only check. */
1140 readit(client->net, buf, len);
1141 DEBUG("buf->exp, ");
1142 if ((client->server->flags & F_READONLY) ||
1143 (client->server->flags & F_AUTOREADONLY)) {
1144 DEBUG("[WRITE to READONLY!]");
1145 ERROR(client, reply, EPERM);
1148 if (expwrite(request.from, buf, len, client)) {
1149 DEBUG("Write failed: %m" );
1150 ERROR(client, reply, errno);
1153 SEND(client->net, reply);
/* Read request: data lands behind the space reserved for the reply header,
 * the header is then copied in front, and both go out in one call. */
1159 DEBUG("exp->buf, ");
1160 if (expread(request.from, buf + sizeof(struct nbd_reply), len, client)) {
1161 DEBUG("Read failed: %m");
1162 ERROR(client, reply, errno);
1166 DEBUG("buf->net, ");
1167 memcpy(buf, &reply, sizeof(struct nbd_reply));
1168 writeit(client->net, buf, len + sizeof(struct nbd_reply));
1175 * Set up client export array, which is an array of FILE_INFO.
1176 * Also, split a single exportfile into multiple ones, if that was asked.
1177 * @param client information on the client which we want to setup export for
/* Opens the file(s) behind an export and records one FILE_INFO per file in
 * client->export. With F_MULTIFILE the files are named
 * "<exportname>.<index>"; otherwise the single file is exportname itself.
 * Each file's start offset is the previous start plus the previous size, and
 * the export size is the sum, optionally overridden by the configured
 * expected_size. */
1179 void setupexport(CLIENT* client) {
1181 off_t laststartoff = 0, lastsize = 0;
1182 int multifile = (client->server->flags & F_MULTIFILE);
1184 client->export = g_array_new(TRUE, TRUE, sizeof(FILE_INFO));
1186 /* If multi-file, open as many files as we can.
1187 * If not, open exactly one file.
1188 * Calculate file sizes as we go to get total size. */
/* NOTE(review): `mode` carries open(2) access flags (O_RDONLY/O_RDWR) but is
 * declared mode_t, the type for permission bits. */
1192 mode_t mode = (client->server->flags & F_READONLY) ? O_RDONLY : O_RDWR;
1195 tmpname=g_strdup_printf("%s.%d", client->exportname, i);
1197 tmpname=g_strdup(client->exportname);
1199 DEBUG2( "Opening %s\n", tmpname );
1200 fi.fhandle = open(tmpname, mode);
1201 if(fi.fhandle == -1 && mode == O_RDWR) {
1202 /* Try again because maybe media was read-only */
1203 fi.fhandle = open(tmpname, O_RDONLY);
/* The fallback changes the flags of the SERVER itself, not just of this
 * client. */
1204 if(fi.fhandle != -1) {
1205 client->server->flags |= F_AUTOREADONLY;
1206 client->server->flags |= F_READONLY;
1209 if(fi.fhandle == -1) {
/* In multi-file mode a failed open after the first file is a separate case
 * from the hard error below. */
1210 if(multifile && i>0)
1212 err("Could not open exported file: %m");
1214 fi.startoff = laststartoff + lastsize;
1215 g_array_append_val(client->export, fi);
1218 /* Starting offset and size of this file will be used to
1219 * calculate starting offset of next file */
1220 laststartoff = fi.startoff;
1221 lastsize = size_autodetect(fi.fhandle);
1227 /* Set export size to total calculated size */
1228 client->exportsize = laststartoff + lastsize;
1230 /* Export size may be overridden */
1231 if(client->server->expected_size) {
1232 /* desired size must be <= total calculated size */
1233 if(client->server->expected_size > client->exportsize) {
1234 err("Size of exported file is too big\n");
1237 client->exportsize = client->server->expected_size;
1240 msg3(LOG_INFO, "Size of exported file/device is %Lu", (unsigned long long)client->exportsize);
1242 msg3(LOG_INFO, "Total number of files: %d", i);
1246 int copyonwrite_prepare(CLIENT* client) {
1248 if ((client->difffilename = malloc(1024))==NULL)
1249 err("Failed to allocate string for diff file name");
1250 snprintf(client->difffilename, 1024, "%s-%s-%d.diff",client->exportname,client->clientname,
1252 client->difffilename[1023]='\0';
1253 msg3(LOG_INFO,"About to create map and diff file %s",client->difffilename) ;
1254 client->difffile=open(client->difffilename,O_RDWR | O_CREAT | O_TRUNC,0600) ;
1255 if (client->difffile<0) err("Could not create diff file (%m)") ;
1256 if ((client->difmap=calloc(client->exportsize/DIFFPAGESIZE,sizeof(u32)))==NULL)
1257 err("Could not allocate memory") ;
1258 for (i=0;i<client->exportsize/DIFFPAGESIZE;i++) client->difmap[i]=(u32)-1 ;
1264 * Run a command. This is used for the ``prerun'' and ``postrun'' config file
1267 * @param command the command to be ran. Read from the config file
1268 * @param file the file name we're about to export
1270 int do_run(gchar* command, gchar* file) {
1274 if(command && *command) {
1275 cmd = g_strdup_printf(command, file);
1283 * Serve a connection.
1285 * @todo allow for multithreading, perhaps use libevent. Not just yet, though;
1286 * follow the road map.
1288 * @param client a connected client
1290 void serveconnection(CLIENT *client) {
1291 if(do_run(client->server->prerun, client->exportname)) {
1294 setupexport(client);
1296 if (client->server->flags & F_COPYONWRITE) {
1297 copyonwrite_prepare(client);
1300 setmysockopt(client->net);
1303 do_run(client->server->postrun, client->exportname);
1307 * Find the name of the file we have to serve. This will use g_strdup_printf
1308 * to put the IP address of the client inside a filename containing
1309 * "%s" (in the form as specified by the "virtstyle" option). That name
1310 * is then written to client->exportname.
1312 * @param net A socket connected to an nbd client
1313 * @param client information about the client. The IP address in human-readable
1314 * format will be written to a new char* buffer, the address of which will be
1315 * stored in client->clientname.
1317 void set_peername(int net, CLIENT *client) {
1318 struct sockaddr_in addrin;
1319 struct sockaddr_in netaddr;
1320 size_t addrinlen = sizeof( addrin );
1326 if (getpeername(net, (struct sockaddr *) &addrin, (socklen_t *)&addrinlen) < 0)
1327 err("getsockname failed: %m");
1328 peername = g_strdup(inet_ntoa(addrin.sin_addr));
1329 switch(client->server->virtstyle) {
1331 client->exportname=g_strdup(client->server->exportname);
1334 for(i=0;i<strlen(peername);i++) {
1335 if(peername[i]=='.') {
1340 client->exportname=g_strdup_printf(client->server->exportname, peername);
1343 memcpy(&netaddr, &addrin, addrinlen);
1344 netaddr.sin_addr.s_addr>>=32-(client->server->cidrlen);
1345 netaddr.sin_addr.s_addr<<=32-(client->server->cidrlen);
1346 netname = inet_ntoa(netaddr.sin_addr);
1347 tmp=g_strdup_printf("%s/%s", netname, peername);
1348 client->exportname=g_strdup_printf(client->server->exportname, tmp);
1352 msg4(LOG_INFO, "connect from %s, assigned file is %s",
1353 peername, client->exportname);
1354 client->clientname=g_strdup(peername);
1360 * @param data a pointer to pid_t which should be freed
1362 void destroy_pid_t(gpointer data) {
1367 * Go daemon (unless we specified at compile time that we didn't want this)
1368 * @param serve the first server of our configuration. If its port is zero,
1369 * then do not daemonize, because we're doing inetd then. This parameter
1370 * is only used to create a PID file of the form
1371 * /var/run/nbd-server.<port>.pid; it's not modified in any way.
1373 #if !defined(NODAEMON) && !defined(NOFORK)
1374 void daemonize(SERVER* serve) {
1377 if(serve && !(serve->port)) {
1383 if(!*pidftemplate) {
1385 strncpy(pidftemplate, "/var/run/server.%d.pid", 255);
1387 strncpy(pidftemplate, "/var/run/server.pid", 255);
1390 snprintf(pidfname, 255, pidftemplate, serve ? serve->port : 0);
1391 pidf=fopen(pidfname, "w");
1393 fprintf(pidf,"%d\n", (int)getpid());
1397 fprintf(stderr, "Not fatal; continuing");
1401 #define daemonize(serve)
1402 #endif /* !defined(NODAEMON) && !defined(NOFORK) */
1405 * Connect a server's socket.
1407 * @param serve the server we want to connect.
1409 void setup_serve(SERVER *serve) {
1410 struct sockaddr_in addrin;
1411 struct sigaction sa;
1412 int addrinlen = sizeof(addrin);
1423 if ((serve->flags) && F_SDP) {
1427 if ((serve->socket = socket(af, SOCK_STREAM, IPPROTO_TCP)) < 0)
1430 /* lose the pesky "Address already in use" error message */
1431 if (setsockopt(serve->socket,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) {
1432 err("setsockopt SO_REUSEADDR");
1434 if (setsockopt(serve->socket,SOL_SOCKET,SO_KEEPALIVE,&yes,sizeof(int)) == -1) {
1435 err("setsockopt SO_KEEPALIVE");
1438 /* make the listening socket non-blocking */
1439 if ((sock_flags = fcntl(serve->socket, F_GETFL, 0)) == -1) {
1440 err("fcntl F_GETFL");
1442 if (fcntl(serve->socket, F_SETFL, sock_flags | O_NONBLOCK) == -1) {
1443 err("fcntl F_SETFL O_NONBLOCK");
1446 DEBUG("Waiting for connections... bind, ");
1447 addrin.sin_family = AF_INET;
1449 if(serve->flags && F_SDP) {
1450 addrin.sin_family = AF_INET_SDP;
1453 addrin.sin_port = htons(serve->port);
1454 addrin.sin_addr.s_addr = 0;
1455 if (bind(serve->socket, (struct sockaddr *) &addrin, addrinlen) < 0)
1458 if (listen(serve->socket, 1) < 0)
1460 sa.sa_handler = sigchld_handler;
1461 sigemptyset(&sa.sa_mask);
1462 sa.sa_flags = SA_RESTART;
1463 if(sigaction(SIGCHLD, &sa, NULL) == -1)
1464 err("sigaction: %m");
1465 sa.sa_handler = sigterm_handler;
1466 sigemptyset(&sa.sa_mask);
1467 sa.sa_flags = SA_RESTART;
1468 if(sigaction(SIGTERM, &sa, NULL) == -1)
1469 err("sigaction: %m");
1473 * Connect our servers.
1475 void setup_servers(GArray* servers) {
1478 for(i=0;i<servers->len;i++) {
1479 setup_serve(&(g_array_index(servers, SERVER, i)));
1481 children=g_hash_table_new_full(g_int_hash, g_int_equal, NULL, destroy_pid_t);
1485 * Loop through the available servers, and serve them.
1487 int serveloop(GArray* servers) {
1488 struct sockaddr_in addrin;
1489 socklen_t addrinlen=sizeof(addrin);
1499 * Set up the master fd_set. The set of descriptors we need
1500 * to select() for never changes anyway and it buys us a *lot*
1501 * of time to only build this once. However, if we ever choose
1502 * to not fork() for clients anymore, we may have to revisit
1507 for(i=0;i<servers->len;i++) {
1508 sock=(g_array_index(servers, SERVER, i)).socket;
1509 FD_SET(sock, &mset);
1510 max=sock>max?sock:max;
1517 memcpy(&rset, &mset, sizeof(fd_set));
1520 if(select(max+1, &rset, NULL, NULL, &tv)>0) {
1522 for(i=0;i<servers->len;i++) {
1523 serve=&(g_array_index(servers, SERVER, i));
1524 if(FD_ISSET(serve->socket, &rset)) {
1525 if ((net=accept(serve->socket, (struct sockaddr *) &addrin, &addrinlen)) < 0)
1528 client = g_malloc(sizeof(CLIENT));
1529 client->server=serve;
1530 client->exportsize=OFFT_MAX;
1532 set_peername(net, client);
1533 if (!authorized_client(client)) {
1534 msg2(LOG_INFO,"Unauthorized client") ;
1538 msg2(LOG_INFO,"Authorized client") ;
1539 pid=g_malloc(sizeof(pid_t));
1541 if ((*pid=fork())<0) {
1542 msg3(LOG_INFO,"Could not fork (%s)",strerror(errno)) ;
1546 if (*pid>0) { /* parent */
1548 g_hash_table_insert(children, pid, pid);
1552 g_hash_table_destroy(children);
1553 for(i=0;i<servers->len;i++) {
1554 serve=g_array_index(servers, SERVER*, i);
1555 close(serve->socket);
1557 /* FALSE does not free the
1558 actual data. This is required,
1559 because the client has a
1560 direct reference into that
1561 data, and otherwise we get a
1563 g_array_free(servers, FALSE);
1565 msg2(LOG_INFO,"Starting to serve");
1566 serveconnection(client);
1575 * Set up user-ID and/or group-ID
1577 void dousers(void) {
1581 gr=getgrnam(rungroup);
1582 if(setgid(gr->gr_gid)<0)
1583 msg3(LOG_DEBUG, "Could not set GID: %s", strerror(errno));
1586 pw=getpwnam(runuser);
1587 if(setuid(pw->pw_uid)<0)
1588 msg3(LOG_DEBUG, "Could not set UID: %s", strerror(errno));
1593 * Main entry point...
1595 int main(int argc, char *argv[]) {
1600 if (sizeof( struct nbd_request )!=28) {
1601 fprintf(stderr,"Bad size of structure. Alignment problems?\n");
1605 memset(pidftemplate, '\0', 256);
1608 config_file_pos = g_strdup(CFILE);
1609 serve=cmdline(argc, argv);
1610 servers = parse_cfile(config_file_pos, &err);
1611 if(!servers || !servers->len) {
1612 g_warning("Could not parse config file: %s",
1613 err ? err->message : "Unknown error");
1616 g_array_append_val(servers, *serve);
1618 if (!(serve->port)) {
1621 /* You really should define ISSERVER if you're going to use
1622 * inetd mode, but if you don't, closing stdout and stderr
1623 * (which inetd had connected to the client socket) will let it
1627 open("/dev/null", O_WRONLY);
1628 open("/dev/null", O_WRONLY);
1630 client=g_malloc(sizeof(CLIENT));
1631 client->server=serve;
1633 client->exportsize=OFFT_MAX;
1634 set_peername(0,client);
1635 serveconnection(client);
1639 if((!serve) && (!servers||!servers->len)) {
1640 g_message("Nothing to do! Bye!");
1644 setup_servers(servers);