2 * Network Block Device - server
4 * Copyright 1996-1998 Pavel Machek, distribute under GPL
5 * <pavel@atrey.karlin.mff.cuni.cz>
6 * Copyright 2001-2004 Wouter Verhelst <wouter@debian.org>
7 * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk>
9 * Version 1.0 - hopefully 64-bit-clean
10 * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au>
11 * Version 1.2 - autodetect size of block devices, thanks to Peter T. Breuer <ptb@it.uc3m.es>
12 * Version 1.5 - can compile on Unix systems that don't have 64 bit integer
13 * type, or don't have 64 bit file offsets by defining FS_32BIT
14 * in compile options for nbd-server *only*. This can be done
15 * with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the
16 * original autoconf input file, or I would make it a configure
17 * option.) Ken Yap <ken@nlc.net.au>.
18 * Version 1.6 - fix autodetection of block device size and really make 64 bit
19 * clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk>
20 * Version 2.0 - Version synchronised with client
21 * Version 2.1 - Reap zombie client processes when they exit. Removed
22 * (uncommented) the _IO magic, it's no longer necessary. Wouter
23 * Verhelst <wouter@debian.org>
24 * Version 2.2 - Auto switch to read-only mode (useful for floppies).
25 * Version 2.3 - Fixed code so that Large File Support works. This
26 * removes the FS_32BIT compile-time directive; define
27 * _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be
28 * using FS_32BIT. This will allow you to use files >2GB instead of
29 * having to use the -m option. Wouter Verhelst <wouter@debian.org>
30 * Version 2.4 - Added code to keep track of children, so that we can
31 * properly kill them from initscripts. Add a call to daemon(),
32 * so that processes don't think they have to wait for us, which is
33 * interesting for initscripts as well. Wouter Verhelst
35 * Version 2.5 - Bugfix release: forgot to reset child_arraysize to
36 * zero after fork()ing, resulting in nbd-server going berserk
37 * when it receives a signal with at least one child open. Wouter
38 * Verhelst <wouter@debian.org>
39 * 10/10/2003 - Added socket option SO_KEEPALIVE (sf.net bug 819235);
40 * rectified type of mainloop::size_host (sf.net bugs 814435 and
41 * 817385); close the PID file after writing to it, so that the
42 * daemon can actually be found. Wouter Verhelst
44 * 10/10/2003 - Size of the data "size_host" was wrong and so was not
45 * correctly put in network endianness. Many types were corrected
46 * (size_t and off_t instead of int). <vspaceg@sourceforge.net>
47 * Version 2.6 - Some code cleanup.
48 * Version 2.7 - Better build system.
49 * 11/02/2004 - Doxygenified the source, modularized it a bit. Needs a
50 * lot more work, but this is a start. Wouter Verhelst
54 /* Includes LFS defines, which defines behaviours of some of the following
55 * headers, so must come before those */
58 #include <sys/types.h>
59 #include <sys/socket.h>
61 #include <sys/select.h> /* select */
62 #include <sys/wait.h> /* wait */
63 #ifdef HAVE_SYS_IOCTL_H
64 #include <sys/ioctl.h>
66 #include <sys/param.h>
67 #ifdef HAVE_SYS_MOUNT_H
68 #include <sys/mount.h> /* For BLKGETSIZE */
70 #include <signal.h> /* sigaction */
71 #include <netinet/tcp.h>
72 #include <netinet/in.h> /* sockaddr_in, htons, in_addr */
73 #include <netdb.h> /* hostent, gethostby*, getservby* */
80 #include <arpa/inet.h>
90 /* used in cliserv.h, so must come first */
91 #define MY_NAME "nbd_server"
94 /** Default position of the config file */
96 #define SYSCONFDIR "/etc"
98 #define CFILE SYSCONFDIR "/nbd-server/config"
100 /** Where our config file actually is */
101 gchar* config_file_pos;
103 /** What user we're running as */
105 /** What group we're running as */
106 gchar* rungroup=NULL;
108 /** Logging macros, now nothing goes to syslog unless you say ISSERVER */
110 #define msg2(a,b) syslog(a,b)
111 #define msg3(a,b,c) syslog(a,b,c)
112 #define msg4(a,b,c,d) syslog(a,b,c,d)
114 #define msg2(a,b) g_message(b)
115 #define msg3(a,b,c) g_message(b,c)
116 #define msg4(a,b,c,d) g_message(b,c,d)
119 /* Debugging macros */
122 #define DEBUG( a ) printf( a )
123 #define DEBUG2( a,b ) printf( a,b )
124 #define DEBUG3( a,b,c ) printf( a,b,c )
125 #define DEBUG4( a,b,c,d ) printf( a,b,c,d )
128 #define DEBUG2( a,b )
129 #define DEBUG3( a,b,c )
130 #define DEBUG4( a,b,c,d )
132 #ifndef PACKAGE_VERSION
133 #define PACKAGE_VERSION ""
/**
 * The highest value a variable of type off_t can reach. off_t is a signed
 * integer, so this is every bit set except the leftmost (sign) bit.
 *
 * The value is computed in an unsigned type and only then converted to
 * off_t: shifting a 1 into the sign bit of a signed type is undefined
 * behaviour. The expansion is fully parenthesized so it can be used safely
 * inside larger expressions.
 **/
#define OFFT_MAX ((off_t)((1ULL << (sizeof(off_t) * 8 - 1)) - 1))
140 #define LINELEN 256 /**< Size of static buffer used to read the
141 authorization file (yuck) */
142 #define BUFSIZE (1024*1024) /**< Size of buffer that can hold requests */
143 #define DIFFPAGESIZE 4096 /**< diff file uses those chunks */
144 #define F_READONLY 1 /**< flag to tell us a file is readonly */
145 #define F_MULTIFILE 2 /**< flag to tell us a file is exported using -m */
146 #define F_COPYONWRITE 4 /**< flag to tell us a file is exported using
148 #define F_AUTOREADONLY 8 /**< flag to tell us a file is set to autoreadonly */
150 GHashTable *children;
151 char pidfname[256]; /**< name of our PID file */
152 char pidftemplate[256]; /**< template to be used for the filename of the PID file */
153 char default_authname[] = SYSCONFDIR "/nbd-server/allow"; /**< default name of allow file */
156 * Types of virtualization
159 VIRT_NONE=0, /**< No virtualization */
160 VIRT_IPLIT, /**< Literal IP address as part of the filename */
161 VIRT_IPHASH, /**< Replacing all dots in an ip address by a / before
162 doing the same as in IPLIT */
163 VIRT_CIDR, /**< Every subnet in its own directory */
167 * Variables associated with a server.
170 gchar* exportname; /**< (unprocessed) filename of the file we're exporting */
171 off_t expected_size; /**< size of the exported file as it was told to
172 us through configuration */
173 unsigned int port; /**< port we're exporting this file at */
174 char* authname; /**< filename of the authorization file */
175 int flags; /**< flags associated with this exported file */
176 unsigned int timeout;/**< how long a connection may be idle
178 int socket; /**< The socket of this server. */
179 VIRT_STYLE virtstyle;/**< The style of virtualization, if any */
180 uint8_t cidrlen; /**< The length of the mask when we use
181 CIDR-style virtualization */
185 * Variables associated with a client socket.
188 int fhandle; /**< file descriptor */
189 off_t startoff; /**< starting offset of this file */
193 off_t exportsize; /**< size of the file we're exporting */
194 char *clientname; /**< peer */
195 char *exportname; /**< (processed) filename of the file we're exporting */
196 GArray *export; /**< array of FILE_INFO of exported files;
197 array size is always 1 unless we're
198 doing the multiple file option */
199 int net; /**< The actual client socket */
200 SERVER *server; /**< The server this client is getting data from */
201 char* difffilename; /**< filename of the copy-on-write file, if any */
202 int difffile; /**< filedescriptor of copyonwrite file. @todo
203 shouldn't this be an array too? (cfr export) Or
204 make -m and -c mutually exclusive */
205 u32 difffilelen; /**< number of pages in difffile */
206 u32 *difmap; /**< see comment on the global difmap for this one */
210 * Type of configuration file values
213 PARAM_INT, /**< This parameter is an integer */
214 PARAM_STRING, /**< This parameter is a string */
215 PARAM_BOOL, /**< This parameter is a boolean */
219 * Configuration file values
222 gchar *paramname; /**< Name of the parameter, as it appears in
224 gboolean required; /**< Whether this is a required (as opposed to
225 optional) parameter */
226 PARAM_TYPE ptype; /**< Type of the parameter. */
227 gpointer target; /**< Pointer to where the data of this
228 parameter should be written. If ptype is
229 PARAM_BOOL, the data is or'ed rather than
231 gint flagval; /**< Flag mask for this parameter in case ptype
236 * Check whether a client is allowed to connect. Works with an authorization
237 * file which contains one line per machine, no wildcards.
239 * @param opts The client who's trying to connect.
240 * @return 0 - authorization refused, 1 - OK
242 int authorized_client(CLIENT *opts) {
243 const char *ERRMSG="Invalid entry '%s' in authfile '%s', so, refusing all connections.";
248 struct in_addr client;
249 struct in_addr cltemp;
252 if ((f=fopen(opts->server->authname,"r"))==NULL) {
253 msg4(LOG_INFO,"Can't open authorization file %s (%s).",
254 opts->server->authname,strerror(errno)) ;
258 inet_aton(opts->clientname, &client);
259 while (fgets(line,LINELEN,f)!=NULL) {
260 if((tmp=index(line, '/'))) {
261 if(strlen(line)<=tmp-line) {
262 msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
266 if(inet_aton(line,&addr)) {
267 msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
270 len=strtol(tmp, NULL, 0);
271 addr.s_addr>>=32-len;
272 addr.s_addr<<=32-len;
273 memcpy(&cltemp,&client,sizeof(client));
274 cltemp.s_addr>>=32-len;
275 cltemp.s_addr<<=32-len;
276 if(addr.s_addr == cltemp.s_addr) {
280 if (strncmp(line,opts->clientname,strlen(opts->clientname))==0) {
/**
 * Read data from a file descriptor into a buffer, looping until the
 * requested number of bytes has arrived. A failed or zero-length read is
 * reported through err().
 *
 * @param f a file descriptor
 * @param buf a buffer
 * @param len the number of bytes to be read
 **/
inline void readit(int f, void *buf, size_t len) {
	char *cursor = buf;
	size_t remaining = len;

	while (remaining > 0) {
		ssize_t got;

		DEBUG("*");
		got = read(f, cursor, remaining);
		if (got <= 0)
			err("Read failed: %m");
		remaining -= got;
		cursor += got;
	}
}
/**
 * Write data from a buffer into a filedescriptor, looping until every byte
 * has been handed to write(). A failed or zero-length write is reported
 * through err().
 *
 * @param f a file descriptor
 * @param buf a buffer containing data
 * @param len the number of bytes to be written
 **/
inline void writeit(int f, void *buf, size_t len) {
	char *cursor = buf;
	size_t remaining = len;

	while (remaining > 0) {
		ssize_t sent;

		DEBUG("+");
		sent = write(f, cursor, remaining);
		if (sent <= 0)
			err("Send failed: %m");
		remaining -= sent;
		cursor += sent;
	}
}
326 * Print out a message about how to use nbd-server. Split out to a separate
327 * function so that we can call it from multiple places
330 printf("This is nbd-server version " VERSION "\n");
331 printf("Usage: port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-a timeout_sec] [-C configuration file] [-p PID file name] [-o section name]\n"
332 "\t-r|--read-only\t\tread only\n"
333 "\t-m|--multi-file\t\tmultiple file\n"
334 "\t-c|--copy-on-write\tcopy on write\n"
335 "\t-C|--config-file\tspecify an alternate configuration file\n"
336 "\t-l|--authorize-file\tfile with list of hosts that are allowed to\n\t\t\t\tconnect.\n"
337 "\t-a|--idle-time\t\tmaximum idle seconds; server terminates when\n\t\t\t\tidle time exceeded\n"
338 "\t-p|--pid-file\t\tspecify a filename to write our PID to\n"
339 "\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n\n"
340 "\tif port is set to 0, stdin is used (for running from inetd)\n"
341 "\tif file_to_export contains '%%s', it is substituted with the IP\n"
342 "\t\taddress of the machine trying to connect\n" );
343 printf("Using configuration file %s\n", CFILE);
346 /* Dumps a config file section of the given SERVER*, and exits. */
347 void dump_section(SERVER* serve, gchar* section_header) {
348 printf("[%s]\n", section_header);
349 printf("\texportname = %s\n", serve->exportname);
350 printf("\tport = %d\n", serve->port);
351 if(serve->flags & F_READONLY) {
352 printf("\treadonly = true\n");
354 if(serve->flags & F_MULTIFILE) {
355 printf("\tmultifile = true\n");
357 if(serve->flags & F_COPYONWRITE) {
358 printf("\tcopyonwrite = true\n");
360 if(serve->expected_size) {
361 printf("\tfilesize = %Ld\n", (long long int)serve->expected_size);
363 if(serve->authname) {
364 printf("\tauthfile = %s\n", serve->authname);
367 printf("\ttimeout = %d\n", serve->timeout);
373 * Parse the command line.
375 * @param argc the argc argument to main()
376 * @param argv the argv argument to main()
378 SERVER* cmdline(int argc, char *argv[]) {
382 struct option long_options[] = {
383 {"read-only", no_argument, NULL, 'r'},
384 {"multi-file", no_argument, NULL, 'm'},
385 {"copy-on-write", no_argument, NULL, 'c'},
386 {"authorize-file", required_argument, NULL, 'l'},
387 {"idle-time", required_argument, NULL, 'a'},
388 {"config-file", required_argument, NULL, 'C'},
389 {"pid-file", required_argument, NULL, 'p'},
390 {"output-config", required_argument, NULL, 'o'},
397 gboolean do_output=FALSE;
398 gchar* section_header;
403 serve=g_new0(SERVER, 1);
404 serve->authname = g_strdup(default_authname);
405 while((c=getopt_long(argc, argv, "-a:C:cl:mo:rp:", long_options, &i))>=0) {
408 /* non-option argument */
409 switch(nonspecial++) {
411 serve->port=strtol(optarg, NULL, 0);
414 serve->exportname = g_strdup(optarg);
415 if(serve->exportname[0] != '/') {
416 fprintf(stderr, "E: The to be exported file needs to be an absolute filename!\n");
421 last=strlen(optarg)-1;
423 if (suffix == 'k' || suffix == 'K' ||
424 suffix == 'm' || suffix == 'M')
426 es = (off_t)atol(optarg);
434 serve->expected_size = es;
439 serve->flags |= F_READONLY;
442 serve->flags |= F_MULTIFILE;
446 section_header = g_strdup(optarg);
449 strncpy(pidftemplate, optarg, 256);
452 serve->flags |=F_COPYONWRITE;
455 g_free(config_file_pos);
456 config_file_pos=g_strdup(optarg);
459 g_free(serve->authname);
460 serve->authname=g_strdup(optarg);
463 serve->timeout=strtol(optarg, NULL, 0);
471 /* What's left: the port to export, the name of the to be exported
472 * file, and, optionally, the size of the file, in that order. */
479 g_critical("Need a complete configuration on the command line to output a config file section!");
482 dump_section(serve, section_header);
488 * Error codes for config file parsing
491 CFILE_NOTFOUND, /**< The configuration file is not found */
492 CFILE_MISSING_GENERIC, /**< The (required) group "generic" is missing */
493 CFILE_KEY_MISSING, /**< A (required) key is missing */
494 CFILE_VALUE_INVALID, /**< A value is syntactically invalid */
495 CFILE_PROGERR /**< Programmer error */
499 * Remove a SERVER from memory. Used from the hash table
501 void remove_server(gpointer s) {
505 g_free(server->exportname);
507 g_free(server->authname);
512 * Parse the config file.
514 * @param f the name of the config file
515 * @param e a GError. @see CFILE_ERRORS for what error values this function can
517 * @return a Array of SERVER* pointers, If the config file is empty or does not
518 * exist, returns an empty GHashTable; if the config file contains an
519 * error, returns NULL, and e is set appropriately
521 GArray* parse_cfile(gchar* f, GError** e) {
522 const char* DEFAULT_ERROR = "Could not parse %s in group %s: %s";
523 const char* MISSING_REQUIRED_ERROR = "Could not find required value %s in group %s: %s";
525 gchar *virtstyle=NULL;
527 { "exportname", TRUE, PARAM_STRING, NULL, 0 },
528 { "port", TRUE, PARAM_INT, NULL, 0 },
529 { "authfile", FALSE, PARAM_STRING, NULL, 0 },
530 { "timeout", FALSE, PARAM_INT, NULL, 0 },
531 { "filesize", FALSE, PARAM_INT, NULL, 0 },
532 { "virtstyle", FALSE, PARAM_STRING, NULL, 0 },
533 { "readonly", FALSE, PARAM_BOOL, NULL, F_READONLY },
534 { "multifile", FALSE, PARAM_BOOL, NULL, F_MULTIFILE },
535 { "copyonwrite", FALSE, PARAM_BOOL, NULL, F_COPYONWRITE },
536 { "autoreadonly", FALSE, PARAM_BOOL, NULL, F_AUTOREADONLY },
537 { "sparse_cow", FALSE, PARAM_BOOL, NULL, F_SPARSE },
539 const int lp_size=11;
541 { "user", FALSE, PARAM_STRING, &runuser, 0 },
542 { "group", FALSE, PARAM_STRING, &rungroup, 0 },
548 const char *err_msg=NULL;
556 errdomain = g_quark_from_string("parse_cfile");
557 cfile = g_key_file_new();
558 retval = g_array_new(FALSE, TRUE, sizeof(SERVER));
559 if(!g_key_file_load_from_file(cfile, f, G_KEY_FILE_KEEP_COMMENTS |
560 G_KEY_FILE_KEEP_TRANSLATIONS, &err)) {
561 g_set_error(e, errdomain, CFILE_NOTFOUND, "Could not open config file.");
562 g_key_file_free(cfile);
565 if(strcmp(g_key_file_get_start_group(cfile), "generic")) {
566 g_set_error(e, errdomain, CFILE_MISSING_GENERIC, "Config file does not contain the [generic] group!");
567 g_key_file_free(cfile);
570 groups = g_key_file_get_groups(cfile, NULL);
571 for(i=0;groups[i];i++) {
572 memset(&s, '\0', sizeof(SERVER));
573 lp[0].target=&(s.exportname);
574 lp[1].target=&(s.port);
575 lp[2].target=&(s.authname);
576 lp[3].target=&(s.timeout);
577 lp[4].target=&(s.expected_size);
578 lp[5].target=&(virtstyle);
579 lp[6].target=lp[7].target=lp[8].target=
580 lp[9].target=lp[10].target=&(s.flags);
581 /* After the [generic] group, start parsing exports */
586 for(j=0;j<p_size;j++) {
587 g_assert(p[j].target != NULL);
588 g_assert(p[j].ptype==PARAM_INT||p[j].ptype==PARAM_STRING||p[j].ptype==PARAM_BOOL);
591 *((gint*)p[j].target) =
592 g_key_file_get_integer(cfile,
598 *((gchar**)p[j].target) =
599 g_key_file_get_string(cfile,
605 value = g_key_file_get_boolean(cfile,
607 p[j].paramname, &err);
610 *((gint*)p[j].target) |= p[j].flagval;
612 *((gint*)p[j].target) &= ~(p[j].flagval);
618 if(err->code == G_KEY_FILE_ERROR_KEY_NOT_FOUND) {
620 /* Ignore not-found error for optional values */
624 err_msg = MISSING_REQUIRED_ERROR;
627 err_msg = DEFAULT_ERROR;
629 g_set_error(e, errdomain, CFILE_VALUE_INVALID, err_msg, p[j].paramname, groups[i], err->message);
630 g_array_free(retval, TRUE);
632 g_key_file_free(cfile);
637 if(!strncmp(virtstyle, "none", 4)) {
638 s.virtstyle=VIRT_NONE;
639 } else if(!strncmp(virtstyle, "ipliteral", 9)) {
640 s.virtstyle=VIRT_IPLIT;
641 } else if(!strncmp(virtstyle, "iphash", 6)) {
642 s.virtstyle=VIRT_IPHASH;
643 } else if(!strncmp(virtstyle, "cidrhash", 8)) {
644 s.virtstyle=VIRT_CIDR;
645 if(strlen(virtstyle)<10) {
646 g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s: missing length", virtstyle, groups[i]);
647 g_array_free(retval, TRUE);
648 g_key_file_free(cfile);
651 s.cidrlen=strtol(virtstyle+8, NULL, 0);
653 g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s", virtstyle, groups[i]);
654 g_array_free(retval, TRUE);
655 g_key_file_free(cfile);
659 s.virtstyle=VIRT_IPLIT;
661 /* Don't need to free this, it's not our string */
663 /* Don't append values for the [generic] group */
665 g_array_append_val(retval, s);
672 * Signal handler for SIGCHLD
673 * @param s the signal we're handling (must be SIGCHLD, or something
676 void sigchld_handler(int s) {
681 while((pid=waitpid(-1, &status, WNOHANG)) > 0) {
682 if(WIFEXITED(status)) {
683 msg3(LOG_INFO, "Child exited with %d", WEXITSTATUS(status));
685 i=g_hash_table_lookup(children, &pid);
687 msg3(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid);
689 DEBUG2("Removing %d from the list of children", pid);
690 g_hash_table_remove(children, &pid);
696 * Kill a child. Called from sigterm_handler::g_hash_table_foreach.
699 * @param value the value corresponding to the above key
700 * @param user_data a pointer which we always set to 1, so that we know what
703 void killchild(gpointer key, gpointer value, gpointer user_data) {
705 int *parent=user_data;
712 * Handle SIGTERM and dispatch it to our children
713 * @param s the signal we're handling (must be SIGTERM, or something
714 * is severely wrong).
716 void sigterm_handler(int s) {
719 g_hash_table_foreach(children, killchild, &parent);
729 * Detect the size of a file.
731 * @param fhandle An open filedescriptor
732 * @return the size of the file, or OFFT_MAX if detection was
735 off_t size_autodetect(int fhandle) {
737 unsigned long sectors;
738 struct stat stat_buf;
741 #ifdef HAVE_SYS_MOUNT_H
742 #ifdef HAVE_SYS_IOCTL_H
744 DEBUG("looking for export size with ioctl BLKGETSIZE\n");
745 if (!ioctl(fhandle, BLKGETSIZE, §ors) && sectors) {
746 es = (off_t)sectors * (off_t)512;
749 #endif /* BLKGETSIZE */
750 #endif /* HAVE_SYS_IOCTL_H */
751 #endif /* HAVE_SYS_MOUNT_H */
753 DEBUG("looking for fhandle size with fstat\n");
754 stat_buf.st_size = 0;
755 error = fstat(fhandle, &stat_buf);
757 if(stat_buf.st_size > 0)
758 return (off_t)stat_buf.st_size;
760 err("fstat failed: %m");
763 DEBUG("looking for fhandle size with lseek SEEK_END\n");
764 es = lseek(fhandle, (off_t)0, SEEK_END);
765 if (es > ((off_t)0)) {
768 DEBUG2("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4)));
771 err("Could not find size of exported block device: %m");
776 * Get the file handle and offset, given an export offset.
778 * @param export An array of export files
779 * @param a The offset to get corresponding file/offset for
780 * @param fhandle [out] File descriptor
781 * @param foffset [out] Offset into fhandle
782 * @param maxbytes [out] Tells how many bytes can be read/written
783 * from fhandle starting at foffset (0 if there is no limit)
784 * @return 0 on success, -1 on failure
786 int get_filepos(GArray* export, off_t a, int* fhandle, off_t* foffset, size_t* maxbytes ) {
787 /* Negative offset not allowed */
791 /* Binary search for last file with starting offset <= a */
794 int end = export->len - 1;
795 while( start <= end ) {
796 int mid = (start + end) / 2;
797 fi = g_array_index(export, FILE_INFO, mid);
798 if( fi.startoff < a ) {
800 } else if( fi.startoff > a ) {
808 /* end should never go negative, since first startoff is 0 and a >= 0 */
811 fi = g_array_index(export, FILE_INFO, end);
812 *fhandle = fi.fhandle;
813 *foffset = a - fi.startoff;
815 if( end+1 < export->len ) {
816 FILE_INFO fi_next = g_array_index(export, FILE_INFO, end+1);
817 *maxbytes = fi_next.startoff - a;
/**
 * Seek to an absolute position in a file, with error handling: a failing
 * lseek() is reported through err().
 *
 * @param handle a filedescriptor
 * @param a position to seek to
 **/
void myseek(int handle,off_t a) {
	off_t landed = lseek(handle, a, SEEK_SET);

	if (landed < 0)
		err("Can not seek locally!\n");
}
838 * Write an amount of bytes at a given offset to the right file. This
839 * abstracts the write-side of the multiple file option.
841 * @param a The offset where the write should start
842 * @param buf The buffer to write from
843 * @param len The length of buf
844 * @param client The client we're serving for
845 * @return The number of bytes actually written, or -1 in case of an error
847 ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client) {
852 if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
854 if(maxbytes && len > maxbytes)
857 DEBUG4("(WRITE to fd %d offset %Lu len %u), ", fhandle, foffset, len);
859 myseek(fhandle, foffset);
860 return write(fhandle, buf, len);
864 * Call rawexpwrite repeatedly until all data has been written.
865 * @return 0 on success, nonzero on failure
867 int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client) {
870 while(len > 0 && (ret=rawexpwrite(a, buf, len, client)) > 0 ) {
875 return (ret < 0 || len != 0);
879 * Read an amount of bytes at a given offset from the right file. This
880 * abstracts the read-side of the multiple files option.
882 * @param a The offset where the read should start
883 * @param buf A buffer to read into
884 * @param len The size of buf
885 * @param client The client we're serving for
886 * @return The number of bytes actually read, or -1 in case of an
889 ssize_t rawexpread(off_t a, char *buf, size_t len, CLIENT *client) {
894 if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
896 if(maxbytes && len > maxbytes)
899 DEBUG4("(READ from fd %d offset %Lu len %u), ", fhandle, foffset, len);
901 myseek(fhandle, foffset);
902 return read(fhandle, buf, len);
906 * Call rawexpread repeatedly until all data has been read.
907 * @return 0 on success, nonzero on failure
909 int rawexpread_fully(off_t a, char *buf, size_t len, CLIENT *client) {
912 while(len > 0 && (ret=rawexpread(a, buf, len, client)) > 0 ) {
917 return (ret < 0 || len != 0);
921 * Read an amount of bytes at a given offset from the right file. This
922 * abstracts the read-side of the copyonwrite stuff, and calls
923 * rawexpread() with the right parameters to do the actual work.
924 * @param a The offset where the read should start
925 * @param buf A buffer to read into
926 * @param len The size of buf
927 * @param client The client we're going to read for
928 * @return 0 on success, nonzero on failure
930 int expread(off_t a, char *buf, size_t len, CLIENT *client) {
932 off_t mapcnt, mapl, maph, pagestart;
934 if (!(client->server->flags & F_COPYONWRITE))
935 return(rawexpread_fully(a, buf, len, client));
936 DEBUG3("Asked to read %d bytes at %Lu.\n", len, (unsigned long long)a);
938 mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
940 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
941 pagestart=mapcnt*DIFFPAGESIZE;
943 rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
944 len : (size_t)DIFFPAGESIZE-offset;
945 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
946 DEBUG3("Page %Lu is at %lu\n", (unsigned long long)mapcnt,
947 (unsigned long)(client->difmap[mapcnt]));
948 myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset);
949 if (read(client->difffile, buf, rdlen) != rdlen) return -1;
950 } else { /* the block is not there */
951 DEBUG2("Page %Lu is not here, we read the original one\n",
952 (unsigned long long)mapcnt);
953 if(rawexpread_fully(a, buf, rdlen, client)) return -1;
955 len-=rdlen; a+=rdlen; buf+=rdlen;
961 * Write an amount of bytes at a given offset to the right file. This
962 * abstracts the write-side of the copyonwrite option, and calls
963 * rawexpwrite() with the right parameters to do the actual work.
965 * @param a The offset where the write should start
966 * @param buf The buffer to write from
967 * @param len The length of buf
968 * @param client The client we're going to write for.
969 * @return 0 on success, nonzero on failure
971 int expwrite(off_t a, char *buf, size_t len, CLIENT *client) {
972 char pagebuf[DIFFPAGESIZE];
973 off_t mapcnt,mapl,maph;
978 if (!(client->server->flags & F_COPYONWRITE))
979 return(rawexpwrite_fully(a, buf, len, client));
980 DEBUG3("Asked to write %d bytes at %Lu.\n", len, (unsigned long long)a);
982 mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
984 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
985 pagestart=mapcnt*DIFFPAGESIZE ;
987 wrlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
988 len : (size_t)DIFFPAGESIZE-offset;
990 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
991 DEBUG3("Page %Lu is at %lu\n", (unsigned long long)mapcnt,
992 (unsigned long)(client->difmap[mapcnt])) ;
993 myseek(client->difffile,
994 client->difmap[mapcnt]*DIFFPAGESIZE+offset);
995 if (write(client->difffile, buf, wrlen) != wrlen) return -1 ;
996 } else { /* the block is not there */
997 myseek(client->difffile,client->difffilelen*DIFFPAGESIZE) ;
998 client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++;
999 DEBUG3("Page %Lu is not here, we put it at %lu\n",
1000 (unsigned long long)mapcnt,
1001 (unsigned long)(client->difmap[mapcnt]));
1002 rdlen=DIFFPAGESIZE ;
1003 if (rawexpread_fully(pagestart, pagebuf, rdlen, client))
1005 memcpy(pagebuf+offset,buf,wrlen) ;
1006 if (write(client->difffile, pagebuf, DIFFPAGESIZE) !=
1010 len-=wrlen ; a+=wrlen ; buf+=wrlen ;
1016 * Do the initial negotiation.
1018 * @param client The client we're negotiating with.
1020 void negotiate(CLIENT *client) {
1024 memset(zeros, '\0', 290);
1025 if (write(client->net, INIT_PASSWD, 8) < 0)
1026 err("Negotiation failed: %m");
1027 cliserv_magic = htonll(cliserv_magic);
1028 if (write(client->net, &cliserv_magic, sizeof(cliserv_magic)) < 0)
1029 err("Negotiation failed: %m");
1030 size_host = htonll((u64)(client->exportsize));
1031 if (write(client->net, &size_host, 8) < 0)
1032 err("Negotiation failed: %m");
1033 if (write(client->net, zeros, 128) < 0)
1034 err("Negotiation failed: %m");
1037 /** sending macro. */
1038 #define SEND(net,reply) writeit( net, &reply, sizeof( reply ));
1040 #define ERROR(client,reply) { reply.error = htonl(-1); SEND(client->net,reply); reply.error = 0; }
1042 * Serve a file to a single client.
1044 * @todo This beast needs to be split up in many tiny little manageable
1045 * pieces. Preferably with a chainsaw.
1047 * @param client The client we're going to serve to.
1050 int mainloop(CLIENT *client) {
1051 struct nbd_request request;
1052 struct nbd_reply reply;
1053 gboolean go_on=TRUE;
1058 DEBUG("Entering request loop!\n");
1059 reply.magic = htonl(NBD_REPLY_MAGIC);
1068 if (client->server->timeout)
1069 alarm(client->server->timeout);
1070 readit(client->net, &request, sizeof(request));
1071 request.from = ntohll(request.from);
1072 request.type = ntohl(request.type);
1074 if (request.type==NBD_CMD_DISC) {
1075 msg2(LOG_INFO, "Disconnect request received.");
1076 if (client->server->flags & F_COPYONWRITE) {
1077 if (client->difmap) g_free(client->difmap) ;
1078 close(client->difffile);
1079 unlink(client->difffilename);
1080 free(client->difffilename);
1086 len = ntohl(request.len);
1088 if (request.magic != htonl(NBD_REQUEST_MAGIC))
1089 err("Not enough magic.");
1090 if (len > BUFSIZE + sizeof(struct nbd_reply))
1091 err("Request too big!");
1093 printf("%s from %Lu (%Lu) len %d, ", request.type ? "WRITE" :
1094 "READ", (unsigned long long)request.from,
1095 (unsigned long long)request.from / 512, len);
1097 memcpy(reply.handle, request.handle, sizeof(reply.handle));
1098 if ((request.from + len) > (OFFT_MAX)) {
1099 DEBUG("[Number too large!]");
1100 ERROR(client, reply);
1104 if (((ssize_t)((off_t)request.from + len) > client->exportsize)) {
1106 ERROR(client, reply);
1110 if (request.type==NBD_CMD_WRITE) {
1111 DEBUG("wr: net->buf, ");
1112 readit(client->net, buf, len);
1113 DEBUG("buf->exp, ");
1114 if ((client->server->flags & F_READONLY) ||
1115 (client->server->flags & F_AUTOREADONLY)) {
1116 DEBUG("[WRITE to READONLY!]");
1117 ERROR(client, reply);
1120 if (expwrite(request.from, buf, len, client)) {
1121 DEBUG("Write failed: %m" );
1122 ERROR(client, reply);
1125 SEND(client->net, reply);
1131 DEBUG("exp->buf, ");
1132 if (expread(request.from, buf + sizeof(struct nbd_reply), len, client)) {
1133 DEBUG("Read failed: %m");
1134 ERROR(client, reply);
1138 DEBUG("buf->net, ");
1139 memcpy(buf, &reply, sizeof(struct nbd_reply));
1140 writeit(client->net, buf, len + sizeof(struct nbd_reply));
1147 * Set up client export array, which is an array of FILE_INFO.
1148 * Also, split a single exportfile into multiple ones, if that was asked.
1149 * @param client information on the client which we want to setup export for
1151 void setupexport(CLIENT* client) {
1153 off_t laststartoff = 0, lastsize = 0;
1154 int multifile = (client->server->flags & F_MULTIFILE);
1156 client->export = g_array_new(TRUE, TRUE, sizeof(FILE_INFO));
1158 /* If multi-file, open as many files as we can.
1159 * If not, open exactly one file.
1160 * Calculate file sizes as we go to get total size. */
1164 mode_t mode = (client->server->flags & F_READONLY) ? O_RDONLY : O_RDWR;
1167 tmpname=g_strdup_printf("%s.%d", client->exportname, i);
1169 tmpname=g_strdup(client->exportname);
1171 DEBUG2( "Opening %s\n", tmpname );
1172 fi.fhandle = open(tmpname, mode);
1173 if(fi.fhandle == -1 && mode == O_RDWR) {
1174 /* Try again because maybe media was read-only */
1175 fi.fhandle = open(tmpname, O_RDONLY);
1176 if(fi.fhandle != -1) {
1177 client->server->flags |= F_AUTOREADONLY;
1178 client->server->flags |= F_READONLY;
1181 if(fi.fhandle == -1) {
1182 if(multifile && i>0)
1184 err("Could not open exported file: %m");
1186 fi.startoff = laststartoff + lastsize;
1187 g_array_append_val(client->export, fi);
1190 /* Starting offset and size of this file will be used to
1191 * calculate starting offset of next file */
1192 laststartoff = fi.startoff;
1193 lastsize = size_autodetect(fi.fhandle);
1199 /* Set export size to total calculated size */
1200 client->exportsize = laststartoff + lastsize;
1202 /* Export size may be overridden */
1203 if(client->server->expected_size) {
1204 /* desired size must be <= total calculated size */
1205 if(client->server->expected_size > client->exportsize) {
1206 err("Size of exported file is too big\n");
1209 client->exportsize = client->server->expected_size;
1212 msg3(LOG_INFO, "Size of exported file/device is %Lu", (unsigned long long)client->exportsize);
1214 msg3(LOG_INFO, "Total number of files: %d", i);
1218 int copyonwrite_prepare(CLIENT* client) {
1220 if ((client->difffilename = malloc(1024))==NULL)
1221 err("Failed to allocate string for diff file name");
1222 snprintf(client->difffilename, 1024, "%s-%s-%d.diff",client->exportname,client->clientname,
1224 client->difffilename[1023]='\0';
1225 msg3(LOG_INFO,"About to create map and diff file %s",client->difffilename) ;
1226 client->difffile=open(client->difffilename,O_RDWR | O_CREAT | O_TRUNC,0600) ;
1227 if (client->difffile<0) err("Could not create diff file (%m)") ;
1228 if ((client->difmap=calloc(client->exportsize/DIFFPAGESIZE,sizeof(u32)))==NULL)
1229 err("Could not allocate memory") ;
1230 for (i=0;i<client->exportsize/DIFFPAGESIZE;i++) client->difmap[i]=(u32)-1 ;
1236 * Serve a connection.
1238 * @todo allow for multithreading, perhaps use libevent. Not just yet, though;
1239 * follow the road map.
1241 * @param client a connected client
1243 void serveconnection(CLIENT *client) {
1244 setupexport(client);
1246 if (client->server->flags & F_COPYONWRITE) {
1247 copyonwrite_prepare(client);
1250 setmysockopt(client->net);
1256 * Find the name of the file we have to serve. This will use g_strdup_printf
1257 * to put the IP address of the client inside a filename containing
1258 * "%s" (in the form as specified by the "virtstyle" option). That name
1259 * is then written to client->exportname.
1261 * @param net A socket connected to an nbd client
1262 * @param client information about the client. The IP address in human-readable
1263 * format will be written to a new char* buffer, the address of which will be
1264 * stored in client->clientname.
1266 void set_peername(int net, CLIENT *client) {
1267 struct sockaddr_in addrin;
1268 struct sockaddr_in netaddr;
1269 size_t addrinlen = sizeof( addrin );
1275 if (getpeername(net, (struct sockaddr *) &addrin, (socklen_t *)&addrinlen) < 0)
1276 err("getsockname failed: %m");
1277 peername = g_strdup(inet_ntoa(addrin.sin_addr));
1278 switch(client->server->virtstyle) {
1280 client->exportname=g_strdup(client->server->exportname);
1283 for(i=0;i<strlen(peername);i++) {
1284 if(peername[i]=='.') {
1289 client->exportname=g_strdup_printf(client->server->exportname, peername);
1292 memcpy(&netaddr, &addrin, addrinlen);
1293 netaddr.sin_addr.s_addr>>=32-(client->server->cidrlen);
1294 netaddr.sin_addr.s_addr<<=32-(client->server->cidrlen);
1295 netname = inet_ntoa(netaddr.sin_addr);
1296 tmp=g_strdup_printf("%s/%s", netname, peername);
1297 client->exportname=g_strdup_printf(client->server->exportname, tmp);
1301 msg4(LOG_INFO, "connect from %s, assigned file is %s",
1302 peername, client->exportname);
1303 client->clientname=g_strdup(peername);
1309 * @param data a pointer to pid_t which should be freed
1311 void destroy_pid_t(gpointer data) {
1316 * Go daemon (unless we specified at compile time that we didn't want this)
1317 * @param serve the first server of our configuration. If its port is zero,
1318 * then do not daemonize, because we're doing inetd then. This parameter
1319 * is only used to create a PID file of the form
1320 * /var/run/nbd-server.<port>.pid; it's not modified in any way.
1322 #if !defined(NODAEMON) && !defined(NOFORK)
1323 void daemonize(SERVER* serve) {
1326 if(!(serve->port)) {
1332 if(!*pidftemplate) {
1334 strncpy(pidftemplate, "/var/run/server.%d.pid", 255);
1336 strncpy(pidftemplate, "/var/run/server.pid", 255);
1339 snprintf(pidfname, 255, pidftemplate, serve ? serve->port : 0);
1340 pidf=fopen(pidfname, "w");
1342 fprintf(pidf,"%d\n", (int)getpid());
1346 fprintf(stderr, "Not fatal; continuing");
1350 #define daemonize(serve)
1351 #endif /* !defined(NODAEMON) && !defined(NOFORK) */
1354 * Connect a server's socket.
1356 * @param serve the server we want to connect.
1358 void setup_serve(SERVER *serve) {
1359 struct sockaddr_in addrin;
1360 struct sigaction sa;
1361 int addrinlen = sizeof(addrin);
1368 if ((serve->socket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0)
1371 /* lose the pesky "Address already in use" error message */
1372 if (setsockopt(serve->socket,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) {
1373 err("setsockopt SO_REUSEADDR");
1375 if (setsockopt(serve->socket,SOL_SOCKET,SO_KEEPALIVE,&yes,sizeof(int)) == -1) {
1376 err("setsockopt SO_KEEPALIVE");
1379 /* make the listening socket non-blocking */
1380 if ((sock_flags = fcntl(serve->socket, F_GETFL, 0)) == -1) {
1381 err("fcntl F_GETFL");
1383 if (fcntl(serve->socket, F_SETFL, sock_flags | O_NONBLOCK) == -1) {
1384 err("fcntl F_SETFL O_NONBLOCK");
1387 DEBUG("Waiting for connections... bind, ");
1388 addrin.sin_family = AF_INET;
1389 addrin.sin_port = htons(serve->port);
1390 addrin.sin_addr.s_addr = 0;
1391 if (bind(serve->socket, (struct sockaddr *) &addrin, addrinlen) < 0)
1394 if (listen(serve->socket, 1) < 0)
1396 sa.sa_handler = sigchld_handler;
1397 sigemptyset(&sa.sa_mask);
1398 sa.sa_flags = SA_RESTART;
1399 if(sigaction(SIGCHLD, &sa, NULL) == -1)
1400 err("sigaction: %m");
1401 sa.sa_handler = sigterm_handler;
1402 sigemptyset(&sa.sa_mask);
1403 sa.sa_flags = SA_RESTART;
1404 if(sigaction(SIGTERM, &sa, NULL) == -1)
1405 err("sigaction: %m");
1409 * Connect our servers.
1411 void setup_servers(GArray* servers) {
1414 for(i=0;i<servers->len;i++) {
1415 setup_serve(&(g_array_index(servers, SERVER, i)));
1417 children=g_hash_table_new_full(g_int_hash, g_int_equal, NULL, destroy_pid_t);
1421 * Loop through the available servers, and serve them.
1423 int serveloop(GArray* servers) {
1424 struct sockaddr_in addrin;
1425 socklen_t addrinlen=sizeof(addrin);
1435 * Set up the master fd_set. The set of descriptors we need
1436 * to select() for never changes anyway and it buys us a *lot*
1437 * of time to only build this once. However, if we ever choose
1438 * to not fork() for clients anymore, we may have to revisit
1443 for(i=0;i<servers->len;i++) {
1444 sock=(g_array_index(servers, SERVER, i)).socket;
1445 FD_SET(sock, &mset);
1446 max=sock>max?sock:max;
1453 memcpy(&rset, &mset, sizeof(fd_set));
1456 if(select(max+1, &rset, NULL, NULL, &tv)>0) {
1458 for(i=0;i<servers->len;i++) {
1459 serve=&(g_array_index(servers, SERVER, i));
1460 if(FD_ISSET(serve->socket, &rset)) {
1461 if ((net=accept(serve->socket, (struct sockaddr *) &addrin, &addrinlen)) < 0)
1464 client = g_malloc(sizeof(CLIENT));
1465 client->server=serve;
1466 client->exportsize=OFFT_MAX;
1468 set_peername(net, client);
1469 if (!authorized_client(client)) {
1470 msg2(LOG_INFO,"Unauthorized client") ;
1474 msg2(LOG_INFO,"Authorized client") ;
1475 pid=g_malloc(sizeof(pid_t));
1477 if ((*pid=fork())<0) {
1478 msg3(LOG_INFO,"Could not fork (%s)",strerror(errno)) ;
1482 if (*pid>0) { /* parent */
1484 g_hash_table_insert(children, pid, pid);
1488 g_hash_table_destroy(children);
1489 for(i=0;i<servers->len,serve=(g_array_index(servers, SERVER*, i));i++) {
1490 close(serve->socket);
1492 /* FALSE does not free the
1493 actual data. This is required,
1494 because the client has a
1495 direct reference into that
1496 data, and otherwise we get a
1498 g_array_free(servers, FALSE);
1500 msg2(LOG_INFO,"Starting to serve");
1501 serveconnection(client);
1509 * Set up user-ID and/or group-ID
1511 void dousers(void) {
1515 pw=getpwnam(runuser);
1516 if(setuid(pw->pw_uid)<0)
1517 msg3(LOG_DEBUG, "Could not set UID: %s", strerror(errno));
1520 gr=getgrnam(rungroup);
1521 if(setgid(gr->gr_gid)<0)
1522 msg3(LOG_DEBUG, "Could not set GID: %s", strerror(errno));
1527 * Main entry point...
1529 int main(int argc, char *argv[]) {
1534 if (sizeof( struct nbd_request )!=28) {
1535 fprintf(stderr,"Bad size of structure. Alignment problems?\n");
1539 memset(pidftemplate, '\0', 256);
1542 config_file_pos = g_strdup(CFILE);
1543 serve=cmdline(argc, argv);
1544 servers = parse_cfile(config_file_pos, &err);
1545 if(!servers || !servers->len) {
1546 g_warning("Could not parse config file: %s", err->message);
1549 g_array_append_val(servers, *serve);
1551 if (!(serve->port)) {
1554 /* You really should define ISSERVER if you're going to use
1555 * inetd mode, but if you don't, closing stdout and stderr
1556 * (which inetd had connected to the client socket) will let it
1560 open("/dev/null", O_WRONLY);
1561 open("/dev/null", O_WRONLY);
1563 client=g_malloc(sizeof(CLIENT));
1564 client->server=serve;
1566 client->exportsize=OFFT_MAX;
1567 set_peername(0,client);
1568 serveconnection(client);
1572 if((!serve) && (!servers||!servers->len)) {
1573 g_message("Nothing to do! Bye!");
1577 setup_servers(servers);