2 * Network Block Device - server
4 * Copyright 1996-1998 Pavel Machek, distribute under GPL
5 * <pavel@atrey.karlin.mff.cuni.cz>
6 * Copyright 2001-2004 Wouter Verhelst <wouter@debian.org>
7 * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk>
9 * Version 1.0 - hopefully 64-bit-clean
10 * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au>
11 * Version 1.2 - autodetect size of block devices, thanks to Peter T. Breuer <ptb@it.uc3m.es>
12 * Version 1.5 - can compile on Unix systems that don't have 64 bit integer
13 * type, or don't have 64 bit file offsets by defining FS_32BIT
14 * in compile options for nbd-server *only*. This can be done
15 * with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the
16 * original autoconf input file, or I would make it a configure
17 * option.) Ken Yap <ken@nlc.net.au>.
18 * Version 1.6 - fix autodetection of block device size and really make 64 bit
19 * clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk>
20 * Version 2.0 - Version synchronised with client
21 * Version 2.1 - Reap zombie client processes when they exit. Removed
22 * (uncommented) the _IO magic, it's no longer necessary. Wouter
23 * Verhelst <wouter@debian.org>
24 * Version 2.2 - Auto switch to read-only mode (useful for floppies).
25 * Version 2.3 - Fixed code so that Large File Support works. This
26 * removes the FS_32BIT compile-time directive; define
27 * _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be
28 * using FS_32BIT. This will allow you to use files >2GB instead of
29 * having to use the -m option. Wouter Verhelst <wouter@debian.org>
30 * Version 2.4 - Added code to keep track of children, so that we can
31 * properly kill them from initscripts. Add a call to daemon(),
32 * so that processes don't think they have to wait for us, which is
33 * interesting for initscripts as well. Wouter Verhelst
35 * Version 2.5 - Bugfix release: forgot to reset child_arraysize to
36 * zero after fork()ing, resulting in nbd-server going berserk
37 * when it receives a signal with at least one child open. Wouter
38 * Verhelst <wouter@debian.org>
39 * 10/10/2003 - Added socket option SO_KEEPALIVE (sf.net bug 819235);
40 * rectified type of mainloop::size_host (sf.net bugs 814435 and
41 * 817385); close the PID file after writing to it, so that the
42 * daemon can actually be found. Wouter Verhelst
44 * 10/10/2003 - Size of the data "size_host" was wrong and so was not
45 * correctly put in network endianness. Many types were corrected
46 * (size_t and off_t instead of int). <vspaceg@sourceforge.net>
47 * Version 2.6 - Some code cleanup.
48 * Version 2.7 - Better build system.
49 * 11/02/2004 - Doxygenified the source, modularized it a bit. Needs a
50 * lot more work, but this is a start. Wouter Verhelst
54 /* Includes LFS defines, which defines behaviours of some of the following
55 * headers, so must come before those */
58 #include <sys/types.h>
59 #include <sys/socket.h>
61 #include <sys/select.h> /* select */
62 #include <sys/wait.h> /* wait */
63 #ifdef HAVE_SYS_IOCTL_H
64 #include <sys/ioctl.h>
66 #include <sys/param.h>
67 #ifdef HAVE_SYS_MOUNT_H
68 #include <sys/mount.h> /* For BLKGETSIZE */
70 #include <signal.h> /* sigaction */
71 #include <netinet/tcp.h>
72 #include <netinet/in.h> /* sockaddr_in, htons, in_addr */
73 #include <netdb.h> /* hostent, gethostby*, getservby* */
80 #include <arpa/inet.h>
90 /* used in cliserv.h, so must come first */
91 #define MY_NAME "nbd_server"
94 /** Default position of the config file */
96 #define SYSCONFDIR "/etc"
98 #define CFILE SYSCONFDIR "/nbd-server/config"
100 /** Where our config file actually is */
101 gchar* config_file_pos;
103 /** What user we're running as */
105 /** What group we're running as */
106 gchar* rungroup=NULL;
108 /** Logging macros, now nothing goes to syslog unless you say ISSERVER */
110 #define msg2(a,b) syslog(a,b)
111 #define msg3(a,b,c) syslog(a,b,c)
112 #define msg4(a,b,c,d) syslog(a,b,c,d)
114 #define msg2(a,b) g_message(b)
115 #define msg3(a,b,c) g_message(b,c)
116 #define msg4(a,b,c,d) g_message(b,c,d)
119 /* Debugging macros */
122 #define DEBUG( a ) printf( a )
123 #define DEBUG2( a,b ) printf( a,b )
124 #define DEBUG3( a,b,c ) printf( a,b,c )
125 #define DEBUG4( a,b,c,d ) printf( a,b,c,d )
128 #define DEBUG2( a,b )
129 #define DEBUG3( a,b,c )
130 #define DEBUG4( a,b,c,d )
132 #ifndef PACKAGE_VERSION
133 #define PACKAGE_VERSION ""
136 * The highest value a variable of type off_t can reach. This is a signed
137 * integer, so set all bits except for the leftmost one.
139 #define OFFT_MAX ~((off_t)1<<(sizeof(off_t)*8-1))
140 #define LINELEN 256 /**< Size of static buffer used to read the
141 authorization file (yuck) */
142 #define BUFSIZE (1024*1024) /**< Size of buffer that can hold requests */
143 #define DIFFPAGESIZE 4096 /**< diff file uses those chunks */
144 #define F_READONLY 1 /**< flag to tell us a file is readonly */
145 #define F_MULTIFILE 2 /**< flag to tell us a file is exported using -m */
146 #define F_COPYONWRITE 4 /**< flag to tell us a file is exported using
148 #define F_AUTOREADONLY 8 /**< flag to tell us a file is set to autoreadonly */
150 GHashTable *children;
151 char pidfname[256]; /**< name of our PID file */
152 char pidftemplate[256]; /**< template to be used for the filename of the PID file */
153 char default_authname[] = SYSCONFDIR "/nbd-server/allow"; /**< default name of allow file */
156 * Types of virtualization
159 VIRT_NONE=0, /**< No virtualization */
160 VIRT_IPLIT, /**< Literal IP address as part of the filename */
161 VIRT_IPHASH, /**< Replacing all dots in an ip address by a / before
162 doing the same as in IPLIT */
163 VIRT_CIDR, /**< Every subnet in its own directory */
167 * Variables associated with a server.
170 gchar* exportname; /**< (unprocessed) filename of the file we're exporting */
171 off_t expected_size; /**< size of the exported file as it was told to
172 us through configuration */
173 unsigned int port; /**< port we're exporting this file at */
174 char* authname; /**< filename of the authorization file */
175 int flags; /**< flags associated with this exported file */
176 unsigned int timeout;/**< how long a connection may be idle
178 int socket; /**< The socket of this server. */
179 VIRT_STYLE virtstyle;/**< The style of virtualization, if any */
180 uint8_t cidrlen; /**< The length of the mask when we use
181 CIDR-style virtualization */
185 * Variables associated with a client socket.
188 int fhandle; /**< file descriptor */
189 off_t startoff; /**< starting offset of this file */
193 off_t exportsize; /**< size of the file we're exporting */
194 char *clientname; /**< peer */
195 char *exportname; /**< (processed) filename of the file we're exporting */
196 GArray *export; /**< array of FILE_INFO of exported files;
197 array size is always 1 unless we're
198 doing the multiple file option */
199 int net; /**< The actual client socket */
200 SERVER *server; /**< The server this client is getting data from */
201 char* difffilename; /**< filename of the copy-on-write file, if any */
202 int difffile; /**< filedescriptor of copyonwrite file. @todo
203 shouldn't this be an array too? (cfr export) Or
204 make -m and -c mutually exclusive */
205 u32 difffilelen; /**< number of pages in difffile */
206 u32 *difmap; /**< see comment on the global difmap for this one */
210 * Type of configuration file values
213 PARAM_INT, /**< This parameter is an integer */
214 PARAM_STRING, /**< This parameter is a string */
215 PARAM_BOOL, /**< This parameter is a boolean */
219 * Configuration file values
222 gchar *paramname; /**< Name of the parameter, as it appears in
224 gboolean required; /**< Whether this is a required (as opposed to
225 optional) parameter */
226 PARAM_TYPE ptype; /**< Type of the parameter. */
227 gpointer target; /**< Pointer to where the data of this
228 parameter should be written. If ptype is
229 PARAM_BOOL, the data is or'ed rather than
231 gint flagval; /**< Flag mask for this parameter in case ptype
236 * Check whether a client is allowed to connect. Works with an authorization
237 * file which contains one line per machine, no wildcards.
239 * @param opts The client who's trying to connect.
240 * @return 0 - authorization refused, 1 - OK
242 int authorized_client(CLIENT *opts) {
/* Scans the file named by opts->server->authname line by line and compares
 * each entry against opts->clientname. Entries containing a slash are split
 * into an address and a prefix length and compared after shifting both
 * addresses; every line is also compared as text against the client name.
 * NOTE(review): this listing has gaps; the return statements and any
 * fclose() calls of this function are not visible here. */
243 const char *ERRMSG="Invalid entry '%s' in authfile '%s', so, refusing all connections.";
248 struct in_addr client;
249 struct in_addr cltemp;
252 if ((f=fopen(opts->server->authname,"r"))==NULL) {
253 msg4(LOG_INFO,"Can't open authorization file %s (%s).",
254 opts->server->authname,strerror(errno)) ;
/* NOTE(review): the result of this inet_aton() call is ignored, so a
 * clientname that does not parse leaves `client` unset. */
258 inet_aton(opts->clientname, &client);
259 while (fgets(line,LINELEN,f)!=NULL) {
260 if((tmp=index(line, '/'))) {
/* NOTE(review): tmp points inside line, so tmp-line is always smaller than
 * strlen(line); this branch looks unreachable - confirm. */
261 if(strlen(line)<=tmp-line) {
262 msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
/* NOTE(review): inet_aton() returns nonzero when the address is VALID, yet
 * the branch below logs ERRMSG ("Invalid entry"). The condition looks
 * inverted - confirm against the lines missing from this listing. */
266 if(inet_aton(line,&addr)) {
267 msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
/* NOTE(review): len is taken from the file without range checking; values
 * outside 1..32 make the shift widths below zero, negative or >= 32, which
 * is undefined for a 32-bit operand. s_addr is also in network byte order,
 * so shifting it as a host integer may not isolate the prefix bits on
 * little-endian hosts - confirm. */
270 len=strtol(tmp, NULL, 0);
271 addr.s_addr>>=32-len;
272 addr.s_addr<<=32-len;
273 memcpy(&cltemp,&client,sizeof(client));
274 cltemp.s_addr>>=32-len;
275 cltemp.s_addr<<=32-len;
276 if(addr.s_addr == cltemp.s_addr) {
/* Only the first strlen(clientname) characters are compared, so an entry
 * that merely starts with the client's address also matches.
 * NOTE(review): e.g. an entry 10.0.0.10 would match client 10.0.0.1. */
280 if (strncmp(line,opts->clientname,strlen(opts->clientname))==0) {
290 * Read data from a file descriptor into a buffer
292 * @param f a file descriptor
293 * @param buf a buffer
294 * @param len the number of bytes to be read
296 inline void readit(int f, void *buf, size_t len) {
298 gboolean tried = FALSE;
302 if ((res = read(f, buf, len)) <= 0) {
303 if(!tried && errno==EAGAIN) {
304 /* Assume the connection will work some time in
305 * the future, but don't run away with CPU time
306 * in case it doesn't */
310 DEBUG("Read failed, trying again");
316 select(f+1, &set, NULL, NULL, &tv);
318 err("Read failed: %m");
329 * Write data from a buffer into a filedescriptor
331 * @param f a file descriptor
332 * @param buf a buffer containing data
333 * @param len the number of bytes to be written
335 inline void writeit(int f, void *buf, size_t len) {
337 gboolean tried=FALSE;
341 if ((res = write(f, buf, len)) <= 0) {
342 if(!tried && errno==EAGAIN) {
343 /* Assume the connection will work some time in
344 * the future, but don't run away with CPU time
345 * in case it doesn't */
349 DEBUG("Write failed, trying again");
355 select(f+1, NULL, &set, NULL, &tv);
357 err("Send failed: %m");
368 * Print out a message about how to use nbd-server. Split out to a separate
369 * function so that we can call it from multiple places
372 printf("This is nbd-server version " VERSION "\n");
373 printf("Usage: port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-a timeout_sec] [-C configuration file] [-p PID file name]\n"
374 "\t-r|--read-only\t\tread only\n"
375 "\t-m|--multi-file\t\tmultiple file\n"
376 "\t-c|--copy-on-write\tcopy on write\n"
377 "\t-C|--config-file\tspecify an alternate configuration file\n"
378 "\t-l|--authorize-file\tfile with list of hosts that are allowed to\n\t\t\t\tconnect.\n"
379 "\t-a|--idle-time\t\tmaximum idle seconds; server terminates when\n\t\t\t\tidle time exceeded\n"
380 "\t-p|--pid-file\t\tspecify a filename to write our PID to\n\n"
381 "\tif port is set to 0, stdin is used (for running from inetd)\n"
382 "\tif file_to_export contains '%%s', it is substituted with the IP\n"
383 "\t\taddress of the machine trying to connect\n" );
384 printf("Using configuration file %s\n", CFILE);
388 * Parse the command line.
390 * @param argc the argc argument to main()
391 * @param argv the argv argument to main()
393 SERVER* cmdline(int argc, char *argv[]) {
/* Builds a SERVER from the command line: a zero-filled SERVER is allocated,
 * its authname defaults to default_authname, and options plus positional
 * arguments are then consumed through getopt_long().
 * NOTE(review): this listing has gaps; the case labels, local declarations
 * and the return statement are not visible here. */
397 struct option long_options[] = {
398 {"read-only", no_argument, NULL, 'r'},
399 {"multi-file", no_argument, NULL, 'm'},
400 {"copy-on-write", no_argument, NULL, 'c'},
401 {"authorize-file", required_argument, NULL, 'l'},
402 {"idle-time", required_argument, NULL, 'a'},
403 {"config-file", required_argument, NULL, 'C'},
404 {"pid-file", required_argument, NULL, 'p'},
415 serve=g_new0(SERVER, 1);
416 serve->authname = g_strdup(default_authname);
/* The leading '-' in the option string asks GNU getopt_long() to hand
 * positional arguments back in order instead of permuting argv. */
417 while((c=getopt_long(argc, argv, "-a:C:cl:mrp:", long_options, &i))>=0) {
420 /* non-option argument */
421 switch(nonspecial++) {
/* NOTE(review): no end pointer or range check; a non-numeric port silently
 * becomes 0. */
423 serve->port=strtol(optarg, NULL, 0);
426 serve->exportname = g_strdup(optarg);
427 if(serve->exportname[0] != '/') {
428 fprintf(stderr, "E: The to be exported file needs to be an absolute filename!\n");
433 last=strlen(optarg)-1;
435 if (suffix == 'k' || suffix == 'K' ||
436 suffix == 'm' || suffix == 'M')
/* NOTE(review): atol() yields a long and reports no errors; where long is
 * 32 bits, sizes of 2 GiB and above cannot be parsed even though es is an
 * off_t - confirm. */
438 es = (off_t)atol(optarg);
446 serve->expected_size = es;
451 serve->flags |= F_READONLY;
454 serve->flags |= F_MULTIFILE;
/* NOTE(review): pidftemplate is declared as char[256]; strncpy() does not
 * NUL-terminate when optarg has 256 or more characters. */
457 strncpy(pidftemplate, optarg, 256);
460 serve->flags |=F_COPYONWRITE;
463 g_free(config_file_pos);
464 config_file_pos=g_strdup(optarg);
467 g_free(serve->authname);
468 serve->authname=g_strdup(optarg);
471 serve->timeout=strtol(optarg, NULL, 0);
479 /* What's left: the port to export, the name of the to be exported
480 * file, and, optionally, the size of the file, in that order. */
489 * Error codes for config file parsing
492 CFILE_NOTFOUND, /**< The configuration file is not found */
493 CFILE_MISSING_GENERIC, /**< The (required) group "generic" is missing */
494 CFILE_KEY_MISSING, /**< A (required) key is missing */
495 CFILE_VALUE_INVALID, /**< A value is syntactically invalid */
496 CFILE_PROGERR /**< Programmer error */
500 * Remove a SERVER from memory. Used from the hash table
502 void remove_server(gpointer s) {
506 g_free(server->exportname);
508 g_free(server->authname);
513 * Parse the config file.
515 * @param f the name of the config file
516 * @param e a GError. @see CFILE_ERRORS for what error values this function can
518 * @return an array of SERVER structures. If the config file is empty or does not
519 * exist, returns an empty GArray; if the config file contains an
520 * error, returns NULL, and e is set appropriately
522 GArray* parse_cfile(gchar* f, GError** e) {
/* Loads the key file named by f with GKeyFile and, for every group in it,
 * fills a SERVER through a table of parameter descriptors (name, required
 * flag, type, target pointer, flag mask). Each filled SERVER is appended by
 * value to the returned GArray; failures are reported through e.
 * NOTE(review): this listing has gaps; the declarations of lp, gp, p,
 * p_size, s and most return statements are not visible here. */
523 const char* DEFAULT_ERROR = "Could not parse %s in group %s: %s";
524 const char* MISSING_REQUIRED_ERROR = "Could not find required value %s in group %s: %s";
526 gchar *virtstyle=NULL;
/* Per-export parameters; the NULL targets are filled in for each group
 * below, once the SERVER being built is known. */
528 { "exportname", TRUE, PARAM_STRING, NULL, 0 },
529 { "port", TRUE, PARAM_INT, NULL, 0 },
530 { "authfile", FALSE, PARAM_STRING, NULL, 0 },
531 { "timeout", FALSE, PARAM_INT, NULL, 0 },
532 { "filesize", FALSE, PARAM_INT, NULL, 0 },
533 { "virtstyle", FALSE, PARAM_STRING, NULL, 0 },
534 { "readonly", FALSE, PARAM_BOOL, NULL, F_READONLY },
535 { "multifile", FALSE, PARAM_BOOL, NULL, F_MULTIFILE },
536 { "copyonwrite", FALSE, PARAM_BOOL, NULL, F_COPYONWRITE },
537 { "autoreadonly", FALSE, PARAM_BOOL, NULL, F_AUTOREADONLY },
538 { "sparse_cow", FALSE, PARAM_BOOL, NULL, F_SPARSE },
/* Must equal the number of entries above (11) and the lp[0]..lp[10]
 * target assignments below. */
540 const int lp_size=11;
/* Parameters whose targets are the file-scope runuser and rungroup. */
542 { "user", FALSE, PARAM_STRING, &runuser, 0 },
543 { "group", FALSE, PARAM_STRING, &rungroup, 0 },
549 const char *err_msg=NULL;
557 errdomain = g_quark_from_string("parse_cfile");
558 cfile = g_key_file_new();
/* Elements are whole SERVER structures, not pointers. */
559 retval = g_array_new(FALSE, TRUE, sizeof(SERVER));
560 if(!g_key_file_load_from_file(cfile, f, G_KEY_FILE_KEEP_COMMENTS |
561 G_KEY_FILE_KEEP_TRANSLATIONS, &err)) {
562 g_set_error(e, errdomain, CFILE_NOTFOUND, "Could not open config file.");
563 g_key_file_free(cfile);
/* NOTE(review): g_key_file_get_start_group() returns a newly allocated
 * string, or NULL when the file has no start group. The result is passed
 * straight to strcmp(), so NULL would be dereferenced and the string is
 * never freed - confirm. */
566 if(strcmp(g_key_file_get_start_group(cfile), "generic")) {
567 g_set_error(e, errdomain, CFILE_MISSING_GENERIC, "Config file does not contain the [generic] group!");
568 g_key_file_free(cfile);
571 groups = g_key_file_get_groups(cfile, NULL);
572 for(i=0;groups[i];i++) {
/* Start every group from an all-zero SERVER and point the descriptor
 * table at its fields. */
573 memset(&s, '\0', sizeof(SERVER));
574 lp[0].target=&(s.exportname);
575 lp[1].target=&(s.port);
576 lp[2].target=&(s.authname);
577 lp[3].target=&(s.timeout);
/* NOTE(review): expected_size is declared off_t, but PARAM_INT values are
 * stored through a gint pointer further down. Where off_t is wider than
 * gint only part of the field is written, which gives a wrong value on
 * big-endian hosts and caps the size at the gint range - confirm. */
578 lp[4].target=&(s.expected_size);
579 lp[5].target=&(virtstyle);
580 lp[6].target=lp[7].target=lp[8].target=
581 lp[9].target=lp[10].target=&(s.flags);
582 /* After the [generic] group, start parsing exports */
587 for(j=0;j<p_size;j++) {
588 g_assert(p[j].target != NULL);
589 g_assert(p[j].ptype==PARAM_INT||p[j].ptype==PARAM_STRING||p[j].ptype==PARAM_BOOL);
/* PARAM_INT: store the integer value through the target pointer. */
592 *((gint*)p[j].target) =
593 g_key_file_get_integer(cfile,
/* PARAM_STRING: store the string returned by GKeyFile. */
599 *((gchar**)p[j].target) =
600 g_key_file_get_string(cfile,
/* PARAM_BOOL: set or clear the flagval bit in the target. */
606 value = g_key_file_get_boolean(cfile,
608 p[j].paramname, &err);
611 *((gint*)p[j].target) |= p[j].flagval;
613 *((gint*)p[j].target) &= ~(p[j].flagval);
619 if(err->code == G_KEY_FILE_ERROR_KEY_NOT_FOUND) {
621 /* Ignore not-found error for optional values */
625 err_msg = MISSING_REQUIRED_ERROR;
628 err_msg = DEFAULT_ERROR;
630 g_set_error(e, errdomain, CFILE_VALUE_INVALID, err_msg, p[j].paramname, groups[i], err->message);
631 g_array_free(retval, TRUE);
633 g_key_file_free(cfile);
/* Map the virtstyle string onto VIRT_STYLE. Only a prefix is compared, so
 * trailing characters after a recognised keyword are accepted. */
638 if(!strncmp(virtstyle, "none", 4)) {
639 s.virtstyle=VIRT_NONE;
640 } else if(!strncmp(virtstyle, "ipliteral", 9)) {
641 s.virtstyle=VIRT_IPLIT;
642 } else if(!strncmp(virtstyle, "iphash", 6)) {
643 s.virtstyle=VIRT_IPHASH;
644 } else if(!strncmp(virtstyle, "cidrhash", 8)) {
645 s.virtstyle=VIRT_CIDR;
646 if(strlen(virtstyle)<10) {
647 g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s: missing length", virtstyle, groups[i]);
648 g_array_free(retval, TRUE);
649 g_key_file_free(cfile);
/* The mask length is parsed from the text right after "cidrhash".
 * NOTE(review): the result is not range-checked before it is narrowed to
 * the uint8_t cidrlen field. */
652 s.cidrlen=strtol(virtstyle+8, NULL, 0);
654 g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s", virtstyle, groups[i]);
655 g_array_free(retval, TRUE);
656 g_key_file_free(cfile);
660 s.virtstyle=VIRT_IPLIT;
/* NOTE(review): if the comment below refers to virtstyle, it looks wrong:
 * g_key_file_get_string() returns a newly allocated string that the
 * caller owns - confirm and free it if so. */
662 /* Don't need to free this, it's not our string */
664 /* Don't append values for the [generic] group */
666 g_array_append_val(retval, s);
673 * Signal handler for SIGCHLD
674 * @param s the signal we're handling (must be SIGCHLD, or something
677 void sigchld_handler(int s) {
/* Reaps every child that has already exited (waitpid with WNOHANG, so the
 * loop never blocks), logs it, and removes its PID from the children table.
 * NOTE(review): this listing has gaps; the local declarations and the
 * branch structure around the lookup are not visible here.
 * NOTE(review): msg3 expands to syslog() or g_message(), and the GHashTable
 * calls below are not async-signal-safe either; running them from a signal
 * handler can deadlock or corrupt state - confirm how this is installed. */
682 while((pid=waitpid(-1, &status, WNOHANG)) > 0) {
/* NOTE(review): WIFEXITED() expects the status value, as WEXITSTATUS() on
 * the next line is given; passing &status here tests the pointer instead,
 * so the result is unrelated to how the child ended. */
683 if(WIFEXITED(&status)) {
684 msg3(LOG_INFO, "Child exited with %d", WEXITSTATUS(status));
686 i=g_hash_table_lookup(children, &pid);
688 msg3(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid);
690 DEBUG2("Removing %d from the list of children", pid);
691 g_hash_table_remove(children, &pid);
697 * Kill a child. Called from sigterm_handler::g_hash_table_foreach.
700 * @param value the value corresponding to the above key
701 * @param user_data a pointer which we always set to 1, so that we know what
704 void killchild(gpointer key, gpointer value, gpointer user_data) {
706 int *parent=user_data;
713 * Handle SIGTERM and dispatch it to our children
714 * @param s the signal we're handling (must be SIGTERM, or something
715 * is severely wrong).
717 void sigterm_handler(int s) {
720 g_hash_table_foreach(children, killchild, &parent);
730 * Detect the size of a file.
732 * @param fhandle An open filedescriptor
733 * @return the size of the file, or OFFT_MAX if detection was
736 off_t size_autodetect(int fhandle) {
739 struct stat stat_buf;
742 #ifdef HAVE_SYS_MOUNT_H
743 #ifdef HAVE_SYS_IOCTL_H
745 DEBUG("looking for fhandle size with ioctl BLKGETSIZE\n");
746 if (!ioctl(fhandle, BLKGETSIZE, &es32) && es32) {
747 es = (off_t)es32 * (off_t)512;
750 #endif /* BLKGETSIZE */
751 #endif /* HAVE_SYS_IOCTL_H */
752 #endif /* HAVE_SYS_MOUNT_H */
754 DEBUG("looking for fhandle size with fstat\n");
755 stat_buf.st_size = 0;
756 error = fstat(fhandle, &stat_buf);
758 if(stat_buf.st_size > 0)
759 return (off_t)stat_buf.st_size;
761 err("fstat failed: %m");
764 DEBUG("looking for fhandle size with lseek SEEK_END\n");
765 es = lseek(fhandle, (off_t)0, SEEK_END);
766 if (es > ((off_t)0)) {
769 DEBUG2("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4)));
772 err("Could not find size of exported block device: %m");
777 * Get the file handle and offset, given an export offset.
779 * @param export An array of export files
780 * @param a The offset to get corresponding file/offset for
781 * @param fhandle [out] File descriptor
782 * @param foffset [out] Offset into fhandle
783 * @param maxbytes [out] Tells how many bytes can be read/written
784 * from fhandle starting at foffset (0 if there is no limit)
785 * @return 0 on success, -1 on failure
787 int get_filepos(GArray* export, off_t a, int* fhandle, off_t* foffset, size_t* maxbytes ) {
788 /* Negative offset not allowed */
792 /* Binary search for last file with starting offset <= a */
795 int end = export->len - 1;
796 while( start <= end ) {
797 int mid = (start + end) / 2;
798 fi = g_array_index(export, FILE_INFO, mid);
799 if( fi.startoff < a ) {
801 } else if( fi.startoff > a ) {
809 /* end should never go negative, since first startoff is 0 and a >= 0 */
812 fi = g_array_index(export, FILE_INFO, end);
813 *fhandle = fi.fhandle;
814 *foffset = a - fi.startoff;
816 if( end+1 < export->len ) {
817 FILE_INFO fi_next = g_array_index(export, FILE_INFO, end+1);
818 *maxbytes = fi_next.startoff - a;
825 * seek to a position in a file, with error handling.
826 * @param handle a filedescriptor
827 * @param a position to seek to
828 * @todo get rid of this; lastpoint is a global variable right now, but it
829 * shouldn't be. If we pass it on as a parameter, that makes things a *lot*
832 void myseek(int handle,off_t a) {
833 if (lseek(handle, a, SEEK_SET) < 0) {
834 err("Can not seek locally!\n");
839 * Write an amount of bytes at a given offset to the right file. This
840 * abstracts the write-side of the multiple file option.
842 * @param a The offset where the write should start
843 * @param buf The buffer to write from
844 * @param len The length of buf
845 * @param client The client we're serving for
846 * @return The number of bytes actually written, or -1 in case of an error
848 ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client) {
853 if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
855 if(maxbytes && len > maxbytes)
858 DEBUG4("(WRITE to fd %d offset %Lu len %u), ", fhandle, foffset, len);
860 myseek(fhandle, foffset);
861 return write(fhandle, buf, len);
865 * Call rawexpwrite repeatedly until all data has been written.
866 * @return 0 on success, nonzero on failure
868 int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client) {
871 while(len > 0 && (ret=rawexpwrite(a, buf, len, client)) > 0 ) {
876 return (ret < 0 || len != 0);
880 * Read an amount of bytes at a given offset from the right file. This
881 * abstracts the read-side of the multiple files option.
883 * @param a The offset where the read should start
884 * @param buf A buffer to read into
885 * @param len The size of buf
886 * @param client The client we're serving for
887 * @return The number of bytes actually read, or -1 in case of an
890 ssize_t rawexpread(off_t a, char *buf, size_t len, CLIENT *client) {
895 if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
897 if(maxbytes && len > maxbytes)
900 DEBUG4("(READ from fd %d offset %Lu len %u), ", fhandle, foffset, len);
902 myseek(fhandle, foffset);
903 return read(fhandle, buf, len);
907 * Call rawexpread repeatedly until all data has been read.
908 * @return 0 on success, nonzero on failure
910 int rawexpread_fully(off_t a, char *buf, size_t len, CLIENT *client) {
913 while(len > 0 && (ret=rawexpread(a, buf, len, client)) > 0 ) {
918 return (ret < 0 || len != 0);
922 * Read an amount of bytes at a given offset from the right file. This
923 * abstracts the read-side of the copyonwrite stuff, and calls
924 * rawexpread() with the right parameters to do the actual work.
925 * @param a The offset where the read should start
926 * @param buf A buffer to read into
927 * @param len The size of buf
928 * @param client The client we're going to read for
929 * @return 0 on success, nonzero on failure
931 int expread(off_t a, char *buf, size_t len, CLIENT *client) {
933 off_t mapcnt, mapl, maph, pagestart;
935 if (!(client->server->flags & F_COPYONWRITE))
936 return(rawexpread_fully(a, buf, len, client));
937 DEBUG3("Asked to read %d bytes at %Lu.\n", len, (unsigned long long)a);
939 mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
941 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
942 pagestart=mapcnt*DIFFPAGESIZE;
944 rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
945 len : (size_t)DIFFPAGESIZE-offset;
946 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
947 DEBUG3("Page %Lu is at %lu\n", (unsigned long long)mapcnt,
948 (unsigned long)(client->difmap[mapcnt]));
949 myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset);
950 if (read(client->difffile, buf, rdlen) != rdlen) return -1;
951 } else { /* the block is not there */
952 DEBUG2("Page %Lu is not here, we read the original one\n",
953 (unsigned long long)mapcnt);
954 if(rawexpread_fully(a, buf, rdlen, client)) return -1;
956 len-=rdlen; a+=rdlen; buf+=rdlen;
962 * Write an amount of bytes at a given offset to the right file. This
963 * abstracts the write-side of the copyonwrite option, and calls
964 * rawexpwrite() with the right parameters to do the actual work.
966 * @param a The offset where the write should start
967 * @param buf The buffer to write from
968 * @param len The length of buf
969 * @param client The client we're going to write for.
970 * @return 0 on success, nonzero on failure
972 int expwrite(off_t a, char *buf, size_t len, CLIENT *client) {
973 char pagebuf[DIFFPAGESIZE];
974 off_t mapcnt,mapl,maph;
979 if (!(client->server->flags & F_COPYONWRITE))
980 return(rawexpwrite_fully(a, buf, len, client));
981 DEBUG3("Asked to write %d bytes at %Lu.\n", len, (unsigned long long)a);
983 mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
985 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
986 pagestart=mapcnt*DIFFPAGESIZE ;
988 wrlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
989 len : (size_t)DIFFPAGESIZE-offset;
991 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
992 DEBUG3("Page %Lu is at %lu\n", (unsigned long long)mapcnt,
993 (unsigned long)(client->difmap[mapcnt])) ;
994 myseek(client->difffile,
995 client->difmap[mapcnt]*DIFFPAGESIZE+offset);
996 if (write(client->difffile, buf, wrlen) != wrlen) return -1 ;
997 } else { /* the block is not there */
998 myseek(client->difffile,client->difffilelen*DIFFPAGESIZE) ;
999 client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++;
1000 DEBUG3("Page %Lu is not here, we put it at %lu\n",
1001 (unsigned long long)mapcnt,
1002 (unsigned long)(client->difmap[mapcnt]));
1003 rdlen=DIFFPAGESIZE ;
1004 if (rawexpread_fully(pagestart, pagebuf, rdlen, client))
1006 memcpy(pagebuf+offset,buf,wrlen) ;
1007 if (write(client->difffile, pagebuf, DIFFPAGESIZE) !=
1011 len-=wrlen ; a+=wrlen ; buf+=wrlen ;
1017 * Do the initial negotiation.
1019 * @param client The client we're negotiating with.
1021 void negotiate(CLIENT *client) {
1025 memset(zeros, '\0', 290);
1026 if (write(client->net, INIT_PASSWD, 8) < 0)
1027 err("Negotiation failed: %m");
1028 cliserv_magic = htonll(cliserv_magic);
1029 if (write(client->net, &cliserv_magic, sizeof(cliserv_magic)) < 0)
1030 err("Negotiation failed: %m");
1031 size_host = htonll((u64)(client->exportsize));
1032 if (write(client->net, &size_host, 8) < 0)
1033 err("Negotiation failed: %m");
1034 if (write(client->net, zeros, 128) < 0)
1035 err("Negotiation failed: %m");
1038 /** sending macro. */
1039 #define SEND(net,reply) writeit( net, &reply, sizeof( reply ));
1041 #define ERROR(client,reply) { reply.error = htonl(-1); SEND(client->net,reply); reply.error = 0; }
1043 * Serve a file to a single client.
1045 * @todo This beast needs to be split up in many tiny little manageable
1046 * pieces. Preferably with a chainsaw.
1048 * @param client The client we're going to serve to.
1051 int mainloop(CLIENT *client) {
/* Request loop for one connected client: reads an nbd_request from
 * client->net, converts its fields to host byte order, validates it, then
 * services it through expwrite() or expread() and sends an nbd_reply.
 * NOTE(review): this listing has gaps; the declarations of buf and len, the
 * loop statement itself and the continue/return statements are not visible
 * here. */
1052 struct nbd_request request;
1053 struct nbd_reply reply;
1054 gboolean go_on=TRUE;
1059 DEBUG("Entering request loop!\n");
1060 reply.magic = htonl(NBD_REPLY_MAGIC);
/* Re-arm the idle timer before waiting for each request, when a timeout
 * is configured. */
1069 if (client->server->timeout)
1070 alarm(client->server->timeout);
1071 readit(client->net, &request, sizeof(request));
1072 request.from = ntohll(request.from);
1073 request.type = ntohl(request.type);
/* NOTE(review): the disconnect request is acted on before request.magic is
 * checked further down. */
1075 if (request.type==NBD_CMD_DISC) {
1076 msg2(LOG_INFO, "Disconnect request received.");
1077 if (client->server->flags & F_COPYONWRITE) {
/* NOTE(review): copyonwrite_prepare() obtains difmap from calloc(), while
 * it is released here with g_free(); the allocator pairing is mismatched. */
1078 if (client->difmap) g_free(client->difmap) ;
1079 close(client->difffile);
1080 unlink(client->difffilename);
1081 free(client->difffilename);
1087 len = ntohl(request.len);
1089 if (request.magic != htonl(NBD_REQUEST_MAGIC))
1090 err("Not enough magic.");
/* NOTE(review): the read path below stores len bytes at
 * buf + sizeof(struct nbd_reply) and the write path stores len bytes at
 * buf. If buf holds BUFSIZE bytes (its declaration is not visible here),
 * this bound lets both overrun it; BUFSIZE - sizeof(struct nbd_reply)
 * looks like the intended limit - confirm. */
1091 if (len > BUFSIZE + sizeof(struct nbd_reply))
1092 err("Request too big!");
1094 printf("%s from %Lu (%Lu) len %d, ", request.type ? "WRITE" :
1095 "READ", (unsigned long long)request.from,
1096 (unsigned long long)request.from / 512, len);
/* The reply echoes the request handle. */
1098 memcpy(reply.handle, request.handle, sizeof(reply.handle));
1099 if ((request.from + len) > (OFFT_MAX)) {
1100 DEBUG("[Number too large!]");
1101 ERROR(client, reply);
/* NOTE(review): the sum is cast to ssize_t before being compared with the
 * off_t exportsize; where ssize_t is narrower than off_t the value is
 * truncated and the range check can be bypassed - confirm. */
1105 if (((ssize_t)((off_t)request.from + len) > client->exportsize)) {
1107 ERROR(client, reply);
1111 if (request.type==NBD_CMD_WRITE) {
1112 DEBUG("wr: net->buf, ");
/* The payload is read off the socket before the read-only check, so a
 * rejected write is still consumed from the stream. */
1113 readit(client->net, buf, len);
1114 DEBUG("buf->exp, ");
1115 if ((client->server->flags & F_READONLY) ||
1116 (client->server->flags & F_AUTOREADONLY)) {
1117 DEBUG("[WRITE to READONLY!]");
1118 ERROR(client, reply);
1121 if (expwrite(request.from, buf, len, client)) {
1122 DEBUG("Write failed: %m" );
1123 ERROR(client, reply);
1126 SEND(client->net, reply);
1132 DEBUG("exp->buf, ");
/* Data is read in after a gap of sizeof(struct nbd_reply) so the reply
 * header can be copied in front and both sent with a single writeit(). */
1133 if (expread(request.from, buf + sizeof(struct nbd_reply), len, client)) {
1134 DEBUG("Read failed: %m");
1135 ERROR(client, reply);
1139 DEBUG("buf->net, ");
1140 memcpy(buf, &reply, sizeof(struct nbd_reply));
1141 writeit(client->net, buf, len + sizeof(struct nbd_reply));
1148 * Set up client export array, which is an array of FILE_INFO.
1149 * Also, split a single exportfile into multiple ones, if that was asked.
1150 * @param client information on the client which we want to setup export for
1152 void setupexport(CLIENT* client) {
1154 off_t laststartoff = 0, lastsize = 0;
1155 int multifile = (client->server->flags & F_MULTIFILE);
1157 client->export = g_array_new(TRUE, TRUE, sizeof(FILE_INFO));
1159 /* If multi-file, open as many files as we can.
1160 * If not, open exactly one file.
1161 * Calculate file sizes as we go to get total size. */
1165 mode_t mode = (client->server->flags & F_READONLY) ? O_RDONLY : O_RDWR;
1168 tmpname=g_strdup_printf("%s.%d", client->exportname, i);
1170 tmpname=g_strdup(client->exportname);
1172 DEBUG2( "Opening %s\n", tmpname );
1173 fi.fhandle = open(tmpname, mode);
1174 if(fi.fhandle == -1 && mode == O_RDWR) {
1175 /* Try again because maybe media was read-only */
1176 fi.fhandle = open(tmpname, O_RDONLY);
1177 if(fi.fhandle != -1) {
1178 client->server->flags |= F_AUTOREADONLY;
1179 client->server->flags |= F_READONLY;
1182 if(fi.fhandle == -1) {
1183 if(multifile && i>0)
1185 err("Could not open exported file: %m");
1187 fi.startoff = laststartoff + lastsize;
1188 g_array_append_val(client->export, fi);
1191 /* Starting offset and size of this file will be used to
1192 * calculate starting offset of next file */
1193 laststartoff = fi.startoff;
1194 lastsize = size_autodetect(fi.fhandle);
1200 /* Set export size to total calculated size */
1201 client->exportsize = laststartoff + lastsize;
1203 /* Export size may be overridden */
1204 if(client->server->expected_size) {
1205 /* desired size must be <= total calculated size */
1206 if(client->server->expected_size > client->exportsize) {
1207 err("Size of exported file is too big\n");
1210 client->exportsize = client->server->expected_size;
1213 msg3(LOG_INFO, "Size of exported file/device is %Lu", (unsigned long long)client->exportsize);
1215 msg3(LOG_INFO, "Total number of files: %d", i);
1219 int copyonwrite_prepare(CLIENT* client) {
/* Sets up the client's diff file state: builds the diff file name from the
 * export name and client name, creates and truncates that file, and
 * allocates client->difmap with every entry set to (u32)-1, the value
 * expread() and expwrite() test for to detect a page that is not yet in the
 * diff file.
 * NOTE(review): this listing has gaps; the declaration of i, the last
 * snprintf argument and the return statement are not visible here. */
1221 if ((client->difffilename = malloc(1024))==NULL)
1222 err("Failed to allocate string for diff file name");
1223 snprintf(client->difffilename, 1024, "%s-%s-%d.diff",client->exportname,client->clientname,
/* snprintf() already NUL-terminates within the given size; the explicit
 * terminator below is redundant but harmless. */
1225 client->difffilename[1023]='\0';
1226 msg3(LOG_INFO,"About to create map and diff file %s",client->difffilename) ;
1227 client->difffile=open(client->difffilename,O_RDWR | O_CREAT | O_TRUNC,0600) ;
1228 if (client->difffile<0) err("Could not create diff file (%m)") ;
/* NOTE(review): the element count rounds down. expread() and expwrite()
 * index difmap up to (a+len-1)/DIFFPAGESIZE, so when exportsize is not a
 * multiple of DIFFPAGESIZE an access to the final partial page falls one
 * element past this allocation - confirm and round up if so. */
1229 if ((client->difmap=calloc(client->exportsize/DIFFPAGESIZE,sizeof(u32)))==NULL)
1230 err("Could not allocate memory") ;
1231 for (i=0;i<client->exportsize/DIFFPAGESIZE;i++) client->difmap[i]=(u32)-1 ;
1237 * Serve a connection.
1239 * @todo allow for multithreading, perhaps use libevent. Not just yet, though;
1240 * follow the road map.
1242 * @param client a connected client
1244 void serveconnection(CLIENT *client) {
1245 setupexport(client);
1247 if (client->server->flags & F_COPYONWRITE) {
1248 copyonwrite_prepare(client);
1251 setmysockopt(client->net);
1257 * Find the name of the file we have to serve. This will use g_strdup_printf
1258 * to put the IP address of the client inside a filename containing
1259 * "%s" (in the form as specified by the "virtstyle" option). That name
1260 * is then written to client->exportname.
1262 * @param net A socket connected to an nbd client
1263 * @param client information about the client. The IP address in human-readable
1264 * format will be written to a new char* buffer, the address of which will be
1265 * stored in client->clientname.
1267 void set_peername(int net, CLIENT *client) {
1268 struct sockaddr_in addrin;
1269 struct sockaddr_in netaddr;
1270 size_t addrinlen = sizeof( addrin );
1276 if (getpeername(net, (struct sockaddr *) &addrin, (socklen_t *)&addrinlen) < 0)
1277 err("getsockname failed: %m");
1278 peername = g_strdup(inet_ntoa(addrin.sin_addr));
1279 switch(client->server->virtstyle) {
1281 client->exportname=g_strdup(client->server->exportname);
1284 for(i=0;i<strlen(peername);i++) {
1285 if(peername[i]=='.') {
1290 client->exportname=g_strdup_printf(client->server->exportname, peername);
1293 memcpy(&netaddr, &addrin, addrinlen);
1294 netaddr.sin_addr.s_addr>>=32-(client->server->cidrlen);
1295 netaddr.sin_addr.s_addr<<=32-(client->server->cidrlen);
1296 netname = inet_ntoa(netaddr.sin_addr);
1297 tmp=g_strdup_printf("%s/%s", netname, peername);
1298 client->exportname=g_strdup_printf(client->server->exportname, tmp);
1303 msg4(LOG_INFO, "connect from %s, assigned file is %s",
1304 peername, client->exportname);
1305 client->clientname=g_strdup(peername);
1310 * @param data a pointer to pid_t which should be freed
1312 void destroy_pid_t(gpointer data) {
1317 * Go daemon (unless we specified at compile time that we didn't want this)
1318 * @param serve the first server of our configuration. If its port is zero,
1319 * then do not daemonize, because we're doing inetd then. This parameter
1320 * is only used to create a PID file of the form
1321 * /var/run/nbd-server.<port>.pid; it's not modified in any way.
1323 #if !defined(NODAEMON) && !defined(NOFORK)
1324 void daemonize(SERVER* serve) {
1330 if(!*pidftemplate) {
1332 strncpy(pidftemplate, "/var/run/server.%d.pid", 255);
1334 strncpy(pidftemplate, "/var/run/server.pid", 255);
1337 snprintf(pidfname, 255, pidftemplate, serve ? serve->port : 0);
1338 pidf=fopen(pidfname, "w");
1340 fprintf(pidf,"%d\n", (int)getpid());
1344 fprintf(stderr, "Not fatal; continuing");
1348 #define daemonize(serve)
1349 #endif /* !defined(NODAEMON) && !defined(NOFORK) */
1352 * Connect a server's socket.
1354 * @param serve the server we want to connect.
1356 void setup_serve(SERVER *serve) {
1357 struct sockaddr_in addrin;
1358 struct sigaction sa;
1359 int addrinlen = sizeof(addrin);
1366 if ((serve->socket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0)
1369 /* lose the pesky "Address already in use" error message */
1370 if (setsockopt(serve->socket,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) {
1371 err("setsockopt SO_REUSEADDR");
1373 if (setsockopt(serve->socket,SOL_SOCKET,SO_KEEPALIVE,&yes,sizeof(int)) == -1) {
1374 err("setsockopt SO_KEEPALIVE");
1377 /* make the listening socket non-blocking */
1378 if ((sock_flags = fcntl(serve->socket, F_GETFL, 0)) == -1) {
1379 err("fcntl F_GETFL");
1381 if (fcntl(serve->socket, F_SETFL, sock_flags | O_NONBLOCK) == -1) {
1382 err("fcntl F_SETFL O_NONBLOCK on server socket");
1385 DEBUG("Waiting for connections... bind, ");
1386 addrin.sin_family = AF_INET;
1387 addrin.sin_port = htons(serve->port);
1388 addrin.sin_addr.s_addr = 0;
1389 if (bind(serve->socket, (struct sockaddr *) &addrin, addrinlen) < 0)
1392 if (listen(serve->socket, 1) < 0)
1394 sa.sa_handler = sigchld_handler;
1395 sigemptyset(&sa.sa_mask);
1396 sa.sa_flags = SA_RESTART;
1397 if(sigaction(SIGCHLD, &sa, NULL) == -1)
1398 err("sigaction: %m");
1399 sa.sa_handler = sigterm_handler;
1400 sigemptyset(&sa.sa_mask);
1401 sa.sa_flags = SA_RESTART;
1402 if(sigaction(SIGTERM, &sa, NULL) == -1)
1403 err("sigaction: %m");
1407 * Connect our servers.
1409 void setup_servers(GArray* servers) {
1412 for(i=0;i<servers->len;i++) {
1413 setup_serve(&(g_array_index(servers, SERVER, i)));
1415 children=g_hash_table_new_full(g_int_hash, g_int_equal, NULL, destroy_pid_t);
1419 * Loop through the available servers, and serve them.
1421 int serveloop(GArray* servers) {
1422 struct sockaddr_in addrin;
1423 socklen_t addrinlen=sizeof(addrin);
1433 * Set up the master fd_set. The set of descriptors we need
1434 * to select() for never changes anyway and it buys us a *lot*
1435 * of time to only build this once. However, if we ever choose
1436 * to not fork() for clients anymore, we may have to revisit
1441 for(i=0;i<servers->len;i++) {
1442 sock=(g_array_index(servers, SERVER, i)).socket;
1443 FD_SET(sock, &mset);
1444 max=sock>max?sock:max;
1451 memcpy(&rset, &mset, sizeof(fd_set));
1454 if(select(max+1, &rset, NULL, NULL, &tv)>0) {
1456 for(i=0;i<servers->len;i++) {
1457 serve=&(g_array_index(servers, SERVER, i));
1458 if(FD_ISSET(serve->socket, &rset)) {
1461 if ((net=accept(serve->socket, (struct sockaddr *) &addrin, &addrinlen)) < 0)
1464 client = g_malloc(sizeof(CLIENT));
1465 client->server=serve;
1466 client->exportsize=OFFT_MAX;
1467 if ((sock_flags = fcntl(serve->socket, F_GETFL, 0)) == -1) {
1468 err("fcntl F_GETFL");
1470 if (fcntl(net, F_SETFL, sock_flags | O_NONBLOCK) == -1) {
1471 err("fcntl F_SETFL O_NONBLOCK on client socket");
1474 set_peername(net, client);
1475 if (!authorized_client(client)) {
1476 msg2(LOG_INFO,"Unauthorized client") ;
1480 msg2(LOG_INFO,"Authorized client") ;
1481 pid=g_malloc(sizeof(pid_t));
1483 if ((*pid=fork())<0) {
1484 msg3(LOG_INFO,"Could not fork (%s)",strerror(errno)) ;
1488 if (*pid>0) { /* parent */
1490 g_hash_table_insert(children, pid, pid);
1494 g_hash_table_destroy(children);
1495 for(i=0;i<servers->len,serve=(g_array_index(servers, SERVER*, i));i++) {
1496 close(serve->socket);
1498 /* FALSE does not free the
1499 actual data. This is required,
1500 because the client has a
1501 direct reference into that
1502 data, and otherwise we get a
1504 g_array_free(servers, FALSE);
1506 msg2(LOG_INFO,"Starting to serve");
1507 serveconnection(client);
1515 * Set up user-ID and/or group-ID
1517 void dousers(void) {
1521 pw=getpwnam(runuser);
1522 if(setuid(pw->pw_uid)<0)
1523 msg3(LOG_DEBUG, "Could not set UID: %s", strerror(errno));
1526 gr=getgrnam(rungroup);
1527 if(setgid(gr->gr_gid)<0)
1528 msg3(LOG_DEBUG, "Could not set GID: %s", strerror(errno));
1533 * Main entry point...
1535 int main(int argc, char *argv[]) {
1540 if (sizeof( struct nbd_request )!=28) {
1541 fprintf(stderr,"Bad size of structure. Alignment problems?\n");
1545 memset(pidftemplate, '\0', 256);
1548 config_file_pos = g_strdup(CFILE);
1549 serve=cmdline(argc, argv);
1550 servers = parse_cfile(config_file_pos, &err);
1551 if(!servers || !servers->len) {
1552 g_warning("Could not parse config file: %s", err->message);
1555 g_array_append_val(servers, *serve);
1558 /* We don't support this at this time */
1560 if (!(serve->port)) {
1563 /* You really should define ISSERVER if you're going to use
1564 * inetd mode, but if you don't, closing stdout and stderr
1565 * (which inetd had connected to the client socket) will let it
1569 open("/dev/null", O_WRONLY);
1570 open("/dev/null", O_WRONLY);
1572 client=g_malloc(sizeof(CLIENT));
1573 client->server=serve;
1575 client->exportsize=OFFT_MAX;
1576 set_peername(0,client);
1577 serveconnection(client);
1581 if((!serve) && (!servers||!servers->len)) {
1582 g_message("Nothing to do! Bye!");
1586 setup_servers(servers);