2 * Network Block Device - server
4 * Copyright 1996-1998 Pavel Machek, distribute under GPL
5 * <pavel@atrey.karlin.mff.cuni.cz>
6 * Copyright 2001-2004 Wouter Verhelst <wouter@debian.org>
7 * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk>
9 * Version 1.0 - hopefully 64-bit-clean
10 * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au>
11 * Version 1.2 - autodetect size of block devices, thanx to Peter T. Breuer" <ptb@it.uc3m.es>
12 * Version 1.5 - can compile on Unix systems that don't have 64 bit integer
13 * type, or don't have 64 bit file offsets by defining FS_32BIT
14 * in compile options for nbd-server *only*. This can be done
15 * with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the
16 * original autoconf input file, or I would make it a configure
17 * option.) Ken Yap <ken@nlc.net.au>.
18 * Version 1.6 - fix autodetection of block device size and really make 64 bit
19 * clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk>
20 * Version 2.0 - Version synchronised with client
21 * Version 2.1 - Reap zombie client processes when they exit. Removed
22 * (uncommented) the _IO magic, it's no longer necessary. Wouter
23 * Verhelst <wouter@debian.org>
24 * Version 2.2 - Auto switch to read-only mode (useful for floppies).
25 * Version 2.3 - Fixed code so that Large File Support works. This
26 * removes the FS_32BIT compile-time directive; define
27 * _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be
28 * using FS_32BIT. This will allow you to use files >2GB instead of
29 * having to use the -m option. Wouter Verhelst <wouter@debian.org>
30 * Version 2.4 - Added code to keep track of children, so that we can
31 * properly kill them from initscripts. Add a call to daemon(),
32 * so that processes don't think they have to wait for us, which is
33 * interesting for initscripts as well. Wouter Verhelst
35 * Version 2.5 - Bugfix release: forgot to reset child_arraysize to
36 * zero after fork()ing, resulting in nbd-server going berserk
37 * when it receives a signal with at least one child open. Wouter
38 * Verhelst <wouter@debian.org>
39 * 10/10/2003 - Added socket option SO_KEEPALIVE (sf.net bug 819235);
40 * rectified type of mainloop::size_host (sf.net bugs 814435 and
41 * 817385); close the PID file after writing to it, so that the
42 * daemon can actually be found. Wouter Verhelst
44 * 10/10/2003 - Size of the data "size_host" was wrong and so was not
45 * correctly put in network endianness. Many types were corrected
46 * (size_t and off_t instead of int). <vspaceg@sourceforge.net>
47 * Version 2.6 - Some code cleanup.
48 * Version 2.7 - Better build system.
49 * 11/02/2004 - Doxygenified the source, modularized it a bit. Needs a
50 * lot more work, but this is a start. Wouter Verhelst
54 /* Includes LFS defines, which defines behaviours of some of the following
55 * headers, so must come before those */
58 #include <sys/types.h>
59 #include <sys/socket.h>
61 #include <sys/select.h> /* select */
62 #include <sys/wait.h> /* wait */
63 #ifdef HAVE_SYS_IOCTL_H
64 #include <sys/ioctl.h>
66 #include <sys/param.h>
67 #ifdef HAVE_SYS_MOUNT_H
68 #include <sys/mount.h> /* For BLKGETSIZE */
70 #include <signal.h> /* sigaction */
71 #include <netinet/tcp.h>
72 #include <netinet/in.h> /* sockaddr_in, htons, in_addr */
73 #include <netdb.h> /* hostent, gethostby*, getservby* */
80 #include <arpa/inet.h>
90 /* used in cliserv.h, so must come first */
91 #define MY_NAME "nbd_server"
94 /** Default position of the config file */
96 #define SYSCONFDIR "/etc"
98 #define CFILE SYSCONFDIR "/nbd-server/config"
100 /** Where our config file actually is */
101 gchar* config_file_pos;
103 /** What user we're running as */
105 /** What group we're running as */
106 gchar* rungroup=NULL;
108 /** Logging macros, now nothing goes to syslog unless you say ISSERVER */
110 #define msg2(a,b) syslog(a,b)
111 #define msg3(a,b,c) syslog(a,b,c)
112 #define msg4(a,b,c,d) syslog(a,b,c,d)
114 #define msg2(a,b) g_message(b)
115 #define msg3(a,b,c) g_message(b,c)
116 #define msg4(a,b,c,d) g_message(b,c,d)
119 /* Debugging macros */
122 #define DEBUG( a ) printf( a )
123 #define DEBUG2( a,b ) printf( a,b )
124 #define DEBUG3( a,b,c ) printf( a,b,c )
125 #define DEBUG4( a,b,c,d ) printf( a,b,c,d )
128 #define DEBUG2( a,b )
129 #define DEBUG3( a,b,c )
130 #define DEBUG4( a,b,c,d )
132 #ifndef PACKAGE_VERSION
133 #define PACKAGE_VERSION ""
136 * The highest value a variable of type off_t can reach. This is a signed
137 * integer, so set all bits except for the leftmost one.
139 #define OFFT_MAX ~((off_t)1<<(sizeof(off_t)*8-1))
140 #define LINELEN 256 /**< Size of static buffer used to read the
141 authorization file (yuck) */
142 #define BUFSIZE (1024*1024) /**< Size of buffer that can hold requests */
143 #define DIFFPAGESIZE 4096 /**< diff file uses those chunks */
144 #define F_READONLY 1 /**< flag to tell us a file is readonly */
145 #define F_MULTIFILE 2 /**< flag to tell us a file is exported using -m */
146 #define F_COPYONWRITE 4 /**< flag to tell us a file is exported using
148 #define F_AUTOREADONLY 8 /**< flag to tell us a file is set to autoreadonly */
150 GHashTable *children;
151 char pidfname[256]; /**< name of our PID file */
152 char pidftemplate[256]; /**< template to be used for the filename of the PID file */
153 char default_authname[] = SYSCONFDIR "/nbd-server/allow"; /**< default name of allow file */
156 * Types of virtualization
159 VIRT_NONE=0, /**< No virtualization */
160 VIRT_IPLIT, /**< Literal IP address as part of the filename */
161 VIRT_IPHASH, /**< Replacing all dots in an ip address by a / before
162 doing the same as in IPLIT */
163 VIRT_CIDR, /**< Every subnet in its own directory */
167 * Variables associated with a server.
170 gchar* exportname; /**< (unprocessed) filename of the file we're exporting */
171 off_t expected_size; /**< size of the exported file as it was told to
172 us through configuration */
173 unsigned int port; /**< port we're exporting this file at */
174 char* authname; /**< filename of the authorization file */
175 int flags; /**< flags associated with this exported file */
176 unsigned int timeout;/**< how long a connection may be idle
178 int socket; /**< The socket of this server. */
179 VIRT_STYLE virtstyle;/**< The style of virtualization, if any */
180 uint8_t cidrlen; /**< The length of the mask when we use
181 CIDR-style virtualization */
182 gchar* prerun; /**< command to be ran after connecting a client,
183 but before starting to serve */
184 gchar* postrun; /**< command that will be ran after the client
189 * Variables associated with a client socket.
192 int fhandle; /**< file descriptor */
193 off_t startoff; /**< starting offset of this file */
197 off_t exportsize; /**< size of the file we're exporting */
198 char *clientname; /**< peer */
199 char *exportname; /**< (processed) filename of the file we're exporting */
200 GArray *export; /**< array of FILE_INFO of exported files;
201 array size is always 1 unless we're
202 doing the multiple file option */
203 int net; /**< The actual client socket */
204 SERVER *server; /**< The server this client is getting data from */
205 char* difffilename; /**< filename of the copy-on-write file, if any */
206 int difffile; /**< filedescriptor of copyonwrite file. @todo
207 shouldn't this be an array too? (cfr export) Or
208 make -m and -c mutually exclusive */
209 u32 difffilelen; /**< number of pages in difffile */
210 u32 *difmap; /**< see comment on the global difmap for this one */
214 * Type of configuration file values
217 PARAM_INT, /**< This parameter is an integer */
218 PARAM_STRING, /**< This parameter is a string */
219 PARAM_BOOL, /**< This parameter is a boolean */
223 * Configuration file values
226 gchar *paramname; /**< Name of the parameter, as it appears in
228 gboolean required; /**< Whether this is a required (as opposed to
229 optional) parameter */
230 PARAM_TYPE ptype; /**< Type of the parameter. */
231 gpointer target; /**< Pointer to where the data of this
232 parameter should be written. If ptype is
233 PARAM_BOOL, the data is or'ed rather than
235 gint flagval; /**< Flag mask for this parameter in case ptype
240 * Check whether a client is allowed to connect. Works with an authorization
241 * file which contains one line per machine, no wildcards.
243 * @param opts The client who's trying to connect.
244 * @return 0 - authorization refused, 1 - OK
/*
 * authorized_client: decide whether the connecting client may use this export.
 *
 * Opens the file named by opts->server->authname and reads it with fgets()
 * (at most LINELEN bytes per call), comparing each entry against
 * opts->clientname. An entry that contains '/' is handled as an
 * address/prefix-length pair; any other entry is compared as text.
 *
 * NOTE(review): local declarations, return statements and closing braces of
 * this function are not present in this copy of the file, so the individual
 * return paths cannot be confirmed from here.
 */
246 int authorized_client(CLIENT *opts) {
247 const char *ERRMSG="Invalid entry '%s' in authfile '%s', so, refusing all connections.";
252 struct in_addr client;
253 struct in_addr cltemp;
256 if ((f=fopen(opts->server->authname,"r"))==NULL) {
257 msg4(LOG_INFO,"Can't open authorization file %s (%s).",
258 opts->server->authname,strerror(errno)) ;
/* Parse the peer address once; the result of this inet_aton() call is not checked. */
262 inet_aton(opts->clientname, &client);
263 while (fgets(line,LINELEN,f)!=NULL) {
264 if((tmp=index(line, '/'))) {
265 if(strlen(line)<=tmp-line) {
266 msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
/*
 * NOTE(review): inet_aton() returns nonzero when the address is VALID, so
 * this condition reports ERRMSG for well-formed addresses. The test looks
 * inverted -- confirm.
 */
270 if(inet_aton(line,&addr)) {
271 msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
/*
 * Prefix length comes straight from strtol() with no range check; a value
 * outside 1..32 makes the shift count (32-len) invalid for a 32-bit operand.
 * NOTE(review): inet_aton() stores s_addr in network byte order, so shifting
 * it directly presumably keeps the wrong bits on little-endian hosts --
 * confirm.
 */
274 len=strtol(tmp, NULL, 0);
275 addr.s_addr>>=32-len;
276 addr.s_addr<<=32-len;
277 memcpy(&cltemp,&client,sizeof(client));
278 cltemp.s_addr>>=32-len;
279 cltemp.s_addr<<=32-len;
280 if(addr.s_addr == cltemp.s_addr) {
/*
 * Only strlen(opts->clientname) characters are compared, so a line that merely
 * starts with the client name (e.g. "10.0.0.10" for client "10.0.0.1") also
 * matches.
 */
284 if (strncmp(line,opts->clientname,strlen(opts->clientname))==0) {
294 * Read data from a file descriptor into a buffer
296 * @param f a file descriptor
297 * @param buf a buffer
298 * @param len the number of bytes to be read
/*
 * readit: read from file descriptor f into buf.
 * Calls err() with "Read failed" when read() returns zero or a negative value.
 * NOTE(review): the declaration of res and any enclosing loop are not present
 * in this copy; presumably the read is repeated until len bytes have arrived
 * -- confirm against the full source.
 */
300 inline void readit(int f, void *buf, size_t len) {
304 if ((res = read(f, buf, len)) <= 0)
305 err("Read failed: %m");
312 * Write data from a buffer into a filedescriptor
314 * @param f a file descriptor
315 * @param buf a buffer containing data
316 * @param len the number of bytes to be written
/*
 * writeit: write the contents of buf to file descriptor f.
 * Calls err() with "Send failed" when write() returns zero or a negative value.
 * NOTE(review): the declaration of res and any enclosing loop are not present
 * in this copy; presumably the write is repeated until len bytes have been
 * sent -- confirm against the full source.
 */
318 inline void writeit(int f, void *buf, size_t len) {
322 if ((res = write(f, buf, len)) <= 0)
323 err("Send failed: %m");
330 * Print out a message about how to use nbd-server. Split out to a separate
331 * function so that we can call it from multiple places
/*
 * Body of the usage/help routine: prints the program version, the command
 * line synopsis with one line per option, and the default configuration file
 * path (CFILE) to standard output.
 * NOTE(review): the function's signature line is not present in this copy of
 * the file.
 */
334 printf("This is nbd-server version " VERSION "\n");
335 printf("Usage: port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-a timeout_sec] [-C configuration file] [-p PID file name] [-o section name]\n"
336 "\t-r|--read-only\t\tread only\n"
337 "\t-m|--multi-file\t\tmultiple file\n"
338 "\t-c|--copy-on-write\tcopy on write\n"
339 "\t-C|--config-file\tspecify an alternate configuration file\n"
340 "\t-l|--authorize-file\tfile with list of hosts that are allowed to\n\t\t\t\tconnect.\n"
341 "\t-a|--idle-time\t\tmaximum idle seconds; server terminates when\n\t\t\t\tidle time exceeded\n"
342 "\t-p|--pid-file\t\tspecify a filename to write our PID to\n"
343 "\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n\n"
344 "\tif port is set to 0, stdin is used (for running from inetd)\n"
345 "\tif file_to_export contains '%%s', it is substituted with the IP\n"
346 "\t\taddress of the machine trying to connect\n" );
347 printf("Using configuration file %s\n", CFILE);
350 /* Dumps a config file section of the given SERVER*, and exits. */
/*
 * dump_section: print a configuration-file section for one export on stdout.
 *
 * @param serve          export whose settings are printed
 * @param section_header text placed between the square brackets of the
 *                       section heading
 *
 * exportname and port are always printed; the readonly, multifile and
 * copyonwrite keys are printed only when the matching flag bit is set, and
 * filesize/authfile only when the corresponding field is non-zero/non-NULL.
 * NOTE(review): the condition guarding the timeout line and the end of the
 * function are not present in this copy.
 */
351 void dump_section(SERVER* serve, gchar* section_header) {
352 printf("[%s]\n", section_header);
353 printf("\texportname = %s\n", serve->exportname);
/* port is declared unsigned int but is printed with %d. */
354 printf("\tport = %d\n", serve->port);
355 if(serve->flags & F_READONLY) {
356 printf("\treadonly = true\n");
358 if(serve->flags & F_MULTIFILE) {
359 printf("\tmultifile = true\n");
361 if(serve->flags & F_COPYONWRITE) {
362 printf("\tcopyonwrite = true\n");
364 if(serve->expected_size) {
365 printf("\tfilesize = %Ld\n", (long long int)serve->expected_size);
367 if(serve->authname) {
368 printf("\tauthfile = %s\n", serve->authname);
371 printf("\ttimeout = %d\n", serve->timeout);
377 * Parse the command line.
379 * @param argc the argc argument to main()
380 * @param argv the argv argument to main()
/*
 * cmdline: build a SERVER description from the command-line arguments.
 *
 * Allocates a zeroed SERVER, presets authname to a copy of default_authname,
 * then walks the arguments with getopt_long(). Positional arguments are
 * taken in order as port, export file name and optional size; options set
 * flag bits, the authorization file, the idle timeout, the PID-file template,
 * the configuration file location, or request a config-section dump.
 *
 * NOTE(review): the case labels, several local declarations and the return
 * statements are not present in this copy, so the mapping of option letters
 * to the statements below is inferred from the long_options table.
 */
382 SERVER* cmdline(int argc, char *argv[]) {
386 struct option long_options[] = {
387 {"read-only", no_argument, NULL, 'r'},
388 {"multi-file", no_argument, NULL, 'm'},
389 {"copy-on-write", no_argument, NULL, 'c'},
390 {"authorize-file", required_argument, NULL, 'l'},
391 {"idle-time", required_argument, NULL, 'a'},
392 {"config-file", required_argument, NULL, 'C'},
393 {"pid-file", required_argument, NULL, 'p'},
394 {"output-config", required_argument, NULL, 'o'},
401 gboolean do_output=FALSE;
402 gchar* section_header;
407 serve=g_new0(SERVER, 1);
408 serve->authname = g_strdup(default_authname);
/*
 * The leading '-' in the option string is the GNU getopt mode in which
 * non-option arguments are returned in order instead of being skipped.
 */
409 while((c=getopt_long(argc, argv, "-a:C:cl:mo:rp:", long_options, &i))>=0) {
412 /* non-option argument */
413 switch(nonspecial++) {
/* strtol() result is stored without any validation of the text. */
415 serve->port=strtol(optarg, NULL, 0);
418 serve->exportname = g_strdup(optarg);
419 if(serve->exportname[0] != '/') {
420 fprintf(stderr, "E: The to be exported file needs to be an absolute filename!\n");
/* Size argument: an optional trailing k/K/m/M suffix is recognised. */
425 last=strlen(optarg)-1;
427 if (suffix == 'k' || suffix == 'K' ||
428 suffix == 'm' || suffix == 'M')
/* atol() yields a long and reports no errors; off_t may be wider than long. */
430 es = (off_t)atol(optarg);
438 serve->expected_size = es;
443 serve->flags |= F_READONLY;
446 serve->flags |= F_MULTIFILE;
450 section_header = g_strdup(optarg);
/*
 * NOTE(review): pidftemplate is a 256-byte array; strncpy() with n == 256
 * leaves it without a terminating NUL when optarg is 256 bytes or longer.
 */
453 strncpy(pidftemplate, optarg, 256);
456 serve->flags |=F_COPYONWRITE;
459 g_free(config_file_pos);
460 config_file_pos=g_strdup(optarg);
/* Replace the default authorization file name set above. */
463 g_free(serve->authname);
464 serve->authname=g_strdup(optarg);
467 serve->timeout=strtol(optarg, NULL, 0);
475 /* What's left: the port to export, the name of the to be exported
476 * file, and, optionally, the size of the file, in that order. */
483 g_critical("Need a complete configuration on the command line to output a config file section!");
486 dump_section(serve, section_header);
492 * Error codes for config file parsing
495 CFILE_NOTFOUND, /**< The configuration file is not found */
496 CFILE_MISSING_GENERIC, /**< The (required) group "generic" is missing */
497 CFILE_KEY_MISSING, /**< A (required) key is missing */
498 CFILE_VALUE_INVALID, /**< A value is syntactically invalid */
499 CFILE_PROGERR /**< Programmer error */
503 * Remove a SERVER from memory. Used from the hash table
/*
 * remove_server: release the strings owned by a SERVER.
 * Frees the exportname and authname members with g_free().
 * NOTE(review): the line that derives `server` from the gpointer argument and
 * the remaining frees are not present in this copy of the file.
 */
505 void remove_server(gpointer s) {
509 g_free(server->exportname);
511 g_free(server->authname);
516 * Parse the config file.
518 * @param f the name of the config file
519 * @param e a GError. @see CFILE_ERRORS for what error values this function can
521 * @return a GArray of SERVER structures. If the config file is empty or does not
522 * exist, returns an empty GArray; if the config file contains an
523 * error, returns NULL, and e is set appropriately
/*
 * parse_cfile: load a GKeyFile-format configuration file and turn each group
 * into a SERVER entry.
 *
 * @param f name of the configuration file
 * @param e receives a GError in the "parse_cfile" domain on failure
 *
 * The result array is created with element size sizeof(SERVER) and entries
 * are appended by value. The file's start group must be named "generic".
 * Each group's keys are read through a table of parameter descriptors
 * whose `target` pointers are aimed at a scratch SERVER `s` (or at the local
 * `virtstyle` string) before the group is parsed.
 *
 * NOTE(review): many lines of this function are not present in this copy
 * (declarations of lp/gp/p/p_size, the switch on ptype, return statements,
 * closing braces); the comments below describe only what the visible lines
 * establish.
 */
525 GArray* parse_cfile(gchar* f, GError** e) {
526 const char* DEFAULT_ERROR = "Could not parse %s in group %s: %s";
527 const char* MISSING_REQUIRED_ERROR = "Could not find required value %s in group %s: %s";
529 gchar *virtstyle=NULL;
/* Per-export parameters; targets are filled in per group further down. */
531 { "exportname", TRUE, PARAM_STRING, NULL, 0 },
532 { "port", TRUE, PARAM_INT, NULL, 0 },
533 { "authfile", FALSE, PARAM_STRING, NULL, 0 },
534 { "timeout", FALSE, PARAM_INT, NULL, 0 },
535 { "filesize", FALSE, PARAM_INT, NULL, 0 },
536 { "virtstyle", FALSE, PARAM_STRING, NULL, 0 },
537 { "prerun", FALSE, PARAM_STRING, NULL, 0 },
538 { "postrun", FALSE, PARAM_STRING, NULL, 0 },
539 { "readonly", FALSE, PARAM_BOOL, NULL, F_READONLY },
540 { "multifile", FALSE, PARAM_BOOL, NULL, F_MULTIFILE },
541 { "copyonwrite", FALSE, PARAM_BOOL, NULL, F_COPYONWRITE },
542 { "autoreadonly", FALSE, PARAM_BOOL, NULL, F_AUTOREADONLY },
543 { "sparse_cow", FALSE, PARAM_BOOL, NULL, F_SPARSE },
/*
 * NOTE(review): thirteen rows are visible above and targets are assigned up
 * to index 12 below, yet the size is 11. If p_size is taken from lp_size, the
 * "autoreadonly" and "sparse_cow" keys are never read -- confirm.
 */
545 const int lp_size=11;
/* Parameters of the [generic] group write directly into the globals. */
547 { "user", FALSE, PARAM_STRING, &runuser, 0 },
548 { "group", FALSE, PARAM_STRING, &rungroup, 0 },
554 const char *err_msg=NULL;
562 errdomain = g_quark_from_string("parse_cfile");
563 cfile = g_key_file_new();
564 retval = g_array_new(FALSE, TRUE, sizeof(SERVER));
565 if(!g_key_file_load_from_file(cfile, f, G_KEY_FILE_KEEP_COMMENTS |
566 G_KEY_FILE_KEEP_TRANSLATIONS, &err)) {
567 g_set_error(e, errdomain, CFILE_NOTFOUND, "Could not open config file.");
568 g_key_file_free(cfile);
/*
 * NOTE(review): g_key_file_get_start_group() may return NULL for a file
 * without groups, which would be passed to strcmp() -- confirm.
 */
571 if(strcmp(g_key_file_get_start_group(cfile), "generic")) {
572 g_set_error(e, errdomain, CFILE_MISSING_GENERIC, "Config file does not contain the [generic] group!");
573 g_key_file_free(cfile);
576 groups = g_key_file_get_groups(cfile, NULL);
577 for(i=0;groups[i];i++) {
/* Start every group from a zeroed SERVER and re-aim the table at it. */
578 memset(&s, '\0', sizeof(SERVER));
579 lp[0].target=&(s.exportname);
580 lp[1].target=&(s.port);
581 lp[2].target=&(s.authname);
582 lp[3].target=&(s.timeout);
583 lp[4].target=&(s.expected_size);
584 lp[5].target=&(virtstyle);
585 lp[6].target=&(s.prerun);
586 lp[7].target=&(s.postrun);
/* All boolean parameters share the flags word and differ only in flagval. */
587 lp[8].target=lp[9].target=lp[10].target=
588 lp[11].target=lp[12].target=&(s.flags);
590 /* After the [generic] group, start parsing exports */
595 for(j=0;j<p_size;j++) {
596 g_assert(p[j].target != NULL);
597 g_assert(p[j].ptype==PARAM_INT||p[j].ptype==PARAM_STRING||p[j].ptype==PARAM_BOOL);
/*
 * Integer parameters are stored through a gint pointer.
 * NOTE(review): "filesize" targets s.expected_size, which is an off_t; where
 * off_t is wider than gint only part of the field is written and sizes
 * beyond the gint range cannot be expressed -- confirm.
 */
600 *((gint*)p[j].target) =
601 g_key_file_get_integer(cfile,
607 *((gchar**)p[j].target) =
608 g_key_file_get_string(cfile,
614 value = g_key_file_get_boolean(cfile,
616 p[j].paramname, &err);
/* Booleans set or clear their flagval bit in the shared flags word. */
619 *((gint*)p[j].target) |= p[j].flagval;
621 *((gint*)p[j].target) &= ~(p[j].flagval);
627 if(err->code == G_KEY_FILE_ERROR_KEY_NOT_FOUND) {
629 /* Ignore not-found error for optional values */
633 err_msg = MISSING_REQUIRED_ERROR;
636 err_msg = DEFAULT_ERROR;
638 g_set_error(e, errdomain, CFILE_VALUE_INVALID, err_msg, p[j].paramname, groups[i], err->message);
639 g_array_free(retval, TRUE);
641 g_key_file_free(cfile);
/*
 * Map the virtstyle text to a VIRT_STYLE value. The comparisons are prefix
 * matches (strncmp with the keyword length), so trailing text is accepted.
 */
646 if(!strncmp(virtstyle, "none", 4)) {
647 s.virtstyle=VIRT_NONE;
648 } else if(!strncmp(virtstyle, "ipliteral", 9)) {
649 s.virtstyle=VIRT_IPLIT;
650 } else if(!strncmp(virtstyle, "iphash", 6)) {
651 s.virtstyle=VIRT_IPHASH;
652 } else if(!strncmp(virtstyle, "cidrhash", 8)) {
653 s.virtstyle=VIRT_CIDR;
654 if(strlen(virtstyle)<10) {
655 g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s: missing length", virtstyle, groups[i]);
656 g_array_free(retval, TRUE);
657 g_key_file_free(cfile);
/*
 * NOTE(review): virtstyle+8 is the character right after "cidrhash". If the
 * expected syntax has a separator there, strtol() stops at once and yields
 * 0; the result is also stored in a uint8_t without a range check -- confirm.
 */
660 s.cidrlen=strtol(virtstyle+8, NULL, 0);
662 g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s", virtstyle, groups[i]);
663 g_array_free(retval, TRUE);
664 g_key_file_free(cfile);
668 s.virtstyle=VIRT_IPLIT;
670 /* Don't need to free this, it's not our string */
672 /* Don't append values for the [generic] group */
/* The SERVER is copied by value into the result array. */
674 g_array_append_val(retval, s);
681 * Signal handler for SIGCHLD
682 * @param s the signal we're handling (must be SIGCHLD, or something
/*
 * sigchld_handler: reap terminated children and drop them from `children`.
 * Loops on waitpid(-1, ..., WNOHANG) so that every child that has already
 * exited is collected in a single invocation without blocking.
 * NOTE(review): this calls the logging macros and GLib hash-table functions
 * from signal context; neither is documented as async-signal-safe -- confirm
 * this is acceptable.
 */
685 void sigchld_handler(int s) {
690 while((pid=waitpid(-1, &status, WNOHANG)) > 0) {
691 if(WIFEXITED(status)) {
692 msg3(LOG_INFO, "Child exited with %d", WEXITSTATUS(status));
/* The table is queried with a pointer to the pid, not the pid value itself. */
694 i=g_hash_table_lookup(children, &pid);
696 msg3(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid);
698 DEBUG2("Removing %d from the list of children", pid);
699 g_hash_table_remove(children, &pid);
705 * Kill a child. Called from sigterm_handler::g_hash_table_foreach.
708 * @param value the value corresponding to the above key
709 * @param user_data a pointer which we always set to 1, so that we know what
/*
 * killchild: GHFunc-style callback used with g_hash_table_foreach() on the
 * `children` table. user_data is interpreted as a pointer to an int.
 * NOTE(review): the statements that act on key/value and on *parent are not
 * present in this copy of the file.
 */
712 void killchild(gpointer key, gpointer value, gpointer user_data) {
714 int *parent=user_data;
721 * Handle SIGTERM and dispatch it to our children
722 * @param s the signal we're handling (must be SIGTERM, or something
723 * is severely wrong).
/*
 * sigterm_handler: on SIGTERM, run killchild() over every entry of the
 * `children` table, passing the address of the local `parent` as user data.
 * NOTE(review): the declaration/initialisation of `parent` and whatever
 * follows the traversal are not present in this copy of the file.
 */
725 void sigterm_handler(int s) {
728 g_hash_table_foreach(children, killchild, &parent);
738 * Detect the size of a file.
740 * @param fhandle An open filedescriptor
741 * @return the size of the file, or OFFT_MAX if detection was
/*
 * size_autodetect: determine the size in bytes of an open file or device.
 *
 * Three methods are tried in order: the BLKGETSIZE ioctl (only when
 * both HAVE_SYS_MOUNT_H and HAVE_SYS_IOCTL_H are defined), fstat()'s st_size,
 * and finally lseek() to SEEK_END. err() is called when fstat() fails and when
 * no method produced a size.
 *
 * NOTE(review): declarations of es/error, the intermediate return statements
 * and closing braces are not present in this copy of the file.
 */
744 off_t size_autodetect(int fhandle) {
746 unsigned long sectors;
747 struct stat stat_buf;
750 #ifdef HAVE_SYS_MOUNT_H
751 #ifdef HAVE_SYS_IOCTL_H
753 DEBUG("looking for export size with ioctl BLKGETSIZE\n");
/*
 * NOTE(review): the third ioctl argument below appears to be a mis-encoded
 * "&sectors" (the address of the local declared above); as written it is not
 * valid C. The sector count is converted to bytes assuming 512-byte sectors.
 */
754 if (!ioctl(fhandle, BLKGETSIZE, §ors) && sectors) {
755 es = (off_t)sectors * (off_t)512;
758 #endif /* BLKGETSIZE */
759 #endif /* HAVE_SYS_IOCTL_H */
760 #endif /* HAVE_SYS_MOUNT_H */
762 DEBUG("looking for fhandle size with fstat\n");
/* Pre-clear st_size so a value of 0 can be told apart from a real size. */
763 stat_buf.st_size = 0;
764 error = fstat(fhandle, &stat_buf);
766 if(stat_buf.st_size > 0)
767 return (off_t)stat_buf.st_size;
769 err("fstat failed: %m");
772 DEBUG("looking for fhandle size with lseek SEEK_END\n");
/* Side effect: this moves the descriptor's file offset to the end. */
773 es = lseek(fhandle, (off_t)0, SEEK_END);
774 if (es > ((off_t)0)) {
777 DEBUG2("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4)));
780 err("Could not find size of exported block device: %m");
785 * Get the file handle and offset, given an export offset.
787 * @param export An array of export files
788 * @param a The offset to get corresponding file/offset for
789 * @param fhandle [out] File descriptor
790 * @param foffset [out] Offset into fhandle
791 * @param maxbytes [out] Tells how many bytes can be read/written
792 * from fhandle starting at foffset (0 if there is no limit)
793 * @return 0 on success, -1 on failure
/*
 * get_filepos: translate an offset within the whole export into a file
 * descriptor and an offset within that file.
 *
 * @param export   array of FILE_INFO entries; the search below relies on it
 *                 being ordered by ascending startoff
 * @param a        offset into the export
 * @param fhandle  [out] descriptor of the file that contains offset a
 * @param foffset  [out] a minus that file's startoff
 * @param maxbytes [out] distance from a to the start of the next file, set
 *                 only when a following file exists
 *
 * NOTE(review): the negative-offset check, the updates of start/end inside
 * the search loop and the return statements are not present in this copy.
 */
795 int get_filepos(GArray* export, off_t a, int* fhandle, off_t* foffset, size_t* maxbytes ) {
796 /* Negative offset not allowed */
800 /* Binary search for last file with starting offset <= a */
803 int end = export->len - 1;
804 while( start <= end ) {
805 int mid = (start + end) / 2;
806 fi = g_array_index(export, FILE_INFO, mid);
807 if( fi.startoff < a ) {
809 } else if( fi.startoff > a ) {
817 /* end should never go negative, since first startoff is 0 and a >= 0 */
/* `end` now indexes the file that holds offset a. */
820 fi = g_array_index(export, FILE_INFO, end);
821 *fhandle = fi.fhandle;
822 *foffset = a - fi.startoff;
/* Limit the transfer so that it does not run into the next file. */
824 if( end+1 < export->len ) {
825 FILE_INFO fi_next = g_array_index(export, FILE_INFO, end+1);
826 *maxbytes = fi_next.startoff - a;
833 * seek to a position in a file, with error handling.
834 * @param handle a filedescriptor
835 * @param a position to seek to
836 * @todo get rid of this; lastpoint is a global variable right now, but it
837 * shouldn't be. If we pass it on as a parameter, that makes things a *lot*
/*
 * myseek: position file descriptor `handle` at absolute offset `a`
 * (SEEK_SET); calls err() when lseek() reports failure.
 */
840 void myseek(int handle,off_t a) {
841 if (lseek(handle, a, SEEK_SET) < 0) {
842 err("Can not seek locally!\n");
847 * Write an amount of bytes at a given offset to the right file. This
848 * abstracts the write-side of the multiple file option.
850 * @param a The offset where the write should start
851 * @param buf The buffer to write from
852 * @param len The length of buf
853 * @param client The client we're serving for
854 * @return The number of bytes actually written, or -1 in case of an error
/*
 * rawexpwrite: perform one write() into the file that backs export offset a.
 *
 * get_filepos() supplies the descriptor, the offset inside that file and the
 * byte limit before the next file; the result of write() is returned
 * directly, so fewer than len bytes may have been written.
 * NOTE(review): local declarations, the early return on lookup failure and
 * the statement that clamps len to maxbytes are not present in this copy.
 */
856 ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client) {
861 if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
/* maxbytes == 0 is treated as "no limit". */
863 if(maxbytes && len > maxbytes)
866 DEBUG4("(WRITE to fd %d offset %Lu len %u), ", fhandle, foffset, len);
868 myseek(fhandle, foffset);
869 return write(fhandle, buf, len);
873 * Call rawexpwrite repeatedly until all data has been written.
874 * @return 0 on success, nonzero on failure
/*
 * rawexpwrite_fully: call rawexpwrite() repeatedly while data remains and
 * each call makes progress. Returns nonzero when the last call failed
 * (ret < 0) or when bytes are still outstanding (len != 0).
 * NOTE(review): the loop body that advances a/buf/len is not present in this
 * copy of the file.
 */
876 int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client) {
879 while(len > 0 && (ret=rawexpwrite(a, buf, len, client)) > 0 ) {
884 return (ret < 0 || len != 0);
888 * Read an amount of bytes at a given offset from the right file. This
889 * abstracts the read-side of the multiple files option.
891 * @param a The offset where the read should start
892 * @param buf A buffer to read into
893 * @param len The size of buf
894 * @param client The client we're serving for
895 * @return The number of bytes actually read, or -1 in case of an
/*
 * rawexpread: perform one read() from the file that backs export offset a.
 *
 * get_filepos() supplies the descriptor, the offset inside that file and the
 * byte limit before the next file; the result of read() is returned directly,
 * so fewer than len bytes may have been read.
 * NOTE(review): local declarations, the early return on lookup failure and
 * the statement that clamps len to maxbytes are not present in this copy.
 */
898 ssize_t rawexpread(off_t a, char *buf, size_t len, CLIENT *client) {
903 if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
/* maxbytes == 0 is treated as "no limit". */
905 if(maxbytes && len > maxbytes)
908 DEBUG4("(READ from fd %d offset %Lu len %u), ", fhandle, foffset, len);
910 myseek(fhandle, foffset);
911 return read(fhandle, buf, len);
915 * Call rawexpread repeatedly until all data has been read.
916 * @return 0 on success, nonzero on failure
/*
 * rawexpread_fully: call rawexpread() repeatedly while data remains and each
 * call makes progress. Returns nonzero when the last call failed (ret < 0) or
 * when bytes are still outstanding (len != 0), e.g. after hitting end of file.
 * NOTE(review): the loop body that advances a/buf/len is not present in this
 * copy of the file.
 */
918 int rawexpread_fully(off_t a, char *buf, size_t len, CLIENT *client) {
921 while(len > 0 && (ret=rawexpread(a, buf, len, client)) > 0 ) {
926 return (ret < 0 || len != 0);
930 * Read an amount of bytes at a given offset from the right file. This
931 * abstracts the read-side of the copyonwrite stuff, and calls
932 * rawexpread() with the right parameters to do the actual work.
933 * @param a The offset where the read should start
934 * @param buf A buffer to read into
935 * @param len The size of buf
936 * @param client The client we're going to read for
937 * @return 0 on success, nonzero on failure
/*
 * expread: read len bytes at export offset a, honouring copy-on-write.
 *
 * Without F_COPYONWRITE this is simply rawexpread_fully(). Otherwise the
 * request is walked one DIFFPAGESIZE page at a time: a page whose difmap
 * entry is not (u32)-1 is read from the diff file at the recorded page
 * index, any other page is read from the original export.
 *
 * NOTE(review): the declarations of rdlen/offset, the assignment of offset
 * and the final return are not present in this copy of the file.
 */
939 int expread(off_t a, char *buf, size_t len, CLIENT *client) {
941 off_t mapcnt, mapl, maph, pagestart;
943 if (!(client->server->flags & F_COPYONWRITE))
944 return(rawexpread_fully(a, buf, len, client));
945 DEBUG3("Asked to read %d bytes at %Lu.\n", len, (unsigned long long)a);
/* First and last page index touched by the request. */
947 mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
949 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
950 pagestart=mapcnt*DIFFPAGESIZE;
/* Bytes handled in this page: the rest of the request or the rest of the page. */
952 rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
953 len : (size_t)DIFFPAGESIZE-offset;
954 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
955 DEBUG3("Page %Lu is at %lu\n", (unsigned long long)mapcnt,
956 (unsigned long)(client->difmap[mapcnt]));
/*
 * NOTE(review): difmap entries are u32; if the product below is evaluated in
 * 32 bits it wraps once the diff file exceeds 4 GiB -- confirm the type of
 * `offset`. A short read from the diff file is treated as failure.
 */
957 myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset);
958 if (read(client->difffile, buf, rdlen) != rdlen) return -1;
959 } else { /* the block is not there */
960 DEBUG2("Page %Lu is not here, we read the original one\n",
961 (unsigned long long)mapcnt);
962 if(rawexpread_fully(a, buf, rdlen, client)) return -1;
964 len-=rdlen; a+=rdlen; buf+=rdlen;
970 * Write an amount of bytes at a given offset to the right file. This
971 * abstracts the write-side of the copyonwrite option, and calls
972 * rawexpwrite() with the right parameters to do the actual work.
974 * @param a The offset where the write should start
975 * @param buf The buffer to write from
976 * @param len The length of buf
977 * @param client The client we're going to write for.
978 * @return 0 on success, nonzero on failure
/*
 * expwrite: write len bytes at export offset a, honouring copy-on-write.
 *
 * Without F_COPYONWRITE this is simply rawexpwrite_fully(). Otherwise the
 * request is walked one DIFFPAGESIZE page at a time. A page that already has
 * a difmap entry is updated in place inside the diff file. For a page seen
 * for the first time, the full original page is read from the export into
 * pagebuf, the new bytes are overlaid, and the whole page is written to the
 * diff file; the original export is not modified on this path.
 *
 * NOTE(review): declarations of pagestart/offset/wrlen/rdlen, the assignment
 * of offset, the failure returns and the final return are not present in this
 * copy of the file.
 */
980 int expwrite(off_t a, char *buf, size_t len, CLIENT *client) {
981 char pagebuf[DIFFPAGESIZE];
982 off_t mapcnt,mapl,maph;
987 if (!(client->server->flags & F_COPYONWRITE))
988 return(rawexpwrite_fully(a, buf, len, client));
989 DEBUG3("Asked to write %d bytes at %Lu.\n", len, (unsigned long long)a);
/* First and last page index touched by the request. */
991 mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
993 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
994 pagestart=mapcnt*DIFFPAGESIZE ;
/* Bytes handled in this page: the rest of the request or the rest of the page. */
996 wrlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
997 len : (size_t)DIFFPAGESIZE-offset;
999 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1000 DEBUG3("Page %Lu is at %lu\n", (unsigned long long)mapcnt,
1001 (unsigned long)(client->difmap[mapcnt])) ;
1002 myseek(client->difffile,
1003 client->difmap[mapcnt]*DIFFPAGESIZE+offset);
1004 if (write(client->difffile, buf, wrlen) != wrlen) return -1 ;
1005 } else { /* the block is not there */
/*
 * NOTE(review): the seek uses difffilelen, but with F_SPARSE set the map entry
 * recorded on the next line is mapcnt and difffilelen is not incremented.
 * Later accesses seek to difmap[mapcnt]*DIFFPAGESIZE, which would then differ
 * from where this page is written -- confirm.
 */
1006 myseek(client->difffile,client->difffilelen*DIFFPAGESIZE) ;
1007 client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++;
1008 DEBUG3("Page %Lu is not here, we put it at %lu\n",
1009 (unsigned long long)mapcnt,
1010 (unsigned long)(client->difmap[mapcnt]));
/* Fetch the complete original page so the diff file always holds whole pages. */
1011 rdlen=DIFFPAGESIZE ;
1012 if (rawexpread_fully(pagestart, pagebuf, rdlen, client))
1014 memcpy(pagebuf+offset,buf,wrlen) ;
1015 if (write(client->difffile, pagebuf, DIFFPAGESIZE) !=
1019 len-=wrlen ; a+=wrlen ; buf+=wrlen ;
1025 * Do the initial negotiation.
1027 * @param client The client we're negotiating with.
/*
 * negotiate: send the initial handshake to a newly connected client.
 *
 * Written to client->net, in this order: 8 bytes of INIT_PASSWD, the
 * cliserv_magic value converted with htonll(), the export size as a 64-bit
 * value converted with htonll(), 4 bytes of flags converted with htonl()
 * (NBD_FLAG_HAS_FLAGS, plus NBD_FLAG_READ_ONLY for read-only exports), and
 * 124 bytes taken from the zeroed `zeros` buffer.
 *
 * Each write() is only checked for a negative result; a short write is not
 * detected here.
 * NOTE(review): the declarations of zeros and size_host are not present in
 * this copy; `zeros` must be at least 124 bytes -- confirm.
 */
1029 void negotiate(CLIENT *client) {
1032 u32 flags = NBD_FLAG_HAS_FLAGS;
1034 memset(zeros, '\0', sizeof(zeros));
1035 if (write(client->net, INIT_PASSWD, 8) < 0)
1036 err("Negotiation failed: %m");
/*
 * NOTE(review): cliserv_magic is converted in place. If it is a file-scope
 * variable, a second call in the same process would convert it again --
 * confirm where it is declared.
 */
1037 cliserv_magic = htonll(cliserv_magic);
1038 if (write(client->net, &cliserv_magic, sizeof(cliserv_magic)) < 0)
1039 err("Negotiation failed: %m");
1040 size_host = htonll((u64)(client->exportsize));
1041 if (write(client->net, &size_host, 8) < 0)
1042 err("Negotiation failed: %m");
1043 if (client->server->flags & F_READONLY)
1044 flags |= NBD_FLAG_READ_ONLY;
1045 flags = htonl(flags);
1046 if (write(client->net, &flags, 4) < 0)
1047 err("Negotiation failed: %m");
1048 if (write(client->net, zeros, 124) < 0)
1049 err("Negotiation failed: %m");
1052 /** sending macro. */
/*
 * SEND(net, reply): write the whole `reply` object to descriptor `net` via
 * writeit(). The expansion already ends in ';', and `reply` must be an lvalue
 * because its address is taken.
 */
1053 #define SEND(net,reply) writeit( net, &reply, sizeof( reply ));
/*
 * ERROR(client, reply): send `reply` with its error field set to htonl(-1),
 * then reset the field to 0 so the same reply object can be reused.
 * Expands to a brace block, not to a single expression statement.
 */
1055 #define ERROR(client,reply) { reply.error = htonl(-1); SEND(client->net,reply); reply.error = 0; }
1057 * Serve a file to a single client.
1059 * @todo This beast needs to be split up in many tiny little manageable
1060 * pieces. Preferably with a chainsaw.
1062 * @param client The client we're going to serve to.
1063 * @return when the client disconnects
/*
 * mainloop: serve NBD requests from one client until it disconnects.
 *
 * For every request: (re)arm the idle alarm when a timeout is configured,
 * read the request header, convert its fields to host byte order, validate
 * magic, length and range, then either receive the payload and store it with
 * expwrite() or fetch data with expread() and send it after the reply header.
 *
 * NOTE(review): the declaration of buf and len, the loop statement itself,
 * the `continue`/return statements and closing braces are not present in
 * this copy of the file.
 */
1065 int mainloop(CLIENT *client) {
1066 struct nbd_request request;
1067 struct nbd_reply reply;
1068 gboolean go_on=TRUE;
1073 DEBUG("Entering request loop!\n");
/* The reply magic is constant, so it is filled in once before the loop. */
1074 reply.magic = htonl(NBD_REPLY_MAGIC);
1083 if (client->server->timeout)
1084 alarm(client->server->timeout);
1085 readit(client->net, &request, sizeof(request));
1086 request.from = ntohll(request.from);
1087 request.type = ntohl(request.type);
1089 if (request.type==NBD_CMD_DISC) {
1090 msg2(LOG_INFO, "Disconnect request received.");
/* On disconnect the copy-on-write diff file is closed and deleted. */
1091 if (client->server->flags & F_COPYONWRITE) {
/*
 * NOTE(review): copyonwrite_prepare() obtains difmap from calloc(), but it is
 * released with g_free() here; the allocator pair does not match.
 */
1092 if (client->difmap) g_free(client->difmap) ;
1093 close(client->difffile);
1094 unlink(client->difffilename);
1095 free(client->difffilename);
1101 len = ntohl(request.len);
/* The magic is compared in network byte order (request.magic is not converted). */
1103 if (request.magic != htonl(NBD_REQUEST_MAGIC))
1104 err("Not enough magic.");
/*
 * NOTE(review): the read path below stores data at buf + sizeof(struct
 * nbd_reply). If buf holds BUFSIZE bytes, this bound allows a request to
 * overrun it; "BUFSIZE - sizeof(struct nbd_reply)" would be the safe limit --
 * confirm the declared size of buf.
 */
1105 if (len > BUFSIZE + sizeof(struct nbd_reply))
1106 err("Request too big!");
1108 printf("%s from %Lu (%Lu) len %d, ", request.type ? "WRITE" :
1109 "READ", (unsigned long long)request.from,
1110 (unsigned long long)request.from / 512, len);
/* Echo the request handle so the client can match the reply. */
1112 memcpy(reply.handle, request.handle, sizeof(reply.handle));
1113 if ((request.from + len) > (OFFT_MAX)) {
1114 DEBUG("[Number too large!]");
1115 ERROR(client, reply);
/*
 * NOTE(review): the sum is cast to ssize_t before the comparison; where
 * ssize_t is narrower than off_t this truncates large offsets -- confirm.
 */
1119 if (((ssize_t)((off_t)request.from + len) > client->exportsize)) {
1121 ERROR(client, reply);
1125 if (request.type==NBD_CMD_WRITE) {
1126 DEBUG("wr: net->buf, ");
/* The payload is consumed from the socket before the read-only check. */
1127 readit(client->net, buf, len);
1128 DEBUG("buf->exp, ");
1129 if ((client->server->flags & F_READONLY) ||
1130 (client->server->flags & F_AUTOREADONLY)) {
1131 DEBUG("[WRITE to READONLY!]");
1132 ERROR(client, reply);
1135 if (expwrite(request.from, buf, len, client)) {
1136 DEBUG("Write failed: %m" );
1137 ERROR(client, reply);
1140 SEND(client->net, reply);
1146 DEBUG("exp->buf, ");
/* Data is placed after room for the reply header so both go out in one write. */
1147 if (expread(request.from, buf + sizeof(struct nbd_reply), len, client)) {
1148 DEBUG("Read failed: %m");
1149 ERROR(client, reply);
1153 DEBUG("buf->net, ");
1154 memcpy(buf, &reply, sizeof(struct nbd_reply));
1155 writeit(client->net, buf, len + sizeof(struct nbd_reply));
1162 * Set up client export array, which is an array of FILE_INFO.
1163 * Also, split a single exportfile into multiple ones, if that was asked.
1164 * @param client information on the client which we want to setup export for
/*
 * setupexport: open the file(s) behind an export and compute its total size.
 *
 * Fills client->export with one FILE_INFO per opened file. In multi-file
 * mode the names are "<exportname>.<i>"; otherwise the export name is used as
 * is. Each file's startoff is the previous file's startoff plus its detected
 * size, and client->exportsize ends up as the sum over all files unless
 * server->expected_size overrides it with a smaller or equal value.
 *
 * NOTE(review): the loop statement, the declarations of i/fi/tmpname, the
 * multi-file branch condition and the loop exit are not present in this copy.
 */
1166 void setupexport(CLIENT* client) {
1168 off_t laststartoff = 0, lastsize = 0;
1169 int multifile = (client->server->flags & F_MULTIFILE);
1171 client->export = g_array_new(TRUE, TRUE, sizeof(FILE_INFO));
1173 /* If multi-file, open as many files as we can.
1174 * If not, open exactly one file.
1175 * Calculate file sizes as we go to get total size. */
/* NOTE(review): open() flags are an int; mode_t is used to hold them here. */
1179 mode_t mode = (client->server->flags & F_READONLY) ? O_RDONLY : O_RDWR;
1182 tmpname=g_strdup_printf("%s.%d", client->exportname, i);
1184 tmpname=g_strdup(client->exportname);
1186 DEBUG2( "Opening %s\n", tmpname );
1187 fi.fhandle = open(tmpname, mode);
1188 if(fi.fhandle == -1 && mode == O_RDWR) {
1189 /* Try again because maybe media was read-only */
1190 fi.fhandle = open(tmpname, O_RDONLY);
/* A successful read-only fallback switches the server's flags to read-only. */
1191 if(fi.fhandle != -1) {
1192 client->server->flags |= F_AUTOREADONLY;
1193 client->server->flags |= F_READONLY;
1196 if(fi.fhandle == -1) {
/* In multi-file mode a failed open after the first file is tolerated. */
1197 if(multifile && i>0)
1199 err("Could not open exported file: %m");
1201 fi.startoff = laststartoff + lastsize;
1202 g_array_append_val(client->export, fi);
1205 /* Starting offset and size of this file will be used to
1206 * calculate starting offset of next file */
1207 laststartoff = fi.startoff;
1208 lastsize = size_autodetect(fi.fhandle);
1214 /* Set export size to total calculated size */
1215 client->exportsize = laststartoff + lastsize;
1217 /* Export size may be overridden */
1218 if(client->server->expected_size) {
1219 /* desired size must be <= total calculated size */
1220 if(client->server->expected_size > client->exportsize) {
1221 err("Size of exported file is too big\n");
1224 client->exportsize = client->server->expected_size;
1227 msg3(LOG_INFO, "Size of exported file/device is %Lu", (unsigned long long)client->exportsize);
1229 msg3(LOG_INFO, "Total number of files: %d", i);
/**
 * Create the per-client diff file and its page map. serveconnection() calls
 * this when the server flags contain F_COPYONWRITE.
 *
 * Writes client->difffilename, client->difffile and client->difmap.
 * Allocation and open failures are reported through err().
 *
 * @param client the client; exportname, clientname and exportsize are read
 * @return not visible in this listing
 */
1233 int copyonwrite_prepare(CLIENT* client) {
/* Fixed 1024-byte buffer for the diff file name. */
1235 if ((client->difffilename = malloc(1024))==NULL)
1236 err("Failed to allocate string for diff file name");
/* Name is "<exportname>-<clientname>-<number>.diff"; the argument supplying
 * the number is on a line not shown. The explicit terminator afterwards is
 * redundant with snprintf's own, but harmless. */
1237 snprintf(client->difffilename, 1024, "%s-%s-%d.diff",client->exportname,client->clientname,
1239 client->difffilename[1023]='\0';
1240 msg3(LOG_INFO,"About to create map and diff file %s",client->difffilename) ;
/* Created or truncated, read-write, accessible to the owner only (0600). */
1241 client->difffile=open(client->difffilename,O_RDWR | O_CREAT | O_TRUNC,0600) ;
1242 if (client->difffile<0) err("Could not create diff file (%m)") ;
/* One u32 map entry per DIFFPAGESIZE bytes of export, each initialised to
 * (u32)-1 by the loop below.
 * NOTE(review): the entry count is exportsize/DIFFPAGESIZE; if that is an
 * integer division and exportsize is not a multiple of DIFFPAGESIZE, the
 * trailing partial page has no entry -- confirm against the code that
 * indexes difmap. */
1243 if ((client->difmap=calloc(client->exportsize/DIFFPAGESIZE,sizeof(u32)))==NULL)
1244 err("Could not allocate memory") ;
1245 for (i=0;i<client->exportsize/DIFFPAGESIZE;i++) client->difmap[i]=(u32)-1 ;
1251 * Run a command. This is used for the ``prerun'' and ``postrun'' config file
1254 * @param command the command to be run. Read from the config file
1255 * @param file the file name we're about to export
/**
 * Run an external command for an export (used by serveconnection() for the
 * server's prerun and postrun settings).
 *
 * Only the construction of the command string is visible in this listing:
 * `command` is handed to g_strdup_printf() as the format string, with
 * `file` as its single argument.
 *
 * NOTE(review): because `command` is itself the format, it may contain at
 * most one conversion and that conversion must be %s; anything else in the
 * configured string is undefined behaviour -- confirm the config parser
 * enforces this.
 *
 * @param command format string for the command line
 * @param file export file name substituted into `command`
 * @return not visible here; serveconnection() enters its guarded block when
 * the prerun call returns 0
 */
1257 int do_run(gchar* command, gchar* file) {
1262 cmd = g_strdup_printf(command, file);
1270 * Serve a connection.
1272 * @todo allow for multithreading, perhaps use libevent. Not just yet, though;
1273 * follow the road map.
1275 * @param client a connected client
/**
 * Serve one connected client from export setup through the postrun command.
 *
 * @param client a connected client; client->server, client->net and
 * client->exportname are used
 */
1277 void serveconnection(CLIENT *client) {
/* Open the backing file(s) and compute the export size. */
1278 setupexport(client);
/* Diff file and page map are only prepared when F_COPYONWRITE is set. The
 * return value of copyonwrite_prepare() is not checked. */
1280 if (client->server->flags & F_COPYONWRITE) {
1281 copyonwrite_prepare(client);
1284 setmysockopt(client->net);
/* The guarded block runs only when the prerun command returns 0; its body
 * is on lines not shown in this listing. */
1286 if(!do_run(client->server->prerun, client->exportname)) {
/* The return value of the postrun command is ignored. */
1289 do_run(client->server->postrun, client->exportname);
1293 * Find the name of the file we have to serve. This will use g_strdup_printf
1294 * to put the IP address of the client inside a filename containing
1295 * "%s" (in the form as specified by the "virtstyle" option). That name
1296 * is then written to client->exportname.
1298 * @param net A socket connected to an nbd client
1299 * @param client information about the client. The IP address in human-readable
1300 * format will be written to a new char* buffer, the address of which will be
1301 * stored in client->clientname.
/**
 * Look up the peer address of a connected socket and derive the client's
 * export name from it according to the server's virtstyle.
 *
 * Writes client->exportname and client->clientname, both newly allocated
 * with g_strdup/g_strdup_printf.
 *
 * NOTE(review): the case labels of the switch and the declarations of
 * `peername`, `netname`, `tmp` and `i` are on lines not shown; the comments
 * below describe the visible statements only.
 *
 * @param net socket connected to the client
 * @param client client record to fill in; client->server is read
 */
1303 void set_peername(int net, CLIENT *client) {
1304 struct sockaddr_in addrin;
1305 struct sockaddr_in netaddr;
/* NOTE(review): declared size_t but passed to getpeername() through a
 * (socklen_t *) cast below. Where the two types differ in width the call
 * reads and writes only part of this object -- declaring it socklen_t
 * would remove the cast. */
1306 size_t addrinlen = sizeof( addrin );
/* NOTE(review): the error text names getsockname, but the failing call is
 * getpeername. */
1312 if (getpeername(net, (struct sockaddr *) &addrin, (socklen_t *)&addrinlen) < 0)
1313 err("getsockname failed: %m");
/* Private copy of the dotted-quad text; inet_ntoa() is called again further
 * down for the network address. */
1314 peername = g_strdup(inet_ntoa(addrin.sin_addr));
1315 switch(client->server->virtstyle) {
/* Export name is the configured name, unchanged. */
1317 client->exportname=g_strdup(client->server->exportname);
/* Walks peername looking for '.' characters; what is done with them is on
 * a line not shown. */
1320 for(i=0;i<strlen(peername);i++) {
1321 if(peername[i]=='.') {
/* The configured export name is used as a printf format with the peer
 * address text as its argument. */
1326 client->exportname=g_strdup_printf(client->server->exportname, peername);
/* Builds "<network>/<peer>" and substitutes it into the configured name.
 * The network part is obtained by shifting the address right and then left
 * by 32-cidrlen bits.
 * NOTE(review): s_addr is kept in network byte order, so on a little-endian
 * host these shifts clear the wrong end of the address; and cidrlen == 0
 * gives a shift by 32, which is undefined for a 32-bit operand -- confirm
 * and consider masking a host-order copy instead. */
1329 memcpy(&netaddr, &addrin, addrinlen);
1330 netaddr.sin_addr.s_addr>>=32-(client->server->cidrlen);
1331 netaddr.sin_addr.s_addr<<=32-(client->server->cidrlen);
1332 netname = inet_ntoa(netaddr.sin_addr);
1333 tmp=g_strdup_printf("%s/%s", netname, peername);
1334 client->exportname=g_strdup_printf(client->server->exportname, tmp);
1338 msg4(LOG_INFO, "connect from %s, assigned file is %s",
1339 peername, client->exportname);
/* clientname receives its own copy of the peer address text. */
1340 client->clientname=g_strdup(peername);
1346 * @param data a pointer to pid_t which should be freed
/**
 * Destructor callback for a pid_t pointer. setup_servers() registers it as
 * the value-destroy function of the `children` hash table. The body is not
 * visible in this listing.
 *
 * @param data pointer to the pid_t to be released
 */
1348 void destroy_pid_t(gpointer data) {
1353 * Go daemon (unless we specified at compile time that we didn't want this)
1354 * @param serve the first server of our configuration. If its port is zero,
1355 * then do not daemonize, because we're doing inetd then. This parameter
1356 * is only used to create a PID file of the form
1357 * /var/run/nbd-server.<port>.pid; it's not modified in any way.
/**
 * Write the process ID to a pid file; compiled only when neither NODAEMON
 * nor NOFORK is defined. The statements that actually detach the process
 * are on lines not shown in this listing.
 *
 * NOTE(review): the header comment above describes the pid file as
 * /var/run/nbd-server.<port>.pid, but the default templates below are
 * "/var/run/server.%d.pid" and "/var/run/server.pid" -- confirm which name
 * is intended.
 *
 * @param serve first configured server, may be NULL; only serve->port is
 * read
 */
1359 #if !defined(NODAEMON) && !defined(NOFORK)
1360 void daemonize(SERVER* serve) {
/* A server with port zero is treated specially; the body of this branch is
 * not visible (the header comment says such a server is not daemonized). */
1363 if(serve && !(serve->port)) {
/* No pid-file template supplied: fall back to a default. Which of the two
 * defaults is chosen depends on a condition not shown -- presumably the
 * "%d" form when `serve` is non-NULL. */
1369 if(!*pidftemplate) {
1371 strncpy(pidftemplate, "/var/run/server.%d.pid", 255);
1373 strncpy(pidftemplate, "/var/run/server.pid", 255);
/* The template is used as the format; the port (or 0 without a server) is
 * its only argument. */
1376 snprintf(pidfname, 255, pidftemplate, serve ? serve->port : 0);
1377 pidf=fopen(pidfname, "w");
1379 fprintf(pidf,"%d\n", (int)getpid());
/* Presumably the fopen-failure path; the message has no trailing newline. */
1383 fprintf(stderr, "Not fatal; continuing");
/* Fallback definition, presumably the #else branch: daemonize() expands to
 * nothing. */
1387 #define daemonize(serve)
1388 #endif /* !defined(NODAEMON) && !defined(NOFORK) */
1391 * Connect a server's socket.
1393 * @param serve the server we want to connect.
/**
 * Create, configure, bind and listen on the TCP socket for one server, and
 * install the SIGCHLD and SIGTERM handlers.
 *
 * The socket descriptor is stored in serve->socket. Failures of setsockopt,
 * fcntl and sigaction are reported through err(); the handling for socket,
 * bind and listen failures is on lines not shown in this listing.
 *
 * @param serve the server to set up; serve->port is read, serve->socket is
 * written
 */
1395 void setup_serve(SERVER *serve) {
1396 struct sockaddr_in addrin;
1397 struct sigaction sa;
1398 int addrinlen = sizeof(addrin);
/* IPv4 TCP socket. */
1405 if ((serve->socket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0)
1408 /* lose the pesky "Address already in use" error message */
1409 if (setsockopt(serve->socket,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) {
1410 err("setsockopt SO_REUSEADDR");
1412 if (setsockopt(serve->socket,SOL_SOCKET,SO_KEEPALIVE,&yes,sizeof(int)) == -1) {
1413 err("setsockopt SO_KEEPALIVE");
1416 /* make the listening socket non-blocking */
1417 if ((sock_flags = fcntl(serve->socket, F_GETFL, 0)) == -1) {
1418 err("fcntl F_GETFL");
1420 if (fcntl(serve->socket, F_SETFL, sock_flags | O_NONBLOCK) == -1) {
1421 err("fcntl F_SETFL O_NONBLOCK");
1424 DEBUG("Waiting for connections... bind, ");
/* Bind to the configured port with address 0 (any local address), then
 * listen with a backlog of 1.
 * NOTE(review): only these three fields of addrin are assigned in the
 * visible lines; confirm the structure is zeroed on a line not shown. */
1425 addrin.sin_family = AF_INET;
1426 addrin.sin_port = htons(serve->port);
1427 addrin.sin_addr.s_addr = 0;
1428 if (bind(serve->socket, (struct sockaddr *) &addrin, addrinlen) < 0)
1431 if (listen(serve->socket, 1) < 0)
/* Process-wide handlers for SIGCHLD and SIGTERM, both with an empty mask
 * and SA_RESTART. setup_servers() calls this function once per server, so
 * they are re-installed each time. */
1433 sa.sa_handler = sigchld_handler;
1434 sigemptyset(&sa.sa_mask);
1435 sa.sa_flags = SA_RESTART;
1436 if(sigaction(SIGCHLD, &sa, NULL) == -1)
1437 err("sigaction: %m");
1438 sa.sa_handler = sigterm_handler;
1439 sigemptyset(&sa.sa_mask);
1440 sa.sa_flags = SA_RESTART;
1441 if(sigaction(SIGTERM, &sa, NULL) == -1)
1442 err("sigaction: %m");
1446 * Connect our servers.
1448 void setup_servers(GArray* servers) {
1451 for(i=0;i<servers->len;i++) {
1452 setup_serve(&(g_array_index(servers, SERVER, i)));
1454 children=g_hash_table_new_full(g_int_hash, g_int_equal, NULL, destroy_pid_t);
1458 * Loop through the available servers, and serve them.
/**
 * Wait for connections on all server sockets and hand each accepted,
 * authorized client to a forked child, which calls serveconnection().
 *
 * NOTE(review): many lines of this function (declarations, the outer loop,
 * error handling, closing braces and comment delimiters) are not shown in
 * this listing; the comments here describe visible statements only, and
 * none are added after the unterminated comment near the end.
 *
 * @param servers array of SERVER structures whose sockets are listening
 * @return not visible in this listing
 */
1460 int serveloop(GArray* servers) {
1461 struct sockaddr_in addrin;
1462 socklen_t addrinlen=sizeof(addrin);
1472 * Set up the master fd_set. The set of descriptors we need
1473 * to select() for never changes anyway and it buys us a *lot*
1474 * of time to only build this once. However, if we ever choose
1475 * to not fork() for clients anymore, we may have to revisit
/* Add every server socket to mset and track the highest descriptor. */
1480 for(i=0;i<servers->len;i++) {
1481 sock=(g_array_index(servers, SERVER, i)).socket;
1482 FD_SET(sock, &mset);
1483 max=sock>max?sock:max;
/* select() modifies the set it is given, so it works on a copy of mset. */
1490 memcpy(&rset, &mset, sizeof(fd_set));
1493 if(select(max+1, &rset, NULL, NULL, &tv)>0) {
/* Check each server socket for a pending connection. */
1495 for(i=0;i<servers->len;i++) {
1496 serve=&(g_array_index(servers, SERVER, i));
1497 if(FD_ISSET(serve->socket, &rset)) {
1498 if ((net=accept(serve->socket, (struct sockaddr *) &addrin, &addrinlen)) < 0)
/* New client record. Export size starts at OFFT_MAX and is replaced later
 * by setupexport(). */
1501 client = g_malloc(sizeof(CLIENT));
1502 client->server=serve;
1503 client->exportsize=OFFT_MAX;
1505 set_peername(net, client);
1506 if (!authorized_client(client)) {
1507 msg2(LOG_INFO,"Unauthorized client") ;
1511 msg2(LOG_INFO,"Authorized client") ;
/* The pid is heap-allocated because the same pointer is stored as both key
 * and value in `children`. */
1512 pid=g_malloc(sizeof(pid_t));
1514 if ((*pid=fork())<0) {
1515 msg3(LOG_INFO,"Could not fork (%s)",strerror(errno)) ;
1519 if (*pid>0) { /* parent */
1521 g_hash_table_insert(children, pid, pid);
/* From here on, presumably the child: it drops its copy of the children
 * table and closes the listening sockets.
 * NOTE(review): the loop condition below uses the comma operator, so
 * `i<servers->len` is evaluated and discarded and the loop runs for as long
 * as the assigned `serve` is non-NULL. It also indexes the array with
 * element type SERVER*, while every other access in this file uses SERVER.
 * The likely intent is `i<servers->len` as the condition and
 * `serve=&g_array_index(servers, SERVER, i)` in the body. The loop also
 * reuses the enclosing loop's counter `i`. */
1525 g_hash_table_destroy(children);
1526 for(i=0;i<servers->len,serve=(g_array_index(servers, SERVER*, i));i++) {
1527 close(serve->socket);
1529 /* FALSE does not free the
1530 actual data. This is required,
1531 because the client has a
1532 direct reference into that
1533 data, and otherwise we get a
1535 g_array_free(servers, FALSE);
1537 msg2(LOG_INFO,"Starting to serve");
1538 serveconnection(client);
1547 * Set up user-ID and/or group-ID
1549 void dousers(void) {
1553 pw=getpwnam(runuser);
1554 if(setuid(pw->pw_uid)<0)
1555 msg3(LOG_DEBUG, "Could not set UID: %s", strerror(errno));
1558 gr=getgrnam(rungroup);
1559 if(setgid(gr->gr_gid)<0)
1560 msg3(LOG_DEBUG, "Could not set GID: %s", strerror(errno));
1565 * Main entry point...
1567 int main(int argc, char *argv[]) {
1572 if (sizeof( struct nbd_request )!=28) {
1573 fprintf(stderr,"Bad size of structure. Alignment problems?\n");
1577 memset(pidftemplate, '\0', 256);
1580 config_file_pos = g_strdup(CFILE);
1581 serve=cmdline(argc, argv);
1582 servers = parse_cfile(config_file_pos, &err);
1583 if(!servers || !servers->len) {
1584 g_warning("Could not parse config file: %s", err->message);
1587 g_array_append_val(servers, *serve);
1589 if (!(serve->port)) {
1592 /* You really should define ISSERVER if you're going to use
1593 * inetd mode, but if you don't, closing stdout and stderr
1594 * (which inetd had connected to the client socket) will let it
1598 open("/dev/null", O_WRONLY);
1599 open("/dev/null", O_WRONLY);
1601 client=g_malloc(sizeof(CLIENT));
1602 client->server=serve;
1604 client->exportsize=OFFT_MAX;
1605 set_peername(0,client);
1606 serveconnection(client);
1610 if((!serve) && (!servers||!servers->len)) {
1611 g_message("Nothing to do! Bye!");
1615 setup_servers(servers);