2 * Network Block Device - server
4 * Copyright 1996-1998 Pavel Machek, distribute under GPL
5 * <pavel@atrey.karlin.mff.cuni.cz>
6 * Copyright 2001-2004 Wouter Verhelst <wouter@debian.org>
7 * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk>
9 * Version 1.0 - hopefully 64-bit-clean
10 * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au>
11 * Version 1.2 - autodetect size of block devices, thanks to Peter T. Breuer <ptb@it.uc3m.es>
12 * Version 1.5 - can compile on Unix systems that don't have 64 bit integer
13 * type, or don't have 64 bit file offsets by defining FS_32BIT
14 * in compile options for nbd-server *only*. This can be done
15 * with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the
16 * original autoconf input file, or I would make it a configure
17 * option.) Ken Yap <ken@nlc.net.au>.
18 * Version 1.6 - fix autodetection of block device size and really make 64 bit
19 * clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk>
20 * Version 2.0 - Version synchronised with client
21 * Version 2.1 - Reap zombie client processes when they exit. Removed
22 * (uncommented) the _IO magic, it's no longer necessary. Wouter
23 * Verhelst <wouter@debian.org>
24 * Version 2.2 - Auto switch to read-only mode (useful for floppies).
25 * Version 2.3 - Fixed code so that Large File Support works. This
26 * removes the FS_32BIT compile-time directive; define
27 * _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be
28 * using FS_32BIT. This will allow you to use files >2GB instead of
29 * having to use the -m option. Wouter Verhelst <wouter@debian.org>
30 * Version 2.4 - Added code to keep track of children, so that we can
31 * properly kill them from initscripts. Add a call to daemon(),
32 * so that processes don't think they have to wait for us, which is
33 * interesting for initscripts as well. Wouter Verhelst
35 * Version 2.5 - Bugfix release: forgot to reset child_arraysize to
36 * zero after fork()ing, resulting in nbd-server going berserk
37 * when it receives a signal with at least one child open. Wouter
38 * Verhelst <wouter@debian.org>
39 * 10/10/2003 - Added socket option SO_KEEPALIVE (sf.net bug 819235);
40 * rectified type of mainloop::size_host (sf.net bugs 814435 and
41 * 817385); close the PID file after writing to it, so that the
42 * daemon can actually be found. Wouter Verhelst
44 * 10/10/2003 - Size of the data "size_host" was wrong and so was not
45 * correctly put in network endianness. Many types were corrected
46 * (size_t and off_t instead of int). <vspaceg@sourceforge.net>
47 * Version 2.6 - Some code cleanup.
48 * Version 2.7 - Better build system.
49 * 11/02/2004 - Doxygenified the source, modularized it a bit. Needs a
50 * lot more work, but this is a start. Wouter Verhelst
54 /* Includes LFS defines, which defines behaviours of some of the following
55 * headers, so must come before those */
58 #include <sys/types.h>
59 #include <sys/socket.h>
61 #include <sys/select.h> /* select */
62 #include <sys/wait.h> /* wait */
63 #ifdef HAVE_SYS_IOCTL_H
64 #include <sys/ioctl.h>
66 #include <sys/param.h>
67 #ifdef HAVE_SYS_MOUNT_H
68 #include <sys/mount.h> /* For BLKGETSIZE */
70 #include <signal.h> /* sigaction */
71 #include <netinet/tcp.h>
72 #include <netinet/in.h> /* sockaddr_in, htons, in_addr */
73 #include <netdb.h> /* hostent, gethostby*, getservby* */
80 #include <arpa/inet.h>
90 /* used in cliserv.h, so must come first */
91 #define MY_NAME "nbd_server"
94 /** Default position of the config file */
96 #define SYSCONFDIR "/etc"
98 #define CFILE SYSCONFDIR "/nbd-server/config"
100 /** Where our config file actually is */
101 gchar* config_file_pos;
103 /** What user we're running as */
105 /** What group we're running as */
106 gchar* rungroup=NULL;
108 /** Logging macros, now nothing goes to syslog unless you say ISSERVER */
110 #define msg2(a,b) syslog(a,b)
111 #define msg3(a,b,c) syslog(a,b,c)
112 #define msg4(a,b,c,d) syslog(a,b,c,d)
114 #define msg2(a,b) g_message(b)
115 #define msg3(a,b,c) g_message(b,c)
116 #define msg4(a,b,c,d) g_message(b,c,d)
119 /* Debugging macros */
122 #define DEBUG( a ) printf( a )
123 #define DEBUG2( a,b ) printf( a,b )
124 #define DEBUG3( a,b,c ) printf( a,b,c )
125 #define DEBUG4( a,b,c,d ) printf( a,b,c,d )
128 #define DEBUG2( a,b )
129 #define DEBUG3( a,b,c )
130 #define DEBUG4( a,b,c,d )
132 #ifndef PACKAGE_VERSION
133 #define PACKAGE_VERSION ""
136 * The highest value a variable of type off_t can reach. This is a signed
137 * integer, so set all bits except for the leftmost one.
139 #define OFFT_MAX ~((off_t)1<<(sizeof(off_t)*8-1))
140 #define LINELEN 256 /**< Size of static buffer used to read the
141 authorization file (yuck) */
142 #define BUFSIZE (1024*1024) /**< Size of buffer that can hold requests */
143 #define DIFFPAGESIZE 4096 /**< diff file uses those chunks */
144 #define F_READONLY 1 /**< flag to tell us a file is readonly */
145 #define F_MULTIFILE 2 /**< flag to tell us a file is exported using -m */
146 #define F_COPYONWRITE 4 /**< flag to tell us a file is exported using
148 #define F_AUTOREADONLY 8 /**< flag to tell us a file is set to autoreadonly */
150 GHashTable *children;
151 char pidfname[256]; /**< name of our PID file */
152 char pidftemplate[256]; /**< template to be used for the filename of the PID file */
153 char default_authname[] = SYSCONFDIR "/nbd-server/allow"; /**< default name of allow file */
156 * Types of virtualization
159 VIRT_NONE=0, /**< No virtualization */
160 VIRT_IPLIT, /**< Literal IP address as part of the filename */
161 VIRT_IPHASH, /**< Replacing all dots in an ip address by a / before
162 doing the same as in IPLIT */
163 VIRT_CIDR, /**< Every subnet in its own directory */
167 * Variables associated with a server.
170 gchar* exportname; /**< (unprocessed) filename of the file we're exporting */
171 off_t expected_size; /**< size of the exported file as it was told to
172 us through configuration */
173 unsigned int port; /**< port we're exporting this file at */
174 char* authname; /**< filename of the authorization file */
175 int flags; /**< flags associated with this exported file */
176 unsigned int timeout;/**< how long a connection may be idle
178 int socket; /**< The socket of this server. */
179 VIRT_STYLE virtstyle;/**< The style of virtualization, if any */
180 uint8_t cidrlen; /**< The length of the mask when we use
181 CIDR-style virtualization */
182 gchar* prerun; /**< command to be run after connecting a client,
183 but before starting to serve */
184 gchar* postrun; /**< command that will be run after the client
189 * Variables associated with a client socket.
192 int fhandle; /**< file descriptor */
193 off_t startoff; /**< starting offset of this file */
197 off_t exportsize; /**< size of the file we're exporting */
198 char *clientname; /**< peer */
199 char *exportname; /**< (processed) filename of the file we're exporting */
200 GArray *export; /**< array of FILE_INFO of exported files;
201 array size is always 1 unless we're
202 doing the multiple file option */
203 int net; /**< The actual client socket */
204 SERVER *server; /**< The server this client is getting data from */
205 char* difffilename; /**< filename of the copy-on-write file, if any */
206 int difffile; /**< filedescriptor of copyonwrite file. @todo
207 shouldn't this be an array too? (cfr export) Or
208 make -m and -c mutually exclusive */
209 u32 difffilelen; /**< number of pages in difffile */
210 u32 *difmap; /**< see comment on the global difmap for this one */
214 * Type of configuration file values
217 PARAM_INT, /**< This parameter is an integer */
218 PARAM_STRING, /**< This parameter is a string */
219 PARAM_BOOL, /**< This parameter is a boolean */
223 * Configuration file values
226 gchar *paramname; /**< Name of the parameter, as it appears in
228 gboolean required; /**< Whether this is a required (as opposed to
229 optional) parameter */
230 PARAM_TYPE ptype; /**< Type of the parameter. */
231 gpointer target; /**< Pointer to where the data of this
232 parameter should be written. If ptype is
233 PARAM_BOOL, the data is or'ed rather than
235 gint flagval; /**< Flag mask for this parameter in case ptype
240 * Check whether a client is allowed to connect. Works with an authorization
241 * file which contains one line per machine, no wildcards.
243 * @param opts The client who's trying to connect.
244 * @return 0 - authorization refused, 1 - OK
246 int authorized_client(CLIENT *opts) { /* checks opts->clientname against the lines of opts->server->authname */
247 const char *ERRMSG="Invalid entry '%s' in authfile '%s', so, refusing all connections.";
252 struct in_addr client;
253 struct in_addr cltemp;
256 if ((f=fopen(opts->server->authname,"r"))==NULL) {
257 msg4(LOG_INFO,"Can't open authorization file %s (%s).",
258 opts->server->authname,strerror(errno)) ;
262 inet_aton(opts->clientname, &client); /* result not checked; client is only used for the address/prefix entries below */
263 while (fgets(line,LINELEN,f)!=NULL) {
264 if((tmp=index(line, '/'))) { /* a '/' marks the line as an address/prefix-length entry */
265 if(strlen(line)<=tmp-line) {
266 msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
270 if(inet_aton(line,&addr)) { /* NOTE(review): inet_aton() returns nonzero when the address IS valid, so this logs ERRMSG for well-formed entries; the test looks inverted -- confirm */
271 msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
274 len=strtol(tmp, NULL, 0); /* prefix length; not range-checked before being used as a shift count */
275 addr.s_addr>>=32-len; /* NOTE(review): len==0 shifts a 32-bit value by 32 (undefined), and s_addr is held in network byte order -- confirm the mask is what was intended */
276 addr.s_addr<<=32-len;
277 memcpy(&cltemp,&client,sizeof(client)); /* mask a copy so the parsed client address stays intact for the next line */
278 cltemp.s_addr>>=32-len;
279 cltemp.s_addr<<=32-len;
280 if(addr.s_addr == cltemp.s_addr) {
284 if (strncmp(line,opts->clientname,strlen(opts->clientname))==0) { /* compares only strlen(clientname) characters, so an entry "10.0.0.12" also matches client "10.0.0.1" */
294 * Read data from a file descriptor into a buffer
296 * @param f a file descriptor
297 * @param buf a buffer
298 * @param len the number of bytes to be read
300 inline void readit(int f, void *buf, size_t len) {
304 if ((res = read(f, buf, len)) <= 0) /* a return of 0 (end of file) is reported through err() just like -1 */
305 err("Read failed: %m");
312 * Write data from a buffer into a filedescriptor
314 * @param f a file descriptor
315 * @param buf a buffer containing data
316 * @param len the number of bytes to be written
318 inline void writeit(int f, void *buf, size_t len) {
322 if ((res = write(f, buf, len)) <= 0) /* a zero-byte write is reported through err() just like -1 */
323 err("Send failed: %m");
330 * Print out a message about how to use nbd-server. Split out to a separate
331 * function so that we can call it from multiple places
334 printf("This is nbd-server version " VERSION "\n");
335 printf("Usage: port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-a timeout_sec] [-C configuration file] [-p PID file name] [-o section name]\n"
336 "\t-r|--read-only\t\tread only\n"
337 "\t-m|--multi-file\t\tmultiple file\n"
338 "\t-c|--copy-on-write\tcopy on write\n"
339 "\t-C|--config-file\tspecify an alternate configuration file\n"
340 "\t-l|--authorize-file\tfile with list of hosts that are allowed to\n\t\t\t\tconnect.\n"
341 "\t-a|--idle-time\t\tmaximum idle seconds; server terminates when\n\t\t\t\tidle time exceeded\n"
342 "\t-p|--pid-file\t\tspecify a filename to write our PID to\n"
343 "\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n\n"
344 "\tif port is set to 0, stdin is used (for running from inetd)\n"
345 "\tif file_to_export contains '%%s', it is substituted with the IP\n"
346 "\t\taddress of the machine trying to connect\n" );
347 printf("Using configuration file %s\n", CFILE);
350 /* Dumps a config file section of the given SERVER*, and exits. */
351 void dump_section(SERVER* serve, gchar* section_header) { /* prints serve as a "[section_header]" config group on stdout */
352 printf("[%s]\n", section_header);
353 printf("\texportname = %s\n", serve->exportname); /* key names here are the same strings parse_cfile() looks up */
354 printf("\tport = %d\n", serve->port); /* port is declared unsigned int but printed with %d */
355 if(serve->flags & F_READONLY) {
356 printf("\treadonly = true\n");
358 if(serve->flags & F_MULTIFILE) {
359 printf("\tmultifile = true\n");
361 if(serve->flags & F_COPYONWRITE) {
362 printf("\tcopyonwrite = true\n");
364 if(serve->expected_size) { /* a size of 0 means "not configured", so no filesize key is written */
365 printf("\tfilesize = %Ld\n", (long long int)serve->expected_size);
367 if(serve->authname) {
368 printf("\tauthfile = %s\n", serve->authname);
371 printf("\ttimeout = %d\n", serve->timeout); /* timeout is declared unsigned int but printed with %d */
377 * Parse the command line.
379 * @param argc the argc argument to main()
380 * @param argv the argv argument to main()
382 SERVER* cmdline(int argc, char *argv[]) { /* builds a SERVER from the command-line options and positional arguments */
386 struct option long_options[] = {
387 {"read-only", no_argument, NULL, 'r'},
388 {"multi-file", no_argument, NULL, 'm'},
389 {"copy-on-write", no_argument, NULL, 'c'},
390 {"authorize-file", required_argument, NULL, 'l'},
391 {"idle-time", required_argument, NULL, 'a'},
392 {"config-file", required_argument, NULL, 'C'},
393 {"pid-file", required_argument, NULL, 'p'},
394 {"output-config", required_argument, NULL, 'o'},
401 gboolean do_output=FALSE;
402 gchar* section_header; /* only assigned when -o is given */
407 serve=g_new0(SERVER, 1); /* zero-filled, so flags, timeout and expected_size start at 0 */
408 serve->authname = g_strdup(default_authname);
409 while((c=getopt_long(argc, argv, "-a:C:cl:mo:rp:", long_options, &i))>=0) { /* the leading '-' makes GNU getopt hand back positional arguments in order instead of permuting them */
412 /* non-option argument */
413 switch(nonspecial++) { /* nonspecial counts positional arguments: port, file, size */
415 serve->port=strtol(optarg, NULL, 0); /* no error or range check on the converted value */
418 serve->exportname = g_strdup(optarg);
419 if(serve->exportname[0] != '/') {
420 fprintf(stderr, "E: The to be exported file needs to be an absolute filename!\n");
425 last=strlen(optarg)-1;
427 if (suffix == 'k' || suffix == 'K' ||
428 suffix == 'm' || suffix == 'M')
430 es = (off_t)atol(optarg); /* atol() cannot report errors and yields a long, which may be narrower than off_t */
438 serve->expected_size = es;
443 serve->flags |= F_READONLY;
446 serve->flags |= F_MULTIFILE;
450 section_header = g_strdup(optarg);
453 strncpy(pidftemplate, optarg, 256); /* NOTE(review): pidftemplate is char[256]; strncpy leaves it unterminated when optarg has 256 or more characters */
456 serve->flags |=F_COPYONWRITE;
459 g_free(config_file_pos);
460 config_file_pos=g_strdup(optarg);
463 g_free(serve->authname); /* drop the default set above before storing the -l value */
464 serve->authname=g_strdup(optarg);
467 serve->timeout=strtol(optarg, NULL, 0); /* no error or range check on the converted value */
475 /* What's left: the port to export, the name of the to be exported
476 * file, and, optionally, the size of the file, in that order. */
483 g_critical("Need a complete configuration on the command line to output a config file section!");
486 dump_section(serve, section_header);
492 * Error codes for config file parsing
495 CFILE_NOTFOUND, /**< The configuration file is not found */
496 CFILE_MISSING_GENERIC, /**< The (required) group "generic" is missing */
497 CFILE_KEY_MISSING, /**< A (required) key is missing */
498 CFILE_VALUE_INVALID, /**< A value is syntactically invalid */
499 CFILE_PROGERR /**< Programmer error */
503 * Remove a SERVER from memory. Used from the hash table
505 void remove_server(gpointer s) { /* untyped argument; presumably so it can be registered as a GLib destroy callback -- the registration is not visible here */
509 g_free(server->exportname); /* strings owned by the SERVER are released with g_free() */
511 g_free(server->authname);
516 * Parse the config file.
518 * @param f the name of the config file
519 * @param e a GError. @see CFILE_ERRORS for what error values this function can
521 * @return a GArray of SERVER structures. If the config file is empty or does not
522 * exist, returns an empty GArray; if the config file contains an
523 * error, returns NULL, and e is set appropriately
525 GArray* parse_cfile(gchar* f, GError** e) { /* reads key file f and collects one SERVER per export group into a GArray */
526 const char* DEFAULT_ERROR = "Could not parse %s in group %s: %s";
527 const char* MISSING_REQUIRED_ERROR = "Could not find required value %s in group %s: %s";
529 gchar *virtstyle=NULL;
531 { "exportname", TRUE, PARAM_STRING, NULL, 0 },
532 { "port", TRUE, PARAM_INT, NULL, 0 },
533 { "authfile", FALSE, PARAM_STRING, NULL, 0 },
534 { "timeout", FALSE, PARAM_INT, NULL, 0 },
535 { "filesize", FALSE, PARAM_INT, NULL, 0 },
536 { "virtstyle", FALSE, PARAM_STRING, NULL, 0 },
537 { "prerun", FALSE, PARAM_STRING, NULL, 0 },
538 { "postrun", FALSE, PARAM_STRING, NULL, 0 },
539 { "readonly", FALSE, PARAM_BOOL, NULL, F_READONLY },
540 { "multifile", FALSE, PARAM_BOOL, NULL, F_MULTIFILE },
541 { "copyonwrite", FALSE, PARAM_BOOL, NULL, F_COPYONWRITE },
542 { "autoreadonly", FALSE, PARAM_BOOL, NULL, F_AUTOREADONLY },
543 { "sparse_cow", FALSE, PARAM_BOOL, NULL, F_SPARSE },
545 const int lp_size=11; /* NOTE(review): the table above has 13 entries and lp[0]..lp[12] receive targets below; if lp_size bounds the per-export loop (the assignment to p_size is not visible here), "autoreadonly" and "sparse_cow" are never read -- confirm */
547 { "user", FALSE, PARAM_STRING, &runuser, 0 },
548 { "group", FALSE, PARAM_STRING, &rungroup, 0 },
554 const char *err_msg=NULL;
562 errdomain = g_quark_from_string("parse_cfile");
563 cfile = g_key_file_new();
564 retval = g_array_new(FALSE, TRUE, sizeof(SERVER)); /* elements are SERVER structures stored by value, not pointers */
565 if(!g_key_file_load_from_file(cfile, f, G_KEY_FILE_KEEP_COMMENTS |
566 G_KEY_FILE_KEEP_TRANSLATIONS, &err)) {
567 g_set_error(e, errdomain, CFILE_NOTFOUND, "Could not open config file."); /* every load failure is reported as CFILE_NOTFOUND with this fixed text */
568 g_key_file_free(cfile);
571 if(strcmp(g_key_file_get_start_group(cfile), "generic")) { /* NOTE(review): the start group is passed straight to strcmp(); verify it cannot be NULL for a file without groups */
572 g_set_error(e, errdomain, CFILE_MISSING_GENERIC, "Config file does not contain the [generic] group!");
573 g_key_file_free(cfile);
576 groups = g_key_file_get_groups(cfile, NULL);
577 for(i=0;groups[i];i++) {
578 memset(&s, '\0', sizeof(SERVER)); /* fresh SERVER for every group */
579 lp[0].target=&(s.exportname); /* targets are bound by table position, so the order must match the table above */
580 lp[1].target=&(s.port);
581 lp[2].target=&(s.authname);
582 lp[3].target=&(s.timeout);
583 lp[4].target=&(s.expected_size);
584 lp[5].target=&(virtstyle);
585 lp[6].target=&(s.prerun);
586 lp[7].target=&(s.postrun);
587 lp[8].target=lp[9].target=lp[10].target=
588 lp[11].target=lp[12].target=&(s.flags); /* all boolean parameters share the flags word and differ only in flagval */
590 /* After the [generic] group, start parsing exports */
595 for(j=0;j<p_size;j++) {
596 g_assert(p[j].target != NULL);
597 g_assert(p[j].ptype==PARAM_INT||p[j].ptype==PARAM_STRING||p[j].ptype==PARAM_BOOL);
600 *((gint*)p[j].target) = /* NOTE(review): stores a gint; the "filesize" target is an off_t (expected_size), which may be wider than gint -- confirm */
601 g_key_file_get_integer(cfile,
607 *((gchar**)p[j].target) =
608 g_key_file_get_string(cfile,
614 value = g_key_file_get_boolean(cfile,
616 p[j].paramname, &err);
619 *((gint*)p[j].target) |= p[j].flagval; /* booleans set or clear their flag bit rather than overwrite the target */
621 *((gint*)p[j].target) &= ~(p[j].flagval);
627 if(err->code == G_KEY_FILE_ERROR_KEY_NOT_FOUND) {
629 /* Ignore not-found error for optional values */
633 err_msg = MISSING_REQUIRED_ERROR;
636 err_msg = DEFAULT_ERROR;
638 g_set_error(e, errdomain, CFILE_VALUE_INVALID, err_msg, p[j].paramname, groups[i], err->message); /* missing-required and unparsable values both use code CFILE_VALUE_INVALID; only the text differs */
639 g_array_free(retval, TRUE);
641 g_key_file_free(cfile);
646 if(!strncmp(virtstyle, "none", 4)) { /* prefix comparisons: any value that merely starts with the keyword is accepted */
647 s.virtstyle=VIRT_NONE;
648 } else if(!strncmp(virtstyle, "ipliteral", 9)) {
649 s.virtstyle=VIRT_IPLIT;
650 } else if(!strncmp(virtstyle, "iphash", 6)) {
651 s.virtstyle=VIRT_IPHASH;
652 } else if(!strncmp(virtstyle, "cidrhash", 8)) {
653 s.virtstyle=VIRT_CIDR;
654 if(strlen(virtstyle)<10) { /* keyword (8 chars) plus at least two more characters for the length */
655 g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s: missing length", virtstyle, groups[i]);
656 g_array_free(retval, TRUE);
657 g_key_file_free(cfile);
660 s.cidrlen=strtol(virtstyle+8, NULL, 0); /* parses whatever follows the keyword; result is stored in a uint8_t without a range check */
662 g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s", virtstyle, groups[i]);
663 g_array_free(retval, TRUE);
664 g_key_file_free(cfile);
668 s.virtstyle=VIRT_IPLIT; /* presumably the default when no virtstyle key is present -- the enclosing condition is not visible here */
670 /* Don't need to free this, it's not our string */
672 /* Don't append values for the [generic] group */
674 g_array_append_val(retval, s); /* copies s into the array by value */
681 * Signal handler for SIGCHLD
682 * @param s the signal we're handling (must be SIGCHLD, or something
685 void sigchld_handler(int s) {
690 while((pid=waitpid(-1, &status, WNOHANG)) > 0) { /* reap every child that has exited, without blocking */
691 if(WIFEXITED(status)) {
692 msg3(LOG_INFO, "Child exited with %d", WEXITSTATUS(status)); /* NOTE(review): logging and GLib hash-table calls run in signal context here; neither is async-signal-safe */
694 i=g_hash_table_lookup(children, &pid); /* the table is keyed by a pointer to the pid value */
696 msg3(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid);
698 DEBUG2("Removing %d from the list of children", pid);
699 g_hash_table_remove(children, &pid);
705 * Kill a child. Called from sigterm_handler::g_hash_table_foreach.
708 * @param value the value corresponding to the above key
709 * @param user_data a pointer which we always set to 1, so that we know what
712 void killchild(gpointer key, gpointer value, gpointer user_data) { /* three-gpointer signature, as required for the g_hash_table_foreach() call in sigterm_handler() */
714 int *parent=user_data; /* user_data is the address of the caller's "parent" variable */
721 * Handle SIGTERM and dispatch it to our children
722 * @param s the signal we're handling (must be SIGTERM, or something
723 * is severely wrong).
725 void sigterm_handler(int s) {
728 g_hash_table_foreach(children, killchild, &parent); /* runs killchild() once per entry of the children table; NOTE(review): this GLib call happens in signal context */
738 * Detect the size of a file.
740 * @param fhandle An open filedescriptor
741 * @return the size of the file, or OFFT_MAX if detection was
744 off_t size_autodetect(int fhandle) {
746 unsigned long sectors;
747 struct stat stat_buf;
750 #ifdef HAVE_SYS_MOUNT_H
751 #ifdef HAVE_SYS_IOCTL_H
753 DEBUG("looking for export size with ioctl BLKGETSIZE\n");
754 if (!ioctl(fhandle, BLKGETSIZE, &sectors) && sectors) {
755 es = (off_t)sectors * (off_t)512;
758 #endif /* BLKGETSIZE */
759 #endif /* HAVE_SYS_IOCTL_H */
760 #endif /* HAVE_SYS_MOUNT_H */
762 DEBUG("looking for fhandle size with fstat\n");
763 stat_buf.st_size = 0;
764 error = fstat(fhandle, &stat_buf);
766 if(stat_buf.st_size > 0)
767 return (off_t)stat_buf.st_size;
769 err("fstat failed: %m");
772 DEBUG("looking for fhandle size with lseek SEEK_END\n");
773 es = lseek(fhandle, (off_t)0, SEEK_END);
774 if (es > ((off_t)0)) {
777 DEBUG2("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4)));
780 err("Could not find size of exported block device: %m");
785 * Get the file handle and offset, given an export offset.
787 * @param export An array of export files
788 * @param a The offset to get corresponding file/offset for
789 * @param fhandle [out] File descriptor
790 * @param foffset [out] Offset into fhandle
791 * @param maxbytes [out] Tells how many bytes can be read/written
792 * from fhandle starting at foffset (0 if there is no limit)
793 * @return 0 on success, -1 on failure
795 int get_filepos(GArray* export, off_t a, int* fhandle, off_t* foffset, size_t* maxbytes ) {
796 /* Negative offset not allowed */
800 /* Binary search for last file with starting offset <= a */
803 int end = export->len - 1;
804 while( start <= end ) {
805 int mid = (start + end) / 2;
806 fi = g_array_index(export, FILE_INFO, mid); /* FILE_INFO is copied out of the array by value */
807 if( fi.startoff < a ) {
809 } else if( fi.startoff > a ) {
817 /* end should never go negative, since first startoff is 0 and a >= 0 */
820 fi = g_array_index(export, FILE_INFO, end); /* end now indexes the file that contains offset a */
821 *fhandle = fi.fhandle;
822 *foffset = a - fi.startoff; /* offset relative to the start of the selected file */
824 if( end+1 < export->len ) { /* only when another file follows is there a limit to report */
825 FILE_INFO fi_next = g_array_index(export, FILE_INFO, end+1);
826 *maxbytes = fi_next.startoff - a; /* bytes left before the next file begins */
833 * seek to a position in a file, with error handling.
834 * @param handle a filedescriptor
835 * @param a position to seek to
836 * @todo get rid of this; lastpoint is a global variable right now, but it
837 * shouldn't be. If we pass it on as a parameter, that makes things a *lot*
840 void myseek(int handle,off_t a) {
841 if (lseek(handle, a, SEEK_SET) < 0) { /* absolute seek; any negative result is reported through err() */
842 err("Can not seek locally!\n");
847 * Write an amount of bytes at a given offset to the right file. This
848 * abstracts the write-side of the multiple file option.
850 * @param a The offset where the write should start
851 * @param buf The buffer to write from
852 * @param len The length of buf
853 * @param client The client we're serving for
854 * @return The number of bytes actually written, or -1 in case of an error
856 ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client) {
861 if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes)) /* map the export offset to a file descriptor and an offset within that file */
863 if(maxbytes && len > maxbytes) /* maxbytes==0 means "no limit"; otherwise the request crosses into the next file */
866 DEBUG4("(WRITE to fd %d offset %Lu len %u), ", fhandle, foffset, len);
868 myseek(fhandle, foffset);
869 return write(fhandle, buf, len); /* a single write(); may transfer fewer bytes than asked, rawexpwrite_fully() loops on this */
873 * Call rawexpwrite repeatedly until all data has been written.
874 * @return 0 on success, nonzero on failure
876 int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client) {
879 while(len > 0 && (ret=rawexpwrite(a, buf, len, client)) > 0 ) { /* stops when everything is written or a call returns 0 or a negative value */
884 return (ret < 0 || len != 0); /* nonzero on a write error or when bytes remain unwritten */
888 * Read an amount of bytes at a given offset from the right file. This
889 * abstracts the read-side of the multiple files option.
891 * @param a The offset where the read should start
892 * @param buf A buffer to read into
893 * @param len The size of buf
894 * @param client The client we're serving for
895 * @return The number of bytes actually read, or -1 in case of an
898 ssize_t rawexpread(off_t a, char *buf, size_t len, CLIENT *client) {
903 if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes)) /* map the export offset to a file descriptor and an offset within that file */
905 if(maxbytes && len > maxbytes) /* maxbytes==0 means "no limit"; otherwise the request crosses into the next file */
908 DEBUG4("(READ from fd %d offset %Lu len %u), ", fhandle, foffset, len);
910 myseek(fhandle, foffset);
911 return read(fhandle, buf, len); /* a single read(); may return fewer bytes than asked, rawexpread_fully() loops on this */
915 * Call rawexpread repeatedly until all data has been read.
916 * @return 0 on success, nonzero on failure
918 int rawexpread_fully(off_t a, char *buf, size_t len, CLIENT *client) {
921 while(len > 0 && (ret=rawexpread(a, buf, len, client)) > 0 ) { /* stops when everything is read or a call returns 0 (end of file) or a negative value */
926 return (ret < 0 || len != 0); /* nonzero on a read error or when fewer bytes than requested were available */
930 * Read an amount of bytes at a given offset from the right file. This
931 * abstracts the read-side of the copyonwrite stuff, and calls
932 * rawexpread() with the right parameters to do the actual work.
933 * @param a The offset where the read should start
934 * @param buf A buffer to read into
935 * @param len The size of buf
936 * @param client The client we're going to read for
937 * @return 0 on success, nonzero on failure
939 int expread(off_t a, char *buf, size_t len, CLIENT *client) {
941 off_t mapcnt, mapl, maph, pagestart;
943 if (!(client->server->flags & F_COPYONWRITE)) /* without copy-on-write the request goes straight to the exported file(s) */
944 return(rawexpread_fully(a, buf, len, client));
945 DEBUG3("Asked to read %d bytes at %Lu.\n", len, (unsigned long long)a);
947 mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE; /* first and last diff-page index touched by the request */
949 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
950 pagestart=mapcnt*DIFFPAGESIZE;
952 rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
953 len : (size_t)DIFFPAGESIZE-offset; /* never read past the end of the current page */
954 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
955 DEBUG3("Page %Lu is at %lu\n", (unsigned long long)mapcnt,
956 (unsigned long)(client->difmap[mapcnt]));
957 myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset); /* NOTE(review): if u32 is a 32-bit unsigned type, the multiplication wraps for diff offsets of 4 GiB and above -- confirm */
958 if (read(client->difffile, buf, rdlen) != rdlen) return -1; /* a short read from the diff file counts as failure */
959 } else { /* the block is not there */
960 DEBUG2("Page %Lu is not here, we read the original one\n",
961 (unsigned long long)mapcnt);
962 if(rawexpread_fully(a, buf, rdlen, client)) return -1;
964 len-=rdlen; a+=rdlen; buf+=rdlen; /* advance to the part of the request that lies in the next page */
970 * Write an amount of bytes at a given offset to the right file. This
971 * abstracts the write-side of the copyonwrite option, and calls
972 * rawexpwrite() with the right parameters to do the actual work.
974 * @param a The offset where the write should start
975 * @param buf The buffer to write from
976 * @param len The length of buf
977 * @param client The client we're going to write for.
978 * @return 0 on success, nonzero on failure
980 int expwrite(off_t a, char *buf, size_t len, CLIENT *client) {
981 char pagebuf[DIFFPAGESIZE]; /* holds one full page while it is copied into the diff file */
982 off_t mapcnt,mapl,maph;
987 if (!(client->server->flags & F_COPYONWRITE)) /* without copy-on-write the request goes straight to the exported file(s) */
988 return(rawexpwrite_fully(a, buf, len, client));
989 DEBUG3("Asked to write %d bytes at %Lu.\n", len, (unsigned long long)a);
991 mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ; /* first and last diff-page index touched by the request */
993 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
994 pagestart=mapcnt*DIFFPAGESIZE ;
996 wrlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
997 len : (size_t)DIFFPAGESIZE-offset; /* never write past the end of the current page */
999 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1000 DEBUG3("Page %Lu is at %lu\n", (unsigned long long)mapcnt,
1001 (unsigned long)(client->difmap[mapcnt])) ;
1002 myseek(client->difffile,
1003 client->difmap[mapcnt]*DIFFPAGESIZE+offset); /* NOTE(review): if u32 is a 32-bit unsigned type, the multiplication wraps for diff offsets of 4 GiB and above -- confirm */
1004 if (write(client->difffile, buf, wrlen) != wrlen) return -1 ;
1005 } else { /* the block is not there */
1006 myseek(client->difffile,client->difffilelen*DIFFPAGESIZE) ; /* positions at page index difffilelen */
1007 client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++; /* NOTE(review): with F_SPARSE the page is recorded at index mapcnt and difffilelen is not advanced, yet the seek above used difffilelen; written and later-read positions disagree unless something not visible here repositions the file -- confirm */
1008 DEBUG3("Page %Lu is not here, we put it at %lu\n",
1009 (unsigned long long)mapcnt,
1010 (unsigned long)(client->difmap[mapcnt]));
1011 rdlen=DIFFPAGESIZE ;
1012 if (rawexpread_fully(pagestart, pagebuf, rdlen, client)) /* fetch the whole original page so the bytes outside the write are preserved */
1014 memcpy(pagebuf+offset,buf,wrlen) ; /* overlay the new data on the original page */
1015 if (write(client->difffile, pagebuf, DIFFPAGESIZE) !=
1019 len-=wrlen ; a+=wrlen ; buf+=wrlen ; /* advance to the part of the request that lies in the next page */
1025 * Do the initial negotiation.
1027 * @param client The client we're negotiating with.
1029 void negotiate(CLIENT *client) { /* sends, in order: 8 bytes of INIT_PASSWD, the magic, the 8-byte export size, 128 zero bytes */
1033 memset(zeros, '\0', 290); /* clears 290 bytes although only 128 are sent below; the declaration of zeros is not visible here */
1034 if (write(client->net, INIT_PASSWD, 8) < 0) /* only a negative return is treated as failure; short writes go undetected */
1035 err("Negotiation failed: %m");
1036 cliserv_magic = htonll(cliserv_magic); /* converted in place; NOTE(review): if cliserv_magic is not local to this function, a second call would convert it again -- confirm */
1037 if (write(client->net, &cliserv_magic, sizeof(cliserv_magic)) < 0)
1038 err("Negotiation failed: %m");
1039 size_host = htonll((u64)(client->exportsize)); /* export size goes out as a 64-bit value in network byte order */
1040 if (write(client->net, &size_host, 8) < 0)
1041 err("Negotiation failed: %m");
1042 if (write(client->net, zeros, 128) < 0)
1043 err("Negotiation failed: %m");
1046 /** sending macro. */
1047 #define SEND(net,reply) writeit( net, &reply, sizeof( reply ));
1049 #define ERROR(client,reply) { reply.error = htonl(-1); SEND(client->net,reply); reply.error = 0; }
1051 * Serve a file to a single client.
1053 * @todo This beast needs to be split up in many tiny little manageable
1054 * pieces. Preferably with a chainsaw.
1056 * @param client The client we're going to serve to.
1057 * @return when the client disconnects
1059 int mainloop(CLIENT *client) { /* request loop: read one nbd_request, validate it, then serve the read or write */
1060 struct nbd_request request;
1061 struct nbd_reply reply;
1062 gboolean go_on=TRUE;
1067 DEBUG("Entering request loop!\n");
1068 reply.magic = htonl(NBD_REPLY_MAGIC); /* set once; the same reply struct is reused for every request */
1077 if (client->server->timeout)
1078 alarm(client->server->timeout); /* re-armed before each request is read */
1079 readit(client->net, &request, sizeof(request));
1080 request.from = ntohll(request.from);
1081 request.type = ntohl(request.type);
1083 if (request.type==NBD_CMD_DISC) {
1084 msg2(LOG_INFO, "Disconnect request received.");
1085 if (client->server->flags & F_COPYONWRITE) {
1086 if (client->difmap) g_free(client->difmap) ; /* NOTE(review): copyonwrite_prepare() obtains difmap from calloc(), but it is released with g_free() here */
1087 close(client->difffile);
1088 unlink(client->difffilename); /* the diff file does not outlive the connection */
1089 free(client->difffilename);
1095 len = ntohl(request.len);
1097 if (request.magic != htonl(NBD_REQUEST_MAGIC)) /* magic is compared in network byte order, without converting request.magic */
1098 err("Not enough magic.");
1099 if (len > BUFSIZE + sizeof(struct nbd_reply)) /* NOTE(review): reads are stored at buf + sizeof(struct nbd_reply) and writes read len bytes into buf; if buf holds BUFSIZE bytes (declaration not visible here) this bound admits requests that overflow it -- BUFSIZE - sizeof(struct nbd_reply) looks intended; confirm */
1100 err("Request too big!");
1102 printf("%s from %Lu (%Lu) len %d, ", request.type ? "WRITE" :
1103 "READ", (unsigned long long)request.from,
1104 (unsigned long long)request.from / 512, len);
1106 memcpy(reply.handle, request.handle, sizeof(reply.handle)); /* the reply echoes the handle taken from the request */
1107 if ((request.from + len) > (OFFT_MAX)) {
1108 DEBUG("[Number too large!]");
1109 ERROR(client, reply);
1113 if (((ssize_t)((off_t)request.from + len) > client->exportsize)) { /* NOTE(review): the sum is cast to ssize_t, which can be narrower than off_t and would truncate large offsets -- confirm */
1115 ERROR(client, reply);
1119 if (request.type==NBD_CMD_WRITE) {
1120 DEBUG("wr: net->buf, ");
1121 readit(client->net, buf, len); /* the payload is consumed from the socket before the read-only check below */
1122 DEBUG("buf->exp, ");
1123 if ((client->server->flags & F_READONLY) ||
1124 (client->server->flags & F_AUTOREADONLY)) {
1125 DEBUG("[WRITE to READONLY!]");
1126 ERROR(client, reply);
1129 if (expwrite(request.from, buf, len, client)) {
1130 DEBUG("Write failed: %m" );
1131 ERROR(client, reply);
1134 SEND(client->net, reply);
1140 DEBUG("exp->buf, ");
1141 if (expread(request.from, buf + sizeof(struct nbd_reply), len, client)) { /* data is placed after a reply-sized gap at the start of buf */
1142 DEBUG("Read failed: %m");
1143 ERROR(client, reply);
1147 DEBUG("buf->net, ");
1148 memcpy(buf, &reply, sizeof(struct nbd_reply)); /* fill the gap with the reply header so header and data leave in one writeit() call */
1149 writeit(client->net, buf, len + sizeof(struct nbd_reply));
1156 * Set up client export array, which is an array of FILE_INFO.
1157 * Also, split a single exportfile into multiple ones, if that was asked.
1158 * @param client information on the client which we want to setup export for
1160 void setupexport(CLIENT* client) {
1162 off_t laststartoff = 0, lastsize = 0;
1163 int multifile = (client->server->flags & F_MULTIFILE);
1165 client->export = g_array_new(TRUE, TRUE, sizeof(FILE_INFO));
1167 /* If multi-file, open as many files as we can.
1168 * If not, open exactly one file.
1169 * Calculate file sizes as we go to get total size. */
1173 mode_t mode = (client->server->flags & F_READONLY) ? O_RDONLY : O_RDWR; /* holds an open() access flag even though it is declared mode_t */
1176 tmpname=g_strdup_printf("%s.%d", client->exportname, i); /* multi-file parts are named "<exportname>.<i>" */
1178 tmpname=g_strdup(client->exportname);
1180 DEBUG2( "Opening %s\n", tmpname );
1181 fi.fhandle = open(tmpname, mode);
1182 if(fi.fhandle == -1 && mode == O_RDWR) {
1183 /* Try again because maybe media was read-only */
1184 fi.fhandle = open(tmpname, O_RDONLY);
1185 if(fi.fhandle != -1) {
1186 client->server->flags |= F_AUTOREADONLY; /* the flags are changed on the SERVER structure, not only for this client */
1187 client->server->flags |= F_READONLY;
1190 if(fi.fhandle == -1) {
1191 if(multifile && i>0) /* in multi-file mode a failed open after the first file is not treated like the failure below */
1193 err("Could not open exported file: %m");
1195 fi.startoff = laststartoff + lastsize; /* each file starts where the previous one ended */
1196 g_array_append_val(client->export, fi);
1199 /* Starting offset and size of this file will be used to
1200 * calculate starting offset of next file */
1201 laststartoff = fi.startoff;
1202 lastsize = size_autodetect(fi.fhandle);
1208 /* Set export size to total calculated size */
1209 client->exportsize = laststartoff + lastsize;
1211 /* Export size may be overridden */
1212 if(client->server->expected_size) {
1213 /* desired size must be <= total calculated size */
1214 if(client->server->expected_size > client->exportsize) {
1215 err("Size of exported file is too big\n");
1218 client->exportsize = client->server->expected_size;
1221 msg3(LOG_INFO, "Size of exported file/device is %Lu", (unsigned long long)client->exportsize);
1223 msg3(LOG_INFO, "Total number of files: %d", i);
1227 int copyonwrite_prepare(CLIENT* client) {
1229 if ((client->difffilename = malloc(1024))==NULL)
1230 err("Failed to allocate string for diff file name");
1231 snprintf(client->difffilename, 1024, "%s-%s-%d.diff",client->exportname,client->clientname,
1233 client->difffilename[1023]='\0';
1234 msg3(LOG_INFO,"About to create map and diff file %s",client->difffilename) ;
1235 client->difffile=open(client->difffilename,O_RDWR | O_CREAT | O_TRUNC,0600) ;
1236 if (client->difffile<0) err("Could not create diff file (%m)") ;
1237 if ((client->difmap=calloc(client->exportsize/DIFFPAGESIZE,sizeof(u32)))==NULL)
1238 err("Could not allocate memory") ;
1239 for (i=0;i<client->exportsize/DIFFPAGESIZE;i++) client->difmap[i]=(u32)-1 ;
1245 * Run a command. This is used for the ``prerun'' and ``postrun'' config file
1248 * @param command the command to be run. Read from the config file
1249 * @param file the file name we're about to export
/*
 * do_run: build a command line by expanding `command` as a printf-style
 * template with `file` as its single argument. Returns an int.
 * Only part of the body is visible in this excerpt; presumably the expanded
 * string is then executed and its status returned -- confirm.
 */
1251 int do_run(gchar* command, gchar* file) {
/* NOTE(review): `command` is used directly as the format string. Any
 * conversion in it other than a single %s makes g_strdup_printf read
 * arguments that were never passed -- confirm the config parser restricts
 * what may appear here. */
1256 cmd = g_strdup_printf(command, file);
1264 * Serve a connection.
1266 * @todo allow for multithreading, perhaps use libevent. Not just yet, though;
1267 * follow the road map.
1269 * @param client a connected client
/*
 * serveconnection: per-client entry point. Builds the export for `client`,
 * prepares copy-on-write when the server requests it, applies the socket
 * options to client->net, and runs the server's prerun and postrun commands
 * around the guarded section below.
 */
1271 void serveconnection(CLIENT *client) {
1272 setupexport(client);
/* Copy-on-write state is only prepared when F_COPYONWRITE is set; the
 * return value of copyonwrite_prepare() is not examined. */
1274 if (client->server->flags & F_COPYONWRITE) {
1275 copyonwrite_prepare(client);
1278 setmysockopt(client->net);
/* The guarded section runs only when the prerun command returns zero. */
1280 if(!do_run(client->server->prerun, client->exportname)) {
/* Presumably executed whether or not prerun succeeded -- the closing brace
 * of the `if` is not visible in this excerpt; confirm. The result is
 * ignored. */
1283 do_run(client->server->postrun, client->exportname);
1287 * Find the name of the file we have to serve. This will use g_strdup_printf
1288 * to put the IP address of the client inside a filename containing
1289 * "%s" (in the form as specified by the "virtstyle" option). That name
1290 * is then written to client->exportname.
1292 * @param net A socket connected to an nbd client
1293 * @param client information about the client. The IP address in human-readable
1294 * format will be written to a new char* buffer, the address of which will be
1295 * stored in client->clientname.
/*
 * set_peername: look up the peer address of socket `net`, derive this
 * client's export file name from the server's exportname according to
 * client->server->virtstyle, and store g_strdup'ed results in
 * client->exportname and client->clientname.
 */
1297 void set_peername(int net, CLIENT *client) {
1298 struct sockaddr_in addrin;
1299 struct sockaddr_in netaddr;
/* NOTE(review): this is a size_t whose address is cast to socklen_t* below.
 * Where the two types differ in width that is not a valid socklen_t object;
 * consider declaring it socklen_t. */
1300 size_t addrinlen = sizeof( addrin );
1306 if (getpeername(net, (struct sockaddr *) &addrin, (socklen_t *)&addrinlen) < 0)
/* NOTE(review): the message names getsockname, but the call that failed is
 * getpeername. */
1307 err("getsockname failed: %m");
/* inet_ntoa() returns a pointer to a static buffer, so keep a private copy. */
1308 peername = g_strdup(inet_ntoa(addrin.sin_addr));
/* The case labels of this switch are not visible in this excerpt. */
1309 switch(client->server->virtstyle) {
/* Export name taken verbatim from the server configuration. */
1311 client->exportname=g_strdup(client->server->exportname);
/* Walk the dotted address and rewrite each '.'; the replacement statement
 * is not visible here. strlen() is re-evaluated on every iteration. */
1314 for(i=0;i<strlen(peername);i++) {
1315 if(peername[i]=='.') {
/* The configured exportname is used as a printf template that receives the
 * peer name. */
1320 client->exportname=g_strdup_printf(client->server->exportname, peername);
/* Reduce the peer address to its network part by shifting the low bits out
 * and back in.
 * NOTE(review): s_addr is stored in network byte order, so on little-endian
 * hosts these shifts do not operate on the leading prefix bits; and a
 * cidrlen of 0 gives a shift count of 32, which is undefined for a 32-bit
 * operand. Confirm intended behaviour. */
1323 memcpy(&netaddr, &addrin, addrinlen);
1324 netaddr.sin_addr.s_addr>>=32-(client->server->cidrlen);
1325 netaddr.sin_addr.s_addr<<=32-(client->server->cidrlen);
/* netname points into inet_ntoa()'s static buffer; it is consumed on the
 * next line, before any further inet_ntoa() call. */
1326 netname = inet_ntoa(netaddr.sin_addr);
/* "<network>/<peer>" is what gets substituted into the template here.
 * Whether tmp is released afterwards is not visible in this excerpt. */
1327 tmp=g_strdup_printf("%s/%s", netname, peername);
1328 client->exportname=g_strdup_printf(client->server->exportname, tmp);
1332 msg4(LOG_INFO, "connect from %s, assigned file is %s",
1333 peername, client->exportname);
/* The client keeps its own copy of the textual peer address. */
1334 client->clientname=g_strdup(peername);
1340 * @param data a pointer to pid_t which should be freed
/* destroy_pid_t: destroy-notify callback. setup_servers() registers it as
 * the value-destroy function of the `children` hash table. The body is not
 * visible in this excerpt; presumably it releases `data` -- confirm. */
1342 void destroy_pid_t(gpointer data) {
1347 * Go daemon (unless we specified at compile time that we didn't want this)
1348 * @param serve the first server of our configuration. If its port is zero,
1349 * then do not daemonize, because we're doing inetd then. This parameter
1350 * is only used to create a PID file of the form
1351 * /var/run/nbd-server.<port>.pid; it's not modified in any way.
/* daemonize() is a real function only when neither NODAEMON nor NOFORK is
 * defined; otherwise it is defined as an empty macro further down. */
1353 #if !defined(NODAEMON) && !defined(NOFORK)
/*
 * daemonize: writes the current process id to a PID file whose name is built
 * from pidftemplate. Only part of the body is visible in this excerpt.
 */
1354 void daemonize(SERVER* serve) {
/* Special case for a server whose port is zero; the action taken is not
 * visible here. */
1357 if(serve && !(serve->port)) {
/* A default template is filled in only when none has been set yet. Two
 * defaults exist, with and without a %d for the port; the condition that
 * selects between them is not visible here.
 * NOTE(review): the documentation for this function describes the file as
 * /var/run/nbd-server.<port>.pid, which does not match these defaults. */
1363 if(!*pidftemplate) {
/* strncpy() does not terminate on truncation. main() clears 256 bytes of
 * pidftemplate beforehand, so byte 255 presumably stays NUL -- confirm the
 * buffer size. */
1365 strncpy(pidftemplate, "/var/run/server.%d.pid", 255);
1367 strncpy(pidftemplate, "/var/run/server.pid", 255);
/* NOTE(review): pidftemplate is used as the format string. If it can be set
 * from user input, it must contain at most one %d conversion -- confirm. */
1370 snprintf(pidfname, 255, pidftemplate, serve ? serve->port : 0);
1371 pidf=fopen(pidfname, "w");
1373 fprintf(pidf,"%d\n", (int)getpid());
/* Presumably the path taken when the PID file could not be opened; it is
 * reported on stderr and execution continues. */
1377 fprintf(stderr, "Not fatal; continuing");
/* Builds without daemon support: calls to daemonize() expand to nothing. */
1381 #define daemonize(serve)
1382 #endif /* !defined(NODAEMON) && !defined(NOFORK) */
1385 * Connect a server's socket.
1387 * @param serve the server we want to connect.
/*
 * setup_serve: create the listening TCP socket for one server, store it in
 * serve->socket, set SO_REUSEADDR and SO_KEEPALIVE on it, make it
 * non-blocking, bind it to serve->port, start listening, and install the
 * SIGCHLD and SIGTERM handlers.
 */
1389 void setup_serve(SERVER *serve) {
1390 struct sockaddr_in addrin;
1391 struct sigaction sa;
1392 int addrinlen = sizeof(addrin);
1399 if ((serve->socket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0)
1402 /* lose the pesky "Address already in use" error message */
/* `yes` is declared outside the visible lines; the option length is passed
 * as sizeof(int), which assumes `yes` is an int -- TODO confirm. */
1403 if (setsockopt(serve->socket,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) {
1404 err("setsockopt SO_REUSEADDR");
1406 if (setsockopt(serve->socket,SOL_SOCKET,SO_KEEPALIVE,&yes,sizeof(int)) == -1) {
1407 err("setsockopt SO_KEEPALIVE");
1410 /* make the listening socket non-blocking */
/* Read the current file status flags first so that only O_NONBLOCK is added. */
1411 if ((sock_flags = fcntl(serve->socket, F_GETFL, 0)) == -1) {
1412 err("fcntl F_GETFL");
1414 if (fcntl(serve->socket, F_SETFL, sock_flags | O_NONBLOCK) == -1) {
1415 err("fcntl F_SETFL O_NONBLOCK");
1418 DEBUG("Waiting for connections... bind, ");
/* Only these three fields are assigned in the visible code; an s_addr of 0
 * binds to every local address.
 * NOTE(review): addrin is not visibly zeroed first, so its padding bytes may
 * be uninitialised -- confirm. */
1419 addrin.sin_family = AF_INET;
1420 addrin.sin_port = htons(serve->port);
1421 addrin.sin_addr.s_addr = 0;
1422 if (bind(serve->socket, (struct sockaddr *) &addrin, addrinlen) < 0)
/* Listen backlog of one pending connection. */
1425 if (listen(serve->socket, 1) < 0)
/* The handlers are process-wide, yet they are (re)installed on every call,
 * i.e. once per configured server. SA_RESTART asks for interrupted system
 * calls to be restarted rather than fail with EINTR. */
1427 sa.sa_handler = sigchld_handler;
1428 sigemptyset(&sa.sa_mask);
1429 sa.sa_flags = SA_RESTART;
1430 if(sigaction(SIGCHLD, &sa, NULL) == -1)
1431 err("sigaction: %m");
1432 sa.sa_handler = sigterm_handler;
1433 sigemptyset(&sa.sa_mask);
1434 sa.sa_flags = SA_RESTART;
1435 if(sigaction(SIGTERM, &sa, NULL) == -1)
1436 err("sigaction: %m");
1440 * Connect our servers.
1442 void setup_servers(GArray* servers) {
1445 for(i=0;i<servers->len;i++) {
1446 setup_serve(&(g_array_index(servers, SERVER, i)));
1448 children=g_hash_table_new_full(g_int_hash, g_int_equal, NULL, destroy_pid_t);
1452 * Loop through the available servers, and serve them.
/*
 * serveloop: wait for incoming connections on the listening sockets of all
 * servers. For each socket that select() reports readable, the connection is
 * accepted, a CLIENT is allocated and filled in, the peer is checked with
 * authorized_client(), and the process forks. One side of the fork records
 * the child pid in `children`; the other side discards the listening state
 * and calls serveconnection(). Parts of the control flow are not visible in
 * this excerpt.
 */
1454 int serveloop(GArray* servers) {
1455 struct sockaddr_in addrin;
1456 socklen_t addrinlen=sizeof(addrin);
1466 * Set up the master fd_set. The set of descriptors we need
1467 * to select() for never changes anyway and it buys us a *lot*
1468 * of time to only build this once. However, if we ever choose
1469 * to not fork() for clients anymore, we may have to revisit
/* Collect every listening socket in mset and track the highest descriptor
 * for select(). */
1474 for(i=0;i<servers->len;i++) {
1475 sock=(g_array_index(servers, SERVER, i)).socket;
1476 FD_SET(sock, &mset);
1477 max=sock>max?sock:max;
/* select() modifies the set it is given, so work on a fresh copy of the
 * master set each time. */
1484 memcpy(&rset, &mset, sizeof(fd_set));
/* How tv is initialised is not visible in this excerpt. */
1487 if(select(max+1, &rset, NULL, NULL, &tv)>0) {
1489 for(i=0;i<servers->len;i++) {
1490 serve=&(g_array_index(servers, SERVER, i));
1491 if(FD_ISSET(serve->socket, &rset)) {
/* accept() writes the peer address length back into addrinlen; no reset of
 * addrinlen before the next call is visible here -- confirm. */
1492 if ((net=accept(serve->socket, (struct sockaddr *) &addrin, &addrinlen)) < 0)
/* The CLIENT keeps a pointer into the servers array, not a copy. */
1495 client = g_malloc(sizeof(CLIENT));
1496 client->server=serve;
1497 client->exportsize=OFFT_MAX;
1499 set_peername(net, client);
1500 if (!authorized_client(client)) {
1501 msg2(LOG_INFO,"Unauthorized client") ;
1505 msg2(LOG_INFO,"Authorized client") ;
/* The pid is heap-allocated because the same pointer is stored in the hash
 * table below as both key and value. */
1506 pid=g_malloc(sizeof(pid_t));
1508 if ((*pid=fork())<0) {
1509 msg3(LOG_INFO,"Could not fork (%s)",strerror(errno)) ;
1513 if (*pid>0) { /* parent */
1515 g_hash_table_insert(children, pid, pid);
/* From here on the code presumably runs in the forked child: it drops the
 * bookkeeping table and closes the listening sockets it inherited. */
1519 g_hash_table_destroy(children);
/* NOTE(review): the loop condition uses the comma operator, so the bound
 * check `i<servers->len` is evaluated and discarded; the loop only stops
 * when the value assigned to `serve` is NULL. The elements are also read as
 * SERVER* here, whereas the same array is indexed as SERVER earlier in this
 * function. In addition, `i` and `serve` are the enclosing loop's variables.
 * Probably intended: iterate i over [0, servers->len) and close
 * g_array_index(servers, SERVER, i).socket -- confirm and fix. */
1520 for(i=0;i<servers->len,serve=(g_array_index(servers, SERVER*, i));i++) {
1521 close(serve->socket);
1523 /* FALSE does not free the
1524 actual data. This is required,
1525 because the client has a
1526 direct reference into that
1527 data, and otherwise we get a
1529 g_array_free(servers, FALSE);
1531 msg2(LOG_INFO,"Starting to serve");
1532 serveconnection(client);
1541 * Set up user-ID and/or group-ID
1543 void dousers(void) {
1547 pw=getpwnam(runuser);
1548 if(setuid(pw->pw_uid)<0)
1549 msg3(LOG_DEBUG, "Could not set UID: %s", strerror(errno));
1552 gr=getgrnam(rungroup);
1553 if(setgid(gr->gr_gid)<0)
1554 msg3(LOG_DEBUG, "Could not set GID: %s", strerror(errno));
1559 * Main entry point...
1561 int main(int argc, char *argv[]) {
1566 if (sizeof( struct nbd_request )!=28) {
1567 fprintf(stderr,"Bad size of structure. Alignment problems?\n");
1571 memset(pidftemplate, '\0', 256);
1574 config_file_pos = g_strdup(CFILE);
1575 serve=cmdline(argc, argv);
1576 servers = parse_cfile(config_file_pos, &err);
1577 if(!servers || !servers->len) {
1578 g_warning("Could not parse config file: %s", err->message);
1581 g_array_append_val(servers, *serve);
1583 if (!(serve->port)) {
1586 /* You really should define ISSERVER if you're going to use
1587 * inetd mode, but if you don't, closing stdout and stderr
1588 * (which inetd had connected to the client socket) will let it
1592 open("/dev/null", O_WRONLY);
1593 open("/dev/null", O_WRONLY);
1595 client=g_malloc(sizeof(CLIENT));
1596 client->server=serve;
1598 client->exportsize=OFFT_MAX;
1599 set_peername(0,client);
1600 serveconnection(client);
1604 if((!serve) && (!servers||!servers->len)) {
1605 g_message("Nothing to do! Bye!");
1609 setup_servers(servers);