2 * Network Block Device - server
4 * Copyright 1996-1998 Pavel Machek, distribute under GPL
5 * <pavel@atrey.karlin.mff.cuni.cz>
6 * Copyright 2001-2004 Wouter Verhelst <wouter@debian.org>
7 * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk>
9 * Version 1.0 - hopefully 64-bit-clean
10 * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au>
11 * Version 1.2 - autodetect size of block devices, thanks to "Peter T. Breuer" <ptb@it.uc3m.es>
12 * Version 1.5 - can compile on Unix systems that don't have 64 bit integer
13 * type, or don't have 64 bit file offsets by defining FS_32BIT
14 * in compile options for nbd-server *only*. This can be done
15 * with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the
16 * original autoconf input file, or I would make it a configure
17 * option.) Ken Yap <ken@nlc.net.au>.
18 * Version 1.6 - fix autodetection of block device size and really make 64 bit
19 * clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk>
20 * Version 2.0 - Version synchronised with client
21 * Version 2.1 - Reap zombie client processes when they exit. Removed
22 * (uncommented) the _IO magic, it's no longer necessary. Wouter
23 * Verhelst <wouter@debian.org>
24 * Version 2.2 - Auto switch to read-only mode (useful for floppies).
25 * Version 2.3 - Fixed code so that Large File Support works. This
26 * removes the FS_32BIT compile-time directive; define
27 * _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be
28 * using FS_32BIT. This will allow you to use files >2GB instead of
29 * having to use the -m option. Wouter Verhelst <wouter@debian.org>
30 * Version 2.4 - Added code to keep track of children, so that we can
31 * properly kill them from initscripts. Add a call to daemon(),
32 * so that processes don't think they have to wait for us, which is
33 * interesting for initscripts as well. Wouter Verhelst
35 * Version 2.5 - Bugfix release: forgot to reset child_arraysize to
36 * zero after fork()ing, resulting in nbd-server going berserk
37 * when it receives a signal with at least one child open. Wouter
38 * Verhelst <wouter@debian.org>
39 * 10/10/2003 - Added socket option SO_KEEPALIVE (sf.net bug 819235);
40 * rectified type of mainloop::size_host (sf.net bugs 814435 and
41 * 817385); close the PID file after writing to it, so that the
42 * daemon can actually be found. Wouter Verhelst
44 * 10/10/2003 - Size of the data "size_host" was wrong and so was not
45 * correctly put in network endianness. Many types were corrected
46 * (size_t and off_t instead of int). <vspaceg@sourceforge.net>
47 * Version 2.6 - Some code cleanup.
48 * Version 2.7 - Better build system.
49 * 11/02/2004 - Doxygenified the source, modularized it a bit. Needs a
50 * lot more work, but this is a start. Wouter Verhelst
54 /* Includes LFS defines, which defines behaviours of some of the following
55 * headers, so must come before those */
58 #include <sys/types.h>
59 #include <sys/socket.h>
61 #include <sys/select.h> /* select */
62 #include <sys/wait.h> /* wait */
63 #ifdef HAVE_SYS_IOCTL_H
64 #include <sys/ioctl.h>
66 #include <sys/param.h>
67 #ifdef HAVE_SYS_MOUNT_H
68 #include <sys/mount.h> /* For BLKGETSIZE */
70 #include <signal.h> /* sigaction */
72 #include <netinet/tcp.h>
73 #include <netinet/in.h> /* sockaddr_in, htons, in_addr */
74 #include <netdb.h> /* hostent, gethostby*, getservby* */
81 #include <arpa/inet.h>
91 /* used in cliserv.h, so must come first */
92 #define MY_NAME "nbd_server"
95 /** Default position of the config file */
97 #define SYSCONFDIR "/etc"
99 #define CFILE SYSCONFDIR "/nbd-server/config"
101 /** Where our config file actually is */
102 gchar* config_file_pos;
104 /** What user we're running as */
106 /** What group we're running as */
107 gchar* rungroup=NULL;
109 /** Logging macros, now nothing goes to syslog unless you say ISSERVER */
111 #define msg2(a,b) syslog(a,b)
112 #define msg3(a,b,c) syslog(a,b,c)
113 #define msg4(a,b,c,d) syslog(a,b,c,d)
115 #define msg2(a,b) g_message(b)
116 #define msg3(a,b,c) g_message(b,c)
117 #define msg4(a,b,c,d) g_message(b,c,d)
120 /* Debugging macros */
123 #define DEBUG( a ) printf( a )
124 #define DEBUG2( a,b ) printf( a,b )
125 #define DEBUG3( a,b,c ) printf( a,b,c )
126 #define DEBUG4( a,b,c,d ) printf( a,b,c,d )
129 #define DEBUG2( a,b )
130 #define DEBUG3( a,b,c )
131 #define DEBUG4( a,b,c,d )
133 #ifndef PACKAGE_VERSION
134 #define PACKAGE_VERSION ""
137 * The highest value a variable of type off_t can reach. This is a signed
138 * integer, so set all bits except for the leftmost one.
140 #define OFFT_MAX ~((off_t)1<<(sizeof(off_t)*8-1))
141 #define LINELEN 256 /**< Size of static buffer used to read the
142 authorization file (yuck) */
143 #define BUFSIZE (1024*1024) /**< Size of buffer that can hold requests */
144 #define DIFFPAGESIZE 4096 /**< diff file uses those chunks */
145 #define F_READONLY 1 /**< flag to tell us a file is readonly */
146 #define F_MULTIFILE 2 /**< flag to tell us a file is exported using -m */
147 #define F_COPYONWRITE 4 /**< flag to tell us a file is exported using
149 #define F_AUTOREADONLY 8 /**< flag to tell us a file is set to autoreadonly */
151 GHashTable *children;
152 char pidfname[256]; /**< name of our PID file */
153 char pidftemplate[256]; /**< template to be used for the filename of the PID file */
154 char default_authname[] = SYSCONFDIR "/nbd-server/allow"; /**< default name of allow file */
157 * Types of virtualization
160 VIRT_NONE=0, /**< No virtualization */
161 VIRT_IPLIT, /**< Literal IP address as part of the filename */
162 VIRT_IPHASH, /**< Replacing all dots in an ip address by a / before
163 doing the same as in IPLIT */
164 VIRT_CIDR, /**< Every subnet in its own directory */
168 * Variables associated with a server.
171 gchar* exportname; /**< (unprocessed) filename of the file we're exporting */
172 off_t expected_size; /**< size of the exported file as it was told to
173 us through configuration */
174 unsigned int port; /**< port we're exporting this file at */
175 char* authname; /**< filename of the authorization file */
176 int flags; /**< flags associated with this exported file */
177 unsigned int timeout;/**< how long a connection may be idle
179 int socket; /**< The socket of this server. */
180 VIRT_STYLE virtstyle;/**< The style of virtualization, if any */
181 uint8_t cidrlen; /**< The length of the mask when we use
182 CIDR-style virtualization */
183 gchar* prerun; /**< command to be ran after connecting a client,
184 but before starting to serve */
185 gchar* postrun; /**< command that will be ran after the client
190 * Variables associated with a client socket.
193 int fhandle; /**< file descriptor */
194 off_t startoff; /**< starting offset of this file */
198 off_t exportsize; /**< size of the file we're exporting */
199 char *clientname; /**< peer */
200 char *exportname; /**< (processed) filename of the file we're exporting */
201 GArray *export; /**< array of FILE_INFO of exported files;
202 array size is always 1 unless we're
203 doing the multiple file option */
204 int net; /**< The actual client socket */
205 SERVER *server; /**< The server this client is getting data from */
206 char* difffilename; /**< filename of the copy-on-write file, if any */
207 int difffile; /**< filedescriptor of copyonwrite file. @todo
208 shouldn't this be an array too? (cfr export) Or
209 make -m and -c mutually exclusive */
210 u32 difffilelen; /**< number of pages in difffile */
211 u32 *difmap; /**< see comment on the global difmap for this one */
215 * Type of configuration file values
218 PARAM_INT, /**< This parameter is an integer */
219 PARAM_STRING, /**< This parameter is a string */
220 PARAM_BOOL, /**< This parameter is a boolean */
224 * Configuration file values
227 gchar *paramname; /**< Name of the parameter, as it appears in
229 gboolean required; /**< Whether this is a required (as opposed to
230 optional) parameter */
231 PARAM_TYPE ptype; /**< Type of the parameter. */
232 gpointer target; /**< Pointer to where the data of this
233 parameter should be written. If ptype is
234 PARAM_BOOL, the data is or'ed rather than
236 gint flagval; /**< Flag mask for this parameter in case ptype
241 * Check whether a client is allowed to connect. Works with an authorization
242 * file which contains one line per machine, no wildcards.
244 * @param opts The client who's trying to connect.
245 * @return 0 - authorization refused, 1 - OK
/* Decide whether the connecting peer (opts->clientname) is listed in the
 * authorization file opts->server->authname. Each line is either a plain
 * address or an "address/len" entry. */
247 int authorized_client(CLIENT *opts) {
248 const char *ERRMSG="Invalid entry '%s' in authfile '%s', so, refusing all connections.";
253 struct in_addr client;
254 struct in_addr cltemp;
/* Failure to open the authorization file is logged at LOG_INFO. */
257 if ((f=fopen(opts->server->authname,"r"))==NULL) {
258 msg4(LOG_INFO,"Can't open authorization file %s (%s).",
259 opts->server->authname,strerror(errno)) ;
/* NOTE(review): the result of this inet_aton() is not checked, so `client`
 * is only meaningful when clientname is a dotted-quad string. */
263 inet_aton(opts->clientname, &client);
264 while (fgets(line,LINELEN,f)!=NULL) {
/* Lines containing '/' are handled as address/len entries. */
265 if((tmp=index(line, '/'))) {
266 if(strlen(line)<=tmp-line) {
267 msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
/* inet_aton() returns nonzero when the address is valid and zero when it is
 * not, so the entry is invalid only on a zero return. */
271 if(!inet_aton(line,&addr)) {
272 msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
/* Both addresses are shifted right and then left by 32-len before being
 * compared.
 * NOTE(review): len comes straight from strtol() with no range check; a shift
 * count outside 0..31 is undefined, and s_addr is in network byte order --
 * confirm the intended bit selection. */
275 len=strtol(tmp, NULL, 0);
276 addr.s_addr>>=32-len;
277 addr.s_addr<<=32-len;
278 memcpy(&cltemp,&client,sizeof(client));
279 cltemp.s_addr>>=32-len;
280 cltemp.s_addr<<=32-len;
281 if(addr.s_addr == cltemp.s_addr) {
/* Plain entries: the line matches when it starts with the client name. */
285 if (strncmp(line,opts->clientname,strlen(opts->clientname))==0) {
295 * Read data from a file descriptor into a buffer
297 * @param f a file descriptor
298 * @param buf a buffer
299 * @param len the number of bytes to be read
/* Read from file descriptor f into buf. */
301 inline void readit(int f, void *buf, size_t len) {
/* A read() result of zero or less is reported through err(). */
305 if ((res = read(f, buf, len)) <= 0)
306 err("Read failed: %m");
313 * Write data from a buffer into a filedescriptor
315 * @param f a file descriptor
316 * @param buf a buffer containing data
317 * @param len the number of bytes to be written
/* Write from buf to file descriptor f. */
319 inline void writeit(int f, void *buf, size_t len) {
/* A write() result of zero or less is reported through err(). */
323 if ((res = write(f, buf, len)) <= 0)
324 err("Send failed: %m");
331 * Print out a message about how to use nbd-server. Split out to a separate
332 * function so that we can call it from multiple places
335 printf("This is nbd-server version " VERSION "\n");
336 printf("Usage: port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-a timeout_sec] [-C configuration file] [-p PID file name] [-o section name]\n"
337 "\t-r|--read-only\t\tread only\n"
338 "\t-m|--multi-file\t\tmultiple file\n"
339 "\t-c|--copy-on-write\tcopy on write\n"
340 "\t-C|--config-file\tspecify an alternate configuration file\n"
341 "\t-l|--authorize-file\tfile with list of hosts that are allowed to\n\t\t\t\tconnect.\n"
342 "\t-a|--idle-time\t\tmaximum idle seconds; server terminates when\n\t\t\t\tidle time exceeded\n"
343 "\t-p|--pid-file\t\tspecify a filename to write our PID to\n"
344 "\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n\n"
345 "\tif port is set to 0, stdin is used (for running from inetd)\n"
346 "\tif file_to_export contains '%%s', it is substituted with the IP\n"
347 "\t\taddress of the machine trying to connect\n" );
348 printf("Using configuration file %s\n", CFILE);
351 /* Dumps a config file section of the given SERVER*, and exits. */
/* Print the settings held in `serve` to stdout as a config-file section
 * named `section_header`. */
352 void dump_section(SERVER* serve, gchar* section_header) {
353 printf("[%s]\n", section_header);
354 printf("\texportname = %s\n", serve->exportname);
/* port is an unsigned int, so it is printed with %u. */
355 printf("\tport = %u\n", serve->port);
/* Boolean flags are only emitted when set. */
356 if(serve->flags & F_READONLY) {
357 printf("\treadonly = true\n");
359 if(serve->flags & F_MULTIFILE) {
360 printf("\tmultifile = true\n");
362 if(serve->flags & F_COPYONWRITE) {
363 printf("\tcopyonwrite = true\n");
365 if(serve->expected_size) {
/* %lld is the standard conversion for the long long argument. */
366 printf("\tfilesize = %lld\n", (long long int)serve->expected_size);
368 if(serve->authname) {
369 printf("\tauthfile = %s\n", serve->authname);
/* timeout is an unsigned int, so it is printed with %u. */
372 printf("\ttimeout = %u\n", serve->timeout);
378 * Parse the command line.
380 * @param argc the argc argument to main()
381 * @param argv the argv argument to main()
/* Build a SERVER description from the command-line arguments. */
383 SERVER* cmdline(int argc, char *argv[]) {
387 struct option long_options[] = {
388 {"read-only", no_argument, NULL, 'r'},
389 {"multi-file", no_argument, NULL, 'm'},
390 {"copy-on-write", no_argument, NULL, 'c'},
391 {"authorize-file", required_argument, NULL, 'l'},
392 {"idle-time", required_argument, NULL, 'a'},
393 {"config-file", required_argument, NULL, 'C'},
394 {"pid-file", required_argument, NULL, 'p'},
395 {"output-config", required_argument, NULL, 'o'},
402 gboolean do_output=FALSE;
403 gchar* section_header;
/* Start from a zeroed SERVER with the default authorization file name. */
408 serve=g_new0(SERVER, 1);
409 serve->authname = g_strdup(default_authname);
410 while((c=getopt_long(argc, argv, "-a:C:cl:mo:rp:", long_options, &i))>=0) {
413 /* non-option argument */
414 switch(nonspecial++) {
416 serve->port=strtol(optarg, NULL, 0);
419 serve->exportname = g_strdup(optarg);
420 if(serve->exportname[0] != '/') {
421 fprintf(stderr, "E: The to be exported file needs to be an absolute filename!\n");
426 last=strlen(optarg)-1;
428 if (suffix == 'k' || suffix == 'K' ||
429 suffix == 'm' || suffix == 'M')
431 es = (off_t)atol(optarg);
439 serve->expected_size = es;
444 serve->flags |= F_READONLY;
447 serve->flags |= F_MULTIFILE;
451 section_header = g_strdup(optarg);
/* g_strlcpy() always NUL-terminates the destination; strncpy() left
 * pidftemplate unterminated when optarg was 256 bytes or longer. */
454 g_strlcpy(pidftemplate, optarg, sizeof(pidftemplate));
457 serve->flags |=F_COPYONWRITE;
460 g_free(config_file_pos);
461 config_file_pos=g_strdup(optarg);
/* Replace the default authorization file name with the supplied one. */
464 g_free(serve->authname);
465 serve->authname=g_strdup(optarg);
468 serve->timeout=strtol(optarg, NULL, 0);
476 /* What's left: the port to export, the name of the to be exported
477 * file, and, optionally, the size of the file, in that order. */
484 g_critical("Need a complete configuration on the command line to output a config file section!");
487 dump_section(serve, section_header);
493 * Error codes for config file parsing
496 CFILE_NOTFOUND, /**< The configuration file is not found */
497 CFILE_MISSING_GENERIC, /**< The (required) group "generic" is missing */
498 CFILE_KEY_MISSING, /**< A (required) key is missing */
499 CFILE_VALUE_INVALID, /**< A value is syntactically invalid */
500 CFILE_PROGERR /**< Programmer error */
504 * Remove a SERVER from memory. Used from the hash table
/* Release the heap-allocated strings of a SERVER. */
506 void remove_server(gpointer s) {
/* exportname and authname are both released with g_free(). */
510 g_free(server->exportname);
512 g_free(server->authname);
517 * Parse the config file.
519 * @param f the name of the config file
520 * @param e a GError. @see CFILE_ERRORS for what error values this function can
522 * @return a GArray of SERVER structures. If the config file is empty or does not
523 * exist, returns an empty GArray; if the config file contains an
524 * error, returns NULL, and e is set appropriately
/* Load the key file `f` and turn its groups into SERVER entries collected in
 * a GArray. Problems are reported through the GError `e`. */
526 GArray* parse_cfile(gchar* f, GError** e) {
527 const char* DEFAULT_ERROR = "Could not parse %s in group %s: %s";
528 const char* MISSING_REQUIRED_ERROR = "Could not find required value %s in group %s: %s";
530 gchar *virtstyle=NULL;
/* Per-export parameters. The target pointers are filled in for each group
 * further down, because they point into the local SERVER `s`. */
532 { "exportname", TRUE, PARAM_STRING, NULL, 0 },
533 { "port", TRUE, PARAM_INT, NULL, 0 },
534 { "authfile", FALSE, PARAM_STRING, NULL, 0 },
535 { "timeout", FALSE, PARAM_INT, NULL, 0 },
536 { "filesize", FALSE, PARAM_INT, NULL, 0 },
537 { "virtstyle", FALSE, PARAM_STRING, NULL, 0 },
538 { "prerun", FALSE, PARAM_STRING, NULL, 0 },
539 { "postrun", FALSE, PARAM_STRING, NULL, 0 },
540 { "readonly", FALSE, PARAM_BOOL, NULL, F_READONLY },
541 { "multifile", FALSE, PARAM_BOOL, NULL, F_MULTIFILE },
542 { "copyonwrite", FALSE, PARAM_BOOL, NULL, F_COPYONWRITE },
543 { "autoreadonly", FALSE, PARAM_BOOL, NULL, F_AUTOREADONLY },
544 { "sparse_cow", FALSE, PARAM_BOOL, NULL, F_SPARSE },
/* The table above has 13 entries and targets are assigned through lp[12]
 * below, so the count must be 13; with 11 the last two keys were skipped. */
546 const int lp_size=13;
/* Parameters whose targets are the global runuser/rungroup. */
548 { "user", FALSE, PARAM_STRING, &runuser, 0 },
549 { "group", FALSE, PARAM_STRING, &rungroup, 0 },
555 const char *err_msg=NULL;
563 errdomain = g_quark_from_string("parse_cfile");
564 cfile = g_key_file_new();
565 retval = g_array_new(FALSE, TRUE, sizeof(SERVER));
566 if(!g_key_file_load_from_file(cfile, f, G_KEY_FILE_KEEP_COMMENTS |
567 G_KEY_FILE_KEEP_TRANSLATIONS, &err)) {
568 g_set_error(e, errdomain, CFILE_NOTFOUND, "Could not open config file.");
569 g_key_file_free(cfile);
/* The first group of the file must be [generic].
 * NOTE(review): g_key_file_get_start_group() may return NULL for a file
 * without groups, and its result is not freed here -- confirm. */
572 if(strcmp(g_key_file_get_start_group(cfile), "generic")) {
573 g_set_error(e, errdomain, CFILE_MISSING_GENERIC, "Config file does not contain the [generic] group!");
574 g_key_file_free(cfile);
577 groups = g_key_file_get_groups(cfile, NULL);
578 for(i=0;groups[i];i++) {
/* Start every group from a zeroed SERVER and point the parameter table at
 * its fields; all PARAM_BOOL entries share s.flags. */
579 memset(&s, '\0', sizeof(SERVER));
580 lp[0].target=&(s.exportname);
581 lp[1].target=&(s.port);
582 lp[2].target=&(s.authname);
583 lp[3].target=&(s.timeout);
584 lp[4].target=&(s.expected_size);
585 lp[5].target=&(virtstyle);
586 lp[6].target=&(s.prerun);
587 lp[7].target=&(s.postrun);
588 lp[8].target=lp[9].target=lp[10].target=
589 lp[11].target=lp[12].target=&(s.flags);
591 /* After the [generic] group, start parsing exports */
596 for(j=0;j<p_size;j++) {
597 g_assert(p[j].target != NULL);
598 g_assert(p[j].ptype==PARAM_INT||p[j].ptype==PARAM_STRING||p[j].ptype==PARAM_BOOL);
/* NOTE(review): integer values are stored through a gint pointer even for
 * the off_t target expected_size -- confirm the widths agree. */
601 *((gint*)p[j].target) =
602 g_key_file_get_integer(cfile,
608 *((gchar**)p[j].target) =
609 g_key_file_get_string(cfile,
615 value = g_key_file_get_boolean(cfile,
617 p[j].paramname, &err);
/* Booleans set or clear their flag bit in the target. */
620 *((gint*)p[j].target) |= p[j].flagval;
622 *((gint*)p[j].target) &= ~(p[j].flagval);
628 if(err->code == G_KEY_FILE_ERROR_KEY_NOT_FOUND) {
630 /* Ignore not-found error for optional values */
634 err_msg = MISSING_REQUIRED_ERROR;
637 err_msg = DEFAULT_ERROR;
639 g_set_error(e, errdomain, CFILE_VALUE_INVALID, err_msg, p[j].paramname, groups[i], err->message);
640 g_array_free(retval, TRUE);
642 g_key_file_free(cfile);
/* Map the virtstyle string onto a VIRT_STYLE value; the comparisons are
 * prefix matches. */
647 if(!strncmp(virtstyle, "none", 4)) {
648 s.virtstyle=VIRT_NONE;
649 } else if(!strncmp(virtstyle, "ipliteral", 9)) {
650 s.virtstyle=VIRT_IPLIT;
651 } else if(!strncmp(virtstyle, "iphash", 6)) {
652 s.virtstyle=VIRT_IPHASH;
653 } else if(!strncmp(virtstyle, "cidrhash", 8)) {
654 s.virtstyle=VIRT_CIDR;
655 if(strlen(virtstyle)<10) {
656 g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s: missing length", virtstyle, groups[i]);
657 g_array_free(retval, TRUE);
658 g_key_file_free(cfile);
/* The mask length is parsed from the text after the "cidrhash" prefix. */
661 s.cidrlen=strtol(virtstyle+8, NULL, 0);
663 g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s", virtstyle, groups[i]);
664 g_array_free(retval, TRUE);
665 g_key_file_free(cfile);
669 s.virtstyle=VIRT_IPLIT;
671 /* Don't need to free this, it's not our string */
673 /* Don't append values for the [generic] group */
675 g_array_append_val(retval, s);
682 * Signal handler for SIGCHLD
683 * @param s the signal we're handling (must be SIGCHLD, or something
/* Reap finished children and drop them from the `children` hash table. */
686 void sigchld_handler(int s) {
/* WNOHANG makes waitpid() non-blocking; the loop runs while it keeps
 * returning a positive PID. */
691 while((pid=waitpid(-1, &status, WNOHANG)) > 0) {
692 if(WIFEXITED(status)) {
693 msg3(LOG_INFO, "Child exited with %d", WEXITSTATUS(status));
/* The hash table is keyed by a pointer to the PID. */
695 i=g_hash_table_lookup(children, &pid);
697 msg3(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid);
699 DEBUG2("Removing %d from the list of children", pid);
700 g_hash_table_remove(children, &pid);
706 * Kill a child. Called from sigterm_handler::g_hash_table_foreach.
709 * @param value the value corresponding to the above key
710 * @param user_data a pointer which we always set to 1, so that we know what
/* Per-entry callback with the (key, value, user_data) signature used by
 * g_hash_table_foreach(). */
713 void killchild(gpointer key, gpointer value, gpointer user_data) {
/* user_data is interpreted as a pointer to int. */
715 int *parent=user_data;
722 * Handle SIGTERM and dispatch it to our children
723 * @param s the signal we're handling (must be SIGTERM, or something
724 * is severely wrong).
/* SIGTERM handler: visits every entry of the `children` hash table. */
726 void sigterm_handler(int s) {
/* killchild() is called for each entry with the address of `parent`.
 * NOTE(review): GLib hash-table calls inside a signal handler are presumably
 * not async-signal-safe -- confirm. */
729 g_hash_table_foreach(children, killchild, &parent);
739 * Detect the size of a file.
741 * @param fhandle An open filedescriptor
742 * @return the size of the file, or OFFT_MAX if detection was
/* Determine the size in bytes of the object behind fhandle, trying the
 * BLKGETSIZE ioctl (when available), then fstat(), then lseek(SEEK_END). */
745 off_t size_autodetect(int fhandle) {
747 unsigned long sectors;
748 struct stat stat_buf;
751 #ifdef HAVE_SYS_MOUNT_H
752 #ifdef HAVE_SYS_IOCTL_H
754 DEBUG("looking for export size with ioctl BLKGETSIZE\n");
/* The ioctl stores its result through the pointer argument; the value is
 * used only when the call returns 0 and the count is non-zero. */
755 if (!ioctl(fhandle, BLKGETSIZE, &sectors) && sectors) {
/* The sector count is multiplied by 512 to get bytes. */
756 es = (off_t)sectors * (off_t)512;
759 #endif /* BLKGETSIZE */
760 #endif /* HAVE_SYS_IOCTL_H */
761 #endif /* HAVE_SYS_MOUNT_H */
763 DEBUG("looking for fhandle size with fstat\n");
/* st_size is cleared first so a stale value is never used. */
764 stat_buf.st_size = 0;
765 error = fstat(fhandle, &stat_buf);
767 if(stat_buf.st_size > 0)
768 return (off_t)stat_buf.st_size;
770 err("fstat failed: %m");
773 DEBUG("looking for fhandle size with lseek SEEK_END\n");
/* Seeking to the end yields the size as the resulting offset. */
774 es = lseek(fhandle, (off_t)0, SEEK_END);
775 if (es > ((off_t)0)) {
778 DEBUG2("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4)));
781 err("Could not find size of exported block device: %m");
786 * Get the file handle and offset, given an export offset.
788 * @param export An array of export files
789 * @param a The offset to get corresponding file/offset for
790 * @param fhandle [out] File descriptor
791 * @param foffset [out] Offset into fhandle
792 * @param maxbytes [out] Tells how many bytes can be read/written
793 * from fhandle starting at foffset (0 if there is no limit)
794 * @return 0 on success, -1 on failure
/* Translate export offset `a` into a file descriptor and an offset inside
 * that file, using the FILE_INFO entries of `export`. */
796 int get_filepos(GArray* export, off_t a, int* fhandle, off_t* foffset, size_t* maxbytes ) {
797 /* Negative offset not allowed */
801 /* Binary search for last file with starting offset <= a */
804 int end = export->len - 1;
805 while( start <= end ) {
806 int mid = (start + end) / 2;
807 fi = g_array_index(export, FILE_INFO, mid);
808 if( fi.startoff < a ) {
810 } else if( fi.startoff > a ) {
818 /* end should never go negative, since first startoff is 0 and a >= 0 */
/* Entry `end` is the file containing offset a. */
821 fi = g_array_index(export, FILE_INFO, end);
822 *fhandle = fi.fhandle;
823 *foffset = a - fi.startoff;
/* When another file follows, the accessible span ends where it starts. */
825 if( end+1 < export->len ) {
826 FILE_INFO fi_next = g_array_index(export, FILE_INFO, end+1);
827 *maxbytes = fi_next.startoff - a;
834 * seek to a position in a file, with error handling.
835 * @param handle a filedescriptor
836 * @param a position to seek to
837 * @todo get rid of this; lastpoint is a global variable right now, but it
838 * shouldn't be. If we pass it on as a parameter, that makes things a *lot*
/* Position `handle` at absolute offset `a`. */
841 void myseek(int handle,off_t a) {
/* A negative lseek() result is reported through err(). */
842 if (lseek(handle, a, SEEK_SET) < 0) {
843 err("Can not seek locally!\n");
848 * Write an amount of bytes at a given offset to the right file. This
849 * abstracts the write-side of the multiple file option.
851 * @param a The offset where the write should start
852 * @param buf The buffer to write from
853 * @param len The length of buf
854 * @param client The client we're serving for
855 * @return The number of bytes actually written, or -1 in case of an error
/* Issue one write() against the export file that contains offset `a`. */
/* get_filepos() supplies the descriptor, the in-file offset and the byte
 * limit for that file; a non-zero result means the lookup failed. */
857 ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client) {
862 if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
864 if(maxbytes && len > maxbytes)
867 DEBUG4("(WRITE to fd %d offset %Lu len %u), ", fhandle, foffset, len);
/* Seek, then return the result of the single write() call unchanged. */
869 myseek(fhandle, foffset);
870 return write(fhandle, buf, len);
874 * Call rawexpwrite repeatedly until all data has been written.
875 * @return 0 on success, nonzero on failure
/* Repeat rawexpwrite() while bytes remain and each call reports progress. */
877 int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client) {
880 while(len > 0 && (ret=rawexpwrite(a, buf, len, client)) > 0 ) {
/* Non-zero result when the last call failed or bytes are still left over. */
885 return (ret < 0 || len != 0);
889 * Read an amount of bytes at a given offset from the right file. This
890 * abstracts the read-side of the multiple files option.
892 * @param a The offset where the read should start
893 * @param buf A buffer to read into
894 * @param len The size of buf
895 * @param client The client we're serving for
896 * @return The number of bytes actually read, or -1 in case of an
/* Issue one read() against the export file that contains offset `a`. */
/* get_filepos() supplies the descriptor, the in-file offset and the byte
 * limit for that file; a non-zero result means the lookup failed. */
899 ssize_t rawexpread(off_t a, char *buf, size_t len, CLIENT *client) {
904 if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
906 if(maxbytes && len > maxbytes)
909 DEBUG4("(READ from fd %d offset %Lu len %u), ", fhandle, foffset, len);
/* Seek, then return the result of the single read() call unchanged. */
911 myseek(fhandle, foffset);
912 return read(fhandle, buf, len);
916 * Call rawexpread repeatedly until all data has been read.
917 * @return 0 on success, nonzero on failure
/* Repeat rawexpread() while bytes remain and each call reports progress. */
919 int rawexpread_fully(off_t a, char *buf, size_t len, CLIENT *client) {
922 while(len > 0 && (ret=rawexpread(a, buf, len, client)) > 0 ) {
/* Non-zero result when the last call failed or bytes are still left over. */
927 return (ret < 0 || len != 0);
931 * Read an amount of bytes at a given offset from the right file. This
932 * abstracts the read-side of the copyonwrite stuff, and calls
933 * rawexpread() with the right parameters to do the actual work.
934 * @param a The offset where the read should start
935 * @param buf A buffer to read into
936 * @param len The size of buf
937 * @param client The client we're going to read for
938 * @return 0 on success, nonzero on failure
/* Read `len` bytes at export offset `a` into buf. Without F_COPYONWRITE this
 * is a plain rawexpread_fully(); otherwise each DIFFPAGESIZE page comes from
 * the diff file when difmap has an entry for it, else from the export. */
940 int expread(off_t a, char *buf, size_t len, CLIENT *client) {
942 off_t mapcnt, mapl, maph, pagestart;
944 if (!(client->server->flags & F_COPYONWRITE))
945 return(rawexpread_fully(a, buf, len, client));
946 DEBUG3("Asked to read %d bytes at %Lu.\n", len, (unsigned long long)a);
/* mapl and maph are the first and last page indexes touched by the request. */
948 mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
950 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
951 pagestart=mapcnt*DIFFPAGESIZE;
/* Read at most up to the end of the current page. */
953 rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
954 len : (size_t)DIFFPAGESIZE-offset;
955 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
956 DEBUG3("Page %Lu is at %lu\n", (unsigned long long)mapcnt,
957 (unsigned long)(client->difmap[mapcnt]));
/* Widen to off_t before multiplying: the u32 page index times DIFFPAGESIZE
 * would otherwise wrap once the diff file passes 4 GiB (assuming u32 is
 * 32 bits wide). */
958 myseek(client->difffile, (off_t)client->difmap[mapcnt]*DIFFPAGESIZE+offset);
959 if (read(client->difffile, buf, rdlen) != rdlen) return -1;
960 } else { /* the block is not there */
961 DEBUG2("Page %Lu is not here, we read the original one\n",
962 (unsigned long long)mapcnt);
963 if(rawexpread_fully(a, buf, rdlen, client)) return -1;
/* Advance past the part just handled. */
965 len-=rdlen; a+=rdlen; buf+=rdlen;
971 * Write an amount of bytes at a given offset to the right file. This
972 * abstracts the write-side of the copyonwrite option, and calls
973 * rawexpwrite() with the right parameters to do the actual work.
975 * @param a The offset where the write should start
976 * @param buf The buffer to write from
977 * @param len The length of buf
978 * @param client The client we're going to write for.
979 * @return 0 on success, nonzero on failure
/* Write `len` bytes from buf at export offset `a`. Without F_COPYONWRITE this
 * is a plain rawexpwrite_fully(); otherwise the data goes to the diff file,
 * one DIFFPAGESIZE page at a time, and the export itself is only read. */
981 int expwrite(off_t a, char *buf, size_t len, CLIENT *client) {
982 char pagebuf[DIFFPAGESIZE];
983 off_t mapcnt,mapl,maph;
988 if (!(client->server->flags & F_COPYONWRITE))
989 return(rawexpwrite_fully(a, buf, len, client));
990 DEBUG3("Asked to write %d bytes at %Lu.\n", len, (unsigned long long)a);
/* mapl and maph are the first and last page indexes touched by the request. */
992 mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
994 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
995 pagestart=mapcnt*DIFFPAGESIZE ;
/* Write at most up to the end of the current page. */
997 wrlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
998 len : (size_t)DIFFPAGESIZE-offset;
1000 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1001 DEBUG3("Page %Lu is at %lu\n", (unsigned long long)mapcnt,
1002 (unsigned long)(client->difmap[mapcnt])) ;
/* Widen to off_t before multiplying so the offset does not wrap once the
 * diff file passes 4 GiB (assuming u32 is 32 bits wide). */
1003 myseek(client->difffile,
1004 (off_t)client->difmap[mapcnt]*DIFFPAGESIZE+offset);
1005 if (write(client->difffile, buf, wrlen) != wrlen) return -1 ;
1006 } else { /* the block is not there */
/* Record the page's slot first and seek to that slot. With F_SPARSE the slot
 * is the page's own index, which is where later lookups through difmap seek;
 * otherwise it is the next free slot, as before. */
1007 client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++;
1008 myseek(client->difffile,(off_t)client->difmap[mapcnt]*DIFFPAGESIZE) ;
1009 DEBUG3("Page %Lu is not here, we put it at %lu\n",
1010 (unsigned long long)mapcnt,
1011 (unsigned long)(client->difmap[mapcnt]));
/* Fetch the whole original page, patch the new bytes into it, and store the
 * full page in the diff file. */
1012 rdlen=DIFFPAGESIZE ;
1013 if (rawexpread_fully(pagestart, pagebuf, rdlen, client))
1015 memcpy(pagebuf+offset,buf,wrlen) ;
1016 if (write(client->difffile, pagebuf, DIFFPAGESIZE) !=
/* Advance past the part just handled. */
1020 len-=wrlen ; a+=wrlen ; buf+=wrlen ;
1026 * Do the initial negotiation.
1028 * @param client The client we're negotiating with.
/* Send the initial handshake to the client socket client->net. */
1030 void negotiate(CLIENT *client) {
1033 u32 flags = NBD_FLAG_HAS_FLAGS;
1035 memset(zeros, '\0', sizeof(zeros));
/* Fields go out in this order: 8 bytes of INIT_PASSWD, cliserv_magic, the
 * export size as 8 bytes, 4 bytes of flags, then 124 zero bytes.
 * Only a negative write() result is treated as failure; short writes are
 * not detected here. */
1036 if (write(client->net, INIT_PASSWD, 8) < 0)
1037 err("Negotiation failed: %m");
1038 cliserv_magic = htonll(cliserv_magic);
1039 if (write(client->net, &cliserv_magic, sizeof(cliserv_magic)) < 0)
1040 err("Negotiation failed: %m");
1041 size_host = htonll((u64)(client->exportsize));
1042 if (write(client->net, &size_host, 8) < 0)
1043 err("Negotiation failed: %m");
/* Advertise read-only when the server has F_READONLY set. */
1044 if (client->server->flags & F_READONLY)
1045 flags |= NBD_FLAG_READ_ONLY;
1046 flags = htonl(flags);
1047 if (write(client->net, &flags, 4) < 0)
1048 err("Negotiation failed: %m");
1049 if (write(client->net, zeros, 124) < 0)
1050 err("Negotiation failed: %m");
1053 /** sending macro. */
/* SEND writes the whole `reply` object to `net` with writeit(). The expansion
 * already ends in a semicolon. */
1054 #define SEND(net,reply) writeit( net, &reply, sizeof( reply ));
/* ERROR stores errcode in network byte order in reply.error, sends the reply,
 * and resets reply.error to 0. It expands to a brace block, not do/while(0). */
1056 #define ERROR(client,reply,errcode) { reply.error = htonl(errcode); SEND(client->net,reply); reply.error = 0; }
1058 * Serve a file to a single client.
1060 * @todo This beast needs to be split up in many tiny little manageable
1061 * pieces. Preferably with a chainsaw.
1063 * @param client The client we're going to serve to.
1064 * @return when the client disconnects
/* Request loop for one client: read an nbd_request from client->net, validate
 * it, perform the read or write through expread()/expwrite(), and reply. */
1066 int mainloop(CLIENT *client) {
1067 struct nbd_request request;
1068 struct nbd_reply reply;
1069 gboolean go_on=TRUE;
1074 DEBUG("Entering request loop!\n");
1075 reply.magic = htonl(NBD_REPLY_MAGIC);
/* With a configured timeout, alarm() is armed before each request read. */
1084 if (client->server->timeout)
1085 alarm(client->server->timeout);
1086 readit(client->net, &request, sizeof(request));
1087 request.from = ntohll(request.from);
1088 request.type = ntohl(request.type);
1090 if (request.type==NBD_CMD_DISC) {
1091 msg2(LOG_INFO, "Disconnect request received.");
1092 if (client->server->flags & F_COPYONWRITE) {
/* difmap is obtained from calloc() in copyonwrite_prepare(), so it is
 * released with free(); free(NULL) is a no-op. */
1093 free(client->difmap) ;
1094 close(client->difffile);
1095 unlink(client->difffilename);
1096 free(client->difffilename);
1102 len = ntohl(request.len);
1104 if (request.magic != htonl(NBD_REQUEST_MAGIC))
1105 err("Not enough magic.");
/* On reads the reply header and the payload share buf (payload starts at
 * buf + sizeof(struct nbd_reply)), so the payload must leave room for the
 * header. buf is declared on a line not visible here and is presumably
 * BUFSIZE bytes -- confirm. */
1106 if (len > BUFSIZE - sizeof(struct nbd_reply))
1107 err("Request too big!");
1109 printf("%s from %Lu (%Lu) len %d, ", request.type ? "WRITE" :
1110 "READ", (unsigned long long)request.from,
1111 (unsigned long long)request.from / 512, len);
1113 memcpy(reply.handle, request.handle, sizeof(reply.handle));
1114 if ((request.from + len) > (OFFT_MAX)) {
1115 DEBUG("[Number too large!]");
1116 ERROR(client, reply, EINVAL);
/* Compare in off_t: a cast to ssize_t would truncate the end offset on
 * systems where ssize_t is narrower than off_t and let out-of-range requests
 * through. The check above keeps this sum within off_t. */
1120 if ((off_t)request.from + (off_t)len > client->exportsize) {
1122 ERROR(client, reply, EINVAL);
1126 if (request.type==NBD_CMD_WRITE) {
1127 DEBUG("wr: net->buf, ");
/* The payload is pulled off the socket before the read-only check. */
1128 readit(client->net, buf, len);
1129 DEBUG("buf->exp, ");
1130 if ((client->server->flags & F_READONLY) ||
1131 (client->server->flags & F_AUTOREADONLY)) {
1132 DEBUG("[WRITE to READONLY!]");
1133 ERROR(client, reply, EPERM);
1136 if (expwrite(request.from, buf, len, client)) {
1137 DEBUG("Write failed: %m" );
1138 ERROR(client, reply, errno);
1141 SEND(client->net, reply);
1147 DEBUG("exp->buf, ");
/* Data is read behind the space reserved for the reply header, so header and
 * payload can go out in one writeit() call. */
1148 if (expread(request.from, buf + sizeof(struct nbd_reply), len, client)) {
1149 DEBUG("Read failed: %m");
1150 ERROR(client, reply, errno);
1154 DEBUG("buf->net, ");
1155 memcpy(buf, &reply, sizeof(struct nbd_reply));
1156 writeit(client->net, buf, len + sizeof(struct nbd_reply));
1163 * Set up client export array, which is an array of FILE_INFO.
1164 * Also, split a single exportfile into multiple ones, if that was asked.
1165 * @param client information on the client which we want to setup export for
/* Open the export file(s) for a client, record each one as a FILE_INFO in
 * client->export, and compute client->exportsize. */
1167 void setupexport(CLIENT* client) {
1169 off_t laststartoff = 0, lastsize = 0;
1170 int multifile = (client->server->flags & F_MULTIFILE);
1172 client->export = g_array_new(TRUE, TRUE, sizeof(FILE_INFO));
1174 /* If multi-file, open as many files as we can.
1175 * If not, open exactly one file.
1176 * Calculate file sizes as we go to get total size. */
/* NOTE(review): `mode` holds open() flags (O_RDONLY/O_RDWR) although it is
 * declared mode_t. */
1180 mode_t mode = (client->server->flags & F_READONLY) ? O_RDONLY : O_RDWR;
/* The two name forms are "<exportname>.<i>" and the plain exportname. */
1183 tmpname=g_strdup_printf("%s.%d", client->exportname, i);
1185 tmpname=g_strdup(client->exportname);
1187 DEBUG2( "Opening %s\n", tmpname );
1188 fi.fhandle = open(tmpname, mode);
1189 if(fi.fhandle == -1 && mode == O_RDWR) {
1190 /* Try again because maybe media was read-only */
1191 fi.fhandle = open(tmpname, O_RDONLY);
/* A successful read-only retry marks the server as both auto-read-only and
 * read-only. */
1192 if(fi.fhandle != -1) {
1193 client->server->flags |= F_AUTOREADONLY;
1194 client->server->flags |= F_READONLY;
1197 if(fi.fhandle == -1) {
1198 if(multifile && i>0)
1200 err("Could not open exported file: %m");
/* Each file starts where the previous one ended. */
1202 fi.startoff = laststartoff + lastsize;
1203 g_array_append_val(client->export, fi);
1206 /* Starting offset and size of this file will be used to
1207 * calculate starting offset of next file */
1208 laststartoff = fi.startoff;
1209 lastsize = size_autodetect(fi.fhandle);
1215 /* Set export size to total calculated size */
1216 client->exportsize = laststartoff + lastsize;
1218 /* Export size may be overridden */
1219 if(client->server->expected_size) {
1220 /* desired size must be <= total calculated size */
1221 if(client->server->expected_size > client->exportsize) {
1222 err("Size of exported file is too big\n");
1225 client->exportsize = client->server->expected_size;
1228 msg3(LOG_INFO, "Size of exported file/device is %Lu", (unsigned long long)client->exportsize);
1230 msg3(LOG_INFO, "Total number of files: %d", i);
/*
 * Prepares copy-on-write state for a client: builds the diff file name,
 * creates (or truncates) that file, and allocates the page map
 * client->difmap with every entry set to (u32)-1.
 *
 * Allocation or open failures are reported through err(). The return
 * statement is not visible in this part of the file.
 */
1234 int copyonwrite_prepare(CLIENT* client) {
1236 if ((client->difffilename = malloc(1024))==NULL)
1237 err("Failed to allocate string for diff file name");
/* Name pattern is "<exportname>-<clientname>-<number>.diff".
 * NOTE(review): the argument supplying the %d is on a line not visible
 * here; presumably a process id - confirm. */
1238 snprintf(client->difffilename, 1024, "%s-%s-%d.diff",client->exportname,client->clientname,
/* snprintf already terminates within the 1024 bytes; this is a belt-and-
 * braces terminator. */
1240 client->difffilename[1023]='\0';
1241 msg3(LOG_INFO,"About to create map and diff file %s",client->difffilename) ;
/* O_TRUNC discards any earlier diff of the same name; 0600 keeps it private
 * to the owning user. */
1242 client->difffile=open(client->difffilename,O_RDWR | O_CREAT | O_TRUNC,0600) ;
1243 if (client->difffile<0) err("Could not create diff file (%m)") ;
/* One u32 map entry per DIFFPAGESIZE bytes of export.
 * NOTE(review): the division truncates, so a trailing partial page gets no
 * entry unless exportsize is always a multiple of DIFFPAGESIZE - confirm. */
1244 if ((client->difmap=calloc(client->exportsize/DIFFPAGESIZE,sizeof(u32)))==NULL)
1245 err("Could not allocate memory") ;
/* (u32)-1 is the initial value of every entry; presumably it means "page not
 * yet present in the diff file" - verify against the read/write paths. */
1246 for (i=0;i<client->exportsize/DIFFPAGESIZE;i++) client->difmap[i]=(u32)-1 ;
1252 * Run a command. This is used for the ``prerun'' and ``postrun'' config file
1255 * @param command the command to be run. Read from the config file
1256 * @param file the file name we're about to export
/*
 * Builds the command line to execute by expanding `command` as a
 * printf-style format with `file` as its only argument.
 *
 * NOTE(review): because `command` is used as the format string itself, any
 * conversion in it other than a single %s would be undefined behaviour; the
 * value comes from configuration, so confirm it is validated somewhere.
 * How the resulting string is executed and what is returned is not visible
 * in this part of the file.
 */
1258 int do_run(gchar* command, gchar* file) {
1263 cmd = g_strdup_printf(command, file);
1271 * Serve a connection.
1273 * @todo allow for multithreading, perhaps use libevent. Not just yet, though;
1274 * follow the road map.
1276 * @param client a connected client
/*
 * Per-connection entry point: sets up the export, optionally the
 * copy-on-write state, applies socket options, then runs the configured
 * prerun command, and finally the postrun command.
 */
1278 void serveconnection(CLIENT *client) {
/* Opens the backing file(s) and fills in client->exportsize. */
1279 setupexport(client);
1281 if (client->server->flags & F_COPYONWRITE) {
/* NOTE(review): the int result of copyonwrite_prepare() is ignored here. */
1282 copyonwrite_prepare(client);
1285 setmysockopt(client->net);
/* The guarded body runs only when do_run() returns zero for the prerun
 * command. NOTE(review): that body is not visible here; presumably it is
 * the request-serving loop. */
1287 if(!do_run(client->server->prerun, client->exportname)) {
/* postrun's result is not checked. */
1290 do_run(client->server->postrun, client->exportname);
1294 * Find the name of the file we have to serve. This will use g_strdup_printf
1295 * to put the IP address of the client inside a filename containing
1296 * "%s" (in the form as specified by the "virtstyle" option). That name
1297 * is then written to client->exportname.
1299 * @param net A socket connected to an nbd client
1300 * @param client information about the client. The IP address in human-readable
1301 * format will be written to a new char* buffer, the address of which will be
1302 * stored in client->clientname.
/*
 * Looks up the peer address of socket `net`, derives the export file name
 * for this client according to client->server->virtstyle, and stores newly
 * allocated strings in client->exportname and client->clientname.
 *
 * The case labels of the switch are not visible in this part of the file;
 * the three visible arms are: copy the configured name verbatim, expand the
 * configured name with the peer address, and expand it with
 * "<network>/<peer address>".
 */
1304 void set_peername(int net, CLIENT *client) {
1305 struct sockaddr_in addrin;
1306 struct sockaddr_in netaddr;
/* NOTE(review): declared as size_t but passed below through a cast to
 * socklen_t*. Where the two types differ in size, getpeername() reads and
 * writes only part of this object; declaring it socklen_t would remove the
 * cast and the mismatch. */
1307 size_t addrinlen = sizeof( addrin );
/* NOTE(review): the call is getpeername() but the message names
 * getsockname. */
1313 if (getpeername(net, (struct sockaddr *) &addrin, (socklen_t *)&addrinlen) < 0)
1314 err("getsockname failed: %m");
/* inet_ntoa() returns a pointer to a static buffer, hence the copy. */
1315 peername = g_strdup(inet_ntoa(addrin.sin_addr));
1316 switch(client->server->virtstyle) {
/* Arm 1: export name is used exactly as configured. */
1318 client->exportname=g_strdup(client->server->exportname);
/* Arm 2 preamble: scans peername for '.' characters; what is done with each
 * one is on a line not visible here. strlen() is re-evaluated on every
 * iteration. */
1321 for(i=0;i<strlen(peername);i++) {
1322 if(peername[i]=='.') {
/* The configured export name is used as a printf format that receives the
 * peer address string. */
1327 client->exportname=g_strdup_printf(client->server->exportname, peername);
/* Arm 3: derive the network address by clearing the low (32 - cidrlen) bits
 * of the peer address.
 * NOTE(review): s_addr is stored in network byte order, so shifting it
 * directly (without ntohl/htonl) clears the wrong bits on little-endian
 * hosts; also, a cidrlen of 0 gives a shift by 32, which is undefined for a
 * 32-bit operand. */
1330 memcpy(&netaddr, &addrin, addrinlen);
1331 netaddr.sin_addr.s_addr>>=32-(client->server->cidrlen);
1332 netaddr.sin_addr.s_addr<<=32-(client->server->cidrlen);
/* netname points into inet_ntoa()'s static buffer; it is consumed by the
 * g_strdup_printf() on the next line before any further inet_ntoa() call. */
1333 netname = inet_ntoa(netaddr.sin_addr);
1334 tmp=g_strdup_printf("%s/%s", netname, peername);
1335 client->exportname=g_strdup_printf(client->server->exportname, tmp);
1339 msg4(LOG_INFO, "connect from %s, assigned file is %s",
1340 peername, client->exportname);
/* clientname gets its own copy of the address string. */
1341 client->clientname=g_strdup(peername);
1347 * @param data a pointer to pid_t which should be freed
/*
 * Destructor callback for a heap-allocated pid_t. It is passed as the
 * value-destroy function when the `children` hash table is created with
 * g_hash_table_new_full().
 */
1349 void destroy_pid_t(gpointer data) {
1354 * Go daemon (unless we specified at compile time that we didn't want this)
1355 * @param serve the first server of our configuration. If its port is zero,
1356 * then do not daemonize, because we're doing inetd then. This parameter
1357 * is only used to create a PID file of the form
1358 * /var/run/nbd-server.<port>.pid; it's not modified in any way.
/*
 * daemonize() is a real function only when neither NODAEMON nor NOFORK is
 * defined; otherwise it is an empty macro (see the #define near the end of
 * this conditional).
 *
 * Visible behaviour of the function form: choose a pid-file name (from
 * pidftemplate, or a built-in default when the template is empty), then
 * write the current process id to that file.
 *
 * NOTE(review): the documentation above this function describes the file as
 * /var/run/nbd-server.<port>.pid, but the built-in defaults below are
 * "/var/run/server.%d.pid" and "/var/run/server.pid" - one of the two is
 * out of date.
 */
1360 #if !defined(NODAEMON) && !defined(NOFORK)
1361 void daemonize(SERVER* serve) {
/* A server entry with port 0 is special-cased; the body of this branch is
 * not visible here. */
1364 if(serve && !(serve->port)) {
/* Only fill in a default when no template has been set. */
1370 if(!*pidftemplate) {
/* Two defaults: one with a %d slot for the port, one without.
 * NOTE(review): the condition selecting between them is not visible here;
 * presumably it is whether `serve` is non-NULL. Both literals are shorter
 * than the 255-byte limit, so strncpy() leaves them NUL-terminated. */
1372 strncpy(pidftemplate, "/var/run/server.%d.pid", 255);
1374 strncpy(pidftemplate, "/var/run/server.pid", 255);
/* The template is used as the format string; when it contains no
 * conversion the port argument is simply unused. */
1377 snprintf(pidfname, 255, pidftemplate, serve ? serve->port : 0);
1378 pidf=fopen(pidfname, "w");
1380 fprintf(pidf,"%d\n", (int)getpid());
/* Printed on the path where the pid file could not be written (the branch
 * structure around it is not visible here); the message has no trailing
 * newline. */
1384 fprintf(stderr, "Not fatal; continuing");
/* Fallback when daemonizing is compiled out: calls expand to nothing. */
1388 #define daemonize(serve)
1389 #endif /* !defined(NODAEMON) && !defined(NOFORK) */
1392 * Connect a server's socket.
1394 * @param serve the server we want to connect.
/*
 * Creates the listening socket for one server entry: TCP socket with
 * SO_REUSEADDR and SO_KEEPALIVE, switched to non-blocking mode, bound to
 * serve->port on the wildcard address, and put into listening state. Also
 * installs the SIGCHLD and SIGTERM handlers.
 *
 * The socket descriptor is stored in serve->socket. Failures of the calls
 * shown with an err() line are reported through err(); the handling for
 * socket(), bind() and listen() failures is not visible here.
 */
1396 void setup_serve(SERVER *serve) {
1397 struct sockaddr_in addrin;
1398 struct sigaction sa;
1399 int addrinlen = sizeof(addrin);
1406 if ((serve->socket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0)
1409 /* lose the pesky "Address already in use" error message */
1410 if (setsockopt(serve->socket,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) {
1411 err("setsockopt SO_REUSEADDR");
1413 if (setsockopt(serve->socket,SOL_SOCKET,SO_KEEPALIVE,&yes,sizeof(int)) == -1) {
1414 err("setsockopt SO_KEEPALIVE");
1417 /* make the listening socket non-blocking */
/* Read-modify-write so that existing file status flags are preserved. */
1418 if ((sock_flags = fcntl(serve->socket, F_GETFL, 0)) == -1) {
1419 err("fcntl F_GETFL");
1421 if (fcntl(serve->socket, F_SETFL, sock_flags | O_NONBLOCK) == -1) {
1422 err("fcntl F_SETFL O_NONBLOCK");
1425 DEBUG("Waiting for connections... bind, ");
/* NOTE(review): in the lines visible here addrin is filled field by field
 * without being zeroed first, leaving any padding (sin_zero) indeterminate. */
1426 addrin.sin_family = AF_INET;
1427 addrin.sin_port = htons(serve->port);
/* Address 0 means "any local interface". */
1428 addrin.sin_addr.s_addr = 0;
1429 if (bind(serve->socket, (struct sockaddr *) &addrin, addrinlen) < 0)
/* NOTE(review): backlog of 1 is very small for a forking server; confirm it
 * is intentional. */
1432 if (listen(serve->socket, 1) < 0)
/* Signal handlers are process-wide, yet they are installed on every call,
 * i.e. once per configured server; harmless but redundant. SA_RESTART makes
 * interrupted system calls restart instead of failing with EINTR. */
1434 sa.sa_handler = sigchld_handler;
1435 sigemptyset(&sa.sa_mask);
1436 sa.sa_flags = SA_RESTART;
1437 if(sigaction(SIGCHLD, &sa, NULL) == -1)
1438 err("sigaction: %m");
1439 sa.sa_handler = sigterm_handler;
1440 sigemptyset(&sa.sa_mask);
1441 sa.sa_flags = SA_RESTART;
1442 if(sigaction(SIGTERM, &sa, NULL) == -1)
1443 err("sigaction: %m");
1447 * Connect our servers.
/*
 * Calls setup_serve() on every SERVER element of `servers`, then creates
 * the global `children` hash table used to track forked child processes.
 */
1449 void setup_servers(GArray* servers) {
1452 for(i=0;i<servers->len;i++) {
/* Elements are stored by value, so the address of the element is passed. */
1453 setup_serve(&(g_array_index(servers, SERVER, i)));
/* Keys are hashed/compared as pointed-to ints. No key destructor is given,
 * only a value destructor (destroy_pid_t); serveloop() inserts the same
 * pointer as both key and value, so it must be released only once. */
1455 children=g_hash_table_new_full(g_int_hash, g_int_equal, NULL, destroy_pid_t);
1459 * Loop through the available servers, and serve them.
/*
 * Accept loop for all configured servers. Waits with select() on every
 * listening socket; for each readable socket it accepts the connection,
 * builds a CLIENT, checks authorization and forks. The parent records the
 * child's pid in `children`; the child drops the parent's bookkeeping,
 * closes the listening sockets and calls serveconnection().
 *
 * The outer loop construct, the timeout initialisation and the error paths
 * are on lines not visible in this part of the file.
 */
1461 int serveloop(GArray* servers) {
1462 struct sockaddr_in addrin;
1463 socklen_t addrinlen=sizeof(addrin);
1473 * Set up the master fd_set. The set of descriptors we need
1474 * to select() for never changes anyway and it buys us a *lot*
1475 * of time to only build this once. However, if we ever choose
1476 * to not fork() for clients anymore, we may have to revisit
/* Add every listening socket to the master set and remember the highest
 * descriptor for select()'s first argument. */
1481 for(i=0;i<servers->len;i++) {
1482 sock=(g_array_index(servers, SERVER, i)).socket;
1483 FD_SET(sock, &mset);
1484 max=sock>max?sock:max;
/* select() overwrites the set it is given, so work on a copy each round. */
1491 memcpy(&rset, &mset, sizeof(fd_set));
1494 if(select(max+1, &rset, NULL, NULL, &tv)>0) {
1496 for(i=0;i<servers->len;i++) {
1497 serve=&(g_array_index(servers, SERVER, i));
1498 if(FD_ISSET(serve->socket, &rset)) {
/* NOTE(review): addrinlen is a value-result argument; in the lines visible
 * here it is not reset to sizeof(addrin) before each accept(). */
1499 if ((net=accept(serve->socket, (struct sockaddr *) &addrin, &addrinlen)) < 0)
1502 client = g_malloc(sizeof(CLIENT));
1503 client->server=serve;
/* Placeholder size until the export is actually opened. */
1504 client->exportsize=OFFT_MAX;
1506 set_peername(net, client);
1507 if (!authorized_client(client)) {
1508 msg2(LOG_INFO,"Unauthorized client") ;
1512 msg2(LOG_INFO,"Authorized client") ;
/* The pid lives on the heap because the hash table stores a pointer to it
 * (as both key and value). */
1513 pid=g_malloc(sizeof(pid_t));
1515 if ((*pid=fork())<0) {
1516 msg3(LOG_INFO,"Could not fork (%s)",strerror(errno)) ;
1520 if (*pid>0) { /* parent */
1522 g_hash_table_insert(children, pid, pid);
/* From here on: child process. It has no children of its own, so its copy
 * of the table is discarded. */
1526 g_hash_table_destroy(children);
/* NOTE(review): two problems in this loop header.
 * (1) The condition uses the comma operator, so `i<servers->len` is
 *     evaluated and thrown away; the loop actually runs until the fetched
 *     value is NULL, which can read past the end of the array.
 * (2) g_array_index() is used with element type SERVER* here, while every
 *     other access in this file treats the elements as SERVER (stored by
 *     value). The index is therefore scaled by the wrong element size and
 *     the first bytes of a SERVER are reinterpreted as a pointer.
 * It also reuses `i`, the index of the enclosing per-server loop. */
1527 for(i=0;i<servers->len,serve=(g_array_index(servers, SERVER*, i));i++) {
1528 close(serve->socket);
1530 /* FALSE does not free the
1531 actual data. This is required,
1532 because the client has a
1533 direct reference into that
1534 data, and otherwise we get a
1536 g_array_free(servers, FALSE);
1538 msg2(LOG_INFO,"Starting to serve");
1539 serveconnection(client);
1548 * Set up user-ID and/or group-ID
/*
 * Switches the process to the configured group (rungroup) and user
 * (runuser). The group is changed first: once the user id has been dropped
 * the process would normally no longer be allowed to change its group.
 *
 * NOTE(review): getgrnam() and getpwnam() return NULL when the name is
 * unknown or on error, and both results are dereferenced immediately with
 * no check, so a misspelt user or group name crashes the server.
 * NOTE(review): a failing setgid()/setuid() is only logged at LOG_DEBUG;
 * for a privilege drop this should probably be treated as fatal.
 * Whether each step is guarded by a test that the name is set is not
 * visible in this part of the file.
 */
1550 void dousers(void) {
1554 gr=getgrnam(rungroup);
1555 if(setgid(gr->gr_gid)<0)
1556 msg3(LOG_DEBUG, "Could not set GID: %s", strerror(errno));
1559 pw=getpwnam(runuser);
1560 if(setuid(pw->pw_uid)<0)
1561 msg3(LOG_DEBUG, "Could not set UID: %s", strerror(errno));
1566 * Main entry point...
1568 int main(int argc, char *argv[]) {
1573 if (sizeof( struct nbd_request )!=28) {
1574 fprintf(stderr,"Bad size of structure. Alignment problems?\n");
1578 memset(pidftemplate, '\0', 256);
1581 config_file_pos = g_strdup(CFILE);
1582 serve=cmdline(argc, argv);
1583 servers = parse_cfile(config_file_pos, &err);
1584 if(!servers || !servers->len) {
1585 g_warning("Could not parse config file: %s",
1586 err ? err->message : "Unknown error");
1589 g_array_append_val(servers, *serve);
1591 if (!(serve->port)) {
1594 /* You really should define ISSERVER if you're going to use
1595 * inetd mode, but if you don't, closing stdout and stderr
1596 * (which inetd had connected to the client socket) will let it
1600 open("/dev/null", O_WRONLY);
1601 open("/dev/null", O_WRONLY);
1603 client=g_malloc(sizeof(CLIENT));
1604 client->server=serve;
1606 client->exportsize=OFFT_MAX;
1607 set_peername(0,client);
1608 serveconnection(client);
1612 if((!serve) && (!servers||!servers->len)) {
1613 g_message("Nothing to do! Bye!");
1617 setup_servers(servers);