2 * Network Block Device - server
4 * Copyright 1996-1998 Pavel Machek, distribute under GPL
5 * <pavel@atrey.karlin.mff.cuni.cz>
6 * Copyright 2001-2004 Wouter Verhelst <wouter@debian.org>
7 * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk>
9 * Version 1.0 - hopefully 64-bit-clean
10 * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au>
11 * Version 1.2 - autodetect size of block devices, thanks to Peter T. Breuer <ptb@it.uc3m.es>
12 * Version 1.5 - can compile on Unix systems that don't have 64 bit integer
13 * type, or don't have 64 bit file offsets by defining FS_32BIT
14 * in compile options for nbd-server *only*. This can be done
15 * with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the
16 * original autoconf input file, or I would make it a configure
17 * option.) Ken Yap <ken@nlc.net.au>.
18 * Version 1.6 - fix autodetection of block device size and really make 64 bit
19 * clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk>
20 * Version 2.0 - Version synchronised with client
21 * Version 2.1 - Reap zombie client processes when they exit. Removed
22 * (uncommented) the _IO magic, it's no longer necessary. Wouter
23 * Verhelst <wouter@debian.org>
24 * Version 2.2 - Auto switch to read-only mode (useful for floppies).
25 * Version 2.3 - Fixed code so that Large File Support works. This
26 * removes the FS_32BIT compile-time directive; define
27 * _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be
28 * using FS_32BIT. This will allow you to use files >2GB instead of
29 * having to use the -m option. Wouter Verhelst <wouter@debian.org>
30 * Version 2.4 - Added code to keep track of children, so that we can
31 * properly kill them from initscripts. Add a call to daemon(),
32 * so that processes don't think they have to wait for us, which is
33 * interesting for initscripts as well. Wouter Verhelst
35 * Version 2.5 - Bugfix release: forgot to reset child_arraysize to
36 * zero after fork()ing, resulting in nbd-server going berserk
37 * when it receives a signal with at least one child open. Wouter
38 * Verhelst <wouter@debian.org>
39 * 10/10/2003 - Added socket option SO_KEEPALIVE (sf.net bug 819235);
40 * rectified type of mainloop::size_host (sf.net bugs 814435 and
41 * 817385); close the PID file after writing to it, so that the
42 * daemon can actually be found. Wouter Verhelst
44 * 10/10/2003 - Size of the data "size_host" was wrong and so was not
45 * correctly put in network endianness. Many types were corrected
46 * (size_t and off_t instead of int). <vspaceg@sourceforge.net>
47 * Version 2.6 - Some code cleanup.
48 * Version 2.7 - Better build system.
49 * 11/02/2004 - Doxygenified the source, modularized it a bit. Needs a
50 * lot more work, but this is a start. Wouter Verhelst
54 /* Includes LFS defines, which defines behaviours of some of the following
55 * headers, so must come before those */
58 #include <sys/types.h>
59 #include <sys/socket.h>
61 #include <sys/select.h> /* select */
62 #include <sys/wait.h> /* wait */
63 #ifdef HAVE_SYS_IOCTL_H
64 #include <sys/ioctl.h>
66 #include <sys/param.h>
67 #ifdef HAVE_SYS_MOUNT_H
68 #include <sys/mount.h> /* For BLKGETSIZE */
70 #include <signal.h> /* sigaction */
72 #include <netinet/tcp.h>
73 #include <netinet/in.h> /* sockaddr_in, htons, in_addr */
74 #include <netdb.h> /* hostent, gethostby*, getservby* */
81 #include <arpa/inet.h>
91 /* used in cliserv.h, so must come first */
92 #define MY_NAME "nbd_server"
95 /** Default position of the config file */
97 #define SYSCONFDIR "/etc"
99 #define CFILE SYSCONFDIR "/nbd-server/config"
101 /** Where our config file actually is */
102 gchar* config_file_pos;
104 /** What user we're running as */
106 /** What group we're running as */
107 gchar* rungroup=NULL;
109 /** Logging macros, now nothing goes to syslog unless you say ISSERVER */
111 #define msg2(a,b) syslog(a,b)
112 #define msg3(a,b,c) syslog(a,b,c)
113 #define msg4(a,b,c,d) syslog(a,b,c,d)
115 #define msg2(a,b) g_message(b)
116 #define msg3(a,b,c) g_message(b,c)
117 #define msg4(a,b,c,d) g_message(b,c,d)
120 /* Debugging macros */
123 #define DEBUG( a ) printf( a )
124 #define DEBUG2( a,b ) printf( a,b )
125 #define DEBUG3( a,b,c ) printf( a,b,c )
126 #define DEBUG4( a,b,c,d ) printf( a,b,c,d )
129 #define DEBUG2( a,b )
130 #define DEBUG3( a,b,c )
131 #define DEBUG4( a,b,c,d )
133 #ifndef PACKAGE_VERSION
134 #define PACKAGE_VERSION ""
/**
 * The highest value a variable of type off_t can reach. off_t is a signed
 * integer, so the result has every bit set except the leftmost (sign) bit.
 *
 * The value is built in unsigned arithmetic and only then converted to
 * off_t: shifting a signed 1 into the sign bit is undefined behaviour in C.
 * The expansion is fully parenthesized so it can be used inside any
 * larger expression.
 */
#define OFFT_MAX ((off_t)((1ULL << (sizeof(off_t) * 8 - 1)) - 1))
141 #define LINELEN 256 /**< Size of static buffer used to read the
142 authorization file (yuck) */
143 #define BUFSIZE (1024*1024) /**< Size of buffer that can hold requests */
144 #define DIFFPAGESIZE 4096 /**< diff file uses those chunks */
145 #define F_READONLY 1 /**< flag to tell us a file is readonly */
146 #define F_MULTIFILE 2 /**< flag to tell us a file is exported using -m */
147 #define F_COPYONWRITE 4 /**< flag to tell us a file is exported using
149 #define F_AUTOREADONLY 8 /**< flag to tell us a file is set to autoreadonly */
151 GHashTable *children;
152 char pidfname[256]; /**< name of our PID file */
153 char pidftemplate[256]; /**< template to be used for the filename of the PID file */
154 char default_authname[] = SYSCONFDIR "/nbd-server/allow"; /**< default name of allow file */
157 * Types of virtualization
160 VIRT_NONE=0, /**< No virtualization */
161 VIRT_IPLIT, /**< Literal IP address as part of the filename */
162 VIRT_IPHASH, /**< Replacing all dots in an ip address by a / before
163 doing the same as in IPLIT */
164 VIRT_CIDR, /**< Every subnet in its own directory */
168 * Variables associated with a server.
171 gchar* exportname; /**< (unprocessed) filename of the file we're exporting */
172 off_t expected_size; /**< size of the exported file as it was told to
173 us through configuration */
174 unsigned int port; /**< port we're exporting this file at */
175 char* authname; /**< filename of the authorization file */
176 int flags; /**< flags associated with this exported file */
177 unsigned int timeout;/**< how long a connection may be idle
179 int socket; /**< The socket of this server. */
180 VIRT_STYLE virtstyle;/**< The style of virtualization, if any */
181 uint8_t cidrlen; /**< The length of the mask when we use
182 CIDR-style virtualization */
183 gchar* prerun; /**< command to be ran after connecting a client,
184 but before starting to serve */
185 gchar* postrun; /**< command that will be ran after the client
190 * Variables associated with a client socket.
193 int fhandle; /**< file descriptor */
194 off_t startoff; /**< starting offset of this file */
198 off_t exportsize; /**< size of the file we're exporting */
199 char *clientname; /**< peer */
200 char *exportname; /**< (processed) filename of the file we're exporting */
201 GArray *export; /**< array of FILE_INFO of exported files;
202 array size is always 1 unless we're
203 doing the multiple file option */
204 int net; /**< The actual client socket */
205 SERVER *server; /**< The server this client is getting data from */
206 char* difffilename; /**< filename of the copy-on-write file, if any */
207 int difffile; /**< filedescriptor of copyonwrite file. @todo
208 shouldn't this be an array too? (cfr export) Or
209 make -m and -c mutually exclusive */
210 u32 difffilelen; /**< number of pages in difffile */
211 u32 *difmap; /**< see comment on the global difmap for this one */
215 * Type of configuration file values
218 PARAM_INT, /**< This parameter is an integer */
219 PARAM_STRING, /**< This parameter is a string */
220 PARAM_BOOL, /**< This parameter is a boolean */
224 * Configuration file values
227 gchar *paramname; /**< Name of the parameter, as it appears in
229 gboolean required; /**< Whether this is a required (as opposed to
230 optional) parameter */
231 PARAM_TYPE ptype; /**< Type of the parameter. */
232 gpointer target; /**< Pointer to where the data of this
233 parameter should be written. If ptype is
234 PARAM_BOOL, the data is or'ed rather than
236 gint flagval; /**< Flag mask for this parameter in case ptype
241 * Check whether a client is allowed to connect. Works with an authorization
242 * file which contains one line per machine, no wildcards.
244 * @param opts The client who's trying to connect.
245 * @return 0 - authorization refused, 1 - OK
/* Scan the authorization file named by opts->server->authname line by line
 * and compare each entry with the connecting peer (opts->clientname).
 * Entries are either "address/prefix-length" or a plain address.
 * NOTE(review): the declarations of f, line, tmp, addr and len, the return
 * statements and the closing braces are not visible in this listing. */
247 int authorized_client(CLIENT *opts) {
248 const char *ERRMSG="Invalid entry '%s' in authfile '%s', so, refusing all connections.";
253 struct in_addr client;
254 struct in_addr cltemp;
/* A missing or unreadable authfile is only logged at LOG_INFO here. */
257 if ((f=fopen(opts->server->authname,"r"))==NULL) {
258 msg4(LOG_INFO,"Can't open authorization file %s (%s).",
259 opts->server->authname,strerror(errno)) ;
/* Binary form of the peer address; the inet_aton() result is not checked. */
263 inet_aton(opts->clientname, &client);
264 while (fgets(line,LINELEN,f)!=NULL) {
/* A '/' in the line marks an address/prefix-length entry. */
265 if((tmp=index(line, '/'))) {
/* NOTE(review): tmp points at a character inside line, so tmp-line is
 * always smaller than strlen(line); this test looks unable to fire. */
266 if(strlen(line)<=tmp-line) {
267 msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
/* NOTE(review): inet_aton() returns nonzero on SUCCESS, so as written the
 * error is logged for addresses that parse correctly - confirm whether a
 * negation was intended. */
271 if(inet_aton(line,&addr)) {
272 msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
275 len=strtol(tmp, NULL, 0);
/* Clear the low (32-len) bits of both addresses and compare what is left.
 * NOTE(review): s_addr is normally kept in network byte order, and a len
 * of 0 would shift a 32-bit value by 32 bits (undefined) - verify. */
276 addr.s_addr>>=32-len;
277 addr.s_addr<<=32-len;
278 memcpy(&cltemp,&client,sizeof(client));
279 cltemp.s_addr>>=32-len;
280 cltemp.s_addr<<=32-len;
281 if(addr.s_addr == cltemp.s_addr) {
/* Plain entry: only the first strlen(clientname) characters are compared,
 * so a line that merely begins with the client's address also matches. */
285 if (strncmp(line,opts->clientname,strlen(opts->clientname))==0) {
295 * Read data from a file descriptor into a buffer
297 * @param f a file descriptor
298 * @param buf a buffer
299 * @param len the number of bytes to be read
/* Read from descriptor f into buf; a read() result of zero or less
 * (end of file or error) is reported through err().
 * NOTE(review): the declaration of res and whatever updates buf/len after a
 * partial read are on lines not visible in this listing. */
301 inline void readit(int f, void *buf, size_t len) {
305 if ((res = read(f, buf, len)) <= 0)
306 err("Read failed: %m");
313 * Write data from a buffer into a filedescriptor
315 * @param f a file descriptor
316 * @param buf a buffer containing data
317 * @param len the number of bytes to be written
/* Write from buf to descriptor f; a write() result of zero or less is
 * reported through err().
 * NOTE(review): the declaration of res and whatever updates buf/len after a
 * partial write are on lines not visible in this listing. */
319 inline void writeit(int f, void *buf, size_t len) {
323 if ((res = write(f, buf, len)) <= 0)
324 err("Send failed: %m");
331 * Print out a message about how to use nbd-server. Split out to a separate
332 * function so that we can call it from multiple places
/* Body of the usage printer (its header line is not visible in this
 * listing): prints the version banner, the synopsis with one line per
 * option, and finally the compiled-in configuration file path. */
335 printf("This is nbd-server version " VERSION "\n");
336 printf("Usage: port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-a timeout_sec] [-C configuration file] [-p PID file name] [-o section name]\n"
337 "\t-r|--read-only\t\tread only\n"
338 "\t-m|--multi-file\t\tmultiple file\n"
339 "\t-c|--copy-on-write\tcopy on write\n"
340 "\t-C|--config-file\tspecify an alternate configuration file\n"
341 "\t-l|--authorize-file\tfile with list of hosts that are allowed to\n\t\t\t\tconnect.\n"
342 "\t-a|--idle-time\t\tmaximum idle seconds; server terminates when\n\t\t\t\tidle time exceeded\n"
343 "\t-p|--pid-file\t\tspecify a filename to write our PID to\n"
344 "\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n\n"
345 "\tif port is set to 0, stdin is used (for running from inetd)\n"
346 "\tif file_to_export contains '%%s', it is substituted with the IP\n"
347 "\t\taddress of the machine trying to connect\n" );
/* Always prints the default path CFILE, not a path given with -C. */
348 printf("Using configuration file %s\n", CFILE);
351 /* Dumps a config file section of the given SERVER*, and exits. */
/* Print serve as a configuration-file section on stdout: a "[section_header]"
 * line followed by tab-indented "key = value" lines. Flag keys, filesize and
 * authfile are printed only when set.
 * NOTE(review): closing braces, the condition guarding the timeout line and
 * the exit announced by the comment above are not visible in this listing. */
352 void dump_section(SERVER* serve, gchar* section_header) {
353 printf("[%s]\n", section_header);
354 printf("\texportname = %s\n", serve->exportname);
/* NOTE(review): port is declared unsigned int but printed with %d. */
355 printf("\tport = %d\n", serve->port);
356 if(serve->flags & F_READONLY) {
357 printf("\treadonly = true\n");
359 if(serve->flags & F_MULTIFILE) {
360 printf("\tmultifile = true\n");
362 if(serve->flags & F_COPYONWRITE) {
363 printf("\tcopyonwrite = true\n");
365 if(serve->expected_size) {
/* NOTE(review): %Ld is a non-standard spelling; %lld is the ISO C
 * conversion for long long int. */
366 printf("\tfilesize = %Ld\n", (long long int)serve->expected_size);
368 if(serve->authname) {
369 printf("\tauthfile = %s\n", serve->authname);
372 printf("\ttimeout = %d\n", serve->timeout);
378 * Parse the command line.
380 * @param argc the argc argument to main()
381 * @param argv the argv argument to main()
/* Build a SERVER description from the command line using getopt_long().
 * Positional arguments are taken in order as port, export file name and
 * optional size; the option letters fill in flags, the authorization file,
 * the timeout, the config file position and the PID file template.
 * NOTE(review): the case labels, several local declarations, the size
 * suffix arithmetic and the return statements are not visible here. */
383 SERVER* cmdline(int argc, char *argv[]) {
/* Each long option maps onto the short option letter in the last column. */
387 struct option long_options[] = {
388 {"read-only", no_argument, NULL, 'r'},
389 {"multi-file", no_argument, NULL, 'm'},
390 {"copy-on-write", no_argument, NULL, 'c'},
391 {"authorize-file", required_argument, NULL, 'l'},
392 {"idle-time", required_argument, NULL, 'a'},
393 {"config-file", required_argument, NULL, 'C'},
394 {"pid-file", required_argument, NULL, 'p'},
395 {"output-config", required_argument, NULL, 'o'},
402 gboolean do_output=FALSE;
403 gchar* section_header;
/* Defaults: zero-filled structure, default allow file, IP-literal
 * virtualization. */
408 serve=g_new0(SERVER, 1);
409 serve->authname = g_strdup(default_authname);
410 serve->virtstyle=VIRT_IPLIT;
/* The leading '-' in the option string makes getopt_long() hand back
 * non-option arguments in place instead of permuting them to the end. */
411 while((c=getopt_long(argc, argv, "-a:C:cl:mo:rp:", long_options, &i))>=0) {
414 /* non-option argument */
/* nonspecial counts the positional arguments seen so far. */
415 switch(nonspecial++) {
417 serve->port=strtol(optarg, NULL, 0);
420 serve->exportname = g_strdup(optarg);
421 if(serve->exportname[0] != '/') {
422 fprintf(stderr, "E: The to be exported file needs to be an absolute filename!\n");
/* Size argument: a trailing k/K/m/M is recognised as a unit suffix. */
427 last=strlen(optarg)-1;
429 if (suffix == 'k' || suffix == 'K' ||
430 suffix == 'm' || suffix == 'M')
/* NOTE(review): atol() yields a long and reports no errors; sizes beyond
 * the range of long cannot be expressed this way. */
432 es = (off_t)atol(optarg);
440 serve->expected_size = es;
445 serve->flags |= F_READONLY;
448 serve->flags |= F_MULTIFILE;
452 section_header = g_strdup(optarg);
/* NOTE(review): pidftemplate is a 256-byte array; strncpy() with the full
 * array size leaves it unterminated when optarg has 256 or more characters. */
455 strncpy(pidftemplate, optarg, 256);
458 serve->flags |=F_COPYONWRITE;
/* Replace the previously recorded config file position. */
461 g_free(config_file_pos);
462 config_file_pos=g_strdup(optarg);
/* Replace the default authorization file name. */
465 g_free(serve->authname);
466 serve->authname=g_strdup(optarg);
469 serve->timeout=strtol(optarg, NULL, 0);
477 /* What's left: the port to export, the name of the to be exported
478 * file, and, optionally, the size of the file, in that order. */
485 g_critical("Need a complete configuration on the command line to output a config file section!");
488 dump_section(serve, section_header);
494 * Error codes for config file parsing
497 CFILE_NOTFOUND, /**< The configuration file is not found */
498 CFILE_MISSING_GENERIC, /**< The (required) group "generic" is missing */
499 CFILE_KEY_MISSING, /**< A (required) key is missing */
500 CFILE_VALUE_INVALID, /**< A value is syntactically invalid */
501 CFILE_PROGERR /**< Programmer error */
505 * Remove a SERVER from memory. Used from the hash table
/* Release the strings owned by a SERVER entry: exportname and authname are
 * freed with g_free().
 * NOTE(review): the line that derives `server` from s, and any further
 * frees, are not visible in this listing. */
507 void remove_server(gpointer s) {
511 g_free(server->exportname);
513 g_free(server->authname);
518 * Parse the config file.
520 * @param f the name of the config file
521 * @param e a GError. @see CFILE_ERRORS for what error values this function can
523 * @return an array of SERVER structures. If the config file is empty or does not
524 * exist, returns an empty GArray; if the config file contains an
525 * error, returns NULL, and e is set appropriately
/* Load the key file f with GLib's GKeyFile parser and turn every group into
 * a SERVER record appended to a GArray. Parameters are described by tables
 * of (name, required, type, target, flag mask); the per-export table is
 * re-pointed at a scratch SERVER `s` for each group.
 * NOTE(review): the table declarations, several locals, the selection of
 * p/p_size, the switch labels and the return statements are not visible in
 * this listing. */
527 GArray* parse_cfile(gchar* f, GError** e) {
528 const char* DEFAULT_ERROR = "Could not parse %s in group %s: %s";
529 const char* MISSING_REQUIRED_ERROR = "Could not find required value %s in group %s: %s";
531 gchar *virtstyle=NULL;
/* Per-export parameters; targets are filled in per group further down. */
533 { "exportname", TRUE, PARAM_STRING, NULL, 0 },
534 { "port", TRUE, PARAM_INT, NULL, 0 },
535 { "authfile", FALSE, PARAM_STRING, NULL, 0 },
536 { "timeout", FALSE, PARAM_INT, NULL, 0 },
537 { "filesize", FALSE, PARAM_INT, NULL, 0 },
538 { "virtstyle", FALSE, PARAM_STRING, NULL, 0 },
539 { "prerun", FALSE, PARAM_STRING, NULL, 0 },
540 { "postrun", FALSE, PARAM_STRING, NULL, 0 },
541 { "readonly", FALSE, PARAM_BOOL, NULL, F_READONLY },
542 { "multifile", FALSE, PARAM_BOOL, NULL, F_MULTIFILE },
543 { "copyonwrite", FALSE, PARAM_BOOL, NULL, F_COPYONWRITE },
544 { "autoreadonly", FALSE, PARAM_BOOL, NULL, F_AUTOREADONLY },
545 { "sparse_cow", FALSE, PARAM_BOOL, NULL, F_SPARSE },
/* NOTE(review): the table above has 13 entries and indices 0..12 receive
 * targets below, yet lp_size is 11; if the parsing loop is bounded by this
 * value, "autoreadonly" and "sparse_cow" are never read - confirm. */
547 const int lp_size=11;
/* Parameters of the [generic] group, stored directly in globals. */
549 { "user", FALSE, PARAM_STRING, &runuser, 0 },
550 { "group", FALSE, PARAM_STRING, &rungroup, 0 },
556 const char *err_msg=NULL;
564 errdomain = g_quark_from_string("parse_cfile");
565 cfile = g_key_file_new();
/* Elements are whole SERVER structures (copied in by value). */
566 retval = g_array_new(FALSE, TRUE, sizeof(SERVER));
567 if(!g_key_file_load_from_file(cfile, f, G_KEY_FILE_KEEP_COMMENTS |
568 G_KEY_FILE_KEEP_TRANSLATIONS, &err)) {
569 g_set_error(e, errdomain, CFILE_NOTFOUND, "Could not open config file.");
570 g_key_file_free(cfile);
/* The first group of the file must be "generic".
 * NOTE(review): presumably the start group can be NULL for a file without
 * groups, which strcmp() would not tolerate - verify. */
573 if(strcmp(g_key_file_get_start_group(cfile), "generic")) {
574 g_set_error(e, errdomain, CFILE_MISSING_GENERIC, "Config file does not contain the [generic] group!");
575 g_key_file_free(cfile);
578 groups = g_key_file_get_groups(cfile, NULL);
579 for(i=0;groups[i];i++) {
/* Fresh scratch record for this group; table targets point into it. */
580 memset(&s, '\0', sizeof(SERVER));
581 lp[0].target=&(s.exportname);
582 lp[1].target=&(s.port);
583 lp[2].target=&(s.authname);
584 lp[3].target=&(s.timeout);
585 lp[4].target=&(s.expected_size);
586 lp[5].target=&(virtstyle);
587 lp[6].target=&(s.prerun);
588 lp[7].target=&(s.postrun);
/* All boolean parameters share s.flags and differ only in flagval. */
589 lp[8].target=lp[9].target=lp[10].target=
590 lp[11].target=lp[12].target=&(s.flags);
592 /* After the [generic] group, start parsing exports */
597 for(j=0;j<p_size;j++) {
598 g_assert(p[j].target != NULL);
599 g_assert(p[j].ptype==PARAM_INT||p[j].ptype==PARAM_STRING||p[j].ptype==PARAM_BOOL);
/* Integer parameters are stored through a gint pointer.
 * NOTE(review): "filesize" targets s.expected_size, declared off_t; when
 * off_t is wider than gint only part of the object is written. */
602 *((gint*)p[j].target) =
603 g_key_file_get_integer(cfile,
609 *((gchar**)p[j].target) =
610 g_key_file_get_string(cfile,
/* Boolean parameters set or clear their flag bit in the target word. */
616 value = g_key_file_get_boolean(cfile,
618 p[j].paramname, &err);
621 *((gint*)p[j].target) |= p[j].flagval;
623 *((gint*)p[j].target) &= ~(p[j].flagval);
/* Error classification: a key that is simply absent is treated
 * differently from a value that failed to parse. */
629 if(err->code == G_KEY_FILE_ERROR_KEY_NOT_FOUND) {
631 /* Ignore not-found error for optional values */
635 err_msg = MISSING_REQUIRED_ERROR;
638 err_msg = DEFAULT_ERROR;
640 g_set_error(e, errdomain, CFILE_VALUE_INVALID, err_msg, p[j].paramname, groups[i], err->message);
641 g_array_free(retval, TRUE);
643 g_key_file_free(cfile);
/* Translate the virtstyle string; only the leading characters are
 * compared, so trailing text after a known keyword is accepted. */
648 if(!strncmp(virtstyle, "none", 4)) {
649 s.virtstyle=VIRT_NONE;
650 } else if(!strncmp(virtstyle, "ipliteral", 9)) {
651 s.virtstyle=VIRT_IPLIT;
652 } else if(!strncmp(virtstyle, "iphash", 6)) {
653 s.virtstyle=VIRT_IPHASH;
654 } else if(!strncmp(virtstyle, "cidrhash", 8)) {
655 s.virtstyle=VIRT_CIDR;
/* "cidrhash" must be followed by at least two more characters. */
656 if(strlen(virtstyle)<10) {
657 g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s: missing length", virtstyle, groups[i]);
658 g_array_free(retval, TRUE);
659 g_key_file_free(cfile);
/* The mask length is parsed from the text right after "cidrhash". */
662 s.cidrlen=strtol(virtstyle+8, NULL, 0);
664 g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s", virtstyle, groups[i]);
665 g_array_free(retval, TRUE);
666 g_key_file_free(cfile);
/* Fallback virtualization style: IP literal. */
670 s.virtstyle=VIRT_IPLIT;
672 /* Don't need to free this, it's not our string */
674 /* Don't append values for the [generic] group */
676 g_array_append_val(retval, s);
683 * Signal handler for SIGCHLD
684 * @param s the signal we're handling (must be SIGCHLD, or something
/* Reap every child that has terminated (non-blocking waitpid loop), log its
 * exit status and drop it from the `children` hash table.
 * NOTE(review): this calls logging and GLib hash-table functions from a
 * signal handler; presumably none of them is async-signal-safe - confirm.
 * Declarations and closing braces are not visible in this listing. */
687 void sigchld_handler(int s) {
692 while((pid=waitpid(-1, &status, WNOHANG)) > 0) {
693 if(WIFEXITED(status)) {
694 msg3(LOG_INFO, "Child exited with %d", WEXITSTATUS(status));
/* The table is queried with a pointer to the local pid, so it has to hash
 * and compare keys by value - TODO confirm where the table is created. */
696 i=g_hash_table_lookup(children, &pid);
698 msg3(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid);
700 DEBUG2("Removing %d from the list of children", pid);
701 g_hash_table_remove(children, &pid);
707 * Kill a child. Called from sigterm_handler::g_hash_table_foreach.
710 * @param value the value corresponding to the above key
711 * @param user_data a pointer which we always set to 1, so that we know what
/* GHFunc-style callback (key, value, user_data) handed to
 * g_hash_table_foreach() by sigterm_handler.
 * NOTE(review): only the reinterpretation of user_data as int* is visible;
 * the lines that act on key/value are missing from this listing. */
714 void killchild(gpointer key, gpointer value, gpointer user_data) {
716 int *parent=user_data;
723 * Handle SIGTERM and dispatch it to our children
724 * @param s the signal we're handling (must be SIGTERM, or something
725 * is severely wrong).
/* SIGTERM handler: runs killchild() over every entry of the `children`
 * table, passing the address of `parent` as user data.
 * NOTE(review): the declaration of `parent` and whatever follows the
 * traversal are not visible in this listing. */
727 void sigterm_handler(int s) {
730 g_hash_table_foreach(children, killchild, &parent);
740 * Detect the size of a file.
742 * @param fhandle An open filedescriptor
743 * @return the size of the file, or OFFT_MAX if detection was
/* Determine the size in bytes of the object behind fhandle by trying, in
 * order: the BLKGETSIZE ioctl (when compiled in), fstat() and finally
 * lseek() to the end. If nothing works err() is called.
 * NOTE(review): some declarations, return statements, an #ifdef and closing
 * braces are not visible in this listing. */
746 off_t size_autodetect(int fhandle) {
748 unsigned long sectors;
749 struct stat stat_buf;
752 #ifdef HAVE_SYS_MOUNT_H
753 #ifdef HAVE_SYS_IOCTL_H
755 DEBUG("looking for export size with ioctl BLKGETSIZE\n");
/* NOTE(review): "§ors" on the next line looks like a mis-decoded
 * "&sectors" (HTML entity "&sect"); as printed it will not compile. */
756 if (!ioctl(fhandle, BLKGETSIZE, §ors) && sectors) {
/* The sector count is converted to bytes using 512-byte sectors. */
757 es = (off_t)sectors * (off_t)512;
760 #endif /* BLKGETSIZE */
761 #endif /* HAVE_SYS_IOCTL_H */
762 #endif /* HAVE_SYS_MOUNT_H */
764 DEBUG("looking for fhandle size with fstat\n");
765 stat_buf.st_size = 0;
766 error = fstat(fhandle, &stat_buf);
/* A zero st_size is not accepted as an answer; the lseek probe follows. */
768 if(stat_buf.st_size > 0)
769 return (off_t)stat_buf.st_size;
771 err("fstat failed: %m");
774 DEBUG("looking for fhandle size with lseek SEEK_END\n");
/* Side effect: leaves the file offset at the end of the file. */
775 es = lseek(fhandle, (off_t)0, SEEK_END);
776 if (es > ((off_t)0)) {
/* Debug code: 1=EBADF, 2=ESPIPE, 3=EINVAL, 4=anything else. */
779 DEBUG2("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4)));
782 err("Could not find size of exported block device: %m");
787 * Get the file handle and offset, given an export offset.
789 * @param export An array of export files
790 * @param a The offset to get corresponding file/offset for
791 * @param fhandle [out] File descriptor
792 * @param foffset [out] Offset into fhandle
793 * @param maxbytes [out] Tells how many bytes can be read/written
794 * from fhandle starting at foffset (0 if there is no limit)
795 * @return 0 on success, -1 on failure
/* Map export offset a onto one of the files in `export` (an array of
 * FILE_INFO searched by startoff): output the file descriptor, the offset
 * inside that file and how many bytes remain before the next file starts.
 * NOTE(review): the negative-offset test, the start/end updates inside the
 * loop and the return statements are not visible in this listing. */
797 int get_filepos(GArray* export, off_t a, int* fhandle, off_t* foffset, size_t* maxbytes ) {
798 /* Negative offset not allowed */
802 /* Binary search for last file with starting offset <= a */
805 int end = export->len - 1;
806 while( start <= end ) {
807 int mid = (start + end) / 2;
/* fi is a copy of the array element, not a pointer into the array. */
808 fi = g_array_index(export, FILE_INFO, mid);
809 if( fi.startoff < a ) {
811 } else if( fi.startoff > a ) {
819 /* end should never go negative, since first startoff is 0 and a >= 0 */
/* `end` now indexes the file that contains offset a. */
822 fi = g_array_index(export, FILE_INFO, end);
823 *fhandle = fi.fhandle;
824 *foffset = a - fi.startoff;
/* When a following file exists, cap the transfer at its starting offset. */
826 if( end+1 < export->len ) {
827 FILE_INFO fi_next = g_array_index(export, FILE_INFO, end+1);
828 *maxbytes = fi_next.startoff - a;
835 * seek to a position in a file, with error handling.
836 * @param handle a filedescriptor
837 * @param a position to seek to
838 * @todo get rid of this; lastpoint is a global variable right now, but it
839 * shouldn't be. If we pass it on as a parameter, that makes things a *lot*
/* Position descriptor `handle` at absolute offset a (SEEK_SET); a negative
 * lseek() result is reported through err(). */
842 void myseek(int handle,off_t a) {
843 if (lseek(handle, a, SEEK_SET) < 0) {
844 err("Can not seek locally!\n");
849 * Write an amount of bytes at a given offset to the right file. This
850 * abstracts the write-side of the multiple file option.
852 * @param a The offset where the write should start
853 * @param buf The buffer to write from
854 * @param len The length of buf
855 * @param client The client we're serving for
856 * @return The number of bytes actually written, or -1 in case of an error
/* Perform one write() at export offset a: resolve the offset to a file and
 * file offset with get_filepos(), seek there and write. The result is
 * whatever write() returns, so fewer than len bytes may be written.
 * NOTE(review): local declarations, the failure return and the statement
 * under the maxbytes test are not visible in this listing. */
858 ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client) {
863 if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
/* maxbytes == 0 means "no limit"; otherwise the request crosses into the
 * next file and has to be shortened. */
865 if(maxbytes && len > maxbytes)
868 DEBUG4("(WRITE to fd %d offset %Lu len %u), ", fhandle, foffset, len);
870 myseek(fhandle, foffset);
871 return write(fhandle, buf, len);
875 * Call rawexpwrite repeatedly until all data has been written.
876 * @return 0 on success, nonzero on failure
/* Repeat rawexpwrite() while data remains and each call makes progress.
 * NOTE(review): the declaration of ret and the loop body that advances
 * a/buf/len are not visible in this listing. */
878 int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client) {
881 while(len > 0 && (ret=rawexpwrite(a, buf, len, client)) > 0 ) {
/* Nonzero (failure) when the last call reported an error or bytes are
 * still outstanding, e.g. after a zero-length write. */
886 return (ret < 0 || len != 0);
890 * Read an amount of bytes at a given offset from the right file. This
891 * abstracts the read-side of the multiple files option.
893 * @param a The offset where the read should start
894 * @param buf A buffer to read into
895 * @param len The size of buf
896 * @param client The client we're serving for
897 * @return The number of bytes actually read, or -1 in case of an
/* Perform one read() at export offset a: resolve the offset to a file and
 * file offset with get_filepos(), seek there and read. The result is
 * whatever read() returns, so fewer than len bytes may be delivered.
 * NOTE(review): local declarations, the failure return and the statement
 * under the maxbytes test are not visible in this listing. */
900 ssize_t rawexpread(off_t a, char *buf, size_t len, CLIENT *client) {
905 if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
/* maxbytes == 0 means "no limit"; otherwise the request crosses into the
 * next file and has to be shortened. */
907 if(maxbytes && len > maxbytes)
910 DEBUG4("(READ from fd %d offset %Lu len %u), ", fhandle, foffset, len);
912 myseek(fhandle, foffset);
913 return read(fhandle, buf, len);
917 * Call rawexpread repeatedly until all data has been read.
918 * @return 0 on success, nonzero on failure
/* Repeat rawexpread() while data remains and each call makes progress.
 * NOTE(review): the declaration of ret and the loop body that advances
 * a/buf/len are not visible in this listing. */
920 int rawexpread_fully(off_t a, char *buf, size_t len, CLIENT *client) {
923 while(len > 0 && (ret=rawexpread(a, buf, len, client)) > 0 ) {
/* Nonzero (failure) when the last call reported an error or bytes are
 * still outstanding, e.g. after hitting end of file. */
928 return (ret < 0 || len != 0);
932 * Read an amount of bytes at a given offset from the right file. This
933 * abstracts the read-side of the copyonwrite stuff, and calls
934 * rawexpread() with the right parameters to do the actual work.
935 * @param a The offset where the read should start
936 * @param buf A buffer to read into
937 * @param len The size of buf
938 * @param client The client we're going to read for
939 * @return 0 on success, nonzero on failure
/* Read len bytes at export offset a into buf. Without F_COPYONWRITE this is
 * a plain rawexpread_fully(). With it, the range is walked one DIFFPAGESIZE
 * page at a time: pages recorded in client->difmap come from the diff file,
 * all others from the original export.
 * NOTE(review): the declarations of rdlen/offset, the assignment of offset
 * and the final return are not visible in this listing. */
941 int expread(off_t a, char *buf, size_t len, CLIENT *client) {
943 off_t mapcnt, mapl, maph, pagestart;
945 if (!(client->server->flags & F_COPYONWRITE))
946 return(rawexpread_fully(a, buf, len, client));
947 DEBUG3("Asked to read %d bytes at %Lu.\n", len, (unsigned long long)a);
/* First and last page index touched by the request. */
949 mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
951 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
952 pagestart=mapcnt*DIFFPAGESIZE;
/* Bytes to take from this page: the rest of the request, or the rest of
 * the page, whichever is smaller. */
954 rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
955 len : (size_t)DIFFPAGESIZE-offset;
/* (u32)-1 is the "page not in the diff file" marker. */
956 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
957 DEBUG3("Page %Lu is at %lu\n", (unsigned long long)mapcnt,
958 (unsigned long)(client->difmap[mapcnt]));
/* NOTE(review): difmap entries are u32, so this product is presumably
 * computed in 32 bits before being widened to off_t - verify for diff
 * files beyond 4 GiB. */
959 myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset);
960 if (read(client->difffile, buf, rdlen) != rdlen) return -1;
961 } else { /* the block is not there */
962 DEBUG2("Page %Lu is not here, we read the original one\n",
963 (unsigned long long)mapcnt);
964 if(rawexpread_fully(a, buf, rdlen, client)) return -1;
/* Advance past the bytes just delivered. */
966 len-=rdlen; a+=rdlen; buf+=rdlen;
972 * Write an amount of bytes at a given offset to the right file. This
973 * abstracts the write-side of the copyonwrite option, and calls
974 * rawexpwrite() with the right parameters to do the actual work.
976 * @param a The offset where the write should start
977 * @param buf The buffer to write from
978 * @param len The length of buf
979 * @param client The client we're going to write for.
980 * @return 0 on success, nonzero on failure
/* Write len bytes from buf at export offset a. Without F_COPYONWRITE this
 * is a plain rawexpwrite_fully(). With it, nothing is written to the
 * export: each touched DIFFPAGESIZE page is updated in the diff file, and a
 * page seen for the first time is first read from the export, patched in
 * memory and stored whole.
 * NOTE(review): some declarations, the assignment of offset, two failure
 * returns and the final return are not visible in this listing. */
982 int expwrite(off_t a, char *buf, size_t len, CLIENT *client) {
983 char pagebuf[DIFFPAGESIZE];
984 off_t mapcnt,mapl,maph;
989 if (!(client->server->flags & F_COPYONWRITE))
990 return(rawexpwrite_fully(a, buf, len, client));
991 DEBUG3("Asked to write %d bytes at %Lu.\n", len, (unsigned long long)a);
/* First and last page index touched by the request. */
993 mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
995 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
996 pagestart=mapcnt*DIFFPAGESIZE ;
/* Bytes that land in this page: the rest of the request, or the rest of
 * the page, whichever is smaller. */
998 wrlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
999 len : (size_t)DIFFPAGESIZE-offset;
/* (u32)-1 is the "page not in the diff file" marker. */
1001 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1002 DEBUG3("Page %Lu is at %lu\n", (unsigned long long)mapcnt,
1003 (unsigned long)(client->difmap[mapcnt])) ;
1004 myseek(client->difffile,
1005 client->difmap[mapcnt]*DIFFPAGESIZE+offset);
1006 if (write(client->difffile, buf, wrlen) != wrlen) return -1 ;
1007 } else { /* the block is not there */
/* NOTE(review): the seek uses difffilelen, but with F_SPARSE the map entry
 * stored on the next line is mapcnt and difffilelen never grows, so later
 * lookups would seek to a different place than the page was written to -
 * confirm the intended behaviour. */
1008 myseek(client->difffile,client->difffilelen*DIFFPAGESIZE) ;
1009 client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++;
1010 DEBUG3("Page %Lu is not here, we put it at %lu\n",
1011 (unsigned long long)mapcnt,
1012 (unsigned long)(client->difmap[mapcnt]));
/* Fetch the whole original page, overlay the new bytes, store the page. */
1013 rdlen=DIFFPAGESIZE ;
1014 if (rawexpread_fully(pagestart, pagebuf, rdlen, client))
1016 memcpy(pagebuf+offset,buf,wrlen) ;
1017 if (write(client->difffile, pagebuf, DIFFPAGESIZE) !=
/* Advance past the bytes just stored. */
1021 len-=wrlen ; a+=wrlen ; buf+=wrlen ;
1027 * Do the initial negotiation.
1029 * @param client The client we're negotiating with.
/* Send the opening handshake on client->net, in this order: 8 bytes of
 * INIT_PASSWD, the magic number, the 64-bit export size, a 32-bit flags
 * word and 124 zero bytes, with the integers converted to network byte
 * order. Each write() is only checked for a negative result, so a short
 * write goes unnoticed.
 * NOTE(review): the declarations of zeros and size_host are not visible in
 * this listing. */
1031 void negotiate(CLIENT *client) {
1034 u32 flags = NBD_FLAG_HAS_FLAGS;
1036 memset(zeros, '\0', sizeof(zeros));
1037 if (write(client->net, INIT_PASSWD, 8) < 0)
1038 err("Negotiation failed: %m");
/* NOTE(review): cliserv_magic is overwritten with its byte-swapped value;
 * if it is shared state, a second call in the same process would swap it
 * back - confirm it is only negotiated once per process. */
1039 cliserv_magic = htonll(cliserv_magic);
1040 if (write(client->net, &cliserv_magic, sizeof(cliserv_magic)) < 0)
1041 err("Negotiation failed: %m");
1042 size_host = htonll((u64)(client->exportsize));
1043 if (write(client->net, &size_host, 8) < 0)
1044 err("Negotiation failed: %m");
/* Advertise read-only exports to the client. */
1045 if (client->server->flags & F_READONLY)
1046 flags |= NBD_FLAG_READ_ONLY;
1047 flags = htonl(flags);
1048 if (write(client->net, &flags, 4) < 0)
1049 err("Negotiation failed: %m");
1050 if (write(client->net, zeros, 124) < 0)
1051 err("Negotiation failed: %m");
1054 /** sending macro. */
/* SEND(net, reply): write the whole `reply` object to descriptor `net`
 * through writeit(). The expansion already ends in ';', and `reply` is
 * expanded twice without parentheses, so pass a plain lvalue. */
1055 #define SEND(net,reply) writeit( net, &reply, sizeof( reply ));
/* ERROR(client, reply, errcode): store errcode (network byte order) in
 * reply.error, send the reply, then reset reply.error to 0 for reuse.
 * The expansion is a bare brace block rather than do { } while (0), so
 * "if (x) ERROR(...); else ..." will not compile. */
1057 #define ERROR(client,reply,errcode) { reply.error = htonl(errcode); SEND(client->net,reply); reply.error = 0; }
1059 * Serve a file to a single client.
1061 * @todo This beast needs to be split up in many tiny little manageable
1062 * pieces. Preferably with a chainsaw.
1064 * @param client The client we're going to serve to.
1065 * @return when the client disconnects
/* Request loop for one connected client: read a request header from
 * client->net, validate it, then either store the payload through
 * expwrite() or fetch data through expread() and send it after the reply
 * header. A disconnect request tears down the copy-on-write state.
 * NOTE(review): the declarations of buf and len, the loop construct, the
 * continue/return statements and closing braces are not visible in this
 * listing. */
1067 int mainloop(CLIENT *client) {
1068 struct nbd_request request;
1069 struct nbd_reply reply;
1070 gboolean go_on=TRUE;
1075 DEBUG("Entering request loop!\n");
/* The reply magic never changes, so it is set once up front. */
1076 reply.magic = htonl(NBD_REPLY_MAGIC);
/* (Re)arm the idle timer before waiting for the next request. */
1085 if (client->server->timeout)
1086 alarm(client->server->timeout);
1087 readit(client->net, &request, sizeof(request));
1088 request.from = ntohll(request.from);
1089 request.type = ntohl(request.type);
1091 if (request.type==NBD_CMD_DISC) {
1092 msg2(LOG_INFO, "Disconnect request received.");
1093 if (client->server->flags & F_COPYONWRITE) {
/* NOTE(review): difmap is obtained with calloc() in copyonwrite_prepare()
 * but released with g_free() here; difffilename uses malloc()/free(). */
1094 if (client->difmap) g_free(client->difmap) ;
1095 close(client->difffile);
1096 unlink(client->difffilename);
1097 free(client->difffilename);
1103 len = ntohl(request.len);
/* request.magic is compared while still in network byte order. */
1105 if (request.magic != htonl(NBD_REQUEST_MAGIC))
1106 err("Not enough magic.");
/* NOTE(review): the size of buf is not visible here; check that this
 * limit cannot exceed it for either reads or writes. */
1107 if (len > BUFSIZE + sizeof(struct nbd_reply))
1108 err("Request too big!");
1110 printf("%s from %Lu (%Lu) len %d, ", request.type ? "WRITE" :
1111 "READ", (unsigned long long)request.from,
1112 (unsigned long long)request.from / 512, len);
/* The client's handle is echoed back unchanged in the reply. */
1114 memcpy(reply.handle, request.handle, sizeof(reply.handle));
1115 if ((request.from + len) > (OFFT_MAX)) {
1116 DEBUG("[Number too large!]");
1117 ERROR(client, reply, EINVAL);
/* NOTE(review): the sum is cast to ssize_t before the comparison; where
 * ssize_t is narrower than off_t this could truncate - verify. */
1121 if (((ssize_t)((off_t)request.from + len) > client->exportsize)) {
1123 ERROR(client, reply, EINVAL);
1127 if (request.type==NBD_CMD_WRITE) {
1128 DEBUG("wr: net->buf, ");
/* The payload is read off the socket before the read-only check below. */
1129 readit(client->net, buf, len);
1130 DEBUG("buf->exp, ");
1131 if ((client->server->flags & F_READONLY) ||
1132 (client->server->flags & F_AUTOREADONLY)) {
1133 DEBUG("[WRITE to READONLY!]");
1134 ERROR(client, reply, EPERM);
1137 if (expwrite(request.from, buf, len, client)) {
1138 DEBUG("Write failed: %m" );
1139 ERROR(client, reply, errno);
1142 SEND(client->net, reply);
1148 DEBUG("exp->buf, ");
/* Data is read into buf just past the space for the reply header, so
 * header and data can go out in a single write. */
1149 if (expread(request.from, buf + sizeof(struct nbd_reply), len, client)) {
1150 DEBUG("Read failed: %m");
1151 ERROR(client, reply, errno);
1155 DEBUG("buf->net, ");
1156 memcpy(buf, &reply, sizeof(struct nbd_reply));
1157 writeit(client->net, buf, len + sizeof(struct nbd_reply));
1164 * Set up client export array, which is an array of FILE_INFO.
1165 * Also, split a single exportfile into multiple ones, if that was asked.
1166 * @param client information on the client which we want to setup export for
/* Open the file(s) behind client->exportname, record each one as a
 * FILE_INFO (descriptor plus starting offset) in client->export, and set
 * client->exportsize to the total size, optionally overridden by the
 * configured expected_size.
 * NOTE(review): the loop construct, the declarations of i/fi/tmpname, the
 * multifile if/else lines and closing braces are not visible in this
 * listing. */
1168 void setupexport(CLIENT* client) {
1170 off_t laststartoff = 0, lastsize = 0;
1171 int multifile = (client->server->flags & F_MULTIFILE);
1173 client->export = g_array_new(TRUE, TRUE, sizeof(FILE_INFO));
1175 /* If multi-file, open as many files as we can.
1176 * If not, open exactly one file.
1177 * Calculate file sizes as we go to get total size. */
/* `mode` holds open(2) access flags, despite its mode_t type. */
1181 mode_t mode = (client->server->flags & F_READONLY) ? O_RDONLY : O_RDWR;
/* Multi-file exports are named "<exportname>.<i>". */
1184 tmpname=g_strdup_printf("%s.%d", client->exportname, i);
1186 tmpname=g_strdup(client->exportname);
1188 DEBUG2( "Opening %s\n", tmpname );
1189 fi.fhandle = open(tmpname, mode);
1190 if(fi.fhandle == -1 && mode == O_RDWR) {
1191 /* Try again because maybe media was read-only */
1192 fi.fhandle = open(tmpname, O_RDONLY);
/* The fallback changes flags on the SERVER structure itself, not just on
 * this client. */
1193 if(fi.fhandle != -1) {
1194 client->server->flags |= F_AUTOREADONLY;
1195 client->server->flags |= F_READONLY;
1198 if(fi.fhandle == -1) {
/* In multi-file mode a failed open after the first file is tested for
 * separately from the fatal case below. */
1199 if(multifile && i>0)
1201 err("Could not open exported file: %m");
/* Files are laid end to end in the export's address space. */
1203 fi.startoff = laststartoff + lastsize;
1204 g_array_append_val(client->export, fi);
1207 /* Starting offset and size of this file will be used to
1208 * calculate starting offset of next file */
1209 laststartoff = fi.startoff;
1210 lastsize = size_autodetect(fi.fhandle);
1216 /* Set export size to total calculated size */
1217 client->exportsize = laststartoff + lastsize;
1219 /* Export size may be overridden */
1220 if(client->server->expected_size) {
1221 /* desired size must be <= total calculated size */
1222 if(client->server->expected_size > client->exportsize) {
1223 err("Size of exported file is too big\n");
1226 client->exportsize = client->server->expected_size;
1229 msg3(LOG_INFO, "Size of exported file/device is %Lu", (unsigned long long)client->exportsize);
1231 msg3(LOG_INFO, "Total number of files: %d", i);
1235 int copyonwrite_prepare(CLIENT* client) {
/* Creates the client's diff file and allocates client->difmap.
 * NOTE(review): the declaration of i, the final line of the snprintf() call
 * and the function's return statement are missing from this listing. */
/* The diff file name lives in a fixed 1024-byte heap buffer. */
1237 if ((client->difffilename = malloc(1024))==NULL)
1238 err("Failed to allocate string for diff file name");
/* Name format: "<exportname>-<clientname>-<number>.diff"; the argument that
 * supplies <number> is on a line not visible here. */
1239 snprintf(client->difffilename, 1024, "%s-%s-%d.diff",client->exportname,client->clientname,
1241 client->difffilename[1023]='\0';
1242 msg3(LOG_INFO,"About to create map and diff file %s",client->difffilename) ;
/* Create the diff file read-write with mode 0600, truncating any existing
 * file of the same name. */
1243 client->difffile=open(client->difffilename,O_RDWR | O_CREAT | O_TRUNC,0600) ;
1244 if (client->difffile<0) err("Could not create diff file (%m)") ;
/* The map has one u32 entry per whole DIFFPAGESIZE bytes of the export and
 * every entry starts out as (u32)-1 (presumably meaning "no entry in the diff
 * file for this page" - confirm against the code that reads difmap).
 * NOTE(review): the integer division drops a trailing partial page, and for
 * an export smaller than DIFFPAGESIZE the element count is 0, in which case
 * calloc() may return NULL and trigger err(). Confirm that code indexing
 * difmap never reaches the partial last page. */
1245 if ((client->difmap=calloc(client->exportsize/DIFFPAGESIZE,sizeof(u32)))==NULL)
1246 err("Could not allocate memory") ;
1247 for (i=0;i<client->exportsize/DIFFPAGESIZE;i++) client->difmap[i]=(u32)-1 ;
1253 * Run a command. This is used for the ``prerun'' and ``postrun'' config file
1256 * @param command the command to be run. Read from the config file
1257 * @param file the file name we're about to export
1259 int do_run(gchar* command, gchar* file) {
/* NOTE(review): only part of this function is visible in this listing; the
 * local declarations, the code that uses cmd and the return statement are
 * missing. */
/* Nothing is built when command is NULL or the empty string. */
1263 if(command && *command) {
/* command itself is the printf-style format and file is its only argument.
 * NOTE(review): because the format comes from configuration, it must contain
 * no conversion other than a single %s; anything else reads arguments that
 * were never passed. */
1264 cmd = g_strdup_printf(command, file);
1272 * Serve a connection.
1274 * @todo allow for multithreading, perhaps use libevent. Not just yet, though;
1275 * follow the road map.
1277 * @param client a connected client
1279 void serveconnection(CLIENT *client) {
/* NOTE(review): some original lines are missing from this listing, including
 * the body of the first if and a statement between setmysockopt() and the
 * final do_run(). */
/* Run the server's prerun command for this export; a non-zero result takes
 * the branch below (its body is not visible here). */
1280 if(do_run(client->server->prerun, client->exportname)) {
1283 setupexport(client);
/* Copy-on-write servers get their diff file and map prepared here; the
 * return value of copyonwrite_prepare() is not checked. */
1285 if (client->server->flags & F_COPYONWRITE) {
1286 copyonwrite_prepare(client);
1289 setmysockopt(client->net);
/* Run the postrun command; its result is ignored. */
1292 do_run(client->server->postrun, client->exportname);
1296 * Find the name of the file we have to serve. This will use g_strdup_printf
1297 * to put the IP address of the client inside a filename containing
1298 * "%s" (in the form as specified by the "virtstyle" option). That name
1299 * is then written to client->exportname.
1301 * @param net A socket connected to an nbd client
1302 * @param client information about the client. The IP address in human-readable
1303 * format will be written to a new char* buffer, the address of which will be
1304 * stored in client->clientname.
1306 void set_peername(int net, CLIENT *client) {
/* NOTE(review): the declarations of peername, netname, tmp and i, the case
 * labels of the switch and some statements are missing from this listing;
 * the comments below cover only what is visible. */
1307 struct sockaddr_in addrin;
1308 struct sockaddr_in netaddr;
/* NOTE(review): addrinlen is a size_t, yet getpeername() below receives its
 * address cast to socklen_t *. Where the two types differ in size only part
 * of the object is read and written; declaring it socklen_t would make the
 * cast unnecessary. */
1309 size_t addrinlen = sizeof( addrin );
/* NOTE(review): the error text names getsockname although the call that
 * failed is getpeername(). */
1315 if (getpeername(net, (struct sockaddr *) &addrin, (socklen_t *)&addrinlen) < 0)
1316 err("getsockname failed: %m");
/* Copy the textual address right away: inet_ntoa() returns a pointer to a
 * static buffer that the later inet_ntoa() call in this function reuses. */
1317 peername = g_strdup(inet_ntoa(addrin.sin_addr));
1318 switch(client->server->virtstyle) {
/* Export name copied unchanged from the server configuration. */
1320 client->exportname=g_strdup(client->server->exportname);
/* Scans peername for '.' characters; the statement applied to each dot is not
 * visible here. NOTE(review): strlen() is re-evaluated on every iteration. */
1323 for(i=0;i<strlen(peername);i++) {
1324 if(peername[i]=='.') {
/* The configured export name is used as a printf-style format with peername
 * as its single argument. */
1329 client->exportname=g_strdup_printf(client->server->exportname, peername);
/* Derives a network address by clearing the low (32 - cidrlen) bits of
 * s_addr with a right shift followed by a left shift.
 * NOTE(review): s_addr is stored in network byte order, so on little-endian
 * hosts the cleared bits are not the host part of the address; and a cidrlen
 * of 0 yields a shift count of 32, which is undefined for a 32-bit operand.
 * Converting with ntohl()/htonl() around an explicit mask would avoid both. */
1332 memcpy(&netaddr, &addrin, addrinlen);
1333 netaddr.sin_addr.s_addr>>=32-(client->server->cidrlen);
1334 netaddr.sin_addr.s_addr<<=32-(client->server->cidrlen);
1335 netname = inet_ntoa(netaddr.sin_addr);
/* "<network>/<peer address>" is substituted into the configured export name.
 * NOTE(review): no release of tmp is visible in this listing - confirm that
 * it is freed. */
1336 tmp=g_strdup_printf("%s/%s", netname, peername);
1337 client->exportname=g_strdup_printf(client->server->exportname, tmp);
1341 msg4(LOG_INFO, "connect from %s, assigned file is %s",
1342 peername, client->exportname);
/* clientname gets its own copy of the address text. */
1343 client->clientname=g_strdup(peername);
1349 * @param data a pointer to pid_t which should be freed
1351 void destroy_pid_t(gpointer data) {
/* NOTE(review): the body of this function is not visible in this listing.
 * setup_servers() passes this function to g_hash_table_new_full() as the
 * value-destroy callback of the `children` table, so it is invoked with the
 * value pointer of an entry when that entry is removed or the table is
 * destroyed. */
1356 * Go daemon (unless we specified at compile time that we didn't want this)
1357 * @param serve the first server of our configuration. If its port is zero,
1358 * then do not daemonize, because we're doing inetd then. This parameter
1359 * is only used to create a PID file of the form
1360 * /var/run/nbd-server.<port>.pid; it's not modified in any way.
1362 #if !defined(NODAEMON) && !defined(NOFORK)
1363 void daemonize(SERVER* serve) {
/* NOTE(review): several original lines are missing from this listing (local
 * declarations, the bodies/branches of the conditionals below and whatever
 * happens between the first check and the PID-file code). */
/* A server whose port is zero takes this branch; per the header comment that
 * is inetd mode, in which the process must not daemonize. */
1366 if(serve && !(serve->port)) {
/* No PID-file template has been set yet: fall back to a built-in default.
 * Which of the two defaults applies is decided by lines not visible here
 * (presumably whether serve is non-NULL).
 * NOTE(review): these defaults start with "/var/run/server", whereas the
 * header comment speaks of /var/run/nbd-server.<port>.pid - confirm which
 * one is intended. */
1372 if(!*pidftemplate) {
1374 strncpy(pidftemplate, "/var/run/server.%d.pid", 255);
1376 strncpy(pidftemplate, "/var/run/server.pid", 255);
/* The template is used as the snprintf() format, with the port (or 0 when
 * serve is NULL) as its only argument.
 * NOTE(review): a template that was set elsewhere must therefore contain at
 * most one integer conversion. */
1379 snprintf(pidfname, 255, pidftemplate, serve ? serve->port : 0);
1380 pidf=fopen(pidfname, "w");
/* Write this process's PID, followed by a newline, to the PID file. */
1382 fprintf(pidf,"%d\n", (int)getpid());
/* Failure path: inability to write the PID file is reported on stderr and
 * execution continues. */
1386 fprintf(stderr, "Not fatal; continuing");
/* Alternative definition: daemonize(serve) expands to nothing. The directive
 * selecting it is not visible here; presumably it is the #else of the
 * condition above. */
1390 #define daemonize(serve)
1391 #endif /* !defined(NODAEMON) && !defined(NOFORK) */
1394 * Connect a server's socket.
1396 * @param serve the server we want to connect.
1398 void setup_serve(SERVER *serve) {
/* Creates the listening TCP socket for one server entry, stores it in
 * serve->socket, and installs the SIGCHLD and SIGTERM handlers.
 * NOTE(review): the declarations of `yes` and `sock_flags` and the statements
 * that follow the socket(), bind() and listen() checks are missing from this
 * listing. */
1399 struct sockaddr_in addrin;
1400 struct sigaction sa;
/* NOTE(review): the length is held in an int; bind() takes a socklen_t. */
1401 int addrinlen = sizeof(addrin);
1408 if ((serve->socket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0)
1411 /* lose the pesky "Address already in use" error message */
1412 if (setsockopt(serve->socket,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) {
1413 err("setsockopt SO_REUSEADDR");
/* Keep-alive is enabled on the listening socket as well. */
1415 if (setsockopt(serve->socket,SOL_SOCKET,SO_KEEPALIVE,&yes,sizeof(int)) == -1) {
1416 err("setsockopt SO_KEEPALIVE");
1419 /* make the listening socket non-blocking */
1420 if ((sock_flags = fcntl(serve->socket, F_GETFL, 0)) == -1) {
1421 err("fcntl F_GETFL");
1423 if (fcntl(serve->socket, F_SETFL, sock_flags | O_NONBLOCK) == -1) {
1424 err("fcntl F_SETFL O_NONBLOCK");
1427 DEBUG("Waiting for connections... bind, ");
/* Bind to the configured port with an address of 0, i.e. not restricted to
 * one local address.
 * NOTE(review): only these three members of addrin are assigned in the
 * visible code; the rest of the structure is not shown being cleared. */
1428 addrin.sin_family = AF_INET;
1429 addrin.sin_port = htons(serve->port);
1430 addrin.sin_addr.s_addr = 0;
1431 if (bind(serve->socket, (struct sockaddr *) &addrin, addrinlen) < 0)
1434 if (listen(serve->socket, 1) < 0)
/* Install the SIGCHLD handler; SA_RESTART makes interrupted system calls
 * restart instead of failing with EINTR.
 * NOTE(review): setup_servers() calls this function once per server, so both
 * handlers are re-installed for every server entry. */
1436 sa.sa_handler = sigchld_handler;
1437 sigemptyset(&sa.sa_mask);
1438 sa.sa_flags = SA_RESTART;
1439 if(sigaction(SIGCHLD, &sa, NULL) == -1)
1440 err("sigaction: %m");
/* Same for SIGTERM, reusing the sigaction structure. */
1441 sa.sa_handler = sigterm_handler;
1442 sigemptyset(&sa.sa_mask);
1443 sa.sa_flags = SA_RESTART;
1444 if(sigaction(SIGTERM, &sa, NULL) == -1)
1445 err("sigaction: %m");
1449 * Connect our servers.
1451 void setup_servers(GArray* servers) {
1454 for(i=0;i<servers->len;i++) {
1455 setup_serve(&(g_array_index(servers, SERVER, i)));
1457 children=g_hash_table_new_full(g_int_hash, g_int_equal, NULL, destroy_pid_t);
1461 * Loop through the available servers, and serve them.
1463 int serveloop(GArray* servers) {
/* Waits with select() on the listening sockets of all servers, accepts
 * incoming connections, checks authorization and forks a child that serves
 * each accepted client.
 * NOTE(review): many original lines are missing from this listing (local
 * declarations, the outer loop, the select() timeout setup and the cleanup
 * statements of the error branches); comments cover only what is visible. */
1464 struct sockaddr_in addrin;
/* NOTE(review): accept() treats this length as an in/out argument; it is
 * initialised only once, here. */
1465 socklen_t addrinlen=sizeof(addrin);
1475 * Set up the master fd_set. The set of descriptors we need
1476 * to select() for never changes anyway and it buys us a *lot*
1477 * of time to only build this once. However, if we ever choose
1478 * to not fork() for clients anymore, we may have to revisit
/* Add every server's socket to mset and remember the highest descriptor. */
1483 for(i=0;i<servers->len;i++) {
1484 sock=(g_array_index(servers, SERVER, i)).socket;
1485 FD_SET(sock, &mset);
1486 max=sock>max?sock:max;
/* select() modifies the set it is given, so it works on a fresh copy of the
 * master set each time. */
1493 memcpy(&rset, &mset, sizeof(fd_set));
1496 if(select(max+1, &rset, NULL, NULL, &tv)>0) {
/* Check each server's socket for a pending connection. */
1498 for(i=0;i<servers->len;i++) {
1499 serve=&(g_array_index(servers, SERVER, i));
1500 if(FD_ISSET(serve->socket, &rset)) {
1501 if ((net=accept(serve->socket, (struct sockaddr *) &addrin, &addrinlen)) < 0)
/* Per-connection state; exportsize starts at OFFT_MAX. */
1504 client = g_malloc(sizeof(CLIENT));
1505 client->server=serve;
1506 client->exportsize=OFFT_MAX;
1508 set_peername(net, client);
1509 if (!authorized_client(client)) {
1510 msg2(LOG_INFO,"Unauthorized client") ;
1514 msg2(LOG_INFO,"Authorized client") ;
/* The child's pid is heap-allocated because the same pointer is stored in the
 * `children` table as both key and value. */
1515 pid=g_malloc(sizeof(pid_t));
1517 if ((*pid=fork())<0) {
1518 msg3(LOG_INFO,"Could not fork (%s)",strerror(errno)) ;
1522 if (*pid>0) { /* parent */
1524 g_hash_table_insert(children, pid, pid);
/* From here on the visible statements are the child's: it drops its copy of
 * the children table and closes the listening sockets.
 * NOTE(review): this loop reuses i, the index of the enclosing server loop.
 * NOTE(review): the array is indexed as SERVER* below, while every other
 * access in this file indexes it as SERVER and takes the element's address;
 * this looks like it should be &(g_array_index(servers, SERVER, i)).
 * NOTE(review): the block comment that opens after close() is not terminated
 * in this listing (its last line is missing). */
1528 g_hash_table_destroy(children);
1529 for(i=0;i<servers->len;i++) {
1530 serve=g_array_index(servers, SERVER*, i);
1531 close(serve->socket);
1533 /* FALSE does not free the
1534 actual data. This is required,
1535 because the client has a
1536 direct reference into that
1537 data, and otherwise we get a
1539 g_array_free(servers, FALSE);
1541 msg2(LOG_INFO,"Starting to serve");
1542 serveconnection(client);
1551 * Set up user-ID and/or group-ID
1553 void dousers(void) {
1557 gr=getgrnam(rungroup);
1558 if(setgid(gr->gr_gid)<0)
1559 msg3(LOG_DEBUG, "Could not set GID: %s", strerror(errno));
1562 pw=getpwnam(runuser);
1563 if(setuid(pw->pw_uid)<0)
1564 msg3(LOG_DEBUG, "Could not set UID: %s", strerror(errno));
1569 * Main entry point...
1571 int main(int argc, char *argv[]) {
1576 if (sizeof( struct nbd_request )!=28) {
1577 fprintf(stderr,"Bad size of structure. Alignment problems?\n");
1581 memset(pidftemplate, '\0', 256);
1584 config_file_pos = g_strdup(CFILE);
1585 serve=cmdline(argc, argv);
1586 servers = parse_cfile(config_file_pos, &err);
1587 if(!servers || !servers->len) {
1588 g_warning("Could not parse config file: %s",
1589 err ? err->message : "Unknown error");
1592 g_array_append_val(servers, *serve);
1594 if (!(serve->port)) {
1597 /* You really should define ISSERVER if you're going to use
1598 * inetd mode, but if you don't, closing stdout and stderr
1599 * (which inetd had connected to the client socket) will let it
1603 open("/dev/null", O_WRONLY);
1604 open("/dev/null", O_WRONLY);
1606 client=g_malloc(sizeof(CLIENT));
1607 client->server=serve;
1609 client->exportsize=OFFT_MAX;
1610 set_peername(0,client);
1611 serveconnection(client);
1615 if((!serve) && (!servers||!servers->len)) {
1616 g_message("Nothing to do! Bye!");
1620 setup_servers(servers);