2 * Network Block Device - server
4 * Copyright 1996-1998 Pavel Machek, distribute under GPL
5 * <pavel@atrey.karlin.mff.cuni.cz>
6 * Copyright 2001-2004 Wouter Verhelst <wouter@debian.org>
7 * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk>
9 * Version 1.0 - hopefully 64-bit-clean
10 * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au>
 * Version 1.2 - autodetect size of block devices, thanks to Peter T. Breuer <ptb@it.uc3m.es>
12 * Version 1.5 - can compile on Unix systems that don't have 64 bit integer
13 * type, or don't have 64 bit file offsets by defining FS_32BIT
14 * in compile options for nbd-server *only*. This can be done
15 * with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the
16 * original autoconf input file, or I would make it a configure
17 * option.) Ken Yap <ken@nlc.net.au>.
18 * Version 1.6 - fix autodetection of block device size and really make 64 bit
19 * clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk>
20 * Version 2.0 - Version synchronised with client
21 * Version 2.1 - Reap zombie client processes when they exit. Removed
22 * (uncommented) the _IO magic, it's no longer necessary. Wouter
23 * Verhelst <wouter@debian.org>
 * Version 2.2 - Auto switch to read-only mode (useful for floppies).
25 * Version 2.3 - Fixed code so that Large File Support works. This
26 * removes the FS_32BIT compile-time directive; define
27 * _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be
28 * using FS_32BIT. This will allow you to use files >2GB instead of
29 * having to use the -m option. Wouter Verhelst <wouter@debian.org>
30 * Version 2.4 - Added code to keep track of children, so that we can
31 * properly kill them from initscripts. Add a call to daemon(),
32 * so that processes don't think they have to wait for us, which is
33 * interesting for initscripts as well. Wouter Verhelst
35 * Version 2.5 - Bugfix release: forgot to reset child_arraysize to
36 * zero after fork()ing, resulting in nbd-server going berserk
37 * when it receives a signal with at least one child open. Wouter
38 * Verhelst <wouter@debian.org>
39 * 10/10/2003 - Added socket option SO_KEEPALIVE (sf.net bug 819235);
40 * rectified type of mainloop::size_host (sf.net bugs 814435 and
41 * 817385); close the PID file after writing to it, so that the
42 * daemon can actually be found. Wouter Verhelst
44 * 10/10/2003 - Size of the data "size_host" was wrong and so was not
45 * correctly put in network endianness. Many types were corrected
46 * (size_t and off_t instead of int). <vspaceg@sourceforge.net>
47 * Version 2.6 - Some code cleanup.
48 * Version 2.7 - Better build system.
49 * 11/02/2004 - Doxygenified the source, modularized it a bit. Needs a
50 * lot more work, but this is a start. Wouter Verhelst
54 /* Includes LFS defines, which defines behaviours of some of the following
55 * headers, so must come before those */
58 #include <sys/types.h>
59 #include <sys/socket.h>
61 #include <sys/select.h> /* select */
62 #include <sys/wait.h> /* wait */
63 #ifdef HAVE_SYS_IOCTL_H
64 #include <sys/ioctl.h>
66 #include <sys/param.h>
67 #ifdef HAVE_SYS_MOUNT_H
68 #include <sys/mount.h> /* For BLKGETSIZE */
70 #include <signal.h> /* sigaction */
71 #include <netinet/tcp.h>
72 #include <netinet/in.h> /* sockaddr_in, htons, in_addr */
73 #include <netdb.h> /* hostent, gethostby*, getservby* */
80 #include <arpa/inet.h>
/* used in cliserv.h, so must come first */
#define MY_NAME "nbd_server"

/**
 * Default position of the config file. SYSCONFDIR only gets a fallback value
 * here, so a definition supplied by the build (-DSYSCONFDIR=...) is not
 * clobbered by a macro redefinition.
 **/
#ifndef SYSCONFDIR
#define SYSCONFDIR "/etc"
#endif
#define CFILE SYSCONFDIR "/nbd-server/config"
98 /** Where our config file actually is */
99 gchar* config_file_pos;
/**
 * Logging macros, now nothing goes to syslog unless you say ISSERVER.
 * With ISSERVER defined the message is sent to syslog() at priority `a';
 * otherwise the priority is dropped and the message is handed to g_message().
 **/
#ifdef ISSERVER
#define msg2(a,b) syslog(a,b)
#define msg3(a,b,c) syslog(a,b,c)
#define msg4(a,b,c,d) syslog(a,b,c,d)
#else
#define msg2(a,b) g_message(b)
#define msg3(a,b,c) g_message(b,c)
#define msg4(a,b,c,d) g_message(b,c,d)
#endif
/* Debugging macros: thin printf() wrappers that expand to nothing unless
 * debugging output is compiled in.
 * NOTE(review): the guard symbol is assumed to be DODBG -- confirm against
 * the build system before relying on it. */
#ifdef DODBG
#define DEBUG( a ) printf( a )
#define DEBUG2( a,b ) printf( a,b )
#define DEBUG3( a,b,c ) printf( a,b,c )
#else
#define DEBUG( a )
#define DEBUG2( a,b )
#define DEBUG3( a,b,c )
#endif
/* Fallback so that the file still builds without autoconf's config.h */
#ifndef PACKAGE_VERSION
#define PACKAGE_VERSION ""
#endif
/**
 * The highest value a variable of type off_t can reach.
 **/
/* This is starting to get ugly. If someone knows a better way to find
 * the maximum value of a signed type *without* relying on overflow
 * (doing so breaks on 64bit architectures), that would be nice.
 *
 * Actually, do we need this at all? Can't we just say '0 is autodetect', and
 * live with it? Or better yet, use an extra flag, or so?
 * Answer: yes, we need it, as the hunksize is defined to this when the
 * multiple file thingy isn't used.
 *
 * The expansion is wrapped in one outer pair of parentheses so that the
 * trailing "+127" cannot bind to a neighbouring operator at the use site
 * (e.g. "x - OFFT_MAX" or "OFFT_MAX * n").
 */
#define OFFT_MAX ((((((off_t)1)<<((sizeof(off_t)-1)*8))-1)<<7)+127)
#define LINELEN 256	  /**< Size of static buffer used to read the
			       authorization file (yuck) */
#define BUFSIZE (1024*1024) /**< Size of buffer that can hold requests */
#define GIGA (1*1024*1024*1024) /**< 1 Gigabyte. Used as hunksize when doing
				     the multiple file thingy. @todo: make this a
				     configuration option. */
#define DIFFPAGESIZE 4096 /**< diff file uses those chunks */
#define F_READONLY 1      /**< flag to tell us a file is readonly */
#define F_MULTIFILE 2	  /**< flag to tell us a file is exported using -m */
#define F_COPYONWRITE 4	  /**< flag to tell us a file is exported using
			       -c */
#define F_AUTOREADONLY 8  /**< flag to tell us a file is set to autoreadonly */
151 GHashTable *children;
152 char pidfname[256]; /**< name of our PID file */
153 char default_authname[] = "/etc/nbd_server.allow"; /**< default name of allow file */
156 * Variables associated with a server.
159 gchar* exportname; /**< (unprocessed) filename of the file we're exporting */
160 off_t hunksize; /**< size of a hunk of an exported file */
161 off_t expected_size; /**< size of the exported file as it was told to
162 us through configuration */
163 unsigned int port; /**< port we're exporting this file at */
164 char* authname; /**< filename of the authorization file */
165 int flags; /**< flags associated with this exported file */
166 unsigned int timeout;/**< how long a connection may be idle
168 int socket; /**< The socket of this server. */
172 * Variables associated with a client socket.
175 off_t exportsize; /**< size of the file we're exporting */
176 char *clientname; /**< peer */
177 char *exportname; /**< (processed) filename of the file we're exporting */
178 GArray *export; /**< array of filedescriptors of exported files;
179 only the first is actually used unless we're
180 doing the multiple file option */
181 int net; /**< The actual client socket */
182 SERVER *server; /**< The server this client is getting data from */
183 char* difffilename; /**< filename of the copy-on-write file, if any */
184 int difffile; /**< filedescriptor of copyonwrite file. @todo
185 shouldn't this be an array too? (cfr export) Or
186 make -m and -c mutually exclusive */
187 u32 difffilelen; /**< number of pages in difffile */
188 u32 *difmap; /**< see comment on the global difmap for this one */
/**
 * Type of configuration file values
 **/
typedef enum {
	PARAM_INT,		/**< This parameter is an integer */
	PARAM_STRING,		/**< This parameter is a string */
	PARAM_BOOL,		/**< This parameter is a boolean */
} PARAM_TYPE;
200 * Configuration file values
203 gchar *paramname; /**< Name of the parameter, as it appears in
205 gboolean required; /**< Whether this is a required (as opposed to
206 optional) parameter */
207 PARAM_TYPE ptype; /**< Type of the parameter. */
208 gpointer target; /**< Pointer to where the data of this
209 parameter should be written. If ptype is
210 PARAM_BOOL, the data is or'ed rather than
212 gint flagval; /**< Flag mask for this parameter in case ptype
217 * Check whether a client is allowed to connect. Works with an authorization
218 * file which contains one line per machine, no wildcards.
220 * @param opts The client who's trying to connect.
221 * @return 0 - authorization refused, 1 - OK
223 int authorized_client(CLIENT *opts) {
228 if ((f=fopen(opts->server->authname,"r"))==NULL) {
229 msg4(LOG_INFO,"Can't open authorization file %s (%s).",
230 opts->server->authname,strerror(errno)) ;
234 while (fgets(line,LINELEN,f)!=NULL) {
235 if (strncmp(line,opts->clientname,strlen(opts->clientname))==0) {
/**
 * Read data from a file descriptor into a buffer. Keeps calling read()
 * until exactly len bytes have arrived; a failed read or end-of-file is
 * reported through err().
 *
 * @param f a file descriptor
 * @param buf a buffer
 * @param len the number of bytes to be read
 **/
inline void readit(int f, void *buf, size_t len) {
	char *pos = buf;	/* arithmetic on void* is a GNU extension */
	ssize_t res;

	while (len > 0) {
		DEBUG("*");
		if ((res = read(f, pos, len)) <= 0)
			err("Read failed: %m");
		len -= res;
		pos += res;
	}
}
/**
 * Write data from a buffer into a filedescriptor. Keeps calling write()
 * until all len bytes have been accepted; a failed write is reported
 * through err().
 *
 * @param f a file descriptor
 * @param buf a buffer containing data
 * @param len the number of bytes to be written
 **/
inline void writeit(int f, void *buf, size_t len) {
	char *pos = buf;	/* arithmetic on void* is a GNU extension */
	ssize_t res;

	while (len > 0) {
		DEBUG("+");
		if ((res = write(f, pos, len)) <= 0)
			err("Send failed: %m");
		len -= res;
		pos += res;
	}
}
281 * Print out a message about how to use nbd-server. Split out to a separate
282 * function so that we can call it from multiple places
285 printf("This is nbd-server version " VERSION "\n");
286 printf("Usage: port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-a timeout_sec] [-C configuration file]\n"
287 "\t-r|--read-only\t\tread only\n"
288 "\t-m|--multi-file\t\tmultiple file\n"
289 "\t-c|--copy-on-write\tcopy on write\n"
290 "\t-C|--config-file\tspecify an alternat configuration file\n"
291 "\t-l|--authorize-file\tfile with list of hosts that are allowed to\n\t\t\t\tconnect.\n"
292 "\t-a|--idle-time\t\tmaximum idle seconds; server terminates when\n\t\t\t\tidle time exceeded\n\n"
293 "\tif port is set to 0, stdin is used (for running from inetd)\n"
294 "\tif file_to_export contains '%%s', it is substituted with the IP\n"
295 "\t\taddress of the machine trying to connect\n" );
296 printf("Using configuration file %s\n", CFILE);
300 * Parse the command line.
302 * @param argc the argc argument to main()
303 * @param argv the argv argument to main()
305 SERVER* cmdline(int argc, char *argv[]) {
309 struct option long_options[] = {
310 {"read-only", no_argument, NULL, 'r'},
311 {"multi-file", no_argument, NULL, 'm'},
312 {"copy-on-write", no_argument, NULL, 'c'},
313 {"authorize-file", required_argument, NULL, 'l'},
314 {"idle-time", required_argument, NULL, 'a'},
315 {"config-file", required_argument, NULL, 'C'},
326 serve=g_new0(SERVER, 1);
327 serve->hunksize=OFFT_MAX;
328 serve->authname = g_strdup(default_authname);
329 while((c=getopt_long(argc, argv, "-a:C:cl:mr", long_options, &i))>=0) {
332 /* non-option argument */
333 switch(nonspecial++) {
335 serve->port=strtol(optarg, NULL, 0);
338 serve->exportname = g_strdup(optarg);
339 if(serve->exportname[0] != '/') {
340 fprintf(stderr, "E: The to be exported file needs to be an absolute filename!\n");
345 last=strlen(optarg)-1;
347 if (suffix == 'k' || suffix == 'K' ||
348 suffix == 'm' || suffix == 'M')
350 es = (off_t)atol(optarg);
358 serve->expected_size = es;
363 serve->flags |= F_READONLY;
366 serve->flags |= F_MULTIFILE;
367 serve->hunksize = 1*GIGA;
370 serve->flags |=F_COPYONWRITE;
373 g_free(config_file_pos);
374 config_file_pos=g_strdup(optarg);
377 g_free(serve->authname);
378 serve->authname=g_strdup(optarg);
381 serve->timeout=strtol(optarg, NULL, 0);
389 /* What's left: the port to export, the name of the to be exported
390 * file, and, optionally, the size of the file, in that order. */
/**
 * Error codes for config file parsing
 **/
typedef enum {
	CFILE_NOTFOUND,		/**< The configuration file is not found */
	CFILE_MISSING_GENERIC,	/**< The (required) group "generic" is missing */
	CFILE_KEY_MISSING,	/**< A (required) key is missing */
	CFILE_VALUE_INVALID,	/**< A value is syntactically invalid */
	CFILE_PROGERR		/**< Programmer error */
} CFILE_ERRORS;
410 * Remove a SERVER from memory. Used from the hash table
412 void remove_server(gpointer s) {
416 g_free(server->exportname);
418 g_free(server->authname);
423 * Parse the config file.
425 * @param f the name of the config file
426 * @param e a GError. @see CFILE_ERRORS for what error values this function can
428 * @return a GHashTable of SERVER* pointers, with the port number as the hash
429 * key. If the config file is empty or does not exist, returns an empty
430 * GHashTable; if the config file contains an error, returns NULL, and
431 * e is set appropriately
433 GArray* parse_cfile(gchar* f, GError** e) {
436 { "exportname", TRUE, PARAM_STRING, NULL, 0 },
437 { "port", TRUE, PARAM_INT, NULL, 0 },
438 { "authfile", FALSE, PARAM_STRING, NULL, 0 },
439 { "timeout", FALSE, PARAM_INT, NULL, 0 },
440 { "filesize", FALSE, PARAM_INT, NULL, 0 },
441 { "readonly", FALSE, PARAM_BOOL, NULL, F_READONLY },
442 { "multifile", FALSE, PARAM_BOOL, NULL, F_MULTIFILE },
443 { "copyonwrite", FALSE, PARAM_BOOL, NULL, F_COPYONWRITE },
454 memset(&s, '\0', sizeof(SERVER));
455 errdomain = g_quark_from_string("parse_cfile");
456 cfile = g_key_file_new();
457 retval = g_array_new(FALSE, TRUE, sizeof(SERVER));
458 if(!g_key_file_load_from_file(cfile, f, G_KEY_FILE_KEEP_COMMENTS |
459 G_KEY_FILE_KEEP_TRANSLATIONS, &err)) {
460 g_set_error(e, errdomain, CFILE_NOTFOUND, "Could not open config file.");
461 g_key_file_free(cfile);
464 if(strcmp(g_key_file_get_start_group(cfile), "generic")) {
465 g_set_error(e, errdomain, CFILE_MISSING_GENERIC, "Config file does not contain the [generic] group!");
466 g_key_file_free(cfile);
469 groups = g_key_file_get_groups(cfile, NULL);
470 for(i=1;groups[i];i++) {
471 p[0].target=&(s.exportname);
472 p[1].target=&(s.port);
473 p[2].target=&(s.authname);
474 p[3].target=&(s.timeout);
475 p[4].target=&(s.expected_size);
476 p[5].target=p[6].target=p[7].target=p[8].target=&(s.flags);
477 for(j=0;j<p_size;j++) {
478 g_assert(p[j].target != NULL);
479 g_assert(p[j].ptype==PARAM_INT||p[j].ptype==PARAM_STRING||p[j].ptype==PARAM_BOOL);
482 *((gint*)p[j].target) =
483 g_key_file_get_integer(cfile,
489 *((gchar**)p[j].target) =
490 g_key_file_get_string(cfile,
496 value = g_key_file_get_boolean(cfile,
498 p[j].paramname, &err);
500 *((gint*)p[j].target) |= value;
505 if(err->code == G_KEY_FILE_ERROR_KEY_NOT_FOUND) {
507 g_set_error(e, errdomain, CFILE_KEY_MISSING, "Could not find required value %s in group %s: %s", p[j].paramname, groups[i], err->message);
508 g_array_free(retval, TRUE);
510 g_key_file_free(cfile);
516 g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Could not parse %s in group %s: %s", p[j].paramname, groups[i], err->message);
517 g_array_free(retval, TRUE);
519 g_key_file_free(cfile);
524 g_array_append_val(retval, s);
530 * Signal handler for SIGCHLD
531 * @param s the signal we're handling (must be SIGCHLD, or something
534 void sigchld_handler(int s) {
539 while((pid=waitpid(-1, &status, WNOHANG)) > 0) {
540 if(WIFEXITED(&status)) {
541 msg3(LOG_INFO, "Child exited with %d", WEXITSTATUS(status));
543 i=g_hash_table_lookup(children, &pid);
545 msg3(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid);
547 DEBUG2("Removing %d from the list of children", pid);
548 g_hash_table_remove(children, &pid);
554 * Kill a child. Called from sigterm_handler::g_hash_table_foreach.
557 * @param value the value corresponding to the above key
558 * @param user_data a pointer which we always set to 1, so that we know what
561 void killchild(gpointer key, gpointer value, gpointer user_data) {
563 int *parent=user_data;
570 * Handle SIGTERM and dispatch it to our children
571 * @param s the signal we're handling (must be SIGTERM, or something
572 * is severely wrong).
574 void sigterm_handler(int s) {
577 g_hash_table_foreach(children, killchild, &parent);
587 * Detect the size of a file.
589 * @param export An open filedescriptor
590 * @return the size of the file, or OFFT_MAX if detection was
593 off_t size_autodetect(int export) {
596 struct stat stat_buf;
599 #ifdef HAVE_SYS_MOUNT_H
600 #ifdef HAVE_SYS_IOCTL_H
602 DEBUG("looking for export size with ioctl BLKGETSIZE\n");
603 if (!ioctl(export, BLKGETSIZE, &es32) && es32) {
604 es = (off_t)es32 * (off_t)512;
607 #endif /* BLKGETSIZE */
608 #endif /* HAVE_SYS_IOCTL_H */
609 #endif /* HAVE_SYS_MOUNT_H */
611 DEBUG("looking for export size with fstat\n");
612 stat_buf.st_size = 0;
613 error = fstat(export, &stat_buf);
615 if(stat_buf.st_size > 0)
616 return (off_t)stat_buf.st_size;
618 err("fstat failed: %m");
621 DEBUG("looking for export size with lseek SEEK_END\n");
622 es = lseek(export, (off_t)0, SEEK_END);
623 if (es > ((off_t)0)) {
626 DEBUG2("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4)));
629 err("Could not find size of exported block device: %m");
/**
 * seek to a position in a file, with error handling: a failing lseek() is
 * reported through err().
 *
 * @param handle a filedescriptor
 * @param a position to seek to
 * @todo get rid of this; lastpoint is a global variable right now, but it
 * shouldn't be. If we pass it on as a parameter, that makes things a *lot*
 * easier.
 **/
void myseek(int handle,off_t a) {
	if (lseek(handle, a, SEEK_SET) < 0) {
		err("Can not seek locally!\n");
	}
}
648 * Write an amount of bytes at a given offset to the right file. This
649 * abstracts the write-side of the multiple file option.
651 * @param a The offset where the write should start
652 * @param buf The buffer to write from
653 * @param len The length of buf
654 * @param client The client we're serving for
655 * @return The number of bytes actually written, or -1 in case of an error
657 int rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client) {
660 myseek(g_array_index(client->export, int, (int)(a/client->server->hunksize)), a%client->server->hunksize);
662 res = write(g_array_index(client->export, int, (int)((off_t)a/(off_t)(client->server->hunksize))), buf, len);
663 return (res < 0 || (size_t)res != len);
667 * Read an amount of bytes at a given offset from the right file. This
668 * abstracts the read-side of the multiple files option.
670 * @param a The offset where the read should start
671 * @param buf A buffer to read into
672 * @param len The size of buf
673 * @param client The client we're serving for
674 * @return The number of bytes actually read, or -1 in case of an
677 int rawexpread(off_t a, char *buf, size_t len, CLIENT *client) {
680 myseek(g_array_index(client->export,int,(int)a/client->server->hunksize),
681 a%client->server->hunksize);
682 res = read(g_array_index(client->export,int,(int)a/client->server->hunksize), buf, len);
683 return (res < 0 || (size_t)res != len);
687 * Read an amount of bytes at a given offset from the right file. This
688 * abstracts the read-side of the copyonwrite stuff, and calls
689 * rawexpread() with the right parameters to do the actual work.
690 * @param a The offset where the read should start
691 * @param buf A buffer to read into
692 * @param len The size of buf
693 * @param client The client we're going to read for
694 * @return The number of bytes actually read, or -1 in case of an error
696 int expread(off_t a, char *buf, size_t len, CLIENT *client) {
698 off_t mapcnt, mapl, maph, pagestart;
700 if (!(client->server->flags & F_COPYONWRITE))
701 return rawexpread(a, buf, len, client);
702 DEBUG3("Asked to read %d bytes at %Lu.\n", len, (unsigned long long)a);
704 mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
706 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
707 pagestart=mapcnt*DIFFPAGESIZE;
709 rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
710 len : (size_t)DIFFPAGESIZE-offset;
711 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
712 DEBUG3("Page %Lu is at %lu\n", (unsigned long long)mapcnt,
713 (unsigned long)(client->difmap[mapcnt]));
714 myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset);
715 if (read(client->difffile, buf, rdlen) != rdlen) return -1;
716 } else { /* the block is not there */
717 DEBUG2("Page %Lu is not here, we read the original one\n",
718 (unsigned long long)mapcnt);
719 if(rawexpread(a, buf, rdlen, client)) return -1;
721 len-=rdlen; a+=rdlen; buf+=rdlen;
727 * Write an amount of bytes at a given offset to the right file. This
728 * abstracts the write-side of the copyonwrite option, and calls
729 * rawexpwrite() with the right parameters to do the actual work.
731 * @param a The offset where the write should start
732 * @param buf The buffer to write from
733 * @param len The length of buf
734 * @param client The client we're going to write for.
735 * @return The number of bytes actually written, or -1 in case of an error
737 int expwrite(off_t a, char *buf, size_t len, CLIENT *client) {
738 char pagebuf[DIFFPAGESIZE];
739 off_t mapcnt,mapl,maph;
744 if (!(client->server->flags & F_COPYONWRITE))
745 return(rawexpwrite(a,buf,len, client));
746 DEBUG3("Asked to write %d bytes at %Lu.\n", len, (unsigned long long)a);
748 mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
750 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
751 pagestart=mapcnt*DIFFPAGESIZE ;
753 wrlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
754 len : (size_t)DIFFPAGESIZE-offset;
756 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
757 DEBUG3("Page %Lu is at %lu\n", (unsigned long long)mapcnt,
758 (unsigned long)(client->difmap[mapcnt])) ;
759 myseek(client->difffile,
760 client->difmap[mapcnt]*DIFFPAGESIZE+offset);
761 if (write(client->difffile, buf, wrlen) != wrlen) return -1 ;
762 } else { /* the block is not there */
763 myseek(client->difffile,client->difffilelen*DIFFPAGESIZE) ;
764 client->difmap[mapcnt]=client->difffilelen++ ;
765 DEBUG3("Page %Lu is not here, we put it at %lu\n",
766 (unsigned long long)mapcnt,
767 (unsigned long)(client->difmap[mapcnt]));
769 if (rdlen+pagestart%(client->server->hunksize) >
770 (client->server->hunksize))
771 rdlen=client->server->hunksize -
772 (pagestart%client->server->hunksize);
773 if (rawexpread(pagestart, pagebuf, rdlen, client))
775 memcpy(pagebuf+offset,buf,wrlen) ;
776 if (write(client->difffile, pagebuf, DIFFPAGESIZE) !=
780 len-=wrlen ; a+=wrlen ; buf+=wrlen ;
786 * Do the initial negotiation.
788 * @param client The client we're negotiating with.
790 void negotiate(CLIENT *client) {
794 memset(zeros, 0, 290);
795 if (write(client->net, INIT_PASSWD, 8) < 0)
796 err("Negotiation failed: %m");
797 cliserv_magic = htonll(cliserv_magic);
798 if (write(client->net, &cliserv_magic, sizeof(cliserv_magic)) < 0)
799 err("Negotiation failed: %m");
800 size_host = htonll((u64)(client->exportsize));
801 if (write(client->net, &size_host, 8) < 0)
802 err("Negotiation failed: %m");
803 if (write(client->net, zeros, 128) < 0)
804 err("Negotiation failed: %m");
/** sending macro: write the whole reply structure to the socket. Wrapped in
 * do/while(0) so that "SEND(...);" is exactly one statement. */
#define SEND(net,reply) do { writeit( (net), &(reply), sizeof( reply )); } while (0)
/** error macro: send the reply with its error field set, then clear the
 * field again so the structure can be reused for the next request. */
#define ERROR(client,reply) do { (reply).error = htonl(-1); SEND((client)->net,reply); (reply).error = 0; } while (0)
812 * Serve a file to a single client.
814 * @todo This beast needs to be split up in many tiny little manageable
815 * pieces. Preferably with a chainsaw.
817 * @param client The client we're going to serve to.
820 int mainloop(CLIENT *client) {
821 struct nbd_request request;
822 struct nbd_reply reply;
828 DEBUG("Entering request loop!\n");
829 reply.magic = htonl(NBD_REPLY_MAGIC);
838 if (client->server->timeout)
839 alarm(client->server->timeout);
840 readit(client->net, &request, sizeof(request));
841 request.from = ntohll(request.from);
842 request.type = ntohl(request.type);
844 if (request.type==NBD_CMD_DISC) {
845 msg2(LOG_INFO, "Disconnect request received.");
846 if (client->difmap) g_free(client->difmap) ;
847 if (client->difffile>=0) {
848 close(client->difffile);
849 unlink(client->difffilename);
850 free(client->difffilename);
856 len = ntohl(request.len);
858 if (request.magic != htonl(NBD_REQUEST_MAGIC))
859 err("Not enough magic.");
860 if (len > BUFSIZE + sizeof(struct nbd_reply))
861 err("Request too big!");
863 printf("%s from %Lu (%Lu) len %d, ", request.type ? "WRITE" :
864 "READ", (unsigned long long)request.from,
865 (unsigned long long)request.from / 512, len);
867 memcpy(reply.handle, request.handle, sizeof(reply.handle));
868 if ((request.from + len) > (OFFT_MAX)) {
869 DEBUG("[Number too large!]");
870 ERROR(client, reply);
874 if (((ssize_t)((off_t)request.from + len) > client->exportsize) ||
875 ((client->server->flags & F_READONLY) && request.type)) {
877 ERROR(client, reply);
881 if (request.type==NBD_CMD_WRITE) {
882 DEBUG("wr: net->buf, ");
883 readit(client->net, buf, len);
885 if ((client->server->flags & F_AUTOREADONLY) ||
886 expwrite(request.from, buf, len,
888 DEBUG("Write failed: %m" );
889 ERROR(client, reply);
892 SEND(client->net, reply);
899 if (expread(request.from, buf + sizeof(struct nbd_reply), len, client)) {
900 DEBUG("Read failed: %m");
901 ERROR(client, reply);
906 memcpy(buf, &reply, sizeof(struct nbd_reply));
907 writeit(client->net, buf, len + sizeof(struct nbd_reply));
914 * Split a single exportfile into multiple ones, if that was asked.
915 * @return 0 on success, -1 on failure
916 * @param client information on the client which we want to split
918 int splitexport(CLIENT* client) {
922 client->export = g_array_new(TRUE, TRUE, sizeof(int));
923 for (i=0; i<client->exportsize; i+=client->server->hunksize) {
926 if(client->server->flags & F_MULTIFILE) {
927 tmpname=g_strdup_printf("%s.%d", client->exportname,
928 (int)(i/client->server->hunksize));
930 tmpname=g_strdup(client->exportname);
932 DEBUG2( "Opening %s\n", tmpname );
933 if((fhandle = open(tmpname, (client->server->flags & F_READONLY) ? O_RDONLY : O_RDWR)) == -1) {
934 /* Read WRITE ACCESS was requested by media is only read only */
935 client->server->flags |= F_AUTOREADONLY;
936 client->server->flags |= F_READONLY;
937 if((fhandle = open(tmpname, O_RDONLY)) == -1)
938 err("Could not open exported file: %m");
940 g_array_insert_val(client->export,i/client->server->hunksize,fhandle);
946 int copyonwrite_prepare(CLIENT* client) {
948 if ((client->difffilename = malloc(1024))==NULL)
949 err("Failed to allocate string for diff file name");
950 snprintf(client->difffilename, 1024, "%s-%s-%d.diff",client->exportname,client->clientname,
952 client->difffilename[1023]='\0';
953 msg3(LOG_INFO,"About to create map and diff file %s",client->difffilename) ;
954 client->difffile=open(client->difffilename,O_RDWR | O_CREAT | O_TRUNC,0600) ;
955 if (client->difffile<0) err("Could not create diff file (%m)") ;
956 if ((client->difmap=calloc(client->exportsize/DIFFPAGESIZE,sizeof(u32)))==NULL)
957 err("Could not allocate memory") ;
958 for (i=0;i<client->exportsize/DIFFPAGESIZE;i++) client->difmap[i]=(u32)-1 ;
964 * Serve a connection.
966 * @todo allow for multithreading, perhaps use libevent. Not just yet, though;
967 * follow the road map.
969 * @param client a connected client
971 void serveconnection(CLIENT *client) {
974 if (!client->server->expected_size) {
975 client->exportsize = size_autodetect(g_array_index(client->export,int,0));
977 /* Perhaps we should check first. Not now. */
978 client->exportsize = client->server->expected_size;
980 if (client->exportsize > OFFT_MAX) {
981 /* uhm, well... In a parallel universe, this *might* be
983 err("Size of exported file is too big\n");
986 msg3(LOG_INFO, "size of exported file/device is %Lu", (unsigned long long)client->exportsize);
989 if (client->server->flags & F_COPYONWRITE) {
990 copyonwrite_prepare(client);
993 setmysockopt(client->net);
999 * Find the name of the file we have to serve. This will use g_strdup_printf
1000 * to put the IP address of the client inside a filename containing
1001 * "%s". That name is then written to client->exportname.
1003 * @param net A socket connected to an nbd client
1004 * @param client information about the client. The IP address in human-readable
1005 * format will be written to a new char* buffer, the address of which will be
1006 * stored in client->clientname.
1008 void set_peername(int net, CLIENT *client) {
1009 struct sockaddr_in addrin;
1010 int addrinlen = sizeof( addrin );
1013 if (getpeername(net, (struct sockaddr *) &addrin, (socklen_t *)&addrinlen) < 0)
1014 err("getsockname failed: %m");
1015 peername = inet_ntoa(addrin.sin_addr);
1016 client->exportname=g_strdup_printf(client->server->exportname, peername);
1018 msg4(LOG_INFO, "connect from %s, assigned file is %s",
1019 peername, client->exportname);
1020 client->clientname=g_strdup(peername);
1025 * @param data a pointer to pid_t which should be freed
1027 void destroy_pid_t(gpointer data) {
1032 * Go daemon (unless we specified at compile time that we didn't want this)
1033 * @param serve the first server of our configuration. If its port is zero,
1034 * then do not daemonize, because we're doing inetd then. This parameter
1035 * is only used to create a PID file of the form
1036 * /var/run/nbd-server.<port>.pid; it's not modified in any way.
1038 #if !defined(NODAEMON) && !defined(NOFORK)
1039 void daemonize(SERVER* serve) {
1046 snprintf(pidfname, sizeof(char)*255, "/var/run/nbd-server.%d.pid", serve->port);
1048 strncpy(pidfname, "/var/run/nbd-server.pid", sizeof(char)*255);
1050 pidf=fopen(pidfname, "w");
1052 fprintf(pidf,"%d\n", (int)getpid());
1056 fprintf(stderr, "Not fatal; continuing");
1060 #define daemonize(serve)
1061 #endif /* !defined(NODAEMON) && !defined(NOFORK) */
1064 * Connect a server's socket.
1066 * @param serve the server we want to connect.
1068 void setup_serve(SERVER *serve) {
1069 struct sockaddr_in addrin;
1070 struct sigaction sa;
1071 int addrinlen = sizeof(addrin);
1078 if ((serve->socket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0)
1081 /* lose the pesky "Address already in use" error message */
1082 if (setsockopt(serve->socket,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) {
1083 err("setsockopt SO_REUSEADDR");
1085 if (setsockopt(serve->socket,SOL_SOCKET,SO_KEEPALIVE,&yes,sizeof(int)) == -1) {
1086 err("setsockopt SO_KEEPALIVE");
1089 /* make the listening socket non-blocking */
1090 if ((sock_flags = fcntl(serve->socket, F_GETFL, 0)) == -1) {
1091 err("fcntl F_GETFL");
1093 if (fcntl(serve->socket, F_SETFL, sock_flags | O_NONBLOCK) == -1) {
1094 err("fcntl F_SETFL O_NONBLOCK");
1097 DEBUG("Waiting for connections... bind, ");
1098 addrin.sin_family = AF_INET;
1099 addrin.sin_port = htons(serve->port);
1100 addrin.sin_addr.s_addr = 0;
1101 if (bind(serve->socket, (struct sockaddr *) &addrin, addrinlen) < 0)
1104 if (listen(serve->socket, 1) < 0)
1106 sa.sa_handler = sigchld_handler;
1107 sigemptyset(&sa.sa_mask);
1108 sa.sa_flags = SA_RESTART;
1109 if(sigaction(SIGCHLD, &sa, NULL) == -1)
1110 err("sigaction: %m");
1111 sa.sa_handler = sigterm_handler;
1112 sigemptyset(&sa.sa_mask);
1113 sa.sa_flags = SA_RESTART;
1114 if(sigaction(SIGTERM, &sa, NULL) == -1)
1115 err("sigaction: %m");
1116 children=g_hash_table_new_full(g_int_hash, g_int_equal, NULL, destroy_pid_t);
1120 * Connect our servers.
1122 void setup_servers(GArray* servers) {
1125 for(i=0;i<servers->len;i++) {
1126 setup_serve(&(g_array_index(servers, SERVER, i)));
1131 * Loop through the available servers, and serve them.
1133 int serveloop(GArray* servers) {
1134 struct sockaddr_in addrin;
1135 socklen_t addrinlen=sizeof(addrin);
1145 * Set up the master fd_set. The set of descriptors we need
1146 * to select() for never changes anyway and it buys us a *lot*
1147 * of time to only build this once. However, if we ever choose
1148 * to not fork() for clients anymore, we may have to revisit
1153 for(i=0;i<servers->len;i++) {
1154 sock=(g_array_index(servers, SERVER, i)).socket;
1155 FD_SET(sock, &mset);
1156 max=sock>max?sock:max;
1163 memcpy(&rset, &mset, sizeof(fd_set));
1166 if(select(max+1, &rset, NULL, NULL, &tv)>0) {
1168 for(i=0;i<servers->len;i++) {
1169 serve=&(g_array_index(servers, SERVER, i));
1170 if(FD_ISSET(serve->socket, &rset)) {
1171 if ((net=accept(serve->socket, (struct sockaddr *) &addrin, &addrinlen)) < 0)
1174 client = g_malloc(sizeof(CLIENT));
1175 client->server=serve;
1176 client->exportsize=OFFT_MAX;
1178 set_peername(net, client);
1179 if (!authorized_client(client)) {
1180 msg2(LOG_INFO,"Unauthorized client") ;
1184 msg2(LOG_INFO,"Authorized client") ;
1185 pid=g_malloc(sizeof(pid_t));
1187 if ((*pid=fork())<0) {
1188 msg3(LOG_INFO,"Could not fork (%s)",strerror(errno)) ;
1192 if (*pid>0) { /* parent */
1194 g_hash_table_insert(children, pid, pid);
1198 g_hash_table_destroy(children);
1199 for(i=0;i<servers->len,serve=(g_array_index(servers, SERVER*, i));i++) {
1200 close(serve->socket);
1202 /* FALSE does not free the
1203 actual data. This is required,
1204 because the client has a
1205 direct reference into that
1206 data, and otherwise we get a
1208 g_array_free(servers, FALSE);
1210 msg2(LOG_INFO,"Starting to serve");
1211 serveconnection(client);
1219 * Main entry point...
1221 int main(int argc, char *argv[]) {
1226 if (sizeof( struct nbd_request )!=28) {
1227 fprintf(stderr,"Bad size of structure. Alignment problems?\n");
1232 config_file_pos = g_strdup(CFILE);
1233 serve=cmdline(argc, argv);
1234 servers = parse_cfile(config_file_pos, &err);
1235 if(!servers || !servers->len) {
1236 g_warning("Could not parse config file: %s", err->message);
1239 g_array_append_val(servers, *serve);
1242 /* We don't support this at this time */
1244 if (!(serve->port)) {
1247 /* You really should define ISSERVER if you're going to use
1248 * inetd mode, but if you don't, closing stdout and stderr
1249 * (which inetd had connected to the client socket) will let it
1253 open("/dev/null", O_WRONLY);
1254 open("/dev/null", O_WRONLY);
1256 client=g_malloc(sizeof(CLIENT));
1257 client->server=serve;
1259 client->exportsize=OFFT_MAX;
1260 set_peername(0,client);
1261 serveconnection(client);
1265 if((!serve) && (!servers||!servers->len)) {
1266 g_message("Nothing to do! Bye!");
1270 setup_servers(servers);