2 * Network Block Device - server
4 * Copyright 1996-1998 Pavel Machek, distribute under GPL
5 * <pavel@atrey.karlin.mff.cuni.cz>
6 * Copyright 2001-2004 Wouter Verhelst <wouter@debian.org>
7 * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk>
9 * Version 1.0 - hopefully 64-bit-clean
10 * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au>
11 * Version 1.2 - autodetect size of block devices, thanks to "Peter T. Breuer" <ptb@it.uc3m.es>
12 * Version 1.5 - can compile on Unix systems that don't have 64 bit integer
13 * type, or don't have 64 bit file offsets by defining FS_32BIT
14 * in compile options for nbd-server *only*. This can be done
15 * with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the
16 * original autoconf input file, or I would make it a configure
17 * option.) Ken Yap <ken@nlc.net.au>.
18 * Version 1.6 - fix autodetection of block device size and really make 64 bit
19 * clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk>
20 * Version 2.0 - Version synchronised with client
21 * Version 2.1 - Reap zombie client processes when they exit. Removed
22 * (uncommented) the _IO magic, it's no longer necessary. Wouter
23 * Verhelst <wouter@debian.org>
24 * Version 2.2 - Auto switch to read-only mode (useful for floppies).
25 * Version 2.3 - Fixed code so that Large File Support works. This
26 * removes the FS_32BIT compile-time directive; define
27 * _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be
28 * using FS_32BIT. This will allow you to use files >2GB instead of
29 * having to use the -m option. Wouter Verhelst <wouter@debian.org>
30 * Version 2.4 - Added code to keep track of children, so that we can
31 * properly kill them from initscripts. Add a call to daemon(),
32 * so that processes don't think they have to wait for us, which is
33 * interesting for initscripts as well. Wouter Verhelst
35 * Version 2.5 - Bugfix release: forgot to reset child_arraysize to
36 * zero after fork()ing, resulting in nbd-server going berserk
37 * when it receives a signal with at least one child open. Wouter
38 * Verhelst <wouter@debian.org>
39 * 10/10/2003 - Added socket option SO_KEEPALIVE (sf.net bug 819235);
40 * rectified type of mainloop::size_host (sf.net bugs 814435 and
41 * 817385); close the PID file after writing to it, so that the
42 * daemon can actually be found. Wouter Verhelst
44 * 10/10/2003 - Size of the data "size_host" was wrong and so was not
45 * correctly put in network endianness. Many types were corrected
46 * (size_t and off_t instead of int). <vspaceg@sourceforge.net>
47 * Version 2.6 - Some code cleanup.
48 * Version 2.7 - Better build system.
49 * 11/02/2004 - Doxygenified the source, modularized it a bit. Needs a
50 * lot more work, but this is a start. Wouter Verhelst
54 /* Includes LFS defines, which defines behaviours of some of the following
55 * headers, so must come before those */
58 #include <sys/types.h>
59 #include <sys/socket.h>
61 #include <sys/wait.h> /* wait */
62 #ifdef HAVE_SYS_IOCTL_H
63 #include <sys/ioctl.h>
65 #include <sys/param.h>
66 #ifdef HAVE_SYS_MOUNT_H
67 #include <sys/mount.h> /* For BLKGETSIZE */
69 #include <signal.h> /* sigaction */
70 #include <netinet/tcp.h>
71 #include <netinet/in.h> /* sockaddr_in, htons, in_addr */
72 #include <netdb.h> /* hostent, gethostby*, getservby* */
79 #include <arpa/inet.h>
87 /* used in cliserv.h, so must come first */
/* Program name; the comment preceding this line says it is consumed by
 * cliserv.h, so it has to be defined before that header is included. */
88 #define MY_NAME "nbd_server"
91 /** Default position of the config file */
93 #define SYSCONFDIR "/etc"
/* CFILE is built by string-literal concatenation, so SYSCONFDIR must expand
 * to a string literal. */
95 #define CFILE SYSCONFDIR "/nbd-server/config"
97 /** Where our config file actually is */
98 gchar* config_file_pos;
100 /** how much space for child PIDs we have by default. Dynamically
101 allocated, and will be realloc()ed if out of space, so this should
102 probably be fair for most situations. */
103 #define DEFAULT_CHILD_ARRAY 256
105 /** Logging macros, now nothing goes to syslog unless you say ISSERVER */
/* Variant 1: forward the priority (a) and the message to syslog(). */
107 #define msg2(a,b) syslog(a,b)
108 #define msg3(a,b,c) syslog(a,b,c)
109 #define msg4(a,b,c,d) syslog(a,b,c,d)
/* Variant 2: drop the priority argument and log through g_message().
 * The preprocessor conditional that selects between the two variants is
 * not among the lines visible in this view. */
111 #define msg2(a,b) g_message(b)
112 #define msg3(a,b,c) g_message(b,c)
113 #define msg4(a,b,c,d) g_message(b,c,d)
116 /* Debugging macros */
/* Verbose variant: the arguments are handed straight to printf(). */
119 #define DEBUG( a ) printf( a )
120 #define DEBUG2( a,b ) printf( a,b )
121 #define DEBUG3( a,b,c ) printf( a,b,c )
/* Silent variant: the macros expand to nothing. */
124 #define DEBUG2( a,b )
125 #define DEBUG3( a,b,c )
/* Fall back to an empty version string when PACKAGE_VERSION was not
 * defined before this point. */
127 #ifndef PACKAGE_VERSION
128 #define PACKAGE_VERSION ""
131 * The highest value a variable of type off_t can reach.
133 /* This is starting to get ugly. If someone knows a better way to find
134 * the maximum value of a signed type *without* relying on overflow
135 * (doing so breaks on 64bit architectures), that would be nice.
137 * Actually, do we need this at all? Can't we just say '0 is autodetect', and
138 * live with it? Or better yet, use an extra flag, or so?
139 * Answer: yes, we need it, as the hunksize is defined to this when the
140 * multiple file thingy isn't used.
/** Largest value an off_t can hold, computed without relying on signed
 * overflow. The whole expansion is wrapped in parentheses so the macro
 * keeps its value when used inside a larger expression (for example
 * OFFT_MAX / 2 or x - OFFT_MAX). */
#define OFFT_MAX ((((((off_t)1)<<((sizeof(off_t)-1)*8))-1)<<7)+127)
/* Buffer sizes and chunk sizes used throughout the server. */
143 #define LINELEN 256 /**< Size of static buffer used to read the
144 authorization file (yuck) */
145 #define BUFSIZE (1024*1024) /**< Size of buffer that can hold requests */
146 #define GIGA (1*1024*1024*1024) /**< 1 Gigabyte. Used as hunksize when doing
147 the multiple file thingy. @todo: make this a
148 configuration option. */
149 #define DIFFPAGESIZE 4096 /**< diff file uses those chunks */
/* Bit values for the flags member of a server; each is a distinct power of
 * two, and the code combines them with bitwise OR and tests them with
 * bitwise AND. */
150 #define F_READONLY 1 /**< flag to tell us a file is readonly */
151 #define F_MULTIFILE 2 /**< flag to tell us a file is exported using -m */
152 #define F_COPYONWRITE 4 /**< flag to tell us a file is exported using
154 #define F_AUTOREADONLY 8 /**< flag to tell us a file is set to autoreadonly */
/** Table of forked child processes. It is created in setup_serve(), the
 * parent inserts an entry after each successful fork() in serveloop(), and
 * the SIGCHLD handler removes entries; keys are pointers to the child PID. */
155 GHashTable *children;
156 char pidfname[256]; /**< name of our PID file */
157 char default_authname[] = "/etc/nbd_server.allow"; /**< default name of allow file */
160 * Variables associated with a server.
/* Members describing one configured export. Elsewhere in this file they are
 * reached through the SERVER type (for example serve->port, serve->flags).
 * The lines that open and close the enclosing declaration are not visible
 * in this view. */
163 gchar* exportname; /**< (unprocessed) filename of the file we're exporting */
164 off_t hunksize; /**< size of a hunk of an exported file */
165 off_t expected_size; /**< size of the exported file as it was told to
166 us through configuration */
167 unsigned int port; /**< port we're exporting this file at */
168 char* authname; /**< filename of the authorization file */
169 int flags; /**< flags associated with this exported file */
170 unsigned int timeout;/**< how long a connection may be idle
172 int socket; /**< The socket of this server. */
176 * Variables associated with a client socket.
/* Per-connection state. Elsewhere in this file these members are reached
 * through the CLIENT type (for example client->net, client->export). The
 * lines that open and close the enclosing declaration are not visible in
 * this view. */
179 off_t exportsize; /**< size of the file we're exporting */
180 char *clientname; /**< peer */
181 char *exportname; /**< (processed) filename of the file we're exporting */
182 GArray *export; /**< array of filedescriptors of exported files;
183 only the first is actually used unless we're
184 doing the multiple file option */
185 int net; /**< The actual client socket */
186 SERVER *server; /**< The server this client is getting data from */
187 char* difffilename; /**< filename of the copy-on-write file, if any */
188 int difffile; /**< filedescriptor of copyonwrite file. @todo
189 shouldn't this be an array too? (cfr export) Or
190 make -m and -c mutually exclusive */
191 u32 difffilelen; /**< number of pages in difffile */
192 u32 *difmap; /**< see comment on the global difmap for this one */
196 * Type of configuration file values
/* Enumerators naming the value type of a configuration key; parse_cfile()
 * switches on these to pick the matching g_key_file_get_* call. */
199 PARAM_INT, /**< This parameter is an integer */
200 PARAM_STRING, /**< This parameter is a string */
201 PARAM_BOOL, /**< This parameter is a boolean */
204 * Configuration file values
/* Members of one configuration-table entry as used by parse_cfile(): the
 * key name, whether it is required, its type, the location the parsed value
 * is stored to, and a flag mask. The lines that open and close the
 * enclosing declaration are not visible in this view. */
207 gchar *paramname; /**< Name of the parameter, as it appears in
209 gboolean required; /**< Whether this is a required (as opposed to
210 optional) parameter */
211 PARAM_TYPE ptype; /**< Type of the parameter. */
212 gpointer target; /**< Pointer to where the data of this
213 parameter should be written. If ptype is
214 PARAM_BOOL, the data is or'ed rather than
216 gint flagval; /**< Flag mask for this parameter in case ptype
221 * Check whether a client is allowed to connect. Works with an authorization
222 * file which contains one line per machine, no wildcards.
224 * @param opts The client who's trying to connect.
225 * @return 0 - authorization refused, 1 - OK
/* Looks for opts->clientname in the authorization file named by
 * opts->server->authname, reading the file one line at a time.
 * Several lines of this function (local declarations and every return
 * statement) are not visible in this view, so the value returned on each
 * path has to be confirmed against the complete file. */
227 int authorized_client(CLIENT *opts) {
/* A file that cannot be opened is logged; what happens next is not visible. */
232 if ((f=fopen(opts->server->authname,"r"))==NULL) {
233 msg4(LOG_INFO,"Can't open authorization file %s (%s).",
234 opts->server->authname,strerror(errno)) ;
238 while (fgets(line,LINELEN,f)!=NULL) {
/* NOTE(review): this compares only the first strlen(clientname) characters,
 * i.e. it is a prefix match. A client at 10.0.0.1 therefore also matches a
 * line that reads 10.0.0.10. Consider additionally requiring that the
 * character following the match is a line end or whitespace. */
239 if (strncmp(line,opts->clientname,strlen(opts->clientname))==0) {
249 * Read data from a file descriptor into a buffer
251 * @param f a file descriptor
252 * @param buf a buffer
253 * @param len the number of bytes to be read
/* Reads from descriptor f into buf. A read() result of zero or less (error
 * or end of file) is reported through err(). The lines between the
 * signature and the read() call, and those after the err() call, are not
 * visible in this view. */
255 inline void readit(int f, void *buf, size_t len) {
259 if ((res = read(f, buf, len)) <= 0)
260 err("Read failed: %m");
267 * Write data from a buffer into a filedescriptor
269 * @param f a file descriptor
270 * @param buf a buffer containing data
271 * @param len the number of bytes to be written
/* Writes from buf to descriptor f. A write() result of zero or less is
 * reported through err(). The lines between the signature and the write()
 * call, and those after the err() call, are not visible in this view. */
273 inline void writeit(int f, void *buf, size_t len) {
277 if ((res = write(f, buf, len)) <= 0)
278 err("Send failed: %m");
285 * Print out a message about how to use nbd-server. Split out to a separate
286 * function so that we can call it from multiple places
/* Body of the usage printer (its signature line is not visible in this
 * view): prints the version banner, the command line synopsis and the
 * configuration file location to stdout. */
289 printf("This is nbd-server version " VERSION "\n");
290 printf("Usage: port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-a timeout_sec] [-C configuration file]\n"
291 "\t-r|--read-only\t\tread only\n"
292 "\t-m|--multi-file\t\tmultiple file\n"
293 "\t-c|--copy-on-write\tcopy on write\n"
294 "\t-C|--config-file\tspecify an alternat configuration file\n"
295 "\t-l|--authorize-file\tfile with list of hosts that are allowed to\n\t\t\t\tconnect.\n"
296 "\t-a|--idle-time\t\tmaximum idle seconds; server terminates when\n\t\t\t\tidle time exceeded\n\n"
297 "\tif port is set to 0, stdin is used (for running from inetd)\n"
298 "\tif file_to_export contains '%%s', it is substituted with the IP\n"
299 "\t\taddress of the machine trying to connect\n" );
/* NOTE(review): this prints the compile-time default CFILE, not
 * config_file_pos, so a path given with -C is not reflected here. Confirm
 * that this is intended. */
300 printf("Using configuration file %s\n", CFILE);
304 * Parse the command line.
306 * @param argc the argc argument to main()
307 * @param argv the argv argument to main()
/* Builds a SERVER from the command line. Options set flags, the
 * authorization file, the idle timeout and the config file path; the
 * positional arguments supply port, export name and optional size, in that
 * order. The case labels, break statements and the final return are not
 * visible in this view. */
309 SERVER* cmdline(int argc, char *argv[]) {
313 struct option long_options[] = {
314 {"read-only", no_argument, NULL, 'r'},
315 {"multi-file", no_argument, NULL, 'm'},
316 {"copy-on-write", no_argument, NULL, 'c'},
317 {"authorize-file", required_argument, NULL, 'l'},
318 {"idle-time", required_argument, NULL, 'a'},
319 {"config-file", required_argument, NULL, 'C'},
/* Defaults: a zeroed SERVER, a single hunk as large as off_t allows, and
 * the default authorization file name. */
330 serve=g_new0(SERVER, 1);
331 serve->hunksize=OFFT_MAX;
332 serve->authname = g_strdup(default_authname);
/* The option string starts with a dash, which asks getopt_long to hand
 * back positional arguments in order instead of permuting them. */
333 while((c=getopt_long(argc, argv, "-a:C:cl:mr", long_options, &i))>=0) {
336 /* non-option argument */
/* nonspecial counts the positional arguments seen so far. */
337 switch(nonspecial++) {
/* NOTE(review): strtol is called without an end pointer or errno check, so
 * a malformed port is not detected here. */
339 serve->port=strtol(optarg, NULL, 0);
342 serve->exportname = g_strdup(optarg);
343 if(serve->exportname[0] != '/') {
344 fprintf(stderr, "E: The to be exported file needs to be an absolute filename!\n");
/* Size argument: the last character may be a k/K/m/M suffix.
 * NOTE(review): strlen(optarg)-1 is computed without checking for an empty
 * argument; confirm optarg cannot be empty here. */
349 last=strlen(optarg)-1;
351 if (suffix == 'k' || suffix == 'K' ||
352 suffix == 'm' || suffix == 'M')
/* NOTE(review): atol cannot report malformed or out-of-range input. */
354 es = (off_t)atol(optarg);
362 serve->expected_size = es;
367 serve->flags |= F_READONLY;
/* Multi-file exports use hunks of one gigabyte each. */
370 serve->flags |= F_MULTIFILE;
371 serve->hunksize = 1*GIGA;
374 serve->flags |=F_COPYONWRITE;
/* Replace the previously stored string, freeing the old copy first. */
377 g_free(config_file_pos);
378 config_file_pos=g_strdup(optarg);
381 g_free(serve->authname);
382 serve->authname=g_strdup(optarg);
385 serve->timeout=strtol(optarg, NULL, 0);
393 /* What's left: the port to export, the name of the to be exported
394 * file, and, optionally, the size of the file, in that order. */
403 * Error codes for config file parsing
/* Error codes that parse_cfile() passes to g_set_error(). Among the lines
 * visible in this view only CFILE_NOTFOUND, CFILE_MISSING_GENERIC,
 * CFILE_KEY_MISSING and CFILE_VALUE_INVALID are actually used. */
406 CFILE_NOTFOUND, /**< The configuration file is not found */
407 CFILE_MISSING_GENERIC, /**< The (required) group "generic" is missing */
408 CFILE_KEY_MISSING, /**< A (required) key is missing */
409 CFILE_VALUE_INVALID, /**< A value is syntactically invalid */
410 CFILE_PROGERR /**< Programmer error */
414 * Remove a SERVER from memory. Used from the hash table
/* Releases the strings owned by a SERVER (exportname and authname) with
 * g_free(). The local declaration of server and any further cleanup are
 * on lines not visible in this view. */
416 void remove_server(gpointer s) {
420 g_free(server->exportname);
422 g_free(server->authname);
427 * Parse the config file.
429 * @param f the name of the config file
430 * @param e a GError. @see CFILE_ERRORS for what error values this function can
432 * @return a GArray of SERVER structures, one per export group found in
433 * the config file. If the config file is empty or does not exist, returns
434 * an empty GArray; if the config file contains an error, returns NULL, and
435 * e is set appropriately
/* Loads the key file f and turns every group after the first into one
 * SERVER element of the returned GArray. For each group the table p is
 * walked and each key is read with the getter matching its type; failures
 * are reported through e. Many short lines (declarations, case labels,
 * return statements, closing braces) are not visible in this view, so the
 * exact return value on each path has to be confirmed in the full file. */
437 GArray* parse_cfile(gchar* f, GError** e) {
/* Table of known keys: name, required?, type, target (filled in per group
 * below), flag mask. */
440 { "exportname", TRUE, PARAM_STRING, NULL, 0 },
441 { "port", TRUE, PARAM_INT, NULL, 0 },
442 { "authfile", FALSE, PARAM_STRING, NULL, 0 },
443 { "timeout", FALSE, PARAM_INT, NULL, 0 },
444 { "filesize", FALSE, PARAM_INT, NULL, 0 },
445 { "readonly", FALSE, PARAM_BOOL, NULL, F_READONLY },
446 { "multifile", FALSE, PARAM_BOOL, NULL, F_MULTIFILE },
447 { "copyonwrite", FALSE, PARAM_BOOL, NULL, F_COPYONWRITE },
/* NOTE(review): s is cleared once, before the group loop. Unless a hidden
 * line resets it per group, values (notably the OR-ed flags) carry over
 * from one group to the next. */
458 memset(&s, '\0', sizeof(SERVER));
459 errdomain = g_quark_from_string("parse_cfile");
460 cfile = g_key_file_new();
461 retval = g_array_new(FALSE, TRUE, sizeof(SERVER));
462 if(!g_key_file_load_from_file(cfile, f, G_KEY_FILE_KEEP_COMMENTS |
463 G_KEY_FILE_KEEP_TRANSLATIONS, &err)) {
464 g_set_error(e, errdomain, CFILE_NOTFOUND, "Could not open config file.");
465 g_key_file_free(cfile);
/* The first group of the file must be named generic.
 * NOTE(review): the result of g_key_file_get_start_group() is passed to
 * strcmp() unchecked; verify it cannot be NULL for a file without groups. */
468 if(strcmp(g_key_file_get_start_group(cfile), "generic")) {
469 g_set_error(e, errdomain, CFILE_MISSING_GENERIC, "Config file does not contain the [generic] group!");
470 g_key_file_free(cfile);
473 groups = g_key_file_get_groups(cfile, NULL);
/* Start at index 1: groups[0] is skipped (presumably the generic group
 * checked above). */
474 for(i=1;groups[i];i++) {
475 p[0].target=&(s.exportname);
476 p[1].target=&(s.port);
477 p[2].target=&(s.authname);
478 p[3].target=&(s.timeout);
479 p[4].target=&(s.expected_size);
/* NOTE(review): only eight initializers (indices 0 to 7) are visible for p,
 * yet p[8] is assigned here. Verify the size of p in the full file. */
480 p[5].target=p[6].target=p[7].target=p[8].target=&(s.flags);
481 for(j=0;j<p_size;j++) {
482 g_assert(p[j].target != NULL);
483 g_assert(p[j].ptype==PARAM_INT||p[j].ptype==PARAM_STRING||p[j].ptype==PARAM_BOOL);
/* Integer keys are stored through a gint pointer.
 * NOTE(review): one of the integer targets is s.expected_size, which is an
 * off_t; only sizeof(gint) bytes of it are written by this store. */
486 *((gint*)p[j].target) =
487 g_key_file_get_integer(cfile,
493 *((gchar**)p[j].target) =
494 g_key_file_get_string(cfile,
500 value = g_key_file_get_boolean(cfile,
502 p[j].paramname, &err);
/* NOTE(review): this ORs in the boolean itself, not p[j].flagval, so every
 * true boolean key sets the same bit regardless of which flag it names.
 * Check whether p[j].flagval was intended. */
504 *((gint*)p[j].target) |= value;
/* Error handling: a missing key and an unparsable value are reported with
 * different codes; in both cases the partial result and the key file are
 * released. */
509 if(err->code == G_KEY_FILE_ERROR_KEY_NOT_FOUND) {
511 g_set_error(e, errdomain, CFILE_KEY_MISSING, "Could not find required value %s in group %s: %s", p[j].paramname, groups[i], err->message);
512 g_array_free(retval, TRUE);
514 g_key_file_free(cfile);
520 g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Could not parse %s in group %s: %s", p[j].paramname, groups[i], err->message);
521 g_array_free(retval, TRUE);
523 g_key_file_free(cfile);
/* The SERVER is appended by value (a copy of s goes into the array). */
528 g_array_append_val(retval, s);
534 * Signal handler for SIGCHLD
535 * @param s the signal we're handling (must be SIGCHLD, or something
538 void sigchld_handler(int s) {
543 while((pid=wait(status)) > 0) {
544 if(WIFEXITED(status)) {
545 msg3(LOG_INFO, "Child exited with %d", WEXITSTATUS(status));
547 i=g_hash_table_lookup(children, &pid);
549 msg3(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid);
551 DEBUG2("Removing %d from the list of children", pid);
552 g_hash_table_remove(children, &pid);
558 * Kill a child. Called from sigterm_handler::g_hash_table_foreach.
561 * @param value the value corresponding to the above key
562 * @param user_data a pointer which we always set to 1, so that we know what
/* Callback with the signature expected by g_hash_table_foreach();
 * sigterm_handler() passes it for the children table. user_data is treated
 * as a pointer to int. The rest of the body is not visible in this view. */
565 void killchild(gpointer key, gpointer value, gpointer user_data) {
567 int *parent=user_data;
574 * Handle SIGTERM and dispatch it to our children
575 * @param s the signal we're handling (must be SIGTERM, or something
576 * is severely wrong).
/* SIGTERM handler: runs killchild() on every entry of the children table,
 * handing it the address of the local variable parent. The declaration of
 * parent and whatever follows the loop are not visible in this view. */
578 void sigterm_handler(int s) {
581 g_hash_table_foreach(children, killchild, &parent);
591 * Detect the size of a file.
593 * @param export An open filedescriptor
594 * @return the size of the file, or OFFT_MAX if detection was
/* Determines the size of the open descriptor export by trying, in order,
 * the BLKGETSIZE ioctl (only when the relevant headers are available),
 * fstat() and finally lseek() to the end of the file. Several short lines
 * (declarations, some returns, closing braces, one preprocessor test) are
 * not visible in this view. */
597 off_t size_autodetect(int export) {
600 struct stat stat_buf;
603 #ifdef HAVE_SYS_MOUNT_H
604 #ifdef HAVE_SYS_IOCTL_H
606 DEBUG("looking for export size with ioctl BLKGETSIZE\n");
/* The ioctl result is multiplied by 512 to obtain a size in bytes. */
607 if (!ioctl(export, BLKGETSIZE, &es32) && es32) {
608 es = (off_t)es32 * (off_t)512;
611 #endif /* BLKGETSIZE */
612 #endif /* HAVE_SYS_IOCTL_H */
613 #endif /* HAVE_SYS_MOUNT_H */
615 DEBUG("looking for export size with fstat\n");
/* st_size is preset to zero so that the test below only succeeds when
 * fstat() actually stored a positive size. */
616 stat_buf.st_size = 0;
617 error = fstat(export, &stat_buf);
619 if(stat_buf.st_size > 0)
620 return (off_t)stat_buf.st_size;
622 err("fstat failed: %m");
625 DEBUG("looking for export size with lseek SEEK_END\n");
/* Last resort: the offset reached by seeking to the end of the file. */
626 es = lseek(export, (off_t)0, SEEK_END);
627 if (es > ((off_t)0)) {
/* The debug output encodes errno as 1 (EBADF), 2 (ESPIPE), 3 (EINVAL) or
 * 4 (anything else). */
630 DEBUG2("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4)));
633 err("Could not find size of exported block device: %m");
/**
 * Position a file descriptor at an absolute offset, reporting a failed
 * seek through err().
 *
 * @param handle a filedescriptor
 * @param a the position, counted from the start of the file, to seek to
 * @todo get rid of this wrapper (carried over from the original note)
 */
void myseek(int handle,off_t a) {
	const off_t reached = lseek(handle, a, SEEK_SET);

	if (reached >= 0)
		return;
	err("Can not seek locally!\n");
}
652 * Write an amount of bytes at a given offset to the right file. This
653 * abstracts the write-side of the multiple file option.
655 * @param a The offset where the write should start
656 * @param buf The buffer to write from
657 * @param len The length of buf
658 * @param client The client we're serving for
659 * @return The number of bytes actually written, or -1 in case of an error
661 int rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client) {
664 myseek(g_array_index(client->export, int, (int)(a/client->server->hunksize)), a%client->server->hunksize);
666 res = write(g_array_index(client->export, int, (int)((off_t)a/(off_t)(client->server->hunksize))), buf, len);
667 return (res < 0 || (size_t)res != len);
671 * Read an amount of bytes at a given offset from the right file. This
672 * abstracts the read-side of the multiple files option.
674 * @param a The offset where the read should start
675 * @param buf A buffer to read into
676 * @param len The size of buf
677 * @param client The client we're serving for
678 * @return The number of bytes actually read, or -1 in case of an
681 int rawexpread(off_t a, char *buf, size_t len, CLIENT *client) {
684 myseek(g_array_index(client->export,int,(int)a/client->server->hunksize),
685 a%client->server->hunksize);
686 res = read(g_array_index(client->export,int,(int)a/client->server->hunksize), buf, len);
687 return (res < 0 || (size_t)res != len);
691 * Read an amount of bytes at a given offset from the right file. This
692 * abstracts the read-side of the copyonwrite stuff, and calls
693 * rawexpread() with the right parameters to do the actual work.
694 * @param a The offset where the read should start
695 * @param buf A buffer to read into
696 * @param len The size of buf
697 * @param client The client we're going to read for
698 * @return The number of bytes actually read, or -1 in case of an error
/* Read path with copy-on-write support. Without F_COPYONWRITE the request
 * goes straight to rawexpread(). Otherwise the range is processed one
 * DIFFPAGESIZE page at a time: pages recorded in client->difmap are read
 * from the diff file, all others from the original export. Visible return
 * values are the result of rawexpread() or -1; the final return and the
 * declarations of rdlen and offset are on lines not visible in this view. */
700 int expread(off_t a, char *buf, size_t len, CLIENT *client) {
702 off_t mapcnt, mapl, maph, pagestart;
704 if (!(client->server->flags & F_COPYONWRITE))
705 return rawexpread(a, buf, len, client);
706 DEBUG3("Asked to read %d bytes at %Lu.\n", len, (unsigned long long)a);
/* First and last page index touched by the request. */
708 mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
710 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
711 pagestart=mapcnt*DIFFPAGESIZE;
/* Read either the rest of the request or the rest of this page, whichever
 * is smaller. */
713 rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
714 len : (size_t)DIFFPAGESIZE-offset;
/* A map entry of (u32)-1 marks a page that has no copy in the diff file. */
715 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
716 DEBUG3("Page %Lu is at %lu\n", (unsigned long long)mapcnt,
717 (unsigned long)(client->difmap[mapcnt]));
/* NOTE(review): difmap entries are u32 and DIFFPAGESIZE is an int constant,
 * so this product is presumably computed in 32 bits before it is widened
 * to off_t; confirm the behaviour for diff files beyond 4 GiB. */
718 myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset);
719 if (read(client->difffile, buf, rdlen) != rdlen) return -1;
720 } else { /* the block is not there */
721 DEBUG2("Page %Lu is not here, we read the original one\n",
722 (unsigned long long)mapcnt);
723 if(rawexpread(a, buf, rdlen, client)) return -1;
/* Advance past the part that has just been handled. */
725 len-=rdlen; a+=rdlen; buf+=rdlen;
731 * Write an amount of bytes at a given offset to the right file. This
732 * abstracts the write-side of the copyonwrite option, and calls
733 * rawexpwrite() with the right parameters to do the actual work.
735 * @param a The offset where the write should start
736 * @param buf The buffer to write from
737 * @param len The length of buf
738 * @param client The client we're going to write for.
739 * @return The number of bytes actually written, or -1 in case of an error
/* Write path with copy-on-write support. Without F_COPYONWRITE the request
 * goes straight to rawexpwrite(). Otherwise the range is processed one
 * DIFFPAGESIZE page at a time: a page already present in the diff file is
 * overwritten in place; a page that is not present is first read from the
 * original export into pagebuf, patched with the new data and appended to
 * the diff file as a whole page. Several short lines (declarations, some
 * returns, closing braces) are not visible in this view. */
741 int expwrite(off_t a, char *buf, size_t len, CLIENT *client) {
742 char pagebuf[DIFFPAGESIZE];
743 off_t mapcnt,mapl,maph;
748 if (!(client->server->flags & F_COPYONWRITE))
749 return(rawexpwrite(a,buf,len, client));
750 DEBUG3("Asked to write %d bytes at %Lu.\n", len, (unsigned long long)a);
/* First and last page index touched by the request. */
752 mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
754 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
755 pagestart=mapcnt*DIFFPAGESIZE ;
/* Write either the rest of the request or the rest of this page, whichever
 * is smaller. */
757 wrlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
758 len : (size_t)DIFFPAGESIZE-offset;
/* A map entry of (u32)-1 marks a page that has no copy in the diff file. */
760 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
761 DEBUG3("Page %Lu is at %lu\n", (unsigned long long)mapcnt,
762 (unsigned long)(client->difmap[mapcnt])) ;
763 myseek(client->difffile,
764 client->difmap[mapcnt]*DIFFPAGESIZE+offset);
765 if (write(client->difffile, buf, wrlen) != wrlen) return -1 ;
766 } else { /* the block is not there */
/* The new page goes to the end of the diff file; its slot number is stored
 * in the map and the page counter is incremented. */
767 myseek(client->difffile,client->difffilelen*DIFFPAGESIZE) ;
768 client->difmap[mapcnt]=client->difffilelen++ ;
769 DEBUG3("Page %Lu is not here, we put it at %lu\n",
770 (unsigned long long)mapcnt,
771 (unsigned long)(client->difmap[mapcnt]));
/* Clamp the read of the original page so it does not run past the end of
 * the current hunk. */
773 if (rdlen+pagestart%(client->server->hunksize) >
774 (client->server->hunksize))
775 rdlen=client->server->hunksize -
776 (pagestart%client->server->hunksize);
777 if (rawexpread(pagestart, pagebuf, rdlen, client))
/* Overlay the caller data on the original page, then store the full page. */
779 memcpy(pagebuf+offset,buf,wrlen) ;
780 if (write(client->difffile, pagebuf, DIFFPAGESIZE) !=
/* Advance past the part that has just been handled. */
784 len-=wrlen ; a+=wrlen ; buf+=wrlen ;
790 * Do the initial negotiation.
792 * @param client The client we're negotiating with.
/* Sends the server greeting on client->net, in this order: the 8-byte
 * INIT_PASSWD, the magic number, the export size as a 64-bit value, and
 * 128 zero bytes. Every failed write() is reported through err(). The
 * declarations of zeros and size_host are not visible in this view. */
794 void negotiate(CLIENT *client) {
798 memset(zeros, 0, 290);
799 if (write(client->net, INIT_PASSWD, 8) < 0)
800 err("Negotiation failed: %m");
/* NOTE(review): this converts cliserv_magic in place, so calling
 * negotiate() a second time in the same process would convert it again. */
801 cliserv_magic = htonll(cliserv_magic);
802 if (write(client->net, &cliserv_magic, sizeof(cliserv_magic)) < 0)
803 err("Negotiation failed: %m");
804 size_host = htonll((u64)(client->exportsize));
805 if (write(client->net, &size_host, 8) < 0)
806 err("Negotiation failed: %m");
/* Only negative results are treated as failures; short writes are not
 * detected by these checks. */
807 if (write(client->net, zeros, 128) < 0)
808 err("Negotiation failed: %m");
/** Sending macro: writes the whole reply structure to descriptor net.
 * Expands to a single expression, so the caller supplies the semicolon. */
#define SEND(net,reply) writeit( (net), &(reply), sizeof( reply ))
/** Error macro: marks the reply as failed, sends it to the client and then
 * clears the error field again for the next request. Wrapped in
 * do { } while (0) so that it behaves as one statement, also when used as
 * the body of an if that has an else branch. */
#define ERROR(client,reply) do { (reply).error = htonl(-1); SEND((client)->net,reply); (reply).error = 0; } while (0)
816 * Serve a file to a single client.
818 * @todo This beast needs to be split up in many tiny little manageable
819 * pieces. Preferably with a chainsaw.
821 * @param client The client we're going to serve to.
/* Request loop for one client. Each iteration reads a request header from
 * client->net, converts it to host byte order, validates it and then either
 * handles a disconnect, performs a write (payload read from the socket and
 * passed to expwrite) or performs a read (expread into the buffer behind
 * the reply header, then one combined write to the socket). The loop
 * statement itself, the declaration of buf and len, and many short lines
 * are not visible in this view. */
824 int mainloop(CLIENT *client) {
825 struct nbd_request request;
826 struct nbd_reply reply;
832 DEBUG("Entering request loop!\n");
833 reply.magic = htonl(NBD_REPLY_MAGIC);
/* Arm the idle timer before blocking on the next request. */
842 if (client->server->timeout)
843 alarm(client->server->timeout);
844 readit(client->net, &request, sizeof(request));
845 request.from = ntohll(request.from);
846 request.type = ntohl(request.type);
848 if (request.type==NBD_CMD_DISC) {
849 msg2(LOG_INFO, "Disconnect request received.");
/* NOTE(review): difmap is obtained with calloc() in copyonwrite_prepare()
 * but released with g_free() here, while difffilename is released with
 * free(). Also, no visible line initializes difmap or difffile when
 * copy-on-write is off (the CLIENT comes from g_malloc) - confirm. */
850 if (client->difmap) g_free(client->difmap) ;
851 if (client->difffile>=0) {
852 close(client->difffile);
853 unlink(client->difffilename);
854 free(client->difffilename);
860 len = ntohl(request.len);
862 if (request.magic != htonl(NBD_REQUEST_MAGIC))
863 err("Not enough magic.");
/* NOTE(review): this accepts len up to BUFSIZE plus the reply header size,
 * while the read path stores len bytes at buf + sizeof(struct nbd_reply)
 * and the write path stores len bytes at buf. The declaration of buf is
 * not visible; if it holds BUFSIZE bytes, this bound permits an overflow
 * and should subtract the header size instead of adding it. */
864 if (len > BUFSIZE + sizeof(struct nbd_reply))
865 err("Request too big!");
867 printf("%s from %Lu (%Lu) len %d, ", request.type ? "WRITE" :
868 "READ", (unsigned long long)request.from,
869 (unsigned long long)request.from / 512, len);
/* The reply echoes the handle of the request it answers. */
871 memcpy(reply.handle, request.handle, sizeof(reply.handle));
872 if ((request.from + len) > (OFFT_MAX)) {
873 DEBUG("[Number too large!]");
874 ERROR(client, reply);
/* Reject requests that reach past the end of the export, and any request
 * with a non-zero type on a read-only export.
 * NOTE(review): the sum is cast to ssize_t before the comparison, which
 * narrows it where ssize_t is smaller than off_t. */
878 if (((ssize_t)((off_t)request.from + len) > client->exportsize) ||
879 ((client->server->flags & F_READONLY) && request.type)) {
881 ERROR(client, reply);
885 if (request.type==NBD_CMD_WRITE) {
886 DEBUG("wr: net->buf, ");
887 readit(client->net, buf, len);
/* An export that was switched to read-only automatically fails the write
 * without calling expwrite(). */
889 if ((client->server->flags & F_AUTOREADONLY) ||
890 expwrite(request.from, buf, len,
892 DEBUG("Write failed: %m" );
893 ERROR(client, reply);
896 SEND(client->net, reply);
/* Read request: the data is placed directly behind the space reserved for
 * the reply header so header and data can be sent with one write. */
903 if (expread(request.from, buf + sizeof(struct nbd_reply), len, client)) {
904 DEBUG("Read failed: %m");
905 ERROR(client, reply);
910 memcpy(buf, &reply, sizeof(struct nbd_reply));
911 writeit(client->net, buf, len + sizeof(struct nbd_reply));
918 * Split a single exportfile into multiple ones, if that was asked.
919 * @return 0 on success, -1 on failure
920 * @param client information on the client which we want to split
/* Opens the file or files backing an export and stores the descriptors in
 * client->export, one per hunk. With F_MULTIFILE each hunk lives in its own
 * file named exportname.N; otherwise the export name itself is opened. The
 * local declarations, the return statement and some closing lines are not
 * visible in this view. */
922 int splitexport(CLIENT* client) {
926 client->export = g_array_new(TRUE, TRUE, sizeof(int));
/* One iteration per hunk of the export. */
927 for (i=0; i<client->exportsize; i+=client->server->hunksize) {
930 if(client->server->flags & F_MULTIFILE) {
931 tmpname=g_strdup_printf("%s.%d", client->exportname,
932 (int)(i/client->server->hunksize));
934 tmpname=g_strdup(client->exportname);
936 DEBUG2( "Opening %s\n", tmpname );
937 if((fhandle = open(tmpname, (client->server->flags & F_READONLY) ? O_RDONLY : O_RDWR)) == -1) {
938 /* The first open failed (read-write access may have been requested on read-only media): mark the export read-only and retry with O_RDONLY */
939 client->server->flags |= F_AUTOREADONLY;
940 client->server->flags |= F_READONLY;
941 if((fhandle = open(tmpname, O_RDONLY)) == -1)
942 err("Could not open exported file: %m");
/* Store the descriptor at the index of its hunk. */
944 g_array_insert_val(client->export,i/client->server->hunksize,fhandle);
/* Sets up copy-on-write state for a client: builds the diff file name,
 * creates (or truncates) the diff file with mode 0600, and allocates the
 * page map with every entry set to (u32)-1, the value the read and write
 * paths treat as "page not in the diff file". The local declarations, the
 * end of the snprintf call and the return are not visible in this view. */
950 int copyonwrite_prepare(CLIENT* client) {
952 if ((client->difffilename = malloc(1024))==NULL)
953 err("Failed to allocate string for diff file name");
954 snprintf(client->difffilename, 1024, "%s-%s-%d.diff",client->exportname,client->clientname,
956 client->difffilename[1023]='\0';
957 msg3(LOG_INFO,"About to create map and diff file %s",client->difffilename) ;
958 client->difffile=open(client->difffilename,O_RDWR | O_CREAT | O_TRUNC,0600) ;
959 if (client->difffile<0) err("Could not create diff file (%m)") ;
/* NOTE(review): the number of map entries is exportsize/DIFFPAGESIZE,
 * rounded down. If exportsize is not a multiple of DIFFPAGESIZE, the last
 * partial page has no map entry, yet expread/expwrite index the map with
 * (a+len-1)/DIFFPAGESIZE. Verify, or round the count up. */
960 if ((client->difmap=calloc(client->exportsize/DIFFPAGESIZE,sizeof(u32)))==NULL)
961 err("Could not allocate memory") ;
962 for (i=0;i<client->exportsize/DIFFPAGESIZE;i++) client->difmap[i]=(u32)-1 ;
968 * Serve a connection.
970 * @todo allow for multithreading, perhaps use libevent. Not just yet, though;
971 * follow the road map.
973 * @param client a connected client
/* Prepares one accepted client: picks the export size (autodetected from
 * the first export descriptor when no size was configured, otherwise the
 * configured size), logs it, prepares copy-on-write when requested and
 * applies socket options. Calls made on lines not visible in this view
 * (for example whatever runs before and after these steps) have to be
 * checked in the full file. */
975 void serveconnection(CLIENT *client) {
978 if (!client->server->expected_size) {
979 client->exportsize = size_autodetect(g_array_index(client->export,int,0));
981 /* Perhaps we should check first. Not now. */
982 client->exportsize = client->server->expected_size;
/* NOTE(review): exportsize is an off_t and OFFT_MAX is described as the
 * largest off_t value, so this condition looks like it can never be true. */
984 if (client->exportsize > OFFT_MAX) {
985 /* uhm, well... In a parallel universe, this *might* be
987 err("Size of exported file is too big\n");
990 msg3(LOG_INFO, "size of exported file/device is %Lu", (unsigned long long)client->exportsize);
993 if (client->server->flags & F_COPYONWRITE) {
994 copyonwrite_prepare(client);
997 setmysockopt(client->net);
1003 * Find the name of the file we have to serve. This will use g_strdup_printf
1004 * to put the IP address of the client inside a filename containing
1005 * "%s". That name is then written to client->exportname.
1007 * @param net A socket connected to an nbd client
1008 * @param client information about the client. The IP address in human-readable
1009 * format will be written to a new char* buffer, the address of which will be
1010 * stored in client->clientname.
1012 void set_peername(int net, CLIENT *client) {
1013 struct sockaddr_in addrin;
1014 int addrinlen = sizeof( addrin );
1017 if (getpeername(net, (struct sockaddr *) &addrin, (socklen_t *)&addrinlen) < 0)
1018 err("getsockname failed: %m");
1019 peername = inet_ntoa(addrin.sin_addr);
1020 client->exportname=g_strdup_printf(client->server->exportname, peername);
1022 msg4(LOG_INFO, "connect from %s, assigned file is %s",
1023 peername, client->exportname);
1024 client->clientname=g_strdup(peername);
1029 * @param data a pointer to pid_t which should be freed
/* Destroy-notify callback: setup_serve() registers it as the value
 * destructor of the children table. Its body is not visible in this view. */
1031 void destroy_pid_t(gpointer data) {
1036 * Go daemon (unless we specified at compile time that we didn't want this)
1037 * @param serve the first server of our configuration. If its port is zero,
1038 * then do not daemonize, because we're doing inetd then. This parameter
1039 * is only used to create a PID file of the form
1040 * /var/run/nbd-server.<port>.pid; it's not modified in any way.
/* daemonize() exists as a real function only when neither NODAEMON nor
 * NOFORK is defined; otherwise it is a macro that expands to nothing. The
 * visible part of the function chooses the PID file name (with or without
 * the port number), opens it for writing and records the current PID. The
 * call that detaches the process, the conditions around these statements
 * and the error branches are on lines not visible in this view. */
1042 #if !defined(NODAEMON) && !defined(NOFORK)
1043 void daemonize(SERVER* serve) {
/* Both variants limit the name to 255 bytes of the 256-byte pidfname. */
1050 snprintf(pidfname, sizeof(char)*255, "/var/run/nbd-server.%d.pid", serve->port);
1052 strncpy(pidfname, "/var/run/nbd-server.pid", sizeof(char)*255);
1054 pidf=fopen(pidfname, "w");
1056 fprintf(pidf,"%d\n", (int)getpid());
1060 fprintf(stderr, "Not fatal; continuing");
1064 #define daemonize(serve)
1065 #endif /* !defined(NODAEMON) && !defined(NOFORK) */
1068 * Connect a server's socket.
1070 * @param serve the server we want to connect.
/* Creates the listening TCP socket for one server: socket(), SO_REUSEADDR
 * and SO_KEEPALIVE, bind() to the configured port on any address, and
 * listen(). It then installs the SIGCHLD and SIGTERM handlers and creates
 * the children table. The error calls after socket(), bind() and listen()
 * and the declaration of yes are on lines not visible in this view. */
1072 void setup_serve(SERVER *serve) {
1073 struct sockaddr_in addrin;
1074 struct sigaction sa;
1075 int addrinlen = sizeof(addrin);
1081 if ((serve->socket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0)
1084 /* lose the pesky "Address already in use" error message */
1085 if (setsockopt(serve->socket,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) {
1086 err("setsockopt SO_REUSEADDR");
1088 if (setsockopt(serve->socket,SOL_SOCKET,SO_KEEPALIVE,&yes,sizeof(int)) == -1) {
1089 err("setsockopt SO_KEEPALIVE");
1092 DEBUG("Waiting for connections... bind, ");
/* An s_addr of 0 binds to every local address. Only these three members of
 * addrin are assigned on the visible lines. */
1093 addrin.sin_family = AF_INET;
1094 addrin.sin_port = htons(serve->port);
1095 addrin.sin_addr.s_addr = 0;
1096 if (bind(serve->socket, (struct sockaddr *) &addrin, addrinlen) < 0)
1099 if (listen(serve->socket, 1) < 0)
/* SA_RESTART lets interrupted system calls resume after the handler. */
1101 sa.sa_handler = sigchld_handler;
1102 sigemptyset(&sa.sa_mask);
1103 sa.sa_flags = SA_RESTART;
1104 if(sigaction(SIGCHLD, &sa, NULL) == -1)
1105 err("sigaction: %m");
1106 sa.sa_handler = sigterm_handler;
1107 sigemptyset(&sa.sa_mask);
1108 sa.sa_flags = SA_RESTART;
1109 if(sigaction(SIGTERM, &sa, NULL) == -1)
1110 err("sigaction: %m");
/* NOTE(review): setup_servers() calls this function once per server, so
 * the handlers are reinstalled and a fresh children table replaces the
 * previous one on every call. Confirm that this is intended. */
1111 children=g_hash_table_new_full(g_int_hash, g_int_equal, NULL, destroy_pid_t);
1115 * Connect our servers.
/* Calls setup_serve() on every SERVER element stored in the array. The
 * declaration of the loop counter is not visible in this view. */
1117 void setup_servers(GArray* servers) {
1120 for(i=0;i<servers->len;i++) {
1121 setup_serve(&(g_array_index(servers, SERVER, i)));
1126 * Loop through the available servers, and serve them.
/* Accept loop. A master descriptor set holding every server socket is built
 * once; each round copies it, waits in select() and, for every readable
 * server socket, accepts the connection, resolves and authorizes the peer
 * and forks. The parent records the child PID in the children table; the
 * child drops the table and the listening sockets and serves the client.
 * The outer loop statement, the declarations and many short lines are not
 * visible in this view. */
1128 int serveloop(GArray* servers) {
1129 struct sockaddr_in addrin;
1130 socklen_t addrinlen=sizeof(addrin);
1137 * Set up the master fd_set. The set of descriptors we need
1138 * to select() for never changes anyway and it buys us a *lot*
1139 * of time to only build this once. However, if we ever choose
1140 * to not fork() for clients anymore, we may have to revisit
1145 for(i=0;i<servers->len;i++) {
1146 sock=(g_array_index(servers, SERVER, i)).socket;
1147 FD_SET(sock, &mset);
/* Track the highest descriptor for the first argument of select(). */
1148 max=sock>max?sock:max;
/* select() modifies the set it is given, so work on a copy each round. */
1155 memcpy(&rset, &mset, sizeof(fd_set));
1158 if(select(max+1, &rset, NULL, NULL, &tv)>0) {
1160 for(i=0;i<servers->len;i++) {
1161 serve=&(g_array_index(servers, SERVER, i));
1162 if(FD_ISSET(serve->socket, &rset)) {
1163 if ((net=accept(serve->socket, (struct sockaddr *) &addrin, &addrinlen)) < 0)
/* g_malloc() does not zero the CLIENT; only the members assigned on the
 * visible lines below are known to be initialized. */
1166 client = g_malloc(sizeof(CLIENT));
1167 client->server=serve;
1168 client->exportsize=OFFT_MAX;
1170 set_peername(net, client);
1171 if (!authorized_client(client)) {
1172 msg2(LOG_INFO,"Unauthorized client") ;
1176 msg2(LOG_INFO,"Authorized client") ;
/* The PID is heap-allocated because the table stores the pointer itself as
 * both key and value. */
1177 pid=g_malloc(sizeof(pid_t));
1179 if ((*pid=fork())<0) {
1180 msg3(LOG_INFO,"Could not fork (%s)",strerror(errno)) ;
1184 if (*pid>0) { /* parent */
1186 g_hash_table_insert(children, pid, pid);
/* Child from here on: it does not manage children of its own. */
1190 g_hash_table_destroy(children);
/* NOTE(review): the comma operator discards the i<servers->len test, so the
 * loop ends only when the fetched element is a null pointer. The array was
 * created with elements of type SERVER, not SERVER*, so reading it as
 * SERVER* looks wrong as well. The loop also reuses i, the counter of the
 * enclosing loop. */
1191 for(i=0;i<servers->len,serve=(g_array_index(servers, SERVER*, i));i++) {
1192 close(serve->socket);
1194 /* FALSE does not free the
1195 actual data. This is required,
1196 because the client has a
1197 direct reference into that
1198 data, and otherwise we get a
1200 g_array_free(servers, FALSE);
1202 msg2(LOG_INFO,"Starting to serve");
1203 serveconnection(client);
1211 * Main entry point...
/* Program entry point: checks the size of struct nbd_request, parses the
 * command line and the configuration file, serves a single client on
 * descriptor 0 when the command-line port is 0, and otherwise sets up the
 * listening sockets. The local declarations, exit/return statements and the
 * calls that follow setup_servers() are not visible in this view. */
1213 int main(int argc, char *argv[]) {
/* The request structure must be exactly 28 bytes; any other size is
 * reported as a possible alignment problem. */
1218 if (sizeof( struct nbd_request )!=28) {
1219 fprintf(stderr,"Bad size of structure. Alignment problems?\n");
1224 config_file_pos = g_strdup(CFILE);
1225 serve=cmdline(argc, argv);
1226 servers = parse_cfile(config_file_pos, &err);
/* NOTE(review): err->message is read whenever servers is NULL or empty.
 * Confirm that parse_cfile() always sets err in those cases; otherwise
 * this dereferences a null pointer. */
1227 if(!servers || !servers->len) {
1228 g_warning("Could not parse config file: %s", err->message);
/* NOTE(review): a copy of *serve is appended here. Whether servers can
 * still be NULL, or serve can be NULL, at this point depends on lines that
 * are not visible; note that serve is tested against NULL further down,
 * after it has already been dereferenced. */
1231 g_array_append_val(servers, *serve);
1234 /* We don't support this at this time */
1236 if (!(serve->port)) {
1239 /* You really should define ISSERVER if you're going to use
1240 * inetd mode, but if you don't, closing stdout and stderr
1241 * (which inetd had connected to the client socket) will let it
1245 open("/dev/null", O_WRONLY);
1246 open("/dev/null", O_WRONLY);
1248 client=g_malloc(sizeof(CLIENT));
1249 client->server=serve;
1251 client->exportsize=OFFT_MAX;
1252 set_peername(0,client);
1253 serveconnection(client);
1257 if((!serve) && (!servers||!servers->len)) {
1258 g_message("Nothing to do! Bye!");
1262 setup_servers(servers);