X-Git-Url: http://git.alex.org.uk diff --git a/nbd-server.c b/nbd-server.c index c027557..3343d9d 100644 --- a/nbd-server.c +++ b/nbd-server.c @@ -49,6 +49,10 @@ * 11/02/2004 - Doxygenified the source, modularized it a bit. Needs a * lot more work, but this is a start. Wouter Verhelst * + * 16/03/2010 - Add IPv6 support. + * Kitt Tientanopajai + * Neutron Soutmun + * Suriya Soutmun */ /* Includes LFS defines, which defines behaviours of some of the following @@ -70,8 +74,8 @@ #include /* sigaction */ #include #include -#include /* sockaddr_in, htons, in_addr */ -#include /* hostent, gethostby*, getservby* */ +#include +#include #include #include #include @@ -92,6 +96,10 @@ #define MY_NAME "nbd_server" #include "cliserv.h" +#ifdef WITH_SDP +#include +#endif + /** Default position of the config file */ #ifndef SYSCONFDIR #define SYSCONFDIR "/etc" @@ -105,6 +113,11 @@ gchar* config_file_pos; gchar* runuser=NULL; /** What group we're running as */ gchar* rungroup=NULL; +/** whether to export using the old negotiation protocol (port-based) */ +gboolean do_oldstyle=FALSE; + +/* Whether we should avoid forking */ +int dontfork = 0; /** Logging macros, now nothing goes to syslog unless you say ISSERVER */ #ifdef ISSERVER @@ -120,15 +133,9 @@ gchar* rungroup=NULL; /* Debugging macros */ //#define DODBG #ifdef DODBG -#define DEBUG( a ) printf( a ) -#define DEBUG2( a,b ) printf( a,b ) -#define DEBUG3( a,b,c ) printf( a,b,c ) -#define DEBUG4( a,b,c,d ) printf( a,b,c,d ) +#define DEBUG(...) printf(__VA_ARGS__) #else -#define DEBUG( a ) -#define DEBUG2( a,b ) -#define DEBUG3( a,b,c ) -#define DEBUG4( a,b,c,d ) +#define DEBUG(...) #endif #ifndef PACKAGE_VERSION #define PACKAGE_VERSION "" @@ -140,7 +147,7 @@ gchar* rungroup=NULL; #define OFFT_MAX ~((off_t)1<<(sizeof(off_t)*8-1)) #define LINELEN 256 /**< Size of static buffer used to read the authorization file (yuck) */ -#define BUFSIZE (1024*1024) /**< Size of buffer that can hold requests */ +#define BUFSIZE ((1024*1024)+sizeof(struct nbd_reply)) /**< Size of buffer that can hold requests */ #define DIFFPAGESIZE 4096 /**< diff file uses those chunks */ #define F_READONLY 1 /**< flag to tell us a file is readonly */ #define F_MULTIFILE 2 /**< flag to tell us a file is exported using -m */ @@ -149,11 +156,22 @@ gchar* rungroup=NULL; #define F_AUTOREADONLY 8 /**< flag to tell us a file is set to autoreadonly */ #define F_SPARSE 16 /**< flag to tell us copyronwrite should use a sparse file */ #define F_SDP 32 /**< flag to tell us the export should be done using the Socket Direct Protocol for RDMA */ +#define F_SYNC 64 /**< Whether to fsync() after a write */ +#define F_FLUSH 128 /**< Whether server wants FLUSH to be sent by the client */ +#define F_FUA 256 /**< Whether server wants FUA to be sent by the client */ +#define F_ROTATIONAL 512 /**< Whether server wants the client to implement the elevator algorithm */ GHashTable *children; char pidfname[256]; /**< name of our PID file */ char pidftemplate[256]; /**< template to be used for the filename of the PID file */ char default_authname[] = SYSCONFDIR "/nbd-server/allow"; /**< default name of allow file */ +int modernsock=0; /**< Socket for the modern handler. Not used + if a client was only specified on the + command line; only port used if + oldstyle is set to false (and then the + command-line client isn't used, gna gna) */ +char* modern_listen; /**< listenaddr value for modernsock */ + /** * Types of virtuatlization **/ @@ -176,9 +194,8 @@ typedef struct { unsigned int port; /**< port we're exporting this file at */ char* authname; /**< filename of the authorization file */ int flags; /**< flags associated with this exported file */ - unsigned int timeout;/**< how long a connection may be idle - (0=forever) */ int socket; /**< The socket of this server. */ + int socket_family; /**< family of the socket */ VIRT_STYLE virtstyle;/**< The style of virtualization, if any */ uint8_t cidrlen; /**< The length of the mask when we use CIDR-style virtualization */ @@ -186,6 +203,9 @@ typedef struct { but before starting to serve */ gchar* postrun; /**< command that will be ran after the client disconnects */ + gchar* servename; /**< name of the export as selected by nbd-client */ + int max_connections; /**< maximum number of opened connections */ + gchar* transactionlog;/**< filename for transaction log */ } SERVER; /** @@ -211,6 +231,8 @@ typedef struct { make -m and -c mutually exclusive */ u32 difffilelen; /**< number of pages in difffile */ u32 *difmap; /**< see comment on the global difmap for this one */ + gboolean modern; /**< client was negotiated using modern negotiation protocol */ + int transactionlogfd;/**< fd for transaction log */ } CLIENT; /** @@ -270,7 +292,7 @@ int authorized_client(CLIENT *opts) { return 0; } *(tmp++)=0; - if(inet_aton(line,&addr)) { + if(!inet_aton(line,&addr)) { msg4(LOG_CRIT, ERRMSG, line, opts->server->authname); return 0; } @@ -300,14 +322,18 @@ int authorized_client(CLIENT *opts) { * @param buf a buffer * @param len the number of bytes to be read **/ -inline void readit(int f, void *buf, size_t len) { +static inline void readit(int f, void *buf, size_t len) { ssize_t res; while (len > 0) { DEBUG("*"); - if ((res = read(f, buf, len)) <= 0) - err("Read failed: %m"); - len -= res; - buf += res; + if ((res = read(f, buf, len)) <= 0) { + if(errno != EAGAIN) { + err("Read failed: %m"); + } + } else { + len -= res; + buf += res; + } } } @@ -318,7 +344,7 @@ inline void readit(int f, void *buf, size_t len) { * @param buf a buffer containing data * @param len the number of bytes to be written **/ -inline void writeit(int f, void *buf, size_t len) { +static inline void writeit(int f, void *buf, size_t len) { ssize_t res; while (len > 0) { DEBUG("+"); @@ -335,15 +361,15 @@ inline void writeit(int f, void *buf, size_t len) { */ void usage() { printf("This is nbd-server version " VERSION "\n"); - printf("Usage: [ip:]port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-a timeout_sec] [-C configuration file] [-p PID file name] [-o section name]\n" + printf("Usage: [ip:|ip6@]port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-C configuration file] [-p PID file name] [-o section name] [-M max connections]\n" "\t-r|--read-only\t\tread only\n" "\t-m|--multi-file\t\tmultiple file\n" "\t-c|--copy-on-write\tcopy on write\n" "\t-C|--config-file\tspecify an alternate configuration file\n" "\t-l|--authorize-file\tfile with list of hosts that are allowed to\n\t\t\t\tconnect.\n" - "\t-a|--idle-time\t\tmaximum idle seconds; server terminates when\n\t\t\t\tidle time exceeded\n" "\t-p|--pid-file\t\tspecify a filename to write our PID to\n" - "\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n\n" + "\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n" + "\t-M|--max-connections\tspecify the maximum number of opened connections\n\n" "\tif port is set to 0, stdin is used (for running from inetd)\n" "\tif file_to_export contains '%%s', it is substituted with the IP\n" "\t\taddress of the machine trying to connect\n" @@ -367,14 +393,11 @@ void dump_section(SERVER* serve, gchar* section_header) { printf("\tcopyonwrite = true\n"); } if(serve->expected_size) { - printf("\tfilesize = %Ld\n", (long long int)serve->expected_size); + printf("\tfilesize = %lld\n", (long long int)serve->expected_size); } if(serve->authname) { printf("\tauthfile = %s\n", serve->authname); } - if(serve->timeout) { - printf("\ttimeout = %d\n", serve->timeout); - } exit(EXIT_SUCCESS); } @@ -392,11 +415,12 @@ SERVER* cmdline(int argc, char *argv[]) { {"read-only", no_argument, NULL, 'r'}, {"multi-file", no_argument, NULL, 'm'}, {"copy-on-write", no_argument, NULL, 'c'}, + {"dont-fork", no_argument, NULL, 'd'}, {"authorize-file", required_argument, NULL, 'l'}, - {"idle-time", required_argument, NULL, 'a'}, {"config-file", required_argument, NULL, 'C'}, {"pid-file", required_argument, NULL, 'p'}, {"output-config", required_argument, NULL, 'o'}, + {"max-connection", required_argument, NULL, 'M'}, {0,0,0,0} }; SERVER *serve; @@ -413,18 +437,30 @@ SERVER* cmdline(int argc, char *argv[]) { serve=g_new0(SERVER, 1); serve->authname = g_strdup(default_authname); serve->virtstyle=VIRT_IPLIT; - while((c=getopt_long(argc, argv, "-a:C:cl:mo:rp:", long_options, &i))>=0) { + while((c=getopt_long(argc, argv, "-C:cdl:mo:rp:M:", long_options, &i))>=0) { switch (c) { case 1: /* non-option argument */ switch(nonspecial++) { case 0: - addr_port=g_strsplit(optarg, ":", 2); + if(strchr(optarg, ':') == strrchr(optarg, ':')) { + addr_port=g_strsplit(optarg, ":", 2); + + /* Check for "@" - maybe user using this separator + for IPv4 address */ + if(!addr_port[1]) { + g_strfreev(addr_port); + addr_port=g_strsplit(optarg, "@", 2); + } + } else { + addr_port=g_strsplit(optarg, "@", 2); + } + if(addr_port[1]) { serve->port=strtol(addr_port[1], NULL, 0); serve->listenaddr=g_strdup(addr_port[0]); } else { - serve->listenaddr=g_strdup("0.0.0.0"); + serve->listenaddr=NULL; serve->port=strtol(addr_port[0], NULL, 0); } g_strfreev(addr_port); @@ -442,7 +478,7 @@ SERVER* cmdline(int argc, char *argv[]) { if (suffix == 'k' || suffix == 'K' || suffix == 'm' || suffix == 'M') optarg[last] = '\0'; - es = (off_t)atol(optarg); + es = (off_t)atoll(optarg); switch (suffix) { case 'm': case 'M': es <<= 10; @@ -470,6 +506,9 @@ SERVER* cmdline(int argc, char *argv[]) { case 'c': serve->flags |=F_COPYONWRITE; break; + case 'd': + dontfork = 1; + break; case 'C': g_free(config_file_pos); config_file_pos=g_strdup(optarg); @@ -478,8 +517,8 @@ SERVER* cmdline(int argc, char *argv[]) { g_free(serve->authname); serve->authname=g_strdup(optarg); break; - case 'a': - serve->timeout=strtol(optarg, NULL, 0); + case 'M': + serve->max_connections = strtol(optarg, NULL, 0); break; default: usage(); @@ -492,6 +531,8 @@ SERVER* cmdline(int argc, char *argv[]) { if(nonspecial<2) { g_free(serve); serve=NULL; + } else { + do_oldstyle = TRUE; } if(do_output) { if(!serve) { @@ -512,7 +553,11 @@ typedef enum { CFILE_KEY_MISSING, /**< A (required) key is missing */ CFILE_VALUE_INVALID, /**< A value is syntactically invalid */ CFILE_VALUE_UNSUPPORTED,/**< A value is not supported in this build */ - CFILE_PROGERR /**< Programmer error */ + CFILE_PROGERR, /**< Programmer error */ + CFILE_NO_EXPORTS, /**< A config file was specified that does not + define any exports */ + CFILE_INCORRECT_PORT, /**< The reserved port was specified for an + old-style export. */ } CFILE_ERRORS; /** @@ -525,10 +570,132 @@ void remove_server(gpointer s) { g_free(server->exportname); if(server->authname) g_free(server->authname); + if(server->listenaddr) + g_free(server->listenaddr); + if(server->prerun) + g_free(server->prerun); + if(server->postrun) + g_free(server->postrun); + if(server->transactionlog) + g_free(server->transactionlog); g_free(server); } /** + * duplicate server + * @param s the old server we want to duplicate + * @return new duplicated server + **/ +SERVER* dup_serve(SERVER *s) { + SERVER *serve = NULL; + + serve=g_new0(SERVER, 1); + if(serve == NULL) + return NULL; + + if(s->exportname) + serve->exportname = g_strdup(s->exportname); + + serve->expected_size = s->expected_size; + + if(s->listenaddr) + serve->listenaddr = g_strdup(s->listenaddr); + + serve->port = s->port; + + if(s->authname) + serve->authname = strdup(s->authname); + + serve->flags = s->flags; + serve->socket = s->socket; + serve->socket_family = s->socket_family; + serve->virtstyle = s->virtstyle; + serve->cidrlen = s->cidrlen; + + if(s->prerun) + serve->prerun = g_strdup(s->prerun); + + if(s->postrun) + serve->postrun = g_strdup(s->postrun); + + if(s->transactionlog) + serve->transactionlog = g_strdup(s->transactionlog); + + if(s->servename) + serve->servename = g_strdup(s->servename); + + serve->max_connections = s->max_connections; + + return serve; +} + +/** + * append new server to array + * @param s server + * @param a server array + * @return 0 success, -1 error + */ +int append_serve(SERVER *s, GArray *a) { + SERVER *ns = NULL; + struct addrinfo hints; + struct addrinfo *ai = NULL; + struct addrinfo *rp = NULL; + char host[NI_MAXHOST]; + gchar *port = NULL; + int e; + int ret; + + if(!s) { + err("Invalid parsing server"); + return -1; + } + + port = g_strdup_printf("%d", s->port); + + memset(&hints,'\0',sizeof(hints)); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + hints.ai_flags = AI_ADDRCONFIG | AI_PASSIVE; + hints.ai_protocol = IPPROTO_TCP; + + e = getaddrinfo(s->listenaddr, port, &hints, &ai); + + if (port) + g_free(port); + + if(e == 0) { + for (rp = ai; rp != NULL; rp = rp->ai_next) { + e = getnameinfo(rp->ai_addr, rp->ai_addrlen, host, sizeof(host), NULL, 0, NI_NUMERICHOST); + + if (e != 0) { // error + fprintf(stderr, "getnameinfo: %s\n", gai_strerror(e)); + continue; + } + + // duplicate server and set listenaddr to resolved IP address + ns = dup_serve (s); + if (ns) { + ns->listenaddr = g_strdup(host); + ns->socket_family = rp->ai_family; + g_array_append_val(a, *ns); + free(ns); + ns = NULL; + } + } + + ret = 0; + } else { + fprintf(stderr, "getaddrinfo failed on listen host/address: %s (%s)\n", s->listenaddr ? s->listenaddr : "any", gai_strerror(e)); + ret = -1; + } + + if (ai) + freeaddrinfo(ai); + + return ret; +} + +/** * Parse the config file. * * @param f the name of the config file @@ -544,29 +711,35 @@ GArray* parse_cfile(gchar* f, GError** e) { SERVER s; gchar *virtstyle=NULL; PARAM lp[] = { - { "exportname", TRUE, PARAM_STRING, NULL, 0 }, - { "port", TRUE, PARAM_INT, NULL, 0 }, - { "authfile", FALSE, PARAM_STRING, NULL, 0 }, - { "timeout", FALSE, PARAM_INT, NULL, 0 }, - { "filesize", FALSE, PARAM_INT, NULL, 0 }, - { "virtstyle", FALSE, PARAM_STRING, NULL, 0 }, - { "prerun", FALSE, PARAM_STRING, NULL, 0 }, - { "postrun", FALSE, PARAM_STRING, NULL, 0 }, - { "readonly", FALSE, PARAM_BOOL, NULL, F_READONLY }, - { "multifile", FALSE, PARAM_BOOL, NULL, F_MULTIFILE }, - { "copyonwrite", FALSE, PARAM_BOOL, NULL, F_COPYONWRITE }, - { "autoreadonly", FALSE, PARAM_BOOL, NULL, F_AUTOREADONLY }, - { "sparse_cow", FALSE, PARAM_BOOL, NULL, F_SPARSE }, - { "sdp", FALSE, PARAM_BOOL, NULL, F_SDP }, - { "listenaddr", FALSE, PARAM_STRING, NULL, 0 }, + { "exportname", TRUE, PARAM_STRING, &(s.exportname), 0 }, + { "port", TRUE, PARAM_INT, &(s.port), 0 }, + { "authfile", FALSE, PARAM_STRING, &(s.authname), 0 }, + { "filesize", FALSE, PARAM_INT, &(s.expected_size), 0 }, + { "virtstyle", FALSE, PARAM_STRING, &(virtstyle), 0 }, + { "prerun", FALSE, PARAM_STRING, &(s.prerun), 0 }, + { "postrun", FALSE, PARAM_STRING, &(s.postrun), 0 }, + { "transactionlog", FALSE, PARAM_STRING, &(s.transactionlog), 0 }, + { "readonly", FALSE, PARAM_BOOL, &(s.flags), F_READONLY }, + { "multifile", FALSE, PARAM_BOOL, &(s.flags), F_MULTIFILE }, + { "copyonwrite", FALSE, PARAM_BOOL, &(s.flags), F_COPYONWRITE }, + { "sparse_cow", FALSE, PARAM_BOOL, &(s.flags), F_SPARSE }, + { "sdp", FALSE, PARAM_BOOL, &(s.flags), F_SDP }, + { "sync", FALSE, PARAM_BOOL, &(s.flags), F_SYNC }, + { "flush", FALSE, PARAM_BOOL, &(s.flags), F_FLUSH }, + { "fua", FALSE, PARAM_BOOL, &(s.flags), F_FUA }, + { "rotational", FALSE, PARAM_BOOL, &(s.flags), F_ROTATIONAL }, + { "listenaddr", FALSE, PARAM_STRING, &(s.listenaddr), 0 }, + { "maxconnections", FALSE, PARAM_INT, &(s.max_connections), 0 }, }; - const int lp_size=15; + const int lp_size=sizeof(lp)/sizeof(PARAM); PARAM gp[] = { { "user", FALSE, PARAM_STRING, &runuser, 0 }, { "group", FALSE, PARAM_STRING, &rungroup, 0 }, + { "oldstyle", FALSE, PARAM_BOOL, &do_oldstyle, 1 }, + { "listenaddr", FALSE, PARAM_STRING, &modern_listen, 0 }, }; PARAM* p=gp; - int p_size=2; + int p_size=sizeof(gp)/sizeof(PARAM); GKeyFile *cfile; GError *err = NULL; const char *err_msg=NULL; @@ -574,6 +747,7 @@ GArray* parse_cfile(gchar* f, GError** e) { GArray *retval=NULL; gchar **groups; gboolean value; + gchar* startgroup; gint i; gint j; @@ -582,11 +756,12 @@ GArray* parse_cfile(gchar* f, GError** e) { retval = g_array_new(FALSE, TRUE, sizeof(SERVER)); if(!g_key_file_load_from_file(cfile, f, G_KEY_FILE_KEEP_COMMENTS | G_KEY_FILE_KEEP_TRANSLATIONS, &err)) { - g_set_error(e, errdomain, CFILE_NOTFOUND, "Could not open config file."); + g_set_error(e, errdomain, CFILE_NOTFOUND, "Could not open config file %s.", f); g_key_file_free(cfile); return retval; } - if(strcmp(g_key_file_get_start_group(cfile), "generic")) { + startgroup = g_key_file_get_start_group(cfile); + if(!startgroup || strcmp(startgroup, "generic")) { g_set_error(e, errdomain, CFILE_MISSING_GENERIC, "Config file does not contain the [generic] group!"); g_key_file_free(cfile); return NULL; @@ -594,18 +769,6 @@ GArray* parse_cfile(gchar* f, GError** e) { groups = g_key_file_get_groups(cfile, NULL); for(i=0;groups[i];i++) { memset(&s, '\0', sizeof(SERVER)); - lp[0].target=&(s.exportname); - lp[1].target=&(s.port); - lp[2].target=&(s.authname); - lp[3].target=&(s.timeout); - lp[4].target=&(s.expected_size); - lp[5].target=&(virtstyle); - lp[6].target=&(s.prerun); - lp[7].target=&(s.postrun); - lp[8].target=lp[9].target=lp[10].target= - lp[11].target=lp[12].target= - lp[13].target=&(s.flags); - lp[14].target=&(s.listenaddr); /* After the [generic] group, start parsing exports */ if(i==1) { @@ -643,6 +806,11 @@ GArray* parse_cfile(gchar* f, GError** e) { } break; } + if(!strcmp(p[j].paramname, "port") && !strcmp(p[j].target, NBD_DEFAULT_PORT)) { + g_set_error(e, errdomain, CFILE_INCORRECT_PORT, "Config file specifies default port for oldstyle export"); + g_key_file_free(cfile); + return NULL; + } if(err) { if(err->code == G_KEY_FILE_ERROR_KEY_NOT_FOUND) { if(!p[j].required) { @@ -684,6 +852,10 @@ GArray* parse_cfile(gchar* f, GError** e) { g_key_file_free(cfile); return NULL; } + if(s.port && !do_oldstyle) { + g_warning("A port was specified, but oldstyle exports were not requested. This may not do what you expect."); + g_warning("Please read 'man 5 nbd-server' and search for oldstyle for more info"); + } } else { s.virtstyle=VIRT_IPLIT; } @@ -691,10 +863,14 @@ GArray* parse_cfile(gchar* f, GError** e) { virtstyle=NULL; /* Don't append values for the [generic] group */ if(i>0) { - if(!s.listenaddr) { - s.listenaddr = g_strdup("0.0.0.0"); + s.socket_family = AF_UNSPEC; + s.servename = groups[i]; + + append_serve(&s, retval); + } else { + if(!do_oldstyle) { + lp[1].required = 0; } - g_array_append_val(retval, s); } #ifndef WITH_SDP if(s.flags & F_SDP) { @@ -705,6 +881,10 @@ GArray* parse_cfile(gchar* f, GError** e) { } #endif } + if(i==1) { + g_set_error(e, errdomain, CFILE_NO_EXPORTS, "The config file does not specify any exports"); + } + g_key_file_free(cfile); return retval; } @@ -726,7 +906,7 @@ void sigchld_handler(int s) { if(!i) { msg3(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid); } else { - DEBUG2("Removing %d from the list of children", pid); + DEBUG("Removing %d from the list of children", pid); g_hash_table_remove(children, &pid); } } @@ -762,7 +942,7 @@ void sigterm_handler(int s) { unlink(pidfname); } - exit(0); + exit(EXIT_SUCCESS); } /** @@ -774,19 +954,18 @@ void sigterm_handler(int s) { **/ off_t size_autodetect(int fhandle) { off_t es; - unsigned long sectors; + u64 bytes; struct stat stat_buf; int error; #ifdef HAVE_SYS_MOUNT_H #ifdef HAVE_SYS_IOCTL_H -#ifdef BLKGETSIZE - DEBUG("looking for export size with ioctl BLKGETSIZE\n"); - if (!ioctl(fhandle, BLKGETSIZE, §ors) && sectors) { - es = (off_t)sectors * (off_t)512; - return es; +#ifdef BLKGETSIZE64 + DEBUG("looking for export size with ioctl BLKGETSIZE64\n"); + if (!ioctl(fhandle, BLKGETSIZE64, &bytes) && bytes) { + return (off_t)bytes; } -#endif /* BLKGETSIZE */ +#endif /* BLKGETSIZE64 */ #endif /* HAVE_SYS_IOCTL_H */ #endif /* HAVE_SYS_MOUNT_H */ @@ -805,7 +984,7 @@ off_t size_autodetect(int fhandle) { if (es > ((off_t)0)) { return es; } else { - DEBUG2("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4))); + DEBUG("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4))); } err("Could not find size of exported block device: %m"); @@ -884,30 +1063,74 @@ void myseek(int handle,off_t a) { * @param client The client we're serving for * @return The number of bytes actually written, or -1 in case of an error **/ -ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client) { +ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) { int fhandle; off_t foffset; size_t maxbytes; + ssize_t retval; if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes)) return -1; if(maxbytes && len > maxbytes) len = maxbytes; - DEBUG4("(WRITE to fd %d offset %Lu len %u), ", fhandle, foffset, len); + DEBUG("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle, (long long unsigned)foffset, (unsigned int)len, fua); myseek(fhandle, foffset); - return write(fhandle, buf, len); + retval = write(fhandle, buf, len); + if(client->server->flags & F_SYNC) { + fsync(fhandle); + } else if (fua) { + + /* This is where we would do the following + * #ifdef USE_SYNC_FILE_RANGE + * However, we don't, for the reasons set out below + * by Christoph Hellwig + * + * [BEGINS] + * fdatasync is equivalent to fsync except that it does not flush + * non-essential metadata (basically just timestamps in practice), but it + * does flush metadata requried to find the data again, e.g. allocation + * information and extent maps. sync_file_range does nothing but flush + * out pagecache content - it means you basically won't get your data + * back in case of a crash if you either: + * + * a) have a volatile write cache in your disk (e.g. any normal SATA disk) + * b) are using a sparse file on a filesystem + * c) are using a fallocate-preallocated file on a filesystem + * d) use any file on a COW filesystem like btrfs + * + * e.g. it only does anything useful for you if you do not have a volatile + * write cache, and either use a raw block device node, or just overwrite + * an already fully allocated (and not preallocated) file on a non-COW + * filesystem. + * [ENDS] + * + * What we should do is open a second FD with O_DSYNC set, then write to + * that when appropriate. However, with a Linux client, every REQ_FUA + * immediately follows a REQ_FLUSH, so fdatasync does not cause performance + * problems. + * + */ +#if 0 + sync_file_range(fhandle, foffset, len, + SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE | + SYNC_FILE_RANGE_WAIT_AFTER); +#else + fdatasync(fhandle); +#endif + } + return retval; } /** * Call rawexpwrite repeatedly until all data has been written. * @return 0 on success, nonzero on failure **/ -int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client) { +int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client, int fua) { ssize_t ret=0; - while(len > 0 && (ret=rawexpwrite(a, buf, len, client)) > 0 ) { + while(len > 0 && (ret=rawexpwrite(a, buf, len, client, fua)) > 0 ) { a += ret; buf += ret; len -= ret; @@ -936,7 +1159,7 @@ ssize_t rawexpread(off_t a, char *buf, size_t len, CLIENT *client) { if(maxbytes && len > maxbytes) len = maxbytes; - DEBUG4("(READ from fd %d offset %Lu len %u), ", fhandle, foffset, len); + DEBUG("(READ from fd %d offset %llu len %u), ", fhandle, (long long unsigned int)foffset, (unsigned int)len); myseek(fhandle, foffset); return read(fhandle, buf, len); @@ -973,7 +1196,7 @@ int expread(off_t a, char *buf, size_t len, CLIENT *client) { if (!(client->server->flags & F_COPYONWRITE)) return(rawexpread_fully(a, buf, len, client)); - DEBUG3("Asked to read %d bytes at %Lu.\n", len, (unsigned long long)a); + DEBUG("Asked to read %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a); mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE; @@ -983,12 +1206,12 @@ int expread(off_t a, char *buf, size_t len, CLIENT *client) { rdlen=(0difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */ - DEBUG3("Page %Lu is at %lu\n", (unsigned long long)mapcnt, + DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt, (unsigned long)(client->difmap[mapcnt])); myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset); if (read(client->difffile, buf, rdlen) != rdlen) return -1; } else { /* the block is not there */ - DEBUG2("Page %Lu is not here, we read the original one\n", + DEBUG("Page %llu is not here, we read the original one\n", (unsigned long long)mapcnt); if(rawexpread_fully(a, buf, rdlen, client)) return -1; } @@ -1008,7 +1231,7 @@ int expread(off_t a, char *buf, size_t len, CLIENT *client) { * @param client The client we're going to write for. * @return 0 on success, nonzero on failure **/ -int expwrite(off_t a, char *buf, size_t len, CLIENT *client) { +int expwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) { char pagebuf[DIFFPAGESIZE]; off_t mapcnt,mapl,maph; off_t wrlen,rdlen; @@ -1016,8 +1239,8 @@ int expwrite(off_t a, char *buf, size_t len, CLIENT *client) { off_t offset; if (!(client->server->flags & F_COPYONWRITE)) - return(rawexpwrite_fully(a, buf, len, client)); - DEBUG3("Asked to write %d bytes at %Lu.\n", len, (unsigned long long)a); + return(rawexpwrite_fully(a, buf, len, client, fua)); + DEBUG("Asked to write %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a); mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ; @@ -1028,7 +1251,7 @@ int expwrite(off_t a, char *buf, size_t len, CLIENT *client) { len : (size_t)DIFFPAGESIZE-offset; if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */ - DEBUG3("Page %Lu is at %lu\n", (unsigned long long)mapcnt, + DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt, (unsigned long)(client->difmap[mapcnt])) ; myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset); @@ -1036,7 +1259,7 @@ int expwrite(off_t a, char *buf, size_t len, CLIENT *client) { } else { /* the block is not there */ myseek(client->difffile,client->difffilelen*DIFFPAGESIZE) ; client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++; - DEBUG3("Page %Lu is not here, we put it at %lu\n", + DEBUG("Page %llu is not here, we put it at %lu\n", (unsigned long long)mapcnt, (unsigned long)(client->difmap[mapcnt])); rdlen=DIFFPAGESIZE ; @@ -1049,6 +1272,30 @@ int expwrite(off_t a, char *buf, size_t len, CLIENT *client) { } len-=wrlen ; a+=wrlen ; buf+=wrlen ; } + if (client->server->flags & F_SYNC) { + fsync(client->difffile); + } else if (fua) { + /* open question: would it be cheaper to do multiple sync_file_ranges? + as we iterate through the above? + */ + fdatasync(client->difffile); + } + return 0; +} + +int expflush(CLIENT *client) { + gint i; + + if (client->server->flags & F_COPYONWRITE) { + return fsync(client->difffile); + } + + for (i = 0; i < client->export->len; i++) { + FILE_INFO fi = g_array_index(client->export, FILE_INFO, i); + if (fsync(fi.fhandle) < 0) + return -1; + } + return 0; } @@ -1057,31 +1304,120 @@ int expwrite(off_t a, char *buf, size_t len, CLIENT *client) { * * @param client The client we're negotiating with. **/ -void negotiate(CLIENT *client) { +CLIENT* negotiate(int net, CLIENT *client, GArray* servers) { char zeros[128]; - u64 size_host; - u32 flags = NBD_FLAG_HAS_FLAGS; + uint64_t size_host; + uint32_t flags = NBD_FLAG_HAS_FLAGS; + uint16_t smallflags = 0; + uint64_t magic; memset(zeros, '\0', sizeof(zeros)); - if (write(client->net, INIT_PASSWD, 8) < 0) - err("Negotiation failed: %m"); - cliserv_magic = htonll(cliserv_magic); - if (write(client->net, &cliserv_magic, sizeof(cliserv_magic)) < 0) - err("Negotiation failed: %m"); + if(!client || !client->modern) { + /* common */ + if (write(net, INIT_PASSWD, 8) < 0) { + err_nonfatal("Negotiation failed: %m"); + if(client) + exit(EXIT_FAILURE); + } + if(!client || client->modern) { + /* modern */ + magic = htonll(opts_magic); + } else { + /* oldstyle */ + magic = htonll(cliserv_magic); + } + if (write(net, &magic, sizeof(magic)) < 0) { + err_nonfatal("Negotiation failed: %m"); + if(client) + exit(EXIT_FAILURE); + } + } + if(!client) { + /* modern */ + uint32_t reserved; + uint32_t opt; + uint32_t namelen; + char* name; + int i; + + if(!servers) + err("programmer error"); + if (write(net, &smallflags, sizeof(uint16_t)) < 0) + err("Negotiation failed: %m"); + if (read(net, &reserved, sizeof(reserved)) < 0) + err("Negotiation failed: %m"); + if (read(net, &magic, sizeof(magic)) < 0) + err("Negotiation failed: %m"); + magic = ntohll(magic); + if(magic != opts_magic) { + close(net); + return NULL; + } + if (read(net, &opt, sizeof(opt)) < 0) + err("Negotiation failed: %m"); + opt = ntohl(opt); + if(opt != NBD_OPT_EXPORT_NAME) { + close(net); + return NULL; + } + if (read(net, &namelen, sizeof(namelen)) < 0) + err("Negotiation failed: %m"); + namelen = ntohl(namelen); + name = malloc(namelen+1); + name[namelen]=0; + if (read(net, name, namelen) < 0) + err("Negotiation failed: %m"); + for(i=0; ilen; i++) { + SERVER* serve = &(g_array_index(servers, SERVER, i)); + if(!strcmp(serve->servename, name)) { + CLIENT* client = g_new0(CLIENT, 1); + client->server = serve; + client->exportsize = OFFT_MAX; + client->net = net; + client->modern = TRUE; + client->transactionlogfd = -1; + free(name); + return client; + } + } + free(name); + return NULL; + } + /* common */ size_host = htonll((u64)(client->exportsize)); - if (write(client->net, &size_host, 8) < 0) + if (write(net, &size_host, 8) < 0) err("Negotiation failed: %m"); if (client->server->flags & F_READONLY) flags |= NBD_FLAG_READ_ONLY; - flags = htonl(flags); - if (write(client->net, &flags, 4) < 0) - err("Negotiation failed: %m"); + if (client->server->flags & F_FLUSH) + flags |= NBD_FLAG_SEND_FLUSH; + if (client->server->flags & F_FUA) + flags |= NBD_FLAG_SEND_FUA; + if (client->server->flags & F_ROTATIONAL) + flags |= NBD_FLAG_ROTATIONAL; + if (!client->modern) { + /* oldstyle */ + flags = htonl(flags); + if (write(client->net, &flags, 4) < 0) + err("Negotiation failed: %m"); + } else { + /* modern */ + smallflags = (uint16_t)(flags & ~((uint16_t)0)); + smallflags = htons(smallflags); + if (write(client->net, &smallflags, sizeof(smallflags)) < 0) { + err("Negotiation failed: %m"); + } + } + /* common */ if (write(client->net, zeros, 124) < 0) err("Negotiation failed: %m"); + return NULL; } /** sending macro. */ -#define SEND(net,reply) writeit( net, &reply, sizeof( reply )); +#define SEND(net,reply) { writeit( net, &reply, sizeof( reply )); \ + if (client->transactionlogfd != -1) \ + writeit(client->transactionlogfd, &reply, sizeof(reply)); } /** error macro. */ #define ERROR(client,reply,errcode) { reply.error = htonl(errcode); SEND(client->net,reply); reply.error = 0; } /** @@ -1100,24 +1436,30 @@ int mainloop(CLIENT *client) { #ifdef DODBG int i = 0; #endif - negotiate(client); + negotiate(client->net, client, NULL); DEBUG("Entering request loop!\n"); reply.magic = htonl(NBD_REPLY_MAGIC); reply.error = 0; while (go_on) { char buf[BUFSIZE]; + char* p; size_t len; + size_t currlen; + size_t writelen; + uint16_t command; #ifdef DODBG i++; printf("%d: ", i); #endif - if (client->server->timeout) - alarm(client->server->timeout); readit(client->net, &request, sizeof(request)); + if (client->transactionlogfd != -1) + writeit(client->transactionlogfd, &request, sizeof(request)); + request.from = ntohll(request.from); request.type = ntohl(request.type); + command = request.type & NBD_CMD_MASK_COMMAND; - if (request.type==NBD_CMD_DISC) { + if (command==NBD_CMD_DISC) { msg2(LOG_INFO, "Disconnect request received."); if (client->server->flags & F_COPYONWRITE) { if (client->difmap) g_free(client->difmap) ; @@ -1133,38 +1475,60 @@ int mainloop(CLIENT *client) { if (request.magic != htonl(NBD_REQUEST_MAGIC)) err("Not enough magic."); - if (len > BUFSIZE + sizeof(struct nbd_reply)) - err("Request too big!"); -#ifdef DODBG - printf("%s from %Lu (%Lu) len %d, ", request.type ? "WRITE" : + if (len > BUFSIZE - sizeof(struct nbd_reply)) { + currlen = BUFSIZE - sizeof(struct nbd_reply); + msg2(LOG_INFO, "oversized request (this is not a problem)"); + } else { + currlen = len; + } + DEBUG("%s from %llu (%llu) len %d, ", command ? "WRITE" : "READ", (unsigned long long)request.from, - (unsigned long long)request.from / 512, len); -#endif + (unsigned long long)request.from / 512, (unsigned int)len); memcpy(reply.handle, request.handle, sizeof(reply.handle)); - if ((request.from + len) > (OFFT_MAX)) { - DEBUG("[Number too large!]"); - ERROR(client, reply, EINVAL); - continue; - } - if (((ssize_t)((off_t)request.from + len) > client->exportsize)) { - DEBUG("[RANGE!]"); - ERROR(client, reply, EINVAL); - continue; + if ((command==NBD_CMD_WRITE) || (command==NBD_CMD_READ)) { + if ((request.from + len) > (OFFT_MAX)) { + DEBUG("[Number too large!]"); + ERROR(client, reply, EINVAL); + continue; + } + + if (((ssize_t)((off_t)request.from + len) > client->exportsize)) { + DEBUG("[RANGE!]"); + ERROR(client, reply, EINVAL); + continue; + } } - if (request.type==NBD_CMD_WRITE) { + if (command==NBD_CMD_WRITE) { DEBUG("wr: net->buf, "); - readit(client->net, buf, len); - DEBUG("buf->exp, "); - if ((client->server->flags & F_READONLY) || - (client->server->flags & F_AUTOREADONLY)) { - DEBUG("[WRITE to READONLY!]"); - ERROR(client, reply, EPERM); - continue; + while(len > 0) { + readit(client->net, buf, currlen); + DEBUG("buf->exp, "); + if ((client->server->flags & F_READONLY) || + (client->server->flags & F_AUTOREADONLY)) { + DEBUG("[WRITE to READONLY!]"); + ERROR(client, reply, EPERM); + continue; + } + if (expwrite(request.from, buf, len, client, + request.type & NBD_CMD_FLAG_FUA)) { + DEBUG("Write failed: %m" ); + ERROR(client, reply, errno); + continue; + } + SEND(client->net, reply); + DEBUG("OK!\n"); + len -= currlen; + currlen = (len < BUFSIZE) ? len : BUFSIZE; } - if (expwrite(request.from, buf, len, client)) { - DEBUG("Write failed: %m" ); + continue; + } + + if (command==NBD_CMD_FLUSH) { + DEBUG("fl: "); + if (expflush(client)) { + DEBUG("Flush failed: %m"); ERROR(client, reply, errno); continue; } @@ -1172,19 +1536,34 @@ int mainloop(CLIENT *client) { DEBUG("OK!\n"); continue; } - /* READ */ - DEBUG("exp->buf, "); - if (expread(request.from, buf + sizeof(struct nbd_reply), len, client)) { - DEBUG("Read failed: %m"); - ERROR(client, reply, errno); + if (command==NBD_CMD_READ) { + DEBUG("exp->buf, "); + memcpy(buf, &reply, sizeof(struct nbd_reply)); + if (client->transactionlogfd != -1) + writeit(client->transactionlogfd, &reply, sizeof(reply)); + p = buf + sizeof(struct nbd_reply); + writelen = currlen + sizeof(struct nbd_reply); + while(len > 0) { + if (expread(request.from, p, currlen, client)) { + DEBUG("Read failed: %m"); + ERROR(client, reply, errno); + continue; + } + + DEBUG("buf->net, "); + writeit(client->net, buf, writelen); + len -= currlen; + request.from += currlen; + currlen = (len < BUFSIZE) ? len : BUFSIZE; + p = buf; + writelen = currlen; + } + DEBUG("OK!\n"); continue; } - DEBUG("buf->net, "); - memcpy(buf, &reply, sizeof(struct nbd_reply)); - writeit(client->net, buf, len + sizeof(struct nbd_reply)); - DEBUG("OK!\n"); + DEBUG ("Ignoring unknown command\n"); } return 0; } @@ -1207,6 +1586,7 @@ void setupexport(CLIENT* client) { for(i=0; ; i++) { FILE_INFO fi; gchar *tmpname; + gchar* error_string; mode_t mode = (client->server->flags & F_READONLY) ? O_RDONLY : O_RDWR; if(multifile) { @@ -1214,20 +1594,27 @@ void setupexport(CLIENT* client) { } else { tmpname=g_strdup(client->exportname); } - DEBUG2( "Opening %s\n", tmpname ); + DEBUG( "Opening %s\n", tmpname ); fi.fhandle = open(tmpname, mode); if(fi.fhandle == -1 && mode == O_RDWR) { /* Try again because maybe media was read-only */ fi.fhandle = open(tmpname, O_RDONLY); if(fi.fhandle != -1) { - client->server->flags |= F_AUTOREADONLY; - client->server->flags |= F_READONLY; + /* Opening the base file in copyonwrite mode is + * okay */ + if(!(client->server->flags & F_COPYONWRITE)) { + client->server->flags |= F_AUTOREADONLY; + client->server->flags |= F_READONLY; + } } } if(fi.fhandle == -1) { if(multifile && i>0) break; - err("Could not open exported file: %m"); + error_string=g_strdup_printf( + "Could not open exported file %s: %%m", + tmpname); + err(error_string); } fi.startoff = laststartoff + lastsize; g_array_append_val(client->export, fi); @@ -1255,7 +1642,7 @@ void setupexport(CLIENT* client) { client->exportsize = client->server->expected_size; } - msg3(LOG_INFO, "Size of exported file/device is %Lu", (unsigned long long)client->exportsize); + msg3(LOG_INFO, "Size of exported file/device is %llu", (unsigned long long)client->exportsize); if(multifile) { msg3(LOG_INFO, "Total number of files: %d", i); } @@ -1306,6 +1693,15 @@ int do_run(gchar* command, gchar* file) { * @param client a connected client **/ void serveconnection(CLIENT *client) { + if (client->server->transactionlog && (client->transactionlogfd == -1)) + { + if (-1 == (client->transactionlogfd = open(client->server->transactionlog, + O_WRONLY | O_CREAT, + S_IRUSR | S_IWUSR))) + g_warning("Could not open transaction log %s", + client->server->transactionlog); + } + if(do_run(client->server->prerun, client->exportname)) { exit(EXIT_FAILURE); } @@ -1319,6 +1715,12 @@ void serveconnection(CLIENT *client) { mainloop(client); do_run(client->server->postrun, client->exportname); + + if (-1 != client->transactionlogfd) + { + close(client->transactionlogfd); + client->transactionlogfd = -1; + } } /** @@ -1333,17 +1735,36 @@ void serveconnection(CLIENT *client) { * stored in client->clientname. **/ void set_peername(int net, CLIENT *client) { - struct sockaddr_in addrin; - struct sockaddr_in netaddr; + struct sockaddr_storage addrin; + struct sockaddr_storage netaddr; + struct sockaddr_in *netaddr4 = NULL; + struct sockaddr_in6 *netaddr6 = NULL; size_t addrinlen = sizeof( addrin ); - char *peername; - char *netname; - char *tmp; + struct addrinfo hints; + struct addrinfo *ai = NULL; + char peername[NI_MAXHOST]; + char netname[NI_MAXHOST]; + char *tmp = NULL; int i; + int e; + int shift; if (getpeername(net, (struct sockaddr *) &addrin, (socklen_t *)&addrinlen) < 0) err("getsockname failed: %m"); - peername = g_strdup(inet_ntoa(addrin.sin_addr)); + + getnameinfo((struct sockaddr *)&addrin, (socklen_t)addrinlen, + peername, sizeof (peername), NULL, 0, NI_NUMERICHOST); + + memset(&hints, '\0', sizeof (hints)); + hints.ai_flags = AI_ADDRCONFIG; + e = getaddrinfo(peername, NULL, &hints, &ai); + + if(e != 0) { + fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e)); + freeaddrinfo(ai); + return; + } + switch(client->server->virtstyle) { case VIRT_NONE: client->exportname=g_strdup(client->server->exportname); @@ -1359,18 +1780,42 @@ void set_peername(int net, CLIENT *client) { break; case VIRT_CIDR: memcpy(&netaddr, &addrin, addrinlen); - netaddr.sin_addr.s_addr>>=32-(client->server->cidrlen); - netaddr.sin_addr.s_addr<<=32-(client->server->cidrlen); - netname = inet_ntoa(netaddr.sin_addr); - tmp=g_strdup_printf("%s/%s", netname, peername); - client->exportname=g_strdup_printf(client->server->exportname, tmp); + if(ai->ai_family == AF_INET) { + netaddr4 = (struct sockaddr_in *)&netaddr; + (netaddr4->sin_addr).s_addr>>=32-(client->server->cidrlen); + (netaddr4->sin_addr).s_addr<<=32-(client->server->cidrlen); + + getnameinfo((struct sockaddr *) netaddr4, (socklen_t) addrinlen, + netname, sizeof (netname), NULL, 0, NI_NUMERICHOST); + tmp=g_strdup_printf("%s/%s", netname, peername); + }else if(ai->ai_family == AF_INET6) { + netaddr6 = (struct sockaddr_in6 *)&netaddr; + + shift = 128-(client->server->cidrlen); + i = 3; + while(shift >= 32) { + ((netaddr6->sin6_addr).s6_addr32[i])=0; + shift-=32; + i--; + } + (netaddr6->sin6_addr).s6_addr32[i]>>=shift; + (netaddr6->sin6_addr).s6_addr32[i]<<=shift; + + getnameinfo((struct sockaddr *)netaddr6, (socklen_t)addrinlen, + netname, sizeof(netname), NULL, 0, NI_NUMERICHOST); + tmp=g_strdup_printf("%s/%s", netname, peername); + } + + if(tmp != NULL) + client->exportname=g_strdup_printf(client->server->exportname, tmp); + break; } + freeaddrinfo(ai); msg4(LOG_INFO, "connect from %s, assigned file is %s", peername, client->exportname); client->clientname=g_strdup(peername); - g_free(peername); } /** @@ -1385,15 +1830,13 @@ void destroy_pid_t(gpointer data) { * Loop through the available servers, and serve them. Never returns. **/ int serveloop(GArray* servers) { - struct sockaddr_in addrin; + struct sockaddr_storage addrin; socklen_t addrinlen=sizeof(addrin); - SERVER *serve; int i; int max; int sock; fd_set mset; fd_set rset; - struct timeval tv; /* * Set up the master fd_set. The set of descriptors we need @@ -1405,39 +1848,76 @@ int serveloop(GArray* servers) { max=0; FD_ZERO(&mset); for(i=0;ilen;i++) { - sock=(g_array_index(servers, SERVER, i)).socket; - FD_SET(sock, &mset); - max=sock>max?sock:max; + if((sock=(g_array_index(servers, SERVER, i)).socket)) { + FD_SET(sock, &mset); + max=sock>max?sock:max; + } + } + if(modernsock) { + FD_SET(modernsock, &mset); + max=modernsock>max?modernsock:max; } for(;;) { - CLIENT *client; - int net; + CLIENT *client = NULL; pid_t *pid; memcpy(&rset, &mset, sizeof(fd_set)); - tv.tv_sec=0; - tv.tv_usec=500; - if(select(max+1, &rset, NULL, NULL, &tv)>0) { + if(select(max+1, &rset, NULL, NULL, NULL)>0) { + int net = 0; + SERVER* serve=NULL; + DEBUG("accept, "); - for(i=0;ilen;i++) { + if(FD_ISSET(modernsock, &rset)) { + if((net=accept(modernsock, (struct sockaddr *) &addrin, &addrinlen)) < 0) + err("accept: %m"); + client = negotiate(net, NULL, servers); + if(!client) { + err_nonfatal("negotiation failed"); + close(net); + net=0; + continue; + } + serve = client->server; + } + for(i=0;ilen && !net;i++) { serve=&(g_array_index(servers, SERVER, i)); if(FD_ISSET(serve->socket, &rset)) { if ((net=accept(serve->socket, (struct sockaddr *) &addrin, &addrinlen)) < 0) err("accept: %m"); - - client = g_malloc(sizeof(CLIENT)); + } + } + if(net) { + int sock_flags; + + if(serve->max_connections > 0 && + g_hash_table_size(children) >= serve->max_connections) { + msg2(LOG_INFO, "Max connections reached"); + close(net); + continue; + } + if((sock_flags = fcntl(net, F_GETFL, 0))==-1) { + err("fcntl F_GETFL"); + } + if(fcntl(net, F_SETFL, sock_flags &~O_NONBLOCK)==-1) { + err("fcntl F_SETFL ~O_NONBLOCK"); + } + if(!client) { + client = g_new0(CLIENT, 1); client->server=serve; client->exportsize=OFFT_MAX; client->net=net; - set_peername(net, client); - if (!authorized_client(client)) { - msg2(LOG_INFO,"Unauthorized client") ; - close(net); - continue; - } - msg2(LOG_INFO,"Authorized client") ; - pid=g_malloc(sizeof(pid_t)); -#ifndef NOFORK + client->transactionlogfd = -1; + } + set_peername(net, client); + if (!authorized_client(client)) { + msg2(LOG_INFO,"Unauthorized client") ; + close(net); + continue; + } + msg2(LOG_INFO,"Authorized client") ; + pid=g_malloc(sizeof(pid_t)); + + if (!dontfork) { if ((*pid=fork())<0) { msg3(LOG_INFO,"Could not fork (%s)",strerror(errno)) ; close(net); @@ -1451,93 +1931,147 @@ int serveloop(GArray* servers) { /* child */ g_hash_table_destroy(children); for(i=0;ilen;i++) { - serve=g_array_index(servers, SERVER*, i); + serve=&g_array_index(servers, SERVER, i); close(serve->socket); } /* FALSE does not free the - actual data. This is required, - because the client has a - direct reference into that - data, and otherwise we get a - segfault... */ + actual data. This is required, + because the client has a + direct reference into that + data, and otherwise we get a + segfault... */ g_array_free(servers, FALSE); -#endif // NOFORK - msg2(LOG_INFO,"Starting to serve"); - serveconnection(client); - exit(EXIT_SUCCESS); } + + msg2(LOG_INFO,"Starting to serve"); + serveconnection(client); + exit(EXIT_SUCCESS); } } } } -/** - * Connect a server's socket. - * - * @param serve the server we want to connect. - **/ -void setup_serve(SERVER *serve) { - struct sockaddr_in addrin; - struct sigaction sa; - int addrinlen = sizeof(addrin); - int sock_flags; - int af; +void dosockopts(int socket) { #ifndef sun int yes=1; #else char yes='1'; #endif /* sun */ - - af = AF_INET; -#ifdef WITH_SDP - if ((serve->flags) && F_SDP) { - af = AF_INET_SDP; - } -#endif - if ((serve->socket = socket(af, SOCK_STREAM, IPPROTO_TCP)) < 0) - err("socket: %m"); + int sock_flags; /* lose the pesky "Address already in use" error message */ - if (setsockopt(serve->socket,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) { + if (setsockopt(socket,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) { err("setsockopt SO_REUSEADDR"); } - if (setsockopt(serve->socket,SOL_SOCKET,SO_KEEPALIVE,&yes,sizeof(int)) == -1) { + if (setsockopt(socket,SOL_SOCKET,SO_KEEPALIVE,&yes,sizeof(int)) == -1) { err("setsockopt SO_KEEPALIVE"); } /* make the listening socket non-blocking */ - if ((sock_flags = fcntl(serve->socket, F_GETFL, 0)) == -1) { + if ((sock_flags = fcntl(socket, F_GETFL, 0)) == -1) { err("fcntl F_GETFL"); } - if (fcntl(serve->socket, F_SETFL, sock_flags | O_NONBLOCK) == -1) { + if (fcntl(socket, F_SETFL, sock_flags | O_NONBLOCK) == -1) { err("fcntl F_SETFL O_NONBLOCK"); } +} + +/** + * Connect a server's socket. + * + * @param serve the server we want to connect. + **/ +int setup_serve(SERVER *serve) { + struct addrinfo hints; + struct addrinfo *ai = NULL; + gchar *port = NULL; + int e; + + if(!do_oldstyle) { + return serve->servename ? 1 : 0; + } + memset(&hints,'\0',sizeof(hints)); + hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG | AI_NUMERICSERV; + hints.ai_socktype = SOCK_STREAM; + hints.ai_family = serve->socket_family; + + port = g_strdup_printf ("%d", serve->port); + if (port == NULL) + return 0; + + e = getaddrinfo(serve->listenaddr,port,&hints,&ai); + + g_free(port); + + if(e != 0) { + fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e)); + serve->socket = -1; + freeaddrinfo(ai); + exit(EXIT_FAILURE); + } + + if(serve->socket_family == AF_UNSPEC) + serve->socket_family = ai->ai_family; - DEBUG("Waiting for connections... bind, "); - addrin.sin_family = AF_INET; #ifdef WITH_SDP - if(serve->flags & F_SDP) { - addrin.sin_family = AF_INET_SDP; + if ((serve->flags) && F_SDP) { + if (ai->ai_family == AF_INET) + ai->ai_family = AF_INET_SDP; + else (ai->ai_family == AF_INET6) + ai->ai_family = AF_INET6_SDP; } #endif - addrin.sin_port = htons(serve->port); - if(!inet_aton(serve->listenaddr, &(addrin.sin_addr))) - err("could not parse listen address"); - if (bind(serve->socket, (struct sockaddr *) &addrin, addrinlen) < 0) + if ((serve->socket = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol)) < 0) + err("socket: %m"); + + dosockopts(serve->socket); + + DEBUG("Waiting for connections... bind, "); + e = bind(serve->socket, ai->ai_addr, ai->ai_addrlen); + if (e != 0 && errno != EADDRINUSE) err("bind: %m"); DEBUG("listen, "); if (listen(serve->socket, 1) < 0) err("listen: %m"); - sa.sa_handler = sigchld_handler; - sigemptyset(&sa.sa_mask); - sa.sa_flags = SA_RESTART; - if(sigaction(SIGCHLD, &sa, NULL) == -1) - err("sigaction: %m"); - sa.sa_handler = sigterm_handler; - sigemptyset(&sa.sa_mask); - sa.sa_flags = SA_RESTART; - if(sigaction(SIGTERM, &sa, NULL) == -1) - err("sigaction: %m"); + + freeaddrinfo (ai); + if(serve->servename) { + return 1; + } else { + return 0; + } +} + +void open_modern(void) { + struct addrinfo hints; + struct addrinfo* ai = NULL; + struct sock_flags; + int e; + + memset(&hints, '\0', sizeof(hints)); + hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG; + hints.ai_socktype = SOCK_STREAM; + hints.ai_family = AF_UNSPEC; + hints.ai_protocol = IPPROTO_TCP; + e = getaddrinfo(modern_listen, NBD_DEFAULT_PORT, &hints, &ai); + if(e != 0) { + fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e)); + exit(EXIT_FAILURE); + } + if((modernsock = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol))<0) { + err("socket: %m"); + } + + dosockopts(modernsock); + + if(bind(modernsock, ai->ai_addr, ai->ai_addrlen)) { + err("bind: %m"); + } + if(listen(modernsock, 10) <0) { + err("listen: %m"); + } + + freeaddrinfo(ai); } /** @@ -1545,11 +2079,27 @@ void setup_serve(SERVER *serve) { **/ void setup_servers(GArray* servers) { int i; + struct sigaction sa; + int want_modern=0; for(i=0;ilen;i++) { - setup_serve(&(g_array_index(servers, SERVER, i))); + want_modern |= setup_serve(&(g_array_index(servers, SERVER, i))); + } + if(want_modern) { + open_modern(); } children=g_hash_table_new_full(g_int_hash, g_int_equal, NULL, destroy_pid_t); + + sa.sa_handler = sigchld_handler; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_RESTART; + if(sigaction(SIGCHLD, &sa, NULL) == -1) + err("sigaction: %m"); + sa.sa_handler = sigterm_handler; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_RESTART; + if(sigaction(SIGTERM, &sa, NULL) == -1) + err("sigaction: %m"); } /** @@ -1559,7 +2109,7 @@ void setup_servers(GArray* servers) { * is only used to create a PID file of the form * /var/run/nbd-server.<port>.pid; it's not modified in any way. **/ -#if !defined(NODAEMON) && !defined(NOFORK) +#if !defined(NODAEMON) void daemonize(SERVER* serve) { FILE*pidf; @@ -1571,9 +2121,9 @@ void daemonize(SERVER* serve) { } if(!*pidftemplate) { if(serve) { - strncpy(pidftemplate, "/var/run/server.%d.pid", 255); + strncpy(pidftemplate, "/var/run/nbd-server.%d.pid", 255); } else { - strncpy(pidftemplate, "/var/run/server.pid", 255); + strncpy(pidftemplate, "/var/run/nbd-server.pid", 255); } } snprintf(pidfname, 255, pidftemplate, serve ? serve->port : 0); @@ -1588,7 +2138,7 @@ void daemonize(SERVER* serve) { } #else #define daemonize(serve) -#endif /* !defined(NODAEMON) && !defined(NOFORK) */ +#endif /* !defined(NODAEMON) */ /* * Everything beyond this point (in the file) is run in non-daemon mode. @@ -1609,30 +2159,60 @@ void serve_err(SERVER* serve, const char* msg) { void dousers(void) { struct passwd *pw; struct group *gr; + gchar* str; if(rungroup) { gr=getgrnam(rungroup); if(!gr) { - g_message("Invalid group name: %s", rungroup); - exit(EXIT_FAILURE); + str = g_strdup_printf("Invalid group name: %s", rungroup); + err(str); } if(setgid(gr->gr_gid)<0) { - g_message("Could not set GID: %s", strerror(errno)); - exit(EXIT_FAILURE); + err("Could not set GID: %m"); } } if(runuser) { pw=getpwnam(runuser); if(!pw) { - g_message("Invalid user name: %s", runuser); - exit(EXIT_FAILURE); + str = g_strdup_printf("Invalid user name: %s", runuser); + err(str); } if(setuid(pw->pw_uid)<0) { - g_message("Could not set UID: %s", strerror(errno)); - exit(EXIT_FAILURE); + err("Could not set UID: %m"); } } } +#ifndef ISSERVER +void glib_message_syslog_redirect(const gchar *log_domain, + GLogLevelFlags log_level, + const gchar *message, + gpointer user_data) +{ + int level=LOG_DEBUG; + + switch( log_level ) + { + case G_LOG_FLAG_FATAL: + case G_LOG_LEVEL_CRITICAL: + case G_LOG_LEVEL_ERROR: + level=LOG_ERR; + break; + case G_LOG_LEVEL_WARNING: + level=LOG_WARNING; + break; + case G_LOG_LEVEL_MESSAGE: + case G_LOG_LEVEL_INFO: + level=LOG_INFO; + break; + case G_LOG_LEVEL_DEBUG: + level=LOG_DEBUG; + default: + level=LOG_ERR; + } + syslog(level, "%s", message); +} +#endif + /** * Main entry point... **/ @@ -1643,7 +2223,7 @@ int main(int argc, char *argv[]) { if (sizeof( struct nbd_request )!=28) { fprintf(stderr,"Bad size of structure. Alignment problems?\n"); - exit(-1) ; + exit(EXIT_FAILURE) ; } memset(pidftemplate, '\0', 256); @@ -1652,12 +2232,11 @@ int main(int argc, char *argv[]) { config_file_pos = g_strdup(CFILE); serve=cmdline(argc, argv); servers = parse_cfile(config_file_pos, &err); - if(!servers || !servers->len) { - g_warning("Could not parse config file: %s", - err ? err->message : "Unknown error"); - } + if(serve) { - g_array_append_val(servers, *serve); + serve->socket_family = AF_UNSPEC; + + append_serve(serve, servers); if (!(serve->port)) { CLIENT *client; @@ -1670,6 +2249,7 @@ int main(int argc, char *argv[]) { close(2); open("/dev/null", O_WRONLY); open("/dev/null", O_WRONLY); + g_log_set_default_handler( glib_message_syslog_redirect, NULL ); #endif client=g_malloc(sizeof(CLIENT)); client->server=serve; @@ -1680,11 +2260,25 @@ int main(int argc, char *argv[]) { return 0; } } + + if(!servers || !servers->len) { + if(err && !(err->domain == g_quark_from_string("parse_cfile") + && err->code == CFILE_NOTFOUND)) { + g_warning("Could not parse config file: %s", + err ? err->message : "Unknown error"); + } + } + if(serve) { + g_warning("Specifying an export on the command line is deprecated."); + g_warning("Please use a configuration file instead."); + } + if((!serve) && (!servers||!servers->len)) { - g_message("Nothing to do! Bye!"); + g_message("No configured exports; quitting."); exit(EXIT_FAILURE); } - daemonize(serve); + if (!dontfork) + daemonize(serve); setup_servers(servers); dousers(); serveloop(servers);