X-Git-Url: http://git.alex.org.uk diff --git a/nbd-server.c b/nbd-server.c index 4ecb1f5..6d734b3 100644 --- a/nbd-server.c +++ b/nbd-server.c @@ -49,6 +49,10 @@ * 11/02/2004 - Doxygenified the source, modularized it a bit. Needs a * lot more work, but this is a start. Wouter Verhelst * + * 16/03/2010 - Add IPv6 support. + * Kitt Tientanopajai + * Neutron Soutmun + * Suriya Soutmun */ /* Includes LFS defines, which defines behaviours of some of the following @@ -68,9 +72,10 @@ #include /* For BLKGETSIZE */ #endif #include /* sigaction */ +#include #include -#include /* sockaddr_in, htons, in_addr */ -#include /* hostent, gethostby*, getservby* */ +#include +#include #include #include #include @@ -82,6 +87,8 @@ #include #include #include +#include +#include #include @@ -89,6 +96,10 @@ #define MY_NAME "nbd_server" #include "cliserv.h" +#ifdef WITH_SDP +#include +#endif + /** Default position of the config file */ #ifndef SYSCONFDIR #define SYSCONFDIR "/etc" @@ -98,6 +109,16 @@ /** Where our config file actually is */ gchar* config_file_pos; +/** What user we're running as */ +gchar* runuser=NULL; +/** What group we're running as */ +gchar* rungroup=NULL; +/** whether to export using the old negotiation protocol (port-based) */ +gboolean do_oldstyle=FALSE; + +/* Whether we should avoid forking */ +int dontfork = 0; + /** Logging macros, now nothing goes to syslog unless you say ISSERVER */ #ifdef ISSERVER #define msg2(a,b) syslog(a,b) @@ -112,13 +133,9 @@ gchar* config_file_pos; /* Debugging macros */ //#define DODBG #ifdef DODBG -#define DEBUG( a ) printf( a ) -#define DEBUG2( a,b ) printf( a,b ) -#define DEBUG3( a,b,c ) printf( a,b,c ) +#define DEBUG(...) printf(__VA_ARGS__) #else -#define DEBUG( a ) -#define DEBUG2( a,b ) -#define DEBUG3( a,b,c ) +#define DEBUG(...) #endif #ifndef PACKAGE_VERSION #define PACKAGE_VERSION "" @@ -129,46 +146,82 @@ gchar* config_file_pos; **/ #define OFFT_MAX ~((off_t)1<<(sizeof(off_t)*8-1)) #define LINELEN 256 /**< Size of static buffer used to read the - authorization file (yuck) */ -#define BUFSIZE (1024*1024) /**< Size of buffer that can hold requests */ -#define GIGA (1*1024*1024*1024) /**< 1 Gigabyte. Used as hunksize when doing - the multiple file thingy. @todo: make this a - configuration option. */ + authorization file (yuck) */ +#define BUFSIZE ((1024*1024)+sizeof(struct nbd_reply)) /**< Size of buffer that can hold requests */ #define DIFFPAGESIZE 4096 /**< diff file uses those chunks */ #define F_READONLY 1 /**< flag to tell us a file is readonly */ #define F_MULTIFILE 2 /**< flag to tell us a file is exported using -m */ #define F_COPYONWRITE 4 /**< flag to tell us a file is exported using copyonwrite */ #define F_AUTOREADONLY 8 /**< flag to tell us a file is set to autoreadonly */ +#define F_SPARSE 16 /**< flag to tell us copyronwrite should use a sparse file */ +#define F_SDP 32 /**< flag to tell us the export should be done using the Socket Direct Protocol for RDMA */ +#define F_SYNC 64 /**< Whether to fsync() after a write */ +#define F_FLUSH 128 /**< Whether server wants FLUSH to be sent by the client */ +#define F_FUA 256 /**< Whether server wants FUA to be sent by the client */ +#define F_ROTATIONAL 512 /**< Whether server wants the client to implement the elevator algorithm */ GHashTable *children; char pidfname[256]; /**< name of our PID file */ -char default_authname[] = "/etc/nbd_server.allow"; /**< default name of allow file */ +char pidftemplate[256]; /**< template to be used for the filename of the PID file */ +char default_authname[] = SYSCONFDIR "/nbd-server/allow"; /**< default name of allow file */ + +int modernsock=0; /**< Socket for the modern handler. Not used + if a client was only specified on the + command line; only port used if + oldstyle is set to false (and then the + command-line client isn't used, gna gna) */ +char* modern_listen; /**< listenaddr value for modernsock */ + +/** + * Types of virtuatlization + **/ +typedef enum { + VIRT_NONE=0, /**< No virtualization */ + VIRT_IPLIT, /**< Literal IP address as part of the filename */ + VIRT_IPHASH, /**< Replacing all dots in an ip address by a / before + doing the same as in IPLIT */ + VIRT_CIDR, /**< Every subnet in its own directory */ +} VIRT_STYLE; /** * Variables associated with a server. **/ typedef struct { gchar* exportname; /**< (unprocessed) filename of the file we're exporting */ - off_t hunksize; /**< size of a hunk of an exported file */ off_t expected_size; /**< size of the exported file as it was told to us through configuration */ + gchar* listenaddr; /**< The IP address we're listening on */ unsigned int port; /**< port we're exporting this file at */ char* authname; /**< filename of the authorization file */ int flags; /**< flags associated with this exported file */ - unsigned int timeout;/**< how long a connection may be idle - (0=forever) */ int socket; /**< The socket of this server. */ + int socket_family; /**< family of the socket */ + VIRT_STYLE virtstyle;/**< The style of virtualization, if any */ + uint8_t cidrlen; /**< The length of the mask when we use + CIDR-style virtualization */ + gchar* prerun; /**< command to be ran after connecting a client, + but before starting to serve */ + gchar* postrun; /**< command that will be ran after the client + disconnects */ + gchar* servename; /**< name of the export as selected by nbd-client */ + int max_connections; /**< maximum number of opened connections */ + gchar* transactionlog;/**< filename for transaction log */ } SERVER; /** * Variables associated with a client socket. **/ typedef struct { + int fhandle; /**< file descriptor */ + off_t startoff; /**< starting offset of this file */ +} FILE_INFO; + +typedef struct { off_t exportsize; /**< size of the file we're exporting */ char *clientname; /**< peer */ char *exportname; /**< (processed) filename of the file we're exporting */ - GArray *export; /**< array of filedescriptors of exported files; - only the first is actually used unless we're + GArray *export; /**< array of FILE_INFO of exported files; + array size is always 1 unless we're doing the multiple file option */ int net; /**< The actual client socket */ SERVER *server; /**< The server this client is getting data from */ @@ -178,6 +231,8 @@ typedef struct { make -m and -c mutually exclusive */ u32 difffilelen; /**< number of pages in difffile */ u32 *difmap; /**< see comment on the global difmap for this one */ + gboolean modern; /**< client was negotiated using modern negotiation protocol */ + int transactionlogfd;/**< fd for transaction log */ } CLIENT; /** @@ -188,6 +243,7 @@ typedef enum { PARAM_STRING, /**< This parameter is a string */ PARAM_BOOL, /**< This parameter is a boolean */ } PARAM_TYPE; + /** * Configuration file values **/ @@ -205,6 +261,22 @@ typedef struct { is PARAM_BOOL. */ } PARAM; +static inline const char * getcommandname(uint64_t command) { + switch (command) { + case NBD_CMD_READ: + return "NBD_CMD_READ"; + case NBD_CMD_WRITE: + return "NBD_CMD_WRITE"; + case NBD_CMD_DISC: + return "NBD_CMD_DISC"; + case NBD_CMD_FLUSH: + return "NBD_CMD_FLUSH"; + default: + break; + } + return "UNKNOWN"; +} + /** * Check whether a client is allowed to connect. Works with an authorization * file which contains one line per machine, no wildcards. @@ -213,9 +285,14 @@ typedef struct { * @return 0 - authorization refused, 1 - OK **/ int authorized_client(CLIENT *opts) { + const char *ERRMSG="Invalid entry '%s' in authfile '%s', so, refusing all connections."; FILE *f ; - char line[LINELEN]; + char *tmp; + struct in_addr addr; + struct in_addr client; + struct in_addr cltemp; + int len; if ((f=fopen(opts->server->authname,"r"))==NULL) { msg4(LOG_INFO,"Can't open authorization file %s (%s).", @@ -223,14 +300,35 @@ int authorized_client(CLIENT *opts) { return 1 ; } + inet_aton(opts->clientname, &client); while (fgets(line,LINELEN,f)!=NULL) { + if((tmp=index(line, '/'))) { + if(strlen(line)<=tmp-line) { + msg4(LOG_CRIT, ERRMSG, line, opts->server->authname); + return 0; + } + *(tmp++)=0; + if(!inet_aton(line,&addr)) { + msg4(LOG_CRIT, ERRMSG, line, opts->server->authname); + return 0; + } + len=strtol(tmp, NULL, 0); + addr.s_addr>>=32-len; + addr.s_addr<<=32-len; + memcpy(&cltemp,&client,sizeof(client)); + cltemp.s_addr>>=32-len; + cltemp.s_addr<<=32-len; + if(addr.s_addr == cltemp.s_addr) { + return 1; + } + } if (strncmp(line,opts->clientname,strlen(opts->clientname))==0) { fclose(f); return 1; } } - fclose(f) ; - return 0 ; + fclose(f); + return 0; } /** @@ -240,14 +338,18 @@ int authorized_client(CLIENT *opts) { * @param buf a buffer * @param len the number of bytes to be read **/ -inline void readit(int f, void *buf, size_t len) { +static inline void readit(int f, void *buf, size_t len) { ssize_t res; while (len > 0) { DEBUG("*"); - if ((res = read(f, buf, len)) <= 0) - err("Read failed: %m"); - len -= res; - buf += res; + if ((res = read(f, buf, len)) <= 0) { + if(errno != EAGAIN) { + err("Read failed: %m"); + } + } else { + len -= res; + buf += res; + } } } @@ -258,7 +360,7 @@ inline void readit(int f, void *buf, size_t len) { * @param buf a buffer containing data * @param len the number of bytes to be written **/ -inline void writeit(int f, void *buf, size_t len) { +static inline void writeit(int f, void *buf, size_t len) { ssize_t res; while (len > 0) { DEBUG("+"); @@ -275,19 +377,46 @@ inline void writeit(int f, void *buf, size_t len) { */ void usage() { printf("This is nbd-server version " VERSION "\n"); - printf("Usage: port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-a timeout_sec] [-C configuration file]\n" + printf("Usage: [ip:|ip6@]port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-C configuration file] [-p PID file name] [-o section name] [-M max connections]\n" "\t-r|--read-only\t\tread only\n" "\t-m|--multi-file\t\tmultiple file\n" "\t-c|--copy-on-write\tcopy on write\n" - "\t-C|--config-file\tspecify an alternat configuration file\n" + "\t-C|--config-file\tspecify an alternate configuration file\n" "\t-l|--authorize-file\tfile with list of hosts that are allowed to\n\t\t\t\tconnect.\n" - "\t-a|--idle-time\t\tmaximum idle seconds; server terminates when\n\t\t\t\tidle time exceeded\n\n" + "\t-p|--pid-file\t\tspecify a filename to write our PID to\n" + "\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n" + "\t-M|--max-connections\tspecify the maximum number of opened connections\n\n" "\tif port is set to 0, stdin is used (for running from inetd)\n" "\tif file_to_export contains '%%s', it is substituted with the IP\n" - "\t\taddress of the machine trying to connect\n" ); + "\t\taddress of the machine trying to connect\n" + "\tif ip is set, it contains the local IP address on which we're listening.\n\tif not, the server will listen on all local IP addresses\n"); printf("Using configuration file %s\n", CFILE); } +/* Dumps a config file section of the given SERVER*, and exits. */ +void dump_section(SERVER* serve, gchar* section_header) { + printf("[%s]\n", section_header); + printf("\texportname = %s\n", serve->exportname); + printf("\tlistenaddr = %s\n", serve->listenaddr); + printf("\tport = %d\n", serve->port); + if(serve->flags & F_READONLY) { + printf("\treadonly = true\n"); + } + if(serve->flags & F_MULTIFILE) { + printf("\tmultifile = true\n"); + } + if(serve->flags & F_COPYONWRITE) { + printf("\tcopyonwrite = true\n"); + } + if(serve->expected_size) { + printf("\tfilesize = %lld\n", (long long int)serve->expected_size); + } + if(serve->authname) { + printf("\tauthfile = %s\n", serve->authname); + } + exit(EXIT_SUCCESS); +} + /** * Parse the command line. * @@ -302,29 +431,55 @@ SERVER* cmdline(int argc, char *argv[]) { {"read-only", no_argument, NULL, 'r'}, {"multi-file", no_argument, NULL, 'm'}, {"copy-on-write", no_argument, NULL, 'c'}, + {"dont-fork", no_argument, NULL, 'd'}, {"authorize-file", required_argument, NULL, 'l'}, - {"idle-time", required_argument, NULL, 'a'}, {"config-file", required_argument, NULL, 'C'}, + {"pid-file", required_argument, NULL, 'p'}, + {"output-config", required_argument, NULL, 'o'}, + {"max-connection", required_argument, NULL, 'M'}, {0,0,0,0} }; SERVER *serve; off_t es; size_t last; char suffix; + gboolean do_output=FALSE; + gchar* section_header=""; + gchar** addr_port; if(argc==1) { return NULL; } serve=g_new0(SERVER, 1); - serve->hunksize=OFFT_MAX; serve->authname = g_strdup(default_authname); - while((c=getopt_long(argc, argv, "-a:C:cl:mr", long_options, &i))>=0) { + serve->virtstyle=VIRT_IPLIT; + while((c=getopt_long(argc, argv, "-C:cdl:mo:rp:M:", long_options, &i))>=0) { switch (c) { case 1: /* non-option argument */ switch(nonspecial++) { case 0: - serve->port=strtol(optarg, NULL, 0); + if(strchr(optarg, ':') == strrchr(optarg, ':')) { + addr_port=g_strsplit(optarg, ":", 2); + + /* Check for "@" - maybe user using this separator + for IPv4 address */ + if(!addr_port[1]) { + g_strfreev(addr_port); + addr_port=g_strsplit(optarg, "@", 2); + } + } else { + addr_port=g_strsplit(optarg, "@", 2); + } + + if(addr_port[1]) { + serve->port=strtol(addr_port[1], NULL, 0); + serve->listenaddr=g_strdup(addr_port[0]); + } else { + serve->listenaddr=NULL; + serve->port=strtol(addr_port[0], NULL, 0); + } + g_strfreev(addr_port); break; case 1: serve->exportname = g_strdup(optarg); @@ -339,7 +494,7 @@ SERVER* cmdline(int argc, char *argv[]) { if (suffix == 'k' || suffix == 'K' || suffix == 'm' || suffix == 'M') optarg[last] = '\0'; - es = (off_t)atol(optarg); + es = (off_t)atoll(optarg); switch (suffix) { case 'm': case 'M': es <<= 10; @@ -356,11 +511,20 @@ SERVER* cmdline(int argc, char *argv[]) { break; case 'm': serve->flags |= F_MULTIFILE; - serve->hunksize = 1*GIGA; + break; + case 'o': + do_output = TRUE; + section_header = g_strdup(optarg); + break; + case 'p': + strncpy(pidftemplate, optarg, 256); break; case 'c': serve->flags |=F_COPYONWRITE; break; + case 'd': + dontfork = 1; + break; case 'C': g_free(config_file_pos); config_file_pos=g_strdup(optarg); @@ -369,8 +533,8 @@ SERVER* cmdline(int argc, char *argv[]) { g_free(serve->authname); serve->authname=g_strdup(optarg); break; - case 'a': - serve->timeout=strtol(optarg, NULL, 0); + case 'M': + serve->max_connections = strtol(optarg, NULL, 0); break; default: usage(); @@ -383,6 +547,15 @@ SERVER* cmdline(int argc, char *argv[]) { if(nonspecial<2) { g_free(serve); serve=NULL; + } else { + do_oldstyle = TRUE; + } + if(do_output) { + if(!serve) { + g_critical("Need a complete configuration on the command line to output a config file section!"); + exit(EXIT_FAILURE); + } + dump_section(serve, section_header); } return serve; } @@ -395,7 +568,12 @@ typedef enum { CFILE_MISSING_GENERIC, /**< The (required) group "generic" is missing */ CFILE_KEY_MISSING, /**< A (required) key is missing */ CFILE_VALUE_INVALID, /**< A value is syntactically invalid */ - CFILE_PROGERR /**< Programmer error */ + CFILE_VALUE_UNSUPPORTED,/**< A value is not supported in this build */ + CFILE_PROGERR, /**< Programmer error */ + CFILE_NO_EXPORTS, /**< A config file was specified that does not + define any exports */ + CFILE_INCORRECT_PORT, /**< The reserved port was specified for an + old-style export. */ } CFILE_ERRORS; /** @@ -408,35 +586,176 @@ void remove_server(gpointer s) { g_free(server->exportname); if(server->authname) g_free(server->authname); + if(server->listenaddr) + g_free(server->listenaddr); + if(server->prerun) + g_free(server->prerun); + if(server->postrun) + g_free(server->postrun); + if(server->transactionlog) + g_free(server->transactionlog); g_free(server); } /** + * duplicate server + * @param s the old server we want to duplicate + * @return new duplicated server + **/ +SERVER* dup_serve(SERVER *s) { + SERVER *serve = NULL; + + serve=g_new0(SERVER, 1); + if(serve == NULL) + return NULL; + + if(s->exportname) + serve->exportname = g_strdup(s->exportname); + + serve->expected_size = s->expected_size; + + if(s->listenaddr) + serve->listenaddr = g_strdup(s->listenaddr); + + serve->port = s->port; + + if(s->authname) + serve->authname = strdup(s->authname); + + serve->flags = s->flags; + serve->socket = s->socket; + serve->socket_family = s->socket_family; + serve->virtstyle = s->virtstyle; + serve->cidrlen = s->cidrlen; + + if(s->prerun) + serve->prerun = g_strdup(s->prerun); + + if(s->postrun) + serve->postrun = g_strdup(s->postrun); + + if(s->transactionlog) + serve->transactionlog = g_strdup(s->transactionlog); + + if(s->servename) + serve->servename = g_strdup(s->servename); + + serve->max_connections = s->max_connections; + + return serve; +} + +/** + * append new server to array + * @param s server + * @param a server array + * @return 0 success, -1 error + */ +int append_serve(SERVER *s, GArray *a) { + SERVER *ns = NULL; + struct addrinfo hints; + struct addrinfo *ai = NULL; + struct addrinfo *rp = NULL; + char host[NI_MAXHOST]; + gchar *port = NULL; + int e; + int ret; + + if(!s) { + err("Invalid parsing server"); + return -1; + } + + port = g_strdup_printf("%d", s->port); + + memset(&hints,'\0',sizeof(hints)); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + hints.ai_flags = AI_ADDRCONFIG | AI_PASSIVE; + hints.ai_protocol = IPPROTO_TCP; + + e = getaddrinfo(s->listenaddr, port, &hints, &ai); + + if (port) + g_free(port); + + if(e == 0) { + for (rp = ai; rp != NULL; rp = rp->ai_next) { + e = getnameinfo(rp->ai_addr, rp->ai_addrlen, host, sizeof(host), NULL, 0, NI_NUMERICHOST); + + if (e != 0) { // error + fprintf(stderr, "getnameinfo: %s\n", gai_strerror(e)); + continue; + } + + // duplicate server and set listenaddr to resolved IP address + ns = dup_serve (s); + if (ns) { + ns->listenaddr = g_strdup(host); + ns->socket_family = rp->ai_family; + g_array_append_val(a, *ns); + free(ns); + ns = NULL; + } + } + + ret = 0; + } else { + fprintf(stderr, "getaddrinfo failed on listen host/address: %s (%s)\n", s->listenaddr ? s->listenaddr : "any", gai_strerror(e)); + ret = -1; + } + + if (ai) + freeaddrinfo(ai); + + return ret; +} + +/** * Parse the config file. * * @param f the name of the config file * @param e a GError. @see CFILE_ERRORS for what error values this function can * return. - * @return a GHashTable of SERVER* pointers, with the port number as the hash - * key. If the config file is empty or does not exist, returns an empty - * GHashTable; if the config file contains an error, returns NULL, and - * e is set appropriately + * @return a Array of SERVER* pointers, If the config file is empty or does not + * exist, returns an empty GHashTable; if the config file contains an + * error, returns NULL, and e is set appropriately **/ GArray* parse_cfile(gchar* f, GError** e) { const char* DEFAULT_ERROR = "Could not parse %s in group %s: %s"; const char* MISSING_REQUIRED_ERROR = "Could not find required value %s in group %s: %s"; SERVER s; - PARAM p[] = { - { "exportname", TRUE, PARAM_STRING, NULL, 0 }, - { "port", TRUE, PARAM_INT, NULL, 0 }, - { "authfile", FALSE, PARAM_STRING, NULL, 0 }, - { "timeout", FALSE, PARAM_INT, NULL, 0 }, - { "filesize", FALSE, PARAM_INT, NULL, 0 }, - { "readonly", FALSE, PARAM_BOOL, NULL, F_READONLY }, - { "multifile", FALSE, PARAM_BOOL, NULL, F_MULTIFILE }, - { "copyonwrite", FALSE, PARAM_BOOL, NULL, F_COPYONWRITE }, + gchar *virtstyle=NULL; + PARAM lp[] = { + { "exportname", TRUE, PARAM_STRING, &(s.exportname), 0 }, + { "port", TRUE, PARAM_INT, &(s.port), 0 }, + { "authfile", FALSE, PARAM_STRING, &(s.authname), 0 }, + { "filesize", FALSE, PARAM_INT, &(s.expected_size), 0 }, + { "virtstyle", FALSE, PARAM_STRING, &(virtstyle), 0 }, + { "prerun", FALSE, PARAM_STRING, &(s.prerun), 0 }, + { "postrun", FALSE, PARAM_STRING, &(s.postrun), 0 }, + { "transactionlog", FALSE, PARAM_STRING, &(s.transactionlog), 0 }, + { "readonly", FALSE, PARAM_BOOL, &(s.flags), F_READONLY }, + { "multifile", FALSE, PARAM_BOOL, &(s.flags), F_MULTIFILE }, + { "copyonwrite", FALSE, PARAM_BOOL, &(s.flags), F_COPYONWRITE }, + { "sparse_cow", FALSE, PARAM_BOOL, &(s.flags), F_SPARSE }, + { "sdp", FALSE, PARAM_BOOL, &(s.flags), F_SDP }, + { "sync", FALSE, PARAM_BOOL, &(s.flags), F_SYNC }, + { "flush", FALSE, PARAM_BOOL, &(s.flags), F_FLUSH }, + { "fua", FALSE, PARAM_BOOL, &(s.flags), F_FUA }, + { "rotational", FALSE, PARAM_BOOL, &(s.flags), F_ROTATIONAL }, + { "listenaddr", FALSE, PARAM_STRING, &(s.listenaddr), 0 }, + { "maxconnections", FALSE, PARAM_INT, &(s.max_connections), 0 }, }; - const int p_size=8; + const int lp_size=sizeof(lp)/sizeof(PARAM); + PARAM gp[] = { + { "user", FALSE, PARAM_STRING, &runuser, 0 }, + { "group", FALSE, PARAM_STRING, &rungroup, 0 }, + { "oldstyle", FALSE, PARAM_BOOL, &do_oldstyle, 1 }, + { "listenaddr", FALSE, PARAM_STRING, &modern_listen, 0 }, + }; + PARAM* p=gp; + int p_size=sizeof(gp)/sizeof(PARAM); GKeyFile *cfile; GError *err = NULL; const char *err_msg=NULL; @@ -444,31 +763,34 @@ GArray* parse_cfile(gchar* f, GError** e) { GArray *retval=NULL; gchar **groups; gboolean value; - gint i,j; + gchar* startgroup; + gint i; + gint j; - memset(&s, '\0', sizeof(SERVER)); errdomain = g_quark_from_string("parse_cfile"); cfile = g_key_file_new(); retval = g_array_new(FALSE, TRUE, sizeof(SERVER)); if(!g_key_file_load_from_file(cfile, f, G_KEY_FILE_KEEP_COMMENTS | G_KEY_FILE_KEEP_TRANSLATIONS, &err)) { - g_set_error(e, errdomain, CFILE_NOTFOUND, "Could not open config file."); + g_set_error(e, errdomain, CFILE_NOTFOUND, "Could not open config file %s.", f); g_key_file_free(cfile); return retval; } - if(strcmp(g_key_file_get_start_group(cfile), "generic")) { + startgroup = g_key_file_get_start_group(cfile); + if(!startgroup || strcmp(startgroup, "generic")) { g_set_error(e, errdomain, CFILE_MISSING_GENERIC, "Config file does not contain the [generic] group!"); g_key_file_free(cfile); return NULL; } groups = g_key_file_get_groups(cfile, NULL); - for(i=1;groups[i];i++) { - p[0].target=&(s.exportname); - p[1].target=&(s.port); - p[2].target=&(s.authname); - p[3].target=&(s.timeout); - p[4].target=&(s.expected_size); - p[5].target=p[6].target=p[7].target=p[8].target=&(s.flags); + for(i=0;groups[i];i++) { + memset(&s, '\0', sizeof(SERVER)); + + /* After the [generic] group, start parsing exports */ + if(i==1) { + p=lp; + p_size=lp_size; + } for(j=0;jcode == G_KEY_FILE_ERROR_KEY_NOT_FOUND) { if(!p[j].required) { @@ -515,8 +846,61 @@ GArray* parse_cfile(gchar* f, GError** e) { return NULL; } } - g_array_append_val(retval, s); + if(virtstyle) { + if(!strncmp(virtstyle, "none", 4)) { + s.virtstyle=VIRT_NONE; + } else if(!strncmp(virtstyle, "ipliteral", 9)) { + s.virtstyle=VIRT_IPLIT; + } else if(!strncmp(virtstyle, "iphash", 6)) { + s.virtstyle=VIRT_IPHASH; + } else if(!strncmp(virtstyle, "cidrhash", 8)) { + s.virtstyle=VIRT_CIDR; + if(strlen(virtstyle)<10) { + g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s: missing length", virtstyle, groups[i]); + g_array_free(retval, TRUE); + g_key_file_free(cfile); + return NULL; + } + s.cidrlen=strtol(virtstyle+8, NULL, 0); + } else { + g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s", virtstyle, groups[i]); + g_array_free(retval, TRUE); + g_key_file_free(cfile); + return NULL; + } + if(s.port && !do_oldstyle) { + g_warning("A port was specified, but oldstyle exports were not requested. This may not do what you expect."); + g_warning("Please read 'man 5 nbd-server' and search for oldstyle for more info"); + } + } else { + s.virtstyle=VIRT_IPLIT; + } + /* Don't need to free this, it's not our string */ + virtstyle=NULL; + /* Don't append values for the [generic] group */ + if(i>0) { + s.socket_family = AF_UNSPEC; + s.servename = groups[i]; + + append_serve(&s, retval); + } else { + if(!do_oldstyle) { + lp[1].required = 0; + } + } +#ifndef WITH_SDP + if(s.flags & F_SDP) { + g_set_error(e, errdomain, CFILE_VALUE_UNSUPPORTED, "This nbd-server was built without support for SDP, yet group %s uses it", groups[i]); + g_array_free(retval, TRUE); + g_key_file_free(cfile); + return NULL; + } +#endif + } + if(i==1) { + g_set_error(e, errdomain, CFILE_NO_EXPORTS, "The config file does not specify any exports"); } + g_key_file_free(cfile); return retval; } @@ -531,14 +915,14 @@ void sigchld_handler(int s) { pid_t pid; while((pid=waitpid(-1, &status, WNOHANG)) > 0) { - if(WIFEXITED(&status)) { + if(WIFEXITED(status)) { msg3(LOG_INFO, "Child exited with %d", WEXITSTATUS(status)); } i=g_hash_table_lookup(children, &pid); if(!i) { msg3(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid); } else { - DEBUG2("Removing %d from the list of children", pid); + DEBUG("Removing %d from the list of children", pid); g_hash_table_remove(children, &pid); } } @@ -574,37 +958,36 @@ void sigterm_handler(int s) { unlink(pidfname); } - exit(0); + exit(EXIT_SUCCESS); } /** * Detect the size of a file. * - * @param export An open filedescriptor + * @param fhandle An open filedescriptor * @return the size of the file, or OFFT_MAX if detection was * impossible. **/ -off_t size_autodetect(int export) { +off_t size_autodetect(int fhandle) { off_t es; - u32 es32; + u64 bytes; struct stat stat_buf; int error; #ifdef HAVE_SYS_MOUNT_H #ifdef HAVE_SYS_IOCTL_H -#ifdef BLKGETSIZE - DEBUG("looking for export size with ioctl BLKGETSIZE\n"); - if (!ioctl(export, BLKGETSIZE, &es32) && es32) { - es = (off_t)es32 * (off_t)512; - return es; +#ifdef BLKGETSIZE64 + DEBUG("looking for export size with ioctl BLKGETSIZE64\n"); + if (!ioctl(fhandle, BLKGETSIZE64, &bytes) && bytes) { + return (off_t)bytes; } -#endif /* BLKGETSIZE */ +#endif /* BLKGETSIZE64 */ #endif /* HAVE_SYS_IOCTL_H */ #endif /* HAVE_SYS_MOUNT_H */ - DEBUG("looking for export size with fstat\n"); + DEBUG("looking for fhandle size with fstat\n"); stat_buf.st_size = 0; - error = fstat(export, &stat_buf); + error = fstat(fhandle, &stat_buf); if (!error) { if(stat_buf.st_size > 0) return (off_t)stat_buf.st_size; @@ -612,12 +995,12 @@ off_t size_autodetect(int export) { err("fstat failed: %m"); } - DEBUG("looking for export size with lseek SEEK_END\n"); - es = lseek(export, (off_t)0, SEEK_END); + DEBUG("looking for fhandle size with lseek SEEK_END\n"); + es = lseek(fhandle, (off_t)0, SEEK_END); if (es > ((off_t)0)) { return es; } else { - DEBUG2("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4))); + DEBUG("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4))); } err("Could not find size of exported block device: %m"); @@ -625,6 +1008,54 @@ off_t size_autodetect(int export) { } /** + * Get the file handle and offset, given an export offset. + * + * @param export An array of export files + * @param a The offset to get corresponding file/offset for + * @param fhandle [out] File descriptor + * @param foffset [out] Offset into fhandle + * @param maxbytes [out] Tells how many bytes can be read/written + * from fhandle starting at foffset (0 if there is no limit) + * @return 0 on success, -1 on failure + **/ +int get_filepos(GArray* export, off_t a, int* fhandle, off_t* foffset, size_t* maxbytes ) { + /* Negative offset not allowed */ + if(a < 0) + return -1; + + /* Binary search for last file with starting offset <= a */ + FILE_INFO fi; + int start = 0; + int end = export->len - 1; + while( start <= end ) { + int mid = (start + end) / 2; + fi = g_array_index(export, FILE_INFO, mid); + if( fi.startoff < a ) { + start = mid + 1; + } else if( fi.startoff > a ) { + end = mid - 1; + } else { + start = end = mid; + break; + } + } + + /* end should never go negative, since first startoff is 0 and a >= 0 */ + g_assert(end >= 0); + + fi = g_array_index(export, FILE_INFO, end); + *fhandle = fi.fhandle; + *foffset = a - fi.startoff; + *maxbytes = 0; + if( end+1 < export->len ) { + FILE_INFO fi_next = g_array_index(export, FILE_INFO, end+1); + *maxbytes = fi_next.startoff - a; + } + + return 0; +} + +/** * seek to a position in a file, with error handling. * @param handle a filedescriptor * @param a position to seek to @@ -648,13 +1079,79 @@ void myseek(int handle,off_t a) { * @param client The client we're serving for * @return The number of bytes actually written, or -1 in case of an error **/ -int rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client) { - ssize_t res; +ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) { + int fhandle; + off_t foffset; + size_t maxbytes; + ssize_t retval; + + if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes)) + return -1; + if(maxbytes && len > maxbytes) + len = maxbytes; + + DEBUG("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle, (long long unsigned)foffset, (unsigned int)len, fua); + + myseek(fhandle, foffset); + retval = write(fhandle, buf, len); + if(client->server->flags & F_SYNC) { + fsync(fhandle); + } else if (fua) { + + /* This is where we would do the following + * #ifdef USE_SYNC_FILE_RANGE + * However, we don't, for the reasons set out below + * by Christoph Hellwig + * + * [BEGINS] + * fdatasync is equivalent to fsync except that it does not flush + * non-essential metadata (basically just timestamps in practice), but it + * does flush metadata requried to find the data again, e.g. allocation + * information and extent maps. sync_file_range does nothing but flush + * out pagecache content - it means you basically won't get your data + * back in case of a crash if you either: + * + * a) have a volatile write cache in your disk (e.g. any normal SATA disk) + * b) are using a sparse file on a filesystem + * c) are using a fallocate-preallocated file on a filesystem + * d) use any file on a COW filesystem like btrfs + * + * e.g. it only does anything useful for you if you do not have a volatile + * write cache, and either use a raw block device node, or just overwrite + * an already fully allocated (and not preallocated) file on a non-COW + * filesystem. + * [ENDS] + * + * What we should do is open a second FD with O_DSYNC set, then write to + * that when appropriate. However, with a Linux client, every REQ_FUA + * immediately follows a REQ_FLUSH, so fdatasync does not cause performance + * problems. + * + */ +#if 0 + sync_file_range(fhandle, foffset, len, + SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE | + SYNC_FILE_RANGE_WAIT_AFTER); +#else + fdatasync(fhandle); +#endif + } + return retval; +} - myseek(g_array_index(client->export, int, (int)(a/client->server->hunksize)), a%client->server->hunksize); - ; - res = write(g_array_index(client->export, int, (int)((off_t)a/(off_t)(client->server->hunksize))), buf, len); - return (res < 0 || (size_t)res != len); +/** + * Call rawexpwrite repeatedly until all data has been written. + * @return 0 on success, nonzero on failure + **/ +int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client, int fua) { + ssize_t ret=0; + + while(len > 0 && (ret=rawexpwrite(a, buf, len, client, fua)) > 0 ) { + a += ret; + buf += ret; + len -= ret; + } + return (ret < 0 || len != 0); } /** @@ -668,13 +1165,35 @@ int rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client) { * @return The number of bytes actually read, or -1 in case of an * error. **/ -int rawexpread(off_t a, char *buf, size_t len, CLIENT *client) { - ssize_t res; +ssize_t rawexpread(off_t a, char *buf, size_t len, CLIENT *client) { + int fhandle; + off_t foffset; + size_t maxbytes; + + if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes)) + return -1; + if(maxbytes && len > maxbytes) + len = maxbytes; + + DEBUG("(READ from fd %d offset %llu len %u), ", fhandle, (long long unsigned int)foffset, (unsigned int)len); - myseek(g_array_index(client->export,int,(int)a/client->server->hunksize), - a%client->server->hunksize); - res = read(g_array_index(client->export,int,(int)a/client->server->hunksize), buf, len); - return (res < 0 || (size_t)res != len); + myseek(fhandle, foffset); + return read(fhandle, buf, len); +} + +/** + * Call rawexpread repeatedly until all data has been read. + * @return 0 on success, nonzero on failure + **/ +int rawexpread_fully(off_t a, char *buf, size_t len, CLIENT *client) { + ssize_t ret=0; + + while(len > 0 && (ret=rawexpread(a, buf, len, client)) > 0 ) { + a += ret; + buf += ret; + len -= ret; + } + return (ret < 0 || len != 0); } /** @@ -685,15 +1204,15 @@ int rawexpread(off_t a, char *buf, size_t len, CLIENT *client) { * @param buf A buffer to read into * @param len The size of buf * @param client The client we're going to read for - * @return The number of bytes actually read, or -1 in case of an error + * @return 0 on success, nonzero on failure **/ int expread(off_t a, char *buf, size_t len, CLIENT *client) { off_t rdlen, offset; off_t mapcnt, mapl, maph, pagestart; if (!(client->server->flags & F_COPYONWRITE)) - return rawexpread(a, buf, len, client); - DEBUG3("Asked to read %d bytes at %Lu.\n", len, (unsigned long long)a); + return(rawexpread_fully(a, buf, len, client)); + DEBUG("Asked to read %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a); mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE; @@ -703,14 +1222,14 @@ int expread(off_t a, char *buf, size_t len, CLIENT *client) { rdlen=(0difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */ - DEBUG3("Page %Lu is at %lu\n", (unsigned long long)mapcnt, + DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt, (unsigned long)(client->difmap[mapcnt])); myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset); if (read(client->difffile, buf, rdlen) != rdlen) return -1; } else { /* the block is not there */ - DEBUG2("Page %Lu is not here, we read the original one\n", + DEBUG("Page %llu is not here, we read the original one\n", (unsigned long long)mapcnt); - if(rawexpread(a, buf, rdlen, client)) return -1; + if(rawexpread_fully(a, buf, rdlen, client)) return -1; } len-=rdlen; a+=rdlen; buf+=rdlen; } @@ -726,9 +1245,9 @@ int expread(off_t a, char *buf, size_t len, CLIENT *client) { * @param buf The buffer to write from * @param len The length of buf * @param client The client we're going to write for. - * @return The number of bytes actually written, or -1 in case of an error + * @return 0 on success, nonzero on failure **/ -int expwrite(off_t a, char *buf, size_t len, CLIENT *client) { +int expwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) { char pagebuf[DIFFPAGESIZE]; off_t mapcnt,mapl,maph; off_t wrlen,rdlen; @@ -736,8 +1255,8 @@ int expwrite(off_t a, char *buf, size_t len, CLIENT *client) { off_t offset; if (!(client->server->flags & F_COPYONWRITE)) - return(rawexpwrite(a,buf,len, client)); - DEBUG3("Asked to write %d bytes at %Lu.\n", len, (unsigned long long)a); + return(rawexpwrite_fully(a, buf, len, client, fua)); + DEBUG("Asked to write %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a); mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ; @@ -748,23 +1267,19 @@ int expwrite(off_t a, char *buf, size_t len, CLIENT *client) { len : (size_t)DIFFPAGESIZE-offset; if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */ - DEBUG3("Page %Lu is at %lu\n", (unsigned long long)mapcnt, + DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt, (unsigned long)(client->difmap[mapcnt])) ; myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset); if (write(client->difffile, buf, wrlen) != wrlen) return -1 ; } else { /* the block is not there */ myseek(client->difffile,client->difffilelen*DIFFPAGESIZE) ; - client->difmap[mapcnt]=client->difffilelen++ ; - DEBUG3("Page %Lu is not here, we put it at %lu\n", + client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++; + DEBUG("Page %llu is not here, we put it at %lu\n", (unsigned long long)mapcnt, (unsigned long)(client->difmap[mapcnt])); rdlen=DIFFPAGESIZE ; - if (rdlen+pagestart%(client->server->hunksize) > - (client->server->hunksize)) - rdlen=client->server->hunksize - - (pagestart%client->server->hunksize); - if (rawexpread(pagestart, pagebuf, rdlen, client)) + if (rawexpread_fully(pagestart, pagebuf, rdlen, client)) return -1; memcpy(pagebuf+offset,buf,wrlen) ; if (write(client->difffile, pagebuf, DIFFPAGESIZE) != @@ -773,6 +1288,30 @@ int expwrite(off_t a, char *buf, size_t len, CLIENT *client) { } len-=wrlen ; a+=wrlen ; buf+=wrlen ; } + if (client->server->flags & F_SYNC) { + fsync(client->difffile); + } else if (fua) { + /* open question: would it be cheaper to do multiple sync_file_ranges? + as we iterate through the above? + */ + fdatasync(client->difffile); + } + return 0; +} + +int expflush(CLIENT *client) { + gint i; + + if (client->server->flags & F_COPYONWRITE) { + return fsync(client->difffile); + } + + for (i = 0; i < client->export->len; i++) { + FILE_INFO fi = g_array_index(client->export, FILE_INFO, i); + if (fsync(fi.fhandle) < 0) + return -1; + } + return 0; } @@ -781,27 +1320,122 @@ int expwrite(off_t a, char *buf, size_t len, CLIENT *client) { * * @param client The client we're negotiating with. **/ -void negotiate(CLIENT *client) { - char zeros[300]; - u64 size_host; - - memset(zeros, 0, 290); - if (write(client->net, INIT_PASSWD, 8) < 0) - err("Negotiation failed: %m"); - cliserv_magic = htonll(cliserv_magic); - if (write(client->net, &cliserv_magic, sizeof(cliserv_magic)) < 0) - err("Negotiation failed: %m"); +CLIENT* negotiate(int net, CLIENT *client, GArray* servers) { + char zeros[128]; + uint64_t size_host; + uint32_t flags = NBD_FLAG_HAS_FLAGS; + uint16_t smallflags = 0; + uint64_t magic; + + memset(zeros, '\0', sizeof(zeros)); + if(!client || !client->modern) { + /* common */ + if (write(net, INIT_PASSWD, 8) < 0) { + err_nonfatal("Negotiation failed: %m"); + if(client) + exit(EXIT_FAILURE); + } + if(!client || client->modern) { + /* modern */ + magic = htonll(opts_magic); + } else { + /* oldstyle */ + magic = htonll(cliserv_magic); + } + if (write(net, &magic, sizeof(magic)) < 0) { + err_nonfatal("Negotiation failed: %m"); + if(client) + exit(EXIT_FAILURE); + } + } + if(!client) { + /* modern */ + uint32_t reserved; + uint32_t opt; + uint32_t namelen; + char* name; + int i; + + if(!servers) + err("programmer error"); + if (write(net, &smallflags, sizeof(uint16_t)) < 0) + err("Negotiation failed: %m"); + if (read(net, &reserved, sizeof(reserved)) < 0) + err("Negotiation failed: %m"); + if (read(net, &magic, sizeof(magic)) < 0) + err("Negotiation failed: %m"); + magic = ntohll(magic); + if(magic != opts_magic) { + close(net); + return NULL; + } + if (read(net, &opt, sizeof(opt)) < 0) + err("Negotiation failed: %m"); + opt = ntohl(opt); + if(opt != NBD_OPT_EXPORT_NAME) { + close(net); + return NULL; + } + if (read(net, &namelen, sizeof(namelen)) < 0) + err("Negotiation failed: %m"); + namelen = ntohl(namelen); + name = malloc(namelen+1); + name[namelen]=0; + if (read(net, name, namelen) < 0) + err("Negotiation failed: %m"); + for(i=0; ilen; i++) { + SERVER* serve = &(g_array_index(servers, SERVER, i)); + if(!strcmp(serve->servename, name)) { + CLIENT* client = g_new0(CLIENT, 1); + client->server = serve; + client->exportsize = OFFT_MAX; + client->net = net; + client->modern = TRUE; + client->transactionlogfd = -1; + free(name); + return client; + } + } + free(name); + return NULL; + } + /* common */ size_host = htonll((u64)(client->exportsize)); - if (write(client->net, &size_host, 8) < 0) + if (write(net, &size_host, 8) < 0) err("Negotiation failed: %m"); - if (write(client->net, zeros, 128) < 0) + if (client->server->flags & F_READONLY) + flags |= NBD_FLAG_READ_ONLY; + if (client->server->flags & F_FLUSH) + flags |= NBD_FLAG_SEND_FLUSH; + if (client->server->flags & F_FUA) + flags |= NBD_FLAG_SEND_FUA; + if (client->server->flags & F_ROTATIONAL) + flags |= NBD_FLAG_ROTATIONAL; + if (!client->modern) { + /* oldstyle */ + flags = htonl(flags); + if (write(client->net, &flags, 4) < 0) + err("Negotiation failed: %m"); + } else { + /* modern */ + smallflags = (uint16_t)(flags & ~((uint16_t)0)); + smallflags = htons(smallflags); + if (write(client->net, &smallflags, sizeof(smallflags)) < 0) { + err("Negotiation failed: %m"); + } + } + /* common */ + if (write(client->net, zeros, 124) < 0) err("Negotiation failed: %m"); + return NULL; } /** sending macro. */ -#define SEND(net,reply) writeit( net, &reply, sizeof( reply )); +#define SEND(net,reply) { writeit( net, &reply, sizeof( reply )); \ + if (client->transactionlogfd != -1) \ + writeit(client->transactionlogfd, &reply, sizeof(reply)); } /** error macro. */ -#define ERROR(client,reply) { reply.error = htonl(-1); SEND(client->net,reply); reply.error = 0; } +#define ERROR(client,reply,errcode) { reply.error = htonl(errcode); SEND(client->net,reply); reply.error = 0; } /** * Serve a file to a single client. * @@ -809,7 +1443,7 @@ void negotiate(CLIENT *client) { * pieces. Preferably with a chainsaw. * * @param client The client we're going to serve to. - * @return never + * @return when the client disconnects **/ int mainloop(CLIENT *client) { struct nbd_request request; @@ -818,123 +1452,218 @@ int mainloop(CLIENT *client) { #ifdef DODBG int i = 0; #endif - negotiate(client); + negotiate(client->net, client, NULL); DEBUG("Entering request loop!\n"); reply.magic = htonl(NBD_REPLY_MAGIC); reply.error = 0; while (go_on) { char buf[BUFSIZE]; + char* p; size_t len; + size_t currlen; + size_t writelen; + uint16_t command; #ifdef DODBG i++; printf("%d: ", i); #endif - if (client->server->timeout) - alarm(client->server->timeout); readit(client->net, &request, sizeof(request)); + if (client->transactionlogfd != -1) + writeit(client->transactionlogfd, &request, sizeof(request)); + request.from = ntohll(request.from); request.type = ntohl(request.type); + command = request.type & NBD_CMD_MASK_COMMAND; + len = ntohl(request.len); + + DEBUG("%s from %llu (%llu) len %d, ", getcommandname(command), + (unsigned long long)request.from, + (unsigned long long)request.from / 512, (unsigned int)len); + + if (request.magic != htonl(NBD_REQUEST_MAGIC)) + err("Not enough magic."); + + memcpy(reply.handle, request.handle, sizeof(reply.handle)); + + if ((command==NBD_CMD_WRITE) || (command==NBD_CMD_READ)) { + if ((request.from + len) > (OFFT_MAX)) { + DEBUG("[Number too large!]"); + ERROR(client, reply, EINVAL); + continue; + } + + if (((ssize_t)((off_t)request.from + len) > client->exportsize)) { + DEBUG("[RANGE!]"); + ERROR(client, reply, EINVAL); + continue; + } + + currlen = len; + if (currlen > BUFSIZE - sizeof(struct nbd_reply)) { + currlen = BUFSIZE - sizeof(struct nbd_reply); + msg2(LOG_INFO, "oversized request (this is not a problem)"); + } + } - if (request.type==NBD_CMD_DISC) { + switch (command) { + + case NBD_CMD_DISC: msg2(LOG_INFO, "Disconnect request received."); - if (client->difmap) g_free(client->difmap) ; - if (client->difffile>=0) { + if (client->server->flags & F_COPYONWRITE) { + if (client->difmap) g_free(client->difmap) ; close(client->difffile); unlink(client->difffilename); free(client->difffilename); } go_on=FALSE; continue; - } - - len = ntohl(request.len); - - if (request.magic != htonl(NBD_REQUEST_MAGIC)) - err("Not enough magic."); - if (len > BUFSIZE + sizeof(struct nbd_reply)) - err("Request too big!"); -#ifdef DODBG - printf("%s from %Lu (%Lu) len %d, ", request.type ? "WRITE" : - "READ", (unsigned long long)request.from, - (unsigned long long)request.from / 512, len); -#endif - memcpy(reply.handle, request.handle, sizeof(reply.handle)); - if ((request.from + len) > (OFFT_MAX)) { - DEBUG("[Number too large!]"); - ERROR(client, reply); - continue; - } - if (((ssize_t)((off_t)request.from + len) > client->exportsize) || - ((client->server->flags & F_READONLY) && request.type)) { - DEBUG("[RANGE!]"); - ERROR(client, reply); + case NBD_CMD_WRITE: + DEBUG("wr: net->buf, "); + while(len > 0) { + readit(client->net, buf, currlen); + DEBUG("buf->exp, "); + if ((client->server->flags & F_READONLY) || + (client->server->flags & F_AUTOREADONLY)) { + DEBUG("[WRITE to READONLY!]"); + ERROR(client, reply, EPERM); + continue; + } + if (expwrite(request.from, buf, len, client, + request.type & NBD_CMD_FLAG_FUA)) { + DEBUG("Write failed: %m" ); + ERROR(client, reply, errno); + continue; + } + len -= currlen; + currlen = (len < BUFSIZE) ? len : BUFSIZE; + } + SEND(client->net, reply); + DEBUG("OK!\n"); continue; - } - if (request.type==NBD_CMD_WRITE) { - DEBUG("wr: net->buf, "); - readit(client->net, buf, len); - DEBUG("buf->exp, "); - if ((client->server->flags & F_AUTOREADONLY) || - expwrite(request.from, buf, len, - client)) { - DEBUG("Write failed: %m" ); - ERROR(client, reply); + case NBD_CMD_FLUSH: + DEBUG("fl: "); + if (expflush(client)) { + DEBUG("Flush failed: %m"); + ERROR(client, reply, errno); continue; } SEND(client->net, reply); DEBUG("OK!\n"); continue; - } - /* READ */ - DEBUG("exp->buf, "); - if (expread(request.from, buf + sizeof(struct nbd_reply), len, client)) { - DEBUG("Read failed: %m"); - ERROR(client, reply); + case NBD_CMD_READ: + DEBUG("exp->buf, "); + memcpy(buf, &reply, sizeof(struct nbd_reply)); + if (client->transactionlogfd != -1) + writeit(client->transactionlogfd, &reply, sizeof(reply)); + p = buf + sizeof(struct nbd_reply); + writelen = currlen + sizeof(struct nbd_reply); + while(len > 0) { + if (expread(request.from, p, currlen, client)) { + DEBUG("Read failed: %m"); + ERROR(client, reply, errno); + continue; + } + + DEBUG("buf->net, "); + writeit(client->net, buf, writelen); + len -= currlen; + request.from += currlen; + currlen = (len < BUFSIZE) ? len : BUFSIZE; + p = buf; + writelen = currlen; + } + DEBUG("OK!\n"); + continue; + + default: + DEBUG ("Ignoring unknown command\n"); continue; } - - DEBUG("buf->net, "); - memcpy(buf, &reply, sizeof(struct nbd_reply)); - writeit(client->net, buf, len + sizeof(struct nbd_reply)); - DEBUG("OK!\n"); } return 0; } /** - * Split a single exportfile into multiple ones, if that was asked. - * @return 0 on success, -1 on failure - * @param client information on the client which we want to split + * Set up client export array, which is an array of FILE_INFO. + * Also, split a single exportfile into multiple ones, if that was asked. + * @param client information on the client which we want to setup export for **/ -int splitexport(CLIENT* client) { - off_t i; - int fhandle; +void setupexport(CLIENT* client) { + int i; + off_t laststartoff = 0, lastsize = 0; + int multifile = (client->server->flags & F_MULTIFILE); - client->export = g_array_new(TRUE, TRUE, sizeof(int)); - for (i=0; iexportsize; i+=client->server->hunksize) { + client->export = g_array_new(TRUE, TRUE, sizeof(FILE_INFO)); + + /* If multi-file, open as many files as we can. + * If not, open exactly one file. + * Calculate file sizes as we go to get total size. */ + for(i=0; ; i++) { + FILE_INFO fi; gchar *tmpname; + gchar* error_string; + mode_t mode = (client->server->flags & F_READONLY) ? O_RDONLY : O_RDWR; - if(client->server->flags & F_MULTIFILE) { - tmpname=g_strdup_printf("%s.%d", client->exportname, - (int)(i/client->server->hunksize)); + if(multifile) { + tmpname=g_strdup_printf("%s.%d", client->exportname, i); } else { tmpname=g_strdup(client->exportname); } - DEBUG2( "Opening %s\n", tmpname ); - if((fhandle = open(tmpname, (client->server->flags & F_READONLY) ? O_RDONLY : O_RDWR)) == -1) { - /* Read WRITE ACCESS was requested by media is only read only */ - client->server->flags |= F_AUTOREADONLY; - client->server->flags |= F_READONLY; - if((fhandle = open(tmpname, O_RDONLY)) == -1) - err("Could not open exported file: %m"); + DEBUG( "Opening %s\n", tmpname ); + fi.fhandle = open(tmpname, mode); + if(fi.fhandle == -1 && mode == O_RDWR) { + /* Try again because maybe media was read-only */ + fi.fhandle = open(tmpname, O_RDONLY); + if(fi.fhandle != -1) { + /* Opening the base file in copyonwrite mode is + * okay */ + if(!(client->server->flags & F_COPYONWRITE)) { + client->server->flags |= F_AUTOREADONLY; + client->server->flags |= F_READONLY; + } + } + } + if(fi.fhandle == -1) { + if(multifile && i>0) + break; + error_string=g_strdup_printf( + "Could not open exported file %s: %%m", + tmpname); + err(error_string); } - g_array_insert_val(client->export,i/client->server->hunksize,fhandle); + fi.startoff = laststartoff + lastsize; + g_array_append_val(client->export, fi); g_free(tmpname); + + /* Starting offset and size of this file will be used to + * calculate starting offset of next file */ + laststartoff = fi.startoff; + lastsize = size_autodetect(fi.fhandle); + + if(!multifile) + break; + } + + /* Set export size to total calculated size */ + client->exportsize = laststartoff + lastsize; + + /* Export size may be overridden */ + if(client->server->expected_size) { + /* desired size must be <= total calculated size */ + if(client->server->expected_size > client->exportsize) { + err("Size of exported file is too big\n"); + } + + client->exportsize = client->server->expected_size; + } + + msg3(LOG_INFO, "Size of exported file/device is %llu", (unsigned long long)client->exportsize); + if(multifile) { + msg3(LOG_INFO, "Total number of files: %d", i); } - return 0; } int copyonwrite_prepare(CLIENT* client) { @@ -955,6 +1684,25 @@ int copyonwrite_prepare(CLIENT* client) { } /** + * Run a command. This is used for the ``prerun'' and ``postrun'' config file + * options + * + * @param command the command to be ran. Read from the config file + * @param file the file name we're about to export + **/ +int do_run(gchar* command, gchar* file) { + gchar* cmd; + int retval=0; + + if(command && *command) { + cmd = g_strdup_printf(command, file); + retval=system(cmd); + g_free(cmd); + } + return retval; +} + +/** * Serve a connection. * * @todo allow for multithreading, perhaps use libevent. Not just yet, though; @@ -963,22 +1711,19 @@ int copyonwrite_prepare(CLIENT* client) { * @param client a connected client **/ void serveconnection(CLIENT *client) { - splitexport(client); - - if (!client->server->expected_size) { - client->exportsize = size_autodetect(g_array_index(client->export,int,0)); - } else { - /* Perhaps we should check first. Not now. */ - client->exportsize = client->server->expected_size; - } - if (client->exportsize > OFFT_MAX) { - /* uhm, well... In a parallel universe, this *might* be - * possible... */ - err("Size of exported file is too big\n"); + if (client->server->transactionlog && (client->transactionlogfd == -1)) + { + if (-1 == (client->transactionlogfd = open(client->server->transactionlog, + O_WRONLY | O_CREAT, + S_IRUSR | S_IWUSR))) + g_warning("Could not open transaction log %s", + client->server->transactionlog); } - else { - msg3(LOG_INFO, "size of exported file/device is %Lu", (unsigned long long)client->exportsize); + + if(do_run(client->server->prerun, client->exportname)) { + exit(EXIT_FAILURE); } + setupexport(client); if (client->server->flags & F_COPYONWRITE) { copyonwrite_prepare(client); @@ -987,12 +1732,20 @@ void serveconnection(CLIENT *client) { setmysockopt(client->net); mainloop(client); + do_run(client->server->postrun, client->exportname); + + if (-1 != client->transactionlogfd) + { + close(client->transactionlogfd); + client->transactionlogfd = -1; + } } /** * Find the name of the file we have to serve. This will use g_strdup_printf * to put the IP address of the client inside a filename containing - * "%s". That name is then written to client->exportname. + * "%s" (in the form as specified by the "virtstyle" option). That name + * is then written to client->exportname. * * @param net A socket connected to an nbd client * @param client information about the client. The IP address in human-readable @@ -1000,140 +1753,108 @@ void serveconnection(CLIENT *client) { * stored in client->clientname. **/ void set_peername(int net, CLIENT *client) { - struct sockaddr_in addrin; - int addrinlen = sizeof( addrin ); - char *peername ; + struct sockaddr_storage addrin; + struct sockaddr_storage netaddr; + struct sockaddr_in *netaddr4 = NULL; + struct sockaddr_in6 *netaddr6 = NULL; + size_t addrinlen = sizeof( addrin ); + struct addrinfo hints; + struct addrinfo *ai = NULL; + char peername[NI_MAXHOST]; + char netname[NI_MAXHOST]; + char *tmp = NULL; + int i; + int e; + int shift; if (getpeername(net, (struct sockaddr *) &addrin, (socklen_t *)&addrinlen) < 0) err("getsockname failed: %m"); - peername = inet_ntoa(addrin.sin_addr); - client->exportname=g_strdup_printf(client->server->exportname, peername); - - msg4(LOG_INFO, "connect from %s, assigned file is %s", - peername, client->exportname); - client->clientname=g_strdup(peername); -} -/** - * Destroy a pid_t* - * @param data a pointer to pid_t which should be freed - **/ -void destroy_pid_t(gpointer data) { - g_free(data); -} + getnameinfo((struct sockaddr *)&addrin, (socklen_t)addrinlen, + peername, sizeof (peername), NULL, 0, NI_NUMERICHOST); -/** - * Go daemon (unless we specified at compile time that we didn't want this) - * @param serve the first server of our configuration. If its port is zero, - * then do not daemonize, because we're doing inetd then. This parameter - * is only used to create a PID file of the form - * /var/run/nbd-server.<port>.pid; it's not modified in any way. - **/ -#if !defined(NODAEMON) && !defined(NOFORK) -void daemonize(SERVER* serve) { - FILE*pidf; + memset(&hints, '\0', sizeof (hints)); + hints.ai_flags = AI_ADDRCONFIG; + e = getaddrinfo(peername, NULL, &hints, &ai); - if(daemon(0,0)<0) { - err("daemon"); + if(e != 0) { + fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e)); + freeaddrinfo(ai); + return; } - if(serve) { - snprintf(pidfname, sizeof(char)*255, "/var/run/nbd-server.%d.pid", serve->port); - } else { - strncpy(pidfname, "/var/run/nbd-server.pid", sizeof(char)*255); - } - pidf=fopen(pidfname, "w"); - if(pidf) { - fprintf(pidf,"%d\n", (int)getpid()); - fclose(pidf); - } else { - perror("fopen"); - fprintf(stderr, "Not fatal; continuing"); - } -} -#else -#define daemonize(serve) -#endif /* !defined(NODAEMON) && !defined(NOFORK) */ -/** - * Connect a server's socket. - * - * @param serve the server we want to connect. - **/ -void setup_serve(SERVER *serve) { - struct sockaddr_in addrin; - struct sigaction sa; - int addrinlen = sizeof(addrin); - int sock_flags; -#ifndef sun - int yes=1; -#else - char yes='1'; -#endif /* sun */ - if ((serve->socket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0) - err("socket: %m"); + switch(client->server->virtstyle) { + case VIRT_NONE: + client->exportname=g_strdup(client->server->exportname); + break; + case VIRT_IPHASH: + for(i=0;iexportname=g_strdup_printf(client->server->exportname, peername); + break; + case VIRT_CIDR: + memcpy(&netaddr, &addrin, addrinlen); + if(ai->ai_family == AF_INET) { + netaddr4 = (struct sockaddr_in *)&netaddr; + (netaddr4->sin_addr).s_addr>>=32-(client->server->cidrlen); + (netaddr4->sin_addr).s_addr<<=32-(client->server->cidrlen); + + getnameinfo((struct sockaddr *) netaddr4, (socklen_t) addrinlen, + netname, sizeof (netname), NULL, 0, NI_NUMERICHOST); + tmp=g_strdup_printf("%s/%s", netname, peername); + }else if(ai->ai_family == AF_INET6) { + netaddr6 = (struct sockaddr_in6 *)&netaddr; + + shift = 128-(client->server->cidrlen); + i = 3; + while(shift >= 32) { + ((netaddr6->sin6_addr).s6_addr32[i])=0; + shift-=32; + i--; + } + (netaddr6->sin6_addr).s6_addr32[i]>>=shift; + (netaddr6->sin6_addr).s6_addr32[i]<<=shift; - /* lose the pesky "Address already in use" error message */ - if (setsockopt(serve->socket,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) { - err("setsockopt SO_REUSEADDR"); - } - if (setsockopt(serve->socket,SOL_SOCKET,SO_KEEPALIVE,&yes,sizeof(int)) == -1) { - err("setsockopt SO_KEEPALIVE"); - } + getnameinfo((struct sockaddr *)netaddr6, (socklen_t)addrinlen, + netname, sizeof(netname), NULL, 0, NI_NUMERICHOST); + tmp=g_strdup_printf("%s/%s", netname, peername); + } - /* make the listening socket non-blocking */ - if ((sock_flags = fcntl(serve->socket, F_GETFL, 0)) == -1) { - err("fcntl F_GETFL"); - } - if (fcntl(serve->socket, F_SETFL, sock_flags | O_NONBLOCK) == -1) { - err("fcntl F_SETFL O_NONBLOCK"); + if(tmp != NULL) + client->exportname=g_strdup_printf(client->server->exportname, tmp); + + break; } - DEBUG("Waiting for connections... bind, "); - addrin.sin_family = AF_INET; - addrin.sin_port = htons(serve->port); - addrin.sin_addr.s_addr = 0; - if (bind(serve->socket, (struct sockaddr *) &addrin, addrinlen) < 0) - err("bind: %m"); - DEBUG("listen, "); - if (listen(serve->socket, 1) < 0) - err("listen: %m"); - sa.sa_handler = sigchld_handler; - sigemptyset(&sa.sa_mask); - sa.sa_flags = SA_RESTART; - if(sigaction(SIGCHLD, &sa, NULL) == -1) - err("sigaction: %m"); - sa.sa_handler = sigterm_handler; - sigemptyset(&sa.sa_mask); - sa.sa_flags = SA_RESTART; - if(sigaction(SIGTERM, &sa, NULL) == -1) - err("sigaction: %m"); - children=g_hash_table_new_full(g_int_hash, g_int_equal, NULL, destroy_pid_t); + freeaddrinfo(ai); + msg4(LOG_INFO, "connect from %s, assigned file is %s", + peername, client->exportname); + client->clientname=g_strdup(peername); } /** - * Connect our servers. + * Destroy a pid_t* + * @param data a pointer to pid_t which should be freed **/ -void setup_servers(GArray* servers) { - int i; - - for(i=0;ilen;i++) { - setup_serve(&(g_array_index(servers, SERVER, i))); - } +void destroy_pid_t(gpointer data) { + g_free(data); } /** - * Loop through the available servers, and serve them. + * Loop through the available servers, and serve them. Never returns. **/ int serveloop(GArray* servers) { - struct sockaddr_in addrin; + struct sockaddr_storage addrin; socklen_t addrinlen=sizeof(addrin); - SERVER *serve; int i; int max; int sock; fd_set mset; fd_set rset; - struct timeval tv; /* * Set up the master fd_set. The set of descriptors we need @@ -1145,39 +1866,76 @@ int serveloop(GArray* servers) { max=0; FD_ZERO(&mset); for(i=0;ilen;i++) { - sock=(g_array_index(servers, SERVER, i)).socket; - FD_SET(sock, &mset); - max=sock>max?sock:max; + if((sock=(g_array_index(servers, SERVER, i)).socket)) { + FD_SET(sock, &mset); + max=sock>max?sock:max; + } + } + if(modernsock) { + FD_SET(modernsock, &mset); + max=modernsock>max?modernsock:max; } for(;;) { - CLIENT *client; - int net; + CLIENT *client = NULL; pid_t *pid; memcpy(&rset, &mset, sizeof(fd_set)); - tv.tv_sec=0; - tv.tv_usec=500; - if(select(max+1, &rset, NULL, NULL, &tv)>0) { + if(select(max+1, &rset, NULL, NULL, NULL)>0) { + int net = 0; + SERVER* serve=NULL; + DEBUG("accept, "); - for(i=0;ilen;i++) { + if(FD_ISSET(modernsock, &rset)) { + if((net=accept(modernsock, (struct sockaddr *) &addrin, &addrinlen)) < 0) + err("accept: %m"); + client = negotiate(net, NULL, servers); + if(!client) { + err_nonfatal("negotiation failed"); + close(net); + net=0; + continue; + } + serve = client->server; + } + for(i=0;ilen && !net;i++) { serve=&(g_array_index(servers, SERVER, i)); if(FD_ISSET(serve->socket, &rset)) { if ((net=accept(serve->socket, (struct sockaddr *) &addrin, &addrinlen)) < 0) err("accept: %m"); - - client = g_malloc(sizeof(CLIENT)); + } + } + if(net) { + int sock_flags; + + if(serve->max_connections > 0 && + g_hash_table_size(children) >= serve->max_connections) { + msg2(LOG_INFO, "Max connections reached"); + close(net); + continue; + } + if((sock_flags = fcntl(net, F_GETFL, 0))==-1) { + err("fcntl F_GETFL"); + } + if(fcntl(net, F_SETFL, sock_flags &~O_NONBLOCK)==-1) { + err("fcntl F_SETFL ~O_NONBLOCK"); + } + if(!client) { + client = g_new0(CLIENT, 1); client->server=serve; client->exportsize=OFFT_MAX; client->net=net; - set_peername(net, client); - if (!authorized_client(client)) { - msg2(LOG_INFO,"Unauthorized client") ; - close(net); - continue; - } - msg2(LOG_INFO,"Authorized client") ; - pid=g_malloc(sizeof(pid_t)); -#ifndef NOFORK + client->transactionlogfd = -1; + } + set_peername(net, client); + if (!authorized_client(client)) { + msg2(LOG_INFO,"Unauthorized client") ; + close(net); + continue; + } + msg2(LOG_INFO,"Authorized client") ; + pid=g_malloc(sizeof(pid_t)); + + if (!dontfork) { if ((*pid=fork())<0) { msg3(LOG_INFO,"Could not fork (%s)",strerror(errno)) ; close(net); @@ -1190,25 +1948,289 @@ int serveloop(GArray* servers) { } /* child */ g_hash_table_destroy(children); - for(i=0;ilen,serve=(g_array_index(servers, SERVER*, i));i++) { + for(i=0;ilen;i++) { + serve=&g_array_index(servers, SERVER, i); close(serve->socket); } /* FALSE does not free the - actual data. This is required, - because the client has a - direct reference into that - data, and otherwise we get a - segfault... */ + actual data. This is required, + because the client has a + direct reference into that + data, and otherwise we get a + segfault... */ g_array_free(servers, FALSE); -#endif // NOFORK - msg2(LOG_INFO,"Starting to serve"); - serveconnection(client); } + + msg2(LOG_INFO,"Starting to serve"); + serveconnection(client); + exit(EXIT_SUCCESS); } } } } +void dosockopts(int socket) { +#ifndef sun + int yes=1; +#else + char yes='1'; +#endif /* sun */ + int sock_flags; + + /* lose the pesky "Address already in use" error message */ + if (setsockopt(socket,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) { + err("setsockopt SO_REUSEADDR"); + } + if (setsockopt(socket,SOL_SOCKET,SO_KEEPALIVE,&yes,sizeof(int)) == -1) { + err("setsockopt SO_KEEPALIVE"); + } + + /* make the listening socket non-blocking */ + if ((sock_flags = fcntl(socket, F_GETFL, 0)) == -1) { + err("fcntl F_GETFL"); + } + if (fcntl(socket, F_SETFL, sock_flags | O_NONBLOCK) == -1) { + err("fcntl F_SETFL O_NONBLOCK"); + } +} + +/** + * Connect a server's socket. + * + * @param serve the server we want to connect. + **/ +int setup_serve(SERVER *serve) { + struct addrinfo hints; + struct addrinfo *ai = NULL; + gchar *port = NULL; + int e; + + if(!do_oldstyle) { + return serve->servename ? 1 : 0; + } + memset(&hints,'\0',sizeof(hints)); + hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG | AI_NUMERICSERV; + hints.ai_socktype = SOCK_STREAM; + hints.ai_family = serve->socket_family; + + port = g_strdup_printf ("%d", serve->port); + if (port == NULL) + return 0; + + e = getaddrinfo(serve->listenaddr,port,&hints,&ai); + + g_free(port); + + if(e != 0) { + fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e)); + serve->socket = -1; + freeaddrinfo(ai); + exit(EXIT_FAILURE); + } + + if(serve->socket_family == AF_UNSPEC) + serve->socket_family = ai->ai_family; + +#ifdef WITH_SDP + if ((serve->flags) && F_SDP) { + if (ai->ai_family == AF_INET) + ai->ai_family = AF_INET_SDP; + else (ai->ai_family == AF_INET6) + ai->ai_family = AF_INET6_SDP; + } +#endif + if ((serve->socket = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol)) < 0) + err("socket: %m"); + + dosockopts(serve->socket); + + DEBUG("Waiting for connections... bind, "); + e = bind(serve->socket, ai->ai_addr, ai->ai_addrlen); + if (e != 0 && errno != EADDRINUSE) + err("bind: %m"); + DEBUG("listen, "); + if (listen(serve->socket, 1) < 0) + err("listen: %m"); + + freeaddrinfo (ai); + if(serve->servename) { + return 1; + } else { + return 0; + } +} + +void open_modern(void) { + struct addrinfo hints; + struct addrinfo* ai = NULL; + struct sock_flags; + int e; + + memset(&hints, '\0', sizeof(hints)); + hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG; + hints.ai_socktype = SOCK_STREAM; + hints.ai_family = AF_UNSPEC; + hints.ai_protocol = IPPROTO_TCP; + e = getaddrinfo(modern_listen, NBD_DEFAULT_PORT, &hints, &ai); + if(e != 0) { + fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e)); + exit(EXIT_FAILURE); + } + if((modernsock = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol))<0) { + err("socket: %m"); + } + + dosockopts(modernsock); + + if(bind(modernsock, ai->ai_addr, ai->ai_addrlen)) { + err("bind: %m"); + } + if(listen(modernsock, 10) <0) { + err("listen: %m"); + } + + freeaddrinfo(ai); +} + +/** + * Connect our servers. + **/ +void setup_servers(GArray* servers) { + int i; + struct sigaction sa; + int want_modern=0; + + for(i=0;ilen;i++) { + want_modern |= setup_serve(&(g_array_index(servers, SERVER, i))); + } + if(want_modern) { + open_modern(); + } + children=g_hash_table_new_full(g_int_hash, g_int_equal, NULL, destroy_pid_t); + + sa.sa_handler = sigchld_handler; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_RESTART; + if(sigaction(SIGCHLD, &sa, NULL) == -1) + err("sigaction: %m"); + sa.sa_handler = sigterm_handler; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_RESTART; + if(sigaction(SIGTERM, &sa, NULL) == -1) + err("sigaction: %m"); +} + +/** + * Go daemon (unless we specified at compile time that we didn't want this) + * @param serve the first server of our configuration. If its port is zero, + * then do not daemonize, because we're doing inetd then. This parameter + * is only used to create a PID file of the form + * /var/run/nbd-server.<port>.pid; it's not modified in any way. + **/ +#if !defined(NODAEMON) +void daemonize(SERVER* serve) { + FILE*pidf; + + if(serve && !(serve->port)) { + return; + } + if(daemon(0,0)<0) { + err("daemon"); + } + if(!*pidftemplate) { + if(serve) { + strncpy(pidftemplate, "/var/run/nbd-server.%d.pid", 255); + } else { + strncpy(pidftemplate, "/var/run/nbd-server.pid", 255); + } + } + snprintf(pidfname, 255, pidftemplate, serve ? serve->port : 0); + pidf=fopen(pidfname, "w"); + if(pidf) { + fprintf(pidf,"%d\n", (int)getpid()); + fclose(pidf); + } else { + perror("fopen"); + fprintf(stderr, "Not fatal; continuing"); + } +} +#else +#define daemonize(serve) +#endif /* !defined(NODAEMON) */ + +/* + * Everything beyond this point (in the file) is run in non-daemon mode. + * The stuff above daemonize() isn't. + */ + +void serve_err(SERVER* serve, const char* msg) G_GNUC_NORETURN; + +void serve_err(SERVER* serve, const char* msg) { + g_message("Export of %s on port %d failed:", serve->exportname, + serve->port); + err(msg); +} + +/** + * Set up user-ID and/or group-ID + **/ +void dousers(void) { + struct passwd *pw; + struct group *gr; + gchar* str; + if(rungroup) { + gr=getgrnam(rungroup); + if(!gr) { + str = g_strdup_printf("Invalid group name: %s", rungroup); + err(str); + } + if(setgid(gr->gr_gid)<0) { + err("Could not set GID: %m"); + } + } + if(runuser) { + pw=getpwnam(runuser); + if(!pw) { + str = g_strdup_printf("Invalid user name: %s", runuser); + err(str); + } + if(setuid(pw->pw_uid)<0) { + err("Could not set UID: %m"); + } + } +} + +#ifndef ISSERVER +void glib_message_syslog_redirect(const gchar *log_domain, + GLogLevelFlags log_level, + const gchar *message, + gpointer user_data) +{ + int level=LOG_DEBUG; + + switch( log_level ) + { + case G_LOG_FLAG_FATAL: + case G_LOG_LEVEL_CRITICAL: + case G_LOG_LEVEL_ERROR: + level=LOG_ERR; + break; + case G_LOG_LEVEL_WARNING: + level=LOG_WARNING; + break; + case G_LOG_LEVEL_MESSAGE: + case G_LOG_LEVEL_INFO: + level=LOG_INFO; + break; + case G_LOG_LEVEL_DEBUG: + level=LOG_DEBUG; + default: + level=LOG_ERR; + } + syslog(level, "%s", message); +} +#endif + /** * Main entry point... **/ @@ -1219,49 +2241,64 @@ int main(int argc, char *argv[]) { if (sizeof( struct nbd_request )!=28) { fprintf(stderr,"Bad size of structure. Alignment problems?\n"); - exit(-1) ; + exit(EXIT_FAILURE) ; } + memset(pidftemplate, '\0', 256); + logging(); config_file_pos = g_strdup(CFILE); serve=cmdline(argc, argv); servers = parse_cfile(config_file_pos, &err); + + if(serve) { + serve->socket_family = AF_UNSPEC; + + append_serve(serve, servers); + + if (!(serve->port)) { + CLIENT *client; +#ifndef ISSERVER + /* You really should define ISSERVER if you're going to use + * inetd mode, but if you don't, closing stdout and stderr + * (which inetd had connected to the client socket) will let it + * work. */ + close(1); + close(2); + open("/dev/null", O_WRONLY); + open("/dev/null", O_WRONLY); + g_log_set_default_handler( glib_message_syslog_redirect, NULL ); +#endif + client=g_malloc(sizeof(CLIENT)); + client->server=serve; + client->net=0; + client->exportsize=OFFT_MAX; + set_peername(0,client); + serveconnection(client); + return 0; + } + } + if(!servers || !servers->len) { - g_warning("Could not parse config file: %s", err->message); + if(err && !(err->domain == g_quark_from_string("parse_cfile") + && err->code == CFILE_NOTFOUND)) { + g_warning("Could not parse config file: %s", + err ? err->message : "Unknown error"); + } } if(serve) { - g_array_append_val(servers, *serve); + g_warning("Specifying an export on the command line is deprecated."); + g_warning("Please use a configuration file instead."); } -/* We don't support this at this time */ -#if 0 - if (!(serve->port)) { - CLIENT *client; -#ifndef ISSERVER - /* You really should define ISSERVER if you're going to use - * inetd mode, but if you don't, closing stdout and stderr - * (which inetd had connected to the client socket) will let it - * work. */ - close(1); - close(2); - open("/dev/null", O_WRONLY); - open("/dev/null", O_WRONLY); -#endif - client=g_malloc(sizeof(CLIENT)); - client->server=serve; - client->net=0; - client->exportsize=OFFT_MAX; - set_peername(0,client); - serveconnection(client); - return 0; - } -#endif if((!serve) && (!servers||!servers->len)) { - g_message("Nothing to do! Bye!"); + g_message("No configured exports; quitting."); exit(EXIT_FAILURE); } - daemonize(serve); + if (!dontfork) + daemonize(serve); setup_servers(servers); + dousers(); serveloop(servers); return 0 ; }