2 * Network Block Device - server
4 * Copyright 1996-1998 Pavel Machek, distribute under GPL
5 * <pavel@atrey.karlin.mff.cuni.cz>
6 * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk>
8 * Version 1.0 - hopefully 64-bit-clean
9 * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au>
10 * Version 1.2 - autodetect size of block devices, thanks to Peter T. Breuer <ptb@it.uc3m.es>
11 * Version 1.5 - can compile on Unix systems that don't have 64 bit integer
12 * type, or don't have 64 bit file offsets by defining FS_32BIT
13 * in compile options for nbd-server *only*. This can be done
14 * with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the
15 * original autoconf input file, or I would make it a configure
16 * option.) Ken Yap <ken@nlc.net.au>.
17 * Version 1.6 - fix autodetection of block device size and really make 64 bit
18 * clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk>
19 * Version 2.0 - Version synchronised with client
20 * Version 2.1 - Reap zombie client processes when they exit. Removed
21 * (uncommented) the _IO magic, it's no longer necessary. Wouter
22 * Verhelst <wouter@debian.org>
23 * Version 2.2 - Auto switch to read-only mode (useful for floppies).
24 * Version 2.3 - Fixed code so that Large File Support works. This
25 * removes the FS_32BIT compile-time directive; define
26 * _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be
27 * using FS_32BIT. This will allow you to use files >2GB instead of
28 * having to use the -m option. Wouter Verhelst <wouter@debian.org>
29 * Version 2.4 - Added code to keep track of children, so that we can
30 * properly kill them from initscripts. Add a call to daemon(),
31 * so that processes don't think they have to wait for us, which is
32 * interesting for initscripts as well. Wouter Verhelst
/* Version string printed in the usage banner by cmdline(); expands to
   PACKAGE_VERSION, which is defined outside this section. */
36 #define VERSION PACKAGE_VERSION
/* 2 to the power 30 (1024 * 1024 * 1024), i.e. the byte count of one GiB. */
37 #define GIGA (1*1024*1024*1024)
39 #include <sys/types.h>
40 #include <sys/socket.h>
42 #include <sys/wait.h> /* wait */
43 #include <signal.h> /* sigaction */
44 #include <netinet/tcp.h>
45 #include <netinet/in.h> /* sockaddr_in, htons, in_addr */
46 #include <netdb.h> /* hostent, gethostby*, getservby* */
53 #include <arpa/inet.h>
/* Program name string; its users are not visible in this section. */
58 #define MY_NAME "nbd_server"
60 /* Authorization file should contain lines with IP addresses of
61 clients authorized to use the server. If it does not exist,
64 You may want to set this to an absolute path if you're not using
65 -DNODAEMON, since if you don't, nbd-server will look for this file
66 in the root-directory ("/"). */
/* Opened for reading by authorized_client() through fopen(). */
67 #define AUTH_FILE "nbd_server.allow"
68 /* how much space for child PIDs we have by default. Dynamically
69 allocated, and will be realloc()ed if out of space, so this should
70 probably be fair for most situations. */
/* Also used as the growth step and memset length in connectme(). */
71 #define DEFAULT_CHILD_ARRAY 256
75 /* Deep magic: ioctl.h defines _IO macro (at least on linux) */
78 /* Debugging macros, now nothing goes to syslog unless you say ISSERVER */
/* msg2, msg3, msg4: emit a message given a priority (a), a format string (b)
   and zero, one or two format arguments (c, d).
   NOTE(review): two alternative definitions follow; the preprocessor
   conditional that selects between them is not visible in this section
   (presumably keyed on ISSERVER, as the comment above suggests - confirm). */
/* Variant 1: forward every argument to syslog(). */
80 #define msg2(a,b) syslog(a,b)
81 #define msg3(a,b,c) syslog(a,b,c)
82 #define msg4(a,b,c,d) syslog(a,b,c,d)
/* Variant 2: ignore the priority, print the formatted text to stderr and
   append a newline. The format string b is passed straight to fprintf(), so
   callers must only pass trusted format strings. */
84 #define msg2(a,b) do { fprintf(stderr,b) ; fputs("\n",stderr) ; } while(0)
85 #define msg3(a,b,c) do { fprintf(stderr,b,c); fputs("\n",stderr) ; } while(0)
86 #define msg4(a,b,c,d) do { fprintf(stderr,b,c,d); fputs("\n",stderr) ; } while(0)
90 #include <sys/ioctl.h>
91 #include <sys/mount.h> /* For BLKGETSIZE */
/* Debug tracing helpers taking a printf-style format plus zero, one or two
   arguments. The first set forwards to printf(); the second set expands to
   nothing so the calls vanish from the build.
   NOTE(review): the conditional choosing between the two sets, and the empty
   counterpart of DEBUG( a ), are not visible in this section. */
95 #define DEBUG( a ) printf( a )
96 #define DEBUG2( a,b ) printf( a,b )
97 #define DEBUG3( a,b,c ) printf( a,b,c )
/* No-op variants. */
100 #define DEBUG2( a,b )
101 #define DEBUG3( a,b,c )
/* Forward declarations of functions defined further down and called from
   connectme(). */
104 void serveconnection(int net);
105 void set_peername(int net,char *clientname);
/* Path of the copy-on-write diff file: formatted by serveconnection() and
   unlinked by mainloop() when a disconnect request arrives. */
108 char difffilename[256];
/* Idle timeout in seconds, assigned by cmdline() from the argument that
   follows the timeout option; 0 by default. */
109 unsigned int timeout = 0;
/* When equal to 1, mainloop() treats every write request as failed without
   calling expwrite(). Where it is set is not visible in this section. */
110 int autoreadonly = 0;
/*
 * authorized_client - look a client name up in the authorization file.
 *
 * name: client identifier to search for; connectme() passes the string that
 *       set_peername() filled in.
 *
 * Opens AUTH_FILE for reading (logging a LOG_INFO message with strerror(errno)
 * when that fails) and scans it with fgets(), one line at a time. A line
 * matches when its first strlen(name) characters are equal to name.
 *
 * NOTE(review): the comparison is a prefix match, so a file line such as
 * "10.0.0.10" also matches the name "10.0.0.1". Comparing the complete line
 * (without its trailing newline) would close that gap.
 * NOTE(review): the return statements, the fclose() call and the policy used
 * when the file cannot be opened are not visible in this section; the
 * original comment below states 0 = refused, 1 = OK.
 */
112 int authorized_client(char *name)
113 /* 0 - authorization refused, 1 - OK
114 authorization file contains one line per machine, no wildcards
121 if ((f=fopen(AUTH_FILE,"r"))==NULL) {
122 msg4(LOG_INFO,"Can't open authorization file %s (%s).",
123 AUTH_FILE,strerror(errno)) ;
127 while (fgets(line,LINELEN,f)!=NULL) {
128 if (strncmp(line,name,strlen(name))==0) {
/*
 * readit - read from descriptor f into buf, reporting failure through err().
 *
 * f:   descriptor to read from (mainloop() passes the client socket).
 * buf: destination buffer.
 * len: number of bytes requested.
 *
 * A read() result of zero or less (end of file or error) is handed to
 * err("Read failed: %m").
 * NOTE(review): the rest of the body is not visible in this section;
 * presumably it loops until len bytes have arrived, since read() may return
 * fewer bytes than requested - confirm. err() is defined elsewhere.
 */
137 inline void readit(int f, void *buf, int len)
142 if ((res = read(f, buf, len)) <= 0)
143 err("Read failed: %m");
/*
 * writeit - send len bytes from buf on socket f, reporting failure through
 * err().
 *
 * f:   socket descriptor (send() is used with flags 0).
 * buf: data to transmit.
 * len: number of bytes to transmit.
 *
 * A send() result of zero or less is handed to err("Send failed: %m").
 * NOTE(review): the rest of the body is not visible in this section;
 * presumably it loops over partial sends - confirm. err() is defined
 * elsewhere.
 */
149 inline void writeit(int f, void *buf, int len)
154 if ((res = send(f, buf, len, 0)) <= 0)
155 err("Send failed: %m");
/* This is starting to get ugly. If someone knows a better way to find
 * the maximum value of a signed type *without* relying on overflow
 * (doing so breaks on 64bit architectures), that would be nice.
 *
 * How it works: build a value with all bits set below the top byte
 * (2^(bits-8) - 1), shift it left by 7 and add 127. That sets every bit
 * except the sign bit without ever overflowing off_t.
 *
 * The whole expansion is parenthesized so the macro acts as one operand
 * inside larger expressions such as OFFT_MAX / 2 or OFFT_MAX % 2; without
 * the outer parentheses the operator would bind to the trailing 127 only.
 */
#define OFFT_MAX ((((((off_t)1)<<((sizeof(off_t)-1)*8))-1)<<7)+127)
/* Server-wide state shared by the functions below. */
166 int port; /* Port I'm listening at */
167 char *exportname; /* File I'm exporting */
/* OFFT_MAX doubles as "size not given": serveconnection() autodetects the
   size when exportsize still equals OFFT_MAX. */
168 off_t exportsize = OFFT_MAX; /* ...and its length */
/* Divisor that maps a logical offset to an entry of export[] (a/hunksize)
   and an offset inside that file (a%hunksize); see rawexpread(). */
169 off_t hunksize = OFFT_MAX;
173 u32 difffilelen=0 ; /* number of pages in difffile */
/* Textual peer address, filled by set_peername() on each accepted
   connection. */
175 char clientname[256] ;
/* Current number of slots in the children PID array. */
176 int child_arraysize=DEFAULT_CHILD_ARRAY;
180 #define DIFFPAGESIZE 4096 /* diff file uses those chunks */
/* Bits stored in the flags variable by cmdline().
   NOTE(review): F_READONLY is used elsewhere but its definition is not
   visible in this section. */
183 #define F_MULTIFILE 2
184 #define F_COPYONWRITE 4
/*
 * cmdline - parse the command line into the global configuration.
 *
 * argc, argv: the arguments received by main().
 *
 * Visible effects: prints the version banner and usage text (the condition
 * guarding them is not visible in this section), stores atoi(argv[1]) in
 * port and argv[2] in exportname, then walks argv[3] onwards. Arguments
 * starting with a dash are options that set bits in flags or the idle
 * timeout; any other argument is a size whose trailing k, K, m or M suffix is
 * cut off before the digits are converted.
 *
 * NOTE(review): atoi() and atol() give no error indication, so malformed
 * numbers are silently read as 0. atol() also returns long, which limits the
 * size to the range of long on platforms where off_t is wider.
 * NOTE(review): an empty size argument makes strlen(argv[i])-1 equal to -1,
 * so argv[i][last] reads before the start of the string.
 */
186 void cmdline(int argc, char *argv[])
191 printf("This is nbd-server version " VERSION "\n");
192 printf("Usage: port file_to_export [size][kKmM] [-r] [-m] [-c] [-a timeout_sec]\n"
194 " -m multiple file\n"
195 " -c copy on write\n"
196 " -a maximum idle seconds, terminates when idle time exceeded\n"
197 " if port is set to 0, stdin is used (for running from inetd)\n"
198 " if file_to_export contains '%%s', it is substituted with IP\n"
199 " address of machine trying to connect\n" );
202 port = atoi(argv[1]);
/* Options and the optional size follow the port and the export name. */
203 for (i = 3; i < argc; i++) {
204 if (*argv[i] == '-') {
205 switch (argv[i][1]) {
210 flags |= F_MULTIFILE;
213 case 'c': flags |=F_COPYONWRITE;
/* The timeout value is taken from the argument after the option. */
217 timeout = atoi(argv[i+1]);
220 fprintf(stderr, "timeout requires argument\n");
/* Size argument: strip a recognised unit suffix, then convert the digits. */
226 int last = strlen(argv[i])-1;
227 char suffix = argv[i][last];
228 if (suffix == 'k' || suffix == 'K' ||
229 suffix == 'm' || suffix == 'M')
230 argv[i][last] = '\0';
231 es = (off_t)atol(argv[i]);
243 exportname = argv[2];
/*
 * sigchld_handler - SIGCHLD handler installed by connectme().
 *
 * s: signal number (not used in the visible lines).
 *
 * Collects terminated children with wait() in a loop, logs the exit status
 * of children that exited normally, and clears the matching entry of the
 * children array.
 *
 * NOTE(review): status is passed to wait() as is and also given to
 * WIFEXITED()/WEXITSTATUS(). wait() needs a pointer to int while the macros
 * need the int itself, so one of the two uses is wrong whichever way status
 * is declared (the declaration is not visible in this section).
 * NOTE(review): wait() blocks while live children remain, so the loop may
 * stall inside the handler; waitpid(-1, &status, WNOHANG) would not.
 * NOTE(review): the logging macros call syslog() or fprintf(), which are not
 * async-signal-safe.
 */
246 void sigchld_handler(int s)
252 while((pid=wait(status)) > 0) {
253 if(WIFEXITED(status)) {
254 msg3(LOG_INFO, "Child exited with %d", WEXITSTATUS(status));
/* NOTE(review): children[i] is read before i is range-checked, so the search
   reads one element past the array when pid is not found; the two operands
   of && should be swapped. */
256 for(i=0;children[i]!=pid&&i<child_arraysize;i++);
257 if(i>=child_arraysize) {
258 msg3(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld",(long) pid);
/* Found: mark the slot as free again. */
260 children[i]=(pid_t)0;
261 DEBUG2("Removing %d from the list of children", pid);
266 /* If we are terminated, make sure our children are, too. */
/*
 * sigterm_handler - SIGTERM handler installed by connectme().
 *
 * s: the signal received; the same signal is forwarded with kill() to the
 *    PIDs stored in the children array.
 *
 * NOTE(review): the line between the loop header and the kill() call is not
 * visible in this section. Free slots hold 0, and kill(0, s) signals the
 * whole process group, so presumably empty slots are skipped - confirm.
 */
267 void sigterm_handler(int s) {
271 for(i=0;i<child_arraysize;i++) {
273 kill(children[i], s);
/*
 * connectme - listen on a TCP port and fork one child per accepted client.
 *
 * port: TCP port to bind on all local addresses (sin_addr.s_addr = 0).
 *
 * Visible steps: write the process id to /var/run/nbd-server.<port>.pid
 * (inside the section closed by the NODAEMON endif), create the listening
 * socket with SO_REUSEADDR, bind and listen with a backlog of 1, install the
 * SIGCHLD and SIGTERM handlers, allocate the children array, then loop
 * forever: accept a connection, resolve the peer with set_peername(), check
 * it with authorized_client(), find a free slot in children, fork, and let
 * the child run serveconnection() while the parent closes its copy of the
 * socket and goes back to accept().
 */
285 void connectme(int port)
287 struct sockaddr_in addrin;
/* NOTE(review): accept() takes a socklen_t pointer; addrinlen is an int. */
289 int addrinlen = sizeof(addrin);
290 int net, sock, newpid, i;
304 snprintf(pidfname, sizeof(char)*255, "/var/run/nbd-server.%d.pid", port);
305 pidf=fopen(pidfname, "w");
307 fprintf(pidf,"%d", (int)getpid());
310 fprintf(stderr, "Not fatal; continuing");
314 #endif /* NODAEMON */
316 if ((sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0)
319 /* lose the pesky "Address already in use" error message */
320 if (setsockopt(sock,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) {
324 DEBUG("Waiting for connections... bind, ");
325 addrin.sin_family = AF_INET;
326 addrin.sin_port = htons(port);
327 addrin.sin_addr.s_addr = 0;
328 if (bind(sock, (struct sockaddr *) &addrin, addrinlen) < 0)
331 if (listen(sock, 1) < 0)
/* SA_RESTART makes interrupted system calls such as accept() resume after
   the handler returns. */
334 sa.sa_handler = sigchld_handler;
335 sigemptyset(&sa.sa_mask);
336 sa.sa_flags = SA_RESTART;
337 if(sigaction(SIGCHLD, &sa, NULL) == -1)
338 err("sigaction: %m");
339 sa.sa_handler = sigterm_handler;
340 sigemptyset(&sa.sa_mask);
341 sa.sa_flags = SA_RESTART;
342 if(sigaction(SIGTERM, &sa, NULL) == -1)
343 err("sigaction: %m");
/* NOTE(review): the malloc() result is used without a NULL check. */
344 children=malloc(sizeof(pid_t)*child_arraysize);
345 memset(children, 0, sizeof(pid_t)*DEFAULT_CHILD_ARRAY);
346 for(;;) { /* infinite loop */
347 if ((net = accept(sock, (struct sockaddr *) &addrin, &addrinlen)) < 0)
350 set_peername(net,clientname) ;
351 if (!authorized_client(clientname)) {
352 msg2(LOG_INFO,"Unauthorized client") ;
356 msg2(LOG_INFO,"Authorized client") ;
/* Search for a free (zero) slot.
   NOTE(review): children[i] is read before i is range-checked, so a full
   array is read one element past its end; swap the operands of &&. */
357 for(i=0;children[i]&&i<child_arraysize;i++);
358 if(i>=child_arraysize) {
/* NOTE(review): the realloc() result is discarded and the requested size is
   the current size, so the array never grows (and children dangles if the
   block is moved). The memset below then writes DEFAULT_CHILD_ARRAY entries
   past the end of the allocation. Likely intended: assign the checked result
   of realloc(children, sizeof(pid_t)*(child_arraysize+DEFAULT_CHILD_ARRAY))
   back to children. */
359 realloc(children, sizeof(pid_t)*child_arraysize);
360 memset(children+child_arraysize, 0, sizeof(pid_t)*DEFAULT_CHILD_ARRAY);
362 child_arraysize+=DEFAULT_CHILD_ARRAY;
/* The child PID (or the fork error) lands directly in the chosen slot. */
365 if ((children[i]=fork())<0) {
366 msg3(LOG_INFO,"Could not fork (%s)",strerror(errno)) ;
370 if (children[i]>0) { /* parent */
371 close(net) ; continue ; }
376 msg2(LOG_INFO,"Starting to serve") ;
377 serveconnection(net) ;
/* SEND: transmit the reply structure with writeit(). The expansion refers to
   variables named net and reply at the point of use and already ends in a
   semicolon. */
381 #define SEND writeit( net, &reply, sizeof( reply ));
/* ERROR: send a reply whose error field is htonl(-1), then clear the field
   for the next reply and reset lastpoint to -1. */
382 #define ERROR { reply.error = htonl(-1); SEND; reply.error = 0; lastpoint = -1; }
/* Offset that maybeseek() compares against to decide whether an lseek() is
   needed; -1 never equals a valid offset, so it forces a seek.
   NOTE(review): where it is updated is not visible in this section. */
384 off_t lastpoint = (off_t)-1;
/*
 * maybeseek - position a descriptor at offset a unless lastpoint already
 * equals a.
 *
 * handle: file descriptor to position.
 * a:      absolute offset (SEEK_SET).
 *
 * A failing lseek() is reported through err().
 * NOTE(review): the sanity check that guards the "Can not happen" error and
 * any update of lastpoint are not visible in this section. lastpoint is one
 * global shared by all export descriptors, so the shortcut is only valid if
 * it is reset whenever a different descriptor is used - confirm.
 */
386 void maybeseek(int handle, off_t a)
389 err("Can not happen\n");
390 if (lastpoint != a) {
391 if (lseek(handle, a, SEEK_SET) < 0)
392 err("Can not seek locally!\n");
/*
 * myseek - unconditionally position a descriptor at an absolute offset.
 *
 * handle: file descriptor to position (callers pass the diff file).
 * a:      absolute offset (SEEK_SET).
 *
 * A failing lseek() is reported through err().
 */
399 void myseek(int handle,off_t a)
401 if (lseek(handle, a, SEEK_SET) < 0)
402 err("Can not seek locally!\n");
/* Scratch buffer of one diff page: expwrite() loads the original page into
   it, overlays the new data and writes the result to the diff file. */
405 char pagebuf[DIFFPAGESIZE];
/*
 * rawexpread - read directly from the exported file(s).
 *
 * a:   logical offset within the export; a/hunksize selects the entry of
 *      export[] and a%hunksize the offset inside that file.
 * buf: destination buffer.
 * len: number of bytes to read.
 *
 * Returns 0 when read() delivered exactly len bytes and 1 otherwise, so a
 * short read counts as a failure.
 * NOTE(review): a request that crosses a hunk boundary is not split here.
 */
407 int rawexpread(off_t a, char *buf, int len)
409 maybeseek(export[a/hunksize], a%hunksize);
410 return (read(export[a/hunksize], buf, len) != len);
/*
 * expread - read from the export, honouring the copy-on-write diff file.
 *
 * a:   logical offset within the export.
 * buf: destination buffer.
 * len: number of bytes to read.
 *
 * Without F_COPYONWRITE this is rawexpread(). Otherwise the request is
 * walked one DIFFPAGESIZE page at a time: a page whose difmap entry is not
 * (u32)-1 is read from the diff file, any other page from the original
 * export.
 *
 * Returns 0 on success, -1 when a diff-file read is short, or the
 * rawexpread() result.
 * NOTE(review): offset is assigned on a line that is not visible in this
 * section (presumably a - pagestart).
 */
413 int expread(off_t a, char *buf, int len)
416 off_t mapcnt, mapl, maph, pagestart;
418 if (!(flags & F_COPYONWRITE))
419 return rawexpread(a, buf, len);
420 DEBUG3("Asked to read %d bytes at %Lu.\n", len, (unsigned long long)a);
/* First and last diff page touched by the request. */
422 mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
424 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
425 pagestart=mapcnt*DIFFPAGESIZE;
/* Bytes of this request that fall inside the current page. */
427 rdlen=(len<DIFFPAGESIZE-offset) ? len : DIFFPAGESIZE-offset;
428 if (difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
429 DEBUG3("Page %Lu is at %lu\n", (unsigned long long)mapcnt,
430 (unsigned long)difmap[mapcnt]);
/* NOTE(review): if u32 is a 32-bit unsigned type, this product is computed
   in 32 bits and wraps once the diff file reaches 4 GiB - confirm. */
431 myseek(difffile, difmap[mapcnt]*DIFFPAGESIZE+offset);
432 if (read(difffile, buf, rdlen) != rdlen) return -1;
433 } else { /* the block is not there */
434 DEBUG2("Page %Lu is not here, we read the original one\n",
435 (unsigned long long)mapcnt);
/* NOTE(review): this return leaves the function after the first unmapped
   page, so the remaining pages of a multi-page request are never read even
   though success may be reported. Likely intended: return -1 only when
   rawexpread() fails and otherwise fall through to the bookkeeping below. */
436 return rawexpread(a, buf, rdlen);
/* Advance past the part just read. */
438 len-=rdlen; a+=rdlen; buf+=rdlen;
/*
 * rawexpwrite - write directly to the exported file(s).
 *
 * a:   logical offset within the export; a/hunksize selects the entry of
 *      export[] and a%hunksize the offset inside that file.
 * buf: data to write.
 * len: number of bytes to write.
 *
 * Returns 0 when write() accepted exactly len bytes and 1 otherwise, so a
 * short write counts as a failure.
 * NOTE(review): a request that crosses a hunk boundary is not split here.
 */
443 int rawexpwrite(off_t a, char *buf, int len)
445 maybeseek(export[a/hunksize], a%hunksize);
446 return (write(export[a/hunksize], buf, len) != len);
/*
 * expwrite - write to the export, diverting data to the diff file when
 * F_COPYONWRITE is set.
 *
 * a:   logical offset within the export.
 * buf: data to write.
 * len: number of bytes to write.
 *
 * Without F_COPYONWRITE this is rawexpwrite(). Otherwise the request is
 * walked one DIFFPAGESIZE page at a time. A page already present in the diff
 * file is updated in place. A page seen for the first time gets the next
 * free diff page: the original contents are read into pagebuf, the new data
 * is copied over them and the full page is written to the diff file.
 *
 * Returns 0 on success and -1 when a read or write fails or is short.
 * NOTE(review): offset and the initial rdlen are assigned on lines that are
 * not visible in this section.
 * NOTE(review): mapcnt, mapl and maph are u32 here but off_t in expread().
 * If u32 is a 32-bit unsigned type, mapcnt*DIFFPAGESIZE and
 * difffilelen*DIFFPAGESIZE are computed in 32 bits and wrap at 4 GiB -
 * confirm.
 */
450 int expwrite(off_t a, char *buf, int len)
452 u32 mapcnt,mapl,maph ; int wrlen,rdlen ;
453 off_t pagestart ; int offset ;
455 if (!(flags & F_COPYONWRITE))
456 return(rawexpwrite(a,buf,len));
457 DEBUG3("Asked to write %d bytes at %Lu.\n", len, (unsigned long long)a);
/* First and last diff page touched by the request. */
459 mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
461 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
462 pagestart=mapcnt*DIFFPAGESIZE ;
/* Bytes of this request that fall inside the current page. */
464 wrlen=(len<DIFFPAGESIZE-offset) ? len : DIFFPAGESIZE-offset ;
466 if (difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
467 DEBUG3("Page %Lu is at %lu\n", (unsigned long long)mapcnt,
468 (unsigned long)difmap[mapcnt]) ;
469 myseek(difffile,difmap[mapcnt]*DIFFPAGESIZE+offset) ;
470 if (write(difffile, buf, wrlen) != wrlen) return -1 ;
471 } else { /* the block is not there */
/* Append a new page at the end of the diff file and record its index. */
472 myseek(difffile,difffilelen*DIFFPAGESIZE) ;
473 difmap[mapcnt]=difffilelen++ ;
474 DEBUG3("Page %Lu is not here, we put it at %lu\n",
475 (unsigned long long)mapcnt,
476 (unsigned long)difmap[mapcnt]);
/* Do not read past the end of the current hunk. */
478 if (rdlen+pagestart%hunksize>hunksize)
479 rdlen=hunksize-(pagestart%hunksize) ;
480 if (rawexpread(pagestart,pagebuf,rdlen)) return -1 ;
481 memcpy(pagebuf+offset,buf,wrlen) ;
482 if (write(difffile,pagebuf,DIFFPAGESIZE)!=DIFFPAGESIZE) return -1 ;
/* Advance past the part just written. */
484 len-=wrlen ; a+=wrlen ; buf+=wrlen ;
/*
 * mainloop - negotiate with one client and serve its requests.
 *
 * net: connected client socket.
 *
 * Negotiation: sends INIT_PASSWD (8 bytes), cliserv_magic and the export
 * size, both converted with htonll(), followed by 128 zero bytes. Any failing
 * write is reported through err().
 *
 * Request loop: each request is read with readit() and its from, type and
 * len fields are converted from network byte order. Type 2 is a disconnect,
 * type 1 a write; the remaining path serves a read. Writes receive len bytes
 * from the socket and pass them to expwrite(); reads fill buf after a reply
 * header with expread() and send header plus data with writeit().
 *
 * NOTE(review): the return statements, the declaration of buf and the
 * condition guarding "Request too big!" are not visible in this section.
 */
489 int mainloop(int net)
491 struct nbd_request request;
492 struct nbd_reply reply;
/* NOTE(review): 290 bytes are cleared here but only 128 are sent below; the
   declaration of zeros is not visible in this section. */
497 memset(zeros, 0, 290);
498 if (write(net, INIT_PASSWD, 8) < 0)
499 err("Negotiation failed: %m");
500 cliserv_magic = htonll(cliserv_magic);
501 if (write(net, &cliserv_magic, sizeof(cliserv_magic)) < 0)
502 err("Negotiation failed: %m");
503 size_host = htonll(exportsize);
504 if (write(net, &size_host, 8) < 0)
505 err("Negotiation failed: %m");
506 if (write(net, zeros, 128) < 0)
507 err("Negotiation failed: %m");
509 DEBUG("Entering request loop!\n");
/* The reply magic never changes, so it is set once before the loop. */
510 reply.magic = htonl(NBD_REPLY_MAGIC);
513 #define BUFSIZE (1024*1024)
523 readit(net, &request, sizeof(request));
524 request.from = ntohll(request.from);
525 request.type = ntohl(request.type);
/* NOTE(review): the disconnect request is acted on before the request magic
   is validated further down. */
527 if (request.type==2) { /* Disconnect request */
528 if (difmap) free(difmap) ;
530 close(difffile) ; unlink(difffilename) ; }
531 err("Disconnect request received.") ;
534 len = ntohl(request.len);
536 if (request.magic != htonl(NBD_REQUEST_MAGIC))
537 err("Not enough magic.");
539 err("Request too big!");
541 printf("%s from %Lu (%Lu) len %d, ", request.type ? "WRITE" :
542 "READ", (unsigned long long)request.from,
543 (unsigned long long)request.from / 512, len);
/* Echo the client handle so the reply can be matched to its request. */
545 memcpy(reply.handle, request.handle, sizeof(reply.handle));
546 if ((request.from + len) > (OFFT_MAX)) {
547 DEBUG("[Number too large!]");
/* Reject requests beyond the export and writes to a read-only export. */
551 if ((((off_t)request.from + len) > exportsize) ||
552 ((flags & F_READONLY) && request.type)) {
557 if (request.type==1) { /* WRITE */
558 DEBUG("wr: net->buf, ");
559 readit(net, buf, len);
/* With autoreadonly set, the write fails without calling expwrite(). */
561 if ((autoreadonly == 1) || expwrite(request.from, buf, len)) {
562 DEBUG("Write failed: %m" );
/* Read path: data is placed after the space reserved for the reply header
   so that header and data go out in a single writeit() call. */
574 if (expread(request.from, buf + sizeof(struct nbd_reply), len)) {
576 DEBUG("Read failed: %m");
583 memcpy(buf, &reply, sizeof(struct nbd_reply));
584 writeit(net, buf, len + sizeof(struct nbd_reply));
/* Export path produced by set_peername() by formatting exportname with the
   peer address; serveconnection() uses it as the format for the file names
   it opens. */
589 char exportname2[1024];
/*
 * set_peername - determine the address of the connected peer.
 *
 * net:        connected socket.
 * clientname: output buffer that receives the dotted-quad peer address
 *             (at most 255 bytes are copied).
 *
 * Also formats exportname with the peer address into the global exportname2
 * and logs the connection.
 *
 * NOTE(review): the error text says "getsockname" although the failing call
 * is getpeername().
 * NOTE(review): exportname comes from the command line and is used as the
 * sprintf() format, with no bound on the output written to exportname2[1024];
 * snprintf() with sizeof exportname2 would bound it.
 * NOTE(review): strncpy() does not terminate the destination when the source
 * fills all 255 bytes; inet_ntoa() output is far shorter, so this matters
 * only if the source ever changes.
 * NOTE(review): getpeername() takes a socklen_t pointer; addrinlen is an int.
 */
591 void set_peername(int net,char *clientname)
593 struct sockaddr_in addrin;
594 int addrinlen = sizeof( addrin );
597 if (getpeername( net, (struct sockaddr *) &addrin, &addrinlen ) < 0)
598 err("getsockname failed: %m");
/* inet_ntoa() returns a pointer to a static buffer, valid until the next
   call. */
599 peername = inet_ntoa(addrin.sin_addr);
600 sprintf(exportname2, exportname, peername);
602 msg4(LOG_INFO, "connect from %s, assigned file is %s", peername, exportname2);
603 strncpy(clientname,peername,255) ;
/*
 * size_autodetect - find out the size of an opened export.
 *
 * export: open file descriptor of the exported file or device.
 *
 * Tries three methods in order and uses the first that yields a positive
 * size: lseek() to SEEK_END, fstat() st_size, and the BLKGETSIZE ioctl, whose
 * result is multiplied by 512. When none succeeds the failure is reported
 * through err().
 *
 * NOTE(review): the return statements for the lseek and ioctl paths and the
 * condition guarding "fstat failed" are not visible in this section.
 * NOTE(review): the lseek() probe leaves the descriptor positioned at the
 * end of the file.
 */
606 off_t size_autodetect(int export)
610 struct stat stat_buf;
613 DEBUG("looking for export size with lseek SEEK_END\n");
614 es = lseek(export, (off_t)0, SEEK_END);
615 if (es > ((off_t)0)) {
/* Encode the errno class as a small number for the debug trace. */
618 DEBUG2("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4)));
621 DEBUG("looking for export size with fstat\n");
622 stat_buf.st_size = 0;
623 error = fstat(export, &stat_buf);
624 if (!error && stat_buf.st_size > 0) {
625 return (off_t)stat_buf.st_size;
627 err("fstat failed: %m");
631 DEBUG("looking for export size with ioctl BLKGETSIZE\n");
/* The ioctl result is multiplied by 512 to obtain a byte count. */
632 if (!ioctl(export, BLKGETSIZE, &es32) && es32) {
633 es = (off_t)es32 * (off_t)512;
637 err("Could not find size of exported block device: %m");
/*
 * main - program entry point.
 *
 * Verifies that struct nbd_request has the expected 28-byte layout, then
 * hands control to connectme(), which serves connections indefinitely.
 *
 * NOTE(review): the call that parses the command line is not visible in this
 * section. The usage text printed by cmdline() says that port 0 means "stdin
 * is used (for running from inetd)", yet the visible code returns 1 when
 * port is 0 - confirm which behaviour is intended.
 */
641 int main(int argc, char *argv[])
646 if (sizeof( struct nbd_request )!=28) {
647 fprintf(stderr,"Bad size of structure. Alignment problems?\n");
653 if (!port) return 1 ;
654 connectme(port); /* serve infinitely */
659 void serveconnection(int net)
663 for (i=0; i<exportsize; i+=hunksize) {
664 char exportname3[1024];
666 sprintf(exportname3, exportname2, i/hunksize);
667 printf( "Opening %s\n", exportname3 );
668 if ((export[i/hunksize] = open(exportname3, (flags & F_READONLY) ? O_RDONLY : O_RDWR)) == -1) {
669 /* Read/write access was requested, but the media is read-only */
672 if ((export[i/hunksize] = open(exportname3, O_RDONLY)) == -1)
673 err("Could not open exported file: %m");
677 if (exportsize == (off_t)OFFT_MAX) {
678 exportsize = size_autodetect(export[0]);
680 if (exportsize > (off_t)OFFT_MAX) {
681 err("Size of exported file is too big\n");
684 msg3(LOG_INFO, "size of exported file/device is %Lu",
685 (unsigned long long)exportsize);
687 if (flags & F_COPYONWRITE) {
688 sprintf(difffilename,"%s-%s-%d.diff",exportname2,clientname,
690 msg3(LOG_INFO,"About to create map and diff file %s",difffilename) ;
691 difffile=open(difffilename,O_RDWR | O_CREAT | O_TRUNC,0600) ;
692 if (difffile<0) err("Could not create diff file (%m)") ;
693 if ((difmap=calloc(exportsize/DIFFPAGESIZE,sizeof(u32)))==NULL)
694 err("Could not allocate memory") ;
695 for (i=0;i<exportsize/DIFFPAGESIZE;i++) difmap[i]=(u32)-1 ;