2 * Network Block Device - server
4 * Copyright 1996-1998 Pavel Machek, distribute under GPL
5 * <pavel@atrey.karlin.mff.cuni.cz>
6 * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk>
8 * Version 1.0 - hopefully 64-bit-clean
9 * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au>
10 * Version 1.2 - autodetect size of block devices, thanks to Peter T. Breuer <ptb@it.uc3m.es>
11 * Version 1.5 - can compile on Unix systems that don't have 64 bit integer
12 * type, or don't have 64 bit file offsets by defining FS_32BIT
13 * in compile options for nbd-server *only*. This can be done
14 * with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the
15 * original autoconf input file, or I would make it a configure
16 * option.) Ken Yap <ken@nlc.net.au>.
17 * Version 1.6 - fix autodetection of block device size and really make 64 bit
18 * clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk>
19 * Version 2.0 - Version synchronised with client
20 * Version 2.1 - Reap zombie client processes when they exit. Removed
21 * (uncommented) the _IO magic, it's no longer necessary. Wouter
22 * Verhelst <wouter@debian.org>
23 * Version 2.2 - Auto switch to read-only mode (useful for floppies).
24 * Version 2.3 - Fixed code so that Large File Support works. This
25 * removes the FS_32BIT compile-time directive; define
26 * _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be
27 * using FS_32BIT. This will allow you to use files >2GB instead of
28 * having to use the -m option. Wouter Verhelst <wouter@debian.org>
29 * Version 2.4 - Added code to keep track of children, so that we can
30 * properly kill them from initscripts. Add a call to daemon(),
31 * so that processes don't think they have to wait for us, which is
32 * interesting for initscripts as well. Wouter Verhelst
34 * Version 2.5 - Bugfix release: forgot to reset child_arraysize to
35 * zero after fork()ing, resulting in nbd-server going berserk
36 * when it receives a signal with at least one child open. Wouter
37 * Verhelst <wouter@debian.org>
38 * 10/10/2003 - Added socket option SO_KEEPALIVE (sf.net bug 819235);
39 * rectified type of mainloop::size_host (sf.net bugs 814435 and
40 * 817385); close the PID file after writing to it, so that the
41 * daemon can actually be found. Wouter Verhelst
/* VERSION is the string printed by the usage banner in cmdline(); it simply
 * mirrors PACKAGE_VERSION (not defined in the visible part of this file -
 * presumably supplied by the build configuration; confirm).
 * GIGA is 2^30, i.e. the number of bytes in one GiB. */
45 #define VERSION PACKAGE_VERSION
46 #define GIGA (1*1024*1024*1024)
48 #include <sys/types.h>
49 #include <sys/socket.h>
51 #include <sys/wait.h> /* wait */
52 #include <signal.h> /* sigaction */
53 #include <netinet/tcp.h>
54 #include <netinet/in.h> /* sockaddr_in, htons, in_addr */
55 #include <netdb.h> /* hostent, gethostby*, getservby* */
62 #include <arpa/inet.h>
/* Program name. Its use is not visible in this part of the file
 * (presumably the syslog identity - confirm). */
67 #define MY_NAME "nbd_server"
69 /* how much space for child PIDs we have by default. Dynamically
70 allocated, and will be realloc()ed if out of space, so this should
71 probably be fair for most situations. */
72 #define DEFAULT_CHILD_ARRAY 256
/* Logging and tracing helpers.
 *
 * msg2/msg3/msg4 take a syslog priority followed by a format string and
 * zero, one or two arguments. Two alternative definitions are listed: one
 * forwards everything to syslog(); the other ignores the priority, prints
 * to stderr and appends a newline. The existing comment ties the syslog
 * variant to ISSERVER; the preprocessor conditional that selects between
 * them is not visible here.
 *
 * DEBUG/DEBUG2/DEBUG3 expand to printf() with the same arguments; the empty
 * DEBUG3 definition at the end expands to nothing, i.e. it is the variant
 * that compiles the trace away.
 *
 * NOTE(review): the stderr msg2 and DEBUG hand their text argument to a
 * printf-style function as the format string, so callers must only pass
 * string literals.
 */
76 /* Deep magic: ioctl.h defines _IO macro (at least on linux) */
78 /* Debugging macros, now nothing goes to syslog unless you say ISSERVER */
80 #define msg2(a,b) syslog(a,b)
81 #define msg3(a,b,c) syslog(a,b,c)
82 #define msg4(a,b,c,d) syslog(a,b,c,d)
84 #define msg2(a,b) do { fprintf(stderr,b) ; fputs("\n",stderr) ; } while(0)
85 #define msg3(a,b,c) do { fprintf(stderr,b,c); fputs("\n",stderr) ; } while(0)
86 #define msg4(a,b,c,d) do { fprintf(stderr,b,c,d); fputs("\n",stderr) ; } while(0)
89 #include <sys/ioctl.h>
90 #include <sys/mount.h> /* For BLKGETSIZE */
94 #define DEBUG( a ) printf( a )
95 #define DEBUG2( a,b ) printf( a,b )
96 #define DEBUG3( a,b,c ) printf( a,b,c )
100 #define DEBUG3( a,b,c )
/* Forward declarations and option globals.
 *
 * difffilename - path of the copy-on-write diff file; built in
 *                serveconnection() and unlinked by mainloop() on disconnect.
 * timeout      - idle timeout in seconds, set from the -a option in
 *                cmdline(); 0 is the initial value.
 * autoreadonly - when equal to 1, mainloop() refuses write requests. Where
 *                it is set is not visible here (the version history
 *                mentions an automatic switch to read-only mode - confirm).
 * auth_file    - file read by authorized_client(); defaults to
 *                "nbd_server.allow" relative to the working directory.
 */
103 void serveconnection(int net);
104 void set_peername(int net,char *clientname);
107 char difffilename[256];
108 unsigned int timeout = 0;
109 int autoreadonly = 0;
110 char *auth_file="nbd_server.allow";
/* Decide whether the client called `name` may connect.
 *
 * Opens auth_file and scans it line by line with fgets(), comparing the
 * first strlen(name) characters of each line with `name`. If the file
 * cannot be opened, an informational message including strerror(errno) is
 * logged. The existing comment below gives the return convention
 * (0 = refused, 1 = OK).
 *
 * NOTE(review): the comparison is a prefix match, so a client named
 * "10.0.0.1" also matches a line reading "10.0.0.10". Comparing the whole
 * line (minus its trailing newline) would avoid authorizing unlisted hosts.
 * NOTE(review): what is returned when the file cannot be opened, and
 * whether the stream is closed on every path, is not visible here -
 * confirm.
 */
112 int authorized_client(char *name)
113 /* 0 - authorization refused, 1 - OK
114 authorization file contains one line per machine, no wildcards
121 if ((f=fopen(auth_file,"r"))==NULL) {
122 msg4(LOG_INFO,"Can't open authorization file %s (%s).",
123 auth_file,strerror(errno)) ;
127 while (fgets(line,LINELEN,f)!=NULL) {
128 if (strncmp(line,name,strlen(name))==0) {
/* Read from descriptor `f` into `buf` with read(2), asking for `len` bytes.
 * A result of zero or less (end of file or error) is handed to err() with
 * the message "Read failed: %m" (err() is defined elsewhere; presumably it
 * does not return - confirm).
 * NOTE(review): read() may deliver fewer than `len` bytes; the handling of
 * short reads is not visible here - confirm that the remainder is retried.
 */
137 inline void readit(int f, void *buf, int len)
142 if ((res = read(f, buf, len)) <= 0)
143 err("Read failed: %m");
/* Send `len` bytes from `buf` on socket `f` using send(2) with no flags.
 * A result of zero or less is handed to err() with the message
 * "Send failed: %m" (err() is defined elsewhere; presumably it does not
 * return - confirm).
 * NOTE(review): send() may accept fewer than `len` bytes; the handling of
 * partial sends is not visible here - confirm that the remainder is retried.
 */
149 inline void writeit(int f, void *buf, int len)
154 if ((res = send(f, buf, len, 0)) <= 0)
155 err("Send failed: %m");
/* This is starting to get ugly. If someone knows a better way to find
 * the maximum value of a signed type *without* relying on overflow
 * (doing so breaks on 64bit architectures), that would be nice.
 *
 * OFFT_MAX is the largest value an off_t can hold. It is built in two
 * steps so that no intermediate result overflows: first every bit of the
 * low (sizeof(off_t)-1) bytes is set, then that value is shifted left by 7
 * and the remaining 7 low bits are filled in, leaving only the sign bit
 * clear.
 *
 * The whole expansion is wrapped in parentheses so that the macro behaves
 * as a single operand inside larger expressions (unary minus, %, *, casts,
 * comparisons) instead of having its trailing "+127" bind to a neighbour.
 */
#define OFFT_MAX ((((((off_t)1)<<((sizeof(off_t)-1)*8))-1)<<7)+127)
/* Server-wide state, filled in from the command line and later from the
 * exported file itself.
 *
 * exportsize and hunksize both start at OFFT_MAX. For exportsize that value
 * is the "not specified" marker: serveconnection() calls size_autodetect()
 * when it still finds OFFT_MAX there. hunksize is the divisor used to pick
 * an entry of export[] and the offset inside it (a/hunksize, a%hunksize).
 *
 * difffilelen counts the pages appended to the diff file so far (it is
 * incremented by expwrite()). clientname is filled by set_peername().
 * child_arraysize is the current capacity of the children[] PID table.
 *
 * DIFFPAGESIZE is the granularity of the copy-on-write map. F_MULTIFILE and
 * F_COPYONWRITE are bits of `flags` set by the -m and -c options; the
 * F_READONLY bit used elsewhere is defined on a line not visible here.
 */
166 int port; /* Port I'm listening at */
167 char *exportname; /* File I'm exporting */
168 off_t exportsize = OFFT_MAX; /* ...and its length */
169 off_t hunksize = OFFT_MAX;
173 u32 difffilelen=0 ; /* number of pages in difffile */
175 char clientname[256] ;
176 int child_arraysize=DEFAULT_CHILD_ARRAY;
180 #define DIFFPAGESIZE 4096 /* diff file uses those chunks */
183 #define F_MULTIFILE 2
184 #define F_COPYONWRITE 4
/* Parse the command line into the global configuration.
 *
 * argv[1] is the port (converted with atoi), argv[2] the export name, and
 * everything from argv[3] on is either an option starting with '-' or a
 * size. Visible options: -m sets F_MULTIFILE, -c sets F_COPYONWRITE, -l
 * takes the host list file, -a takes the idle timeout in seconds. A size
 * argument may end in k/K/m/M; the suffix is cut off before the number is
 * converted with atol(). The version banner and usage text are printed
 * under a condition that is not visible here.
 *
 * NOTE(review): atoi()/atol() cannot report malformed input, and atol()
 * yields a long, which may be narrower than off_t on 32-bit builds;
 * strtoll() with end-pointer checking would be safer.
 * NOTE(review): for an empty argument strlen(argv[i])-1 is -1 and
 * argv[i][last] reads before the start of the string.
 * NOTE(review): the "host list file requires an argument" message lacks a
 * trailing newline, unlike the timeout message; the usage synopsis also
 * omits -l.
 */
186 void cmdline(int argc, char *argv[])
191 printf("This is nbd-server version " VERSION "\n");
192 printf("Usage: port file_to_export [size][kKmM] [-r] [-m] [-c] [-a timeout_sec]\n"
194 " -m multiple file\n"
195 " -c copy on write\n"
196 " -l file with list of hosts that are allowed to connect.\n"
197 " -a maximum idle seconds, terminates when idle time exceeded\n"
198 " if port is set to 0, stdin is used (for running from inetd)\n"
199 " if file_to_export contains '%%s', it is substituted with IP\n"
200 " address of machine trying to connect\n" );
203 port = atoi(argv[1]);
204 for (i = 3; i < argc; i++) {
205 if (*argv[i] == '-') {
206 switch (argv[i][1]) {
211 flags |= F_MULTIFILE;
214 case 'c': flags |=F_COPYONWRITE;
221 fprintf(stderr, "host list file requires an argument");
226 timeout = atoi(argv[i+1]);
229 fprintf(stderr, "timeout requires argument\n");
235 int last = strlen(argv[i])-1;
236 char suffix = argv[i][last];
237 if (suffix == 'k' || suffix == 'K' ||
238 suffix == 'm' || suffix == 'M')
239 argv[i][last] = '\0';
240 es = (off_t)atol(argv[i]);
252 exportname = argv[2];
/* SIGCHLD handler: reap exited children and clear their slot in children[].
 *
 * For every PID returned by wait(), logs the exit status when the child
 * exited normally, then searches children[] for that PID. A PID that is not
 * in the table is logged; the matching slot is reset to 0, which is the
 * value connectme() looks for when it needs a free slot.
 *
 * NOTE(review): `status` is passed unchanged to both wait() (which expects
 * an int pointer) and WIFEXITED()/WEXITSTATUS() (which expect the int
 * value), so one of the two uses is mismatched whatever its declared type.
 * NOTE(review): the search loop tests children[i]!=pid before
 * i<child_arraysize, so it reads one element past the end of the table when
 * the PID is absent; the two conditions should be swapped.
 * NOTE(review): syslog() and printf() are not async-signal-safe.
 */
255 void sigchld_handler(int s)
261 while((pid=wait(status)) > 0) {
262 if(WIFEXITED(status)) {
263 msg3(LOG_INFO, "Child exited with %d", WEXITSTATUS(status));
265 for(i=0;children[i]!=pid&&i<child_arraysize;i++);
266 if(i>=child_arraysize) {
267 msg3(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld",(long) pid);
269 children[i]=(pid_t)0;
270 DEBUG2("Removing %d from the list of children", pid);
/* SIGTERM handler: walks the whole children[] table and forwards the
 * received signal number `s` to the recorded PIDs with kill().
 * NOTE(review): a guard that skips empty (0) slots is not visible here;
 * kill(0, sig) would signal the caller's entire process group - confirm
 * that empty slots are skipped. */
275 /* If we are terminated, make sure our children are, too. */
276 void sigterm_handler(int s) {
280 for(i=0;i<child_arraysize;i++) {
282 kill(children[i], s);
/* Listen on TCP `port` and fork one child per accepted, authorized client.
 *
 * Visible steps, in order:
 *   - write the process ID to /var/run/nbd-server.<port>.pid (part of the
 *     section that ends at the NODAEMON #endif);
 *   - create the socket and enable SO_REUSEADDR and SO_KEEPALIVE;
 *   - bind to address 0 (any local address) on `port`, listen with a
 *     backlog of 1;
 *   - install sigchld_handler and sigterm_handler with SA_RESTART;
 *   - allocate the children[] PID table;
 *   - loop forever: accept, record the peer name via set_peername(), check
 *     authorized_client(), find a free (zero) slot in children[], growing
 *     the table when none is left, then fork. The parent closes its copy of
 *     the connection and goes back to accept(); the child calls
 *     serveconnection().
 *
 * NOTE(review): the result of malloc() is used without a NULL check, and
 * the initial memset() is sized with DEFAULT_CHILD_ARRAY rather than
 * child_arraysize (the two are equal only while the default is untouched).
 * NOTE(review): addrinlen is an int; accept() expects a socklen_t pointer.
 */
294 void connectme(int port)
296 struct sockaddr_in addrin;
298 int addrinlen = sizeof(addrin);
299 int net, sock, newpid, i;
313 snprintf(pidfname, sizeof(char)*255, "/var/run/nbd-server.%d.pid", port);
314 pidf=fopen(pidfname, "w");
316 fprintf(pidf,"%d", (int)getpid());
320 fprintf(stderr, "Not fatal; continuing");
324 #endif /* NODAEMON */
326 if ((sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0)
329 /* lose the pesky "Address already in use" error message */
330 if (setsockopt(sock,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) {
331 err("setsockopt SO_REUSEADDR");
333 if (setsockopt(sock,SOL_SOCKET,SO_KEEPALIVE,&yes,sizeof(int)) == -1) {
334 err("setsockopt SO_KEEPALIVE");
337 DEBUG("Waiting for connections... bind, ");
338 addrin.sin_family = AF_INET;
339 addrin.sin_port = htons(port);
340 addrin.sin_addr.s_addr = 0;
341 if (bind(sock, (struct sockaddr *) &addrin, addrinlen) < 0)
344 if (listen(sock, 1) < 0)
347 sa.sa_handler = sigchld_handler;
348 sigemptyset(&sa.sa_mask);
349 sa.sa_flags = SA_RESTART;
350 if(sigaction(SIGCHLD, &sa, NULL) == -1)
351 err("sigaction: %m");
352 sa.sa_handler = sigterm_handler;
353 sigemptyset(&sa.sa_mask);
354 sa.sa_flags = SA_RESTART;
355 if(sigaction(SIGTERM, &sa, NULL) == -1)
356 err("sigaction: %m");
357 children=malloc(sizeof(pid_t)*child_arraysize);
358 memset(children, 0, sizeof(pid_t)*DEFAULT_CHILD_ARRAY);
359 for(;;) { /* infinite loop */
360 if ((net = accept(sock, (struct sockaddr *) &addrin, &addrinlen)) < 0)
363 set_peername(net,clientname) ;
364 if (!authorized_client(clientname)) {
365 msg2(LOG_INFO,"Unauthorized client") ;
369 msg2(LOG_INFO,"Authorized client") ;
/* NOTE(review): children[i] is read before i is range-checked, so a full
   table causes a read one element past its end; swap the two conditions. */
370 for(i=0;children[i]&&i<child_arraysize;i++);
371 if(i>=child_arraysize) {
/* NOTE(review): the value returned by realloc() is discarded and the
   requested size is the current size, so the table never actually grows
   (and may have moved); the memset() below then writes past the end of the
   allocation. The request should be for
   child_arraysize+DEFAULT_CHILD_ARRAY entries and the result assigned back
   to children after a NULL check. */
372 realloc(children, sizeof(pid_t)*child_arraysize);
373 memset(children+child_arraysize, 0, sizeof(pid_t)*DEFAULT_CHILD_ARRAY);
375 child_arraysize+=DEFAULT_CHILD_ARRAY;
378 if ((children[i]=fork())<0) {
379 msg3(LOG_INFO,"Could not fork (%s)",strerror(errno)) ;
383 if (children[i]>0) { /* parent */
384 close(net) ; continue ; }
390 msg2(LOG_INFO,"Starting to serve") ;
391 serveconnection(net) ;
/* Reply helpers for the request loop. Both macros rely on variables named
 * `net` and `reply` being in scope at the point of use.
 *   SEND  - transmits the current reply structure with writeit(); note that
 *           the expansion already ends in a semicolon.
 *   ERROR - marks the reply as failed (error = htonl(-1)), sends it, then
 *           clears the error field again and invalidates lastpoint.
 *           It expands to a bare { } block rather than do { } while (0),
 *           so "ERROR;" followed by else would not parse.
 * lastpoint is the file position remembered for maybeseek(); -1 means
 * "unknown", which forces the next maybeseek() to call lseek().
 */
395 #define SEND writeit( net, &reply, sizeof( reply ));
396 #define ERROR { reply.error = htonl(-1); SEND; reply.error = 0; lastpoint = -1; }
398 off_t lastpoint = (off_t)-1;
/* Position descriptor `handle` at offset `a`, calling lseek() only when `a`
 * differs from the remembered position lastpoint. A failing lseek() is
 * reported through err(). The condition guarding the "Can not happen"
 * report is not visible here (presumably a sanity check on `a` - confirm),
 * nor is any update of lastpoint after a successful seek.
 * NOTE(review): lastpoint is a single global while callers pass different
 * descriptors (export[a/hunksize]); confirm that a position cached for one
 * descriptor cannot be mistaken for another's.
 */
400 void maybeseek(int handle, off_t a)
403 err("Can not happen\n");
404 if (lastpoint != a) {
405 if (lseek(handle, a, SEEK_SET) < 0)
406 err("Can not seek locally!\n");
/* Unconditionally position descriptor `handle` at absolute offset `a`
 * (SEEK_SET). A failing lseek() is reported through err(). Unlike
 * maybeseek(), the visible code does not consult lastpoint. */
413 void myseek(int handle,off_t a)
415 if (lseek(handle, a, SEEK_SET) < 0)
416 err("Can not seek locally!\n");
/* Scratch buffer holding exactly one diff page; expwrite() fills it with
 * the original page contents, overlays the new data and writes it out. */
419 char pagebuf[DIFFPAGESIZE];
/* Read `len` bytes at export offset `a` straight from the exported file(s).
 * The file is chosen as export[a/hunksize] and positioned at a%hunksize via
 * maybeseek(). Returns 0 when read() delivered exactly `len` bytes and 1
 * otherwise (error, end of file or short read).
 * NOTE(review): a request that crosses a hunk boundary is not split here;
 * confirm that callers never pass one.
 */
421 int rawexpread(off_t a, char *buf, int len)
423 maybeseek(export[a/hunksize], a%hunksize);
424 return (read(export[a/hunksize], buf, len) != len);
/* Read `len` bytes at export offset `a` into `buf`, honouring copy-on-write.
 *
 * Without F_COPYONWRITE this is just rawexpread(). Otherwise the request is
 * walked one DIFFPAGESIZE page at a time (pages mapl..maph). For each page,
 * difmap[page] == (u32)-1 means the page was never written and must come
 * from the original export; any other value is the page's index inside the
 * diff file, from which rdlen bytes are read. After a page is handled, len,
 * a and buf advance by rdlen.
 *
 * Returns -1 when a diff-file read is short; the value returned after the
 * loop is not visible here. The assignment of `offset` (position of `a`
 * inside the current page) is also on a hidden line.
 *
 * NOTE(review): in the "page is not there" branch the function returns the
 * result of rawexpread() immediately, so a request spanning several pages
 * stops after the first unmodified page and the remaining bytes of `buf`
 * are never filled. It should fail only when rawexpread() reports an error
 * and otherwise fall through to the advance step.
 */
427 int expread(off_t a, char *buf, int len)
430 off_t mapcnt, mapl, maph, pagestart;
432 if (!(flags & F_COPYONWRITE))
433 return rawexpread(a, buf, len);
434 DEBUG3("Asked to read %d bytes at %Lu.\n", len, (unsigned long long)a);
436 mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
438 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
439 pagestart=mapcnt*DIFFPAGESIZE;
441 rdlen=(len<DIFFPAGESIZE-offset) ? len : DIFFPAGESIZE-offset;
442 if (difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
443 DEBUG3("Page %Lu is at %lu\n", (unsigned long long)mapcnt,
444 (unsigned long)difmap[mapcnt]);
445 myseek(difffile, difmap[mapcnt]*DIFFPAGESIZE+offset);
446 if (read(difffile, buf, rdlen) != rdlen) return -1;
447 } else { /* the block is not there */
448 DEBUG2("Page %Lu is not here, we read the original one\n",
449 (unsigned long long)mapcnt);
450 return rawexpread(a, buf, rdlen);
452 len-=rdlen; a+=rdlen; buf+=rdlen;
/* Write `len` bytes from `buf` at export offset `a` straight to the exported
 * file(s). The file is chosen as export[a/hunksize] and positioned at
 * a%hunksize via maybeseek(). Returns 0 when write() accepted exactly `len`
 * bytes and 1 otherwise.
 * NOTE(review): a request that crosses a hunk boundary is not split here;
 * confirm that callers never pass one.
 */
457 int rawexpwrite(off_t a, char *buf, int len)
459 maybeseek(export[a/hunksize], a%hunksize);
460 return (write(export[a/hunksize], buf, len) != len);
/* Write `len` bytes from `buf` at export offset `a`, honouring copy-on-write.
 *
 * Without F_COPYONWRITE this is just rawexpwrite(). Otherwise the request
 * is walked one DIFFPAGESIZE page at a time:
 *   - page already in the diff file (difmap[page] != (u32)-1): seek to its
 *     slot plus the in-page offset and overwrite wrlen bytes;
 *   - page not yet copied: append a new slot at the end of the diff file
 *     (index difffilelen, which is then incremented), load the original
 *     page into pagebuf with rawexpread(), overlay the new bytes at the
 *     in-page offset and write the full DIFFPAGESIZE page.
 * rdlen is clamped so that the read of the original page does not run past
 * the end of the current hunk. After each page, len, a and buf advance by
 * wrlen.
 *
 * Returns -1 on any failed read or write; the value returned after the loop
 * is not visible here, and neither are the assignments of `offset` and the
 * initial `rdlen`.
 *
 * NOTE(review): mapcnt/mapl/maph are u32 here but off_t in expread(), so
 * page numbers are truncated for very large exports.
 * NOTE(review): difmap[mapcnt]*DIFFPAGESIZE and difffilelen*DIFFPAGESIZE
 * are computed in u32 before being widened - confirm this cannot overflow
 * for diff files of 4 GiB and more.
 * NOTE(review): when rdlen is smaller than DIFFPAGESIZE, the tail of the
 * global pagebuf still holds whatever an earlier call left there and is
 * written to the diff file as-is.
 */
464 int expwrite(off_t a, char *buf, int len)
466 u32 mapcnt,mapl,maph ; int wrlen,rdlen ;
467 off_t pagestart ; int offset ;
469 if (!(flags & F_COPYONWRITE))
470 return(rawexpwrite(a,buf,len));
471 DEBUG3("Asked to write %d bytes at %Lu.\n", len, (unsigned long long)a);
473 mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
475 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
476 pagestart=mapcnt*DIFFPAGESIZE ;
478 wrlen=(len<DIFFPAGESIZE-offset) ? len : DIFFPAGESIZE-offset ;
480 if (difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
481 DEBUG3("Page %Lu is at %lu\n", (unsigned long long)mapcnt,
482 (unsigned long)difmap[mapcnt]) ;
483 myseek(difffile,difmap[mapcnt]*DIFFPAGESIZE+offset) ;
484 if (write(difffile, buf, wrlen) != wrlen) return -1 ;
485 } else { /* the block is not there */
486 myseek(difffile,difffilelen*DIFFPAGESIZE) ;
487 difmap[mapcnt]=difffilelen++ ;
488 DEBUG3("Page %Lu is not here, we put it at %lu\n",
489 (unsigned long long)mapcnt,
490 (unsigned long)difmap[mapcnt]);
492 if (rdlen+pagestart%hunksize>hunksize)
493 rdlen=hunksize-(pagestart%hunksize) ;
494 if (rawexpread(pagestart,pagebuf,rdlen)) return -1 ;
495 memcpy(pagebuf+offset,buf,wrlen) ;
496 if (write(difffile,pagebuf,DIFFPAGESIZE)!=DIFFPAGESIZE) return -1 ;
498 len-=wrlen ; a+=wrlen ; buf+=wrlen ;
/* Speak the NBD protocol with one client on socket `net`.
 *
 * Negotiation (server to client, in this order): the 8-byte INIT_PASSWD,
 * cliserv_magic converted with htonll(), the export size as a 64-bit value
 * in network byte order, and 128 zero bytes.
 *
 * Request loop, per request:
 *   - read a struct nbd_request and convert `from` and `type` to host order;
 *   - type 2 is a disconnect: the copy-on-write map is freed, the diff file
 *     is closed and unlinked, and the function leaves through err();
 *   - verify the request magic and the request length;
 *   - copy the request handle into the reply;
 *   - reject requests that reach past OFFT_MAX or past exportsize, and any
 *     non-read request when F_READONLY is set;
 *   - type 1 (write): read `len` payload bytes from the client, then store
 *     them with expwrite() unless autoreadonly is 1;
 *   - otherwise (read): fetch the data with expread() into `buf` just after
 *     room for the reply header, copy the header in front and send header
 *     and data with a single writeit().
 *
 * NOTE(review): the request type is acted on (disconnect) before the magic
 * number is verified.
 * NOTE(review): the negotiation writes only check for a negative result, so
 * a short write goes unnoticed; 290 bytes of `zeros` are cleared although
 * only 128 are sent.
 * NOTE(review): an orderly disconnect is reported through err(), the same
 * path as real failures.
 * NOTE(review): the declared types of request.from and len are not visible
 * here - confirm that request.from + len cannot wrap before it is compared.
 */
503 int mainloop(int net)
505 struct nbd_request request;
506 struct nbd_reply reply;
511 memset(zeros, 0, 290);
512 if (write(net, INIT_PASSWD, 8) < 0)
513 err("Negotiation failed: %m");
514 cliserv_magic = htonll(cliserv_magic);
515 if (write(net, &cliserv_magic, sizeof(cliserv_magic)) < 0)
516 err("Negotiation failed: %m");
517 size_host = htonll((u64)exportsize);
518 if (write(net, &size_host, 8) < 0)
519 err("Negotiation failed: %m");
520 if (write(net, zeros, 128) < 0)
521 err("Negotiation failed: %m");
523 DEBUG("Entering request loop!\n");
524 reply.magic = htonl(NBD_REPLY_MAGIC);
527 #define BUFSIZE (1024*1024)
537 readit(net, &request, sizeof(request));
538 request.from = ntohll(request.from);
539 request.type = ntohl(request.type);
541 if (request.type==2) { /* Disconnect request */
542 if (difmap) free(difmap) ;
544 close(difffile) ; unlink(difffilename) ; }
545 err("Disconnect request received.") ;
548 len = ntohl(request.len);
550 if (request.magic != htonl(NBD_REQUEST_MAGIC))
551 err("Not enough magic.");
553 err("Request too big!");
555 printf("%s from %Lu (%Lu) len %d, ", request.type ? "WRITE" :
556 "READ", (unsigned long long)request.from,
557 (unsigned long long)request.from / 512, len);
559 memcpy(reply.handle, request.handle, sizeof(reply.handle));
560 if ((request.from + len) > (OFFT_MAX)) {
561 DEBUG("[Number too large!]");
565 if ((((off_t)request.from + len) > exportsize) ||
566 ((flags & F_READONLY) && request.type)) {
571 if (request.type==1) { /* WRITE */
572 DEBUG("wr: net->buf, ");
573 readit(net, buf, len);
575 if ((autoreadonly == 1) || expwrite(request.from, buf, len)) {
576 DEBUG("Write failed: %m" );
588 if (expread(request.from, buf + sizeof(struct nbd_reply), len)) {
590 DEBUG("Read failed: %m");
597 memcpy(buf, &reply, sizeof(struct nbd_reply));
598 writeit(net, buf, len + sizeof(struct nbd_reply));
/* Export name after the client address has been substituted into
 * exportname by set_peername(); serveconnection() in turn uses it as the
 * format for the per-file name. */
603 char exportname2[1024];
/* Record who is connected on socket `net`.
 *
 * Looks up the peer address with getpeername(), renders it as dotted-quad
 * text with inet_ntoa(), expands exportname (treated as a printf format,
 * so a "%s" in it becomes the client address) into the global exportname2,
 * logs the connection, and copies at most 255 bytes of the address text
 * into the caller's `clientname` buffer.
 *
 * NOTE(review): exportname comes from the command line and is used directly
 * as the sprintf() format with no bound on the 1024-byte exportname2; a long
 * name overflows the buffer and stray conversions (e.g. "%d", a second
 * "%s") read arguments that were never passed. snprintf() plus validation
 * of the format would be safer.
 * NOTE(review): the failure message says "getsockname" although the call
 * that failed is getpeername().
 * NOTE(review): addrinlen is an int; getpeername() expects a socklen_t
 * pointer.
 */
605 void set_peername(int net,char *clientname)
607 struct sockaddr_in addrin;
608 int addrinlen = sizeof( addrin );
611 if (getpeername( net, (struct sockaddr *) &addrin, &addrinlen ) < 0)
612 err("getsockname failed: %m");
613 peername = inet_ntoa(addrin.sin_addr);
614 sprintf(exportname2, exportname, peername);
616 msg4(LOG_INFO, "connect from %s, assigned file is %s", peername, exportname2);
617 strncpy(clientname,peername,255) ;
/* Work out the size in bytes of the already opened descriptor `export`.
 *
 * Three methods are tried in the order they appear:
 *   1. lseek() to SEEK_END - used when it yields a positive offset;
 *   2. fstat() - used when it succeeds and st_size is positive;
 *   3. ioctl(BLKGETSIZE) - the non-zero result is multiplied by 512 to
 *      turn it into bytes.
 * If none of them produces a size, err() is called with
 * "Could not find size of exported block device".
 *
 * The conditions around the "lseek failed" trace and the "fstat failed"
 * err() call, and the declared type of es32, are not visible here.
 * NOTE(review): a successful lseek() leaves the descriptor positioned at
 * end of file - confirm that later I/O always seeks first.
 */
620 off_t size_autodetect(int export)
624 struct stat stat_buf;
627 DEBUG("looking for export size with lseek SEEK_END\n");
628 es = lseek(export, (off_t)0, SEEK_END);
629 if (es > ((off_t)0)) {
632 DEBUG2("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4)));
635 DEBUG("looking for export size with fstat\n");
636 stat_buf.st_size = 0;
637 error = fstat(export, &stat_buf);
638 if (!error && stat_buf.st_size > 0) {
639 return (off_t)stat_buf.st_size;
641 err("fstat failed: %m");
645 DEBUG("looking for export size with ioctl BLKGETSIZE\n");
646 if (!ioctl(export, BLKGETSIZE, &es32) && es32) {
647 es = (off_t)es32 * (off_t)512;
651 err("Could not find size of exported block device: %m");
/* Program entry point.
 *
 * Verifies that struct nbd_request is exactly 28 bytes, which guards
 * against padding changing the wire layout, then hands control to
 * connectme(), which is not expected to return. The call that parses the
 * command line is not visible here.
 *
 * NOTE(review): the usage text printed by cmdline() says that port 0 means
 * "use stdin" (inetd mode), but the visible code returns 1 when port is 0.
 */
655 int main(int argc, char *argv[])
660 if (sizeof( struct nbd_request )!=28) {
661 fprintf(stderr,"Bad size of structure. Alignment problems?\n");
667 if (!port) return 1 ;
668 connectme(port); /* serve infinitely */
/* Prepare the export for the client on socket `net` (child process side).
 * Only the part of the function visible here is described.
 *
 *   - Open the exported file(s): for i = 0, hunksize, 2*hunksize, ... below
 *     exportsize, the per-file name is produced by using exportname2 as a
 *     printf format with the file index i/hunksize, and the descriptor is
 *     stored in export[i/hunksize]. Files are opened O_RDONLY when
 *     F_READONLY is set and O_RDWR otherwise; if that open fails, a
 *     read-only open is attempted before giving up through err().
 *   - If exportsize still holds the OFFT_MAX "unspecified" marker, the size
 *     is taken from size_autodetect() on the first file.
 *   - With F_COPYONWRITE: build the diff file name from exportname2 and
 *     clientname, create/truncate it with mode 0600, allocate one u32 map
 *     entry per DIFFPAGESIZE page and mark every entry as (u32)-1
 *     ("page not in diff file").
 *
 * NOTE(review): both sprintf() calls write into fixed-size buffers without
 * a bound, and exportname2 is used as a format string.
 * NOTE(review): the map holds exportsize/DIFFPAGESIZE entries, rounded
 * down, so when exportsize is not a multiple of DIFFPAGESIZE the final
 * partial page has no entry and expread()/expwrite() would index past the
 * end of difmap for it.
 * NOTE(review): exportsize > (off_t)OFFT_MAX can never be true for an
 * off_t value.
 */
673 void serveconnection(int net)
677 for (i=0; i<exportsize; i+=hunksize) {
678 char exportname3[1024];
680 sprintf(exportname3, exportname2, i/hunksize);
681 printf( "Opening %s\n", exportname3 );
682 if ((export[i/hunksize] = open(exportname3, (flags & F_READONLY) ? O_RDONLY : O_RDWR)) == -1) {
683 /* Read-write access was requested, but the media is read-only */
686 if ((export[i/hunksize] = open(exportname3, O_RDONLY)) == -1)
687 err("Could not open exported file: %m");
691 if (exportsize == (off_t)OFFT_MAX) {
692 exportsize = size_autodetect(export[0]);
694 if (exportsize > (off_t)OFFT_MAX) {
695 err("Size of exported file is too big\n");
698 msg3(LOG_INFO, "size of exported file/device is %Lu",
699 (unsigned long long)exportsize);
701 if (flags & F_COPYONWRITE) {
702 sprintf(difffilename,"%s-%s-%d.diff",exportname2,clientname,
704 msg3(LOG_INFO,"About to create map and diff file %s",difffilename) ;
705 difffile=open(difffilename,O_RDWR | O_CREAT | O_TRUNC,0600) ;
706 if (difffile<0) err("Could not create diff file (%m)") ;
707 if ((difmap=calloc(exportsize/DIFFPAGESIZE,sizeof(u32)))==NULL)
708 err("Could not allocate memory") ;
709 for (i=0;i<exportsize/DIFFPAGESIZE;i++) difmap[i]=(u32)-1 ;