2 * Network Block Device - server
4 * Copyright 1996-1998 Pavel Machek, distribute under GPL
5 * <pavel@atrey.karlin.mff.cuni.cz>
6 * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk>
8 * Version 1.0 - hopefully 64-bit-clean
9 * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au>
10 * Version 1.2 - autodetect size of block devices, thanks to Peter T. Breuer <ptb@it.uc3m.es>
11 * Version 1.5 - can compile on Unix systems that don't have 64 bit integer
12 * type, or don't have 64 bit file offsets by defining FS_32BIT
13 * in compile options for nbd-server *only*. This can be done
14 * with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the
15 * original autoconf input file, or I would make it a configure
16 * option.) Ken Yap <ken@nlc.net.au>.
17 * Version 1.6 - fix autodetection of block device size and really make 64 bit
18 * clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk>
19 * Version 2.0 - Version synchronised with client
20 * Version 2.1 - Reap zombie client processes when they exit. Removed
21 * (uncommented) the _IO magic, it's no longer necessary. Wouter
22 * Verhelst <wouter@debian.org>
23 * Version 2.2 - Auto switch to read-only mode (useful for floppies).
24 * Version 2.3 - Fixed code so that Large File Support works. This
25 * removes the FS_32BIT compile-time directive; define
26 * _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be
27 * using FS_32BIT. This will allow you to use files >2GB instead of
28 * having to use the -m option. Wouter Verhelst <wouter@debian.org>
29 * Version 2.4 - Added code to keep track of children, so that we can
30 * properly kill them from initscripts. Add a call to daemon(),
31 * so that processes don't think they have to wait for us, which is
32 * interesting for initscripts as well. Wouter Verhelst
34 * Version 2.5 - Bugfix release: forgot to reset child_arraysize to
35 * zero after fork()ing, resulting in nbd-server going berserk
36 * when it receives a signal with at least one child open. Wouter
37 * Verhelst <wouter@debian.org>
38 * 10/10/2003 - Added socket option SO_KEEPALIVE (sf.net bug 819235);
39 * rectified type of mainloop::size_host (sf.net bugs 814435 and
40 * 817385); close the PID file after writing to it, so that the
41 * daemon can actually be found. Wouter Verhelst
43 * 10/10/2003 - Size of the data "size_host" was wrong and so was not
44 * correctly put in network endianness. Many types were corrected
45 * (size_t and off_t instead of int). <vspaceg@sourceforge.net>
/* VERSION is the string embedded in the usage banner printed by cmdline();
 * it expands to PACKAGE_VERSION. GIGA is 1024*1024*1024 (2^30). */
48 #define VERSION PACKAGE_VERSION
49 #define GIGA (1*1024*1024*1024)
51 #include <sys/types.h>
52 #include <sys/socket.h>
54 #include <sys/wait.h> /* wait */
55 #include <sys/ioctl.h>
56 #include <sys/mount.h> /* For BLKGETSIZE */
57 #include <signal.h> /* sigaction */
58 #include <netinet/tcp.h>
59 #include <netinet/in.h> /* sockaddr_in, htons, in_addr */
60 #include <netdb.h> /* hostent, gethostby*, getservby* */
67 #include <arpa/inet.h>
70 /* used in cliserv.h, so must be first */
71 #define MY_NAME "nbd_server"
/* DEFAULT_CHILD_ARRAY is used both as the initial value of child_arraysize
 * and as the amount by which connectme() grows the children[] table. */
74 /* how much space for child PIDs we have by default. Dynamically
75 allocated, and will be realloc()ed if out of space, so this should
76 probably be fair for most situations. */
77 #define DEFAULT_CHILD_ARRAY 256
/*
 * Logging and debug helpers.
 *
 * msg2/msg3/msg4 take a syslog priority `a`, a printf-style format `b` and
 * zero, one or two format arguments. Two alternative definitions follow:
 * the first forwards everything to syslog(); the second ignores the
 * priority and writes the formatted text plus a newline to stderr.
 * DEBUG/DEBUG2/DEBUG3 either forward to printf() or expand to nothing.
 * NOTE(review): the preprocessor conditionals that choose between the
 * alternatives are not visible in this excerpt - presumably ISSERVER for
 * msg*, per the comment below; confirm against the full file.
 */
79 /* Debugging macros, now nothing goes to syslog unless you say ISSERVER */
81 #define msg2(a,b) syslog(a,b)
82 #define msg3(a,b,c) syslog(a,b,c)
83 #define msg4(a,b,c,d) syslog(a,b,c,d)
85 #define msg2(a,b) do { fprintf(stderr,b) ; fputs("\n",stderr) ; } while(0)
86 #define msg3(a,b,c) do { fprintf(stderr,b,c); fputs("\n",stderr) ; } while(0)
87 #define msg4(a,b,c,d) do { fprintf(stderr,b,c,d); fputs("\n",stderr) ; } while(0)
92 #define DEBUG( a ) printf( a )
93 #define DEBUG2( a,b ) printf( a,b )
94 #define DEBUG3( a,b,c ) printf( a,b,c )
98 #define DEBUG3( a,b,c )
/* Fallback: when the build does not define PACKAGE_VERSION, make it an empty
 * string so that VERSION (which expands to PACKAGE_VERSION) still pastes
 * cleanly into the usage banner's string literal. */
101 #ifndef PACKAGE_VERSION
102 #define PACKAGE_VERSION ""
/* This is starting to get ugly. If someone knows a better way to find
 * the maximum value of a signed type *without* relying on overflow
 * (doing so breaks on 64bit architectures), that would be nice.
 *
 * How it works: shift a 1 up to the lowest bit of the top byte and subtract
 * one, giving all-ones in every byte below the top one; shift that left by
 * seven so it occupies every value bit except the low seven; then add 127
 * to fill those in. No intermediate result exceeds the final maximum.
 *
 * The whole expansion is parenthesized so the macro behaves as a single
 * operand inside larger expressions (e.g. OFFT_MAX / 2, x - OFFT_MAX).
 */
#define OFFT_MAX ((((((off_t)1)<<((sizeof(off_t)-1)*8))-1)<<7)+127)
// Forward declarations for functions defined further down, followed by
// global settings:
//   difffilename - path of the per-client diff file built in serveconnection()
//   timeout      - set from the -a command-line option in cmdline()
//   autoreadonly - when 1, mainloop() treats every write request as failed
//   auth_file    - file read by authorized_client(); one allowed host per line
111 void serveconnection(int net);
112 void set_peername(int net,char *clientname);
115 char difffilename[256];
116 unsigned int timeout = 0;
117 int autoreadonly = 0;
118 char *auth_file="nbd_server.allow";
// Decide whether the client called `name` may connect.
//
// Opens auth_file for reading and logs a message if that fails; otherwise
// reads it line by line and compares the first strlen(name) characters of
// each line with `name`.
//
// NOTE(review): the comparison only covers strlen(name) characters, so a
// client "10.0.0.1" also matches a line "10.0.0.12" - confirm whether a
// whole-line match was intended.
// NOTE(review): the return statements (including what is returned when the
// file cannot be opened) and the fclose() are not visible in this excerpt.
120 int authorized_client(char *name)
121 /* 0 - authorization refused, 1 - OK
122 authorization file contains one line per machine, no wildcards
129 if ((f=fopen(auth_file,"r"))==NULL) {
130 msg4(LOG_INFO,"Can't open authorization file %s (%s).",
131 auth_file,strerror(errno)) ;
135 while (fgets(line,LINELEN,f)!=NULL) {
136 if (strncmp(line,name,strlen(name))==0) {
// Read from descriptor `f` into `buf`; a read() result of zero or less
// (end of stream or error) is reported through err("Read failed: %m").
// NOTE(review): any loop that retries short reads until `len` bytes have
// arrived is not visible in this excerpt - confirm in the full file.
145 inline void readit(int f, void *buf, size_t len)
150 if ((res = read(f, buf, len)) <= 0)
151 err("Read failed: %m");
// Send `buf` on socket `f` with send(..., flags 0); a result of zero or
// less is reported through err("Send failed: %m").
// NOTE(review): any loop that handles partial sends is not visible in this
// excerpt - confirm in the full file.
157 inline void writeit(int f, void *buf, size_t len)
162 if ((res = send(f, buf, len, 0)) <= 0)
163 err("Send failed: %m");
// Process-wide export state.
//   exportsize / hunksize both start at OFFT_MAX; serveconnection() treats
//   exportsize == OFFT_MAX as "size not given, autodetect it".
//   a / hunksize selects the entry of export[] and a % hunksize the offset
//   inside it (see rawexpread/rawexpwrite).
//   difffilelen counts DIFFPAGESIZE-sized pages appended to the diff file.
//   child_arraysize is the current number of slots in children[].
//   F_MULTIFILE and F_COPYONWRITE are bits OR-ed into `flags` by cmdline().
169 unsigned int port; /* Port I'm listening at */
170 char *exportname; /* File I'm exporting */
171 off_t exportsize = OFFT_MAX; /* ...and its length */
172 off_t hunksize = OFFT_MAX;
176 u32 difffilelen=0 ; /* number of pages in difffile */
178 char clientname[256] ;
179 int child_arraysize=DEFAULT_CHILD_ARRAY;
183 #define DIFFPAGESIZE 4096 /* diff file uses those chunks */
186 #define F_MULTIFILE 2
187 #define F_COPYONWRITE 4
/*
 * Parse the command line into the global configuration.
 *
 * argv[1] is the port and argv[2] the export name; arguments from index 3
 * onward are either "-x" options (switch on the second character) or a
 * size that may carry a k/K/m/M suffix. Prints a usage banner in a branch
 * whose condition is not visible in this excerpt.
 *
 * NOTE(review): atoi()/atol() give no indication of malformed numbers.
 */
189 void cmdline(int argc, char *argv[])
194 printf("This is nbd-server version " VERSION "\n");
195 printf("Usage: port file_to_export [size][kKmM] [-r] [-m] [-c] [-a timeout_sec]\n"
197 " -m multiple file\n"
198 " -c copy on write\n"
199 " -l file with list of hosts that are allowed to connect.\n"
200 " -a maximum idle seconds, terminates when idle time exceeded\n"
201 " if port is set to 0, stdin is used (for running from inetd)\n"
202 " if file_to_export contains '%%s', it is substituted with IP\n"
203 " address of machine trying to connect\n" );
206 port = atoi(argv[1]);
/* Options start at argv[3]; argv[1] and argv[2] are positional. */
207 for (i = 3; i < argc; i++) {
208 if (*argv[i] == '-') {
209 switch (argv[i][1]) {
214 flags |= F_MULTIFILE;
217 case 'c': flags |=F_COPYONWRITE;
224 fprintf(stderr, "host list file requires an argument");
229 timeout = atoi(argv[i+1]);
232 fprintf(stderr, "timeout requires argument\n");
/* Size argument: strip a trailing k/K/m/M in place before converting.
 * NOTE(review): for an empty argument strlen()-1 wraps around (size_t),
 * so argv[i][last] would index far out of bounds. */
238 size_t last = strlen(argv[i])-1;
239 char suffix = argv[i][last];
240 if (suffix == 'k' || suffix == 'K' ||
241 suffix == 'm' || suffix == 'M')
242 argv[i][last] = '\0';
243 es = (off_t)atol(argv[i]);
255 exportname = argv[2];
258 void sigchld_handler(int s)
264 while((pid=wait(status)) > 0) {
265 if(WIFEXITED(status)) {
266 msg3(LOG_INFO, "Child exited with %d", WEXITSTATUS(status));
268 for(i=0;children[i]!=pid&&i<child_arraysize;i++);
269 if(i>=child_arraysize) {
270 msg3(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld",(long) pid);
272 children[i]=(pid_t)0;
273 DEBUG2("Removing %d from the list of children", pid);
278 /* If we are terminated, make sure our children are, too. */
/* Walks all child_arraysize slots of children[] and forwards the received
 * signal number `s` with kill().
 * NOTE(review): whether empty (zero) slots are skipped before kill() is not
 * visible in this excerpt; kill() with pid 0 signals the caller's whole
 * process group, so confirm a guard exists in the full file. */
279 void sigterm_handler(int s) {
283 for(i=0;i<child_arraysize;i++) {
285 kill(children[i], s);
/*
 * Listen on TCP `port` and hand each accepted, authorized client to a
 * forked child.
 *
 * Visible steps: write our PID to /var/run/nbd-server.<port>.pid, create a
 * TCP socket with SO_REUSEADDR and SO_KEEPALIVE, bind to all local
 * addresses (s_addr = 0), listen, install the SIGCHLD and SIGTERM handlers,
 * allocate the children[] table, then loop forever: accept, resolve the
 * peer name, check authorization, find a free children[] slot (growing the
 * table when full), fork, and in the child call serveconnection().
 */
297 void connectme(unsigned int port)
299 struct sockaddr_in addrin;
301 int addrinlen = sizeof(addrin);
302 int net, sock, newpid, i;
/* PID file; a failure to write it is reported as non-fatal. */
316 snprintf(pidfname, sizeof(char)*255, "/var/run/nbd-server.%d.pid", port);
317 pidf=fopen(pidfname, "w");
319 fprintf(pidf,"%d", (int)getpid());
323 fprintf(stderr, "Not fatal; continuing");
327 #endif /* NODAEMON */
329 if ((sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0)
332 /* lose the pesky "Address already in use" error message */
333 if (setsockopt(sock,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) {
334 err("setsockopt SO_REUSEADDR");
336 if (setsockopt(sock,SOL_SOCKET,SO_KEEPALIVE,&yes,sizeof(int)) == -1) {
337 err("setsockopt SO_KEEPALIVE");
340 DEBUG("Waiting for connections... bind, ");
341 addrin.sin_family = AF_INET;
342 addrin.sin_port = htons(port);
343 addrin.sin_addr.s_addr = 0;
344 if (bind(sock, (struct sockaddr *) &addrin, addrinlen) < 0)
347 if (listen(sock, 1) < 0)
/* SA_RESTART: interrupted system calls such as accept() are restarted
 * after the handler returns. */
350 sa.sa_handler = sigchld_handler;
351 sigemptyset(&sa.sa_mask);
352 sa.sa_flags = SA_RESTART;
353 if(sigaction(SIGCHLD, &sa, NULL) == -1)
354 err("sigaction: %m");
355 sa.sa_handler = sigterm_handler;
356 sigemptyset(&sa.sa_mask);
357 sa.sa_flags = SA_RESTART;
358 if(sigaction(SIGTERM, &sa, NULL) == -1)
359 err("sigaction: %m");
/* NOTE(review): the malloc() result is passed straight to memset() with no
 * NULL check in between. */
360 children=malloc(sizeof(pid_t)*child_arraysize);
361 memset(children, 0, sizeof(pid_t)*DEFAULT_CHILD_ARRAY);
362 for(;;) { /* infinite loop */
363 if ((net = accept(sock, (struct sockaddr *) &addrin, &addrinlen)) < 0)
366 set_peername(net,clientname) ;
367 if (!authorized_client(clientname)) {
368 msg2(LOG_INFO,"Unauthorized client") ;
372 msg2(LOG_INFO,"Authorized client") ;
/* Find the first free (zero) slot.
 * NOTE(review): children[i] is evaluated before i<child_arraysize, so a
 * full table is read one element past its end. */
373 for(i=0;children[i]&&i<child_arraysize;i++);
374 if(i>=child_arraysize) {
/* NOTE(review): realloc() is asked for the current child_arraysize, yet the
 * memset() below clears DEFAULT_CHILD_ARRAY entries beyond it and the size
 * is only increased afterwards - unless a line not shown here adjusts the
 * size first, the table is not actually enlarged before being written. */
377 ptr=realloc(children, sizeof(pid_t)*child_arraysize);
380 memset(children+child_arraysize, 0, sizeof(pid_t)*DEFAULT_CHILD_ARRAY);
382 child_arraysize+=DEFAULT_CHILD_ARRAY;
384 msg2(LOG_INFO,"Not enough memory to store child PID");
/* The fork() result is stored directly in the slot: negative on failure,
 * the child's PID in the parent, zero in the child. */
390 if ((children[i]=fork())<0) {
391 msg3(LOG_INFO,"Could not fork (%s)",strerror(errno)) ;
395 if (children[i]>0) { /* parent */
396 close(net) ; continue ; }
402 msg2(LOG_INFO,"Starting to serve") ;
403 serveconnection(net) ;
/*
 * Reply helpers for the request loop. Both macros refer to variables named
 * `net` and `reply` that must be in scope where they are expanded.
 *   SEND  - transmit the current reply; the expansion already ends in ';'.
 *   ERROR - send the reply with error = htonl(-1), then reset error to 0
 *           and set lastpoint to -1.
 * lastpoint is the offset maybeseek() compares against to decide whether an
 * lseek() is needed; it starts at (off_t)-1.
 */
407 #define SEND writeit( net, &reply, sizeof( reply ));
408 #define ERROR { reply.error = htonl(-1); SEND; reply.error = 0; lastpoint = -1; }
410 off_t lastpoint = (off_t)-1;
/*
 * Position `handle` at offset `a`, skipping the lseek() when lastpoint
 * already equals `a`. Offsets below zero or above exportsize are reported
 * through err("Can not happen\n"), as is a failing lseek().
 * NOTE(review): where lastpoint is updated after a seek is not visible in
 * this excerpt.
 */
412 void maybeseek(int handle, off_t a)
414 if (a < 0 || a > exportsize)
415 err("Can not happen\n");
416 if (lastpoint != a) {
417 if (lseek(handle, a, SEEK_SET) < 0)
418 err("Can not seek locally!\n");
/* Unconditionally seek `handle` to absolute offset `a`; a failing lseek()
 * is reported through err(). Unlike maybeseek() this does not consult
 * lastpoint or exportsize. */
425 void myseek(int handle,off_t a)
427 if (lseek(handle, a, SEEK_SET) < 0)
428 err("Can not seek locally!\n");
/* pagebuf: one DIFFPAGESIZE-sized scratch page, used by expwrite() when it
 * copies an original page into the diff file. */
431 char pagebuf[DIFFPAGESIZE];
/*
 * Read `len` bytes at export offset `a` straight from the backing file.
 * The file is export[a/hunksize] and the position inside it a%hunksize.
 * Returns 0 when exactly `len` bytes were read, non-zero on error or a
 * short read.
 */
433 int rawexpread(off_t a, char *buf, size_t len)
437 maybeseek(export[a/hunksize], a%hunksize);
438 res = read(export[a/hunksize], buf, len);
439 return (res < 0 || (size_t)res != len);
/*
 * Read `len` bytes at export offset `a`, honouring the diff map.
 *
 * Without F_COPYONWRITE this is just rawexpread(). Otherwise the range is
 * walked one DIFFPAGESIZE page at a time (mapl..maph): a page whose difmap
 * entry is not (u32)-1 is read from the diff file at
 * difmap[page]*DIFFPAGESIZE+offset; any other page is read from the
 * original export.
 *
 * NOTE(review): `offset` is assigned on a line not visible in this excerpt
 * (presumably a - pagestart; confirm).
 */
442 int expread(off_t a, char *buf, size_t len)
445 off_t mapcnt, mapl, maph, pagestart;
447 if (!(flags & F_COPYONWRITE))
448 return rawexpread(a, buf, len);
449 DEBUG3("Asked to read %d bytes at %Lu.\n", len, (unsigned long long)a);
451 mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
453 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
454 pagestart=mapcnt*DIFFPAGESIZE;
/* rdlen: the part of the request that lies inside the current page. */
456 rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
457 len : (size_t)DIFFPAGESIZE-offset;
458 if (difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
459 DEBUG3("Page %Lu is at %lu\n", (unsigned long long)mapcnt,
460 (unsigned long)difmap[mapcnt]);
461 myseek(difffile, difmap[mapcnt]*DIFFPAGESIZE+offset);
462 if (read(difffile, buf, rdlen) != rdlen) return -1;
463 } else { /* the block is not there */
464 DEBUG2("Page %Lu is not here, we read the original one\n",
465 (unsigned long long)mapcnt);
/* NOTE(review): this returns from the whole function after the first page
 * that is not in the diff file, so any remaining pages of a multi-page
 * request are never read - confirm whether the loop should continue. */
466 return rawexpread(a, buf, rdlen);
468 len-=rdlen; a+=rdlen; buf+=rdlen;
/*
 * Write `len` bytes at export offset `a` straight to the backing file
 * export[a/hunksize], at position a%hunksize. Returns 0 when exactly `len`
 * bytes were written, non-zero on error or a short write.
 */
473 int rawexpwrite(off_t a, char *buf, size_t len)
477 maybeseek(export[a/hunksize], a%hunksize);
478 res = write(export[a/hunksize], buf, len);
479 return (res < 0 || (size_t)res != len);
/*
 * Write `len` bytes at export offset `a`, honouring the diff map.
 *
 * Without F_COPYONWRITE this is just rawexpwrite(). Otherwise the range is
 * walked page by page: a page already present in the diff file is
 * overwritten in place; for a page not yet present, a new page is
 * allocated at the end of the diff file (difffilelen is incremented), the
 * original page is read into pagebuf, the new data is copied over it at
 * `offset`, and the full DIFFPAGESIZE page is written to the diff file.
 *
 * NOTE(review): the assignments of `offset` and the initial `rdlen` are on
 * lines not visible in this excerpt.
 */
483 int expwrite(off_t a, char *buf, size_t len)
485 off_t mapcnt,mapl,maph ;
490 if (!(flags & F_COPYONWRITE))
491 return(rawexpwrite(a,buf,len));
492 DEBUG3("Asked to write %d bytes at %Lu.\n", len, (unsigned long long)a);
494 mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
496 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
497 pagestart=mapcnt*DIFFPAGESIZE ;
/* wrlen: the part of the request that lies inside the current page. */
499 wrlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
500 len : (size_t)DIFFPAGESIZE-offset;
502 if (difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
503 DEBUG3("Page %Lu is at %lu\n", (unsigned long long)mapcnt,
504 (unsigned long)difmap[mapcnt]) ;
505 myseek(difffile,difmap[mapcnt]*DIFFPAGESIZE+offset) ;
506 if (write(difffile, buf, wrlen) != wrlen) return -1 ;
507 } else { /* the block is not there */
508 myseek(difffile,difffilelen*DIFFPAGESIZE) ;
509 difmap[mapcnt]=difffilelen++ ;
510 DEBUG3("Page %Lu is not here, we put it at %lu\n",
511 (unsigned long long)mapcnt,
512 (unsigned long)difmap[mapcnt]);
/* Clamp the read of the original page so it does not run past the end of
 * the current hunk file. */
514 if (rdlen+pagestart%hunksize>hunksize)
515 rdlen=hunksize-(pagestart%hunksize) ;
516 if (rawexpread(pagestart,pagebuf,rdlen)) return -1 ;
517 memcpy(pagebuf+offset,buf,wrlen) ;
518 if (write(difffile,pagebuf,DIFFPAGESIZE)!=DIFFPAGESIZE) return -1 ;
520 len-=wrlen ; a+=wrlen ; buf+=wrlen ;
/*
 * Negotiate with the client on socket `net`, then serve requests.
 *
 * Negotiation writes, in order: the 8-byte INIT_PASSWD, cliserv_magic in
 * network byte order, the export size as a 64-bit network-order value, and
 * 128 zero bytes. The loop then reads one struct nbd_request at a time,
 * converts its fields to host order, and handles type 2 (disconnect),
 * type 1 (write) and otherwise read; read data is placed in `buf` right
 * after a copy of the reply header so both go out in a single writeit().
 *
 * NOTE(review): the declarations of buf, zeros, len and size_host, and the
 * condition guarding "Request too big!", are on lines not visible here.
 */
525 int mainloop(int net)
527 struct nbd_request request;
528 struct nbd_reply reply;
533 memset(zeros, 0, 290);
534 if (write(net, INIT_PASSWD, 8) < 0)
535 err("Negotiation failed: %m");
536 cliserv_magic = htonll(cliserv_magic);
537 if (write(net, &cliserv_magic, sizeof(cliserv_magic)) < 0)
538 err("Negotiation failed: %m");
539 size_host = htonll((u64)exportsize);
540 if (write(net, &size_host, 8) < 0)
541 err("Negotiation failed: %m");
542 if (write(net, zeros, 128) < 0)
543 err("Negotiation failed: %m");
545 DEBUG("Entering request loop!\n");
546 reply.magic = htonl(NBD_REPLY_MAGIC);
549 #define BUFSIZE (1024*1024)
559 readit(net, &request, sizeof(request));
560 request.from = ntohll(request.from);
561 request.type = ntohl(request.type);
/* Disconnect: release the diff map, remove the diff file, and leave via
 * err(). This is handled before the request magic is verified below. */
563 if (request.type==2) { /* Disconnect request */
564 if (difmap) free(difmap) ;
566 close(difffile) ; unlink(difffilename) ; }
567 err("Disconnect request received.") ;
570 len = ntohl(request.len);
/* request.magic is still in network order, hence the htonl() on the
 * constant rather than ntohl() on the field. */
572 if (request.magic != htonl(NBD_REQUEST_MAGIC))
573 err("Not enough magic.");
575 err("Request too big!");
577 printf("%s from %Lu (%Lu) len %d, ", request.type ? "WRITE" :
578 "READ", (unsigned long long)request.from,
579 (unsigned long long)request.from / 512, len);
581 memcpy(reply.handle, request.handle, sizeof(reply.handle));
582 if ((request.from + len) > (OFFT_MAX)) {
583 DEBUG("[Number too large!]");
/* Reject requests that extend past the export, and any non-read request
 * when the export is read-only. */
587 if (((size_t)((off_t)request.from + len) > exportsize) ||
588 ((flags & F_READONLY) && request.type)) {
593 if (request.type==1) { /* WRITE */
594 DEBUG("wr: net->buf, ");
595 readit(net, buf, len);
597 if ((autoreadonly == 1) || expwrite(request.from, buf, len)) {
598 DEBUG("Write failed: %m" );
610 if (expread(request.from, buf + sizeof(struct nbd_reply), len)) {
612 DEBUG("Read failed: %m");
619 memcpy(buf, &reply, sizeof(struct nbd_reply));
620 writeit(net, buf, len + sizeof(struct nbd_reply));
625 char exportname2[1024];
627 void set_peername(int net,char *clientname)
629 struct sockaddr_in addrin;
630 int addrinlen = sizeof( addrin );
633 if (getpeername( net, (struct sockaddr *) &addrin, &addrinlen ) < 0)
634 err("getsockname failed: %m");
635 peername = inet_ntoa(addrin.sin_addr);
636 sprintf(exportname2, exportname, peername);
638 msg4(LOG_INFO, "connect from %s, assigned file is %s", peername, exportname2);
639 strncpy(clientname,peername,255) ;
/*
 * Determine the size in bytes of the already-open descriptor `export`.
 *
 * Three methods are tried in order; the first that yields a positive size
 * wins:
 *   1. lseek() to SEEK_END,
 *   2. fstat() and st_size,
 *   3. ioctl(BLKGETSIZE), whose result is multiplied by 512.
 * If none succeeds the failure is reported through err().
 *
 * NOTE(review): the return statements for methods 1 and 3 and the exact
 * condition guarding err("fstat failed: %m") are not visible here.
 */
642 off_t size_autodetect(int export)
646 struct stat stat_buf;
649 DEBUG("looking for export size with lseek SEEK_END\n");
650 es = lseek(export, (off_t)0, SEEK_END);
651 if (es > ((off_t)0)) {
/* The debug output folds errno into a small code: 1 EBADF, 2 ESPIPE,
 * 3 EINVAL, 4 anything else. */
654 DEBUG2("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4)));
657 DEBUG("looking for export size with fstat\n");
658 stat_buf.st_size = 0;
659 error = fstat(export, &stat_buf);
660 if (!error && stat_buf.st_size > 0) {
661 return (off_t)stat_buf.st_size;
663 err("fstat failed: %m");
667 DEBUG("looking for export size with ioctl BLKGETSIZE\n");
668 if (!ioctl(export, BLKGETSIZE, &es32) && es32) {
669 es = (off_t)es32 * (off_t)512;
673 err("Could not find size of exported block device: %m");
/*
 * Program entry point: verify that struct nbd_request has the expected
 * 28-byte layout, then start listening on the configured port.
 *
 * NOTE(review): the call that parses the command line is on a line not
 * visible in this excerpt.
 * NOTE(review): a port of 0 makes main return 1 here, while the usage text
 * in cmdline() says port 0 means "stdin is used (for running from inetd)"
 * - confirm which is intended.
 */
677 int main(int argc, char *argv[])
679 if (sizeof( struct nbd_request )!=28) {
680 fprintf(stderr,"Bad size of structure. Alignment problems?\n");
686 if (!port) return 1 ;
687 connectme(port); /* serve infinitely */
/*
 * Per-client setup, run in the forked child for connection `net`.
 *
 * Visible steps: open one backing file per hunk (the name is produced by
 * formatting exportname2 with the hunk index), retrying read-only when the
 * first open fails; autodetect the export size when it is still OFFT_MAX;
 * log the size; and, with F_COPYONWRITE, create the diff file and a difmap
 * with one u32 per DIFFPAGESIZE page, each initialised to (u32)-1 - the
 * value expread()/expwrite() test for to mean "page not in the diff file".
 *
 * NOTE(review): the remainder of this function lies beyond the end of this
 * excerpt.
 * NOTE(review): exportsize/DIFFPAGESIZE rounds down, so an export whose
 * size is not a multiple of DIFFPAGESIZE gets no map entry for its final
 * partial page - confirm this is handled.
 */
692 void serveconnection(int net)
696 for (i=0; i<exportsize; i+=hunksize) {
697 char exportname3[1024];
699 sprintf(exportname3, exportname2, i/hunksize);
700 printf( "Opening %s\n", exportname3 );
701 if ((export[i/hunksize] = open(exportname3, (flags & F_READONLY) ? O_RDONLY : O_RDWR)) == -1) {
702 /* Read/write access was requested but the medium is read-only */
705 if ((export[i/hunksize] = open(exportname3, O_RDONLY)) == -1)
706 err("Could not open exported file: %m");
710 if (exportsize == OFFT_MAX) {
711 exportsize = size_autodetect(export[0]);
713 if (exportsize > OFFT_MAX) {
714 err("Size of exported file is too big\n");
717 msg3(LOG_INFO, "size of exported file/device is %Lu",
718 (unsigned long long)exportsize);
720 if (flags & F_COPYONWRITE) {
721 sprintf(difffilename,"%s-%s-%d.diff",exportname2,clientname,
723 msg3(LOG_INFO,"About to create map and diff file %s",difffilename) ;
724 difffile=open(difffilename,O_RDWR | O_CREAT | O_TRUNC,0600) ;
725 if (difffile<0) err("Could not create diff file (%m)") ;
726 if ((difmap=calloc(exportsize/DIFFPAGESIZE,sizeof(u32)))==NULL)
727 err("Could not allocate memory") ;
728 for (i=0;i<exportsize/DIFFPAGESIZE;i++) difmap[i]=(u32)-1 ;