2 * Network Block Device - server
4 * Copyright 1996-1998 Pavel Machek, distribute under GPL
5 * <pavel@atrey.karlin.mff.cuni.cz>
6 * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk>
8 * Version 1.0 - hopefully 64-bit-clean
9 * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au>
10 * Version 1.2 - autodetect size of block devices, thanks to Peter T. Breuer <ptb@it.uc3m.es>
11 * Version 1.5 - can compile on Unix systems that don't have 64 bit integer
12 * type, or don't have 64 bit file offsets by defining FS_32BIT
13 * in compile options for nbd-server *only*. This can be done
14 * with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the
15 * original autoconf input file, or I would make it a configure
16 * option.) Ken Yap <ken@nlc.net.au>.
17 * Version 1.6 - fix autodetection of block device size and really make 64 bit
18 * clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk>
19 * Version 2.0 - Version synchronised with client
20 * Version 2.1 - Reap zombie client processes when they exit. Removed
21 * (uncommented) the _IO magic, it's no longer necessary. Wouter
22 * Verhelst <wouter@debian.org>
23 * Version 2.2 - Auto switch to read-only mode (useful for floppies).
24 * Version 2.3 - Fixed code so that Large File Support works. This
25 * removes the FS_32BIT compile-time directive; define
26 * _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be
27 * using FS_32BIT. This will allow you to use files >2GB instead of
28 * having to use the -m option. Wouter Verhelst <wouter@debian.org>
29 * Version 2.4 - Added code to keep track of children, so that we can
30 * properly kill them from initscripts. Add a call to daemon(),
31 * so that processes don't think they have to wait for us, which is
32 * interesting for initscripts as well. Wouter Verhelst
34 * Version 2.5 - Bugfix release: forgot to reset child_arraysize to
35 * zero after fork()ing, resulting in nbd-server going berserk
36 * when it receives a signal with at least one child open. Wouter
37 * Verhelst <wouter@debian.org>
38 * 10/10/2003 - Added socket option SO_KEEPALIVE (sf.net bug 819235);
39 * rectified type of mainloop::size_host (sf.net bugs 814435 and
40 * 817385); close the PID file after writing to it, so that the
41 * daemon can actually be found. Wouter Verhelst
43 * 10/10/2003 - Size of the data "size_host" was wrong and so was not
44 * correctly put in network endianness. Many types were corrected
45 * (size_t and off_t instead of int). <vspaceg@sourceforge.net>
48 #define VERSION PACKAGE_VERSION
49 #define GIGA (1*1024*1024*1024)
51 #include <sys/types.h>
52 #include <sys/socket.h>
54 #include <sys/wait.h> /* wait */
55 #include <sys/ioctl.h>
56 #include <sys/param.h>
57 #include <sys/mount.h> /* For BLKGETSIZE */
58 #include <signal.h> /* sigaction */
59 #include <netinet/tcp.h>
60 #include <netinet/in.h> /* sockaddr_in, htons, in_addr */
61 #include <netdb.h> /* hostent, gethostby*, getservby* */
68 #include <arpa/inet.h>
71 /* used in cliserv.h, so must be first */
72 #define MY_NAME "nbd_server"
75 /* how much space for child PIDs we have by default. Dynamically
76 allocated, and will be realloc()ed if out of space, so this should
77 probably be fair for most situations. */
78 #define DEFAULT_CHILD_ARRAY 256
80 /* Debugging macros, now nothing goes to syslog unless you say ISSERVER */
82 #define msg2(a,b) syslog(a,b)
83 #define msg3(a,b,c) syslog(a,b,c)
84 #define msg4(a,b,c,d) syslog(a,b,c,d)
86 #define msg2(a,b) do { fprintf(stderr,b) ; fputs("\n",stderr) ; } while(0)
87 #define msg3(a,b,c) do { fprintf(stderr,b,c); fputs("\n",stderr) ; } while(0)
88 #define msg4(a,b,c,d) do { fprintf(stderr,b,c,d); fputs("\n",stderr) ; } while(0)
93 #define DEBUG( a ) printf( a )
94 #define DEBUG2( a,b ) printf( a,b )
95 #define DEBUG3( a,b,c ) printf( a,b,c )
99 #define DEBUG3( a,b,c )
102 #ifndef PACKAGE_VERSION
103 #define PACKAGE_VERSION ""
106 /* This is starting to get ugly. If someone knows a better way to find
107 * the maximum value of a signed type *without* relying on overflow
108 * (doing so breaks on 64bit architectures), that would be nice.
110 #define OFFT_MAX (((((off_t)1)<<((sizeof(off_t)-1)*8))-1)<<7)+127
112 void serveconnection(int net);
113 void set_peername(int net,char *clientname);
116 char difffilename[256];
117 unsigned int timeout = 0;
118 int autoreadonly = 0;
119 char *auth_file="nbd_server.allow";
121 int authorized_client(char *name)
122 /* 0 - authorization refused, 1 - OK
123 authorization file contains one line per machine, no wildcards
130 if ((f=fopen(auth_file,"r"))==NULL) {
131 msg4(LOG_INFO,"Can't open authorization file %s (%s).",
132 auth_file,strerror(errno)) ;
136 while (fgets(line,LINELEN,f)!=NULL) {
137 if (strncmp(line,name,strlen(name))==0) {
146 inline void readit(int f, void *buf, size_t len)
151 if ((res = read(f, buf, len)) <= 0)
152 err("Read failed: %m");
158 inline void writeit(int f, void *buf, size_t len)
163 if ((res = send(f, buf, len, 0)) <= 0)
164 err("Send failed: %m");
170 unsigned int port; /* Port I'm listening at */
171 char *exportname; /* File I'm exporting */
172 off_t exportsize = OFFT_MAX; /* ...and its length */
173 off_t hunksize = OFFT_MAX;
177 u32 difffilelen=0 ; /* number of pages in difffile */
179 char clientname[256] ;
180 int child_arraysize=DEFAULT_CHILD_ARRAY;
184 #define DIFFPAGESIZE 4096 /* diff file uses those chunks */
187 #define F_MULTIFILE 2
188 #define F_COPYONWRITE 4
190 void cmdline(int argc, char *argv[])
195 printf("This is nbd-server version " VERSION "\n");
196 printf("Usage: port file_to_export [size][kKmM] [-r] [-m] [-c] [-a timeout_sec]\n"
198 " -m multiple file\n"
199 " -c copy on write\n"
200 " -l file with list of hosts that are allowed to connect.\n"
201 " -a maximum idle seconds, terminates when idle time exceeded\n"
202 " if port is set to 0, stdin is used (for running from inetd)\n"
203 " if file_to_export contains '%%s', it is substituted with IP\n"
204 " address of machine trying to connect\n" );
207 port = atoi(argv[1]);
208 for (i = 3; i < argc; i++) {
209 if (*argv[i] == '-') {
210 switch (argv[i][1]) {
215 flags |= F_MULTIFILE;
218 case 'c': flags |=F_COPYONWRITE;
225 fprintf(stderr, "host list file requires an argument");
230 timeout = atoi(argv[i+1]);
233 fprintf(stderr, "timeout requires argument\n");
239 size_t last = strlen(argv[i])-1;
240 char suffix = argv[i][last];
241 if (suffix == 'k' || suffix == 'K' ||
242 suffix == 'm' || suffix == 'M')
243 argv[i][last] = '\0';
244 es = (off_t)atol(argv[i]);
256 exportname = argv[2];
259 void sigchld_handler(int s)
265 while((pid=wait(status)) > 0) {
266 if(WIFEXITED(status)) {
267 msg3(LOG_INFO, "Child exited with %d", WEXITSTATUS(status));
269 for(i=0;children[i]!=pid&&i<child_arraysize;i++);
270 if(i>=child_arraysize) {
271 msg3(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld",(long) pid);
273 children[i]=(pid_t)0;
274 DEBUG2("Removing %d from the list of children", pid);
279 /* If we are terminated, make sure our children are, too. */
280 void sigterm_handler(int s) {
284 for(i=0;i<child_arraysize;i++) {
286 kill(children[i], s);
298 void connectme(unsigned int port)
300 struct sockaddr_in addrin;
302 int addrinlen = sizeof(addrin);
303 int net, sock, newpid, i;
317 snprintf(pidfname, sizeof(char)*255, "/var/run/nbd-server.%d.pid", port);
318 pidf=fopen(pidfname, "w");
320 fprintf(pidf,"%d", (int)getpid());
324 fprintf(stderr, "Not fatal; continuing");
328 #endif /* NODAEMON */
330 if ((sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0)
333 /* lose the pesky "Address already in use" error message */
334 if (setsockopt(sock,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) {
335 err("setsockopt SO_REUSEADDR");
337 if (setsockopt(sock,SOL_SOCKET,SO_KEEPALIVE,&yes,sizeof(int)) == -1) {
338 err("setsockopt SO_KEEPALIVE");
341 DEBUG("Waiting for connections... bind, ");
342 addrin.sin_family = AF_INET;
343 addrin.sin_port = htons(port);
344 addrin.sin_addr.s_addr = 0;
345 if (bind(sock, (struct sockaddr *) &addrin, addrinlen) < 0)
348 if (listen(sock, 1) < 0)
351 sa.sa_handler = sigchld_handler;
352 sigemptyset(&sa.sa_mask);
353 sa.sa_flags = SA_RESTART;
354 if(sigaction(SIGCHLD, &sa, NULL) == -1)
355 err("sigaction: %m");
356 sa.sa_handler = sigterm_handler;
357 sigemptyset(&sa.sa_mask);
358 sa.sa_flags = SA_RESTART;
359 if(sigaction(SIGTERM, &sa, NULL) == -1)
360 err("sigaction: %m");
361 children=malloc(sizeof(pid_t)*child_arraysize);
362 memset(children, 0, sizeof(pid_t)*DEFAULT_CHILD_ARRAY);
363 for(;;) { /* infinite loop */
364 if ((net = accept(sock, (struct sockaddr *) &addrin, &addrinlen)) < 0)
367 set_peername(net,clientname) ;
368 if (!authorized_client(clientname)) {
369 msg2(LOG_INFO,"Unauthorized client") ;
373 msg2(LOG_INFO,"Authorized client") ;
374 for(i=0;children[i]&&i<child_arraysize;i++);
375 if(i>=child_arraysize) {
378 ptr=realloc(children, sizeof(pid_t)*child_arraysize);
381 memset(children+child_arraysize, 0, sizeof(pid_t)*DEFAULT_CHILD_ARRAY);
383 child_arraysize+=DEFAULT_CHILD_ARRAY;
385 msg2(LOG_INFO,"Not enough memory to store child PID");
391 if ((children[i]=fork())<0) {
392 msg3(LOG_INFO,"Could not fork (%s)",strerror(errno)) ;
396 if (children[i]>0) { /* parent */
397 close(net) ; continue ; }
403 msg2(LOG_INFO,"Starting to serve") ;
404 serveconnection(net) ;
408 #define SEND writeit( net, &reply, sizeof( reply ));
409 #define ERROR { reply.error = htonl(-1); SEND; reply.error = 0; lastpoint = -1; }
411 off_t lastpoint = (off_t)-1;
413 void maybeseek(int handle, off_t a)
415 if (a < 0 || a > exportsize)
416 err("Can not happen\n");
417 if (lastpoint != a) {
418 if (lseek(handle, a, SEEK_SET) < 0)
419 err("Can not seek locally!\n");
426 void myseek(int handle,off_t a)
428 if (lseek(handle, a, SEEK_SET) < 0)
429 err("Can not seek locally!\n");
432 char pagebuf[DIFFPAGESIZE];
434 int rawexpread(off_t a, char *buf, size_t len)
438 maybeseek(export[a/hunksize], a%hunksize);
439 res = read(export[a/hunksize], buf, len);
440 return (res < 0 || (size_t)res != len);
443 int expread(off_t a, char *buf, size_t len)
446 off_t mapcnt, mapl, maph, pagestart;
448 if (!(flags & F_COPYONWRITE))
449 return rawexpread(a, buf, len);
450 DEBUG3("Asked to read %d bytes at %Lu.\n", len, (unsigned long long)a);
452 mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
454 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
455 pagestart=mapcnt*DIFFPAGESIZE;
457 rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
458 len : (size_t)DIFFPAGESIZE-offset;
459 if (difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
460 DEBUG3("Page %Lu is at %lu\n", (unsigned long long)mapcnt,
461 (unsigned long)difmap[mapcnt]);
462 myseek(difffile, difmap[mapcnt]*DIFFPAGESIZE+offset);
463 if (read(difffile, buf, rdlen) != rdlen) return -1;
464 } else { /* the block is not there */
465 DEBUG2("Page %Lu is not here, we read the original one\n",
466 (unsigned long long)mapcnt);
467 return rawexpread(a, buf, rdlen);
469 len-=rdlen; a+=rdlen; buf+=rdlen;
474 int rawexpwrite(off_t a, char *buf, size_t len)
478 maybeseek(export[a/hunksize], a%hunksize);
479 res = write(export[a/hunksize], buf, len);
480 return (res < 0 || (size_t)res != len);
484 int expwrite(off_t a, char *buf, size_t len)
486 off_t mapcnt,mapl,maph ;
491 if (!(flags & F_COPYONWRITE))
492 return(rawexpwrite(a,buf,len));
493 DEBUG3("Asked to write %d bytes at %Lu.\n", len, (unsigned long long)a);
495 mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
497 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
498 pagestart=mapcnt*DIFFPAGESIZE ;
500 wrlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
501 len : (size_t)DIFFPAGESIZE-offset;
503 if (difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
504 DEBUG3("Page %Lu is at %lu\n", (unsigned long long)mapcnt,
505 (unsigned long)difmap[mapcnt]) ;
506 myseek(difffile,difmap[mapcnt]*DIFFPAGESIZE+offset) ;
507 if (write(difffile, buf, wrlen) != wrlen) return -1 ;
508 } else { /* the block is not there */
509 myseek(difffile,difffilelen*DIFFPAGESIZE) ;
510 difmap[mapcnt]=difffilelen++ ;
511 DEBUG3("Page %Lu is not here, we put it at %lu\n",
512 (unsigned long long)mapcnt,
513 (unsigned long)difmap[mapcnt]);
515 if (rdlen+pagestart%hunksize>hunksize)
516 rdlen=hunksize-(pagestart%hunksize) ;
517 if (rawexpread(pagestart,pagebuf,rdlen)) return -1 ;
518 memcpy(pagebuf+offset,buf,wrlen) ;
519 if (write(difffile,pagebuf,DIFFPAGESIZE)!=DIFFPAGESIZE) return -1 ;
521 len-=wrlen ; a+=wrlen ; buf+=wrlen ;
526 int mainloop(int net)
528 struct nbd_request request;
529 struct nbd_reply reply;
534 memset(zeros, 0, 290);
535 if (write(net, INIT_PASSWD, 8) < 0)
536 err("Negotiation failed: %m");
537 cliserv_magic = htonll(cliserv_magic);
538 if (write(net, &cliserv_magic, sizeof(cliserv_magic)) < 0)
539 err("Negotiation failed: %m");
540 size_host = htonll((u64)exportsize);
541 if (write(net, &size_host, 8) < 0)
542 err("Negotiation failed: %m");
543 if (write(net, zeros, 128) < 0)
544 err("Negotiation failed: %m");
546 DEBUG("Entering request loop!\n");
547 reply.magic = htonl(NBD_REPLY_MAGIC);
550 #define BUFSIZE (1024*1024)
560 readit(net, &request, sizeof(request));
561 request.from = ntohll(request.from);
562 request.type = ntohl(request.type);
564 if (request.type==2) { /* Disconnect request */
565 if (difmap) free(difmap) ;
567 close(difffile) ; unlink(difffilename) ; }
568 err("Disconnect request received.") ;
571 len = ntohl(request.len);
573 if (request.magic != htonl(NBD_REQUEST_MAGIC))
574 err("Not enough magic.");
576 err("Request too big!");
578 printf("%s from %Lu (%Lu) len %d, ", request.type ? "WRITE" :
579 "READ", (unsigned long long)request.from,
580 (unsigned long long)request.from / 512, len);
582 memcpy(reply.handle, request.handle, sizeof(reply.handle));
583 if ((request.from + len) > (OFFT_MAX)) {
584 DEBUG("[Number too large!]");
588 if (((size_t)((off_t)request.from + len) > exportsize) ||
589 ((flags & F_READONLY) && request.type)) {
594 if (request.type==1) { /* WRITE */
595 DEBUG("wr: net->buf, ");
596 readit(net, buf, len);
598 if ((autoreadonly == 1) || expwrite(request.from, buf, len)) {
599 DEBUG("Write failed: %m" );
611 if (expread(request.from, buf + sizeof(struct nbd_reply), len)) {
613 DEBUG("Read failed: %m");
620 memcpy(buf, &reply, sizeof(struct nbd_reply));
621 writeit(net, buf, len + sizeof(struct nbd_reply));
626 char exportname2[1024];
628 void set_peername(int net,char *clientname)
630 struct sockaddr_in addrin;
631 int addrinlen = sizeof( addrin );
634 if (getpeername( net, (struct sockaddr *) &addrin, &addrinlen ) < 0)
635 err("getsockname failed: %m");
636 peername = inet_ntoa(addrin.sin_addr);
637 sprintf(exportname2, exportname, peername);
639 msg4(LOG_INFO, "connect from %s, assigned file is %s", peername, exportname2);
640 strncpy(clientname,peername,255) ;
643 off_t size_autodetect(int export)
647 struct stat stat_buf;
650 DEBUG("looking for export size with lseek SEEK_END\n");
651 es = lseek(export, (off_t)0, SEEK_END);
652 if (es > ((off_t)0)) {
655 DEBUG2("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4)));
658 DEBUG("looking for export size with fstat\n");
659 stat_buf.st_size = 0;
660 error = fstat(export, &stat_buf);
661 if (!error && stat_buf.st_size > 0) {
662 return (off_t)stat_buf.st_size;
664 err("fstat failed: %m");
668 DEBUG("looking for export size with ioctl BLKGETSIZE\n");
669 if (!ioctl(export, BLKGETSIZE, &es32) && es32) {
670 es = (off_t)es32 * (off_t)512;
674 err("Could not find size of exported block device: %m");
678 int main(int argc, char *argv[])
680 if (sizeof( struct nbd_request )!=28) {
681 fprintf(stderr,"Bad size of structure. Alignment problems?\n");
687 if (!port) return 1 ;
688 connectme(port); /* serve infinitely */
693 void serveconnection(int net)
697 for (i=0; i<exportsize; i+=hunksize) {
698 char exportname3[1024];
700 sprintf(exportname3, exportname2, i/hunksize);
701 printf( "Opening %s\n", exportname3 );
702 if ((export[i/hunksize] = open(exportname3, (flags & F_READONLY) ? O_RDONLY : O_RDWR)) == -1) {
703 /* Read/write access was requested, but the media is read-only */
706 if ((export[i/hunksize] = open(exportname3, O_RDONLY)) == -1)
707 err("Could not open exported file: %m");
711 if (exportsize == OFFT_MAX) {
712 exportsize = size_autodetect(export[0]);
714 if (exportsize > OFFT_MAX) {
715 err("Size of exported file is too big\n");
718 msg3(LOG_INFO, "size of exported file/device is %Lu",
719 (unsigned long long)exportsize);
721 if (flags & F_COPYONWRITE) {
722 sprintf(difffilename,"%s-%s-%d.diff",exportname2,clientname,
724 msg3(LOG_INFO,"About to create map and diff file %s",difffilename) ;
725 difffile=open(difffilename,O_RDWR | O_CREAT | O_TRUNC,0600) ;
726 if (difffile<0) err("Could not create diff file (%m)") ;
727 if ((difmap=calloc(exportsize/DIFFPAGESIZE,sizeof(u32)))==NULL)
728 err("Could not allocate memory") ;
729 for (i=0;i<exportsize/DIFFPAGESIZE;i++) difmap[i]=(u32)-1 ;