2 * Network Block Device - server
4 * Copyright 1996-1998 Pavel Machek, distribute under GPL
5 * <pavel@atrey.karlin.mff.cuni.cz>
6 * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk>
8 * Version 1.0 - hopefully 64-bit-clean
9 * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au>
10 * Version 1.2 - autodetect size of block devices, thanks to Peter T. Breuer <ptb@it.uc3m.es>
11 * Version 1.5 - can compile on Unix systems that don't have 64 bit integer
12 * type, or don't have 64 bit file offsets by defining FS_32BIT
13 * in compile options for nbd-server *only*. This can be done
14 * with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the
15 * original autoconf input file, or I would make it a configure
16 * option.) Ken Yap <ken@nlc.net.au>.
17 * Version 1.6 - fix autodetection of block device size and really make 64 bit
18 * clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk>
19 * Version 2.0 - Version synchronised with client
20 * Version 2.1 - Reap zombie client processes when they exit. Removed
21 * (uncommented) the _IO magic, it's no longer necessary. Wouter
22 * Verhelst <wouter@debian.org>
23 * Version 2.2 - Auto switch to read-only mode (useful for floppies).
24 * Version 2.3 - Fixed code so that Large File Support works. This
25 * removes the FS_32BIT compile-time directive; define
26 * _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be
27 * using FS_32BIT. This will allow you to use files >2GB instead of
28 * having to use the -m option. Wouter Verhelst <wouter@debian.org>
29 * Version 2.4 - Added code to keep track of children, so that we can
30 * properly kill them from initscripts. Add a call to daemon(),
31 * so that processes don't think they have to wait for us, which is
32 * interesting for initscripts as well. Wouter Verhelst
/* Version string printed in the usage banner by cmdline(). PACKAGE_VERSION is not
   defined in this excerpt (presumably supplied by the build configuration - TODO confirm). */
36 #define VERSION PACKAGE_VERSION
/* 2^30, i.e. one gibibyte expressed in bytes. */
37 #define GIGA (1*1024*1024*1024)
39 #include <sys/types.h>
40 #include <sys/socket.h>
42 #include <sys/wait.h> /* wait */
43 #include <signal.h> /* sigaction */
44 #include <netinet/tcp.h>
45 #include <netinet/in.h> /* sockaddr_in, htons, in_addr */
46 #include <netdb.h> /* hostent, gethostby*, getservby* */
53 #include <arpa/inet.h>
/* Program name string; where it is used is not visible in this excerpt. */
58 #define MY_NAME "nbd_server"
60 /* how much space for child PIDs we have by default. Dynamically
61 allocated, and will be realloc()ed if out of space, so this should
62 probably be fair for most situations. */
/* Initial value of child_arraysize, and the step by which connectme() enlarges it. */
63 #define DEFAULT_CHILD_ARRAY 256
67 /* Deep magic: ioctl.h defines _IO macro (at least on linux) */
69 /* Debugging macros, now nothing goes to syslog unless you say ISSERVER */
/* msg2/msg3/msg4 log a message; the digit is the total number of macro arguments
   (a = priority, b = format string, c and d = format arguments).
   Two variants are visible. The first three forward everything to syslog().
   NOTE(review): the preprocessor conditionals that choose between the two variants
   are missing from this excerpt. */
71 #define msg2(a,b) syslog(a,b)
72 #define msg3(a,b,c) syslog(a,b,c)
73 #define msg4(a,b,c,d) syslog(a,b,c,d)
/* The second variant ignores the priority `a`, prints to stderr and appends a newline.
   NOTE(review): `b` is handed to fprintf() as the format string, so callers must
   only ever pass a trusted literal here. */
75 #define msg2(a,b) do { fprintf(stderr,b) ; fputs("\n",stderr) ; } while(0)
76 #define msg3(a,b,c) do { fprintf(stderr,b,c); fputs("\n",stderr) ; } while(0)
77 #define msg4(a,b,c,d) do { fprintf(stderr,b,c,d); fputs("\n",stderr) ; } while(0)
80 #include <sys/ioctl.h>
81 #include <sys/mount.h> /* For BLKGETSIZE */
/* DEBUG, DEBUG2 and DEBUG3 pass one, two or three arguments straight to printf();
   the first argument is the format string. */
85 #define DEBUG( a ) printf( a )
86 #define DEBUG2( a,b ) printf( a,b )
87 #define DEBUG3( a,b,c ) printf( a,b,c )
/* Variant of DEBUG3 that expands to nothing.
   NOTE(review): the conditionals selecting between the printing and the empty
   variants, and the empty DEBUG/DEBUG2 counterparts, are missing from this excerpt. */
91 #define DEBUG3( a,b,c )
/* Forward declarations of functions defined further down in the file. */
94 void serveconnection(int net);
95 void set_peername(int net,char *clientname);
/* Path of the copy-on-write diff file; written by serveconnection() and removed
   again by mainloop() on a disconnect request. */
98 char difffilename[256];
/* Assigned from a command-line argument in cmdline(); the usage text describes
   the -a option as the maximum idle time in seconds. Defaults to 0. */
99 unsigned int timeout = 0;
/* When this is 1, mainloop() treats every write request as failed. Where it is
   set is not visible in this excerpt. */
100 int autoreadonly = 0;
/* File opened by authorized_client() to look up permitted client names. */
101 char *auth_file="nbd_server.allow";
/* Decide whether the client identified by `name` may connect, by reading auth_file
   line by line and looking for a line that starts with `name`. If the file cannot
   be opened, that fact is logged at LOG_INFO.
   NOTE(review): this excerpt is missing several lines of the function (the end of
   the comment below, the local declarations and every return statement), so the
   result returned on each path cannot be confirmed from here.
   NOTE(review): strncmp() is limited to strlen(name) characters, so any line that
   merely begins with `name` matches too, e.g. name "10.0.0.1" against a line
   "10.0.0.10". */
103 int authorized_client(char *name)
104 /* 0 - authorization refused, 1 - OK
105 authorization file contains one line per machine, no wildcards
112 if ((f=fopen(auth_file,"r"))==NULL) {
113 msg4(LOG_INFO,"Can't open authorization file %s (%s).",
114 auth_file,strerror(errno)) ;
118 while (fgets(line,LINELEN,f)!=NULL) {
119 if (strncmp(line,name,strlen(name))==0) {
/* Read from descriptor `f` into `buf`, where `len` is the requested byte count.
   A read() result of zero or less is reported through err().
   NOTE(review): the lines surrounding the read() call are missing from this
   excerpt, so whether partial reads are retried until `len` bytes have arrived
   cannot be confirmed here. */
128 inline void readit(int f, void *buf, int len)
133 if ((res = read(f, buf, len)) <= 0)
134 err("Read failed: %m");
/* Send `len` bytes from `buf` on socket `f` using send() with no flags.
   A send() result of zero or less is reported through err().
   NOTE(review): the lines surrounding the send() call are missing from this
   excerpt, so whether partial sends are retried cannot be confirmed here. */
140 inline void writeit(int f, void *buf, int len)
145 if ((res = send(f, buf, len, 0)) <= 0)
146 err("Send failed: %m");
152 /* This is starting to get ugly. If someone knows a better way to find
153 * the maximum value of a signed type *without* relying on overflow
154 * (doing so breaks on 64bit architectures), that would be nice.
/* Largest positive off_t, built without overflowing: a run of ones eight bits
   short of the type width is shifted left by seven and the low seven bits are
   then filled in, which sets every bit except the sign bit.
   NOTE(review): the expansion has no outer parentheses, so it should only be
   used where operator precedence cannot change its meaning. */
156 #define OFFT_MAX (((((off_t)1)<<((sizeof(off_t)-1)*8))-1)<<7)+127
157 int port; /* Port I'm listening at */
158 char *exportname; /* File I'm exporting */
/* OFFT_MAX doubles as "size not given": serveconnection() autodetects the size
   when exportsize still holds this value. */
159 off_t exportsize = OFFT_MAX; /* ...and its length */
/* Size of one export file: offsets are split into a file index (a/hunksize) and
   a position inside that file (a%hunksize) by rawexpread() and rawexpwrite(). */
160 off_t hunksize = OFFT_MAX;
164 u32 difffilelen=0 ; /* number of pages in difffile */
/* Textual address of the connected peer, filled in by set_peername(). */
166 char clientname[256] ;
/* Current number of slots in the children[] array. */
167 int child_arraysize=DEFAULT_CHILD_ARRAY;
171 #define DIFFPAGESIZE 4096 /* diff file uses those chunks */
/* Bit values stored in the global `flags` word (F_READONLY, which the code also
   tests, is defined on a line missing from this excerpt). */
174 #define F_MULTIFILE 2
175 #define F_COPYONWRITE 4
/* Parse the command line into the globals port, exportname, flags and timeout.
   argv[1] is the port, argv[2] the file to export, and everything from argv[3]
   onward is either an option starting with '-' or a size with an optional
   k/K/m/M suffix.
   NOTE(review): many lines of this function are missing from this excerpt,
   including the condition under which the usage text is printed, most of the
   switch cases and the handling of the parsed size `es`. */
177 void cmdline(int argc, char *argv[])
182 printf("This is nbd-server version " VERSION "\n");
183 printf("Usage: port file_to_export [size][kKmM] [-r] [-m] [-c] [-a timeout_sec]\n"
185 " -m multiple file\n"
186 " -c copy on write\n"
187 " -l file with list of hosts that are allowed to connect.\n"
188 " -a maximum idle seconds, terminates when idle time exceeded\n"
189 " if port is set to 0, stdin is used (for running from inetd)\n"
190 " if file_to_export contains '%%s', it is substituted with IP\n"
191 " address of machine trying to connect\n" );
/* NOTE(review): atoi() gives no error indication, so a non-numeric port yields 0. */
194 port = atoi(argv[1]);
195 for (i = 3; i < argc; i++) {
/* Options are dispatched on the character following the dash. */
196 if (*argv[i] == '-') {
197 switch (argv[i][1]) {
202 flags |= F_MULTIFILE;
205 case 'c': flags |=F_COPYONWRITE;
212 fprintf(stderr, "host list file requires an argument");
/* The timeout value is taken from the argument after the current one. */
217 timeout = atoi(argv[i+1]);
220 fprintf(stderr, "timeout requires argument\n");
/* Size argument (presumably the non-option branch - the else line is missing):
   a trailing k/K/m/M is cut off before the digits are converted.
   NOTE(review): for an empty argument `last` is -1 and argv[i][last] reads
   before the start of the string. */
226 int last = strlen(argv[i])-1;
227 char suffix = argv[i][last];
228 if (suffix == 'k' || suffix == 'K' ||
229 suffix == 'm' || suffix == 'M')
230 argv[i][last] = '\0';
231 es = (off_t)atol(argv[i]);
243 exportname = argv[2];
/* SIGCHLD handler: collects terminated children with wait() and clears the
   matching entry in children[]. The signal number `s` is not used in the
   visible lines.
   NOTE(review): the local declarations and closing braces are missing from
   this excerpt. */
246 void sigchld_handler(int s)
/* Keep reaping for as long as wait() returns a pid.
   NOTE(review): `status` is passed unchanged both to wait(), which takes a
   pointer to int, and to WIFEXITED/WEXITSTATUS, which take an int. Its
   declaration is not visible, but one of the two uses cannot be right. */
252 while((pid=wait(status)) > 0) {
253 if(WIFEXITED(status)) {
254 msg3(LOG_INFO, "Child exited with %d", WEXITSTATUS(status));
/* Linear search for the reaped pid.
   NOTE(review): children[i] is read before i is compared with child_arraysize,
   so when the pid is absent the loop reads one element past the array. */
256 for(i=0;children[i]!=pid&&i<child_arraysize;i++);
257 if(i>=child_arraysize) {
258 msg3(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld",(long) pid);
/* Found: a zero entry marks the slot as free for connectme() to reuse. */
260 children[i]=(pid_t)0;
261 DEBUG2("Removing %d from the list of children", pid);
266 /* If we are terminated, make sure our children are, too. */
/* Walks every slot of children[] and forwards the received signal number `s`
   with kill().
   NOTE(review): the line between the loop header and the kill() call is missing
   from this excerpt; presumably it skips empty slots - TODO confirm. What
   happens after the loop is not visible either. */
267 void sigterm_handler(int s) {
271 for(i=0;i<child_arraysize;i++) {
273 kill(children[i], s);
/* Listen on TCP `port` and serve clients forever: each accepted and authorized
   connection is handed to a forked child that calls serveconnection(), while
   the parent records the child's pid in children[] and goes back to accept().
   NOTE(review): many lines of this function are missing from this excerpt
   (declarations, error branches, the code guarded by NODAEMON, closing braces). */
285 void connectme(int port)
287 struct sockaddr_in addrin;
/* NOTE(review): accept() expects a pointer to socklen_t, not int. */
289 int addrinlen = sizeof(addrin);
290 int net, sock, newpid, i;
/* Record our own pid in a per-port pid file under /var/run. */
304 snprintf(pidfname, sizeof(char)*255, "/var/run/nbd-server.%d.pid", port);
305 pidf=fopen(pidfname, "w");
307 fprintf(pidf,"%d", (int)getpid());
310 fprintf(stderr, "Not fatal; continuing");
314 #endif /* NODAEMON */
316 if ((sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0)
319 /* lose the pesky "Address already in use" error message */
320 if (setsockopt(sock,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) {
324 DEBUG("Waiting for connections... bind, ");
/* Address 0 (INADDR_ANY) binds the socket on all local interfaces. */
325 addrin.sin_family = AF_INET;
326 addrin.sin_port = htons(port);
327 addrin.sin_addr.s_addr = 0;
328 if (bind(sock, (struct sockaddr *) &addrin, addrinlen) < 0)
/* Backlog of a single pending connection. */
331 if (listen(sock, 1) < 0)
/* Install the handlers that reap children (SIGCHLD) and pass termination on
   to them (SIGTERM); SA_RESTART is set for both. */
334 sa.sa_handler = sigchld_handler;
335 sigemptyset(&sa.sa_mask);
336 sa.sa_flags = SA_RESTART;
337 if(sigaction(SIGCHLD, &sa, NULL) == -1)
338 err("sigaction: %m");
339 sa.sa_handler = sigterm_handler;
340 sigemptyset(&sa.sa_mask);
341 sa.sa_flags = SA_RESTART;
342 if(sigaction(SIGTERM, &sa, NULL) == -1)
343 err("sigaction: %m");
/* Allocate the zero-filled pid table; a zero entry means "slot free".
   NOTE(review): the malloc() result is not checked in the visible lines. */
344 children=malloc(sizeof(pid_t)*child_arraysize);
345 memset(children, 0, sizeof(pid_t)*DEFAULT_CHILD_ARRAY);
346 for(;;) { /* infinite loop */
347 if ((net = accept(sock, (struct sockaddr *) &addrin, &addrinlen)) < 0)
/* Resolve the peer address into clientname, then consult the allow list. */
350 set_peername(net,clientname) ;
351 if (!authorized_client(clientname)) {
352 msg2(LOG_INFO,"Unauthorized client") ;
356 msg2(LOG_INFO,"Authorized client") ;
/* Look for the first free slot.
   NOTE(review): children[i] is read before i is compared with child_arraysize,
   so a full table is read one element past its end. */
357 for(i=0;children[i]&&i<child_arraysize;i++);
358 if(i>=child_arraysize) {
/* Table full: grow it by DEFAULT_CHILD_ARRAY entries.
   NOTE(review): the realloc() return value is discarded and the requested size
   is the current size rather than the enlarged one, so as written the memset
   below writes beyond the allocation. */
359 realloc(children, sizeof(pid_t)*child_arraysize);
360 memset(children+child_arraysize, 0, sizeof(pid_t)*DEFAULT_CHILD_ARRAY);
362 child_arraysize+=DEFAULT_CHILD_ARRAY;
/* The fork() result is stored directly in the chosen slot. */
365 if ((children[i]=fork())<0) {
366 msg3(LOG_INFO,"Could not fork (%s)",strerror(errno)) ;
370 if (children[i]>0) { /* parent */
371 close(net) ; continue ; }
/* Only the child gets here. */
377 msg2(LOG_INFO,"Starting to serve") ;
378 serveconnection(net) ;
/* SEND transmits the current reply header; it relies on `net` and `reply`
   being in scope where the macro is expanded. */
382 #define SEND writeit( net, &reply, sizeof( reply ));
/* ERROR sends a reply whose error field is htonl(-1), then resets the field to 0
   and invalidates the remembered file position. */
383 #define ERROR { reply.error = htonl(-1); SEND; reply.error = 0; lastpoint = -1; }
/* File position remembered for maybeseek(), which only calls lseek() when the
   requested offset differs from it; -1 means "unknown". */
385 off_t lastpoint = (off_t)-1;
/* Position `handle` at offset `a`, skipping the lseek() when `a` equals the
   remembered position lastpoint. A failed seek is reported through err().
   NOTE(review): the range check guarding the "Can not happen" error and the
   code that updates lastpoint are missing from this excerpt. */
387 void maybeseek(int handle, off_t a)
390 err("Can not happen\n");
391 if (lastpoint != a) {
392 if (lseek(handle, a, SEEK_SET) < 0)
393 err("Can not seek locally!\n");
/* Unconditionally position `handle` at absolute offset `a`; a failed lseek()
   is reported through err(). Unlike maybeseek() the visible lines do not
   consult lastpoint. */
400 void myseek(int handle,off_t a)
402 if (lseek(handle, a, SEEK_SET) < 0)
403 err("Can not seek locally!\n");
/* Scratch buffer holding one diff page; expwrite() fills it when copying a
   page into the diff file. */
406 char pagebuf[DIFFPAGESIZE];
/* Read `len` bytes at export offset `a` straight from the exported file(s)
   into `buf`. The file is selected by a/hunksize and the position inside it
   is a%hunksize.
   Returns 0 when read() delivered exactly `len` bytes and 1 otherwise, so a
   short read counts as failure. */
408 int rawexpread(off_t a, char *buf, int len)
410 maybeseek(export[a/hunksize], a%hunksize);
411 return (read(export[a/hunksize], buf, len) != len);
/* Read `len` bytes at export offset `a` into `buf`. Without F_COPYONWRITE
   this is a plain rawexpread(). Otherwise the range is processed one
   DIFFPAGESIZE page at a time: pages recorded in difmap[] are read from the
   diff file, all others from the original export.
   NOTE(review): the lines computing `offset` and the closing lines with the
   final return are missing from this excerpt. */
414 int expread(off_t a, char *buf, int len)
417 off_t mapcnt, mapl, maph, pagestart;
419 if (!(flags & F_COPYONWRITE))
420 return rawexpread(a, buf, len);
421 DEBUG3("Asked to read %d bytes at %Lu.\n", len, (unsigned long long)a);
/* First and last page index touched by the request. */
423 mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
425 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
426 pagestart=mapcnt*DIFFPAGESIZE;
/* Bytes to take from this page: the rest of the request or the rest of the
   page, whichever is smaller. */
428 rdlen=(len<DIFFPAGESIZE-offset) ? len : DIFFPAGESIZE-offset;
/* An entry of (u32)-1 in difmap[] means the page was never written. */
429 if (difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
430 DEBUG3("Page %Lu is at %lu\n", (unsigned long long)mapcnt,
431 (unsigned long)difmap[mapcnt]);
432 myseek(difffile, difmap[mapcnt]*DIFFPAGESIZE+offset);
433 if (read(difffile, buf, rdlen) != rdlen) return -1;
434 } else { /* the block is not there */
435 DEBUG2("Page %Lu is not here, we read the original one\n",
436 (unsigned long long)mapcnt);
/* NOTE(review): this returns from inside the page loop, so when a request
   spans several pages the pages after the first unmodified one do not appear
   to be read - confirm against the complete source. */
437 return rawexpread(a, buf, rdlen);
/* Advance past the bytes handled for this page. */
439 len-=rdlen; a+=rdlen; buf+=rdlen;
/* Write `len` bytes from `buf` at export offset `a` straight to the exported
   file(s). The file is selected by a/hunksize and the position inside it is
   a%hunksize.
   Returns 0 when write() accepted exactly `len` bytes and 1 otherwise, so a
   short write counts as failure. */
444 int rawexpwrite(off_t a, char *buf, int len)
446 maybeseek(export[a/hunksize], a%hunksize);
447 return (write(export[a/hunksize], buf, len) != len);
/* Write `len` bytes from `buf` at export offset `a`. Without F_COPYONWRITE
   this is a plain rawexpwrite(). Otherwise the export itself is left untouched:
   each affected DIFFPAGESIZE page is written into the diff file, and a page
   seen for the first time is copied there from the export before the new
   data is merged in.
   NOTE(review): the lines computing `offset`, the initial value of `rdlen` and
   the closing lines with the final return are missing from this excerpt.
   NOTE(review): mapcnt, difmap[] entries and difffilelen are u32, so the
   products with DIFFPAGESIZE passed to myseek() are computed in 32-bit
   arithmetic. */
451 int expwrite(off_t a, char *buf, int len)
453 u32 mapcnt,mapl,maph ; int wrlen,rdlen ;
454 off_t pagestart ; int offset ;
456 if (!(flags & F_COPYONWRITE))
457 return(rawexpwrite(a,buf,len));
458 DEBUG3("Asked to write %d bytes at %Lu.\n", len, (unsigned long long)a);
/* First and last page index touched by the request. */
460 mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
462 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
463 pagestart=mapcnt*DIFFPAGESIZE ;
/* Bytes to put into this page: the rest of the request or the rest of the
   page, whichever is smaller. */
465 wrlen=(len<DIFFPAGESIZE-offset) ? len : DIFFPAGESIZE-offset ;
467 if (difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
468 DEBUG3("Page %Lu is at %lu\n", (unsigned long long)mapcnt,
469 (unsigned long)difmap[mapcnt]) ;
/* Page already lives in the diff file: overwrite the affected bytes in place. */
470 myseek(difffile,difmap[mapcnt]*DIFFPAGESIZE+offset) ;
471 if (write(difffile, buf, wrlen) != wrlen) return -1 ;
472 } else { /* the block is not there */
/* New page: append it at the end of the diff file and record its position. */
473 myseek(difffile,difffilelen*DIFFPAGESIZE) ;
474 difmap[mapcnt]=difffilelen++ ;
475 DEBUG3("Page %Lu is not here, we put it at %lu\n",
476 (unsigned long long)mapcnt,
477 (unsigned long)difmap[mapcnt]);
/* Do not read across the end of the current export file. */
479 if (rdlen+pagestart%hunksize>hunksize)
480 rdlen=hunksize-(pagestart%hunksize) ;
/* Fetch the original page, overlay the new data, store the full page. */
481 if (rawexpread(pagestart,pagebuf,rdlen)) return -1 ;
482 memcpy(pagebuf+offset,buf,wrlen) ;
483 if (write(difffile,pagebuf,DIFFPAGESIZE)!=DIFFPAGESIZE) return -1 ;
/* Advance past the bytes handled for this page. */
485 len-=wrlen ; a+=wrlen ; buf+=wrlen ;
/* Serve one client on socket `net`: send the negotiation header, then read
   requests and answer them.
   NOTE(review): many lines of this function are missing from this excerpt
   (declarations, allocation of `buf`, the loop header, several conditions and
   all closing braces), so the notes below only describe the visible
   statements. */
490 int mainloop(int net)
492 struct nbd_request request;
493 struct nbd_reply reply;
/* Negotiation: INIT_PASSWD (8 bytes), the magic number and the export size as
   64-bit values in network byte order, then 128 zero bytes.
   NOTE(review): the declaration of `zeros` is not visible, so it cannot be
   checked here that it holds at least 290 bytes.
   NOTE(review): the write() calls are only checked for a negative result;
   short writes pass unnoticed. */
498 memset(zeros, 0, 290);
499 if (write(net, INIT_PASSWD, 8) < 0)
500 err("Negotiation failed: %m");
501 cliserv_magic = htonll(cliserv_magic);
502 if (write(net, &cliserv_magic, sizeof(cliserv_magic)) < 0)
503 err("Negotiation failed: %m");
504 size_host = htonll(exportsize);
505 if (write(net, &size_host, 8) < 0)
506 err("Negotiation failed: %m");
507 if (write(net, zeros, 128) < 0)
508 err("Negotiation failed: %m");
510 DEBUG("Entering request loop!\n");
/* The reply magic never changes, so it is set once. */
511 reply.magic = htonl(NBD_REPLY_MAGIC);
514 #define BUFSIZE (1024*1024)
/* Read one request header and convert offset and type to host byte order. */
524 readit(net, &request, sizeof(request));
525 request.from = ntohll(request.from);
526 request.type = ntohl(request.type);
/* Type 2 ends the session: the page map is freed, the diff file is closed and
   removed, and the process leaves through err().
   NOTE(review): in the visible order this happens before the request magic is
   validated below. */
528 if (request.type==2) { /* Disconnect request */
529 if (difmap) free(difmap) ;
531 close(difffile) ; unlink(difffilename) ; }
532 err("Disconnect request received.") ;
535 len = ntohl(request.len);
537 if (request.magic != htonl(NBD_REQUEST_MAGIC))
538 err("Not enough magic.");
540 err("Request too big!");
542 printf("%s from %Lu (%Lu) len %d, ", request.type ? "WRITE" :
543 "READ", (unsigned long long)request.from,
544 (unsigned long long)request.from / 512, len);
/* The reply carries the same handle as the request. */
546 memcpy(reply.handle, request.handle, sizeof(reply.handle));
547 if ((request.from + len) > (OFFT_MAX)) {
548 DEBUG("[Number too large!]");
/* Reject ranges beyond the end of the export, and any non-read request while
   F_READONLY is set. */
552 if ((((off_t)request.from + len) > exportsize) ||
553 ((flags & F_READONLY) && request.type)) {
/* Write: pull the payload off the socket, then store it. With autoreadonly
   set to 1 the write is treated as failed without calling expwrite(). */
558 if (request.type==1) { /* WRITE */
559 DEBUG("wr: net->buf, ");
560 readit(net, buf, len);
562 if ((autoreadonly == 1) || expwrite(request.from, buf, len)) {
563 DEBUG("Write failed: %m" );
/* Read: the data is placed behind room for the reply header so that header
   and payload leave in a single writeit() call. */
575 if (expread(request.from, buf + sizeof(struct nbd_reply), len)) {
577 DEBUG("Read failed: %m");
584 memcpy(buf, &reply, sizeof(struct nbd_reply));
585 writeit(net, buf, len + sizeof(struct nbd_reply));
/* Export file name after the peer address has been substituted into
   exportname; written by set_peername(). */
590 char exportname2[1024];
/* Look up the peer of socket `net`, format its IPv4 address as text, build
   exportname2 from exportname with that address, log the connection, and copy
   the address into `clientname`.
   NOTE(review): some lines of this function are missing from this excerpt. */
592 void set_peername(int net,char *clientname)
594 struct sockaddr_in addrin;
/* NOTE(review): getpeername() expects a pointer to socklen_t, not int. */
595 int addrinlen = sizeof( addrin );
/* NOTE(review): the error text names getsockname, but the failing call is
   getpeername. */
598 if (getpeername( net, (struct sockaddr *) &addrin, &addrinlen ) < 0)
599 err("getsockname failed: %m");
600 peername = inet_ntoa(addrin.sin_addr);
/* NOTE(review): exportname comes from the command line and is used here as a
   printf format string, with no bound on what is written into the 1024-byte
   exportname2. */
601 sprintf(exportname2, exportname, peername);
603 msg4(LOG_INFO, "connect from %s, assigned file is %s", peername, exportname2);
/* At most 255 bytes are copied; strncpy() adds no terminator when the source
   is that long or longer. */
604 strncpy(clientname,peername,255) ;
/* Determine the size in bytes of the open file or device `export` by trying
   three methods in turn: seeking to the end, fstat(), and the BLKGETSIZE
   ioctl. If none of them yields a size, err() is called.
   NOTE(review): some lines are missing from this excerpt (declarations, the
   returns after the first and third method, and the braces around the err()
   calls), so the exact conditions under which each err() fires cannot be
   confirmed here. */
607 off_t size_autodetect(int export)
611 struct stat stat_buf;
/* Method 1: the offset reported by lseek() to the end of the file. */
614 DEBUG("looking for export size with lseek SEEK_END\n");
615 es = lseek(export, (off_t)0, SEEK_END);
616 if (es > ((off_t)0)) {
619 DEBUG2("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4)));
/* Method 2: st_size from fstat(); a size of zero is not accepted. */
622 DEBUG("looking for export size with fstat\n");
623 stat_buf.st_size = 0;
624 error = fstat(export, &stat_buf);
625 if (!error && stat_buf.st_size > 0) {
626 return (off_t)stat_buf.st_size;
628 err("fstat failed: %m");
/* Method 3: the ioctl result is multiplied by 512 to obtain bytes. */
632 DEBUG("looking for export size with ioctl BLKGETSIZE\n");
633 if (!ioctl(export, BLKGETSIZE, &es32) && es32) {
634 es = (off_t)es32 * (off_t)512;
638 err("Could not find size of exported block device: %m");
/* Program entry point: checks the wire layout of struct nbd_request and then
   runs the accept loop on the configured port.
   NOTE(review): the lines between the structure check and the port test are
   missing from this excerpt; presumably cmdline() is called there - TODO
   confirm. */
642 int main(int argc, char *argv[])
/* The request header must be exactly 28 bytes; any other size indicates
   compiler padding. */
647 if (sizeof( struct nbd_request )!=28) {
648 fprintf(stderr,"Bad size of structure. Alignment problems?\n");
/* NOTE(review): the usage text says port 0 means "use stdin (inetd)", but the
   visible code simply returns 1 for port 0. */
654 if (!port) return 1 ;
655 connectme(port); /* serve infinitely */
660 void serveconnection(int net)
664 for (i=0; i<exportsize; i+=hunksize) {
665 char exportname3[1024];
667 sprintf(exportname3, exportname2, i/hunksize);
668 printf( "Opening %s\n", exportname3 );
669 if ((export[i/hunksize] = open(exportname3, (flags & F_READONLY) ? O_RDONLY : O_RDWR)) == -1) {
670 /* Read WRITE ACCESS was requested by media is only read only */
673 if ((export[i/hunksize] = open(exportname3, O_RDONLY)) == -1)
674 err("Could not open exported file: %m");
678 if (exportsize == (off_t)OFFT_MAX) {
679 exportsize = size_autodetect(export[0]);
681 if (exportsize > (off_t)OFFT_MAX) {
682 err("Size of exported file is too big\n");
685 msg3(LOG_INFO, "size of exported file/device is %Lu",
686 (unsigned long long)exportsize);
688 if (flags & F_COPYONWRITE) {
689 sprintf(difffilename,"%s-%s-%d.diff",exportname2,clientname,
691 msg3(LOG_INFO,"About to create map and diff file %s",difffilename) ;
692 difffile=open(difffilename,O_RDWR | O_CREAT | O_TRUNC,0600) ;
693 if (difffile<0) err("Could not create diff file (%m)") ;
694 if ((difmap=calloc(exportsize/DIFFPAGESIZE,sizeof(u32)))==NULL)
695 err("Could not allocate memory") ;
696 for (i=0;i<exportsize/DIFFPAGESIZE;i++) difmap[i]=(u32)-1 ;