2 * Network Block Device - server
4 * Copyright 1996-1998 Pavel Machek, distribute under GPL
5 * <pavel@atrey.karlin.mff.cuni.cz>
6 * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk>
8 * Version 1.0 - hopefully 64-bit-clean
9 * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au>
10 * Version 1.2 - autodetect size of block devices, thanks to Peter T. Breuer <ptb@it.uc3m.es>
11 * Version 1.5 - can compile on Unix systems that don't have 64 bit integer
12 * type, or don't have 64 bit file offsets by defining FS_32BIT
13 * in compile options for nbd-server *only*. This can be done
14 * with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the
15 * original autoconf input file, or I would make it a configure
16 * option.) Ken Yap <ken@nlc.net.au>.
17 * Version 1.6 - fix autodetection of block device size and really make 64 bit
18 * clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk>
19 * Version 2.0 - Version synchronised with client
20 * Version 2.1 - Reap zombie client processes when they exit. Removed
21 * (uncommented) the _IO magic, it's no longer necessary. Wouter
22 * Verhelst <wouter@debian.org>
23 * Version 2.2 - Auto switch to read-only mode (useful for floppies).
24 * Version 2.3 - Fixed code so that Large File Support works. This
25 * removes the FS_32BIT compile-time directive; define
26 * _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be
27 * using FS_32BIT. This will allow you to use files >2GB instead of
28 * having to use the -m option. Wouter Verhelst <wouter@debian.org>
29 * Version 2.4 - Added code to keep track of children, so that we can
30 * properly kill them from initscripts. Add a call to daemon(),
31 * so that processes don't think they have to wait for us, which is
32 * interesting for initscripts as well. Wouter Verhelst
36 #define VERSION PACKAGE_VERSION
37 #define GIGA (1*1024*1024*1024)
39 #include <sys/types.h>
40 #include <sys/socket.h>
42 #include <sys/wait.h> /* wait */
43 #include <signal.h> /* sigaction */
44 #include <netinet/tcp.h>
45 #include <netinet/in.h> /* sockaddr_in, htons, in_addr */
46 #include <netdb.h> /* hostent, gethostby*, getservby* */
53 #include <arpa/inet.h>
58 #define MY_NAME "nbd_server"
60 /* Authorization file should contain lines with IP addresses of
61 clients authorized to use the server. If it does not exist,
62 access is permitted. */
63 #define AUTH_FILE "nbd_server.allow"
64 /* how much space for child PIDs we have by default. Dynamically
65 allocated, and will be realloc()ed if out of space, so this should
66 probably be fair for most situations. */
67 #define DEFAULT_CHILD_ARRAY 256
71 /* Deep magic: ioctl.h defines _IO macro (at least on linux) */
74 /* Debugging macros, now nothing goes to syslog unless you say ISSERVER */
76 #define msg2(a,b) syslog(a,b)
77 #define msg3(a,b,c) syslog(a,b,c)
78 #define msg4(a,b,c,d) syslog(a,b,c,d)
80 #define msg2(a,b) do { fprintf(stderr,b) ; fputs("\n",stderr) ; } while(0)
81 #define msg3(a,b,c) do { fprintf(stderr,b,c); fputs("\n",stderr) ; } while(0)
82 #define msg4(a,b,c,d) do { fprintf(stderr,b,c,d); fputs("\n",stderr) ; } while(0)
86 #include <sys/ioctl.h>
87 #include <sys/mount.h> /* For BLKGETSIZE */
91 #define DEBUG( a ) printf( a )
92 #define DEBUG2( a,b ) printf( a,b )
93 #define DEBUG3( a,b,c ) printf( a,b,c )
97 #define DEBUG3( a,b,c )
100 void serveconnection(int net);
101 void set_peername(int net,char *clientname);
104 char difffilename[256];
105 unsigned int timeout = 0;
106 int autoreadonly = 0;
108 int authorized_client(char *name)
109 /* 0 - authorization refused, 1 - OK
110 authorization file contains one line per machine, no wildcards
116 if ((f=fopen(AUTH_FILE,"r"))==NULL)
117 { msg4(LOG_INFO,"Can't open authorization file %s (%s).",
118 AUTH_FILE,strerror(errno)) ;
122 while (fgets(line,LINELEN,f)!=NULL) {
123 if (strncmp(line,name,strlen(name))==0) { fclose(f) ; return 1 ; }
129 inline void readit(int f, void *buf, int len)
134 if ((res = read(f, buf, len)) <= 0)
135 err("Read failed: %m");
141 inline void writeit(int f, void *buf, int len)
146 if ((res = send(f, buf, len, 0)) <= 0)
147 err("Send failed: %m");
153 /* This is starting to get ugly. If someone knows a better way to find
154 * the maximum value of a signed type *without* relying on overflow
155 * (doing so breaks on 64bit architectures), that would be nice.
157 #define OFFT_MAX (((((off_t)1)<<((sizeof(off_t)-1)*8))-1)<<7)+127
158 int port; /* Port I'm listening at */
159 char *exportname; /* File I'm exporting */
160 off_t exportsize = OFFT_MAX; /* ...and its length */
161 off_t hunksize = OFFT_MAX;
165 u32 difffilelen=0 ; /* number of pages in difffile */
167 char clientname[256] ;
168 int child_arraysize=DEFAULT_CHILD_ARRAY;
171 #define DIFFPAGESIZE 4096 /* diff file uses those chunks */
174 #define F_MULTIFILE 2
175 #define F_COPYONWRITE 4
177 void cmdline(int argc, char *argv[])
182 printf("This is nbd-server version " VERSION "\n");
183 printf("Usage: port file_to_export [size][kKmM] [-r] [-m] [-c] [-a timeout_sec]\n"
185 " -m multiple file\n"
186 " -c copy on write\n"
187 " -a maximum idle seconds, terminates when idle time exceeded\n"
188 " if port is set to 0, stdin is used (for running from inetd)\n"
189 " if file_to_export contains '%%s', it is substituted with IP\n"
190 " address of machine trying to connect\n" );
193 port = atoi(argv[1]);
194 for (i = 3; i < argc; i++) {
195 if (*argv[i] == '-') {
196 switch (argv[i][1]) {
201 flags |= F_MULTIFILE;
204 case 'c': flags |=F_COPYONWRITE;
208 timeout = atoi(argv[i+1]);
211 fprintf(stderr, "timeout requires argument\n");
217 int last = strlen(argv[i])-1;
218 char suffix = argv[i][last];
219 if (suffix == 'k' || suffix == 'K' ||
220 suffix == 'm' || suffix == 'M')
221 argv[i][last] = '\0';
222 es = (off_t)atol(argv[i]);
234 exportname = argv[2];
237 void sigchld_handler(int s)
243 while((pid=wait(status)) > 0) {
244 if(WIFEXITED(status)) {
245 msg3(LOG_INFO, "Child exited with %d", WEXITSTATUS(status));
247 for(i=0;children[i]!=pid&&i<child_arraysize;i++);
248 if(i>=child_arraysize) {
249 msg3(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld",(long) pid);
251 children[i]=(pid_t)0;
252 DEBUG2("Removing %d from the list of children", pid);
257 /* If we are terminated, make sure our children are, too. */
258 void sigterm_handler(int s) {
261 for(i=0;i<child_arraysize;i++) {
263 kill(children[i], s);
270 void connectme(int port)
272 struct sockaddr_in addrin;
274 int addrinlen = sizeof(addrin);
275 int net, sock, newpid, i;
282 if ((sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0)
285 /* lose the pesky "Address already in use" error message */
286 if (setsockopt(sock,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) {
290 DEBUG("Waiting for connections... bind, ");
291 addrin.sin_family = AF_INET;
292 addrin.sin_port = htons(port);
293 addrin.sin_addr.s_addr = 0;
294 if (bind(sock, (struct sockaddr *) &addrin, addrinlen) < 0)
297 if (listen(sock, 1) < 0)
300 sa.sa_handler = sigchld_handler;
301 sigemptyset(&sa.sa_mask);
302 sa.sa_flags = SA_RESTART;
303 if(sigaction(SIGCHLD, &sa, NULL) == -1)
304 err("sigaction: %m");
305 sa.sa_handler = sigterm_handler;
306 sigemptyset(&sa.sa_mask);
307 sa.sa_flags = SA_RESTART;
308 if(sigaction(SIGTERM, &sa, NULL) == -1)
309 err("sigaction: %m");
310 children=malloc(sizeof(pid_t)*child_arraysize);
311 memset(children, 0, sizeof(pid_t)*DEFAULT_CHILD_ARRAY);
318 #endif /* NODAEMON */
319 for(;;) { /* infinite loop */
320 if ((net = accept(sock, (struct sockaddr *) &addrin, &addrinlen)) < 0)
323 set_peername(net,clientname) ;
324 if (!authorized_client(clientname)) {
325 msg2(LOG_INFO,"Unauthorized client") ;
329 msg2(LOG_INFO,"Authorized client") ;
330 for(i=0;children[i]&&i<child_arraysize;i++);
331 if(i>=child_arraysize) {
332 realloc(children, sizeof(pid_t)*child_arraysize);
333 memset(children+child_arraysize, 0, sizeof(pid_t)*DEFAULT_CHILD_ARRAY);
335 child_arraysize+=DEFAULT_CHILD_ARRAY;
338 if ((children[i]=fork())<0) {
339 msg3(LOG_INFO,"Could not fork (%s)",strerror(errno)) ;
343 if (children[i]>0) { /* parent */
344 close(net) ; continue ; }
349 msg2(LOG_INFO,"Starting to serve") ;
350 serveconnection(net) ;
354 #define SEND writeit( net, &reply, sizeof( reply ));
355 #define ERROR { reply.error = htonl(-1); SEND; reply.error = 0; lastpoint = -1; }
357 off_t lastpoint = (off_t)-1;
359 void maybeseek(int handle, off_t a)
362 err("Can not happen\n");
363 if (lastpoint != a) {
364 if (lseek(handle, a, SEEK_SET) < 0)
365 err("Can not seek locally!\n");
372 void myseek(int handle,off_t a)
374 if (lseek(handle, a, SEEK_SET) < 0)
375 err("Can not seek locally!\n");
378 char pagebuf[DIFFPAGESIZE];
380 int rawexpread(off_t a, char *buf, int len)
382 maybeseek(export[a/hunksize], a%hunksize);
383 return (read(export[a/hunksize], buf, len) != len);
386 int expread(off_t a, char *buf, int len)
389 off_t mapcnt, mapl, maph, pagestart;
391 if (!(flags & F_COPYONWRITE))
392 return rawexpread(a, buf, len);
393 DEBUG3("Asked to read %d bytes at %Lu.\n", len, (unsigned long long)a);
395 mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
397 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
398 pagestart=mapcnt*DIFFPAGESIZE;
400 rdlen=(len<DIFFPAGESIZE-offset) ? len : DIFFPAGESIZE-offset;
401 if (difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
402 DEBUG3("Page %Lu is at %lu\n", (unsigned long long)mapcnt,
403 (unsigned long)difmap[mapcnt]);
404 myseek(difffile, difmap[mapcnt]*DIFFPAGESIZE+offset);
405 if (read(difffile, buf, rdlen) != rdlen) return -1;
406 } else { /* the block is not there */
407 DEBUG2("Page %Lu is not here, we read the original one\n",
408 (unsigned long long)mapcnt);
409 return rawexpread(a, buf, rdlen);
411 len-=rdlen; a+=rdlen; buf+=rdlen;
416 int rawexpwrite(off_t a, char *buf, int len)
418 maybeseek(export[a/hunksize], a%hunksize);
419 return (write(export[a/hunksize], buf, len) != len);
423 int expwrite(off_t a, char *buf, int len)
425 u32 mapcnt,mapl,maph ; int wrlen,rdlen ;
426 off_t pagestart ; int offset ;
428 if (!(flags & F_COPYONWRITE))
429 return(rawexpwrite(a,buf,len));
430 DEBUG3("Asked to write %d bytes at %Lu.\n", len, (unsigned long long)a);
432 mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
434 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
435 pagestart=mapcnt*DIFFPAGESIZE ;
437 wrlen=(len<DIFFPAGESIZE-offset) ? len : DIFFPAGESIZE-offset ;
439 if (difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
440 DEBUG3("Page %Lu is at %lu\n", (unsigned long long)mapcnt,
441 (unsigned long)difmap[mapcnt]) ;
442 myseek(difffile,difmap[mapcnt]*DIFFPAGESIZE+offset) ;
443 if (write(difffile, buf, wrlen) != wrlen) return -1 ;
444 } else { /* the block is not there */
445 myseek(difffile,difffilelen*DIFFPAGESIZE) ;
446 difmap[mapcnt]=difffilelen++ ;
447 DEBUG3("Page %Lu is not here, we put it at %lu\n",
448 (unsigned long long)mapcnt,
449 (unsigned long)difmap[mapcnt]);
451 if (rdlen+pagestart%hunksize>hunksize)
452 rdlen=hunksize-(pagestart%hunksize) ;
453 if (rawexpread(pagestart,pagebuf,rdlen)) return -1 ;
454 memcpy(pagebuf+offset,buf,wrlen) ;
455 if (write(difffile,pagebuf,DIFFPAGESIZE)!=DIFFPAGESIZE) return -1 ;
457 len-=wrlen ; a+=wrlen ; buf+=wrlen ;
462 int mainloop(int net)
464 struct nbd_request request;
465 struct nbd_reply reply;
470 memset(zeros, 0, 290);
471 if (write(net, INIT_PASSWD, 8) < 0)
472 err("Negotiation failed: %m");
473 cliserv_magic = htonll(cliserv_magic);
474 if (write(net, &cliserv_magic, sizeof(cliserv_magic)) < 0)
475 err("Negotiation failed: %m");
476 size_host = htonll(exportsize);
477 if (write(net, &size_host, 8) < 0)
478 err("Negotiation failed: %m");
479 if (write(net, zeros, 128) < 0)
480 err("Negotiation failed: %m");
482 DEBUG("Entering request loop!\n");
483 reply.magic = htonl(NBD_REPLY_MAGIC);
486 #define BUFSIZE (1024*1024)
496 readit(net, &request, sizeof(request));
497 request.from = ntohll(request.from);
498 request.type = ntohl(request.type);
500 if (request.type==2) { /* Disconnect request */
501 if (difmap) free(difmap) ;
503 close(difffile) ; unlink(difffilename) ; }
504 err("Disconnect request received.") ;
507 len = ntohl(request.len);
509 if (request.magic != htonl(NBD_REQUEST_MAGIC))
510 err("Not enough magic.");
512 err("Request too big!");
514 printf("%s from %Lu (%Lu) len %d, ", request.type ? "WRITE" :
515 "READ", (unsigned long long)request.from,
516 (unsigned long long)request.from / 512, len);
518 memcpy(reply.handle, request.handle, sizeof(reply.handle));
519 if ((request.from + len) > (OFFT_MAX)) {
520 DEBUG("[Number too large!]");
524 if ((((off_t)request.from + len) > exportsize) ||
525 ((flags & F_READONLY) && request.type)) {
530 if (request.type==1) { /* WRITE */
531 DEBUG("wr: net->buf, ");
532 readit(net, buf, len);
534 if ((autoreadonly == 1) || expwrite(request.from, buf, len)) {
535 DEBUG("Write failed: %m" );
547 if (expread(request.from, buf + sizeof(struct nbd_reply), len)) {
549 DEBUG("Read failed: %m");
556 memcpy(buf, &reply, sizeof(struct nbd_reply));
557 writeit(net, buf, len + sizeof(struct nbd_reply));
562 char exportname2[1024];
564 void set_peername(int net,char *clientname)
566 struct sockaddr_in addrin;
567 int addrinlen = sizeof( addrin );
570 if (getpeername( net, (struct sockaddr *) &addrin, &addrinlen ) < 0)
571 err("getsockname failed: %m");
572 peername = inet_ntoa(addrin.sin_addr);
573 sprintf(exportname2, exportname, peername);
575 msg4(LOG_INFO, "connect from %s, assigned file is %s", peername, exportname2);
576 strncpy(clientname,peername,255) ;
579 off_t size_autodetect(int export)
583 struct stat stat_buf;
586 DEBUG("looking for export size with lseek SEEK_END\n");
587 es = lseek(export, (off_t)0, SEEK_END);
588 if (es > ((off_t)0)) {
591 DEBUG2("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4)));
594 DEBUG("looking for export size with fstat\n");
595 stat_buf.st_size = 0;
596 error = fstat(export, &stat_buf);
597 if (!error && stat_buf.st_size > 0) {
598 return (off_t)stat_buf.st_size;
600 err("fstat failed: %m");
604 DEBUG("looking for export size with ioctl BLKGETSIZE\n");
605 if (!ioctl(export, BLKGETSIZE, &es32) && es32) {
606 es = (off_t)es32 * (off_t)512;
610 err("Could not find size of exported block device: %m");
614 int main(int argc, char *argv[])
619 if (sizeof( struct nbd_request )!=28) {
620 fprintf(stderr,"Bad size of structure. Alignment problems?\n");
626 if (!port) return 1 ;
627 connectme(port); /* serve infinitely */
632 void serveconnection(int net)
636 for (i=0; i<exportsize; i+=hunksize) {
637 char exportname3[1024];
639 sprintf(exportname3, exportname2, i/hunksize);
640 printf( "Opening %s\n", exportname3 );
641 if ((export[i/hunksize] = open(exportname3, (flags & F_READONLY) ? O_RDONLY : O_RDWR)) == -1) {
642 /* Read-write access was requested, but the media is read-only */
645 if ((export[i/hunksize] = open(exportname3, O_RDONLY)) == -1)
646 err("Could not open exported file: %m");
650 if (exportsize == (off_t)OFFT_MAX) {
651 exportsize = size_autodetect(export[0]);
653 if (exportsize > (off_t)OFFT_MAX) {
654 err("Size of exported file is too big\n");
657 msg3(LOG_INFO, "size of exported file/device is %Lu",
658 (unsigned long long)exportsize);
660 if (flags & F_COPYONWRITE) {
661 sprintf(difffilename,"%s-%s-%d.diff",exportname2,clientname,
663 msg3(LOG_INFO,"About to create map and diff file %s",difffilename) ;
664 difffile=open(difffilename,O_RDWR | O_CREAT | O_TRUNC,0600) ;
665 if (difffile<0) err("Could not create diff file (%m)") ;
666 if ((difmap=calloc(exportsize/DIFFPAGESIZE,sizeof(u32)))==NULL)
667 err("Could not allocate memory") ;
668 for (i=0;i<exportsize/DIFFPAGESIZE;i++) difmap[i]=(u32)-1 ;