2 * Network Block Device - server
4 * Copyright 1996-1998 Pavel Machek, distribute under GPL
5 * <pavel@atrey.karlin.mff.cuni.cz>
6 * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk>
8 * Version 1.0 - hopefully 64-bit-clean
9 * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au>
10 * Version 1.2 - autodetect size of block devices, thanks to "Peter T. Breuer" <ptb@it.uc3m.es>
11 * Version 1.5 - can compile on Unix systems that don't have 64 bit integer
12 * type, or don't have 64 bit file offsets by defining FS_32BIT
13 * in compile options for nbd-server *only*. This can be done
14 * with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the
15 * original autoconf input file, or I would make it a configure
16 * option.) Ken Yap <ken@nlc.net.au>.
17 * Version 1.6 - fix autodetection of block device size and really make 64 bit
18 * clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk>
19 * Version 2.0 - Version synchronised with client
20 * Version 2.1 - Reap zombie client processes when they exit. Removed
21 * (uncommented) the _IO magic, it's no longer necessary.
22 * Version 2.2 - Auto switch to read-only mode (useful for floppies).
/* 2^30 (1024 * 1024 * 1024).  All operands are int literals, so the
   expression is evaluated in plain int arithmetic. */
26 #define GIGA (1*1024*1024*1024)
28 #include <sys/types.h>
29 #include <sys/socket.h>
31 #include <sys/wait.h> /* wait */
32 #include <signal.h> /* sigaction */
33 #include <netinet/tcp.h>
34 #include <netinet/in.h> /* sockaddr_in, htons, in_addr */
35 #include <netdb.h> /* hostent, gethostby*, getservby* */
42 #include <arpa/inet.h>
/* Program name string.  Its users are not part of this listing. */
47 #define MY_NAME "nbd_server"
49 /* Authorization file should contain lines with IP addresses of
50 clients authorized to use the server. If it does not exist,
51 access is permitted. */
/* The name is a relative path, so the fopen() call in authorized_client()
   resolves it against the server's current working directory. */
52 #define AUTH_FILE "nbd_server.allow"
56 /* Deep magic: ioctl.h defines _IO macro (at least on linux) */
59 /* Debugging macros, now nothing goes to syslog unless you say ISSERVER */
/* msg2/msg3/msg4 take a priority `a`, a printf-style format `b` and zero,
   one or two further arguments.  Two alternative sets of definitions
   follow; the preprocessor conditional that selects one of them is not
   part of this listing. */
/* Variant 1: hand everything to syslog(). */
61 #define msg2(a,b) syslog(a,b)
62 #define msg3(a,b,c) syslog(a,b,c)
63 #define msg4(a,b,c,d) syslog(a,b,c,d)
/* Variant 2: ignore the priority, format to stderr and append a newline.
   The do { } while(0) wrapper makes each macro usable as one statement. */
65 #define msg2(a,b) do { fprintf(stderr,b) ; fputs("\n",stderr) ; } while(0)
66 #define msg3(a,b,c) do { fprintf(stderr,b,c); fputs("\n",stderr) ; } while(0)
67 #define msg4(a,b,c,d) do { fprintf(stderr,b,c,d); fputs("\n",stderr) ; } while(0)
71 #include <sys/ioctl.h>
72 #include <sys/mount.h> /* For BLKGETSIZE */
/* Type used for export sizes and offsets.  A 32-bit and a 64-bit typedef
   both appear; the conditional choosing between them is not visible.
   NOTE(review): presumably keyed on FS_32BIT, which the seek helpers test
   - confirm. */
75 typedef u32 fsoffset_t;
79 typedef u64 fsoffset_t;
/* Debug trace helpers.  The visible definitions forward to printf(); a
   second, empty DEBUG3 is also present, so a conditional (not visible
   here) evidently switches tracing on and off. */
85 #define DEBUG( a ) printf( a )
86 #define DEBUG2( a,b ) printf( a,b )
87 #define DEBUG3( a,b,c ) printf( a,b,c )
91 #define DEBUG3( a,b,c )
/* Declare llseek() by hand when HAVE_LLSEEK is defined, except on Sun
   systems.  The matching #endif is not part of this listing. */
94 #if defined(HAVE_LLSEEK) && !defined(sun)
95 /* Solaris already has llseek defined in unistd.h */
96 extern long long llseek(unsigned int, long long, unsigned int);
/* Forward declarations of functions defined further down in the file. */
99 void serveconnection(int net);
100 void set_peername(int net,char *clientname);
/* Name of the diff file.  serveconnection() formats it when F_COPYONWRITE
   is set, and the disconnect branch of mainloop() passes it to unlink(). */
103 char difffilename[256];
/* Idle timeout in seconds, taken from the argument of -a in cmdline();
   0 by default.  Where it is enforced is not visible in this listing. */
104 unsigned int timeout = 0;
/* When 1, mainloop() takes its write-failure branch for every write
   request without calling expwrite().  The code that sets it is not
   visible here. */
105 int autoreadonly = 0;
/* Decide whether the client called `name` may use the server.
 *
 * Opens AUTH_FILE and compares every line read by fgets() with `name`;
 * the file is closed and 1 is returned as soon as a line starts with
 * `name`.  When the file cannot be opened, an informational message with
 * the strerror() text is emitted through msg4().  The local declarations,
 * the statement that follows the failed-fopen message and the code after
 * the loop are not part of this listing.
 *
 * NOTE(review): strncmp() is limited to strlen(name), so only a prefix of
 * each line is compared.  A client "10.0.0.1" therefore also matches a
 * line "10.0.0.10", and an empty `name` matches any line.  Confirm
 * whether an exact, whole-line match was intended.
 */
107 int authorized_client(char *name)
108 /* 0 - authorization refused, 1 - OK
109 authorization file contains one line per machine, no wildcards
115 if ((f=fopen(AUTH_FILE,"r"))==NULL)
116 { msg4(LOG_INFO,"Can't open authorization file %s (%s).",
117 AUTH_FILE,strerror(errno)) ;
121 while (fgets(line,LINELEN,f)!=NULL) {
122 if (strncmp(line,name,strlen(name))==0) { fclose(f) ; return 1 ; }
// Read from descriptor `f` into `buf`, asking read() for `len` bytes.
// A read() result of zero or less (end of file or error) is reported
// through err() with the "Read failed" message.
// The declaration of `res` and whatever surrounds the read() call (for
// example a loop over partial reads) are not part of this listing.
129 inline void readit(int f, void *buf, int len)
134 if ((res = read(f, buf, len)) <= 0)
135 err("Read failed: %m");
// Write from `buf` to descriptor `f`, asking write() for `len` bytes.
// A write() result of zero or less is reported through err() with the
// "Write failed" message.
// The declaration of `res` and whatever surrounds the write() call (for
// example a loop over partial writes) are not part of this listing.
141 inline void writeit(int f, void *buf, int len)
146 if ((res = write(f, buf, len)) <= 0)
147 err("Write failed: %m");
// Server-wide state and constants used by the functions that follow.
// port and exportname are filled in by cmdline() from argv[1] and argv[2].
153 int port; /* Port I'm listening at */
154 char *exportname; /* File I'm exporting */
155 fsoffset_t exportsize = (fsoffset_t)-1; /* ...and its length */
// hunksize is the divisor that maps an export offset `a` to the file
// export[a/hunksize] at position a%hunksize (see rawexpread/rawexpwrite).
// (fsoffset_t)-1 is the initial value of both variables; serveconnection()
// autodetects exportsize while it still holds that value.
156 fsoffset_t hunksize = (fsoffset_t)-1;
160 u32 difffilelen=0 ; /* number of pages in difffile */
// Peer name of the connected client, filled in by set_peername().
162 char clientname[256] ;
165 #define DIFFPAGESIZE 4096 /* diff file uses those chunks */
// Bit values tested against the global `flags`.  F_READONLY is used
// elsewhere in the file but its definition is not part of this listing.
168 #define F_MULTIFILE 2
169 #define F_COPYONWRITE 4
/* Parse the command line into the globals port, exportname, flags and
 * timeout.
 *
 * argv[1] is the port, argv[2] the file to export; arguments from index 3
 * on are either options (leading '-') or a size with an optional k/K/m/M
 * suffix.  Several lines of this function - the argument-count check in
 * front of the usage text, most case labels, the scaling of the parsed
 * size and the closing braces - are not part of this listing.
 */
171 void cmdline(int argc, char *argv[])
/* Version banner and usage text. */
176 printf("This is nbd-server version " VERSION "\n");
177 printf("Usage: port file_to_export [size][kKmM] [-r] [-m] [-c] [-a timeout_sec]\n"
179 " -m multiple file\n"
180 " -c copy on write\n"
181 " -a maximum idle seconds, terminates when idle time exceeded\n"
182 " if port is set to 0, stdin is used (for running from inetd)\n"
183 " if file_to_export contains '%%s', it is substituted with IP\n"
184 " address of machine trying to connect\n" );
/* atoi() gives 0 for non-numeric text, which the usage text documents as
   the inetd/stdin mode. */
187 port = atoi(argv[1]);
188 for (i = 3; i < argc; i++) {
189 if (*argv[i] == '-') {
/* Options are selected by the character after the dash. */
190 switch (argv[i][1]) {
195 flags |= F_MULTIFILE;
198 case 'c': flags |=F_COPYONWRITE;
/* The timeout value is the NEXT argument.  The guard that leads to the
   "timeout requires argument" message is not visible here. */
202 timeout = atoi(argv[i+1]);
205 fprintf(stderr, "timeout requires argument\n");
/* Size argument: inspect the last character for a unit suffix.
   NOTE(review): for an empty argument `last` is -1 and argv[i][last]
   reads in front of the string - confirm this cannot happen. */
211 int last = strlen(argv[i])-1;
212 char suffix = argv[i][last];
213 if (suffix == 'k' || suffix == 'K' ||
214 suffix == 'm' || suffix == 'M')
/* Cut the suffix off in place so atol() sees digits only. */
215 argv[i][last] = '\0';
/* NOTE(review): atol() yields a long, so the parsed value is limited to
   the range of long before it is converted to fsoffset_t. */
216 es = (fsoffset_t)atol(argv[i]);
228 exportname = argv[2];
/* SIGCHLD handler: reap every child process that has already exited.
 *
 * waitpid() is called with WNOHANG so that the handler returns as soon as
 * no further zombie is waiting.  A blocking wait() loop would, after
 * reaping the first zombie, sleep inside the handler until the next child
 * exits; with other clients still being served that stalls the accept
 * loop that installed this handler.
 *
 * errno is saved and restored because the final waitpid() call normally
 * fails with ECHILD (or returns 0) and must not disturb the errno value
 * of the code that was interrupted by the signal.
 *
 * s: signal number, unused.
 */
void sigchld_handler(int s)
{
	int saved_errno = errno;

	(void)s;
	while (waitpid(-1, NULL, WNOHANG) > 0)
		;
	errno = saved_errno;
}
/* Listen on TCP `port` and serve every accepted, authorized client in a
 * forked child process.
 *
 * The function creates the listening socket, installs sigchld_handler for
 * SIGCHLD and then loops forever around accept().  Error branches after
 * socket/setsockopt/bind/listen/accept, the declarations of `sa` and
 * `yes`, and the handling that follows the "Unauthorized client" and
 * "Could not fork" messages are not part of this listing.
 */
236 void connectme(int port)
238 struct sockaddr_in addrin;
/* NOTE(review): the length is kept in an int and later passed by address
   to accept(), whose prototype takes socklen_t * - confirm the types
   agree on every target. */
240 int addrinlen = sizeof(addrin);
241 int net, sock, newpid;
248 if ((sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0)
251 /* lose the pesky "Address already in use" error message */
252 if (setsockopt(sock,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) {
256 DEBUG("Waiting for connections... bind, ");
/* Bind to the requested port on address 0, i.e. no specific local
   address is selected. */
257 addrin.sin_family = AF_INET;
258 addrin.sin_port = htons(port);
259 addrin.sin_addr.s_addr = 0;
260 if (bind(sock, (struct sockaddr *) &addrin, addrinlen) < 0)
/* Backlog of 1 pending connection. */
263 if (listen(sock, 1) < 0)
/* Reap exited children via SIGCHLD; SA_RESTART asks for interrupted
   system calls to be restarted rather than failing with EINTR. */
266 sa.sa_handler = sigchld_handler;
267 sigemptyset(&sa.sa_mask);
268 sa.sa_flags = SA_RESTART;
269 if(sigaction(SIGCHLD, &sa, NULL) == -1)
270 err("sigaction: %m");
271 for(;;) { /* infinite loop */
/* addrin is reused to receive the peer address of the new connection. */
272 if ((net = accept(sock, (struct sockaddr *) &addrin, &addrinlen)) < 0)
/* Resolve the peer into the global clientname, then check it against the
   authorization file. */
275 set_peername(net,clientname) ;
276 if (!authorized_client(clientname)) {
277 msg2(LOG_INFO,"Unauthorized client") ;
281 msg2(LOG_INFO,"Authorized client") ;
282 if ((newpid=fork())<0) {
283 msg3(LOG_INFO,"Could not fork (%s)",strerror(errno)) ;
/* The parent drops its copy of the connection and goes back to accept();
   only the child continues below. */
287 if (newpid>0) { /* parent */
288 close(net) ; continue ; }
291 msg2(LOG_INFO,"Starting to serve") ;
292 serveconnection(net) ;
/* SEND writes the local variable `reply` to the local descriptor `net`;
   both names must exist where the macro is expanded.  The macro already
   ends in a semicolon. */
296 #define SEND writeit( net, &reply, sizeof( reply ));
/* ERROR sends `reply` with its error field set to htonl(-1), clears the
   field again for the next reply and invalidates the cached file position
   so that the next maybeseek() seeks. */
297 #define ERROR { reply.error = htonl(-1); SEND; reply.error = 0; lastpoint = -1; }
/* Position compared by maybeseek() against the requested offset;
   (fsoffset_t)-1 is the initial value. */
299 fsoffset_t lastpoint = (fsoffset_t)-1;
/* Position descriptor `handle` at absolute offset `a`, skipping the seek
 * when the cached position `lastpoint` already equals `a`.
 *
 * Depending on HAVE_LLSEEK / FS_32BIT either llseek() or lseek() is used;
 * a negative result is reported through err().  The condition guarding
 * the "Can not happen" message, the #else/#endif lines and any update of
 * `lastpoint` are not part of this listing.
 */
301 void maybeseek(int handle, fsoffset_t a)
304 err("Can not happen\n");
305 if (lastpoint != a) {
306 #if defined(HAVE_LLSEEK) && !defined(FS_32BIT)
307 if (llseek(handle, a, SEEK_SET) < 0)
/* NOTE(review): the cast narrows the offset to long; where long is 32
   bits wide, offsets beyond its range cannot be represented - confirm. */
309 if (lseek(handle, (long)a, SEEK_SET) < 0)
311 err("Can not seek locally!\n");
/* Position descriptor `handle` at absolute offset `a`, unconditionally.
 *
 * Uses llseek() or lseek() depending on HAVE_LLSEEK / FS_32BIT and
 * reports a negative result through err().  The #else/#endif lines and
 * the braces are not part of this listing.
 *
 * NOTE(review): this test is written "#if HAVE_LLSEEK" whereas maybeseek()
 * uses "#if defined(HAVE_LLSEEK)"; the two differ when the macro is
 * defined as empty or as 0 - confirm which form is intended.
 */
318 void myseek(int handle,fsoffset_t a)
320 #if HAVE_LLSEEK && !defined(FS_32BIT)
321 if (llseek(handle, a, SEEK_SET) < 0)
/* The cast narrows the offset to long, as in maybeseek(). */
323 if (lseek(handle, (long)a, SEEK_SET) < 0)
325 err("Can not seek locally!\n");
/* Scratch buffer of one diff page.  expwrite() fills it from the export,
   overlays the new data and writes it to the diff file. */
328 char pagebuf[DIFFPAGESIZE];
/* Read `len` bytes at export offset `a` into `buf`, bypassing the diff
 * file.
 *
 * The offset selects the file export[a/hunksize] and the position
 * a%hunksize inside it.  Returns 0 when read() delivered exactly `len`
 * bytes and 1 otherwise (short read, end of file or error).
 */
330 int rawexpread(fsoffset_t a, char *buf, int len)
332 maybeseek(export[a/hunksize], a%hunksize);
333 return (read(export[a/hunksize], buf, len) != len);
/* Read `len` bytes at export offset `a` into `buf`, honouring the diff
 * file when F_COPYONWRITE is set.
 *
 * Without F_COPYONWRITE the call is forwarded to rawexpread().  Otherwise
 * the request is walked page by page (DIFFPAGESIZE bytes each): a page
 * with an entry in difmap[] is read from the diff file, any other page
 * from the export.  Returns -1 when a diff-file read is short.  The
 * declarations of rdlen/offset, the assignment of `offset` and the final
 * return are not part of this listing.
 *
 * NOTE(review): the branch for a page that is not in the diff file
 * returns the result of rawexpread() directly, which ends the loop after
 * that page; later pages of a multi-page request are then never read.
 * Confirm whether "fail only on error, otherwise continue" was intended.
 */
336 int expread(fsoffset_t a, char *buf, int len)
339 fsoffset_t mapcnt, mapl, maph, pagestart;
341 if (!(flags & F_COPYONWRITE))
342 return rawexpread(a, buf, len);
343 DEBUG3("Asked to read %d bytes at %Lu.\n", len, (unsigned long long)a);
/* First and last page index touched by the request. */
345 mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
347 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
348 pagestart=mapcnt*DIFFPAGESIZE;
/* Bytes to take from this page: the rest of the request, capped at the
   end of the page. */
350 rdlen=(len<DIFFPAGESIZE-offset) ? len : DIFFPAGESIZE-offset;
/* (u32)-1 in difmap[] marks a page that has no copy in the diff file. */
351 if (difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
352 DEBUG3("Page %Lu is at %lu\n", (unsigned long long)mapcnt,
353 (unsigned long)difmap[mapcnt]);
354 myseek(difffile, difmap[mapcnt]*DIFFPAGESIZE+offset);
355 if (read(difffile, buf, rdlen) != rdlen) return -1;
356 } else { /* the block is not there */
357 DEBUG2("Page %Lu is not here, we read the original one\n",
358 (unsigned long long)mapcnt);
359 return rawexpread(a, buf, rdlen);
/* Advance to the part of the request that lies in the next page. */
361 len-=rdlen; a+=rdlen; buf+=rdlen;
/* Write `len` bytes from `buf` at export offset `a`, bypassing the diff
 * file.
 *
 * The offset selects the file export[a/hunksize] and the position
 * a%hunksize inside it.  Returns 0 when write() accepted exactly `len`
 * bytes and 1 otherwise (short write or error).
 */
366 int rawexpwrite(fsoffset_t a, char *buf, int len)
368 maybeseek(export[a/hunksize], a%hunksize);
369 return (write(export[a/hunksize], buf, len) != len);
/* Write `len` bytes from `buf` at export offset `a`, redirecting the data
 * to the diff file when F_COPYONWRITE is set.
 *
 * Without F_COPYONWRITE the call is forwarded to rawexpwrite().
 * Otherwise the request is walked page by page (DIFFPAGESIZE bytes each).
 * A page that already has an entry in difmap[] is updated in place in the
 * diff file.  For any other page a new slot is appended to the diff file:
 * the page is read from the export into pagebuf, the new data is copied
 * over it and the whole page is written out.  Returns -1 on a failed read
 * or short write.  The assignment of `offset`, the initial value of
 * `rdlen` and the final return are not part of this listing.
 */
373 int expwrite(fsoffset_t a, char *buf, int len)
/* NOTE(review): page indices are u32 here but fsoffset_t in expread();
   confirm a/DIFFPAGESIZE always fits in 32 bits. */
375 u32 mapcnt,mapl,maph ; int wrlen,rdlen ;
376 fsoffset_t pagestart ; int offset ;
378 if (!(flags & F_COPYONWRITE))
379 return(rawexpwrite(a,buf,len));
380 DEBUG3("Asked to write %d bytes at %Lu.\n", len, (unsigned long long)a);
/* First and last page index touched by the request. */
382 mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
384 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
385 pagestart=mapcnt*DIFFPAGESIZE ;
/* Bytes that go into this page: the rest of the request, capped at the
   end of the page. */
387 wrlen=(len<DIFFPAGESIZE-offset) ? len : DIFFPAGESIZE-offset ;
389 if (difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
390 DEBUG3("Page %Lu is at %lu\n", (unsigned long long)mapcnt,
391 (unsigned long)difmap[mapcnt]) ;
392 myseek(difffile,difmap[mapcnt]*DIFFPAGESIZE+offset) ;
393 if (write(difffile, buf, wrlen) != wrlen) return -1 ;
394 } else { /* the block is not there */
/* Append a new page at the current end of the diff file and record its
   slot number in the map. */
395 myseek(difffile,difffilelen*DIFFPAGESIZE) ;
396 difmap[mapcnt]=difffilelen++ ;
397 DEBUG3("Page %Lu is not here, we put it at %lu\n",
398 (unsigned long long)mapcnt,
399 (unsigned long)difmap[mapcnt]);
/* Do not let the read of the original page run across the end of the
   hunk (export file) it starts in. */
401 if (rdlen+pagestart%hunksize>hunksize)
402 rdlen=hunksize-(pagestart%hunksize) ;
403 if (rawexpread(pagestart,pagebuf,rdlen)) return -1 ;
/* Overlay the new data on the original page, then store the full page. */
404 memcpy(pagebuf+offset,buf,wrlen) ;
405 if (write(difffile,pagebuf,DIFFPAGESIZE)!=DIFFPAGESIZE) return -1 ;
/* Advance to the part of the request that lies in the next page. */
407 len-=wrlen ; a+=wrlen ; buf+=wrlen ;
/* Perform the initial negotiation on socket `net`, then serve requests
 * until the connection ends.
 *
 * Negotiation sends INIT_PASSWD (8 bytes), the magic number, the export
 * size and 128 zero bytes.  Each request is then read, converted to host
 * byte order, validated and dispatched as a disconnect, write or read.
 * Many lines of this function are not part of this listing: the
 * declarations of buf/zeros/len/cliserv_magic, the loop statement, the
 * conditionals around the two size-sending variants, the checks in front
 * of several err() calls and the statements that follow the failure
 * tests.
 */
412 int mainloop(int net)
414 struct nbd_request request;
415 struct nbd_reply reply;
418 fsoffset_t size_host;
420 memset(zeros, 0, 290);
421 if (write(net, INIT_PASSWD, 8) < 0)
422 err("Negotiation failed: %m");
424 cliserv_magic = htonll(cliserv_magic);
426 if (write(net, &cliserv_magic, sizeof(cliserv_magic)) < 0)
427 err("Negotiation failed: %m");
428 size_host = htonll(exportsize);
/* Two alternative ways of sending the size follow; the conditional that
   picks one is not visible.  The first sends 4 zero bytes followed by 4
   bytes of size_host, the second sends 8 bytes of size_host. */
430 if (write(net, zeros, 4) < 0 || write(net, &size_host, 4) < 0)
432 if (write(net, &size_host, 8) < 0)
434 err("Negotiation failed: %m");
435 if (write(net, zeros, 128) < 0)
436 err("Negotiation failed: %m");
438 DEBUG("Entering request loop!\n");
/* The magic field of the reply never changes, so it is set once. */
439 reply.magic = htonl(NBD_REPLY_MAGIC);
442 #define BUFSIZE (1024*1024)
/* One request: fixed-size header first, then conversion of the offset and
   type fields from network byte order. */
453 readit(net, &request, sizeof(request));
454 request.from = ntohll(request.from);
455 request.type = ntohl(request.type);
/* Type 2 ends the session: release the page map, close and remove the
   diff file (the condition in front of that is not visible) and leave
   through err(). */
457 if (request.type==2) { /* Disconnect request */
458 if (difmap) free(difmap) ;
460 close(difffile) ; unlink(difffilename) ; }
461 err("Disconnect request received.") ;
464 len = ntohl(request.len);
/* The magic is compared in network byte order, so the constant is
   converted rather than the received field. */
466 if (request.magic != htonl(NBD_REQUEST_MAGIC))
467 err("Not enough magic.");
469 err("Request too big!");
471 printf("%s from %Lu (%Lu) len %d, ", request.type ? "WRITE" :
472 "READ", (unsigned long long)request.from,
473 (unsigned long long)request.from / 512, len);
/* The reply echoes the handle of the request. */
475 memcpy(reply.handle, request.handle, sizeof(reply.handle));
/* Reject requests that end beyond the export, and any non-read request on
   a read-only export.  The statements taken on rejection are not visible.
   NOTE(review): request.from + len is computed without an overflow check;
   confirm a wrapped sum cannot pass this test. */
476 if (((request.from + len) > exportsize) ||
477 ((flags & F_READONLY) && request.type)) {
482 if (request.type==1) { /* WRITE */
483 DEBUG("wr: net->buf, ");
/* The payload of a write follows the header on the socket. */
484 readit(net, buf, len);
/* With autoreadonly set, expwrite() is not called at all and the write
   is treated as failed. */
486 if ((autoreadonly == 1) || expwrite(request.from, buf, len)) {
487 DEBUG("Write failed: %m" );
/* Read path: the data is placed behind room for the reply header so that
   header and data can be sent with a single writeit() call. */
498 if (expread(request.from, buf + sizeof(struct nbd_reply), len)) {
500 DEBUG("Read failed: %m");
507 memcpy(buf, &reply, sizeof(struct nbd_reply));
508 writeit(net, buf, len + sizeof(struct nbd_reply));
/* Export file name after set_peername() has run exportname through
   sprintf() with the peer address as argument; serveconnection() uses it
   as a format string in turn. */
513 char exportname2[1024];
/* Look up the peer of socket `net`, derive the per-client export name and
 * store the peer address text in `clientname`.
 *
 * net:        connected socket.
 * clientname: output buffer; up to 255 bytes are copied into it.
 *
 * Side effect: the global exportname2 is rewritten.  The declaration of
 * `peername` and the braces are not part of this listing.
 *
 * NOTE(review): the error text says "getsockname" although the failing
 * call is getpeername().
 * NOTE(review): exportname comes from the command line and is used as the
 * sprintf() format; the output is not bounded by the 1024 bytes of
 * exportname2, and a format with anything other than a single %s is not
 * guarded against - confirm this is acceptable.
 * NOTE(review): strncpy() does not terminate the destination when the
 * source is 255 bytes or longer; the callers visible here pass the
 * 256-byte global clientname.
 */
515 void set_peername(int net,char *clientname)
517 struct sockaddr_in addrin;
518 int addrinlen = sizeof( addrin );
521 if (getpeername( net, (struct sockaddr *) &addrin, &addrinlen ) < 0)
522 err("getsockname failed: %m");
/* Dotted-decimal text of the peer's IPv4 address. */
523 peername = inet_ntoa(addrin.sin_addr);
/* Substitute the address for the %s that exportname may contain. */
524 sprintf(exportname2, exportname, peername);
526 msg4(LOG_INFO, "connect from %s, assigned file is %s", peername, exportname2);
527 strncpy(clientname,peername,255) ;
/* Determine the size in bytes of the object open on descriptor `export`.
 *
 * Three methods are tried in this order: seeking to the end with lseek(),
 * st_size from fstat(), and the BLKGETSIZE ioctl, whose result is
 * multiplied by 512.  When none of them applies err() is called; the
 * trailing return yields (fsoffset_t)-1.  The declarations of es, es32
 * and error, and the statements that follow the lseek and ioctl tests,
 * are not part of this listing.
 */
530 fsoffset_t size_autodetect(int export)
534 struct stat stat_buf;
537 DEBUG("looking for export size with lseek SEEK_END\n");
538 es = (fsoffset_t)lseek(export, 0, SEEK_END);
/* The signed view rejects both a failed lseek() (-1) and a size of 0. */
539 if ((signed long long)es > 0LL)
542 DEBUG("looking for export size with fstat\n");
/* Preset to 0 so that a failed fstat() cannot leave a stale size. */
543 stat_buf.st_size = 0;
544 error = fstat(export, &stat_buf);
545 if (!error && stat_buf.st_size > 0)
546 return (fsoffset_t)stat_buf.st_size;
549 DEBUG("looking for export size with ioctl BLKGETSIZE\n");
/* Widen both factors before multiplying so the product is computed in
   fsoffset_t arithmetic. */
550 if (!ioctl(export, BLKGETSIZE, &es32) && es32) {
551 es = (fsoffset_t)es32 * (fsoffset_t)512;
555 err("Could not find size of exported block device: %m");
556 return (fsoffset_t)-1;
/* Program entry point.
 *
 * Checks that struct nbd_request is exactly 28 bytes, then (for a
 * non-zero port) calls connectme().  The statements between these lines -
 * presumably command-line parsing and the port-0 path mentioned in the
 * usage text, to be confirmed against the full file - are not part of
 * this listing.
 */
559 int main(int argc, char *argv[])
/* Guard against padding changing the size of the request structure. */
564 if (sizeof( struct nbd_request )!=28) {
565 fprintf(stderr,"Bad size of structure. Alignment problems?\n");
571 if (!port) return 1 ;
572 connectme(port); /* serve infinitely */
577 void serveconnection(int net)
581 for (i=0; i<exportsize; i+=hunksize) {
582 char exportname3[1024];
584 sprintf(exportname3, exportname2, i/hunksize);
585 printf( "Opening %s\n", exportname3 );
586 if ((export[i/hunksize] = open(exportname3, (flags & F_READONLY) ? O_RDONLY : O_RDWR)) == -1) {
587 /* Read/write access was requested, but the media is read-only */
590 if ((export[i/hunksize] = open(exportname3, O_RDONLY)) == -1)
591 err("Could not open exported file: %m");
595 if (exportsize == (fsoffset_t)-1) {
596 exportsize = size_autodetect(export[0]);
598 if (exportsize > ((fsoffset_t)-1 >> 1)) {
600 if ((exportsize >> 10) > ((fsoffset_t)-1 >> 1))
601 msg3(LOG_INFO, "size of exported file/device is %LuMB",
602 (unsigned long long)(exportsize >> 20));
604 msg3(LOG_INFO, "size of exported file/device is %LuKB",
605 (unsigned long long)(exportsize >> 10));
608 err("Size of exported file is too big\n");
612 msg3(LOG_INFO, "size of exported file/device is %Lu",
613 (unsigned long long)exportsize);
615 if (flags & F_COPYONWRITE) {
616 sprintf(difffilename,"%s-%s-%d.diff",exportname2,clientname,
618 msg3(LOG_INFO,"About to create map and diff file %s",difffilename) ;
619 difffile=open(difffilename,O_RDWR | O_CREAT | O_TRUNC,0600) ;
620 if (difffile<0) err("Could not create diff file (%m)") ;
621 if ((difmap=calloc(exportsize/DIFFPAGESIZE,sizeof(u32)))==NULL)
622 err("Could not allocate memory") ;
623 for (i=0;i<exportsize/DIFFPAGESIZE;i++) difmap[i]=(u32)-1 ;