Linux-2.6.12-rc2
[linux-flexiantxendom0-natty.git] / net / core / netpoll.c
1 /*
2  * Common framework for low-level network console, dump, and debugger code
3  *
4  * Sep 8 2003  Matt Mackall <mpm@selenic.com>
5  *
6  * based on the netconsole code from:
7  *
8  * Copyright (C) 2001  Ingo Molnar <mingo@redhat.com>
9  * Copyright (C) 2002  Red Hat, Inc.
10  */
11
12 #include <linux/smp_lock.h>
13 #include <linux/netdevice.h>
14 #include <linux/etherdevice.h>
15 #include <linux/string.h>
16 #include <linux/inetdevice.h>
17 #include <linux/inet.h>
18 #include <linux/interrupt.h>
19 #include <linux/netpoll.h>
20 #include <linux/sched.h>
21 #include <linux/delay.h>
22 #include <linux/rcupdate.h>
23 #include <linux/workqueue.h>
24 #include <net/tcp.h>
25 #include <net/udp.h>
26 #include <asm/unaligned.h>
27
28 /*
29  * We maintain a small pool of fully-sized skbs, to make sure the
30  * message gets out even in extreme OOM situations.
31  */
32
/* Payload size, in bytes, that the reserve skbs are dimensioned for. */
#define MAX_UDP_CHUNK 1460
/* refill_skbs() tops the reserve list up to this many entries. */
#define MAX_SKBS 32
/* netpoll_queue() frees incoming skbs once the deferred queue is this deep. */
#define MAX_QUEUE_DEPTH (MAX_SKBS / 2)

/* Reserve list: linked through skb->next and modified under skb_list_lock. */
static DEFINE_SPINLOCK(skb_list_lock);
static int nr_skbs;
static struct sk_buff *skbs;

/* Deferred-transmit FIFO drained by queue_process(); modified under queue_lock. */
static DEFINE_SPINLOCK(queue_lock);
static int queue_depth;
static struct sk_buff *queue_head, *queue_tail;

/* Trap count: while non-zero, __netpoll_rx() consumes every packet it is given. */
static atomic_t trapped;

/* Bits stored in netpoll->rx_flags. */
#define NETPOLL_RX_ENABLED  1
#define NETPOLL_RX_DROP     2

/* Size of each reserve skb: payload plus UDP, IP and Ethernet headers. */
#define MAX_SKB_SIZE \
                (MAX_UDP_CHUNK + sizeof(struct udphdr) + \
                                sizeof(struct iphdr) + sizeof(struct ethhdr))

/* Defined further down; netpoll_poll() calls it before the definition. */
static void zap_completion_queue(void);
55
56 static void queue_process(void *p)
57 {
58         unsigned long flags;
59         struct sk_buff *skb;
60
61         while (queue_head) {
62                 spin_lock_irqsave(&queue_lock, flags);
63
64                 skb = queue_head;
65                 queue_head = skb->next;
66                 if (skb == queue_tail)
67                         queue_head = NULL;
68
69                 queue_depth--;
70
71                 spin_unlock_irqrestore(&queue_lock, flags);
72
73                 dev_queue_xmit(skb);
74         }
75 }
76
77 static DECLARE_WORK(send_queue, queue_process, NULL);
78
79 void netpoll_queue(struct sk_buff *skb)
80 {
81         unsigned long flags;
82
83         if (queue_depth == MAX_QUEUE_DEPTH) {
84                 __kfree_skb(skb);
85                 return;
86         }
87
88         spin_lock_irqsave(&queue_lock, flags);
89         if (!queue_head)
90                 queue_head = skb;
91         else
92                 queue_tail->next = skb;
93         queue_tail = skb;
94         queue_depth++;
95         spin_unlock_irqrestore(&queue_lock, flags);
96
97         schedule_work(&send_queue);
98 }
99
100 static int checksum_udp(struct sk_buff *skb, struct udphdr *uh,
101                              unsigned short ulen, u32 saddr, u32 daddr)
102 {
103         if (uh->check == 0)
104                 return 0;
105
106         if (skb->ip_summed == CHECKSUM_HW)
107                 return csum_tcpudp_magic(
108                         saddr, daddr, ulen, IPPROTO_UDP, skb->csum);
109
110         skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
111
112         return csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
113 }
114
115 /*
116  * Check whether delayed processing was scheduled for our NIC. If so,
117  * we attempt to grab the poll lock and use ->poll() to pump the card.
118  * If this fails, either we've recursed in ->poll() or it's already
119  * running on another CPU.
120  *
121  * Note: we don't mask interrupts with this lock because we're using
122  * trylock here and interrupts are already disabled in the softirq
123  * case. Further, we test the poll_owner to avoid recursion on UP
124  * systems where the lock doesn't exist.
125  *
126  * In cases where there is bi-directional communications, reading only
127  * one message at a time can lead to packets being dropped by the
128  * network adapter, forcing superfluous retries and possibly timeouts.
129  * Thus, we set our budget to greater than 1.
130  */
131 static void poll_napi(struct netpoll *np)
132 {
133         int budget = 16;
134
135         if (test_bit(__LINK_STATE_RX_SCHED, &np->dev->state) &&
136             np->poll_owner != smp_processor_id() &&
137             spin_trylock(&np->poll_lock)) {
138                 np->rx_flags |= NETPOLL_RX_DROP;
139                 atomic_inc(&trapped);
140
141                 np->dev->poll(np->dev, &budget);
142
143                 atomic_dec(&trapped);
144                 np->rx_flags &= ~NETPOLL_RX_DROP;
145                 spin_unlock(&np->poll_lock);
146         }
147 }
148
149 void netpoll_poll(struct netpoll *np)
150 {
151         if(!np->dev || !netif_running(np->dev) || !np->dev->poll_controller)
152                 return;
153
154         /* Process pending work on NIC */
155         np->dev->poll_controller(np->dev);
156         if (np->dev->poll)
157                 poll_napi(np);
158
159         zap_completion_queue();
160 }
161
162 static void refill_skbs(void)
163 {
164         struct sk_buff *skb;
165         unsigned long flags;
166
167         spin_lock_irqsave(&skb_list_lock, flags);
168         while (nr_skbs < MAX_SKBS) {
169                 skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
170                 if (!skb)
171                         break;
172
173                 skb->next = skbs;
174                 skbs = skb;
175                 nr_skbs++;
176         }
177         spin_unlock_irqrestore(&skb_list_lock, flags);
178 }
179
180 static void zap_completion_queue(void)
181 {
182         unsigned long flags;
183         struct softnet_data *sd = &get_cpu_var(softnet_data);
184
185         if (sd->completion_queue) {
186                 struct sk_buff *clist;
187
188                 local_irq_save(flags);
189                 clist = sd->completion_queue;
190                 sd->completion_queue = NULL;
191                 local_irq_restore(flags);
192
193                 while (clist != NULL) {
194                         struct sk_buff *skb = clist;
195                         clist = clist->next;
196                         if(skb->destructor)
197                                 dev_kfree_skb_any(skb); /* put this one back */
198                         else
199                                 __kfree_skb(skb);
200                 }
201         }
202
203         put_cpu_var(softnet_data);
204 }
205
206 static struct sk_buff * find_skb(struct netpoll *np, int len, int reserve)
207 {
208         int once = 1, count = 0;
209         unsigned long flags;
210         struct sk_buff *skb = NULL;
211
212         zap_completion_queue();
213 repeat:
214         if (nr_skbs < MAX_SKBS)
215                 refill_skbs();
216
217         skb = alloc_skb(len, GFP_ATOMIC);
218
219         if (!skb) {
220                 spin_lock_irqsave(&skb_list_lock, flags);
221                 skb = skbs;
222                 if (skb) {
223                         skbs = skb->next;
224                         skb->next = NULL;
225                         nr_skbs--;
226                 }
227                 spin_unlock_irqrestore(&skb_list_lock, flags);
228         }
229
230         if(!skb) {
231                 count++;
232                 if (once && (count == 1000000)) {
233                         printk("out of netpoll skbs!\n");
234                         once = 0;
235                 }
236                 netpoll_poll(np);
237                 goto repeat;
238         }
239
240         atomic_set(&skb->users, 1);
241         skb_reserve(skb, reserve);
242         return skb;
243 }
244
245 static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
246 {
247         int status;
248
249 repeat:
250         if(!np || !np->dev || !netif_running(np->dev)) {
251                 __kfree_skb(skb);
252                 return;
253         }
254
255         /* avoid recursion */
256         if(np->poll_owner == smp_processor_id() ||
257            np->dev->xmit_lock_owner == smp_processor_id()) {
258                 if (np->drop)
259                         np->drop(skb);
260                 else
261                         __kfree_skb(skb);
262                 return;
263         }
264
265         spin_lock(&np->dev->xmit_lock);
266         np->dev->xmit_lock_owner = smp_processor_id();
267
268         /*
269          * network drivers do not expect to be called if the queue is
270          * stopped.
271          */
272         if (netif_queue_stopped(np->dev)) {
273                 np->dev->xmit_lock_owner = -1;
274                 spin_unlock(&np->dev->xmit_lock);
275
276                 netpoll_poll(np);
277                 goto repeat;
278         }
279
280         status = np->dev->hard_start_xmit(skb, np->dev);
281         np->dev->xmit_lock_owner = -1;
282         spin_unlock(&np->dev->xmit_lock);
283
284         /* transmit busy */
285         if(status) {
286                 netpoll_poll(np);
287                 goto repeat;
288         }
289 }
290
291 void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
292 {
293         int total_len, eth_len, ip_len, udp_len;
294         struct sk_buff *skb;
295         struct udphdr *udph;
296         struct iphdr *iph;
297         struct ethhdr *eth;
298
299         udp_len = len + sizeof(*udph);
300         ip_len = eth_len = udp_len + sizeof(*iph);
301         total_len = eth_len + ETH_HLEN + NET_IP_ALIGN;
302
303         skb = find_skb(np, total_len, total_len - len);
304         if (!skb)
305                 return;
306
307         memcpy(skb->data, msg, len);
308         skb->len += len;
309
310         udph = (struct udphdr *) skb_push(skb, sizeof(*udph));
311         udph->source = htons(np->local_port);
312         udph->dest = htons(np->remote_port);
313         udph->len = htons(udp_len);
314         udph->check = 0;
315
316         iph = (struct iphdr *)skb_push(skb, sizeof(*iph));
317
318         /* iph->version = 4; iph->ihl = 5; */
319         put_unaligned(0x45, (unsigned char *)iph);
320         iph->tos      = 0;
321         put_unaligned(htons(ip_len), &(iph->tot_len));
322         iph->id       = 0;
323         iph->frag_off = 0;
324         iph->ttl      = 64;
325         iph->protocol = IPPROTO_UDP;
326         iph->check    = 0;
327         put_unaligned(htonl(np->local_ip), &(iph->saddr));
328         put_unaligned(htonl(np->remote_ip), &(iph->daddr));
329         iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);
330
331         eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
332
333         eth->h_proto = htons(ETH_P_IP);
334         memcpy(eth->h_source, np->local_mac, 6);
335         memcpy(eth->h_dest, np->remote_mac, 6);
336
337         skb->dev = np->dev;
338
339         netpoll_send_skb(np, skb);
340 }
341
342 static void arp_reply(struct sk_buff *skb)
343 {
344         struct arphdr *arp;
345         unsigned char *arp_ptr;
346         int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
347         u32 sip, tip;
348         struct sk_buff *send_skb;
349         struct netpoll *np = skb->dev->np;
350
351         if (!np) return;
352
353         /* No arp on this interface */
354         if (skb->dev->flags & IFF_NOARP)
355                 return;
356
357         if (!pskb_may_pull(skb, (sizeof(struct arphdr) +
358                                  (2 * skb->dev->addr_len) +
359                                  (2 * sizeof(u32)))))
360                 return;
361
362         skb->h.raw = skb->nh.raw = skb->data;
363         arp = skb->nh.arph;
364
365         if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
366              arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
367             arp->ar_pro != htons(ETH_P_IP) ||
368             arp->ar_op != htons(ARPOP_REQUEST))
369                 return;
370
371         arp_ptr = (unsigned char *)(arp+1) + skb->dev->addr_len;
372         memcpy(&sip, arp_ptr, 4);
373         arp_ptr += 4 + skb->dev->addr_len;
374         memcpy(&tip, arp_ptr, 4);
375
376         /* Should we ignore arp? */
377         if (tip != htonl(np->local_ip) || LOOPBACK(tip) || MULTICAST(tip))
378                 return;
379
380         size = sizeof(struct arphdr) + 2 * (skb->dev->addr_len + 4);
381         send_skb = find_skb(np, size + LL_RESERVED_SPACE(np->dev),
382                             LL_RESERVED_SPACE(np->dev));
383
384         if (!send_skb)
385                 return;
386
387         send_skb->nh.raw = send_skb->data;
388         arp = (struct arphdr *) skb_put(send_skb, size);
389         send_skb->dev = skb->dev;
390         send_skb->protocol = htons(ETH_P_ARP);
391
392         /* Fill the device header for the ARP frame */
393
394         if (np->dev->hard_header &&
395             np->dev->hard_header(send_skb, skb->dev, ptype,
396                                        np->remote_mac, np->local_mac,
397                                        send_skb->len) < 0) {
398                 kfree_skb(send_skb);
399                 return;
400         }
401
402         /*
403          * Fill out the arp protocol part.
404          *
405          * we only support ethernet device type,
406          * which (according to RFC 1390) should always equal 1 (Ethernet).
407          */
408
409         arp->ar_hrd = htons(np->dev->type);
410         arp->ar_pro = htons(ETH_P_IP);
411         arp->ar_hln = np->dev->addr_len;
412         arp->ar_pln = 4;
413         arp->ar_op = htons(type);
414
415         arp_ptr=(unsigned char *)(arp + 1);
416         memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
417         arp_ptr += np->dev->addr_len;
418         memcpy(arp_ptr, &tip, 4);
419         arp_ptr += 4;
420         memcpy(arp_ptr, np->remote_mac, np->dev->addr_len);
421         arp_ptr += np->dev->addr_len;
422         memcpy(arp_ptr, &sip, 4);
423
424         netpoll_send_skb(np, send_skb);
425 }
426
427 int __netpoll_rx(struct sk_buff *skb)
428 {
429         int proto, len, ulen;
430         struct iphdr *iph;
431         struct udphdr *uh;
432         struct netpoll *np = skb->dev->np;
433
434         if (!np->rx_hook)
435                 goto out;
436         if (skb->dev->type != ARPHRD_ETHER)
437                 goto out;
438
439         /* check if netpoll clients need ARP */
440         if (skb->protocol == __constant_htons(ETH_P_ARP) &&
441             atomic_read(&trapped)) {
442                 arp_reply(skb);
443                 return 1;
444         }
445
446         proto = ntohs(eth_hdr(skb)->h_proto);
447         if (proto != ETH_P_IP)
448                 goto out;
449         if (skb->pkt_type == PACKET_OTHERHOST)
450                 goto out;
451         if (skb_shared(skb))
452                 goto out;
453
454         iph = (struct iphdr *)skb->data;
455         if (!pskb_may_pull(skb, sizeof(struct iphdr)))
456                 goto out;
457         if (iph->ihl < 5 || iph->version != 4)
458                 goto out;
459         if (!pskb_may_pull(skb, iph->ihl*4))
460                 goto out;
461         if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
462                 goto out;
463
464         len = ntohs(iph->tot_len);
465         if (skb->len < len || len < iph->ihl*4)
466                 goto out;
467
468         if (iph->protocol != IPPROTO_UDP)
469                 goto out;
470
471         len -= iph->ihl*4;
472         uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
473         ulen = ntohs(uh->len);
474
475         if (ulen != len)
476                 goto out;
477         if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr) < 0)
478                 goto out;
479         if (np->local_ip && np->local_ip != ntohl(iph->daddr))
480                 goto out;
481         if (np->remote_ip && np->remote_ip != ntohl(iph->saddr))
482                 goto out;
483         if (np->local_port && np->local_port != ntohs(uh->dest))
484                 goto out;
485
486         np->rx_hook(np, ntohs(uh->source),
487                     (char *)(uh+1),
488                     ulen - sizeof(struct udphdr));
489
490         kfree_skb(skb);
491         return 1;
492
493 out:
494         if (atomic_read(&trapped)) {
495                 kfree_skb(skb);
496                 return 1;
497         }
498
499         return 0;
500 }
501
502 int netpoll_parse_options(struct netpoll *np, char *opt)
503 {
504         char *cur=opt, *delim;
505
506         if(*cur != '@') {
507                 if ((delim = strchr(cur, '@')) == NULL)
508                         goto parse_failed;
509                 *delim=0;
510                 np->local_port=simple_strtol(cur, NULL, 10);
511                 cur=delim;
512         }
513         cur++;
514         printk(KERN_INFO "%s: local port %d\n", np->name, np->local_port);
515
516         if(*cur != '/') {
517                 if ((delim = strchr(cur, '/')) == NULL)
518                         goto parse_failed;
519                 *delim=0;
520                 np->local_ip=ntohl(in_aton(cur));
521                 cur=delim;
522
523                 printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n",
524                        np->name, HIPQUAD(np->local_ip));
525         }
526         cur++;
527
528         if ( *cur != ',') {
529                 /* parse out dev name */
530                 if ((delim = strchr(cur, ',')) == NULL)
531                         goto parse_failed;
532                 *delim=0;
533                 strlcpy(np->dev_name, cur, sizeof(np->dev_name));
534                 cur=delim;
535         }
536         cur++;
537
538         printk(KERN_INFO "%s: interface %s\n", np->name, np->dev_name);
539
540         if ( *cur != '@' ) {
541                 /* dst port */
542                 if ((delim = strchr(cur, '@')) == NULL)
543                         goto parse_failed;
544                 *delim=0;
545                 np->remote_port=simple_strtol(cur, NULL, 10);
546                 cur=delim;
547         }
548         cur++;
549         printk(KERN_INFO "%s: remote port %d\n", np->name, np->remote_port);
550
551         /* dst ip */
552         if ((delim = strchr(cur, '/')) == NULL)
553                 goto parse_failed;
554         *delim=0;
555         np->remote_ip=ntohl(in_aton(cur));
556         cur=delim+1;
557
558         printk(KERN_INFO "%s: remote IP %d.%d.%d.%d\n",
559                        np->name, HIPQUAD(np->remote_ip));
560
561         if( *cur != 0 )
562         {
563                 /* MAC address */
564                 if ((delim = strchr(cur, ':')) == NULL)
565                         goto parse_failed;
566                 *delim=0;
567                 np->remote_mac[0]=simple_strtol(cur, NULL, 16);
568                 cur=delim+1;
569                 if ((delim = strchr(cur, ':')) == NULL)
570                         goto parse_failed;
571                 *delim=0;
572                 np->remote_mac[1]=simple_strtol(cur, NULL, 16);
573                 cur=delim+1;
574                 if ((delim = strchr(cur, ':')) == NULL)
575                         goto parse_failed;
576                 *delim=0;
577                 np->remote_mac[2]=simple_strtol(cur, NULL, 16);
578                 cur=delim+1;
579                 if ((delim = strchr(cur, ':')) == NULL)
580                         goto parse_failed;
581                 *delim=0;
582                 np->remote_mac[3]=simple_strtol(cur, NULL, 16);
583                 cur=delim+1;
584                 if ((delim = strchr(cur, ':')) == NULL)
585                         goto parse_failed;
586                 *delim=0;
587                 np->remote_mac[4]=simple_strtol(cur, NULL, 16);
588                 cur=delim+1;
589                 np->remote_mac[5]=simple_strtol(cur, NULL, 16);
590         }
591
592         printk(KERN_INFO "%s: remote ethernet address "
593                "%02x:%02x:%02x:%02x:%02x:%02x\n",
594                np->name,
595                np->remote_mac[0],
596                np->remote_mac[1],
597                np->remote_mac[2],
598                np->remote_mac[3],
599                np->remote_mac[4],
600                np->remote_mac[5]);
601
602         return 0;
603
604  parse_failed:
605         printk(KERN_INFO "%s: couldn't parse config at %s!\n",
606                np->name, cur);
607         return -1;
608 }
609
/*
 * Attach @np to the network device named in np->dev_name.
 *
 * Looks the device up, links it and @np to each other, brings the
 * interface up if it is not running, and fills in np->local_mac and
 * np->local_ip from the device when they are still zero.
 *
 * Returns 0 on success.  Returns -1 if the device does not exist, has
 * no poll_controller, cannot be brought up, or has no IP address when
 * one is needed; on those paths the link is undone and the device
 * reference is released.
 */
int netpoll_setup(struct netpoll *np)
{
        struct net_device *ndev = NULL;
        struct in_device *in_dev;

        np->poll_lock = SPIN_LOCK_UNLOCKED;
        np->poll_owner = -1;

        if (np->dev_name)
                ndev = dev_get_by_name(np->dev_name);
        if (!ndev) {
                printk(KERN_ERR "%s: %s doesn't exist, aborting.\n",
                       np->name, np->dev_name);
                return -1;
        }

        /* link both ways; undone at "release" if a later step fails */
        np->dev = ndev;
        ndev->np = np;

        if (!ndev->poll_controller) {
                printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
                       np->name, np->dev_name);
                goto release;
        }

        if (!netif_running(ndev)) {
                /* jiffies bounds for the carrier wait below */
                unsigned long atmost, atleast;

                printk(KERN_INFO "%s: device %s not up yet, forcing it\n",
                       np->name, np->dev_name);

                rtnl_shlock();
                if (dev_change_flags(ndev, ndev->flags | IFF_UP) < 0) {
                        printk(KERN_ERR "%s: failed to open %s\n",
                               np->name, np->dev_name);
                        rtnl_shunlock();
                        goto release;
                }
                rtnl_shunlock();

                /* wait for carrier, giving up after 4*HZ jiffies */
                atleast = jiffies + HZ/10;
                atmost = jiffies + 4*HZ;
                while (!netif_carrier_ok(ndev)) {
                        if (time_after(jiffies, atmost)) {
                                printk(KERN_NOTICE
                                       "%s: timeout waiting for carrier\n",
                                       np->name);
                                break;
                        }
                        cond_resched();
                }

                /* If carrier appears to come up instantly, we don't
                 * trust it and pause so that we don't pump all our
                 * queued console messages into the bitbucket.
                 */

                if (time_before(jiffies, atleast)) {
                        printk(KERN_NOTICE "%s: carrier detect appears"
                               " untrustworthy, waiting 4 seconds\n",
                               np->name);
                        msleep(4000);
                }
        }

        /* an all-zero local MAC means "use the device's own address" */
        if (!memcmp(np->local_mac, "\0\0\0\0\0\0", 6) && ndev->dev_addr)
                memcpy(np->local_mac, ndev->dev_addr, 6);

        /* no local IP configured: take the first address on the device */
        if (!np->local_ip) {
                rcu_read_lock();
                in_dev = __in_dev_get(ndev);

                if (!in_dev || !in_dev->ifa_list) {
                        rcu_read_unlock();
                        printk(KERN_ERR "%s: no IP address for %s, aborting\n",
                               np->name, np->dev_name);
                        goto release;
                }

                np->local_ip = ntohl(in_dev->ifa_list->ifa_local);
                rcu_read_unlock();
                printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n",
                       np->name, HIPQUAD(np->local_ip));
        }

        /* receive handling is only enabled when a hook is registered */
        if(np->rx_hook)
                np->rx_flags = NETPOLL_RX_ENABLED;

        return 0;

 release:
        /* failure: unlink and drop the device reference */
        ndev->np = NULL;
        np->dev = NULL;
        dev_put(ndev);
        return -1;
}
706
707 void netpoll_cleanup(struct netpoll *np)
708 {
709         if (np->dev)
710                 np->dev->np = NULL;
711         dev_put(np->dev);
712         np->dev = NULL;
713 }
714
715 int netpoll_trap(void)
716 {
717         return atomic_read(&trapped);
718 }
719
720 void netpoll_set_trap(int trap)
721 {
722         if (trap)
723                 atomic_inc(&trapped);
724         else
725                 atomic_dec(&trapped);
726 }
727
/* Symbols exported from this file for use by other kernel code. */
EXPORT_SYMBOL(netpoll_set_trap);
EXPORT_SYMBOL(netpoll_trap);
EXPORT_SYMBOL(netpoll_parse_options);
EXPORT_SYMBOL(netpoll_setup);
EXPORT_SYMBOL(netpoll_cleanup);
EXPORT_SYMBOL(netpoll_send_udp);
EXPORT_SYMBOL(netpoll_poll);
EXPORT_SYMBOL(netpoll_queue);