drivers/xen/sfc_netfront/accel_msg.c
/****************************************************************************
 * Solarflare driver for Xen network acceleration
 *
 * Copyright 2006-2008: Solarflare Communications Inc,
 *                      9501 Jeronimo Road, Suite 250,
 *                      Irvine, CA 92618, USA
 *
 * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 ****************************************************************************
 */

#include <linux/stddef.h>
#include <linux/errno.h>

#include <xen/xenbus.h>

#include "accel.h"
#include "accel_msg_iface.h"
#include "accel_util.h"
#include "accel_bufs.h"

#include "netfront.h" /* drivers/xen/netfront/netfront.h */

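/*
 * Prime the event queue interrupt.  If events are already pending, pass
 * them straight to the NAPI poll routine; otherwise ask the backend to
 * deliver interrupts for this VI.
 */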
static void vnic_start_interrupts(netfront_accel_vnic *vnic)
{
        unsigned long flags;

        /* Prime our interrupt */
        spin_lock_irqsave(&vnic->irq_enabled_lock, flags);
        if (!netfront_accel_vi_enable_interrupts(vnic)) {
                struct netfront_info *np = netdev_priv(vnic->net_dev);

                /* Cripes, that was quick, better pass it up */
                netfront_accel_disable_net_interrupts(vnic);
                vnic->irq_enabled = 0;
                NETFRONT_ACCEL_STATS_OP(vnic->stats.poll_schedule_count++);
                napi_schedule(&np->napi);
        } else {
                /*
                 * Nothing yet, make sure we get interrupts through
                 * back end
                 */
                vnic->irq_enabled = 1;
                netfront_accel_enable_net_interrupts(vnic);
        }
        spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags);
}


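/* Mask backend interrupt delivery and record that it is disabled. */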
static void vnic_stop_interrupts(netfront_accel_vnic *vnic)
{
        unsigned long flags;

        spin_lock_irqsave(&vnic->irq_enabled_lock, flags);
        netfront_accel_disable_net_interrupts(vnic);
        vnic->irq_enabled = 0;
        spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags);
}


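/*
 * Enable the accelerated TX path and polling for this vnic, then prime
 * interrupts so that RX traffic gets picked up.
 */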
static void vnic_start_fastpath(netfront_accel_vnic *vnic)
{
        struct net_device *net_dev = vnic->net_dev;
        struct netfront_info *np = netdev_priv(net_dev);
        unsigned long flags;

        DPRINTK("%s\n", __FUNCTION__);

        spin_lock_irqsave(&vnic->tx_lock, flags);
        vnic->tx_enabled = 1;
        spin_unlock_irqrestore(&vnic->tx_lock, flags);

        napi_disable(&np->napi);
        vnic->poll_enabled = 1;
        napi_enable(&np->napi);

        vnic_start_interrupts(vnic);
}


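/*
 * Quiesce the accelerated path: stop interrupts, disable accelerated TX
 * (freeing any skb still held and waking the queue if appropriate), and
 * stop the poll routine from using the VI.
 */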
void vnic_stop_fastpath(netfront_accel_vnic *vnic)
{
        struct net_device *net_dev = vnic->net_dev;
        struct netfront_info *np = (struct netfront_info *)netdev_priv(net_dev);
        unsigned long flags1, flags2;

        DPRINTK("%s\n", __FUNCTION__);

        vnic_stop_interrupts(vnic);

        spin_lock_irqsave(&vnic->tx_lock, flags1);
        vnic->tx_enabled = 0;
        spin_lock_irqsave(&np->tx_lock, flags2);
        if (vnic->tx_skb != NULL) {
                dev_kfree_skb_any(vnic->tx_skb);
                vnic->tx_skb = NULL;
                if (netfront_check_queue_ready(net_dev)) {
                        netif_wake_queue(net_dev);
                        NETFRONT_ACCEL_STATS_OP
                                (vnic->stats.queue_wakes++);
                }
        }
        spin_unlock_irqrestore(&np->tx_lock, flags2);
        spin_unlock_irqrestore(&vnic->tx_lock, flags1);

        /* Must prevent polls and hold lock to modify poll_enabled */
        napi_disable(&np->napi);
        spin_lock_irqsave(&vnic->irq_enabled_lock, flags1);
        vnic->poll_enabled = 0;
        spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags1);
        napi_enable(&np->napi);
}


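/* Backend interface has come up; start the fastpath if the frontend is ready. */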
static void netfront_accel_interface_up(netfront_accel_vnic *vnic)
{
        if (!vnic->backend_netdev_up) {
                vnic->backend_netdev_up = 1;

                if (vnic->frontend_ready)
                        vnic_start_fastpath(vnic);
        }
}


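/* Backend interface has gone down; stop the fastpath if it was running. */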
static void netfront_accel_interface_down(netfront_accel_vnic *vnic)
{
        if (vnic->backend_netdev_up) {
                vnic->backend_netdev_up = 0;

                if (vnic->frontend_ready)
                        vnic_stop_fastpath(vnic);
        }
}


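/*
 * Handle a MAPBUF reply from dom0: hand the newly mapped pages to the RX
 * or TX buffer pool (chosen from the request offset) and, once every
 * requested page has been returned, mark the frontend ready and start the
 * fastpath if the backend interface is up.
 */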
static int vnic_add_bufs(netfront_accel_vnic *vnic,
                         struct net_accel_msg *msg)
{
        int rc, offset;
        struct netfront_accel_bufinfo *bufinfo;

        BUG_ON(msg->u.mapbufs.pages > NET_ACCEL_MSG_MAX_PAGE_REQ);

        offset = msg->u.mapbufs.reqid;

        if (offset < vnic->bufpages.max_pages -
            (vnic->bufpages.max_pages / sfc_netfront_buffer_split)) {
                bufinfo = vnic->rx_bufs;
        } else
                bufinfo = vnic->tx_bufs;

        /* Queue up some Rx buffers to start things off. */
        if ((rc = netfront_accel_add_bufs(&vnic->bufpages, bufinfo, msg)) == 0) {
                netfront_accel_vi_add_bufs(vnic, bufinfo == vnic->rx_bufs);

                if (offset + msg->u.mapbufs.pages == vnic->bufpages.max_pages) {
                        VPRINTK("%s: got all buffers back\n", __FUNCTION__);
                        vnic->frontend_ready = 1;
                        if (vnic->backend_netdev_up)
                                vnic_start_fastpath(vnic);
                } else {
                        VPRINTK("%s: got buffers back %d %d\n", __FUNCTION__,
                                offset, msg->u.mapbufs.pages);
                }
        }

        return rc;
}


/* The largest [o] such that (1u << o) <= n.  Requires n > 0. */
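/* For example, log2_le(6) == 2 and log2_le(8) == 3. */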

inline unsigned log2_le(unsigned long n) {
        unsigned order = 1;
        while ((1ul << order) <= n) ++order;
        return (order - 1);
}

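/*
 * Ask dom0 to map buffer pages, in power-of-two sized batches of at most
 * NET_ACCEL_MSG_MAX_PAGE_REQ pages, until all pages have been requested,
 * the message queue fills up, or a grant operation fails.
 */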
static int vnic_send_buffer_requests(netfront_accel_vnic *vnic,
                                     struct netfront_accel_bufpages *bufpages)
{
        int pages, offset, rc = 0, sent = 0;
        struct net_accel_msg msg;

        while (bufpages->page_reqs < bufpages->max_pages) {
                offset = bufpages->page_reqs;

                pages = pow2(log2_le(bufpages->max_pages -
                                     bufpages->page_reqs));
                pages = pages < NET_ACCEL_MSG_MAX_PAGE_REQ ?
                        pages : NET_ACCEL_MSG_MAX_PAGE_REQ;

                BUG_ON(offset < 0);
                BUG_ON(pages <= 0);

                rc = netfront_accel_buf_map_request(vnic->dev, bufpages,
                                                    &msg, pages, offset);
                if (rc == 0) {
                        rc = net_accel_msg_send(vnic->shared_page,
                                                &vnic->to_dom0, &msg);
                        if (rc < 0) {
                                VPRINTK("%s: queue full, stopping for now\n",
                                        __FUNCTION__);
                                break;
                        }
                        sent++;
                } else {
                        EPRINTK("%s: problem with grant, stopping for now\n",
                                __FUNCTION__);
                        break;
                }

                bufpages->page_reqs += pages;
        }

        if (sent)
                net_accel_msg_notify(vnic->msg_channel_irq);

        return rc;
}


/*
 * In response to dom0 saying "my queue is full", we reply with this
 * when it is no longer full
 */
inline void vnic_set_queue_not_full(netfront_accel_vnic *vnic)
{

        if (!test_and_set_bit(NET_ACCEL_MSG_AFLAGS_QUEUE0NOTFULL_B,
                              (unsigned long *)&vnic->shared_page->aflags))
                notify_remote_via_irq(vnic->msg_channel_irq);
        else
                VPRINTK("queue not full bit already set, not signalling\n");
}

/*
 * Notify dom0 that the queue we want to use is full, it should
 * respond by setting MSG_AFLAGS_QUEUEUNOTFULL in due course
 */
inline void vnic_set_queue_full(netfront_accel_vnic *vnic)
{

        if (!test_and_set_bit(NET_ACCEL_MSG_AFLAGS_QUEUEUFULL_B,
                              (unsigned long *)&vnic->shared_page->aflags))
                notify_remote_via_irq(vnic->msg_channel_irq);
        else
                VPRINTK("queue full bit already set, not signalling\n");
}


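/*
 * Check the protocol version offered in the backend's hello message; only
 * an exact match with NET_ACCEL_MSG_VERSION is accepted.
 */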
static int vnic_check_hello_version(unsigned version)
{
        if (version > NET_ACCEL_MSG_VERSION) {
                /* Newer protocol, we must refuse */
                return -EPROTO;
        }

        if (version < NET_ACCEL_MSG_VERSION) {
                /*
                 * We are newer, so have discretion to accept if we
                 * wish.  For now however, just reject
                 */
                return -EPROTO;
        }

        BUG_ON(version != NET_ACCEL_MSG_VERSION);
        return 0;
}


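/*
 * Handle the backend's hello message: check the protocol version,
 * negotiate the number of buffer pages, allocate the RX/TX buffer pools,
 * and send a reply (flagging an error if any step failed).
 */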
static int vnic_process_hello_msg(netfront_accel_vnic *vnic,
                                  struct net_accel_msg *msg)
{
        int err = 0;
        unsigned pages = sfc_netfront_max_pages;

        if (vnic_check_hello_version(msg->u.hello.version) < 0) {
                msg->id = NET_ACCEL_MSG_HELLO | NET_ACCEL_MSG_REPLY
                        | NET_ACCEL_MSG_ERROR;
                msg->u.hello.version = NET_ACCEL_MSG_VERSION;
        } else {
                vnic->backend_netdev_up
                        = vnic->shared_page->net_dev_up;

                msg->id = NET_ACCEL_MSG_HELLO | NET_ACCEL_MSG_REPLY;
                msg->u.hello.version = NET_ACCEL_MSG_VERSION;
                if (msg->u.hello.max_pages &&
                    msg->u.hello.max_pages < pages)
                        pages = msg->u.hello.max_pages;
                msg->u.hello.max_pages = pages;

                /* Half of pages for rx, half for tx */
                err = netfront_accel_alloc_buffer_mem(&vnic->bufpages,
                                                      vnic->rx_bufs,
                                                      vnic->tx_bufs,
                                                      pages);
                if (err)
                        msg->id |= NET_ACCEL_MSG_ERROR;
        }

        /* Send reply */
        net_accel_msg_reply_notify(vnic->shared_page, vnic->msg_channel_irq,
                                   &vnic->to_dom0, msg);
        return err;
}


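/*
 * The backend reports that a MAC address may now be local; drop any entry
 * we hold for it in the fastpath hash table so that traffic to it falls
 * back to the normal path.
 */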
static int vnic_process_localmac_msg(netfront_accel_vnic *vnic,
                                     struct net_accel_msg *msg)
{
        unsigned long flags;
        cuckoo_hash_mac_key key;

        if (msg->u.localmac.flags & NET_ACCEL_MSG_ADD) {
                DPRINTK("MAC has moved, could be local: %pM\n",
                        msg->u.localmac.mac);
                key = cuckoo_mac_to_key(msg->u.localmac.mac);
                spin_lock_irqsave(&vnic->table_lock, flags);
                /* Try to remove it, not a big deal if not there */
                cuckoo_hash_remove(&vnic->fastpath_table,
                                   (cuckoo_hash_key *)&key);
                spin_unlock_irqrestore(&vnic->table_lock, flags);
        }

        return 0;
}


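/*
 * Dispatch a single message received from dom0, enforcing the expected
 * hello -> hardware info -> mapped buffers ordering via msg_state.
 */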
static
int vnic_process_rx_msg(netfront_accel_vnic *vnic,
                        struct net_accel_msg *msg)
{
        int err;

        switch (msg->id) {
        case NET_ACCEL_MSG_HELLO:
                /* Hello, reply with Reply */
                DPRINTK("got Hello, with version %.8x\n",
                        msg->u.hello.version);
                BUG_ON(vnic->msg_state != NETFRONT_ACCEL_MSG_NONE);
                err = vnic_process_hello_msg(vnic, msg);
                if (err == 0)
                        vnic->msg_state = NETFRONT_ACCEL_MSG_HELLO;
                break;
        case NET_ACCEL_MSG_SETHW:
                /* Hardware info message */
                DPRINTK("got H/W info\n");
                BUG_ON(vnic->msg_state != NETFRONT_ACCEL_MSG_HELLO);
                err = netfront_accel_vi_init(vnic, &msg->u.hw);
                if (err == 0)
                        vnic->msg_state = NETFRONT_ACCEL_MSG_HW;
                break;
        case NET_ACCEL_MSG_MAPBUF | NET_ACCEL_MSG_REPLY:
                VPRINTK("Got mapped buffers back\n");
                BUG_ON(vnic->msg_state != NETFRONT_ACCEL_MSG_HW);
                err = vnic_add_bufs(vnic, msg);
                break;
        case NET_ACCEL_MSG_MAPBUF | NET_ACCEL_MSG_REPLY | NET_ACCEL_MSG_ERROR:
                /* No buffers.  Can't use the fast path. */
                EPRINTK("Got mapped buffers error.  Cannot accelerate.\n");
                BUG_ON(vnic->msg_state != NETFRONT_ACCEL_MSG_HW);
                err = -EIO;
                break;
        case NET_ACCEL_MSG_LOCALMAC:
                /* Should be add, remove not currently used */
                EPRINTK_ON(!(msg->u.localmac.flags & NET_ACCEL_MSG_ADD));
                BUG_ON(vnic->msg_state != NETFRONT_ACCEL_MSG_HW);
                err = vnic_process_localmac_msg(vnic, msg);
                break;
        default:
                EPRINTK("Huh? Message code is 0x%x\n", msg->id);
                err = -EPROTO;
                break;
        }

        return err;
}


/* Process an IRQ received from back end driver */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
void netfront_accel_msg_from_bend(struct work_struct *context)
#else
void netfront_accel_msg_from_bend(void *context)
#endif
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
        netfront_accel_vnic *vnic =
                container_of(context, netfront_accel_vnic, msg_from_bend);
#else
        netfront_accel_vnic *vnic = (netfront_accel_vnic *)context;
#endif
        struct net_accel_msg msg;
        int err, queue_was_full = 0;

        mutex_lock(&vnic->vnic_mutex);

        /*
         * This happens when the shared pages have been unmapped but
         * the workqueue has yet to be flushed
         */
        if (!vnic->dom0_state_is_setup)
                goto unlock_out;

        while ((vnic->shared_page->aflags & NET_ACCEL_MSG_AFLAGS_TO_DOMU_MASK)
               != 0) {
                if (vnic->shared_page->aflags &
                    NET_ACCEL_MSG_AFLAGS_QUEUEUNOTFULL) {
                        /* We've been told there may now be space. */
                        clear_bit(NET_ACCEL_MSG_AFLAGS_QUEUEUNOTFULL_B,
                                  (unsigned long *)&vnic->shared_page->aflags);
                }

                if (vnic->shared_page->aflags &
                    NET_ACCEL_MSG_AFLAGS_QUEUE0FULL) {
                        /*
                         * There will be space at the end of this
                         * function if we can make any.
                         */
                        clear_bit(NET_ACCEL_MSG_AFLAGS_QUEUE0FULL_B,
                                  (unsigned long *)&vnic->shared_page->aflags);
                        queue_was_full = 1;
                }

                if (vnic->shared_page->aflags &
                    NET_ACCEL_MSG_AFLAGS_NETUPDOWN) {
                        DPRINTK("%s: net interface change\n", __FUNCTION__);
                        clear_bit(NET_ACCEL_MSG_AFLAGS_NETUPDOWN_B,
                                  (unsigned long *)&vnic->shared_page->aflags);
                        if (vnic->shared_page->net_dev_up)
                                netfront_accel_interface_up(vnic);
                        else
                                netfront_accel_interface_down(vnic);
                }
        }

        /* Pull msg out of shared memory */
        while ((err = net_accel_msg_recv(vnic->shared_page, &vnic->from_dom0,
                                         &msg)) == 0) {
                err = vnic_process_rx_msg(vnic, &msg);

                if (err != 0)
                        goto done;
        }

        /*
         * Send any pending buffer map request messages that we can,
         * and mark domU->dom0 as full if necessary.
         */
        if (vnic->msg_state == NETFRONT_ACCEL_MSG_HW &&
            vnic->bufpages.page_reqs < vnic->bufpages.max_pages) {
                if (vnic_send_buffer_requests(vnic, &vnic->bufpages) == -ENOSPC)
                        vnic_set_queue_full(vnic);
        }

        /*
         * If there are no messages then this is not an error.  It
         * just means that we've finished processing the queue.
         */
        if (err == -ENOENT)
                err = 0;
 done:
        /* We will now have made space in the dom0->domU queue if we can */
        if (queue_was_full)
                vnic_set_queue_not_full(vnic);

        if (err != 0) {
                EPRINTK("%s returned %d\n", __FUNCTION__, err);
                netfront_accel_set_closing(vnic);
        }

 unlock_out:
        mutex_unlock(&vnic->vnic_mutex);

        return;
}


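/*
 * Interrupt handler for the message channel from the backend: defer the
 * actual message processing to the workqueue.
 */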
irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context)
{
        netfront_accel_vnic *vnic = (netfront_accel_vnic *)context;
        VPRINTK("irq %d from device %s\n", irq, vnic->dev->nodename);

        queue_work(netfront_accel_workqueue, &vnic->msg_from_bend);

        return IRQ_HANDLED;
}

/* Process an interrupt received from the NIC via backend */
irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context)
{
        netfront_accel_vnic *vnic = (netfront_accel_vnic *)context;
        struct net_device *net_dev = vnic->net_dev;
        unsigned long flags;

        VPRINTK("net irq %d from device %s\n", irq, vnic->dev->nodename);

        NETFRONT_ACCEL_STATS_OP(vnic->stats.irq_count++);

        BUG_ON(net_dev == NULL);

        spin_lock_irqsave(&vnic->irq_enabled_lock, flags);
        if (vnic->irq_enabled) {
                struct netfront_info *np = netdev_priv(net_dev);

                netfront_accel_disable_net_interrupts(vnic);
                vnic->irq_enabled = 0;
                spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags);

#if NETFRONT_ACCEL_STATS
                vnic->stats.poll_schedule_count++;
                if (vnic->stats.event_count_since_irq >
                    vnic->stats.events_per_irq_max)
                        vnic->stats.events_per_irq_max =
                                vnic->stats.event_count_since_irq;
                vnic->stats.event_count_since_irq = 0;
#endif
                napi_schedule(&np->napi);
        } else {
                spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags);
                NETFRONT_ACCEL_STATS_OP(vnic->stats.useless_irq_count++);
                DPRINTK("%s: irq when disabled\n", __FUNCTION__);
        }

        return IRQ_HANDLED;
}


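/*
 * Ask the backend to remove the fastpath filter for the given
 * MAC/IP/port/protocol tuple.  If the message queue is currently full the
 * request is silently dropped.
 */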
void netfront_accel_msg_tx_fastpath(netfront_accel_vnic *vnic, const void *mac,
                                    u32 ip, u16 port, u8 protocol)
{
        unsigned long lock_state;
        struct net_accel_msg *msg;

        msg = net_accel_msg_start_send(vnic->shared_page, &vnic->to_dom0,
                                       &lock_state);

        if (msg == NULL)
                return;

        net_accel_msg_init(msg, NET_ACCEL_MSG_FASTPATH);
        msg->u.fastpath.flags = NET_ACCEL_MSG_REMOVE;
        memcpy(msg->u.fastpath.mac, mac, ETH_ALEN);

        msg->u.fastpath.port = port;
        msg->u.fastpath.ip = ip;
        msg->u.fastpath.proto = protocol;

        net_accel_msg_complete_send_notify(vnic->shared_page, &vnic->to_dom0,
                                           &lock_state, vnic->msg_channel_irq);
}