2 * Copyright(c) 1999 - 2003 Intel Corporation. All rights reserved.
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License as published by the
6 * Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free Software Foundation, Inc.,
16 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 * The full GNU General Public License is included in this distribution in the
19 * file called LICENSE.
24 * 2003/06/25 - Shmulik Hen <shmulik.hen at intel dot com>
25 * - Fixed signed/unsigned calculation errors that caused load sharing
26 * to collapse to one slave under very heavy UDP Tx stress.
28 * 2003/08/06 - Amir Noam <amir.noam at intel dot com>
29 * - Add support for setting bond's MAC address with special
30 * handling required for ALB/TLB.
33 //#define BONDING_DEBUG 1
35 #include <linux/skbuff.h>
36 #include <linux/netdevice.h>
37 #include <linux/etherdevice.h>
38 #include <linux/pkt_sched.h>
39 #include <linux/spinlock.h>
40 #include <linux/slab.h>
41 #include <linux/timer.h>
43 #include <linux/ipv6.h>
44 #include <linux/if_arp.h>
45 #include <linux/if_ether.h>
46 #include <linux/if_bonding.h>
49 #include <asm/byteorder.h>
54 #define ALB_TIMER_TICKS_PER_SEC 10 /* should be a divisor of HZ */
55 #define BOND_TLB_REBALANCE_INTERVAL 10 /* in seconds, periodic re-balancing
56 * used for division - never set
59 #define BOND_ALB_LP_INTERVAL 1 /* in seconds periodic send of
60 * learning packets to the switch
63 #define BOND_TLB_REBALANCE_TICKS (BOND_TLB_REBALANCE_INTERVAL \
64 * ALB_TIMER_TICKS_PER_SEC)
66 #define BOND_ALB_LP_TICKS (BOND_ALB_LP_INTERVAL \
67 * ALB_TIMER_TICKS_PER_SEC)
69 #define TLB_HASH_TABLE_SIZE 256 /* The size of the clients hash table.
70 * Note that this value MUST NOT be smaller
71 * because the hash table key is one BYTE wide!
75 #define TLB_NULL_INDEX 0xffffffff
76 #define MAX_LP_RETRY 3
79 #define RLB_HASH_TABLE_SIZE 256
80 #define RLB_NULL_INDEX 0xffffffff
81 #define RLB_UPDATE_DELAY (2*ALB_TIMER_TICKS_PER_SEC) /* 2 seconds, in ALB timer ticks */
82 #define RLB_ARP_BURST_SIZE 2
83 #define RLB_UPDATE_RETRY 3 /* 3-ticks - must be smaller than the rlb
84 * rebalance interval (5 min).
86 /* RLB_PROMISC_TIMEOUT = 10 sec equals the time that the current slave is
87 * promiscuous after failover
89 #define RLB_PROMISC_TIMEOUT (10*ALB_TIMER_TICKS_PER_SEC)
91 static const u8 mac_bcast[ETH_ALEN] = {0xff,0xff,0xff,0xff,0xff,0xff};
98 u8 padding[ETH_ZLEN - ETH_HLEN];
107 u8 mac_src[ETH_ALEN]; /* sender hardware address */
108 u32 ip_src; /* sender IP address */
109 u8 mac_dst[ETH_ALEN]; /* target hardware address */
110 u32 ip_dst; /* target IP address */
114 /* Forward declaration */
115 static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]);
/* XOR-folds the hash_size bytes starting at hash_start into the local
 * accumulator `hash`. The declared result type is u8, so the hash value
 * is a single byte (presumably the accumulator is what gets returned).
 */
117 static inline u8 _simple_hash(u8 *hash_start, int hash_size)
122 for (i = 0; i < hash_size; i++) {
123 hash ^= hash_start[i];
129 /*********************** tlb specific functions ***************************/
/* Acquires the bond's tx_hashtbl_lock spinlock (ALB info of @bond). */
131 static inline void _lock_tx_hashtbl(struct bonding *bond)
133 spin_lock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
/* Releases the bond's tx_hashtbl_lock spinlock (ALB info of @bond). */
136 static inline void _unlock_tx_hashtbl(struct bonding *bond)
138 spin_unlock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
141 /* Caller must hold tx_hashtbl lock */
/* Resets one TLB hash-table entry: it is detached from its transmit slave
 * (tx_slave = NULL) and both list links are set to TLB_NULL_INDEX.
 * load_history is derived from the bytes counted so far
 * (1 + tx_bytes / BOND_TLB_REBALANCE_INTERVAL).
 * NOTE(review): the load_history update is presumably conditional on
 * save_load - confirm.
 */
142 static inline void tlb_init_table_entry(struct tlb_client_info *entry, int save_load)
145 entry->load_history = 1 + entry->tx_bytes /
146 BOND_TLB_REBALANCE_INTERVAL;
149 entry->tx_slave = NULL;
150 entry->next = TLB_NULL_INDEX;
151 entry->prev = TLB_NULL_INDEX;
/* Resets the per-slave TLB bookkeeping: the load is zeroed and the head
 * index of the slave's hash-entry chain is set to TLB_NULL_INDEX (empty).
 */
154 static inline void tlb_init_slave(struct slave *slave)
156 SLAVE_TLB_INFO(slave).load = 0;
157 SLAVE_TLB_INFO(slave).head = TLB_NULL_INDEX;
160 /* Caller must hold bond lock for read */
/* Under the tx_hashtbl lock, walks the chain of hash-table entries that
 * starts at the slave's head index and re-initialises each entry with
 * tlb_init_table_entry(), passing save_load through. After the lock is
 * released the slave's own TLB info is reset with tlb_init_slave().
 */
161 static void tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_load)
163 struct tlb_client_info *tx_hash_table;
166 _lock_tx_hashtbl(bond);
167 /* clear slave from tx_hashtbl */
168 tx_hash_table = BOND_ALB_INFO(bond).tx_hashtbl;
170 index = SLAVE_TLB_INFO(slave).head;
171 while (index != TLB_NULL_INDEX) {
172 u32 next_index = tx_hash_table[index].next;
173 tlb_init_table_entry(&tx_hash_table[index], save_load);
176 _unlock_tx_hashtbl(bond);
178 tlb_init_slave(slave);
181 /* Must be called before starting the monitor timer */
/* Initialises the tx_hashtbl spinlock, allocates a table of
 * TLB_HASH_TABLE_SIZE tlb_client_info entries, zeroes it and initialises
 * every entry with tlb_init_table_entry(entry, 1). When the allocation
 * fails an error is logged and the lock is released.
 * NOTE(review): kmalloc(..., GFP_KERNEL) is called while the tx_hashtbl
 * spinlock is held; GFP_KERNEL allocations may sleep - consider allocating
 * before taking the lock.
 */
182 static int tlb_initialize(struct bonding *bond)
184 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
185 int size = TLB_HASH_TABLE_SIZE * sizeof(struct tlb_client_info);
188 spin_lock_init(&(bond_info->tx_hashtbl_lock));
190 _lock_tx_hashtbl(bond);
192 bond_info->tx_hashtbl = kmalloc(size, GFP_KERNEL);
193 if (!bond_info->tx_hashtbl) {
194 printk(KERN_ERR DRV_NAME
195 ": Error: %s: Failed to allocate TLB hash table\n",
197 _unlock_tx_hashtbl(bond);
201 memset(bond_info->tx_hashtbl, 0, size);
202 for (i = 0; i < TLB_HASH_TABLE_SIZE; i++) {
203 tlb_init_table_entry(&bond_info->tx_hashtbl[i], 1);
205 _unlock_tx_hashtbl(bond);
210 /* Must be called only after all slaves have been released */
/* Frees the TLB hash table and clears the pointer to it, both while
 * holding the tx_hashtbl lock.
 */
211 static void tlb_deinitialize(struct bonding *bond)
213 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
215 _lock_tx_hashtbl(bond);
216 kfree(bond_info->tx_hashtbl);
217 bond_info->tx_hashtbl = NULL;
218 _unlock_tx_hashtbl(bond);
221 /* Caller must hold bond lock for read */
/* Chooses a transmit slave among those that pass SLAVE_IS_OK(). For each
 * candidate the "gap" is its speed shifted left by 20 minus its TLB load
 * shifted left by 3 (bytes to bits); the first enabled slave seeds
 * least_loaded/max_gap and the second loop, starting from that slave,
 * looks for a larger gap.
 * NOTE(review): the (s64) casts are applied to the already-shifted values,
 * so the shifts are evaluated in the operands' own (promoted) types -
 * confirm they cannot overflow before being widened.
 */
222 static struct slave *tlb_get_least_loaded_slave(struct bonding *bond)
224 struct slave *slave, *least_loaded;
228 /* Find the first enabled slave */
229 bond_for_each_slave(bond, slave, i) {
230 if (SLAVE_IS_OK(slave)) {
240 least_loaded = slave;
241 max_gap = (s64)(slave->speed << 20) - /* Convert to Megabit per sec */
242 (s64)(SLAVE_TLB_INFO(slave).load << 3); /* Bytes to bits */
244 /* Find the slave with the largest gap */
245 bond_for_each_slave_from(bond, slave, i, least_loaded) {
246 if (SLAVE_IS_OK(slave)) {
247 s64 gap = (s64)(slave->speed << 20) -
248 (s64)(SLAVE_TLB_INFO(slave).load << 3);
250 least_loaded = slave;
259 /* Caller must hold bond lock for read */
/* Returns the transmit slave for the flow identified by hash_index.
 * Under the tx_hashtbl lock: if the hash entry has no slave yet, one is
 * requested from tlb_get_least_loaded_slave() and, when found, the entry
 * is linked at the head of that slave's entry chain (next/prev indices,
 * slave_info->head). Whenever a slave is assigned, skb_len is added to the
 * entry's tx_bytes. The returned pointer is assigned_slave, i.e. the
 * stored slave or whatever tlb_get_least_loaded_slave() produced.
 */
260 struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u32 skb_len)
262 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
263 struct tlb_client_info *hash_table;
264 struct slave *assigned_slave;
266 _lock_tx_hashtbl(bond);
268 hash_table = bond_info->tx_hashtbl;
269 assigned_slave = hash_table[hash_index].tx_slave;
270 if (!assigned_slave) {
271 assigned_slave = tlb_get_least_loaded_slave(bond);
273 if (assigned_slave) {
274 struct tlb_slave_info *slave_info =
275 &(SLAVE_TLB_INFO(assigned_slave));
276 u32 next_index = slave_info->head;
278 hash_table[hash_index].tx_slave = assigned_slave;
279 hash_table[hash_index].next = next_index;
280 hash_table[hash_index].prev = TLB_NULL_INDEX;
282 if (next_index != TLB_NULL_INDEX) {
283 hash_table[next_index].prev = hash_index;
286 slave_info->head = hash_index;
288 hash_table[hash_index].load_history;
292 if (assigned_slave) {
293 hash_table[hash_index].tx_bytes += skb_len;
296 _unlock_tx_hashtbl(bond);
298 return assigned_slave;
301 /*********************** rlb specific functions ***************************/
/* Acquires the bond's rx_hashtbl_lock spinlock (ALB info of @bond). */
302 static inline void _lock_rx_hashtbl(struct bonding *bond)
304 spin_lock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
/* Releases the bond's rx_hashtbl_lock spinlock (ALB info of @bond). */
307 static inline void _unlock_rx_hashtbl(struct bonding *bond)
309 spin_unlock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
312 /* when an ARP REPLY is received from a client, update its info
/* Looks up the rx hash-table slot selected by hashing the ARP sender IP
 * (arp->ip_src). If the slot is assigned and its (ip_src, ip_dst) pair
 * mirrors the packet's (ip_dst, ip_src), the stored client MAC is replaced
 * by the ARP sender MAC and both client_info->ntt and bond_info->rx_ntt
 * are set to 1. The lookup and update run under the rx_hashtbl lock.
 */
315 static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp)
317 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
318 struct rlb_client_info *client_info;
321 _lock_rx_hashtbl(bond);
323 hash_index = _simple_hash((u8*)&(arp->ip_src), sizeof(arp->ip_src));
324 client_info = &(bond_info->rx_hashtbl[hash_index]);
326 if ((client_info->assigned) &&
327 (client_info->ip_src == arp->ip_dst) &&
328 (client_info->ip_dst == arp->ip_src)) {
330 /* update the clients MAC address */
331 memcpy(client_info->mac_dst, arp->mac_src, ETH_ALEN);
332 client_info->ntt = 1;
333 bond_info->rx_ntt = 1;
336 _unlock_rx_hashtbl(bond);
/* Receive hook installed as rlb_pkt_type.func by rlb_initialize(). The
 * skb payload is interpreted as struct arp_pkt and the result starts out
 * as NET_RX_DROP. The visible guards test IFF_MASTER on bond_dev and
 * skb->len against sizeof(struct arp_pkt). A packet whose op_code equals
 * htons(ARPOP_REPLY) is passed to rlb_update_entry_from_arp(); further
 * down the result is switched to NET_RX_SUCCESS.
 */
339 static int rlb_arp_recv(struct sk_buff *skb, struct net_device *bond_dev, struct packet_type *ptype)
341 struct bonding *bond = (struct bonding *)bond_dev->priv;
342 struct arp_pkt *arp = (struct arp_pkt *)skb->data;
343 int res = NET_RX_DROP;
345 if (!(bond_dev->flags & IFF_MASTER)) {
350 dprintk("Packet has no ARP data\n");
354 if (skb->len < sizeof(struct arp_pkt)) {
355 dprintk("Packet is too small to be an ARP\n");
359 if (arp->op_code == htons(ARPOP_REPLY)) {
360 /* update rx hash table for this ARP */
361 rlb_update_entry_from_arp(bond, arp);
362 dprintk("Server received an ARP Reply from client\n");
365 res = NET_RX_SUCCESS;
373 /* Caller must hold bond lock for read */
/* Selects the receive slave to hand to the next client. Candidates are
 * slaves that pass SLAVE_IS_OK(); a candidate with a higher speed than the
 * current choice replaces it. bond_info->next_rx_slave is then advanced to
 * the successor (->next) of the chosen slave.
 * NOTE(review): `slave` is set from next_rx_slave / first_slave and then
 * used as the cursor of bond_for_each_slave(); if that macro re-initialises
 * its cursor, the starting point chosen above is lost - confirm against
 * the macro definition.
 */
374 static struct slave *rlb_next_rx_slave(struct bonding *bond)
376 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
377 struct slave *rx_slave = NULL, *slave;
380 slave = bond_info->next_rx_slave;
382 slave = bond->first_slave;
385 bond_for_each_slave(bond, slave, i) {
386 if (SLAVE_IS_OK(slave)) {
389 } else if (slave->speed > rx_slave->speed) {
396 bond_info->next_rx_slave = rx_slave->next;
402 /* teach the switch the mac of a disabled slave
403 * on the primary for fault tolerance
405 * Caller must hold bond->curr_slave_lock for write or bond lock for write
/* Makes the current active slave announce `addr`. After a check for a
 * missing curr_active_slave: if the bond is not already marked
 * primary_is_promisc, the flag is set and the active slave's device is
 * put into promiscuous mode (dev_set_promiscuity(+1)); the promiscuity
 * timeout counter is restarted from 0; learning packets carrying `addr`
 * are then sent on the active slave.
 */
407 static void rlb_teach_disabled_mac_on_primary(struct bonding *bond, u8 addr[])
409 if (!bond->curr_active_slave) {
412 if (!bond->alb_info.primary_is_promisc) {
413 bond->alb_info.primary_is_promisc = 1;
414 dev_set_promiscuity(bond->curr_active_slave->dev, 1);
416 bond->alb_info.rlb_promisc_timeout_counter = 0;
418 alb_send_learning_packets(bond->curr_active_slave, addr);
421 /* slave being removed should not be active at this point
423 * Caller must hold bond lock for read
/* Under the rx_hashtbl lock, walks the list of assigned rx hash-table
 * entries (starting at rx_hashtbl_head) and, for every entry that points
 * at `slave`, substitutes the slave returned by rlb_next_rx_slave(), or
 * NULL when none is available. A reassigned entry whose client MAC differs
 * from the broadcast address is flagged (ntt / rx_ntt). Once the table
 * lock is dropped, curr_slave_lock is taken for write and, when `slave` is
 * not the current active slave, its dev_addr is announced through
 * rlb_teach_disabled_mac_on_primary().
 */
425 static void rlb_clear_slave(struct bonding *bond, struct slave *slave)
427 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
428 struct rlb_client_info *rx_hash_table;
429 u32 index, next_index;
431 /* clear slave from rx_hashtbl */
432 _lock_rx_hashtbl(bond);
434 rx_hash_table = bond_info->rx_hashtbl;
435 index = bond_info->rx_hashtbl_head;
436 for (; index != RLB_NULL_INDEX; index = next_index) {
437 next_index = rx_hash_table[index].next;
439 if (rx_hash_table[index].slave == slave) {
440 struct slave *assigned_slave = rlb_next_rx_slave(bond);
442 if (assigned_slave) {
443 rx_hash_table[index].slave = assigned_slave;
444 if (memcmp(rx_hash_table[index].mac_dst,
445 mac_bcast, ETH_ALEN)) {
446 bond_info->rx_hashtbl[index].ntt = 1;
447 bond_info->rx_ntt = 1;
448 /* A slave has been removed from the
449 * table because it is either disabled
450 * or being released. We must retry the
451 * update to avoid clients from not
452 * being updated & disconnecting when
455 bond_info->rlb_update_retry_counter =
458 } else { /* there is no active slave */
459 rx_hash_table[index].slave = NULL;
464 _unlock_rx_hashtbl(bond);
466 write_lock(&bond->curr_slave_lock);
467 if (slave != bond->curr_active_slave) {
468 rlb_teach_disabled_mac_on_primary(bond, slave->dev->dev_addr);
470 write_unlock(&bond->curr_slave_lock);
/* Issues a burst of RLB_ARP_BURST_SIZE arp_send(ARPOP_REPLY, ...) calls
 * for one client, using the client's assigned slave device, that slave's
 * dev_addr and client_info->mac_dst. The function first checks for a
 * client without an assigned slave.
 */
473 static void rlb_update_client(struct rlb_client_info *client_info)
477 if (!client_info->slave) {
481 for (i = 0; i < RLB_ARP_BURST_SIZE; i++) {
482 arp_send(ARPOP_REPLY, ETH_P_ARP,
484 client_info->slave->dev,
486 client_info->mac_dst,
487 client_info->slave->dev->dev_addr,
488 client_info->mac_dst);
492 /* sends ARP REPLIES that update the clients that need updating */
/* Under the rx_hashtbl lock, iterates over the assigned entries and calls
 * rlb_update_client() for each one whose ntt flag is set; the flag is
 * cleared only when rlb_update_retry_counter is zero. Afterwards
 * rlb_update_delay_counter is re-armed with RLB_UPDATE_DELAY.
 */
493 static void rlb_update_rx_clients(struct bonding *bond)
495 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
496 struct rlb_client_info *client_info;
499 _lock_rx_hashtbl(bond);
501 hash_index = bond_info->rx_hashtbl_head;
502 for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
503 client_info = &(bond_info->rx_hashtbl[hash_index]);
504 if (client_info->ntt) {
505 rlb_update_client(client_info);
506 if (bond_info->rlb_update_retry_counter == 0) {
507 client_info->ntt = 0;
512 /* do not update the entries again untill this counter is zero so that
513 * not to confuse the clients.
515 bond_info->rlb_update_delay_counter = RLB_UPDATE_DELAY;
517 _unlock_rx_hashtbl(bond);
520 /* The slave was assigned a new mac address - update the clients */
/* Under the rx_hashtbl lock, sets ntt on every assigned client that is
 * served by `slave` and whose MAC differs from the broadcast address.
 * After the iteration the bond-wide rx_ntt flag is raised and
 * rlb_update_retry_counter is reloaded with RLB_UPDATE_RETRY.
 * NOTE(review): the two bond-wide updates are presumably done only when at
 * least one client was flagged - confirm.
 */
521 static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *slave)
523 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
524 struct rlb_client_info *client_info;
528 _lock_rx_hashtbl(bond);
530 hash_index = bond_info->rx_hashtbl_head;
531 for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
532 client_info = &(bond_info->rx_hashtbl[hash_index]);
534 if ((client_info->slave == slave) &&
535 memcmp(client_info->mac_dst, mac_bcast, ETH_ALEN)) {
536 client_info->ntt = 1;
541 // update the team's flag only after the whole iteration
543 bond_info->rx_ntt = 1;
545 bond_info->rlb_update_retry_counter = RLB_UPDATE_RETRY;
548 _unlock_rx_hashtbl(bond);
551 /* mark all clients using src_ip to be updated */
/* Under the rx_hashtbl lock, walks the assigned entries and logs an error
 * for any entry without a slave. An entry is flagged (ntt and rx_ntt set
 * to 1) when its ip_src equals src_ip, its slave's dev_addr differs from
 * the bond device's dev_addr, and its client MAC is not the broadcast
 * address.
 */
552 static void rlb_req_update_subnet_clients(struct bonding *bond, u32 src_ip)
554 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
555 struct rlb_client_info *client_info;
558 _lock_rx_hashtbl(bond);
560 hash_index = bond_info->rx_hashtbl_head;
561 for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
562 client_info = &(bond_info->rx_hashtbl[hash_index]);
564 if (!client_info->slave) {
565 printk(KERN_ERR DRV_NAME
566 ": Error: found a client with no channel in "
567 "the client's hash table\n");
570 /*update all clients using this src_ip, that are not assigned
571 * to the team's address (curr_active_slave) and have a known
572 * unicast mac address.
574 if ((client_info->ip_src == src_ip) &&
575 memcmp(client_info->slave->dev->dev_addr,
576 bond->dev->dev_addr, ETH_ALEN) &&
577 memcmp(client_info->mac_dst, mac_bcast, ETH_ALEN)) {
578 client_info->ntt = 1;
579 bond_info->rx_ntt = 1;
583 _unlock_rx_hashtbl(bond);
586 /* Caller must hold both bond and ptr locks for read */
/* Maps the ARP peer onto a receive slave, under the rx_hashtbl lock. The
 * slot is chosen by hashing arp->ip_dst.
 *  - Slot already owned by this (ip_src, ip_dst) pair: a non-broadcast
 *    arp->mac_dst refreshes the stored MAC, and an existing slave is
 *    returned immediately (after unlocking).
 *  - Slot owned by another client: that client is first moved to the
 *    current active slave and re-announced with rlb_update_client().
 * A slave obtained from rlb_next_rx_slave() is then recorded in the slot
 * together with the packet's addresses; ntt/rx_ntt are set when the stored
 * MAC is not broadcast, otherwise ntt is cleared. A slot that was not yet
 * assigned is pushed onto the head of the assigned list.
 * Returns assigned_slave.
 * NOTE(review): the hash length is written sizeof(arp->ip_src) although
 * the hashed field is ip_dst; both are declared u32 earlier in this file,
 * so the length is the same.
 */
587 struct slave *rlb_choose_channel(struct bonding *bond, struct arp_pkt *arp)
589 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
590 struct slave *assigned_slave;
591 struct rlb_client_info *client_info;
594 _lock_rx_hashtbl(bond);
596 hash_index = _simple_hash((u8 *)&arp->ip_dst, sizeof(arp->ip_src));
597 client_info = &(bond_info->rx_hashtbl[hash_index]);
599 if (client_info->assigned) {
600 if ((client_info->ip_src == arp->ip_src) &&
601 (client_info->ip_dst == arp->ip_dst)) {
602 /* the entry is already assigned to this client */
604 if (memcmp(arp->mac_dst, mac_bcast, ETH_ALEN)) {
605 /* update mac address from arp */
606 memcpy(client_info->mac_dst, arp->mac_dst, ETH_ALEN);
609 assigned_slave = client_info->slave;
610 if (assigned_slave) {
611 _unlock_rx_hashtbl(bond);
612 return assigned_slave;
615 /* the entry is already assigned to some other client,
616 * move the old client to primary (curr_active_slave) so
617 * that the new client can be assigned to this entry.
619 if (bond->curr_active_slave &&
620 client_info->slave != bond->curr_active_slave) {
621 client_info->slave = bond->curr_active_slave;
622 rlb_update_client(client_info);
626 /* assign a new slave */
627 assigned_slave = rlb_next_rx_slave(bond);
629 if (assigned_slave) {
630 client_info->ip_src = arp->ip_src;
631 client_info->ip_dst = arp->ip_dst;
632 /* arp->mac_dst is broadcast for arp reqeusts.
633 * will be updated with clients actual unicast mac address
634 * upon receiving an arp reply.
636 memcpy(client_info->mac_dst, arp->mac_dst, ETH_ALEN);
637 client_info->slave = assigned_slave;
639 if (memcmp(client_info->mac_dst, mac_bcast, ETH_ALEN)) {
640 client_info->ntt = 1;
641 bond->alb_info.rx_ntt = 1;
643 client_info->ntt = 0;
646 if (!client_info->assigned) {
647 u32 prev_tbl_head = bond_info->rx_hashtbl_head;
648 bond_info->rx_hashtbl_head = hash_index;
649 client_info->next = prev_tbl_head;
650 if (prev_tbl_head != RLB_NULL_INDEX) {
651 bond_info->rx_hashtbl[prev_tbl_head].prev =
654 client_info->assigned = 1;
658 _unlock_rx_hashtbl(bond);
660 return assigned_slave;
663 /* chooses (and returns) transmit channel for arp reply
664 * does not choose channel for other arp types since they are
665 * sent on the curr_active_slave
/* Handles an outgoing ARP found at skb->nh.raw.
 *  - ARPOP_REPLY: the transmit slave comes from rlb_choose_channel() and
 *    its dev_addr is copied into arp->mac_src.
 *  - ARPOP_REQUEST: rlb_choose_channel() is called to create the rx
 *    hash-table entry, rlb_update_delay_counter is re-armed with
 *    RLB_UPDATE_DELAY, and clients using arp->ip_src are queued for update
 *    through rlb_req_update_subnet_clients().
 * tx_slave starts out as NULL and is only assigned on the reply path in
 * the visible lines.
 */
667 static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
669 struct arp_pkt *arp = (struct arp_pkt *)skb->nh.raw;
670 struct slave *tx_slave = NULL;
672 if (arp->op_code == __constant_htons(ARPOP_REPLY)) {
673 /* the arp must be sent on the selected
676 tx_slave = rlb_choose_channel(bond, arp);
678 memcpy(arp->mac_src,tx_slave->dev->dev_addr, ETH_ALEN);
680 dprintk("Server sent ARP Reply packet\n");
681 } else if (arp->op_code == __constant_htons(ARPOP_REQUEST)) {
683 /* Create an entry in the rx_hashtbl for this client as a
685 * When the arp reply is received the entry will be updated
686 * with the correct unicast address of the client.
688 rlb_choose_channel(bond, arp);
690 /* The ARP relpy packets must be delayed so that
691 * they can cancel out the influence of the ARP request.
693 bond->alb_info.rlb_update_delay_counter = RLB_UPDATE_DELAY;
695 /* arp requests are broadcast and are sent on the primary
696 * the arp request will collapse all clients on the subnet to
697 * the primary slave. We must register these clients to be
698 * updated with their assigned mac.
700 rlb_req_update_subnet_clients(bond, arp->ip_src);
701 dprintk("Server sent ARP Request packet\n");
707 /* Caller must hold bond lock for read */
/* Under the rx_hashtbl lock, walks the assigned entries and asks
 * rlb_next_rx_slave() for a slave for each one. When a slave is returned
 * and differs from the entry's current slave, the entry is moved to it and
 * flagged ntt. The bond-wide rx_ntt flag is raised after the iteration.
 * NOTE(review): rx_ntt is presumably raised only if some entry was
 * flagged - confirm.
 */
708 static void rlb_rebalance(struct bonding *bond)
710 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
711 struct slave *assigned_slave;
712 struct rlb_client_info *client_info;
716 _lock_rx_hashtbl(bond);
719 hash_index = bond_info->rx_hashtbl_head;
720 for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
721 client_info = &(bond_info->rx_hashtbl[hash_index]);
722 assigned_slave = rlb_next_rx_slave(bond);
723 if (assigned_slave && (client_info->slave != assigned_slave)) {
724 client_info->slave = assigned_slave;
725 client_info->ntt = 1;
730 /* update the team's flag only after the whole iteration */
732 bond_info->rx_ntt = 1;
734 _unlock_rx_hashtbl(bond);
737 /* Caller must hold rx_hashtbl lock */
/* Zero-fills one rlb_client_info entry and then marks both of its list
 * links as unused (RLB_NULL_INDEX).
 */
738 static void rlb_init_table_entry(struct rlb_client_info *entry)
740 memset(entry, 0, sizeof(struct rlb_client_info));
741 entry->next = RLB_NULL_INDEX;
742 entry->prev = RLB_NULL_INDEX;
/* Initialises the rx_hashtbl spinlock, allocates a table of
 * RLB_HASH_TABLE_SIZE rlb_client_info entries, sets the list head to
 * RLB_NULL_INDEX and initialises every entry with rlb_init_table_entry().
 * When the allocation fails an error is logged and the lock is released.
 * Finally the bond's rlb_pkt_type is filled in (ETH_P_ARP, the bond
 * device, rlb_arp_recv as handler) and registered with dev_add_pack().
 * NOTE(review): kmalloc(..., GFP_KERNEL) is called while the rx_hashtbl
 * spinlock is held; GFP_KERNEL allocations may sleep - consider allocating
 * before taking the lock.
 */
745 static int rlb_initialize(struct bonding *bond)
747 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
748 struct packet_type *pk_type = &(BOND_ALB_INFO(bond).rlb_pkt_type);
749 int size = RLB_HASH_TABLE_SIZE * sizeof(struct rlb_client_info);
752 spin_lock_init(&(bond_info->rx_hashtbl_lock));
754 _lock_rx_hashtbl(bond);
756 bond_info->rx_hashtbl = kmalloc(size, GFP_KERNEL);
757 if (!bond_info->rx_hashtbl) {
758 printk(KERN_ERR DRV_NAME
759 ": Error: %s: Failed to allocate RLB hash table\n",
761 _unlock_rx_hashtbl(bond);
765 bond_info->rx_hashtbl_head = RLB_NULL_INDEX;
767 for (i = 0; i < RLB_HASH_TABLE_SIZE; i++) {
768 rlb_init_table_entry(bond_info->rx_hashtbl + i);
770 _unlock_rx_hashtbl(bond);
772 /* initialize packet type */
773 pk_type->type = __constant_htons(ETH_P_ARP);
774 pk_type->dev = bond->dev;
775 pk_type->func = rlb_arp_recv;
777 /* register to receive ARPs */
778 dev_add_pack(pk_type);
/* Unregisters the bond's ARP packet handler with dev_remove_pack(), then
 * frees the rx hash table and clears the pointer to it under the
 * rx_hashtbl lock.
 */
783 static void rlb_deinitialize(struct bonding *bond)
785 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
787 dev_remove_pack(&(bond_info->rlb_pkt_type));
789 _lock_rx_hashtbl(bond);
790 kfree(bond_info->rx_hashtbl);
791 bond_info->rx_hashtbl = NULL;
792 _unlock_rx_hashtbl(bond);
795 /*********************** tlb/rlb shared functions *********************/
/* Builds a zeroed learning_pkt whose destination and source MAC are both
 * mac_addr and whose type is ETH_P_LOOP. Then, MAX_LP_RETRY times, an skb
 * of the packet's size is allocated, the packet is copied into it, and
 * the skb's nh.raw (payload after the Ethernet header), protocol,
 * priority (TC_PRIO_CONTROL) and dev (the slave's device) are filled in.
 * NOTE(review): the skb is presumably queued for transmission afterwards -
 * confirm.
 */
797 static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[])
799 struct learning_pkt pkt;
800 int size = sizeof(struct learning_pkt);
803 memset(&pkt, 0, size);
804 memcpy(pkt.mac_dst, mac_addr, ETH_ALEN);
805 memcpy(pkt.mac_src, mac_addr, ETH_ALEN);
806 pkt.type = __constant_htons(ETH_P_LOOP);
808 for (i = 0; i < MAX_LP_RETRY; i++) {
812 skb = dev_alloc_skb(size);
817 data = skb_put(skb, size);
818 memcpy(data, &pkt, size);
820 skb->nh.raw = data + ETH_HLEN;
821 skb->protocol = pkt.type;
822 skb->priority = TC_PRIO_CONTROL;
823 skb->dev = slave->dev;
829 /* hw is a boolean parameter that determines whether we should try and
830 * set the hw address of the device as well as the hw address of the
/* Gives the slave's net_device the address `addr`. One visible path only
 * copies `addr` into dev->dev_addr; the other builds a struct sockaddr
 * (sa_data = addr, sa_family = dev->type) and calls the driver's
 * set_mac_address() hook, logging an error if the hook reports failure.
 * NOTE(review): which path runs presumably depends on `hw` - confirm.
 */
833 static int alb_set_slave_mac_addr(struct slave *slave, u8 addr[], int hw)
835 struct net_device *dev = slave->dev;
836 struct sockaddr s_addr;
839 memcpy(dev->dev_addr, addr, dev->addr_len);
843 /* for rlb each slave must have a unique hw mac addresses so that */
844 /* each slave will receive packets destined to a different mac */
845 memcpy(s_addr.sa_data, addr, dev->addr_len);
846 s_addr.sa_family = dev->type;
847 if (dev->set_mac_address(dev, &s_addr)) {
848 printk(KERN_ERR DRV_NAME
849 ": Error: dev->set_mac_address of dev %s failed! ALB "
850 "mode requires that the base driver support setting "
851 "the hw address also when the network device's "
852 "interface is open\n",
859 /* Caller must hold bond lock for write or curr_slave_lock for write */
/* Exchanges the MAC addresses of slave1 and slave2 via
 * alb_set_slave_mac_addr(), passing rlb_enabled as the `hw` argument. For
 * each slave that passes SLAVE_IS_OK(), learning packets with its new
 * address are sent and, with rlb enabled, its clients are queued for
 * update through rlb_req_update_slave_clients(); otherwise the slave is
 * remembered as disabled_slave. When rlb is enabled and exactly one of the
 * two slaves was OK on entry (slaves_state_differ), the disabled slave's
 * address is taught on the primary.
 */
860 static void alb_swap_mac_addr(struct bonding *bond, struct slave *slave1, struct slave *slave2)
862 struct slave *disabled_slave = NULL;
863 u8 tmp_mac_addr[ETH_ALEN];
864 int slaves_state_differ;
866 slaves_state_differ = (SLAVE_IS_OK(slave1) != SLAVE_IS_OK(slave2));
868 memcpy(tmp_mac_addr, slave1->dev->dev_addr, ETH_ALEN);
869 alb_set_slave_mac_addr(slave1, slave2->dev->dev_addr, bond->alb_info.rlb_enabled);
870 alb_set_slave_mac_addr(slave2, tmp_mac_addr, bond->alb_info.rlb_enabled);
872 /* fasten the change in the switch */
873 if (SLAVE_IS_OK(slave1)) {
874 alb_send_learning_packets(slave1, slave1->dev->dev_addr);
875 if (bond->alb_info.rlb_enabled) {
876 /* inform the clients that the mac address
879 rlb_req_update_slave_clients(bond, slave1);
882 disabled_slave = slave1;
885 if (SLAVE_IS_OK(slave2)) {
886 alb_send_learning_packets(slave2, slave2->dev->dev_addr);
887 if (bond->alb_info.rlb_enabled) {
888 /* inform the clients that the mac address
891 rlb_req_update_slave_clients(bond, slave2);
894 disabled_slave = slave2;
897 if (bond->alb_info.rlb_enabled && slaves_state_differ) {
898 /* A disabled slave was assigned an active mac addr */
899 rlb_teach_disabled_mac_on_primary(bond,
900 disabled_slave->dev->dev_addr);
905 * alb_change_hw_addr_on_detach
906 * @bond: bonding we're working on
907 * @slave: the slave that was just detached
909 * We assume that @slave was already detached from the slave list.
911 * If @slave's permanent hw address is different both from its current
912 * address and from @bond's address, then somewhere in the bond there's
913 * a slave that has @slave's permanent address as its current address.
914 * We'll make sure that that slave no longer uses @slave's permanent address.
916 * Caller must hold bond lock
/* Mechanics: compares @slave's perm_hwaddr with its current dev_addr
 * (perm_curr_diff) and with a second address (perm_bond_diff). When both
 * differ, the remaining slaves are scanned for one whose current dev_addr
 * equals @slave's perm_hwaddr, and alb_swap_mac_addr() is used to swap
 * addresses between @slave and that slave.
 */
918 static void alb_change_hw_addr_on_detach(struct bonding *bond, struct slave *slave)
920 struct slave *tmp_slave;
925 perm_curr_diff = memcmp(slave->perm_hwaddr,
926 slave->dev->dev_addr,
928 perm_bond_diff = memcmp(slave->perm_hwaddr,
931 if (perm_curr_diff && perm_bond_diff) {
932 bond_for_each_slave(bond, tmp_slave, i) {
933 if (!memcmp(slave->perm_hwaddr,
934 tmp_slave->dev->dev_addr,
942 alb_swap_mac_addr(bond, slave, tmp_slave);
948 * alb_handle_addr_collision_on_attach
949 * @bond: bonding we're working on
950 * @slave: the slave that was just attached
952 * checks uniqueness of slave's mac address and handles the case where the
953 * new slave uses the bond's mac address.
955 * If the permanent hw address of @slave is @bond's hw address, we need to
956 * find a different hw address to give @slave, that isn't in use by any other
957 * slave in the bond. This address must be, of course, one of the permanent
958 * addresses of the other slaves.
960 * We go over the slave list, and for each slave there we compare its
961 * permanent hw address with the current address of all the other slaves.
962 * If no match was found, then we've found a slave with a permanent address
963 * that isn't used by any other slave in the bond, so we can assign it to
966 * assumption: this function is called before @slave is attached to the
969 * caller must hold the bond lock for write since the mac addresses are compared
970 * and may be swapped.
/* Mechanics: nothing needs checking for the first slave (slave_cnt == 0).
 * If @slave's perm_hwaddr differs from the bond device's dev_addr, the
 * existing slaves are scanned for one whose dev_addr equals @slave's
 * dev_addr, and an error is logged when such a duplicate exists.
 * Otherwise a nested scan looks for a slave (free_mac_slave) whose
 * perm_hwaddr is not the current dev_addr of any slave; if one is found
 * its perm_hwaddr is given to @slave via alb_set_slave_mac_addr() and a
 * warning is logged, else an error is logged.
 * NOTE(review): the "not unique - cannot enslave it!" message has no
 * trailing newline, unlike the other messages in this function.
 */
972 static int alb_handle_addr_collision_on_attach(struct bonding *bond, struct slave *slave)
974 struct slave *tmp_slave1, *tmp_slave2, *free_mac_slave;
977 if (bond->slave_cnt == 0) {
978 /* this is the first slave */
982 /* if slave's mac address differs from bond's mac address
983 * check uniqueness of slave's mac address against the other
984 * slaves in the bond.
986 if (memcmp(slave->perm_hwaddr, bond->dev->dev_addr, ETH_ALEN)) {
987 bond_for_each_slave(bond, tmp_slave1, i) {
988 if (!memcmp(tmp_slave1->dev->dev_addr, slave->dev->dev_addr,
995 /* a slave was found that is using the mac address
998 printk(KERN_ERR DRV_NAME
999 ": Error: the hw address of slave %s is not "
1000 "unique - cannot enslave it!",
1007 /* the slave's address is equal to the address of the bond
1008 * search for a spare address in the bond for this slave.
1010 free_mac_slave = NULL;
1012 bond_for_each_slave(bond, tmp_slave1, i) {
1014 bond_for_each_slave(bond, tmp_slave2, j) {
1015 if (!memcmp(tmp_slave1->perm_hwaddr,
1016 tmp_slave2->dev->dev_addr,
1024 /* no slave has tmp_slave1's perm addr
1027 free_mac_slave = tmp_slave1;
1032 if (free_mac_slave) {
1033 alb_set_slave_mac_addr(slave, free_mac_slave->perm_hwaddr,
1034 bond->alb_info.rlb_enabled);
1036 printk(KERN_WARNING DRV_NAME
1037 ": Warning: the hw address of slave %s is in use by "
1038 "the bond; giving it the hw address of %s\n",
1039 slave->dev->name, free_mac_slave->dev->name);
1041 printk(KERN_ERR DRV_NAME
1042 ": Error: the hw address of slave %s is in use by the "
1043 "bond; couldn't find a slave with a free hw address to "
1044 "give it (this should not have happened)\n",
1053 * alb_set_mac_address
1057 * In TLB mode all slaves are configured to the bond's hw address, but set
1058 * their dev_addr field to different addresses (based on their permanent hw
1061 * For each slave, this function sets the interface to the new address and then
1062 * changes its dev_addr field to its previous value.
1064 * Unwinding assumes bond's mac address has not yet changed.
/* Mechanics: for every slave, the current dev_addr is saved, the driver's
 * set_mac_address() hook is called with `addr`, and the saved dev_addr is
 * copied back, so only the hook's own effect on the interface remains.
 * Slaves without a set_mac_address hook and the rlb_enabled case are
 * checked explicitly. The unwind path builds a sockaddr from the bond
 * device's current address and replays the same save / set / restore
 * sequence from first_slave up to stop_at.
 */
1066 static int alb_set_mac_address(struct bonding *bond, void *addr)
1069 struct slave *slave, *stop_at;
1070 char tmp_addr[ETH_ALEN];
1074 if (bond->alb_info.rlb_enabled) {
1078 bond_for_each_slave(bond, slave, i) {
1079 if (slave->dev->set_mac_address == NULL) {
1084 /* save net_device's current hw address */
1085 memcpy(tmp_addr, slave->dev->dev_addr, ETH_ALEN);
1087 res = slave->dev->set_mac_address(slave->dev, addr);
1089 /* restore net_device's hw address */
1090 memcpy(slave->dev->dev_addr, tmp_addr, ETH_ALEN);
1100 memcpy(sa.sa_data, bond->dev->dev_addr, bond->dev->addr_len);
1101 sa.sa_family = bond->dev->type;
1103 /* unwind from head to the slave that failed */
1105 bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) {
1106 memcpy(tmp_addr, slave->dev->dev_addr, ETH_ALEN);
1107 slave->dev->set_mac_address(slave->dev, &sa);
1108 memcpy(slave->dev->dev_addr, tmp_addr, ETH_ALEN);
1114 /************************ exported alb functions ************************/
/* Sets up ALB state for @bond: tlb_initialize() is called first; on the
 * rlb path the bond is marked rlb_enabled and rlb_initialize() is called,
 * with tlb_deinitialize() available to undo the TLB setup.
 * NOTE(review): the rlb path is presumably taken only when rlb_enabled is
 * non-zero, and the TLB teardown only when rlb_initialize() fails -
 * confirm.
 */
1116 int bond_alb_initialize(struct bonding *bond, int rlb_enabled)
1120 res = tlb_initialize(bond);
1126 bond->alb_info.rlb_enabled = 1;
1127 /* initialize rlb */
1128 res = rlb_initialize(bond);
1130 tlb_deinitialize(bond);
/* Tears down ALB state for @bond: the TLB table is always released with
 * tlb_deinitialize(); the RLB side is released with rlb_deinitialize()
 * only when rlb_enabled is set.
 */
1138 void bond_alb_deinitialize(struct bonding *bond)
1140 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
1142 tlb_deinitialize(bond);
1144 if (bond_info->rlb_enabled) {
1145 rlb_deinitialize(bond);
/* Transmit path. bond->lock and curr_slave_lock are held for read for the
 * duration, and skb->mac.raw is pointed at skb->data. Depending on
 * ntohs(skb->protocol), the hash key is taken from the IPv4 destination
 * address, the IPv6 destination address or the Ethernet destination MAC;
 * the visible checks compare against the broadcast MAC / broadcast IP and,
 * for IPX, against IPX_NO_CHECKSUM and IPX_TYPE_NCP. With rlb enabled, one
 * branch obtains the slave from rlb_arp_xmit(). When do_tx_balance is set
 * the slave comes from tlb_choose_channel() using _simple_hash() of the
 * key; the fallback is the current active slave, with skb->len added to
 * unbalanced_load. A slave that passes SLAVE_IS_OK() gets the frame via
 * dev_queue_xmit(), with h_source rewritten to the slave's dev_addr when
 * it is not the active slave; otherwise tlb_clear_slave(bond, tx_slave, 0)
 * is called.
 */
1149 int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
1151 struct bonding *bond = (struct bonding *)bond_dev->priv;
1152 struct ethhdr *eth_data = (struct ethhdr *)skb->mac.raw = skb->data;
1153 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
1154 struct slave *tx_slave = NULL;
1155 static u32 ip_bcast = 0xffffffff;
1157 int do_tx_balance = 1;
1159 u8 *hash_start = NULL;
1161 /* make sure that the curr_active_slave and the slaves list do
1162 * not change during tx
1164 read_lock(&bond->lock);
1165 read_lock(&bond->curr_slave_lock);
1167 if (!BOND_IS_OK(bond)) {
1171 switch (ntohs(skb->protocol)) {
1173 if ((memcmp(eth_data->h_dest, mac_bcast, ETH_ALEN) == 0) ||
1174 (skb->nh.iph->daddr == ip_bcast)) {
1178 hash_start = (char*)&(skb->nh.iph->daddr);
1179 hash_size = sizeof(skb->nh.iph->daddr);
1183 if (memcmp(eth_data->h_dest, mac_bcast, ETH_ALEN) == 0) {
1188 hash_start = (char*)&(skb->nh.ipv6h->daddr);
1189 hash_size = sizeof(skb->nh.ipv6h->daddr);
1193 if (ipx_hdr(skb)->ipx_checksum !=
1194 __constant_htons(IPX_NO_CHECKSUM)) {
1195 /* something is wrong with this packet */
1200 if (ipx_hdr(skb)->ipx_type !=
1201 __constant_htons(IPX_TYPE_NCP)) {
1202 /* The only protocol worth balancing in
1203 * this family since it has an "ARP" like
1210 hash_start = (char*)eth_data->h_dest;
1211 hash_size = ETH_ALEN;
1216 if (bond_info->rlb_enabled) {
1217 tx_slave = rlb_arp_xmit(skb, bond);
1226 if (do_tx_balance) {
1227 hash_index = _simple_hash(hash_start, hash_size);
1228 tx_slave = tlb_choose_channel(bond, hash_index, skb->len);
1232 /* unbalanced or unassigned, send through primary */
1233 tx_slave = bond->curr_active_slave;
1234 bond_info->unbalanced_load += skb->len;
1237 if (tx_slave && SLAVE_IS_OK(tx_slave)) {
1238 skb->dev = tx_slave->dev;
1239 if (tx_slave != bond->curr_active_slave) {
1240 memcpy(eth_data->h_source,
1241 tx_slave->dev->dev_addr,
1244 dev_queue_xmit(skb);
1246 /* no suitable interface, frame not sent */
1248 tlb_clear_slave(bond, tx_slave, 0);
1254 read_unlock(&bond->curr_slave_lock);
1255 read_unlock(&bond->lock);
/* Body of the ALB timer; runs with bond->lock held for read and re-arms
 * bond_info->alb_timer HZ / ALB_TIMER_TICKS_PER_SEC jiffies ahead. With no
 * slaves the tx-rebalance and learning-packet counters are zeroed;
 * otherwise both are incremented and:
 *  - once lp_counter reaches BOND_ALB_LP_TICKS, learning packets are sent
 *    for every slave under curr_slave_lock (read) and the counter resets;
 *  - once tx_rebalance_counter reaches BOND_TLB_REBALANCE_TICKS, every
 *    slave's TLB state is cleared with tlb_clear_slave(..., 1), the active
 *    slave's load is seeded from unbalanced_load /
 *    BOND_TLB_REBALANCE_INTERVAL, and the counter resets;
 *  - with rlb enabled: while primary_is_promisc is set the promiscuity
 *    timeout counter is advanced and, on reaching RLB_PROMISC_TIMEOUT, the
 *    active slave leaves promiscuous mode; a pending rlb_rebalance request
 *    is served; and when rx_ntt is set the update delay counter is run
 *    down before rlb_update_rx_clients() is called and the retry counter
 *    is decremented or rx_ntt cleared.
 * NOTE(review): dev_set_promiscuity() dereferences
 * bond->curr_active_slave without a visible NULL check - confirm it cannot
 * be NULL while primary_is_promisc is set.
 */
1263 void bond_alb_monitor(struct bonding *bond)
1265 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
1266 struct slave *slave;
1267 int delta_in_ticks = HZ / ALB_TIMER_TICKS_PER_SEC;
1270 read_lock(&bond->lock);
1272 if (bond->kill_timers) {
1276 if (bond->slave_cnt == 0) {
1277 bond_info->tx_rebalance_counter = 0;
1278 bond_info->lp_counter = 0;
1282 bond_info->tx_rebalance_counter++;
1283 bond_info->lp_counter++;
1285 /* send learning packets */
1286 if (bond_info->lp_counter >= BOND_ALB_LP_TICKS) {
1287 /* change of curr_active_slave involves swapping of mac addresses.
1288 * in order to avoid this swapping from happening while
1289 * sending the learning packets, the curr_slave_lock must be held for
1292 read_lock(&bond->curr_slave_lock);
1293 bond_for_each_slave(bond, slave, i) {
1294 alb_send_learning_packets(slave,slave->dev->dev_addr);
1296 read_unlock(&bond->curr_slave_lock);
1298 bond_info->lp_counter = 0;
1301 /* rebalance tx traffic */
1302 if (bond_info->tx_rebalance_counter >= BOND_TLB_REBALANCE_TICKS) {
1303 read_lock(&bond->curr_slave_lock);
1304 bond_for_each_slave(bond, slave, i) {
1305 tlb_clear_slave(bond, slave, 1);
1306 if (slave == bond->curr_active_slave) {
1307 SLAVE_TLB_INFO(slave).load =
1308 bond_info->unbalanced_load /
1309 BOND_TLB_REBALANCE_INTERVAL;
1310 bond_info->unbalanced_load = 0;
1313 read_unlock(&bond->curr_slave_lock);
1314 bond_info->tx_rebalance_counter = 0;
1317 /* handle rlb stuff */
1318 if (bond_info->rlb_enabled) {
1319 /* the following code changes the promiscuity of the
1320 * the curr_active_slave. It needs to be locked with a
1321 * write lock to protect from other code that also
1322 * sets the promiscuity.
1324 write_lock(&bond->curr_slave_lock);
1325 if (bond_info->primary_is_promisc &&
1326 (++bond_info->rlb_promisc_timeout_counter >= RLB_PROMISC_TIMEOUT)) {
1328 bond_info->rlb_promisc_timeout_counter = 0;
1330 /* If the primary was set to promiscuous mode
1331 * because a slave was disabled then
1332 * it can now leave promiscuous mode.
1334 dev_set_promiscuity(bond->curr_active_slave->dev, -1);
1335 bond_info->primary_is_promisc = 0;
1337 write_unlock(&bond->curr_slave_lock);
1339 if (bond_info->rlb_rebalance) {
1340 bond_info->rlb_rebalance = 0;
1341 rlb_rebalance(bond);
1344 /* check if clients need updating */
1345 if (bond_info->rx_ntt) {
1346 if (bond_info->rlb_update_delay_counter) {
1347 --bond_info->rlb_update_delay_counter;
1349 rlb_update_rx_clients(bond);
1350 if (bond_info->rlb_update_retry_counter) {
1351 --bond_info->rlb_update_retry_counter;
1353 bond_info->rx_ntt = 0;
1360 mod_timer(&(bond_info->alb_timer), jiffies + delta_in_ticks);
1362 read_unlock(&bond->lock);
/* assumption: called before the slave is attached to the bond
 * and not locked by the bond lock
 */
/*
 * bond_alb_init_slave - prepare @slave for ALB/TLB operation in @bond
 * @bond: bond the slave is being added to
 * @slave: slave being initialised
 *
 * Passes slave->perm_hwaddr to alb_set_slave_mac_addr(), then runs
 * alb_handle_addr_collision_on_attach() with bond->lock taken for write
 * via write_lock_bh().  Afterwards tlb_init_slave() is called and
 * tx_rebalance_counter is loaded with BOND_TLB_REBALANCE_TICKS so that
 * the next bond_alb_monitor() pass rebalances tx traffic.
 *
 * NOTE(review): the declaration of 'res', the checks on it and the
 * return statements are not visible in this listing.  Presumably a
 * non-zero 'res' is propagated to the caller - confirm against the
 * complete source.
 */
1368 int bond_alb_init_slave(struct bonding *bond, struct slave *slave)
1372 res = alb_set_slave_mac_addr(slave, slave->perm_hwaddr,
1373 bond->alb_info.rlb_enabled);
1378 /* caller must hold the bond lock for write since the mac addresses
1379 * are compared and may be swapped.
1381 write_lock_bh(&bond->lock);
1383 res = alb_handle_addr_collision_on_attach(bond, slave);
1385 write_unlock_bh(&bond->lock);
1391 tlb_init_slave(slave);
1393 /* order a rebalance ASAP */
1394 bond->alb_info.tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS;
/* with rlb enabled also flag an rlb rebalance; bond_alb_monitor() clears the flag and calls rlb_rebalance() */
1396 if (bond->alb_info.rlb_enabled) {
1397 bond->alb_info.rlb_rebalance = 1;
1403 /* Caller must hold bond lock for write */
1404 void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave)
1406 if (bond->slave_cnt > 1) {
1407 alb_change_hw_addr_on_detach(bond, slave);
1410 tlb_clear_slave(bond, slave, 0);
1412 if (bond->alb_info.rlb_enabled) {
1413 bond->alb_info.next_rx_slave = NULL;
1414 rlb_clear_slave(bond, slave);
1418 /* Caller must hold bond lock for read */
1419 void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char link)
1421 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
1423 if (link == BOND_LINK_DOWN) {
1424 tlb_clear_slave(bond, slave, 0);
1425 if (bond->alb_info.rlb_enabled) {
1426 rlb_clear_slave(bond, slave);
1428 } else if (link == BOND_LINK_UP) {
1429 /* order a rebalance ASAP */
1430 bond_info->tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS;
1431 if (bond->alb_info.rlb_enabled) {
1432 bond->alb_info.rlb_rebalance = 1;
1433 /* If the updelay module parameter is smaller than the
1434 * forwarding delay of the switch the rebalance will
1435 * not work because the rebalance arp replies will
1436 * not be forwarded to the clients..
/**
 * bond_alb_handle_active_change - assign new curr_active_slave
 * @bond: our bonding struct
 * @new_slave: new slave to assign
 *
 * Set the bond->curr_active_slave to @new_slave and handle
 * mac address swapping and promiscuity changes as needed.
 *
 * Caller must hold bond curr_slave_lock for write (or bond lock for write)
 */
1452 void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave)
1454 struct slave *swap_slave;
/* @new_slave is already the active slave (NOTE(review): the body of this check is not visible here - presumably an early return) */
1457 if (bond->curr_active_slave == new_slave) {
/* if the outgoing active slave was put in promiscuous mode (primary_is_promisc), take it out again and reset the promisc bookkeeping */
1461 if (bond->curr_active_slave && bond->alb_info.primary_is_promisc) {
1462 dev_set_promiscuity(bond->curr_active_slave->dev, -1);
1463 bond->alb_info.primary_is_promisc = 0;
1464 bond->alb_info.rlb_promisc_timeout_counter = 0;
/* remember the outgoing active slave in swap_slave before installing @new_slave */
1467 swap_slave = bond->curr_active_slave;
1468 bond->curr_active_slave = new_slave;
/* NOTE(review): body not visible here - presumably nothing more is done when there is no new active slave or the bond has no slaves */
1470 if (!new_slave || (bond->slave_cnt == 0)) {
1474 /* set the new curr_active_slave to the bonds mac address
1475 * i.e. swap mac addresses of old curr_active_slave and new curr_active_slave
1478 /* find slave that is holding the bond's mac address */
/* NOTE(review): the condition guarding this search and the statement that ends it on a match are not visible in this listing */
1479 bond_for_each_slave(bond, swap_slave, i) {
1480 if (!memcmp(swap_slave->dev->dev_addr,
1481 bond->dev->dev_addr, ETH_ALEN)) {
1488 /* curr_active_slave must be set before calling alb_swap_mac_addr */
/* NOTE(review): the test choosing between the swap below and the set/learning-packet path after it is not visible in this listing */
1490 /* swap mac address */
1491 alb_swap_mac_addr(bond, swap_slave, new_slave);
1493 /* set the new_slave to the bond mac address */
1494 alb_set_slave_mac_addr(new_slave, bond->dev->dev_addr,
1495 bond->alb_info.rlb_enabled);
1496 /* fasten bond mac on new current slave */
1497 alb_send_learning_packets(new_slave, bond->dev->dev_addr);
1501 int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr)
1503 struct bonding *bond = (struct bonding *)bond_dev->priv;
1504 struct sockaddr *sa = addr;
1505 struct slave *swap_slave;
1509 if (!is_valid_ether_addr(sa->sa_data)) {
1510 return -EADDRNOTAVAIL;
1513 res = alb_set_mac_address(bond, addr);
1518 memcpy(bond_dev->dev_addr, sa->sa_data, bond_dev->addr_len);
1520 /* If there is no curr_active_slave there is nothing else to do.
1521 * Otherwise we'll need to pass the new address to it and handle
1524 if (!bond->curr_active_slave) {
1528 bond_for_each_slave(bond, swap_slave, i) {
1529 if (!memcmp(swap_slave->dev->dev_addr, bond_dev->dev_addr, ETH_ALEN)) {
1536 alb_swap_mac_addr(bond, swap_slave, bond->curr_active_slave);
1538 alb_set_slave_mac_addr(bond->curr_active_slave, bond_dev->dev_addr,
1539 bond->alb_info.rlb_enabled);
1541 alb_send_learning_packets(bond->curr_active_slave, bond_dev->dev_addr);
1542 if (bond->alb_info.rlb_enabled) {
1543 /* inform clients mac address has changed */
1544 rlb_req_update_slave_clients(bond, bond->curr_active_slave);