treewide: fix replace nbd@openwrt.org with nbd@nbd.name
[openwrt.org/openwrt.git] / target / linux / generic / patches-4.4 / 680-NET-skip-GRO-for-foreign-MAC-addresses.patch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
Subject: NET: skip GRO for foreign MAC addresses
 
For network drivers using napi_gro_receive, packets are run through GRO,
even when the destination MAC address does not match, and they're supposed
to be delivered to another host behind a different bridge port.
 
This can be very expensive, because for drivers without TSO or scatter-
gather, this can only be undone by copying the skb and checksumming it
again.
 
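To be able to track foreign MAC addresses in an inexpensive way, create
a mask of the bits in which the MAC addresses of upper devices differ from
the device's own address. Destination addresses that differ from the
device's address only in those bits are still treated as local, which
allows handling VLANs and bridge devices with different addresses (as long
as they are not too different: every additional differing bit makes the
check less selective).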
 
Signed-off-by: Felix Fietkau <nbd@nbd.name>
 
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4205,6 +4205,9 @@ static enum gro_result dev_gro_receive(s
        enum gro_result ret;
        int grow;
 
+       if (skb->gro_skip)
+               goto normal;
+
        if (!(skb->dev->features & NETIF_F_GRO))
                goto normal;
 
@@ -5357,6 +5360,48 @@ static void __netdev_adjacent_dev_unlink
                                           &upper_dev->adj_list.lower);
 }
 
+static void __netdev_addr_mask(unsigned char *mask, const unsigned char *addr,
+                              struct net_device *dev)
+{ /* OR into mask every bit in which addr differs from dev->dev_addr */
+       int i;
+
+       for (i = 0; i < dev->addr_len; i++) /* only the first dev->addr_len bytes */
+               mask[i] |= addr[i] ^ dev->dev_addr[i];
+}
+
+static void __netdev_upper_mask(unsigned char *mask, struct net_device *dev,
+                               struct net_device *lower)
+{ /* fold the addresses of all devices stacked above dev into mask */
+       struct net_device *cur;
+       struct list_head *iter;
+
+       netdev_for_each_upper_dev_rcu(dev, cur, iter) {
+               __netdev_addr_mask(mask, cur->dev_addr, lower); /* compared against lower, not dev */
+               __netdev_upper_mask(mask, cur, lower); /* recurse into cur's own upper devices */
+       }
+}
+
+static void __netdev_update_addr_mask(struct net_device *dev)
+{ /* recompute dev->local_addr_mask, then do the same for each lower device */
+       unsigned char mask[MAX_ADDR_LEN];
+       struct net_device *cur;
+       struct list_head *iter;
+
+       memset(mask, 0, sizeof(mask)); /* start with no differing bits */
+       __netdev_upper_mask(mask, dev, dev); /* dev is both walk root and reference address */
+       memcpy(dev->local_addr_mask, mask, dev->addr_len); /* only addr_len bytes are stored */
+
+       netdev_for_each_lower_dev(dev, cur, iter)
+               __netdev_update_addr_mask(cur);
+}
+
+static void netdev_update_addr_mask(struct net_device *dev)
+{ /* runs __netdev_update_addr_mask() between rcu_read_lock() and rcu_read_unlock() */
+       rcu_read_lock();
+       __netdev_update_addr_mask(dev);
+       rcu_read_unlock();
+}
+
 static int __netdev_upper_dev_link(struct net_device *dev,
                                   struct net_device *upper_dev, bool master,
                                   void *private)
@@ -5428,6 +5473,7 @@ static int __netdev_upper_dev_link(struc
                        goto rollback_lower_mesh;
        }
 
+       netdev_update_addr_mask(dev);
        call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
                                      &changeupper_info.info);
        return 0;
@@ -5554,6 +5600,7 @@ void netdev_upper_dev_unlink(struct net_
        list_for_each_entry(i, &upper_dev->all_adj_list.upper, list)
                __netdev_adjacent_dev_unlink(dev, i->dev);
 
+       netdev_update_addr_mask(dev);
        call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
                                      &changeupper_info.info);
 }
@@ -6094,6 +6141,7 @@ int dev_set_mac_address(struct net_devic
        if (err)
                return err;
        dev->addr_assign_type = NET_ADDR_SET;
+       netdev_update_addr_mask(dev);
        call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
        add_device_randomness(dev->dev_addr, dev->addr_len);
        return 0;
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1638,6 +1638,8 @@ struct net_device {
        struct netdev_hw_addr_list      mc;
        struct netdev_hw_addr_list      dev_addrs;
 
+       unsigned char           local_addr_mask[MAX_ADDR_LEN];
+
 #ifdef CONFIG_SYSFS
        struct kset             *queues_kset;
 #endif
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -642,7 +642,8 @@ struct sk_buff {
        __u8                    ipvs_property:1;
        __u8                    inner_protocol_type:1;
        __u8                    remcsum_offload:1;
-       /* 3 or 5 bit hole */
+       __u8                    gro_skip:1;
+       /* 2 or 4 bit hole */
 
 #ifdef CONFIG_NET_SCHED
        __u16                   tc_index;       /* traffic control index */
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -140,6 +140,18 @@ u32 eth_get_headlen(void *data, unsigned
 }
 EXPORT_SYMBOL(eth_get_headlen);
 
+static inline bool
+eth_check_local_mask(const void *addr1, const void *addr2, const void *mask)
+{ /* true if addr1 and addr2 differ in any bit that is not set in mask */
+       const u16 *a1 = addr1; /* each argument is read as three 16-bit words */
+       const u16 *a2 = addr2;
+       const u16 *m = mask;
+
+       return (((a1[0] ^ a2[0]) & ~m[0]) | /* XOR finds differing bits, ~m drops masked ones */
+               ((a1[1] ^ a2[1]) & ~m[1]) |
+               ((a1[2] ^ a2[2]) & ~m[2]));
+}
+
 /**
  * eth_type_trans - determine the packet's protocol ID.
  * @skb: received socket data
@@ -168,8 +180,12 @@ __be16 eth_type_trans(struct sk_buff *sk
                        skb->pkt_type = PACKET_MULTICAST;
        }
        else if (unlikely(!ether_addr_equal_64bits(eth->h_dest,
-                                                  dev->dev_addr)))
+                                                  dev->dev_addr))) {
                skb->pkt_type = PACKET_OTHERHOST;
+               if (eth_check_local_mask(eth->h_dest, dev->dev_addr,
+                                        dev->local_addr_mask))
+                       skb->gro_skip = 1;
+       }
 
        /*
         * Some variants of DSA tagging don't have an ethertype field
 
comments