inetpeer: get rid of ip_id_count
[ Upstream commit 73f156a6e8c1074ac6327e0abd1169e95eb66463 ]
Ideally, we would need to generate IP ID using a per destination IP
generator.
linux kernels used inet_peer cache for this purpose, but this had a huge
cost on servers disabling MTU discovery.
1) each inet_peer struct consumes 192 bytes
2) inetpeer cache uses a binary tree of inet_peer structs,
with a nominal size of ~66000 elements under load.
3) lookups in this tree are hitting a lot of cache lines, as tree depth
is about 20.
4) If server deals with many tcp flows, we have a high probability of
not finding the inet_peer, allocating a fresh one, inserting it in
the tree with same initial ip_id_count, (cf secure_ip_id())
5) We garbage collect inet_peer aggressively.
IP ID generation do not have to be 'perfect'
Goal is trying to avoid duplicates in a short period of time,
so that reassembly units have a chance to complete reassembly of
fragments belonging to one message before receiving other fragments
with a recycled ID.
We simply use an array of generators, and a Jenkin hash using the dst IP
as a key.
ipv6_select_ident() is put back into net/ipv6/ip6_output.c where it
belongs (it is only used from this file)
secure_ip_id() and secure_ipv6_id() no longer are needed.
Rename ip_select_ident_more() to ip_select_ident_segs() to avoid
unnecessary decrement/increment of the number of segments.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 2d64364..168d30d 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -46,13 +46,12 @@
};
/*
* Once inet_peer is queued for deletion (refcnt == -1), following fields
- * are not available: rid, ip_id_count, tcp_ts, tcp_ts_stamp
+ * are not available: rid, tcp_ts, tcp_ts_stamp
* We can share memory with rcu_head to help keep inet_peer small.
*/
union {
struct {
atomic_t rid; /* Frag reception counter */
- atomic_t ip_id_count; /* IP ID for the next packet */
__u32 tcp_ts;
__u32 tcp_ts_stamp;
};
@@ -102,7 +101,7 @@
extern void inetpeer_invalidate_tree(int family);
/*
- * temporary check to make sure we dont access rid, ip_id_count, tcp_ts,
+ * temporary check to make sure we dont access rid, tcp_ts,
* tcp_ts_stamp if no refcount is taken on inet_peer
*/
static inline void inet_peer_refcheck(const struct inet_peer *p)
@@ -110,13 +109,4 @@
WARN_ON_ONCE(atomic_read(&p->refcnt) <= 0);
}
-
-/* can be called with or without local BH being disabled */
-static inline int inet_getid(struct inet_peer *p, int more)
-{
- more++;
- inet_peer_refcheck(p);
- return atomic_add_return(more, &p->ip_id_count) - more;
-}
-
#endif /* _NET_INETPEER_H */
diff --git a/include/net/ip.h b/include/net/ip.h
index 6d6b12f..f70e64b 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -264,9 +264,19 @@
!(dst_metric_locked(dst, RTAX_MTU)));
}
-extern void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more);
+#define IP_IDENTS_SZ 2048u
+extern atomic_t *ip_idents;
-static inline void ip_select_ident(struct sk_buff *skb, struct dst_entry *dst, struct sock *sk)
+static inline u32 ip_idents_reserve(u32 hash, int segs)
+{
+ atomic_t *id_ptr = ip_idents + hash % IP_IDENTS_SZ;
+
+ return atomic_add_return(segs, id_ptr) - segs;
+}
+
+void __ip_select_ident(struct iphdr *iph, int segs);
+
+static inline void ip_select_ident_segs(struct sk_buff *skb, struct sock *sk, int segs)
{
struct iphdr *iph = ip_hdr(skb);
@@ -276,24 +286,20 @@
* does not change, they drop every other packet in
* a TCP stream using header compression.
*/
- iph->id = (sk && inet_sk(sk)->inet_daddr) ?
- htons(inet_sk(sk)->inet_id++) : 0;
- } else
- __ip_select_ident(iph, dst, 0);
-}
-
-static inline void ip_select_ident_more(struct sk_buff *skb, struct dst_entry *dst, struct sock *sk, int more)
-{
- struct iphdr *iph = ip_hdr(skb);
-
- if ((iph->frag_off & htons(IP_DF)) && !skb->local_df) {
if (sk && inet_sk(sk)->inet_daddr) {
iph->id = htons(inet_sk(sk)->inet_id);
- inet_sk(sk)->inet_id += 1 + more;
- } else
+ inet_sk(sk)->inet_id += segs;
+ } else {
iph->id = 0;
- } else
- __ip_select_ident(iph, dst, more);
+ }
+ } else {
+ __ip_select_ident(iph, segs);
+ }
+}
+
+static inline void ip_select_ident(struct sk_buff *skb, struct sock *sk)
+{
+ ip_select_ident_segs(skb, sk, 1);
}
/*
diff --git a/include/net/ipip.h b/include/net/ipip.h
index 4dccfe3..e8ee3bb 100644
--- a/include/net/ipip.h
+++ b/include/net/ipip.h
@@ -50,7 +50,7 @@
int pkt_len = skb->len - skb_transport_offset(skb); \
\
skb->ip_summed = CHECKSUM_NONE; \
- ip_select_ident(skb, &rt->dst, NULL); \
+ ip_select_ident(skb, NULL); \
\
err = ip_local_out(skb); \
if (likely(net_xmit_eval(err) == 0)) { \
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index fa7af91..117eaa5 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -392,14 +392,19 @@
int ip6_frag_match(struct inet_frag_queue *q, void *a);
/* more secured version of ipv6_addr_hash() */
-static inline u32 ipv6_addr_jhash(const struct in6_addr *a)
+static inline u32 __ipv6_addr_jhash(const struct in6_addr *a, const u32 initval)
{
u32 v = (__force u32)a->s6_addr32[0] ^ (__force u32)a->s6_addr32[1];
return jhash_3words(v,
(__force u32)a->s6_addr32[2],
(__force u32)a->s6_addr32[3],
- ipv6_hash_secret);
+ initval);
+}
+
+static inline u32 ipv6_addr_jhash(const struct in6_addr *a)
+{
+ return __ipv6_addr_jhash(a, ipv6_hash_secret);
}
static inline int ipv6_addr_any(const struct in6_addr *a)
diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h
index c2e542b..b1c3d1c 100644
--- a/include/net/secure_seq.h
+++ b/include/net/secure_seq.h
@@ -3,8 +3,6 @@
#include <linux/types.h>
-extern __u32 secure_ip_id(__be32 daddr);
-extern __u32 secure_ipv6_id(const __be32 daddr[4]);
extern u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport);
extern u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
__be16 dport);