net dst: use a percpu_counter to track entries
struct dst_ops tracks number of allocated dst in an atomic_t field,
subject to high cache line contention in stress workload.
Switch to a percpu_counter, to reduce number of time we need to dirty a
central location. Place it on a separate cache line to avoid dirtying
read only fields.
Stress test :
(Sending 160.000.000 UDP frames,
IP route cache disabled, dual E5540 @2.53GHz,
32bit kernel, FIB_TRIE, SLUB/NUMA)
Before:
real 0m51.179s
user 0m15.329s
sys 10m15.942s
After:
real 0m45.570s
user 0m15.525s
sys 9m56.669s
With a small reordering of struct neighbour fields, subject of a
following patch, (to separate refcnt from other read mostly fields)
real 0m41.841s
user 0m15.261s
sys 8m45.949s
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 39676ea..7e74023 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -199,7 +199,7 @@
struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops);
xfrm6_policy_afinfo.garbage_collect(net);
- return atomic_read(&ops->entries) > ops->gc_thresh * 2;
+ return dst_entries_get_fast(ops) > ops->gc_thresh * 2;
}
static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu)
@@ -255,7 +255,6 @@
.ifdown = xfrm6_dst_ifdown,
.local_out = __ip6_local_out,
.gc_thresh = 1024,
- .entries = ATOMIC_INIT(0),
};
static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
@@ -312,11 +311,13 @@
*/
gc_thresh = FIB6_TABLE_HASHSZ * 8;
xfrm6_dst_ops.gc_thresh = (gc_thresh < 1024) ? 1024 : gc_thresh;
+ dst_entries_init(&xfrm6_dst_ops);
ret = xfrm6_policy_init();
- if (ret)
+ if (ret) {
+ dst_entries_destroy(&xfrm6_dst_ops);
goto out;
-
+ }
ret = xfrm6_state_init();
if (ret)
goto out_policy;
@@ -341,4 +342,5 @@
//xfrm6_input_fini();
xfrm6_policy_fini();
xfrm6_state_fini();
+ dst_entries_destroy(&xfrm6_dst_ops);
}