net: Embed hh_cache inside of struct neighbour.

Now that there is a one-to-one correspondence between neighbour
and hh_cache entries, we no longer need:

1) dynamic allocation
2) attachment to dst->hh
3) refcounting

Initialization of the hh_cache entry is indicated by hh_len
being non-zero, and such initialization is always done with
the neighbour's lock held as a writer.

Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 56149ec..75ee421 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -343,14 +343,16 @@
 static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
 {
 	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+	struct neighbour *neigh;
 	struct dst_entry *dst;
 
 	skb->dev = bridge_parent(skb->dev);
 	if (!skb->dev)
 		goto free_skb;
 	dst = skb_dst(skb);
-	if (dst->hh) {
-		neigh_hh_bridge(dst->hh, skb);
+	neigh = dst->neighbour;
+	if (neigh->hh.hh_len) {
+		neigh_hh_bridge(&neigh->hh, skb);
 		skb->dev = nf_bridge->physindev;
 		return br_handle_frame_finish(skb);
 	} else if (dst->neighbour) {
diff --git a/net/core/dst.c b/net/core/dst.c
index 6135f36..4aacc14 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -172,7 +172,6 @@
 	dst->expires = 0UL;
 	dst->path = dst;
 	dst->neighbour = NULL;
-	dst->hh = NULL;
 #ifdef CONFIG_XFRM
 	dst->xfrm = NULL;
 #endif
@@ -226,19 +225,13 @@
 {
 	struct dst_entry *child;
 	struct neighbour *neigh;
-	struct hh_cache *hh;
 
 	smp_rmb();
 
 again:
 	neigh = dst->neighbour;
-	hh = dst->hh;
 	child = dst->child;
 
-	dst->hh = NULL;
-	if (hh)
-		hh_cache_put(hh);
-
 	if (neigh) {
 		dst->neighbour = NULL;
 		neigh_release(neigh);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index f879bb5..77a399f 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -297,6 +297,7 @@
 	n->updated	  = n->used = now;
 	n->nud_state	  = NUD_NONE;
 	n->output	  = neigh_blackhole;
+	seqlock_init(&n->hh.hh_lock);
 	n->parms	  = neigh_parms_clone(&tbl->parms);
 	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
 
@@ -702,14 +703,11 @@
 	if (neigh_del_timer(neigh))
 		printk(KERN_WARNING "Impossible event.\n");
 
-	hh = neigh->hh;
-	if (hh) {
-		neigh->hh = NULL;
-
+	hh = &neigh->hh;
+	if (hh->hh_len) {
 		write_seqlock_bh(&hh->hh_lock);
 		hh->hh_output = neigh_blackhole;
 		write_sequnlock_bh(&hh->hh_lock);
-		hh_cache_put(hh);
 	}
 
 	skb_queue_purge(&neigh->arp_queue);
@@ -737,8 +735,8 @@
 
 	neigh->output = neigh->ops->output;
 
-	hh = neigh->hh;
-	if (hh)
+	hh = &neigh->hh;
+	if (hh->hh_len)
 		hh->hh_output = neigh->ops->output;
 }
 
@@ -755,8 +753,8 @@
 
 	neigh->output = neigh->ops->connected_output;
 
-	hh = neigh->hh;
-	if (hh)
+	hh = &neigh->hh;
+	if (hh->hh_len)
 		hh->hh_output = neigh->ops->hh_output;
 }
 
@@ -1017,7 +1015,7 @@
 }
 EXPORT_SYMBOL(__neigh_event_send);
 
-static void neigh_update_hhs(const struct neighbour *neigh)
+static void neigh_update_hhs(struct neighbour *neigh)
 {
 	struct hh_cache *hh;
 	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
@@ -1027,8 +1025,8 @@
 		update = neigh->dev->header_ops->cache_update;
 
 	if (update) {
-		hh = neigh->hh;
-		if (hh) {
+		hh = &neigh->hh;
+		if (hh->hh_len) {
 			write_seqlock_bh(&hh->hh_lock);
 			update(hh, neigh->dev, neigh->ha);
 			write_sequnlock_bh(&hh->hh_lock);
@@ -1214,62 +1212,29 @@
 }
 EXPORT_SYMBOL(neigh_event_ns);
 
-static inline bool neigh_hh_lookup(struct neighbour *n, struct dst_entry *dst)
-{
-	struct hh_cache *hh;
-
-	smp_rmb(); /* paired with smp_wmb() in neigh_hh_init() */
-	hh = n->hh;
-	if (hh) {
-		atomic_inc(&hh->hh_refcnt);
-		if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
-			hh_cache_put(hh);
-		return true;
-	}
-	return false;
-}
-
 /* called with read_lock_bh(&n->lock); */
-static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
-			  __be16 protocol)
+static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
 {
-	struct hh_cache	*hh;
 	struct net_device *dev = dst->dev;
-
-	if (likely(neigh_hh_lookup(n, dst)))
-		return;
-
-	/* slow path */
-	hh = kzalloc(sizeof(*hh), GFP_ATOMIC);
-	if (!hh)
-		return;
-
-	seqlock_init(&hh->hh_lock);
-	atomic_set(&hh->hh_refcnt, 2);
-
-	if (dev->header_ops->cache(n, hh, protocol)) {
-		kfree(hh);
-		return;
-	}
+	__be16 prot = dst->ops->protocol;
+	struct hh_cache	*hh = &n->hh;
 
 	write_lock_bh(&n->lock);
 
-	/* must check if another thread already did the insert */
-	if (neigh_hh_lookup(n, dst)) {
-		kfree(hh);
+	/* Only one thread can come in here and initialize the
+	 * hh_cache entry.
+	 */
+	if (hh->hh_len)
 		goto end;
-	}
+
+	if (dev->header_ops->cache(n, hh, prot))
+		goto end;
 
 	if (n->nud_state & NUD_CONNECTED)
 		hh->hh_output = n->ops->hh_output;
 	else
 		hh->hh_output = n->ops->output;
 
-	smp_wmb(); /* paired with smp_rmb() in neigh_hh_lookup() */
-	n->hh	    = hh;
-
-	if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
-		hh_cache_put(hh);
 end:
 	write_unlock_bh(&n->lock);
 }
@@ -1312,10 +1277,8 @@
 		struct net_device *dev = neigh->dev;
 		unsigned int seq;
 
-		if (dev->header_ops->cache &&
-		    !dst->hh &&
-		    !(dst->flags & DST_NOCACHE))
-			neigh_hh_init(neigh, dst, dst->ops->protocol);
+		if (dev->header_ops->cache && !neigh->hh.hh_len)
+			neigh_hh_init(neigh, dst);
 
 		do {
 			seq = read_seqbegin(&neigh->ha_lock);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 54119d5..a621b96 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -182,6 +182,7 @@
 	struct rtable *rt = (struct rtable *)dst;
 	struct net_device *dev = dst->dev;
 	unsigned int hh_len = LL_RESERVED_SPACE(dev);
+	struct neighbour *neigh;
 
 	if (rt->rt_type == RTN_MULTICAST) {
 		IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len);
@@ -203,11 +204,14 @@
 		skb = skb2;
 	}
 
-	if (dst->hh)
-		return neigh_hh_output(dst->hh, skb);
-	else if (dst->neighbour)
-		return dst->neighbour->output(skb);
-
+	neigh = dst->neighbour;
+	if (neigh) {
+		struct hh_cache *hh = &neigh->hh;
+		if (hh->hh_len)
+			return neigh_hh_output(hh, skb);
+		else
+			return dst->neighbour->output(skb);
+	}
 	if (net_ratelimit())
 		printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n");
 	kfree_skb(skb);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index c6388e8..a52bb74 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -426,9 +426,10 @@
 			(int)((dst_metric(&r->dst, RTAX_RTT) >> 3) +
 			      dst_metric(&r->dst, RTAX_RTTVAR)),
 			r->rt_key_tos,
-			r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1,
-			r->dst.hh ? (r->dst.hh->hh_output ==
-				       dev_queue_xmit) : 0,
+			-1,
+			(r->dst.neighbour ?
+			 (r->dst.neighbour->hh.hh_output ==
+			  dev_queue_xmit) : 0),
 			r->rt_spec_dst, &len);
 
 		seq_printf(seq, "%*s\n", 127 - len, "");
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 9d4b165..f0f144c 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -100,6 +100,7 @@
 {
 	struct dst_entry *dst = skb_dst(skb);
 	struct net_device *dev = dst->dev;
+	struct neighbour *neigh;
 
 	skb->protocol = htons(ETH_P_IPV6);
 	skb->dev = dev;
@@ -134,11 +135,14 @@
 				skb->len);
 	}
 
-	if (dst->hh)
-		return neigh_hh_output(dst->hh, skb);
-	else if (dst->neighbour)
-		return dst->neighbour->output(skb);
-
+	neigh = dst->neighbour;
+	if (neigh) {
+		struct hh_cache *hh = &neigh->hh;
+		if (hh->hh_len)
+			return neigh_hh_output(hh, skb);
+		else
+			return dst->neighbour->output(skb);
+	}
 	IP6_INC_STATS_BH(dev_net(dst->dev),
 			 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
 	kfree_skb(skb);