crypto: caam - consolidate memory barriers from job ring en/dequeue

Memory barriers are implied by the I/O register write implementation
(at least on Power), so we can remove the redundant wmb() in
caam_jr_enqueue().  In dequeue(), hoist the h/w done notification
write up to before we need to increment the head of the ring, which
saves an smp_mb().

Signed-off-by: Kim Phillips <kim.phillips@freescale.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c
index 7ae5e51..0adaad1 100644
--- a/drivers/crypto/caam/jr.c
+++ b/drivers/crypto/caam/jr.c
@@ -94,7 +94,8 @@
 		userdesc = jrp->entinfo[sw_idx].desc_addr_virt;
 		userstatus = jrp->outring[hw_idx].jrstatus;
 
-		smp_mb();
+		/* set done */
+		wr_reg32(&jrp->rregs->outring_rmvd, 1);
 
 		jrp->out_ring_read_index = (jrp->out_ring_read_index + 1) &
 					   (JOBR_DEPTH - 1);
@@ -114,9 +115,6 @@
 			jrp->tail = tail;
 		}
 
-		/* set done */
-		wr_reg32(&jrp->rregs->outring_rmvd, 1);
-
 		spin_unlock_bh(&jrp->outlock);
 
 		/* Finally, execute user's callback */
@@ -265,8 +263,6 @@
 				    (JOBR_DEPTH - 1);
 	jrp->head = (head + 1) & (JOBR_DEPTH - 1);
 
-	wmb();
-
 	wr_reg32(&jrp->rregs->inpring_jobadd, 1);
 
 	spin_unlock(&jrp->inplock);