arm64: atomics: fix grossly inconsistent asm constraints for exclusives

Our uses of inline asm constraints for atomic operations are fairly
wild and varied. We basically need to guarantee the following:

  1. Any instructions with barrier implications
     (load-acquire/store-release) have a "memory" clobber

  2. When performing exclusive accesses, the addressing mode is generated
     using the "Q" constraint

  3. Atomic blocks which use the condition flags must have a "cc" clobber

This patch addresses these concerns; as well as fixing the semantics of
the code, it stops GCC complaining about impossible asm constraints.
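
As an illustration only (not part of this patch), a minimal sketch of the
constraint style used below: an exclusive test-and-set written with a "+Q"
memory operand and a "memory" clobber. The function name is made up for
the example.

  /* Illustrative sketch, not kernel code. */
  static inline unsigned int example_test_and_set(unsigned int *lock)
  {
  	unsigned int old, tmp;

  	asm volatile(
  	/* load-acquire exclusive; "Q" generates the [reg] addressing mode */
  	"1:	ldaxr	%w0, %2\n"
  	/* store-exclusive; status (0 on success) lands in tmp */
  	"	stxr	%w1, %w3, %2\n"
  	/* retry if the exclusive store failed */
  	"	cbnz	%w1, 1b\n"
  	: "=&r" (old), "=&r" (tmp), "+Q" (*lock)
  	: "r" (1U)
  	/* "memory" because of the acquire semantics; a block that also
  	 * used the condition flags would add a "cc" clobber, per 3. above */
  	: "memory");

  	return old;	/* previous value: 0 means we took the lock */
  }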

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index 41112fe..7065e92 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -45,13 +45,13 @@
 	asm volatile(
 	"	sevl\n"
 	"1:	wfe\n"
-	"2:	ldaxr	%w0, [%1]\n"
+	"2:	ldaxr	%w0, %1\n"
 	"	cbnz	%w0, 1b\n"
-	"	stxr	%w0, %w2, [%1]\n"
+	"	stxr	%w0, %w2, %1\n"
 	"	cbnz	%w0, 2b\n"
-	: "=&r" (tmp)
-	: "r" (&lock->lock), "r" (1)
-	: "memory");
+	: "=&r" (tmp), "+Q" (lock->lock)
+	: "r" (1)
+	: "cc", "memory");
 }
 
 static inline int arch_spin_trylock(arch_spinlock_t *lock)
@@ -59,13 +59,13 @@
 	unsigned int tmp;
 
 	asm volatile(
-	"	ldaxr	%w0, [%1]\n"
+	"	ldaxr	%w0, %1\n"
 	"	cbnz	%w0, 1f\n"
-	"	stxr	%w0, %w2, [%1]\n"
+	"	stxr	%w0, %w2, %1\n"
 	"1:\n"
-	: "=&r" (tmp)
-	: "r" (&lock->lock), "r" (1)
-	: "memory");
+	: "=&r" (tmp), "+Q" (lock->lock)
+	: "r" (1)
+	: "cc", "memory");
 
 	return !tmp;
 }
@@ -73,8 +73,8 @@
 static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
 	asm volatile(
-	"	stlr	%w1, [%0]\n"
-	: : "r" (&lock->lock), "r" (0) : "memory");
+	"	stlr	%w1, %0\n"
+	: "=Q" (lock->lock) : "r" (0) : "memory");
 }
 
 /*
@@ -94,13 +94,13 @@
 	asm volatile(
 	"	sevl\n"
 	"1:	wfe\n"
-	"2:	ldaxr	%w0, [%1]\n"
+	"2:	ldaxr	%w0, %1\n"
 	"	cbnz	%w0, 1b\n"
-	"	stxr	%w0, %w2, [%1]\n"
+	"	stxr	%w0, %w2, %1\n"
 	"	cbnz	%w0, 2b\n"
-	: "=&r" (tmp)
-	: "r" (&rw->lock), "r" (0x80000000)
-	: "memory");
+	: "=&r" (tmp), "+Q" (rw->lock)
+	: "r" (0x80000000)
+	: "cc", "memory");
 }
 
 static inline int arch_write_trylock(arch_rwlock_t *rw)
@@ -108,13 +108,13 @@
 	unsigned int tmp;
 
 	asm volatile(
-	"	ldaxr	%w0, [%1]\n"
+	"	ldaxr	%w0, %1\n"
 	"	cbnz	%w0, 1f\n"
-	"	stxr	%w0, %w2, [%1]\n"
+	"	stxr	%w0, %w2, %1\n"
 	"1:\n"
-	: "=&r" (tmp)
-	: "r" (&rw->lock), "r" (0x80000000)
-	: "memory");
+	: "=&r" (tmp), "+Q" (rw->lock)
+	: "r" (0x80000000)
+	: "cc", "memory");
 
 	return !tmp;
 }
@@ -122,8 +122,8 @@
 static inline void arch_write_unlock(arch_rwlock_t *rw)
 {
 	asm volatile(
-	"	stlr	%w1, [%0]\n"
-	: : "r" (&rw->lock), "r" (0) : "memory");
+	"	stlr	%w1, %0\n"
+	: "=Q" (rw->lock) : "r" (0) : "memory");
 }
 
 /* write_can_lock - would write_trylock() succeed? */
@@ -148,14 +148,14 @@
 	asm volatile(
 	"	sevl\n"
 	"1:	wfe\n"
-	"2:	ldaxr	%w0, [%2]\n"
+	"2:	ldaxr	%w0, %2\n"
 	"	add	%w0, %w0, #1\n"
 	"	tbnz	%w0, #31, 1b\n"
-	"	stxr	%w1, %w0, [%2]\n"
+	"	stxr	%w1, %w0, %2\n"
 	"	cbnz	%w1, 2b\n"
-	: "=&r" (tmp), "=&r" (tmp2)
-	: "r" (&rw->lock)
-	: "memory");
+	: "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock)
+	:
+	: "cc", "memory");
 }
 
 static inline void arch_read_unlock(arch_rwlock_t *rw)
@@ -163,13 +163,13 @@
 	unsigned int tmp, tmp2;
 
 	asm volatile(
-	"1:	ldxr	%w0, [%2]\n"
+	"1:	ldxr	%w0, %2\n"
 	"	sub	%w0, %w0, #1\n"
-	"	stlxr	%w1, %w0, [%2]\n"
+	"	stlxr	%w1, %w0, %2\n"
 	"	cbnz	%w1, 1b\n"
-	: "=&r" (tmp), "=&r" (tmp2)
-	: "r" (&rw->lock)
-	: "memory");
+	: "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock)
+	:
+	: "cc", "memory");
 }
 
 static inline int arch_read_trylock(arch_rwlock_t *rw)
@@ -177,14 +177,14 @@
 	unsigned int tmp, tmp2 = 1;
 
 	asm volatile(
-	"	ldaxr	%w0, [%2]\n"
+	"	ldaxr	%w0, %2\n"
 	"	add	%w0, %w0, #1\n"
 	"	tbnz	%w0, #31, 1f\n"
-	"	stxr	%w1, %w0, [%2]\n"
+	"	stxr	%w1, %w0, %2\n"
 	"1:\n"
-	: "=&r" (tmp), "+r" (tmp2)
-	: "r" (&rw->lock)
-	: "memory");
+	: "=&r" (tmp), "+r" (tmp2), "+Q" (rw->lock)
+	:
+	: "cc", "memory");
 
 	return !tmp2;
 }