[S390] Inline assembly cleanup.

Major cleanup of all s390 inline assemblies. They now have a common
coding style. Quite a few have been shortened, mainly by using register
asm variables. Use of the EX_TABLE macro helps  as well. The atomic ops,
bit ops and locking inlines new use the Q-constraint if a newer gcc
is used.  That results in slightly better code.

Thanks to Christian Borntraeger for proof reading the changes.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index 468f4ea..027c474 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -27,9 +27,7 @@
          * yield the megahertz number of the cpu. The important function
          * is udelay and that is done using the tod clock. -- martin.
          */
-        __asm__ __volatile__(
-                "0: brct %0,0b"
-                : /* no outputs */ : "r" ((loops/2) + 1));
+	asm volatile("0: brct %0,0b" : : "d" ((loops/2) + 1));
 }
 
 /*
@@ -38,13 +36,12 @@
  */
 void __udelay(unsigned long usecs)
 {
-        uint64_t start_cc, end_cc;
+	uint64_t start_cc;
 
         if (usecs == 0)
                 return;
-        asm volatile ("STCK %0" : "=m" (start_cc));
+	start_cc = get_clock();
         do {
 		cpu_relax();
-                asm volatile ("STCK %0" : "=m" (end_cc));
-        } while (((end_cc - start_cc)/4096) < usecs);
+	} while (((get_clock() - start_cc)/4096) < usecs);
 }