x86-64: Simplify and optimize vdso clock_gettime monotonic variants

We used to store the wall-to-monotonic offset and the realtime base.
It's faster to precompute the monotonic base.

This is about a 3% speedup on Sandy Bridge for CLOCK_MONOTONIC.
It's much more impressive for CLOCK_MONOTONIC_COARSE.

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: John Stultz <john.stultz@linaro.org>
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 1f00717..8b38be2 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -7,11 +7,6 @@
 struct vsyscall_gtod_data {
 	seqcount_t	seq;
 
-	/* open coded 'struct timespec' */
-	time_t		wall_time_sec;
-	u32		wall_time_nsec;
-
-	struct timezone sys_tz;
 	struct { /* extract of a clocksource struct */
 		int vclock_mode;
 		cycle_t	cycle_last;
@@ -19,8 +14,16 @@
 		u32	mult;
 		u32	shift;
 	} clock;
-	struct timespec wall_to_monotonic;
+
+	/* open coded 'struct timespec' */
+	time_t		wall_time_sec;
+	u32		wall_time_nsec;
+	u32		monotonic_time_nsec;
+	time_t		monotonic_time_sec;
+
+	struct timezone sys_tz;
 	struct timespec wall_time_coarse;
+	struct timespec monotonic_time_coarse;
 };
 extern struct vsyscall_gtod_data vsyscall_gtod_data;