cputime: Safely read cputime of full dynticks CPUs

While remotely reading the cputime of a task running on a
full dynticks CPU, the values stored in the utime/stime fields
of struct task_struct may be stale. They may be those of the
last kernel <-> user transition time snapshot, and we need to
add the tickless time spent since this snapshot.

To fix this, flush the cputime of the dynticks CPUs on each
kernel <-> user transition and record the time / context
in which we did this. Then, based on this snapshot and the
current time, perform the fixup on the reader side in the
task_cputime(), task_cputime_scaled() and task_gtime() accessors.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Li Zhong <zhong@linux.vnet.ibm.com>
Cc: Namhyung Kim <namhyung.kim@lge.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
[fixed kvm module related build errors]
Signed-off-by: Sedat Dilek <sedat.dilek@gmail.com>
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 624ef3f..7105d5c 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -153,7 +153,7 @@
  */
 #define __irq_enter()					\
 	do {						\
-		vtime_account_irq_enter(current);	\
+		account_irq_enter_time(current);	\
 		add_preempt_count(HARDIRQ_OFFSET);	\
 		trace_hardirq_enter();			\
 	} while (0)
@@ -169,7 +169,7 @@
 #define __irq_exit()					\
 	do {						\
 		trace_hardirq_exit();			\
-		vtime_account_irq_exit(current);	\
+		account_irq_exit_time(current);		\
 		sub_preempt_count(HARDIRQ_OFFSET);	\
 	} while (0)
 
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 6d087c5..cc898b8 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -10,6 +10,7 @@
 #include <linux/pid_namespace.h>
 #include <linux/user_namespace.h>
 #include <linux/securebits.h>
+#include <linux/seqlock.h>
 #include <net/net_namespace.h>
 
 #ifdef CONFIG_SMP
@@ -141,6 +142,15 @@
 # define INIT_PERF_EVENTS(tsk)
 #endif
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+# define INIT_VTIME(tsk)						\
+	.vtime_seqlock = __SEQLOCK_UNLOCKED(tsk.vtime_seqlock),	\
+	.vtime_snap = 0,				\
+	.vtime_snap_whence = VTIME_SYS,
+#else
+# define INIT_VTIME(tsk)
+#endif
+
 #define INIT_TASK_COMM "swapper"
 
 /*
@@ -210,6 +220,7 @@
 	INIT_TRACE_RECURSION						\
 	INIT_TASK_RCU_PREEMPT(tsk)					\
 	INIT_CPUSET_SEQ							\
+	INIT_VTIME(tsk)							\
 }
 
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 4fe2396..b7996a7 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -741,7 +741,7 @@
 }
 #endif /* CONFIG_IOMMU_API */
 
-static inline void guest_enter(void)
+static inline void __guest_enter(void)
 {
 	/*
 	 * This is running in ioctl context so we can avoid
@@ -751,7 +751,7 @@
 	current->flags |= PF_VCPU;
 }
 
-static inline void guest_exit(void)
+static inline void __guest_exit(void)
 {
 	/*
 	 * This is running in ioctl context so we can avoid
@@ -761,6 +761,22 @@
 	current->flags &= ~PF_VCPU;
 }
 
+#ifdef CONFIG_CONTEXT_TRACKING
+extern void guest_enter(void);
+extern void guest_exit(void);
+
+#else /* !CONFIG_CONTEXT_TRACKING */
+static inline void guest_enter(void)
+{
+	__guest_enter();
+}
+
+static inline void guest_exit(void)
+{
+	__guest_exit();
+}
+#endif /* !CONFIG_CONTEXT_TRACKING */
+
 static inline void kvm_guest_enter(void)
 {
 	unsigned long flags;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a9c608b..a9fa514 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1368,6 +1368,15 @@
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
 	struct cputime prev_cputime;
 #endif
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+	seqlock_t vtime_seqlock;
+	unsigned long long vtime_snap;
+	enum {
+		VTIME_SLEEPING = 0,
+		VTIME_USER,
+		VTIME_SYS,
+	} vtime_snap_whence;
+#endif
 	unsigned long nvcsw, nivcsw; /* context switch counts */
 	struct timespec start_time; 		/* monotonic time */
 	struct timespec real_start_time;	/* boot based time */
@@ -1792,11 +1801,13 @@
 		__put_task_struct(t);
 }
 
-static inline cputime_t task_gtime(struct task_struct *t)
-{
-	return t->gtime;
-}
-
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+extern void task_cputime(struct task_struct *t,
+			 cputime_t *utime, cputime_t *stime);
+extern void task_cputime_scaled(struct task_struct *t,
+				cputime_t *utimescaled, cputime_t *stimescaled);
+extern cputime_t task_gtime(struct task_struct *t);
+#else
 static inline void task_cputime(struct task_struct *t,
 				cputime_t *utime, cputime_t *stime)
 {
@@ -1815,6 +1826,12 @@
 	if (stimescaled)
 		*stimescaled = t->stimescaled;
 }
+
+static inline cputime_t task_gtime(struct task_struct *t)
+{
+	return t->gtime;
+}
+#endif
 extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
 extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
 
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index bb50c3c..71a5782 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -8,35 +8,44 @@
 extern void vtime_account_system(struct task_struct *tsk);
 extern void vtime_account_idle(struct task_struct *tsk);
 extern void vtime_account_user(struct task_struct *tsk);
-extern void vtime_account(struct task_struct *tsk);
+extern void vtime_account_irq_enter(struct task_struct *tsk);
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-extern bool vtime_accounting_enabled(void);
-#else
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 static inline bool vtime_accounting_enabled(void) { return true; }
 #endif
 
 #else /* !CONFIG_VIRT_CPU_ACCOUNTING */
+
 static inline void vtime_task_switch(struct task_struct *prev) { }
 static inline void vtime_account_system(struct task_struct *tsk) { }
 static inline void vtime_account_user(struct task_struct *tsk) { }
-static inline void vtime_account(struct task_struct *tsk) { }
+static inline void vtime_account_irq_enter(struct task_struct *tsk) { }
 static inline bool vtime_accounting_enabled(void) { return false; }
 #endif
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-static inline void arch_vtime_task_switch(struct task_struct *tsk) { }
-static inline void vtime_user_enter(struct task_struct *tsk)
-{
-	vtime_account_system(tsk);
-}
+extern void arch_vtime_task_switch(struct task_struct *tsk);
+extern void vtime_account_irq_exit(struct task_struct *tsk);
+extern bool vtime_accounting_enabled(void);
+extern void vtime_user_enter(struct task_struct *tsk);
 static inline void vtime_user_exit(struct task_struct *tsk)
 {
 	vtime_account_user(tsk);
 }
+extern void vtime_guest_enter(struct task_struct *tsk);
+extern void vtime_guest_exit(struct task_struct *tsk);
+extern void vtime_init_idle(struct task_struct *tsk);
 #else
+static inline void vtime_account_irq_exit(struct task_struct *tsk)
+{
+	/* On hard|softirq exit we always account to hard|softirq cputime */
+	vtime_account_system(tsk);
+}
 static inline void vtime_user_enter(struct task_struct *tsk) { }
 static inline void vtime_user_exit(struct task_struct *tsk) { }
+static inline void vtime_guest_enter(struct task_struct *tsk) { }
+static inline void vtime_guest_exit(struct task_struct *tsk) { }
+static inline void vtime_init_idle(struct task_struct *tsk) { }
 #endif
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
@@ -45,25 +54,15 @@
 static inline void irqtime_account_irq(struct task_struct *tsk) { }
 #endif
 
-static inline void vtime_account_irq_enter(struct task_struct *tsk)
+static inline void account_irq_enter_time(struct task_struct *tsk)
 {
-	/*
-	 * Hardirq can interrupt idle task anytime. So we need vtime_account()
-	 * that performs the idle check in CONFIG_VIRT_CPU_ACCOUNTING.
-	 * Softirq can also interrupt idle task directly if it calls
-	 * local_bh_enable(). Such case probably don't exist but we never know.
-	 * Ksoftirqd is not concerned because idle time is flushed on context
-	 * switch. Softirqs in the end of hardirqs are also not a problem because
-	 * the idle time is flushed on hardirq time already.
-	 */
-	vtime_account(tsk);
+	vtime_account_irq_enter(tsk);
 	irqtime_account_irq(tsk);
 }
 
-static inline void vtime_account_irq_exit(struct task_struct *tsk)
+static inline void account_irq_exit_time(struct task_struct *tsk)
 {
-	/* On hard|softirq exit we always account to hard|softirq cputime */
-	vtime_account_system(tsk);
+	vtime_account_irq_exit(tsk);
 	irqtime_account_irq(tsk);
 }