[S390] idle time accounting vs. machine checks

A machine check can interrupt the i/o and external interrupt handler
anytime. If the machine check occurs while the interrupt handler is
waking up from idle vtime_start_cpu can get executed a second time
and the int_clock / async_enter_timer values in the lowcore get
clobbered. This can confuse the cpu time accounting.
To fix this problem two changes are needed. First the machine check
handler has to use its own copies of int_clock and async_enter_timer,
named mcck_clock and mcck_enter_timer. Second the nested execution
of vtime_start_cpu has to be prevented. This is done in s390_idle_check
by checking the wait bit in the program status word.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 32b1ede..816d81f 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -112,6 +112,7 @@
 	DEFINE(__LC_RETURN_MCCK_PSW, offsetof(struct _lowcore, return_mcck_psw));
 	DEFINE(__LC_SYNC_ENTER_TIMER, offsetof(struct _lowcore, sync_enter_timer));
 	DEFINE(__LC_ASYNC_ENTER_TIMER, offsetof(struct _lowcore, async_enter_timer));
+	DEFINE(__LC_MCCK_ENTER_TIMER, offsetof(struct _lowcore, mcck_enter_timer));
 	DEFINE(__LC_EXIT_TIMER, offsetof(struct _lowcore, exit_timer));
 	DEFINE(__LC_USER_TIMER, offsetof(struct _lowcore, user_timer));
 	DEFINE(__LC_SYSTEM_TIMER, offsetof(struct _lowcore, system_timer));
@@ -127,6 +128,7 @@
 	DEFINE(__LC_USER_ASCE, offsetof(struct _lowcore, user_asce));
 	DEFINE(__LC_USER_EXEC_ASCE, offsetof(struct _lowcore, user_exec_asce));
 	DEFINE(__LC_INT_CLOCK, offsetof(struct _lowcore, int_clock));
+	DEFINE(__LC_MCCK_CLOCK, offsetof(struct _lowcore, mcck_clock));
 	DEFINE(__LC_MACHINE_FLAGS, offsetof(struct _lowcore, machine_flags));
 	DEFINE(__LC_FTRACE_FUNC, offsetof(struct _lowcore, ftrace_func));
 	DEFINE(__LC_IRB, offsetof(struct _lowcore, irb));
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 07d8499..0e2b162 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -742,15 +742,14 @@
 
 	.globl mcck_int_handler
 mcck_int_handler:
-	stck	__LC_INT_CLOCK
+	stck	__LC_MCCK_CLOCK
 	spt	__LC_CPU_TIMER_SAVE_AREA	# revalidate cpu timer
 	lm	%r0,%r15,__LC_GPREGS_SAVE_AREA	# revalidate gprs
 	SAVE_ALL_BASE __LC_SAVE_AREA+32
 	la	%r12,__LC_MCK_OLD_PSW
 	tm	__LC_MCCK_CODE,0x80	# system damage?
 	bo	BASED(mcck_int_main)	# yes -> rest of mcck code invalid
-	mvc	__LC_SAVE_AREA+52(8),__LC_ASYNC_ENTER_TIMER
-	mvc	__LC_ASYNC_ENTER_TIMER(8),__LC_CPU_TIMER_SAVE_AREA
+	mvc	__LC_MCCK_ENTER_TIMER(8),__LC_CPU_TIMER_SAVE_AREA
 	tm	__LC_MCCK_CODE+5,0x02	# stored cpu timer value valid?
 	bo	BASED(1f)
 	la	%r14,__LC_SYNC_ENTER_TIMER
@@ -764,7 +763,7 @@
 	bl	BASED(0f)
 	la	%r14,__LC_LAST_UPDATE_TIMER
 0:	spt	0(%r14)
-	mvc	__LC_ASYNC_ENTER_TIMER(8),0(%r14)
+	mvc	__LC_MCCK_ENTER_TIMER(8),0(%r14)
 1:	tm	__LC_MCCK_CODE+2,0x09	# mwp + ia of old psw valid?
 	bno	BASED(mcck_int_main)	# no -> skip cleanup critical
 	tm	__LC_MCK_OLD_PSW+1,0x01	# test problem state bit
@@ -786,9 +785,9 @@
 	bno	BASED(mcck_no_vtime)	# no -> skip cleanup critical
 	tm	SP_PSW+1(%r15),0x01	# interrupting from user ?
 	bz	BASED(mcck_no_vtime)
-	UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER
+	UPDATE_VTIME __LC_EXIT_TIMER,__LC_MCCK_ENTER_TIMER,__LC_USER_TIMER
 	UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
-	mvc	__LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER
+	mvc	__LC_LAST_UPDATE_TIMER(8),__LC_MCCK_ENTER_TIMER
 mcck_no_vtime:
 	l	%r9,__LC_THREAD_INFO	# load pointer to thread_info struct
 	la	%r2,SP_PTREGS(%r15)	# load pt_regs
@@ -811,7 +810,6 @@
 mcck_return:
 	mvc	__LC_RETURN_MCCK_PSW(8),SP_PSW(%r15) # move return PSW
 	ni	__LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit
-	mvc	__LC_ASYNC_ENTER_TIMER(8),__LC_SAVE_AREA+52
 	tm	__LC_RETURN_MCCK_PSW+1,0x01 # returning to user ?
 	bno	BASED(0f)
 	lm	%r0,%r15,SP_R0(%r15)	# load gprs 0-15
@@ -934,15 +932,16 @@
 
 cleanup_system_call:
 	mvc	__LC_RETURN_PSW(8),0(%r12)
-	c	%r12,BASED(.Lmck_old_psw)
-	be	BASED(0f)
-	la	%r12,__LC_SAVE_AREA+16
-	b	BASED(1f)
-0:	la	%r12,__LC_SAVE_AREA+32
-1:
 	clc	__LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+4)
 	bh	BASED(0f)
+	mvc	__LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER
+	c	%r12,BASED(.Lmck_old_psw)
+	be	BASED(0f)
 	mvc	__LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER
+0:	c	%r12,BASED(.Lmck_old_psw)
+	la	%r12,__LC_SAVE_AREA+32
+	be	BASED(0f)
+	la	%r12,__LC_SAVE_AREA+16
 0:	clc	__LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+8)
 	bhe	BASED(cleanup_vtime)
 	clc	__LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn)
@@ -984,16 +983,19 @@
 cleanup_sysc_restore:
 	clc	4(4,%r12),BASED(cleanup_sysc_restore_insn)
 	be	BASED(2f)
+	mvc	__LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER
+	c	%r12,BASED(.Lmck_old_psw)
+	be	BASED(0f)
 	mvc	__LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
-	clc	4(4,%r12),BASED(cleanup_sysc_restore_insn+4)
+0:	clc	4(4,%r12),BASED(cleanup_sysc_restore_insn+4)
 	be	BASED(2f)
 	mvc	__LC_RETURN_PSW(8),SP_PSW(%r15)
 	c	%r12,BASED(.Lmck_old_psw)
-	bne	BASED(0f)
-	mvc	__LC_SAVE_AREA+32(16),SP_R12(%r15)
-	b	BASED(1f)
-0:	mvc	__LC_SAVE_AREA+16(16),SP_R12(%r15)
-1:	lm	%r0,%r11,SP_R0(%r15)
+	la	%r12,__LC_SAVE_AREA+32
+	be	BASED(1f)
+	la	%r12,__LC_SAVE_AREA+16
+1:	mvc	0(16,%r12),SP_R12(%r15)
+	lm	%r0,%r11,SP_R0(%r15)
 	l	%r15,SP_R15(%r15)
 2:	la	%r12,__LC_RETURN_PSW
 	br	%r14
@@ -1009,19 +1011,15 @@
 
 cleanup_io_restore:
 	clc	4(4,%r12),BASED(cleanup_io_restore_insn)
-	be	BASED(2f)
-	mvc	__LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
+	be	BASED(1f)
+	mvc	__LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER
 	clc	4(4,%r12),BASED(cleanup_io_restore_insn+4)
-	be	BASED(2f)
+	be	BASED(1f)
 	mvc	__LC_RETURN_PSW(8),SP_PSW(%r15)
-	c	%r12,BASED(.Lmck_old_psw)
-	bne	BASED(0f)
 	mvc	__LC_SAVE_AREA+32(16),SP_R12(%r15)
-	b	BASED(1f)
-0:	mvc	__LC_SAVE_AREA+16(16),SP_R12(%r15)
-1:	lm	%r0,%r11,SP_R0(%r15)
+	lm	%r0,%r11,SP_R0(%r15)
 	l	%r15,SP_R15(%r15)
-2:	la	%r12,__LC_RETURN_PSW
+1:	la	%r12,__LC_RETURN_PSW
 	br	%r14
 cleanup_io_restore_insn:
 	.long	io_done - 4 + 0x80000000
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 860cea1..6536f5c 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -725,7 +725,7 @@
  */
 	.globl mcck_int_handler
 mcck_int_handler:
-	stck	__LC_INT_CLOCK
+	stck	__LC_MCCK_CLOCK
 	la	%r1,4095		# revalidate r1
 	spt	__LC_CPU_TIMER_SAVE_AREA-4095(%r1)	# revalidate cpu timer
 	lmg	%r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs
@@ -734,8 +734,7 @@
 	tm	__LC_MCCK_CODE,0x80	# system damage?
 	jo	mcck_int_main		# yes -> rest of mcck code invalid
 	la	%r14,4095
-	mvc	__LC_SAVE_AREA+104(8),__LC_ASYNC_ENTER_TIMER
-	mvc	__LC_ASYNC_ENTER_TIMER(8),__LC_CPU_TIMER_SAVE_AREA-4095(%r14)
+	mvc	__LC_MCCK_ENTER_TIMER(8),__LC_CPU_TIMER_SAVE_AREA-4095(%r14)
 	tm	__LC_MCCK_CODE+5,0x02	# stored cpu timer value valid?
 	jo	1f
 	la	%r14,__LC_SYNC_ENTER_TIMER
@@ -749,7 +748,7 @@
 	jl	0f
 	la	%r14,__LC_LAST_UPDATE_TIMER
 0:	spt	0(%r14)
-	mvc	__LC_ASYNC_ENTER_TIMER(8),0(%r14)
+	mvc	__LC_MCCK_ENTER_TIMER(8),0(%r14)
 1:	tm	__LC_MCCK_CODE+2,0x09	# mwp + ia of old psw valid?
 	jno	mcck_int_main		# no -> skip cleanup critical
 	tm	__LC_MCK_OLD_PSW+1,0x01 # test problem state bit
@@ -770,9 +769,9 @@
 	jno	mcck_no_vtime		# no -> no timer update
 	tm	SP_PSW+1(%r15),0x01	# interrupting from user ?
 	jz	mcck_no_vtime
-	UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER
+	UPDATE_VTIME __LC_EXIT_TIMER,__LC_MCCK_ENTER_TIMER,__LC_USER_TIMER
 	UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
-	mvc	__LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER
+	mvc	__LC_LAST_UPDATE_TIMER(8),__LC_MCCK_ENTER_TIMER
 mcck_no_vtime:
 	lg	%r9,__LC_THREAD_INFO	# load pointer to thread_info struct
 	la	%r2,SP_PTREGS(%r15)	# load pt_regs
@@ -794,7 +793,6 @@
 	mvc	__LC_RETURN_MCCK_PSW(16),SP_PSW(%r15) # move return PSW
 	ni	__LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit
 	lmg	%r0,%r15,SP_R0(%r15)	# load gprs 0-15
-	mvc	__LC_ASYNC_ENTER_TIMER(8),__LC_SAVE_AREA+104
 	tm	__LC_RETURN_MCCK_PSW+1,0x01 # returning to user ?
 	jno	0f
 	stpt	__LC_EXIT_TIMER
@@ -909,15 +907,16 @@
 
 cleanup_system_call:
 	mvc	__LC_RETURN_PSW(16),0(%r12)
-	cghi	%r12,__LC_MCK_OLD_PSW
-	je	0f
-	la	%r12,__LC_SAVE_AREA+32
-	j	1f
-0:	la	%r12,__LC_SAVE_AREA+64
-1:
 	clc	__LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+8)
 	jh	0f
+	mvc	__LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER
+	cghi	%r12,__LC_MCK_OLD_PSW
+	je	0f
 	mvc	__LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER
+0:	cghi	%r12,__LC_MCK_OLD_PSW
+	la	%r12,__LC_SAVE_AREA+64
+	je	0f
+	la	%r12,__LC_SAVE_AREA+32
 0:	clc	__LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+16)
 	jhe	cleanup_vtime
 	clc	__LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn)
@@ -958,19 +957,22 @@
 
 cleanup_sysc_restore:
 	clc	8(8,%r12),BASED(cleanup_sysc_restore_insn)
-	je	3f
+	je	2f
 	clc	8(8,%r12),BASED(cleanup_sysc_restore_insn+8)
 	jhe	0f
+	mvc	__LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER
+	cghi	%r12,__LC_MCK_OLD_PSW
+	je	0f
 	mvc	__LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
 0:	mvc	__LC_RETURN_PSW(16),SP_PSW(%r15)
 	cghi	%r12,__LC_MCK_OLD_PSW
-	jne	1f
-	mvc	__LC_SAVE_AREA+64(32),SP_R12(%r15)
-	j	2f
-1:	mvc	__LC_SAVE_AREA+32(32),SP_R12(%r15)
-2:	lmg	%r0,%r11,SP_R0(%r15)
+	la	%r12,__LC_SAVE_AREA+64
+	je	1f
+	la	%r12,__LC_SAVE_AREA+32
+1:	mvc	0(32,%r12),SP_R12(%r15)
+	lmg	%r0,%r11,SP_R0(%r15)
 	lg	%r15,SP_R15(%r15)
-3:	la	%r12,__LC_RETURN_PSW
+2:	la	%r12,__LC_RETURN_PSW
 	br	%r14
 cleanup_sysc_restore_insn:
 	.quad	sysc_done - 4
@@ -984,19 +986,15 @@
 
 cleanup_io_restore:
 	clc	8(8,%r12),BASED(cleanup_io_restore_insn)
-	je	3f
+	je	1f
 	clc	8(8,%r12),BASED(cleanup_io_restore_insn+8)
 	jhe	0f
-	mvc	__LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
+	mvc	__LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER
 0:	mvc	__LC_RETURN_PSW(16),SP_PSW(%r15)
-	cghi	%r12,__LC_MCK_OLD_PSW
-	jne	1f
 	mvc	__LC_SAVE_AREA+64(32),SP_R12(%r15)
-	j	2f
-1:	mvc	__LC_SAVE_AREA+32(32),SP_R12(%r15)
-2:	lmg	%r0,%r11,SP_R0(%r15)
+	lmg	%r0,%r11,SP_R0(%r15)
 	lg	%r15,SP_R15(%r15)
-3:	la	%r12,__LC_RETURN_PSW
+1:	la	%r12,__LC_RETURN_PSW
 	br	%r14
 cleanup_io_restore_insn:
 	.quad	io_done - 4
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 015e27d..ac15139 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -255,7 +255,8 @@
 	int umode;
 
 	nmi_enter();
-	s390_idle_check();
+	s390_idle_check(regs, S390_lowcore.mcck_clock,
+			S390_lowcore.mcck_enter_timer);
 
 	mci = (struct mci *) &S390_lowcore.mcck_interruption_code;
 	mcck = &__get_cpu_var(cpu_mcck);
diff --git a/arch/s390/kernel/s390_ext.c b/arch/s390/kernel/s390_ext.c
index 59618bc..9ce641b 100644
--- a/arch/s390/kernel/s390_ext.c
+++ b/arch/s390/kernel/s390_ext.c
@@ -120,7 +120,8 @@
 	struct pt_regs *old_regs;
 
 	old_regs = set_irq_regs(regs);
-	s390_idle_check();
+	s390_idle_check(regs, S390_lowcore.int_clock,
+			S390_lowcore.async_enter_timer);
 	irq_enter();
 	if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
 		/* Serve timer interrupts first. */
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index b59a812..3479f1b 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -121,32 +121,35 @@
 }
 EXPORT_SYMBOL_GPL(account_system_vtime);
 
-void vtime_start_cpu(void)
+void vtime_start_cpu(__u64 int_clock, __u64 enter_timer)
 {
 	struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
 	struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer);
 	__u64 idle_time, expires;
 
+	if (idle->idle_enter == 0ULL)
+		return;
+
 	/* Account time spent with enabled wait psw loaded as idle time. */
-	idle_time = S390_lowcore.int_clock - idle->idle_enter;
+	idle_time = int_clock - idle->idle_enter;
 	account_idle_time(idle_time);
 	S390_lowcore.steal_timer +=
 		idle->idle_enter - S390_lowcore.last_update_clock;
-	S390_lowcore.last_update_clock = S390_lowcore.int_clock;
+	S390_lowcore.last_update_clock = int_clock;
 
 	/* Account system time spent going idle. */
 	S390_lowcore.system_timer += S390_lowcore.last_update_timer - vq->idle;
-	S390_lowcore.last_update_timer = S390_lowcore.async_enter_timer;
+	S390_lowcore.last_update_timer = enter_timer;
 
 	/* Restart vtime CPU timer */
 	if (vq->do_spt) {
 		/* Program old expire value but first save progress. */
-		expires = vq->idle - S390_lowcore.async_enter_timer;
+		expires = vq->idle - enter_timer;
 		expires += get_vtimer();
 		set_vtimer(expires);
 	} else {
 		/* Don't account the CPU timer delta while the cpu was idle. */
-		vq->elapsed -= vq->idle - S390_lowcore.async_enter_timer;
+		vq->elapsed -= vq->idle - enter_timer;
 	}
 
 	idle->sequence++;