s390: add support for runtime instrumentation
Allow user-space threads to use runtime instrumentation (RI). To enable RI
for a thread there is a new s390 specific system call, sys_s390_runtime_instr,
that takes as parameter a realtime signal number. If the RI facility is
available the system call sets up a control block for the calling thread with
the appropriate permissions for the thread to modify the control block.
The user-space thread can then use the store and modify RI instructions to
alter the control block and start/stop the instrumentation via RION/RIOFF.
If the user specified program buffer runs full RI triggers an external
interrupt. The external interrupt is translated to a real-time signal that
is delivered to the thread that enabled RI on that CPU. The number of
the real-time signal is the number specified in the RI system call. So,
user-space can select any available real-time signal number in case the
application itself uses real-time signals for other purposes.
The kernel saves the RI control blocks on task switch only if the running
thread was enabled for RI. Therefore, the performance impact on task switch
should be negligible if RI is not used.
RI is only enabled for user-space mode and is disabled for the supervisor
state.
Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Jan Glauber <jang@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 9a111b6..bba01bf 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -49,7 +49,7 @@
obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o
-obj-$(CONFIG_64BIT) += cache.o
+obj-$(CONFIG_64BIT) += runtime_instr.o cache.o
# Kexec part
S390_KEXEC_OBJS := machine_kexec.o
diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index 2d82cfc..5f7928f 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -1646,3 +1646,8 @@
llgf %r0,164(%r15) # unsigned long
stg %r0,160(%r15)
jg compat_sys_process_vm_writev
+
+ENTRY(sys_s390_runtime_instr_wrapper)
+ lgfr %r2,%r2 # int
+ lgfr %r3,%r3 # int
+ jg sys_s390_runtime_instr
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index 84fd7e9..4bc67db 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -315,6 +315,8 @@
LONG_INSN_POPCNT,
LONG_INSN_RISBHG,
LONG_INSN_RISBLG,
+ LONG_INSN_RINEXT,
+ LONG_INSN_RIEMIT,
};
static char *long_insn_name[] = {
@@ -330,6 +332,8 @@
[LONG_INSN_POPCNT] = "popcnt",
[LONG_INSN_RISBHG] = "risbhg",
[LONG_INSN_RISBLG] = "risblk",
+ [LONG_INSN_RINEXT] = "rinext",
+ [LONG_INSN_RIEMIT] = "riemit",
};
static struct insn opcode[] = {
@@ -582,6 +586,17 @@
{ "", 0, INSTR_INVALID }
};
+static struct insn opcode_aa[] = {
+#ifdef CONFIG_64BIT
+ { { 0, LONG_INSN_RINEXT }, 0x00, INSTR_RI_RI },
+ { "rion", 0x01, INSTR_RI_RI },
+ { "tric", 0x02, INSTR_RI_RI },
+ { "rioff", 0x03, INSTR_RI_RI },
+ { { 0, LONG_INSN_RIEMIT }, 0x04, INSTR_RI_RI },
+#endif
+ { "", 0, INSTR_INVALID }
+};
+
static struct insn opcode_b2[] = {
#ifdef CONFIG_64BIT
{ "sske", 0x2b, INSTR_RRF_M0RR },
@@ -1210,6 +1225,9 @@
{ "cliy", 0x55, INSTR_SIY_URD },
{ "oiy", 0x56, INSTR_SIY_URD },
{ "xiy", 0x57, INSTR_SIY_URD },
+ { "lric", 0x60, INSTR_RSY_RDRM },
+ { "stric", 0x61, INSTR_RSY_RDRM },
+ { "mric", 0x62, INSTR_RSY_RDRM },
{ "icmh", 0x80, INSTR_RSE_RURD },
{ "icmh", 0x80, INSTR_RSY_RURD },
{ "icmy", 0x81, INSTR_RSY_RURD },
@@ -1408,6 +1426,9 @@
case 0xa7:
table = opcode_a7;
break;
+ case 0xaa:
+ table = opcode_aa;
+ break;
case 0xb2:
table = opcode_b2;
break;
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index a22fdca..c1b4493 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -44,6 +44,7 @@
{.name = "IUC", .desc = "[EXT] IUCV" },
{.name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling" },
{.name = "CMC", .desc = "[EXT] CPU-Measurement: Counter" },
+ {.name = "CMR", .desc = "[EXT] CPU-Measurement: RI" },
{.name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt" },
{.name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt" },
{.name = "DAS", .desc = "[I/O] DASD" },
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 7331753..cb4328e 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -30,6 +30,7 @@
#include <asm/nmi.h>
#include <asm/smp.h>
#include <asm/switch_to.h>
+#include <asm/runtime_instr.h>
#include "entry.h"
asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
@@ -132,6 +133,7 @@
*/
void exit_thread(void)
{
+ exit_thread_runtime_instr();
}
void flush_thread(void)
@@ -170,6 +172,11 @@
/* Save access registers to new thread structure. */
save_access_regs(&p->thread.acrs[0]);
+ /* Don't copy runtime instrumentation info */
+ p->thread.ri_cb = NULL;
+ p->thread.ri_signum = 0;
+ frame->childregs.psw.mask &= ~PSW_MASK_RI;
+
#ifndef CONFIG_64BIT
/*
* save fprs to current->thread.fp_regs to merge them with
diff --git a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c
new file mode 100644
index 0000000..e27e23d
--- /dev/null
+++ b/arch/s390/kernel/runtime_instr.c
@@ -0,0 +1,150 @@
+/*
+ * Copyright IBM Corp. 2012
+ * Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/syscalls.h>
+#include <linux/signal.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/kernel_stat.h>
+#include <asm/runtime_instr.h>
+#include <asm/cpu_mf.h>
+#include <asm/irq.h>
+
+/* empty control block to disable RI by loading it */
+struct runtime_instr_cb runtime_instr_empty_cb;
+
+static int runtime_instr_avail(void)
+{
+ return test_facility(64);
+}
+
+static void disable_runtime_instr(void)
+{
+ struct pt_regs *regs = task_pt_regs(current);
+
+ load_runtime_instr_cb(&runtime_instr_empty_cb);
+
+ /*
+ * Make sure the RI bit is deleted from the PSW. If the user did not
+ * switch off RI before the system call the process will get a
+ * specification exception otherwise.
+ */
+ regs->psw.mask &= ~PSW_MASK_RI;
+}
+
+static void init_runtime_instr_cb(struct runtime_instr_cb *cb)
+{
+ cb->buf_limit = 0xfff;
+ if (addressing_mode == HOME_SPACE_MODE)
+ cb->home_space = 1;
+ cb->int_requested = 1;
+ cb->pstate = 1;
+ cb->pstate_set_buf = 1;
+ cb->pstate_sample = 1;
+ cb->pstate_collect = 1;
+ cb->key = PAGE_DEFAULT_KEY;
+ cb->valid = 1;
+}
+
+void exit_thread_runtime_instr(void)
+{
+ struct task_struct *task = current;
+
+ if (!task->thread.ri_cb)
+ return;
+ disable_runtime_instr();
+ kfree(task->thread.ri_cb);
+ task->thread.ri_signum = 0;
+ task->thread.ri_cb = NULL;
+}
+
+static void runtime_instr_int_handler(struct ext_code ext_code,
+ unsigned int param32, unsigned long param64)
+{
+ struct siginfo info;
+
+ if (!(param32 & CPU_MF_INT_RI_MASK))
+ return;
+
+ kstat_cpu(smp_processor_id()).irqs[EXTINT_CMR]++;
+
+ if (!current->thread.ri_cb)
+ return;
+ if (current->thread.ri_signum < SIGRTMIN ||
+ current->thread.ri_signum > SIGRTMAX) {
+ WARN_ON_ONCE(1);
+ return;
+ }
+
+ memset(&info, 0, sizeof(info));
+ info.si_signo = current->thread.ri_signum;
+ info.si_code = SI_QUEUE;
+ if (param32 & CPU_MF_INT_RI_BUF_FULL)
+ info.si_int = ENOBUFS;
+ else if (param32 & CPU_MF_INT_RI_HALTED)
+ info.si_int = ECANCELED;
+ else
+ return; /* unknown reason */
+
+ send_sig_info(current->thread.ri_signum, &info, current);
+}
+
+SYSCALL_DEFINE2(s390_runtime_instr, int, command, int, signum)
+{
+ struct runtime_instr_cb *cb;
+
+ if (!runtime_instr_avail())
+ return -EOPNOTSUPP;
+
+ if (command == S390_RUNTIME_INSTR_STOP) {
+ preempt_disable();
+ exit_thread_runtime_instr();
+ preempt_enable();
+ return 0;
+ }
+
+ if (command != S390_RUNTIME_INSTR_START ||
+ (signum < SIGRTMIN || signum > SIGRTMAX))
+ return -EINVAL;
+
+ if (!current->thread.ri_cb) {
+ cb = kzalloc(sizeof(*cb), GFP_KERNEL);
+ if (!cb)
+ return -ENOMEM;
+ } else {
+ cb = current->thread.ri_cb;
+ memset(cb, 0, sizeof(*cb));
+ }
+
+ init_runtime_instr_cb(cb);
+ current->thread.ri_signum = signum;
+
+ /* now load the control block to make it available */
+ preempt_disable();
+ current->thread.ri_cb = cb;
+ load_runtime_instr_cb(cb);
+ preempt_enable();
+ return 0;
+}
+
+static int __init runtime_instr_init(void)
+{
+ int rc;
+
+ if (!runtime_instr_avail())
+ return 0;
+
+ measurement_alert_subclass_register();
+ rc = register_external_interrupt(0x1407, runtime_instr_int_handler);
+ if (rc)
+ measurement_alert_subclass_unregister();
+ else
+ pr_info("Runtime instrumentation facility initialized\n");
+ return rc;
+}
+device_initcall(runtime_instr_init);
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index bcab2f0..539f0a7 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -350,3 +350,4 @@
SYSCALL(sys_setns,sys_setns,sys_setns_wrapper)
SYSCALL(sys_process_vm_readv,sys_process_vm_readv,compat_sys_process_vm_readv_wrapper) /* 340 */
SYSCALL(sys_process_vm_writev,sys_process_vm_writev,compat_sys_process_vm_writev_wrapper)
+SYSCALL(sys_ni_syscall,sys_s390_runtime_instr,sys_s390_runtime_instr_wrapper)