rcu: Have the RCU tracepoints use the tracepoint_string infrastructure
Currently, RCU tracepoints save only a pointer to strings in the
ring buffer. When displayed via the /sys/kernel/debug/tracing/trace file
they are referenced like the printf "%s" that looks at the address
in the ring buffer and prints out the string it points too. This requires
that the strings are constant and persistent in the kernel.
The problem with this is for tools like trace-cmd and perf that read the
binary data from the buffers but have no access to the kernel memory to
find out what string is represented by the address in the buffer.
By using the tracepoint_string infrastructure, the RCU tracepoint strings
can be exported such that userspace tools can map the addresses to
the strings.
# cat /sys/kernel/debug/tracing/printk_formats
0xffffffff81a4a0e8 : "rcu_preempt"
0xffffffff81a4a0f4 : "rcu_bh"
0xffffffff81a4a100 : "rcu_sched"
0xffffffff818437a0 : "cpuqs"
0xffffffff818437a6 : "rcu_sched"
0xffffffff818437a0 : "cpuqs"
0xffffffff818437b0 : "rcu_bh"
0xffffffff818437b7 : "Start context switch"
0xffffffff818437cc : "End context switch"
0xffffffff818437a0 : "cpuqs"
[...]
Now userspaces tools can display:
rcu_utilization: Start context switch
rcu_dyntick: Start 1 0
rcu_utilization: End context switch
rcu_batch_start: rcu_preempt CBs=0/5 bl=10
rcu_dyntick: End 0 140000000000000
rcu_invoke_callback: rcu_preempt rhp=0xffff880071c0d600 func=proc_i_callback
rcu_invoke_callback: rcu_preempt rhp=0xffff880077b5b230 func=__d_free
rcu_dyntick: Start 140000000000000 0
rcu_invoke_callback: rcu_preempt rhp=0xffff880077563980 func=file_free_rcu
rcu_batch_end: rcu_preempt CBs-invoked=3 idle=>c<>c<>c<>c<
rcu_utilization: End RCU core
rcu_grace_period: rcu_preempt 9741 start
rcu_dyntick: Start 1 0
rcu_dyntick: End 0 140000000000000
rcu_dyntick: Start 140000000000000 0
Instead of:
rcu_utilization: ffffffff81843110
rcu_future_grace_period: ffffffff81842f1d 9939 9939 9940 0 0 3 ffffffff81842f32
rcu_batch_start: ffffffff81842f1d CBs=0/4 bl=10
rcu_future_grace_period: ffffffff81842f1d 9939 9939 9940 0 0 3 ffffffff81842f3c
rcu_grace_period: ffffffff81842f1d 9939 ffffffff81842f80
rcu_invoke_callback: ffffffff81842f1d rhp=0xffff88007888aac0 func=file_free_rcu
rcu_grace_period: ffffffff81842f1d 9939 ffffffff81842f95
rcu_invoke_callback: ffffffff81842f1d rhp=0xffff88006aeb4600 func=proc_i_callback
rcu_future_grace_period: ffffffff81842f1d 9939 9939 9940 0 0 3 ffffffff81842f32
rcu_future_grace_period: ffffffff81842f1d 9939 9939 9940 0 0 3 ffffffff81842f3c
rcu_invoke_callback: ffffffff81842f1d rhp=0xffff880071cb9fc0 func=__d_free
rcu_grace_period: ffffffff81842f1d 9939 ffffffff81842f80
rcu_invoke_callback: ffffffff81842f1d rhp=0xffff88007888ae80 func=file_free_rcu
rcu_batch_end: ffffffff81842f1d CBs-invoked=4 idle=>c<>c<>c<>c<
rcu_utilization: ffffffff8184311f
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 97994a3..338f1d1 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -53,18 +53,36 @@
#include <linux/delay.h>
#include <linux/stop_machine.h>
#include <linux/random.h>
+#include <linux/ftrace_event.h>
#include "rcutree.h"
#include <trace/events/rcu.h>
#include "rcu.h"
+/*
+ * Strings used in tracepoints need to be exported via the
+ * tracing system such that tools like perf and trace-cmd can
+ * translate the string address pointers to actual text.
+ */
+#define TPS(x) tracepoint_string(x)
+
/* Data structures. */
static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
+/*
+ * In order to export the rcu_state name to the tracing tools, it
+ * needs to be added in the __tracepoint_string section.
+ * This requires defining a separate variable tp_<sname>_varname
+ * that points to the string being used, and this will allow
+ * the tracing userspace tools to be able to decipher the string
+ * address to the matching string.
+ */
#define RCU_STATE_INITIALIZER(sname, sabbr, cr) \
+static char sname##_varname[] = #sname; \
+static const char *tp_##sname##_varname __used __tracepoint_string = sname##_varname; \
struct rcu_state sname##_state = { \
.level = { &sname##_state.node[0] }, \
.call = cr, \
@@ -76,7 +94,7 @@
.orphan_donetail = &sname##_state.orphan_donelist, \
.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
.onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \
- .name = #sname, \
+ .name = sname##_varname, \
.abbr = sabbr, \
}; \
DEFINE_PER_CPU(struct rcu_data, sname##_data)
@@ -176,7 +194,7 @@
struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu);
if (rdp->passed_quiesce == 0)
- trace_rcu_grace_period("rcu_sched", rdp->gpnum, "cpuqs");
+ trace_rcu_grace_period(TPS("rcu_sched"), rdp->gpnum, TPS("cpuqs"));
rdp->passed_quiesce = 1;
}
@@ -185,7 +203,7 @@
struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
if (rdp->passed_quiesce == 0)
- trace_rcu_grace_period("rcu_bh", rdp->gpnum, "cpuqs");
+ trace_rcu_grace_period(TPS("rcu_bh"), rdp->gpnum, TPS("cpuqs"));
rdp->passed_quiesce = 1;
}
@@ -196,10 +214,10 @@
*/
void rcu_note_context_switch(int cpu)
{
- trace_rcu_utilization("Start context switch");
+ trace_rcu_utilization(TPS("Start context switch"));
rcu_sched_qs(cpu);
rcu_preempt_note_context_switch(cpu);
- trace_rcu_utilization("End context switch");
+ trace_rcu_utilization(TPS("End context switch"));
}
EXPORT_SYMBOL_GPL(rcu_note_context_switch);
@@ -343,11 +361,11 @@
static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
bool user)
{
- trace_rcu_dyntick("Start", oldval, rdtp->dynticks_nesting);
+ trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting);
if (!user && !is_idle_task(current)) {
struct task_struct *idle = idle_task(smp_processor_id());
- trace_rcu_dyntick("Error on entry: not idle task", oldval, 0);
+ trace_rcu_dyntick(TPS("Error on entry: not idle task"), oldval, 0);
ftrace_dump(DUMP_ORIG);
WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
current->pid, current->comm,
@@ -477,7 +495,7 @@
rdtp->dynticks_nesting--;
WARN_ON_ONCE(rdtp->dynticks_nesting < 0);
if (rdtp->dynticks_nesting)
- trace_rcu_dyntick("--=", oldval, rdtp->dynticks_nesting);
+ trace_rcu_dyntick(TPS("--="), oldval, rdtp->dynticks_nesting);
else
rcu_eqs_enter_common(rdtp, oldval, true);
local_irq_restore(flags);
@@ -499,11 +517,11 @@
smp_mb__after_atomic_inc(); /* See above. */
WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
rcu_cleanup_after_idle(smp_processor_id());
- trace_rcu_dyntick("End", oldval, rdtp->dynticks_nesting);
+ trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting);
if (!user && !is_idle_task(current)) {
struct task_struct *idle = idle_task(smp_processor_id());
- trace_rcu_dyntick("Error on exit: not idle task",
+ trace_rcu_dyntick(TPS("Error on exit: not idle task"),
oldval, rdtp->dynticks_nesting);
ftrace_dump(DUMP_ORIG);
WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
@@ -618,7 +636,7 @@
rdtp->dynticks_nesting++;
WARN_ON_ONCE(rdtp->dynticks_nesting == 0);
if (oldval)
- trace_rcu_dyntick("++=", oldval, rdtp->dynticks_nesting);
+ trace_rcu_dyntick(TPS("++="), oldval, rdtp->dynticks_nesting);
else
rcu_eqs_exit_common(rdtp, oldval, true);
local_irq_restore(flags);
@@ -773,7 +791,7 @@
* of the current RCU grace period.
*/
if ((curr & 0x1) == 0 || UINT_CMP_GE(curr, snap + 2)) {
- trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "dti");
+ trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
rdp->dynticks_fqs++;
return 1;
}
@@ -793,7 +811,7 @@
return 0; /* Grace period is not old enough. */
barrier();
if (cpu_is_offline(rdp->cpu)) {
- trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl");
+ trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("ofl"));
rdp->offline_fqs++;
return 1;
}
@@ -1056,9 +1074,9 @@
* grace period is already marked as needed, return to the caller.
*/
c = rcu_cbs_completed(rdp->rsp, rnp);
- trace_rcu_future_gp(rnp, rdp, c, "Startleaf");
+ trace_rcu_future_gp(rnp, rdp, c, TPS("Startleaf"));
if (rnp->need_future_gp[c & 0x1]) {
- trace_rcu_future_gp(rnp, rdp, c, "Prestartleaf");
+ trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartleaf"));
return c;
}
@@ -1072,7 +1090,7 @@
if (rnp->gpnum != rnp->completed ||
ACCESS_ONCE(rnp->gpnum) != ACCESS_ONCE(rnp->completed)) {
rnp->need_future_gp[c & 0x1]++;
- trace_rcu_future_gp(rnp, rdp, c, "Startedleaf");
+ trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleaf"));
return c;
}
@@ -1100,7 +1118,7 @@
* recorded, trace and leave.
*/
if (rnp_root->need_future_gp[c & 0x1]) {
- trace_rcu_future_gp(rnp, rdp, c, "Prestartedroot");
+ trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartedroot"));
goto unlock_out;
}
@@ -1109,9 +1127,9 @@
/* If a grace period is not already in progress, start one. */
if (rnp_root->gpnum != rnp_root->completed) {
- trace_rcu_future_gp(rnp, rdp, c, "Startedleafroot");
+ trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleafroot"));
} else {
- trace_rcu_future_gp(rnp, rdp, c, "Startedroot");
+ trace_rcu_future_gp(rnp, rdp, c, TPS("Startedroot"));
rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp);
}
unlock_out:
@@ -1135,7 +1153,8 @@
rcu_nocb_gp_cleanup(rsp, rnp);
rnp->need_future_gp[c & 0x1] = 0;
needmore = rnp->need_future_gp[(c + 1) & 0x1];
- trace_rcu_future_gp(rnp, rdp, c, needmore ? "CleanupMore" : "Cleanup");
+ trace_rcu_future_gp(rnp, rdp, c,
+ needmore ? TPS("CleanupMore") : TPS("Cleanup"));
return needmore;
}
@@ -1203,9 +1222,9 @@
/* Trace depending on how much we were able to accelerate. */
if (!*rdp->nxttail[RCU_WAIT_TAIL])
- trace_rcu_grace_period(rsp->name, rdp->gpnum, "AccWaitCB");
+ trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccWaitCB"));
else
- trace_rcu_grace_period(rsp->name, rdp->gpnum, "AccReadyCB");
+ trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccReadyCB"));
}
/*
@@ -1271,7 +1290,7 @@
/* Remember that we saw this grace-period completion. */
rdp->completed = rnp->completed;
- trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuend");
+ trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuend"));
}
if (rdp->gpnum != rnp->gpnum) {
@@ -1281,7 +1300,7 @@
* go looking for one.
*/
rdp->gpnum = rnp->gpnum;
- trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpustart");
+ trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart"));
rdp->passed_quiesce = 0;
rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask);
zero_cpu_stall_ticks(rdp);
@@ -1324,7 +1343,7 @@
/* Advance to a new grace period and initialize state. */
rsp->gpnum++;
- trace_rcu_grace_period(rsp->name, rsp->gpnum, "start");
+ trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start"));
record_gp_stall_check_time(rsp);
raw_spin_unlock_irq(&rnp->lock);
@@ -1446,7 +1465,7 @@
rcu_nocb_gp_set(rnp, nocb);
rsp->completed = rsp->gpnum; /* Declare grace period done. */
- trace_rcu_grace_period(rsp->name, rsp->completed, "end");
+ trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end"));
rsp->fqs_state = RCU_GP_IDLE;
rdp = this_cpu_ptr(rsp->rda);
rcu_advance_cbs(rsp, rnp, rdp); /* Reduce false positives below. */
@@ -1855,7 +1874,7 @@
RCU_TRACE(mask = rdp->grpmask);
trace_rcu_grace_period(rsp->name,
rnp->gpnum + 1 - !!(rnp->qsmask & mask),
- "cpuofl");
+ TPS("cpuofl"));
}
/*
@@ -2042,7 +2061,7 @@
*/
void rcu_check_callbacks(int cpu, int user)
{
- trace_rcu_utilization("Start scheduler-tick");
+ trace_rcu_utilization(TPS("Start scheduler-tick"));
increment_cpu_stall_ticks();
if (user || rcu_is_cpu_rrupt_from_idle()) {
@@ -2075,7 +2094,7 @@
rcu_preempt_check_callbacks(cpu);
if (rcu_pending(cpu))
invoke_rcu_core();
- trace_rcu_utilization("End scheduler-tick");
+ trace_rcu_utilization(TPS("End scheduler-tick"));
}
/*
@@ -2206,10 +2225,10 @@
if (cpu_is_offline(smp_processor_id()))
return;
- trace_rcu_utilization("Start RCU core");
+ trace_rcu_utilization(TPS("Start RCU core"));
for_each_rcu_flavor(rsp)
__rcu_process_callbacks(rsp);
- trace_rcu_utilization("End RCU core");
+ trace_rcu_utilization(TPS("End RCU core"));
}
/*
@@ -2950,7 +2969,7 @@
rdp->completed = rnp->completed;
rdp->passed_quiesce = 0;
rdp->qs_pending = 0;
- trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuonl");
+ trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl"));
}
raw_spin_unlock(&rnp->lock); /* irqs already disabled. */
rnp = rnp->parent;
@@ -2980,7 +2999,7 @@
struct rcu_node *rnp = rdp->mynode;
struct rcu_state *rsp;
- trace_rcu_utilization("Start CPU hotplug");
+ trace_rcu_utilization(TPS("Start CPU hotplug"));
switch (action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
@@ -3009,7 +3028,7 @@
default:
break;
}
- trace_rcu_utilization("End CPU hotplug");
+ trace_rcu_utilization(TPS("End CPU hotplug"));
return NOTIFY_OK;
}