x86, ptrace: new ptrace BTS API

Introduce a new ptrace BTS (Branch Trace Store) API that provides two different overflow handling mechanisms (wrap-around and buffer-full-signal) to serve two different use cases (debugging and profiling).

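It also combines buffer allocation and configuration into a single request: PTRACE_BTS_CONFIG now (re)allocates the buffer as needed, replacing the separate PTRACE_BTS_ALLOCATE_BUFFER request.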

Open issues:
- memory rlimit
- overflow signal

What would be the right signal to use for the buffer-full (overflow) notification?

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 3e78c12..18972a3 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -33,12 +33,6 @@
 
 
 /*
- * The maximal size of a BTS buffer per traced task in number of BTS
- * records.
- */
-#define PTRACE_BTS_BUFFER_MAX 4000
-
-/*
  * does not yet catch signals sent when the child dies.
  * in exit.c or in signal.c.
  */
@@ -466,17 +460,12 @@
 	return 0;
 }
 
-static int ptrace_bts_max_buffer_size(void)
-{
-	return PTRACE_BTS_BUFFER_MAX;
-}
-
-static int ptrace_bts_get_buffer_size(struct task_struct *child)
+static int ptrace_bts_get_size(struct task_struct *child)
 {
 	if (!child->thread.ds_area_msr)
 		return -ENXIO;
 
-	return ds_get_bts_size((void *)child->thread.ds_area_msr);
+	return ds_get_bts_index((void *)child->thread.ds_area_msr);
 }
 
 static int ptrace_bts_read_record(struct task_struct *child,
@@ -485,7 +474,7 @@
 {
 	struct bts_struct ret;
 	int retval;
-	int bts_size;
+	int bts_end;
 	int bts_index;
 
 	if (!child->thread.ds_area_msr)
@@ -494,15 +483,15 @@
 	if (index < 0)
 		return -EINVAL;
 
-	bts_size = ds_get_bts_size((void *)child->thread.ds_area_msr);
-	if (bts_size <= index)
+	bts_end = ds_get_bts_end((void *)child->thread.ds_area_msr);
+	if (bts_end <= index)
 		return -EINVAL;
 
 	/* translate the ptrace bts index into the ds bts index */
 	bts_index = ds_get_bts_index((void *)child->thread.ds_area_msr);
 	bts_index -= (index + 1);
 	if (bts_index < 0)
-		bts_index += bts_size;
+		bts_index += bts_end;
 
 	retval = ds_read_bts((void *)child->thread.ds_area_msr,
 			     bts_index, &ret);
@@ -530,19 +519,97 @@
 	return sizeof(*in);
 }
 
-static int ptrace_bts_config(struct task_struct *child,
-			     unsigned long options)
+static int ptrace_bts_clear(struct task_struct *child)
 {
-	unsigned long debugctl_mask = ds_debugctl_mask();
-	int retval;
-
-	retval = ptrace_bts_get_buffer_size(child);
-	if (retval < 0)
-		return retval;
-	if (retval == 0)
+	if (!child->thread.ds_area_msr)
 		return -ENXIO;
 
-	if (options & PTRACE_BTS_O_TRACE_TASK) {
+	return ds_clear((void *)child->thread.ds_area_msr);
+}
+
+static int ptrace_bts_drain(struct task_struct *child,
+			    struct bts_struct __user *out)
+{
+	int end, i;
+	void *ds = (void *)child->thread.ds_area_msr;
+
+	if (!ds)
+		return -ENXIO;
+
+	end = ds_get_bts_index(ds);
+	if (end <= 0)
+		return end;
+
+	for (i = 0; i < end; i++, out++) {
+		struct bts_struct ret;
+		int retval;
+
+		retval = ds_read_bts(ds, i, &ret);
+		if (retval < 0)
+			return retval;
+
+		if (copy_to_user(out, &ret, sizeof(ret)))
+			return -EFAULT;
+	}
+
+	ds_clear(ds);
+
+	return i;
+}
+
+static int ptrace_bts_config(struct task_struct *child,
+			     const struct ptrace_bts_config __user *ucfg)
+{
+	struct ptrace_bts_config cfg;
+	unsigned long debugctl_mask;
+	int bts_size, ret;
+	void *ds;
+
+	if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
+		return -EFAULT;
+
+	bts_size = 0;
+	ds = (void *)child->thread.ds_area_msr;
+	if (ds) {
+		bts_size = ds_get_bts_size(ds);
+		if (bts_size < 0)
+			return bts_size;
+	}
+
+	if (bts_size != cfg.size) {
+		ret = ds_free((void **)&child->thread.ds_area_msr);
+		if (ret < 0)
+			return ret;
+
+		if (cfg.size > 0)
+			ret = ds_allocate((void **)&child->thread.ds_area_msr,
+					  cfg.size);
+		ds = (void *)child->thread.ds_area_msr;
+		if (ds)
+			set_tsk_thread_flag(child, TIF_DS_AREA_MSR);
+		else
+			clear_tsk_thread_flag(child, TIF_DS_AREA_MSR);
+
+		if (ret < 0)
+			return ret;
+
+		bts_size = ds_get_bts_size(ds);
+		if (bts_size <= 0)
+			return bts_size;
+	}
+
+	if (ds) {
+		if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
+			ret = ds_set_overflow(ds, DS_O_SIGNAL);
+		} else {
+			ret = ds_set_overflow(ds, DS_O_WRAP);
+		}
+		if (ret < 0)
+			return ret;
+	}
+
+	debugctl_mask = ds_debugctl_mask();
+	if (ds && (cfg.flags & PTRACE_BTS_O_TRACE)) {
 		child->thread.debugctlmsr |= debugctl_mask;
 		set_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
 	} else {
@@ -555,7 +622,7 @@
 			clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
 	}
 
-	if (options & PTRACE_BTS_O_TIMESTAMPS)
+	if (ds && (cfg.flags & PTRACE_BTS_O_SCHED))
 		set_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
 	else
 		clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
@@ -563,59 +630,32 @@
 	return 0;
 }
 
-static int ptrace_bts_status(struct task_struct *child)
+static int ptrace_bts_status(struct task_struct *child,
+			     struct ptrace_bts_config __user *ucfg)
 {
-	unsigned long debugctl_mask = ds_debugctl_mask();
-	int retval, status = 0;
+	void *ds = (void *)child->thread.ds_area_msr;
+	struct ptrace_bts_config cfg;
 
-	retval = ptrace_bts_get_buffer_size(child);
-	if (retval < 0)
-		return retval;
-	if (retval == 0)
-		return -ENXIO;
+	memset(&cfg, 0, sizeof(cfg));
 
-	if (ptrace_bts_get_buffer_size(child) <= 0)
-		return -ENXIO;
+	if (ds) {
+		cfg.size = ds_get_bts_size(ds);
 
-	if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) &&
-	    child->thread.debugctlmsr & debugctl_mask)
-		status |= PTRACE_BTS_O_TRACE_TASK;
-	if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS))
-		status |= PTRACE_BTS_O_TIMESTAMPS;
+		if (ds_get_overflow(ds) == DS_O_SIGNAL)
+			cfg.flags |= PTRACE_BTS_O_SIGNAL;
 
-	return status;
-}
+		if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) &&
+		    child->thread.debugctlmsr & ds_debugctl_mask())
+			cfg.flags |= PTRACE_BTS_O_TRACE;
 
-static int ptrace_bts_allocate_bts(struct task_struct *child,
-				   int size_in_records)
-{
-	int retval = 0;
-	void *ds;
-
-	if (size_in_records < 0)
-		return -EINVAL;
-
-	if (size_in_records > ptrace_bts_max_buffer_size())
-		return -EINVAL;
-
-	if (size_in_records == 0) {
-		ptrace_bts_config(child, /* options = */ 0);
-	} else {
-		retval = ds_allocate(&ds, size_in_records);
-		if (retval)
-			return retval;
+		if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS))
+			cfg.flags |= PTRACE_BTS_O_SCHED;
 	}
 
-	if (child->thread.ds_area_msr)
-		ds_free((void **)&child->thread.ds_area_msr);
+	if (copy_to_user(ucfg, &cfg, sizeof(cfg)))
+		return -EFAULT;
 
-	child->thread.ds_area_msr = (unsigned long)ds;
-	if (child->thread.ds_area_msr)
-		set_tsk_thread_flag(child, TIF_DS_AREA_MSR);
-	else
-		clear_tsk_thread_flag(child, TIF_DS_AREA_MSR);
-
-	return retval;
+	return sizeof(cfg);
 }
 
 void ptrace_bts_take_timestamp(struct task_struct *tsk,
@@ -626,9 +666,6 @@
 		.variant.jiffies = jiffies
 	};
 
-	if (ptrace_bts_get_buffer_size(tsk) <= 0)
-		return;
-
 	ptrace_bts_write_record(tsk, &rec);
 }
 
@@ -808,30 +845,32 @@
 		break;
 #endif
 
-	case PTRACE_BTS_MAX_BUFFER_SIZE:
-		ret = ptrace_bts_max_buffer_size();
-		break;
-
-	case PTRACE_BTS_ALLOCATE_BUFFER:
-		ret = ptrace_bts_allocate_bts(child, data);
-		break;
-
-	case PTRACE_BTS_GET_BUFFER_SIZE:
-		ret = ptrace_bts_get_buffer_size(child);
-		break;
-
-	case PTRACE_BTS_READ_RECORD:
-		ret = ptrace_bts_read_record
-			(child, data,
-			 (struct bts_struct __user *) addr);
-		break;
-
 	case PTRACE_BTS_CONFIG:
-		ret = ptrace_bts_config(child, data);
+		ret = ptrace_bts_config
+			(child, (struct ptrace_bts_config __user *)addr);
 		break;
 
 	case PTRACE_BTS_STATUS:
-		ret = ptrace_bts_status(child);
+		ret = ptrace_bts_status
+			(child, (struct ptrace_bts_config __user *)addr);
+		break;
+
+	case PTRACE_BTS_SIZE:
+		ret = ptrace_bts_get_size(child);
+		break;
+
+	case PTRACE_BTS_GET:
+		ret = ptrace_bts_read_record
+			(child, data, (struct bts_struct __user *) addr);
+		break;
+
+	case PTRACE_BTS_CLEAR:
+		ret = ptrace_bts_clear(child);
+		break;
+
+	case PTRACE_BTS_DRAIN:
+		ret = ptrace_bts_drain
+			(child, (struct bts_struct __user *) addr);
 		break;
 
 	default:
@@ -1017,12 +1056,12 @@
 	case PTRACE_SETOPTIONS:
 	case PTRACE_SET_THREAD_AREA:
 	case PTRACE_GET_THREAD_AREA:
-	case PTRACE_BTS_MAX_BUFFER_SIZE:
-	case PTRACE_BTS_ALLOCATE_BUFFER:
-	case PTRACE_BTS_GET_BUFFER_SIZE:
-	case PTRACE_BTS_READ_RECORD:
 	case PTRACE_BTS_CONFIG:
 	case PTRACE_BTS_STATUS:
+	case PTRACE_BTS_SIZE:
+	case PTRACE_BTS_GET:
+	case PTRACE_BTS_CLEAR:
+	case PTRACE_BTS_DRAIN:
 		return sys_ptrace(request, pid, addr, data);
 
 	default: