perf session: Allocate chunks of sample objects

The ordered sample code allocates singular reference objects struct
sample_queue which have 48byte size on 64bit and 20 bytes on 32bit. That's
silly. Allocate ~64k sized chunks and hand them out.

Performance gain: ~ 15%

Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <20101130163820.398713983@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 9fef587..52672da 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -114,6 +114,7 @@
 	self->repipe = repipe;
 	INIT_LIST_HEAD(&self->ordered_samples.samples);
 	INIT_LIST_HEAD(&self->ordered_samples.sample_cache);
+	INIT_LIST_HEAD(&self->ordered_samples.to_free);
 	machine__init(&self->host_machine, "", HOST_KERNEL_ID);
 
 	if (mode == O_RDONLY) {
@@ -403,10 +404,10 @@
 {
 	struct ordered_samples *os = &session->ordered_samples;
 
-	while (!list_empty(&os->sample_cache)) {
+	while (!list_empty(&os->to_free)) {
 		struct sample_queue *sq;
 
-		sq = list_entry(os->sample_cache.next, struct sample_queue, list);
+		sq = list_entry(os->to_free.next, struct sample_queue, list);
 		list_del(&sq->list);
 		free(sq);
 	}
@@ -538,10 +539,13 @@
 	}
 }
 
+#define MAX_SAMPLE_BUFFER	(64 * 1024 / sizeof(struct sample_queue))
+
 static int queue_sample_event(event_t *event, struct sample_data *data,
 			      struct perf_session *s)
 {
-	struct list_head *sc = &s->ordered_samples.sample_cache;
+	struct ordered_samples *os = &s->ordered_samples;
+	struct list_head *sc = &os->sample_cache;
 	u64 timestamp = data->time;
 	struct sample_queue *new;
 
@@ -553,10 +557,17 @@
 	if (!list_empty(sc)) {
 		new = list_entry(sc->next, struct sample_queue, list);
 		list_del(&new->list);
+	} else if (os->sample_buffer) {
+		new = os->sample_buffer + os->sample_buffer_idx;
+		if (++os->sample_buffer_idx == MAX_SAMPLE_BUFFER)
+			os->sample_buffer = NULL;
 	} else {
-		new = malloc(sizeof(*new));
-		if (!new)
+		os->sample_buffer = malloc(MAX_SAMPLE_BUFFER * sizeof(*new));
+		if (!os->sample_buffer)
 			return -ENOMEM;
+		list_add(&os->sample_buffer->list, &os->to_free);
+		os->sample_buffer_idx = 2;
+		new = os->sample_buffer + 1;
 	}
 
 	new->timestamp = timestamp;