powerpc64: port of the function graph tracer

This is a port of the function graph tracer that was written by
Frederic Weisbecker for the x86.

This only works for PPC64 at the moment and only for static tracing.
PPC32 and dynamic function graph tracing support will come later.

The trace produces a visual calling of functions:

 # tracer: function_graph
 #
 # CPU  DURATION                  FUNCTION CALLS
 # |     |   |                     |   |   |   |
  0)   2.224 us    |                        }
  0) ! 271.024 us  |                      }
  0) ! 320.080 us  |                    }
  0) ! 324.656 us  |                  }
  0) ! 329.136 us  |                }
  0)               |                .put_prev_task_fair() {
  0)               |                  .update_curr() {
  0)   2.240 us    |                    .update_min_vruntime();
  0)   6.512 us    |                  }
  0)   2.528 us    |                  .__enqueue_entity();
  0) + 15.536 us   |                }
  0)               |                .pick_next_task_fair() {
  0)   2.032 us    |                  .__pick_next_entity();
  0)   2.064 us    |                  .__clear_buddies();
  0)               |                  .set_next_entity() {
  0)   2.672 us    |                    .__dequeue_entity();
  0)   6.864 us    |                  }

Geoff Lavand tested on PS3.

Tested-by: Geoff Levand <geoffrey.levand@am.sony.com>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 383ed6e..a32699e 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -931,13 +931,65 @@
 	ld	r5,0(r5)
 	mtctr	r5
 	bctrl
-
 	nop
+
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	b	ftrace_graph_caller
+#endif
 	ld	r0, 128(r1)
 	mtlr	r0
 	addi	r1, r1, 112
 _GLOBAL(ftrace_stub)
 	blr
 
-#endif
-#endif
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ftrace_graph_caller:
+	/* load r4 with local address */
+	ld	r4, 128(r1)
+	subi	r4, r4, MCOUNT_INSN_SIZE
+
+	/* get the parent address */
+	ld	r11, 112(r1)
+	addi	r3, r11, 16
+
+	bl	.prepare_ftrace_return
+	nop
+
+	ld	r0, 128(r1)
+	mtlr	r0
+	addi	r1, r1, 112
+	blr
+
+_GLOBAL(return_to_handler)
+	/* need to save return values */
+	std	r4,  -32(r1)
+	std	r3,  -24(r1)
+	/* save TOC */
+	std	r2,  -16(r1)
+	std	r31, -8(r1)
+	mr	r31, r1
+	stdu	r1, -112(r1)
+
+	/* update the TOC */
+	LOAD_REG_IMMEDIATE(r4,ftrace_return_to_handler)
+	ld	r2, 8(r4)
+
+	bl	.ftrace_return_to_handler
+	nop
+
+	/* return value has real return address */
+	mtlr	r3
+
+	ld	r1, 0(r1)
+	ld	r4,  -32(r1)
+	ld	r3,  -24(r1)
+	ld	r2,  -16(r1)
+	ld	r31, -8(r1)
+
+	/* Jump back to real return address */
+	blr
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+#endif /* CONFIG_FUNCTION_TRACER */