| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* | 
 | 2 |  *  Parisc performance counters | 
 | 3 |  *  Copyright (C) 2001 Randolph Chung <tausq@debian.org> | 
 | 4 |  * | 
 | 5 |  *  This code is derived, with permission, from HP/UX sources. | 
 | 6 |  * | 
 | 7 |  *    This program is free software; you can redistribute it and/or modify | 
 | 8 |  *    it under the terms of the GNU General Public License as published by | 
 | 9 |  *    the Free Software Foundation; either version 2, or (at your option) | 
 | 10 |  *    any later version. | 
 | 11 |  * | 
 | 12 |  *    This program is distributed in the hope that it will be useful, | 
 | 13 |  *    but WITHOUT ANY WARRANTY; without even the implied warranty of | 
 | 14 |  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
 | 15 |  *    GNU General Public License for more details. | 
 | 16 |  * | 
 | 17 |  *    You should have received a copy of the GNU General Public License | 
 | 18 |  *    along with this program; if not, write to the Free Software | 
 | 19 |  *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | 
 | 20 |  */ | 
 | 21 |  | 
 | 22 | /* | 
 | 23 |  *  Edited comment from original sources: | 
 | 24 |  * | 
 | 25 |  *  This driver programs the PCX-U/PCX-W performance counters | 
 | 26 |  *  on the PA-RISC 2.0 chips.  The driver keeps all images now | 
 | 27 |  *  internally to the kernel to hopefully eliminate the possibility | 
 | 28 |  *  of a bad image halting the CPU.  Also, there are different | 
 | 29 |  *  images for the PCX-W and later chips vs the PCX-U chips. | 
 | 30 |  * | 
 | 31 |  *  Only 1 process is allowed to access the driver at any time, | 
 | 32 |  *  so the only protection that is needed is at open and close. | 
 | 33 |  *  A variable "perf_enabled" is used to hold the state of the | 
 | 34 |  *  driver.  The spinlock "perf_lock" is used to protect the | 
 | 35 |  *  modification of the state during open/close operations so | 
 | 36 |  *  multiple processes don't get into the driver simultaneously. | 
 | 37 |  * | 
 | 38 |  *  This driver accesses the processor directly vs going through | 
 | 39 |  *  the PDC INTRIGUE calls.  This is done to eliminate bugs introduced | 
 | 40 |  *  in various PDC revisions.  The code is much more maintainable | 
 | 41 |  *  and reliable this way vs having to debug on every version of PDC | 
 | 42 |  *  on every box.  | 
 | 43 |  */ | 
 | 44 |  | 
| Randy Dunlap | a941564 | 2006-01-11 12:17:48 -0800 | [diff] [blame] | 45 | #include <linux/capability.h> | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 46 | #include <linux/init.h> | 
 | 47 | #include <linux/proc_fs.h> | 
 | 48 | #include <linux/miscdevice.h> | 
| Arnd Bergmann | b691750 | 2008-05-20 19:16:31 +0200 | [diff] [blame] | 49 | #include <linux/smp_lock.h> | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 50 | #include <linux/spinlock.h> | 
 | 51 |  | 
 | 52 | #include <asm/uaccess.h> | 
 | 53 | #include <asm/perf.h> | 
 | 54 | #include <asm/parisc-device.h> | 
 | 55 | #include <asm/processor.h> | 
 | 56 | #include <asm/runway.h> | 
 | 57 | #include <asm/io.h>		/* for __raw_read() */ | 
 | 58 |  | 
 | 59 | #include "perf_images.h" | 
 | 60 |  | 
 | 61 | #define MAX_RDR_WORDS	24 | 
 | 62 | #define PERF_VERSION	2	/* derived from hpux's PI v2 interface */ | 
 | 63 |  | 
 | 64 | /* definition of RDR regs */ | 
 | 65 | struct rdr_tbl_ent { | 
 | 66 | 	uint16_t	width; | 
 | 67 | 	uint8_t		num_words; | 
 | 68 | 	uint8_t		write_control; | 
 | 69 | }; | 
 | 70 |  | 
| Helge Deller | 8039de1 | 2006-01-10 20:35:03 -0500 | [diff] [blame] | 71 | static int perf_processor_interface __read_mostly = UNKNOWN_INTF; | 
| Helge Deller | cb6fc18 | 2006-01-17 12:40:40 -0700 | [diff] [blame] | 72 | static int perf_enabled __read_mostly; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 73 | static spinlock_t perf_lock; | 
| Helge Deller | cb6fc18 | 2006-01-17 12:40:40 -0700 | [diff] [blame] | 74 | struct parisc_device *cpu_device __read_mostly; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 75 |  | 
 | 76 | /* RDRs to write for PCX-W */ | 
| Helge Deller | cb6fc18 | 2006-01-17 12:40:40 -0700 | [diff] [blame] | 77 | static const int perf_rdrs_W[] = | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 78 | 	{ 0, 1, 4, 5, 6, 15, 16, 17, 18, 20, 21, 22, 23, 24, 25, -1 }; | 
 | 79 |  | 
 | 80 | /* RDRs to write for PCX-U */ | 
| Helge Deller | cb6fc18 | 2006-01-17 12:40:40 -0700 | [diff] [blame] | 81 | static const int perf_rdrs_U[] = | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 82 | 	{ 0, 1, 4, 5, 6, 7, 16, 17, 18, 20, 21, 22, 23, 24, 25, -1 }; | 
 | 83 |  | 
 | 84 | /* RDR register descriptions for PCX-W */ | 
| Helge Deller | cb6fc18 | 2006-01-17 12:40:40 -0700 | [diff] [blame] | 85 | static const struct rdr_tbl_ent perf_rdr_tbl_W[] = { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 86 | 	{ 19,	1,	8 },   /* RDR 0 */ | 
 | 87 | 	{ 16,	1,	16 },  /* RDR 1 */ | 
 | 88 | 	{ 72,	2,	0 },   /* RDR 2 */ | 
 | 89 | 	{ 81,	2,	0 },   /* RDR 3 */ | 
 | 90 | 	{ 328,	6,	0 },   /* RDR 4 */ | 
 | 91 | 	{ 160,	3,	0 },   /* RDR 5 */ | 
 | 92 | 	{ 336,	6,	0 },   /* RDR 6 */ | 
 | 93 | 	{ 164,	3,	0 },   /* RDR 7 */ | 
 | 94 | 	{ 0,	0,	0 },   /* RDR 8 */ | 
 | 95 | 	{ 35,	1,	0 },   /* RDR 9 */ | 
 | 96 | 	{ 6,	1,	0 },   /* RDR 10 */ | 
 | 97 | 	{ 18,	1,	0 },   /* RDR 11 */ | 
 | 98 | 	{ 13,	1,	0 },   /* RDR 12 */ | 
 | 99 | 	{ 8,	1,	0 },   /* RDR 13 */ | 
 | 100 | 	{ 8,	1,	0 },   /* RDR 14 */ | 
 | 101 | 	{ 8,	1,	0 },   /* RDR 15 */ | 
 | 102 | 	{ 1530,	24,	0 },   /* RDR 16 */ | 
 | 103 | 	{ 16,	1,	0 },   /* RDR 17 */ | 
 | 104 | 	{ 4,	1,	0 },   /* RDR 18 */ | 
 | 105 | 	{ 0,	0,	0 },   /* RDR 19 */ | 
 | 106 | 	{ 152,	3,	24 },  /* RDR 20 */ | 
 | 107 | 	{ 152,	3,	24 },  /* RDR 21 */ | 
 | 108 | 	{ 233,	4,	48 },  /* RDR 22 */ | 
 | 109 | 	{ 233,	4,	48 },  /* RDR 23 */ | 
 | 110 | 	{ 71,	2,	0 },   /* RDR 24 */ | 
 | 111 | 	{ 71,	2,	0 },   /* RDR 25 */ | 
 | 112 | 	{ 11,	1,	0 },   /* RDR 26 */ | 
 | 113 | 	{ 18,	1,	0 },   /* RDR 27 */ | 
 | 114 | 	{ 128,	2,	0 },   /* RDR 28 */ | 
 | 115 | 	{ 0,	0,	0 },   /* RDR 29 */ | 
 | 116 | 	{ 16,	1,	0 },   /* RDR 30 */ | 
 | 117 | 	{ 16,	1,	0 },   /* RDR 31 */ | 
 | 118 | }; | 
 | 119 |  | 
 | 120 | /* RDR register descriptions for PCX-U */ | 
| Helge Deller | cb6fc18 | 2006-01-17 12:40:40 -0700 | [diff] [blame] | 121 | static const struct rdr_tbl_ent perf_rdr_tbl_U[] = { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 122 | 	{ 19,	1,	8 },              /* RDR 0 */ | 
 | 123 | 	{ 32,	1,	16 },             /* RDR 1 */ | 
 | 124 | 	{ 20,	1,	0 },              /* RDR 2 */ | 
 | 125 | 	{ 0,	0,	0 },              /* RDR 3 */ | 
 | 126 | 	{ 344,	6,	0 },              /* RDR 4 */ | 
 | 127 | 	{ 176,	3,	0 },              /* RDR 5 */ | 
 | 128 | 	{ 336,	6,	0 },              /* RDR 6 */ | 
 | 129 | 	{ 0,	0,	0 },              /* RDR 7 */ | 
 | 130 | 	{ 0,	0,	0 },              /* RDR 8 */ | 
 | 131 | 	{ 0,	0,	0 },              /* RDR 9 */ | 
 | 132 | 	{ 28,	1,	0 },              /* RDR 10 */ | 
 | 133 | 	{ 33,	1,	0 },              /* RDR 11 */ | 
 | 134 | 	{ 0,	0,	0 },              /* RDR 12 */ | 
 | 135 | 	{ 230,	4,	0 },              /* RDR 13 */ | 
 | 136 | 	{ 32,	1,	0 },              /* RDR 14 */ | 
 | 137 | 	{ 128,	2,	0 },              /* RDR 15 */ | 
 | 138 | 	{ 1494,	24,	0 },              /* RDR 16 */ | 
 | 139 | 	{ 18,	1,	0 },              /* RDR 17 */ | 
 | 140 | 	{ 4,	1,	0 },              /* RDR 18 */ | 
 | 141 | 	{ 0,	0,	0 },              /* RDR 19 */ | 
 | 142 | 	{ 158,	3,	24 },             /* RDR 20 */ | 
 | 143 | 	{ 158,	3,	24 },             /* RDR 21 */ | 
 | 144 | 	{ 194,	4,	48 },             /* RDR 22 */ | 
 | 145 | 	{ 194,	4,	48 },             /* RDR 23 */ | 
 | 146 | 	{ 71,	2,	0 },              /* RDR 24 */ | 
 | 147 | 	{ 71,	2,	0 },              /* RDR 25 */ | 
 | 148 | 	{ 28,	1,	0 },              /* RDR 26 */ | 
 | 149 | 	{ 33,	1,	0 },              /* RDR 27 */ | 
 | 150 | 	{ 88,	2,	0 },              /* RDR 28 */ | 
 | 151 | 	{ 32,	1,	0 },              /* RDR 29 */ | 
 | 152 | 	{ 24,	1,	0 },              /* RDR 30 */ | 
 | 153 | 	{ 16,	1,	0 },              /* RDR 31 */ | 
 | 154 | }; | 
 | 155 |  | 
 | 156 | /* | 
 | 157 |  * A non-zero write_control in the above tables is a byte offset into | 
 | 158 |  * this array. | 
 | 159 |  */ | 
| Helge Deller | cb6fc18 | 2006-01-17 12:40:40 -0700 | [diff] [blame] | 160 | static const uint64_t perf_bitmasks[] = { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 161 | 	0x0000000000000000ul,     /* first dbl word must be zero */ | 
 | 162 | 	0xfdffe00000000000ul,     /* RDR0 bitmask */ | 
 | 163 | 	0x003f000000000000ul,     /* RDR1 bitmask */ | 
 | 164 | 	0x00fffffffffffffful,     /* RDR20-RDR21 bitmask (152 bits) */ | 
 | 165 | 	0xfffffffffffffffful, | 
 | 166 | 	0xfffffffc00000000ul, | 
 | 167 | 	0xfffffffffffffffful,     /* RDR22-RDR23 bitmask (233 bits) */ | 
 | 168 | 	0xfffffffffffffffful, | 
 | 169 | 	0xfffffffffffffffcul, | 
 | 170 | 	0xff00000000000000ul | 
 | 171 | }; | 
 | 172 |  | 
 | 173 | /* | 
 | 174 |  * Write control bitmasks for Pa-8700 processor given | 
| Simon Arlott | 7022672 | 2007-05-11 20:42:34 +0100 | [diff] [blame] | 175 |  * some things have changed slightly. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 176 |  */ | 
| Helge Deller | cb6fc18 | 2006-01-17 12:40:40 -0700 | [diff] [blame] | 177 | static const uint64_t perf_bitmasks_piranha[] = { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 178 | 	0x0000000000000000ul,     /* first dbl word must be zero */ | 
 | 179 | 	0xfdffe00000000000ul,     /* RDR0 bitmask */ | 
 | 180 | 	0x003f000000000000ul,     /* RDR1 bitmask */ | 
 | 181 | 	0x00fffffffffffffful,     /* RDR20-RDR21 bitmask (158 bits) */ | 
 | 182 | 	0xfffffffffffffffful, | 
 | 183 | 	0xfffffffc00000000ul, | 
 | 184 | 	0xfffffffffffffffful,     /* RDR22-RDR23 bitmask (210 bits) */ | 
 | 185 | 	0xfffffffffffffffful, | 
 | 186 | 	0xfffffffffffffffful, | 
 | 187 | 	0xfffc000000000000ul | 
 | 188 | }; | 
 | 189 |  | 
/* Bitmask table in use; pointed at one of the tables by perf_init(). */
static const uint64_t *bitmask_array;
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 191 |  | 
 | 192 | /****************************************************************************** | 
 | 193 |  * Function Prototypes | 
 | 194 |  *****************************************************************************/ | 
 | 195 | static int perf_config(uint32_t *image_ptr); | 
 | 196 | static int perf_release(struct inode *inode, struct file *file); | 
 | 197 | static int perf_open(struct inode *inode, struct file *file); | 
 | 198 | static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t *ppos); | 
 | 199 | static ssize_t perf_write(struct file *file, const char __user *buf, size_t count,  | 
 | 200 | 	loff_t *ppos); | 
| Christoph Hellwig | ad7dd33 | 2005-11-17 16:40:31 -0500 | [diff] [blame] | 201 | static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 202 | static void perf_start_counters(void); | 
 | 203 | static int perf_stop_counters(uint32_t *raddr); | 
| Helge Deller | cb6fc18 | 2006-01-17 12:40:40 -0700 | [diff] [blame] | 204 | static const struct rdr_tbl_ent * perf_rdr_get_entry(uint32_t rdr_num); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 205 | static int perf_rdr_read_ubuf(uint32_t	rdr_num, uint64_t *buffer); | 
 | 206 | static int perf_rdr_clear(uint32_t rdr_num); | 
 | 207 | static int perf_write_image(uint64_t *memaddr); | 
 | 208 | static void perf_rdr_write(uint32_t rdr_num, uint64_t *buffer); | 
 | 209 |  | 
 | 210 | /* External Assembly Routines */ | 
 | 211 | extern uint64_t perf_rdr_shift_in_W (uint32_t rdr_num, uint16_t width); | 
 | 212 | extern uint64_t perf_rdr_shift_in_U (uint32_t rdr_num, uint16_t width); | 
 | 213 | extern void perf_rdr_shift_out_W (uint32_t rdr_num, uint64_t buffer); | 
 | 214 | extern void perf_rdr_shift_out_U (uint32_t rdr_num, uint64_t buffer); | 
 | 215 | extern void perf_intrigue_enable_perf_counters (void); | 
 | 216 | extern void perf_intrigue_disable_perf_counters (void); | 
 | 217 |  | 
 | 218 | /****************************************************************************** | 
 | 219 |  * Function Definitions | 
 | 220 |  *****************************************************************************/ | 
 | 221 |  | 
 | 222 |  | 
 | 223 | /* | 
 | 224 |  * configure: | 
 | 225 |  * | 
 | 226 |  * Configure the cpu with a given data image.  First turn off the counters,  | 
 | 227 |  * then download the image, then turn the counters back on. | 
 | 228 |  */ | 
 | 229 | static int perf_config(uint32_t *image_ptr) | 
 | 230 | { | 
 | 231 | 	long error; | 
 | 232 | 	uint32_t raddr[4]; | 
 | 233 |  | 
 | 234 | 	/* Stop the counters*/ | 
 | 235 | 	error = perf_stop_counters(raddr); | 
 | 236 | 	if (error != 0) { | 
 | 237 | 		printk("perf_config: perf_stop_counters = %ld\n", error); | 
 | 238 | 		return -EINVAL;  | 
 | 239 | 	} | 
 | 240 |  | 
 | 241 | printk("Preparing to write image\n"); | 
 | 242 | 	/* Write the image to the chip */ | 
 | 243 | 	error = perf_write_image((uint64_t *)image_ptr); | 
 | 244 | 	if (error != 0) { | 
 | 245 | 		printk("perf_config: DOWNLOAD = %ld\n", error); | 
 | 246 | 		return -EINVAL;  | 
 | 247 | 	} | 
 | 248 |  | 
 | 249 | printk("Preparing to start counters\n"); | 
 | 250 |  | 
 | 251 | 	/* Start the counters */ | 
 | 252 | 	perf_start_counters(); | 
 | 253 |  | 
 | 254 | 	return sizeof(uint32_t); | 
 | 255 | } | 
 | 256 |  | 
 | 257 | /* | 
 | 258 |  * Open the device and initialize all of its memory.  The device is only  | 
 | 259 |  * opened once, but can be "queried" by multiple processes that know its | 
 | 260 |  * file descriptor. | 
 | 261 |  */ | 
 | 262 | static int perf_open(struct inode *inode, struct file *file) | 
 | 263 | { | 
| Arnd Bergmann | b691750 | 2008-05-20 19:16:31 +0200 | [diff] [blame] | 264 | 	lock_kernel(); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 265 | 	spin_lock(&perf_lock); | 
 | 266 | 	if (perf_enabled) { | 
 | 267 | 		spin_unlock(&perf_lock); | 
| Arnd Bergmann | b691750 | 2008-05-20 19:16:31 +0200 | [diff] [blame] | 268 | 		unlock_kernel(); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 269 | 		return -EBUSY; | 
 | 270 | 	} | 
 | 271 | 	perf_enabled = 1; | 
 | 272 |  	spin_unlock(&perf_lock); | 
| Arnd Bergmann | b691750 | 2008-05-20 19:16:31 +0200 | [diff] [blame] | 273 | 	unlock_kernel(); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 274 |  | 
 | 275 | 	return 0; | 
 | 276 | } | 
 | 277 |  | 
 | 278 | /* | 
 | 279 |  * Close the device. | 
 | 280 |  */ | 
 | 281 | static int perf_release(struct inode *inode, struct file *file) | 
 | 282 | { | 
 | 283 | 	spin_lock(&perf_lock); | 
 | 284 | 	perf_enabled = 0; | 
 | 285 | 	spin_unlock(&perf_lock); | 
 | 286 |  | 
 | 287 | 	return 0; | 
 | 288 | } | 
 | 289 |  | 
 | 290 | /* | 
 | 291 |  * Read does nothing for this driver | 
 | 292 |  */ | 
 | 293 | static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t *ppos) | 
 | 294 | { | 
 | 295 | 	return 0; | 
 | 296 | } | 
 | 297 |  | 
 | 298 | /* | 
 | 299 |  * write: | 
 | 300 |  * | 
 | 301 |  * This routine downloads the image to the chip.  It must be | 
 | 302 |  * called on the processor that the download should happen | 
 | 303 |  * on. | 
 | 304 |  */ | 
 | 305 | static ssize_t perf_write(struct file *file, const char __user *buf, size_t count,  | 
 | 306 | 	loff_t *ppos) | 
 | 307 | { | 
 | 308 | 	int err; | 
 | 309 | 	size_t image_size; | 
 | 310 | 	uint32_t image_type; | 
 | 311 | 	uint32_t interface_type; | 
 | 312 | 	uint32_t test; | 
 | 313 |  | 
 | 314 | 	if (perf_processor_interface == ONYX_INTF)  | 
 | 315 | 		image_size = PCXU_IMAGE_SIZE; | 
 | 316 | 	else if (perf_processor_interface == CUDA_INTF)  | 
 | 317 | 		image_size = PCXW_IMAGE_SIZE; | 
 | 318 | 	else  | 
 | 319 | 		return -EFAULT; | 
 | 320 |  | 
 | 321 | 	if (!capable(CAP_SYS_ADMIN)) | 
 | 322 | 		return -EACCES; | 
 | 323 |  | 
 | 324 | 	if (count != sizeof(uint32_t)) | 
 | 325 | 		return -EIO; | 
 | 326 |  | 
 | 327 | 	if ((err = copy_from_user(&image_type, buf, sizeof(uint32_t))) != 0)  | 
 | 328 | 		return err; | 
 | 329 |  | 
 | 330 | 	/* Get the interface type and test type */ | 
 | 331 |    	interface_type = (image_type >> 16) & 0xffff; | 
 | 332 | 	test           = (image_type & 0xffff); | 
 | 333 |  | 
 | 334 | 	/* Make sure everything makes sense */ | 
 | 335 |  | 
 | 336 | 	/* First check the machine type is correct for | 
 | 337 | 	   the requested image */ | 
 | 338 |         if (((perf_processor_interface == CUDA_INTF) && | 
 | 339 | 		       (interface_type != CUDA_INTF)) || | 
 | 340 | 	    ((perf_processor_interface == ONYX_INTF) && | 
 | 341 | 	               (interface_type != ONYX_INTF)))  | 
 | 342 | 		return -EINVAL; | 
 | 343 |  | 
 | 344 | 	/* Next check to make sure the requested image | 
 | 345 | 	   is valid */ | 
 | 346 | 	if (((interface_type == CUDA_INTF) &&  | 
 | 347 | 		       (test >= MAX_CUDA_IMAGES)) || | 
 | 348 | 	    ((interface_type == ONYX_INTF) &&  | 
 | 349 | 		       (test >= MAX_ONYX_IMAGES)))  | 
 | 350 | 		return -EINVAL; | 
 | 351 |  | 
 | 352 | 	/* Copy the image into the processor */ | 
 | 353 | 	if (interface_type == CUDA_INTF)  | 
 | 354 | 		return perf_config(cuda_images[test]); | 
 | 355 | 	else | 
 | 356 | 		return perf_config(onyx_images[test]); | 
 | 357 |  | 
 | 358 | 	return count; | 
 | 359 | } | 
 | 360 |  | 
 | 361 | /* | 
 | 362 |  * Patch the images that need to know the IVA addresses. | 
 | 363 |  */ | 
 | 364 | static void perf_patch_images(void) | 
 | 365 | { | 
 | 366 | #if 0 /* FIXME!! */ | 
 | 367 | /*  | 
 | 368 |  * NOTE:  this routine is VERY specific to the current TLB image. | 
 | 369 |  * If the image is changed, this routine might also need to be changed. | 
 | 370 |  */ | 
 | 371 | 	extern void $i_itlb_miss_2_0(); | 
 | 372 | 	extern void $i_dtlb_miss_2_0(); | 
 | 373 | 	extern void PA2_0_iva(); | 
 | 374 |  | 
 | 375 | 	/*  | 
 | 376 | 	 * We can only use the lower 32-bits, the upper 32-bits should be 0 | 
 | 377 | 	 * anyway given this is in the kernel  | 
 | 378 | 	 */ | 
 | 379 | 	uint32_t itlb_addr  = (uint32_t)&($i_itlb_miss_2_0); | 
 | 380 | 	uint32_t dtlb_addr  = (uint32_t)&($i_dtlb_miss_2_0); | 
 | 381 | 	uint32_t IVAaddress = (uint32_t)&PA2_0_iva; | 
 | 382 |  | 
 | 383 | 	if (perf_processor_interface == ONYX_INTF) { | 
 | 384 | 		/* clear last 2 bytes */ | 
 | 385 | 		onyx_images[TLBMISS][15] &= 0xffffff00;   | 
 | 386 | 		/* set 2 bytes */ | 
 | 387 | 		onyx_images[TLBMISS][15] |= (0x000000ff&((dtlb_addr) >> 24)); | 
 | 388 | 		onyx_images[TLBMISS][16] = (dtlb_addr << 8)&0xffffff00; | 
 | 389 | 		onyx_images[TLBMISS][17] = itlb_addr; | 
 | 390 |  | 
 | 391 | 		/* clear last 2 bytes */ | 
 | 392 | 		onyx_images[TLBHANDMISS][15] &= 0xffffff00;   | 
 | 393 | 		/* set 2 bytes */ | 
 | 394 | 		onyx_images[TLBHANDMISS][15] |= (0x000000ff&((dtlb_addr) >> 24)); | 
 | 395 | 		onyx_images[TLBHANDMISS][16] = (dtlb_addr << 8)&0xffffff00; | 
 | 396 | 		onyx_images[TLBHANDMISS][17] = itlb_addr; | 
 | 397 |  | 
 | 398 | 		/* clear last 2 bytes */ | 
 | 399 | 		onyx_images[BIG_CPI][15] &= 0xffffff00;   | 
 | 400 | 		/* set 2 bytes */ | 
 | 401 | 		onyx_images[BIG_CPI][15] |= (0x000000ff&((dtlb_addr) >> 24)); | 
 | 402 | 		onyx_images[BIG_CPI][16] = (dtlb_addr << 8)&0xffffff00; | 
 | 403 | 		onyx_images[BIG_CPI][17] = itlb_addr; | 
 | 404 |  | 
 | 405 | 	    onyx_images[PANIC][15] &= 0xffffff00;  /* clear last 2 bytes */ | 
 | 406 | 	 	onyx_images[PANIC][15] |= (0x000000ff&((IVAaddress) >> 24)); /* set 2 bytes */ | 
 | 407 | 		onyx_images[PANIC][16] = (IVAaddress << 8)&0xffffff00; | 
 | 408 |  | 
 | 409 |  | 
 | 410 | 	} else if (perf_processor_interface == CUDA_INTF) { | 
 | 411 | 		/* Cuda interface */ | 
 | 412 | 		cuda_images[TLBMISS][16] =   | 
 | 413 | 			(cuda_images[TLBMISS][16]&0xffff0000) | | 
 | 414 | 			((dtlb_addr >> 8)&0x0000ffff); | 
 | 415 | 		cuda_images[TLBMISS][17] =  | 
 | 416 | 			((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff); | 
 | 417 | 		cuda_images[TLBMISS][18] = (itlb_addr << 16)&0xffff0000; | 
 | 418 |  | 
 | 419 | 		cuda_images[TLBHANDMISS][16] =  | 
 | 420 | 			(cuda_images[TLBHANDMISS][16]&0xffff0000) | | 
 | 421 | 			((dtlb_addr >> 8)&0x0000ffff); | 
 | 422 | 		cuda_images[TLBHANDMISS][17] =  | 
 | 423 | 			((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff); | 
 | 424 | 		cuda_images[TLBHANDMISS][18] = (itlb_addr << 16)&0xffff0000; | 
 | 425 |  | 
 | 426 | 		cuda_images[BIG_CPI][16] =  | 
 | 427 | 			(cuda_images[BIG_CPI][16]&0xffff0000) | | 
 | 428 | 			((dtlb_addr >> 8)&0x0000ffff); | 
 | 429 | 		cuda_images[BIG_CPI][17] =  | 
 | 430 | 			((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff); | 
 | 431 | 		cuda_images[BIG_CPI][18] = (itlb_addr << 16)&0xffff0000; | 
 | 432 | 	} else { | 
 | 433 | 		/* Unknown type */ | 
 | 434 | 	} | 
 | 435 | #endif | 
 | 436 | } | 
 | 437 |  | 
 | 438 |  | 
 | 439 | /* | 
 | 440 |  * ioctl routine | 
 | 441 |  * All routines effect the processor that they are executed on.  Thus you  | 
 | 442 |  * must be running on the processor that you wish to change. | 
 | 443 |  */ | 
 | 444 |  | 
| Christoph Hellwig | ad7dd33 | 2005-11-17 16:40:31 -0500 | [diff] [blame] | 445 | static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 446 | { | 
 | 447 | 	long error_start; | 
| Christoph Hellwig | ad7dd33 | 2005-11-17 16:40:31 -0500 | [diff] [blame] | 448 | 	uint32_t raddr[4]; | 
 | 449 | 	int error = 0; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 450 |  | 
 | 451 | 	switch (cmd) { | 
 | 452 |  | 
 | 453 | 	    case PA_PERF_ON: | 
 | 454 | 			/* Start the counters */ | 
 | 455 | 			perf_start_counters(); | 
| Christoph Hellwig | ad7dd33 | 2005-11-17 16:40:31 -0500 | [diff] [blame] | 456 | 			break; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 457 |  | 
 | 458 | 	    case PA_PERF_OFF: | 
 | 459 | 			error_start = perf_stop_counters(raddr); | 
 | 460 | 			if (error_start != 0) { | 
 | 461 | 				printk(KERN_ERR "perf_off: perf_stop_counters = %ld\n", error_start); | 
| Christoph Hellwig | ad7dd33 | 2005-11-17 16:40:31 -0500 | [diff] [blame] | 462 | 				error = -EFAULT; | 
 | 463 | 				break; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 464 | 			} | 
 | 465 |  | 
 | 466 | 			/* copy out the Counters */ | 
 | 467 | 			if (copy_to_user((void __user *)arg, raddr,  | 
 | 468 | 					sizeof (raddr)) != 0) { | 
| Christoph Hellwig | ad7dd33 | 2005-11-17 16:40:31 -0500 | [diff] [blame] | 469 | 				error =  -EFAULT; | 
 | 470 | 				break; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 471 | 			} | 
| Christoph Hellwig | ad7dd33 | 2005-11-17 16:40:31 -0500 | [diff] [blame] | 472 | 			break; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 473 |  | 
 | 474 | 	    case PA_PERF_VERSION: | 
 | 475 |   	  		/* Return the version # */ | 
| Christoph Hellwig | ad7dd33 | 2005-11-17 16:40:31 -0500 | [diff] [blame] | 476 | 			error = put_user(PERF_VERSION, (int *)arg); | 
 | 477 | 			break; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 478 |  | 
 | 479 | 	    default: | 
| Christoph Hellwig | ad7dd33 | 2005-11-17 16:40:31 -0500 | [diff] [blame] | 480 |   	 		error = -ENOTTY; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 481 | 	} | 
| Christoph Hellwig | ad7dd33 | 2005-11-17 16:40:31 -0500 | [diff] [blame] | 482 |  | 
| Christoph Hellwig | ad7dd33 | 2005-11-17 16:40:31 -0500 | [diff] [blame] | 483 | 	return error; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 484 | } | 
 | 485 |  | 
| Arjan van de Ven | 5dfe4c9 | 2007-02-12 00:55:31 -0800 | [diff] [blame] | 486 | static const struct file_operations perf_fops = { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 487 | 	.llseek = no_llseek, | 
 | 488 | 	.read = perf_read, | 
 | 489 | 	.write = perf_write, | 
| Christoph Hellwig | ad7dd33 | 2005-11-17 16:40:31 -0500 | [diff] [blame] | 490 | 	.unlocked_ioctl = perf_ioctl, | 
 | 491 | 	.compat_ioctl = perf_ioctl, | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 492 | 	.open = perf_open, | 
 | 493 | 	.release = perf_release | 
 | 494 | }; | 
 | 495 | 	 | 
 | 496 | static struct miscdevice perf_dev = { | 
 | 497 | 	MISC_DYNAMIC_MINOR, | 
 | 498 | 	PA_PERF_DEV, | 
 | 499 | 	&perf_fops | 
 | 500 | }; | 
 | 501 |  | 
 | 502 | /* | 
 | 503 |  * Initialize the module | 
 | 504 |  */ | 
 | 505 | static int __init perf_init(void) | 
 | 506 | { | 
 | 507 | 	int ret; | 
 | 508 |  | 
 | 509 | 	/* Determine correct processor interface to use */ | 
 | 510 | 	bitmask_array = perf_bitmasks; | 
 | 511 |  | 
 | 512 | 	if (boot_cpu_data.cpu_type == pcxu || | 
 | 513 | 	    boot_cpu_data.cpu_type == pcxu_) { | 
 | 514 | 		perf_processor_interface = ONYX_INTF; | 
 | 515 | 	} else if (boot_cpu_data.cpu_type == pcxw || | 
 | 516 | 		 boot_cpu_data.cpu_type == pcxw_ || | 
 | 517 | 		 boot_cpu_data.cpu_type == pcxw2 || | 
| Kyle McMartin | 2cbd42d | 2007-03-27 16:47:49 -0400 | [diff] [blame] | 518 | 		 boot_cpu_data.cpu_type == mako || | 
 | 519 | 		 boot_cpu_data.cpu_type == mako2) { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 520 | 		perf_processor_interface = CUDA_INTF; | 
 | 521 | 		if (boot_cpu_data.cpu_type == pcxw2 || | 
| Kyle McMartin | 2cbd42d | 2007-03-27 16:47:49 -0400 | [diff] [blame] | 522 | 		    boot_cpu_data.cpu_type == mako || | 
 | 523 | 		    boot_cpu_data.cpu_type == mako2) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 524 | 			bitmask_array = perf_bitmasks_piranha; | 
 | 525 | 	} else { | 
 | 526 | 		perf_processor_interface = UNKNOWN_INTF; | 
 | 527 | 		printk("Performance monitoring counters not supported on this processor\n"); | 
 | 528 | 		return -ENODEV; | 
 | 529 | 	} | 
 | 530 |  | 
 | 531 | 	ret = misc_register(&perf_dev); | 
 | 532 | 	if (ret) { | 
 | 533 | 		printk(KERN_ERR "Performance monitoring counters: " | 
 | 534 | 			"cannot register misc device.\n"); | 
 | 535 | 		return ret; | 
 | 536 | 	} | 
 | 537 |  | 
 | 538 | 	/* Patch the images to match the system */ | 
 | 539 |     	perf_patch_images(); | 
 | 540 |  | 
 | 541 | 	spin_lock_init(&perf_lock); | 
 | 542 |  | 
 | 543 | 	/* TODO: this only lets us access the first cpu.. what to do for SMP? */ | 
| Helge Deller | ef017be | 2008-12-31 03:12:10 +0000 | [diff] [blame] | 544 | 	cpu_device = per_cpu(cpu_data, 0).dev; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 545 | 	printk("Performance monitoring counters enabled for %s\n", | 
| Helge Deller | ef017be | 2008-12-31 03:12:10 +0000 | [diff] [blame] | 546 | 		per_cpu(cpu_data, 0).dev->name); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 547 |  | 
 | 548 | 	return 0; | 
 | 549 | } | 
 | 550 |  | 
 | 551 | /* | 
 | 552 |  * perf_start_counters(void) | 
 | 553 |  * | 
 | 554 |  * Start the counters. | 
 | 555 |  */ | 
 | 556 | static void perf_start_counters(void) | 
 | 557 | { | 
 | 558 | 	/* Enable performance monitor counters */ | 
 | 559 | 	perf_intrigue_enable_perf_counters(); | 
 | 560 | } | 
 | 561 |  | 
 | 562 | /* | 
 | 563 |  * perf_stop_counters | 
 | 564 |  * | 
 | 565 |  * Stop the performance counters and save counts | 
 | 566 |  * in a per_processor array. | 
 | 567 |  */ | 
 | 568 | static int perf_stop_counters(uint32_t *raddr) | 
 | 569 | { | 
 | 570 | 	uint64_t userbuf[MAX_RDR_WORDS]; | 
 | 571 |  | 
 | 572 | 	/* Disable performance counters */ | 
 | 573 | 	perf_intrigue_disable_perf_counters(); | 
 | 574 |  | 
 | 575 | 	if (perf_processor_interface == ONYX_INTF) { | 
 | 576 | 		uint64_t tmp64; | 
 | 577 | 		/* | 
 | 578 | 		 * Read the counters | 
 | 579 | 		 */ | 
 | 580 | 		if (!perf_rdr_read_ubuf(16, userbuf)) | 
 | 581 | 			return -13; | 
 | 582 |  | 
| Simon Arlott | 7022672 | 2007-05-11 20:42:34 +0100 | [diff] [blame] | 583 | 		/* Counter0 is bits 1398 to 1429 */ | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 584 | 		tmp64 =  (userbuf[21] << 22) & 0x00000000ffc00000; | 
 | 585 | 		tmp64 |= (userbuf[22] >> 42) & 0x00000000003fffff; | 
 | 586 | 		/* OR sticky0 (bit 1430) to counter0 bit 32 */ | 
 | 587 | 		tmp64 |= (userbuf[22] >> 10) & 0x0000000080000000; | 
 | 588 | 		raddr[0] = (uint32_t)tmp64; | 
 | 589 |  | 
| Simon Arlott | 7022672 | 2007-05-11 20:42:34 +0100 | [diff] [blame] | 590 | 		/* Counter1 is bits 1431 to 1462 */ | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 591 | 		tmp64 =  (userbuf[22] >> 9) & 0x00000000ffffffff; | 
 | 592 | 		/* OR sticky1 (bit 1463) to counter1 bit 32 */ | 
 | 593 | 		tmp64 |= (userbuf[22] << 23) & 0x0000000080000000; | 
 | 594 | 		raddr[1] = (uint32_t)tmp64; | 
 | 595 |  | 
| Simon Arlott | 7022672 | 2007-05-11 20:42:34 +0100 | [diff] [blame] | 596 | 		/* Counter2 is bits 1464 to 1495 */ | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 597 | 		tmp64 =  (userbuf[22] << 24) & 0x00000000ff000000; | 
 | 598 | 		tmp64 |= (userbuf[23] >> 40) & 0x0000000000ffffff; | 
 | 599 | 		/* OR sticky2 (bit 1496) to counter2 bit 32 */ | 
 | 600 | 		tmp64 |= (userbuf[23] >> 8) & 0x0000000080000000; | 
 | 601 | 		raddr[2] = (uint32_t)tmp64; | 
 | 602 | 		 | 
| Simon Arlott | 7022672 | 2007-05-11 20:42:34 +0100 | [diff] [blame] | 603 | 		/* Counter3 is bits 1497 to 1528 */ | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 604 | 		tmp64 =  (userbuf[23] >> 7) & 0x00000000ffffffff; | 
 | 605 | 		/* OR sticky3 (bit 1529) to counter3 bit 32 */ | 
 | 606 | 		tmp64 |= (userbuf[23] << 25) & 0x0000000080000000; | 
 | 607 | 		raddr[3] = (uint32_t)tmp64; | 
 | 608 |  | 
 | 609 | 		/* | 
 | 610 | 		 * Zero out the counters | 
 | 611 | 		 */ | 
 | 612 |  | 
 | 613 | 		/* | 
 | 614 | 		 * The counters and sticky-bits comprise the last 132 bits | 
 | 615 | 		 * (1398 - 1529) of RDR16 on a U chip.  We'll zero these | 
 | 616 | 		 * out the easy way: zero out last 10 bits of dword 21, | 
 | 617 | 		 * all of dword 22 and 58 bits (plus 6 don't care bits) of | 
 | 618 | 		 * dword 23. | 
 | 619 | 		 */ | 
 | 620 | 		userbuf[21] &= 0xfffffffffffffc00ul;	/* 0 to last 10 bits */ | 
 | 621 | 		userbuf[22] = 0; | 
 | 622 | 		userbuf[23] = 0; | 
 | 623 |  | 
 | 624 | 		/*  | 
| Simon Arlott | 7022672 | 2007-05-11 20:42:34 +0100 | [diff] [blame] | 625 | 		 * Write back the zeroed bytes + the image given | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 626 | 		 * the read was destructive. | 
 | 627 | 		 */ | 
 | 628 | 		perf_rdr_write(16, userbuf); | 
 | 629 | 	} else { | 
 | 630 |  | 
 | 631 | 		/* | 
 | 632 | 		 * Read RDR-15 which contains the counters and sticky bits  | 
 | 633 | 		 */ | 
 | 634 | 		if (!perf_rdr_read_ubuf(15, userbuf)) { | 
 | 635 | 			return -13; | 
 | 636 | 		} | 
 | 637 |  | 
 | 638 | 		/*  | 
 | 639 | 		 * Clear out the counters | 
 | 640 | 		 */ | 
 | 641 | 		perf_rdr_clear(15); | 
 | 642 |  | 
 | 643 | 		/* | 
 | 644 | 		 * Copy the counters  | 
 | 645 | 		 */ | 
 | 646 | 		raddr[0] = (uint32_t)((userbuf[0] >> 32) & 0x00000000ffffffffUL); | 
 | 647 | 		raddr[1] = (uint32_t)(userbuf[0] & 0x00000000ffffffffUL); | 
 | 648 | 		raddr[2] = (uint32_t)((userbuf[1] >> 32) & 0x00000000ffffffffUL); | 
 | 649 | 		raddr[3] = (uint32_t)(userbuf[1] & 0x00000000ffffffffUL); | 
 | 650 | 	} | 
 | 651 |   | 
 | 652 | 	return 0; | 
 | 653 | } | 
 | 654 |  | 
 | 655 | /* | 
 | 656 |  * perf_rdr_get_entry | 
 | 657 |  * | 
 | 658 |  * Retrieve a pointer to the description of what this | 
 | 659 |  * RDR contains. | 
 | 660 |  */ | 
| Helge Deller | cb6fc18 | 2006-01-17 12:40:40 -0700 | [diff] [blame] | 661 | static const struct rdr_tbl_ent * perf_rdr_get_entry(uint32_t rdr_num) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 662 | { | 
 | 663 | 	if (perf_processor_interface == ONYX_INTF) { | 
 | 664 | 		return &perf_rdr_tbl_U[rdr_num]; | 
 | 665 | 	} else { | 
 | 666 | 		return &perf_rdr_tbl_W[rdr_num]; | 
 | 667 | 	} | 
 | 668 | } | 
 | 669 |  | 
 | 670 | /* | 
 | 671 |  * perf_rdr_read_ubuf | 
 | 672 |  * | 
 | 673 |  * Read the RDR value into the buffer specified. | 
 | 674 |  */ | 
 | 675 | static int perf_rdr_read_ubuf(uint32_t	rdr_num, uint64_t *buffer) | 
 | 676 | { | 
 | 677 | 	uint64_t	data, data_mask = 0; | 
 | 678 | 	uint32_t	width, xbits, i; | 
| Helge Deller | cb6fc18 | 2006-01-17 12:40:40 -0700 | [diff] [blame] | 679 | 	const struct rdr_tbl_ent *tentry; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 680 |  | 
 | 681 | 	tentry = perf_rdr_get_entry(rdr_num); | 
 | 682 | 	if ((width = tentry->width) == 0) | 
 | 683 | 		return 0; | 
 | 684 |  | 
 | 685 | 	/* Clear out buffer */ | 
 | 686 | 	i = tentry->num_words; | 
 | 687 | 	while (i--) { | 
 | 688 | 		buffer[i] = 0; | 
 | 689 | 	}	 | 
 | 690 |  | 
 | 691 | 	/* Check for bits an even number of 64 */ | 
 | 692 | 	if ((xbits = width & 0x03f) != 0) { | 
 | 693 | 		data_mask = 1; | 
 | 694 | 		data_mask <<= (64 - xbits); | 
 | 695 | 		data_mask--; | 
 | 696 | 	} | 
 | 697 |  | 
 | 698 | 	/* Grab all of the data */ | 
 | 699 | 	i = tentry->num_words; | 
 | 700 | 	while (i--) { | 
 | 701 |  | 
 | 702 | 		if (perf_processor_interface == ONYX_INTF) { | 
 | 703 | 			data = perf_rdr_shift_in_U(rdr_num, width); | 
 | 704 | 		} else { | 
 | 705 | 			data = perf_rdr_shift_in_W(rdr_num, width); | 
 | 706 | 		} | 
 | 707 | 		if (xbits) { | 
 | 708 | 			buffer[i] |= (data << (64 - xbits)); | 
 | 709 | 			if (i) { | 
 | 710 | 				buffer[i-1] |= ((data >> xbits) & data_mask); | 
 | 711 | 			} | 
 | 712 | 		} else { | 
 | 713 | 			buffer[i] = data; | 
 | 714 | 		} | 
 | 715 | 	} | 
 | 716 |  | 
 | 717 | 	return 1; | 
 | 718 | } | 
 | 719 |  | 
 | 720 | /* | 
 | 721 |  * perf_rdr_clear | 
 | 722 |  * | 
 | 723 |  * Zero out the given RDR register | 
 | 724 |  */ | 
 | 725 | static int perf_rdr_clear(uint32_t	rdr_num) | 
 | 726 | { | 
| Helge Deller | cb6fc18 | 2006-01-17 12:40:40 -0700 | [diff] [blame] | 727 | 	const struct rdr_tbl_ent *tentry; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 728 | 	int32_t		i; | 
 | 729 |  | 
 | 730 | 	tentry = perf_rdr_get_entry(rdr_num); | 
 | 731 |  | 
 | 732 | 	if (tentry->width == 0) { | 
 | 733 | 		return -1; | 
 | 734 | 	} | 
 | 735 |  | 
 | 736 | 	i = tentry->num_words; | 
 | 737 | 	while (i--) { | 
 | 738 | 		if (perf_processor_interface == ONYX_INTF) { | 
 | 739 | 			perf_rdr_shift_out_U(rdr_num, 0UL); | 
 | 740 | 		} else { | 
 | 741 | 			perf_rdr_shift_out_W(rdr_num, 0UL); | 
 | 742 | 		} | 
 | 743 | 	} | 
 | 744 |  | 
 | 745 | 	return 0; | 
 | 746 | } | 
 | 747 |  | 
 | 748 |  | 
 | 749 | /* | 
 | 750 |  * perf_write_image | 
 | 751 |  * | 
 | 752 |  * Write the given image out to the processor | 
 | 753 |  */ | 
 | 754 | static int perf_write_image(uint64_t *memaddr) | 
 | 755 | { | 
 | 756 | 	uint64_t buffer[MAX_RDR_WORDS]; | 
 | 757 | 	uint64_t *bptr; | 
 | 758 | 	uint32_t dwords; | 
| Helge Deller | cb6fc18 | 2006-01-17 12:40:40 -0700 | [diff] [blame] | 759 | 	const uint32_t *intrigue_rdr; | 
 | 760 | 	const uint64_t *intrigue_bitmask; | 
 | 761 | 	uint64_t tmp64; | 
| Matthew Wilcox | 53f01bb | 2005-10-21 22:36:40 -0400 | [diff] [blame] | 762 | 	void __iomem *runway; | 
| Helge Deller | cb6fc18 | 2006-01-17 12:40:40 -0700 | [diff] [blame] | 763 | 	const struct rdr_tbl_ent *tentry; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 764 | 	int i; | 
 | 765 |  | 
 | 766 | 	/* Clear out counters */ | 
 | 767 | 	if (perf_processor_interface == ONYX_INTF) { | 
 | 768 |  | 
 | 769 | 		perf_rdr_clear(16); | 
 | 770 |  | 
 | 771 | 		/* Toggle performance monitor */ | 
 | 772 | 		perf_intrigue_enable_perf_counters(); | 
 | 773 | 		perf_intrigue_disable_perf_counters(); | 
 | 774 |  | 
 | 775 | 		intrigue_rdr = perf_rdrs_U; | 
 | 776 | 	} else { | 
 | 777 | 		perf_rdr_clear(15); | 
 | 778 | 		intrigue_rdr = perf_rdrs_W; | 
 | 779 | 	} | 
 | 780 |  | 
 | 781 | 	/* Write all RDRs */ | 
 | 782 | 	while (*intrigue_rdr != -1) { | 
 | 783 | 		tentry = perf_rdr_get_entry(*intrigue_rdr); | 
 | 784 | 		perf_rdr_read_ubuf(*intrigue_rdr, buffer); | 
 | 785 | 		bptr   = &buffer[0]; | 
 | 786 | 		dwords = tentry->num_words; | 
 | 787 | 		if (tentry->write_control) { | 
 | 788 | 			intrigue_bitmask = &bitmask_array[tentry->write_control >> 3]; | 
 | 789 | 			while (dwords--) { | 
 | 790 | 				tmp64 = *intrigue_bitmask & *memaddr++; | 
 | 791 | 				tmp64 |= (~(*intrigue_bitmask++)) & *bptr; | 
 | 792 | 				*bptr++ = tmp64; | 
 | 793 | 			} | 
 | 794 | 		} else { | 
 | 795 | 			while (dwords--) { | 
 | 796 | 				*bptr++ = *memaddr++; | 
 | 797 | 			} | 
 | 798 | 		} | 
 | 799 |  | 
 | 800 | 		perf_rdr_write(*intrigue_rdr, buffer); | 
 | 801 | 		intrigue_rdr++; | 
 | 802 | 	} | 
 | 803 |  | 
 | 804 | 	/* | 
 | 805 | 	 * Now copy out the Runway stuff which is not in RDRs | 
 | 806 | 	 */ | 
 | 807 |  | 
 | 808 | 	if (cpu_device == NULL) | 
 | 809 | 	{ | 
 | 810 | 		printk(KERN_ERR "write_image: cpu_device not yet initialized!\n"); | 
 | 811 | 		return -1; | 
 | 812 | 	} | 
 | 813 |  | 
| Helge Deller | 5076c15 | 2006-03-27 12:52:15 -0700 | [diff] [blame] | 814 | 	runway = ioremap_nocache(cpu_device->hpa.start, 4096); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 815 |  | 
 | 816 | 	/* Merge intrigue bits into Runway STATUS 0 */ | 
| Matthew Wilcox | 53f01bb | 2005-10-21 22:36:40 -0400 | [diff] [blame] | 817 | 	tmp64 = __raw_readq(runway + RUNWAY_STATUS) & 0xffecfffffffffffful; | 
 | 818 | 	__raw_writeq(tmp64 | (*memaddr++ & 0x0013000000000000ul),  | 
 | 819 | 		     runway + RUNWAY_STATUS); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 820 | 	 | 
 | 821 | 	/* Write RUNWAY DEBUG registers */ | 
 | 822 | 	for (i = 0; i < 8; i++) { | 
| Matthew Wilcox | 53f01bb | 2005-10-21 22:36:40 -0400 | [diff] [blame] | 823 | 		__raw_writeq(*memaddr++, runway + RUNWAY_DEBUG); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 824 | 	} | 
 | 825 |  | 
 | 826 | 	return 0;  | 
 | 827 | } | 
 | 828 |  | 
 | 829 | /* | 
 | 830 |  * perf_rdr_write | 
 | 831 |  * | 
 | 832 |  * Write the given RDR register with the contents | 
 | 833 |  * of the given buffer. | 
 | 834 |  */ | 
 | 835 | static void perf_rdr_write(uint32_t rdr_num, uint64_t *buffer) | 
 | 836 | { | 
| Helge Deller | cb6fc18 | 2006-01-17 12:40:40 -0700 | [diff] [blame] | 837 | 	const struct rdr_tbl_ent *tentry; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 838 | 	int32_t		i; | 
 | 839 |  | 
 | 840 | printk("perf_rdr_write\n"); | 
 | 841 | 	tentry = perf_rdr_get_entry(rdr_num); | 
 | 842 | 	if (tentry->width == 0) { return; } | 
 | 843 |  | 
 | 844 | 	i = tentry->num_words; | 
 | 845 | 	while (i--) { | 
 | 846 | 		if (perf_processor_interface == ONYX_INTF) { | 
 | 847 | 			perf_rdr_shift_out_U(rdr_num, buffer[i]); | 
 | 848 | 		} else { | 
 | 849 | 			perf_rdr_shift_out_W(rdr_num, buffer[i]); | 
 | 850 | 		}	 | 
 | 851 | 	} | 
 | 852 | printk("perf_rdr_write done\n"); | 
 | 853 | } | 
 | 854 |  | 
 | 855 | module_init(perf_init); |