blob: 1eb229a0671307a88fff01da714dd829e96baa9e [file] [log] [blame]
/* Copyright (c) 2012, Code Aurora Forum. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */
12
13#include <linux/interrupt.h>
14#include <linux/irq.h>
15#include <linux/platform_device.h>
16#include <linux/module.h>
17#include <linux/errno.h>
18#include <linux/proc_fs.h>
19#include <linux/cpu.h>
20#include <mach/msm-krait-l2-accessors.h>
21
/*
 * CESR status bits, as decoded by msm_l1_erp_irq().
 */
#define CESR_DCTPE	BIT(0)			/* D-cache tag parity error */
#define CESR_DCDPE	BIT(1)			/* D-cache data parity error */
#define CESR_ICTPE	BIT(2)			/* I-cache tag parity error */
#define CESR_ICDPE	BIT(3)			/* I-cache data parity error */
#define CESR_DCTE	(BIT(4) | BIT(5))	/* D-cache timing error */
#define CESR_ICTE	(BIT(6) | BIT(7))	/* I-cache timing error */
#define CESR_TLBMH	BIT(16)			/* TLB multi-hit error */
/* Bits 2, 3, 6 and 7: the same bits as ICTPE | ICDPE | ICTE above. */
#define CESR_I_MASK	0x000000CC
30
/*
 * Addresses handed to get_l2_indirect_reg()/set_l2_indirect_reg() to reach
 * the L2 error status, syndrome and address registers.
 */
#define L2ESR_IND_ADDR		0x204
#define L2ESYNR0_IND_ADDR	0x208
#define L2ESYNR1_IND_ADDR	0x209
#define L2EAR0_IND_ADDR		0x20C
#define L2EAR1_IND_ADDR		0x20D
36
/*
 * L2ESR status bits, as decoded by msm_l2_erp_irq().
 */
#define L2ESR_MPDCD		BIT(0)	/* master port decode error */
#define L2ESR_MPSLV		BIT(1)	/* master port slave error */
#define L2ESR_TSESB		BIT(2)	/* tag soft error, single-bit */
#define L2ESR_TSEDB		BIT(3)	/* tag soft error, double-bit */
#define L2ESR_DSESB		BIT(4)	/* data soft error, single-bit */
#define L2ESR_DSEDB		BIT(5)	/* data soft error, double-bit */
#define L2ESR_MSE		BIT(6)	/* modified soft error */
#define L2ESR_MPLDREXNOK	BIT(8)	/* LDREX got a Normal OK response */

/*
 * Bits 2-15, i.e. everything in the low half-word except MPDCD and MPSLV.
 * msm_l2_erp_irq() always logs when one of these bits is set, whatever
 * print_access_errors() says.
 */
#define L2ESR_ACCESS_ERR_MASK	0xFFFC

/* The "CPU bitmap" field is (L2ESR >> L2ESR_CPU_SHIFT) & L2ESR_CPU_MASK. */
#define L2ESR_CPU_MASK		0x0F
#define L2ESR_CPU_SHIFT		16
50
/*
 * Build-time policy hooks. Each ERP_*_ERR(msg) escalates according to its
 * Kconfig option; without the option it is a no-op, except for port errors,
 * which fall back to a WARN.
 */

/* L1 cache / TLB errors. */
#if defined(CONFIG_MSM_L1_ERR_PANIC)
#define ERP_L1_ERR(a)		panic(a)
#else
#define ERP_L1_ERR(a)		do { } while (0)
#endif

/* L2 master port errors. */
#if defined(CONFIG_MSM_L2_ERP_PORT_PANIC)
#define ERP_PORT_ERR(a)		panic(a)
#else
#define ERP_PORT_ERR(a)		WARN(1, a)
#endif

/* L2 soft errors with no double-bit error reported alongside. */
#if defined(CONFIG_MSM_L2_ERP_1BIT_PANIC)
#define ERP_1BIT_ERR(a)		panic(a)
#else
#define ERP_1BIT_ERR(a)		do { } while (0)
#endif

/* Evaluates to 1 when L2 access errors should always be logged, else 0. */
#if defined(CONFIG_MSM_L2_ERP_PRINT_ACCESS_ERRORS)
#define print_access_errors()	1
#else
#define print_access_errors()	0
#endif

/* L2 double-bit errors. */
#if defined(CONFIG_MSM_L2_ERP_2BIT_PANIC)
#define ERP_2BIT_ERR(a)		panic(a)
#else
#define ERP_2BIT_ERR(a)		do { } while (0)
#endif
80
/* Platform driver name; used as .driver.name of msm_cache_erp_driver. */
#define MODULE_NAME		"msm_cache_erp"
82
/*
 * L1 error event counters. One instance exists per CPU; the counters are
 * bumped by msm_l1_erp_irq() and reported by proc_read_status().
 */
struct msm_l1_err_stats {
	unsigned int dctpe;	/* D-cache tag parity errors */
	unsigned int dcdpe;	/* D-cache data parity errors */
	unsigned int ictpe;	/* I-cache tag parity errors */
	unsigned int icdpe;	/* I-cache data parity errors */
	unsigned int dcte;	/* D-cache timing errors */
	unsigned int icte;	/* I-cache timing errors */
	unsigned int tlbmh;	/* TLB multi-hit errors */
};
92
/*
 * L2 error event counters. A single instance is shared by the whole system;
 * the counters are bumped by msm_l2_erp_irq() and reported by
 * proc_read_status().
 */
struct msm_l2_err_stats {
	unsigned int mpdcd;		/* master port decode errors */
	unsigned int mpslv;		/* master port slave errors */
	unsigned int tsesb;		/* tag soft errors, single-bit */
	unsigned int tsedb;		/* tag soft errors, double-bit */
	unsigned int dsesb;		/* data soft errors, single-bit */
	unsigned int dsedb;		/* data soft errors, double-bit */
	unsigned int mse;		/* modified soft errors */
	unsigned int mplxrexnok;	/* master port LDREX NOK errors */
};
103
104static DEFINE_PER_CPU(struct msm_l1_err_stats, msm_l1_erp_stats);
105static struct msm_l2_err_stats msm_l2_erp_stats;
106
107static int l1_erp_irq, l2_erp_irq;
108static struct proc_dir_entry *procfs_entry;
109
110static inline unsigned int read_cesr(void)
111{
112 unsigned int cesr;
113 asm volatile ("mrc p15, 7, %0, c15, c0, 1" : "=r" (cesr));
114 return cesr;
115}
116
117static inline void write_cesr(unsigned int cesr)
118{
119 asm volatile ("mcr p15, 7, %[cesr], c15, c0, 1" : : [cesr]"r" (cesr));
120}
121
122static inline unsigned int read_cesynr(void)
123{
124 unsigned int cesynr;
125 asm volatile ("mrc p15, 7, %0, c15, c0, 3" : "=r" (cesynr));
126 return cesynr;
127}
128
129static int proc_read_status(char *page, char **start, off_t off, int count,
130 int *eof, void *data)
131{
132 struct msm_l1_err_stats *l1_stats;
133 char *p = page;
134 int len, cpu, ret, bytes_left = PAGE_SIZE;
135
136 for_each_present_cpu(cpu) {
137 l1_stats = &per_cpu(msm_l1_erp_stats, cpu);
138
139 ret = snprintf(p, bytes_left,
140 "CPU %d:\n" \
141 "\tD-cache tag parity errors:\t%u\n" \
142 "\tD-cache data parity errors:\t%u\n" \
143 "\tI-cache tag parity errors:\t%u\n" \
144 "\tI-cache data parity errors:\t%u\n" \
145 "\tD-cache timing errors:\t\t%u\n" \
146 "\tI-cache timing errors:\t\t%u\n" \
147 "\tTLB multi-hit errors:\t\t%u\n\n", \
148 cpu,
149 l1_stats->dctpe,
150 l1_stats->dcdpe,
151 l1_stats->ictpe,
152 l1_stats->icdpe,
153 l1_stats->dcte,
154 l1_stats->icte,
155 l1_stats->tlbmh);
156 p += ret;
157 bytes_left -= ret;
158 }
159
160 p += snprintf(p, bytes_left,
161 "L2 master port decode errors:\t\t%u\n" \
162 "L2 master port slave errors:\t\t%u\n" \
163 "L2 tag soft errors, single-bit:\t\t%u\n" \
164 "L2 tag soft errors, double-bit:\t\t%u\n" \
165 "L2 data soft errors, single-bit:\t%u\n" \
166 "L2 data soft errors, double-bit:\t%u\n" \
167 "L2 modified soft errors:\t\t%u\n" \
168 "L2 master port LDREX NOK errors:\t%u\n",
169 msm_l2_erp_stats.mpdcd,
170 msm_l2_erp_stats.mpslv,
171 msm_l2_erp_stats.tsesb,
172 msm_l2_erp_stats.tsedb,
173 msm_l2_erp_stats.dsesb,
174 msm_l2_erp_stats.dsedb,
175 msm_l2_erp_stats.mse,
176 msm_l2_erp_stats.mplxrexnok);
177
178 len = (p - page) - off;
179 if (len < 0)
180 len = 0;
181
182 *eof = (len <= count) ? 1 : 0;
183 *start = page + off;
184
185 return len;
186}
187
188static irqreturn_t msm_l1_erp_irq(int irq, void *dev_id)
189{
190 struct msm_l1_err_stats *l1_stats = dev_id;
191 unsigned int cesr = read_cesr();
192 unsigned int i_cesynr, d_cesynr;
193
194 pr_alert("L1 Error detected on CPU %d!\n", smp_processor_id());
195 pr_alert("\tCESR = 0x%08x\n", cesr);
196
197 if (cesr & CESR_DCTPE) {
198 pr_alert("D-cache tag parity error\n");
199 l1_stats->dctpe++;
200 }
201
202 if (cesr & CESR_DCDPE) {
203 pr_alert("D-cache data parity error\n");
204 l1_stats->dcdpe++;
205 }
206
207 if (cesr & CESR_ICTPE) {
208 pr_alert("I-cache tag parity error\n");
209 l1_stats->ictpe++;
210 }
211
212 if (cesr & CESR_ICDPE) {
213 pr_alert("I-cache data parity error\n");
214 l1_stats->icdpe++;
215 }
216
217 if (cesr & CESR_DCTE) {
218 pr_alert("D-cache timing error\n");
219 l1_stats->dcte++;
220 }
221
222 if (cesr & CESR_ICTE) {
223 pr_alert("I-cache timing error\n");
224 l1_stats->icte++;
225 }
226
227 if (cesr & CESR_TLBMH) {
228 pr_alert("TLB multi-hit error\n");
229 l1_stats->tlbmh++;
230 }
231
232 if (cesr & (CESR_ICTPE | CESR_ICDPE | CESR_ICTE)) {
233 i_cesynr = read_cesynr();
234 pr_alert("I-side CESYNR = 0x%08x\n", i_cesynr);
235 write_cesr(CESR_I_MASK);
236
237 /*
238 * Clear the I-side bits from the captured CESR value so that we
239 * don't accidentally clear any new I-side errors when we do
240 * the CESR write-clear operation.
241 */
242 cesr &= ~CESR_I_MASK;
243 }
244
245 if (cesr & (CESR_DCTPE | CESR_DCDPE | CESR_DCTE)) {
246 d_cesynr = read_cesynr();
247 pr_alert("D-side CESYNR = 0x%08x\n", d_cesynr);
248 }
249
250 /* Clear the interrupt bits we processed */
251 write_cesr(cesr);
252
253 ERP_L1_ERR("L1 cache / TLB error detected");
254
255 return IRQ_HANDLED;
256}
257
258static irqreturn_t msm_l2_erp_irq(int irq, void *dev_id)
259{
260 unsigned int l2esr;
261 unsigned int l2esynr0;
262 unsigned int l2esynr1;
263 unsigned int l2ear0;
264 unsigned int l2ear1;
265 int soft_error = 0;
266 int port_error = 0;
267 int unrecoverable = 0;
Stepan Moskovchenkoe9a5dc12012-04-03 20:25:49 -0700268 int print_alert;
Stepan Moskovchenko07552e12012-02-29 20:09:32 -0800269
270 l2esr = get_l2_indirect_reg(L2ESR_IND_ADDR);
271 l2esynr0 = get_l2_indirect_reg(L2ESYNR0_IND_ADDR);
272 l2esynr1 = get_l2_indirect_reg(L2ESYNR1_IND_ADDR);
273 l2ear0 = get_l2_indirect_reg(L2EAR0_IND_ADDR);
274 l2ear1 = get_l2_indirect_reg(L2EAR1_IND_ADDR);
275
Stepan Moskovchenkoe9a5dc12012-04-03 20:25:49 -0700276 print_alert = print_access_errors() || (l2esr & L2ESR_ACCESS_ERR_MASK);
277
278 if (print_alert) {
279 pr_alert("L2 Error detected!\n");
280 pr_alert("\tL2ESR = 0x%08x\n", l2esr);
281 pr_alert("\tL2ESYNR0 = 0x%08x\n", l2esynr0);
282 pr_alert("\tL2ESYNR1 = 0x%08x\n", l2esynr1);
283 pr_alert("\tL2EAR0 = 0x%08x\n", l2ear0);
284 pr_alert("\tL2EAR1 = 0x%08x\n", l2ear1);
285 pr_alert("\tCPU bitmap = 0x%x\n", (l2esr >> L2ESR_CPU_SHIFT) &
286 L2ESR_CPU_MASK);
287 }
Stepan Moskovchenko07552e12012-02-29 20:09:32 -0800288
289 if (l2esr & L2ESR_MPDCD) {
Stepan Moskovchenkoe9a5dc12012-04-03 20:25:49 -0700290 if (print_alert)
291 pr_alert("L2 master port decode error\n");
Stepan Moskovchenko07552e12012-02-29 20:09:32 -0800292 port_error++;
293 msm_l2_erp_stats.mpdcd++;
294 }
295
296 if (l2esr & L2ESR_MPSLV) {
Stepan Moskovchenkoe9a5dc12012-04-03 20:25:49 -0700297 if (print_alert)
298 pr_alert("L2 master port slave error\n");
Stepan Moskovchenko07552e12012-02-29 20:09:32 -0800299 port_error++;
300 msm_l2_erp_stats.mpslv++;
301 }
302
303 if (l2esr & L2ESR_TSESB) {
304 pr_alert("L2 tag soft error, single-bit\n");
305 soft_error++;
306 msm_l2_erp_stats.tsesb++;
307 }
308
309 if (l2esr & L2ESR_TSEDB) {
310 pr_alert("L2 tag soft error, double-bit\n");
311 soft_error++;
312 unrecoverable++;
313 msm_l2_erp_stats.tsedb++;
314 }
315
316 if (l2esr & L2ESR_DSESB) {
317 pr_alert("L2 data soft error, single-bit\n");
318 soft_error++;
319 msm_l2_erp_stats.dsesb++;
320 }
321
322 if (l2esr & L2ESR_DSEDB) {
323 pr_alert("L2 data soft error, double-bit\n");
324 soft_error++;
325 unrecoverable++;
326 msm_l2_erp_stats.dsedb++;
327 }
328
329 if (l2esr & L2ESR_MSE) {
330 pr_alert("L2 modified soft error\n");
331 soft_error++;
332 msm_l2_erp_stats.mse++;
333 }
334
335 if (l2esr & L2ESR_MPLDREXNOK) {
336 pr_alert("L2 master port LDREX received Normal OK response\n");
337 port_error++;
338 msm_l2_erp_stats.mplxrexnok++;
339 }
340
Stepan Moskovchenkoe9a5dc12012-04-03 20:25:49 -0700341 if (port_error && print_alert)
Stepan Moskovchenko07552e12012-02-29 20:09:32 -0800342 ERP_PORT_ERR("L2 master port error detected");
343
344 if (soft_error && !unrecoverable)
345 ERP_1BIT_ERR("L2 single-bit error detected");
346
347 if (unrecoverable)
348 ERP_2BIT_ERR("L2 double-bit error detected, trouble ahead");
349
350 set_l2_indirect_reg(L2ESR_IND_ADDR, l2esr);
351 return IRQ_HANDLED;
352}
353
354static void enable_erp_irq_callback(void *info)
355{
356 enable_percpu_irq(l1_erp_irq, IRQ_TYPE_LEVEL_HIGH);
357}
358
359static void disable_erp_irq_callback(void *info)
360{
361 disable_percpu_irq(l1_erp_irq);
362}
363
364static int cache_erp_cpu_callback(struct notifier_block *nfb,
365 unsigned long action, void *hcpu)
366{
367 switch (action & (~CPU_TASKS_FROZEN)) {
368 case CPU_STARTING:
369 enable_erp_irq_callback(NULL);
370 break;
371
372 case CPU_DYING:
373 disable_erp_irq_callback(NULL);
374 break;
375 }
376 return NOTIFY_OK;
377}
378
379static struct notifier_block cache_erp_cpu_notifier = {
380 .notifier_call = cache_erp_cpu_callback,
381};
382
383static int msm_cache_erp_probe(struct platform_device *pdev)
384{
385 struct resource *r;
386 int ret, cpu;
387
388 r = platform_get_resource_byname(pdev, IORESOURCE_IRQ, "l1_irq");
389
390 if (!r) {
391 pr_err("Could not get L1 resource\n");
392 ret = -ENODEV;
393 goto fail;
394 }
395
396 l1_erp_irq = r->start;
397
398 ret = request_percpu_irq(l1_erp_irq, msm_l1_erp_irq, "MSM_L1",
399 &msm_l1_erp_stats);
400
401 if (ret) {
402 pr_err("Failed to request the L1 cache error interrupt\n");
403 goto fail;
404 }
405
406 r = platform_get_resource_byname(pdev, IORESOURCE_IRQ, "l2_irq");
407
408 if (!r) {
409 pr_err("Could not get L2 resource\n");
410 ret = -ENODEV;
411 goto fail_l1;
412 }
413
414 l2_erp_irq = r->start;
415 ret = request_irq(l2_erp_irq, msm_l2_erp_irq, 0, "MSM_L2", NULL);
416
417 if (ret) {
418 pr_err("Failed to request the L2 cache error interrupt\n");
419 goto fail_l1;
420 }
421
422 procfs_entry = create_proc_entry("cpu/msm_cache_erp", S_IRUGO, NULL);
423
424 if (!procfs_entry) {
425 pr_err("Failed to create procfs node for cache error reporting\n");
426 ret = -ENODEV;
427 goto fail_l2;
428 }
429
430 get_online_cpus();
431 register_hotcpu_notifier(&cache_erp_cpu_notifier);
432 for_each_cpu(cpu, cpu_online_mask)
433 smp_call_function_single(cpu, enable_erp_irq_callback, NULL, 1);
434 put_online_cpus();
435
436 procfs_entry->read_proc = proc_read_status;
437 return 0;
438
439fail_l2:
440 free_irq(l2_erp_irq, NULL);
441fail_l1:
442 free_percpu_irq(l1_erp_irq, NULL);
443fail:
444 return ret;
445}
446
447static int msm_cache_erp_remove(struct platform_device *pdev)
448{
449 int cpu;
450 if (procfs_entry)
451 remove_proc_entry("cpu/msm_cache_erp", NULL);
452
453 get_online_cpus();
454 unregister_hotcpu_notifier(&cache_erp_cpu_notifier);
455 for_each_cpu(cpu, cpu_online_mask)
456 smp_call_function_single(cpu, disable_erp_irq_callback, NULL,
457 1);
458 put_online_cpus();
459
460 free_percpu_irq(l1_erp_irq, NULL);
461
462 disable_irq(l2_erp_irq);
463 free_irq(l2_erp_irq, NULL);
464 return 0;
465}
466
467static struct platform_driver msm_cache_erp_driver = {
468 .probe = msm_cache_erp_probe,
469 .remove = msm_cache_erp_remove,
470 .driver = {
471 .name = MODULE_NAME,
472 .owner = THIS_MODULE,
473 },
474};
475
476static int __init msm_cache_erp_init(void)
477{
478 return platform_driver_register(&msm_cache_erp_driver);
479}
480
481static void __exit msm_cache_erp_exit(void)
482{
483 platform_driver_unregister(&msm_cache_erp_driver);
484}
485
486
487module_init(msm_cache_erp_init);
488module_exit(msm_cache_erp_exit);
489MODULE_LICENSE("GPL v2");
490MODULE_DESCRIPTION("MSM cache error reporting driver");