blob: b1a8a55915d907cce1527e01f4a367ad490bd02d [file] [log] [blame]
Huang Ying06d65de2010-05-18 14:35:19 +08001/*
2 * UEFI Common Platform Error Record (CPER) support
3 *
4 * Copyright (C) 2010, Intel Corp.
5 * Author: Huang Ying <ying.huang@intel.com>
6 *
7 * CPER is the format used to describe platform hardware error by
Chen, Gong88f074f2013-10-18 14:28:59 -07008 * various tables, such as ERST, BERT and HEST etc.
Huang Ying06d65de2010-05-18 14:35:19 +08009 *
10 * For more information about CPER, please refer to Appendix N of UEFI
Chen, Gong147de142013-10-18 14:30:13 -070011 * Specification version 2.4.
Huang Ying06d65de2010-05-18 14:35:19 +080012 *
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License version
15 * 2 as published by the Free Software Foundation.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 */
26
27#include <linux/kernel.h>
28#include <linux/module.h>
29#include <linux/time.h>
30#include <linux/cper.h>
Chen, Gongfbeef852013-10-18 14:30:21 -070031#include <linux/dmi.h>
Huang Ying06d65de2010-05-18 14:35:19 +080032#include <linux/acpi.h>
Lance Ortiz1d521002013-01-03 15:34:08 -070033#include <linux/pci.h>
Huang Yingc413d762011-02-21 13:54:43 +080034#include <linux/aer.h>
Huang Ying06d65de2010-05-18 14:35:19 +080035
36/*
37 * CPER record ID need to be unique even after reboot, because record
38 * ID is used as index for ERST storage, while CPER records from
39 * multiple boot may co-exist in ERST.
40 */
41u64 cper_next_record_id(void)
42{
43 static atomic64_t seq;
44
45 if (!atomic64_read(&seq))
46 atomic64_set(&seq, ((u64)get_seconds()) << 32);
47
48 return atomic64_inc_return(&seq);
49}
50EXPORT_SYMBOL_GPL(cper_next_record_id);
51
/* Severity names, indexed by the numeric severity value. */
static const char * const cper_severity_strs[] = {
	"recoverable",
	"fatal",
	"corrected",
	"info",
};

/*
 * cper_severity_str - map a severity value to a printable name
 * @severity: numeric severity value
 *
 * Returns the matching entry of cper_severity_strs, or "unknown" when
 * @severity lies beyond the end of the table.
 */
static const char *cper_severity_str(unsigned int severity)
{
	if (severity >= ARRAY_SIZE(cper_severity_strs))
		return "unknown";

	return cper_severity_strs[severity];
}
64
/*
 * cper_print_bits - print strings for set bits
 * @pfx: prefix for each line, including log level and prefix string
 * @bits: bit mask
 * @strs: string array, indexed by bit position
 * @strs_size: size of the string array: @strs
 *
 * For each set bit in @bits, print the corresponding string in @strs.
 * NULL entries in @strs are skipped.  Strings are joined with ", "; when
 * appending another string would take the line past 80 characters, the
 * current line is printed and a new one is started, with @pfx printed
 * at the beginning of each line.
 *
 * Entries of @strs at positions beyond the width of @bits can never be
 * selected and are ignored.
 */
void cper_print_bits(const char *pfx, unsigned int bits,
		     const char * const strs[], unsigned int strs_size)
{
	/* Number of bit positions @bits has; shifting further is undefined. */
	const unsigned int nr_bits = sizeof(bits) * 8;
	unsigned int i;
	int len = 0;
	const char *str;
	char buf[84];

	if (strs_size > nr_bits)
		strs_size = nr_bits;

	for (i = 0; i < strs_size; i++) {
		if (!(bits & (1U << i)))
			continue;
		str = strs[i];
		if (!str)
			continue;
		/* Flush the pending line if this string would not fit. */
		if (len && len + strlen(str) + 2 > 80) {
			printk("%s\n", buf);
			len = 0;
		}
		/*
		 * The append branch only runs when the check above found
		 * room, so buf + len always stays inside buf.
		 */
		if (!len)
			len = snprintf(buf, sizeof(buf), "%s%s", pfx, str);
		else
			len += snprintf(buf+len, sizeof(buf)-len, ", %s", str);
	}
	if (len)
		printk("%s\n", buf);
}
101
/* Processor type names, looked up by the section's proc_type value. */
static const char * const cper_proc_type_strs[] = {
	[0] = "IA32/X64",
	[1] = "IA64",
};

/* Processor ISA names, looked up by the section's proc_isa value. */
static const char * const cper_proc_isa_strs[] = {
	[0] = "IA32",
	[1] = "IA64",
	[2] = "X64",
};

/* Processor error type names, one per bit position of proc_error_type. */
static const char * const cper_proc_error_type_strs[] = {
	[0] = "cache error",
	[1] = "TLB error",
	[2] = "bus error",
	[3] = "micro-architectural error",
};

/* Operation names, looked up by the section's operation value. */
static const char * const cper_proc_op_strs[] = {
	[0] = "unknown or generic",
	[1] = "data read",
	[2] = "data write",
	[3] = "instruction execution",
};

/* Processor flag names, one per bit position of the flags field. */
static const char * const cper_proc_flag_strs[] = {
	[0] = "restartable",
	[1] = "precise IP",
	[2] = "overflow",
	[3] = "corrected",
};
133
134static void cper_print_proc_generic(const char *pfx,
135 const struct cper_sec_proc_generic *proc)
136{
137 if (proc->validation_bits & CPER_PROC_VALID_TYPE)
138 printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type,
139 proc->proc_type < ARRAY_SIZE(cper_proc_type_strs) ?
140 cper_proc_type_strs[proc->proc_type] : "unknown");
141 if (proc->validation_bits & CPER_PROC_VALID_ISA)
142 printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa,
143 proc->proc_isa < ARRAY_SIZE(cper_proc_isa_strs) ?
144 cper_proc_isa_strs[proc->proc_isa] : "unknown");
145 if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) {
146 printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type);
147 cper_print_bits(pfx, proc->proc_error_type,
148 cper_proc_error_type_strs,
149 ARRAY_SIZE(cper_proc_error_type_strs));
150 }
151 if (proc->validation_bits & CPER_PROC_VALID_OPERATION)
152 printk("%s""operation: %d, %s\n", pfx, proc->operation,
153 proc->operation < ARRAY_SIZE(cper_proc_op_strs) ?
154 cper_proc_op_strs[proc->operation] : "unknown");
155 if (proc->validation_bits & CPER_PROC_VALID_FLAGS) {
156 printk("%s""flags: 0x%02x\n", pfx, proc->flags);
157 cper_print_bits(pfx, proc->flags, cper_proc_flag_strs,
158 ARRAY_SIZE(cper_proc_flag_strs));
159 }
160 if (proc->validation_bits & CPER_PROC_VALID_LEVEL)
161 printk("%s""level: %d\n", pfx, proc->level);
162 if (proc->validation_bits & CPER_PROC_VALID_VERSION)
163 printk("%s""version_info: 0x%016llx\n", pfx, proc->cpu_version);
164 if (proc->validation_bits & CPER_PROC_VALID_ID)
165 printk("%s""processor_id: 0x%016llx\n", pfx, proc->proc_id);
166 if (proc->validation_bits & CPER_PROC_VALID_TARGET_ADDRESS)
167 printk("%s""target_address: 0x%016llx\n",
168 pfx, proc->target_addr);
169 if (proc->validation_bits & CPER_PROC_VALID_REQUESTOR_ID)
170 printk("%s""requestor_id: 0x%016llx\n",
171 pfx, proc->requestor_id);
172 if (proc->validation_bits & CPER_PROC_VALID_RESPONDER_ID)
173 printk("%s""responder_id: 0x%016llx\n",
174 pfx, proc->responder_id);
175 if (proc->validation_bits & CPER_PROC_VALID_IP)
176 printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
177}
178
/*
 * Memory error type names, looked up by the error_type value of a memory
 * error section.  Both the pointers and the strings are read-only, like
 * the other string tables in this file.
 */
static const char * const cper_mem_err_type_strs[] = {
	"unknown",
	"no error",
	"single-bit ECC",
	"multi-bit ECC",
	"single-symbol chipkill ECC",
	"multi-symbol chipkill ECC",
	"master abort",
	"target abort",
	"parity error",
	"watchdog timeout",
	"invalid address",
	"mirror Broken",
	"memory sparing",
	"scrub corrected error",
	"scrub uncorrected error",
	"physical memory map-out event",
};
197
198static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
199{
200 if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
201 printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
Chen, Gong147de142013-10-18 14:30:13 -0700202 if (mem->validation_bits & CPER_MEM_VALID_PA)
Huang Yingf59c55d2010-12-07 10:22:30 +0800203 printk("%s""physical_address: 0x%016llx\n",
204 pfx, mem->physical_addr);
Chen, Gong147de142013-10-18 14:30:13 -0700205 if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
Huang Yingf59c55d2010-12-07 10:22:30 +0800206 printk("%s""physical_address_mask: 0x%016llx\n",
207 pfx, mem->physical_addr_mask);
208 if (mem->validation_bits & CPER_MEM_VALID_NODE)
209 printk("%s""node: %d\n", pfx, mem->node);
210 if (mem->validation_bits & CPER_MEM_VALID_CARD)
211 printk("%s""card: %d\n", pfx, mem->card);
212 if (mem->validation_bits & CPER_MEM_VALID_MODULE)
213 printk("%s""module: %d\n", pfx, mem->module);
Chen, Gongfbeef852013-10-18 14:30:21 -0700214 if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
215 printk("%s""rank: %d\n", pfx, mem->rank);
Huang Yingf59c55d2010-12-07 10:22:30 +0800216 if (mem->validation_bits & CPER_MEM_VALID_BANK)
217 printk("%s""bank: %d\n", pfx, mem->bank);
218 if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
219 printk("%s""device: %d\n", pfx, mem->device);
220 if (mem->validation_bits & CPER_MEM_VALID_ROW)
221 printk("%s""row: %d\n", pfx, mem->row);
222 if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
223 printk("%s""column: %d\n", pfx, mem->column);
224 if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
225 printk("%s""bit_position: %d\n", pfx, mem->bit_pos);
226 if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
227 printk("%s""requestor_id: 0x%016llx\n", pfx, mem->requestor_id);
228 if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
229 printk("%s""responder_id: 0x%016llx\n", pfx, mem->responder_id);
230 if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
231 printk("%s""target_id: 0x%016llx\n", pfx, mem->target_id);
232 if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
233 u8 etype = mem->error_type;
234 printk("%s""error_type: %d, %s\n", pfx, etype,
235 etype < ARRAY_SIZE(cper_mem_err_type_strs) ?
236 cper_mem_err_type_strs[etype] : "unknown");
237 }
Chen, Gongfbeef852013-10-18 14:30:21 -0700238 if (mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) {
239 const char *bank = NULL, *device = NULL;
240 dmi_memdev_name(mem->mem_dev_handle, &bank, &device);
241 if (bank != NULL && device != NULL)
242 printk("%s""DIMM location: %s %s", pfx, bank, device);
243 else
244 printk("%s""DIMM DMI handle: 0x%.4x",
245 pfx, mem->mem_dev_handle);
246 }
Huang Yingf59c55d2010-12-07 10:22:30 +0800247}
248
/*
 * PCIe port type names, looked up by the port_type value of a PCIe error
 * section.  Positions 2 and 3 are listed as "unknown".  Both the
 * pointers and the strings are read-only, like the other string tables
 * in this file.
 */
static const char * const cper_pcie_port_type_strs[] = {
	"PCIe end point",
	"legacy PCI end point",
	"unknown",
	"unknown",
	"root port",
	"upstream switch port",
	"downstream switch port",
	"PCIe to PCI/PCI-X bridge",
	"PCI/PCI-X to PCIe bridge",
	"root complex integrated endpoint device",
	"root complex event collector",
};
262
Huang Yingc413d762011-02-21 13:54:43 +0800263static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
Chen, Gong88f074f2013-10-18 14:28:59 -0700264 const struct acpi_generic_data *gdata)
Huang Yingf59c55d2010-12-07 10:22:30 +0800265{
266 if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
267 printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
268 pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ?
269 cper_pcie_port_type_strs[pcie->port_type] : "unknown");
270 if (pcie->validation_bits & CPER_PCIE_VALID_VERSION)
271 printk("%s""version: %d.%d\n", pfx,
272 pcie->version.major, pcie->version.minor);
273 if (pcie->validation_bits & CPER_PCIE_VALID_COMMAND_STATUS)
274 printk("%s""command: 0x%04x, status: 0x%04x\n", pfx,
275 pcie->command, pcie->status);
276 if (pcie->validation_bits & CPER_PCIE_VALID_DEVICE_ID) {
277 const __u8 *p;
278 printk("%s""device_id: %04x:%02x:%02x.%x\n", pfx,
279 pcie->device_id.segment, pcie->device_id.bus,
280 pcie->device_id.device, pcie->device_id.function);
281 printk("%s""slot: %d\n", pfx,
282 pcie->device_id.slot >> CPER_PCIE_SLOT_SHIFT);
283 printk("%s""secondary_bus: 0x%02x\n", pfx,
284 pcie->device_id.secondary_bus);
285 printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx,
286 pcie->device_id.vendor_id, pcie->device_id.device_id);
287 p = pcie->device_id.class_code;
288 printk("%s""class_code: %02x%02x%02x\n", pfx, p[0], p[1], p[2]);
289 }
290 if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER)
291 printk("%s""serial number: 0x%04x, 0x%04x\n", pfx,
292 pcie->serial_number.lower, pcie->serial_number.upper);
293 if (pcie->validation_bits & CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS)
294 printk(
295 "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
296 pfx, pcie->bridge.secondary_status, pcie->bridge.control);
297}
298
/* Section flag names, one per bit position of a section's flags field. */
static const char * const cper_estatus_section_flag_strs[] = {
	[0] = "primary",
	[1] = "containment warning",
	[2] = "reset",
	[3] = "error threshold exceeded",
	[4] = "resource not accessible",
	[5] = "latent error",
};
307
Chen, Gong88f074f2013-10-18 14:28:59 -0700308static void cper_estatus_print_section(
309 const char *pfx, const struct acpi_generic_data *gdata, int sec_no)
Huang Yingf59c55d2010-12-07 10:22:30 +0800310{
311 uuid_le *sec_type = (uuid_le *)gdata->section_type;
312 __u16 severity;
313
314 severity = gdata->error_severity;
315 printk("%s""section: %d, severity: %d, %s\n", pfx, sec_no, severity,
316 cper_severity_str(severity));
317 printk("%s""flags: 0x%02x\n", pfx, gdata->flags);
Chen, Gong88f074f2013-10-18 14:28:59 -0700318 cper_print_bits(pfx, gdata->flags, cper_estatus_section_flag_strs,
319 ARRAY_SIZE(cper_estatus_section_flag_strs));
Huang Yingf59c55d2010-12-07 10:22:30 +0800320 if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
321 printk("%s""fru_id: %pUl\n", pfx, (uuid_le *)gdata->fru_id);
322 if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
323 printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text);
324
325 if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_GENERIC)) {
326 struct cper_sec_proc_generic *proc_err = (void *)(gdata + 1);
327 printk("%s""section_type: general processor error\n", pfx);
328 if (gdata->error_data_length >= sizeof(*proc_err))
329 cper_print_proc_generic(pfx, proc_err);
330 else
331 goto err_section_too_small;
332 } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
333 struct cper_sec_mem_err *mem_err = (void *)(gdata + 1);
334 printk("%s""section_type: memory error\n", pfx);
335 if (gdata->error_data_length >= sizeof(*mem_err))
336 cper_print_mem(pfx, mem_err);
337 else
338 goto err_section_too_small;
339 } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) {
340 struct cper_sec_pcie *pcie = (void *)(gdata + 1);
341 printk("%s""section_type: PCIe error\n", pfx);
342 if (gdata->error_data_length >= sizeof(*pcie))
Huang Yingc413d762011-02-21 13:54:43 +0800343 cper_print_pcie(pfx, pcie, gdata);
Huang Yingf59c55d2010-12-07 10:22:30 +0800344 else
345 goto err_section_too_small;
346 } else
347 printk("%s""section type: unknown, %pUl\n", pfx, sec_type);
348
349 return;
350
351err_section_too_small:
352 pr_err(FW_WARN "error section length is too small\n");
353}
354
Chen, Gong88f074f2013-10-18 14:28:59 -0700355void cper_estatus_print(const char *pfx,
356 const struct acpi_generic_status *estatus)
Huang Yingf59c55d2010-12-07 10:22:30 +0800357{
Chen, Gong88f074f2013-10-18 14:28:59 -0700358 struct acpi_generic_data *gdata;
Huang Yingf59c55d2010-12-07 10:22:30 +0800359 unsigned int data_len, gedata_len;
360 int sec_no = 0;
361 __u16 severity;
362
Chen, Gong88f074f2013-10-18 14:28:59 -0700363 printk("%s""Generic Hardware Error Status\n", pfx);
Huang Yingf59c55d2010-12-07 10:22:30 +0800364 severity = estatus->error_severity;
365 printk("%s""severity: %d, %s\n", pfx, severity,
366 cper_severity_str(severity));
367 data_len = estatus->data_length;
Chen, Gong88f074f2013-10-18 14:28:59 -0700368 gdata = (struct acpi_generic_data *)(estatus + 1);
Chen, Gong833ba4b2013-10-18 14:27:51 -0700369 while (data_len >= sizeof(*gdata)) {
Huang Yingf59c55d2010-12-07 10:22:30 +0800370 gedata_len = gdata->error_data_length;
Chen, Gong88f074f2013-10-18 14:28:59 -0700371 cper_estatus_print_section(pfx, gdata, sec_no);
Huang Yingf59c55d2010-12-07 10:22:30 +0800372 data_len -= gedata_len + sizeof(*gdata);
Jiang Liu37d2a362012-02-15 00:01:44 +0800373 gdata = (void *)(gdata + 1) + gedata_len;
Huang Yingf59c55d2010-12-07 10:22:30 +0800374 sec_no++;
375 }
376}
Chen, Gong88f074f2013-10-18 14:28:59 -0700377EXPORT_SYMBOL_GPL(cper_estatus_print);
Huang Yingf59c55d2010-12-07 10:22:30 +0800378
Chen, Gong88f074f2013-10-18 14:28:59 -0700379int cper_estatus_check_header(const struct acpi_generic_status *estatus)
Huang Ying06d65de2010-05-18 14:35:19 +0800380{
381 if (estatus->data_length &&
Chen, Gong88f074f2013-10-18 14:28:59 -0700382 estatus->data_length < sizeof(struct acpi_generic_data))
Huang Ying06d65de2010-05-18 14:35:19 +0800383 return -EINVAL;
384 if (estatus->raw_data_length &&
385 estatus->raw_data_offset < sizeof(*estatus) + estatus->data_length)
386 return -EINVAL;
387
388 return 0;
389}
Chen, Gong88f074f2013-10-18 14:28:59 -0700390EXPORT_SYMBOL_GPL(cper_estatus_check_header);
Huang Ying06d65de2010-05-18 14:35:19 +0800391
Chen, Gong88f074f2013-10-18 14:28:59 -0700392int cper_estatus_check(const struct acpi_generic_status *estatus)
Huang Ying06d65de2010-05-18 14:35:19 +0800393{
Chen, Gong88f074f2013-10-18 14:28:59 -0700394 struct acpi_generic_data *gdata;
Huang Ying06d65de2010-05-18 14:35:19 +0800395 unsigned int data_len, gedata_len;
396 int rc;
397
Chen, Gong88f074f2013-10-18 14:28:59 -0700398 rc = cper_estatus_check_header(estatus);
Huang Ying06d65de2010-05-18 14:35:19 +0800399 if (rc)
400 return rc;
401 data_len = estatus->data_length;
Chen, Gong88f074f2013-10-18 14:28:59 -0700402 gdata = (struct acpi_generic_data *)(estatus + 1);
Chen Gongaaf9d932013-03-19 06:48:07 +0000403 while (data_len >= sizeof(*gdata)) {
Huang Ying06d65de2010-05-18 14:35:19 +0800404 gedata_len = gdata->error_data_length;
405 if (gedata_len > data_len - sizeof(*gdata))
406 return -EINVAL;
407 data_len -= gedata_len + sizeof(*gdata);
Jiang Liu37d2a362012-02-15 00:01:44 +0800408 gdata = (void *)(gdata + 1) + gedata_len;
Huang Ying06d65de2010-05-18 14:35:19 +0800409 }
410 if (data_len)
411 return -EINVAL;
412
413 return 0;
414}
Chen, Gong88f074f2013-10-18 14:28:59 -0700415EXPORT_SYMBOL_GPL(cper_estatus_check);