blob: 63702c0badb973651d5629f15a72d8eec10fcc13 [file] [log] [blame]
David Brazdil0f672f62019-12-10 10:32:29 +00001// SPDX-License-Identifier: GPL-2.0-or-later
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002/*
3 * Machine check exception handling.
4 *
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005 * Copyright 2013 IBM Corporation
6 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
7 */
8
9#undef DEBUG
10#define pr_fmt(fmt) "mce: " fmt
11
12#include <linux/hardirq.h>
13#include <linux/types.h>
14#include <linux/ptrace.h>
15#include <linux/percpu.h>
16#include <linux/export.h>
17#include <linux/irq_work.h>
Olivier Deprez157378f2022-04-04 15:47:50 +020018#include <linux/extable.h>
19#include <linux/ftrace.h>
Andrew Scullb4b6d4a2019-01-02 15:54:55 +000020
21#include <asm/machdep.h>
22#include <asm/mce.h>
David Brazdil0f672f62019-12-10 10:32:29 +000023#include <asm/nmi.h>
Andrew Scullb4b6d4a2019-01-02 15:54:55 +000024
25static DEFINE_PER_CPU(int, mce_nest_count);
26static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
27
28/* Queue for delayed MCE events. */
29static DEFINE_PER_CPU(int, mce_queue_count);
30static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);
31
32/* Queue for delayed MCE UE events. */
33static DEFINE_PER_CPU(int, mce_ue_count);
34static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
35 mce_ue_event_queue);
36
37static void machine_check_process_queued_event(struct irq_work *work);
David Brazdil0f672f62019-12-10 10:32:29 +000038static void machine_check_ue_irq_work(struct irq_work *work);
39static void machine_check_ue_event(struct machine_check_event *evt);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +000040static void machine_process_ue_event(struct work_struct *work);
41
42static struct irq_work mce_event_process_work = {
43 .func = machine_check_process_queued_event,
44};
45
David Brazdil0f672f62019-12-10 10:32:29 +000046static struct irq_work mce_ue_event_irq_work = {
47 .func = machine_check_ue_irq_work,
48};
49
Andrew Scullb4b6d4a2019-01-02 15:54:55 +000050DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
51
Olivier Deprez157378f2022-04-04 15:47:50 +020052static BLOCKING_NOTIFIER_HEAD(mce_notifier_list);
53
54int mce_register_notifier(struct notifier_block *nb)
55{
56 return blocking_notifier_chain_register(&mce_notifier_list, nb);
57}
58EXPORT_SYMBOL_GPL(mce_register_notifier);
59
60int mce_unregister_notifier(struct notifier_block *nb)
61{
62 return blocking_notifier_chain_unregister(&mce_notifier_list, nb);
63}
64EXPORT_SYMBOL_GPL(mce_unregister_notifier);
65
Andrew Scullb4b6d4a2019-01-02 15:54:55 +000066static void mce_set_error_info(struct machine_check_event *mce,
67 struct mce_error_info *mce_err)
68{
69 mce->error_type = mce_err->error_type;
70 switch (mce_err->error_type) {
71 case MCE_ERROR_TYPE_UE:
72 mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
73 break;
74 case MCE_ERROR_TYPE_SLB:
75 mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
76 break;
77 case MCE_ERROR_TYPE_ERAT:
78 mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
79 break;
80 case MCE_ERROR_TYPE_TLB:
81 mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
82 break;
83 case MCE_ERROR_TYPE_USER:
84 mce->u.user_error.user_error_type = mce_err->u.user_error_type;
85 break;
86 case MCE_ERROR_TYPE_RA:
87 mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
88 break;
89 case MCE_ERROR_TYPE_LINK:
90 mce->u.link_error.link_error_type = mce_err->u.link_error_type;
91 break;
92 case MCE_ERROR_TYPE_UNKNOWN:
93 default:
94 break;
95 }
96}
97
98/*
99 * Decode and save high level MCE information into per cpu buffer which
100 * is an array of machine_check_event structure.
101 */
102void save_mce_event(struct pt_regs *regs, long handled,
103 struct mce_error_info *mce_err,
104 uint64_t nip, uint64_t addr, uint64_t phys_addr)
105{
106 int index = __this_cpu_inc_return(mce_nest_count) - 1;
107 struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);
108
109 /*
110 * Return if we don't have enough space to log mce event.
111 * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
112 * the check below will stop buffer overrun.
113 */
114 if (index >= MAX_MC_EVT)
115 return;
116
117 /* Populate generic machine check info */
118 mce->version = MCE_V1;
119 mce->srr0 = nip;
120 mce->srr1 = regs->msr;
121 mce->gpr3 = regs->gpr[3];
122 mce->in_use = 1;
David Brazdil0f672f62019-12-10 10:32:29 +0000123 mce->cpu = get_paca()->paca_index;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000124
125 /* Mark it recovered if we have handled it and MSR(RI=1). */
126 if (handled && (regs->msr & MSR_RI))
127 mce->disposition = MCE_DISPOSITION_RECOVERED;
128 else
129 mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
130
131 mce->initiator = mce_err->initiator;
132 mce->severity = mce_err->severity;
David Brazdil0f672f62019-12-10 10:32:29 +0000133 mce->sync_error = mce_err->sync_error;
134 mce->error_class = mce_err->error_class;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000135
136 /*
137 * Populate the mce error_type and type-specific error_type.
138 */
139 mce_set_error_info(mce, mce_err);
140
141 if (!addr)
142 return;
143
144 if (mce->error_type == MCE_ERROR_TYPE_TLB) {
145 mce->u.tlb_error.effective_address_provided = true;
146 mce->u.tlb_error.effective_address = addr;
147 } else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
148 mce->u.slb_error.effective_address_provided = true;
149 mce->u.slb_error.effective_address = addr;
150 } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
151 mce->u.erat_error.effective_address_provided = true;
152 mce->u.erat_error.effective_address = addr;
153 } else if (mce->error_type == MCE_ERROR_TYPE_USER) {
154 mce->u.user_error.effective_address_provided = true;
155 mce->u.user_error.effective_address = addr;
156 } else if (mce->error_type == MCE_ERROR_TYPE_RA) {
157 mce->u.ra_error.effective_address_provided = true;
158 mce->u.ra_error.effective_address = addr;
159 } else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
160 mce->u.link_error.effective_address_provided = true;
161 mce->u.link_error.effective_address = addr;
162 } else if (mce->error_type == MCE_ERROR_TYPE_UE) {
163 mce->u.ue_error.effective_address_provided = true;
164 mce->u.ue_error.effective_address = addr;
165 if (phys_addr != ULONG_MAX) {
166 mce->u.ue_error.physical_address_provided = true;
167 mce->u.ue_error.physical_address = phys_addr;
David Brazdil0f672f62019-12-10 10:32:29 +0000168 mce->u.ue_error.ignore_event = mce_err->ignore_event;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000169 machine_check_ue_event(mce);
170 }
171 }
172 return;
173}
174
175/*
176 * get_mce_event:
177 * mce Pointer to machine_check_event structure to be filled.
178 * release Flag to indicate whether to free the event slot or not.
179 * 0 <= do not release the mce event. Caller will invoke
180 * release_mce_event() once event has been consumed.
181 * 1 <= release the slot.
182 *
183 * return 1 = success
184 * 0 = failure
185 *
186 * get_mce_event() will be called by platform specific machine check
187 * handle routine and in KVM.
188 * When we call get_mce_event(), we are still in interrupt context and
189 * preemption will not be scheduled until ret_from_expect() routine
190 * is called.
191 */
192int get_mce_event(struct machine_check_event *mce, bool release)
193{
194 int index = __this_cpu_read(mce_nest_count) - 1;
195 struct machine_check_event *mc_evt;
196 int ret = 0;
197
198 /* Sanity check */
199 if (index < 0)
200 return ret;
201
202 /* Check if we have MCE info to process. */
203 if (index < MAX_MC_EVT) {
204 mc_evt = this_cpu_ptr(&mce_event[index]);
205 /* Copy the event structure and release the original */
206 if (mce)
207 *mce = *mc_evt;
208 if (release)
209 mc_evt->in_use = 0;
210 ret = 1;
211 }
212 /* Decrement the count to free the slot. */
213 if (release)
214 __this_cpu_dec(mce_nest_count);
215
216 return ret;
217}
218
/*
 * Release the most recently saved machine check event on this CPU without
 * copying it out.
 */
void release_mce_event(void)
{
	(void)get_mce_event(NULL, true);
}
223
David Brazdil0f672f62019-12-10 10:32:29 +0000224static void machine_check_ue_irq_work(struct irq_work *work)
225{
226 schedule_work(&mce_ue_event_work);
227}
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000228
229/*
230 * Queue up the MCE event which then can be handled later.
231 */
David Brazdil0f672f62019-12-10 10:32:29 +0000232static void machine_check_ue_event(struct machine_check_event *evt)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000233{
234 int index;
235
236 index = __this_cpu_inc_return(mce_ue_count) - 1;
237 /* If queue is full, just return for now. */
238 if (index >= MAX_MC_EVT) {
239 __this_cpu_dec(mce_ue_count);
240 return;
241 }
242 memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));
243
244 /* Queue work to process this event later. */
David Brazdil0f672f62019-12-10 10:32:29 +0000245 irq_work_queue(&mce_ue_event_irq_work);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000246}
247
248/*
249 * Queue up the MCE event which then can be handled later.
250 */
251void machine_check_queue_event(void)
252{
253 int index;
254 struct machine_check_event evt;
255
256 if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
257 return;
258
259 index = __this_cpu_inc_return(mce_queue_count) - 1;
260 /* If queue is full, just return for now. */
261 if (index >= MAX_MC_EVT) {
262 __this_cpu_dec(mce_queue_count);
263 return;
264 }
265 memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt));
266
267 /* Queue irq work to process this event later. */
268 irq_work_queue(&mce_event_process_work);
269}
Olivier Deprez157378f2022-04-04 15:47:50 +0200270
271void mce_common_process_ue(struct pt_regs *regs,
272 struct mce_error_info *mce_err)
273{
274 const struct exception_table_entry *entry;
275
276 entry = search_kernel_exception_table(regs->nip);
277 if (entry) {
278 mce_err->ignore_event = true;
279 regs->nip = extable_fixup(entry);
280 }
281}
282
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000283/*
284 * process pending MCE event from the mce event queue. This function will be
285 * called during syscall exit.
286 */
287static void machine_process_ue_event(struct work_struct *work)
288{
289 int index;
290 struct machine_check_event *evt;
291
292 while (__this_cpu_read(mce_ue_count) > 0) {
293 index = __this_cpu_read(mce_ue_count) - 1;
294 evt = this_cpu_ptr(&mce_ue_event_queue[index]);
Olivier Deprez157378f2022-04-04 15:47:50 +0200295 blocking_notifier_call_chain(&mce_notifier_list, 0, evt);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000296#ifdef CONFIG_MEMORY_FAILURE
297 /*
298 * This should probably queued elsewhere, but
299 * oh! well
David Brazdil0f672f62019-12-10 10:32:29 +0000300 *
301 * Don't report this machine check because the caller has a
302 * asked us to ignore the event, it has a fixup handler which
303 * will do the appropriate error handling and reporting.
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000304 */
305 if (evt->error_type == MCE_ERROR_TYPE_UE) {
David Brazdil0f672f62019-12-10 10:32:29 +0000306 if (evt->u.ue_error.ignore_event) {
307 __this_cpu_dec(mce_ue_count);
308 continue;
309 }
310
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000311 if (evt->u.ue_error.physical_address_provided) {
312 unsigned long pfn;
313
314 pfn = evt->u.ue_error.physical_address >>
315 PAGE_SHIFT;
316 memory_failure(pfn, 0);
317 } else
318 pr_warn("Failed to identify bad address from "
319 "where the uncorrectable error (UE) "
320 "was generated\n");
321 }
322#endif
323 __this_cpu_dec(mce_ue_count);
324 }
325}
326/*
327 * process pending MCE event from the mce event queue. This function will be
328 * called during syscall exit.
329 */
330static void machine_check_process_queued_event(struct irq_work *work)
331{
332 int index;
333 struct machine_check_event *evt;
334
335 add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
336
337 /*
338 * For now just print it to console.
339 * TODO: log this error event to FSP or nvram.
340 */
341 while (__this_cpu_read(mce_queue_count) > 0) {
342 index = __this_cpu_read(mce_queue_count) - 1;
343 evt = this_cpu_ptr(&mce_event_queue[index]);
David Brazdil0f672f62019-12-10 10:32:29 +0000344
345 if (evt->error_type == MCE_ERROR_TYPE_UE &&
346 evt->u.ue_error.ignore_event) {
347 __this_cpu_dec(mce_queue_count);
348 continue;
349 }
350 machine_check_print_event_info(evt, false, false);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000351 __this_cpu_dec(mce_queue_count);
352 }
353}
354
355void machine_check_print_event_info(struct machine_check_event *evt,
David Brazdil0f672f62019-12-10 10:32:29 +0000356 bool user_mode, bool in_guest)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000357{
David Brazdil0f672f62019-12-10 10:32:29 +0000358 const char *level, *sevstr, *subtype, *err_type, *initiator;
359 uint64_t ea = 0, pa = 0;
360 int n = 0;
361 char dar_str[50];
362 char pa_str[50];
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000363 static const char *mc_ue_types[] = {
364 "Indeterminate",
365 "Instruction fetch",
366 "Page table walk ifetch",
367 "Load/Store",
368 "Page table walk Load/Store",
369 };
370 static const char *mc_slb_types[] = {
371 "Indeterminate",
372 "Parity",
373 "Multihit",
374 };
375 static const char *mc_erat_types[] = {
376 "Indeterminate",
377 "Parity",
378 "Multihit",
379 };
380 static const char *mc_tlb_types[] = {
381 "Indeterminate",
382 "Parity",
383 "Multihit",
384 };
385 static const char *mc_user_types[] = {
386 "Indeterminate",
387 "tlbie(l) invalid",
Olivier Deprez157378f2022-04-04 15:47:50 +0200388 "scv invalid",
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000389 };
390 static const char *mc_ra_types[] = {
391 "Indeterminate",
392 "Instruction fetch (bad)",
393 "Instruction fetch (foreign)",
394 "Page table walk ifetch (bad)",
395 "Page table walk ifetch (foreign)",
396 "Load (bad)",
397 "Store (bad)",
398 "Page table walk Load/Store (bad)",
399 "Page table walk Load/Store (foreign)",
400 "Load/Store (foreign)",
401 };
402 static const char *mc_link_types[] = {
403 "Indeterminate",
404 "Instruction fetch (timeout)",
405 "Page table walk ifetch (timeout)",
406 "Load (timeout)",
407 "Store (timeout)",
408 "Page table walk Load/Store (timeout)",
409 };
David Brazdil0f672f62019-12-10 10:32:29 +0000410 static const char *mc_error_class[] = {
411 "Unknown",
412 "Hardware error",
413 "Probable Hardware error (some chance of software cause)",
414 "Software error",
415 "Probable Software error (some chance of hardware cause)",
416 };
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000417
418 /* Print things out */
419 if (evt->version != MCE_V1) {
420 pr_err("Machine Check Exception, Unknown event version %d !\n",
421 evt->version);
422 return;
423 }
424 switch (evt->severity) {
425 case MCE_SEV_NO_ERROR:
426 level = KERN_INFO;
427 sevstr = "Harmless";
428 break;
429 case MCE_SEV_WARNING:
430 level = KERN_WARNING;
David Brazdil0f672f62019-12-10 10:32:29 +0000431 sevstr = "Warning";
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000432 break;
David Brazdil0f672f62019-12-10 10:32:29 +0000433 case MCE_SEV_SEVERE:
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000434 level = KERN_ERR;
435 sevstr = "Severe";
436 break;
437 case MCE_SEV_FATAL:
438 default:
439 level = KERN_ERR;
440 sevstr = "Fatal";
441 break;
442 }
443
David Brazdil0f672f62019-12-10 10:32:29 +0000444 switch(evt->initiator) {
445 case MCE_INITIATOR_CPU:
446 initiator = "CPU";
447 break;
448 case MCE_INITIATOR_PCI:
449 initiator = "PCI";
450 break;
451 case MCE_INITIATOR_ISA:
452 initiator = "ISA";
453 break;
454 case MCE_INITIATOR_MEMORY:
455 initiator = "Memory";
456 break;
457 case MCE_INITIATOR_POWERMGM:
458 initiator = "Power Management";
459 break;
460 case MCE_INITIATOR_UNKNOWN:
461 default:
462 initiator = "Unknown";
463 break;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000464 }
465
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000466 switch (evt->error_type) {
467 case MCE_ERROR_TYPE_UE:
David Brazdil0f672f62019-12-10 10:32:29 +0000468 err_type = "UE";
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000469 subtype = evt->u.ue_error.ue_error_type <
470 ARRAY_SIZE(mc_ue_types) ?
471 mc_ue_types[evt->u.ue_error.ue_error_type]
472 : "Unknown";
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000473 if (evt->u.ue_error.effective_address_provided)
David Brazdil0f672f62019-12-10 10:32:29 +0000474 ea = evt->u.ue_error.effective_address;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000475 if (evt->u.ue_error.physical_address_provided)
David Brazdil0f672f62019-12-10 10:32:29 +0000476 pa = evt->u.ue_error.physical_address;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000477 break;
478 case MCE_ERROR_TYPE_SLB:
David Brazdil0f672f62019-12-10 10:32:29 +0000479 err_type = "SLB";
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000480 subtype = evt->u.slb_error.slb_error_type <
481 ARRAY_SIZE(mc_slb_types) ?
482 mc_slb_types[evt->u.slb_error.slb_error_type]
483 : "Unknown";
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000484 if (evt->u.slb_error.effective_address_provided)
David Brazdil0f672f62019-12-10 10:32:29 +0000485 ea = evt->u.slb_error.effective_address;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000486 break;
487 case MCE_ERROR_TYPE_ERAT:
David Brazdil0f672f62019-12-10 10:32:29 +0000488 err_type = "ERAT";
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000489 subtype = evt->u.erat_error.erat_error_type <
490 ARRAY_SIZE(mc_erat_types) ?
491 mc_erat_types[evt->u.erat_error.erat_error_type]
492 : "Unknown";
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000493 if (evt->u.erat_error.effective_address_provided)
David Brazdil0f672f62019-12-10 10:32:29 +0000494 ea = evt->u.erat_error.effective_address;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000495 break;
496 case MCE_ERROR_TYPE_TLB:
David Brazdil0f672f62019-12-10 10:32:29 +0000497 err_type = "TLB";
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000498 subtype = evt->u.tlb_error.tlb_error_type <
499 ARRAY_SIZE(mc_tlb_types) ?
500 mc_tlb_types[evt->u.tlb_error.tlb_error_type]
501 : "Unknown";
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000502 if (evt->u.tlb_error.effective_address_provided)
David Brazdil0f672f62019-12-10 10:32:29 +0000503 ea = evt->u.tlb_error.effective_address;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000504 break;
505 case MCE_ERROR_TYPE_USER:
David Brazdil0f672f62019-12-10 10:32:29 +0000506 err_type = "User";
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000507 subtype = evt->u.user_error.user_error_type <
508 ARRAY_SIZE(mc_user_types) ?
509 mc_user_types[evt->u.user_error.user_error_type]
510 : "Unknown";
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000511 if (evt->u.user_error.effective_address_provided)
David Brazdil0f672f62019-12-10 10:32:29 +0000512 ea = evt->u.user_error.effective_address;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000513 break;
514 case MCE_ERROR_TYPE_RA:
David Brazdil0f672f62019-12-10 10:32:29 +0000515 err_type = "Real address";
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000516 subtype = evt->u.ra_error.ra_error_type <
517 ARRAY_SIZE(mc_ra_types) ?
518 mc_ra_types[evt->u.ra_error.ra_error_type]
519 : "Unknown";
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000520 if (evt->u.ra_error.effective_address_provided)
David Brazdil0f672f62019-12-10 10:32:29 +0000521 ea = evt->u.ra_error.effective_address;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000522 break;
523 case MCE_ERROR_TYPE_LINK:
David Brazdil0f672f62019-12-10 10:32:29 +0000524 err_type = "Link";
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000525 subtype = evt->u.link_error.link_error_type <
526 ARRAY_SIZE(mc_link_types) ?
527 mc_link_types[evt->u.link_error.link_error_type]
528 : "Unknown";
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000529 if (evt->u.link_error.effective_address_provided)
David Brazdil0f672f62019-12-10 10:32:29 +0000530 ea = evt->u.link_error.effective_address;
531 break;
532 case MCE_ERROR_TYPE_DCACHE:
533 err_type = "D-Cache";
534 subtype = "Unknown";
535 break;
536 case MCE_ERROR_TYPE_ICACHE:
537 err_type = "I-Cache";
538 subtype = "Unknown";
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000539 break;
540 default:
541 case MCE_ERROR_TYPE_UNKNOWN:
David Brazdil0f672f62019-12-10 10:32:29 +0000542 err_type = "Unknown";
543 subtype = "";
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000544 break;
545 }
David Brazdil0f672f62019-12-10 10:32:29 +0000546
547 dar_str[0] = pa_str[0] = '\0';
548 if (ea && evt->srr0 != ea) {
549 /* Load/Store address */
550 n = sprintf(dar_str, "DAR: %016llx ", ea);
551 if (pa)
552 sprintf(dar_str + n, "paddr: %016llx ", pa);
553 } else if (pa) {
554 sprintf(pa_str, " paddr: %016llx", pa);
555 }
556
557 printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n",
558 level, evt->cpu, sevstr, in_guest ? "Guest" : "Host",
559 err_type, subtype, dar_str,
560 evt->disposition == MCE_DISPOSITION_RECOVERED ?
561 "Recovered" : "Not recovered");
562
563 if (in_guest || user_mode) {
564 printk("%sMCE: CPU%d: PID: %d Comm: %s %sNIP: [%016llx]%s\n",
565 level, evt->cpu, current->pid, current->comm,
566 in_guest ? "Guest " : "", evt->srr0, pa_str);
567 } else {
568 printk("%sMCE: CPU%d: NIP: [%016llx] %pS%s\n",
569 level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str);
570 }
571
572 printk("%sMCE: CPU%d: Initiator %s\n", level, evt->cpu, initiator);
573
574 subtype = evt->error_class < ARRAY_SIZE(mc_error_class) ?
575 mc_error_class[evt->error_class] : "Unknown";
576 printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype);
577
578#ifdef CONFIG_PPC_BOOK3S_64
579 /* Display faulty slb contents for SLB errors. */
580 if (evt->error_type == MCE_ERROR_TYPE_SLB)
581 slb_dump_contents(local_paca->mce_faulty_slbs);
582#endif
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000583}
584EXPORT_SYMBOL_GPL(machine_check_print_event_info);
585
586/*
587 * This function is called in real mode. Strictly no printk's please.
588 *
589 * regs->nip and regs->msr contains srr0 and ssr1.
590 */
Olivier Deprez157378f2022-04-04 15:47:50 +0200591long notrace machine_check_early(struct pt_regs *regs)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000592{
593 long handled = 0;
Olivier Deprez157378f2022-04-04 15:47:50 +0200594 u8 ftrace_enabled = this_cpu_get_ftrace_enabled();
595
596 this_cpu_set_ftrace_enabled(0);
597 /* Do not use nmi_enter/exit for pseries hpte guest */
598 if (radix_enabled() || !firmware_has_feature(FW_FEATURE_LPAR))
599 nmi_enter();
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000600
David Brazdil0f672f62019-12-10 10:32:29 +0000601 hv_nmi_check_nonrecoverable(regs);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000602
David Brazdil0f672f62019-12-10 10:32:29 +0000603 /*
604 * See if platform is capable of handling machine check.
605 */
606 if (ppc_md.machine_check_early)
607 handled = ppc_md.machine_check_early(regs);
Olivier Deprez157378f2022-04-04 15:47:50 +0200608
609 if (radix_enabled() || !firmware_has_feature(FW_FEATURE_LPAR))
610 nmi_exit();
611
612 this_cpu_set_ftrace_enabled(ftrace_enabled);
613
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000614 return handled;
615}
616
/*
 * Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9.
 * Chosen by init_debug_trig_function() and read by hmi_handle_debugtrig().
 */
static enum {
	DTRIG_UNKNOWN,		/* meaning not determined */
	DTRIG_VECTOR_CI,	/* need to emulate vector CI load instr */
	DTRIG_SUSPEND_ESCAPE,	/* need to escape from TM suspend mode */
} hmer_debug_trig_function;
623
624static int init_debug_trig_function(void)
625{
626 int pvr;
627 struct device_node *cpun;
628 struct property *prop = NULL;
629 const char *str;
630
631 /* First look in the device tree */
632 preempt_disable();
633 cpun = of_get_cpu_node(smp_processor_id(), NULL);
634 if (cpun) {
635 of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
636 prop, str) {
637 if (strcmp(str, "bit17-vector-ci-load") == 0)
638 hmer_debug_trig_function = DTRIG_VECTOR_CI;
639 else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
640 hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
641 }
642 of_node_put(cpun);
643 }
644 preempt_enable();
645
646 /* If we found the property, don't look at PVR */
647 if (prop)
648 goto out;
649
650 pvr = mfspr(SPRN_PVR);
651 /* Check for POWER9 Nimbus (scale-out) */
652 if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
653 /* DD2.2 and later */
654 if ((pvr & 0xfff) >= 0x202)
655 hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
656 /* DD2.0 and DD2.1 - used for vector CI load emulation */
657 else if ((pvr & 0xfff) >= 0x200)
658 hmer_debug_trig_function = DTRIG_VECTOR_CI;
659 }
660
661 out:
662 switch (hmer_debug_trig_function) {
663 case DTRIG_VECTOR_CI:
664 pr_debug("HMI debug trigger used for vector CI load\n");
665 break;
666 case DTRIG_SUSPEND_ESCAPE:
667 pr_debug("HMI debug trigger used for TM suspend escape\n");
668 break;
669 default:
670 break;
671 }
672 return 0;
673}
674__initcall(init_debug_trig_function);
675
676/*
677 * Handle HMIs that occur as a result of a debug trigger.
678 * Return values:
679 * -1 means this is not a HMI cause that we know about
680 * 0 means no further handling is required
681 * 1 means further handling is required
682 */
683long hmi_handle_debugtrig(struct pt_regs *regs)
684{
685 unsigned long hmer = mfspr(SPRN_HMER);
686 long ret = 0;
687
688 /* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
689 if (!((hmer & HMER_DEBUG_TRIG)
690 && hmer_debug_trig_function != DTRIG_UNKNOWN))
691 return -1;
692
693 hmer &= ~HMER_DEBUG_TRIG;
694 /* HMER is a write-AND register */
695 mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);
696
697 switch (hmer_debug_trig_function) {
698 case DTRIG_VECTOR_CI:
699 /*
700 * Now to avoid problems with soft-disable we
701 * only do the emulation if we are coming from
702 * host user space
703 */
704 if (regs && user_mode(regs))
705 ret = local_paca->hmi_p9_special_emu = 1;
706
707 break;
708
709 default:
710 break;
711 }
712
713 /*
714 * See if any other HMI causes remain to be handled
715 */
716 if (hmer & mfspr(SPRN_HMEER))
717 return -1;
718
719 return ret;
720}
721
722/*
723 * Return values:
724 */
725long hmi_exception_realmode(struct pt_regs *regs)
726{
727 int ret;
728
Olivier Deprez157378f2022-04-04 15:47:50 +0200729 local_paca->hmi_irqs++;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000730
731 ret = hmi_handle_debugtrig(regs);
732 if (ret >= 0)
733 return ret;
734
735 wait_for_subcore_guest_exit();
736
737 if (ppc_md.hmi_exception_early)
738 ppc_md.hmi_exception_early(regs);
739
740 wait_for_tb_resync();
741
742 return 1;
743}