blob: 4e4476b832be20221d492ebaa46866ff59773986 [file] [log] [blame]
David Brazdil0f672f62019-12-10 10:32:29 +00001// SPDX-License-Identifier: GPL-2.0-only
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002/*
3 * Local APIC handling, local APIC timers
4 *
5 * (c) 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com>
6 *
7 * Fixes
8 * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
9 * thanks to Eric Gilmore
10 * and Rolf G. Tews
11 * for testing these extensively.
12 * Maciej W. Rozycki : Various updates and fixes.
13 * Mikael Pettersson : Power Management for UP-APIC.
14 * Pavel Machek and
15 * Mikael Pettersson : PM converted to driver model.
16 */
17
18#include <linux/perf_event.h>
19#include <linux/kernel_stat.h>
20#include <linux/mc146818rtc.h>
21#include <linux/acpi_pmtmr.h>
22#include <linux/clockchips.h>
23#include <linux/interrupt.h>
David Brazdil0f672f62019-12-10 10:32:29 +000024#include <linux/memblock.h>
Andrew Scullb4b6d4a2019-01-02 15:54:55 +000025#include <linux/ftrace.h>
26#include <linux/ioport.h>
27#include <linux/export.h>
28#include <linux/syscore_ops.h>
29#include <linux/delay.h>
30#include <linux/timex.h>
31#include <linux/i8253.h>
32#include <linux/dmar.h>
33#include <linux/init.h>
34#include <linux/cpu.h>
35#include <linux/dmi.h>
36#include <linux/smp.h>
37#include <linux/mm.h>
38
39#include <asm/trace/irq_vectors.h>
40#include <asm/irq_remapping.h>
41#include <asm/perf_event.h>
42#include <asm/x86_init.h>
43#include <asm/pgalloc.h>
44#include <linux/atomic.h>
Olivier Deprez0e641232021-09-23 10:07:05 +020045#include <asm/barrier.h>
Andrew Scullb4b6d4a2019-01-02 15:54:55 +000046#include <asm/mpspec.h>
47#include <asm/i8259.h>
48#include <asm/proto.h>
David Brazdil0f672f62019-12-10 10:32:29 +000049#include <asm/traps.h>
Andrew Scullb4b6d4a2019-01-02 15:54:55 +000050#include <asm/apic.h>
51#include <asm/io_apic.h>
52#include <asm/desc.h>
53#include <asm/hpet.h>
54#include <asm/mtrr.h>
55#include <asm/time.h>
56#include <asm/smp.h>
57#include <asm/mce.h>
58#include <asm/tsc.h>
59#include <asm/hypervisor.h>
60#include <asm/cpu_device_id.h>
61#include <asm/intel-family.h>
62#include <asm/irq_regs.h>
63
/* Count of processors found during enumeration — set by platform code */
unsigned int num_processors;

/* Count of processors enumerated but marked disabled — set elsewhere */
unsigned disabled_cpus;

/* Processor that is doing the boot up */
unsigned int boot_cpu_physical_apicid __ro_after_init = -1U;
EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid);

/* APIC version of the boot CPU, cached for checks before APs come up */
u8 boot_cpu_apic_version __ro_after_init;

/*
 * The highest APIC ID seen during enumeration.
 */
static unsigned int max_physical_apicid;

/*
 * Bitmask of physically existing CPUs:
 */
physid_mask_t phys_cpu_present_map;

/*
 * Processor to be disabled specified by kernel parameter
 * disable_cpu_apicid=<int>, mostly used for the kdump 2nd kernel to
 * avoid undefined behaviour caused by sending INIT from AP to BSP.
 */
static unsigned int disabled_cpu_apicid __ro_after_init = BAD_APICID;

/*
 * This variable controls which CPUs receive external NMIs. By default,
 * external NMIs are delivered only to the BSP.
 */
static int apic_extnmi __ro_after_init = APIC_EXTNMI_BSP;

/*
 * Map cpu index to physical APIC ID
 */
DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID);
DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid, BAD_APICID);
DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, U32_MAX);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid);
106
107#ifdef CONFIG_X86_32
108
109/*
110 * On x86_32, the mapping between cpu and logical apicid may vary
111 * depending on apic in use. The following early percpu variable is
112 * used for the mapping. This is where the behaviors of x86_64 and 32
113 * actually diverge. Let's keep it ugly for now.
114 */
115DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID);
116
117/* Local APIC was disabled by the BIOS and enabled by the kernel */
David Brazdil0f672f62019-12-10 10:32:29 +0000118static int enabled_via_apicbase __ro_after_init;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000119
120/*
121 * Handle interrupt mode configuration register (IMCR).
122 * This register controls whether the interrupt signals
123 * that reach the BSP come from the master PIC or from the
124 * local APIC. Before entering Symmetric I/O Mode, either
125 * the BIOS or the operating system must switch out of
126 * PIC Mode by changing the IMCR.
127 */
/* Route NMI and 8259 INTR through the local APIC (enter symmetric I/O mode) */
static inline void imcr_pic_to_apic(void)
{
	/* select IMCR register */
	outb(0x70, 0x22);
	/* NMI and 8259 INTR go through APIC */
	outb(0x01, 0x23);
}

/* Route NMI and 8259 INTR directly to the BSP (back to PIC mode) */
static inline void imcr_apic_to_pic(void)
{
	/* select IMCR register */
	outb(0x70, 0x22);
	/* NMI and 8259 INTR go directly to BSP */
	outb(0x00, 0x23);
}
143#endif
144
145/*
146 * Knob to control our willingness to enable the local APIC.
147 *
148 * +1=force-enable
149 */
150static int force_enable_local_apic __initdata;
151
152/*
153 * APIC command line parameters
154 */
155static int __init parse_lapic(char *arg)
156{
157 if (IS_ENABLED(CONFIG_X86_32) && !arg)
158 force_enable_local_apic = 1;
159 else if (arg && !strncmp(arg, "notscdeadline", 13))
160 setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
161 return 0;
162}
163early_param("lapic", parse_lapic);
164
165#ifdef CONFIG_X86_64
/* Non-zero: calibrate the APIC timer against the ACPI PM timer */
static int apic_calibrate_pmtmr __initdata;
static __init int setup_apicpmtimer(char *s)
{
	apic_calibrate_pmtmr = 1;
	/* PM timer based calibration implies not using the TSC */
	notsc_setup(NULL);
	/*
	 * NOTE(review): __setup() handlers conventionally return 1 when the
	 * argument was consumed; returning 0 makes the core treat
	 * "apicpmtimer" as unknown — confirm this is intended.
	 */
	return 0;
}
__setup("apicpmtimer", setup_apicpmtimer);
174#endif
175
/* Physical base address of the local APIC — filled in by platform code */
unsigned long mp_lapic_addr __ro_after_init;
/* Non-zero: local APIC support disabled (checked in apic_needs_pit()) */
int disable_apic __ro_after_init;
/* Disable local APIC timer from the kernel commandline or via dmi quirk */
static int disable_apic_timer __initdata;
/* Local APIC timer works in C2 */
int local_apic_timer_c2_ok __ro_after_init;
EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);

/*
 * Debug level, exported for io_apic.c
 */
int apic_verbosity __ro_after_init;

int pic_mode __ro_after_init;

/* Have we found an MP table */
int smp_found_config __ro_after_init;

static struct resource lapic_resource = {
	.name = "Local APIC",
	.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
};

/* Calibrated APIC timer ticks per jiffy; 0 until calibration has run */
unsigned int lapic_timer_period = 0;

static void apic_pm_activate(void);

/* Physical address used when mapping the local APIC registers */
static unsigned long apic_phys __ro_after_init;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000204
205/*
206 * Get the LAPIC version
207 */
208static inline int lapic_get_version(void)
209{
210 return GET_APIC_VERSION(apic_read(APIC_LVR));
211}
212
213/*
214 * Check, if the APIC is integrated or a separate chip
215 */
/* True if this is an on-chip (integrated) APIC rather than a 82489DX */
static inline int lapic_is_integrated(void)
{
	int version = lapic_get_version();

	return APIC_INTEGRATED(version);
}
220
221/*
222 * Check, whether this is a modern or a first generation APIC
223 */
224static int modern_apic(void)
225{
226 /* AMD systems use old APIC versions, so check the CPU */
227 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
228 boot_cpu_data.x86 >= 0xf)
229 return 1;
David Brazdil0f672f62019-12-10 10:32:29 +0000230
231 /* Hygon systems use modern APIC */
232 if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
233 return 1;
234
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000235 return lapic_get_version() >= 0x14;
236}
237
/*
 * Switch the apic driver to the NOOP implementation: right after this
 * call apic->read()/apic->write() do nothing.
 */
static void __init apic_disable(void)
{
	pr_info("APIC: switched to apic NOOP\n");
	apic = &apic_noop;
}
247
248void native_apic_wait_icr_idle(void)
249{
250 while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
251 cpu_relax();
252}
253
254u32 native_safe_apic_wait_icr_idle(void)
255{
256 u32 send_status;
257 int timeout;
258
259 timeout = 0;
260 do {
261 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
262 if (!send_status)
263 break;
264 inc_irq_stat(icr_read_retry_count);
265 udelay(100);
266 } while (timeout++ < 1000);
267
268 return send_status;
269}
270
/*
 * Write a 64-bit ICR value: the destination goes into ICR2 first, then
 * the low word into ICR, which triggers the IPI. Interrupts are
 * disabled so the two-register sequence cannot be torn by a local IRQ.
 */
void native_apic_icr_write(u32 low, u32 id)
{
	unsigned long flags;

	local_irq_save(flags);
	apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
	apic_write(APIC_ICR, low);
	local_irq_restore(flags);
}
280
281u64 native_apic_icr_read(void)
282{
283 u32 icr1, icr2;
284
285 icr2 = apic_read(APIC_ICR2);
286 icr1 = apic_read(APIC_ICR);
287
288 return icr1 | ((u64)icr2 << 32);
289}
290
291#ifdef CONFIG_X86_32
292/**
293 * get_physical_broadcast - Get number of physical broadcast IDs
294 */
int get_physical_broadcast(void)
{
	/* Modern APICs use 8-bit physical IDs, old ones 4-bit */
	if (modern_apic())
		return 0xff;
	return 0xf;
}
299#endif
300
301/**
302 * lapic_get_maxlvt - get the maximum number of local vector table entries
303 */
304int lapic_get_maxlvt(void)
305{
306 /*
307 * - we always have APIC integrated on 64bit mode
308 * - 82489DXs do not report # of LVT entries
309 */
310 return lapic_is_integrated() ? GET_APIC_MAXLVT(apic_read(APIC_LVR)) : 2;
311}
312
313/*
314 * Local APIC timer
315 */
316
317/* Clock divisor */
318#define APIC_DIVISOR 16
319#define TSC_DIVISOR 8
320
321/*
322 * This function sets up the local APIC timer, with a timeout of
323 * 'clocks' APIC bus clock. During calibration we actually call
324 * this function twice on the boot CPU, once with a bogus timeout
325 * value, second time for real. The other (noncalibrating) CPUs
326 * call this function only once, with the real, calibrated value.
327 *
328 * We do reads before writes even if unnecessary, to get around the
329 * P5 APIC double write bug.
330 */
static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
{
	unsigned int lvtt_value, tmp_value;

	/* Build the LVT timer entry: vector plus operating mode bits */
	lvtt_value = LOCAL_TIMER_VECTOR;
	if (!oneshot)
		lvtt_value |= APIC_LVT_TIMER_PERIODIC;
	else if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
		lvtt_value |= APIC_LVT_TIMER_TSCDEADLINE;

	/* Non-integrated (82489DX) APICs need the timer base selected */
	if (!lapic_is_integrated())
		lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);

	if (!irqen)
		lvtt_value |= APIC_LVT_MASKED;

	apic_write(APIC_LVTT, lvtt_value);

	if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) {
		/*
		 * See Intel SDM: TSC-Deadline Mode chapter. In xAPIC mode,
		 * writing to the APIC LVTT and TSC_DEADLINE MSR isn't serialized.
		 * According to Intel, MFENCE can do the serialization here.
		 */
		asm volatile("mfence" : : : "memory");
		/* Deadline mode uses no divider and no initial count */
		return;
	}

	/*
	 * Divide PICLK by 16
	 */
	tmp_value = apic_read(APIC_TDCR);
	apic_write(APIC_TDCR,
		(tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
		APIC_TDR_DIV_16);

	/* Arm the initial count; in oneshot mode the caller programs it later */
	if (!oneshot)
		apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
}
370
371/*
372 * Setup extended LVT, AMD specific
373 *
374 * Software should use the LVT offsets the BIOS provides. The offsets
375 * are determined by the subsystems using it like those for MCE
376 * threshold or IBS. On K8 only offset 0 (APIC500) and MCE interrupts
377 * are supported. Beginning with family 10h at least 4 offsets are
378 * available.
379 *
380 * Since the offsets must be consistent for all cores, we keep track
381 * of the LVT offsets in software and reserve the offset for the same
382 * vector also to be used on other cores. An offset is freed by
383 * setting the entry to APIC_EILVT_MASKED.
384 *
385 * If the BIOS is right, there should be no conflicts. Otherwise a
386 * "[Firmware Bug]: ..." error message is generated. However, if
387 * software does not properly determines the offsets, it is not
388 * necessarily a BIOS bug.
389 */
390
391static atomic_t eilvt_offsets[APIC_EILVT_NR_MAX];
392
393static inline int eilvt_entry_is_changeable(unsigned int old, unsigned int new)
394{
395 return (old & APIC_EILVT_MASKED)
396 || (new == APIC_EILVT_MASKED)
397 || ((new & ~APIC_EILVT_MASKED) == old);
398}
399
/*
 * Atomically reserve extended-LVT @offset for @new (vector + flags).
 * Returns @new on success, the conflicting reserved value when the
 * offset is already claimed for a different vector, or ~0 for an
 * out-of-range offset.
 */
static unsigned int reserve_eilvt_offset(int offset, unsigned int new)
{
	unsigned int rsvd, vector;

	if (offset >= APIC_EILVT_NR_MAX)
		return ~0;

	rsvd = atomic_read(&eilvt_offsets[offset]);
	do {
		vector = rsvd & ~APIC_EILVT_MASKED;	/* 0: unassigned */
		if (vector && !eilvt_entry_is_changeable(vector, new))
			/* may not change if vectors are different */
			return rsvd;
		/* cmpxchg loop: retry until our claim sticks or conflicts */
		rsvd = atomic_cmpxchg(&eilvt_offsets[offset], rsvd, new);
	} while (rsvd != new);

	/* Log when a previously unassigned offset got a real vector */
	rsvd &= ~APIC_EILVT_MASKED;
	if (rsvd && rsvd != vector)
		pr_info("LVT offset %d assigned for vector 0x%02x\n",
			offset, rsvd);

	return new;
}
423
424/*
425 * If mask=1, the LVT entry does not generate interrupts while mask=0
426 * enables the vector. See also the BKDGs. Must be called with
427 * preemption disabled.
428 */
429
int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask)
{
	unsigned long reg = APIC_EILVTn(offset);
	unsigned int new, old, reserved;

	/* Compose the EILVT register value: mask | message type | vector */
	new = (mask << 16) | (msg_type << 8) | vector;
	old = apic_read(reg);
	/* Claim the offset in the software tracking table first */
	reserved = reserve_eilvt_offset(offset, new);

	/* Another CPU already claimed this offset with a different vector */
	if (reserved != new) {
		pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
		       "vector 0x%x, but the register is already in use for "
		       "vector 0x%x on another cpu\n",
		       smp_processor_id(), reg, offset, new, reserved);
		return -EINVAL;
	}

	/* The hardware register itself holds a conflicting unmasked entry */
	if (!eilvt_entry_is_changeable(old, new)) {
		pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
		       "vector 0x%x, but the register is already in use for "
		       "vector 0x%x on this cpu\n",
		       smp_processor_id(), reg, offset, new, old);
		return -EBUSY;
	}

	apic_write(reg, new);

	return 0;
}
EXPORT_SYMBOL_GPL(setup_APIC_eilvt);
460
461/*
462 * Program the next event, relative to now
463 */
/* clockevents hook: arm the APIC timer @delta timer ticks from now */
static int lapic_next_event(unsigned long delta,
			    struct clock_event_device *evt)
{
	apic_write(APIC_TMICT, delta);
	return 0;
}
470
/* clockevents hook for TSC-deadline mode: program an absolute deadline */
static int lapic_next_deadline(unsigned long delta,
			       struct clock_event_device *evt)
{
	u64 tsc;

	/* This MSR is special and need a special fence: */
	weak_wrmsr_fence();

	tsc = rdtsc();
	/* @delta is in TSC/TSC_DIVISOR units; scale back to TSC ticks */
	wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
	return 0;
}
483
484static int lapic_timer_shutdown(struct clock_event_device *evt)
485{
486 unsigned int v;
487
488 /* Lapic used as dummy for broadcast ? */
489 if (evt->features & CLOCK_EVT_FEAT_DUMMY)
490 return 0;
491
492 v = apic_read(APIC_LVTT);
493 v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
494 apic_write(APIC_LVTT, v);
495 apic_write(APIC_TMICT, 0);
496 return 0;
497}
498
/*
 * Common helper for the set_state_periodic/set_state_oneshot callbacks:
 * (re)program the LVT timer with the calibrated period, IRQ enabled.
 */
static inline int
lapic_timer_set_periodic_oneshot(struct clock_event_device *evt, bool oneshot)
{
	/* Lapic used as dummy for broadcast ? */
	if (evt->features & CLOCK_EVT_FEAT_DUMMY)
		return 0;

	__setup_APIC_LVTT(lapic_timer_period, oneshot, 1);
	return 0;
}
509
/* clockevents callback: switch the LVT timer to periodic mode */
static int lapic_timer_set_periodic(struct clock_event_device *evt)
{
	return lapic_timer_set_periodic_oneshot(evt, false);
}

/* clockevents callback: switch the LVT timer to one-shot mode */
static int lapic_timer_set_oneshot(struct clock_event_device *evt)
{
	return lapic_timer_set_periodic_oneshot(evt, true);
}
519
520/*
521 * Local APIC timer broadcast function
522 */
static void lapic_timer_broadcast(const struct cpumask *mask)
{
#ifdef CONFIG_SMP
	/* Kick the local timer vector on all CPUs in @mask; no-op on UP */
	apic->send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
#endif
}
529
530
531/*
532 * The local apic timer can be used for any function which is CPU local.
533 */
/*
 * Template clockevent device. Starts with CLOCK_EVT_FEAT_DUMMY set;
 * the flag is cleared once calibration proves the timer usable.
 */
static struct clock_event_device lapic_clockevent = {
	.name				= "lapic",
	.features			= CLOCK_EVT_FEAT_PERIODIC |
					  CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP
					  | CLOCK_EVT_FEAT_DUMMY,
	.shift				= 32,
	.set_state_shutdown		= lapic_timer_shutdown,
	.set_state_periodic		= lapic_timer_set_periodic,
	.set_state_oneshot		= lapic_timer_set_oneshot,
	.set_state_oneshot_stopped	= lapic_timer_shutdown,
	.set_next_event			= lapic_next_event,
	.broadcast			= lapic_timer_broadcast,
	.rating				= 100,
	.irq				= -1,
};
/* Per-CPU copy of lapic_clockevent, registered by setup_APIC_timer() */
static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
550
551#define DEADLINE_MODEL_MATCH_FUNC(model, func) \
552 { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&func }
553
554#define DEADLINE_MODEL_MATCH_REV(model, rev) \
555 { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)rev }
556
Olivier Deprez0e641232021-09-23 10:07:05 +0200557static __init u32 hsx_deadline_rev(void)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000558{
559 switch (boot_cpu_data.x86_stepping) {
560 case 0x02: return 0x3a; /* EP */
561 case 0x04: return 0x0f; /* EX */
562 }
563
564 return ~0U;
565}
566
Olivier Deprez0e641232021-09-23 10:07:05 +0200567static __init u32 bdx_deadline_rev(void)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000568{
569 switch (boot_cpu_data.x86_stepping) {
570 case 0x02: return 0x00000011;
571 case 0x03: return 0x0700000e;
572 case 0x04: return 0x0f00000c;
573 case 0x05: return 0x0e000003;
574 }
575
576 return ~0U;
577}
578
Olivier Deprez0e641232021-09-23 10:07:05 +0200579static __init u32 skx_deadline_rev(void)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000580{
581 switch (boot_cpu_data.x86_stepping) {
582 case 0x03: return 0x01000136;
583 case 0x04: return 0x02000014;
584 }
585
586 if (boot_cpu_data.x86_stepping > 4)
587 return 0;
588
589 return ~0U;
590}
591
/*
 * Intel CPUs whose TSC-deadline timer requires a minimum microcode
 * revision. driver_data holds either the revision itself or a pointer
 * to a stepping-dependent lookup function; the two are told apart via
 * the sign bit in apic_validate_deadline_timer().
 */
static const struct x86_cpu_id deadline_match[] __initconst = {
	DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_HASWELL_X,	hsx_deadline_rev),
	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_X,	0x0b000020),
	DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_BROADWELL_D,	bdx_deadline_rev),
	DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_SKYLAKE_X,	skx_deadline_rev),

	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL,		0x22),
	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_L,	0x20),
	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_G,	0x17),

	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL,	0x25),
	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_G,	0x17),

	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_L,	0xb2),
	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE,		0xb2),

	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE_L,	0x52),
	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE,		0x52),

	{},
};
613
/*
 * Validate that the TSC deadline timer is usable: on the Intel parts
 * listed in deadline_match[] it is broken below a model/stepping
 * specific microcode revision. Returns true when the deadline timer
 * may be used; otherwise the feature bit is cleared and false is
 * returned.
 */
static __init bool apic_validate_deadline_timer(void)
{
	const struct x86_cpu_id *m;
	u32 rev;

	if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
		return false;
	/* Hypervisor guests are exempt from the microcode check */
	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
		return true;

	m = x86_match_cpu(deadline_match);
	if (!m)
		return true;

	/*
	 * Function pointers will have the MSB set due to address layout,
	 * immediate revisions will not.
	 */
	if ((long)m->driver_data < 0)
		rev = ((u32 (*)(void))(m->driver_data))();
	else
		rev = (u32)m->driver_data;

	if (boot_cpu_data.microcode >= rev)
		return true;

	setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
	pr_err(FW_BUG "TSC_DEADLINE disabled due to Errata; "
	       "please update microcode to version: 0x%x (or later)\n", rev);
	return false;
}
645
646/*
647 * Setup the local APIC timer for this CPU. Copy the initialized values
648 * of the boot CPU and register the clock event in the framework.
649 */
static void setup_APIC_timer(void)
{
	struct clock_event_device *levt = this_cpu_ptr(&lapic_events);

	/* ARAT: the timer keeps running in deep C-states, so no C3STOP */
	if (this_cpu_has(X86_FEATURE_ARAT)) {
		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
		/* Make LAPIC timer preferrable over percpu HPET */
		lapic_clockevent.rating = 150;
	}

	/* Copy the boot CPU's calibrated template for this CPU */
	memcpy(levt, &lapic_clockevent, sizeof(*levt));
	levt->cpumask = cpumask_of(smp_processor_id());

	if (this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
		/* Deadline mode: one-shot only, clocked by the TSC */
		levt->name = "lapic-deadline";
		levt->features &= ~(CLOCK_EVT_FEAT_PERIODIC |
				    CLOCK_EVT_FEAT_DUMMY);
		levt->set_next_event = lapic_next_deadline;
		clockevents_config_and_register(levt,
						tsc_khz * (1000 / TSC_DIVISOR),
						0xF, ~0UL);
	} else
		clockevents_register_device(levt);
}
674
675/*
676 * Install the updated TSC frequency from recalibration at the TSC
677 * deadline clockevent devices.
678 */
679static void __lapic_update_tsc_freq(void *info)
680{
681 struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
682
683 if (!this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
684 return;
685
686 clockevents_update_freq(levt, tsc_khz * (1000 / TSC_DIVISOR));
687}
688
/* Propagate a recalibrated TSC frequency to all deadline clockevents */
void lapic_update_tsc_freq(void)
{
	/*
	 * The clockevent device's ->mult and ->shift can both be
	 * changed. In order to avoid races, schedule the frequency
	 * update code on each CPU.
	 */
	on_each_cpu(__lapic_update_tsc_freq, NULL, 0);
}
698
699/*
700 * In this functions we calibrate APIC bus clocks to the external timer.
701 *
702 * We want to do the calibration only once since we want to have local timer
703 * irqs syncron. CPUs connected by the same APIC bus have the very same bus
704 * frequency.
705 *
706 * This was previously done by reading the PIT/HPET and waiting for a wrap
707 * around to find out, that a tick has elapsed. I have a box, where the PIT
708 * readout is broken, so it never gets out of the wait loop again. This was
709 * also reported by others.
710 *
711 * Monitoring the jiffies value is inaccurate and the clockevents
712 * infrastructure allows us to do a simple substitution of the interrupt
713 * handler.
714 *
715 * The calibration routine also uses the pm_timer when possible, as the PIT
716 * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes
717 * back to normal later in the boot process).
718 */
719
720#define LAPIC_CAL_LOOPS (HZ/10)
721
722static __initdata int lapic_cal_loops = -1;
723static __initdata long lapic_cal_t1, lapic_cal_t2;
724static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2;
725static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2;
726static __initdata unsigned long lapic_cal_j1, lapic_cal_j2;
727
728/*
David Brazdil0f672f62019-12-10 10:32:29 +0000729 * Temporary interrupt handler and polled calibration function.
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000730 */
/*
 * Temporary interrupt handler and polled calibration function: samples
 * APIC timer count, TSC, PM timer and jiffies at the first and the
 * LAPIC_CAL_LOOPS-th invocation so the callers can compute deltas over
 * the calibration window.
 */
static void __init lapic_cal_handler(struct clock_event_device *dev)
{
	unsigned long long tsc = 0;
	long tapic = apic_read(APIC_TMCCT);	/* current APIC timer count */
	unsigned long pm = acpi_pm_read_early();

	if (boot_cpu_has(X86_FEATURE_TSC))
		tsc = rdtsc();

	switch (lapic_cal_loops++) {
	case 0:
		/* First call: record the start values */
		lapic_cal_t1 = tapic;
		lapic_cal_tsc1 = tsc;
		lapic_cal_pm1 = pm;
		lapic_cal_j1 = jiffies;
		break;

	case LAPIC_CAL_LOOPS:
		/* Last call: record the end values */
		lapic_cal_t2 = tapic;
		lapic_cal_tsc2 = tsc;
		/* Compensate a single PM timer counter wrap-around */
		if (pm < lapic_cal_pm1)
			pm += ACPI_PM_OVRRUN;
		lapic_cal_pm2 = pm;
		lapic_cal_j2 = jiffies;
		break;
	}
}
758
759static int __init
760calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
761{
762 const long pm_100ms = PMTMR_TICKS_PER_SEC / 10;
763 const long pm_thresh = pm_100ms / 100;
764 unsigned long mult;
765 u64 res;
766
767#ifndef CONFIG_X86_PM_TIMER
768 return -1;
769#endif
770
771 apic_printk(APIC_VERBOSE, "... PM-Timer delta = %ld\n", deltapm);
772
773 /* Check, if the PM timer is available */
774 if (!deltapm)
775 return -1;
776
777 mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
778
779 if (deltapm > (pm_100ms - pm_thresh) &&
780 deltapm < (pm_100ms + pm_thresh)) {
781 apic_printk(APIC_VERBOSE, "... PM-Timer result ok\n");
782 return 0;
783 }
784
785 res = (((u64)deltapm) * mult) >> 22;
786 do_div(res, 1000000);
787 pr_warning("APIC calibration not consistent "
788 "with PM-Timer: %ldms instead of 100ms\n",(long)res);
789
790 /* Correct the lapic counter value */
791 res = (((u64)(*delta)) * pm_100ms);
792 do_div(res, deltapm);
793 pr_info("APIC delta adjusted to PM-Timer: "
794 "%lu (%ld)\n", (unsigned long)res, *delta);
795 *delta = (long)res;
796
797 /* Correct the tsc counter value */
798 if (boot_cpu_has(X86_FEATURE_TSC)) {
799 res = (((u64)(*deltatsc)) * pm_100ms);
800 do_div(res, deltapm);
801 apic_printk(APIC_VERBOSE, "TSC delta adjusted to "
802 "PM-Timer: %lu (%ld)\n",
803 (unsigned long)res, *deltatsc);
804 *deltatsc = (long)res;
805 }
806
807 return 0;
808}
809
/*
 * Derive the clockevent mult/shift factors and min/max deltas from
 * lapic_timer_period. Returns -1 when the period is not yet known
 * (calibration still required), 0 on success.
 */
static int __init lapic_init_clockevent(void)
{
	if (!lapic_timer_period)
		return -1;

	/* Calculate the scaled math multiplication factor */
	lapic_clockevent.mult = div_sc(lapic_timer_period/APIC_DIVISOR,
				       TICK_NSEC, lapic_clockevent.shift);
	/* The 32-bit TMICT register bounds the programmable delta */
	lapic_clockevent.max_delta_ns =
		clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent);
	lapic_clockevent.max_delta_ticks = 0x7FFFFFFF;
	lapic_clockevent.min_delta_ns =
		clockevent_delta2ns(0xF, &lapic_clockevent);
	lapic_clockevent.min_delta_ticks = 0xF;

	return 0;
}
827
/*
 * Decide whether the PIT must be initialized as a calibration source
 * and/or backup timer, i.e. whether the local APIC timer alone cannot
 * cover timekeeping needs.
 */
bool __init apic_needs_pit(void)
{
	/*
	 * If the frequencies are not known, PIT is required for both TSC
	 * and apic timer calibration.
	 */
	if (!tsc_khz || !cpu_khz)
		return true;

	/* Is there an APIC at all or is it disabled? */
	if (!boot_cpu_has(X86_FEATURE_APIC) || disable_apic)
		return true;

	/*
	 * If interrupt delivery mode is legacy PIC or virtual wire without
	 * configuration, the local APIC timer wont be set up. Make sure
	 * that the PIT is initialized.
	 */
	if (apic_intr_mode == APIC_PIC ||
	    apic_intr_mode == APIC_VIRTUAL_WIRE_NO_CONFIG)
		return true;

	/* Virt guests may lack ARAT, but still have DEADLINE */
	if (!boot_cpu_has(X86_FEATURE_ARAT))
		return true;

	/* Deadline timer is based on TSC so no further PIT action required */
	if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
		return false;

	/* APIC timer disabled? */
	if (disable_apic_timer)
		return true;
	/*
	 * The APIC timer frequency is known already, no PIT calibration
	 * required. If unknown, let the PIT be initialized.
	 */
	return lapic_timer_period == 0;
}
867
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000868static int __init calibrate_APIC_clock(void)
869{
870 struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
David Brazdil0f672f62019-12-10 10:32:29 +0000871 u64 tsc_perj = 0, tsc_start = 0;
872 unsigned long jif_start;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000873 unsigned long deltaj;
874 long delta, deltatsc;
875 int pm_referenced = 0;
876
David Brazdil0f672f62019-12-10 10:32:29 +0000877 if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
878 return 0;
879
880 /*
881 * Check if lapic timer has already been calibrated by platform
882 * specific routine, such as tsc calibration code. If so just fill
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000883 * in the clockevent structure and return.
884 */
David Brazdil0f672f62019-12-10 10:32:29 +0000885 if (!lapic_init_clockevent()) {
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000886 apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n",
David Brazdil0f672f62019-12-10 10:32:29 +0000887 lapic_timer_period);
888 /*
889 * Direct calibration methods must have an always running
890 * local APIC timer, no need for broadcast timer.
891 */
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000892 lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
893 return 0;
894 }
895
896 apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
897 "calibrating APIC timer ...\n");
898
David Brazdil0f672f62019-12-10 10:32:29 +0000899 /*
900 * There are platforms w/o global clockevent devices. Instead of
901 * making the calibration conditional on that, use a polling based
902 * approach everywhere.
903 */
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000904 local_irq_disable();
905
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000906 /*
907 * Setup the APIC counter to maximum. There is no way the lapic
908 * can underflow in the 100ms detection time frame
909 */
910 __setup_APIC_LVTT(0xffffffff, 0, 0);
911
David Brazdil0f672f62019-12-10 10:32:29 +0000912 /*
913 * Methods to terminate the calibration loop:
914 * 1) Global clockevent if available (jiffies)
915 * 2) TSC if available and frequency is known
916 */
917 jif_start = READ_ONCE(jiffies);
918
919 if (tsc_khz) {
920 tsc_start = rdtsc();
921 tsc_perj = div_u64((u64)tsc_khz * 1000, HZ);
922 }
923
924 /*
925 * Enable interrupts so the tick can fire, if a global
926 * clockevent device is available
927 */
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000928 local_irq_enable();
929
David Brazdil0f672f62019-12-10 10:32:29 +0000930 while (lapic_cal_loops <= LAPIC_CAL_LOOPS) {
931 /* Wait for a tick to elapse */
932 while (1) {
933 if (tsc_khz) {
934 u64 tsc_now = rdtsc();
935 if ((tsc_now - tsc_start) >= tsc_perj) {
936 tsc_start += tsc_perj;
937 break;
938 }
939 } else {
940 unsigned long jif_now = READ_ONCE(jiffies);
941
942 if (time_after(jif_now, jif_start)) {
943 jif_start = jif_now;
944 break;
945 }
946 }
947 cpu_relax();
948 }
949
950 /* Invoke the calibration routine */
951 local_irq_disable();
952 lapic_cal_handler(NULL);
953 local_irq_enable();
954 }
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000955
956 local_irq_disable();
957
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000958 /* Build delta t1-t2 as apic timer counts down */
959 delta = lapic_cal_t1 - lapic_cal_t2;
960 apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
961
962 deltatsc = (long)(lapic_cal_tsc2 - lapic_cal_tsc1);
963
964 /* we trust the PM based calibration if possible */
965 pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
966 &delta, &deltatsc);
967
David Brazdil0f672f62019-12-10 10:32:29 +0000968 lapic_timer_period = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
969 lapic_init_clockevent();
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000970
971 apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
972 apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult);
973 apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
David Brazdil0f672f62019-12-10 10:32:29 +0000974 lapic_timer_period);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000975
976 if (boot_cpu_has(X86_FEATURE_TSC)) {
977 apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
978 "%ld.%04ld MHz.\n",
979 (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ),
980 (deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ));
981 }
982
983 apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
984 "%u.%04u MHz.\n",
David Brazdil0f672f62019-12-10 10:32:29 +0000985 lapic_timer_period / (1000000 / HZ),
986 lapic_timer_period % (1000000 / HZ));
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000987
988 /*
989 * Do a sanity check on the APIC calibration result
990 */
David Brazdil0f672f62019-12-10 10:32:29 +0000991 if (lapic_timer_period < (1000000 / HZ)) {
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000992 local_irq_enable();
993 pr_warning("APIC frequency too slow, disabling apic timer\n");
994 return -1;
995 }
996
997 levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
998
999 /*
David Brazdil0f672f62019-12-10 10:32:29 +00001000 * PM timer calibration failed or not turned on so lets try APIC
1001 * timer based calibration, if a global clockevent device is
1002 * available.
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001003 */
David Brazdil0f672f62019-12-10 10:32:29 +00001004 if (!pm_referenced && global_clock_event) {
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001005 apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
1006
1007 /*
1008 * Setup the apic timer manually
1009 */
1010 levt->event_handler = lapic_cal_handler;
1011 lapic_timer_set_periodic(levt);
1012 lapic_cal_loops = -1;
1013
1014 /* Let the interrupts run */
1015 local_irq_enable();
1016
1017 while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
1018 cpu_relax();
1019
1020 /* Stop the lapic timer */
1021 local_irq_disable();
1022 lapic_timer_shutdown(levt);
1023
1024 /* Jiffies delta */
1025 deltaj = lapic_cal_j2 - lapic_cal_j1;
1026 apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj);
1027
1028 /* Check, if the jiffies result is consistent */
1029 if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2)
1030 apic_printk(APIC_VERBOSE, "... jiffies result ok\n");
1031 else
1032 levt->features |= CLOCK_EVT_FEAT_DUMMY;
1033 }
1034 local_irq_enable();
1035
1036 if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
1037 pr_warning("APIC timer disabled due to verification failure\n");
1038 return -1;
1039 }
1040
1041 return 0;
1042}
1043
/*
 * Setup the boot APIC
 *
 * Calibrate and verify the result.
 */
void __init setup_boot_APIC_clock(void)
{
	/*
	 * The local apic timer can be disabled via the kernel
	 * commandline or from the CPU detection code. Register the lapic
	 * timer as a dummy clock event source on SMP systems, so the
	 * broadcast mechanism is used. On UP systems simply ignore it.
	 */
	if (disable_apic_timer) {
		pr_info("Disabling APIC timer\n");
		/* No broadcast on UP ! */
		if (num_possible_cpus() > 1) {
			lapic_clockevent.mult = 1;
			setup_APIC_timer();
		}
		return;
	}

	/* Non-zero return means calibration failed: keep the dummy flag set */
	if (calibrate_APIC_clock()) {
		/* No broadcast on UP ! */
		if (num_possible_cpus() > 1)
			setup_APIC_timer();
		return;
	}

	/*
	 * If nmi_watchdog is set to IO_APIC, we need the
	 * PIT/HPET going.  Otherwise register lapic as a dummy
	 * device.
	 */
	lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;

	/* Setup the lapic or request the broadcast */
	setup_APIC_timer();
	amd_e400_c1e_apic_setup();
}
1085
/*
 * Set up the local APIC timer on a secondary CPU, mirroring the tail of
 * setup_boot_APIC_clock() (timer setup plus AMD E400 C1E workaround).
 */
void setup_secondary_APIC_clock(void)
{
	setup_APIC_timer();
	amd_e400_c1e_apic_setup();
}
1091
1092/*
1093 * The guts of the apic timer interrupt
1094 */
1095static void local_apic_timer_interrupt(void)
1096{
1097 struct clock_event_device *evt = this_cpu_ptr(&lapic_events);
1098
1099 /*
1100 * Normally we should not be here till LAPIC has been initialized but
1101 * in some cases like kdump, its possible that there is a pending LAPIC
1102 * timer interrupt from previous kernel's context and is delivered in
1103 * new kernel the moment interrupts are enabled.
1104 *
1105 * Interrupts are enabled early and LAPIC is setup much later, hence
1106 * its possible that when we get here evt->event_handler is NULL.
1107 * Check for event_handler being NULL and discard the interrupt as
1108 * spurious.
1109 */
1110 if (!evt->event_handler) {
1111 pr_warning("Spurious LAPIC timer interrupt on cpu %d\n",
1112 smp_processor_id());
1113 /* Switch it off */
1114 lapic_timer_shutdown(evt);
1115 return;
1116 }
1117
1118 /*
1119 * the NMI deadlock-detector uses this.
1120 */
1121 inc_irq_stat(apic_timer_irqs);
1122
1123 evt->event_handler(evt);
1124}
1125
/*
 * Local APIC timer interrupt. This is the most natural way for doing
 * local interrupts, but local timer interrupts can be emulated by
 * broadcast interrupts too. [in case the hw doesn't support APIC timers]
 *
 * [ if a single-CPU system runs an SMP kernel then we call the local
 *   interrupt as well. Thus we cannot inline the local irq ... ]
 */
__visible void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
{
	/* Save/restore so nested users of irq_regs see the right frame */
	struct pt_regs *old_regs = set_irq_regs(regs);

	/*
	 * NOTE! We'd better ACK the irq immediately,
	 * because timer handling can be slow.
	 *
	 * update_process_times() expects us to have done irq_enter().
	 * Besides, if we don't timer interrupts ignore the global
	 * interrupt lock, which is the WrongThing (tm) to do.
	 */
	entering_ack_irq();
	trace_local_timer_entry(LOCAL_TIMER_VECTOR);
	local_apic_timer_interrupt();
	trace_local_timer_exit(LOCAL_TIMER_VECTOR);
	exiting_irq();

	set_irq_regs(old_regs);
}
1154
/* Changing the profiling multiplier is not supported on x86. */
int setup_profiling_timer(unsigned int multiplier)
{
	return -EINVAL;
}
1159
/*
 * Local APIC start and shutdown
 */

/**
 * clear_local_APIC - shutdown the local APIC
 *
 * This is called, when a CPU is disabled and before rebooting, so the state of
 * the local APIC has no dangling leftovers. Also used to cleanout any BIOS
 * leftovers during boot.
 */
void clear_local_APIC(void)
{
	int maxlvt;
	u32 v;

	/* APIC hasn't been mapped yet */
	if (!x2apic_mode && !apic_phys)
		return;

	maxlvt = lapic_get_maxlvt();
	/*
	 * Masking an LVT entry can trigger a local APIC error
	 * if the vector is zero. Mask LVTERR first to prevent this.
	 */
	if (maxlvt >= 3) {
		v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
		apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
	}
	/*
	 * Careful: we have to set masks only first to deassert
	 * any level-triggered sources.
	 */
	v = apic_read(APIC_LVTT);
	apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
	v = apic_read(APIC_LVT0);
	apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
	v = apic_read(APIC_LVT1);
	apic_write(APIC_LVT1, v | APIC_LVT_MASKED);
	if (maxlvt >= 4) {
		v = apic_read(APIC_LVTPC);
		apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
	}

	/* lets not touch this if we didn't frob it */
#ifdef CONFIG_X86_THERMAL_VECTOR
	if (maxlvt >= 5) {
		v = apic_read(APIC_LVTTHMR);
		apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
	}
#endif
#ifdef CONFIG_X86_MCE_INTEL
	if (maxlvt >= 6) {
		v = apic_read(APIC_LVTCMCI);
		if (!(v & APIC_LVT_MASKED))
			apic_write(APIC_LVTCMCI, v | APIC_LVT_MASKED);
	}
#endif

	/*
	 * Clean APIC state for other OSs:
	 */
	apic_write(APIC_LVTT, APIC_LVT_MASKED);
	apic_write(APIC_LVT0, APIC_LVT_MASKED);
	apic_write(APIC_LVT1, APIC_LVT_MASKED);
	if (maxlvt >= 3)
		apic_write(APIC_LVTERR, APIC_LVT_MASKED);
	if (maxlvt >= 4)
		apic_write(APIC_LVTPC, APIC_LVT_MASKED);

	/* Integrated APIC (!82489DX) ? */
	if (lapic_is_integrated()) {
		if (maxlvt > 3)
			/* Clear ESR due to Pentium errata 3AP and 11AP */
			apic_write(APIC_ESR, 0);
		apic_read(APIC_ESR);
	}
}
1238
/**
 * apic_soft_disable - Clears and software disables the local APIC on hotplug
 *
 * Contrary to disable_local_APIC() this does not touch the enable bit in
 * MSR_IA32_APICBASE. Clearing that bit on systems based on the 3 wire APIC
 * bus would require a hardware reset as the APIC would lose track of bus
 * arbitration. On systems with FSB delivery APICBASE could be disabled,
 * but it has to be guaranteed that no interrupt is sent to the APIC while
 * in that state and it's not clear from the SDM whether it still responds
 * to INIT/SIPI messages. Stay on the safe side and use software disable.
 */
void apic_soft_disable(void)
{
	u32 value;

	/* Mask all LVT entries before dropping the enable bit */
	clear_local_APIC();

	/* Soft disable APIC (implies clearing of registers for 82489DX!). */
	value = apic_read(APIC_SPIV);
	value &= ~APIC_SPIV_APIC_ENABLED;
	apic_write(APIC_SPIV, value);
}
1261
/**
 * disable_local_APIC - clear and disable the local APIC
 */
void disable_local_APIC(void)
{
	/* APIC hasn't been mapped yet */
	if (!x2apic_mode && !apic_phys)
		return;

	/* Mask LVTs and clear the software enable bit in SPIV */
	apic_soft_disable();

#ifdef CONFIG_X86_32
	/*
	 * When LAPIC was disabled by the BIOS and enabled by the kernel,
	 * restore the disabled state.
	 */
	if (enabled_via_apicbase) {
		unsigned int l, h;

		rdmsr(MSR_IA32_APICBASE, l, h);
		l &= ~MSR_IA32_APICBASE_ENABLE;
		wrmsr(MSR_IA32_APICBASE, l, h);
	}
#endif
}
1287
/*
 * If Linux enabled the LAPIC against the BIOS default disable it down before
 * re-entering the BIOS on shutdown. Otherwise the BIOS may get confused and
 * not power-off. Additionally clear all LVT entries before disable_local_APIC
 * for the case where Linux didn't enable the LAPIC.
 */
void lapic_shutdown(void)
{
	unsigned long flags;

	if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config())
		return;

	/* The register sequence below must not be interrupted */
	local_irq_save(flags);

#ifdef CONFIG_X86_32
	if (!enabled_via_apicbase)
		clear_local_APIC();
	else
#endif
		disable_local_APIC();


	local_irq_restore(flags);
}
1313
/**
 * sync_Arb_IDs - synchronize APIC bus arbitration IDs
 */
void __init sync_Arb_IDs(void)
{
	/*
	 * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not
	 * needed on AMD.
	 */
	if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
		return;

	/*
	 * Wait for idle.
	 */
	apic_wait_icr_idle();

	apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
	/* Broadcast an INIT-level-deassert to resync arbitration IDs */
	apic_write(APIC_ICR, APIC_DEST_ALLINC |
			APIC_INT_LEVELTRIG | APIC_DM_INIT);
}
1335
David Brazdil0f672f62019-12-10 10:32:29 +00001336enum apic_intr_mode_id apic_intr_mode __ro_after_init;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001337
Olivier Deprez0e641232021-09-23 10:07:05 +02001338static int __init __apic_intr_mode_select(void)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001339{
1340 /* Check kernel option */
1341 if (disable_apic) {
1342 pr_info("APIC disabled via kernel command line\n");
1343 return APIC_PIC;
1344 }
1345
1346 /* Check BIOS */
1347#ifdef CONFIG_X86_64
1348 /* On 64-bit, the APIC must be integrated, Check local APIC only */
1349 if (!boot_cpu_has(X86_FEATURE_APIC)) {
1350 disable_apic = 1;
1351 pr_info("APIC disabled by BIOS\n");
1352 return APIC_PIC;
1353 }
1354#else
1355 /* On 32-bit, the APIC may be integrated APIC or 82489DX */
1356
1357 /* Neither 82489DX nor integrated APIC ? */
1358 if (!boot_cpu_has(X86_FEATURE_APIC) && !smp_found_config) {
1359 disable_apic = 1;
1360 return APIC_PIC;
1361 }
1362
1363 /* If the BIOS pretends there is an integrated APIC ? */
1364 if (!boot_cpu_has(X86_FEATURE_APIC) &&
1365 APIC_INTEGRATED(boot_cpu_apic_version)) {
1366 disable_apic = 1;
1367 pr_err(FW_BUG "Local APIC %d not detected, force emulation\n",
1368 boot_cpu_physical_apicid);
1369 return APIC_PIC;
1370 }
1371#endif
1372
1373 /* Check MP table or ACPI MADT configuration */
1374 if (!smp_found_config) {
1375 disable_ioapic_support();
1376 if (!acpi_lapic) {
1377 pr_info("APIC: ACPI MADT or MP tables are not detected\n");
1378 return APIC_VIRTUAL_WIRE_NO_CONFIG;
1379 }
1380 return APIC_VIRTUAL_WIRE;
1381 }
1382
1383#ifdef CONFIG_SMP
1384 /* If SMP should be disabled, then really disable it! */
1385 if (!setup_max_cpus) {
1386 pr_info("APIC: SMP mode deactivated\n");
1387 return APIC_SYMMETRIC_IO_NO_ROUTING;
1388 }
1389
1390 if (read_apic_id() != boot_cpu_physical_apicid) {
1391 panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
1392 read_apic_id(), boot_cpu_physical_apicid);
1393 /* Or can we switch back to PIC here? */
1394 }
1395#endif
1396
1397 return APIC_SYMMETRIC_IO;
1398}
1399
Olivier Deprez0e641232021-09-23 10:07:05 +02001400/* Select the interrupt delivery mode for the BSP */
1401void __init apic_intr_mode_select(void)
1402{
1403 apic_intr_mode = __apic_intr_mode_select();
1404}
1405
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001406/*
1407 * An initial setup of the virtual wire mode.
1408 */
1409void __init init_bsp_APIC(void)
1410{
1411 unsigned int value;
1412
1413 /*
1414 * Don't do the setup now if we have a SMP BIOS as the
1415 * through-I/O-APIC virtual wire mode might be active.
1416 */
1417 if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC))
1418 return;
1419
1420 /*
1421 * Do not trust the local APIC being empty at bootup.
1422 */
1423 clear_local_APIC();
1424
1425 /*
1426 * Enable APIC.
1427 */
1428 value = apic_read(APIC_SPIV);
1429 value &= ~APIC_VECTOR_MASK;
1430 value |= APIC_SPIV_APIC_ENABLED;
1431
1432#ifdef CONFIG_X86_32
1433 /* This bit is reserved on P4/Xeon and should be cleared */
1434 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
1435 (boot_cpu_data.x86 == 15))
1436 value &= ~APIC_SPIV_FOCUS_DISABLED;
1437 else
1438#endif
1439 value |= APIC_SPIV_FOCUS_DISABLED;
1440 value |= SPURIOUS_APIC_VECTOR;
1441 apic_write(APIC_SPIV, value);
1442
1443 /*
1444 * Set up the virtual wire mode.
1445 */
1446 apic_write(APIC_LVT0, APIC_DM_EXTINT);
1447 value = APIC_DM_NMI;
1448 if (!lapic_is_integrated()) /* 82489DX */
1449 value |= APIC_LVT_LEVEL_TRIGGER;
1450 if (apic_extnmi == APIC_EXTNMI_NONE)
1451 value |= APIC_LVT_MASKED;
1452 apic_write(APIC_LVT1, value);
1453}
1454
David Brazdil0f672f62019-12-10 10:32:29 +00001455static void __init apic_bsp_setup(bool upmode);
1456
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001457/* Init the interrupt delivery mode for the BSP */
1458void __init apic_intr_mode_init(void)
1459{
1460 bool upmode = IS_ENABLED(CONFIG_UP_LATE_INIT);
1461
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001462 switch (apic_intr_mode) {
1463 case APIC_PIC:
1464 pr_info("APIC: Keep in PIC mode(8259)\n");
1465 return;
1466 case APIC_VIRTUAL_WIRE:
1467 pr_info("APIC: Switch to virtual wire mode setup\n");
1468 default_setup_apic_routing();
1469 break;
1470 case APIC_VIRTUAL_WIRE_NO_CONFIG:
1471 pr_info("APIC: Switch to virtual wire mode setup with no configuration\n");
1472 upmode = true;
1473 default_setup_apic_routing();
1474 break;
1475 case APIC_SYMMETRIC_IO:
1476 pr_info("APIC: Switch to symmetric I/O mode setup\n");
1477 default_setup_apic_routing();
1478 break;
1479 case APIC_SYMMETRIC_IO_NO_ROUTING:
1480 pr_info("APIC: Switch to symmetric I/O mode setup in no SMP routine\n");
1481 break;
1482 }
1483
1484 apic_bsp_setup(upmode);
1485}
1486
/*
 * Enable error reporting via the LVT error register, unless the APIC is a
 * 82489DX (no ESR) or the apic driver asked for ESR to stay disabled.
 */
static void lapic_setup_esr(void)
{
	unsigned int oldvalue, value, maxlvt;

	if (!lapic_is_integrated()) {
		pr_info("No ESR for 82489DX.\n");
		return;
	}

	if (apic->disable_esr) {
		/*
		 * Something untraceable is creating bad interrupts on
		 * secondary quads ... for the moment, just leave the
		 * ESR disabled - we can't do anything useful with the
		 * errors anyway - mbligh
		 */
		pr_info("Leaving ESR disabled.\n");
		return;
	}

	maxlvt = lapic_get_maxlvt();
	if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
		apic_write(APIC_ESR, 0);
	oldvalue = apic_read(APIC_ESR);

	/* enables sending errors */
	value = ERROR_APIC_VECTOR;
	apic_write(APIC_LVTERR, value);

	/*
	 * spec says clear errors after enabling vector.
	 */
	if (maxlvt > 3)
		apic_write(APIC_ESR, 0);
	value = apic_read(APIC_ESR);
	if (value != oldvalue)
		apic_printk(APIC_VERBOSE, "ESR value before enabling "
			"vector: 0x%08x  after: 0x%08x\n",
			oldvalue, value);
}
1527
David Brazdil0f672f62019-12-10 10:32:29 +00001528#define APIC_IR_REGS APIC_ISR_NR
1529#define APIC_IR_BITS (APIC_IR_REGS * 32)
1530#define APIC_IR_MAPSIZE (APIC_IR_BITS / BITS_PER_LONG)
1531
1532union apic_ir {
1533 unsigned long map[APIC_IR_MAPSIZE];
1534 u32 regs[APIC_IR_REGS];
1535};
1536
1537static bool apic_check_and_ack(union apic_ir *irr, union apic_ir *isr)
1538{
1539 int i, bit;
1540
1541 /* Read the IRRs */
1542 for (i = 0; i < APIC_IR_REGS; i++)
1543 irr->regs[i] = apic_read(APIC_IRR + i * 0x10);
1544
1545 /* Read the ISRs */
1546 for (i = 0; i < APIC_IR_REGS; i++)
1547 isr->regs[i] = apic_read(APIC_ISR + i * 0x10);
1548
1549 /*
1550 * If the ISR map is not empty. ACK the APIC and run another round
1551 * to verify whether a pending IRR has been unblocked and turned
1552 * into a ISR.
1553 */
1554 if (!bitmap_empty(isr->map, APIC_IR_BITS)) {
1555 /*
1556 * There can be multiple ISR bits set when a high priority
1557 * interrupt preempted a lower priority one. Issue an ACK
1558 * per set bit.
1559 */
1560 for_each_set_bit(bit, isr->map, APIC_IR_BITS)
1561 ack_APIC_irq();
1562 return true;
1563 }
1564
1565 return !bitmap_empty(irr->map, APIC_IR_BITS);
1566}
1567
/*
 * After a crash, we no longer service the interrupts and a pending
 * interrupt from previous kernel might still have ISR bit set.
 *
 * Most probably by now the CPU has serviced that pending interrupt and it
 * might not have done the ack_APIC_irq() because it thought, interrupt
 * came from i8259 as ExtInt. LAPIC did not get EOI so it does not clear
 * the ISR bit and cpu thinks it has already serivced the interrupt. Hence
 * a vector might get locked. It was noticed for timer irq (vector
 * 0x31). Issue an extra EOI to clear ISR.
 *
 * If there are pending IRR bits they turn into ISR bits after a higher
 * priority ISR bit has been acked.
 */
static void apic_pending_intr_clear(void)
{
	union apic_ir irr, isr;
	unsigned int i;

	/* 512 loops are way oversized and give the APIC a chance to obey. */
	for (i = 0; i < 512; i++) {
		if (!apic_check_and_ack(&irr, &isr))
			return;
	}
	/* Dump the IRR/ISR content if that failed */
	pr_warn("APIC: Stale IRR: %256pb ISR: %256pb\n", irr.map, isr.map);
}
1595
/**
 * setup_local_APIC - setup the local APIC
 *
 * Used to setup local APIC while initializing BSP or bringing up APs.
 * Always called with preemption disabled.
 */
static void setup_local_APIC(void)
{
	int cpu = smp_processor_id();
	unsigned int value;

	if (disable_apic) {
		disable_ioapic_support();
		return;
	}

	/*
	 * If this comes from kexec/kcrash the APIC might be enabled in
	 * SPIV. Soft disable it before doing further initialization.
	 */
	value = apic_read(APIC_SPIV);
	value &= ~APIC_SPIV_APIC_ENABLED;
	apic_write(APIC_SPIV, value);

#ifdef CONFIG_X86_32
	/* Pound the ESR really hard over the head with a big hammer - mbligh */
	if (lapic_is_integrated() && apic->disable_esr) {
		apic_write(APIC_ESR, 0);
		apic_write(APIC_ESR, 0);
		apic_write(APIC_ESR, 0);
		apic_write(APIC_ESR, 0);
	}
#endif
	/*
	 * Double-check whether this APIC is really registered.
	 * This is meaningless in clustered apic mode, so we skip it.
	 */
	BUG_ON(!apic->apic_id_registered());

	/*
	 * Intel recommends to set DFR, LDR and TPR before enabling
	 * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
	 * document number 292116).  So here it goes...
	 */
	apic->init_apic_ldr();

#ifdef CONFIG_X86_32
	if (apic->dest_logical) {
		int logical_apicid, ldr_apicid;

		/*
		 * APIC LDR is initialized.  If logical_apicid mapping was
		 * initialized during get_smp_config(), make sure it matches
		 * the actual value.
		 */
		logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
		ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
		if (logical_apicid != BAD_APICID)
			WARN_ON(logical_apicid != ldr_apicid);
		/* Always use the value from LDR. */
		early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid;
	}
#endif

	/*
	 * Set Task Priority to 'accept all except vectors 0-31'.  An APIC
	 * vector in the 16-31 range could be delivered if TPR == 0, but we
	 * would think it's an exception and terrible things will happen.  We
	 * never change this later on.
	 */
	value = apic_read(APIC_TASKPRI);
	value &= ~APIC_TPRI_MASK;
	value |= 0x10;
	apic_write(APIC_TASKPRI, value);

	/* Clear eventually stale ISR/IRR bits */
	apic_pending_intr_clear();

	/*
	 * Now that we are all set up, enable the APIC
	 */
	value = apic_read(APIC_SPIV);
	value &= ~APIC_VECTOR_MASK;
	/*
	 * Enable APIC
	 */
	value |= APIC_SPIV_APIC_ENABLED;

#ifdef CONFIG_X86_32
	/*
	 * Some unknown Intel IO/APIC (or APIC) errata is biting us with
	 * certain networking cards. If high frequency interrupts are
	 * happening on a particular IOAPIC pin, plus the IOAPIC routing
	 * entry is masked/unmasked at a high rate as well then sooner or
	 * later IOAPIC line gets 'stuck', no more interrupts are received
	 * from the device. If focus CPU is disabled then the hang goes
	 * away, oh well :-(
	 *
	 * [ This bug can be reproduced easily with a level-triggered
	 *   PCI Ne2000 networking cards and PII/PIII processors, dual
	 *   BX chipset. ]
	 */
	/*
	 * Actually disabling the focus CPU check just makes the hang less
	 * frequent as it makes the interrupt distributon model be more
	 * like LRU than MRU (the short-term load is more even across CPUs).
	 */

	/*
	 * - enable focus processor (bit==0)
	 * - 64bit mode always use processor focus
	 *   so no need to set it
	 */
	value &= ~APIC_SPIV_FOCUS_DISABLED;
#endif

	/*
	 * Set spurious IRQ vector
	 */
	value |= SPURIOUS_APIC_VECTOR;
	apic_write(APIC_SPIV, value);

	perf_events_lapic_init();

	/*
	 * Set up LVT0, LVT1:
	 *
	 * set up through-local-APIC on the boot CPU's LINT0. This is not
	 * strictly necessary in pure symmetric-IO mode, but sometimes
	 * we delegate interrupts to the 8259A.
	 */
	/*
	 * TODO: set up through-local-APIC from through-I/O-APIC? --macro
	 */
	value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
	if (!cpu && (pic_mode || !value || skip_ioapic_setup)) {
		value = APIC_DM_EXTINT;
		apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", cpu);
	} else {
		value = APIC_DM_EXTINT | APIC_LVT_MASKED;
		apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", cpu);
	}
	apic_write(APIC_LVT0, value);

	/*
	 * Only the BSP sees the LINT1 NMI signal by default. This can be
	 * modified by apic_extnmi= boot option.
	 */
	if ((!cpu && apic_extnmi != APIC_EXTNMI_NONE) ||
	    apic_extnmi == APIC_EXTNMI_ALL)
		value = APIC_DM_NMI;
	else
		value = APIC_DM_NMI | APIC_LVT_MASKED;

	/* Is 82489DX ? */
	if (!lapic_is_integrated())
		value |= APIC_LVT_LEVEL_TRIGGER;
	apic_write(APIC_LVT1, value);

#ifdef CONFIG_X86_MCE_INTEL
	/* Recheck CMCI information after local APIC is up on CPU #0 */
	if (!cpu)
		cmci_recheck();
#endif
}
1761
/* Final per-CPU APIC setup steps: enable ESR and power management hooks. */
static void end_local_APIC_setup(void)
{
	lapic_setup_esr();

#ifdef CONFIG_X86_32
	{
		unsigned int value;
		/* Disable the local apic timer */
		value = apic_read(APIC_LVTT);
		value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
		apic_write(APIC_LVTT, value);
	}
#endif

	apic_pm_activate();
}
1778
/*
 * APIC setup function for application processors. Called from smpboot.c
 */
void apic_ap_setup(void)
{
	setup_local_APIC();
	end_local_APIC_setup();
}
1787
1788#ifdef CONFIG_X86_X2APIC
1789int x2apic_mode;
1790
1791enum {
1792 X2APIC_OFF,
1793 X2APIC_ON,
1794 X2APIC_DISABLED,
1795};
1796static int x2apic_state;
1797
/*
 * Drop the CPU out of x2apic mode back to xapic via MSR_IA32_APICBASE.
 * No-op if there is no APIC or x2apic is not currently enabled.
 */
static void __x2apic_disable(void)
{
	u64 msr;

	if (!boot_cpu_has(X86_FEATURE_APIC))
		return;

	rdmsrl(MSR_IA32_APICBASE, msr);
	if (!(msr & X2APIC_ENABLE))
		return;
	/* Disable xapic and x2apic first and then reenable xapic mode */
	wrmsrl(MSR_IA32_APICBASE, msr & ~(X2APIC_ENABLE | XAPIC_ENABLE));
	wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE);
	printk_once(KERN_INFO "x2apic disabled\n");
}
1813
/* Set the x2apic enable bit in MSR_IA32_APICBASE if not already set. */
static void __x2apic_enable(void)
{
	u64 msr;

	rdmsrl(MSR_IA32_APICBASE, msr);
	if (msr & X2APIC_ENABLE)
		return;
	wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE);
	printk_once(KERN_INFO "x2apic enabled\n");
}
1824
1825static int __init setup_nox2apic(char *str)
1826{
1827 if (x2apic_enabled()) {
1828 int apicid = native_apic_msr_read(APIC_ID);
1829
1830 if (apicid >= 255) {
1831 pr_warning("Apicid: %08x, cannot enforce nox2apic\n",
1832 apicid);
1833 return 0;
1834 }
1835 pr_warning("x2apic already enabled.\n");
1836 __x2apic_disable();
1837 }
1838 setup_clear_cpu_cap(X86_FEATURE_X2APIC);
1839 x2apic_state = X2APIC_DISABLED;
1840 x2apic_mode = 0;
1841 return 0;
1842}
1843early_param("nox2apic", setup_nox2apic);
1844
/* Called from cpu_init() to enable x2apic on (secondary) cpus */
void x2apic_setup(void)
{
	/*
	 * If x2apic is not in ON state, disable it if already enabled
	 * from BIOS.
	 */
	if (x2apic_state != X2APIC_ON) {
		__x2apic_disable();
		return;
	}
	__x2apic_enable();
}
1858
/*
 * Permanently disable x2apic mode. Panics if the boot CPU's APIC ID does
 * not fit in 8 bits, since such a CPU cannot be addressed in xapic mode.
 */
static __init void x2apic_disable(void)
{
	u32 x2apic_id, state = x2apic_state;

	x2apic_mode = 0;
	x2apic_state = X2APIC_DISABLED;

	if (state != X2APIC_ON)
		return;

	x2apic_id = read_apic_id();
	if (x2apic_id >= 255)
		panic("Cannot disable x2apic, id: %08x\n", x2apic_id);

	__x2apic_disable();
	/* Re-register the MMIO base for xapic mode accesses */
	register_lapic_address(mp_lapic_addr);
}
1876
/* Enable x2apic mode unless it was explicitly disabled (nox2apic). */
static __init void x2apic_enable(void)
{
	if (x2apic_state != X2APIC_OFF)
		return;

	x2apic_mode = 1;
	x2apic_state = X2APIC_ON;
	__x2apic_enable();
}
1886
/*
 * Enable x2apic if possible. @remap_mode is the interrupt remapping mode
 * (or an error code) returned by irq_remapping_enable().
 */
static __init void try_to_enable_x2apic(int remap_mode)
{
	if (x2apic_state == X2APIC_DISABLED)
		return;

	if (remap_mode != IRQ_REMAP_X2APIC_MODE) {
		/*
		 * Using X2APIC without IR is not architecturally supported
		 * on bare metal but may be supported in guests.
		 */
		if (!x86_init.hyper.x2apic_available()) {
			pr_info("x2apic: IRQ remapping doesn't support X2APIC mode\n");
			x2apic_disable();
			return;
		}

		/*
		 * Without IR, all CPUs can be addressed by IOAPIC/MSI only
		 * in physical mode, and CPUs with an APIC ID that cannnot
		 * be addressed must not be brought online.
		 */
		x2apic_set_max_apicid(255);
		x2apic_phys = 1;
	}
	x2apic_enable();
}
1913
/* Record the BIOS-provided x2apic state early during boot. */
void __init check_x2apic(void)
{
	if (x2apic_enabled()) {
		pr_info("x2apic: enabled by BIOS, switching to x2apic ops\n");
		x2apic_mode = 1;
		x2apic_state = X2APIC_ON;
	} else if (!boot_cpu_has(X86_FEATURE_X2APIC)) {
		x2apic_state = X2APIC_DISABLED;
	}
}
1924#else /* CONFIG_X86_X2APIC */
/*
 * CONFIG_X86_X2APIC is off: the kernel cannot drive an x2apic, so panic
 * if the BIOS left it enabled.
 */
static int __init validate_x2apic(void)
{
	if (!apic_is_x2apic_enabled())
		return 0;
	/*
	 * Checkme: Can we simply turn off x2apic here instead of panic?
	 */
	panic("BIOS has enabled x2apic but kernel doesn't support x2apic, please disable x2apic in BIOS.\n");
}
early_initcall(validate_x2apic);
1935
/* Stubs for kernels built without x2apic support. */
static inline void try_to_enable_x2apic(int remap_mode) { }
static inline void __x2apic_enable(void) { }
1938#endif /* !CONFIG_X86_X2APIC */
1939
/*
 * Prepare and enable interrupt remapping, then try to switch to x2apic.
 * IO-APIC entries are saved and masked around the transition and restored
 * if remapping could not be enabled.
 */
void __init enable_IR_x2apic(void)
{
	unsigned long flags;
	int ret, ir_stat;

	if (skip_ioapic_setup) {
		pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n");
		return;
	}

	ir_stat = irq_remapping_prepare();
	if (ir_stat < 0 && !x2apic_supported())
		return;

	ret = save_ioapic_entries();
	if (ret) {
		pr_info("Saving IO-APIC state failed: %d\n", ret);
		return;
	}

	local_irq_save(flags);
	legacy_pic->mask_all();
	mask_ioapic_entries();

	/* If irq_remapping_prepare() succeeded, try to enable it */
	if (ir_stat >= 0)
		ir_stat = irq_remapping_enable();
	/* ir_stat contains the remap mode or an error code */
	try_to_enable_x2apic(ir_stat);

	if (ir_stat < 0)
		restore_ioapic_entries();
	legacy_pic->restore_mask();
	local_irq_restore(flags);
}
1975
#ifdef CONFIG_X86_64
/*
 * Detect and enable local APICs on non-SMP boards.
 * Original code written by Keir Fraser.
 * On AMD64 we trust the BIOS - if it says no APIC it is likely
 * not correctly set up (usually the APIC timer won't work etc.)
 */
static int __init detect_init_APIC(void)
{
	if (!boot_cpu_has(X86_FEATURE_APIC)) {
		pr_info("No local APIC present\n");
		return -1;
	}

	/* 64-bit APICs are always at the architectural default base */
	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
	return 0;
}
#else
1993#else
1994
1995static int __init apic_verify(void)
1996{
1997 u32 features, h, l;
1998
1999 /*
2000 * The APIC feature bit should now be enabled
2001 * in `cpuid'
2002 */
2003 features = cpuid_edx(1);
2004 if (!(features & (1 << X86_FEATURE_APIC))) {
2005 pr_warning("Could not enable APIC!\n");
2006 return -1;
2007 }
2008 set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
2009 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
2010
2011 /* The BIOS may have set up the APIC at some other address */
2012 if (boot_cpu_data.x86 >= 6) {
2013 rdmsr(MSR_IA32_APICBASE, l, h);
2014 if (l & MSR_IA32_APICBASE_ENABLE)
2015 mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
2016 }
2017
2018 pr_info("Found and enabled local APIC!\n");
2019 return 0;
2020}
2021
/*
 * apic_force_enable() - re-enable a BIOS-disabled local APIC at @addr via
 * the APIC_BASE MSR, then confirm it with apic_verify().
 */
int __init apic_force_enable(unsigned long addr)
{
	u32 h, l;

	/* "disableapic"/"nolapic" on the command line wins */
	if (disable_apic)
		return -1;

	/*
	 * Some BIOSes disable the local APIC in the APIC_BASE
	 * MSR. This can only be done in software for Intel P6 or later
	 * and AMD K7 (Model > 1) or later.
	 */
	if (boot_cpu_data.x86 >= 6) {
		rdmsr(MSR_IA32_APICBASE, l, h);
		if (!(l & MSR_IA32_APICBASE_ENABLE)) {
			pr_info("Local APIC disabled by BIOS -- reenabling.\n");
			l &= ~MSR_IA32_APICBASE_BASE;
			l |= MSR_IA32_APICBASE_ENABLE | addr;
			wrmsr(MSR_IA32_APICBASE, l, h);
			/* Remember that we (not the BIOS) enabled it */
			enabled_via_apicbase = 1;
		}
	}
	return apic_verify();
}
2046
/*
 * Detect and initialize APIC (32-bit variant).
 * Returns 0 on success, -1 if no usable local APIC was found/enabled.
 */
static int __init detect_init_APIC(void)
{
	/* Disabled by kernel option? */
	if (disable_apic)
		return -1;

	/*
	 * Only vendors/families known to have a workable local APIC get
	 * past this switch: AMD K7 (model > 1) or family >= 15, Hygon,
	 * Intel P6/P4, or family 5 with the APIC CPUID bit actually set.
	 */
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) ||
		    (boot_cpu_data.x86 >= 15))
			break;
		goto no_apic;
	case X86_VENDOR_HYGON:
		break;
	case X86_VENDOR_INTEL:
		if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
		    (boot_cpu_data.x86 == 5 && boot_cpu_has(X86_FEATURE_APIC)))
			break;
		goto no_apic;
	default:
		goto no_apic;
	}

	if (!boot_cpu_has(X86_FEATURE_APIC)) {
		/*
		 * Over-ride BIOS and try to enable the local APIC only if
		 * "lapic" specified.
		 */
		if (!force_enable_local_apic) {
			pr_info("Local APIC disabled by BIOS -- "
				"you can enable it with \"lapic\"\n");
			return -1;
		}
		if (apic_force_enable(APIC_DEFAULT_PHYS_BASE))
			return -1;
	} else {
		if (apic_verify())
			return -1;
	}

	apic_pm_activate();

	return 0;

no_apic:
	pr_info("No local APIC present or hardware disabled\n");
	return -1;
}
2098#endif
2099
2100/**
2101 * init_apic_mappings - initialize APIC mappings
2102 */
void __init init_apic_mappings(void)
{
	unsigned int new_apicid;

	if (apic_validate_deadline_timer())
		pr_info("TSC deadline timer available\n");

	/* x2APIC mode uses MSR accesses: no MMIO mapping is needed */
	if (x2apic_mode) {
		boot_cpu_physical_apicid = read_apic_id();
		return;
	}

	/* If no local APIC can be found return early */
	if (!smp_found_config && detect_init_APIC()) {
		/* lets NOP'ify apic operations */
		pr_info("APIC: disable apic facility\n");
		apic_disable();
	} else {
		apic_phys = mp_lapic_addr;

		/*
		 * If the system has ACPI MADT tables or MP info, the LAPIC
		 * address is already registered.
		 */
		if (!acpi_lapic && !smp_found_config)
			register_lapic_address(apic_phys);
	}

	/*
	 * Fetch the APIC ID of the BSP in case we have a
	 * default configuration (or the MP table is broken).
	 */
	new_apicid = read_apic_id();
	if (boot_cpu_physical_apicid != new_apicid) {
		boot_cpu_physical_apicid = new_apicid;
		/*
		 * yeah -- we lie about apic_version
		 * in case if apic was disabled via boot option
		 * but it's not a problem for SMP compiled kernel
		 * since apic_intr_mode_select is prepared for such
		 * a case and disable smp mode
		 */
		boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR));
	}
}
2148
/* Record the LAPIC physical base and, in xAPIC mode, map its MMIO window. */
void __init register_lapic_address(unsigned long address)
{
	mp_lapic_addr = address;

	/* x2APIC uses MSRs; only xAPIC needs the fixmap MMIO mapping */
	if (!x2apic_mode) {
		set_fixmap_nocache(FIX_APIC_BASE, address);
		apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
			    APIC_BASE, address);
	}
	/* First registration: latch the BSP's APIC ID and version */
	if (boot_cpu_physical_apicid == -1U) {
		boot_cpu_physical_apicid = read_apic_id();
		boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR));
	}
}
2163
2164/*
2165 * Local APIC interrupts
2166 */
2167
2168/*
2169 * This interrupt should _never_ happen with our APIC/SMP architecture
2170 */
__visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs)
{
	/*
	 * NOTE(review): orig_ax appears to hold the bitwise complement of
	 * the vector (the ~ undoes it) -- set up by the irq entry stubs;
	 * confirm against the entry code.
	 */
	u8 vector = ~regs->orig_ax;
	u32 v;

	entering_irq();
	trace_spurious_apic_entry(vector);

	inc_irq_stat(irq_spurious_count);

	/*
	 * If this is a spurious interrupt then do not acknowledge
	 */
	if (vector == SPURIOUS_APIC_VECTOR) {
		/* See SDM vol 3 */
		pr_info("Spurious APIC interrupt (vector 0xFF) on CPU#%d, should never happen.\n",
			smp_processor_id());
		goto out;
	}

	/*
	 * If it is a vectored one, verify it's set in the ISR. If set,
	 * acknowledge it.
	 */
	v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1));
	if (v & (1 << (vector & 0x1f))) {
		pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Acked\n",
			vector, smp_processor_id());
		ack_APIC_irq();
	} else {
		pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Not pending!\n",
			vector, smp_processor_id());
	}
out:
	trace_spurious_apic_exit(vector);
	exiting_irq();
}
2208
2209/*
2210 * This interrupt should never happen with our APIC/SMP architecture
2211 */
2212__visible void __irq_entry smp_error_interrupt(struct pt_regs *regs)
2213{
2214 static const char * const error_interrupt_reason[] = {
2215 "Send CS error", /* APIC Error Bit 0 */
2216 "Receive CS error", /* APIC Error Bit 1 */
2217 "Send accept error", /* APIC Error Bit 2 */
2218 "Receive accept error", /* APIC Error Bit 3 */
2219 "Redirectable IPI", /* APIC Error Bit 4 */
2220 "Send illegal vector", /* APIC Error Bit 5 */
2221 "Received illegal vector", /* APIC Error Bit 6 */
2222 "Illegal register address", /* APIC Error Bit 7 */
2223 };
2224 u32 v, i = 0;
2225
2226 entering_irq();
2227 trace_error_apic_entry(ERROR_APIC_VECTOR);
2228
2229 /* First tickle the hardware, only then report what went on. -- REW */
2230 if (lapic_get_maxlvt() > 3) /* Due to the Pentium erratum 3AP. */
2231 apic_write(APIC_ESR, 0);
2232 v = apic_read(APIC_ESR);
2233 ack_APIC_irq();
2234 atomic_inc(&irq_err_count);
2235
2236 apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x",
2237 smp_processor_id(), v);
2238
2239 v &= 0xff;
2240 while (v) {
2241 if (v & 0x1)
2242 apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]);
2243 i++;
2244 v >>= 1;
2245 }
2246
2247 apic_printk(APIC_DEBUG, KERN_CONT "\n");
2248
2249 trace_error_apic_exit(ERROR_APIC_VECTOR);
2250 exiting_irq();
2251}
2252
2253/**
2254 * connect_bsp_APIC - attach the APIC to the interrupt system
2255 */
static void __init connect_bsp_APIC(void)
{
	/* Only 32-bit boards in PIC mode have an IMCR to reprogram */
#ifdef CONFIG_X86_32
	if (pic_mode) {
		/*
		 * Do not trust the local APIC being empty at bootup.
		 */
		clear_local_APIC();
		/*
		 * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's
		 * local APIC to INT and NMI lines.
		 */
		apic_printk(APIC_VERBOSE, "leaving PIC mode, "
				"enabling APIC mode.\n");
		imcr_pic_to_apic();
	}
#endif
}
2274
2275/**
2276 * disconnect_bsp_APIC - detach the APIC from the interrupt system
2277 * @virt_wire_setup: indicates, whether virtual wire mode is selected
2278 *
2279 * Virtual wire mode is necessary to deliver legacy interrupts even when the
2280 * APIC is disabled.
2281 */
void disconnect_bsp_APIC(int virt_wire_setup)
{
	unsigned int value;

#ifdef CONFIG_X86_32
	if (pic_mode) {
		/*
		 * Put the board back into PIC mode (has an effect only on
		 * certain older boards). Note that APIC interrupts, including
		 * IPIs, won't work beyond this point! The only exception are
		 * INIT IPIs.
		 */
		apic_printk(APIC_VERBOSE, "disabling APIC mode, "
				"entering PIC mode.\n");
		imcr_apic_to_pic();
		return;
	}
#endif

	/* Go back to Virtual Wire compatibility mode */

	/* For the spurious interrupt use vector F, and enable it */
	value = apic_read(APIC_SPIV);
	value &= ~APIC_VECTOR_MASK;
	value |= APIC_SPIV_APIC_ENABLED;
	value |= 0xf;
	apic_write(APIC_SPIV, value);

	if (!virt_wire_setup) {
		/*
		 * For LVT0 make it edge triggered, active high,
		 * external and enabled
		 */
		value = apic_read(APIC_LVT0);
		value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
			APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
			APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
		value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
		value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
		apic_write(APIC_LVT0, value);
	} else {
		/* Disable LVT0 */
		apic_write(APIC_LVT0, APIC_LVT_MASKED);
	}

	/*
	 * For LVT1 make it edge triggered, active high,
	 * nmi and enabled
	 */
	value = apic_read(APIC_LVT1);
	value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
			APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
			APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
	value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
	value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
	apic_write(APIC_LVT1, value);
}
2339
2340/*
2341 * The number of allocated logical CPU IDs. Since logical CPU IDs are allocated
2342 * contiguously, it equals to current allocated max logical CPU ID plus 1.
2343 * All allocated CPU IDs should be in the [0, nr_logical_cpuids) range,
2344 * so the maximum of nr_logical_cpuids is nr_cpu_ids.
2345 *
2346 * NOTE: Reserve 0 for BSP.
2347 */
2348static int nr_logical_cpuids = 1;
2349
2350/*
2351 * Used to store mapping between logical CPU IDs and APIC IDs.
2352 */
2353static int cpuid_to_apicid[] = {
2354 [0 ... NR_CPUS - 1] = -1,
2355};
2356
Olivier Deprez0e641232021-09-23 10:07:05 +02002357bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
2358{
2359 return phys_id == cpuid_to_apicid[cpu];
2360}
2361
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002362#ifdef CONFIG_SMP
/**
 * apic_id_is_primary_thread - Check whether APIC ID belongs to a primary thread
 * @apicid: APIC ID to check
 */
bool apic_id_is_primary_thread(unsigned int apicid)
{
	u32 mask;

	if (smp_num_siblings == 1)
		return true;
	/* Isolate the SMT bit(s) in the APICID and check for 0 */
	mask = (1U << (fls(smp_num_siblings) - 1)) - 1;
	return !(apicid & mask);
}
2377#endif
2378
2379/*
2380 * Should use this API to allocate logical CPU IDs to keep nr_logical_cpuids
2381 * and cpuid_to_apicid[] synchronized.
2382 */
2383static int allocate_logical_cpuid(int apicid)
2384{
2385 int i;
2386
2387 /*
2388 * cpuid <-> apicid mapping is persistent, so when a cpu is up,
2389 * check if the kernel has allocated a cpuid for it.
2390 */
2391 for (i = 0; i < nr_logical_cpuids; i++) {
2392 if (cpuid_to_apicid[i] == apicid)
2393 return i;
2394 }
2395
2396 /* Allocate a new cpuid. */
2397 if (nr_logical_cpuids >= nr_cpu_ids) {
2398 WARN_ONCE(1, "APIC: NR_CPUS/possible_cpus limit of %u reached. "
2399 "Processor %d/0x%x and the rest are ignored.\n",
2400 nr_cpu_ids, nr_logical_cpuids, apicid);
2401 return -EINVAL;
2402 }
2403
2404 cpuid_to_apicid[nr_logical_cpuids] = apicid;
2405 return nr_logical_cpuids++;
2406}
2407
/*
 * Register one processor discovered during enumeration. Returns the logical
 * CPU number on success, or a negative error when the CPU is ignored.
 */
int generic_processor_info(int apicid, int version)
{
	int cpu, max = nr_cpu_ids;
	bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid,
				phys_cpu_present_map);

	/*
	 * boot_cpu_physical_apicid is designed to have the apicid
	 * returned by read_apic_id(), i.e, the apicid of the
	 * currently booting-up processor. However, on some platforms,
	 * it is temporarily modified by the apicid reported as BSP
	 * through MP table. Concretely:
	 *
	 * - arch/x86/kernel/mpparse.c: MP_processor_info()
	 * - arch/x86/mm/amdtopology.c: amd_numa_init()
	 *
	 * This function is executed with the modified
	 * boot_cpu_physical_apicid. So, disabled_cpu_apicid kernel
	 * parameter doesn't work to disable APs on kdump 2nd kernel.
	 *
	 * Since fixing handling of boot_cpu_physical_apicid requires
	 * another discussion and tests on each platform, we leave it
	 * for now and here we use read_apic_id() directly in this
	 * function, generic_processor_info().
	 */
	if (disabled_cpu_apicid != BAD_APICID &&
	    disabled_cpu_apicid != read_apic_id() &&
	    disabled_cpu_apicid == apicid) {
		int thiscpu = num_processors + disabled_cpus;

		pr_warning("APIC: Disabling requested cpu."
			   " Processor %d/0x%x ignored.\n",
			   thiscpu, apicid);

		disabled_cpus++;
		return -ENODEV;
	}

	/*
	 * If boot cpu has not been detected yet, then only allow upto
	 * nr_cpu_ids - 1 processors and keep one slot free for boot cpu
	 */
	if (!boot_cpu_detected && num_processors >= nr_cpu_ids - 1 &&
	    apicid != boot_cpu_physical_apicid) {
		int thiscpu = max + disabled_cpus - 1;

		pr_warning(
			"APIC: NR_CPUS/possible_cpus limit of %i almost"
			" reached. Keeping one slot for boot cpu."
			" Processor %d/0x%x ignored.\n", max, thiscpu, apicid);

		disabled_cpus++;
		return -ENODEV;
	}

	/* All logical CPU slots are taken: count and ignore this one */
	if (num_processors >= nr_cpu_ids) {
		int thiscpu = max + disabled_cpus;

		pr_warning("APIC: NR_CPUS/possible_cpus limit of %i "
			   "reached. Processor %d/0x%x ignored.\n",
			   max, thiscpu, apicid);

		disabled_cpus++;
		return -EINVAL;
	}

	if (apicid == boot_cpu_physical_apicid) {
		/*
		 * x86_bios_cpu_apicid is required to have processors listed
		 * in same order as logical cpu numbers. Hence the first
		 * entry is BSP, and so on.
		 * boot_cpu_init() already hold bit 0 in cpu_present_mask
		 * for BSP.
		 */
		cpu = 0;

		/* Logical cpuid 0 is reserved for BSP. */
		cpuid_to_apicid[0] = apicid;
	} else {
		cpu = allocate_logical_cpuid(apicid);
		if (cpu < 0) {
			disabled_cpus++;
			return -EINVAL;
		}
	}

	/*
	 * Validate version
	 */
	if (version == 0x0) {
		pr_warning("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n",
			   cpu, apicid);
		version = 0x10;
	}

	if (version != boot_cpu_apic_version) {
		pr_warning("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n",
			boot_cpu_apic_version, cpu, version);
	}

	if (apicid > max_physical_apicid)
		max_physical_apicid = apicid;

#if defined(CONFIG_SMP) || defined(CONFIG_X86_64)
	early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
	early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
#endif
#ifdef CONFIG_X86_32
	early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
		apic->x86_32_early_logical_apicid(cpu);
#endif
	/* Commit: mark the CPU possible/present and bump the count */
	set_cpu_possible(cpu, true);
	physid_set(apicid, phys_cpu_present_map);
	set_cpu_present(cpu, true);
	num_processors++;

	return cpu;
}
2526
/* Return the APIC ID of the CPU this is executing on. */
int hard_smp_processor_id(void)
{
	return read_apic_id();
}
2531
2532/*
2533 * Override the generic EOI implementation with an optimized version.
2534 * Only called during early boot when only one CPU is active and with
2535 * interrupts disabled, so we know this does not race with actual APIC driver
2536 * use.
2537 */
2538void __init apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v))
2539{
2540 struct apic **drv;
2541
2542 for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
2543 /* Should happen once for each apic */
2544 WARN_ON((*drv)->eoi_write == eoi_write);
2545 (*drv)->native_eoi_write = (*drv)->eoi_write;
2546 (*drv)->eoi_write = eoi_write;
2547 }
2548}
2549
/* UP boot path: make sure the BSP's APIC ID is known and marked present. */
static void __init apic_bsp_up_setup(void)
{
#ifdef CONFIG_X86_64
	apic_write(APIC_ID, apic->set_apic_id(boot_cpu_physical_apicid));
#else
	/*
	 * Hack: In case of kdump, after a crash, kernel might be booting
	 * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid
	 * might be zero if read from MP tables. Get it from LAPIC.
	 */
# ifdef CONFIG_CRASH_DUMP
	boot_cpu_physical_apicid = read_apic_id();
# endif
#endif
	physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
}
2566
/**
 * apic_bsp_setup - Setup function for local apic and io-apic
 * @upmode: Force UP mode (for APIC_init_uniprocessor)
 *
 * NOTE(review): the call sequence below looks order-sensitive (local APIC
 * before IO-APIC, remap fault handling before setup_IO_APIC) -- do not
 * reorder without checking the callees.
 */
static void __init apic_bsp_setup(bool upmode)
{
	connect_bsp_APIC();
	if (upmode)
		apic_bsp_up_setup();
	setup_local_APIC();

	enable_IO_APIC();
	end_local_APIC_setup();
	irq_remap_enable_fault_handling();
	setup_IO_APIC();
	lapic_update_legacy_vectors();
}
2584
2585#ifdef CONFIG_UP_LATE_INIT
2586void __init up_late_init(void)
2587{
2588 if (apic_intr_mode == APIC_PIC)
2589 return;
2590
2591 /* Setup local timer */
2592 x86_init.timers.setup_percpu_clockev();
2593}
2594#endif
2595
2596/*
2597 * Power management
2598 */
2599#ifdef CONFIG_PM
2600
/* LAPIC register snapshot taken by lapic_suspend(), replayed by lapic_resume() */
static struct {
	/*
	 * 'active' is true if the local APIC was enabled by us and
	 * not the BIOS; this signifies that we are also responsible
	 * for disabling it before entering apm/acpi suspend
	 */
	int active;
	/* r/w apic fields */
	unsigned int apic_id;
	unsigned int apic_taskpri;
	unsigned int apic_ldr;
	unsigned int apic_dfr;
	unsigned int apic_spiv;
	unsigned int apic_lvtt;
	unsigned int apic_lvtpc;
	unsigned int apic_lvt0;
	unsigned int apic_lvt1;
	unsigned int apic_lvterr;
	unsigned int apic_tmict;
	unsigned int apic_tdcr;
	unsigned int apic_thmr;
	unsigned int apic_cmci;
} apic_pm_state;
2624
/* Snapshot LAPIC state, then disable the APIC and IRQ remapping for suspend. */
static int lapic_suspend(void)
{
	unsigned long flags;
	int maxlvt;

	/* Nothing to do if the BIOS, not us, owns the APIC */
	if (!apic_pm_state.active)
		return 0;

	maxlvt = lapic_get_maxlvt();

	apic_pm_state.apic_id = apic_read(APIC_ID);
	apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
	apic_pm_state.apic_ldr = apic_read(APIC_LDR);
	apic_pm_state.apic_dfr = apic_read(APIC_DFR);
	apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
	apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
	/* LVTPC only exists when the local vector table has >= 5 entries */
	if (maxlvt >= 4)
		apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
	apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
	apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
	apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
	apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
	apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
#ifdef CONFIG_X86_THERMAL_VECTOR
	if (maxlvt >= 5)
		apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
#endif
#ifdef CONFIG_X86_MCE_INTEL
	if (maxlvt >= 6)
		apic_pm_state.apic_cmci = apic_read(APIC_LVTCMCI);
#endif

	/* Disable with interrupts off so nothing races the teardown */
	local_irq_save(flags);
	disable_local_APIC();

	irq_remapping_disable();

	local_irq_restore(flags);
	return 0;
}
2665
/* Replay the LAPIC state saved by lapic_suspend() after resume. */
static void lapic_resume(void)
{
	unsigned int l, h;
	unsigned long flags;
	int maxlvt;

	if (!apic_pm_state.active)
		return;

	local_irq_save(flags);

	/*
	 * IO-APIC and PIC have their own resume routines.
	 * We just mask them here to make sure the interrupt
	 * subsystem is completely quiet while we enable x2apic
	 * and interrupt-remapping.
	 */
	mask_ioapic_entries();
	legacy_pic->mask_all();

	if (x2apic_mode) {
		__x2apic_enable();
	} else {
		/*
		 * Make sure the APICBASE points to the right address
		 *
		 * FIXME! This will be wrong if we ever support suspend on
		 * SMP! We'll need to do this as part of the CPU restore!
		 */
		if (boot_cpu_data.x86 >= 6) {
			rdmsr(MSR_IA32_APICBASE, l, h);
			l &= ~MSR_IA32_APICBASE_BASE;
			l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
			wrmsr(MSR_IA32_APICBASE, l, h);
		}
	}

	/* Mask the error LVT first, then restore registers in save order */
	maxlvt = lapic_get_maxlvt();
	apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
	apic_write(APIC_ID, apic_pm_state.apic_id);
	apic_write(APIC_DFR, apic_pm_state.apic_dfr);
	apic_write(APIC_LDR, apic_pm_state.apic_ldr);
	apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
	apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
	apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
	apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
#ifdef CONFIG_X86_THERMAL_VECTOR
	if (maxlvt >= 5)
		apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
#endif
#ifdef CONFIG_X86_MCE_INTEL
	if (maxlvt >= 6)
		apic_write(APIC_LVTCMCI, apic_pm_state.apic_cmci);
#endif
	if (maxlvt >= 4)
		apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
	apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
	apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
	apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
	/* Clear ESR twice (write-then-read) before unmasking the error LVT */
	apic_write(APIC_ESR, 0);
	apic_read(APIC_ESR);
	apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
	apic_write(APIC_ESR, 0);
	apic_read(APIC_ESR);

	irq_remapping_reenable(x2apic_mode);

	local_irq_restore(flags);
}
2735
2736/*
2737 * This device has no shutdown method - fully functioning local APICs
2738 * are needed on every CPU up until machine_halt/restart/poweroff.
2739 */
2740
2741static struct syscore_ops lapic_syscore_ops = {
2742 .resume = lapic_resume,
2743 .suspend = lapic_suspend,
2744};
2745
/* Mark the APIC as enabled-by-us so lapic_suspend()/lapic_resume() act on it. */
static void apic_pm_activate(void)
{
	apic_pm_state.active = 1;
}
2750
/* Register the suspend/resume hooks when a local APIC is present. */
static int __init init_lapic_sysfs(void)
{
	/* XXX: remove suspend/resume procs if !apic_pm_state.active? */
	if (boot_cpu_has(X86_FEATURE_APIC))
		register_syscore_ops(&lapic_syscore_ops);

	return 0;
}
2759
2760/* local apic needs to resume before other devices access its registers. */
2761core_initcall(init_lapic_sysfs);
2762
2763#else /* CONFIG_PM */
2764
2765static void apic_pm_activate(void) { }
2766
2767#endif /* CONFIG_PM */
2768
2769#ifdef CONFIG_X86_64
2770
2771static int multi_checked;
2772static int multi;
2773
2774static int set_multi(const struct dmi_system_id *d)
2775{
2776 if (multi)
2777 return 0;
2778 pr_info("APIC: %s detected, Multi Chassis\n", d->ident);
2779 multi = 1;
2780 return 0;
2781}
2782
2783static const struct dmi_system_id multi_dmi_table[] = {
2784 {
2785 .callback = set_multi,
2786 .ident = "IBM System Summit2",
2787 .matches = {
2788 DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
2789 DMI_MATCH(DMI_PRODUCT_NAME, "Summit2"),
2790 },
2791 },
2792 {}
2793};
2794
2795static void dmi_check_multi(void)
2796{
2797 if (multi_checked)
2798 return;
2799
2800 dmi_check_system(multi_dmi_table);
2801 multi_checked = 1;
2802}
2803
2804/*
2805 * apic_is_clustered_box() -- Check if we can expect good TSC
2806 *
2807 * Thus far, the major user of this is IBM's Summit2 series:
2808 * Clustered boxes may have unsynced TSC problems if they are
2809 * multi-chassis.
2810 * Use DMI to check them
2811 */
int apic_is_clustered_box(void)
{
	/* dmi_check_multi() scans only once; 'multi' is sticky afterwards */
	dmi_check_multi();
	return multi;
}
2817#endif
2818
2819/*
2820 * APIC command line parameters
2821 */
/* "disableapic": disable the local APIC and clear its CPUID feature bit. */
static int __init setup_disableapic(char *arg)
{
	disable_apic = 1;
	setup_clear_cpu_cap(X86_FEATURE_APIC);
	return 0;
}
early_param("disableapic", setup_disableapic);
2829
/* "nolapic": same as disableapic, kept for compatibility. */
static int __init setup_nolapic(char *arg)
{
	return setup_disableapic(arg);
}
early_param("nolapic", setup_nolapic);
2836
/* "lapic_timer_c2_ok": declare the LAPIC timer reliable in C2. */
static int __init parse_lapic_timer_c2_ok(char *arg)
{
	local_apic_timer_c2_ok = 1;
	return 0;
}
early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
2843
/* "noapictimer": disable the local APIC timer. */
static int __init parse_disable_apic_timer(char *arg)
{
	disable_apic_timer = 1;
	return 0;
}
early_param("noapictimer", parse_disable_apic_timer);
2850
/* "nolapic_timer": alias of "noapictimer". */
static int __init parse_nolapic_timer(char *arg)
{
	disable_apic_timer = 1;
	return 0;
}
early_param("nolapic_timer", parse_nolapic_timer);
2857
/* "apic[=verbose|debug]": select APIC log verbosity. */
static int __init apic_set_verbosity(char *arg)
{
	if (!arg) {
#ifdef CONFIG_X86_64
		/* Bare "apic" on 64-bit (re-)enables IO-APIC setup */
		skip_ioapic_setup = 0;
		return 0;
#endif
		return -EINVAL;
	}

	if (strcmp("debug", arg) == 0)
		apic_verbosity = APIC_DEBUG;
	else if (strcmp("verbose", arg) == 0)
		apic_verbosity = APIC_VERBOSE;
#ifdef CONFIG_X86_64
	else {
		/* 32-bit silently ignores unknown values; 64-bit rejects them */
		pr_warning("APIC Verbosity level %s not recognised"
			" use apic=verbose or apic=debug\n", arg);
		return -EINVAL;
	}
#endif

	return 0;
}
early_param("apic", apic_set_verbosity);
2883
2884static int __init lapic_insert_resource(void)
2885{
2886 if (!apic_phys)
2887 return -1;
2888
2889 /* Put local APIC into the resource map. */
2890 lapic_resource.start = apic_phys;
2891 lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
2892 insert_resource(&iomem_resource, &lapic_resource);
2893
2894 return 0;
2895}
2896
2897/*
2898 * need call insert after e820__reserve_resources()
2899 * that is using request_resource
2900 */
2901late_initcall(lapic_insert_resource);
2902
/* "disable_cpu_apicid=N": ignore the CPU with APIC ID N during enumeration. */
static int __init apic_set_disabled_cpu_apicid(char *arg)
{
	if (!arg || !get_option(&arg, &disabled_cpu_apicid))
		return -EINVAL;

	return 0;
}
early_param("disable_cpu_apicid", apic_set_disabled_cpu_apicid);
2911
2912static int __init apic_set_extnmi(char *arg)
2913{
2914 if (!arg)
2915 return -EINVAL;
2916
2917 if (!strncmp("all", arg, 3))
2918 apic_extnmi = APIC_EXTNMI_ALL;
2919 else if (!strncmp("none", arg, 4))
2920 apic_extnmi = APIC_EXTNMI_NONE;
2921 else if (!strncmp("bsp", arg, 3))
2922 apic_extnmi = APIC_EXTNMI_BSP;
2923 else {
2924 pr_warn("Unknown external NMI delivery mode `%s' ignored\n", arg);
2925 return -EINVAL;
2926 }
2927
2928 return 0;
2929}
2930early_param("apic_extnmi", apic_set_extnmi);