1/* CPU control.
2 * (C) 2001, 2002, 2003, 2004 Rusty Russell
3 *
4 * This code is licenced under the GPL.
5 */
6#include <linux/sched/mm.h>
7#include <linux/proc_fs.h>
8#include <linux/smp.h>
9#include <linux/init.h>
10#include <linux/notifier.h>
11#include <linux/sched/signal.h>
12#include <linux/sched/hotplug.h>
13#include <linux/sched/isolation.h>
14#include <linux/sched/task.h>
15#include <linux/sched/smt.h>
16#include <linux/unistd.h>
17#include <linux/cpu.h>
18#include <linux/oom.h>
19#include <linux/rcupdate.h>
20#include <linux/export.h>
21#include <linux/bug.h>
22#include <linux/kthread.h>
23#include <linux/stop_machine.h>
24#include <linux/mutex.h>
25#include <linux/gfp.h>
26#include <linux/suspend.h>
27#include <linux/lockdep.h>
28#include <linux/tick.h>
29#include <linux/irq.h>
30#include <linux/nmi.h>
31#include <linux/smpboot.h>
32#include <linux/relay.h>
33#include <linux/slab.h>
34#include <linux/percpu-rwsem.h>
35#include <linux/cpuset.h>
36
37#include <trace/events/power.h>
38#define CREATE_TRACE_POINTS
39#include <trace/events/cpuhp.h>
40
41#include "smpboot.h"
42
43/**
44 * cpuhp_cpu_state - Per cpu hotplug state storage
45 * @state: The current cpu state
46 * @target: The target state
47 * @thread: Pointer to the hotplug thread
48 * @should_run: Thread should execute
49 * @rollback: Perform a rollback
50 * @single: Single callback invocation
51 * @bringup: Single callback bringup or teardown selector
52 * @cb_state: The state for a single callback (install/uninstall)
53 * @result: Result of the operation
54 * @done_up: Signal completion to the issuer of the task for cpu-up
55 * @done_down: Signal completion to the issuer of the task for cpu-down
56 */
57struct cpuhp_cpu_state {
58 enum cpuhp_state state;
59 enum cpuhp_state target;
60 enum cpuhp_state fail;
61#ifdef CONFIG_SMP
62 struct task_struct *thread;
63 bool should_run;
64 bool rollback;
65 bool single;
66 bool bringup;
67 struct hlist_node *node;
68 struct hlist_node *last;
69 enum cpuhp_state cb_state;
70 int result;
71 struct completion done_up;
72 struct completion done_down;
73#endif
74};
75
76static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = {
77 .fail = CPUHP_INVALID,
78};
79
80#ifdef CONFIG_SMP
81cpumask_t cpus_booted_once_mask;
82#endif
83
84#if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
85static struct lockdep_map cpuhp_state_up_map =
86 STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map);
87static struct lockdep_map cpuhp_state_down_map =
88 STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map);
89
90
91static inline void cpuhp_lock_acquire(bool bringup)
92{
93 lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
94}
95
96static inline void cpuhp_lock_release(bool bringup)
97{
98 lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
99}
100#else
101
102static inline void cpuhp_lock_acquire(bool bringup) { }
103static inline void cpuhp_lock_release(bool bringup) { }
104
105#endif
106
107/**
108 * cpuhp_step - Hotplug state machine step
109 * @name: Name of the step
110 * @startup: Startup function of the step
111 * @teardown: Teardown function of the step
112 * @cant_stop: Bringup/teardown can't be stopped at this step
113 */
114struct cpuhp_step {
115 const char *name;
116 union {
117 int (*single)(unsigned int cpu);
118 int (*multi)(unsigned int cpu,
119 struct hlist_node *node);
120 } startup;
121 union {
122 int (*single)(unsigned int cpu);
123 int (*multi)(unsigned int cpu,
124 struct hlist_node *node);
125 } teardown;
126 struct hlist_head list;
127 bool cant_stop;
128 bool multi_instance;
129};
130
131static DEFINE_MUTEX(cpuhp_state_mutex);
132static struct cpuhp_step cpuhp_hp_states[];
133
134static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
135{
136 return cpuhp_hp_states + state;
137}
138
139/**
140 * cpuhp_invoke_callback - Invoke the callbacks for a given state
141 * @cpu: The cpu for which the callback should be invoked
142 * @state: The state to do callbacks for
143 * @bringup: True if the bringup callback should be invoked
144 * @node: For multi-instance, do a single entry callback for install/remove
145 * @lastp: For multi-instance rollback, remember how far we got
146 *
147 * Called from cpu hotplug and from the state register machinery.
148 */
149static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
150 bool bringup, struct hlist_node *node,
151 struct hlist_node **lastp)
152{
153 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
154 struct cpuhp_step *step = cpuhp_get_step(state);
155 int (*cbm)(unsigned int cpu, struct hlist_node *node);
156 int (*cb)(unsigned int cpu);
157 int ret, cnt;
158
159 if (st->fail == state) {
160 st->fail = CPUHP_INVALID;
161
162 if (!(bringup ? step->startup.single : step->teardown.single))
163 return 0;
164
165 return -EAGAIN;
166 }
167
168 if (!step->multi_instance) {
169 WARN_ON_ONCE(lastp && *lastp);
170 cb = bringup ? step->startup.single : step->teardown.single;
171 if (!cb)
172 return 0;
173 trace_cpuhp_enter(cpu, st->target, state, cb);
174 ret = cb(cpu);
175 trace_cpuhp_exit(cpu, st->state, state, ret);
176 return ret;
177 }
178 cbm = bringup ? step->startup.multi : step->teardown.multi;
179 if (!cbm)
180 return 0;
181
182 /* Single invocation for instance add/remove */
183 if (node) {
184 WARN_ON_ONCE(lastp && *lastp);
185 trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
186 ret = cbm(cpu, node);
187 trace_cpuhp_exit(cpu, st->state, state, ret);
188 return ret;
189 }
190
191 /* State transition. Invoke on all instances */
192 cnt = 0;
193 hlist_for_each(node, &step->list) {
194 if (lastp && node == *lastp)
195 break;
196
197 trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
198 ret = cbm(cpu, node);
199 trace_cpuhp_exit(cpu, st->state, state, ret);
200 if (ret) {
201 if (!lastp)
202 goto err;
203
204 *lastp = node;
205 return ret;
206 }
207 cnt++;
208 }
209 if (lastp)
210 *lastp = NULL;
211 return 0;
212err:
213 /* Rollback the instances if one failed */
214 cbm = !bringup ? step->startup.multi : step->teardown.multi;
215 if (!cbm)
216 return ret;
217
218 hlist_for_each(node, &step->list) {
219 if (!cnt--)
220 break;
221
222 trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
223 ret = cbm(cpu, node);
224 trace_cpuhp_exit(cpu, st->state, state, ret);
225 /*
226 * Rollback must not fail.
227 */
228 WARN_ON_ONCE(ret);
229 }
230 return ret;
231}
232
233#ifdef CONFIG_SMP
234static bool cpuhp_is_ap_state(enum cpuhp_state state)
235{
236 /*
237 * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
238 * purposes as that state is handled explicitly in cpu_down.
239 */
240 return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
241}
242
243static inline void wait_for_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
244{
245 struct completion *done = bringup ? &st->done_up : &st->done_down;
246 wait_for_completion(done);
247}
248
249static inline void complete_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
250{
251 struct completion *done = bringup ? &st->done_up : &st->done_down;
252 complete(done);
253}
254
255/*
256 * The former STARTING/DYING states run with IRQs disabled and must not fail.
257 */
258static bool cpuhp_is_atomic_state(enum cpuhp_state state)
259{
260 return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE;
261}
262
263/* Serializes the updates to cpu_online_mask, cpu_present_mask */
264static DEFINE_MUTEX(cpu_add_remove_lock);
265bool cpuhp_tasks_frozen;
266EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen);
267
268/*
269 * The following two APIs (cpu_maps_update_begin/done) must be used when
270 * attempting to serialize the updates to cpu_online_mask & cpu_present_mask.
271 */
272void cpu_maps_update_begin(void)
273{
274 mutex_lock(&cpu_add_remove_lock);
275}
276
277void cpu_maps_update_done(void)
278{
279 mutex_unlock(&cpu_add_remove_lock);
280}
281
282/*
283 * If set, cpu_up and cpu_down will return -EBUSY and do nothing.
284 * Should always be manipulated under cpu_add_remove_lock
285 */
286static int cpu_hotplug_disabled;
287
288#ifdef CONFIG_HOTPLUG_CPU
289
290DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock);
291
292void cpus_read_lock(void)
293{
294 percpu_down_read(&cpu_hotplug_lock);
295}
296EXPORT_SYMBOL_GPL(cpus_read_lock);
297
298int cpus_read_trylock(void)
299{
300 return percpu_down_read_trylock(&cpu_hotplug_lock);
301}
302EXPORT_SYMBOL_GPL(cpus_read_trylock);
303
304void cpus_read_unlock(void)
305{
306 percpu_up_read(&cpu_hotplug_lock);
307}
308EXPORT_SYMBOL_GPL(cpus_read_unlock);
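/*
 * Illustrative sketch (not part of the original file): a typical caller of
 * the read-side API above. The example_* name and the per-CPU work are
 * hypothetical; the point is only that the set of online CPUs cannot change
 * between cpus_read_lock() and cpus_read_unlock().
 */
#if 0
static void example_walk_online_cpus(void)
{
	unsigned int cpu;

	cpus_read_lock();
	for_each_online_cpu(cpu) {
		/* Per-CPU work; no CPU can go offline while the lock is held */
		pr_debug("cpu%u is online\n", cpu);
	}
	cpus_read_unlock();
}
#endif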
309
310void cpus_write_lock(void)
311{
312 percpu_down_write(&cpu_hotplug_lock);
313}
314
315void cpus_write_unlock(void)
316{
317 percpu_up_write(&cpu_hotplug_lock);
318}
319
320void lockdep_assert_cpus_held(void)
321{
322 /*
323 * We can't have hotplug operations before userspace starts running,
324 * and some init codepaths will knowingly not take the hotplug lock.
325 * This is all valid, so mute lockdep until it makes sense to report
326 * unheld locks.
327 */
328 if (system_state < SYSTEM_RUNNING)
329 return;
330
331 percpu_rwsem_assert_held(&cpu_hotplug_lock);
332}
333
334static void lockdep_acquire_cpus_lock(void)
335{
336 rwsem_acquire(&cpu_hotplug_lock.rw_sem.dep_map, 0, 0, _THIS_IP_);
337}
338
339static void lockdep_release_cpus_lock(void)
340{
341 rwsem_release(&cpu_hotplug_lock.rw_sem.dep_map, 1, _THIS_IP_);
342}
343
344/*
345 * Wait for currently running CPU hotplug operations to complete (if any) and
346 * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
347 * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the
348 * hotplug path before performing hotplug operations. So acquiring that lock
349 * guarantees mutual exclusion from any currently running hotplug operations.
350 */
351void cpu_hotplug_disable(void)
352{
353 cpu_maps_update_begin();
354 cpu_hotplug_disabled++;
355 cpu_maps_update_done();
356}
357EXPORT_SYMBOL_GPL(cpu_hotplug_disable);
358
359static void __cpu_hotplug_enable(void)
360{
361 if (WARN_ONCE(!cpu_hotplug_disabled, "Unbalanced cpu hotplug enable\n"))
362 return;
363 cpu_hotplug_disabled--;
364}
365
366void cpu_hotplug_enable(void)
367{
368 cpu_maps_update_begin();
369 __cpu_hotplug_enable();
370 cpu_maps_update_done();
371}
372EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
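/*
 * Illustrative sketch (not part of the original file): pairing the two
 * helpers above around a section during which hotplug attempts (e.g. from
 * sysfs) must fail with -EBUSY. The example_* name and body are hypothetical.
 */
#if 0
static void example_no_hotplug_section(void)
{
	cpu_hotplug_disable();
	/* ... work that must not race with CPUs coming or going ... */
	cpu_hotplug_enable();
}
#endif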
373
374#else
375
376static void lockdep_acquire_cpus_lock(void)
377{
378}
379
380static void lockdep_release_cpus_lock(void)
381{
382}
383
384#endif /* CONFIG_HOTPLUG_CPU */
385
386/*
387 * Architectures that need SMT-specific errata handling during SMT hotplug
388 * should override this.
389 */
390void __weak arch_smt_update(void) { }
391
392#ifdef CONFIG_HOTPLUG_SMT
393enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
394
395void __init cpu_smt_disable(bool force)
396{
397 if (!cpu_smt_possible())
398 return;
399
400 if (force) {
401 pr_info("SMT: Force disabled\n");
402 cpu_smt_control = CPU_SMT_FORCE_DISABLED;
403 } else {
404 pr_info("SMT: disabled\n");
405 cpu_smt_control = CPU_SMT_DISABLED;
406 }
407}
408
409/*
410 * The decision whether SMT is supported can only be done after the full
411 * CPU identification. Called from architecture code.
412 */
413void __init cpu_smt_check_topology(void)
414{
415 if (!topology_smt_supported())
416 cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
417}
418
419static int __init smt_cmdline_disable(char *str)
420{
421 cpu_smt_disable(str && !strcmp(str, "force"));
422 return 0;
423}
424early_param("nosmt", smt_cmdline_disable);
425
426static inline bool cpu_smt_allowed(unsigned int cpu)
427{
428 if (cpu_smt_control == CPU_SMT_ENABLED)
429 return true;
430
431 if (topology_is_primary_thread(cpu))
432 return true;
433
434 /*
435 * On x86 it's required to boot all logical CPUs at least once so
436 * that the init code can get a chance to set CR4.MCE on each
437 * CPU. Otherwise, a broadcast MCE observing CR4.MCE=0b on any
438 * core will shut down the machine.
439 */
440 return !cpumask_test_cpu(cpu, &cpus_booted_once_mask);
441}
442
443/* Returns true if SMT is not supported or forcefully (irreversibly) disabled */
444bool cpu_smt_possible(void)
445{
446 return cpu_smt_control != CPU_SMT_FORCE_DISABLED &&
447 cpu_smt_control != CPU_SMT_NOT_SUPPORTED;
448}
449EXPORT_SYMBOL_GPL(cpu_smt_possible);
450#else
451static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
452#endif
453
454static inline enum cpuhp_state
455cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target)
456{
457 enum cpuhp_state prev_state = st->state;
458
459 st->rollback = false;
460 st->last = NULL;
461
462 st->target = target;
463 st->single = false;
464 st->bringup = st->state < target;
465
466 return prev_state;
467}
468
469static inline void
470cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state)
471{
472 st->rollback = true;
473
474 /*
475 * If we have st->last we need to undo partial multi_instance of this
476 * state first. Otherwise start undo at the previous state.
477 */
478 if (!st->last) {
479 if (st->bringup)
480 st->state--;
481 else
482 st->state++;
483 }
484
485 st->target = prev_state;
486 st->bringup = !st->bringup;
487}
488
489/* Regular hotplug invocation of the AP hotplug thread */
490static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st)
491{
492 if (!st->single && st->state == st->target)
493 return;
494
495 st->result = 0;
496 /*
497 * Make sure the above stores are visible before should_run becomes
498 * true. Paired with the mb() above in cpuhp_thread_fun()
499 */
500 smp_mb();
501 st->should_run = true;
502 wake_up_process(st->thread);
503 wait_for_ap_thread(st, st->bringup);
504}
505
506static int cpuhp_kick_ap(struct cpuhp_cpu_state *st, enum cpuhp_state target)
507{
508 enum cpuhp_state prev_state;
509 int ret;
510
511 prev_state = cpuhp_set_state(st, target);
512 __cpuhp_kick_ap(st);
513 if ((ret = st->result)) {
514 cpuhp_reset_state(st, prev_state);
515 __cpuhp_kick_ap(st);
516 }
517
518 return ret;
519}
520
521static int bringup_wait_for_ap(unsigned int cpu)
522{
523 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
524
525 /* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */
526 wait_for_ap_thread(st, true);
527 if (WARN_ON_ONCE((!cpu_online(cpu))))
528 return -ECANCELED;
529
530 /* Unpark the hotplug thread of the target cpu */
531 kthread_unpark(st->thread);
532
533 /*
534 * SMT soft disabling on X86 requires to bring the CPU out of the
535 * BIOS 'wait for SIPI' state in order to set the CR4.MCE bit. The
536 * CPU marked itself as booted_once in notify_cpu_starting() so the
537 * cpu_smt_allowed() check will now return false if this is not the
538 * primary sibling.
539 */
540 if (!cpu_smt_allowed(cpu))
541 return -ECANCELED;
542
543 if (st->target <= CPUHP_AP_ONLINE_IDLE)
544 return 0;
545
546 return cpuhp_kick_ap(st, st->target);
547}
548
549static int bringup_cpu(unsigned int cpu)
550{
551 struct task_struct *idle = idle_thread_get(cpu);
552 int ret;
553
554 /*
555 * Some architectures have to walk the irq descriptors to
556 * setup the vector space for the cpu which comes online.
557 * Prevent irq alloc/free across the bringup.
558 */
559 irq_lock_sparse();
560
561 /* Arch-specific enabling code. */
562 ret = __cpu_up(cpu, idle);
563 irq_unlock_sparse();
564 if (ret)
565 return ret;
566 return bringup_wait_for_ap(cpu);
567}
568
569static int finish_cpu(unsigned int cpu)
570{
571 struct task_struct *idle = idle_thread_get(cpu);
572 struct mm_struct *mm = idle->active_mm;
573
574 /*
575 * idle_task_exit() will have switched to &init_mm, now
576 * clean up any remaining active_mm state.
577 */
578 if (mm != &init_mm)
579 idle->active_mm = &init_mm;
580 mmdrop(mm);
581 return 0;
582}
583
584/*
585 * Hotplug state machine related functions
586 */
587
588static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
589{
590 for (st->state--; st->state > st->target; st->state--)
591 cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
592}
593
594static inline bool can_rollback_cpu(struct cpuhp_cpu_state *st)
595{
596 if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
597 return true;
598 /*
599 * When CPU hotplug is disabled, then taking the CPU down is not
600 * possible because takedown_cpu() and the architecture and
601 * subsystem specific mechanisms are not available. So the CPU
602 * which would be completely unplugged again needs to stay around
603 * in the current state.
604 */
605 return st->state <= CPUHP_BRINGUP_CPU;
606}
607
608static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
609 enum cpuhp_state target)
610{
611 enum cpuhp_state prev_state = st->state;
612 int ret = 0;
613
614 while (st->state < target) {
615 st->state++;
616 ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
617 if (ret) {
618 if (can_rollback_cpu(st)) {
619 st->target = prev_state;
620 undo_cpu_up(cpu, st);
621 }
622 break;
623 }
624 }
625 return ret;
626}
627
628/*
629 * The cpu hotplug threads manage the bringup and teardown of the cpus
630 */
631static void cpuhp_create(unsigned int cpu)
632{
633 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
634
635 init_completion(&st->done_up);
636 init_completion(&st->done_down);
637}
638
639static int cpuhp_should_run(unsigned int cpu)
640{
641 struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
642
643 return st->should_run;
644}
645
646/*
647 * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke
648 * callbacks when a state gets [un]installed at runtime.
649 *
650 * Each invocation of this function by the smpboot thread does a single AP
651 * state callback.
652 *
653 * It has 3 modes of operation:
654 * - single: runs st->cb_state
655 * - up: runs ++st->state, while st->state < st->target
656 * - down: runs st->state--, while st->state > st->target
657 *
658 * When complete or on error, should_run is cleared and the completion is fired.
659 */
660static void cpuhp_thread_fun(unsigned int cpu)
661{
662 struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
663 bool bringup = st->bringup;
664 enum cpuhp_state state;
665
666 if (WARN_ON_ONCE(!st->should_run))
667 return;
668
669 /*
670 * ACQUIRE for the cpuhp_should_run() load of ->should_run. Ensures
671 * that if we see ->should_run we also see the rest of the state.
672 */
673 smp_mb();
674
675 /*
676 * The BP holds the hotplug lock, but we're now running on the AP,
677 * ensure that anybody asserting the lock is held, will actually find
678 * it so.
679 */
680 lockdep_acquire_cpus_lock();
681 cpuhp_lock_acquire(bringup);
682
683 if (st->single) {
684 state = st->cb_state;
685 st->should_run = false;
686 } else {
687 if (bringup) {
688 st->state++;
689 state = st->state;
690 st->should_run = (st->state < st->target);
691 WARN_ON_ONCE(st->state > st->target);
692 } else {
693 state = st->state;
694 st->state--;
695 st->should_run = (st->state > st->target);
696 WARN_ON_ONCE(st->state < st->target);
697 }
698 }
699
700 WARN_ON_ONCE(!cpuhp_is_ap_state(state));
701
702 if (cpuhp_is_atomic_state(state)) {
703 local_irq_disable();
704 st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
705 local_irq_enable();
706
707 /*
708 * STARTING/DYING must not fail!
709 */
710 WARN_ON_ONCE(st->result);
711 } else {
712 st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
713 }
714
715 if (st->result) {
716 /*
717 * If we fail on a rollback, we're up a creek without a
718 * paddle, no way forward, no way back. We lose, thanks for
719 * playing.
720 */
721 WARN_ON_ONCE(st->rollback);
722 st->should_run = false;
723 }
724
725 cpuhp_lock_release(bringup);
726 lockdep_release_cpus_lock();
727
728 if (!st->should_run)
729 complete_ap_thread(st, bringup);
730}
731
732/* Invoke a single callback on a remote cpu */
733static int
734cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
735 struct hlist_node *node)
736{
737 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
738 int ret;
739
740 if (!cpu_online(cpu))
741 return 0;
742
743 cpuhp_lock_acquire(false);
744 cpuhp_lock_release(false);
745
746 cpuhp_lock_acquire(true);
747 cpuhp_lock_release(true);
748
749 /*
750 * If we are up and running, use the hotplug thread. For early calls
751 * we invoke the thread function directly.
752 */
753 if (!st->thread)
754 return cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
755
756 st->rollback = false;
757 st->last = NULL;
758
759 st->node = node;
760 st->bringup = bringup;
761 st->cb_state = state;
762 st->single = true;
763
764 __cpuhp_kick_ap(st);
765
766 /*
767 * If we failed and did a partial, do a rollback.
768 */
769 if ((ret = st->result) && st->last) {
770 st->rollback = true;
771 st->bringup = !bringup;
772
773 __cpuhp_kick_ap(st);
774 }
775
776 /*
777 * Clean up the leftovers so the next hotplug operation won't use stale
778 * data.
779 */
780 st->node = st->last = NULL;
781 return ret;
782}
783
784static int cpuhp_kick_ap_work(unsigned int cpu)
785{
786 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
787 enum cpuhp_state prev_state = st->state;
788 int ret;
789
790 cpuhp_lock_acquire(false);
791 cpuhp_lock_release(false);
792
793 cpuhp_lock_acquire(true);
794 cpuhp_lock_release(true);
795
796 trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work);
797 ret = cpuhp_kick_ap(st, st->target);
798 trace_cpuhp_exit(cpu, st->state, prev_state, ret);
799
800 return ret;
801}
802
803static struct smp_hotplug_thread cpuhp_threads = {
804 .store = &cpuhp_state.thread,
805 .create = &cpuhp_create,
806 .thread_should_run = cpuhp_should_run,
807 .thread_fn = cpuhp_thread_fun,
808 .thread_comm = "cpuhp/%u",
809 .selfparking = true,
810};
811
812void __init cpuhp_threads_init(void)
813{
814 BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads));
815 kthread_unpark(this_cpu_read(cpuhp_state.thread));
816}
817
818/*
819 *
820 * Serialize hotplug trainwrecks outside of the cpu_hotplug_lock
821 * protected region.
822 *
823 * The operation is still serialized against concurrent CPU hotplug via
824 * cpu_add_remove_lock, i.e. CPU map protection. But it is _not_
825 * serialized against other hotplug related activity like adding or
826 * removing of state callbacks and state instances, which invoke either the
827 * startup or the teardown callback of the affected state.
828 *
829 * This is required for subsystems which are unfixable vs. CPU hotplug and
830 * evade lock inversion problems by scheduling work which has to be
831 * completed _before_ cpu_up()/_cpu_down() returns.
832 *
833 * Don't even think about adding anything to this for any new code or even
834 * drivers. Its only purpose is to keep existing lock order trainwrecks
835 * working.
836 *
837 * For cpu_down() there might be valid reasons to finish cleanups which are
838 * not required to be done under cpu_hotplug_lock, but that's a different
839 * story and would be not invoked via this.
840 */
841static void cpu_up_down_serialize_trainwrecks(bool tasks_frozen)
842{
843 /*
844 * cpusets delegate hotplug operations to a worker to "solve" the
845 * lock order problems. Wait for the worker, but only if tasks are
846 * _not_ frozen (suspend, hibernate) as that would wait forever.
847 *
848 * The wait is required because otherwise the hotplug operation
849 * returns with inconsistent state, which could even be observed in
850 * user space when a new CPU is brought up. The CPU plug uevent
851 * would be delivered and user space reacting on it would fail to
852 * move tasks to the newly plugged CPU up to the point where the
853 * work has finished because up to that point the newly plugged CPU
854 * is not assignable in cpusets/cgroups. On unplug that's not
855 * necessarily a visible issue, but it is still inconsistent state,
856 * which is the real problem which needs to be "fixed". This can't
857 * prevent the transient state between scheduling the work and
858 * returning from waiting for it.
859 */
860 if (!tasks_frozen)
861 cpuset_wait_for_hotplug();
862}
863
864#ifdef CONFIG_HOTPLUG_CPU
865#ifndef arch_clear_mm_cpumask_cpu
866#define arch_clear_mm_cpumask_cpu(cpu, mm) cpumask_clear_cpu(cpu, mm_cpumask(mm))
867#endif
868
869/**
870 * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
871 * @cpu: a CPU id
872 *
873 * This function walks all processes, finds a valid mm struct for each one and
874 * then clears a corresponding bit in mm's cpumask. While this all sounds
875 * trivial, there are various non-obvious corner cases, which this function
876 * tries to solve in a safe manner.
877 *
878 * Also note that the function uses a somewhat relaxed locking scheme, so it may
879 * be called only for an already offlined CPU.
880 */
881void clear_tasks_mm_cpumask(int cpu)
882{
883 struct task_struct *p;
884
885 /*
886 * This function is called after the cpu is taken down and marked
887 * offline, so it's not like new tasks will ever get this cpu set in
888 * their mm mask. -- Peter Zijlstra
889 * Thus, we may use rcu_read_lock() here, instead of grabbing
890 * full-fledged tasklist_lock.
891 */
892 WARN_ON(cpu_online(cpu));
893 rcu_read_lock();
894 for_each_process(p) {
895 struct task_struct *t;
896
897 /*
898 * Main thread might exit, but other threads may still have
899 * a valid mm. Find one.
900 */
901 t = find_lock_task_mm(p);
902 if (!t)
903 continue;
904 arch_clear_mm_cpumask_cpu(cpu, t->mm);
905 task_unlock(t);
906 }
907 rcu_read_unlock();
908}
909
910/* Take this CPU down. */
911static int take_cpu_down(void *_param)
912{
913 struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
914 enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
915 int err, cpu = smp_processor_id();
916 int ret;
917
918 /* Ensure this CPU doesn't handle any more interrupts. */
919 err = __cpu_disable();
920 if (err < 0)
921 return err;
922
923 /*
924 * We get here while we are in CPUHP_TEARDOWN_CPU state and we must not
925 * do this step again.
926 */
927 WARN_ON(st->state != CPUHP_TEARDOWN_CPU);
928 st->state--;
929 /* Invoke the former CPU_DYING callbacks */
930 for (; st->state > target; st->state--) {
931 ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
932 /*
933 * DYING must not fail!
934 */
935 WARN_ON_ONCE(ret);
936 }
937
938 /* Give up timekeeping duties */
939 tick_handover_do_timer();
940 /* Remove CPU from timer broadcasting */
941 tick_offline_cpu(cpu);
942 /* Park the stopper thread */
943 stop_machine_park(cpu);
944 return 0;
945}
946
947static int takedown_cpu(unsigned int cpu)
948{
949 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
950 int err;
951
952 /* Park the smpboot threads */
953 kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
954
955 /*
956 * Prevent irq alloc/free while the dying cpu reorganizes the
957 * interrupt affinities.
958 */
959 irq_lock_sparse();
960
961 /*
962 * So now all preempt/rcu users must observe !cpu_active().
963 */
964 err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu));
965 if (err) {
966 /* CPU refused to die */
967 irq_unlock_sparse();
968 /* Unpark the hotplug thread so we can rollback there */
969 kthread_unpark(per_cpu_ptr(&cpuhp_state, cpu)->thread);
970 return err;
971 }
972 BUG_ON(cpu_online(cpu));
973
974 /*
975 * The teardown callback for CPUHP_AP_SCHED_STARTING will have removed
976 * all runnable tasks from the CPU, there's only the idle task left now
977 * that the migration thread is done doing the stop_machine thing.
978 *
979 * Wait for the stop thread to go away.
980 */
981 wait_for_ap_thread(st, false);
982 BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);
983
984 /* Interrupts are moved away from the dying cpu, reenable alloc/free */
985 irq_unlock_sparse();
986
987 hotplug_cpu__broadcast_tick_pull(cpu);
988 /* This actually kills the CPU. */
989 __cpu_die(cpu);
990
991 tick_cleanup_dead_cpu(cpu);
992 rcutree_migrate_callbacks(cpu);
993 return 0;
994}
995
996static void cpuhp_complete_idle_dead(void *arg)
997{
998 struct cpuhp_cpu_state *st = arg;
999
1000 complete_ap_thread(st, false);
1001}
1002
1003void cpuhp_report_idle_dead(void)
1004{
1005 struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
1006
1007 BUG_ON(st->state != CPUHP_AP_OFFLINE);
1008 rcu_report_dead(smp_processor_id());
1009 st->state = CPUHP_AP_IDLE_DEAD;
1010 /*
1011 * We cannot call complete after rcu_report_dead() so we delegate it
1012 * to an online cpu.
1013 */
1014 smp_call_function_single(cpumask_first(cpu_online_mask),
1015 cpuhp_complete_idle_dead, st, 0);
1016}
1017
1018static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
1019{
1020 for (st->state++; st->state < st->target; st->state++)
1021 cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
1022}
1023
1024static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
1025 enum cpuhp_state target)
1026{
1027 enum cpuhp_state prev_state = st->state;
1028 int ret = 0;
1029
1030 for (; st->state > target; st->state--) {
1031 ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
1032 if (ret) {
1033 st->target = prev_state;
1034 if (st->state < prev_state)
1035 undo_cpu_down(cpu, st);
1036 break;
1037 }
1038 }
1039 return ret;
1040}
1041
1042/* Requires cpu_add_remove_lock to be held */
1043static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
1044 enum cpuhp_state target)
1045{
1046 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1047 int prev_state, ret = 0;
1048
1049 if (num_online_cpus() == 1)
1050 return -EBUSY;
1051
1052 if (!cpu_present(cpu))
1053 return -EINVAL;
1054
1055 cpus_write_lock();
1056
1057 cpuhp_tasks_frozen = tasks_frozen;
1058
1059 prev_state = cpuhp_set_state(st, target);
1060 /*
1061 * If the current CPU state is in the range of the AP hotplug thread,
1062 * then we need to kick the thread.
1063 */
1064 if (st->state > CPUHP_TEARDOWN_CPU) {
1065 st->target = max((int)target, CPUHP_TEARDOWN_CPU);
1066 ret = cpuhp_kick_ap_work(cpu);
1067 /*
1068 * The AP side has done the error rollback already. Just
1069 * return the error code..
1070 */
1071 if (ret)
1072 goto out;
1073
1074 /*
1075 * We might have stopped still in the range of the AP hotplug
1076 * thread. Nothing to do anymore.
1077 */
1078 if (st->state > CPUHP_TEARDOWN_CPU)
1079 goto out;
1080
1081 st->target = target;
1082 }
1083 /*
1084 * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
1085 * to do the further cleanups.
1086 */
1087 ret = cpuhp_down_callbacks(cpu, st, target);
1088 if (ret && st->state == CPUHP_TEARDOWN_CPU && st->state < prev_state) {
1089 cpuhp_reset_state(st, prev_state);
1090 __cpuhp_kick_ap(st);
1091 }
1092
1093out:
1094 cpus_write_unlock();
1095 /*
1096 * Do post unplug cleanup. This is still protected against
1097 * concurrent CPU hotplug via cpu_add_remove_lock.
1098 */
1099 lockup_detector_cleanup();
1100 arch_smt_update();
1101 cpu_up_down_serialize_trainwrecks(tasks_frozen);
1102 return ret;
1103}
1104
1105static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target)
1106{
1107 if (cpu_hotplug_disabled)
1108 return -EBUSY;
1109 return _cpu_down(cpu, 0, target);
1110}
1111
1112static int do_cpu_down(unsigned int cpu, enum cpuhp_state target)
1113{
1114 int err;
1115
1116 cpu_maps_update_begin();
1117 err = cpu_down_maps_locked(cpu, target);
1118 cpu_maps_update_done();
1119 return err;
1120}
1121
1122int cpu_down(unsigned int cpu)
1123{
1124 return do_cpu_down(cpu, CPUHP_OFFLINE);
1125}
1126EXPORT_SYMBOL(cpu_down);
1127
1128#else
1129#define takedown_cpu NULL
1130#endif /*CONFIG_HOTPLUG_CPU*/
1131
1132/**
1133 * notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU
1134 * @cpu: cpu that just started
1135 *
1136 * It must be called by the arch code on the new cpu, before the new cpu
1137 * enables interrupts and before the "boot" cpu returns from __cpu_up().
1138 */
1139void notify_cpu_starting(unsigned int cpu)
1140{
1141 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1142 enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
1143 int ret;
1144
1145 rcu_cpu_starting(cpu); /* Enables RCU usage on this CPU. */
1146 cpumask_set_cpu(cpu, &cpus_booted_once_mask);
1147 while (st->state < target) {
1148 st->state++;
1149 ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
1150 /*
1151 * STARTING must not fail!
1152 */
1153 WARN_ON_ONCE(ret);
1154 }
1155}
1156
1157/*
1158 * Called from the idle task. Wake up the controlling task which brings the
1159 * hotplug thread of the upcoming CPU up and then delegates the rest of the
1160 * online bringup to the hotplug thread.
1161 */
1162void cpuhp_online_idle(enum cpuhp_state state)
1163{
1164 struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
1165
1166 /* Happens for the boot cpu */
1167 if (state != CPUHP_AP_ONLINE_IDLE)
1168 return;
1169
1170 /*
1171 * Unpark the stopper thread before we start the idle loop (and start
1172 * scheduling); this ensures the stopper task is always available.
1173 */
1174 stop_machine_unpark(smp_processor_id());
1175
1176 st->state = CPUHP_AP_ONLINE_IDLE;
1177 complete_ap_thread(st, true);
1178}
1179
1180/* Requires cpu_add_remove_lock to be held */
1181static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
1182{
1183 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1184 struct task_struct *idle;
1185 int ret = 0;
1186
1187 cpus_write_lock();
1188
1189 if (!cpu_present(cpu)) {
1190 ret = -EINVAL;
1191 goto out;
1192 }
1193
1194 /*
1195 * The caller of do_cpu_up might have raced with another
1196 * caller. Ignore it for now.
1197 */
1198 if (st->state >= target)
1199 goto out;
1200
1201 if (st->state == CPUHP_OFFLINE) {
1202 /* Let it fail before we try to bring the cpu up */
1203 idle = idle_thread_get(cpu);
1204 if (IS_ERR(idle)) {
1205 ret = PTR_ERR(idle);
1206 goto out;
1207 }
1208 }
1209
1210 cpuhp_tasks_frozen = tasks_frozen;
1211
1212 cpuhp_set_state(st, target);
1213 /*
1214 * If the current CPU state is in the range of the AP hotplug thread,
1215 * then we need to kick the thread once more.
1216 */
1217 if (st->state > CPUHP_BRINGUP_CPU) {
1218 ret = cpuhp_kick_ap_work(cpu);
1219 /*
1220 * The AP side has done the error rollback already. Just
1221 * return the error code..
1222 */
1223 if (ret)
1224 goto out;
1225 }
1226
1227 /*
1228 * Try to reach the target state. We max out on the BP at
1229 * CPUHP_BRINGUP_CPU. After that the AP hotplug thread is
1230 * responsible for bringing it up to the target state.
1231 */
1232 target = min((int)target, CPUHP_BRINGUP_CPU);
1233 ret = cpuhp_up_callbacks(cpu, st, target);
1234out:
1235 cpus_write_unlock();
1236 arch_smt_update();
1237 cpu_up_down_serialize_trainwrecks(tasks_frozen);
1238 return ret;
1239}
1240
1241static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
1242{
1243 int err = 0;
1244
1245 if (!cpu_possible(cpu)) {
1246 pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n",
1247 cpu);
1248#if defined(CONFIG_IA64)
1249 pr_err("please check additional_cpus= boot parameter\n");
1250#endif
1251 return -EINVAL;
1252 }
1253
1254 err = try_online_node(cpu_to_node(cpu));
1255 if (err)
1256 return err;
1257
1258 cpu_maps_update_begin();
1259
1260 if (cpu_hotplug_disabled) {
1261 err = -EBUSY;
1262 goto out;
1263 }
1264 if (!cpu_smt_allowed(cpu)) {
1265 err = -EPERM;
1266 goto out;
1267 }
1268
1269 err = _cpu_up(cpu, 0, target);
1270out:
1271 cpu_maps_update_done();
1272 return err;
1273}
1274
1275int cpu_up(unsigned int cpu)
1276{
1277 return do_cpu_up(cpu, CPUHP_ONLINE);
1278}
1279EXPORT_SYMBOL_GPL(cpu_up);
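/*
 * Illustrative sketch (not part of the original file): a hypothetical
 * in-kernel caller cycling a CPU through the two exported helpers above and
 * propagating their error codes (e.g. -EBUSY while hotplug is disabled).
 */
#if 0
static int example_cycle_cpu(unsigned int cpu)
{
	int ret;

	ret = cpu_down(cpu);
	if (ret)
		return ret;
	return cpu_up(cpu);
}
#endif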
1280
1281#ifdef CONFIG_PM_SLEEP_SMP
1282static cpumask_var_t frozen_cpus;
1283
1284int __freeze_secondary_cpus(int primary, bool suspend)
1285{
1286 int cpu, error = 0;
1287
1288 cpu_maps_update_begin();
1289 if (primary == -1) {
1290 primary = cpumask_first(cpu_online_mask);
1291 if (!housekeeping_cpu(primary, HK_FLAG_TIMER))
1292 primary = housekeeping_any_cpu(HK_FLAG_TIMER);
1293 } else {
1294 if (!cpu_online(primary))
1295 primary = cpumask_first(cpu_online_mask);
1296 }
1297
1298 /*
1299 * We take down all of the non-boot CPUs in one shot to avoid races
1300 * with the userspace trying to use the CPU hotplug at the same time
1301 */
1302 cpumask_clear(frozen_cpus);
1303
1304 pr_info("Disabling non-boot CPUs ...\n");
1305 for_each_online_cpu(cpu) {
1306 if (cpu == primary)
1307 continue;
1308
1309 if (suspend && pm_wakeup_pending()) {
1310 pr_info("Wakeup pending. Abort CPU freeze\n");
1311 error = -EBUSY;
1312 break;
1313 }
1314
1315 trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
1316 error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
1317 trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
1318 if (!error)
1319 cpumask_set_cpu(cpu, frozen_cpus);
1320 else {
1321 pr_err("Error taking CPU%d down: %d\n", cpu, error);
1322 break;
1323 }
1324 }
1325
1326 if (!error)
1327 BUG_ON(num_online_cpus() > 1);
1328 else
1329 pr_err("Non-boot CPUs are not disabled\n");
1330
1331 /*
1332 * Make sure the CPUs won't be enabled by someone else. We need to do
1333 * this even in case of failure as all disable_nonboot_cpus() users are
1334 * supposed to do enable_nonboot_cpus() on the failure path.
1335 */
1336 cpu_hotplug_disabled++;
1337
1338 cpu_maps_update_done();
1339 return error;
1340}
1341
1342void __weak arch_enable_nonboot_cpus_begin(void)
1343{
1344}
1345
1346void __weak arch_enable_nonboot_cpus_end(void)
1347{
1348}
1349
1350void enable_nonboot_cpus(void)
1351{
1352 int cpu, error;
1353
1354 /* Allow everyone to use the CPU hotplug again */
1355 cpu_maps_update_begin();
1356 __cpu_hotplug_enable();
1357 if (cpumask_empty(frozen_cpus))
1358 goto out;
1359
1360 pr_info("Enabling non-boot CPUs ...\n");
1361
1362 arch_enable_nonboot_cpus_begin();
1363
1364 for_each_cpu(cpu, frozen_cpus) {
1365 trace_suspend_resume(TPS("CPU_ON"), cpu, true);
1366 error = _cpu_up(cpu, 1, CPUHP_ONLINE);
1367 trace_suspend_resume(TPS("CPU_ON"), cpu, false);
1368 if (!error) {
1369 pr_info("CPU%d is up\n", cpu);
1370 continue;
1371 }
1372 pr_warn("Error taking CPU%d up: %d\n", cpu, error);
1373 }
1374
1375 arch_enable_nonboot_cpus_end();
1376
1377 cpumask_clear(frozen_cpus);
1378out:
1379 cpu_maps_update_done();
1380}
1381
1382static int __init alloc_frozen_cpus(void)
1383{
1384 if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO))
1385 return -ENOMEM;
1386 return 0;
1387}
1388core_initcall(alloc_frozen_cpus);
1389
1390/*
1391 * When callbacks for CPU hotplug notifications are being executed, we must
1392 * ensure that the state of the system with respect to the tasks being frozen
1393 * or not, as reported by the notification, remains unchanged *throughout the
1394 * duration* of the execution of the callbacks.
1395 * Hence we need to prevent the freezer from racing with regular CPU hotplug.
1396 *
1397 * This synchronization is implemented by mutually excluding regular CPU
1398 * hotplug and Suspend/Hibernate call paths by hooking onto the Suspend/
1399 * Hibernate notifications.
1400 */
1401static int
1402cpu_hotplug_pm_callback(struct notifier_block *nb,
1403 unsigned long action, void *ptr)
1404{
1405 switch (action) {
1406
1407 case PM_SUSPEND_PREPARE:
1408 case PM_HIBERNATION_PREPARE:
1409 cpu_hotplug_disable();
1410 break;
1411
1412 case PM_POST_SUSPEND:
1413 case PM_POST_HIBERNATION:
1414 cpu_hotplug_enable();
1415 break;
1416
1417 default:
1418 return NOTIFY_DONE;
1419 }
1420
1421 return NOTIFY_OK;
1422}
1423
1424
1425static int __init cpu_hotplug_pm_sync_init(void)
1426{
1427 /*
1428 * cpu_hotplug_pm_callback has higher priority than x86
1429 * bsp_pm_callback which depends on cpu_hotplug_pm_callback
1430 * to disable cpu hotplug to avoid cpu hotplug race.
1431 */
1432 pm_notifier(cpu_hotplug_pm_callback, 0);
1433 return 0;
1434}
1435core_initcall(cpu_hotplug_pm_sync_init);
1436
1437#endif /* CONFIG_PM_SLEEP_SMP */
1438
1439int __boot_cpu_id;
1440
1441#endif /* CONFIG_SMP */
1442
1443/* Boot processor state steps */
1444static struct cpuhp_step cpuhp_hp_states[] = {
1445 [CPUHP_OFFLINE] = {
1446 .name = "offline",
1447 .startup.single = NULL,
1448 .teardown.single = NULL,
1449 },
1450#ifdef CONFIG_SMP
1451 [CPUHP_CREATE_THREADS]= {
1452 .name = "threads:prepare",
1453 .startup.single = smpboot_create_threads,
1454 .teardown.single = NULL,
1455 .cant_stop = true,
1456 },
1457 [CPUHP_PERF_PREPARE] = {
1458 .name = "perf:prepare",
1459 .startup.single = perf_event_init_cpu,
1460 .teardown.single = perf_event_exit_cpu,
1461 },
1462 [CPUHP_WORKQUEUE_PREP] = {
1463 .name = "workqueue:prepare",
1464 .startup.single = workqueue_prepare_cpu,
1465 .teardown.single = NULL,
1466 },
1467 [CPUHP_HRTIMERS_PREPARE] = {
1468 .name = "hrtimers:prepare",
1469 .startup.single = hrtimers_prepare_cpu,
1470 .teardown.single = hrtimers_dead_cpu,
1471 },
1472 [CPUHP_SMPCFD_PREPARE] = {
1473 .name = "smpcfd:prepare",
1474 .startup.single = smpcfd_prepare_cpu,
1475 .teardown.single = smpcfd_dead_cpu,
1476 },
1477 [CPUHP_RELAY_PREPARE] = {
1478 .name = "relay:prepare",
1479 .startup.single = relay_prepare_cpu,
1480 .teardown.single = NULL,
1481 },
1482 [CPUHP_SLAB_PREPARE] = {
1483 .name = "slab:prepare",
1484 .startup.single = slab_prepare_cpu,
1485 .teardown.single = slab_dead_cpu,
1486 },
1487 [CPUHP_RCUTREE_PREP] = {
1488 .name = "RCU/tree:prepare",
1489 .startup.single = rcutree_prepare_cpu,
1490 .teardown.single = rcutree_dead_cpu,
1491 },
1492 /*
1493 * On the tear-down path, timers_dead_cpu() must be invoked
1494 * before blk_mq_queue_reinit_notify() from notify_dead(),
1495 * otherwise a RCU stall occurs.
1496 */
1497 [CPUHP_TIMERS_PREPARE] = {
1498 .name = "timers:prepare",
1499 .startup.single = timers_prepare_cpu,
1500 .teardown.single = timers_dead_cpu,
1501 },
1502 /* Kicks the plugged cpu into life */
1503 [CPUHP_BRINGUP_CPU] = {
1504 .name = "cpu:bringup",
1505 .startup.single = bringup_cpu,
1506 .teardown.single = finish_cpu,
1507 .cant_stop = true,
1508 },
1509 /* Final state before CPU kills itself */
1510 [CPUHP_AP_IDLE_DEAD] = {
1511 .name = "idle:dead",
1512 },
1513 /*
1514 * Last state before CPU enters the idle loop to die. Transient state
1515 * for synchronization.
1516 */
1517 [CPUHP_AP_OFFLINE] = {
1518 .name = "ap:offline",
1519 .cant_stop = true,
1520 },
1521 /* First state is scheduler control. Interrupts are disabled */
1522 [CPUHP_AP_SCHED_STARTING] = {
1523 .name = "sched:starting",
1524 .startup.single = sched_cpu_starting,
1525 .teardown.single = sched_cpu_dying,
1526 },
1527 [CPUHP_AP_RCUTREE_DYING] = {
1528 .name = "RCU/tree:dying",
1529 .startup.single = NULL,
1530 .teardown.single = rcutree_dying_cpu,
1531 },
1532 [CPUHP_AP_SMPCFD_DYING] = {
1533 .name = "smpcfd:dying",
1534 .startup.single = NULL,
1535 .teardown.single = smpcfd_dying_cpu,
1536 },
1537 /* Entry state on starting. Interrupts enabled from here on. Transient
1538 * state for synchronization */
1539 [CPUHP_AP_ONLINE] = {
1540 .name = "ap:online",
1541 },
1542 /*
1543 * Handled on the control processor until the plugged processor manages
1544 * this itself.
1545 */
1546 [CPUHP_TEARDOWN_CPU] = {
1547 .name = "cpu:teardown",
1548 .startup.single = NULL,
1549 .teardown.single = takedown_cpu,
1550 .cant_stop = true,
1551 },
1552 /* Handle smpboot threads park/unpark */
1553 [CPUHP_AP_SMPBOOT_THREADS] = {
1554 .name = "smpboot/threads:online",
1555 .startup.single = smpboot_unpark_threads,
1556 .teardown.single = smpboot_park_threads,
1557 },
1558 [CPUHP_AP_IRQ_AFFINITY_ONLINE] = {
1559 .name = "irq/affinity:online",
1560 .startup.single = irq_affinity_online_cpu,
1561 .teardown.single = NULL,
1562 },
1563 [CPUHP_AP_PERF_ONLINE] = {
1564 .name = "perf:online",
1565 .startup.single = perf_event_init_cpu,
1566 .teardown.single = perf_event_exit_cpu,
1567 },
1568 [CPUHP_AP_WATCHDOG_ONLINE] = {
1569 .name = "lockup_detector:online",
1570 .startup.single = lockup_detector_online_cpu,
1571 .teardown.single = lockup_detector_offline_cpu,
1572 },
1573 [CPUHP_AP_WORKQUEUE_ONLINE] = {
1574 .name = "workqueue:online",
1575 .startup.single = workqueue_online_cpu,
1576 .teardown.single = workqueue_offline_cpu,
1577 },
1578 [CPUHP_AP_RCUTREE_ONLINE] = {
1579 .name = "RCU/tree:online",
1580 .startup.single = rcutree_online_cpu,
1581 .teardown.single = rcutree_offline_cpu,
1582 },
1583#endif
1584 /*
1585 * The dynamically registered state space is here
1586 */
1587
1588#ifdef CONFIG_SMP
1589 /* Last state is scheduler control setting the cpu active */
1590 [CPUHP_AP_ACTIVE] = {
1591 .name = "sched:active",
1592 .startup.single = sched_cpu_activate,
1593 .teardown.single = sched_cpu_deactivate,
1594 },
1595#endif
1596
1597 /* CPU is fully up and running. */
1598 [CPUHP_ONLINE] = {
1599 .name = "online",
1600 .startup.single = NULL,
1601 .teardown.single = NULL,
1602 },
1603};
1604
1605/* Sanity check for callbacks */
1606static int cpuhp_cb_check(enum cpuhp_state state)
1607{
1608 if (state <= CPUHP_OFFLINE || state >= CPUHP_ONLINE)
1609 return -EINVAL;
1610 return 0;
1611}
1612
1613/*
1614 * Returns a free slot for dynamic state assignment of the Online state. The states
1615 * are protected by the cpuhp_state_mutex and an empty slot is identified
1616 * by having no name assigned.
1617 */
1618static int cpuhp_reserve_state(enum cpuhp_state state)
1619{
1620 enum cpuhp_state i, end;
1621 struct cpuhp_step *step;
1622
1623 switch (state) {
1624 case CPUHP_AP_ONLINE_DYN:
1625 step = cpuhp_hp_states + CPUHP_AP_ONLINE_DYN;
1626 end = CPUHP_AP_ONLINE_DYN_END;
1627 break;
1628 case CPUHP_BP_PREPARE_DYN:
1629 step = cpuhp_hp_states + CPUHP_BP_PREPARE_DYN;
1630 end = CPUHP_BP_PREPARE_DYN_END;
1631 break;
1632 default:
1633 return -EINVAL;
1634 }
1635
1636 for (i = state; i <= end; i++, step++) {
1637 if (!step->name)
1638 return i;
1639 }
1640 WARN(1, "No more dynamic states available for CPU hotplug\n");
1641 return -ENOSPC;
1642}
1643
1644static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name,
1645 int (*startup)(unsigned int cpu),
1646 int (*teardown)(unsigned int cpu),
1647 bool multi_instance)
1648{
1649 /* (Un)Install the callbacks for further cpu hotplug operations */
1650 struct cpuhp_step *sp;
1651 int ret = 0;
1652
1653 /*
1654 * If name is NULL, then the state gets removed.
1655 *
1656 * CPUHP_AP_ONLINE_DYN and CPUHP_BP_PREPARE_DYN are handed out on
1657 * the first allocation from these dynamic ranges, so the removal
1658 * would trigger a new allocation and clear the wrong (already
1659 * empty) state, leaving the callbacks of the to be cleared state
1660 * dangling, which causes wreckage on the next hotplug operation.
1661 */
1662 if (name && (state == CPUHP_AP_ONLINE_DYN ||
1663 state == CPUHP_BP_PREPARE_DYN)) {
1664 ret = cpuhp_reserve_state(state);
1665 if (ret < 0)
1666 return ret;
1667 state = ret;
1668 }
1669 sp = cpuhp_get_step(state);
1670 if (name && sp->name)
1671 return -EBUSY;
1672
1673 sp->startup.single = startup;
1674 sp->teardown.single = teardown;
1675 sp->name = name;
1676 sp->multi_instance = multi_instance;
1677 INIT_HLIST_HEAD(&sp->list);
1678 return ret;
1679}
1680
1681static void *cpuhp_get_teardown_cb(enum cpuhp_state state)
1682{
1683 return cpuhp_get_step(state)->teardown.single;
1684}
1685
1686/*
1687 * Call the startup/teardown function for a step either on the AP or
1688 * on the current CPU.
1689 */
1690static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup,
1691 struct hlist_node *node)
1692{
1693 struct cpuhp_step *sp = cpuhp_get_step(state);
1694 int ret;
1695
1696 /*
1697 * If there's nothing to do, we're done.
1698 * Relies on the union for multi_instance.
1699 */
1700 if ((bringup && !sp->startup.single) ||
1701 (!bringup && !sp->teardown.single))
1702 return 0;
1703 /*
1704 * The non AP bound callbacks can fail on bringup. On teardown
1705 * e.g. module removal we crash for now.
1706 */
1707#ifdef CONFIG_SMP
1708 if (cpuhp_is_ap_state(state))
1709 ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node);
1710 else
1711 ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
1712#else
1713 ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
1714#endif
1715 BUG_ON(ret && !bringup);
1716 return ret;
1717}
1718
1719/*
1720 * Called from __cpuhp_setup_state on a recoverable failure.
1721 *
1722 * Note: The teardown callbacks for rollback are not allowed to fail!
1723 */
1724static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state,
1725 struct hlist_node *node)
1726{
1727 int cpu;
1728
1729 /* Roll back the already executed steps on the other cpus */
1730 for_each_present_cpu(cpu) {
1731 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1732 int cpustate = st->state;
1733
1734 if (cpu >= failedcpu)
1735 break;
1736
1737 /* Did we invoke the startup call on that cpu ? */
1738 if (cpustate >= state)
1739 cpuhp_issue_call(cpu, state, false, node);
1740 }
1741}
1742
1743int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state,
1744 struct hlist_node *node,
1745 bool invoke)
1746{
1747 struct cpuhp_step *sp;
1748 int cpu;
1749 int ret;
1750
1751 lockdep_assert_cpus_held();
1752
1753 sp = cpuhp_get_step(state);
1754 if (sp->multi_instance == false)
1755 return -EINVAL;
1756
1757 mutex_lock(&cpuhp_state_mutex);
1758
1759 if (!invoke || !sp->startup.multi)
1760 goto add_node;
1761
1762 /*
1763 * Try to call the startup callback for each present cpu
1764 * depending on the hotplug state of the cpu.
1765 */
1766 for_each_present_cpu(cpu) {
1767 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1768 int cpustate = st->state;
1769
1770 if (cpustate < state)
1771 continue;
1772
1773 ret = cpuhp_issue_call(cpu, state, true, node);
1774 if (ret) {
1775 if (sp->teardown.multi)
1776 cpuhp_rollback_install(cpu, state, node);
1777 goto unlock;
1778 }
1779 }
1780add_node:
1781 ret = 0;
1782 hlist_add_head(node, &sp->list);
1783unlock:
1784 mutex_unlock(&cpuhp_state_mutex);
1785 return ret;
1786}
1787
1788int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
1789 bool invoke)
1790{
1791 int ret;
1792
1793 cpus_read_lock();
1794 ret = __cpuhp_state_add_instance_cpuslocked(state, node, invoke);
1795 cpus_read_unlock();
1796 return ret;
1797}
1798EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
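/*
 * Illustrative sketch (not part of the original file): what a multi-instance
 * user might look like. The example_* names are hypothetical. Each instance
 * embeds a hlist_node and is added via cpuhp_state_add_instance(), which
 * ends up in __cpuhp_state_add_instance() above.
 */
#if 0
struct example_instance {
	struct hlist_node node;
	/* per-instance data would live here */
};

static enum cpuhp_state example_state;
static struct example_instance example_inst;

static int example_inst_online(unsigned int cpu, struct hlist_node *node)
{
	/* Bring this instance up on @cpu; a failure triggers rollback */
	return 0;
}

static int __init example_multi_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "example:online",
				      example_inst_online, NULL);
	if (ret < 0)
		return ret;
	example_state = ret;
	return cpuhp_state_add_instance(example_state, &example_inst.node);
}
#endif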
1799
1800/**
1801 * __cpuhp_setup_state_cpuslocked - Setup the callbacks for a hotplug machine state
1802 * @state: The state to setup
1803 * @invoke: If true, the startup function is invoked for cpus where
1804 * cpu state >= @state
1805 * @startup: startup callback function
1806 * @teardown: teardown callback function
1807 * @multi_instance: State is set up for multiple instances which get
1808 * added afterwards.
1809 *
1810 * The caller needs to hold cpus read locked while calling this function.
1811 * Returns:
1812 * On success:
1813 * Positive state number if @state is CPUHP_AP_ONLINE_DYN
1814 * 0 for all other states
1815 * On failure: proper (negative) error code
1816 */
1817int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
1818 const char *name, bool invoke,
1819 int (*startup)(unsigned int cpu),
1820 int (*teardown)(unsigned int cpu),
1821 bool multi_instance)
1822{
1823 int cpu, ret = 0;
1824 bool dynstate;
1825
1826 lockdep_assert_cpus_held();
1827
1828 if (cpuhp_cb_check(state) || !name)
1829 return -EINVAL;
1830
1831 mutex_lock(&cpuhp_state_mutex);
1832
1833 ret = cpuhp_store_callbacks(state, name, startup, teardown,
1834 multi_instance);
1835
1836 dynstate = state == CPUHP_AP_ONLINE_DYN;
1837 if (ret > 0 && dynstate) {
1838 state = ret;
1839 ret = 0;
1840 }
1841
1842 if (ret || !invoke || !startup)
1843 goto out;
1844
1845 /*
1846 * Try to call the startup callback for each present cpu
1847 * depending on the hotplug state of the cpu.
1848 */
1849 for_each_present_cpu(cpu) {
1850 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1851 int cpustate = st->state;
1852
1853 if (cpustate < state)
1854 continue;
1855
1856 ret = cpuhp_issue_call(cpu, state, true, NULL);
1857 if (ret) {
1858 if (teardown)
1859 cpuhp_rollback_install(cpu, state, NULL);
1860 cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
1861 goto out;
1862 }
1863 }
1864out:
1865 mutex_unlock(&cpuhp_state_mutex);
1866 /*
1867 * If the requested state is CPUHP_AP_ONLINE_DYN, return the
1868 * dynamically allocated state in case of success.
1869 */
1870 if (!ret && dynstate)
1871 return state;
1872 return ret;
1873}
1874EXPORT_SYMBOL(__cpuhp_setup_state_cpuslocked);
1875
1876int __cpuhp_setup_state(enum cpuhp_state state,
1877 const char *name, bool invoke,
1878 int (*startup)(unsigned int cpu),
1879 int (*teardown)(unsigned int cpu),
1880 bool multi_instance)
1881{
1882 int ret;
1883
1884 cpus_read_lock();
1885 ret = __cpuhp_setup_state_cpuslocked(state, name, invoke, startup,
1886 teardown, multi_instance);
1887 cpus_read_unlock();
1888 return ret;
1889}
1890EXPORT_SYMBOL(__cpuhp_setup_state);
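/*
 * Illustrative sketch (not part of the original file): typical registration
 * of a dynamic online state through the cpuhp_setup_state() wrapper, which
 * lands in __cpuhp_setup_state() above. The example_* callbacks are
 * hypothetical; for CPUHP_AP_ONLINE_DYN the positive return value is the
 * dynamically allocated state, later handed to cpuhp_remove_state().
 */
#if 0
static int example_cpu_online(unsigned int cpu)
{
	/* Per-CPU setup; may fail and trigger rollback */
	return 0;
}

static int example_cpu_offline(unsigned int cpu)
{
	/* Per-CPU teardown; expected not to fail */
	return 0;
}

static int __init example_init(void)
{
	int state;

	state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "example:online",
				  example_cpu_online, example_cpu_offline);
	if (state < 0)
		return state;
	/* Remember @state so it can be passed to cpuhp_remove_state() */
	return 0;
}
#endif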
1891
1892int __cpuhp_state_remove_instance(enum cpuhp_state state,
1893 struct hlist_node *node, bool invoke)
1894{
1895 struct cpuhp_step *sp = cpuhp_get_step(state);
1896 int cpu;
1897
1898 BUG_ON(cpuhp_cb_check(state));
1899
1900 if (!sp->multi_instance)
1901 return -EINVAL;
1902
1903 cpus_read_lock();
1904 mutex_lock(&cpuhp_state_mutex);
1905
1906 if (!invoke || !cpuhp_get_teardown_cb(state))
1907 goto remove;
1908 /*
1909 * Call the teardown callback for each present cpu depending
1910 * on the hotplug state of the cpu. This function is not
1911 * allowed to fail currently!
1912 */
1913 for_each_present_cpu(cpu) {
1914 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1915 int cpustate = st->state;
1916
1917 if (cpustate >= state)
1918 cpuhp_issue_call(cpu, state, false, node);
1919 }
1920
1921remove:
1922 hlist_del(node);
1923 mutex_unlock(&cpuhp_state_mutex);
1924 cpus_read_unlock();
1925
1926 return 0;
1927}
1928EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);
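/*
 * Illustrative sketch (not part of this file): the matching removal path for
 * the instance sketch further above, using the cpuhp_state_remove_instance()
 * wrapper from <linux/cpuhotplug.h>. With invoke == true the teardown
 * callback runs on every CPU at or above the state before the node is
 * unlinked.
 */
static void foo_del_device(enum cpuhp_state foo_online_state, struct foo_dev *fd)
{
	cpuhp_state_remove_instance(foo_online_state, &fd->node);
}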
1929
1930/**
1931 * __cpuhp_remove_state_cpuslocked - Remove the callbacks for a hotplug machine state
1932 * @state: The state to remove
1933 * @invoke: If true, the teardown function is invoked for cpus where
1934 * cpu state >= @state
1935 *
1936 * The caller needs to hold cpus read locked while calling this function.
1937 * The teardown callback is currently not allowed to fail. Think
1938 * about module removal!
1939 */
1940void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke)
1941{
1942 struct cpuhp_step *sp = cpuhp_get_step(state);
1943 int cpu;
1944
1945 BUG_ON(cpuhp_cb_check(state));
1946
1947 lockdep_assert_cpus_held();
1948
1949 mutex_lock(&cpuhp_state_mutex);
1950 if (sp->multi_instance) {
1951 WARN(!hlist_empty(&sp->list),
1952 "Error: Removing state %d which has instances left.\n",
1953 state);
1954 goto remove;
1955 }
1956
1957 if (!invoke || !cpuhp_get_teardown_cb(state))
1958 goto remove;
1959
1960 /*
1961 * Call the teardown callback for each present cpu depending
1962 * on the hotplug state of the cpu. This function is not
1963 * allowed to fail currently!
1964 */
1965 for_each_present_cpu(cpu) {
1966 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1967 int cpustate = st->state;
1968
1969 if (cpustate >= state)
1970 cpuhp_issue_call(cpu, state, false, NULL);
1971 }
1972remove:
1973 cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
1974 mutex_unlock(&cpuhp_state_mutex);
1975}
1976EXPORT_SYMBOL(__cpuhp_remove_state_cpuslocked);
1977
1978void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
1979{
1980 cpus_read_lock();
1981 __cpuhp_remove_state_cpuslocked(state, invoke);
1982 cpus_read_unlock();
1983}
1984EXPORT_SYMBOL(__cpuhp_remove_state);
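/*
 * Illustrative sketch (not part of this file): tearing a dynamically
 * allocated state back down on module unload with the cpuhp_remove_state()
 * wrapper from <linux/cpuhotplug.h>; foo_hp_state is the hypothetical value
 * saved in the setup sketch further above.
 */
static void __exit foo_exit(void)
{
	/* Invokes the teardown callback on every CPU at or above the state. */
	cpuhp_remove_state(foo_hp_state);
}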
1985
1986#ifdef CONFIG_HOTPLUG_SMT
1987static void cpuhp_offline_cpu_device(unsigned int cpu)
1988{
1989 struct device *dev = get_cpu_device(cpu);
1990
1991 dev->offline = true;
1992 /* Tell user space about the state change */
1993 kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
1994}
1995
1996static void cpuhp_online_cpu_device(unsigned int cpu)
1997{
1998 struct device *dev = get_cpu_device(cpu);
1999
2000 dev->offline = false;
2001 /* Tell user space about the state change */
2002 kobject_uevent(&dev->kobj, KOBJ_ONLINE);
2003}
2004
2005int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
2006{
2007 int cpu, ret = 0;
2008
2009 cpu_maps_update_begin();
2010 for_each_online_cpu(cpu) {
2011 if (topology_is_primary_thread(cpu))
2012 continue;
2013 ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
2014 if (ret)
2015 break;
2016 /*
2017 * As this needs to hold the cpu maps lock, it's impossible
2018 * to call device_offline() here: that ends up calling
2019 * cpu_down(), which takes the cpu maps lock. The cpu maps lock
2020 * needs to be held as this might race against in-kernel
2021 * abusers of the hotplug machinery (thermal management).
2022 *
2023 * So nothing would update the device's offline state. That would
2024 * leave the sysfs entry stale and prevent onlining after
2025 * smt control has been changed to 'off' again. This is
2026 * called under the sysfs hotplug lock, so it is properly
2027 * serialized against the regular offline usage.
2028 */
2029 cpuhp_offline_cpu_device(cpu);
2030 }
2031 if (!ret)
2032 cpu_smt_control = ctrlval;
2033 cpu_maps_update_done();
2034 return ret;
2035}
2036
2037int cpuhp_smt_enable(void)
2038{
2039 int cpu, ret = 0;
2040
2041 cpu_maps_update_begin();
2042 cpu_smt_control = CPU_SMT_ENABLED;
2043 for_each_present_cpu(cpu) {
2044 /* Skip online CPUs and CPUs on offline nodes */
2045 if (cpu_online(cpu) || !node_online(cpu_to_node(cpu)))
2046 continue;
2047 ret = _cpu_up(cpu, 0, CPUHP_ONLINE);
2048 if (ret)
2049 break;
2050 /* See comment in cpuhp_smt_disable() */
2051 cpuhp_online_cpu_device(cpu);
2052 }
2053 cpu_maps_update_done();
2054 return ret;
2055}
2056#endif
2057
2058#if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU)
2059static ssize_t show_cpuhp_state(struct device *dev,
2060 struct device_attribute *attr, char *buf)
2061{
2062 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2063
2064 return sprintf(buf, "%d\n", st->state);
2065}
2066static DEVICE_ATTR(state, 0444, show_cpuhp_state, NULL);
2067
2068static ssize_t write_cpuhp_target(struct device *dev,
2069 struct device_attribute *attr,
2070 const char *buf, size_t count)
2071{
2072 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2073 struct cpuhp_step *sp;
2074 int target, ret;
2075
2076 ret = kstrtoint(buf, 10, &target);
2077 if (ret)
2078 return ret;
2079
2080#ifdef CONFIG_CPU_HOTPLUG_STATE_CONTROL
2081 if (target < CPUHP_OFFLINE || target > CPUHP_ONLINE)
2082 return -EINVAL;
2083#else
2084 if (target != CPUHP_OFFLINE && target != CPUHP_ONLINE)
2085 return -EINVAL;
2086#endif
2087
2088 ret = lock_device_hotplug_sysfs();
2089 if (ret)
2090 return ret;
2091
2092 mutex_lock(&cpuhp_state_mutex);
2093 sp = cpuhp_get_step(target);
2094 ret = !sp->name || sp->cant_stop ? -EINVAL : 0;
2095 mutex_unlock(&cpuhp_state_mutex);
2096 if (ret)
2097 goto out;
2098
2099 if (st->state < target)
2100 ret = do_cpu_up(dev->id, target);
2101 else
2102 ret = do_cpu_down(dev->id, target);
2103out:
2104 unlock_device_hotplug();
2105 return ret ? ret : count;
2106}
2107
2108static ssize_t show_cpuhp_target(struct device *dev,
2109 struct device_attribute *attr, char *buf)
2110{
2111 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2112
2113 return sprintf(buf, "%d\n", st->target);
2114}
2115static DEVICE_ATTR(target, 0644, show_cpuhp_target, write_cpuhp_target);
2116
2117
2118static ssize_t write_cpuhp_fail(struct device *dev,
2119 struct device_attribute *attr,
2120 const char *buf, size_t count)
2121{
2122 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2123 struct cpuhp_step *sp;
2124 int fail, ret;
2125
2126 ret = kstrtoint(buf, 10, &fail);
2127 if (ret)
2128 return ret;
2129
2130 if (fail < CPUHP_OFFLINE || fail > CPUHP_ONLINE)
2131 return -EINVAL;
2132
2133 /*
2134 * Cannot fail STARTING/DYING callbacks.
2135 */
2136 if (cpuhp_is_atomic_state(fail))
2137 return -EINVAL;
2138
2139 /*
2140 * Cannot fail anything that doesn't have callbacks.
2141 */
2142 mutex_lock(&cpuhp_state_mutex);
2143 sp = cpuhp_get_step(fail);
2144 if (!sp->startup.single && !sp->teardown.single)
2145 ret = -EINVAL;
2146 mutex_unlock(&cpuhp_state_mutex);
2147 if (ret)
2148 return ret;
2149
2150 st->fail = fail;
2151
2152 return count;
2153}
2154
2155static ssize_t show_cpuhp_fail(struct device *dev,
2156 struct device_attribute *attr, char *buf)
2157{
2158 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2159
2160 return sprintf(buf, "%d\n", st->fail);
2161}
2162
2163static DEVICE_ATTR(fail, 0644, show_cpuhp_fail, write_cpuhp_fail);
2164
2165static struct attribute *cpuhp_cpu_attrs[] = {
2166 &dev_attr_state.attr,
2167 &dev_attr_target.attr,
2168 &dev_attr_fail.attr,
2169 NULL
2170};
2171
2172static const struct attribute_group cpuhp_cpu_attr_group = {
2173 .attrs = cpuhp_cpu_attrs,
2174 .name = "hotplug",
2176};
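/*
 * The group above appears as /sys/devices/system/cpu/cpuN/hotplug/ with the
 * files "state", "target" and "fail". A rough usage sketch (state numbers
 * are the CPUHP_* values listed by the global "states" file created below):
 *
 *	# cat /sys/devices/system/cpu/cpu1/hotplug/state
 *	# echo 0 > /sys/devices/system/cpu/cpu1/hotplug/target	  # offline cpu1
 *	# echo <state> > /sys/devices/system/cpu/cpu1/hotplug/fail  # make that state fail, for testing
 *
 * Without CONFIG_CPU_HOTPLUG_STATE_CONTROL only CPUHP_OFFLINE (0) and
 * CPUHP_ONLINE are accepted as "target" values; "fail" rejects atomic
 * (STARTING/DYING) states, as implemented in write_cpuhp_fail() above.
 */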
2177
2178static ssize_t show_cpuhp_states(struct device *dev,
2179 struct device_attribute *attr, char *buf)
2180{
2181 ssize_t cur, res = 0;
2182 int i;
2183
2184 mutex_lock(&cpuhp_state_mutex);
2185 for (i = CPUHP_OFFLINE; i <= CPUHP_ONLINE; i++) {
2186 struct cpuhp_step *sp = cpuhp_get_step(i);
2187
2188 if (sp->name) {
2189 cur = sprintf(buf, "%3d: %s\n", i, sp->name);
2190 buf += cur;
2191 res += cur;
2192 }
2193 }
2194 mutex_unlock(&cpuhp_state_mutex);
2195 return res;
2196}
2197static DEVICE_ATTR(states, 0444, show_cpuhp_states, NULL);
2198
2199static struct attribute *cpuhp_cpu_root_attrs[] = {
2200 &dev_attr_states.attr,
2201 NULL
2202};
2203
2204static const struct attribute_group cpuhp_cpu_root_attr_group = {
2205 .attrs = cpuhp_cpu_root_attrs,
2206 .name = "hotplug",
2208};
2209
2210#ifdef CONFIG_HOTPLUG_SMT
2211
2212static ssize_t
2213__store_smt_control(struct device *dev, struct device_attribute *attr,
2214 const char *buf, size_t count)
2215{
2216 int ctrlval, ret;
2217
2218 if (sysfs_streq(buf, "on"))
2219 ctrlval = CPU_SMT_ENABLED;
2220 else if (sysfs_streq(buf, "off"))
2221 ctrlval = CPU_SMT_DISABLED;
2222 else if (sysfs_streq(buf, "forceoff"))
2223 ctrlval = CPU_SMT_FORCE_DISABLED;
2224 else
2225 return -EINVAL;
2226
2227 if (cpu_smt_control == CPU_SMT_FORCE_DISABLED)
2228 return -EPERM;
2229
2230 if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
2231 return -ENODEV;
2232
2233 ret = lock_device_hotplug_sysfs();
2234 if (ret)
2235 return ret;
2236
2237 if (ctrlval != cpu_smt_control) {
2238 switch (ctrlval) {
2239 case CPU_SMT_ENABLED:
2240 ret = cpuhp_smt_enable();
2241 break;
2242 case CPU_SMT_DISABLED:
2243 case CPU_SMT_FORCE_DISABLED:
2244 ret = cpuhp_smt_disable(ctrlval);
2245 break;
2246 }
2247 }
2248
2249 unlock_device_hotplug();
2250 return ret ? ret : count;
2251}
2252
2253#else /* !CONFIG_HOTPLUG_SMT */
2254static ssize_t
2255__store_smt_control(struct device *dev, struct device_attribute *attr,
2256 const char *buf, size_t count)
2257{
2258 return -ENODEV;
2259}
2260#endif /* CONFIG_HOTPLUG_SMT */
2261
2262static const char *smt_states[] = {
2263 [CPU_SMT_ENABLED] = "on",
2264 [CPU_SMT_DISABLED] = "off",
2265 [CPU_SMT_FORCE_DISABLED] = "forceoff",
2266 [CPU_SMT_NOT_SUPPORTED] = "notsupported",
2267 [CPU_SMT_NOT_IMPLEMENTED] = "notimplemented",
2268};
2269
2270static ssize_t
2271show_smt_control(struct device *dev, struct device_attribute *attr, char *buf)
2272{
2273 const char *state = smt_states[cpu_smt_control];
2274
2275 return snprintf(buf, PAGE_SIZE - 2, "%s\n", state);
2276}
2277
2278static ssize_t
2279store_smt_control(struct device *dev, struct device_attribute *attr,
2280 const char *buf, size_t count)
2281{
2282 return __store_smt_control(dev, attr, buf, count);
2283}
2284static DEVICE_ATTR(control, 0644, show_smt_control, store_smt_control);
2285
2286static ssize_t
2287show_smt_active(struct device *dev, struct device_attribute *attr, char *buf)
2288{
2289 return snprintf(buf, PAGE_SIZE - 2, "%d\n", sched_smt_active());
2290}
2291static DEVICE_ATTR(active, 0444, show_smt_active, NULL);
2292
2293static struct attribute *cpuhp_smt_attrs[] = {
2294 &dev_attr_control.attr,
2295 &dev_attr_active.attr,
2296 NULL
2297};
2298
2299static const struct attribute_group cpuhp_smt_attr_group = {
2300 .attrs = cpuhp_smt_attrs,
2301 .name = "smt",
2303};
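/*
 * The group above appears as /sys/devices/system/cpu/smt/ with the files
 * "control" and "active". "control" reports one of the smt_states[] strings
 * and accepts "on", "off" or "forceoff" (see __store_smt_control() above);
 * "active" is read-only and reports whether sibling threads are currently
 * scheduled in. Rough usage sketch:
 *
 *	# cat /sys/devices/system/cpu/smt/active
 *	# echo off > /sys/devices/system/cpu/smt/control
 */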
2304
2305static int __init cpu_smt_sysfs_init(void)
2306{
2307 return sysfs_create_group(&cpu_subsys.dev_root->kobj,
2308 &cpuhp_smt_attr_group);
2309}
2310
2311static int __init cpuhp_sysfs_init(void)
2312{
2313 int cpu, ret;
2314
2315 ret = cpu_smt_sysfs_init();
2316 if (ret)
2317 return ret;
2318
2319 ret = sysfs_create_group(&cpu_subsys.dev_root->kobj,
2320 &cpuhp_cpu_root_attr_group);
2321 if (ret)
2322 return ret;
2323
2324 for_each_possible_cpu(cpu) {
2325 struct device *dev = get_cpu_device(cpu);
2326
2327 if (!dev)
2328 continue;
2329 ret = sysfs_create_group(&dev->kobj, &cpuhp_cpu_attr_group);
2330 if (ret)
2331 return ret;
2332 }
2333 return 0;
2334}
2335device_initcall(cpuhp_sysfs_init);
2336#endif /* CONFIG_SYSFS && CONFIG_HOTPLUG_CPU */
2337
2338/*
2339 * cpu_bit_bitmap[] is a special, "compressed" data structure that
2340 * holds, for each nr < NR_CPUS, the NR_CPUS-bit value 1 << nr.
2341 *
2342 * It is used by cpumask_of() to get a constant address for a CPU
2343 * mask that has only a single bit set.
2344 */
2345
2346/* cpu_bit_bitmap[0] is empty - so we can back into it */
2347#define MASK_DECLARE_1(x) [x+1][0] = (1UL << (x))
2348#define MASK_DECLARE_2(x) MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
2349#define MASK_DECLARE_4(x) MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
2350#define MASK_DECLARE_8(x) MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)
2351
2352const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {
2353
2354 MASK_DECLARE_8(0), MASK_DECLARE_8(8),
2355 MASK_DECLARE_8(16), MASK_DECLARE_8(24),
2356#if BITS_PER_LONG > 32
2357 MASK_DECLARE_8(32), MASK_DECLARE_8(40),
2358 MASK_DECLARE_8(48), MASK_DECLARE_8(56),
2359#endif
2360};
2361EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
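/*
 * Sketch of the consumer side (roughly what cpumask_of()/get_cpu_mask() in
 * <linux/cpumask.h> do with this table; see that header for the real code):
 * index row 1 + cpu % BITS_PER_LONG, then step back cpu / BITS_PER_LONG
 * longs so that, viewed as a cpumask, exactly bit @cpu is set. The empty
 * row 0 above is what makes stepping backwards safe.
 *
 *	const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG];
 *	p -= cpu / BITS_PER_LONG;
 *	return to_cpumask(p);
 *
 * E.g. with BITS_PER_LONG == 64 and NR_CPUS > 68, cpu == 68 selects row
 * 1 + 68 % 64 == 5, whose word 0 has bit 4 set, and steps back
 * 68 / 64 == 1 long, so the set bit ends up at position 64 + 4 == 68 of
 * the returned mask.
 */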
2362
2363const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
2364EXPORT_SYMBOL(cpu_all_bits);
2365
2366#ifdef CONFIG_INIT_ALL_POSSIBLE
2367struct cpumask __cpu_possible_mask __read_mostly
2368 = {CPU_BITS_ALL};
2369#else
2370struct cpumask __cpu_possible_mask __read_mostly;
2371#endif
2372EXPORT_SYMBOL(__cpu_possible_mask);
2373
2374struct cpumask __cpu_online_mask __read_mostly;
2375EXPORT_SYMBOL(__cpu_online_mask);
2376
2377struct cpumask __cpu_present_mask __read_mostly;
2378EXPORT_SYMBOL(__cpu_present_mask);
2379
2380struct cpumask __cpu_active_mask __read_mostly;
2381EXPORT_SYMBOL(__cpu_active_mask);
2382
2383atomic_t __num_online_cpus __read_mostly;
2384EXPORT_SYMBOL(__num_online_cpus);
2385
2386void init_cpu_present(const struct cpumask *src)
2387{
2388 cpumask_copy(&__cpu_present_mask, src);
2389}
2390
2391void init_cpu_possible(const struct cpumask *src)
2392{
2393 cpumask_copy(&__cpu_possible_mask, src);
2394}
2395
2396void init_cpu_online(const struct cpumask *src)
2397{
2398 cpumask_copy(&__cpu_online_mask, src);
2399}
2400
2401void set_cpu_online(unsigned int cpu, bool online)
2402{
2403 /*
2404 * atomic_inc/dec() is required to handle the horrid abuse of this
2405 * function by the reboot and kexec code which invoke it from
2406 * IPI/NMI broadcasts when shutting down CPUs. Invocation from
2407 * regular CPU hotplug is properly serialized.
2408 *
2409 * Note that __num_online_cpus being of type atomic_t does not
2410 * protect readers which are not serialized against concurrent
2411 * hotplug operations.
2412 */
2413 if (online) {
2414 if (!cpumask_test_and_set_cpu(cpu, &__cpu_online_mask))
2415 atomic_inc(&__num_online_cpus);
2416 } else {
2417 if (cpumask_test_and_clear_cpu(cpu, &__cpu_online_mask))
2418 atomic_dec(&__num_online_cpus);
2419 }
2420}
2421
2422/*
2423 * Activate the first processor.
2424 */
2425void __init boot_cpu_init(void)
2426{
2427 int cpu = smp_processor_id();
2428
2429 /* Mark the boot cpu "present", "online" etc. for the SMP and UP case */
2430 set_cpu_online(cpu, true);
2431 set_cpu_active(cpu, true);
2432 set_cpu_present(cpu, true);
2433 set_cpu_possible(cpu, true);
2434
2435#ifdef CONFIG_SMP
2436 __boot_cpu_id = cpu;
2437#endif
2438}
2439
2440/*
2441 * Must be called _AFTER_ setting up the per_cpu areas
2442 */
2443void __init boot_cpu_hotplug_init(void)
2444{
2445#ifdef CONFIG_SMP
2446 cpumask_set_cpu(smp_processor_id(), &cpus_booted_once_mask);
2447#endif
2448 this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
2449}
2450
2451/*
2452 * These are used for a global "mitigations=" cmdline option for toggling
2453 * optional CPU mitigations.
2454 */
2455enum cpu_mitigations {
2456 CPU_MITIGATIONS_OFF,
2457 CPU_MITIGATIONS_AUTO,
2458 CPU_MITIGATIONS_AUTO_NOSMT,
2459};
2460
2461static enum cpu_mitigations cpu_mitigations __ro_after_init =
2462 CPU_MITIGATIONS_AUTO;
2463
2464static int __init mitigations_parse_cmdline(char *arg)
2465{
2466 if (!strcmp(arg, "off"))
2467 cpu_mitigations = CPU_MITIGATIONS_OFF;
2468 else if (!strcmp(arg, "auto"))
2469 cpu_mitigations = CPU_MITIGATIONS_AUTO;
2470 else if (!strcmp(arg, "auto,nosmt"))
2471 cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT;
2472 else
2473 pr_crit("Unsupported mitigations=%s, system may still be vulnerable\n",
2474 arg);
2475
2476 return 0;
2477}
2478early_param("mitigations", mitigations_parse_cmdline);
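/*
 * Example command lines (the three values parsed above; see
 * Documentation/admin-guide/kernel-parameters.txt for the full description):
 *
 *	mitigations=off
 *	mitigations=auto
 *	mitigations=auto,nosmt
 */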
2479
2480/* mitigations=off */
2481bool cpu_mitigations_off(void)
2482{
2483 return cpu_mitigations == CPU_MITIGATIONS_OFF;
2484}
2485EXPORT_SYMBOL_GPL(cpu_mitigations_off);
2486
2487/* mitigations=auto,nosmt */
2488bool cpu_mitigations_auto_nosmt(void)
2489{
2490 return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
2491}
2492EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt);
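/*
 * Illustrative sketch (not part of this file): how architecture-specific
 * mitigation selection code typically consults these helpers; the foo_*
 * names are hypothetical.
 */
static void __init foo_select_mitigation(void)
{
	if (cpu_mitigations_off())
		return;			/* "mitigations=off" was given */

	if (cpu_mitigations_auto_nosmt())
		pr_warn("foo: SMT enabled, consider mitigations=auto,nosmt\n");

	/* ... enable the actual mitigation here ... */
}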