/*
 * arch/arm64/kernel/topology.c
 *
 * Copyright (C) 2011,2013,2014 Linaro Limited.
 *
 * Based on the arm32 version written by Vincent Guittot in turn based on
 * arch/sh/kernel/topology.c
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 */

#include <linux/acpi.h>
#include <linux/arch_topology.h>
#include <linux/cacheinfo.h>
#include <linux/cpufreq.h>
#include <linux/init.h>
#include <linux/percpu.h>

#include <asm/cpu.h>
#include <asm/cputype.h>
#include <asm/topology.h>

void store_cpu_topology(unsigned int cpuid)
{
	struct cpu_topology *cpuid_topo = &cpu_topology[cpuid];
	u64 mpidr;

	if (cpuid_topo->package_id != -1)
		goto topology_populated;

	mpidr = read_cpuid_mpidr();

	/* Uniprocessor systems can rely on default topology values */
	if (mpidr & MPIDR_UP_BITMASK)
		return;

	/*
	 * This would be the place to create cpu topology based on MPIDR.
	 *
	 * However, it cannot be trusted to depict the actual topology; some
	 * pieces of the architecture enforce an artificial cap on Aff0 values
	 * (e.g. GICv3's ICC_SGI1R_EL1 limits it to 15), leading to an
	 * artificial cycling of Aff1, Aff2 and Aff3 values. IOW, these end up
	 * having absolutely no relationship to the actual underlying system
	 * topology, and cannot be reasonably used as core / package ID.
	 *
	 * If the MT bit is set, Aff0 *could* be used to define a thread ID, but
	 * we still wouldn't be able to obtain a sane core ID. This means we
	 * need to entirely ignore MPIDR for any topology deduction.
	 */
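	/*
	 * Fall back to a flat description instead: each CPU is its own
	 * core, and the NUMA node stands in for the package ID.
	 */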
	cpuid_topo->thread_id = -1;
	cpuid_topo->core_id = cpuid;
	cpuid_topo->package_id = cpu_to_node(cpuid);

	pr_debug("CPU%u: cluster %d core %d thread %d mpidr %#016llx\n",
		 cpuid, cpuid_topo->package_id, cpuid_topo->core_id,
		 cpuid_topo->thread_id, mpidr);

topology_populated:
	update_siblings_masks(cpuid);
}

#ifdef CONFIG_ACPI
static bool __init acpi_cpu_is_threaded(int cpu)
{
	int is_threaded = acpi_pptt_cpu_is_thread(cpu);

	/*
	 * If the PPTT doesn't have thread information, assume a homogeneous
	 * machine and return the current CPU's thread state.
	 */
	if (is_threaded < 0)
		is_threaded = read_cpuid_mpidr() & MPIDR_MT_BITMASK;

	return !!is_threaded;
}

/*
 * Propagate the topology information of the processor_topology_node tree to the
 * cpu_topology array.
 */
int __init parse_acpi_topology(void)
{
	int cpu, topology_id;

	if (acpi_disabled)
		return 0;

	for_each_possible_cpu(cpu) {
		int i, cache_id;

		topology_id = find_acpi_cpu_topology(cpu, 0);
		if (topology_id < 0)
			return topology_id;

		if (acpi_cpu_is_threaded(cpu)) {
			cpu_topology[cpu].thread_id = topology_id;
			topology_id = find_acpi_cpu_topology(cpu, 1);
			cpu_topology[cpu].core_id = topology_id;
		} else {
			cpu_topology[cpu].thread_id = -1;
			cpu_topology[cpu].core_id = topology_id;
		}
		topology_id = find_acpi_cpu_topology_package(cpu);
		cpu_topology[cpu].package_id = topology_id;

		i = acpi_find_last_cache_level(cpu);

		if (i > 0) {
			/*
			 * This is the only part of cpu_topology that has
			 * a direct relationship with the cache topology.
			 */
			cache_id = find_acpi_cpu_cache_topology(cpu, i);
			if (cache_id > 0)
				cpu_topology[cpu].llc_id = cache_id;
		}
	}

	return 0;
}
#endif

#ifdef CONFIG_ARM64_AMU_EXTN

#undef pr_fmt
#define pr_fmt(fmt) "AMU: " fmt

static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, arch_max_freq_scale);
static DEFINE_PER_CPU(u64, arch_const_cycles_prev);
static DEFINE_PER_CPU(u64, arch_core_cycles_prev);
static cpumask_var_t amu_fie_cpus;

/* Initialize counter reference per-cpu variables for the current CPU */
void init_cpu_freq_invariance_counters(void)
{
	this_cpu_write(arch_core_cycles_prev,
		       read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0));
	this_cpu_write(arch_const_cycles_prev,
		       read_sysreg_s(SYS_AMEVCNTR0_CONST_EL0));
}
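
/*
 * Note: the AMU counter system registers are only readable from the CPU
 * that owns them, so the baseline snapshots above must be taken locally
 * during CPU bring-up; validate_cpu_freq_invariance_counters() below then
 * consults these cached per-cpu values rather than reading the registers
 * cross-CPU.
 */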

static int validate_cpu_freq_invariance_counters(int cpu)
{
	u64 max_freq_hz, ratio;

	if (!cpu_has_amu_feat(cpu)) {
		pr_debug("CPU%d: counters are not supported.\n", cpu);
		return -EINVAL;
	}

	if (unlikely(!per_cpu(arch_const_cycles_prev, cpu) ||
		     !per_cpu(arch_core_cycles_prev, cpu))) {
		pr_debug("CPU%d: cycle counters are not enabled.\n", cpu);
		return -EINVAL;
	}

	/* Convert maximum frequency from kHz to Hz and validate */
	max_freq_hz = cpufreq_get_hw_max_freq(cpu) * 1000;
	if (unlikely(!max_freq_hz)) {
		pr_debug("CPU%d: invalid maximum frequency.\n", cpu);
		return -EINVAL;
	}

	/*
	 * Pre-compute the fixed ratio between the frequency of the constant
	 * counter and the maximum frequency of the CPU.
	 *
	 *                            const_freq
	 * arch_max_freq_scale =   ---------------- * SCHED_CAPACITY_SCALE²
	 *                         cpuinfo_max_freq
	 *
	 * We use a factor of 2 * SCHED_CAPACITY_SHIFT -> SCHED_CAPACITY_SCALE²
	 * in order to ensure a good resolution for arch_max_freq_scale for
	 * very low arch timer frequencies (down to the kHz range which should
	 * be unlikely).
	 */
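	/*
	 * Worked example with illustrative numbers (not from any particular
	 * SoC): a 25 MHz constant counter and a 2.5 GHz cpuinfo_max_freq
	 * give ratio = (25000000 << 20) / 2500000000 = 10485, i.e. about
	 * 0.01 * SCHED_CAPACITY_SCALE², comfortably above the !ratio check
	 * below.
	 */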
	ratio = (u64)arch_timer_get_rate() << (2 * SCHED_CAPACITY_SHIFT);
	ratio = div64_u64(ratio, max_freq_hz);
	if (!ratio) {
		WARN_ONCE(1, "System timer frequency too low.\n");
		return -EINVAL;
	}

	per_cpu(arch_max_freq_scale, cpu) = (unsigned long)ratio;

	return 0;
}

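/*
 * Fold @cpu's cpufreq policy into amu_fie_cpus, but only when every CPU
 * covered by that policy has validated AMU counters. Returns true whenever
 * a policy exists for @cpu, so the caller can tell whether cpufreq policies
 * are available at all.
 */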
static inline bool
enable_policy_freq_counters(int cpu, cpumask_var_t valid_cpus)
{
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);

	if (!policy) {
		pr_debug("CPU%d: No cpufreq policy found.\n", cpu);
		return false;
	}

	if (cpumask_subset(policy->related_cpus, valid_cpus))
		cpumask_or(amu_fie_cpus, policy->related_cpus,
			   amu_fie_cpus);

	cpufreq_cpu_put(policy);

	return true;
}

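/*
 * A static branch keeps the scheduler-tick fast path in
 * topology_scale_freq_tick() essentially free on systems that do not use
 * AMU counters for frequency invariance.
 */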
static DEFINE_STATIC_KEY_FALSE(amu_fie_key);
#define amu_freq_invariant() static_branch_unlikely(&amu_fie_key)

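/*
 * Registered as late_initcall_sync (see below) so that cpufreq drivers,
 * commonly probed by late_initcall time, have had a chance to create their
 * policies before the present CPUs are grouped for frequency invariance.
 */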
static int __init init_amu_fie(void)
{
	cpumask_var_t valid_cpus;
	bool have_policy = false;
	int ret = 0;
	int cpu;

	if (!zalloc_cpumask_var(&valid_cpus, GFP_KERNEL))
		return -ENOMEM;

	if (!zalloc_cpumask_var(&amu_fie_cpus, GFP_KERNEL)) {
		ret = -ENOMEM;
		goto free_valid_mask;
	}

	for_each_present_cpu(cpu) {
		if (validate_cpu_freq_invariance_counters(cpu))
			continue;
		cpumask_set_cpu(cpu, valid_cpus);
		have_policy |= enable_policy_freq_counters(cpu, valid_cpus);
	}

	/*
	 * If we are not restricted by cpufreq policies, we only enable
	 * the use of the AMU feature for FIE if all CPUs support AMU.
	 * Otherwise, enable_policy_freq_counters has already enabled
	 * policy CPUs.
	 */
	if (!have_policy && cpumask_equal(valid_cpus, cpu_present_mask))
		cpumask_or(amu_fie_cpus, amu_fie_cpus, valid_cpus);

	if (!cpumask_empty(amu_fie_cpus)) {
		pr_info("CPUs[%*pbl]: counters will be used for FIE.\n",
			cpumask_pr_args(amu_fie_cpus));
		static_branch_enable(&amu_fie_key);
	}

	/*
	 * If the system is not fully invariant after AMU init, disable
	 * partial use of counters for frequency invariance.
	 */
	if (!topology_scale_freq_invariant())
		static_branch_disable(&amu_fie_key);

free_valid_mask:
	free_cpumask_var(valid_cpus);

	return ret;
}
late_initcall_sync(init_amu_fie);

bool arch_freq_counters_available(const struct cpumask *cpus)
{
	return amu_freq_invariant() &&
	       cpumask_subset(cpus, amu_fie_cpus);
}

void topology_scale_freq_tick(void)
{
	u64 prev_core_cnt, prev_const_cnt;
	u64 core_cnt, const_cnt, scale;
	int cpu = smp_processor_id();

	if (!amu_freq_invariant())
		return;

	if (!cpumask_test_cpu(cpu, amu_fie_cpus))
		return;

	const_cnt = read_sysreg_s(SYS_AMEVCNTR0_CONST_EL0);
	core_cnt = read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0);
	prev_const_cnt = this_cpu_read(arch_const_cycles_prev);
	prev_core_cnt = this_cpu_read(arch_core_cycles_prev);

	if (unlikely(core_cnt <= prev_core_cnt ||
		     const_cnt <= prev_const_cnt))
		goto store_and_exit;

	/*
	 *          /\core    arch_max_freq_scale
	 * scale = ------- * --------------------
	 *          /\const  SCHED_CAPACITY_SCALE
	 *
	 * See validate_cpu_freq_invariance_counters() for details on
	 * arch_max_freq_scale and the use of SCHED_CAPACITY_SHIFT.
	 */
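	/*
	 * Worked example, continuing the illustrative numbers from
	 * validate_cpu_freq_invariance_counters(): with a 25 MHz constant
	 * counter, a 2.5 GHz maximum frequency (arch_max_freq_scale = 10485)
	 * and the CPU running at 1.25 GHz, a 4 ms tick yields deltas of
	 * 100000 constant and 5000000 core cycles, so scale computes to
	 * (5000000 * 10485 >> 10) / 100000 = 511, i.e. roughly half of
	 * SCHED_CAPACITY_SCALE, as expected at half the maximum frequency.
	 */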
	scale = core_cnt - prev_core_cnt;
	scale *= this_cpu_read(arch_max_freq_scale);
	scale = div64_u64(scale >> SCHED_CAPACITY_SHIFT,
			  const_cnt - prev_const_cnt);

	scale = min_t(unsigned long, scale, SCHED_CAPACITY_SCALE);
	this_cpu_write(freq_scale, (unsigned long)scale);

store_and_exit:
	this_cpu_write(arch_core_cycles_prev, core_cnt);
	this_cpu_write(arch_const_cycles_prev, const_cnt);
}
#endif /* CONFIG_ARM64_AMU_EXTN */