// SPDX-License-Identifier: GPL-2.0
/*
 * CPU accounting code for task groups.
 *
 * Based on the work by Paul Menage (menage@google.com) and Balbir Singh
 * (balbir@in.ibm.com).
 */
#include <asm/irq_regs.h>
#include "sched.h"

/* Time spent by the tasks of the CPU accounting group executing in ... */
enum cpuacct_stat_index {
	CPUACCT_STAT_USER,	/* ... user mode */
	CPUACCT_STAT_SYSTEM,	/* ... kernel mode */

	CPUACCT_STAT_NSTATS,
};
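
/*
 * CPUACCT_STAT_NSTATS doubles as a sentinel index: passing it to
 * cpuacct_cpuusage_read() requests the sum of all stats.
 */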

static const char * const cpuacct_stat_desc[] = {
	[CPUACCT_STAT_USER] = "user",
	[CPUACCT_STAT_SYSTEM] = "system",
};

struct cpuacct_usage {
	u64 usages[CPUACCT_STAT_NSTATS];
};

/* track CPU usage of a group of tasks and its child groups */
struct cpuacct {
	struct cgroup_subsys_state css;
	/* cpuusage holds a pointer to a struct cpuacct_usage on every CPU */
	struct cpuacct_usage __percpu *cpuusage;
	struct kernel_cpustat __percpu *cpustat;
};

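/* Map a cgroup_subsys_state back to its enclosing cpuacct */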
static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css)
{
	return css ? container_of(css, struct cpuacct, css) : NULL;
}

/* Return CPU accounting group to which this task belongs */
static inline struct cpuacct *task_ca(struct task_struct *tsk)
{
	return css_ca(task_css(tsk, cpuacct_cgrp_id));
}

static inline struct cpuacct *parent_ca(struct cpuacct *ca)
{
	return css_ca(ca->css.parent);
}

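/* The root group uses static per-CPU storage and the global kernel_cpustat */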
static DEFINE_PER_CPU(struct cpuacct_usage, root_cpuacct_cpuusage);
static struct cpuacct root_cpuacct = {
	.cpustat	= &kernel_cpustat,
	.cpuusage	= &root_cpuacct_cpuusage,
};

/* Create a new CPU accounting group */
static struct cgroup_subsys_state *
cpuacct_css_alloc(struct cgroup_subsys_state *parent_css)
{
	struct cpuacct *ca;

	if (!parent_css)
		return &root_cpuacct.css;

	ca = kzalloc(sizeof(*ca), GFP_KERNEL);
	if (!ca)
		goto out;

	ca->cpuusage = alloc_percpu(struct cpuacct_usage);
	if (!ca->cpuusage)
		goto out_free_ca;

	ca->cpustat = alloc_percpu(struct kernel_cpustat);
	if (!ca->cpustat)
		goto out_free_cpuusage;

	return &ca->css;

out_free_cpuusage:
	free_percpu(ca->cpuusage);
out_free_ca:
	kfree(ca);
out:
	return ERR_PTR(-ENOMEM);
}

/* Destroy an existing CPU accounting group */
static void cpuacct_css_free(struct cgroup_subsys_state *css)
{
	struct cpuacct *ca = css_ca(css);

	free_percpu(ca->cpustat);
	free_percpu(ca->cpuusage);
	kfree(ca);
}

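/* Read the usage (in nanoseconds) of one CPU for the given stat index */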
static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu,
				 enum cpuacct_stat_index index)
{
	struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
	u64 data;

	/*
	 * We allow index == CPUACCT_STAT_NSTATS here to read
	 * the sum of usages.
	 */
	BUG_ON(index > CPUACCT_STAT_NSTATS);

#ifndef CONFIG_64BIT
	/*
	 * Take rq->lock to make 64-bit read safe on 32-bit platforms.
	 */
	raw_spin_lock_irq(&cpu_rq(cpu)->lock);
#endif

	if (index == CPUACCT_STAT_NSTATS) {
		int i = 0;

		data = 0;
		for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
			data += cpuusage->usages[i];
	} else {
		data = cpuusage->usages[index];
	}

#ifndef CONFIG_64BIT
	raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
#endif

	return data;
}

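/* Overwrite every per-stat counter of one CPU; the only caller resets them to 0 */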
static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
{
	struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
	int i;

#ifndef CONFIG_64BIT
	/*
	 * Take rq->lock to make 64-bit write safe on 32-bit platforms.
	 */
	raw_spin_lock_irq(&cpu_rq(cpu)->lock);
#endif

	for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
		cpuusage->usages[i] = val;

#ifndef CONFIG_64BIT
	raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
#endif
}

/* Return total CPU usage (in nanoseconds) of a group */
static u64 __cpuusage_read(struct cgroup_subsys_state *css,
			   enum cpuacct_stat_index index)
{
	struct cpuacct *ca = css_ca(css);
	u64 totalcpuusage = 0;
	int i;

	for_each_possible_cpu(i)
		totalcpuusage += cpuacct_cpuusage_read(ca, i, index);

	return totalcpuusage;
}

static u64 cpuusage_user_read(struct cgroup_subsys_state *css,
			      struct cftype *cft)
{
	return __cpuusage_read(css, CPUACCT_STAT_USER);
}

static u64 cpuusage_sys_read(struct cgroup_subsys_state *css,
			     struct cftype *cft)
{
	return __cpuusage_read(css, CPUACCT_STAT_SYSTEM);
}

static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
	return __cpuusage_read(css, CPUACCT_STAT_NSTATS);
}

static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
			  u64 val)
{
	struct cpuacct *ca = css_ca(css);
	int cpu;

	/*
	 * Only allow '0' here to do a reset.
	 */
	if (val)
		return -EINVAL;

	for_each_possible_cpu(cpu)
		cpuacct_cpuusage_write(ca, cpu, 0);

	return 0;
}

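/* Print one space-separated usage value per possible CPU */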
static int __cpuacct_percpu_seq_show(struct seq_file *m,
				     enum cpuacct_stat_index index)
{
	struct cpuacct *ca = css_ca(seq_css(m));
	u64 percpu;
	int i;

	for_each_possible_cpu(i) {
		percpu = cpuacct_cpuusage_read(ca, i, index);
		seq_printf(m, "%llu ", (unsigned long long) percpu);
	}
	seq_printf(m, "\n");
	return 0;
}

static int cpuacct_percpu_user_seq_show(struct seq_file *m, void *V)
{
	return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_USER);
}

static int cpuacct_percpu_sys_seq_show(struct seq_file *m, void *V)
{
	return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_SYSTEM);
}

static int cpuacct_percpu_seq_show(struct seq_file *m, void *V)
{
	return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_NSTATS);
}

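/* Emit a header line, then one row per CPU with a column for each stat */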
static int cpuacct_all_seq_show(struct seq_file *m, void *V)
{
	struct cpuacct *ca = css_ca(seq_css(m));
	int index;
	int cpu;

	seq_puts(m, "cpu");
	for (index = 0; index < CPUACCT_STAT_NSTATS; index++)
		seq_printf(m, " %s", cpuacct_stat_desc[index]);
	seq_puts(m, "\n");

	for_each_possible_cpu(cpu) {
		struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);

		seq_printf(m, "%d", cpu);

		for (index = 0; index < CPUACCT_STAT_NSTATS; index++) {
#ifndef CONFIG_64BIT
			/*
			 * Take rq->lock to make 64-bit read safe on 32-bit
			 * platforms.
			 */
			raw_spin_lock_irq(&cpu_rq(cpu)->lock);
#endif

			seq_printf(m, " %llu", cpuusage->usages[index]);

#ifndef CONFIG_64BIT
			raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
#endif
		}
		seq_puts(m, "\n");
	}
	return 0;
}

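/* Fold the kernel_cpustat buckets into user/system totals, shown in clock ticks */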
static int cpuacct_stats_show(struct seq_file *sf, void *v)
{
	struct cpuacct *ca = css_ca(seq_css(sf));
	s64 val[CPUACCT_STAT_NSTATS];
	int cpu;
	int stat;

	memset(val, 0, sizeof(val));
	for_each_possible_cpu(cpu) {
		u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;

		val[CPUACCT_STAT_USER]   += cpustat[CPUTIME_USER];
		val[CPUACCT_STAT_USER]   += cpustat[CPUTIME_NICE];
		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SYSTEM];
		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_IRQ];
		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SOFTIRQ];
	}

	for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) {
		seq_printf(sf, "%s %lld\n",
			   cpuacct_stat_desc[stat],
			   (long long)nsec_to_clock_t(val[stat]));
	}

	return 0;
}

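/*
 * Control files exposed by the controller. cgroup prefixes each name with
 * "cpuacct.", so "usage" appears as cpuacct.usage, and so on.
 *
 * A usage sketch, assuming the v1 hierarchy is mounted at
 * /sys/fs/cgroup/cpuacct (the child group "g1" is hypothetical):
 *
 *	$ cat /sys/fs/cgroup/cpuacct/g1/cpuacct.usage		# total ns for g1
 *	$ echo 0 > /sys/fs/cgroup/cpuacct/g1/cpuacct.usage	# reset counters
 */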
static struct cftype files[] = {
	{
		.name = "usage",
		.read_u64 = cpuusage_read,
		.write_u64 = cpuusage_write,
	},
	{
		.name = "usage_user",
		.read_u64 = cpuusage_user_read,
	},
	{
		.name = "usage_sys",
		.read_u64 = cpuusage_sys_read,
	},
	{
		.name = "usage_percpu",
		.seq_show = cpuacct_percpu_seq_show,
	},
	{
		.name = "usage_percpu_user",
		.seq_show = cpuacct_percpu_user_seq_show,
	},
	{
		.name = "usage_percpu_sys",
		.seq_show = cpuacct_percpu_sys_seq_show,
	},
	{
		.name = "usage_all",
		.seq_show = cpuacct_all_seq_show,
	},
	{
		.name = "stat",
		.seq_show = cpuacct_stats_show,
	},
	{ }	/* terminate */
};

/*
 * charge this task's execution time to its accounting group.
 *
 * called with rq->lock held.
 */
void cpuacct_charge(struct task_struct *tsk, u64 cputime)
{
	struct cpuacct *ca;
	int index = CPUACCT_STAT_SYSTEM;
	struct pt_regs *regs = get_irq_regs() ? : task_pt_regs(tsk);

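	/* Count as user time if the task was interrupted while in user mode */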
	if (regs && user_mode(regs))
		index = CPUACCT_STAT_USER;

	rcu_read_lock();

	for (ca = task_ca(tsk); ca; ca = parent_ca(ca))
		__this_cpu_add(ca->cpuusage->usages[index], cputime);

	rcu_read_unlock();
}

/*
 * Add user/system time to cpuacct.
 *
 * Note: it's the caller that updates the account of the root cgroup.
 */
void cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
{
	struct cpuacct *ca;

	rcu_read_lock();
	for (ca = task_ca(tsk); ca != &root_cpuacct; ca = parent_ca(ca))
		__this_cpu_add(ca->cpustat->cpustat[index], val);
	rcu_read_unlock();
}

struct cgroup_subsys cpuacct_cgrp_subsys = {
	.css_alloc	= cpuacct_css_alloc,
	.css_free	= cpuacct_css_free,
	.legacy_cftypes	= files,
	.early_init	= true,
};