// SPDX-License-Identifier: GPL-2.0
/*
 * CPU accounting code for task groups.
 *
 * Based on the work by Paul Menage (menage@google.com) and Balbir Singh
 * (balbir@in.ibm.com).
 */
#include "sched.h"

/* Time spent by the tasks of the CPU accounting group executing in ... */
enum cpuacct_stat_index {
	CPUACCT_STAT_USER,	/* ... user mode */
	CPUACCT_STAT_SYSTEM,	/* ... kernel mode */

	CPUACCT_STAT_NSTATS,
};

static const char * const cpuacct_stat_desc[] = {
	[CPUACCT_STAT_USER] = "user",
	[CPUACCT_STAT_SYSTEM] = "system",
};

struct cpuacct_usage {
	u64 usages[CPUACCT_STAT_NSTATS];
};

/* track CPU usage of a group of tasks and its child groups */
struct cpuacct {
	struct cgroup_subsys_state css;
	/* cpuusage holds pointer to a u64-type object on every CPU */
	struct cpuacct_usage __percpu *cpuusage;
	struct kernel_cpustat __percpu *cpustat;
};

static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css)
{
	return css ? container_of(css, struct cpuacct, css) : NULL;
}

/* Return CPU accounting group to which this task belongs */
static inline struct cpuacct *task_ca(struct task_struct *tsk)
{
	return css_ca(task_css(tsk, cpuacct_cgrp_id));
}

static inline struct cpuacct *parent_ca(struct cpuacct *ca)
{
	return css_ca(ca->css.parent);
}

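/*
 * The root group is statically allocated: this subsystem is early_init,
 * so its css is needed before the normal allocators are up (see
 * cpuacct_css_alloc(), which hands back &root_cpuacct.css when there is
 * no parent). Its cpustat aliases the global kernel_cpustat.
 */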
static DEFINE_PER_CPU(struct cpuacct_usage, root_cpuacct_cpuusage);
static struct cpuacct root_cpuacct = {
	.cpustat = &kernel_cpustat,
	.cpuusage = &root_cpuacct_cpuusage,
};

/* Create a new CPU accounting group */
static struct cgroup_subsys_state *
cpuacct_css_alloc(struct cgroup_subsys_state *parent_css)
{
	struct cpuacct *ca;

	if (!parent_css)
		return &root_cpuacct.css;

	ca = kzalloc(sizeof(*ca), GFP_KERNEL);
	if (!ca)
		goto out;

	ca->cpuusage = alloc_percpu(struct cpuacct_usage);
	if (!ca->cpuusage)
		goto out_free_ca;

	ca->cpustat = alloc_percpu(struct kernel_cpustat);
	if (!ca->cpustat)
		goto out_free_cpuusage;

	return &ca->css;

out_free_cpuusage:
	free_percpu(ca->cpuusage);
out_free_ca:
	kfree(ca);
out:
	return ERR_PTR(-ENOMEM);
}

/* Destroy an existing CPU accounting group */
static void cpuacct_css_free(struct cgroup_subsys_state *css)
{
	struct cpuacct *ca = css_ca(css);

	free_percpu(ca->cpustat);
	free_percpu(ca->cpuusage);
	kfree(ca);
}

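/*
 * Read one per-CPU counter of @ca. @index selects user or system time;
 * CPUACCT_STAT_NSTATS is also accepted and returns the sum of both.
 */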
static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu,
				 enum cpuacct_stat_index index)
{
	struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
	u64 data;

	/*
	 * We allow index == CPUACCT_STAT_NSTATS here to read
	 * the sum of usages.
	 */
	BUG_ON(index > CPUACCT_STAT_NSTATS);

#ifndef CONFIG_64BIT
	/*
	 * Take rq->lock to make 64-bit read safe on 32-bit platforms.
	 */
	raw_spin_lock_irq(&cpu_rq(cpu)->lock);
#endif

	if (index == CPUACCT_STAT_NSTATS) {
		int i = 0;

		data = 0;
		for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
			data += cpuusage->usages[i];
	} else {
		data = cpuusage->usages[index];
	}

#ifndef CONFIG_64BIT
	raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
#endif

	return data;
}

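/*
 * Overwrite all stat counters of @ca on @cpu with @val. The only caller,
 * cpuusage_write(), passes val == 0 to reset the group's accounting.
 */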
static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
{
	struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
	int i;

#ifndef CONFIG_64BIT
	/*
	 * Take rq->lock to make 64-bit write safe on 32-bit platforms.
	 */
	raw_spin_lock_irq(&cpu_rq(cpu)->lock);
#endif

	for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
		cpuusage->usages[i] = val;

#ifndef CONFIG_64BIT
	raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
#endif
}

/* Return CPU usage (in nanoseconds) of a group for the given stat index */
static u64 __cpuusage_read(struct cgroup_subsys_state *css,
			   enum cpuacct_stat_index index)
{
	struct cpuacct *ca = css_ca(css);
	u64 totalcpuusage = 0;
	int i;

	for_each_possible_cpu(i)
		totalcpuusage += cpuacct_cpuusage_read(ca, i, index);

	return totalcpuusage;
}

static u64 cpuusage_user_read(struct cgroup_subsys_state *css,
			      struct cftype *cft)
{
	return __cpuusage_read(css, CPUACCT_STAT_USER);
}

static u64 cpuusage_sys_read(struct cgroup_subsys_state *css,
			     struct cftype *cft)
{
	return __cpuusage_read(css, CPUACCT_STAT_SYSTEM);
}

static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
	return __cpuusage_read(css, CPUACCT_STAT_NSTATS);
}

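/*
 * Write handler for the "usage" file: only 0 is accepted, and writing it
 * resets the group's counters on every CPU. From userspace (path
 * illustrative, assuming a cgroup v1 hierarchy):
 *
 *   echo 0 > /sys/fs/cgroup/cpuacct/g1/cpuacct.usage
 */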
static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
			  u64 val)
{
	struct cpuacct *ca = css_ca(css);
	int cpu;

	/*
	 * Only allow '0' here to do a reset.
	 */
	if (val)
		return -EINVAL;

	for_each_possible_cpu(cpu)
		cpuacct_cpuusage_write(ca, cpu, 0);

	return 0;
}

static int __cpuacct_percpu_seq_show(struct seq_file *m,
				     enum cpuacct_stat_index index)
{
	struct cpuacct *ca = css_ca(seq_css(m));
	u64 percpu;
	int i;

	for_each_possible_cpu(i) {
		percpu = cpuacct_cpuusage_read(ca, i, index);
		seq_printf(m, "%llu ", (unsigned long long) percpu);
	}
	seq_printf(m, "\n");
	return 0;
}

static int cpuacct_percpu_user_seq_show(struct seq_file *m, void *V)
{
	return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_USER);
}

static int cpuacct_percpu_sys_seq_show(struct seq_file *m, void *V)
{
	return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_SYSTEM);
}

static int cpuacct_percpu_seq_show(struct seq_file *m, void *V)
{
	return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_NSTATS);
}

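/*
 * Example "cpuacct.usage_all" output (values illustrative; one row per
 * possible CPU, all times in nanoseconds):
 *
 *   cpu user system
 *   0 663610072 2242679133
 *   1 422486974 1981921815
 */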
static int cpuacct_all_seq_show(struct seq_file *m, void *V)
{
	struct cpuacct *ca = css_ca(seq_css(m));
	int index;
	int cpu;

	seq_puts(m, "cpu");
	for (index = 0; index < CPUACCT_STAT_NSTATS; index++)
		seq_printf(m, " %s", cpuacct_stat_desc[index]);
	seq_puts(m, "\n");

	for_each_possible_cpu(cpu) {
		struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);

		seq_printf(m, "%d", cpu);

		for (index = 0; index < CPUACCT_STAT_NSTATS; index++) {
#ifndef CONFIG_64BIT
			/*
			 * Take rq->lock to make 64-bit read safe on 32-bit
			 * platforms.
			 */
			raw_spin_lock_irq(&cpu_rq(cpu)->lock);
#endif

			seq_printf(m, " %llu", cpuusage->usages[index]);

#ifndef CONFIG_64BIT
			raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
#endif
		}
		seq_puts(m, "\n");
	}
	return 0;
}

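/*
 * Example "cpuacct.stat" output (values illustrative): nice time is folded
 * into "user", irq/softirq time into "system", and nsec_to_clock_t()
 * converts the nanosecond totals into USER_HZ clock ticks:
 *
 *   user 102421
 *   system 57812
 */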
static int cpuacct_stats_show(struct seq_file *sf, void *v)
{
	struct cpuacct *ca = css_ca(seq_css(sf));
	s64 val[CPUACCT_STAT_NSTATS];
	int cpu;
	int stat;

	memset(val, 0, sizeof(val));
	for_each_possible_cpu(cpu) {
		u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;

		val[CPUACCT_STAT_USER] += cpustat[CPUTIME_USER];
		val[CPUACCT_STAT_USER] += cpustat[CPUTIME_NICE];
		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SYSTEM];
		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_IRQ];
		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SOFTIRQ];
	}

	for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) {
		seq_printf(sf, "%s %lld\n",
			   cpuacct_stat_desc[stat],
			   (long long)nsec_to_clock_t(val[stat]));
	}

	return 0;
}

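/*
 * Control files exposed by the controller. In a cgroup v1 hierarchy they
 * appear with the subsystem name prefixed, e.g. "cpuacct.usage" and
 * "cpuacct.stat".
 */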
static struct cftype files[] = {
	{
		.name = "usage",
		.read_u64 = cpuusage_read,
		.write_u64 = cpuusage_write,
	},
	{
		.name = "usage_user",
		.read_u64 = cpuusage_user_read,
	},
	{
		.name = "usage_sys",
		.read_u64 = cpuusage_sys_read,
	},
	{
		.name = "usage_percpu",
		.seq_show = cpuacct_percpu_seq_show,
	},
	{
		.name = "usage_percpu_user",
		.seq_show = cpuacct_percpu_user_seq_show,
	},
	{
		.name = "usage_percpu_sys",
		.seq_show = cpuacct_percpu_sys_seq_show,
	},
	{
		.name = "usage_all",
		.seq_show = cpuacct_all_seq_show,
	},
	{
		.name = "stat",
		.seq_show = cpuacct_stats_show,
	},
	{ }	/* terminate */
};

/*
 * charge this task's execution time to its accounting group.
 *
 * called with rq->lock held.
 */
void cpuacct_charge(struct task_struct *tsk, u64 cputime)
{
	struct cpuacct *ca;
	int index = CPUACCT_STAT_SYSTEM;
	struct pt_regs *regs = task_pt_regs(tsk);

	if (regs && user_mode(regs))
		index = CPUACCT_STAT_USER;

	rcu_read_lock();

	for (ca = task_ca(tsk); ca; ca = parent_ca(ca))
		this_cpu_ptr(ca->cpuusage)->usages[index] += cputime;

	rcu_read_unlock();
}
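
/*
 * Unlike cpuacct_charge() above, which walks all the way up through the
 * root group, the walk in cpuacct_account_field() below stops before the
 * root: the root's cpustat aliases the global kernel_cpustat, which the
 * caller updates itself, so charging it here would count the time twice.
 */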

/*
 * Add user/system time to cpuacct.
 *
 * Note: it's the caller that updates the account of the root cgroup.
 */
void cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
{
	struct cpuacct *ca;

	rcu_read_lock();
	for (ca = task_ca(tsk); ca != &root_cpuacct; ca = parent_ca(ca))
		this_cpu_ptr(ca->cpustat)->cpustat[index] += val;
	rcu_read_unlock();
}

struct cgroup_subsys cpuacct_cgrp_subsys = {
	.css_alloc	= cpuacct_css_alloc,
	.css_free	= cpuacct_css_free,
	.legacy_cftypes	= files,
	.early_init	= true,
};