Update Linux to v5.4.2
Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
index 817c02b..cb5629b 100644
--- a/fs/proc/Kconfig
+++ b/fs/proc/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
config PROC_FS
bool "/proc file system support" if EXPERT
default y
@@ -57,7 +58,8 @@
snapshot.
If you say Y here, the collected device dumps will be added
- as ELF notes to /proc/vmcore.
+ as ELF notes to /proc/vmcore. You can still disable device
+ dump using the kernel command line option 'novmcoredd'.
config PROC_SYSCTL
bool "Sysctl support (/proc/sys)" if EXPERT
@@ -71,7 +73,7 @@
interface is through /proc/sys. If you say Y here a tree of
modifiable sysctl entries will be generated beneath the
/proc/sys directory. They are explained in the files
- in <file:Documentation/sysctl/>. Note that enabling this
+ in <file:Documentation/admin-guide/sysctl/>. Note that enabling this
option will enlarge the kernel by at least 8 KB.
As it is generally a good thing, you should say Y here unless
@@ -97,3 +99,7 @@
Say Y if you are running any user-space software which takes benefit from
this interface. For example, rkt is such a piece of software.
+
+config PROC_PID_ARCH_STATUS
+ def_bool n
+ depends on PROC_FS
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 0ceb3b6..46dcb6f 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -343,28 +343,28 @@
#ifdef CONFIG_SECCOMP
seq_put_decimal_ull(m, "\nSeccomp:\t", p->seccomp.mode);
#endif
- seq_printf(m, "\nSpeculation_Store_Bypass:\t");
+ seq_puts(m, "\nSpeculation_Store_Bypass:\t");
switch (arch_prctl_spec_ctrl_get(p, PR_SPEC_STORE_BYPASS)) {
case -EINVAL:
- seq_printf(m, "unknown");
+ seq_puts(m, "unknown");
break;
case PR_SPEC_NOT_AFFECTED:
- seq_printf(m, "not vulnerable");
+ seq_puts(m, "not vulnerable");
break;
case PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE:
- seq_printf(m, "thread force mitigated");
+ seq_puts(m, "thread force mitigated");
break;
case PR_SPEC_PRCTL | PR_SPEC_DISABLE:
- seq_printf(m, "thread mitigated");
+ seq_puts(m, "thread mitigated");
break;
case PR_SPEC_PRCTL | PR_SPEC_ENABLE:
- seq_printf(m, "thread vulnerable");
+ seq_puts(m, "thread vulnerable");
break;
case PR_SPEC_DISABLE:
- seq_printf(m, "globally mitigated");
+ seq_puts(m, "globally mitigated");
break;
default:
- seq_printf(m, "vulnerable");
+ seq_puts(m, "vulnerable");
break;
}
seq_putc(m, '\n');
@@ -381,9 +381,9 @@
static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
{
seq_printf(m, "Cpus_allowed:\t%*pb\n",
- cpumask_pr_args(&task->cpus_allowed));
+ cpumask_pr_args(task->cpus_ptr));
seq_printf(m, "Cpus_allowed_list:\t%*pbl\n",
- cpumask_pr_args(&task->cpus_allowed));
+ cpumask_pr_args(task->cpus_ptr));
}
static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm)
@@ -392,6 +392,15 @@
seq_putc(m, '\n');
}
+static inline void task_thp_status(struct seq_file *m, struct mm_struct *mm)
+{
+ bool thp_enabled = IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE);
+
+ if (thp_enabled)
+ thp_enabled = !test_bit(MMF_DISABLE_THP, &mm->flags);
+ seq_printf(m, "THP_enabled:\t%d\n", thp_enabled);
+}
+
int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task)
{
@@ -406,6 +415,7 @@
if (mm) {
task_mem(m, mm);
task_core_dumping(m, mm);
+ task_thp_status(m, mm);
mmput(mm);
}
task_sig(m, task);
@@ -452,7 +462,7 @@
* a program is not able to use ptrace(2) in that case. It is
* safe because the task has stopped executing permanently.
*/
- if (permitted && (task->flags & PF_DUMPCORE)) {
+ if (permitted && (task->flags & (PF_EXITING|PF_DUMPCORE))) {
if (try_get_task_stack(task)) {
eip = KSTK_EIP(task);
esp = KSTK_ESP(task);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 7e9f07b..ebea950 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -59,6 +59,7 @@
#include <linux/capability.h>
#include <linux/file.h>
#include <linux/fdtable.h>
+#include <linux/generic-radix-tree.h>
#include <linux/string.h>
#include <linux/seq_file.h>
#include <linux/namei.h>
@@ -92,7 +93,6 @@
#include <linux/sched/coredump.h>
#include <linux/sched/debug.h>
#include <linux/sched/stat.h>
-#include <linux/flex_array.h>
#include <linux/posix-timers.h>
#include <trace/events/oom.h>
#include "internal.h"
@@ -140,9 +140,13 @@
#define REG(NAME, MODE, fops) \
NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {})
#define ONE(NAME, MODE, show) \
- NOD(NAME, (S_IFREG|(MODE)), \
+ NOD(NAME, (S_IFREG|(MODE)), \
NULL, &proc_single_file_operations, \
{ .proc_show = show } )
+#define ATTR(LSM, NAME, MODE) \
+ NOD(NAME, (S_IFREG|(MODE)), \
+ NULL, &proc_pid_attr_operations, \
+ { .lsm = LSM })
/*
* Count the number of hardlinks for the pid_entry table, excluding the .
@@ -205,12 +209,53 @@
return result;
}
+/*
+ * If the user used setproctitle(), we just get the string from
+ * user space at arg_start, and limit it to a maximum of one page.
+ */
+static ssize_t get_mm_proctitle(struct mm_struct *mm, char __user *buf,
+ size_t count, unsigned long pos,
+ unsigned long arg_start)
+{
+ char *page;
+ int ret, got;
+
+ if (pos >= PAGE_SIZE)
+ return 0;
+
+ page = (char *)__get_free_page(GFP_KERNEL);
+ if (!page)
+ return -ENOMEM;
+
+ ret = 0;
+ got = access_remote_vm(mm, arg_start, page, PAGE_SIZE, FOLL_ANON);
+ if (got > 0) {
+ int len = strnlen(page, got);
+
+ /* Include the NUL character if it was found */
+ if (len < got)
+ len++;
+
+ if (len > pos) {
+ len -= pos;
+ if (len > count)
+ len = count;
+ len -= copy_to_user(buf, page+pos, len);
+ if (!len)
+ len = -EFAULT;
+ ret = len;
+ }
+ }
+ free_page((unsigned long)page);
+ return ret;
+}
+
static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
size_t count, loff_t *ppos)
{
unsigned long arg_start, arg_end, env_start, env_end;
unsigned long pos, len;
- char *page;
+ char *page, c;
/* Check if process spawned far enough to have cmdline. */
if (!mm->env_end)
@@ -227,28 +272,42 @@
return 0;
/*
- * We have traditionally allowed the user to re-write
- * the argument strings and overflow the end result
- * into the environment section. But only do that if
- * the environment area is contiguous to the arguments.
+ * We allow setproctitle() to overwrite the argument
+ * strings, and overflow past the original end. But
+ * only when it overflows into the environment area.
*/
- if (env_start != arg_end || env_start >= env_end)
+ if (env_start != arg_end || env_end < env_start)
env_start = env_end = arg_end;
-
- /* .. and limit it to a maximum of one page of slop */
- if (env_end >= arg_end + PAGE_SIZE)
- env_end = arg_end + PAGE_SIZE - 1;
+ len = env_end - arg_start;
/* We're not going to care if "*ppos" has high bits set */
- pos = arg_start + *ppos;
-
- /* .. but we do check the result is in the proper range */
- if (pos < arg_start || pos >= env_end)
+ pos = *ppos;
+ if (pos >= len)
+ return 0;
+ if (count > len - pos)
+ count = len - pos;
+ if (!count)
return 0;
- /* .. and we never go past env_end */
- if (env_end - pos < count)
- count = env_end - pos;
+ /*
+ * Magical special case: if the argv[] end byte is not
+ * zero, the user has overwritten it with setproctitle(3).
+ *
+ * Possible future enhancement: do this only once when
+ * pos is 0, and set a flag in the 'struct file'.
+ */
+ if (access_remote_vm(mm, arg_end-1, &c, 1, FOLL_ANON) == 1 && c)
+ return get_mm_proctitle(mm, buf, count, pos, arg_start);
+
+ /*
+ * For the non-setproctitle() case we limit things strictly
+ * to the [arg_start, arg_end[ range.
+ */
+ pos += arg_start;
+ if (pos < arg_start || pos >= arg_end)
+ return 0;
+ if (count > arg_end - pos)
+ count = arg_end - pos;
page = (char *)__get_free_page(GFP_KERNEL);
if (!page)
@@ -258,48 +317,11 @@
while (count) {
int got;
size_t size = min_t(size_t, PAGE_SIZE, count);
- long offset;
- /*
- * Are we already starting past the official end?
- * We always include the last byte that is *supposed*
- * to be NUL
- */
- offset = (pos >= arg_end) ? pos - arg_end + 1 : 0;
-
- got = access_remote_vm(mm, pos - offset, page, size + offset, FOLL_ANON);
- if (got <= offset)
+ got = access_remote_vm(mm, pos, page, size, FOLL_ANON);
+ if (got <= 0)
break;
- got -= offset;
-
- /* Don't walk past a NUL character once you hit arg_end */
- if (pos + got >= arg_end) {
- int n = 0;
-
- /*
- * If we started before 'arg_end' but ended up
- * at or after it, we start the NUL character
- * check at arg_end-1 (where we expect the normal
- * EOF to be).
- *
- * NOTE! This is smaller than 'got', because
- * pos + got >= arg_end
- */
- if (pos < arg_end)
- n = arg_end - pos - 1;
-
- /* Cut off at first NUL after 'n' */
- got = n + strnlen(page+n, offset+got-n);
- if (got < offset)
- break;
- got -= offset;
-
- /* Include the NUL if it existed */
- if (got < size)
- got++;
- }
-
- got -= copy_to_user(buf, page+offset, got);
+ got -= copy_to_user(buf, page, got);
if (unlikely(!got)) {
if (!len)
len = -EFAULT;
@@ -403,7 +425,6 @@
static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task)
{
- struct stack_trace trace;
unsigned long *entries;
int err;
@@ -426,20 +447,17 @@
if (!entries)
return -ENOMEM;
- trace.nr_entries = 0;
- trace.max_entries = MAX_STACK_TRACE_DEPTH;
- trace.entries = entries;
- trace.skip = 0;
-
err = lock_trace(task);
if (!err) {
- unsigned int i;
+ unsigned int i, nr_entries;
- save_stack_trace_tsk(task, &trace);
+ nr_entries = stack_trace_save_tsk(task, entries,
+ MAX_STACK_TRACE_DEPTH, 0);
- for (i = 0; i < trace.nr_entries; i++) {
+ for (i = 0; i < nr_entries; i++) {
seq_printf(m, "[<0>] %pB\n", (void *)entries[i]);
}
+
unlock_trace(task);
}
kfree(entries);
@@ -456,7 +474,7 @@
struct pid *pid, struct task_struct *task)
{
if (unlikely(!sched_info_on()))
- seq_printf(m, "0 0 0\n");
+ seq_puts(m, "0 0 0\n");
else
seq_printf(m, "%llu %llu %lu\n",
(unsigned long long)task->se.sum_exec_runtime,
@@ -485,10 +503,9 @@
lr->count, lr->time, lr->max);
for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
unsigned long bt = lr->backtrace[q];
+
if (!bt)
break;
- if (bt == ULONG_MAX)
- break;
seq_printf(m, " %ps", (void *)bt);
}
seq_putc(m, '\n');
@@ -511,7 +528,7 @@
if (!task)
return -ESRCH;
- clear_all_latency_tracing(task);
+ clear_tsk_latency_tracing(task);
put_task_struct(task);
return count;
@@ -530,11 +547,10 @@
static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task)
{
- unsigned long totalpages = totalram_pages + total_swap_pages;
+ unsigned long totalpages = totalram_pages() + total_swap_pages;
unsigned long points = 0;
- points = oom_badness(task, NULL, NULL, totalpages) *
- 1000 / totalpages;
+ points = oom_badness(task, totalpages) * 1000 / totalpages;
seq_printf(m, "%lu\n", points);
return 0;
@@ -581,8 +597,10 @@
/*
* print the file header
*/
- seq_printf(m, "%-25s %-20s %-20s %-10s\n",
- "Limit", "Soft Limit", "Hard Limit", "Units");
+ seq_puts(m, "Limit "
+ "Soft Limit "
+ "Hard Limit "
+ "Units \n");
for (i = 0; i < RLIM_NLIMITS; i++) {
if (rlim[i].rlim_cur == RLIM_INFINITY)
@@ -610,24 +628,25 @@
static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task)
{
- long nr;
- unsigned long args[6], sp, pc;
+ struct syscall_info info;
+ u64 *args = &info.data.args[0];
int res;
res = lock_trace(task);
if (res)
return res;
- if (task_current_syscall(task, &nr, args, 6, &sp, &pc))
+ if (task_current_syscall(task, &info))
seq_puts(m, "running\n");
- else if (nr < 0)
- seq_printf(m, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
+ else if (info.data.nr < 0)
+ seq_printf(m, "%d 0x%llx 0x%llx\n",
+ info.data.nr, info.sp, info.data.instruction_pointer);
else
seq_printf(m,
- "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
- nr,
+ "%d 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx\n",
+ info.data.nr,
args[0], args[1], args[2], args[3], args[4], args[5],
- sp, pc);
+ info.sp, info.data.instruction_pointer);
unlock_trace(task);
return 0;
@@ -1084,10 +1103,6 @@
task_lock(p);
if (!p->vfork_done && process_shares_mm(p, mm)) {
- pr_info("updating oom_score_adj for %d (%s) from %d to %d because it shares mm with %d (%s). Report if this is unexpected.\n",
- task_pid_nr(p), p->comm,
- p->signal->oom_score_adj, oom_adj,
- task_pid_nr(task), task->comm);
p->signal->oom_score_adj = oom_adj;
if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE))
p->signal->oom_score_adj_min = (short)oom_adj;
@@ -1208,7 +1223,7 @@
.llseek = default_llseek,
};
-#ifdef CONFIG_AUDITSYSCALL
+#ifdef CONFIG_AUDIT
#define TMPBUFLEN 11
static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
size_t count, loff_t *ppos)
@@ -1964,9 +1979,12 @@
goto out;
if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) {
- down_read(&mm->mmap_sem);
- exact_vma_exists = !!find_exact_vma(mm, vm_start, vm_end);
- up_read(&mm->mmap_sem);
+ status = down_read_killable(&mm->mmap_sem);
+ if (!status) {
+ exact_vma_exists = !!find_exact_vma(mm, vm_start,
+ vm_end);
+ up_read(&mm->mmap_sem);
+ }
}
mmput(mm);
@@ -2012,8 +2030,11 @@
if (rc)
goto out_mmput;
+ rc = down_read_killable(&mm->mmap_sem);
+ if (rc)
+ goto out_mmput;
+
rc = -ENOENT;
- down_read(&mm->mmap_sem);
vma = find_exact_vma(mm, vm_start, vm_end);
if (vma && vma->vm_file) {
*path = vma->vm_file->f_path;
@@ -2109,7 +2130,11 @@
if (!mm)
goto out_put_task;
- down_read(&mm->mmap_sem);
+ result = ERR_PTR(-EINTR);
+ if (down_read_killable(&mm->mmap_sem))
+ goto out_put_mm;
+
+ result = ERR_PTR(-ENOENT);
vma = find_exact_vma(mm, vm_start, vm_end);
if (!vma)
goto out_no_vma;
@@ -2120,6 +2145,7 @@
out_no_vma:
up_read(&mm->mmap_sem);
+out_put_mm:
mmput(mm);
out_put_task:
put_task_struct(task);
@@ -2140,11 +2166,12 @@
struct task_struct *task;
struct mm_struct *mm;
unsigned long nr_files, pos, i;
- struct flex_array *fa = NULL;
- struct map_files_info info;
+ GENRADIX(struct map_files_info) fa;
struct map_files_info *p;
int ret;
+ genradix_init(&fa);
+
ret = -ENOENT;
task = get_proc_task(file_inode(file));
if (!task)
@@ -2161,7 +2188,12 @@
mm = get_task_mm(task);
if (!mm)
goto out_put_task;
- down_read(&mm->mmap_sem);
+
+ ret = down_read_killable(&mm->mmap_sem);
+ if (ret) {
+ mmput(mm);
+ goto out_put_task;
+ }
nr_files = 0;
@@ -2176,35 +2208,22 @@
*/
for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) {
- if (vma->vm_file && ++pos > ctx->pos)
- nr_files++;
- }
+ if (!vma->vm_file)
+ continue;
+ if (++pos <= ctx->pos)
+ continue;
- if (nr_files) {
- fa = flex_array_alloc(sizeof(info), nr_files,
- GFP_KERNEL);
- if (!fa || flex_array_prealloc(fa, 0, nr_files,
- GFP_KERNEL)) {
+ p = genradix_ptr_alloc(&fa, nr_files++, GFP_KERNEL);
+ if (!p) {
ret = -ENOMEM;
- if (fa)
- flex_array_free(fa);
up_read(&mm->mmap_sem);
mmput(mm);
goto out_put_task;
}
- for (i = 0, vma = mm->mmap, pos = 2; vma;
- vma = vma->vm_next) {
- if (!vma->vm_file)
- continue;
- if (++pos <= ctx->pos)
- continue;
- info.start = vma->vm_start;
- info.end = vma->vm_end;
- info.mode = vma->vm_file->f_mode;
- if (flex_array_put(fa, i++, &info, GFP_KERNEL))
- BUG();
- }
+ p->start = vma->vm_start;
+ p->end = vma->vm_end;
+ p->mode = vma->vm_file->f_mode;
}
up_read(&mm->mmap_sem);
mmput(mm);
@@ -2213,7 +2232,7 @@
char buf[4 * sizeof(long) + 2]; /* max: %lx-%lx\0 */
unsigned int len;
- p = flex_array_get(fa, i);
+ p = genradix_ptr(&fa, i);
len = snprintf(buf, sizeof(buf), "%lx-%lx", p->start, p->end);
if (!proc_fill_cache(file, ctx,
buf, len,
@@ -2223,12 +2242,11 @@
break;
ctx->pos++;
}
- if (fa)
- flex_array_free(fa);
out_put_task:
put_task_struct(task);
out:
+ genradix_free(&fa);
return ret;
}
@@ -2356,10 +2374,13 @@
return -ESRCH;
if (p != current) {
- if (!capable(CAP_SYS_NICE)) {
+ rcu_read_lock();
+ if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
+ rcu_read_unlock();
count = -EPERM;
goto out;
}
+ rcu_read_unlock();
err = security_task_setscheduler(p);
if (err) {
@@ -2392,11 +2413,14 @@
return -ESRCH;
if (p != current) {
-
- if (!capable(CAP_SYS_NICE)) {
+ rcu_read_lock();
+ if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
+ rcu_read_unlock();
err = -EPERM;
goto out;
}
+ rcu_read_unlock();
+
err = security_task_getscheduler(p);
if (err)
goto out;
@@ -2451,11 +2475,10 @@
static struct dentry *proc_pident_lookup(struct inode *dir,
struct dentry *dentry,
- const struct pid_entry *ents,
- unsigned int nents)
+ const struct pid_entry *p,
+ const struct pid_entry *end)
{
struct task_struct *task = get_proc_task(dir);
- const struct pid_entry *p, *last;
struct dentry *res = ERR_PTR(-ENOENT);
if (!task)
@@ -2465,8 +2488,7 @@
* Yes, it does not scale. And it should not. Don't add
* new entries into /proc/<tgid>/ without very good reasons.
*/
- last = &ents[nents];
- for (p = ents; p < last; p++) {
+ for (; p < end; p++) {
if (p->len != dentry->d_name.len)
continue;
if (!memcmp(dentry->d_name.name, p->name, p->len)) {
@@ -2517,7 +2539,7 @@
if (!task)
return -ESRCH;
- length = security_getprocattr(task,
+ length = security_getprocattr(task, PROC_I(inode)->op.lsm,
(char*)file->f_path.dentry->d_name.name,
&p);
put_task_struct(task);
@@ -2546,6 +2568,11 @@
rcu_read_unlock();
return -EACCES;
}
+ /* Prevent changes to overridden credentials. */
+ if (current_cred() != current_real_cred()) {
+ rcu_read_unlock();
+ return -EBUSY;
+ }
rcu_read_unlock();
if (count > PAGE_SIZE)
@@ -2566,7 +2593,9 @@
if (rv < 0)
goto out_free;
- rv = security_setprocattr(file->f_path.dentry->d_name.name, page, count);
+ rv = security_setprocattr(PROC_I(inode)->op.lsm,
+ file->f_path.dentry->d_name.name, page,
+ count);
mutex_unlock(¤t->signal->cred_guard_mutex);
out_free:
kfree(page);
@@ -2580,13 +2609,53 @@
.llseek = generic_file_llseek,
};
+#define LSM_DIR_OPS(LSM) \
+static int proc_##LSM##_attr_dir_iterate(struct file *filp, \
+ struct dir_context *ctx) \
+{ \
+ return proc_pident_readdir(filp, ctx, \
+ LSM##_attr_dir_stuff, \
+ ARRAY_SIZE(LSM##_attr_dir_stuff)); \
+} \
+\
+static const struct file_operations proc_##LSM##_attr_dir_ops = { \
+ .read = generic_read_dir, \
+ .iterate = proc_##LSM##_attr_dir_iterate, \
+ .llseek = default_llseek, \
+}; \
+\
+static struct dentry *proc_##LSM##_attr_dir_lookup(struct inode *dir, \
+ struct dentry *dentry, unsigned int flags) \
+{ \
+ return proc_pident_lookup(dir, dentry, \
+ LSM##_attr_dir_stuff, \
+ LSM##_attr_dir_stuff + ARRAY_SIZE(LSM##_attr_dir_stuff)); \
+} \
+\
+static const struct inode_operations proc_##LSM##_attr_dir_inode_ops = { \
+ .lookup = proc_##LSM##_attr_dir_lookup, \
+ .getattr = pid_getattr, \
+ .setattr = proc_setattr, \
+}
+
+#ifdef CONFIG_SECURITY_SMACK
+static const struct pid_entry smack_attr_dir_stuff[] = {
+ ATTR("smack", "current", 0666),
+};
+LSM_DIR_OPS(smack);
+#endif
+
static const struct pid_entry attr_dir_stuff[] = {
- REG("current", S_IRUGO|S_IWUGO, proc_pid_attr_operations),
- REG("prev", S_IRUGO, proc_pid_attr_operations),
- REG("exec", S_IRUGO|S_IWUGO, proc_pid_attr_operations),
- REG("fscreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations),
- REG("keycreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations),
- REG("sockcreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations),
+ ATTR(NULL, "current", 0666),
+ ATTR(NULL, "prev", 0444),
+ ATTR(NULL, "exec", 0666),
+ ATTR(NULL, "fscreate", 0666),
+ ATTR(NULL, "keycreate", 0666),
+ ATTR(NULL, "sockcreate", 0666),
+#ifdef CONFIG_SECURITY_SMACK
+ DIR("smack", 0555,
+ proc_smack_attr_dir_inode_ops, proc_smack_attr_dir_ops),
+#endif
};
static int proc_attr_dir_readdir(struct file *file, struct dir_context *ctx)
@@ -2605,7 +2674,8 @@
struct dentry *dentry, unsigned int flags)
{
return proc_pident_lookup(dir, dentry,
- attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff));
+ attr_dir_stuff,
+ attr_dir_stuff + ARRAY_SIZE(attr_dir_stuff));
}
static const struct inode_operations proc_attr_dir_inode_operations = {
@@ -2905,6 +2975,21 @@
}
#endif /* CONFIG_LIVEPATCH */
+#ifdef CONFIG_STACKLEAK_METRICS
+static int proc_stack_depth(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task)
+{
+ unsigned long prev_depth = THREAD_SIZE -
+ (task->prev_lowest_stack & (THREAD_SIZE - 1));
+ unsigned long depth = THREAD_SIZE -
+ (task->lowest_stack & (THREAD_SIZE - 1));
+
+ seq_printf(m, "previous stack depth: %lu\nstack depth: %lu\n",
+ prev_depth, depth);
+ return 0;
+}
+#endif /* CONFIG_STACKLEAK_METRICS */
+
/*
* Thread groups
*/
@@ -2979,7 +3064,7 @@
ONE("oom_score", S_IRUGO, proc_oom_score),
REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
-#ifdef CONFIG_AUDITSYSCALL
+#ifdef CONFIG_AUDIT
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
REG("sessionid", S_IRUGO, proc_sessionid_operations),
#endif
@@ -3006,6 +3091,12 @@
#ifdef CONFIG_LIVEPATCH
ONE("patch_state", S_IRUSR, proc_pid_patch_state),
#endif
+#ifdef CONFIG_STACKLEAK_METRICS
+ ONE("stack_depth", S_IRUGO, proc_stack_depth),
+#endif
+#ifdef CONFIG_PROC_PID_ARCH_STATUS
+ ONE("arch_status", S_IRUGO, proc_pid_arch_status),
+#endif
};
static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
@@ -3020,10 +3111,19 @@
.llseek = generic_file_llseek,
};
+struct pid *tgid_pidfd_to_pid(const struct file *file)
+{
+ if (file->f_op != &proc_tgid_base_operations)
+ return ERR_PTR(-EBADF);
+
+ return proc_pid(file_inode(file));
+}
+
static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
{
return proc_pident_lookup(dir, dentry,
- tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
+ tgid_base_stuff,
+ tgid_base_stuff + ARRAY_SIZE(tgid_base_stuff));
}
static const struct inode_operations proc_tgid_base_inode_operations = {
@@ -3139,7 +3239,7 @@
return d_splice_alias(inode, dentry);
}
-struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
+struct dentry *proc_pid_lookup(struct dentry *dentry, unsigned int flags)
{
struct task_struct *task;
unsigned tgid;
@@ -3364,7 +3464,7 @@
ONE("oom_score", S_IRUGO, proc_oom_score),
REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
-#ifdef CONFIG_AUDITSYSCALL
+#ifdef CONFIG_AUDIT
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
REG("sessionid", S_IRUGO, proc_sessionid_operations),
#endif
@@ -3384,6 +3484,9 @@
#ifdef CONFIG_LIVEPATCH
ONE("patch_state", S_IRUSR, proc_pid_patch_state),
#endif
+#ifdef CONFIG_PROC_PID_ARCH_STATUS
+ ONE("arch_status", S_IRUGO, proc_pid_arch_status),
+#endif
};
static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)
@@ -3395,7 +3498,8 @@
static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
{
return proc_pident_lookup(dir, dentry,
- tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
+ tid_base_stuff,
+ tid_base_stuff + ARRAY_SIZE(tid_base_stuff));
}
static const struct file_operations proc_tid_base_operations = {
diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c
index 954caf0..dfe6ce3 100644
--- a/fs/proc/consoles.c
+++ b/fs/proc/consoles.c
@@ -1,7 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2010 Werner Fink, Jiri Slaby
- *
- * Licensed under GPLv2
*/
#include <linux/console.h>
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 8ae1094..64e9ee1 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* proc/fs/generic.c --- generic routines for the proc-fs
*
@@ -256,7 +257,7 @@
inode = proc_get_inode(dir->i_sb, de);
if (!inode)
return ERR_PTR(-ENOMEM);
- d_set_d_op(dentry, &proc_misc_dentry_ops);
+ d_set_d_op(dentry, de->proc_dops);
return d_splice_alias(inode, dentry);
}
read_unlock(&proc_subdir_lock);
@@ -429,6 +430,8 @@
INIT_LIST_HEAD(&ent->pde_openers);
proc_set_user(ent, (*parent)->uid, (*parent)->gid);
+ ent->proc_dops = &proc_misc_dentry_ops;
+
out:
return ent;
}
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index fc5306a..dbe43a5 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -24,7 +24,6 @@
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/mount.h>
-#include <linux/magic.h>
#include <linux/uaccess.h>
@@ -59,7 +58,6 @@
static struct inode *proc_alloc_inode(struct super_block *sb)
{
struct proc_inode *ei;
- struct inode *inode;
ei = kmem_cache_alloc(proc_inode_cachep, GFP_KERNEL);
if (!ei)
@@ -71,21 +69,14 @@
ei->sysctl = NULL;
ei->sysctl_entry = NULL;
ei->ns_ops = NULL;
- inode = &ei->vfs_inode;
- return inode;
+ return &ei->vfs_inode;
}
-static void proc_i_callback(struct rcu_head *head)
+static void proc_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
kmem_cache_free(proc_inode_cachep, PROC_I(inode));
}
-static void proc_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, proc_i_callback);
-}
-
static void init_once(void *foo)
{
struct proc_inode *ei = (struct proc_inode *) foo;
@@ -124,13 +115,12 @@
return 0;
}
-static const struct super_operations proc_sops = {
+const struct super_operations proc_sops = {
.alloc_inode = proc_alloc_inode,
- .destroy_inode = proc_destroy_inode,
+ .free_inode = proc_free_inode,
.drop_inode = generic_delete_inode,
.evict_inode = proc_evict_inode,
.statfs = simple_statfs,
- .remount_fs = proc_remount,
.show_options = proc_show_options,
};
@@ -210,7 +200,8 @@
struct proc_dir_entry *pde = PDE(file_inode(file));
loff_t rv = -EINVAL;
if (use_pde(pde)) {
- loff_t (*llseek)(struct file *, loff_t, int);
+ typeof_member(struct file_operations, llseek) llseek;
+
llseek = pde->proc_fops->llseek;
if (!llseek)
llseek = default_llseek;
@@ -222,10 +213,11 @@
static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
- ssize_t (*read)(struct file *, char __user *, size_t, loff_t *);
struct proc_dir_entry *pde = PDE(file_inode(file));
ssize_t rv = -EIO;
if (use_pde(pde)) {
+ typeof_member(struct file_operations, read) read;
+
read = pde->proc_fops->read;
if (read)
rv = read(file, buf, count, ppos);
@@ -236,10 +228,11 @@
static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
{
- ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *);
struct proc_dir_entry *pde = PDE(file_inode(file));
ssize_t rv = -EIO;
if (use_pde(pde)) {
+ typeof_member(struct file_operations, write) write;
+
write = pde->proc_fops->write;
if (write)
rv = write(file, buf, count, ppos);
@@ -252,8 +245,9 @@
{
struct proc_dir_entry *pde = PDE(file_inode(file));
__poll_t rv = DEFAULT_POLLMASK;
- __poll_t (*poll)(struct file *, struct poll_table_struct *);
if (use_pde(pde)) {
+ typeof_member(struct file_operations, poll) poll;
+
poll = pde->proc_fops->poll;
if (poll)
rv = poll(file, pts);
@@ -266,8 +260,9 @@
{
struct proc_dir_entry *pde = PDE(file_inode(file));
long rv = -ENOTTY;
- long (*ioctl)(struct file *, unsigned int, unsigned long);
if (use_pde(pde)) {
+ typeof_member(struct file_operations, unlocked_ioctl) ioctl;
+
ioctl = pde->proc_fops->unlocked_ioctl;
if (ioctl)
rv = ioctl(file, cmd, arg);
@@ -281,8 +276,9 @@
{
struct proc_dir_entry *pde = PDE(file_inode(file));
long rv = -ENOTTY;
- long (*compat_ioctl)(struct file *, unsigned int, unsigned long);
if (use_pde(pde)) {
+ typeof_member(struct file_operations, compat_ioctl) compat_ioctl;
+
compat_ioctl = pde->proc_fops->compat_ioctl;
if (compat_ioctl)
rv = compat_ioctl(file, cmd, arg);
@@ -296,8 +292,9 @@
{
struct proc_dir_entry *pde = PDE(file_inode(file));
int rv = -EIO;
- int (*mmap)(struct file *, struct vm_area_struct *);
if (use_pde(pde)) {
+ typeof_member(struct file_operations, mmap) mmap;
+
mmap = pde->proc_fops->mmap;
if (mmap)
rv = mmap(file, vma);
@@ -315,7 +312,7 @@
unsigned long rv = -EIO;
if (use_pde(pde)) {
- typeof(proc_reg_get_unmapped_area) *get_area;
+ typeof_member(struct file_operations, get_unmapped_area) get_area;
get_area = pde->proc_fops->get_unmapped_area;
#ifdef CONFIG_MMU
@@ -336,8 +333,8 @@
{
struct proc_dir_entry *pde = PDE(inode);
int rv = 0;
- int (*open)(struct inode *, struct file *);
- int (*release)(struct inode *, struct file *);
+ typeof_member(struct file_operations, open) open;
+ typeof_member(struct file_operations, release) release;
struct pde_opener *pdeo;
/*
@@ -490,48 +487,3 @@
pde_put(de);
return inode;
}
-
-int proc_fill_super(struct super_block *s, void *data, int silent)
-{
- struct pid_namespace *ns = get_pid_ns(s->s_fs_info);
- struct inode *root_inode;
- int ret;
-
- if (!proc_parse_options(data, ns))
- return -EINVAL;
-
- /* User space would break if executables or devices appear on proc */
- s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV;
- s->s_flags |= SB_NODIRATIME | SB_NOSUID | SB_NOEXEC;
- s->s_blocksize = 1024;
- s->s_blocksize_bits = 10;
- s->s_magic = PROC_SUPER_MAGIC;
- s->s_op = &proc_sops;
- s->s_time_gran = 1;
-
- /*
- * procfs isn't actually a stacking filesystem; however, there is
- * too much magic going on inside it to permit stacking things on
- * top of it
- */
- s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;
-
- pde_get(&proc_root);
- root_inode = proc_get_inode(s, &proc_root);
- if (!root_inode) {
- pr_err("proc_fill_super: get root inode failed\n");
- return -ENOMEM;
- }
-
- s->s_root = d_make_root(root_inode);
- if (!s->s_root) {
- pr_err("proc_fill_super: allocate dentry failed\n");
- return -ENOMEM;
- }
-
- ret = proc_setup_self(s);
- if (ret) {
- return ret;
- }
- return proc_setup_thread_self(s);
-}
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 5185d7f..cd0c8d5 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -1,12 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/* Internal procfs definitions
*
* Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/proc_fs.h>
@@ -44,6 +40,7 @@
struct completion *pde_unload_completion;
const struct inode_operations *proc_iops;
const struct file_operations *proc_fops;
+ const struct dentry_operations *proc_dops;
union {
const struct seq_operations *seq_ops;
int (*single_show)(struct seq_file *, void *);
@@ -81,6 +78,7 @@
int (*proc_show)(struct seq_file *m,
struct pid_namespace *ns, struct pid *pid,
struct task_struct *task);
+ const char *lsm;
};
struct proc_inode {
@@ -161,7 +159,7 @@
extern void pid_update_inode(struct task_struct *, struct inode *);
extern int pid_delete_dentry(const struct dentry *);
extern int proc_pid_readdir(struct file *, struct dir_context *);
-extern struct dentry *proc_pid_lookup(struct inode *, struct dentry *, unsigned int);
+struct dentry *proc_pid_lookup(struct dentry *, unsigned int);
extern loff_t mem_lseek(struct file *, loff_t, int);
/* Lookups */
@@ -205,13 +203,12 @@
struct completion *c;
} __randomize_layout;
extern const struct inode_operations proc_link_inode_operations;
-
extern const struct inode_operations proc_pid_link_inode_operations;
+extern const struct super_operations proc_sops;
void proc_init_kmemcache(void);
void set_proc_pid_nlink(void);
extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
-extern int proc_fill_super(struct super_block *, void *data, int flags);
extern void proc_entry_rundown(struct proc_dir_entry *);
/*
@@ -269,10 +266,8 @@
* root.c
*/
extern struct proc_dir_entry proc_root;
-extern int proc_parse_options(char *options, struct pid_namespace *pid);
extern void proc_self_init(void);
-extern int proc_remount(struct super_block *, int *, char *);
/*
* task_[no]mmu.c
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index d297fe4..e2ed8e0 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -22,7 +22,7 @@
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/printk.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
@@ -31,6 +31,7 @@
#include <linux/ioport.h>
#include <linux/memory.h>
#include <linux/sched/task.h>
+#include <linux/security.h>
#include <asm/sections.h>
#include "internal.h"
@@ -54,6 +55,28 @@
static DECLARE_RWSEM(kclist_lock);
static int kcore_need_update = 1;
+/*
+ * Returns > 0 for RAM pages, 0 for non-RAM pages, < 0 on error
+ * Same as oldmem_pfn_is_ram in vmcore
+ */
+static int (*mem_pfn_is_ram)(unsigned long pfn);
+
+int __init register_mem_pfn_is_ram(int (*fn)(unsigned long pfn))
+{
+ if (mem_pfn_is_ram)
+ return -EBUSY;
+ mem_pfn_is_ram = fn;
+ return 0;
+}
+
+static int pfn_is_ram(unsigned long pfn)
+{
+ if (mem_pfn_is_ram)
+ return mem_pfn_is_ram(pfn);
+ else
+ return 1;
+}
+
/* This doesn't grab kclist_lock, so it should only be used at init time. */
void __init kclist_add(struct kcore_list *new, void *addr, size_t size,
int type)
@@ -465,6 +488,11 @@
goto out;
}
m = NULL; /* skip the list anchor */
+ } else if (!pfn_is_ram(__pa(start) >> PAGE_SHIFT)) {
+ if (clear_user(buffer, tsz)) {
+ ret = -EFAULT;
+ goto out;
+ }
} else if (m->type == KCORE_VMALLOC) {
vread(buf, (char *)start, tsz);
/* we have to zero-fill user buffer even if no read */
@@ -518,9 +546,14 @@
static int open_kcore(struct inode *inode, struct file *filp)
{
+ int ret = security_locked_down(LOCKDOWN_KCORE);
+
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
+ if (ret)
+ return ret;
+
filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!filp->private_data)
return -ENOMEM;
@@ -588,7 +621,7 @@
/*
* MODULES_VADDR has no intersection with VMALLOC_ADDR.
*/
-struct kcore_list kcore_modules;
+static struct kcore_list kcore_modules;
static void __init add_modules_range(void)
{
if (MODULES_VADDR != VMALLOC_START && MODULES_END != VMALLOC_END) {
diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c
index d066947..8468bae 100644
--- a/fs/proc/loadavg.c
+++ b/fs/proc/loadavg.c
@@ -10,9 +10,6 @@
#include <linux/seqlock.h>
#include <linux/time.h>
-#define LOAD_INT(x) ((x) >> FSHIFT)
-#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
-
static int loadavg_proc_show(struct seq_file *m, void *v)
{
unsigned long avnrun[3];
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index edda898..8c1f1bb 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -8,7 +8,6 @@
#include <linux/mmzone.h>
#include <linux/proc_fs.h>
#include <linux/percpu.h>
-#include <linux/quicklist.h>
#include <linux/seq_file.h>
#include <linux/swap.h>
#include <linux/vmstat.h>
@@ -38,6 +37,7 @@
long cached;
long available;
unsigned long pages[NR_LRU_LISTS];
+ unsigned long sreclaimable, sunreclaim;
int lru;
si_meminfo(&i);
@@ -53,6 +53,8 @@
pages[lru] = global_node_page_state(NR_LRU_BASE + lru);
available = si_mem_available();
+ sreclaimable = global_node_page_state(NR_SLAB_RECLAIMABLE);
+ sunreclaim = global_node_page_state(NR_SLAB_UNRECLAIMABLE);
show_val_kb(m, "MemTotal: ", i.totalram);
show_val_kb(m, "MemFree: ", i.freeram);
@@ -94,21 +96,15 @@
show_val_kb(m, "Mapped: ",
global_node_page_state(NR_FILE_MAPPED));
show_val_kb(m, "Shmem: ", i.sharedram);
- show_val_kb(m, "Slab: ",
- global_node_page_state(NR_SLAB_RECLAIMABLE) +
- global_node_page_state(NR_SLAB_UNRECLAIMABLE));
-
- show_val_kb(m, "SReclaimable: ",
- global_node_page_state(NR_SLAB_RECLAIMABLE));
- show_val_kb(m, "SUnreclaim: ",
- global_node_page_state(NR_SLAB_UNRECLAIMABLE));
+ show_val_kb(m, "KReclaimable: ", sreclaimable +
+ global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE));
+ show_val_kb(m, "Slab: ", sreclaimable + sunreclaim);
+ show_val_kb(m, "SReclaimable: ", sreclaimable);
+ show_val_kb(m, "SUnreclaim: ", sunreclaim);
seq_printf(m, "KernelStack: %8lu kB\n",
global_zone_page_state(NR_KERNEL_STACK_KB));
show_val_kb(m, "PageTables: ",
global_zone_page_state(NR_PAGETABLE));
-#ifdef CONFIG_QUICKLIST
- show_val_kb(m, "Quicklists: ", quicklist_total_size());
-#endif
show_val_kb(m, "NFS_Unstable: ",
global_node_page_state(NR_UNSTABLE_NFS));
@@ -120,7 +116,7 @@
show_val_kb(m, "Committed_AS: ", committed);
seq_printf(m, "VmallocTotal: %8lu kB\n",
(unsigned long)VMALLOC_TOTAL >> 10);
- show_val_kb(m, "VmallocUsed: ", 0ul);
+ show_val_kb(m, "VmallocUsed: ", vmalloc_nr_pages());
show_val_kb(m, "VmallocChunk: ", 0ul);
show_val_kb(m, "Percpu: ", pcpu_nr_pages());
@@ -136,6 +132,10 @@
global_node_page_state(NR_SHMEM_THPS) * HPAGE_PMD_NR);
show_val_kb(m, "ShmemPmdMapped: ",
global_node_page_state(NR_SHMEM_PMDMAPPED) * HPAGE_PMD_NR);
+ show_val_kb(m, "FileHugePages: ",
+ global_node_page_state(NR_FILE_THPS) * HPAGE_PMD_NR);
+ show_val_kb(m, "FilePmdMapped: ",
+ global_node_page_state(NR_FILE_PMDMAPPED) * HPAGE_PMD_NR);
#endif
#ifdef CONFIG_CMA
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index 3b63be6..14c2bad 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/* nommu.c: mmu-less memory info files
*
* Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/init.h>
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 792c78a..7c952ee 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/compiler.h>
#include <linux/fs.h>
#include <linux/init.h>
@@ -42,11 +42,13 @@
return -EINVAL;
while (count > 0) {
- if (pfn_valid(pfn))
- ppage = pfn_to_page(pfn);
- else
- ppage = NULL;
- if (!ppage || PageSlab(ppage))
+ /*
+ * TODO: ZONE_DEVICE support requires to identify
+ * memmaps that were actually initialized.
+ */
+ ppage = pfn_to_online_page(pfn);
+
+ if (!ppage || PageSlab(ppage) || page_has_type(ppage))
pcount = 0;
else
pcount = page_mapcount(ppage);
@@ -152,8 +154,8 @@
else if (page_count(page) == 0 && is_free_buddy_page(page))
u |= 1 << KPF_BUDDY;
- if (PageBalloon(page))
- u |= 1 << KPF_BALLOON;
+ if (PageOffline(page))
+ u |= 1 << KPF_OFFLINE;
if (PageTable(page))
u |= 1 << KPF_PGTABLE;
@@ -216,10 +218,11 @@
return -EINVAL;
while (count > 0) {
- if (pfn_valid(pfn))
- ppage = pfn_to_page(pfn);
- else
- ppage = NULL;
+ /*
+ * TODO: ZONE_DEVICE support requires to identify
+ * memmaps that were actually initialized.
+ */
+ ppage = pfn_to_online_page(pfn);
if (put_user(stable_page_flags(ppage), out)) {
ret = -EFAULT;
@@ -261,10 +264,11 @@
return -EINVAL;
while (count > 0) {
- if (pfn_valid(pfn))
- ppage = pfn_to_page(pfn);
- else
- ppage = NULL;
+ /*
+ * TODO: ZONE_DEVICE support requires to identify
+ * memmaps that were actually initialized.
+ */
+ ppage = pfn_to_online_page(pfn);
if (ppage)
ino = page_cgroup_ino(ppage);
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index d5e0fcb..76ae278 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/fs/proc/net.c
*
@@ -38,6 +39,22 @@
return maybe_get_net(PDE_NET(PDE(inode)));
}
+static int proc_net_d_revalidate(struct dentry *dentry, unsigned int flags)
+{
+ return 0;
+}
+
+static const struct dentry_operations proc_net_dentry_ops = {
+ .d_revalidate = proc_net_d_revalidate,
+ .d_delete = always_delete_dentry,
+};
+
+static void pde_force_lookup(struct proc_dir_entry *pde)
+{
+ /* /proc/net/ entries can be changed under us by setns(CLONE_NEWNET) */
+ pde->proc_dops = &proc_net_dentry_ops;
+}
+
static int seq_open_net(struct inode *inode, struct file *file)
{
unsigned int state_size = PDE(inode)->state_size;
@@ -90,6 +107,7 @@
p = proc_create_reg(name, mode, &parent, data);
if (!p)
return NULL;
+ pde_force_lookup(p);
p->proc_fops = &proc_net_seq_fops;
p->seq_ops = ops;
p->state_size = state_size;
@@ -133,6 +151,7 @@
p = proc_create_reg(name, mode, &parent, data);
if (!p)
return NULL;
+ pde_force_lookup(p);
p->proc_fops = &proc_net_seq_fops;
p->seq_ops = ops;
p->state_size = state_size;
@@ -181,6 +200,7 @@
p = proc_create_reg(name, mode, &parent, data);
if (!p)
return NULL;
+ pde_force_lookup(p);
p->proc_fops = &proc_net_single_fops;
p->single_show = show;
return proc_register(parent, p);
@@ -223,6 +243,7 @@
p = proc_create_reg(name, mode, &parent, data);
if (!p)
return NULL;
+ pde_force_lookup(p);
p->proc_fops = &proc_net_single_fops;
p->single_show = show;
p->write = write;
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 4d598a3..d80989b 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -13,6 +13,7 @@
#include <linux/namei.h>
#include <linux/mm.h>
#include <linux/module.h>
+#include <linux/bpf-cgroup.h>
#include "internal.h"
static const struct dentry_operations proc_sys_dentry_operations;
@@ -21,6 +22,10 @@
static const struct file_operations proc_sys_dir_file_operations;
static const struct inode_operations proc_sys_dir_operations;
+/* shared constants to be used in various sysctls */
+const int sysctl_vals[] = { 0, 1, INT_MAX };
+EXPORT_SYMBOL(sysctl_vals);
+
/* Support for permanently empty directories */
struct ctl_table sysctl_mount_point[] = {
@@ -498,6 +503,10 @@
if (root->set_ownership)
root->set_ownership(head, table, &inode->i_uid, &inode->i_gid);
+ else {
+ inode->i_uid = GLOBAL_ROOT_UID;
+ inode->i_gid = GLOBAL_ROOT_GID;
+ }
return inode;
}
@@ -569,8 +578,8 @@
struct inode *inode = file_inode(filp);
struct ctl_table_header *head = grab_header(inode);
struct ctl_table *table = PROC_I(inode)->sysctl_entry;
+ void *new_buf = NULL;
ssize_t error;
- size_t res;
if (IS_ERR(head))
return PTR_ERR(head);
@@ -588,11 +597,27 @@
if (!table->proc_handler)
goto out;
+ error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, &count,
+ ppos, &new_buf);
+ if (error)
+ goto out;
+
/* careful: calling conventions are nasty here */
- res = count;
- error = table->proc_handler(table, write, buf, &res, ppos);
+ if (new_buf) {
+ mm_segment_t old_fs;
+
+ old_fs = get_fs();
+ set_fs(KERNEL_DS);
+ error = table->proc_handler(table, write, (void __user *)new_buf,
+ &count, ppos);
+ set_fs(old_fs);
+ kfree(new_buf);
+ } else {
+ error = table->proc_handler(table, write, buf, &count, ppos);
+ }
+
if (!error)
- error = res;
+ error = count;
out:
sysctl_head_finish(head);
@@ -1626,8 +1651,11 @@
if (--header->nreg)
return;
- put_links(header);
- start_unregistering(header);
+ if (parent) {
+ put_links(header);
+ start_unregistering(header);
+ }
+
if (!--header->count)
kfree_rcu(header, rcu);
diff --git a/fs/proc/root.c b/fs/proc/root.c
index f4b1a9d..0b7c8df 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -19,86 +19,176 @@
#include <linux/module.h>
#include <linux/bitops.h>
#include <linux/user_namespace.h>
+#include <linux/fs_context.h>
#include <linux/mount.h>
#include <linux/pid_namespace.h>
-#include <linux/parser.h>
+#include <linux/fs_parser.h>
#include <linux/cred.h>
+#include <linux/magic.h>
+#include <linux/slab.h>
#include "internal.h"
-enum {
- Opt_gid, Opt_hidepid, Opt_err,
+struct proc_fs_context {
+ struct pid_namespace *pid_ns;
+ unsigned int mask;
+ int hidepid;
+ int gid;
};
-static const match_table_t tokens = {
- {Opt_hidepid, "hidepid=%u"},
- {Opt_gid, "gid=%u"},
- {Opt_err, NULL},
+enum proc_param {
+ Opt_gid,
+ Opt_hidepid,
};
-int proc_parse_options(char *options, struct pid_namespace *pid)
+static const struct fs_parameter_spec proc_param_specs[] = {
+ fsparam_u32("gid", Opt_gid),
+ fsparam_u32("hidepid", Opt_hidepid),
+ {}
+};
+
+static const struct fs_parameter_description proc_fs_parameters = {
+ .name = "proc",
+ .specs = proc_param_specs,
+};
+
+static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
- char *p;
- substring_t args[MAX_OPT_ARGS];
- int option;
+ struct proc_fs_context *ctx = fc->fs_private;
+ struct fs_parse_result result;
+ int opt;
- if (!options)
- return 1;
+ opt = fs_parse(fc, &proc_fs_parameters, param, &result);
+ if (opt < 0)
+ return opt;
- while ((p = strsep(&options, ",")) != NULL) {
- int token;
- if (!*p)
- continue;
+ switch (opt) {
+ case Opt_gid:
+ ctx->gid = result.uint_32;
+ break;
- args[0].to = args[0].from = NULL;
- token = match_token(p, tokens, args);
- switch (token) {
- case Opt_gid:
- if (match_int(&args[0], &option))
- return 0;
- pid->pid_gid = make_kgid(current_user_ns(), option);
- break;
- case Opt_hidepid:
- if (match_int(&args[0], &option))
- return 0;
- if (option < HIDEPID_OFF ||
- option > HIDEPID_INVISIBLE) {
- pr_err("proc: hidepid value must be between 0 and 2.\n");
- return 0;
- }
- pid->hide_pid = option;
- break;
- default:
- pr_err("proc: unrecognized mount option \"%s\" "
- "or missing value\n", p);
- return 0;
- }
+ case Opt_hidepid:
+ ctx->hidepid = result.uint_32;
+ if (ctx->hidepid < HIDEPID_OFF ||
+ ctx->hidepid > HIDEPID_INVISIBLE)
+ return invalf(fc, "proc: hidepid value must be between 0 and 2.\n");
+ break;
+
+ default:
+ return -EINVAL;
}
- return 1;
+ ctx->mask |= 1 << opt;
+ return 0;
}
-int proc_remount(struct super_block *sb, int *flags, char *data)
+static void proc_apply_options(struct super_block *s,
+ struct fs_context *fc,
+ struct pid_namespace *pid_ns,
+ struct user_namespace *user_ns)
{
+ struct proc_fs_context *ctx = fc->fs_private;
+
+ if (ctx->mask & (1 << Opt_gid))
+ pid_ns->pid_gid = make_kgid(user_ns, ctx->gid);
+ if (ctx->mask & (1 << Opt_hidepid))
+ pid_ns->hide_pid = ctx->hidepid;
+}
+
+static int proc_fill_super(struct super_block *s, struct fs_context *fc)
+{
+ struct pid_namespace *pid_ns = get_pid_ns(s->s_fs_info);
+ struct inode *root_inode;
+ int ret;
+
+ proc_apply_options(s, fc, pid_ns, current_user_ns());
+
+ /* User space would break if executables or devices appear on proc */
+ s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV;
+ s->s_flags |= SB_NODIRATIME | SB_NOSUID | SB_NOEXEC;
+ s->s_blocksize = 1024;
+ s->s_blocksize_bits = 10;
+ s->s_magic = PROC_SUPER_MAGIC;
+ s->s_op = &proc_sops;
+ s->s_time_gran = 1;
+
+ /*
+ * procfs isn't actually a stacking filesystem; however, there is
+ * too much magic going on inside it to permit stacking things on
+ * top of it
+ */
+ s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;
+
+ /* procfs dentries and inodes don't require IO to create */
+ s->s_shrink.seeks = 0;
+
+ pde_get(&proc_root);
+ root_inode = proc_get_inode(s, &proc_root);
+ if (!root_inode) {
+ pr_err("proc_fill_super: get root inode failed\n");
+ return -ENOMEM;
+ }
+
+ s->s_root = d_make_root(root_inode);
+ if (!s->s_root) {
+ pr_err("proc_fill_super: allocate dentry failed\n");
+ return -ENOMEM;
+ }
+
+ ret = proc_setup_self(s);
+ if (ret) {
+ return ret;
+ }
+ return proc_setup_thread_self(s);
+}
+
+static int proc_reconfigure(struct fs_context *fc)
+{
+ struct super_block *sb = fc->root->d_sb;
struct pid_namespace *pid = sb->s_fs_info;
sync_filesystem(sb);
- return !proc_parse_options(data, pid);
+
+ proc_apply_options(sb, fc, pid, current_user_ns());
+ return 0;
}
-static struct dentry *proc_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+static int proc_get_tree(struct fs_context *fc)
{
- struct pid_namespace *ns;
+ struct proc_fs_context *ctx = fc->fs_private;
- if (flags & SB_KERNMOUNT) {
- ns = data;
- data = NULL;
- } else {
- ns = task_active_pid_ns(current);
- }
+ return get_tree_keyed(fc, proc_fill_super, ctx->pid_ns);
+}
- return mount_ns(fs_type, flags, data, ns, ns->user_ns, proc_fill_super);
+static void proc_fs_context_free(struct fs_context *fc)
+{
+ struct proc_fs_context *ctx = fc->fs_private;
+
+ put_pid_ns(ctx->pid_ns);
+ kfree(ctx);
+}
+
+static const struct fs_context_operations proc_fs_context_ops = {
+ .free = proc_fs_context_free,
+ .parse_param = proc_parse_param,
+ .get_tree = proc_get_tree,
+ .reconfigure = proc_reconfigure,
+};
+
+static int proc_init_fs_context(struct fs_context *fc)
+{
+ struct proc_fs_context *ctx;
+
+ ctx = kzalloc(sizeof(struct proc_fs_context), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->pid_ns = get_pid_ns(task_active_pid_ns(current));
+ put_user_ns(fc->user_ns);
+ fc->user_ns = get_user_ns(ctx->pid_ns->user_ns);
+ fc->fs_private = ctx;
+ fc->ops = &proc_fs_context_ops;
+ return 0;
}
static void proc_kill_sb(struct super_block *sb)
@@ -115,10 +205,11 @@
}
static struct file_system_type proc_fs_type = {
- .name = "proc",
- .mount = proc_mount,
- .kill_sb = proc_kill_sb,
- .fs_flags = FS_USERNS_MOUNT,
+ .name = "proc",
+ .init_fs_context = proc_init_fs_context,
+ .parameters = &proc_fs_parameters,
+ .kill_sb = proc_kill_sb,
+ .fs_flags = FS_USERNS_MOUNT | FS_DISALLOW_NOTIFY_PERM,
};
void __init proc_root_init(void)
@@ -154,9 +245,9 @@
static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, unsigned int flags)
{
- if (!proc_pid_lookup(dir, dentry, flags))
+ if (!proc_pid_lookup(dentry, flags))
return NULL;
-
+
return proc_lookup(dir, dentry, flags);
}
@@ -209,9 +300,28 @@
int pid_ns_prepare_proc(struct pid_namespace *ns)
{
+ struct proc_fs_context *ctx;
+ struct fs_context *fc;
struct vfsmount *mnt;
- mnt = kern_mount_data(&proc_fs_type, ns);
+ fc = fs_context_for_mount(&proc_fs_type, SB_KERNMOUNT);
+ if (IS_ERR(fc))
+ return PTR_ERR(fc);
+
+ if (fc->user_ns != ns->user_ns) {
+ put_user_ns(fc->user_ns);
+ fc->user_ns = get_user_ns(ns->user_ns);
+ }
+
+ ctx = fc->fs_private;
+ if (ctx->pid_ns != ns) {
+ put_pid_ns(ctx->pid_ns);
+ get_pid_ns(ns);
+ ctx->pid_ns = ns;
+ }
+
+ mnt = fc_mount(fc);
+ put_fs_context(fc);
if (IS_ERR(mnt))
return PTR_ERR(mnt);
diff --git a/fs/proc/self.c b/fs/proc/self.c
index 127265e..57c0a10 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -38,6 +38,7 @@
struct inode *root_inode = d_inode(s->s_root);
struct pid_namespace *ns = proc_pid_ns(root_inode);
struct dentry *self;
+ int ret = -ENOMEM;
inode_lock(root_inode);
self = d_alloc_name(s->s_root, "self");
@@ -51,20 +52,19 @@
inode->i_gid = GLOBAL_ROOT_GID;
inode->i_op = &proc_self_inode_operations;
d_add(self, inode);
+ ret = 0;
} else {
dput(self);
- self = ERR_PTR(-ENOMEM);
}
- } else {
- self = ERR_PTR(-ENOMEM);
}
inode_unlock(root_inode);
- if (IS_ERR(self)) {
+
+ if (ret)
pr_err("proc_fill_super: can't allocate /proc/self\n");
- return PTR_ERR(self);
- }
- ns->proc_self = self;
- return 0;
+ else
+ ns->proc_self = self;
+
+ return ret;
}
void __init proc_self_init(void)
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 535eda7..80c305f 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -23,21 +23,21 @@
#ifdef arch_idle_time
-static u64 get_idle_time(int cpu)
+static u64 get_idle_time(struct kernel_cpustat *kcs, int cpu)
{
u64 idle;
- idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE];
+ idle = kcs->cpustat[CPUTIME_IDLE];
if (cpu_online(cpu) && !nr_iowait_cpu(cpu))
idle += arch_idle_time(cpu);
return idle;
}
-static u64 get_iowait_time(int cpu)
+static u64 get_iowait_time(struct kernel_cpustat *kcs, int cpu)
{
u64 iowait;
- iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
+ iowait = kcs->cpustat[CPUTIME_IOWAIT];
if (cpu_online(cpu) && nr_iowait_cpu(cpu))
iowait += arch_idle_time(cpu);
return iowait;
@@ -45,7 +45,7 @@
#else
-static u64 get_idle_time(int cpu)
+static u64 get_idle_time(struct kernel_cpustat *kcs, int cpu)
{
u64 idle, idle_usecs = -1ULL;
@@ -54,14 +54,14 @@
if (idle_usecs == -1ULL)
/* !NO_HZ or cpu offline so we can rely on cpustat.idle */
- idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE];
+ idle = kcs->cpustat[CPUTIME_IDLE];
else
idle = idle_usecs * NSEC_PER_USEC;
return idle;
}
-static u64 get_iowait_time(int cpu)
+static u64 get_iowait_time(struct kernel_cpustat *kcs, int cpu)
{
u64 iowait, iowait_usecs = -1ULL;
@@ -70,7 +70,7 @@
if (iowait_usecs == -1ULL)
/* !NO_HZ or cpu offline so we can rely on cpustat.iowait */
- iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
+ iowait = kcs->cpustat[CPUTIME_IOWAIT];
else
iowait = iowait_usecs * NSEC_PER_USEC;
@@ -79,6 +79,31 @@
#endif
+static void show_irq_gap(struct seq_file *p, unsigned int gap)
+{
+ static const char zeros[] = " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0";
+
+ while (gap > 0) {
+ unsigned int inc;
+
+ inc = min_t(unsigned int, gap, ARRAY_SIZE(zeros) / 2);
+ seq_write(p, zeros, 2 * inc);
+ gap -= inc;
+ }
+}
+
+static void show_all_irqs(struct seq_file *p)
+{
+ unsigned int i, next = 0;
+
+ for_each_active_irq(i) {
+ show_irq_gap(p, i - next);
+ seq_put_decimal_ull(p, " ", kstat_irqs_usr(i));
+ next = i + 1;
+ }
+ show_irq_gap(p, nr_irqs - next);
+}
+
static int show_stat(struct seq_file *p, void *v)
{
int i, j;
@@ -95,16 +120,18 @@
getboottime64(&boottime);
for_each_possible_cpu(i) {
- user += kcpustat_cpu(i).cpustat[CPUTIME_USER];
- nice += kcpustat_cpu(i).cpustat[CPUTIME_NICE];
- system += kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM];
- idle += get_idle_time(i);
- iowait += get_iowait_time(i);
- irq += kcpustat_cpu(i).cpustat[CPUTIME_IRQ];
- softirq += kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ];
- steal += kcpustat_cpu(i).cpustat[CPUTIME_STEAL];
- guest += kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
- guest_nice += kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
+ struct kernel_cpustat *kcs = &kcpustat_cpu(i);
+
+ user += kcs->cpustat[CPUTIME_USER];
+ nice += kcs->cpustat[CPUTIME_NICE];
+ system += kcs->cpustat[CPUTIME_SYSTEM];
+ idle += get_idle_time(kcs, i);
+ iowait += get_iowait_time(kcs, i);
+ irq += kcs->cpustat[CPUTIME_IRQ];
+ softirq += kcs->cpustat[CPUTIME_SOFTIRQ];
+ steal += kcs->cpustat[CPUTIME_STEAL];
+ guest += kcs->cpustat[CPUTIME_GUEST];
+ guest_nice += kcs->cpustat[CPUTIME_GUEST_NICE];
sum += kstat_cpu_irqs_sum(i);
sum += arch_irq_stat_cpu(i);
@@ -130,17 +157,19 @@
seq_putc(p, '\n');
for_each_online_cpu(i) {
+ struct kernel_cpustat *kcs = &kcpustat_cpu(i);
+
/* Copy values here to work around gcc-2.95.3, gcc-2.96 */
- user = kcpustat_cpu(i).cpustat[CPUTIME_USER];
- nice = kcpustat_cpu(i).cpustat[CPUTIME_NICE];
- system = kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM];
- idle = get_idle_time(i);
- iowait = get_iowait_time(i);
- irq = kcpustat_cpu(i).cpustat[CPUTIME_IRQ];
- softirq = kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ];
- steal = kcpustat_cpu(i).cpustat[CPUTIME_STEAL];
- guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
- guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
+ user = kcs->cpustat[CPUTIME_USER];
+ nice = kcs->cpustat[CPUTIME_NICE];
+ system = kcs->cpustat[CPUTIME_SYSTEM];
+ idle = get_idle_time(kcs, i);
+ iowait = get_iowait_time(kcs, i);
+ irq = kcs->cpustat[CPUTIME_IRQ];
+ softirq = kcs->cpustat[CPUTIME_SOFTIRQ];
+ steal = kcs->cpustat[CPUTIME_STEAL];
+ guest = kcs->cpustat[CPUTIME_GUEST];
+ guest_nice = kcs->cpustat[CPUTIME_GUEST_NICE];
seq_printf(p, "cpu%d", i);
seq_put_decimal_ull(p, " ", nsec_to_clock_t(user));
seq_put_decimal_ull(p, " ", nsec_to_clock_t(nice));
@@ -156,9 +185,7 @@
}
seq_put_decimal_ull(p, "intr ", (unsigned long long)sum);
- /* sum again ? it could be updated? */
- for_each_irq_nr(j)
- seq_put_decimal_ull(p, " ", kstat_irqs_usr(j));
+ show_all_irqs(p);
seq_printf(p,
"\nctxt %llu\n"
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index a027473..9442631 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/mm.h>
+#include <linux/pagewalk.h>
#include <linux/vmacache.h>
#include <linux/hugetlb.h>
#include <linux/huge_mm.h>
@@ -59,7 +59,7 @@
SEQ_PUT_DEC("VmPeak:\t", hiwater_vm);
SEQ_PUT_DEC(" kB\nVmSize:\t", total_vm);
SEQ_PUT_DEC(" kB\nVmLck:\t", mm->locked_vm);
- SEQ_PUT_DEC(" kB\nVmPin:\t", mm->pinned_vm);
+ SEQ_PUT_DEC(" kB\nVmPin:\t", atomic64_read(&mm->pinned_vm));
SEQ_PUT_DEC(" kB\nVmHWM:\t", hiwater_rss);
SEQ_PUT_DEC(" kB\nVmRSS:\t", total_rss);
SEQ_PUT_DEC(" kB\nRssAnon:\t", anon);
@@ -166,7 +166,11 @@
if (!mm || !mmget_not_zero(mm))
return NULL;
- down_read(&mm->mmap_sem);
+ if (down_read_killable(&mm->mmap_sem)) {
+ mmput(mm);
+ return ERR_PTR(-EINTR);
+ }
+
hold_task_mempolicy(priv);
priv->tail_vma = get_gate_vma(mm);
@@ -413,21 +417,58 @@
unsigned long lazyfree;
unsigned long anonymous_thp;
unsigned long shmem_thp;
+ unsigned long file_thp;
unsigned long swap;
unsigned long shared_hugetlb;
unsigned long private_hugetlb;
u64 pss;
+ u64 pss_anon;
+ u64 pss_file;
+ u64 pss_shmem;
u64 pss_locked;
u64 swap_pss;
bool check_shmem_swap;
};
-static void smaps_account(struct mem_size_stats *mss, struct page *page,
- bool compound, bool young, bool dirty)
+static void smaps_page_accumulate(struct mem_size_stats *mss,
+ struct page *page, unsigned long size, unsigned long pss,
+ bool dirty, bool locked, bool private)
{
- int i, nr = compound ? 1 << compound_order(page) : 1;
+ mss->pss += pss;
+
+ if (PageAnon(page))
+ mss->pss_anon += pss;
+ else if (PageSwapBacked(page))
+ mss->pss_shmem += pss;
+ else
+ mss->pss_file += pss;
+
+ if (locked)
+ mss->pss_locked += pss;
+
+ if (dirty || PageDirty(page)) {
+ if (private)
+ mss->private_dirty += size;
+ else
+ mss->shared_dirty += size;
+ } else {
+ if (private)
+ mss->private_clean += size;
+ else
+ mss->shared_clean += size;
+ }
+}
+
+static void smaps_account(struct mem_size_stats *mss, struct page *page,
+ bool compound, bool young, bool dirty, bool locked)
+{
+ int i, nr = compound ? compound_nr(page) : 1;
unsigned long size = nr * PAGE_SIZE;
+ /*
+ * First accumulate quantities that depend only on |size| and the type
+ * of the compound page.
+ */
if (PageAnon(page)) {
mss->anonymous += size;
if (!PageSwapBacked(page) && !dirty && !PageDirty(page))
@@ -440,35 +481,25 @@
mss->referenced += size;
/*
+ * Then accumulate quantities that may depend on sharing, or that may
+ * differ page-by-page.
+ *
* page_count(page) == 1 guarantees the page is mapped exactly once.
* If any subpage of the compound page mapped with PTE it would elevate
* page_count().
*/
if (page_count(page) == 1) {
- if (dirty || PageDirty(page))
- mss->private_dirty += size;
- else
- mss->private_clean += size;
- mss->pss += (u64)size << PSS_SHIFT;
+ smaps_page_accumulate(mss, page, size, size << PSS_SHIFT, dirty,
+ locked, true);
return;
}
-
for (i = 0; i < nr; i++, page++) {
int mapcount = page_mapcount(page);
-
- if (mapcount >= 2) {
- if (dirty || PageDirty(page))
- mss->shared_dirty += PAGE_SIZE;
- else
- mss->shared_clean += PAGE_SIZE;
- mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount;
- } else {
- if (dirty || PageDirty(page))
- mss->private_dirty += PAGE_SIZE;
- else
- mss->private_clean += PAGE_SIZE;
- mss->pss += PAGE_SIZE << PSS_SHIFT;
- }
+ unsigned long pss = PAGE_SIZE << PSS_SHIFT;
+ if (mapcount >= 2)
+ pss /= mapcount;
+ smaps_page_accumulate(mss, page, PAGE_SIZE, pss, dirty, locked,
+ mapcount < 2);
}
}
@@ -483,13 +514,16 @@
return 0;
}
-#endif
+#else
+#define smaps_pte_hole NULL
+#endif /* CONFIG_SHMEM */
static void smaps_pte_entry(pte_t *pte, unsigned long addr,
struct mm_walk *walk)
{
struct mem_size_stats *mss = walk->private;
struct vm_area_struct *vma = walk->vma;
+ bool locked = !!(vma->vm_flags & VM_LOCKED);
struct page *page = NULL;
if (pte_present(*pte)) {
@@ -521,7 +555,7 @@
if (!page)
return;
- if (radix_tree_exceptional_entry(page))
+ if (xa_is_value(page))
mss->swap += PAGE_SIZE;
else
put_page(page);
@@ -532,7 +566,7 @@
if (!page)
return;
- smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte));
+ smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte), locked);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -541,6 +575,7 @@
{
struct mem_size_stats *mss = walk->private;
struct vm_area_struct *vma = walk->vma;
+ bool locked = !!(vma->vm_flags & VM_LOCKED);
struct page *page;
/* FOLL_DUMP will return -EFAULT on huge zero page */
@@ -554,8 +589,8 @@
else if (is_zone_device_page(page))
/* pass */;
else
- VM_BUG_ON_PAGE(1, page);
- smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd));
+ mss->file_thp += HPAGE_PMD_SIZE;
+ smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), locked);
}
#else
static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
@@ -697,21 +732,24 @@
}
return 0;
}
+#else
+#define smaps_hugetlb_range NULL
#endif /* HUGETLB_PAGE */
+static const struct mm_walk_ops smaps_walk_ops = {
+ .pmd_entry = smaps_pte_range,
+ .hugetlb_entry = smaps_hugetlb_range,
+};
+
+static const struct mm_walk_ops smaps_shmem_walk_ops = {
+ .pmd_entry = smaps_pte_range,
+ .hugetlb_entry = smaps_hugetlb_range,
+ .pte_hole = smaps_pte_hole,
+};
+
static void smap_gather_stats(struct vm_area_struct *vma,
struct mem_size_stats *mss)
{
- struct mm_walk smaps_walk = {
- .pmd_entry = smaps_pte_range,
-#ifdef CONFIG_HUGETLB_PAGE
- .hugetlb_entry = smaps_hugetlb_range,
-#endif
- .mm = vma->vm_mm,
- };
-
- smaps_walk.private = mss;
-
#ifdef CONFIG_SHMEM
/* In case of smaps_rollup, reset the value from previous vma */
mss->check_shmem_swap = false;
@@ -733,25 +771,36 @@
mss->swap += shmem_swapped;
} else {
mss->check_shmem_swap = true;
- smaps_walk.pte_hole = smaps_pte_hole;
+ walk_page_vma(vma, &smaps_shmem_walk_ops, mss);
+ return;
}
}
#endif
-
/* mmap_sem is held in m_start */
- walk_page_vma(vma, &smaps_walk);
- if (vma->vm_flags & VM_LOCKED)
- mss->pss_locked += mss->pss;
+ walk_page_vma(vma, &smaps_walk_ops, mss);
}
#define SEQ_PUT_DEC(str, val) \
seq_put_decimal_ull_width(m, str, (val) >> 10, 8)
/* Show the contents common for smaps and smaps_rollup */
-static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss)
+static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss,
+ bool rollup_mode)
{
SEQ_PUT_DEC("Rss: ", mss->resident);
SEQ_PUT_DEC(" kB\nPss: ", mss->pss >> PSS_SHIFT);
+ if (rollup_mode) {
+ /*
+ * These are meaningful only for smaps_rollup, otherwise two of
+ * them are zero, and the other one is the same as Pss.
+ */
+ SEQ_PUT_DEC(" kB\nPss_Anon: ",
+ mss->pss_anon >> PSS_SHIFT);
+ SEQ_PUT_DEC(" kB\nPss_File: ",
+ mss->pss_file >> PSS_SHIFT);
+ SEQ_PUT_DEC(" kB\nPss_Shmem: ",
+ mss->pss_shmem >> PSS_SHIFT);
+ }
SEQ_PUT_DEC(" kB\nShared_Clean: ", mss->shared_clean);
SEQ_PUT_DEC(" kB\nShared_Dirty: ", mss->shared_dirty);
SEQ_PUT_DEC(" kB\nPrivate_Clean: ", mss->private_clean);
@@ -761,6 +810,7 @@
SEQ_PUT_DEC(" kB\nLazyFree: ", mss->lazyfree);
SEQ_PUT_DEC(" kB\nAnonHugePages: ", mss->anonymous_thp);
SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp);
+ SEQ_PUT_DEC(" kB\nFilePmdMapped: ", mss->file_thp);
SEQ_PUT_DEC(" kB\nShared_Hugetlb: ", mss->shared_hugetlb);
seq_put_decimal_ull_width(m, " kB\nPrivate_Hugetlb: ",
mss->private_hugetlb >> 10, 7);
@@ -788,7 +838,10 @@
SEQ_PUT_DEC(" kB\nMMUPageSize: ", vma_mmu_pagesize(vma));
seq_puts(m, " kB\n");
- __show_smap(m, &mss);
+ __show_smap(m, &mss, false);
+
+ seq_printf(m, "THPeligible: %d\n",
+ transparent_hugepage_enabled(vma));
if (arch_pkeys_enabled())
seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma));
@@ -820,7 +873,10 @@
memset(&mss, 0, sizeof(mss));
- down_read(&mm->mmap_sem);
+ ret = down_read_killable(&mm->mmap_sem);
+ if (ret)
+ goto out_put_mm;
+
hold_task_mempolicy(priv);
for (vma = priv->mm->mmap; vma; vma = vma->vm_next) {
@@ -833,12 +889,13 @@
seq_pad(m, ' ');
seq_puts(m, "[rollup]\n");
- __show_smap(m, &mss);
+ __show_smap(m, &mss, true);
release_task_mempolicy(priv);
up_read(&mm->mmap_sem);
- mmput(mm);
+out_put_mm:
+ mmput(mm);
out_put_task:
put_task_struct(priv->task);
priv->task = NULL;
@@ -940,10 +997,12 @@
pte_t ptent = *pte;
if (pte_present(ptent)) {
- ptent = ptep_modify_prot_start(vma->vm_mm, addr, pte);
- ptent = pte_wrprotect(ptent);
+ pte_t old_pte;
+
+ old_pte = ptep_modify_prot_start(vma, addr, pte);
+ ptent = pte_wrprotect(old_pte);
ptent = pte_clear_soft_dirty(ptent);
- ptep_modify_prot_commit(vma->vm_mm, addr, pte, ptent);
+ ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent);
} else if (is_swap_pte(ptent)) {
ptent = pte_swp_clear_soft_dirty(ptent);
set_pte_at(vma->vm_mm, addr, pte, ptent);
@@ -1067,6 +1126,11 @@
return 0;
}
+static const struct mm_walk_ops clear_refs_walk_ops = {
+ .pmd_entry = clear_refs_pte_range,
+ .test_walk = clear_refs_test_walk,
+};
+
static ssize_t clear_refs_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
@@ -1096,15 +1160,10 @@
return -ESRCH;
mm = get_task_mm(task);
if (mm) {
+ struct mmu_notifier_range range;
struct clear_refs_private cp = {
.type = type,
};
- struct mm_walk clear_refs_walk = {
- .pmd_entry = clear_refs_pte_range,
- .test_walk = clear_refs_test_walk,
- .mm = mm,
- .private = &cp,
- };
if (type == CLEAR_REFS_MM_HIWATER_RSS) {
if (down_write_killable(&mm->mmap_sem)) {
@@ -1121,7 +1180,10 @@
goto out_mm;
}
- down_read(&mm->mmap_sem);
+ if (down_read_killable(&mm->mmap_sem)) {
+ count = -EINTR;
+ goto out_mm;
+ }
tlb_gather_mmu(&tlb, mm, 0, -1);
if (type == CLEAR_REFS_SOFT_DIRTY) {
for (vma = mm->mmap; vma; vma = vma->vm_next) {
@@ -1132,6 +1194,24 @@
count = -EINTR;
goto out_mm;
}
+ /*
+ * Avoid to modify vma->vm_flags
+ * without locked ops while the
+ * coredump reads the vm_flags.
+ */
+ if (!mmget_still_valid(mm)) {
+ /*
+ * Silently return "count"
+ * like if get_task_mm()
+ * failed. FIXME: should this
+ * function have returned
+ * -ESRCH if get_task_mm()
+ * failed like if
+ * get_proc_task() fails?
+ */
+ up_write(&mm->mmap_sem);
+ goto out_mm;
+ }
for (vma = mm->mmap; vma; vma = vma->vm_next) {
vma->vm_flags &= ~VM_SOFTDIRTY;
vma_set_page_prot(vma);
@@ -1139,11 +1219,15 @@
downgrade_write(&mm->mmap_sem);
break;
}
- mmu_notifier_invalidate_range_start(mm, 0, -1);
+
+ mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY,
+ 0, NULL, mm, 0, -1UL);
+ mmu_notifier_invalidate_range_start(&range);
}
- walk_page_range(0, mm->highest_vm_end, &clear_refs_walk);
+ walk_page_range(mm, 0, mm->highest_vm_end, &clear_refs_walk_ops,
+ &cp);
if (type == CLEAR_REFS_SOFT_DIRTY)
- mmu_notifier_invalidate_range_end(mm, 0, -1);
+ mmu_notifier_invalidate_range_end(&range);
tlb_finish_mmu(&tlb, 0, -1);
up_read(&mm->mmap_sem);
out_mm:
@@ -1247,7 +1331,7 @@
if (pm->show_pfn)
frame = pte_pfn(pte);
flags |= PM_PRESENT;
- page = _vm_normal_page(vma, addr, pte, true);
+ page = vm_normal_page(vma, addr, pte);
if (pte_soft_dirty(pte))
flags |= PM_SOFT_DIRTY;
} else if (is_swap_pte(pte)) {
@@ -1413,8 +1497,16 @@
return err;
}
+#else
+#define pagemap_hugetlb_range NULL
#endif /* HUGETLB_PAGE */
+static const struct mm_walk_ops pagemap_ops = {
+ .pmd_entry = pagemap_pmd_range,
+ .pte_hole = pagemap_pte_hole,
+ .hugetlb_entry = pagemap_hugetlb_range,
+};
+
/*
* /proc/pid/pagemap - an array mapping virtual pages to pfns
*
@@ -1446,7 +1538,6 @@
{
struct mm_struct *mm = file->private_data;
struct pagemapread pm;
- struct mm_walk pagemap_walk = {};
unsigned long src;
unsigned long svpfn;
unsigned long start_vaddr;
@@ -1474,14 +1565,6 @@
if (!pm.buffer)
goto out_mm;
- pagemap_walk.pmd_entry = pagemap_pmd_range;
- pagemap_walk.pte_hole = pagemap_pte_hole;
-#ifdef CONFIG_HUGETLB_PAGE
- pagemap_walk.hugetlb_entry = pagemap_hugetlb_range;
-#endif
- pagemap_walk.mm = mm;
- pagemap_walk.private = ±
-
src = *ppos;
svpfn = src / PM_ENTRY_BYTES;
start_vaddr = svpfn << PAGE_SHIFT;
@@ -1507,8 +1590,10 @@
/* overflow ? */
if (end < start_vaddr || end > end_vaddr)
end = end_vaddr;
- down_read(&mm->mmap_sem);
- ret = walk_page_range(start_vaddr, end, &pagemap_walk);
+ ret = down_read_killable(&mm->mmap_sem);
+ if (ret)
+ goto out_free;
+ ret = walk_page_range(mm, start_vaddr, end, &pagemap_ops, &pm);
up_read(&mm->mmap_sem);
start_vaddr = end;
@@ -1720,6 +1805,11 @@
}
#endif
+static const struct mm_walk_ops show_numa_ops = {
+ .hugetlb_entry = gather_hugetlb_stats,
+ .pmd_entry = gather_pte_stats,
+};
+
/*
* Display pages allocated per node and memory policy via /proc.
*/
@@ -1731,12 +1821,6 @@
struct numa_maps *md = &numa_priv->md;
struct file *file = vma->vm_file;
struct mm_struct *mm = vma->vm_mm;
- struct mm_walk walk = {
- .hugetlb_entry = gather_hugetlb_stats,
- .pmd_entry = gather_pte_stats,
- .private = md,
- .mm = mm,
- };
struct mempolicy *pol;
char buffer[64];
int nid;
@@ -1770,7 +1854,7 @@
seq_puts(m, " huge");
/* mmap_sem is held by m_start */
- walk_page_vma(vma, &walk);
+ walk_page_vma(vma, &show_numa_ops, md);
if (!md->pages)
goto out;
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 0b63d68..7907e64 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -64,7 +64,7 @@
else
bytes += kobjsize(current->files);
- if (current->sighand && atomic_read(¤t->sighand->count) > 1)
+ if (current->sighand && refcount_read(¤t->sighand->count) > 1)
sbytes += kobjsize(current->sighand);
else
bytes += kobjsize(current->sighand);
@@ -178,7 +178,7 @@
seq_file_path(m, file, "");
} else if (mm && is_stack(vma)) {
seq_pad(m, ' ');
- seq_printf(m, "[stack]");
+ seq_puts(m, "[stack]");
}
seq_putc(m, '\n');
@@ -211,7 +211,11 @@
if (!mm || !mmget_not_zero(mm))
return NULL;
- down_read(&mm->mmap_sem);
+ if (down_read_killable(&mm->mmap_sem)) {
+ mmput(mm);
+ return ERR_PTR(-EINTR);
+ }
+
/* start from the Nth VMA */
for (p = rb_first(&mm->mm_rb); p; p = rb_next(p))
if (n-- == 0)
diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c
index b905010..f61ae53 100644
--- a/fs/proc/thread_self.c
+++ b/fs/proc/thread_self.c
@@ -38,6 +38,7 @@
struct inode *root_inode = d_inode(s->s_root);
struct pid_namespace *ns = proc_pid_ns(root_inode);
struct dentry *thread_self;
+ int ret = -ENOMEM;
inode_lock(root_inode);
thread_self = d_alloc_name(s->s_root, "thread-self");
@@ -51,20 +52,19 @@
inode->i_gid = GLOBAL_ROOT_GID;
inode->i_op = &proc_thread_self_inode_operations;
d_add(thread_self, inode);
+ ret = 0;
} else {
dput(thread_self);
- thread_self = ERR_PTR(-ENOMEM);
}
- } else {
- thread_self = ERR_PTR(-ENOMEM);
}
inode_unlock(root_inode);
- if (IS_ERR(thread_self)) {
+
+ if (ret)
pr_err("proc_fill_super: can't allocate /proc/thread_self\n");
- return PTR_ERR(thread_self);
- }
- ns->proc_thread_self = thread_self;
- return 0;
+ else
+ ns->proc_thread_self = thread_self;
+
+ return ret;
}
void __init proc_thread_self_init(void)
diff --git a/fs/proc/util.c b/fs/proc/util.c
index b161cfa..98f8adc 100644
--- a/fs/proc/util.c
+++ b/fs/proc/util.c
@@ -1,4 +1,5 @@
#include <linux/dcache.h>
+#include "internal.h"
unsigned name_to_int(const struct qstr *qstr)
{
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index cbde728..7b13988 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* fs/proc/vmcore.c Interface for accessing the crash
* dump from the system's previous life.
@@ -16,14 +17,17 @@
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/printk.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/init.h>
#include <linux/crash_dump.h>
#include <linux/list.h>
+#include <linux/moduleparam.h>
#include <linux/mutex.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/uaccess.h>
+#include <linux/mem_encrypt.h>
+#include <asm/pgtable.h>
#include <asm/io.h>
#include "internal.h"
@@ -51,6 +55,9 @@
/* Device Dump list and mutex to synchronize access to list */
static LIST_HEAD(vmcoredd_list);
static DEFINE_MUTEX(vmcoredd_mutex);
+
+static bool vmcoredd_disabled;
+core_param(novmcoredd, vmcoredd_disabled, bool, 0);
#endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */
/* Device Dump Size */
@@ -97,8 +104,9 @@
}
/* Reads a page from the oldmem device from given offset. */
-static ssize_t read_from_oldmem(char *buf, size_t count,
- u64 *ppos, int userbuf)
+ssize_t read_from_oldmem(char *buf, size_t count,
+ u64 *ppos, int userbuf,
+ bool encrypted)
{
unsigned long pfn, offset;
size_t nr_bytes;
@@ -120,8 +128,15 @@
if (pfn_is_ram(pfn) == 0)
memset(buf, 0, nr_bytes);
else {
- tmp = copy_oldmem_page(pfn, buf, nr_bytes,
- offset, userbuf);
+ if (encrypted)
+ tmp = copy_oldmem_page_encrypted(pfn, buf,
+ nr_bytes,
+ offset,
+ userbuf);
+ else
+ tmp = copy_oldmem_page(pfn, buf, nr_bytes,
+ offset, userbuf);
+
if (tmp < 0)
return tmp;
}
@@ -155,7 +170,7 @@
*/
ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos)
{
- return read_from_oldmem(buf, count, ppos, 0);
+ return read_from_oldmem(buf, count, ppos, 0, false);
}
/*
@@ -163,7 +178,7 @@
*/
ssize_t __weak elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos)
{
- return read_from_oldmem(buf, count, ppos, 0);
+ return read_from_oldmem(buf, count, ppos, 0, mem_encrypt_active());
}
/*
@@ -173,10 +188,21 @@
unsigned long from, unsigned long pfn,
unsigned long size, pgprot_t prot)
{
+ prot = pgprot_encrypted(prot);
return remap_pfn_range(vma, from, pfn, size, prot);
}
/*
+ * Architectures which support memory encryption override this.
+ */
+ssize_t __weak
+copy_oldmem_page_encrypted(unsigned long pfn, char *buf, size_t csize,
+ unsigned long offset, int userbuf)
+{
+ return copy_oldmem_page(pfn, buf, csize, offset, userbuf);
+}
+
+/*
* Copy to either kernel or user space
*/
static int copy_to(void *target, void *src, size_t size, int userbuf)
@@ -351,7 +377,8 @@
m->offset + m->size - *fpos,
buflen);
start = m->paddr + *fpos - m->offset;
- tmp = read_from_oldmem(buffer, tsz, &start, userbuf);
+ tmp = read_from_oldmem(buffer, tsz, &start,
+ userbuf, mem_encrypt_active());
if (tmp < 0)
return tmp;
buflen -= tsz;
@@ -401,7 +428,7 @@
if (rc < 0) {
unlock_page(page);
put_page(page);
- return (rc == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS;
+ return vmf_error(rc);
}
SetPageUptodate(page);
}
@@ -1429,6 +1456,11 @@
size_t data_size;
int ret;
+ if (vmcoredd_disabled) {
+ pr_err_once("Device dump is disabled\n");
+ return -EINVAL;
+ }
+
if (!data || !strlen(data->dump_name) ||
!data->vmcoredd_callback || !data->size)
return -EINVAL;