Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S
index a82ec01..2f2a8ce 100644
--- a/arch/um/kernel/dyn.lds.S
+++ b/arch/um/kernel/dyn.lds.S
@@ -170,8 +170,8 @@
PROVIDE (end = .);
STABS_DEBUG
-
DWARF_DEBUG
+ ELF_DETAILS
DISCARDS
}
diff --git a/arch/um/kernel/exitcode.c b/arch/um/kernel/exitcode.c
index 369fd84..43edc2a 100644
--- a/arch/um/kernel/exitcode.c
+++ b/arch/um/kernel/exitcode.c
@@ -55,20 +55,19 @@
return count;
}
-static const struct file_operations exitcode_proc_fops = {
- .owner = THIS_MODULE,
- .open = exitcode_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
- .write = exitcode_proc_write,
+static const struct proc_ops exitcode_proc_ops = {
+ .proc_open = exitcode_proc_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_release = single_release,
+ .proc_write = exitcode_proc_write,
};
static int make_proc_exitcode(void)
{
struct proc_dir_entry *ent;
- ent = proc_create("exitcode", 0600, NULL, &exitcode_proc_fops);
+ ent = proc_create("exitcode", 0600, NULL, &exitcode_proc_ops);
if (ent == NULL) {
printk(KERN_WARNING "make_proc_exitcode : Failed to register "
"/proc/exitcode\n");
diff --git a/arch/um/kernel/kmsg_dump.c b/arch/um/kernel/kmsg_dump.c
index 98bdf69..e4abac6 100644
--- a/arch/um/kernel/kmsg_dump.c
+++ b/arch/um/kernel/kmsg_dump.c
@@ -9,20 +9,19 @@
enum kmsg_dump_reason reason)
{
static char line[1024];
-
+ struct console *con;
size_t len = 0;
- bool con_available = false;
/* only dump kmsg when no console is available */
if (!console_trylock())
return;
- if (console_drivers != NULL)
- con_available = true;
+ for_each_console(con)
+ break;
console_unlock();
- if (con_available == true)
+ if (con)
return;
printf("kmsg_dump:\n");
diff --git a/arch/um/kernel/maccess.c b/arch/um/kernel/maccess.c
index 67b2e0f..8ccd568 100644
--- a/arch/um/kernel/maccess.c
+++ b/arch/um/kernel/maccess.c
@@ -7,15 +7,13 @@
#include <linux/kernel.h>
#include <os.h>
-long probe_kernel_read(void *dst, const void *src, size_t size)
+bool copy_from_kernel_nofault_allowed(const void *src, size_t size)
{
void *psrc = (void *)rounddown((unsigned long)src, PAGE_SIZE);
if ((unsigned long)src < PAGE_SIZE || size <= 0)
- return -EFAULT;
-
+ return false;
if (os_mincore(psrc, size + src - psrc) <= 0)
- return -EFAULT;
-
- return __probe_kernel_read(dst, src, size);
+ return false;
+ return true;
}
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 417ff64..9242dc9 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -96,6 +96,7 @@
pgd_t *pgd_base)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
int i, j;
@@ -107,7 +108,8 @@
pgd = pgd_base + i;
for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) {
- pud = pud_offset(pgd, vaddr);
+ p4d = p4d_offset(pgd, vaddr);
+ pud = pud_offset(p4d, vaddr);
if (pud_none(*pud))
one_md_table_init(pud);
pmd = pmd_offset(pud, vaddr);
@@ -123,9 +125,6 @@
{
#ifdef CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA
long size = FIXADDR_USER_END - FIXADDR_USER_START;
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
pte_t *pte;
phys_t p;
unsigned long v, vaddr = FIXADDR_USER_START;
@@ -143,10 +142,7 @@
p = __pa(v);
for ( ; size > 0; size -= PAGE_SIZE, vaddr += PAGE_SIZE,
p += PAGE_SIZE) {
- pgd = swapper_pg_dir + pgd_index(vaddr);
- pud = pud_offset(pgd, vaddr);
- pmd = pmd_offset(pud, vaddr);
- pte = pte_offset_kernel(pmd, vaddr);
+ pte = virt_to_kpte(vaddr);
pte_set_val(*pte, p, PAGE_READONLY);
}
#endif
@@ -154,8 +150,8 @@
void __init paging_init(void)
{
- unsigned long zones_size[MAX_NR_ZONES], vaddr;
- int i;
+ unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0 };
+ unsigned long vaddr;
empty_zero_page = (unsigned long *) memblock_alloc_low(PAGE_SIZE,
PAGE_SIZE);
@@ -163,12 +159,8 @@
panic("%s: Failed to allocate %lu bytes align=%lx\n",
__func__, PAGE_SIZE, PAGE_SIZE);
- for (i = 0; i < ARRAY_SIZE(zones_size); i++)
- zones_size[i] = 0;
-
- zones_size[ZONE_NORMAL] = (end_iomem >> PAGE_SHIFT) -
- (uml_physmem >> PAGE_SHIFT);
- free_area_init(zones_size);
+ max_zone_pfn[ZONE_NORMAL] = end_iomem >> PAGE_SHIFT;
+ free_area_init(max_zone_pfn);
/*
* Fixed mappings, only the page table structure has to be
@@ -204,23 +196,6 @@
return pgd;
}
-void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
- free_page((unsigned long) pgd);
-}
-
-#ifdef CONFIG_3_LEVEL_PGTABLES
-pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
-{
- pmd_t *pmd = (pmd_t *) __get_free_page(GFP_KERNEL);
-
- if (pmd)
- memset(pmd, 0, PAGE_SIZE);
-
- return pmd;
-}
-#endif
-
void *uml_kmalloc(int size, int flags)
{
return kmalloc(size, flags);
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index 17045e7..9505a7e 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -25,14 +25,13 @@
#include <linux/threads.h>
#include <linux/tracehook.h>
#include <asm/current.h>
-#include <asm/pgtable.h>
#include <asm/mmu_context.h>
#include <linux/uaccess.h>
#include <as-layout.h>
#include <kern_util.h>
#include <os.h>
#include <skas.h>
-#include <timer-internal.h>
+#include <linux/time-internal.h>
/*
* This is a per-cpu array. A processor only modifies its entry and it only
@@ -102,7 +101,7 @@
schedule();
if (test_thread_flag(TIF_SIGPENDING))
do_signal(regs);
- if (test_and_clear_thread_flag(TIF_NOTIFY_RESUME))
+ if (test_thread_flag(TIF_NOTIFY_RESUME))
tracehook_notify_resume(regs);
}
@@ -153,7 +152,7 @@
userspace(¤t->thread.regs.regs, current_thread_info()->aux_fp_regs);
}
-int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
+int copy_thread(unsigned long clone_flags, unsigned long sp,
unsigned long arg, struct task_struct * p, unsigned long tls)
{
void (*handler)(void);
@@ -203,43 +202,6 @@
kmalloc_ok = save_kmalloc_ok;
}
-static void time_travel_sleep(unsigned long long duration)
-{
- unsigned long long next = time_travel_time + duration;
-
- if (time_travel_mode != TT_MODE_INFCPU)
- os_timer_disable();
-
- while (time_travel_timer_mode == TT_TMR_PERIODIC &&
- time_travel_timer_expiry < time_travel_time)
- time_travel_set_timer_expiry(time_travel_timer_expiry +
- time_travel_timer_interval);
-
- if (time_travel_timer_mode != TT_TMR_DISABLED &&
- time_travel_timer_expiry < next) {
- if (time_travel_timer_mode == TT_TMR_ONESHOT)
- time_travel_set_timer_mode(TT_TMR_DISABLED);
- /*
- * In basic mode, time_travel_time will be adjusted in
- * the timer IRQ handler so it works even when the signal
- * comes from the OS timer, see there.
- */
- if (time_travel_mode != TT_MODE_BASIC)
- time_travel_set_time(time_travel_timer_expiry);
-
- deliver_alarm();
- } else {
- time_travel_set_time(next);
- }
-
- if (time_travel_mode != TT_MODE_INFCPU) {
- if (time_travel_timer_mode == TT_TMR_PERIODIC)
- os_timer_set_interval(time_travel_timer_interval);
- else if (time_travel_timer_mode == TT_TMR_ONESHOT)
- os_timer_one_shot(time_travel_timer_expiry - next);
- }
-}
-
static void um_idle_sleep(void)
{
unsigned long long duration = UM_NSEC_PER_SEC;
@@ -255,7 +217,7 @@
{
cpu_tasks[current_thread_info()->cpu].pid = os_getpid();
um_idle_sleep();
- local_irq_enable();
+ raw_local_irq_enable();
}
int __cant_sleep(void) {
@@ -348,13 +310,12 @@
return count;
}
-static const struct file_operations sysemu_proc_fops = {
- .owner = THIS_MODULE,
- .open = sysemu_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
- .write = sysemu_proc_write,
+static const struct proc_ops sysemu_proc_ops = {
+ .proc_open = sysemu_proc_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_release = single_release,
+ .proc_write = sysemu_proc_write,
};
int __init make_proc_sysemu(void)
@@ -363,7 +324,7 @@
if (!sysemu_supported)
return 0;
- ent = proc_create("sysemu", 0600, NULL, &sysemu_proc_fops);
+ ent = proc_create("sysemu", 0600, NULL, &sysemu_proc_ops);
if (ent == NULL)
{
diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c
index 3d57c71..88cd9b5 100644
--- a/arch/um/kernel/signal.c
+++ b/arch/um/kernel/signal.c
@@ -70,7 +70,7 @@
PT_REGS_SYSCALL_RET(regs) = -EINTR;
break;
}
- /* fallthrough */
+ fallthrough;
case -ERESTARTNOINTR:
PT_REGS_RESTART_SYSCALL(regs);
PT_REGS_ORIG_SYSCALL(regs) = PT_REGS_SYSCALL_NR(regs);
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index b5e3d91..d996116 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -9,7 +9,6 @@
#include <linux/slab.h>
#include <asm/pgalloc.h>
-#include <asm/pgtable.h>
#include <asm/sections.h>
#include <as-layout.h>
#include <os.h>
@@ -19,15 +18,21 @@
unsigned long kernel)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
pgd = pgd_offset(mm, proc);
- pud = pud_alloc(mm, pgd, proc);
- if (!pud)
+
+ p4d = p4d_alloc(mm, pgd, proc);
+ if (!p4d)
goto out;
+ pud = pud_alloc(mm, p4d, proc);
+ if (!pud)
+ goto out_pud;
+
pmd = pmd_alloc(mm, pud, proc);
if (!pmd)
goto out_pmd;
@@ -44,6 +49,8 @@
pmd_free(mm, pmd);
out_pmd:
pud_free(mm, pud);
+ out_pud:
+ p4d_free(mm, p4d);
out:
return -ENOMEM;
}
@@ -107,7 +114,7 @@
mm->context.stub_pages[0] = virt_to_page(__syscall_stub_start);
mm->context.stub_pages[1] = virt_to_page(mm->context.id.stack);
- /* dup_mmap already holds mmap_sem */
+ /* dup_mmap already holds mmap_lock */
err = install_special_mapping(mm, STUB_START, STUB_END - STUB_START,
VM_READ | VM_MAYREAD | VM_EXEC |
VM_MAYEXEC | VM_DONTCOPY | VM_PFNMAP,
diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c
index f574b18..3d91f89 100644
--- a/arch/um/kernel/skas/syscall.c
+++ b/arch/um/kernel/skas/syscall.c
@@ -10,7 +10,8 @@
#include <sysdep/ptrace.h>
#include <sysdep/ptrace_user.h>
#include <sysdep/syscalls.h>
-#include <shared/timer-internal.h>
+#include <linux/time-internal.h>
+#include <asm/unistd.h>
void handle_syscall(struct uml_pt_regs *r)
{
@@ -24,7 +25,8 @@
* went to sleep, even if said userspace interacts with the kernel in
* various ways.
*/
- if (time_travel_mode == TT_MODE_INFCPU)
+ if (time_travel_mode == TT_MODE_INFCPU ||
+ time_travel_mode == TT_MODE_EXTERNAL)
schedule();
/* Initialize the syscall number and default return value. */
@@ -35,7 +37,7 @@
goto out;
/* Do the seccomp check after ptrace; failures should be fast. */
- if (secure_computing(NULL) == -1)
+ if (secure_computing() == -1)
goto out;
syscall = UPT_SYSCALL_NR(r);
diff --git a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c
index 3236052..2dec915 100644
--- a/arch/um/kernel/skas/uaccess.c
+++ b/arch/um/kernel/skas/uaccess.c
@@ -10,13 +10,13 @@
#include <linux/sched.h>
#include <asm/current.h>
#include <asm/page.h>
-#include <asm/pgtable.h>
#include <kern_util.h>
#include <os.h>
pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
@@ -27,7 +27,11 @@
if (!pgd_present(*pgd))
return NULL;
- pud = pud_offset(pgd, addr);
+ p4d = p4d_offset(pgd, addr);
+ if (!p4d_present(*p4d))
+ return NULL;
+
+ pud = pud_offset(p4d, addr);
if (!pud_present(*pud))
return NULL;
diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c
index c71b5ef..7452f70 100644
--- a/arch/um/kernel/sysrq.c
+++ b/arch/um/kernel/sysrq.c
@@ -17,7 +17,9 @@
static void _print_addr(void *data, unsigned long address, int reliable)
{
- pr_info(" [<%08lx>] %s%pS\n", address, reliable ? "" : "? ",
+ const char *loglvl = data;
+
+ printk("%s [<%08lx>] %s%pS\n", loglvl, address, reliable ? "" : "? ",
(void *)address);
}
@@ -25,9 +27,9 @@
.address = _print_addr
};
-void show_stack(struct task_struct *task, unsigned long *stack)
+void show_stack(struct task_struct *task, unsigned long *stack,
+ const char *loglvl)
{
- unsigned long *sp = stack;
struct pt_regs *segv_regs = current->thread.segv_regs;
int i;
@@ -38,10 +40,9 @@
}
if (!stack)
- sp = get_stack_pointer(task, segv_regs);
+ stack = get_stack_pointer(task, segv_regs);
- pr_info("Stack:\n");
- stack = sp;
+ printk("%sStack:\n", loglvl);
for (i = 0; i < 3 * STACKSLOTS_PER_LINE; i++) {
if (kstack_end(stack))
break;
@@ -49,9 +50,7 @@
pr_cont("\n");
pr_cont(" %08lx", *stack++);
}
- pr_cont("\n");
- pr_info("Call Trace:\n");
- dump_trace(current, &stackops, NULL);
- pr_info("\n");
+ printk("%sCall Trace:\n", loglvl);
+ dump_trace(current, &stackops, (void *)loglvl);
}
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index 94ea87b..8dafc3f 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -4,6 +4,7 @@
* Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
* Copyright (C) 2012-2014 Cisco Systems
* Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Copyright (C) 2019 Intel Corporation
*/
#include <linux/clockchips.h>
@@ -18,21 +19,484 @@
#include <asm/param.h>
#include <kern_util.h>
#include <os.h>
-#include <timer-internal.h>
+#include <linux/time-internal.h>
+#include <linux/um_timetravel.h>
#include <shared/init.h>
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
enum time_travel_mode time_travel_mode;
-unsigned long long time_travel_time;
-enum time_travel_timer_mode time_travel_timer_mode;
-unsigned long long time_travel_timer_expiry;
-unsigned long long time_travel_timer_interval;
+EXPORT_SYMBOL_GPL(time_travel_mode);
static bool time_travel_start_set;
static unsigned long long time_travel_start;
-#else
+static unsigned long long time_travel_time;
+static LIST_HEAD(time_travel_events);
+static unsigned long long time_travel_timer_interval;
+static unsigned long long time_travel_next_event;
+static struct time_travel_event time_travel_timer_event;
+static int time_travel_ext_fd = -1;
+static unsigned int time_travel_ext_waiting;
+static bool time_travel_ext_prev_request_valid;
+static unsigned long long time_travel_ext_prev_request;
+static bool time_travel_ext_free_until_valid;
+static unsigned long long time_travel_ext_free_until;
+
+static void time_travel_set_time(unsigned long long ns)
+{
+ if (unlikely(ns < time_travel_time))
+ panic("time-travel: time goes backwards %lld -> %lld\n",
+ time_travel_time, ns);
+ time_travel_time = ns;
+}
+
+enum time_travel_message_handling {
+ TTMH_IDLE,
+ TTMH_POLL,
+ TTMH_READ,
+};
+
+static void time_travel_handle_message(struct um_timetravel_msg *msg,
+ enum time_travel_message_handling mode)
+{
+ struct um_timetravel_msg resp = {
+ .op = UM_TIMETRAVEL_ACK,
+ };
+ int ret;
+
+ /*
+ * Poll outside the locked section (if we're not called to only read
+ * the response) so we can get interrupts for e.g. virtio while we're
+ * here, but then we need to lock to not get interrupted between the
+ * read of the message and write of the ACK.
+ */
+ if (mode != TTMH_READ) {
+ bool disabled = irqs_disabled();
+
+ BUG_ON(mode == TTMH_IDLE && !disabled);
+
+ if (disabled)
+ local_irq_enable();
+ while (os_poll(1, &time_travel_ext_fd) != 0) {
+ /* nothing */
+ }
+ if (disabled)
+ local_irq_disable();
+ }
+
+ ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg));
+
+ if (ret == 0)
+ panic("time-travel external link is broken\n");
+ if (ret != sizeof(*msg))
+ panic("invalid time-travel message - %d bytes\n", ret);
+
+ switch (msg->op) {
+ default:
+ WARN_ONCE(1, "time-travel: unexpected message %lld\n",
+ (unsigned long long)msg->op);
+ break;
+ case UM_TIMETRAVEL_ACK:
+ return;
+ case UM_TIMETRAVEL_RUN:
+ time_travel_set_time(msg->time);
+ break;
+ case UM_TIMETRAVEL_FREE_UNTIL:
+ time_travel_ext_free_until_valid = true;
+ time_travel_ext_free_until = msg->time;
+ break;
+ }
+
+ resp.seq = msg->seq;
+ os_write_file(time_travel_ext_fd, &resp, sizeof(resp));
+}
+
+static u64 time_travel_ext_req(u32 op, u64 time)
+{
+ static int seq;
+ int mseq = ++seq;
+ struct um_timetravel_msg msg = {
+ .op = op,
+ .time = time,
+ .seq = mseq,
+ };
+ unsigned long flags;
+
+ /*
+ * We need to save interrupts here and only restore when we
+ * got the ACK - otherwise we can get interrupted and send
+ * another request while we're still waiting for an ACK, but
+ * the peer doesn't know we got interrupted and will send
+ * the ACKs in the same order as the message, but we'd need
+ * to see them in the opposite order ...
+ *
+ * This wouldn't matter *too* much, but some ACKs carry the
+ * current time (for UM_TIMETRAVEL_GET) and getting another
+ * ACK without a time would confuse us a lot!
+ *
+ * The sequence number assignment that happens here lets us
+ * debug such message handling issues more easily.
+ */
+ local_irq_save(flags);
+ os_write_file(time_travel_ext_fd, &msg, sizeof(msg));
+
+ while (msg.op != UM_TIMETRAVEL_ACK)
+ time_travel_handle_message(&msg, TTMH_READ);
+
+ if (msg.seq != mseq)
+ panic("time-travel: ACK message has different seqno! op=%d, seq=%d != %d time=%lld\n",
+ msg.op, msg.seq, mseq, msg.time);
+
+ if (op == UM_TIMETRAVEL_GET)
+ time_travel_set_time(msg.time);
+ local_irq_restore(flags);
+
+ return msg.time;
+}
+
+void __time_travel_wait_readable(int fd)
+{
+ int fds[2] = { fd, time_travel_ext_fd };
+ int ret;
+
+ if (time_travel_mode != TT_MODE_EXTERNAL)
+ return;
+
+ while ((ret = os_poll(2, fds))) {
+ struct um_timetravel_msg msg;
+
+ if (ret == 1)
+ time_travel_handle_message(&msg, TTMH_READ);
+ }
+}
+EXPORT_SYMBOL_GPL(__time_travel_wait_readable);
+
+static void time_travel_ext_update_request(unsigned long long time)
+{
+ if (time_travel_mode != TT_MODE_EXTERNAL)
+ return;
+
+ /* asked for exactly this time previously */
+ if (time_travel_ext_prev_request_valid &&
+ time == time_travel_ext_prev_request)
+ return;
+
+ time_travel_ext_prev_request = time;
+ time_travel_ext_prev_request_valid = true;
+ time_travel_ext_req(UM_TIMETRAVEL_REQUEST, time);
+}
+
+void __time_travel_propagate_time(void)
+{
+ time_travel_ext_req(UM_TIMETRAVEL_UPDATE, time_travel_time);
+}
+EXPORT_SYMBOL_GPL(__time_travel_propagate_time);
+
+/* returns true if we must do a wait to the simtime device */
+static bool time_travel_ext_request(unsigned long long time)
+{
+ /*
+ * If we received an external sync point ("free until") then we
+ * don't have to request/wait for anything until then, unless
+ * we're already waiting.
+ */
+ if (!time_travel_ext_waiting && time_travel_ext_free_until_valid &&
+ time < time_travel_ext_free_until)
+ return false;
+
+ time_travel_ext_update_request(time);
+ return true;
+}
+
+static void time_travel_ext_wait(bool idle)
+{
+ struct um_timetravel_msg msg = {
+ .op = UM_TIMETRAVEL_ACK,
+ };
+
+ time_travel_ext_prev_request_valid = false;
+ time_travel_ext_waiting++;
+
+ time_travel_ext_req(UM_TIMETRAVEL_WAIT, -1);
+
+ /*
+ * Here we are deep in the idle loop, so we have to break out of the
+ * kernel abstraction in a sense and implement this in terms of the
+ * UML system waiting on the VQ interrupt while sleeping, when we get
+ * the signal it'll call time_travel_ext_vq_notify_done() completing the
+ * call.
+ */
+ while (msg.op != UM_TIMETRAVEL_RUN)
+ time_travel_handle_message(&msg, idle ? TTMH_IDLE : TTMH_POLL);
+
+ time_travel_ext_waiting--;
+
+ /* we might request more stuff while polling - reset when we run */
+ time_travel_ext_prev_request_valid = false;
+}
+
+static void time_travel_ext_get_time(void)
+{
+ time_travel_ext_req(UM_TIMETRAVEL_GET, -1);
+}
+
+static void __time_travel_update_time(unsigned long long ns, bool idle)
+{
+ if (time_travel_mode == TT_MODE_EXTERNAL && time_travel_ext_request(ns))
+ time_travel_ext_wait(idle);
+ else
+ time_travel_set_time(ns);
+}
+
+static struct time_travel_event *time_travel_first_event(void)
+{
+ return list_first_entry_or_null(&time_travel_events,
+ struct time_travel_event,
+ list);
+}
+
+static void __time_travel_add_event(struct time_travel_event *e,
+ unsigned long long time)
+{
+ struct time_travel_event *tmp;
+ bool inserted = false;
+
+ if (e->pending)
+ return;
+
+ e->pending = true;
+ e->time = time;
+
+ list_for_each_entry(tmp, &time_travel_events, list) {
+ /*
+ * Add the new entry before one with higher time,
+ * or if they're equal and both on stack, because
+ * in that case we need to unwind the stack in the
+ * right order, and the later event (timer sleep
+ * or such) must be dequeued first.
+ */
+ if ((tmp->time > e->time) ||
+ (tmp->time == e->time && tmp->onstack && e->onstack)) {
+ list_add_tail(&e->list, &tmp->list);
+ inserted = true;
+ break;
+ }
+ }
+
+ if (!inserted)
+ list_add_tail(&e->list, &time_travel_events);
+
+ tmp = time_travel_first_event();
+ time_travel_ext_update_request(tmp->time);
+ time_travel_next_event = tmp->time;
+}
+
+static void time_travel_add_event(struct time_travel_event *e,
+ unsigned long long time)
+{
+ if (WARN_ON(!e->fn))
+ return;
+
+ __time_travel_add_event(e, time);
+}
+
+void time_travel_periodic_timer(struct time_travel_event *e)
+{
+ time_travel_add_event(&time_travel_timer_event,
+ time_travel_time + time_travel_timer_interval);
+ deliver_alarm();
+}
+
+static void time_travel_deliver_event(struct time_travel_event *e)
+{
+ if (e == &time_travel_timer_event) {
+ /*
+ * deliver_alarm() does the irq_enter/irq_exit
+ * by itself, so must handle it specially here
+ */
+ e->fn(e);
+ } else {
+ unsigned long flags;
+
+ local_irq_save(flags);
+ irq_enter();
+ e->fn(e);
+ irq_exit();
+ local_irq_restore(flags);
+ }
+}
+
+static bool time_travel_del_event(struct time_travel_event *e)
+{
+ if (!e->pending)
+ return false;
+ list_del(&e->list);
+ e->pending = false;
+ return true;
+}
+
+static void time_travel_update_time(unsigned long long next, bool idle)
+{
+ struct time_travel_event ne = {
+ .onstack = true,
+ };
+ struct time_travel_event *e;
+ bool finished = idle;
+
+ /* add it without a handler - we deal with that specifically below */
+ __time_travel_add_event(&ne, next);
+
+ do {
+ e = time_travel_first_event();
+
+ BUG_ON(!e);
+ __time_travel_update_time(e->time, idle);
+
+ /* new events may have been inserted while we were waiting */
+ if (e == time_travel_first_event()) {
+ BUG_ON(!time_travel_del_event(e));
+ BUG_ON(time_travel_time != e->time);
+
+ if (e == &ne) {
+ finished = true;
+ } else {
+ if (e->onstack)
+ panic("On-stack event dequeued outside of the stack! time=%lld, event time=%lld, event=%pS\n",
+ time_travel_time, e->time, e);
+ time_travel_deliver_event(e);
+ }
+ }
+
+ e = time_travel_first_event();
+ if (e)
+ time_travel_ext_update_request(e->time);
+ } while (ne.pending && !finished);
+
+ time_travel_del_event(&ne);
+}
+
+void time_travel_ndelay(unsigned long nsec)
+{
+ time_travel_update_time(time_travel_time + nsec, false);
+}
+EXPORT_SYMBOL(time_travel_ndelay);
+
+void time_travel_add_irq_event(struct time_travel_event *e)
+{
+ BUG_ON(time_travel_mode != TT_MODE_EXTERNAL);
+
+ time_travel_ext_get_time();
+ /*
+ * We could model interrupt latency here, for now just
+ * don't have any latency at all and request the exact
+ * same time (again) to run the interrupt...
+ */
+ time_travel_add_event(e, time_travel_time);
+}
+EXPORT_SYMBOL_GPL(time_travel_add_irq_event);
+
+static void time_travel_oneshot_timer(struct time_travel_event *e)
+{
+ deliver_alarm();
+}
+
+void time_travel_sleep(unsigned long long duration)
+{
+ unsigned long long next = time_travel_time + duration;
+
+ if (time_travel_mode == TT_MODE_BASIC)
+ os_timer_disable();
+
+ time_travel_update_time(next, true);
+
+ if (time_travel_mode == TT_MODE_BASIC &&
+ time_travel_timer_event.pending) {
+ if (time_travel_timer_event.fn == time_travel_periodic_timer) {
+ /*
+ * This is somewhat wrong - we should get the first
+ * one sooner like the os_timer_one_shot() below...
+ */
+ os_timer_set_interval(time_travel_timer_interval);
+ } else {
+ os_timer_one_shot(time_travel_timer_event.time - next);
+ }
+ }
+}
+
+static void time_travel_handle_real_alarm(void)
+{
+ time_travel_set_time(time_travel_next_event);
+
+ time_travel_del_event(&time_travel_timer_event);
+
+ if (time_travel_timer_event.fn == time_travel_periodic_timer)
+ time_travel_add_event(&time_travel_timer_event,
+ time_travel_time +
+ time_travel_timer_interval);
+}
+
+static void time_travel_set_interval(unsigned long long interval)
+{
+ time_travel_timer_interval = interval;
+}
+
+static int time_travel_connect_external(const char *socket)
+{
+ const char *sep;
+ unsigned long long id = (unsigned long long)-1;
+ int rc;
+
+ if ((sep = strchr(socket, ':'))) {
+ char buf[25] = {};
+ if (sep - socket > sizeof(buf) - 1)
+ goto invalid_number;
+
+ memcpy(buf, socket, sep - socket);
+ if (kstrtoull(buf, 0, &id)) {
+invalid_number:
+ panic("time-travel: invalid external ID in string '%s'\n",
+ socket);
+ return -EINVAL;
+ }
+
+ socket = sep + 1;
+ }
+
+ rc = os_connect_socket(socket);
+ if (rc < 0) {
+ panic("time-travel: failed to connect to external socket %s\n",
+ socket);
+ return rc;
+ }
+
+ time_travel_ext_fd = rc;
+
+ time_travel_ext_req(UM_TIMETRAVEL_START, id);
+
+ return 1;
+}
+#else /* CONFIG_UML_TIME_TRAVEL_SUPPORT */
#define time_travel_start_set 0
#define time_travel_start 0
+#define time_travel_time 0
+
+static inline void time_travel_update_time(unsigned long long ns, bool retearly)
+{
+}
+
+static inline void time_travel_handle_real_alarm(void)
+{
+}
+
+static void time_travel_set_interval(unsigned long long interval)
+{
+}
+
+/* fail link if this actually gets used */
+extern u64 time_travel_ext_req(u32 op, u64 time);
+
+/* these are empty macros so the struct/fn need not exist */
+#define time_travel_add_event(e, time) do { } while (0)
+#define time_travel_del_event(e) do { } while (0)
#endif
void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
@@ -48,7 +512,7 @@
* never get any real signals from the OS.
*/
if (time_travel_mode == TT_MODE_BASIC)
- time_travel_set_time(time_travel_timer_expiry);
+ time_travel_handle_real_alarm();
local_irq_save(flags);
do_IRQ(TIMER_IRQ, regs);
@@ -58,9 +522,10 @@
static int itimer_shutdown(struct clock_event_device *evt)
{
if (time_travel_mode != TT_MODE_OFF)
- time_travel_set_timer_mode(TT_TMR_DISABLED);
+ time_travel_del_event(&time_travel_timer_event);
- if (time_travel_mode != TT_MODE_INFCPU)
+ if (time_travel_mode != TT_MODE_INFCPU &&
+ time_travel_mode != TT_MODE_EXTERNAL)
os_timer_disable();
return 0;
@@ -71,12 +536,16 @@
unsigned long long interval = NSEC_PER_SEC / HZ;
if (time_travel_mode != TT_MODE_OFF) {
- time_travel_set_timer_mode(TT_TMR_PERIODIC);
- time_travel_set_timer_expiry(time_travel_time + interval);
- time_travel_set_timer_interval(interval);
+ time_travel_del_event(&time_travel_timer_event);
+ time_travel_set_event_fn(&time_travel_timer_event,
+ time_travel_periodic_timer);
+ time_travel_set_interval(interval);
+ time_travel_add_event(&time_travel_timer_event,
+ time_travel_time + interval);
}
- if (time_travel_mode != TT_MODE_INFCPU)
+ if (time_travel_mode != TT_MODE_INFCPU &&
+ time_travel_mode != TT_MODE_EXTERNAL)
os_timer_set_interval(interval);
return 0;
@@ -88,11 +557,15 @@
delta += 1;
if (time_travel_mode != TT_MODE_OFF) {
- time_travel_set_timer_mode(TT_TMR_ONESHOT);
- time_travel_set_timer_expiry(time_travel_time + delta);
+ time_travel_del_event(&time_travel_timer_event);
+ time_travel_set_event_fn(&time_travel_timer_event,
+ time_travel_oneshot_timer);
+ time_travel_add_event(&time_travel_timer_event,
+ time_travel_time + delta);
}
- if (time_travel_mode != TT_MODE_INFCPU)
+ if (time_travel_mode != TT_MODE_INFCPU &&
+ time_travel_mode != TT_MODE_EXTERNAL)
return os_timer_one_shot(delta);
return 0;
@@ -143,8 +616,17 @@
* stuck in loops that expect time to move more than the
* exact requested sleep amount, e.g. python's socket server,
* see https://bugs.python.org/issue37026.
+ *
+ * However, don't do that when we're in interrupt or such as
+ * then we might recurse into our own processing, and get to
+ * even more waiting, and that's not good - it messes up the
+ * "what do I do next" and onstack event we use to know when
+ * to return from time_travel_update_time().
*/
- time_travel_set_time(time_travel_time + TIMER_MULTIPLIER);
+ if (!irqs_disabled() && !in_interrupt() && !in_softirq())
+ time_travel_update_time(time_travel_time +
+ TIMER_MULTIPLIER,
+ false);
return time_travel_time / TIMER_MULTIPLIER;
}
@@ -188,6 +670,8 @@
if (time_travel_start_set)
nsecs = time_travel_start + time_travel_time;
+ else if (time_travel_mode == TT_MODE_EXTERNAL)
+ nsecs = time_travel_ext_req(UM_TIMETRAVEL_GET_TOD, -1);
else
nsecs = os_persistent_clock_emulation();
@@ -204,7 +688,8 @@
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
unsigned long calibrate_delay_is_known(void)
{
- if (time_travel_mode == TT_MODE_INFCPU)
+ if (time_travel_mode == TT_MODE_INFCPU ||
+ time_travel_mode == TT_MODE_EXTERNAL)
return 1;
return 0;
}
@@ -218,6 +703,13 @@
return 1;
}
+ if (strncmp(str, "=ext:", 5) == 0) {
+ time_travel_mode = TT_MODE_EXTERNAL;
+ timer_clockevent.name = "time-travel-timer-external";
+ timer_clocksource.name = "time-travel-clock-external";
+ return time_travel_connect_external(str + 5);
+ }
+
if (!*str) {
time_travel_mode = TT_MODE_BASIC;
timer_clockevent.name = "time-travel-timer";
@@ -242,7 +734,15 @@
"are no wall clock timers, and any CPU processing happens - as seen from the\n"
"guest - instantly. This can be useful for accurate simulation regardless of\n"
"debug overhead, physical CPU speed, etc. but is somewhat dangerous as it can\n"
-"easily lead to getting stuck (e.g. if anything in the system busy loops).\n");
+"easily lead to getting stuck (e.g. if anything in the system busy loops).\n"
+"\n"
+"time-travel=ext:[ID:]/path/to/socket\n"
+"This enables time travel mode similar to =inf-cpu, except the system will\n"
+"use the given socket to coordinate with a central scheduler, in order to\n"
+"have more than one system simultaneously be on simulated time. The virtio\n"
+"driver code in UML knows about this so you can also simulate networks and\n"
+"devices using it, assuming the device has the right capabilities.\n"
+"The optional ID is a 64-bit integer that's sent to the central scheduler.\n");
int setup_time_travel_start(char *str)
{
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index 1149913..5be1b0d 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -7,7 +7,6 @@
#include <linux/module.h>
#include <linux/sched/signal.h>
-#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <as-layout.h>
#include <mem_user.h>
@@ -283,7 +282,7 @@
return ret;
}
-static inline int update_pud_range(pgd_t *pgd, unsigned long addr,
+static inline int update_pud_range(p4d_t *p4d, unsigned long addr,
unsigned long end,
struct host_vm_change *hvc)
{
@@ -291,7 +290,7 @@
unsigned long next;
int ret = 0;
- pud = pud_offset(pgd, addr);
+ pud = pud_offset(p4d, addr);
do {
next = pud_addr_end(addr, end);
if (!pud_present(*pud)) {
@@ -305,6 +304,28 @@
return ret;
}
+static inline int update_p4d_range(pgd_t *pgd, unsigned long addr,
+ unsigned long end,
+ struct host_vm_change *hvc)
+{
+ p4d_t *p4d;
+ unsigned long next;
+ int ret = 0;
+
+ p4d = p4d_offset(pgd, addr);
+ do {
+ next = p4d_addr_end(addr, end);
+ if (!p4d_present(*p4d)) {
+ if (hvc->force || p4d_newpage(*p4d)) {
+ ret = add_munmap(addr, next - addr, hvc);
+ p4d_mkuptodate(*p4d);
+ }
+ } else
+ ret = update_pud_range(p4d, addr, next, hvc);
+ } while (p4d++, addr = next, ((addr < end) && !ret));
+ return ret;
+}
+
void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
unsigned long end_addr, int force)
{
@@ -322,8 +343,8 @@
ret = add_munmap(addr, next - addr, &hvc);
pgd_mkuptodate(*pgd);
}
- }
- else ret = update_pud_range(pgd, addr, next, &hvc);
+ } else
+ ret = update_p4d_range(pgd, addr, next, &hvc);
} while (pgd++, addr = next, ((addr < end_addr) && !ret));
if (!ret)
@@ -331,12 +352,11 @@
/* This is not an else because ret is modified above */
if (ret) {
+ struct mm_id *mm_idp = ¤t->mm->context.id;
+
printk(KERN_ERR "fix_range_common: failed, killing current "
"process: %d\n", task_tgid_vnr(current));
- /* We are under mmap_sem, release it such that current can terminate */
- up_write(¤t->mm->mmap_sem);
- force_sig(SIGKILL);
- do_signal(¤t->thread.regs);
+ mm_idp->kill = 1;
}
}
@@ -344,6 +364,7 @@
{
struct mm_struct *mm;
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
@@ -370,7 +391,23 @@
continue;
}
- pud = pud_offset(pgd, addr);
+ p4d = p4d_offset(pgd, addr);
+ if (!p4d_present(*p4d)) {
+ last = ADD_ROUND(addr, P4D_SIZE);
+ if (last > end)
+ last = end;
+ if (p4d_newpage(*p4d)) {
+ updated = 1;
+ err = add_munmap(addr, last - addr, &hvc);
+ if (err < 0)
+ panic("munmap failed, errno = %d\n",
+ -err);
+ }
+ addr = last;
+ continue;
+ }
+
+ pud = pud_offset(p4d, addr);
if (!pud_present(*pud)) {
last = ADD_ROUND(addr, PUD_SIZE);
if (last > end)
@@ -430,6 +467,7 @@
void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
@@ -447,7 +485,11 @@
if (!pgd_present(*pgd))
goto kill;
- pud = pud_offset(pgd, address);
+ p4d = p4d_offset(pgd, address);
+ if (!p4d_present(*p4d))
+ goto kill;
+
+ pud = pud_offset(p4d, address);
if (!pud_present(*pud))
goto kill;
@@ -500,35 +542,6 @@
force_sig(SIGKILL);
}
-pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address)
-{
- return pgd_offset(mm, address);
-}
-
-pud_t *pud_offset_proc(pgd_t *pgd, unsigned long address)
-{
- return pud_offset(pgd, address);
-}
-
-pmd_t *pmd_offset_proc(pud_t *pud, unsigned long address)
-{
- return pmd_offset(pud, address);
-}
-
-pte_t *pte_offset_proc(pmd_t *pmd, unsigned long address)
-{
- return pte_offset_kernel(pmd, address);
-}
-
-pte_t *addr_pte(struct task_struct *task, unsigned long addr)
-{
- pgd_t *pgd = pgd_offset(task->mm, addr);
- pud_t *pud = pud_offset(pgd, addr);
- pmd_t *pmd = pmd_offset(pud, addr);
-
- return pte_offset_map(pmd, addr);
-}
-
void flush_tlb_all(void)
{
/*
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index e62296c..ad12f78 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -10,7 +10,6 @@
#include <linux/uaccess.h>
#include <linux/sched/debug.h>
#include <asm/current.h>
-#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <arch.h>
#include <as-layout.h>
@@ -27,12 +26,10 @@
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
- pgd_t *pgd;
- pud_t *pud;
pmd_t *pmd;
pte_t *pte;
int err = -EFAULT;
- unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+ unsigned int flags = FAULT_FLAG_DEFAULT;
*code_out = SEGV_MAPERR;
@@ -46,7 +43,7 @@
if (is_user)
flags |= FAULT_FLAG_USER;
retry:
- down_read(&mm->mmap_sem);
+ mmap_read_lock(mm);
vma = find_vma(mm, address);
if (!vma)
goto out;
@@ -74,7 +71,7 @@
do {
vm_fault_t fault;
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, NULL);
if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
goto out_nosemaphore;
@@ -91,21 +88,14 @@
BUG();
}
if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR)
- current->maj_flt++;
- else
- current->min_flt++;
if (fault & VM_FAULT_RETRY) {
- flags &= ~FAULT_FLAG_ALLOW_RETRY;
flags |= FAULT_FLAG_TRIED;
goto retry;
}
}
- pgd = pgd_offset(mm, address);
- pud = pud_offset(pgd, address);
- pmd = pmd_offset(pud, address);
+ pmd = pmd_off(mm, address);
pte = pte_offset_kernel(pmd, address);
} while (!pte_present(*pte));
err = 0;
@@ -122,7 +112,7 @@
#endif
flush_tlb_page(vma, address);
out:
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
out_nosemaphore:
return err;
@@ -131,7 +121,7 @@
* We ran out of memory, call the OOM killer, and return the userspace
* (which will retry the fault, or kill us if we got oom-killed).
*/
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
if (!is_user)
goto out_nosemaphore;
pagefault_out_of_memory();
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 0f40ecc..76b3729 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -14,7 +14,6 @@
#include <linux/sched/task.h>
#include <linux/kmsg_dump.h>
-#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/sections.h>
#include <asm/setup.h>
@@ -53,7 +52,7 @@
};
union thread_union cpu0_irqstack
- __attribute__((__section__(".data..init_irqstack"))) =
+ __section(".data..init_irqstack") =
{ .thread_info = INIT_THREAD_INFO(init_task) };
/* Changed in setup_arch, which is called in early boot */
@@ -362,3 +361,19 @@
void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
{
}
+
+void *text_poke(void *addr, const void *opcode, size_t len)
+{
+ /*
+ * In UML, the only reference to this function is in
+ * apply_relocate_add(), which shouldn't ever actually call this
+ * because UML doesn't have live patching.
+ */
+ WARN_ON(1);
+
+ return memcpy(addr, opcode, len);
+}
+
+void text_poke_sync(void)
+{
+}
diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S
index 85b404d..7a8e2b1 100644
--- a/arch/um/kernel/uml.lds.S
+++ b/arch/um/kernel/uml.lds.S
@@ -25,10 +25,10 @@
__binary_start = START;
. = START + SIZEOF_HEADERS;
+ . = ALIGN(PAGE_SIZE);
_text = .;
INIT_TEXT_SECTION(0)
- . = ALIGN(PAGE_SIZE);
.text :
{
@@ -114,8 +114,8 @@
PROVIDE (end = .);
STABS_DEBUG
-
DWARF_DEBUG
+ ELF_DETAILS
DISCARDS
}