Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
index 06fc70c..08bf95d 100644
--- a/arch/x86/entry/Makefile
+++ b/arch/x86/entry/Makefile
@@ -3,10 +3,24 @@
# Makefile for the x86 low level entry code
#
-OBJECT_FILES_NON_STANDARD_entry_64_compat.o := y
+KASAN_SANITIZE := n
+UBSAN_SANITIZE := n
+KCOV_INSTRUMENT := n
+
+CFLAGS_REMOVE_common.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_syscall_64.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_syscall_32.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_syscall_x32.o = $(CC_FLAGS_FTRACE)
+
+CFLAGS_common.o += -fno-stack-protector
+CFLAGS_syscall_64.o += -fno-stack-protector
+CFLAGS_syscall_32.o += -fno-stack-protector
+CFLAGS_syscall_x32.o += -fno-stack-protector
CFLAGS_syscall_64.o += $(call cc-option,-Wno-override-init,)
CFLAGS_syscall_32.o += $(call cc-option,-Wno-override-init,)
+CFLAGS_syscall_x32.o += $(call cc-option,-Wno-override-init,)
+
obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
obj-y += common.o
@@ -14,4 +28,5 @@
obj-y += vsyscall/
obj-$(CONFIG_IA32_EMULATION) += entry_64_compat.o syscall_32.o
+obj-$(CONFIG_X86_X32_ABI) += syscall_x32.o
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index b3f1214..07a9331 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -341,7 +341,16 @@
#endif
.endm
-#endif /* CONFIG_X86_64 */
+.macro SAVE_AND_SET_GSBASE scratch_reg:req save_reg:req
+ rdgsbase \save_reg
+ GET_PERCPU_BASE \scratch_reg
+ wrgsbase \scratch_reg
+.endm
+
+#else /* CONFIG_X86_64 */
+# undef UNWIND_HINT_IRET_REGS
+# define UNWIND_HINT_IRET_REGS
+#endif /* !CONFIG_X86_64 */
.macro STACKLEAK_ERASE
#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
@@ -349,22 +358,37 @@
#endif
.endm
+#ifdef CONFIG_SMP
+
/*
- * This does 'call enter_from_user_mode' unless we can avoid it based on
- * kernel config or using the static jump infrastructure.
+ * CPU/node NR is loaded from the limit (size) field of a special segment
+ * descriptor entry in GDT.
*/
-.macro CALL_enter_from_user_mode
-#ifdef CONFIG_CONTEXT_TRACKING
-#ifdef CONFIG_JUMP_LABEL
- STATIC_JUMP_IF_FALSE .Lafter_call_\@, context_tracking_enabled, def=0
-#endif
- call enter_from_user_mode
-.Lafter_call_\@:
-#endif
+.macro LOAD_CPU_AND_NODE_SEG_LIMIT reg:req
+ movq $__CPUNODE_SEG, \reg
+ lsl \reg, \reg
.endm
-#ifdef CONFIG_PARAVIRT_XXL
-#define GET_CR2_INTO(reg) GET_CR2_INTO_AX ; _ASM_MOV %_ASM_AX, reg
+/*
+ * Fetch the per-CPU GSBASE value for this processor and put it in @reg.
+ * We normally use %gs for accessing per-CPU data, but we are setting up
+ * %gs here and obviously can not use %gs itself to access per-CPU data.
+ *
+ * Do not use RDPID, because KVM loads guest's TSC_AUX on vm-entry and
+ * may not restore the host's value until the CPU returns to userspace.
+ * Thus the kernel would consume a guest's TSC_AUX if an NMI arrives
+ * while running KVM's run loop.
+ */
+.macro GET_PERCPU_BASE reg:req
+ LOAD_CPU_AND_NODE_SEG_LIMIT \reg
+ andq $VDSO_CPUNODE_MASK, \reg
+ movq __per_cpu_offset(, \reg, 8), \reg
+.endm
+
#else
-#define GET_CR2_INTO(reg) _ASM_MOV %cr2, reg
-#endif
+
+.macro GET_PERCPU_BASE reg:req
+ movq pcpu_unit_offsets(%rip), \reg
+.endm
+
+#endif /* CONFIG_SMP */
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 3f8e226..93a3122 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -10,281 +10,37 @@
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
+#include <linux/entry-common.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/errno.h>
#include <linux/ptrace.h>
-#include <linux/tracehook.h>
-#include <linux/audit.h>
-#include <linux/seccomp.h>
-#include <linux/signal.h>
#include <linux/export.h>
-#include <linux/context_tracking.h>
-#include <linux/user-return-notifier.h>
#include <linux/nospec.h>
-#include <linux/uprobes.h>
-#include <linux/livepatch.h>
#include <linux/syscalls.h>
#include <linux/uaccess.h>
+#ifdef CONFIG_XEN_PV
+#include <xen/xen-ops.h>
+#include <xen/events.h>
+#endif
+
#include <asm/desc.h>
#include <asm/traps.h>
#include <asm/vdso.h>
#include <asm/cpufeature.h>
#include <asm/fpu/api.h>
#include <asm/nospec-branch.h>
-
-#define CREATE_TRACE_POINTS
-#include <trace/events/syscalls.h>
-
-#ifdef CONFIG_CONTEXT_TRACKING
-/* Called on entry from user mode with IRQs off. */
-__visible inline void enter_from_user_mode(void)
-{
- CT_WARN_ON(ct_state() != CONTEXT_USER);
- user_exit_irqoff();
-}
-#else
-static inline void enter_from_user_mode(void) {}
-#endif
-
-static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
-{
-#ifdef CONFIG_X86_64
- if (arch == AUDIT_ARCH_X86_64) {
- audit_syscall_entry(regs->orig_ax, regs->di,
- regs->si, regs->dx, regs->r10);
- } else
-#endif
- {
- audit_syscall_entry(regs->orig_ax, regs->bx,
- regs->cx, regs->dx, regs->si);
- }
-}
-
-/*
- * Returns the syscall nr to run (which should match regs->orig_ax) or -1
- * to skip the syscall.
- */
-static long syscall_trace_enter(struct pt_regs *regs)
-{
- u32 arch = in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
-
- struct thread_info *ti = current_thread_info();
- unsigned long ret = 0;
- u32 work;
-
- if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
- BUG_ON(regs != task_pt_regs(current));
-
- work = READ_ONCE(ti->flags);
-
- if (work & (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU)) {
- ret = tracehook_report_syscall_entry(regs);
- if (ret || (work & _TIF_SYSCALL_EMU))
- return -1L;
- }
-
-#ifdef CONFIG_SECCOMP
- /*
- * Do seccomp after ptrace, to catch any tracer changes.
- */
- if (work & _TIF_SECCOMP) {
- struct seccomp_data sd;
-
- sd.arch = arch;
- sd.nr = regs->orig_ax;
- sd.instruction_pointer = regs->ip;
-#ifdef CONFIG_X86_64
- if (arch == AUDIT_ARCH_X86_64) {
- sd.args[0] = regs->di;
- sd.args[1] = regs->si;
- sd.args[2] = regs->dx;
- sd.args[3] = regs->r10;
- sd.args[4] = regs->r8;
- sd.args[5] = regs->r9;
- } else
-#endif
- {
- sd.args[0] = regs->bx;
- sd.args[1] = regs->cx;
- sd.args[2] = regs->dx;
- sd.args[3] = regs->si;
- sd.args[4] = regs->di;
- sd.args[5] = regs->bp;
- }
-
- ret = __secure_computing(&sd);
- if (ret == -1)
- return ret;
- }
-#endif
-
- if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
- trace_sys_enter(regs, regs->orig_ax);
-
- do_audit_syscall_entry(regs, arch);
-
- return ret ?: regs->orig_ax;
-}
-
-#define EXIT_TO_USERMODE_LOOP_FLAGS \
- (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
- _TIF_NEED_RESCHED | _TIF_USER_RETURN_NOTIFY | _TIF_PATCH_PENDING)
-
-static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
-{
- /*
- * In order to return to user mode, we need to have IRQs off with
- * none of EXIT_TO_USERMODE_LOOP_FLAGS set. Several of these flags
- * can be set at any time on preemptible kernels if we have IRQs on,
- * so we need to loop. Disabling preemption wouldn't help: doing the
- * work to clear some of the flags can sleep.
- */
- while (true) {
- /* We have work to do. */
- local_irq_enable();
-
- if (cached_flags & _TIF_NEED_RESCHED)
- schedule();
-
- if (cached_flags & _TIF_UPROBE)
- uprobe_notify_resume(regs);
-
- if (cached_flags & _TIF_PATCH_PENDING)
- klp_update_patch_state(current);
-
- /* deal with pending signal delivery */
- if (cached_flags & _TIF_SIGPENDING)
- do_signal(regs);
-
- if (cached_flags & _TIF_NOTIFY_RESUME) {
- clear_thread_flag(TIF_NOTIFY_RESUME);
- tracehook_notify_resume(regs);
- rseq_handle_notify_resume(NULL, regs);
- }
-
- if (cached_flags & _TIF_USER_RETURN_NOTIFY)
- fire_user_return_notifiers();
-
- /* Disable IRQs and retry */
- local_irq_disable();
-
- cached_flags = READ_ONCE(current_thread_info()->flags);
-
- if (!(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS))
- break;
- }
-}
-
-/* Called with IRQs disabled. */
-__visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
-{
- struct thread_info *ti = current_thread_info();
- u32 cached_flags;
-
- addr_limit_user_check();
-
- lockdep_assert_irqs_disabled();
- lockdep_sys_exit();
-
- cached_flags = READ_ONCE(ti->flags);
-
- if (unlikely(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS))
- exit_to_usermode_loop(regs, cached_flags);
-
- /* Reload ti->flags; we may have rescheduled above. */
- cached_flags = READ_ONCE(ti->flags);
-
- fpregs_assert_state_consistent();
- if (unlikely(cached_flags & _TIF_NEED_FPU_LOAD))
- switch_fpu_return();
-
-#ifdef CONFIG_COMPAT
- /*
- * Compat syscalls set TS_COMPAT. Make sure we clear it before
- * returning to user mode. We need to clear it *after* signal
- * handling, because syscall restart has a fixup for compat
- * syscalls. The fixup is exercised by the ptrace_syscall_32
- * selftest.
- *
- * We also need to clear TS_REGS_POKED_I386: the 32-bit tracer
- * special case only applies after poking regs and before the
- * very next return to user mode.
- */
- ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
-#endif
-
- user_enter_irqoff();
-
- mds_user_clear_cpu_buffers();
-}
-
-#define SYSCALL_EXIT_WORK_FLAGS \
- (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
- _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT)
-
-static void syscall_slow_exit_work(struct pt_regs *regs, u32 cached_flags)
-{
- bool step;
-
- audit_syscall_exit(regs);
-
- if (cached_flags & _TIF_SYSCALL_TRACEPOINT)
- trace_sys_exit(regs, regs->ax);
-
- /*
- * If TIF_SYSCALL_EMU is set, we only get here because of
- * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
- * We already reported this syscall instruction in
- * syscall_trace_enter().
- */
- step = unlikely(
- (cached_flags & (_TIF_SINGLESTEP | _TIF_SYSCALL_EMU))
- == _TIF_SINGLESTEP);
- if (step || cached_flags & _TIF_SYSCALL_TRACE)
- tracehook_report_syscall_exit(regs, step);
-}
-
-/*
- * Called with IRQs on and fully valid regs. Returns with IRQs off in a
- * state such that we can immediately switch to user mode.
- */
-__visible inline void syscall_return_slowpath(struct pt_regs *regs)
-{
- struct thread_info *ti = current_thread_info();
- u32 cached_flags = READ_ONCE(ti->flags);
-
- CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
-
- if (IS_ENABLED(CONFIG_PROVE_LOCKING) &&
- WARN(irqs_disabled(), "syscall %ld left IRQs disabled", regs->orig_ax))
- local_irq_enable();
-
- rseq_syscall(regs);
-
- /*
- * First do one-time work. If these work items are enabled, we
- * want to run them exactly once per syscall exit with IRQs on.
- */
- if (unlikely(cached_flags & SYSCALL_EXIT_WORK_FLAGS))
- syscall_slow_exit_work(regs, cached_flags);
-
- local_irq_disable();
- prepare_exit_to_usermode(regs);
-}
+#include <asm/io_bitmap.h>
+#include <asm/syscall.h>
+#include <asm/irq_stack.h>
#ifdef CONFIG_X86_64
-__visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
+__visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
{
- struct thread_info *ti;
+ nr = syscall_enter_from_user_mode(regs, nr);
- enter_from_user_mode();
- local_irq_enable();
- ti = current_thread_info();
- if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
- nr = syscall_trace_enter(regs);
-
+ instrumentation_begin();
if (likely(nr < NR_syscalls)) {
nr = array_index_nospec(nr, NR_syscalls);
regs->ax = sys_call_table[nr](regs);
@@ -296,76 +52,107 @@
regs->ax = x32_sys_call_table[nr](regs);
#endif
}
-
- syscall_return_slowpath(regs);
+ instrumentation_end();
+ syscall_exit_to_user_mode(regs);
}
#endif
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
-/*
- * Does a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL. Does
- * all entry and exit work and returns with IRQs off. This function is
- * extremely hot in workloads that use it, and it's usually called from
- * do_fast_syscall_32, so forcibly inline it to improve performance.
- */
-static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
+static __always_inline unsigned int syscall_32_enter(struct pt_regs *regs)
{
- struct thread_info *ti = current_thread_info();
- unsigned int nr = (unsigned int)regs->orig_ax;
+ if (IS_ENABLED(CONFIG_IA32_EMULATION))
+ current_thread_info()->status |= TS_COMPAT;
-#ifdef CONFIG_IA32_EMULATION
- ti->status |= TS_COMPAT;
-#endif
+ return (unsigned int)regs->orig_ax;
+}
- if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) {
- /*
- * Subtlety here: if ptrace pokes something larger than
- * 2^32-1 into orig_ax, this truncates it. This may or
- * may not be necessary, but it matches the old asm
- * behavior.
- */
- nr = syscall_trace_enter(regs);
- }
-
+/*
+ * Invoke a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL.
+ */
+static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs,
+ unsigned int nr)
+{
if (likely(nr < IA32_NR_syscalls)) {
nr = array_index_nospec(nr, IA32_NR_syscalls);
-#ifdef CONFIG_IA32_EMULATION
regs->ax = ia32_sys_call_table[nr](regs);
-#else
- /*
- * It's possible that a 32-bit syscall implementation
- * takes a 64-bit parameter but nonetheless assumes that
- * the high bits are zero. Make sure we zero-extend all
- * of the args.
- */
- regs->ax = ia32_sys_call_table[nr](
- (unsigned int)regs->bx, (unsigned int)regs->cx,
- (unsigned int)regs->dx, (unsigned int)regs->si,
- (unsigned int)regs->di, (unsigned int)regs->bp);
-#endif /* CONFIG_IA32_EMULATION */
}
-
- syscall_return_slowpath(regs);
}
/* Handles int $0x80 */
-__visible void do_int80_syscall_32(struct pt_regs *regs)
+__visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
{
- enter_from_user_mode();
- local_irq_enable();
- do_syscall_32_irqs_on(regs);
+ unsigned int nr = syscall_32_enter(regs);
+
+ /*
+ * Subtlety here: if ptrace pokes something larger than 2^32-1 into
+ * orig_ax, the unsigned int return value truncates it. This may
+ * or may not be necessary, but it matches the old asm behavior.
+ */
+ nr = (unsigned int)syscall_enter_from_user_mode(regs, nr);
+ instrumentation_begin();
+
+ do_syscall_32_irqs_on(regs, nr);
+
+ instrumentation_end();
+ syscall_exit_to_user_mode(regs);
+}
+
+static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
+{
+ unsigned int nr = syscall_32_enter(regs);
+ int res;
+
+ /*
+ * This cannot use syscall_enter_from_user_mode() as it has to
+ * fetch EBP before invoking any of the syscall entry work
+ * functions.
+ */
+ syscall_enter_from_user_mode_prepare(regs);
+
+ instrumentation_begin();
+ /* Fetch EBP from where the vDSO stashed it. */
+ if (IS_ENABLED(CONFIG_X86_64)) {
+ /*
+ * Micro-optimization: the pointer we're following is
+ * explicitly 32 bits, so it can't be out of range.
+ */
+ res = __get_user(*(u32 *)®s->bp,
+ (u32 __user __force *)(unsigned long)(u32)regs->sp);
+ } else {
+ res = get_user(*(u32 *)®s->bp,
+ (u32 __user __force *)(unsigned long)(u32)regs->sp);
+ }
+
+ if (res) {
+ /* User code screwed up. */
+ regs->ax = -EFAULT;
+
+ local_irq_disable();
+ instrumentation_end();
+ irqentry_exit_to_user_mode(regs);
+ return false;
+ }
+
+ /* The case truncates any ptrace induced syscall nr > 2^32 -1 */
+ nr = (unsigned int)syscall_enter_from_user_mode_work(regs, nr);
+
+ /* Now this is just like a normal syscall. */
+ do_syscall_32_irqs_on(regs, nr);
+
+ instrumentation_end();
+ syscall_exit_to_user_mode(regs);
+ return true;
}
/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
-__visible long do_fast_syscall_32(struct pt_regs *regs)
+__visible noinstr long do_fast_syscall_32(struct pt_regs *regs)
{
/*
* Called using the internal vDSO SYSENTER/SYSCALL32 calling
* convention. Adjust regs so it looks like we entered using int80.
*/
-
unsigned long landing_pad = (unsigned long)current->mm->context.vdso +
- vdso_image_32.sym_int80_landing_pad;
+ vdso_image_32.sym_int80_landing_pad;
/*
* SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward
@@ -374,34 +161,9 @@
*/
regs->ip = landing_pad;
- enter_from_user_mode();
-
- local_irq_enable();
-
- /* Fetch EBP from where the vDSO stashed it. */
- if (
-#ifdef CONFIG_X86_64
- /*
- * Micro-optimization: the pointer we're following is explicitly
- * 32 bits, so it can't be out of range.
- */
- __get_user(*(u32 *)®s->bp,
- (u32 __user __force *)(unsigned long)(u32)regs->sp)
-#else
- get_user(*(u32 *)®s->bp,
- (u32 __user __force *)(unsigned long)(u32)regs->sp)
-#endif
- ) {
-
- /* User code screwed up. */
- local_irq_disable();
- regs->ax = -EFAULT;
- prepare_exit_to_usermode(regs);
- return 0; /* Keep it simple: use IRET. */
- }
-
- /* Now this is just like a normal syscall. */
- do_syscall_32_irqs_on(regs);
+ /* Invoke the syscall. If it failed, keep it simple: use IRET. */
+ if (!__do_fast_syscall_32(regs))
+ return 0;
#ifdef CONFIG_X86_64
/*
@@ -433,4 +195,94 @@
(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF | X86_EFLAGS_VM)) == 0;
#endif
}
+
+/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
+__visible noinstr long do_SYSENTER_32(struct pt_regs *regs)
+{
+ /* SYSENTER loses RSP, but the vDSO saved it in RBP. */
+ regs->sp = regs->bp;
+
+ /* SYSENTER clobbers EFLAGS.IF. Assume it was set in usermode. */
+ regs->flags |= X86_EFLAGS_IF;
+
+ return do_fast_syscall_32(regs);
+}
#endif
+
+SYSCALL_DEFINE0(ni_syscall)
+{
+ return -ENOSYS;
+}
+
+#ifdef CONFIG_XEN_PV
+#ifndef CONFIG_PREEMPTION
+/*
+ * Some hypercalls issued by the toolstack can take many 10s of
+ * seconds. Allow tasks running hypercalls via the privcmd driver to
+ * be voluntarily preempted even if full kernel preemption is
+ * disabled.
+ *
+ * Such preemptible hypercalls are bracketed by
+ * xen_preemptible_hcall_begin() and xen_preemptible_hcall_end()
+ * calls.
+ */
+DEFINE_PER_CPU(bool, xen_in_preemptible_hcall);
+EXPORT_SYMBOL_GPL(xen_in_preemptible_hcall);
+
+/*
+ * In case of scheduling the flag must be cleared and restored after
+ * returning from schedule as the task might move to a different CPU.
+ */
+static __always_inline bool get_and_clear_inhcall(void)
+{
+ bool inhcall = __this_cpu_read(xen_in_preemptible_hcall);
+
+ __this_cpu_write(xen_in_preemptible_hcall, false);
+ return inhcall;
+}
+
+static __always_inline void restore_inhcall(bool inhcall)
+{
+ __this_cpu_write(xen_in_preemptible_hcall, inhcall);
+}
+#else
+static __always_inline bool get_and_clear_inhcall(void) { return false; }
+static __always_inline void restore_inhcall(bool inhcall) { }
+#endif
+
+static void __xen_pv_evtchn_do_upcall(void)
+{
+ irq_enter_rcu();
+ inc_irq_stat(irq_hv_callback_count);
+
+ xen_hvm_evtchn_do_upcall();
+
+ irq_exit_rcu();
+}
+
+__visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs;
+ bool inhcall;
+ irqentry_state_t state;
+
+ state = irqentry_enter(regs);
+ old_regs = set_irq_regs(regs);
+
+ instrumentation_begin();
+ run_on_irqstack_cond(__xen_pv_evtchn_do_upcall, regs);
+ instrumentation_end();
+
+ set_irq_regs(old_regs);
+
+ inhcall = get_and_clear_inhcall();
+ if (inhcall && !WARN_ON_ONCE(state.exit_rcu)) {
+ instrumentation_begin();
+ irqentry_exit_cond_resched();
+ instrumentation_end();
+ restore_inhcall(inhcall);
+ } else {
+ irqentry_exit(regs, state);
+ }
+}
+#endif /* CONFIG_XEN_PV */
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index bde3e0f..df8c017 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -44,40 +44,13 @@
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/frame.h>
+#include <asm/trapnr.h>
#include <asm/nospec-branch.h>
#include "calling.h"
.section .entry.text, "ax"
-/*
- * We use macros for low-level operations which need to be overridden
- * for paravirtualization. The following will never clobber any registers:
- * INTERRUPT_RETURN (aka. "iret")
- * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
- * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
- *
- * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
- * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
- * Allowing a register to be clobbered can shrink the paravirt replacement
- * enough to patch inline, increasing performance.
- */
-
-#ifdef CONFIG_PREEMPTION
-# define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
-#else
-# define preempt_stop(clobbers)
-#endif
-
-.macro TRACE_IRQS_IRET
-#ifdef CONFIG_TRACE_IRQFLAGS
- testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off?
- jz 1f
- TRACE_IRQS_ON
-1:
-#endif
-.endm
-
#define PTI_SWITCH_MASK (1 << PAGE_SHIFT)
/*
@@ -476,8 +449,6 @@
.macro SWITCH_TO_KERNEL_STACK
- ALTERNATIVE "", "jmp .Lend_\@", X86_FEATURE_XENPV
-
BUG_IF_WRONG_CR3
SWITCH_TO_KERNEL_CR3 scratch_reg=%eax
@@ -626,8 +597,6 @@
*/
.macro SWITCH_TO_ENTRY_STACK
- ALTERNATIVE "", "jmp .Lend_\@", X86_FEATURE_XENPV
-
/* Bytes to copy */
movl $PTREGS_SIZE, %ecx
@@ -726,11 +695,69 @@
.Lend_\@:
.endm
+
+/**
+ * idtentry - Macro to generate entry stubs for simple IDT entries
+ * @vector: Vector number
+ * @asmsym: ASM symbol for the entry point
+ * @cfunc: C function to be called
+ * @has_error_code: Hardware pushed error code on stack
+ */
+.macro idtentry vector asmsym cfunc has_error_code:req
+SYM_CODE_START(\asmsym)
+ ASM_CLAC
+ cld
+
+ .if \has_error_code == 0
+ pushl $0 /* Clear the error code */
+ .endif
+
+ /* Push the C-function address into the GS slot */
+ pushl $\cfunc
+ /* Invoke the common exception entry */
+ jmp handle_exception
+SYM_CODE_END(\asmsym)
+.endm
+
+.macro idtentry_irq vector cfunc
+ .p2align CONFIG_X86_L1_CACHE_SHIFT
+SYM_CODE_START_LOCAL(asm_\cfunc)
+ ASM_CLAC
+ SAVE_ALL switch_stacks=1
+ ENCODE_FRAME_POINTER
+ movl %esp, %eax
+ movl PT_ORIG_EAX(%esp), %edx /* get the vector from stack */
+ movl $-1, PT_ORIG_EAX(%esp) /* no syscall to restart */
+ call \cfunc
+ jmp handle_exception_return
+SYM_CODE_END(asm_\cfunc)
+.endm
+
+.macro idtentry_sysvec vector cfunc
+ idtentry \vector asm_\cfunc \cfunc has_error_code=0
+.endm
+
+/*
+ * Include the defines which emit the idt entries which are shared
+ * shared between 32 and 64 bit and emit the __irqentry_text_* markers
+ * so the stacktrace boundary checks work.
+ */
+ .align 16
+ .globl __irqentry_text_start
+__irqentry_text_start:
+
+#include <asm/idtentry.h>
+
+ .align 16
+ .globl __irqentry_text_end
+__irqentry_text_end:
+
/*
* %eax: prev task
* %edx: next task
*/
-ENTRY(__switch_to_asm)
+.pushsection .text, "ax"
+SYM_CODE_START(__switch_to_asm)
/*
* Save callee-saved registers
* This must match the order in struct inactive_task_frame
@@ -739,6 +766,11 @@
pushl %ebx
pushl %edi
pushl %esi
+ /*
+ * Flags are saved to prevent AC leakage. This could go
+ * away if objtool would have 32bit support to verify
+ * the STAC/CLAC correctness.
+ */
pushfl
/* switch stack */
@@ -761,15 +793,17 @@
FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
#endif
- /* restore callee-saved registers */
+ /* Restore flags or the incoming task to restore AC state. */
popfl
+ /* restore callee-saved registers */
popl %esi
popl %edi
popl %ebx
popl %ebp
jmp __switch_to
-END(__switch_to_asm)
+SYM_CODE_END(__switch_to_asm)
+.popsection
/*
* The unwinder expects the last frame on the stack to always be at the same
@@ -778,7 +812,8 @@
* asmlinkage function so its argument has to be pushed on the stack. This
* wrapper creates a proper "end of stack" frame header before the call.
*/
-ENTRY(schedule_tail_wrapper)
+.pushsection .text, "ax"
+SYM_FUNC_START(schedule_tail_wrapper)
FRAME_BEGIN
pushl %eax
@@ -787,7 +822,9 @@
FRAME_END
ret
-ENDPROC(schedule_tail_wrapper)
+SYM_FUNC_END(schedule_tail_wrapper)
+.popsection
+
/*
* A newly forked process directly context switches into this address.
*
@@ -795,7 +832,8 @@
* ebx: kernel thread func (NULL for user thread)
* edi: kernel thread arg
*/
-ENTRY(ret_from_fork)
+.pushsection .text, "ax"
+SYM_CODE_START(ret_from_fork)
call schedule_tail_wrapper
testl %ebx, %ebx
@@ -804,57 +842,23 @@
2:
/* When we fork, we trace the syscall return in the child, too. */
movl %esp, %eax
- call syscall_return_slowpath
- STACKLEAK_ERASE
- jmp restore_all
+ call syscall_exit_to_user_mode
+ jmp .Lsyscall_32_done
/* kernel thread */
1: movl %edi, %eax
- CALL_NOSPEC %ebx
+ CALL_NOSPEC ebx
/*
* A kernel thread is allowed to return here after successfully
- * calling do_execve(). Exit to userspace to complete the execve()
+ * calling kernel_execve(). Exit to userspace to complete the execve()
* syscall.
*/
movl $0, PT_EAX(%esp)
jmp 2b
-END(ret_from_fork)
+SYM_CODE_END(ret_from_fork)
+.popsection
-/*
- * Return to user mode is not as complex as all this looks,
- * but we want the default path for a system call return to
- * go as quickly as possible which is why some of this is
- * less clear than it otherwise should be.
- */
-
- # userspace resumption stub bypassing syscall exit tracing
- ALIGN
-ret_from_exception:
- preempt_stop(CLBR_ANY)
-ret_from_intr:
-#ifdef CONFIG_VM86
- movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS
- movb PT_CS(%esp), %al
- andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
-#else
- /*
- * We can be coming here from child spawned by kernel_thread().
- */
- movl PT_CS(%esp), %eax
- andl $SEGMENT_RPL_MASK, %eax
-#endif
- cmpl $USER_RPL, %eax
- jb restore_all_kernel # not returning to v8086 or userspace
-
-ENTRY(resume_userspace)
- DISABLE_INTERRUPTS(CLBR_ANY)
- TRACE_IRQS_OFF
- movl %esp, %eax
- call prepare_exit_to_usermode
- jmp restore_all
-END(ret_from_exception)
-
-GLOBAL(__begin_SYSENTER_singlestep_region)
+SYM_ENTRY(__begin_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE)
/*
* All code from here through __end_SYSENTER_singlestep_region is subject
* to being single-stepped if a user program sets TF and executes SYSENTER.
@@ -864,17 +868,6 @@
* will ignore all of the single-step traps generated in this range.
*/
-#ifdef CONFIG_XEN_PV
-/*
- * Xen doesn't set %esp to be precisely what the normal SYSENTER
- * entry point expects, so fix it up before using the normal path.
- */
-SYM_CODE_START(xen_sysenter_target)
- addl $5*4, %esp /* remove xen-provided frame */
- jmp .Lsysenter_past_esp
-SYM_CODE_END(xen_sysenter_target)
-#endif
-
/*
* 32-bit SYSENTER entry.
*
@@ -907,7 +900,7 @@
* ebp user stack
* 0(%ebp) arg6
*/
-ENTRY(entry_SYSENTER_32)
+SYM_FUNC_START(entry_SYSENTER_32)
/*
* On entry-stack with all userspace-regs live - save and
* restore eflags and %eax to use it as scratch-reg for the cr3
@@ -925,9 +918,8 @@
.Lsysenter_past_esp:
pushl $__USER_DS /* pt_regs->ss */
- pushl %ebp /* pt_regs->sp (stashed in bp) */
+ pushl $0 /* pt_regs->sp (placeholder) */
pushfl /* pt_regs->flags (except IF = 0) */
- orl $X86_EFLAGS_IF, (%esp) /* Fix IF */
pushl $__USER_CS /* pt_regs->cs */
pushl $0 /* pt_regs->ip = 0 (placeholder) */
pushl %eax /* pt_regs->orig_ax */
@@ -956,22 +948,14 @@
jnz .Lsysenter_fix_flags
.Lsysenter_flags_fixed:
- /*
- * User mode is traced as though IRQs are on, and SYSENTER
- * turned them off.
- */
- TRACE_IRQS_OFF
-
movl %esp, %eax
- call do_fast_syscall_32
- /* XEN PV guests always use IRET path */
- ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
- "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
+ call do_SYSENTER_32
+ testl %eax, %eax
+ jz .Lsyscall_32_done
STACKLEAK_ERASE
-/* Opportunistic SYSEXIT */
- TRACE_IRQS_ON /* User mode traces as IRQs on. */
+ /* Opportunistic SYSEXIT */
/*
* Setup entry stack - we keep the pointer in %eax and do the
@@ -1034,8 +1018,8 @@
pushl $X86_EFLAGS_FIXED
popfl
jmp .Lsysenter_flags_fixed
-GLOBAL(__end_SYSENTER_singlestep_region)
-ENDPROC(entry_SYSENTER_32)
+SYM_ENTRY(__end_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE)
+SYM_FUNC_END(entry_SYSENTER_32)
/*
* 32-bit legacy system call entry.
@@ -1065,30 +1049,21 @@
* edi arg5
* ebp arg6
*/
-ENTRY(entry_INT80_32)
+SYM_FUNC_START(entry_INT80_32)
ASM_CLAC
pushl %eax /* pt_regs->orig_ax */
SAVE_ALL pt_regs_ax=$-ENOSYS switch_stacks=1 /* save rest */
- /*
- * User mode is traced as though IRQs are on, and the interrupt gate
- * turned them off.
- */
- TRACE_IRQS_OFF
-
movl %esp, %eax
call do_int80_syscall_32
.Lsyscall_32_done:
-
STACKLEAK_ERASE
-restore_all:
- TRACE_IRQS_IRET
+restore_all_switch_stack:
SWITCH_TO_ENTRY_STACK
-.Lrestore_all_notrace:
CHECK_AND_APPLY_ESPFIX
-.Lrestore_nocheck:
+
/* Switch back to user CR3 */
SWITCH_TO_USER_CR3 scratch_reg=%eax
@@ -1104,26 +1079,10 @@
*/
INTERRUPT_RETURN
-restore_all_kernel:
-#ifdef CONFIG_PREEMPTION
- DISABLE_INTERRUPTS(CLBR_ANY)
- cmpl $0, PER_CPU_VAR(__preempt_count)
- jnz .Lno_preempt
- testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ?
- jz .Lno_preempt
- call preempt_schedule_irq
-.Lno_preempt:
-#endif
- TRACE_IRQS_IRET
- PARANOID_EXIT_TO_KERNEL_MODE
- BUG_IF_WRONG_CR3
- RESTORE_REGS 4
- jmp .Lirq_return
-
.section .fixup, "ax"
-ENTRY(iret_exc )
+SYM_CODE_START(asm_iret_error)
pushl $0 # no error code
- pushl $do_iret_error
+ pushl $iret_error
#ifdef CONFIG_DEBUG_ENTRY
/*
@@ -1137,10 +1096,11 @@
popl %eax
#endif
- jmp common_exception
+ jmp handle_exception
+SYM_CODE_END(asm_iret_error)
.previous
- _ASM_EXTABLE(.Lirq_return, iret_exc)
-ENDPROC(entry_INT80_32)
+ _ASM_EXTABLE(.Lirq_return, asm_iret_error)
+SYM_FUNC_END(entry_INT80_32)
.macro FIXUP_ESPFIX_STACK
/*
@@ -1189,319 +1149,7 @@
#endif
.endm
-/*
- * Build the entry stubs with some assembler magic.
- * We pack 1 stub into every 8-byte block.
- */
- .align 8
-ENTRY(irq_entries_start)
- vector=FIRST_EXTERNAL_VECTOR
- .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
- pushl $(~vector+0x80) /* Note: always in signed byte range */
- vector=vector+1
- jmp common_interrupt
- .align 8
- .endr
-END(irq_entries_start)
-
-#ifdef CONFIG_X86_LOCAL_APIC
- .align 8
-ENTRY(spurious_entries_start)
- vector=FIRST_SYSTEM_VECTOR
- .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR)
- pushl $(~vector+0x80) /* Note: always in signed byte range */
- vector=vector+1
- jmp common_spurious
- .align 8
- .endr
-END(spurious_entries_start)
-
-common_spurious:
- ASM_CLAC
- addl $-0x80, (%esp) /* Adjust vector into the [-256, -1] range */
- SAVE_ALL switch_stacks=1
- ENCODE_FRAME_POINTER
- TRACE_IRQS_OFF
- movl %esp, %eax
- call smp_spurious_interrupt
- jmp ret_from_intr
-ENDPROC(common_spurious)
-#endif
-
-/*
- * the CPU automatically disables interrupts when executing an IRQ vector,
- * so IRQ-flags tracing has to follow that:
- */
- .p2align CONFIG_X86_L1_CACHE_SHIFT
-common_interrupt:
- ASM_CLAC
- addl $-0x80, (%esp) /* Adjust vector into the [-256, -1] range */
-
- SAVE_ALL switch_stacks=1
- ENCODE_FRAME_POINTER
- TRACE_IRQS_OFF
- movl %esp, %eax
- call do_IRQ
- jmp ret_from_intr
-ENDPROC(common_interrupt)
-
-#define BUILD_INTERRUPT3(name, nr, fn) \
-ENTRY(name) \
- ASM_CLAC; \
- pushl $~(nr); \
- SAVE_ALL switch_stacks=1; \
- ENCODE_FRAME_POINTER; \
- TRACE_IRQS_OFF \
- movl %esp, %eax; \
- call fn; \
- jmp ret_from_intr; \
-ENDPROC(name)
-
-#define BUILD_INTERRUPT(name, nr) \
- BUILD_INTERRUPT3(name, nr, smp_##name); \
-
-/* The include is where all of the SMP etc. interrupts come from */
-#include <asm/entry_arch.h>
-
-ENTRY(coprocessor_error)
- ASM_CLAC
- pushl $0
- pushl $do_coprocessor_error
- jmp common_exception
-END(coprocessor_error)
-
-ENTRY(simd_coprocessor_error)
- ASM_CLAC
- pushl $0
-#ifdef CONFIG_X86_INVD_BUG
- /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
- ALTERNATIVE "pushl $do_general_protection", \
- "pushl $do_simd_coprocessor_error", \
- X86_FEATURE_XMM
-#else
- pushl $do_simd_coprocessor_error
-#endif
- jmp common_exception
-END(simd_coprocessor_error)
-
-ENTRY(device_not_available)
- ASM_CLAC
- pushl $-1 # mark this as an int
- pushl $do_device_not_available
- jmp common_exception
-END(device_not_available)
-
-#ifdef CONFIG_PARAVIRT
-ENTRY(native_iret)
- iret
- _ASM_EXTABLE(native_iret, iret_exc)
-END(native_iret)
-#endif
-
-ENTRY(overflow)
- ASM_CLAC
- pushl $0
- pushl $do_overflow
- jmp common_exception
-END(overflow)
-
-ENTRY(bounds)
- ASM_CLAC
- pushl $0
- pushl $do_bounds
- jmp common_exception
-END(bounds)
-
-ENTRY(invalid_op)
- ASM_CLAC
- pushl $0
- pushl $do_invalid_op
- jmp common_exception
-END(invalid_op)
-
-ENTRY(coprocessor_segment_overrun)
- ASM_CLAC
- pushl $0
- pushl $do_coprocessor_segment_overrun
- jmp common_exception
-END(coprocessor_segment_overrun)
-
-ENTRY(invalid_TSS)
- ASM_CLAC
- pushl $do_invalid_TSS
- jmp common_exception
-END(invalid_TSS)
-
-ENTRY(segment_not_present)
- ASM_CLAC
- pushl $do_segment_not_present
- jmp common_exception
-END(segment_not_present)
-
-ENTRY(stack_segment)
- ASM_CLAC
- pushl $do_stack_segment
- jmp common_exception
-END(stack_segment)
-
-ENTRY(alignment_check)
- ASM_CLAC
- pushl $do_alignment_check
- jmp common_exception
-END(alignment_check)
-
-ENTRY(divide_error)
- ASM_CLAC
- pushl $0 # no error code
- pushl $do_divide_error
- jmp common_exception
-END(divide_error)
-
-#ifdef CONFIG_X86_MCE
-ENTRY(machine_check)
- ASM_CLAC
- pushl $0
- pushl machine_check_vector
- jmp common_exception
-END(machine_check)
-#endif
-
-ENTRY(spurious_interrupt_bug)
- ASM_CLAC
- pushl $0
- pushl $do_spurious_interrupt_bug
- jmp common_exception
-END(spurious_interrupt_bug)
-
-#ifdef CONFIG_XEN_PV
-ENTRY(xen_hypervisor_callback)
- /*
- * Check to see if we got the event in the critical
- * region in xen_iret_direct, after we've reenabled
- * events and checked for pending events. This simulates
- * iret instruction's behaviour where it delivers a
- * pending interrupt when enabling interrupts:
- */
- cmpl $xen_iret_start_crit, (%esp)
- jb 1f
- cmpl $xen_iret_end_crit, (%esp)
- jae 1f
- call xen_iret_crit_fixup
-1:
- pushl $-1 /* orig_ax = -1 => not a system call */
- SAVE_ALL
- ENCODE_FRAME_POINTER
- TRACE_IRQS_OFF
- mov %esp, %eax
- call xen_evtchn_do_upcall
-#ifndef CONFIG_PREEMPTION
- call xen_maybe_preempt_hcall
-#endif
- jmp ret_from_intr
-ENDPROC(xen_hypervisor_callback)
-
-/*
- * Hypervisor uses this for application faults while it executes.
- * We get here for two reasons:
- * 1. Fault while reloading DS, ES, FS or GS
- * 2. Fault while executing IRET
- * Category 1 we fix up by reattempting the load, and zeroing the segment
- * register if the load fails.
- * Category 2 we fix up by jumping to do_iret_error. We cannot use the
- * normal Linux return path in this case because if we use the IRET hypercall
- * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
- * We distinguish between categories by maintaining a status value in EAX.
- */
-ENTRY(xen_failsafe_callback)
- pushl %eax
- movl $1, %eax
-1: mov 4(%esp), %ds
-2: mov 8(%esp), %es
-3: mov 12(%esp), %fs
-4: mov 16(%esp), %gs
- /* EAX == 0 => Category 1 (Bad segment)
- EAX != 0 => Category 2 (Bad IRET) */
- testl %eax, %eax
- popl %eax
- lea 16(%esp), %esp
- jz 5f
- jmp iret_exc
-5: pushl $-1 /* orig_ax = -1 => not a system call */
- SAVE_ALL
- ENCODE_FRAME_POINTER
- jmp ret_from_exception
-
-.section .fixup, "ax"
-6: xorl %eax, %eax
- movl %eax, 4(%esp)
- jmp 1b
-7: xorl %eax, %eax
- movl %eax, 8(%esp)
- jmp 2b
-8: xorl %eax, %eax
- movl %eax, 12(%esp)
- jmp 3b
-9: xorl %eax, %eax
- movl %eax, 16(%esp)
- jmp 4b
-.previous
- _ASM_EXTABLE(1b, 6b)
- _ASM_EXTABLE(2b, 7b)
- _ASM_EXTABLE(3b, 8b)
- _ASM_EXTABLE(4b, 9b)
-ENDPROC(xen_failsafe_callback)
-#endif /* CONFIG_XEN_PV */
-
-#ifdef CONFIG_XEN_PVHVM
-BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
- xen_evtchn_do_upcall)
-#endif
-
-
-#if IS_ENABLED(CONFIG_HYPERV)
-
-BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
- hyperv_vector_handler)
-
-BUILD_INTERRUPT3(hyperv_reenlightenment_vector, HYPERV_REENLIGHTENMENT_VECTOR,
- hyperv_reenlightenment_intr)
-
-BUILD_INTERRUPT3(hv_stimer0_callback_vector, HYPERV_STIMER0_VECTOR,
- hv_stimer0_vector_handler)
-
-#endif /* CONFIG_HYPERV */
-
-ENTRY(page_fault)
- ASM_CLAC
- pushl $do_page_fault
- jmp common_exception_read_cr2
-END(page_fault)
-
-common_exception_read_cr2:
- /* the function address is in %gs's slot on the stack */
- SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
-
- ENCODE_FRAME_POINTER
-
- /* fixup %gs */
- GS_TO_REG %ecx
- movl PT_GS(%esp), %edi
- REG_TO_PTGS %ecx
- SET_KERNEL_GS %ecx
-
- GET_CR2_INTO(%ecx) # might clobber %eax
-
- /* fixup orig %eax */
- movl PT_ORIG_EAX(%esp), %edx # get the error code
- movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
-
- TRACE_IRQS_OFF
- movl %esp, %eax # pt_regs pointer
- CALL_NOSPEC %edi
- jmp ret_from_exception
-END(common_exception_read_cr2)
-
-common_exception:
+SYM_CODE_START_LOCAL_NOALIGN(handle_exception)
/* the function address is in %gs's slot on the stack */
SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
ENCODE_FRAME_POINTER
@@ -1516,21 +1164,73 @@
movl PT_ORIG_EAX(%esp), %edx # get the error code
movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
- TRACE_IRQS_OFF
movl %esp, %eax # pt_regs pointer
- CALL_NOSPEC %edi
- jmp ret_from_exception
-END(common_exception)
+ CALL_NOSPEC edi
-ENTRY(debug)
+handle_exception_return:
+#ifdef CONFIG_VM86
+ movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS
+ movb PT_CS(%esp), %al
+ andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
+#else
/*
- * Entry from sysenter is now handled in common_exception
+ * We can be coming here from child spawned by kernel_thread().
*/
- ASM_CLAC
- pushl $-1 # mark this as an int
- pushl $do_debug
- jmp common_exception
-END(debug)
+ movl PT_CS(%esp), %eax
+ andl $SEGMENT_RPL_MASK, %eax
+#endif
+ cmpl $USER_RPL, %eax # returning to v8086 or userspace ?
+ jnb ret_to_user
+
+ PARANOID_EXIT_TO_KERNEL_MODE
+ BUG_IF_WRONG_CR3
+ RESTORE_REGS 4
+ jmp .Lirq_return
+
+ret_to_user:
+ movl %esp, %eax
+ jmp restore_all_switch_stack
+SYM_CODE_END(handle_exception)
+
+SYM_CODE_START(asm_exc_double_fault)
+1:
+ /*
+ * This is a task gate handler, not an interrupt gate handler.
+ * The error code is on the stack, but the stack is otherwise
+ * empty. Interrupts are off. Our state is sane with the following
+ * exceptions:
+ *
+ * - CR0.TS is set. "TS" literally means "task switched".
+ * - EFLAGS.NT is set because we're a "nested task".
+ * - The doublefault TSS has back_link set and has been marked busy.
+ * - TR points to the doublefault TSS and the normal TSS is busy.
+ * - CR3 is the normal kernel PGD. This would be delightful, except
+ * that the CPU didn't bother to save the old CR3 anywhere. This
+ * would make it very awkward to return back to the context we came
+ * from.
+ *
+ * The rest of EFLAGS is sanitized for us, so we don't need to
+ * worry about AC or DF.
+ *
+ * Don't even bother popping the error code. It's always zero,
+ * and ignoring it makes us a bit more robust against buggy
+ * hypervisor task gate implementations.
+ *
+ * We will manually undo the task switch instead of doing a
+ * task-switching IRET.
+ */
+
+ clts /* clear CR0.TS */
+ pushl $X86_EFLAGS_FIXED
+ popfl /* clear EFLAGS.NT */
+
+ call doublefault_shim
+
+ /* We don't support returning, so we have no IRET here. */
+1:
+ hlt
+ jmp 1b
+SYM_CODE_END(asm_exc_double_fault)
/*
* NMI is doubly nasty. It can happen on the first instruction of
@@ -1539,7 +1239,7 @@
* switched stacks. We handle both conditions by simply checking whether we
* interrupted kernel code running on the SYSENTER stack.
*/
-ENTRY(nmi)
+SYM_CODE_START(asm_exc_nmi)
ASM_CLAC
#ifdef CONFIG_X86_ESPFIX32
@@ -1568,7 +1268,7 @@
jb .Lnmi_from_sysenter_stack
/* Not on SYSENTER stack. */
- call do_nmi
+ call exc_nmi
jmp .Lnmi_return
.Lnmi_from_sysenter_stack:
@@ -1578,7 +1278,7 @@
*/
movl %esp, %ebx
movl PER_CPU_VAR(cpu_current_top_of_stack), %esp
- call do_nmi
+ call exc_nmi
movl %ebx, %esp
.Lnmi_return:
@@ -1632,36 +1332,10 @@
lss (1+5+6)*4(%esp), %esp # back to espfix stack
jmp .Lirq_return
#endif
-END(nmi)
+SYM_CODE_END(asm_exc_nmi)
-ENTRY(int3)
- ASM_CLAC
- pushl $-1 # mark this as an int
-
- SAVE_ALL switch_stacks=1
- ENCODE_FRAME_POINTER
- TRACE_IRQS_OFF
- xorl %edx, %edx # zero error code
- movl %esp, %eax # pt_regs pointer
- call do_int3
- jmp ret_from_exception
-END(int3)
-
-ENTRY(general_protection)
- ASM_CLAC
- pushl $do_general_protection
- jmp common_exception
-END(general_protection)
-
-#ifdef CONFIG_KVM_GUEST
-ENTRY(async_page_fault)
- ASM_CLAC
- pushl $do_async_page_fault
- jmp common_exception_read_cr2
-END(async_page_fault)
-#endif
-
-ENTRY(rewind_stack_do_exit)
+.pushsection .text, "ax"
+SYM_CODE_START(rewind_stack_do_exit)
/* Prevent any naive code from trying to unwind to our caller. */
xorl %ebp, %ebp
@@ -1670,4 +1344,5 @@
call do_exit
1: jmp 1b
-END(rewind_stack_do_exit)
+SYM_CODE_END(rewind_stack_do_exit)
+.popsection
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 2ba3d53..a24ce59 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -15,8 +15,7 @@
* at the top of the kernel process stack.
*
* Some macro usage:
- * - ENTRY/END: Define functions in the symbol table.
- * - TRACE_IRQ_*: Trace hardirq state for lock debugging.
+ * - SYM_FUNC_START/END:Define functions in the symbol table.
* - idtentry: Define exception entry points.
*/
#include <linux/linkage.h>
@@ -37,7 +36,9 @@
#include <asm/pgtable_types.h>
#include <asm/export.h>
#include <asm/frame.h>
+#include <asm/trapnr.h>
#include <asm/nospec-branch.h>
+#include <asm/fsgsbase.h>
#include <linux/err.h>
#include "calling.h"
@@ -45,64 +46,13 @@
.code64
.section .entry.text, "ax"
-#ifdef CONFIG_PARAVIRT
-ENTRY(native_usergs_sysret64)
+#ifdef CONFIG_PARAVIRT_XXL
+SYM_CODE_START(native_usergs_sysret64)
UNWIND_HINT_EMPTY
swapgs
sysretq
-END(native_usergs_sysret64)
-#endif /* CONFIG_PARAVIRT */
-
-.macro TRACE_IRQS_FLAGS flags:req
-#ifdef CONFIG_TRACE_IRQFLAGS
- btl $9, \flags /* interrupts off? */
- jnc 1f
- TRACE_IRQS_ON
-1:
-#endif
-.endm
-
-.macro TRACE_IRQS_IRETQ
- TRACE_IRQS_FLAGS EFLAGS(%rsp)
-.endm
-
-/*
- * When dynamic function tracer is enabled it will add a breakpoint
- * to all locations that it is about to modify, sync CPUs, update
- * all the code, sync CPUs, then remove the breakpoints. In this time
- * if lockdep is enabled, it might jump back into the debug handler
- * outside the updating of the IST protection. (TRACE_IRQS_ON/OFF).
- *
- * We need to change the IDT table before calling TRACE_IRQS_ON/OFF to
- * make sure the stack pointer does not get reset back to the top
- * of the debug stack, and instead just reuses the current stack.
- */
-#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS)
-
-.macro TRACE_IRQS_OFF_DEBUG
- call debug_stack_set_zero
- TRACE_IRQS_OFF
- call debug_stack_reset
-.endm
-
-.macro TRACE_IRQS_ON_DEBUG
- call debug_stack_set_zero
- TRACE_IRQS_ON
- call debug_stack_reset
-.endm
-
-.macro TRACE_IRQS_IRETQ_DEBUG
- btl $9, EFLAGS(%rsp) /* interrupts off? */
- jnc 1f
- TRACE_IRQS_ON_DEBUG
-1:
-.endm
-
-#else
-# define TRACE_IRQS_OFF_DEBUG TRACE_IRQS_OFF
-# define TRACE_IRQS_ON_DEBUG TRACE_IRQS_ON
-# define TRACE_IRQS_IRETQ_DEBUG TRACE_IRQS_IRETQ
-#endif
+SYM_CODE_END(native_usergs_sysret64)
+#endif /* CONFIG_PARAVIRT_XXL */
/*
* 64-bit SYSCALL instruction entry. Up to 6 arguments in registers.
@@ -142,13 +92,8 @@
* with them due to bugs in both AMD and Intel CPUs.
*/
-ENTRY(entry_SYSCALL_64)
+SYM_CODE_START(entry_SYSCALL_64)
UNWIND_HINT_EMPTY
- /*
- * Interrupts are off on entry.
- * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
- * it is too small to ever cause noticeable irq latency.
- */
swapgs
/* tss.sp2 is scratch space. */
@@ -156,26 +101,24 @@
SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL)
+
/* Construct struct pt_regs on stack */
pushq $__USER_DS /* pt_regs->ss */
pushq PER_CPU_VAR(cpu_tss_rw + TSS_sp2) /* pt_regs->sp */
pushq %r11 /* pt_regs->flags */
pushq $__USER_CS /* pt_regs->cs */
pushq %rcx /* pt_regs->ip */
-GLOBAL(entry_SYSCALL_64_after_hwframe)
+SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
pushq %rax /* pt_regs->orig_ax */
PUSH_AND_CLEAR_REGS rax=$-ENOSYS
- TRACE_IRQS_OFF
-
/* IRQs are off. */
movq %rax, %rdi
movq %rsp, %rsi
call do_syscall_64 /* returns with IRQs disabled */
- TRACE_IRQS_IRETQ /* we're about to change IF */
-
/*
* Try to use SYSRET instead of IRET if we're returning to
* a completely clean 64-bit userspace context. If we're not,
@@ -273,14 +216,14 @@
popq %rdi
popq %rsp
USERGS_SYSRET64
-END(entry_SYSCALL_64)
+SYM_CODE_END(entry_SYSCALL_64)
/*
* %rdi: prev task
* %rsi: next task
*/
-ENTRY(__switch_to_asm)
- UNWIND_HINT_FUNC
+.pushsection .text, "ax"
+SYM_FUNC_START(__switch_to_asm)
/*
* Save callee-saved registers
* This must match the order in inactive_task_frame
@@ -321,7 +264,8 @@
popq %rbp
jmp __switch_to
-END(__switch_to_asm)
+SYM_FUNC_END(__switch_to_asm)
+.popsection
/*
* A newly forked process directly context switches into this address.
@@ -330,7 +274,8 @@
* rbx: kernel thread func (NULL for user thread)
* r12: kernel thread arg
*/
-ENTRY(ret_from_fork)
+.pushsection .text, "ax"
+SYM_CODE_START(ret_from_fork)
UNWIND_HINT_EMPTY
movq %rax, %rdi
call schedule_tail /* rdi: 'prev' task parameter */
@@ -341,51 +286,23 @@
2:
UNWIND_HINT_REGS
movq %rsp, %rdi
- call syscall_return_slowpath /* returns with IRQs disabled */
- TRACE_IRQS_ON /* user mode is traced as IRQS on */
+ call syscall_exit_to_user_mode /* returns with IRQs disabled */
jmp swapgs_restore_regs_and_return_to_usermode
1:
/* kernel thread */
UNWIND_HINT_EMPTY
movq %r12, %rdi
- CALL_NOSPEC %rbx
+ CALL_NOSPEC rbx
/*
* A kernel thread is allowed to return here after successfully
- * calling do_execve(). Exit to userspace to complete the execve()
+ * calling kernel_execve(). Exit to userspace to complete the execve()
* syscall.
*/
movq $0, RAX(%rsp)
jmp 2b
-END(ret_from_fork)
-
-/*
- * Build the entry stubs with some assembler magic.
- * We pack 1 stub into every 8-byte block.
- */
- .align 8
-ENTRY(irq_entries_start)
- vector=FIRST_EXTERNAL_VECTOR
- .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
- UNWIND_HINT_IRET_REGS
- pushq $(~vector+0x80) /* Note: always in signed byte range */
- jmp common_interrupt
- .align 8
- vector=vector+1
- .endr
-END(irq_entries_start)
-
- .align 8
-ENTRY(spurious_entries_start)
- vector=FIRST_SYSTEM_VECTOR
- .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR)
- UNWIND_HINT_IRET_REGS
- pushq $(~vector+0x80) /* Note: always in signed byte range */
- jmp common_spurious
- .align 8
- vector=vector+1
- .endr
-END(spurious_entries_start)
+SYM_CODE_END(ret_from_fork)
+.popsection
.macro DEBUG_ENTRY_ASSERT_IRQS_OFF
#ifdef CONFIG_DEBUG_ENTRY
@@ -399,229 +316,258 @@
#endif
.endm
-/*
- * Enters the IRQ stack if we're not already using it. NMI-safe. Clobbers
- * flags and puts old RSP into old_rsp, and leaves all other GPRs alone.
- * Requires kernel GSBASE.
- *
- * The invariant is that, if irq_count != -1, then the IRQ stack is in use.
+/**
+ * idtentry_body - Macro to emit code calling the C function
+ * @cfunc: C function to be called
+ * @has_error_code: Hardware pushed error code on stack
*/
-.macro ENTER_IRQ_STACK regs=1 old_rsp save_ret=0
- DEBUG_ENTRY_ASSERT_IRQS_OFF
+.macro idtentry_body cfunc has_error_code:req
- .if \save_ret
- /*
- * If save_ret is set, the original stack contains one additional
- * entry -- the return address. Therefore, move the address one
- * entry below %rsp to \old_rsp.
- */
- leaq 8(%rsp), \old_rsp
- .else
- movq %rsp, \old_rsp
+ call error_entry
+ UNWIND_HINT_REGS
+
+ movq %rsp, %rdi /* pt_regs pointer into 1st argument*/
+
+ .if \has_error_code == 1
+ movq ORIG_RAX(%rsp), %rsi /* get error code into 2nd argument*/
+ movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */
.endif
- .if \regs
- UNWIND_HINT_REGS base=\old_rsp
+ call \cfunc
+
+ jmp error_return
+.endm
+
+/**
+ * idtentry - Macro to generate entry stubs for simple IDT entries
+ * @vector: Vector number
+ * @asmsym: ASM symbol for the entry point
+ * @cfunc: C function to be called
+ * @has_error_code: Hardware pushed error code on stack
+ *
+ * The macro emits code to set up the kernel context for straight forward
+ * and simple IDT entries. No IST stack, no paranoid entry checks.
+ */
+.macro idtentry vector asmsym cfunc has_error_code:req
+SYM_CODE_START(\asmsym)
+ UNWIND_HINT_IRET_REGS offset=\has_error_code*8
+ ASM_CLAC
+
+ .if \has_error_code == 0
+ pushq $-1 /* ORIG_RAX: no syscall to restart */
.endif
- incl PER_CPU_VAR(irq_count)
- jnz .Lirq_stack_push_old_rsp_\@
+ .if \vector == X86_TRAP_BP
+ /*
+ * If coming from kernel space, create a 6-word gap to allow the
+ * int3 handler to emulate a call instruction.
+ */
+ testb $3, CS-ORIG_RAX(%rsp)
+ jnz .Lfrom_usermode_no_gap_\@
+ .rept 6
+ pushq 5*8(%rsp)
+ .endr
+ UNWIND_HINT_IRET_REGS offset=8
+.Lfrom_usermode_no_gap_\@:
+ .endif
+
+ idtentry_body \cfunc \has_error_code
+
+_ASM_NOKPROBE(\asmsym)
+SYM_CODE_END(\asmsym)
+.endm
+
+/*
+ * Interrupt entry/exit.
+ *
+ + The interrupt stubs push (vector) onto the stack, which is the error_code
+ * position of idtentry exceptions, and jump to one of the two idtentry points
+ * (common/spurious).
+ *
+ * common_interrupt is a hotpath, align it to a cache line
+ */
+.macro idtentry_irq vector cfunc
+ .p2align CONFIG_X86_L1_CACHE_SHIFT
+ idtentry \vector asm_\cfunc \cfunc has_error_code=1
+.endm
+
+/*
+ * System vectors which invoke their handlers directly and are not
+ * going through the regular common device interrupt handling code.
+ */
+.macro idtentry_sysvec vector cfunc
+ idtentry \vector asm_\cfunc \cfunc has_error_code=0
+.endm
+
+/**
+ * idtentry_mce_db - Macro to generate entry stubs for #MC and #DB
+ * @vector: Vector number
+ * @asmsym: ASM symbol for the entry point
+ * @cfunc: C function to be called
+ *
+ * The macro emits code to set up the kernel context for #MC and #DB
+ *
+ * If the entry comes from user space it uses the normal entry path
+ * including the return to user space work and preemption checks on
+ * exit.
+ *
+ * If hits in kernel mode then it needs to go through the paranoid
+ * entry as the exception can hit any random state. No preemption
+ * check on exit to keep the paranoid path simple.
+ */
+.macro idtentry_mce_db vector asmsym cfunc
+SYM_CODE_START(\asmsym)
+ UNWIND_HINT_IRET_REGS
+ ASM_CLAC
+
+ pushq $-1 /* ORIG_RAX: no syscall to restart */
/*
- * Right now, if we just incremented irq_count to zero, we've
- * claimed the IRQ stack but we haven't switched to it yet.
- *
- * If anything is added that can interrupt us here without using IST,
- * it must be *extremely* careful to limit its stack usage. This
- * could include kprobes and a hypothetical future IST-less #DB
- * handler.
- *
- * The OOPS unwinder relies on the word at the top of the IRQ
- * stack linking back to the previous RSP for the entire time we're
- * on the IRQ stack. For this to work reliably, we need to write
- * it before we actually move ourselves to the IRQ stack.
+ * If the entry is from userspace, switch stacks and treat it as
+ * a normal entry.
*/
+ testb $3, CS-ORIG_RAX(%rsp)
+ jnz .Lfrom_usermode_switch_stack_\@
- movq \old_rsp, PER_CPU_VAR(irq_stack_backing_store + IRQ_STACK_SIZE - 8)
- movq PER_CPU_VAR(hardirq_stack_ptr), %rsp
+ /* paranoid_entry returns GS information for paranoid_exit in EBX. */
+ call paranoid_entry
-#ifdef CONFIG_DEBUG_ENTRY
+ UNWIND_HINT_REGS
+
+ movq %rsp, %rdi /* pt_regs pointer */
+
+ call \cfunc
+
+ jmp paranoid_exit
+
+ /* Switch to the regular task stack and use the noist entry point */
+.Lfrom_usermode_switch_stack_\@:
+ idtentry_body noist_\cfunc, has_error_code=0
+
+_ASM_NOKPROBE(\asmsym)
+SYM_CODE_END(\asmsym)
+.endm
+
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+/**
+ * idtentry_vc - Macro to generate entry stub for #VC
+ * @vector: Vector number
+ * @asmsym: ASM symbol for the entry point
+ * @cfunc: C function to be called
+ *
+ * The macro emits code to set up the kernel context for #VC. The #VC handler
+ * runs on an IST stack and needs to be able to cause nested #VC exceptions.
+ *
+ * To make this work the #VC entry code tries its best to pretend it doesn't use
+ * an IST stack by switching to the task stack if coming from user-space (which
+ * includes early SYSCALL entry path) or back to the stack in the IRET frame if
+ * entered from kernel-mode.
+ *
+ * If entered from kernel-mode the return stack is validated first, and if it is
+ * not safe to use (e.g. because it points to the entry stack) the #VC handler
+ * will switch to a fall-back stack (VC2) and call a special handler function.
+ *
+ * The macro is only used for one vector, but it is planned to be extended in
+ * the future for the #HV exception.
+ */
+.macro idtentry_vc vector asmsym cfunc
+SYM_CODE_START(\asmsym)
+ UNWIND_HINT_IRET_REGS
+ ASM_CLAC
+
/*
- * If the first movq above becomes wrong due to IRQ stack layout
- * changes, the only way we'll notice is if we try to unwind right
- * here. Assert that we set up the stack right to catch this type
- * of bug quickly.
+ * If the entry is from userspace, switch stacks and treat it as
+ * a normal entry.
*/
- cmpq -8(%rsp), \old_rsp
- je .Lirq_stack_okay\@
- ud2
- .Lirq_stack_okay\@:
+ testb $3, CS-ORIG_RAX(%rsp)
+ jnz .Lfrom_usermode_switch_stack_\@
+
+ /*
+ * paranoid_entry returns SWAPGS flag for paranoid_exit in EBX.
+ * EBX == 0 -> SWAPGS, EBX == 1 -> no SWAPGS
+ */
+ call paranoid_entry
+
+ UNWIND_HINT_REGS
+
+ /*
+ * Switch off the IST stack to make it free for nested exceptions. The
+ * vc_switch_off_ist() function will switch back to the interrupted
+ * stack if it is safe to do so. If not it switches to the VC fall-back
+ * stack.
+ */
+ movq %rsp, %rdi /* pt_regs pointer */
+ call vc_switch_off_ist
+ movq %rax, %rsp /* Switch to new stack */
+
+ UNWIND_HINT_REGS
+
+ /* Update pt_regs */
+ movq ORIG_RAX(%rsp), %rsi /* get error code into 2nd argument*/
+ movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */
+
+ movq %rsp, %rdi /* pt_regs pointer */
+
+ call kernel_\cfunc
+
+ /*
+ * No need to switch back to the IST stack. The current stack is either
+ * identical to the stack in the IRET frame or the VC fall-back stack,
+ * so it is definitly mapped even with PTI enabled.
+ */
+ jmp paranoid_exit
+
+ /* Switch to the regular task stack */
+.Lfrom_usermode_switch_stack_\@:
+ idtentry_body user_\cfunc, has_error_code=1
+
+_ASM_NOKPROBE(\asmsym)
+SYM_CODE_END(\asmsym)
+.endm
#endif
-.Lirq_stack_push_old_rsp_\@:
- pushq \old_rsp
-
- .if \regs
- UNWIND_HINT_REGS indirect=1
- .endif
-
- .if \save_ret
- /*
- * Push the return address to the stack. This return address can
- * be found at the "real" original RSP, which was offset by 8 at
- * the beginning of this macro.
- */
- pushq -8(\old_rsp)
- .endif
-.endm
-
/*
- * Undoes ENTER_IRQ_STACK.
+ * Double fault entry. Straight paranoid. No checks from which context
+ * this comes because for the espfix induced #DF this would do the wrong
+ * thing.
*/
-.macro LEAVE_IRQ_STACK regs=1
- DEBUG_ENTRY_ASSERT_IRQS_OFF
- /* We need to be off the IRQ stack before decrementing irq_count. */
- popq %rsp
-
- .if \regs
- UNWIND_HINT_REGS
- .endif
-
- /*
- * As in ENTER_IRQ_STACK, irq_count == 0, we are still claiming
- * the irq stack but we're not on it.
- */
-
- decl PER_CPU_VAR(irq_count)
-.endm
-
-/*
- * Interrupt entry helper function.
- *
- * Entry runs with interrupts off. Stack layout at entry:
- * +----------------------------------------------------+
- * | regs->ss |
- * | regs->rsp |
- * | regs->eflags |
- * | regs->cs |
- * | regs->ip |
- * +----------------------------------------------------+
- * | regs->orig_ax = ~(interrupt number) |
- * +----------------------------------------------------+
- * | return address |
- * +----------------------------------------------------+
- */
-ENTRY(interrupt_entry)
- UNWIND_HINT_IRET_REGS offset=16
+.macro idtentry_df vector asmsym cfunc
+SYM_CODE_START(\asmsym)
+ UNWIND_HINT_IRET_REGS offset=8
ASM_CLAC
- cld
- testb $3, CS-ORIG_RAX+8(%rsp)
- jz 1f
- SWAPGS
- FENCE_SWAPGS_USER_ENTRY
- /*
- * Switch to the thread stack. The IRET frame and orig_ax are
- * on the stack, as well as the return address. RDI..R12 are
- * not (yet) on the stack and space has not (yet) been
- * allocated for them.
- */
- pushq %rdi
+ /* paranoid_entry returns GS information for paranoid_exit in EBX. */
+ call paranoid_entry
+ UNWIND_HINT_REGS
- /* Need to switch before accessing the thread stack. */
- SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
- movq %rsp, %rdi
- movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+ movq %rsp, %rdi /* pt_regs pointer into first argument */
+ movq ORIG_RAX(%rsp), %rsi /* get error code into 2nd argument*/
+ movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */
+ call \cfunc
- /*
- * We have RDI, return address, and orig_ax on the stack on
- * top of the IRET frame. That means offset=24
- */
- UNWIND_HINT_IRET_REGS base=%rdi offset=24
+ jmp paranoid_exit
- pushq 7*8(%rdi) /* regs->ss */
- pushq 6*8(%rdi) /* regs->rsp */
- pushq 5*8(%rdi) /* regs->eflags */
- pushq 4*8(%rdi) /* regs->cs */
- pushq 3*8(%rdi) /* regs->ip */
- UNWIND_HINT_IRET_REGS
- pushq 2*8(%rdi) /* regs->orig_ax */
- pushq 8(%rdi) /* return address */
-
- movq (%rdi), %rdi
- jmp 2f
-1:
- FENCE_SWAPGS_KERNEL_ENTRY
-2:
- PUSH_AND_CLEAR_REGS save_ret=1
- ENCODE_FRAME_POINTER 8
-
- testb $3, CS+8(%rsp)
- jz 1f
-
- /*
- * IRQ from user mode.
- *
- * We need to tell lockdep that IRQs are off. We can't do this until
- * we fix gsbase, and we should do it before enter_from_user_mode
- * (which can take locks). Since TRACE_IRQS_OFF is idempotent,
- * the simplest way to handle it is to just call it twice if
- * we enter from user mode. There's no reason to optimize this since
- * TRACE_IRQS_OFF is a no-op if lockdep is off.
- */
- TRACE_IRQS_OFF
-
- CALL_enter_from_user_mode
-
-1:
- ENTER_IRQ_STACK old_rsp=%rdi save_ret=1
- /* We entered an interrupt context - irqs are off: */
- TRACE_IRQS_OFF
-
- ret
-END(interrupt_entry)
-_ASM_NOKPROBE(interrupt_entry)
-
-
-/* Interrupt entry/exit. */
+_ASM_NOKPROBE(\asmsym)
+SYM_CODE_END(\asmsym)
+.endm
/*
- * The interrupt stubs push (~vector+0x80) onto the stack and
- * then jump to common_spurious/interrupt.
+ * Include the defines which emit the idt entries which are shared
+ * shared between 32 and 64 bit and emit the __irqentry_text_* markers
+ * so the stacktrace boundary checks work.
*/
-common_spurious:
- addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */
- call interrupt_entry
- UNWIND_HINT_REGS indirect=1
- call smp_spurious_interrupt /* rdi points to pt_regs */
- jmp ret_from_intr
-END(common_spurious)
-_ASM_NOKPROBE(common_spurious)
+ .align 16
+ .globl __irqentry_text_start
+__irqentry_text_start:
-/* common_interrupt is a hotpath. Align it */
- .p2align CONFIG_X86_L1_CACHE_SHIFT
-common_interrupt:
- addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */
- call interrupt_entry
- UNWIND_HINT_REGS indirect=1
- call do_IRQ /* rdi points to pt_regs */
- /* 0(%rsp): old RSP */
-ret_from_intr:
- DISABLE_INTERRUPTS(CLBR_ANY)
- TRACE_IRQS_OFF
+#include <asm/idtentry.h>
- LEAVE_IRQ_STACK
+ .align 16
+ .globl __irqentry_text_end
+__irqentry_text_end:
- testb $3, CS(%rsp)
- jz retint_kernel
-
- /* Interrupt came from user space */
-GLOBAL(retint_user)
- mov %rsp,%rdi
- call prepare_exit_to_usermode
- TRACE_IRQS_IRETQ
-
-GLOBAL(swapgs_restore_regs_and_return_to_usermode)
+SYM_CODE_START_LOCAL(common_interrupt_return)
+SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
#ifdef CONFIG_DEBUG_ENTRY
/* Assert that pt_regs indicates user mode. */
testb $3, CS(%rsp)
@@ -629,6 +575,10 @@
ud2
1:
#endif
+#ifdef CONFIG_XEN_PV
+ ALTERNATIVE "", "jmp xenpv_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
+#endif
+
POP_REGS pop_rdi=0
/*
@@ -663,24 +613,7 @@
INTERRUPT_RETURN
-/* Returning to kernel space */
-retint_kernel:
-#ifdef CONFIG_PREEMPTION
- /* Interrupts are off */
- /* Check if we need preemption */
- btl $9, EFLAGS(%rsp) /* were interrupts off? */
- jnc 1f
- cmpl $0, PER_CPU_VAR(__preempt_count)
- jnz 1f
- call preempt_schedule_irq
-1:
-#endif
- /*
- * The iretq could re-enable interrupts:
- */
- TRACE_IRQS_IRETQ
-
-GLOBAL(restore_regs_and_return_to_kernel)
+SYM_INNER_LABEL(restore_regs_and_return_to_kernel, SYM_L_GLOBAL)
#ifdef CONFIG_DEBUG_ENTRY
/* Assert that pt_regs indicates kernel mode. */
testb $3, CS(%rsp)
@@ -696,7 +629,7 @@
*/
INTERRUPT_RETURN
-ENTRY(native_iret)
+SYM_INNER_LABEL_ALIGN(native_iret, SYM_L_GLOBAL)
UNWIND_HINT_IRET_REGS
/*
* Are we returning to a stack segment from the LDT? Note: in
@@ -707,12 +640,11 @@
jnz native_irq_return_ldt
#endif
-.global native_irq_return_iret
-native_irq_return_iret:
+SYM_INNER_LABEL(native_irq_return_iret, SYM_L_GLOBAL)
/*
* This may fault. Non-paranoid faults on return to userspace are
* handled by fixup_bad_iret. These include #SS, #GP, and #NP.
- * Double-faults due to espfix64 are handled in do_double_fault.
+ * Double-faults due to espfix64 are handled in exc_double_fault.
* Other faults here are fatal.
*/
iretq
@@ -741,7 +673,7 @@
*/
pushq %rdi /* Stash user RDI */
- SWAPGS /* to kernel GS */
+ swapgs /* to kernel GS */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi /* to kernel CR3 */
movq PER_CPU_VAR(espfix_waddr), %rdi
@@ -771,7 +703,7 @@
orq PER_CPU_VAR(espfix_stack), %rax
SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
- SWAPGS /* to user GS */
+ swapgs /* to user GS */
popq %rdi /* Restore user RDI */
movq %rax, %rsp
@@ -790,280 +722,32 @@
*/
jmp native_irq_return_iret
#endif
-END(common_interrupt)
-_ASM_NOKPROBE(common_interrupt)
+SYM_CODE_END(common_interrupt_return)
+_ASM_NOKPROBE(common_interrupt_return)
/*
- * APIC interrupts.
+ * Reload gs selector with exception handling
+ * edi: new selector
+ *
+ * Is in entry.text as it shouldn't be instrumented.
*/
-.macro apicinterrupt3 num sym do_sym
-ENTRY(\sym)
- UNWIND_HINT_IRET_REGS
- pushq $~(\num)
-.Lcommon_\sym:
- call interrupt_entry
- UNWIND_HINT_REGS indirect=1
- call \do_sym /* rdi points to pt_regs */
- jmp ret_from_intr
-END(\sym)
-_ASM_NOKPROBE(\sym)
-.endm
-
-/* Make sure APIC interrupt handlers end up in the irqentry section: */
-#define PUSH_SECTION_IRQENTRY .pushsection .irqentry.text, "ax"
-#define POP_SECTION_IRQENTRY .popsection
-
-.macro apicinterrupt num sym do_sym
-PUSH_SECTION_IRQENTRY
-apicinterrupt3 \num \sym \do_sym
-POP_SECTION_IRQENTRY
-.endm
-
-#ifdef CONFIG_SMP
-apicinterrupt3 IRQ_MOVE_CLEANUP_VECTOR irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
-apicinterrupt3 REBOOT_VECTOR reboot_interrupt smp_reboot_interrupt
-#endif
-
-#ifdef CONFIG_X86_UV
-apicinterrupt3 UV_BAU_MESSAGE uv_bau_message_intr1 uv_bau_message_interrupt
-#endif
-
-apicinterrupt LOCAL_TIMER_VECTOR apic_timer_interrupt smp_apic_timer_interrupt
-apicinterrupt X86_PLATFORM_IPI_VECTOR x86_platform_ipi smp_x86_platform_ipi
-
-#ifdef CONFIG_HAVE_KVM
-apicinterrupt3 POSTED_INTR_VECTOR kvm_posted_intr_ipi smp_kvm_posted_intr_ipi
-apicinterrupt3 POSTED_INTR_WAKEUP_VECTOR kvm_posted_intr_wakeup_ipi smp_kvm_posted_intr_wakeup_ipi
-apicinterrupt3 POSTED_INTR_NESTED_VECTOR kvm_posted_intr_nested_ipi smp_kvm_posted_intr_nested_ipi
-#endif
-
-#ifdef CONFIG_X86_MCE_THRESHOLD
-apicinterrupt THRESHOLD_APIC_VECTOR threshold_interrupt smp_threshold_interrupt
-#endif
-
-#ifdef CONFIG_X86_MCE_AMD
-apicinterrupt DEFERRED_ERROR_VECTOR deferred_error_interrupt smp_deferred_error_interrupt
-#endif
-
-#ifdef CONFIG_X86_THERMAL_VECTOR
-apicinterrupt THERMAL_APIC_VECTOR thermal_interrupt smp_thermal_interrupt
-#endif
-
-#ifdef CONFIG_SMP
-apicinterrupt CALL_FUNCTION_SINGLE_VECTOR call_function_single_interrupt smp_call_function_single_interrupt
-apicinterrupt CALL_FUNCTION_VECTOR call_function_interrupt smp_call_function_interrupt
-apicinterrupt RESCHEDULE_VECTOR reschedule_interrupt smp_reschedule_interrupt
-#endif
-
-apicinterrupt ERROR_APIC_VECTOR error_interrupt smp_error_interrupt
-apicinterrupt SPURIOUS_APIC_VECTOR spurious_interrupt smp_spurious_interrupt
-
-#ifdef CONFIG_IRQ_WORK
-apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
-#endif
-
-/*
- * Exception entry points.
- */
-#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + (x) * 8)
-
-.macro idtentry_part do_sym, has_error_code:req, read_cr2:req, paranoid:req, shift_ist=-1, ist_offset=0
-
- .if \paranoid
- call paranoid_entry
- /* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */
- .else
- call error_entry
- .endif
- UNWIND_HINT_REGS
-
- .if \read_cr2
- /*
- * Store CR2 early so subsequent faults cannot clobber it. Use R12 as
- * intermediate storage as RDX can be clobbered in enter_from_user_mode().
- * GET_CR2_INTO can clobber RAX.
- */
- GET_CR2_INTO(%r12);
- .endif
-
- .if \shift_ist != -1
- TRACE_IRQS_OFF_DEBUG /* reload IDT in case of recursion */
- .else
- TRACE_IRQS_OFF
- .endif
-
- .if \paranoid == 0
- testb $3, CS(%rsp)
- jz .Lfrom_kernel_no_context_tracking_\@
- CALL_enter_from_user_mode
-.Lfrom_kernel_no_context_tracking_\@:
- .endif
-
- movq %rsp, %rdi /* pt_regs pointer */
-
- .if \has_error_code
- movq ORIG_RAX(%rsp), %rsi /* get error code */
- movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */
- .else
- xorl %esi, %esi /* no error code */
- .endif
-
- .if \shift_ist != -1
- subq $\ist_offset, CPU_TSS_IST(\shift_ist)
- .endif
-
- .if \read_cr2
- movq %r12, %rdx /* Move CR2 into 3rd argument */
- .endif
-
- call \do_sym
-
- .if \shift_ist != -1
- addq $\ist_offset, CPU_TSS_IST(\shift_ist)
- .endif
-
- .if \paranoid
- /* this procedure expect "no swapgs" flag in ebx */
- jmp paranoid_exit
- .else
- jmp error_exit
- .endif
-
-.endm
-
-/**
- * idtentry - Generate an IDT entry stub
- * @sym: Name of the generated entry point
- * @do_sym: C function to be called
- * @has_error_code: True if this IDT vector has an error code on the stack
- * @paranoid: non-zero means that this vector may be invoked from
- * kernel mode with user GSBASE and/or user CR3.
- * 2 is special -- see below.
- * @shift_ist: Set to an IST index if entries from kernel mode should
- * decrement the IST stack so that nested entries get a
- * fresh stack. (This is for #DB, which has a nasty habit
- * of recursing.)
- * @create_gap: create a 6-word stack gap when coming from kernel mode.
- * @read_cr2: load CR2 into the 3rd argument; done before calling any C code
- *
- * idtentry generates an IDT stub that sets up a usable kernel context,
- * creates struct pt_regs, and calls @do_sym. The stub has the following
- * special behaviors:
- *
- * On an entry from user mode, the stub switches from the trampoline or
- * IST stack to the normal thread stack. On an exit to user mode, the
- * normal exit-to-usermode path is invoked.
- *
- * On an exit to kernel mode, if @paranoid == 0, we check for preemption,
- * whereas we omit the preemption check if @paranoid != 0. This is purely
- * because the implementation is simpler this way. The kernel only needs
- * to check for asynchronous kernel preemption when IRQ handlers return.
- *
- * If @paranoid == 0, then the stub will handle IRET faults by pretending
- * that the fault came from user mode. It will handle gs_change faults by
- * pretending that the fault happened with kernel GSBASE. Since this handling
- * is omitted for @paranoid != 0, the #GP, #SS, and #NP stubs must have
- * @paranoid == 0. This special handling will do the wrong thing for
- * espfix-induced #DF on IRET, so #DF must not use @paranoid == 0.
- *
- * @paranoid == 2 is special: the stub will never switch stacks. This is for
- * #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS.
- */
-.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0 create_gap=0 read_cr2=0
-ENTRY(\sym)
- UNWIND_HINT_IRET_REGS offset=\has_error_code*8
-
- /* Sanity check */
- .if \shift_ist != -1 && \paranoid != 1
- .error "using shift_ist requires paranoid=1"
- .endif
-
- .if \create_gap && \paranoid
- .error "using create_gap requires paranoid=0"
- .endif
-
- ASM_CLAC
-
- .if \has_error_code == 0
- pushq $-1 /* ORIG_RAX: no syscall to restart */
- .endif
-
- .if \paranoid == 1
- testb $3, CS-ORIG_RAX(%rsp) /* If coming from userspace, switch stacks */
- jnz .Lfrom_usermode_switch_stack_\@
- .endif
-
- .if \create_gap == 1
- /*
- * If coming from kernel space, create a 6-word gap to allow the
- * int3 handler to emulate a call instruction.
- */
- testb $3, CS-ORIG_RAX(%rsp)
- jnz .Lfrom_usermode_no_gap_\@
- .rept 6
- pushq 5*8(%rsp)
- .endr
- UNWIND_HINT_IRET_REGS offset=8
-.Lfrom_usermode_no_gap_\@:
- .endif
-
- idtentry_part \do_sym, \has_error_code, \read_cr2, \paranoid, \shift_ist, \ist_offset
-
- .if \paranoid == 1
- /*
- * Entry from userspace. Switch stacks and treat it
- * as a normal entry. This means that paranoid handlers
- * run in real process context if user_mode(regs).
- */
-.Lfrom_usermode_switch_stack_\@:
- idtentry_part \do_sym, \has_error_code, \read_cr2, paranoid=0
- .endif
-
-_ASM_NOKPROBE(\sym)
-END(\sym)
-.endm
-
-idtentry divide_error do_divide_error has_error_code=0
-idtentry overflow do_overflow has_error_code=0
-idtentry bounds do_bounds has_error_code=0
-idtentry invalid_op do_invalid_op has_error_code=0
-idtentry device_not_available do_device_not_available has_error_code=0
-idtentry double_fault do_double_fault has_error_code=1 paranoid=2 read_cr2=1
-idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0
-idtentry invalid_TSS do_invalid_TSS has_error_code=1
-idtentry segment_not_present do_segment_not_present has_error_code=1
-idtentry spurious_interrupt_bug do_spurious_interrupt_bug has_error_code=0
-idtentry coprocessor_error do_coprocessor_error has_error_code=0
-idtentry alignment_check do_alignment_check has_error_code=1
-idtentry simd_coprocessor_error do_simd_coprocessor_error has_error_code=0
-
-
- /*
- * Reload gs selector with exception handling
- * edi: new selector
- */
-ENTRY(native_load_gs_index)
+SYM_FUNC_START(asm_load_gs_index)
FRAME_BEGIN
- pushfq
- DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
- TRACE_IRQS_OFF
- SWAPGS
+ swapgs
.Lgs_change:
movl %edi, %gs
2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE
- SWAPGS
- TRACE_IRQS_FLAGS (%rsp)
- popfq
+ swapgs
FRAME_END
ret
-ENDPROC(native_load_gs_index)
-EXPORT_SYMBOL(native_load_gs_index)
+SYM_FUNC_END(asm_load_gs_index)
+EXPORT_SYMBOL(asm_load_gs_index)
_ASM_EXTABLE(.Lgs_change, .Lbad_gs)
.section .fixup, "ax"
/* running with kernelgs */
-.Lbad_gs:
- SWAPGS /* switch back to user gs */
+SYM_CODE_START_LOCAL_NOALIGN(.Lbad_gs)
+ swapgs /* switch back to user gs */
.macro ZAP_GS
/* This can't be a string because the preprocessor needs to see it. */
movl $__USER_DS, %eax
@@ -1073,22 +757,51 @@
xorl %eax, %eax
movl %eax, %gs
jmp 2b
+SYM_CODE_END(.Lbad_gs)
.previous
-/* Call softirq on interrupt stack. Interrupts are off. */
-ENTRY(do_softirq_own_stack)
- pushq %rbp
- mov %rsp, %rbp
- ENTER_IRQ_STACK regs=0 old_rsp=%r11
- call __do_softirq
- LEAVE_IRQ_STACK regs=0
+/*
+ * rdi: New stack pointer points to the top word of the stack
+ * rsi: Function pointer
+ * rdx: Function argument (can be NULL if none)
+ */
+SYM_FUNC_START(asm_call_on_stack)
+SYM_INNER_LABEL(asm_call_sysvec_on_stack, SYM_L_GLOBAL)
+SYM_INNER_LABEL(asm_call_irq_on_stack, SYM_L_GLOBAL)
+ /*
+ * Save the frame pointer unconditionally. This allows the ORC
+ * unwinder to handle the stack switch.
+ */
+ pushq %rbp
+ mov %rsp, %rbp
+
+ /*
+ * The unwinder relies on the word at the top of the new stack
+ * page linking back to the previous RSP.
+ */
+ mov %rsp, (%rdi)
+ mov %rdi, %rsp
+ /* Move the argument to the right place */
+ mov %rdx, %rdi
+
+1:
+ .pushsection .discard.instr_begin
+ .long 1b - .
+ .popsection
+
+ CALL_NOSPEC rsi
+
+2:
+ .pushsection .discard.instr_end
+ .long 2b - .
+ .popsection
+
+ /* Restore the previous stack pointer from RBP. */
leaveq
ret
-ENDPROC(do_softirq_own_stack)
+SYM_FUNC_END(asm_call_on_stack)
#ifdef CONFIG_XEN_PV
-idtentry hypervisor_callback xen_do_hypervisor_callback has_error_code=0
-
/*
* A note on the "critical region" in our callback handler.
* We want to avoid stacking callback handlers due to events occurring
@@ -1101,8 +814,10 @@
* So, on entry to the handler we detect whether we interrupted an
* existing activation in its critical region -- if so, we pop the current
* activation and restart the handler using the previous one.
+ *
+ * C calling convention: exc_xen_hypervisor_callback(struct *pt_regs)
*/
-ENTRY(xen_do_hypervisor_callback) /* do_hypervisor_callback(struct *pt_regs) */
+SYM_CODE_START_LOCAL(exc_xen_hypervisor_callback)
/*
* Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
@@ -1112,15 +827,10 @@
movq %rdi, %rsp /* we don't return, adjust the stack frame */
UNWIND_HINT_REGS
- ENTER_IRQ_STACK old_rsp=%r10
- call xen_evtchn_do_upcall
- LEAVE_IRQ_STACK
+ call xen_pv_evtchn_do_upcall
-#ifndef CONFIG_PREEMPTION
- call xen_maybe_preempt_hcall
-#endif
- jmp error_exit
-END(xen_do_hypervisor_callback)
+ jmp error_return
+SYM_CODE_END(exc_xen_hypervisor_callback)
/*
* Hypervisor uses this for application faults while it executes.
@@ -1135,7 +845,7 @@
* We distinguish between categories by comparing each saved segment register
* with its current contents: any discrepancy means we in category 1.
*/
-ENTRY(xen_failsafe_callback)
+SYM_CODE_START(xen_failsafe_callback)
UNWIND_HINT_EMPTY
movl %ds, %ecx
cmpw %cx, 0x10(%rsp)
@@ -1155,7 +865,7 @@
addq $0x30, %rsp
pushq $0 /* RIP */
UNWIND_HINT_IRET_REGS offset=8
- jmp general_protection
+ jmp asm_exc_general_protection
1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
movq (%rsp), %rcx
movq 8(%rsp), %r11
@@ -1164,71 +874,26 @@
pushq $-1 /* orig_ax = -1 => not a system call */
PUSH_AND_CLEAR_REGS
ENCODE_FRAME_POINTER
- jmp error_exit
-END(xen_failsafe_callback)
+ jmp error_return
+SYM_CODE_END(xen_failsafe_callback)
#endif /* CONFIG_XEN_PV */
-#ifdef CONFIG_XEN_PVHVM
-apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
- xen_hvm_callback_vector xen_evtchn_do_upcall
-#endif
-
-
-#if IS_ENABLED(CONFIG_HYPERV)
-apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
- hyperv_callback_vector hyperv_vector_handler
-
-apicinterrupt3 HYPERV_REENLIGHTENMENT_VECTOR \
- hyperv_reenlightenment_vector hyperv_reenlightenment_intr
-
-apicinterrupt3 HYPERV_STIMER0_VECTOR \
- hv_stimer0_callback_vector hv_stimer0_vector_handler
-#endif /* CONFIG_HYPERV */
-
-#if IS_ENABLED(CONFIG_ACRN_GUEST)
-apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
- acrn_hv_callback_vector acrn_hv_vector_handler
-#endif
-
-idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=IST_INDEX_DB ist_offset=DB_STACK_OFFSET
-idtentry int3 do_int3 has_error_code=0 create_gap=1
-idtentry stack_segment do_stack_segment has_error_code=1
-
-#ifdef CONFIG_XEN_PV
-idtentry xennmi do_nmi has_error_code=0
-idtentry xendebug do_debug has_error_code=0
-#endif
-
-idtentry general_protection do_general_protection has_error_code=1
-idtentry page_fault do_page_fault has_error_code=1 read_cr2=1
-
-#ifdef CONFIG_KVM_GUEST
-idtentry async_page_fault do_async_page_fault has_error_code=1 read_cr2=1
-#endif
-
-#ifdef CONFIG_X86_MCE
-idtentry machine_check do_mce has_error_code=0 paranoid=1
-#endif
-
/*
- * Save all registers in pt_regs, and switch gs if needed.
- * Use slow, but surefire "are we in kernel?" check.
- * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
+ * Save all registers in pt_regs. Return GSBASE related information
+ * in EBX depending on the availability of the FSGSBASE instructions:
+ *
+ * FSGSBASE R/EBX
+ * N 0 -> SWAPGS on exit
+ * 1 -> no SWAPGS on exit
+ *
+ * Y GSBASE value at entry, must be restored in paranoid_exit
*/
-ENTRY(paranoid_entry)
+SYM_CODE_START_LOCAL(paranoid_entry)
UNWIND_HINT_FUNC
cld
PUSH_AND_CLEAR_REGS save_ret=1
ENCODE_FRAME_POINTER 8
- movl $1, %ebx
- movl $MSR_GS_BASE, %ecx
- rdmsr
- testl %edx, %edx
- js 1f /* negative -> in kernel */
- SWAPGS
- xorl %ebx, %ebx
-1:
/*
* Always stash CR3 in %r14. This value will be restored,
* verbatim, at exit. Needed if paranoid_entry interrupted
@@ -1238,18 +903,57 @@
* This is also why CS (stashed in the "iret frame" by the
* hardware at entry) can not be used: this may be a return
* to kernel code, but with a user CR3 value.
+ *
+ * Switching CR3 does not depend on kernel GSBASE so it can
+ * be done before switching to the kernel GSBASE. This is
+ * required for FSGSBASE because the kernel GSBASE has to
+ * be retrieved from a kernel internal table.
*/
SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
/*
- * The above SAVE_AND_SWITCH_TO_KERNEL_CR3 macro doesn't do an
- * unconditional CR3 write, even in the PTI case. So do an lfence
- * to prevent GS speculation, regardless of whether PTI is enabled.
+ * Handling GSBASE depends on the availability of FSGSBASE.
+ *
+ * Without FSGSBASE the kernel enforces that negative GSBASE
+ * values indicate kernel GSBASE. With FSGSBASE no assumptions
+ * can be made about the GSBASE value when entering from user
+ * space.
*/
- FENCE_SWAPGS_KERNEL_ENTRY
+ ALTERNATIVE "jmp .Lparanoid_entry_checkgs", "", X86_FEATURE_FSGSBASE
+ /*
+ * Read the current GSBASE and store it in %rbx unconditionally,
+ * retrieve and set the current CPUs kernel GSBASE. The stored value
+ * has to be restored in paranoid_exit unconditionally.
+ *
+ * The unconditional write to GS base below ensures that no subsequent
+ * loads based on a mispredicted GS base can happen, therefore no LFENCE
+ * is needed here.
+ */
+ SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx
ret
-END(paranoid_entry)
+
+.Lparanoid_entry_checkgs:
+ /* EBX = 1 -> kernel GSBASE active, no restore required */
+ movl $1, %ebx
+
+ /*
+ * The kernel-enforced convention is a negative GSBASE indicates
+ * a kernel value. No SWAPGS needed on entry and exit.
+ */
+ movl $MSR_GS_BASE, %ecx
+ rdmsr
+ testl %edx, %edx
+ js .Lparanoid_kernel_gsbase
+
+ /* EBX = 0 -> SWAPGS required on exit */
+ xorl %ebx, %ebx
+ swapgs
+.Lparanoid_kernel_gsbase:
+
+ FENCE_SWAPGS_KERNEL_ENTRY
+ ret
+SYM_CODE_END(paranoid_entry)
/*
* "Paranoid" exit path from exception stack. This is invoked
@@ -1258,34 +962,51 @@
*
* We may be returning to very strange contexts (e.g. very early
* in syscall entry), so checking for preemption here would
- * be complicated. Fortunately, we there's no good reason
- * to try to handle preemption here.
+ * be complicated. Fortunately, there's no good reason to try
+ * to handle preemption here.
*
- * On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it)
+ * R/EBX contains the GSBASE related information depending on the
+ * availability of the FSGSBASE instructions:
+ *
+ * FSGSBASE R/EBX
+ * N 0 -> SWAPGS on exit
+ * 1 -> no SWAPGS on exit
+ *
+ * Y User space GSBASE, must be restored unconditionally
*/
-ENTRY(paranoid_exit)
+SYM_CODE_START_LOCAL(paranoid_exit)
UNWIND_HINT_REGS
- DISABLE_INTERRUPTS(CLBR_ANY)
- TRACE_IRQS_OFF_DEBUG
- testl %ebx, %ebx /* swapgs needed? */
- jnz .Lparanoid_exit_no_swapgs
- TRACE_IRQS_IRETQ
- /* Always restore stashed CR3 value (see paranoid_entry) */
- RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
- SWAPGS_UNSAFE_STACK
- jmp .Lparanoid_exit_restore
-.Lparanoid_exit_no_swapgs:
- TRACE_IRQS_IRETQ_DEBUG
- /* Always restore stashed CR3 value (see paranoid_entry) */
- RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
-.Lparanoid_exit_restore:
- jmp restore_regs_and_return_to_kernel
-END(paranoid_exit)
+ /*
+ * The order of operations is important. RESTORE_CR3 requires
+ * kernel GSBASE.
+ *
+ * NB to anyone to try to optimize this code: this code does
+ * not execute at all for exceptions from user mode. Those
+ * exceptions go through error_exit instead.
+ */
+ RESTORE_CR3 scratch_reg=%rax save_reg=%r14
+
+ /* Handle the three GSBASE cases */
+ ALTERNATIVE "jmp .Lparanoid_exit_checkgs", "", X86_FEATURE_FSGSBASE
+
+ /* With FSGSBASE enabled, unconditionally restore GSBASE */
+ wrgsbase %rbx
+ jmp restore_regs_and_return_to_kernel
+
+.Lparanoid_exit_checkgs:
+ /* On non-FSGSBASE systems, conditionally do SWAPGS */
+ testl %ebx, %ebx
+ jnz restore_regs_and_return_to_kernel
+
+ /* We are returning to a context with user GSBASE */
+ swapgs
+ jmp restore_regs_and_return_to_kernel
+SYM_CODE_END(paranoid_exit)
/*
* Save all registers in pt_regs, and switch GS if needed.
*/
-ENTRY(error_entry)
+SYM_CODE_START_LOCAL(error_entry)
UNWIND_HINT_FUNC
cld
PUSH_AND_CLEAR_REGS save_ret=1
@@ -1312,11 +1033,6 @@
pushq %r12
ret
-.Lerror_entry_done_lfence:
- FENCE_SWAPGS_KERNEL_ENTRY
-.Lerror_entry_done:
- ret
-
/*
* There are two places in the kernel that can potentially fault with
* usergs. Handle them here. B stepping K8s sometimes report a
@@ -1339,9 +1055,14 @@
* .Lgs_change's error handler with kernel gsbase.
*/
SWAPGS
- FENCE_SWAPGS_USER_ENTRY
- SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
- jmp .Lerror_entry_done
+
+ /*
+ * Issue an LFENCE to prevent GS speculation, regardless of whether it is a
+ * kernel or user gsbase.
+ */
+.Lerror_entry_done_lfence:
+ FENCE_SWAPGS_KERNEL_ENTRY
+ ret
.Lbstep_iret:
/* Fix truncated RIP */
@@ -1365,16 +1086,15 @@
call fixup_bad_iret
mov %rax, %rsp
jmp .Lerror_entry_from_usermode_after_swapgs
-END(error_entry)
+SYM_CODE_END(error_entry)
-ENTRY(error_exit)
+SYM_CODE_START_LOCAL(error_return)
UNWIND_HINT_REGS
- DISABLE_INTERRUPTS(CLBR_ANY)
- TRACE_IRQS_OFF
+ DEBUG_ENTRY_ASSERT_IRQS_OFF
testb $3, CS(%rsp)
- jz retint_kernel
- jmp retint_user
-END(error_exit)
+ jz restore_regs_and_return_to_kernel
+ jmp swapgs_restore_regs_and_return_to_usermode
+SYM_CODE_END(error_return)
/*
* Runs on exception stack. Xen PV does not go through this path at all,
@@ -1384,7 +1104,7 @@
* %r14: Used to save/restore the CR3 of the interrupted context
* when PAGE_TABLE_ISOLATION is in use. Do not clobber.
*/
-ENTRY(nmi)
+SYM_CODE_START(asm_exc_nmi)
UNWIND_HINT_IRET_REGS
/*
@@ -1469,7 +1189,7 @@
movq %rsp, %rdi
movq $-1, %rsi
- call do_nmi
+ call exc_nmi
/*
* Return back to user mode. We must *not* do the normal exit
@@ -1526,7 +1246,7 @@
* end_repeat_nmi, then we are a nested NMI. We must not
* modify the "iret" frame because it's being written by
* the outer NMI. That's okay; the outer NMI handler is
- * about to about to call do_nmi anyway, so we can just
+ * about to about to call exc_nmi() anyway, so we can just
* resume the outer NMI.
*/
@@ -1645,7 +1365,7 @@
* RSP is pointing to "outermost RIP". gsbase is unknown, but, if
* we're repeating an NMI, gsbase has the same value that it had on
* the first iteration. paranoid_entry will load the kernel
- * gsbase if needed before we call do_nmi. "NMI executing"
+ * gsbase if needed before we call exc_nmi(). "NMI executing"
* is zero.
*/
movq $1, 10*8(%rsp) /* Set "NMI executing". */
@@ -1679,18 +1399,34 @@
call paranoid_entry
UNWIND_HINT_REGS
- /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
movq %rsp, %rdi
movq $-1, %rsi
- call do_nmi
+ call exc_nmi
/* Always restore stashed CR3 value (see paranoid_entry) */
RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
- testl %ebx, %ebx /* swapgs needed? */
+ /*
+ * The above invocation of paranoid_entry stored the GSBASE
+ * related information in R/EBX depending on the availability
+ * of FSGSBASE.
+ *
+ * If FSGSBASE is enabled, restore the saved GSBASE value
+ * unconditionally, otherwise take the conditional SWAPGS path.
+ */
+ ALTERNATIVE "jmp nmi_no_fsgsbase", "", X86_FEATURE_FSGSBASE
+
+ wrgsbase %rbx
+ jmp nmi_restore
+
+nmi_no_fsgsbase:
+ /* EBX == 0 -> invoke SWAPGS */
+ testl %ebx, %ebx
jnz nmi_restore
+
nmi_swapgs:
- SWAPGS_UNSAFE_STACK
+ swapgs
+
nmi_restore:
POP_REGS
@@ -1719,21 +1455,22 @@
* about espfix64 on the way back to kernel mode.
*/
iretq
-END(nmi)
+SYM_CODE_END(asm_exc_nmi)
#ifndef CONFIG_IA32_EMULATION
/*
* This handles SYSCALL from 32-bit code. There is no way to program
* MSRs to fully disable 32-bit SYSCALL.
*/
-ENTRY(ignore_sysret)
+SYM_CODE_START(ignore_sysret)
UNWIND_HINT_EMPTY
mov $-ENOSYS, %eax
- sysret
-END(ignore_sysret)
+ sysretl
+SYM_CODE_END(ignore_sysret)
#endif
-ENTRY(rewind_stack_do_exit)
+.pushsection .text, "ax"
+SYM_CODE_START(rewind_stack_do_exit)
UNWIND_HINT_FUNC
/* Prevent any naive code from trying to unwind to our caller. */
xorl %ebp, %ebp
@@ -1743,4 +1480,5 @@
UNWIND_HINT_REGS
call do_exit
-END(rewind_stack_do_exit)
+SYM_CODE_END(rewind_stack_do_exit)
+.popsection
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 3991377..0051cf5 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -46,15 +46,32 @@
* ebp user stack
* 0(%ebp) arg6
*/
-ENTRY(entry_SYSENTER_compat)
+SYM_CODE_START(entry_SYSENTER_compat)
+ UNWIND_HINT_EMPTY
/* Interrupts are off on entry. */
SWAPGS
- /* We are about to clobber %rsp anyway, clobbering here is OK */
- SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
+ pushq %rax
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+ popq %rax
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+ /* Construct struct pt_regs on stack */
+ pushq $__USER32_DS /* pt_regs->ss */
+ pushq $0 /* pt_regs->sp = 0 (placeholder) */
+
+ /*
+ * Push flags. This is nasty. First, interrupts are currently
+ * off, but we need pt_regs->flags to have IF set. Second, if TS
+ * was set in usermode, it's still set, and we're singlestepping
+ * through this code. do_SYSENTER_32() will fix up IF.
+ */
+ pushfq /* pt_regs->flags (except IF = 0) */
+ pushq $__USER32_CS /* pt_regs->cs */
+ pushq $0 /* pt_regs->ip = 0 (placeholder) */
+SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
+
/*
* User tracing code (ptrace or signal handlers) might assume that
* the saved RAX contains a 32-bit number when we're invoking a 32-bit
@@ -64,20 +81,6 @@
*/
movl %eax, %eax
- /* Construct struct pt_regs on stack */
- pushq $__USER32_DS /* pt_regs->ss */
- pushq %rbp /* pt_regs->sp (stashed in bp) */
-
- /*
- * Push flags. This is nasty. First, interrupts are currently
- * off, but we need pt_regs->flags to have IF set. Second, even
- * if TF was set when SYSENTER started, it's clear by now. We fix
- * that later using TIF_SINGLESTEP.
- */
- pushfq /* pt_regs->flags (except IF = 0) */
- orl $X86_EFLAGS_IF, (%rsp) /* Fix saved flags */
- pushq $__USER32_CS /* pt_regs->cs */
- pushq $0 /* pt_regs->ip = 0 (placeholder) */
pushq %rax /* pt_regs->orig_ax */
pushq %rdi /* pt_regs->di */
pushq %rsi /* pt_regs->si */
@@ -104,6 +107,9 @@
xorl %r14d, %r14d /* nospec r14 */
pushq $0 /* pt_regs->r15 = 0 */
xorl %r15d, %r15d /* nospec r15 */
+
+ UNWIND_HINT_REGS
+
cld
/*
@@ -129,25 +135,19 @@
jnz .Lsysenter_fix_flags
.Lsysenter_flags_fixed:
- /*
- * User mode is traced as though IRQs are on, and SYSENTER
- * turned them off.
- */
- TRACE_IRQS_OFF
-
movq %rsp, %rdi
- call do_fast_syscall_32
+ call do_SYSENTER_32
/* XEN PV guests always use IRET path */
- ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
- "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
+ ALTERNATIVE "testl %eax, %eax; jz swapgs_restore_regs_and_return_to_usermode", \
+ "jmp swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
jmp sysret32_from_system_call
.Lsysenter_fix_flags:
pushq $X86_EFLAGS_FIXED
popfq
jmp .Lsysenter_flags_fixed
-GLOBAL(__end_entry_SYSENTER_compat)
-ENDPROC(entry_SYSENTER_compat)
+SYM_INNER_LABEL(__end_entry_SYSENTER_compat, SYM_L_GLOBAL)
+SYM_CODE_END(entry_SYSENTER_compat)
/*
* 32-bit SYSCALL entry.
@@ -196,7 +196,8 @@
* esp user stack
* 0(%esp) arg6
*/
-ENTRY(entry_SYSCALL_compat)
+SYM_CODE_START(entry_SYSCALL_compat)
+ UNWIND_HINT_EMPTY
/* Interrupts are off on entry. */
swapgs
@@ -209,13 +210,15 @@
/* Switch to the kernel stack */
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL)
+
/* Construct struct pt_regs on stack */
pushq $__USER32_DS /* pt_regs->ss */
pushq %r8 /* pt_regs->sp */
pushq %r11 /* pt_regs->flags */
pushq $__USER32_CS /* pt_regs->cs */
pushq %rcx /* pt_regs->ip */
-GLOBAL(entry_SYSCALL_compat_after_hwframe)
+SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
movl %eax, %eax /* discard orig_ax high bits */
pushq %rax /* pt_regs->orig_ax */
pushq %rdi /* pt_regs->di */
@@ -247,17 +250,13 @@
pushq $0 /* pt_regs->r15 = 0 */
xorl %r15d, %r15d /* nospec r15 */
- /*
- * User mode is traced as though IRQs are on, and SYSENTER
- * turned them off.
- */
- TRACE_IRQS_OFF
+ UNWIND_HINT_REGS
movq %rsp, %rdi
call do_fast_syscall_32
/* XEN PV guests always use IRET path */
- ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
- "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
+ ALTERNATIVE "testl %eax, %eax; jz swapgs_restore_regs_and_return_to_usermode", \
+ "jmp swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
/* Opportunistic SYSRET */
sysret32_from_system_call:
@@ -266,7 +265,7 @@
* stack. So let's erase the thread stack right now.
*/
STACKLEAK_ERASE
- TRACE_IRQS_ON /* User mode traces as IRQs on. */
+
movq RBX(%rsp), %rbx /* pt_regs->rbx */
movq RBP(%rsp), %rbp /* pt_regs->rbp */
movq EFLAGS(%rsp), %r11 /* pt_regs->flags (in r11) */
@@ -311,7 +310,7 @@
xorl %r10d, %r10d
swapgs
sysretl
-END(entry_SYSCALL_compat)
+SYM_CODE_END(entry_SYSCALL_compat)
/*
* 32-bit legacy system call entry.
@@ -339,7 +338,8 @@
* edi arg5
* ebp arg6
*/
-ENTRY(entry_INT80_compat)
+SYM_CODE_START(entry_INT80_compat)
+ UNWIND_HINT_EMPTY
/*
* Interrupts are off on entry.
*/
@@ -361,8 +361,11 @@
/* Need to switch before accessing the thread stack. */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+
/* In the Xen PV case we already run on the thread stack. */
- ALTERNATIVE "movq %rsp, %rdi", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV
+ ALTERNATIVE "", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV
+
+ movq %rsp, %rdi
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
pushq 6*8(%rdi) /* regs->ss */
@@ -401,19 +404,12 @@
xorl %r14d, %r14d /* nospec r14 */
pushq %r15 /* pt_regs->r15 */
xorl %r15d, %r15d /* nospec r15 */
- cld
- /*
- * User mode is traced as though IRQs are on, and the interrupt
- * gate turned them off.
- */
- TRACE_IRQS_OFF
+ UNWIND_HINT_REGS
+
+ cld
movq %rsp, %rdi
call do_int80_syscall_32
-.Lsyscall_32_done:
-
- /* Go back to user mode. */
- TRACE_IRQS_ON
jmp swapgs_restore_regs_and_return_to_usermode
-END(entry_INT80_compat)
+SYM_CODE_END(entry_INT80_compat)
diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c
index 7d17b3a..86eb0d8 100644
--- a/arch/x86/entry/syscall_32.c
+++ b/arch/x86/entry/syscall_32.c
@@ -4,29 +4,22 @@
#include <linux/linkage.h>
#include <linux/sys.h>
#include <linux/cache.h>
-#include <asm/asm-offsets.h>
+#include <linux/syscalls.h>
+#include <asm/unistd.h>
#include <asm/syscall.h>
-#ifdef CONFIG_IA32_EMULATION
-/* On X86_64, we use struct pt_regs * to pass parameters to syscalls */
-#define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *);
-#define __sys_ni_syscall __ia32_sys_ni_syscall
-#else /* CONFIG_IA32_EMULATION */
-#define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
-extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
-#define __sys_ni_syscall sys_ni_syscall
-#endif /* CONFIG_IA32_EMULATION */
+#define __SYSCALL_I386(nr, sym) extern long __ia32_##sym(const struct pt_regs *);
#include <asm/syscalls_32.h>
#undef __SYSCALL_I386
-#define __SYSCALL_I386(nr, sym, qual) [nr] = sym,
+#define __SYSCALL_I386(nr, sym) [nr] = __ia32_##sym,
-__visible const sys_call_ptr_t ia32_sys_call_table[__NR_syscall_compat_max+1] = {
+__visible const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = {
/*
* Smells like a compiler bug -- it doesn't work
* when the & below is removed.
*/
- [0 ... __NR_syscall_compat_max] = &__sys_ni_syscall,
+ [0 ... __NR_ia32_syscall_max] = &__ia32_sys_ni_syscall,
#include <asm/syscalls_32.h>
};
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index adf619a..1594ec7 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -5,24 +5,17 @@
#include <linux/sys.h>
#include <linux/cache.h>
#include <linux/syscalls.h>
-#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
#include <asm/syscall.h>
-extern asmlinkage long sys_ni_syscall(void);
+#define __SYSCALL_X32(nr, sym)
+#define __SYSCALL_COMMON(nr, sym) __SYSCALL_64(nr, sym)
-SYSCALL_DEFINE0(ni_syscall)
-{
- return sys_ni_syscall();
-}
-
-#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *);
-#define __SYSCALL_X32(nr, sym, qual) __SYSCALL_64(nr, sym, qual)
+#define __SYSCALL_64(nr, sym) extern long __x64_##sym(const struct pt_regs *);
#include <asm/syscalls_64.h>
#undef __SYSCALL_64
-#undef __SYSCALL_X32
-#define __SYSCALL_64(nr, sym, qual) [nr] = sym,
-#define __SYSCALL_X32(nr, sym, qual)
+#define __SYSCALL_64(nr, sym) [nr] = __x64_##sym,
asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
/*
@@ -32,25 +25,3 @@
[0 ... __NR_syscall_max] = &__x64_sys_ni_syscall,
#include <asm/syscalls_64.h>
};
-
-#undef __SYSCALL_64
-#undef __SYSCALL_X32
-
-#ifdef CONFIG_X86_X32_ABI
-
-#define __SYSCALL_64(nr, sym, qual)
-#define __SYSCALL_X32(nr, sym, qual) [nr] = sym,
-
-asmlinkage const sys_call_ptr_t x32_sys_call_table[__NR_syscall_x32_max+1] = {
- /*
- * Smells like a compiler bug -- it doesn't work
- * when the & below is removed.
- */
- [0 ... __NR_syscall_x32_max] = &__x64_sys_ni_syscall,
-#include <asm/syscalls_64.h>
-};
-
-#undef __SYSCALL_64
-#undef __SYSCALL_X32
-
-#endif
diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c
new file mode 100644
index 0000000..f2fe0a3
--- /dev/null
+++ b/arch/x86/entry/syscall_x32.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+/* System call table for x32 ABI. */
+
+#include <linux/linkage.h>
+#include <linux/sys.h>
+#include <linux/cache.h>
+#include <linux/syscalls.h>
+#include <asm/unistd.h>
+#include <asm/syscall.h>
+
+/*
+ * Reuse the 64-bit entry points for the x32 versions that occupy different
+ * slots in the syscall table.
+ */
+#define __x32_sys_readv __x64_sys_readv
+#define __x32_sys_writev __x64_sys_writev
+#define __x32_sys_getsockopt __x64_sys_getsockopt
+#define __x32_sys_setsockopt __x64_sys_setsockopt
+#define __x32_sys_vmsplice __x64_sys_vmsplice
+#define __x32_sys_process_vm_readv __x64_sys_process_vm_readv
+#define __x32_sys_process_vm_writev __x64_sys_process_vm_writev
+
+#define __SYSCALL_64(nr, sym)
+
+#define __SYSCALL_X32(nr, sym) extern long __x32_##sym(const struct pt_regs *);
+#define __SYSCALL_COMMON(nr, sym) extern long __x64_##sym(const struct pt_regs *);
+#include <asm/syscalls_64.h>
+#undef __SYSCALL_X32
+#undef __SYSCALL_COMMON
+
+#define __SYSCALL_X32(nr, sym) [nr] = __x32_##sym,
+#define __SYSCALL_COMMON(nr, sym) [nr] = __x64_##sym,
+
+asmlinkage const sys_call_ptr_t x32_sys_call_table[__NR_x32_syscall_max+1] = {
+ /*
+ * Smells like a compiler bug -- it doesn't work
+ * when the & below is removed.
+ */
+ [0 ... __NR_x32_syscall_max] = &__x64_sys_ni_syscall,
+#include <asm/syscalls_64.h>
+};
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 15908eb..0d0667a 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -11,432 +11,437 @@
#
# The abi is always "i386" for this file.
#
-0 i386 restart_syscall sys_restart_syscall __ia32_sys_restart_syscall
-1 i386 exit sys_exit __ia32_sys_exit
-2 i386 fork sys_fork __ia32_sys_fork
-3 i386 read sys_read __ia32_sys_read
-4 i386 write sys_write __ia32_sys_write
-5 i386 open sys_open __ia32_compat_sys_open
-6 i386 close sys_close __ia32_sys_close
-7 i386 waitpid sys_waitpid __ia32_sys_waitpid
-8 i386 creat sys_creat __ia32_sys_creat
-9 i386 link sys_link __ia32_sys_link
-10 i386 unlink sys_unlink __ia32_sys_unlink
-11 i386 execve sys_execve __ia32_compat_sys_execve
-12 i386 chdir sys_chdir __ia32_sys_chdir
-13 i386 time sys_time32 __ia32_sys_time32
-14 i386 mknod sys_mknod __ia32_sys_mknod
-15 i386 chmod sys_chmod __ia32_sys_chmod
-16 i386 lchown sys_lchown16 __ia32_sys_lchown16
+0 i386 restart_syscall sys_restart_syscall
+1 i386 exit sys_exit
+2 i386 fork sys_fork
+3 i386 read sys_read
+4 i386 write sys_write
+5 i386 open sys_open compat_sys_open
+6 i386 close sys_close
+7 i386 waitpid sys_waitpid
+8 i386 creat sys_creat
+9 i386 link sys_link
+10 i386 unlink sys_unlink
+11 i386 execve sys_execve compat_sys_execve
+12 i386 chdir sys_chdir
+13 i386 time sys_time32
+14 i386 mknod sys_mknod
+15 i386 chmod sys_chmod
+16 i386 lchown sys_lchown16
17 i386 break
-18 i386 oldstat sys_stat __ia32_sys_stat
-19 i386 lseek sys_lseek __ia32_compat_sys_lseek
-20 i386 getpid sys_getpid __ia32_sys_getpid
-21 i386 mount sys_mount __ia32_compat_sys_mount
-22 i386 umount sys_oldumount __ia32_sys_oldumount
-23 i386 setuid sys_setuid16 __ia32_sys_setuid16
-24 i386 getuid sys_getuid16 __ia32_sys_getuid16
-25 i386 stime sys_stime32 __ia32_sys_stime32
-26 i386 ptrace sys_ptrace __ia32_compat_sys_ptrace
-27 i386 alarm sys_alarm __ia32_sys_alarm
-28 i386 oldfstat sys_fstat __ia32_sys_fstat
-29 i386 pause sys_pause __ia32_sys_pause
-30 i386 utime sys_utime32 __ia32_sys_utime32
+18 i386 oldstat sys_stat
+19 i386 lseek sys_lseek compat_sys_lseek
+20 i386 getpid sys_getpid
+21 i386 mount sys_mount
+22 i386 umount sys_oldumount
+23 i386 setuid sys_setuid16
+24 i386 getuid sys_getuid16
+25 i386 stime sys_stime32
+26 i386 ptrace sys_ptrace compat_sys_ptrace
+27 i386 alarm sys_alarm
+28 i386 oldfstat sys_fstat
+29 i386 pause sys_pause
+30 i386 utime sys_utime32
31 i386 stty
32 i386 gtty
-33 i386 access sys_access __ia32_sys_access
-34 i386 nice sys_nice __ia32_sys_nice
+33 i386 access sys_access
+34 i386 nice sys_nice
35 i386 ftime
-36 i386 sync sys_sync __ia32_sys_sync
-37 i386 kill sys_kill __ia32_sys_kill
-38 i386 rename sys_rename __ia32_sys_rename
-39 i386 mkdir sys_mkdir __ia32_sys_mkdir
-40 i386 rmdir sys_rmdir __ia32_sys_rmdir
-41 i386 dup sys_dup __ia32_sys_dup
-42 i386 pipe sys_pipe __ia32_sys_pipe
-43 i386 times sys_times __ia32_compat_sys_times
+36 i386 sync sys_sync
+37 i386 kill sys_kill
+38 i386 rename sys_rename
+39 i386 mkdir sys_mkdir
+40 i386 rmdir sys_rmdir
+41 i386 dup sys_dup
+42 i386 pipe sys_pipe
+43 i386 times sys_times compat_sys_times
44 i386 prof
-45 i386 brk sys_brk __ia32_sys_brk
-46 i386 setgid sys_setgid16 __ia32_sys_setgid16
-47 i386 getgid sys_getgid16 __ia32_sys_getgid16
-48 i386 signal sys_signal __ia32_sys_signal
-49 i386 geteuid sys_geteuid16 __ia32_sys_geteuid16
-50 i386 getegid sys_getegid16 __ia32_sys_getegid16
-51 i386 acct sys_acct __ia32_sys_acct
-52 i386 umount2 sys_umount __ia32_sys_umount
+45 i386 brk sys_brk
+46 i386 setgid sys_setgid16
+47 i386 getgid sys_getgid16
+48 i386 signal sys_signal
+49 i386 geteuid sys_geteuid16
+50 i386 getegid sys_getegid16
+51 i386 acct sys_acct
+52 i386 umount2 sys_umount
53 i386 lock
-54 i386 ioctl sys_ioctl __ia32_compat_sys_ioctl
-55 i386 fcntl sys_fcntl __ia32_compat_sys_fcntl64
+54 i386 ioctl sys_ioctl compat_sys_ioctl
+55 i386 fcntl sys_fcntl compat_sys_fcntl64
56 i386 mpx
-57 i386 setpgid sys_setpgid __ia32_sys_setpgid
+57 i386 setpgid sys_setpgid
58 i386 ulimit
-59 i386 oldolduname sys_olduname __ia32_sys_olduname
-60 i386 umask sys_umask __ia32_sys_umask
-61 i386 chroot sys_chroot __ia32_sys_chroot
-62 i386 ustat sys_ustat __ia32_compat_sys_ustat
-63 i386 dup2 sys_dup2 __ia32_sys_dup2
-64 i386 getppid sys_getppid __ia32_sys_getppid
-65 i386 getpgrp sys_getpgrp __ia32_sys_getpgrp
-66 i386 setsid sys_setsid __ia32_sys_setsid
-67 i386 sigaction sys_sigaction __ia32_compat_sys_sigaction
-68 i386 sgetmask sys_sgetmask __ia32_sys_sgetmask
-69 i386 ssetmask sys_ssetmask __ia32_sys_ssetmask
-70 i386 setreuid sys_setreuid16 __ia32_sys_setreuid16
-71 i386 setregid sys_setregid16 __ia32_sys_setregid16
-72 i386 sigsuspend sys_sigsuspend __ia32_sys_sigsuspend
-73 i386 sigpending sys_sigpending __ia32_compat_sys_sigpending
-74 i386 sethostname sys_sethostname __ia32_sys_sethostname
-75 i386 setrlimit sys_setrlimit __ia32_compat_sys_setrlimit
-76 i386 getrlimit sys_old_getrlimit __ia32_compat_sys_old_getrlimit
-77 i386 getrusage sys_getrusage __ia32_compat_sys_getrusage
-78 i386 gettimeofday sys_gettimeofday __ia32_compat_sys_gettimeofday
-79 i386 settimeofday sys_settimeofday __ia32_compat_sys_settimeofday
-80 i386 getgroups sys_getgroups16 __ia32_sys_getgroups16
-81 i386 setgroups sys_setgroups16 __ia32_sys_setgroups16
-82 i386 select sys_old_select __ia32_compat_sys_old_select
-83 i386 symlink sys_symlink __ia32_sys_symlink
-84 i386 oldlstat sys_lstat __ia32_sys_lstat
-85 i386 readlink sys_readlink __ia32_sys_readlink
-86 i386 uselib sys_uselib __ia32_sys_uselib
-87 i386 swapon sys_swapon __ia32_sys_swapon
-88 i386 reboot sys_reboot __ia32_sys_reboot
-89 i386 readdir sys_old_readdir __ia32_compat_sys_old_readdir
-90 i386 mmap sys_old_mmap __ia32_compat_sys_x86_mmap
-91 i386 munmap sys_munmap __ia32_sys_munmap
-92 i386 truncate sys_truncate __ia32_compat_sys_truncate
-93 i386 ftruncate sys_ftruncate __ia32_compat_sys_ftruncate
-94 i386 fchmod sys_fchmod __ia32_sys_fchmod
-95 i386 fchown sys_fchown16 __ia32_sys_fchown16
-96 i386 getpriority sys_getpriority __ia32_sys_getpriority
-97 i386 setpriority sys_setpriority __ia32_sys_setpriority
+59 i386 oldolduname sys_olduname
+60 i386 umask sys_umask
+61 i386 chroot sys_chroot
+62 i386 ustat sys_ustat compat_sys_ustat
+63 i386 dup2 sys_dup2
+64 i386 getppid sys_getppid
+65 i386 getpgrp sys_getpgrp
+66 i386 setsid sys_setsid
+67 i386 sigaction sys_sigaction compat_sys_sigaction
+68 i386 sgetmask sys_sgetmask
+69 i386 ssetmask sys_ssetmask
+70 i386 setreuid sys_setreuid16
+71 i386 setregid sys_setregid16
+72 i386 sigsuspend sys_sigsuspend
+73 i386 sigpending sys_sigpending compat_sys_sigpending
+74 i386 sethostname sys_sethostname
+75 i386 setrlimit sys_setrlimit compat_sys_setrlimit
+76 i386 getrlimit sys_old_getrlimit compat_sys_old_getrlimit
+77 i386 getrusage sys_getrusage compat_sys_getrusage
+78 i386 gettimeofday sys_gettimeofday compat_sys_gettimeofday
+79 i386 settimeofday sys_settimeofday compat_sys_settimeofday
+80 i386 getgroups sys_getgroups16
+81 i386 setgroups sys_setgroups16
+82 i386 select sys_old_select compat_sys_old_select
+83 i386 symlink sys_symlink
+84 i386 oldlstat sys_lstat
+85 i386 readlink sys_readlink
+86 i386 uselib sys_uselib
+87 i386 swapon sys_swapon
+88 i386 reboot sys_reboot
+89 i386 readdir sys_old_readdir compat_sys_old_readdir
+90 i386 mmap sys_old_mmap compat_sys_ia32_mmap
+91 i386 munmap sys_munmap
+92 i386 truncate sys_truncate compat_sys_truncate
+93 i386 ftruncate sys_ftruncate compat_sys_ftruncate
+94 i386 fchmod sys_fchmod
+95 i386 fchown sys_fchown16
+96 i386 getpriority sys_getpriority
+97 i386 setpriority sys_setpriority
98 i386 profil
-99 i386 statfs sys_statfs __ia32_compat_sys_statfs
-100 i386 fstatfs sys_fstatfs __ia32_compat_sys_fstatfs
-101 i386 ioperm sys_ioperm __ia32_sys_ioperm
-102 i386 socketcall sys_socketcall __ia32_compat_sys_socketcall
-103 i386 syslog sys_syslog __ia32_sys_syslog
-104 i386 setitimer sys_setitimer __ia32_compat_sys_setitimer
-105 i386 getitimer sys_getitimer __ia32_compat_sys_getitimer
-106 i386 stat sys_newstat __ia32_compat_sys_newstat
-107 i386 lstat sys_newlstat __ia32_compat_sys_newlstat
-108 i386 fstat sys_newfstat __ia32_compat_sys_newfstat
-109 i386 olduname sys_uname __ia32_sys_uname
-110 i386 iopl sys_iopl __ia32_sys_iopl
-111 i386 vhangup sys_vhangup __ia32_sys_vhangup
+99 i386 statfs sys_statfs compat_sys_statfs
+100 i386 fstatfs sys_fstatfs compat_sys_fstatfs
+101 i386 ioperm sys_ioperm
+102 i386 socketcall sys_socketcall compat_sys_socketcall
+103 i386 syslog sys_syslog
+104 i386 setitimer sys_setitimer compat_sys_setitimer
+105 i386 getitimer sys_getitimer compat_sys_getitimer
+106 i386 stat sys_newstat compat_sys_newstat
+107 i386 lstat sys_newlstat compat_sys_newlstat
+108 i386 fstat sys_newfstat compat_sys_newfstat
+109 i386 olduname sys_uname
+110 i386 iopl sys_iopl
+111 i386 vhangup sys_vhangup
112 i386 idle
-113 i386 vm86old sys_vm86old __ia32_sys_ni_syscall
-114 i386 wait4 sys_wait4 __ia32_compat_sys_wait4
-115 i386 swapoff sys_swapoff __ia32_sys_swapoff
-116 i386 sysinfo sys_sysinfo __ia32_compat_sys_sysinfo
-117 i386 ipc sys_ipc __ia32_compat_sys_ipc
-118 i386 fsync sys_fsync __ia32_sys_fsync
-119 i386 sigreturn sys_sigreturn __ia32_compat_sys_sigreturn
-120 i386 clone sys_clone __ia32_compat_sys_x86_clone
-121 i386 setdomainname sys_setdomainname __ia32_sys_setdomainname
-122 i386 uname sys_newuname __ia32_sys_newuname
-123 i386 modify_ldt sys_modify_ldt __ia32_sys_modify_ldt
-124 i386 adjtimex sys_adjtimex_time32 __ia32_sys_adjtimex_time32
-125 i386 mprotect sys_mprotect __ia32_sys_mprotect
-126 i386 sigprocmask sys_sigprocmask __ia32_compat_sys_sigprocmask
+113 i386 vm86old sys_vm86old sys_ni_syscall
+114 i386 wait4 sys_wait4 compat_sys_wait4
+115 i386 swapoff sys_swapoff
+116 i386 sysinfo sys_sysinfo compat_sys_sysinfo
+117 i386 ipc sys_ipc compat_sys_ipc
+118 i386 fsync sys_fsync
+119 i386 sigreturn sys_sigreturn compat_sys_sigreturn
+120 i386 clone sys_clone compat_sys_ia32_clone
+121 i386 setdomainname sys_setdomainname
+122 i386 uname sys_newuname
+123 i386 modify_ldt sys_modify_ldt
+124 i386 adjtimex sys_adjtimex_time32
+125 i386 mprotect sys_mprotect
+126 i386 sigprocmask sys_sigprocmask compat_sys_sigprocmask
127 i386 create_module
-128 i386 init_module sys_init_module __ia32_sys_init_module
-129 i386 delete_module sys_delete_module __ia32_sys_delete_module
+128 i386 init_module sys_init_module
+129 i386 delete_module sys_delete_module
130 i386 get_kernel_syms
-131 i386 quotactl sys_quotactl __ia32_compat_sys_quotactl32
-132 i386 getpgid sys_getpgid __ia32_sys_getpgid
-133 i386 fchdir sys_fchdir __ia32_sys_fchdir
-134 i386 bdflush sys_bdflush __ia32_sys_bdflush
-135 i386 sysfs sys_sysfs __ia32_sys_sysfs
-136 i386 personality sys_personality __ia32_sys_personality
+131 i386 quotactl sys_quotactl
+132 i386 getpgid sys_getpgid
+133 i386 fchdir sys_fchdir
+134 i386 bdflush sys_bdflush
+135 i386 sysfs sys_sysfs
+136 i386 personality sys_personality
137 i386 afs_syscall
-138 i386 setfsuid sys_setfsuid16 __ia32_sys_setfsuid16
-139 i386 setfsgid sys_setfsgid16 __ia32_sys_setfsgid16
-140 i386 _llseek sys_llseek __ia32_sys_llseek
-141 i386 getdents sys_getdents __ia32_compat_sys_getdents
-142 i386 _newselect sys_select __ia32_compat_sys_select
-143 i386 flock sys_flock __ia32_sys_flock
-144 i386 msync sys_msync __ia32_sys_msync
-145 i386 readv sys_readv __ia32_compat_sys_readv
-146 i386 writev sys_writev __ia32_compat_sys_writev
-147 i386 getsid sys_getsid __ia32_sys_getsid
-148 i386 fdatasync sys_fdatasync __ia32_sys_fdatasync
-149 i386 _sysctl sys_sysctl __ia32_compat_sys_sysctl
-150 i386 mlock sys_mlock __ia32_sys_mlock
-151 i386 munlock sys_munlock __ia32_sys_munlock
-152 i386 mlockall sys_mlockall __ia32_sys_mlockall
-153 i386 munlockall sys_munlockall __ia32_sys_munlockall
-154 i386 sched_setparam sys_sched_setparam __ia32_sys_sched_setparam
-155 i386 sched_getparam sys_sched_getparam __ia32_sys_sched_getparam
-156 i386 sched_setscheduler sys_sched_setscheduler __ia32_sys_sched_setscheduler
-157 i386 sched_getscheduler sys_sched_getscheduler __ia32_sys_sched_getscheduler
-158 i386 sched_yield sys_sched_yield __ia32_sys_sched_yield
-159 i386 sched_get_priority_max sys_sched_get_priority_max __ia32_sys_sched_get_priority_max
-160 i386 sched_get_priority_min sys_sched_get_priority_min __ia32_sys_sched_get_priority_min
-161 i386 sched_rr_get_interval sys_sched_rr_get_interval_time32 __ia32_sys_sched_rr_get_interval_time32
-162 i386 nanosleep sys_nanosleep_time32 __ia32_sys_nanosleep_time32
-163 i386 mremap sys_mremap __ia32_sys_mremap
-164 i386 setresuid sys_setresuid16 __ia32_sys_setresuid16
-165 i386 getresuid sys_getresuid16 __ia32_sys_getresuid16
-166 i386 vm86 sys_vm86 __ia32_sys_ni_syscall
+138 i386 setfsuid sys_setfsuid16
+139 i386 setfsgid sys_setfsgid16
+140 i386 _llseek sys_llseek
+141 i386 getdents sys_getdents compat_sys_getdents
+142 i386 _newselect sys_select compat_sys_select
+143 i386 flock sys_flock
+144 i386 msync sys_msync
+145 i386 readv sys_readv
+146 i386 writev sys_writev
+147 i386 getsid sys_getsid
+148 i386 fdatasync sys_fdatasync
+149 i386 _sysctl sys_ni_syscall
+150 i386 mlock sys_mlock
+151 i386 munlock sys_munlock
+152 i386 mlockall sys_mlockall
+153 i386 munlockall sys_munlockall
+154 i386 sched_setparam sys_sched_setparam
+155 i386 sched_getparam sys_sched_getparam
+156 i386 sched_setscheduler sys_sched_setscheduler
+157 i386 sched_getscheduler sys_sched_getscheduler
+158 i386 sched_yield sys_sched_yield
+159 i386 sched_get_priority_max sys_sched_get_priority_max
+160 i386 sched_get_priority_min sys_sched_get_priority_min
+161 i386 sched_rr_get_interval sys_sched_rr_get_interval_time32
+162 i386 nanosleep sys_nanosleep_time32
+163 i386 mremap sys_mremap
+164 i386 setresuid sys_setresuid16
+165 i386 getresuid sys_getresuid16
+166 i386 vm86 sys_vm86 sys_ni_syscall
167 i386 query_module
-168 i386 poll sys_poll __ia32_sys_poll
+168 i386 poll sys_poll
169 i386 nfsservctl
-170 i386 setresgid sys_setresgid16 __ia32_sys_setresgid16
-171 i386 getresgid sys_getresgid16 __ia32_sys_getresgid16
-172 i386 prctl sys_prctl __ia32_sys_prctl
-173 i386 rt_sigreturn sys_rt_sigreturn __ia32_compat_sys_rt_sigreturn
-174 i386 rt_sigaction sys_rt_sigaction __ia32_compat_sys_rt_sigaction
-175 i386 rt_sigprocmask sys_rt_sigprocmask __ia32_compat_sys_rt_sigprocmask
-176 i386 rt_sigpending sys_rt_sigpending __ia32_compat_sys_rt_sigpending
-177 i386 rt_sigtimedwait sys_rt_sigtimedwait_time32 __ia32_compat_sys_rt_sigtimedwait_time32
-178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo __ia32_compat_sys_rt_sigqueueinfo
-179 i386 rt_sigsuspend sys_rt_sigsuspend __ia32_compat_sys_rt_sigsuspend
-180 i386 pread64 sys_pread64 __ia32_compat_sys_x86_pread
-181 i386 pwrite64 sys_pwrite64 __ia32_compat_sys_x86_pwrite
-182 i386 chown sys_chown16 __ia32_sys_chown16
-183 i386 getcwd sys_getcwd __ia32_sys_getcwd
-184 i386 capget sys_capget __ia32_sys_capget
-185 i386 capset sys_capset __ia32_sys_capset
-186 i386 sigaltstack sys_sigaltstack __ia32_compat_sys_sigaltstack
-187 i386 sendfile sys_sendfile __ia32_compat_sys_sendfile
+170 i386 setresgid sys_setresgid16
+171 i386 getresgid sys_getresgid16
+172 i386 prctl sys_prctl
+173 i386 rt_sigreturn sys_rt_sigreturn compat_sys_rt_sigreturn
+174 i386 rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction
+175 i386 rt_sigprocmask sys_rt_sigprocmask compat_sys_rt_sigprocmask
+176 i386 rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending
+177 i386 rt_sigtimedwait sys_rt_sigtimedwait_time32 compat_sys_rt_sigtimedwait_time32
+178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo
+179 i386 rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend
+180 i386 pread64 sys_ia32_pread64
+181 i386 pwrite64 sys_ia32_pwrite64
+182 i386 chown sys_chown16
+183 i386 getcwd sys_getcwd
+184 i386 capget sys_capget
+185 i386 capset sys_capset
+186 i386 sigaltstack sys_sigaltstack compat_sys_sigaltstack
+187 i386 sendfile sys_sendfile compat_sys_sendfile
188 i386 getpmsg
189 i386 putpmsg
-190 i386 vfork sys_vfork __ia32_sys_vfork
-191 i386 ugetrlimit sys_getrlimit __ia32_compat_sys_getrlimit
-192 i386 mmap2 sys_mmap_pgoff __ia32_sys_mmap_pgoff
-193 i386 truncate64 sys_truncate64 __ia32_compat_sys_x86_truncate64
-194 i386 ftruncate64 sys_ftruncate64 __ia32_compat_sys_x86_ftruncate64
-195 i386 stat64 sys_stat64 __ia32_compat_sys_x86_stat64
-196 i386 lstat64 sys_lstat64 __ia32_compat_sys_x86_lstat64
-197 i386 fstat64 sys_fstat64 __ia32_compat_sys_x86_fstat64
-198 i386 lchown32 sys_lchown __ia32_sys_lchown
-199 i386 getuid32 sys_getuid __ia32_sys_getuid
-200 i386 getgid32 sys_getgid __ia32_sys_getgid
-201 i386 geteuid32 sys_geteuid __ia32_sys_geteuid
-202 i386 getegid32 sys_getegid __ia32_sys_getegid
-203 i386 setreuid32 sys_setreuid __ia32_sys_setreuid
-204 i386 setregid32 sys_setregid __ia32_sys_setregid
-205 i386 getgroups32 sys_getgroups __ia32_sys_getgroups
-206 i386 setgroups32 sys_setgroups __ia32_sys_setgroups
-207 i386 fchown32 sys_fchown __ia32_sys_fchown
-208 i386 setresuid32 sys_setresuid __ia32_sys_setresuid
-209 i386 getresuid32 sys_getresuid __ia32_sys_getresuid
-210 i386 setresgid32 sys_setresgid __ia32_sys_setresgid
-211 i386 getresgid32 sys_getresgid __ia32_sys_getresgid
-212 i386 chown32 sys_chown __ia32_sys_chown
-213 i386 setuid32 sys_setuid __ia32_sys_setuid
-214 i386 setgid32 sys_setgid __ia32_sys_setgid
-215 i386 setfsuid32 sys_setfsuid __ia32_sys_setfsuid
-216 i386 setfsgid32 sys_setfsgid __ia32_sys_setfsgid
-217 i386 pivot_root sys_pivot_root __ia32_sys_pivot_root
-218 i386 mincore sys_mincore __ia32_sys_mincore
-219 i386 madvise sys_madvise __ia32_sys_madvise
-220 i386 getdents64 sys_getdents64 __ia32_sys_getdents64
-221 i386 fcntl64 sys_fcntl64 __ia32_compat_sys_fcntl64
+190 i386 vfork sys_vfork
+191 i386 ugetrlimit sys_getrlimit compat_sys_getrlimit
+192 i386 mmap2 sys_mmap_pgoff
+193 i386 truncate64 sys_ia32_truncate64
+194 i386 ftruncate64 sys_ia32_ftruncate64
+195 i386 stat64 sys_stat64 compat_sys_ia32_stat64
+196 i386 lstat64 sys_lstat64 compat_sys_ia32_lstat64
+197 i386 fstat64 sys_fstat64 compat_sys_ia32_fstat64
+198 i386 lchown32 sys_lchown
+199 i386 getuid32 sys_getuid
+200 i386 getgid32 sys_getgid
+201 i386 geteuid32 sys_geteuid
+202 i386 getegid32 sys_getegid
+203 i386 setreuid32 sys_setreuid
+204 i386 setregid32 sys_setregid
+205 i386 getgroups32 sys_getgroups
+206 i386 setgroups32 sys_setgroups
+207 i386 fchown32 sys_fchown
+208 i386 setresuid32 sys_setresuid
+209 i386 getresuid32 sys_getresuid
+210 i386 setresgid32 sys_setresgid
+211 i386 getresgid32 sys_getresgid
+212 i386 chown32 sys_chown
+213 i386 setuid32 sys_setuid
+214 i386 setgid32 sys_setgid
+215 i386 setfsuid32 sys_setfsuid
+216 i386 setfsgid32 sys_setfsgid
+217 i386 pivot_root sys_pivot_root
+218 i386 mincore sys_mincore
+219 i386 madvise sys_madvise
+220 i386 getdents64 sys_getdents64
+221 i386 fcntl64 sys_fcntl64 compat_sys_fcntl64
# 222 is unused
# 223 is unused
-224 i386 gettid sys_gettid __ia32_sys_gettid
-225 i386 readahead sys_readahead __ia32_compat_sys_x86_readahead
-226 i386 setxattr sys_setxattr __ia32_sys_setxattr
-227 i386 lsetxattr sys_lsetxattr __ia32_sys_lsetxattr
-228 i386 fsetxattr sys_fsetxattr __ia32_sys_fsetxattr
-229 i386 getxattr sys_getxattr __ia32_sys_getxattr
-230 i386 lgetxattr sys_lgetxattr __ia32_sys_lgetxattr
-231 i386 fgetxattr sys_fgetxattr __ia32_sys_fgetxattr
-232 i386 listxattr sys_listxattr __ia32_sys_listxattr
-233 i386 llistxattr sys_llistxattr __ia32_sys_llistxattr
-234 i386 flistxattr sys_flistxattr __ia32_sys_flistxattr
-235 i386 removexattr sys_removexattr __ia32_sys_removexattr
-236 i386 lremovexattr sys_lremovexattr __ia32_sys_lremovexattr
-237 i386 fremovexattr sys_fremovexattr __ia32_sys_fremovexattr
-238 i386 tkill sys_tkill __ia32_sys_tkill
-239 i386 sendfile64 sys_sendfile64 __ia32_sys_sendfile64
-240 i386 futex sys_futex_time32 __ia32_sys_futex_time32
-241 i386 sched_setaffinity sys_sched_setaffinity __ia32_compat_sys_sched_setaffinity
-242 i386 sched_getaffinity sys_sched_getaffinity __ia32_compat_sys_sched_getaffinity
-243 i386 set_thread_area sys_set_thread_area __ia32_sys_set_thread_area
-244 i386 get_thread_area sys_get_thread_area __ia32_sys_get_thread_area
-245 i386 io_setup sys_io_setup __ia32_compat_sys_io_setup
-246 i386 io_destroy sys_io_destroy __ia32_sys_io_destroy
-247 i386 io_getevents sys_io_getevents_time32 __ia32_sys_io_getevents_time32
-248 i386 io_submit sys_io_submit __ia32_compat_sys_io_submit
-249 i386 io_cancel sys_io_cancel __ia32_sys_io_cancel
-250 i386 fadvise64 sys_fadvise64 __ia32_compat_sys_x86_fadvise64
+224 i386 gettid sys_gettid
+225 i386 readahead sys_ia32_readahead
+226 i386 setxattr sys_setxattr
+227 i386 lsetxattr sys_lsetxattr
+228 i386 fsetxattr sys_fsetxattr
+229 i386 getxattr sys_getxattr
+230 i386 lgetxattr sys_lgetxattr
+231 i386 fgetxattr sys_fgetxattr
+232 i386 listxattr sys_listxattr
+233 i386 llistxattr sys_llistxattr
+234 i386 flistxattr sys_flistxattr
+235 i386 removexattr sys_removexattr
+236 i386 lremovexattr sys_lremovexattr
+237 i386 fremovexattr sys_fremovexattr
+238 i386 tkill sys_tkill
+239 i386 sendfile64 sys_sendfile64
+240 i386 futex sys_futex_time32
+241 i386 sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity
+242 i386 sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity
+243 i386 set_thread_area sys_set_thread_area
+244 i386 get_thread_area sys_get_thread_area
+245 i386 io_setup sys_io_setup compat_sys_io_setup
+246 i386 io_destroy sys_io_destroy
+247 i386 io_getevents sys_io_getevents_time32
+248 i386 io_submit sys_io_submit compat_sys_io_submit
+249 i386 io_cancel sys_io_cancel
+250 i386 fadvise64 sys_ia32_fadvise64
# 251 is available for reuse (was briefly sys_set_zone_reclaim)
-252 i386 exit_group sys_exit_group __ia32_sys_exit_group
-253 i386 lookup_dcookie sys_lookup_dcookie __ia32_compat_sys_lookup_dcookie
-254 i386 epoll_create sys_epoll_create __ia32_sys_epoll_create
-255 i386 epoll_ctl sys_epoll_ctl __ia32_sys_epoll_ctl
-256 i386 epoll_wait sys_epoll_wait __ia32_sys_epoll_wait
-257 i386 remap_file_pages sys_remap_file_pages __ia32_sys_remap_file_pages
-258 i386 set_tid_address sys_set_tid_address __ia32_sys_set_tid_address
-259 i386 timer_create sys_timer_create __ia32_compat_sys_timer_create
-260 i386 timer_settime sys_timer_settime32 __ia32_sys_timer_settime32
-261 i386 timer_gettime sys_timer_gettime32 __ia32_sys_timer_gettime32
-262 i386 timer_getoverrun sys_timer_getoverrun __ia32_sys_timer_getoverrun
-263 i386 timer_delete sys_timer_delete __ia32_sys_timer_delete
-264 i386 clock_settime sys_clock_settime32 __ia32_sys_clock_settime32
-265 i386 clock_gettime sys_clock_gettime32 __ia32_sys_clock_gettime32
-266 i386 clock_getres sys_clock_getres_time32 __ia32_sys_clock_getres_time32
-267 i386 clock_nanosleep sys_clock_nanosleep_time32 __ia32_sys_clock_nanosleep_time32
-268 i386 statfs64 sys_statfs64 __ia32_compat_sys_statfs64
-269 i386 fstatfs64 sys_fstatfs64 __ia32_compat_sys_fstatfs64
-270 i386 tgkill sys_tgkill __ia32_sys_tgkill
-271 i386 utimes sys_utimes_time32 __ia32_sys_utimes_time32
-272 i386 fadvise64_64 sys_fadvise64_64 __ia32_compat_sys_x86_fadvise64_64
+252 i386 exit_group sys_exit_group
+253 i386 lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie
+254 i386 epoll_create sys_epoll_create
+255 i386 epoll_ctl sys_epoll_ctl
+256 i386 epoll_wait sys_epoll_wait
+257 i386 remap_file_pages sys_remap_file_pages
+258 i386 set_tid_address sys_set_tid_address
+259 i386 timer_create sys_timer_create compat_sys_timer_create
+260 i386 timer_settime sys_timer_settime32
+261 i386 timer_gettime sys_timer_gettime32
+262 i386 timer_getoverrun sys_timer_getoverrun
+263 i386 timer_delete sys_timer_delete
+264 i386 clock_settime sys_clock_settime32
+265 i386 clock_gettime sys_clock_gettime32
+266 i386 clock_getres sys_clock_getres_time32
+267 i386 clock_nanosleep sys_clock_nanosleep_time32
+268 i386 statfs64 sys_statfs64 compat_sys_statfs64
+269 i386 fstatfs64 sys_fstatfs64 compat_sys_fstatfs64
+270 i386 tgkill sys_tgkill
+271 i386 utimes sys_utimes_time32
+272 i386 fadvise64_64 sys_ia32_fadvise64_64
273 i386 vserver
-274 i386 mbind sys_mbind __ia32_sys_mbind
-275 i386 get_mempolicy sys_get_mempolicy __ia32_compat_sys_get_mempolicy
-276 i386 set_mempolicy sys_set_mempolicy __ia32_sys_set_mempolicy
-277 i386 mq_open sys_mq_open __ia32_compat_sys_mq_open
-278 i386 mq_unlink sys_mq_unlink __ia32_sys_mq_unlink
-279 i386 mq_timedsend sys_mq_timedsend_time32 __ia32_sys_mq_timedsend_time32
-280 i386 mq_timedreceive sys_mq_timedreceive_time32 __ia32_sys_mq_timedreceive_time32
-281 i386 mq_notify sys_mq_notify __ia32_compat_sys_mq_notify
-282 i386 mq_getsetattr sys_mq_getsetattr __ia32_compat_sys_mq_getsetattr
-283 i386 kexec_load sys_kexec_load __ia32_compat_sys_kexec_load
-284 i386 waitid sys_waitid __ia32_compat_sys_waitid
+274 i386 mbind sys_mbind
+275 i386 get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy
+276 i386 set_mempolicy sys_set_mempolicy
+277 i386 mq_open sys_mq_open compat_sys_mq_open
+278 i386 mq_unlink sys_mq_unlink
+279 i386 mq_timedsend sys_mq_timedsend_time32
+280 i386 mq_timedreceive sys_mq_timedreceive_time32
+281 i386 mq_notify sys_mq_notify compat_sys_mq_notify
+282 i386 mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr
+283 i386 kexec_load sys_kexec_load compat_sys_kexec_load
+284 i386 waitid sys_waitid compat_sys_waitid
# 285 sys_setaltroot
-286 i386 add_key sys_add_key __ia32_sys_add_key
-287 i386 request_key sys_request_key __ia32_sys_request_key
-288 i386 keyctl sys_keyctl __ia32_compat_sys_keyctl
-289 i386 ioprio_set sys_ioprio_set __ia32_sys_ioprio_set
-290 i386 ioprio_get sys_ioprio_get __ia32_sys_ioprio_get
-291 i386 inotify_init sys_inotify_init __ia32_sys_inotify_init
-292 i386 inotify_add_watch sys_inotify_add_watch __ia32_sys_inotify_add_watch
-293 i386 inotify_rm_watch sys_inotify_rm_watch __ia32_sys_inotify_rm_watch
-294 i386 migrate_pages sys_migrate_pages __ia32_sys_migrate_pages
-295 i386 openat sys_openat __ia32_compat_sys_openat
-296 i386 mkdirat sys_mkdirat __ia32_sys_mkdirat
-297 i386 mknodat sys_mknodat __ia32_sys_mknodat
-298 i386 fchownat sys_fchownat __ia32_sys_fchownat
-299 i386 futimesat sys_futimesat_time32 __ia32_sys_futimesat_time32
-300 i386 fstatat64 sys_fstatat64 __ia32_compat_sys_x86_fstatat
-301 i386 unlinkat sys_unlinkat __ia32_sys_unlinkat
-302 i386 renameat sys_renameat __ia32_sys_renameat
-303 i386 linkat sys_linkat __ia32_sys_linkat
-304 i386 symlinkat sys_symlinkat __ia32_sys_symlinkat
-305 i386 readlinkat sys_readlinkat __ia32_sys_readlinkat
-306 i386 fchmodat sys_fchmodat __ia32_sys_fchmodat
-307 i386 faccessat sys_faccessat __ia32_sys_faccessat
-308 i386 pselect6 sys_pselect6_time32 __ia32_compat_sys_pselect6_time32
-309 i386 ppoll sys_ppoll_time32 __ia32_compat_sys_ppoll_time32
-310 i386 unshare sys_unshare __ia32_sys_unshare
-311 i386 set_robust_list sys_set_robust_list __ia32_compat_sys_set_robust_list
-312 i386 get_robust_list sys_get_robust_list __ia32_compat_sys_get_robust_list
-313 i386 splice sys_splice __ia32_sys_splice
-314 i386 sync_file_range sys_sync_file_range __ia32_compat_sys_x86_sync_file_range
-315 i386 tee sys_tee __ia32_sys_tee
-316 i386 vmsplice sys_vmsplice __ia32_compat_sys_vmsplice
-317 i386 move_pages sys_move_pages __ia32_compat_sys_move_pages
-318 i386 getcpu sys_getcpu __ia32_sys_getcpu
-319 i386 epoll_pwait sys_epoll_pwait __ia32_sys_epoll_pwait
-320 i386 utimensat sys_utimensat_time32 __ia32_sys_utimensat_time32
-321 i386 signalfd sys_signalfd __ia32_compat_sys_signalfd
-322 i386 timerfd_create sys_timerfd_create __ia32_sys_timerfd_create
-323 i386 eventfd sys_eventfd __ia32_sys_eventfd
-324 i386 fallocate sys_fallocate __ia32_compat_sys_x86_fallocate
-325 i386 timerfd_settime sys_timerfd_settime32 __ia32_sys_timerfd_settime32
-326 i386 timerfd_gettime sys_timerfd_gettime32 __ia32_sys_timerfd_gettime32
-327 i386 signalfd4 sys_signalfd4 __ia32_compat_sys_signalfd4
-328 i386 eventfd2 sys_eventfd2 __ia32_sys_eventfd2
-329 i386 epoll_create1 sys_epoll_create1 __ia32_sys_epoll_create1
-330 i386 dup3 sys_dup3 __ia32_sys_dup3
-331 i386 pipe2 sys_pipe2 __ia32_sys_pipe2
-332 i386 inotify_init1 sys_inotify_init1 __ia32_sys_inotify_init1
-333 i386 preadv sys_preadv __ia32_compat_sys_preadv
-334 i386 pwritev sys_pwritev __ia32_compat_sys_pwritev
-335 i386 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo __ia32_compat_sys_rt_tgsigqueueinfo
-336 i386 perf_event_open sys_perf_event_open __ia32_sys_perf_event_open
-337 i386 recvmmsg sys_recvmmsg_time32 __ia32_compat_sys_recvmmsg_time32
-338 i386 fanotify_init sys_fanotify_init __ia32_sys_fanotify_init
-339 i386 fanotify_mark sys_fanotify_mark __ia32_compat_sys_fanotify_mark
-340 i386 prlimit64 sys_prlimit64 __ia32_sys_prlimit64
-341 i386 name_to_handle_at sys_name_to_handle_at __ia32_sys_name_to_handle_at
-342 i386 open_by_handle_at sys_open_by_handle_at __ia32_compat_sys_open_by_handle_at
-343 i386 clock_adjtime sys_clock_adjtime32 __ia32_sys_clock_adjtime32
-344 i386 syncfs sys_syncfs __ia32_sys_syncfs
-345 i386 sendmmsg sys_sendmmsg __ia32_compat_sys_sendmmsg
-346 i386 setns sys_setns __ia32_sys_setns
-347 i386 process_vm_readv sys_process_vm_readv __ia32_compat_sys_process_vm_readv
-348 i386 process_vm_writev sys_process_vm_writev __ia32_compat_sys_process_vm_writev
-349 i386 kcmp sys_kcmp __ia32_sys_kcmp
-350 i386 finit_module sys_finit_module __ia32_sys_finit_module
-351 i386 sched_setattr sys_sched_setattr __ia32_sys_sched_setattr
-352 i386 sched_getattr sys_sched_getattr __ia32_sys_sched_getattr
-353 i386 renameat2 sys_renameat2 __ia32_sys_renameat2
-354 i386 seccomp sys_seccomp __ia32_sys_seccomp
-355 i386 getrandom sys_getrandom __ia32_sys_getrandom
-356 i386 memfd_create sys_memfd_create __ia32_sys_memfd_create
-357 i386 bpf sys_bpf __ia32_sys_bpf
-358 i386 execveat sys_execveat __ia32_compat_sys_execveat
-359 i386 socket sys_socket __ia32_sys_socket
-360 i386 socketpair sys_socketpair __ia32_sys_socketpair
-361 i386 bind sys_bind __ia32_sys_bind
-362 i386 connect sys_connect __ia32_sys_connect
-363 i386 listen sys_listen __ia32_sys_listen
-364 i386 accept4 sys_accept4 __ia32_sys_accept4
-365 i386 getsockopt sys_getsockopt __ia32_compat_sys_getsockopt
-366 i386 setsockopt sys_setsockopt __ia32_compat_sys_setsockopt
-367 i386 getsockname sys_getsockname __ia32_sys_getsockname
-368 i386 getpeername sys_getpeername __ia32_sys_getpeername
-369 i386 sendto sys_sendto __ia32_sys_sendto
-370 i386 sendmsg sys_sendmsg __ia32_compat_sys_sendmsg
-371 i386 recvfrom sys_recvfrom __ia32_compat_sys_recvfrom
-372 i386 recvmsg sys_recvmsg __ia32_compat_sys_recvmsg
-373 i386 shutdown sys_shutdown __ia32_sys_shutdown
-374 i386 userfaultfd sys_userfaultfd __ia32_sys_userfaultfd
-375 i386 membarrier sys_membarrier __ia32_sys_membarrier
-376 i386 mlock2 sys_mlock2 __ia32_sys_mlock2
-377 i386 copy_file_range sys_copy_file_range __ia32_sys_copy_file_range
-378 i386 preadv2 sys_preadv2 __ia32_compat_sys_preadv2
-379 i386 pwritev2 sys_pwritev2 __ia32_compat_sys_pwritev2
-380 i386 pkey_mprotect sys_pkey_mprotect __ia32_sys_pkey_mprotect
-381 i386 pkey_alloc sys_pkey_alloc __ia32_sys_pkey_alloc
-382 i386 pkey_free sys_pkey_free __ia32_sys_pkey_free
-383 i386 statx sys_statx __ia32_sys_statx
-384 i386 arch_prctl sys_arch_prctl __ia32_compat_sys_arch_prctl
-385 i386 io_pgetevents sys_io_pgetevents_time32 __ia32_compat_sys_io_pgetevents
-386 i386 rseq sys_rseq __ia32_sys_rseq
-393 i386 semget sys_semget __ia32_sys_semget
-394 i386 semctl sys_semctl __ia32_compat_sys_semctl
-395 i386 shmget sys_shmget __ia32_sys_shmget
-396 i386 shmctl sys_shmctl __ia32_compat_sys_shmctl
-397 i386 shmat sys_shmat __ia32_compat_sys_shmat
-398 i386 shmdt sys_shmdt __ia32_sys_shmdt
-399 i386 msgget sys_msgget __ia32_sys_msgget
-400 i386 msgsnd sys_msgsnd __ia32_compat_sys_msgsnd
-401 i386 msgrcv sys_msgrcv __ia32_compat_sys_msgrcv
-402 i386 msgctl sys_msgctl __ia32_compat_sys_msgctl
-403 i386 clock_gettime64 sys_clock_gettime __ia32_sys_clock_gettime
-404 i386 clock_settime64 sys_clock_settime __ia32_sys_clock_settime
-405 i386 clock_adjtime64 sys_clock_adjtime __ia32_sys_clock_adjtime
-406 i386 clock_getres_time64 sys_clock_getres __ia32_sys_clock_getres
-407 i386 clock_nanosleep_time64 sys_clock_nanosleep __ia32_sys_clock_nanosleep
-408 i386 timer_gettime64 sys_timer_gettime __ia32_sys_timer_gettime
-409 i386 timer_settime64 sys_timer_settime __ia32_sys_timer_settime
-410 i386 timerfd_gettime64 sys_timerfd_gettime __ia32_sys_timerfd_gettime
-411 i386 timerfd_settime64 sys_timerfd_settime __ia32_sys_timerfd_settime
-412 i386 utimensat_time64 sys_utimensat __ia32_sys_utimensat
-413 i386 pselect6_time64 sys_pselect6 __ia32_compat_sys_pselect6_time64
-414 i386 ppoll_time64 sys_ppoll __ia32_compat_sys_ppoll_time64
-416 i386 io_pgetevents_time64 sys_io_pgetevents __ia32_sys_io_pgetevents
-417 i386 recvmmsg_time64 sys_recvmmsg __ia32_compat_sys_recvmmsg_time64
-418 i386 mq_timedsend_time64 sys_mq_timedsend __ia32_sys_mq_timedsend
-419 i386 mq_timedreceive_time64 sys_mq_timedreceive __ia32_sys_mq_timedreceive
-420 i386 semtimedop_time64 sys_semtimedop __ia32_sys_semtimedop
-421 i386 rt_sigtimedwait_time64 sys_rt_sigtimedwait __ia32_compat_sys_rt_sigtimedwait_time64
-422 i386 futex_time64 sys_futex __ia32_sys_futex
-423 i386 sched_rr_get_interval_time64 sys_sched_rr_get_interval __ia32_sys_sched_rr_get_interval
-424 i386 pidfd_send_signal sys_pidfd_send_signal __ia32_sys_pidfd_send_signal
-425 i386 io_uring_setup sys_io_uring_setup __ia32_sys_io_uring_setup
-426 i386 io_uring_enter sys_io_uring_enter __ia32_sys_io_uring_enter
-427 i386 io_uring_register sys_io_uring_register __ia32_sys_io_uring_register
-428 i386 open_tree sys_open_tree __ia32_sys_open_tree
-429 i386 move_mount sys_move_mount __ia32_sys_move_mount
-430 i386 fsopen sys_fsopen __ia32_sys_fsopen
-431 i386 fsconfig sys_fsconfig __ia32_sys_fsconfig
-432 i386 fsmount sys_fsmount __ia32_sys_fsmount
-433 i386 fspick sys_fspick __ia32_sys_fspick
-434 i386 pidfd_open sys_pidfd_open __ia32_sys_pidfd_open
-435 i386 clone3 sys_clone3 __ia32_sys_clone3
+286 i386 add_key sys_add_key
+287 i386 request_key sys_request_key
+288 i386 keyctl sys_keyctl compat_sys_keyctl
+289 i386 ioprio_set sys_ioprio_set
+290 i386 ioprio_get sys_ioprio_get
+291 i386 inotify_init sys_inotify_init
+292 i386 inotify_add_watch sys_inotify_add_watch
+293 i386 inotify_rm_watch sys_inotify_rm_watch
+294 i386 migrate_pages sys_migrate_pages
+295 i386 openat sys_openat compat_sys_openat
+296 i386 mkdirat sys_mkdirat
+297 i386 mknodat sys_mknodat
+298 i386 fchownat sys_fchownat
+299 i386 futimesat sys_futimesat_time32
+300 i386 fstatat64 sys_fstatat64 compat_sys_ia32_fstatat64
+301 i386 unlinkat sys_unlinkat
+302 i386 renameat sys_renameat
+303 i386 linkat sys_linkat
+304 i386 symlinkat sys_symlinkat
+305 i386 readlinkat sys_readlinkat
+306 i386 fchmodat sys_fchmodat
+307 i386 faccessat sys_faccessat
+308 i386 pselect6 sys_pselect6_time32 compat_sys_pselect6_time32
+309 i386 ppoll sys_ppoll_time32 compat_sys_ppoll_time32
+310 i386 unshare sys_unshare
+311 i386 set_robust_list sys_set_robust_list compat_sys_set_robust_list
+312 i386 get_robust_list sys_get_robust_list compat_sys_get_robust_list
+313 i386 splice sys_splice
+314 i386 sync_file_range sys_ia32_sync_file_range
+315 i386 tee sys_tee
+316 i386 vmsplice sys_vmsplice
+317 i386 move_pages sys_move_pages compat_sys_move_pages
+318 i386 getcpu sys_getcpu
+319 i386 epoll_pwait sys_epoll_pwait
+320 i386 utimensat sys_utimensat_time32
+321 i386 signalfd sys_signalfd compat_sys_signalfd
+322 i386 timerfd_create sys_timerfd_create
+323 i386 eventfd sys_eventfd
+324 i386 fallocate sys_ia32_fallocate
+325 i386 timerfd_settime sys_timerfd_settime32
+326 i386 timerfd_gettime sys_timerfd_gettime32
+327 i386 signalfd4 sys_signalfd4 compat_sys_signalfd4
+328 i386 eventfd2 sys_eventfd2
+329 i386 epoll_create1 sys_epoll_create1
+330 i386 dup3 sys_dup3
+331 i386 pipe2 sys_pipe2
+332 i386 inotify_init1 sys_inotify_init1
+333 i386 preadv sys_preadv compat_sys_preadv
+334 i386 pwritev sys_pwritev compat_sys_pwritev
+335 i386 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo
+336 i386 perf_event_open sys_perf_event_open
+337 i386 recvmmsg sys_recvmmsg_time32 compat_sys_recvmmsg_time32
+338 i386 fanotify_init sys_fanotify_init
+339 i386 fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark
+340 i386 prlimit64 sys_prlimit64
+341 i386 name_to_handle_at sys_name_to_handle_at
+342 i386 open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at
+343 i386 clock_adjtime sys_clock_adjtime32
+344 i386 syncfs sys_syncfs
+345 i386 sendmmsg sys_sendmmsg compat_sys_sendmmsg
+346 i386 setns sys_setns
+347 i386 process_vm_readv sys_process_vm_readv
+348 i386 process_vm_writev sys_process_vm_writev
+349 i386 kcmp sys_kcmp
+350 i386 finit_module sys_finit_module
+351 i386 sched_setattr sys_sched_setattr
+352 i386 sched_getattr sys_sched_getattr
+353 i386 renameat2 sys_renameat2
+354 i386 seccomp sys_seccomp
+355 i386 getrandom sys_getrandom
+356 i386 memfd_create sys_memfd_create
+357 i386 bpf sys_bpf
+358 i386 execveat sys_execveat compat_sys_execveat
+359 i386 socket sys_socket
+360 i386 socketpair sys_socketpair
+361 i386 bind sys_bind
+362 i386 connect sys_connect
+363 i386 listen sys_listen
+364 i386 accept4 sys_accept4
+365 i386 getsockopt sys_getsockopt sys_getsockopt
+366 i386 setsockopt sys_setsockopt sys_setsockopt
+367 i386 getsockname sys_getsockname
+368 i386 getpeername sys_getpeername
+369 i386 sendto sys_sendto
+370 i386 sendmsg sys_sendmsg compat_sys_sendmsg
+371 i386 recvfrom sys_recvfrom compat_sys_recvfrom
+372 i386 recvmsg sys_recvmsg compat_sys_recvmsg
+373 i386 shutdown sys_shutdown
+374 i386 userfaultfd sys_userfaultfd
+375 i386 membarrier sys_membarrier
+376 i386 mlock2 sys_mlock2
+377 i386 copy_file_range sys_copy_file_range
+378 i386 preadv2 sys_preadv2 compat_sys_preadv2
+379 i386 pwritev2 sys_pwritev2 compat_sys_pwritev2
+380 i386 pkey_mprotect sys_pkey_mprotect
+381 i386 pkey_alloc sys_pkey_alloc
+382 i386 pkey_free sys_pkey_free
+383 i386 statx sys_statx
+384 i386 arch_prctl sys_arch_prctl compat_sys_arch_prctl
+385 i386 io_pgetevents sys_io_pgetevents_time32 compat_sys_io_pgetevents
+386 i386 rseq sys_rseq
+393 i386 semget sys_semget
+394 i386 semctl sys_semctl compat_sys_semctl
+395 i386 shmget sys_shmget
+396 i386 shmctl sys_shmctl compat_sys_shmctl
+397 i386 shmat sys_shmat compat_sys_shmat
+398 i386 shmdt sys_shmdt
+399 i386 msgget sys_msgget
+400 i386 msgsnd sys_msgsnd compat_sys_msgsnd
+401 i386 msgrcv sys_msgrcv compat_sys_msgrcv
+402 i386 msgctl sys_msgctl compat_sys_msgctl
+403 i386 clock_gettime64 sys_clock_gettime
+404 i386 clock_settime64 sys_clock_settime
+405 i386 clock_adjtime64 sys_clock_adjtime
+406 i386 clock_getres_time64 sys_clock_getres
+407 i386 clock_nanosleep_time64 sys_clock_nanosleep
+408 i386 timer_gettime64 sys_timer_gettime
+409 i386 timer_settime64 sys_timer_settime
+410 i386 timerfd_gettime64 sys_timerfd_gettime
+411 i386 timerfd_settime64 sys_timerfd_settime
+412 i386 utimensat_time64 sys_utimensat
+413 i386 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64
+414 i386 ppoll_time64 sys_ppoll compat_sys_ppoll_time64
+416 i386 io_pgetevents_time64 sys_io_pgetevents
+417 i386 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64
+418 i386 mq_timedsend_time64 sys_mq_timedsend
+419 i386 mq_timedreceive_time64 sys_mq_timedreceive
+420 i386 semtimedop_time64 sys_semtimedop
+421 i386 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64
+422 i386 futex_time64 sys_futex
+423 i386 sched_rr_get_interval_time64 sys_sched_rr_get_interval
+424 i386 pidfd_send_signal sys_pidfd_send_signal
+425 i386 io_uring_setup sys_io_uring_setup
+426 i386 io_uring_enter sys_io_uring_enter
+427 i386 io_uring_register sys_io_uring_register
+428 i386 open_tree sys_open_tree
+429 i386 move_mount sys_move_mount
+430 i386 fsopen sys_fsopen
+431 i386 fsconfig sys_fsconfig
+432 i386 fsmount sys_fsmount
+433 i386 fspick sys_fspick
+434 i386 pidfd_open sys_pidfd_open
+435 i386 clone3 sys_clone3
+436 i386 close_range sys_close_range
+437 i386 openat2 sys_openat2
+438 i386 pidfd_getfd sys_pidfd_getfd
+439 i386 faccessat2 sys_faccessat2
+440 i386 process_madvise sys_process_madvise
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index c29976e..3798192 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -8,395 +8,402 @@
#
# The abi is "common", "64" or "x32" for this file.
#
-0 common read __x64_sys_read
-1 common write __x64_sys_write
-2 common open __x64_sys_open
-3 common close __x64_sys_close
-4 common stat __x64_sys_newstat
-5 common fstat __x64_sys_newfstat
-6 common lstat __x64_sys_newlstat
-7 common poll __x64_sys_poll
-8 common lseek __x64_sys_lseek
-9 common mmap __x64_sys_mmap
-10 common mprotect __x64_sys_mprotect
-11 common munmap __x64_sys_munmap
-12 common brk __x64_sys_brk
-13 64 rt_sigaction __x64_sys_rt_sigaction
-14 common rt_sigprocmask __x64_sys_rt_sigprocmask
-15 64 rt_sigreturn __x64_sys_rt_sigreturn/ptregs
-16 64 ioctl __x64_sys_ioctl
-17 common pread64 __x64_sys_pread64
-18 common pwrite64 __x64_sys_pwrite64
-19 64 readv __x64_sys_readv
-20 64 writev __x64_sys_writev
-21 common access __x64_sys_access
-22 common pipe __x64_sys_pipe
-23 common select __x64_sys_select
-24 common sched_yield __x64_sys_sched_yield
-25 common mremap __x64_sys_mremap
-26 common msync __x64_sys_msync
-27 common mincore __x64_sys_mincore
-28 common madvise __x64_sys_madvise
-29 common shmget __x64_sys_shmget
-30 common shmat __x64_sys_shmat
-31 common shmctl __x64_sys_shmctl
-32 common dup __x64_sys_dup
-33 common dup2 __x64_sys_dup2
-34 common pause __x64_sys_pause
-35 common nanosleep __x64_sys_nanosleep
-36 common getitimer __x64_sys_getitimer
-37 common alarm __x64_sys_alarm
-38 common setitimer __x64_sys_setitimer
-39 common getpid __x64_sys_getpid
-40 common sendfile __x64_sys_sendfile64
-41 common socket __x64_sys_socket
-42 common connect __x64_sys_connect
-43 common accept __x64_sys_accept
-44 common sendto __x64_sys_sendto
-45 64 recvfrom __x64_sys_recvfrom
-46 64 sendmsg __x64_sys_sendmsg
-47 64 recvmsg __x64_sys_recvmsg
-48 common shutdown __x64_sys_shutdown
-49 common bind __x64_sys_bind
-50 common listen __x64_sys_listen
-51 common getsockname __x64_sys_getsockname
-52 common getpeername __x64_sys_getpeername
-53 common socketpair __x64_sys_socketpair
-54 64 setsockopt __x64_sys_setsockopt
-55 64 getsockopt __x64_sys_getsockopt
-56 common clone __x64_sys_clone/ptregs
-57 common fork __x64_sys_fork/ptregs
-58 common vfork __x64_sys_vfork/ptregs
-59 64 execve __x64_sys_execve/ptregs
-60 common exit __x64_sys_exit
-61 common wait4 __x64_sys_wait4
-62 common kill __x64_sys_kill
-63 common uname __x64_sys_newuname
-64 common semget __x64_sys_semget
-65 common semop __x64_sys_semop
-66 common semctl __x64_sys_semctl
-67 common shmdt __x64_sys_shmdt
-68 common msgget __x64_sys_msgget
-69 common msgsnd __x64_sys_msgsnd
-70 common msgrcv __x64_sys_msgrcv
-71 common msgctl __x64_sys_msgctl
-72 common fcntl __x64_sys_fcntl
-73 common flock __x64_sys_flock
-74 common fsync __x64_sys_fsync
-75 common fdatasync __x64_sys_fdatasync
-76 common truncate __x64_sys_truncate
-77 common ftruncate __x64_sys_ftruncate
-78 common getdents __x64_sys_getdents
-79 common getcwd __x64_sys_getcwd
-80 common chdir __x64_sys_chdir
-81 common fchdir __x64_sys_fchdir
-82 common rename __x64_sys_rename
-83 common mkdir __x64_sys_mkdir
-84 common rmdir __x64_sys_rmdir
-85 common creat __x64_sys_creat
-86 common link __x64_sys_link
-87 common unlink __x64_sys_unlink
-88 common symlink __x64_sys_symlink
-89 common readlink __x64_sys_readlink
-90 common chmod __x64_sys_chmod
-91 common fchmod __x64_sys_fchmod
-92 common chown __x64_sys_chown
-93 common fchown __x64_sys_fchown
-94 common lchown __x64_sys_lchown
-95 common umask __x64_sys_umask
-96 common gettimeofday __x64_sys_gettimeofday
-97 common getrlimit __x64_sys_getrlimit
-98 common getrusage __x64_sys_getrusage
-99 common sysinfo __x64_sys_sysinfo
-100 common times __x64_sys_times
-101 64 ptrace __x64_sys_ptrace
-102 common getuid __x64_sys_getuid
-103 common syslog __x64_sys_syslog
-104 common getgid __x64_sys_getgid
-105 common setuid __x64_sys_setuid
-106 common setgid __x64_sys_setgid
-107 common geteuid __x64_sys_geteuid
-108 common getegid __x64_sys_getegid
-109 common setpgid __x64_sys_setpgid
-110 common getppid __x64_sys_getppid
-111 common getpgrp __x64_sys_getpgrp
-112 common setsid __x64_sys_setsid
-113 common setreuid __x64_sys_setreuid
-114 common setregid __x64_sys_setregid
-115 common getgroups __x64_sys_getgroups
-116 common setgroups __x64_sys_setgroups
-117 common setresuid __x64_sys_setresuid
-118 common getresuid __x64_sys_getresuid
-119 common setresgid __x64_sys_setresgid
-120 common getresgid __x64_sys_getresgid
-121 common getpgid __x64_sys_getpgid
-122 common setfsuid __x64_sys_setfsuid
-123 common setfsgid __x64_sys_setfsgid
-124 common getsid __x64_sys_getsid
-125 common capget __x64_sys_capget
-126 common capset __x64_sys_capset
-127 64 rt_sigpending __x64_sys_rt_sigpending
-128 64 rt_sigtimedwait __x64_sys_rt_sigtimedwait
-129 64 rt_sigqueueinfo __x64_sys_rt_sigqueueinfo
-130 common rt_sigsuspend __x64_sys_rt_sigsuspend
-131 64 sigaltstack __x64_sys_sigaltstack
-132 common utime __x64_sys_utime
-133 common mknod __x64_sys_mknod
+0 common read sys_read
+1 common write sys_write
+2 common open sys_open
+3 common close sys_close
+4 common stat sys_newstat
+5 common fstat sys_newfstat
+6 common lstat sys_newlstat
+7 common poll sys_poll
+8 common lseek sys_lseek
+9 common mmap sys_mmap
+10 common mprotect sys_mprotect
+11 common munmap sys_munmap
+12 common brk sys_brk
+13 64 rt_sigaction sys_rt_sigaction
+14 common rt_sigprocmask sys_rt_sigprocmask
+15 64 rt_sigreturn sys_rt_sigreturn
+16 64 ioctl sys_ioctl
+17 common pread64 sys_pread64
+18 common pwrite64 sys_pwrite64
+19 64 readv sys_readv
+20 64 writev sys_writev
+21 common access sys_access
+22 common pipe sys_pipe
+23 common select sys_select
+24 common sched_yield sys_sched_yield
+25 common mremap sys_mremap
+26 common msync sys_msync
+27 common mincore sys_mincore
+28 common madvise sys_madvise
+29 common shmget sys_shmget
+30 common shmat sys_shmat
+31 common shmctl sys_shmctl
+32 common dup sys_dup
+33 common dup2 sys_dup2
+34 common pause sys_pause
+35 common nanosleep sys_nanosleep
+36 common getitimer sys_getitimer
+37 common alarm sys_alarm
+38 common setitimer sys_setitimer
+39 common getpid sys_getpid
+40 common sendfile sys_sendfile64
+41 common socket sys_socket
+42 common connect sys_connect
+43 common accept sys_accept
+44 common sendto sys_sendto
+45 64 recvfrom sys_recvfrom
+46 64 sendmsg sys_sendmsg
+47 64 recvmsg sys_recvmsg
+48 common shutdown sys_shutdown
+49 common bind sys_bind
+50 common listen sys_listen
+51 common getsockname sys_getsockname
+52 common getpeername sys_getpeername
+53 common socketpair sys_socketpair
+54 64 setsockopt sys_setsockopt
+55 64 getsockopt sys_getsockopt
+56 common clone sys_clone
+57 common fork sys_fork
+58 common vfork sys_vfork
+59 64 execve sys_execve
+60 common exit sys_exit
+61 common wait4 sys_wait4
+62 common kill sys_kill
+63 common uname sys_newuname
+64 common semget sys_semget
+65 common semop sys_semop
+66 common semctl sys_semctl
+67 common shmdt sys_shmdt
+68 common msgget sys_msgget
+69 common msgsnd sys_msgsnd
+70 common msgrcv sys_msgrcv
+71 common msgctl sys_msgctl
+72 common fcntl sys_fcntl
+73 common flock sys_flock
+74 common fsync sys_fsync
+75 common fdatasync sys_fdatasync
+76 common truncate sys_truncate
+77 common ftruncate sys_ftruncate
+78 common getdents sys_getdents
+79 common getcwd sys_getcwd
+80 common chdir sys_chdir
+81 common fchdir sys_fchdir
+82 common rename sys_rename
+83 common mkdir sys_mkdir
+84 common rmdir sys_rmdir
+85 common creat sys_creat
+86 common link sys_link
+87 common unlink sys_unlink
+88 common symlink sys_symlink
+89 common readlink sys_readlink
+90 common chmod sys_chmod
+91 common fchmod sys_fchmod
+92 common chown sys_chown
+93 common fchown sys_fchown
+94 common lchown sys_lchown
+95 common umask sys_umask
+96 common gettimeofday sys_gettimeofday
+97 common getrlimit sys_getrlimit
+98 common getrusage sys_getrusage
+99 common sysinfo sys_sysinfo
+100 common times sys_times
+101 64 ptrace sys_ptrace
+102 common getuid sys_getuid
+103 common syslog sys_syslog
+104 common getgid sys_getgid
+105 common setuid sys_setuid
+106 common setgid sys_setgid
+107 common geteuid sys_geteuid
+108 common getegid sys_getegid
+109 common setpgid sys_setpgid
+110 common getppid sys_getppid
+111 common getpgrp sys_getpgrp
+112 common setsid sys_setsid
+113 common setreuid sys_setreuid
+114 common setregid sys_setregid
+115 common getgroups sys_getgroups
+116 common setgroups sys_setgroups
+117 common setresuid sys_setresuid
+118 common getresuid sys_getresuid
+119 common setresgid sys_setresgid
+120 common getresgid sys_getresgid
+121 common getpgid sys_getpgid
+122 common setfsuid sys_setfsuid
+123 common setfsgid sys_setfsgid
+124 common getsid sys_getsid
+125 common capget sys_capget
+126 common capset sys_capset
+127 64 rt_sigpending sys_rt_sigpending
+128 64 rt_sigtimedwait sys_rt_sigtimedwait
+129 64 rt_sigqueueinfo sys_rt_sigqueueinfo
+130 common rt_sigsuspend sys_rt_sigsuspend
+131 64 sigaltstack sys_sigaltstack
+132 common utime sys_utime
+133 common mknod sys_mknod
134 64 uselib
-135 common personality __x64_sys_personality
-136 common ustat __x64_sys_ustat
-137 common statfs __x64_sys_statfs
-138 common fstatfs __x64_sys_fstatfs
-139 common sysfs __x64_sys_sysfs
-140 common getpriority __x64_sys_getpriority
-141 common setpriority __x64_sys_setpriority
-142 common sched_setparam __x64_sys_sched_setparam
-143 common sched_getparam __x64_sys_sched_getparam
-144 common sched_setscheduler __x64_sys_sched_setscheduler
-145 common sched_getscheduler __x64_sys_sched_getscheduler
-146 common sched_get_priority_max __x64_sys_sched_get_priority_max
-147 common sched_get_priority_min __x64_sys_sched_get_priority_min
-148 common sched_rr_get_interval __x64_sys_sched_rr_get_interval
-149 common mlock __x64_sys_mlock
-150 common munlock __x64_sys_munlock
-151 common mlockall __x64_sys_mlockall
-152 common munlockall __x64_sys_munlockall
-153 common vhangup __x64_sys_vhangup
-154 common modify_ldt __x64_sys_modify_ldt
-155 common pivot_root __x64_sys_pivot_root
-156 64 _sysctl __x64_sys_sysctl
-157 common prctl __x64_sys_prctl
-158 common arch_prctl __x64_sys_arch_prctl
-159 common adjtimex __x64_sys_adjtimex
-160 common setrlimit __x64_sys_setrlimit
-161 common chroot __x64_sys_chroot
-162 common sync __x64_sys_sync
-163 common acct __x64_sys_acct
-164 common settimeofday __x64_sys_settimeofday
-165 common mount __x64_sys_mount
-166 common umount2 __x64_sys_umount
-167 common swapon __x64_sys_swapon
-168 common swapoff __x64_sys_swapoff
-169 common reboot __x64_sys_reboot
-170 common sethostname __x64_sys_sethostname
-171 common setdomainname __x64_sys_setdomainname
-172 common iopl __x64_sys_iopl/ptregs
-173 common ioperm __x64_sys_ioperm
+135 common personality sys_personality
+136 common ustat sys_ustat
+137 common statfs sys_statfs
+138 common fstatfs sys_fstatfs
+139 common sysfs sys_sysfs
+140 common getpriority sys_getpriority
+141 common setpriority sys_setpriority
+142 common sched_setparam sys_sched_setparam
+143 common sched_getparam sys_sched_getparam
+144 common sched_setscheduler sys_sched_setscheduler
+145 common sched_getscheduler sys_sched_getscheduler
+146 common sched_get_priority_max sys_sched_get_priority_max
+147 common sched_get_priority_min sys_sched_get_priority_min
+148 common sched_rr_get_interval sys_sched_rr_get_interval
+149 common mlock sys_mlock
+150 common munlock sys_munlock
+151 common mlockall sys_mlockall
+152 common munlockall sys_munlockall
+153 common vhangup sys_vhangup
+154 common modify_ldt sys_modify_ldt
+155 common pivot_root sys_pivot_root
+156 64 _sysctl sys_ni_syscall
+157 common prctl sys_prctl
+158 common arch_prctl sys_arch_prctl
+159 common adjtimex sys_adjtimex
+160 common setrlimit sys_setrlimit
+161 common chroot sys_chroot
+162 common sync sys_sync
+163 common acct sys_acct
+164 common settimeofday sys_settimeofday
+165 common mount sys_mount
+166 common umount2 sys_umount
+167 common swapon sys_swapon
+168 common swapoff sys_swapoff
+169 common reboot sys_reboot
+170 common sethostname sys_sethostname
+171 common setdomainname sys_setdomainname
+172 common iopl sys_iopl
+173 common ioperm sys_ioperm
174 64 create_module
-175 common init_module __x64_sys_init_module
-176 common delete_module __x64_sys_delete_module
+175 common init_module sys_init_module
+176 common delete_module sys_delete_module
177 64 get_kernel_syms
178 64 query_module
-179 common quotactl __x64_sys_quotactl
+179 common quotactl sys_quotactl
180 64 nfsservctl
181 common getpmsg
182 common putpmsg
183 common afs_syscall
184 common tuxcall
185 common security
-186 common gettid __x64_sys_gettid
-187 common readahead __x64_sys_readahead
-188 common setxattr __x64_sys_setxattr
-189 common lsetxattr __x64_sys_lsetxattr
-190 common fsetxattr __x64_sys_fsetxattr
-191 common getxattr __x64_sys_getxattr
-192 common lgetxattr __x64_sys_lgetxattr
-193 common fgetxattr __x64_sys_fgetxattr
-194 common listxattr __x64_sys_listxattr
-195 common llistxattr __x64_sys_llistxattr
-196 common flistxattr __x64_sys_flistxattr
-197 common removexattr __x64_sys_removexattr
-198 common lremovexattr __x64_sys_lremovexattr
-199 common fremovexattr __x64_sys_fremovexattr
-200 common tkill __x64_sys_tkill
-201 common time __x64_sys_time
-202 common futex __x64_sys_futex
-203 common sched_setaffinity __x64_sys_sched_setaffinity
-204 common sched_getaffinity __x64_sys_sched_getaffinity
+186 common gettid sys_gettid
+187 common readahead sys_readahead
+188 common setxattr sys_setxattr
+189 common lsetxattr sys_lsetxattr
+190 common fsetxattr sys_fsetxattr
+191 common getxattr sys_getxattr
+192 common lgetxattr sys_lgetxattr
+193 common fgetxattr sys_fgetxattr
+194 common listxattr sys_listxattr
+195 common llistxattr sys_llistxattr
+196 common flistxattr sys_flistxattr
+197 common removexattr sys_removexattr
+198 common lremovexattr sys_lremovexattr
+199 common fremovexattr sys_fremovexattr
+200 common tkill sys_tkill
+201 common time sys_time
+202 common futex sys_futex
+203 common sched_setaffinity sys_sched_setaffinity
+204 common sched_getaffinity sys_sched_getaffinity
205 64 set_thread_area
-206 64 io_setup __x64_sys_io_setup
-207 common io_destroy __x64_sys_io_destroy
-208 common io_getevents __x64_sys_io_getevents
-209 64 io_submit __x64_sys_io_submit
-210 common io_cancel __x64_sys_io_cancel
+206 64 io_setup sys_io_setup
+207 common io_destroy sys_io_destroy
+208 common io_getevents sys_io_getevents
+209 64 io_submit sys_io_submit
+210 common io_cancel sys_io_cancel
211 64 get_thread_area
-212 common lookup_dcookie __x64_sys_lookup_dcookie
-213 common epoll_create __x64_sys_epoll_create
+212 common lookup_dcookie sys_lookup_dcookie
+213 common epoll_create sys_epoll_create
214 64 epoll_ctl_old
215 64 epoll_wait_old
-216 common remap_file_pages __x64_sys_remap_file_pages
-217 common getdents64 __x64_sys_getdents64
-218 common set_tid_address __x64_sys_set_tid_address
-219 common restart_syscall __x64_sys_restart_syscall
-220 common semtimedop __x64_sys_semtimedop
-221 common fadvise64 __x64_sys_fadvise64
-222 64 timer_create __x64_sys_timer_create
-223 common timer_settime __x64_sys_timer_settime
-224 common timer_gettime __x64_sys_timer_gettime
-225 common timer_getoverrun __x64_sys_timer_getoverrun
-226 common timer_delete __x64_sys_timer_delete
-227 common clock_settime __x64_sys_clock_settime
-228 common clock_gettime __x64_sys_clock_gettime
-229 common clock_getres __x64_sys_clock_getres
-230 common clock_nanosleep __x64_sys_clock_nanosleep
-231 common exit_group __x64_sys_exit_group
-232 common epoll_wait __x64_sys_epoll_wait
-233 common epoll_ctl __x64_sys_epoll_ctl
-234 common tgkill __x64_sys_tgkill
-235 common utimes __x64_sys_utimes
+216 common remap_file_pages sys_remap_file_pages
+217 common getdents64 sys_getdents64
+218 common set_tid_address sys_set_tid_address
+219 common restart_syscall sys_restart_syscall
+220 common semtimedop sys_semtimedop
+221 common fadvise64 sys_fadvise64
+222 64 timer_create sys_timer_create
+223 common timer_settime sys_timer_settime
+224 common timer_gettime sys_timer_gettime
+225 common timer_getoverrun sys_timer_getoverrun
+226 common timer_delete sys_timer_delete
+227 common clock_settime sys_clock_settime
+228 common clock_gettime sys_clock_gettime
+229 common clock_getres sys_clock_getres
+230 common clock_nanosleep sys_clock_nanosleep
+231 common exit_group sys_exit_group
+232 common epoll_wait sys_epoll_wait
+233 common epoll_ctl sys_epoll_ctl
+234 common tgkill sys_tgkill
+235 common utimes sys_utimes
236 64 vserver
-237 common mbind __x64_sys_mbind
-238 common set_mempolicy __x64_sys_set_mempolicy
-239 common get_mempolicy __x64_sys_get_mempolicy
-240 common mq_open __x64_sys_mq_open
-241 common mq_unlink __x64_sys_mq_unlink
-242 common mq_timedsend __x64_sys_mq_timedsend
-243 common mq_timedreceive __x64_sys_mq_timedreceive
-244 64 mq_notify __x64_sys_mq_notify
-245 common mq_getsetattr __x64_sys_mq_getsetattr
-246 64 kexec_load __x64_sys_kexec_load
-247 64 waitid __x64_sys_waitid
-248 common add_key __x64_sys_add_key
-249 common request_key __x64_sys_request_key
-250 common keyctl __x64_sys_keyctl
-251 common ioprio_set __x64_sys_ioprio_set
-252 common ioprio_get __x64_sys_ioprio_get
-253 common inotify_init __x64_sys_inotify_init
-254 common inotify_add_watch __x64_sys_inotify_add_watch
-255 common inotify_rm_watch __x64_sys_inotify_rm_watch
-256 common migrate_pages __x64_sys_migrate_pages
-257 common openat __x64_sys_openat
-258 common mkdirat __x64_sys_mkdirat
-259 common mknodat __x64_sys_mknodat
-260 common fchownat __x64_sys_fchownat
-261 common futimesat __x64_sys_futimesat
-262 common newfstatat __x64_sys_newfstatat
-263 common unlinkat __x64_sys_unlinkat
-264 common renameat __x64_sys_renameat
-265 common linkat __x64_sys_linkat
-266 common symlinkat __x64_sys_symlinkat
-267 common readlinkat __x64_sys_readlinkat
-268 common fchmodat __x64_sys_fchmodat
-269 common faccessat __x64_sys_faccessat
-270 common pselect6 __x64_sys_pselect6
-271 common ppoll __x64_sys_ppoll
-272 common unshare __x64_sys_unshare
-273 64 set_robust_list __x64_sys_set_robust_list
-274 64 get_robust_list __x64_sys_get_robust_list
-275 common splice __x64_sys_splice
-276 common tee __x64_sys_tee
-277 common sync_file_range __x64_sys_sync_file_range
-278 64 vmsplice __x64_sys_vmsplice
-279 64 move_pages __x64_sys_move_pages
-280 common utimensat __x64_sys_utimensat
-281 common epoll_pwait __x64_sys_epoll_pwait
-282 common signalfd __x64_sys_signalfd
-283 common timerfd_create __x64_sys_timerfd_create
-284 common eventfd __x64_sys_eventfd
-285 common fallocate __x64_sys_fallocate
-286 common timerfd_settime __x64_sys_timerfd_settime
-287 common timerfd_gettime __x64_sys_timerfd_gettime
-288 common accept4 __x64_sys_accept4
-289 common signalfd4 __x64_sys_signalfd4
-290 common eventfd2 __x64_sys_eventfd2
-291 common epoll_create1 __x64_sys_epoll_create1
-292 common dup3 __x64_sys_dup3
-293 common pipe2 __x64_sys_pipe2
-294 common inotify_init1 __x64_sys_inotify_init1
-295 64 preadv __x64_sys_preadv
-296 64 pwritev __x64_sys_pwritev
-297 64 rt_tgsigqueueinfo __x64_sys_rt_tgsigqueueinfo
-298 common perf_event_open __x64_sys_perf_event_open
-299 64 recvmmsg __x64_sys_recvmmsg
-300 common fanotify_init __x64_sys_fanotify_init
-301 common fanotify_mark __x64_sys_fanotify_mark
-302 common prlimit64 __x64_sys_prlimit64
-303 common name_to_handle_at __x64_sys_name_to_handle_at
-304 common open_by_handle_at __x64_sys_open_by_handle_at
-305 common clock_adjtime __x64_sys_clock_adjtime
-306 common syncfs __x64_sys_syncfs
-307 64 sendmmsg __x64_sys_sendmmsg
-308 common setns __x64_sys_setns
-309 common getcpu __x64_sys_getcpu
-310 64 process_vm_readv __x64_sys_process_vm_readv
-311 64 process_vm_writev __x64_sys_process_vm_writev
-312 common kcmp __x64_sys_kcmp
-313 common finit_module __x64_sys_finit_module
-314 common sched_setattr __x64_sys_sched_setattr
-315 common sched_getattr __x64_sys_sched_getattr
-316 common renameat2 __x64_sys_renameat2
-317 common seccomp __x64_sys_seccomp
-318 common getrandom __x64_sys_getrandom
-319 common memfd_create __x64_sys_memfd_create
-320 common kexec_file_load __x64_sys_kexec_file_load
-321 common bpf __x64_sys_bpf
-322 64 execveat __x64_sys_execveat/ptregs
-323 common userfaultfd __x64_sys_userfaultfd
-324 common membarrier __x64_sys_membarrier
-325 common mlock2 __x64_sys_mlock2
-326 common copy_file_range __x64_sys_copy_file_range
-327 64 preadv2 __x64_sys_preadv2
-328 64 pwritev2 __x64_sys_pwritev2
-329 common pkey_mprotect __x64_sys_pkey_mprotect
-330 common pkey_alloc __x64_sys_pkey_alloc
-331 common pkey_free __x64_sys_pkey_free
-332 common statx __x64_sys_statx
-333 common io_pgetevents __x64_sys_io_pgetevents
-334 common rseq __x64_sys_rseq
+237 common mbind sys_mbind
+238 common set_mempolicy sys_set_mempolicy
+239 common get_mempolicy sys_get_mempolicy
+240 common mq_open sys_mq_open
+241 common mq_unlink sys_mq_unlink
+242 common mq_timedsend sys_mq_timedsend
+243 common mq_timedreceive sys_mq_timedreceive
+244 64 mq_notify sys_mq_notify
+245 common mq_getsetattr sys_mq_getsetattr
+246 64 kexec_load sys_kexec_load
+247 64 waitid sys_waitid
+248 common add_key sys_add_key
+249 common request_key sys_request_key
+250 common keyctl sys_keyctl
+251 common ioprio_set sys_ioprio_set
+252 common ioprio_get sys_ioprio_get
+253 common inotify_init sys_inotify_init
+254 common inotify_add_watch sys_inotify_add_watch
+255 common inotify_rm_watch sys_inotify_rm_watch
+256 common migrate_pages sys_migrate_pages
+257 common openat sys_openat
+258 common mkdirat sys_mkdirat
+259 common mknodat sys_mknodat
+260 common fchownat sys_fchownat
+261 common futimesat sys_futimesat
+262 common newfstatat sys_newfstatat
+263 common unlinkat sys_unlinkat
+264 common renameat sys_renameat
+265 common linkat sys_linkat
+266 common symlinkat sys_symlinkat
+267 common readlinkat sys_readlinkat
+268 common fchmodat sys_fchmodat
+269 common faccessat sys_faccessat
+270 common pselect6 sys_pselect6
+271 common ppoll sys_ppoll
+272 common unshare sys_unshare
+273 64 set_robust_list sys_set_robust_list
+274 64 get_robust_list sys_get_robust_list
+275 common splice sys_splice
+276 common tee sys_tee
+277 common sync_file_range sys_sync_file_range
+278 64 vmsplice sys_vmsplice
+279 64 move_pages sys_move_pages
+280 common utimensat sys_utimensat
+281 common epoll_pwait sys_epoll_pwait
+282 common signalfd sys_signalfd
+283 common timerfd_create sys_timerfd_create
+284 common eventfd sys_eventfd
+285 common fallocate sys_fallocate
+286 common timerfd_settime sys_timerfd_settime
+287 common timerfd_gettime sys_timerfd_gettime
+288 common accept4 sys_accept4
+289 common signalfd4 sys_signalfd4
+290 common eventfd2 sys_eventfd2
+291 common epoll_create1 sys_epoll_create1
+292 common dup3 sys_dup3
+293 common pipe2 sys_pipe2
+294 common inotify_init1 sys_inotify_init1
+295 64 preadv sys_preadv
+296 64 pwritev sys_pwritev
+297 64 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo
+298 common perf_event_open sys_perf_event_open
+299 64 recvmmsg sys_recvmmsg
+300 common fanotify_init sys_fanotify_init
+301 common fanotify_mark sys_fanotify_mark
+302 common prlimit64 sys_prlimit64
+303 common name_to_handle_at sys_name_to_handle_at
+304 common open_by_handle_at sys_open_by_handle_at
+305 common clock_adjtime sys_clock_adjtime
+306 common syncfs sys_syncfs
+307 64 sendmmsg sys_sendmmsg
+308 common setns sys_setns
+309 common getcpu sys_getcpu
+310 64 process_vm_readv sys_process_vm_readv
+311 64 process_vm_writev sys_process_vm_writev
+312 common kcmp sys_kcmp
+313 common finit_module sys_finit_module
+314 common sched_setattr sys_sched_setattr
+315 common sched_getattr sys_sched_getattr
+316 common renameat2 sys_renameat2
+317 common seccomp sys_seccomp
+318 common getrandom sys_getrandom
+319 common memfd_create sys_memfd_create
+320 common kexec_file_load sys_kexec_file_load
+321 common bpf sys_bpf
+322 64 execveat sys_execveat
+323 common userfaultfd sys_userfaultfd
+324 common membarrier sys_membarrier
+325 common mlock2 sys_mlock2
+326 common copy_file_range sys_copy_file_range
+327 64 preadv2 sys_preadv2
+328 64 pwritev2 sys_pwritev2
+329 common pkey_mprotect sys_pkey_mprotect
+330 common pkey_alloc sys_pkey_alloc
+331 common pkey_free sys_pkey_free
+332 common statx sys_statx
+333 common io_pgetevents sys_io_pgetevents
+334 common rseq sys_rseq
# don't use numbers 387 through 423, add new calls after the last
# 'common' entry
-424 common pidfd_send_signal __x64_sys_pidfd_send_signal
-425 common io_uring_setup __x64_sys_io_uring_setup
-426 common io_uring_enter __x64_sys_io_uring_enter
-427 common io_uring_register __x64_sys_io_uring_register
-428 common open_tree __x64_sys_open_tree
-429 common move_mount __x64_sys_move_mount
-430 common fsopen __x64_sys_fsopen
-431 common fsconfig __x64_sys_fsconfig
-432 common fsmount __x64_sys_fsmount
-433 common fspick __x64_sys_fspick
-434 common pidfd_open __x64_sys_pidfd_open
-435 common clone3 __x64_sys_clone3/ptregs
+424 common pidfd_send_signal sys_pidfd_send_signal
+425 common io_uring_setup sys_io_uring_setup
+426 common io_uring_enter sys_io_uring_enter
+427 common io_uring_register sys_io_uring_register
+428 common open_tree sys_open_tree
+429 common move_mount sys_move_mount
+430 common fsopen sys_fsopen
+431 common fsconfig sys_fsconfig
+432 common fsmount sys_fsmount
+433 common fspick sys_fspick
+434 common pidfd_open sys_pidfd_open
+435 common clone3 sys_clone3
+436 common close_range sys_close_range
+437 common openat2 sys_openat2
+438 common pidfd_getfd sys_pidfd_getfd
+439 common faccessat2 sys_faccessat2
+440 common process_madvise sys_process_madvise
#
-# x32-specific system call numbers start at 512 to avoid cache impact
-# for native 64-bit operation. The __x32_compat_sys stubs are created
-# on-the-fly for compat_sys_*() compatibility system calls if X86_X32
-# is defined.
+# Due to a historical design error, certain syscalls are numbered differently
+# in x32 as compared to native x86_64. These syscalls have numbers 512-547.
+# Do not add new syscalls to this range. Numbers 548 and above are available
+# for non-x32 use.
#
-512 x32 rt_sigaction __x32_compat_sys_rt_sigaction
-513 x32 rt_sigreturn sys32_x32_rt_sigreturn
-514 x32 ioctl __x32_compat_sys_ioctl
-515 x32 readv __x32_compat_sys_readv
-516 x32 writev __x32_compat_sys_writev
-517 x32 recvfrom __x32_compat_sys_recvfrom
-518 x32 sendmsg __x32_compat_sys_sendmsg
-519 x32 recvmsg __x32_compat_sys_recvmsg
-520 x32 execve __x32_compat_sys_execve/ptregs
-521 x32 ptrace __x32_compat_sys_ptrace
-522 x32 rt_sigpending __x32_compat_sys_rt_sigpending
-523 x32 rt_sigtimedwait __x32_compat_sys_rt_sigtimedwait_time64
-524 x32 rt_sigqueueinfo __x32_compat_sys_rt_sigqueueinfo
-525 x32 sigaltstack __x32_compat_sys_sigaltstack
-526 x32 timer_create __x32_compat_sys_timer_create
-527 x32 mq_notify __x32_compat_sys_mq_notify
-528 x32 kexec_load __x32_compat_sys_kexec_load
-529 x32 waitid __x32_compat_sys_waitid
-530 x32 set_robust_list __x32_compat_sys_set_robust_list
-531 x32 get_robust_list __x32_compat_sys_get_robust_list
-532 x32 vmsplice __x32_compat_sys_vmsplice
-533 x32 move_pages __x32_compat_sys_move_pages
-534 x32 preadv __x32_compat_sys_preadv64
-535 x32 pwritev __x32_compat_sys_pwritev64
-536 x32 rt_tgsigqueueinfo __x32_compat_sys_rt_tgsigqueueinfo
-537 x32 recvmmsg __x32_compat_sys_recvmmsg_time64
-538 x32 sendmmsg __x32_compat_sys_sendmmsg
-539 x32 process_vm_readv __x32_compat_sys_process_vm_readv
-540 x32 process_vm_writev __x32_compat_sys_process_vm_writev
-541 x32 setsockopt __x32_compat_sys_setsockopt
-542 x32 getsockopt __x32_compat_sys_getsockopt
-543 x32 io_setup __x32_compat_sys_io_setup
-544 x32 io_submit __x32_compat_sys_io_submit
-545 x32 execveat __x32_compat_sys_execveat/ptregs
-546 x32 preadv2 __x32_compat_sys_preadv64v2
-547 x32 pwritev2 __x32_compat_sys_pwritev64v2
+512 x32 rt_sigaction compat_sys_rt_sigaction
+513 x32 rt_sigreturn compat_sys_x32_rt_sigreturn
+514 x32 ioctl compat_sys_ioctl
+515 x32 readv sys_readv
+516 x32 writev sys_writev
+517 x32 recvfrom compat_sys_recvfrom
+518 x32 sendmsg compat_sys_sendmsg
+519 x32 recvmsg compat_sys_recvmsg
+520 x32 execve compat_sys_execve
+521 x32 ptrace compat_sys_ptrace
+522 x32 rt_sigpending compat_sys_rt_sigpending
+523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait_time64
+524 x32 rt_sigqueueinfo compat_sys_rt_sigqueueinfo
+525 x32 sigaltstack compat_sys_sigaltstack
+526 x32 timer_create compat_sys_timer_create
+527 x32 mq_notify compat_sys_mq_notify
+528 x32 kexec_load compat_sys_kexec_load
+529 x32 waitid compat_sys_waitid
+530 x32 set_robust_list compat_sys_set_robust_list
+531 x32 get_robust_list compat_sys_get_robust_list
+532 x32 vmsplice sys_vmsplice
+533 x32 move_pages compat_sys_move_pages
+534 x32 preadv compat_sys_preadv64
+535 x32 pwritev compat_sys_pwritev64
+536 x32 rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo
+537 x32 recvmmsg compat_sys_recvmmsg_time64
+538 x32 sendmmsg compat_sys_sendmmsg
+539 x32 process_vm_readv sys_process_vm_readv
+540 x32 process_vm_writev sys_process_vm_writev
+541 x32 setsockopt sys_setsockopt
+542 x32 getsockopt sys_getsockopt
+543 x32 io_setup compat_sys_io_setup
+544 x32 io_submit compat_sys_io_submit
+545 x32 execveat compat_sys_execveat
+546 x32 preadv2 compat_sys_preadv64v2
+547 x32 pwritev2 compat_sys_pwritev64v2
+# This is the end of the legacy x32 range. Numbers 548 and above are
+# not special and are not to be used for x32-specific syscalls.
diff --git a/arch/x86/entry/syscalls/syscallhdr.sh b/arch/x86/entry/syscalls/syscallhdr.sh
index 12fbbcf..cc1e638 100644
--- a/arch/x86/entry/syscalls/syscallhdr.sh
+++ b/arch/x86/entry/syscalls/syscallhdr.sh
@@ -15,14 +15,21 @@
echo "#define ${fileguard} 1"
echo ""
+ max=0
while read nr abi name entry ; do
if [ -z "$offset" ]; then
echo "#define __NR_${prefix}${name} $nr"
else
echo "#define __NR_${prefix}${name} ($offset + $nr)"
fi
+
+ max=$nr
done
echo ""
+ echo "#ifdef __KERNEL__"
+ echo "#define __NR_${prefix}syscall_max $max"
+ echo "#endif"
+ echo ""
echo "#endif /* ${fileguard} */"
) > "$out"
diff --git a/arch/x86/entry/syscalls/syscalltbl.sh b/arch/x86/entry/syscalls/syscalltbl.sh
index 1af2be3..929bde1 100644
--- a/arch/x86/entry/syscalls/syscalltbl.sh
+++ b/arch/x86/entry/syscalls/syscalltbl.sh
@@ -9,15 +9,7 @@
local nr="$2"
local entry="$3"
- # Entry can be either just a function name or "function/qualifier"
- real_entry="${entry%%/*}"
- if [ "$entry" = "$real_entry" ]; then
- qualifier=
- else
- qualifier=${entry#*/}
- fi
-
- echo "__SYSCALL_${abi}($nr, $real_entry, $qualifier)"
+ echo "__SYSCALL_${abi}($nr, $entry)"
}
emit() {
@@ -25,27 +17,15 @@
local nr="$2"
local entry="$3"
local compat="$4"
- local umlentry=""
if [ "$abi" != "I386" -a -n "$compat" ]; then
echo "a compat entry ($abi: $compat) for a 64-bit syscall makes no sense" >&2
exit 1
fi
- # For CONFIG_UML, we need to strip the __x64_sys prefix
- if [ "$abi" = "64" -a "${entry}" != "${entry#__x64_sys}" ]; then
- umlentry="sys${entry#__x64_sys}"
- fi
-
if [ -z "$compat" ]; then
- if [ -n "$entry" -a -z "$umlentry" ]; then
+ if [ -n "$entry" ]; then
syscall_macro "$abi" "$nr" "$entry"
- elif [ -n "$umlentry" ]; then # implies -n "$entry"
- echo "#ifdef CONFIG_X86"
- syscall_macro "$abi" "$nr" "$entry"
- echo "#else /* CONFIG_UML */"
- syscall_macro "$abi" "$nr" "$umlentry"
- echo "#endif"
fi
else
echo "#ifdef CONFIG_X86_32"
@@ -61,24 +41,6 @@
grep '^[0-9]' "$in" | sort -n | (
while read nr abi name entry compat; do
abi=`echo "$abi" | tr '[a-z]' '[A-Z]'`
- if [ "$abi" = "COMMON" -o "$abi" = "64" ]; then
- emit 64 "$nr" "$entry" "$compat"
- if [ "$abi" = "COMMON" ]; then
- # COMMON means that this syscall exists in the same form for
- # 64-bit and X32.
- echo "#ifdef CONFIG_X86_X32_ABI"
- emit X32 "$nr" "$entry" "$compat"
- echo "#endif"
- fi
- elif [ "$abi" = "X32" ]; then
- echo "#ifdef CONFIG_X86_X32_ABI"
- emit X32 "$nr" "$entry" "$compat"
- echo "#endif"
- elif [ "$abi" = "I386" ]; then
- emit "$abi" "$nr" "$entry" "$compat"
- else
- echo "Unknown abi $abi" >&2
- exit 1
- fi
+ emit "$abi" "$nr" "$entry" "$compat"
done
) > "$out"
diff --git a/arch/x86/entry/thunk_32.S b/arch/x86/entry/thunk_32.S
index 2713490..f1f96d4 100644
--- a/arch/x86/entry/thunk_32.S
+++ b/arch/x86/entry/thunk_32.S
@@ -10,8 +10,7 @@
/* put return address in eax (arg1) */
.macro THUNK name, func, put_ret_addr_in_eax=0
- .globl \name
-\name:
+SYM_CODE_START_NOALIGN(\name)
pushl %eax
pushl %ecx
pushl %edx
@@ -27,17 +26,13 @@
popl %eax
ret
_ASM_NOKPROBE(\name)
+SYM_CODE_END(\name)
.endm
-#ifdef CONFIG_TRACE_IRQFLAGS
- THUNK trace_hardirqs_on_thunk,trace_hardirqs_on_caller,1
- THUNK trace_hardirqs_off_thunk,trace_hardirqs_off_caller,1
-#endif
-
#ifdef CONFIG_PREEMPTION
- THUNK ___preempt_schedule, preempt_schedule
- THUNK ___preempt_schedule_notrace, preempt_schedule_notrace
- EXPORT_SYMBOL(___preempt_schedule)
- EXPORT_SYMBOL(___preempt_schedule_notrace)
+ THUNK preempt_schedule_thunk, preempt_schedule
+ THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace
+ EXPORT_SYMBOL(preempt_schedule_thunk)
+ EXPORT_SYMBOL(preempt_schedule_notrace_thunk)
#endif
diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S
index ea5c416..c9a9fbf 100644
--- a/arch/x86/entry/thunk_64.S
+++ b/arch/x86/entry/thunk_64.S
@@ -3,7 +3,6 @@
* Save registers before calling assembly functions. This avoids
* disturbance of register allocation in some inline assembly constructs.
* Copyright 2001,2002 by Andi Kleen, SuSE Labs.
- * Added trace_hardirqs callers - Copyright 2007 Steven Rostedt, Red Hat, Inc.
*/
#include <linux/linkage.h>
#include "calling.h"
@@ -12,7 +11,7 @@
/* rdi: arg1 ... normal C conventions. rax is saved/restored. */
.macro THUNK name, func, put_ret_addr_in_rdi=0
- ENTRY(\name)
+SYM_FUNC_START_NOALIGN(\name)
pushq %rbp
movq %rsp, %rbp
@@ -32,31 +31,20 @@
.endif
call \func
- jmp .L_restore
- ENDPROC(\name)
+ jmp __thunk_restore
+SYM_FUNC_END(\name)
_ASM_NOKPROBE(\name)
.endm
-#ifdef CONFIG_TRACE_IRQFLAGS
- THUNK trace_hardirqs_on_thunk,trace_hardirqs_on_caller,1
- THUNK trace_hardirqs_off_thunk,trace_hardirqs_off_caller,1
-#endif
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- THUNK lockdep_sys_exit_thunk,lockdep_sys_exit
+#ifdef CONFIG_PREEMPTION
+ THUNK preempt_schedule_thunk, preempt_schedule
+ THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace
+ EXPORT_SYMBOL(preempt_schedule_thunk)
+ EXPORT_SYMBOL(preempt_schedule_notrace_thunk)
#endif
#ifdef CONFIG_PREEMPTION
- THUNK ___preempt_schedule, preempt_schedule
- THUNK ___preempt_schedule_notrace, preempt_schedule_notrace
- EXPORT_SYMBOL(___preempt_schedule)
- EXPORT_SYMBOL(___preempt_schedule_notrace)
-#endif
-
-#if defined(CONFIG_TRACE_IRQFLAGS) \
- || defined(CONFIG_DEBUG_LOCK_ALLOC) \
- || defined(CONFIG_PREEMPTION)
-.L_restore:
+SYM_CODE_START_LOCAL_NOALIGN(__thunk_restore)
popq %r11
popq %r10
popq %r9
@@ -68,5 +56,6 @@
popq %rdi
popq %rbp
ret
- _ASM_NOKPROBE(.L_restore)
+ _ASM_NOKPROBE(__thunk_restore)
+SYM_CODE_END(__thunk_restore)
#endif
diff --git a/arch/x86/entry/vdso/.gitignore b/arch/x86/entry/vdso/.gitignore
index aae8ffd..37a6129 100644
--- a/arch/x86/entry/vdso/.gitignore
+++ b/arch/x86/entry/vdso/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
vdso.lds
vdsox32.lds
vdso32-syscall-syms.lds
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 0f21541..2124374 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -9,9 +9,10 @@
ARCH_REL_TYPE_ABS += R_386_GLOB_DAT|R_386_JMP_SLOT|R_386_RELATIVE
include $(srctree)/lib/vdso/Makefile
-KBUILD_CFLAGS += $(DISABLE_LTO)
+# Sanitizer runtimes are unavailable and cannot be linked here.
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
+KCSAN_SANITIZE := n
OBJECT_FILES_NON_STANDARD := y
# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
@@ -24,9 +25,14 @@
# files to link into the vdso
vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o
+vobjs32-y := vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o
+vobjs32-y += vdso32/vclock_gettime.o
# files to link into kernel
obj-y += vma.o
+KASAN_SANITIZE_vma.o := y
+UBSAN_SANITIZE_vma.o := y
+KCSAN_SANITIZE_vma.o := y
OBJECT_FILES_NON_STANDARD_vma.o := n
# vDSO images to build
@@ -37,10 +43,12 @@
obj-$(VDSO32-y) += vdso32-setup.o
vobjs := $(foreach F,$(vobjs-y),$(obj)/$F)
+vobjs32 := $(foreach F,$(vobjs32-y),$(obj)/$F)
$(obj)/vdso.o: $(obj)/vdso.so
targets += vdso.lds $(vobjs-y)
+targets += vdso32/vdso32.lds $(vobjs32-y)
# Build the vDSO image C files and link them in.
vdso_img_objs := $(vdso_img-y:%=vdso-image-%.o)
@@ -59,7 +67,7 @@
$(call if_changed,vdso_and_check)
HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/$(SUBARCH)/include/uapi
-hostprogs-y += vdso2c
+hostprogs += vdso2c
quiet_cmd_vdso2c = VDSO2C $@
cmd_vdso2c = $(obj)/vdso2c $< $(<:%.dbg=%) $@
@@ -72,7 +80,7 @@
# optimize sibling calls.
#
CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \
- $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \
+ $(filter -g%,$(KBUILD_CFLAGS)) -fno-stack-protector \
-fno-omit-frame-pointer -foptimize-sibling-calls \
-DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
@@ -87,11 +95,9 @@
#
# vDSO code runs in userspace and -pg doesn't help with profiling anyway.
#
-CFLAGS_REMOVE_vdso-note.o = -pg
CFLAGS_REMOVE_vclock_gettime.o = -pg
CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg
CFLAGS_REMOVE_vgetcpu.o = -pg
-CFLAGS_REMOVE_vvar.o = -pg
#
# X32 processes use x32 vDSO to access 64bit kernel data.
@@ -132,10 +138,6 @@
CPPFLAGS_vdso32/vdso32.lds = $(CPPFLAGS_vdso.lds)
VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -soname linux-gate.so.1
-targets += vdso32/vdso32.lds
-targets += vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o
-targets += vdso32/vclock_gettime.o
-
KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) -DBUILD_VDSO
$(obj)/vdso32.so.dbg: KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
$(obj)/vdso32.so.dbg: asflags-$(CONFIG_X86_64) += -m32
@@ -147,7 +149,7 @@
KBUILD_CFLAGS_32 := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 := $(filter-out $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic
-KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector)
+KBUILD_CFLAGS_32 += -fno-stack-protector
KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)
KBUILD_CFLAGS_32 += -fno-omit-frame-pointer
KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING
@@ -160,12 +162,7 @@
$(obj)/vdso32.so.dbg: KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
-$(obj)/vdso32.so.dbg: FORCE \
- $(obj)/vdso32/vdso32.lds \
- $(obj)/vdso32/vclock_gettime.o \
- $(obj)/vdso32/note.o \
- $(obj)/vdso32/system_call.o \
- $(obj)/vdso32/sigreturn.o
+$(obj)/vdso32.so.dbg: $(obj)/vdso32/vdso32.lds $(vobjs32) FORCE
$(call if_changed,vdso_and_check)
#
@@ -177,7 +174,7 @@
-T $(filter %.lds,$^) $(filter %.o,$^) && \
sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'
-VDSO_LDFLAGS = -shared --hash-style=both --build-id \
+VDSO_LDFLAGS = -shared --hash-style=both --build-id=sha1 \
$(call ld-option, --eh-frame-hdr) -Bsymbolic
GCOV_PROFILE := n
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index d9ff616..7d70935 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -15,7 +15,7 @@
#include "../../../../lib/vdso/gettimeofday.c"
extern int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz);
-extern time_t __vdso_time(time_t *t);
+extern __kernel_old_time_t __vdso_time(__kernel_old_time_t *t);
int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
{
@@ -25,12 +25,12 @@
int gettimeofday(struct __kernel_old_timeval *, struct timezone *)
__attribute__((weak, alias("__vdso_gettimeofday")));
-time_t __vdso_time(time_t *t)
+__kernel_old_time_t __vdso_time(__kernel_old_time_t *t)
{
return __cvdso_time(t);
}
-time_t time(time_t *t) __attribute__((weak, alias("__vdso_time")));
+__kernel_old_time_t time(__kernel_old_time_t *t) __attribute__((weak, alias("__vdso_time")));
#if defined(CONFIG_X86_64) && !defined(BUILD_VDSO32_64)
diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S
index 93c6dc7..4d15293 100644
--- a/arch/x86/entry/vdso/vdso-layout.lds.S
+++ b/arch/x86/entry/vdso/vdso-layout.lds.S
@@ -16,18 +16,23 @@
* segment.
*/
- vvar_start = . - 3 * PAGE_SIZE;
- vvar_page = vvar_start;
+ vvar_start = . - 4 * PAGE_SIZE;
+ vvar_page = vvar_start;
/* Place all vvars at the offsets in asm/vvar.h. */
#define EMIT_VVAR(name, offset) vvar_ ## name = vvar_page + offset;
-#define __VVAR_KERNEL_LDS
#include <asm/vvar.h>
-#undef __VVAR_KERNEL_LDS
#undef EMIT_VVAR
pvclock_page = vvar_start + PAGE_SIZE;
hvclock_page = vvar_start + 2 * PAGE_SIZE;
+ timens_page = vvar_start + 3 * PAGE_SIZE;
+
+#undef _ASM_X86_VVAR_H
+ /* Place all vvars in timens too at the offsets in asm/vvar.h. */
+#define EMIT_VVAR(name, offset) timens_ ## name = timens_page + offset;
+#include <asm/vvar.h>
+#undef EMIT_VVAR
. = SIZEOF_HEADERS;
@@ -52,6 +57,13 @@
*(.gnu.linkonce.b.*)
} :text
+ /*
+ * Discard .note.gnu.property sections which are unused and have
+ * different alignment requirement from vDSO note sections.
+ */
+ /DISCARD/ : {
+ *(.note.gnu.property)
+ }
.note : { *(.note.*) } :text :note
.eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c
index 3a4d8d4..7380908 100644
--- a/arch/x86/entry/vdso/vdso2c.c
+++ b/arch/x86/entry/vdso/vdso2c.c
@@ -75,12 +75,14 @@
sym_vvar_page,
sym_pvclock_page,
sym_hvclock_page,
+ sym_timens_page,
};
const int special_pages[] = {
sym_vvar_page,
sym_pvclock_page,
sym_hvclock_page,
+ sym_timens_page,
};
struct vdso_sym {
@@ -93,6 +95,7 @@
[sym_vvar_page] = {"vvar_page", true},
[sym_pvclock_page] = {"pvclock_page", true},
[sym_hvclock_page] = {"hvclock_page", true},
+ [sym_timens_page] = {"timens_page", true},
{"VDSO32_NOTE_MASK", true},
{"__kernel_vsyscall", true},
{"__kernel_sigreturn", true},
@@ -184,7 +187,7 @@
int fd = open(name, O_RDONLY);
if (fd == -1)
- err(1, "%s", name);
+ err(1, "open(%s)", name);
tmp_len = lseek(fd, 0, SEEK_END);
if (tmp_len == (off_t)-1)
@@ -237,7 +240,7 @@
outfilename = argv[3];
outfile = fopen(outfilename, "w");
if (!outfile)
- err(1, "%s", argv[2]);
+ err(1, "fopen(%s)", outfilename);
go(raw_addr, raw_len, stripped_addr, stripped_len, outfile, name);
diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h
index a20b134..6f46e11 100644
--- a/arch/x86/entry/vdso/vdso2c.h
+++ b/arch/x86/entry/vdso/vdso2c.h
@@ -13,8 +13,7 @@
unsigned long load_size = -1; /* Work around bogus warning */
unsigned long mapping_size;
ELF(Ehdr) *hdr = (ELF(Ehdr) *)raw_addr;
- int i;
- unsigned long j;
+ unsigned long i, syms_nr;
ELF(Shdr) *symtab_hdr = NULL, *strtab_hdr, *secstrings_hdr,
*alt_sec = NULL;
ELF(Dyn) *dyn = 0, *dyn_end = 0;
@@ -86,11 +85,10 @@
strtab_hdr = raw_addr + GET_LE(&hdr->e_shoff) +
GET_LE(&hdr->e_shentsize) * GET_LE(&symtab_hdr->sh_link);
+ syms_nr = GET_LE(&symtab_hdr->sh_size) / GET_LE(&symtab_hdr->sh_entsize);
/* Walk the symbol table */
- for (i = 0;
- i < GET_LE(&symtab_hdr->sh_size) / GET_LE(&symtab_hdr->sh_entsize);
- i++) {
- int k;
+ for (i = 0; i < syms_nr; i++) {
+ unsigned int k;
ELF(Sym) *sym = raw_addr + GET_LE(&symtab_hdr->sh_offset) +
GET_LE(&symtab_hdr->sh_entsize) * i;
const char *sym_name = raw_addr +
@@ -150,11 +148,11 @@
fprintf(outfile,
"static unsigned char raw_data[%lu] __ro_after_init __aligned(PAGE_SIZE) = {",
mapping_size);
- for (j = 0; j < stripped_len; j++) {
- if (j % 10 == 0)
+ for (i = 0; i < stripped_len; i++) {
+ if (i % 10 == 0)
fprintf(outfile, "\n\t");
fprintf(outfile, "0x%02X, ",
- (int)((unsigned char *)stripped_addr)[j]);
+ (int)((unsigned char *)stripped_addr)[i]);
}
fprintf(outfile, "\n};\n\n");
diff --git a/arch/x86/entry/vdso/vdso32/.gitignore b/arch/x86/entry/vdso/vdso32/.gitignore
index e45fba9..5167384 100644
--- a/arch/x86/entry/vdso/vdso32/.gitignore
+++ b/arch/x86/entry/vdso/vdso32/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
vdso32.lds
diff --git a/arch/x86/entry/vdso/vdso32/note.S b/arch/x86/entry/vdso/vdso32/note.S
index e78047d..2cbd399 100644
--- a/arch/x86/entry/vdso/vdso32/note.S
+++ b/arch/x86/entry/vdso/vdso32/note.S
@@ -16,33 +16,3 @@
ELFNOTE_END
BUILD_SALT
-
-#ifdef CONFIG_XEN
-/*
- * Add a special note telling glibc's dynamic linker a fake hardware
- * flavor that it will use to choose the search path for libraries in the
- * same way it uses real hardware capabilities like "mmx".
- * We supply "nosegneg" as the fake capability, to indicate that we
- * do not like negative offsets in instructions using segment overrides,
- * since we implement those inefficiently. This makes it possible to
- * install libraries optimized to avoid those access patterns in someplace
- * like /lib/i686/tls/nosegneg. Note that an /etc/ld.so.conf.d/file
- * corresponding to the bits here is needed to make ldconfig work right.
- * It should contain:
- * hwcap 1 nosegneg
- * to match the mapping of bit to name that we give here.
- *
- * At runtime, the fake hardware feature will be considered to be present
- * if its bit is set in the mask word. So, we start with the mask 0, and
- * at boot time we set VDSO_NOTE_NONEGSEG_BIT if running under Xen.
- */
-
-#include "../../xen/vdso.h" /* Defines VDSO_NOTE_NONEGSEG_BIT. */
-
-ELFNOTE_START(GNU, 2, "a")
- .long 1 /* ncaps */
-VDSO32_NOTE_MASK: /* Symbol used by arch/x86/xen/setup.c */
- .long 0 /* mask */
- .byte VDSO_NOTE_NONEGSEG_BIT; .asciz "nosegneg" /* bit, name */
-ELFNOTE_END
-#endif
diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S
index 263d743..de1fff7 100644
--- a/arch/x86/entry/vdso/vdso32/system_call.S
+++ b/arch/x86/entry/vdso/vdso32/system_call.S
@@ -62,7 +62,7 @@
/* Enter using int $0x80 */
int $0x80
-GLOBAL(int80_landing_pad)
+SYM_INNER_LABEL(int80_landing_pad, SYM_L_GLOBAL)
/*
* Restore EDX and ECX in case they were clobbered. EBP is not
diff --git a/arch/x86/entry/vdso/vdso32/vclock_gettime.c b/arch/x86/entry/vdso/vdso32/vclock_gettime.c
index 9242b28..283ed9d 100644
--- a/arch/x86/entry/vdso/vdso32/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vdso32/vclock_gettime.c
@@ -1,10 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#define BUILD_VDSO32
-#ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE
-#undef CONFIG_OPTIMIZE_INLINING
-#endif
-
#ifdef CONFIG_X86_64
/*
@@ -13,10 +9,12 @@
*/
#undef CONFIG_64BIT
#undef CONFIG_X86_64
+#undef CONFIG_COMPAT
#undef CONFIG_PGTABLE_LEVELS
#undef CONFIG_ILLEGAL_POINTER_VALUE
#undef CONFIG_SPARSEMEM_VMEMMAP
#undef CONFIG_NR_CPUS
+#undef CONFIG_PARAVIRT_XXL
#define CONFIG_X86_32 1
#define CONFIG_PGTABLE_LEVELS 2
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index f593774..9185cb1 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -14,16 +14,32 @@
#include <linux/elf.h>
#include <linux/cpu.h>
#include <linux/ptrace.h>
+#include <linux/time_namespace.h>
+
#include <asm/pvclock.h>
#include <asm/vgtod.h>
#include <asm/proto.h>
#include <asm/vdso.h>
#include <asm/vvar.h>
+#include <asm/tlb.h>
#include <asm/page.h>
#include <asm/desc.h>
#include <asm/cpufeature.h>
#include <clocksource/hyperv_timer.h>
+#undef _ASM_X86_VVAR_H
+#define EMIT_VVAR(name, offset) \
+ const size_t name ## _offset = offset;
+#include <asm/vvar.h>
+
+struct vdso_data *arch_get_vdso_data(void *vvar_page)
+{
+ return (struct vdso_data *)(vvar_page + _vdso_data_offset);
+}
+#undef EMIT_VVAR
+
+unsigned int vclocks_used __read_mostly;
+
#if defined(CONFIG_X86_64)
unsigned int __read_mostly vdso64_enabled = 1;
#endif
@@ -37,6 +53,7 @@
image->alt_len));
}
+static const struct vm_special_mapping vvar_mapping;
struct linux_binprm;
static vm_fault_t vdso_fault(const struct vm_special_mapping *sm,
@@ -84,10 +101,73 @@
return 0;
}
+static int vvar_mremap(const struct vm_special_mapping *sm,
+ struct vm_area_struct *new_vma)
+{
+ const struct vdso_image *image = new_vma->vm_mm->context.vdso_image;
+ unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
+
+ if (new_size != -image->sym_vvar_start)
+ return -EINVAL;
+
+ return 0;
+}
+
+#ifdef CONFIG_TIME_NS
+static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+ if (likely(vma->vm_mm == current->mm))
+ return current->nsproxy->time_ns->vvar_page;
+
+ /*
+ * VM_PFNMAP | VM_IO protect .fault() handler from being called
+ * through interfaces like /proc/$pid/mem or
+ * process_vm_{readv,writev}() as long as there's no .access()
+ * in special_mapping_vmops().
+ * For more details check_vma_flags() and __access_remote_vm()
+ */
+
+ WARN(1, "vvar_page accessed remotely");
+
+ return NULL;
+}
+
+/*
+ * The vvar page layout depends on whether a task belongs to the root or
+ * non-root time namespace. Whenever a task changes its namespace, the VVAR
+ * page tables are cleared and then they will re-faulted with a
+ * corresponding layout.
+ * See also the comment near timens_setup_vdso_data() for details.
+ */
+int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
+{
+ struct mm_struct *mm = task->mm;
+ struct vm_area_struct *vma;
+
+ mmap_read_lock(mm);
+
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ unsigned long size = vma->vm_end - vma->vm_start;
+
+ if (vma_is_special_mapping(vma, &vvar_mapping))
+ zap_page_range(vma, vma->vm_start, size);
+ }
+
+ mmap_read_unlock(mm);
+ return 0;
+}
+#else
+static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+ return NULL;
+}
+#endif
+
static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
struct vm_area_struct *vma, struct vm_fault *vmf)
{
const struct vdso_image *image = vma->vm_mm->context.vdso_image;
+ unsigned long pfn;
long sym_offset;
if (!image)
@@ -107,12 +187,40 @@
return VM_FAULT_SIGBUS;
if (sym_offset == image->sym_vvar_page) {
- return vmf_insert_pfn(vma, vmf->address,
- __pa_symbol(&__vvar_page) >> PAGE_SHIFT);
+ struct page *timens_page = find_timens_vvar_page(vma);
+
+ pfn = __pa_symbol(&__vvar_page) >> PAGE_SHIFT;
+
+ /*
+ * If a task belongs to a time namespace then a namespace
+ * specific VVAR is mapped with the sym_vvar_page offset and
+ * the real VVAR page is mapped with the sym_timens_page
+ * offset.
+ * See also the comment near timens_setup_vdso_data().
+ */
+ if (timens_page) {
+ unsigned long addr;
+ vm_fault_t err;
+
+ /*
+ * Optimization: inside time namespace pre-fault
+ * VVAR page too. As on timens page there are only
+ * offsets for clocks on VVAR, it'll be faulted
+ * shortly by VDSO code.
+ */
+ addr = vmf->address + (image->sym_timens_page - sym_offset);
+ err = vmf_insert_pfn(vma, addr, pfn);
+ if (unlikely(err & VM_FAULT_ERROR))
+ return err;
+
+ pfn = page_to_pfn(timens_page);
+ }
+
+ return vmf_insert_pfn(vma, vmf->address, pfn);
} else if (sym_offset == image->sym_pvclock_page) {
struct pvclock_vsyscall_time_info *pvti =
pvclock_get_pvti_cpu0_va();
- if (pvti && vclock_was_used(VCLOCK_PVCLOCK)) {
+ if (pvti && vclock_was_used(VDSO_CLOCKMODE_PVCLOCK)) {
return vmf_insert_pfn_prot(vma, vmf->address,
__pa(pvti) >> PAGE_SHIFT,
pgprot_decrypted(vma->vm_page_prot));
@@ -120,9 +228,17 @@
} else if (sym_offset == image->sym_hvclock_page) {
struct ms_hyperv_tsc_page *tsc_pg = hv_get_tsc_page();
- if (tsc_pg && vclock_was_used(VCLOCK_HVCLOCK))
+ if (tsc_pg && vclock_was_used(VDSO_CLOCKMODE_HVCLOCK))
return vmf_insert_pfn(vma, vmf->address,
virt_to_phys(tsc_pg) >> PAGE_SHIFT);
+ } else if (sym_offset == image->sym_timens_page) {
+ struct page *timens_page = find_timens_vvar_page(vma);
+
+ if (!timens_page)
+ return VM_FAULT_SIGBUS;
+
+ pfn = __pa_symbol(&__vvar_page) >> PAGE_SHIFT;
+ return vmf_insert_pfn(vma, vmf->address, pfn);
}
return VM_FAULT_SIGBUS;
@@ -136,6 +252,7 @@
static const struct vm_special_mapping vvar_mapping = {
.name = "[vvar]",
.fault = vvar_fault,
+ .mremap = vvar_mremap,
};
/*
@@ -150,7 +267,7 @@
unsigned long text_start;
int ret = 0;
- if (down_write_killable(&mm->mmap_sem))
+ if (mmap_write_lock_killable(mm))
return -EINTR;
addr = get_unmapped_area(NULL, addr,
@@ -193,7 +310,7 @@
}
up_fail:
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
return ret;
}
@@ -255,7 +372,7 @@
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
- down_write(&mm->mmap_sem);
+ mmap_write_lock(mm);
/*
* Check if we have already mapped vdso blob - fail to prevent
* abusing from userspace install_speciall_mapping, which may
@@ -266,11 +383,11 @@
for (vma = mm->mmap; vma; vma = vma->vm_next) {
if (vma_is_special_mapping(vma, &vdso_mapping) ||
vma_is_special_mapping(vma, &vvar_mapping)) {
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
return -EEXIST;
}
}
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
return map_vdso(image, addr);
}
@@ -329,6 +446,8 @@
static int __init init_vdso(void)
{
+ BUILD_BUG_ON(VDSO_CLOCKMODE_MAX >= 32);
+
init_vdso_image(&vdso_image_64);
#ifdef CONFIG_X86_X32_ABI
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index e7c596d..44c3310 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -184,7 +184,7 @@
*/
switch (vsyscall_nr) {
case 0:
- if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) ||
+ if (!write_ok_or_segv(regs->di, sizeof(struct __kernel_old_timeval)) ||
!write_ok_or_segv(regs->si, sizeof(struct timezone))) {
ret = -EFAULT;
goto check_fault;
@@ -194,7 +194,7 @@
break;
case 1:
- if (!write_ok_or_segv(regs->di, sizeof(time_t))) {
+ if (!write_ok_or_segv(regs->di, sizeof(__kernel_old_time_t))) {
ret = -EFAULT;
goto check_fault;
}
@@ -222,7 +222,7 @@
*/
regs->orig_ax = syscall_nr;
regs->ax = -ENOSYS;
- tmp = secure_computing(NULL);
+ tmp = secure_computing();
if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) {
warn_bad_vsyscall(KERN_DEBUG, regs,
"seccomp tried to change syscall nr or ip");