Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/arch/um/.gitignore b/arch/um/.gitignore
index a73d3a1..6323e55 100644
--- a/arch/um/.gitignore
+++ b/arch/um/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
kernel/config.c
kernel/config.tmp
kernel/vmlinux.lds
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index c56d352..4b799fa 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -14,11 +14,12 @@
select HAVE_FUTEX_CMPXCHG if FUTEX
select HAVE_DEBUG_KMEMLEAK
select HAVE_DEBUG_BUGVERBOSE
- select HAVE_COPY_THREAD_TLS
+ select NO_DMA
select GENERIC_IRQ_SHOW
select GENERIC_CPU_DEVICES
select GENERIC_CLOCKEVENTS
select HAVE_GCC_PLUGINS
+ select SET_FS
select TTY # Needed for line.c
config MMU
@@ -62,9 +63,12 @@
source "arch/$(HEADER_ARCH)/um/Kconfig"
+config MAY_HAVE_RUNTIME_DEPS
+ bool
+
config STATIC_LINK
bool "Force a static link"
- default n
+ depends on CC_CAN_LINK_STATIC_NO_RUNTIME_DEPS || !MAY_HAVE_RUNTIME_DEPS
help
This option gives you the ability to force a static link of UML.
Normally, UML is linked as a shared binary. This is inconvenient for
@@ -73,6 +77,9 @@
Additionally, this option enables using higher memory spaces (up to
2.75G) for UML.
+ NOTE: This option is incompatible with some networking features which
+ depend on features that require being dynamically loaded (like NSS).
+
config LD_SCRIPT_STATIC
bool
default y
@@ -154,7 +161,7 @@
It is possible to reduce the stack to 1 for 64BIT and 0 for 32BIT on
older (pre-2017) CPUs. It is not recommended on newer CPUs due to the
increase in the size of the state which needs to be saved when handling
- signals.
+ signals.
config MMAPPER
tristate "iomem emulation driver"
@@ -162,33 +169,17 @@
This driver allows a host file to be used as emulated IO memory inside
UML.
-config NO_DMA
- def_bool y
-
config PGTABLE_LEVELS
int
default 3 if 3_LEVEL_PGTABLES
default 2
-config SECCOMP
- def_bool y
- prompt "Enable seccomp to safely compute untrusted bytecode"
- ---help---
- This kernel feature is useful for number crunching applications
- that may need to compute untrusted bytecode during their
- execution. By using pipes or other transports made available to
- the process as file descriptors supporting the read/write
- syscalls, it's possible to isolate those applications in
- their own address space using seccomp. Once seccomp is
- enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
- and the task is only allowed to execute a few safe syscalls
- defined by each seccomp mode.
-
- If unsure, say Y.
-
config UML_TIME_TRAVEL_SUPPORT
bool
prompt "Support time-travel mode (e.g. for test execution)"
+ # inf-cpu mode is incompatible with the benchmarking
+ depends on !RAID6_PQ_BENCHMARK
+ depends on !SMP
help
Enable this option to support time travel inside the UML instance.
diff --git a/arch/um/Kconfig.debug b/arch/um/Kconfig.debug
index e4a0f12..1dfb295 100644
--- a/arch/um/Kconfig.debug
+++ b/arch/um/Kconfig.debug
@@ -31,7 +31,7 @@
config EARLY_PRINTK
bool "Early printk"
default y
- ---help---
+ help
Write kernel log output directly to stdout.
This is useful for kernel debugging when your machine crashes very
diff --git a/arch/um/Makefile b/arch/um/Makefile
index 275f5ff..1cea46f 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -121,8 +121,7 @@
LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib $(call cc-option, -no-pie)
CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,) \
- $(call cc-option, -fno-stack-protector,) \
- $(call cc-option, -fno-stack-protector-all,)
+ -fno-stack-protector $(call cc-option, -fno-stack-protector-all)
# Options used by linker script
export LDS_START := $(START)
@@ -140,7 +139,7 @@
# When cleaning we don't include .config, so we don't include
# TT or skas makefiles and don't clean skas_ptregs.h.
CLEAN_FILES += linux x.i gmon.out
-MRPROPER_DIRS += arch/$(SUBARCH)/include/generated
+MRPROPER_FILES += arch/$(SUBARCH)/include/generated
archclean:
@find . \( -name '*.bb' -o -name '*.bbg' -o -name '*.da' \
diff --git a/arch/um/configs/i386_defconfig b/arch/um/configs/i386_defconfig
index 73e98bb..fb51bd2 100644
--- a/arch/um/configs/i386_defconfig
+++ b/arch/um/configs/i386_defconfig
@@ -26,7 +26,7 @@
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
-CONFIG_IOSCHED_CFQ=m
+CONFIG_IOSCHED_BFQ=m
CONFIG_SSL=y
CONFIG_NULL_CHAN=y
CONFIG_PORT_CHAN=y
diff --git a/arch/um/configs/kunit_defconfig b/arch/um/configs/kunit_defconfig
new file mode 100644
index 0000000..9235b7d
--- /dev/null
+++ b/arch/um/configs/kunit_defconfig
@@ -0,0 +1,3 @@
+CONFIG_KUNIT=y
+CONFIG_KUNIT_TEST=y
+CONFIG_KUNIT_EXAMPLE_TEST=y
diff --git a/arch/um/configs/x86_64_defconfig b/arch/um/configs/x86_64_defconfig
index 3281d76..477b873 100644
--- a/arch/um/configs/x86_64_defconfig
+++ b/arch/um/configs/x86_64_defconfig
@@ -24,7 +24,7 @@
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
-CONFIG_IOSCHED_CFQ=m
+CONFIG_IOSCHED_BFQ=m
CONFIG_SSL=y
CONFIG_NULL_CHAN=y
CONFIG_PORT_CHAN=y
diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig
index 388096f..2e7b8e0 100644
--- a/arch/um/drivers/Kconfig
+++ b/arch/um/drivers/Kconfig
@@ -147,7 +147,7 @@
make use of UML networking.
config UML_NET_ETHERTAP
- bool "Ethertap transport"
+ bool "Ethertap transport (obsolete)"
depends on UML_NET
help
The Ethertap User-Mode Linux network transport allows a single
@@ -167,14 +167,13 @@
has examples of the UML command line to use to enable Ethertap
networking.
- If you'd like to set up an IP network with the host and/or the
- outside world, say Y to this, the Daemon Transport and/or the
- Slip Transport. You'll need at least one of them, but may choose
- more than one without conflict. If you don't need UML networking,
- say N.
+ NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
+ migrate to UML_NET_VECTOR.
+
+ If unsure, say N.
config UML_NET_TUNTAP
- bool "TUN/TAP transport"
+ bool "TUN/TAP transport (obsolete)"
depends on UML_NET
help
The UML TUN/TAP network transport allows a UML instance to exchange
@@ -185,8 +184,13 @@
To use this transport, your host kernel must have support for TUN/TAP
devices, either built-in or as a module.
+ NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
+ migrate to UML_NET_VECTOR.
+
+ If unsure, say N.
+
config UML_NET_SLIP
- bool "SLIP transport"
+ bool "SLIP transport (obsolete)"
depends on UML_NET
help
The slip User-Mode Linux network transport allows a running UML to
@@ -201,16 +205,13 @@
has examples of the UML command line to use to enable slip
networking, and details of a few quirks with it.
- The Ethertap Transport is preferred over slip because of its
- limitations. If you prefer slip, however, say Y here. Otherwise
- choose the Multicast transport (to network multiple UMLs on
- multiple hosts), Ethertap (to network with the host and the
- outside world), and/or the Daemon transport (to network multiple
- UMLs on a single host). You may choose more than one without
- conflict. If you don't need UML networking, say N.
+ NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
+ migrate to UML_NET_VECTOR.
+
+ If unsure, say N.
config UML_NET_DAEMON
- bool "Daemon transport"
+ bool "Daemon transport (obsolete)"
depends on UML_NET
help
This User-Mode Linux network transport allows one or more running
@@ -225,17 +226,15 @@
has examples of the UML command line to use to enable Daemon
networking.
- If you'd like to set up a network with other UMLs on a single host,
- say Y. If you need a network between UMLs on multiple physical
- hosts, choose the Multicast Transport. To set up a network with
- the host and/or other IP machines, say Y to the Ethertap or Slip
- transports. You'll need at least one of them, but may choose
- more than one without conflict. If you don't need UML networking,
- say N.
+ NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
+ migrate to UML_NET_VECTOR.
+
+ If unsure, say N.
config UML_NET_VECTOR
bool "Vector I/O high performance network devices"
depends on UML_NET
+ select MAY_HAVE_RUNTIME_DEPS
help
This User-Mode Linux network driver uses multi-message send
and receive functions. The host running the UML guest must have
@@ -245,8 +244,9 @@
drivers.
config UML_NET_VDE
- bool "VDE transport"
+ bool "VDE transport (obsolete)"
depends on UML_NET
+ select MAY_HAVE_RUNTIME_DEPS
help
This User-Mode Linux network transport allows one or more running
UMLs on a single host to communicate with each other and also
@@ -263,11 +263,13 @@
That site has a good overview of what VDE is and also examples
of the UML command line to use to enable VDE networking.
- If you need UML networking with VDE,
- say Y.
+ NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
+ migrate to UML_NET_VECTOR.
+
+ If unsure, say N.
config UML_NET_MCAST
- bool "Multicast transport"
+ bool "Multicast transport (obsolete)"
depends on UML_NET
help
This Multicast User-Mode Linux network transport allows multiple
@@ -284,16 +286,15 @@
has examples of the UML command line to use to enable Multicast
networking, and notes about the security of this approach.
- If you need UMLs on multiple physical hosts to communicate as if
- they shared an Ethernet network, say Y. If you need to communicate
- with other IP machines, make sure you select one of the other
- transports (possibly in addition to Multicast; they're not
- exclusive). If you don't need to network UMLs say N to each of
- the transports.
+ NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
+ migrate to UML_NET_VECTOR.
+
+ If unsure, say N.
config UML_NET_PCAP
- bool "pcap transport"
+ bool "pcap transport (obsolete)"
depends on UML_NET
+ select MAY_HAVE_RUNTIME_DEPS
help
The pcap transport makes a pcap packet stream on the host look
like an ethernet device inside UML. This is useful for making
@@ -304,11 +305,13 @@
<http://user-mode-linux.sourceforge.net/old/networking.html> That site
has examples of the UML command line to use to enable this option.
- If you intend to use UML as a network monitor for the host, say
- Y here. Otherwise, say N.
+ NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
+ migrate to UML_NET_VECTOR.
+
+ If unsure, say N.
config UML_NET_SLIRP
- bool "SLiRP transport"
+ bool "SLiRP transport (obsolete)"
depends on UML_NET
help
The SLiRP User-Mode Linux network transport allows a running UML
@@ -328,9 +331,10 @@
that of a host behind a firewall that masquerades all network
connections passing through it (but is less secure).
- To use this you should first have slirp compiled somewhere
- accessible on the host, and have read its documentation. If you
- don't need UML networking, say N.
+ NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
+ migrate to UML_NET_VECTOR.
+
+ If unsure, say N.
Startup example: "eth0=slirp,FE:FD:01:02:03:04,/usr/local/bin/slirp"
diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile
index a290821..2a249f6 100644
--- a/arch/um/drivers/Makefile
+++ b/arch/um/drivers/Makefile
@@ -18,9 +18,9 @@
port-objs := port_kern.o port_user.o
harddog-objs := harddog_kern.o harddog_user.o
-LDFLAGS_pcap.o := -r $(shell $(CC) $(KBUILD_CFLAGS) -print-file-name=libpcap.a)
+LDFLAGS_pcap.o = $(shell $(CC) $(KBUILD_CFLAGS) -print-file-name=libpcap.a)
-LDFLAGS_vde.o := -r $(shell $(CC) $(CFLAGS) -print-file-name=libvdeplug.a)
+LDFLAGS_vde.o = $(shell $(CC) $(CFLAGS) -print-file-name=libvdeplug.a)
targets := pcap_kern.o pcap_user.o vde_kern.o vde_user.o
diff --git a/arch/um/drivers/chan_user.h b/arch/um/drivers/chan_user.h
index 72222bb..4e51b85 100644
--- a/arch/um/drivers/chan_user.h
+++ b/arch/um/drivers/chan_user.h
@@ -11,7 +11,7 @@
struct chan_opts {
void (*const announce)(char *dev_name, int dev);
char *xterm_title;
- const int raw;
+ int raw;
};
struct chan_ops {
diff --git a/arch/um/drivers/cow.h b/arch/um/drivers/cow.h
index 760c507..103adac 100644
--- a/arch/um/drivers/cow.h
+++ b/arch/um/drivers/cow.h
@@ -11,7 +11,7 @@
extern int file_reader(__u64 offset, char *buf, int len, void *arg);
extern int read_cow_header(int (*reader)(__u64, char *, int, void *),
void *arg, __u32 *version_out,
- char **backing_file_out, time_t *mtime_out,
+ char **backing_file_out, long long *mtime_out,
unsigned long long *size_out, int *sectorsize_out,
__u32 *align_out, int *bitmap_offset_out);
diff --git a/arch/um/drivers/cow_user.c b/arch/um/drivers/cow_user.c
index 74b0c26..29b4658 100644
--- a/arch/um/drivers/cow_user.c
+++ b/arch/um/drivers/cow_user.c
@@ -17,6 +17,7 @@
#define PATH_LEN_V1 256
+/* unsigned time_t works until year 2106 */
typedef __u32 time32_t;
struct cow_header_v1 {
@@ -197,7 +198,7 @@
int sectorsize, int alignment, unsigned long long *size)
{
struct cow_header_v3 *header;
- unsigned long modtime;
+ long long modtime;
int err;
err = cow_seek_file(fd, 0);
@@ -276,7 +277,7 @@
int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg,
__u32 *version_out, char **backing_file_out,
- time_t *mtime_out, unsigned long long *size_out,
+ long long *mtime_out, unsigned long long *size_out,
int *sectorsize_out, __u32 *align_out,
int *bitmap_offset_out)
{
@@ -363,7 +364,7 @@
/*
* this was used until Dec2005 - 64bits are needed to represent
- * 2038+. I.e. we can safely do this truncating cast.
+ * 2106+. I.e. we can safely do this truncating cast.
*
* Additionally, we must use be32toh() instead of be64toh(), since
* the program used to use the former (tested - I got mtime
diff --git a/arch/um/drivers/daemon_user.c b/arch/um/drivers/daemon_user.c
index 3695821..785baed 100644
--- a/arch/um/drivers/daemon_user.c
+++ b/arch/um/drivers/daemon_user.c
@@ -7,6 +7,7 @@
*/
#include <stdint.h>
+#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <sys/types.h>
diff --git a/arch/um/drivers/harddog_kern.c b/arch/um/drivers/harddog_kern.c
index 000cb69..e6d4f43 100644
--- a/arch/um/drivers/harddog_kern.c
+++ b/arch/um/drivers/harddog_kern.c
@@ -165,6 +165,7 @@
.owner = THIS_MODULE,
.write = harddog_write,
.unlocked_ioctl = harddog_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.open = harddog_open,
.release = harddog_release,
.llseek = no_llseek,
diff --git a/arch/um/drivers/hostaudio_kern.c b/arch/um/drivers/hostaudio_kern.c
index bf75b1c..d35d3f3 100644
--- a/arch/um/drivers/hostaudio_kern.c
+++ b/arch/um/drivers/hostaudio_kern.c
@@ -298,6 +298,7 @@
.write = hostaudio_write,
.poll = hostaudio_poll,
.unlocked_ioctl = hostaudio_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.mmap = NULL,
.open = hostaudio_open,
.release = hostaudio_release,
diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c
index 4f2a4ac..14ad9f4 100644
--- a/arch/um/drivers/line.c
+++ b/arch/um/drivers/line.c
@@ -184,11 +184,6 @@
line_flush_buffer(tty);
}
-int line_put_char(struct tty_struct *tty, unsigned char ch)
-{
- return line_write(tty, &ch, sizeof(ch));
-}
-
int line_write(struct tty_struct *tty, const unsigned char *buf, int len)
{
struct line *line = tty->driver_data;
diff --git a/arch/um/drivers/line.h b/arch/um/drivers/line.h
index a151ff5..01d21e7 100644
--- a/arch/um/drivers/line.h
+++ b/arch/um/drivers/line.h
@@ -66,7 +66,6 @@
char *init, char *name);
extern int line_write(struct tty_struct *tty, const unsigned char *buf,
int len);
-extern int line_put_char(struct tty_struct *tty, unsigned char ch);
extern void line_set_termios(struct tty_struct *tty, struct ktermios * old);
extern int line_chars_in_buffer(struct tty_struct *tty);
extern void line_flush_buffer(struct tty_struct *tty);
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index 0117489..a2e680f 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -36,6 +36,8 @@
#include "mconsole_kern.h"
#include <os.h>
+static struct vfsmount *proc_mnt = NULL;
+
static int do_unlink_socket(struct notifier_block *notifier,
unsigned long what, void *data)
{
@@ -123,7 +125,7 @@
void mconsole_proc(struct mc_request *req)
{
- struct vfsmount *mnt = task_active_pid_ns(current)->proc_mnt;
+ struct vfsmount *mnt = proc_mnt;
char *buf;
int len;
struct file *file;
@@ -134,6 +136,10 @@
ptr += strlen("proc");
ptr = skip_spaces(ptr);
+ if (!mnt) {
+ mconsole_reply(req, "Proc not available", 1, 0);
+ goto out;
+ }
file = file_open_root(mnt->mnt_root, mnt, ptr, O_RDONLY, 0);
if (IS_ERR(file)) {
mconsole_reply(req, "Failed to open file", 1, 0);
@@ -642,7 +648,7 @@
{
struct task_struct *task = arg;
- show_stack(task, NULL);
+ show_stack(task, NULL, KERN_INFO);
}
/*
@@ -683,6 +689,24 @@
with_console(req, stack_proc, to);
}
+static int __init mount_proc(void)
+{
+ struct file_system_type *proc_fs_type;
+ struct vfsmount *mnt;
+
+ proc_fs_type = get_fs_type("proc");
+ if (!proc_fs_type)
+ return -ENODEV;
+
+ mnt = kern_mount(proc_fs_type);
+ put_filesystem(proc_fs_type);
+ if (IS_ERR(mnt))
+ return PTR_ERR(mnt);
+
+ proc_mnt = mnt;
+ return 0;
+}
+
/*
* Changed by mconsole_setup, which is __setup, and called before SMP is
* active.
@@ -696,6 +720,8 @@
int err;
char file[UNIX_PATH_MAX];
+ mount_proc();
+
if (umid_file_name("mconsole", file, sizeof(file)))
return -1;
snprintf(mconsole_socket_name, sizeof(file), "%s", file);
@@ -752,10 +778,9 @@
return count;
}
-static const struct file_operations mconsole_proc_fops = {
- .owner = THIS_MODULE,
- .write = mconsole_proc_write,
- .llseek = noop_llseek,
+static const struct proc_ops mconsole_proc_ops = {
+ .proc_write = mconsole_proc_write,
+ .proc_lseek = noop_llseek,
};
static int create_proc_mconsole(void)
@@ -765,7 +790,7 @@
if (notify_socket == NULL)
return 0;
- ent = proc_create("mconsole", 0200, NULL, &mconsole_proc_fops);
+ ent = proc_create("mconsole", 0200, NULL, &mconsole_proc_ops);
if (ent == NULL) {
printk(KERN_INFO "create_proc_mconsole : proc_create failed\n");
return 0;
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index 327b728..1802cf4 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -247,7 +247,7 @@
return;
}
-static void uml_net_tx_timeout(struct net_device *dev)
+static void uml_net_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
netif_trans_update(dev);
netif_wake_queue(dev);
@@ -266,7 +266,6 @@
struct ethtool_drvinfo *info)
{
strlcpy(info->driver, DRIVER_NAME, sizeof(info->driver));
- strlcpy(info->version, "42", sizeof(info->version));
}
static const struct ethtool_ops uml_net_ethtool_ops = {
@@ -275,17 +274,6 @@
.get_ts_info = ethtool_op_get_ts_info,
};
-static void uml_net_user_timer_expire(struct timer_list *t)
-{
-#ifdef undef
- struct uml_net_private *lp = from_timer(lp, t, tl);
- struct connection *conn = &lp->user;
-
- dprintk(KERN_INFO "uml_net_user_timer_expire [%p]\n", conn);
- do_connect(conn);
-#endif
-}
-
void uml_net_setup_etheraddr(struct net_device *dev, char *str)
{
unsigned char *addr = dev->dev_addr;
@@ -456,7 +444,6 @@
.add_address = transport->user->add_address,
.delete_address = transport->user->delete_address });
- timer_setup(&lp->tl, uml_net_user_timer_expire, 0);
spin_lock_init(&lp->lock);
memcpy(lp->mac, dev->dev_addr, sizeof(lp->mac));
diff --git a/arch/um/drivers/pcap_user.c b/arch/um/drivers/pcap_user.c
index bbd2063..52ddda3 100644
--- a/arch/um/drivers/pcap_user.c
+++ b/arch/um/drivers/pcap_user.c
@@ -32,7 +32,7 @@
return 0;
}
-static int pcap_open(void *data)
+static int pcap_user_open(void *data)
{
struct pcap_data *pri = data;
__u32 netmask;
@@ -44,14 +44,14 @@
if (pri->filter != NULL) {
err = dev_netmask(pri->dev, &netmask);
if (err < 0) {
- printk(UM_KERN_ERR "pcap_open : dev_netmask failed\n");
+ printk(UM_KERN_ERR "pcap_user_open : dev_netmask failed\n");
return -EIO;
}
pri->compiled = uml_kmalloc(sizeof(struct bpf_program),
UM_GFP_KERNEL);
if (pri->compiled == NULL) {
- printk(UM_KERN_ERR "pcap_open : kmalloc failed\n");
+ printk(UM_KERN_ERR "pcap_user_open : kmalloc failed\n");
return -ENOMEM;
}
@@ -59,14 +59,14 @@
(struct bpf_program *) pri->compiled,
pri->filter, pri->optimize, netmask);
if (err < 0) {
- printk(UM_KERN_ERR "pcap_open : pcap_compile failed - "
+ printk(UM_KERN_ERR "pcap_user_open : pcap_compile failed - "
"'%s'\n", pcap_geterr(pri->pcap));
goto out;
}
err = pcap_setfilter(pri->pcap, pri->compiled);
if (err < 0) {
- printk(UM_KERN_ERR "pcap_open : pcap_setfilter "
+ printk(UM_KERN_ERR "pcap_user_open : pcap_setfilter "
"failed - '%s'\n", pcap_geterr(pri->pcap));
goto out;
}
@@ -127,7 +127,7 @@
const struct net_user_info pcap_user_info = {
.init = pcap_user_init,
- .open = pcap_open,
+ .open = pcap_user_open,
.close = NULL,
.remove = pcap_remove,
.add_address = NULL,
diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c
index 1d5d305..e4b9b2c 100644
--- a/arch/um/drivers/random.c
+++ b/arch/um/drivers/random.c
@@ -11,6 +11,7 @@
#include <linux/fs.h>
#include <linux/interrupt.h>
#include <linux/miscdevice.h>
+#include <linux/hw_random.h>
#include <linux/delay.h>
#include <linux/uaccess.h>
#include <init.h>
@@ -18,100 +19,45 @@
#include <os.h>
/*
- * core module and version information
+ * core module information
*/
-#define RNG_VERSION "1.0.0"
#define RNG_MODULE_NAME "hw_random"
-#define RNG_MISCDEV_MINOR 183 /* official */
-
/* Changed at init time, in the non-modular case, and at module load
* time, in the module case. Presumably, the module subsystem
* protects against a module being loaded twice at the same time.
*/
static int random_fd = -1;
-static DECLARE_WAIT_QUEUE_HEAD(host_read_wait);
+static struct hwrng hwrng = { 0, };
+static DECLARE_COMPLETION(have_data);
-static int rng_dev_open (struct inode *inode, struct file *filp)
+static int rng_dev_read(struct hwrng *rng, void *buf, size_t max, bool block)
{
- /* enforce read-only access to this chrdev */
- if ((filp->f_mode & FMODE_READ) == 0)
- return -EINVAL;
- if ((filp->f_mode & FMODE_WRITE) != 0)
- return -EINVAL;
+ int ret;
- return 0;
-}
-
-static atomic_t host_sleep_count = ATOMIC_INIT(0);
-
-static ssize_t rng_dev_read (struct file *filp, char __user *buf, size_t size,
- loff_t *offp)
-{
- u32 data;
- int n, ret = 0, have_data;
-
- while (size) {
- n = os_read_file(random_fd, &data, sizeof(data));
- if (n > 0) {
- have_data = n;
- while (have_data && size) {
- if (put_user((u8) data, buf++)) {
- ret = ret ? : -EFAULT;
- break;
- }
- size--;
- ret++;
- have_data--;
- data >>= 8;
- }
- }
- else if (n == -EAGAIN) {
- DECLARE_WAITQUEUE(wait, current);
-
- if (filp->f_flags & O_NONBLOCK)
- return ret ? : -EAGAIN;
-
- atomic_inc(&host_sleep_count);
+ for (;;) {
+ ret = os_read_file(random_fd, buf, max);
+ if (block && ret == -EAGAIN) {
add_sigio_fd(random_fd);
- add_wait_queue(&host_read_wait, &wait);
- set_current_state(TASK_INTERRUPTIBLE);
+ ret = wait_for_completion_killable(&have_data);
- schedule();
- remove_wait_queue(&host_read_wait, &wait);
+ ignore_sigio_fd(random_fd);
+ deactivate_fd(random_fd, RANDOM_IRQ);
- if (atomic_dec_and_test(&host_sleep_count)) {
- ignore_sigio_fd(random_fd);
- deactivate_fd(random_fd, RANDOM_IRQ);
- }
+ if (ret < 0)
+ break;
+ } else {
+ break;
}
- else
- return n;
-
- if (signal_pending (current))
- return ret ? : -ERESTARTSYS;
}
- return ret;
+
+ return ret != -EAGAIN ? ret : 0;
}
-static const struct file_operations rng_chrdev_ops = {
- .owner = THIS_MODULE,
- .open = rng_dev_open,
- .read = rng_dev_read,
- .llseek = noop_llseek,
-};
-
-/* rng_init shouldn't be called more than once at boot time */
-static struct miscdevice rng_miscdev = {
- RNG_MISCDEV_MINOR,
- RNG_MODULE_NAME,
- &rng_chrdev_ops,
-};
-
static irqreturn_t random_interrupt(int irq, void *data)
{
- wake_up(&host_read_wait);
+ complete(&have_data);
return IRQ_HANDLED;
}
@@ -128,18 +74,19 @@
goto out;
random_fd = err;
-
err = um_request_irq(RANDOM_IRQ, random_fd, IRQ_READ, random_interrupt,
0, "random", NULL);
if (err)
goto err_out_cleanup_hw;
sigio_broken(random_fd, 1);
+ hwrng.name = RNG_MODULE_NAME;
+ hwrng.read = rng_dev_read;
+ hwrng.quality = 1024;
- err = misc_register (&rng_miscdev);
+ err = hwrng_register(&hwrng);
if (err) {
- printk (KERN_ERR RNG_MODULE_NAME ": misc device register "
- "failed\n");
+ pr_err(RNG_MODULE_NAME " registering failed (%d)\n", err);
goto err_out_cleanup_hw;
}
out:
@@ -163,8 +110,8 @@
static void __exit rng_cleanup(void)
{
+ hwrng_unregister(&hwrng);
os_close_file(random_fd);
- misc_deregister (&rng_miscdev);
}
module_init (rng_init);
diff --git a/arch/um/drivers/slip_user.c b/arch/um/drivers/slip_user.c
index 8d736eb..7334019 100644
--- a/arch/um/drivers/slip_user.c
+++ b/arch/um/drivers/slip_user.c
@@ -9,7 +9,7 @@
#include <errno.h>
#include <fcntl.h>
#include <string.h>
-#include <sys/termios.h>
+#include <termios.h>
#include <sys/wait.h>
#include <net_user.h>
#include <os.h>
diff --git a/arch/um/drivers/ssl.c b/arch/um/drivers/ssl.c
index b213201..6476b28 100644
--- a/arch/um/drivers/ssl.c
+++ b/arch/um/drivers/ssl.c
@@ -95,7 +95,6 @@
.open = line_open,
.close = line_close,
.write = line_write,
- .put_char = line_put_char,
.write_room = line_write_room,
.chars_in_buffer = line_chars_in_buffer,
.flush_buffer = line_flush_buffer,
@@ -196,3 +195,11 @@
__setup("ssl", ssl_chan_setup);
__channel_help(ssl_chan_setup, "ssl");
+
+static int ssl_non_raw_setup(char *str)
+{
+ opts.raw = 0;
+ return 1;
+}
+__setup("ssl-non-raw", ssl_non_raw_setup);
+__channel_help(ssl_non_raw_setup, "set serial lines to non-raw mode");
diff --git a/arch/um/drivers/stdio_console.c b/arch/um/drivers/stdio_console.c
index 0021d7f..37b1279 100644
--- a/arch/um/drivers/stdio_console.c
+++ b/arch/um/drivers/stdio_console.c
@@ -102,7 +102,6 @@
.install = con_install,
.close = line_close,
.write = line_write,
- .put_char = line_put_char,
.write_room = line_write_room,
.chars_in_buffer = line_chars_in_buffer,
.flush_buffer = line_flush_buffer,
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 4e59ab8..b12c1b0 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -120,6 +120,7 @@
.open = ubd_open,
.release = ubd_release,
.ioctl = ubd_ioctl,
+ .compat_ioctl = blkdev_compat_ptr_ioctl,
.getgeo = ubd_getgeo,
};
@@ -563,7 +564,7 @@
__u32 version;
__u32 align;
char *backing_file;
- time_t mtime;
+ time64_t mtime;
unsigned long long size;
int sector_size;
int bitmap_offset;
@@ -602,9 +603,9 @@
return 0;
}
-static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
+static int backing_file_mismatch(char *file, __u64 size, time64_t mtime)
{
- unsigned long modtime;
+ time64_t modtime;
unsigned long long actual;
int err;
@@ -630,7 +631,7 @@
return -EINVAL;
}
if (modtime != mtime) {
- printk(KERN_ERR "mtime mismatch (%ld vs %ld) of COW header vs "
+ printk(KERN_ERR "mtime mismatch (%lld vs %lld) of COW header vs "
"backing file\n", mtime, modtime);
return -EINVAL;
}
@@ -673,7 +674,7 @@
unsigned long *bitmap_len_out, int *data_offset_out,
int *create_cow_out)
{
- time_t mtime;
+ time64_t mtime;
unsigned long long size;
__u32 version, align;
char *backing_file;
@@ -1625,11 +1626,11 @@
&io_remainder_size,
UBD_REQ_BUFFER_SIZE
);
- if (n < 0) {
- if (n == -EAGAIN) {
+ if (n <= 0) {
+ if (n == -EAGAIN)
ubd_read_poll(-1);
- continue;
- }
+
+ continue;
}
for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c
index 769ffbd..555203e 100644
--- a/arch/um/drivers/vector_kern.c
+++ b/arch/um/drivers/vector_kern.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Copyright (C) 2017 - Cambridge Greys Limited
+ * Copyright (C) 2017 - 2019 Cambridge Greys Limited
* Copyright (C) 2011 - 2014 Cisco Systems Inc
* Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
* Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
@@ -21,6 +21,9 @@
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
+#include <linux/firmware.h>
+#include <linux/fs.h>
+#include <uapi/linux/filter.h>
#include <init.h>
#include <irq_kern.h>
#include <irq_user.h>
@@ -43,7 +46,6 @@
#define DRIVER_NAME "uml-vector"
-#define DRIVER_VERSION "01"
struct vector_cmd_line_arg {
struct list_head list;
int unit;
@@ -128,6 +130,23 @@
return ETH_MAX_PACKET;
}
+static char *get_bpf_file(struct arglist *def)
+{
+ return uml_vector_fetch_arg(def, "bpffile");
+}
+
+static bool get_bpf_flash(struct arglist *def)
+{
+ char *allow = uml_vector_fetch_arg(def, "bpfflash");
+ long result;
+
+ if (allow != NULL) {
+ if (kstrtoul(allow, 10, &result) == 0)
+ return (allow > 0);
+ }
+ return false;
+}
+
static int get_depth(struct arglist *def)
{
char *mtu = uml_vector_fetch_arg(def, "depth");
@@ -176,6 +195,10 @@
int vec_rx = VECTOR_RX;
int vec_tx = VECTOR_TX;
long parsed;
+ int result = 0;
+
+ if (transport == NULL)
+ return -EINVAL;
if (vector != NULL) {
if (kstrtoul(vector, 10, &parsed) == 0) {
@@ -186,14 +209,16 @@
}
}
+ if (get_bpf_flash(def))
+ result = VECTOR_BPF_FLASH;
if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0)
- return 0;
+ return result;
if (strncmp(transport, TRANS_HYBRID, TRANS_HYBRID_LEN) == 0)
- return (vec_rx | VECTOR_BPF);
+ return (result | vec_rx | VECTOR_BPF);
if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0)
- return (vec_rx | vec_tx | VECTOR_QDISC_BYPASS);
- return (vec_rx | vec_tx);
+ return (result | vec_rx | vec_tx | VECTOR_QDISC_BYPASS);
+ return (result | vec_rx | vec_tx);
}
@@ -1139,6 +1164,8 @@
}
tasklet_kill(&vp->tx_poll);
if (vp->fds->rx_fd > 0) {
+ if (vp->bpf)
+ uml_vector_detach_bpf(vp->fds->rx_fd, vp->bpf);
os_close_file(vp->fds->rx_fd);
vp->fds->rx_fd = -1;
}
@@ -1146,7 +1173,10 @@
os_close_file(vp->fds->tx_fd);
vp->fds->tx_fd = -1;
}
+ if (vp->bpf != NULL)
+ kfree(vp->bpf->filter);
kfree(vp->bpf);
+ vp->bpf = NULL;
kfree(vp->fds->remote_addr);
kfree(vp->transport_data);
kfree(vp->header_rxbuffer);
@@ -1181,6 +1211,7 @@
netif_start_queue(vp->dev);
netif_wake_queue(vp->dev);
}
+
static int vector_net_open(struct net_device *dev)
{
struct vector_private *vp = netdev_priv(dev);
@@ -1196,6 +1227,8 @@
vp->opened = true;
spin_unlock_irqrestore(&vp->lock, flags);
+ vp->bpf = uml_vector_user_bpf(get_bpf_file(vp->parsed));
+
vp->fds = uml_vector_user_open(vp->unit, vp->parsed);
if (vp->fds == NULL)
@@ -1267,8 +1300,11 @@
if (!uml_raw_enable_qdisc_bypass(vp->fds->rx_fd))
vp->options |= VECTOR_BPF;
}
- if ((vp->options & VECTOR_BPF) != 0)
- vp->bpf = uml_vector_default_bpf(vp->fds->rx_fd, dev->dev_addr);
+ if (((vp->options & VECTOR_BPF) != 0) && (vp->bpf == NULL))
+ vp->bpf = uml_vector_default_bpf(dev->dev_addr);
+
+ if (vp->bpf != NULL)
+ uml_vector_attach_bpf(vp->fds->rx_fd, vp->bpf);
netif_start_queue(dev);
@@ -1298,7 +1334,7 @@
return;
}
-static void vector_net_tx_timeout(struct net_device *dev)
+static void vector_net_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct vector_private *vp = netdev_priv(dev);
@@ -1344,7 +1380,65 @@
struct ethtool_drvinfo *info)
{
strlcpy(info->driver, DRIVER_NAME, sizeof(info->driver));
- strlcpy(info->version, DRIVER_VERSION, sizeof(info->version));
+}
+
+static int vector_net_load_bpf_flash(struct net_device *dev,
+ struct ethtool_flash *efl)
+{
+ struct vector_private *vp = netdev_priv(dev);
+ struct vector_device *vdevice;
+ const struct firmware *fw;
+ int result = 0;
+
+ if (!(vp->options & VECTOR_BPF_FLASH)) {
+ netdev_err(dev, "loading firmware not permitted: %s\n", efl->data);
+ return -1;
+ }
+
+ spin_lock(&vp->lock);
+
+ if (vp->bpf != NULL) {
+ if (vp->opened)
+ uml_vector_detach_bpf(vp->fds->rx_fd, vp->bpf);
+ kfree(vp->bpf->filter);
+ vp->bpf->filter = NULL;
+ } else {
+ vp->bpf = kmalloc(sizeof(struct sock_fprog), GFP_ATOMIC);
+ if (vp->bpf == NULL) {
+ netdev_err(dev, "failed to allocate memory for firmware\n");
+ goto flash_fail;
+ }
+ }
+
+ vdevice = find_device(vp->unit);
+
+ if (request_firmware(&fw, efl->data, &vdevice->pdev.dev))
+ goto flash_fail;
+
+ vp->bpf->filter = kmemdup(fw->data, fw->size, GFP_ATOMIC);
+ if (!vp->bpf->filter)
+ goto free_buffer;
+
+ vp->bpf->len = fw->size / sizeof(struct sock_filter);
+ release_firmware(fw);
+
+ if (vp->opened)
+ result = uml_vector_attach_bpf(vp->fds->rx_fd, vp->bpf);
+
+ spin_unlock(&vp->lock);
+
+ return result;
+
+free_buffer:
+ release_firmware(fw);
+
+flash_fail:
+ spin_unlock(&vp->lock);
+ if (vp->bpf != NULL)
+ kfree(vp->bpf->filter);
+ kfree(vp->bpf);
+ vp->bpf = NULL;
+ return -1;
}
static void vector_get_ringparam(struct net_device *netdev,
@@ -1415,6 +1509,7 @@
}
static const struct ethtool_ops vector_net_ethtool_ops = {
+ .supported_coalesce_params = ETHTOOL_COALESCE_TX_USECS,
.get_drvinfo = vector_net_get_drvinfo,
.get_link = ethtool_op_get_link,
.get_ts_info = ethtool_op_get_ts_info,
@@ -1424,6 +1519,7 @@
.get_ethtool_stats = vector_get_ethtool_stats,
.get_coalesce = vector_get_coalesce,
.set_coalesce = vector_set_coalesce,
+ .flash_device = vector_net_load_bpf_flash,
};
@@ -1528,8 +1624,9 @@
.in_write_poll = false,
.coalesce = 2,
.req_size = get_req_size(def),
- .in_error = false
- });
+ .in_error = false,
+ .bpf = NULL
+ });
dev->features = dev->hw_features = (NETIF_F_SG | NETIF_F_FRAGLIST);
tasklet_init(&vp->tx_poll, vector_tx_poll, (unsigned long)vp);
diff --git a/arch/um/drivers/vector_kern.h b/arch/um/drivers/vector_kern.h
index 4d292e6..8fff93a 100644
--- a/arch/um/drivers/vector_kern.h
+++ b/arch/um/drivers/vector_kern.h
@@ -29,10 +29,13 @@
#define VECTOR_TX (1 << 1)
#define VECTOR_BPF (1 << 2)
#define VECTOR_QDISC_BYPASS (1 << 3)
+#define VECTOR_BPF_FLASH (1 << 4)
#define ETH_MAX_PACKET 1500
#define ETH_HEADER_OTHER 32 /* just in case someone decides to go mad on QnQ */
+#define MAX_FILTER_PROG (2 << 16)
+
struct vector_queue {
struct mmsghdr *mmsg_vector;
void **skbuff_vector;
@@ -118,12 +121,15 @@
bool in_write_poll;
bool in_error;
+ /* guest allowed to use ethtool flash to load bpf */
+ bool bpf_via_flash;
+
/* ethtool stats */
struct vector_estats estats;
- void *bpf;
+ struct sock_fprog *bpf;
- char user[0];
+ char user[];
};
extern int build_transport_data(struct vector_private *vp);
diff --git a/arch/um/drivers/vector_user.c b/arch/um/drivers/vector_user.c
index e2c969b..bae5322 100644
--- a/arch/um/drivers/vector_user.c
+++ b/arch/um/drivers/vector_user.c
@@ -18,9 +18,7 @@
#include <fcntl.h>
#include <sys/socket.h>
#include <sys/un.h>
-#include <net/ethernet.h>
#include <netinet/ip.h>
-#include <netinet/ether.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <sys/wait.h>
@@ -29,6 +27,7 @@
#include <netdb.h>
#include <stdlib.h>
#include <os.h>
+#include <limits.h>
#include <um_malloc.h>
#include "vector_user.h"
@@ -38,18 +37,26 @@
#define ID_MAX 2
#define TOKEN_IFNAME "ifname"
+#define TOKEN_SCRIPT "ifup"
#define TRANS_RAW "raw"
#define TRANS_RAW_LEN strlen(TRANS_RAW)
+#define TRANS_FD "fd"
+#define TRANS_FD_LEN strlen(TRANS_FD)
+
#define VNET_HDR_FAIL "could not enable vnet headers on fd %d"
#define TUN_GET_F_FAIL "tapraw: TUNGETFEATURES failed: %s"
#define L2TPV3_BIND_FAIL "l2tpv3_open : could not bind socket err=%i"
#define UNIX_BIND_FAIL "unix_open : could not bind socket err=%i"
-#define BPF_ATTACH_FAIL "Failed to attach filter size %d to %d, err %d\n"
+#define BPF_ATTACH_FAIL "Failed to attach filter size %d prog %px to %d, err %d\n"
+#define BPF_DETACH_FAIL "Failed to detach filter size %d prog %px to %d, err %d\n"
#define MAX_UN_LEN 107
+static const char padchar[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+static const char *template = "tapXXXXXX";
+
/* This is very ugly and brute force lookup, but it is done
* only once at initialization so not worth doing hashes or
* anything more intelligent
@@ -186,16 +193,21 @@
return err;
}
+
static struct vector_fds *user_init_tap_fds(struct arglist *ifspec)
{
- int fd = -1;
+ int fd = -1, i;
char *iface;
struct vector_fds *result = NULL;
+ bool dynamic = false;
+ char dynamic_ifname[IFNAMSIZ];
+ char *argv[] = {NULL, NULL, NULL, NULL};
iface = uml_vector_fetch_arg(ifspec, TOKEN_IFNAME);
if (iface == NULL) {
- printk(UM_KERN_ERR "uml_tap: failed to parse interface spec\n");
- goto tap_cleanup;
+ dynamic = true;
+ iface = dynamic_ifname;
+ srand(getpid());
}
result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
@@ -209,19 +221,34 @@
result->remote_addr_size = 0;
/* TAP */
+ do {
+ if (dynamic) {
+ strcpy(iface, template);
+ for (i = 0; i < strlen(iface); i++) {
+ if (iface[i] == 'X') {
+ iface[i] = padchar[rand() % strlen(padchar)];
+ }
+ }
+ }
+ fd = create_tap_fd(iface);
+ if ((fd < 0) && (!dynamic)) {
+ printk(UM_KERN_ERR "uml_tap: failed to create tun interface\n");
+ goto tap_cleanup;
+ }
+ result->tx_fd = fd;
+ result->rx_fd = fd;
+ } while (fd < 0);
- fd = create_tap_fd(iface);
- if (fd < 0) {
- printk(UM_KERN_ERR "uml_tap: failed to create tun interface\n");
- goto tap_cleanup;
+ argv[0] = uml_vector_fetch_arg(ifspec, TOKEN_SCRIPT);
+ if (argv[0]) {
+ argv[1] = iface;
+ run_helper(NULL, NULL, argv);
}
- result->tx_fd = fd;
- result->rx_fd = fd;
+
return result;
tap_cleanup:
printk(UM_KERN_ERR "user_init_tap: init failed, error %d", fd);
- if (result != NULL)
- kfree(result);
+ kfree(result);
return NULL;
}
@@ -229,6 +256,7 @@
{
char *iface;
struct vector_fds *result = NULL;
+ char *argv[] = {NULL, NULL, NULL, NULL};
iface = uml_vector_fetch_arg(ifspec, TOKEN_IFNAME);
if (iface == NULL) {
@@ -262,11 +290,16 @@
"uml_tap: failed to create paired raw socket: %i\n", result->rx_fd);
goto hybrid_cleanup;
}
+
+ argv[0] = uml_vector_fetch_arg(ifspec, TOKEN_SCRIPT);
+ if (argv[0]) {
+ argv[1] = iface;
+ run_helper(NULL, NULL, argv);
+ }
return result;
hybrid_cleanup:
printk(UM_KERN_ERR "user_init_hybrid: init failed");
- if (result != NULL)
- kfree(result);
+ kfree(result);
return NULL;
}
@@ -329,7 +362,7 @@
}
switch (id) {
case ID_BESS:
- if (connect(fd, remote_addr, sizeof(struct sockaddr_un)) < 0) {
+ if (connect(fd, (const struct sockaddr *) remote_addr, sizeof(struct sockaddr_un)) < 0) {
printk(UM_KERN_ERR "bess open:cannot connect to %s %i", remote_addr->sun_path, -errno);
goto unix_cleanup;
}
@@ -343,10 +376,60 @@
unix_cleanup:
if (fd >= 0)
os_close_file(fd);
- if (remote_addr != NULL)
- kfree(remote_addr);
- if (result != NULL)
- kfree(result);
+ kfree(remote_addr);
+ kfree(result);
+ return NULL;
+}
+
+static int strtofd(const char *nptr)
+{
+ long fd;
+ char *endptr;
+
+ if (nptr == NULL)
+ return -1;
+
+ errno = 0;
+ fd = strtol(nptr, &endptr, 10);
+ if (nptr == endptr ||
+ errno != 0 ||
+ *endptr != '\0' ||
+ fd < 0 ||
+ fd > INT_MAX) {
+ return -1;
+ }
+ return fd;
+}
+
+static struct vector_fds *user_init_fd_fds(struct arglist *ifspec)
+{
+ int fd = -1;
+ char *fdarg = NULL;
+ struct vector_fds *result = NULL;
+
+ fdarg = uml_vector_fetch_arg(ifspec, "fd");
+ fd = strtofd(fdarg);
+ if (fd == -1) {
+ printk(UM_KERN_ERR "fd open: bad or missing fd argument");
+ goto fd_cleanup;
+ }
+
+ result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
+ if (result == NULL) {
+ printk(UM_KERN_ERR "fd open: allocation failed");
+ goto fd_cleanup;
+ }
+
+ result->rx_fd = fd;
+ result->tx_fd = fd;
+ result->remote_addr_size = 0;
+ result->remote_addr = NULL;
+ return result;
+
+fd_cleanup:
+ if (fd >= 0)
+ os_close_file(fd);
+ kfree(result);
return NULL;
}
@@ -356,6 +439,7 @@
int err = -ENOMEM;
char *iface;
struct vector_fds *result = NULL;
+ char *argv[] = {NULL, NULL, NULL, NULL};
iface = uml_vector_fetch_arg(ifspec, TOKEN_IFNAME);
if (iface == NULL)
@@ -378,11 +462,15 @@
result->remote_addr = NULL;
result->remote_addr_size = 0;
}
+ argv[0] = uml_vector_fetch_arg(ifspec, TOKEN_SCRIPT);
+ if (argv[0]) {
+ argv[1] = iface;
+ run_helper(NULL, NULL, argv);
+ }
return result;
raw_cleanup:
printk(UM_KERN_ERR "user_init_raw: init failed, error %d", err);
- if (result != NULL)
- kfree(result);
+ kfree(result);
return NULL;
}
@@ -582,6 +670,8 @@
return user_init_socket_fds(parsed, ID_L2TPV3);
if (strncmp(transport, TRANS_BESS, TRANS_BESS_LEN) == 0)
return user_init_unix_fds(parsed, ID_BESS);
+ if (strncmp(transport, TRANS_FD, TRANS_FD_LEN) == 0)
+ return user_init_fd_fds(parsed);
return NULL;
}
@@ -660,31 +750,44 @@
else
return -errno;
}
-int uml_vector_attach_bpf(int fd, void *bpf, int bpf_len)
+int uml_vector_attach_bpf(int fd, void *bpf)
{
- int err = setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, bpf, bpf_len);
+ struct sock_fprog *prog = bpf;
+
+ int err = setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, bpf, sizeof(struct sock_fprog));
if (err < 0)
- printk(KERN_ERR BPF_ATTACH_FAIL, bpf_len, fd, -errno);
+ printk(KERN_ERR BPF_ATTACH_FAIL, prog->len, prog->filter, fd, -errno);
return err;
}
-#define DEFAULT_BPF_LEN 6
+int uml_vector_detach_bpf(int fd, void *bpf)
+{
+ struct sock_fprog *prog = bpf;
-void *uml_vector_default_bpf(int fd, void *mac)
+ int err = setsockopt(fd, SOL_SOCKET, SO_DETACH_FILTER, bpf, sizeof(struct sock_fprog));
+ if (err < 0)
+ printk(KERN_ERR BPF_DETACH_FAIL, prog->len, prog->filter, fd, -errno);
+ return err;
+}
+void *uml_vector_default_bpf(void *mac)
{
struct sock_filter *bpf;
uint32_t *mac1 = (uint32_t *)(mac + 2);
uint16_t *mac2 = (uint16_t *) mac;
- struct sock_fprog bpf_prog = {
- .len = 6,
- .filter = NULL,
- };
+ struct sock_fprog *bpf_prog;
+ bpf_prog = uml_kmalloc(sizeof(struct sock_fprog), UM_GFP_KERNEL);
+ if (bpf_prog) {
+ bpf_prog->len = DEFAULT_BPF_LEN;
+ bpf_prog->filter = NULL;
+ } else {
+ return NULL;
+ }
bpf = uml_kmalloc(
sizeof(struct sock_filter) * DEFAULT_BPF_LEN, UM_GFP_KERNEL);
- if (bpf != NULL) {
- bpf_prog.filter = bpf;
+ if (bpf) {
+ bpf_prog->filter = bpf;
/* ld [8] */
bpf[0] = (struct sock_filter){ 0x20, 0, 0, 0x00000008 };
/* jeq #0xMAC[2-6] jt 2 jf 5*/
@@ -697,12 +800,58 @@
bpf[4] = (struct sock_filter){ 0x6, 0, 0, 0x00000000 };
/* ret #0x40000 */
bpf[5] = (struct sock_filter){ 0x6, 0, 0, 0x00040000 };
- if (uml_vector_attach_bpf(
- fd, &bpf_prog, sizeof(struct sock_fprog)) < 0) {
- kfree(bpf);
- bpf = NULL;
- }
+ } else {
+ kfree(bpf_prog);
+ bpf_prog = NULL;
}
- return bpf;
+ return bpf_prog;
}
+/* Note - this function requires a valid mac being passed as an arg */
+
+void *uml_vector_user_bpf(char *filename)
+{
+ struct sock_filter *bpf;
+ struct sock_fprog *bpf_prog;
+ struct stat statbuf;
+ int res, ffd = -1;
+
+ if (filename == NULL)
+ return NULL;
+
+ if (stat(filename, &statbuf) < 0) {
+ printk(KERN_ERR "Error %d reading bpf file", -errno);
+ return false;
+ }
+ bpf_prog = uml_kmalloc(sizeof(struct sock_fprog), UM_GFP_KERNEL);
+ if (bpf_prog == NULL) {
+ printk(KERN_ERR "Failed to allocate bpf prog buffer");
+ return NULL;
+ }
+ bpf_prog->len = statbuf.st_size / sizeof(struct sock_filter);
+ bpf_prog->filter = NULL;
+ ffd = os_open_file(filename, of_read(OPENFLAGS()), 0);
+ if (ffd < 0) {
+ printk(KERN_ERR "Error %d opening bpf file", -errno);
+ goto bpf_failed;
+ }
+ bpf = uml_kmalloc(statbuf.st_size, UM_GFP_KERNEL);
+ if (bpf == NULL) {
+ printk(KERN_ERR "Failed to allocate bpf buffer");
+ goto bpf_failed;
+ }
+ bpf_prog->filter = bpf;
+ res = os_read_file(ffd, bpf, statbuf.st_size);
+ if (res < statbuf.st_size) {
+ printk(KERN_ERR "Failed to read bpf program %s, error %d", filename, res);
+ kfree(bpf);
+ goto bpf_failed;
+ }
+ os_close_file(ffd);
+ return bpf_prog;
+bpf_failed:
+ if (ffd > 0)
+ os_close_file(ffd);
+ kfree(bpf_prog);
+ return NULL;
+}
diff --git a/arch/um/drivers/vector_user.h b/arch/um/drivers/vector_user.h
index 649ec25..d29d5fd 100644
--- a/arch/um/drivers/vector_user.h
+++ b/arch/um/drivers/vector_user.h
@@ -17,7 +17,7 @@
#define TRANS_TAP_LEN strlen(TRANS_TAP)
#define TRANS_GRE "gre"
-#define TRANS_GRE_LEN strlen(TRANS_RAW)
+#define TRANS_GRE_LEN strlen(TRANS_GRE)
#define TRANS_L2TPV3 "l2tpv3"
#define TRANS_L2TPV3_LEN strlen(TRANS_L2TPV3)
@@ -28,6 +28,8 @@
#define TRANS_BESS "bess"
#define TRANS_BESS_LEN strlen(TRANS_BESS)
+#define DEFAULT_BPF_LEN 6
+
#ifndef IPPROTO_GRE
#define IPPROTO_GRE 0x2F
#endif
@@ -95,8 +97,10 @@
unsigned int vlen,
unsigned int flags
);
-extern void *uml_vector_default_bpf(int fd, void *mac);
-extern int uml_vector_attach_bpf(int fd, void *bpf, int bpf_len);
+extern void *uml_vector_default_bpf(void *mac);
+extern void *uml_vector_user_bpf(char *filename);
+extern int uml_vector_attach_bpf(int fd, void *bpf);
+extern int uml_vector_detach_bpf(int fd, void *bpf);
extern bool uml_raw_enable_qdisc_bypass(int fd);
extern bool uml_raw_enable_vnet_headers(int fd);
extern bool uml_tap_enable_vnet_headers(int fd);
diff --git a/arch/um/drivers/vhost_user.h b/arch/um/drivers/vhost_user.h
index 45ff5ea..6f147cd 100644
--- a/arch/um/drivers/vhost_user.h
+++ b/arch/um/drivers/vhost_user.h
@@ -10,9 +10,10 @@
/* Feature bits */
#define VHOST_USER_F_PROTOCOL_FEATURES 30
/* Protocol feature bits */
-#define VHOST_USER_PROTOCOL_F_REPLY_ACK 3
-#define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5
-#define VHOST_USER_PROTOCOL_F_CONFIG 9
+#define VHOST_USER_PROTOCOL_F_REPLY_ACK 3
+#define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5
+#define VHOST_USER_PROTOCOL_F_CONFIG 9
+#define VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS 14
/* Vring state index masks */
#define VHOST_USER_VRING_INDEX_MASK 0xff
#define VHOST_USER_VRING_POLL_MASK BIT(8)
@@ -24,7 +25,8 @@
/* Supported protocol features */
#define VHOST_USER_SUPPORTED_PROTOCOL_F (BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK) | \
BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
- BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG))
+ BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG) | \
+ BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS))
enum vhost_user_request {
VHOST_USER_GET_FEATURES = 1,
@@ -52,12 +54,14 @@
VHOST_USER_SET_VRING_ENDIAN = 23,
VHOST_USER_GET_CONFIG = 24,
VHOST_USER_SET_CONFIG = 25,
+ VHOST_USER_VRING_KICK = 35,
};
enum vhost_user_slave_request {
VHOST_USER_SLAVE_IOTLB_MSG = 1,
VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2,
VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
+ VHOST_USER_SLAVE_VRING_CALL = 4,
};
struct vhost_user_header {
@@ -74,7 +78,7 @@
u32 offset;
u32 size;
u32 flags;
- u8 payload[0]; /* Variable length */
+ u8 payload[]; /* Variable length */
} __packed;
struct vhost_user_vring_state {
diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c
index 18618af..d5d7681 100644
--- a/arch/um/drivers/virtio_uml.c
+++ b/arch/um/drivers/virtio_uml.c
@@ -26,6 +26,7 @@
#include <linux/virtio.h>
#include <linux/virtio_config.h>
#include <linux/virtio_ring.h>
+#include <linux/time-internal.h>
#include <shared/as-layout.h>
#include <irq_kern.h>
#include <init.h>
@@ -42,24 +43,38 @@
#define to_virtio_uml_device(_vdev) \
container_of(_vdev, struct virtio_uml_device, vdev)
+struct virtio_uml_platform_data {
+ u32 virtio_device_id;
+ const char *socket_path;
+ struct work_struct conn_broken_wk;
+ struct platform_device *pdev;
+};
+
struct virtio_uml_device {
struct virtio_device vdev;
struct platform_device *pdev;
+ spinlock_t sock_lock;
int sock, req_fd;
u64 features;
u64 protocol_features;
u8 status;
+ u8 registered:1;
};
struct virtio_uml_vq_info {
int kick_fd, call_fd;
char name[32];
+#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
+ struct virtqueue *vq;
+ vq_callback_t *callback;
+ struct time_travel_event defer;
+#endif
};
extern unsigned long long physmem_size, highmem;
-#define vu_err(vu_dev, ...) dev_err(&(vu_dev)->pdev->dev, __VA_ARGS__)
+#define vu_err(vu_dev, ...) dev_err(&(vu_dev)->pdev->dev, ##__VA_ARGS__)
/* Vhost-user protocol */
@@ -107,12 +122,38 @@
return full_read(fd, msg, sizeof(msg->header), true);
}
-static int vhost_user_recv(int fd, struct vhost_user_msg *msg,
- size_t max_payload_size)
+static int vhost_user_recv(struct virtio_uml_device *vu_dev,
+ int fd, struct vhost_user_msg *msg,
+ size_t max_payload_size, bool wait)
{
size_t size;
- int rc = vhost_user_recv_header(fd, msg);
+ int rc;
+ /*
+ * In virtio time-travel mode, we're handling all the vhost-user
+ * FDs by polling them whenever appropriate. However, we may get
+ * into a situation where we're sending out an interrupt message
+ * to a device (e.g. a net device) and need to handle a simulation
+ * time message while doing so, e.g. one that tells us to update
+ * our idea of how long we can run without scheduling.
+ *
+ * Thus, we need to not just read() from the given fd, but need
+ * to also handle messages for the simulation time - this function
+ * does that for us while waiting for the given fd to be readable.
+ */
+ if (wait)
+ time_travel_wait_readable(fd);
+
+ rc = vhost_user_recv_header(fd, msg);
+
+ if (rc == -ECONNRESET && vu_dev->registered) {
+ struct virtio_uml_platform_data *pdata;
+
+ pdata = vu_dev->pdev->dev.platform_data;
+
+ virtio_break_device(&vu_dev->vdev);
+ schedule_work(&pdata->conn_broken_wk);
+ }
if (rc)
return rc;
size = msg->header.size;
@@ -125,7 +166,8 @@
struct vhost_user_msg *msg,
size_t max_payload_size)
{
- int rc = vhost_user_recv(vu_dev->sock, msg, max_payload_size);
+ int rc = vhost_user_recv(vu_dev, vu_dev->sock, msg,
+ max_payload_size, true);
if (rc)
return rc;
@@ -155,7 +197,8 @@
struct vhost_user_msg *msg,
size_t max_payload_size)
{
- int rc = vhost_user_recv(vu_dev->req_fd, msg, max_payload_size);
+ int rc = vhost_user_recv(vu_dev, vu_dev->req_fd, msg,
+ max_payload_size, false);
if (rc)
return rc;
@@ -172,6 +215,7 @@
int *fds, size_t num_fds)
{
size_t size = sizeof(msg->header) + msg->header.size;
+ unsigned long flags;
bool request_ack;
int rc;
@@ -190,24 +234,28 @@
if (request_ack)
msg->header.flags |= VHOST_USER_FLAG_NEED_REPLY;
+ spin_lock_irqsave(&vu_dev->sock_lock, flags);
rc = full_sendmsg_fds(vu_dev->sock, msg, size, fds, num_fds);
if (rc < 0)
- return rc;
+ goto out;
if (request_ack) {
uint64_t status;
rc = vhost_user_recv_u64(vu_dev, &status);
if (rc)
- return rc;
+ goto out;
if (status) {
vu_err(vu_dev, "slave reports error: %llu\n", status);
- return -EIO;
+ rc = -EIO;
+ goto out;
}
}
- return 0;
+out:
+ spin_unlock_irqrestore(&vu_dev->sock_lock, flags);
+ return rc;
}
static int vhost_user_send_no_payload(struct virtio_uml_device *vu_dev,
@@ -307,6 +355,7 @@
static irqreturn_t vu_req_interrupt(int irq, void *data)
{
struct virtio_uml_device *vu_dev = data;
+ struct virtqueue *vq;
int response = 1;
struct {
struct vhost_user_msg msg;
@@ -326,8 +375,17 @@
virtio_config_changed(&vu_dev->vdev);
response = 0;
break;
+ case VHOST_USER_SLAVE_VRING_CALL:
+ virtio_device_for_each_vq((&vu_dev->vdev), vq) {
+ if (vq->index == msg.msg.payload.vring_state.index) {
+ response = 0;
+ vring_interrupt(0 /* ignored */, vq);
+ break;
+ }
+ }
+ break;
case VHOST_USER_SLAVE_IOTLB_MSG:
- /* not supported - VIRTIO_F_IOMMU_PLATFORM */
+ /* not supported - VIRTIO_F_ACCESS_PLATFORM */
case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG:
/* not supported - VHOST_USER_PROTOCOL_F_HOST_NOTIFIER */
default:
@@ -667,6 +725,17 @@
const uint64_t n = 1;
int rc;
+ time_travel_propagate_time();
+
+ if (info->kick_fd < 0) {
+ struct virtio_uml_device *vu_dev;
+
+ vu_dev = to_virtio_uml_device(vq->vdev);
+
+ return vhost_user_set_vring_state(vu_dev, VHOST_USER_VRING_KICK,
+ vq->index, 0) == 0;
+ }
+
do {
rc = os_write_file(info->kick_fd, &n, sizeof(n));
} while (rc == -EINTR);
@@ -732,10 +801,13 @@
{
struct virtio_uml_vq_info *info = vq->priv;
- um_free_irq(VIRTIO_IRQ, vq);
+ if (info->call_fd >= 0) {
+ um_free_irq(VIRTIO_IRQ, vq);
+ os_close_file(info->call_fd);
+ }
- os_close_file(info->call_fd);
- os_close_file(info->kick_fd);
+ if (info->kick_fd >= 0)
+ os_close_file(info->kick_fd);
vring_del_virtqueue(vq);
kfree(info);
@@ -765,6 +837,15 @@
int call_fds[2];
int rc;
+ /* no call FD needed/desired in this case */
+ if (vu_dev->protocol_features &
+ BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) &&
+ vu_dev->protocol_features &
+ BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
+ info->call_fd = -1;
+ return 0;
+ }
+
/* Use a pipe for call fd, since SIGIO is not supported for eventfd */
rc = os_pipe(call_fds, true, true);
if (rc < 0)
@@ -793,6 +874,23 @@
return rc;
}
+#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
+static void vu_defer_irq_handle(struct time_travel_event *d)
+{
+ struct virtio_uml_vq_info *info;
+
+ info = container_of(d, struct virtio_uml_vq_info, defer);
+ info->callback(info->vq);
+}
+
+static void vu_defer_irq_callback(struct virtqueue *vq)
+{
+ struct virtio_uml_vq_info *info = vq->priv;
+
+ time_travel_add_irq_event(&info->defer);
+}
+#endif
+
static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
unsigned index, vq_callback_t *callback,
const char *name, bool ctx)
@@ -812,6 +910,19 @@
snprintf(info->name, sizeof(info->name), "%s.%d-%s", pdev->name,
pdev->id, name);
+#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
+ /*
+ * When we get an interrupt, we must bounce it through the simulation
+ * calendar (the simtime device), except for the simtime device itself
+ * since that's part of the simulation control.
+ */
+ if (time_travel_mode == TT_MODE_EXTERNAL && callback) {
+ info->callback = callback;
+ callback = vu_defer_irq_callback;
+ time_travel_set_event_fn(&info->defer, vu_defer_irq_handle);
+ }
+#endif
+
vq = vring_create_virtqueue(index, num, PAGE_SIZE, vdev, true, true,
ctx, vu_notify, callback, info->name);
if (!vq) {
@@ -820,11 +931,19 @@
}
vq->priv = info;
num = virtqueue_get_vring_size(vq);
+#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
+ info->vq = vq;
+#endif
- rc = os_eventfd(0, 0);
- if (rc < 0)
- goto error_kick;
- info->kick_fd = rc;
+ if (vu_dev->protocol_features &
+ BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS)) {
+ info->kick_fd = -1;
+ } else {
+ rc = os_eventfd(0, 0);
+ if (rc < 0)
+ goto error_kick;
+ info->kick_fd = rc;
+ }
rc = vu_setup_vq_call_fd(vu_dev, vq);
if (rc)
@@ -849,10 +968,13 @@
return vq;
error_setup:
- um_free_irq(VIRTIO_IRQ, vq);
- os_close_file(info->call_fd);
+ if (info->call_fd >= 0) {
+ um_free_irq(VIRTIO_IRQ, vq);
+ os_close_file(info->call_fd);
+ }
error_call:
- os_close_file(info->kick_fd);
+ if (info->kick_fd >= 0)
+ os_close_file(info->kick_fd);
error_kick:
vring_del_virtqueue(vq);
error_create:
@@ -891,10 +1013,12 @@
list_for_each_entry(vq, &vdev->vqs, list) {
struct virtio_uml_vq_info *info = vq->priv;
- rc = vhost_user_set_vring_kick(vu_dev, vq->index,
- info->kick_fd);
- if (rc)
- goto error_setup;
+ if (info->kick_fd >= 0) {
+ rc = vhost_user_set_vring_kick(vu_dev, vq->index,
+ info->kick_fd);
+ if (rc)
+ goto error_setup;
+ }
rc = vhost_user_set_vring_enable(vu_dev, vq->index, true);
if (rc)
@@ -952,6 +1076,8 @@
container_of(d, struct virtio_device, dev);
struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
+ time_travel_propagate_time();
+
/* might not have been opened due to not negotiating the feature */
if (vu_dev->req_fd >= 0) {
um_free_irq(VIRTIO_IRQ, vu_dev);
@@ -964,11 +1090,6 @@
/* Platform device */
-struct virtio_uml_platform_data {
- u32 virtio_device_id;
- const char *socket_path;
-};
-
static int virtio_uml_probe(struct platform_device *pdev)
{
struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
@@ -990,13 +1111,17 @@
vu_dev->pdev = pdev;
vu_dev->req_fd = -1;
+ time_travel_propagate_time();
+
do {
rc = os_connect_socket(pdata->socket_path);
} while (rc == -EINTR);
if (rc < 0)
- return rc;
+ goto error_free;
vu_dev->sock = rc;
+ spin_lock_init(&vu_dev->sock_lock);
+
rc = vhost_user_init(vu_dev);
if (rc)
goto error_init;
@@ -1006,10 +1131,13 @@
rc = register_virtio_device(&vu_dev->vdev);
if (rc)
put_device(&vu_dev->vdev.dev);
+ vu_dev->registered = 1;
return rc;
error_init:
os_close_file(vu_dev->sock);
+error_free:
+ kfree(vu_dev);
return rc;
}
@@ -1035,13 +1163,31 @@
static bool vu_cmdline_parent_registered;
static int vu_cmdline_id;
+static int vu_unregister_cmdline_device(struct device *dev, void *data)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
+
+ kfree(pdata->socket_path);
+ platform_device_unregister(pdev);
+ return 0;
+}
+
+static void vu_conn_broken(struct work_struct *wk)
+{
+ struct virtio_uml_platform_data *pdata;
+
+ pdata = container_of(wk, struct virtio_uml_platform_data, conn_broken_wk);
+ vu_unregister_cmdline_device(&pdata->pdev->dev, NULL);
+}
+
static int vu_cmdline_set(const char *device, const struct kernel_param *kp)
{
const char *ids = strchr(device, ':');
unsigned int virtio_device_id;
int processed, consumed, err;
char *socket_path;
- struct virtio_uml_platform_data pdata;
+ struct virtio_uml_platform_data pdata, *ppdata;
struct platform_device *pdev;
if (!ids || ids == device)
@@ -1080,6 +1226,11 @@
err = PTR_ERR_OR_ZERO(pdev);
if (err)
goto free;
+
+ ppdata = pdev->dev.platform_data;
+ ppdata->pdev = pdev;
+ INIT_WORK(&ppdata->conn_broken_wk, vu_conn_broken);
+
return 0;
free:
@@ -1122,16 +1273,6 @@
);
-static int vu_unregister_cmdline_device(struct device *dev, void *data)
-{
- struct platform_device *pdev = to_platform_device(dev);
- struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
-
- kfree(pdata->socket_path);
- platform_device_unregister(pdev);
- return 0;
-}
-
static void vu_unregister_cmdline_devices(void)
{
if (vu_cmdline_parent_registered) {
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index 398006d..1c63b26 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -3,7 +3,6 @@
generic-y += bug.h
generic-y += compat.h
generic-y += current.h
-generic-y += delay.h
generic-y += device.h
generic-y += emergency-restart.h
generic-y += exec.h
@@ -17,6 +16,7 @@
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
generic-y += mmiowb.h
+generic-y += module.lds.h
generic-y += param.h
generic-y += pci.h
generic-y += percpu.h
@@ -25,5 +25,4 @@
generic-y += topology.h
generic-y += trace_clock.h
generic-y += word-at-a-time.h
-generic-y += xor.h
generic-y += kprobes.h
diff --git a/arch/um/include/asm/common.lds.S b/arch/um/include/asm/common.lds.S
index 4049f2c..eca6c45 100644
--- a/arch/um/include/asm/common.lds.S
+++ b/arch/um/include/asm/common.lds.S
@@ -9,14 +9,13 @@
_sdata = .;
PROVIDE (sdata = .);
- RODATA
+ RO_DATA(4096)
.unprotected : { *(.unprotected) }
. = ALIGN(4096);
PROVIDE (_unprotected_end = .);
. = ALIGN(4096);
- NOTES
EXCEPTION_TABLE(0)
BUG_TABLE
diff --git a/arch/um/include/asm/delay.h b/arch/um/include/asm/delay.h
new file mode 100644
index 0000000..e79b2ab
--- /dev/null
+++ b/arch/um/include/asm/delay.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __UM_DELAY_H
+#define __UM_DELAY_H
+#include <asm-generic/delay.h>
+#include <linux/time-internal.h>
+
+static inline void um_ndelay(unsigned long nsecs)
+{
+ if (time_travel_mode == TT_MODE_INFCPU ||
+ time_travel_mode == TT_MODE_EXTERNAL) {
+ time_travel_ndelay(nsecs);
+ return;
+ }
+ ndelay(nsecs);
+}
+#undef ndelay
+#define ndelay(n) um_ndelay(n)
+
+static inline void um_udelay(unsigned long usecs)
+{
+ if (time_travel_mode == TT_MODE_INFCPU ||
+ time_travel_mode == TT_MODE_EXTERNAL) {
+ time_travel_ndelay(1000 * usecs);
+ return;
+ }
+ udelay(usecs);
+}
+#undef udelay
+#define udelay(n) um_udelay(n)
+#endif /* __UM_DELAY_H */
diff --git a/arch/um/include/asm/mmu_context.h b/arch/um/include/asm/mmu_context.h
index 5aee062..17ddd4e 100644
--- a/arch/um/include/asm/mmu_context.h
+++ b/arch/um/include/asm/mmu_context.h
@@ -8,6 +8,7 @@
#include <linux/sched.h>
#include <linux/mm_types.h>
+#include <linux/mmap_lock.h>
#include <asm/mmu.h>
@@ -25,11 +26,6 @@
unsigned long start, unsigned long end)
{
}
-static inline void arch_bprm_mm_init(struct mm_struct *mm,
- struct vm_area_struct *vma)
-{
-}
-
static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
bool write, bool execute, bool foreign)
{
@@ -52,9 +48,9 @@
* when the new ->mm is used for the first time.
*/
__switch_mm(&new->context.id);
- down_write_nested(&new->mmap_sem, 1);
+ mmap_write_lock_nested(new, SINGLE_DEPTH_NESTING);
uml_setup_stubs(new);
- up_write(&new->mmap_sem);
+ mmap_write_unlock(new);
}
static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
index 881e76d..2bbf28c 100644
--- a/arch/um/include/asm/pgalloc.h
+++ b/arch/um/include/asm/pgalloc.h
@@ -10,7 +10,7 @@
#include <linux/mm.h>
-#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
+#include <asm-generic/pgalloc.h>
#define pmd_populate_kernel(mm, pmd, pte) \
set_pmd(pmd, __pmd(_PAGE_TABLE + (unsigned long) __pa(pte)))
@@ -25,7 +25,6 @@
* Allocate and free page tables.
*/
extern pgd_t *pgd_alloc(struct mm_struct *);
-extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
#define __pte_free_tlb(tlb,pte, address) \
do { \
@@ -35,12 +34,12 @@
#ifdef CONFIG_3_LEVEL_PGTABLES
-static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
-{
- free_page((unsigned long)pmd);
-}
+#define __pmd_free_tlb(tlb, pmd, address) \
+do { \
+ pgtable_pmd_page_dtor(virt_to_page(pmd)); \
+ tlb_remove_page((tlb),virt_to_page(pmd)); \
+} while (0) \
-#define __pmd_free_tlb(tlb,x, address) tlb_remove_page((tlb),virt_to_page(x))
#endif
#endif
diff --git a/arch/um/include/asm/pgtable-2level.h b/arch/um/include/asm/pgtable-2level.h
index 32b3d26..32106d3 100644
--- a/arch/um/include/asm/pgtable-2level.h
+++ b/arch/um/include/asm/pgtable-2level.h
@@ -8,7 +8,6 @@
#ifndef __UM_PGTABLE_2LEVEL_H
#define __UM_PGTABLE_2LEVEL_H
-#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopmd.h>
/* PGDIR_SHIFT determines what a third-level page table entry can map */
diff --git a/arch/um/include/asm/pgtable-3level.h b/arch/um/include/asm/pgtable-3level.h
index 9812269..7e6a418 100644
--- a/arch/um/include/asm/pgtable-3level.h
+++ b/arch/um/include/asm/pgtable-3level.h
@@ -7,7 +7,6 @@
#ifndef __UM_PGTABLE_3LEVEL_H
#define __UM_PGTABLE_3LEVEL_H
-#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopud.h>
/* PGDIR_SHIFT determines what a third-level page table entry can map */
@@ -79,9 +78,6 @@
#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
#endif
-struct mm_struct;
-extern pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address);
-
static inline void pud_clear (pud_t *pud)
{
set_pud(pud, __pud(_PAGE_NEWPAGE));
@@ -90,10 +86,6 @@
#define pud_page(pud) phys_to_page(pud_val(pud) & PAGE_MASK)
#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PAGE_MASK))
-/* Find an entry in the second-level page table.. */
-#define pmd_offset(pud, address) ((pmd_t *) pud_page_vaddr(*(pud)) + \
- pmd_index(address))
-
static inline unsigned long pte_pfn(pte_t pte)
{
return phys_to_pfn(pte_val(pte));
diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index 36a44d5..def3761 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*
+/*
* Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
* Copyright 2003 PathScale, Inc.
* Derived from include/asm-i386/pgtable.h
@@ -106,6 +106,9 @@
#define pud_newpage(x) (pud_val(x) & _PAGE_NEWPAGE)
#define pud_mkuptodate(x) (pud_val(x) &= ~_PAGE_NEWPAGE)
+#define p4d_newpage(x) (p4d_val(x) & _PAGE_NEWPAGE)
+#define p4d_mkuptodate(x) (p4d_val(x) &= ~_PAGE_NEWPAGE)
+
#define pmd_page(pmd) phys_to_page(pmd_val(pmd) & PAGE_MASK)
#define pte_page(x) pfn_to_page(pte_pfn(x))
@@ -128,7 +131,7 @@
* Undefined behaviour if not..
*/
static inline int pte_read(pte_t pte)
-{
+{
return((pte_get_bits(pte, _PAGE_USER)) &&
!(pte_get_bits(pte, _PAGE_PROTNONE)));
}
@@ -160,13 +163,8 @@
}
static inline int pte_newprot(pte_t pte)
-{
- return(pte_present(pte) && (pte_get_bits(pte, _PAGE_NEWPROT)));
-}
-
-static inline int pte_special(pte_t pte)
{
- return 0;
+ return(pte_present(pte) && (pte_get_bits(pte, _PAGE_NEWPROT)));
}
/*
@@ -187,31 +185,31 @@
return(pte);
}
-static inline pte_t pte_mkold(pte_t pte)
-{
+static inline pte_t pte_mkold(pte_t pte)
+{
pte_clear_bits(pte, _PAGE_ACCESSED);
return(pte);
}
static inline pte_t pte_wrprotect(pte_t pte)
-{
+{
if (likely(pte_get_bits(pte, _PAGE_RW)))
pte_clear_bits(pte, _PAGE_RW);
else
return pte;
- return(pte_mknewprot(pte));
+ return(pte_mknewprot(pte));
}
static inline pte_t pte_mkread(pte_t pte)
-{
+{
if (unlikely(pte_get_bits(pte, _PAGE_USER)))
return pte;
pte_set_bits(pte, _PAGE_USER);
- return(pte_mknewprot(pte));
+ return(pte_mknewprot(pte));
}
static inline pte_t pte_mkdirty(pte_t pte)
-{
+{
pte_set_bits(pte, _PAGE_DIRTY);
return(pte);
}
@@ -222,20 +220,20 @@
return(pte);
}
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite(pte_t pte)
{
if (unlikely(pte_get_bits(pte, _PAGE_RW)))
return pte;
pte_set_bits(pte, _PAGE_RW);
- return(pte_mknewprot(pte));
+ return(pte_mknewprot(pte));
}
-static inline pte_t pte_mkuptodate(pte_t pte)
+static inline pte_t pte_mkuptodate(pte_t pte)
{
pte_clear_bits(pte, _PAGE_NEWPAGE);
if(pte_present(pte))
pte_clear_bits(pte, _PAGE_NEWPROT);
- return(pte);
+ return(pte);
}
static inline pte_t pte_mknewpage(pte_t pte)
@@ -244,11 +242,6 @@
return(pte);
}
-static inline pte_t pte_mkspecial(pte_t pte)
-{
- return(pte);
-}
-
static inline void set_pte(pte_t *pteptr, pte_t pteval)
{
pte_copy(*pteptr, pteval);
@@ -295,53 +288,16 @@
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
pte_set_val(pte, (pte_val(pte) & _PAGE_CHG_MASK), newprot);
- return pte;
+ return pte;
}
/*
- * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD]
- *
- * this macro returns the index of the entry in the pgd page which would
- * control the given virtual address
- */
-#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
-
-/*
- * pgd_offset() returns a (pgd_t *)
- * pgd_index() is used get the offset into the pgd page's array of pgd_t's;
- */
-#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address))
-
-/*
- * a shortcut which implies the use of the kernel's pgd, instead
- * of a process's
- */
-#define pgd_offset_k(address) pgd_offset(&init_mm, address)
-
-/*
* the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD]
*
* this macro returns the index of the entry in the pmd page which would
* control the given virtual address
*/
#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
-#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
-
-#define pmd_page_vaddr(pmd) \
- ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
-
-/*
- * the pte page can be thought of an array like this: pte_t[PTRS_PER_PTE]
- *
- * this macro returns the index of the entry in the pte page which would
- * control the given virtual address
- */
-#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
-#define pte_offset_kernel(dir, address) \
- ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address))
-#define pte_offset_map(dir, address) \
- ((pte_t *)page_address(pmd_page(*(dir))) + pte_index(address))
-#define pte_unmap(pte) do { } while (0)
struct mm_struct;
extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr);
@@ -360,8 +316,6 @@
#define kern_addr_valid(addr) (1)
-#include <asm-generic/pgtable.h>
-
/* Clear a kernel PTE and flush it from the TLB */
#define kpte_clear_flush(ptep, vaddr) \
do { \
diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h
index 70ee603..ff9c628 100644
--- a/arch/um/include/asm/tlb.h
+++ b/arch/um/include/asm/tlb.h
@@ -2,6 +2,8 @@
#ifndef __UM_TLB_H
#define __UM_TLB_H
+#include <linux/mm.h>
+
#include <asm/tlbflush.h>
#include <asm-generic/cacheflush.h>
#include <asm-generic/tlb.h>
diff --git a/arch/um/include/asm/vmalloc.h b/arch/um/include/asm/vmalloc.h
new file mode 100644
index 0000000..9a7b9ed
--- /dev/null
+++ b/arch/um/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_UM_VMALLOC_H
+#define _ASM_UM_VMALLOC_H
+
+#endif /* _ASM_UM_VMALLOC_H */
diff --git a/arch/um/include/asm/xor.h b/arch/um/include/asm/xor.h
new file mode 100644
index 0000000..36b33d6
--- /dev/null
+++ b/arch/um/include/asm/xor.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm-generic/xor.h>
+#include <linux/time-internal.h>
+
+/* pick an arbitrary one - measuring isn't possible with inf-cpu */
+#define XOR_SELECT_TEMPLATE(x) \
+ (time_travel_mode == TT_MODE_INFCPU ? &xor_block_8regs : NULL)
diff --git a/arch/um/include/linux/time-internal.h b/arch/um/include/linux/time-internal.h
new file mode 100644
index 0000000..f3b03d3
--- /dev/null
+++ b/arch/um/include/linux/time-internal.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2012 - 2014 Cisco Systems
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ */
+
+#ifndef __TIMER_INTERNAL_H__
+#define __TIMER_INTERNAL_H__
+#include <linux/list.h>
+
+#define TIMER_MULTIPLIER 256
+#define TIMER_MIN_DELTA 500
+
+enum time_travel_mode {
+ TT_MODE_OFF,
+ TT_MODE_BASIC,
+ TT_MODE_INFCPU,
+ TT_MODE_EXTERNAL,
+};
+
+#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
+struct time_travel_event {
+ unsigned long long time;
+ void (*fn)(struct time_travel_event *d);
+ struct list_head list;
+ bool pending, onstack;
+};
+
+extern enum time_travel_mode time_travel_mode;
+
+void time_travel_sleep(unsigned long long duration);
+
+static inline void
+time_travel_set_event_fn(struct time_travel_event *e,
+ void (*fn)(struct time_travel_event *d))
+{
+ e->fn = fn;
+}
+
+void __time_travel_propagate_time(void);
+
+static inline void time_travel_propagate_time(void)
+{
+ if (time_travel_mode == TT_MODE_EXTERNAL)
+ __time_travel_propagate_time();
+}
+
+void __time_travel_wait_readable(int fd);
+
+static inline void time_travel_wait_readable(int fd)
+{
+ if (time_travel_mode == TT_MODE_EXTERNAL)
+ __time_travel_wait_readable(fd);
+}
+
+void time_travel_add_irq_event(struct time_travel_event *e);
+#else
+struct time_travel_event {
+};
+
+#define time_travel_mode TT_MODE_OFF
+
+static inline void time_travel_sleep(unsigned long long duration)
+{
+}
+
+/* this is a macro so the event/function need not exist */
+#define time_travel_set_event_fn(e, fn) do {} while (0)
+
+static inline void time_travel_propagate_time(void)
+{
+}
+
+static inline void time_travel_wait_readable(int fd)
+{
+}
+#endif /* CONFIG_UML_TIME_TRAVEL_SUPPORT */
+
+/*
+ * Without CONFIG_UML_TIME_TRAVEL_SUPPORT this is a linker error if used,
+ * which is intentional since we really shouldn't link it in that case.
+ */
+void time_travel_ndelay(unsigned long nsec);
+#endif /* __TIMER_INTERNAL_H__ */
diff --git a/arch/um/include/shared/init.h b/arch/um/include/shared/init.h
index c66de43..1a659e2 100644
--- a/arch/um/include/shared/init.h
+++ b/arch/um/include/shared/init.h
@@ -45,15 +45,15 @@
/* These are for everybody (although not all archs will actually
discard it in modules) */
-#define __init __section(.init.text)
-#define __initdata __section(.init.data)
-#define __exitdata __section(.exit.data)
-#define __exit_call __used __section(.exitcall.exit)
+#define __init __section(".init.text")
+#define __initdata __section(".init.data")
+#define __exitdata __section(".exit.data")
+#define __exit_call __used __section(".exitcall.exit")
#ifdef MODULE
-#define __exit __section(.exit.text)
+#define __exit __section(".exit.text")
#else
-#define __exit __used __section(.exit.text)
+#define __exit __used __section(".exit.text")
#endif
#endif
@@ -102,10 +102,10 @@
* Mark functions and data as being only used at initialization
* or exit time.
*/
-#define __uml_init_setup __used __section(.uml.setup.init)
-#define __uml_setup_help __used __section(.uml.help.init)
-#define __uml_postsetup_call __used __section(.uml.postsetup.init)
-#define __uml_exit_call __used __section(.uml.exitcall.exit)
+#define __uml_init_setup __used __section(".uml.setup.init")
+#define __uml_setup_help __used __section(".uml.help.init")
+#define __uml_postsetup_call __used __section(".uml.postsetup.init")
+#define __uml_exit_call __used __section(".uml.exitcall.exit")
#ifdef __UM_HOST__
@@ -120,7 +120,7 @@
#define __exitcall(fn) static exitcall_t __exitcall_##fn __exit_call = fn
-#define __init_call __used __section(.initcall.init)
+#define __init_call __used __section(".initcall.init")
#endif
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 506bcd1..f467d28 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -150,7 +150,7 @@
extern int os_file_size(const char *file, unsigned long long *size_out);
extern int os_pread_file(int fd, void *buf, int len, unsigned long long offset);
extern int os_pwrite_file(int fd, const void *buf, int count, unsigned long long offset);
-extern int os_file_modtime(const char *file, unsigned long *modtime);
+extern int os_file_modtime(const char *file, long long *modtime);
extern int os_pipe(int *fd, int stream, int close_on_exec);
extern int os_set_fd_async(int fd);
extern int os_clear_fd_async(int fd);
@@ -181,6 +181,7 @@
extern int os_eventfd(unsigned int initval, int flags);
extern int os_sendmsg_fds(int fd, const void *buf, unsigned int len,
const int *fds, unsigned int fds_num);
+int os_poll(unsigned int n, const int *fds);
/* start_up.c */
extern void os_early_checks(void);
diff --git a/arch/um/include/shared/registers.h b/arch/um/include/shared/registers.h
index 0c50fa6..fbb709a 100644
--- a/arch/um/include/shared/registers.h
+++ b/arch/um/include/shared/registers.h
@@ -16,8 +16,8 @@
extern int save_fpx_registers(int pid, unsigned long *fp_regs);
extern int restore_fpx_registers(int pid, unsigned long *fp_regs);
extern int save_registers(int pid, struct uml_pt_regs *regs);
-extern int restore_registers(int pid, struct uml_pt_regs *regs);
-extern int init_registers(int pid);
+extern int restore_pid_registers(int pid, struct uml_pt_regs *regs);
+extern int init_pid_registers(int pid);
extern void get_safe_registers(unsigned long *regs, unsigned long *fp_regs);
extern unsigned long get_thread_reg(int reg, jmp_buf *buf);
extern int get_fp_registers(int pid, unsigned long *regs);
diff --git a/arch/um/include/shared/skas/mm_id.h b/arch/um/include/shared/skas/mm_id.h
index 4337b4c..e82e203 100644
--- a/arch/um/include/shared/skas/mm_id.h
+++ b/arch/um/include/shared/skas/mm_id.h
@@ -12,6 +12,7 @@
int pid;
} u;
unsigned long stack;
+ int kill;
};
#endif
diff --git a/arch/um/include/shared/timer-internal.h b/arch/um/include/shared/timer-internal.h
deleted file mode 100644
index 2d2d13c..0000000
--- a/arch/um/include/shared/timer-internal.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2012 - 2014 Cisco Systems
- * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- */
-
-#ifndef __TIMER_INTERNAL_H__
-#define __TIMER_INTERNAL_H__
-
-#define TIMER_MULTIPLIER 256
-#define TIMER_MIN_DELTA 500
-
-enum time_travel_mode {
- TT_MODE_OFF,
- TT_MODE_BASIC,
- TT_MODE_INFCPU,
-};
-
-enum time_travel_timer_mode {
- TT_TMR_DISABLED,
- TT_TMR_ONESHOT,
- TT_TMR_PERIODIC,
-};
-
-#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
-extern enum time_travel_mode time_travel_mode;
-extern unsigned long long time_travel_time;
-extern enum time_travel_timer_mode time_travel_timer_mode;
-extern unsigned long long time_travel_timer_expiry;
-extern unsigned long long time_travel_timer_interval;
-
-static inline void time_travel_set_time(unsigned long long ns)
-{
- time_travel_time = ns;
-}
-
-static inline void time_travel_set_timer_mode(enum time_travel_timer_mode mode)
-{
- time_travel_timer_mode = mode;
-}
-
-static inline void time_travel_set_timer_expiry(unsigned long long expiry)
-{
- time_travel_timer_expiry = expiry;
-}
-
-static inline void time_travel_set_timer_interval(unsigned long long interval)
-{
- time_travel_timer_interval = interval;
-}
-#else
-#define time_travel_mode TT_MODE_OFF
-#define time_travel_time 0
-#define time_travel_timer_expiry 0
-#define time_travel_timer_interval 0
-
-static inline void time_travel_set_time(unsigned long long ns)
-{
-}
-
-static inline void time_travel_set_timer_mode(enum time_travel_timer_mode mode)
-{
-}
-
-static inline void time_travel_set_timer_expiry(unsigned long long expiry)
-{
-}
-
-static inline void time_travel_set_timer_interval(unsigned long long interval)
-{
-}
-
-#define time_travel_timer_mode TT_TMR_DISABLED
-#endif
-
-#endif
diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S
index a82ec01..2f2a8ce 100644
--- a/arch/um/kernel/dyn.lds.S
+++ b/arch/um/kernel/dyn.lds.S
@@ -170,8 +170,8 @@
PROVIDE (end = .);
STABS_DEBUG
-
DWARF_DEBUG
+ ELF_DETAILS
DISCARDS
}
diff --git a/arch/um/kernel/exitcode.c b/arch/um/kernel/exitcode.c
index 369fd84..43edc2a 100644
--- a/arch/um/kernel/exitcode.c
+++ b/arch/um/kernel/exitcode.c
@@ -55,20 +55,19 @@
return count;
}
-static const struct file_operations exitcode_proc_fops = {
- .owner = THIS_MODULE,
- .open = exitcode_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
- .write = exitcode_proc_write,
+static const struct proc_ops exitcode_proc_ops = {
+ .proc_open = exitcode_proc_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_release = single_release,
+ .proc_write = exitcode_proc_write,
};
static int make_proc_exitcode(void)
{
struct proc_dir_entry *ent;
- ent = proc_create("exitcode", 0600, NULL, &exitcode_proc_fops);
+ ent = proc_create("exitcode", 0600, NULL, &exitcode_proc_ops);
if (ent == NULL) {
printk(KERN_WARNING "make_proc_exitcode : Failed to register "
"/proc/exitcode\n");
diff --git a/arch/um/kernel/kmsg_dump.c b/arch/um/kernel/kmsg_dump.c
index 98bdf69..e4abac6 100644
--- a/arch/um/kernel/kmsg_dump.c
+++ b/arch/um/kernel/kmsg_dump.c
@@ -9,20 +9,19 @@
enum kmsg_dump_reason reason)
{
static char line[1024];
-
+ struct console *con;
size_t len = 0;
- bool con_available = false;
/* only dump kmsg when no console is available */
if (!console_trylock())
return;
- if (console_drivers != NULL)
- con_available = true;
+ for_each_console(con)
+ break;
console_unlock();
- if (con_available == true)
+ if (con)
return;
printf("kmsg_dump:\n");
diff --git a/arch/um/kernel/maccess.c b/arch/um/kernel/maccess.c
index 67b2e0f..8ccd568 100644
--- a/arch/um/kernel/maccess.c
+++ b/arch/um/kernel/maccess.c
@@ -7,15 +7,13 @@
#include <linux/kernel.h>
#include <os.h>
-long probe_kernel_read(void *dst, const void *src, size_t size)
+bool copy_from_kernel_nofault_allowed(const void *src, size_t size)
{
void *psrc = (void *)rounddown((unsigned long)src, PAGE_SIZE);
if ((unsigned long)src < PAGE_SIZE || size <= 0)
- return -EFAULT;
-
+ return false;
if (os_mincore(psrc, size + src - psrc) <= 0)
- return -EFAULT;
-
- return __probe_kernel_read(dst, src, size);
+ return false;
+ return true;
}
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 417ff64..9242dc9 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -96,6 +96,7 @@
pgd_t *pgd_base)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
int i, j;
@@ -107,7 +108,8 @@
pgd = pgd_base + i;
for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) {
- pud = pud_offset(pgd, vaddr);
+ p4d = p4d_offset(pgd, vaddr);
+ pud = pud_offset(p4d, vaddr);
if (pud_none(*pud))
one_md_table_init(pud);
pmd = pmd_offset(pud, vaddr);
@@ -123,9 +125,6 @@
{
#ifdef CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA
long size = FIXADDR_USER_END - FIXADDR_USER_START;
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
pte_t *pte;
phys_t p;
unsigned long v, vaddr = FIXADDR_USER_START;
@@ -143,10 +142,7 @@
p = __pa(v);
for ( ; size > 0; size -= PAGE_SIZE, vaddr += PAGE_SIZE,
p += PAGE_SIZE) {
- pgd = swapper_pg_dir + pgd_index(vaddr);
- pud = pud_offset(pgd, vaddr);
- pmd = pmd_offset(pud, vaddr);
- pte = pte_offset_kernel(pmd, vaddr);
+ pte = virt_to_kpte(vaddr);
pte_set_val(*pte, p, PAGE_READONLY);
}
#endif
@@ -154,8 +150,8 @@
void __init paging_init(void)
{
- unsigned long zones_size[MAX_NR_ZONES], vaddr;
- int i;
+ unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0 };
+ unsigned long vaddr;
empty_zero_page = (unsigned long *) memblock_alloc_low(PAGE_SIZE,
PAGE_SIZE);
@@ -163,12 +159,8 @@
panic("%s: Failed to allocate %lu bytes align=%lx\n",
__func__, PAGE_SIZE, PAGE_SIZE);
- for (i = 0; i < ARRAY_SIZE(zones_size); i++)
- zones_size[i] = 0;
-
- zones_size[ZONE_NORMAL] = (end_iomem >> PAGE_SHIFT) -
- (uml_physmem >> PAGE_SHIFT);
- free_area_init(zones_size);
+ max_zone_pfn[ZONE_NORMAL] = end_iomem >> PAGE_SHIFT;
+ free_area_init(max_zone_pfn);
/*
* Fixed mappings, only the page table structure has to be
@@ -204,23 +196,6 @@
return pgd;
}
-void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
- free_page((unsigned long) pgd);
-}
-
-#ifdef CONFIG_3_LEVEL_PGTABLES
-pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
-{
- pmd_t *pmd = (pmd_t *) __get_free_page(GFP_KERNEL);
-
- if (pmd)
- memset(pmd, 0, PAGE_SIZE);
-
- return pmd;
-}
-#endif
-
void *uml_kmalloc(int size, int flags)
{
return kmalloc(size, flags);
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index 17045e7..9505a7e 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -25,14 +25,13 @@
#include <linux/threads.h>
#include <linux/tracehook.h>
#include <asm/current.h>
-#include <asm/pgtable.h>
#include <asm/mmu_context.h>
#include <linux/uaccess.h>
#include <as-layout.h>
#include <kern_util.h>
#include <os.h>
#include <skas.h>
-#include <timer-internal.h>
+#include <linux/time-internal.h>
/*
* This is a per-cpu array. A processor only modifies its entry and it only
@@ -102,7 +101,7 @@
schedule();
if (test_thread_flag(TIF_SIGPENDING))
do_signal(regs);
- if (test_and_clear_thread_flag(TIF_NOTIFY_RESUME))
+ if (test_thread_flag(TIF_NOTIFY_RESUME))
tracehook_notify_resume(regs);
}
@@ -153,7 +152,7 @@
userspace(¤t->thread.regs.regs, current_thread_info()->aux_fp_regs);
}
-int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
+int copy_thread(unsigned long clone_flags, unsigned long sp,
unsigned long arg, struct task_struct * p, unsigned long tls)
{
void (*handler)(void);
@@ -203,43 +202,6 @@
kmalloc_ok = save_kmalloc_ok;
}
-static void time_travel_sleep(unsigned long long duration)
-{
- unsigned long long next = time_travel_time + duration;
-
- if (time_travel_mode != TT_MODE_INFCPU)
- os_timer_disable();
-
- while (time_travel_timer_mode == TT_TMR_PERIODIC &&
- time_travel_timer_expiry < time_travel_time)
- time_travel_set_timer_expiry(time_travel_timer_expiry +
- time_travel_timer_interval);
-
- if (time_travel_timer_mode != TT_TMR_DISABLED &&
- time_travel_timer_expiry < next) {
- if (time_travel_timer_mode == TT_TMR_ONESHOT)
- time_travel_set_timer_mode(TT_TMR_DISABLED);
- /*
- * In basic mode, time_travel_time will be adjusted in
- * the timer IRQ handler so it works even when the signal
- * comes from the OS timer, see there.
- */
- if (time_travel_mode != TT_MODE_BASIC)
- time_travel_set_time(time_travel_timer_expiry);
-
- deliver_alarm();
- } else {
- time_travel_set_time(next);
- }
-
- if (time_travel_mode != TT_MODE_INFCPU) {
- if (time_travel_timer_mode == TT_TMR_PERIODIC)
- os_timer_set_interval(time_travel_timer_interval);
- else if (time_travel_timer_mode == TT_TMR_ONESHOT)
- os_timer_one_shot(time_travel_timer_expiry - next);
- }
-}
-
static void um_idle_sleep(void)
{
unsigned long long duration = UM_NSEC_PER_SEC;
@@ -255,7 +217,7 @@
{
cpu_tasks[current_thread_info()->cpu].pid = os_getpid();
um_idle_sleep();
- local_irq_enable();
+ raw_local_irq_enable();
}
int __cant_sleep(void) {
@@ -348,13 +310,12 @@
return count;
}
-static const struct file_operations sysemu_proc_fops = {
- .owner = THIS_MODULE,
- .open = sysemu_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
- .write = sysemu_proc_write,
+static const struct proc_ops sysemu_proc_ops = {
+ .proc_open = sysemu_proc_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_release = single_release,
+ .proc_write = sysemu_proc_write,
};
int __init make_proc_sysemu(void)
@@ -363,7 +324,7 @@
if (!sysemu_supported)
return 0;
- ent = proc_create("sysemu", 0600, NULL, &sysemu_proc_fops);
+ ent = proc_create("sysemu", 0600, NULL, &sysemu_proc_ops);
if (ent == NULL)
{
diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c
index 3d57c71..88cd9b5 100644
--- a/arch/um/kernel/signal.c
+++ b/arch/um/kernel/signal.c
@@ -70,7 +70,7 @@
PT_REGS_SYSCALL_RET(regs) = -EINTR;
break;
}
- /* fallthrough */
+ fallthrough;
case -ERESTARTNOINTR:
PT_REGS_RESTART_SYSCALL(regs);
PT_REGS_ORIG_SYSCALL(regs) = PT_REGS_SYSCALL_NR(regs);
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index b5e3d91..d996116 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -9,7 +9,6 @@
#include <linux/slab.h>
#include <asm/pgalloc.h>
-#include <asm/pgtable.h>
#include <asm/sections.h>
#include <as-layout.h>
#include <os.h>
@@ -19,15 +18,21 @@
unsigned long kernel)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
pgd = pgd_offset(mm, proc);
- pud = pud_alloc(mm, pgd, proc);
- if (!pud)
+
+ p4d = p4d_alloc(mm, pgd, proc);
+ if (!p4d)
goto out;
+ pud = pud_alloc(mm, p4d, proc);
+ if (!pud)
+ goto out_pud;
+
pmd = pmd_alloc(mm, pud, proc);
if (!pmd)
goto out_pmd;
@@ -44,6 +49,8 @@
pmd_free(mm, pmd);
out_pmd:
pud_free(mm, pud);
+ out_pud:
+ p4d_free(mm, p4d);
out:
return -ENOMEM;
}
@@ -107,7 +114,7 @@
mm->context.stub_pages[0] = virt_to_page(__syscall_stub_start);
mm->context.stub_pages[1] = virt_to_page(mm->context.id.stack);
- /* dup_mmap already holds mmap_sem */
+ /* dup_mmap already holds mmap_lock */
err = install_special_mapping(mm, STUB_START, STUB_END - STUB_START,
VM_READ | VM_MAYREAD | VM_EXEC |
VM_MAYEXEC | VM_DONTCOPY | VM_PFNMAP,
diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c
index f574b18..3d91f89 100644
--- a/arch/um/kernel/skas/syscall.c
+++ b/arch/um/kernel/skas/syscall.c
@@ -10,7 +10,8 @@
#include <sysdep/ptrace.h>
#include <sysdep/ptrace_user.h>
#include <sysdep/syscalls.h>
-#include <shared/timer-internal.h>
+#include <linux/time-internal.h>
+#include <asm/unistd.h>
void handle_syscall(struct uml_pt_regs *r)
{
@@ -24,7 +25,8 @@
* went to sleep, even if said userspace interacts with the kernel in
* various ways.
*/
- if (time_travel_mode == TT_MODE_INFCPU)
+ if (time_travel_mode == TT_MODE_INFCPU ||
+ time_travel_mode == TT_MODE_EXTERNAL)
schedule();
/* Initialize the syscall number and default return value. */
@@ -35,7 +37,7 @@
goto out;
/* Do the seccomp check after ptrace; failures should be fast. */
- if (secure_computing(NULL) == -1)
+ if (secure_computing() == -1)
goto out;
syscall = UPT_SYSCALL_NR(r);
diff --git a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c
index 3236052..2dec915 100644
--- a/arch/um/kernel/skas/uaccess.c
+++ b/arch/um/kernel/skas/uaccess.c
@@ -10,13 +10,13 @@
#include <linux/sched.h>
#include <asm/current.h>
#include <asm/page.h>
-#include <asm/pgtable.h>
#include <kern_util.h>
#include <os.h>
pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
@@ -27,7 +27,11 @@
if (!pgd_present(*pgd))
return NULL;
- pud = pud_offset(pgd, addr);
+ p4d = p4d_offset(pgd, addr);
+ if (!p4d_present(*p4d))
+ return NULL;
+
+ pud = pud_offset(p4d, addr);
if (!pud_present(*pud))
return NULL;
diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c
index c71b5ef..7452f70 100644
--- a/arch/um/kernel/sysrq.c
+++ b/arch/um/kernel/sysrq.c
@@ -17,7 +17,9 @@
static void _print_addr(void *data, unsigned long address, int reliable)
{
- pr_info(" [<%08lx>] %s%pS\n", address, reliable ? "" : "? ",
+ const char *loglvl = data;
+
+ printk("%s [<%08lx>] %s%pS\n", loglvl, address, reliable ? "" : "? ",
(void *)address);
}
@@ -25,9 +27,9 @@
.address = _print_addr
};
-void show_stack(struct task_struct *task, unsigned long *stack)
+void show_stack(struct task_struct *task, unsigned long *stack,
+ const char *loglvl)
{
- unsigned long *sp = stack;
struct pt_regs *segv_regs = current->thread.segv_regs;
int i;
@@ -38,10 +40,9 @@
}
if (!stack)
- sp = get_stack_pointer(task, segv_regs);
+ stack = get_stack_pointer(task, segv_regs);
- pr_info("Stack:\n");
- stack = sp;
+ printk("%sStack:\n", loglvl);
for (i = 0; i < 3 * STACKSLOTS_PER_LINE; i++) {
if (kstack_end(stack))
break;
@@ -49,9 +50,7 @@
pr_cont("\n");
pr_cont(" %08lx", *stack++);
}
- pr_cont("\n");
- pr_info("Call Trace:\n");
- dump_trace(current, &stackops, NULL);
- pr_info("\n");
+ printk("%sCall Trace:\n", loglvl);
+ dump_trace(current, &stackops, (void *)loglvl);
}
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index 94ea87b..8dafc3f 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -4,6 +4,7 @@
* Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
* Copyright (C) 2012-2014 Cisco Systems
* Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Copyright (C) 2019 Intel Corporation
*/
#include <linux/clockchips.h>
@@ -18,21 +19,484 @@
#include <asm/param.h>
#include <kern_util.h>
#include <os.h>
-#include <timer-internal.h>
+#include <linux/time-internal.h>
+#include <linux/um_timetravel.h>
#include <shared/init.h>
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
enum time_travel_mode time_travel_mode;
-unsigned long long time_travel_time;
-enum time_travel_timer_mode time_travel_timer_mode;
-unsigned long long time_travel_timer_expiry;
-unsigned long long time_travel_timer_interval;
+EXPORT_SYMBOL_GPL(time_travel_mode);
static bool time_travel_start_set;
static unsigned long long time_travel_start;
-#else
+static unsigned long long time_travel_time;
+static LIST_HEAD(time_travel_events);
+static unsigned long long time_travel_timer_interval;
+static unsigned long long time_travel_next_event;
+static struct time_travel_event time_travel_timer_event;
+static int time_travel_ext_fd = -1;
+static unsigned int time_travel_ext_waiting;
+static bool time_travel_ext_prev_request_valid;
+static unsigned long long time_travel_ext_prev_request;
+static bool time_travel_ext_free_until_valid;
+static unsigned long long time_travel_ext_free_until;
+
+static void time_travel_set_time(unsigned long long ns)
+{
+ if (unlikely(ns < time_travel_time))
+ panic("time-travel: time goes backwards %lld -> %lld\n",
+ time_travel_time, ns);
+ time_travel_time = ns;
+}
+
+enum time_travel_message_handling {
+ TTMH_IDLE,
+ TTMH_POLL,
+ TTMH_READ,
+};
+
+static void time_travel_handle_message(struct um_timetravel_msg *msg,
+ enum time_travel_message_handling mode)
+{
+ struct um_timetravel_msg resp = {
+ .op = UM_TIMETRAVEL_ACK,
+ };
+ int ret;
+
+ /*
+ * Poll outside the locked section (if we're not called to only read
+ * the response) so we can get interrupts for e.g. virtio while we're
+ * here, but then we need to lock to not get interrupted between the
+ * read of the message and write of the ACK.
+ */
+ if (mode != TTMH_READ) {
+ bool disabled = irqs_disabled();
+
+ BUG_ON(mode == TTMH_IDLE && !disabled);
+
+ if (disabled)
+ local_irq_enable();
+ while (os_poll(1, &time_travel_ext_fd) != 0) {
+ /* nothing */
+ }
+ if (disabled)
+ local_irq_disable();
+ }
+
+ ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg));
+
+ if (ret == 0)
+ panic("time-travel external link is broken\n");
+ if (ret != sizeof(*msg))
+ panic("invalid time-travel message - %d bytes\n", ret);
+
+ switch (msg->op) {
+ default:
+ WARN_ONCE(1, "time-travel: unexpected message %lld\n",
+ (unsigned long long)msg->op);
+ break;
+ case UM_TIMETRAVEL_ACK:
+ return;
+ case UM_TIMETRAVEL_RUN:
+ time_travel_set_time(msg->time);
+ break;
+ case UM_TIMETRAVEL_FREE_UNTIL:
+ time_travel_ext_free_until_valid = true;
+ time_travel_ext_free_until = msg->time;
+ break;
+ }
+
+ resp.seq = msg->seq;
+ os_write_file(time_travel_ext_fd, &resp, sizeof(resp));
+}
+
+static u64 time_travel_ext_req(u32 op, u64 time)
+{
+ static int seq;
+ int mseq = ++seq;
+ struct um_timetravel_msg msg = {
+ .op = op,
+ .time = time,
+ .seq = mseq,
+ };
+ unsigned long flags;
+
+ /*
+ * We need to save interrupts here and only restore when we
+ * got the ACK - otherwise we can get interrupted and send
+ * another request while we're still waiting for an ACK, but
+ * the peer doesn't know we got interrupted and will send
+ * the ACKs in the same order as the message, but we'd need
+ * to see them in the opposite order ...
+ *
+ * This wouldn't matter *too* much, but some ACKs carry the
+ * current time (for UM_TIMETRAVEL_GET) and getting another
+ * ACK without a time would confuse us a lot!
+ *
+ * The sequence number assignment that happens here lets us
+ * debug such message handling issues more easily.
+ */
+ local_irq_save(flags);
+ os_write_file(time_travel_ext_fd, &msg, sizeof(msg));
+
+ while (msg.op != UM_TIMETRAVEL_ACK)
+ time_travel_handle_message(&msg, TTMH_READ);
+
+ if (msg.seq != mseq)
+ panic("time-travel: ACK message has different seqno! op=%d, seq=%d != %d time=%lld\n",
+ msg.op, msg.seq, mseq, msg.time);
+
+ if (op == UM_TIMETRAVEL_GET)
+ time_travel_set_time(msg.time);
+ local_irq_restore(flags);
+
+ return msg.time;
+}
+
+void __time_travel_wait_readable(int fd)
+{
+ int fds[2] = { fd, time_travel_ext_fd };
+ int ret;
+
+ if (time_travel_mode != TT_MODE_EXTERNAL)
+ return;
+
+ while ((ret = os_poll(2, fds))) {
+ struct um_timetravel_msg msg;
+
+ if (ret == 1)
+ time_travel_handle_message(&msg, TTMH_READ);
+ }
+}
+EXPORT_SYMBOL_GPL(__time_travel_wait_readable);
+
+static void time_travel_ext_update_request(unsigned long long time)
+{
+ if (time_travel_mode != TT_MODE_EXTERNAL)
+ return;
+
+ /* asked for exactly this time previously */
+ if (time_travel_ext_prev_request_valid &&
+ time == time_travel_ext_prev_request)
+ return;
+
+ time_travel_ext_prev_request = time;
+ time_travel_ext_prev_request_valid = true;
+ time_travel_ext_req(UM_TIMETRAVEL_REQUEST, time);
+}
+
+void __time_travel_propagate_time(void)
+{
+ time_travel_ext_req(UM_TIMETRAVEL_UPDATE, time_travel_time);
+}
+EXPORT_SYMBOL_GPL(__time_travel_propagate_time);
+
+/* returns true if we must do a wait to the simtime device */
+static bool time_travel_ext_request(unsigned long long time)
+{
+ /*
+ * If we received an external sync point ("free until") then we
+ * don't have to request/wait for anything until then, unless
+ * we're already waiting.
+ */
+ if (!time_travel_ext_waiting && time_travel_ext_free_until_valid &&
+ time < time_travel_ext_free_until)
+ return false;
+
+ time_travel_ext_update_request(time);
+ return true;
+}
+
+static void time_travel_ext_wait(bool idle)
+{
+ struct um_timetravel_msg msg = {
+ .op = UM_TIMETRAVEL_ACK,
+ };
+
+ time_travel_ext_prev_request_valid = false;
+ time_travel_ext_waiting++;
+
+ time_travel_ext_req(UM_TIMETRAVEL_WAIT, -1);
+
+ /*
+ * Here we are deep in the idle loop, so we have to break out of the
+ * kernel abstraction in a sense and implement this in terms of the
+ * UML system waiting on the VQ interrupt while sleeping, when we get
+ * the signal it'll call time_travel_ext_vq_notify_done() completing the
+ * call.
+ */
+ while (msg.op != UM_TIMETRAVEL_RUN)
+ time_travel_handle_message(&msg, idle ? TTMH_IDLE : TTMH_POLL);
+
+ time_travel_ext_waiting--;
+
+ /* we might request more stuff while polling - reset when we run */
+ time_travel_ext_prev_request_valid = false;
+}
+
+static void time_travel_ext_get_time(void)
+{
+ time_travel_ext_req(UM_TIMETRAVEL_GET, -1);
+}
+
+static void __time_travel_update_time(unsigned long long ns, bool idle)
+{
+ if (time_travel_mode == TT_MODE_EXTERNAL && time_travel_ext_request(ns))
+ time_travel_ext_wait(idle);
+ else
+ time_travel_set_time(ns);
+}
+
+static struct time_travel_event *time_travel_first_event(void)
+{
+ return list_first_entry_or_null(&time_travel_events,
+ struct time_travel_event,
+ list);
+}
+
+static void __time_travel_add_event(struct time_travel_event *e,
+ unsigned long long time)
+{
+ struct time_travel_event *tmp;
+ bool inserted = false;
+
+ if (e->pending)
+ return;
+
+ e->pending = true;
+ e->time = time;
+
+ list_for_each_entry(tmp, &time_travel_events, list) {
+ /*
+ * Add the new entry before one with higher time,
+ * or if they're equal and both on stack, because
+ * in that case we need to unwind the stack in the
+ * right order, and the later event (timer sleep
+ * or such) must be dequeued first.
+ */
+ if ((tmp->time > e->time) ||
+ (tmp->time == e->time && tmp->onstack && e->onstack)) {
+ list_add_tail(&e->list, &tmp->list);
+ inserted = true;
+ break;
+ }
+ }
+
+ if (!inserted)
+ list_add_tail(&e->list, &time_travel_events);
+
+ tmp = time_travel_first_event();
+ time_travel_ext_update_request(tmp->time);
+ time_travel_next_event = tmp->time;
+}
+
+static void time_travel_add_event(struct time_travel_event *e,
+ unsigned long long time)
+{
+ if (WARN_ON(!e->fn))
+ return;
+
+ __time_travel_add_event(e, time);
+}
+
+void time_travel_periodic_timer(struct time_travel_event *e)
+{
+ time_travel_add_event(&time_travel_timer_event,
+ time_travel_time + time_travel_timer_interval);
+ deliver_alarm();
+}
+
+static void time_travel_deliver_event(struct time_travel_event *e)
+{
+ if (e == &time_travel_timer_event) {
+ /*
+ * deliver_alarm() does the irq_enter/irq_exit
+ * by itself, so must handle it specially here
+ */
+ e->fn(e);
+ } else {
+ unsigned long flags;
+
+ local_irq_save(flags);
+ irq_enter();
+ e->fn(e);
+ irq_exit();
+ local_irq_restore(flags);
+ }
+}
+
+static bool time_travel_del_event(struct time_travel_event *e)
+{
+ if (!e->pending)
+ return false;
+ list_del(&e->list);
+ e->pending = false;
+ return true;
+}
+
+static void time_travel_update_time(unsigned long long next, bool idle)
+{
+ struct time_travel_event ne = {
+ .onstack = true,
+ };
+ struct time_travel_event *e;
+ bool finished = idle;
+
+ /* add it without a handler - we deal with that specifically below */
+ __time_travel_add_event(&ne, next);
+
+ do {
+ e = time_travel_first_event();
+
+ BUG_ON(!e);
+ __time_travel_update_time(e->time, idle);
+
+ /* new events may have been inserted while we were waiting */
+ if (e == time_travel_first_event()) {
+ BUG_ON(!time_travel_del_event(e));
+ BUG_ON(time_travel_time != e->time);
+
+ if (e == &ne) {
+ finished = true;
+ } else {
+ if (e->onstack)
+ panic("On-stack event dequeued outside of the stack! time=%lld, event time=%lld, event=%pS\n",
+ time_travel_time, e->time, e);
+ time_travel_deliver_event(e);
+ }
+ }
+
+ e = time_travel_first_event();
+ if (e)
+ time_travel_ext_update_request(e->time);
+ } while (ne.pending && !finished);
+
+ time_travel_del_event(&ne);
+}
+
+void time_travel_ndelay(unsigned long nsec)
+{
+ time_travel_update_time(time_travel_time + nsec, false);
+}
+EXPORT_SYMBOL(time_travel_ndelay);
+
+void time_travel_add_irq_event(struct time_travel_event *e)
+{
+ BUG_ON(time_travel_mode != TT_MODE_EXTERNAL);
+
+ time_travel_ext_get_time();
+ /*
+ * We could model interrupt latency here, for now just
+ * don't have any latency at all and request the exact
+ * same time (again) to run the interrupt...
+ */
+ time_travel_add_event(e, time_travel_time);
+}
+EXPORT_SYMBOL_GPL(time_travel_add_irq_event);
+
+static void time_travel_oneshot_timer(struct time_travel_event *e)
+{
+ deliver_alarm();
+}
+
+void time_travel_sleep(unsigned long long duration)
+{
+ unsigned long long next = time_travel_time + duration;
+
+ if (time_travel_mode == TT_MODE_BASIC)
+ os_timer_disable();
+
+ time_travel_update_time(next, true);
+
+ if (time_travel_mode == TT_MODE_BASIC &&
+ time_travel_timer_event.pending) {
+ if (time_travel_timer_event.fn == time_travel_periodic_timer) {
+ /*
+ * This is somewhat wrong - we should get the first
+ * one sooner like the os_timer_one_shot() below...
+ */
+ os_timer_set_interval(time_travel_timer_interval);
+ } else {
+ os_timer_one_shot(time_travel_timer_event.time - next);
+ }
+ }
+}
+
+static void time_travel_handle_real_alarm(void)
+{
+ time_travel_set_time(time_travel_next_event);
+
+ time_travel_del_event(&time_travel_timer_event);
+
+ if (time_travel_timer_event.fn == time_travel_periodic_timer)
+ time_travel_add_event(&time_travel_timer_event,
+ time_travel_time +
+ time_travel_timer_interval);
+}
+
+static void time_travel_set_interval(unsigned long long interval)
+{
+ time_travel_timer_interval = interval;
+}
+
+static int time_travel_connect_external(const char *socket)
+{
+ const char *sep;
+ unsigned long long id = (unsigned long long)-1;
+ int rc;
+
+ if ((sep = strchr(socket, ':'))) {
+ char buf[25] = {};
+ if (sep - socket > sizeof(buf) - 1)
+ goto invalid_number;
+
+ memcpy(buf, socket, sep - socket);
+ if (kstrtoull(buf, 0, &id)) {
+invalid_number:
+ panic("time-travel: invalid external ID in string '%s'\n",
+ socket);
+ return -EINVAL;
+ }
+
+ socket = sep + 1;
+ }
+
+ rc = os_connect_socket(socket);
+ if (rc < 0) {
+ panic("time-travel: failed to connect to external socket %s\n",
+ socket);
+ return rc;
+ }
+
+ time_travel_ext_fd = rc;
+
+ time_travel_ext_req(UM_TIMETRAVEL_START, id);
+
+ return 1;
+}
+#else /* CONFIG_UML_TIME_TRAVEL_SUPPORT */
#define time_travel_start_set 0
#define time_travel_start 0
+#define time_travel_time 0
+
+static inline void time_travel_update_time(unsigned long long ns, bool retearly)
+{
+}
+
+static inline void time_travel_handle_real_alarm(void)
+{
+}
+
+static void time_travel_set_interval(unsigned long long interval)
+{
+}
+
+/* fail link if this actually gets used */
+extern u64 time_travel_ext_req(u32 op, u64 time);
+
+/* these are empty macros so the struct/fn need not exist */
+#define time_travel_add_event(e, time) do { } while (0)
+#define time_travel_del_event(e) do { } while (0)
#endif
void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
@@ -48,7 +512,7 @@
* never get any real signals from the OS.
*/
if (time_travel_mode == TT_MODE_BASIC)
- time_travel_set_time(time_travel_timer_expiry);
+ time_travel_handle_real_alarm();
local_irq_save(flags);
do_IRQ(TIMER_IRQ, regs);
@@ -58,9 +522,10 @@
static int itimer_shutdown(struct clock_event_device *evt)
{
if (time_travel_mode != TT_MODE_OFF)
- time_travel_set_timer_mode(TT_TMR_DISABLED);
+ time_travel_del_event(&time_travel_timer_event);
- if (time_travel_mode != TT_MODE_INFCPU)
+ if (time_travel_mode != TT_MODE_INFCPU &&
+ time_travel_mode != TT_MODE_EXTERNAL)
os_timer_disable();
return 0;
@@ -71,12 +536,16 @@
unsigned long long interval = NSEC_PER_SEC / HZ;
if (time_travel_mode != TT_MODE_OFF) {
- time_travel_set_timer_mode(TT_TMR_PERIODIC);
- time_travel_set_timer_expiry(time_travel_time + interval);
- time_travel_set_timer_interval(interval);
+ time_travel_del_event(&time_travel_timer_event);
+ time_travel_set_event_fn(&time_travel_timer_event,
+ time_travel_periodic_timer);
+ time_travel_set_interval(interval);
+ time_travel_add_event(&time_travel_timer_event,
+ time_travel_time + interval);
}
- if (time_travel_mode != TT_MODE_INFCPU)
+ if (time_travel_mode != TT_MODE_INFCPU &&
+ time_travel_mode != TT_MODE_EXTERNAL)
os_timer_set_interval(interval);
return 0;
@@ -88,11 +557,15 @@
delta += 1;
if (time_travel_mode != TT_MODE_OFF) {
- time_travel_set_timer_mode(TT_TMR_ONESHOT);
- time_travel_set_timer_expiry(time_travel_time + delta);
+ time_travel_del_event(&time_travel_timer_event);
+ time_travel_set_event_fn(&time_travel_timer_event,
+ time_travel_oneshot_timer);
+ time_travel_add_event(&time_travel_timer_event,
+ time_travel_time + delta);
}
- if (time_travel_mode != TT_MODE_INFCPU)
+ if (time_travel_mode != TT_MODE_INFCPU &&
+ time_travel_mode != TT_MODE_EXTERNAL)
return os_timer_one_shot(delta);
return 0;
@@ -143,8 +616,17 @@
* stuck in loops that expect time to move more than the
* exact requested sleep amount, e.g. python's socket server,
* see https://bugs.python.org/issue37026.
+ *
+ * However, don't do that when we're in interrupt or such as
+ * then we might recurse into our own processing, and get to
+ * even more waiting, and that's not good - it messes up the
+ * "what do I do next" and onstack event we use to know when
+ * to return from time_travel_update_time().
*/
- time_travel_set_time(time_travel_time + TIMER_MULTIPLIER);
+ if (!irqs_disabled() && !in_interrupt() && !in_softirq())
+ time_travel_update_time(time_travel_time +
+ TIMER_MULTIPLIER,
+ false);
return time_travel_time / TIMER_MULTIPLIER;
}
@@ -188,6 +670,8 @@
if (time_travel_start_set)
nsecs = time_travel_start + time_travel_time;
+ else if (time_travel_mode == TT_MODE_EXTERNAL)
+ nsecs = time_travel_ext_req(UM_TIMETRAVEL_GET_TOD, -1);
else
nsecs = os_persistent_clock_emulation();
@@ -204,7 +688,8 @@
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
unsigned long calibrate_delay_is_known(void)
{
- if (time_travel_mode == TT_MODE_INFCPU)
+ if (time_travel_mode == TT_MODE_INFCPU ||
+ time_travel_mode == TT_MODE_EXTERNAL)
return 1;
return 0;
}
@@ -218,6 +703,13 @@
return 1;
}
+ if (strncmp(str, "=ext:", 5) == 0) {
+ time_travel_mode = TT_MODE_EXTERNAL;
+ timer_clockevent.name = "time-travel-timer-external";
+ timer_clocksource.name = "time-travel-clock-external";
+ return time_travel_connect_external(str + 5);
+ }
+
if (!*str) {
time_travel_mode = TT_MODE_BASIC;
timer_clockevent.name = "time-travel-timer";
@@ -242,7 +734,15 @@
"are no wall clock timers, and any CPU processing happens - as seen from the\n"
"guest - instantly. This can be useful for accurate simulation regardless of\n"
"debug overhead, physical CPU speed, etc. but is somewhat dangerous as it can\n"
-"easily lead to getting stuck (e.g. if anything in the system busy loops).\n");
+"easily lead to getting stuck (e.g. if anything in the system busy loops).\n"
+"\n"
+"time-travel=ext:[ID:]/path/to/socket\n"
+"This enables time travel mode similar to =inf-cpu, except the system will\n"
+"use the given socket to coordinate with a central scheduler, in order to\n"
+"have more than one system simultaneously be on simulated time. The virtio\n"
+"driver code in UML knows about this so you can also simulate networks and\n"
+"devices using it, assuming the device has the right capabilities.\n"
+"The optional ID is a 64-bit integer that's sent to the central scheduler.\n");
int setup_time_travel_start(char *str)
{
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index 1149913..5be1b0d 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -7,7 +7,6 @@
#include <linux/module.h>
#include <linux/sched/signal.h>
-#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <as-layout.h>
#include <mem_user.h>
@@ -283,7 +282,7 @@
return ret;
}
-static inline int update_pud_range(pgd_t *pgd, unsigned long addr,
+static inline int update_pud_range(p4d_t *p4d, unsigned long addr,
unsigned long end,
struct host_vm_change *hvc)
{
@@ -291,7 +290,7 @@
unsigned long next;
int ret = 0;
- pud = pud_offset(pgd, addr);
+ pud = pud_offset(p4d, addr);
do {
next = pud_addr_end(addr, end);
if (!pud_present(*pud)) {
@@ -305,6 +304,28 @@
return ret;
}
+static inline int update_p4d_range(pgd_t *pgd, unsigned long addr,
+ unsigned long end,
+ struct host_vm_change *hvc)
+{
+ p4d_t *p4d;
+ unsigned long next;
+ int ret = 0;
+
+ p4d = p4d_offset(pgd, addr);
+ do {
+ next = p4d_addr_end(addr, end);
+ if (!p4d_present(*p4d)) {
+ if (hvc->force || p4d_newpage(*p4d)) {
+ ret = add_munmap(addr, next - addr, hvc);
+ p4d_mkuptodate(*p4d);
+ }
+ } else
+ ret = update_pud_range(p4d, addr, next, hvc);
+ } while (p4d++, addr = next, ((addr < end) && !ret));
+ return ret;
+}
+
void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
unsigned long end_addr, int force)
{
@@ -322,8 +343,8 @@
ret = add_munmap(addr, next - addr, &hvc);
pgd_mkuptodate(*pgd);
}
- }
- else ret = update_pud_range(pgd, addr, next, &hvc);
+ } else
+ ret = update_p4d_range(pgd, addr, next, &hvc);
} while (pgd++, addr = next, ((addr < end_addr) && !ret));
if (!ret)
@@ -331,12 +352,11 @@
/* This is not an else because ret is modified above */
if (ret) {
+ struct mm_id *mm_idp = ¤t->mm->context.id;
+
printk(KERN_ERR "fix_range_common: failed, killing current "
"process: %d\n", task_tgid_vnr(current));
- /* We are under mmap_sem, release it such that current can terminate */
- up_write(¤t->mm->mmap_sem);
- force_sig(SIGKILL);
- do_signal(¤t->thread.regs);
+ mm_idp->kill = 1;
}
}
@@ -344,6 +364,7 @@
{
struct mm_struct *mm;
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
@@ -370,7 +391,23 @@
continue;
}
- pud = pud_offset(pgd, addr);
+ p4d = p4d_offset(pgd, addr);
+ if (!p4d_present(*p4d)) {
+ last = ADD_ROUND(addr, P4D_SIZE);
+ if (last > end)
+ last = end;
+ if (p4d_newpage(*p4d)) {
+ updated = 1;
+ err = add_munmap(addr, last - addr, &hvc);
+ if (err < 0)
+ panic("munmap failed, errno = %d\n",
+ -err);
+ }
+ addr = last;
+ continue;
+ }
+
+ pud = pud_offset(p4d, addr);
if (!pud_present(*pud)) {
last = ADD_ROUND(addr, PUD_SIZE);
if (last > end)
@@ -430,6 +467,7 @@
void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
@@ -447,7 +485,11 @@
if (!pgd_present(*pgd))
goto kill;
- pud = pud_offset(pgd, address);
+ p4d = p4d_offset(pgd, address);
+ if (!p4d_present(*p4d))
+ goto kill;
+
+ pud = pud_offset(p4d, address);
if (!pud_present(*pud))
goto kill;
@@ -500,35 +542,6 @@
force_sig(SIGKILL);
}
-pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address)
-{
- return pgd_offset(mm, address);
-}
-
-pud_t *pud_offset_proc(pgd_t *pgd, unsigned long address)
-{
- return pud_offset(pgd, address);
-}
-
-pmd_t *pmd_offset_proc(pud_t *pud, unsigned long address)
-{
- return pmd_offset(pud, address);
-}
-
-pte_t *pte_offset_proc(pmd_t *pmd, unsigned long address)
-{
- return pte_offset_kernel(pmd, address);
-}
-
-pte_t *addr_pte(struct task_struct *task, unsigned long addr)
-{
- pgd_t *pgd = pgd_offset(task->mm, addr);
- pud_t *pud = pud_offset(pgd, addr);
- pmd_t *pmd = pmd_offset(pud, addr);
-
- return pte_offset_map(pmd, addr);
-}
-
void flush_tlb_all(void)
{
/*
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index e62296c..ad12f78 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -10,7 +10,6 @@
#include <linux/uaccess.h>
#include <linux/sched/debug.h>
#include <asm/current.h>
-#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <arch.h>
#include <as-layout.h>
@@ -27,12 +26,10 @@
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
- pgd_t *pgd;
- pud_t *pud;
pmd_t *pmd;
pte_t *pte;
int err = -EFAULT;
- unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+ unsigned int flags = FAULT_FLAG_DEFAULT;
*code_out = SEGV_MAPERR;
@@ -46,7 +43,7 @@
if (is_user)
flags |= FAULT_FLAG_USER;
retry:
- down_read(&mm->mmap_sem);
+ mmap_read_lock(mm);
vma = find_vma(mm, address);
if (!vma)
goto out;
@@ -74,7 +71,7 @@
do {
vm_fault_t fault;
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, NULL);
if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
goto out_nosemaphore;
@@ -91,21 +88,14 @@
BUG();
}
if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR)
- current->maj_flt++;
- else
- current->min_flt++;
if (fault & VM_FAULT_RETRY) {
- flags &= ~FAULT_FLAG_ALLOW_RETRY;
flags |= FAULT_FLAG_TRIED;
goto retry;
}
}
- pgd = pgd_offset(mm, address);
- pud = pud_offset(pgd, address);
- pmd = pmd_offset(pud, address);
+ pmd = pmd_off(mm, address);
pte = pte_offset_kernel(pmd, address);
} while (!pte_present(*pte));
err = 0;
@@ -122,7 +112,7 @@
#endif
flush_tlb_page(vma, address);
out:
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
out_nosemaphore:
return err;
@@ -131,7 +121,7 @@
* We ran out of memory, call the OOM killer, and return the userspace
* (which will retry the fault, or kill us if we got oom-killed).
*/
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
if (!is_user)
goto out_nosemaphore;
pagefault_out_of_memory();
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 0f40ecc..76b3729 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -14,7 +14,6 @@
#include <linux/sched/task.h>
#include <linux/kmsg_dump.h>
-#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/sections.h>
#include <asm/setup.h>
@@ -53,7 +52,7 @@
};
union thread_union cpu0_irqstack
- __attribute__((__section__(".data..init_irqstack"))) =
+ __section(".data..init_irqstack") =
{ .thread_info = INIT_THREAD_INFO(init_task) };
/* Changed in setup_arch, which is called in early boot */
@@ -362,3 +361,19 @@
void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
{
}
+
+void *text_poke(void *addr, const void *opcode, size_t len)
+{
+ /*
+ * In UML, the only reference to this function is in
+ * apply_relocate_add(), which shouldn't ever actually call this
+ * because UML doesn't have live patching.
+ */
+ WARN_ON(1);
+
+ return memcpy(addr, opcode, len);
+}
+
+void text_poke_sync(void)
+{
+}
diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S
index 85b404d..7a8e2b1 100644
--- a/arch/um/kernel/uml.lds.S
+++ b/arch/um/kernel/uml.lds.S
@@ -25,10 +25,10 @@
__binary_start = START;
. = START + SIZEOF_HEADERS;
+ . = ALIGN(PAGE_SIZE);
_text = .;
INIT_TEXT_SECTION(0)
- . = ALIGN(PAGE_SIZE);
.text :
{
@@ -114,8 +114,8 @@
PROVIDE (end = .);
STABS_DEBUG
-
DWARF_DEBUG
+ ELF_DETAILS
DISCARDS
}
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index 3996937..e4421db 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -5,6 +5,8 @@
#include <stdio.h>
#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
@@ -17,6 +19,7 @@
#include <sys/un.h>
#include <sys/types.h>
#include <sys/eventfd.h>
+#include <poll.h>
#include <os.h>
static void copy_stat(struct uml_stat *dst, const struct stat64 *src)
@@ -287,7 +290,7 @@
int os_sync_file(int fd)
{
- int n = fsync(fd);
+ int n = fdatasync(fd);
if (n < 0)
return -errno;
@@ -342,7 +345,7 @@
return 0;
}
-int os_file_modtime(const char *file, unsigned long *modtime)
+int os_file_modtime(const char *file, long long *modtime)
{
struct uml_stat buf;
int err;
@@ -665,3 +668,31 @@
return -errno;
return err;
}
+
+int os_poll(unsigned int n, const int *fds)
+{
+ /* currently need 2 FDs at most so avoid dynamic allocation */
+ struct pollfd pollfds[2] = {};
+ unsigned int i;
+ int ret;
+
+ if (n > ARRAY_SIZE(pollfds))
+ return -EINVAL;
+
+ for (i = 0; i < n; i++) {
+ pollfds[i].fd = fds[i];
+ pollfds[i].events = POLLIN;
+ }
+
+ ret = poll(pollfds, n, -1);
+ if (ret < 0)
+ return -errno;
+
+ /* Return the index of the available FD */
+ for (i = 0; i < n; i++) {
+ if (pollfds[i].revents)
+ return i;
+ }
+
+ return -EIO;
+}
diff --git a/arch/um/os-Linux/registers.c b/arch/um/os-Linux/registers.c
index 2d92705..b123955 100644
--- a/arch/um/os-Linux/registers.c
+++ b/arch/um/os-Linux/registers.c
@@ -21,7 +21,7 @@
return 0;
}
-int restore_registers(int pid, struct uml_pt_regs *regs)
+int restore_pid_registers(int pid, struct uml_pt_regs *regs)
{
int err;
@@ -36,7 +36,7 @@
static unsigned long exec_regs[MAX_REG_NR];
static unsigned long exec_fp_regs[FP_SIZE];
-int init_registers(int pid)
+int init_pid_registers(int pid)
{
int err;
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index 4fb877b..94a7c41 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -249,6 +249,7 @@
}
int userspace_pid[NR_CPUS];
+int kill_userspace_mm[NR_CPUS];
/**
* start_userspace() - prepare a new userspace process
@@ -342,6 +343,8 @@
interrupt_end();
while (1) {
+ if (kill_userspace_mm[0])
+ fatal_sigsegv();
/*
* This can legitimately fail if the process loads a
@@ -650,4 +653,5 @@
void __switch_mm(struct mm_id *mm_idp)
{
userspace_pid[0] = mm_idp->u.pid;
+ kill_userspace_mm[0] = mm_idp->kill;
}
diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c
index f79dc33..b28373a 100644
--- a/arch/um/os-Linux/start_up.c
+++ b/arch/um/os-Linux/start_up.c
@@ -336,7 +336,7 @@
check_tmpexec();
pid = start_ptraced_child();
- if (init_registers(pid))
+ if (init_pid_registers(pid))
fatal("Failed to initialize default registers");
stop_ptraced_child(pid, 1, 1);
}
diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c
index 432f8e1..90f6de2 100644
--- a/arch/um/os-Linux/time.c
+++ b/arch/um/os-Linux/time.c
@@ -14,7 +14,6 @@
#include <kern_util.h>
#include <os.h>
#include <string.h>
-#include <timer-internal.h>
static timer_t event_high_res_timer = 0;
diff --git a/arch/um/os-Linux/umid.c b/arch/um/os-Linux/umid.c
index ea5c60f..a3dd615 100644
--- a/arch/um/os-Linux/umid.c
+++ b/arch/um/os-Linux/umid.c
@@ -97,7 +97,7 @@
while ((ent = readdir(directory)) != NULL) {
if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, ".."))
continue;
- len = strlen(dir) + sizeof("/") + strlen(ent->d_name) + 1;
+ len = strlen(dir) + strlen("/") + strlen(ent->d_name) + 1;
if (len > sizeof(file)) {
ret = -E2BIG;
goto out;
@@ -135,7 +135,7 @@
*/
static inline int is_umdir_used(char *dir)
{
- char pid[sizeof("nnnnn\0")], *end, *file;
+ char pid[sizeof("nnnnnnnnn")], *end, *file;
int dead, fd, p, n, err;
size_t filelen = strlen(dir) + sizeof("/pid") + 1;
@@ -210,14 +210,15 @@
static void __init create_pid_file(void)
{
- char pid[sizeof("nnnnn\0")], *file;
+ char pid[sizeof("nnnnnnnnn")], *file;
int fd, n;
- file = malloc(strlen(uml_dir) + UMID_LEN + sizeof("/pid\0"));
+ n = strlen(uml_dir) + UMID_LEN + sizeof("/pid");
+ file = malloc(n);
if (!file)
return;
- if (umid_file_name("pid", file, sizeof(file)))
+ if (umid_file_name("pid", file, n))
goto out;
fd = open(file, O_RDWR | O_CREAT | O_EXCL, 0644);
diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c
index ecf2f39..0732742 100644
--- a/arch/um/os-Linux/util.c
+++ b/arch/um/os-Linux/util.c
@@ -10,7 +10,7 @@
#include <signal.h>
#include <string.h>
#include <termios.h>
-#include <wait.h>
+#include <sys/wait.h>
#include <sys/mman.h>
#include <sys/utsname.h>
#include <init.h>