Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/samples/Kconfig b/samples/Kconfig
index c8dacb4..e76cdfc 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -6,6 +6,10 @@
if SAMPLES
+config SAMPLE_AUXDISPLAY
+ bool "auxdisplay sample"
+ depends on CC_CAN_LINK
+
config SAMPLE_TRACE_EVENTS
tristate "Build trace_events examples -- loadable modules only"
depends on EVENT_TRACING && m
@@ -19,6 +23,21 @@
This builds a module that calls trace_printk() and can be used to
test various trace_printk() calls from a module.
+config SAMPLE_FTRACE_DIRECT
+ tristate "Build register_ftrace_direct() example"
+ depends on DYNAMIC_FTRACE_WITH_DIRECT_CALLS && m
+ depends on X86_64 # has x86_64 inlined asm
+ help
+ This builds an ftrace direct function example
+ that hooks to wake_up_process and prints the parameters.
+
+config SAMPLE_TRACE_ARRAY
+ tristate "Build sample module for kernel access to Ftrace instancess"
+ depends on EVENT_TRACING && m
+ help
+ This builds a module that demonstrates the use of various APIs to
+ access Ftrace instances from within the kernel.
+
config SAMPLE_KOBJECT
tristate "Build kobject examples"
help
@@ -103,19 +122,29 @@
config SAMPLE_HIDRAW
bool "hidraw sample"
- depends on HEADERS_INSTALL
+ depends on CC_CAN_LINK && HEADERS_INSTALL
config SAMPLE_PIDFD
bool "pidfd sample"
- depends on HEADERS_INSTALL
+ depends on CC_CAN_LINK && HEADERS_INSTALL
config SAMPLE_SECCOMP
bool "Build seccomp sample code"
- depends on SECCOMP_FILTER && HEADERS_INSTALL
+ depends on SECCOMP_FILTER && CC_CAN_LINK && HEADERS_INSTALL
help
Build samples of seccomp filters using various methods of
BPF filter construction.
+config SAMPLE_TIMER
+ bool "Timer sample"
+ depends on CC_CAN_LINK && HEADERS_INSTALL
+
+config SAMPLE_UHID
+ bool "UHID sample"
+ depends on CC_CAN_LINK && HEADERS_INSTALL
+ help
+ Build UHID sample program.
+
config SAMPLE_VFIO_MDEV_MTTY
tristate "Build VFIO mtty example mediated device sample code -- loadable modules only"
depends on VFIO_MDEV_DEVICE && m
@@ -156,17 +185,35 @@
config SAMPLE_ANDROID_BINDERFS
bool "Build Android binderfs example"
- depends on CONFIG_ANDROID_BINDERFS
+ depends on CC_CAN_LINK && HEADERS_INSTALL
help
Builds a sample program to illustrate the use of the Android binderfs
filesystem.
config SAMPLE_VFS
bool "Build example programs that use new VFS system calls"
- depends on HEADERS_INSTALL
+ depends on CC_CAN_LINK && HEADERS_INSTALL
help
Build example userspace programs that use new VFS system calls such
as mount API and statx(). Note that this is restricted to the x86
arch whilst it accesses system calls that aren't yet in all arches.
+config SAMPLE_INTEL_MEI
+ bool "Build example program working with intel mei driver"
+ depends on INTEL_MEI
+ depends on CC_CAN_LINK && HEADERS_INSTALL
+ help
+ Build a sample program to work with mei device.
+
+config SAMPLE_WATCHDOG
+ bool "watchdog sample"
+ depends on CC_CAN_LINK
+
+config SAMPLE_WATCH_QUEUE
+ bool "Build example watch_queue notification API consumer"
+ depends on CC_CAN_LINK && HEADERS_INSTALL
+ help
+ Build example userspace program to use the new mount_notify(),
+ sb_notify() syscalls and the KEYCTL_WATCH_KEY keyctl() function.
+
endif # SAMPLES
diff --git a/samples/Makefile b/samples/Makefile
index 7d6e4ca..c3392a5 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -1,7 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for Linux samples code
-obj-$(CONFIG_SAMPLE_ANDROID_BINDERFS) += binderfs/
+subdir-$(CONFIG_SAMPLE_AUXDISPLAY) += auxdisplay
+subdir-$(CONFIG_SAMPLE_ANDROID_BINDERFS) += binderfs
obj-$(CONFIG_SAMPLE_CONFIGFS) += configfs/
obj-$(CONFIG_SAMPLE_CONNECTOR) += connector/
subdir-$(CONFIG_SAMPLE_HIDRAW) += hidraw
@@ -15,8 +16,16 @@
obj-$(CONFIG_SAMPLE_QMI_CLIENT) += qmi/
obj-$(CONFIG_SAMPLE_RPMSG_CLIENT) += rpmsg/
subdir-$(CONFIG_SAMPLE_SECCOMP) += seccomp
+subdir-$(CONFIG_SAMPLE_TIMER) += timers
obj-$(CONFIG_SAMPLE_TRACE_EVENTS) += trace_events/
obj-$(CONFIG_SAMPLE_TRACE_PRINTK) += trace_printk/
+obj-$(CONFIG_SAMPLE_FTRACE_DIRECT) += ftrace/
+obj-$(CONFIG_SAMPLE_TRACE_ARRAY) += ftrace/
+subdir-$(CONFIG_SAMPLE_UHID) += uhid
obj-$(CONFIG_VIDEO_PCI_SKELETON) += v4l/
obj-y += vfio-mdev/
subdir-$(CONFIG_SAMPLE_VFS) += vfs
+obj-$(CONFIG_SAMPLE_INTEL_MEI) += mei/
+subdir-$(CONFIG_SAMPLE_WATCHDOG) += watchdog
+subdir-$(CONFIG_SAMPLE_WATCH_QUEUE) += watch_queue
+obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak/
diff --git a/samples/auxdisplay/.gitignore b/samples/auxdisplay/.gitignore
index 7af2228..2ed744c 100644
--- a/samples/auxdisplay/.gitignore
+++ b/samples/auxdisplay/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
cfag12864b-example
diff --git a/samples/auxdisplay/Makefile b/samples/auxdisplay/Makefile
index 0273bab..19d5568 100644
--- a/samples/auxdisplay/Makefile
+++ b/samples/auxdisplay/Makefile
@@ -1,10 +1,2 @@
# SPDX-License-Identifier: GPL-2.0
-CC := $(CROSS_COMPILE)gcc
-CFLAGS := -I../../usr/include
-
-PROGS := cfag12864b-example
-
-all: $(PROGS)
-
-clean:
- rm -fr $(PROGS)
+userprogs-always-y += cfag12864b-example
diff --git a/samples/binderfs/.gitignore b/samples/binderfs/.gitignore
new file mode 100644
index 0000000..eb60241
--- /dev/null
+++ b/samples/binderfs/.gitignore
@@ -0,0 +1 @@
+binderfs_example
diff --git a/samples/binderfs/Makefile b/samples/binderfs/Makefile
index ea4c93d..629e43b 100644
--- a/samples/binderfs/Makefile
+++ b/samples/binderfs/Makefile
@@ -1,2 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_SAMPLE_ANDROID_BINDERFS) += binderfs_example.o
+userprogs-always-y += binderfs_example
+
+userccflags += -I usr/include
diff --git a/samples/binderfs/binderfs_example.c b/samples/binderfs/binderfs_example.c
index 5bbd2eb..0fd92cd 100644
--- a/samples/binderfs/binderfs_example.c
+++ b/samples/binderfs/binderfs_example.c
@@ -18,7 +18,6 @@
int main(int argc, char *argv[])
{
int fd, ret, saved_errno;
- size_t len;
struct binderfs_device device = { 0 };
ret = unshare(CLONE_NEWNS);
diff --git a/samples/bpf/.gitignore b/samples/bpf/.gitignore
index 74d31fd..b2f29bc 100644
--- a/samples/bpf/.gitignore
+++ b/samples/bpf/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
cpustat
fds_example
hbm
@@ -49,3 +50,5 @@
xdp_sample_pkts
xdp_tx_iptunnel
xdpsock
+xsk_fwd
+testfile.img
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 6d1df71..aeebf5d 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -4,55 +4,56 @@
TOOLS_PATH := $(BPF_SAMPLES_PATH)/../../tools
# List of programs to build
-hostprogs-y := test_lru_dist
-hostprogs-y += sock_example
-hostprogs-y += fds_example
-hostprogs-y += sockex1
-hostprogs-y += sockex2
-hostprogs-y += sockex3
-hostprogs-y += tracex1
-hostprogs-y += tracex2
-hostprogs-y += tracex3
-hostprogs-y += tracex4
-hostprogs-y += tracex5
-hostprogs-y += tracex6
-hostprogs-y += tracex7
-hostprogs-y += test_probe_write_user
-hostprogs-y += trace_output
-hostprogs-y += lathist
-hostprogs-y += offwaketime
-hostprogs-y += spintest
-hostprogs-y += map_perf_test
-hostprogs-y += test_overhead
-hostprogs-y += test_cgrp2_array_pin
-hostprogs-y += test_cgrp2_attach
-hostprogs-y += test_cgrp2_sock
-hostprogs-y += test_cgrp2_sock2
-hostprogs-y += xdp1
-hostprogs-y += xdp2
-hostprogs-y += xdp_router_ipv4
-hostprogs-y += test_current_task_under_cgroup
-hostprogs-y += trace_event
-hostprogs-y += sampleip
-hostprogs-y += tc_l2_redirect
-hostprogs-y += lwt_len_hist
-hostprogs-y += xdp_tx_iptunnel
-hostprogs-y += test_map_in_map
-hostprogs-y += per_socket_stats_example
-hostprogs-y += xdp_redirect
-hostprogs-y += xdp_redirect_map
-hostprogs-y += xdp_redirect_cpu
-hostprogs-y += xdp_monitor
-hostprogs-y += xdp_rxq_info
-hostprogs-y += syscall_tp
-hostprogs-y += cpustat
-hostprogs-y += xdp_adjust_tail
-hostprogs-y += xdpsock
-hostprogs-y += xdp_fwd
-hostprogs-y += task_fd_query
-hostprogs-y += xdp_sample_pkts
-hostprogs-y += ibumad
-hostprogs-y += hbm
+tprogs-y := test_lru_dist
+tprogs-y += sock_example
+tprogs-y += fds_example
+tprogs-y += sockex1
+tprogs-y += sockex2
+tprogs-y += sockex3
+tprogs-y += tracex1
+tprogs-y += tracex2
+tprogs-y += tracex3
+tprogs-y += tracex4
+tprogs-y += tracex5
+tprogs-y += tracex6
+tprogs-y += tracex7
+tprogs-y += test_probe_write_user
+tprogs-y += trace_output
+tprogs-y += lathist
+tprogs-y += offwaketime
+tprogs-y += spintest
+tprogs-y += map_perf_test
+tprogs-y += test_overhead
+tprogs-y += test_cgrp2_array_pin
+tprogs-y += test_cgrp2_attach
+tprogs-y += test_cgrp2_sock
+tprogs-y += test_cgrp2_sock2
+tprogs-y += xdp1
+tprogs-y += xdp2
+tprogs-y += xdp_router_ipv4
+tprogs-y += test_current_task_under_cgroup
+tprogs-y += trace_event
+tprogs-y += sampleip
+tprogs-y += tc_l2_redirect
+tprogs-y += lwt_len_hist
+tprogs-y += xdp_tx_iptunnel
+tprogs-y += test_map_in_map
+tprogs-y += per_socket_stats_example
+tprogs-y += xdp_redirect
+tprogs-y += xdp_redirect_map
+tprogs-y += xdp_redirect_cpu
+tprogs-y += xdp_monitor
+tprogs-y += xdp_rxq_info
+tprogs-y += syscall_tp
+tprogs-y += cpustat
+tprogs-y += xdp_adjust_tail
+tprogs-y += xdpsock
+tprogs-y += xsk_fwd
+tprogs-y += xdp_fwd
+tprogs-y += task_fd_query
+tprogs-y += xdp_sample_pkts
+tprogs-y += ibumad
+tprogs-y += hbm
# Libbpf dependencies
LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
@@ -63,20 +64,20 @@
fds_example-objs := fds_example.o
sockex1-objs := sockex1_user.o
sockex2-objs := sockex2_user.o
-sockex3-objs := bpf_load.o sockex3_user.o
-tracex1-objs := bpf_load.o tracex1_user.o
-tracex2-objs := bpf_load.o tracex2_user.o
-tracex3-objs := bpf_load.o tracex3_user.o
-tracex4-objs := bpf_load.o tracex4_user.o
-tracex5-objs := bpf_load.o tracex5_user.o
-tracex6-objs := bpf_load.o tracex6_user.o
-tracex7-objs := bpf_load.o tracex7_user.o
-test_probe_write_user-objs := bpf_load.o test_probe_write_user_user.o
-trace_output-objs := bpf_load.o trace_output_user.o $(TRACE_HELPERS)
-lathist-objs := bpf_load.o lathist_user.o
-offwaketime-objs := bpf_load.o offwaketime_user.o $(TRACE_HELPERS)
-spintest-objs := bpf_load.o spintest_user.o $(TRACE_HELPERS)
-map_perf_test-objs := bpf_load.o map_perf_test_user.o
+sockex3-objs := sockex3_user.o
+tracex1-objs := tracex1_user.o $(TRACE_HELPERS)
+tracex2-objs := tracex2_user.o
+tracex3-objs := tracex3_user.o
+tracex4-objs := tracex4_user.o
+tracex5-objs := tracex5_user.o $(TRACE_HELPERS)
+tracex6-objs := tracex6_user.o
+tracex7-objs := tracex7_user.o
+test_probe_write_user-objs := test_probe_write_user_user.o
+trace_output-objs := trace_output_user.o $(TRACE_HELPERS)
+lathist-objs := lathist_user.o
+offwaketime-objs := offwaketime_user.o $(TRACE_HELPERS)
+spintest-objs := spintest_user.o $(TRACE_HELPERS)
+map_perf_test-objs := map_perf_test_user.o
test_overhead-objs := bpf_load.o test_overhead_user.o
test_cgrp2_array_pin-objs := test_cgrp2_array_pin.o
test_cgrp2_attach-objs := test_cgrp2_attach.o
@@ -86,24 +87,25 @@
# reuse xdp1 source intentionally
xdp2-objs := xdp1_user.o
xdp_router_ipv4-objs := xdp_router_ipv4_user.o
-test_current_task_under_cgroup-objs := bpf_load.o $(CGROUP_HELPERS) \
+test_current_task_under_cgroup-objs := $(CGROUP_HELPERS) \
test_current_task_under_cgroup_user.o
-trace_event-objs := bpf_load.o trace_event_user.o $(TRACE_HELPERS)
-sampleip-objs := bpf_load.o sampleip_user.o $(TRACE_HELPERS)
+trace_event-objs := trace_event_user.o $(TRACE_HELPERS)
+sampleip-objs := sampleip_user.o $(TRACE_HELPERS)
tc_l2_redirect-objs := bpf_load.o tc_l2_redirect_user.o
lwt_len_hist-objs := bpf_load.o lwt_len_hist_user.o
xdp_tx_iptunnel-objs := xdp_tx_iptunnel_user.o
-test_map_in_map-objs := bpf_load.o test_map_in_map_user.o
+test_map_in_map-objs := test_map_in_map_user.o
per_socket_stats_example-objs := cookie_uid_helper_example.o
xdp_redirect-objs := xdp_redirect_user.o
xdp_redirect_map-objs := xdp_redirect_map_user.o
-xdp_redirect_cpu-objs := bpf_load.o xdp_redirect_cpu_user.o
-xdp_monitor-objs := bpf_load.o xdp_monitor_user.o
+xdp_redirect_cpu-objs := xdp_redirect_cpu_user.o
+xdp_monitor-objs := xdp_monitor_user.o
xdp_rxq_info-objs := xdp_rxq_info_user.o
-syscall_tp-objs := bpf_load.o syscall_tp_user.o
-cpustat-objs := bpf_load.o cpustat_user.o
+syscall_tp-objs := syscall_tp_user.o
+cpustat-objs := cpustat_user.o
xdp_adjust_tail-objs := xdp_adjust_tail_user.o
xdpsock-objs := xdpsock_user.o
+xsk_fwd-objs := xsk_fwd.o
xdp_fwd-objs := xdp_fwd_user.o
task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)
xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS)
@@ -111,100 +113,119 @@
hbm-objs := bpf_load.o hbm.o $(CGROUP_HELPERS)
# Tell kbuild to always build the programs
-always := $(hostprogs-y)
-always += sockex1_kern.o
-always += sockex2_kern.o
-always += sockex3_kern.o
-always += tracex1_kern.o
-always += tracex2_kern.o
-always += tracex3_kern.o
-always += tracex4_kern.o
-always += tracex5_kern.o
-always += tracex6_kern.o
-always += tracex7_kern.o
-always += sock_flags_kern.o
-always += test_probe_write_user_kern.o
-always += trace_output_kern.o
-always += tcbpf1_kern.o
-always += tc_l2_redirect_kern.o
-always += lathist_kern.o
-always += offwaketime_kern.o
-always += spintest_kern.o
-always += map_perf_test_kern.o
-always += test_overhead_tp_kern.o
-always += test_overhead_raw_tp_kern.o
-always += test_overhead_kprobe_kern.o
-always += parse_varlen.o parse_simple.o parse_ldabs.o
-always += test_cgrp2_tc_kern.o
-always += xdp1_kern.o
-always += xdp2_kern.o
-always += xdp_router_ipv4_kern.o
-always += test_current_task_under_cgroup_kern.o
-always += trace_event_kern.o
-always += sampleip_kern.o
-always += lwt_len_hist_kern.o
-always += xdp_tx_iptunnel_kern.o
-always += test_map_in_map_kern.o
-always += cookie_uid_helper_example.o
-always += tcp_synrto_kern.o
-always += tcp_rwnd_kern.o
-always += tcp_bufs_kern.o
-always += tcp_cong_kern.o
-always += tcp_iw_kern.o
-always += tcp_clamp_kern.o
-always += tcp_basertt_kern.o
-always += tcp_tos_reflect_kern.o
-always += tcp_dumpstats_kern.o
-always += xdp_redirect_kern.o
-always += xdp_redirect_map_kern.o
-always += xdp_redirect_cpu_kern.o
-always += xdp_monitor_kern.o
-always += xdp_rxq_info_kern.o
-always += xdp2skb_meta_kern.o
-always += syscall_tp_kern.o
-always += cpustat_kern.o
-always += xdp_adjust_tail_kern.o
-always += xdp_fwd_kern.o
-always += task_fd_query_kern.o
-always += xdp_sample_pkts_kern.o
-always += ibumad_kern.o
-always += hbm_out_kern.o
-always += hbm_edt_kern.o
+always-y := $(tprogs-y)
+always-y += sockex1_kern.o
+always-y += sockex2_kern.o
+always-y += sockex3_kern.o
+always-y += tracex1_kern.o
+always-y += tracex2_kern.o
+always-y += tracex3_kern.o
+always-y += tracex4_kern.o
+always-y += tracex5_kern.o
+always-y += tracex6_kern.o
+always-y += tracex7_kern.o
+always-y += sock_flags_kern.o
+always-y += test_probe_write_user_kern.o
+always-y += trace_output_kern.o
+always-y += tcbpf1_kern.o
+always-y += tc_l2_redirect_kern.o
+always-y += lathist_kern.o
+always-y += offwaketime_kern.o
+always-y += spintest_kern.o
+always-y += map_perf_test_kern.o
+always-y += test_overhead_tp_kern.o
+always-y += test_overhead_raw_tp_kern.o
+always-y += test_overhead_kprobe_kern.o
+always-y += parse_varlen.o parse_simple.o parse_ldabs.o
+always-y += test_cgrp2_tc_kern.o
+always-y += xdp1_kern.o
+always-y += xdp2_kern.o
+always-y += xdp_router_ipv4_kern.o
+always-y += test_current_task_under_cgroup_kern.o
+always-y += trace_event_kern.o
+always-y += sampleip_kern.o
+always-y += lwt_len_hist_kern.o
+always-y += xdp_tx_iptunnel_kern.o
+always-y += test_map_in_map_kern.o
+always-y += tcp_synrto_kern.o
+always-y += tcp_rwnd_kern.o
+always-y += tcp_bufs_kern.o
+always-y += tcp_cong_kern.o
+always-y += tcp_iw_kern.o
+always-y += tcp_clamp_kern.o
+always-y += tcp_basertt_kern.o
+always-y += tcp_tos_reflect_kern.o
+always-y += tcp_dumpstats_kern.o
+always-y += xdp_redirect_kern.o
+always-y += xdp_redirect_map_kern.o
+always-y += xdp_redirect_cpu_kern.o
+always-y += xdp_monitor_kern.o
+always-y += xdp_rxq_info_kern.o
+always-y += xdp2skb_meta_kern.o
+always-y += syscall_tp_kern.o
+always-y += cpustat_kern.o
+always-y += xdp_adjust_tail_kern.o
+always-y += xdp_fwd_kern.o
+always-y += task_fd_query_kern.o
+always-y += xdp_sample_pkts_kern.o
+always-y += ibumad_kern.o
+always-y += hbm_out_kern.o
+always-y += hbm_edt_kern.o
+always-y += xdpsock_kern.o
-KBUILD_HOSTCFLAGS += -I$(objtree)/usr/include
-KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/bpf/
-KBUILD_HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/
-KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/ -I$(srctree)/tools/include
-KBUILD_HOSTCFLAGS += -I$(srctree)/tools/perf
-KBUILD_HOSTCFLAGS += -DHAVE_ATTR_TEST=0
+ifeq ($(ARCH), arm)
+# Strip all except -D__LINUX_ARM_ARCH__ option needed to handle linux
+# headers when arm instruction set identification is requested.
+ARM_ARCH_SELECTOR := $(filter -D__LINUX_ARM_ARCH__%, $(KBUILD_CFLAGS))
+BPF_EXTRA_CFLAGS := $(ARM_ARCH_SELECTOR)
+TPROGS_CFLAGS += $(ARM_ARCH_SELECTOR)
+endif
-HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable
+TPROGS_CFLAGS += -Wall -O2
+TPROGS_CFLAGS += -Wmissing-prototypes
+TPROGS_CFLAGS += -Wstrict-prototypes
-KBUILD_HOSTLDLIBS += $(LIBBPF) -lelf
-HOSTLDLIBS_tracex4 += -lrt
-HOSTLDLIBS_trace_output += -lrt
-HOSTLDLIBS_map_perf_test += -lrt
-HOSTLDLIBS_test_overhead += -lrt
-HOSTLDLIBS_xdpsock += -pthread
+TPROGS_CFLAGS += -I$(objtree)/usr/include
+TPROGS_CFLAGS += -I$(srctree)/tools/testing/selftests/bpf/
+TPROGS_CFLAGS += -I$(srctree)/tools/lib/
+TPROGS_CFLAGS += -I$(srctree)/tools/include
+TPROGS_CFLAGS += -I$(srctree)/tools/perf
+TPROGS_CFLAGS += -DHAVE_ATTR_TEST=0
+
+ifdef SYSROOT
+TPROGS_CFLAGS += --sysroot=$(SYSROOT)
+TPROGS_LDFLAGS := -L$(SYSROOT)/usr/lib
+endif
+
+TPROGCFLAGS_bpf_load.o += -Wno-unused-variable
+
+TPROGS_LDLIBS += $(LIBBPF) -lelf -lz
+TPROGLDLIBS_tracex4 += -lrt
+TPROGLDLIBS_trace_output += -lrt
+TPROGLDLIBS_map_perf_test += -lrt
+TPROGLDLIBS_test_overhead += -lrt
+TPROGLDLIBS_xdpsock += -pthread
+TPROGLDLIBS_xsk_fwd += -pthread
# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
-# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
+# make M=samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
LLC ?= llc
CLANG ?= clang
+OPT ?= opt
+LLVM_DIS ?= llvm-dis
LLVM_OBJCOPY ?= llvm-objcopy
BTF_PAHOLE ?= pahole
# Detect that we're cross compiling and use the cross compiler
ifdef CROSS_COMPILE
-HOSTCC = $(CROSS_COMPILE)gcc
-CLANG_ARCH_ARGS = -target $(ARCH)
+CLANG_ARCH_ARGS = --target=$(notdir $(CROSS_COMPILE:%-=%))
endif
# Don't evaluate probes and warnings if we need to run make recursively
ifneq ($(src),)
-HDR_PROBE := $(shell echo "\#include <linux/types.h>\n struct list_head { int a; }; int main() { return 0; }" | \
- $(HOSTCC) $(KBUILD_HOSTCFLAGS) -x c - -o /dev/null 2>/dev/null && \
- echo okay)
+HDR_PROBE := $(shell printf "\#include <linux/types.h>\n struct list_head { int a; }; int main() { return 0; }" | \
+ $(CC) $(TPROGS_CFLAGS) $(TPROGS_LDFLAGS) -x c - \
+ -o /dev/null 2>/dev/null && echo okay)
ifeq ($(HDR_PROBE),)
$(warning WARNING: Detected possible issues with include path.)
@@ -221,10 +242,10 @@
BPF_EXTRA_CFLAGS += -fno-stack-protector
ifneq ($(BTF_LLVM_PROBE),)
- EXTRA_CFLAGS += -g
+ BPF_EXTRA_CFLAGS += -g
else
ifneq ($(and $(BTF_LLC_PROBE),$(BTF_PAHOLE_PROBE),$(BTF_OBJCOPY_PROBE)),)
- EXTRA_CFLAGS += -g
+ BPF_EXTRA_CFLAGS += -g
LLC_FLAGS += -mattr=dwarfris
DWARF2BTF = y
endif
@@ -233,7 +254,7 @@
# Trick to allow make to be run from this directory
all:
- $(MAKE) -C ../../ $(CURDIR)/ BPF_SAMPLES_PATH=$(CURDIR)
+ $(MAKE) -C ../../ M=$(CURDIR) BPF_SAMPLES_PATH=$(CURDIR)
clean:
$(MAKE) -C ../../ M=$(CURDIR) clean
@@ -241,7 +262,8 @@
$(LIBBPF): FORCE
# Fix up variables inherited from Kbuild that tools/ build system won't like
- $(MAKE) -C $(dir $@) RM='rm -rf' LDFLAGS= srctree=$(BPF_SAMPLES_PATH)/../../ O=
+ $(MAKE) -C $(dir $@) RM='rm -rf' EXTRA_CFLAGS="$(TPROGS_CFLAGS)" \
+ LDFLAGS=$(TPROGS_LDFLAGS) srctree=$(BPF_SAMPLES_PATH)/../../ O=
$(obj)/syscall_nrs.h: $(obj)/syscall_nrs.s FORCE
$(call filechk,offsets,__SYSCALL_NRS_H__)
@@ -278,20 +300,30 @@
$(obj)/hbm.o: $(src)/hbm.h
$(obj)/hbm_edt_kern.o: $(src)/hbm.h $(src)/hbm_kern.h
+-include $(BPF_SAMPLES_PATH)/Makefile.target
+
# asm/sysreg.h - inline assembly used by it is incompatible with llvm.
# But, there is no easy way to fix it, so just exclude it since it is
# useless for BPF samples.
+# below we use long chain of commands, clang | opt | llvm-dis | llc,
+# to generate final object file. 'clang' compiles the source into IR
+# with native target, e.g., x64, arm64, etc. 'opt' does bpf CORE IR builtin
+# processing (llvm12) and IR optimizations. 'llvm-dis' converts
+# 'opt' output to IR, and finally 'llc' generates bpf byte code.
$(obj)/%.o: $(src)/%.c
@echo " CLANG-bpf " $@
- $(Q)$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \
- -I$(srctree)/tools/testing/selftests/bpf/ \
+ $(Q)$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(BPF_EXTRA_CFLAGS) \
+ -I$(obj) -I$(srctree)/tools/testing/selftests/bpf/ \
+ -I$(srctree)/tools/lib/ \
-D__KERNEL__ -D__BPF_TRACING__ -Wno-unused-value -Wno-pointer-sign \
-D__TARGET_ARCH_$(SRCARCH) -Wno-compare-distinct-pointer-types \
-Wno-gnu-variable-sized-type-not-at-end \
-Wno-address-of-packed-member -Wno-tautological-compare \
-Wno-unknown-warning-option $(CLANG_ARCH_ARGS) \
-I$(srctree)/samples/bpf/ -include asm_goto_workaround.h \
- -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf $(LLC_FLAGS) -filetype=obj -o $@
+ -O2 -emit-llvm -Xclang -disable-llvm-passes -c $< -o - | \
+ $(OPT) -O2 -mtriple=bpf-pc-linux | $(LLVM_DIS) | \
+ $(LLC) -march=bpf $(LLC_FLAGS) -filetype=obj -o $@
ifeq ($(DWARF2BTF),y)
$(BTF_PAHOLE) -J $@
endif
diff --git a/samples/bpf/Makefile.target b/samples/bpf/Makefile.target
new file mode 100644
index 0000000..7621f55
--- /dev/null
+++ b/samples/bpf/Makefile.target
@@ -0,0 +1,75 @@
+# SPDX-License-Identifier: GPL-2.0
+# ==========================================================================
+# Building binaries on the host system
+# Binaries are not used during the compilation of the kernel; they are intended
+# to be built for the target board (which can, of course, be the host). Added to
+# build binaries that run somewhere other than the host system.
+#
+# Sample syntax
+# tprogs-y := xsk_example
+# Will compile xsk_example.c and create an executable named xsk_example
+#
+# tprogs-y := xdpsock
+# xdpsock-objs := xdpsock_1.o xdpsock_2.o
+# Will compile xdpsock_1.c and xdpsock_2.c, and then link the executable
+# xdpsock, based on xdpsock_1.o and xdpsock_2.o
+#
+# Derived from scripts/Makefile.host
+#
+__tprogs := $(sort $(tprogs-y))
+
+# C code
+# Executables compiled from a single .c file
+tprog-csingle := $(foreach m,$(__tprogs), \
+ $(if $($(m)-objs),,$(m)))
+
+# C executables linked based on several .o files
+tprog-cmulti := $(foreach m,$(__tprogs),\
+ $(if $($(m)-objs),$(m)))
+
+# Object (.o) files compiled from .c files
+tprog-cobjs := $(sort $(foreach m,$(__tprogs),$($(m)-objs)))
+
+tprog-csingle := $(addprefix $(obj)/,$(tprog-csingle))
+tprog-cmulti := $(addprefix $(obj)/,$(tprog-cmulti))
+tprog-cobjs := $(addprefix $(obj)/,$(tprog-cobjs))
+
+#####
+# Handle options to gcc. Support building with separate output directory
+
+_tprogc_flags = $(TPROGS_CFLAGS) \
+ $(TPROGCFLAGS_$(basetarget).o)
+
+# $(objtree)/$(obj) for including generated headers from checkin source files
+ifeq ($(KBUILD_EXTMOD),)
+ifdef building_out_of_srctree
+_tprogc_flags += -I $(objtree)/$(obj)
+endif
+endif
+
+tprogc_flags = -Wp,-MD,$(depfile) $(_tprogc_flags)
+
+# Create executable from a single .c file
+# tprog-csingle -> Executable
+quiet_cmd_tprog-csingle = CC $@
+ cmd_tprog-csingle = $(CC) $(tprogc_flags) $(TPROGS_LDFLAGS) -o $@ $< \
+ $(TPROGS_LDLIBS) $(TPROGLDLIBS_$(@F))
+$(tprog-csingle): $(obj)/%: $(src)/%.c FORCE
+ $(call if_changed_dep,tprog-csingle)
+
+# Link an executable based on list of .o files, all plain c
+# tprog-cmulti -> executable
+quiet_cmd_tprog-cmulti = LD $@
+ cmd_tprog-cmulti = $(CC) $(tprogc_flags) $(TPROGS_LDFLAGS) -o $@ \
+ $(addprefix $(obj)/,$($(@F)-objs)) \
+ $(TPROGS_LDLIBS) $(TPROGLDLIBS_$(@F))
+$(tprog-cmulti): $(tprog-cobjs) FORCE
+ $(call if_changed,tprog-cmulti)
+$(call multi_depend, $(tprog-cmulti), , -objs)
+
+# Create .o file from a single .c file
+# tprog-cobjs -> .o
+quiet_cmd_tprog-cobjs = CC $@
+ cmd_tprog-cobjs = $(CC) $(tprogc_flags) -c -o $@ $<
+$(tprog-cobjs): $(obj)/%.o: $(src)/%.c FORCE
+ $(call if_changed_dep,tprog-cobjs)
diff --git a/samples/bpf/README.rst b/samples/bpf/README.rst
index 5f27e4f..dd34b2d 100644
--- a/samples/bpf/README.rst
+++ b/samples/bpf/README.rst
@@ -14,6 +14,20 @@
Note that LLVM's tool 'llc' must support target 'bpf', list version
and supported targets with command: ``llc --version``
+Clean and configuration
+-----------------------
+
+It may be necessary to clean the tools, samples, or kernel before trying a new
+arch or after some changes (on demand)::
+
+ make -C tools clean
+ make -C samples/bpf clean
+ make clean
+
+Configure kernel, defconfig for instance::
+
+ make defconfig
+
Kernel headers
--------------
@@ -32,12 +46,10 @@
For building the BPF samples, issue the below command from the kernel
top level directory::
- make samples/bpf/
-
-Do notice the "/" slash after the directory name.
+ make M=samples/bpf
It is also possible to call make from this directory. This will just
-hide the the invocation of make as above with the appended "/".
+hide the invocation of make as above.
Manually compiling LLVM with 'bpf' support
------------------------------------------
@@ -63,14 +75,31 @@
It is also possible to point make to the newly compiled 'llc' or
'clang' command via redefining LLC or CLANG on the make command line::
- make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
+ make M=samples/bpf LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
Cross compiling samples
-----------------------
In order to cross-compile, say for arm64 targets, export CROSS_COMPILE and ARCH
-environment variables before calling make. This will direct make to build
-samples for the cross target.
+environment variables before calling make. But do this before clean,
+configuration and header install steps described above. This will direct make to
+build samples for the cross target::
-export ARCH=arm64
-export CROSS_COMPILE="aarch64-linux-gnu-"
-make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
+ export ARCH=arm64
+ export CROSS_COMPILE="aarch64-linux-gnu-"
+
+Headers can also be installed on the RFS of the target board if they need to be
+kept in sync (not strictly necessary; it also creates a local "usr/include" directory)::
+
+ make INSTALL_HDR_PATH=~/some_sysroot/usr headers_install
+
+Pointing to LLC and CLANG is not necessary if they are installed on the HOST and
+support the appropriate arm64 arch among their targets (usually several arches are supported).
+Build samples::
+
+ make M=samples/bpf
+
+Or build samples with SYSROOT if some header or library is absent in toolchain,
+say libelf, providing address to file system containing headers and libs,
+can be RFS of target board::
+
+ make M=samples/bpf SYSROOT=~/some_sysroot
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index 4574b19..c5ad528 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -665,23 +665,3 @@
{
return do_load_bpf_file(path, fixup_map);
}
-
-void read_trace_pipe(void)
-{
- int trace_fd;
-
- trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0);
- if (trace_fd < 0)
- return;
-
- while (1) {
- static char buf[4096];
- ssize_t sz;
-
- sz = read(trace_fd, buf, sizeof(buf) - 1);
- if (sz > 0) {
- buf[sz] = 0;
- puts(buf);
- }
- }
-}
diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h
index 814894a..4fcd258 100644
--- a/samples/bpf/bpf_load.h
+++ b/samples/bpf/bpf_load.h
@@ -53,6 +53,5 @@
int load_bpf_file(char *path);
int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map);
-void read_trace_pipe(void);
int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
#endif
diff --git a/samples/bpf/cpustat_kern.c b/samples/bpf/cpustat_kern.c
index 68c84da..5aefd19 100644
--- a/samples/bpf/cpustat_kern.c
+++ b/samples/bpf/cpustat_kern.c
@@ -3,7 +3,7 @@
#include <linux/version.h>
#include <linux/ptrace.h>
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
/*
* The CPU number, cstate number and pstate number are based
@@ -51,28 +51,28 @@
#define MAP_OFF_PSTATE_IDX 3
#define MAP_OFF_NUM 4
-struct bpf_map_def SEC("maps") my_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u64),
- .max_entries = MAX_CPU * MAP_OFF_NUM,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, u64);
+ __uint(max_entries, MAX_CPU * MAP_OFF_NUM);
+} my_map SEC(".maps");
/* cstate_duration records duration time for every idle state per CPU */
-struct bpf_map_def SEC("maps") cstate_duration = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u64),
- .max_entries = MAX_CPU * MAX_CSTATE_ENTRIES,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, u64);
+ __uint(max_entries, MAX_CPU * MAX_CSTATE_ENTRIES);
+} cstate_duration SEC(".maps");
/* pstate_duration records duration time for every operating point per CPU */
-struct bpf_map_def SEC("maps") pstate_duration = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u64),
- .max_entries = MAX_CPU * MAX_PSTATE_ENTRIES,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, u64);
+ __uint(max_entries, MAX_CPU * MAX_PSTATE_ENTRIES);
+} pstate_duration SEC(".maps");
/*
* The trace events for cpu_idle and cpu_frequency are taken from:
diff --git a/samples/bpf/cpustat_user.c b/samples/bpf/cpustat_user.c
index 869a994..9667598 100644
--- a/samples/bpf/cpustat_user.c
+++ b/samples/bpf/cpustat_user.c
@@ -9,7 +9,6 @@
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
-#include <linux/bpf.h>
#include <locale.h>
#include <sys/types.h>
#include <sys/stat.h>
@@ -18,7 +17,9 @@
#include <sys/wait.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
+
+static int cstate_map_fd, pstate_map_fd;
#define MAX_CPU 8
#define MAX_PSTATE_ENTRIES 5
@@ -181,21 +182,50 @@
{
cpu_stat_inject_cpu_idle_event();
cpu_stat_inject_cpu_frequency_event();
- cpu_stat_update(map_fd[1], map_fd[2]);
+ cpu_stat_update(cstate_map_fd, pstate_map_fd);
cpu_stat_print();
exit(0);
}
int main(int argc, char **argv)
{
+ struct bpf_link *link = NULL;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
char filename[256];
int ret;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+ if (!prog) {
+ printf("finding a prog in obj file failed\n");
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ cstate_map_fd = bpf_object__find_map_fd_by_name(obj, "cstate_duration");
+ pstate_map_fd = bpf_object__find_map_fd_by_name(obj, "pstate_duration");
+ if (cstate_map_fd < 0 || pstate_map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ link = bpf_program__attach(prog);
+ if (libbpf_get_error(link)) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ link = NULL;
+ goto cleanup;
}
ret = cpu_stat_inject_cpu_idle_event();
@@ -210,10 +240,13 @@
signal(SIGTERM, int_exit);
while (1) {
- cpu_stat_update(map_fd[1], map_fd[2]);
+ cpu_stat_update(cstate_map_fd, pstate_map_fd);
cpu_stat_print();
sleep(5);
}
+cleanup:
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/fds_example.c b/samples/bpf/fds_example.c
index 34b3fca..59f45fe 100644
--- a/samples/bpf/fds_example.c
+++ b/samples/bpf/fds_example.c
@@ -14,7 +14,7 @@
#include <bpf/bpf.h>
-#include "libbpf.h"
+#include <bpf/libbpf.h>
#include "bpf_insn.h"
#include "sock_example.h"
diff --git a/samples/bpf/hbm.c b/samples/bpf/hbm.c
index e0fbab9..ff4c533 100644
--- a/samples/bpf/hbm.c
+++ b/samples/bpf/hbm.c
@@ -40,6 +40,7 @@
#include <errno.h>
#include <fcntl.h>
#include <linux/unistd.h>
+#include <linux/compiler.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>
@@ -50,8 +51,8 @@
#include "cgroup_helpers.h"
#include "hbm.h"
#include "bpf_util.h"
-#include "bpf.h"
-#include "libbpf.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
bool outFlag = true;
int minRate = 1000; /* cgroup rate limit in Mbps */
@@ -147,7 +148,7 @@
}
if (ret) {
- printf("ERROR: load_bpf_file failed for: %s\n", prog);
+ printf("ERROR: bpf_prog_load_xattr failed for: %s\n", prog);
printf(" Output from verifier:\n%s\n------\n", bpf_log_buf);
ret = -1;
} else {
@@ -483,7 +484,7 @@
"Option -%c requires an argument.\n\n",
optopt);
case 'h':
- // fallthrough
+ __fallthrough;
default:
Usage();
return 0;
diff --git a/samples/bpf/hbm_kern.h b/samples/bpf/hbm_kern.h
index aa207a2..e00f26f 100644
--- a/samples/bpf/hbm_kern.h
+++ b/samples/bpf/hbm_kern.h
@@ -22,8 +22,8 @@
#include <uapi/linux/pkt_cls.h>
#include <net/ipv6.h>
#include <net/inet_ecn.h>
-#include "bpf_endian.h"
-#include "bpf_helpers.h"
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
#include "hbm.h"
#define DROP_PKT 0
@@ -59,21 +59,18 @@
#define BYTES_PER_NS(delta, rate) ((((u64)(delta)) * (rate)) >> 20)
#define BYTES_TO_NS(bytes, rate) div64_u64(((u64)(bytes)) << 20, (u64)(rate))
-struct bpf_map_def SEC("maps") queue_state = {
- .type = BPF_MAP_TYPE_CGROUP_STORAGE,
- .key_size = sizeof(struct bpf_cgroup_storage_key),
- .value_size = sizeof(struct hbm_vqueue),
-};
-BPF_ANNOTATE_KV_PAIR(queue_state, struct bpf_cgroup_storage_key,
- struct hbm_vqueue);
+struct {
+ __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+ __type(key, struct bpf_cgroup_storage_key);
+ __type(value, struct hbm_vqueue);
+} queue_state SEC(".maps");
-struct bpf_map_def SEC("maps") queue_stats = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(struct hbm_queue_stats),
- .max_entries = 1,
-};
-BPF_ANNOTATE_KV_PAIR(queue_stats, int, struct hbm_queue_stats);
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, u32);
+ __type(value, struct hvm_queue_stats);
+} queue_stats SEC(".maps");
struct hbm_pkt_info {
int cwnd;
diff --git a/samples/bpf/ibumad_kern.c b/samples/bpf/ibumad_kern.c
index f281df7..3a91b4c 100644
--- a/samples/bpf/ibumad_kern.c
+++ b/samples/bpf/ibumad_kern.c
@@ -13,7 +13,7 @@
#define KBUILD_MODNAME "ibumad_count_pkts_by_class"
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
struct bpf_map_def SEC("maps") read_count = {
diff --git a/samples/bpf/ibumad_user.c b/samples/bpf/ibumad_user.c
index cb5a8f9..fa06eef 100644
--- a/samples/bpf/ibumad_user.c
+++ b/samples/bpf/ibumad_user.c
@@ -25,7 +25,7 @@
#include "bpf_load.h"
#include "bpf_util.h"
-#include "libbpf.h"
+#include <bpf/libbpf.h>
static void dump_counts(int fd)
{
diff --git a/samples/bpf/lathist_kern.c b/samples/bpf/lathist_kern.c
index 18fa088..4adfcbb 100644
--- a/samples/bpf/lathist_kern.c
+++ b/samples/bpf/lathist_kern.c
@@ -8,7 +8,7 @@
#include <linux/version.h>
#include <linux/ptrace.h>
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#define MAX_ENTRIES 20
#define MAX_CPU 4
@@ -18,12 +18,12 @@
* trace_preempt_[on|off] tracepoints hooks is not supported.
*/
-struct bpf_map_def SEC("maps") my_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(u64),
- .max_entries = MAX_CPU,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, u64);
+ __uint(max_entries, MAX_CPU);
+} my_map SEC(".maps");
SEC("kprobe/trace_preempt_off")
int bpf_prog1(struct pt_regs *ctx)
@@ -61,12 +61,12 @@
return log2(v);
}
-struct bpf_map_def SEC("maps") my_lat = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(long),
- .max_entries = MAX_CPU * MAX_ENTRIES,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, long);
+ __uint(max_entries, MAX_CPU * MAX_ENTRIES);
+} my_lat SEC(".maps");
SEC("kprobe/trace_preempt_on")
int bpf_prog2(struct pt_regs *ctx)
diff --git a/samples/bpf/lathist_user.c b/samples/bpf/lathist_user.c
index 2ff2839..7d8ff24 100644
--- a/samples/bpf/lathist_user.c
+++ b/samples/bpf/lathist_user.c
@@ -6,9 +6,8 @@
#include <unistd.h>
#include <stdlib.h>
#include <signal.h>
-#include <linux/bpf.h>
+#include <bpf/libbpf.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
#define MAX_ENTRIES 20
#define MAX_CPU 4
@@ -81,20 +80,51 @@
int main(int argc, char **argv)
{
+ struct bpf_link *links[2];
+ struct bpf_program *prog;
+ struct bpf_object *obj;
char filename[256];
+ int map_fd, i = 0;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd = bpf_object__find_map_fd_by_name(obj, "my_lat");
+ if (map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ links[i] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[i])) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ links[i] = NULL;
+ goto cleanup;
+ }
+ i++;
}
while (1) {
- get_data(map_fd[1]);
+ get_data(map_fd);
print_hist();
sleep(5);
}
+cleanup:
+ for (i--; i >= 0; i--)
+ bpf_link__destroy(links[i]);
+
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/lwt_len_hist_kern.c b/samples/bpf/lwt_len_hist_kern.c
index df75383..9ed63e1 100644
--- a/samples/bpf/lwt_len_hist_kern.c
+++ b/samples/bpf/lwt_len_hist_kern.c
@@ -14,7 +14,7 @@
#include <uapi/linux/if_ether.h>
#include <uapi/linux/ip.h>
#include <uapi/linux/in.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
# define printk(fmt, ...) \
({ \
diff --git a/samples/bpf/map_perf_test_kern.c b/samples/bpf/map_perf_test_kern.c
index 2b2ffb9..8773f22 100644
--- a/samples/bpf/map_perf_test_kern.c
+++ b/samples/bpf/map_perf_test_kern.c
@@ -8,94 +8,101 @@
#include <linux/netdevice.h>
#include <linux/version.h>
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "trace_common.h"
#define MAX_ENTRIES 1000
#define MAX_NR_CPUS 1024
-struct bpf_map_def SEC("maps") hash_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(long),
- .max_entries = MAX_ENTRIES,
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, long);
+ __uint(max_entries, MAX_ENTRIES);
+} hash_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __type(key, u32);
+ __type(value, long);
+ __uint(max_entries, 10000);
+} lru_hash_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __type(key, u32);
+ __type(value, long);
+ __uint(max_entries, 10000);
+ __uint(map_flags, BPF_F_NO_COMMON_LRU);
+} nocommon_lru_hash_map SEC(".maps");
+
+struct inner_lru {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __type(key, u32);
+ __type(value, long);
+ __uint(max_entries, MAX_ENTRIES);
+ __uint(map_flags, BPF_F_NUMA_NODE);
+ __uint(numa_node, 0);
+} inner_lru_hash_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, MAX_NR_CPUS);
+ __uint(key_size, sizeof(u32));
+ __array(values, struct inner_lru); /* use inner_lru as inner map */
+} array_of_lru_hashs SEC(".maps") = {
+ /* statically initialize the first element */
+ .values = { &inner_lru_hash_map },
};
-struct bpf_map_def SEC("maps") lru_hash_map = {
- .type = BPF_MAP_TYPE_LRU_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(long),
- .max_entries = 10000,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, sizeof(long));
+ __uint(max_entries, MAX_ENTRIES);
+} percpu_hash_map SEC(".maps");
-struct bpf_map_def SEC("maps") nocommon_lru_hash_map = {
- .type = BPF_MAP_TYPE_LRU_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(long),
- .max_entries = 10000,
- .map_flags = BPF_F_NO_COMMON_LRU,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, long);
+ __uint(max_entries, MAX_ENTRIES);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} hash_map_alloc SEC(".maps");
-struct bpf_map_def SEC("maps") inner_lru_hash_map = {
- .type = BPF_MAP_TYPE_LRU_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(long),
- .max_entries = MAX_ENTRIES,
- .map_flags = BPF_F_NUMA_NODE,
- .numa_node = 0,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, sizeof(long));
+ __uint(max_entries, MAX_ENTRIES);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} percpu_hash_map_alloc SEC(".maps");
-struct bpf_map_def SEC("maps") array_of_lru_hashs = {
- .type = BPF_MAP_TYPE_ARRAY_OF_MAPS,
- .key_size = sizeof(u32),
- .max_entries = MAX_NR_CPUS,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_LPM_TRIE);
+ __uint(key_size, 8);
+ __uint(value_size, sizeof(long));
+ __uint(max_entries, 10000);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} lpm_trie_map_alloc SEC(".maps");
-struct bpf_map_def SEC("maps") percpu_hash_map = {
- .type = BPF_MAP_TYPE_PERCPU_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(long),
- .max_entries = MAX_ENTRIES,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, long);
+ __uint(max_entries, MAX_ENTRIES);
+} array_map SEC(".maps");
-struct bpf_map_def SEC("maps") hash_map_alloc = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(long),
- .max_entries = MAX_ENTRIES,
- .map_flags = BPF_F_NO_PREALLOC,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __type(key, u32);
+ __type(value, long);
+ __uint(max_entries, MAX_ENTRIES);
+} lru_hash_lookup_map SEC(".maps");
-struct bpf_map_def SEC("maps") percpu_hash_map_alloc = {
- .type = BPF_MAP_TYPE_PERCPU_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(long),
- .max_entries = MAX_ENTRIES,
- .map_flags = BPF_F_NO_PREALLOC,
-};
-
-struct bpf_map_def SEC("maps") lpm_trie_map_alloc = {
- .type = BPF_MAP_TYPE_LPM_TRIE,
- .key_size = 8,
- .value_size = sizeof(long),
- .max_entries = 10000,
- .map_flags = BPF_F_NO_PREALLOC,
-};
-
-struct bpf_map_def SEC("maps") array_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(long),
- .max_entries = MAX_ENTRIES,
-};
-
-struct bpf_map_def SEC("maps") lru_hash_lookup_map = {
- .type = BPF_MAP_TYPE_LRU_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(long),
- .max_entries = MAX_ENTRIES,
-};
-
-SEC("kprobe/sys_getuid")
+SEC("kprobe/" SYSCALL(sys_getuid))
int stress_hmap(struct pt_regs *ctx)
{
u32 key = bpf_get_current_pid_tgid();
@@ -110,7 +117,7 @@
return 0;
}
-SEC("kprobe/sys_geteuid")
+SEC("kprobe/" SYSCALL(sys_geteuid))
int stress_percpu_hmap(struct pt_regs *ctx)
{
u32 key = bpf_get_current_pid_tgid();
@@ -124,7 +131,7 @@
return 0;
}
-SEC("kprobe/sys_getgid")
+SEC("kprobe/" SYSCALL(sys_getgid))
int stress_hmap_alloc(struct pt_regs *ctx)
{
u32 key = bpf_get_current_pid_tgid();
@@ -138,7 +145,7 @@
return 0;
}
-SEC("kprobe/sys_getegid")
+SEC("kprobe/" SYSCALL(sys_getegid))
int stress_percpu_hmap_alloc(struct pt_regs *ctx)
{
u32 key = bpf_get_current_pid_tgid();
@@ -152,9 +159,10 @@
return 0;
}
-SEC("kprobe/sys_connect")
+SEC("kprobe/" SYSCALL(sys_connect))
int stress_lru_hmap_alloc(struct pt_regs *ctx)
{
+ struct pt_regs *real_regs = (struct pt_regs *)PT_REGS_PARM1_CORE(ctx);
char fmt[] = "Failed at stress_lru_hmap_alloc. ret:%dn";
union {
u16 dst6[8];
@@ -173,14 +181,14 @@
long val = 1;
u32 key = 0;
- in6 = (struct sockaddr_in6 *)PT_REGS_PARM2(ctx);
- addrlen = (int)PT_REGS_PARM3(ctx);
+ in6 = (struct sockaddr_in6 *)PT_REGS_PARM2_CORE(real_regs);
+ addrlen = (int)PT_REGS_PARM3_CORE(real_regs);
if (addrlen != sizeof(*in6))
return 0;
- ret = bpf_probe_read(test_params.dst6, sizeof(test_params.dst6),
- &in6->sin6_addr);
+ ret = bpf_probe_read_user(test_params.dst6, sizeof(test_params.dst6),
+ &in6->sin6_addr);
if (ret)
goto done;
@@ -231,7 +239,7 @@
return 0;
}
-SEC("kprobe/sys_gettid")
+SEC("kprobe/" SYSCALL(sys_gettid))
int stress_lpm_trie_map_alloc(struct pt_regs *ctx)
{
union {
@@ -253,7 +261,7 @@
return 0;
}
-SEC("kprobe/sys_getpgid")
+SEC("kprobe/" SYSCALL(sys_getpgid))
int stress_hash_map_lookup(struct pt_regs *ctx)
{
u32 key = 1, i;
@@ -266,7 +274,7 @@
return 0;
}
-SEC("kprobe/sys_getppid")
+SEC("kprobe/" SYSCALL(sys_getppid))
int stress_array_map_lookup(struct pt_regs *ctx)
{
u32 key = 1, i;
diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c
index fe5564b..8b13230 100644
--- a/samples/bpf/map_perf_test_user.c
+++ b/samples/bpf/map_perf_test_user.c
@@ -11,7 +11,6 @@
#include <sys/wait.h>
#include <stdlib.h>
#include <signal.h>
-#include <linux/bpf.h>
#include <string.h>
#include <time.h>
#include <sys/resource.h>
@@ -19,7 +18,7 @@
#include <errno.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
#define TEST_BIT(t) (1U << (t))
#define MAX_NR_CPUS 1024
@@ -61,12 +60,18 @@
[LRU_HASH_LOOKUP] = "lru_hash_lookup_map",
};
+enum map_idx {
+ array_of_lru_hashs_idx,
+ hash_map_alloc_idx,
+ lru_hash_lookup_idx,
+ NR_IDXES,
+};
+
+static int map_fd[NR_IDXES];
+
static int test_flags = ~0;
static uint32_t num_map_entries;
static uint32_t inner_lru_hash_size;
-static int inner_lru_hash_idx = -1;
-static int array_of_lru_hashs_idx = -1;
-static int lru_hash_lookup_idx = -1;
static int lru_hash_lookup_test_entries = 32;
static uint32_t max_cnt = 1000000;
@@ -122,30 +127,30 @@
__u64 start_time;
int i, ret;
- if (test == INNER_LRU_HASH_PREALLOC) {
+ if (test == INNER_LRU_HASH_PREALLOC && cpu) {
+ /* If CPU is not 0, create inner_lru hash map and insert the fd
+ * value into the array_of_lru_hash map. In case of CPU 0,
+ * 'inner_lru_hash_map' was statically inserted on the map init
+ */
int outer_fd = map_fd[array_of_lru_hashs_idx];
unsigned int mycpu, mynode;
assert(cpu < MAX_NR_CPUS);
- if (cpu) {
- ret = syscall(__NR_getcpu, &mycpu, &mynode, NULL);
- assert(!ret);
+ ret = syscall(__NR_getcpu, &mycpu, &mynode, NULL);
+ assert(!ret);
- inner_lru_map_fds[cpu] =
- bpf_create_map_node(BPF_MAP_TYPE_LRU_HASH,
- test_map_names[INNER_LRU_HASH_PREALLOC],
- sizeof(uint32_t),
- sizeof(long),
- inner_lru_hash_size, 0,
- mynode);
- if (inner_lru_map_fds[cpu] == -1) {
- printf("cannot create BPF_MAP_TYPE_LRU_HASH %s(%d)\n",
- strerror(errno), errno);
- exit(1);
- }
- } else {
- inner_lru_map_fds[cpu] = map_fd[inner_lru_hash_idx];
+ inner_lru_map_fds[cpu] =
+ bpf_create_map_node(BPF_MAP_TYPE_LRU_HASH,
+ test_map_names[INNER_LRU_HASH_PREALLOC],
+ sizeof(uint32_t),
+ sizeof(long),
+ inner_lru_hash_size, 0,
+ mynode);
+ if (inner_lru_map_fds[cpu] == -1) {
+ printf("cannot create BPF_MAP_TYPE_LRU_HASH %s(%d)\n",
+ strerror(errno), errno);
+ exit(1);
}
ret = bpf_map_update_elem(outer_fd, &cpu,
@@ -377,7 +382,8 @@
key->data[1] = rand() & 0xff;
key->data[2] = rand() & 0xff;
key->data[3] = rand() & 0xff;
- r = bpf_map_update_elem(map_fd[6], key, &value, 0);
+ r = bpf_map_update_elem(map_fd[hash_map_alloc_idx],
+ key, &value, 0);
assert(!r);
}
@@ -388,59 +394,52 @@
key->data[3] = 1;
value = 128;
- r = bpf_map_update_elem(map_fd[6], key, &value, 0);
+ r = bpf_map_update_elem(map_fd[hash_map_alloc_idx], key, &value, 0);
assert(!r);
}
-static void fixup_map(struct bpf_map_data *map, int idx)
+static void fixup_map(struct bpf_object *obj)
{
+ struct bpf_map *map;
int i;
- if (!strcmp("inner_lru_hash_map", map->name)) {
- inner_lru_hash_idx = idx;
- inner_lru_hash_size = map->def.max_entries;
- }
+ bpf_object__for_each_map(map, obj) {
+ const char *name = bpf_map__name(map);
- if (!strcmp("array_of_lru_hashs", map->name)) {
- if (inner_lru_hash_idx == -1) {
- printf("inner_lru_hash_map must be defined before array_of_lru_hashs\n");
- exit(1);
+ /* Only change the max_entries for the enabled test(s) */
+ for (i = 0; i < NR_TESTS; i++) {
+ if (!strcmp(test_map_names[i], name) &&
+ (check_test_flags(i))) {
+ bpf_map__resize(map, num_map_entries);
+ continue;
+ }
}
- map->def.inner_map_idx = inner_lru_hash_idx;
- array_of_lru_hashs_idx = idx;
}
- if (!strcmp("lru_hash_lookup_map", map->name))
- lru_hash_lookup_idx = idx;
-
- if (num_map_entries <= 0)
- return;
-
inner_lru_hash_size = num_map_entries;
-
- /* Only change the max_entries for the enabled test(s) */
- for (i = 0; i < NR_TESTS; i++) {
- if (!strcmp(test_map_names[i], map->name) &&
- (check_test_flags(i))) {
- map->def.max_entries = num_map_entries;
- }
- }
}
int main(int argc, char **argv)
{
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ int nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+ struct bpf_link *links[8];
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ struct bpf_map *map;
char filename[256];
- int num_cpu = 8;
+ int i = 0;
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- setrlimit(RLIMIT_MEMLOCK, &r);
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK)");
+ return 1;
+ }
if (argc > 1)
test_flags = atoi(argv[1]) ? : test_flags;
if (argc > 2)
- num_cpu = atoi(argv[2]) ? : num_cpu;
+ nr_cpus = atoi(argv[2]) ? : nr_cpus;
if (argc > 3)
num_map_entries = atoi(argv[3]);
@@ -448,14 +447,61 @@
if (argc > 4)
max_cnt = atoi(argv[4]);
- if (load_bpf_file_fixup_map(filename, fixup_map)) {
- printf("%s", bpf_log_buf);
- return 1;
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
+
+ map = bpf_object__find_map_by_name(obj, "inner_lru_hash_map");
+ if (libbpf_get_error(map)) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ inner_lru_hash_size = bpf_map__max_entries(map);
+ if (!inner_lru_hash_size) {
+ fprintf(stderr, "ERROR: failed to get map attribute\n");
+ goto cleanup;
+ }
+
+ /* resize BPF map prior to loading */
+ if (num_map_entries > 0)
+ fixup_map(obj);
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd[0] = bpf_object__find_map_fd_by_name(obj, "array_of_lru_hashs");
+ map_fd[1] = bpf_object__find_map_fd_by_name(obj, "hash_map_alloc");
+ map_fd[2] = bpf_object__find_map_fd_by_name(obj, "lru_hash_lookup_map");
+ if (map_fd[0] < 0 || map_fd[1] < 0 || map_fd[2] < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ links[i] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[i])) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ links[i] = NULL;
+ goto cleanup;
+ }
+ i++;
}
fill_lpm_trie();
- run_perf_test(num_cpu);
+ run_perf_test(nr_cpus);
+cleanup:
+ for (i--; i >= 0; i--)
+ bpf_link__destroy(links[i]);
+
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/offwaketime_kern.c b/samples/bpf/offwaketime_kern.c
index e7d9a0a..14b7929 100644
--- a/samples/bpf/offwaketime_kern.c
+++ b/samples/bpf/offwaketime_kern.c
@@ -5,13 +5,19 @@
* License as published by the Free Software Foundation.
*/
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
#include <uapi/linux/ptrace.h>
#include <uapi/linux/perf_event.h>
#include <linux/version.h>
#include <linux/sched.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
-#define _(P) ({typeof(P) val; bpf_probe_read(&val, sizeof(val), &P); val;})
+#define _(P) \
+ ({ \
+ typeof(P) val; \
+ bpf_probe_read_kernel(&val, sizeof(val), &(P)); \
+ val; \
+ })
#define MINBLOCK_US 1
@@ -22,38 +28,38 @@
u32 tret;
};
-struct bpf_map_def SEC("maps") counts = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(struct key_t),
- .value_size = sizeof(u64),
- .max_entries = 10000,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, struct key_t);
+ __type(value, u64);
+ __uint(max_entries, 10000);
+} counts SEC(".maps");
-struct bpf_map_def SEC("maps") start = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(u64),
- .max_entries = 10000,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, u64);
+ __uint(max_entries, 10000);
+} start SEC(".maps");
struct wokeby_t {
char name[TASK_COMM_LEN];
u32 ret;
};
-struct bpf_map_def SEC("maps") wokeby = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(struct wokeby_t),
- .max_entries = 10000,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, struct wokeby_t);
+ __uint(max_entries, 10000);
+} wokeby SEC(".maps");
-struct bpf_map_def SEC("maps") stackmap = {
- .type = BPF_MAP_TYPE_STACK_TRACE,
- .key_size = sizeof(u32),
- .value_size = PERF_MAX_STACK_DEPTH * sizeof(u64),
- .max_entries = 10000,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK_TRACE);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64));
+ __uint(max_entries, 10000);
+} stackmap SEC(".maps");
#define STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP)
diff --git a/samples/bpf/offwaketime_user.c b/samples/bpf/offwaketime_user.c
index fc8767d..5734cfd 100644
--- a/samples/bpf/offwaketime_user.c
+++ b/samples/bpf/offwaketime_user.c
@@ -5,19 +5,19 @@
#include <unistd.h>
#include <stdlib.h>
#include <signal.h>
-#include <linux/bpf.h>
-#include <string.h>
#include <linux/perf_event.h>
#include <errno.h>
-#include <assert.h>
#include <stdbool.h>
#include <sys/resource.h>
-#include "libbpf.h"
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
#include "trace_helpers.h"
#define PRINT_RAW_ADDR 0
+/* counts, stackmap */
+static int map_fd[2];
+
static void print_ksym(__u64 addr)
{
struct ksym *sym;
@@ -52,14 +52,14 @@
int i;
printf("%s;", key->target);
- if (bpf_map_lookup_elem(map_fd[3], &key->tret, ip) != 0) {
+ if (bpf_map_lookup_elem(map_fd[1], &key->tret, ip) != 0) {
printf("---;");
} else {
for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--)
print_ksym(ip[i]);
}
printf("-;");
- if (bpf_map_lookup_elem(map_fd[3], &key->wret, ip) != 0) {
+ if (bpf_map_lookup_elem(map_fd[1], &key->wret, ip) != 0) {
printf("---;");
} else {
for (i = 0; i < PERF_MAX_STACK_DEPTH; i++)
@@ -96,23 +96,54 @@
int main(int argc, char **argv)
{
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ struct bpf_object *obj = NULL;
+ struct bpf_link *links[2];
+ struct bpf_program *prog;
+ int delay = 1, i = 0;
char filename[256];
- int delay = 1;
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- setrlimit(RLIMIT_MEMLOCK, &r);
-
- signal(SIGINT, int_exit);
- signal(SIGTERM, int_exit);
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK)");
+ return 1;
+ }
if (load_kallsyms()) {
printf("failed to process /proc/kallsyms\n");
return 2;
}
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ obj = NULL;
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd[0] = bpf_object__find_map_fd_by_name(obj, "counts");
+ map_fd[1] = bpf_object__find_map_fd_by_name(obj, "stackmap");
+ if (map_fd[0] < 0 || map_fd[1] < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ signal(SIGINT, int_exit);
+ signal(SIGTERM, int_exit);
+
+ bpf_object__for_each_program(prog, obj) {
+ links[i] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[i])) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ links[i] = NULL;
+ goto cleanup;
+ }
+ i++;
}
if (argc > 1)
@@ -120,5 +151,10 @@
sleep(delay);
print_stacks(map_fd[0]);
+cleanup:
+ for (i--; i >= 0; i--)
+ bpf_link__destroy(links[i]);
+
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/parse_ldabs.c b/samples/bpf/parse_ldabs.c
index 6db6b21..c6f65f9 100644
--- a/samples/bpf/parse_ldabs.c
+++ b/samples/bpf/parse_ldabs.c
@@ -11,7 +11,8 @@
#include <linux/tcp.h>
#include <linux/udp.h>
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_legacy.h"
#define DEFAULT_PKTGEN_UDP_PORT 9
#define IP_MF 0x2000
diff --git a/samples/bpf/parse_simple.c b/samples/bpf/parse_simple.c
index 10af53d..4a486cb 100644
--- a/samples/bpf/parse_simple.c
+++ b/samples/bpf/parse_simple.c
@@ -12,7 +12,7 @@
#include <linux/udp.h>
#include <uapi/linux/bpf.h>
#include <net/ip.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#define DEFAULT_PKTGEN_UDP_PORT 9
diff --git a/samples/bpf/parse_varlen.c b/samples/bpf/parse_varlen.c
index 0b6f22f..d862384 100644
--- a/samples/bpf/parse_varlen.c
+++ b/samples/bpf/parse_varlen.c
@@ -14,7 +14,7 @@
#include <linux/udp.h>
#include <uapi/linux/bpf.h>
#include <net/ip.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#define DEFAULT_PKTGEN_UDP_PORT 9
#define DEBUG 0
diff --git a/samples/bpf/sampleip_kern.c b/samples/bpf/sampleip_kern.c
index ceabf31..f24806a 100644
--- a/samples/bpf/sampleip_kern.c
+++ b/samples/bpf/sampleip_kern.c
@@ -8,16 +8,17 @@
#include <linux/ptrace.h>
#include <uapi/linux/bpf.h>
#include <uapi/linux/bpf_perf_event.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
#define MAX_IPS 8192
-struct bpf_map_def SEC("maps") ip_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(u64),
- .value_size = sizeof(u32),
- .max_entries = MAX_IPS,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u64);
+ __type(value, u32);
+ __uint(max_entries, MAX_IPS);
+} ip_map SEC(".maps");
SEC("perf_event")
int do_sample(struct bpf_perf_event_data *ctx)
diff --git a/samples/bpf/sampleip_user.c b/samples/bpf/sampleip_user.c
index 6b5dc26..921c505 100644
--- a/samples/bpf/sampleip_user.c
+++ b/samples/bpf/sampleip_user.c
@@ -10,13 +10,11 @@
#include <errno.h>
#include <signal.h>
#include <string.h>
-#include <assert.h>
#include <linux/perf_event.h>
#include <linux/ptrace.h>
#include <linux/bpf.h>
-#include <sys/ioctl.h>
-#include "libbpf.h"
-#include "bpf_load.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
#include "perf-sys.h"
#include "trace_helpers.h"
@@ -25,6 +23,7 @@
#define MAX_IPS 8192
#define PAGE_OFFSET 0xffff880000000000
+static int map_fd;
static int nr_cpus;
static void usage(void)
@@ -34,9 +33,10 @@
printf(" duration # sampling duration (seconds), default 5\n");
}
-static int sampling_start(int *pmu_fd, int freq)
+static int sampling_start(int freq, struct bpf_program *prog,
+ struct bpf_link *links[])
{
- int i;
+ int i, pmu_fd;
struct perf_event_attr pe_sample_attr = {
.type = PERF_TYPE_SOFTWARE,
@@ -47,26 +47,30 @@
};
for (i = 0; i < nr_cpus; i++) {
- pmu_fd[i] = sys_perf_event_open(&pe_sample_attr, -1 /* pid */, i,
+ pmu_fd = sys_perf_event_open(&pe_sample_attr, -1 /* pid */, i,
-1 /* group_fd */, 0 /* flags */);
- if (pmu_fd[i] < 0) {
+ if (pmu_fd < 0) {
fprintf(stderr, "ERROR: Initializing perf sampling\n");
return 1;
}
- assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF,
- prog_fd[0]) == 0);
- assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0) == 0);
+ links[i] = bpf_program__attach_perf_event(prog, pmu_fd);
+ if (libbpf_get_error(links[i])) {
+ fprintf(stderr, "ERROR: Attach perf event\n");
+ links[i] = NULL;
+ close(pmu_fd);
+ return 1;
+ }
}
return 0;
}
-static void sampling_end(int *pmu_fd)
+static void sampling_end(struct bpf_link *links[])
{
int i;
for (i = 0; i < nr_cpus; i++)
- close(pmu_fd[i]);
+ bpf_link__destroy(links[i]);
}
struct ipcount {
@@ -128,14 +132,17 @@
static void int_exit(int sig)
{
printf("\n");
- print_ip_map(map_fd[0]);
+ print_ip_map(map_fd);
exit(0);
}
int main(int argc, char **argv)
{
+ int opt, freq = DEFAULT_FREQ, secs = DEFAULT_SECS, error = 1;
+ struct bpf_object *obj = NULL;
+ struct bpf_program *prog;
+ struct bpf_link **links;
char filename[256];
- int *pmu_fd, opt, freq = DEFAULT_FREQ, secs = DEFAULT_SECS;
/* process arguments */
while ((opt = getopt(argc, argv, "F:h")) != -1) {
@@ -163,38 +170,58 @@
}
/* create perf FDs for each CPU */
- nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
- pmu_fd = malloc(nr_cpus * sizeof(int));
- if (pmu_fd == NULL) {
- fprintf(stderr, "ERROR: malloc of pmu_fd\n");
- return 1;
+ nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+ links = calloc(nr_cpus, sizeof(struct bpf_link *));
+ if (!links) {
+ fprintf(stderr, "ERROR: malloc of links\n");
+ goto cleanup;
+ }
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ obj = NULL;
+ goto cleanup;
+ }
+
+ prog = bpf_object__find_program_by_name(obj, "do_sample");
+ if (!prog) {
+ fprintf(stderr, "ERROR: finding a prog in obj file failed\n");
+ goto cleanup;
}
/* load BPF program */
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- if (load_bpf_file(filename)) {
- fprintf(stderr, "ERROR: loading BPF program (errno %d):\n",
- errno);
- if (strcmp(bpf_log_buf, "") == 0)
- fprintf(stderr, "Try: ulimit -l unlimited\n");
- else
- fprintf(stderr, "%s", bpf_log_buf);
- return 1;
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
}
+
+ map_fd = bpf_object__find_map_fd_by_name(obj, "ip_map");
+ if (map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
signal(SIGINT, int_exit);
signal(SIGTERM, int_exit);
/* do sampling */
printf("Sampling at %d Hertz for %d seconds. Ctrl-C also ends.\n",
freq, secs);
- if (sampling_start(pmu_fd, freq) != 0)
- return 1;
+ if (sampling_start(freq, prog, links) != 0)
+ goto cleanup;
+
sleep(secs);
- sampling_end(pmu_fd);
- free(pmu_fd);
+ error = 0;
+cleanup:
+ sampling_end(links);
/* output sample counts */
- print_ip_map(map_fd[0]);
+ if (!error)
+ print_ip_map(map_fd);
- return 0;
+ free(links);
+ bpf_object__close(obj);
+ return error;
}
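sampleip and trace_event switch from raw ioctl(PERF_EVENT_IOC_SET_BPF / PERF_EVENT_IOC_ENABLE) calls to bpf_program__attach_perf_event(), which returns a bpf_link that also tears the event down when destroyed. A rough per-CPU attach loop in that style; the perf attributes and the open-coded syscall are illustrative, the samples use sys_perf_event_open() from perf-sys.h instead:

#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>
#include <bpf/libbpf.h>

/* Caller later releases links[] with bpf_link__destroy(). */
static int attach_on_all_cpus(struct bpf_program *prog,
                              struct bpf_link **links, int nr_cpus)
{
        struct perf_event_attr attr = {
                .type = PERF_TYPE_SOFTWARE,
                .config = PERF_COUNT_SW_CPU_CLOCK,
                .freq = 1,
                .sample_freq = 99,      /* illustrative sampling frequency */
        };
        int cpu, fd;

        for (cpu = 0; cpu < nr_cpus; cpu++) {
                fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
                             cpu, -1 /* group_fd */, 0 /* flags */);
                if (fd < 0)
                        return -1;

                /* replaces PERF_EVENT_IOC_SET_BPF + PERF_EVENT_IOC_ENABLE;
                 * on success the event is released via bpf_link__destroy() */
                links[cpu] = bpf_program__attach_perf_event(prog, fd);
                if (libbpf_get_error(links[cpu])) {
                        links[cpu] = NULL;
                        close(fd);
                        return -1;
                }
        }
        return 0;
}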
diff --git a/samples/bpf/sock_flags_kern.c b/samples/bpf/sock_flags_kern.c
index 05dcdf8..6d0ac75 100644
--- a/samples/bpf/sock_flags_kern.c
+++ b/samples/bpf/sock_flags_kern.c
@@ -3,7 +3,7 @@
#include <linux/net.h>
#include <uapi/linux/in.h>
#include <uapi/linux/in6.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
SEC("cgroup/sock1")
int bpf_prog1(struct bpf_sock *sk)
diff --git a/samples/bpf/sockex1_kern.c b/samples/bpf/sockex1_kern.c
index 43e38ce..431c956 100644
--- a/samples/bpf/sockex1_kern.c
+++ b/samples/bpf/sockex1_kern.c
@@ -2,7 +2,8 @@
#include <uapi/linux/if_ether.h>
#include <uapi/linux/if_packet.h>
#include <uapi/linux/ip.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_legacy.h"
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
diff --git a/samples/bpf/sockex1_user.c b/samples/bpf/sockex1_user.c
index a219442..3c83722 100644
--- a/samples/bpf/sockex1_user.c
+++ b/samples/bpf/sockex1_user.c
@@ -3,7 +3,7 @@
#include <assert.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>
-#include "libbpf.h"
+#include <bpf/libbpf.h>
#include "sock_example.h"
#include <unistd.h>
#include <arpa/inet.h>
diff --git a/samples/bpf/sockex2_kern.c b/samples/bpf/sockex2_kern.c
index ae4bdc8..b799754 100644
--- a/samples/bpf/sockex2_kern.c
+++ b/samples/bpf/sockex2_kern.c
@@ -1,11 +1,12 @@
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
#include <uapi/linux/in.h>
#include <uapi/linux/if.h>
#include <uapi/linux/if_ether.h>
#include <uapi/linux/ip.h>
#include <uapi/linux/ipv6.h>
#include <uapi/linux/if_tunnel.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_legacy.h"
#define IP_MF 0x2000
#define IP_OFFSET 0x1FFF
diff --git a/samples/bpf/sockex2_user.c b/samples/bpf/sockex2_user.c
index 6de383d..af925a5 100644
--- a/samples/bpf/sockex2_user.c
+++ b/samples/bpf/sockex2_user.c
@@ -3,7 +3,7 @@
#include <assert.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>
-#include "libbpf.h"
+#include <bpf/libbpf.h>
#include "sock_example.h"
#include <unistd.h>
#include <arpa/inet.h>
diff --git a/samples/bpf/sockex3_kern.c b/samples/bpf/sockex3_kern.c
index c527b57..b363503 100644
--- a/samples/bpf/sockex3_kern.c
+++ b/samples/bpf/sockex3_kern.c
@@ -5,7 +5,6 @@
* License as published by the Free Software Foundation.
*/
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
#include <uapi/linux/in.h>
#include <uapi/linux/if.h>
#include <uapi/linux/if_ether.h>
@@ -13,28 +12,32 @@
#include <uapi/linux/ipv6.h>
#include <uapi/linux/if_tunnel.h>
#include <uapi/linux/mpls.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_legacy.h"
#define IP_MF 0x2000
#define IP_OFFSET 0x1FFF
#define PROG(F) SEC("socket/"__stringify(F)) int bpf_func_##F
-struct bpf_map_def SEC("maps") jmp_table = {
- .type = BPF_MAP_TYPE_PROG_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u32),
- .max_entries = 8,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, sizeof(u32));
+ __uint(max_entries, 8);
+} jmp_table SEC(".maps");
#define PARSE_VLAN 1
#define PARSE_MPLS 2
#define PARSE_IP 3
#define PARSE_IPV6 4
-/* protocol dispatch routine.
- * It tail-calls next BPF program depending on eth proto
- * Note, we could have used:
- * bpf_tail_call(skb, &jmp_table, proto);
- * but it would need large prog_array
+/* Protocol dispatch routine. It tail-calls next BPF program depending
+ * on eth proto. Note, we could have used ...
+ *
+ * bpf_tail_call(skb, &jmp_table, proto);
+ *
+ * ... but it would need large prog_array and cannot be optimised given
+ * the map key is not static.
*/
static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto)
{
@@ -91,12 +94,12 @@
struct flow_key_record flow;
};
-struct bpf_map_def SEC("maps") percpu_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(struct globals),
- .max_entries = 32,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, struct globals);
+ __uint(max_entries, 32);
+} percpu_map SEC(".maps");
/* user poor man's per_cpu until native support is ready */
static struct globals *this_cpu_globals(void)
@@ -112,12 +115,12 @@
__u64 bytes;
};
-struct bpf_map_def SEC("maps") hash_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(struct flow_key_record),
- .value_size = sizeof(struct pair),
- .max_entries = 1024,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, struct flow_key_record);
+ __type(value, struct pair);
+ __uint(max_entries, 1024);
+} hash_map SEC(".maps");
static void update_stats(struct __sk_buff *skb, struct globals *g)
{
diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c
index bbb1cd0..7793f6a 100644
--- a/samples/bpf/sockex3_user.c
+++ b/samples/bpf/sockex3_user.c
@@ -1,18 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include <assert.h>
-#include <linux/bpf.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
#include "sock_example.h"
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/resource.h>
-#define PARSE_IP 3
-#define PARSE_IP_PROG_FD (prog_fd[0])
-#define PROG_ARRAY_FD (map_fd[0])
-
struct flow_key_record {
__be32 src;
__be32 dst;
@@ -30,31 +25,55 @@
int main(int argc, char **argv)
{
+ int i, sock, key, fd, main_prog_fd, jmp_table_fd, hash_map_fd;
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ const char *section;
char filename[256];
FILE *f;
- int i, sock, err, id, key = PARSE_IP;
- struct bpf_prog_info info = {};
- uint32_t info_len = sizeof(info);
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
setrlimit(RLIMIT_MEMLOCK, &r);
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
}
- /* Test fd array lookup which returns the id of the bpf_prog */
- err = bpf_obj_get_info_by_fd(PARSE_IP_PROG_FD, &info, &info_len);
- assert(!err);
- err = bpf_map_lookup_elem(PROG_ARRAY_FD, &key, &id);
- assert(!err);
- assert(id == info.id);
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ jmp_table_fd = bpf_object__find_map_fd_by_name(obj, "jmp_table");
+ hash_map_fd = bpf_object__find_map_fd_by_name(obj, "hash_map");
+ if (jmp_table_fd < 0 || hash_map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ fd = bpf_program__fd(prog);
+
+ section = bpf_program__section_name(prog);
+ if (sscanf(section, "socket/%d", &key) != 1) {
+ fprintf(stderr, "ERROR: finding prog failed\n");
+ goto cleanup;
+ }
+
+ if (key == 0)
+ main_prog_fd = fd;
+ else
+ bpf_map_update_elem(jmp_table_fd, &key, &fd, BPF_ANY);
+ }
sock = open_raw_sock("lo");
- assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd[4],
+ /* attach BPF program to socket */
+ assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &main_prog_fd,
sizeof(__u32)) == 0);
if (argc > 1)
@@ -69,8 +88,8 @@
sleep(1);
printf("IP src.port -> dst.port bytes packets\n");
- while (bpf_map_get_next_key(map_fd[2], &key, &next_key) == 0) {
- bpf_map_lookup_elem(map_fd[2], &next_key, &value);
+ while (bpf_map_get_next_key(hash_map_fd, &key, &next_key) == 0) {
+ bpf_map_lookup_elem(hash_map_fd, &next_key, &value);
printf("%s.%05d -> %s.%05d %12lld %12lld\n",
inet_ntoa((struct in_addr){htonl(next_key.src)}),
next_key.port16[0],
@@ -80,5 +99,8 @@
key = next_key;
}
}
+
+cleanup:
+ bpf_object__close(obj);
return 0;
}
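The sockex3 user program no longer relies on bpf_load's fixed prog_fd[] ordering; it derives each program's slot from its "socket/<N>" section name and writes the program fd into the prog array itself, keeping slot 0 as the socket-attached entry program. A sketch of that loop (function and variable names are illustrative):

#include <stdio.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>

static int populate_jmp_table(struct bpf_object *obj, int *main_prog_fd)
{
        int jmp_table_fd = bpf_object__find_map_fd_by_name(obj, "jmp_table");
        struct bpf_program *prog;
        const char *section;
        int key, fd;

        if (jmp_table_fd < 0)
                return -1;

        bpf_object__for_each_program(prog, obj) {
                fd = bpf_program__fd(prog);
                section = bpf_program__section_name(prog);
                if (sscanf(section, "socket/%d", &key) != 1)
                        return -1;

                if (key == 0)
                        *main_prog_fd = fd;     /* entry prog, attached via SO_ATTACH_BPF */
                else
                        bpf_map_update_elem(jmp_table_fd, &key, &fd, BPF_ANY);
        }
        return 0;
}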
diff --git a/samples/bpf/spintest_kern.c b/samples/bpf/spintest_kern.c
index ce0167d..455da77 100644
--- a/samples/bpf/spintest_kern.c
+++ b/samples/bpf/spintest_kern.c
@@ -9,27 +9,28 @@
#include <linux/version.h>
#include <uapi/linux/bpf.h>
#include <uapi/linux/perf_event.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
-struct bpf_map_def SEC("maps") my_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(long),
- .value_size = sizeof(long),
- .max_entries = 1024,
-};
-struct bpf_map_def SEC("maps") my_map2 = {
- .type = BPF_MAP_TYPE_PERCPU_HASH,
- .key_size = sizeof(long),
- .value_size = sizeof(long),
- .max_entries = 1024,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, long);
+ __type(value, long);
+ __uint(max_entries, 1024);
+} my_map SEC(".maps");
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(key_size, sizeof(long));
+ __uint(value_size, sizeof(long));
+ __uint(max_entries, 1024);
+} my_map2 SEC(".maps");
-struct bpf_map_def SEC("maps") stackmap = {
- .type = BPF_MAP_TYPE_STACK_TRACE,
- .key_size = sizeof(u32),
- .value_size = PERF_MAX_STACK_DEPTH * sizeof(u64),
- .max_entries = 10000,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK_TRACE);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64));
+ __uint(max_entries, 10000);
+} stackmap SEC(".maps");
#define PROG(foo) \
int foo(struct pt_regs *ctx) \
diff --git a/samples/bpf/spintest_user.c b/samples/bpf/spintest_user.c
index 2556af2..f090d0d 100644
--- a/samples/bpf/spintest_user.c
+++ b/samples/bpf/spintest_user.c
@@ -1,40 +1,77 @@
// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include <unistd.h>
-#include <linux/bpf.h>
#include <string.h>
#include <assert.h>
#include <sys/resource.h>
-#include "libbpf.h"
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
#include "trace_helpers.h"
int main(int ac, char **argv)
{
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ char filename[256], symbol[256];
+ struct bpf_object *obj = NULL;
+ struct bpf_link *links[20];
long key, next_key, value;
- char filename[256];
+ struct bpf_program *prog;
+ int map_fd, i, j = 0;
+ const char *section;
struct ksym *sym;
- int i;
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- setrlimit(RLIMIT_MEMLOCK, &r);
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK)");
+ return 1;
+ }
if (load_kallsyms()) {
printf("failed to process /proc/kallsyms\n");
return 2;
}
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ obj = NULL;
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd = bpf_object__find_map_fd_by_name(obj, "my_map");
+ if (map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ section = bpf_program__section_name(prog);
+ if (sscanf(section, "kprobe/%s", symbol) != 1)
+ continue;
+
+ /* Attach prog only when symbol exists */
+ if (ksym_get_addr(symbol)) {
+ links[j] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[j])) {
+ fprintf(stderr, "bpf_program__attach failed\n");
+ links[j] = NULL;
+ goto cleanup;
+ }
+ j++;
+ }
}
for (i = 0; i < 5; i++) {
key = 0;
printf("kprobing funcs:");
- while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0) {
- bpf_map_lookup_elem(map_fd[0], &next_key, &value);
+ while (bpf_map_get_next_key(map_fd, &key, &next_key) == 0) {
+ bpf_map_lookup_elem(map_fd, &next_key, &value);
assert(next_key == value);
sym = ksym_search(value);
key = next_key;
@@ -48,10 +85,15 @@
if (key)
printf("\n");
key = 0;
- while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0)
- bpf_map_delete_elem(map_fd[0], &next_key);
+ while (bpf_map_get_next_key(map_fd, &key, &next_key) == 0)
+ bpf_map_delete_elem(map_fd, &next_key);
sleep(1);
}
+cleanup:
+ for (j--; j >= 0; j--)
+ bpf_link__destroy(links[j]);
+
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/syscall_tp_kern.c b/samples/bpf/syscall_tp_kern.c
index 630ce8c..50231c2 100644
--- a/samples/bpf/syscall_tp_kern.c
+++ b/samples/bpf/syscall_tp_kern.c
@@ -2,7 +2,7 @@
/* Copyright (c) 2017 Facebook
*/
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
struct syscalls_enter_open_args {
unsigned long long unused;
@@ -18,19 +18,19 @@
long ret;
};
-struct bpf_map_def SEC("maps") enter_open_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u32),
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, u32);
+ __uint(max_entries, 1);
+} enter_open_map SEC(".maps");
-struct bpf_map_def SEC("maps") exit_open_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u32),
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, u32);
+ __uint(max_entries, 1);
+} exit_open_map SEC(".maps");
static __always_inline void count(void *map)
{
diff --git a/samples/bpf/syscall_tp_user.c b/samples/bpf/syscall_tp_user.c
index 57014ba..76a1d00 100644
--- a/samples/bpf/syscall_tp_user.c
+++ b/samples/bpf/syscall_tp_user.c
@@ -5,16 +5,12 @@
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
-#include <signal.h>
-#include <linux/bpf.h>
#include <string.h>
#include <linux/perf_event.h>
#include <errno.h>
-#include <assert.h>
-#include <stdbool.h>
#include <sys/resource.h>
+#include <bpf/libbpf.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
/* This program verifies bpf attachment to tracepoint sys_enter_* and sys_exit_*.
* This requires kernel CONFIG_FTRACE_SYSCALLS to be set.
@@ -49,16 +45,44 @@
static int test(char *filename, int num_progs)
{
- int i, fd, map0_fds[num_progs], map1_fds[num_progs];
+ int map0_fds[num_progs], map1_fds[num_progs], fd, i, j = 0;
+ struct bpf_link *links[num_progs * 4];
+ struct bpf_object *objs[num_progs];
+ struct bpf_program *prog;
for (i = 0; i < num_progs; i++) {
- if (load_bpf_file(filename)) {
- fprintf(stderr, "%s", bpf_log_buf);
- return 1;
+ objs[i] = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(objs[i])) {
+ fprintf(stderr, "opening BPF object file failed\n");
+ objs[i] = NULL;
+ goto cleanup;
}
- printf("prog #%d: map ids %d %d\n", i, map_fd[0], map_fd[1]);
- map0_fds[i] = map_fd[0];
- map1_fds[i] = map_fd[1];
+
+ /* load BPF program */
+ if (bpf_object__load(objs[i])) {
+ fprintf(stderr, "loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map0_fds[i] = bpf_object__find_map_fd_by_name(objs[i],
+ "enter_open_map");
+ map1_fds[i] = bpf_object__find_map_fd_by_name(objs[i],
+ "exit_open_map");
+ if (map0_fds[i] < 0 || map1_fds[i] < 0) {
+ fprintf(stderr, "finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ bpf_object__for_each_program(prog, objs[i]) {
+ links[j] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[j])) {
+ fprintf(stderr, "bpf_program__attach failed\n");
+ links[j] = NULL;
+ goto cleanup;
+ }
+ j++;
+ }
+ printf("prog #%d: map ids %d %d\n", i, map0_fds[i], map1_fds[i]);
}
/* current load_bpf_file has perf_event_open default pid = -1
@@ -80,6 +104,12 @@
verify_map(map1_fds[i]);
}
+cleanup:
+ for (j--; j >= 0; j--)
+ bpf_link__destroy(links[j]);
+
+ for (i--; i >= 0; i--)
+ bpf_object__close(objs[i]);
return 0;
}
diff --git a/samples/bpf/task_fd_query_kern.c b/samples/bpf/task_fd_query_kern.c
index fb56fc2..c821294 100644
--- a/samples/bpf/task_fd_query_kern.c
+++ b/samples/bpf/task_fd_query_kern.c
@@ -2,7 +2,7 @@
#include <linux/version.h>
#include <linux/ptrace.h>
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
SEC("kprobe/blk_mq_start_request")
int bpf_prog1(struct pt_regs *ctx)
@@ -10,7 +10,7 @@
return 0;
}
-SEC("kretprobe/blk_account_io_completion")
+SEC("kretprobe/blk_account_io_done")
int bpf_prog2(struct pt_regs *ctx)
{
return 0;
diff --git a/samples/bpf/task_fd_query_user.c b/samples/bpf/task_fd_query_user.c
index 4c31b30..b68bd2f 100644
--- a/samples/bpf/task_fd_query_user.c
+++ b/samples/bpf/task_fd_query_user.c
@@ -15,7 +15,7 @@
#include <sys/stat.h>
#include <linux/perf_event.h>
-#include "libbpf.h"
+#include <bpf/libbpf.h>
#include "bpf_load.h"
#include "bpf_util.h"
#include "perf-sys.h"
@@ -290,7 +290,7 @@
int main(int argc, char **argv)
{
- struct rlimit r = {1024*1024, RLIM_INFINITY};
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
extern char __executable_start;
char filename[256], buf[256];
__u64 uprobe_file_offset;
@@ -314,7 +314,7 @@
/* test two functions in the corresponding *_kern.c file */
CHECK_AND_RET(test_debug_fs_kprobe(0, "blk_mq_start_request",
BPF_FD_TYPE_KPROBE));
- CHECK_AND_RET(test_debug_fs_kprobe(1, "blk_account_io_completion",
+ CHECK_AND_RET(test_debug_fs_kprobe(1, "blk_account_io_done",
BPF_FD_TYPE_KRETPROBE));
/* test nondebug fs kprobe */
diff --git a/samples/bpf/tc_l2_redirect_kern.c b/samples/bpf/tc_l2_redirect_kern.c
index 7ef2a12..fd2fa00 100644
--- a/samples/bpf/tc_l2_redirect_kern.c
+++ b/samples/bpf/tc_l2_redirect_kern.c
@@ -15,7 +15,7 @@
#include <uapi/linux/filter.h>
#include <uapi/linux/pkt_cls.h>
#include <net/ipv6.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#define _htonl __builtin_bswap32
diff --git a/samples/bpf/tcbpf1_kern.c b/samples/bpf/tcbpf1_kern.c
index 274c884..e935613 100644
--- a/samples/bpf/tcbpf1_kern.c
+++ b/samples/bpf/tcbpf1_kern.c
@@ -7,7 +7,8 @@
#include <uapi/linux/tcp.h>
#include <uapi/linux/filter.h>
#include <uapi/linux/pkt_cls.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_legacy.h"
/* compiler workaround */
#define _htonl __builtin_bswap32
diff --git a/samples/bpf/tcp_basertt_kern.c b/samples/bpf/tcp_basertt_kern.c
index 9dba48c..8dfe09a 100644
--- a/samples/bpf/tcp_basertt_kern.c
+++ b/samples/bpf/tcp_basertt_kern.c
@@ -16,8 +16,8 @@
#include <uapi/linux/if_packet.h>
#include <uapi/linux/ip.h>
#include <linux/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
#define DEBUG 1
diff --git a/samples/bpf/tcp_bufs_kern.c b/samples/bpf/tcp_bufs_kern.c
index af8486f..6a80d08 100644
--- a/samples/bpf/tcp_bufs_kern.c
+++ b/samples/bpf/tcp_bufs_kern.c
@@ -17,8 +17,8 @@
#include <uapi/linux/if_packet.h>
#include <uapi/linux/ip.h>
#include <linux/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
#define DEBUG 1
diff --git a/samples/bpf/tcp_clamp_kern.c b/samples/bpf/tcp_clamp_kern.c
index 26c0fd0..e88bd9a 100644
--- a/samples/bpf/tcp_clamp_kern.c
+++ b/samples/bpf/tcp_clamp_kern.c
@@ -17,8 +17,8 @@
#include <uapi/linux/if_packet.h>
#include <uapi/linux/ip.h>
#include <linux/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
#define DEBUG 1
diff --git a/samples/bpf/tcp_cong_kern.c b/samples/bpf/tcp_cong_kern.c
index 6d4dc4c..2311fc9 100644
--- a/samples/bpf/tcp_cong_kern.c
+++ b/samples/bpf/tcp_cong_kern.c
@@ -16,8 +16,8 @@
#include <uapi/linux/if_packet.h>
#include <uapi/linux/ip.h>
#include <linux/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
#define DEBUG 1
diff --git a/samples/bpf/tcp_dumpstats_kern.c b/samples/bpf/tcp_dumpstats_kern.c
index 8557913..e80d3af 100644
--- a/samples/bpf/tcp_dumpstats_kern.c
+++ b/samples/bpf/tcp_dumpstats_kern.c
@@ -4,8 +4,8 @@
*/
#include <linux/bpf.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
#define INTERVAL 1000000000ULL
diff --git a/samples/bpf/tcp_iw_kern.c b/samples/bpf/tcp_iw_kern.c
index da61d53..d144455 100644
--- a/samples/bpf/tcp_iw_kern.c
+++ b/samples/bpf/tcp_iw_kern.c
@@ -17,8 +17,8 @@
#include <uapi/linux/if_packet.h>
#include <uapi/linux/ip.h>
#include <linux/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
#define DEBUG 1
diff --git a/samples/bpf/tcp_rwnd_kern.c b/samples/bpf/tcp_rwnd_kern.c
index d011e38..223d9c2 100644
--- a/samples/bpf/tcp_rwnd_kern.c
+++ b/samples/bpf/tcp_rwnd_kern.c
@@ -16,8 +16,8 @@
#include <uapi/linux/if_packet.h>
#include <uapi/linux/ip.h>
#include <linux/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
#define DEBUG 1
diff --git a/samples/bpf/tcp_synrto_kern.c b/samples/bpf/tcp_synrto_kern.c
index 720d195..d58004e 100644
--- a/samples/bpf/tcp_synrto_kern.c
+++ b/samples/bpf/tcp_synrto_kern.c
@@ -16,8 +16,8 @@
#include <uapi/linux/if_packet.h>
#include <uapi/linux/ip.h>
#include <linux/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
#define DEBUG 1
diff --git a/samples/bpf/tcp_tos_reflect_kern.c b/samples/bpf/tcp_tos_reflect_kern.c
index 369faca..953fedc 100644
--- a/samples/bpf/tcp_tos_reflect_kern.c
+++ b/samples/bpf/tcp_tos_reflect_kern.c
@@ -15,8 +15,8 @@
#include <uapi/linux/ipv6.h>
#include <uapi/linux/in.h>
#include <linux/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
#define DEBUG 1
diff --git a/samples/bpf/test_cgrp2_tc_kern.c b/samples/bpf/test_cgrp2_tc_kern.c
index 1547b36..4dd532a 100644
--- a/samples/bpf/test_cgrp2_tc_kern.c
+++ b/samples/bpf/test_cgrp2_tc_kern.c
@@ -10,7 +10,7 @@
#include <uapi/linux/ipv6.h>
#include <uapi/linux/pkt_cls.h>
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
/* copy of 'struct ethhdr' without __packed */
struct eth_hdr {
diff --git a/samples/bpf/test_current_task_under_cgroup_kern.c b/samples/bpf/test_current_task_under_cgroup_kern.c
index 86b28d7..fbd43e2 100644
--- a/samples/bpf/test_current_task_under_cgroup_kern.c
+++ b/samples/bpf/test_current_task_under_cgroup_kern.c
@@ -8,25 +8,26 @@
#include <linux/ptrace.h>
#include <uapi/linux/bpf.h>
#include <linux/version.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#include <uapi/linux/utsname.h>
+#include "trace_common.h"
-struct bpf_map_def SEC("maps") cgroup_map = {
- .type = BPF_MAP_TYPE_CGROUP_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u32),
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_CGROUP_ARRAY);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, sizeof(u32));
+ __uint(max_entries, 1);
+} cgroup_map SEC(".maps");
-struct bpf_map_def SEC("maps") perf_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u64),
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, u64);
+ __uint(max_entries, 1);
+} perf_map SEC(".maps");
/* Writes the last PID that called sync to a map at index 0 */
-SEC("kprobe/sys_sync")
+SEC("kprobe/" SYSCALL(sys_sync))
int bpf_prog1(struct pt_regs *ctx)
{
u64 pid = bpf_get_current_pid_tgid();
diff --git a/samples/bpf/test_current_task_under_cgroup_user.c b/samples/bpf/test_current_task_under_cgroup_user.c
index 06e9f8c..ac251a4 100644
--- a/samples/bpf/test_current_task_under_cgroup_user.c
+++ b/samples/bpf/test_current_task_under_cgroup_user.c
@@ -4,10 +4,9 @@
#define _GNU_SOURCE
#include <stdio.h>
-#include <linux/bpf.h>
#include <unistd.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
#include "cgroup_helpers.h"
#define CGROUP_PATH "/my-cgroup"
@@ -15,13 +14,44 @@
int main(int argc, char **argv)
{
pid_t remote_pid, local_pid = getpid();
- int cg2, idx = 0, rc = 0;
+ struct bpf_link *link = NULL;
+ struct bpf_program *prog;
+ int cg2, idx = 0, rc = 1;
+ struct bpf_object *obj;
char filename[256];
+ int map_fd[2];
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
+
+ prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+ if (!prog) {
+ printf("finding a prog in obj file failed\n");
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd[0] = bpf_object__find_map_fd_by_name(obj, "cgroup_map");
+ map_fd[1] = bpf_object__find_map_fd_by_name(obj, "perf_map");
+ if (map_fd[0] < 0 || map_fd[1] < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ link = bpf_program__attach(prog);
+ if (libbpf_get_error(link)) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ link = NULL;
+ goto cleanup;
}
if (setup_cgroup_environment())
@@ -70,12 +100,14 @@
goto err;
}
- goto out;
-err:
- rc = 1;
+ rc = 0;
-out:
+err:
close(cg2);
cleanup_cgroup_environment();
+
+cleanup:
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
return rc;
}
diff --git a/samples/bpf/test_lwt_bpf.c b/samples/bpf/test_lwt_bpf.c
index bacc801..1b56857 100644
--- a/samples/bpf/test_lwt_bpf.c
+++ b/samples/bpf/test_lwt_bpf.c
@@ -20,7 +20,7 @@
#include <linux/udp.h>
#include <linux/icmpv6.h>
#include <linux/if_ether.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#include <string.h>
# define printk(fmt, ...) \
diff --git a/samples/bpf/test_map_in_map_kern.c b/samples/bpf/test_map_in_map_kern.c
index 42c44d0..b0200c8 100644
--- a/samples/bpf/test_map_in_map_kern.c
+++ b/samples/bpf/test_map_in_map_kern.c
@@ -10,65 +10,68 @@
#include <linux/version.h>
#include <uapi/linux/bpf.h>
#include <uapi/linux/in6.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "trace_common.h"
#define MAX_NR_PORTS 65536
/* map #0 */
-struct bpf_map_def SEC("maps") port_a = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(int),
- .max_entries = MAX_NR_PORTS,
-};
+struct inner_a {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, int);
+ __uint(max_entries, MAX_NR_PORTS);
+} port_a SEC(".maps");
/* map #1 */
-struct bpf_map_def SEC("maps") port_h = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(int),
- .max_entries = 1,
-};
+struct inner_h {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, int);
+ __uint(max_entries, 1);
+} port_h SEC(".maps");
/* map #2 */
-struct bpf_map_def SEC("maps") reg_result_h = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(int),
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, int);
+ __uint(max_entries, 1);
+} reg_result_h SEC(".maps");
/* map #3 */
-struct bpf_map_def SEC("maps") inline_result_h = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(int),
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, int);
+ __uint(max_entries, 1);
+} inline_result_h SEC(".maps");
/* map #4 */ /* Test case #0 */
-struct bpf_map_def SEC("maps") a_of_port_a = {
- .type = BPF_MAP_TYPE_ARRAY_OF_MAPS,
- .key_size = sizeof(u32),
- .inner_map_idx = 0, /* map_fd[0] is port_a */
- .max_entries = MAX_NR_PORTS,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, MAX_NR_PORTS);
+ __uint(key_size, sizeof(u32));
+ __array(values, struct inner_a); /* use inner_a as inner map */
+} a_of_port_a SEC(".maps");
/* map #5 */ /* Test case #1 */
-struct bpf_map_def SEC("maps") h_of_port_a = {
- .type = BPF_MAP_TYPE_HASH_OF_MAPS,
- .key_size = sizeof(u32),
- .inner_map_idx = 0, /* map_fd[0] is port_a */
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(u32));
+ __array(values, struct inner_a); /* use inner_a as inner map */
+} h_of_port_a SEC(".maps");
/* map #6 */ /* Test case #2 */
-struct bpf_map_def SEC("maps") h_of_port_h = {
- .type = BPF_MAP_TYPE_HASH_OF_MAPS,
- .key_size = sizeof(u32),
- .inner_map_idx = 1, /* map_fd[1] is port_h */
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(u32));
+ __array(values, struct inner_h); /* use inner_h as inner map */
+} h_of_port_h SEC(".maps");
static __always_inline int do_reg_lookup(void *inner_map, u32 port)
{
@@ -100,7 +103,7 @@
return result ? *result : -ENOENT;
}
-SEC("kprobe/sys_connect")
+SEC("kprobe/__sys_connect")
int trace_sys_connect(struct pt_regs *ctx)
{
struct sockaddr_in6 *in6;
@@ -110,13 +113,13 @@
void *outer_map, *inner_map;
bool inline_hash = false;
- in6 = (struct sockaddr_in6 *)PT_REGS_PARM2(ctx);
- addrlen = (int)PT_REGS_PARM3(ctx);
+ in6 = (struct sockaddr_in6 *)PT_REGS_PARM2_CORE(ctx);
+ addrlen = (int)PT_REGS_PARM3_CORE(ctx);
if (addrlen != sizeof(*in6))
return 0;
- ret = bpf_probe_read(dst6, sizeof(dst6), &in6->sin6_addr);
+ ret = bpf_probe_read_user(dst6, sizeof(dst6), &in6->sin6_addr);
if (ret) {
inline_ret = ret;
goto done;
@@ -127,7 +130,7 @@
test_case = dst6[7];
- ret = bpf_probe_read(&port, sizeof(port), &in6->sin6_port);
+ ret = bpf_probe_read_user(&port, sizeof(port), &in6->sin6_port);
if (ret) {
inline_ret = ret;
goto done;
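With BTF-defined maps there is no .inner_map_idx field; the inner map's declared struct type is referenced from the outer map with __array(values, ...), exactly as the hunk above does with inner_a and inner_h. A stripped-down sketch with illustrative names:

#include <uapi/linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* the inner map type gets a name so the outer map can reference it */
struct inner_array {
        __uint(type, BPF_MAP_TYPE_ARRAY);
        __type(key, __u32);
        __type(value, int);
        __uint(max_entries, 16);
} inner_instance SEC(".maps");

/* outer map: __array(values, ...) replaces the removed .inner_map_idx */
struct {
        __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
        __uint(key_size, sizeof(__u32));
        __uint(max_entries, 16);
        __array(values, struct inner_array);
} outer SEC(".maps");

char _license[] SEC("license") = "GPL";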
diff --git a/samples/bpf/test_map_in_map_user.c b/samples/bpf/test_map_in_map_user.c
index eb29bcb..98656de 100644
--- a/samples/bpf/test_map_in_map_user.c
+++ b/samples/bpf/test_map_in_map_user.c
@@ -11,7 +11,9 @@
#include <stdlib.h>
#include <stdio.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
+
+static int map_fd[7];
#define PORT_A (map_fd[0])
#define PORT_H (map_fd[1])
@@ -113,18 +115,59 @@
int main(int argc, char **argv)
{
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ struct bpf_link *link = NULL;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
char filename[256];
- assert(!setrlimit(RLIMIT_MEMLOCK, &r));
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK)");
+ return 1;
+ }
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ prog = bpf_object__find_program_by_name(obj, "trace_sys_connect");
+ if (!prog) {
+ printf("finding a prog in obj file failed\n");
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd[0] = bpf_object__find_map_fd_by_name(obj, "port_a");
+ map_fd[1] = bpf_object__find_map_fd_by_name(obj, "port_h");
+ map_fd[2] = bpf_object__find_map_fd_by_name(obj, "reg_result_h");
+ map_fd[3] = bpf_object__find_map_fd_by_name(obj, "inline_result_h");
+ map_fd[4] = bpf_object__find_map_fd_by_name(obj, "a_of_port_a");
+ map_fd[5] = bpf_object__find_map_fd_by_name(obj, "h_of_port_a");
+ map_fd[6] = bpf_object__find_map_fd_by_name(obj, "h_of_port_h");
+ if (map_fd[0] < 0 || map_fd[1] < 0 || map_fd[2] < 0 ||
+ map_fd[3] < 0 || map_fd[4] < 0 || map_fd[5] < 0 || map_fd[6] < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ link = bpf_program__attach(prog);
+ if (libbpf_get_error(link)) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ link = NULL;
+ goto cleanup;
}
test_map_in_map();
+cleanup:
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/test_overhead_kprobe_kern.c b/samples/bpf/test_overhead_kprobe_kern.c
index 468a66a..f6d593e 100644
--- a/samples/bpf/test_overhead_kprobe_kern.c
+++ b/samples/bpf/test_overhead_kprobe_kern.c
@@ -7,9 +7,15 @@
#include <linux/version.h>
#include <linux/ptrace.h>
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
-#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;})
+#define _(P) \
+ ({ \
+ typeof(P) val = 0; \
+ bpf_probe_read_kernel(&val, sizeof(val), &(P)); \
+ val; \
+ })
SEC("kprobe/__set_task_comm")
int prog(struct pt_regs *ctx)
@@ -24,8 +30,9 @@
tsk = (void *)PT_REGS_PARM1(ctx);
pid = _(tsk->pid);
- bpf_probe_read(oldcomm, sizeof(oldcomm), &tsk->comm);
- bpf_probe_read(newcomm, sizeof(newcomm), (void *)PT_REGS_PARM2(ctx));
+ bpf_probe_read_kernel(oldcomm, sizeof(oldcomm), &tsk->comm);
+ bpf_probe_read_kernel(newcomm, sizeof(newcomm),
+ (void *)PT_REGS_PARM2(ctx));
signal = _(tsk->signal);
oom_score_adj = _(signal->oom_score_adj);
return 0;
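bpf_probe_read() is replaced throughout with the explicit bpf_probe_read_kernel()/bpf_probe_read_user() pair, chosen by whether the source pointer is a kernel or a user address. A minimal kprobe sketch of the kernel-side flavour; the attach point mirrors the sample above, everything else is illustrative:

/* kernel-tree style includes, as used throughout samples/bpf */
#include <linux/sched.h>
#include <linux/ptrace.h>
#include <uapi/linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

SEC("kprobe/__set_task_comm")
int probe_set_task_comm(struct pt_regs *ctx)
{
        struct task_struct *tsk = (void *)PT_REGS_PARM1(ctx);
        char comm[TASK_COMM_LEN];

        /* &tsk->comm is a kernel address, so the _kernel flavour applies;
         * user pointers (e.g. a sockaddr passed to a syscall) take
         * bpf_probe_read_user() instead */
        bpf_probe_read_kernel(comm, sizeof(comm), &tsk->comm);
        return 0;
}

char _license[] SEC("license") = "GPL";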
diff --git a/samples/bpf/test_overhead_raw_tp_kern.c b/samples/bpf/test_overhead_raw_tp_kern.c
index d2af8bc..8763181 100644
--- a/samples/bpf/test_overhead_raw_tp_kern.c
+++ b/samples/bpf/test_overhead_raw_tp_kern.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018 Facebook */
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
SEC("raw_tracepoint/task_rename")
int prog(struct bpf_raw_tracepoint_args *ctx)
diff --git a/samples/bpf/test_overhead_tp_kern.c b/samples/bpf/test_overhead_tp_kern.c
index 38f5c0b..eaa3269 100644
--- a/samples/bpf/test_overhead_tp_kern.c
+++ b/samples/bpf/test_overhead_tp_kern.c
@@ -5,7 +5,7 @@
* License as published by the Free Software Foundation.
*/
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
/* from /sys/kernel/debug/tracing/events/task/task_rename/format */
struct task_rename {
diff --git a/samples/bpf/test_probe_write_user_kern.c b/samples/bpf/test_probe_write_user_kern.c
index 3a677c8..220a964 100644
--- a/samples/bpf/test_probe_write_user_kern.c
+++ b/samples/bpf/test_probe_write_user_kern.c
@@ -8,14 +8,17 @@
#include <linux/netdevice.h>
#include <uapi/linux/bpf.h>
#include <linux/version.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "trace_common.h"
-struct bpf_map_def SEC("maps") dnat_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(struct sockaddr_in),
- .value_size = sizeof(struct sockaddr_in),
- .max_entries = 256,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, struct sockaddr_in);
+ __type(value, struct sockaddr_in);
+ __uint(max_entries, 256);
+} dnat_map SEC(".maps");
/* kprobe is NOT a stable ABI
* kernel functions can be removed, renamed or completely change semantics.
@@ -25,18 +28,19 @@
* This example sits on a syscall, and the syscall ABI is relatively stable
* of course, across platforms, and over time, the ABI may change.
*/
-SEC("kprobe/sys_connect")
+SEC("kprobe/" SYSCALL(sys_connect))
int bpf_prog1(struct pt_regs *ctx)
{
+ struct pt_regs *real_regs = (struct pt_regs *)PT_REGS_PARM1_CORE(ctx);
+ void *sockaddr_arg = (void *)PT_REGS_PARM2_CORE(real_regs);
+ int sockaddr_len = (int)PT_REGS_PARM3_CORE(real_regs);
struct sockaddr_in new_addr, orig_addr = {};
struct sockaddr_in *mapped_addr;
- void *sockaddr_arg = (void *)PT_REGS_PARM2(ctx);
- int sockaddr_len = (int)PT_REGS_PARM3(ctx);
if (sockaddr_len > sizeof(orig_addr))
return 0;
- if (bpf_probe_read(&orig_addr, sizeof(orig_addr), sockaddr_arg) != 0)
+ if (bpf_probe_read_user(&orig_addr, sizeof(orig_addr), sockaddr_arg) != 0)
return 0;
mapped_addr = bpf_map_lookup_elem(&dnat_map, &orig_addr);
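Because the kprobe now lands on the arch syscall wrapper (see the SYSCALL() macro in trace_common.h further below), the wrapper's only argument is itself a struct pt_regs * holding the real syscall arguments, hence the extra PT_REGS_PARM*_CORE() hop before the user-memory read. A condensed sketch of that pattern; the read target and names are illustrative:

#include <linux/ptrace.h>
#include <uapi/linux/bpf.h>
#include <uapi/linux/in.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
#include "trace_common.h"

SEC("kprobe/" SYSCALL(sys_connect))
int probe_sys_connect(struct pt_regs *ctx)
{
        /* the wrapper (__x64_sys_connect on x86_64) receives the real
         * register set as its first argument, so unwrap it first */
        struct pt_regs *real_regs = (struct pt_regs *)PT_REGS_PARM1_CORE(ctx);
        void *uaddr = (void *)PT_REGS_PARM2_CORE(real_regs);
        int addrlen = (int)PT_REGS_PARM3_CORE(real_regs);
        struct sockaddr_in sa = {};

        if (addrlen == sizeof(sa))
                bpf_probe_read_user(&sa, sizeof(sa), uaddr);    /* user pointer */
        return 0;
}

char _license[] SEC("license") = "GPL";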
diff --git a/samples/bpf/test_probe_write_user_user.c b/samples/bpf/test_probe_write_user_user.c
index 045eb5e..00ccfb8 100644
--- a/samples/bpf/test_probe_write_user_user.c
+++ b/samples/bpf/test_probe_write_user_user.c
@@ -1,21 +1,22 @@
// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include <assert.h>
-#include <linux/bpf.h>
#include <unistd.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
#include <sys/socket.h>
-#include <string.h>
#include <netinet/in.h>
#include <arpa/inet.h>
int main(int ac, char **argv)
{
- int serverfd, serverconnfd, clientfd;
- socklen_t sockaddr_len;
- struct sockaddr serv_addr, mapped_addr, tmp_addr;
struct sockaddr_in *serv_addr_in, *mapped_addr_in, *tmp_addr_in;
+ struct sockaddr serv_addr, mapped_addr, tmp_addr;
+ int serverfd, serverconnfd, clientfd, map_fd;
+ struct bpf_link *link = NULL;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ socklen_t sockaddr_len;
char filename[256];
char *ip;
@@ -24,10 +25,35 @@
tmp_addr_in = (struct sockaddr_in *)&tmp_addr;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+ if (libbpf_get_error(prog)) {
+ fprintf(stderr, "ERROR: finding a prog in obj file failed\n");
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd = bpf_object__find_map_fd_by_name(obj, "dnat_map");
+ if (map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ link = bpf_program__attach(prog);
+ if (libbpf_get_error(link)) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ link = NULL;
+ goto cleanup;
}
assert((serverfd = socket(AF_INET, SOCK_STREAM, 0)) > 0);
@@ -51,7 +77,7 @@
mapped_addr_in->sin_port = htons(5555);
mapped_addr_in->sin_addr.s_addr = inet_addr("255.255.255.255");
- assert(!bpf_map_update_elem(map_fd[0], &mapped_addr, &serv_addr, BPF_ANY));
+ assert(!bpf_map_update_elem(map_fd, &mapped_addr, &serv_addr, BPF_ANY));
assert(listen(serverfd, 5) == 0);
@@ -75,5 +101,8 @@
/* Is the server's getsockname = the socket getpeername */
assert(memcmp(&serv_addr, &tmp_addr, sizeof(struct sockaddr_in)) == 0);
+cleanup:
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/trace_common.h b/samples/bpf/trace_common.h
new file mode 100644
index 0000000..8cb5400
--- /dev/null
+++ b/samples/bpf/trace_common.h
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef __TRACE_COMMON_H
+#define __TRACE_COMMON_H
+
+#ifdef __x86_64__
+#define SYSCALL(SYS) "__x64_" __stringify(SYS)
+#elif defined(__s390x__)
+#define SYSCALL(SYS) "__s390x_" __stringify(SYS)
+#else
+#define SYSCALL(SYS) __stringify(SYS)
+#endif
+
+#endif
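Usage of the new header: section names built with SYSCALL() resolve to the arch-prefixed wrapper symbol (for example __x64_sys_write on x86_64), which a plain "kprobe/sys_write" section no longer finds on current kernels. A minimal illustrative program:

#include <linux/ptrace.h>
#include <linux/stringify.h>    /* for __stringify(); usually pulled in indirectly */
#include <uapi/linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "trace_common.h"

/* On x86_64 this expands to SEC("kprobe/__x64_sys_write") */
SEC("kprobe/" SYSCALL(sys_write))
int probe_sys_write(struct pt_regs *ctx)
{
        return 0;       /* real handler elided */
}

char _license[] SEC("license") = "GPL";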
diff --git a/samples/bpf/trace_event_kern.c b/samples/bpf/trace_event_kern.c
index 7068fbd..7d3c66f 100644
--- a/samples/bpf/trace_event_kern.c
+++ b/samples/bpf/trace_event_kern.c
@@ -9,7 +9,8 @@
#include <uapi/linux/bpf.h>
#include <uapi/linux/bpf_perf_event.h>
#include <uapi/linux/perf_event.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
struct key_t {
char comm[TASK_COMM_LEN];
@@ -17,19 +18,19 @@
u32 userstack;
};
-struct bpf_map_def SEC("maps") counts = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(struct key_t),
- .value_size = sizeof(u64),
- .max_entries = 10000,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, struct key_t);
+ __type(value, u64);
+ __uint(max_entries, 10000);
+} counts SEC(".maps");
-struct bpf_map_def SEC("maps") stackmap = {
- .type = BPF_MAP_TYPE_STACK_TRACE,
- .key_size = sizeof(u32),
- .value_size = PERF_MAX_STACK_DEPTH * sizeof(u64),
- .max_entries = 10000,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK_TRACE);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64));
+ __uint(max_entries, 10000);
+} stackmap SEC(".maps");
#define KERN_STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP)
#define USER_STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP | BPF_F_USER_STACK)
diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c
index 749a50f..ac1ba36 100644
--- a/samples/bpf/trace_event_user.c
+++ b/samples/bpf/trace_event_user.c
@@ -6,22 +6,22 @@
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
-#include <fcntl.h>
-#include <poll.h>
-#include <sys/ioctl.h>
#include <linux/perf_event.h>
#include <linux/bpf.h>
#include <signal.h>
-#include <assert.h>
#include <errno.h>
#include <sys/resource.h>
-#include "libbpf.h"
-#include "bpf_load.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
#include "perf-sys.h"
#include "trace_helpers.h"
#define SAMPLE_FREQ 50
+static int pid;
+/* counts, stackmap */
+static int map_fd[2];
+struct bpf_program *prog;
static bool sys_read_seen, sys_write_seen;
static void print_ksym(__u64 addr)
@@ -91,10 +91,10 @@
}
}
-static void int_exit(int sig)
+static void err_exit(int err)
{
- kill(0, SIGKILL);
- exit(0);
+ kill(pid, SIGKILL);
+ exit(err);
}
static void print_stacks(void)
@@ -102,7 +102,7 @@
struct key_t key = {}, next_key;
__u64 value;
__u32 stackid = 0, next_id;
- int fd = map_fd[0], stack_map = map_fd[1];
+ int error = 1, fd = map_fd[0], stack_map = map_fd[1];
sys_read_seen = sys_write_seen = false;
while (bpf_map_get_next_key(fd, &key, &next_key) == 0) {
@@ -114,7 +114,7 @@
printf("\n");
if (!sys_read_seen || !sys_write_seen) {
printf("BUG kernel stack doesn't contain sys_read() and sys_write()\n");
- int_exit(0);
+ err_exit(error);
}
/* clear stack map */
@@ -136,43 +136,52 @@
static void test_perf_event_all_cpu(struct perf_event_attr *attr)
{
- int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
- int *pmu_fd = malloc(nr_cpus * sizeof(int));
- int i, error = 0;
+ int nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+ struct bpf_link **links = calloc(nr_cpus, sizeof(struct bpf_link *));
+ int i, pmu_fd, error = 1;
+
+ if (!links) {
+ printf("malloc of links failed\n");
+ goto err;
+ }
/* system wide perf event, no need to inherit */
attr->inherit = 0;
/* open perf_event on all cpus */
for (i = 0; i < nr_cpus; i++) {
- pmu_fd[i] = sys_perf_event_open(attr, -1, i, -1, 0);
- if (pmu_fd[i] < 0) {
+ pmu_fd = sys_perf_event_open(attr, -1, i, -1, 0);
+ if (pmu_fd < 0) {
printf("sys_perf_event_open failed\n");
- error = 1;
goto all_cpu_err;
}
- assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0);
- assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE) == 0);
+ links[i] = bpf_program__attach_perf_event(prog, pmu_fd);
+ if (libbpf_get_error(links[i])) {
+ printf("bpf_program__attach_perf_event failed\n");
+ links[i] = NULL;
+ close(pmu_fd);
+ goto all_cpu_err;
+ }
}
- if (generate_load() < 0) {
- error = 1;
+ if (generate_load() < 0)
goto all_cpu_err;
- }
+
print_stacks();
+ error = 0;
all_cpu_err:
- for (i--; i >= 0; i--) {
- ioctl(pmu_fd[i], PERF_EVENT_IOC_DISABLE);
- close(pmu_fd[i]);
- }
- free(pmu_fd);
+ for (i--; i >= 0; i--)
+ bpf_link__destroy(links[i]);
+err:
+ free(links);
if (error)
- int_exit(0);
+ err_exit(error);
}
static void test_perf_event_task(struct perf_event_attr *attr)
{
- int pmu_fd, error = 0;
+ struct bpf_link *link = NULL;
+ int pmu_fd, error = 1;
/* per task perf event, enable inherit so the "dd ..." command can be traced properly.
* Enabling inherit will cause bpf_perf_prog_read_time helper failure.
@@ -183,21 +192,25 @@
pmu_fd = sys_perf_event_open(attr, 0, -1, -1, 0);
if (pmu_fd < 0) {
printf("sys_perf_event_open failed\n");
- int_exit(0);
- }
- assert(ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0);
- assert(ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE) == 0);
-
- if (generate_load() < 0) {
- error = 1;
goto err;
}
+ link = bpf_program__attach_perf_event(prog, pmu_fd);
+ if (libbpf_get_error(link)) {
+ printf("bpf_program__attach_perf_event failed\n");
+ link = NULL;
+ close(pmu_fd);
+ goto err;
+ }
+
+ if (generate_load() < 0)
+ goto err;
+
print_stacks();
+ error = 0;
err:
- ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
- close(pmu_fd);
+ bpf_link__destroy(link);
if (error)
- int_exit(0);
+ err_exit(error);
}
static void test_bpf_perf_event(void)
@@ -282,29 +295,60 @@
int main(int argc, char **argv)
{
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ struct bpf_object *obj = NULL;
char filename[256];
+ int error = 1;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
setrlimit(RLIMIT_MEMLOCK, &r);
- signal(SIGINT, int_exit);
- signal(SIGTERM, int_exit);
+ signal(SIGINT, err_exit);
+ signal(SIGTERM, err_exit);
if (load_kallsyms()) {
printf("failed to process /proc/kallsyms\n");
- return 1;
+ goto cleanup;
}
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 2;
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ printf("opening BPF object file failed\n");
+ obj = NULL;
+ goto cleanup;
}
- if (fork() == 0) {
+ prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+ if (!prog) {
+ printf("finding a prog in obj file failed\n");
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ printf("loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd[0] = bpf_object__find_map_fd_by_name(obj, "counts");
+ map_fd[1] = bpf_object__find_map_fd_by_name(obj, "stackmap");
+ if (map_fd[0] < 0 || map_fd[1] < 0) {
+ printf("finding a counts/stackmap map in obj file failed\n");
+ goto cleanup;
+ }
+
+ pid = fork();
+ if (pid == 0) {
read_trace_pipe();
return 0;
+ } else if (pid == -1) {
+ printf("couldn't spawn process\n");
+ goto cleanup;
}
+
test_bpf_perf_event();
- int_exit(0);
- return 0;
+ error = 0;
+
+cleanup:
+ bpf_object__close(obj);
+ err_exit(error);
}
diff --git a/samples/bpf/trace_output_kern.c b/samples/bpf/trace_output_kern.c
index 9b96f4f..b64815a 100644
--- a/samples/bpf/trace_output_kern.c
+++ b/samples/bpf/trace_output_kern.c
@@ -1,16 +1,17 @@
#include <linux/ptrace.h>
#include <linux/version.h>
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include "trace_common.h"
-struct bpf_map_def SEC("maps") my_map = {
- .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(u32),
- .max_entries = 2,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(u32));
+ __uint(max_entries, 2);
+} my_map SEC(".maps");
-SEC("kprobe/sys_write")
+SEC("kprobe/" SYSCALL(sys_write))
int bpf_prog1(struct pt_regs *ctx)
{
struct S {
diff --git a/samples/bpf/trace_output_user.c b/samples/bpf/trace_output_user.c
index 8ee4769..364b987 100644
--- a/samples/bpf/trace_output_user.c
+++ b/samples/bpf/trace_output_user.c
@@ -1,23 +1,10 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <stdio.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <stdbool.h>
-#include <string.h>
#include <fcntl.h>
#include <poll.h>
-#include <linux/perf_event.h>
-#include <linux/bpf.h>
-#include <errno.h>
-#include <assert.h>
-#include <sys/syscall.h>
-#include <sys/ioctl.h>
-#include <sys/mman.h>
#include <time.h>
#include <signal.h>
-#include <libbpf.h>
-#include "bpf_load.h"
-#include "perf-sys.h"
+#include <bpf/libbpf.h>
static __u64 time_get_ns(void)
{
@@ -57,20 +44,48 @@
int main(int argc, char **argv)
{
struct perf_buffer_opts pb_opts = {};
+ struct bpf_link *link = NULL;
+ struct bpf_program *prog;
struct perf_buffer *pb;
+ struct bpf_object *obj;
+ int map_fd, ret = 0;
char filename[256];
FILE *f;
- int ret;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd = bpf_object__find_map_fd_by_name(obj, "my_map");
+ if (map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+ if (libbpf_get_error(prog)) {
+ fprintf(stderr, "ERROR: finding a prog in obj file failed\n");
+ goto cleanup;
+ }
+
+ link = bpf_program__attach(prog);
+ if (libbpf_get_error(link)) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ link = NULL;
+ goto cleanup;
}
pb_opts.sample_cb = print_bpf_output;
- pb = perf_buffer__new(map_fd[0], 8, &pb_opts);
+ pb = perf_buffer__new(map_fd, 8, &pb_opts);
ret = libbpf_get_error(pb);
if (ret) {
printf("failed to setup perf_buffer: %d\n", ret);
@@ -84,5 +99,9 @@
while ((ret = perf_buffer__poll(pb, 1000)) >= 0 && cnt < MAX_CNT) {
}
kill(0, SIGINT);
+
+cleanup:
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
return ret;
}
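trace_output_user.c swaps its hand-rolled perf ring-buffer setup (perf_event_open + mmap + poll via bpf_load/perf-sys) for libbpf's perf_buffer API, wired above to the my_map BPF_MAP_TYPE_PERF_EVENT_ARRAY. A self-contained sketch of that consumer side; the callback body and page count are illustrative:

#include <stdio.h>
#include <bpf/libbpf.h>

/* callback signature expected by the perf_buffer API of this libbpf version */
static void handle_event(void *ctx, int cpu, void *data, __u32 size)
{
        printf("cpu %d: %u bytes\n", cpu, size);
}

static int poll_events(int perf_map_fd)
{
        struct perf_buffer_opts pb_opts = { .sample_cb = handle_event };
        struct perf_buffer *pb;
        int err;

        /* 8 pages of ring buffer per CPU, mmap'ed and polled by libbpf */
        pb = perf_buffer__new(perf_map_fd, 8, &pb_opts);
        err = libbpf_get_error(pb);
        if (err)
                return err;

        /* handle_event() runs for each sample; loop ends on a poll error */
        while ((err = perf_buffer__poll(pb, 1000 /* ms */)) >= 0)
                ;

        perf_buffer__free(pb);
        return err;
}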
diff --git a/samples/bpf/tracex1_kern.c b/samples/bpf/tracex1_kern.c
index 9c74b45..ef30d2b 100644
--- a/samples/bpf/tracex1_kern.c
+++ b/samples/bpf/tracex1_kern.c
@@ -8,9 +8,15 @@
#include <linux/netdevice.h>
#include <uapi/linux/bpf.h>
#include <linux/version.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
-#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;})
+#define _(P) \
+ ({ \
+ typeof(P) val = 0; \
+ bpf_probe_read_kernel(&val, sizeof(val), &(P)); \
+ val; \
+ })
/* kprobe is NOT a stable ABI
* kernel functions can be removed, renamed or completely change semantics.
@@ -33,7 +39,7 @@
dev = _(skb->dev);
len = _(skb->len);
- bpf_probe_read(devname, sizeof(devname), dev->name);
+ bpf_probe_read_kernel(devname, sizeof(devname), dev->name);
if (devname[0] == 'l' && devname[1] == 'o') {
char fmt[] = "skb %p len %d\n";
diff --git a/samples/bpf/tracex1_user.c b/samples/bpf/tracex1_user.c
index af8c206..9d4adb7 100644
--- a/samples/bpf/tracex1_user.c
+++ b/samples/bpf/tracex1_user.c
@@ -1,20 +1,41 @@
// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
-#include <linux/bpf.h>
#include <unistd.h>
-#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
+#include "trace_helpers.h"
int main(int ac, char **argv)
{
- FILE *f;
+ struct bpf_link *link = NULL;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
char filename[256];
+ FILE *f;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+ if (!prog) {
+ fprintf(stderr, "ERROR: finding a prog in obj file failed\n");
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ link = bpf_program__attach(prog);
+ if (libbpf_get_error(link)) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ link = NULL;
+ goto cleanup;
}
f = popen("taskset 1 ping -c5 localhost", "r");
@@ -22,5 +43,8 @@
read_trace_pipe();
+cleanup:
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/tracex2_kern.c b/samples/bpf/tracex2_kern.c
index 5e11c20..5bc696b 100644
--- a/samples/bpf/tracex2_kern.c
+++ b/samples/bpf/tracex2_kern.c
@@ -8,14 +8,16 @@
#include <linux/netdevice.h>
#include <linux/version.h>
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "trace_common.h"
-struct bpf_map_def SEC("maps") my_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(long),
- .value_size = sizeof(long),
- .max_entries = 1024,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, long);
+ __type(value, long);
+ __uint(max_entries, 1024);
+} my_map SEC(".maps");
/* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe
* example will no longer be meaningful
@@ -69,14 +71,14 @@
u64 index;
};
-struct bpf_map_def SEC("maps") my_hist_map = {
- .type = BPF_MAP_TYPE_PERCPU_HASH,
- .key_size = sizeof(struct hist_key),
- .value_size = sizeof(long),
- .max_entries = 1024,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(key_size, sizeof(struct hist_key));
+ __uint(value_size, sizeof(long));
+ __uint(max_entries, 1024);
+} my_hist_map SEC(".maps");
-SEC("kprobe/sys_write")
+SEC("kprobe/" SYSCALL(sys_write))
int bpf_prog3(struct pt_regs *ctx)
{
long write_size = PT_REGS_PARM3(ctx);
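
The map rewrite above is the recurring pattern in this series: a legacy struct bpf_map_def in SEC("maps") becomes a BTF-defined map in SEC(".maps"). Both spellings below are accepted by libbpf; __type() additionally records the key/value types in BTF, while __uint(key_size/value_size, ...) declares only their sizes. A minimal sketch, not part of the patch:

#include <uapi/linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* typed form: key/value types are captured in BTF */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, long);
	__type(value, long);
	__uint(max_entries, 1024);
} typed_map SEC(".maps");

/* sized form: only key/value sizes are declared */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
	__uint(key_size, sizeof(long));
	__uint(value_size, sizeof(long));
	__uint(max_entries, 1024);
} sized_map SEC(".maps");

char _license[] SEC("license") = "GPL";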
diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c
index c9544a4..3d6eab7 100644
--- a/samples/bpf/tracex2_user.c
+++ b/samples/bpf/tracex2_user.c
@@ -3,17 +3,19 @@
#include <unistd.h>
#include <stdlib.h>
#include <signal.h>
-#include <linux/bpf.h>
#include <string.h>
#include <sys/resource.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
#include "bpf_util.h"
#define MAX_INDEX 64
#define MAX_STARS 38
+/* my_map, my_hist_map */
+static int map_fd[2];
+
static void stars(char *str, long val, long max, int width)
{
int i;
@@ -114,19 +116,40 @@
int main(int ac, char **argv)
{
- struct rlimit r = {1024*1024, RLIM_INFINITY};
- char filename[256];
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
long key, next_key, value;
+ struct bpf_link *links[2];
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ char filename[256];
+ int i, j = 0;
FILE *f;
- int i;
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
if (setrlimit(RLIMIT_MEMLOCK, &r)) {
perror("setrlimit(RLIMIT_MEMLOCK)");
return 1;
}
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd[0] = bpf_object__find_map_fd_by_name(obj, "my_map");
+ map_fd[1] = bpf_object__find_map_fd_by_name(obj, "my_hist_map");
+ if (map_fd[0] < 0 || map_fd[1] < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
signal(SIGINT, int_exit);
signal(SIGTERM, int_exit);
@@ -138,9 +161,14 @@
f = popen("dd if=/dev/zero of=/dev/null count=5000000", "r");
(void) f;
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ bpf_object__for_each_program(prog, obj) {
+ links[j] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[j])) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ links[j] = NULL;
+ goto cleanup;
+ }
+ j++;
}
for (i = 0; i < 5; i++) {
@@ -156,5 +184,10 @@
}
print_hist(map_fd[1]);
+cleanup:
+ for (j--; j >= 0; j--)
+ bpf_link__destroy(links[j]);
+
+ bpf_object__close(obj);
return 0;
}
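
For objects containing more than one program, the loader above attaches everything with bpf_object__for_each_program() and keeps the resulting bpf_link pointers so they can be destroyed in reverse order on exit. A small helper pair sketching that pattern (assumptions: a fixed MAX_LINKS bound and a caller that has already loaded obj):

#include <bpf/libbpf.h>

#define MAX_LINKS 8

static struct bpf_link *links[MAX_LINKS];
static int nr_links;

static int attach_all(struct bpf_object *obj)
{
	struct bpf_program *prog;

	bpf_object__for_each_program(prog, obj) {
		if (nr_links == MAX_LINKS)
			return -1;
		links[nr_links] = bpf_program__attach(prog);
		if (libbpf_get_error(links[nr_links])) {
			links[nr_links] = NULL;
			return -1;
		}
		nr_links++;
	}
	return 0;
}

static void detach_all(void)
{
	while (nr_links)
		bpf_link__destroy(links[--nr_links]);
}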
diff --git a/samples/bpf/tracex3_kern.c b/samples/bpf/tracex3_kern.c
index ea1d4c1..710a441 100644
--- a/samples/bpf/tracex3_kern.c
+++ b/samples/bpf/tracex3_kern.c
@@ -8,14 +8,15 @@
#include <linux/netdevice.h>
#include <linux/version.h>
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
-struct bpf_map_def SEC("maps") my_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(long),
- .value_size = sizeof(u64),
- .max_entries = 4096,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, long);
+ __type(value, u64);
+ __uint(max_entries, 4096);
+} my_map SEC(".maps");
/* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe
* example will no longer be meaningful
@@ -41,14 +42,14 @@
#define SLOTS 100
-struct bpf_map_def SEC("maps") lat_map = {
- .type = BPF_MAP_TYPE_PERCPU_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u64),
- .max_entries = SLOTS,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, sizeof(u64));
+ __uint(max_entries, SLOTS);
+} lat_map SEC(".maps");
-SEC("kprobe/blk_account_io_completion")
+SEC("kprobe/blk_account_io_done")
int bpf_prog2(struct pt_regs *ctx)
{
long rq = PT_REGS_PARM1(ctx);
diff --git a/samples/bpf/tracex3_user.c b/samples/bpf/tracex3_user.c
index cf8fedc..83e0fec 100644
--- a/samples/bpf/tracex3_user.c
+++ b/samples/bpf/tracex3_user.c
@@ -7,11 +7,10 @@
#include <unistd.h>
#include <stdbool.h>
#include <string.h>
-#include <linux/bpf.h>
#include <sys/resource.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
#include "bpf_util.h"
#define SLOTS 100
@@ -108,21 +107,12 @@
int main(int ac, char **argv)
{
- struct rlimit r = {1024*1024, RLIM_INFINITY};
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ struct bpf_link *links[2];
+ struct bpf_program *prog;
+ struct bpf_object *obj;
char filename[256];
- int i;
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
-
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK)");
- return 1;
- }
-
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
- }
+ int map_fd, i, j = 0;
for (i = 1; i < ac; i++) {
if (strcmp(argv[i], "-a") == 0) {
@@ -137,6 +127,40 @@
}
}
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK)");
+ return 1;
+ }
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd = bpf_object__find_map_fd_by_name(obj, "lat_map");
+ if (map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ links[j] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[j])) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ links[j] = NULL;
+ goto cleanup;
+ }
+ j++;
+ }
+
printf(" heatmap of IO latency\n");
if (text_only)
printf(" %s", sym[num_colors - 1]);
@@ -153,9 +177,14 @@
for (i = 0; ; i++) {
if (i % 20 == 0)
print_banner();
- print_hist(map_fd[1]);
+ print_hist(map_fd);
sleep(2);
}
+cleanup:
+ for (j--; j >= 0; j--)
+ bpf_link__destroy(links[j]);
+
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/tracex4_kern.c b/samples/bpf/tracex4_kern.c
index 6dd8e38..eb0f8fd 100644
--- a/samples/bpf/tracex4_kern.c
+++ b/samples/bpf/tracex4_kern.c
@@ -7,19 +7,20 @@
#include <linux/ptrace.h>
#include <linux/version.h>
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
struct pair {
u64 val;
u64 ip;
};
-struct bpf_map_def SEC("maps") my_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(long),
- .value_size = sizeof(struct pair),
- .max_entries = 1000000,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, long);
+ __type(value, struct pair);
+ __uint(max_entries, 1000000);
+} my_map SEC(".maps");
/* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe
* example will no longer be meaningful
diff --git a/samples/bpf/tracex4_user.c b/samples/bpf/tracex4_user.c
index ec52203..e8faf8f 100644
--- a/samples/bpf/tracex4_user.c
+++ b/samples/bpf/tracex4_user.c
@@ -8,11 +8,10 @@
#include <stdbool.h>
#include <string.h>
#include <time.h>
-#include <linux/bpf.h>
#include <sys/resource.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
struct pair {
long long val;
@@ -36,8 +35,8 @@
key = write(1, "\e[1;1H\e[2J", 12); /* clear screen */
key = -1;
- while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0) {
- bpf_map_lookup_elem(map_fd[0], &next_key, &v);
+ while (bpf_map_get_next_key(fd, &key, &next_key) == 0) {
+ bpf_map_lookup_elem(fd, &next_key, &v);
key = next_key;
if (val - v.val < 1000000000ll)
/* object was allocated more than 1 sec ago */
@@ -50,25 +49,55 @@
int main(int ac, char **argv)
{
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ struct bpf_link *links[2];
+ struct bpf_program *prog;
+ struct bpf_object *obj;
char filename[256];
- int i;
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ int map_fd, i, j = 0;
if (setrlimit(RLIMIT_MEMLOCK, &r)) {
perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)");
return 1;
}
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd = bpf_object__find_map_fd_by_name(obj, "my_map");
+ if (map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ links[j] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[j])) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ links[j] = NULL;
+ goto cleanup;
+ }
+ j++;
}
for (i = 0; ; i++) {
- print_old_objects(map_fd[1]);
+ print_old_objects(map_fd);
sleep(1);
}
+cleanup:
+ for (j--; j >= 0; j--)
+ bpf_link__destroy(links[j]);
+
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/tracex5_kern.c b/samples/bpf/tracex5_kern.c
index 35cb0ee..64a1f75 100644
--- a/samples/bpf/tracex5_kern.c
+++ b/samples/bpf/tracex5_kern.c
@@ -10,20 +10,21 @@
#include <uapi/linux/seccomp.h>
#include <uapi/linux/unistd.h>
#include "syscall_nrs.h"
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
#define PROG(F) SEC("kprobe/"__stringify(F)) int bpf_func_##F
-struct bpf_map_def SEC("maps") progs = {
- .type = BPF_MAP_TYPE_PROG_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u32),
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, sizeof(u32));
#ifdef __mips__
- .max_entries = 6000, /* MIPS n64 syscalls start at 5000 */
+ __uint(max_entries, 6000); /* MIPS n64 syscalls start at 5000 */
#else
- .max_entries = 1024,
+ __uint(max_entries, 1024);
#endif
-};
+} progs SEC(".maps");
SEC("kprobe/__seccomp_filter")
int bpf_prog1(struct pt_regs *ctx)
@@ -46,7 +47,7 @@
{
struct seccomp_data sd;
- bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx));
+ bpf_probe_read_kernel(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx));
if (sd.args[2] == 512) {
char fmt[] = "write(fd=%d, buf=%p, size=%d)\n";
bpf_trace_printk(fmt, sizeof(fmt),
@@ -59,7 +60,7 @@
{
struct seccomp_data sd;
- bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx));
+ bpf_probe_read_kernel(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx));
if (sd.args[2] > 128 && sd.args[2] <= 1024) {
char fmt[] = "read(fd=%d, buf=%p, size=%d)\n";
bpf_trace_printk(fmt, sizeof(fmt),
diff --git a/samples/bpf/tracex5_user.c b/samples/bpf/tracex5_user.c
index c4ab91c..c17d3fb 100644
--- a/samples/bpf/tracex5_user.c
+++ b/samples/bpf/tracex5_user.c
@@ -1,13 +1,20 @@
// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
-#include <linux/bpf.h>
+#include <stdlib.h>
#include <unistd.h>
#include <linux/filter.h>
#include <linux/seccomp.h>
#include <sys/prctl.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
#include <sys/resource.h>
+#include "trace_helpers.h"
+
+#ifdef __mips__
+#define MAX_ENTRIES 6000 /* MIPS n64 syscalls start at 5000 */
+#else
+#define MAX_ENTRIES 1024
+#endif
/* install fake seccomp program to enable seccomp code path inside the kernel,
* so that our kprobe attached to seccomp_phase1() can be triggered
@@ -27,16 +34,57 @@
int main(int ac, char **argv)
{
- FILE *f;
- char filename[256];
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ struct bpf_link *link = NULL;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ int key, fd, progs_fd;
+ const char *section;
+ char filename[256];
+ FILE *f;
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
setrlimit(RLIMIT_MEMLOCK, &r);
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
+
+ prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+ if (!prog) {
+ printf("finding a prog in obj file failed\n");
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ link = bpf_program__attach(prog);
+ if (libbpf_get_error(link)) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ link = NULL;
+ goto cleanup;
+ }
+
+ progs_fd = bpf_object__find_map_fd_by_name(obj, "progs");
+ if (progs_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ section = bpf_program__section_name(prog);
+ /* register only syscalls to PROG_ARRAY */
+ if (sscanf(section, "kprobe/%d", &key) != 1)
+ continue;
+
+ fd = bpf_program__fd(prog);
+ bpf_map_update_elem(progs_fd, &key, &fd, BPF_ANY);
}
install_accept_all_seccomp();
@@ -46,5 +94,8 @@
read_trace_pipe();
+cleanup:
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
return 0;
}
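
The tracex5 loader above now derives the PROG_ARRAY key for each tail-call program from its section name, "kprobe/<syscall nr>", via bpf_program__section_name(). The registration loop in isolation looks like this (sketch only; progs_fd is assumed to be the PROG_ARRAY's fd):

#include <stdio.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>

static void register_syscall_progs(struct bpf_object *obj, int progs_fd)
{
	struct bpf_program *prog;
	int key, fd;

	bpf_object__for_each_program(prog, obj) {
		const char *sec = bpf_program__section_name(prog);

		/* only numbered "kprobe/<nr>" programs go into the array */
		if (sscanf(sec, "kprobe/%d", &key) != 1)
			continue;

		fd = bpf_program__fd(prog);
		bpf_map_update_elem(progs_fd, &key, &fd, BPF_ANY);
	}
}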
diff --git a/samples/bpf/tracex6_kern.c b/samples/bpf/tracex6_kern.c
index 46c557a..acad571 100644
--- a/samples/bpf/tracex6_kern.c
+++ b/samples/bpf/tracex6_kern.c
@@ -1,26 +1,28 @@
#include <linux/ptrace.h>
#include <linux/version.h>
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
-struct bpf_map_def SEC("maps") counters = {
- .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(u32),
- .max_entries = 64,
-};
-struct bpf_map_def SEC("maps") values = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(int),
- .value_size = sizeof(u64),
- .max_entries = 64,
-};
-struct bpf_map_def SEC("maps") values2 = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(int),
- .value_size = sizeof(struct bpf_perf_event_value),
- .max_entries = 64,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(u32));
+ __uint(max_entries, 64);
+} counters SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, int);
+ __type(value, u64);
+ __uint(max_entries, 64);
+} values SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, int);
+ __type(value, struct bpf_perf_event_value);
+ __uint(max_entries, 64);
+} values2 SEC(".maps");
SEC("kprobe/htab_map_get_next_key")
int bpf_prog1(struct pt_regs *ctx)
diff --git a/samples/bpf/tracex6_user.c b/samples/bpf/tracex6_user.c
index 4bb3c83..33df978 100644
--- a/samples/bpf/tracex6_user.c
+++ b/samples/bpf/tracex6_user.c
@@ -4,7 +4,6 @@
#include <assert.h>
#include <fcntl.h>
#include <linux/perf_event.h>
-#include <linux/bpf.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
@@ -15,12 +14,15 @@
#include <sys/wait.h>
#include <unistd.h>
-#include "bpf_load.h"
#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
#include "perf-sys.h"
#define SAMPLE_PERIOD 0x7fffffffffffffffULL
+/* counters, values, values2 */
+static int map_fd[3];
+
static void check_on_cpu(int cpu, struct perf_event_attr *attr)
{
struct bpf_perf_event_value value2;
@@ -174,16 +176,51 @@
int main(int argc, char **argv)
{
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ struct bpf_link *links[2];
+ struct bpf_program *prog;
+ struct bpf_object *obj;
char filename[256];
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ int i = 0;
setrlimit(RLIMIT_MEMLOCK, &r);
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd[0] = bpf_object__find_map_fd_by_name(obj, "counters");
+ map_fd[1] = bpf_object__find_map_fd_by_name(obj, "values");
+ map_fd[2] = bpf_object__find_map_fd_by_name(obj, "values2");
+ if (map_fd[0] < 0 || map_fd[1] < 0 || map_fd[2] < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ links[i] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[i])) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ links[i] = NULL;
+ goto cleanup;
+ }
+ i++;
}
test_bpf_perf_event();
+
+cleanup:
+ for (i--; i >= 0; i--)
+ bpf_link__destroy(links[i]);
+
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/tracex7_kern.c b/samples/bpf/tracex7_kern.c
index 1ab308a..c5a92df 100644
--- a/samples/bpf/tracex7_kern.c
+++ b/samples/bpf/tracex7_kern.c
@@ -1,7 +1,7 @@
#include <uapi/linux/ptrace.h>
#include <uapi/linux/bpf.h>
#include <linux/version.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
SEC("kprobe/open_ctree")
int bpf_prog1(struct pt_regs *ctx)
diff --git a/samples/bpf/tracex7_user.c b/samples/bpf/tracex7_user.c
index 2ed13e9..8be7ce1 100644
--- a/samples/bpf/tracex7_user.c
+++ b/samples/bpf/tracex7_user.c
@@ -1,17 +1,18 @@
#define _GNU_SOURCE
#include <stdio.h>
-#include <linux/bpf.h>
#include <unistd.h>
-#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
int main(int argc, char **argv)
{
- FILE *f;
+ struct bpf_link *link = NULL;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
char filename[256];
char command[256];
- int ret;
+ int ret = 0;
+ FILE *f;
if (!argv[1]) {
fprintf(stderr, "ERROR: Run with the btrfs device argument!\n");
@@ -19,15 +20,37 @@
}
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+ if (!prog) {
+ fprintf(stderr, "ERROR: finding a prog in obj file failed\n");
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ link = bpf_program__attach(prog);
+ if (libbpf_get_error(link)) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ link = NULL;
+ goto cleanup;
}
snprintf(command, 256, "mount %s tmpmnt/", argv[1]);
f = popen(command, "r");
ret = pclose(f);
+cleanup:
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
return ret ? 0 : 1;
}
diff --git a/samples/bpf/xdp1_kern.c b/samples/bpf/xdp1_kern.c
index db6870a..34b6439 100644
--- a/samples/bpf/xdp1_kern.c
+++ b/samples/bpf/xdp1_kern.c
@@ -12,7 +12,7 @@
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c
index a8e5fa0..c447ad9 100644
--- a/samples/bpf/xdp1_user.c
+++ b/samples/bpf/xdp1_user.c
@@ -15,8 +15,8 @@
#include <net/if.h>
#include "bpf_util.h"
-#include "bpf.h"
-#include "libbpf.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
static int ifindex;
static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
@@ -98,7 +98,7 @@
xdp_flags |= XDP_FLAGS_SKB_MODE;
break;
case 'N':
- xdp_flags |= XDP_FLAGS_DRV_MODE;
+ /* default, set below */
break;
case 'F':
xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
@@ -109,6 +109,9 @@
}
}
+ if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
+ xdp_flags |= XDP_FLAGS_DRV_MODE;
+
if (optind == argc) {
usage(basename(argv[0]));
return 1;
@@ -139,7 +142,7 @@
map_fd = bpf_map__fd(map);
if (!prog_fd) {
- printf("load_bpf_file: %s\n", strerror(errno));
+ printf("bpf_prog_load_xattr: %s\n", strerror(errno));
return 1;
}
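
Several XDP samples in this patch change the default attach mode: unless -S requested generic/SKB mode, XDP_FLAGS_DRV_MODE is set after option parsing, so -N becomes a compatibility no-op. The flag selection reduces to the following hypothetical stand-alone helper (a sketch, not code from the patch):

#include <linux/if_link.h>

static __u32 pick_xdp_flags(int skb_mode)
{
	__u32 flags = XDP_FLAGS_UPDATE_IF_NOEXIST;

	if (skb_mode)
		flags |= XDP_FLAGS_SKB_MODE;	/* -S: generic XDP */
	else
		flags |= XDP_FLAGS_DRV_MODE;	/* default: native/driver mode */

	return flags;
}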
diff --git a/samples/bpf/xdp2_kern.c b/samples/bpf/xdp2_kern.c
index c74b52c..c787f4b 100644
--- a/samples/bpf/xdp2_kern.c
+++ b/samples/bpf/xdp2_kern.c
@@ -12,7 +12,7 @@
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
diff --git a/samples/bpf/xdp2skb_meta_kern.c b/samples/bpf/xdp2skb_meta_kern.c
index 0c12048..9b78331 100644
--- a/samples/bpf/xdp2skb_meta_kern.c
+++ b/samples/bpf/xdp2skb_meta_kern.c
@@ -12,7 +12,7 @@
#include <uapi/linux/bpf.h>
#include <uapi/linux/pkt_cls.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
/*
* This struct is stored in the XDP 'data_meta' area, which is located
diff --git a/samples/bpf/xdp_adjust_tail_kern.c b/samples/bpf/xdp_adjust_tail_kern.c
index cd9ff2a..ffdd548 100644
--- a/samples/bpf/xdp_adjust_tail_kern.c
+++ b/samples/bpf/xdp_adjust_tail_kern.c
@@ -18,13 +18,16 @@
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <linux/icmp.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#define DEFAULT_TTL 64
#define MAX_PCKT_SIZE 600
#define ICMP_TOOBIG_SIZE 98
#define ICMP_TOOBIG_PAYLOAD_SIZE 92
+/* volatile to prevent compiler optimizations */
+static volatile __u32 max_pcktsz = MAX_PCKT_SIZE;
+
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__type(key, __u32);
@@ -92,7 +95,7 @@
orig_iph = data + off;
icmp_hdr->type = ICMP_DEST_UNREACH;
icmp_hdr->code = ICMP_FRAG_NEEDED;
- icmp_hdr->un.frag.mtu = htons(MAX_PCKT_SIZE-sizeof(struct ethhdr));
+ icmp_hdr->un.frag.mtu = htons(max_pcktsz - sizeof(struct ethhdr));
icmp_hdr->checksum = 0;
ipv4_csum(icmp_hdr, ICMP_TOOBIG_PAYLOAD_SIZE, &csum);
icmp_hdr->checksum = csum;
@@ -121,7 +124,7 @@
int pckt_size = data_end - data;
int offset;
- if (pckt_size > MAX_PCKT_SIZE) {
+ if (pckt_size > max(max_pcktsz, ICMP_TOOBIG_SIZE)) {
offset = pckt_size - ICMP_TOOBIG_SIZE;
if (bpf_xdp_adjust_tail(xdp, 0 - offset))
return XDP_PASS;
diff --git a/samples/bpf/xdp_adjust_tail_user.c b/samples/bpf/xdp_adjust_tail_user.c
index a3596b6..ba482dc 100644
--- a/samples/bpf/xdp_adjust_tail_user.c
+++ b/samples/bpf/xdp_adjust_tail_user.c
@@ -19,10 +19,11 @@
#include <netinet/ether.h>
#include <unistd.h>
#include <time.h>
-#include "bpf.h"
-#include "libbpf.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
#define STATS_INTERVAL_S 2U
+#define MAX_PCKT_SIZE 600
static int ifindex = -1;
static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
@@ -72,6 +73,7 @@
printf("Usage: %s [...]\n", cmd);
printf(" -i <ifname|ifindex> Interface\n");
printf(" -T <stop-after-X-seconds> Default: 0 (forever)\n");
+ printf(" -P <MAX_PCKT_SIZE> Default: %u\n", MAX_PCKT_SIZE);
printf(" -S use skb-mode\n");
printf(" -N enforce native mode\n");
printf(" -F force loading prog\n");
@@ -85,13 +87,14 @@
.prog_type = BPF_PROG_TYPE_XDP,
};
unsigned char opt_flags[256] = {};
- const char *optstr = "i:T:SNFh";
+ const char *optstr = "i:T:P:SNFh";
struct bpf_prog_info info = {};
__u32 info_len = sizeof(info);
unsigned int kill_after_s = 0;
int i, prog_fd, map_fd, opt;
struct bpf_object *obj;
- struct bpf_map *map;
+ __u32 max_pckt_size = 0;
+ __u32 key = 0;
char filename[256];
int err;
@@ -110,11 +113,14 @@
case 'T':
kill_after_s = atoi(optarg);
break;
+ case 'P':
+ max_pckt_size = atoi(optarg);
+ break;
case 'S':
xdp_flags |= XDP_FLAGS_SKB_MODE;
break;
case 'N':
- xdp_flags |= XDP_FLAGS_DRV_MODE;
+ /* default, set below */
break;
case 'F':
xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
@@ -126,6 +132,9 @@
opt_flags[opt] = 0;
}
+ if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
+ xdp_flags |= XDP_FLAGS_DRV_MODE;
+
for (i = 0; i < strlen(optstr); i++) {
if (opt_flags[(unsigned int)optstr[i]]) {
fprintf(stderr, "Missing argument -%c\n", optstr[i]);
@@ -150,15 +159,20 @@
if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
return 1;
- map = bpf_map__next(NULL, obj);
- if (!map) {
- printf("finding a map in obj file failed\n");
- return 1;
+ /* static global var 'max_pcktsz' is accessible from .data section */
+ if (max_pckt_size) {
+ map_fd = bpf_object__find_map_fd_by_name(obj, "xdp_adju.data");
+ if (map_fd < 0) {
+ printf("finding a max_pcktsz map in obj file failed\n");
+ return 1;
+ }
+ bpf_map_update_elem(map_fd, &key, &max_pckt_size, BPF_ANY);
}
- map_fd = bpf_map__fd(map);
- if (!prog_fd) {
- printf("load_bpf_file: %s\n", strerror(errno));
+ /* fetch icmpcnt map */
+ map_fd = bpf_object__find_map_fd_by_name(obj, "icmpcnt");
+ if (map_fd < 0) {
+ printf("finding a icmpcnt map in obj file failed\n");
return 1;
}
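
The new -P option above works by rewriting the static global max_pcktsz from user space: libbpf exposes the program's .data section as an internal array map whose name is a truncated object name plus ".data", hence the "xdp_adju.data" lookup. A sketch of that update, assuming the section holds only this one 32-bit variable:

#include <bpf/bpf.h>
#include <bpf/libbpf.h>

static int set_max_pckt_size(struct bpf_object *obj, __u32 size)
{
	__u32 key = 0;	/* the .data map has a single element at key 0 */
	int fd;

	fd = bpf_object__find_map_fd_by_name(obj, "xdp_adju.data");
	if (fd < 0)
		return fd;

	/* valid only while max_pcktsz is the sole variable in .data */
	return bpf_map_update_elem(fd, &key, &size, BPF_ANY);
}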
diff --git a/samples/bpf/xdp_fwd_kern.c b/samples/bpf/xdp_fwd_kern.c
index d013029..54c099c 100644
--- a/samples/bpf/xdp_fwd_kern.c
+++ b/samples/bpf/xdp_fwd_kern.c
@@ -19,7 +19,7 @@
#include <linux/ip.h>
#include <linux/ipv6.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#define IPV6_FLOWINFO_MASK cpu_to_be32(0x0FFFFFFF)
diff --git a/samples/bpf/xdp_fwd_user.c b/samples/bpf/xdp_fwd_user.c
index 97ff1da..74a4583 100644
--- a/samples/bpf/xdp_fwd_user.c
+++ b/samples/bpf/xdp_fwd_user.c
@@ -24,14 +24,16 @@
#include <fcntl.h>
#include <libgen.h>
-#include "libbpf.h"
+#include <bpf/libbpf.h>
#include <bpf/bpf.h>
+static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+
static int do_attach(int idx, int prog_fd, int map_fd, const char *name)
{
int err;
- err = bpf_set_link_xdp_fd(idx, prog_fd, 0);
+ err = bpf_set_link_xdp_fd(idx, prog_fd, xdp_flags);
if (err < 0) {
printf("ERROR: failed to attach program to %s\n", name);
return err;
@@ -49,7 +51,7 @@
{
int err;
- err = bpf_set_link_xdp_fd(idx, -1, 0);
+ err = bpf_set_link_xdp_fd(idx, -1, xdp_flags);
if (err < 0)
printf("ERROR: failed to detach program from %s\n", name);
@@ -83,11 +85,17 @@
int attach = 1;
int ret = 0;
- while ((opt = getopt(argc, argv, ":dD")) != -1) {
+ while ((opt = getopt(argc, argv, ":dDSF")) != -1) {
switch (opt) {
case 'd':
attach = 0;
break;
+ case 'S':
+ xdp_flags |= XDP_FLAGS_SKB_MODE;
+ break;
+ case 'F':
+ xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+ break;
case 'D':
prog_name = "xdp_fwd_direct";
break;
@@ -97,6 +105,9 @@
}
}
+ if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
+ xdp_flags |= XDP_FLAGS_DRV_MODE;
+
if (optind == argc) {
usage(basename(argv[0]));
return 1;
diff --git a/samples/bpf/xdp_monitor_kern.c b/samples/bpf/xdp_monitor_kern.c
index ad10fe7..5c955b8 100644
--- a/samples/bpf/xdp_monitor_kern.c
+++ b/samples/bpf/xdp_monitor_kern.c
@@ -4,23 +4,23 @@
* XDP monitor tool, based on tracepoints
*/
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
-struct bpf_map_def SEC("maps") redirect_err_cnt = {
- .type = BPF_MAP_TYPE_PERCPU_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u64),
- .max_entries = 2,
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __type(key, u32);
+ __type(value, u64);
+ __uint(max_entries, 2);
/* TODO: have entries for all possible errno's */
-};
+} redirect_err_cnt SEC(".maps");
#define XDP_UNKNOWN XDP_REDIRECT + 1
-struct bpf_map_def SEC("maps") exception_cnt = {
- .type = BPF_MAP_TYPE_PERCPU_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u64),
- .max_entries = XDP_UNKNOWN + 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __type(key, u32);
+ __type(value, u64);
+ __uint(max_entries, XDP_UNKNOWN + 1);
+} exception_cnt SEC(".maps");
/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format
* Code in: kernel/include/trace/events/xdp.h
@@ -129,19 +129,19 @@
};
#define MAX_CPUS 64
-struct bpf_map_def SEC("maps") cpumap_enqueue_cnt = {
- .type = BPF_MAP_TYPE_PERCPU_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(struct datarec),
- .max_entries = MAX_CPUS,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __type(key, u32);
+ __type(value, struct datarec);
+ __uint(max_entries, MAX_CPUS);
+} cpumap_enqueue_cnt SEC(".maps");
-struct bpf_map_def SEC("maps") cpumap_kthread_cnt = {
- .type = BPF_MAP_TYPE_PERCPU_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(struct datarec),
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __type(key, u32);
+ __type(value, struct datarec);
+ __uint(max_entries, 1);
+} cpumap_kthread_cnt SEC(".maps");
/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format
* Code in: kernel/include/trace/events/xdp.h
@@ -210,26 +210,24 @@
return 0;
}
-struct bpf_map_def SEC("maps") devmap_xmit_cnt = {
- .type = BPF_MAP_TYPE_PERCPU_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(struct datarec),
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __type(key, u32);
+ __type(value, struct datarec);
+ __uint(max_entries, 1);
+} devmap_xmit_cnt SEC(".maps");
/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_devmap_xmit/format
* Code in: kernel/include/trace/events/xdp.h
*/
struct devmap_xmit_ctx {
u64 __pad; // First 8 bytes are not accessible by bpf code
- int map_id; // offset:8; size:4; signed:1;
+ int from_ifindex; // offset:8; size:4; signed:1;
u32 act; // offset:12; size:4; signed:0;
- u32 map_index; // offset:16; size:4; signed:0;
+ int to_ifindex; // offset:16; size:4; signed:1;
int drops; // offset:20; size:4; signed:1;
int sent; // offset:24; size:4; signed:1;
- int from_ifindex; // offset:28; size:4; signed:1;
- int to_ifindex; // offset:32; size:4; signed:1;
- int err; // offset:36; size:4; signed:1;
+ int err; // offset:28; size:4; signed:1;
};
SEC("tracepoint/xdp/xdp_devmap_xmit")
diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c
index ef53b93..03d0a18 100644
--- a/samples/bpf/xdp_monitor_user.c
+++ b/samples/bpf/xdp_monitor_user.c
@@ -26,12 +26,37 @@
#include <net/if.h>
#include <time.h>
+#include <signal.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
#include "bpf_util.h"
+enum map_type {
+ REDIRECT_ERR_CNT,
+ EXCEPTION_CNT,
+ CPUMAP_ENQUEUE_CNT,
+ CPUMAP_KTHREAD_CNT,
+ DEVMAP_XMIT_CNT,
+};
+
+static const char *const map_type_strings[] = {
+ [REDIRECT_ERR_CNT] = "redirect_err_cnt",
+ [EXCEPTION_CNT] = "exception_cnt",
+ [CPUMAP_ENQUEUE_CNT] = "cpumap_enqueue_cnt",
+ [CPUMAP_KTHREAD_CNT] = "cpumap_kthread_cnt",
+ [DEVMAP_XMIT_CNT] = "devmap_xmit_cnt",
+};
+
+#define NUM_MAP 5
+#define NUM_TP 8
+
+static int tp_cnt;
+static int map_cnt;
static int verbose = 1;
static bool debug = false;
+struct bpf_map *map_data[NUM_MAP] = {};
+struct bpf_link *tp_links[NUM_TP] = {};
+struct bpf_object *obj;
static const struct option long_options[] = {
{"help", no_argument, NULL, 'h' },
@@ -41,6 +66,16 @@
{0, 0, NULL, 0 }
};
+static void int_exit(int sig)
+{
+ /* Detach tracepoints */
+ while (tp_cnt)
+ bpf_link__destroy(tp_links[--tp_cnt]);
+
+ bpf_object__close(obj);
+ exit(0);
+}
+
/* C standard specifies two constants, EXIT_SUCCESS(0) and EXIT_FAILURE(1) */
#define EXIT_FAIL_MEM 5
@@ -483,23 +518,23 @@
* this can happen by someone running perf-record -e
*/
- fd = map_data[0].fd; /* map0: redirect_err_cnt */
+ fd = bpf_map__fd(map_data[REDIRECT_ERR_CNT]);
for (i = 0; i < REDIR_RES_MAX; i++)
map_collect_record_u64(fd, i, &rec->xdp_redirect[i]);
- fd = map_data[1].fd; /* map1: exception_cnt */
+ fd = bpf_map__fd(map_data[EXCEPTION_CNT]);
for (i = 0; i < XDP_ACTION_MAX; i++) {
map_collect_record_u64(fd, i, &rec->xdp_exception[i]);
}
- fd = map_data[2].fd; /* map2: cpumap_enqueue_cnt */
+ fd = bpf_map__fd(map_data[CPUMAP_ENQUEUE_CNT]);
for (i = 0; i < MAX_CPUS; i++)
map_collect_record(fd, i, &rec->xdp_cpumap_enqueue[i]);
- fd = map_data[3].fd; /* map3: cpumap_kthread_cnt */
+ fd = bpf_map__fd(map_data[CPUMAP_KTHREAD_CNT]);
map_collect_record(fd, 0, &rec->xdp_cpumap_kthread);
- fd = map_data[4].fd; /* map4: devmap_xmit_cnt */
+ fd = bpf_map__fd(map_data[DEVMAP_XMIT_CNT]);
map_collect_record(fd, 0, &rec->xdp_devmap_xmit);
return true;
@@ -598,8 +633,8 @@
/* TODO Need more advanced stats on error types */
if (verbose) {
- printf(" - Stats map0: %s\n", map_data[0].name);
- printf(" - Stats map1: %s\n", map_data[1].name);
+ printf(" - Stats map0: %s\n", bpf_map__name(map_data[0]));
+ printf(" - Stats map1: %s\n", bpf_map__name(map_data[1]));
printf("\n");
}
fflush(stdout);
@@ -618,44 +653,51 @@
static void print_bpf_prog_info(void)
{
- int i;
+ struct bpf_program *prog;
+ struct bpf_map *map;
+ int i = 0;
/* Prog info */
- printf("Loaded BPF prog have %d bpf program(s)\n", prog_cnt);
- for (i = 0; i < prog_cnt; i++) {
- printf(" - prog_fd[%d] = fd(%d)\n", i, prog_fd[i]);
+ printf("Loaded BPF prog have %d bpf program(s)\n", tp_cnt);
+ bpf_object__for_each_program(prog, obj) {
+ printf(" - prog_fd[%d] = fd(%d)\n", i, bpf_program__fd(prog));
+ i++;
}
+ i = 0;
/* Maps info */
- printf("Loaded BPF prog have %d map(s)\n", map_data_count);
- for (i = 0; i < map_data_count; i++) {
- char *name = map_data[i].name;
- int fd = map_data[i].fd;
+ printf("Loaded BPF prog have %d map(s)\n", map_cnt);
+ bpf_object__for_each_map(map, obj) {
+ const char *name = bpf_map__name(map);
+ int fd = bpf_map__fd(map);
printf(" - map_data[%d] = fd(%d) name:%s\n", i, fd, name);
+ i++;
}
/* Event info */
- printf("Searching for (max:%d) event file descriptor(s)\n", prog_cnt);
- for (i = 0; i < prog_cnt; i++) {
- if (event_fd[i] != -1)
- printf(" - event_fd[%d] = fd(%d)\n", i, event_fd[i]);
+ printf("Searching for (max:%d) event file descriptor(s)\n", tp_cnt);
+ for (i = 0; i < tp_cnt; i++) {
+ int fd = bpf_link__fd(tp_links[i]);
+
+ if (fd != -1)
+ printf(" - event_fd[%d] = fd(%d)\n", i, fd);
}
}
int main(int argc, char **argv)
{
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ struct bpf_program *prog;
int longindex = 0, opt;
- int ret = EXIT_SUCCESS;
- char bpf_obj_file[256];
+ int ret = EXIT_FAILURE;
+ enum map_type type;
+ char filename[256];
/* Default settings: */
bool errors_only = true;
int interval = 2;
- snprintf(bpf_obj_file, sizeof(bpf_obj_file), "%s_kern.o", argv[0]);
-
/* Parse commands line args */
while ((opt = getopt_long(argc, argv, "hDSs:",
long_options, &longindex)) != -1) {
@@ -672,40 +714,79 @@
case 'h':
default:
usage(argv);
- return EXIT_FAILURE;
+ return ret;
}
}
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
if (setrlimit(RLIMIT_MEMLOCK, &r)) {
perror("setrlimit(RLIMIT_MEMLOCK)");
- return EXIT_FAILURE;
+ return ret;
}
- if (load_bpf_file(bpf_obj_file)) {
- printf("ERROR - bpf_log_buf: %s", bpf_log_buf);
- return EXIT_FAILURE;
+ /* Remove tracepoint program when program is interrupted or killed */
+ signal(SIGINT, int_exit);
+ signal(SIGTERM, int_exit);
+
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ printf("ERROR: opening BPF object file failed\n");
+ obj = NULL;
+ goto cleanup;
}
- if (!prog_fd[0]) {
- printf("ERROR - load_bpf_file: %s\n", strerror(errno));
- return EXIT_FAILURE;
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ printf("ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ for (type = 0; type < NUM_MAP; type++) {
+ map_data[type] =
+ bpf_object__find_map_by_name(obj, map_type_strings[type]);
+
+ if (libbpf_get_error(map_data[type])) {
+ printf("ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+ map_cnt++;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ tp_links[tp_cnt] = bpf_program__attach(prog);
+ if (libbpf_get_error(tp_links[tp_cnt])) {
+ printf("ERROR: bpf_program__attach failed\n");
+ tp_links[tp_cnt] = NULL;
+ goto cleanup;
+ }
+ tp_cnt++;
}
if (debug) {
print_bpf_prog_info();
}
- /* Unload/stop tracepoint event by closing fd's */
+ /* Unload/stop tracepoint event by closing bpf_link's */
if (errors_only) {
- /* The prog_fd[i] and event_fd[i] depend on the
- * order the functions was defined in _kern.c
+ /* The bpf_link[i] depend on the order in which
+ * the functions were defined in _kern.c
*/
- close(event_fd[2]); /* tracepoint/xdp/xdp_redirect */
- close(prog_fd[2]); /* func: trace_xdp_redirect */
- close(event_fd[3]); /* tracepoint/xdp/xdp_redirect_map */
- close(prog_fd[3]); /* func: trace_xdp_redirect_map */
+ bpf_link__destroy(tp_links[2]); /* tracepoint/xdp/xdp_redirect */
+ tp_links[2] = NULL;
+
+ bpf_link__destroy(tp_links[3]); /* tracepoint/xdp/xdp_redirect_map */
+ tp_links[3] = NULL;
}
stats_poll(interval, errors_only);
+ ret = EXIT_SUCCESS;
+
+cleanup:
+ /* Detach tracepoints */
+ while (tp_cnt)
+ bpf_link__destroy(tp_links[--tp_cnt]);
+
+ bpf_object__close(obj);
return ret;
}
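
xdp_monitor now resolves its stats maps by name instead of relying on bpf_load's positional map_data[] array. The lookup reduces to a small helper (sketch only, not part of the patch):

#include <bpf/libbpf.h>

static int find_stats_map_fd(struct bpf_object *obj, const char *name)
{
	struct bpf_map *map;

	map = bpf_object__find_map_by_name(obj, name);
	if (libbpf_get_error(map))
		return -1;

	return bpf_map__fd(map);
}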
diff --git a/samples/bpf/xdp_redirect_cpu_kern.c b/samples/bpf/xdp_redirect_cpu_kern.c
index d94a999..8255025 100644
--- a/samples/bpf/xdp_redirect_cpu_kern.c
+++ b/samples/bpf/xdp_redirect_cpu_kern.c
@@ -12,7 +12,7 @@
#include <uapi/linux/udp.h>
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#include "hash_func01.h"
#define MAX_CPUS NR_CPUS
@@ -21,7 +21,7 @@
struct {
__uint(type, BPF_MAP_TYPE_CPUMAP);
__uint(key_size, sizeof(u32));
- __uint(value_size, sizeof(u32));
+ __uint(value_size, sizeof(struct bpf_cpumap_val));
__uint(max_entries, MAX_CPUS);
} cpu_map SEC(".maps");
@@ -30,6 +30,9 @@
__u64 processed;
__u64 dropped;
__u64 issue;
+ __u64 xdp_pass;
+ __u64 xdp_drop;
+ __u64 xdp_redirect;
};
/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
@@ -692,13 +695,16 @@
* Code in: kernel/include/trace/events/xdp.h
*/
struct cpumap_kthread_ctx {
- u64 __pad; // First 8 bytes are not accessible by bpf code
- int map_id; // offset:8; size:4; signed:1;
- u32 act; // offset:12; size:4; signed:0;
- int cpu; // offset:16; size:4; signed:1;
- unsigned int drops; // offset:20; size:4; signed:0;
- unsigned int processed; // offset:24; size:4; signed:0;
- int sched; // offset:28; size:4; signed:1;
+ u64 __pad; // First 8 bytes are not accessible
+ int map_id; // offset:8; size:4; signed:1;
+ u32 act; // offset:12; size:4; signed:0;
+ int cpu; // offset:16; size:4; signed:1;
+ unsigned int drops; // offset:20; size:4; signed:0;
+ unsigned int processed; // offset:24; size:4; signed:0;
+ int sched; // offset:28; size:4; signed:1;
+ unsigned int xdp_pass; // offset:32; size:4; signed:0;
+ unsigned int xdp_drop; // offset:36; size:4; signed:0;
+ unsigned int xdp_redirect; // offset:40; size:4; signed:0;
};
SEC("tracepoint/xdp/xdp_cpumap_kthread")
@@ -712,6 +718,9 @@
return 0;
rec->processed += ctx->processed;
rec->dropped += ctx->drops;
+ rec->xdp_pass += ctx->xdp_pass;
+ rec->xdp_drop += ctx->xdp_drop;
+ rec->xdp_redirect += ctx->xdp_redirect;
/* Count times kthread yielded CPU via schedule call */
if (ctx->sched)
diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c
index 0a76725..16eb839 100644
--- a/samples/bpf/xdp_redirect_cpu_user.c
+++ b/samples/bpf/xdp_redirect_cpu_user.c
@@ -19,9 +19,6 @@
#include <time.h>
#include <linux/limits.h>
-#define __must_check
-#include <linux/err.h>
-
#include <arpa/inet.h>
#include <linux/if_link.h>
@@ -29,7 +26,7 @@
#define MAX_PROG 6
#include <bpf/bpf.h>
-#include "libbpf.h"
+#include <bpf/libbpf.h>
#include "bpf_util.h"
@@ -40,18 +37,35 @@
static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
static int n_cpus;
-static int cpu_map_fd;
-static int rx_cnt_map_fd;
-static int redirect_err_cnt_map_fd;
-static int cpumap_enqueue_cnt_map_fd;
-static int cpumap_kthread_cnt_map_fd;
-static int cpus_available_map_fd;
-static int cpus_count_map_fd;
-static int cpus_iterator_map_fd;
-static int exception_cnt_map_fd;
+
+enum map_type {
+ CPU_MAP,
+ RX_CNT,
+ REDIRECT_ERR_CNT,
+ CPUMAP_ENQUEUE_CNT,
+ CPUMAP_KTHREAD_CNT,
+ CPUS_AVAILABLE,
+ CPUS_COUNT,
+ CPUS_ITERATOR,
+ EXCEPTION_CNT,
+};
+
+static const char *const map_type_strings[] = {
+ [CPU_MAP] = "cpu_map",
+ [RX_CNT] = "rx_cnt",
+ [REDIRECT_ERR_CNT] = "redirect_err_cnt",
+ [CPUMAP_ENQUEUE_CNT] = "cpumap_enqueue_cnt",
+ [CPUMAP_KTHREAD_CNT] = "cpumap_kthread_cnt",
+ [CPUS_AVAILABLE] = "cpus_available",
+ [CPUS_COUNT] = "cpus_count",
+ [CPUS_ITERATOR] = "cpus_iterator",
+ [EXCEPTION_CNT] = "exception_cnt",
+};
#define NUM_TP 5
-struct bpf_link *tp_links[NUM_TP] = { 0 };
+#define NUM_MAP 9
+struct bpf_link *tp_links[NUM_TP] = {};
+static int map_fds[NUM_MAP];
static int tp_cnt = 0;
/* Exit return codes */
@@ -73,6 +87,11 @@
{"stress-mode", no_argument, NULL, 'x' },
{"no-separators", no_argument, NULL, 'z' },
{"force", no_argument, NULL, 'F' },
+ {"mprog-disable", no_argument, NULL, 'n' },
+ {"mprog-name", required_argument, NULL, 'e' },
+ {"mprog-filename", required_argument, NULL, 'f' },
+ {"redirect-device", required_argument, NULL, 'r' },
+ {"redirect-map", required_argument, NULL, 'm' },
{0, 0, NULL, 0 }
};
@@ -109,7 +128,7 @@
bpf_object__for_each_program(pos, obj) {
if (bpf_program__is_xdp(pos))
- printf(" %s\n", bpf_program__title(pos, false));
+ printf(" %s\n", bpf_program__section_name(pos));
}
}
@@ -159,6 +178,9 @@
__u64 processed;
__u64 dropped;
__u64 issue;
+ __u64 xdp_pass;
+ __u64 xdp_drop;
+ __u64 xdp_redirect;
};
struct record {
__u64 timestamp;
@@ -178,6 +200,9 @@
/* For percpu maps, userspace gets a value per possible CPU */
unsigned int nr_cpus = bpf_num_possible_cpus();
struct datarec values[nr_cpus];
+ __u64 sum_xdp_redirect = 0;
+ __u64 sum_xdp_pass = 0;
+ __u64 sum_xdp_drop = 0;
__u64 sum_processed = 0;
__u64 sum_dropped = 0;
__u64 sum_issue = 0;
@@ -199,10 +224,19 @@
sum_dropped += values[i].dropped;
rec->cpu[i].issue = values[i].issue;
sum_issue += values[i].issue;
+ rec->cpu[i].xdp_pass = values[i].xdp_pass;
+ sum_xdp_pass += values[i].xdp_pass;
+ rec->cpu[i].xdp_drop = values[i].xdp_drop;
+ sum_xdp_drop += values[i].xdp_drop;
+ rec->cpu[i].xdp_redirect = values[i].xdp_redirect;
+ sum_xdp_redirect += values[i].xdp_redirect;
}
rec->total.processed = sum_processed;
rec->total.dropped = sum_dropped;
rec->total.issue = sum_issue;
+ rec->total.xdp_pass = sum_xdp_pass;
+ rec->total.xdp_drop = sum_xdp_drop;
+ rec->total.xdp_redirect = sum_xdp_redirect;
return true;
}
@@ -303,17 +337,33 @@
return pps;
}
+static void calc_xdp_pps(struct datarec *r, struct datarec *p,
+ double *xdp_pass, double *xdp_drop,
+ double *xdp_redirect, double period_)
+{
+ *xdp_pass = 0, *xdp_drop = 0, *xdp_redirect = 0;
+ if (period_ > 0) {
+ *xdp_redirect = (r->xdp_redirect - p->xdp_redirect) / period_;
+ *xdp_pass = (r->xdp_pass - p->xdp_pass) / period_;
+ *xdp_drop = (r->xdp_drop - p->xdp_drop) / period_;
+ }
+}
+
static void stats_print(struct stats_record *stats_rec,
struct stats_record *stats_prev,
- char *prog_name)
+ char *prog_name, char *mprog_name, int mprog_fd)
{
unsigned int nr_cpus = bpf_num_possible_cpus();
double pps = 0, drop = 0, err = 0;
+ bool mprog_enabled = false;
struct record *rec, *prev;
int to_cpu;
double t;
int i;
+ if (mprog_fd > 0)
+ mprog_enabled = true;
+
/* Header */
printf("Running XDP/eBPF prog_name:%s\n", prog_name);
printf("%-15s %-7s %-14s %-11s %-9s\n",
@@ -458,6 +508,34 @@
printf(fm2_err, "xdp_exception", "total", pps, drop);
}
+ /* CPUMAP attached XDP program that runs on remote/destination CPU */
+ if (mprog_enabled) {
+ char *fmt_k = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f\n";
+ char *fm2_k = "%-15s %-7s %'-14.0f %'-11.0f %'-10.0f\n";
+ double xdp_pass, xdp_drop, xdp_redirect;
+
+ printf("\n2nd remote XDP/eBPF prog_name: %s\n", mprog_name);
+ printf("%-15s %-7s %-14s %-11s %-9s\n",
+ "XDP-cpumap", "CPU:to", "xdp-pass", "xdp-drop", "xdp-redir");
+
+ rec = &stats_rec->kthread;
+ prev = &stats_prev->kthread;
+ t = calc_period(rec, prev);
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+
+ calc_xdp_pps(r, p, &xdp_pass, &xdp_drop,
+ &xdp_redirect, t);
+ if (xdp_pass > 0 || xdp_drop > 0 || xdp_redirect > 0)
+ printf(fmt_k, "xdp-in-kthread", i, xdp_pass, xdp_drop,
+ xdp_redirect);
+ }
+ calc_xdp_pps(&rec->total, &prev->total, &xdp_pass, &xdp_drop,
+ &xdp_redirect, t);
+ printf(fm2_k, "xdp-in-kthread", "total", xdp_pass, xdp_drop, xdp_redirect);
+ }
+
printf("\n");
fflush(stdout);
}
@@ -466,20 +544,20 @@
{
int fd, i;
- fd = rx_cnt_map_fd;
+ fd = map_fds[RX_CNT];
map_collect_percpu(fd, 0, &rec->rx_cnt);
- fd = redirect_err_cnt_map_fd;
+ fd = map_fds[REDIRECT_ERR_CNT];
map_collect_percpu(fd, 1, &rec->redir_err);
- fd = cpumap_enqueue_cnt_map_fd;
+ fd = map_fds[CPUMAP_ENQUEUE_CNT];
for (i = 0; i < n_cpus; i++)
map_collect_percpu(fd, i, &rec->enq[i]);
- fd = cpumap_kthread_cnt_map_fd;
+ fd = map_fds[CPUMAP_KTHREAD_CNT];
map_collect_percpu(fd, 0, &rec->kthread);
- fd = exception_cnt_map_fd;
+ fd = map_fds[EXCEPTION_CNT];
map_collect_percpu(fd, 0, &rec->exception);
}
@@ -494,7 +572,7 @@
*b = tmp;
}
-static int create_cpu_entry(__u32 cpu, __u32 queue_size,
+static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value,
__u32 avail_idx, bool new)
{
__u32 curr_cpus_count = 0;
@@ -504,7 +582,7 @@
/* Add a CPU entry to cpumap, as this allocate a cpu entry in
* the kernel for the cpu.
*/
- ret = bpf_map_update_elem(cpu_map_fd, &cpu, &queue_size, 0);
+ ret = bpf_map_update_elem(map_fds[CPU_MAP], &cpu, value, 0);
if (ret) {
fprintf(stderr, "Create CPU entry failed (err:%d)\n", ret);
exit(EXIT_FAIL_BPF);
@@ -513,21 +591,21 @@
/* Inform bpf_prog's that a new CPU is available to select
* from via some control maps.
*/
- ret = bpf_map_update_elem(cpus_available_map_fd, &avail_idx, &cpu, 0);
+ ret = bpf_map_update_elem(map_fds[CPUS_AVAILABLE], &avail_idx, &cpu, 0);
if (ret) {
fprintf(stderr, "Add to avail CPUs failed\n");
exit(EXIT_FAIL_BPF);
}
/* When not replacing/updating existing entry, bump the count */
- ret = bpf_map_lookup_elem(cpus_count_map_fd, &key, &curr_cpus_count);
+ ret = bpf_map_lookup_elem(map_fds[CPUS_COUNT], &key, &curr_cpus_count);
if (ret) {
fprintf(stderr, "Failed reading curr cpus_count\n");
exit(EXIT_FAIL_BPF);
}
if (new) {
curr_cpus_count++;
- ret = bpf_map_update_elem(cpus_count_map_fd, &key,
+ ret = bpf_map_update_elem(map_fds[CPUS_COUNT], &key,
&curr_cpus_count, 0);
if (ret) {
fprintf(stderr, "Failed write curr cpus_count\n");
@@ -535,9 +613,9 @@
}
}
/* map_fd[7] = cpus_iterator */
- printf("%s CPU:%u as idx:%u queue_size:%d (total cpus_count:%u)\n",
+ printf("%s CPU:%u as idx:%u qsize:%d prog_fd: %d (cpus_count:%u)\n",
new ? "Add-new":"Replace", cpu, avail_idx,
- queue_size, curr_cpus_count);
+ value->qsize, value->bpf_prog.fd, curr_cpus_count);
return 0;
}
@@ -551,7 +629,7 @@
int ret, i;
for (i = 0; i < n_cpus; i++) {
- ret = bpf_map_update_elem(cpus_available_map_fd, &i,
+ ret = bpf_map_update_elem(map_fds[CPUS_AVAILABLE], &i,
&invalid_cpu, 0);
if (ret) {
fprintf(stderr, "Failed marking CPU unavailable\n");
@@ -561,21 +639,26 @@
}
/* Stress cpumap management code by concurrently changing underlying cpumap */
-static void stress_cpumap(void)
+static void stress_cpumap(struct bpf_cpumap_val *value)
{
/* Changing qsize will cause kernel to free and alloc a new
* bpf_cpu_map_entry, with an associated/complicated tear-down
* procedure.
*/
- create_cpu_entry(1, 1024, 0, false);
- create_cpu_entry(1, 8, 0, false);
- create_cpu_entry(1, 16000, 0, false);
+ value->qsize = 1024;
+ create_cpu_entry(1, value, 0, false);
+ value->qsize = 8;
+ create_cpu_entry(1, value, 0, false);
+ value->qsize = 16000;
+ create_cpu_entry(1, value, 0, false);
}
static void stats_poll(int interval, bool use_separators, char *prog_name,
+ char *mprog_name, struct bpf_cpumap_val *value,
bool stress_mode)
{
struct stats_record *record, *prev;
+ int mprog_fd;
record = alloc_stats_record();
prev = alloc_stats_record();
@@ -587,103 +670,125 @@
while (1) {
swap(&prev, &record);
+ mprog_fd = value->bpf_prog.fd;
stats_collect(record);
- stats_print(record, prev, prog_name);
+ stats_print(record, prev, prog_name, mprog_name, mprog_fd);
sleep(interval);
if (stress_mode)
- stress_cpumap();
+ stress_cpumap(value);
}
free_stats_record(record);
free_stats_record(prev);
}
-static struct bpf_link * attach_tp(struct bpf_object *obj,
- const char *tp_category,
- const char* tp_name)
+static int init_tracepoints(struct bpf_object *obj)
{
struct bpf_program *prog;
- struct bpf_link *link;
- char sec_name[PATH_MAX];
- int len;
- len = snprintf(sec_name, PATH_MAX, "tracepoint/%s/%s",
- tp_category, tp_name);
- if (len < 0)
- exit(EXIT_FAIL);
+ bpf_object__for_each_program(prog, obj) {
+ if (bpf_program__is_tracepoint(prog) != true)
+ continue;
- prog = bpf_object__find_program_by_title(obj, sec_name);
- if (!prog) {
- fprintf(stderr, "ERR: finding progsec: %s\n", sec_name);
- exit(EXIT_FAIL_BPF);
+ tp_links[tp_cnt] = bpf_program__attach(prog);
+ if (libbpf_get_error(tp_links[tp_cnt])) {
+ tp_links[tp_cnt] = NULL;
+ return -EINVAL;
+ }
+ tp_cnt++;
}
- link = bpf_program__attach_tracepoint(prog, tp_category, tp_name);
- if (IS_ERR(link))
- exit(EXIT_FAIL_BPF);
-
- return link;
-}
-
-static void init_tracepoints(struct bpf_object *obj) {
- tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_redirect_err");
- tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_redirect_map_err");
- tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_exception");
- tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_cpumap_enqueue");
- tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_cpumap_kthread");
-}
-
-static int init_map_fds(struct bpf_object *obj)
-{
- /* Maps updated by tracepoints */
- redirect_err_cnt_map_fd =
- bpf_object__find_map_fd_by_name(obj, "redirect_err_cnt");
- exception_cnt_map_fd =
- bpf_object__find_map_fd_by_name(obj, "exception_cnt");
- cpumap_enqueue_cnt_map_fd =
- bpf_object__find_map_fd_by_name(obj, "cpumap_enqueue_cnt");
- cpumap_kthread_cnt_map_fd =
- bpf_object__find_map_fd_by_name(obj, "cpumap_kthread_cnt");
-
- /* Maps used by XDP */
- rx_cnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rx_cnt");
- cpu_map_fd = bpf_object__find_map_fd_by_name(obj, "cpu_map");
- cpus_available_map_fd =
- bpf_object__find_map_fd_by_name(obj, "cpus_available");
- cpus_count_map_fd = bpf_object__find_map_fd_by_name(obj, "cpus_count");
- cpus_iterator_map_fd =
- bpf_object__find_map_fd_by_name(obj, "cpus_iterator");
-
- if (cpu_map_fd < 0 || rx_cnt_map_fd < 0 ||
- redirect_err_cnt_map_fd < 0 || cpumap_enqueue_cnt_map_fd < 0 ||
- cpumap_kthread_cnt_map_fd < 0 || cpus_available_map_fd < 0 ||
- cpus_count_map_fd < 0 || cpus_iterator_map_fd < 0 ||
- exception_cnt_map_fd < 0)
- return -ENOENT;
-
return 0;
}
+static int init_map_fds(struct bpf_object *obj)
+{
+ enum map_type type;
+
+ for (type = 0; type < NUM_MAP; type++) {
+ map_fds[type] =
+ bpf_object__find_map_fd_by_name(obj,
+ map_type_strings[type]);
+
+ if (map_fds[type] < 0)
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+static int load_cpumap_prog(char *file_name, char *prog_name,
+ char *redir_interface, char *redir_map)
+{
+ struct bpf_prog_load_attr prog_load_attr = {
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .expected_attach_type = BPF_XDP_CPUMAP,
+ .file = file_name,
+ };
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ int fd;
+
+ if (bpf_prog_load_xattr(&prog_load_attr, &obj, &fd))
+ return -1;
+
+ if (fd < 0) {
+ fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n",
+ strerror(errno));
+ return fd;
+ }
+
+ if (redir_interface && redir_map) {
+ int err, map_fd, ifindex_out, key = 0;
+
+ map_fd = bpf_object__find_map_fd_by_name(obj, redir_map);
+ if (map_fd < 0)
+ return map_fd;
+
+ ifindex_out = if_nametoindex(redir_interface);
+ if (!ifindex_out)
+ return -1;
+
+ err = bpf_map_update_elem(map_fd, &key, &ifindex_out, 0);
+ if (err < 0)
+ return err;
+ }
+
+ prog = bpf_object__find_program_by_title(obj, prog_name);
+ if (!prog) {
+ fprintf(stderr, "bpf_object__find_program_by_title failed\n");
+ return EXIT_FAIL;
+ }
+
+ return bpf_program__fd(prog);
+}
+
int main(int argc, char **argv)
{
- struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
char *prog_name = "xdp_cpu_map5_lb_hash_ip_pairs";
+ char *mprog_filename = "xdp_redirect_kern.o";
+ char *redir_interface = NULL, *redir_map = NULL;
+ char *mprog_name = "xdp_redirect_dummy";
+ bool mprog_disable = false;
struct bpf_prog_load_attr prog_load_attr = {
.prog_type = BPF_PROG_TYPE_UNSPEC,
};
struct bpf_prog_info info = {};
__u32 info_len = sizeof(info);
+ struct bpf_cpumap_val value;
bool use_separators = true;
bool stress_mode = false;
struct bpf_program *prog;
struct bpf_object *obj;
+ int err = EXIT_FAIL;
char filename[256];
int added_cpus = 0;
int longindex = 0;
int interval = 2;
int add_cpu = -1;
- int opt, err;
- int prog_fd;
+ int opt, prog_fd;
+ int *cpu, i;
__u32 qsize;
n_cpus = get_nprocs_conf();
@@ -705,22 +810,34 @@
}
if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
- return EXIT_FAIL;
+ return err;
if (prog_fd < 0) {
fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n",
strerror(errno));
- return EXIT_FAIL;
+ return err;
}
- init_tracepoints(obj);
+
+ if (init_tracepoints(obj) < 0) {
+ fprintf(stderr, "ERR: bpf_program__attach failed\n");
+ return err;
+ }
+
if (init_map_fds(obj) < 0) {
fprintf(stderr, "bpf_object__find_map_fd_by_name failed\n");
- return EXIT_FAIL;
+ return err;
}
mark_cpus_unavailable();
+ cpu = malloc(n_cpus * sizeof(int));
+ if (!cpu) {
+ fprintf(stderr, "failed to allocate cpu array\n");
+ return err;
+ }
+ memset(cpu, 0, n_cpus * sizeof(int));
+
/* Parse commands line args */
- while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzF",
+ while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzFf:e:r:m:n",
long_options, &longindex)) != -1) {
switch (opt) {
case 'd':
@@ -754,6 +871,21 @@
/* Selecting eBPF prog to load */
prog_name = optarg;
break;
+ case 'n':
+ mprog_disable = true;
+ break;
+ case 'f':
+ mprog_filename = optarg;
+ break;
+ case 'e':
+ mprog_name = optarg;
+ break;
+ case 'r':
+ redir_interface = optarg;
+ break;
+ case 'm':
+ redir_map = optarg;
+ break;
case 'c':
/* Add multiple CPUs */
add_cpu = strtoul(optarg, NULL, 0);
@@ -763,8 +895,7 @@
errno, strerror(errno));
goto error;
}
- create_cpu_entry(add_cpu, qsize, added_cpus, true);
- added_cpus++;
+ cpu[added_cpus++] = add_cpu;
break;
case 'q':
qsize = atoi(optarg);
@@ -775,24 +906,44 @@
case 'h':
error:
default:
+ free(cpu);
usage(argv, obj);
return EXIT_FAIL_OPTION;
}
}
+
+ if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
+ xdp_flags |= XDP_FLAGS_DRV_MODE;
+
/* Required option */
if (ifindex == -1) {
fprintf(stderr, "ERR: required option --dev missing\n");
usage(argv, obj);
- return EXIT_FAIL_OPTION;
+ err = EXIT_FAIL_OPTION;
+ goto out;
}
/* Required option */
if (add_cpu == -1) {
fprintf(stderr, "ERR: required option --cpu missing\n");
fprintf(stderr, " Specify multiple --cpu option to add more\n");
usage(argv, obj);
- return EXIT_FAIL_OPTION;
+ err = EXIT_FAIL_OPTION;
+ goto out;
}
+ value.bpf_prog.fd = 0;
+ if (!mprog_disable)
+ value.bpf_prog.fd = load_cpumap_prog(mprog_filename, mprog_name,
+ redir_interface, redir_map);
+ if (value.bpf_prog.fd < 0) {
+ err = value.bpf_prog.fd;
+ goto out;
+ }
+ value.qsize = qsize;
+
+ for (i = 0; i < added_cpus; i++)
+ create_cpu_entry(cpu[i], &value, i, true);
+
/* Remove XDP program when program is interrupted or killed */
signal(SIGINT, int_exit);
signal(SIGTERM, int_exit);
@@ -800,27 +951,33 @@
prog = bpf_object__find_program_by_title(obj, prog_name);
if (!prog) {
fprintf(stderr, "bpf_object__find_program_by_title failed\n");
- return EXIT_FAIL;
+ goto out;
}
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
fprintf(stderr, "bpf_program__fd failed\n");
- return EXIT_FAIL;
+ goto out;
}
if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
fprintf(stderr, "link set xdp fd failed\n");
- return EXIT_FAIL_XDP;
+ err = EXIT_FAIL_XDP;
+ goto out;
}
err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
if (err) {
printf("can't get prog info - %s\n", strerror(errno));
- return err;
+ goto out;
}
prog_id = info.id;
- stats_poll(interval, use_separators, prog_name, stress_mode);
- return EXIT_OK;
+ stats_poll(interval, use_separators, prog_name, mprog_name,
+ &value, stress_mode);
+
+ err = EXIT_OK;
+out:
+ free(cpu);
+ return err;
}
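
xdp_redirect_cpu now programs CPUMAP entries with struct bpf_cpumap_val, which carries the queue size plus an optional fd of a second XDP program to run on the remote CPU. Creating one entry boils down to the following sketch (cpu_map_fd and mprog_fd are assumed to come from the caller; an fd of 0 means no extra program):

#include <linux/bpf.h>
#include <bpf/bpf.h>

static int add_cpu_entry(int cpu_map_fd, __u32 cpu, __u32 qsize, int mprog_fd)
{
	struct bpf_cpumap_val val = {
		.qsize = qsize,			/* size of the per-CPU ptr_ring */
		.bpf_prog.fd = mprog_fd,	/* 0: no CPUMAP-attached program */
	};

	return bpf_map_update_elem(cpu_map_fd, &cpu, &val, 0);
}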
diff --git a/samples/bpf/xdp_redirect_kern.c b/samples/bpf/xdp_redirect_kern.c
index 1f0b7d0..d26ec3a 100644
--- a/samples/bpf/xdp_redirect_kern.c
+++ b/samples/bpf/xdp_redirect_kern.c
@@ -17,7 +17,7 @@
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
diff --git a/samples/bpf/xdp_redirect_map_kern.c b/samples/bpf/xdp_redirect_map_kern.c
index 4631b48..6489352 100644
--- a/samples/bpf/xdp_redirect_map_kern.c
+++ b/samples/bpf/xdp_redirect_map_kern.c
@@ -17,7 +17,7 @@
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
struct {
__uint(type, BPF_MAP_TYPE_DEVMAP);
diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c
index f70ee33..35e16de 100644
--- a/samples/bpf/xdp_redirect_map_user.c
+++ b/samples/bpf/xdp_redirect_map_user.c
@@ -17,7 +17,7 @@
#include "bpf_util.h"
#include <bpf/bpf.h>
-#include "libbpf.h"
+#include <bpf/libbpf.h>
static int ifindex_in;
static int ifindex_out;
@@ -116,7 +116,7 @@
xdp_flags |= XDP_FLAGS_SKB_MODE;
break;
case 'N':
- xdp_flags |= XDP_FLAGS_DRV_MODE;
+ /* default, set below */
break;
case 'F':
xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
@@ -127,6 +127,9 @@
}
}
+ if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
+ xdp_flags |= XDP_FLAGS_DRV_MODE;
+
if (optind == argc) {
printf("usage: %s <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n", argv[0]);
return 1;
diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c
index b7bc2a3..3c92adc 100644
--- a/samples/bpf/xdp_redirect_user.c
+++ b/samples/bpf/xdp_redirect_user.c
@@ -17,7 +17,7 @@
#include "bpf_util.h"
#include <bpf/bpf.h>
-#include "libbpf.h"
+#include <bpf/libbpf.h>
static int ifindex_in;
static int ifindex_out;
@@ -117,7 +117,7 @@
xdp_flags |= XDP_FLAGS_SKB_MODE;
break;
case 'N':
- xdp_flags |= XDP_FLAGS_DRV_MODE;
+ /* default, set below */
break;
case 'F':
xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
@@ -128,7 +128,10 @@
}
}
- if (optind == argc) {
+ if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
+ xdp_flags |= XDP_FLAGS_DRV_MODE;
+
+ if (optind + 2 != argc) {
printf("usage: %s <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n", argv[0]);
return 1;
}
diff --git a/samples/bpf/xdp_router_ipv4_kern.c b/samples/bpf/xdp_router_ipv4_kern.c
index bf11efc..b37ca2b 100644
--- a/samples/bpf/xdp_router_ipv4_kern.c
+++ b/samples/bpf/xdp_router_ipv4_kern.c
@@ -12,7 +12,7 @@
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#include <linux/slab.h>
#include <net/ip_fib.h>
diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c
index 1469b66..c2da1b5 100644
--- a/samples/bpf/xdp_router_ipv4_user.c
+++ b/samples/bpf/xdp_router_ipv4_user.c
@@ -21,7 +21,7 @@
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include "bpf_util.h"
-#include "libbpf.h"
+#include <bpf/libbpf.h>
#include <sys/resource.h>
#include <libgen.h>
@@ -662,6 +662,9 @@
}
}
+ if (!(flags & XDP_FLAGS_SKB_MODE))
+ flags |= XDP_FLAGS_DRV_MODE;
+
if (optind == ac) {
usage(basename(argv[0]));
return 1;
diff --git a/samples/bpf/xdp_rxq_info_kern.c b/samples/bpf/xdp_rxq_info_kern.c
index 272d0f8..5e7459f 100644
--- a/samples/bpf/xdp_rxq_info_kern.c
+++ b/samples/bpf/xdp_rxq_info_kern.c
@@ -6,7 +6,7 @@
#include <uapi/linux/bpf.h>
#include <uapi/linux/if_ether.h>
#include <uapi/linux/in.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
/* Config setup from with userspace
*
diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c
index 21d6e50..93fa1bc 100644
--- a/samples/bpf/xdp_rxq_info_user.c
+++ b/samples/bpf/xdp_rxq_info_user.c
@@ -22,8 +22,8 @@
#include <arpa/inet.h>
#include <linux/if_link.h>
-#include "bpf.h"
-#include "libbpf.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
#include "bpf_util.h"
static int ifindex = -1;
@@ -51,8 +51,8 @@
{"sec", required_argument, NULL, 's' },
{"no-separators", no_argument, NULL, 'z' },
{"action", required_argument, NULL, 'a' },
- {"readmem", no_argument, NULL, 'r' },
- {"swapmac", no_argument, NULL, 'm' },
+ {"readmem", no_argument, NULL, 'r' },
+ {"swapmac", no_argument, NULL, 'm' },
{"force", no_argument, NULL, 'F' },
{0, 0, NULL, 0 }
};
@@ -450,7 +450,7 @@
int main(int argc, char **argv)
{
__u32 cfg_options= NO_TOUCH ; /* Default: Don't touch packet memory */
- struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
struct bpf_prog_load_attr prog_load_attr = {
.prog_type = BPF_PROG_TYPE_XDP,
};
@@ -492,7 +492,7 @@
map_fd = bpf_map__fd(map);
if (!prog_fd) {
- fprintf(stderr, "ERR: load_bpf_file: %s\n", strerror(errno));
+ fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n", strerror(errno));
return EXIT_FAIL;
}
@@ -544,6 +544,10 @@
return EXIT_FAIL_OPTION;
}
}
+
+ if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
+ xdp_flags |= XDP_FLAGS_DRV_MODE;
+
/* Required option */
if (ifindex == -1) {
fprintf(stderr, "ERR: required option --dev missing\n");
diff --git a/samples/bpf/xdp_sample_pkts_kern.c b/samples/bpf/xdp_sample_pkts_kern.c
index 6c7c7e0..9cf76b3 100644
--- a/samples/bpf/xdp_sample_pkts_kern.c
+++ b/samples/bpf/xdp_sample_pkts_kern.c
@@ -2,17 +2,15 @@
#include <linux/ptrace.h>
#include <linux/version.h>
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#define SAMPLE_SIZE 64ul
-#define MAX_CPUS 128
-struct bpf_map_def SEC("maps") my_map = {
- .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(u32),
- .max_entries = MAX_CPUS,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(u32));
+} my_map SEC(".maps");
SEC("xdp_sample")
int xdp_sample_prog(struct xdp_md *ctx)
diff --git a/samples/bpf/xdp_sample_pkts_user.c b/samples/bpf/xdp_sample_pkts_user.c
index 3002714..4b2a300 100644
--- a/samples/bpf/xdp_sample_pkts_user.c
+++ b/samples/bpf/xdp_sample_pkts_user.c
@@ -10,7 +10,7 @@
#include <sys/sysinfo.h>
#include <sys/ioctl.h>
#include <signal.h>
-#include <libbpf.h>
+#include <bpf/libbpf.h>
#include <bpf/bpf.h>
#include <sys/resource.h>
#include <libgen.h>
@@ -18,7 +18,6 @@
#include "perf-sys.h"
-#define MAX_CPUS 128
static int if_idx;
static char *if_name;
static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
@@ -52,13 +51,13 @@
__u32 curr_prog_id = 0;
int err = 0;
- err = bpf_get_link_xdp_id(idx, &curr_prog_id, 0);
+ err = bpf_get_link_xdp_id(idx, &curr_prog_id, xdp_flags);
if (err) {
printf("bpf_get_link_xdp_id failed\n");
return err;
}
if (prog_id == curr_prog_id) {
- err = bpf_set_link_xdp_fd(idx, -1, 0);
+ err = bpf_set_link_xdp_fd(idx, -1, xdp_flags);
if (err < 0)
printf("ERROR: failed to detach prog from %s\n", name);
} else if (!curr_prog_id) {
@@ -115,7 +114,7 @@
.prog_type = BPF_PROG_TYPE_XDP,
};
struct perf_buffer_opts pb_opts = {};
- const char *optstr = "F";
+ const char *optstr = "FS";
int prog_fd, map_fd, opt;
struct bpf_object *obj;
struct bpf_map *map;
@@ -127,12 +126,18 @@
case 'F':
xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
break;
+ case 'S':
+ xdp_flags |= XDP_FLAGS_SKB_MODE;
+ break;
default:
usage(basename(argv[0]));
return 1;
}
}
+ if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
+ xdp_flags |= XDP_FLAGS_DRV_MODE;
+
if (optind == argc) {
usage(basename(argv[0]));
return 1;
@@ -150,7 +155,7 @@
return 1;
if (!prog_fd) {
- printf("load_bpf_file: %s\n", strerror(errno));
+ printf("bpf_prog_load_xattr: %s\n", strerror(errno));
return 1;
}
diff --git a/samples/bpf/xdp_tx_iptunnel_kern.c b/samples/bpf/xdp_tx_iptunnel_kern.c
index 6db450a..575d57e 100644
--- a/samples/bpf/xdp_tx_iptunnel_kern.c
+++ b/samples/bpf/xdp_tx_iptunnel_kern.c
@@ -16,7 +16,7 @@
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#include "xdp_tx_iptunnel_common.h"
struct {
diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c
index dfb6858..a419bee 100644
--- a/samples/bpf/xdp_tx_iptunnel_user.c
+++ b/samples/bpf/xdp_tx_iptunnel_user.c
@@ -15,7 +15,7 @@
#include <netinet/ether.h>
#include <unistd.h>
#include <time.h>
-#include "libbpf.h"
+#include <bpf/libbpf.h>
#include <bpf/bpf.h>
#include "bpf_util.h"
#include "xdp_tx_iptunnel_common.h"
@@ -231,7 +231,7 @@
xdp_flags |= XDP_FLAGS_SKB_MODE;
break;
case 'N':
- xdp_flags |= XDP_FLAGS_DRV_MODE;
+ /* default, set below */
break;
case 'F':
xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
@@ -243,6 +243,9 @@
opt_flags[opt] = 0;
}
+ if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
+ xdp_flags |= XDP_FLAGS_DRV_MODE;
+
for (i = 0; i < strlen(optstr); i++) {
if (opt_flags[(unsigned int)optstr[i]]) {
fprintf(stderr, "Missing argument -%c\n", optstr[i]);
@@ -268,7 +271,7 @@
return 1;
if (!prog_fd) {
- printf("load_bpf_file: %s\n", strerror(errno));
+ printf("bpf_prog_load_xattr: %s\n", strerror(errno));
return 1;
}
diff --git a/samples/bpf/xdpsock.h b/samples/bpf/xdpsock.h
new file mode 100644
index 0000000..b7eca15
--- /dev/null
+++ b/samples/bpf/xdpsock.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright(c) 2019 Intel Corporation.
+ */
+
+#ifndef XDPSOCK_H_
+#define XDPSOCK_H_
+
+#define MAX_SOCKS 4
+
+#endif /* XDPSOCK_H_ */
diff --git a/samples/bpf/xdpsock_kern.c b/samples/bpf/xdpsock_kern.c
new file mode 100644
index 0000000..0543048
--- /dev/null
+++ b/samples/bpf/xdpsock_kern.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "xdpsock.h"
+
+/* This XDP program is only needed for the XDP_SHARED_UMEM mode.
+ * If you do not use this mode, libbpf can supply an XDP program for you.
+ */
+
+struct {
+ __uint(type, BPF_MAP_TYPE_XSKMAP);
+ __uint(max_entries, MAX_SOCKS);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+} xsks_map SEC(".maps");
+
+static unsigned int rr;
+
+SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
+{
+ rr = (rr + 1) & (MAX_SOCKS - 1);
+
+ return bpf_redirect_map(&xsks_map, rr, XDP_DROP);
+}
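The round-robin above relies on MAX_SOCKS (4, from xdpsock.h) being a power of two, so that rr & (MAX_SOCKS - 1) behaves as a modulo. One way to guard that assumption on the user-space side (a C11 static assertion, not part of the sample):

#include "xdpsock.h"

/* The kernel program's "rr & (MAX_SOCKS - 1)" is only a valid modulo when
 * MAX_SOCKS is a power of two (exactly one bit set).
 */
_Static_assert((MAX_SOCKS & (MAX_SOCKS - 1)) == 0,
	       "MAX_SOCKS must be a power of two");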
diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
index 3b604c1..2e4508a 100644
--- a/samples/bpf/xdpsock_user.c
+++ b/samples/bpf/xdpsock_user.c
@@ -10,6 +10,10 @@
#include <linux/if_link.h>
#include <linux/if_xdp.h>
#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/limits.h>
+#include <linux/udp.h>
+#include <arpa/inet.h>
#include <locale.h>
#include <net/ethernet.h>
#include <net/if.h>
@@ -27,9 +31,10 @@
#include <time.h>
#include <unistd.h>
-#include "libbpf.h"
-#include "xsk.h"
+#include <bpf/libbpf.h>
+#include <bpf/xsk.h>
#include <bpf/bpf.h>
+#include "xdpsock.h"
#ifndef SOL_XDP
#define SOL_XDP 283
@@ -44,13 +49,14 @@
#endif
#define NUM_FRAMES (4 * 1024)
-#define BATCH_SIZE 64
+#define MIN_PKT_SIZE 64
#define DEBUG_HEXDUMP 0
-#define MAX_SOCKS 8
typedef __u64 u64;
typedef __u32 u32;
+typedef __u16 u16;
+typedef __u8 u8;
static unsigned long prev_time;
@@ -65,17 +71,67 @@
static const char *opt_if = "";
static int opt_ifindex;
static int opt_queue;
+static unsigned long opt_duration;
+static unsigned long start_time;
+static bool benchmark_done;
+static u32 opt_batch_size = 64;
+static int opt_pkt_count;
+static u16 opt_pkt_size = MIN_PKT_SIZE;
+static u32 opt_pkt_fill_pattern = 0x12345678;
+static bool opt_extra_stats;
+static bool opt_quiet;
+static bool opt_app_stats;
+static const char *opt_irq_str = "";
+static u32 irq_no;
+static int irqs_at_init = -1;
static int opt_poll;
static int opt_interval = 1;
static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP;
static u32 opt_umem_flags;
static int opt_unaligned_chunks;
static int opt_mmap_flags;
-static u32 opt_xdp_bind_flags;
static int opt_xsk_frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
static int opt_timeout = 1000;
static bool opt_need_wakeup = true;
-static __u32 prog_id;
+static u32 opt_num_xsks = 1;
+static u32 prog_id;
+
+struct xsk_ring_stats {
+ unsigned long rx_npkts;
+ unsigned long tx_npkts;
+ unsigned long rx_dropped_npkts;
+ unsigned long rx_invalid_npkts;
+ unsigned long tx_invalid_npkts;
+ unsigned long rx_full_npkts;
+ unsigned long rx_fill_empty_npkts;
+ unsigned long tx_empty_npkts;
+ unsigned long prev_rx_npkts;
+ unsigned long prev_tx_npkts;
+ unsigned long prev_rx_dropped_npkts;
+ unsigned long prev_rx_invalid_npkts;
+ unsigned long prev_tx_invalid_npkts;
+ unsigned long prev_rx_full_npkts;
+ unsigned long prev_rx_fill_empty_npkts;
+ unsigned long prev_tx_empty_npkts;
+};
+
+struct xsk_driver_stats {
+ unsigned long intrs;
+ unsigned long prev_intrs;
+};
+
+struct xsk_app_stats {
+ unsigned long rx_empty_polls;
+ unsigned long fill_fail_polls;
+ unsigned long copy_tx_sendtos;
+ unsigned long tx_wakeup_sendtos;
+ unsigned long opt_polls;
+ unsigned long prev_rx_empty_polls;
+ unsigned long prev_fill_fail_polls;
+ unsigned long prev_copy_tx_sendtos;
+ unsigned long prev_tx_wakeup_sendtos;
+ unsigned long prev_opt_polls;
+};
struct xsk_umem_info {
struct xsk_ring_prod fq;
@@ -89,10 +145,9 @@
struct xsk_ring_prod tx;
struct xsk_umem_info *umem;
struct xsk_socket *xsk;
- unsigned long rx_npkts;
- unsigned long tx_npkts;
- unsigned long prev_rx_npkts;
- unsigned long prev_tx_npkts;
+ struct xsk_ring_stats ring_stats;
+ struct xsk_app_stats app_stats;
+ struct xsk_driver_stats drv_stats;
u32 outstanding_tx;
};
@@ -135,6 +190,163 @@
}
}
+static int xsk_get_xdp_stats(int fd, struct xsk_socket_info *xsk)
+{
+ struct xdp_statistics stats;
+ socklen_t optlen;
+ int err;
+
+ optlen = sizeof(stats);
+ err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
+ if (err)
+ return err;
+
+ if (optlen == sizeof(struct xdp_statistics)) {
+ xsk->ring_stats.rx_dropped_npkts = stats.rx_dropped;
+ xsk->ring_stats.rx_invalid_npkts = stats.rx_invalid_descs;
+ xsk->ring_stats.tx_invalid_npkts = stats.tx_invalid_descs;
+ xsk->ring_stats.rx_full_npkts = stats.rx_ring_full;
+ xsk->ring_stats.rx_fill_empty_npkts = stats.rx_fill_ring_empty_descs;
+ xsk->ring_stats.tx_empty_npkts = stats.tx_ring_empty_descs;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static void dump_app_stats(long dt)
+{
+ int i;
+
+ for (i = 0; i < num_socks && xsks[i]; i++) {
+ char *fmt = "%-18s %'-14.0f %'-14lu\n";
+ double rx_empty_polls_ps, fill_fail_polls_ps, copy_tx_sendtos_ps,
+ tx_wakeup_sendtos_ps, opt_polls_ps;
+
+ rx_empty_polls_ps = (xsks[i]->app_stats.rx_empty_polls -
+ xsks[i]->app_stats.prev_rx_empty_polls) * 1000000000. / dt;
+ fill_fail_polls_ps = (xsks[i]->app_stats.fill_fail_polls -
+ xsks[i]->app_stats.prev_fill_fail_polls) * 1000000000. / dt;
+ copy_tx_sendtos_ps = (xsks[i]->app_stats.copy_tx_sendtos -
+ xsks[i]->app_stats.prev_copy_tx_sendtos) * 1000000000. / dt;
+ tx_wakeup_sendtos_ps = (xsks[i]->app_stats.tx_wakeup_sendtos -
+ xsks[i]->app_stats.prev_tx_wakeup_sendtos)
+ * 1000000000. / dt;
+ opt_polls_ps = (xsks[i]->app_stats.opt_polls -
+ xsks[i]->app_stats.prev_opt_polls) * 1000000000. / dt;
+
+ printf("\n%-18s %-14s %-14s\n", "", "calls/s", "count");
+ printf(fmt, "rx empty polls", rx_empty_polls_ps, xsks[i]->app_stats.rx_empty_polls);
+ printf(fmt, "fill fail polls", fill_fail_polls_ps,
+ xsks[i]->app_stats.fill_fail_polls);
+ printf(fmt, "copy tx sendtos", copy_tx_sendtos_ps,
+ xsks[i]->app_stats.copy_tx_sendtos);
+ printf(fmt, "tx wakeup sendtos", tx_wakeup_sendtos_ps,
+ xsks[i]->app_stats.tx_wakeup_sendtos);
+ printf(fmt, "opt polls", opt_polls_ps, xsks[i]->app_stats.opt_polls);
+
+ xsks[i]->app_stats.prev_rx_empty_polls = xsks[i]->app_stats.rx_empty_polls;
+ xsks[i]->app_stats.prev_fill_fail_polls = xsks[i]->app_stats.fill_fail_polls;
+ xsks[i]->app_stats.prev_copy_tx_sendtos = xsks[i]->app_stats.copy_tx_sendtos;
+ xsks[i]->app_stats.prev_tx_wakeup_sendtos = xsks[i]->app_stats.tx_wakeup_sendtos;
+ xsks[i]->app_stats.prev_opt_polls = xsks[i]->app_stats.opt_polls;
+ }
+}
+
+static bool get_interrupt_number(void)
+{
+ FILE *f_int_proc;
+ char line[4096];
+ bool found = false;
+
+ f_int_proc = fopen("/proc/interrupts", "r");
+ if (f_int_proc == NULL) {
+ printf("Failed to open /proc/interrupts.\n");
+ return found;
+ }
+
+ while (!feof(f_int_proc) && !found) {
+ /* Make sure to read a full line at a time */
+ if (fgets(line, sizeof(line), f_int_proc) == NULL ||
+ line[strlen(line) - 1] != '\n') {
+ printf("Error reading from interrupts file\n");
+ break;
+ }
+
+ /* Extract interrupt number from line */
+ if (strstr(line, opt_irq_str) != NULL) {
+ irq_no = atoi(line);
+ found = true;
+ break;
+ }
+ }
+
+ fclose(f_int_proc);
+
+ return found;
+}
+
+static int get_irqs(void)
+{
+ char count_path[PATH_MAX];
+ int total_intrs = -1;
+ FILE *f_count_proc;
+ char line[4096];
+
+ snprintf(count_path, sizeof(count_path),
+ "/sys/kernel/irq/%i/per_cpu_count", irq_no);
+ f_count_proc = fopen(count_path, "r");
+ if (f_count_proc == NULL) {
+ printf("Failed to open %s\n", count_path);
+ return total_intrs;
+ }
+
+ if (fgets(line, sizeof(line), f_count_proc) == NULL ||
+ line[strlen(line) - 1] != '\n') {
+ printf("Error reading from %s\n", count_path);
+ } else {
+ static const char com[2] = ",";
+ char *token;
+
+ total_intrs = 0;
+ token = strtok(line, com);
+ while (token != NULL) {
+ /* sum up interrupts across all cores */
+ total_intrs += atoi(token);
+ token = strtok(NULL, com);
+ }
+ }
+
+ fclose(f_count_proc);
+
+ return total_intrs;
+}
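For context, these two helpers assume input shaped roughly as follows (illustrative values, not taken from a real system):

/* get_interrupt_number() scans /proc/interrupts for the first line containing
 * the -I/--irq-string substring, e.g. a line of the form
 *
 *   128:   1021    998   IR-PCI-MSI 524289-edge   i40e-eth0-TxRx-0
 *
 * and relies on atoi() skipping leading whitespace to return the IRQ number
 * (128 here). get_irqs() then sums the comma-separated per-CPU counters read
 * from /sys/kernel/irq/<irq>/per_cpu_count.
 */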
+
+static void dump_driver_stats(long dt)
+{
+ int i;
+
+ for (i = 0; i < num_socks && xsks[i]; i++) {
+ char *fmt = "%-18s %'-14.0f %'-14lu\n";
+ double intrs_ps;
+ int n_ints = get_irqs();
+
+ if (n_ints < 0) {
+ printf("error getting intr info for intr %i\n", irq_no);
+ return;
+ }
+ xsks[i]->drv_stats.intrs = n_ints - irqs_at_init;
+
+ intrs_ps = (xsks[i]->drv_stats.intrs - xsks[i]->drv_stats.prev_intrs) *
+ 1000000000. / dt;
+
+ printf("\n%-18s %-14s %-14s\n", "", "intrs/s", "count");
+ printf(fmt, "irqs", intrs_ps, xsks[i]->drv_stats.intrs);
+
+ xsks[i]->drv_stats.prev_intrs = xsks[i]->drv_stats.intrs;
+ }
+}
+
static void dump_stats(void)
{
unsigned long now = get_nsecs();
@@ -144,32 +356,100 @@
prev_time = now;
for (i = 0; i < num_socks && xsks[i]; i++) {
- char *fmt = "%-15s %'-11.0f %'-11lu\n";
- double rx_pps, tx_pps;
+ char *fmt = "%-18s %'-14.0f %'-14lu\n";
+ double rx_pps, tx_pps, dropped_pps, rx_invalid_pps, full_pps, fill_empty_pps,
+ tx_invalid_pps, tx_empty_pps;
- rx_pps = (xsks[i]->rx_npkts - xsks[i]->prev_rx_npkts) *
+ rx_pps = (xsks[i]->ring_stats.rx_npkts - xsks[i]->ring_stats.prev_rx_npkts) *
1000000000. / dt;
- tx_pps = (xsks[i]->tx_npkts - xsks[i]->prev_tx_npkts) *
+ tx_pps = (xsks[i]->ring_stats.tx_npkts - xsks[i]->ring_stats.prev_tx_npkts) *
1000000000. / dt;
printf("\n sock%d@", i);
print_benchmark(false);
printf("\n");
- printf("%-15s %-11s %-11s %-11.2f\n", "", "pps", "pkts",
+ printf("%-18s %-14s %-14s %-14.2f\n", "", "pps", "pkts",
dt / 1000000000.);
- printf(fmt, "rx", rx_pps, xsks[i]->rx_npkts);
- printf(fmt, "tx", tx_pps, xsks[i]->tx_npkts);
+ printf(fmt, "rx", rx_pps, xsks[i]->ring_stats.rx_npkts);
+ printf(fmt, "tx", tx_pps, xsks[i]->ring_stats.tx_npkts);
- xsks[i]->prev_rx_npkts = xsks[i]->rx_npkts;
- xsks[i]->prev_tx_npkts = xsks[i]->tx_npkts;
+ xsks[i]->ring_stats.prev_rx_npkts = xsks[i]->ring_stats.rx_npkts;
+ xsks[i]->ring_stats.prev_tx_npkts = xsks[i]->ring_stats.tx_npkts;
+
+ if (opt_extra_stats) {
+ if (!xsk_get_xdp_stats(xsk_socket__fd(xsks[i]->xsk), xsks[i])) {
+ dropped_pps = (xsks[i]->ring_stats.rx_dropped_npkts -
+ xsks[i]->ring_stats.prev_rx_dropped_npkts) *
+ 1000000000. / dt;
+ rx_invalid_pps = (xsks[i]->ring_stats.rx_invalid_npkts -
+ xsks[i]->ring_stats.prev_rx_invalid_npkts) *
+ 1000000000. / dt;
+ tx_invalid_pps = (xsks[i]->ring_stats.tx_invalid_npkts -
+ xsks[i]->ring_stats.prev_tx_invalid_npkts) *
+ 1000000000. / dt;
+ full_pps = (xsks[i]->ring_stats.rx_full_npkts -
+ xsks[i]->ring_stats.prev_rx_full_npkts) *
+ 1000000000. / dt;
+ fill_empty_pps = (xsks[i]->ring_stats.rx_fill_empty_npkts -
+ xsks[i]->ring_stats.prev_rx_fill_empty_npkts) *
+ 1000000000. / dt;
+ tx_empty_pps = (xsks[i]->ring_stats.tx_empty_npkts -
+ xsks[i]->ring_stats.prev_tx_empty_npkts) *
+ 1000000000. / dt;
+
+ printf(fmt, "rx dropped", dropped_pps,
+ xsks[i]->ring_stats.rx_dropped_npkts);
+ printf(fmt, "rx invalid", rx_invalid_pps,
+ xsks[i]->ring_stats.rx_invalid_npkts);
+ printf(fmt, "tx invalid", tx_invalid_pps,
+ xsks[i]->ring_stats.tx_invalid_npkts);
+ printf(fmt, "rx queue full", full_pps,
+ xsks[i]->ring_stats.rx_full_npkts);
+ printf(fmt, "fill ring empty", fill_empty_pps,
+ xsks[i]->ring_stats.rx_fill_empty_npkts);
+ printf(fmt, "tx ring empty", tx_empty_pps,
+ xsks[i]->ring_stats.tx_empty_npkts);
+
+ xsks[i]->ring_stats.prev_rx_dropped_npkts =
+ xsks[i]->ring_stats.rx_dropped_npkts;
+ xsks[i]->ring_stats.prev_rx_invalid_npkts =
+ xsks[i]->ring_stats.rx_invalid_npkts;
+ xsks[i]->ring_stats.prev_tx_invalid_npkts =
+ xsks[i]->ring_stats.tx_invalid_npkts;
+ xsks[i]->ring_stats.prev_rx_full_npkts =
+ xsks[i]->ring_stats.rx_full_npkts;
+ xsks[i]->ring_stats.prev_rx_fill_empty_npkts =
+ xsks[i]->ring_stats.rx_fill_empty_npkts;
+ xsks[i]->ring_stats.prev_tx_empty_npkts =
+ xsks[i]->ring_stats.tx_empty_npkts;
+ } else {
+ printf("%-15s\n", "Error retrieving extra stats");
+ }
+ }
}
+
+ if (opt_app_stats)
+ dump_app_stats(dt);
+ if (irq_no)
+ dump_driver_stats(dt);
+}
+
+static bool is_benchmark_done(void)
+{
+ if (opt_duration > 0) {
+ unsigned long dt = (get_nsecs() - start_time);
+
+ if (dt >= opt_duration)
+ benchmark_done = true;
+ }
+ return benchmark_done;
}
static void *poller(void *arg)
{
(void)arg;
- for (;;) {
+ while (!is_benchmark_done()) {
sleep(opt_interval);
dump_stats();
}
@@ -179,7 +459,7 @@
static void remove_xdp_program(void)
{
- __u32 curr_prog_id = 0;
+ u32 curr_prog_id = 0;
if (bpf_get_link_xdp_id(opt_ifindex, &curr_prog_id, opt_xdp_flags)) {
printf("bpf_get_link_xdp_id failed\n");
@@ -195,16 +475,19 @@
static void int_exit(int sig)
{
- struct xsk_umem *umem = xsks[0]->umem->umem;
+ benchmark_done = true;
+}
- (void)sig;
+static void xdpsock_cleanup(void)
+{
+ struct xsk_umem *umem = xsks[0]->umem->umem;
+ int i;
dump_stats();
- xsk_socket__delete(xsks[0]->xsk);
+ for (i = 0; i < num_socks; i++)
+ xsk_socket__delete(xsks[i]->xsk);
(void)xsk_umem__delete(umem);
remove_xdp_program();
-
- exit(EXIT_SUCCESS);
}
static void __exit_with_error(int error, const char *file, const char *func,
@@ -219,13 +502,6 @@
#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, \
__LINE__)
-
-static const char pkt_data[] =
- "\x3c\xfd\xfe\x9e\x7f\x71\xec\xb1\xd7\x98\x3a\xc0\x08\x00\x45\x00"
- "\x00\x2e\x00\x00\x00\x00\x40\x11\x88\x97\x05\x08\x07\x08\xc8\x14"
- "\x1e\x04\x10\x92\x10\x92\x00\x1a\x6d\xa3\x34\x33\x1f\x69\x40\x6b"
- "\x54\x59\xb6\x14\x2d\x11\x44\xbf\xaf\xd9\xbe\xaa";
-
static void swap_mac_addresses(void *data)
{
struct ether_header *eth = (struct ether_header *)data;
@@ -273,24 +549,264 @@
printf("\n");
}
-static size_t gen_eth_frame(struct xsk_umem_info *umem, u64 addr)
+static void *memset32_htonl(void *dest, u32 val, u32 size)
+{
+ u32 *ptr = (u32 *)dest;
+ int i;
+
+ val = htonl(val);
+
+ for (i = 0; i < (size & (~0x3)); i += 4)
+ ptr[i >> 2] = val;
+
+ for (; i < size; i++)
+ ((char *)dest)[i] = ((char *)&val)[i & 3];
+
+ return dest;
+}
+
+/*
+ * This function code has been taken from
+ * Linux kernel lib/checksum.c
+ */
+static inline unsigned short from32to16(unsigned int x)
+{
+ /* add up 16-bit and 16-bit for 16+c bit */
+ x = (x & 0xffff) + (x >> 16);
+ /* add up carry.. */
+ x = (x & 0xffff) + (x >> 16);
+ return x;
+}
+
+/*
+ * This function code has been taken from
+ * Linux kernel lib/checksum.c
+ */
+static unsigned int do_csum(const unsigned char *buff, int len)
+{
+ unsigned int result = 0;
+ int odd;
+
+ if (len <= 0)
+ goto out;
+ odd = 1 & (unsigned long)buff;
+ if (odd) {
+#ifdef __LITTLE_ENDIAN
+ result += (*buff << 8);
+#else
+ result = *buff;
+#endif
+ len--;
+ buff++;
+ }
+ if (len >= 2) {
+ if (2 & (unsigned long)buff) {
+ result += *(unsigned short *)buff;
+ len -= 2;
+ buff += 2;
+ }
+ if (len >= 4) {
+ const unsigned char *end = buff +
+ ((unsigned int)len & ~3);
+ unsigned int carry = 0;
+
+ do {
+ unsigned int w = *(unsigned int *)buff;
+
+ buff += 4;
+ result += carry;
+ result += w;
+ carry = (w > result);
+ } while (buff < end);
+ result += carry;
+ result = (result & 0xffff) + (result >> 16);
+ }
+ if (len & 2) {
+ result += *(unsigned short *)buff;
+ buff += 2;
+ }
+ }
+ if (len & 1)
+#ifdef __LITTLE_ENDIAN
+ result += *buff;
+#else
+ result += (*buff << 8);
+#endif
+ result = from32to16(result);
+ if (odd)
+ result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
+out:
+ return result;
+}
+
+__sum16 ip_fast_csum(const void *iph, unsigned int ihl);
+
+/*
+ * This is a version of ip_compute_csum() optimized for IP headers,
+ * which always checksum on 4 octet boundaries.
+ * This function code has been taken from
+ * Linux kernel lib/checksum.c
+ */
+__sum16 ip_fast_csum(const void *iph, unsigned int ihl)
+{
+ return (__force __sum16)~do_csum(iph, ihl * 4);
+}
+
+/*
+ * Fold a partial checksum
+ * This function code has been taken from
+ * Linux kernel include/asm-generic/checksum.h
+ */
+static inline __sum16 csum_fold(__wsum csum)
+{
+ u32 sum = (__force u32)csum;
+
+ sum = (sum & 0xffff) + (sum >> 16);
+ sum = (sum & 0xffff) + (sum >> 16);
+ return (__force __sum16)~sum;
+}
+
+/*
+ * This function code has been taken from
+ * Linux kernel lib/checksum.c
+ */
+static inline u32 from64to32(u64 x)
+{
+ /* add up 32-bit and 32-bit for 32+c bit */
+ x = (x & 0xffffffff) + (x >> 32);
+ /* add up carry.. */
+ x = (x & 0xffffffff) + (x >> 32);
+ return (u32)x;
+}
+
+__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
+ __u32 len, __u8 proto, __wsum sum);
+
+/*
+ * This function code has been taken from
+ * Linux kernel lib/checksum.c
+ */
+__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
+ __u32 len, __u8 proto, __wsum sum)
+{
+ unsigned long long s = (__force u32)sum;
+
+ s += (__force u32)saddr;
+ s += (__force u32)daddr;
+#ifdef __BIG_ENDIAN__
+ s += proto + len;
+#else
+ s += (proto + len) << 8;
+#endif
+ return (__force __wsum)from64to32(s);
+}
+
+/*
+ * This function has been taken from
+ * Linux kernel include/asm-generic/checksum.h
+ */
+static inline __sum16
+csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len,
+ __u8 proto, __wsum sum)
+{
+ return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
+}
+
+static inline u16 udp_csum(u32 saddr, u32 daddr, u32 len,
+ u8 proto, u16 *udp_pkt)
+{
+ u32 csum = 0;
+ u32 cnt = 0;
+
+ /* udp hdr and data */
+ for (; cnt < len; cnt += 2)
+ csum += udp_pkt[cnt >> 1];
+
+ return csum_tcpudp_magic(saddr, daddr, len, proto, csum);
+}
+
+#define ETH_FCS_SIZE 4
+
+#define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \
+ sizeof(struct udphdr))
+
+#define PKT_SIZE (opt_pkt_size - ETH_FCS_SIZE)
+#define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr))
+#define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr))
+#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr))
+
+static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE];
+
+static void gen_eth_hdr_data(void)
+{
+ struct udphdr *udp_hdr = (struct udphdr *)(pkt_data +
+ sizeof(struct ethhdr) +
+ sizeof(struct iphdr));
+ struct iphdr *ip_hdr = (struct iphdr *)(pkt_data +
+ sizeof(struct ethhdr));
+ struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data;
+
+ /* ethernet header */
+ memcpy(eth_hdr->h_dest, "\x3c\xfd\xfe\x9e\x7f\x71", ETH_ALEN);
+ memcpy(eth_hdr->h_source, "\xec\xb1\xd7\x98\x3a\xc0", ETH_ALEN);
+ eth_hdr->h_proto = htons(ETH_P_IP);
+
+ /* IP header */
+ ip_hdr->version = IPVERSION;
+ ip_hdr->ihl = 0x5; /* 20 byte header */
+ ip_hdr->tos = 0x0;
+ ip_hdr->tot_len = htons(IP_PKT_SIZE);
+ ip_hdr->id = 0;
+ ip_hdr->frag_off = 0;
+ ip_hdr->ttl = IPDEFTTL;
+ ip_hdr->protocol = IPPROTO_UDP;
+ ip_hdr->saddr = htonl(0x0a0a0a10);
+ ip_hdr->daddr = htonl(0x0a0a0a20);
+
+ /* IP header checksum */
+ ip_hdr->check = 0;
+ ip_hdr->check = ip_fast_csum((const void *)ip_hdr, ip_hdr->ihl);
+
+ /* UDP header */
+ udp_hdr->source = htons(0x1000);
+ udp_hdr->dest = htons(0x1000);
+ udp_hdr->len = htons(UDP_PKT_SIZE);
+
+ /* UDP data */
+ memset32_htonl(pkt_data + PKT_HDR_SIZE, opt_pkt_fill_pattern,
+ UDP_PKT_DATA_SIZE);
+
+ /* UDP header checksum */
+ udp_hdr->check = 0;
+ udp_hdr->check = udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE,
+ IPPROTO_UDP, (u16 *)udp_hdr);
+}
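For reference, the size macros above work out as follows at the default packet size (a worked example assuming the usual 14/20/8-byte Ethernet/IPv4/UDP headers):

/* opt_pkt_size = MIN_PKT_SIZE = 64:
 *   PKT_SIZE          = 64 - 4  = 60   (frame minus Ethernet FCS)
 *   IP_PKT_SIZE       = 60 - 14 = 46   (minus struct ethhdr)
 *   UDP_PKT_SIZE      = 46 - 20 = 26   (minus struct iphdr)
 *   UDP_PKT_DATA_SIZE = 26 - 8  = 18   (bytes of the 0x12345678 fill pattern)
 */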
+
+static void gen_eth_frame(struct xsk_umem_info *umem, u64 addr)
{
memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data,
- sizeof(pkt_data) - 1);
- return sizeof(pkt_data) - 1;
+ PKT_SIZE);
}
static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size)
{
struct xsk_umem_info *umem;
struct xsk_umem_config cfg = {
- .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+ /* We recommend that you set the fill ring size >= HW RX ring size +
+ * AF_XDP RX ring size. Make sure you fill up the fill ring
+ * with buffers at regular intervals, and with this setting you will
+ * avoid allocation failures in the driver. These are usually quite
+ * expensive since drivers have not been written to assume that
+ * allocation failures are common. For regular sockets, kernel
+ * allocated memory is used that only runs out in OOM situations
+ * that should be rare.
+ */
+ .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS * 2,
.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
.frame_size = opt_xsk_frame_size,
.frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM,
.flags = opt_umem_flags
};
-
int ret;
umem = calloc(1, sizeof(*umem));
@@ -299,7 +815,6 @@
ret = xsk_umem__create(&umem->umem, buffer, size, &umem->fq, &umem->cq,
&cfg);
-
if (ret)
exit_with_error(-ret);
@@ -307,13 +822,29 @@
return umem;
}
-static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem)
+static void xsk_populate_fill_ring(struct xsk_umem_info *umem)
+{
+ int ret, i;
+ u32 idx;
+
+ ret = xsk_ring_prod__reserve(&umem->fq,
+ XSK_RING_PROD__DEFAULT_NUM_DESCS * 2, &idx);
+ if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS * 2)
+ exit_with_error(-ret);
+ for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS * 2; i++)
+ *xsk_ring_prod__fill_addr(&umem->fq, idx++) =
+ i * opt_xsk_frame_size;
+ xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS * 2);
+}
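A note on the sizing assumption baked into this helper:

/* The number of addresses pushed into the fill ring matches the fill_size
 * configured in xsk_configure_umem() (both are
 * XSK_RING_PROD__DEFAULT_NUM_DESCS * 2); asking xsk_ring_prod__reserve() for
 * more slots than the ring holds would trip the error check above.
 */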
+
+static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem,
+ bool rx, bool tx)
{
struct xsk_socket_config cfg;
struct xsk_socket_info *xsk;
+ struct xsk_ring_cons *rxr;
+ struct xsk_ring_prod *txr;
int ret;
- u32 idx;
- int i;
xsk = calloc(1, sizeof(*xsk));
if (!xsk)
@@ -322,11 +853,17 @@
xsk->umem = umem;
cfg.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
- cfg.libbpf_flags = 0;
+ if (opt_num_xsks > 1)
+ cfg.libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD;
+ else
+ cfg.libbpf_flags = 0;
cfg.xdp_flags = opt_xdp_flags;
cfg.bind_flags = opt_xdp_bind_flags;
+
+ rxr = rx ? &xsk->rx : NULL;
+ txr = tx ? &xsk->tx : NULL;
ret = xsk_socket__create(&xsk->xsk, opt_if, opt_queue, umem->umem,
- &xsk->rx, &xsk->tx, &cfg);
+ rxr, txr, &cfg);
if (ret)
exit_with_error(-ret);
@@ -334,16 +871,16 @@
if (ret)
exit_with_error(-ret);
- ret = xsk_ring_prod__reserve(&xsk->umem->fq,
- XSK_RING_PROD__DEFAULT_NUM_DESCS,
- &idx);
- if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS)
- exit_with_error(-ret);
- for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++)
- *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx++) =
- i * opt_xsk_frame_size;
- xsk_ring_prod__submit(&xsk->umem->fq,
- XSK_RING_PROD__DEFAULT_NUM_DESCS);
+ xsk->app_stats.rx_empty_polls = 0;
+ xsk->app_stats.fill_fail_polls = 0;
+ xsk->app_stats.copy_tx_sendtos = 0;
+ xsk->app_stats.tx_wakeup_sendtos = 0;
+ xsk->app_stats.opt_polls = 0;
+ xsk->app_stats.prev_rx_empty_polls = 0;
+ xsk->app_stats.prev_fill_fail_polls = 0;
+ xsk->app_stats.prev_copy_tx_sendtos = 0;
+ xsk->app_stats.prev_tx_wakeup_sendtos = 0;
+ xsk->app_stats.prev_opt_polls = 0;
return xsk;
}
@@ -363,6 +900,17 @@
{"frame-size", required_argument, 0, 'f'},
{"no-need-wakeup", no_argument, 0, 'm'},
{"unaligned", no_argument, 0, 'u'},
+ {"shared-umem", no_argument, 0, 'M'},
+ {"force", no_argument, 0, 'F'},
+ {"duration", required_argument, 0, 'd'},
+ {"batch-size", required_argument, 0, 'b'},
+ {"tx-pkt-count", required_argument, 0, 'C'},
+ {"tx-pkt-size", required_argument, 0, 's'},
+ {"tx-pkt-pattern", required_argument, 0, 'P'},
+ {"extra-stats", no_argument, 0, 'x'},
+ {"quiet", no_argument, 0, 'Q'},
+ {"app-stats", no_argument, 0, 'a'},
+ {"irq-string", no_argument, 0, 'I'},
{0, 0, 0, 0}
};
@@ -378,16 +926,34 @@
" -q, --queue=n Use queue n (default 0)\n"
" -p, --poll Use poll syscall\n"
" -S, --xdp-skb=n Use XDP skb-mod\n"
- " -N, --xdp-native=n Enfore XDP native mode\n"
+ " -N, --xdp-native=n Enforce XDP native mode\n"
" -n, --interval=n Specify statistics update interval (default 1 sec).\n"
" -z, --zero-copy Force zero-copy mode.\n"
" -c, --copy Force copy mode.\n"
- " -f, --frame-size=n Set the frame size (must be a power of two, default is %d).\n"
" -m, --no-need-wakeup Turn off use of driver need wakeup flag.\n"
" -f, --frame-size=n Set the frame size (must be a power of two in aligned mode, default is %d).\n"
" -u, --unaligned Enable unaligned chunk placement\n"
+ " -M, --shared-umem Enable XDP_SHARED_UMEM\n"
+ " -F, --force Force loading the XDP prog\n"
+ " -d, --duration=n Duration in secs to run command.\n"
+ " Default: forever.\n"
+ " -b, --batch-size=n Batch size for sending or receiving\n"
+ " packets. Default: %d\n"
+ " -C, --tx-pkt-count=n Number of packets to send.\n"
+ " Default: Continuous packets.\n"
+ " -s, --tx-pkt-size=n Transmit packet size.\n"
+ " (Default: %d bytes)\n"
+ " Min size: %d, Max size %d.\n"
+ " -P, --tx-pkt-pattern=nPacket fill pattern. Default: 0x%x\n"
+ " -x, --extra-stats Display extra statistics.\n"
+ " -Q, --quiet Do not display any stats.\n"
+ " -a, --app-stats Display application (syscall) statistics.\n"
+ " -I, --irq-string Display driver interrupt statistics for interface associated with irq-string.\n"
"\n";
- fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE);
+ fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE,
+ opt_batch_size, MIN_PKT_SIZE, MIN_PKT_SIZE,
+ XSK_UMEM__DEFAULT_FRAME_SIZE, opt_pkt_fill_pattern);
+
exit(EXIT_FAILURE);
}
@@ -398,7 +964,7 @@
opterr = 0;
for (;;) {
- c = getopt_long(argc, argv, "Frtli:q:psSNn:czf:mu",
+ c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:xQaI:",
long_options, &option_index);
if (c == -1)
break;
@@ -427,7 +993,7 @@
opt_xdp_bind_flags |= XDP_COPY;
break;
case 'N':
- opt_xdp_flags |= XDP_FLAGS_DRV_MODE;
+ /* default, set below */
break;
case 'n':
opt_interval = atoi(optarg);
@@ -448,16 +1014,64 @@
break;
case 'f':
opt_xsk_frame_size = atoi(optarg);
+ break;
case 'm':
opt_need_wakeup = false;
opt_xdp_bind_flags &= ~XDP_USE_NEED_WAKEUP;
break;
+ case 'M':
+ opt_num_xsks = MAX_SOCKS;
+ break;
+ case 'd':
+ opt_duration = atoi(optarg);
+ opt_duration *= 1000000000;
+ break;
+ case 'b':
+ opt_batch_size = atoi(optarg);
+ break;
+ case 'C':
+ opt_pkt_count = atoi(optarg);
+ break;
+ case 's':
+ opt_pkt_size = atoi(optarg);
+ if (opt_pkt_size > (XSK_UMEM__DEFAULT_FRAME_SIZE) ||
+ opt_pkt_size < MIN_PKT_SIZE) {
+ fprintf(stderr,
+ "ERROR: Invalid frame size %d\n",
+ opt_pkt_size);
+ usage(basename(argv[0]));
+ }
+ break;
+ case 'P':
+ opt_pkt_fill_pattern = strtol(optarg, NULL, 16);
+ break;
+ case 'x':
+ opt_extra_stats = 1;
+ break;
+ case 'Q':
+ opt_quiet = 1;
+ break;
+ case 'a':
+ opt_app_stats = 1;
+ break;
+ case 'I':
+ opt_irq_str = optarg;
+ if (get_interrupt_number())
+ irqs_at_init = get_irqs();
+ if (irqs_at_init < 0) {
+ fprintf(stderr, "ERROR: Failed to get irqs for %s\n", opt_irq_str);
+ usage(basename(argv[0]));
+ }
+ break;
default:
usage(basename(argv[0]));
}
}
+ if (!(opt_xdp_flags & XDP_FLAGS_SKB_MODE))
+ opt_xdp_flags |= XDP_FLAGS_DRV_MODE;
+
opt_ifindex = if_nametoindex(opt_if);
if (!opt_ifindex) {
fprintf(stderr, "ERROR: interface \"%s\" does not exist\n",
@@ -478,7 +1092,8 @@
int ret;
ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
- if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN || errno == EBUSY)
+ if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN ||
+ errno == EBUSY || errno == ENETDOWN)
return;
exit_with_error(errno);
}
@@ -494,10 +1109,17 @@
if (!xsk->outstanding_tx)
return;
- if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx))
+ /* In copy mode, Tx is driven by a syscall so we need to use e.g. sendto() to
+ * really send the packets. In zero-copy mode we do not have to do this, since Tx
+ * is driven by the NAPI loop. So as an optimization, we do not have to call
+ * sendto() all the time in zero-copy mode for l2fwd.
+ */
+ if (opt_xdp_bind_flags & XDP_COPY) {
+ xsk->app_stats.copy_tx_sendtos++;
kick_tx(xsk);
+ }
- ndescs = (xsk->outstanding_tx > BATCH_SIZE) ? BATCH_SIZE :
+ ndescs = (xsk->outstanding_tx > opt_batch_size) ? opt_batch_size :
xsk->outstanding_tx;
/* re-add completed Tx buffers */
@@ -510,8 +1132,10 @@
while (ret != rcvd) {
if (ret < 0)
exit_with_error(-ret);
- if (xsk_ring_prod__needs_wakeup(&umem->fq))
+ if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
+ xsk->app_stats.fill_fail_polls++;
ret = poll(fds, num_socks, opt_timeout);
+ }
ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
}
@@ -522,11 +1146,12 @@
xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
xsk_ring_cons__release(&xsk->umem->cq, rcvd);
xsk->outstanding_tx -= rcvd;
- xsk->tx_npkts += rcvd;
+ xsk->ring_stats.tx_npkts += rcvd;
}
}
-static inline void complete_tx_only(struct xsk_socket_info *xsk)
+static inline void complete_tx_only(struct xsk_socket_info *xsk,
+ int batch_size)
{
unsigned int rcvd;
u32 idx;
@@ -534,14 +1159,16 @@
if (!xsk->outstanding_tx)
return;
- if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx))
+ if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx)) {
+ xsk->app_stats.tx_wakeup_sendtos++;
kick_tx(xsk);
+ }
- rcvd = xsk_ring_cons__peek(&xsk->umem->cq, BATCH_SIZE, &idx);
+ rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx);
if (rcvd > 0) {
xsk_ring_cons__release(&xsk->umem->cq, rcvd);
xsk->outstanding_tx -= rcvd;
- xsk->tx_npkts += rcvd;
+ xsk->ring_stats.tx_npkts += rcvd;
}
}
@@ -551,10 +1178,12 @@
u32 idx_rx = 0, idx_fq = 0;
int ret;
- rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
+ rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx);
if (!rcvd) {
- if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
+ if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
+ xsk->app_stats.rx_empty_polls++;
ret = poll(fds, num_socks, opt_timeout);
+ }
return;
}
@@ -562,8 +1191,10 @@
while (ret != rcvd) {
if (ret < 0)
exit_with_error(-ret);
- if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
+ if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
+ xsk->app_stats.fill_fail_polls++;
ret = poll(fds, num_socks, opt_timeout);
+ }
ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
}
@@ -581,16 +1212,14 @@
xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
xsk_ring_cons__release(&xsk->rx, rcvd);
- xsk->rx_npkts += rcvd;
+ xsk->ring_stats.rx_npkts += rcvd;
}
static void rx_drop_all(void)
{
- struct pollfd fds[MAX_SOCKS + 1];
+ struct pollfd fds[MAX_SOCKS] = {};
int i, ret;
- memset(fds, 0, sizeof(fds));
-
for (i = 0; i < num_socks; i++) {
fds[i].fd = xsk_socket__fd(xsks[i]->xsk);
fds[i].events = POLLIN;
@@ -598,6 +1227,8 @@
for (;;) {
if (opt_poll) {
+ for (i = 0; i < num_socks; i++)
+ xsks[i]->app_stats.opt_polls++;
ret = poll(fds, num_socks, opt_timeout);
if (ret <= 0)
continue;
@@ -605,46 +1236,83 @@
for (i = 0; i < num_socks; i++)
rx_drop(xsks[i], fds);
+
+ if (benchmark_done)
+ break;
}
}
-static void tx_only(struct xsk_socket_info *xsk, u32 frame_nb)
+static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size)
{
u32 idx;
+ unsigned int i;
- if (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) == BATCH_SIZE) {
- unsigned int i;
-
- for (i = 0; i < BATCH_SIZE; i++) {
- xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->addr =
- (frame_nb + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT;
- xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->len =
- sizeof(pkt_data) - 1;
- }
-
- xsk_ring_prod__submit(&xsk->tx, BATCH_SIZE);
- xsk->outstanding_tx += BATCH_SIZE;
- frame_nb += BATCH_SIZE;
- frame_nb %= NUM_FRAMES;
+ while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) <
+ batch_size) {
+ complete_tx_only(xsk, batch_size);
+ if (benchmark_done)
+ return;
}
- complete_tx_only(xsk);
+ for (i = 0; i < batch_size; i++) {
+ struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx,
+ idx + i);
+ tx_desc->addr = (*frame_nb + i) * opt_xsk_frame_size;
+ tx_desc->len = PKT_SIZE;
+ }
+
+ xsk_ring_prod__submit(&xsk->tx, batch_size);
+ xsk->outstanding_tx += batch_size;
+ *frame_nb += batch_size;
+ *frame_nb %= NUM_FRAMES;
+ complete_tx_only(xsk, batch_size);
+}
+
+static inline int get_batch_size(int pkt_cnt)
+{
+ if (!opt_pkt_count)
+ return opt_batch_size;
+
+ if (pkt_cnt + opt_batch_size <= opt_pkt_count)
+ return opt_batch_size;
+
+ return opt_pkt_count - pkt_cnt;
+}
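A worked example of the batching logic (hypothetical command-line values):

/* With --tx-pkt-count=100 and the default batch size of 64, get_batch_size()
 * returns 64 on the first iteration (pkt_cnt == 0) and 100 - 64 = 36 on the
 * second (pkt_cnt == 64); tx_only_all() then leaves its loop and
 * complete_tx_only_all() drains the remaining completions.
 */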
+
+static void complete_tx_only_all(void)
+{
+ bool pending;
+ int i;
+
+ do {
+ pending = false;
+ for (i = 0; i < num_socks; i++) {
+ if (xsks[i]->outstanding_tx) {
+ complete_tx_only(xsks[i], opt_batch_size);
+ pending = !!xsks[i]->outstanding_tx;
+ }
+ }
+ } while (pending);
}
static void tx_only_all(void)
{
- struct pollfd fds[MAX_SOCKS];
+ struct pollfd fds[MAX_SOCKS] = {};
u32 frame_nb[MAX_SOCKS] = {};
+ int pkt_cnt = 0;
int i, ret;
- memset(fds, 0, sizeof(fds));
for (i = 0; i < num_socks; i++) {
fds[0].fd = xsk_socket__fd(xsks[i]->xsk);
fds[0].events = POLLOUT;
}
- for (;;) {
+ while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) {
+ int batch_size = get_batch_size(pkt_cnt);
+
if (opt_poll) {
+ for (i = 0; i < num_socks; i++)
+ xsks[i]->app_stats.opt_polls++;
ret = poll(fds, num_socks, opt_timeout);
if (ret <= 0)
continue;
@@ -654,8 +1322,16 @@
}
for (i = 0; i < num_socks; i++)
- tx_only(xsks[i], frame_nb[i]);
+ tx_only(xsks[i], &frame_nb[i], batch_size);
+
+ pkt_cnt += batch_size;
+
+ if (benchmark_done)
+ break;
}
+
+ if (opt_pkt_count)
+ complete_tx_only_all();
}
static void l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds)
@@ -666,10 +1342,12 @@
complete_tx_l2fwd(xsk, fds);
- rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
+ rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx);
if (!rcvd) {
- if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
+ if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
+ xsk->app_stats.rx_empty_polls++;
ret = poll(fds, num_socks, opt_timeout);
+ }
return;
}
@@ -678,8 +1356,10 @@
if (ret < 0)
exit_with_error(-ret);
complete_tx_l2fwd(xsk, fds);
- if (xsk_ring_prod__needs_wakeup(&xsk->tx))
+ if (xsk_ring_prod__needs_wakeup(&xsk->tx)) {
+ xsk->app_stats.tx_wakeup_sendtos++;
kick_tx(xsk);
+ }
ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx);
}
@@ -701,17 +1381,15 @@
xsk_ring_prod__submit(&xsk->tx, rcvd);
xsk_ring_cons__release(&xsk->rx, rcvd);
- xsk->rx_npkts += rcvd;
+ xsk->ring_stats.rx_npkts += rcvd;
xsk->outstanding_tx += rcvd;
}
static void l2fwd_all(void)
{
- struct pollfd fds[MAX_SOCKS];
+ struct pollfd fds[MAX_SOCKS] = {};
int i, ret;
- memset(fds, 0, sizeof(fds));
-
for (i = 0; i < num_socks; i++) {
fds[i].fd = xsk_socket__fd(xsks[i]->xsk);
fds[i].events = POLLOUT | POLLIN;
@@ -719,6 +1397,8 @@
for (;;) {
if (opt_poll) {
+ for (i = 0; i < num_socks; i++)
+ xsks[i]->app_stats.opt_polls++;
ret = poll(fds, num_socks, opt_timeout);
if (ret <= 0)
continue;
@@ -726,16 +1406,72 @@
for (i = 0; i < num_socks; i++)
l2fwd(xsks[i], fds);
+
+ if (benchmark_done)
+ break;
+ }
+}
+
+static void load_xdp_program(char **argv, struct bpf_object **obj)
+{
+ struct bpf_prog_load_attr prog_load_attr = {
+ .prog_type = BPF_PROG_TYPE_XDP,
+ };
+ char xdp_filename[256];
+ int prog_fd;
+
+ snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv[0]);
+ prog_load_attr.file = xdp_filename;
+
+ if (bpf_prog_load_xattr(&prog_load_attr, obj, &prog_fd))
+ exit(EXIT_FAILURE);
+ if (prog_fd < 0) {
+ fprintf(stderr, "ERROR: no program found: %s\n",
+ strerror(prog_fd));
+ exit(EXIT_FAILURE);
+ }
+
+ if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd, opt_xdp_flags) < 0) {
+ fprintf(stderr, "ERROR: link set xdp fd failed\n");
+ exit(EXIT_FAILURE);
+ }
+}
+
+static void enter_xsks_into_map(struct bpf_object *obj)
+{
+ struct bpf_map *map;
+ int i, xsks_map;
+
+ map = bpf_object__find_map_by_name(obj, "xsks_map");
+ xsks_map = bpf_map__fd(map);
+ if (xsks_map < 0) {
+ fprintf(stderr, "ERROR: no xsks map found: %s\n",
+ strerror(xsks_map));
+ exit(EXIT_FAILURE);
+ }
+
+ for (i = 0; i < num_socks; i++) {
+ int fd = xsk_socket__fd(xsks[i]->xsk);
+ int key, ret;
+
+ key = i;
+ ret = bpf_map_update_elem(xsks_map, &key, &fd, 0);
+ if (ret) {
+ fprintf(stderr, "ERROR: bpf_map_update_elem %d\n", i);
+ exit(EXIT_FAILURE);
+ }
}
}
int main(int argc, char **argv)
{
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ bool rx = false, tx = false;
struct xsk_umem_info *umem;
+ struct bpf_object *obj;
pthread_t pt;
+ int i, ret;
void *bufs;
- int ret;
parse_command_line(argc, argv);
@@ -745,6 +1481,9 @@
exit(EXIT_FAILURE);
}
+ if (opt_num_xsks > 1)
+ load_xdp_program(argv, &obj);
+
/* Reserve memory for the umem. Use hugepages if unaligned chunk mode */
bufs = mmap(NULL, NUM_FRAMES * opt_xsk_frame_size,
PROT_READ | PROT_WRITE,
@@ -753,28 +1492,42 @@
printf("ERROR: mmap failed\n");
exit(EXIT_FAILURE);
}
- /* Create sockets... */
+
+ /* Create sockets... */
umem = xsk_configure_umem(bufs, NUM_FRAMES * opt_xsk_frame_size);
- xsks[num_socks++] = xsk_configure_socket(umem);
+ if (opt_bench == BENCH_RXDROP || opt_bench == BENCH_L2FWD) {
+ rx = true;
+ xsk_populate_fill_ring(umem);
+ }
+ if (opt_bench == BENCH_L2FWD || opt_bench == BENCH_TXONLY)
+ tx = true;
+ for (i = 0; i < opt_num_xsks; i++)
+ xsks[num_socks++] = xsk_configure_socket(umem, rx, tx);
if (opt_bench == BENCH_TXONLY) {
- int i;
+ gen_eth_hdr_data();
for (i = 0; i < NUM_FRAMES; i++)
- (void)gen_eth_frame(umem, i * opt_xsk_frame_size);
+ gen_eth_frame(umem, i * opt_xsk_frame_size);
}
+ if (opt_num_xsks > 1 && opt_bench != BENCH_TXONLY)
+ enter_xsks_into_map(obj);
+
signal(SIGINT, int_exit);
signal(SIGTERM, int_exit);
signal(SIGABRT, int_exit);
setlocale(LC_ALL, "");
- ret = pthread_create(&pt, NULL, poller, NULL);
- if (ret)
- exit_with_error(ret);
+ if (!opt_quiet) {
+ ret = pthread_create(&pt, NULL, poller, NULL);
+ if (ret)
+ exit_with_error(ret);
+ }
prev_time = get_nsecs();
+ start_time = prev_time;
if (opt_bench == BENCH_RXDROP)
rx_drop_all();
@@ -783,6 +1536,13 @@
else
l2fwd_all();
+ benchmark_done = true;
+
+ if (!opt_quiet)
+ pthread_join(pt, NULL);
+
+ xdpsock_cleanup();
+
munmap(bufs, NUM_FRAMES * opt_xsk_frame_size);
return 0;
diff --git a/samples/bpf/xsk_fwd.c b/samples/bpf/xsk_fwd.c
new file mode 100644
index 0000000..1cd97c8
--- /dev/null
+++ b/samples/bpf/xsk_fwd.c
@@ -0,0 +1,1085 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2020 Intel Corporation. */
+
+#define _GNU_SOURCE
+#include <poll.h>
+#include <pthread.h>
+#include <signal.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <netinet/ether.h>
+#include <net/if.h>
+
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <linux/if_xdp.h>
+
+#include <bpf/libbpf.h>
+#include <bpf/xsk.h>
+#include <bpf/bpf.h>
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+typedef __u64 u64;
+typedef __u32 u32;
+typedef __u16 u16;
+typedef __u8 u8;
+
+/* This program illustrates the packet forwarding between multiple AF_XDP
+ * sockets in multi-threaded environment. All threads are sharing a common
+ * buffer pool, with each socket having its own private buffer cache.
+ *
+ * Example 1: Single thread handling two sockets. The packets received by socket
+ * A (interface IFA, queue QA) are forwarded to socket B (interface IFB, queue
+ * QB), while the packets received by socket B are forwarded to socket A. The
+ * thread is running on CPU core X:
+ *
+ * ./xsk_fwd -i IFA -q QA -i IFB -q QB -c X
+ *
+ * Example 2: Two threads, each handling two sockets. The thread running on CPU
+ * core X forwards all the packets received by socket A to socket B, and all the
+ * packets received by socket B to socket A. The thread running on CPU core Y is
+ * performing the same packet forwarding between sockets C and D:
+ *
+ * ./xsk_fwd -i IFA -q QA -i IFB -q QB -i IFC -q QC -i IFD -q QD
+ * -c CX -c CY
+ */
+
+/*
+ * Buffer pool and buffer cache
+ *
+ * For packet forwarding, the packet buffers are typically allocated from the
+ * pool for packet reception and freed back to the pool for further reuse once
+ * the packet transmission is completed.
+ *
+ * The buffer pool is shared between multiple threads. In order to minimize the
+ * access latency to the shared buffer pool, each thread creates one (or
+ * several) buffer caches, which, unlike the buffer pool, are private to the
+ * thread that creates them and therefore cannot be shared with other threads.
+ * The access to the shared pool is only needed either (A) when the cache gets
+ * empty due to repeated buffer allocations and it needs to be replenished from
+ * the pool, or (B) when the cache gets full due to repeated buffer frees and it
+ * needs to be flushed back to the pool.
+ *
+ * In a packet forwarding system, a packet received on any input port can
+ * potentially be transmitted on any output port, depending on the forwarding
+ * configuration. For AF_XDP sockets, for this to work with zero-copy of the
+ * packet buffers, it is required that the buffer pool memory fits into the
+ * UMEM area shared by all the sockets.
+ */
+
+struct bpool_params {
+ u32 n_buffers;
+ u32 buffer_size;
+ int mmap_flags;
+
+ u32 n_users_max;
+ u32 n_buffers_per_slab;
+};
+
+/* This buffer pool implementation organizes the buffers into equally sized
+ * slabs of *n_buffers_per_slab*. Initially, there are *n_slabs* slabs in the
+ * pool that are completely filled with buffer pointers (full slabs).
+ *
+ * Each buffer cache has a slab for buffer allocation and a slab for buffer
+ * free, with both of these slabs initially empty. When the cache's allocation
+ * slab goes empty, it is swapped with one of the available full slabs from the
+ * pool, if any is available. When the cache's free slab goes full, it is
+ * swapped for one of the empty slabs from the pool, which is guaranteed to
+ * succeed.
+ *
+ * Partially filled slabs never get traded between the cache and the pool
+ * (except when the cache itself is destroyed), which enables fast operation
+ * through pointer swapping.
+ */
+struct bpool {
+ struct bpool_params params;
+ pthread_mutex_t lock;
+ void *addr;
+
+ u64 **slabs;
+ u64 **slabs_reserved;
+ u64 *buffers;
+ u64 *buffers_reserved;
+
+ u64 n_slabs;
+ u64 n_slabs_reserved;
+ u64 n_buffers;
+
+ u64 n_slabs_available;
+ u64 n_slabs_reserved_available;
+
+ struct xsk_umem_config umem_cfg;
+ struct xsk_ring_prod umem_fq;
+ struct xsk_ring_cons umem_cq;
+ struct xsk_umem *umem;
+};
+
+static struct bpool *
+bpool_init(struct bpool_params *params,
+ struct xsk_umem_config *umem_cfg)
+{
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ u64 n_slabs, n_slabs_reserved, n_buffers, n_buffers_reserved;
+ u64 slabs_size, slabs_reserved_size;
+ u64 buffers_size, buffers_reserved_size;
+ u64 total_size, i;
+ struct bpool *bp;
+ u8 *p;
+ int status;
+
+ /* mmap prep. */
+ if (setrlimit(RLIMIT_MEMLOCK, &r))
+ return NULL;
+
+ /* bpool internals dimensioning. */
+ n_slabs = (params->n_buffers + params->n_buffers_per_slab - 1) /
+ params->n_buffers_per_slab;
+ n_slabs_reserved = params->n_users_max * 2;
+ n_buffers = n_slabs * params->n_buffers_per_slab;
+ n_buffers_reserved = n_slabs_reserved * params->n_buffers_per_slab;
+
+ slabs_size = n_slabs * sizeof(u64 *);
+ slabs_reserved_size = n_slabs_reserved * sizeof(u64 *);
+ buffers_size = n_buffers * sizeof(u64);
+ buffers_reserved_size = n_buffers_reserved * sizeof(u64);
+
+ total_size = sizeof(struct bpool) +
+ slabs_size + slabs_reserved_size +
+ buffers_size + buffers_reserved_size;
+
+ /* bpool memory allocation. */
+ p = calloc(total_size, sizeof(u8));
+ if (!p)
+ return NULL;
+
+ /* bpool memory initialization. */
+ bp = (struct bpool *)p;
+ memcpy(&bp->params, params, sizeof(*params));
+ bp->params.n_buffers = n_buffers;
+
+ bp->slabs = (u64 **)&p[sizeof(struct bpool)];
+ bp->slabs_reserved = (u64 **)&p[sizeof(struct bpool) +
+ slabs_size];
+ bp->buffers = (u64 *)&p[sizeof(struct bpool) +
+ slabs_size + slabs_reserved_size];
+ bp->buffers_reserved = (u64 *)&p[sizeof(struct bpool) +
+ slabs_size + slabs_reserved_size + buffers_size];
+
+ bp->n_slabs = n_slabs;
+ bp->n_slabs_reserved = n_slabs_reserved;
+ bp->n_buffers = n_buffers;
+
+ for (i = 0; i < n_slabs; i++)
+ bp->slabs[i] = &bp->buffers[i * params->n_buffers_per_slab];
+ bp->n_slabs_available = n_slabs;
+
+ for (i = 0; i < n_slabs_reserved; i++)
+ bp->slabs_reserved[i] = &bp->buffers_reserved[i *
+ params->n_buffers_per_slab];
+ bp->n_slabs_reserved_available = n_slabs_reserved;
+
+ for (i = 0; i < n_buffers; i++)
+ bp->buffers[i] = i * params->buffer_size;
+
+ /* lock. */
+ status = pthread_mutex_init(&bp->lock, NULL);
+ if (status) {
+ free(p);
+ return NULL;
+ }
+
+ /* mmap. */
+ bp->addr = mmap(NULL,
+ n_buffers * params->buffer_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | params->mmap_flags,
+ -1,
+ 0);
+ if (bp->addr == MAP_FAILED) {
+ pthread_mutex_destroy(&bp->lock);
+ free(p);
+ return NULL;
+ }
+
+ /* umem. */
+ status = xsk_umem__create(&bp->umem,
+ bp->addr,
+ bp->params.n_buffers * bp->params.buffer_size,
+ &bp->umem_fq,
+ &bp->umem_cq,
+ umem_cfg);
+ if (status) {
+ munmap(bp->addr, bp->params.n_buffers * bp->params.buffer_size);
+ pthread_mutex_destroy(&bp->lock);
+ free(p);
+ return NULL;
+ }
+ memcpy(&bp->umem_cfg, umem_cfg, sizeof(*umem_cfg));
+
+ return bp;
+}
+
+static void
+bpool_free(struct bpool *bp)
+{
+ if (!bp)
+ return;
+
+ xsk_umem__delete(bp->umem);
+ munmap(bp->addr, bp->params.n_buffers * bp->params.buffer_size);
+ pthread_mutex_destroy(&bp->lock);
+ free(bp);
+}
+
+struct bcache {
+ struct bpool *bp;
+
+ u64 *slab_cons;
+ u64 *slab_prod;
+
+ u64 n_buffers_cons;
+ u64 n_buffers_prod;
+};
+
+static u32
+bcache_slab_size(struct bcache *bc)
+{
+ struct bpool *bp = bc->bp;
+
+ return bp->params.n_buffers_per_slab;
+}
+
+static struct bcache *
+bcache_init(struct bpool *bp)
+{
+ struct bcache *bc;
+
+ bc = calloc(1, sizeof(struct bcache));
+ if (!bc)
+ return NULL;
+
+ bc->bp = bp;
+ bc->n_buffers_cons = 0;
+ bc->n_buffers_prod = 0;
+
+ pthread_mutex_lock(&bp->lock);
+ if (bp->n_slabs_reserved_available == 0) {
+ pthread_mutex_unlock(&bp->lock);
+ free(bc);
+ return NULL;
+ }
+
+ bc->slab_cons = bp->slabs_reserved[bp->n_slabs_reserved_available - 1];
+ bc->slab_prod = bp->slabs_reserved[bp->n_slabs_reserved_available - 2];
+ bp->n_slabs_reserved_available -= 2;
+ pthread_mutex_unlock(&bp->lock);
+
+ return bc;
+}
+
+static void
+bcache_free(struct bcache *bc)
+{
+ struct bpool *bp;
+
+ if (!bc)
+ return;
+
+ /* In order to keep this example simple, the case of freeing any
+ * existing buffers from the cache back to the pool is ignored.
+ */
+
+ bp = bc->bp;
+ pthread_mutex_lock(&bp->lock);
+ bp->slabs_reserved[bp->n_slabs_reserved_available] = bc->slab_prod;
+ bp->slabs_reserved[bp->n_slabs_reserved_available + 1] = bc->slab_cons;
+ bp->n_slabs_reserved_available += 2;
+ pthread_mutex_unlock(&bp->lock);
+
+ free(bc);
+}
+
+/* To work correctly, the implementation requires that the *n_buffers* input
+ * argument is never greater than the buffer pool's *n_buffers_per_slab*. This
+ * is typically the case, with one exception taking place when a large number of
+ * buffers are allocated at init time (e.g. for the UMEM fill queue setup).
+ */
+static inline u32
+bcache_cons_check(struct bcache *bc, u32 n_buffers)
+{
+ struct bpool *bp = bc->bp;
+ u64 n_buffers_per_slab = bp->params.n_buffers_per_slab;
+ u64 n_buffers_cons = bc->n_buffers_cons;
+ u64 n_slabs_available;
+ u64 *slab_full;
+
+ /*
+ * Consumer slab is not empty: Use what's available locally. Do not
+ * look for more buffers from the pool when the ask can only be
+ * partially satisfied.
+ */
+ if (n_buffers_cons)
+ return (n_buffers_cons < n_buffers) ?
+ n_buffers_cons :
+ n_buffers;
+
+ /*
+ * Consumer slab is empty: look to trade the current consumer slab
+ * (empty) for a full slab from the pool, if any is available.
+ */
+ pthread_mutex_lock(&bp->lock);
+ n_slabs_available = bp->n_slabs_available;
+ if (!n_slabs_available) {
+ pthread_mutex_unlock(&bp->lock);
+ return 0;
+ }
+
+ n_slabs_available--;
+ slab_full = bp->slabs[n_slabs_available];
+ bp->slabs[n_slabs_available] = bc->slab_cons;
+ bp->n_slabs_available = n_slabs_available;
+ pthread_mutex_unlock(&bp->lock);
+
+ bc->slab_cons = slab_full;
+ bc->n_buffers_cons = n_buffers_per_slab;
+ return n_buffers;
+}
+
+static inline u64
+bcache_cons(struct bcache *bc)
+{
+ u64 n_buffers_cons = bc->n_buffers_cons - 1;
+ u64 buffer;
+
+ buffer = bc->slab_cons[n_buffers_cons];
+ bc->n_buffers_cons = n_buffers_cons;
+ return buffer;
+}
+
+static inline void
+bcache_prod(struct bcache *bc, u64 buffer)
+{
+ struct bpool *bp = bc->bp;
+ u64 n_buffers_per_slab = bp->params.n_buffers_per_slab;
+ u64 n_buffers_prod = bc->n_buffers_prod;
+ u64 n_slabs_available;
+ u64 *slab_empty;
+
+ /*
+ * Producer slab is not yet full: store the current buffer to it.
+ */
+ if (n_buffers_prod < n_buffers_per_slab) {
+ bc->slab_prod[n_buffers_prod] = buffer;
+ bc->n_buffers_prod = n_buffers_prod + 1;
+ return;
+ }
+
+ /*
+ * Producer slab is full: trade the cache's current producer slab
+ * (full) for an empty slab from the pool, then store the current
+ * buffer to the new producer slab. As one full slab exists in the
+ * cache, it is guaranteed that there is at least one empty slab
+ * available in the pool.
+ */
+ pthread_mutex_lock(&bp->lock);
+ n_slabs_available = bp->n_slabs_available;
+ slab_empty = bp->slabs[n_slabs_available];
+ bp->slabs[n_slabs_available] = bc->slab_prod;
+ bp->n_slabs_available = n_slabs_available + 1;
+ pthread_mutex_unlock(&bp->lock);
+
+ slab_empty[0] = buffer;
+ bc->slab_prod = slab_empty;
+ bc->n_buffers_prod = 1;
+}
+
+/*
+ * Port
+ *
+ * Each of the forwarding ports sits on top of an AF_XDP socket. In order for
+ * packet forwarding to happen with no packet buffer copy, all the sockets need
+ * to share the same UMEM area, which is used as the buffer pool memory.
+ */
+#ifndef MAX_BURST_RX
+#define MAX_BURST_RX 64
+#endif
+
+#ifndef MAX_BURST_TX
+#define MAX_BURST_TX 64
+#endif
+
+struct burst_rx {
+ u64 addr[MAX_BURST_RX];
+ u32 len[MAX_BURST_RX];
+};
+
+struct burst_tx {
+ u64 addr[MAX_BURST_TX];
+ u32 len[MAX_BURST_TX];
+ u32 n_pkts;
+};
+
+struct port_params {
+ struct xsk_socket_config xsk_cfg;
+ struct bpool *bp;
+ const char *iface;
+ u32 iface_queue;
+};
+
+struct port {
+ struct port_params params;
+
+ struct bcache *bc;
+
+ struct xsk_ring_cons rxq;
+ struct xsk_ring_prod txq;
+ struct xsk_ring_prod umem_fq;
+ struct xsk_ring_cons umem_cq;
+ struct xsk_socket *xsk;
+ int umem_fq_initialized;
+
+ u64 n_pkts_rx;
+ u64 n_pkts_tx;
+};
+
+static void
+port_free(struct port *p)
+{
+ if (!p)
+ return;
+
+ /* To keep this example simple, the code to free the buffers from the
+ * socket's receive and transmit queues, as well as from the UMEM fill
+ * and completion queues, is not included.
+ */
+
+ if (p->xsk)
+ xsk_socket__delete(p->xsk);
+
+ bcache_free(p->bc);
+
+ free(p);
+}
+
+static struct port *
+port_init(struct port_params *params)
+{
+ struct port *p;
+ u32 umem_fq_size, pos = 0;
+ int status, i;
+
+ /* Memory allocation and initialization. */
+ p = calloc(sizeof(struct port), 1);
+ if (!p)
+ return NULL;
+
+ memcpy(&p->params, params, sizeof(p->params));
+ umem_fq_size = params->bp->umem_cfg.fill_size;
+
+ /* bcache. */
+ p->bc = bcache_init(params->bp);
+ if (!p->bc ||
+ (bcache_slab_size(p->bc) < umem_fq_size) ||
+ (bcache_cons_check(p->bc, umem_fq_size) < umem_fq_size)) {
+ port_free(p);
+ return NULL;
+ }
+
+ /* xsk socket. */
+ status = xsk_socket__create_shared(&p->xsk,
+ params->iface,
+ params->iface_queue,
+ params->bp->umem,
+ &p->rxq,
+ &p->txq,
+ &p->umem_fq,
+ &p->umem_cq,
+ &params->xsk_cfg);
+ if (status) {
+ port_free(p);
+ return NULL;
+ }
+
+ /* umem fq. */
+ xsk_ring_prod__reserve(&p->umem_fq, umem_fq_size, &pos);
+
+ for (i = 0; i < umem_fq_size; i++)
+ *xsk_ring_prod__fill_addr(&p->umem_fq, pos + i) =
+ bcache_cons(p->bc);
+
+ xsk_ring_prod__submit(&p->umem_fq, umem_fq_size);
+ p->umem_fq_initialized = 1;
+
+ return p;
+}
+
+static inline u32
+port_rx_burst(struct port *p, struct burst_rx *b)
+{
+ u32 n_pkts, pos, i;
+
+ /* Free buffers for FQ replenish. */
+ n_pkts = ARRAY_SIZE(b->addr);
+
+ n_pkts = bcache_cons_check(p->bc, n_pkts);
+ if (!n_pkts)
+ return 0;
+
+ /* RXQ. */
+ n_pkts = xsk_ring_cons__peek(&p->rxq, n_pkts, &pos);
+ if (!n_pkts) {
+ if (xsk_ring_prod__needs_wakeup(&p->umem_fq)) {
+ struct pollfd pollfd = {
+ .fd = xsk_socket__fd(p->xsk),
+ .events = POLLIN,
+ };
+
+ poll(&pollfd, 1, 0);
+ }
+ return 0;
+ }
+
+ for (i = 0; i < n_pkts; i++) {
+ b->addr[i] = xsk_ring_cons__rx_desc(&p->rxq, pos + i)->addr;
+ b->len[i] = xsk_ring_cons__rx_desc(&p->rxq, pos + i)->len;
+ }
+
+ xsk_ring_cons__release(&p->rxq, n_pkts);
+ p->n_pkts_rx += n_pkts;
+
+ /* UMEM FQ. */
+ for ( ; ; ) {
+ int status;
+
+ status = xsk_ring_prod__reserve(&p->umem_fq, n_pkts, &pos);
+ if (status == n_pkts)
+ break;
+
+ if (xsk_ring_prod__needs_wakeup(&p->umem_fq)) {
+ struct pollfd pollfd = {
+ .fd = xsk_socket__fd(p->xsk),
+ .events = POLLIN,
+ };
+
+ poll(&pollfd, 1, 0);
+ }
+ }
+
+ for (i = 0; i < n_pkts; i++)
+ *xsk_ring_prod__fill_addr(&p->umem_fq, pos + i) =
+ bcache_cons(p->bc);
+
+ xsk_ring_prod__submit(&p->umem_fq, n_pkts);
+
+ return n_pkts;
+}
+
+static inline void
+port_tx_burst(struct port *p, struct burst_tx *b)
+{
+ u32 n_pkts, pos, i;
+ int status;
+
+ /* UMEM CQ. */
+ n_pkts = p->params.bp->umem_cfg.comp_size;
+
+ n_pkts = xsk_ring_cons__peek(&p->umem_cq, n_pkts, &pos);
+
+ for (i = 0; i < n_pkts; i++) {
+ u64 addr = *xsk_ring_cons__comp_addr(&p->umem_cq, pos + i);
+
+ bcache_prod(p->bc, addr);
+ }
+
+ xsk_ring_cons__release(&p->umem_cq, n_pkts);
+
+ /* TXQ. */
+ n_pkts = b->n_pkts;
+
+ for ( ; ; ) {
+ status = xsk_ring_prod__reserve(&p->txq, n_pkts, &pos);
+ if (status == n_pkts)
+ break;
+
+ if (xsk_ring_prod__needs_wakeup(&p->txq))
+ sendto(xsk_socket__fd(p->xsk), NULL, 0, MSG_DONTWAIT,
+ NULL, 0);
+ }
+
+ for (i = 0; i < n_pkts; i++) {
+ xsk_ring_prod__tx_desc(&p->txq, pos + i)->addr = b->addr[i];
+ xsk_ring_prod__tx_desc(&p->txq, pos + i)->len = b->len[i];
+ }
+
+ xsk_ring_prod__submit(&p->txq, n_pkts);
+ if (xsk_ring_prod__needs_wakeup(&p->txq))
+ sendto(xsk_socket__fd(p->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
+ p->n_pkts_tx += n_pkts;
+}
+
+/*
+ * Thread
+ *
+ * Packet forwarding threads.
+ */
+#ifndef MAX_PORTS_PER_THREAD
+#define MAX_PORTS_PER_THREAD 16
+#endif
+
+struct thread_data {
+ struct port *ports_rx[MAX_PORTS_PER_THREAD];
+ struct port *ports_tx[MAX_PORTS_PER_THREAD];
+ u32 n_ports_rx;
+ struct burst_rx burst_rx;
+ struct burst_tx burst_tx[MAX_PORTS_PER_THREAD];
+ u32 cpu_core_id;
+ int quit;
+};
+
+static void swap_mac_addresses(void *data)
+{
+ struct ether_header *eth = (struct ether_header *)data;
+ struct ether_addr *src_addr = (struct ether_addr *)&eth->ether_shost;
+ struct ether_addr *dst_addr = (struct ether_addr *)&eth->ether_dhost;
+ struct ether_addr tmp;
+
+ tmp = *src_addr;
+ *src_addr = *dst_addr;
+ *dst_addr = tmp;
+}
+
+static void *
+thread_func(void *arg)
+{
+ struct thread_data *t = arg;
+ cpu_set_t cpu_cores;
+ u32 i;
+
+ CPU_ZERO(&cpu_cores);
+ CPU_SET(t->cpu_core_id, &cpu_cores);
+ pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpu_cores);
+
+ for (i = 0; !t->quit; i = (i + 1) & (t->n_ports_rx - 1)) {
+ struct port *port_rx = t->ports_rx[i];
+ struct port *port_tx = t->ports_tx[i];
+ struct burst_rx *brx = &t->burst_rx;
+ struct burst_tx *btx = &t->burst_tx[i];
+ u32 n_pkts, j;
+
+ /* RX. */
+ n_pkts = port_rx_burst(port_rx, brx);
+ if (!n_pkts)
+ continue;
+
+ /* Process & TX. */
+ for (j = 0; j < n_pkts; j++) {
+ u64 addr = xsk_umem__add_offset_to_addr(brx->addr[j]);
+ u8 *pkt = xsk_umem__get_data(port_rx->params.bp->addr,
+ addr);
+
+ swap_mac_addresses(pkt);
+
+ btx->addr[btx->n_pkts] = brx->addr[j];
+ btx->len[btx->n_pkts] = brx->len[j];
+ btx->n_pkts++;
+
+ if (btx->n_pkts == MAX_BURST_TX) {
+ port_tx_burst(port_tx, btx);
+ btx->n_pkts = 0;
+ }
+ }
+ }
+
+ return NULL;
+}
+
+/*
+ * Process
+ */
+static const struct bpool_params bpool_params_default = {
+ .n_buffers = 64 * 1024,
+ .buffer_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
+ .mmap_flags = 0,
+
+ .n_users_max = 16,
+ .n_buffers_per_slab = XSK_RING_PROD__DEFAULT_NUM_DESCS * 2,
+};
+
+static const struct xsk_umem_config umem_cfg_default = {
+ .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS * 2,
+ .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
+ .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
+ .frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM,
+ .flags = 0,
+};
+
+static const struct port_params port_params_default = {
+ .xsk_cfg = {
+ .rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
+ .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+ .libbpf_flags = 0,
+ .xdp_flags = XDP_FLAGS_DRV_MODE,
+ .bind_flags = XDP_USE_NEED_WAKEUP | XDP_ZEROCOPY,
+ },
+
+ .bp = NULL,
+ .iface = NULL,
+ .iface_queue = 0,
+};
+
+#ifndef MAX_PORTS
+#define MAX_PORTS 64
+#endif
+
+#ifndef MAX_THREADS
+#define MAX_THREADS 64
+#endif
+
+static struct bpool_params bpool_params;
+static struct xsk_umem_config umem_cfg;
+static struct bpool *bp;
+
+static struct port_params port_params[MAX_PORTS];
+static struct port *ports[MAX_PORTS];
+static u64 n_pkts_rx[MAX_PORTS];
+static u64 n_pkts_tx[MAX_PORTS];
+static int n_ports;
+
+static pthread_t threads[MAX_THREADS];
+static struct thread_data thread_data[MAX_THREADS];
+static int n_threads;
+
+static void
+print_usage(char *prog_name)
+{
+ const char *usage =
+ "Usage:\n"
+ "\t%s [ -b SIZE ] -c CORE -i INTERFACE [ -q QUEUE ]\n"
+ "\n"
+ "-c CORE CPU core to run a packet forwarding thread\n"
+ " on. May be invoked multiple times.\n"
+ "\n"
+ "-b SIZE Number of buffers in the buffer pool shared\n"
+ " by all the forwarding threads. Default: %u.\n"
+ "\n"
+ "-i INTERFACE Network interface. Each (INTERFACE, QUEUE)\n"
+ " pair specifies one forwarding port. May be\n"
+ " invoked multiple times.\n"
+ "\n"
+ "-q QUEUE Network interface queue for RX and TX. Each\n"
+ " (INTERFACE, QUEUE) pair specified one\n"
+ " forwarding port. Default: %u. May be invoked\n"
+ " multiple times.\n"
+ "\n";
+ printf(usage,
+ prog_name,
+ bpool_params_default.n_buffers,
+ port_params_default.iface_queue);
+}
+
+static int
+parse_args(int argc, char **argv)
+{
+ struct option lgopts[] = {
+ { NULL, 0, 0, 0 }
+ };
+ int opt, option_index;
+
+ /* Parse the input arguments. */
+ for ( ; ;) {
+ opt = getopt_long(argc, argv, "c:i:q:", lgopts, &option_index);
+ if (opt == EOF)
+ break;
+
+ switch (opt) {
+ case 'b':
+ bpool_params.n_buffers = atoi(optarg);
+ break;
+
+ case 'c':
+ if (n_threads == MAX_THREADS) {
+ printf("Max number of threads (%d) reached.\n",
+ MAX_THREADS);
+ return -1;
+ }
+
+ thread_data[n_threads].cpu_core_id = atoi(optarg);
+ n_threads++;
+ break;
+
+ case 'i':
+ if (n_ports == MAX_PORTS) {
+ printf("Max number of ports (%d) reached.\n",
+ MAX_PORTS);
+ return -1;
+ }
+
+ port_params[n_ports].iface = optarg;
+ port_params[n_ports].iface_queue = 0;
+ n_ports++;
+ break;
+
+ case 'q':
+ if (n_ports == 0) {
+ printf("No port specified for queue.\n");
+ return -1;
+ }
+ port_params[n_ports - 1].iface_queue = atoi(optarg);
+ break;
+
+ default:
+ printf("Illegal argument.\n");
+ return -1;
+ }
+ }
+
+ optind = 1; /* reset getopt lib */
+
+ /* Check the input arguments. */
+ if (!n_ports) {
+ printf("No ports specified.\n");
+ return -1;
+ }
+
+ if (!n_threads) {
+ printf("No threads specified.\n");
+ return -1;
+ }
+
+ if (n_ports % n_threads) {
+ printf("Ports cannot be evenly distributed to threads.\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static void
+print_port(u32 port_id)
+{
+ struct port *port = ports[port_id];
+
+ printf("Port %u: interface = %s, queue = %u\n",
+ port_id, port->params.iface, port->params.iface_queue);
+}
+
+static void
+print_thread(u32 thread_id)
+{
+ struct thread_data *t = &thread_data[thread_id];
+ u32 i;
+
+ printf("Thread %u (CPU core %u): ",
+ thread_id, t->cpu_core_id);
+
+ for (i = 0; i < t->n_ports_rx; i++) {
+ struct port *port_rx = t->ports_rx[i];
+ struct port *port_tx = t->ports_tx[i];
+
+ printf("(%s, %u) -> (%s, %u), ",
+ port_rx->params.iface,
+ port_rx->params.iface_queue,
+ port_tx->params.iface,
+ port_tx->params.iface_queue);
+ }
+
+ printf("\n");
+}
+
+static void
+print_port_stats_separator(void)
+{
+ printf("+-%4s-+-%12s-+-%13s-+-%12s-+-%13s-+\n",
+ "----",
+ "------------",
+ "-------------",
+ "------------",
+ "-------------");
+}
+
+static void
+print_port_stats_header(void)
+{
+ print_port_stats_separator();
+ printf("| %4s | %12s | %13s | %12s | %13s |\n",
+ "Port",
+ "RX packets",
+ "RX rate (pps)",
+ "TX packets",
+ "TX_rate (pps)");
+ print_port_stats_separator();
+}
+
+static void
+print_port_stats_trailer(void)
+{
+ print_port_stats_separator();
+ printf("\n");
+}
+
+static void
+print_port_stats(int port_id, u64 ns_diff)
+{
+ struct port *p = ports[port_id];
+ double rx_pps, tx_pps;
+
+ rx_pps = (p->n_pkts_rx - n_pkts_rx[port_id]) * 1000000000. / ns_diff;
+ tx_pps = (p->n_pkts_tx - n_pkts_tx[port_id]) * 1000000000. / ns_diff;
+
+ printf("| %4d | %12llu | %13.0f | %12llu | %13.0f |\n",
+ port_id,
+ p->n_pkts_rx,
+ rx_pps,
+ p->n_pkts_tx,
+ tx_pps);
+
+ n_pkts_rx[port_id] = p->n_pkts_rx;
+ n_pkts_tx[port_id] = p->n_pkts_tx;
+}
+
+static void
+print_port_stats_all(u64 ns_diff)
+{
+ int i;
+
+ print_port_stats_header();
+ for (i = 0; i < n_ports; i++)
+ print_port_stats(i, ns_diff);
+ print_port_stats_trailer();
+}
+
+static int quit;
+
+static void
+signal_handler(int sig)
+{
+ quit = 1;
+}
+
+static void remove_xdp_program(void)
+{
+ int i;
+
+ for (i = 0 ; i < n_ports; i++)
+ bpf_set_link_xdp_fd(if_nametoindex(port_params[i].iface), -1,
+ port_params[i].xsk_cfg.xdp_flags);
+}
+
+int main(int argc, char **argv)
+{
+ struct timespec time;
+ u64 ns0;
+ int i;
+
+ /* Parse args. */
+ memcpy(&bpool_params, &bpool_params_default,
+ sizeof(struct bpool_params));
+ memcpy(&umem_cfg, &umem_cfg_default,
+ sizeof(struct xsk_umem_config));
+ for (i = 0; i < MAX_PORTS; i++)
+ memcpy(&port_params[i], &port_params_default,
+ sizeof(struct port_params));
+
+ if (parse_args(argc, argv)) {
+ print_usage(argv[0]);
+ return -1;
+ }
+
+ /* Buffer pool initialization. */
+ bp = bpool_init(&bpool_params, &umem_cfg);
+ if (!bp) {
+ printf("Buffer pool initialization failed.\n");
+ return -1;
+ }
+ printf("Buffer pool created successfully.\n");
+
+ /* Ports initialization. */
+ for (i = 0; i < MAX_PORTS; i++)
+ port_params[i].bp = bp;
+
+ for (i = 0; i < n_ports; i++) {
+ ports[i] = port_init(&port_params[i]);
+ if (!ports[i]) {
+ printf("Port %d initialization failed.\n", i);
+ return -1;
+ }
+ print_port(i);
+ }
+ printf("All ports created successfully.\n");
+
+ /* Threads. */
+ for (i = 0; i < n_threads; i++) {
+ struct thread_data *t = &thread_data[i];
+ u32 n_ports_per_thread = n_ports / n_threads, j;
+
+ for (j = 0; j < n_ports_per_thread; j++) {
+ t->ports_rx[j] = ports[i * n_ports_per_thread + j];
+ t->ports_tx[j] = ports[i * n_ports_per_thread +
+ (j + 1) % n_ports_per_thread];
+ }
+
+ t->n_ports_rx = n_ports_per_thread;
+
+ print_thread(i);
+ }
+
+ for (i = 0; i < n_threads; i++) {
+ int status;
+
+ status = pthread_create(&threads[i],
+ NULL,
+ thread_func,
+ &thread_data[i]);
+ if (status) {
+ printf("Thread %d creation failed.\n", i);
+ return -1;
+ }
+ }
+ printf("All threads created successfully.\n");
+
+ /* Print statistics. */
+ signal(SIGINT, signal_handler);
+ signal(SIGTERM, signal_handler);
+ signal(SIGABRT, signal_handler);
+
+ clock_gettime(CLOCK_MONOTONIC, &time);
+ ns0 = time.tv_sec * 1000000000UL + time.tv_nsec;
+ for ( ; !quit; ) {
+ u64 ns1, ns_diff;
+
+ sleep(1);
+ clock_gettime(CLOCK_MONOTONIC, &time);
+ ns1 = time.tv_sec * 1000000000UL + time.tv_nsec;
+ ns_diff = ns1 - ns0;
+ ns0 = ns1;
+
+ print_port_stats_all(ns_diff);
+ }
+
+ /* Threads completion. */
+ printf("Quit.\n");
+ for (i = 0; i < n_threads; i++)
+ thread_data[i].quit = 1;
+
+ for (i = 0; i < n_threads; i++)
+ pthread_join(threads[i], NULL);
+
+ for (i = 0; i < n_ports; i++)
+ port_free(ports[i]);
+
+ bpool_free(bp);
+
+ remove_xdp_program();
+
+ return 0;
+}
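The buffer pool and per-thread cache added above exchange buffers only in whole slabs of n_buffers_per_slab entries, so the shared pool mutex is taken once per slab rather than once per buffer. Below is a minimal, single-threaded sketch of that slab-exchange idea; the names (pool, cache, K, N_SLABS) and the flat arrays are illustrative stand-ins for the sample's bpool/bcache structures, not part of the patch.

#include <stdio.h>
#include <stdint.h>

#define K	4		/* buffers per slab */
#define N_SLABS	4		/* full slabs initially owned by the pool */

/* Backing storage: N_SLABS full slabs plus the cache's two (empty) slabs. */
static uint64_t slab_mem[N_SLABS + 2][K];
static uint64_t *pool[N_SLABS];	/* pool[0..pool_n-1] full, indexes above hold empties */
static int pool_n;

struct cache {
	uint64_t *cons, *prod;
	int n_cons, n_prod;
};

/* Take one buffer; swap the empty consumer slab for a full pool slab when needed. */
static uint64_t cache_cons(struct cache *c)
{
	if (!c->n_cons) {
		uint64_t *full = pool[--pool_n];

		pool[pool_n] = c->cons;	/* leave the now-empty slab behind */
		c->cons = full;
		c->n_cons = K;
	}
	return c->cons[--c->n_cons];
}

/* Return one buffer; swap the full producer slab for an empty slab when needed. */
static void cache_prod(struct cache *c, uint64_t buf)
{
	if (c->n_prod == K) {
		uint64_t *empty = pool[pool_n];

		pool[pool_n++] = c->prod;	/* publish the full slab */
		c->prod = empty;
		c->n_prod = 0;
	}
	c->prod[c->n_prod++] = buf;
}

int main(void)
{
	struct cache c = { slab_mem[N_SLABS], slab_mem[N_SLABS + 1], 0, 0 };
	uint64_t got[2 * K];
	int i, j;

	for (i = 0; i < N_SLABS; i++) {	/* the pool starts out owning all buffers */
		for (j = 0; j < K; j++)
			slab_mem[i][j] = i * K + j;
		pool[i] = slab_mem[i];
	}
	pool_n = N_SLABS;

	for (i = 0; i < 2 * K; i++)	/* "receive": pull two slabs' worth of buffers */
		got[i] = cache_cons(&c);
	for (i = 0; i < 2 * K; i++)	/* "transmit complete": give them back */
		cache_prod(&c, got[i]);

	/* One full slab is still parked in the cache's producer slab, as in the sample. */
	printf("full slabs back in the pool: %d\n", pool_n);
	return 0;
}

The real bcache wraps only the two swap paths in the pool mutex, which is the whole point of the slab granularity.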
diff --git a/samples/configfs/configfs_sample.c b/samples/configfs/configfs_sample.c
index e2398d9..f9008be 100644
--- a/samples/configfs/configfs_sample.c
+++ b/samples/configfs/configfs_sample.c
@@ -7,19 +7,17 @@
* macros defined by configfs.h
*
* Based on sysfs:
- * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel
+ * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel
*
* configfs Copyright (C) 2005 Oracle. All rights reserved.
*/
#include <linux/init.h>
+#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
-
#include <linux/configfs.h>
-
-
/*
* 01-childless
*
@@ -40,8 +38,8 @@
static inline struct childless *to_childless(struct config_item *item)
{
- return item ? container_of(to_configfs_subsystem(to_config_group(item)),
- struct childless, subsys) : NULL;
+ return container_of(to_configfs_subsystem(to_config_group(item)),
+ struct childless, subsys);
}
static ssize_t childless_showme_show(struct config_item *item, char *page)
@@ -64,17 +62,11 @@
const char *page, size_t count)
{
struct childless *childless = to_childless(item);
- unsigned long tmp;
- char *p = (char *) page;
+ int ret;
- tmp = simple_strtoul(p, &p, 10);
- if (!p || (*p && (*p != '\n')))
- return -EINVAL;
-
- if (tmp > INT_MAX)
- return -ERANGE;
-
- childless->storeme = tmp;
+ ret = kstrtoint(page, 10, &childless->storeme);
+ if (ret)
+ return ret;
return count;
}
@@ -117,7 +109,6 @@
},
};
-
/* ----------------------------------------------------------------- */
/*
@@ -136,7 +127,7 @@
static inline struct simple_child *to_simple_child(struct config_item *item)
{
- return item ? container_of(item, struct simple_child, item) : NULL;
+ return container_of(item, struct simple_child, item);
}
static ssize_t simple_child_storeme_show(struct config_item *item, char *page)
@@ -148,17 +139,11 @@
const char *page, size_t count)
{
struct simple_child *simple_child = to_simple_child(item);
- unsigned long tmp;
- char *p = (char *) page;
+ int ret;
- tmp = simple_strtoul(p, &p, 10);
- if (!p || (*p && (*p != '\n')))
- return -EINVAL;
-
- if (tmp > INT_MAX)
- return -ERANGE;
-
- simple_child->storeme = tmp;
+ ret = kstrtoint(page, 10, &simple_child->storeme);
+ if (ret)
+ return ret;
return count;
}
@@ -176,7 +161,7 @@
}
static struct configfs_item_operations simple_child_item_ops = {
- .release = simple_child_release,
+ .release = simple_child_release,
};
static const struct config_item_type simple_child_type = {
@@ -185,15 +170,14 @@
.ct_owner = THIS_MODULE,
};
-
struct simple_children {
struct config_group group;
};
static inline struct simple_children *to_simple_children(struct config_item *item)
{
- return item ? container_of(to_config_group(item),
- struct simple_children, group) : NULL;
+ return container_of(to_config_group(item),
+ struct simple_children, group);
}
static struct config_item *simple_children_make_item(struct config_group *group,
@@ -208,8 +192,6 @@
config_item_init_type_name(&simple_child->item, name,
&simple_child_type);
- simple_child->storeme = 0;
-
return &simple_child->item;
}
@@ -263,7 +245,6 @@
},
};
-
/* ----------------------------------------------------------------- */
/*
@@ -350,9 +331,8 @@
static int __init configfs_example_init(void)
{
- int ret;
- int i;
struct configfs_subsystem *subsys;
+ int ret, i;
for (i = 0; example_subsys[i]; i++) {
subsys = example_subsys[i];
@@ -361,9 +341,8 @@
mutex_init(&subsys->su_mutex);
ret = configfs_register_subsystem(subsys);
if (ret) {
- printk(KERN_ERR "Error %d while registering subsystem %s\n",
- ret,
- subsys->su_group.cg_item.ci_namebuf);
+ pr_err("Error %d while registering subsystem %s\n",
+ ret, subsys->su_group.cg_item.ci_namebuf);
goto out_unregister;
}
}
diff --git a/samples/connector/.gitignore b/samples/connector/.gitignore
index d2b9c32..d86f2ff 100644
--- a/samples/connector/.gitignore
+++ b/samples/connector/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
ucon
diff --git a/samples/connector/Makefile b/samples/connector/Makefile
index 6ad7162..d98a9e0 100644
--- a/samples/connector/Makefile
+++ b/samples/connector/Makefile
@@ -1,17 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_SAMPLE_CONNECTOR) += cn_test.o
-# List of programs to build
-ifdef CONFIG_SAMPLE_CONNECTOR
-hostprogs-y := ucon
-endif
+userprogs-always-$(CONFIG_CC_CAN_LINK) += ucon
-# Tell kbuild to always build the programs
-always := $(hostprogs-y)
-
-HOSTCFLAGS_ucon.o += -I$(objtree)/usr/include
-
-all: modules
-
-modules clean:
- $(MAKE) -C ../.. M=$(CURDIR) $@
+userccflags += -I usr/include
diff --git a/samples/ftrace/Makefile b/samples/ftrace/Makefile
new file mode 100644
index 0000000..4ce896e
--- /dev/null
+++ b/samples/ftrace/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-$(CONFIG_SAMPLE_FTRACE_DIRECT) += ftrace-direct.o
+obj-$(CONFIG_SAMPLE_FTRACE_DIRECT) += ftrace-direct-too.o
+obj-$(CONFIG_SAMPLE_FTRACE_DIRECT) += ftrace-direct-modify.o
+
+CFLAGS_sample-trace-array.o := -I$(src)
+obj-$(CONFIG_SAMPLE_TRACE_ARRAY) += sample-trace-array.o
diff --git a/samples/ftrace/ftrace-direct-modify.c b/samples/ftrace/ftrace-direct-modify.c
new file mode 100644
index 0000000..89e6bf2
--- /dev/null
+++ b/samples/ftrace/ftrace-direct-modify.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/ftrace.h>
+
+extern void my_direct_func1(void);
+extern void my_direct_func2(void);
+
+void my_direct_func1(void)
+{
+ trace_printk("my direct func1\n");
+}
+
+void my_direct_func2(void)
+{
+ trace_printk("my direct func2\n");
+}
+
+extern void my_tramp1(void *);
+extern void my_tramp2(void *);
+
+static unsigned long my_ip = (unsigned long)schedule;
+
+asm (
+" .pushsection .text, \"ax\", @progbits\n"
+" .type my_tramp1, @function\n"
+" .globl my_tramp1\n"
+" my_tramp1:"
+" pushq %rbp\n"
+" movq %rsp, %rbp\n"
+" call my_direct_func1\n"
+" leave\n"
+" .size my_tramp1, .-my_tramp1\n"
+" ret\n"
+" .type my_tramp2, @function\n"
+" .globl my_tramp2\n"
+" my_tramp2:"
+" pushq %rbp\n"
+" movq %rsp, %rbp\n"
+" call my_direct_func2\n"
+" leave\n"
+" ret\n"
+" .size my_tramp2, .-my_tramp2\n"
+" .popsection\n"
+);
+
+static unsigned long my_tramp = (unsigned long)my_tramp1;
+static unsigned long tramps[2] = {
+ (unsigned long)my_tramp1,
+ (unsigned long)my_tramp2,
+};
+
+static int simple_thread(void *arg)
+{
+ static int t;
+ int ret = 0;
+
+ while (!kthread_should_stop()) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(2 * HZ);
+
+ if (ret)
+ continue;
+ t ^= 1;
+ ret = modify_ftrace_direct(my_ip, my_tramp, tramps[t]);
+ if (!ret)
+ my_tramp = tramps[t];
+ WARN_ON_ONCE(ret);
+ }
+
+ return 0;
+}
+
+static struct task_struct *simple_tsk;
+
+static int __init ftrace_direct_init(void)
+{
+ int ret;
+
+ ret = register_ftrace_direct(my_ip, my_tramp);
+ if (!ret)
+ simple_tsk = kthread_run(simple_thread, NULL, "event-sample-fn");
+ return ret;
+}
+
+static void __exit ftrace_direct_exit(void)
+{
+ kthread_stop(simple_tsk);
+ unregister_ftrace_direct(my_ip, my_tramp);
+}
+
+module_init(ftrace_direct_init);
+module_exit(ftrace_direct_exit);
+
+MODULE_AUTHOR("Steven Rostedt");
+MODULE_DESCRIPTION("Example use case of using modify_ftrace_direct()");
+MODULE_LICENSE("GPL");
diff --git a/samples/ftrace/ftrace-direct-too.c b/samples/ftrace/ftrace-direct-too.c
new file mode 100644
index 0000000..11b9932
--- /dev/null
+++ b/samples/ftrace/ftrace-direct-too.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/module.h>
+
+#include <linux/mm.h> /* for handle_mm_fault() */
+#include <linux/ftrace.h>
+
+extern void my_direct_func(struct vm_area_struct *vma,
+ unsigned long address, unsigned int flags);
+
+void my_direct_func(struct vm_area_struct *vma,
+ unsigned long address, unsigned int flags)
+{
+ trace_printk("handle mm fault vma=%p address=%lx flags=%x\n",
+ vma, address, flags);
+}
+
+extern void my_tramp(void *);
+
+asm (
+" .pushsection .text, \"ax\", @progbits\n"
+" .type my_tramp, @function\n"
+" .globl my_tramp\n"
+" my_tramp:"
+" pushq %rbp\n"
+" movq %rsp, %rbp\n"
+" pushq %rdi\n"
+" pushq %rsi\n"
+" pushq %rdx\n"
+" call my_direct_func\n"
+" popq %rdx\n"
+" popq %rsi\n"
+" popq %rdi\n"
+" leave\n"
+" ret\n"
+" .size my_tramp, .-my_tramp\n"
+" .popsection\n"
+);
+
+
+static int __init ftrace_direct_init(void)
+{
+ return register_ftrace_direct((unsigned long)handle_mm_fault,
+ (unsigned long)my_tramp);
+}
+
+static void __exit ftrace_direct_exit(void)
+{
+ unregister_ftrace_direct((unsigned long)handle_mm_fault,
+ (unsigned long)my_tramp);
+}
+
+module_init(ftrace_direct_init);
+module_exit(ftrace_direct_exit);
+
+MODULE_AUTHOR("Steven Rostedt");
+MODULE_DESCRIPTION("Another example use case of using register_ftrace_direct()");
+MODULE_LICENSE("GPL");
diff --git a/samples/ftrace/ftrace-direct.c b/samples/ftrace/ftrace-direct.c
new file mode 100644
index 0000000..642c50b
--- /dev/null
+++ b/samples/ftrace/ftrace-direct.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/module.h>
+
+#include <linux/sched.h> /* for wake_up_process() */
+#include <linux/ftrace.h>
+
+extern void my_direct_func(struct task_struct *p);
+
+void my_direct_func(struct task_struct *p)
+{
+ trace_printk("waking up %s-%d\n", p->comm, p->pid);
+}
+
+extern void my_tramp(void *);
+
+asm (
+" .pushsection .text, \"ax\", @progbits\n"
+" .type my_tramp, @function\n"
+" .globl my_tramp\n"
+" my_tramp:"
+" pushq %rbp\n"
+" movq %rsp, %rbp\n"
+" pushq %rdi\n"
+" call my_direct_func\n"
+" popq %rdi\n"
+" leave\n"
+" ret\n"
+" .size my_tramp, .-my_tramp\n"
+" .popsection\n"
+);
+
+
+static int __init ftrace_direct_init(void)
+{
+ return register_ftrace_direct((unsigned long)wake_up_process,
+ (unsigned long)my_tramp);
+}
+
+static void __exit ftrace_direct_exit(void)
+{
+ unregister_ftrace_direct((unsigned long)wake_up_process,
+ (unsigned long)my_tramp);
+}
+
+module_init(ftrace_direct_init);
+module_exit(ftrace_direct_exit);
+
+MODULE_AUTHOR("Steven Rostedt");
+MODULE_DESCRIPTION("Example use case of using register_ftrace_direct()");
+MODULE_LICENSE("GPL");
diff --git a/samples/ftrace/sample-trace-array.c b/samples/ftrace/sample-trace-array.c
new file mode 100644
index 0000000..6aba02a
--- /dev/null
+++ b/samples/ftrace/sample-trace-array.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/trace.h>
+#include <linux/trace_events.h>
+#include <linux/timer.h>
+#include <linux/err.h>
+#include <linux/jiffies.h>
+#include <linux/workqueue.h>
+
+/*
+ * Any file that uses tracepoints must include the header.
+ * But only one file must include the header by defining
+ * CREATE_TRACE_POINTS first. This generates the C code that
+ * creates the handles for the trace points.
+ */
+#define CREATE_TRACE_POINTS
+#include "sample-trace-array.h"
+
+struct trace_array *tr;
+static void mytimer_handler(struct timer_list *unused);
+static struct task_struct *simple_tsk;
+
+static void trace_work_fn(struct work_struct *work)
+{
+ /*
+ * Disable tracing for event "sample_event".
+ */
+ trace_array_set_clr_event(tr, "sample-subsystem", "sample_event",
+ false);
+}
+static DECLARE_WORK(trace_work, trace_work_fn);
+
+/*
+ * mytimer: Timer setup to disable tracing for event "sample_event". This
+ * timer exists only so that the sample module can demonstrate access to
+ * Ftrace instances from within the kernel.
+ */
+static DEFINE_TIMER(mytimer, mytimer_handler);
+
+static void mytimer_handler(struct timer_list *unused)
+{
+ schedule_work(&trace_work);
+}
+
+static void simple_thread_func(int count)
+{
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(HZ);
+
+ /*
+ * Printing count value using trace_array_printk() - trace_printk()
+ * equivalent for the instance buffers.
+ */
+ trace_array_printk(tr, _THIS_IP_, "trace_array_printk: count=%d\n",
+ count);
+ /*
+ * Tracepoint for event "sample_event". This will print the
+ * current value of count and current jiffies.
+ */
+ trace_sample_event(count, jiffies);
+}
+
+static int simple_thread(void *arg)
+{
+ int count = 0;
+ unsigned long delay = msecs_to_jiffies(5000);
+
+ /*
+ * Enable tracing for "sample_event".
+ */
+ trace_array_set_clr_event(tr, "sample-subsystem", "sample_event", true);
+
+ /*
+ * Adding timer - mytimer. This timer will disable tracing after
+ * delay seconds.
+ *
+ */
+ add_timer(&mytimer);
+ mod_timer(&mytimer, jiffies+delay);
+
+ while (!kthread_should_stop())
+ simple_thread_func(count++);
+
+ del_timer(&mytimer);
+ cancel_work_sync(&trace_work);
+
+ /*
+ * trace_array_put() decrements the reference counter associated with
+ * the trace array - "tr". We are done using the trace array, hence
+ * decrement the reference counter so that it can be destroyed using
+ * trace_array_destroy().
+ */
+ trace_array_put(tr);
+
+ return 0;
+}
+
+static int __init sample_trace_array_init(void)
+{
+ /*
+ * Return a pointer to the trace array with name "sample-instance" if it
+ * exists, else create a new trace array.
+ *
+ * NOTE: This function increments the reference counter
+ * associated with the trace array - "tr".
+ */
+ tr = trace_array_get_by_name("sample-instance");
+
+ if (!tr)
+ return -1;
+ /*
+ * If context-specific per-cpu buffers haven't already been allocated.
+ */
+ trace_printk_init_buffers();
+
+ simple_tsk = kthread_run(simple_thread, NULL, "sample-instance");
+ if (IS_ERR(simple_tsk)) {
+ trace_array_put(tr);
+ trace_array_destroy(tr);
+ return -1;
+ }
+
+ return 0;
+}
+
+static void __exit sample_trace_array_exit(void)
+{
+ kthread_stop(simple_tsk);
+
+ /*
+ * We are unloading our module and no longer require the trace array.
+ * Remove/destroy "tr" using trace_array_destroy()
+ */
+ trace_array_destroy(tr);
+}
+
+module_init(sample_trace_array_init);
+module_exit(sample_trace_array_exit);
+
+MODULE_AUTHOR("Divya Indi");
+MODULE_DESCRIPTION("Sample module for kernel access to Ftrace instances");
+MODULE_LICENSE("GPL");
diff --git a/samples/ftrace/sample-trace-array.h b/samples/ftrace/sample-trace-array.h
new file mode 100644
index 0000000..6f89624
--- /dev/null
+++ b/samples/ftrace/sample-trace-array.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * If TRACE_SYSTEM is defined, that will be the directory created
+ * in the ftrace directory under /sys/kernel/tracing/events/<system>
+ *
+ * The define_trace.h below will also look for a file name of
+ * TRACE_SYSTEM.h where TRACE_SYSTEM is what is defined here.
+ * In this case, it would look for sample-subsystem.h
+ *
+ * If the header name will be different than the system name
+ * (as in this case), then you can override the header name that
+ * define_trace.h will look up by defining TRACE_INCLUDE_FILE
+ *
+ * This file is called sample-trace-array.h but we want the system
+ * to be called "sample-subsystem". Therefore we must define the name of this
+ * file:
+ *
+ * #define TRACE_INCLUDE_FILE sample-trace-array
+ *
+ * As we do in the bottom of this file.
+ *
+ * Notice that TRACE_SYSTEM should be defined outside of #if
+ * protection, just like TRACE_INCLUDE_FILE.
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM sample-subsystem
+
+/*
+ * TRACE_SYSTEM is expected to be a C valid variable (alpha-numeric
+ * and underscore), although it may start with numbers. If for some
+ * reason it is not, you need to add the following lines:
+ */
+#undef TRACE_SYSTEM_VAR
+#define TRACE_SYSTEM_VAR sample_subsystem
+
+/*
+ * But the above is only needed if TRACE_SYSTEM is not alpha-numeric
+ * and underscored. By default, TRACE_SYSTEM_VAR will be equal to
+ * TRACE_SYSTEM. As TRACE_SYSTEM_VAR must be alpha-numeric, if
+ * TRACE_SYSTEM is not, then TRACE_SYSTEM_VAR must be defined with
+ * only alpha-numeric and underscores.
+ *
+ * The TRACE_SYSTEM_VAR is only used internally and not visible to
+ * user space.
+ */
+
+/*
+ * Notice that this file is not protected like a normal header.
+ * We also must allow for rereading of this file. The
+ *
+ * || defined(TRACE_HEADER_MULTI_READ)
+ *
+ * serves this purpose.
+ */
+#if !defined(_SAMPLE_TRACE_ARRAY_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _SAMPLE_TRACE_ARRAY_H
+
+#include <linux/tracepoint.h>
+TRACE_EVENT(sample_event,
+
+ TP_PROTO(int count, unsigned long time),
+
+ TP_ARGS(count, time),
+
+ TP_STRUCT__entry(
+ __field(int, count)
+ __field(unsigned long, time)
+ ),
+
+ TP_fast_assign(
+ __entry->count = count;
+ __entry->time = time;
+ ),
+
+ TP_printk("count value=%d at jiffies=%lu", __entry->count,
+ __entry->time)
+ );
+#endif
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE sample-trace-array
+#include <trace/define_trace.h>
diff --git a/samples/hidraw/.gitignore b/samples/hidraw/.gitignore
index 05e51a6..d7a6074 100644
--- a/samples/hidraw/.gitignore
+++ b/samples/hidraw/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
hid-example
diff --git a/samples/hidraw/Makefile b/samples/hidraw/Makefile
index dec1b22..594d989 100644
--- a/samples/hidraw/Makefile
+++ b/samples/hidraw/Makefile
@@ -1,10 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
-# List of programs to build
-hostprogs-y := hid-example
+userprogs-always-y += hid-example
-# Tell kbuild to always build the programs
-always := $(hostprogs-y)
-
-HOSTCFLAGS_hid-example.o += -I$(objtree)/usr/include
-
-all: hid-example
+userccflags += -I usr/include
diff --git a/samples/hw_breakpoint/data_breakpoint.c b/samples/hw_breakpoint/data_breakpoint.c
index c585047..418c46f 100644
--- a/samples/hw_breakpoint/data_breakpoint.c
+++ b/samples/hw_breakpoint/data_breakpoint.c
@@ -23,7 +23,7 @@
struct perf_event * __percpu *sample_hbp;
-static char ksym_name[KSYM_NAME_LEN] = "pid_max";
+static char ksym_name[KSYM_NAME_LEN] = "jiffies";
module_param_string(ksym, ksym_name, KSYM_NAME_LEN, S_IRUGO);
MODULE_PARM_DESC(ksym, "Kernel symbol to monitor; this module will report any"
" write operations on the kernel symbol");
@@ -41,11 +41,15 @@
{
int ret;
struct perf_event_attr attr;
+ void *addr = __symbol_get(ksym_name);
+
+ if (!addr)
+ return -ENXIO;
hw_breakpoint_init(&attr);
- attr.bp_addr = kallsyms_lookup_name(ksym_name);
+ attr.bp_addr = (unsigned long)addr;
attr.bp_len = HW_BREAKPOINT_LEN_4;
- attr.bp_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
+ attr.bp_type = HW_BREAKPOINT_W;
sample_hbp = register_wide_hw_breakpoint(&attr, sample_hbp_handler, NULL);
if (IS_ERR((void __force *)sample_hbp)) {
@@ -66,6 +70,7 @@
static void __exit hw_break_module_exit(void)
{
unregister_wide_hw_breakpoint(sample_hbp);
+ symbol_put(ksym_name);
printk(KERN_INFO "HW Breakpoint for %s write uninstalled\n", ksym_name);
}
diff --git a/samples/kfifo/bytestream-example.c b/samples/kfifo/bytestream-example.c
index ecae213..5a90aa5 100644
--- a/samples/kfifo/bytestream-example.c
+++ b/samples/kfifo/bytestream-example.c
@@ -146,11 +146,10 @@
return copied;
}
-static const struct file_operations fifo_fops = {
- .owner = THIS_MODULE,
- .read = fifo_read,
- .write = fifo_write,
- .llseek = noop_llseek,
+static const struct proc_ops fifo_proc_ops = {
+ .proc_read = fifo_read,
+ .proc_write = fifo_write,
+ .proc_lseek = noop_llseek,
};
static int __init example_init(void)
@@ -173,7 +172,7 @@
return -EIO;
}
- if (proc_create(PROC_FIFO, 0, NULL, &fifo_fops) == NULL) {
+ if (proc_create(PROC_FIFO, 0, NULL, &fifo_proc_ops) == NULL) {
#ifdef DYNAMIC
kfifo_free(&test);
#endif
diff --git a/samples/kfifo/inttype-example.c b/samples/kfifo/inttype-example.c
index 7b4489e..e5403d8 100644
--- a/samples/kfifo/inttype-example.c
+++ b/samples/kfifo/inttype-example.c
@@ -139,11 +139,10 @@
return copied;
}
-static const struct file_operations fifo_fops = {
- .owner = THIS_MODULE,
- .read = fifo_read,
- .write = fifo_write,
- .llseek = noop_llseek,
+static const struct proc_ops fifo_proc_ops = {
+ .proc_read = fifo_read,
+ .proc_write = fifo_write,
+ .proc_lseek = noop_llseek,
};
static int __init example_init(void)
@@ -164,7 +163,7 @@
return -EIO;
}
- if (proc_create(PROC_FIFO, 0, NULL, &fifo_fops) == NULL) {
+ if (proc_create(PROC_FIFO, 0, NULL, &fifo_proc_ops) == NULL) {
#ifdef DYNAMIC
kfifo_free(&test);
#endif
diff --git a/samples/kfifo/record-example.c b/samples/kfifo/record-example.c
index eafe083..f64f3d6 100644
--- a/samples/kfifo/record-example.c
+++ b/samples/kfifo/record-example.c
@@ -153,11 +153,10 @@
return copied;
}
-static const struct file_operations fifo_fops = {
- .owner = THIS_MODULE,
- .read = fifo_read,
- .write = fifo_write,
- .llseek = noop_llseek,
+static const struct proc_ops fifo_proc_ops = {
+ .proc_read = fifo_read,
+ .proc_write = fifo_write,
+ .proc_lseek = noop_llseek,
};
static int __init example_init(void)
@@ -180,7 +179,7 @@
return -EIO;
}
- if (proc_create(PROC_FIFO, 0, NULL, &fifo_fops) == NULL) {
+ if (proc_create(PROC_FIFO, 0, NULL, &fifo_proc_ops) == NULL) {
#ifdef DYNAMIC
kfifo_free(&test);
#endif
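All three kfifo samples above make the same conversion: since Linux 5.6, /proc entries are registered through the dedicated struct proc_ops rather than struct file_operations, and proc_ops has no .owner field to set. A minimal standalone module using the same registration pattern could look like the sketch below; the entry name and message are illustrative and not taken from the samples.

// SPDX-License-Identifier: GPL-2.0
#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/proc_fs.h>

static const char msg[] = "hello from proc_ops\n";

static ssize_t demo_read(struct file *file, char __user *buf,
			 size_t count, loff_t *ppos)
{
	/* simple_read_from_buffer() handles the offset and short reads */
	return simple_read_from_buffer(buf, count, ppos, msg, sizeof(msg) - 1);
}

static const struct proc_ops demo_proc_ops = {
	.proc_read	= demo_read,
	.proc_lseek	= default_llseek,
};

static struct proc_dir_entry *demo_entry;

static int __init demo_init(void)
{
	demo_entry = proc_create("proc_ops_demo", 0444, NULL, &demo_proc_ops);
	return demo_entry ? 0 : -ENOMEM;
}

static void __exit demo_exit(void)
{
	proc_remove(demo_entry);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");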
diff --git a/samples/kmemleak/Makefile b/samples/kmemleak/Makefile
new file mode 100644
index 0000000..16b6132
--- /dev/null
+++ b/samples/kmemleak/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o
diff --git a/samples/kmemleak/kmemleak-test.c b/samples/kmemleak/kmemleak-test.c
new file mode 100644
index 0000000..7b476eb
--- /dev/null
+++ b/samples/kmemleak/kmemleak-test.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * samples/kmemleak/kmemleak-test.c
+ *
+ * Copyright (C) 2008 ARM Limited
+ * Written by Catalin Marinas <catalin.marinas@arm.com>
+ */
+
+#define pr_fmt(fmt) "kmemleak: " fmt
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/list.h>
+#include <linux/percpu.h>
+#include <linux/fdtable.h>
+
+#include <linux/kmemleak.h>
+
+struct test_node {
+ long header[25];
+ struct list_head list;
+ long footer[25];
+};
+
+static LIST_HEAD(test_list);
+static DEFINE_PER_CPU(void *, kmemleak_test_pointer);
+
+/*
+ * Some very simple testing. This function needs to be extended for
+ * proper testing.
+ */
+static int __init kmemleak_test_init(void)
+{
+ struct test_node *elem;
+ int i;
+
+ pr_info("Kmemleak testing\n");
+
+ /* make some orphan objects */
+ pr_info("kmalloc(32) = %p\n", kmalloc(32, GFP_KERNEL));
+ pr_info("kmalloc(32) = %p\n", kmalloc(32, GFP_KERNEL));
+ pr_info("kmalloc(1024) = %p\n", kmalloc(1024, GFP_KERNEL));
+ pr_info("kmalloc(1024) = %p\n", kmalloc(1024, GFP_KERNEL));
+ pr_info("kmalloc(2048) = %p\n", kmalloc(2048, GFP_KERNEL));
+ pr_info("kmalloc(2048) = %p\n", kmalloc(2048, GFP_KERNEL));
+ pr_info("kmalloc(4096) = %p\n", kmalloc(4096, GFP_KERNEL));
+ pr_info("kmalloc(4096) = %p\n", kmalloc(4096, GFP_KERNEL));
+#ifndef CONFIG_MODULES
+ pr_info("kmem_cache_alloc(files_cachep) = %p\n",
+ kmem_cache_alloc(files_cachep, GFP_KERNEL));
+ pr_info("kmem_cache_alloc(files_cachep) = %p\n",
+ kmem_cache_alloc(files_cachep, GFP_KERNEL));
+#endif
+ pr_info("vmalloc(64) = %p\n", vmalloc(64));
+ pr_info("vmalloc(64) = %p\n", vmalloc(64));
+ pr_info("vmalloc(64) = %p\n", vmalloc(64));
+ pr_info("vmalloc(64) = %p\n", vmalloc(64));
+ pr_info("vmalloc(64) = %p\n", vmalloc(64));
+
+ /*
+ * Add elements to a list. They should only appear as orphan
+ * after the module is removed.
+ */
+ for (i = 0; i < 10; i++) {
+ elem = kzalloc(sizeof(*elem), GFP_KERNEL);
+ pr_info("kzalloc(sizeof(*elem)) = %p\n", elem);
+ if (!elem)
+ return -ENOMEM;
+ INIT_LIST_HEAD(&elem->list);
+ list_add_tail(&elem->list, &test_list);
+ }
+
+ for_each_possible_cpu(i) {
+ per_cpu(kmemleak_test_pointer, i) = kmalloc(129, GFP_KERNEL);
+ pr_info("kmalloc(129) = %p\n",
+ per_cpu(kmemleak_test_pointer, i));
+ }
+
+ return 0;
+}
+module_init(kmemleak_test_init);
+
+static void __exit kmemleak_test_exit(void)
+{
+ struct test_node *elem, *tmp;
+
+ /*
+ * Remove the list elements without actually freeing the
+ * memory.
+ */
+ list_for_each_entry_safe(elem, tmp, &test_list, list)
+ list_del(&elem->list);
+}
+module_exit(kmemleak_test_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/samples/kprobes/kprobe_example.c b/samples/kprobes/kprobe_example.c
index d693c23..365905c 100644
--- a/samples/kprobes/kprobe_example.c
+++ b/samples/kprobes/kprobe_example.c
@@ -2,13 +2,13 @@
/*
* NOTE: This example works on x86 and powerpc.
* Here's a sample kernel module showing the use of kprobes to dump a
- * stack trace and selected registers when _do_fork() is called.
+ * stack trace and selected registers when kernel_clone() is called.
*
* For more information on theory of operation of kprobes, see
- * Documentation/kprobes.txt
+ * Documentation/trace/kprobes.rst
*
* You will see the trace data in /var/log/messages and on the console
- * whenever _do_fork() is invoked to create a new process.
+ * whenever kernel_clone() is invoked to create a new process.
*/
#include <linux/kernel.h>
@@ -16,7 +16,7 @@
#include <linux/kprobes.h>
#define MAX_SYMBOL_LEN 64
-static char symbol[MAX_SYMBOL_LEN] = "_do_fork";
+static char symbol[MAX_SYMBOL_LEN] = "kernel_clone";
module_param_string(symbol, symbol, sizeof(symbol), 0644);
/* For each probe you need to allocate a kprobe structure */
@@ -25,7 +25,7 @@
};
/* kprobe pre_handler: called just before the probed instruction is executed */
-static int handler_pre(struct kprobe *p, struct pt_regs *regs)
+static int __kprobes handler_pre(struct kprobe *p, struct pt_regs *regs)
{
#ifdef CONFIG_X86
pr_info("<%s> pre_handler: p->addr = 0x%p, ip = %lx, flags = 0x%lx\n",
@@ -54,7 +54,7 @@
}
/* kprobe post_handler: called after the probed instruction is executed */
-static void handler_post(struct kprobe *p, struct pt_regs *regs,
+static void __kprobes handler_post(struct kprobe *p, struct pt_regs *regs,
unsigned long flags)
{
#ifdef CONFIG_X86
@@ -90,6 +90,8 @@
/* Return 0 because we don't handle the fault. */
return 0;
}
+/* NOKPROBE_SYMBOL() is also available */
+NOKPROBE_SYMBOL(handler_fault);
static int __init kprobe_init(void)
{
diff --git a/samples/kprobes/kretprobe_example.c b/samples/kprobes/kretprobe_example.c
index 186315c..228321e 100644
--- a/samples/kprobes/kretprobe_example.c
+++ b/samples/kprobes/kretprobe_example.c
@@ -8,10 +8,10 @@
*
* usage: insmod kretprobe_example.ko func=<func_name>
*
- * If no func_name is specified, _do_fork is instrumented
+ * If no func_name is specified, kernel_clone is instrumented
*
* For more information on theory of operation of kretprobes, see
- * Documentation/kprobes.txt
+ * Documentation/trace/kprobes.rst
*
* Build and insert the kernel module as done in the kprobe example.
* You will see the trace data in /var/log/messages and on the console
@@ -26,7 +26,7 @@
#include <linux/limits.h>
#include <linux/sched.h>
-static char func_name[NAME_MAX] = "_do_fork";
+static char func_name[NAME_MAX] = "kernel_clone";
module_param_string(func, func_name, NAME_MAX, S_IRUGO);
MODULE_PARM_DESC(func, "Function to kretprobe; this module will report the"
" function's execution time");
@@ -48,6 +48,7 @@
data->entry_stamp = ktime_get();
return 0;
}
+NOKPROBE_SYMBOL(entry_handler);
/*
* Return-probe handler: Log the return value and duration. Duration may turn
@@ -67,6 +68,7 @@
func_name, retval, (long long)delta);
return 0;
}
+NOKPROBE_SYMBOL(ret_handler);
static struct kretprobe my_kretprobe = {
.handler = ret_handler,
@@ -84,7 +86,7 @@
ret = register_kretprobe(&my_kretprobe);
if (ret < 0) {
pr_err("register_kretprobe failed, returned %d\n", ret);
- return -1;
+ return ret;
}
pr_info("Planted return probe at %s: %p\n",
my_kretprobe.kp.symbol_name, my_kretprobe.kp.addr);
diff --git a/samples/livepatch/livepatch-shadow-fix1.c b/samples/livepatch/livepatch-shadow-fix1.c
index e89ca45..918ce17 100644
--- a/samples/livepatch/livepatch-shadow-fix1.c
+++ b/samples/livepatch/livepatch-shadow-fix1.c
@@ -52,17 +52,21 @@
*/
static int shadow_leak_ctor(void *obj, void *shadow_data, void *ctor_data)
{
- void **shadow_leak = shadow_data;
- void *leak = ctor_data;
+ int **shadow_leak = shadow_data;
+ int **leak = ctor_data;
- *shadow_leak = leak;
+ if (!ctor_data)
+ return -EINVAL;
+
+ *shadow_leak = *leak;
return 0;
}
static struct dummy *livepatch_fix1_dummy_alloc(void)
{
struct dummy *d;
- void *leak;
+ int *leak;
+ int **shadow_leak;
d = kzalloc(sizeof(*d), GFP_KERNEL);
if (!d)
@@ -76,25 +80,34 @@
* variable. A patched dummy_free routine can later fetch this
* pointer to handle resource release.
*/
- leak = kzalloc(sizeof(int), GFP_KERNEL);
- if (!leak) {
- kfree(d);
- return NULL;
- }
+ leak = kzalloc(sizeof(*leak), GFP_KERNEL);
+ if (!leak)
+ goto err_leak;
- klp_shadow_alloc(d, SV_LEAK, sizeof(leak), GFP_KERNEL,
- shadow_leak_ctor, leak);
+ shadow_leak = klp_shadow_alloc(d, SV_LEAK, sizeof(leak), GFP_KERNEL,
+ shadow_leak_ctor, &leak);
+ if (!shadow_leak) {
+ pr_err("%s: failed to allocate shadow variable for the leaking pointer: dummy @ %p, leak @ %p\n",
+ __func__, d, leak);
+ goto err_shadow;
+ }
pr_info("%s: dummy @ %p, expires @ %lx\n",
__func__, d, d->jiffies_expire);
return d;
+
+err_shadow:
+ kfree(leak);
+err_leak:
+ kfree(d);
+ return NULL;
}
static void livepatch_fix1_dummy_leak_dtor(void *obj, void *shadow_data)
{
void *d = obj;
- void **shadow_leak = shadow_data;
+ int **shadow_leak = shadow_data;
kfree(*shadow_leak);
pr_info("%s: dummy @ %p, prevented leak @ %p\n",
@@ -103,7 +116,7 @@
static void livepatch_fix1_dummy_free(struct dummy *d)
{
- void **shadow_leak;
+ int **shadow_leak;
/*
* Patch: fetch the saved SV_LEAK shadow variable, detach and
diff --git a/samples/livepatch/livepatch-shadow-fix2.c b/samples/livepatch/livepatch-shadow-fix2.c
index 50d223b..29fe5cd 100644
--- a/samples/livepatch/livepatch-shadow-fix2.c
+++ b/samples/livepatch/livepatch-shadow-fix2.c
@@ -59,7 +59,7 @@
static void livepatch_fix2_dummy_leak_dtor(void *obj, void *shadow_data)
{
void *d = obj;
- void **shadow_leak = shadow_data;
+ int **shadow_leak = shadow_data;
kfree(*shadow_leak);
pr_info("%s: dummy @ %p, prevented leak @ %p\n",
@@ -68,7 +68,7 @@
static void livepatch_fix2_dummy_free(struct dummy *d)
{
- void **shadow_leak;
+ int **shadow_leak;
int *shadow_count;
/* Patch: copy the memory leak patch from the fix1 module. */
diff --git a/samples/livepatch/livepatch-shadow-mod.c b/samples/livepatch/livepatch-shadow-mod.c
index ecfe83a..7e753b0 100644
--- a/samples/livepatch/livepatch-shadow-mod.c
+++ b/samples/livepatch/livepatch-shadow-mod.c
@@ -95,7 +95,7 @@
static __used noinline struct dummy *dummy_alloc(void)
{
struct dummy *d;
- void *leak;
+ int *leak;
d = kzalloc(sizeof(*d), GFP_KERNEL);
if (!d)
@@ -105,7 +105,7 @@
msecs_to_jiffies(1000 * EXPIRE_PERIOD);
/* Oops, forgot to save leak! */
- leak = kzalloc(sizeof(int), GFP_KERNEL);
+ leak = kzalloc(sizeof(*leak), GFP_KERNEL);
if (!leak) {
kfree(d);
return NULL;
diff --git a/samples/mei/.gitignore b/samples/mei/.gitignore
index f356b81..db5e802 100644
--- a/samples/mei/.gitignore
+++ b/samples/mei/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
mei-amt-version
diff --git a/samples/mei/Makefile b/samples/mei/Makefile
index c7e52e9..c54b8a0 100644
--- a/samples/mei/Makefile
+++ b/samples/mei/Makefile
@@ -1,10 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-CC := $(CROSS_COMPILE)gcc
-CFLAGS := -I../../usr/include
+# Copyright (c) 2012-2019, Intel Corporation. All rights reserved.
+userprogs-always-y += mei-amt-version
-PROGS := mei-amt-version
-
-all: $(PROGS)
-
-clean:
- rm -fr $(PROGS)
+userccflags += -I usr/include
diff --git a/samples/mei/mei-amt-version.c b/samples/mei/mei-amt-version.c
index 3223448..ad3e560 100644
--- a/samples/mei/mei-amt-version.c
+++ b/samples/mei/mei-amt-version.c
@@ -267,7 +267,7 @@
struct amt_host_if_resp_header {
struct amt_host_if_msg_header header;
uint32_t status;
- unsigned char data[0];
+ unsigned char data[];
} __attribute__((packed));
const uuid_le MEI_IAMTHIF = UUID_LE(0x12f80028, 0xb4b7, 0x4b2d, \
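The one-line mei change above replaces the old zero-length-array idiom (data[0]) with a C99 flexible array member (data[]), the form the kernel now prefers because compilers and bounds checkers understand it. As a minimal userspace illustration of how a struct with a trailing flexible member is sized and allocated (the struct and payload below are made up, not the mei message format):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

struct resp {
	uint32_t status;
	unsigned char data[];	/* flexible array member: contributes no size of its own */
};

int main(void)
{
	const char payload[] = "example payload";
	/* allocate the header plus however much trailing data is needed */
	struct resp *r = malloc(sizeof(*r) + sizeof(payload));

	if (!r)
		return 1;
	r->status = 0;
	memcpy(r->data, payload, sizeof(payload));

	printf("sizeof(struct resp) = %zu, payload = %s\n", sizeof(*r), r->data);
	free(r);
	return 0;
}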
diff --git a/samples/mic/mpssd/.gitignore b/samples/mic/mpssd/.gitignore
deleted file mode 100644
index 8b7c72f..0000000
--- a/samples/mic/mpssd/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-mpssd
diff --git a/samples/mic/mpssd/Makefile b/samples/mic/mpssd/Makefile
deleted file mode 100644
index a7a6e0c..0000000
--- a/samples/mic/mpssd/Makefile
+++ /dev/null
@@ -1,28 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-ifndef CROSS_COMPILE
-uname_M := $(shell uname -m 2>/dev/null || echo not)
-ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
-
-ifeq ($(ARCH),x86)
-
-PROGS := mpssd
-CC = $(CROSS_COMPILE)gcc
-CFLAGS := -I../../../usr/include -I../../../tools/include
-
-ifdef DEBUG
-CFLAGS += -DDEBUG=$(DEBUG)
-endif
-
-all: $(PROGS)
-mpssd: mpssd.c sysfs.c
- $(CC) $(CFLAGS) mpssd.c sysfs.c -o mpssd -lpthread
-
-install:
- install mpssd /usr/sbin/mpssd
- install micctrl /usr/sbin/micctrl
-
-clean:
- rm -fr $(PROGS)
-
-endif
-endif
diff --git a/samples/mic/mpssd/micctrl b/samples/mic/mpssd/micctrl
deleted file mode 100755
index 030a60b..0000000
--- a/samples/mic/mpssd/micctrl
+++ /dev/null
@@ -1,162 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0-only
-# Intel MIC Platform Software Stack (MPSS)
-#
-# Copyright(c) 2013 Intel Corporation.
-#
-# Intel MIC User Space Tools.
-#
-# micctrl - Controls MIC boot/start/stop.
-#
-# chkconfig: 2345 95 05
-# description: start MPSS stack processing.
-#
-### BEGIN INIT INFO
-# Provides: micctrl
-### END INIT INFO
-
-# Source function library.
-. /etc/init.d/functions
-
-sysfs="/sys/class/mic"
-
-_status()
-{
- f=$sysfs/$1
- echo -e $1 state: "`cat $f/state`" shutdown_status: "`cat $f/shutdown_status`"
-}
-
-status()
-{
- if [ "`echo $1 | head -c3`" == "mic" ]; then
- _status $1
- return $?
- fi
- for f in $sysfs/*
- do
- _status `basename $f`
- RETVAL=$?
- [ $RETVAL -ne 0 ] && return $RETVAL
- done
- return 0
-}
-
-_reset()
-{
- f=$sysfs/$1
- echo reset > $f/state
-}
-
-reset()
-{
- if [ "`echo $1 | head -c3`" == "mic" ]; then
- _reset $1
- return $?
- fi
- for f in $sysfs/*
- do
- _reset `basename $f`
- RETVAL=$?
- [ $RETVAL -ne 0 ] && return $RETVAL
- done
- return 0
-}
-
-_boot()
-{
- f=$sysfs/$1
- echo "linux" > $f/bootmode
- echo "mic/uos.img" > $f/firmware
- echo "mic/$1.image" > $f/ramdisk
- echo "boot" > $f/state
-}
-
-boot()
-{
- if [ "`echo $1 | head -c3`" == "mic" ]; then
- _boot $1
- return $?
- fi
- for f in $sysfs/*
- do
- _boot `basename $f`
- RETVAL=$?
- [ $RETVAL -ne 0 ] && return $RETVAL
- done
- return 0
-}
-
-_shutdown()
-{
- f=$sysfs/$1
- echo shutdown > $f/state
-}
-
-shutdown()
-{
- if [ "`echo $1 | head -c3`" == "mic" ]; then
- _shutdown $1
- return $?
- fi
- for f in $sysfs/*
- do
- _shutdown `basename $f`
- RETVAL=$?
- [ $RETVAL -ne 0 ] && return $RETVAL
- done
- return 0
-}
-
-_wait()
-{
- f=$sysfs/$1
- while [ "`cat $f/state`" != "offline" -a "`cat $f/state`" != "online" ]
- do
- sleep 1
- echo -e "Waiting for $1 to go offline"
- done
-}
-
-wait()
-{
- if [ "`echo $1 | head -c3`" == "mic" ]; then
- _wait $1
- return $?
- fi
- # Wait for the cards to go offline
- for f in $sysfs/*
- do
- _wait `basename $f`
- RETVAL=$?
- [ $RETVAL -ne 0 ] && return $RETVAL
- done
- return 0
-}
-
-if [ ! -d "$sysfs" ]; then
- echo -e $"Module unloaded "
- exit 3
-fi
-
-case $1 in
- -s)
- status $2
- ;;
- -r)
- reset $2
- ;;
- -b)
- boot $2
- ;;
- -S)
- shutdown $2
- ;;
- -w)
- wait $2
- ;;
- *)
- echo $"Usage: $0 {-s (status) |-r (reset) |-b (boot) |-S (shutdown) |-w (wait)}"
- exit 2
-esac
-
-exit $?
diff --git a/samples/mic/mpssd/mpss b/samples/mic/mpssd/mpss
deleted file mode 100755
index 248ac73..0000000
--- a/samples/mic/mpssd/mpss
+++ /dev/null
@@ -1,189 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0-only
-# Intel MIC Platform Software Stack (MPSS)
-#
-# Copyright(c) 2013 Intel Corporation.
-#
-# Intel MIC User Space Tools.
-#
-# mpss Start mpssd.
-#
-# chkconfig: 2345 95 05
-# description: start MPSS stack processing.
-#
-### BEGIN INIT INFO
-# Provides: mpss
-# Required-Start:
-# Required-Stop:
-# Short-Description: MPSS stack control
-# Description: MPSS stack control
-### END INIT INFO
-
-# Source function library.
-. /etc/init.d/functions
-
-exec=/usr/sbin/mpssd
-sysfs="/sys/class/mic"
-mic_modules="mic_host mic_x100_dma scif vop"
-
-start()
-{
- [ -x $exec ] || exit 5
-
- if [ "`ps -e | awk '{print $4}' | grep mpssd | head -1`" = "mpssd" ]; then
- echo -e $"MPSSD already running! "
- success
- echo
- return 0
- fi
-
- echo -e $"Starting MPSS Stack"
- echo -e $"Loading MIC drivers:" $mic_modules
-
- modprobe -a $mic_modules
- RETVAL=$?
- if [ $RETVAL -ne 0 ]; then
- failure
- echo
- return $RETVAL
- fi
-
- # Start the daemon
- echo -n $"Starting MPSSD "
- $exec
- RETVAL=$?
- if [ $RETVAL -ne 0 ]; then
- failure
- echo
- return $RETVAL
- fi
- success
- echo
-
- sleep 5
-
- # Boot the cards
- micctrl -b
-
- # Wait till ping works
- for f in $sysfs/*
- do
- count=100
- ipaddr=`cat $f/cmdline`
- ipaddr=${ipaddr#*address,}
- ipaddr=`echo $ipaddr | cut -d, -f1 | cut -d\; -f1`
- while [ $count -ge 0 ]
- do
- echo -e "Pinging "`basename $f`" "
- ping -c 1 $ipaddr &> /dev/null
- RETVAL=$?
- if [ $RETVAL -eq 0 ]; then
- success
- break
- fi
- sleep 1
- count=`expr $count - 1`
- done
- [ $RETVAL -ne 0 ] && failure || success
- echo
- done
- return $RETVAL
-}
-
-stop()
-{
- echo -e $"Shutting down MPSS Stack: "
-
- # Bail out if module is unloaded
- if [ ! -d "$sysfs" ]; then
- echo -n $"Module unloaded "
- success
- echo
- return 0
- fi
-
- # Shut down the cards.
- micctrl -S
-
- # Wait for the cards to go offline
- for f in $sysfs/*
- do
- while [ "`cat $f/state`" != "ready" ]
- do
- sleep 1
- echo -e "Waiting for "`basename $f`" to become ready"
- done
- done
-
- # Display the status of the cards
- micctrl -s
-
- # Kill MPSSD now
- echo -n $"Killing MPSSD"
- killall -9 mpssd 2>/dev/null
- RETVAL=$?
- [ $RETVAL -ne 0 ] && failure || success
- echo
- return $RETVAL
-}
-
-restart()
-{
- stop
- sleep 5
- start
-}
-
-status()
-{
- micctrl -s
- if [ "`ps -e | awk '{print $4}' | grep mpssd | head -n 1`" = "mpssd" ]; then
- echo "mpssd is running"
- else
- echo "mpssd is stopped"
- fi
- return 0
-}
-
-unload()
-{
- if [ ! -d "$sysfs" ]; then
- echo -n $"No MIC_HOST Module: "
- success
- echo
- return
- fi
-
- stop
-
- sleep 5
- echo -n $"Removing MIC drivers:" $mic_modules
- modprobe -r $mic_modules
- RETVAL=$?
- [ $RETVAL -ne 0 ] && failure || success
- echo
- return $RETVAL
-}
-
-case $1 in
- start)
- start
- ;;
- stop)
- stop
- ;;
- restart)
- restart
- ;;
- status)
- status
- ;;
- unload)
- unload
- ;;
- *)
- echo $"Usage: $0 {start|stop|restart|status|unload}"
- exit 2
-esac
-
-exit $?
diff --git a/samples/mic/mpssd/mpssd.c b/samples/mic/mpssd/mpssd.c
deleted file mode 100644
index cd3f16a..0000000
--- a/samples/mic/mpssd/mpssd.c
+++ /dev/null
@@ -1,1815 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2013 Intel Corporation.
- *
- * Intel MIC User Space Tools.
- */
-
-#define _GNU_SOURCE
-
-#include <stdlib.h>
-#include <fcntl.h>
-#include <getopt.h>
-#include <assert.h>
-#include <unistd.h>
-#include <stdbool.h>
-#include <signal.h>
-#include <poll.h>
-#include <features.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-#include <sys/socket.h>
-#include <linux/virtio_ring.h>
-#include <linux/virtio_net.h>
-#include <linux/virtio_console.h>
-#include <linux/virtio_blk.h>
-#include <linux/version.h>
-#include "mpssd.h"
-#include <linux/mic_ioctl.h>
-#include <linux/mic_common.h>
-#include <tools/endian.h>
-
-static void *init_mic(void *arg);
-
-static FILE *logfp;
-static struct mic_info mic_list;
-
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-
-#define min_t(type, x, y) ({ \
- type __min1 = (x); \
- type __min2 = (y); \
- __min1 < __min2 ? __min1 : __min2; })
-
-/* align addr on a size boundary - adjust address up/down if needed */
-#define _ALIGN_DOWN(addr, size) ((addr)&(~((size)-1)))
-#define _ALIGN_UP(addr, size) _ALIGN_DOWN(addr + size - 1, size)
-
-/* align addr on a size boundary - adjust address up if needed */
-#define _ALIGN(addr, size) _ALIGN_UP(addr, size)
-
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr) _ALIGN(addr, PAGE_SIZE)
-
-#define READ_ONCE(x) (*(volatile typeof(x) *)&(x))
-
-#define GSO_ENABLED 1
-#define MAX_GSO_SIZE (64 * 1024)
-#define ETH_H_LEN 14
-#define MAX_NET_PKT_SIZE (_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64))
-#define MIC_DEVICE_PAGE_END 0x1000
-
-#ifndef VIRTIO_NET_HDR_F_DATA_VALID
-#define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */
-#endif
-
-static struct {
- struct mic_device_desc dd;
- struct mic_vqconfig vqconfig[2];
- __u32 host_features, guest_acknowledgements;
- struct virtio_console_config cons_config;
-} virtcons_dev_page = {
- .dd = {
- .type = VIRTIO_ID_CONSOLE,
- .num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig),
- .feature_len = sizeof(virtcons_dev_page.host_features),
- .config_len = sizeof(virtcons_dev_page.cons_config),
- },
- .vqconfig[0] = {
- .num = htole16(MIC_VRING_ENTRIES),
- },
- .vqconfig[1] = {
- .num = htole16(MIC_VRING_ENTRIES),
- },
-};
-
-static struct {
- struct mic_device_desc dd;
- struct mic_vqconfig vqconfig[2];
- __u32 host_features, guest_acknowledgements;
- struct virtio_net_config net_config;
-} virtnet_dev_page = {
- .dd = {
- .type = VIRTIO_ID_NET,
- .num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig),
- .feature_len = sizeof(virtnet_dev_page.host_features),
- .config_len = sizeof(virtnet_dev_page.net_config),
- },
- .vqconfig[0] = {
- .num = htole16(MIC_VRING_ENTRIES),
- },
- .vqconfig[1] = {
- .num = htole16(MIC_VRING_ENTRIES),
- },
-#if GSO_ENABLED
- .host_features = htole32(
- 1 << VIRTIO_NET_F_CSUM |
- 1 << VIRTIO_NET_F_GSO |
- 1 << VIRTIO_NET_F_GUEST_TSO4 |
- 1 << VIRTIO_NET_F_GUEST_TSO6 |
- 1 << VIRTIO_NET_F_GUEST_ECN),
-#else
- .host_features = 0,
-#endif
-};
-
-static const char *mic_config_dir = "/etc/mpss";
-static const char *virtblk_backend = "VIRTBLK_BACKEND";
-static struct {
- struct mic_device_desc dd;
- struct mic_vqconfig vqconfig[1];
- __u32 host_features, guest_acknowledgements;
- struct virtio_blk_config blk_config;
-} virtblk_dev_page = {
- .dd = {
- .type = VIRTIO_ID_BLOCK,
- .num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig),
- .feature_len = sizeof(virtblk_dev_page.host_features),
- .config_len = sizeof(virtblk_dev_page.blk_config),
- },
- .vqconfig[0] = {
- .num = htole16(MIC_VRING_ENTRIES),
- },
- .host_features =
- htole32(1<<VIRTIO_BLK_F_SEG_MAX),
- .blk_config = {
- .seg_max = htole32(MIC_VRING_ENTRIES - 2),
- .capacity = htole64(0),
- }
-};
-
-static char *myname;
-
-static int
-tap_configure(struct mic_info *mic, char *dev)
-{
- pid_t pid;
- char *ifargv[7];
- char ipaddr[IFNAMSIZ];
- int ret = 0;
-
- pid = fork();
- if (pid == 0) {
- ifargv[0] = "ip";
- ifargv[1] = "link";
- ifargv[2] = "set";
- ifargv[3] = dev;
- ifargv[4] = "up";
- ifargv[5] = NULL;
- mpsslog("Configuring %s\n", dev);
- ret = execvp("ip", ifargv);
- if (ret < 0) {
- mpsslog("%s execvp failed errno %s\n",
- mic->name, strerror(errno));
- return ret;
- }
- }
- if (pid < 0) {
- mpsslog("%s fork failed errno %s\n",
- mic->name, strerror(errno));
- return ret;
- }
-
- ret = waitpid(pid, NULL, 0);
- if (ret < 0) {
- mpsslog("%s waitpid failed errno %s\n",
- mic->name, strerror(errno));
- return ret;
- }
-
- snprintf(ipaddr, IFNAMSIZ, "172.31.%d.254/24", mic->id + 1);
-
- pid = fork();
- if (pid == 0) {
- ifargv[0] = "ip";
- ifargv[1] = "addr";
- ifargv[2] = "add";
- ifargv[3] = ipaddr;
- ifargv[4] = "dev";
- ifargv[5] = dev;
- ifargv[6] = NULL;
- mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr);
- ret = execvp("ip", ifargv);
- if (ret < 0) {
- mpsslog("%s execvp failed errno %s\n",
- mic->name, strerror(errno));
- return ret;
- }
- }
- if (pid < 0) {
- mpsslog("%s fork failed errno %s\n",
- mic->name, strerror(errno));
- return ret;
- }
-
- ret = waitpid(pid, NULL, 0);
- if (ret < 0) {
- mpsslog("%s waitpid failed errno %s\n",
- mic->name, strerror(errno));
- return ret;
- }
- mpsslog("MIC name %s %s %d DONE!\n",
- mic->name, __func__, __LINE__);
- return 0;
-}
-
-static int tun_alloc(struct mic_info *mic, char *dev)
-{
- struct ifreq ifr;
- int fd, err;
-#if GSO_ENABLED
- unsigned offload;
-#endif
- fd = open("/dev/net/tun", O_RDWR);
- if (fd < 0) {
- mpsslog("Could not open /dev/net/tun %s\n", strerror(errno));
- goto done;
- }
-
- memset(&ifr, 0, sizeof(ifr));
-
- ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
- if (*dev)
- strncpy(ifr.ifr_name, dev, IFNAMSIZ);
-
- err = ioctl(fd, TUNSETIFF, (void *)&ifr);
- if (err < 0) {
- mpsslog("%s %s %d TUNSETIFF failed %s\n",
- mic->name, __func__, __LINE__, strerror(errno));
- close(fd);
- return err;
- }
-#if GSO_ENABLED
- offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_TSO_ECN;
-
- err = ioctl(fd, TUNSETOFFLOAD, offload);
- if (err < 0) {
- mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n",
- mic->name, __func__, __LINE__, strerror(errno));
- close(fd);
- return err;
- }
-#endif
- strcpy(dev, ifr.ifr_name);
- mpsslog("Created TAP %s\n", dev);
-done:
- return fd;
-}
-
-#define NET_FD_VIRTIO_NET 0
-#define NET_FD_TUN 1
-#define MAX_NET_FD 2
-
-static void set_dp(struct mic_info *mic, int type, void *dp)
-{
- switch (type) {
- case VIRTIO_ID_CONSOLE:
- mic->mic_console.console_dp = dp;
- return;
- case VIRTIO_ID_NET:
- mic->mic_net.net_dp = dp;
- return;
- case VIRTIO_ID_BLOCK:
- mic->mic_virtblk.block_dp = dp;
- return;
- }
- mpsslog("%s %s %d not found\n", mic->name, __func__, type);
- assert(0);
-}
-
-static void *get_dp(struct mic_info *mic, int type)
-{
- switch (type) {
- case VIRTIO_ID_CONSOLE:
- return mic->mic_console.console_dp;
- case VIRTIO_ID_NET:
- return mic->mic_net.net_dp;
- case VIRTIO_ID_BLOCK:
- return mic->mic_virtblk.block_dp;
- }
- mpsslog("%s %s %d not found\n", mic->name, __func__, type);
- assert(0);
- return NULL;
-}
-
-static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type)
-{
- struct mic_device_desc *d;
- int i;
- void *dp = get_dp(mic, type);
-
- for (i = sizeof(struct mic_bootparam); i < PAGE_SIZE;
- i += mic_total_desc_size(d)) {
- d = dp + i;
-
- /* End of list */
- if (d->type == 0)
- break;
-
- if (d->type == -1)
- continue;
-
- mpsslog("%s %s d-> type %d d %p\n",
- mic->name, __func__, d->type, d);
-
- if (d->type == (__u8)type)
- return d;
- }
- mpsslog("%s %s %d not found\n", mic->name, __func__, type);
- return NULL;
-}
-
-/* See comments in vhost.c for explanation of next_desc() */
-static unsigned next_desc(struct vring_desc *desc)
-{
- unsigned int next;
-
- if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT))
- return -1U;
- next = le16toh(desc->next);
- return next;
-}
-
-/* Sum up all the IOVEC length */
-static ssize_t
-sum_iovec_len(struct mic_copy_desc *copy)
-{
- ssize_t sum = 0;
- unsigned int i;
-
- for (i = 0; i < copy->iovcnt; i++)
- sum += copy->iov[i].iov_len;
- return sum;
-}
-
-static inline void verify_out_len(struct mic_info *mic,
- struct mic_copy_desc *copy)
-{
- if (copy->out_len != sum_iovec_len(copy)) {
- mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%zx\n",
- mic->name, __func__, __LINE__,
- copy->out_len, sum_iovec_len(copy));
- assert(copy->out_len == sum_iovec_len(copy));
- }
-}
-
-/* Display an iovec */
-static void
-disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy,
- const char *s, int line)
-{
- unsigned int i;
-
- for (i = 0; i < copy->iovcnt; i++)
- mpsslog("%s %s %d copy->iov[%d] addr %p len 0x%zx\n",
- mic->name, s, line, i,
- copy->iov[i].iov_base, copy->iov[i].iov_len);
-}
-
-static inline __u16 read_avail_idx(struct mic_vring *vr)
-{
- return READ_ONCE(vr->info->avail_idx);
-}
-
-static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr,
- struct mic_copy_desc *copy, ssize_t len)
-{
- copy->vr_idx = tx ? 0 : 1;
- copy->update_used = true;
- if (type == VIRTIO_ID_NET)
- copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr);
- else
- copy->iov[0].iov_len = len;
-}
-
-/* Central API which triggers the copies */
-static int
-mic_virtio_copy(struct mic_info *mic, int fd,
- struct mic_vring *vr, struct mic_copy_desc *copy)
-{
- int ret;
-
- ret = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy);
- if (ret) {
- mpsslog("%s %s %d errno %s ret %d\n",
- mic->name, __func__, __LINE__,
- strerror(errno), ret);
- }
- return ret;
-}
-
-static inline unsigned _vring_size(unsigned int num, unsigned long align)
-{
- return _ALIGN_UP(((sizeof(struct vring_desc) * num + sizeof(__u16) * (3 + num)
- + align - 1) & ~(align - 1))
- + sizeof(__u16) * 3 + sizeof(struct vring_used_elem) * num, 4);
-}
-
-/*
- * This initialization routine requires at least one
- * vring i.e. vr0. vr1 is optional.
- */
-static void *
-init_vr(struct mic_info *mic, int fd, int type,
- struct mic_vring *vr0, struct mic_vring *vr1, int num_vq)
-{
- int vr_size;
- char *va;
-
- vr_size = PAGE_ALIGN(_vring_size(MIC_VRING_ENTRIES,
- MIC_VIRTIO_RING_ALIGN) +
- sizeof(struct _mic_vring_info));
- va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq,
- PROT_READ, MAP_SHARED, fd, 0);
- if (MAP_FAILED == va) {
- mpsslog("%s %s %d mmap failed errno %s\n",
- mic->name, __func__, __LINE__,
- strerror(errno));
- goto done;
- }
- set_dp(mic, type, va);
- vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END];
- vr0->info = vr0->va +
- _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN);
- vring_init(&vr0->vr,
- MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN);
- mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ",
- __func__, mic->name, vr0->va, vr0->info, vr_size,
- _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
- mpsslog("magic 0x%x expected 0x%x\n",
- le32toh(vr0->info->magic), MIC_MAGIC + type);
- assert(le32toh(vr0->info->magic) == MIC_MAGIC + type);
- if (vr1) {
- vr1->va = (struct mic_vring *)
- &va[MIC_DEVICE_PAGE_END + vr_size];
- vr1->info = vr1->va + _vring_size(MIC_VRING_ENTRIES,
- MIC_VIRTIO_RING_ALIGN);
- vring_init(&vr1->vr,
- MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN);
- mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ",
- __func__, mic->name, vr1->va, vr1->info, vr_size,
- _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
- mpsslog("magic 0x%x expected 0x%x\n",
- le32toh(vr1->info->magic), MIC_MAGIC + type + 1);
- assert(le32toh(vr1->info->magic) == MIC_MAGIC + type + 1);
- }
-done:
- return va;
-}
-
-static int
-wait_for_card_driver(struct mic_info *mic, int fd, int type)
-{
- struct pollfd pollfd;
- int err;
- struct mic_device_desc *desc = get_device_desc(mic, type);
- __u8 prev_status;
-
- if (!desc)
- return -ENODEV;
- prev_status = desc->status;
- pollfd.fd = fd;
- mpsslog("%s %s Waiting .... desc-> type %d status 0x%x\n",
- mic->name, __func__, type, desc->status);
-
- while (1) {
- pollfd.events = POLLIN;
- pollfd.revents = 0;
- err = poll(&pollfd, 1, -1);
- if (err < 0) {
- mpsslog("%s %s poll failed %s\n",
- mic->name, __func__, strerror(errno));
- continue;
- }
-
- if (pollfd.revents) {
- if (desc->status != prev_status) {
- mpsslog("%s %s Waiting... desc-> type %d "
- "status 0x%x\n",
- mic->name, __func__, type,
- desc->status);
- prev_status = desc->status;
- }
- if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
- mpsslog("%s %s poll.revents %d\n",
- mic->name, __func__, pollfd.revents);
- mpsslog("%s %s desc-> type %d status 0x%x\n",
- mic->name, __func__, type,
- desc->status);
- break;
- }
- }
- }
- return 0;
-}
-
-/* Spin till we have some descriptors */
-static void
-spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr)
-{
- __u16 avail_idx = read_avail_idx(vr);
-
- while (avail_idx == le16toh(READ_ONCE(vr->vr.avail->idx))) {
-#ifdef DEBUG
- mpsslog("%s %s waiting for desc avail %d info_avail %d\n",
- mic->name, __func__,
- le16toh(vr->vr.avail->idx), vr->info->avail_idx);
-#endif
- sched_yield();
- }
-}
-
-static void *
-virtio_net(void *arg)
-{
- static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)];
- static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __attribute__ ((aligned(64)));
- struct iovec vnet_iov[2][2] = {
- { { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) },
- { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } },
- { { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) },
- { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } },
- };
- struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1];
- struct mic_info *mic = (struct mic_info *)arg;
- char if_name[IFNAMSIZ];
- struct pollfd net_poll[MAX_NET_FD];
- struct mic_vring tx_vr, rx_vr;
- struct mic_copy_desc copy;
- struct mic_device_desc *desc;
- int err;
-
- snprintf(if_name, IFNAMSIZ, "mic%d", mic->id);
- mic->mic_net.tap_fd = tun_alloc(mic, if_name);
- if (mic->mic_net.tap_fd < 0)
- goto done;
-
- if (tap_configure(mic, if_name))
- goto done;
- mpsslog("MIC name %s id %d\n", mic->name, mic->id);
-
- net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd;
- net_poll[NET_FD_VIRTIO_NET].events = POLLIN;
- net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd;
- net_poll[NET_FD_TUN].events = POLLIN;
-
- if (MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd,
- VIRTIO_ID_NET, &tx_vr, &rx_vr,
- virtnet_dev_page.dd.num_vq)) {
- mpsslog("%s init_vr failed %s\n",
- mic->name, strerror(errno));
- goto done;
- }
-
- copy.iovcnt = 2;
- desc = get_device_desc(mic, VIRTIO_ID_NET);
-
- while (1) {
- ssize_t len;
-
- net_poll[NET_FD_VIRTIO_NET].revents = 0;
- net_poll[NET_FD_TUN].revents = 0;
-
- /* Start polling for data from tap and virtio net */
- err = poll(net_poll, 2, -1);
- if (err < 0) {
- mpsslog("%s poll failed %s\n",
- __func__, strerror(errno));
- continue;
- }
- if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
- err = wait_for_card_driver(mic,
- mic->mic_net.virtio_net_fd,
- VIRTIO_ID_NET);
- if (err) {
- mpsslog("%s %s %d Exiting...\n",
- mic->name, __func__, __LINE__);
- break;
- }
- }
- /*
- * Check if there is data to be read from TUN and write to
- * virtio net fd if there is.
- */
- if (net_poll[NET_FD_TUN].revents & POLLIN) {
- copy.iov = iov0;
- len = readv(net_poll[NET_FD_TUN].fd,
- copy.iov, copy.iovcnt);
- if (len > 0) {
- struct virtio_net_hdr *hdr
- = (struct virtio_net_hdr *)vnet_hdr[0];
-
- /* Disable checksums on the card since we are on
- a reliable PCIe link */
- hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
-#ifdef DEBUG
- mpsslog("%s %s %d hdr->flags 0x%x ", mic->name,
- __func__, __LINE__, hdr->flags);
- mpsslog("copy.out_len %d hdr->gso_type 0x%x\n",
- copy.out_len, hdr->gso_type);
-#endif
-#ifdef DEBUG
- disp_iovec(mic, copy, __func__, __LINE__);
- mpsslog("%s %s %d read from tap 0x%lx\n",
- mic->name, __func__, __LINE__,
- len);
-#endif
- spin_for_descriptors(mic, &tx_vr);
- txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, &copy,
- len);
-
- err = mic_virtio_copy(mic,
- mic->mic_net.virtio_net_fd, &tx_vr,
- &copy);
- if (err < 0) {
- mpsslog("%s %s %d mic_virtio_copy %s\n",
- mic->name, __func__, __LINE__,
- strerror(errno));
- }
- if (!err)
- verify_out_len(mic, &copy);
-#ifdef DEBUG
- disp_iovec(mic, copy, __func__, __LINE__);
- mpsslog("%s %s %d wrote to net 0x%lx\n",
- mic->name, __func__, __LINE__,
- sum_iovec_len(&copy));
-#endif
- /* Reinitialize IOV for next run */
- iov0[1].iov_len = MAX_NET_PKT_SIZE;
- } else if (len < 0) {
- disp_iovec(mic, &copy, __func__, __LINE__);
- mpsslog("%s %s %d read failed %s ", mic->name,
- __func__, __LINE__, strerror(errno));
- mpsslog("cnt %d sum %zd\n",
- copy.iovcnt, sum_iovec_len(&copy));
- }
- }
-
- /*
- * Check if there is data to be read from virtio net and
- * write to TUN if there is.
- */
- if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) {
- while (rx_vr.info->avail_idx !=
- le16toh(rx_vr.vr.avail->idx)) {
- copy.iov = iov1;
- txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, &copy,
- MAX_NET_PKT_SIZE
- + sizeof(struct virtio_net_hdr));
-
- err = mic_virtio_copy(mic,
- mic->mic_net.virtio_net_fd, &rx_vr,
- &copy);
- if (!err) {
-#ifdef DEBUG
- struct virtio_net_hdr *hdr
- = (struct virtio_net_hdr *)
- vnet_hdr[1];
-
- mpsslog("%s %s %d hdr->flags 0x%x, ",
- mic->name, __func__, __LINE__,
- hdr->flags);
- mpsslog("out_len %d gso_type 0x%x\n",
- copy.out_len,
- hdr->gso_type);
-#endif
- /* Set the correct output iov_len */
- iov1[1].iov_len = copy.out_len -
- sizeof(struct virtio_net_hdr);
- verify_out_len(mic, &copy);
-#ifdef DEBUG
- disp_iovec(mic, copy, __func__,
- __LINE__);
- mpsslog("%s %s %d ",
- mic->name, __func__, __LINE__);
- mpsslog("read from net 0x%lx\n",
- sum_iovec_len(copy));
-#endif
- len = writev(net_poll[NET_FD_TUN].fd,
- copy.iov, copy.iovcnt);
- if (len != sum_iovec_len(&copy)) {
- mpsslog("Tun write failed %s ",
- strerror(errno));
- mpsslog("len 0x%zx ", len);
- mpsslog("read_len 0x%zx\n",
- sum_iovec_len(&copy));
- } else {
-#ifdef DEBUG
- disp_iovec(mic, &copy, __func__,
- __LINE__);
- mpsslog("%s %s %d ",
- mic->name, __func__,
- __LINE__);
- mpsslog("wrote to tap 0x%lx\n",
- len);
-#endif
- }
- } else {
- mpsslog("%s %s %d mic_virtio_copy %s\n",
- mic->name, __func__, __LINE__,
- strerror(errno));
- break;
- }
- }
- }
- if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
- mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
- }
-done:
- pthread_exit(NULL);
-}
-
-/* virtio_console */
-#define VIRTIO_CONSOLE_FD 0
-#define MONITOR_FD (VIRTIO_CONSOLE_FD + 1)
-#define MAX_CONSOLE_FD (MONITOR_FD + 1) /* must be the last one + 1 */
-#define MAX_BUFFER_SIZE PAGE_SIZE
-
-static void *
-virtio_console(void *arg)
-{
- static __u8 vcons_buf[2][PAGE_SIZE];
- struct iovec vcons_iov[2] = {
- { .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) },
- { .iov_base = vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) },
- };
- struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1];
- struct mic_info *mic = (struct mic_info *)arg;
- int err;
- struct pollfd console_poll[MAX_CONSOLE_FD];
- int pty_fd;
- char *pts_name;
- ssize_t len;
- struct mic_vring tx_vr, rx_vr;
- struct mic_copy_desc copy;
- struct mic_device_desc *desc;
-
- pty_fd = posix_openpt(O_RDWR);
- if (pty_fd < 0) {
- mpsslog("can't open a pseudoterminal master device: %s\n",
- strerror(errno));
- goto _return;
- }
- pts_name = ptsname(pty_fd);
- if (pts_name == NULL) {
- mpsslog("can't get pts name\n");
- goto _close_pty;
- }
- printf("%s console message goes to %s\n", mic->name, pts_name);
- mpsslog("%s console message goes to %s\n", mic->name, pts_name);
- err = grantpt(pty_fd);
- if (err < 0) {
- mpsslog("can't grant access: %s %s\n",
- pts_name, strerror(errno));
- goto _close_pty;
- }
- err = unlockpt(pty_fd);
- if (err < 0) {
- mpsslog("can't unlock a pseudoterminal: %s %s\n",
- pts_name, strerror(errno));
- goto _close_pty;
- }
- console_poll[MONITOR_FD].fd = pty_fd;
- console_poll[MONITOR_FD].events = POLLIN;
-
- console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd;
- console_poll[VIRTIO_CONSOLE_FD].events = POLLIN;
-
- if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd,
- VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr,
- virtcons_dev_page.dd.num_vq)) {
- mpsslog("%s init_vr failed %s\n",
- mic->name, strerror(errno));
- goto _close_pty;
- }
-
- copy.iovcnt = 1;
- desc = get_device_desc(mic, VIRTIO_ID_CONSOLE);
-
- for (;;) {
- console_poll[MONITOR_FD].revents = 0;
- console_poll[VIRTIO_CONSOLE_FD].revents = 0;
- err = poll(console_poll, MAX_CONSOLE_FD, -1);
- if (err < 0) {
- mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__,
- strerror(errno));
- continue;
- }
- if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
- err = wait_for_card_driver(mic,
- mic->mic_console.virtio_console_fd,
- VIRTIO_ID_CONSOLE);
- if (err) {
- mpsslog("%s %s %d Exiting...\n",
- mic->name, __func__, __LINE__);
- break;
- }
- }
-
- if (console_poll[MONITOR_FD].revents & POLLIN) {
- copy.iov = iov0;
- len = readv(pty_fd, copy.iov, copy.iovcnt);
- if (len > 0) {
-#ifdef DEBUG
- disp_iovec(mic, copy, __func__, __LINE__);
- mpsslog("%s %s %d read from tap 0x%lx\n",
- mic->name, __func__, __LINE__,
- len);
-#endif
- spin_for_descriptors(mic, &tx_vr);
- txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr,
- &copy, len);
-
- err = mic_virtio_copy(mic,
- mic->mic_console.virtio_console_fd,
- &tx_vr, &copy);
- if (err < 0) {
- mpsslog("%s %s %d mic_virtio_copy %s\n",
- mic->name, __func__, __LINE__,
- strerror(errno));
- }
- if (!err)
- verify_out_len(mic, &copy);
-#ifdef DEBUG
- disp_iovec(mic, copy, __func__, __LINE__);
- mpsslog("%s %s %d wrote to net 0x%lx\n",
- mic->name, __func__, __LINE__,
- sum_iovec_len(copy));
-#endif
- /* Reinitialize IOV for next run */
- iov0->iov_len = PAGE_SIZE;
- } else if (len < 0) {
- disp_iovec(mic, &copy, __func__, __LINE__);
- mpsslog("%s %s %d read failed %s ",
- mic->name, __func__, __LINE__,
- strerror(errno));
- mpsslog("cnt %d sum %zd\n",
- copy.iovcnt, sum_iovec_len(&copy));
- }
- }
-
- if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) {
- while (rx_vr.info->avail_idx !=
- le16toh(rx_vr.vr.avail->idx)) {
- copy.iov = iov1;
- txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr,
- &copy, PAGE_SIZE);
-
- err = mic_virtio_copy(mic,
- mic->mic_console.virtio_console_fd,
- &rx_vr, &copy);
- if (!err) {
- /* Set the correct output iov_len */
- iov1->iov_len = copy.out_len;
- verify_out_len(mic, &copy);
-#ifdef DEBUG
- disp_iovec(mic, copy, __func__,
- __LINE__);
- mpsslog("%s %s %d ",
- mic->name, __func__, __LINE__);
- mpsslog("read from net 0x%lx\n",
- sum_iovec_len(copy));
-#endif
- len = writev(pty_fd,
- copy.iov, copy.iovcnt);
- if (len != sum_iovec_len(&copy)) {
- mpsslog("Tun write failed %s ",
- strerror(errno));
- mpsslog("len 0x%zx ", len);
- mpsslog("read_len 0x%zx\n",
- sum_iovec_len(&copy));
- } else {
-#ifdef DEBUG
- disp_iovec(mic, copy, __func__,
- __LINE__);
- mpsslog("%s %s %d ",
- mic->name, __func__,
- __LINE__);
- mpsslog("wrote to tap 0x%lx\n",
- len);
-#endif
- }
- } else {
- mpsslog("%s %s %d mic_virtio_copy %s\n",
- mic->name, __func__, __LINE__,
- strerror(errno));
- break;
- }
- }
- }
- if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
- mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
- }
-_close_pty:
- close(pty_fd);
-_return:
- pthread_exit(NULL);
-}
-
-static void
-add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd)
-{
- char path[PATH_MAX];
- int fd, err;
-
- snprintf(path, PATH_MAX, "/dev/vop_virtio%d", mic->id);
- fd = open(path, O_RDWR);
- if (fd < 0) {
- mpsslog("Could not open %s %s\n", path, strerror(errno));
- return;
- }
-
- err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd);
- if (err < 0) {
- mpsslog("Could not add %d %s\n", dd->type, strerror(errno));
- close(fd);
- return;
- }
- switch (dd->type) {
- case VIRTIO_ID_NET:
- mic->mic_net.virtio_net_fd = fd;
- mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name);
- break;
- case VIRTIO_ID_CONSOLE:
- mic->mic_console.virtio_console_fd = fd;
- mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name);
- break;
- case VIRTIO_ID_BLOCK:
- mic->mic_virtblk.virtio_block_fd = fd;
- mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name);
- break;
- }
-}
-
-static bool
-set_backend_file(struct mic_info *mic)
-{
- FILE *config;
- char buff[PATH_MAX], *line, *evv, *p;
-
- snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id);
- config = fopen(buff, "r");
- if (config == NULL)
- return false;
- do { /* look for "virtblk_backend=XXXX" */
- line = fgets(buff, PATH_MAX, config);
- if (line == NULL)
- break;
- if (*line == '#')
- continue;
- p = strchr(line, '\n');
- if (p)
- *p = '\0';
- } while (strncmp(line, virtblk_backend, strlen(virtblk_backend)) != 0);
- fclose(config);
- if (line == NULL)
- return false;
- evv = strchr(line, '=');
- if (evv == NULL)
- return false;
- mic->mic_virtblk.backend_file = malloc(strlen(evv) + 1);
- if (mic->mic_virtblk.backend_file == NULL) {
- mpsslog("%s %d can't allocate memory\n", mic->name, mic->id);
- return false;
- }
- strcpy(mic->mic_virtblk.backend_file, evv + 1);
- return true;
-}
-
-#define SECTOR_SIZE 512
-static bool
-set_backend_size(struct mic_info *mic)
-{
- mic->mic_virtblk.backend_size = lseek(mic->mic_virtblk.backend, 0,
- SEEK_END);
- if (mic->mic_virtblk.backend_size < 0) {
- mpsslog("%s: can't seek: %s\n",
- mic->name, mic->mic_virtblk.backend_file);
- return false;
- }
- virtblk_dev_page.blk_config.capacity =
- mic->mic_virtblk.backend_size / SECTOR_SIZE;
- if ((mic->mic_virtblk.backend_size % SECTOR_SIZE) != 0)
- virtblk_dev_page.blk_config.capacity++;
-
- virtblk_dev_page.blk_config.capacity =
- htole64(virtblk_dev_page.blk_config.capacity);
-
- return true;
-}
-
-static bool
-open_backend(struct mic_info *mic)
-{
- if (!set_backend_file(mic))
- goto _error_exit;
- mic->mic_virtblk.backend = open(mic->mic_virtblk.backend_file, O_RDWR);
- if (mic->mic_virtblk.backend < 0) {
- mpsslog("%s: can't open: %s\n", mic->name,
- mic->mic_virtblk.backend_file);
- goto _error_free;
- }
- if (!set_backend_size(mic))
- goto _error_close;
- mic->mic_virtblk.backend_addr = mmap(NULL,
- mic->mic_virtblk.backend_size,
- PROT_READ|PROT_WRITE, MAP_SHARED,
- mic->mic_virtblk.backend, 0L);
- if (mic->mic_virtblk.backend_addr == MAP_FAILED) {
- mpsslog("%s: can't map: %s %s\n",
- mic->name, mic->mic_virtblk.backend_file,
- strerror(errno));
- goto _error_close;
- }
- return true;
-
- _error_close:
- close(mic->mic_virtblk.backend);
- _error_free:
- free(mic->mic_virtblk.backend_file);
- _error_exit:
- return false;
-}
-
-static void
-close_backend(struct mic_info *mic)
-{
- munmap(mic->mic_virtblk.backend_addr, mic->mic_virtblk.backend_size);
- close(mic->mic_virtblk.backend);
- free(mic->mic_virtblk.backend_file);
-}
-
-static bool
-start_virtblk(struct mic_info *mic, struct mic_vring *vring)
-{
- if (((unsigned long)&virtblk_dev_page.blk_config % 8) != 0) {
- mpsslog("%s: blk_config is not 8 byte aligned.\n",
- mic->name);
- return false;
- }
- add_virtio_device(mic, &virtblk_dev_page.dd);
- if (MAP_FAILED == init_vr(mic, mic->mic_virtblk.virtio_block_fd,
- VIRTIO_ID_BLOCK, vring, NULL,
- virtblk_dev_page.dd.num_vq)) {
- mpsslog("%s init_vr failed %s\n",
- mic->name, strerror(errno));
- return false;
- }
- return true;
-}
-
-static void
-stop_virtblk(struct mic_info *mic)
-{
- int vr_size, ret;
-
- vr_size = PAGE_ALIGN(_vring_size(MIC_VRING_ENTRIES,
- MIC_VIRTIO_RING_ALIGN) +
- sizeof(struct _mic_vring_info));
- ret = munmap(mic->mic_virtblk.block_dp,
- MIC_DEVICE_PAGE_END + vr_size * virtblk_dev_page.dd.num_vq);
- if (ret < 0)
- mpsslog("%s munmap errno %d\n", mic->name, errno);
- close(mic->mic_virtblk.virtio_block_fd);
-}
-
-static __u8
-header_error_check(struct vring_desc *desc)
-{
- if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) {
- mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n",
- __func__, __LINE__);
- return -EIO;
- }
- if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) {
- mpsslog("%s() %d: alone\n",
- __func__, __LINE__);
- return -EIO;
- }
- if (le16toh(desc->flags) & VRING_DESC_F_WRITE) {
- mpsslog("%s() %d: not read\n",
- __func__, __LINE__);
- return -EIO;
- }
- return 0;
-}
-
-static int
-read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx)
-{
- struct iovec iovec;
- struct mic_copy_desc copy;
-
- iovec.iov_len = sizeof(*hdr);
- iovec.iov_base = hdr;
- copy.iov = &iovec;
- copy.iovcnt = 1;
- copy.vr_idx = 0; /* only one vring on virtio_block */
- copy.update_used = false; /* do not update used index */
- return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
-}
-
-static int
-transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt)
-{
- struct mic_copy_desc copy;
-
- copy.iov = iovec;
- copy.iovcnt = iovcnt;
- copy.vr_idx = 0; /* only one vring on virtio_block */
- copy.update_used = false; /* do not update used index */
- return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
-}
-
-static __u8
-status_error_check(struct vring_desc *desc)
-{
- if (le32toh(desc->len) != sizeof(__u8)) {
- mpsslog("%s() %d: length is not sizeof(status)\n",
- __func__, __LINE__);
- return -EIO;
- }
- return 0;
-}
-
-static int
-write_status(int fd, __u8 *status)
-{
- struct iovec iovec;
- struct mic_copy_desc copy;
-
- iovec.iov_base = status;
- iovec.iov_len = sizeof(*status);
- copy.iov = &iovec;
- copy.iovcnt = 1;
- copy.vr_idx = 0; /* only one vring on virtio_block */
- copy.update_used = true; /* Update used index */
- return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
-}
-
-#ifndef VIRTIO_BLK_T_GET_ID
-#define VIRTIO_BLK_T_GET_ID 8
-#endif
-
-static void *
-virtio_block(void *arg)
-{
- struct mic_info *mic = (struct mic_info *)arg;
- int ret;
- struct pollfd block_poll;
- struct mic_vring vring;
- __u16 avail_idx;
- __u32 desc_idx;
- struct vring_desc *desc;
- struct iovec *iovec, *piov;
- __u8 status;
- __u32 buffer_desc_idx;
- struct virtio_blk_outhdr hdr;
- void *fos;
-
- for (;;) { /* forever */
- if (!open_backend(mic)) { /* No virtblk */
- for (mic->mic_virtblk.signaled = 0;
- !mic->mic_virtblk.signaled;)
- sleep(1);
- continue;
- }
-
- /* backend file is specified. */
- if (!start_virtblk(mic, &vring))
- goto _close_backend;
- iovec = malloc(sizeof(*iovec) *
- le32toh(virtblk_dev_page.blk_config.seg_max));
- if (!iovec) {
- mpsslog("%s: can't alloc iovec: %s\n",
- mic->name, strerror(ENOMEM));
- goto _stop_virtblk;
- }
-
- block_poll.fd = mic->mic_virtblk.virtio_block_fd;
- block_poll.events = POLLIN;
- for (mic->mic_virtblk.signaled = 0;
- !mic->mic_virtblk.signaled;) {
- block_poll.revents = 0;
- /* timeout in 1 sec to see signaled */
- ret = poll(&block_poll, 1, 1000);
- if (ret < 0) {
- mpsslog("%s %d: poll failed: %s\n",
- __func__, __LINE__,
- strerror(errno));
- continue;
- }
-
- if (!(block_poll.revents & POLLIN)) {
-#ifdef DEBUG
- mpsslog("%s %d: block_poll.revents=0x%x\n",
- __func__, __LINE__, block_poll.revents);
-#endif
- continue;
- }
-
- /* POLLIN */
- while (vring.info->avail_idx !=
- le16toh(vring.vr.avail->idx)) {
- /* read header element */
- avail_idx =
- vring.info->avail_idx &
- (vring.vr.num - 1);
- desc_idx = le16toh(
- vring.vr.avail->ring[avail_idx]);
- desc = &vring.vr.desc[desc_idx];
-#ifdef DEBUG
- mpsslog("%s() %d: avail_idx=%d ",
- __func__, __LINE__,
- vring.info->avail_idx);
- mpsslog("vring.vr.num=%d desc=%p\n",
- vring.vr.num, desc);
-#endif
- status = header_error_check(desc);
- ret = read_header(
- mic->mic_virtblk.virtio_block_fd,
- &hdr, desc_idx);
- if (ret < 0) {
- mpsslog("%s() %d %s: ret=%d %s\n",
- __func__, __LINE__,
- mic->name, ret,
- strerror(errno));
- break;
- }
- /* buffer element */
- piov = iovec;
- status = 0;
- fos = mic->mic_virtblk.backend_addr +
- (hdr.sector * SECTOR_SIZE);
- buffer_desc_idx = next_desc(desc);
- desc_idx = buffer_desc_idx;
- for (desc = &vring.vr.desc[buffer_desc_idx];
- desc->flags & VRING_DESC_F_NEXT;
- desc_idx = next_desc(desc),
- desc = &vring.vr.desc[desc_idx]) {
- piov->iov_len = desc->len;
- piov->iov_base = fos;
- piov++;
- fos += desc->len;
- }
- /* Returning NULLs for VIRTIO_BLK_T_GET_ID. */
- if (hdr.type & ~(VIRTIO_BLK_T_OUT |
- VIRTIO_BLK_T_GET_ID)) {
- /*
- VIRTIO_BLK_T_IN - does not do
- anything. Probably for documenting.
- VIRTIO_BLK_T_SCSI_CMD - for
- virtio_scsi.
- VIRTIO_BLK_T_FLUSH - turned off in
- config space.
- VIRTIO_BLK_T_BARRIER - defined but not
- used anywhere.
- */
- mpsslog("%s() %d: type %x ",
- __func__, __LINE__,
- hdr.type);
- mpsslog("is not supported\n");
- status = -ENOTSUP;
-
- } else {
- ret = transfer_blocks(
- mic->mic_virtblk.virtio_block_fd,
- iovec,
- piov - iovec);
- if (ret < 0 &&
- status != 0)
- status = ret;
- }
- /* write status and update used pointer */
- if (status != 0)
- status = status_error_check(desc);
- ret = write_status(
- mic->mic_virtblk.virtio_block_fd,
- &status);
-#ifdef DEBUG
- mpsslog("%s() %d: write status=%d on desc=%p\n",
- __func__, __LINE__,
- status, desc);
-#endif
- }
- }
- free(iovec);
-_stop_virtblk:
- stop_virtblk(mic);
-_close_backend:
- close_backend(mic);
- } /* forever */
-
- pthread_exit(NULL);
-}
-
-static void
-reset(struct mic_info *mic)
-{
-#define RESET_TIMEOUT 120
- int i = RESET_TIMEOUT;
- setsysfs(mic->name, "state", "reset");
- while (i) {
- char *state;
- state = readsysfs(mic->name, "state");
- if (!state)
- goto retry;
- mpsslog("%s: %s %d state %s\n",
- mic->name, __func__, __LINE__, state);
-
- if (!strcmp(state, "ready")) {
- free(state);
- break;
- }
- free(state);
-retry:
- sleep(1);
- i--;
- }
-}
-
-static int
-get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status)
-{
- if (!strcmp(shutdown_status, "nop"))
- return MIC_NOP;
- if (!strcmp(shutdown_status, "crashed"))
- return MIC_CRASHED;
- if (!strcmp(shutdown_status, "halted"))
- return MIC_HALTED;
- if (!strcmp(shutdown_status, "poweroff"))
- return MIC_POWER_OFF;
- if (!strcmp(shutdown_status, "restart"))
- return MIC_RESTART;
- mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status);
- /* Invalid state */
- assert(0);
-};
-
-static int get_mic_state(struct mic_info *mic)
-{
- char *state = NULL;
- enum mic_states mic_state;
-
- while (!state) {
- state = readsysfs(mic->name, "state");
- sleep(1);
- }
- mpsslog("%s: %s %d state %s\n",
- mic->name, __func__, __LINE__, state);
-
- if (!strcmp(state, "ready")) {
- mic_state = MIC_READY;
- } else if (!strcmp(state, "booting")) {
- mic_state = MIC_BOOTING;
- } else if (!strcmp(state, "online")) {
- mic_state = MIC_ONLINE;
- } else if (!strcmp(state, "shutting_down")) {
- mic_state = MIC_SHUTTING_DOWN;
- } else if (!strcmp(state, "reset_failed")) {
- mic_state = MIC_RESET_FAILED;
- } else if (!strcmp(state, "resetting")) {
- mic_state = MIC_RESETTING;
- } else {
- mpsslog("%s: BUG invalid state %s\n", mic->name, state);
- assert(0);
- }
-
- free(state);
- return mic_state;
-};
-
-static void mic_handle_shutdown(struct mic_info *mic)
-{
-#define SHUTDOWN_TIMEOUT 60
- int i = SHUTDOWN_TIMEOUT;
- char *shutdown_status;
- while (i) {
- shutdown_status = readsysfs(mic->name, "shutdown_status");
- if (!shutdown_status) {
- sleep(1);
- continue;
- }
- mpsslog("%s: %s %d shutdown_status %s\n",
- mic->name, __func__, __LINE__, shutdown_status);
- switch (get_mic_shutdown_status(mic, shutdown_status)) {
- case MIC_RESTART:
- mic->restart = 1;
- case MIC_HALTED:
- case MIC_POWER_OFF:
- case MIC_CRASHED:
- free(shutdown_status);
- goto reset;
- default:
- break;
- }
- free(shutdown_status);
- sleep(1);
- i--;
- }
-reset:
- if (!i)
- mpsslog("%s: %s %d timing out waiting for shutdown_status %s\n",
- mic->name, __func__, __LINE__, shutdown_status);
- reset(mic);
-}
-
-static int open_state_fd(struct mic_info *mic)
-{
- char pathname[PATH_MAX];
- int fd;
-
- snprintf(pathname, PATH_MAX - 1, "%s/%s/%s",
- MICSYSFSDIR, mic->name, "state");
-
- fd = open(pathname, O_RDONLY);
- if (fd < 0)
- mpsslog("%s: opening file %s failed %s\n",
- mic->name, pathname, strerror(errno));
- return fd;
-}
-
-static int block_till_state_change(int fd, struct mic_info *mic)
-{
- struct pollfd ufds[1];
- char value[PAGE_SIZE];
- int ret;
-
- ufds[0].fd = fd;
- ufds[0].events = POLLERR | POLLPRI;
- ret = poll(ufds, 1, -1);
- if (ret < 0) {
- mpsslog("%s: %s %d poll failed %s\n",
- mic->name, __func__, __LINE__, strerror(errno));
- return ret;
- }
-
- ret = lseek(fd, 0, SEEK_SET);
- if (ret < 0) {
- mpsslog("%s: %s %d Failed to seek to 0: %s\n",
- mic->name, __func__, __LINE__, strerror(errno));
- return ret;
- }
-
- ret = read(fd, value, sizeof(value));
- if (ret < 0) {
- mpsslog("%s: %s %d Failed to read sysfs entry: %s\n",
- mic->name, __func__, __LINE__, strerror(errno));
- return ret;
- }
-
- return 0;
-}
-
-static void *
-mic_config(void *arg)
-{
- struct mic_info *mic = (struct mic_info *)arg;
- int fd, ret, stat = 0;
-
- fd = open_state_fd(mic);
- if (fd < 0) {
- mpsslog("%s: %s %d open state fd failed %s\n",
- mic->name, __func__, __LINE__, strerror(errno));
- goto exit;
- }
-
- do {
- ret = block_till_state_change(fd, mic);
- if (ret < 0) {
- mpsslog("%s: %s %d block_till_state_change error %s\n",
- mic->name, __func__, __LINE__, strerror(errno));
- goto close_exit;
- }
-
- switch (get_mic_state(mic)) {
- case MIC_SHUTTING_DOWN:
- mic_handle_shutdown(mic);
- break;
- case MIC_READY:
- case MIC_RESET_FAILED:
- ret = kill(mic->pid, SIGTERM);
- mpsslog("%s: %s %d kill pid %d ret %d\n",
- mic->name, __func__, __LINE__,
- mic->pid, ret);
- if (!ret) {
- ret = waitpid(mic->pid, &stat,
- WIFSIGNALED(stat));
- mpsslog("%s: %s %d waitpid ret %d pid %d\n",
- mic->name, __func__, __LINE__,
- ret, mic->pid);
- }
- if (mic->boot_on_resume) {
- setsysfs(mic->name, "state", "boot");
- mic->boot_on_resume = 0;
- }
- goto close_exit;
- default:
- break;
- }
- } while (1);
-
-close_exit:
- close(fd);
-exit:
- init_mic(mic);
- pthread_exit(NULL);
-}
-
-static void
-set_cmdline(struct mic_info *mic)
-{
- char buffer[PATH_MAX];
- int len;
-
- len = snprintf(buffer, PATH_MAX,
- "clocksource=tsc highres=off nohz=off ");
- len += snprintf(buffer + len, PATH_MAX - len,
- "cpufreq_on;corec6_off;pc3_off;pc6_off ");
- len += snprintf(buffer + len, PATH_MAX - len,
- "ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0",
- mic->id + 1);
-
- setsysfs(mic->name, "cmdline", buffer);
- mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer);
- snprintf(buffer, PATH_MAX, "172.31.%d.1", mic->id + 1);
- mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer);
-}
-
-static void
-set_log_buf_info(struct mic_info *mic)
-{
- int fd;
- off_t len;
- char system_map[] = "/lib/firmware/mic/System.map";
- char *map, *temp, log_buf[17] = {'\0'};
-
- fd = open(system_map, O_RDONLY);
- if (fd < 0) {
- mpsslog("%s: Opening System.map failed: %d\n",
- mic->name, errno);
- return;
- }
- len = lseek(fd, 0, SEEK_END);
- if (len < 0) {
- mpsslog("%s: Reading System.map size failed: %d\n",
- mic->name, errno);
- close(fd);
- return;
- }
- map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
- if (map == MAP_FAILED) {
- mpsslog("%s: mmap of System.map failed: %d\n",
- mic->name, errno);
- close(fd);
- return;
- }
- temp = strstr(map, "__log_buf");
- if (!temp) {
- mpsslog("%s: __log_buf not found: %d\n", mic->name, errno);
- munmap(map, len);
- close(fd);
- return;
- }
- strncpy(log_buf, temp - 19, 16);
- setsysfs(mic->name, "log_buf_addr", log_buf);
- mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf);
- temp = strstr(map, "log_buf_len");
- if (!temp) {
- mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno);
- munmap(map, len);
- close(fd);
- return;
- }
- strncpy(log_buf, temp - 19, 16);
- setsysfs(mic->name, "log_buf_len", log_buf);
- mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf);
- munmap(map, len);
- close(fd);
-}
-
-static void
-change_virtblk_backend(int x, siginfo_t *siginfo, void *p)
-{
- struct mic_info *mic;
-
- for (mic = mic_list.next; mic != NULL; mic = mic->next)
- mic->mic_virtblk.signaled = 1/* true */;
-}
-
-static void
-set_mic_boot_params(struct mic_info *mic)
-{
- set_log_buf_info(mic);
- set_cmdline(mic);
-}
-
-static void *
-init_mic(void *arg)
-{
- struct mic_info *mic = (struct mic_info *)arg;
- struct sigaction ignore = {
- .sa_flags = 0,
- .sa_handler = SIG_IGN
- };
- struct sigaction act = {
- .sa_flags = SA_SIGINFO,
- .sa_sigaction = change_virtblk_backend,
- };
- char buffer[PATH_MAX];
- int err, fd;
-
- /*
- * Currently, one virtio block device is supported for each MIC card
- * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon.
- * The signal informs the virtio block backend about a change in the
- * configuration file which specifies the virtio backend file name on
- * the host. Virtio block backend then re-reads the configuration file
- * and switches to the new block device. This signalling mechanism may
- * not be required once multiple virtio block devices are supported by
- * the MIC daemon.
- */
- sigaction(SIGUSR1, &ignore, NULL);
-retry:
- fd = open_state_fd(mic);
- if (fd < 0) {
- mpsslog("%s: %s %d open state fd failed %s\n",
- mic->name, __func__, __LINE__, strerror(errno));
- sleep(2);
- goto retry;
- }
-
- if (mic->restart) {
- snprintf(buffer, PATH_MAX, "boot");
- setsysfs(mic->name, "state", buffer);
- mpsslog("%s restarting mic %d\n",
- mic->name, mic->restart);
- mic->restart = 0;
- }
-
- while (1) {
- while (block_till_state_change(fd, mic)) {
- mpsslog("%s: %s %d block_till_state_change error %s\n",
- mic->name, __func__, __LINE__, strerror(errno));
- sleep(2);
- continue;
- }
-
- if (get_mic_state(mic) == MIC_BOOTING)
- break;
- }
-
- mic->pid = fork();
- switch (mic->pid) {
- case 0:
- add_virtio_device(mic, &virtcons_dev_page.dd);
- add_virtio_device(mic, &virtnet_dev_page.dd);
- err = pthread_create(&mic->mic_console.console_thread, NULL,
- virtio_console, mic);
- if (err)
- mpsslog("%s virtcons pthread_create failed %s\n",
- mic->name, strerror(err));
- err = pthread_create(&mic->mic_net.net_thread, NULL,
- virtio_net, mic);
- if (err)
- mpsslog("%s virtnet pthread_create failed %s\n",
- mic->name, strerror(err));
- err = pthread_create(&mic->mic_virtblk.block_thread, NULL,
- virtio_block, mic);
- if (err)
- mpsslog("%s virtblk pthread_create failed %s\n",
- mic->name, strerror(err));
- sigemptyset(&act.sa_mask);
- err = sigaction(SIGUSR1, &act, NULL);
- if (err)
- mpsslog("%s sigaction SIGUSR1 failed %s\n",
- mic->name, strerror(errno));
- while (1)
- sleep(60);
- case -1:
- mpsslog("fork failed MIC name %s id %d errno %d\n",
- mic->name, mic->id, errno);
- break;
- default:
- err = pthread_create(&mic->config_thread, NULL,
- mic_config, mic);
- if (err)
- mpsslog("%s mic_config pthread_create failed %s\n",
- mic->name, strerror(err));
- }
-
- return NULL;
-}
-
-static void
-start_daemon(void)
-{
- struct mic_info *mic;
- int err;
-
- for (mic = mic_list.next; mic; mic = mic->next) {
- set_mic_boot_params(mic);
- err = pthread_create(&mic->init_thread, NULL, init_mic, mic);
- if (err)
- mpsslog("%s init_mic pthread_create failed %s\n",
- mic->name, strerror(err));
- }
-
- while (1)
- sleep(60);
-}
-
-static int
-init_mic_list(void)
-{
- struct mic_info *mic = &mic_list;
- struct dirent *file;
- DIR *dp;
- int cnt = 0;
-
- dp = opendir(MICSYSFSDIR);
- if (!dp)
- return 0;
-
- while ((file = readdir(dp)) != NULL) {
- if (!strncmp(file->d_name, "mic", 3)) {
- mic->next = calloc(1, sizeof(struct mic_info));
- if (mic->next) {
- mic = mic->next;
- mic->id = atoi(&file->d_name[3]);
- mic->name = malloc(strlen(file->d_name) + 16);
- if (mic->name)
- strcpy(mic->name, file->d_name);
- mpsslog("MIC name %s id %d\n", mic->name,
- mic->id);
- cnt++;
- }
- }
- }
-
- closedir(dp);
- return cnt;
-}
-
-void
-mpsslog(char *format, ...)
-{
- va_list args;
- char buffer[4096];
- char ts[52], *ts1;
- time_t t;
-
- if (logfp == NULL)
- return;
-
- va_start(args, format);
- vsprintf(buffer, format, args);
- va_end(args);
-
- time(&t);
- ts1 = ctime_r(&t, ts);
- ts1[strlen(ts1) - 1] = '\0';
- fprintf(logfp, "%s: %s", ts1, buffer);
-
- fflush(logfp);
-}
-
-int
-main(int argc, char *argv[])
-{
- int cnt;
- pid_t pid;
-
- myname = argv[0];
-
- logfp = fopen(LOGFILE_NAME, "a+");
- if (!logfp) {
- fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME);
- exit(1);
- }
- pid = fork();
- switch (pid) {
- case 0:
- break;
- case -1:
- exit(2);
- default:
- exit(0);
- }
-
- mpsslog("MIC Daemon start\n");
-
- cnt = init_mic_list();
- if (cnt == 0) {
- mpsslog("MIC module not loaded\n");
- exit(3);
- }
- mpsslog("MIC found %d devices\n", cnt);
-
- start_daemon();
-
- exit(0);
-}
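
The transmit path in the removed virtio_net() thread above boils down to one pattern: read a packet from the tap fd into an iovec pair (virtio_net header plus payload), then hand that iovec to the driver with the MIC_VIRTIO_COPY_DESC ioctl. A condensed sketch of that pattern follows, relying on the definitions from the removed mpssd.h and <linux/mic_ioctl.h>; the standalone function and the fd parameter names are illustrative only, and the polling, descriptor spinning and error reporting of the real thread are omitted.

    /* Sketch: forward one packet from the tap device to the virtio net vring. */
    static int tap_to_virtio_once(int tap_fd, int virtio_net_fd)
    {
            static __u8 hdr[sizeof(struct virtio_net_hdr)];
            static __u8 buf[MAX_NET_PKT_SIZE] __attribute__((aligned(64)));
            struct iovec iov[2] = {
                    { .iov_base = hdr, .iov_len = sizeof(hdr) },
                    { .iov_base = buf, .iov_len = sizeof(buf) },
            };
            struct mic_copy_desc copy = {
                    .iov = iov, .iovcnt = 2, .vr_idx = 0, .update_used = true,
            };
            ssize_t len = readv(tap_fd, iov, 2);

            if (len <= 0)
                    return -1;
            /* Trim the payload iovec to what was actually read, as virtio_net() does. */
            iov[1].iov_len = len - sizeof(struct virtio_net_hdr);
            return ioctl(virtio_net_fd, MIC_VIRTIO_COPY_DESC, &copy);
    }
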
diff --git a/samples/mic/mpssd/mpssd.h b/samples/mic/mpssd/mpssd.h
deleted file mode 100644
index 5f98bda..0000000
--- a/samples/mic/mpssd/mpssd.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2013 Intel Corporation.
- *
- * Intel MIC User Space Tools.
- */
-#ifndef _MPSSD_H_
-#define _MPSSD_H_
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <dirent.h>
-#include <libgen.h>
-#include <pthread.h>
-#include <stdarg.h>
-#include <time.h>
-#include <errno.h>
-#include <sys/dir.h>
-#include <sys/ioctl.h>
-#include <sys/poll.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-#include <sys/utsname.h>
-#include <sys/wait.h>
-#include <netinet/in.h>
-#include <arpa/inet.h>
-#include <netdb.h>
-#include <signal.h>
-#include <limits.h>
-#include <syslog.h>
-#include <getopt.h>
-#include <net/if.h>
-#include <linux/if_tun.h>
-#include <linux/virtio_ids.h>
-
-#define MICSYSFSDIR "/sys/class/mic"
-#define LOGFILE_NAME "/var/log/mpssd"
-#define PAGE_SIZE 4096
-
-struct mic_console_info {
- pthread_t console_thread;
- int virtio_console_fd;
- void *console_dp;
-};
-
-struct mic_net_info {
- pthread_t net_thread;
- int virtio_net_fd;
- int tap_fd;
- void *net_dp;
-};
-
-struct mic_virtblk_info {
- pthread_t block_thread;
- int virtio_block_fd;
- void *block_dp;
- volatile sig_atomic_t signaled;
- char *backend_file;
- int backend;
- void *backend_addr;
- long backend_size;
-};
-
-struct mic_info {
- int id;
- char *name;
- pthread_t config_thread;
- pthread_t init_thread;
- pid_t pid;
- struct mic_console_info mic_console;
- struct mic_net_info mic_net;
- struct mic_virtblk_info mic_virtblk;
- int restart;
- int boot_on_resume;
- struct mic_info *next;
-};
-
-__attribute__((format(printf, 1, 2)))
-void mpsslog(char *format, ...);
-char *readsysfs(char *dir, char *entry);
-int setsysfs(char *dir, char *entry, char *value);
-#endif
diff --git a/samples/mic/mpssd/sysfs.c b/samples/mic/mpssd/sysfs.c
deleted file mode 100644
index 3fb08eb..0000000
--- a/samples/mic/mpssd/sysfs.c
+++ /dev/null
@@ -1,91 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2013 Intel Corporation.
- *
- * Intel MIC User Space Tools.
- */
-
-#include "mpssd.h"
-
-#define PAGE_SIZE 4096
-
-char *
-readsysfs(char *dir, char *entry)
-{
- char filename[PATH_MAX];
- char value[PAGE_SIZE];
- char *string = NULL;
- int fd;
- int len;
-
- if (dir == NULL)
- snprintf(filename, PATH_MAX, "%s/%s", MICSYSFSDIR, entry);
- else
- snprintf(filename, PATH_MAX,
- "%s/%s/%s", MICSYSFSDIR, dir, entry);
-
- fd = open(filename, O_RDONLY);
- if (fd < 0) {
- mpsslog("Failed to open sysfs entry '%s': %s\n",
- filename, strerror(errno));
- return NULL;
- }
-
- len = read(fd, value, sizeof(value));
- if (len < 0) {
- mpsslog("Failed to read sysfs entry '%s': %s\n",
- filename, strerror(errno));
- goto readsys_ret;
- }
- if (len == 0)
- goto readsys_ret;
-
- value[len - 1] = '\0';
-
- string = malloc(strlen(value) + 1);
- if (string)
- strcpy(string, value);
-
-readsys_ret:
- close(fd);
- return string;
-}
-
-int
-setsysfs(char *dir, char *entry, char *value)
-{
- char filename[PATH_MAX];
- char *oldvalue;
- int fd, ret = 0;
-
- if (dir == NULL)
- snprintf(filename, PATH_MAX, "%s/%s", MICSYSFSDIR, entry);
- else
- snprintf(filename, PATH_MAX, "%s/%s/%s",
- MICSYSFSDIR, dir, entry);
-
- oldvalue = readsysfs(dir, entry);
-
- fd = open(filename, O_RDWR);
- if (fd < 0) {
- ret = errno;
- mpsslog("Failed to open sysfs entry '%s': %s\n",
- filename, strerror(errno));
- goto done;
- }
-
- if (!oldvalue || strcmp(value, oldvalue)) {
- if (write(fd, value, strlen(value)) < 0) {
- ret = errno;
- mpsslog("Failed to write new sysfs entry '%s': %s\n",
- filename, strerror(errno));
- }
- }
- close(fd);
-done:
- if (oldvalue)
- free(oldvalue);
- return ret;
-}
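
A minimal usage sketch for the two sysfs helpers removed above, mirroring how the removed mpssd.c drives the card state machine (the "mic0" name and the "state" / "ready" / "boot" values are taken from that source; the wrapper function is illustrative only):

    #include "mpssd.h"

    /* Boot the card only if it currently reports "ready". */
    static void boot_if_ready(void)
    {
            char *state = readsysfs("mic0", "state");

            if (state && !strcmp(state, "ready"))
                    setsysfs("mic0", "state", "boot");
            free(state);
    }
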
diff --git a/samples/nitro_enclaves/.gitignore b/samples/nitro_enclaves/.gitignore
new file mode 100644
index 0000000..8279341
--- /dev/null
+++ b/samples/nitro_enclaves/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+ne_ioctl_sample
diff --git a/samples/nitro_enclaves/Makefile b/samples/nitro_enclaves/Makefile
new file mode 100644
index 0000000..a3ec78f
--- /dev/null
+++ b/samples/nitro_enclaves/Makefile
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+
+# Enclave lifetime management support for Nitro Enclaves (NE) - ioctl sample
+# usage.
+
+.PHONY: all clean
+
+CFLAGS += -Wall
+
+all:
+ $(CC) $(CFLAGS) -o ne_ioctl_sample ne_ioctl_sample.c -lpthread
+
+clean:
+ rm -f ne_ioctl_sample
diff --git a/samples/nitro_enclaves/ne_ioctl_sample.c b/samples/nitro_enclaves/ne_ioctl_sample.c
new file mode 100644
index 0000000..480b763
--- /dev/null
+++ b/samples/nitro_enclaves/ne_ioctl_sample.c
@@ -0,0 +1,883 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ */
+
+/**
+ * DOC: Sample flow of using the ioctl interface provided by the Nitro Enclaves (NE)
+ * kernel driver.
+ *
+ * Usage
+ * -----
+ *
+ * Load the nitro_enclaves module, setting also the enclave CPU pool. The
+ * enclave CPUs need to be full cores from the same NUMA node. CPU 0 and its
+ * siblings have to remain available for the primary / parent VM, so they
+ * cannot be included in the enclave CPU pool.
+ *
+ * See the cpu list section from the kernel documentation.
+ * https://www.kernel.org/doc/html/latest/admin-guide/kernel-parameters.html#cpu-lists
+ *
+ * insmod drivers/virt/nitro_enclaves/nitro_enclaves.ko
+ * lsmod
+ *
+ * The CPU pool can be set at runtime, after the kernel module is loaded.
+ *
+ * echo <cpu-list> > /sys/module/nitro_enclaves/parameters/ne_cpus
+ *
+ * NUMA and CPU siblings information can be found using:
+ *
+ * lscpu
+ * /proc/cpuinfo
+ *
+ * Check the online / offline CPU list. The CPUs from the pool should be
+ * offlined.
+ *
+ * lscpu
+ *
+ * Check dmesg for any warnings / errors through the NE driver lifetime / usage.
+ * The NE logs contain the "nitro_enclaves" or "pci 0000:00:02.0" pattern.
+ *
+ * dmesg
+ *
+ * Setup hugetlbfs huge pages. The memory needs to be from the same NUMA node as
+ * the enclave CPUs.
+ *
+ * https://www.kernel.org/doc/html/latest/admin-guide/mm/hugetlbpage.html
+ *
+ * By default, the allocation of hugetlb pages is distributed across all possible
+ * NUMA nodes. Use the following configuration files to set the number of huge
+ * pages from a NUMA node:
+ *
+ * /sys/devices/system/node/node<X>/hugepages/hugepages-2048kB/nr_hugepages
+ * /sys/devices/system/node/node<X>/hugepages/hugepages-1048576kB/nr_hugepages
+ *
+ * or, if not on a system with multiple NUMA nodes, you can also set the number
+ * of 2 MiB / 1 GiB huge pages using
+ *
+ * /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
+ * /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages
+ *
+ * In this example 256 hugepages of 2 MiB are used.
+ *
+ * Build and run the NE sample.
+ *
+ * make -C samples/nitro_enclaves clean
+ * make -C samples/nitro_enclaves
+ * ./samples/nitro_enclaves/ne_ioctl_sample <path_to_enclave_image>
+ *
+ * Unload the nitro_enclaves module.
+ *
+ * rmmod nitro_enclaves
+ * lsmod
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <poll.h>
+#include <pthread.h>
+#include <string.h>
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <linux/mman.h>
+#include <linux/nitro_enclaves.h>
+#include <linux/vm_sockets.h>
+
+/**
+ * NE_DEV_NAME - Nitro Enclaves (NE) misc device that provides the ioctl interface.
+ */
+#define NE_DEV_NAME "/dev/nitro_enclaves"
+
+/**
+ * NE_POLL_WAIT_TIME - Timeout in seconds for each poll event.
+ */
+#define NE_POLL_WAIT_TIME (60)
+/**
+ * NE_POLL_WAIT_TIME_MS - Timeout in milliseconds for each poll event.
+ */
+#define NE_POLL_WAIT_TIME_MS (NE_POLL_WAIT_TIME * 1000)
+
+/**
+ * NE_SLEEP_TIME - Amount of time in seconds for the process to keep the enclave alive.
+ */
+#define NE_SLEEP_TIME (300)
+
+/**
+ * NE_DEFAULT_NR_VCPUS - Default number of vCPUs set for an enclave.
+ */
+#define NE_DEFAULT_NR_VCPUS (2)
+
+/**
+ * NE_MIN_MEM_REGION_SIZE - Minimum size of a memory region - 2 MiB.
+ */
+#define NE_MIN_MEM_REGION_SIZE (2 * 1024 * 1024)
+
+/**
+ * NE_DEFAULT_NR_MEM_REGIONS - Default number of memory regions of 2 MiB set for
+ * an enclave.
+ */
+#define NE_DEFAULT_NR_MEM_REGIONS (256)
+
+/**
+ * NE_IMAGE_LOAD_HEARTBEAT_CID - Vsock CID for enclave image loading heartbeat logic.
+ */
+#define NE_IMAGE_LOAD_HEARTBEAT_CID (3)
+/**
+ * NE_IMAGE_LOAD_HEARTBEAT_PORT - Vsock port for enclave image loading heartbeat logic.
+ */
+#define NE_IMAGE_LOAD_HEARTBEAT_PORT (9000)
+/**
+ * NE_IMAGE_LOAD_HEARTBEAT_VALUE - Heartbeat value for enclave image loading.
+ */
+#define NE_IMAGE_LOAD_HEARTBEAT_VALUE (0xb7)
+
+/**
+ * struct ne_user_mem_region - User space memory region set for an enclave.
+ * @userspace_addr: Address of the user space memory region.
+ * @memory_size: Size of the user space memory region.
+ */
+struct ne_user_mem_region {
+ void *userspace_addr;
+ size_t memory_size;
+};
+
+/**
+ * ne_create_vm() - Create a slot for the enclave VM.
+ * @ne_dev_fd: The file descriptor of the NE misc device.
+ * @slot_uid: The generated slot uid for the enclave.
+ * @enclave_fd : The generated file descriptor for the enclave.
+ *
+ * Context: Process context.
+ * Return:
+ * * 0 on success.
+ * * Negative return value on failure.
+ */
+static int ne_create_vm(int ne_dev_fd, unsigned long *slot_uid, int *enclave_fd)
+{
+ int rc = -EINVAL;
+ *enclave_fd = ioctl(ne_dev_fd, NE_CREATE_VM, slot_uid);
+
+ if (*enclave_fd < 0) {
+ rc = *enclave_fd;
+ switch (errno) {
+ case NE_ERR_NO_CPUS_AVAIL_IN_POOL: {
+ printf("Error in create VM, no CPUs available in the NE CPU pool\n");
+
+ break;
+ }
+
+ default:
+ printf("Error in create VM [%m]\n");
+ }
+
+ return rc;
+ }
+
+ return 0;
+}
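
A minimal sketch of how this helper might be driven, assuming only the NE_DEV_NAME define and the includes already at the top of this file; the local variable names and the error handling are illustrative, not part of the patch:

    int ne_dev_fd = open(NE_DEV_NAME, O_RDWR);
    unsigned long slot_uid = 0;
    int enclave_fd = -1;

    if (ne_dev_fd < 0 || ne_create_vm(ne_dev_fd, &slot_uid, &enclave_fd) < 0)
            exit(EXIT_FAILURE);
    printf("Enclave slot uid %lu, enclave fd %d\n", slot_uid, enclave_fd);
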
+
+
+/**
+ * ne_poll_enclave_fd() - Thread function for polling the enclave fd.
+ * @data: Argument provided for the polling function.
+ *
+ * Context: Process context.
+ * Return:
+ * * NULL on success / failure.
+ */
+void *ne_poll_enclave_fd(void *data)
+{
+ int enclave_fd = *(int *)data;
+ struct pollfd fds[1] = {};
+ int i = 0;
+ int rc = -EINVAL;
+
+ printf("Running from poll thread, enclave fd %d\n", enclave_fd);
+
+ fds[0].fd = enclave_fd;
+ fds[0].events = POLLIN | POLLERR | POLLHUP;
+
+ /* Keep on polling until the current process is terminated. */
+ while (1) {
+ printf("[iter %d] Polling ...\n", i);
+
+ rc = poll(fds, 1, NE_POLL_WAIT_TIME_MS);
+ if (rc < 0) {
+ printf("Error in poll [%m]\n");
+
+ return NULL;
+ }
+
+ i++;
+
+ if (!rc) {
+ printf("Poll: %d seconds elapsed\n",
+ i * NE_POLL_WAIT_TIME);
+
+ continue;
+ }
+
+ printf("Poll received value 0x%x\n", fds[0].revents);
+
+ if (fds[0].revents & POLLHUP) {
+ printf("Received POLLHUP\n");
+
+ return NULL;
+ }
+
+ if (fds[0].revents & POLLNVAL) {
+ printf("Received POLLNVAL\n");
+
+ return NULL;
+ }
+ }
+
+ return NULL;
+}
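
The polling loop above is meant to run on its own thread for the lifetime of the enclave. A sketch of wiring it up with pthreads, assuming an enclave_fd obtained as in the creation sketch earlier (poll_thread is an illustrative local name):

    pthread_t poll_thread;
    int rc = pthread_create(&poll_thread, NULL, ne_poll_enclave_fd, &enclave_fd);

    if (rc)
            printf("Error in poll thread create [%d]\n", rc);
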
+
+/**
+ * ne_alloc_user_mem_region() - Allocate a user space memory region for an enclave.
+ * @ne_user_mem_region: User space memory region allocated using hugetlbfs.
+ *
+ * Context: Process context.
+ * Return:
+ * * 0 on success.
+ * * Negative return value on failure.
+ */
+static int ne_alloc_user_mem_region(struct ne_user_mem_region *ne_user_mem_region)
+{
+ /**
+ * Check available hugetlb encodings for different huge page sizes in
+ * include/uapi/linux/mman.h.
+ */
+ ne_user_mem_region->userspace_addr = mmap(NULL, ne_user_mem_region->memory_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS |
+ MAP_HUGETLB | MAP_HUGE_2MB, -1, 0);
+ if (ne_user_mem_region->userspace_addr == MAP_FAILED) {
+ printf("Error in mmap memory [%m]\n");
+
+ return -1;
+ }
+
+ return 0;
+}
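
The defines above describe NE_DEFAULT_NR_MEM_REGIONS regions of 2 MiB each; a sketch of allocating them with the helper above (the regions[] array name is illustrative):

    struct ne_user_mem_region regions[NE_DEFAULT_NR_MEM_REGIONS] = {};
    unsigned long i = 0;

    for (i = 0; i < NE_DEFAULT_NR_MEM_REGIONS; i++) {
            regions[i].memory_size = NE_MIN_MEM_REGION_SIZE;
            if (ne_alloc_user_mem_region(&regions[i]) < 0) {
                    printf("Error in alloc memory region %lu\n", i);
                    break;
            }
    }
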
+
+/**
+ * ne_load_enclave_image() - Place the enclave image in the enclave memory.
+ * @enclave_fd : The file descriptor associated with the enclave.
+ * @ne_user_mem_regions: User space memory regions allocated for the enclave.
+ * @enclave_image_path : The file path of the enclave image.
+ *
+ * Context: Process context.
+ * Return:
+ * * 0 on success.
+ * * Negative return value on failure.
+ */
+static int ne_load_enclave_image(int enclave_fd, struct ne_user_mem_region ne_user_mem_regions[],
+ char *enclave_image_path)
+{
+ unsigned char *enclave_image = NULL;
+ int enclave_image_fd = -1;
+ size_t enclave_image_size = 0;
+ size_t enclave_memory_size = 0;
+ unsigned long i = 0;
+ size_t image_written_bytes = 0;
+ struct ne_image_load_info image_load_info = {
+ .flags = NE_EIF_IMAGE,
+ };
+ struct stat image_stat_buf = {};
+ int rc = -EINVAL;
+ size_t temp_image_offset = 0;
+
+ for (i = 0; i < NE_DEFAULT_NR_MEM_REGIONS; i++)
+ enclave_memory_size += ne_user_mem_regions[i].memory_size;
+
+ rc = stat(enclave_image_path, &image_stat_buf);
+ if (rc < 0) {
+ printf("Error in get image stat info [%m]\n");
+
+ return rc;
+ }
+
+ enclave_image_size = image_stat_buf.st_size;
+
+ if (enclave_memory_size < enclave_image_size) {
+ printf("The enclave memory is smaller than the enclave image size\n");
+
+ return -ENOMEM;
+ }
+
+ rc = ioctl(enclave_fd, NE_GET_IMAGE_LOAD_INFO, &image_load_info);
+ if (rc < 0) {
+ switch (errno) {
+ case NE_ERR_NOT_IN_INIT_STATE: {
+ printf("Error in get image load info, enclave not in init state\n");
+
+ break;
+ }
+
+ case NE_ERR_INVALID_FLAG_VALUE: {
+ printf("Error in get image load info, provided invalid flag\n");
+
+ break;
+ }
+
+ default:
+ printf("Error in get image load info [%m]\n");
+ }
+
+ return rc;
+ }
+
+ printf("Enclave image offset in enclave memory is %lld\n",
+ image_load_info.memory_offset);
+
+ enclave_image_fd = open(enclave_image_path, O_RDONLY);
+ if (enclave_image_fd < 0) {
+ printf("Error in open enclave image file [%m]\n");
+
+ return enclave_image_fd;
+ }
+
+ enclave_image = mmap(NULL, enclave_image_size, PROT_READ,
+ MAP_PRIVATE, enclave_image_fd, 0);
+ if (enclave_image == MAP_FAILED) {
+ printf("Error in mmap enclave image [%m]\n");
+
+ return -1;
+ }
+
+ temp_image_offset = image_load_info.memory_offset;
+
+ for (i = 0; i < NE_DEFAULT_NR_MEM_REGIONS; i++) {
+ size_t bytes_to_write = 0;
+ size_t memory_offset = 0;
+ size_t memory_size = ne_user_mem_regions[i].memory_size;
+ size_t remaining_bytes = 0;
+ void *userspace_addr = ne_user_mem_regions[i].userspace_addr;
+
+ if (temp_image_offset >= memory_size) {
+ temp_image_offset -= memory_size;
+
+ continue;
+ } else if (temp_image_offset != 0) {
+ memory_offset = temp_image_offset;
+ memory_size -= temp_image_offset;
+ temp_image_offset = 0;
+ }
+
+ remaining_bytes = enclave_image_size - image_written_bytes;
+ bytes_to_write = memory_size < remaining_bytes ?
+ memory_size : remaining_bytes;
+
+ memcpy(userspace_addr + memory_offset,
+ enclave_image + image_written_bytes, bytes_to_write);
+
+ image_written_bytes += bytes_to_write;
+
+ if (image_written_bytes == enclave_image_size)
+ break;
+ }
+
+ munmap(enclave_image, enclave_image_size);
+
+ close(enclave_image_fd);
+
+ return 0;
+}
+
+/**
+ * ne_set_user_mem_region() - Set a user space memory region for the given enclave.
+ * @enclave_fd : The file descriptor associated with the enclave.
+ * @ne_user_mem_region : User space memory region to be set for the enclave.
+ *
+ * Context: Process context.
+ * Return:
+ * * 0 on success.
+ * * Negative return value on failure.
+ */
+static int ne_set_user_mem_region(int enclave_fd, struct ne_user_mem_region ne_user_mem_region)
+{
+ struct ne_user_memory_region mem_region = {
+ .flags = NE_DEFAULT_MEMORY_REGION,
+ .memory_size = ne_user_mem_region.memory_size,
+ .userspace_addr = (__u64)ne_user_mem_region.userspace_addr,
+ };
+ int rc = -EINVAL;
+
+ rc = ioctl(enclave_fd, NE_SET_USER_MEMORY_REGION, &mem_region);
+ if (rc < 0) {
+ switch (errno) {
+ case NE_ERR_NOT_IN_INIT_STATE: {
+ printf("Error in set user memory region, enclave not in init state\n");
+
+ break;
+ }
+
+ case NE_ERR_INVALID_MEM_REGION_SIZE: {
+ printf("Error in set user memory region, mem size not multiple of 2 MiB\n");
+
+ break;
+ }
+
+ case NE_ERR_INVALID_MEM_REGION_ADDR: {
+ printf("Error in set user memory region, invalid user space address\n");
+
+ break;
+ }
+
+ case NE_ERR_UNALIGNED_MEM_REGION_ADDR: {
+ printf("Error in set user memory region, unaligned user space address\n");
+
+ break;
+ }
+
+ case NE_ERR_MEM_REGION_ALREADY_USED: {
+ printf("Error in set user memory region, memory region already used\n");
+
+ break;
+ }
+
+ case NE_ERR_MEM_NOT_HUGE_PAGE: {
+ printf("Error in set user memory region, not backed by huge pages\n");
+
+ break;
+ }
+
+ case NE_ERR_MEM_DIFFERENT_NUMA_NODE: {
+ printf("Error in set user memory region, different NUMA node than CPUs\n");
+
+ break;
+ }
+
+ case NE_ERR_MEM_MAX_REGIONS: {
+ printf("Error in set user memory region, max memory regions reached\n");
+
+ break;
+ }
+
+ case NE_ERR_INVALID_PAGE_SIZE: {
+ printf("Error in set user memory region, has page not multiple of 2 MiB\n");
+
+ break;
+ }
+
+ case NE_ERR_INVALID_FLAG_VALUE: {
+ printf("Error in set user memory region, provided invalid flag\n");
+
+ break;
+ }
+
+ default:
+ printf("Error in set user memory region [%m]\n");
+ }
+
+ return rc;
+ }
+
+ return 0;
+}
+
+/**
+ * ne_free_mem_regions() - Unmap all the user space memory regions that were set
+ * aside for the enclave.
+ * @ne_user_mem_regions: The user space memory regions associated with an enclave.
+ *
+ * Context: Process context.
+ */
+static void ne_free_mem_regions(struct ne_user_mem_region ne_user_mem_regions[])
+{
+ unsigned int i = 0;
+
+ for (i = 0; i < NE_DEFAULT_NR_MEM_REGIONS; i++)
+ munmap(ne_user_mem_regions[i].userspace_addr,
+ ne_user_mem_regions[i].memory_size);
+}
+
+/**
+ * ne_add_vcpu() - Add a vCPU to the given enclave.
+ * @enclave_fd : The file descriptor associated with the enclave.
+ * @vcpu_id: vCPU id to be set for the enclave, either provided or
+ * auto-generated (if provided vCPU id is 0).
+ *
+ * Context: Process context.
+ * Return:
+ * * 0 on success.
+ * * Negative return value on failure.
+ */
+static int ne_add_vcpu(int enclave_fd, unsigned int *vcpu_id)
+{
+ int rc = -EINVAL;
+
+ rc = ioctl(enclave_fd, NE_ADD_VCPU, vcpu_id);
+ if (rc < 0) {
+ switch (errno) {
+ case NE_ERR_NO_CPUS_AVAIL_IN_POOL: {
+ printf("Error in add vcpu, no CPUs available in the NE CPU pool\n");
+
+ break;
+ }
+
+ case NE_ERR_VCPU_ALREADY_USED: {
+ printf("Error in add vcpu, the provided vCPU is already used\n");
+
+ break;
+ }
+
+ case NE_ERR_VCPU_NOT_IN_CPU_POOL: {
+ printf("Error in add vcpu, the provided vCPU is not in the NE CPU pool\n");
+
+ break;
+ }
+
+ case NE_ERR_VCPU_INVALID_CPU_CORE: {
+ printf("Error in add vcpu, the core id of the provided vCPU is invalid\n");
+
+ break;
+ }
+
+ case NE_ERR_NOT_IN_INIT_STATE: {
+ printf("Error in add vcpu, enclave not in init state\n");
+
+ break;
+ }
+
+ case NE_ERR_INVALID_VCPU: {
+ printf("Error in add vcpu, the provided vCPU is out of avail CPUs range\n");
+
+ break;
+ }
+
+ default:
+ printf("Error in add vcpu [%m]\n");
+ }
+
+ return rc;
+ }
+
+ return 0;
+}
+
+/**
+ * ne_start_enclave() - Start the given enclave.
+ * @enclave_fd : The file descriptor associated with the enclave.
+ * @enclave_start_info : Enclave metadata used for starting e.g. vsock CID.
+ *
+ * Context: Process context.
+ * Return:
+ * * 0 on success.
+ * * Negative return value on failure.
+ */
+static int ne_start_enclave(int enclave_fd, struct ne_enclave_start_info *enclave_start_info)
+{
+ int rc = -EINVAL;
+
+ rc = ioctl(enclave_fd, NE_START_ENCLAVE, enclave_start_info);
+ if (rc < 0) {
+ switch (errno) {
+ case NE_ERR_NOT_IN_INIT_STATE: {
+ printf("Error in start enclave, enclave not in init state\n");
+
+ break;
+ }
+
+ case NE_ERR_NO_MEM_REGIONS_ADDED: {
+ printf("Error in start enclave, no memory regions have been added\n");
+
+ break;
+ }
+
+ case NE_ERR_NO_VCPUS_ADDED: {
+ printf("Error in start enclave, no vCPUs have been added\n");
+
+ break;
+ }
+
+ case NE_ERR_FULL_CORES_NOT_USED: {
+ printf("Error in start enclave, enclave has no full cores set\n");
+
+ break;
+ }
+
+ case NE_ERR_ENCLAVE_MEM_MIN_SIZE: {
+ printf("Error in start enclave, enclave memory is less than min size\n");
+
+ break;
+ }
+
+ case NE_ERR_INVALID_FLAG_VALUE: {
+ printf("Error in start enclave, provided invalid flag\n");
+
+ break;
+ }
+
+ case NE_ERR_INVALID_ENCLAVE_CID: {
+ printf("Error in start enclave, provided invalid enclave CID\n");
+
+ break;
+ }
+
+ default:
+ printf("Error in start enclave [%m]\n");
+ }
+
+ return rc;
+ }
+
+ return 0;
+}
+
+/**
+ * ne_start_enclave_check_booted() - Start the enclave and wait for a heartbeat
+ * from it, on a newly created vsock channel,
+ * to check it has booted.
+ * @enclave_fd : The file descriptor associated with the enclave.
+ *
+ * Context: Process context.
+ * Return:
+ * * 0 on success.
+ * * Negative return value on failure.
+ */
+static int ne_start_enclave_check_booted(int enclave_fd)
+{
+ struct sockaddr_vm client_vsock_addr = {};
+ int client_vsock_fd = -1;
+ socklen_t client_vsock_len = sizeof(client_vsock_addr);
+ struct ne_enclave_start_info enclave_start_info = {};
+ struct pollfd fds[1] = {};
+ int rc = -EINVAL;
+ unsigned char recv_buf = 0;
+ struct sockaddr_vm server_vsock_addr = {
+ .svm_family = AF_VSOCK,
+ .svm_cid = NE_IMAGE_LOAD_HEARTBEAT_CID,
+ .svm_port = NE_IMAGE_LOAD_HEARTBEAT_PORT,
+ };
+ int server_vsock_fd = -1;
+
+ server_vsock_fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+ if (server_vsock_fd < 0) {
+ rc = server_vsock_fd;
+
+ printf("Error in socket [%m]\n");
+
+ return rc;
+ }
+
+ rc = bind(server_vsock_fd, (struct sockaddr *)&server_vsock_addr,
+ sizeof(server_vsock_addr));
+ if (rc < 0) {
+ printf("Error in bind [%m]\n");
+
+ goto out;
+ }
+
+ rc = listen(server_vsock_fd, 1);
+ if (rc < 0) {
+ printf("Error in listen [%m]\n");
+
+ goto out;
+ }
+
+ rc = ne_start_enclave(enclave_fd, &enclave_start_info);
+ if (rc < 0)
+ goto out;
+
+ printf("Enclave started, CID %llu\n", enclave_start_info.enclave_cid);
+
+ fds[0].fd = server_vsock_fd;
+ fds[0].events = POLLIN;
+
+ rc = poll(fds, 1, NE_POLL_WAIT_TIME_MS);
+ if (rc < 0) {
+ printf("Error in poll [%m]\n");
+
+ goto out;
+ }
+
+ if (!rc) {
+ printf("Poll timeout, %d seconds elapsed\n", NE_POLL_WAIT_TIME);
+
+ rc = -ETIMEDOUT;
+
+ goto out;
+ }
+
+ if ((fds[0].revents & POLLIN) == 0) {
+ printf("Poll received value %d\n", fds[0].revents);
+
+ rc = -EINVAL;
+
+ goto out;
+ }
+
+ rc = accept(server_vsock_fd, (struct sockaddr *)&client_vsock_addr,
+ &client_vsock_len);
+ if (rc < 0) {
+ printf("Error in accept [%m]\n");
+
+ goto out;
+ }
+
+ client_vsock_fd = rc;
+
+ /*
+ * Read the heartbeat value that the init process in the enclave sends
+ * after vsock connect.
+ */
+ rc = read(client_vsock_fd, &recv_buf, sizeof(recv_buf));
+ if (rc < 0) {
+ printf("Error in read [%m]\n");
+
+ goto out;
+ }
+
+ if (rc != sizeof(recv_buf) || recv_buf != NE_IMAGE_LOAD_HEARTBEAT_VALUE) {
+ printf("Read %d instead of %d\n", recv_buf,
+ NE_IMAGE_LOAD_HEARTBEAT_VALUE);
+
+ goto out;
+ }
+
+ /* Write the heartbeat value back. */
+ rc = write(client_vsock_fd, &recv_buf, sizeof(recv_buf));
+ if (rc < 0) {
+ printf("Error in write [%m]\n");
+
+ goto out;
+ }
+
+ rc = 0;
+
+out:
+ close(server_vsock_fd);
+
+ return rc;
+}
+
+int main(int argc, char *argv[])
+{
+ int enclave_fd = -1;
+ unsigned int i = 0;
+ int ne_dev_fd = -1;
+ struct ne_user_mem_region ne_user_mem_regions[NE_DEFAULT_NR_MEM_REGIONS] = {};
+ unsigned int ne_vcpus[NE_DEFAULT_NR_VCPUS] = {};
+ int rc = -EINVAL;
+ pthread_t thread_id = 0;
+ unsigned long slot_uid = 0;
+
+ if (argc != 2) {
+ printf("Usage: %s <path_to_enclave_image>\n", argv[0]);
+
+ exit(EXIT_FAILURE);
+ }
+
+ if (strlen(argv[1]) >= PATH_MAX) {
+ printf("The size of the path to enclave image is higher than max path\n");
+
+ exit(EXIT_FAILURE);
+ }
+
+ ne_dev_fd = open(NE_DEV_NAME, O_RDWR | O_CLOEXEC);
+ if (ne_dev_fd < 0) {
+ printf("Error in open NE device [%m]\n");
+
+ exit(EXIT_FAILURE);
+ }
+
+ printf("Creating enclave slot ...\n");
+
+ rc = ne_create_vm(ne_dev_fd, &slot_uid, &enclave_fd);
+
+ close(ne_dev_fd);
+
+ if (rc < 0)
+ exit(EXIT_FAILURE);
+
+ printf("Enclave fd %d\n", enclave_fd);
+
+ rc = pthread_create(&thread_id, NULL, ne_poll_enclave_fd, (void *)&enclave_fd);
+ if (rc < 0) {
+ printf("Error in thread create [%m]\n");
+
+ close(enclave_fd);
+
+ exit(EXIT_FAILURE);
+ }
+
+ for (i = 0; i < NE_DEFAULT_NR_MEM_REGIONS; i++) {
+ ne_user_mem_regions[i].memory_size = NE_MIN_MEM_REGION_SIZE;
+
+ rc = ne_alloc_user_mem_region(&ne_user_mem_regions[i]);
+ if (rc < 0) {
+ printf("Error in alloc userspace memory region, iter %d\n", i);
+
+ goto release_enclave_fd;
+ }
+ }
+
+ rc = ne_load_enclave_image(enclave_fd, ne_user_mem_regions, argv[1]);
+ if (rc < 0)
+ goto release_enclave_fd;
+
+ for (i = 0; i < NE_DEFAULT_NR_MEM_REGIONS; i++) {
+ rc = ne_set_user_mem_region(enclave_fd, ne_user_mem_regions[i]);
+ if (rc < 0) {
+ printf("Error in set memory region, iter %d\n", i);
+
+ goto release_enclave_fd;
+ }
+ }
+
+ printf("Enclave memory regions were added\n");
+
+ for (i = 0; i < NE_DEFAULT_NR_VCPUS; i++) {
+ /*
+ * The vCPU is chosen from the enclave vCPU pool, if the value
+ * of the vcpu_id is 0.
+ */
+ ne_vcpus[i] = 0;
+ rc = ne_add_vcpu(enclave_fd, &ne_vcpus[i]);
+ if (rc < 0) {
+ printf("Error in add vcpu, iter %d\n", i);
+
+ goto release_enclave_fd;
+ }
+
+ printf("Added vCPU %d to the enclave\n", ne_vcpus[i]);
+ }
+
+ printf("Enclave vCPUs were added\n");
+
+ rc = ne_start_enclave_check_booted(enclave_fd);
+ if (rc < 0) {
+ printf("Error in the enclave start / image loading heartbeat logic [rc=%d]\n", rc);
+
+ goto release_enclave_fd;
+ }
+
+ printf("Entering sleep for %d seconds ...\n", NE_SLEEP_TIME);
+
+ sleep(NE_SLEEP_TIME);
+
+ close(enclave_fd);
+
+ ne_free_mem_regions(ne_user_mem_regions);
+
+ exit(EXIT_SUCCESS);
+
+release_enclave_fd:
+ close(enclave_fd);
+ ne_free_mem_regions(ne_user_mem_regions);
+
+ exit(EXIT_FAILURE);
+}
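
The heartbeat check above only shows the parent-instance side of the handshake: a vsock listener that waits for the enclave to connect and echo one byte back and forth. For context, a minimal sketch of the enclave-side counterpart (not part of this patch) could look as follows; it assumes the NE_IMAGE_LOAD_HEARTBEAT_* constants defined earlier in this sample and, in the real boot flow, is the job of the enclave's init process rather than user code.

#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/vm_sockets.h>

/*
 * Hypothetical enclave-side half of the heartbeat handshake: connect over
 * vsock to the parent instance, send the heartbeat byte and read the echo.
 * The NE_IMAGE_LOAD_HEARTBEAT_* macros are assumed to be the ones used by
 * the listener in the sample above.
 */
static int ne_send_heartbeat(void)
{
	struct sockaddr_vm vsock_addr = {
		.svm_family = AF_VSOCK,
		.svm_cid = NE_IMAGE_LOAD_HEARTBEAT_CID,
		.svm_port = NE_IMAGE_LOAD_HEARTBEAT_PORT,
	};
	unsigned char buf = NE_IMAGE_LOAD_HEARTBEAT_VALUE;
	int fd;

	fd = socket(AF_VSOCK, SOCK_STREAM, 0);
	if (fd < 0) {
		printf("Error in socket [%m]\n");

		return -1;
	}

	if (connect(fd, (struct sockaddr *)&vsock_addr, sizeof(vsock_addr)) < 0) {
		printf("Error in connect [%m]\n");

		close(fd);

		return -1;
	}

	/* Send the heartbeat value, then wait for the parent to echo it back. */
	if (write(fd, &buf, sizeof(buf)) != sizeof(buf) ||
	    read(fd, &buf, sizeof(buf)) != sizeof(buf))
		printf("Error in heartbeat exchange [%m]\n");

	close(fd);

	return 0;
}
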
diff --git a/samples/pidfd/.gitignore b/samples/pidfd/.gitignore
index be52b3b..eea857f 100644
--- a/samples/pidfd/.gitignore
+++ b/samples/pidfd/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
pidfd-metadata
diff --git a/samples/pidfd/Makefile b/samples/pidfd/Makefile
index 0ff9778..9754e2d 100644
--- a/samples/pidfd/Makefile
+++ b/samples/pidfd/Makefile
@@ -1,6 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
+userprogs-always-y += pidfd-metadata
-hostprogs-y := pidfd-metadata
-always := $(hostprogs-y)
-HOSTCFLAGS_pidfd-metadata.o += -I$(objtree)/usr/include
-all: pidfd-metadata
+userccflags += -I usr/include
diff --git a/samples/pktgen/README.rst b/samples/pktgen/README.rst
index fd39215..f9c53ca 100644
--- a/samples/pktgen/README.rst
+++ b/samples/pktgen/README.rst
@@ -3,7 +3,7 @@
This directory contains some pktgen sample and benchmark scripts, that
can easily be copied and adjusted for your own use-case.
-General doc is located in kernel: Documentation/networking/pktgen.txt
+General doc is located in kernel: Documentation/networking/pktgen.rst
Helper include files
====================
@@ -18,7 +18,7 @@
Usage: ./pktgen_sample01_simple.sh [-vx] -i ethX
-i : ($DEV) output interface/device (required)
-s : ($PKT_SIZE) packet size
- -d : ($DEST_IP) destination IP
+ -d : ($DEST_IP) destination IP. CIDR (e.g. 198.18.0.0/15) is also allowed
-m : ($DST_MAC) destination MAC-addr
-p : ($DST_PORT) destination PORT range (e.g. 433-444) is also allowed
-t : ($THREADS) threads to start
diff --git a/samples/pktgen/functions.sh b/samples/pktgen/functions.sh
index 40873a5..dae06d5 100644
--- a/samples/pktgen/functions.sh
+++ b/samples/pktgen/functions.sh
@@ -168,6 +168,137 @@
echo $node_cpu_list
}
+# Check $1 is in between $2, $3 ($2 <= $1 <= $3)
+function in_between() { [[ ($1 -ge $2) && ($1 -le $3) ]] ; }
+
+# Extend shrunken IPv6 address.
+# fe80::42:bcff:fe84:e10a => fe80:0:0:0:42:bcff:fe84:e10a
+function extend_addr6()
+{
+ local addr=$1
+ local sep=: sep2=::
+ local sep_cnt=$(tr -cd $sep <<< $1 | wc -c)
+ local shrink
+
+ # separator count should be (2 <= $sep_cnt <= 7)
+ if ! (in_between $sep_cnt 2 7); then
+ err 5 "Invalid IP6 address: $1"
+ fi
+
+ # if shrink '::' occurs multiple, it's malformed.
+ shrink=( $(egrep -o "$sep{2,}" <<< $addr) )
+ if [[ ${#shrink[@]} -ne 0 ]]; then
+ if [[ ${#shrink[@]} -gt 1 || ( ${shrink[0]} != $sep2 ) ]]; then
+ err 5 "Invalid IP6 address: $1"
+ fi
+ fi
+
+ # add 0 at begin & end, and extend addr by adding :0
+ [[ ${addr:0:1} == $sep ]] && addr=0${addr}
+ [[ ${addr: -1} == $sep ]] && addr=${addr}0
+ echo "${addr/$sep2/$(printf ':0%.s' $(seq $[8-sep_cnt])):}"
+}
+
+# Given a single IP(v4/v6) address, whether it is valid.
+function validate_addr()
+{
+ # check function is called with (funcname)6
+ [[ ${FUNCNAME[1]: -1} == 6 ]] && local IP6=6
+ local bitlen=$[ IP6 ? 128 : 32 ]
+ local len=$[ IP6 ? 8 : 4 ]
+ local max=$[ 2**(len*2)-1 ]
+ local net prefix
+ local addr sep
+
+ IFS='/' read net prefix <<< $1
+ [[ $IP6 ]] && net=$(extend_addr6 $net)
+
+ # if prefix exists, check (0 <= $prefix <= $bitlen)
+ if [[ -n $prefix ]]; then
+ if ! (in_between $prefix 0 $bitlen); then
+ err 5 "Invalid prefix: /$prefix"
+ fi
+ fi
+
+ # set separator for each IP(v4/v6)
+ [[ $IP6 ]] && sep=: || sep=.
+ IFS=$sep read -a addr <<< $net
+
+ # array length
+ if [[ ${#addr[@]} != $len ]]; then
+ err 5 "Invalid IP$IP6 address: $1"
+ fi
+
+ # check each digit (0 <= $digit <= $max)
+ for digit in "${addr[@]}"; do
+ [[ $IP6 ]] && digit=$[ 16#$digit ]
+ if ! (in_between $digit 0 $max); then
+ err 5 "Invalid IP$IP6 address: $1"
+ fi
+ done
+
+ return 0
+}
+
+function validate_addr6() { validate_addr $@ ; }
+
+# Given a single IP(v4/v6) or CIDR, return minimum and maximum IP addr.
+function parse_addr()
+{
+ # check function is called with (funcname)6
+ [[ ${FUNCNAME[1]: -1} == 6 ]] && local IP6=6
+ local net prefix
+ local min_ip max_ip
+
+ IFS='/' read net prefix <<< $1
+ [[ $IP6 ]] && net=$(extend_addr6 $net)
+
+ if [[ -z $prefix ]]; then
+ min_ip=$net
+ max_ip=$net
+ else
+ # defining array for converting Decimal 2 Binary
+ # 00000000 00000001 00000010 00000011 00000100 ...
+ local d2b='{0..1}{0..1}{0..1}{0..1}{0..1}{0..1}{0..1}{0..1}'
+ [[ $IP6 ]] && d2b+=$d2b
+ eval local D2B=($d2b)
+
+ local bitlen=$[ IP6 ? 128 : 32 ]
+ local remain=$[ bitlen-prefix ]
+ local octet=$[ IP6 ? 16 : 8 ]
+ local min_mask max_mask
+ local min max
+ local ip_bit
+ local ip sep
+
+ # set separator for each IP(v4/v6)
+ [[ $IP6 ]] && sep=: || sep=.
+ IFS=$sep read -ra ip <<< $net
+
+ min_mask="$(printf '1%.s' $(seq $prefix))$(printf '0%.s' $(seq $remain))"
+ max_mask="$(printf '0%.s' $(seq $prefix))$(printf '1%.s' $(seq $remain))"
+
+ # calculate min/max ip with &,| operator
+ for i in "${!ip[@]}"; do
+ digit=$[ IP6 ? 16#${ip[$i]} : ${ip[$i]} ]
+ ip_bit=${D2B[$digit]}
+
+ idx=$[ octet*i ]
+ min[$i]=$[ 2#$ip_bit & 2#${min_mask:$idx:$octet} ]
+ max[$i]=$[ 2#$ip_bit | 2#${max_mask:$idx:$octet} ]
+ [[ $IP6 ]] && { min[$i]=$(printf '%X' ${min[$i]});
+ max[$i]=$(printf '%X' ${max[$i]}); }
+ done
+
+ min_ip=$(IFS=$sep; echo "${min[*]}")
+ max_ip=$(IFS=$sep; echo "${max[*]}")
+ fi
+
+ echo $min_ip $max_ip
+}
+
+function parse_addr6() { parse_addr $@ ; }
+
# Given a single or range of port(s), return minimum and maximum port number.
function parse_ports()
{
@@ -190,9 +321,9 @@
local min_port=$1
local max_port=$2
- # 0 < port < 65536
- if [[ $min_port -gt 0 && $min_port -lt 65536 ]]; then
- if [[ $max_port -gt 0 && $max_port -lt 65536 ]]; then
+ # 1 <= port <= 65535
+ if (in_between $min_port 1 65535); then
+ if (in_between $max_port 1 65535); then
if [[ $min_port -le $max_port ]]; then
return 0
fi
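
parse_addr() above derives the lowest and highest address of a CIDR block by masking the address against the prefix: the minimum keeps the prefix bits and clears the host bits, the maximum sets all host bits. For illustration only (not part of this patch), the same IPv4 computation in C with the standard inet_pton()/inet_ntop() helpers:

#include <stdio.h>
#include <stdint.h>
#include <sys/socket.h>
#include <arpa/inet.h>

/* Print the lowest and highest IPv4 address of a CIDR block, e.g. 198.18.0.0/15. */
static int print_addr_range(const char *net, unsigned int prefix)
{
	char min_ip[INET_ADDRSTRLEN], max_ip[INET_ADDRSTRLEN];
	struct in_addr addr;
	uint32_t ip, mask;

	if (prefix > 32 || inet_pton(AF_INET, net, &addr) != 1)
		return -1;

	ip = ntohl(addr.s_addr);
	/* Prefix bits set to 1, host bits to 0 (all-zero mask for /0). */
	mask = prefix ? 0xffffffffu << (32 - prefix) : 0;

	addr.s_addr = htonl(ip & mask);	/* network (minimum) address */
	inet_ntop(AF_INET, &addr, min_ip, sizeof(min_ip));

	addr.s_addr = htonl(ip | ~mask);	/* maximum address in the block */
	inet_ntop(AF_INET, &addr, max_ip, sizeof(max_ip));

	printf("%s %s\n", min_ip, max_ip);

	return 0;
}

For example, print_addr_range("198.18.0.0", 15) prints "198.18.0.0 198.19.255.255", the same range hard-coded as a comment in pktgen_sample04_many_flows.sh below.
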
diff --git a/samples/pktgen/parameters.sh b/samples/pktgen/parameters.sh
index a06b00a..ff0ed47 100644
--- a/samples/pktgen/parameters.sh
+++ b/samples/pktgen/parameters.sh
@@ -8,7 +8,7 @@
echo "Usage: $0 [-vx] -i ethX"
echo " -i : (\$DEV) output interface/device (required)"
echo " -s : (\$PKT_SIZE) packet size"
- echo " -d : (\$DEST_IP) destination IP"
+ echo " -d : (\$DEST_IP) destination IP. CIDR (e.g. 198.18.0.0/15) is also allowed"
echo " -m : (\$DST_MAC) destination MAC-addr"
echo " -p : (\$DST_PORT) destination PORT range (e.g. 433-444) is also allowed"
echo " -t : (\$THREADS) threads to start"
diff --git a/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh b/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
index e14b1a9..1b62041 100755
--- a/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
+++ b/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
@@ -41,9 +41,13 @@
[ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff"
[ -z "$BURST" ] && BURST=1024
[ -z "$COUNT" ] && COUNT="10000000" # Zero means indefinitely
+if [ -n "$DEST_IP" ]; then
+ validate_addr${IP6} $DEST_IP
+ read -r DST_MIN DST_MAX <<< $(parse_addr${IP6} $DEST_IP)
+fi
if [ -n "$DST_PORT" ]; then
- read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT)
- validate_ports $DST_MIN $DST_MAX
+ read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT)
+ validate_ports $UDP_DST_MIN $UDP_DST_MAX
fi
# Base Config
@@ -71,13 +75,14 @@
# Destination
pg_set $dev "dst_mac $DST_MAC"
- pg_set $dev "dst$IP6 $DEST_IP"
+ pg_set $dev "dst${IP6}_min $DST_MIN"
+ pg_set $dev "dst${IP6}_max $DST_MAX"
if [ -n "$DST_PORT" ]; then
# Single destination port or random port range
pg_set $dev "flag UDPDST_RND"
- pg_set $dev "udp_dst_min $DST_MIN"
- pg_set $dev "udp_dst_max $DST_MAX"
+ pg_set $dev "udp_dst_min $UDP_DST_MIN"
+ pg_set $dev "udp_dst_max $UDP_DST_MAX"
fi
# Inject packet into RX path of stack
diff --git a/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh b/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh
index 82c3e50..e607cb3 100755
--- a/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh
+++ b/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh
@@ -24,9 +24,13 @@
err 1 "Bursting not supported for this mode"
fi
[ -z "$COUNT" ] && COUNT="10000000" # Zero means indefinitely
+if [ -n "$DEST_IP" ]; then
+ validate_addr${IP6} $DEST_IP
+ read -r DST_MIN DST_MAX <<< $(parse_addr${IP6} $DEST_IP)
+fi
if [ -n "$DST_PORT" ]; then
- read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT)
- validate_ports $DST_MIN $DST_MAX
+ read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT)
+ validate_ports $UDP_DST_MIN $UDP_DST_MAX
fi
# Base Config
@@ -54,13 +58,14 @@
# Destination
pg_set $dev "dst_mac $DST_MAC"
- pg_set $dev "dst$IP6 $DEST_IP"
+ pg_set $dev "dst${IP6}_min $DST_MIN"
+ pg_set $dev "dst${IP6}_max $DST_MAX"
if [ -n "$DST_PORT" ]; then
# Single destination port or random port range
pg_set $dev "flag UDPDST_RND"
- pg_set $dev "udp_dst_min $DST_MIN"
- pg_set $dev "udp_dst_max $DST_MAX"
+ pg_set $dev "udp_dst_min $UDP_DST_MIN"
+ pg_set $dev "udp_dst_max $UDP_DST_MAX"
fi
# Inject packet into TX qdisc egress path of stack
diff --git a/samples/pktgen/pktgen_sample01_simple.sh b/samples/pktgen/pktgen_sample01_simple.sh
index d1702fd..a4e250b 100755
--- a/samples/pktgen/pktgen_sample01_simple.sh
+++ b/samples/pktgen/pktgen_sample01_simple.sh
@@ -22,17 +22,21 @@
# Example enforce param "-m" for dst_mac
[ -z "$DST_MAC" ] && usage && err 2 "Must specify -m dst_mac"
[ -z "$COUNT" ] && COUNT="100000" # Zero means indefinitely
+if [ -n "$DEST_IP" ]; then
+ validate_addr${IP6} $DEST_IP
+ read -r DST_MIN DST_MAX <<< $(parse_addr${IP6} $DEST_IP)
+fi
if [ -n "$DST_PORT" ]; then
- read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT)
- validate_ports $DST_MIN $DST_MAX
+ read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT)
+ validate_ports $UDP_DST_MIN $UDP_DST_MAX
fi
# Base Config
DELAY="0" # Zero means max speed
# Flow variation random source port between min and max
-UDP_MIN=9
-UDP_MAX=109
+UDP_SRC_MIN=9
+UDP_SRC_MAX=109
# General cleanup everything since last run
# (especially important if other threads were configured by other scripts)
@@ -61,19 +65,20 @@
# Destination
pg_set $DEV "dst_mac $DST_MAC"
-pg_set $DEV "dst$IP6 $DEST_IP"
+pg_set $DEV "dst${IP6}_min $DST_MIN"
+pg_set $DEV "dst${IP6}_max $DST_MAX"
if [ -n "$DST_PORT" ]; then
# Single destination port or random port range
pg_set $DEV "flag UDPDST_RND"
- pg_set $DEV "udp_dst_min $DST_MIN"
- pg_set $DEV "udp_dst_max $DST_MAX"
+ pg_set $DEV "udp_dst_min $UDP_DST_MIN"
+ pg_set $DEV "udp_dst_max $UDP_DST_MAX"
fi
# Setup random UDP port src range
pg_set $DEV "flag UDPSRC_RND"
-pg_set $DEV "udp_src_min $UDP_MIN"
-pg_set $DEV "udp_src_max $UDP_MAX"
+pg_set $DEV "udp_src_min $UDP_SRC_MIN"
+pg_set $DEV "udp_src_max $UDP_SRC_MAX"
# start_run
echo "Running... ctrl^C to stop" >&2
diff --git a/samples/pktgen/pktgen_sample02_multiqueue.sh b/samples/pktgen/pktgen_sample02_multiqueue.sh
index 7f7a9a2..cb2495f 100755
--- a/samples/pktgen/pktgen_sample02_multiqueue.sh
+++ b/samples/pktgen/pktgen_sample02_multiqueue.sh
@@ -21,17 +21,21 @@
[ -z "$CLONE_SKB" ] && CLONE_SKB="0"
# Flow variation random source port between min and max
-UDP_MIN=9
-UDP_MAX=109
+UDP_SRC_MIN=9
+UDP_SRC_MAX=109
# (example of setting default params in your script)
if [ -z "$DEST_IP" ]; then
[ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1"
fi
[ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff"
+if [ -n "$DEST_IP" ]; then
+ validate_addr${IP6} $DEST_IP
+ read -r DST_MIN DST_MAX <<< $(parse_addr${IP6} $DEST_IP)
+fi
if [ -n "$DST_PORT" ]; then
- read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT)
- validate_ports $DST_MIN $DST_MAX
+ read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT)
+ validate_ports $UDP_DST_MIN $UDP_DST_MAX
fi
# General cleanup everything since last run
@@ -62,19 +66,20 @@
# Destination
pg_set $dev "dst_mac $DST_MAC"
- pg_set $dev "dst$IP6 $DEST_IP"
+ pg_set $dev "dst${IP6}_min $DST_MIN"
+ pg_set $dev "dst${IP6}_max $DST_MAX"
if [ -n "$DST_PORT" ]; then
# Single destination port or random port range
pg_set $dev "flag UDPDST_RND"
- pg_set $dev "udp_dst_min $DST_MIN"
- pg_set $dev "udp_dst_max $DST_MAX"
+ pg_set $dev "udp_dst_min $UDP_DST_MIN"
+ pg_set $dev "udp_dst_max $UDP_DST_MAX"
fi
# Setup random UDP port src range
pg_set $dev "flag UDPSRC_RND"
- pg_set $dev "udp_src_min $UDP_MIN"
- pg_set $dev "udp_src_max $UDP_MAX"
+ pg_set $dev "udp_src_min $UDP_SRC_MIN"
+ pg_set $dev "udp_src_max $UDP_SRC_MAX"
done
# start_run
diff --git a/samples/pktgen/pktgen_sample03_burst_single_flow.sh b/samples/pktgen/pktgen_sample03_burst_single_flow.sh
index b520637..fff5076 100755
--- a/samples/pktgen/pktgen_sample03_burst_single_flow.sh
+++ b/samples/pktgen/pktgen_sample03_burst_single_flow.sh
@@ -33,9 +33,13 @@
[ -z "$BURST" ] && BURST=32
[ -z "$CLONE_SKB" ] && CLONE_SKB="0" # No need for clones when bursting
[ -z "$COUNT" ] && COUNT="0" # Zero means indefinitely
+if [ -n "$DEST_IP" ]; then
+ validate_addr${IP6} $DEST_IP
+ read -r DST_MIN DST_MAX <<< $(parse_addr${IP6} $DEST_IP)
+fi
if [ -n "$DST_PORT" ]; then
- read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT)
- validate_ports $DST_MIN $DST_MAX
+ read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT)
+ validate_ports $UDP_DST_MIN $UDP_DST_MAX
fi
# Base Config
@@ -62,13 +66,14 @@
# Destination
pg_set $dev "dst_mac $DST_MAC"
- pg_set $dev "dst$IP6 $DEST_IP"
+ pg_set $dev "dst${IP6}_min $DST_MIN"
+ pg_set $dev "dst${IP6}_max $DST_MAX"
if [ -n "$DST_PORT" ]; then
# Single destination port or random port range
pg_set $dev "flag UDPDST_RND"
- pg_set $dev "udp_dst_min $DST_MIN"
- pg_set $dev "udp_dst_max $DST_MAX"
+ pg_set $dev "udp_dst_min $UDP_DST_MIN"
+ pg_set $dev "udp_dst_max $UDP_DST_MAX"
fi
# Setup burst, for easy testing -b 0 disable bursting
diff --git a/samples/pktgen/pktgen_sample04_many_flows.sh b/samples/pktgen/pktgen_sample04_many_flows.sh
index 5b6e9d9..9db1ecf 100755
--- a/samples/pktgen/pktgen_sample04_many_flows.sh
+++ b/samples/pktgen/pktgen_sample04_many_flows.sh
@@ -13,13 +13,19 @@
# Parameter parsing via include
source ${basedir}/parameters.sh
# Set some default params, if they didn't get set
-[ -z "$DEST_IP" ] && DEST_IP="198.18.0.42"
+if [ -z "$DEST_IP" ]; then
+ [ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1"
+fi
[ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff"
[ -z "$CLONE_SKB" ] && CLONE_SKB="0"
[ -z "$COUNT" ] && COUNT="0" # Zero means indefinitely
+if [ -n "$DEST_IP" ]; then
+ validate_addr${IP6} $DEST_IP
+ read -r DST_MIN DST_MAX <<< $(parse_addr${IP6} $DEST_IP)
+fi
if [ -n "$DST_PORT" ]; then
- read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT)
- validate_ports $DST_MIN $DST_MAX
+ read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT)
+ validate_ports $UDP_DST_MIN $UDP_DST_MAX
fi
# NOTICE: Script specific settings
@@ -37,6 +43,9 @@
err 1 "Bursting not supported for this mode"
fi
+# 198.18.0.0 / 198.19.255.255
+read -r SRC_MIN SRC_MAX <<< $(parse_addr 198.18.0.0/15)
+
# General cleanup everything since last run
pg_ctrl "reset"
@@ -58,19 +67,20 @@
# Single destination
pg_set $dev "dst_mac $DST_MAC"
- pg_set $dev "dst $DEST_IP"
+ pg_set $dev "dst${IP6}_min $DST_MIN"
+ pg_set $dev "dst${IP6}_max $DST_MAX"
if [ -n "$DST_PORT" ]; then
# Single destination port or random port range
pg_set $dev "flag UDPDST_RND"
- pg_set $dev "udp_dst_min $DST_MIN"
- pg_set $dev "udp_dst_max $DST_MAX"
+ pg_set $dev "udp_dst_min $UDP_DST_MIN"
+ pg_set $dev "udp_dst_max $UDP_DST_MAX"
fi
# Randomize source IP-addresses
pg_set $dev "flag IPSRC_RND"
- pg_set $dev "src_min 198.18.0.0"
- pg_set $dev "src_max 198.19.255.255"
+ pg_set $dev "src_min $SRC_MIN"
+ pg_set $dev "src_max $SRC_MAX"
# Limit number of flows (max 65535)
pg_set $dev "flows $FLOWS"
diff --git a/samples/pktgen/pktgen_sample05_flow_per_thread.sh b/samples/pktgen/pktgen_sample05_flow_per_thread.sh
index 0c06e63..9fc6c6d 100755
--- a/samples/pktgen/pktgen_sample05_flow_per_thread.sh
+++ b/samples/pktgen/pktgen_sample05_flow_per_thread.sh
@@ -17,14 +17,20 @@
# Parameter parsing via include
source ${basedir}/parameters.sh
# Set some default params, if they didn't get set
-[ -z "$DEST_IP" ] && DEST_IP="198.18.0.42"
+if [ -z "$DEST_IP" ]; then
+ [ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1"
+fi
[ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff"
[ -z "$CLONE_SKB" ] && CLONE_SKB="0"
[ -z "$BURST" ] && BURST=32
[ -z "$COUNT" ] && COUNT="0" # Zero means indefinitely
+if [ -n "$DEST_IP" ]; then
+ validate_addr${IP6} $DEST_IP
+ read -r DST_MIN DST_MAX <<< $(parse_addr${IP6} $DEST_IP)
+fi
if [ -n "$DST_PORT" ]; then
- read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT)
- validate_ports $DST_MIN $DST_MAX
+ read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT)
+ validate_ports $UDP_DST_MIN $UDP_DST_MAX
fi
# Base Config
@@ -51,13 +57,14 @@
# Single destination
pg_set $dev "dst_mac $DST_MAC"
- pg_set $dev "dst $DEST_IP"
+ pg_set $dev "dst${IP6}_min $DST_MIN"
+ pg_set $dev "dst${IP6}_max $DST_MAX"
if [ -n "$DST_PORT" ]; then
# Single destination port or random port range
pg_set $dev "flag UDPDST_RND"
- pg_set $dev "udp_dst_min $DST_MIN"
- pg_set $dev "udp_dst_max $DST_MAX"
+ pg_set $dev "udp_dst_min $UDP_DST_MIN"
+ pg_set $dev "udp_dst_max $UDP_DST_MAX"
fi
# Setup source IP-addresses based on thread number
diff --git a/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh b/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh
index 97f0266..7281060 100755
--- a/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh
+++ b/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh
@@ -20,8 +20,8 @@
[ -z "$CLONE_SKB" ] && CLONE_SKB="0"
# Flow variation random source port between min and max
-UDP_MIN=9
-UDP_MAX=109
+UDP_SRC_MIN=9
+UDP_SRC_MAX=109
node=`get_iface_node $DEV`
irq_array=(`get_iface_irqs $DEV`)
@@ -35,9 +35,13 @@
[ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1"
fi
[ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff"
+if [ -n "$DEST_IP" ]; then
+ validate_addr${IP6} $DEST_IP
+ read -r DST_MIN DST_MAX <<< $(parse_addr${IP6} $DEST_IP)
+fi
if [ -n "$DST_PORT" ]; then
- read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT)
- validate_ports $DST_MIN $DST_MAX
+ read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT)
+ validate_ports $UDP_DST_MIN $UDP_DST_MAX
fi
# General cleanup everything since last run
@@ -79,19 +83,20 @@
# Destination
pg_set $dev "dst_mac $DST_MAC"
- pg_set $dev "dst$IP6 $DEST_IP"
+ pg_set $dev "dst${IP6}_min $DST_MIN"
+ pg_set $dev "dst${IP6}_max $DST_MAX"
if [ -n "$DST_PORT" ]; then
# Single destination port or random port range
pg_set $dev "flag UDPDST_RND"
- pg_set $dev "udp_dst_min $DST_MIN"
- pg_set $dev "udp_dst_max $DST_MAX"
+ pg_set $dev "udp_dst_min $UDP_DST_MIN"
+ pg_set $dev "udp_dst_max $UDP_DST_MAX"
fi
# Setup random UDP port src range
pg_set $dev "flag UDPSRC_RND"
- pg_set $dev "udp_src_min $UDP_MIN"
- pg_set $dev "udp_src_max $UDP_MAX"
+ pg_set $dev "udp_src_min $UDP_SRC_MIN"
+ pg_set $dev "udp_src_max $UDP_SRC_MAX"
done
# start_run
diff --git a/samples/seccomp/.gitignore b/samples/seccomp/.gitignore
index d1e2e81..4a5a5b7 100644
--- a/samples/seccomp/.gitignore
+++ b/samples/seccomp/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
bpf-direct
bpf-fancy
dropper
diff --git a/samples/seccomp/Makefile b/samples/seccomp/Makefile
index 009775b..c85ae0e 100644
--- a/samples/seccomp/Makefile
+++ b/samples/seccomp/Makefile
@@ -1,44 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-ifndef CROSS_COMPILE
-hostprogs-y := bpf-fancy dropper bpf-direct user-trap
+userprogs-always-y += bpf-fancy dropper bpf-direct user-trap
-HOSTCFLAGS_bpf-fancy.o += -I$(objtree)/usr/include
-HOSTCFLAGS_bpf-fancy.o += -idirafter $(objtree)/include
-HOSTCFLAGS_bpf-helper.o += -I$(objtree)/usr/include
-HOSTCFLAGS_bpf-helper.o += -idirafter $(objtree)/include
bpf-fancy-objs := bpf-fancy.o bpf-helper.o
-HOSTCFLAGS_dropper.o += -I$(objtree)/usr/include
-HOSTCFLAGS_dropper.o += -idirafter $(objtree)/include
-dropper-objs := dropper.o
-
-HOSTCFLAGS_bpf-direct.o += -I$(objtree)/usr/include
-HOSTCFLAGS_bpf-direct.o += -idirafter $(objtree)/include
-bpf-direct-objs := bpf-direct.o
-
-HOSTCFLAGS_user-trap.o += -I$(objtree)/usr/include
-HOSTCFLAGS_user-trap.o += -idirafter $(objtree)/include
-user-trap-objs := user-trap.o
-
-# Try to match the kernel target.
-ifndef CONFIG_64BIT
-
-# s390 has -m31 flag to build 31 bit binaries
-ifndef CONFIG_S390
-MFLAG = -m32
-else
-MFLAG = -m31
-endif
-
-HOSTCFLAGS_bpf-direct.o += $(MFLAG)
-HOSTCFLAGS_dropper.o += $(MFLAG)
-HOSTCFLAGS_bpf-helper.o += $(MFLAG)
-HOSTCFLAGS_bpf-fancy.o += $(MFLAG)
-HOSTCFLAGS_user-trap.o += $(MFLAG)
-HOSTLDLIBS_bpf-direct += $(MFLAG)
-HOSTLDLIBS_bpf-fancy += $(MFLAG)
-HOSTLDLIBS_dropper += $(MFLAG)
-HOSTLDLIBS_user-trap += $(MFLAG)
-endif
-always := $(hostprogs-y)
-endif
+userccflags += -I usr/include
diff --git a/samples/timers/.gitignore b/samples/timers/.gitignore
index c5c45d7..40510c3 100644
--- a/samples/timers/.gitignore
+++ b/samples/timers/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
hpet_example
diff --git a/samples/timers/Makefile b/samples/timers/Makefile
index f9fa074..e6836cd 100644
--- a/samples/timers/Makefile
+++ b/samples/timers/Makefile
@@ -1,16 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
-ifndef CROSS_COMPILE
-uname_M := $(shell uname -m 2>/dev/null || echo not)
-ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+userprogs-always-y += hpet_example
-ifeq ($(ARCH),x86)
-CC := $(CROSS_COMPILE)gcc
-PROGS := hpet_example
-
-all: $(PROGS)
-
-clean:
- rm -fr $(PROGS)
-
-endif
-endif
+userccflags += -I usr/include
diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h
index 80b4a70..13a35f7 100644
--- a/samples/trace_events/trace-events-sample.h
+++ b/samples/trace_events/trace-events-sample.h
@@ -416,7 +416,7 @@
* Note, TRACE_EVENT() itself is simply defined as:
*
* #define TRACE_EVENT(name, proto, args, tstruct, assign, printk) \
- * DEFINE_EVENT_CLASS(name, proto, args, tstruct, assign, printk); \
+ * DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, printk); \
* DEFINE_EVENT(name, name, proto, args)
*
* The DEFINE_EVENT() also can be declared with conditions and reg functions:
diff --git a/samples/uhid/.gitignore b/samples/uhid/.gitignore
new file mode 100644
index 0000000..0e0a5a9
--- /dev/null
+++ b/samples/uhid/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+/uhid-example
diff --git a/samples/uhid/Makefile b/samples/uhid/Makefile
index 8c9bc9f..0aa424e 100644
--- a/samples/uhid/Makefile
+++ b/samples/uhid/Makefile
@@ -1,8 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
-# List of programs to build
-hostprogs-y := uhid-example
+userprogs-always-y += uhid-example
-# Tell kbuild to always build the programs
-always := $(hostprogs-y)
-
-HOSTCFLAGS_uhid-example.o += -I$(objtree)/usr/include
+userccflags += -I usr/include
diff --git a/samples/uhid/uhid-example.c b/samples/uhid/uhid-example.c
index b72d645..015cb06 100644
--- a/samples/uhid/uhid-example.c
+++ b/samples/uhid/uhid-example.c
@@ -165,7 +165,7 @@
fprintf(stderr, "Cannot write to uhid: %m\n");
return -errno;
} else if (ret != sizeof(*ev)) {
- fprintf(stderr, "Wrong size written to uhid: %ld != %lu\n",
+ fprintf(stderr, "Wrong size written to uhid: %zd != %zu\n",
ret, sizeof(ev));
return -EFAULT;
} else {
@@ -236,7 +236,7 @@
fprintf(stderr, "Cannot read uhid-cdev: %m\n");
return -errno;
} else if (ret != sizeof(ev)) {
- fprintf(stderr, "Invalid size read from uhid-dev: %ld != %lu\n",
+ fprintf(stderr, "Invalid size read from uhid-dev: %zd != %zu\n",
ret, sizeof(ev));
return -EFAULT;
}
diff --git a/samples/v4l/v4l2-pci-skeleton.c b/samples/v4l/v4l2-pci-skeleton.c
index f6a551b..3fa6582 100644
--- a/samples/v4l/v4l2-pci-skeleton.c
+++ b/samples/v4l/v4l2-pci-skeleton.c
@@ -879,7 +879,7 @@
vdev->tvnorms = SKEL_TVNORMS;
video_set_drvdata(vdev, skel);
- ret = video_register_device(vdev, VFL_TYPE_GRABBER, -1);
+ ret = video_register_device(vdev, VFL_TYPE_VIDEO, -1);
if (ret)
goto free_hdl;
diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c
index ac5c8c1..e030689 100644
--- a/samples/vfio-mdev/mbochs.c
+++ b/samples/vfio-mdev/mbochs.c
@@ -846,7 +846,7 @@
if (sg_alloc_table_from_pages(sg, dmabuf->pages, dmabuf->pagecount,
0, dmabuf->mode.size, GFP_KERNEL) < 0)
goto err2;
- if (!dma_map_sg(at->dev, sg->sgl, sg->nents, direction))
+ if (dma_map_sgtable(at->dev, sg, direction, 0))
goto err3;
return sg;
@@ -868,6 +868,7 @@
dev_dbg(dev, "%s: %d\n", __func__, dmabuf->id);
+ dma_unmap_sgtable(at->dev, sg, direction, 0);
sg_free_table(sg);
kfree(sg);
}
@@ -891,26 +892,10 @@
mutex_unlock(&mdev_state->ops_lock);
}
-static void *mbochs_kmap_dmabuf(struct dma_buf *buf, unsigned long page_num)
-{
- struct mbochs_dmabuf *dmabuf = buf->priv;
- struct page *page = dmabuf->pages[page_num];
-
- return kmap(page);
-}
-
-static void mbochs_kunmap_dmabuf(struct dma_buf *buf, unsigned long page_num,
- void *vaddr)
-{
- kunmap(vaddr);
-}
-
static struct dma_buf_ops mbochs_dmabuf_ops = {
.map_dma_buf = mbochs_map_dmabuf,
.unmap_dma_buf = mbochs_unmap_dmabuf,
.release = mbochs_release_dmabuf,
- .map = mbochs_kmap_dmabuf,
- .unmap = mbochs_kunmap_dmabuf,
.mmap = mbochs_mmap_dmabuf,
};
diff --git a/samples/vfio-mdev/mdpy-defs.h b/samples/vfio-mdev/mdpy-defs.h
index eb26421..961c55e 100644
--- a/samples/vfio-mdev/mdpy-defs.h
+++ b/samples/vfio-mdev/mdpy-defs.h
@@ -9,7 +9,7 @@
*/
/* pci ids */
-#define MDPY_PCI_VENDOR_ID 0x1b36 /* redhat */
+#define MDPY_PCI_VENDOR_ID PCI_VENDOR_ID_REDHAT
#define MDPY_PCI_DEVICE_ID 0x000f
#define MDPY_PCI_SUBVENDOR_ID PCI_SUBVENDOR_ID_REDHAT_QUMRANET
#define MDPY_PCI_SUBDEVICE_ID PCI_SUBDEVICE_ID_QEMU
diff --git a/samples/vfio-mdev/mdpy-fb.c b/samples/vfio-mdev/mdpy-fb.c
index a760e13..9ec93d9 100644
--- a/samples/vfio-mdev/mdpy-fb.c
+++ b/samples/vfio-mdev/mdpy-fb.c
@@ -86,7 +86,7 @@
iounmap(info->screen_base);
}
-static struct fb_ops mdpy_fb_ops = {
+static const struct fb_ops mdpy_fb_ops = {
.owner = THIS_MODULE,
.fb_destroy = mdpy_fb_destroy,
.fb_setcolreg = mdpy_fb_setcolreg,
diff --git a/samples/vfs/.gitignore b/samples/vfs/.gitignore
index 0806eb0..8fdabf7 100644
--- a/samples/vfs/.gitignore
+++ b/samples/vfs/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
test-fsmount
test-statx
diff --git a/samples/vfs/Makefile b/samples/vfs/Makefile
index e21c9f6..6377a67 100644
--- a/samples/vfs/Makefile
+++ b/samples/vfs/Makefile
@@ -1,11 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
-# List of programs to build
-hostprogs-y := \
- test-fsmount \
- test-statx
+userprogs-always-y += test-fsmount test-statx
-# Tell kbuild to always build the programs
-always := $(hostprogs-y)
-
-HOSTCFLAGS_test-fsmount.o += -I$(objtree)/usr/include
-HOSTCFLAGS_test-statx.o += -I$(objtree)/usr/include
+userccflags += -I usr/include
diff --git a/samples/vfs/test-statx.c b/samples/vfs/test-statx.c
index 507f09c..49c7a46 100644
--- a/samples/vfs/test-statx.c
+++ b/samples/vfs/test-statx.c
@@ -218,7 +218,7 @@
struct statx stx;
int ret, raw = 0, atflag = AT_SYMLINK_NOFOLLOW;
- unsigned int mask = STATX_ALL;
+ unsigned int mask = STATX_BASIC_STATS | STATX_BTIME;
for (argv++; *argv; argv++) {
if (strcmp(*argv, "-F") == 0) {
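
The sample now requests STATX_BASIC_STATS | STATX_BTIME explicitly instead of the catch-all STATX_ALL mask. A minimal sketch of the same request through the glibc statx() wrapper (glibc 2.28 or later); show_btime() is a hypothetical helper for illustration, not part of the sample:

#define _GNU_SOURCE
#include <stdio.h>
#include <fcntl.h>
#include <sys/stat.h>

/* Request the basic stat fields plus the birth time for one path. */
static int show_btime(const char *path)
{
	struct statx stx;

	if (statx(AT_FDCWD, path, AT_SYMLINK_NOFOLLOW,
		  STATX_BASIC_STATS | STATX_BTIME, &stx) == -1) {
		perror("statx");
		return -1;
	}

	/* The filesystem reports which requested fields it actually filled in. */
	if (stx.stx_mask & STATX_BTIME)
		printf("%s: btime %lld.%09u\n", path,
		       (long long)stx.stx_btime.tv_sec, stx.stx_btime.tv_nsec);
	else
		printf("%s: btime not supported by this filesystem\n", path);

	return 0;
}
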
diff --git a/samples/watch_queue/.gitignore b/samples/watch_queue/.gitignore
new file mode 100644
index 0000000..2aa3c7e
--- /dev/null
+++ b/samples/watch_queue/.gitignore
@@ -0,0 +1 @@
+watch_test
diff --git a/samples/watch_queue/Makefile b/samples/watch_queue/Makefile
new file mode 100644
index 0000000..c0db3a6
--- /dev/null
+++ b/samples/watch_queue/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+userprogs-always-y += watch_test
+
+userccflags += -I usr/include
diff --git a/samples/watch_queue/watch_test.c b/samples/watch_queue/watch_test.c
new file mode 100644
index 0000000..8c6cb57
--- /dev/null
+++ b/samples/watch_queue/watch_test.c
@@ -0,0 +1,186 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Use watch_queue API to watch for notifications.
+ *
+ * Copyright (C) 2020 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#define _GNU_SOURCE
+#include <stdbool.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <limits.h>
+#include <linux/watch_queue.h>
+#include <linux/unistd.h>
+#include <linux/keyctl.h>
+
+#ifndef KEYCTL_WATCH_KEY
+#define KEYCTL_WATCH_KEY -1
+#endif
+#ifndef __NR_keyctl
+#define __NR_keyctl -1
+#endif
+
+#define BUF_SIZE 256
+
+static long keyctl_watch_key(int key, int watch_fd, int watch_id)
+{
+ return syscall(__NR_keyctl, KEYCTL_WATCH_KEY, key, watch_fd, watch_id);
+}
+
+static const char *key_subtypes[256] = {
+ [NOTIFY_KEY_INSTANTIATED] = "instantiated",
+ [NOTIFY_KEY_UPDATED] = "updated",
+ [NOTIFY_KEY_LINKED] = "linked",
+ [NOTIFY_KEY_UNLINKED] = "unlinked",
+ [NOTIFY_KEY_CLEARED] = "cleared",
+ [NOTIFY_KEY_REVOKED] = "revoked",
+ [NOTIFY_KEY_INVALIDATED] = "invalidated",
+ [NOTIFY_KEY_SETATTR] = "setattr",
+};
+
+static void saw_key_change(struct watch_notification *n, size_t len)
+{
+ struct key_notification *k = (struct key_notification *)n;
+
+ if (len != sizeof(struct key_notification)) {
+ fprintf(stderr, "Incorrect key message length\n");
+ return;
+ }
+
+ printf("KEY %08x change=%u[%s] aux=%u\n",
+ k->key_id, n->subtype, key_subtypes[n->subtype], k->aux);
+}
+
+/*
+ * Consume and display events.
+ */
+static void consumer(int fd)
+{
+ unsigned char buffer[433], *p, *end;
+ union {
+ struct watch_notification n;
+ unsigned char buf1[128];
+ } n;
+ ssize_t buf_len;
+
+ for (;;) {
+ buf_len = read(fd, buffer, sizeof(buffer));
+ if (buf_len == -1) {
+ perror("read");
+ exit(1);
+ }
+
+ if (buf_len == 0) {
+ printf("-- END --\n");
+ return;
+ }
+
+ if (buf_len > sizeof(buffer)) {
+ fprintf(stderr, "Read buffer overrun: %zd\n", buf_len);
+ return;
+ }
+
+ printf("read() = %zd\n", buf_len);
+
+ p = buffer;
+ end = buffer + buf_len;
+ while (p < end) {
+ size_t largest, len;
+
+ largest = end - p;
+ if (largest > 128)
+ largest = 128;
+ if (largest < sizeof(struct watch_notification)) {
+ fprintf(stderr, "Short message header: %zu\n", largest);
+ return;
+ }
+ memcpy(&n, p, largest);
+
+ printf("NOTIFY[%03zx]: ty=%06x sy=%02x i=%08x\n",
+ p - buffer, n.n.type, n.n.subtype, n.n.info);
+
+ len = n.n.info & WATCH_INFO_LENGTH;
+ if (len < sizeof(n.n) || len > largest) {
+ fprintf(stderr, "Bad message length: %zu/%zu\n", len, largest);
+ exit(1);
+ }
+
+ switch (n.n.type) {
+ case WATCH_TYPE_META:
+ switch (n.n.subtype) {
+ case WATCH_META_REMOVAL_NOTIFICATION:
+ printf("REMOVAL of watchpoint %08x\n",
+ (n.n.info & WATCH_INFO_ID) >>
+ WATCH_INFO_ID__SHIFT);
+ break;
+ case WATCH_META_LOSS_NOTIFICATION:
+ printf("-- LOSS --\n");
+ break;
+ default:
+ printf("other meta record\n");
+ break;
+ }
+ break;
+ case WATCH_TYPE_KEY_NOTIFY:
+ saw_key_change(&n.n, len);
+ break;
+ default:
+ printf("other type\n");
+ break;
+ }
+
+ p += len;
+ }
+ }
+}
+
+static struct watch_notification_filter filter = {
+ .nr_filters = 1,
+ .filters = {
+ [0] = {
+ .type = WATCH_TYPE_KEY_NOTIFY,
+ .subtype_filter[0] = UINT_MAX,
+ },
+ },
+};
+
+int main(int argc, char **argv)
+{
+ int pipefd[2], fd;
+
+ if (pipe2(pipefd, O_NOTIFICATION_PIPE) == -1) {
+ perror("pipe2");
+ exit(1);
+ }
+ fd = pipefd[0];
+
+ if (ioctl(fd, IOC_WATCH_QUEUE_SET_SIZE, BUF_SIZE) == -1) {
+ perror("watch_queue(size)");
+ exit(1);
+ }
+
+ if (ioctl(fd, IOC_WATCH_QUEUE_SET_FILTER, &filter) == -1) {
+ perror("watch_queue(filter)");
+ exit(1);
+ }
+
+ if (keyctl_watch_key(KEY_SPEC_SESSION_KEYRING, fd, 0x01) == -1) {
+ perror("keyctl");
+ exit(1);
+ }
+
+ if (keyctl_watch_key(KEY_SPEC_USER_KEYRING, fd, 0x02) == -1) {
+ perror("keyctl");
+ exit(1);
+ }
+
+ consumer(fd);
+ exit(0);
+}
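
The filter in watch_test.c passes every key subtype through by setting subtype_filter[0] to UINT_MAX. As a hedged variation (not part of this sample), the bitmap can be narrowed to specific key events, assuming the NOTIFY_KEY_* subtype values from <linux/watch_queue.h> index bits in subtype_filter:

/*
 * Hypothetical narrower filter: only report key updates and revocations.
 * subtype_filter is treated as a bitmap indexed by notification subtype.
 */
static struct watch_notification_filter key_change_filter = {
	.nr_filters = 1,
	.filters = {
		[0] = {
			.type = WATCH_TYPE_KEY_NOTIFY,
			.subtype_filter[0] = (1U << NOTIFY_KEY_UPDATED) |
					     (1U << NOTIFY_KEY_REVOKED),
		},
	},
};

Such a filter would be installed with the same IOC_WATCH_QUEUE_SET_FILTER ioctl used in main() above.
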
diff --git a/samples/watchdog/.gitignore b/samples/watchdog/.gitignore
index ff0ebb5..74153b8 100644
--- a/samples/watchdog/.gitignore
+++ b/samples/watchdog/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
watchdog-simple
diff --git a/samples/watchdog/Makefile b/samples/watchdog/Makefile
index a9430fa..ab39d23 100644
--- a/samples/watchdog/Makefile
+++ b/samples/watchdog/Makefile
@@ -1,9 +1,2 @@
# SPDX-License-Identifier: GPL-2.0
-CC := $(CROSS_COMPILE)gcc
-PROGS := watchdog-simple
-
-all: $(PROGS)
-
-clean:
- rm -fr $(PROGS)
-
+userprogs-always-y += watchdog-simple