Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/samples/bpf/.gitignore b/samples/bpf/.gitignore
index 74d31fd..b2f29bc 100644
--- a/samples/bpf/.gitignore
+++ b/samples/bpf/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
cpustat
fds_example
hbm
@@ -49,3 +50,5 @@
xdp_sample_pkts
xdp_tx_iptunnel
xdpsock
+xsk_fwd
+testfile.img
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 6d1df71..aeebf5d 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -4,55 +4,56 @@
TOOLS_PATH := $(BPF_SAMPLES_PATH)/../../tools
# List of programs to build
-hostprogs-y := test_lru_dist
-hostprogs-y += sock_example
-hostprogs-y += fds_example
-hostprogs-y += sockex1
-hostprogs-y += sockex2
-hostprogs-y += sockex3
-hostprogs-y += tracex1
-hostprogs-y += tracex2
-hostprogs-y += tracex3
-hostprogs-y += tracex4
-hostprogs-y += tracex5
-hostprogs-y += tracex6
-hostprogs-y += tracex7
-hostprogs-y += test_probe_write_user
-hostprogs-y += trace_output
-hostprogs-y += lathist
-hostprogs-y += offwaketime
-hostprogs-y += spintest
-hostprogs-y += map_perf_test
-hostprogs-y += test_overhead
-hostprogs-y += test_cgrp2_array_pin
-hostprogs-y += test_cgrp2_attach
-hostprogs-y += test_cgrp2_sock
-hostprogs-y += test_cgrp2_sock2
-hostprogs-y += xdp1
-hostprogs-y += xdp2
-hostprogs-y += xdp_router_ipv4
-hostprogs-y += test_current_task_under_cgroup
-hostprogs-y += trace_event
-hostprogs-y += sampleip
-hostprogs-y += tc_l2_redirect
-hostprogs-y += lwt_len_hist
-hostprogs-y += xdp_tx_iptunnel
-hostprogs-y += test_map_in_map
-hostprogs-y += per_socket_stats_example
-hostprogs-y += xdp_redirect
-hostprogs-y += xdp_redirect_map
-hostprogs-y += xdp_redirect_cpu
-hostprogs-y += xdp_monitor
-hostprogs-y += xdp_rxq_info
-hostprogs-y += syscall_tp
-hostprogs-y += cpustat
-hostprogs-y += xdp_adjust_tail
-hostprogs-y += xdpsock
-hostprogs-y += xdp_fwd
-hostprogs-y += task_fd_query
-hostprogs-y += xdp_sample_pkts
-hostprogs-y += ibumad
-hostprogs-y += hbm
+tprogs-y := test_lru_dist
+tprogs-y += sock_example
+tprogs-y += fds_example
+tprogs-y += sockex1
+tprogs-y += sockex2
+tprogs-y += sockex3
+tprogs-y += tracex1
+tprogs-y += tracex2
+tprogs-y += tracex3
+tprogs-y += tracex4
+tprogs-y += tracex5
+tprogs-y += tracex6
+tprogs-y += tracex7
+tprogs-y += test_probe_write_user
+tprogs-y += trace_output
+tprogs-y += lathist
+tprogs-y += offwaketime
+tprogs-y += spintest
+tprogs-y += map_perf_test
+tprogs-y += test_overhead
+tprogs-y += test_cgrp2_array_pin
+tprogs-y += test_cgrp2_attach
+tprogs-y += test_cgrp2_sock
+tprogs-y += test_cgrp2_sock2
+tprogs-y += xdp1
+tprogs-y += xdp2
+tprogs-y += xdp_router_ipv4
+tprogs-y += test_current_task_under_cgroup
+tprogs-y += trace_event
+tprogs-y += sampleip
+tprogs-y += tc_l2_redirect
+tprogs-y += lwt_len_hist
+tprogs-y += xdp_tx_iptunnel
+tprogs-y += test_map_in_map
+tprogs-y += per_socket_stats_example
+tprogs-y += xdp_redirect
+tprogs-y += xdp_redirect_map
+tprogs-y += xdp_redirect_cpu
+tprogs-y += xdp_monitor
+tprogs-y += xdp_rxq_info
+tprogs-y += syscall_tp
+tprogs-y += cpustat
+tprogs-y += xdp_adjust_tail
+tprogs-y += xdpsock
+tprogs-y += xsk_fwd
+tprogs-y += xdp_fwd
+tprogs-y += task_fd_query
+tprogs-y += xdp_sample_pkts
+tprogs-y += ibumad
+tprogs-y += hbm
# Libbpf dependencies
LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
@@ -63,20 +64,20 @@
fds_example-objs := fds_example.o
sockex1-objs := sockex1_user.o
sockex2-objs := sockex2_user.o
-sockex3-objs := bpf_load.o sockex3_user.o
-tracex1-objs := bpf_load.o tracex1_user.o
-tracex2-objs := bpf_load.o tracex2_user.o
-tracex3-objs := bpf_load.o tracex3_user.o
-tracex4-objs := bpf_load.o tracex4_user.o
-tracex5-objs := bpf_load.o tracex5_user.o
-tracex6-objs := bpf_load.o tracex6_user.o
-tracex7-objs := bpf_load.o tracex7_user.o
-test_probe_write_user-objs := bpf_load.o test_probe_write_user_user.o
-trace_output-objs := bpf_load.o trace_output_user.o $(TRACE_HELPERS)
-lathist-objs := bpf_load.o lathist_user.o
-offwaketime-objs := bpf_load.o offwaketime_user.o $(TRACE_HELPERS)
-spintest-objs := bpf_load.o spintest_user.o $(TRACE_HELPERS)
-map_perf_test-objs := bpf_load.o map_perf_test_user.o
+sockex3-objs := sockex3_user.o
+tracex1-objs := tracex1_user.o $(TRACE_HELPERS)
+tracex2-objs := tracex2_user.o
+tracex3-objs := tracex3_user.o
+tracex4-objs := tracex4_user.o
+tracex5-objs := tracex5_user.o $(TRACE_HELPERS)
+tracex6-objs := tracex6_user.o
+tracex7-objs := tracex7_user.o
+test_probe_write_user-objs := test_probe_write_user_user.o
+trace_output-objs := trace_output_user.o $(TRACE_HELPERS)
+lathist-objs := lathist_user.o
+offwaketime-objs := offwaketime_user.o $(TRACE_HELPERS)
+spintest-objs := spintest_user.o $(TRACE_HELPERS)
+map_perf_test-objs := map_perf_test_user.o
test_overhead-objs := bpf_load.o test_overhead_user.o
test_cgrp2_array_pin-objs := test_cgrp2_array_pin.o
test_cgrp2_attach-objs := test_cgrp2_attach.o
@@ -86,24 +87,25 @@
# reuse xdp1 source intentionally
xdp2-objs := xdp1_user.o
xdp_router_ipv4-objs := xdp_router_ipv4_user.o
-test_current_task_under_cgroup-objs := bpf_load.o $(CGROUP_HELPERS) \
+test_current_task_under_cgroup-objs := $(CGROUP_HELPERS) \
test_current_task_under_cgroup_user.o
-trace_event-objs := bpf_load.o trace_event_user.o $(TRACE_HELPERS)
-sampleip-objs := bpf_load.o sampleip_user.o $(TRACE_HELPERS)
+trace_event-objs := trace_event_user.o $(TRACE_HELPERS)
+sampleip-objs := sampleip_user.o $(TRACE_HELPERS)
tc_l2_redirect-objs := bpf_load.o tc_l2_redirect_user.o
lwt_len_hist-objs := bpf_load.o lwt_len_hist_user.o
xdp_tx_iptunnel-objs := xdp_tx_iptunnel_user.o
-test_map_in_map-objs := bpf_load.o test_map_in_map_user.o
+test_map_in_map-objs := test_map_in_map_user.o
per_socket_stats_example-objs := cookie_uid_helper_example.o
xdp_redirect-objs := xdp_redirect_user.o
xdp_redirect_map-objs := xdp_redirect_map_user.o
-xdp_redirect_cpu-objs := bpf_load.o xdp_redirect_cpu_user.o
-xdp_monitor-objs := bpf_load.o xdp_monitor_user.o
+xdp_redirect_cpu-objs := xdp_redirect_cpu_user.o
+xdp_monitor-objs := xdp_monitor_user.o
xdp_rxq_info-objs := xdp_rxq_info_user.o
-syscall_tp-objs := bpf_load.o syscall_tp_user.o
-cpustat-objs := bpf_load.o cpustat_user.o
+syscall_tp-objs := syscall_tp_user.o
+cpustat-objs := cpustat_user.o
xdp_adjust_tail-objs := xdp_adjust_tail_user.o
xdpsock-objs := xdpsock_user.o
+xsk_fwd-objs := xsk_fwd.o
xdp_fwd-objs := xdp_fwd_user.o
task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)
xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS)
@@ -111,100 +113,119 @@
hbm-objs := bpf_load.o hbm.o $(CGROUP_HELPERS)
# Tell kbuild to always build the programs
-always := $(hostprogs-y)
-always += sockex1_kern.o
-always += sockex2_kern.o
-always += sockex3_kern.o
-always += tracex1_kern.o
-always += tracex2_kern.o
-always += tracex3_kern.o
-always += tracex4_kern.o
-always += tracex5_kern.o
-always += tracex6_kern.o
-always += tracex7_kern.o
-always += sock_flags_kern.o
-always += test_probe_write_user_kern.o
-always += trace_output_kern.o
-always += tcbpf1_kern.o
-always += tc_l2_redirect_kern.o
-always += lathist_kern.o
-always += offwaketime_kern.o
-always += spintest_kern.o
-always += map_perf_test_kern.o
-always += test_overhead_tp_kern.o
-always += test_overhead_raw_tp_kern.o
-always += test_overhead_kprobe_kern.o
-always += parse_varlen.o parse_simple.o parse_ldabs.o
-always += test_cgrp2_tc_kern.o
-always += xdp1_kern.o
-always += xdp2_kern.o
-always += xdp_router_ipv4_kern.o
-always += test_current_task_under_cgroup_kern.o
-always += trace_event_kern.o
-always += sampleip_kern.o
-always += lwt_len_hist_kern.o
-always += xdp_tx_iptunnel_kern.o
-always += test_map_in_map_kern.o
-always += cookie_uid_helper_example.o
-always += tcp_synrto_kern.o
-always += tcp_rwnd_kern.o
-always += tcp_bufs_kern.o
-always += tcp_cong_kern.o
-always += tcp_iw_kern.o
-always += tcp_clamp_kern.o
-always += tcp_basertt_kern.o
-always += tcp_tos_reflect_kern.o
-always += tcp_dumpstats_kern.o
-always += xdp_redirect_kern.o
-always += xdp_redirect_map_kern.o
-always += xdp_redirect_cpu_kern.o
-always += xdp_monitor_kern.o
-always += xdp_rxq_info_kern.o
-always += xdp2skb_meta_kern.o
-always += syscall_tp_kern.o
-always += cpustat_kern.o
-always += xdp_adjust_tail_kern.o
-always += xdp_fwd_kern.o
-always += task_fd_query_kern.o
-always += xdp_sample_pkts_kern.o
-always += ibumad_kern.o
-always += hbm_out_kern.o
-always += hbm_edt_kern.o
+always-y := $(tprogs-y)
+always-y += sockex1_kern.o
+always-y += sockex2_kern.o
+always-y += sockex3_kern.o
+always-y += tracex1_kern.o
+always-y += tracex2_kern.o
+always-y += tracex3_kern.o
+always-y += tracex4_kern.o
+always-y += tracex5_kern.o
+always-y += tracex6_kern.o
+always-y += tracex7_kern.o
+always-y += sock_flags_kern.o
+always-y += test_probe_write_user_kern.o
+always-y += trace_output_kern.o
+always-y += tcbpf1_kern.o
+always-y += tc_l2_redirect_kern.o
+always-y += lathist_kern.o
+always-y += offwaketime_kern.o
+always-y += spintest_kern.o
+always-y += map_perf_test_kern.o
+always-y += test_overhead_tp_kern.o
+always-y += test_overhead_raw_tp_kern.o
+always-y += test_overhead_kprobe_kern.o
+always-y += parse_varlen.o parse_simple.o parse_ldabs.o
+always-y += test_cgrp2_tc_kern.o
+always-y += xdp1_kern.o
+always-y += xdp2_kern.o
+always-y += xdp_router_ipv4_kern.o
+always-y += test_current_task_under_cgroup_kern.o
+always-y += trace_event_kern.o
+always-y += sampleip_kern.o
+always-y += lwt_len_hist_kern.o
+always-y += xdp_tx_iptunnel_kern.o
+always-y += test_map_in_map_kern.o
+always-y += tcp_synrto_kern.o
+always-y += tcp_rwnd_kern.o
+always-y += tcp_bufs_kern.o
+always-y += tcp_cong_kern.o
+always-y += tcp_iw_kern.o
+always-y += tcp_clamp_kern.o
+always-y += tcp_basertt_kern.o
+always-y += tcp_tos_reflect_kern.o
+always-y += tcp_dumpstats_kern.o
+always-y += xdp_redirect_kern.o
+always-y += xdp_redirect_map_kern.o
+always-y += xdp_redirect_cpu_kern.o
+always-y += xdp_monitor_kern.o
+always-y += xdp_rxq_info_kern.o
+always-y += xdp2skb_meta_kern.o
+always-y += syscall_tp_kern.o
+always-y += cpustat_kern.o
+always-y += xdp_adjust_tail_kern.o
+always-y += xdp_fwd_kern.o
+always-y += task_fd_query_kern.o
+always-y += xdp_sample_pkts_kern.o
+always-y += ibumad_kern.o
+always-y += hbm_out_kern.o
+always-y += hbm_edt_kern.o
+always-y += xdpsock_kern.o
-KBUILD_HOSTCFLAGS += -I$(objtree)/usr/include
-KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/bpf/
-KBUILD_HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/
-KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/ -I$(srctree)/tools/include
-KBUILD_HOSTCFLAGS += -I$(srctree)/tools/perf
-KBUILD_HOSTCFLAGS += -DHAVE_ATTR_TEST=0
+ifeq ($(ARCH), arm)
+# Strip all except -D__LINUX_ARM_ARCH__ option needed to handle linux
+# headers when arm instruction set identification is requested.
+ARM_ARCH_SELECTOR := $(filter -D__LINUX_ARM_ARCH__%, $(KBUILD_CFLAGS))
+BPF_EXTRA_CFLAGS := $(ARM_ARCH_SELECTOR)
+TPROGS_CFLAGS += $(ARM_ARCH_SELECTOR)
+endif
-HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable
+TPROGS_CFLAGS += -Wall -O2
+TPROGS_CFLAGS += -Wmissing-prototypes
+TPROGS_CFLAGS += -Wstrict-prototypes
-KBUILD_HOSTLDLIBS += $(LIBBPF) -lelf
-HOSTLDLIBS_tracex4 += -lrt
-HOSTLDLIBS_trace_output += -lrt
-HOSTLDLIBS_map_perf_test += -lrt
-HOSTLDLIBS_test_overhead += -lrt
-HOSTLDLIBS_xdpsock += -pthread
+TPROGS_CFLAGS += -I$(objtree)/usr/include
+TPROGS_CFLAGS += -I$(srctree)/tools/testing/selftests/bpf/
+TPROGS_CFLAGS += -I$(srctree)/tools/lib/
+TPROGS_CFLAGS += -I$(srctree)/tools/include
+TPROGS_CFLAGS += -I$(srctree)/tools/perf
+TPROGS_CFLAGS += -DHAVE_ATTR_TEST=0
+
+ifdef SYSROOT
+TPROGS_CFLAGS += --sysroot=$(SYSROOT)
+TPROGS_LDFLAGS := -L$(SYSROOT)/usr/lib
+endif
+
+TPROGCFLAGS_bpf_load.o += -Wno-unused-variable
+
+TPROGS_LDLIBS += $(LIBBPF) -lelf -lz
+TPROGLDLIBS_tracex4 += -lrt
+TPROGLDLIBS_trace_output += -lrt
+TPROGLDLIBS_map_perf_test += -lrt
+TPROGLDLIBS_test_overhead += -lrt
+TPROGLDLIBS_xdpsock += -pthread
+TPROGLDLIBS_xsk_fwd += -pthread
# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
-# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
+# make M=samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
LLC ?= llc
CLANG ?= clang
+OPT ?= opt
+LLVM_DIS ?= llvm-dis
LLVM_OBJCOPY ?= llvm-objcopy
BTF_PAHOLE ?= pahole
# Detect that we're cross compiling and use the cross compiler
ifdef CROSS_COMPILE
-HOSTCC = $(CROSS_COMPILE)gcc
-CLANG_ARCH_ARGS = -target $(ARCH)
+CLANG_ARCH_ARGS = --target=$(notdir $(CROSS_COMPILE:%-=%))
endif
# Don't evaluate probes and warnings if we need to run make recursively
ifneq ($(src),)
-HDR_PROBE := $(shell echo "\#include <linux/types.h>\n struct list_head { int a; }; int main() { return 0; }" | \
- $(HOSTCC) $(KBUILD_HOSTCFLAGS) -x c - -o /dev/null 2>/dev/null && \
- echo okay)
+HDR_PROBE := $(shell printf "\#include <linux/types.h>\n struct list_head { int a; }; int main() { return 0; }" | \
+ $(CC) $(TPROGS_CFLAGS) $(TPROGS_LDFLAGS) -x c - \
+ -o /dev/null 2>/dev/null && echo okay)
ifeq ($(HDR_PROBE),)
$(warning WARNING: Detected possible issues with include path.)
@@ -221,10 +242,10 @@
BPF_EXTRA_CFLAGS += -fno-stack-protector
ifneq ($(BTF_LLVM_PROBE),)
- EXTRA_CFLAGS += -g
+ BPF_EXTRA_CFLAGS += -g
else
ifneq ($(and $(BTF_LLC_PROBE),$(BTF_PAHOLE_PROBE),$(BTF_OBJCOPY_PROBE)),)
- EXTRA_CFLAGS += -g
+ BPF_EXTRA_CFLAGS += -g
LLC_FLAGS += -mattr=dwarfris
DWARF2BTF = y
endif
@@ -233,7 +254,7 @@
# Trick to allow make to be run from this directory
all:
- $(MAKE) -C ../../ $(CURDIR)/ BPF_SAMPLES_PATH=$(CURDIR)
+ $(MAKE) -C ../../ M=$(CURDIR) BPF_SAMPLES_PATH=$(CURDIR)
clean:
$(MAKE) -C ../../ M=$(CURDIR) clean
@@ -241,7 +262,8 @@
$(LIBBPF): FORCE
# Fix up variables inherited from Kbuild that tools/ build system won't like
- $(MAKE) -C $(dir $@) RM='rm -rf' LDFLAGS= srctree=$(BPF_SAMPLES_PATH)/../../ O=
+ $(MAKE) -C $(dir $@) RM='rm -rf' EXTRA_CFLAGS="$(TPROGS_CFLAGS)" \
+ LDFLAGS=$(TPROGS_LDFLAGS) srctree=$(BPF_SAMPLES_PATH)/../../ O=
$(obj)/syscall_nrs.h: $(obj)/syscall_nrs.s FORCE
$(call filechk,offsets,__SYSCALL_NRS_H__)
@@ -278,20 +300,30 @@
$(obj)/hbm.o: $(src)/hbm.h
$(obj)/hbm_edt_kern.o: $(src)/hbm.h $(src)/hbm_kern.h
+-include $(BPF_SAMPLES_PATH)/Makefile.target
+
# asm/sysreg.h - inline assembly used by it is incompatible with llvm.
# But, there is no easy way to fix it, so just exclude it since it is
# useless for BPF samples.
+# below we use long chain of commands, clang | opt | llvm-dis | llc,
+# to generate final object file. 'clang' compiles the source into IR
+# with native target, e.g., x64, arm64, etc. 'opt' does bpf CORE IR builtin
+# processing (llvm12) and IR optimizations. 'llvm-dis' converts
+# 'opt' output to IR, and finally 'llc' generates bpf byte code.
$(obj)/%.o: $(src)/%.c
@echo " CLANG-bpf " $@
- $(Q)$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \
- -I$(srctree)/tools/testing/selftests/bpf/ \
+ $(Q)$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(BPF_EXTRA_CFLAGS) \
+ -I$(obj) -I$(srctree)/tools/testing/selftests/bpf/ \
+ -I$(srctree)/tools/lib/ \
-D__KERNEL__ -D__BPF_TRACING__ -Wno-unused-value -Wno-pointer-sign \
-D__TARGET_ARCH_$(SRCARCH) -Wno-compare-distinct-pointer-types \
-Wno-gnu-variable-sized-type-not-at-end \
-Wno-address-of-packed-member -Wno-tautological-compare \
-Wno-unknown-warning-option $(CLANG_ARCH_ARGS) \
-I$(srctree)/samples/bpf/ -include asm_goto_workaround.h \
- -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf $(LLC_FLAGS) -filetype=obj -o $@
+ -O2 -emit-llvm -Xclang -disable-llvm-passes -c $< -o - | \
+ $(OPT) -O2 -mtriple=bpf-pc-linux | $(LLVM_DIS) | \
+ $(LLC) -march=bpf $(LLC_FLAGS) -filetype=obj -o $@
ifeq ($(DWARF2BTF),y)
$(BTF_PAHOLE) -J $@
endif
diff --git a/samples/bpf/Makefile.target b/samples/bpf/Makefile.target
new file mode 100644
index 0000000..7621f55
--- /dev/null
+++ b/samples/bpf/Makefile.target
@@ -0,0 +1,75 @@
+# SPDX-License-Identifier: GPL-2.0
+# ==========================================================================
+# Building binaries on the host system
+# Binaries are not used during the compilation of the kernel, and are intended
+# to be built for the target board; the target board can be the host, of course.
+# Added to build binaries that are not meant to run on the host system.
+#
+# Sample syntax
+# tprogs-y := xsk_example
+# Will compile xsk_example.c and create an executable named xsk_example
+#
+# tprogs-y := xdpsock
+# xdpsock-objs := xdpsock_1.o xdpsock_2.o
+# Will compile xdpsock_1.c and xdpsock_2.c, and then link the executable
+# xdpsock, based on xdpsock_1.o and xdpsock_2.o
+#
+# Derived from scripts/Makefile.host
+#
+__tprogs := $(sort $(tprogs-y))
+
+# C code
+# Executables compiled from a single .c file
+tprog-csingle := $(foreach m,$(__tprogs), \
+ $(if $($(m)-objs),,$(m)))
+
+# C executables linked based on several .o files
+tprog-cmulti := $(foreach m,$(__tprogs),\
+ $(if $($(m)-objs),$(m)))
+
+# Object (.o) files compiled from .c files
+tprog-cobjs := $(sort $(foreach m,$(__tprogs),$($(m)-objs)))
+
+tprog-csingle := $(addprefix $(obj)/,$(tprog-csingle))
+tprog-cmulti := $(addprefix $(obj)/,$(tprog-cmulti))
+tprog-cobjs := $(addprefix $(obj)/,$(tprog-cobjs))
+
+#####
+# Handle options to gcc. Support building with separate output directory
+
+_tprogc_flags = $(TPROGS_CFLAGS) \
+ $(TPROGCFLAGS_$(basetarget).o)
+
+# $(objtree)/$(obj) for including generated headers from checkin source files
+ifeq ($(KBUILD_EXTMOD),)
+ifdef building_out_of_srctree
+_tprogc_flags += -I $(objtree)/$(obj)
+endif
+endif
+
+tprogc_flags = -Wp,-MD,$(depfile) $(_tprogc_flags)
+
+# Create executable from a single .c file
+# tprog-csingle -> Executable
+quiet_cmd_tprog-csingle = CC $@
+ cmd_tprog-csingle = $(CC) $(tprogc_flags) $(TPROGS_LDFLAGS) -o $@ $< \
+ $(TPROGS_LDLIBS) $(TPROGLDLIBS_$(@F))
+$(tprog-csingle): $(obj)/%: $(src)/%.c FORCE
+ $(call if_changed_dep,tprog-csingle)
+
+# Link an executable based on list of .o files, all plain c
+# tprog-cmulti -> executable
+quiet_cmd_tprog-cmulti = LD $@
+ cmd_tprog-cmulti = $(CC) $(tprogc_flags) $(TPROGS_LDFLAGS) -o $@ \
+ $(addprefix $(obj)/,$($(@F)-objs)) \
+ $(TPROGS_LDLIBS) $(TPROGLDLIBS_$(@F))
+$(tprog-cmulti): $(tprog-cobjs) FORCE
+ $(call if_changed,tprog-cmulti)
+$(call multi_depend, $(tprog-cmulti), , -objs)
+
+# Create .o file from a single .c file
+# tprog-cobjs -> .o
+quiet_cmd_tprog-cobjs = CC $@
+ cmd_tprog-cobjs = $(CC) $(tprogc_flags) -c -o $@ $<
+$(tprog-cobjs): $(obj)/%.o: $(src)/%.c FORCE
+ $(call if_changed_dep,tprog-cobjs)
diff --git a/samples/bpf/README.rst b/samples/bpf/README.rst
index 5f27e4f..dd34b2d 100644
--- a/samples/bpf/README.rst
+++ b/samples/bpf/README.rst
@@ -14,6 +14,20 @@
Note that LLVM's tool 'llc' must support target 'bpf', list version
and supported targets with command: ``llc --version``
+Clean and configuration
+-----------------------
+
+It may be necessary to clean tools, samples or kernel before trying a new arch
+or after some changes (on demand)::
+
+ make -C tools clean
+ make -C samples/bpf clean
+ make clean
+
+Configure kernel, defconfig for instance::
+
+ make defconfig
+
Kernel headers
--------------
@@ -32,12 +46,10 @@
For building the BPF samples, issue the below command from the kernel
top level directory::
- make samples/bpf/
-
-Do notice the "/" slash after the directory name.
+ make M=samples/bpf
It is also possible to call make from this directory. This will just
-hide the the invocation of make as above with the appended "/".
+hide the invocation of make as above.
Manually compiling LLVM with 'bpf' support
------------------------------------------
@@ -63,14 +75,31 @@
It is also possible to point make to the newly compiled 'llc' or
'clang' command via redefining LLC or CLANG on the make command line::
- make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
+ make M=samples/bpf LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
Cross compiling samples
-----------------------
In order to cross-compile, say for arm64 targets, export CROSS_COMPILE and ARCH
-environment variables before calling make. This will direct make to build
-samples for the cross target.
+environment variables before calling make. But do this before clean,
+configuration and header install steps described above. This will direct make to
+build samples for the cross target::
-export ARCH=arm64
-export CROSS_COMPILE="aarch64-linux-gnu-"
-make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
+ export ARCH=arm64
+ export CROSS_COMPILE="aarch64-linux-gnu-"
+
+Headers can also be installed on the RFS of the target board if they need to be
+kept in sync (not necessary; this also creates a local "usr/include" directory)::
+
+ make INSTALL_HDR_PATH=~/some_sysroot/usr headers_install
+
+Pointing LLC and CLANG is not necessary if they are installed on the HOST and
+have the appropriate arm64 arch among their targets (usually several arches are).
+Build samples::
+
+ make M=samples/bpf
+
+Or build samples with SYSROOT if some header or library (say libelf) is absent
+from the toolchain, giving the path to a file system containing the headers and
+libs; this can be the RFS of the target board::
+
+ make M=samples/bpf SYSROOT=~/some_sysroot
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index 4574b19..c5ad528 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -665,23 +665,3 @@
{
return do_load_bpf_file(path, fixup_map);
}
-
-void read_trace_pipe(void)
-{
- int trace_fd;
-
- trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0);
- if (trace_fd < 0)
- return;
-
- while (1) {
- static char buf[4096];
- ssize_t sz;
-
- sz = read(trace_fd, buf, sizeof(buf) - 1);
- if (sz > 0) {
- buf[sz] = 0;
- puts(buf);
- }
- }
-}
diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h
index 814894a..4fcd258 100644
--- a/samples/bpf/bpf_load.h
+++ b/samples/bpf/bpf_load.h
@@ -53,6 +53,5 @@
int load_bpf_file(char *path);
int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map);
-void read_trace_pipe(void);
int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
#endif
diff --git a/samples/bpf/cpustat_kern.c b/samples/bpf/cpustat_kern.c
index 68c84da..5aefd19 100644
--- a/samples/bpf/cpustat_kern.c
+++ b/samples/bpf/cpustat_kern.c
@@ -3,7 +3,7 @@
#include <linux/version.h>
#include <linux/ptrace.h>
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
/*
* The CPU number, cstate number and pstate number are based
@@ -51,28 +51,28 @@
#define MAP_OFF_PSTATE_IDX 3
#define MAP_OFF_NUM 4
-struct bpf_map_def SEC("maps") my_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u64),
- .max_entries = MAX_CPU * MAP_OFF_NUM,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, u64);
+ __uint(max_entries, MAX_CPU * MAP_OFF_NUM);
+} my_map SEC(".maps");
/* cstate_duration records duration time for every idle state per CPU */
-struct bpf_map_def SEC("maps") cstate_duration = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u64),
- .max_entries = MAX_CPU * MAX_CSTATE_ENTRIES,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, u64);
+ __uint(max_entries, MAX_CPU * MAX_CSTATE_ENTRIES);
+} cstate_duration SEC(".maps");
/* pstate_duration records duration time for every operating point per CPU */
-struct bpf_map_def SEC("maps") pstate_duration = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u64),
- .max_entries = MAX_CPU * MAX_PSTATE_ENTRIES,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, u64);
+ __uint(max_entries, MAX_CPU * MAX_PSTATE_ENTRIES);
+} pstate_duration SEC(".maps");
/*
* The trace events for cpu_idle and cpu_frequency are taken from:
diff --git a/samples/bpf/cpustat_user.c b/samples/bpf/cpustat_user.c
index 869a994..9667598 100644
--- a/samples/bpf/cpustat_user.c
+++ b/samples/bpf/cpustat_user.c
@@ -9,7 +9,6 @@
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
-#include <linux/bpf.h>
#include <locale.h>
#include <sys/types.h>
#include <sys/stat.h>
@@ -18,7 +17,9 @@
#include <sys/wait.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
+
+static int cstate_map_fd, pstate_map_fd;
#define MAX_CPU 8
#define MAX_PSTATE_ENTRIES 5
@@ -181,21 +182,50 @@
{
cpu_stat_inject_cpu_idle_event();
cpu_stat_inject_cpu_frequency_event();
- cpu_stat_update(map_fd[1], map_fd[2]);
+ cpu_stat_update(cstate_map_fd, pstate_map_fd);
cpu_stat_print();
exit(0);
}
int main(int argc, char **argv)
{
+ struct bpf_link *link = NULL;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
char filename[256];
int ret;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+ if (!prog) {
+ printf("finding a prog in obj file failed\n");
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ cstate_map_fd = bpf_object__find_map_fd_by_name(obj, "cstate_duration");
+ pstate_map_fd = bpf_object__find_map_fd_by_name(obj, "pstate_duration");
+ if (cstate_map_fd < 0 || pstate_map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ link = bpf_program__attach(prog);
+ if (libbpf_get_error(link)) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ link = NULL;
+ goto cleanup;
}
ret = cpu_stat_inject_cpu_idle_event();
@@ -210,10 +240,13 @@
signal(SIGTERM, int_exit);
while (1) {
- cpu_stat_update(map_fd[1], map_fd[2]);
+ cpu_stat_update(cstate_map_fd, pstate_map_fd);
cpu_stat_print();
sleep(5);
}
+cleanup:
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/fds_example.c b/samples/bpf/fds_example.c
index 34b3fca..59f45fe 100644
--- a/samples/bpf/fds_example.c
+++ b/samples/bpf/fds_example.c
@@ -14,7 +14,7 @@
#include <bpf/bpf.h>
-#include "libbpf.h"
+#include <bpf/libbpf.h>
#include "bpf_insn.h"
#include "sock_example.h"
diff --git a/samples/bpf/hbm.c b/samples/bpf/hbm.c
index e0fbab9..ff4c533 100644
--- a/samples/bpf/hbm.c
+++ b/samples/bpf/hbm.c
@@ -40,6 +40,7 @@
#include <errno.h>
#include <fcntl.h>
#include <linux/unistd.h>
+#include <linux/compiler.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>
@@ -50,8 +51,8 @@
#include "cgroup_helpers.h"
#include "hbm.h"
#include "bpf_util.h"
-#include "bpf.h"
-#include "libbpf.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
bool outFlag = true;
int minRate = 1000; /* cgroup rate limit in Mbps */
@@ -147,7 +148,7 @@
}
if (ret) {
- printf("ERROR: load_bpf_file failed for: %s\n", prog);
+ printf("ERROR: bpf_prog_load_xattr failed for: %s\n", prog);
printf(" Output from verifier:\n%s\n------\n", bpf_log_buf);
ret = -1;
} else {
@@ -483,7 +484,7 @@
"Option -%c requires an argument.\n\n",
optopt);
case 'h':
- // fallthrough
+ __fallthrough;
default:
Usage();
return 0;
diff --git a/samples/bpf/hbm_kern.h b/samples/bpf/hbm_kern.h
index aa207a2..e00f26f 100644
--- a/samples/bpf/hbm_kern.h
+++ b/samples/bpf/hbm_kern.h
@@ -22,8 +22,8 @@
#include <uapi/linux/pkt_cls.h>
#include <net/ipv6.h>
#include <net/inet_ecn.h>
-#include "bpf_endian.h"
-#include "bpf_helpers.h"
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
#include "hbm.h"
#define DROP_PKT 0
@@ -59,21 +59,18 @@
#define BYTES_PER_NS(delta, rate) ((((u64)(delta)) * (rate)) >> 20)
#define BYTES_TO_NS(bytes, rate) div64_u64(((u64)(bytes)) << 20, (u64)(rate))
-struct bpf_map_def SEC("maps") queue_state = {
- .type = BPF_MAP_TYPE_CGROUP_STORAGE,
- .key_size = sizeof(struct bpf_cgroup_storage_key),
- .value_size = sizeof(struct hbm_vqueue),
-};
-BPF_ANNOTATE_KV_PAIR(queue_state, struct bpf_cgroup_storage_key,
- struct hbm_vqueue);
+struct {
+ __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+ __type(key, struct bpf_cgroup_storage_key);
+ __type(value, struct hbm_vqueue);
+} queue_state SEC(".maps");
-struct bpf_map_def SEC("maps") queue_stats = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(struct hbm_queue_stats),
- .max_entries = 1,
-};
-BPF_ANNOTATE_KV_PAIR(queue_stats, int, struct hbm_queue_stats);
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, u32);
+ __type(value, struct hbm_queue_stats);
+} queue_stats SEC(".maps");
struct hbm_pkt_info {
int cwnd;
diff --git a/samples/bpf/ibumad_kern.c b/samples/bpf/ibumad_kern.c
index f281df7..3a91b4c 100644
--- a/samples/bpf/ibumad_kern.c
+++ b/samples/bpf/ibumad_kern.c
@@ -13,7 +13,7 @@
#define KBUILD_MODNAME "ibumad_count_pkts_by_class"
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
struct bpf_map_def SEC("maps") read_count = {
diff --git a/samples/bpf/ibumad_user.c b/samples/bpf/ibumad_user.c
index cb5a8f9..fa06eef 100644
--- a/samples/bpf/ibumad_user.c
+++ b/samples/bpf/ibumad_user.c
@@ -25,7 +25,7 @@
#include "bpf_load.h"
#include "bpf_util.h"
-#include "libbpf.h"
+#include <bpf/libbpf.h>
static void dump_counts(int fd)
{
diff --git a/samples/bpf/lathist_kern.c b/samples/bpf/lathist_kern.c
index 18fa088..4adfcbb 100644
--- a/samples/bpf/lathist_kern.c
+++ b/samples/bpf/lathist_kern.c
@@ -8,7 +8,7 @@
#include <linux/version.h>
#include <linux/ptrace.h>
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#define MAX_ENTRIES 20
#define MAX_CPU 4
@@ -18,12 +18,12 @@
* trace_preempt_[on|off] tracepoints hooks is not supported.
*/
-struct bpf_map_def SEC("maps") my_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(u64),
- .max_entries = MAX_CPU,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, u64);
+ __uint(max_entries, MAX_CPU);
+} my_map SEC(".maps");
SEC("kprobe/trace_preempt_off")
int bpf_prog1(struct pt_regs *ctx)
@@ -61,12 +61,12 @@
return log2(v);
}
-struct bpf_map_def SEC("maps") my_lat = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(long),
- .max_entries = MAX_CPU * MAX_ENTRIES,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, long);
+ __uint(max_entries, MAX_CPU * MAX_ENTRIES);
+} my_lat SEC(".maps");
SEC("kprobe/trace_preempt_on")
int bpf_prog2(struct pt_regs *ctx)
diff --git a/samples/bpf/lathist_user.c b/samples/bpf/lathist_user.c
index 2ff2839..7d8ff24 100644
--- a/samples/bpf/lathist_user.c
+++ b/samples/bpf/lathist_user.c
@@ -6,9 +6,8 @@
#include <unistd.h>
#include <stdlib.h>
#include <signal.h>
-#include <linux/bpf.h>
+#include <bpf/libbpf.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
#define MAX_ENTRIES 20
#define MAX_CPU 4
@@ -81,20 +80,51 @@
int main(int argc, char **argv)
{
+ struct bpf_link *links[2];
+ struct bpf_program *prog;
+ struct bpf_object *obj;
char filename[256];
+ int map_fd, i = 0;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd = bpf_object__find_map_fd_by_name(obj, "my_lat");
+ if (map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ links[i] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[i])) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ links[i] = NULL;
+ goto cleanup;
+ }
+ i++;
}
while (1) {
- get_data(map_fd[1]);
+ get_data(map_fd);
print_hist();
sleep(5);
}
+cleanup:
+ for (i--; i >= 0; i--)
+ bpf_link__destroy(links[i]);
+
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/lwt_len_hist_kern.c b/samples/bpf/lwt_len_hist_kern.c
index df75383..9ed63e1 100644
--- a/samples/bpf/lwt_len_hist_kern.c
+++ b/samples/bpf/lwt_len_hist_kern.c
@@ -14,7 +14,7 @@
#include <uapi/linux/if_ether.h>
#include <uapi/linux/ip.h>
#include <uapi/linux/in.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
# define printk(fmt, ...) \
({ \
diff --git a/samples/bpf/map_perf_test_kern.c b/samples/bpf/map_perf_test_kern.c
index 2b2ffb9..8773f22 100644
--- a/samples/bpf/map_perf_test_kern.c
+++ b/samples/bpf/map_perf_test_kern.c
@@ -8,94 +8,101 @@
#include <linux/netdevice.h>
#include <linux/version.h>
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "trace_common.h"
#define MAX_ENTRIES 1000
#define MAX_NR_CPUS 1024
-struct bpf_map_def SEC("maps") hash_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(long),
- .max_entries = MAX_ENTRIES,
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, long);
+ __uint(max_entries, MAX_ENTRIES);
+} hash_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __type(key, u32);
+ __type(value, long);
+ __uint(max_entries, 10000);
+} lru_hash_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __type(key, u32);
+ __type(value, long);
+ __uint(max_entries, 10000);
+ __uint(map_flags, BPF_F_NO_COMMON_LRU);
+} nocommon_lru_hash_map SEC(".maps");
+
+struct inner_lru {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __type(key, u32);
+ __type(value, long);
+ __uint(max_entries, MAX_ENTRIES);
+ __uint(map_flags, BPF_F_NUMA_NODE);
+ __uint(numa_node, 0);
+} inner_lru_hash_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, MAX_NR_CPUS);
+ __uint(key_size, sizeof(u32));
+ __array(values, struct inner_lru); /* use inner_lru as inner map */
+} array_of_lru_hashs SEC(".maps") = {
+ /* statically initialize the first element */
+ .values = { &inner_lru_hash_map },
};
-struct bpf_map_def SEC("maps") lru_hash_map = {
- .type = BPF_MAP_TYPE_LRU_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(long),
- .max_entries = 10000,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, sizeof(long));
+ __uint(max_entries, MAX_ENTRIES);
+} percpu_hash_map SEC(".maps");
-struct bpf_map_def SEC("maps") nocommon_lru_hash_map = {
- .type = BPF_MAP_TYPE_LRU_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(long),
- .max_entries = 10000,
- .map_flags = BPF_F_NO_COMMON_LRU,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, long);
+ __uint(max_entries, MAX_ENTRIES);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} hash_map_alloc SEC(".maps");
-struct bpf_map_def SEC("maps") inner_lru_hash_map = {
- .type = BPF_MAP_TYPE_LRU_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(long),
- .max_entries = MAX_ENTRIES,
- .map_flags = BPF_F_NUMA_NODE,
- .numa_node = 0,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, sizeof(long));
+ __uint(max_entries, MAX_ENTRIES);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} percpu_hash_map_alloc SEC(".maps");
-struct bpf_map_def SEC("maps") array_of_lru_hashs = {
- .type = BPF_MAP_TYPE_ARRAY_OF_MAPS,
- .key_size = sizeof(u32),
- .max_entries = MAX_NR_CPUS,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_LPM_TRIE);
+ __uint(key_size, 8);
+ __uint(value_size, sizeof(long));
+ __uint(max_entries, 10000);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} lpm_trie_map_alloc SEC(".maps");
-struct bpf_map_def SEC("maps") percpu_hash_map = {
- .type = BPF_MAP_TYPE_PERCPU_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(long),
- .max_entries = MAX_ENTRIES,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, long);
+ __uint(max_entries, MAX_ENTRIES);
+} array_map SEC(".maps");
-struct bpf_map_def SEC("maps") hash_map_alloc = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(long),
- .max_entries = MAX_ENTRIES,
- .map_flags = BPF_F_NO_PREALLOC,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __type(key, u32);
+ __type(value, long);
+ __uint(max_entries, MAX_ENTRIES);
+} lru_hash_lookup_map SEC(".maps");
-struct bpf_map_def SEC("maps") percpu_hash_map_alloc = {
- .type = BPF_MAP_TYPE_PERCPU_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(long),
- .max_entries = MAX_ENTRIES,
- .map_flags = BPF_F_NO_PREALLOC,
-};
-
-struct bpf_map_def SEC("maps") lpm_trie_map_alloc = {
- .type = BPF_MAP_TYPE_LPM_TRIE,
- .key_size = 8,
- .value_size = sizeof(long),
- .max_entries = 10000,
- .map_flags = BPF_F_NO_PREALLOC,
-};
-
-struct bpf_map_def SEC("maps") array_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(long),
- .max_entries = MAX_ENTRIES,
-};
-
-struct bpf_map_def SEC("maps") lru_hash_lookup_map = {
- .type = BPF_MAP_TYPE_LRU_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(long),
- .max_entries = MAX_ENTRIES,
-};
-
-SEC("kprobe/sys_getuid")
+SEC("kprobe/" SYSCALL(sys_getuid))
int stress_hmap(struct pt_regs *ctx)
{
u32 key = bpf_get_current_pid_tgid();
@@ -110,7 +117,7 @@
return 0;
}
-SEC("kprobe/sys_geteuid")
+SEC("kprobe/" SYSCALL(sys_geteuid))
int stress_percpu_hmap(struct pt_regs *ctx)
{
u32 key = bpf_get_current_pid_tgid();
@@ -124,7 +131,7 @@
return 0;
}
-SEC("kprobe/sys_getgid")
+SEC("kprobe/" SYSCALL(sys_getgid))
int stress_hmap_alloc(struct pt_regs *ctx)
{
u32 key = bpf_get_current_pid_tgid();
@@ -138,7 +145,7 @@
return 0;
}
-SEC("kprobe/sys_getegid")
+SEC("kprobe/" SYSCALL(sys_getegid))
int stress_percpu_hmap_alloc(struct pt_regs *ctx)
{
u32 key = bpf_get_current_pid_tgid();
@@ -152,9 +159,10 @@
return 0;
}
-SEC("kprobe/sys_connect")
+SEC("kprobe/" SYSCALL(sys_connect))
int stress_lru_hmap_alloc(struct pt_regs *ctx)
{
+ struct pt_regs *real_regs = (struct pt_regs *)PT_REGS_PARM1_CORE(ctx);
char fmt[] = "Failed at stress_lru_hmap_alloc. ret:%dn";
union {
u16 dst6[8];
@@ -173,14 +181,14 @@
long val = 1;
u32 key = 0;
- in6 = (struct sockaddr_in6 *)PT_REGS_PARM2(ctx);
- addrlen = (int)PT_REGS_PARM3(ctx);
+ in6 = (struct sockaddr_in6 *)PT_REGS_PARM2_CORE(real_regs);
+ addrlen = (int)PT_REGS_PARM3_CORE(real_regs);
if (addrlen != sizeof(*in6))
return 0;
- ret = bpf_probe_read(test_params.dst6, sizeof(test_params.dst6),
- &in6->sin6_addr);
+ ret = bpf_probe_read_user(test_params.dst6, sizeof(test_params.dst6),
+ &in6->sin6_addr);
if (ret)
goto done;
@@ -231,7 +239,7 @@
return 0;
}
-SEC("kprobe/sys_gettid")
+SEC("kprobe/" SYSCALL(sys_gettid))
int stress_lpm_trie_map_alloc(struct pt_regs *ctx)
{
union {
@@ -253,7 +261,7 @@
return 0;
}
-SEC("kprobe/sys_getpgid")
+SEC("kprobe/" SYSCALL(sys_getpgid))
int stress_hash_map_lookup(struct pt_regs *ctx)
{
u32 key = 1, i;
@@ -266,7 +274,7 @@
return 0;
}
-SEC("kprobe/sys_getppid")
+SEC("kprobe/" SYSCALL(sys_getppid))
int stress_array_map_lookup(struct pt_regs *ctx)
{
u32 key = 1, i;
diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c
index fe5564b..8b13230 100644
--- a/samples/bpf/map_perf_test_user.c
+++ b/samples/bpf/map_perf_test_user.c
@@ -11,7 +11,6 @@
#include <sys/wait.h>
#include <stdlib.h>
#include <signal.h>
-#include <linux/bpf.h>
#include <string.h>
#include <time.h>
#include <sys/resource.h>
@@ -19,7 +18,7 @@
#include <errno.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
#define TEST_BIT(t) (1U << (t))
#define MAX_NR_CPUS 1024
@@ -61,12 +60,18 @@
[LRU_HASH_LOOKUP] = "lru_hash_lookup_map",
};
+enum map_idx {
+ array_of_lru_hashs_idx,
+ hash_map_alloc_idx,
+ lru_hash_lookup_idx,
+ NR_IDXES,
+};
+
+static int map_fd[NR_IDXES];
+
static int test_flags = ~0;
static uint32_t num_map_entries;
static uint32_t inner_lru_hash_size;
-static int inner_lru_hash_idx = -1;
-static int array_of_lru_hashs_idx = -1;
-static int lru_hash_lookup_idx = -1;
static int lru_hash_lookup_test_entries = 32;
static uint32_t max_cnt = 1000000;
@@ -122,30 +127,30 @@
__u64 start_time;
int i, ret;
- if (test == INNER_LRU_HASH_PREALLOC) {
+ if (test == INNER_LRU_HASH_PREALLOC && cpu) {
+ /* If CPU is not 0, create inner_lru hash map and insert the fd
+ * value into the array_of_lru_hash map. In case of CPU 0,
+ * 'inner_lru_hash_map' was statically inserted on the map init
+ */
int outer_fd = map_fd[array_of_lru_hashs_idx];
unsigned int mycpu, mynode;
assert(cpu < MAX_NR_CPUS);
- if (cpu) {
- ret = syscall(__NR_getcpu, &mycpu, &mynode, NULL);
- assert(!ret);
+ ret = syscall(__NR_getcpu, &mycpu, &mynode, NULL);
+ assert(!ret);
- inner_lru_map_fds[cpu] =
- bpf_create_map_node(BPF_MAP_TYPE_LRU_HASH,
- test_map_names[INNER_LRU_HASH_PREALLOC],
- sizeof(uint32_t),
- sizeof(long),
- inner_lru_hash_size, 0,
- mynode);
- if (inner_lru_map_fds[cpu] == -1) {
- printf("cannot create BPF_MAP_TYPE_LRU_HASH %s(%d)\n",
- strerror(errno), errno);
- exit(1);
- }
- } else {
- inner_lru_map_fds[cpu] = map_fd[inner_lru_hash_idx];
+ inner_lru_map_fds[cpu] =
+ bpf_create_map_node(BPF_MAP_TYPE_LRU_HASH,
+ test_map_names[INNER_LRU_HASH_PREALLOC],
+ sizeof(uint32_t),
+ sizeof(long),
+ inner_lru_hash_size, 0,
+ mynode);
+ if (inner_lru_map_fds[cpu] == -1) {
+ printf("cannot create BPF_MAP_TYPE_LRU_HASH %s(%d)\n",
+ strerror(errno), errno);
+ exit(1);
}
ret = bpf_map_update_elem(outer_fd, &cpu,
@@ -377,7 +382,8 @@
key->data[1] = rand() & 0xff;
key->data[2] = rand() & 0xff;
key->data[3] = rand() & 0xff;
- r = bpf_map_update_elem(map_fd[6], key, &value, 0);
+ r = bpf_map_update_elem(map_fd[hash_map_alloc_idx],
+ key, &value, 0);
assert(!r);
}
@@ -388,59 +394,52 @@
key->data[3] = 1;
value = 128;
- r = bpf_map_update_elem(map_fd[6], key, &value, 0);
+ r = bpf_map_update_elem(map_fd[hash_map_alloc_idx], key, &value, 0);
assert(!r);
}
-static void fixup_map(struct bpf_map_data *map, int idx)
+static void fixup_map(struct bpf_object *obj)
{
+ struct bpf_map *map;
int i;
- if (!strcmp("inner_lru_hash_map", map->name)) {
- inner_lru_hash_idx = idx;
- inner_lru_hash_size = map->def.max_entries;
- }
+ bpf_object__for_each_map(map, obj) {
+ const char *name = bpf_map__name(map);
- if (!strcmp("array_of_lru_hashs", map->name)) {
- if (inner_lru_hash_idx == -1) {
- printf("inner_lru_hash_map must be defined before array_of_lru_hashs\n");
- exit(1);
+ /* Only change the max_entries for the enabled test(s) */
+ for (i = 0; i < NR_TESTS; i++) {
+ if (!strcmp(test_map_names[i], name) &&
+ (check_test_flags(i))) {
+ bpf_map__resize(map, num_map_entries);
+ continue;
+ }
}
- map->def.inner_map_idx = inner_lru_hash_idx;
- array_of_lru_hashs_idx = idx;
}
- if (!strcmp("lru_hash_lookup_map", map->name))
- lru_hash_lookup_idx = idx;
-
- if (num_map_entries <= 0)
- return;
-
inner_lru_hash_size = num_map_entries;
-
- /* Only change the max_entries for the enabled test(s) */
- for (i = 0; i < NR_TESTS; i++) {
- if (!strcmp(test_map_names[i], map->name) &&
- (check_test_flags(i))) {
- map->def.max_entries = num_map_entries;
- }
- }
}
int main(int argc, char **argv)
{
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ int nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+ struct bpf_link *links[8];
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ struct bpf_map *map;
char filename[256];
- int num_cpu = 8;
+ int i = 0;
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- setrlimit(RLIMIT_MEMLOCK, &r);
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK)");
+ return 1;
+ }
if (argc > 1)
test_flags = atoi(argv[1]) ? : test_flags;
if (argc > 2)
- num_cpu = atoi(argv[2]) ? : num_cpu;
+ nr_cpus = atoi(argv[2]) ? : nr_cpus;
if (argc > 3)
num_map_entries = atoi(argv[3]);
@@ -448,14 +447,61 @@
if (argc > 4)
max_cnt = atoi(argv[4]);
- if (load_bpf_file_fixup_map(filename, fixup_map)) {
- printf("%s", bpf_log_buf);
- return 1;
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 1; /* nothing acquired yet; report the failure */
+ }
+
+ map = bpf_object__find_map_by_name(obj, "inner_lru_hash_map");
+ if (!map || libbpf_get_error(map)) { /* NULL when the map is absent */
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ inner_lru_hash_size = bpf_map__max_entries(map);
+ if (!inner_lru_hash_size) {
+ fprintf(stderr, "ERROR: failed to get map attribute\n");
+ goto cleanup;
+ }
+
+ /* resize BPF map prior to loading */
+ if (num_map_entries > 0)
+ fixup_map(obj);
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd[0] = bpf_object__find_map_fd_by_name(obj, "array_of_lru_hashs");
+ map_fd[1] = bpf_object__find_map_fd_by_name(obj, "hash_map_alloc");
+ map_fd[2] = bpf_object__find_map_fd_by_name(obj, "lru_hash_lookup_map");
+ if (map_fd[0] < 0 || map_fd[1] < 0 || map_fd[2] < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ links[i] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[i])) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ links[i] = NULL;
+ goto cleanup;
+ }
+ i++;
}
fill_lpm_trie();
- run_perf_test(num_cpu);
+ run_perf_test(nr_cpus);
+cleanup:
+ for (i--; i >= 0; i--)
+ bpf_link__destroy(links[i]);
+
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/offwaketime_kern.c b/samples/bpf/offwaketime_kern.c
index e7d9a0a..14b7929 100644
--- a/samples/bpf/offwaketime_kern.c
+++ b/samples/bpf/offwaketime_kern.c
@@ -5,13 +5,19 @@
* License as published by the Free Software Foundation.
*/
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
#include <uapi/linux/ptrace.h>
#include <uapi/linux/perf_event.h>
#include <linux/version.h>
#include <linux/sched.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
-#define _(P) ({typeof(P) val; bpf_probe_read(&val, sizeof(val), &P); val;})
+#define _(P) \
+ ({ \
+ typeof(P) val; \
+ bpf_probe_read_kernel(&val, sizeof(val), &(P)); \
+ val; \
+ })
#define MINBLOCK_US 1
@@ -22,38 +28,38 @@
u32 tret;
};
-struct bpf_map_def SEC("maps") counts = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(struct key_t),
- .value_size = sizeof(u64),
- .max_entries = 10000,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, struct key_t);
+ __type(value, u64);
+ __uint(max_entries, 10000);
+} counts SEC(".maps");
-struct bpf_map_def SEC("maps") start = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(u64),
- .max_entries = 10000,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, u64);
+ __uint(max_entries, 10000);
+} start SEC(".maps");
struct wokeby_t {
char name[TASK_COMM_LEN];
u32 ret;
};
-struct bpf_map_def SEC("maps") wokeby = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(struct wokeby_t),
- .max_entries = 10000,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, struct wokeby_t);
+ __uint(max_entries, 10000);
+} wokeby SEC(".maps");
-struct bpf_map_def SEC("maps") stackmap = {
- .type = BPF_MAP_TYPE_STACK_TRACE,
- .key_size = sizeof(u32),
- .value_size = PERF_MAX_STACK_DEPTH * sizeof(u64),
- .max_entries = 10000,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK_TRACE);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64));
+ __uint(max_entries, 10000);
+} stackmap SEC(".maps");
#define STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP)
diff --git a/samples/bpf/offwaketime_user.c b/samples/bpf/offwaketime_user.c
index fc8767d..5734cfd 100644
--- a/samples/bpf/offwaketime_user.c
+++ b/samples/bpf/offwaketime_user.c
@@ -5,19 +5,19 @@
#include <unistd.h>
#include <stdlib.h>
#include <signal.h>
-#include <linux/bpf.h>
-#include <string.h>
#include <linux/perf_event.h>
#include <errno.h>
-#include <assert.h>
#include <stdbool.h>
#include <sys/resource.h>
-#include "libbpf.h"
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
#include "trace_helpers.h"
#define PRINT_RAW_ADDR 0
+/* counts, stackmap */
+static int map_fd[2];
+
static void print_ksym(__u64 addr)
{
struct ksym *sym;
@@ -52,14 +52,14 @@
int i;
printf("%s;", key->target);
- if (bpf_map_lookup_elem(map_fd[3], &key->tret, ip) != 0) {
+ if (bpf_map_lookup_elem(map_fd[1], &key->tret, ip) != 0) {
printf("---;");
} else {
for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--)
print_ksym(ip[i]);
}
printf("-;");
- if (bpf_map_lookup_elem(map_fd[3], &key->wret, ip) != 0) {
+ if (bpf_map_lookup_elem(map_fd[1], &key->wret, ip) != 0) {
printf("---;");
} else {
for (i = 0; i < PERF_MAX_STACK_DEPTH; i++)
@@ -96,23 +96,54 @@
int main(int argc, char **argv)
{
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ struct bpf_object *obj = NULL;
+ struct bpf_link *links[2];
+ struct bpf_program *prog;
+ int delay = 1, i = 0;
char filename[256];
- int delay = 1;
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- setrlimit(RLIMIT_MEMLOCK, &r);
-
- signal(SIGINT, int_exit);
- signal(SIGTERM, int_exit);
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK)");
+ return 1;
+ }
if (load_kallsyms()) {
printf("failed to process /proc/kallsyms\n");
return 2;
}
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ obj = NULL;
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd[0] = bpf_object__find_map_fd_by_name(obj, "counts");
+ map_fd[1] = bpf_object__find_map_fd_by_name(obj, "stackmap");
+ if (map_fd[0] < 0 || map_fd[1] < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ signal(SIGINT, int_exit);
+ signal(SIGTERM, int_exit);
+
+ bpf_object__for_each_program(prog, obj) {
+ links[i] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[i])) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ links[i] = NULL;
+ goto cleanup;
+ }
+ i++;
}
if (argc > 1)
@@ -120,5 +151,10 @@
sleep(delay);
print_stacks(map_fd[0]);
+cleanup:
+ for (i--; i >= 0; i--)
+ bpf_link__destroy(links[i]);
+
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/parse_ldabs.c b/samples/bpf/parse_ldabs.c
index 6db6b21..c6f65f9 100644
--- a/samples/bpf/parse_ldabs.c
+++ b/samples/bpf/parse_ldabs.c
@@ -11,7 +11,8 @@
#include <linux/tcp.h>
#include <linux/udp.h>
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_legacy.h"
#define DEFAULT_PKTGEN_UDP_PORT 9
#define IP_MF 0x2000
diff --git a/samples/bpf/parse_simple.c b/samples/bpf/parse_simple.c
index 10af53d..4a486cb 100644
--- a/samples/bpf/parse_simple.c
+++ b/samples/bpf/parse_simple.c
@@ -12,7 +12,7 @@
#include <linux/udp.h>
#include <uapi/linux/bpf.h>
#include <net/ip.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#define DEFAULT_PKTGEN_UDP_PORT 9
diff --git a/samples/bpf/parse_varlen.c b/samples/bpf/parse_varlen.c
index 0b6f22f..d862384 100644
--- a/samples/bpf/parse_varlen.c
+++ b/samples/bpf/parse_varlen.c
@@ -14,7 +14,7 @@
#include <linux/udp.h>
#include <uapi/linux/bpf.h>
#include <net/ip.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#define DEFAULT_PKTGEN_UDP_PORT 9
#define DEBUG 0
diff --git a/samples/bpf/sampleip_kern.c b/samples/bpf/sampleip_kern.c
index ceabf31..f24806a 100644
--- a/samples/bpf/sampleip_kern.c
+++ b/samples/bpf/sampleip_kern.c
@@ -8,16 +8,17 @@
#include <linux/ptrace.h>
#include <uapi/linux/bpf.h>
#include <uapi/linux/bpf_perf_event.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
#define MAX_IPS 8192
-struct bpf_map_def SEC("maps") ip_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(u64),
- .value_size = sizeof(u32),
- .max_entries = MAX_IPS,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u64);
+ __type(value, u32);
+ __uint(max_entries, MAX_IPS);
+} ip_map SEC(".maps");
SEC("perf_event")
int do_sample(struct bpf_perf_event_data *ctx)
diff --git a/samples/bpf/sampleip_user.c b/samples/bpf/sampleip_user.c
index 6b5dc26..921c505 100644
--- a/samples/bpf/sampleip_user.c
+++ b/samples/bpf/sampleip_user.c
@@ -10,13 +10,11 @@
#include <errno.h>
#include <signal.h>
#include <string.h>
-#include <assert.h>
#include <linux/perf_event.h>
#include <linux/ptrace.h>
#include <linux/bpf.h>
-#include <sys/ioctl.h>
-#include "libbpf.h"
-#include "bpf_load.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
#include "perf-sys.h"
#include "trace_helpers.h"
@@ -25,6 +23,7 @@
#define MAX_IPS 8192
#define PAGE_OFFSET 0xffff880000000000
+static int map_fd;
static int nr_cpus;
static void usage(void)
@@ -34,9 +33,10 @@
printf(" duration # sampling duration (seconds), default 5\n");
}
-static int sampling_start(int *pmu_fd, int freq)
+static int sampling_start(int freq, struct bpf_program *prog,
+ struct bpf_link *links[])
{
- int i;
+ int i, pmu_fd;
struct perf_event_attr pe_sample_attr = {
.type = PERF_TYPE_SOFTWARE,
@@ -47,26 +47,30 @@
};
for (i = 0; i < nr_cpus; i++) {
- pmu_fd[i] = sys_perf_event_open(&pe_sample_attr, -1 /* pid */, i,
+ pmu_fd = sys_perf_event_open(&pe_sample_attr, -1 /* pid */, i,
-1 /* group_fd */, 0 /* flags */);
- if (pmu_fd[i] < 0) {
+ if (pmu_fd < 0) {
fprintf(stderr, "ERROR: Initializing perf sampling\n");
return 1;
}
- assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF,
- prog_fd[0]) == 0);
- assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0) == 0);
+ links[i] = bpf_program__attach_perf_event(prog, pmu_fd);
+ if (libbpf_get_error(links[i])) {
+ fprintf(stderr, "ERROR: Attach perf event\n");
+ links[i] = NULL;
+ close(pmu_fd);
+ return 1;
+ }
}
return 0;
}
-static void sampling_end(int *pmu_fd)
+static void sampling_end(struct bpf_link *links[])
{
int i;
for (i = 0; i < nr_cpus; i++)
- close(pmu_fd[i]);
+ bpf_link__destroy(links[i]);
}
struct ipcount {
@@ -128,14 +132,17 @@
static void int_exit(int sig)
{
printf("\n");
- print_ip_map(map_fd[0]);
+ print_ip_map(map_fd);
exit(0);
}
int main(int argc, char **argv)
{
+ int opt, freq = DEFAULT_FREQ, secs = DEFAULT_SECS, error = 1;
+ struct bpf_object *obj = NULL;
+ struct bpf_program *prog;
+ struct bpf_link **links;
char filename[256];
- int *pmu_fd, opt, freq = DEFAULT_FREQ, secs = DEFAULT_SECS;
/* process arguments */
while ((opt = getopt(argc, argv, "F:h")) != -1) {
@@ -163,38 +170,58 @@
}
/* create perf FDs for each CPU */
- nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
- pmu_fd = malloc(nr_cpus * sizeof(int));
- if (pmu_fd == NULL) {
- fprintf(stderr, "ERROR: malloc of pmu_fd\n");
- return 1;
+ nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+ links = calloc(nr_cpus, sizeof(struct bpf_link *));
+ if (!links) {
+ fprintf(stderr, "ERROR: malloc of links\n");
+ return 1; /* links is NULL: the cleanup path would index it */
+ }
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ obj = NULL;
+ goto cleanup;
+ }
+
+ prog = bpf_object__find_program_by_name(obj, "do_sample");
+ if (!prog) {
+ fprintf(stderr, "ERROR: finding a prog in obj file failed\n");
+ goto cleanup;
}
/* load BPF program */
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- if (load_bpf_file(filename)) {
- fprintf(stderr, "ERROR: loading BPF program (errno %d):\n",
- errno);
- if (strcmp(bpf_log_buf, "") == 0)
- fprintf(stderr, "Try: ulimit -l unlimited\n");
- else
- fprintf(stderr, "%s", bpf_log_buf);
- return 1;
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
}
+
+ map_fd = bpf_object__find_map_fd_by_name(obj, "ip_map");
+ if (map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
signal(SIGINT, int_exit);
signal(SIGTERM, int_exit);
/* do sampling */
printf("Sampling at %d Hertz for %d seconds. Ctrl-C also ends.\n",
freq, secs);
- if (sampling_start(pmu_fd, freq) != 0)
- return 1;
+ if (sampling_start(freq, prog, links) != 0)
+ goto cleanup;
+
sleep(secs);
- sampling_end(pmu_fd);
- free(pmu_fd);
+ error = 0;
+cleanup:
+ sampling_end(links);
/* output sample counts */
- print_ip_map(map_fd[0]);
+ if (!error)
+ print_ip_map(map_fd);
- return 0;
+ free(links);
+ bpf_object__close(obj);
+ return error;
}
diff --git a/samples/bpf/sock_flags_kern.c b/samples/bpf/sock_flags_kern.c
index 05dcdf8..6d0ac75 100644
--- a/samples/bpf/sock_flags_kern.c
+++ b/samples/bpf/sock_flags_kern.c
@@ -3,7 +3,7 @@
#include <linux/net.h>
#include <uapi/linux/in.h>
#include <uapi/linux/in6.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
SEC("cgroup/sock1")
int bpf_prog1(struct bpf_sock *sk)
diff --git a/samples/bpf/sockex1_kern.c b/samples/bpf/sockex1_kern.c
index 43e38ce..431c956 100644
--- a/samples/bpf/sockex1_kern.c
+++ b/samples/bpf/sockex1_kern.c
@@ -2,7 +2,8 @@
#include <uapi/linux/if_ether.h>
#include <uapi/linux/if_packet.h>
#include <uapi/linux/ip.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_legacy.h"
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
diff --git a/samples/bpf/sockex1_user.c b/samples/bpf/sockex1_user.c
index a219442..3c83722 100644
--- a/samples/bpf/sockex1_user.c
+++ b/samples/bpf/sockex1_user.c
@@ -3,7 +3,7 @@
#include <assert.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>
-#include "libbpf.h"
+#include <bpf/libbpf.h>
#include "sock_example.h"
#include <unistd.h>
#include <arpa/inet.h>
diff --git a/samples/bpf/sockex2_kern.c b/samples/bpf/sockex2_kern.c
index ae4bdc8..b799754 100644
--- a/samples/bpf/sockex2_kern.c
+++ b/samples/bpf/sockex2_kern.c
@@ -1,11 +1,12 @@
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
#include <uapi/linux/in.h>
#include <uapi/linux/if.h>
#include <uapi/linux/if_ether.h>
#include <uapi/linux/ip.h>
#include <uapi/linux/ipv6.h>
#include <uapi/linux/if_tunnel.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_legacy.h"
#define IP_MF 0x2000
#define IP_OFFSET 0x1FFF
diff --git a/samples/bpf/sockex2_user.c b/samples/bpf/sockex2_user.c
index 6de383d..af925a5 100644
--- a/samples/bpf/sockex2_user.c
+++ b/samples/bpf/sockex2_user.c
@@ -3,7 +3,7 @@
#include <assert.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>
-#include "libbpf.h"
+#include <bpf/libbpf.h>
#include "sock_example.h"
#include <unistd.h>
#include <arpa/inet.h>
diff --git a/samples/bpf/sockex3_kern.c b/samples/bpf/sockex3_kern.c
index c527b57..b363503 100644
--- a/samples/bpf/sockex3_kern.c
+++ b/samples/bpf/sockex3_kern.c
@@ -5,7 +5,6 @@
* License as published by the Free Software Foundation.
*/
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
#include <uapi/linux/in.h>
#include <uapi/linux/if.h>
#include <uapi/linux/if_ether.h>
@@ -13,28 +12,32 @@
#include <uapi/linux/ipv6.h>
#include <uapi/linux/if_tunnel.h>
#include <uapi/linux/mpls.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_legacy.h"
#define IP_MF 0x2000
#define IP_OFFSET 0x1FFF
#define PROG(F) SEC("socket/"__stringify(F)) int bpf_func_##F
-struct bpf_map_def SEC("maps") jmp_table = {
- .type = BPF_MAP_TYPE_PROG_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u32),
- .max_entries = 8,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, sizeof(u32));
+ __uint(max_entries, 8);
+} jmp_table SEC(".maps");
#define PARSE_VLAN 1
#define PARSE_MPLS 2
#define PARSE_IP 3
#define PARSE_IPV6 4
-/* protocol dispatch routine.
- * It tail-calls next BPF program depending on eth proto
- * Note, we could have used:
- * bpf_tail_call(skb, &jmp_table, proto);
- * but it would need large prog_array
+/* Protocol dispatch routine. It tail-calls next BPF program depending
+ * on eth proto. Note, we could have used ...
+ *
+ * bpf_tail_call(skb, &jmp_table, proto);
+ *
+ * ... but it would need large prog_array and cannot be optimised given
+ * the map key is not static.
*/
static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto)
{
@@ -91,12 +94,12 @@
struct flow_key_record flow;
};
-struct bpf_map_def SEC("maps") percpu_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(struct globals),
- .max_entries = 32,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, struct globals);
+ __uint(max_entries, 32);
+} percpu_map SEC(".maps");
/* user poor man's per_cpu until native support is ready */
static struct globals *this_cpu_globals(void)
@@ -112,12 +115,12 @@
__u64 bytes;
};
-struct bpf_map_def SEC("maps") hash_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(struct flow_key_record),
- .value_size = sizeof(struct pair),
- .max_entries = 1024,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, struct flow_key_record);
+ __type(value, struct pair);
+ __uint(max_entries, 1024);
+} hash_map SEC(".maps");
static void update_stats(struct __sk_buff *skb, struct globals *g)
{
diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c
index bbb1cd0..7793f6a 100644
--- a/samples/bpf/sockex3_user.c
+++ b/samples/bpf/sockex3_user.c
@@ -1,18 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include <assert.h>
-#include <linux/bpf.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
#include "sock_example.h"
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/resource.h>
-#define PARSE_IP 3
-#define PARSE_IP_PROG_FD (prog_fd[0])
-#define PROG_ARRAY_FD (map_fd[0])
-
struct flow_key_record {
__be32 src;
__be32 dst;
@@ -30,31 +25,55 @@
int main(int argc, char **argv)
{
+ int i, sock, key, fd, main_prog_fd = -1, jmp_table_fd, hash_map_fd;
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ const char *section;
char filename[256];
FILE *f;
- int i, sock, err, id, key = PARSE_IP;
- struct bpf_prog_info info = {};
- uint32_t info_len = sizeof(info);
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
setrlimit(RLIMIT_MEMLOCK, &r);
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 1; /* nothing acquired yet; report the failure */
}
- /* Test fd array lookup which returns the id of the bpf_prog */
- err = bpf_obj_get_info_by_fd(PARSE_IP_PROG_FD, &info, &info_len);
- assert(!err);
- err = bpf_map_lookup_elem(PROG_ARRAY_FD, &key, &id);
- assert(!err);
- assert(id == info.id);
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ jmp_table_fd = bpf_object__find_map_fd_by_name(obj, "jmp_table");
+ hash_map_fd = bpf_object__find_map_fd_by_name(obj, "hash_map");
+ if (jmp_table_fd < 0 || hash_map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ fd = bpf_program__fd(prog);
+
+ section = bpf_program__section_name(prog);
+ if (sscanf(section, "socket/%d", &key) != 1) {
+ fprintf(stderr, "ERROR: finding prog failed\n");
+ goto cleanup;
+ }
+
+ if (key == 0)
+ main_prog_fd = fd;
+ else
+ bpf_map_update_elem(jmp_table_fd, &key, &fd, BPF_ANY);
+ }
sock = open_raw_sock("lo");
- assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd[4],
+ /* attach BPF program to socket */
+ assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &main_prog_fd,
sizeof(__u32)) == 0);
if (argc > 1)
@@ -69,8 +88,8 @@
sleep(1);
printf("IP src.port -> dst.port bytes packets\n");
- while (bpf_map_get_next_key(map_fd[2], &key, &next_key) == 0) {
- bpf_map_lookup_elem(map_fd[2], &next_key, &value);
+ while (bpf_map_get_next_key(hash_map_fd, &key, &next_key) == 0) {
+ bpf_map_lookup_elem(hash_map_fd, &next_key, &value);
printf("%s.%05d -> %s.%05d %12lld %12lld\n",
inet_ntoa((struct in_addr){htonl(next_key.src)}),
next_key.port16[0],
@@ -80,5 +99,8 @@
key = next_key;
}
}
+
+cleanup:
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/spintest_kern.c b/samples/bpf/spintest_kern.c
index ce0167d..455da77 100644
--- a/samples/bpf/spintest_kern.c
+++ b/samples/bpf/spintest_kern.c
@@ -9,27 +9,28 @@
#include <linux/version.h>
#include <uapi/linux/bpf.h>
#include <uapi/linux/perf_event.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
-struct bpf_map_def SEC("maps") my_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(long),
- .value_size = sizeof(long),
- .max_entries = 1024,
-};
-struct bpf_map_def SEC("maps") my_map2 = {
- .type = BPF_MAP_TYPE_PERCPU_HASH,
- .key_size = sizeof(long),
- .value_size = sizeof(long),
- .max_entries = 1024,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, long);
+ __type(value, long);
+ __uint(max_entries, 1024);
+} my_map SEC(".maps");
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(key_size, sizeof(long));
+ __uint(value_size, sizeof(long));
+ __uint(max_entries, 1024);
+} my_map2 SEC(".maps");
-struct bpf_map_def SEC("maps") stackmap = {
- .type = BPF_MAP_TYPE_STACK_TRACE,
- .key_size = sizeof(u32),
- .value_size = PERF_MAX_STACK_DEPTH * sizeof(u64),
- .max_entries = 10000,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK_TRACE);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64));
+ __uint(max_entries, 10000);
+} stackmap SEC(".maps");
#define PROG(foo) \
int foo(struct pt_regs *ctx) \
diff --git a/samples/bpf/spintest_user.c b/samples/bpf/spintest_user.c
index 2556af2..f090d0d 100644
--- a/samples/bpf/spintest_user.c
+++ b/samples/bpf/spintest_user.c
@@ -1,40 +1,77 @@
// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include <unistd.h>
-#include <linux/bpf.h>
#include <string.h>
#include <assert.h>
#include <sys/resource.h>
-#include "libbpf.h"
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
#include "trace_helpers.h"
int main(int ac, char **argv)
{
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ char filename[256], symbol[256];
+ struct bpf_object *obj = NULL;
+ struct bpf_link *links[20];
long key, next_key, value;
- char filename[256];
+ struct bpf_program *prog;
+ int map_fd, i, j = 0;
+ const char *section;
struct ksym *sym;
- int i;
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- setrlimit(RLIMIT_MEMLOCK, &r);
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK)");
+ return 1;
+ }
if (load_kallsyms()) {
printf("failed to process /proc/kallsyms\n");
return 2;
}
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ obj = NULL;
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd = bpf_object__find_map_fd_by_name(obj, "my_map");
+ if (map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ section = bpf_program__section_name(prog);
+ if (sscanf(section, "kprobe/%255s", symbol) != 1) /* fits symbol[256] */
+ continue; /* not a kprobe section */
+
+ /* Attach prog only when symbol exists */
+ if (ksym_get_addr(symbol)) {
+ links[j] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[j])) {
+ fprintf(stderr, "bpf_program__attach failed\n");
+ links[j] = NULL;
+ goto cleanup;
+ }
+ j++;
+ }
}
for (i = 0; i < 5; i++) {
key = 0;
printf("kprobing funcs:");
- while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0) {
- bpf_map_lookup_elem(map_fd[0], &next_key, &value);
+ while (bpf_map_get_next_key(map_fd, &key, &next_key) == 0) {
+ bpf_map_lookup_elem(map_fd, &next_key, &value);
assert(next_key == value);
sym = ksym_search(value);
key = next_key;
@@ -48,10 +85,15 @@
if (key)
printf("\n");
key = 0;
- while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0)
- bpf_map_delete_elem(map_fd[0], &next_key);
+ while (bpf_map_get_next_key(map_fd, &key, &next_key) == 0)
+ bpf_map_delete_elem(map_fd, &next_key);
sleep(1);
}
+cleanup:
+ for (j--; j >= 0; j--)
+ bpf_link__destroy(links[j]);
+
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/syscall_tp_kern.c b/samples/bpf/syscall_tp_kern.c
index 630ce8c..50231c2 100644
--- a/samples/bpf/syscall_tp_kern.c
+++ b/samples/bpf/syscall_tp_kern.c
@@ -2,7 +2,7 @@
/* Copyright (c) 2017 Facebook
*/
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
struct syscalls_enter_open_args {
unsigned long long unused;
@@ -18,19 +18,19 @@
long ret;
};
-struct bpf_map_def SEC("maps") enter_open_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u32),
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, u32);
+ __uint(max_entries, 1);
+} enter_open_map SEC(".maps");
-struct bpf_map_def SEC("maps") exit_open_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u32),
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, u32);
+ __uint(max_entries, 1);
+} exit_open_map SEC(".maps");
static __always_inline void count(void *map)
{
diff --git a/samples/bpf/syscall_tp_user.c b/samples/bpf/syscall_tp_user.c
index 57014ba..76a1d00 100644
--- a/samples/bpf/syscall_tp_user.c
+++ b/samples/bpf/syscall_tp_user.c
@@ -5,16 +5,12 @@
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
-#include <signal.h>
-#include <linux/bpf.h>
#include <string.h>
#include <linux/perf_event.h>
#include <errno.h>
-#include <assert.h>
-#include <stdbool.h>
#include <sys/resource.h>
+#include <bpf/libbpf.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
/* This program verifies bpf attachment to tracepoint sys_enter_* and sys_exit_*.
* This requires kernel CONFIG_FTRACE_SYSCALLS to be set.
@@ -49,16 +45,44 @@
static int test(char *filename, int num_progs)
{
- int i, fd, map0_fds[num_progs], map1_fds[num_progs];
+ int map0_fds[num_progs], map1_fds[num_progs], fd, i, j = 0;
+ struct bpf_link *links[num_progs * 4];
+ struct bpf_object *objs[num_progs];
+ struct bpf_program *prog;
for (i = 0; i < num_progs; i++) {
- if (load_bpf_file(filename)) {
- fprintf(stderr, "%s", bpf_log_buf);
- return 1;
+ objs[i] = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(objs[i])) {
+ fprintf(stderr, "opening BPF object file failed\n");
+ objs[i] = NULL;
+ goto cleanup;
}
- printf("prog #%d: map ids %d %d\n", i, map_fd[0], map_fd[1]);
- map0_fds[i] = map_fd[0];
- map1_fds[i] = map_fd[1];
+
+ /* load BPF program */
+ if (bpf_object__load(objs[i])) {
+ fprintf(stderr, "loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map0_fds[i] = bpf_object__find_map_fd_by_name(objs[i],
+ "enter_open_map");
+ map1_fds[i] = bpf_object__find_map_fd_by_name(objs[i],
+ "exit_open_map");
+ if (map0_fds[i] < 0 || map1_fds[i] < 0) {
+ fprintf(stderr, "finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ bpf_object__for_each_program(prog, objs[i]) {
+ links[j] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[j])) {
+ fprintf(stderr, "bpf_program__attach failed\n");
+ links[j] = NULL;
+ goto cleanup;
+ }
+ j++;
+ }
+ printf("prog #%d: map ids %d %d\n", i, map0_fds[i], map1_fds[i]);
}
/* current load_bpf_file has perf_event_open default pid = -1
@@ -80,6 +104,12 @@
verify_map(map1_fds[i]);
}
+cleanup:
+ for (j--; j >= 0; j--)
+ bpf_link__destroy(links[j]);
+
+ for (i--; i >= 0; i--)
+ bpf_object__close(objs[i]);
return 0;
}
diff --git a/samples/bpf/task_fd_query_kern.c b/samples/bpf/task_fd_query_kern.c
index fb56fc2..c821294 100644
--- a/samples/bpf/task_fd_query_kern.c
+++ b/samples/bpf/task_fd_query_kern.c
@@ -2,7 +2,7 @@
#include <linux/version.h>
#include <linux/ptrace.h>
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
SEC("kprobe/blk_mq_start_request")
int bpf_prog1(struct pt_regs *ctx)
@@ -10,7 +10,7 @@
return 0;
}
-SEC("kretprobe/blk_account_io_completion")
+SEC("kretprobe/blk_account_io_done")
int bpf_prog2(struct pt_regs *ctx)
{
return 0;
diff --git a/samples/bpf/task_fd_query_user.c b/samples/bpf/task_fd_query_user.c
index 4c31b30..b68bd2f 100644
--- a/samples/bpf/task_fd_query_user.c
+++ b/samples/bpf/task_fd_query_user.c
@@ -15,7 +15,7 @@
#include <sys/stat.h>
#include <linux/perf_event.h>
-#include "libbpf.h"
+#include <bpf/libbpf.h>
#include "bpf_load.h"
#include "bpf_util.h"
#include "perf-sys.h"
@@ -290,7 +290,7 @@
int main(int argc, char **argv)
{
- struct rlimit r = {1024*1024, RLIM_INFINITY};
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
extern char __executable_start;
char filename[256], buf[256];
__u64 uprobe_file_offset;
@@ -314,7 +314,7 @@
/* test two functions in the corresponding *_kern.c file */
CHECK_AND_RET(test_debug_fs_kprobe(0, "blk_mq_start_request",
BPF_FD_TYPE_KPROBE));
- CHECK_AND_RET(test_debug_fs_kprobe(1, "blk_account_io_completion",
+ CHECK_AND_RET(test_debug_fs_kprobe(1, "blk_account_io_done",
BPF_FD_TYPE_KRETPROBE));
/* test nondebug fs kprobe */
diff --git a/samples/bpf/tc_l2_redirect_kern.c b/samples/bpf/tc_l2_redirect_kern.c
index 7ef2a12..fd2fa00 100644
--- a/samples/bpf/tc_l2_redirect_kern.c
+++ b/samples/bpf/tc_l2_redirect_kern.c
@@ -15,7 +15,7 @@
#include <uapi/linux/filter.h>
#include <uapi/linux/pkt_cls.h>
#include <net/ipv6.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#define _htonl __builtin_bswap32
diff --git a/samples/bpf/tcbpf1_kern.c b/samples/bpf/tcbpf1_kern.c
index 274c884..e935613 100644
--- a/samples/bpf/tcbpf1_kern.c
+++ b/samples/bpf/tcbpf1_kern.c
@@ -7,7 +7,8 @@
#include <uapi/linux/tcp.h>
#include <uapi/linux/filter.h>
#include <uapi/linux/pkt_cls.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_legacy.h"
/* compiler workaround */
#define _htonl __builtin_bswap32
diff --git a/samples/bpf/tcp_basertt_kern.c b/samples/bpf/tcp_basertt_kern.c
index 9dba48c..8dfe09a 100644
--- a/samples/bpf/tcp_basertt_kern.c
+++ b/samples/bpf/tcp_basertt_kern.c
@@ -16,8 +16,8 @@
#include <uapi/linux/if_packet.h>
#include <uapi/linux/ip.h>
#include <linux/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
#define DEBUG 1
diff --git a/samples/bpf/tcp_bufs_kern.c b/samples/bpf/tcp_bufs_kern.c
index af8486f..6a80d08 100644
--- a/samples/bpf/tcp_bufs_kern.c
+++ b/samples/bpf/tcp_bufs_kern.c
@@ -17,8 +17,8 @@
#include <uapi/linux/if_packet.h>
#include <uapi/linux/ip.h>
#include <linux/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
#define DEBUG 1
diff --git a/samples/bpf/tcp_clamp_kern.c b/samples/bpf/tcp_clamp_kern.c
index 26c0fd0..e88bd9a 100644
--- a/samples/bpf/tcp_clamp_kern.c
+++ b/samples/bpf/tcp_clamp_kern.c
@@ -17,8 +17,8 @@
#include <uapi/linux/if_packet.h>
#include <uapi/linux/ip.h>
#include <linux/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
#define DEBUG 1
diff --git a/samples/bpf/tcp_cong_kern.c b/samples/bpf/tcp_cong_kern.c
index 6d4dc4c..2311fc9 100644
--- a/samples/bpf/tcp_cong_kern.c
+++ b/samples/bpf/tcp_cong_kern.c
@@ -16,8 +16,8 @@
#include <uapi/linux/if_packet.h>
#include <uapi/linux/ip.h>
#include <linux/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
#define DEBUG 1
diff --git a/samples/bpf/tcp_dumpstats_kern.c b/samples/bpf/tcp_dumpstats_kern.c
index 8557913..e80d3af 100644
--- a/samples/bpf/tcp_dumpstats_kern.c
+++ b/samples/bpf/tcp_dumpstats_kern.c
@@ -4,8 +4,8 @@
*/
#include <linux/bpf.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
#define INTERVAL 1000000000ULL
diff --git a/samples/bpf/tcp_iw_kern.c b/samples/bpf/tcp_iw_kern.c
index da61d53..d144455 100644
--- a/samples/bpf/tcp_iw_kern.c
+++ b/samples/bpf/tcp_iw_kern.c
@@ -17,8 +17,8 @@
#include <uapi/linux/if_packet.h>
#include <uapi/linux/ip.h>
#include <linux/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
#define DEBUG 1
diff --git a/samples/bpf/tcp_rwnd_kern.c b/samples/bpf/tcp_rwnd_kern.c
index d011e38..223d9c2 100644
--- a/samples/bpf/tcp_rwnd_kern.c
+++ b/samples/bpf/tcp_rwnd_kern.c
@@ -16,8 +16,8 @@
#include <uapi/linux/if_packet.h>
#include <uapi/linux/ip.h>
#include <linux/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
#define DEBUG 1
diff --git a/samples/bpf/tcp_synrto_kern.c b/samples/bpf/tcp_synrto_kern.c
index 720d195..d58004e 100644
--- a/samples/bpf/tcp_synrto_kern.c
+++ b/samples/bpf/tcp_synrto_kern.c
@@ -16,8 +16,8 @@
#include <uapi/linux/if_packet.h>
#include <uapi/linux/ip.h>
#include <linux/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
#define DEBUG 1
diff --git a/samples/bpf/tcp_tos_reflect_kern.c b/samples/bpf/tcp_tos_reflect_kern.c
index 369faca..953fedc 100644
--- a/samples/bpf/tcp_tos_reflect_kern.c
+++ b/samples/bpf/tcp_tos_reflect_kern.c
@@ -15,8 +15,8 @@
#include <uapi/linux/ipv6.h>
#include <uapi/linux/in.h>
#include <linux/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
#define DEBUG 1
diff --git a/samples/bpf/test_cgrp2_tc_kern.c b/samples/bpf/test_cgrp2_tc_kern.c
index 1547b36..4dd532a 100644
--- a/samples/bpf/test_cgrp2_tc_kern.c
+++ b/samples/bpf/test_cgrp2_tc_kern.c
@@ -10,7 +10,7 @@
#include <uapi/linux/ipv6.h>
#include <uapi/linux/pkt_cls.h>
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
/* copy of 'struct ethhdr' without __packed */
struct eth_hdr {
diff --git a/samples/bpf/test_current_task_under_cgroup_kern.c b/samples/bpf/test_current_task_under_cgroup_kern.c
index 86b28d7..fbd43e2 100644
--- a/samples/bpf/test_current_task_under_cgroup_kern.c
+++ b/samples/bpf/test_current_task_under_cgroup_kern.c
@@ -8,25 +8,26 @@
#include <linux/ptrace.h>
#include <uapi/linux/bpf.h>
#include <linux/version.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#include <uapi/linux/utsname.h>
+#include "trace_common.h"
-struct bpf_map_def SEC("maps") cgroup_map = {
- .type = BPF_MAP_TYPE_CGROUP_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u32),
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_CGROUP_ARRAY);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, sizeof(u32));
+ __uint(max_entries, 1);
+} cgroup_map SEC(".maps");
-struct bpf_map_def SEC("maps") perf_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u64),
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, u64);
+ __uint(max_entries, 1);
+} perf_map SEC(".maps");
/* Writes the last PID that called sync to a map at index 0 */
-SEC("kprobe/sys_sync")
+SEC("kprobe/" SYSCALL(sys_sync))
int bpf_prog1(struct pt_regs *ctx)
{
u64 pid = bpf_get_current_pid_tgid();
diff --git a/samples/bpf/test_current_task_under_cgroup_user.c b/samples/bpf/test_current_task_under_cgroup_user.c
index 06e9f8c..ac251a4 100644
--- a/samples/bpf/test_current_task_under_cgroup_user.c
+++ b/samples/bpf/test_current_task_under_cgroup_user.c
@@ -4,10 +4,9 @@
#define _GNU_SOURCE
#include <stdio.h>
-#include <linux/bpf.h>
#include <unistd.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
#include "cgroup_helpers.h"
#define CGROUP_PATH "/my-cgroup"
@@ -15,13 +14,44 @@
int main(int argc, char **argv)
{
pid_t remote_pid, local_pid = getpid();
- int cg2, idx = 0, rc = 0;
+ struct bpf_link *link = NULL;
+ struct bpf_program *prog;
+ int cg2, idx = 0, rc = 1;
+ struct bpf_object *obj;
char filename[256];
+ int map_fd[2];
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 1; /* a failed open must not exit with status 0 */
+ }
+
+ prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+ if (!prog) {
+ printf("finding a prog in obj file failed\n");
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd[0] = bpf_object__find_map_fd_by_name(obj, "cgroup_map");
+ map_fd[1] = bpf_object__find_map_fd_by_name(obj, "perf_map");
+ if (map_fd[0] < 0 || map_fd[1] < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ link = bpf_program__attach(prog);
+ if (libbpf_get_error(link)) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ link = NULL;
+ goto cleanup;
}
if (setup_cgroup_environment())
@@ -70,12 +100,14 @@
goto err;
}
- goto out;
-err:
- rc = 1;
+ rc = 0;
-out:
+err:
close(cg2);
cleanup_cgroup_environment();
+
+cleanup:
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
return rc;
}
diff --git a/samples/bpf/test_lwt_bpf.c b/samples/bpf/test_lwt_bpf.c
index bacc801..1b56857 100644
--- a/samples/bpf/test_lwt_bpf.c
+++ b/samples/bpf/test_lwt_bpf.c
@@ -20,7 +20,7 @@
#include <linux/udp.h>
#include <linux/icmpv6.h>
#include <linux/if_ether.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#include <string.h>
# define printk(fmt, ...) \
diff --git a/samples/bpf/test_map_in_map_kern.c b/samples/bpf/test_map_in_map_kern.c
index 42c44d0..b0200c8 100644
--- a/samples/bpf/test_map_in_map_kern.c
+++ b/samples/bpf/test_map_in_map_kern.c
@@ -10,65 +10,68 @@
#include <linux/version.h>
#include <uapi/linux/bpf.h>
#include <uapi/linux/in6.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "trace_common.h"
#define MAX_NR_PORTS 65536
/* map #0 */
-struct bpf_map_def SEC("maps") port_a = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(int),
- .max_entries = MAX_NR_PORTS,
-};
+struct inner_a {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, int);
+ __uint(max_entries, MAX_NR_PORTS);
+} port_a SEC(".maps");
/* map #1 */
-struct bpf_map_def SEC("maps") port_h = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(int),
- .max_entries = 1,
-};
+struct inner_h {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, int);
+ __uint(max_entries, 1);
+} port_h SEC(".maps");
/* map #2 */
-struct bpf_map_def SEC("maps") reg_result_h = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(int),
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, int);
+ __uint(max_entries, 1);
+} reg_result_h SEC(".maps");
/* map #3 */
-struct bpf_map_def SEC("maps") inline_result_h = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(int),
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, int);
+ __uint(max_entries, 1);
+} inline_result_h SEC(".maps");
/* map #4 */ /* Test case #0 */
-struct bpf_map_def SEC("maps") a_of_port_a = {
- .type = BPF_MAP_TYPE_ARRAY_OF_MAPS,
- .key_size = sizeof(u32),
- .inner_map_idx = 0, /* map_fd[0] is port_a */
- .max_entries = MAX_NR_PORTS,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, MAX_NR_PORTS);
+ __uint(key_size, sizeof(u32));
+ __array(values, struct inner_a); /* use inner_a as inner map */
+} a_of_port_a SEC(".maps");
/* map #5 */ /* Test case #1 */
-struct bpf_map_def SEC("maps") h_of_port_a = {
- .type = BPF_MAP_TYPE_HASH_OF_MAPS,
- .key_size = sizeof(u32),
- .inner_map_idx = 0, /* map_fd[0] is port_a */
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(u32));
+ __array(values, struct inner_a); /* use inner_a as inner map */
+} h_of_port_a SEC(".maps");
/* map #6 */ /* Test case #2 */
-struct bpf_map_def SEC("maps") h_of_port_h = {
- .type = BPF_MAP_TYPE_HASH_OF_MAPS,
- .key_size = sizeof(u32),
- .inner_map_idx = 1, /* map_fd[1] is port_h */
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(u32));
+ __array(values, struct inner_h); /* use inner_h as inner map */
+} h_of_port_h SEC(".maps");
static __always_inline int do_reg_lookup(void *inner_map, u32 port)
{
@@ -100,7 +103,7 @@
return result ? *result : -ENOENT;
}
-SEC("kprobe/sys_connect")
+SEC("kprobe/__sys_connect")
int trace_sys_connect(struct pt_regs *ctx)
{
struct sockaddr_in6 *in6;
@@ -110,13 +113,13 @@
void *outer_map, *inner_map;
bool inline_hash = false;
- in6 = (struct sockaddr_in6 *)PT_REGS_PARM2(ctx);
- addrlen = (int)PT_REGS_PARM3(ctx);
+ in6 = (struct sockaddr_in6 *)PT_REGS_PARM2_CORE(ctx);
+ addrlen = (int)PT_REGS_PARM3_CORE(ctx);
if (addrlen != sizeof(*in6))
return 0;
- ret = bpf_probe_read(dst6, sizeof(dst6), &in6->sin6_addr);
+ ret = bpf_probe_read_user(dst6, sizeof(dst6), &in6->sin6_addr);
if (ret) {
inline_ret = ret;
goto done;
@@ -127,7 +130,7 @@
test_case = dst6[7];
- ret = bpf_probe_read(&port, sizeof(port), &in6->sin6_port);
+ ret = bpf_probe_read_user(&port, sizeof(port), &in6->sin6_port);
if (ret) {
inline_ret = ret;
goto done;
diff --git a/samples/bpf/test_map_in_map_user.c b/samples/bpf/test_map_in_map_user.c
index eb29bcb..98656de 100644
--- a/samples/bpf/test_map_in_map_user.c
+++ b/samples/bpf/test_map_in_map_user.c
@@ -11,7 +11,9 @@
#include <stdlib.h>
#include <stdio.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
+
+static int map_fd[7];
#define PORT_A (map_fd[0])
#define PORT_H (map_fd[1])
@@ -113,18 +115,59 @@
int main(int argc, char **argv)
{
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ struct bpf_link *link = NULL;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
char filename[256];
- assert(!setrlimit(RLIMIT_MEMLOCK, &r));
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK)");
+ return 1;
+ }
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ prog = bpf_object__find_program_by_name(obj, "trace_sys_connect");
+ if (!prog) {
+ printf("finding a prog in obj file failed\n");
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd[0] = bpf_object__find_map_fd_by_name(obj, "port_a");
+ map_fd[1] = bpf_object__find_map_fd_by_name(obj, "port_h");
+ map_fd[2] = bpf_object__find_map_fd_by_name(obj, "reg_result_h");
+ map_fd[3] = bpf_object__find_map_fd_by_name(obj, "inline_result_h");
+ map_fd[4] = bpf_object__find_map_fd_by_name(obj, "a_of_port_a");
+ map_fd[5] = bpf_object__find_map_fd_by_name(obj, "h_of_port_a");
+ map_fd[6] = bpf_object__find_map_fd_by_name(obj, "h_of_port_h");
+ if (map_fd[0] < 0 || map_fd[1] < 0 || map_fd[2] < 0 ||
+ map_fd[3] < 0 || map_fd[4] < 0 || map_fd[5] < 0 || map_fd[6] < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ link = bpf_program__attach(prog);
+ if (libbpf_get_error(link)) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ link = NULL;
+ goto cleanup;
}
test_map_in_map();
+cleanup:
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/test_overhead_kprobe_kern.c b/samples/bpf/test_overhead_kprobe_kern.c
index 468a66a..f6d593e 100644
--- a/samples/bpf/test_overhead_kprobe_kern.c
+++ b/samples/bpf/test_overhead_kprobe_kern.c
@@ -7,9 +7,15 @@
#include <linux/version.h>
#include <linux/ptrace.h>
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
-#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;})
+#define _(P) \
+ ({ \
+ typeof(P) val = 0; \
+ bpf_probe_read_kernel(&val, sizeof(val), &(P)); \
+ val; \
+ })
SEC("kprobe/__set_task_comm")
int prog(struct pt_regs *ctx)
@@ -24,8 +30,9 @@
tsk = (void *)PT_REGS_PARM1(ctx);
pid = _(tsk->pid);
- bpf_probe_read(oldcomm, sizeof(oldcomm), &tsk->comm);
- bpf_probe_read(newcomm, sizeof(newcomm), (void *)PT_REGS_PARM2(ctx));
+ bpf_probe_read_kernel(oldcomm, sizeof(oldcomm), &tsk->comm);
+ bpf_probe_read_kernel(newcomm, sizeof(newcomm),
+ (void *)PT_REGS_PARM2(ctx));
signal = _(tsk->signal);
oom_score_adj = _(signal->oom_score_adj);
return 0;
diff --git a/samples/bpf/test_overhead_raw_tp_kern.c b/samples/bpf/test_overhead_raw_tp_kern.c
index d2af8bc..8763181 100644
--- a/samples/bpf/test_overhead_raw_tp_kern.c
+++ b/samples/bpf/test_overhead_raw_tp_kern.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018 Facebook */
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
SEC("raw_tracepoint/task_rename")
int prog(struct bpf_raw_tracepoint_args *ctx)
diff --git a/samples/bpf/test_overhead_tp_kern.c b/samples/bpf/test_overhead_tp_kern.c
index 38f5c0b..eaa3269 100644
--- a/samples/bpf/test_overhead_tp_kern.c
+++ b/samples/bpf/test_overhead_tp_kern.c
@@ -5,7 +5,7 @@
* License as published by the Free Software Foundation.
*/
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
/* from /sys/kernel/debug/tracing/events/task/task_rename/format */
struct task_rename {
diff --git a/samples/bpf/test_probe_write_user_kern.c b/samples/bpf/test_probe_write_user_kern.c
index 3a677c8..220a964 100644
--- a/samples/bpf/test_probe_write_user_kern.c
+++ b/samples/bpf/test_probe_write_user_kern.c
@@ -8,14 +8,17 @@
#include <linux/netdevice.h>
#include <uapi/linux/bpf.h>
#include <linux/version.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "trace_common.h"
-struct bpf_map_def SEC("maps") dnat_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(struct sockaddr_in),
- .value_size = sizeof(struct sockaddr_in),
- .max_entries = 256,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, struct sockaddr_in);
+ __type(value, struct sockaddr_in);
+ __uint(max_entries, 256);
+} dnat_map SEC(".maps");
/* kprobe is NOT a stable ABI
* kernel functions can be removed, renamed or completely change semantics.
@@ -25,18 +28,19 @@
* This example sits on a syscall, and the syscall ABI is relatively stable
* of course, across platforms, and over time, the ABI may change.
*/
-SEC("kprobe/sys_connect")
+SEC("kprobe/" SYSCALL(sys_connect))
int bpf_prog1(struct pt_regs *ctx)
{
+ struct pt_regs *real_regs = (struct pt_regs *)PT_REGS_PARM1_CORE(ctx);
+ void *sockaddr_arg = (void *)PT_REGS_PARM2_CORE(real_regs);
+ int sockaddr_len = (int)PT_REGS_PARM3_CORE(real_regs);
struct sockaddr_in new_addr, orig_addr = {};
struct sockaddr_in *mapped_addr;
- void *sockaddr_arg = (void *)PT_REGS_PARM2(ctx);
- int sockaddr_len = (int)PT_REGS_PARM3(ctx);
if (sockaddr_len > sizeof(orig_addr))
return 0;
- if (bpf_probe_read(&orig_addr, sizeof(orig_addr), sockaddr_arg) != 0)
+ if (bpf_probe_read_user(&orig_addr, sizeof(orig_addr), sockaddr_arg) != 0)
return 0;
mapped_addr = bpf_map_lookup_elem(&dnat_map, &orig_addr);
diff --git a/samples/bpf/test_probe_write_user_user.c b/samples/bpf/test_probe_write_user_user.c
index 045eb5e..00ccfb8 100644
--- a/samples/bpf/test_probe_write_user_user.c
+++ b/samples/bpf/test_probe_write_user_user.c
@@ -1,21 +1,22 @@
// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include <assert.h>
-#include <linux/bpf.h>
#include <unistd.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
#include <sys/socket.h>
-#include <string.h>
#include <netinet/in.h>
#include <arpa/inet.h>
int main(int ac, char **argv)
{
- int serverfd, serverconnfd, clientfd;
- socklen_t sockaddr_len;
- struct sockaddr serv_addr, mapped_addr, tmp_addr;
struct sockaddr_in *serv_addr_in, *mapped_addr_in, *tmp_addr_in;
+ struct sockaddr serv_addr, mapped_addr, tmp_addr;
+ int serverfd, serverconnfd, clientfd, map_fd;
+ struct bpf_link *link = NULL;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ socklen_t sockaddr_len;
char filename[256];
char *ip;
@@ -24,10 +25,35 @@
tmp_addr_in = (struct sockaddr_in *)&tmp_addr;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+ if (!prog) { /* a failed lookup returns NULL */
+ fprintf(stderr, "ERROR: finding a prog in obj file failed\n");
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd = bpf_object__find_map_fd_by_name(obj, "dnat_map");
+ if (map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ link = bpf_program__attach(prog);
+ if (libbpf_get_error(link)) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ link = NULL;
+ goto cleanup;
}
assert((serverfd = socket(AF_INET, SOCK_STREAM, 0)) > 0);
@@ -51,7 +77,7 @@
mapped_addr_in->sin_port = htons(5555);
mapped_addr_in->sin_addr.s_addr = inet_addr("255.255.255.255");
- assert(!bpf_map_update_elem(map_fd[0], &mapped_addr, &serv_addr, BPF_ANY));
+ assert(!bpf_map_update_elem(map_fd, &mapped_addr, &serv_addr, BPF_ANY));
assert(listen(serverfd, 5) == 0);
@@ -75,5 +101,8 @@
/* Is the server's getsockname = the socket getpeername */
assert(memcmp(&serv_addr, &tmp_addr, sizeof(struct sockaddr_in)) == 0);
+cleanup:
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/trace_common.h b/samples/bpf/trace_common.h
new file mode 100644
index 0000000..8cb5400
--- /dev/null
+++ b/samples/bpf/trace_common.h
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef __TRACE_COMMON_H
+#define __TRACE_COMMON_H
+
+#ifdef __x86_64__
+#define SYSCALL(SYS) "__x64_" __stringify(SYS)
+#elif defined(__s390x__)
+#define SYSCALL(SYS) "__s390x_" __stringify(SYS)
+#else
+#define SYSCALL(SYS) __stringify(SYS)
+#endif
+
+#endif
diff --git a/samples/bpf/trace_event_kern.c b/samples/bpf/trace_event_kern.c
index 7068fbd..7d3c66f 100644
--- a/samples/bpf/trace_event_kern.c
+++ b/samples/bpf/trace_event_kern.c
@@ -9,7 +9,8 @@
#include <uapi/linux/bpf.h>
#include <uapi/linux/bpf_perf_event.h>
#include <uapi/linux/perf_event.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
struct key_t {
char comm[TASK_COMM_LEN];
@@ -17,19 +18,19 @@
u32 userstack;
};
-struct bpf_map_def SEC("maps") counts = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(struct key_t),
- .value_size = sizeof(u64),
- .max_entries = 10000,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, struct key_t);
+ __type(value, u64);
+ __uint(max_entries, 10000);
+} counts SEC(".maps");
-struct bpf_map_def SEC("maps") stackmap = {
- .type = BPF_MAP_TYPE_STACK_TRACE,
- .key_size = sizeof(u32),
- .value_size = PERF_MAX_STACK_DEPTH * sizeof(u64),
- .max_entries = 10000,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK_TRACE);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64));
+ __uint(max_entries, 10000);
+} stackmap SEC(".maps");
#define KERN_STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP)
#define USER_STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP | BPF_F_USER_STACK)
diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c
index 749a50f..ac1ba36 100644
--- a/samples/bpf/trace_event_user.c
+++ b/samples/bpf/trace_event_user.c
@@ -6,22 +6,22 @@
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
-#include <fcntl.h>
-#include <poll.h>
-#include <sys/ioctl.h>
#include <linux/perf_event.h>
#include <linux/bpf.h>
#include <signal.h>
-#include <assert.h>
#include <errno.h>
#include <sys/resource.h>
-#include "libbpf.h"
-#include "bpf_load.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
#include "perf-sys.h"
#include "trace_helpers.h"
#define SAMPLE_FREQ 50
+static int pid;
+/* counts, stackmap */
+static int map_fd[2];
+struct bpf_program *prog;
static bool sys_read_seen, sys_write_seen;
static void print_ksym(__u64 addr)
@@ -91,10 +91,10 @@
}
}
-static void int_exit(int sig)
+static void err_exit(int err)
{
- kill(0, SIGKILL);
- exit(0);
+ if (pid > 0) kill(pid, SIGKILL); /* pid 0 = own process group, -1 (failed fork) = every process: never signal those */
+ exit(err);
}
static void print_stacks(void)
@@ -102,7 +102,7 @@
struct key_t key = {}, next_key;
__u64 value;
__u32 stackid = 0, next_id;
- int fd = map_fd[0], stack_map = map_fd[1];
+ int error = 1, fd = map_fd[0], stack_map = map_fd[1];
sys_read_seen = sys_write_seen = false;
while (bpf_map_get_next_key(fd, &key, &next_key) == 0) {
@@ -114,7 +114,7 @@
printf("\n");
if (!sys_read_seen || !sys_write_seen) {
printf("BUG kernel stack doesn't contain sys_read() and sys_write()\n");
- int_exit(0);
+ err_exit(error);
}
/* clear stack map */
@@ -136,43 +136,52 @@
static void test_perf_event_all_cpu(struct perf_event_attr *attr)
{
- int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
- int *pmu_fd = malloc(nr_cpus * sizeof(int));
- int i, error = 0;
+ int nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+ struct bpf_link **links = calloc(nr_cpus, sizeof(struct bpf_link *));
+ int i, pmu_fd, error = 1;
+
+ if (!links) {
+ printf("malloc of links failed\n");
+ goto err;
+ }
/* system wide perf event, no need to inherit */
attr->inherit = 0;
/* open perf_event on all cpus */
for (i = 0; i < nr_cpus; i++) {
- pmu_fd[i] = sys_perf_event_open(attr, -1, i, -1, 0);
- if (pmu_fd[i] < 0) {
+ pmu_fd = sys_perf_event_open(attr, -1, i, -1, 0);
+ if (pmu_fd < 0) {
printf("sys_perf_event_open failed\n");
- error = 1;
goto all_cpu_err;
}
- assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0);
- assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE) == 0);
+ links[i] = bpf_program__attach_perf_event(prog, pmu_fd);
+ if (libbpf_get_error(links[i])) {
+ printf("bpf_program__attach_perf_event failed\n");
+ links[i] = NULL;
+ close(pmu_fd);
+ goto all_cpu_err;
+ }
}
- if (generate_load() < 0) {
- error = 1;
+ if (generate_load() < 0)
goto all_cpu_err;
- }
+
print_stacks();
+ error = 0;
all_cpu_err:
- for (i--; i >= 0; i--) {
- ioctl(pmu_fd[i], PERF_EVENT_IOC_DISABLE);
- close(pmu_fd[i]);
- }
- free(pmu_fd);
+ for (i--; i >= 0; i--)
+ bpf_link__destroy(links[i]);
+err:
+ free(links);
if (error)
- int_exit(0);
+ err_exit(error);
}
static void test_perf_event_task(struct perf_event_attr *attr)
{
- int pmu_fd, error = 0;
+ struct bpf_link *link = NULL;
+ int pmu_fd, error = 1;
/* per task perf event, enable inherit so the "dd ..." command can be traced properly.
* Enabling inherit will cause bpf_perf_prog_read_time helper failure.
@@ -183,21 +192,25 @@
pmu_fd = sys_perf_event_open(attr, 0, -1, -1, 0);
if (pmu_fd < 0) {
printf("sys_perf_event_open failed\n");
- int_exit(0);
- }
- assert(ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0);
- assert(ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE) == 0);
-
- if (generate_load() < 0) {
- error = 1;
goto err;
}
+ link = bpf_program__attach_perf_event(prog, pmu_fd);
+ if (libbpf_get_error(link)) {
+ printf("bpf_program__attach_perf_event failed\n");
+ link = NULL;
+ close(pmu_fd);
+ goto err;
+ }
+
+ if (generate_load() < 0)
+ goto err;
+
print_stacks();
+ error = 0;
err:
- ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
- close(pmu_fd);
+ bpf_link__destroy(link);
if (error)
- int_exit(0);
+ err_exit(error);
}
static void test_bpf_perf_event(void)
@@ -282,29 +295,60 @@
int main(int argc, char **argv)
{
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ struct bpf_object *obj = NULL;
char filename[256];
+ int error = 1;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
setrlimit(RLIMIT_MEMLOCK, &r);
- signal(SIGINT, int_exit);
- signal(SIGTERM, int_exit);
+ signal(SIGINT, err_exit);
+ signal(SIGTERM, err_exit);
if (load_kallsyms()) {
printf("failed to process /proc/kallsyms\n");
- return 1;
+ goto cleanup;
}
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 2;
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ printf("opening BPF object file failed\n");
+ obj = NULL;
+ goto cleanup;
}
- if (fork() == 0) {
+ prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+ if (!prog) {
+ printf("finding a prog in obj file failed\n");
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ printf("loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd[0] = bpf_object__find_map_fd_by_name(obj, "counts");
+ map_fd[1] = bpf_object__find_map_fd_by_name(obj, "stackmap");
+ if (map_fd[0] < 0 || map_fd[1] < 0) {
+ printf("finding a counts/stackmap map in obj file failed\n");
+ goto cleanup;
+ }
+
+ pid = fork();
+ if (pid == 0) {
read_trace_pipe();
return 0;
+ } else if (pid == -1) {
+ printf("couldn't spawn process\n");
+ goto cleanup;
}
+
test_bpf_perf_event();
- int_exit(0);
- return 0;
+ error = 0;
+
+cleanup:
+ bpf_object__close(obj);
+ err_exit(error);
}
diff --git a/samples/bpf/trace_output_kern.c b/samples/bpf/trace_output_kern.c
index 9b96f4f..b64815a 100644
--- a/samples/bpf/trace_output_kern.c
+++ b/samples/bpf/trace_output_kern.c
@@ -1,16 +1,17 @@
#include <linux/ptrace.h>
#include <linux/version.h>
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include "trace_common.h"
-struct bpf_map_def SEC("maps") my_map = {
- .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(u32),
- .max_entries = 2,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(u32));
+ __uint(max_entries, 2);
+} my_map SEC(".maps");
-SEC("kprobe/sys_write")
+SEC("kprobe/" SYSCALL(sys_write))
int bpf_prog1(struct pt_regs *ctx)
{
struct S {
diff --git a/samples/bpf/trace_output_user.c b/samples/bpf/trace_output_user.c
index 8ee4769..364b987 100644
--- a/samples/bpf/trace_output_user.c
+++ b/samples/bpf/trace_output_user.c
@@ -1,23 +1,10 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <stdio.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <stdbool.h>
-#include <string.h>
#include <fcntl.h>
#include <poll.h>
-#include <linux/perf_event.h>
-#include <linux/bpf.h>
-#include <errno.h>
-#include <assert.h>
-#include <sys/syscall.h>
-#include <sys/ioctl.h>
-#include <sys/mman.h>
#include <time.h>
#include <signal.h>
-#include <libbpf.h>
-#include "bpf_load.h"
-#include "perf-sys.h"
+#include <bpf/libbpf.h>
static __u64 time_get_ns(void)
{
@@ -57,20 +44,48 @@
int main(int argc, char **argv)
{
struct perf_buffer_opts pb_opts = {};
+ struct bpf_link *link = NULL;
+ struct bpf_program *prog;
struct perf_buffer *pb;
+ struct bpf_object *obj;
+ int map_fd, ret = 0;
char filename[256];
FILE *f;
- int ret;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 1; /* non-zero exit, as the replaced load_bpf_file() failure path returned */
+ }
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd = bpf_object__find_map_fd_by_name(obj, "my_map");
+ if (map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+ if (!prog) { /* lookup yields NULL (not an ERR_PTR) when the program is missing */
+ fprintf(stderr, "ERROR: finding a prog in obj file failed\n");
+ goto cleanup;
+ }
+
+ link = bpf_program__attach(prog);
+ if (libbpf_get_error(link)) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ link = NULL;
+ goto cleanup;
}
pb_opts.sample_cb = print_bpf_output;
- pb = perf_buffer__new(map_fd[0], 8, &pb_opts);
+ pb = perf_buffer__new(map_fd, 8, &pb_opts);
ret = libbpf_get_error(pb);
if (ret) {
printf("failed to setup perf_buffer: %d\n", ret);
@@ -84,5 +99,9 @@
while ((ret = perf_buffer__poll(pb, 1000)) >= 0 && cnt < MAX_CNT) {
}
kill(0, SIGINT);
+
+cleanup:
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
return ret;
}
diff --git a/samples/bpf/tracex1_kern.c b/samples/bpf/tracex1_kern.c
index 9c74b45..ef30d2b 100644
--- a/samples/bpf/tracex1_kern.c
+++ b/samples/bpf/tracex1_kern.c
@@ -8,9 +8,15 @@
#include <linux/netdevice.h>
#include <uapi/linux/bpf.h>
#include <linux/version.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
-#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;})
+#define _(P) \
+ ({ \
+ typeof(P) val = 0; \
+ bpf_probe_read_kernel(&val, sizeof(val), &(P)); \
+ val; \
+ })
/* kprobe is NOT a stable ABI
* kernel functions can be removed, renamed or completely change semantics.
@@ -33,7 +39,7 @@
dev = _(skb->dev);
len = _(skb->len);
- bpf_probe_read(devname, sizeof(devname), dev->name);
+ bpf_probe_read_kernel(devname, sizeof(devname), dev->name);
if (devname[0] == 'l' && devname[1] == 'o') {
char fmt[] = "skb %p len %d\n";
diff --git a/samples/bpf/tracex1_user.c b/samples/bpf/tracex1_user.c
index af8c206..9d4adb7 100644
--- a/samples/bpf/tracex1_user.c
+++ b/samples/bpf/tracex1_user.c
@@ -1,20 +1,41 @@
// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
-#include <linux/bpf.h>
#include <unistd.h>
-#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
+#include "trace_helpers.h"
int main(int ac, char **argv)
{
- FILE *f;
+ struct bpf_link *link = NULL;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
char filename[256];
+ FILE *f;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 1; /* non-zero exit, as the replaced load_bpf_file() failure path returned */
+ }
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+ if (!prog) {
+ fprintf(stderr, "ERROR: finding a prog in obj file failed\n");
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ link = bpf_program__attach(prog);
+ if (libbpf_get_error(link)) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ link = NULL;
+ goto cleanup;
}
f = popen("taskset 1 ping -c5 localhost", "r");
@@ -22,5 +43,8 @@
read_trace_pipe();
+cleanup:
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/tracex2_kern.c b/samples/bpf/tracex2_kern.c
index 5e11c20..5bc696b 100644
--- a/samples/bpf/tracex2_kern.c
+++ b/samples/bpf/tracex2_kern.c
@@ -8,14 +8,16 @@
#include <linux/netdevice.h>
#include <linux/version.h>
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "trace_common.h"
-struct bpf_map_def SEC("maps") my_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(long),
- .value_size = sizeof(long),
- .max_entries = 1024,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, long);
+ __type(value, long);
+ __uint(max_entries, 1024);
+} my_map SEC(".maps");
/* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe
* example will no longer be meaningful
@@ -69,14 +71,14 @@
u64 index;
};
-struct bpf_map_def SEC("maps") my_hist_map = {
- .type = BPF_MAP_TYPE_PERCPU_HASH,
- .key_size = sizeof(struct hist_key),
- .value_size = sizeof(long),
- .max_entries = 1024,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(key_size, sizeof(struct hist_key));
+ __uint(value_size, sizeof(long));
+ __uint(max_entries, 1024);
+} my_hist_map SEC(".maps");
-SEC("kprobe/sys_write")
+SEC("kprobe/" SYSCALL(sys_write))
int bpf_prog3(struct pt_regs *ctx)
{
long write_size = PT_REGS_PARM3(ctx);
diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c
index c9544a4..3d6eab7 100644
--- a/samples/bpf/tracex2_user.c
+++ b/samples/bpf/tracex2_user.c
@@ -3,17 +3,19 @@
#include <unistd.h>
#include <stdlib.h>
#include <signal.h>
-#include <linux/bpf.h>
#include <string.h>
#include <sys/resource.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
#include "bpf_util.h"
#define MAX_INDEX 64
#define MAX_STARS 38
+/* my_map, my_hist_map */
+static int map_fd[2];
+
static void stars(char *str, long val, long max, int width)
{
int i;
@@ -114,19 +116,40 @@
int main(int ac, char **argv)
{
- struct rlimit r = {1024*1024, RLIM_INFINITY};
- char filename[256];
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
long key, next_key, value;
+ struct bpf_link *links[2];
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ char filename[256];
+ int i, j = 0;
FILE *f;
- int i;
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
if (setrlimit(RLIMIT_MEMLOCK, &r)) {
perror("setrlimit(RLIMIT_MEMLOCK)");
return 1;
}
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 1; /* report failure, matching the setrlimit() error path above */
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd[0] = bpf_object__find_map_fd_by_name(obj, "my_map");
+ map_fd[1] = bpf_object__find_map_fd_by_name(obj, "my_hist_map");
+ if (map_fd[0] < 0 || map_fd[1] < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
signal(SIGINT, int_exit);
signal(SIGTERM, int_exit);
@@ -138,9 +161,14 @@
f = popen("dd if=/dev/zero of=/dev/null count=5000000", "r");
(void) f;
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ bpf_object__for_each_program(prog, obj) {
+ links[j] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[j])) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ links[j] = NULL;
+ goto cleanup;
+ }
+ j++;
}
for (i = 0; i < 5; i++) {
@@ -156,5 +184,10 @@
}
print_hist(map_fd[1]);
+cleanup:
+ for (j--; j >= 0; j--)
+ bpf_link__destroy(links[j]);
+
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/tracex3_kern.c b/samples/bpf/tracex3_kern.c
index ea1d4c1..710a441 100644
--- a/samples/bpf/tracex3_kern.c
+++ b/samples/bpf/tracex3_kern.c
@@ -8,14 +8,15 @@
#include <linux/netdevice.h>
#include <linux/version.h>
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
-struct bpf_map_def SEC("maps") my_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(long),
- .value_size = sizeof(u64),
- .max_entries = 4096,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, long);
+ __type(value, u64);
+ __uint(max_entries, 4096);
+} my_map SEC(".maps");
/* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe
* example will no longer be meaningful
@@ -41,14 +42,14 @@
#define SLOTS 100
-struct bpf_map_def SEC("maps") lat_map = {
- .type = BPF_MAP_TYPE_PERCPU_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u64),
- .max_entries = SLOTS,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, sizeof(u64));
+ __uint(max_entries, SLOTS);
+} lat_map SEC(".maps");
-SEC("kprobe/blk_account_io_completion")
+SEC("kprobe/blk_account_io_done")
int bpf_prog2(struct pt_regs *ctx)
{
long rq = PT_REGS_PARM1(ctx);
diff --git a/samples/bpf/tracex3_user.c b/samples/bpf/tracex3_user.c
index cf8fedc..83e0fec 100644
--- a/samples/bpf/tracex3_user.c
+++ b/samples/bpf/tracex3_user.c
@@ -7,11 +7,10 @@
#include <unistd.h>
#include <stdbool.h>
#include <string.h>
-#include <linux/bpf.h>
#include <sys/resource.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
#include "bpf_util.h"
#define SLOTS 100
@@ -108,21 +107,12 @@
int main(int ac, char **argv)
{
- struct rlimit r = {1024*1024, RLIM_INFINITY};
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ struct bpf_link *links[2];
+ struct bpf_program *prog;
+ struct bpf_object *obj;
char filename[256];
- int i;
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
-
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK)");
- return 1;
- }
-
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
- }
+ int map_fd, i, j = 0;
for (i = 1; i < ac; i++) {
if (strcmp(argv[i], "-a") == 0) {
@@ -137,6 +127,40 @@
}
}
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK)");
+ return 1;
+ }
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 1; /* report failure, matching the setrlimit() error path above */
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd = bpf_object__find_map_fd_by_name(obj, "lat_map");
+ if (map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ links[j] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[j])) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ links[j] = NULL;
+ goto cleanup;
+ }
+ j++;
+ }
+
printf(" heatmap of IO latency\n");
if (text_only)
printf(" %s", sym[num_colors - 1]);
@@ -153,9 +177,14 @@
for (i = 0; ; i++) {
if (i % 20 == 0)
print_banner();
- print_hist(map_fd[1]);
+ print_hist(map_fd);
sleep(2);
}
+cleanup:
+ for (j--; j >= 0; j--)
+ bpf_link__destroy(links[j]);
+
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/tracex4_kern.c b/samples/bpf/tracex4_kern.c
index 6dd8e38..eb0f8fd 100644
--- a/samples/bpf/tracex4_kern.c
+++ b/samples/bpf/tracex4_kern.c
@@ -7,19 +7,20 @@
#include <linux/ptrace.h>
#include <linux/version.h>
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
struct pair {
u64 val;
u64 ip;
};
-struct bpf_map_def SEC("maps") my_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(long),
- .value_size = sizeof(struct pair),
- .max_entries = 1000000,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, long);
+ __type(value, struct pair);
+ __uint(max_entries, 1000000);
+} my_map SEC(".maps");
/* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe
* example will no longer be meaningful
diff --git a/samples/bpf/tracex4_user.c b/samples/bpf/tracex4_user.c
index ec52203..e8faf8f 100644
--- a/samples/bpf/tracex4_user.c
+++ b/samples/bpf/tracex4_user.c
@@ -8,11 +8,10 @@
#include <stdbool.h>
#include <string.h>
#include <time.h>
-#include <linux/bpf.h>
#include <sys/resource.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
struct pair {
long long val;
@@ -36,8 +35,8 @@
key = write(1, "\e[1;1H\e[2J", 12); /* clear screen */
key = -1;
- while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0) {
- bpf_map_lookup_elem(map_fd[0], &next_key, &v);
+ while (bpf_map_get_next_key(fd, &key, &next_key) == 0) {
+ bpf_map_lookup_elem(fd, &next_key, &v);
key = next_key;
if (val - v.val < 1000000000ll)
/* object was allocated more then 1 sec ago */
@@ -50,25 +49,55 @@
int main(int ac, char **argv)
{
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ struct bpf_link *links[2];
+ struct bpf_program *prog;
+ struct bpf_object *obj;
char filename[256];
- int i;
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ int map_fd, i, j = 0;
if (setrlimit(RLIMIT_MEMLOCK, &r)) {
perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)");
return 1;
}
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 1; /* report failure, matching the setrlimit() error path above */
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd = bpf_object__find_map_fd_by_name(obj, "my_map");
+ if (map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ links[j] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[j])) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ links[j] = NULL;
+ goto cleanup;
+ }
+ j++;
}
for (i = 0; ; i++) {
- print_old_objects(map_fd[1]);
+ print_old_objects(map_fd);
sleep(1);
}
+cleanup:
+ for (j--; j >= 0; j--)
+ bpf_link__destroy(links[j]);
+
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/tracex5_kern.c b/samples/bpf/tracex5_kern.c
index 35cb0ee..64a1f75 100644
--- a/samples/bpf/tracex5_kern.c
+++ b/samples/bpf/tracex5_kern.c
@@ -10,20 +10,21 @@
#include <uapi/linux/seccomp.h>
#include <uapi/linux/unistd.h>
#include "syscall_nrs.h"
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
#define PROG(F) SEC("kprobe/"__stringify(F)) int bpf_func_##F
-struct bpf_map_def SEC("maps") progs = {
- .type = BPF_MAP_TYPE_PROG_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u32),
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, sizeof(u32));
#ifdef __mips__
- .max_entries = 6000, /* MIPS n64 syscalls start at 5000 */
+ __uint(max_entries, 6000); /* MIPS n64 syscalls start at 5000 */
#else
- .max_entries = 1024,
+ __uint(max_entries, 1024);
#endif
-};
+} progs SEC(".maps");
SEC("kprobe/__seccomp_filter")
int bpf_prog1(struct pt_regs *ctx)
@@ -46,7 +47,7 @@
{
struct seccomp_data sd;
- bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx));
+ bpf_probe_read_kernel(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx));
if (sd.args[2] == 512) {
char fmt[] = "write(fd=%d, buf=%p, size=%d)\n";
bpf_trace_printk(fmt, sizeof(fmt),
@@ -59,7 +60,7 @@
{
struct seccomp_data sd;
- bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx));
+ bpf_probe_read_kernel(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx));
if (sd.args[2] > 128 && sd.args[2] <= 1024) {
char fmt[] = "read(fd=%d, buf=%p, size=%d)\n";
bpf_trace_printk(fmt, sizeof(fmt),
diff --git a/samples/bpf/tracex5_user.c b/samples/bpf/tracex5_user.c
index c4ab91c..c17d3fb 100644
--- a/samples/bpf/tracex5_user.c
+++ b/samples/bpf/tracex5_user.c
@@ -1,13 +1,20 @@
// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
-#include <linux/bpf.h>
+#include <stdlib.h>
#include <unistd.h>
#include <linux/filter.h>
#include <linux/seccomp.h>
#include <sys/prctl.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
#include <sys/resource.h>
+#include "trace_helpers.h"
+
+#ifdef __mips__
+#define MAX_ENTRIES 6000 /* MIPS n64 syscalls start at 5000 */
+#else
+#define MAX_ENTRIES 1024
+#endif
/* install fake seccomp program to enable seccomp code path inside the kernel,
* so that our kprobe attached to seccomp_phase1() can be triggered
@@ -27,16 +34,57 @@
int main(int ac, char **argv)
{
- FILE *f;
- char filename[256];
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ struct bpf_link *link = NULL;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ int key, fd, progs_fd;
+ const char *section;
+ char filename[256];
+ FILE *f;
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
setrlimit(RLIMIT_MEMLOCK, &r);
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 1; /* non-zero exit, as the replaced load_bpf_file() failure path returned */
+ }
+
+ prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+ if (!prog) {
+ printf("finding a prog in obj file failed\n");
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ link = bpf_program__attach(prog);
+ if (libbpf_get_error(link)) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ link = NULL;
+ goto cleanup;
+ }
+
+ progs_fd = bpf_object__find_map_fd_by_name(obj, "progs");
+ if (progs_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ section = bpf_program__section_name(prog);
+ /* register only syscalls to PROG_ARRAY */
+ if (sscanf(section, "kprobe/%d", &key) != 1)
+ continue;
+
+ fd = bpf_program__fd(prog);
+ bpf_map_update_elem(progs_fd, &key, &fd, BPF_ANY);
}
install_accept_all_seccomp();
@@ -46,5 +94,8 @@
read_trace_pipe();
+cleanup:
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/tracex6_kern.c b/samples/bpf/tracex6_kern.c
index 46c557a..acad571 100644
--- a/samples/bpf/tracex6_kern.c
+++ b/samples/bpf/tracex6_kern.c
@@ -1,26 +1,28 @@
#include <linux/ptrace.h>
#include <linux/version.h>
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
-struct bpf_map_def SEC("maps") counters = {
- .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(u32),
- .max_entries = 64,
-};
-struct bpf_map_def SEC("maps") values = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(int),
- .value_size = sizeof(u64),
- .max_entries = 64,
-};
-struct bpf_map_def SEC("maps") values2 = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(int),
- .value_size = sizeof(struct bpf_perf_event_value),
- .max_entries = 64,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(u32));
+ __uint(max_entries, 64);
+} counters SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, int);
+ __type(value, u64);
+ __uint(max_entries, 64);
+} values SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, int);
+ __type(value, struct bpf_perf_event_value);
+ __uint(max_entries, 64);
+} values2 SEC(".maps");
SEC("kprobe/htab_map_get_next_key")
int bpf_prog1(struct pt_regs *ctx)
diff --git a/samples/bpf/tracex6_user.c b/samples/bpf/tracex6_user.c
index 4bb3c83..33df978 100644
--- a/samples/bpf/tracex6_user.c
+++ b/samples/bpf/tracex6_user.c
@@ -4,7 +4,6 @@
#include <assert.h>
#include <fcntl.h>
#include <linux/perf_event.h>
-#include <linux/bpf.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
@@ -15,12 +14,15 @@
#include <sys/wait.h>
#include <unistd.h>
-#include "bpf_load.h"
#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
#include "perf-sys.h"
#define SAMPLE_PERIOD 0x7fffffffffffffffULL
+/* counters, values, values2 */
+static int map_fd[3];
+
static void check_on_cpu(int cpu, struct perf_event_attr *attr)
{
struct bpf_perf_event_value value2;
@@ -174,16 +176,51 @@
int main(int argc, char **argv)
{
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ struct bpf_link *links[2];
+ struct bpf_program *prog;
+ struct bpf_object *obj;
char filename[256];
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ int i = 0;
setrlimit(RLIMIT_MEMLOCK, &r);
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 1; /* non-zero exit, as the replaced load_bpf_file() failure path returned */
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd[0] = bpf_object__find_map_fd_by_name(obj, "counters");
+ map_fd[1] = bpf_object__find_map_fd_by_name(obj, "values");
+ map_fd[2] = bpf_object__find_map_fd_by_name(obj, "values2");
+ if (map_fd[0] < 0 || map_fd[1] < 0 || map_fd[2] < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ links[i] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[i])) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ links[i] = NULL;
+ goto cleanup;
+ }
+ i++;
}
test_bpf_perf_event();
+
+cleanup:
+ for (i--; i >= 0; i--)
+ bpf_link__destroy(links[i]);
+
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/tracex7_kern.c b/samples/bpf/tracex7_kern.c
index 1ab308a..c5a92df 100644
--- a/samples/bpf/tracex7_kern.c
+++ b/samples/bpf/tracex7_kern.c
@@ -1,7 +1,7 @@
#include <uapi/linux/ptrace.h>
#include <uapi/linux/bpf.h>
#include <linux/version.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
SEC("kprobe/open_ctree")
int bpf_prog1(struct pt_regs *ctx)
diff --git a/samples/bpf/tracex7_user.c b/samples/bpf/tracex7_user.c
index 2ed13e9..8be7ce1 100644
--- a/samples/bpf/tracex7_user.c
+++ b/samples/bpf/tracex7_user.c
@@ -1,17 +1,18 @@
#define _GNU_SOURCE
#include <stdio.h>
-#include <linux/bpf.h>
#include <unistd.h>
-#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
int main(int argc, char **argv)
{
- FILE *f;
+ struct bpf_link *link = NULL;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
char filename[256];
char command[256];
- int ret;
+ int ret = 0;
+ FILE *f;
if (!argv[1]) {
fprintf(stderr, "ERROR: Run with the btrfs device argument!\n");
@@ -19,15 +20,37 @@
}
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 1; /* same status the cleanup path yields for setup errors (ret == 0 -> 1) */
+ }
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+ if (!prog) {
+ fprintf(stderr, "ERROR: finding a prog in obj file failed\n");
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ link = bpf_program__attach(prog);
+ if (libbpf_get_error(link)) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ link = NULL;
+ goto cleanup;
}
snprintf(command, 256, "mount %s tmpmnt/", argv[1]);
f = popen(command, "r");
ret = pclose(f);
+cleanup:
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
return ret ? 0 : 1;
}
diff --git a/samples/bpf/xdp1_kern.c b/samples/bpf/xdp1_kern.c
index db6870a..34b6439 100644
--- a/samples/bpf/xdp1_kern.c
+++ b/samples/bpf/xdp1_kern.c
@@ -12,7 +12,7 @@
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c
index a8e5fa0..c447ad9 100644
--- a/samples/bpf/xdp1_user.c
+++ b/samples/bpf/xdp1_user.c
@@ -15,8 +15,8 @@
#include <net/if.h>
#include "bpf_util.h"
-#include "bpf.h"
-#include "libbpf.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
static int ifindex;
static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
@@ -98,7 +98,7 @@
xdp_flags |= XDP_FLAGS_SKB_MODE;
break;
case 'N':
- xdp_flags |= XDP_FLAGS_DRV_MODE;
+ /* default, set below */
break;
case 'F':
xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
@@ -109,6 +109,9 @@
}
}
+ if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
+ xdp_flags |= XDP_FLAGS_DRV_MODE;
+
if (optind == argc) {
usage(basename(argv[0]));
return 1;
@@ -139,7 +142,7 @@
map_fd = bpf_map__fd(map);
if (!prog_fd) {
- printf("load_bpf_file: %s\n", strerror(errno));
+ printf("bpf_prog_load_xattr: %s\n", strerror(errno));
return 1;
}
diff --git a/samples/bpf/xdp2_kern.c b/samples/bpf/xdp2_kern.c
index c74b52c..c787f4b 100644
--- a/samples/bpf/xdp2_kern.c
+++ b/samples/bpf/xdp2_kern.c
@@ -12,7 +12,7 @@
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
diff --git a/samples/bpf/xdp2skb_meta_kern.c b/samples/bpf/xdp2skb_meta_kern.c
index 0c12048..9b78331 100644
--- a/samples/bpf/xdp2skb_meta_kern.c
+++ b/samples/bpf/xdp2skb_meta_kern.c
@@ -12,7 +12,7 @@
#include <uapi/linux/bpf.h>
#include <uapi/linux/pkt_cls.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
/*
* This struct is stored in the XDP 'data_meta' area, which is located
diff --git a/samples/bpf/xdp_adjust_tail_kern.c b/samples/bpf/xdp_adjust_tail_kern.c
index cd9ff2a..ffdd548 100644
--- a/samples/bpf/xdp_adjust_tail_kern.c
+++ b/samples/bpf/xdp_adjust_tail_kern.c
@@ -18,13 +18,16 @@
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <linux/icmp.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#define DEFAULT_TTL 64
#define MAX_PCKT_SIZE 600
#define ICMP_TOOBIG_SIZE 98
#define ICMP_TOOBIG_PAYLOAD_SIZE 92
+/* volatile to prevent compiler optimizations */
+static volatile __u32 max_pcktsz = MAX_PCKT_SIZE;
+
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__type(key, __u32);
@@ -92,7 +95,7 @@
orig_iph = data + off;
icmp_hdr->type = ICMP_DEST_UNREACH;
icmp_hdr->code = ICMP_FRAG_NEEDED;
- icmp_hdr->un.frag.mtu = htons(MAX_PCKT_SIZE-sizeof(struct ethhdr));
+ icmp_hdr->un.frag.mtu = htons(max_pcktsz - sizeof(struct ethhdr));
icmp_hdr->checksum = 0;
ipv4_csum(icmp_hdr, ICMP_TOOBIG_PAYLOAD_SIZE, &csum);
icmp_hdr->checksum = csum;
@@ -121,7 +124,7 @@
int pckt_size = data_end - data;
int offset;
- if (pckt_size > MAX_PCKT_SIZE) {
+ if (pckt_size > max(max_pcktsz, ICMP_TOOBIG_SIZE)) {
offset = pckt_size - ICMP_TOOBIG_SIZE;
if (bpf_xdp_adjust_tail(xdp, 0 - offset))
return XDP_PASS;
diff --git a/samples/bpf/xdp_adjust_tail_user.c b/samples/bpf/xdp_adjust_tail_user.c
index a3596b6..ba482dc 100644
--- a/samples/bpf/xdp_adjust_tail_user.c
+++ b/samples/bpf/xdp_adjust_tail_user.c
@@ -19,10 +19,11 @@
#include <netinet/ether.h>
#include <unistd.h>
#include <time.h>
-#include "bpf.h"
-#include "libbpf.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
#define STATS_INTERVAL_S 2U
+#define MAX_PCKT_SIZE 600
static int ifindex = -1;
static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
@@ -72,6 +73,7 @@
printf("Usage: %s [...]\n", cmd);
printf(" -i <ifname|ifindex> Interface\n");
printf(" -T <stop-after-X-seconds> Default: 0 (forever)\n");
+ printf(" -P <MAX_PCKT_SIZE> Default: %u\n", MAX_PCKT_SIZE);
printf(" -S use skb-mode\n");
printf(" -N enforce native mode\n");
printf(" -F force loading prog\n");
@@ -85,13 +87,14 @@
.prog_type = BPF_PROG_TYPE_XDP,
};
unsigned char opt_flags[256] = {};
- const char *optstr = "i:T:SNFh";
+ const char *optstr = "i:T:P:SNFh";
struct bpf_prog_info info = {};
__u32 info_len = sizeof(info);
unsigned int kill_after_s = 0;
int i, prog_fd, map_fd, opt;
struct bpf_object *obj;
- struct bpf_map *map;
+ __u32 max_pckt_size = 0;
+ __u32 key = 0;
char filename[256];
int err;
@@ -110,11 +113,14 @@
case 'T':
kill_after_s = atoi(optarg);
break;
+ case 'P':
+ max_pckt_size = atoi(optarg);
+ break;
case 'S':
xdp_flags |= XDP_FLAGS_SKB_MODE;
break;
case 'N':
- xdp_flags |= XDP_FLAGS_DRV_MODE;
+ /* default, set below */
break;
case 'F':
xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
@@ -126,6 +132,9 @@
opt_flags[opt] = 0;
}
+ if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
+ xdp_flags |= XDP_FLAGS_DRV_MODE;
+
for (i = 0; i < strlen(optstr); i++) {
if (opt_flags[(unsigned int)optstr[i]]) {
fprintf(stderr, "Missing argument -%c\n", optstr[i]);
@@ -150,15 +159,20 @@
if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
return 1;
- map = bpf_map__next(NULL, obj);
- if (!map) {
- printf("finding a map in obj file failed\n");
- return 1;
+ /* static global var 'max_pcktsz' is accessible from .data section */
+ if (max_pckt_size) {
+ map_fd = bpf_object__find_map_fd_by_name(obj, "xdp_adju.data");
+ if (map_fd < 0) {
+ printf("finding a max_pcktsz map in obj file failed\n");
+ return 1;
+ }
+ bpf_map_update_elem(map_fd, &key, &max_pckt_size, BPF_ANY);
}
- map_fd = bpf_map__fd(map);
- if (!prog_fd) {
- printf("load_bpf_file: %s\n", strerror(errno));
+ /* fetch icmpcnt map */
+ map_fd = bpf_object__find_map_fd_by_name(obj, "icmpcnt");
+ if (map_fd < 0) {
+ printf("finding a icmpcnt map in obj file failed\n");
return 1;
}
diff --git a/samples/bpf/xdp_fwd_kern.c b/samples/bpf/xdp_fwd_kern.c
index d013029..54c099c 100644
--- a/samples/bpf/xdp_fwd_kern.c
+++ b/samples/bpf/xdp_fwd_kern.c
@@ -19,7 +19,7 @@
#include <linux/ip.h>
#include <linux/ipv6.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#define IPV6_FLOWINFO_MASK cpu_to_be32(0x0FFFFFFF)
diff --git a/samples/bpf/xdp_fwd_user.c b/samples/bpf/xdp_fwd_user.c
index 97ff1da..74a4583 100644
--- a/samples/bpf/xdp_fwd_user.c
+++ b/samples/bpf/xdp_fwd_user.c
@@ -24,14 +24,16 @@
#include <fcntl.h>
#include <libgen.h>
-#include "libbpf.h"
+#include <bpf/libbpf.h>
#include <bpf/bpf.h>
+static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+
static int do_attach(int idx, int prog_fd, int map_fd, const char *name)
{
int err;
- err = bpf_set_link_xdp_fd(idx, prog_fd, 0);
+ err = bpf_set_link_xdp_fd(idx, prog_fd, xdp_flags);
if (err < 0) {
printf("ERROR: failed to attach program to %s\n", name);
return err;
@@ -49,7 +51,7 @@
{
int err;
- err = bpf_set_link_xdp_fd(idx, -1, 0);
+ err = bpf_set_link_xdp_fd(idx, -1, xdp_flags);
if (err < 0)
printf("ERROR: failed to detach program from %s\n", name);
@@ -83,11 +85,17 @@
int attach = 1;
int ret = 0;
- while ((opt = getopt(argc, argv, ":dD")) != -1) {
+ while ((opt = getopt(argc, argv, ":dDSF")) != -1) {
switch (opt) {
case 'd':
attach = 0;
break;
+ case 'S':
+ xdp_flags |= XDP_FLAGS_SKB_MODE;
+ break;
+ case 'F':
+ xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+ break;
case 'D':
prog_name = "xdp_fwd_direct";
break;
@@ -97,6 +105,9 @@
}
}
+ if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
+ xdp_flags |= XDP_FLAGS_DRV_MODE;
+
if (optind == argc) {
usage(basename(argv[0]));
return 1;
diff --git a/samples/bpf/xdp_monitor_kern.c b/samples/bpf/xdp_monitor_kern.c
index ad10fe7..5c955b8 100644
--- a/samples/bpf/xdp_monitor_kern.c
+++ b/samples/bpf/xdp_monitor_kern.c
@@ -4,23 +4,23 @@
* XDP monitor tool, based on tracepoints
*/
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
-struct bpf_map_def SEC("maps") redirect_err_cnt = {
- .type = BPF_MAP_TYPE_PERCPU_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u64),
- .max_entries = 2,
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __type(key, u32);
+ __type(value, u64);
+ __uint(max_entries, 2);
/* TODO: have entries for all possible errno's */
-};
+} redirect_err_cnt SEC(".maps");
#define XDP_UNKNOWN XDP_REDIRECT + 1
-struct bpf_map_def SEC("maps") exception_cnt = {
- .type = BPF_MAP_TYPE_PERCPU_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u64),
- .max_entries = XDP_UNKNOWN + 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __type(key, u32);
+ __type(value, u64);
+ __uint(max_entries, XDP_UNKNOWN + 1);
+} exception_cnt SEC(".maps");
/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format
* Code in: kernel/include/trace/events/xdp.h
@@ -129,19 +129,19 @@
};
#define MAX_CPUS 64
-struct bpf_map_def SEC("maps") cpumap_enqueue_cnt = {
- .type = BPF_MAP_TYPE_PERCPU_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(struct datarec),
- .max_entries = MAX_CPUS,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __type(key, u32);
+ __type(value, struct datarec);
+ __uint(max_entries, MAX_CPUS);
+} cpumap_enqueue_cnt SEC(".maps");
-struct bpf_map_def SEC("maps") cpumap_kthread_cnt = {
- .type = BPF_MAP_TYPE_PERCPU_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(struct datarec),
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __type(key, u32);
+ __type(value, struct datarec);
+ __uint(max_entries, 1);
+} cpumap_kthread_cnt SEC(".maps");
/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format
* Code in: kernel/include/trace/events/xdp.h
@@ -210,26 +210,24 @@
return 0;
}
-struct bpf_map_def SEC("maps") devmap_xmit_cnt = {
- .type = BPF_MAP_TYPE_PERCPU_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(struct datarec),
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __type(key, u32);
+ __type(value, struct datarec);
+ __uint(max_entries, 1);
+} devmap_xmit_cnt SEC(".maps");
/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_devmap_xmit/format
* Code in: kernel/include/trace/events/xdp.h
*/
struct devmap_xmit_ctx {
u64 __pad; // First 8 bytes are not accessible by bpf code
- int map_id; // offset:8; size:4; signed:1;
+ int from_ifindex; // offset:8; size:4; signed:1;
u32 act; // offset:12; size:4; signed:0;
- u32 map_index; // offset:16; size:4; signed:0;
+ int to_ifindex; // offset:16; size:4; signed:1;
int drops; // offset:20; size:4; signed:1;
int sent; // offset:24; size:4; signed:1;
- int from_ifindex; // offset:28; size:4; signed:1;
- int to_ifindex; // offset:32; size:4; signed:1;
- int err; // offset:36; size:4; signed:1;
+ int err; // offset:28; size:4; signed:1;
};
SEC("tracepoint/xdp/xdp_devmap_xmit")
diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c
index ef53b93..03d0a18 100644
--- a/samples/bpf/xdp_monitor_user.c
+++ b/samples/bpf/xdp_monitor_user.c
@@ -26,12 +26,37 @@
#include <net/if.h>
#include <time.h>
+#include <signal.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
#include "bpf_util.h"
+enum map_type {
+ REDIRECT_ERR_CNT,
+ EXCEPTION_CNT,
+ CPUMAP_ENQUEUE_CNT,
+ CPUMAP_KTHREAD_CNT,
+ DEVMAP_XMIT_CNT,
+};
+
+static const char *const map_type_strings[] = {
+ [REDIRECT_ERR_CNT] = "redirect_err_cnt",
+ [EXCEPTION_CNT] = "exception_cnt",
+ [CPUMAP_ENQUEUE_CNT] = "cpumap_enqueue_cnt",
+ [CPUMAP_KTHREAD_CNT] = "cpumap_kthread_cnt",
+ [DEVMAP_XMIT_CNT] = "devmap_xmit_cnt",
+};
+
+#define NUM_MAP 5
+#define NUM_TP 8
+
+static int tp_cnt;
+static int map_cnt;
static int verbose = 1;
static bool debug = false;
+struct bpf_map *map_data[NUM_MAP] = {};
+struct bpf_link *tp_links[NUM_TP] = {};
+struct bpf_object *obj;
static const struct option long_options[] = {
{"help", no_argument, NULL, 'h' },
@@ -41,6 +66,16 @@
{0, 0, NULL, 0 }
};
+static void int_exit(int sig)
+{
+ /* Detach tracepoints */
+ while (tp_cnt)
+ bpf_link__destroy(tp_links[--tp_cnt]);
+
+ bpf_object__close(obj);
+ exit(0);
+}
+
/* C standard specifies two constants, EXIT_SUCCESS(0) and EXIT_FAILURE(1) */
#define EXIT_FAIL_MEM 5
@@ -483,23 +518,23 @@
* this can happen by someone running perf-record -e
*/
- fd = map_data[0].fd; /* map0: redirect_err_cnt */
+ fd = bpf_map__fd(map_data[REDIRECT_ERR_CNT]);
for (i = 0; i < REDIR_RES_MAX; i++)
map_collect_record_u64(fd, i, &rec->xdp_redirect[i]);
- fd = map_data[1].fd; /* map1: exception_cnt */
+ fd = bpf_map__fd(map_data[EXCEPTION_CNT]);
for (i = 0; i < XDP_ACTION_MAX; i++) {
map_collect_record_u64(fd, i, &rec->xdp_exception[i]);
}
- fd = map_data[2].fd; /* map2: cpumap_enqueue_cnt */
+ fd = bpf_map__fd(map_data[CPUMAP_ENQUEUE_CNT]);
for (i = 0; i < MAX_CPUS; i++)
map_collect_record(fd, i, &rec->xdp_cpumap_enqueue[i]);
- fd = map_data[3].fd; /* map3: cpumap_kthread_cnt */
+ fd = bpf_map__fd(map_data[CPUMAP_KTHREAD_CNT]);
map_collect_record(fd, 0, &rec->xdp_cpumap_kthread);
- fd = map_data[4].fd; /* map4: devmap_xmit_cnt */
+ fd = bpf_map__fd(map_data[DEVMAP_XMIT_CNT]);
map_collect_record(fd, 0, &rec->xdp_devmap_xmit);
return true;
@@ -598,8 +633,8 @@
/* TODO Need more advanced stats on error types */
if (verbose) {
- printf(" - Stats map0: %s\n", map_data[0].name);
- printf(" - Stats map1: %s\n", map_data[1].name);
+ printf(" - Stats map0: %s\n", bpf_map__name(map_data[0]));
+ printf(" - Stats map1: %s\n", bpf_map__name(map_data[1]));
printf("\n");
}
fflush(stdout);
@@ -618,44 +653,51 @@
static void print_bpf_prog_info(void)
{
- int i;
+ struct bpf_program *prog;
+ struct bpf_map *map;
+ int i = 0;
/* Prog info */
- printf("Loaded BPF prog have %d bpf program(s)\n", prog_cnt);
- for (i = 0; i < prog_cnt; i++) {
- printf(" - prog_fd[%d] = fd(%d)\n", i, prog_fd[i]);
+ printf("Loaded BPF prog have %d bpf program(s)\n", tp_cnt);
+ bpf_object__for_each_program(prog, obj) {
+ printf(" - prog_fd[%d] = fd(%d)\n", i, bpf_program__fd(prog));
+ i++;
}
+ i = 0;
/* Maps info */
- printf("Loaded BPF prog have %d map(s)\n", map_data_count);
- for (i = 0; i < map_data_count; i++) {
- char *name = map_data[i].name;
- int fd = map_data[i].fd;
+ printf("Loaded BPF prog have %d map(s)\n", map_cnt);
+ bpf_object__for_each_map(map, obj) {
+ const char *name = bpf_map__name(map);
+ int fd = bpf_map__fd(map);
printf(" - map_data[%d] = fd(%d) name:%s\n", i, fd, name);
+ i++;
}
/* Event info */
- printf("Searching for (max:%d) event file descriptor(s)\n", prog_cnt);
- for (i = 0; i < prog_cnt; i++) {
- if (event_fd[i] != -1)
- printf(" - event_fd[%d] = fd(%d)\n", i, event_fd[i]);
+ printf("Searching for (max:%d) event file descriptor(s)\n", tp_cnt);
+ for (i = 0; i < tp_cnt; i++) {
+ int fd = bpf_link__fd(tp_links[i]);
+
+ if (fd != -1)
+ printf(" - event_fd[%d] = fd(%d)\n", i, fd);
}
}
int main(int argc, char **argv)
{
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ struct bpf_program *prog;
int longindex = 0, opt;
- int ret = EXIT_SUCCESS;
- char bpf_obj_file[256];
+ int ret = EXIT_FAILURE;
+ enum map_type type;
+ char filename[256];
/* Default settings: */
bool errors_only = true;
int interval = 2;
- snprintf(bpf_obj_file, sizeof(bpf_obj_file), "%s_kern.o", argv[0]);
-
/* Parse commands line args */
while ((opt = getopt_long(argc, argv, "hDSs:",
long_options, &longindex)) != -1) {
@@ -672,40 +714,79 @@
case 'h':
default:
usage(argv);
- return EXIT_FAILURE;
+ return ret;
}
}
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
if (setrlimit(RLIMIT_MEMLOCK, &r)) {
perror("setrlimit(RLIMIT_MEMLOCK)");
- return EXIT_FAILURE;
+ return ret;
}
- if (load_bpf_file(bpf_obj_file)) {
- printf("ERROR - bpf_log_buf: %s", bpf_log_buf);
- return EXIT_FAILURE;
+ /* Remove tracepoint program when program is interrupted or killed */
+ signal(SIGINT, int_exit);
+ signal(SIGTERM, int_exit);
+
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ printf("ERROR: opening BPF object file failed\n");
+ obj = NULL;
+ goto cleanup;
}
- if (!prog_fd[0]) {
- printf("ERROR - load_bpf_file: %s\n", strerror(errno));
- return EXIT_FAILURE;
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ printf("ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ for (type = 0; type < NUM_MAP; type++) {
+ map_data[type] =
+ bpf_object__find_map_by_name(obj, map_type_strings[type]);
+
+ if (libbpf_get_error(map_data[type])) {
+ printf("ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+ map_cnt++;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ tp_links[tp_cnt] = bpf_program__attach(prog);
+ if (libbpf_get_error(tp_links[tp_cnt])) {
+ printf("ERROR: bpf_program__attach failed\n");
+ tp_links[tp_cnt] = NULL;
+ goto cleanup;
+ }
+ tp_cnt++;
}
if (debug) {
print_bpf_prog_info();
}
- /* Unload/stop tracepoint event by closing fd's */
+ /* Unload/stop tracepoint events by destroying their bpf_links */
if (errors_only) {
- /* The prog_fd[i] and event_fd[i] depend on the
- * order the functions was defined in _kern.c
+ /* The order of tp_links[i] depends on the order in
+ * which the functions were defined in _kern.c
*/
- close(event_fd[2]); /* tracepoint/xdp/xdp_redirect */
- close(prog_fd[2]); /* func: trace_xdp_redirect */
- close(event_fd[3]); /* tracepoint/xdp/xdp_redirect_map */
- close(prog_fd[3]); /* func: trace_xdp_redirect_map */
+ bpf_link__destroy(tp_links[2]); /* tracepoint/xdp/xdp_redirect */
+ tp_links[2] = NULL;
+
+ bpf_link__destroy(tp_links[3]); /* tracepoint/xdp/xdp_redirect_map */
+ tp_links[3] = NULL;
}
stats_poll(interval, errors_only);
+ ret = EXIT_SUCCESS;
+
+cleanup:
+ /* Detach tracepoints */
+ while (tp_cnt)
+ bpf_link__destroy(tp_links[--tp_cnt]);
+
+ bpf_object__close(obj);
return ret;
}
diff --git a/samples/bpf/xdp_redirect_cpu_kern.c b/samples/bpf/xdp_redirect_cpu_kern.c
index d94a999..8255025 100644
--- a/samples/bpf/xdp_redirect_cpu_kern.c
+++ b/samples/bpf/xdp_redirect_cpu_kern.c
@@ -12,7 +12,7 @@
#include <uapi/linux/udp.h>
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#include "hash_func01.h"
#define MAX_CPUS NR_CPUS
@@ -21,7 +21,7 @@
struct {
__uint(type, BPF_MAP_TYPE_CPUMAP);
__uint(key_size, sizeof(u32));
- __uint(value_size, sizeof(u32));
+ __uint(value_size, sizeof(struct bpf_cpumap_val));
__uint(max_entries, MAX_CPUS);
} cpu_map SEC(".maps");
@@ -30,6 +30,9 @@
__u64 processed;
__u64 dropped;
__u64 issue;
+ __u64 xdp_pass;
+ __u64 xdp_drop;
+ __u64 xdp_redirect;
};
/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
@@ -692,13 +695,16 @@
* Code in: kernel/include/trace/events/xdp.h
*/
struct cpumap_kthread_ctx {
- u64 __pad; // First 8 bytes are not accessible by bpf code
- int map_id; // offset:8; size:4; signed:1;
- u32 act; // offset:12; size:4; signed:0;
- int cpu; // offset:16; size:4; signed:1;
- unsigned int drops; // offset:20; size:4; signed:0;
- unsigned int processed; // offset:24; size:4; signed:0;
- int sched; // offset:28; size:4; signed:1;
+ u64 __pad; // First 8 bytes are not accessible
+ int map_id; // offset:8; size:4; signed:1;
+ u32 act; // offset:12; size:4; signed:0;
+ int cpu; // offset:16; size:4; signed:1;
+ unsigned int drops; // offset:20; size:4; signed:0;
+ unsigned int processed; // offset:24; size:4; signed:0;
+ int sched; // offset:28; size:4; signed:1;
+ unsigned int xdp_pass; // offset:32; size:4; signed:0;
+ unsigned int xdp_drop; // offset:36; size:4; signed:0;
+ unsigned int xdp_redirect; // offset:40; size:4; signed:0;
};
SEC("tracepoint/xdp/xdp_cpumap_kthread")
@@ -712,6 +718,9 @@
return 0;
rec->processed += ctx->processed;
rec->dropped += ctx->drops;
+ rec->xdp_pass += ctx->xdp_pass;
+ rec->xdp_drop += ctx->xdp_drop;
+ rec->xdp_redirect += ctx->xdp_redirect;
/* Count times kthread yielded CPU via schedule call */
if (ctx->sched)
diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c
index 0a76725..16eb839 100644
--- a/samples/bpf/xdp_redirect_cpu_user.c
+++ b/samples/bpf/xdp_redirect_cpu_user.c
@@ -19,9 +19,6 @@
#include <time.h>
#include <linux/limits.h>
-#define __must_check
-#include <linux/err.h>
-
#include <arpa/inet.h>
#include <linux/if_link.h>
@@ -29,7 +26,7 @@
#define MAX_PROG 6
#include <bpf/bpf.h>
-#include "libbpf.h"
+#include <bpf/libbpf.h>
#include "bpf_util.h"
@@ -40,18 +37,35 @@
static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
static int n_cpus;
-static int cpu_map_fd;
-static int rx_cnt_map_fd;
-static int redirect_err_cnt_map_fd;
-static int cpumap_enqueue_cnt_map_fd;
-static int cpumap_kthread_cnt_map_fd;
-static int cpus_available_map_fd;
-static int cpus_count_map_fd;
-static int cpus_iterator_map_fd;
-static int exception_cnt_map_fd;
+
+enum map_type {
+ CPU_MAP,
+ RX_CNT,
+ REDIRECT_ERR_CNT,
+ CPUMAP_ENQUEUE_CNT,
+ CPUMAP_KTHREAD_CNT,
+ CPUS_AVAILABLE,
+ CPUS_COUNT,
+ CPUS_ITERATOR,
+ EXCEPTION_CNT,
+};
+
+static const char *const map_type_strings[] = {
+ [CPU_MAP] = "cpu_map",
+ [RX_CNT] = "rx_cnt",
+ [REDIRECT_ERR_CNT] = "redirect_err_cnt",
+ [CPUMAP_ENQUEUE_CNT] = "cpumap_enqueue_cnt",
+ [CPUMAP_KTHREAD_CNT] = "cpumap_kthread_cnt",
+ [CPUS_AVAILABLE] = "cpus_available",
+ [CPUS_COUNT] = "cpus_count",
+ [CPUS_ITERATOR] = "cpus_iterator",
+ [EXCEPTION_CNT] = "exception_cnt",
+};
#define NUM_TP 5
-struct bpf_link *tp_links[NUM_TP] = { 0 };
+#define NUM_MAP 9
+struct bpf_link *tp_links[NUM_TP] = {};
+static int map_fds[NUM_MAP];
static int tp_cnt = 0;
/* Exit return codes */
@@ -73,6 +87,11 @@
{"stress-mode", no_argument, NULL, 'x' },
{"no-separators", no_argument, NULL, 'z' },
{"force", no_argument, NULL, 'F' },
+ {"mprog-disable", no_argument, NULL, 'n' },
+ {"mprog-name", required_argument, NULL, 'e' },
+ {"mprog-filename", required_argument, NULL, 'f' },
+ {"redirect-device", required_argument, NULL, 'r' },
+ {"redirect-map", required_argument, NULL, 'm' },
{0, 0, NULL, 0 }
};
@@ -109,7 +128,7 @@
bpf_object__for_each_program(pos, obj) {
if (bpf_program__is_xdp(pos))
- printf(" %s\n", bpf_program__title(pos, false));
+ printf(" %s\n", bpf_program__section_name(pos));
}
}
@@ -159,6 +178,9 @@
__u64 processed;
__u64 dropped;
__u64 issue;
+ __u64 xdp_pass;
+ __u64 xdp_drop;
+ __u64 xdp_redirect;
};
struct record {
__u64 timestamp;
@@ -178,6 +200,9 @@
/* For percpu maps, userspace gets a value per possible CPU */
unsigned int nr_cpus = bpf_num_possible_cpus();
struct datarec values[nr_cpus];
+ __u64 sum_xdp_redirect = 0;
+ __u64 sum_xdp_pass = 0;
+ __u64 sum_xdp_drop = 0;
__u64 sum_processed = 0;
__u64 sum_dropped = 0;
__u64 sum_issue = 0;
@@ -199,10 +224,19 @@
sum_dropped += values[i].dropped;
rec->cpu[i].issue = values[i].issue;
sum_issue += values[i].issue;
+ rec->cpu[i].xdp_pass = values[i].xdp_pass;
+ sum_xdp_pass += values[i].xdp_pass;
+ rec->cpu[i].xdp_drop = values[i].xdp_drop;
+ sum_xdp_drop += values[i].xdp_drop;
+ rec->cpu[i].xdp_redirect = values[i].xdp_redirect;
+ sum_xdp_redirect += values[i].xdp_redirect;
}
rec->total.processed = sum_processed;
rec->total.dropped = sum_dropped;
rec->total.issue = sum_issue;
+ rec->total.xdp_pass = sum_xdp_pass;
+ rec->total.xdp_drop = sum_xdp_drop;
+ rec->total.xdp_redirect = sum_xdp_redirect;
return true;
}
@@ -303,17 +337,33 @@
return pps;
}
+static void calc_xdp_pps(struct datarec *r, struct datarec *p,
+ double *xdp_pass, double *xdp_drop,
+ double *xdp_redirect, double period_)
+{
+ *xdp_pass = 0, *xdp_drop = 0, *xdp_redirect = 0;
+ if (period_ > 0) {
+ *xdp_redirect = (r->xdp_redirect - p->xdp_redirect) / period_;
+ *xdp_pass = (r->xdp_pass - p->xdp_pass) / period_;
+ *xdp_drop = (r->xdp_drop - p->xdp_drop) / period_;
+ }
+}
+
static void stats_print(struct stats_record *stats_rec,
struct stats_record *stats_prev,
- char *prog_name)
+ char *prog_name, char *mprog_name, int mprog_fd)
{
unsigned int nr_cpus = bpf_num_possible_cpus();
double pps = 0, drop = 0, err = 0;
+ bool mprog_enabled = false;
struct record *rec, *prev;
int to_cpu;
double t;
int i;
+ if (mprog_fd > 0)
+ mprog_enabled = true;
+
/* Header */
printf("Running XDP/eBPF prog_name:%s\n", prog_name);
printf("%-15s %-7s %-14s %-11s %-9s\n",
@@ -458,6 +508,34 @@
printf(fm2_err, "xdp_exception", "total", pps, drop);
}
+ /* CPUMAP attached XDP program that runs on remote/destination CPU */
+ if (mprog_enabled) {
+ char *fmt_k = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f\n";
+ char *fm2_k = "%-15s %-7s %'-14.0f %'-11.0f %'-10.0f\n";
+ double xdp_pass, xdp_drop, xdp_redirect;
+
+ printf("\n2nd remote XDP/eBPF prog_name: %s\n", mprog_name);
+ printf("%-15s %-7s %-14s %-11s %-9s\n",
+ "XDP-cpumap", "CPU:to", "xdp-pass", "xdp-drop", "xdp-redir");
+
+ rec = &stats_rec->kthread;
+ prev = &stats_prev->kthread;
+ t = calc_period(rec, prev);
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+
+ calc_xdp_pps(r, p, &xdp_pass, &xdp_drop,
+ &xdp_redirect, t);
+ if (xdp_pass > 0 || xdp_drop > 0 || xdp_redirect > 0)
+ printf(fmt_k, "xdp-in-kthread", i, xdp_pass, xdp_drop,
+ xdp_redirect);
+ }
+ calc_xdp_pps(&rec->total, &prev->total, &xdp_pass, &xdp_drop,
+ &xdp_redirect, t);
+ printf(fm2_k, "xdp-in-kthread", "total", xdp_pass, xdp_drop, xdp_redirect);
+ }
+
printf("\n");
fflush(stdout);
}
@@ -466,20 +544,20 @@
{
int fd, i;
- fd = rx_cnt_map_fd;
+ fd = map_fds[RX_CNT];
map_collect_percpu(fd, 0, &rec->rx_cnt);
- fd = redirect_err_cnt_map_fd;
+ fd = map_fds[REDIRECT_ERR_CNT];
map_collect_percpu(fd, 1, &rec->redir_err);
- fd = cpumap_enqueue_cnt_map_fd;
+ fd = map_fds[CPUMAP_ENQUEUE_CNT];
for (i = 0; i < n_cpus; i++)
map_collect_percpu(fd, i, &rec->enq[i]);
- fd = cpumap_kthread_cnt_map_fd;
+ fd = map_fds[CPUMAP_KTHREAD_CNT];
map_collect_percpu(fd, 0, &rec->kthread);
- fd = exception_cnt_map_fd;
+ fd = map_fds[EXCEPTION_CNT];
map_collect_percpu(fd, 0, &rec->exception);
}
@@ -494,7 +572,7 @@
*b = tmp;
}
-static int create_cpu_entry(__u32 cpu, __u32 queue_size,
+static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value,
__u32 avail_idx, bool new)
{
__u32 curr_cpus_count = 0;
@@ -504,7 +582,7 @@
/* Add a CPU entry to cpumap, as this allocate a cpu entry in
* the kernel for the cpu.
*/
- ret = bpf_map_update_elem(cpu_map_fd, &cpu, &queue_size, 0);
+ ret = bpf_map_update_elem(map_fds[CPU_MAP], &cpu, value, 0);
if (ret) {
fprintf(stderr, "Create CPU entry failed (err:%d)\n", ret);
exit(EXIT_FAIL_BPF);
@@ -513,21 +591,21 @@
/* Inform bpf_prog's that a new CPU is available to select
* from via some control maps.
*/
- ret = bpf_map_update_elem(cpus_available_map_fd, &avail_idx, &cpu, 0);
+ ret = bpf_map_update_elem(map_fds[CPUS_AVAILABLE], &avail_idx, &cpu, 0);
if (ret) {
fprintf(stderr, "Add to avail CPUs failed\n");
exit(EXIT_FAIL_BPF);
}
/* When not replacing/updating existing entry, bump the count */
- ret = bpf_map_lookup_elem(cpus_count_map_fd, &key, &curr_cpus_count);
+ ret = bpf_map_lookup_elem(map_fds[CPUS_COUNT], &key, &curr_cpus_count);
if (ret) {
fprintf(stderr, "Failed reading curr cpus_count\n");
exit(EXIT_FAIL_BPF);
}
if (new) {
curr_cpus_count++;
- ret = bpf_map_update_elem(cpus_count_map_fd, &key,
+ ret = bpf_map_update_elem(map_fds[CPUS_COUNT], &key,
&curr_cpus_count, 0);
if (ret) {
fprintf(stderr, "Failed write curr cpus_count\n");
@@ -535,9 +613,9 @@
}
}
/* map_fd[7] = cpus_iterator */
- printf("%s CPU:%u as idx:%u queue_size:%d (total cpus_count:%u)\n",
+ printf("%s CPU:%u as idx:%u qsize:%d prog_fd: %d (cpus_count:%u)\n",
new ? "Add-new":"Replace", cpu, avail_idx,
- queue_size, curr_cpus_count);
+ value->qsize, value->bpf_prog.fd, curr_cpus_count);
return 0;
}
@@ -551,7 +629,7 @@
int ret, i;
for (i = 0; i < n_cpus; i++) {
- ret = bpf_map_update_elem(cpus_available_map_fd, &i,
+ ret = bpf_map_update_elem(map_fds[CPUS_AVAILABLE], &i,
&invalid_cpu, 0);
if (ret) {
fprintf(stderr, "Failed marking CPU unavailable\n");
@@ -561,21 +639,26 @@
}
/* Stress cpumap management code by concurrently changing underlying cpumap */
-static void stress_cpumap(void)
+static void stress_cpumap(struct bpf_cpumap_val *value)
{
/* Changing qsize will cause kernel to free and alloc a new
* bpf_cpu_map_entry, with an associated/complicated tear-down
* procedure.
*/
- create_cpu_entry(1, 1024, 0, false);
- create_cpu_entry(1, 8, 0, false);
- create_cpu_entry(1, 16000, 0, false);
+ value->qsize = 1024;
+ create_cpu_entry(1, value, 0, false);
+ value->qsize = 8;
+ create_cpu_entry(1, value, 0, false);
+ value->qsize = 16000;
+ create_cpu_entry(1, value, 0, false);
}
static void stats_poll(int interval, bool use_separators, char *prog_name,
+ char *mprog_name, struct bpf_cpumap_val *value,
bool stress_mode)
{
struct stats_record *record, *prev;
+ int mprog_fd;
record = alloc_stats_record();
prev = alloc_stats_record();
@@ -587,103 +670,125 @@
while (1) {
swap(&prev, &record);
+ mprog_fd = value->bpf_prog.fd;
stats_collect(record);
- stats_print(record, prev, prog_name);
+ stats_print(record, prev, prog_name, mprog_name, mprog_fd);
sleep(interval);
if (stress_mode)
- stress_cpumap();
+ stress_cpumap(value);
}
free_stats_record(record);
free_stats_record(prev);
}
-static struct bpf_link * attach_tp(struct bpf_object *obj,
- const char *tp_category,
- const char* tp_name)
+static int init_tracepoints(struct bpf_object *obj)
{
struct bpf_program *prog;
- struct bpf_link *link;
- char sec_name[PATH_MAX];
- int len;
- len = snprintf(sec_name, PATH_MAX, "tracepoint/%s/%s",
- tp_category, tp_name);
- if (len < 0)
- exit(EXIT_FAIL);
+ bpf_object__for_each_program(prog, obj) {
+ if (bpf_program__is_tracepoint(prog) != true)
+ continue;
- prog = bpf_object__find_program_by_title(obj, sec_name);
- if (!prog) {
- fprintf(stderr, "ERR: finding progsec: %s\n", sec_name);
- exit(EXIT_FAIL_BPF);
+ tp_links[tp_cnt] = bpf_program__attach(prog);
+ if (libbpf_get_error(tp_links[tp_cnt])) {
+ tp_links[tp_cnt] = NULL;
+ return -EINVAL;
+ }
+ tp_cnt++;
}
- link = bpf_program__attach_tracepoint(prog, tp_category, tp_name);
- if (IS_ERR(link))
- exit(EXIT_FAIL_BPF);
-
- return link;
-}
-
-static void init_tracepoints(struct bpf_object *obj) {
- tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_redirect_err");
- tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_redirect_map_err");
- tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_exception");
- tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_cpumap_enqueue");
- tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_cpumap_kthread");
-}
-
-static int init_map_fds(struct bpf_object *obj)
-{
- /* Maps updated by tracepoints */
- redirect_err_cnt_map_fd =
- bpf_object__find_map_fd_by_name(obj, "redirect_err_cnt");
- exception_cnt_map_fd =
- bpf_object__find_map_fd_by_name(obj, "exception_cnt");
- cpumap_enqueue_cnt_map_fd =
- bpf_object__find_map_fd_by_name(obj, "cpumap_enqueue_cnt");
- cpumap_kthread_cnt_map_fd =
- bpf_object__find_map_fd_by_name(obj, "cpumap_kthread_cnt");
-
- /* Maps used by XDP */
- rx_cnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rx_cnt");
- cpu_map_fd = bpf_object__find_map_fd_by_name(obj, "cpu_map");
- cpus_available_map_fd =
- bpf_object__find_map_fd_by_name(obj, "cpus_available");
- cpus_count_map_fd = bpf_object__find_map_fd_by_name(obj, "cpus_count");
- cpus_iterator_map_fd =
- bpf_object__find_map_fd_by_name(obj, "cpus_iterator");
-
- if (cpu_map_fd < 0 || rx_cnt_map_fd < 0 ||
- redirect_err_cnt_map_fd < 0 || cpumap_enqueue_cnt_map_fd < 0 ||
- cpumap_kthread_cnt_map_fd < 0 || cpus_available_map_fd < 0 ||
- cpus_count_map_fd < 0 || cpus_iterator_map_fd < 0 ||
- exception_cnt_map_fd < 0)
- return -ENOENT;
-
return 0;
}
+static int init_map_fds(struct bpf_object *obj)
+{
+ enum map_type type;
+
+ for (type = 0; type < NUM_MAP; type++) {
+ map_fds[type] =
+ bpf_object__find_map_fd_by_name(obj,
+ map_type_strings[type]);
+
+ if (map_fds[type] < 0)
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+static int load_cpumap_prog(char *file_name, char *prog_name,
+ char *redir_interface, char *redir_map)
+{
+ struct bpf_prog_load_attr prog_load_attr = {
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .expected_attach_type = BPF_XDP_CPUMAP,
+ .file = file_name,
+ };
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ int fd;
+
+ if (bpf_prog_load_xattr(&prog_load_attr, &obj, &fd))
+ return -1;
+
+ if (fd < 0) {
+ fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n",
+ strerror(errno));
+ return fd;
+ }
+
+ if (redir_interface && redir_map) {
+ int err, map_fd, ifindex_out, key = 0;
+
+ map_fd = bpf_object__find_map_fd_by_name(obj, redir_map);
+ if (map_fd < 0)
+ return map_fd;
+
+ ifindex_out = if_nametoindex(redir_interface);
+ if (!ifindex_out)
+ return -1;
+
+ err = bpf_map_update_elem(map_fd, &key, &ifindex_out, 0);
+ if (err < 0)
+ return err;
+ }
+
+ prog = bpf_object__find_program_by_title(obj, prog_name);
+ if (!prog) {
+ fprintf(stderr, "bpf_object__find_program_by_title failed\n");
+ return EXIT_FAIL;
+ }
+
+ return bpf_program__fd(prog);
+}
+
int main(int argc, char **argv)
{
- struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
char *prog_name = "xdp_cpu_map5_lb_hash_ip_pairs";
+ char *mprog_filename = "xdp_redirect_kern.o";
+ char *redir_interface = NULL, *redir_map = NULL;
+ char *mprog_name = "xdp_redirect_dummy";
+ bool mprog_disable = false;
struct bpf_prog_load_attr prog_load_attr = {
.prog_type = BPF_PROG_TYPE_UNSPEC,
};
struct bpf_prog_info info = {};
__u32 info_len = sizeof(info);
+ struct bpf_cpumap_val value;
bool use_separators = true;
bool stress_mode = false;
struct bpf_program *prog;
struct bpf_object *obj;
+ int err = EXIT_FAIL;
char filename[256];
int added_cpus = 0;
int longindex = 0;
int interval = 2;
int add_cpu = -1;
- int opt, err;
- int prog_fd;
+ int opt, prog_fd;
+ int *cpu, i;
__u32 qsize;
n_cpus = get_nprocs_conf();
@@ -705,22 +810,34 @@
}
if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
- return EXIT_FAIL;
+ return err;
if (prog_fd < 0) {
fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n",
strerror(errno));
- return EXIT_FAIL;
+ return err;
}
- init_tracepoints(obj);
+
+ if (init_tracepoints(obj) < 0) {
+ fprintf(stderr, "ERR: bpf_program__attach failed\n");
+ return err;
+ }
+
if (init_map_fds(obj) < 0) {
fprintf(stderr, "bpf_object__find_map_fd_by_name failed\n");
- return EXIT_FAIL;
+ return err;
}
mark_cpus_unavailable();
+ cpu = malloc(n_cpus * sizeof(int));
+ if (!cpu) {
+ fprintf(stderr, "failed to allocate cpu array\n");
+ return err;
+ }
+ memset(cpu, 0, n_cpus * sizeof(int));
+
/* Parse commands line args */
- while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzF",
+ while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzFf:e:r:m:n",
long_options, &longindex)) != -1) {
switch (opt) {
case 'd':
@@ -754,6 +871,21 @@
/* Selecting eBPF prog to load */
prog_name = optarg;
break;
+ case 'n':
+ mprog_disable = true;
+ break;
+ case 'f':
+ mprog_filename = optarg;
+ break;
+ case 'e':
+ mprog_name = optarg;
+ break;
+ case 'r':
+ redir_interface = optarg;
+ break;
+ case 'm':
+ redir_map = optarg;
+ break;
case 'c':
/* Add multiple CPUs */
add_cpu = strtoul(optarg, NULL, 0);
@@ -763,8 +895,7 @@
errno, strerror(errno));
goto error;
}
- create_cpu_entry(add_cpu, qsize, added_cpus, true);
- added_cpus++;
+ cpu[added_cpus++] = add_cpu;
break;
case 'q':
qsize = atoi(optarg);
@@ -775,24 +906,44 @@
case 'h':
error:
default:
+ free(cpu);
usage(argv, obj);
return EXIT_FAIL_OPTION;
}
}
+
+ if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
+ xdp_flags |= XDP_FLAGS_DRV_MODE;
+
/* Required option */
if (ifindex == -1) {
fprintf(stderr, "ERR: required option --dev missing\n");
usage(argv, obj);
- return EXIT_FAIL_OPTION;
+ err = EXIT_FAIL_OPTION;
+ goto out;
}
/* Required option */
if (add_cpu == -1) {
fprintf(stderr, "ERR: required option --cpu missing\n");
fprintf(stderr, " Specify multiple --cpu option to add more\n");
usage(argv, obj);
- return EXIT_FAIL_OPTION;
+ err = EXIT_FAIL_OPTION;
+ goto out;
}
+ value.bpf_prog.fd = 0;
+ if (!mprog_disable)
+ value.bpf_prog.fd = load_cpumap_prog(mprog_filename, mprog_name,
+ redir_interface, redir_map);
+ if (value.bpf_prog.fd < 0) {
+ err = value.bpf_prog.fd;
+ goto out;
+ }
+ value.qsize = qsize;
+
+ for (i = 0; i < added_cpus; i++)
+ create_cpu_entry(cpu[i], &value, i, true);
+
/* Remove XDP program when program is interrupted or killed */
signal(SIGINT, int_exit);
signal(SIGTERM, int_exit);
@@ -800,27 +951,33 @@
prog = bpf_object__find_program_by_title(obj, prog_name);
if (!prog) {
fprintf(stderr, "bpf_object__find_program_by_title failed\n");
- return EXIT_FAIL;
+ goto out;
}
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
fprintf(stderr, "bpf_program__fd failed\n");
- return EXIT_FAIL;
+ goto out;
}
if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
fprintf(stderr, "link set xdp fd failed\n");
- return EXIT_FAIL_XDP;
+ err = EXIT_FAIL_XDP;
+ goto out;
}
err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
if (err) {
printf("can't get prog info - %s\n", strerror(errno));
- return err;
+ goto out;
}
prog_id = info.id;
- stats_poll(interval, use_separators, prog_name, stress_mode);
- return EXIT_OK;
+ stats_poll(interval, use_separators, prog_name, mprog_name,
+ &value, stress_mode);
+
+ err = EXIT_OK;
+out:
+ free(cpu);
+ return err;
}
diff --git a/samples/bpf/xdp_redirect_kern.c b/samples/bpf/xdp_redirect_kern.c
index 1f0b7d0..d26ec3a 100644
--- a/samples/bpf/xdp_redirect_kern.c
+++ b/samples/bpf/xdp_redirect_kern.c
@@ -17,7 +17,7 @@
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
diff --git a/samples/bpf/xdp_redirect_map_kern.c b/samples/bpf/xdp_redirect_map_kern.c
index 4631b48..6489352 100644
--- a/samples/bpf/xdp_redirect_map_kern.c
+++ b/samples/bpf/xdp_redirect_map_kern.c
@@ -17,7 +17,7 @@
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
struct {
__uint(type, BPF_MAP_TYPE_DEVMAP);
diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c
index f70ee33..35e16de 100644
--- a/samples/bpf/xdp_redirect_map_user.c
+++ b/samples/bpf/xdp_redirect_map_user.c
@@ -17,7 +17,7 @@
#include "bpf_util.h"
#include <bpf/bpf.h>
-#include "libbpf.h"
+#include <bpf/libbpf.h>
static int ifindex_in;
static int ifindex_out;
@@ -116,7 +116,7 @@
xdp_flags |= XDP_FLAGS_SKB_MODE;
break;
case 'N':
- xdp_flags |= XDP_FLAGS_DRV_MODE;
+ /* default, set below */
break;
case 'F':
xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
@@ -127,6 +127,9 @@
}
}
+ if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
+ xdp_flags |= XDP_FLAGS_DRV_MODE;
+
if (optind == argc) {
printf("usage: %s <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n", argv[0]);
return 1;
diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c
index b7bc2a3..3c92adc 100644
--- a/samples/bpf/xdp_redirect_user.c
+++ b/samples/bpf/xdp_redirect_user.c
@@ -17,7 +17,7 @@
#include "bpf_util.h"
#include <bpf/bpf.h>
-#include "libbpf.h"
+#include <bpf/libbpf.h>
static int ifindex_in;
static int ifindex_out;
@@ -117,7 +117,7 @@
xdp_flags |= XDP_FLAGS_SKB_MODE;
break;
case 'N':
- xdp_flags |= XDP_FLAGS_DRV_MODE;
+ /* default, set below */
break;
case 'F':
xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
@@ -128,7 +128,10 @@
}
}
- if (optind == argc) {
+ if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
+ xdp_flags |= XDP_FLAGS_DRV_MODE;
+
+ if (optind + 2 != argc) {
printf("usage: %s <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n", argv[0]);
return 1;
}
diff --git a/samples/bpf/xdp_router_ipv4_kern.c b/samples/bpf/xdp_router_ipv4_kern.c
index bf11efc..b37ca2b 100644
--- a/samples/bpf/xdp_router_ipv4_kern.c
+++ b/samples/bpf/xdp_router_ipv4_kern.c
@@ -12,7 +12,7 @@
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#include <linux/slab.h>
#include <net/ip_fib.h>
diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c
index 1469b66..c2da1b5 100644
--- a/samples/bpf/xdp_router_ipv4_user.c
+++ b/samples/bpf/xdp_router_ipv4_user.c
@@ -21,7 +21,7 @@
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include "bpf_util.h"
-#include "libbpf.h"
+#include <bpf/libbpf.h>
#include <sys/resource.h>
#include <libgen.h>
@@ -662,6 +662,9 @@
}
}
+ if (!(flags & XDP_FLAGS_SKB_MODE))
+ flags |= XDP_FLAGS_DRV_MODE;
+
if (optind == ac) {
usage(basename(argv[0]));
return 1;
diff --git a/samples/bpf/xdp_rxq_info_kern.c b/samples/bpf/xdp_rxq_info_kern.c
index 272d0f8..5e7459f 100644
--- a/samples/bpf/xdp_rxq_info_kern.c
+++ b/samples/bpf/xdp_rxq_info_kern.c
@@ -6,7 +6,7 @@
#include <uapi/linux/bpf.h>
#include <uapi/linux/if_ether.h>
#include <uapi/linux/in.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
/* Config setup from with userspace
*
diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c
index 21d6e50..93fa1bc 100644
--- a/samples/bpf/xdp_rxq_info_user.c
+++ b/samples/bpf/xdp_rxq_info_user.c
@@ -22,8 +22,8 @@
#include <arpa/inet.h>
#include <linux/if_link.h>
-#include "bpf.h"
-#include "libbpf.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
#include "bpf_util.h"
static int ifindex = -1;
@@ -51,8 +51,8 @@
{"sec", required_argument, NULL, 's' },
{"no-separators", no_argument, NULL, 'z' },
{"action", required_argument, NULL, 'a' },
- {"readmem", no_argument, NULL, 'r' },
- {"swapmac", no_argument, NULL, 'm' },
+ {"readmem", no_argument, NULL, 'r' },
+ {"swapmac", no_argument, NULL, 'm' },
{"force", no_argument, NULL, 'F' },
{0, 0, NULL, 0 }
};
@@ -450,7 +450,7 @@
int main(int argc, char **argv)
{
__u32 cfg_options= NO_TOUCH ; /* Default: Don't touch packet memory */
- struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
struct bpf_prog_load_attr prog_load_attr = {
.prog_type = BPF_PROG_TYPE_XDP,
};
@@ -492,7 +492,7 @@
map_fd = bpf_map__fd(map);
if (!prog_fd) {
- fprintf(stderr, "ERR: load_bpf_file: %s\n", strerror(errno));
+ fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n", strerror(errno));
return EXIT_FAIL;
}
@@ -544,6 +544,10 @@
return EXIT_FAIL_OPTION;
}
}
+
+ if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
+ xdp_flags |= XDP_FLAGS_DRV_MODE;
+
/* Required option */
if (ifindex == -1) {
fprintf(stderr, "ERR: required option --dev missing\n");
diff --git a/samples/bpf/xdp_sample_pkts_kern.c b/samples/bpf/xdp_sample_pkts_kern.c
index 6c7c7e0..9cf76b3 100644
--- a/samples/bpf/xdp_sample_pkts_kern.c
+++ b/samples/bpf/xdp_sample_pkts_kern.c
@@ -2,17 +2,15 @@
#include <linux/ptrace.h>
#include <linux/version.h>
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#define SAMPLE_SIZE 64ul
-#define MAX_CPUS 128
-struct bpf_map_def SEC("maps") my_map = {
- .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(u32),
- .max_entries = MAX_CPUS,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(u32));
+} my_map SEC(".maps");
SEC("xdp_sample")
int xdp_sample_prog(struct xdp_md *ctx)
diff --git a/samples/bpf/xdp_sample_pkts_user.c b/samples/bpf/xdp_sample_pkts_user.c
index 3002714..4b2a300 100644
--- a/samples/bpf/xdp_sample_pkts_user.c
+++ b/samples/bpf/xdp_sample_pkts_user.c
@@ -10,7 +10,7 @@
#include <sys/sysinfo.h>
#include <sys/ioctl.h>
#include <signal.h>
-#include <libbpf.h>
+#include <bpf/libbpf.h>
#include <bpf/bpf.h>
#include <sys/resource.h>
#include <libgen.h>
@@ -18,7 +18,6 @@
#include "perf-sys.h"
-#define MAX_CPUS 128
static int if_idx;
static char *if_name;
static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
@@ -52,13 +51,13 @@
__u32 curr_prog_id = 0;
int err = 0;
- err = bpf_get_link_xdp_id(idx, &curr_prog_id, 0);
+ err = bpf_get_link_xdp_id(idx, &curr_prog_id, xdp_flags);
if (err) {
printf("bpf_get_link_xdp_id failed\n");
return err;
}
if (prog_id == curr_prog_id) {
- err = bpf_set_link_xdp_fd(idx, -1, 0);
+ err = bpf_set_link_xdp_fd(idx, -1, xdp_flags);
if (err < 0)
printf("ERROR: failed to detach prog from %s\n", name);
} else if (!curr_prog_id) {
@@ -115,7 +114,7 @@
.prog_type = BPF_PROG_TYPE_XDP,
};
struct perf_buffer_opts pb_opts = {};
- const char *optstr = "F";
+ const char *optstr = "FS";
int prog_fd, map_fd, opt;
struct bpf_object *obj;
struct bpf_map *map;
@@ -127,12 +126,18 @@
case 'F':
xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
break;
+ case 'S':
+ xdp_flags |= XDP_FLAGS_SKB_MODE;
+ break;
default:
usage(basename(argv[0]));
return 1;
}
}
+ if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
+ xdp_flags |= XDP_FLAGS_DRV_MODE;
+
if (optind == argc) {
usage(basename(argv[0]));
return 1;
@@ -150,7 +155,7 @@
return 1;
if (!prog_fd) {
- printf("load_bpf_file: %s\n", strerror(errno));
+ printf("bpf_prog_load_xattr: %s\n", strerror(errno));
return 1;
}
diff --git a/samples/bpf/xdp_tx_iptunnel_kern.c b/samples/bpf/xdp_tx_iptunnel_kern.c
index 6db450a..575d57e 100644
--- a/samples/bpf/xdp_tx_iptunnel_kern.c
+++ b/samples/bpf/xdp_tx_iptunnel_kern.c
@@ -16,7 +16,7 @@
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#include "xdp_tx_iptunnel_common.h"
struct {
diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c
index dfb6858..a419bee 100644
--- a/samples/bpf/xdp_tx_iptunnel_user.c
+++ b/samples/bpf/xdp_tx_iptunnel_user.c
@@ -15,7 +15,7 @@
#include <netinet/ether.h>
#include <unistd.h>
#include <time.h>
-#include "libbpf.h"
+#include <bpf/libbpf.h>
#include <bpf/bpf.h>
#include "bpf_util.h"
#include "xdp_tx_iptunnel_common.h"
@@ -231,7 +231,7 @@
xdp_flags |= XDP_FLAGS_SKB_MODE;
break;
case 'N':
- xdp_flags |= XDP_FLAGS_DRV_MODE;
+ /* default, set below */
break;
case 'F':
xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
@@ -243,6 +243,9 @@
opt_flags[opt] = 0;
}
+ if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
+ xdp_flags |= XDP_FLAGS_DRV_MODE;
+
for (i = 0; i < strlen(optstr); i++) {
if (opt_flags[(unsigned int)optstr[i]]) {
fprintf(stderr, "Missing argument -%c\n", optstr[i]);
@@ -268,7 +271,7 @@
return 1;
if (!prog_fd) {
- printf("load_bpf_file: %s\n", strerror(errno));
+ printf("bpf_prog_load_xattr: %s\n", strerror(errno));
return 1;
}
diff --git a/samples/bpf/xdpsock.h b/samples/bpf/xdpsock.h
new file mode 100644
index 0000000..b7eca15
--- /dev/null
+++ b/samples/bpf/xdpsock.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright(c) 2019 Intel Corporation.
+ */
+
+#ifndef XDPSOCK_H_
+#define XDPSOCK_H_
+
+#define MAX_SOCKS 4
+
+#endif /* XDPSOCK_H_ */
diff --git a/samples/bpf/xdpsock_kern.c b/samples/bpf/xdpsock_kern.c
new file mode 100644
index 0000000..0543048
--- /dev/null
+++ b/samples/bpf/xdpsock_kern.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "xdpsock.h"
+
+/* This XDP program is only needed for the XDP_SHARED_UMEM mode.
+ * If you do not use this mode, libbpf can supply an XDP program for you.
+ */
+
+struct {
+ __uint(type, BPF_MAP_TYPE_XSKMAP);
+ __uint(max_entries, MAX_SOCKS);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+} xsks_map SEC(".maps");
+
+static unsigned int rr;
+
+SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
+{
+ rr = (rr + 1) & (MAX_SOCKS - 1);
+
+ return bpf_redirect_map(&xsks_map, rr, XDP_DROP);
+}
diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
index 3b604c1..2e4508a 100644
--- a/samples/bpf/xdpsock_user.c
+++ b/samples/bpf/xdpsock_user.c
@@ -10,6 +10,10 @@
#include <linux/if_link.h>
#include <linux/if_xdp.h>
#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/limits.h>
+#include <linux/udp.h>
+#include <arpa/inet.h>
#include <locale.h>
#include <net/ethernet.h>
#include <net/if.h>
@@ -27,9 +31,10 @@
#include <time.h>
#include <unistd.h>
-#include "libbpf.h"
-#include "xsk.h"
+#include <bpf/libbpf.h>
+#include <bpf/xsk.h>
#include <bpf/bpf.h>
+#include "xdpsock.h"
#ifndef SOL_XDP
#define SOL_XDP 283
@@ -44,13 +49,14 @@
#endif
#define NUM_FRAMES (4 * 1024)
-#define BATCH_SIZE 64
+#define MIN_PKT_SIZE 64
#define DEBUG_HEXDUMP 0
-#define MAX_SOCKS 8
typedef __u64 u64;
typedef __u32 u32;
+typedef __u16 u16;
+typedef __u8 u8;
static unsigned long prev_time;
@@ -65,17 +71,67 @@
static const char *opt_if = "";
static int opt_ifindex;
static int opt_queue;
+static unsigned long opt_duration;
+static unsigned long start_time;
+static bool benchmark_done;
+static u32 opt_batch_size = 64;
+static int opt_pkt_count;
+static u16 opt_pkt_size = MIN_PKT_SIZE;
+static u32 opt_pkt_fill_pattern = 0x12345678;
+static bool opt_extra_stats;
+static bool opt_quiet;
+static bool opt_app_stats;
+static const char *opt_irq_str = "";
+static u32 irq_no;
+static int irqs_at_init = -1;
static int opt_poll;
static int opt_interval = 1;
static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP;
static u32 opt_umem_flags;
static int opt_unaligned_chunks;
static int opt_mmap_flags;
-static u32 opt_xdp_bind_flags;
static int opt_xsk_frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
static int opt_timeout = 1000;
static bool opt_need_wakeup = true;
-static __u32 prog_id;
+static u32 opt_num_xsks = 1;
+static u32 prog_id;
+
+struct xsk_ring_stats {
+ unsigned long rx_npkts;
+ unsigned long tx_npkts;
+ unsigned long rx_dropped_npkts;
+ unsigned long rx_invalid_npkts;
+ unsigned long tx_invalid_npkts;
+ unsigned long rx_full_npkts;
+ unsigned long rx_fill_empty_npkts;
+ unsigned long tx_empty_npkts;
+ unsigned long prev_rx_npkts;
+ unsigned long prev_tx_npkts;
+ unsigned long prev_rx_dropped_npkts;
+ unsigned long prev_rx_invalid_npkts;
+ unsigned long prev_tx_invalid_npkts;
+ unsigned long prev_rx_full_npkts;
+ unsigned long prev_rx_fill_empty_npkts;
+ unsigned long prev_tx_empty_npkts;
+};
+
+struct xsk_driver_stats {
+ unsigned long intrs;
+ unsigned long prev_intrs;
+};
+
+struct xsk_app_stats {
+ unsigned long rx_empty_polls;
+ unsigned long fill_fail_polls;
+ unsigned long copy_tx_sendtos;
+ unsigned long tx_wakeup_sendtos;
+ unsigned long opt_polls;
+ unsigned long prev_rx_empty_polls;
+ unsigned long prev_fill_fail_polls;
+ unsigned long prev_copy_tx_sendtos;
+ unsigned long prev_tx_wakeup_sendtos;
+ unsigned long prev_opt_polls;
+};
struct xsk_umem_info {
struct xsk_ring_prod fq;
@@ -89,10 +145,9 @@
struct xsk_ring_prod tx;
struct xsk_umem_info *umem;
struct xsk_socket *xsk;
- unsigned long rx_npkts;
- unsigned long tx_npkts;
- unsigned long prev_rx_npkts;
- unsigned long prev_tx_npkts;
+ struct xsk_ring_stats ring_stats;
+ struct xsk_app_stats app_stats;
+ struct xsk_driver_stats drv_stats;
u32 outstanding_tx;
};
@@ -135,6 +190,163 @@
}
}
+static int xsk_get_xdp_stats(int fd, struct xsk_socket_info *xsk)
+{
+ struct xdp_statistics stats;
+ socklen_t optlen;
+ int err;
+
+ optlen = sizeof(stats);
+ err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
+ if (err)
+ return err;
+
+ if (optlen == sizeof(struct xdp_statistics)) {
+ xsk->ring_stats.rx_dropped_npkts = stats.rx_dropped;
+ xsk->ring_stats.rx_invalid_npkts = stats.rx_invalid_descs;
+ xsk->ring_stats.tx_invalid_npkts = stats.tx_invalid_descs;
+ xsk->ring_stats.rx_full_npkts = stats.rx_ring_full;
+ xsk->ring_stats.rx_fill_empty_npkts = stats.rx_fill_ring_empty_descs;
+ xsk->ring_stats.tx_empty_npkts = stats.tx_ring_empty_descs;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static void dump_app_stats(long dt)
+{
+ int i;
+
+ for (i = 0; i < num_socks && xsks[i]; i++) {
+ char *fmt = "%-18s %'-14.0f %'-14lu\n";
+ double rx_empty_polls_ps, fill_fail_polls_ps, copy_tx_sendtos_ps,
+ tx_wakeup_sendtos_ps, opt_polls_ps;
+
+ rx_empty_polls_ps = (xsks[i]->app_stats.rx_empty_polls -
+ xsks[i]->app_stats.prev_rx_empty_polls) * 1000000000. / dt;
+ fill_fail_polls_ps = (xsks[i]->app_stats.fill_fail_polls -
+ xsks[i]->app_stats.prev_fill_fail_polls) * 1000000000. / dt;
+ copy_tx_sendtos_ps = (xsks[i]->app_stats.copy_tx_sendtos -
+ xsks[i]->app_stats.prev_copy_tx_sendtos) * 1000000000. / dt;
+ tx_wakeup_sendtos_ps = (xsks[i]->app_stats.tx_wakeup_sendtos -
+ xsks[i]->app_stats.prev_tx_wakeup_sendtos)
+ * 1000000000. / dt;
+ opt_polls_ps = (xsks[i]->app_stats.opt_polls -
+ xsks[i]->app_stats.prev_opt_polls) * 1000000000. / dt;
+
+ printf("\n%-18s %-14s %-14s\n", "", "calls/s", "count");
+ printf(fmt, "rx empty polls", rx_empty_polls_ps, xsks[i]->app_stats.rx_empty_polls);
+ printf(fmt, "fill fail polls", fill_fail_polls_ps,
+ xsks[i]->app_stats.fill_fail_polls);
+ printf(fmt, "copy tx sendtos", copy_tx_sendtos_ps,
+ xsks[i]->app_stats.copy_tx_sendtos);
+ printf(fmt, "tx wakeup sendtos", tx_wakeup_sendtos_ps,
+ xsks[i]->app_stats.tx_wakeup_sendtos);
+ printf(fmt, "opt polls", opt_polls_ps, xsks[i]->app_stats.opt_polls);
+
+ xsks[i]->app_stats.prev_rx_empty_polls = xsks[i]->app_stats.rx_empty_polls;
+ xsks[i]->app_stats.prev_fill_fail_polls = xsks[i]->app_stats.fill_fail_polls;
+ xsks[i]->app_stats.prev_copy_tx_sendtos = xsks[i]->app_stats.copy_tx_sendtos;
+ xsks[i]->app_stats.prev_tx_wakeup_sendtos = xsks[i]->app_stats.tx_wakeup_sendtos;
+ xsks[i]->app_stats.prev_opt_polls = xsks[i]->app_stats.opt_polls;
+ }
+}
+
+static bool get_interrupt_number(void)
+{
+ FILE *f_int_proc;
+ char line[4096];
+ bool found = false;
+
+ f_int_proc = fopen("/proc/interrupts", "r");
+ if (f_int_proc == NULL) {
+ printf("Failed to open /proc/interrupts.\n");
+ return found;
+ }
+
+ while (!feof(f_int_proc) && !found) {
+ /* Make sure to read a full line at a time */
+ if (fgets(line, sizeof(line), f_int_proc) == NULL ||
+ line[strlen(line) - 1] != '\n') {
+ printf("Error reading from interrupts file\n");
+ break;
+ }
+
+ /* Extract interrupt number from line */
+ if (strstr(line, opt_irq_str) != NULL) {
+ irq_no = atoi(line);
+ found = true;
+ break;
+ }
+ }
+
+ fclose(f_int_proc);
+
+ return found;
+}
+
+static int get_irqs(void)
+{
+ char count_path[PATH_MAX];
+ int total_intrs = -1;
+ FILE *f_count_proc;
+ char line[4096];
+
+ snprintf(count_path, sizeof(count_path),
+ "/sys/kernel/irq/%i/per_cpu_count", irq_no);
+ f_count_proc = fopen(count_path, "r");
+ if (f_count_proc == NULL) {
+ printf("Failed to open %s\n", count_path);
+ return total_intrs;
+ }
+
+ if (fgets(line, sizeof(line), f_count_proc) == NULL ||
+ line[strlen(line) - 1] != '\n') {
+ printf("Error reading from %s\n", count_path);
+ } else {
+ static const char com[2] = ",";
+ char *token;
+
+ total_intrs = 0;
+ token = strtok(line, com);
+ while (token != NULL) {
+ /* sum up interrupts across all cores */
+ total_intrs += atoi(token);
+ token = strtok(NULL, com);
+ }
+ }
+
+ fclose(f_count_proc);
+
+ return total_intrs;
+}
+
+static void dump_driver_stats(long dt)
+{
+ int i;
+
+ for (i = 0; i < num_socks && xsks[i]; i++) {
+ char *fmt = "%-18s %'-14.0f %'-14lu\n";
+ double intrs_ps;
+ int n_ints = get_irqs();
+
+ if (n_ints < 0) {
+ printf("error getting intr info for intr %i\n", irq_no);
+ return;
+ }
+ xsks[i]->drv_stats.intrs = n_ints - irqs_at_init;
+
+ intrs_ps = (xsks[i]->drv_stats.intrs - xsks[i]->drv_stats.prev_intrs) *
+ 1000000000. / dt;
+
+ printf("\n%-18s %-14s %-14s\n", "", "intrs/s", "count");
+ printf(fmt, "irqs", intrs_ps, xsks[i]->drv_stats.intrs);
+
+ xsks[i]->drv_stats.prev_intrs = xsks[i]->drv_stats.intrs;
+ }
+}
+
static void dump_stats(void)
{
unsigned long now = get_nsecs();
@@ -144,32 +356,100 @@
prev_time = now;
for (i = 0; i < num_socks && xsks[i]; i++) {
- char *fmt = "%-15s %'-11.0f %'-11lu\n";
- double rx_pps, tx_pps;
+ char *fmt = "%-18s %'-14.0f %'-14lu\n";
+ double rx_pps, tx_pps, dropped_pps, rx_invalid_pps, full_pps, fill_empty_pps,
+ tx_invalid_pps, tx_empty_pps;
- rx_pps = (xsks[i]->rx_npkts - xsks[i]->prev_rx_npkts) *
+ rx_pps = (xsks[i]->ring_stats.rx_npkts - xsks[i]->ring_stats.prev_rx_npkts) *
1000000000. / dt;
- tx_pps = (xsks[i]->tx_npkts - xsks[i]->prev_tx_npkts) *
+ tx_pps = (xsks[i]->ring_stats.tx_npkts - xsks[i]->ring_stats.prev_tx_npkts) *
1000000000. / dt;
printf("\n sock%d@", i);
print_benchmark(false);
printf("\n");
- printf("%-15s %-11s %-11s %-11.2f\n", "", "pps", "pkts",
+ printf("%-18s %-14s %-14s %-14.2f\n", "", "pps", "pkts",
dt / 1000000000.);
- printf(fmt, "rx", rx_pps, xsks[i]->rx_npkts);
- printf(fmt, "tx", tx_pps, xsks[i]->tx_npkts);
+ printf(fmt, "rx", rx_pps, xsks[i]->ring_stats.rx_npkts);
+ printf(fmt, "tx", tx_pps, xsks[i]->ring_stats.tx_npkts);
- xsks[i]->prev_rx_npkts = xsks[i]->rx_npkts;
- xsks[i]->prev_tx_npkts = xsks[i]->tx_npkts;
+ xsks[i]->ring_stats.prev_rx_npkts = xsks[i]->ring_stats.rx_npkts;
+ xsks[i]->ring_stats.prev_tx_npkts = xsks[i]->ring_stats.tx_npkts;
+
+ if (opt_extra_stats) {
+ if (!xsk_get_xdp_stats(xsk_socket__fd(xsks[i]->xsk), xsks[i])) {
+ dropped_pps = (xsks[i]->ring_stats.rx_dropped_npkts -
+ xsks[i]->ring_stats.prev_rx_dropped_npkts) *
+ 1000000000. / dt;
+ rx_invalid_pps = (xsks[i]->ring_stats.rx_invalid_npkts -
+ xsks[i]->ring_stats.prev_rx_invalid_npkts) *
+ 1000000000. / dt;
+ tx_invalid_pps = (xsks[i]->ring_stats.tx_invalid_npkts -
+ xsks[i]->ring_stats.prev_tx_invalid_npkts) *
+ 1000000000. / dt;
+ full_pps = (xsks[i]->ring_stats.rx_full_npkts -
+ xsks[i]->ring_stats.prev_rx_full_npkts) *
+ 1000000000. / dt;
+ fill_empty_pps = (xsks[i]->ring_stats.rx_fill_empty_npkts -
+ xsks[i]->ring_stats.prev_rx_fill_empty_npkts) *
+ 1000000000. / dt;
+ tx_empty_pps = (xsks[i]->ring_stats.tx_empty_npkts -
+ xsks[i]->ring_stats.prev_tx_empty_npkts) *
+ 1000000000. / dt;
+
+ printf(fmt, "rx dropped", dropped_pps,
+ xsks[i]->ring_stats.rx_dropped_npkts);
+ printf(fmt, "rx invalid", rx_invalid_pps,
+ xsks[i]->ring_stats.rx_invalid_npkts);
+ printf(fmt, "tx invalid", tx_invalid_pps,
+ xsks[i]->ring_stats.tx_invalid_npkts);
+ printf(fmt, "rx queue full", full_pps,
+ xsks[i]->ring_stats.rx_full_npkts);
+ printf(fmt, "fill ring empty", fill_empty_pps,
+ xsks[i]->ring_stats.rx_fill_empty_npkts);
+ printf(fmt, "tx ring empty", tx_empty_pps,
+ xsks[i]->ring_stats.tx_empty_npkts);
+
+ xsks[i]->ring_stats.prev_rx_dropped_npkts =
+ xsks[i]->ring_stats.rx_dropped_npkts;
+ xsks[i]->ring_stats.prev_rx_invalid_npkts =
+ xsks[i]->ring_stats.rx_invalid_npkts;
+ xsks[i]->ring_stats.prev_tx_invalid_npkts =
+ xsks[i]->ring_stats.tx_invalid_npkts;
+ xsks[i]->ring_stats.prev_rx_full_npkts =
+ xsks[i]->ring_stats.rx_full_npkts;
+ xsks[i]->ring_stats.prev_rx_fill_empty_npkts =
+ xsks[i]->ring_stats.rx_fill_empty_npkts;
+ xsks[i]->ring_stats.prev_tx_empty_npkts =
+ xsks[i]->ring_stats.tx_empty_npkts;
+ } else {
+ printf("%-15s\n", "Error retrieving extra stats");
+ }
+ }
}
+
+ if (opt_app_stats)
+ dump_app_stats(dt);
+ if (irq_no)
+ dump_driver_stats(dt);
+}
+
+static bool is_benchmark_done(void)
+{
+ if (opt_duration > 0) {
+ unsigned long dt = (get_nsecs() - start_time);
+
+ if (dt >= opt_duration)
+ benchmark_done = true;
+ }
+ return benchmark_done;
}
static void *poller(void *arg)
{
(void)arg;
- for (;;) {
+ while (!is_benchmark_done()) {
sleep(opt_interval);
dump_stats();
}
@@ -179,7 +459,7 @@
static void remove_xdp_program(void)
{
- __u32 curr_prog_id = 0;
+ u32 curr_prog_id = 0;
if (bpf_get_link_xdp_id(opt_ifindex, &curr_prog_id, opt_xdp_flags)) {
printf("bpf_get_link_xdp_id failed\n");
@@ -195,16 +475,19 @@
static void int_exit(int sig)
{
- struct xsk_umem *umem = xsks[0]->umem->umem;
+ benchmark_done = true;
+}
- (void)sig;
+static void xdpsock_cleanup(void)
+{
+ struct xsk_umem *umem = xsks[0]->umem->umem;
+ int i;
dump_stats();
- xsk_socket__delete(xsks[0]->xsk);
+ for (i = 0; i < num_socks; i++)
+ xsk_socket__delete(xsks[i]->xsk);
(void)xsk_umem__delete(umem);
remove_xdp_program();
-
- exit(EXIT_SUCCESS);
}
static void __exit_with_error(int error, const char *file, const char *func,
@@ -219,13 +502,6 @@
#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, \
__LINE__)
-
-static const char pkt_data[] =
- "\x3c\xfd\xfe\x9e\x7f\x71\xec\xb1\xd7\x98\x3a\xc0\x08\x00\x45\x00"
- "\x00\x2e\x00\x00\x00\x00\x40\x11\x88\x97\x05\x08\x07\x08\xc8\x14"
- "\x1e\x04\x10\x92\x10\x92\x00\x1a\x6d\xa3\x34\x33\x1f\x69\x40\x6b"
- "\x54\x59\xb6\x14\x2d\x11\x44\xbf\xaf\xd9\xbe\xaa";
-
static void swap_mac_addresses(void *data)
{
struct ether_header *eth = (struct ether_header *)data;
@@ -273,24 +549,264 @@
printf("\n");
}
-static size_t gen_eth_frame(struct xsk_umem_info *umem, u64 addr)
+static void *memset32_htonl(void *dest, u32 val, u32 size)
+{
+ u32 *ptr = (u32 *)dest;
+ int i;
+
+ val = htonl(val);
+
+ for (i = 0; i < (size & (~0x3)); i += 4)
+ ptr[i >> 2] = val;
+
+ for (; i < size; i++)
+ ((char *)dest)[i] = ((char *)&val)[i & 3];
+
+ return dest;
+}
+
+/*
+ * This function code has been taken from
+ * Linux kernel lib/checksum.c
+ */
+static inline unsigned short from32to16(unsigned int x)
+{
+ /* add up 16-bit and 16-bit for 16+c bit */
+ x = (x & 0xffff) + (x >> 16);
+ /* add up carry.. */
+ x = (x & 0xffff) + (x >> 16);
+ return x;
+}
+
+/*
+ * This function code has been taken from
+ * Linux kernel lib/checksum.c
+ */
+static unsigned int do_csum(const unsigned char *buff, int len)
+{
+ unsigned int result = 0;
+ int odd;
+
+ if (len <= 0)
+ goto out;
+ odd = 1 & (unsigned long)buff;
+ if (odd) {
+#ifdef __LITTLE_ENDIAN
+ result += (*buff << 8);
+#else
+ result = *buff;
+#endif
+ len--;
+ buff++;
+ }
+ if (len >= 2) {
+ if (2 & (unsigned long)buff) {
+ result += *(unsigned short *)buff;
+ len -= 2;
+ buff += 2;
+ }
+ if (len >= 4) {
+ const unsigned char *end = buff +
+ ((unsigned int)len & ~3);
+ unsigned int carry = 0;
+
+ do {
+ unsigned int w = *(unsigned int *)buff;
+
+ buff += 4;
+ result += carry;
+ result += w;
+ carry = (w > result);
+ } while (buff < end);
+ result += carry;
+ result = (result & 0xffff) + (result >> 16);
+ }
+ if (len & 2) {
+ result += *(unsigned short *)buff;
+ buff += 2;
+ }
+ }
+ if (len & 1)
+#ifdef __LITTLE_ENDIAN
+ result += *buff;
+#else
+ result += (*buff << 8);
+#endif
+ result = from32to16(result);
+ if (odd)
+ result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
+out:
+ return result;
+}
+
+__sum16 ip_fast_csum(const void *iph, unsigned int ihl);
+
+/*
+ * This is a version of ip_compute_csum() optimized for IP headers,
+ * which always checksum on 4 octet boundaries.
+ * This function code has been taken from
+ * Linux kernel lib/checksum.c
+ */
+__sum16 ip_fast_csum(const void *iph, unsigned int ihl)
+{
+ return (__force __sum16)~do_csum(iph, ihl * 4);
+}
+
+/*
+ * Fold a partial checksum
+ * This function code has been taken from
+ * Linux kernel include/asm-generic/checksum.h
+ */
+static inline __sum16 csum_fold(__wsum csum)
+{
+ u32 sum = (__force u32)csum;
+
+ sum = (sum & 0xffff) + (sum >> 16);
+ sum = (sum & 0xffff) + (sum >> 16);
+ return (__force __sum16)~sum;
+}
+
+/*
+ * This function code has been taken from
+ * Linux kernel lib/checksum.c
+ */
+static inline u32 from64to32(u64 x)
+{
+ /* add up 32-bit and 32-bit for 32+c bit */
+ x = (x & 0xffffffff) + (x >> 32);
+ /* add up carry.. */
+ x = (x & 0xffffffff) + (x >> 32);
+ return (u32)x;
+}
+
+__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
+ __u32 len, __u8 proto, __wsum sum);
+
+/*
+ * This function code has been taken from
+ * Linux kernel lib/checksum.c
+ */
+__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
+ __u32 len, __u8 proto, __wsum sum)
+{
+ unsigned long long s = (__force u32)sum;
+
+ s += (__force u32)saddr;
+ s += (__force u32)daddr;
+#ifdef __BIG_ENDIAN__
+ s += proto + len;
+#else
+ s += (proto + len) << 8;
+#endif
+ return (__force __wsum)from64to32(s);
+}
+
+/*
+ * This function has been taken from
+ * Linux kernel include/asm-generic/checksum.h
+ */
+static inline __sum16
+csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len,
+ __u8 proto, __wsum sum)
+{
+ return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
+}
+
+static inline u16 udp_csum(u32 saddr, u32 daddr, u32 len,
+ u8 proto, u16 *udp_pkt)
+{
+ u32 csum = 0;
+ u32 cnt = 0;
+
+ /* udp hdr and data */
+ for (; cnt < len; cnt += 2)
+ csum += udp_pkt[cnt >> 1];
+
+ return csum_tcpudp_magic(saddr, daddr, len, proto, csum);
+}
+
+#define ETH_FCS_SIZE 4
+
+#define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \
+ sizeof(struct udphdr))
+
+#define PKT_SIZE (opt_pkt_size - ETH_FCS_SIZE)
+#define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr))
+#define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr))
+#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr))
+
+static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE];
+
+static void gen_eth_hdr_data(void)
+{
+ struct udphdr *udp_hdr = (struct udphdr *)(pkt_data +
+ sizeof(struct ethhdr) +
+ sizeof(struct iphdr));
+ struct iphdr *ip_hdr = (struct iphdr *)(pkt_data +
+ sizeof(struct ethhdr));
+ struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data;
+
+ /* ethernet header */
+ memcpy(eth_hdr->h_dest, "\x3c\xfd\xfe\x9e\x7f\x71", ETH_ALEN);
+ memcpy(eth_hdr->h_source, "\xec\xb1\xd7\x98\x3a\xc0", ETH_ALEN);
+ eth_hdr->h_proto = htons(ETH_P_IP);
+
+ /* IP header */
+ ip_hdr->version = IPVERSION;
+ ip_hdr->ihl = 0x5; /* 20 byte header */
+ ip_hdr->tos = 0x0;
+ ip_hdr->tot_len = htons(IP_PKT_SIZE);
+ ip_hdr->id = 0;
+ ip_hdr->frag_off = 0;
+ ip_hdr->ttl = IPDEFTTL;
+ ip_hdr->protocol = IPPROTO_UDP;
+ ip_hdr->saddr = htonl(0x0a0a0a10);
+ ip_hdr->daddr = htonl(0x0a0a0a20);
+
+ /* IP header checksum */
+ ip_hdr->check = 0;
+ ip_hdr->check = ip_fast_csum((const void *)ip_hdr, ip_hdr->ihl);
+
+ /* UDP header */
+ udp_hdr->source = htons(0x1000);
+ udp_hdr->dest = htons(0x1000);
+ udp_hdr->len = htons(UDP_PKT_SIZE);
+
+ /* UDP data */
+ memset32_htonl(pkt_data + PKT_HDR_SIZE, opt_pkt_fill_pattern,
+ UDP_PKT_DATA_SIZE);
+
+ /* UDP header checksum */
+ udp_hdr->check = 0;
+ udp_hdr->check = udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE,
+ IPPROTO_UDP, (u16 *)udp_hdr);
+}
+
+static void gen_eth_frame(struct xsk_umem_info *umem, u64 addr)
{
memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data,
- sizeof(pkt_data) - 1);
- return sizeof(pkt_data) - 1;
+ PKT_SIZE);
}
static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size)
{
struct xsk_umem_info *umem;
struct xsk_umem_config cfg = {
- .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+ /* We recommend that you set the fill ring size >= HW RX ring size +
+ * AF_XDP RX ring size. Make sure you fill up the fill ring
+ * with buffers at regular intervals, and you will with this setting
+ * avoid allocation failures in the driver. These are usually quite
+ * expensive since drivers have not been written to assume that
+ * allocation failures are common. For regular sockets, kernel
+ * allocated memory is used that only runs out in OOM situations
+ * that should be rare.
+ */
+ .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS * 2,
.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
.frame_size = opt_xsk_frame_size,
.frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM,
.flags = opt_umem_flags
};
-
int ret;
umem = calloc(1, sizeof(*umem));
@@ -299,7 +815,6 @@
ret = xsk_umem__create(&umem->umem, buffer, size, &umem->fq, &umem->cq,
&cfg);
-
if (ret)
exit_with_error(-ret);
@@ -307,13 +822,29 @@
return umem;
}
-static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem)
+/*
+ * Fill the umem's fill ring completely: reserve
+ * XSK_RING_PROD__DEFAULT_NUM_DESCS * 2 entries, point entry n at frame
+ * offset n * opt_xsk_frame_size and submit them all. The process exits via
+ * exit_with_error() if the full reservation is not granted.
+ */
+static void xsk_populate_fill_ring(struct xsk_umem_info *umem)
+{
+	int reserved, n;
+	u32 idx;
+
+	reserved = xsk_ring_prod__reserve(&umem->fq,
+					  XSK_RING_PROD__DEFAULT_NUM_DESCS * 2,
+					  &idx);
+	if (reserved != XSK_RING_PROD__DEFAULT_NUM_DESCS * 2)
+		exit_with_error(-reserved);
+
+	for (n = 0; n < XSK_RING_PROD__DEFAULT_NUM_DESCS * 2; n++, idx++)
+		*xsk_ring_prod__fill_addr(&umem->fq, idx) =
+			n * opt_xsk_frame_size;
+
+	xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS * 2);
+}
+
+static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem,
+ bool rx, bool tx)
{
struct xsk_socket_config cfg;
struct xsk_socket_info *xsk;
+ struct xsk_ring_cons *rxr;
+ struct xsk_ring_prod *txr;
int ret;
- u32 idx;
- int i;
xsk = calloc(1, sizeof(*xsk));
if (!xsk)
@@ -322,11 +853,17 @@
xsk->umem = umem;
cfg.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
- cfg.libbpf_flags = 0;
+ if (opt_num_xsks > 1)
+ cfg.libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD;
+ else
+ cfg.libbpf_flags = 0;
cfg.xdp_flags = opt_xdp_flags;
cfg.bind_flags = opt_xdp_bind_flags;
+
+ rxr = rx ? &xsk->rx : NULL;
+ txr = tx ? &xsk->tx : NULL;
ret = xsk_socket__create(&xsk->xsk, opt_if, opt_queue, umem->umem,
- &xsk->rx, &xsk->tx, &cfg);
+ rxr, txr, &cfg);
if (ret)
exit_with_error(-ret);
@@ -334,16 +871,16 @@
if (ret)
exit_with_error(-ret);
- ret = xsk_ring_prod__reserve(&xsk->umem->fq,
- XSK_RING_PROD__DEFAULT_NUM_DESCS,
- &idx);
- if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS)
- exit_with_error(-ret);
- for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++)
- *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx++) =
- i * opt_xsk_frame_size;
- xsk_ring_prod__submit(&xsk->umem->fq,
- XSK_RING_PROD__DEFAULT_NUM_DESCS);
+ xsk->app_stats.rx_empty_polls = 0;
+ xsk->app_stats.fill_fail_polls = 0;
+ xsk->app_stats.copy_tx_sendtos = 0;
+ xsk->app_stats.tx_wakeup_sendtos = 0;
+ xsk->app_stats.opt_polls = 0;
+ xsk->app_stats.prev_rx_empty_polls = 0;
+ xsk->app_stats.prev_fill_fail_polls = 0;
+ xsk->app_stats.prev_copy_tx_sendtos = 0;
+ xsk->app_stats.prev_tx_wakeup_sendtos = 0;
+ xsk->app_stats.prev_opt_polls = 0;
return xsk;
}
@@ -363,6 +900,17 @@
{"frame-size", required_argument, 0, 'f'},
{"no-need-wakeup", no_argument, 0, 'm'},
{"unaligned", no_argument, 0, 'u'},
+ {"shared-umem", no_argument, 0, 'M'},
+ {"force", no_argument, 0, 'F'},
+ {"duration", required_argument, 0, 'd'},
+ {"batch-size", required_argument, 0, 'b'},
+ {"tx-pkt-count", required_argument, 0, 'C'},
+ {"tx-pkt-size", required_argument, 0, 's'},
+ {"tx-pkt-pattern", required_argument, 0, 'P'},
+ {"extra-stats", no_argument, 0, 'x'},
+ {"quiet", no_argument, 0, 'Q'},
+ {"app-stats", no_argument, 0, 'a'},
+ {"irq-string", no_argument, 0, 'I'},
{0, 0, 0, 0}
};
@@ -378,16 +926,34 @@
" -q, --queue=n Use queue n (default 0)\n"
" -p, --poll Use poll syscall\n"
" -S, --xdp-skb=n Use XDP skb-mod\n"
- " -N, --xdp-native=n Enfore XDP native mode\n"
+ " -N, --xdp-native=n Enforce XDP native mode\n"
" -n, --interval=n Specify statistics update interval (default 1 sec).\n"
" -z, --zero-copy Force zero-copy mode.\n"
" -c, --copy Force copy mode.\n"
- " -f, --frame-size=n Set the frame size (must be a power of two, default is %d).\n"
" -m, --no-need-wakeup Turn off use of driver need wakeup flag.\n"
" -f, --frame-size=n Set the frame size (must be a power of two in aligned mode, default is %d).\n"
" -u, --unaligned Enable unaligned chunk placement\n"
+ " -M, --shared-umem Enable XDP_SHARED_UMEM\n"
+ " -F, --force Force loading the XDP prog\n"
+ " -d, --duration=n Duration in secs to run command.\n"
+ " Default: forever.\n"
+ " -b, --batch-size=n Batch size for sending or receiving\n"
+ " packets. Default: %d\n"
+ " -C, --tx-pkt-count=n Number of packets to send.\n"
+ " Default: Continuous packets.\n"
+ " -s, --tx-pkt-size=n Transmit packet size.\n"
+ " (Default: %d bytes)\n"
+ " Min size: %d, Max size %d.\n"
+ " -P, --tx-pkt-pattern=nPacket fill pattern. Default: 0x%x\n"
+ " -x, --extra-stats Display extra statistics.\n"
+ " -Q, --quiet Do not display any stats.\n"
+ " -a, --app-stats Display application (syscall) statistics.\n"
+ " -I, --irq-string Display driver interrupt statistics for interface associated with irq-string.\n"
"\n";
- fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE);
+ fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE,
+ opt_batch_size, MIN_PKT_SIZE, MIN_PKT_SIZE,
+ XSK_UMEM__DEFAULT_FRAME_SIZE, opt_pkt_fill_pattern);
+
exit(EXIT_FAILURE);
}
@@ -398,7 +964,7 @@
opterr = 0;
for (;;) {
- c = getopt_long(argc, argv, "Frtli:q:psSNn:czf:mu",
+ c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:xQaI:",
long_options, &option_index);
if (c == -1)
break;
@@ -427,7 +993,7 @@
opt_xdp_bind_flags |= XDP_COPY;
break;
case 'N':
- opt_xdp_flags |= XDP_FLAGS_DRV_MODE;
+ /* default, set below */
break;
case 'n':
opt_interval = atoi(optarg);
@@ -448,16 +1014,64 @@
break;
case 'f':
opt_xsk_frame_size = atoi(optarg);
+ break;
case 'm':
opt_need_wakeup = false;
opt_xdp_bind_flags &= ~XDP_USE_NEED_WAKEUP;
break;
+ case 'M':
+ opt_num_xsks = MAX_SOCKS;
+ break;
+ case 'd':
+ opt_duration = atoi(optarg);
+ opt_duration *= 1000000000;
+ break;
+ case 'b':
+ opt_batch_size = atoi(optarg);
+ break;
+ case 'C':
+ opt_pkt_count = atoi(optarg);
+ break;
+ case 's':
+ opt_pkt_size = atoi(optarg);
+ if (opt_pkt_size > (XSK_UMEM__DEFAULT_FRAME_SIZE) ||
+ opt_pkt_size < MIN_PKT_SIZE) {
+ fprintf(stderr,
+ "ERROR: Invalid frame size %d\n",
+ opt_pkt_size);
+ usage(basename(argv[0]));
+ }
+ break;
+ case 'P':
+ opt_pkt_fill_pattern = strtol(optarg, NULL, 16);
+ break;
+ case 'x':
+ opt_extra_stats = 1;
+ break;
+ case 'Q':
+ opt_quiet = 1;
+ break;
+ case 'a':
+ opt_app_stats = 1;
+ break;
+ case 'I':
+ opt_irq_str = optarg;
+ if (get_interrupt_number())
+ irqs_at_init = get_irqs();
+ if (irqs_at_init < 0) {
+ fprintf(stderr, "ERROR: Failed to get irqs for %s\n", opt_irq_str);
+ usage(basename(argv[0]));
+ }
+ break;
default:
usage(basename(argv[0]));
}
}
+ if (!(opt_xdp_flags & XDP_FLAGS_SKB_MODE))
+ opt_xdp_flags |= XDP_FLAGS_DRV_MODE;
+
opt_ifindex = if_nametoindex(opt_if);
if (!opt_ifindex) {
fprintf(stderr, "ERROR: interface \"%s\" does not exist\n",
@@ -478,7 +1092,8 @@
int ret;
ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
- if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN || errno == EBUSY)
+ if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN ||
+ errno == EBUSY || errno == ENETDOWN)
return;
exit_with_error(errno);
}
@@ -494,10 +1109,17 @@
if (!xsk->outstanding_tx)
return;
- if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx))
+ /* In copy mode, Tx is driven by a syscall so we need to use e.g. sendto() to
+ * really send the packets. In zero-copy mode we do not have to do this, since Tx
+ * is driven by the NAPI loop. So as an optimization, we do not have to call
+ * sendto() all the time in zero-copy mode for l2fwd.
+ */
+ if (opt_xdp_bind_flags & XDP_COPY) {
+ xsk->app_stats.copy_tx_sendtos++;
kick_tx(xsk);
+ }
- ndescs = (xsk->outstanding_tx > BATCH_SIZE) ? BATCH_SIZE :
+ ndescs = (xsk->outstanding_tx > opt_batch_size) ? opt_batch_size :
xsk->outstanding_tx;
/* re-add completed Tx buffers */
@@ -510,8 +1132,10 @@
while (ret != rcvd) {
if (ret < 0)
exit_with_error(-ret);
- if (xsk_ring_prod__needs_wakeup(&umem->fq))
+ if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
+ xsk->app_stats.fill_fail_polls++;
ret = poll(fds, num_socks, opt_timeout);
+ }
ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
}
@@ -522,11 +1146,12 @@
xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
xsk_ring_cons__release(&xsk->umem->cq, rcvd);
xsk->outstanding_tx -= rcvd;
- xsk->tx_npkts += rcvd;
+ xsk->ring_stats.tx_npkts += rcvd;
}
}
-static inline void complete_tx_only(struct xsk_socket_info *xsk)
+static inline void complete_tx_only(struct xsk_socket_info *xsk,
+ int batch_size)
{
unsigned int rcvd;
u32 idx;
@@ -534,14 +1159,16 @@
if (!xsk->outstanding_tx)
return;
- if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx))
+ if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx)) {
+ xsk->app_stats.tx_wakeup_sendtos++;
kick_tx(xsk);
+ }
- rcvd = xsk_ring_cons__peek(&xsk->umem->cq, BATCH_SIZE, &idx);
+ rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx);
if (rcvd > 0) {
xsk_ring_cons__release(&xsk->umem->cq, rcvd);
xsk->outstanding_tx -= rcvd;
- xsk->tx_npkts += rcvd;
+ xsk->ring_stats.tx_npkts += rcvd;
}
}
@@ -551,10 +1178,12 @@
u32 idx_rx = 0, idx_fq = 0;
int ret;
- rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
+ rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx);
if (!rcvd) {
- if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
+ if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
+ xsk->app_stats.rx_empty_polls++;
ret = poll(fds, num_socks, opt_timeout);
+ }
return;
}
@@ -562,8 +1191,10 @@
while (ret != rcvd) {
if (ret < 0)
exit_with_error(-ret);
- if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
+ if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
+ xsk->app_stats.fill_fail_polls++;
ret = poll(fds, num_socks, opt_timeout);
+ }
ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
}
@@ -581,16 +1212,14 @@
xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
xsk_ring_cons__release(&xsk->rx, rcvd);
- xsk->rx_npkts += rcvd;
+ xsk->ring_stats.rx_npkts += rcvd;
}
static void rx_drop_all(void)
{
- struct pollfd fds[MAX_SOCKS + 1];
+ struct pollfd fds[MAX_SOCKS] = {};
int i, ret;
- memset(fds, 0, sizeof(fds));
-
for (i = 0; i < num_socks; i++) {
fds[i].fd = xsk_socket__fd(xsks[i]->xsk);
fds[i].events = POLLIN;
@@ -598,6 +1227,8 @@
for (;;) {
if (opt_poll) {
+ for (i = 0; i < num_socks; i++)
+ xsks[i]->app_stats.opt_polls++;
ret = poll(fds, num_socks, opt_timeout);
if (ret <= 0)
continue;
@@ -605,46 +1236,83 @@
for (i = 0; i < num_socks; i++)
rx_drop(xsks[i], fds);
+
+ if (benchmark_done)
+ break;
}
}
-static void tx_only(struct xsk_socket_info *xsk, u32 frame_nb)
+static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size)
{
u32 idx;
+ unsigned int i;
- if (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) == BATCH_SIZE) {
- unsigned int i;
-
- for (i = 0; i < BATCH_SIZE; i++) {
- xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->addr =
- (frame_nb + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT;
- xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->len =
- sizeof(pkt_data) - 1;
- }
-
- xsk_ring_prod__submit(&xsk->tx, BATCH_SIZE);
- xsk->outstanding_tx += BATCH_SIZE;
- frame_nb += BATCH_SIZE;
- frame_nb %= NUM_FRAMES;
+ while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) <
+ batch_size) {
+ complete_tx_only(xsk, batch_size);
+ if (benchmark_done)
+ return;
}
- complete_tx_only(xsk);
+ for (i = 0; i < batch_size; i++) {
+ struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx,
+ idx + i);
+ tx_desc->addr = (*frame_nb + i) * opt_xsk_frame_size;
+ tx_desc->len = PKT_SIZE;
+ }
+
+ xsk_ring_prod__submit(&xsk->tx, batch_size);
+ xsk->outstanding_tx += batch_size;
+ *frame_nb += batch_size;
+ *frame_nb %= NUM_FRAMES;
+ complete_tx_only(xsk, batch_size);
+}
+
+/*
+ * Size of the next TX batch.
+ *
+ * @pkt_cnt: number of packets already accounted for.
+ *
+ * Returns opt_batch_size when no packet limit is set (opt_pkt_count == 0) or
+ * when a full batch still fits below the limit; otherwise returns what is
+ * left up to opt_pkt_count.
+ */
+static inline int get_batch_size(int pkt_cnt)
+{
+	if (!opt_pkt_count || pkt_cnt + opt_batch_size <= opt_pkt_count)
+		return opt_batch_size;
+
+	return opt_pkt_count - pkt_cnt;
+}
+
+/*
+ * Keep reaping TX completions on every socket until none of them has
+ * outstanding transmissions left.
+ */
+static void complete_tx_only_all(void)
+{
+	bool pending;
+	int i;
+
+	do {
+		pending = false;
+		for (i = 0; i < num_socks; i++) {
+			if (!xsks[i]->outstanding_tx)
+				continue;
+
+			complete_tx_only(xsks[i], opt_batch_size);
+
+			/*
+			 * Only ever raise the flag here. Assigning it would
+			 * let a later, fully drained socket hide the fact
+			 * that an earlier socket still has work pending.
+			 */
+			if (xsks[i]->outstanding_tx)
+				pending = true;
+		}
+	} while (pending);
}
static void tx_only_all(void)
{
- struct pollfd fds[MAX_SOCKS];
+ struct pollfd fds[MAX_SOCKS] = {};
u32 frame_nb[MAX_SOCKS] = {};
+ int pkt_cnt = 0;
int i, ret;
- memset(fds, 0, sizeof(fds));
for (i = 0; i < num_socks; i++) {
fds[0].fd = xsk_socket__fd(xsks[i]->xsk);
fds[0].events = POLLOUT;
}
- for (;;) {
+ while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) {
+ int batch_size = get_batch_size(pkt_cnt);
+
if (opt_poll) {
+ for (i = 0; i < num_socks; i++)
+ xsks[i]->app_stats.opt_polls++;
ret = poll(fds, num_socks, opt_timeout);
if (ret <= 0)
continue;
@@ -654,8 +1322,16 @@
}
for (i = 0; i < num_socks; i++)
- tx_only(xsks[i], frame_nb[i]);
+ tx_only(xsks[i], &frame_nb[i], batch_size);
+
+ pkt_cnt += batch_size;
+
+ if (benchmark_done)
+ break;
}
+
+ if (opt_pkt_count)
+ complete_tx_only_all();
}
static void l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds)
@@ -666,10 +1342,12 @@
complete_tx_l2fwd(xsk, fds);
- rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
+ rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx);
if (!rcvd) {
- if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
+ if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
+ xsk->app_stats.rx_empty_polls++;
ret = poll(fds, num_socks, opt_timeout);
+ }
return;
}
@@ -678,8 +1356,10 @@
if (ret < 0)
exit_with_error(-ret);
complete_tx_l2fwd(xsk, fds);
- if (xsk_ring_prod__needs_wakeup(&xsk->tx))
+ if (xsk_ring_prod__needs_wakeup(&xsk->tx)) {
+ xsk->app_stats.tx_wakeup_sendtos++;
kick_tx(xsk);
+ }
ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx);
}
@@ -701,17 +1381,15 @@
xsk_ring_prod__submit(&xsk->tx, rcvd);
xsk_ring_cons__release(&xsk->rx, rcvd);
- xsk->rx_npkts += rcvd;
+ xsk->ring_stats.rx_npkts += rcvd;
xsk->outstanding_tx += rcvd;
}
static void l2fwd_all(void)
{
- struct pollfd fds[MAX_SOCKS];
+ struct pollfd fds[MAX_SOCKS] = {};
int i, ret;
- memset(fds, 0, sizeof(fds));
-
for (i = 0; i < num_socks; i++) {
fds[i].fd = xsk_socket__fd(xsks[i]->xsk);
fds[i].events = POLLOUT | POLLIN;
@@ -719,6 +1397,8 @@
for (;;) {
if (opt_poll) {
+ for (i = 0; i < num_socks; i++)
+ xsks[i]->app_stats.opt_polls++;
ret = poll(fds, num_socks, opt_timeout);
if (ret <= 0)
continue;
@@ -726,16 +1406,72 @@
for (i = 0; i < num_socks; i++)
l2fwd(xsks[i], fds);
+
+ if (benchmark_done)
+ break;
+ }
+}
+
+/*
+ * Load the object file "<argv[0]>_kern.o" as an XDP program and attach it to
+ * opt_ifindex using opt_xdp_flags. Any failure terminates the process.
+ *
+ * @argv: program arguments; only argv[0] is used, to derive the file name.
+ * @obj: receives the loaded BPF object.
+ */
+static void load_xdp_program(char **argv, struct bpf_object **obj)
+{
+	struct bpf_prog_load_attr prog_load_attr = {
+		.prog_type = BPF_PROG_TYPE_XDP,
+	};
+	char xdp_filename[256];
+	int prog_fd;
+
+	snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv[0]);
+	prog_load_attr.file = xdp_filename;
+
+	if (bpf_prog_load_xattr(&prog_load_attr, obj, &prog_fd))
+		exit(EXIT_FAILURE);
+	if (prog_fd < 0) {
+		/*
+		 * prog_fd is negative on this path; strerror() expects a
+		 * positive errno value, so negate it.
+		 */
+		fprintf(stderr, "ERROR: no program found: %s\n",
+			strerror(-prog_fd));
+		exit(EXIT_FAILURE);
+	}
+
+	if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd, opt_xdp_flags) < 0) {
+		fprintf(stderr, "ERROR: link set xdp fd failed\n");
+		exit(EXIT_FAILURE);
+	}
+}
+
+/*
+ * Store the file descriptor of every configured socket in the BPF map named
+ * "xsks_map" of @obj, using the socket's index in xsks[] as the key. Any
+ * failure terminates the process.
+ */
+static void enter_xsks_into_map(struct bpf_object *obj)
+{
+	struct bpf_map *map;
+	int i, xsks_map;
+
+	map = bpf_object__find_map_by_name(obj, "xsks_map");
+	xsks_map = bpf_map__fd(map);
+	if (xsks_map < 0) {
+		/*
+		 * xsks_map is negative on this path; strerror() expects a
+		 * positive errno value, so negate it.
+		 */
+		fprintf(stderr, "ERROR: no xsks map found: %s\n",
+			strerror(-xsks_map));
+		exit(EXIT_FAILURE);
+	}
+
+	for (i = 0; i < num_socks; i++) {
+		int fd = xsk_socket__fd(xsks[i]->xsk);
+		int key, ret;
+
+		key = i;
+		ret = bpf_map_update_elem(xsks_map, &key, &fd, 0);
+		if (ret) {
+			fprintf(stderr, "ERROR: bpf_map_update_elem %d\n", i);
+			exit(EXIT_FAILURE);
+		}
	}
}
int main(int argc, char **argv)
{
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ bool rx = false, tx = false;
struct xsk_umem_info *umem;
+ struct bpf_object *obj;
pthread_t pt;
+ int i, ret;
void *bufs;
- int ret;
parse_command_line(argc, argv);
@@ -745,6 +1481,9 @@
exit(EXIT_FAILURE);
}
+ if (opt_num_xsks > 1)
+ load_xdp_program(argv, &obj);
+
/* Reserve memory for the umem. Use hugepages if unaligned chunk mode */
bufs = mmap(NULL, NUM_FRAMES * opt_xsk_frame_size,
PROT_READ | PROT_WRITE,
@@ -753,28 +1492,42 @@
printf("ERROR: mmap failed\n");
exit(EXIT_FAILURE);
}
- /* Create sockets... */
+
+ /* Create sockets... */
umem = xsk_configure_umem(bufs, NUM_FRAMES * opt_xsk_frame_size);
- xsks[num_socks++] = xsk_configure_socket(umem);
+ if (opt_bench == BENCH_RXDROP || opt_bench == BENCH_L2FWD) {
+ rx = true;
+ xsk_populate_fill_ring(umem);
+ }
+ if (opt_bench == BENCH_L2FWD || opt_bench == BENCH_TXONLY)
+ tx = true;
+ for (i = 0; i < opt_num_xsks; i++)
+ xsks[num_socks++] = xsk_configure_socket(umem, rx, tx);
if (opt_bench == BENCH_TXONLY) {
- int i;
+ gen_eth_hdr_data();
for (i = 0; i < NUM_FRAMES; i++)
- (void)gen_eth_frame(umem, i * opt_xsk_frame_size);
+ gen_eth_frame(umem, i * opt_xsk_frame_size);
}
+ if (opt_num_xsks > 1 && opt_bench != BENCH_TXONLY)
+ enter_xsks_into_map(obj);
+
signal(SIGINT, int_exit);
signal(SIGTERM, int_exit);
signal(SIGABRT, int_exit);
setlocale(LC_ALL, "");
- ret = pthread_create(&pt, NULL, poller, NULL);
- if (ret)
- exit_with_error(ret);
+ if (!opt_quiet) {
+ ret = pthread_create(&pt, NULL, poller, NULL);
+ if (ret)
+ exit_with_error(ret);
+ }
prev_time = get_nsecs();
+ start_time = prev_time;
if (opt_bench == BENCH_RXDROP)
rx_drop_all();
@@ -783,6 +1536,13 @@
else
l2fwd_all();
+ benchmark_done = true;
+
+ if (!opt_quiet)
+ pthread_join(pt, NULL);
+
+ xdpsock_cleanup();
+
munmap(bufs, NUM_FRAMES * opt_xsk_frame_size);
return 0;
diff --git a/samples/bpf/xsk_fwd.c b/samples/bpf/xsk_fwd.c
new file mode 100644
index 0000000..1cd97c8
--- /dev/null
+++ b/samples/bpf/xsk_fwd.c
@@ -0,0 +1,1085 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2020 Intel Corporation. */
+
+#define _GNU_SOURCE
+#include <poll.h>
+#include <pthread.h>
+#include <signal.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <netinet/ether.h>
+#include <net/if.h>
+
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <linux/if_xdp.h>
+
+#include <bpf/libbpf.h>
+#include <bpf/xsk.h>
+#include <bpf/bpf.h>
+
+/* Element count of a true array; gives a wrong result when handed a pointer. */
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+/* Short local aliases for the __uNN fixed-width integer types. */
+typedef __u64 u64;
+typedef __u32 u32;
+typedef __u16 u16;
+typedef __u8 u8;
+
+/* This program illustrates the packet forwarding between multiple AF_XDP
+ * sockets in multi-threaded environment. All threads are sharing a common
+ * buffer pool, with each socket having its own private buffer cache.
+ *
+ * Example 1: Single thread handling two sockets. The packets received by socket
+ * A (interface IFA, queue QA) are forwarded to socket B (interface IFB, queue
+ * QB), while the packets received by socket B are forwarded to socket A. The
+ * thread is running on CPU core X:
+ *
+ * ./xsk_fwd -i IFA -q QA -i IFB -q QB -c X
+ *
+ * Example 2: Two threads, each handling two sockets. The thread running on CPU
+ * core X forwards all the packets received by socket A to socket B, and all the
+ * packets received by socket B to socket A. The thread running on CPU core Y is
+ * performing the same packet forwarding between sockets C and D:
+ *
+ * ./xsk_fwd -i IFA -q QA -i IFB -q QB -i IFC -q QC -i IFD -q QD
+ * -c CX -c CY
+ */
+
+/*
+ * Buffer pool and buffer cache
+ *
+ * For packet forwarding, the packet buffers are typically allocated from the
+ * pool for packet reception and freed back to the pool for further reuse once
+ * the packet transmission is completed.
+ *
+ * The buffer pool is shared between multiple threads. In order to minimize the
+ * access latency to the shared buffer pool, each thread creates one (or
+ * several) buffer caches, which, unlike the buffer pool, are private to the
+ * thread that creates them and therefore cannot be shared with other threads.
+ * The access to the shared pool is only needed either (A) when the cache gets
+ * empty due to repeated buffer allocations and it needs to be replenished from
+ * the pool, or (B) when the cache gets full due to repeated buffer free and it
+ * needs to be flushed back to the pool.
+ *
+ * In a packet forwarding system, a packet received on any input port can
+ * potentially be transmitted on any output port, depending on the forwarding
+ * configuration. For AF_XDP sockets, for this to work with zero-copy of the
+ * packet buffers, it is required that the buffer pool memory fits into the
+ * UMEM area shared by all the sockets.
+ */
+
+/* Dimensioning parameters handed to bpool_init(). */
+struct bpool_params {
+ /* Requested buffer count; bpool_init() rounds it up to whole slabs. */
+ u32 n_buffers;
+ /* Size of one buffer in bytes; buffer i starts at i * buffer_size. */
+ u32 buffer_size;
+ /* Extra flags OR-ed into MAP_PRIVATE | MAP_ANONYMOUS for the pool mmap. */
+ int mmap_flags;
+
+ /* Maximum number of caches; two reserved slabs are set aside for each. */
+ u32 n_users_max;
+ /* Number of buffers held by one slab. */
+ u32 n_buffers_per_slab;
+};
+
+/* This buffer pool implementation organizes the buffers into equally sized
+ * slabs of *n_buffers_per_slab*. Initially, there are *n_slabs* slabs in the
+ * pool that are completely filled with buffer pointers (full slabs).
+ *
+ * Each buffer cache has a slab for buffer allocation and a slab for buffer
+ * free, with both of these slabs initially empty. When the cache's allocation
+ * slab goes empty, it is swapped with one of the available full slabs from the
+ * pool, if any is available. When the cache's free slab goes full, it is
+ * swapped for one of the empty slabs from the pool, which is guaranteed to
+ * succeed.
+ *
+ * Partially filled slabs never get traded between the cache and the pool
+ * (except when the cache itself is destroyed), which enables fast operation
+ * through pointer swapping.
+ */
+struct bpool {
+ /* Copy of the init parameters, with n_buffers rounded up to whole slabs. */
+ struct bpool_params params;
+ /* Taken by the bcache_* functions whenever they touch the slab tables. */
+ pthread_mutex_t lock;
+ /* Start of the mmap()ed packet memory that backs the UMEM. */
+ void *addr;
+
+ /*
+ * Slab pointer tables and the buffer arrays they point into. All four
+ * live in the same allocation as this struct, directly behind it.
+ */
+ u64 **slabs;
+ u64 **slabs_reserved;
+ u64 *buffers;
+ u64 *buffers_reserved;
+
+ /* Table sizes fixed at init time. */
+ u64 n_slabs;
+ u64 n_slabs_reserved;
+ u64 n_buffers;
+
+ /*
+ * slabs[0 .. n_slabs_available - 1] are the full slabs; the cache code
+ * pops full slabs from, and pushes them back to, that end of the table.
+ */
+ u64 n_slabs_available;
+ /* Number of reserved slabs not currently owned by a cache. */
+ u64 n_slabs_reserved_available;
+
+ /* UMEM configuration copy, UMEM-level rings and the UMEM handle itself. */
+ struct xsk_umem_config umem_cfg;
+ struct xsk_ring_prod umem_fq;
+ struct xsk_ring_cons umem_cq;
+ struct xsk_umem *umem;
+};
+
+/*
+ * Create a buffer pool together with the UMEM that backs it.
+ *
+ * One calloc() holds the struct bpool header followed by four arrays: the
+ * slab pointer table, the reserved slab pointer table, the buffer array and
+ * the reserved buffer array. The packet memory itself is mmap()ed separately
+ * and registered with xsk_umem__create().
+ *
+ * @params: pool dimensioning. n_buffers is rounded up to a whole number of
+ *	slabs; n_buffers_per_slab must be non-zero.
+ * @umem_cfg: UMEM configuration, passed to xsk_umem__create() and copied into
+ *	the pool.
+ *
+ * Returns the new pool, or NULL on failure. Everything acquired before the
+ * failure is released again.
+ */
+static struct bpool *
+bpool_init(struct bpool_params *params,
+	   struct xsk_umem_config *umem_cfg)
+{
+	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+	u64 n_slabs, n_slabs_reserved, n_buffers, n_buffers_reserved;
+	u64 slabs_size, slabs_reserved_size;
+	u64 buffers_size, buffers_reserved_size;
+	u64 total_size, umem_size, i;
+	struct bpool *bp;
+	u8 *p;
+	int status;
+
+	/* A slab size of zero would divide by zero in the dimensioning below. */
+	if (!params->n_buffers_per_slab)
+		return NULL;
+
+	/* mmap prep. */
+	if (setrlimit(RLIMIT_MEMLOCK, &r))
+		return NULL;
+
+	/* bpool internals dimensioning. */
+	n_slabs = (params->n_buffers + params->n_buffers_per_slab - 1) /
+		params->n_buffers_per_slab;
+	n_slabs_reserved = params->n_users_max * 2;
+	n_buffers = n_slabs * params->n_buffers_per_slab;
+	n_buffers_reserved = n_slabs_reserved * params->n_buffers_per_slab;
+
+	slabs_size = n_slabs * sizeof(u64 *);
+	slabs_reserved_size = n_slabs_reserved * sizeof(u64 *);
+	buffers_size = n_buffers * sizeof(u64);
+	buffers_reserved_size = n_buffers_reserved * sizeof(u64);
+
+	total_size = sizeof(struct bpool) +
+		slabs_size + slabs_reserved_size +
+		buffers_size + buffers_reserved_size;
+
+	/*
+	 * Size of the packet memory, computed once in 64 bits so that the
+	 * mmap(), xsk_umem__create() and munmap() calls all agree on it.
+	 */
+	umem_size = n_buffers * params->buffer_size;
+
+	/* bpool memory allocation. */
+	p = calloc(total_size, sizeof(u8));
+	if (!p)
+		return NULL;
+
+	/* bpool memory initialization. */
+	bp = (struct bpool *)p;
+	memcpy(&bp->params, params, sizeof(*params));
+	bp->params.n_buffers = n_buffers;
+
+	bp->slabs = (u64 **)&p[sizeof(struct bpool)];
+	bp->slabs_reserved = (u64 **)&p[sizeof(struct bpool) +
+		slabs_size];
+	bp->buffers = (u64 *)&p[sizeof(struct bpool) +
+		slabs_size + slabs_reserved_size];
+	bp->buffers_reserved = (u64 *)&p[sizeof(struct bpool) +
+		slabs_size + slabs_reserved_size + buffers_size];
+
+	bp->n_slabs = n_slabs;
+	bp->n_slabs_reserved = n_slabs_reserved;
+	bp->n_buffers = n_buffers;
+
+	/* Every regular slab starts out full ... */
+	for (i = 0; i < n_slabs; i++)
+		bp->slabs[i] = &bp->buffers[i * params->n_buffers_per_slab];
+	bp->n_slabs_available = n_slabs;
+
+	/* ... while the reserved slabs only provide storage for the caches. */
+	for (i = 0; i < n_slabs_reserved; i++)
+		bp->slabs_reserved[i] = &bp->buffers_reserved[i *
+			params->n_buffers_per_slab];
+	bp->n_slabs_reserved_available = n_slabs_reserved;
+
+	/* A buffer is identified by its byte offset into the packet memory. */
+	for (i = 0; i < n_buffers; i++)
+		bp->buffers[i] = i * params->buffer_size;
+
+	/* lock. */
+	status = pthread_mutex_init(&bp->lock, NULL);
+	if (status) {
+		free(p);
+		return NULL;
+	}
+
+	/* mmap. */
+	bp->addr = mmap(NULL,
+			umem_size,
+			PROT_READ | PROT_WRITE,
+			MAP_PRIVATE | MAP_ANONYMOUS | params->mmap_flags,
+			-1,
+			0);
+	if (bp->addr == MAP_FAILED) {
+		pthread_mutex_destroy(&bp->lock);
+		free(p);
+		return NULL;
+	}
+
+	/* umem. */
+	status = xsk_umem__create(&bp->umem,
+				  bp->addr,
+				  umem_size,
+				  &bp->umem_fq,
+				  &bp->umem_cq,
+				  umem_cfg);
+	if (status) {
+		munmap(bp->addr, umem_size);
+		pthread_mutex_destroy(&bp->lock);
+		free(p);
+		return NULL;
+	}
+	memcpy(&bp->umem_cfg, umem_cfg, sizeof(*umem_cfg));
+
+	return bp;
+}
+
+/*
+ * Tear down a pool created by bpool_init(): delete the UMEM, unmap the packet
+ * memory, destroy the lock and free the pool allocation. A NULL @bp is
+ * accepted and ignored.
+ */
+static void
+bpool_free(struct bpool *bp)
+{
+	if (!bp)
+		return;
+
+	xsk_umem__delete(bp->umem);
+	/*
+	 * Use the 64-bit buffer count so the length matches the one given to
+	 * mmap(); a product of the two u32 fields in params could wrap.
+	 */
+	munmap(bp->addr, bp->n_buffers * bp->params.buffer_size);
+	pthread_mutex_destroy(&bp->lock);
+	free(bp);
+}
+
+/* Buffer cache holding two slabs borrowed from a pool. */
+struct bcache {
+ /* Pool this cache trades slabs with. */
+ struct bpool *bp;
+
+ /* Slab buffers are allocated from (bcache_cons). */
+ u64 *slab_cons;
+ /* Slab freed buffers are stored into (bcache_prod). */
+ u64 *slab_prod;
+
+ /* Number of buffers currently held in slab_cons / slab_prod. */
+ u64 n_buffers_cons;
+ u64 n_buffers_prod;
+};
+
+/* Number of buffers per slab in the pool that @bc belongs to. */
+static u32
+bcache_slab_size(struct bcache *bc)
+{
+	return bc->bp->params.n_buffers_per_slab;
+}
+
+/*
+ * Create a buffer cache on top of pool @bp, taking two of the pool's reserved
+ * slabs as the cache's (initially empty) consumer and producer slabs.
+ *
+ * Returns the new cache, or NULL when memory allocation fails or the pool has
+ * fewer than two reserved slabs left.
+ */
+static struct bcache *
+bcache_init(struct bpool *bp)
+{
+	struct bcache *bc;
+
+	bc = calloc(1, sizeof(struct bcache));
+	if (!bc)
+		return NULL;
+
+	bc->bp = bp;
+	bc->n_buffers_cons = 0;
+	bc->n_buffers_prod = 0;
+
+	pthread_mutex_lock(&bp->lock);
+	/* Two slabs are taken below, so a single remaining one is not enough. */
+	if (bp->n_slabs_reserved_available < 2) {
+		pthread_mutex_unlock(&bp->lock);
+		free(bc);
+		return NULL;
+	}
+
+	bc->slab_cons = bp->slabs_reserved[bp->n_slabs_reserved_available - 1];
+	bc->slab_prod = bp->slabs_reserved[bp->n_slabs_reserved_available - 2];
+	bp->n_slabs_reserved_available -= 2;
+	pthread_mutex_unlock(&bp->lock);
+
+	return bc;
+}
+
+/*
+ * Destroy a cache and hand its two slabs back to the pool's reserved table.
+ * A NULL @bc is accepted and ignored.
+ */
+static void
+bcache_free(struct bcache *bc)
+{
+	struct bpool *bp;
+	u64 top;
+
+	if (!bc)
+		return;
+
+	/* To keep the example simple, buffers still sitting in the cache are
+	 * not returned to the pool.
+	 */
+
+	bp = bc->bp;
+	pthread_mutex_lock(&bp->lock);
+	top = bp->n_slabs_reserved_available;
+	bp->slabs_reserved[top] = bc->slab_prod;
+	bp->slabs_reserved[top + 1] = bc->slab_cons;
+	bp->n_slabs_reserved_available = top + 2;
+	pthread_mutex_unlock(&bp->lock);
+
+	free(bc);
+}
+
+/* Report how many buffers a following series of bcache_cons() calls may take.
+ *
+ * The *n_buffers* argument must never exceed the pool's *n_buffers_per_slab*
+ * for the result to be correct. That normally holds; the one exception is a
+ * large allocation at init time (e.g. for the UMEM fill queue setup).
+ *
+ * Returns *n_buffers*, a smaller non-zero count when the consumer slab holds
+ * fewer buffers than asked for, or 0 when both the cache and the pool are out
+ * of buffers.
+ */
+static inline u32
+bcache_cons_check(struct bcache *bc, u32 n_buffers)
+{
+	struct bpool *pool = bc->bp;
+	u64 cached = bc->n_buffers_cons;
+	u64 *full_slab;
+	u64 top;
+
+	/*
+	 * Something is left in the consumer slab: serve from it alone, even if
+	 * that only partially satisfies the request. The pool is not consulted.
+	 */
+	if (cached) {
+		if (cached < n_buffers)
+			return cached;
+		return n_buffers;
+	}
+
+	/*
+	 * The consumer slab is empty: trade it (empty) for a full slab from
+	 * the pool, provided the pool has one.
+	 */
+	pthread_mutex_lock(&pool->lock);
+	top = pool->n_slabs_available;
+	if (!top) {
+		pthread_mutex_unlock(&pool->lock);
+		return 0;
+	}
+
+	top--;
+	full_slab = pool->slabs[top];
+	pool->slabs[top] = bc->slab_cons;
+	pool->n_slabs_available = top;
+	pthread_mutex_unlock(&pool->lock);
+
+	bc->slab_cons = full_slab;
+	bc->n_buffers_cons = pool->params.n_buffers_per_slab;
+	return n_buffers;
+}
+
+/*
+ * Take one buffer from the top of the consumer slab. The function does not
+ * check for an empty slab itself, so the slab must hold at least one buffer
+ * on entry.
+ */
+static inline u64
+bcache_cons(struct bcache *bc)
+{
+	bc->n_buffers_cons--;
+	return bc->slab_cons[bc->n_buffers_cons];
+}
+
+/*
+ * Give @buffer back to the cache. When the producer slab is already full it
+ * is first traded for an empty slab from the pool.
+ */
+static inline void
+bcache_prod(struct bcache *bc, u64 buffer)
+{
+	struct bpool *pool = bc->bp;
+	u64 fill = bc->n_buffers_prod;
+	u64 *fresh_slab;
+	u64 slot;
+
+	if (fill >= pool->params.n_buffers_per_slab) {
+		/*
+		 * Producer slab is full: swap it with the slab stored just
+		 * past the pool's full slabs. Because this cache is holding a
+		 * full slab, the pool is guaranteed to have at least one empty
+		 * slab there.
+		 */
+		pthread_mutex_lock(&pool->lock);
+		slot = pool->n_slabs_available;
+		fresh_slab = pool->slabs[slot];
+		pool->slabs[slot] = bc->slab_prod;
+		pool->n_slabs_available = slot + 1;
+		pthread_mutex_unlock(&pool->lock);
+
+		bc->slab_prod = fresh_slab;
+		fill = 0;
+	}
+
+	/* Store the buffer in the next free position of the producer slab. */
+	bc->slab_prod[fill] = buffer;
+	bc->n_buffers_prod = fill + 1;
+}
+
+/*
+ * Port
+ *
+ * Each of the forwarding ports sits on top of an AF_XDP socket. In order for
+ * packet forwarding to happen with no packet buffer copy, all the sockets need
+ * to share the same UMEM area, which is used as the buffer pool memory.
+ */
+/* Capacity of struct burst_rx; may be overridden at build time. */
+#ifndef MAX_BURST_RX
+#define MAX_BURST_RX 64
+#endif
+
+/* Capacity of struct burst_tx; may be overridden at build time. */
+#ifndef MAX_BURST_TX
+#define MAX_BURST_TX 64
+#endif
+
+/* Packets returned by port_rx_burst(): descriptor address and length each. */
+struct burst_rx {
+ u64 addr[MAX_BURST_RX];
+ u32 len[MAX_BURST_RX];
+};
+
+/* Packets queued for port_tx_burst(); n_pkts is the number to transmit. */
+struct burst_tx {
+ u64 addr[MAX_BURST_TX];
+ u32 len[MAX_BURST_TX];
+ u32 n_pkts;
+};
+
+/* Arguments for port_init(). */
+struct port_params {
+ /* Socket configuration handed to xsk_socket__create_shared(). */
+ struct xsk_socket_config xsk_cfg;
+ /* Buffer pool whose UMEM the port's socket is created on. */
+ struct bpool *bp;
+ /* Network interface name and queue index the socket is created for. */
+ const char *iface;
+ u32 iface_queue;
+};
+
+/* One forwarding port: an AF_XDP socket plus its private buffer cache. */
+struct port {
+ /* Copy of the parameters given to port_init(). */
+ struct port_params params;
+
+ /* Buffer cache created for this port by port_init(). */
+ struct bcache *bc;
+
+ /* RX/TX rings and fill/completion rings set up by the socket creation. */
+ struct xsk_ring_cons rxq;
+ struct xsk_ring_prod txq;
+ struct xsk_ring_prod umem_fq;
+ struct xsk_ring_cons umem_cq;
+ struct xsk_socket *xsk;
+ /* Set to 1 by port_init() once the fill ring has been populated. */
+ int umem_fq_initialized;
+
+ /* Packet counters; n_pkts_rx is advanced by port_rx_burst(). */
+ u64 n_pkts_rx;
+ u64 n_pkts_tx;
+};
+
+/*
+ * Release a port: delete its socket if one was created, free its buffer cache
+ * and then the port itself. A NULL @p is accepted and ignored.
+ */
+static void
+port_free(struct port *p)
+{
+	struct xsk_socket *sock;
+
+	if (!p)
+		return;
+
+	/* To keep the example simple, buffers still held by the socket's RX
+	 * and TX queues or by the UMEM fill and completion queues are not
+	 * freed here.
+	 */
+
+	sock = p->xsk;
+	if (sock)
+		xsk_socket__delete(sock);
+
+	bcache_free(p->bc);
+
+	free(p);
+}
+
+/*
+ * Create a forwarding port: allocate its buffer cache, open an AF_XDP socket
+ * sharing the pool's UMEM and pre-load the socket's fill ring with
+ * fill_size buffers taken from the cache.
+ *
+ * @params: port parameters; copied into the port.
+ *
+ * Returns the new port, or NULL on failure. Partially built state is released
+ * through port_free().
+ */
+static struct port *
+port_init(struct port_params *params)
+{
+	struct port *p;
+	u32 umem_fq_size, pos = 0, i;
+	int status;
+
+	/* Memory allocation and initialization. */
+	p = calloc(1, sizeof(struct port));
+	if (!p)
+		return NULL;
+
+	memcpy(&p->params, params, sizeof(p->params));
+	umem_fq_size = params->bp->umem_cfg.fill_size;
+
+	/*
+	 * bcache. The whole fill ring is loaded from a single slab, so a slab
+	 * must be at least as large as the ring and a full one must be
+	 * obtainable right now.
+	 */
+	p->bc = bcache_init(params->bp);
+	if (!p->bc ||
+	    (bcache_slab_size(p->bc) < umem_fq_size) ||
+	    (bcache_cons_check(p->bc, umem_fq_size) < umem_fq_size)) {
+		port_free(p);
+		return NULL;
+	}
+
+	/* xsk socket. */
+	status = xsk_socket__create_shared(&p->xsk,
+					   params->iface,
+					   params->iface_queue,
+					   params->bp->umem,
+					   &p->rxq,
+					   &p->txq,
+					   &p->umem_fq,
+					   &p->umem_cq,
+					   &params->xsk_cfg);
+	if (status) {
+		port_free(p);
+		return NULL;
+	}
+
+	/*
+	 * umem fq. Writing descriptors without a successful reservation would
+	 * corrupt the ring, so bail out if the reservation falls short.
+	 */
+	if (xsk_ring_prod__reserve(&p->umem_fq, umem_fq_size, &pos) !=
+	    umem_fq_size) {
+		port_free(p);
+		return NULL;
+	}
+
+	for (i = 0; i < umem_fq_size; i++)
+		*xsk_ring_prod__fill_addr(&p->umem_fq, pos + i) =
+			bcache_cons(p->bc);
+
+	xsk_ring_prod__submit(&p->umem_fq, umem_fq_size);
+	p->umem_fq_initialized = 1;
+
+	return p;
+}
+
+static inline u32
+port_rx_burst(struct port *p, struct burst_rx *b)
+{ /* Receive one burst from p into b, then refill the fill ring; returns the packet count. */
+ u32 n_pkts, pos, i;
+
+ /* Free buffers for FQ replenish: the burst is capped by what bcache_cons_check() returns. */
+ n_pkts = ARRAY_SIZE(b->addr);
+
+ n_pkts = bcache_cons_check(p->bc, n_pkts);
+ if (!n_pkts)
+ return 0;
+
+ /* RXQ: peek at up to n_pkts descriptors; the return value replaces n_pkts. */
+ n_pkts = xsk_ring_cons__peek(&p->rxq, n_pkts, &pos);
+ if (!n_pkts) {
+ if (xsk_ring_prod__needs_wakeup(&p->umem_fq)) {
+ struct pollfd pollfd = {
+ .fd = xsk_socket__fd(p->xsk),
+ .events = POLLIN,
+ };
+
+ poll(&pollfd, 1, 0); /* Timeout of 0: does not block. */
+ }
+ return 0;
+ }
+
+ for (i = 0; i < n_pkts; i++) { /* Copy address and length out of each RX descriptor. */
+ b->addr[i] = xsk_ring_cons__rx_desc(&p->rxq, pos + i)->addr;
+ b->len[i] = xsk_ring_cons__rx_desc(&p->rxq, pos + i)->len;
+ }
+
+ xsk_ring_cons__release(&p->rxq, n_pkts);
+ p->n_pkts_rx += n_pkts;
+
+ /* UMEM FQ: loop until exactly n_pkts fill ring slots have been reserved. */
+ for ( ; ; ) {
+ int status;
+
+ status = xsk_ring_prod__reserve(&p->umem_fq, n_pkts, &pos);
+ if (status == n_pkts)
+ break;
+
+ if (xsk_ring_prod__needs_wakeup(&p->umem_fq)) {
+ struct pollfd pollfd = {
+ .fd = xsk_socket__fd(p->xsk),
+ .events = POLLIN,
+ };
+
+ poll(&pollfd, 1, 0); /* Timeout of 0: does not block. */
+ }
+ }
+
+ for (i = 0; i < n_pkts; i++) /* One bcache_cons() buffer per packet just received. */
+ *xsk_ring_prod__fill_addr(&p->umem_fq, pos + i) =
+ bcache_cons(p->bc);
+
+ xsk_ring_prod__submit(&p->umem_fq, n_pkts);
+
+ return n_pkts;
+}
+
+static inline void
+port_tx_burst(struct port *p, struct burst_tx *b)
+{ /* Hand completed buffers back to the bcache, then queue all b->n_pkts packets on the TX ring. */
+ u32 n_pkts, pos, i;
+ int status;
+
+ /* UMEM CQ: drain up to comp_size completions, passing each address to bcache_prod(). */
+ n_pkts = p->params.bp->umem_cfg.comp_size;
+
+ n_pkts = xsk_ring_cons__peek(&p->umem_cq, n_pkts, &pos);
+
+ for (i = 0; i < n_pkts; i++) {
+ u64 addr = *xsk_ring_cons__comp_addr(&p->umem_cq, pos + i);
+
+ bcache_prod(p->bc, addr);
+ }
+
+ xsk_ring_cons__release(&p->umem_cq, n_pkts);
+
+ /* TXQ: loop until n_pkts TX ring slots are reserved, issuing sendto() whenever a wakeup is needed. */
+ n_pkts = b->n_pkts;
+
+ for ( ; ; ) {
+ status = xsk_ring_prod__reserve(&p->txq, n_pkts, &pos);
+ if (status == n_pkts)
+ break;
+
+ if (xsk_ring_prod__needs_wakeup(&p->txq))
+ sendto(xsk_socket__fd(p->xsk), NULL, 0, MSG_DONTWAIT,
+ NULL, 0);
+ }
+
+ for (i = 0; i < n_pkts; i++) { /* Copy address and length into each TX descriptor. */
+ xsk_ring_prod__tx_desc(&p->txq, pos + i)->addr = b->addr[i];
+ xsk_ring_prod__tx_desc(&p->txq, pos + i)->len = b->len[i];
+ }
+
+ xsk_ring_prod__submit(&p->txq, n_pkts);
+ if (xsk_ring_prod__needs_wakeup(&p->txq)) /* Empty, non-blocking sendto() only when asked for. */
+ sendto(xsk_socket__fd(p->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
+ p->n_pkts_tx += n_pkts; /* b->n_pkts is not reset here; thread_func() does that. */
+}
+
+/*
+ * Thread
+ *
+ * Packet forwarding threads.
+ */
+#ifndef MAX_PORTS_PER_THREAD /* Overridable at build time. */
+#define MAX_PORTS_PER_THREAD 16 /* Capacity of the port arrays in struct thread_data. */
+#endif
+
+struct thread_data { /* Per-thread state handed to thread_func(). */
+ struct port *ports_rx[MAX_PORTS_PER_THREAD]; /* Ports this thread receives from. */
+ struct port *ports_tx[MAX_PORTS_PER_THREAD]; /* ports_tx[i] transmits what ports_rx[i] received. */
+ u32 n_ports_rx; /* Number of valid entries in ports_rx[] and ports_tx[]. */
+ struct burst_rx burst_rx; /* Single RX burst, reused for every port. */
+ struct burst_tx burst_tx[MAX_PORTS_PER_THREAD]; /* Pending TX burst, indexed like ports_tx[]. */
+ u32 cpu_core_id; /* CPU core thread_func() sets its affinity to. */
+ int quit; /* Set to 1 by main() to end the thread's loop. */
+};
+
+/* Swap the source and destination MAC addresses of the frame at data. */
+static void swap_mac_addresses(void *data)
+{
+	struct ether_header *eth = (struct ether_header *)data;
+	struct ether_addr *src_addr = (struct ether_addr *)&eth->ether_shost;
+	struct ether_addr *dst_addr = (struct ether_addr *)&eth->ether_dhost;
+	struct ether_addr tmp = *src_addr;
+
+	*src_addr = *dst_addr;
+	*dst_addr = tmp;
+}
+
+/* Forwarding thread: pin to the requested core, then poll the RX ports
+ * round-robin (index wraps by comparison, so any port count works) until quit.
+ */
+static void *thread_func(void *arg)
+{
+	struct thread_data *t = arg;
+	cpu_set_t cpu_cores;
+	u32 i;
+
+	CPU_ZERO(&cpu_cores);
+	CPU_SET(t->cpu_core_id, &cpu_cores);
+	pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpu_cores);
+
+	for (i = 0; !t->quit; i = (i + 1 < t->n_ports_rx) ? i + 1 : 0) {
+		struct port *port_rx = t->ports_rx[i];
+		struct port *port_tx = t->ports_tx[i];
+		struct burst_rx *brx = &t->burst_rx;
+		struct burst_tx *btx = &t->burst_tx[i];
+		u32 n_pkts, j;
+
+		/* RX. */
+		n_pkts = port_rx_burst(port_rx, brx);
+		if (!n_pkts)
+			continue;
+
+		/* Process & TX: a TX burst is only sent once it is full. */
+		for (j = 0; j < n_pkts; j++) {
+			u64 addr = xsk_umem__add_offset_to_addr(brx->addr[j]);
+			u8 *pkt = xsk_umem__get_data(port_rx->params.bp->addr,
+						     addr);
+
+			swap_mac_addresses(pkt);
+			btx->addr[btx->n_pkts] = brx->addr[j];
+			btx->len[btx->n_pkts] = brx->len[j];
+			btx->n_pkts++;
+			if (btx->n_pkts == MAX_BURST_TX) {
+				port_tx_burst(port_tx, btx);
+				btx->n_pkts = 0;
+			}
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Process
+ */
+static const struct bpool_params bpool_params_default = { /* Copied into bpool_params by main(). */
+ .n_buffers = 64 * 1024, /* The value print_usage() reports as the -b default. */
+ .buffer_size = XSK_UMEM__DEFAULT_FRAME_SIZE, /* Same constant as umem_cfg_default.frame_size. */
+ .mmap_flags = 0,
+
+ .n_users_max = 16,
+ .n_buffers_per_slab = XSK_RING_PROD__DEFAULT_NUM_DESCS * 2, /* Same expression as umem_cfg_default.fill_size. */
+};
+
+static const struct xsk_umem_config umem_cfg_default = { /* Copied into umem_cfg by main(). */
+ .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS * 2, /* Presumably what port_init() reads back as bp->umem_cfg.fill_size; confirm in bpool_init(). */
+ .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, /* Presumably what port_tx_burst() reads back as bp->umem_cfg.comp_size; confirm in bpool_init(). */
+ .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
+ .frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM,
+ .flags = 0,
+};
+
+static const struct port_params port_params_default = { /* Template main() copies into every port_params[] slot. */
+ .xsk_cfg = {
+ .rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
+ .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+ .libbpf_flags = 0,
+ .xdp_flags = XDP_FLAGS_DRV_MODE, /* Also the flags remove_xdp_program() passes at shutdown. */
+ .bind_flags = XDP_USE_NEED_WAKEUP | XDP_ZEROCOPY, /* port_rx_burst()/port_tx_burst() check xsk_ring_prod__needs_wakeup(). */
+ },
+
+ .bp = NULL, /* Filled in by main() once the buffer pool exists. */
+ .iface = NULL, /* Set by parse_args() for each -i option. */
+ .iface_queue = 0, /* The value print_usage() reports as the -q default. */
+};
+
+#ifndef MAX_PORTS /* Overridable at build time. */
+#define MAX_PORTS 64 /* Limit on -i options, enforced by parse_args(). */
+#endif
+
+#ifndef MAX_THREADS /* Overridable at build time. */
+#define MAX_THREADS 64 /* Limit on -c options, enforced by parse_args(). */
+#endif
+
+static struct bpool_params bpool_params; /* Pool parameters: defaults, then parse_args() updates. */
+static struct xsk_umem_config umem_cfg; /* Copy of umem_cfg_default passed to bpool_init(). */
+static struct bpool *bp; /* The one buffer pool assigned to every port's parameters. */
+
+static struct port_params port_params[MAX_PORTS]; /* One entry per -i option. */
+static struct port *ports[MAX_PORTS]; /* Ports created by main(), indexed by port id. */
+static u64 n_pkts_rx[MAX_PORTS]; /* RX totals as of the previous print_port_stats() call. */
+static u64 n_pkts_tx[MAX_PORTS]; /* TX totals as of the previous print_port_stats() call. */
+static int n_ports; /* Number of -i options seen. */
+
+static pthread_t threads[MAX_THREADS]; /* Thread handles, joined by main() at shutdown. */
+static struct thread_data thread_data[MAX_THREADS]; /* One entry per -c option. */
+static int n_threads; /* Number of -c options seen. */
+
+/* Print the usage text, filling in the program name and the -b/-q defaults. */
+static void print_usage(char *prog_name)
+{
+	static const char help_fmt[] =
+		"Usage:\n"
+		"\t%s [ -b SIZE ] -c CORE -i INTERFACE [ -q QUEUE ]\n"
+		"\n"
+		"-c CORE CPU core to run a packet forwarding thread\n"
+		" on. May be invoked multiple times.\n"
+		"\n"
+		"-b SIZE Number of buffers in the buffer pool shared\n"
+		" by all the forwarding threads. Default: %u.\n"
+		"\n"
+		"-i INTERFACE Network interface. Each (INTERFACE, QUEUE)\n"
+		" pair specifies one forwarding port. May be\n"
+		" invoked multiple times.\n"
+		"\n"
+		"-q QUEUE Network interface queue for RX and TX. Each\n"
+		" (INTERFACE, QUEUE) pair specified one\n"
+		" forwarding port. Default: %u. May be invoked\n"
+		" multiple times.\n"
+		"\n";
+
+	printf(help_fmt, prog_name,
+	       bpool_params_default.n_buffers,
+	       port_params_default.iface_queue);
+}
+
+/* Parse the command line into bpool_params, thread_data[] and port_params[].
+ * Returns 0 on success, -1 when an option or the port/thread layout is invalid.
+ */
+static int parse_args(int argc, char **argv)
+{
+	struct option lgopts[] = { { NULL, 0, 0, 0 } };
+	int opt, option_index;
+
+	/* Parse the input arguments. "b:" is listed so that -b is accepted. */
+	for ( ; ;) {
+		opt = getopt_long(argc, argv, "b:c:i:q:", lgopts,
+				  &option_index);
+		if (opt == EOF)
+			break;
+
+		switch (opt) {
+		case 'b':
+			bpool_params.n_buffers = atoi(optarg);
+			break;
+		case 'c':
+			if (n_threads == MAX_THREADS) {
+				printf("Max number of threads (%d) reached.\n",
+				       MAX_THREADS);
+				return -1;
+			}
+			thread_data[n_threads].cpu_core_id = atoi(optarg);
+			n_threads++;
+			break;
+		case 'i':
+			if (n_ports == MAX_PORTS) {
+				printf("Max number of ports (%d) reached.\n",
+				       MAX_PORTS);
+				return -1;
+			}
+			port_params[n_ports].iface = optarg;
+			port_params[n_ports].iface_queue = 0;
+			n_ports++;
+			break;
+		case 'q':
+			if (n_ports == 0) {
+				printf("No port specified for queue.\n");
+				return -1;
+			}
+			port_params[n_ports - 1].iface_queue = atoi(optarg);
+			break;
+		default:
+			printf("Illegal argument.\n");
+			return -1;
+		}
+	}
+
+	optind = 1; /* reset getopt lib */
+
+	/* Check the input arguments. */
+	if (!n_ports) {
+		printf("No ports specified.\n");
+		return -1;
+	}
+	if (!n_threads) {
+		printf("No threads specified.\n");
+		return -1;
+	}
+	if (n_ports % n_threads) {
+		printf("Ports cannot be evenly distributed to threads.\n");
+		return -1;
+	}
+
+	/* struct thread_data has room for MAX_PORTS_PER_THREAD ports only. */
+	if (n_ports / n_threads > MAX_PORTS_PER_THREAD) {
+		printf("Max number of ports per thread (%d) exceeded.\n",
+		       MAX_PORTS_PER_THREAD);
+		return -1;
+	}
+
+	return 0;
+}
+
+/* Print the interface and queue of the port with index port_id. */
+static void print_port(u32 port_id)
+{
+	const struct port_params *pp = &ports[port_id]->params;
+
+	printf("Port %u: interface = %s, queue = %u\n",
+	       port_id, pp->iface, pp->iface_queue);
+}
+
+/* Print the CPU core of thread thread_id followed by each of its
+ * (RX interface, queue) -> (TX interface, queue) forwarding pairs.
+ */
+static void print_thread(u32 thread_id)
+{
+	const struct thread_data *td = &thread_data[thread_id];
+	u32 k = 0;
+
+	printf("Thread %u (CPU core %u): ", thread_id, td->cpu_core_id);
+
+	while (k < td->n_ports_rx) {
+		const struct port_params *rx = &td->ports_rx[k]->params;
+		const struct port_params *tx = &td->ports_tx[k]->params;
+
+		printf("(%s, %u) -> (%s, %u), ",
+		       rx->iface, rx->iface_queue,
+		       tx->iface, tx->iface_queue);
+		k++;
+	}
+
+	printf("\n");
+}
+
+/* Print the horizontal rule that frames the rows of the statistics table. */
+static void print_port_stats_separator(void)
+{
+	const char *w4 = "----";
+	const char *w12 = "------------";
+	const char *w13 = "-------------";
+
+	printf("+-%4s-+-%12s-+-%13s-+-%12s-+-%13s-+\n",
+	       w4, w12, w13, w12, w13);
+}
+
+/* Print the top of the statistics table: rule, column titles, rule. */
+static void print_port_stats_header(void)
+{
+	print_port_stats_separator();
+
+	printf("| %4s | %12s | %13s | %12s | %13s |\n",
+	       "Port",
+	       "RX packets", "RX rate (pps)",
+	       "TX packets", "TX_rate (pps)");
+
+	print_port_stats_separator();
+}
+
+/* Close the statistics table with a rule followed by an empty line. */
+static void print_port_stats_trailer(void)
+{
+	print_port_stats_separator();
+	putchar('\n');
+}
+
+/* Print the table row of port port_id: cumulative RX/TX packet counts and the
+ * rates over the last ns_diff nanoseconds, then save the counts as the
+ * baseline for the next call.
+ */
+static void print_port_stats(int port_id, u64 ns_diff)
+{
+	struct port *p = ports[port_id];
+	u64 rx_delta = p->n_pkts_rx - n_pkts_rx[port_id];
+	u64 tx_delta = p->n_pkts_tx - n_pkts_tx[port_id];
+	double rx_pps = rx_delta * 1000000000. / ns_diff;
+	double tx_pps = tx_delta * 1000000000. / ns_diff;
+
+	printf("| %4d | %12llu | %13.0f | %12llu | %13.0f |\n",
+	       port_id, p->n_pkts_rx, rx_pps, p->n_pkts_tx, tx_pps);
+
+	/* The next call measures its rates from these totals. */
+	n_pkts_rx[port_id] = p->n_pkts_rx;
+	n_pkts_tx[port_id] = p->n_pkts_tx;
+}
+
+/* Print the whole statistics table, one row per configured port. */
+static void print_port_stats_all(u64 ns_diff)
+{
+	int port_id = 0;
+
+	print_port_stats_header();
+	while (port_id < n_ports)
+		print_port_stats(port_id++, ns_diff);
+	print_port_stats_trailer();
+}
+
+/* Stop request: stored by the signal handler, polled by main(). */
+static volatile sig_atomic_t quit;
+
+static void signal_handler(int sig)
+{
+	quit = 1;
+}
+
+/* Pass fd -1 to bpf_set_link_xdp_fd() for the interface of every port. */
+static void remove_xdp_program(void)
+{
+	const struct port_params *pp = port_params, *end = pp + n_ports;
+
+	for ( ; pp != end; pp++)
+		bpf_set_link_xdp_fd(if_nametoindex(pp->iface), -1, pp->xsk_cfg.xdp_flags);
+}
+
+/* Parse the command line, create the buffer pool, the ports and the forwarding
+ * threads, then print per-port statistics once a second until a signal asks
+ * to quit. Returns 0 after a clean shutdown, -1 on any setup failure.
+ */
+int main(int argc, char **argv)
+{
+	struct timespec time;
+	u64 ns0;
+	int i;
+
+	/* Parse args. */
+	bpool_params = bpool_params_default;
+	umem_cfg = umem_cfg_default;
+	for (i = 0; i < MAX_PORTS; i++)
+		port_params[i] = port_params_default;
+
+	if (parse_args(argc, argv)) {
+		print_usage(argv[0]);
+		return -1;
+	}
+
+	/* Buffer pool initialization. */
+	bp = bpool_init(&bpool_params, &umem_cfg);
+	if (!bp) {
+		printf("Buffer pool initialization failed.\n");
+		return -1;
+	}
+	printf("Buffer pool created successfully.\n");
+
+	/* Ports initialization. */
+	for (i = 0; i < MAX_PORTS; i++)
+		port_params[i].bp = bp;
+
+	for (i = 0; i < n_ports; i++) {
+		ports[i] = port_init(&port_params[i]);
+		if (!ports[i]) {
+			printf("Port %d initialization failed.\n", i);
+			return -1;
+		}
+		print_port(i);
+	}
+	printf("All ports created successfully.\n");
+
+	/* Threads: port j of a thread's slice forwards to port j + 1 (mod n). */
+	for (i = 0; i < n_threads; i++) {
+		struct thread_data *t = &thread_data[i];
+		u32 n_ports_per_thread = n_ports / n_threads, j;
+
+		for (j = 0; j < n_ports_per_thread; j++) {
+			t->ports_rx[j] = ports[i * n_ports_per_thread + j];
+			t->ports_tx[j] = ports[i * n_ports_per_thread +
+				(j + 1) % n_ports_per_thread];
+		}
+
+		t->n_ports_rx = n_ports_per_thread;
+
+		print_thread(i);
+	}
+
+	for (i = 0; i < n_threads; i++) {
+		int status;
+
+		status = pthread_create(&threads[i], NULL, thread_func,
+					&thread_data[i]);
+		if (status) {
+			printf("Thread %d creation failed.\n", i);
+			return -1;
+		}
+	}
+	printf("All threads created successfully.\n");
+
+	/* Print statistics. */
+	signal(SIGINT, signal_handler);
+	signal(SIGTERM, signal_handler);
+	signal(SIGABRT, signal_handler);
+
+	/* Widen before scaling so the product cannot wrap where long is 32-bit. */
+	clock_gettime(CLOCK_MONOTONIC, &time);
+	ns0 = (u64)time.tv_sec * 1000000000ULL + time.tv_nsec;
+	for ( ; !quit; ) {
+		u64 ns1, ns_diff;
+
+		sleep(1);
+		clock_gettime(CLOCK_MONOTONIC, &time);
+		ns1 = (u64)time.tv_sec * 1000000000ULL + time.tv_nsec;
+		ns_diff = ns1 - ns0;
+		ns0 = ns1;
+
+		print_port_stats_all(ns_diff);
+	}
+
+	/* Threads completion. */
+	printf("Quit.\n");
+	for (i = 0; i < n_threads; i++)
+		thread_data[i].quit = 1;
+
+	for (i = 0; i < n_threads; i++)
+		pthread_join(threads[i], NULL);
+
+	for (i = 0; i < n_ports; i++)
+		port_free(ports[i]);
+
+	bpool_free(bp);
+
+	remove_xdp_program();
+
+	return 0;
+}