Update Linux to v5.4.2 Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd

commit: 0f672f6c0b52b7b0700b0915c72b540721af4465 [log] [tgz]
author: David Brazdil <dbrazdil@google.com> Tue Dec 10 10:32:29 2019 +0000
committer: David Brazdil <dbrazdil@google.com> Tue Dec 10 19:03:18 2019 +0000
tree: 85c8cba019caa205e4f8920d72d93f6d6deaf29c
parent: 3a0ad55d848b50499b68d7141d4eca997fce28ef [diff]
diff --git a/samples/bpf/.gitignore b/samples/bpf/.gitignore
index 8ae4940..74d31fd 100644
--- a/samples/bpf/.gitignore
+++ b/samples/bpf/.gitignore

@@ -1,7 +1,8 @@
 cpustat
 fds_example
+hbm
+ibumad
 lathist
-load_sock_ops
 lwt_len_hist
 map_perf_test
 offwaketime
@@ -45,5 +46,6 @@
 xdp_redirect_map
 xdp_router_ipv4
 xdp_rxq_info
+xdp_sample_pkts
 xdp_tx_iptunnel
 xdpsock

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 36f9f41..42b571c 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile

@@ -26,7 +26,6 @@
 hostprogs-y += test_overhead
 hostprogs-y += test_cgrp2_array_pin
 hostprogs-y += test_cgrp2_attach
-hostprogs-y += test_cgrp2_attach2
 hostprogs-y += test_cgrp2_sock
 hostprogs-y += test_cgrp2_sock2
 hostprogs-y += xdp1
@@ -40,7 +39,6 @@
 hostprogs-y += xdp_tx_iptunnel
 hostprogs-y += test_map_in_map
 hostprogs-y += per_socket_stats_example
-hostprogs-y += load_sock_ops
 hostprogs-y += xdp_redirect
 hostprogs-y += xdp_redirect_map
 hostprogs-y += xdp_redirect_cpu
@@ -53,6 +51,8 @@
 hostprogs-y += xdp_fwd
 hostprogs-y += task_fd_query
 hostprogs-y += xdp_sample_pkts
+hostprogs-y += ibumad
+hostprogs-y += hbm
 
 # Libbpf dependencies
 LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
@@ -60,9 +60,9 @@
 CGROUP_HELPERS := ../../tools/testing/selftests/bpf/cgroup_helpers.o
 TRACE_HELPERS := ../../tools/testing/selftests/bpf/trace_helpers.o
 
-fds_example-objs := bpf_load.o fds_example.o
-sockex1-objs := bpf_load.o sockex1_user.o
-sockex2-objs := bpf_load.o sockex2_user.o
+fds_example-objs := fds_example.o
+sockex1-objs := sockex1_user.o
+sockex2-objs := sockex2_user.o
 sockex3-objs := bpf_load.o sockex3_user.o
 tracex1-objs := bpf_load.o tracex1_user.o
 tracex2-objs := bpf_load.o tracex2_user.o
@@ -71,7 +71,6 @@
 tracex5-objs := bpf_load.o tracex5_user.o
 tracex6-objs := bpf_load.o tracex6_user.o
 tracex7-objs := bpf_load.o tracex7_user.o
-load_sock_ops-objs := bpf_load.o load_sock_ops.o
 test_probe_write_user-objs := bpf_load.o test_probe_write_user_user.o
 trace_output-objs := bpf_load.o trace_output_user.o $(TRACE_HELPERS)
 lathist-objs := bpf_load.o lathist_user.o
@@ -81,24 +80,23 @@
 test_overhead-objs := bpf_load.o test_overhead_user.o
 test_cgrp2_array_pin-objs := test_cgrp2_array_pin.o
 test_cgrp2_attach-objs := test_cgrp2_attach.o
-test_cgrp2_attach2-objs := test_cgrp2_attach2.o $(CGROUP_HELPERS)
 test_cgrp2_sock-objs := test_cgrp2_sock.o
 test_cgrp2_sock2-objs := bpf_load.o test_cgrp2_sock2.o
 xdp1-objs := xdp1_user.o
 # reuse xdp1 source intentionally
 xdp2-objs := xdp1_user.o
-xdp_router_ipv4-objs := bpf_load.o xdp_router_ipv4_user.o
+xdp_router_ipv4-objs := xdp_router_ipv4_user.o
 test_current_task_under_cgroup-objs := bpf_load.o $(CGROUP_HELPERS) \
 				       test_current_task_under_cgroup_user.o
 trace_event-objs := bpf_load.o trace_event_user.o $(TRACE_HELPERS)
 sampleip-objs := bpf_load.o sampleip_user.o $(TRACE_HELPERS)
 tc_l2_redirect-objs := bpf_load.o tc_l2_redirect_user.o
 lwt_len_hist-objs := bpf_load.o lwt_len_hist_user.o
-xdp_tx_iptunnel-objs := bpf_load.o xdp_tx_iptunnel_user.o
+xdp_tx_iptunnel-objs := xdp_tx_iptunnel_user.o
 test_map_in_map-objs := bpf_load.o test_map_in_map_user.o
 per_socket_stats_example-objs := cookie_uid_helper_example.o
-xdp_redirect-objs := bpf_load.o xdp_redirect_user.o
-xdp_redirect_map-objs := bpf_load.o xdp_redirect_map_user.o
+xdp_redirect-objs := xdp_redirect_user.o
+xdp_redirect_map-objs := xdp_redirect_map_user.o
 xdp_redirect_cpu-objs := bpf_load.o xdp_redirect_cpu_user.o
 xdp_monitor-objs := bpf_load.o xdp_monitor_user.o
 xdp_rxq_info-objs := xdp_rxq_info_user.o
@@ -109,6 +107,8 @@
 xdp_fwd-objs := xdp_fwd_user.o
 task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)
 xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS)
+ibumad-objs := bpf_load.o ibumad_user.o $(TRACE_HELPERS)
+hbm-objs := bpf_load.o hbm.o $(CGROUP_HELPERS)
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -153,6 +153,8 @@
 always += tcp_iw_kern.o
 always += tcp_clamp_kern.o
 always += tcp_basertt_kern.o
+always += tcp_tos_reflect_kern.o
+always += tcp_dumpstats_kern.o
 always += xdp_redirect_kern.o
 always += xdp_redirect_map_kern.o
 always += xdp_redirect_cpu_kern.o
@@ -162,27 +164,21 @@
 always += syscall_tp_kern.o
 always += cpustat_kern.o
 always += xdp_adjust_tail_kern.o
-always += xdpsock_kern.o
 always += xdp_fwd_kern.o
 always += task_fd_query_kern.o
 always += xdp_sample_pkts_kern.o
+always += ibumad_kern.o
+always += hbm_out_kern.o
+always += hbm_edt_kern.o
 
 KBUILD_HOSTCFLAGS += -I$(objtree)/usr/include
-KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/
+KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/bpf/
 KBUILD_HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/
 KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/ -I$(srctree)/tools/include
 KBUILD_HOSTCFLAGS += -I$(srctree)/tools/perf
+KBUILD_HOSTCFLAGS += -DHAVE_ATTR_TEST=0
 
 HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable
-HOSTCFLAGS_trace_helpers.o += -I$(srctree)/tools/lib/bpf/
-
-HOSTCFLAGS_trace_output_user.o += -I$(srctree)/tools/lib/bpf/
-HOSTCFLAGS_offwaketime_user.o += -I$(srctree)/tools/lib/bpf/
-HOSTCFLAGS_spintest_user.o += -I$(srctree)/tools/lib/bpf/
-HOSTCFLAGS_trace_event_user.o += -I$(srctree)/tools/lib/bpf/
-HOSTCFLAGS_sampleip_user.o += -I$(srctree)/tools/lib/bpf/
-HOSTCFLAGS_task_fd_query_user.o += -I$(srctree)/tools/lib/bpf/
-HOSTCFLAGS_xdp_sample_pkts_user.o += -I$(srctree)/tools/lib/bpf/
 
 KBUILD_HOSTLDLIBS		+= $(LIBBPF) -lelf
 HOSTLDLIBS_tracex4		+= -lrt
@@ -204,15 +200,35 @@
 CLANG_ARCH_ARGS = -target $(ARCH)
 endif
 
+# Don't evaluate probes and warnings if we need to run make recursively
+ifneq ($(src),)
+HDR_PROBE := $(shell echo "\#include <linux/types.h>\n struct list_head { int a; }; int main() { return 0; }" | \
+	$(HOSTCC) $(KBUILD_HOSTCFLAGS) -x c - -o /dev/null 2>/dev/null && \
+	echo okay)
+
+ifeq ($(HDR_PROBE),)
+$(warning WARNING: Detected possible issues with include path.)
+$(warning WARNING: Please install kernel headers locally (make headers_install).)
+endif
+
 BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
 BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
 BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm')
+BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \
+			  $(CLANG) -target bpf -O2 -g -c -x c - -o ./llvm_btf_verify.o; \
+			  readelf -S ./llvm_btf_verify.o | grep BTF; \
+			  /bin/rm -f ./llvm_btf_verify.o)
 
+ifneq ($(BTF_LLVM_PROBE),)
+	EXTRA_CFLAGS += -g
+else
 ifneq ($(and $(BTF_LLC_PROBE),$(BTF_PAHOLE_PROBE),$(BTF_OBJCOPY_PROBE)),)
 	EXTRA_CFLAGS += -g
 	LLC_FLAGS += -mattr=dwarfris
 	DWARF2BTF = y
 endif
+endif
+endif
 
 # Trick to allow make to be run from this directory
 all:
@@ -226,12 +242,10 @@
 # Fix up variables inherited from Kbuild that tools/ build system won't like
 	$(MAKE) -C $(dir $@) RM='rm -rf' LDFLAGS= srctree=$(BPF_SAMPLES_PATH)/../../ O=
 
-$(obj)/syscall_nrs.s:	$(src)/syscall_nrs.c
-	$(call if_changed_dep,cc_s_c)
-
 $(obj)/syscall_nrs.h:	$(obj)/syscall_nrs.s FORCE
 	$(call filechk,offsets,__SYSCALL_NRS_H__)
 
+targets += syscall_nrs.s
 clean-files += syscall_nrs.h
 
 FORCE:
@@ -259,6 +273,9 @@
 $(src)/*.c: verify_target_bpf $(LIBBPF)
 
 $(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h
+$(obj)/hbm_out_kern.o: $(src)/hbm.h $(src)/hbm_kern.h
+$(obj)/hbm.o: $(src)/hbm.h
+$(obj)/hbm_edt_kern.o: $(src)/hbm.h $(src)/hbm_kern.h
 
 # asm/sysreg.h - inline assembly used by it is incompatible with llvm.
 # But, there is no easy way to fix it, so just exclude it since it is
@@ -268,10 +285,11 @@
 	$(Q)$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \
 		-I$(srctree)/tools/testing/selftests/bpf/ \
 		-D__KERNEL__ -D__BPF_TRACING__ -Wno-unused-value -Wno-pointer-sign \
-		-D__TARGET_ARCH_$(ARCH) -Wno-compare-distinct-pointer-types \
+		-D__TARGET_ARCH_$(SRCARCH) -Wno-compare-distinct-pointer-types \
 		-Wno-gnu-variable-sized-type-not-at-end \
 		-Wno-address-of-packed-member -Wno-tautological-compare \
 		-Wno-unknown-warning-option $(CLANG_ARCH_ARGS) \
+		-I$(srctree)/samples/bpf/ -include asm_goto_workaround.h \
 		-O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf $(LLC_FLAGS) -filetype=obj -o $@
 ifeq ($(DWARF2BTF),y)
 	$(BTF_PAHOLE) -J $@

diff --git a/samples/bpf/asm_goto_workaround.h b/samples/bpf/asm_goto_workaround.h
new file mode 100644
index 0000000..7048bb3
--- /dev/null
+++ b/samples/bpf/asm_goto_workaround.h

@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019 Facebook */
+#ifndef __ASM_GOTO_WORKAROUND_H
+#define __ASM_GOTO_WORKAROUND_H
+
+/*
+ * This will bring in asm_volatile_goto and asm_inline macro definitions
+ * if enabled by compiler and config options.
+ */
+#include <linux/types.h>
+
+#ifdef asm_volatile_goto
+#undef asm_volatile_goto
+#define asm_volatile_goto(x...) asm volatile("invalid use of asm_volatile_goto")
+#endif
+
+/*
+ * asm_inline is defined as asm __inline in "include/linux/compiler_types.h"
+ * if supported by the kernel's CC (i.e CONFIG_CC_HAS_ASM_INLINE) which is not
+ * supported by CLANG.
+ */
+#ifdef asm_inline
+#undef asm_inline
+#define asm_inline asm
+#endif
+
+#define volatile(x...) volatile("")
+#endif

diff --git a/samples/bpf/bpf_insn.h b/samples/bpf/bpf_insn.h
index 20dc5ce..5442379 100644
--- a/samples/bpf/bpf_insn.h
+++ b/samples/bpf/bpf_insn.h

@@ -164,6 +164,16 @@
 		.off   = OFF,					\
 		.imm   = 0 })
 
+/* Like BPF_JMP_REG, but with 32-bit wide operands for comparison. */
+
+#define BPF_JMP32_REG(OP, DST, SRC, OFF)			\
+	((struct bpf_insn) {					\
+		.code  = BPF_JMP32 | BPF_OP(OP) | BPF_X,	\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = OFF,					\
+		.imm   = 0 })
+
 /* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */
 
 #define BPF_JMP_IMM(OP, DST, IMM, OFF)				\
@@ -174,6 +184,16 @@
 		.off   = OFF,					\
 		.imm   = IMM })
 
+/* Like BPF_JMP_IMM, but with 32-bit wide operands for comparison. */
+
+#define BPF_JMP32_IMM(OP, DST, IMM, OFF)			\
+	((struct bpf_insn) {					\
+		.code  = BPF_JMP32 | BPF_OP(OP) | BPF_K,	\
+		.dst_reg = DST,					\
+		.src_reg = 0,					\
+		.off   = OFF,					\
+		.imm   = IMM })
+
 /* Raw code statement block */
 
 #define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM)			\

diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index 904e775..4574b19 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c

@@ -16,7 +16,6 @@
 #include <linux/netlink.h>
 #include <linux/rtnetlink.h>
 #include <linux/types.h>
-#include <sys/types.h>
 #include <sys/socket.h>
 #include <sys/syscall.h>
 #include <sys/ioctl.h>
@@ -41,7 +40,7 @@
 int prog_array_fd = -1;
 
 struct bpf_map_data map_data[MAX_MAPS];
-int map_data_count = 0;
+int map_data_count;
 
 static int populate_prog_array(const char *event, int prog_fd)
 {
@@ -55,6 +54,25 @@
 	return 0;
 }
 
+static int write_kprobe_events(const char *val)
+{
+	int fd, ret, flags;
+
+	if (val == NULL)
+		return -1;
+	else if (val[0] == '\0')
+		flags = O_WRONLY | O_TRUNC;
+	else
+		flags = O_WRONLY | O_APPEND;
+
+	fd = open(DEBUGFS "kprobe_events", flags);
+
+	ret = write(fd, val, strlen(val));
+	close(fd);
+
+	return ret;
+}
+
 static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 {
 	bool is_socket = strncmp(event, "socket", 6) == 0;
@@ -166,10 +184,9 @@
 
 #ifdef __x86_64__
 		if (strncmp(event, "sys_", 4) == 0) {
-			snprintf(buf, sizeof(buf),
-				 "echo '%c:__x64_%s __x64_%s' >> /sys/kernel/debug/tracing/kprobe_events",
-				 is_kprobe ? 'p' : 'r', event, event);
-			err = system(buf);
+			snprintf(buf, sizeof(buf), "%c:__x64_%s __x64_%s",
+				is_kprobe ? 'p' : 'r', event, event);
+			err = write_kprobe_events(buf);
 			if (err >= 0) {
 				need_normal_check = false;
 				event_prefix = "__x64_";
@@ -177,10 +194,9 @@
 		}
 #endif
 		if (need_normal_check) {
-			snprintf(buf, sizeof(buf),
-				 "echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events",
-				 is_kprobe ? 'p' : 'r', event, event);
-			err = system(buf);
+			snprintf(buf, sizeof(buf), "%c:%s %s",
+				is_kprobe ? 'p' : 'r', event, event);
+			err = write_kprobe_events(buf);
 			if (err < 0) {
 				printf("failed to create kprobe '%s' error '%s'\n",
 				       event, strerror(errno));
@@ -285,8 +301,8 @@
 							numa_node);
 		}
 		if (map_fd[i] < 0) {
-			printf("failed to create a map: %d %s\n",
-			       errno, strerror(errno));
+			printf("failed to create map %d (%s): %d %s\n",
+			       i, maps[i].name, errno, strerror(errno));
 			return 1;
 		}
 		maps[i].fd = map_fd[i];
@@ -474,8 +490,8 @@
 
 		/* Verify no newer features were requested */
 		if (validate_zero) {
-			addr = (unsigned char*) def + map_sz_copy;
-			end  = (unsigned char*) def + map_sz_elf;
+			addr = (unsigned char *) def + map_sz_copy;
+			end  = (unsigned char *) def + map_sz_elf;
 			for (; addr < end; addr++) {
 				if (*addr != 0) {
 					free(sym);
@@ -520,7 +536,7 @@
 		return 1;
 
 	/* clear all kprobes */
-	i = system("echo \"\" > /sys/kernel/debug/tracing/kprobe_events");
+	i = write_kprobe_events("");
 
 	/* scan over all elf sections to get license and map info */
 	for (i = 1; i < ehdr.e_shnum; i++) {
@@ -662,7 +678,7 @@
 		static char buf[4096];
 		ssize_t sz;
 
-		sz = read(trace_fd, buf, sizeof(buf));
+		sz = read(trace_fd, buf, sizeof(buf) - 1);
 		if (sz > 0) {
 			buf[sz] = 0;
 			puts(buf);

diff --git a/samples/bpf/do_hbm_test.sh b/samples/bpf/do_hbm_test.sh
new file mode 100755
index 0000000..ffe4c06
--- /dev/null
+++ b/samples/bpf/do_hbm_test.sh

@@ -0,0 +1,442 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2019 Facebook
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of version 2 of the GNU General Public
+# License as published by the Free Software Foundation.
+
+Usage() {
+  echo "Script for testing HBM (Host Bandwidth Manager) framework."
+  echo "It creates a cgroup to use for testing and load a BPF program to limit"
+  echo "egress or ingress bandwidht. It then uses iperf3 or netperf to create"
+  echo "loads. The output is the goodput in Mbps (unless -D was used)."
+  echo ""
+  echo "USAGE: $name [out] [-b=<prog>|--bpf=<prog>] [-c=<cc>|--cc=<cc>]"
+  echo "             [-D] [-d=<delay>|--delay=<delay>] [--debug] [-E] [--edt]"
+  echo "             [-f=<#flows>|--flows=<#flows>] [-h] [-i=<id>|--id=<id >]"
+  echo "             [-l] [-N] [--no_cn] [-p=<port>|--port=<port>] [-P]"
+  echo "             [-q=<qdisc>] [-R] [-s=<server>|--server=<server]"
+  echo "             [-S|--stats] -t=<time>|--time=<time>] [-w] [cubic|dctcp]"
+  echo "  Where:"
+  echo "    out               egress (default)"
+  echo "    -b or --bpf       BPF program filename to load and attach."
+  echo "                      Default is hbm_out_kern.o for egress,"
+  echo "    -c or -cc         TCP congestion control (cubic or dctcp)"
+  echo "    --debug           print BPF trace buffer"
+  echo "    -d or --delay     add a delay in ms using netem"
+  echo "    -D                In addition to the goodput in Mbps, it also outputs"
+  echo "                      other detailed information. This information is"
+  echo "                      test dependent (i.e. iperf3 or netperf)."
+  echo "    -E                enable ECN (not required for dctcp)"
+  echo "    --edt             use fq's Earliest Departure Time (requires fq)"
+  echo "    -f or --flows     number of concurrent flows (default=1)"
+  echo "    -i or --id        cgroup id (an integer, default is 1)"
+  echo "    -N                use netperf instead of iperf3"
+  echo "    --no_cn           Do not return CN notifications"
+  echo "    -l                do not limit flows using loopback"
+  echo "    -h                Help"
+  echo "    -p or --port      iperf3 port (default is 5201)"
+  echo "    -P                use an iperf3 instance for each flow"
+  echo "    -q                use the specified qdisc"
+  echo "    -r or --rate      rate in Mbps (default 1s 1Gbps)"
+  echo "    -R                Use TCP_RR for netperf. 1st flow has req"
+  echo "                      size of 10KB, rest of 1MB. Reply in all"
+  echo "                      cases is 1 byte."
+  echo "                      More detailed output for each flow can be found"
+  echo "                      in the files netperf.<cg>.<flow>, where <cg> is the"
+  echo "                      cgroup id as specified with the -i flag, and <flow>"
+  echo "                      is the flow id starting at 1 and increasing by 1 for"
+  echo "                      flow (as specified by -f)."
+  echo "    -s or --server    hostname of netperf server. Used to create netperf"
+  echo "                      test traffic between to hosts (default is within host)"
+  echo "                      netserver must be running on the host."
+  echo "    -S or --stats     whether to update hbm stats (default is yes)."
+  echo "    -t or --time      duration of iperf3 in seconds (default=5)"
+  echo "    -w                Work conserving flag. cgroup can increase its"
+  echo "                      bandwidth beyond the rate limit specified"
+  echo "                      while there is available bandwidth. Current"
+  echo "                      implementation assumes there is only one NIC"
+  echo "                      (eth0), but can be extended to support multiple"
+  echo "                       NICs."
+  echo "    cubic or dctcp    specify which TCP CC to use"
+  echo " "
+  exit
+}
+
+#set -x
+
+debug_flag=0
+args="$@"
+name="$0"
+netem=0
+cc=x
+dir="-o"
+dir_name="out"
+dur=5
+flows=1
+id=1
+prog=""
+port=5201
+rate=1000
+multi_iperf=0
+flow_cnt=1
+use_netperf=0
+rr=0
+ecn=0
+details=0
+server=""
+qdisc=""
+flags=""
+do_stats=0
+
+function start_hbm () {
+  rm -f hbm.out
+  echo "./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog" > hbm.out
+  echo " " >> hbm.out
+  ./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog >> hbm.out 2>&1  &
+  echo $!
+}
+
+processArgs () {
+  for i in $args ; do
+    case $i in
+    # Support for upcomming ingress rate limiting
+    #in)         # support for upcoming ingress rate limiting
+    #  dir="-i"
+    #  dir_name="in"
+    #  ;;
+    out)
+      dir="-o"
+      dir_name="out"
+      ;;
+    -b=*|--bpf=*)
+      prog="${i#*=}"
+      ;;
+    -c=*|--cc=*)
+      cc="${i#*=}"
+      ;;
+    --no_cn)
+      flags="$flags --no_cn"
+      ;;
+    --debug)
+      flags="$flags -d"
+      debug_flag=1
+      ;;
+    -d=*|--delay=*)
+      netem="${i#*=}"
+      ;;
+    -D)
+      details=1
+      ;;
+    -E)
+      ecn=1
+      ;;
+    --edt)
+      flags="$flags --edt"
+      qdisc="fq"
+     ;;
+    -f=*|--flows=*)
+      flows="${i#*=}"
+      ;;
+    -i=*|--id=*)
+      id="${i#*=}"
+      ;;
+    -l)
+      flags="$flags -l"
+      ;;
+    -N)
+      use_netperf=1
+      ;;
+    -p=*|--port=*)
+      port="${i#*=}"
+      ;;
+    -P)
+      multi_iperf=1
+      ;;
+    -q=*)
+      qdisc="${i#*=}"
+      ;;
+    -r=*|--rate=*)
+      rate="${i#*=}"
+      ;;
+    -R)
+      rr=1
+      ;;
+    -s=*|--server=*)
+      server="${i#*=}"
+      ;;
+    -S|--stats)
+      flags="$flags -s"
+      do_stats=1
+      ;;
+    -t=*|--time=*)
+      dur="${i#*=}"
+      ;;
+    -w)
+      flags="$flags -w"
+      ;;
+    cubic)
+      cc=cubic
+      ;;
+    dctcp)
+      cc=dctcp
+      ;;
+    *)
+      echo "Unknown arg:$i"
+      Usage
+      ;;
+    esac
+  done
+}
+
+processArgs
+
+if [ $debug_flag -eq 1 ] ; then
+  rm -f hbm_out.log
+fi
+
+hbm_pid=$(start_hbm)
+usleep 100000
+
+host=`hostname`
+cg_base_dir=/sys/fs/cgroup
+cg_dir="$cg_base_dir/cgroup-test-work-dir/hbm$id"
+
+echo $$ >> $cg_dir/cgroup.procs
+
+ulimit -l unlimited
+
+rm -f ss.out
+rm -f hbm.[0-9]*.$dir_name
+if [ $ecn -ne 0 ] ; then
+  sysctl -w -q -n net.ipv4.tcp_ecn=1
+fi
+
+if [ $use_netperf -eq 0 ] ; then
+  cur_cc=`sysctl -n net.ipv4.tcp_congestion_control`
+  if [ "$cc" != "x" ] ; then
+    sysctl -w -q -n net.ipv4.tcp_congestion_control=$cc
+  fi
+fi
+
+if [ "$netem" -ne "0" ] ; then
+  if [ "$qdisc" != "" ] ; then
+    echo "WARNING: Ignoring -q options because -d option used"
+  fi
+  tc qdisc del dev lo root > /dev/null 2>&1
+  tc qdisc add dev lo root netem delay $netem\ms > /dev/null 2>&1
+elif [ "$qdisc" != "" ] ; then
+  tc qdisc del dev eth0 root > /dev/null 2>&1
+  tc qdisc add dev eth0 root $qdisc > /dev/null 2>&1
+fi
+
+n=0
+m=$[$dur * 5]
+hn="::1"
+if [ $use_netperf -ne 0 ] ; then
+  if [ "$server" != "" ] ; then
+    hn=$server
+  fi
+fi
+
+( ping6 -i 0.2 -c $m $hn > ping.out 2>&1 ) &
+
+if [ $use_netperf -ne 0 ] ; then
+  begNetserverPid=`ps ax | grep netserver | grep --invert-match "grep" | \
+                   awk '{ print $1 }'`
+  if [ "$begNetserverPid" == "" ] ; then
+    if [ "$server" == "" ] ; then
+      ( ./netserver > /dev/null 2>&1) &
+      usleep 100000
+    fi
+  fi
+  flow_cnt=1
+  if [ "$server" == "" ] ; then
+    np_server=$host
+  else
+    np_server=$server
+  fi
+  if [ "$cc" == "x" ] ; then
+    np_cc=""
+  else
+    np_cc="-K $cc,$cc"
+  fi
+  replySize=1
+  while [ $flow_cnt -le $flows ] ; do
+    if [ $rr -ne 0 ] ; then
+      reqSize=1M
+      if [ $flow_cnt -eq 1 ] ; then
+        reqSize=10K
+      fi
+      if [ "$dir" == "-i" ] ; then
+        replySize=$reqSize
+        reqSize=1
+      fi
+      ( ./netperf -H $np_server -l $dur -f m -j -t TCP_RR  -- -r $reqSize,$replySize $np_cc -k P50_lATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,REMOTE_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,LOCAL_RECV_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) &
+    else
+      if [ "$dir" == "-i" ] ; then
+        ( ./netperf -H $np_server -l $dur -f m -j -t TCP_RR -- -r 1,10M $np_cc -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REMOTE_TRANSPORT_RETRANS,REMOTE_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) &
+      else
+        ( ./netperf -H $np_server -l $dur -f m -j -t TCP_STREAM -- $np_cc -k P50_lATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) &
+      fi
+    fi
+    flow_cnt=$[flow_cnt+1]
+  done
+
+# sleep for duration of test (plus some buffer)
+  n=$[dur+2]
+  sleep $n
+
+# force graceful termination of netperf
+  pids=`pgrep netperf`
+  for p in $pids ; do
+    kill -SIGALRM $p
+  done
+
+  flow_cnt=1
+  rate=0
+  if [ $details -ne 0 ] ; then
+    echo ""
+    echo "Details for HBM in cgroup $id"
+    if [ $do_stats -eq 1 ] ; then
+      if [ -e hbm.$id.$dir_name ] ; then
+        cat hbm.$id.$dir_name
+      fi
+    fi
+  fi
+  while [ $flow_cnt -le $flows ] ; do
+    if [ "$dir" == "-i" ] ; then
+      r=`cat netperf.$id.$flow_cnt | grep -o "REMOTE_SEND_THROUGHPUT=[0-9]*" | grep -o "[0-9]*"`
+    else
+      r=`cat netperf.$id.$flow_cnt | grep -o "LOCAL_SEND_THROUGHPUT=[0-9]*" | grep -o "[0-9]*"`
+    fi
+    echo "rate for flow $flow_cnt: $r"
+    rate=$[rate+r]
+    if [ $details -ne 0 ] ; then
+      echo "-----"
+      echo "Details for cgroup $id, flow $flow_cnt"
+      cat netperf.$id.$flow_cnt
+    fi
+    flow_cnt=$[flow_cnt+1]
+  done
+  if [ $details -ne 0 ] ; then
+    echo ""
+    delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"`
+    echo "PING AVG DELAY:$delay"
+    echo "AGGREGATE_GOODPUT:$rate"
+  else
+    echo $rate
+  fi
+elif [ $multi_iperf -eq 0 ] ; then
+  (iperf3 -s -p $port -1 > /dev/null 2>&1) &
+  usleep 100000
+  iperf3 -c $host -p $port -i 0 -P $flows -f m -t $dur > iperf.$id
+  rates=`grep receiver iperf.$id | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*"`
+  rate=`echo $rates | grep -o "[0-9]*$"`
+
+  if [ $details -ne 0 ] ; then
+    echo ""
+    echo "Details for HBM in cgroup $id"
+    if [ $do_stats -eq 1 ] ; then
+      if [ -e hbm.$id.$dir_name ] ; then
+        cat hbm.$id.$dir_name
+      fi
+    fi
+    delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"`
+    echo "PING AVG DELAY:$delay"
+    echo "AGGREGATE_GOODPUT:$rate"
+  else
+    echo $rate
+  fi
+else
+  flow_cnt=1
+  while [ $flow_cnt -le $flows ] ; do
+    (iperf3 -s -p $port -1 > /dev/null 2>&1) &
+    ( iperf3 -c $host -p $port -i 0 -P 1 -f m -t $dur | grep receiver | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*" | grep -o "[0-9]*$" > iperf3.$id.$flow_cnt ) &
+    port=$[port+1]
+    flow_cnt=$[flow_cnt+1]
+  done
+  n=$[dur+1]
+  sleep $n
+  flow_cnt=1
+  rate=0
+  if [ $details -ne 0 ] ; then
+    echo ""
+    echo "Details for HBM in cgroup $id"
+    if [ $do_stats -eq 1 ] ; then
+      if [ -e hbm.$id.$dir_name ] ; then
+        cat hbm.$id.$dir_name
+      fi
+    fi
+  fi
+
+  while [ $flow_cnt -le $flows ] ; do
+    r=`cat iperf3.$id.$flow_cnt`
+#    echo "rate for flow $flow_cnt: $r"
+  if [ $details -ne 0 ] ; then
+    echo "Rate for cgroup $id, flow $flow_cnt LOCAL_SEND_THROUGHPUT=$r"
+  fi
+    rate=$[rate+r]
+    flow_cnt=$[flow_cnt+1]
+  done
+  if [ $details -ne 0 ] ; then
+    delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"`
+    echo "PING AVG DELAY:$delay"
+    echo "AGGREGATE_GOODPUT:$rate"
+  else
+    echo $rate
+  fi
+fi
+
+if [ $use_netperf -eq 0 ] ; then
+  sysctl -w -q -n net.ipv4.tcp_congestion_control=$cur_cc
+fi
+if [ $ecn -ne 0 ] ; then
+  sysctl -w -q -n net.ipv4.tcp_ecn=0
+fi
+if [ "$netem" -ne "0" ] ; then
+  tc qdisc del dev lo root > /dev/null 2>&1
+fi
+if [ "$qdisc" != "" ] ; then
+  tc qdisc del dev eth0 root > /dev/null 2>&1
+fi
+sleep 2
+
+hbmPid=`ps ax | grep "hbm " | grep --invert-match "grep" | awk '{ print $1 }'`
+if [ "$hbmPid" == "$hbm_pid" ] ; then
+  kill $hbm_pid
+fi
+
+sleep 1
+
+# Detach any BPF programs that may have lingered
+ttx=`bpftool cgroup tree | grep hbm`
+v=2
+for x in $ttx ; do
+    if [ "${x:0:36}" == "/sys/fs/cgroup/cgroup-test-work-dir/" ] ; then
+	cg=$x ; v=0
+    else
+	if [ $v -eq 0 ] ; then
+	    id=$x ; v=1
+	else
+	    if [ $v -eq 1 ] ; then
+		type=$x ; bpftool cgroup detach $cg $type id $id
+		v=0
+	    fi
+	fi
+    fi
+done
+
+if [ $use_netperf -ne 0 ] ; then
+  if [ "$server" == "" ] ; then
+    if [ "$begNetserverPid" == "" ] ; then
+      netserverPid=`ps ax | grep netserver | grep --invert-match "grep" | awk '{ print $1 }'`
+      if [ "$netserverPid" != "" ] ; then
+        kill $netserverPid
+      fi
+    fi
+  fi
+fi
+exit

diff --git a/samples/bpf/fds_example.c b/samples/bpf/fds_example.c
index 9854854..2d4b717 100644
--- a/samples/bpf/fds_example.c
+++ b/samples/bpf/fds_example.c

@@ -14,8 +14,8 @@
 
 #include <bpf/bpf.h>
 
+#include "libbpf.h"
 #include "bpf_insn.h"
-#include "bpf_load.h"
 #include "sock_example.h"
 
 #define BPF_F_PIN	(1 << 0)
@@ -57,10 +57,14 @@
 		BPF_EXIT_INSN(),
 	};
 	size_t insns_cnt = sizeof(insns) / sizeof(struct bpf_insn);
+	char bpf_log_buf[BPF_LOG_BUF_SIZE];
+	struct bpf_object *obj;
+	int prog_fd;
 
 	if (object) {
-		assert(!load_bpf_file((char *)object));
-		return prog_fd[0];
+		assert(!bpf_prog_load(object, BPF_PROG_TYPE_UNSPEC,
+				      &obj, &prog_fd));
+		return prog_fd;
 	} else {
 		return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER,
 					insns, insns_cnt, "GPL", 0,

diff --git a/samples/bpf/hbm.c b/samples/bpf/hbm.c
new file mode 100644
index 0000000..e0fbab9
--- /dev/null
+++ b/samples/bpf/hbm.c

@@ -0,0 +1,498 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Example program for Host Bandwidth Managment
+ *
+ * This program loads a cgroup skb BPF program to enforce cgroup output
+ * (egress) or input (ingress) bandwidth limits.
+ *
+ * USAGE: hbm [-d] [-l] [-n <id>] [-r <rate>] [-s] [-t <secs>] [-w] [-h] [prog]
+ *   Where:
+ *    -d	Print BPF trace debug buffer
+ *    -l	Also limit flows doing loopback
+ *    -n <#>	To create cgroup \"/hbm#\" and attach prog
+ *		Default is /hbm1
+ *    --no_cn   Do not return cn notifications
+ *    -r <rate>	Rate limit in Mbps
+ *    -s	Get HBM stats (marked, dropped, etc.)
+ *    -t <time>	Exit after specified seconds (default is 0)
+ *    -w	Work conserving flag. cgroup can increase its bandwidth
+ *		beyond the rate limit specified while there is available
+ *		bandwidth. Current implementation assumes there is only
+ *		NIC (eth0), but can be extended to support multiple NICs.
+ *		Currrently only supported for egress.
+ *    -h	Print this info
+ *    prog	BPF program file name. Name defaults to hbm_out_kern.o
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/unistd.h>
+
+#include <linux/bpf.h>
+#include <bpf/bpf.h>
+#include <getopt.h>
+
+#include "bpf_load.h"
+#include "bpf_rlimit.h"
+#include "cgroup_helpers.h"
+#include "hbm.h"
+#include "bpf_util.h"
+#include "bpf.h"
+#include "libbpf.h"
+
+bool outFlag = true;
+int minRate = 1000;		/* cgroup rate limit in Mbps */
+int rate = 1000;		/* can grow if rate conserving is enabled */
+int dur = 1;
+bool stats_flag;
+bool loopback_flag;
+bool debugFlag;
+bool work_conserving_flag;
+bool no_cn_flag;
+bool edt_flag;
+
+static void Usage(void);
+static void read_trace_pipe2(void);
+static void do_error(char *msg, bool errno_flag);
+
+#define DEBUGFS "/sys/kernel/debug/tracing/"
+
+struct bpf_object *obj;
+int bpfprog_fd;
+int cgroup_storage_fd;
+
+static void read_trace_pipe2(void)
+{
+	int trace_fd;
+	FILE *outf;
+	char *outFname = "hbm_out.log";
+
+	trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0);
+	if (trace_fd < 0) {
+		printf("Error opening trace_pipe\n");
+		return;
+	}
+
+//	Future support of ingress
+//	if (!outFlag)
+//		outFname = "hbm_in.log";
+	outf = fopen(outFname, "w");
+
+	if (outf == NULL)
+		printf("Error creating %s\n", outFname);
+
+	while (1) {
+		static char buf[4097];
+		ssize_t sz;
+
+		sz = read(trace_fd, buf, sizeof(buf) - 1);
+		if (sz > 0) {
+			buf[sz] = 0;
+			puts(buf);
+			if (outf != NULL) {
+				fprintf(outf, "%s\n", buf);
+				fflush(outf);
+			}
+		}
+	}
+}
+
+static void do_error(char *msg, bool errno_flag)
+{
+	if (errno_flag)
+		printf("ERROR: %s, errno: %d\n", msg, errno);
+	else
+		printf("ERROR: %s\n", msg);
+	exit(1);
+}
+
+static int prog_load(char *prog)
+{
+	struct bpf_prog_load_attr prog_load_attr = {
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+		.file = prog,
+		.expected_attach_type = BPF_CGROUP_INET_EGRESS,
+	};
+	int map_fd;
+	struct bpf_map *map;
+
+	int ret = 0;
+
+	if (access(prog, O_RDONLY) < 0) {
+		printf("Error accessing file %s: %s\n", prog, strerror(errno));
+		return 1;
+	}
+	if (bpf_prog_load_xattr(&prog_load_attr, &obj, &bpfprog_fd))
+		ret = 1;
+	if (!ret) {
+		map = bpf_object__find_map_by_name(obj, "queue_stats");
+		map_fd = bpf_map__fd(map);
+		if (map_fd < 0) {
+			printf("Map not found: %s\n", strerror(map_fd));
+			ret = 1;
+		}
+	}
+
+	if (ret) {
+		printf("ERROR: load_bpf_file failed for: %s\n", prog);
+		printf("  Output from verifier:\n%s\n------\n", bpf_log_buf);
+		ret = -1;
+	} else {
+		ret = map_fd;
+	}
+
+	return ret;
+}
+
+static int run_bpf_prog(char *prog, int cg_id)
+{
+	int map_fd;
+	int rc = 0;
+	int key = 0;
+	int cg1 = 0;
+	int type = BPF_CGROUP_INET_EGRESS;
+	char cg_dir[100];
+	struct hbm_queue_stats qstats = {0};
+
+	sprintf(cg_dir, "/hbm%d", cg_id);
+	map_fd = prog_load(prog);
+	if (map_fd  == -1)
+		return 1;
+
+	if (setup_cgroup_environment()) {
+		printf("ERROR: setting cgroup environment\n");
+		goto err;
+	}
+	cg1 = create_and_get_cgroup(cg_dir);
+	if (!cg1) {
+		printf("ERROR: create_and_get_cgroup\n");
+		goto err;
+	}
+	if (join_cgroup(cg_dir)) {
+		printf("ERROR: join_cgroup\n");
+		goto err;
+	}
+
+	qstats.rate = rate;
+	qstats.stats = stats_flag ? 1 : 0;
+	qstats.loopback = loopback_flag ? 1 : 0;
+	qstats.no_cn = no_cn_flag ? 1 : 0;
+	if (bpf_map_update_elem(map_fd, &key, &qstats, BPF_ANY)) {
+		printf("ERROR: Could not update map element\n");
+		goto err;
+	}
+
+	if (!outFlag)
+		type = BPF_CGROUP_INET_INGRESS;
+	if (bpf_prog_attach(bpfprog_fd, cg1, type, 0)) {
+		printf("ERROR: bpf_prog_attach fails!\n");
+		log_err("Attaching prog");
+		goto err;
+	}
+
+	if (work_conserving_flag) {
+		struct timeval t0, t_last, t_new;
+		FILE *fin;
+		unsigned long long last_eth_tx_bytes, new_eth_tx_bytes;
+		signed long long last_cg_tx_bytes, new_cg_tx_bytes;
+		signed long long delta_time, delta_bytes, delta_rate;
+		int delta_ms;
+#define DELTA_RATE_CHECK 10000		/* in us */
+#define RATE_THRESHOLD 9500000000	/* 9.5 Gbps */
+
+		bpf_map_lookup_elem(map_fd, &key, &qstats);
+		if (gettimeofday(&t0, NULL) < 0)
+			do_error("gettimeofday failed", true);
+		t_last = t0;
+		fin = fopen("/sys/class/net/eth0/statistics/tx_bytes", "r");
+		if (fscanf(fin, "%llu", &last_eth_tx_bytes) != 1)
+			do_error("fscanf fails", false);
+		fclose(fin);
+		last_cg_tx_bytes = qstats.bytes_total;
+		while (true) {
+			usleep(DELTA_RATE_CHECK);
+			if (gettimeofday(&t_new, NULL) < 0)
+				do_error("gettimeofday failed", true);
+			delta_ms = (t_new.tv_sec - t0.tv_sec) * 1000 +
+				(t_new.tv_usec - t0.tv_usec)/1000;
+			if (delta_ms > dur * 1000)
+				break;
+			delta_time = (t_new.tv_sec - t_last.tv_sec) * 1000000 +
+				(t_new.tv_usec - t_last.tv_usec);
+			if (delta_time == 0)
+				continue;
+			t_last = t_new;
+			fin = fopen("/sys/class/net/eth0/statistics/tx_bytes",
+				    "r");
+			if (fscanf(fin, "%llu", &new_eth_tx_bytes) != 1)
+				do_error("fscanf fails", false);
+			fclose(fin);
+			printf("  new_eth_tx_bytes:%llu\n",
+			       new_eth_tx_bytes);
+			bpf_map_lookup_elem(map_fd, &key, &qstats);
+			new_cg_tx_bytes = qstats.bytes_total;
+			delta_bytes = new_eth_tx_bytes - last_eth_tx_bytes;
+			last_eth_tx_bytes = new_eth_tx_bytes;
+			delta_rate = (delta_bytes * 8000000) / delta_time;
+			printf("%5d - eth_rate:%.1fGbps cg_rate:%.3fGbps",
+			       delta_ms, delta_rate/1000000000.0,
+			       rate/1000.0);
+			if (delta_rate < RATE_THRESHOLD) {
+				/* can increase cgroup rate limit, but first
+				 * check if we are using the current limit.
+				 * Currently increasing by 6.25%, unknown
+				 * if that is the optimal rate.
+				 */
+				int rate_diff100;
+
+				delta_bytes = new_cg_tx_bytes -
+					last_cg_tx_bytes;
+				last_cg_tx_bytes = new_cg_tx_bytes;
+				delta_rate = (delta_bytes * 8000000) /
+					delta_time;
+				printf(" rate:%.3fGbps",
+				       delta_rate/1000000000.0);
+				rate_diff100 = (((long long)rate)*1000000 -
+						     delta_rate) * 100 /
+					(((long long) rate) * 1000000);
+				printf("  rdiff:%d", rate_diff100);
+				if (rate_diff100  <= 3) {
+					rate += (rate >> 4);
+					if (rate > RATE_THRESHOLD / 1000000)
+						rate = RATE_THRESHOLD / 1000000;
+					qstats.rate = rate;
+					printf(" INC\n");
+				} else {
+					printf("\n");
+				}
+			} else {
+				/* Need to decrease cgroup rate limit.
+				 * Currently decreasing by 12.5%, unknown
+				 * if that is optimal
+				 */
+				printf(" DEC\n");
+				rate -= (rate >> 3);
+				if (rate < minRate)
+					rate = minRate;
+				qstats.rate = rate;
+			}
+			if (bpf_map_update_elem(map_fd, &key, &qstats, BPF_ANY))
+				do_error("update map element fails", false);
+		}
+	} else {
+		sleep(dur);
+	}
+	// Get stats!
+	if (stats_flag && bpf_map_lookup_elem(map_fd, &key, &qstats)) {
+		char fname[100];
+		FILE *fout;
+
+		if (!outFlag)
+			sprintf(fname, "hbm.%d.in", cg_id);
+		else
+			sprintf(fname, "hbm.%d.out", cg_id);
+		fout = fopen(fname, "w");
+		fprintf(fout, "id:%d\n", cg_id);
+		fprintf(fout, "ERROR: Could not lookup queue_stats\n");
+	} else if (stats_flag && qstats.lastPacketTime >
+		   qstats.firstPacketTime) {
+		long long delta_us = (qstats.lastPacketTime -
+				      qstats.firstPacketTime)/1000;
+		unsigned int rate_mbps = ((qstats.bytes_total -
+					   qstats.bytes_dropped) * 8 /
+					  delta_us);
+		double percent_pkts, percent_bytes;
+		char fname[100];
+		FILE *fout;
+		int k;
+		static const char *returnValNames[] = {
+			"DROP_PKT",
+			"ALLOW_PKT",
+			"DROP_PKT_CWR",
+			"ALLOW_PKT_CWR"
+		};
+#define RET_VAL_COUNT 4
+
+// Future support of ingress
+//		if (!outFlag)
+//			sprintf(fname, "hbm.%d.in", cg_id);
+//		else
+		sprintf(fname, "hbm.%d.out", cg_id);
+		fout = fopen(fname, "w");
+		fprintf(fout, "id:%d\n", cg_id);
+		fprintf(fout, "rate_mbps:%d\n", rate_mbps);
+		fprintf(fout, "duration:%.1f secs\n",
+			(qstats.lastPacketTime - qstats.firstPacketTime) /
+			1000000000.0);
+		fprintf(fout, "packets:%d\n", (int)qstats.pkts_total);
+		fprintf(fout, "bytes_MB:%d\n", (int)(qstats.bytes_total /
+						     1000000));
+		fprintf(fout, "pkts_dropped:%d\n", (int)qstats.pkts_dropped);
+		fprintf(fout, "bytes_dropped_MB:%d\n",
+			(int)(qstats.bytes_dropped /
+						       1000000));
+		// Marked Pkts and Bytes
+		percent_pkts = (qstats.pkts_marked * 100.0) /
+			(qstats.pkts_total + 1);
+		percent_bytes = (qstats.bytes_marked * 100.0) /
+			(qstats.bytes_total + 1);
+		fprintf(fout, "pkts_marked_percent:%6.2f\n", percent_pkts);
+		fprintf(fout, "bytes_marked_percent:%6.2f\n", percent_bytes);
+
+		// Dropped Pkts and Bytes
+		percent_pkts = (qstats.pkts_dropped * 100.0) /
+			(qstats.pkts_total + 1);
+		percent_bytes = (qstats.bytes_dropped * 100.0) /
+			(qstats.bytes_total + 1);
+		fprintf(fout, "pkts_dropped_percent:%6.2f\n", percent_pkts);
+		fprintf(fout, "bytes_dropped_percent:%6.2f\n", percent_bytes);
+
+		// ECN CE markings
+		percent_pkts = (qstats.pkts_ecn_ce * 100.0) /
+			(qstats.pkts_total + 1);
+		fprintf(fout, "pkts_ecn_ce:%6.2f (%d)\n", percent_pkts,
+			(int)qstats.pkts_ecn_ce);
+
+		// Average cwnd
+		fprintf(fout, "avg cwnd:%d\n",
+			(int)(qstats.sum_cwnd / (qstats.sum_cwnd_cnt + 1)));
+		// Average rtt
+		fprintf(fout, "avg rtt:%d\n",
+			(int)(qstats.sum_rtt / (qstats.pkts_total + 1)));
+		// Average credit
+		if (edt_flag)
+			fprintf(fout, "avg credit_ms:%.03f\n",
+				(qstats.sum_credit /
+				 (qstats.pkts_total + 1.0)) / 1000000.0);
+		else
+			fprintf(fout, "avg credit:%d\n",
+				(int)(qstats.sum_credit /
+				      (1500 * ((int)qstats.pkts_total ) + 1)));
+
+		// Return values stats
+		for (k = 0; k < RET_VAL_COUNT; k++) {
+			percent_pkts = (qstats.returnValCount[k] * 100.0) /
+				(qstats.pkts_total + 1);
+			fprintf(fout, "%s:%6.2f (%d)\n", returnValNames[k],
+				percent_pkts, (int)qstats.returnValCount[k]);
+		}
+		fclose(fout);
+	}
+
+	if (debugFlag)
+		read_trace_pipe2();
+	return rc;
+err:
+	rc = 1;
+
+	if (cg1)
+		close(cg1);
+	cleanup_cgroup_environment();
+
+	return rc;
+}
+
+static void Usage(void)
+{
+	printf("This program loads a cgroup skb BPF program to enforce\n"
+	       "cgroup output (egress) bandwidth limits.\n\n"
+	       "USAGE: hbm [-o] [-d]  [-l] [-n <id>] [--no_cn] [-r <rate>]\n"
+	       "           [-s] [-t <secs>] [-w] [-h] [prog]\n"
+	       "  Where:\n"
+	       "    -o         indicates egress direction (default)\n"
+	       "    -d         print BPF trace debug buffer\n"
+	       "    --edt      use fq's Earliest Departure Time\n"
+	       "    -l         also limit flows using loopback\n"
+	       "    -n <#>     to create cgroup \"/hbm#\" and attach prog\n"
+	       "               Default is /hbm1\n"
+	       "    --no_cn    disable CN notifications\n"
+	       "    -r <rate>  Rate in Mbps\n"
+	       "    -s         Update HBM stats\n"
+	       "    -t <time>  Exit after specified seconds (default is 0)\n"
+	       "    -w	       Work conserving flag. cgroup can increase\n"
+	       "               bandwidth beyond the rate limit specified\n"
+	       "               while there is available bandwidth. Current\n"
+	       "               implementation assumes there is only eth0\n"
+	       "               but can be extended to support multiple NICs\n"
+	       "    -h         print this info\n"
+	       "    prog       BPF program file name. Name defaults to\n"
+	       "                 hbm_out_kern.o\n");
+}
+
+int main(int argc, char **argv)
+{
+	char *prog = "hbm_out_kern.o";
+	int  k;
+	int cg_id = 1;
+	char *optstring = "iodln:r:st:wh";
+	struct option loptions[] = {
+		{"no_cn", 0, NULL, 1},
+		{"edt", 0, NULL, 2},
+		{NULL, 0, NULL, 0}
+	};
+
+	while ((k = getopt_long(argc, argv, optstring, loptions, NULL)) != -1) {
+		switch (k) {
+		case 1:
+			no_cn_flag = true;
+			break;
+		case 2:
+			prog = "hbm_edt_kern.o";
+			edt_flag = true;
+			break;
+		case'o':
+			break;
+		case 'd':
+			debugFlag = true;
+			break;
+		case 'l':
+			loopback_flag = true;
+			break;
+		case 'n':
+			cg_id = atoi(optarg);
+			break;
+		case 'r':
+			minRate = atoi(optarg) * 1.024;
+			rate = minRate;
+			break;
+		case 's':
+			stats_flag = true;
+			break;
+		case 't':
+			dur = atoi(optarg);
+			break;
+		case 'w':
+			work_conserving_flag = true;
+			break;
+		case '?':
+			if (optopt == 'n' || optopt == 'r' || optopt == 't')
+				fprintf(stderr,
+					"Option -%c requires an argument.\n\n",
+					optopt);
+		case 'h':
+			// fallthrough
+		default:
+			Usage();
+			return 0;
+		}
+	}
+
+	if (optind < argc)
+		prog = argv[optind];
+	printf("HBM prog: %s\n", prog != NULL ? prog : "NULL");
+
+	return run_bpf_prog(prog, cg_id);
+}

diff --git a/samples/bpf/hbm.h b/samples/bpf/hbm.h
new file mode 100644
index 0000000..f0963ed
--- /dev/null
+++ b/samples/bpf/hbm.h

@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (c) 2019 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Include file for Host Bandwidth Management (HBM) programs
+ */
+struct hbm_vqueue {
+	struct bpf_spin_lock lock;
+	/* 4 byte hole */
+	unsigned long long lasttime;	/* In ns */
+	int credit;			/* In bytes */
+	unsigned int rate;		/* In bytes per NS << 20 */
+};
+
+struct hbm_queue_stats {
+	unsigned long rate;		/* in Mbps*/
+	unsigned long stats:1,		/* get HBM stats (marked, dropped,..) */
+		loopback:1,		/* also limit flows using loopback */
+		no_cn:1;		/* do not use cn flags */
+	unsigned long long pkts_marked;
+	unsigned long long bytes_marked;
+	unsigned long long pkts_dropped;
+	unsigned long long bytes_dropped;
+	unsigned long long pkts_total;
+	unsigned long long bytes_total;
+	unsigned long long firstPacketTime;
+	unsigned long long lastPacketTime;
+	unsigned long long pkts_ecn_ce;
+	unsigned long long returnValCount[4];
+	unsigned long long sum_cwnd;
+	unsigned long long sum_rtt;
+	unsigned long long sum_cwnd_cnt;
+	long long sum_credit;
+};

diff --git a/samples/bpf/hbm_edt_kern.c b/samples/bpf/hbm_edt_kern.c
new file mode 100644
index 0000000..a65b677
--- /dev/null
+++ b/samples/bpf/hbm_edt_kern.c

@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Sample Host Bandwidth Manager (HBM) BPF program.
+ *
+ * A cgroup skb BPF egress program to limit cgroup output bandwidth.
+ * It uses a modified virtual token bucket queue to limit average
+ * egress bandwidth. The implementation uses credits instead of tokens.
+ * Negative credits imply that queueing would have happened (this is
+ * a virtual queue, so no queueing is done by it. However, queueing may
+ * occur at the actual qdisc (which is not used for rate limiting).
+ *
+ * This implementation uses 3 thresholds, one to start marking packets and
+ * the other two to drop packets:
+ *                                  CREDIT
+ *        - <--------------------------|------------------------> +
+ *              |    |          |      0
+ *              |  Large pkt    |
+ *              |  drop thresh  |
+ *   Small pkt drop             Mark threshold
+ *       thresh
+ *
+ * The effect of marking depends on the type of packet:
+ * a) If the packet is ECN enabled and it is a TCP packet, then the packet
+ *    is ECN marked.
+ * b) If the packet is a TCP packet, then we probabilistically call tcp_cwr
+ *    to reduce the congestion window. The current implementation uses a linear
+ *    distribution (0% probability at marking threshold, 100% probability
+ *    at drop threshold).
+ * c) If the packet is not a TCP packet, then it is dropped.
+ *
+ * If the credit is below the drop threshold, the packet is dropped. If it
+ * is a TCP packet, then it also calls tcp_cwr since packets dropped by
+ * by a cgroup skb BPF program do not automatically trigger a call to
+ * tcp_cwr in the current kernel code.
+ *
+ * This BPF program actually uses 2 drop thresholds, one threshold
+ * for larger packets (>= 120 bytes) and another for smaller packets. This
+ * protects smaller packets such as SYNs, ACKs, etc.
+ *
+ * The default bandwidth limit is set at 1Gbps but this can be changed by
+ * a user program through a shared BPF map. In addition, by default this BPF
+ * program does not limit connections using loopback. This behavior can be
+ * overwritten by the user program. There is also an option to calculate
+ * some statistics, such as percent of packets marked or dropped, which
+ * a user program, such as hbm, can access.
+ */
+
+#include "hbm_kern.h"
+
+SEC("cgroup_skb/egress")
+int _hbm_out_cg(struct __sk_buff *skb)
+{
+	long long delta = 0, delta_send;
+	unsigned long long curtime, sendtime;
+	struct hbm_queue_stats *qsp = NULL;
+	unsigned int queue_index = 0;
+	bool congestion_flag = false;
+	bool ecn_ce_flag = false;
+	struct hbm_pkt_info pkti = {};
+	struct hbm_vqueue *qdp;
+	bool drop_flag = false;
+	bool cwr_flag = false;
+	int len = skb->len;
+	int rv = ALLOW_PKT;
+
+	qsp = bpf_map_lookup_elem(&queue_stats, &queue_index);
+
+	// Check if we should ignore loopback traffic
+	if (qsp != NULL && !qsp->loopback && (skb->ifindex == 1))
+		return ALLOW_PKT;
+
+	hbm_get_pkt_info(skb, &pkti);
+
+	// We may want to account for the length of headers in len
+	// calculation, like ETH header + overhead, specially if it
+	// is a gso packet. But I am not doing it right now.
+
+	qdp = bpf_get_local_storage(&queue_state, 0);
+	if (!qdp)
+		return ALLOW_PKT;
+	if (qdp->lasttime == 0)
+		hbm_init_edt_vqueue(qdp, 1024);
+
+	curtime = bpf_ktime_get_ns();
+
+	// Begin critical section
+	bpf_spin_lock(&qdp->lock);
+	delta = qdp->lasttime - curtime;
+	// bound bursts to 100us
+	if (delta < -BURST_SIZE_NS) {
+		// negative delta is a credit that allows bursts
+		qdp->lasttime = curtime - BURST_SIZE_NS;
+		delta = -BURST_SIZE_NS;
+	}
+	sendtime = qdp->lasttime;
+	delta_send = BYTES_TO_NS(len, qdp->rate);
+	__sync_add_and_fetch(&(qdp->lasttime), delta_send);
+	bpf_spin_unlock(&qdp->lock);
+	// End critical section
+
+	// Set EDT of packet
+	skb->tstamp = sendtime;
+
+	// Check if we should update rate
+	if (qsp != NULL && (qsp->rate * 128) != qdp->rate)
+		qdp->rate = qsp->rate * 128;
+
+	// Set flags (drop, congestion, cwr)
+	// last packet will be sent in the future, bound latency
+	if (delta > DROP_THRESH_NS || (delta > LARGE_PKT_DROP_THRESH_NS &&
+				       len > LARGE_PKT_THRESH)) {
+		drop_flag = true;
+		if (pkti.is_tcp && pkti.ecn == 0)
+			cwr_flag = true;
+	} else if (delta > MARK_THRESH_NS) {
+		if (pkti.is_tcp)
+			congestion_flag = true;
+		else
+			drop_flag = true;
+	}
+
+	if (congestion_flag) {
+		if (bpf_skb_ecn_set_ce(skb)) {
+			ecn_ce_flag = true;
+		} else {
+			if (pkti.is_tcp) {
+				unsigned int rand = bpf_get_prandom_u32();
+
+				if (delta >= MARK_THRESH_NS +
+				    (rand % MARK_REGION_SIZE_NS)) {
+					// Do congestion control
+					cwr_flag = true;
+				}
+			} else if (len > LARGE_PKT_THRESH) {
+				// Problem if too many small packets?
+				drop_flag = true;
+				congestion_flag = false;
+			}
+		}
+	}
+
+	if (pkti.is_tcp && drop_flag && pkti.packets_out <= 1) {
+		drop_flag = false;
+		cwr_flag = true;
+		congestion_flag = false;
+	}
+
+	if (qsp != NULL && qsp->no_cn)
+			cwr_flag = false;
+
+	hbm_update_stats(qsp, len, curtime, congestion_flag, drop_flag,
+			 cwr_flag, ecn_ce_flag, &pkti, (int) delta);
+
+	if (drop_flag) {
+		__sync_add_and_fetch(&(qdp->lasttime), -delta_send);
+		rv = DROP_PKT;
+	}
+
+	if (cwr_flag)
+		rv |= CWR;
+	return rv;
+}
+char _license[] SEC("license") = "GPL";

diff --git a/samples/bpf/hbm_kern.h b/samples/bpf/hbm_kern.h
new file mode 100644
index 0000000..aa207a2
--- /dev/null
+++ b/samples/bpf/hbm_kern.h

@@ -0,0 +1,220 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (c) 2019 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Include file for sample Host Bandwidth Manager (HBM) BPF programs
+ */
+#define KBUILD_MODNAME "foo"
+#include <stddef.h>
+#include <stdbool.h>
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include <uapi/linux/ipv6.h>
+#include <uapi/linux/in.h>
+#include <uapi/linux/tcp.h>
+#include <uapi/linux/filter.h>
+#include <uapi/linux/pkt_cls.h>
+#include <net/ipv6.h>
+#include <net/inet_ecn.h>
+#include "bpf_endian.h"
+#include "bpf_helpers.h"
+#include "hbm.h"
+
+#define DROP_PKT	0
+#define ALLOW_PKT	1
+#define TCP_ECN_OK	1
+#define CWR		2
+
+#ifndef HBM_DEBUG  // Define HBM_DEBUG to enable debugging
+#undef bpf_printk
+#define bpf_printk(fmt, ...)
+#endif
+
+#define INITIAL_CREDIT_PACKETS	100
+#define MAX_BYTES_PER_PACKET	1500
+#define MARK_THRESH		(40 * MAX_BYTES_PER_PACKET)
+#define DROP_THRESH		(80 * 5 * MAX_BYTES_PER_PACKET)
+#define LARGE_PKT_DROP_THRESH	(DROP_THRESH - (15 * MAX_BYTES_PER_PACKET))
+#define MARK_REGION_SIZE	(LARGE_PKT_DROP_THRESH - MARK_THRESH)
+#define LARGE_PKT_THRESH	120
+#define MAX_CREDIT		(100 * MAX_BYTES_PER_PACKET)
+#define INIT_CREDIT		(INITIAL_CREDIT_PACKETS * MAX_BYTES_PER_PACKET)
+
+// Time base accounting for fq's EDT
+#define BURST_SIZE_NS		100000 // 100us
+#define MARK_THRESH_NS		50000 // 50us
+#define DROP_THRESH_NS		500000 // 500us
+// Reserve 20us of queuing for small packets (less than 120 bytes)
+#define LARGE_PKT_DROP_THRESH_NS (DROP_THRESH_NS - 20000)
+#define MARK_REGION_SIZE_NS	(LARGE_PKT_DROP_THRESH_NS - MARK_THRESH_NS)
+
+// rate in bytes per ns << 20
+#define CREDIT_PER_NS(delta, rate) ((((u64)(delta)) * (rate)) >> 20)
+#define BYTES_PER_NS(delta, rate) ((((u64)(delta)) * (rate)) >> 20)
+#define BYTES_TO_NS(bytes, rate) div64_u64(((u64)(bytes)) << 20, (u64)(rate))
+
+struct bpf_map_def SEC("maps") queue_state = {
+	.type = BPF_MAP_TYPE_CGROUP_STORAGE,
+	.key_size = sizeof(struct bpf_cgroup_storage_key),
+	.value_size = sizeof(struct hbm_vqueue),
+};
+BPF_ANNOTATE_KV_PAIR(queue_state, struct bpf_cgroup_storage_key,
+		     struct hbm_vqueue);
+
+struct bpf_map_def SEC("maps") queue_stats = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(u32),
+	.value_size = sizeof(struct hbm_queue_stats),
+	.max_entries = 1,
+};
+BPF_ANNOTATE_KV_PAIR(queue_stats, int, struct hbm_queue_stats);
+
+struct hbm_pkt_info {
+	int	cwnd;
+	int	rtt;
+	int	packets_out;
+	bool	is_ip;
+	bool	is_tcp;
+	short	ecn;
+};
+
+static int get_tcp_info(struct __sk_buff *skb, struct hbm_pkt_info *pkti)
+{
+	struct bpf_sock *sk;
+	struct bpf_tcp_sock *tp;
+
+	sk = skb->sk;
+	if (sk) {
+		sk = bpf_sk_fullsock(sk);
+		if (sk) {
+			if (sk->protocol == IPPROTO_TCP) {
+				tp = bpf_tcp_sock(sk);
+				if (tp) {
+					pkti->cwnd = tp->snd_cwnd;
+					pkti->rtt = tp->srtt_us >> 3;
+					pkti->packets_out = tp->packets_out;
+					return 0;
+				}
+			}
+		}
+	}
+	pkti->cwnd = 0;
+	pkti->rtt = 0;
+	pkti->packets_out = 0;
+	return 1;
+}
+
+static void hbm_get_pkt_info(struct __sk_buff *skb,
+			     struct hbm_pkt_info *pkti)
+{
+	struct iphdr iph;
+	struct ipv6hdr *ip6h;
+
+	pkti->cwnd = 0;
+	pkti->rtt = 0;
+	bpf_skb_load_bytes(skb, 0, &iph, 12);
+	if (iph.version == 6) {
+		ip6h = (struct ipv6hdr *)&iph;
+		pkti->is_ip = true;
+		pkti->is_tcp = (ip6h->nexthdr == 6);
+		pkti->ecn = (ip6h->flow_lbl[0] >> 4) & INET_ECN_MASK;
+	} else if (iph.version == 4) {
+		pkti->is_ip = true;
+		pkti->is_tcp = (iph.protocol == 6);
+		pkti->ecn = iph.tos & INET_ECN_MASK;
+	} else {
+		pkti->is_ip = false;
+		pkti->is_tcp = false;
+		pkti->ecn = 0;
+	}
+	if (pkti->is_tcp)
+		get_tcp_info(skb, pkti);
+}
+
+static __always_inline void hbm_init_vqueue(struct hbm_vqueue *qdp, int rate)
+{
+	bpf_printk("Initializing queue_state, rate:%d\n", rate * 128);
+	qdp->lasttime = bpf_ktime_get_ns();
+	qdp->credit = INIT_CREDIT;
+	qdp->rate = rate * 128;
+}
+
+static __always_inline void hbm_init_edt_vqueue(struct hbm_vqueue *qdp,
+						int rate)
+{
+	unsigned long long curtime;
+
+	curtime = bpf_ktime_get_ns();
+	bpf_printk("Initializing queue_state, rate:%d\n", rate * 128);
+	qdp->lasttime = curtime - BURST_SIZE_NS;	// support initial burst
+	qdp->credit = 0;				// not used
+	qdp->rate = rate * 128;
+}
+
+static __always_inline void hbm_update_stats(struct hbm_queue_stats *qsp,
+					     int len,
+					     unsigned long long curtime,
+					     bool congestion_flag,
+					     bool drop_flag,
+					     bool cwr_flag,
+					     bool ecn_ce_flag,
+					     struct hbm_pkt_info *pkti,
+					     int credit)
+{
+	int rv = ALLOW_PKT;
+
+	if (qsp != NULL) {
+		// Following is needed for work conserving
+		__sync_add_and_fetch(&(qsp->bytes_total), len);
+		if (qsp->stats) {
+			// Optionally update statistics
+			if (qsp->firstPacketTime == 0)
+				qsp->firstPacketTime = curtime;
+			qsp->lastPacketTime = curtime;
+			__sync_add_and_fetch(&(qsp->pkts_total), 1);
+			if (congestion_flag) {
+				__sync_add_and_fetch(&(qsp->pkts_marked), 1);
+				__sync_add_and_fetch(&(qsp->bytes_marked), len);
+			}
+			if (drop_flag) {
+				__sync_add_and_fetch(&(qsp->pkts_dropped), 1);
+				__sync_add_and_fetch(&(qsp->bytes_dropped),
+						     len);
+			}
+			if (ecn_ce_flag)
+				__sync_add_and_fetch(&(qsp->pkts_ecn_ce), 1);
+			if (pkti->cwnd) {
+				__sync_add_and_fetch(&(qsp->sum_cwnd),
+						     pkti->cwnd);
+				__sync_add_and_fetch(&(qsp->sum_cwnd_cnt), 1);
+			}
+			if (pkti->rtt)
+				__sync_add_and_fetch(&(qsp->sum_rtt),
+						     pkti->rtt);
+			__sync_add_and_fetch(&(qsp->sum_credit), credit);
+
+			if (drop_flag)
+				rv = DROP_PKT;
+			if (cwr_flag)
+				rv |= 2;
+			if (rv == DROP_PKT)
+				__sync_add_and_fetch(&(qsp->returnValCount[0]),
+						     1);
+			else if (rv == ALLOW_PKT)
+				__sync_add_and_fetch(&(qsp->returnValCount[1]),
+						     1);
+			else if (rv == 2)
+				__sync_add_and_fetch(&(qsp->returnValCount[2]),
+						     1);
+			else if (rv == 3)
+				__sync_add_and_fetch(&(qsp->returnValCount[3]),
+						     1);
+		}
+	}
+}

diff --git a/samples/bpf/hbm_out_kern.c b/samples/bpf/hbm_out_kern.c
new file mode 100644
index 0000000..829934b
--- /dev/null
+++ b/samples/bpf/hbm_out_kern.c

@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Sample Host Bandwidth Manager (HBM) BPF program.
+ *
+ * A cgroup skb BPF egress program to limit cgroup output bandwidth.
+ * It uses a modified virtual token bucket queue to limit average
+ * egress bandwidth. The implementation uses credits instead of tokens.
+ * Negative credits imply that queueing would have happened (this is
+ * a virtual queue, so no queueing is done by it. However, queueing may
+ * occur at the actual qdisc (which is not used for rate limiting).
+ *
+ * This implementation uses 3 thresholds, one to start marking packets and
+ * the other two to drop packets:
+ *                                  CREDIT
+ *        - <--------------------------|------------------------> +
+ *              |    |          |      0
+ *              |  Large pkt    |
+ *              |  drop thresh  |
+ *   Small pkt drop             Mark threshold
+ *       thresh
+ *
+ * The effect of marking depends on the type of packet:
+ * a) If the packet is ECN enabled and it is a TCP packet, then the packet
+ *    is ECN marked.
+ * b) If the packet is a TCP packet, then we probabilistically call tcp_cwr
+ *    to reduce the congestion window. The current implementation uses a linear
+ *    distribution (0% probability at marking threshold, 100% probability
+ *    at drop threshold).
+ * c) If the packet is not a TCP packet, then it is dropped.
+ *
+ * If the credit is below the drop threshold, the packet is dropped. If it
+ * is a TCP packet, then it also calls tcp_cwr since packets dropped by
+ * by a cgroup skb BPF program do not automatically trigger a call to
+ * tcp_cwr in the current kernel code.
+ *
+ * This BPF program actually uses 2 drop thresholds, one threshold
+ * for larger packets (>= 120 bytes) and another for smaller packets. This
+ * protects smaller packets such as SYNs, ACKs, etc.
+ *
+ * The default bandwidth limit is set at 1Gbps but this can be changed by
+ * a user program through a shared BPF map. In addition, by default this BPF
+ * program does not limit connections using loopback. This behavior can be
+ * overwritten by the user program. There is also an option to calculate
+ * some statistics, such as percent of packets marked or dropped, which
+ * the user program can access.
+ *
+ * A latter patch provides such a program (hbm.c)
+ */
+
+#include "hbm_kern.h"
+
+SEC("cgroup_skb/egress")
+int _hbm_out_cg(struct __sk_buff *skb)
+{
+	struct hbm_pkt_info pkti;
+	int len = skb->len;
+	unsigned int queue_index = 0;
+	unsigned long long curtime;
+	int credit;
+	signed long long delta = 0, new_credit;
+	int max_credit = MAX_CREDIT;
+	bool congestion_flag = false;
+	bool drop_flag = false;
+	bool cwr_flag = false;
+	bool ecn_ce_flag = false;
+	struct hbm_vqueue *qdp;
+	struct hbm_queue_stats *qsp = NULL;
+	int rv = ALLOW_PKT;
+
+	qsp = bpf_map_lookup_elem(&queue_stats, &queue_index);
+	if (qsp != NULL && !qsp->loopback && (skb->ifindex == 1))
+		return ALLOW_PKT;
+
+	hbm_get_pkt_info(skb, &pkti);
+
+	// We may want to account for the length of headers in len
+	// calculation, like ETH header + overhead, specially if it
+	// is a gso packet. But I am not doing it right now.
+
+	qdp = bpf_get_local_storage(&queue_state, 0);
+	if (!qdp)
+		return ALLOW_PKT;
+	else if (qdp->lasttime == 0)
+		hbm_init_vqueue(qdp, 1024);
+
+	curtime = bpf_ktime_get_ns();
+
+	// Begin critical section
+	bpf_spin_lock(&qdp->lock);
+	credit = qdp->credit;
+	delta = curtime - qdp->lasttime;
+	/* delta < 0 implies that another process with a curtime greater
+	 * than ours beat us to the critical section and already added
+	 * the new credit, so we should not add it ourselves
+	 */
+	if (delta > 0) {
+		qdp->lasttime = curtime;
+		new_credit = credit + CREDIT_PER_NS(delta, qdp->rate);
+		if (new_credit > MAX_CREDIT)
+			credit = MAX_CREDIT;
+		else
+			credit = new_credit;
+	}
+	credit -= len;
+	qdp->credit = credit;
+	bpf_spin_unlock(&qdp->lock);
+	// End critical section
+
+	// Check if we should update rate
+	if (qsp != NULL && (qsp->rate * 128) != qdp->rate) {
+		qdp->rate = qsp->rate * 128;
+		bpf_printk("Updating rate: %d (1sec:%llu bits)\n",
+			   (int)qdp->rate,
+			   CREDIT_PER_NS(1000000000, qdp->rate) * 8);
+	}
+
+	// Set flags (drop, congestion, cwr)
+	// Dropping => we are congested, so ignore congestion flag
+	if (credit < -DROP_THRESH ||
+	    (len > LARGE_PKT_THRESH && credit < -LARGE_PKT_DROP_THRESH)) {
+		// Very congested, set drop packet
+		drop_flag = true;
+		if (pkti.ecn)
+			congestion_flag = true;
+		else if (pkti.is_tcp)
+			cwr_flag = true;
+	} else if (credit < 0) {
+		// Congested, set congestion flag
+		if (pkti.ecn || pkti.is_tcp) {
+			if (credit < -MARK_THRESH)
+				congestion_flag = true;
+			else
+				congestion_flag = false;
+		} else {
+			congestion_flag = true;
+		}
+	}
+
+	if (congestion_flag) {
+		if (bpf_skb_ecn_set_ce(skb)) {
+			ecn_ce_flag = true;
+		} else {
+			if (pkti.is_tcp) {
+				unsigned int rand = bpf_get_prandom_u32();
+
+				if (-credit >= MARK_THRESH +
+				    (rand % MARK_REGION_SIZE)) {
+					// Do congestion control
+					cwr_flag = true;
+				}
+			} else if (len > LARGE_PKT_THRESH) {
+				// Problem if too many small packets?
+				drop_flag = true;
+			}
+		}
+	}
+
+	if (qsp != NULL)
+		if (qsp->no_cn)
+			cwr_flag = false;
+
+	hbm_update_stats(qsp, len, curtime, congestion_flag, drop_flag,
+			 cwr_flag, ecn_ce_flag, &pkti, credit);
+
+	if (drop_flag) {
+		__sync_add_and_fetch(&(qdp->credit), len);
+		rv = DROP_PKT;
+	}
+
+	if (cwr_flag)
+		rv |= 2;
+	return rv;
+}
+char _license[] SEC("license") = "GPL";

diff --git a/samples/bpf/ibumad_kern.c b/samples/bpf/ibumad_kern.c
new file mode 100644
index 0000000..f281df7
--- /dev/null
+++ b/samples/bpf/ibumad_kern.c

@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+
+/**
+ * ibumad BPF sample kernel side
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Copyright(c) 2018 Ira Weiny, Intel Corporation
+ */
+
+#define KBUILD_MODNAME "ibumad_count_pkts_by_class"
+#include <uapi/linux/bpf.h>
+
+#include "bpf_helpers.h"
+
+
+struct bpf_map_def SEC("maps") read_count = {
+	.type        = BPF_MAP_TYPE_ARRAY,
+	.key_size    = sizeof(u32), /* class; u32 required */
+	.value_size  = sizeof(u64), /* count of mads read */
+	.max_entries = 256, /* Room for all Classes */
+};
+
+struct bpf_map_def SEC("maps") write_count = {
+	.type        = BPF_MAP_TYPE_ARRAY,
+	.key_size    = sizeof(u32), /* class; u32 required */
+	.value_size  = sizeof(u64), /* count of mads written */
+	.max_entries = 256, /* Room for all Classes */
+};
+
+#undef DEBUG
+#ifndef DEBUG
+#undef bpf_printk
+#define bpf_printk(fmt, ...)
+#endif
+
+/* Taken from the current format defined in
+ * include/trace/events/ib_umad.h
+ * and
+ * /sys/kernel/debug/tracing/events/ib_umad/ib_umad_read/format
+ * /sys/kernel/debug/tracing/events/ib_umad/ib_umad_write/format
+ */
+struct ib_umad_rw_args {
+	u64 pad;
+	u8 port_num;
+	u8 sl;
+	u8 path_bits;
+	u8 grh_present;
+	u32 id;
+	u32 status;
+	u32 timeout_ms;
+	u32 retires;
+	u32 length;
+	u32 qpn;
+	u32 qkey;
+	u8 gid_index;
+	u8 hop_limit;
+	u16 lid;
+	u16 attr_id;
+	u16 pkey_index;
+	u8 base_version;
+	u8 mgmt_class;
+	u8 class_version;
+	u8 method;
+	u32 flow_label;
+	u16 mad_status;
+	u16 class_specific;
+	u32 attr_mod;
+	u64 tid;
+	u8 gid[16];
+	u32 dev_index;
+	u8 traffic_class;
+};
+
+SEC("tracepoint/ib_umad/ib_umad_read_recv")
+int on_ib_umad_read_recv(struct ib_umad_rw_args *ctx)
+{
+	u64 zero = 0, *val;
+	u8 class = ctx->mgmt_class;
+
+	bpf_printk("ib_umad read recv : class 0x%x\n", class);
+
+	val = bpf_map_lookup_elem(&read_count, &class);
+	if (!val) {
+		bpf_map_update_elem(&read_count, &class, &zero, BPF_NOEXIST);
+		val = bpf_map_lookup_elem(&read_count, &class);
+		if (!val)
+			return 0;
+	}
+
+	(*val) += 1;
+
+	return 0;
+}
+SEC("tracepoint/ib_umad/ib_umad_read_send")
+int on_ib_umad_read_send(struct ib_umad_rw_args *ctx)
+{
+	u64 zero = 0, *val;
+	u8 class = ctx->mgmt_class;
+
+	bpf_printk("ib_umad read send : class 0x%x\n", class);
+
+	val = bpf_map_lookup_elem(&read_count, &class);
+	if (!val) {
+		bpf_map_update_elem(&read_count, &class, &zero, BPF_NOEXIST);
+		val = bpf_map_lookup_elem(&read_count, &class);
+		if (!val)
+			return 0;
+	}
+
+	(*val) += 1;
+
+	return 0;
+}
+SEC("tracepoint/ib_umad/ib_umad_write")
+int on_ib_umad_write(struct ib_umad_rw_args *ctx)
+{
+	u64 zero = 0, *val;
+	u8 class = ctx->mgmt_class;
+
+	bpf_printk("ib_umad write : class 0x%x\n", class);
+
+	val = bpf_map_lookup_elem(&write_count, &class);
+	if (!val) {
+		bpf_map_update_elem(&write_count, &class, &zero, BPF_NOEXIST);
+		val = bpf_map_lookup_elem(&write_count, &class);
+		if (!val)
+			return 0;
+	}
+
+	(*val) += 1;
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";

diff --git a/samples/bpf/ibumad_user.c b/samples/bpf/ibumad_user.c
new file mode 100644
index 0000000..cb5a8f9
--- /dev/null
+++ b/samples/bpf/ibumad_user.c

@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+
+/**
+ * ibumad BPF sample user side
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Copyright(c) 2018 Ira Weiny, Intel Corporation
+ */
+
+#include <linux/bpf.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <limits.h>
+
+#include <sys/resource.h>
+#include <getopt.h>
+#include <net/if.h>
+
+#include "bpf_load.h"
+#include "bpf_util.h"
+#include "libbpf.h"
+
+static void dump_counts(int fd)
+{
+	__u32 key;
+	__u64 value;
+
+	for (key = 0; key < 256; key++) {
+		if (bpf_map_lookup_elem(fd, &key, &value)) {
+			printf("failed to read key %u\n", key);
+			continue;
+		}
+		if (value)
+			printf("0x%02x : %llu\n", key, value);
+	}
+}
+
+static void dump_all_counts(void)
+{
+	printf("Read 'Class : count'\n");
+	dump_counts(map_fd[0]);
+	printf("Write 'Class : count'\n");
+	dump_counts(map_fd[1]);
+}
+
+static void dump_exit(int sig)
+{
+	dump_all_counts();
+	exit(0);
+}
+
+static const struct option long_options[] = {
+	{"help",      no_argument,       NULL, 'h'},
+	{"delay",     required_argument, NULL, 'd'},
+};
+
+static void usage(char *cmd)
+{
+	printf("eBPF test program to count packets from various IP addresses\n"
+		"Usage: %s <options>\n"
+		"       --help,   -h  this menu\n"
+		"       --delay,  -d  <delay>  wait <delay> sec between prints [1 - 1000000]\n"
+		, cmd
+		);
+}
+
+int main(int argc, char **argv)
+{
+	unsigned long delay = 5;
+	int longindex = 0;
+	int opt;
+	char bpf_file[256];
+
+	/* Create the eBPF kernel code path name.
+	 * This follows the pattern of all of the other bpf samples
+	 */
+	snprintf(bpf_file, sizeof(bpf_file), "%s_kern.o", argv[0]);
+
+	/* Do one final dump when exiting */
+	signal(SIGINT, dump_exit);
+	signal(SIGTERM, dump_exit);
+
+	while ((opt = getopt_long(argc, argv, "hd:rSw",
+				  long_options, &longindex)) != -1) {
+		switch (opt) {
+		case 'd':
+			delay = strtoul(optarg, NULL, 0);
+			if (delay == ULONG_MAX || delay < 0 ||
+			    delay > 1000000) {
+				fprintf(stderr, "ERROR: invalid delay : %s\n",
+					optarg);
+				usage(argv[0]);
+				return 1;
+			}
+			break;
+		default:
+		case 'h':
+			usage(argv[0]);
+			return 1;
+		}
+	}
+
+	if (load_bpf_file(bpf_file)) {
+		fprintf(stderr, "ERROR: failed to load eBPF from file : %s\n",
+			bpf_file);
+		return 1;
+	}
+
+	while (1) {
+		sleep(delay);
+		dump_all_counts();
+	}
+
+	return 0;
+}

diff --git a/samples/bpf/lathist_user.c b/samples/bpf/lathist_user.c
index c8e88cc..2ff2839 100644
--- a/samples/bpf/lathist_user.c
+++ b/samples/bpf/lathist_user.c

@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
  * Copyright (c) 2015 BMW Car IT GmbH
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
  */
 #include <stdio.h>
 #include <unistd.h>

diff --git a/samples/bpf/load_sock_ops.c b/samples/bpf/load_sock_ops.c
deleted file mode 100644
index 8ecb41e..0000000
--- a/samples/bpf/load_sock_ops.c
+++ /dev/null

@@ -1,97 +0,0 @@
-/* Copyright (c) 2017 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <linux/bpf.h>
-#include <bpf/bpf.h>
-#include "bpf_load.h"
-#include <unistd.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <linux/unistd.h>
-
-static void usage(char *pname)
-{
-	printf("USAGE:\n  %s [-l] <cg-path> <prog filename>\n", pname);
-	printf("\tLoad and attach a sock_ops program to the specified "
-	       "cgroup\n");
-	printf("\tIf \"-l\" is used, the program will continue to run\n");
-	printf("\tprinting the BPF log buffer\n");
-	printf("\tIf the specified filename does not end in \".o\", it\n");
-	printf("\tappends \"_kern.o\" to the name\n");
-	printf("\n");
-	printf("  %s -r <cg-path>\n", pname);
-	printf("\tDetaches the currently attached sock_ops program\n");
-	printf("\tfrom the specified cgroup\n");
-	printf("\n");
-	exit(1);
-}
-
-int main(int argc, char **argv)
-{
-	int logFlag = 0;
-	int error = 0;
-	char *cg_path;
-	char fn[500];
-	char *prog;
-	int cg_fd;
-
-	if (argc < 3)
-		usage(argv[0]);
-
-	if (!strcmp(argv[1], "-r")) {
-		cg_path = argv[2];
-		cg_fd = open(cg_path, O_DIRECTORY, O_RDONLY);
-		error = bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS);
-		if (error) {
-			printf("ERROR: bpf_prog_detach: %d (%s)\n",
-			       error, strerror(errno));
-			return 2;
-		}
-		return 0;
-	} else if (!strcmp(argv[1], "-h")) {
-		usage(argv[0]);
-	} else if (!strcmp(argv[1], "-l")) {
-		logFlag = 1;
-		if (argc < 4)
-			usage(argv[0]);
-	}
-
-	prog = argv[argc - 1];
-	cg_path = argv[argc - 2];
-	if (strlen(prog) > 480) {
-		fprintf(stderr, "ERROR: program name too long (> 480 chars)\n");
-		return 3;
-	}
-	cg_fd = open(cg_path, O_DIRECTORY, O_RDONLY);
-
-	if (!strcmp(prog + strlen(prog)-2, ".o"))
-		strcpy(fn, prog);
-	else
-		sprintf(fn, "%s_kern.o", prog);
-	if (logFlag)
-		printf("loading bpf file:%s\n", fn);
-	if (load_bpf_file(fn)) {
-		printf("ERROR: load_bpf_file failed for: %s\n", fn);
-		printf("%s", bpf_log_buf);
-		return 4;
-	}
-	if (logFlag)
-		printf("TCP BPF Loaded %s\n", fn);
-
-	error = bpf_prog_attach(prog_fd[0], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
-	if (error) {
-		printf("ERROR: bpf_prog_attach: %d (%s)\n",
-		       error, strerror(errno));
-		return 5;
-	} else if (logFlag) {
-		read_trace_pipe();
-	}
-
-	return error;
-}

diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c
index 38b7b1a..fe5564b 100644
--- a/samples/bpf/map_perf_test_user.c
+++ b/samples/bpf/map_perf_test_user.c

@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2016 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
  */
 #define _GNU_SOURCE
 #include <sched.h>

diff --git a/samples/bpf/offwaketime_user.c b/samples/bpf/offwaketime_user.c
index f06063a..fc8767d 100644
--- a/samples/bpf/offwaketime_user.c
+++ b/samples/bpf/offwaketime_user.c

@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2016 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
  */
 #include <stdio.h>
 #include <unistd.h>
@@ -28,6 +25,11 @@
 	if (!addr)
 		return;
 	sym = ksym_search(addr);
+	if (!sym) {
+		printf("ksym not found. Is kallsyms loaded?\n");
+		return;
+	}
+
 	if (PRINT_RAW_ADDR)
 		printf("%s/%llx;", sym->name, addr);
 	else

diff --git a/samples/bpf/sampleip_user.c b/samples/bpf/sampleip_user.c
index 60c2b73..6b5dc26 100644
--- a/samples/bpf/sampleip_user.c
+++ b/samples/bpf/sampleip_user.c

@@ -1,15 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * sampleip: sample instruction pointer and frequency count in a BPF map.
  *
  * Copyright 2016 Netflix, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
  */
 #include <stdio.h>
 #include <stdlib.h>
-#include <stdio.h>
 #include <unistd.h>
 #include <errno.h>
 #include <signal.h>
@@ -110,6 +106,11 @@
 	for (i = 0; i < max; i++) {
 		if (counts[i].ip > PAGE_OFFSET) {
 			sym = ksym_search(counts[i].ip);
+			if (!sym) {
+				printf("ksym not found. Is kallsyms loaded?\n");
+				continue;
+			}
+
 			printf("0x%-17llx %-32s %u\n", counts[i].ip, sym->name,
 			       counts[i].count);
 		} else {

diff --git a/samples/bpf/sock_example.c b/samples/bpf/sock_example.c
index 60ec467..00aae1d 100644
--- a/samples/bpf/sock_example.c
+++ b/samples/bpf/sock_example.c

@@ -99,7 +99,7 @@
 {
 	FILE *f;
 
-	f = popen("ping -c5 localhost", "r");
+	f = popen("ping -4 -c5 localhost", "r");
 	(void)f;
 
 	return test_sock();

diff --git a/samples/bpf/sockex1_user.c b/samples/bpf/sockex1_user.c
index 93ec01c..a219442 100644
--- a/samples/bpf/sockex1_user.c
+++ b/samples/bpf/sockex1_user.c

@@ -3,30 +3,33 @@
 #include <assert.h>
 #include <linux/bpf.h>
 #include <bpf/bpf.h>
-#include "bpf_load.h"
+#include "libbpf.h"
 #include "sock_example.h"
 #include <unistd.h>
 #include <arpa/inet.h>
 
 int main(int ac, char **argv)
 {
+	struct bpf_object *obj;
+	int map_fd, prog_fd;
 	char filename[256];
-	FILE *f;
 	int i, sock;
+	FILE *f;
 
 	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
 
-	if (load_bpf_file(filename)) {
-		printf("%s", bpf_log_buf);
+	if (bpf_prog_load(filename, BPF_PROG_TYPE_SOCKET_FILTER,
+			  &obj, &prog_fd))
 		return 1;
-	}
+
+	map_fd = bpf_object__find_map_fd_by_name(obj, "my_map");
 
 	sock = open_raw_sock("lo");
 
-	assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, prog_fd,
-			  sizeof(prog_fd[0])) == 0);
+	assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd,
+			  sizeof(prog_fd)) == 0);
 
-	f = popen("ping -c5 localhost", "r");
+	f = popen("ping -4 -c5 localhost", "r");
 	(void) f;
 
 	for (i = 0; i < 5; i++) {
@@ -34,13 +37,13 @@
 		int key;
 
 		key = IPPROTO_TCP;
-		assert(bpf_map_lookup_elem(map_fd[0], &key, &tcp_cnt) == 0);
+		assert(bpf_map_lookup_elem(map_fd, &key, &tcp_cnt) == 0);
 
 		key = IPPROTO_UDP;
-		assert(bpf_map_lookup_elem(map_fd[0], &key, &udp_cnt) == 0);
+		assert(bpf_map_lookup_elem(map_fd, &key, &udp_cnt) == 0);
 
 		key = IPPROTO_ICMP;
-		assert(bpf_map_lookup_elem(map_fd[0], &key, &icmp_cnt) == 0);
+		assert(bpf_map_lookup_elem(map_fd, &key, &icmp_cnt) == 0);
 
 		printf("TCP %lld UDP %lld ICMP %lld bytes\n",
 		       tcp_cnt, udp_cnt, icmp_cnt);

diff --git a/samples/bpf/sockex2_kern.c b/samples/bpf/sockex2_kern.c
index f58acfc..f2f9dbc 100644
--- a/samples/bpf/sockex2_kern.c
+++ b/samples/bpf/sockex2_kern.c

@@ -14,7 +14,7 @@
 	__be16 h_vlan_encapsulated_proto;
 };
 
-struct bpf_flow_keys {
+struct flow_key_record {
 	__be32 src;
 	__be32 dst;
 	union {
@@ -59,7 +59,7 @@
 }
 
 static inline __u64 parse_ip(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_proto,
-			     struct bpf_flow_keys *flow)
+			     struct flow_key_record *flow)
 {
 	__u64 verlen;
 
@@ -83,7 +83,7 @@
 }
 
 static inline __u64 parse_ipv6(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_proto,
-			       struct bpf_flow_keys *flow)
+			       struct flow_key_record *flow)
 {
 	*ip_proto = load_byte(skb,
 			      nhoff + offsetof(struct ipv6hdr, nexthdr));
@@ -96,7 +96,8 @@
 	return nhoff;
 }
 
-static inline bool flow_dissector(struct __sk_buff *skb, struct bpf_flow_keys *flow)
+static inline bool flow_dissector(struct __sk_buff *skb,
+				  struct flow_key_record *flow)
 {
 	__u64 nhoff = ETH_HLEN;
 	__u64 ip_proto;
@@ -198,7 +199,7 @@
 SEC("socket2")
 int bpf_prog2(struct __sk_buff *skb)
 {
-	struct bpf_flow_keys flow = {};
+	struct flow_key_record flow = {};
 	struct pair *value;
 	u32 key;
 

diff --git a/samples/bpf/sockex2_user.c b/samples/bpf/sockex2_user.c
index 1d5c6e9..6de383d 100644
--- a/samples/bpf/sockex2_user.c
+++ b/samples/bpf/sockex2_user.c

@@ -3,7 +3,7 @@
 #include <assert.h>
 #include <linux/bpf.h>
 #include <bpf/bpf.h>
-#include "bpf_load.h"
+#include "libbpf.h"
 #include "sock_example.h"
 #include <unistd.h>
 #include <arpa/inet.h>
@@ -17,32 +17,35 @@
 int main(int ac, char **argv)
 {
 	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+	struct bpf_object *obj;
+	int map_fd, prog_fd;
 	char filename[256];
-	FILE *f;
 	int i, sock;
+	FILE *f;
 
 	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
 	setrlimit(RLIMIT_MEMLOCK, &r);
 
-	if (load_bpf_file(filename)) {
-		printf("%s", bpf_log_buf);
+	if (bpf_prog_load(filename, BPF_PROG_TYPE_SOCKET_FILTER,
+			  &obj, &prog_fd))
 		return 1;
-	}
+
+	map_fd = bpf_object__find_map_fd_by_name(obj, "hash_map");
 
 	sock = open_raw_sock("lo");
 
-	assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, prog_fd,
-			  sizeof(prog_fd[0])) == 0);
+	assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd,
+			  sizeof(prog_fd)) == 0);
 
-	f = popen("ping -c5 localhost", "r");
+	f = popen("ping -4 -c5 localhost", "r");
 	(void) f;
 
 	for (i = 0; i < 5; i++) {
 		int key = 0, next_key;
 		struct pair value;
 
-		while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0) {
-			bpf_map_lookup_elem(map_fd[0], &next_key, &value);
+		while (bpf_map_get_next_key(map_fd, &key, &next_key) == 0) {
+			bpf_map_lookup_elem(map_fd, &next_key, &value);
 			printf("ip %s bytes %lld packets %lld\n",
 			       inet_ntoa((struct in_addr){htonl(next_key)}),
 			       value.bytes, value.packets);

diff --git a/samples/bpf/sockex3_kern.c b/samples/bpf/sockex3_kern.c
index 95907f8..c527b57 100644
--- a/samples/bpf/sockex3_kern.c
+++ b/samples/bpf/sockex3_kern.c

@@ -61,7 +61,7 @@
 	__be16 h_vlan_encapsulated_proto;
 };
 
-struct bpf_flow_keys {
+struct flow_key_record {
 	__be32 src;
 	__be32 dst;
 	union {
@@ -88,7 +88,7 @@
 }
 
 struct globals {
-	struct bpf_flow_keys flow;
+	struct flow_key_record flow;
 };
 
 struct bpf_map_def SEC("maps") percpu_map = {
@@ -114,14 +114,14 @@
 
 struct bpf_map_def SEC("maps") hash_map = {
 	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(struct bpf_flow_keys),
+	.key_size = sizeof(struct flow_key_record),
 	.value_size = sizeof(struct pair),
 	.max_entries = 1024,
 };
 
 static void update_stats(struct __sk_buff *skb, struct globals *g)
 {
-	struct bpf_flow_keys key = g->flow;
+	struct flow_key_record key = g->flow;
 	struct pair *value;
 
 	value = bpf_map_lookup_elem(&hash_map, &key);

diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c
index 5ba3ae9..bbb1cd0 100644
--- a/samples/bpf/sockex3_user.c
+++ b/samples/bpf/sockex3_user.c

@@ -13,7 +13,7 @@
 #define PARSE_IP_PROG_FD (prog_fd[0])
 #define PROG_ARRAY_FD (map_fd[0])
 
-struct bpf_flow_keys {
+struct flow_key_record {
 	__be32 src;
 	__be32 dst;
 	union {
@@ -58,13 +58,13 @@
 			  sizeof(__u32)) == 0);
 
 	if (argc > 1)
-		f = popen("ping -c5 localhost", "r");
+		f = popen("ping -4 -c5 localhost", "r");
 	else
 		f = popen("netperf -l 4 localhost", "r");
 	(void) f;
 
 	for (i = 0; i < 5; i++) {
-		struct bpf_flow_keys key = {}, next_key;
+		struct flow_key_record key = {}, next_key;
 		struct pair value;
 
 		sleep(1);

diff --git a/samples/bpf/spintest_user.c b/samples/bpf/spintest_user.c
index 8d3e9cf..2556af2 100644
--- a/samples/bpf/spintest_user.c
+++ b/samples/bpf/spintest_user.c

@@ -37,8 +37,13 @@
 			bpf_map_lookup_elem(map_fd[0], &next_key, &value);
 			assert(next_key == value);
 			sym = ksym_search(value);
-			printf(" %s", sym->name);
 			key = next_key;
+			if (!sym) {
+				printf("ksym not found. Is kallsyms loaded?\n");
+				continue;
+			}
+
+			printf(" %s", sym->name);
 		}
 		if (key)
 			printf("\n");

diff --git a/samples/bpf/syscall_nrs.c b/samples/bpf/syscall_nrs.c
index 516e255..88f9400 100644
--- a/samples/bpf/syscall_nrs.c
+++ b/samples/bpf/syscall_nrs.c

@@ -9,5 +9,11 @@
 	COMMENT("Linux system call numbers.");
 	SYSNR(__NR_write);
 	SYSNR(__NR_read);
+#ifdef __NR_mmap2
+	SYSNR(__NR_mmap2);
+#endif
+#ifdef __NR_mmap
 	SYSNR(__NR_mmap);
+#endif
+
 }

diff --git a/samples/bpf/syscall_tp_kern.c b/samples/bpf/syscall_tp_kern.c
index 9149c52..1d78819 100644
--- a/samples/bpf/syscall_tp_kern.c
+++ b/samples/bpf/syscall_tp_kern.c

@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2017 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
  */
 #include <uapi/linux/bpf.h>
 #include "bpf_helpers.h"

diff --git a/samples/bpf/syscall_tp_user.c b/samples/bpf/syscall_tp_user.c
index 1a1d005..57014ba 100644
--- a/samples/bpf/syscall_tp_user.c
+++ b/samples/bpf/syscall_tp_user.c

@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2017 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
  */
 #include <stdio.h>
 #include <unistd.h>

diff --git a/samples/bpf/task_fd_query_kern.c b/samples/bpf/task_fd_query_kern.c
index f4b0a9e..fb56fc2 100644
--- a/samples/bpf/task_fd_query_kern.c
+++ b/samples/bpf/task_fd_query_kern.c

@@ -4,7 +4,7 @@
 #include <uapi/linux/bpf.h>
 #include "bpf_helpers.h"
 
-SEC("kprobe/blk_start_request")
+SEC("kprobe/blk_mq_start_request")
 int bpf_prog1(struct pt_regs *ctx)
 {
 	return 0;

diff --git a/samples/bpf/task_fd_query_user.c b/samples/bpf/task_fd_query_user.c
index 8381d79..4c31b30 100644
--- a/samples/bpf/task_fd_query_user.c
+++ b/samples/bpf/task_fd_query_user.c

@@ -13,6 +13,7 @@
 #include <sys/resource.h>
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <linux/perf_event.h>
 
 #include "libbpf.h"
 #include "bpf_load.h"
@@ -216,7 +217,7 @@
 {
 	const char *event_type = "uprobe";
 	struct perf_event_attr attr = {};
-	char buf[256], event_alias[256];
+	char buf[256], event_alias[sizeof("test_1234567890")];
 	__u64 probe_offset, probe_addr;
 	__u32 len, prog_id, fd_type;
 	int err, res, kfd, efd;
@@ -311,7 +312,7 @@
 	}
 
 	/* test two functions in the corresponding *_kern.c file */
-	CHECK_AND_RET(test_debug_fs_kprobe(0, "blk_start_request",
+	CHECK_AND_RET(test_debug_fs_kprobe(0, "blk_mq_start_request",
 					   BPF_FD_TYPE_KPROBE));
 	CHECK_AND_RET(test_debug_fs_kprobe(1, "blk_account_io_completion",
 					   BPF_FD_TYPE_KRETPROBE));

diff --git a/samples/bpf/tc_l2_redirect_user.c b/samples/bpf/tc_l2_redirect_user.c
index 7ec45c3..d11a6e1 100644
--- a/samples/bpf/tc_l2_redirect_user.c
+++ b/samples/bpf/tc_l2_redirect_user.c

@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2016 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
  */
 #include <linux/unistd.h>
 #include <linux/bpf.h>

diff --git a/samples/bpf/tcp_basertt_kern.c b/samples/bpf/tcp_basertt_kern.c
index 4bf4fc5..9dba48c 100644
--- a/samples/bpf/tcp_basertt_kern.c
+++ b/samples/bpf/tcp_basertt_kern.c

@@ -7,7 +7,7 @@
  * BPF program to set base_rtt to 80us when host is running TCP-NV and
  * both hosts are in the same datacenter (as determined by IPv6 prefix).
  *
- * Use load_sock_ops to load this BPF program.
+ * Use "bpftool cgroup attach $cg sock_ops $prog" to load this BPF program.
  */
 
 #include <uapi/linux/bpf.h>
@@ -21,13 +21,6 @@
 
 #define DEBUG 1
 
-#define bpf_printk(fmt, ...)					\
-({								\
-	       char ____fmt[] = fmt;				\
-	       bpf_trace_printk(____fmt, sizeof(____fmt),	\
-				##__VA_ARGS__);			\
-})
-
 SEC("sockops")
 int bpf_basertt(struct bpf_sock_ops *skops)
 {

diff --git a/samples/bpf/tcp_bpf.readme b/samples/bpf/tcp_bpf.readme
index 831fb60..78e247f 100644
--- a/samples/bpf/tcp_bpf.readme
+++ b/samples/bpf/tcp_bpf.readme

@@ -8,14 +8,16 @@
   bash
   echo $$ >> /tmp/cgroupv2/foo/cgroup.procs
 
-Anything that runs under this shell belongs to the foo cgroupv2 To load
+Anything that runs under this shell belongs to the foo cgroupv2. To load
 (attach) one of the tcp_*_kern.o programs:
 
-  ./load_sock_ops -l /tmp/cgroupv2/foo tcp_basertt_kern.o
+  bpftool prog load tcp_basertt_kern.o /sys/fs/bpf/tcp_prog
+  bpftool cgroup attach /tmp/cgroupv2/foo sock_ops pinned /sys/fs/bpf/tcp_prog
+  bpftool prog tracelog
 
-If the "-l" flag is used, the load_sock_ops program will continue to run
-printing the BPF log buffer. The tcp_*_kern.o programs use special print
-functions to print logging information (if enabled by the ifdef).
+"bpftool prog tracelog" will continue to run printing the BPF log buffer.
+The tcp_*_kern.o programs use special print functions to print logging
+information (if enabled by the ifdef).
 
 If using netperf/netserver to create traffic, you need to run them under the
 cgroupv2 to which the BPF programs are attached (i.e. under bash shell
@@ -23,4 +25,4 @@
 
 To remove (unattach) a socket_ops BPF program from a cgroupv2:
 
-  ./load_sock_ops -r /tmp/cgroupv2/foo
+  bpftool cgroup detach /tmp/cgroupv2/foo sock_ops pinned /sys/fs/bpf/tcp_prog

diff --git a/samples/bpf/tcp_bufs_kern.c b/samples/bpf/tcp_bufs_kern.c
index 0566b7f..af8486f 100644
--- a/samples/bpf/tcp_bufs_kern.c
+++ b/samples/bpf/tcp_bufs_kern.c

@@ -9,7 +9,7 @@
  * doing appropriate checks that indicate the hosts are far enough
  * away (i.e. large RTT).
  *
- * Use load_sock_ops to load this BPF program.
+ * Use "bpftool cgroup attach $cg sock_ops $prog" to load this BPF program.
  */
 
 #include <uapi/linux/bpf.h>
@@ -22,13 +22,6 @@
 
 #define DEBUG 1
 
-#define bpf_printk(fmt, ...)					\
-({								\
-	       char ____fmt[] = fmt;				\
-	       bpf_trace_printk(____fmt, sizeof(____fmt),	\
-				##__VA_ARGS__);			\
-})
-
 SEC("sockops")
 int bpf_bufs(struct bpf_sock_ops *skops)
 {

diff --git a/samples/bpf/tcp_clamp_kern.c b/samples/bpf/tcp_clamp_kern.c
index f4225c9..26c0fd0 100644
--- a/samples/bpf/tcp_clamp_kern.c
+++ b/samples/bpf/tcp_clamp_kern.c

@@ -9,7 +9,7 @@
  * the same datacenter. For his example, we assume they are within the same
  * datacenter when the first 5.5 bytes of their IPv6 addresses are the same.
  *
- * Use load_sock_ops to load this BPF program.
+ * Use "bpftool cgroup attach $cg sock_ops $prog" to load this BPF program.
  */
 
 #include <uapi/linux/bpf.h>
@@ -22,13 +22,6 @@
 
 #define DEBUG 1
 
-#define bpf_printk(fmt, ...)					\
-({								\
-	       char ____fmt[] = fmt;				\
-	       bpf_trace_printk(____fmt, sizeof(____fmt),	\
-				##__VA_ARGS__);			\
-})
-
 SEC("sockops")
 int bpf_clamp(struct bpf_sock_ops *skops)
 {

diff --git a/samples/bpf/tcp_cong_kern.c b/samples/bpf/tcp_cong_kern.c
index ad0f1ba..6d4dc4c 100644
--- a/samples/bpf/tcp_cong_kern.c
+++ b/samples/bpf/tcp_cong_kern.c

@@ -7,7 +7,7 @@
  * BPF program to set congestion control to dctcp when both hosts are
  * in the same datacenter (as deteremined by IPv6 prefix).
  *
- * Use load_sock_ops to load this BPF program.
+ * Use "bpftool cgroup attach $cg sock_ops $prog" to load this BPF program.
  */
 
 #include <uapi/linux/bpf.h>
@@ -21,13 +21,6 @@
 
 #define DEBUG 1
 
-#define bpf_printk(fmt, ...)					\
-({								\
-	       char ____fmt[] = fmt;				\
-	       bpf_trace_printk(____fmt, sizeof(____fmt),	\
-				##__VA_ARGS__);			\
-})
-
 SEC("sockops")
 int bpf_cong(struct bpf_sock_ops *skops)
 {

diff --git a/samples/bpf/tcp_dumpstats_kern.c b/samples/bpf/tcp_dumpstats_kern.c
new file mode 100644
index 0000000..8557913
--- /dev/null
+++ b/samples/bpf/tcp_dumpstats_kern.c

@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Refer to samples/bpf/tcp_bpf.readme for the instructions on
+ * how to run this sample program.
+ */
+#include <linux/bpf.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define INTERVAL			1000000000ULL
+
+int _version SEC("version") = 1;
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__u32 type;
+	__u32 map_flags;
+	int *key;
+	__u64 *value;
+} bpf_next_dump SEC(".maps") = {
+	.type = BPF_MAP_TYPE_SK_STORAGE,
+	.map_flags = BPF_F_NO_PREALLOC,
+};
+
+SEC("sockops")
+int _sockops(struct bpf_sock_ops *ctx)
+{
+	struct bpf_tcp_sock *tcp_sk;
+	struct bpf_sock *sk;
+	__u64 *next_dump;
+	__u64 now;
+
+	switch (ctx->op) {
+	case BPF_SOCK_OPS_TCP_CONNECT_CB:
+		bpf_sock_ops_cb_flags_set(ctx, BPF_SOCK_OPS_RTT_CB_FLAG);
+		return 1;
+	case BPF_SOCK_OPS_RTT_CB:
+		break;
+	default:
+		return 1;
+	}
+
+	sk = ctx->sk;
+	if (!sk)
+		return 1;
+
+	next_dump = bpf_sk_storage_get(&bpf_next_dump, sk, 0,
+				       BPF_SK_STORAGE_GET_F_CREATE);
+	if (!next_dump)
+		return 1;
+
+	now = bpf_ktime_get_ns();
+	if (now < *next_dump)
+		return 1;
+
+	tcp_sk = bpf_tcp_sock(sk);
+	if (!tcp_sk)
+		return 1;
+
+	*next_dump = now + INTERVAL;
+
+	bpf_printk("dsack_dups=%u delivered=%u\n",
+		   tcp_sk->dsack_dups, tcp_sk->delivered);
+	bpf_printk("delivered_ce=%u icsk_retransmits=%u\n",
+		   tcp_sk->delivered_ce, tcp_sk->icsk_retransmits);
+
+	return 1;
+}

diff --git a/samples/bpf/tcp_iw_kern.c b/samples/bpf/tcp_iw_kern.c
index 4ca5ecc..da61d53 100644
--- a/samples/bpf/tcp_iw_kern.c
+++ b/samples/bpf/tcp_iw_kern.c

@@ -9,7 +9,7 @@
  * would usually be done after doing appropriate checks that indicate
  * the hosts are far enough away (i.e. large RTT).
  *
- * Use load_sock_ops to load this BPF program.
+ * Use "bpftool cgroup attach $cg sock_ops $prog" to load this BPF program.
  */
 
 #include <uapi/linux/bpf.h>
@@ -22,13 +22,6 @@
 
 #define DEBUG 1
 
-#define bpf_printk(fmt, ...)					\
-({								\
-	       char ____fmt[] = fmt;				\
-	       bpf_trace_printk(____fmt, sizeof(____fmt),	\
-				##__VA_ARGS__);			\
-})
-
 SEC("sockops")
 int bpf_iw(struct bpf_sock_ops *skops)
 {

diff --git a/samples/bpf/tcp_rwnd_kern.c b/samples/bpf/tcp_rwnd_kern.c
index 09ff65b..d011e38 100644
--- a/samples/bpf/tcp_rwnd_kern.c
+++ b/samples/bpf/tcp_rwnd_kern.c

@@ -8,7 +8,7 @@
  * and the first 5.5 bytes of the IPv6 addresses are not the same (in this
  * example that means both hosts are not the same datacenter).
  *
- * Use load_sock_ops to load this BPF program.
+ * Use "bpftool cgroup attach $cg sock_ops $prog" to load this BPF program.
  */
 
 #include <uapi/linux/bpf.h>
@@ -21,13 +21,6 @@
 
 #define DEBUG 1
 
-#define bpf_printk(fmt, ...)					\
-({								\
-	       char ____fmt[] = fmt;				\
-	       bpf_trace_printk(____fmt, sizeof(____fmt),	\
-				##__VA_ARGS__);			\
-})
-
 SEC("sockops")
 int bpf_rwnd(struct bpf_sock_ops *skops)
 {

diff --git a/samples/bpf/tcp_synrto_kern.c b/samples/bpf/tcp_synrto_kern.c
index 232bb24..720d195 100644
--- a/samples/bpf/tcp_synrto_kern.c
+++ b/samples/bpf/tcp_synrto_kern.c

@@ -8,7 +8,7 @@
  * and the first 5.5 bytes of the IPv6 addresses are the same (in this example
  * that means both hosts are in the same datacenter).
  *
- * Use load_sock_ops to load this BPF program.
+ * Use "bpftool cgroup attach $cg sock_ops $prog" to load this BPF program.
  */
 
 #include <uapi/linux/bpf.h>
@@ -21,13 +21,6 @@
 
 #define DEBUG 1
 
-#define bpf_printk(fmt, ...)					\
-({								\
-	       char ____fmt[] = fmt;				\
-	       bpf_trace_printk(____fmt, sizeof(____fmt),	\
-				##__VA_ARGS__);			\
-})
-
 SEC("sockops")
 int bpf_synrto(struct bpf_sock_ops *skops)
 {

diff --git a/samples/bpf/tcp_tos_reflect_kern.c b/samples/bpf/tcp_tos_reflect_kern.c
new file mode 100644
index 0000000..369faca
--- /dev/null
+++ b/samples/bpf/tcp_tos_reflect_kern.c

@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018 Facebook
+ *
+ * BPF program to automatically reflect TOS option from received syn packet
+ *
+ * Use "bpftool cgroup attach $cg sock_ops $prog" to load this BPF program.
+ */
+
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/tcp.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include <uapi/linux/ipv6.h>
+#include <uapi/linux/in.h>
+#include <linux/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define DEBUG 1
+
+SEC("sockops")
+int bpf_basertt(struct bpf_sock_ops *skops)
+{
+	char header[sizeof(struct ipv6hdr)];
+	struct ipv6hdr *hdr6;
+	struct iphdr *hdr;
+	int hdr_size = 0;
+	int save_syn = 1;
+	int tos = 0;
+	int rv = 0;
+	int op;
+
+	op = (int) skops->op;
+
+#ifdef DEBUG
+	bpf_printk("BPF command: %d\n", op);
+#endif
+	switch (op) {
+	case BPF_SOCK_OPS_TCP_LISTEN_CB:
+		rv = bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN,
+				   &save_syn, sizeof(save_syn));
+		break;
+	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+		if (skops->family == AF_INET)
+			hdr_size = sizeof(struct iphdr);
+		else
+			hdr_size = sizeof(struct ipv6hdr);
+		rv = bpf_getsockopt(skops, SOL_TCP, TCP_SAVED_SYN,
+				    header, hdr_size);
+		if (!rv) {
+			if (skops->family == AF_INET) {
+				hdr = (struct iphdr *) header;
+				tos = hdr->tos;
+				if (tos != 0)
+					bpf_setsockopt(skops, SOL_IP, IP_TOS,
+						       &tos, sizeof(tos));
+			} else {
+				hdr6 = (struct ipv6hdr *) header;
+				tos = ((hdr6->priority) << 4 |
+				       (hdr6->flow_lbl[0]) >>  4);
+				if (tos)
+					bpf_setsockopt(skops, SOL_IPV6,
+						       IPV6_TCLASS,
+						       &tos, sizeof(tos));
+			}
+			rv = 0;
+		}
+		break;
+	default:
+		rv = -1;
+	}
+#ifdef DEBUG
+	bpf_printk("Returning %d\n", rv);
+#endif
+	skops->reply = rv;
+	return 1;
+}
+char _license[] SEC("license") = "GPL";

diff --git a/samples/bpf/test_cgrp2_array_pin.c b/samples/bpf/test_cgrp2_array_pin.c
index 2421842..6d564aa 100644
--- a/samples/bpf/test_cgrp2_array_pin.c
+++ b/samples/bpf/test_cgrp2_array_pin.c

@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2016 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
  */
 #include <linux/unistd.h>
 #include <linux/bpf.h>

diff --git a/samples/bpf/test_cgrp2_attach2.c b/samples/bpf/test_cgrp2_attach2.c
deleted file mode 100644
index 180f9d8..0000000
--- a/samples/bpf/test_cgrp2_attach2.c
+++ /dev/null

@@ -1,442 +0,0 @@
-/* eBPF example program:
- *
- * - Creates arraymap in kernel with 4 bytes keys and 8 byte values
- *
- * - Loads eBPF program
- *
- *   The eBPF program accesses the map passed in to store two pieces of
- *   information. The number of invocations of the program, which maps
- *   to the number of packets received, is stored to key 0. Key 1 is
- *   incremented on each iteration by the number of bytes stored in
- *   the skb. The program also stores the number of received bytes
- *   in the cgroup storage.
- *
- * - Attaches the new program to a cgroup using BPF_PROG_ATTACH
- *
- * - Every second, reads map[0] and map[1] to see how many bytes and
- *   packets were seen on any socket of tasks in the given cgroup.
- */
-
-#define _GNU_SOURCE
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <assert.h>
-#include <sys/resource.h>
-#include <sys/time.h>
-#include <unistd.h>
-
-#include <linux/bpf.h>
-#include <bpf/bpf.h>
-
-#include "bpf_insn.h"
-#include "bpf_rlimit.h"
-#include "cgroup_helpers.h"
-
-#define FOO		"/foo"
-#define BAR		"/foo/bar/"
-#define PING_CMD	"ping -c1 -w1 127.0.0.1 > /dev/null"
-
-char bpf_log_buf[BPF_LOG_BUF_SIZE];
-
-static int prog_load(int verdict)
-{
-	int ret;
-	struct bpf_insn prog[] = {
-		BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
-		BPF_EXIT_INSN(),
-	};
-	size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
-
-	ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
-			       prog, insns_cnt, "GPL", 0,
-			       bpf_log_buf, BPF_LOG_BUF_SIZE);
-
-	if (ret < 0) {
-		log_err("Loading program");
-		printf("Output from verifier:\n%s\n-------\n", bpf_log_buf);
-		return 0;
-	}
-	return ret;
-}
-
-static int test_foo_bar(void)
-{
-	int drop_prog, allow_prog, foo = 0, bar = 0, rc = 0;
-
-	allow_prog = prog_load(1);
-	if (!allow_prog)
-		goto err;
-
-	drop_prog = prog_load(0);
-	if (!drop_prog)
-		goto err;
-
-	if (setup_cgroup_environment())
-		goto err;
-
-	/* Create cgroup /foo, get fd, and join it */
-	foo = create_and_get_cgroup(FOO);
-	if (!foo)
-		goto err;
-
-	if (join_cgroup(FOO))
-		goto err;
-
-	if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS,
-			    BPF_F_ALLOW_OVERRIDE)) {
-		log_err("Attaching prog to /foo");
-		goto err;
-	}
-
-	printf("Attached DROP prog. This ping in cgroup /foo should fail...\n");
-	assert(system(PING_CMD) != 0);
-
-	/* Create cgroup /foo/bar, get fd, and join it */
-	bar = create_and_get_cgroup(BAR);
-	if (!bar)
-		goto err;
-
-	if (join_cgroup(BAR))
-		goto err;
-
-	printf("Attached DROP prog. This ping in cgroup /foo/bar should fail...\n");
-	assert(system(PING_CMD) != 0);
-
-	if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
-			    BPF_F_ALLOW_OVERRIDE)) {
-		log_err("Attaching prog to /foo/bar");
-		goto err;
-	}
-
-	printf("Attached PASS prog. This ping in cgroup /foo/bar should pass...\n");
-	assert(system(PING_CMD) == 0);
-
-	if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) {
-		log_err("Detaching program from /foo/bar");
-		goto err;
-	}
-
-	printf("Detached PASS from /foo/bar while DROP is attached to /foo.\n"
-	       "This ping in cgroup /foo/bar should fail...\n");
-	assert(system(PING_CMD) != 0);
-
-	if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
-			    BPF_F_ALLOW_OVERRIDE)) {
-		log_err("Attaching prog to /foo/bar");
-		goto err;
-	}
-
-	if (bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS)) {
-		log_err("Detaching program from /foo");
-		goto err;
-	}
-
-	printf("Attached PASS from /foo/bar and detached DROP from /foo.\n"
-	       "This ping in cgroup /foo/bar should pass...\n");
-	assert(system(PING_CMD) == 0);
-
-	if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
-			    BPF_F_ALLOW_OVERRIDE)) {
-		log_err("Attaching prog to /foo/bar");
-		goto err;
-	}
-
-	if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0)) {
-		errno = 0;
-		log_err("Unexpected success attaching prog to /foo/bar");
-		goto err;
-	}
-
-	if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) {
-		log_err("Detaching program from /foo/bar");
-		goto err;
-	}
-
-	if (!bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS)) {
-		errno = 0;
-		log_err("Unexpected success in double detach from /foo");
-		goto err;
-	}
-
-	if (bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, 0)) {
-		log_err("Attaching non-overridable prog to /foo");
-		goto err;
-	}
-
-	if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0)) {
-		errno = 0;
-		log_err("Unexpected success attaching non-overridable prog to /foo/bar");
-		goto err;
-	}
-
-	if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
-			     BPF_F_ALLOW_OVERRIDE)) {
-		errno = 0;
-		log_err("Unexpected success attaching overridable prog to /foo/bar");
-		goto err;
-	}
-
-	if (!bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS,
-			     BPF_F_ALLOW_OVERRIDE)) {
-		errno = 0;
-		log_err("Unexpected success attaching overridable prog to /foo");
-		goto err;
-	}
-
-	if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, 0)) {
-		log_err("Attaching different non-overridable prog to /foo");
-		goto err;
-	}
-
-	goto out;
-
-err:
-	rc = 1;
-
-out:
-	close(foo);
-	close(bar);
-	cleanup_cgroup_environment();
-	if (!rc)
-		printf("### override:PASS\n");
-	else
-		printf("### override:FAIL\n");
-	return rc;
-}
-
-static int map_fd = -1;
-
-static int prog_load_cnt(int verdict, int val)
-{
-	int cgroup_storage_fd;
-
-	if (map_fd < 0)
-		map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
-	if (map_fd < 0) {
-		printf("failed to create map '%s'\n", strerror(errno));
-		return -1;
-	}
-
-	cgroup_storage_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE,
-				sizeof(struct bpf_cgroup_storage_key), 8, 0, 0);
-	if (cgroup_storage_fd < 0) {
-		printf("failed to create map '%s'\n", strerror(errno));
-		return -1;
-	}
-
-	struct bpf_insn prog[] = {
-		BPF_MOV32_IMM(BPF_REG_0, 0),
-		BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
-		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
-		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
-		BPF_LD_MAP_FD(BPF_REG_1, map_fd),
-		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-		BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
-		BPF_MOV64_IMM(BPF_REG_1, val), /* r1 = 1 */
-		BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
-		BPF_LD_MAP_FD(BPF_REG_1, cgroup_storage_fd),
-		BPF_MOV64_IMM(BPF_REG_2, 0),
-		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
-		BPF_MOV64_IMM(BPF_REG_1, val),
-		BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_0, BPF_REG_1, 0, 0),
-		BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
-		BPF_EXIT_INSN(),
-	};
-	size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
-	int ret;
-
-	ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
-			       prog, insns_cnt, "GPL", 0,
-			       bpf_log_buf, BPF_LOG_BUF_SIZE);
-
-	if (ret < 0) {
-		log_err("Loading program");
-		printf("Output from verifier:\n%s\n-------\n", bpf_log_buf);
-		return 0;
-	}
-	close(cgroup_storage_fd);
-	return ret;
-}
-
-
-static int test_multiprog(void)
-{
-	__u32 prog_ids[4], prog_cnt = 0, attach_flags, saved_prog_id;
-	int cg1 = 0, cg2 = 0, cg3 = 0, cg4 = 0, cg5 = 0, key = 0;
-	int drop_prog, allow_prog[6] = {}, rc = 0;
-	unsigned long long value;
-	int i = 0;
-
-	for (i = 0; i < 6; i++) {
-		allow_prog[i] = prog_load_cnt(1, 1 << i);
-		if (!allow_prog[i])
-			goto err;
-	}
-	drop_prog = prog_load_cnt(0, 1);
-	if (!drop_prog)
-		goto err;
-
-	if (setup_cgroup_environment())
-		goto err;
-
-	cg1 = create_and_get_cgroup("/cg1");
-	if (!cg1)
-		goto err;
-	cg2 = create_and_get_cgroup("/cg1/cg2");
-	if (!cg2)
-		goto err;
-	cg3 = create_and_get_cgroup("/cg1/cg2/cg3");
-	if (!cg3)
-		goto err;
-	cg4 = create_and_get_cgroup("/cg1/cg2/cg3/cg4");
-	if (!cg4)
-		goto err;
-	cg5 = create_and_get_cgroup("/cg1/cg2/cg3/cg4/cg5");
-	if (!cg5)
-		goto err;
-
-	if (join_cgroup("/cg1/cg2/cg3/cg4/cg5"))
-		goto err;
-
-	if (bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS,
-			    BPF_F_ALLOW_MULTI)) {
-		log_err("Attaching prog to cg1");
-		goto err;
-	}
-	if (!bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS,
-			     BPF_F_ALLOW_MULTI)) {
-		log_err("Unexpected success attaching the same prog to cg1");
-		goto err;
-	}
-	if (bpf_prog_attach(allow_prog[1], cg1, BPF_CGROUP_INET_EGRESS,
-			    BPF_F_ALLOW_MULTI)) {
-		log_err("Attaching prog2 to cg1");
-		goto err;
-	}
-	if (bpf_prog_attach(allow_prog[2], cg2, BPF_CGROUP_INET_EGRESS,
-			    BPF_F_ALLOW_OVERRIDE)) {
-		log_err("Attaching prog to cg2");
-		goto err;
-	}
-	if (bpf_prog_attach(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS,
-			    BPF_F_ALLOW_MULTI)) {
-		log_err("Attaching prog to cg3");
-		goto err;
-	}
-	if (bpf_prog_attach(allow_prog[4], cg4, BPF_CGROUP_INET_EGRESS,
-			    BPF_F_ALLOW_OVERRIDE)) {
-		log_err("Attaching prog to cg4");
-		goto err;
-	}
-	if (bpf_prog_attach(allow_prog[5], cg5, BPF_CGROUP_INET_EGRESS, 0)) {
-		log_err("Attaching prog to cg5");
-		goto err;
-	}
-	assert(system(PING_CMD) == 0);
-	assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
-	assert(value == 1 + 2 + 8 + 32);
-
-	/* query the number of effective progs in cg5 */
-	assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE,
-			      NULL, NULL, &prog_cnt) == 0);
-	assert(prog_cnt == 4);
-	/* retrieve prog_ids of effective progs in cg5 */
-	assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE,
-			      &attach_flags, prog_ids, &prog_cnt) == 0);
-	assert(prog_cnt == 4);
-	assert(attach_flags == 0);
-	saved_prog_id = prog_ids[0];
-	/* check enospc handling */
-	prog_ids[0] = 0;
-	prog_cnt = 2;
-	assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE,
-			      &attach_flags, prog_ids, &prog_cnt) == -1 &&
-	       errno == ENOSPC);
-	assert(prog_cnt == 4);
-	/* check that prog_ids are returned even when buffer is too small */
-	assert(prog_ids[0] == saved_prog_id);
-	/* retrieve prog_id of single attached prog in cg5 */
-	prog_ids[0] = 0;
-	assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, 0,
-			      NULL, prog_ids, &prog_cnt) == 0);
-	assert(prog_cnt == 1);
-	assert(prog_ids[0] == saved_prog_id);
-
-	/* detach bottom program and ping again */
-	if (bpf_prog_detach2(-1, cg5, BPF_CGROUP_INET_EGRESS)) {
-		log_err("Detaching prog from cg5");
-		goto err;
-	}
-	value = 0;
-	assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0);
-	assert(system(PING_CMD) == 0);
-	assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
-	assert(value == 1 + 2 + 8 + 16);
-
-	/* detach 3rd from bottom program and ping again */
-	errno = 0;
-	if (!bpf_prog_detach2(0, cg3, BPF_CGROUP_INET_EGRESS)) {
-		log_err("Unexpected success on detach from cg3");
-		goto err;
-	}
-	if (bpf_prog_detach2(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS)) {
-		log_err("Detaching from cg3");
-		goto err;
-	}
-	value = 0;
-	assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0);
-	assert(system(PING_CMD) == 0);
-	assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
-	assert(value == 1 + 2 + 16);
-
-	/* detach 2nd from bottom program and ping again */
-	if (bpf_prog_detach2(-1, cg4, BPF_CGROUP_INET_EGRESS)) {
-		log_err("Detaching prog from cg4");
-		goto err;
-	}
-	value = 0;
-	assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0);
-	assert(system(PING_CMD) == 0);
-	assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
-	assert(value == 1 + 2 + 4);
-
-	prog_cnt = 4;
-	assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE,
-			      &attach_flags, prog_ids, &prog_cnt) == 0);
-	assert(prog_cnt == 3);
-	assert(attach_flags == 0);
-	assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, 0,
-			      NULL, prog_ids, &prog_cnt) == 0);
-	assert(prog_cnt == 0);
-	goto out;
-err:
-	rc = 1;
-
-out:
-	for (i = 0; i < 6; i++)
-		if (allow_prog[i] > 0)
-			close(allow_prog[i]);
-	close(cg1);
-	close(cg2);
-	close(cg3);
-	close(cg4);
-	close(cg5);
-	cleanup_cgroup_environment();
-	if (!rc)
-		printf("### multi:PASS\n");
-	else
-		printf("### multi:FAIL\n");
-	return rc;
-}
-
-int main(int argc, char **argv)
-{
-	int rc = 0;
-
-	rc = test_foo_bar();
-	if (rc)
-		return rc;
-
-	return test_multiprog();
-}

diff --git a/samples/bpf/test_current_task_under_cgroup_user.c b/samples/bpf/test_current_task_under_cgroup_user.c
index 4be4874..06e9f8c 100644
--- a/samples/bpf/test_current_task_under_cgroup_user.c
+++ b/samples/bpf/test_current_task_under_cgroup_user.c

@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2016 Sargun Dhillon <sargun@sargun.me>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
  */
 
 #define _GNU_SOURCE
@@ -11,7 +8,6 @@
 #include <unistd.h>
 #include <bpf/bpf.h>
 #include "bpf_load.h"
-#include <linux/bpf.h>
 #include "cgroup_helpers.h"
 
 #define CGROUP_PATH		"/my-cgroup"
@@ -33,7 +29,7 @@
 
 	cg2 = create_and_get_cgroup(CGROUP_PATH);
 
-	if (!cg2)
+	if (cg2 < 0)
 		goto err;
 
 	if (bpf_map_update_elem(map_fd[0], &idx, &cg2, BPF_ANY)) {

diff --git a/samples/bpf/test_lru_dist.c b/samples/bpf/test_lru_dist.c
index eec3e25..b313dba 100644
--- a/samples/bpf/test_lru_dist.c
+++ b/samples/bpf/test_lru_dist.c

@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2016 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
  */
 #define _GNU_SOURCE
 #include <linux/types.h>

diff --git a/samples/bpf/test_map_in_map_user.c b/samples/bpf/test_map_in_map_user.c
index e308858..eb29bcb 100644
--- a/samples/bpf/test_map_in_map_user.c
+++ b/samples/bpf/test_map_in_map_user.c

@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2017 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
  */
 #include <sys/resource.h>
 #include <sys/socket.h>

diff --git a/samples/bpf/test_overhead_user.c b/samples/bpf/test_overhead_user.c
index 9d6dcaa..94f7411 100644
--- a/samples/bpf/test_overhead_user.c
+++ b/samples/bpf/test_overhead_user.c

@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2016 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
  */
 #define _GNU_SOURCE
 #include <sched.h>

diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c
index d08046a..16a16ea 100644
--- a/samples/bpf/trace_event_user.c
+++ b/samples/bpf/trace_event_user.c

@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2016 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
  */
 #include <stdio.h>
 #include <unistd.h>
@@ -34,6 +31,11 @@
 	if (!addr)
 		return;
 	sym = ksym_search(addr);
+	if (!sym) {
+		printf("ksym not found. Is kallsyms loaded?\n");
+		return;
+	}
+
 	printf("%s;", sym->name);
 	if (!strcmp(sym->name, "sys_read"))
 		sys_read_seen = true;

diff --git a/samples/bpf/trace_output_user.c b/samples/bpf/trace_output_user.c
index 4837d73..8ee4769 100644
--- a/samples/bpf/trace_output_user.c
+++ b/samples/bpf/trace_output_user.c

@@ -1,7 +1,4 @@
-/* This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
+// SPDX-License-Identifier: GPL-2.0-only
 #include <stdio.h>
 #include <unistd.h>
 #include <stdlib.h>
@@ -21,9 +18,6 @@
 #include <libbpf.h>
 #include "bpf_load.h"
 #include "perf-sys.h"
-#include "trace_helpers.h"
-
-static int pmu_fd;
 
 static __u64 time_get_ns(void)
 {
@@ -34,12 +28,12 @@
 }
 
 static __u64 start_time;
+static __u64 cnt;
 
 #define MAX_CNT 100000ll
 
-static int print_bpf_output(void *data, int size)
+static void print_bpf_output(void *ctx, int cpu, void *data, __u32 size)
 {
-	static __u64 cnt;
 	struct {
 		__u64 pid;
 		__u64 cookie;
@@ -48,7 +42,7 @@
 	if (e->cookie != 0x12345678) {
 		printf("BUG pid %llx cookie %llx sized %d\n",
 		       e->pid, e->cookie, size);
-		return LIBBPF_PERF_EVENT_ERROR;
+		return;
 	}
 
 	cnt++;
@@ -56,30 +50,14 @@
 	if (cnt == MAX_CNT) {
 		printf("recv %lld events per sec\n",
 		       MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
-		return LIBBPF_PERF_EVENT_DONE;
+		return;
 	}
-
-	return LIBBPF_PERF_EVENT_CONT;
-}
-
-static void test_bpf_perf_event(void)
-{
-	struct perf_event_attr attr = {
-		.sample_type = PERF_SAMPLE_RAW,
-		.type = PERF_TYPE_SOFTWARE,
-		.config = PERF_COUNT_SW_BPF_OUTPUT,
-	};
-	int key = 0;
-
-	pmu_fd = sys_perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0);
-
-	assert(pmu_fd >= 0);
-	assert(bpf_map_update_elem(map_fd[0], &key, &pmu_fd, BPF_ANY) == 0);
-	ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
 }
 
 int main(int argc, char **argv)
 {
+	struct perf_buffer_opts pb_opts = {};
+	struct perf_buffer *pb;
 	char filename[256];
 	FILE *f;
 	int ret;
@@ -91,16 +69,20 @@
 		return 1;
 	}
 
-	test_bpf_perf_event();
-
-	if (perf_event_mmap(pmu_fd) < 0)
+	pb_opts.sample_cb = print_bpf_output;
+	pb = perf_buffer__new(map_fd[0], 8, &pb_opts);
+	ret = libbpf_get_error(pb);
+	if (ret) {
+		printf("failed to setup perf_buffer: %d\n", ret);
 		return 1;
+	}
 
 	f = popen("taskset 1 dd if=/dev/zero of=/dev/null", "r");
 	(void) f;
 
 	start_time = time_get_ns();
-	ret = perf_event_poller(pmu_fd, print_bpf_output);
+	while ((ret = perf_buffer__poll(pb, 1000)) >= 0 && cnt < MAX_CNT) {
+	}
 	kill(0, SIGINT);
 	return ret;
 }

diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c
index 1a81e6a..c9544a4 100644
--- a/samples/bpf/tracex2_user.c
+++ b/samples/bpf/tracex2_user.c

@@ -131,7 +131,7 @@
 	signal(SIGTERM, int_exit);
 
 	/* start 'ping' in the background to have some kfree_skb events */
-	f = popen("ping -c5 localhost", "r");
+	f = popen("ping -4 -c5 localhost", "r");
 	(void) f;
 
 	/* start 'dd' in the background to have plenty of 'write' syscalls */

diff --git a/samples/bpf/tracex3_kern.c b/samples/bpf/tracex3_kern.c
index 9974c3d..ea1d4c1 100644
--- a/samples/bpf/tracex3_kern.c
+++ b/samples/bpf/tracex3_kern.c

@@ -20,7 +20,7 @@
 /* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe
  * example will no longer be meaningful
  */
-SEC("kprobe/blk_start_request")
+SEC("kprobe/blk_mq_start_request")
 int bpf_prog1(struct pt_regs *ctx)
 {
 	long rq = PT_REGS_PARM1(ctx);

diff --git a/samples/bpf/tracex3_user.c b/samples/bpf/tracex3_user.c
index 6c6b10f..cf8fedc 100644
--- a/samples/bpf/tracex3_user.c
+++ b/samples/bpf/tracex3_user.c

@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
  */
 #include <stdio.h>
 #include <stdlib.h>
@@ -17,8 +14,6 @@
 #include "bpf_load.h"
 #include "bpf_util.h"
 
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
-
 #define SLOTS 100
 
 static void clear_stats(int fd)

diff --git a/samples/bpf/tracex4_user.c b/samples/bpf/tracex4_user.c
index 14625c8..ec52203 100644
--- a/samples/bpf/tracex4_user.c
+++ b/samples/bpf/tracex4_user.c

@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2015 PLUMgrid, http://plumgrid.com
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
  */
 #include <stdio.h>
 #include <stdlib.h>

diff --git a/samples/bpf/tracex5_kern.c b/samples/bpf/tracex5_kern.c
index f57f4e1..35cb0ee 100644
--- a/samples/bpf/tracex5_kern.c
+++ b/samples/bpf/tracex5_kern.c

@@ -68,12 +68,25 @@
 	return 0;
 }
 
-PROG(SYS__NR_mmap)(struct pt_regs *ctx)
+#ifdef __NR_mmap2
+PROG(SYS__NR_mmap2)(struct pt_regs *ctx)
 {
-	char fmt[] = "mmap\n";
+	char fmt[] = "mmap2\n";
+
 	bpf_trace_printk(fmt, sizeof(fmt));
 	return 0;
 }
+#endif
+
+#ifdef __NR_mmap
+PROG(SYS__NR_mmap)(struct pt_regs *ctx)
+{
+	char fmt[] = "mmap\n";
+
+	bpf_trace_printk(fmt, sizeof(fmt));
+	return 0;
+}
+#endif
 
 char _license[] SEC("license") = "GPL";
 u32 _version SEC("version") = LINUX_VERSION_CODE;

diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c
index b02c531..a8e5fa0 100644
--- a/samples/bpf/xdp1_user.c
+++ b/samples/bpf/xdp1_user.c

@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2016 PLUMgrid
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
  */
 #include <linux/bpf.h>
 #include <linux/if_link.h>
@@ -15,17 +12,30 @@
 #include <unistd.h>
 #include <libgen.h>
 #include <sys/resource.h>
+#include <net/if.h>
 
 #include "bpf_util.h"
-#include "bpf/bpf.h"
-#include "bpf/libbpf.h"
+#include "bpf.h"
+#include "libbpf.h"
 
 static int ifindex;
-static __u32 xdp_flags;
+static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+static __u32 prog_id;
 
 static void int_exit(int sig)
 {
-	bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+	__u32 curr_prog_id = 0;
+
+	if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
+		printf("bpf_get_link_xdp_id failed\n");
+		exit(1);
+	}
+	if (prog_id == curr_prog_id)
+		bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+	else if (!curr_prog_id)
+		printf("couldn't find a prog id on a given interface\n");
+	else
+		printf("program on interface changed, not removing\n");
 	exit(0);
 }
 
@@ -34,26 +44,24 @@
 static void poll_stats(int map_fd, int interval)
 {
 	unsigned int nr_cpus = bpf_num_possible_cpus();
-	const unsigned int nr_keys = 256;
-	__u64 values[nr_cpus], prev[nr_keys][nr_cpus];
-	__u32 key;
+	__u64 values[nr_cpus], prev[UINT8_MAX] = { 0 };
 	int i;
 
-	memset(prev, 0, sizeof(prev));
-
 	while (1) {
+		__u32 key = UINT32_MAX;
+
 		sleep(interval);
 
-		for (key = 0; key < nr_keys; key++) {
+		while (bpf_map_get_next_key(map_fd, &key, &key) != -1) {
 			__u64 sum = 0;
 
 			assert(bpf_map_lookup_elem(map_fd, &key, values) == 0);
 			for (i = 0; i < nr_cpus; i++)
-				sum += (values[i] - prev[key][i]);
-			if (sum)
+				sum += values[i];
+			if (sum > prev[key])
 				printf("proto %u: %10llu pkt/s\n",
-				       key, sum / interval);
-			memcpy(prev[key], values, sizeof(values));
+				       key, (sum - prev[key]) / interval);
+			prev[key] = sum;
 		}
 	}
 }
@@ -61,10 +69,11 @@
 static void usage(const char *prog)
 {
 	fprintf(stderr,
-		"usage: %s [OPTS] IFINDEX\n\n"
+		"usage: %s [OPTS] IFACE\n\n"
 		"OPTS:\n"
 		"    -S    use skb-mode\n"
-		"    -N    enforce native mode\n",
+		"    -N    enforce native mode\n"
+		"    -F    force loading prog\n",
 		prog);
 }
 
@@ -74,11 +83,14 @@
 	struct bpf_prog_load_attr prog_load_attr = {
 		.prog_type	= BPF_PROG_TYPE_XDP,
 	};
-	const char *optstr = "SN";
+	struct bpf_prog_info info = {};
+	__u32 info_len = sizeof(info);
+	const char *optstr = "FSN";
 	int prog_fd, map_fd, opt;
 	struct bpf_object *obj;
 	struct bpf_map *map;
 	char filename[256];
+	int err;
 
 	while ((opt = getopt(argc, argv, optstr)) != -1) {
 		switch (opt) {
@@ -88,6 +100,9 @@
 		case 'N':
 			xdp_flags |= XDP_FLAGS_DRV_MODE;
 			break;
+		case 'F':
+			xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+			break;
 		default:
 			usage(basename(argv[0]));
 			return 1;
@@ -104,7 +119,11 @@
 		return 1;
 	}
 
-	ifindex = strtoul(argv[optind], NULL, 0);
+	ifindex = if_nametoindex(argv[optind]);
+	if (!ifindex) {
+		perror("if_nametoindex");
+		return 1;
+	}
 
 	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
 	prog_load_attr.file = filename;
@@ -132,6 +151,13 @@
 		return 1;
 	}
 
+	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+	if (err) {
+		printf("can't get prog info - %s\n", strerror(errno));
+		return err;
+	}
+	prog_id = info.id;
+
 	poll_stats(map_fd, 2);
 
 	return 0;

diff --git a/samples/bpf/xdp_adjust_tail_user.c b/samples/bpf/xdp_adjust_tail_user.c
index 3042ce3..a3596b6 100644
--- a/samples/bpf/xdp_adjust_tail_user.c
+++ b/samples/bpf/xdp_adjust_tail_user.c

@@ -13,23 +13,37 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <net/if.h>
 #include <sys/resource.h>
 #include <arpa/inet.h>
 #include <netinet/ether.h>
 #include <unistd.h>
 #include <time.h>
-#include "bpf/bpf.h"
-#include "bpf/libbpf.h"
+#include "bpf.h"
+#include "libbpf.h"
 
 #define STATS_INTERVAL_S 2U
 
 static int ifindex = -1;
-static __u32 xdp_flags;
+static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+static __u32 prog_id;
 
 static void int_exit(int sig)
 {
-	if (ifindex > -1)
-		bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+	__u32 curr_prog_id = 0;
+
+	if (ifindex > -1) {
+		if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
+			printf("bpf_get_link_xdp_id failed\n");
+			exit(1);
+		}
+		if (prog_id == curr_prog_id)
+			bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+		else if (!curr_prog_id)
+			printf("couldn't find a prog id on a given iface\n");
+		else
+			printf("program on interface changed, not removing\n");
+	}
 	exit(0);
 }
 
@@ -56,10 +70,11 @@
 	printf("Start a XDP prog which send ICMP \"packet too big\" \n"
 		"messages if ingress packet is bigger then MAX_SIZE bytes\n");
 	printf("Usage: %s [...]\n", cmd);
-	printf("    -i <ifindex> Interface Index\n");
+	printf("    -i <ifname|ifindex> Interface\n");
 	printf("    -T <stop-after-X-seconds> Default: 0 (forever)\n");
 	printf("    -S use skb-mode\n");
 	printf("    -N enforce native mode\n");
+	printf("    -F force loading prog\n");
 	printf("    -h Display this help\n");
 }
 
@@ -70,12 +85,15 @@
 		.prog_type	= BPF_PROG_TYPE_XDP,
 	};
 	unsigned char opt_flags[256] = {};
+	const char *optstr = "i:T:SNFh";
+	struct bpf_prog_info info = {};
+	__u32 info_len = sizeof(info);
 	unsigned int kill_after_s = 0;
-	const char *optstr = "i:T:SNh";
 	int i, prog_fd, map_fd, opt;
 	struct bpf_object *obj;
 	struct bpf_map *map;
 	char filename[256];
+	int err;
 
 	for (i = 0; i < strlen(optstr); i++)
 		if (optstr[i] != 'h' && 'a' <= optstr[i] && optstr[i] <= 'z')
@@ -85,7 +103,9 @@
 
 		switch (opt) {
 		case 'i':
-			ifindex = atoi(optarg);
+			ifindex = if_nametoindex(optarg);
+			if (!ifindex)
+				ifindex = atoi(optarg);
 			break;
 		case 'T':
 			kill_after_s = atoi(optarg);
@@ -96,6 +116,9 @@
 		case 'N':
 			xdp_flags |= XDP_FLAGS_DRV_MODE;
 			break;
+		case 'F':
+			xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+			break;
 		default:
 			usage(argv[0]);
 			return 1;
@@ -116,6 +139,11 @@
 		return 1;
 	}
 
+	if (!ifindex) {
+		fprintf(stderr, "Invalid ifname\n");
+		return 1;
+	}
+
 	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
 	prog_load_attr.file = filename;
 
@@ -142,9 +170,15 @@
 		return 1;
 	}
 
-	poll_stats(map_fd, kill_after_s);
+	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+	if (err) {
+		printf("can't get prog info - %s\n", strerror(errno));
+		return 1;
+	}
+	prog_id = info.id;
 
-	bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+	poll_stats(map_fd, kill_after_s);
+	int_exit(0);
 
 	return 0;
 }

diff --git a/samples/bpf/xdp_fwd_kern.c b/samples/bpf/xdp_fwd_kern.c
index a7e94e7..701a30f 100644
--- a/samples/bpf/xdp_fwd_kern.c
+++ b/samples/bpf/xdp_fwd_kern.c

@@ -23,7 +23,8 @@
 
 #define IPV6_FLOWINFO_MASK              cpu_to_be32(0x0FFFFFFF)
 
-struct bpf_map_def SEC("maps") tx_port = {
+/* For TX-traffic redirect requires net_device ifindex to be in this devmap */
+struct bpf_map_def SEC("maps") xdp_tx_ports = {
 	.type = BPF_MAP_TYPE_DEVMAP,
 	.key_size = sizeof(int),
 	.value_size = sizeof(int),
@@ -102,14 +103,34 @@
 	fib_params.ifindex = ctx->ingress_ifindex;
 
 	rc = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), flags);
-
-	/* verify egress index has xdp support
-	 * TO-DO bpf_map_lookup_elem(&tx_port, &key) fails with
-	 *       cannot pass map_type 14 into func bpf_map_lookup_elem#1:
-	 * NOTE: without verification that egress index supports XDP
-	 *       forwarding packets are dropped.
+	/*
+	 * Some rc (return codes) from bpf_fib_lookup() are important,
+	 * to understand how this XDP-prog interacts with network stack.
+	 *
+	 * BPF_FIB_LKUP_RET_NO_NEIGH:
+	 *  Even if route lookup was a success, then the MAC-addresses are also
+	 *  needed.  This is obtained from arp/neighbour table, but if table is
+	 *  (still) empty then BPF_FIB_LKUP_RET_NO_NEIGH is returned.  To avoid
+	 *  doing ARP lookup directly from XDP, then send packet to normal
+	 *  network stack via XDP_PASS and expect it will do ARP resolution.
+	 *
+	 * BPF_FIB_LKUP_RET_FWD_DISABLED:
+	 *  The bpf_fib_lookup respect sysctl net.ipv{4,6}.conf.all.forwarding
+	 *  setting, and will return BPF_FIB_LKUP_RET_FWD_DISABLED if not
+	 *  enabled this on ingress device.
 	 */
-	if (rc == 0) {
+	if (rc == BPF_FIB_LKUP_RET_SUCCESS) {
+		/* Verify egress index has been configured as TX-port.
+		 * (Note: User can still have inserted an egress ifindex that
+		 * doesn't support XDP xmit, which will result in packet drops).
+		 *
+		 * Note: lookup in devmap supported since 0cdbb4b09a0.
+		 * If not supported will fail with:
+		 *  cannot pass map_type 14 into func bpf_map_lookup_elem#1:
+		 */
+		if (!bpf_map_lookup_elem(&xdp_tx_ports, &fib_params.ifindex))
+			return XDP_PASS;
+
 		if (h_proto == htons(ETH_P_IP))
 			ip_decrease_ttl(iph);
 		else if (h_proto == htons(ETH_P_IPV6))
@@ -117,7 +138,7 @@
 
 		memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN);
 		memcpy(eth->h_source, fib_params.smac, ETH_ALEN);
-		return bpf_redirect_map(&tx_port, fib_params.ifindex, 0);
+		return bpf_redirect_map(&xdp_tx_ports, fib_params.ifindex, 0);
 	}
 
 	return XDP_PASS;

diff --git a/samples/bpf/xdp_fwd_user.c b/samples/bpf/xdp_fwd_user.c
index f88e1d7..97ff1da 100644
--- a/samples/bpf/xdp_fwd_user.c
+++ b/samples/bpf/xdp_fwd_user.c

@@ -24,17 +24,23 @@
 #include <fcntl.h>
 #include <libgen.h>
 
-#include "bpf/libbpf.h"
+#include "libbpf.h"
 #include <bpf/bpf.h>
 
-
-static int do_attach(int idx, int fd, const char *name)
+static int do_attach(int idx, int prog_fd, int map_fd, const char *name)
 {
 	int err;
 
-	err = bpf_set_link_xdp_fd(idx, fd, 0);
-	if (err < 0)
+	err = bpf_set_link_xdp_fd(idx, prog_fd, 0);
+	if (err < 0) {
 		printf("ERROR: failed to attach program to %s\n", name);
+		return err;
+	}
+
+	/* Adding ifindex as a possible egress TX port */
+	err = bpf_map_update_elem(map_fd, &idx, &idx, 0);
+	if (err)
+		printf("ERROR: failed using device %s as TX-port\n", name);
 
 	return err;
 }
@@ -47,6 +53,9 @@
 	if (err < 0)
 		printf("ERROR: failed to detach program from %s\n", name);
 
+	/* TODO: Remember to cleanup map, when adding use of shared map
+	 *  bpf_map_delete_elem((map_fd, &idx);
+	 */
 	return err;
 }
 
@@ -67,10 +76,10 @@
 	};
 	const char *prog_name = "xdp_fwd";
 	struct bpf_program *prog;
+	int prog_fd, map_fd = -1;
 	char filename[PATH_MAX];
 	struct bpf_object *obj;
 	int opt, i, idx, err;
-	int prog_fd, map_fd;
 	int attach = 1;
 	int ret = 0;
 
@@ -103,8 +112,14 @@
 			return 1;
 		}
 
-		if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
+		err = bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd);
+		if (err) {
+			printf("Does kernel support devmap lookup?\n");
+			/* If not, the error message will be:
+			 *  "cannot pass map_type 14 into func bpf_map_lookup_elem#1"
+			 */
 			return 1;
+		}
 
 		prog = bpf_object__find_program_by_title(obj, prog_name);
 		prog_fd = bpf_program__fd(prog);
@@ -113,16 +128,12 @@
 			return 1;
 		}
 		map_fd = bpf_map__fd(bpf_object__find_map_by_name(obj,
-								  "tx_port"));
+							"xdp_tx_ports"));
 		if (map_fd < 0) {
 			printf("map not found: %s\n", strerror(map_fd));
 			return 1;
 		}
 	}
-	if (attach) {
-		for (i = 1; i < 64; ++i)
-			bpf_map_update_elem(map_fd, &i, &i, 0);
-	}
 
 	for (i = optind; i < argc; ++i) {
 		idx = if_nametoindex(argv[i]);
@@ -138,7 +149,7 @@
 			if (err)
 				ret = err;
 		} else {
-			err = do_attach(idx, prog_fd, argv[i]);
+			err = do_attach(idx, prog_fd, map_fd, argv[i]);
 			if (err)
 				ret = err;
 		}

diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c
index 2d23054..0da6e9e 100644
--- a/samples/bpf/xdp_redirect_cpu_user.c
+++ b/samples/bpf/xdp_redirect_cpu_user.c

@@ -1,4 +1,5 @@
-/* GPLv2 Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
  */
 static const char *__doc__ =
 	" XDP redirect with a CPU-map type \"BPF_MAP_TYPE_CPUMAP\"";
@@ -24,20 +25,26 @@
 /* How many xdp_progs are defined in _kern.c */
 #define MAX_PROG 6
 
-/* Wanted to get rid of bpf_load.h and fake-"libbpf.h" (and instead
- * use bpf/libbpf.h), but cannot as (currently) needed for XDP
- * attaching to a device via bpf_set_link_xdp_fd()
- */
 #include <bpf/bpf.h>
-#include "bpf_load.h"
+#include "libbpf.h"
 
 #include "bpf_util.h"
 
 static int ifindex = -1;
 static char ifname_buf[IF_NAMESIZE];
 static char *ifname;
+static __u32 prog_id;
 
-static __u32 xdp_flags;
+static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+static int cpu_map_fd;
+static int rx_cnt_map_fd;
+static int redirect_err_cnt_map_fd;
+static int cpumap_enqueue_cnt_map_fd;
+static int cpumap_kthread_cnt_map_fd;
+static int cpus_available_map_fd;
+static int cpus_count_map_fd;
+static int cpus_iterator_map_fd;
+static int exception_cnt_map_fd;
 
 /* Exit return codes */
 #define EXIT_OK		0
@@ -51,27 +58,50 @@
 	{"help",	no_argument,		NULL, 'h' },
 	{"dev",		required_argument,	NULL, 'd' },
 	{"skb-mode",	no_argument,		NULL, 'S' },
-	{"debug",	no_argument,		NULL, 'D' },
 	{"sec",		required_argument,	NULL, 's' },
-	{"prognum",	required_argument,	NULL, 'p' },
+	{"progname",	required_argument,	NULL, 'p' },
 	{"qsize",	required_argument,	NULL, 'q' },
 	{"cpu",		required_argument,	NULL, 'c' },
 	{"stress-mode", no_argument,		NULL, 'x' },
 	{"no-separators", no_argument,		NULL, 'z' },
+	{"force",	no_argument,		NULL, 'F' },
 	{0, 0, NULL,  0 }
 };
 
 static void int_exit(int sig)
 {
-	fprintf(stderr,
-		"Interrupted: Removing XDP program on ifindex:%d device:%s\n",
-		ifindex, ifname);
-	if (ifindex > -1)
-		bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+	__u32 curr_prog_id = 0;
+
+	if (ifindex > -1) {
+		if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
+			printf("bpf_get_link_xdp_id failed\n");
+			exit(EXIT_FAIL);
+		}
+		if (prog_id == curr_prog_id) {
+			fprintf(stderr,
+				"Interrupted: Removing XDP program on ifindex:%d device:%s\n",
+				ifindex, ifname);
+			bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+		} else if (!curr_prog_id) {
+			printf("couldn't find a prog id on a given iface\n");
+		} else {
+			printf("program on interface changed, not removing\n");
+		}
+	}
 	exit(EXIT_OK);
 }
 
-static void usage(char *argv[])
+static void print_avail_progs(struct bpf_object *obj)
+{
+	struct bpf_program *pos;
+
+	bpf_object__for_each_program(pos, obj) {
+		if (bpf_program__is_xdp(pos))
+			printf(" %s\n", bpf_program__title(pos, false));
+	}
+}
+
+static void usage(char *argv[], struct bpf_object *obj)
 {
 	int i;
 
@@ -89,6 +119,8 @@
 				long_options[i].val);
 		printf("\n");
 	}
+	printf("\n Programs to be used for --progname:\n");
+	print_avail_progs(obj);
 	printf("\n");
 }
 
@@ -263,7 +295,7 @@
 
 static void stats_print(struct stats_record *stats_rec,
 			struct stats_record *stats_prev,
-			int prog_num)
+			char *prog_name)
 {
 	unsigned int nr_cpus = bpf_num_possible_cpus();
 	double pps = 0, drop = 0, err = 0;
@@ -273,7 +305,7 @@
 	int i;
 
 	/* Header */
-	printf("Running XDP/eBPF prog_num:%d\n", prog_num);
+	printf("Running XDP/eBPF prog_name:%s\n", prog_name);
 	printf("%-15s %-7s %-14s %-11s %-9s\n",
 	       "XDP-cpumap", "CPU:to", "pps", "drop-pps", "extra-info");
 
@@ -424,20 +456,20 @@
 {
 	int fd, i;
 
-	fd = map_fd[1]; /* map: rx_cnt */
+	fd = rx_cnt_map_fd;
 	map_collect_percpu(fd, 0, &rec->rx_cnt);
 
-	fd = map_fd[2]; /* map: redirect_err_cnt */
+	fd = redirect_err_cnt_map_fd;
 	map_collect_percpu(fd, 1, &rec->redir_err);
 
-	fd = map_fd[3]; /* map: cpumap_enqueue_cnt */
+	fd = cpumap_enqueue_cnt_map_fd;
 	for (i = 0; i < MAX_CPUS; i++)
 		map_collect_percpu(fd, i, &rec->enq[i]);
 
-	fd = map_fd[4]; /* map: cpumap_kthread_cnt */
+	fd = cpumap_kthread_cnt_map_fd;
 	map_collect_percpu(fd, 0, &rec->kthread);
 
-	fd = map_fd[8]; /* map: exception_cnt */
+	fd = exception_cnt_map_fd;
 	map_collect_percpu(fd, 0, &rec->exception);
 }
 
@@ -462,7 +494,7 @@
 	/* Add a CPU entry to cpumap, as this allocate a cpu entry in
 	 * the kernel for the cpu.
 	 */
-	ret = bpf_map_update_elem(map_fd[0], &cpu, &queue_size, 0);
+	ret = bpf_map_update_elem(cpu_map_fd, &cpu, &queue_size, 0);
 	if (ret) {
 		fprintf(stderr, "Create CPU entry failed (err:%d)\n", ret);
 		exit(EXIT_FAIL_BPF);
@@ -471,23 +503,22 @@
 	/* Inform bpf_prog's that a new CPU is available to select
 	 * from via some control maps.
 	 */
-	/* map_fd[5] = cpus_available */
-	ret = bpf_map_update_elem(map_fd[5], &avail_idx, &cpu, 0);
+	ret = bpf_map_update_elem(cpus_available_map_fd, &avail_idx, &cpu, 0);
 	if (ret) {
 		fprintf(stderr, "Add to avail CPUs failed\n");
 		exit(EXIT_FAIL_BPF);
 	}
 
 	/* When not replacing/updating existing entry, bump the count */
-	/* map_fd[6] = cpus_count */
-	ret = bpf_map_lookup_elem(map_fd[6], &key, &curr_cpus_count);
+	ret = bpf_map_lookup_elem(cpus_count_map_fd, &key, &curr_cpus_count);
 	if (ret) {
 		fprintf(stderr, "Failed reading curr cpus_count\n");
 		exit(EXIT_FAIL_BPF);
 	}
 	if (new) {
 		curr_cpus_count++;
-		ret = bpf_map_update_elem(map_fd[6], &key, &curr_cpus_count, 0);
+		ret = bpf_map_update_elem(cpus_count_map_fd, &key,
+					  &curr_cpus_count, 0);
 		if (ret) {
 			fprintf(stderr, "Failed write curr cpus_count\n");
 			exit(EXIT_FAIL_BPF);
@@ -510,8 +541,8 @@
 	int ret, i;
 
 	for (i = 0; i < MAX_CPUS; i++) {
-		/* map_fd[5] = cpus_available */
-		ret = bpf_map_update_elem(map_fd[5], &i, &invalid_cpu, 0);
+		ret = bpf_map_update_elem(cpus_available_map_fd, &i,
+					  &invalid_cpu, 0);
 		if (ret) {
 			fprintf(stderr, "Failed marking CPU unavailable\n");
 			exit(EXIT_FAIL_BPF);
@@ -531,7 +562,7 @@
 	create_cpu_entry(1, 16000, 0, false);
 }
 
-static void stats_poll(int interval, bool use_separators, int prog_num,
+static void stats_poll(int interval, bool use_separators, char *prog_name,
 		       bool stress_mode)
 {
 	struct stats_record *record, *prev;
@@ -547,7 +578,7 @@
 	while (1) {
 		swap(&prev, &record);
 		stats_collect(record);
-		stats_print(record, prev, prog_num);
+		stats_print(record, prev, prog_name);
 		sleep(interval);
 		if (stress_mode)
 			stress_cpumap();
@@ -557,20 +588,55 @@
 	free_stats_record(prev);
 }
 
+static int init_map_fds(struct bpf_object *obj)
+{
+	cpu_map_fd = bpf_object__find_map_fd_by_name(obj, "cpu_map");
+	rx_cnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rx_cnt");
+	redirect_err_cnt_map_fd =
+		bpf_object__find_map_fd_by_name(obj, "redirect_err_cnt");
+	cpumap_enqueue_cnt_map_fd =
+		bpf_object__find_map_fd_by_name(obj, "cpumap_enqueue_cnt");
+	cpumap_kthread_cnt_map_fd =
+		bpf_object__find_map_fd_by_name(obj, "cpumap_kthread_cnt");
+	cpus_available_map_fd =
+		bpf_object__find_map_fd_by_name(obj, "cpus_available");
+	cpus_count_map_fd = bpf_object__find_map_fd_by_name(obj, "cpus_count");
+	cpus_iterator_map_fd =
+		bpf_object__find_map_fd_by_name(obj, "cpus_iterator");
+	exception_cnt_map_fd =
+		bpf_object__find_map_fd_by_name(obj, "exception_cnt");
+
+	if (cpu_map_fd < 0 || rx_cnt_map_fd < 0 ||
+	    redirect_err_cnt_map_fd < 0 || cpumap_enqueue_cnt_map_fd < 0 ||
+	    cpumap_kthread_cnt_map_fd < 0 || cpus_available_map_fd < 0 ||
+	    cpus_count_map_fd < 0 || cpus_iterator_map_fd < 0 ||
+	    exception_cnt_map_fd < 0)
+		return -ENOENT;
+
+	return 0;
+}
+
 int main(int argc, char **argv)
 {
 	struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
+	char *prog_name = "xdp_cpu_map5_lb_hash_ip_pairs";
+	struct bpf_prog_load_attr prog_load_attr = {
+		.prog_type	= BPF_PROG_TYPE_UNSPEC,
+	};
+	struct bpf_prog_info info = {};
+	__u32 info_len = sizeof(info);
 	bool use_separators = true;
 	bool stress_mode = false;
+	struct bpf_program *prog;
+	struct bpf_object *obj;
 	char filename[256];
-	bool debug = false;
 	int added_cpus = 0;
 	int longindex = 0;
 	int interval = 2;
-	int prog_num = 5;
 	int add_cpu = -1;
+	int opt, err;
+	int prog_fd;
 	__u32 qsize;
-	int opt;
 
 	/* Notice: choosing he queue size is very important with the
 	 * ixgbe driver, because it's driver page recycling trick is
@@ -581,26 +647,29 @@
 	qsize = 128+64;
 
 	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+	prog_load_attr.file = filename;
 
 	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
 		perror("setrlimit(RLIMIT_MEMLOCK)");
 		return 1;
 	}
 
-	if (load_bpf_file(filename)) {
-		fprintf(stderr, "ERR in load_bpf_file(): %s", bpf_log_buf);
+	if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
+		return EXIT_FAIL;
+
+	if (prog_fd < 0) {
+		fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n",
+			strerror(errno));
 		return EXIT_FAIL;
 	}
-
-	if (!prog_fd[0]) {
-		fprintf(stderr, "ERR: load_bpf_file: %s\n", strerror(errno));
+	if (init_map_fds(obj) < 0) {
+		fprintf(stderr, "bpf_object__find_map_fd_by_name failed\n");
 		return EXIT_FAIL;
 	}
-
 	mark_cpus_unavailable();
 
 	/* Parse commands line args */
-	while ((opt = getopt_long(argc, argv, "hSd:",
+	while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzF",
 				  long_options, &longindex)) != -1) {
 		switch (opt) {
 		case 'd':
@@ -624,9 +693,6 @@
 		case 'S':
 			xdp_flags |= XDP_FLAGS_SKB_MODE;
 			break;
-		case 'D':
-			debug = true;
-			break;
 		case 'x':
 			stress_mode = true;
 			break;
@@ -635,13 +701,7 @@
 			break;
 		case 'p':
 			/* Selecting eBPF prog to load */
-			prog_num = atoi(optarg);
-			if (prog_num < 0 || prog_num >= MAX_PROG) {
-				fprintf(stderr,
-					"--prognum too large err(%d):%s\n",
-					errno, strerror(errno));
-				goto error;
-			}
+			prog_name = optarg;
 			break;
 		case 'c':
 			/* Add multiple CPUs */
@@ -658,24 +718,27 @@
 		case 'q':
 			qsize = atoi(optarg);
 			break;
+		case 'F':
+			xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+			break;
 		case 'h':
 		error:
 		default:
-			usage(argv);
+			usage(argv, obj);
 			return EXIT_FAIL_OPTION;
 		}
 	}
 	/* Required option */
 	if (ifindex == -1) {
 		fprintf(stderr, "ERR: required option --dev missing\n");
-		usage(argv);
+		usage(argv, obj);
 		return EXIT_FAIL_OPTION;
 	}
 	/* Required option */
 	if (add_cpu == -1) {
 		fprintf(stderr, "ERR: required option --cpu missing\n");
 		fprintf(stderr, " Specify multiple --cpu option to add more\n");
-		usage(argv);
+		usage(argv, obj);
 		return EXIT_FAIL_OPTION;
 	}
 
@@ -683,16 +746,30 @@
 	signal(SIGINT, int_exit);
 	signal(SIGTERM, int_exit);
 
-	if (bpf_set_link_xdp_fd(ifindex, prog_fd[prog_num], xdp_flags) < 0) {
+	prog = bpf_object__find_program_by_title(obj, prog_name);
+	if (!prog) {
+		fprintf(stderr, "bpf_object__find_program_by_title failed\n");
+		return EXIT_FAIL;
+	}
+
+	prog_fd = bpf_program__fd(prog);
+	if (prog_fd < 0) {
+		fprintf(stderr, "bpf_program__fd failed\n");
+		return EXIT_FAIL;
+	}
+
+	if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
 		fprintf(stderr, "link set xdp fd failed\n");
 		return EXIT_FAIL_XDP;
 	}
 
-	if (debug) {
-		printf("Debug-mode reading trace pipe (fix #define DEBUG)\n");
-		read_trace_pipe();
+	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+	if (err) {
+		printf("can't get prog info - %s\n", strerror(errno));
+		return err;
 	}
+	prog_id = info.id;
 
-	stats_poll(interval, use_separators, prog_num, stress_mode);
+	stats_poll(interval, use_separators, prog_name, stress_mode);
 	return EXIT_OK;
 }

diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c
index 4445e76..f70ee33 100644
--- a/samples/bpf/xdp_redirect_map_user.c
+++ b/samples/bpf/xdp_redirect_map_user.c

@@ -1,13 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
  */
 #include <linux/bpf.h>
 #include <linux/if_link.h>
@@ -18,25 +10,53 @@
 #include <stdlib.h>
 #include <stdbool.h>
 #include <string.h>
+#include <net/if.h>
 #include <unistd.h>
 #include <libgen.h>
 #include <sys/resource.h>
 
-#include "bpf_load.h"
 #include "bpf_util.h"
 #include <bpf/bpf.h>
+#include "libbpf.h"
 
 static int ifindex_in;
 static int ifindex_out;
 static bool ifindex_out_xdp_dummy_attached = true;
+static __u32 prog_id;
+static __u32 dummy_prog_id;
 
-static __u32 xdp_flags;
+static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+static int rxcnt_map_fd;
 
 static void int_exit(int sig)
 {
-	bpf_set_link_xdp_fd(ifindex_in, -1, xdp_flags);
-	if (ifindex_out_xdp_dummy_attached)
-		bpf_set_link_xdp_fd(ifindex_out, -1, xdp_flags);
+	__u32 curr_prog_id = 0;
+
+	if (bpf_get_link_xdp_id(ifindex_in, &curr_prog_id, xdp_flags)) {
+		printf("bpf_get_link_xdp_id failed\n");
+		exit(1);
+	}
+	if (prog_id == curr_prog_id)
+		bpf_set_link_xdp_fd(ifindex_in, -1, xdp_flags);
+	else if (!curr_prog_id)
+		printf("couldn't find a prog id on iface IN\n");
+	else
+		printf("program on iface IN changed, not removing\n");
+
+	if (ifindex_out_xdp_dummy_attached) {
+		curr_prog_id = 0;
+		if (bpf_get_link_xdp_id(ifindex_out, &curr_prog_id,
+					xdp_flags)) {
+			printf("bpf_get_link_xdp_id failed\n");
+			exit(1);
+		}
+		if (dummy_prog_id == curr_prog_id)
+			bpf_set_link_xdp_fd(ifindex_out, -1, xdp_flags);
+		else if (!curr_prog_id)
+			printf("couldn't find a prog id on iface OUT\n");
+		else
+			printf("program on iface OUT changed, not removing\n");
+	}
 	exit(0);
 }
 
@@ -53,7 +73,7 @@
 		int i;
 
 		sleep(interval);
-		assert(bpf_map_lookup_elem(map_fd[1], &key, values) == 0);
+		assert(bpf_map_lookup_elem(rxcnt_map_fd, &key, values) == 0);
 		for (i = 0; i < nr_cpus; i++)
 			sum += (values[i] - prev[i]);
 		if (sum)
@@ -66,19 +86,29 @@
 static void usage(const char *prog)
 {
 	fprintf(stderr,
-		"usage: %s [OPTS] IFINDEX_IN IFINDEX_OUT\n\n"
+		"usage: %s [OPTS] <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n\n"
 		"OPTS:\n"
 		"    -S    use skb-mode\n"
-		"    -N    enforce native mode\n",
+		"    -N    enforce native mode\n"
+		"    -F    force loading prog\n",
 		prog);
 }
 
 int main(int argc, char **argv)
 {
 	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
-	const char *optstr = "SN";
-	char filename[256];
+	struct bpf_prog_load_attr prog_load_attr = {
+		.prog_type	= BPF_PROG_TYPE_XDP,
+	};
+	struct bpf_program *prog, *dummy_prog;
+	struct bpf_prog_info info = {};
+	__u32 info_len = sizeof(info);
+	int prog_fd, dummy_prog_fd;
+	const char *optstr = "FSN";
+	struct bpf_object *obj;
 	int ret, opt, key = 0;
+	char filename[256];
+	int tx_port_map_fd;
 
 	while ((opt = getopt(argc, argv, optstr)) != -1) {
 		switch (opt) {
@@ -88,6 +118,9 @@
 		case 'N':
 			xdp_flags |= XDP_FLAGS_DRV_MODE;
 			break;
+		case 'F':
+			xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+			break;
 		default:
 			usage(basename(argv[0]));
 			return 1;
@@ -95,7 +128,7 @@
 	}
 
 	if (optind == argc) {
-		printf("usage: %s IFINDEX_IN IFINDEX_OUT\n", argv[0]);
+		printf("usage: %s <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n", argv[0]);
 		return 1;
 	}
 
@@ -104,42 +137,76 @@
 		return 1;
 	}
 
-	ifindex_in = strtoul(argv[optind], NULL, 0);
-	ifindex_out = strtoul(argv[optind + 1], NULL, 0);
+	ifindex_in = if_nametoindex(argv[optind]);
+	if (!ifindex_in)
+		ifindex_in = strtoul(argv[optind], NULL, 0);
+
+	ifindex_out = if_nametoindex(argv[optind + 1]);
+	if (!ifindex_out)
+		ifindex_out = strtoul(argv[optind + 1], NULL, 0);
+
 	printf("input: %d output: %d\n", ifindex_in, ifindex_out);
 
 	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+	prog_load_attr.file = filename;
 
-	if (load_bpf_file(filename)) {
-		printf("%s", bpf_log_buf);
+	if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
+		return 1;
+
+	prog = bpf_program__next(NULL, obj);
+	dummy_prog = bpf_program__next(prog, obj);
+	if (!prog || !dummy_prog) {
+		printf("finding a prog in obj file failed\n");
+		return 1;
+	}
+	/* bpf_prog_load_xattr gives us the pointer to first prog's fd,
+	 * so we're missing only the fd for dummy prog
+	 */
+	dummy_prog_fd = bpf_program__fd(dummy_prog);
+	if (prog_fd < 0 || dummy_prog_fd < 0) {
+		printf("bpf_prog_load_xattr: %s\n", strerror(errno));
 		return 1;
 	}
 
-	if (!prog_fd[0]) {
-		printf("load_bpf_file: %s\n", strerror(errno));
+	tx_port_map_fd = bpf_object__find_map_fd_by_name(obj, "tx_port");
+	rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt");
+	if (tx_port_map_fd < 0 || rxcnt_map_fd < 0) {
+		printf("bpf_object__find_map_fd_by_name failed\n");
 		return 1;
 	}
 
-	if (bpf_set_link_xdp_fd(ifindex_in, prog_fd[0], xdp_flags) < 0) {
+	if (bpf_set_link_xdp_fd(ifindex_in, prog_fd, xdp_flags) < 0) {
 		printf("ERROR: link set xdp fd failed on %d\n", ifindex_in);
 		return 1;
 	}
 
+	ret = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+	if (ret) {
+		printf("can't get prog info - %s\n", strerror(errno));
+		return ret;
+	}
+	prog_id = info.id;
+
 	/* Loading dummy XDP prog on out-device */
-	if (bpf_set_link_xdp_fd(ifindex_out, prog_fd[1],
+	if (bpf_set_link_xdp_fd(ifindex_out, dummy_prog_fd,
 			    (xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST)) < 0) {
 		printf("WARN: link set xdp fd failed on %d\n", ifindex_out);
 		ifindex_out_xdp_dummy_attached = false;
 	}
 
+	memset(&info, 0, sizeof(info));
+	ret = bpf_obj_get_info_by_fd(dummy_prog_fd, &info, &info_len);
+	if (ret) {
+		printf("can't get prog info - %s\n", strerror(errno));
+		return ret;
+	}
+	dummy_prog_id = info.id;
+
 	signal(SIGINT, int_exit);
 	signal(SIGTERM, int_exit);
 
-	printf("map[0] (vports) = %i, map[1] (map) = %i, map[2] (count) = %i\n",
-		map_fd[0], map_fd[1], map_fd[2]);
-
 	/* populate virtual to physical port map */
-	ret = bpf_map_update_elem(map_fd[0], &key, &ifindex_out, 0);
+	ret = bpf_map_update_elem(tx_port_map_fd, &key, &ifindex_out, 0);
 	if (ret) {
 		perror("bpf_update_elem");
 		goto out;

diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c
index 81a69e3..5440cd6 100644
--- a/samples/bpf/xdp_redirect_user.c
+++ b/samples/bpf/xdp_redirect_user.c

@@ -1,13 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
  */
 #include <linux/bpf.h>
 #include <linux/if_link.h>
@@ -18,25 +10,53 @@
 #include <stdlib.h>
 #include <stdbool.h>
 #include <string.h>
+#include <net/if.h>
 #include <unistd.h>
 #include <libgen.h>
 #include <sys/resource.h>
 
-#include "bpf_load.h"
 #include "bpf_util.h"
 #include <bpf/bpf.h>
+#include "libbpf.h"
 
 static int ifindex_in;
 static int ifindex_out;
 static bool ifindex_out_xdp_dummy_attached = true;
+static __u32 prog_id;
+static __u32 dummy_prog_id;
 
-static __u32 xdp_flags;
+static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+static int rxcnt_map_fd;
 
 static void int_exit(int sig)
 {
-	bpf_set_link_xdp_fd(ifindex_in, -1, xdp_flags);
-	if (ifindex_out_xdp_dummy_attached)
-		bpf_set_link_xdp_fd(ifindex_out, -1, xdp_flags);
+	__u32 curr_prog_id = 0;
+
+	if (bpf_get_link_xdp_id(ifindex_in, &curr_prog_id, xdp_flags)) {
+		printf("bpf_get_link_xdp_id failed\n");
+		exit(1);
+	}
+	if (prog_id == curr_prog_id)
+		bpf_set_link_xdp_fd(ifindex_in, -1, xdp_flags);
+	else if (!curr_prog_id)
+		printf("couldn't find a prog id on iface IN\n");
+	else
+		printf("program on iface IN changed, not removing\n");
+
+	if (ifindex_out_xdp_dummy_attached) {
+		curr_prog_id = 0;
+		if (bpf_get_link_xdp_id(ifindex_out, &curr_prog_id,
+					xdp_flags)) {
+			printf("bpf_get_link_xdp_id failed\n");
+			exit(1);
+		}
+		if (dummy_prog_id == curr_prog_id)
+			bpf_set_link_xdp_fd(ifindex_out, -1, xdp_flags);
+		else if (!curr_prog_id)
+			printf("couldn't find a prog id on iface OUT\n");
+		else
+			printf("program on iface OUT changed, not removing\n");
+	}
 	exit(0);
 }
 
@@ -53,7 +73,7 @@
 		int i;
 
 		sleep(interval);
-		assert(bpf_map_lookup_elem(map_fd[1], &key, values) == 0);
+		assert(bpf_map_lookup_elem(rxcnt_map_fd, &key, values) == 0);
 		for (i = 0; i < nr_cpus; i++)
 			sum += (values[i] - prev[i]);
 		if (sum)
@@ -66,10 +86,11 @@
 static void usage(const char *prog)
 {
 	fprintf(stderr,
-		"usage: %s [OPTS] IFINDEX_IN IFINDEX_OUT\n\n"
+		"usage: %s [OPTS] <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n\n"
 		"OPTS:\n"
 		"    -S    use skb-mode\n"
-		"    -N    enforce native mode\n",
+		"    -N    enforce native mode\n"
+		"    -F    force loading prog\n",
 		prog);
 }
 
@@ -77,9 +98,18 @@
 int main(int argc, char **argv)
 {
 	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
-	const char *optstr = "SN";
+	struct bpf_prog_load_attr prog_load_attr = {
+		.prog_type	= BPF_PROG_TYPE_XDP,
+	};
+	struct bpf_program *prog, *dummy_prog;
+	int prog_fd, tx_port_map_fd, opt;
+	struct bpf_prog_info info = {};
+	__u32 info_len = sizeof(info);
+	const char *optstr = "FSN";
+	struct bpf_object *obj;
 	char filename[256];
-	int ret, opt, key = 0;
+	int dummy_prog_fd;
+	int ret, key = 0;
 
 	while ((opt = getopt(argc, argv, optstr)) != -1) {
 		switch (opt) {
@@ -89,6 +119,9 @@
 		case 'N':
 			xdp_flags |= XDP_FLAGS_DRV_MODE;
 			break;
+		case 'F':
+			xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+			break;
 		default:
 			usage(basename(argv[0]));
 			return 1;
@@ -96,7 +129,7 @@
 	}
 
 	if (optind == argc) {
-		printf("usage: %s IFINDEX_IN IFINDEX_OUT\n", argv[0]);
+		printf("usage: %s <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n", argv[0]);
 		return 1;
 	}
 
@@ -105,39 +138,76 @@
 		return 1;
 	}
 
-	ifindex_in = strtoul(argv[optind], NULL, 0);
-	ifindex_out = strtoul(argv[optind + 1], NULL, 0);
+	ifindex_in = if_nametoindex(argv[optind]);
+	if (!ifindex_in)
+		ifindex_in = strtoul(argv[optind], NULL, 0);
+
+	ifindex_out = if_nametoindex(argv[optind + 1]);
+	if (!ifindex_out)
+		ifindex_out = strtoul(argv[optind + 1], NULL, 0);
+
 	printf("input: %d output: %d\n", ifindex_in, ifindex_out);
 
 	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+	prog_load_attr.file = filename;
 
-	if (load_bpf_file(filename)) {
-		printf("%s", bpf_log_buf);
+	if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
+		return 1;
+
+	prog = bpf_program__next(NULL, obj);
+	dummy_prog = bpf_program__next(prog, obj);
+	if (!prog || !dummy_prog) {
+		printf("finding a prog in obj file failed\n");
+		return 1;
+	}
+	/* bpf_prog_load_xattr gives us the pointer to first prog's fd,
+	 * so we're missing only the fd for dummy prog
+	 */
+	dummy_prog_fd = bpf_program__fd(dummy_prog);
+	if (prog_fd < 0 || dummy_prog_fd < 0) {
+		printf("bpf_prog_load_xattr: %s\n", strerror(errno));
 		return 1;
 	}
 
-	if (!prog_fd[0]) {
-		printf("load_bpf_file: %s\n", strerror(errno));
+	tx_port_map_fd = bpf_object__find_map_fd_by_name(obj, "tx_port");
+	rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt");
+	if (tx_port_map_fd < 0 || rxcnt_map_fd < 0) {
+		printf("bpf_object__find_map_fd_by_name failed\n");
 		return 1;
 	}
 
-	if (bpf_set_link_xdp_fd(ifindex_in, prog_fd[0], xdp_flags) < 0) {
+	if (bpf_set_link_xdp_fd(ifindex_in, prog_fd, xdp_flags) < 0) {
 		printf("ERROR: link set xdp fd failed on %d\n", ifindex_in);
 		return 1;
 	}
 
+	ret = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+	if (ret) {
+		printf("can't get prog info - %s\n", strerror(errno));
+		return ret;
+	}
+	prog_id = info.id;
+
 	/* Loading dummy XDP prog on out-device */
-	if (bpf_set_link_xdp_fd(ifindex_out, prog_fd[1],
+	if (bpf_set_link_xdp_fd(ifindex_out, dummy_prog_fd,
 			    (xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST)) < 0) {
 		printf("WARN: link set xdp fd failed on %d\n", ifindex_out);
 		ifindex_out_xdp_dummy_attached = false;
 	}
 
+	memset(&info, 0, sizeof(info));
+	ret = bpf_obj_get_info_by_fd(dummy_prog_fd, &info, &info_len);
+	if (ret) {
+		printf("can't get prog info - %s\n", strerror(errno));
+		return ret;
+	}
+	dummy_prog_id = info.id;
+
 	signal(SIGINT, int_exit);
 	signal(SIGTERM, int_exit);
 
 	/* bpf redirect port */
-	ret = bpf_map_update_elem(map_fd[0], &key, &ifindex_out, 0);
+	ret = bpf_map_update_elem(tx_port_map_fd, &key, &ifindex_out, 0);
 	if (ret) {
 		perror("bpf_update_elem");
 		goto out;

diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c
index b2b4dfa..1469b66 100644
--- a/samples/bpf/xdp_router_ipv4_user.c
+++ b/samples/bpf/xdp_router_ipv4_user.c

@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (C) 2017 Cavium, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License
- * as published by the Free Software Foundation.
  */
 #include <linux/bpf.h>
 #include <linux/netlink.h>
@@ -15,7 +12,6 @@
 #include <string.h>
 #include <sys/socket.h>
 #include <unistd.h>
-#include "bpf_load.h"
 #include <bpf/bpf.h>
 #include <arpa/inet.h>
 #include <fcntl.h>
@@ -25,32 +21,52 @@
 #include <sys/ioctl.h>
 #include <sys/syscall.h>
 #include "bpf_util.h"
+#include "libbpf.h"
+#include <sys/resource.h>
+#include <libgen.h>
 
-int sock, sock_arp, flags = 0;
+int sock, sock_arp, flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
 static int total_ifindex;
-int *ifindex_list;
+static int *ifindex_list;
+static __u32 *prog_id_list;
 char buf[8192];
+static int lpm_map_fd;
+static int rxcnt_map_fd;
+static int arp_table_map_fd;
+static int exact_match_map_fd;
+static int tx_port_map_fd;
 
 static int get_route_table(int rtm_family);
 static void int_exit(int sig)
 {
+	__u32 prog_id = 0;
 	int i = 0;
 
-	for (i = 0; i < total_ifindex; i++)
-		bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
+	for (i = 0; i < total_ifindex; i++) {
+		if (bpf_get_link_xdp_id(ifindex_list[i], &prog_id, flags)) {
+			printf("bpf_get_link_xdp_id on iface %d failed\n",
+			       ifindex_list[i]);
+			exit(1);
+		}
+		if (prog_id_list[i] == prog_id)
+			bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
+		else if (!prog_id)
+			printf("couldn't find a prog id on iface %d\n",
+			       ifindex_list[i]);
+		else
+			printf("program on iface %d changed, not removing\n",
+			       ifindex_list[i]);
+		prog_id = 0;
+	}
 	exit(0);
 }
 
 static void close_and_exit(int sig)
 {
-	int i = 0;
-
 	close(sock);
 	close(sock_arp);
 
-	for (i = 0; i < total_ifindex; i++)
-		bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
-	exit(0);
+	int_exit(0);
 }
 
 /* Get the mac address of the interface given interface name */
@@ -179,14 +195,10 @@
 		route.iface_name = alloca(sizeof(char *) * IFNAMSIZ);
 		route.iface_name = if_indextoname(route.iface, route.iface_name);
 		route.mac = getmac(route.iface_name);
-		if (route.mac == -1) {
-			int i = 0;
-
-			for (i = 0; i < total_ifindex; i++)
-				bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
-			exit(0);
-		}
-		assert(bpf_map_update_elem(map_fd[4], &route.iface, &route.iface, 0) == 0);
+		if (route.mac == -1)
+			int_exit(0);
+		assert(bpf_map_update_elem(tx_port_map_fd,
+					   &route.iface, &route.iface, 0) == 0);
 		if (rtm_family == AF_INET) {
 			struct trie_value {
 				__u8 prefix[4];
@@ -207,11 +219,16 @@
 			direct_entry.arp.dst = 0;
 			if (route.dst_len == 32) {
 				if (nh->nlmsg_type == RTM_DELROUTE) {
-					assert(bpf_map_delete_elem(map_fd[3], &route.dst) == 0);
+					assert(bpf_map_delete_elem(exact_match_map_fd,
+								   &route.dst) == 0);
 				} else {
-					if (bpf_map_lookup_elem(map_fd[2], &route.dst, &direct_entry.arp.mac) == 0)
+					if (bpf_map_lookup_elem(arp_table_map_fd,
+								&route.dst,
+								&direct_entry.arp.mac) == 0)
 						direct_entry.arp.dst = route.dst;
-					assert(bpf_map_update_elem(map_fd[3], &route.dst, &direct_entry, 0) == 0);
+					assert(bpf_map_update_elem(exact_match_map_fd,
+								   &route.dst,
+								   &direct_entry, 0) == 0);
 				}
 			}
 			for (i = 0; i < 4; i++)
@@ -225,7 +242,7 @@
 			       route.gw, route.dst_len,
 			       route.metric,
 			       route.iface_name);
-			if (bpf_map_lookup_elem(map_fd[0], prefix_key,
+			if (bpf_map_lookup_elem(lpm_map_fd, prefix_key,
 						prefix_value) < 0) {
 				for (i = 0; i < 4; i++)
 					prefix_value->prefix[i] = prefix_key->data[i];
@@ -234,7 +251,7 @@
 				prefix_value->gw = route.gw;
 				prefix_value->metric = route.metric;
 
-				assert(bpf_map_update_elem(map_fd[0],
+				assert(bpf_map_update_elem(lpm_map_fd,
 							   prefix_key,
 							   prefix_value, 0
 							   ) == 0);
@@ -247,7 +264,7 @@
 					       prefix_key->data[2],
 					       prefix_key->data[3],
 					       prefix_key->prefixlen);
-					assert(bpf_map_delete_elem(map_fd[0],
+					assert(bpf_map_delete_elem(lpm_map_fd,
 								   prefix_key
 								   ) == 0);
 					/* Rereading the route table to check if
@@ -275,8 +292,7 @@
 					prefix_value->ifindex = route.iface;
 					prefix_value->gw = route.gw;
 					prefix_value->metric = route.metric;
-					assert(bpf_map_update_elem(
-								   map_fd[0],
+					assert(bpf_map_update_elem(lpm_map_fd,
 								   prefix_key,
 								   prefix_value,
 								   0) == 0);
@@ -401,7 +417,8 @@
 		arp_entry.mac = atol(mac);
 		printf("%x\t\t%llx\n", arp_entry.dst, arp_entry.mac);
 		if (ndm_family == AF_INET) {
-			if (bpf_map_lookup_elem(map_fd[3], &arp_entry.dst,
+			if (bpf_map_lookup_elem(exact_match_map_fd,
+						&arp_entry.dst,
 						&direct_entry) == 0) {
 				if (nh->nlmsg_type == RTM_DELNEIGH) {
 					direct_entry.arp.dst = 0;
@@ -410,16 +427,17 @@
 					direct_entry.arp.dst = arp_entry.dst;
 					direct_entry.arp.mac = arp_entry.mac;
 				}
-				assert(bpf_map_update_elem(map_fd[3],
+				assert(bpf_map_update_elem(exact_match_map_fd,
 							   &arp_entry.dst,
 							   &direct_entry, 0
 							   ) == 0);
 				memset(&direct_entry, 0, sizeof(direct_entry));
 			}
 			if (nh->nlmsg_type == RTM_DELNEIGH) {
-				assert(bpf_map_delete_elem(map_fd[2], &arp_entry.dst) == 0);
+				assert(bpf_map_delete_elem(arp_table_map_fd,
+							   &arp_entry.dst) == 0);
 			} else if (nh->nlmsg_type == RTM_NEWNEIGH) {
-				assert(bpf_map_update_elem(map_fd[2],
+				assert(bpf_map_update_elem(arp_table_map_fd,
 							   &arp_entry.dst,
 							   &arp_entry.mac, 0
 							   ) == 0);
@@ -553,7 +571,8 @@
 		for (key = 0; key < nr_keys; key++) {
 			__u64 sum = 0;
 
-			assert(bpf_map_lookup_elem(map_fd[1], &key, values) == 0);
+			assert(bpf_map_lookup_elem(rxcnt_map_fd,
+						   &key, values) == 0);
 			for (i = 0; i < nr_cpus; i++)
 				sum += (values[i] - prev[key][i]);
 			if (sum)
@@ -594,36 +613,87 @@
 	return ret;
 }
 
+static void usage(const char *prog)
+{
+	fprintf(stderr,
+		"%s: %s [OPTS] interface name list\n\n"
+		"OPTS:\n"
+		"    -S    use skb-mode\n"
+		"    -F    force loading prog\n",
+		__func__, prog);
+}
+
 int main(int ac, char **argv)
 {
+	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+	struct bpf_prog_load_attr prog_load_attr = {
+		.prog_type	= BPF_PROG_TYPE_XDP,
+	};
+	struct bpf_prog_info info = {};
+	__u32 info_len = sizeof(info);
+	const char *optstr = "SF";
+	struct bpf_object *obj;
 	char filename[256];
 	char **ifname_list;
-	int i = 1;
+	int prog_fd, opt;
+	int err, i = 1;
 
 	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
-	if (ac < 2) {
-		printf("usage: %s [-S] Interface name list\n", argv[0]);
+	prog_load_attr.file = filename;
+
+	total_ifindex = ac - 1;
+	ifname_list = (argv + 1);
+
+	while ((opt = getopt(ac, argv, optstr)) != -1) {
+		switch (opt) {
+		case 'S':
+			flags |= XDP_FLAGS_SKB_MODE;
+			total_ifindex--;
+			ifname_list++;
+			break;
+		case 'F':
+			flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+			total_ifindex--;
+			ifname_list++;
+			break;
+		default:
+			usage(basename(argv[0]));
+			return 1;
+		}
+	}
+
+	if (optind == ac) {
+		usage(basename(argv[0]));
 		return 1;
 	}
-	if (!strcmp(argv[1], "-S")) {
-		flags = XDP_FLAGS_SKB_MODE;
-		total_ifindex = ac - 2;
-		ifname_list = (argv + 2);
-	} else {
-		flags = 0;
-		total_ifindex = ac - 1;
-		ifname_list = (argv + 1);
-	}
-	if (load_bpf_file(filename)) {
-		printf("%s", bpf_log_buf);
+
+	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+		perror("setrlimit(RLIMIT_MEMLOCK)");
 		return 1;
 	}
+
+	if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
+		return 1;
+
 	printf("\n**************loading bpf file*********************\n\n\n");
-	if (!prog_fd[0]) {
-		printf("load_bpf_file: %s\n", strerror(errno));
+	if (!prog_fd) {
+		printf("bpf_prog_load_xattr: %s\n", strerror(errno));
 		return 1;
 	}
-	ifindex_list = (int *)malloc(total_ifindex * sizeof(int *));
+
+	lpm_map_fd = bpf_object__find_map_fd_by_name(obj, "lpm_map");
+	rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt");
+	arp_table_map_fd = bpf_object__find_map_fd_by_name(obj, "arp_table");
+	exact_match_map_fd = bpf_object__find_map_fd_by_name(obj,
+							     "exact_match");
+	tx_port_map_fd = bpf_object__find_map_fd_by_name(obj, "tx_port");
+	if (lpm_map_fd < 0 || rxcnt_map_fd < 0 || arp_table_map_fd < 0 ||
+	    exact_match_map_fd < 0 || tx_port_map_fd < 0) {
+		printf("bpf_object__find_map_fd_by_name failed\n");
+		return 1;
+	}
+
+	ifindex_list = (int *)calloc(total_ifindex, sizeof(int *));
 	for (i = 0; i < total_ifindex; i++) {
 		ifindex_list[i] = if_nametoindex(ifname_list[i]);
 		if (!ifindex_list[i]) {
@@ -632,8 +702,9 @@
 			return 1;
 		}
 	}
+	prog_id_list = (__u32 *)calloc(total_ifindex, sizeof(__u32 *));
 	for (i = 0; i < total_ifindex; i++) {
-		if (bpf_set_link_xdp_fd(ifindex_list[i], prog_fd[0], flags) < 0) {
+		if (bpf_set_link_xdp_fd(ifindex_list[i], prog_fd, flags) < 0) {
 			printf("link set xdp fd failed\n");
 			int recovery_index = i;
 
@@ -642,6 +713,13 @@
 
 			return 1;
 		}
+		err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+		if (err) {
+			printf("can't get prog info - %s\n", strerror(errno));
+			return err;
+		}
+		prog_id_list[i] = info.id;
+		memset(&info, 0, sizeof(info));
 		printf("Attached to %d\n", ifindex_list[i]);
 	}
 	signal(SIGINT, int_exit);

diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c
index ef26f88..c7e4e45 100644
--- a/samples/bpf/xdp_rxq_info_user.c
+++ b/samples/bpf/xdp_rxq_info_user.c

@@ -22,15 +22,16 @@
 #include <arpa/inet.h>
 #include <linux/if_link.h>
 
-#include "bpf/bpf.h"
-#include "bpf/libbpf.h"
+#include "bpf.h"
+#include "libbpf.h"
 #include "bpf_util.h"
 
 static int ifindex = -1;
 static char ifname_buf[IF_NAMESIZE];
 static char *ifname;
+static __u32 prog_id;
 
-static __u32 xdp_flags;
+static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
 
 static struct bpf_map *stats_global_map;
 static struct bpf_map *rx_queue_index_map;
@@ -52,16 +53,30 @@
 	{"action",	required_argument,	NULL, 'a' },
 	{"readmem", 	no_argument,		NULL, 'r' },
 	{"swapmac", 	no_argument,		NULL, 'm' },
+	{"force",	no_argument,		NULL, 'F' },
 	{0, 0, NULL,  0 }
 };
 
 static void int_exit(int sig)
 {
-	fprintf(stderr,
-		"Interrupted: Removing XDP program on ifindex:%d device:%s\n",
-		ifindex, ifname);
-	if (ifindex > -1)
-		bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+	__u32 curr_prog_id = 0;
+
+	if (ifindex > -1) {
+		if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
+			printf("bpf_get_link_xdp_id failed\n");
+			exit(EXIT_FAIL);
+		}
+		if (prog_id == curr_prog_id) {
+			fprintf(stderr,
+				"Interrupted: Removing XDP program on ifindex:%d device:%s\n",
+				ifindex, ifname);
+			bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+		} else if (!curr_prog_id) {
+			printf("couldn't find a prog id on a given iface\n");
+		} else {
+			printf("program on interface changed, not removing\n");
+		}
+	}
 	exit(EXIT_OK);
 }
 
@@ -446,6 +461,8 @@
 	struct bpf_prog_load_attr prog_load_attr = {
 		.prog_type	= BPF_PROG_TYPE_XDP,
 	};
+	struct bpf_prog_info info = {};
+	__u32 info_len = sizeof(info);
 	int prog_fd, map_fd, opt, err;
 	bool use_separators = true;
 	struct config cfg = { 0 };
@@ -487,7 +504,7 @@
 	}
 
 	/* Parse commands line args */
-	while ((opt = getopt_long(argc, argv, "hSd:",
+	while ((opt = getopt_long(argc, argv, "FhSrmzd:s:a:",
 				  long_options, &longindex)) != -1) {
 		switch (opt) {
 		case 'd':
@@ -524,6 +541,9 @@
 		case 'm':
 			cfg_options |= SWAP_MAC;
 			break;
+		case 'F':
+			xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+			break;
 		case 'h':
 		error:
 		default:
@@ -576,6 +596,13 @@
 		return EXIT_FAIL_XDP;
 	}
 
+	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+	if (err) {
+		printf("can't get prog info - %s\n", strerror(errno));
+		return err;
+	}
+	prog_id = info.id;
+
 	stats_poll(interval, action, cfg_options);
 	return EXIT_OK;
 }

diff --git a/samples/bpf/xdp_sample_pkts_kern.c b/samples/bpf/xdp_sample_pkts_kern.c
index f7ca8b8..6c7c7e0 100644
--- a/samples/bpf/xdp_sample_pkts_kern.c
+++ b/samples/bpf/xdp_sample_pkts_kern.c

@@ -7,13 +7,6 @@
 #define SAMPLE_SIZE 64ul
 #define MAX_CPUS 128
 
-#define bpf_printk(fmt, ...)					\
-({								\
-	       char ____fmt[] = fmt;				\
-	       bpf_trace_printk(____fmt, sizeof(____fmt),	\
-				##__VA_ARGS__);			\
-})
-
 struct bpf_map_def SEC("maps") my_map = {
 	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
 	.key_size = sizeof(int),

diff --git a/samples/bpf/xdp_sample_pkts_user.c b/samples/bpf/xdp_sample_pkts_user.c
index 8dd87c1..3002714 100644
--- a/samples/bpf/xdp_sample_pkts_user.c
+++ b/samples/bpf/xdp_sample_pkts_user.c

@@ -12,40 +12,67 @@
 #include <signal.h>
 #include <libbpf.h>
 #include <bpf/bpf.h>
+#include <sys/resource.h>
+#include <libgen.h>
+#include <linux/if_link.h>
 
 #include "perf-sys.h"
-#include "trace_helpers.h"
 
 #define MAX_CPUS 128
-static int pmu_fds[MAX_CPUS], if_idx;
-static struct perf_event_mmap_page *headers[MAX_CPUS];
+static int if_idx;
 static char *if_name;
+static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+static __u32 prog_id;
+static struct perf_buffer *pb = NULL;
 
 static int do_attach(int idx, int fd, const char *name)
 {
+	struct bpf_prog_info info = {};
+	__u32 info_len = sizeof(info);
 	int err;
 
-	err = bpf_set_link_xdp_fd(idx, fd, 0);
-	if (err < 0)
+	err = bpf_set_link_xdp_fd(idx, fd, xdp_flags);
+	if (err < 0) {
 		printf("ERROR: failed to attach program to %s\n", name);
+		return err;
+	}
+
+	err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
+	if (err) {
+		printf("can't get prog info - %s\n", strerror(errno));
+		return err;
+	}
+	prog_id = info.id;
 
 	return err;
 }
 
 static int do_detach(int idx, const char *name)
 {
-	int err;
+	__u32 curr_prog_id = 0;
+	int err = 0;
 
-	err = bpf_set_link_xdp_fd(idx, -1, 0);
-	if (err < 0)
-		printf("ERROR: failed to detach program from %s\n", name);
+	err = bpf_get_link_xdp_id(idx, &curr_prog_id, 0);
+	if (err) {
+		printf("bpf_get_link_xdp_id failed\n");
+		return err;
+	}
+	if (prog_id == curr_prog_id) {
+		err = bpf_set_link_xdp_fd(idx, -1, 0);
+		if (err < 0)
+			printf("ERROR: failed to detach prog from %s\n", name);
+	} else if (!curr_prog_id) {
+		printf("couldn't find a prog id on a %s\n", name);
+	} else {
+		printf("program on interface changed, not removing\n");
+	}
 
 	return err;
 }
 
 #define SAMPLE_SIZE 64
 
-static int print_bpf_output(void *data, int size)
+static void print_bpf_output(void *ctx, int cpu, void *data, __u32 size)
 {
 	struct {
 		__u16 cookie;
@@ -55,68 +82,66 @@
 	int i;
 
 	if (e->cookie != 0xdead) {
-		printf("BUG cookie %x sized %d\n",
-		       e->cookie, size);
-		return LIBBPF_PERF_EVENT_ERROR;
+		printf("BUG cookie %x sized %d\n", e->cookie, size);
+		return;
 	}
 
 	printf("Pkt len: %-5d bytes. Ethernet hdr: ", e->pkt_len);
 	for (i = 0; i < 14 && i < e->pkt_len; i++)
 		printf("%02x ", e->pkt_data[i]);
 	printf("\n");
-
-	return LIBBPF_PERF_EVENT_CONT;
-}
-
-static void test_bpf_perf_event(int map_fd, int num)
-{
-	struct perf_event_attr attr = {
-		.sample_type = PERF_SAMPLE_RAW,
-		.type = PERF_TYPE_SOFTWARE,
-		.config = PERF_COUNT_SW_BPF_OUTPUT,
-		.wakeup_events = 1, /* get an fd notification for every event */
-	};
-	int i;
-
-	for (i = 0; i < num; i++) {
-		int key = i;
-
-		pmu_fds[i] = sys_perf_event_open(&attr, -1/*pid*/, i/*cpu*/,
-						 -1/*group_fd*/, 0);
-
-		assert(pmu_fds[i] >= 0);
-		assert(bpf_map_update_elem(map_fd, &key,
-					   &pmu_fds[i], BPF_ANY) == 0);
-		ioctl(pmu_fds[i], PERF_EVENT_IOC_ENABLE, 0);
-	}
 }
 
 static void sig_handler(int signo)
 {
 	do_detach(if_idx, if_name);
+	perf_buffer__free(pb);
 	exit(0);
 }
 
+static void usage(const char *prog)
+{
+	fprintf(stderr,
+		"%s: %s [OPTS] <ifname|ifindex>\n\n"
+		"OPTS:\n"
+		"    -F    force loading prog\n",
+		__func__, prog);
+}
+
 int main(int argc, char **argv)
 {
+	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
 	struct bpf_prog_load_attr prog_load_attr = {
 		.prog_type	= BPF_PROG_TYPE_XDP,
 	};
+	struct perf_buffer_opts pb_opts = {};
+	const char *optstr = "F";
+	int prog_fd, map_fd, opt;
 	struct bpf_object *obj;
 	struct bpf_map *map;
-	int prog_fd, map_fd;
 	char filename[256];
-	int ret, err, i;
-	int numcpus;
+	int ret, err;
 
-	if (argc < 2) {
-		printf("Usage: %s <ifname>\n", argv[0]);
+	while ((opt = getopt(argc, argv, optstr)) != -1) {
+		switch (opt) {
+		case 'F':
+			xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+			break;
+		default:
+			usage(basename(argv[0]));
+			return 1;
+		}
+	}
+
+	if (optind == argc) {
+		usage(basename(argv[0]));
 		return 1;
 	}
 
-	numcpus = get_nprocs();
-	if (numcpus > MAX_CPUS)
-		numcpus = MAX_CPUS;
+	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+		perror("setrlimit(RLIMIT_MEMLOCK)");
+		return 1;
+	}
 
 	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
 	prog_load_attr.file = filename;
@@ -136,16 +161,16 @@
 	}
 	map_fd = bpf_map__fd(map);
 
-	if_idx = if_nametoindex(argv[1]);
+	if_idx = if_nametoindex(argv[optind]);
 	if (!if_idx)
-		if_idx = strtoul(argv[1], NULL, 0);
+		if_idx = strtoul(argv[optind], NULL, 0);
 
 	if (!if_idx) {
 		fprintf(stderr, "Invalid ifname\n");
 		return 1;
 	}
-	if_name = argv[1];
-	err = do_attach(if_idx, prog_fd, argv[1]);
+	if_name = argv[optind];
+	err = do_attach(if_idx, prog_fd, if_name);
 	if (err)
 		return err;
 
@@ -156,14 +181,17 @@
 		return 1;
 	}
 
-	test_bpf_perf_event(map_fd, numcpus);
+	pb_opts.sample_cb = print_bpf_output;
+	pb = perf_buffer__new(map_fd, 8, &pb_opts);
+	err = libbpf_get_error(pb);
+	if (err) {
+		perror("perf_buffer setup failed");
+		return 1;
+	}
 
-	for (i = 0; i < numcpus; i++)
-		if (perf_event_mmap_header(pmu_fds[i], &headers[i]) < 0)
-			return 1;
+	while ((ret = perf_buffer__poll(pb, 1000)) >= 0) {
+	}
 
-	ret = perf_event_poller_multi(pmu_fds, headers, numcpus,
-				      print_bpf_output);
 	kill(0, SIGINT);
 	return ret;
 }

diff --git a/samples/bpf/xdp_tx_iptunnel_common.h b/samples/bpf/xdp_tx_iptunnel_common.h
index dd12cc3..be83989 100644
--- a/samples/bpf/xdp_tx_iptunnel_common.h
+++ b/samples/bpf/xdp_tx_iptunnel_common.h

@@ -1,8 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /* Copyright (c) 2016 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
  */
 #ifndef _SAMPLES_BPF_XDP_TX_IPTNL_COMMON_H
 #define _SAMPLES_BPF_XDP_TX_IPTNL_COMMON_H

diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c
index a4ccc33..dfb6858 100644
--- a/samples/bpf/xdp_tx_iptunnel_user.c
+++ b/samples/bpf/xdp_tx_iptunnel_user.c

@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2016 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
  */
 #include <linux/bpf.h>
 #include <linux/if_link.h>
@@ -12,12 +9,13 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <net/if.h>
 #include <sys/resource.h>
 #include <arpa/inet.h>
 #include <netinet/ether.h>
 #include <unistd.h>
 #include <time.h>
-#include "bpf_load.h"
+#include "libbpf.h"
 #include <bpf/bpf.h>
 #include "bpf_util.h"
 #include "xdp_tx_iptunnel_common.h"
@@ -25,12 +23,26 @@
 #define STATS_INTERVAL_S 2U
 
 static int ifindex = -1;
-static __u32 xdp_flags = 0;
+static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+static int rxcnt_map_fd;
+static __u32 prog_id;
 
 static void int_exit(int sig)
 {
-	if (ifindex > -1)
-		bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+	__u32 curr_prog_id = 0;
+
+	if (ifindex > -1) {
+		if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
+			printf("bpf_get_link_xdp_id failed\n");
+			exit(1);
+		}
+		if (prog_id == curr_prog_id)
+			bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+		else if (!curr_prog_id)
+			printf("couldn't find a prog id on a given iface\n");
+		else
+			printf("program on interface changed, not removing\n");
+	}
 	exit(0);
 }
 
@@ -53,7 +65,8 @@
 		for (proto = 0; proto < nr_protos; proto++) {
 			__u64 sum = 0;
 
-			assert(bpf_map_lookup_elem(map_fd[0], &proto, values) == 0);
+			assert(bpf_map_lookup_elem(rxcnt_map_fd, &proto,
+						   values) == 0);
 			for (i = 0; i < nr_cpus; i++)
 				sum += (values[i] - prev[proto][i]);
 
@@ -71,7 +84,7 @@
 	       "in an IPv4/v6 header and XDP_TX it out.  The dst <VIP:PORT>\n"
 	       "is used to select packets to encapsulate\n\n");
 	printf("Usage: %s [...]\n", cmd);
-	printf("    -i <ifindex> Interface Index\n");
+	printf("    -i <ifname|ifindex> Interface\n");
 	printf("    -a <vip-service-address> IPv4 or IPv6\n");
 	printf("    -p <vip-service-port> A port range (e.g. 433-444) is also allowed\n");
 	printf("    -s <source-ip> Used in the IPTunnel header\n");
@@ -81,6 +94,7 @@
 	printf("    -P <IP-Protocol> Default is TCP\n");
 	printf("    -S use skb-mode\n");
 	printf("    -N enforce native mode\n");
+	printf("    -F Force loading the XDP prog\n");
 	printf("    -h Display this help\n");
 }
 
@@ -138,16 +152,22 @@
 
 int main(int argc, char **argv)
 {
-	unsigned char opt_flags[256] = {};
-	unsigned int kill_after_s = 0;
-	const char *optstr = "i:a:p:s:d:m:T:P:SNh";
-	int min_port = 0, max_port = 0;
-	struct iptnl_info tnl = {};
+	struct bpf_prog_load_attr prog_load_attr = {
+		.prog_type	= BPF_PROG_TYPE_XDP,
+	};
 	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+	int min_port = 0, max_port = 0, vip2tnl_map_fd;
+	const char *optstr = "i:a:p:s:d:m:T:P:FSNh";
+	unsigned char opt_flags[256] = {};
+	struct bpf_prog_info info = {};
+	__u32 info_len = sizeof(info);
+	unsigned int kill_after_s = 0;
+	struct iptnl_info tnl = {};
+	struct bpf_object *obj;
 	struct vip vip = {};
 	char filename[256];
-	int opt;
-	int i;
+	int opt, prog_fd;
+	int i, err;
 
 	tnl.family = AF_UNSPEC;
 	vip.protocol = IPPROTO_TCP;
@@ -162,7 +182,9 @@
 
 		switch (opt) {
 		case 'i':
-			ifindex = atoi(optarg);
+			ifindex = if_nametoindex(optarg);
+			if (!ifindex)
+				ifindex = atoi(optarg);
 			break;
 		case 'a':
 			vip.family = parse_ipstr(optarg, vip.daddr.v6);
@@ -211,6 +233,9 @@
 		case 'N':
 			xdp_flags |= XDP_FLAGS_DRV_MODE;
 			break;
+		case 'F':
+			xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+			break;
 		default:
 			usage(argv[0]);
 			return 1;
@@ -231,34 +256,53 @@
 		return 1;
 	}
 
-	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
-
-	if (load_bpf_file(filename)) {
-		printf("%s", bpf_log_buf);
+	if (!ifindex) {
+		fprintf(stderr, "Invalid ifname\n");
 		return 1;
 	}
 
-	if (!prog_fd[0]) {
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+	prog_load_attr.file = filename;
+
+	if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
+		return 1;
+
+	if (!prog_fd) {
 		printf("load_bpf_file: %s\n", strerror(errno));
 		return 1;
 	}
 
+	rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt");
+	vip2tnl_map_fd = bpf_object__find_map_fd_by_name(obj, "vip2tnl");
+	if (vip2tnl_map_fd < 0 || rxcnt_map_fd < 0) {
+		printf("bpf_object__find_map_fd_by_name failed\n");
+		return 1;
+	}
+
 	signal(SIGINT, int_exit);
 	signal(SIGTERM, int_exit);
 
 	while (min_port <= max_port) {
 		vip.dport = htons(min_port++);
-		if (bpf_map_update_elem(map_fd[1], &vip, &tnl, BPF_NOEXIST)) {
+		if (bpf_map_update_elem(vip2tnl_map_fd, &vip, &tnl,
+					BPF_NOEXIST)) {
 			perror("bpf_map_update_elem(&vip2tnl)");
 			return 1;
 		}
 	}
 
-	if (bpf_set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) {
+	if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
 		printf("link set xdp fd failed\n");
 		return 1;
 	}
 
+	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+	if (err) {
+		printf("can't get prog info - %s\n", strerror(errno));
+		return err;
+	}
+	prog_id = info.id;
+
 	poll_stats(kill_after_s);
 
 	bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);

diff --git a/samples/bpf/xdpsock.h b/samples/bpf/xdpsock.h
deleted file mode 100644
index 533ab81..0000000
--- a/samples/bpf/xdpsock.h
+++ /dev/null

@@ -1,11 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef XDPSOCK_H_
-#define XDPSOCK_H_
-
-/* Power-of-2 number of sockets */
-#define MAX_SOCKS 4
-
-/* Round-robin receive */
-#define RR_LB 0
-
-#endif /* XDPSOCK_H_ */

diff --git a/samples/bpf/xdpsock_kern.c b/samples/bpf/xdpsock_kern.c
deleted file mode 100644
index d8806c4..0000000
--- a/samples/bpf/xdpsock_kern.c
+++ /dev/null

@@ -1,56 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#define KBUILD_MODNAME "foo"
-#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
-
-#include "xdpsock.h"
-
-struct bpf_map_def SEC("maps") qidconf_map = {
-	.type		= BPF_MAP_TYPE_ARRAY,
-	.key_size	= sizeof(int),
-	.value_size	= sizeof(int),
-	.max_entries	= 1,
-};
-
-struct bpf_map_def SEC("maps") xsks_map = {
-	.type = BPF_MAP_TYPE_XSKMAP,
-	.key_size = sizeof(int),
-	.value_size = sizeof(int),
-	.max_entries = 4,
-};
-
-struct bpf_map_def SEC("maps") rr_map = {
-	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
-	.key_size = sizeof(int),
-	.value_size = sizeof(unsigned int),
-	.max_entries = 1,
-};
-
-SEC("xdp_sock")
-int xdp_sock_prog(struct xdp_md *ctx)
-{
-	int *qidconf, key = 0, idx;
-	unsigned int *rr;
-
-	qidconf = bpf_map_lookup_elem(&qidconf_map, &key);
-	if (!qidconf)
-		return XDP_ABORTED;
-
-	if (*qidconf != ctx->rx_queue_index)
-		return XDP_PASS;
-
-#if RR_LB /* NB! RR_LB is configured in xdpsock.h */
-	rr = bpf_map_lookup_elem(&rr_map, &key);
-	if (!rr)
-		return XDP_ABORTED;
-
-	*rr = (*rr + 1) & (MAX_SOCKS - 1);
-	idx = *rr;
-#else
-	idx = 0;
-#endif
-
-	return bpf_redirect_map(&xsks_map, idx, 0);
-}
-
-char _license[] SEC("license") = "GPL";

diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
index 4914788..df011ac 100644
--- a/samples/bpf/xdpsock_user.c
+++ b/samples/bpf/xdpsock_user.c

@@ -1,37 +1,36 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright(c) 2017 - 2018 Intel Corporation. */
 
-#include <assert.h>
+#include <asm/barrier.h>
 #include <errno.h>
 #include <getopt.h>
 #include <libgen.h>
 #include <linux/bpf.h>
+#include <linux/compiler.h>
 #include <linux/if_link.h>
 #include <linux/if_xdp.h>
 #include <linux/if_ether.h>
+#include <locale.h>
+#include <net/ethernet.h>
 #include <net/if.h>
+#include <poll.h>
+#include <pthread.h>
 #include <signal.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <net/ethernet.h>
+#include <sys/mman.h>
 #include <sys/resource.h>
 #include <sys/socket.h>
-#include <sys/mman.h>
+#include <sys/types.h>
 #include <time.h>
 #include <unistd.h>
-#include <pthread.h>
-#include <locale.h>
-#include <sys/types.h>
-#include <poll.h>
 
-#include "bpf/libbpf.h"
-#include "bpf_util.h"
+#include "libbpf.h"
+#include "xsk.h"
 #include <bpf/bpf.h>
 
-#include "xdpsock.h"
-
 #ifndef SOL_XDP
 #define SOL_XDP 283
 #endif
@@ -44,17 +43,11 @@
 #define PF_XDP AF_XDP
 #endif
 
-#define NUM_FRAMES 131072
-#define FRAME_HEADROOM 0
-#define FRAME_SHIFT 11
-#define FRAME_SIZE 2048
-#define NUM_DESCS 1024
-#define BATCH_SIZE 16
-
-#define FQ_NUM_DESCS 1024
-#define CQ_NUM_DESCS 1024
+#define NUM_FRAMES (4 * 1024)
+#define BATCH_SIZE 64
 
 #define DEBUG_HEXDUMP 0
+#define MAX_SOCKS 8
 
 typedef __u64 u64;
 typedef __u32 u32;
@@ -68,59 +61,43 @@
 };
 
 static enum benchmark_type opt_bench = BENCH_RXDROP;
-static u32 opt_xdp_flags;
+static u32 opt_xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
 static const char *opt_if = "";
 static int opt_ifindex;
 static int opt_queue;
 static int opt_poll;
-static int opt_shared_packet_buffer;
 static int opt_interval = 1;
+static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP;
+static u32 opt_umem_flags;
+static int opt_unaligned_chunks;
+static int opt_mmap_flags;
 static u32 opt_xdp_bind_flags;
+static int opt_xsk_frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
+static int opt_timeout = 1000;
+static bool opt_need_wakeup = true;
+static __u32 prog_id;
 
-struct xdp_umem_uqueue {
-	u32 cached_prod;
-	u32 cached_cons;
-	u32 mask;
-	u32 size;
-	u32 *producer;
-	u32 *consumer;
-	u64 *ring;
-	void *map;
+struct xsk_umem_info {
+	struct xsk_ring_prod fq;
+	struct xsk_ring_cons cq;
+	struct xsk_umem *umem;
+	void *buffer;
 };
 
-struct xdp_umem {
-	char *frames;
-	struct xdp_umem_uqueue fq;
-	struct xdp_umem_uqueue cq;
-	int fd;
-};
-
-struct xdp_uqueue {
-	u32 cached_prod;
-	u32 cached_cons;
-	u32 mask;
-	u32 size;
-	u32 *producer;
-	u32 *consumer;
-	struct xdp_desc *ring;
-	void *map;
-};
-
-struct xdpsock {
-	struct xdp_uqueue rx;
-	struct xdp_uqueue tx;
-	int sfd;
-	struct xdp_umem *umem;
-	u32 outstanding_tx;
+struct xsk_socket_info {
+	struct xsk_ring_cons rx;
+	struct xsk_ring_prod tx;
+	struct xsk_umem_info *umem;
+	struct xsk_socket *xsk;
 	unsigned long rx_npkts;
 	unsigned long tx_npkts;
 	unsigned long prev_rx_npkts;
 	unsigned long prev_tx_npkts;
+	u32 outstanding_tx;
 };
 
-#define MAX_SOCKS 4
 static int num_socks;
-struct xdpsock *xsks[MAX_SOCKS];
+struct xsk_socket_info *xsks[MAX_SOCKS];
 
 static unsigned long get_nsecs(void)
 {
@@ -130,30 +107,118 @@
 	return ts.tv_sec * 1000000000UL + ts.tv_nsec;
 }
 
-static void dump_stats(void);
+static void print_benchmark(bool running)
+{
+	const char *bench_str = "INVALID";
 
-#define lassert(expr)							\
-	do {								\
-		if (!(expr)) {						\
-			fprintf(stderr, "%s:%s:%i: Assertion failed: "	\
-				#expr ": errno: %d/\"%s\"\n",		\
-				__FILE__, __func__, __LINE__,		\
-				errno, strerror(errno));		\
-			dump_stats();					\
-			exit(EXIT_FAILURE);				\
-		}							\
-	} while (0)
+	if (opt_bench == BENCH_RXDROP)
+		bench_str = "rxdrop";
+	else if (opt_bench == BENCH_TXONLY)
+		bench_str = "txonly";
+	else if (opt_bench == BENCH_L2FWD)
+		bench_str = "l2fwd";
 
-#define barrier() __asm__ __volatile__("": : :"memory")
-#ifdef __aarch64__
-#define u_smp_rmb() __asm__ __volatile__("dmb ishld": : :"memory")
-#define u_smp_wmb() __asm__ __volatile__("dmb ishst": : :"memory")
-#else
-#define u_smp_rmb() barrier()
-#define u_smp_wmb() barrier()
-#endif
-#define likely(x) __builtin_expect(!!(x), 1)
-#define unlikely(x) __builtin_expect(!!(x), 0)
+	printf("%s:%d %s ", opt_if, opt_queue, bench_str);
+	if (opt_xdp_flags & XDP_FLAGS_SKB_MODE)
+		printf("xdp-skb ");
+	else if (opt_xdp_flags & XDP_FLAGS_DRV_MODE)
+		printf("xdp-drv ");
+	else
+		printf("	");
+
+	if (opt_poll)
+		printf("poll() ");
+
+	if (running) {
+		printf("running...");
+		fflush(stdout);
+	}
+}
+
+static void dump_stats(void)
+{
+	unsigned long now = get_nsecs();
+	long dt = now - prev_time;
+	int i;
+
+	prev_time = now;
+
+	for (i = 0; i < num_socks && xsks[i]; i++) {
+		char *fmt = "%-15s %'-11.0f %'-11lu\n";
+		double rx_pps, tx_pps;
+
+		rx_pps = (xsks[i]->rx_npkts - xsks[i]->prev_rx_npkts) *
+			 1000000000. / dt;
+		tx_pps = (xsks[i]->tx_npkts - xsks[i]->prev_tx_npkts) *
+			 1000000000. / dt;
+
+		printf("\n sock%d@", i);
+		print_benchmark(false);
+		printf("\n");
+
+		printf("%-15s %-11s %-11s %-11.2f\n", "", "pps", "pkts",
+		       dt / 1000000000.);
+		printf(fmt, "rx", rx_pps, xsks[i]->rx_npkts);
+		printf(fmt, "tx", tx_pps, xsks[i]->tx_npkts);
+
+		xsks[i]->prev_rx_npkts = xsks[i]->rx_npkts;
+		xsks[i]->prev_tx_npkts = xsks[i]->tx_npkts;
+	}
+}
+
+static void *poller(void *arg)
+{
+	(void)arg;
+	for (;;) {
+		sleep(opt_interval);
+		dump_stats();
+	}
+
+	return NULL;
+}
+
+static void remove_xdp_program(void)
+{
+	__u32 curr_prog_id = 0;
+
+	if (bpf_get_link_xdp_id(opt_ifindex, &curr_prog_id, opt_xdp_flags)) {
+		printf("bpf_get_link_xdp_id failed\n");
+		exit(EXIT_FAILURE);
+	}
+	if (prog_id == curr_prog_id)
+		bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags);
+	else if (!curr_prog_id)
+		printf("couldn't find a prog id on a given interface\n");
+	else
+		printf("program on interface changed, not removing\n");
+}
+
+static void int_exit(int sig)
+{
+	struct xsk_umem *umem = xsks[0]->umem->umem;
+
+	(void)sig;
+
+	dump_stats();
+	xsk_socket__delete(xsks[0]->xsk);
+	(void)xsk_umem__delete(umem);
+	remove_xdp_program();
+
+	exit(EXIT_SUCCESS);
+}
+
+static void __exit_with_error(int error, const char *file, const char *func,
+			      int line)
+{
+	fprintf(stderr, "%s:%s:%i: errno: %d/\"%s\"\n", file, func,
+		line, error, strerror(error));
+	dump_stats();
+	remove_xdp_program();
+	exit(EXIT_FAILURE);
+}
+
+#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, \
+						 __LINE__)
 
 static const char pkt_data[] =
 	"\x3c\xfd\xfe\x9e\x7f\x71\xec\xb1\xd7\x98\x3a\xc0\x08\x00\x45\x00"
@@ -161,195 +226,6 @@
 	"\x1e\x04\x10\x92\x10\x92\x00\x1a\x6d\xa3\x34\x33\x1f\x69\x40\x6b"
 	"\x54\x59\xb6\x14\x2d\x11\x44\xbf\xaf\xd9\xbe\xaa";
 
-static inline u32 umem_nb_free(struct xdp_umem_uqueue *q, u32 nb)
-{
-	u32 free_entries = q->cached_cons - q->cached_prod;
-
-	if (free_entries >= nb)
-		return free_entries;
-
-	/* Refresh the local tail pointer */
-	q->cached_cons = *q->consumer + q->size;
-
-	return q->cached_cons - q->cached_prod;
-}
-
-static inline u32 xq_nb_free(struct xdp_uqueue *q, u32 ndescs)
-{
-	u32 free_entries = q->cached_cons - q->cached_prod;
-
-	if (free_entries >= ndescs)
-		return free_entries;
-
-	/* Refresh the local tail pointer */
-	q->cached_cons = *q->consumer + q->size;
-	return q->cached_cons - q->cached_prod;
-}
-
-static inline u32 umem_nb_avail(struct xdp_umem_uqueue *q, u32 nb)
-{
-	u32 entries = q->cached_prod - q->cached_cons;
-
-	if (entries == 0) {
-		q->cached_prod = *q->producer;
-		entries = q->cached_prod - q->cached_cons;
-	}
-
-	return (entries > nb) ? nb : entries;
-}
-
-static inline u32 xq_nb_avail(struct xdp_uqueue *q, u32 ndescs)
-{
-	u32 entries = q->cached_prod - q->cached_cons;
-
-	if (entries == 0) {
-		q->cached_prod = *q->producer;
-		entries = q->cached_prod - q->cached_cons;
-	}
-
-	return (entries > ndescs) ? ndescs : entries;
-}
-
-static inline int umem_fill_to_kernel_ex(struct xdp_umem_uqueue *fq,
-					 struct xdp_desc *d,
-					 size_t nb)
-{
-	u32 i;
-
-	if (umem_nb_free(fq, nb) < nb)
-		return -ENOSPC;
-
-	for (i = 0; i < nb; i++) {
-		u32 idx = fq->cached_prod++ & fq->mask;
-
-		fq->ring[idx] = d[i].addr;
-	}
-
-	u_smp_wmb();
-
-	*fq->producer = fq->cached_prod;
-
-	return 0;
-}
-
-static inline int umem_fill_to_kernel(struct xdp_umem_uqueue *fq, u64 *d,
-				      size_t nb)
-{
-	u32 i;
-
-	if (umem_nb_free(fq, nb) < nb)
-		return -ENOSPC;
-
-	for (i = 0; i < nb; i++) {
-		u32 idx = fq->cached_prod++ & fq->mask;
-
-		fq->ring[idx] = d[i];
-	}
-
-	u_smp_wmb();
-
-	*fq->producer = fq->cached_prod;
-
-	return 0;
-}
-
-static inline size_t umem_complete_from_kernel(struct xdp_umem_uqueue *cq,
-					       u64 *d, size_t nb)
-{
-	u32 idx, i, entries = umem_nb_avail(cq, nb);
-
-	u_smp_rmb();
-
-	for (i = 0; i < entries; i++) {
-		idx = cq->cached_cons++ & cq->mask;
-		d[i] = cq->ring[idx];
-	}
-
-	if (entries > 0) {
-		u_smp_wmb();
-
-		*cq->consumer = cq->cached_cons;
-	}
-
-	return entries;
-}
-
-static inline void *xq_get_data(struct xdpsock *xsk, u64 addr)
-{
-	return &xsk->umem->frames[addr];
-}
-
-static inline int xq_enq(struct xdp_uqueue *uq,
-			 const struct xdp_desc *descs,
-			 unsigned int ndescs)
-{
-	struct xdp_desc *r = uq->ring;
-	unsigned int i;
-
-	if (xq_nb_free(uq, ndescs) < ndescs)
-		return -ENOSPC;
-
-	for (i = 0; i < ndescs; i++) {
-		u32 idx = uq->cached_prod++ & uq->mask;
-
-		r[idx].addr = descs[i].addr;
-		r[idx].len = descs[i].len;
-	}
-
-	u_smp_wmb();
-
-	*uq->producer = uq->cached_prod;
-	return 0;
-}
-
-static inline int xq_enq_tx_only(struct xdp_uqueue *uq,
-				 unsigned int id, unsigned int ndescs)
-{
-	struct xdp_desc *r = uq->ring;
-	unsigned int i;
-
-	if (xq_nb_free(uq, ndescs) < ndescs)
-		return -ENOSPC;
-
-	for (i = 0; i < ndescs; i++) {
-		u32 idx = uq->cached_prod++ & uq->mask;
-
-		r[idx].addr	= (id + i) << FRAME_SHIFT;
-		r[idx].len	= sizeof(pkt_data) - 1;
-	}
-
-	u_smp_wmb();
-
-	*uq->producer = uq->cached_prod;
-	return 0;
-}
-
-static inline int xq_deq(struct xdp_uqueue *uq,
-			 struct xdp_desc *descs,
-			 int ndescs)
-{
-	struct xdp_desc *r = uq->ring;
-	unsigned int idx;
-	int i, entries;
-
-	entries = xq_nb_avail(uq, ndescs);
-
-	u_smp_rmb();
-
-	for (i = 0; i < entries; i++) {
-		idx = uq->cached_cons++ & uq->mask;
-		descs[i] = r[idx];
-	}
-
-	if (entries > 0) {
-		u_smp_wmb();
-
-		*uq->consumer = uq->cached_cons;
-	}
-
-	return entries;
-}
-
 static void swap_mac_addresses(void *data)
 {
 	struct ether_header *eth = (struct ether_header *)data;
@@ -397,245 +273,79 @@
 	printf("\n");
 }
 
-static size_t gen_eth_frame(char *frame)
+static size_t gen_eth_frame(struct xsk_umem_info *umem, u64 addr)
 {
-	memcpy(frame, pkt_data, sizeof(pkt_data) - 1);
+	memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data,
+	       sizeof(pkt_data) - 1);
 	return sizeof(pkt_data) - 1;
 }
 
-static struct xdp_umem *xdp_umem_configure(int sfd)
+static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size)
 {
-	int fq_size = FQ_NUM_DESCS, cq_size = CQ_NUM_DESCS;
-	struct xdp_mmap_offsets off;
-	struct xdp_umem_reg mr;
-	struct xdp_umem *umem;
-	socklen_t optlen;
-	void *bufs;
+	struct xsk_umem_info *umem;
+	struct xsk_umem_config cfg = {
+		.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+		.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
+		.frame_size = opt_xsk_frame_size,
+		.frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM,
+		.flags = opt_umem_flags
+	};
+
+	int ret;
 
 	umem = calloc(1, sizeof(*umem));
-	lassert(umem);
+	if (!umem)
+		exit_with_error(errno);
 
-	lassert(posix_memalign(&bufs, getpagesize(), /* PAGE_SIZE aligned */
-			       NUM_FRAMES * FRAME_SIZE) == 0);
+	ret = xsk_umem__create(&umem->umem, buffer, size, &umem->fq, &umem->cq,
+			       &cfg);
 
-	mr.addr = (__u64)bufs;
-	mr.len = NUM_FRAMES * FRAME_SIZE;
-	mr.chunk_size = FRAME_SIZE;
-	mr.headroom = FRAME_HEADROOM;
+	if (ret)
+		exit_with_error(-ret);
 
-	lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr)) == 0);
-	lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_FILL_RING, &fq_size,
-			   sizeof(int)) == 0);
-	lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &cq_size,
-			   sizeof(int)) == 0);
-
-	optlen = sizeof(off);
-	lassert(getsockopt(sfd, SOL_XDP, XDP_MMAP_OFFSETS, &off,
-			   &optlen) == 0);
-
-	umem->fq.map = mmap(0, off.fr.desc +
-			    FQ_NUM_DESCS * sizeof(u64),
-			    PROT_READ | PROT_WRITE,
-			    MAP_SHARED | MAP_POPULATE, sfd,
-			    XDP_UMEM_PGOFF_FILL_RING);
-	lassert(umem->fq.map != MAP_FAILED);
-
-	umem->fq.mask = FQ_NUM_DESCS - 1;
-	umem->fq.size = FQ_NUM_DESCS;
-	umem->fq.producer = umem->fq.map + off.fr.producer;
-	umem->fq.consumer = umem->fq.map + off.fr.consumer;
-	umem->fq.ring = umem->fq.map + off.fr.desc;
-	umem->fq.cached_cons = FQ_NUM_DESCS;
-
-	umem->cq.map = mmap(0, off.cr.desc +
-			     CQ_NUM_DESCS * sizeof(u64),
-			     PROT_READ | PROT_WRITE,
-			     MAP_SHARED | MAP_POPULATE, sfd,
-			     XDP_UMEM_PGOFF_COMPLETION_RING);
-	lassert(umem->cq.map != MAP_FAILED);
-
-	umem->cq.mask = CQ_NUM_DESCS - 1;
-	umem->cq.size = CQ_NUM_DESCS;
-	umem->cq.producer = umem->cq.map + off.cr.producer;
-	umem->cq.consumer = umem->cq.map + off.cr.consumer;
-	umem->cq.ring = umem->cq.map + off.cr.desc;
-
-	umem->frames = bufs;
-	umem->fd = sfd;
-
-	if (opt_bench == BENCH_TXONLY) {
-		int i;
-
-		for (i = 0; i < NUM_FRAMES * FRAME_SIZE; i += FRAME_SIZE)
-			(void)gen_eth_frame(&umem->frames[i]);
-	}
-
+	umem->buffer = buffer;
 	return umem;
 }
 
-static struct xdpsock *xsk_configure(struct xdp_umem *umem)
+static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem)
 {
-	struct sockaddr_xdp sxdp = {};
-	struct xdp_mmap_offsets off;
-	int sfd, ndescs = NUM_DESCS;
-	struct xdpsock *xsk;
-	bool shared = true;
-	socklen_t optlen;
-	u64 i;
-
-	sfd = socket(PF_XDP, SOCK_RAW, 0);
-	lassert(sfd >= 0);
-
-	xsk = calloc(1, sizeof(*xsk));
-	lassert(xsk);
-
-	xsk->sfd = sfd;
-	xsk->outstanding_tx = 0;
-
-	if (!umem) {
-		shared = false;
-		xsk->umem = xdp_umem_configure(sfd);
-	} else {
-		xsk->umem = umem;
-	}
-
-	lassert(setsockopt(sfd, SOL_XDP, XDP_RX_RING,
-			   &ndescs, sizeof(int)) == 0);
-	lassert(setsockopt(sfd, SOL_XDP, XDP_TX_RING,
-			   &ndescs, sizeof(int)) == 0);
-	optlen = sizeof(off);
-	lassert(getsockopt(sfd, SOL_XDP, XDP_MMAP_OFFSETS, &off,
-			   &optlen) == 0);
-
-	/* Rx */
-	xsk->rx.map = mmap(NULL,
-			   off.rx.desc +
-			   NUM_DESCS * sizeof(struct xdp_desc),
-			   PROT_READ | PROT_WRITE,
-			   MAP_SHARED | MAP_POPULATE, sfd,
-			   XDP_PGOFF_RX_RING);
-	lassert(xsk->rx.map != MAP_FAILED);
-
-	if (!shared) {
-		for (i = 0; i < NUM_DESCS * FRAME_SIZE; i += FRAME_SIZE)
-			lassert(umem_fill_to_kernel(&xsk->umem->fq, &i, 1)
-				== 0);
-	}
-
-	/* Tx */
-	xsk->tx.map = mmap(NULL,
-			   off.tx.desc +
-			   NUM_DESCS * sizeof(struct xdp_desc),
-			   PROT_READ | PROT_WRITE,
-			   MAP_SHARED | MAP_POPULATE, sfd,
-			   XDP_PGOFF_TX_RING);
-	lassert(xsk->tx.map != MAP_FAILED);
-
-	xsk->rx.mask = NUM_DESCS - 1;
-	xsk->rx.size = NUM_DESCS;
-	xsk->rx.producer = xsk->rx.map + off.rx.producer;
-	xsk->rx.consumer = xsk->rx.map + off.rx.consumer;
-	xsk->rx.ring = xsk->rx.map + off.rx.desc;
-
-	xsk->tx.mask = NUM_DESCS - 1;
-	xsk->tx.size = NUM_DESCS;
-	xsk->tx.producer = xsk->tx.map + off.tx.producer;
-	xsk->tx.consumer = xsk->tx.map + off.tx.consumer;
-	xsk->tx.ring = xsk->tx.map + off.tx.desc;
-	xsk->tx.cached_cons = NUM_DESCS;
-
-	sxdp.sxdp_family = PF_XDP;
-	sxdp.sxdp_ifindex = opt_ifindex;
-	sxdp.sxdp_queue_id = opt_queue;
-
-	if (shared) {
-		sxdp.sxdp_flags = XDP_SHARED_UMEM;
-		sxdp.sxdp_shared_umem_fd = umem->fd;
-	} else {
-		sxdp.sxdp_flags = opt_xdp_bind_flags;
-	}
-
-	lassert(bind(sfd, (struct sockaddr *)&sxdp, sizeof(sxdp)) == 0);
-
-	return xsk;
-}
-
-static void print_benchmark(bool running)
-{
-	const char *bench_str = "INVALID";
-
-	if (opt_bench == BENCH_RXDROP)
-		bench_str = "rxdrop";
-	else if (opt_bench == BENCH_TXONLY)
-		bench_str = "txonly";
-	else if (opt_bench == BENCH_L2FWD)
-		bench_str = "l2fwd";
-
-	printf("%s:%d %s ", opt_if, opt_queue, bench_str);
-	if (opt_xdp_flags & XDP_FLAGS_SKB_MODE)
-		printf("xdp-skb ");
-	else if (opt_xdp_flags & XDP_FLAGS_DRV_MODE)
-		printf("xdp-drv ");
-	else
-		printf("	");
-
-	if (opt_poll)
-		printf("poll() ");
-
-	if (running) {
-		printf("running...");
-		fflush(stdout);
-	}
-}
-
-static void dump_stats(void)
-{
-	unsigned long now = get_nsecs();
-	long dt = now - prev_time;
+	struct xsk_socket_config cfg;
+	struct xsk_socket_info *xsk;
+	int ret;
+	u32 idx;
 	int i;
 
-	prev_time = now;
+	xsk = calloc(1, sizeof(*xsk));
+	if (!xsk)
+		exit_with_error(errno);
 
-	for (i = 0; i < num_socks; i++) {
-		char *fmt = "%-15s %'-11.0f %'-11lu\n";
-		double rx_pps, tx_pps;
+	xsk->umem = umem;
+	cfg.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+	cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+	cfg.libbpf_flags = 0;
+	cfg.xdp_flags = opt_xdp_flags;
+	cfg.bind_flags = opt_xdp_bind_flags;
+	ret = xsk_socket__create(&xsk->xsk, opt_if, opt_queue, umem->umem,
+				 &xsk->rx, &xsk->tx, &cfg);
+	if (ret)
+		exit_with_error(-ret);
 
-		rx_pps = (xsks[i]->rx_npkts - xsks[i]->prev_rx_npkts) *
-			 1000000000. / dt;
-		tx_pps = (xsks[i]->tx_npkts - xsks[i]->prev_tx_npkts) *
-			 1000000000. / dt;
+	ret = bpf_get_link_xdp_id(opt_ifindex, &prog_id, opt_xdp_flags);
+	if (ret)
+		exit_with_error(-ret);
 
-		printf("\n sock%d@", i);
-		print_benchmark(false);
-		printf("\n");
+	ret = xsk_ring_prod__reserve(&xsk->umem->fq,
+				     XSK_RING_PROD__DEFAULT_NUM_DESCS,
+				     &idx);
+	if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS)
+		exit_with_error(-ret);
+	for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++)
+		*xsk_ring_prod__fill_addr(&xsk->umem->fq, idx++) =
+			i * opt_xsk_frame_size;
+	xsk_ring_prod__submit(&xsk->umem->fq,
+			      XSK_RING_PROD__DEFAULT_NUM_DESCS);
 
-		printf("%-15s %-11s %-11s %-11.2f\n", "", "pps", "pkts",
-		       dt / 1000000000.);
-		printf(fmt, "rx", rx_pps, xsks[i]->rx_npkts);
-		printf(fmt, "tx", tx_pps, xsks[i]->tx_npkts);
-
-		xsks[i]->prev_rx_npkts = xsks[i]->rx_npkts;
-		xsks[i]->prev_tx_npkts = xsks[i]->tx_npkts;
-	}
-}
-
-static void *poller(void *arg)
-{
-	(void)arg;
-	for (;;) {
-		sleep(opt_interval);
-		dump_stats();
-	}
-
-	return NULL;
-}
-
-static void int_exit(int sig)
-{
-	(void)sig;
-	dump_stats();
-	bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags);
-	exit(EXIT_SUCCESS);
+	return xsk;
 }
 
 static struct option long_options[] = {
@@ -645,10 +355,14 @@
 	{"interface", required_argument, 0, 'i'},
 	{"queue", required_argument, 0, 'q'},
 	{"poll", no_argument, 0, 'p'},
-	{"shared-buffer", no_argument, 0, 's'},
 	{"xdp-skb", no_argument, 0, 'S'},
 	{"xdp-native", no_argument, 0, 'N'},
 	{"interval", required_argument, 0, 'n'},
+	{"zero-copy", no_argument, 0, 'z'},
+	{"copy", no_argument, 0, 'c'},
+	{"frame-size", required_argument, 0, 'f'},
+	{"no-need-wakeup", no_argument, 0, 'm'},
+	{"unaligned", no_argument, 0, 'u'},
 	{0, 0, 0, 0}
 };
 
@@ -663,12 +377,17 @@
 		"  -i, --interface=n	Run on interface n\n"
 		"  -q, --queue=n	Use queue n (default 0)\n"
 		"  -p, --poll		Use poll syscall\n"
-		"  -s, --shared-buffer	Use shared packet buffer\n"
 		"  -S, --xdp-skb=n	Use XDP skb-mod\n"
 		"  -N, --xdp-native=n	Enfore XDP native mode\n"
 		"  -n, --interval=n	Specify statistics update interval (default 1 sec).\n"
+		"  -z, --zero-copy      Force zero-copy mode.\n"
+		"  -c, --copy           Force copy mode.\n"
+		"  -f, --frame-size=n   Set the frame size (must be a power of two, default is %d).\n"
+		"  -m, --no-need-wakeup Turn off use of driver need wakeup flag.\n"
+		"  -f, --frame-size=n   Set the frame size (must be a power of two in aligned mode, default is %d).\n"
+		"  -u, --unaligned	Enable unaligned chunk placement\n"
 		"\n";
-	fprintf(stderr, str, prog);
+	fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE);
 	exit(EXIT_FAILURE);
 }
 
@@ -679,8 +398,8 @@
 	opterr = 0;
 
 	for (;;) {
-		c = getopt_long(argc, argv, "rtli:q:psSNn:", long_options,
-				&option_index);
+		c = getopt_long(argc, argv, "Frtli:q:psSNn:czf:mu",
+				long_options, &option_index);
 		if (c == -1)
 			break;
 
@@ -700,9 +419,6 @@
 		case 'q':
 			opt_queue = atoi(optarg);
 			break;
-		case 's':
-			opt_shared_packet_buffer = 1;
-			break;
 		case 'p':
 			opt_poll = 1;
 			break;
@@ -716,6 +432,27 @@
 		case 'n':
 			opt_interval = atoi(optarg);
 			break;
+		case 'z':
+			opt_xdp_bind_flags |= XDP_ZEROCOPY;
+			break;
+		case 'c':
+			opt_xdp_bind_flags |= XDP_COPY;
+			break;
+		case 'u':
+			opt_umem_flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG;
+			opt_unaligned_chunks = 1;
+			opt_mmap_flags = MAP_HUGETLB;
+			break;
+		case 'F':
+			opt_xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+			break;
+		case 'f':
+			opt_xsk_frame_size = atoi(optarg);
+		case 'm':
+			opt_need_wakeup = false;
+			opt_xdp_bind_flags &= ~XDP_USE_NEED_WAKEUP;
+			break;
+
 		default:
 			usage(basename(argv[0]));
 		}
@@ -727,179 +464,277 @@
 			opt_if);
 		usage(basename(argv[0]));
 	}
+
+	if ((opt_xsk_frame_size & (opt_xsk_frame_size - 1)) &&
+	    !opt_unaligned_chunks) {
+		fprintf(stderr, "--frame-size=%d is not a power of two\n",
+			opt_xsk_frame_size);
+		usage(basename(argv[0]));
+	}
 }
 
-static void kick_tx(int fd)
+static void kick_tx(struct xsk_socket_info *xsk)
 {
 	int ret;
 
-	ret = sendto(fd, NULL, 0, MSG_DONTWAIT, NULL, 0);
+	ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
 	if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN || errno == EBUSY)
 		return;
-	lassert(0);
+	exit_with_error(errno);
 }
 
-static inline void complete_tx_l2fwd(struct xdpsock *xsk)
+static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk,
+				     struct pollfd *fds)
 {
-	u64 descs[BATCH_SIZE];
+	struct xsk_umem_info *umem = xsk->umem;
+	u32 idx_cq = 0, idx_fq = 0;
 	unsigned int rcvd;
 	size_t ndescs;
 
 	if (!xsk->outstanding_tx)
 		return;
 
-	kick_tx(xsk->sfd);
+	if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx))
+		kick_tx(xsk);
+
 	ndescs = (xsk->outstanding_tx > BATCH_SIZE) ? BATCH_SIZE :
-		 xsk->outstanding_tx;
+		xsk->outstanding_tx;
 
 	/* re-add completed Tx buffers */
-	rcvd = umem_complete_from_kernel(&xsk->umem->cq, descs, ndescs);
+	rcvd = xsk_ring_cons__peek(&umem->cq, ndescs, &idx_cq);
 	if (rcvd > 0) {
-		umem_fill_to_kernel(&xsk->umem->fq, descs, rcvd);
+		unsigned int i;
+		int ret;
+
+		ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
+		while (ret != rcvd) {
+			if (ret < 0)
+				exit_with_error(-ret);
+			if (xsk_ring_prod__needs_wakeup(&umem->fq))
+				ret = poll(fds, num_socks, opt_timeout);
+			ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
+		}
+
+		for (i = 0; i < rcvd; i++)
+			*xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) =
+				*xsk_ring_cons__comp_addr(&umem->cq, idx_cq++);
+
+		xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
+		xsk_ring_cons__release(&xsk->umem->cq, rcvd);
 		xsk->outstanding_tx -= rcvd;
 		xsk->tx_npkts += rcvd;
 	}
 }
 
-static inline void complete_tx_only(struct xdpsock *xsk)
+static inline void complete_tx_only(struct xsk_socket_info *xsk)
 {
-	u64 descs[BATCH_SIZE];
 	unsigned int rcvd;
+	u32 idx;
 
 	if (!xsk->outstanding_tx)
 		return;
 
-	kick_tx(xsk->sfd);
+	if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx))
+		kick_tx(xsk);
 
-	rcvd = umem_complete_from_kernel(&xsk->umem->cq, descs, BATCH_SIZE);
+	rcvd = xsk_ring_cons__peek(&xsk->umem->cq, BATCH_SIZE, &idx);
 	if (rcvd > 0) {
+		xsk_ring_cons__release(&xsk->umem->cq, rcvd);
 		xsk->outstanding_tx -= rcvd;
 		xsk->tx_npkts += rcvd;
 	}
 }
 
-static void rx_drop(struct xdpsock *xsk)
+static void rx_drop(struct xsk_socket_info *xsk, struct pollfd *fds)
 {
-	struct xdp_desc descs[BATCH_SIZE];
 	unsigned int rcvd, i;
+	u32 idx_rx = 0, idx_fq = 0;
+	int ret;
 
-	rcvd = xq_deq(&xsk->rx, descs, BATCH_SIZE);
-	if (!rcvd)
+	rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
+	if (!rcvd) {
+		if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
+			ret = poll(fds, num_socks, opt_timeout);
 		return;
-
-	for (i = 0; i < rcvd; i++) {
-		char *pkt = xq_get_data(xsk, descs[i].addr);
-
-		hex_dump(pkt, descs[i].len, descs[i].addr);
 	}
 
-	xsk->rx_npkts += rcvd;
+	ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
+	while (ret != rcvd) {
+		if (ret < 0)
+			exit_with_error(-ret);
+		if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
+			ret = poll(fds, num_socks, opt_timeout);
+		ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
+	}
 
-	umem_fill_to_kernel_ex(&xsk->umem->fq, descs, rcvd);
+	for (i = 0; i < rcvd; i++) {
+		u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr;
+		u32 len = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len;
+		u64 orig = xsk_umem__extract_addr(addr);
+
+		addr = xsk_umem__add_offset_to_addr(addr);
+		char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr);
+
+		hex_dump(pkt, len, addr);
+		*xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig;
+	}
+
+	xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
+	xsk_ring_cons__release(&xsk->rx, rcvd);
+	xsk->rx_npkts += rcvd;
 }
 
 static void rx_drop_all(void)
 {
 	struct pollfd fds[MAX_SOCKS + 1];
-	int i, ret, timeout, nfds = 1;
+	int i, ret;
 
 	memset(fds, 0, sizeof(fds));
 
 	for (i = 0; i < num_socks; i++) {
-		fds[i].fd = xsks[i]->sfd;
+		fds[i].fd = xsk_socket__fd(xsks[i]->xsk);
 		fds[i].events = POLLIN;
-		timeout = 1000; /* 1sn */
 	}
 
 	for (;;) {
 		if (opt_poll) {
-			ret = poll(fds, nfds, timeout);
+			ret = poll(fds, num_socks, opt_timeout);
 			if (ret <= 0)
 				continue;
 		}
 
 		for (i = 0; i < num_socks; i++)
-			rx_drop(xsks[i]);
+			rx_drop(xsks[i], fds);
 	}
 }
 
-static void tx_only(struct xdpsock *xsk)
+static void tx_only(struct xsk_socket_info *xsk, u32 frame_nb)
 {
-	int timeout, ret, nfds = 1;
-	struct pollfd fds[nfds + 1];
-	unsigned int idx = 0;
+	u32 idx;
+
+	if (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) == BATCH_SIZE) {
+		unsigned int i;
+
+		for (i = 0; i < BATCH_SIZE; i++) {
+			xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->addr	=
+				(frame_nb + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT;
+			xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->len =
+				sizeof(pkt_data) - 1;
+		}
+
+		xsk_ring_prod__submit(&xsk->tx, BATCH_SIZE);
+		xsk->outstanding_tx += BATCH_SIZE;
+		frame_nb += BATCH_SIZE;
+		frame_nb %= NUM_FRAMES;
+	}
+
+	complete_tx_only(xsk);
+}
+
+static void tx_only_all(void)
+{
+	struct pollfd fds[MAX_SOCKS];
+	u32 frame_nb[MAX_SOCKS] = {};
+	int i, ret;
 
 	memset(fds, 0, sizeof(fds));
-	fds[0].fd = xsk->sfd;
-	fds[0].events = POLLOUT;
-	timeout = 1000; /* 1sn */
+	for (i = 0; i < num_socks; i++) {
+		fds[0].fd = xsk_socket__fd(xsks[i]->xsk);
+		fds[0].events = POLLOUT;
+	}
 
 	for (;;) {
 		if (opt_poll) {
-			ret = poll(fds, nfds, timeout);
+			ret = poll(fds, num_socks, opt_timeout);
 			if (ret <= 0)
 				continue;
 
-			if (fds[0].fd != xsk->sfd ||
-			    !(fds[0].revents & POLLOUT))
+			if (!(fds[0].revents & POLLOUT))
 				continue;
 		}
 
-		if (xq_nb_free(&xsk->tx, BATCH_SIZE) >= BATCH_SIZE) {
-			lassert(xq_enq_tx_only(&xsk->tx, idx, BATCH_SIZE) == 0);
-
-			xsk->outstanding_tx += BATCH_SIZE;
-			idx += BATCH_SIZE;
-			idx %= NUM_FRAMES;
-		}
-
-		complete_tx_only(xsk);
+		for (i = 0; i < num_socks; i++)
+			tx_only(xsks[i], frame_nb[i]);
 	}
 }
 
-static void l2fwd(struct xdpsock *xsk)
+static void l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds)
 {
+	unsigned int rcvd, i;
+	u32 idx_rx = 0, idx_tx = 0;
+	int ret;
+
+	complete_tx_l2fwd(xsk, fds);
+
+	rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
+	if (!rcvd) {
+		if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
+			ret = poll(fds, num_socks, opt_timeout);
+		return;
+	}
+
+	ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx);
+	while (ret != rcvd) {
+		if (ret < 0)
+			exit_with_error(-ret);
+		if (xsk_ring_prod__needs_wakeup(&xsk->tx))
+			kick_tx(xsk);
+		ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx);
+	}
+
+	for (i = 0; i < rcvd; i++) {
+		u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr;
+		u32 len = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len;
+		u64 orig = addr;
+
+		addr = xsk_umem__add_offset_to_addr(addr);
+		char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr);
+
+		swap_mac_addresses(pkt);
+
+		hex_dump(pkt, len, addr);
+		xsk_ring_prod__tx_desc(&xsk->tx, idx_tx)->addr = orig;
+		xsk_ring_prod__tx_desc(&xsk->tx, idx_tx++)->len = len;
+	}
+
+	xsk_ring_prod__submit(&xsk->tx, rcvd);
+	xsk_ring_cons__release(&xsk->rx, rcvd);
+
+	xsk->rx_npkts += rcvd;
+	xsk->outstanding_tx += rcvd;
+}
+
+static void l2fwd_all(void)
+{
+	struct pollfd fds[MAX_SOCKS];
+	int i, ret;
+
+	memset(fds, 0, sizeof(fds));
+
+	for (i = 0; i < num_socks; i++) {
+		fds[i].fd = xsk_socket__fd(xsks[i]->xsk);
+		fds[i].events = POLLOUT | POLLIN;
+	}
+
 	for (;;) {
-		struct xdp_desc descs[BATCH_SIZE];
-		unsigned int rcvd, i;
-		int ret;
-
-		for (;;) {
-			complete_tx_l2fwd(xsk);
-
-			rcvd = xq_deq(&xsk->rx, descs, BATCH_SIZE);
-			if (rcvd > 0)
-				break;
+		if (opt_poll) {
+			ret = poll(fds, num_socks, opt_timeout);
+			if (ret <= 0)
+				continue;
 		}
 
-		for (i = 0; i < rcvd; i++) {
-			char *pkt = xq_get_data(xsk, descs[i].addr);
-
-			swap_mac_addresses(pkt);
-
-			hex_dump(pkt, descs[i].len, descs[i].addr);
-		}
-
-		xsk->rx_npkts += rcvd;
-
-		ret = xq_enq(&xsk->tx, descs, rcvd);
-		lassert(ret == 0);
-		xsk->outstanding_tx += rcvd;
+		for (i = 0; i < num_socks; i++)
+			l2fwd(xsks[i], fds);
 	}
 }
 
 int main(int argc, char **argv)
 {
 	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
-	struct bpf_prog_load_attr prog_load_attr = {
-		.prog_type	= BPF_PROG_TYPE_XDP,
-	};
-	int prog_fd, qidconf_map, xsks_map;
-	struct bpf_object *obj;
-	char xdp_filename[256];
-	struct bpf_map *map;
-	int i, ret, key = 0;
+	struct xsk_umem_info *umem;
 	pthread_t pt;
+	void *bufs;
+	int ret;
 
 	parse_command_line(argc, argv);
 
@@ -909,60 +744,23 @@
 		exit(EXIT_FAILURE);
 	}
 
-	snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv[0]);
-	prog_load_attr.file = xdp_filename;
-
-	if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
-		exit(EXIT_FAILURE);
-	if (prog_fd < 0) {
-		fprintf(stderr, "ERROR: no program found: %s\n",
-			strerror(prog_fd));
+	/* Reserve memory for the umem. Use hugepages if unaligned chunk mode */
+	bufs = mmap(NULL, NUM_FRAMES * opt_xsk_frame_size,
+		    PROT_READ | PROT_WRITE,
+		    MAP_PRIVATE | MAP_ANONYMOUS | opt_mmap_flags, -1, 0);
+	if (bufs == MAP_FAILED) {
+		printf("ERROR: mmap failed\n");
 		exit(EXIT_FAILURE);
 	}
+       /* Create sockets... */
+	umem = xsk_configure_umem(bufs, NUM_FRAMES * opt_xsk_frame_size);
+	xsks[num_socks++] = xsk_configure_socket(umem);
 
-	map = bpf_object__find_map_by_name(obj, "qidconf_map");
-	qidconf_map = bpf_map__fd(map);
-	if (qidconf_map < 0) {
-		fprintf(stderr, "ERROR: no qidconf map found: %s\n",
-			strerror(qidconf_map));
-		exit(EXIT_FAILURE);
-	}
+	if (opt_bench == BENCH_TXONLY) {
+		int i;
 
-	map = bpf_object__find_map_by_name(obj, "xsks_map");
-	xsks_map = bpf_map__fd(map);
-	if (xsks_map < 0) {
-		fprintf(stderr, "ERROR: no xsks map found: %s\n",
-			strerror(xsks_map));
-		exit(EXIT_FAILURE);
-	}
-
-	if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd, opt_xdp_flags) < 0) {
-		fprintf(stderr, "ERROR: link set xdp fd failed\n");
-		exit(EXIT_FAILURE);
-	}
-
-	ret = bpf_map_update_elem(qidconf_map, &key, &opt_queue, 0);
-	if (ret) {
-		fprintf(stderr, "ERROR: bpf_map_update_elem qidconf\n");
-		exit(EXIT_FAILURE);
-	}
-
-	/* Create sockets... */
-	xsks[num_socks++] = xsk_configure(NULL);
-
-#if RR_LB
-	for (i = 0; i < MAX_SOCKS - 1; i++)
-		xsks[num_socks++] = xsk_configure(xsks[0]->umem);
-#endif
-
-	/* ...and insert them into the map. */
-	for (i = 0; i < num_socks; i++) {
-		key = i;
-		ret = bpf_map_update_elem(xsks_map, &key, &xsks[i]->sfd, 0);
-		if (ret) {
-			fprintf(stderr, "ERROR: bpf_map_update_elem %d\n", i);
-			exit(EXIT_FAILURE);
-		}
+		for (i = 0; i < NUM_FRAMES; i++)
+			(void)gen_eth_frame(umem, i * opt_xsk_frame_size);
 	}
 
 	signal(SIGINT, int_exit);
@@ -972,16 +770,17 @@
 	setlocale(LC_ALL, "");
 
 	ret = pthread_create(&pt, NULL, poller, NULL);
-	lassert(ret == 0);
+	if (ret)
+		exit_with_error(ret);
 
 	prev_time = get_nsecs();
 
 	if (opt_bench == BENCH_RXDROP)
 		rx_drop_all();
 	else if (opt_bench == BENCH_TXONLY)
-		tx_only(xsks[0]);
+		tx_only_all();
 	else
-		l2fwd(xsks[0]);
+		l2fwd_all();
 
 	return 0;
 }
commit	0f672f6c0b52b7b0700b0915c72b540721af4465	[log] [tgz]
author	David Brazdil <dbrazdil@google.com>	Tue Dec 10 10:32:29 2019 +0000
committer	David Brazdil <dbrazdil@google.com>	Tue Dec 10 19:03:18 2019 +0000
tree	85c8cba019caa205e4f8920d72d93f6d6deaf29c
parent	3a0ad55d848b50499b68d7141d4eca997fce28ef [diff]