From e8386c4e1fa3b5486487fa4d6c350a0d5e300aaf Mon Sep 17 00:00:00 2001
From: Andrea Claudi <aclaudi@redhat.com>
Date: Thu, 13 Jun 2019 14:37:56 +0200
Subject: [PATCH] bpf: implement btf handling and map annotation
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1716361
Upstream Status: iproute2.git commit f823f36012fb5
commit f823f36012fb5ab4ddfca6ed4ff56188730f281e
Author: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed Jul 18 01:31:22 2018 +0200
bpf: implement btf handling and map annotation
Implement loading of .BTF section from object file and build up
internal table for retrieving key/value id related to maps in
the BPF program. Latter is done by setting up struct btf_type
table.
One of the issues is that there's a disconnect between the data
types used in the map and struct bpf_elf_map, meaning the underlying
types are unknown from the map description. One way to overcome
this is to add a annotation such that the loader will recognize
the relation to both. BPF_ANNOTATE_KV_PAIR(map_foo, struct key,
struct val); has been added to the API that programs can use.
The loader will then pick the corresponding key/value type ids and
attach it to the maps for creation. This can later on be dumped via
bpftool for introspection.
Example with test_xdp_noinline.o from kernel selftests:
[...]
struct ctl_value {
union {
__u64 value;
__u32 ifindex;
__u8 mac[6];
};
};
struct bpf_map_def __attribute__ ((section("maps"), used)) ctl_array = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(__u32),
.value_size = sizeof(struct ctl_value),
.max_entries = 16,
.map_flags = 0,
};
BPF_ANNOTATE_KV_PAIR(ctl_array, __u32, struct ctl_value);
[...]
Above could also further be wrapped in a macro. Compiling through LLVM and
converting to BTF:
# llc --version
LLVM (http://llvm.org/):
LLVM version 7.0.0svn
Optimized build.
Default target: x86_64-unknown-linux-gnu
Host CPU: skylake
Registered Targets:
bpf - BPF (host endian)
bpfeb - BPF (big endian)
bpfel - BPF (little endian)
[...]
# clang [...] -O2 -target bpf -g -emit-llvm -c test_xdp_noinline.c -o - |
llc -march=bpf -mcpu=probe -mattr=dwarfris -filetype=obj -o test_xdp_noinline.o
# pahole -J test_xdp_noinline.o
Checking pahole dump of BPF object file:
# file test_xdp_noinline.o
test_xdp_noinline.o: ELF 64-bit LSB relocatable, *unknown arch 0xf7* version 1 (SYSV), with debug_info, not stripped
# pahole test_xdp_noinline.o
[...]
struct ctl_value {
union {
__u64 value; /* 0 8 */
__u32 ifindex; /* 0 4 */
__u8 mac[0]; /* 0 0 */
}; /* 0 8 */
/* size: 8, cachelines: 1, members: 1 */
/* last cacheline: 8 bytes */
};
Now loading into kernel and dumping the map via bpftool:
# ip -force link set dev lo xdp obj test_xdp_noinline.o sec xdp-test
# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 xdpgeneric/id:227 qdisc noqueue state UNKNOWN group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
[...]
# bpftool prog show id 227
227: xdp tag a85e060c275c5616 gpl
loaded_at 2018-07-17T14:41:29+0000 uid 0
xlated 8152B not jited memlock 12288B map_ids 381,385,386,382,384,383
# bpftool map dump id 386
[{
"key": 0,
"value": {
"": {
"value": 0,
"ifindex": 0,
"mac": []
}
}
},{
"key": 1,
"value": {
"": {
"value": 0,
"ifindex": 0,
"mac": []
}
}
},{
[...]
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David Ahern <dsahern@gmail.com>
---
include/bpf_elf.h | 9 ++
include/bpf_util.h | 1 +
lib/bpf.c | 332 ++++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 338 insertions(+), 4 deletions(-)
diff --git a/include/bpf_elf.h b/include/bpf_elf.h
index a8e360f3bbb28..84e8ae00834c8 100644
--- a/include/bpf_elf.h
+++ b/include/bpf_elf.h
@@ -41,4 +41,13 @@ struct bpf_elf_map {
__u32 inner_idx;
};
+#define BPF_ANNOTATE_KV_PAIR(name, type_key, type_val) \
+ struct ____btf_map_##name { \
+ type_key key; \
+ type_val value; \
+ }; \
+ struct ____btf_map_##name \
+ __attribute__ ((section(".maps." #name), used)) \
+ ____btf_map_##name = { }
+
#endif /* __BPF_ELF__ */
diff --git a/include/bpf_util.h b/include/bpf_util.h
index 219beb40cd253..63837a04e56fe 100644
--- a/include/bpf_util.h
+++ b/include/bpf_util.h
@@ -14,6 +14,7 @@
#define __BPF_UTIL__
#include <linux/bpf.h>
+#include <linux/btf.h>
#include <linux/filter.h>
#include <linux/magic.h>
#include <linux/elf-em.h>
diff --git a/lib/bpf.c b/lib/bpf.c
index 1b87490555050..d093d0bd86eae 100644
--- a/lib/bpf.c
+++ b/lib/bpf.c
@@ -393,6 +393,8 @@ struct bpf_prog_data {
struct bpf_map_ext {
struct bpf_prog_data owner;
+ unsigned int btf_id_key;
+ unsigned int btf_id_val;
};
static int bpf_derive_elf_map_from_fdinfo(int fd, struct bpf_elf_map *map,
@@ -1125,24 +1127,36 @@ struct bpf_config {
unsigned int jit_enabled;
};
+struct bpf_btf {
+ const struct btf_header *hdr;
+ const void *raw;
+ const char *strings;
+ const struct btf_type **types;
+ int types_num;
+};
+
struct bpf_elf_ctx {
struct bpf_config cfg;
Elf *elf_fd;
GElf_Ehdr elf_hdr;
Elf_Data *sym_tab;
Elf_Data *str_tab;
+ Elf_Data *btf_data;
char obj_uid[64];
int obj_fd;
+ int btf_fd;
int map_fds[ELF_MAX_MAPS];
struct bpf_elf_map maps[ELF_MAX_MAPS];
struct bpf_map_ext maps_ext[ELF_MAX_MAPS];
struct bpf_elf_prog prog_text;
+ struct bpf_btf btf;
int sym_num;
int map_num;
int map_len;
bool *sec_done;
int sec_maps;
int sec_text;
+ int sec_btf;
char license[ELF_MAX_LICENSE_LEN];
enum bpf_prog_type type;
__u32 ifindex;
@@ -1167,6 +1181,11 @@ struct bpf_map_data {
struct bpf_elf_map *ent;
};
+static bool bpf_log_has_data(struct bpf_elf_ctx *ctx)
+{
+ return ctx->log && ctx->log[0];
+}
+
static __check_format_string(2, 3) void
bpf_dump_error(struct bpf_elf_ctx *ctx, const char *format, ...)
{
@@ -1176,7 +1195,7 @@ bpf_dump_error(struct bpf_elf_ctx *ctx, const char *format, ...)
vfprintf(stderr, format, vl);
va_end(vl);
- if (ctx->log && ctx->log[0]) {
+ if (bpf_log_has_data(ctx)) {
if (ctx->verbose) {
fprintf(stderr, "%s\n", ctx->log);
} else {
@@ -1223,7 +1242,9 @@ static int bpf_log_realloc(struct bpf_elf_ctx *ctx)
static int bpf_map_create(enum bpf_map_type type, uint32_t size_key,
uint32_t size_value, uint32_t max_elem,
- uint32_t flags, int inner_fd, uint32_t ifindex)
+ uint32_t flags, int inner_fd, int btf_fd,
+ uint32_t ifindex, uint32_t btf_id_key,
+ uint32_t btf_id_val)
{
union bpf_attr attr = {};
@@ -1234,10 +1255,30 @@ static int bpf_map_create(enum bpf_map_type type, uint32_t size_key,
attr.map_flags = flags;
attr.inner_map_fd = inner_fd;
attr.map_ifindex = ifindex;
+ attr.btf_fd = btf_fd;
+ attr.btf_key_type_id = btf_id_key;
+ attr.btf_value_type_id = btf_id_val;
return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
}
+static int bpf_btf_load(void *btf, size_t size_btf,
+ char *log, size_t size_log)
+{
+ union bpf_attr attr = {};
+
+ attr.btf = bpf_ptr_to_u64(btf);
+ attr.btf_size = size_btf;
+
+ if (size_log > 0) {
+ attr.btf_log_buf = bpf_ptr_to_u64(log);
+ attr.btf_log_size = size_log;
+ attr.btf_log_level = 1;
+ }
+
+ return bpf(BPF_BTF_LOAD, &attr, sizeof(attr));
+}
+
static int bpf_obj_pin(int fd, const char *pathname)
{
union bpf_attr attr = {};
@@ -1613,7 +1654,8 @@ static int bpf_map_attach(const char *name, struct bpf_elf_ctx *ctx,
ifindex = bpf_map_offload_neutral(map->type) ? 0 : ctx->ifindex;
errno = 0;
fd = bpf_map_create(map->type, map->size_key, map->size_value,
- map->max_elem, map->flags, map_inner_fd, ifindex);
+ map->max_elem, map->flags, map_inner_fd, ctx->btf_fd,
+ ifindex, ext->btf_id_key, ext->btf_id_val);
if (fd < 0 || ctx->verbose) {
bpf_map_report(fd, name, map, ctx, map_inner_fd);
@@ -1638,8 +1680,80 @@ static const char *bpf_str_tab_name(const struct bpf_elf_ctx *ctx,
return ctx->str_tab->d_buf + sym->st_name;
}
+static int bpf_btf_find(struct bpf_elf_ctx *ctx, const char *name)
+{
+ const struct btf_type *type;
+ const char *res;
+ int id;
+
+ for (id = 1; id < ctx->btf.types_num; id++) {
+ type = ctx->btf.types[id];
+ if (type->name_off >= ctx->btf.hdr->str_len)
+ continue;
+ res = &ctx->btf.strings[type->name_off];
+ if (!strcmp(res, name))
+ return id;
+ }
+
+ return -ENOENT;
+}
+
+static int bpf_btf_find_kv(struct bpf_elf_ctx *ctx, const struct bpf_elf_map *map,
+ const char *name, uint32_t *id_key, uint32_t *id_val)
+{
+ const struct btf_member *key, *val;
+ const struct btf_type *type;
+ char btf_name[512];
+ const char *res;
+ int id;
+
+ snprintf(btf_name, sizeof(btf_name), "____btf_map_%s", name);
+ id = bpf_btf_find(ctx, btf_name);
+ if (id < 0)
+ return id;
+
+ type = ctx->btf.types[id];
+ if (BTF_INFO_KIND(type->info) != BTF_KIND_STRUCT)
+ return -EINVAL;
+ if (BTF_INFO_VLEN(type->info) != 2)
+ return -EINVAL;
+
+ key = ((void *) type) + sizeof(*type);
+ val = key + 1;
+ if (!key->type || key->type >= ctx->btf.types_num ||
+ !val->type || val->type >= ctx->btf.types_num)
+ return -EINVAL;
+
+ if (key->name_off >= ctx->btf.hdr->str_len ||
+ val->name_off >= ctx->btf.hdr->str_len)
+ return -EINVAL;
+
+ res = &ctx->btf.strings[key->name_off];
+ if (strcmp(res, "key"))
+ return -EINVAL;
+
+ res = &ctx->btf.strings[val->name_off];
+ if (strcmp(res, "value"))
+ return -EINVAL;
+
+ *id_key = key->type;
+ *id_val = val->type;
+ return 0;
+}
+
+static void bpf_btf_annotate(struct bpf_elf_ctx *ctx, int which, const char *name)
+{
+ uint32_t id_key = 0, id_val = 0;
+
+ if (!bpf_btf_find_kv(ctx, &ctx->maps[which], name, &id_key, &id_val)) {
+ ctx->maps_ext[which].btf_id_key = id_key;
+ ctx->maps_ext[which].btf_id_val = id_val;
+ }
+}
+
static const char *bpf_map_fetch_name(struct bpf_elf_ctx *ctx, int which)
{
+ const char *name;
GElf_Sym sym;
int i;
@@ -1653,7 +1767,9 @@ static const char *bpf_map_fetch_name(struct bpf_elf_ctx *ctx, int which)
sym.st_value / ctx->map_len != which)
continue;
- return bpf_str_tab_name(ctx, &sym);
+ name = bpf_str_tab_name(ctx, &sym);
+ bpf_btf_annotate(ctx, which, name);
+ return name;
}
return NULL;
@@ -1915,11 +2031,210 @@ static int bpf_fetch_text(struct bpf_elf_ctx *ctx, int section,
return 0;
}
+static void bpf_btf_report(int fd, struct bpf_elf_ctx *ctx)
+{
+ fprintf(stderr, "\nBTF debug data section \'.BTF\' %s%s (%d)!\n",
+ fd < 0 ? "rejected: " : "loaded",
+ fd < 0 ? strerror(errno) : "",
+ fd < 0 ? errno : fd);
+
+ fprintf(stderr, " - Length: %zu\n", ctx->btf_data->d_size);
+
+ bpf_dump_error(ctx, "Verifier analysis:\n\n");
+}
+
+static int bpf_btf_attach(struct bpf_elf_ctx *ctx)
+{
+ int tries = 0, fd;
+retry:
+ errno = 0;
+ fd = bpf_btf_load(ctx->btf_data->d_buf, ctx->btf_data->d_size,
+ ctx->log, ctx->log_size);
+ if (fd < 0 || ctx->verbose) {
+ if (fd < 0 && (errno == ENOSPC || !ctx->log_size)) {
+ if (tries++ < 10 && !bpf_log_realloc(ctx))
+ goto retry;
+
+ fprintf(stderr, "Log buffer too small to dump verifier log %zu bytes (%d tries)!\n",
+ ctx->log_size, tries);
+ return fd;
+ }
+
+ if (bpf_log_has_data(ctx))
+ bpf_btf_report(fd, ctx);
+ }
+
+ return fd;
+}
+
+static int bpf_fetch_btf_begin(struct bpf_elf_ctx *ctx, int section,
+ struct bpf_elf_sec_data *data)
+{
+ ctx->btf_data = data->sec_data;
+ ctx->sec_btf = section;
+ ctx->sec_done[section] = true;
+ return 0;
+}
+
+static int bpf_btf_check_header(struct bpf_elf_ctx *ctx)
+{
+ const struct btf_header *hdr = ctx->btf_data->d_buf;
+ const char *str_start, *str_end;
+ unsigned int data_len;
+
+ if (hdr->magic != BTF_MAGIC) {
+ fprintf(stderr, "Object has wrong BTF magic: %x, expected: %x!\n",
+ hdr->magic, BTF_MAGIC);
+ return -EINVAL;
+ }
+
+ if (hdr->version != BTF_VERSION) {
+ fprintf(stderr, "Object has wrong BTF version: %u, expected: %u!\n",
+ hdr->version, BTF_VERSION);
+ return -EINVAL;
+ }
+
+ if (hdr->flags) {
+ fprintf(stderr, "Object has unsupported BTF flags %x!\n",
+ hdr->flags);
+ return -EINVAL;
+ }
+
+ data_len = ctx->btf_data->d_size - sizeof(*hdr);
+ if (data_len < hdr->type_off ||
+ data_len < hdr->str_off ||
+ data_len < hdr->type_len + hdr->str_len ||
+ hdr->type_off >= hdr->str_off ||
+ hdr->type_off + hdr->type_len != hdr->str_off ||
+ hdr->str_off + hdr->str_len != data_len ||
+ (hdr->type_off & (sizeof(uint32_t) - 1))) {
+ fprintf(stderr, "Object has malformed BTF data!\n");
+ return -EINVAL;
+ }
+
+ ctx->btf.hdr = hdr;
+ ctx->btf.raw = hdr + 1;
+
+ str_start = ctx->btf.raw + hdr->str_off;
+ str_end = str_start + hdr->str_len;
+ if (!hdr->str_len ||
+ hdr->str_len - 1 > BTF_MAX_NAME_OFFSET ||
+ str_start[0] || str_end[-1]) {
+ fprintf(stderr, "Object has malformed BTF string data!\n");
+ return -EINVAL;
+ }
+
+ ctx->btf.strings = str_start;
+ return 0;
+}
+
+static int bpf_btf_register_type(struct bpf_elf_ctx *ctx,
+ const struct btf_type *type)
+{
+ int cur = ctx->btf.types_num, num = cur + 1;
+ const struct btf_type **types;
+
+ types = realloc(ctx->btf.types, num * sizeof(type));
+ if (!types) {
+ free(ctx->btf.types);
+ ctx->btf.types = NULL;
+ ctx->btf.types_num = 0;
+ return -ENOMEM;
+ }
+
+ ctx->btf.types = types;
+ ctx->btf.types[cur] = type;
+ ctx->btf.types_num = num;
+ return 0;
+}
+
+static struct btf_type btf_type_void;
+
+static int bpf_btf_prep_type_data(struct bpf_elf_ctx *ctx)
+{
+ const void *type_cur = ctx->btf.raw + ctx->btf.hdr->type_off;
+ const void *type_end = ctx->btf.raw + ctx->btf.hdr->str_off;
+ const struct btf_type *type;
+ uint16_t var_len;
+ int ret, kind;
+
+ ret = bpf_btf_register_type(ctx, &btf_type_void);
+ if (ret < 0)
+ return ret;
+
+ while (type_cur < type_end) {
+ type = type_cur;
+ type_cur += sizeof(*type);
+
+ var_len = BTF_INFO_VLEN(type->info);
+ kind = BTF_INFO_KIND(type->info);
+
+ switch (kind) {
+ case BTF_KIND_INT:
+ type_cur += sizeof(int);
+ break;
+ case BTF_KIND_ARRAY:
+ type_cur += sizeof(struct btf_array);
+ break;
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ type_cur += var_len * sizeof(struct btf_member);
+ break;
+ case BTF_KIND_ENUM:
+ type_cur += var_len * sizeof(struct btf_enum);
+ break;
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_PTR:
+ case BTF_KIND_FWD:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_CONST:
+ case BTF_KIND_RESTRICT:
+ break;
+ default:
+ fprintf(stderr, "Object has unknown BTF type: %u!\n", kind);
+ return -EINVAL;
+ }
+
+ ret = bpf_btf_register_type(ctx, type);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int bpf_btf_prep_data(struct bpf_elf_ctx *ctx)
+{
+ int ret = bpf_btf_check_header(ctx);
+
+ if (!ret)
+ return bpf_btf_prep_type_data(ctx);
+ return ret;
+}
+
+static void bpf_fetch_btf_end(struct bpf_elf_ctx *ctx)
+{
+ int fd = bpf_btf_attach(ctx);
+
+ if (fd < 0)
+ return;
+ ctx->btf_fd = fd;
+ if (bpf_btf_prep_data(ctx) < 0) {
+ close(ctx->btf_fd);
+ ctx->btf_fd = 0;
+ }
+}
+
static bool bpf_has_map_data(const struct bpf_elf_ctx *ctx)
{
return ctx->sym_tab && ctx->str_tab && ctx->sec_maps;
}
+static bool bpf_has_btf_data(const struct bpf_elf_ctx *ctx)
+{
+ return ctx->sec_btf;
+}
+
static bool bpf_has_call_data(const struct bpf_elf_ctx *ctx)
{
return ctx->sec_text;
@@ -1952,6 +2267,9 @@ static int bpf_fetch_ancillary(struct bpf_elf_ctx *ctx, bool check_text_sec)
else if (data.sec_hdr.sh_type == SHT_STRTAB &&
!strcmp(data.sec_name, ".strtab"))
ret = bpf_fetch_strtab(ctx, i, &data);
+ else if (data.sec_hdr.sh_type == SHT_PROGBITS &&
+ !strcmp(data.sec_name, ".BTF"))
+ ret = bpf_fetch_btf_begin(ctx, i, &data);
if (ret < 0) {
fprintf(stderr, "Error parsing section %d! Perhaps check with readelf -a?\n",
i);
@@ -1959,6 +2277,8 @@ static int bpf_fetch_ancillary(struct bpf_elf_ctx *ctx, bool check_text_sec)
}
}
+ if (bpf_has_btf_data(ctx))
+ bpf_fetch_btf_end(ctx);
if (bpf_has_map_data(ctx)) {
ret = bpf_fetch_maps_end(ctx);
if (ret < 0) {
@@ -2596,6 +2916,10 @@ static void bpf_maps_teardown(struct bpf_elf_ctx *ctx)
if (ctx->map_fds[i])
close(ctx->map_fds[i]);
}
+
+ if (ctx->btf_fd)
+ close(ctx->btf_fd);
+ free(ctx->btf.types);
}
static void bpf_elf_ctx_destroy(struct bpf_elf_ctx *ctx, bool failure)
--
2.20.1