Blob Blame History Raw
From 80dcb40f8442f79a043c520ae9eef067519ee7ca Mon Sep 17 00:00:00 2001
From: Andrea Claudi <aclaudi@redhat.com>
Date: Thu, 13 Jun 2019 14:37:56 +0200
Subject: [PATCH] bpf: implement bpf to bpf calls support

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1716361
Upstream Status: iproute2.git commit b5cb33aec65cb

commit b5cb33aec65cb77183abbdfa5b61ecc9877ec776
Author: Daniel Borkmann <daniel@iogearbox.net>
Date:   Wed Jul 18 01:31:21 2018 +0200

    bpf: implement bpf to bpf calls support

    Implement missing bpf to bpf calls support. The loader will
    recognize .text section and handle relocation entries that
    are emitted by LLVM.

    First step is processing of map related relocation entries
    for .text section, and in a second step loader will copy .text
    section into program section and adjust call instruction
    offset accordingly.

    Example with test_xdp_noinline.o from kernel selftests:

     1) Every function as __attribute__ ((always_inline)), rest
        left unchanged:

      # ip -force link set dev lo xdp obj test_xdp_noinline.o sec xdp-test
      # ip a
      1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 xdpgeneric/id:233 qdisc noqueue state UNKNOWN group default qlen 1000
        link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
        inet 127.0.0.1/8 scope host lo
           valid_lft forever preferred_lft forever
        inet6 ::1/128 scope host
           valid_lft forever preferred_lft forever
      [...]
      # bpftool prog dump xlated id 233
      [...]
      1669: (2d) if r3 > r2 goto pc+4
      1670: (79) r2 = *(u64 *)(r10 -136)
      1671: (61) r2 = *(u32 *)(r2 +0)
      1672: (63) *(u32 *)(r1 +0) = r2
      1673: (b7) r0 = 1
      1674: (95) exit        <-- 1674 insns total

     2) Every function as __attribute__ ((noinline)), rest
        left unchanged:

      # ip -force link set dev lo xdp obj test_xdp_noinline.o sec xdp-test
      # ip a
      1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 xdpgeneric/id:236 qdisc noqueue state UNKNOWN group default qlen 1000
        link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
        inet 127.0.0.1/8 scope host lo
           valid_lft forever preferred_lft forever
        inet6 ::1/128 scope host
           valid_lft forever preferred_lft forever
      [...]
      # bpftool prog dump xlated id 236
      [...]
      1000: (bf) r1 = r6
      1001: (b7) r2 = 24
      1002: (85) call pc+3   <-- pc-relative call insns
      1003: (1f) r7 -= r0
      1004: (bf) r0 = r7
      1005: (95) exit
      1006: (bf) r0 = r1
      1007: (bf) r1 = r2
      1008: (67) r1 <<= 32
      1009: (77) r1 >>= 32
      1010: (bf) r3 = r0
      1011: (6f) r3 <<= r1
      1012: (87) r2 = -r2
      1013: (57) r2 &= 31
      1014: (67) r0 <<= 32
      1015: (77) r0 >>= 32
      1016: (7f) r0 >>= r2
      1017: (4f) r0 |= r3
      1018: (95) exit        <-- 1018 insns total

    Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
    Signed-off-by: David Ahern <dsahern@gmail.com>
---
 lib/bpf.c | 233 ++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 157 insertions(+), 76 deletions(-)

diff --git a/lib/bpf.c b/lib/bpf.c
index ead8b5a7219f0..1b87490555050 100644
--- a/lib/bpf.c
+++ b/lib/bpf.c
@@ -1109,7 +1109,8 @@ int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns,
 #ifdef HAVE_ELF
 struct bpf_elf_prog {
 	enum bpf_prog_type	type;
-	const struct bpf_insn	*insns;
+	struct bpf_insn		*insns;
+	unsigned int		insns_num;
 	size_t			size;
 	const char		*license;
 };
@@ -1135,11 +1136,13 @@ struct bpf_elf_ctx {
 	int			map_fds[ELF_MAX_MAPS];
 	struct bpf_elf_map	maps[ELF_MAX_MAPS];
 	struct bpf_map_ext	maps_ext[ELF_MAX_MAPS];
+	struct bpf_elf_prog	prog_text;
 	int			sym_num;
 	int			map_num;
 	int			map_len;
 	bool			*sec_done;
 	int			sec_maps;
+	int			sec_text;
 	char			license[ELF_MAX_LICENSE_LEN];
 	enum bpf_prog_type	type;
 	__u32			ifindex;
@@ -1904,12 +1907,25 @@ static int bpf_fetch_strtab(struct bpf_elf_ctx *ctx, int section,
 	return 0;
 }
 
+static int bpf_fetch_text(struct bpf_elf_ctx *ctx, int section,
+			  struct bpf_elf_sec_data *data)
+{
+	ctx->sec_text = section;
+	ctx->sec_done[section] = true;
+	return 0;
+}
+
 static bool bpf_has_map_data(const struct bpf_elf_ctx *ctx)
 {
 	return ctx->sym_tab && ctx->str_tab && ctx->sec_maps;
 }
 
-static int bpf_fetch_ancillary(struct bpf_elf_ctx *ctx)
+static bool bpf_has_call_data(const struct bpf_elf_ctx *ctx)
+{
+	return ctx->sec_text;
+}
+
+static int bpf_fetch_ancillary(struct bpf_elf_ctx *ctx, bool check_text_sec)
 {
 	struct bpf_elf_sec_data data;
 	int i, ret = -1;
@@ -1925,6 +1941,11 @@ static int bpf_fetch_ancillary(struct bpf_elf_ctx *ctx)
 		else if (data.sec_hdr.sh_type == SHT_PROGBITS &&
 			 !strcmp(data.sec_name, ELF_SECTION_LICENSE))
 			ret = bpf_fetch_license(ctx, i, &data);
+		else if (data.sec_hdr.sh_type == SHT_PROGBITS &&
+			 (data.sec_hdr.sh_flags & SHF_EXECINSTR) &&
+			 !strcmp(data.sec_name, ".text") &&
+			 check_text_sec)
+			ret = bpf_fetch_text(ctx, i, &data);
 		else if (data.sec_hdr.sh_type == SHT_SYMTAB &&
 			 !strcmp(data.sec_name, ".symtab"))
 			ret = bpf_fetch_symtab(ctx, i, &data);
@@ -1969,17 +1990,18 @@ static int bpf_fetch_prog(struct bpf_elf_ctx *ctx, const char *section,
 		ret = bpf_fill_section_data(ctx, i, &data);
 		if (ret < 0 ||
 		    !(data.sec_hdr.sh_type == SHT_PROGBITS &&
-		      data.sec_hdr.sh_flags & SHF_EXECINSTR &&
+		      (data.sec_hdr.sh_flags & SHF_EXECINSTR) &&
 		      !strcmp(data.sec_name, section)))
 			continue;
 
 		*sseen = true;
 
 		memset(&prog, 0, sizeof(prog));
-		prog.type    = ctx->type;
-		prog.insns   = data.sec_data->d_buf;
-		prog.size    = data.sec_data->d_size;
-		prog.license = ctx->license;
+		prog.type      = ctx->type;
+		prog.license   = ctx->license;
+		prog.size      = data.sec_data->d_size;
+		prog.insns_num = prog.size / sizeof(struct bpf_insn);
+		prog.insns     = data.sec_data->d_buf;
 
 		fd = bpf_prog_attach(section, &prog, ctx);
 		if (fd < 0)
@@ -1992,84 +2014,120 @@ static int bpf_fetch_prog(struct bpf_elf_ctx *ctx, const char *section,
 	return fd;
 }
 
-struct bpf_tail_call_props {
-	unsigned int total;
-	unsigned int jited;
+struct bpf_relo_props {
+	struct bpf_tail_call {
+		unsigned int total;
+		unsigned int jited;
+	} tc;
+	int main_num;
 };
 
+static int bpf_apply_relo_map(struct bpf_elf_ctx *ctx, struct bpf_elf_prog *prog,
+			      GElf_Rel *relo, GElf_Sym *sym,
+			      struct bpf_relo_props *props)
+{
+	unsigned int insn_off = relo->r_offset / sizeof(struct bpf_insn);
+	unsigned int map_idx = sym->st_value / ctx->map_len;
+
+	if (insn_off >= prog->insns_num)
+		return -EINVAL;
+	if (prog->insns[insn_off].code != (BPF_LD | BPF_IMM | BPF_DW)) {
+		fprintf(stderr, "ELF contains relo data for non ld64 instruction at offset %u! Compiler bug?!\n",
+			insn_off);
+		return -EINVAL;
+	}
+
+	if (map_idx >= ARRAY_SIZE(ctx->map_fds))
+		return -EINVAL;
+	if (!ctx->map_fds[map_idx])
+		return -EINVAL;
+	if (ctx->maps[map_idx].type == BPF_MAP_TYPE_PROG_ARRAY) {
+		props->tc.total++;
+		if (ctx->maps_ext[map_idx].owner.jited ||
+		    (ctx->maps_ext[map_idx].owner.type == 0 &&
+		     ctx->cfg.jit_enabled))
+			props->tc.jited++;
+	}
+
+	prog->insns[insn_off].src_reg = BPF_PSEUDO_MAP_FD;
+	prog->insns[insn_off].imm = ctx->map_fds[map_idx];
+	return 0;
+}
+
+static int bpf_apply_relo_call(struct bpf_elf_ctx *ctx, struct bpf_elf_prog *prog,
+			       GElf_Rel *relo, GElf_Sym *sym,
+			       struct bpf_relo_props *props)
+{
+	unsigned int insn_off = relo->r_offset / sizeof(struct bpf_insn);
+	struct bpf_elf_prog *prog_text = &ctx->prog_text;
+
+	if (insn_off >= prog->insns_num)
+		return -EINVAL;
+	if (prog->insns[insn_off].code != (BPF_JMP | BPF_CALL) &&
+	    prog->insns[insn_off].src_reg != BPF_PSEUDO_CALL) {
+		fprintf(stderr, "ELF contains relo data for non call instruction at offset %u! Compiler bug?!\n",
+			insn_off);
+		return -EINVAL;
+	}
+
+	if (!props->main_num) {
+		struct bpf_insn *insns = realloc(prog->insns,
+						 prog->size + prog_text->size);
+		if (!insns)
+			return -ENOMEM;
+
+		memcpy(insns + prog->insns_num, prog_text->insns,
+		       prog_text->size);
+		props->main_num = prog->insns_num;
+		prog->insns = insns;
+		prog->insns_num += prog_text->insns_num;
+		prog->size += prog_text->size;
+	}
+
+	prog->insns[insn_off].imm += props->main_num - insn_off;
+	return 0;
+}
+
 static int bpf_apply_relo_data(struct bpf_elf_ctx *ctx,
 			       struct bpf_elf_sec_data *data_relo,
-			       struct bpf_elf_sec_data *data_insn,
-			       struct bpf_tail_call_props *props)
+			       struct bpf_elf_prog *prog,
+			       struct bpf_relo_props *props)
 {
-	Elf_Data *idata = data_insn->sec_data;
 	GElf_Shdr *rhdr = &data_relo->sec_hdr;
 	int relo_ent, relo_num = rhdr->sh_size / rhdr->sh_entsize;
-	struct bpf_insn *insns = idata->d_buf;
-	unsigned int num_insns = idata->d_size / sizeof(*insns);
 
 	for (relo_ent = 0; relo_ent < relo_num; relo_ent++) {
-		unsigned int ioff, rmap;
 		GElf_Rel relo;
 		GElf_Sym sym;
+		int ret = -EIO;
 
 		if (gelf_getrel(data_relo->sec_data, relo_ent, &relo) != &relo)
 			return -EIO;
-
-		ioff = relo.r_offset / sizeof(struct bpf_insn);
-		if (ioff >= num_insns ||
-		    insns[ioff].code != (BPF_LD | BPF_IMM | BPF_DW)) {
-			fprintf(stderr, "ELF contains relo data for non ld64 instruction at offset %u! Compiler bug?!\n",
-				ioff);
-			fprintf(stderr, " - Current section: %s\n", data_relo->sec_name);
-			if (ioff < num_insns &&
-			    insns[ioff].code == (BPF_JMP | BPF_CALL))
-				fprintf(stderr, " - Try to annotate functions with always_inline attribute!\n");
-			return -EINVAL;
-		}
-
 		if (gelf_getsym(ctx->sym_tab, GELF_R_SYM(relo.r_info), &sym) != &sym)
 			return -EIO;
-		if (sym.st_shndx != ctx->sec_maps) {
-			fprintf(stderr, "ELF contains non-map related relo data in entry %u pointing to section %u! Compiler bug?!\n",
-				relo_ent, sym.st_shndx);
-			return -EIO;
-		}
 
-		rmap = sym.st_value / ctx->map_len;
-		if (rmap >= ARRAY_SIZE(ctx->map_fds))
-			return -EINVAL;
-		if (!ctx->map_fds[rmap])
-			return -EINVAL;
-		if (ctx->maps[rmap].type == BPF_MAP_TYPE_PROG_ARRAY) {
-			props->total++;
-			if (ctx->maps_ext[rmap].owner.jited ||
-			    (ctx->maps_ext[rmap].owner.type == 0 &&
-			     ctx->cfg.jit_enabled))
-				props->jited++;
-		}
-
-		if (ctx->verbose)
-			fprintf(stderr, "Map \'%s\' (%d) injected into prog section \'%s\' at offset %u!\n",
-				bpf_str_tab_name(ctx, &sym), ctx->map_fds[rmap],
-				data_insn->sec_name, ioff);
-
-		insns[ioff].src_reg = BPF_PSEUDO_MAP_FD;
-		insns[ioff].imm     = ctx->map_fds[rmap];
+		if (sym.st_shndx == ctx->sec_maps)
+			ret = bpf_apply_relo_map(ctx, prog, &relo, &sym, props);
+		else if (sym.st_shndx == ctx->sec_text)
+			ret = bpf_apply_relo_call(ctx, prog, &relo, &sym, props);
+		else
+			fprintf(stderr, "ELF contains non-{map,call} related relo data in entry %u pointing to section %u! Compiler bug?!\n",
+				relo_ent, sym.st_shndx);
+		if (ret < 0)
+			return ret;
 	}
 
 	return 0;
 }
 
 static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section,
-			       bool *lderr, bool *sseen)
+			       bool *lderr, bool *sseen, struct bpf_elf_prog *prog)
 {
 	struct bpf_elf_sec_data data_relo, data_insn;
-	struct bpf_elf_prog prog;
 	int ret, idx, i, fd = -1;
 
 	for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
-		struct bpf_tail_call_props props = {};
+		struct bpf_relo_props props = {};
 
 		ret = bpf_fill_section_data(ctx, i, &data_relo);
 		if (ret < 0 || data_relo.sec_hdr.sh_type != SHT_REL)
@@ -2080,40 +2138,54 @@ static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section,
 		ret = bpf_fill_section_data(ctx, idx, &data_insn);
 		if (ret < 0 ||
 		    !(data_insn.sec_hdr.sh_type == SHT_PROGBITS &&
-		      data_insn.sec_hdr.sh_flags & SHF_EXECINSTR &&
+		      (data_insn.sec_hdr.sh_flags & SHF_EXECINSTR) &&
 		      !strcmp(data_insn.sec_name, section)))
 			continue;
+		if (sseen)
+			*sseen = true;
+
+		memset(prog, 0, sizeof(*prog));
+		prog->type = ctx->type;
+		prog->license = ctx->license;
+		prog->size = data_insn.sec_data->d_size;
+		prog->insns_num = prog->size / sizeof(struct bpf_insn);
+		prog->insns = malloc(prog->size);
+		if (!prog->insns) {
+			*lderr = true;
+			return -ENOMEM;
+		}
 
-		*sseen = true;
+		memcpy(prog->insns, data_insn.sec_data->d_buf, prog->size);
 
-		ret = bpf_apply_relo_data(ctx, &data_relo, &data_insn, &props);
+		ret = bpf_apply_relo_data(ctx, &data_relo, prog, &props);
 		if (ret < 0) {
 			*lderr = true;
+			if (ctx->sec_text != idx)
+				free(prog->insns);
 			return ret;
 		}
+		if (ctx->sec_text == idx) {
+			fd = 0;
+			goto out;
+		}
 
-		memset(&prog, 0, sizeof(prog));
-		prog.type    = ctx->type;
-		prog.insns   = data_insn.sec_data->d_buf;
-		prog.size    = data_insn.sec_data->d_size;
-		prog.license = ctx->license;
-
-		fd = bpf_prog_attach(section, &prog, ctx);
+		fd = bpf_prog_attach(section, prog, ctx);
+		free(prog->insns);
 		if (fd < 0) {
 			*lderr = true;
-			if (props.total) {
+			if (props.tc.total) {
 				if (ctx->cfg.jit_enabled &&
-				    props.total != props.jited)
+				    props.tc.total != props.tc.jited)
 					fprintf(stderr, "JIT enabled, but only %u/%u tail call maps in the program have JITed owner!\n",
-						props.jited, props.total);
+						props.tc.jited, props.tc.total);
 				if (!ctx->cfg.jit_enabled &&
-				    props.jited)
+				    props.tc.jited)
 					fprintf(stderr, "JIT disabled, but %u/%u tail call maps in the program have JITed owner!\n",
-						props.jited, props.total);
+						props.tc.jited, props.tc.total);
 			}
 			return fd;
 		}
-
+out:
 		ctx->sec_done[i]   = true;
 		ctx->sec_done[idx] = true;
 		break;
@@ -2125,10 +2197,18 @@ static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section,
 static int bpf_fetch_prog_sec(struct bpf_elf_ctx *ctx, const char *section)
 {
 	bool lderr = false, sseen = false;
+	struct bpf_elf_prog prog;
 	int ret = -1;
 
-	if (bpf_has_map_data(ctx))
-		ret = bpf_fetch_prog_relo(ctx, section, &lderr, &sseen);
+	if (bpf_has_call_data(ctx)) {
+		ret = bpf_fetch_prog_relo(ctx, ".text", &lderr, NULL,
+					  &ctx->prog_text);
+		if (ret < 0)
+			return ret;
+	}
+
+	if (bpf_has_map_data(ctx) || bpf_has_call_data(ctx))
+		ret = bpf_fetch_prog_relo(ctx, section, &lderr, &sseen, &prog);
 	if (ret < 0 && !lderr)
 		ret = bpf_fetch_prog(ctx, section, &sseen);
 	if (ret < 0 && !sseen)
@@ -2525,6 +2605,7 @@ static void bpf_elf_ctx_destroy(struct bpf_elf_ctx *ctx, bool failure)
 
 	bpf_hash_destroy(ctx);
 
+	free(ctx->prog_text.insns);
 	free(ctx->sec_done);
 	free(ctx->log);
 
@@ -2546,7 +2627,7 @@ static int bpf_obj_open(const char *pathname, enum bpf_prog_type type,
 		return ret;
 	}
 
-	ret = bpf_fetch_ancillary(ctx);
+	ret = bpf_fetch_ancillary(ctx, strcmp(section, ".text"));
 	if (ret < 0) {
 		fprintf(stderr, "Error fetching ELF ancillary data!\n");
 		goto out;
-- 
2.20.1