teknoraver / rpms / systemd

Forked from rpms/systemd 4 months ago
Clone

Blame SOURCES/13496-fb.patch

167d2b
From 6999572c06303057dbdc16c04c523f407aec08bd Mon Sep 17 00:00:00 2001
167d2b
From: Julia Kartseva <hex@fb.com>
167d2b
Date: Wed, 16 Sep 2020 15:58:04 -0700
167d2b
Subject: [PATCH 1/9] shared: add bpf-program helpers
167d2b
167d2b
Add helpers to:
167d2b
- Create new BPFProgram instance from a path in bpf
167d2b
filesystem and bpf attach type;
167d2b
- Pin a program to bpf fs;
167d2b
- Get BPF program ID by BPF program FD.
167d2b
---
167d2b
 src/shared/bpf-program.c | 80 ++++++++++++++++++++++++++++++++++++++++
167d2b
 src/shared/bpf-program.h |  5 ++-
167d2b
 2 files changed, 84 insertions(+), 1 deletion(-)
167d2b
167d2b
diff --git a/src/shared/bpf-program.c b/src/shared/bpf-program.c
167d2b
index 10239142af..d67ada23b0 100644
167d2b
--- a/src/shared/bpf-program.c
167d2b
+++ b/src/shared/bpf-program.c
167d2b
@@ -12,6 +12,26 @@
167d2b
 #include "missing_syscall.h"
167d2b
 #include "path-util.h"
167d2b
 
167d2b
+ /* struct bpf_prog_info info must be initialized since its value is both input and output
167d2b
+  * for BPF_OBJ_GET_INFO_BY_FD syscall. */
167d2b
+static int bpf_program_get_info_by_fd(int prog_fd, struct bpf_prog_info *info, uint32_t info_len) {
167d2b
+        union bpf_attr attr;
167d2b
+
167d2b
+        /* Explicitly memset to zero since some compilers may produce non-zero-initialized padding when
167d2b
+         * structured initialization is used.
167d2b
+         * Refer to https://github.com/systemd/systemd/issues/18164
167d2b
+         */
167d2b
+        zero(attr);
167d2b
+        attr.info.bpf_fd = prog_fd;
167d2b
+        attr.info.info_len = info_len;
167d2b
+        attr.info.info = PTR_TO_UINT64(info);
167d2b
+
167d2b
+        if (bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr)) < 0)
167d2b
+                return -errno;
167d2b
+
167d2b
+        return 0;
167d2b
+}
167d2b
+
167d2b
 int bpf_program_new(uint32_t prog_type, BPFProgram **ret) {
167d2b
         _cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
167d2b
 
167d2b
@@ -28,6 +48,38 @@ int bpf_program_new(uint32_t prog_type, BPFProgram **ret) {
167d2b
         return 0;
167d2b
 }
167d2b
 
167d2b
+int bpf_program_new_from_bpffs_path(const char *path, BPFProgram **ret) {
167d2b
+        _cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
167d2b
+        struct bpf_prog_info info = {};
167d2b
+        int r;
167d2b
+
167d2b
+        assert(path);
167d2b
+        assert(ret);
167d2b
+
167d2b
+        p = new(BPFProgram, 1);
167d2b
+        if (!p)
167d2b
+                return -ENOMEM;
167d2b
+
167d2b
+        *p = (BPFProgram) {
167d2b
+                .prog_type = BPF_PROG_TYPE_UNSPEC,
167d2b
+                .n_ref = 1,
167d2b
+                .kernel_fd = -1,
167d2b
+        };
167d2b
+
167d2b
+        r = bpf_program_load_from_bpf_fs(p, path);
167d2b
+        if (r < 0)
167d2b
+                return r;
167d2b
+
167d2b
+        r = bpf_program_get_info_by_fd(p->kernel_fd, &info, sizeof(info));
167d2b
+        if (r < 0)
167d2b
+                return r;
167d2b
+
167d2b
+        p->prog_type = info.type;
167d2b
+        *ret = TAKE_PTR(p);
167d2b
+
167d2b
+        return 0;
167d2b
+}
167d2b
+
167d2b
 static BPFProgram *bpf_program_free(BPFProgram *p) {
167d2b
         assert(p);
167d2b
 
167d2b
@@ -254,3 +306,31 @@ int bpf_map_lookup_element(int fd, const void *key, void *value) {
167d2b
 
167d2b
         return 0;
167d2b
 }
167d2b
+
167d2b
+int bpf_program_pin(int prog_fd, const char *bpffs_path) {
167d2b
+        union bpf_attr attr;
167d2b
+
167d2b
+        zero(attr);
167d2b
+        attr.pathname = PTR_TO_UINT64((void *) bpffs_path);
167d2b
+        attr.bpf_fd = prog_fd;
167d2b
+
167d2b
+        if (bpf(BPF_OBJ_PIN, &attr, sizeof(attr)) < 0)
167d2b
+                return -errno;
167d2b
+
167d2b
+        return 0;
167d2b
+}
167d2b
+
167d2b
+int bpf_program_get_id_by_fd(int prog_fd, uint32_t *ret_id) {
167d2b
+        struct bpf_prog_info info = {};
167d2b
+        int r;
167d2b
+
167d2b
+        assert(ret_id);
167d2b
+
167d2b
+        r = bpf_program_get_info_by_fd(prog_fd, &info, sizeof(info));
167d2b
+        if (r < 0)
167d2b
+                return r;
167d2b
+
167d2b
+        *ret_id = info.id;
167d2b
+
167d2b
+        return 0;
167d2b
+};
167d2b
diff --git a/src/shared/bpf-program.h b/src/shared/bpf-program.h
167d2b
index eef77f9d8e..243cef923f 100644
167d2b
--- a/src/shared/bpf-program.h
167d2b
+++ b/src/shared/bpf-program.h
167d2b
@@ -26,8 +26,9 @@ struct BPFProgram {
167d2b
 };
167d2b
 
167d2b
 int bpf_program_new(uint32_t prog_type, BPFProgram **ret);
167d2b
-BPFProgram *bpf_program_unref(BPFProgram *p);
167d2b
+int bpf_program_new_from_bpffs_path(const char *path, BPFProgram **ret);
167d2b
 BPFProgram *bpf_program_ref(BPFProgram *p);
167d2b
+BPFProgram *bpf_program_unref(BPFProgram *p);
167d2b
 
167d2b
 int bpf_program_add_instructions(BPFProgram *p, const struct bpf_insn *insn, size_t count);
167d2b
 int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size);
167d2b
@@ -35,6 +36,8 @@ int bpf_program_load_from_bpf_fs(BPFProgram *p, const char *path);
167d2b
 
167d2b
 int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags);
167d2b
 int bpf_program_cgroup_detach(BPFProgram *p);
167d2b
+int bpf_program_pin(int prog_fd, const char *bpffs_path);
167d2b
+int bpf_program_get_id_by_fd(int prog_fd, uint32_t *ret_id);
167d2b
 
167d2b
 int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size_t max_entries, uint32_t flags);
167d2b
 int bpf_map_update_element(int fd, const void *key, void *value);
167d2b
-- 
167d2b
2.30.2
167d2b
167d2b
167d2b
From b04b09640c624df8f64944dead8d59faa0521d2e Mon Sep 17 00:00:00 2001
167d2b
From: Julia Kartseva <hex@fb.com>
167d2b
Date: Thu, 4 Feb 2021 00:02:07 -0800
167d2b
Subject: [PATCH 2/9] shared: bpf_attach_type {from,to} string
167d2b
167d2b
Introduce bpf_cgroup_attach_type_table with the customary attach type
167d2b
names also used in bpftool.
167d2b
Add bpf_cgroup_attach_type_{from|to}_string helpers to convert from|to
167d2b
string representation of pinned bpf program, e.g.
167d2b
"egress:/sys/fs/bpf/egress-hook" for
167d2b
/sys/fs/bpf/egress-hook path and BPF_CGROUP_INET_EGRESS attach type.
167d2b
---
167d2b
 src/shared/bpf-program.c | 24 ++++++++++++++++++++++++
167d2b
 src/shared/bpf-program.h |  3 +++
167d2b
 2 files changed, 27 insertions(+)
167d2b
167d2b
diff --git a/src/shared/bpf-program.c b/src/shared/bpf-program.c
167d2b
index d67ada23b0..a8a34521fd 100644
167d2b
--- a/src/shared/bpf-program.c
167d2b
+++ b/src/shared/bpf-program.c
167d2b
@@ -11,6 +11,30 @@
167d2b
 #include "memory-util.h"
167d2b
 #include "missing_syscall.h"
167d2b
 #include "path-util.h"
167d2b
+#include "string-table.h"
167d2b
+
167d2b
+static const char *const bpf_cgroup_attach_type_table[__MAX_BPF_ATTACH_TYPE] = {
167d2b
+        [BPF_CGROUP_INET_INGRESS] =     "ingress",
167d2b
+        [BPF_CGROUP_INET_EGRESS] =      "egress",
167d2b
+        [BPF_CGROUP_INET_SOCK_CREATE] = "sock_create",
167d2b
+        [BPF_CGROUP_SOCK_OPS] =         "sock_ops",
167d2b
+        [BPF_CGROUP_DEVICE] =           "device",
167d2b
+        [BPF_CGROUP_INET4_BIND] =       "bind4",
167d2b
+        [BPF_CGROUP_INET6_BIND] =       "bind6",
167d2b
+        [BPF_CGROUP_INET4_CONNECT] =    "connect4",
167d2b
+        [BPF_CGROUP_INET6_CONNECT] =    "connect6",
167d2b
+        [BPF_CGROUP_INET4_POST_BIND] =  "post_bind4",
167d2b
+        [BPF_CGROUP_INET6_POST_BIND] =  "post_bind6",
167d2b
+        [BPF_CGROUP_UDP4_SENDMSG] =     "sendmsg4",
167d2b
+        [BPF_CGROUP_UDP6_SENDMSG] =     "sendmsg6",
167d2b
+        [BPF_CGROUP_SYSCTL] =           "sysctl",
167d2b
+        [BPF_CGROUP_UDP4_RECVMSG] =     "recvmsg4",
167d2b
+        [BPF_CGROUP_UDP6_RECVMSG] =     "recvmsg6",
167d2b
+        [BPF_CGROUP_GETSOCKOPT] =       "getsockopt",
167d2b
+        [BPF_CGROUP_SETSOCKOPT] =       "setsockopt",
167d2b
+};
167d2b
+
167d2b
+DEFINE_STRING_TABLE_LOOKUP(bpf_cgroup_attach_type, int);
167d2b
 
167d2b
  /* struct bpf_prog_info info must be initialized since its value is both input and output
167d2b
   * for BPF_OBJ_GET_INFO_BY_FD syscall. */
167d2b
diff --git a/src/shared/bpf-program.h b/src/shared/bpf-program.h
167d2b
index 243cef923f..86fd338c93 100644
167d2b
--- a/src/shared/bpf-program.h
167d2b
+++ b/src/shared/bpf-program.h
167d2b
@@ -43,4 +43,7 @@ int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size
167d2b
 int bpf_map_update_element(int fd, const void *key, void *value);
167d2b
 int bpf_map_lookup_element(int fd, const void *key, void *value);
167d2b
 
167d2b
+int bpf_cgroup_attach_type_from_string(const char *str) _pure_;
167d2b
+const char *bpf_cgroup_attach_type_to_string(int attach_type) _const_;
167d2b
+
167d2b
 DEFINE_TRIVIAL_CLEANUP_FUNC(BPFProgram*, bpf_program_unref);
167d2b
-- 
167d2b
2.30.2
167d2b
167d2b
167d2b
From 2ab40fe0dc0f5ec83fe73bc750e9b3f5aabf1fb9 Mon Sep 17 00:00:00 2001
167d2b
From: Julia Kartseva <hex@fb.com>
167d2b
Date: Mon, 1 Mar 2021 16:56:04 -0800
167d2b
Subject: [PATCH 3/9] cgroup: add foreign program to cgroup context
167d2b
167d2b
- Store foreign bpf programs in cgroup context. A program is considered
167d2b
foreign if it was loaded to a kernel by an entity external to systemd,
167d2b
so systemd is responsible only for attach and detach paths.
167d2b
- Support the case of pinned bpf programs: pinning to bpffs so a program
167d2b
is kept loaded to the kernel even when program fd is closed by a user
167d2b
application is a common way to extend program's lifetime.
167d2b
- Add linked list node struct with attach type and bpffs path
167d2b
fields.
167d2b
---
167d2b
 src/core/cgroup.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
167d2b
 src/core/cgroup.h | 10 ++++++++++
167d2b
 2 files changed, 55 insertions(+)
167d2b
167d2b
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
167d2b
index 3ed6ac09ff..a0af50518d 100644
167d2b
--- a/src/core/cgroup.c
167d2b
+++ b/src/core/cgroup.c
167d2b
@@ -190,6 +190,15 @@ void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockI
167d2b
         free(b);
167d2b
 }
167d2b
 
167d2b
+void cgroup_context_remove_bpf_foreign_program(CGroupContext *c, CGroupBPFForeignProgram *p) {
167d2b
+        assert(c);
167d2b
+        assert(p);
167d2b
+
167d2b
+        LIST_REMOVE(programs, c->bpf_foreign_programs, p);
167d2b
+        free(p->bpffs_path);
167d2b
+        free(p);
167d2b
+}
167d2b
+
167d2b
 void cgroup_context_done(CGroupContext *c) {
167d2b
         assert(c);
167d2b
 
167d2b
@@ -217,6 +226,9 @@ void cgroup_context_done(CGroupContext *c) {
167d2b
         c->ip_filters_ingress = strv_free(c->ip_filters_ingress);
167d2b
         c->ip_filters_egress = strv_free(c->ip_filters_egress);
167d2b
 
167d2b
+        while (c->bpf_foreign_programs)
167d2b
+                cgroup_context_remove_bpf_foreign_program(c, c->bpf_foreign_programs);
167d2b
+
167d2b
         cpu_set_reset(&c->cpuset_cpus);
167d2b
         cpu_set_reset(&c->cpuset_mems);
167d2b
 }
167d2b
@@ -360,6 +372,7 @@ void cgroup_context_dump(Unit *u, FILE* f, const char *prefix) {
167d2b
         CGroupIODeviceLatency *l;
167d2b
         CGroupBlockIODeviceBandwidth *b;
167d2b
         CGroupBlockIODeviceWeight *w;
167d2b
+        CGroupBPFForeignProgram *p;
167d2b
         CGroupDeviceAllow *a;
167d2b
         CGroupContext *c;
167d2b
         IPAddressAccessItem *iaai;
167d2b
@@ -544,6 +557,10 @@ void cgroup_context_dump(Unit *u, FILE* f, const char *prefix) {
167d2b
 
167d2b
         STRV_FOREACH(path, c->ip_filters_egress)
167d2b
                 fprintf(f, "%sIPEgressFilterPath: %s\n", prefix, *path);
167d2b
+
167d2b
+        LIST_FOREACH(programs, p, c->bpf_foreign_programs)
167d2b
+                fprintf(f, "%sBPFProgram: %s:%s",
167d2b
+                        prefix, bpf_cgroup_attach_type_to_string(p->attach_type), p->bpffs_path);
167d2b
 }
167d2b
 
167d2b
 int cgroup_add_device_allow(CGroupContext *c, const char *dev, const char *mode) {
167d2b
@@ -575,6 +592,34 @@ int cgroup_add_device_allow(CGroupContext *c, const char *dev, const char *mode)
167d2b
         return 0;
167d2b
 }
167d2b
 
167d2b
+int cgroup_add_bpf_foreign_program(CGroupContext *c, uint32_t attach_type, const char *bpffs_path) {
167d2b
+        CGroupBPFForeignProgram *p;
167d2b
+        _cleanup_free_ char *d = NULL;
167d2b
+
167d2b
+        assert(c);
167d2b
+        assert(bpffs_path);
167d2b
+
167d2b
+        if (!path_is_normalized(bpffs_path) || !path_is_absolute(bpffs_path))
167d2b
+                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Path is not normalized: %m");
167d2b
+
167d2b
+        d = strdup(bpffs_path);
167d2b
+        if (!d)
167d2b
+                return log_oom();
167d2b
+
167d2b
+        p = new(CGroupBPFForeignProgram, 1);
167d2b
+        if (!p)
167d2b
+                return log_oom();
167d2b
+
167d2b
+        *p = (CGroupBPFForeignProgram) {
167d2b
+                .attach_type = attach_type,
167d2b
+                .bpffs_path = TAKE_PTR(d),
167d2b
+        };
167d2b
+
167d2b
+        LIST_PREPEND(programs, c->bpf_foreign_programs, TAKE_PTR(p));
167d2b
+
167d2b
+        return 0;
167d2b
+}
167d2b
+
167d2b
 #define UNIT_DEFINE_ANCESTOR_MEMORY_LOOKUP(entry)                       \
167d2b
         uint64_t unit_get_ancestor_##entry(Unit *u) {                   \
167d2b
                 CGroupContext *c;                                       \
167d2b
diff --git a/src/core/cgroup.h b/src/core/cgroup.h
167d2b
index fa79ba1523..be3060eba7 100644
167d2b
--- a/src/core/cgroup.h
167d2b
+++ b/src/core/cgroup.h
167d2b
@@ -31,6 +31,7 @@ typedef struct CGroupIODeviceLimit CGroupIODeviceLimit;
167d2b
 typedef struct CGroupIODeviceLatency CGroupIODeviceLatency;
167d2b
 typedef struct CGroupBlockIODeviceWeight CGroupBlockIODeviceWeight;
167d2b
 typedef struct CGroupBlockIODeviceBandwidth CGroupBlockIODeviceBandwidth;
167d2b
+typedef struct CGroupBPFForeignProgram CGroupBPFForeignProgram;
167d2b
 
167d2b
 typedef enum CGroupDevicePolicy {
167d2b
         /* When devices listed, will allow those, plus built-in ones, if none are listed will allow
167d2b
@@ -94,6 +95,12 @@ struct CGroupBlockIODeviceBandwidth {
167d2b
         uint64_t wbps;
167d2b
 };
167d2b
 
167d2b
+struct CGroupBPFForeignProgram {
167d2b
+        LIST_FIELDS(CGroupBPFForeignProgram, programs);
167d2b
+        uint32_t attach_type;
167d2b
+        char *bpffs_path;
167d2b
+};
167d2b
+
167d2b
 struct CGroupContext {
167d2b
         bool cpu_accounting;
167d2b
         bool io_accounting;
167d2b
@@ -142,6 +149,7 @@ struct CGroupContext {
167d2b
 
167d2b
         char **ip_filters_ingress;
167d2b
         char **ip_filters_egress;
167d2b
+        LIST_HEAD(CGroupBPFForeignProgram, bpf_foreign_programs);
167d2b
 
167d2b
         /* For legacy hierarchies */
167d2b
         uint64_t cpu_shares;
167d2b
@@ -202,8 +210,10 @@ void cgroup_context_free_io_device_limit(CGroupContext *c, CGroupIODeviceLimit *
167d2b
 void cgroup_context_free_io_device_latency(CGroupContext *c, CGroupIODeviceLatency *l);
167d2b
 void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w);
167d2b
 void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b);
167d2b
+void cgroup_context_remove_bpf_foreign_program(CGroupContext *c, CGroupBPFForeignProgram *p);
167d2b
 
167d2b
 int cgroup_add_device_allow(CGroupContext *c, const char *dev, const char *mode);
167d2b
+int cgroup_add_bpf_foreign_program(CGroupContext *c, uint32_t attach_type, const char *path);
167d2b
 
167d2b
 void cgroup_oomd_xattr_apply(Unit *u, const char *cgroup_path);
167d2b
 
167d2b
-- 
167d2b
2.30.2
167d2b
167d2b
167d2b
From 42b3c33e312d70fec24b5daa455ab4abdcab81b4 Mon Sep 17 00:00:00 2001
167d2b
From: Julia Kartseva <hex@fb.com>
167d2b
Date: Wed, 16 Sep 2020 15:58:04 -0700
167d2b
Subject: [PATCH 4/9] core: add bpf-foreign unit helpers
167d2b
167d2b
- Introduce support of cgroup-bpf programs managed (i.e. compiled,
167d2b
loaded to and unloaded from kernel) externally. Systemd is only
167d2b
responsible for attaching programs to unit cgroup hence the name
167d2b
'foreign'.
167d2b
167d2b
Foreign BPF programs are identified by bpf program ID and attach type.
167d2b
167d2b
systemd:
167d2b
- Gets kernel FD of BPF program;
167d2b
- Makes a unique identifier of BPF program from BPF attach type and
167d2b
program ID. Same program IDs mean the same program, i.e. the same
167d2b
chunk of kernel memory. Even if the same program is passed multiple
167d2b
times, identical (program_id, attach_type) instances are collapsed
167d2b
into one;
167d2b
- Attaches programs to unit cgroup.
167d2b
---
167d2b
 src/core/bpf-foreign.c | 151 +++++++++++++++++++++++++++++++++++++++++
167d2b
 src/core/bpf-foreign.h |  12 ++++
167d2b
 src/core/meson.build   |   2 +
167d2b
 src/core/unit.c        |   3 +
167d2b
 src/core/unit.h        |   4 ++
167d2b
 5 files changed, 172 insertions(+)
167d2b
 create mode 100644 src/core/bpf-foreign.c
167d2b
 create mode 100644 src/core/bpf-foreign.h
167d2b
167d2b
diff --git a/src/core/bpf-foreign.c b/src/core/bpf-foreign.c
167d2b
new file mode 100644
167d2b
index 0000000000..98655bda3c
167d2b
--- /dev/null
167d2b
+++ b/src/core/bpf-foreign.c
167d2b
@@ -0,0 +1,151 @@
167d2b
+/* SPDX-License-Identifier: LGPL-2.1+ */
167d2b
+
167d2b
+#include "bpf-foreign.h"
167d2b
+#include "bpf-program.h"
167d2b
+#include "cgroup.h"
167d2b
+#include "memory-util.h"
167d2b
+#include "mountpoint-util.h"
167d2b
+#include "set.h"
167d2b
+
167d2b
+typedef struct BPFForeignKey BPFForeignKey;
167d2b
+struct BPFForeignKey {
167d2b
+        uint32_t prog_id;
167d2b
+        uint32_t attach_type;
167d2b
+};
167d2b
+
167d2b
+static int bpf_foreign_key_new(uint32_t prog_id,
167d2b
+                enum bpf_attach_type attach_type,
167d2b
+                BPFForeignKey **ret) {
167d2b
+        _cleanup_free_ BPFForeignKey *p = NULL;
167d2b
+
167d2b
+        assert(ret);
167d2b
+
167d2b
+        p = new(BPFForeignKey, 1);
167d2b
+        if (!p)
167d2b
+                return log_oom();
167d2b
+
167d2b
+        *p = (BPFForeignKey) {
167d2b
+                .prog_id = prog_id,
167d2b
+                .attach_type = attach_type,
167d2b
+        };
167d2b
+
167d2b
+        *ret = TAKE_PTR(p);
167d2b
+
167d2b
+        return 0;
167d2b
+}
167d2b
+
167d2b
+static int bpf_foreign_key_compare_func(const BPFForeignKey *a, const BPFForeignKey *b) {
167d2b
+        int r = CMP(a->prog_id, b->prog_id);
167d2b
+        if (r != 0)
167d2b
+                return r;
167d2b
+
167d2b
+        return CMP(a->attach_type, b->attach_type);
167d2b
+}
167d2b
+
167d2b
+static void bpf_foreign_key_hash_func(const BPFForeignKey *p, struct siphash *h) {
167d2b
+        siphash24_compress(&p->prog_id, sizeof(p->prog_id), h);
167d2b
+        siphash24_compress(&p->attach_type, sizeof(p->attach_type), h);
167d2b
+}
167d2b
+
167d2b
+DEFINE_PRIVATE_HASH_OPS_FULL(bpf_foreign_by_key_hash_ops,
167d2b
+                BPFForeignKey, bpf_foreign_key_hash_func, bpf_foreign_key_compare_func, free,
167d2b
+                BPFProgram, bpf_program_unref);
167d2b
+
167d2b
+static int attach_programs(Unit *u, const char *path, Hashmap* foreign_by_key, uint32_t attach_flags) {
167d2b
+        const BPFForeignKey *key;
167d2b
+        BPFProgram *prog;
167d2b
+        int r;
167d2b
+
167d2b
+        assert(u);
167d2b
+
167d2b
+        HASHMAP_FOREACH_KEY(prog, key, foreign_by_key) {
167d2b
+                r = bpf_program_cgroup_attach(prog, key->attach_type, path, attach_flags);
167d2b
+                if (r < 0)
167d2b
+                        return log_unit_error_errno(u, r, "Attaching foreign BPF program to cgroup %s failed: %m", path);
167d2b
+        }
167d2b
+
167d2b
+        return 0;
167d2b
+}
167d2b
+
167d2b
+/*
167d2b
+ * Prepare foreign BPF program for installation:
167d2b
+ * - Load the program from BPF filesystem to the kernel;
167d2b
+ * - Store program FD identified by program ID and attach type in the unit.
167d2b
+ */
167d2b
+static int bpf_foreign_prepare(
167d2b
+                Unit *u,
167d2b
+                enum bpf_attach_type attach_type,
167d2b
+                const char *bpffs_path) {
167d2b
+        _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
167d2b
+        _cleanup_free_ BPFForeignKey *key = NULL;
167d2b
+        uint32_t prog_id;
167d2b
+        int r;
167d2b
+
167d2b
+        assert(u);
167d2b
+        assert(bpffs_path);
167d2b
+
167d2b
+        r = bpf_program_new_from_bpffs_path(bpffs_path, &prog);
167d2b
+        if (r < 0)
167d2b
+                return log_unit_error_errno(u, r, "Failed to create foreign BPFProgram: %m");
167d2b
+
167d2b
+        r = bpf_program_get_id_by_fd(prog->kernel_fd, &prog_id);
167d2b
+        if (r < 0)
167d2b
+                return log_unit_error_errno(u, r, "Failed to get BPF program id by fd: %m");
167d2b
+
167d2b
+        r = bpf_foreign_key_new(prog_id, attach_type, &key);
167d2b
+        if (r < 0)
167d2b
+                return log_unit_error_errno(u, r,
167d2b
+                                "Failed to create foreign BPF program key from path '%s': %m", bpffs_path);
167d2b
+
167d2b
+        r = hashmap_ensure_put(&u->bpf_foreign_by_key, &bpf_foreign_by_key_hash_ops, key, prog);
167d2b
+        if (r == -EEXIST) {
167d2b
+                log_unit_warning_errno(u, r, "Foreign BPF program already exists, ignoring: %m");
167d2b
+                return 0;
167d2b
+        }
167d2b
+        if (r < 0)
167d2b
+                return log_unit_error_errno(u, r, "Failed to put foreign BPFProgram into map: %m");
167d2b
+
167d2b
+        TAKE_PTR(key);
167d2b
+        TAKE_PTR(prog);
167d2b
+
167d2b
+        return 0;
167d2b
+}
167d2b
+
167d2b
+int bpf_foreign_supported(void) {
167d2b
+        int r;
167d2b
+
167d2b
+        r = cg_all_unified();
167d2b
+        if (r <= 0)
167d2b
+                return r;
167d2b
+
167d2b
+        return path_is_mount_point("/sys/fs/bpf", NULL, 0);
167d2b
+}
167d2b
+
167d2b
+int bpf_foreign_install(Unit *u) {
167d2b
+        _cleanup_free_ char *cgroup_path = NULL;
167d2b
+        CGroupBPFForeignProgram *p;
167d2b
+        CGroupContext *cc;
167d2b
+        int r;
167d2b
+
167d2b
+        assert(u);
167d2b
+
167d2b
+        cc = unit_get_cgroup_context(u);
167d2b
+        if (!cc)
167d2b
+                return 0;
167d2b
+
167d2b
+        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &cgroup_path);
167d2b
+        if (r < 0)
167d2b
+                return log_unit_error_errno(u, r, "Failed to get cgroup path: %m");
167d2b
+
167d2b
+        LIST_FOREACH(programs, p, cc->bpf_foreign_programs) {
167d2b
+                r = bpf_foreign_prepare(u, p->attach_type, p->bpffs_path);
167d2b
+                if (r < 0)
167d2b
+                        return log_unit_error_errno(u, r, "Failed to prepare foreign BPF hashmap: %m");
167d2b
+        }
167d2b
+
167d2b
+        r = attach_programs(u, cgroup_path, u->bpf_foreign_by_key, BPF_F_ALLOW_MULTI);
167d2b
+        if (r < 0)
167d2b
+                  return log_unit_error_errno(u, r, "Failed to install foreign BPF programs: %m");
167d2b
+
167d2b
+        return 0;
167d2b
+}
167d2b
diff --git a/src/core/bpf-foreign.h b/src/core/bpf-foreign.h
167d2b
new file mode 100644
167d2b
index 0000000000..7704986e3e
167d2b
--- /dev/null
167d2b
+++ b/src/core/bpf-foreign.h
167d2b
@@ -0,0 +1,12 @@
167d2b
+/* SPDX-License-Identifier: LGPL-2.1+ */
167d2b
+
167d2b
+#pragma once
167d2b
+
167d2b
+#include "unit.h"
167d2b
+
167d2b
+int bpf_foreign_supported(void);
167d2b
+/*
167d2b
+ * Attach cgroup-bpf programs foreign to systemd, i.e. loaded to the kernel by an entity
167d2b
+ * external to systemd.
167d2b
+ */
167d2b
+int bpf_foreign_install(Unit *u);
167d2b
diff --git a/src/core/meson.build b/src/core/meson.build
167d2b
index a389c906b3..a1294f3a72 100644
167d2b
--- a/src/core/meson.build
167d2b
+++ b/src/core/meson.build
167d2b
@@ -11,6 +11,8 @@ libcore_sources = '''
167d2b
         bpf-devices.h
167d2b
         bpf-firewall.c
167d2b
         bpf-firewall.h
167d2b
+        bpf-foreign.c
167d2b
+        bpf-foreign.h
167d2b
         cgroup.c
167d2b
         cgroup.h
167d2b
         core-varlink.c
167d2b
diff --git a/src/core/unit.c b/src/core/unit.c
167d2b
index c212f1043d..b7141b29af 100644
167d2b
--- a/src/core/unit.c
167d2b
+++ b/src/core/unit.c
167d2b
@@ -11,6 +11,7 @@
167d2b
 #include "all-units.h"
167d2b
 #include "alloc-util.h"
167d2b
 #include "bpf-firewall.h"
167d2b
+#include "bpf-foreign.h"
167d2b
 #include "bus-common-errors.h"
167d2b
 #include "bus-util.h"
167d2b
 #include "cgroup-setup.h"
167d2b
@@ -723,6 +724,8 @@ Unit* unit_free(Unit *u) {
167d2b
         set_free(u->ip_bpf_custom_ingress_installed);
167d2b
         set_free(u->ip_bpf_custom_egress_installed);
167d2b
 
167d2b
+        hashmap_free(u->bpf_foreign_by_key);
167d2b
+
167d2b
         bpf_program_unref(u->bpf_device_control_installed);
167d2b
 
167d2b
         condition_free_list(u->conditions);
167d2b
diff --git a/src/core/unit.h b/src/core/unit.h
167d2b
index 264431d04d..6de529af92 100644
167d2b
--- a/src/core/unit.h
167d2b
+++ b/src/core/unit.h
167d2b
@@ -305,6 +305,10 @@ typedef struct Unit {
167d2b
         Set *ip_bpf_custom_egress;
167d2b
         Set *ip_bpf_custom_egress_installed;
167d2b
 
167d2b
+        /* BPF programs managed (e.g. loaded to kernel) by an entity external to systemd,
167d2b
+         * attached to unit cgroup by provided program fd and attach type. */
167d2b
+        Hashmap *bpf_foreign_by_key;
167d2b
+
167d2b
         uint64_t ip_accounting_extra[_CGROUP_IP_ACCOUNTING_METRIC_MAX];
167d2b
 
167d2b
         /* Low-priority event source which is used to remove watched PIDs that have gone away, and subscribe to any new
167d2b
-- 
167d2b
2.30.2
167d2b
167d2b
167d2b
From 4d84c1af52683f7272e021fb4c6e68148d64e33b Mon Sep 17 00:00:00 2001
167d2b
From: Julia Kartseva <hex@fb.com>
167d2b
Date: Thu, 4 Feb 2021 00:03:08 -0800
167d2b
Subject: [PATCH 5/9] core: add bpf-foreign cgroup mask and harness
167d2b
167d2b
Add CGROUP_MASK_BPF_FOREIGN to CGROUP_MASK_BPF and standard cgroup
167d2b
context harness.
167d2b
---
167d2b
 src/basic/cgroup-util.c     |  1 +
167d2b
 src/basic/cgroup-util.h     |  4 +++-
167d2b
 src/core/cgroup.c           | 29 +++++++++++++++++++++++++++++
167d2b
 src/test/test-cgroup-mask.c |  2 +-
167d2b
 4 files changed, 34 insertions(+), 2 deletions(-)
167d2b
167d2b
diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c
167d2b
index 8dd3f8cd95..743fb0afe1 100644
167d2b
--- a/src/basic/cgroup-util.c
167d2b
+++ b/src/basic/cgroup-util.c
167d2b
@@ -2162,6 +2162,7 @@ static const char *const cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
167d2b
         [CGROUP_CONTROLLER_PIDS] = "pids",
167d2b
         [CGROUP_CONTROLLER_BPF_FIREWALL] = "bpf-firewall",
167d2b
         [CGROUP_CONTROLLER_BPF_DEVICES] = "bpf-devices",
167d2b
+        [CGROUP_CONTROLLER_BPF_FOREIGN] = "bpf-foreign",
167d2b
 };
167d2b
 
167d2b
 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);
167d2b
diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h
167d2b
index f79e384147..8894fd9b0a 100644
167d2b
--- a/src/basic/cgroup-util.h
167d2b
+++ b/src/basic/cgroup-util.h
167d2b
@@ -30,6 +30,7 @@ typedef enum CGroupController {
167d2b
         /* BPF-based pseudo-controllers, v2 only */
167d2b
         CGROUP_CONTROLLER_BPF_FIREWALL,
167d2b
         CGROUP_CONTROLLER_BPF_DEVICES,
167d2b
+        CGROUP_CONTROLLER_BPF_FOREIGN,
167d2b
 
167d2b
         _CGROUP_CONTROLLER_MAX,
167d2b
         _CGROUP_CONTROLLER_INVALID = -EINVAL,
167d2b
@@ -49,6 +50,7 @@ typedef enum CGroupMask {
167d2b
         CGROUP_MASK_PIDS = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_PIDS),
167d2b
         CGROUP_MASK_BPF_FIREWALL = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BPF_FIREWALL),
167d2b
         CGROUP_MASK_BPF_DEVICES = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BPF_DEVICES),
167d2b
+        CGROUP_MASK_BPF_FOREIGN = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BPF_FOREIGN),
167d2b
 
167d2b
         /* All real cgroup v1 controllers */
167d2b
         CGROUP_MASK_V1 = CGROUP_MASK_CPU|CGROUP_MASK_CPUACCT|CGROUP_MASK_BLKIO|CGROUP_MASK_MEMORY|CGROUP_MASK_DEVICES|CGROUP_MASK_PIDS,
167d2b
@@ -57,7 +59,7 @@ typedef enum CGroupMask {
167d2b
         CGROUP_MASK_V2 = CGROUP_MASK_CPU|CGROUP_MASK_CPUSET|CGROUP_MASK_IO|CGROUP_MASK_MEMORY|CGROUP_MASK_PIDS,
167d2b
 
167d2b
         /* All cgroup v2 BPF pseudo-controllers */
167d2b
-        CGROUP_MASK_BPF = CGROUP_MASK_BPF_FIREWALL|CGROUP_MASK_BPF_DEVICES,
167d2b
+        CGROUP_MASK_BPF = CGROUP_MASK_BPF_FIREWALL|CGROUP_MASK_BPF_DEVICES|CGROUP_MASK_BPF_FOREIGN,
167d2b
 
167d2b
         _CGROUP_MASK_ALL = CGROUP_CONTROLLER_TO_MASK(_CGROUP_CONTROLLER_MAX) - 1
167d2b
 } CGroupMask;
167d2b
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
167d2b
index a0af50518d..f3d5d89339 100644
167d2b
--- a/src/core/cgroup.c
167d2b
+++ b/src/core/cgroup.c
167d2b
@@ -8,6 +8,7 @@
167d2b
 #include "blockdev-util.h"
167d2b
 #include "bpf-devices.h"
167d2b
 #include "bpf-firewall.h"
167d2b
+#include "bpf-foreign.h"
167d2b
 #include "btrfs-util.h"
167d2b
 #include "bus-error.h"
167d2b
 #include "cgroup-setup.h"
167d2b
@@ -1160,6 +1161,12 @@ static void set_io_weight(Unit *u, const char *controller, uint64_t weight) {
167d2b
         (void) set_attribute_and_warn(u, controller, p, buf);
167d2b
 }
167d2b
 
167d2b
+static void cgroup_apply_bpf_foreign_program(Unit *u) {
167d2b
+        assert(u);
167d2b
+
167d2b
+        (void) bpf_foreign_install(u);
167d2b
+}
167d2b
+
167d2b
 static void cgroup_context_apply(
167d2b
                 Unit *u,
167d2b
                 CGroupMask apply_mask,
167d2b
@@ -1473,6 +1480,9 @@ static void cgroup_context_apply(
167d2b
 
167d2b
         if (apply_mask & CGROUP_MASK_BPF_FIREWALL)
167d2b
                 cgroup_apply_firewall(u);
167d2b
+
167d2b
+        if (apply_mask & CGROUP_MASK_BPF_FOREIGN)
167d2b
+                cgroup_apply_bpf_foreign_program(u);
167d2b
 }
167d2b
 
167d2b
 static bool unit_get_needs_bpf_firewall(Unit *u) {
167d2b
@@ -1505,6 +1515,17 @@ static bool unit_get_needs_bpf_firewall(Unit *u) {
167d2b
         return false;
167d2b
 }
167d2b
 
167d2b
+static bool unit_get_needs_bpf_foreign_program(Unit *u) {
167d2b
+        CGroupContext *c;
167d2b
+        assert(u);
167d2b
+
167d2b
+        c = unit_get_cgroup_context(u);
167d2b
+        if (!c)
167d2b
+                return false;
167d2b
+
167d2b
+        return !LIST_IS_EMPTY(c->bpf_foreign_programs);
167d2b
+}
167d2b
+
167d2b
 static CGroupMask unit_get_cgroup_mask(Unit *u) {
167d2b
         CGroupMask mask = 0;
167d2b
         CGroupContext *c;
167d2b
@@ -1556,6 +1577,9 @@ static CGroupMask unit_get_bpf_mask(Unit *u) {
167d2b
         if (unit_get_needs_bpf_firewall(u))
167d2b
                 mask |= CGROUP_MASK_BPF_FIREWALL;
167d2b
 
167d2b
+        if (unit_get_needs_bpf_foreign_program(u))
167d2b
+                mask |= CGROUP_MASK_BPF_FOREIGN;
167d2b
+
167d2b
         return mask;
167d2b
 }
167d2b
 
167d2b
@@ -3032,6 +3056,11 @@ static int cg_bpf_mask_supported(CGroupMask *ret) {
167d2b
         if (r > 0)
167d2b
                 mask |= CGROUP_MASK_BPF_DEVICES;
167d2b
 
167d2b
+        /* BPF pinned prog */
167d2b
+        r = bpf_foreign_supported();
167d2b
+        if (r > 0)
167d2b
+                mask |= CGROUP_MASK_BPF_FOREIGN;
167d2b
+
167d2b
         *ret = mask;
167d2b
         return 0;
167d2b
 }
167d2b
diff --git a/src/test/test-cgroup-mask.c b/src/test/test-cgroup-mask.c
167d2b
index b53e327c63..d721946f71 100644
167d2b
--- a/src/test/test-cgroup-mask.c
167d2b
+++ b/src/test/test-cgroup-mask.c
167d2b
@@ -140,7 +140,7 @@ static void test_cg_mask_to_string_one(CGroupMask mask, const char *t) {
167d2b
 
167d2b
 static void test_cg_mask_to_string(void) {
167d2b
         test_cg_mask_to_string_one(0, NULL);
167d2b
-        test_cg_mask_to_string_one(_CGROUP_MASK_ALL, "cpu cpuacct cpuset io blkio memory devices pids bpf-firewall bpf-devices");
167d2b
+        test_cg_mask_to_string_one(_CGROUP_MASK_ALL, "cpu cpuacct cpuset io blkio memory devices pids bpf-firewall bpf-devices bpf-foreign");
167d2b
         test_cg_mask_to_string_one(CGROUP_MASK_CPU, "cpu");
167d2b
         test_cg_mask_to_string_one(CGROUP_MASK_CPUACCT, "cpuacct");
167d2b
         test_cg_mask_to_string_one(CGROUP_MASK_CPUSET, "cpuset");
167d2b
-- 
167d2b
2.30.2
167d2b
167d2b
167d2b
From d1f0204f1ca7bf7b562a2fe1c90decbb5e04cf31 Mon Sep 17 00:00:00 2001
167d2b
From: Julia Kartseva <hex@fb.com>
167d2b
Date: Thu, 4 Feb 2021 00:04:19 -0800
167d2b
Subject: [PATCH 6/9] core: add bpf-foreign to fragment parser
167d2b
167d2b
- Parse a string for bpf attach type
167d2b
- Simplify bpffs path
167d2b
- Add foreign bpf program to cgroup context
167d2b
---
167d2b
 src/core/load-fragment-gperf.gperf.m4 |  3 +-
167d2b
 src/core/load-fragment.c              | 59 +++++++++++++++++++++++++++
167d2b
 src/core/load-fragment.h              |  1 +
167d2b
 3 files changed, 62 insertions(+), 1 deletion(-)
167d2b
167d2b
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
167d2b
index 21bbcffe41..bbb79a12ca 100644
167d2b
--- a/src/core/load-fragment-gperf.gperf.m4
167d2b
+++ b/src/core/load-fragment-gperf.gperf.m4
167d2b
@@ -234,7 +234,8 @@ $1.ManagedOOMSwap,                       config_parse_managed_oom_mode,
167d2b
 $1.ManagedOOMMemoryPressure,             config_parse_managed_oom_mode,               0,                                  offsetof($1, cgroup_context.moom_mem_pressure)
167d2b
 $1.ManagedOOMMemoryPressureLimit,        config_parse_managed_oom_mem_pressure_limit, 0,                                  offsetof($1, cgroup_context.moom_mem_pressure_limit)
167d2b
 $1.ManagedOOMPreference,                 config_parse_managed_oom_preference,         0,                                  offsetof($1, cgroup_context.moom_preference)
167d2b
-$1.NetClass,                             config_parse_warn_compat,                    DISABLED_LEGACY,                    0'
167d2b
+$1.NetClass,                             config_parse_warn_compat,                    DISABLED_LEGACY,                    0
167d2b
+$1.BPFProgram,                           config_parse_bpf_foreign_program,            0,                                  offsetof($1, cgroup_context)'
167d2b
 )m4_dnl
167d2b
 Unit.Description,                        config_parse_unit_string_printf,             0,                                  offsetof(Unit, description)
167d2b
 Unit.Documentation,                      config_parse_documentation,                  0,                                  offsetof(Unit, documentation)
167d2b
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
167d2b
index 6da623dbda..fb80acbc02 100644
167d2b
--- a/src/core/load-fragment.c
167d2b
+++ b/src/core/load-fragment.c
167d2b
@@ -19,6 +19,7 @@
167d2b
 #include "alloc-util.h"
167d2b
 #include "all-units.h"
167d2b
 #include "bpf-firewall.h"
167d2b
+#include "bpf-program.h"
167d2b
 #include "bus-error.h"
167d2b
 #include "bus-internal.h"
167d2b
 #include "bus-util.h"
167d2b
@@ -5580,6 +5581,64 @@ int config_parse_ip_filter_bpf_progs(
167d2b
         return 0;
167d2b
 }
167d2b
 
167d2b
+int config_parse_bpf_foreign_program(
167d2b
+                const char *unit,
167d2b
+                const char *filename,
167d2b
+                unsigned line,
167d2b
+                const char *section,
167d2b
+                unsigned section_line,
167d2b
+                const char *lvalue,
167d2b
+                int ltype,
167d2b
+                const char *rvalue,
167d2b
+                void *data,
167d2b
+                void *userdata) {
167d2b
+        _cleanup_free_ char *resolved = NULL, *word = NULL;
167d2b
+        CGroupContext *c = data;
167d2b
+        Unit *u = userdata;
167d2b
+        int attach_type, r;
167d2b
+
167d2b
+        assert(filename);
167d2b
+        assert(lvalue);
167d2b
+        assert(rvalue);
167d2b
+
167d2b
+        if (isempty(rvalue)) {
167d2b
+                while (c->bpf_foreign_programs)
167d2b
+                        cgroup_context_remove_bpf_foreign_program(c, c->bpf_foreign_programs);
167d2b
+
167d2b
+                return 0;
167d2b
+        }
167d2b
+
167d2b
+        r = extract_first_word(&rvalue, &word, ":", 0);
167d2b
+        if (r == -ENOMEM)
167d2b
+                return log_oom();
167d2b
+        if (r < 0) {
167d2b
+                log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse foreign BPF program, ignoring: %s", rvalue);
167d2b
+                return 0;
167d2b
+        }
167d2b
+
167d2b
+        attach_type = bpf_cgroup_attach_type_from_string(word);
167d2b
+        if (attach_type < 0) {
167d2b
+                log_syntax(unit, LOG_WARNING, filename, line, 0, "Unknown BPF attach type=%s, ignoring: %s", word, rvalue);
167d2b
+                return 0;
167d2b
+        }
167d2b
+
167d2b
+        r = unit_full_printf(u, rvalue, &resolved);
167d2b
+        if (r < 0) {
167d2b
+                log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", rvalue);
167d2b
+                return 0;
167d2b
+        }
167d2b
+
167d2b
+        r = path_simplify_and_warn(resolved, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
167d2b
+        if (r < 0)
167d2b
+                return 0;
167d2b
+
167d2b
+        r = cgroup_add_bpf_foreign_program(c, attach_type, resolved);
167d2b
+        if (r < 0)
167d2b
+                return log_error_errno(r, "Failed to add foreign BPF program to cgroup context: %m");
167d2b
+
167d2b
+        return 0;
167d2b
+}
167d2b
+
167d2b
 static int merge_by_names(Unit **u, Set *names, const char *id) {
167d2b
         char *k;
167d2b
         int r;
167d2b
diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h
167d2b
index b8a6d5fead..eebeda5747 100644
167d2b
--- a/src/core/load-fragment.h
167d2b
+++ b/src/core/load-fragment.h
167d2b
@@ -139,6 +139,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_swap_priority);
167d2b
 CONFIG_PARSER_PROTOTYPE(config_parse_mount_images);
167d2b
 CONFIG_PARSER_PROTOTYPE(config_parse_socket_timestamping);
167d2b
 CONFIG_PARSER_PROTOTYPE(config_parse_extension_images);
167d2b
+CONFIG_PARSER_PROTOTYPE(config_parse_bpf_foreign_program);
167d2b
 
167d2b
 /* gperf prototypes */
167d2b
 const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, GPERF_LEN_TYPE length);
167d2b
-- 
167d2b
2.30.2
167d2b
167d2b
167d2b
From 75d16dfd0b2fc813534081d3bdf1a957360078e5 Mon Sep 17 00:00:00 2001
167d2b
From: Julia Kartseva <hex@fb.com>
167d2b
Date: Tue, 3 Sep 2019 19:08:13 -0700
167d2b
Subject: [PATCH 7/9] tests: add unit file tests for BPFProgram=
167d2b
167d2b
- Pin trivial bpf programs to bpf filesystem, compose BPFProgram= option
167d2b
string and pass it to a unit. Programs store `0` in r0 BPF register for
167d2b
denying action, e.g. drop a packet.
167d2b
- Load trivial BPF programs
167d2b
- Test is skipped if not run as root or if it cannot lock enough
167d2b
memory.
167d2b
- For egress and ingress hooks, test BPFProgram= option along with
167d2b
IP{Egress|Ingress}FilterPath=, expected result should not depend on
167d2b
which rule is executed first.
167d2b
Expected results for BPF_CGROUP_INET_INGRESS:
167d2b
5 packets transmitted, 0 received, 100% packet loss, time 89ms
167d2b
167d2b
For BPF_CGROUP_INET_SOCK_CREATE:
167d2b
ping: socket: Operation not permitted
167d2b
---
167d2b
 src/test/meson.build                 |   6 +
167d2b
 src/test/test-bpf-foreign-programs.c | 332 +++++++++++++++++++++++++++
167d2b
 2 files changed, 338 insertions(+)
167d2b
 create mode 100644 src/test/test-bpf-foreign-programs.c
167d2b
167d2b
diff --git a/src/test/meson.build b/src/test/meson.build
167d2b
index c752e995f6..6349034aeb 100644
167d2b
--- a/src/test/meson.build
167d2b
+++ b/src/test/meson.build
167d2b
@@ -335,6 +335,12 @@ tests += [
167d2b
           libblkid],
167d2b
          core_includes],
167d2b
 
167d2b
+        [['src/test/test-bpf-foreign-programs.c'],
167d2b
+         [libcore,
167d2b
+          libshared],
167d2b
+         [],
167d2b
+         core_includes],
167d2b
+
167d2b
         [['src/test/test-watch-pid.c'],
167d2b
          [libcore,
167d2b
           libshared],
167d2b
diff --git a/src/test/test-bpf-foreign-programs.c b/src/test/test-bpf-foreign-programs.c
167d2b
new file mode 100644
167d2b
index 0000000000..e703924077
167d2b
--- /dev/null
167d2b
+++ b/src/test/test-bpf-foreign-programs.c
167d2b
@@ -0,0 +1,332 @@
167d2b
+/* SPDX-License-Identifier: LGPL-2.1+ */
167d2b
+
167d2b
+#include <fcntl.h>
167d2b
+#include <linux/bpf_insn.h>
167d2b
+#include <string.h>
167d2b
+#include <sys/mman.h>
167d2b
+#include <unistd.h>
167d2b
+
167d2b
+#include "bpf-foreign.h"
167d2b
+#include "load-fragment.h"
167d2b
+#include "manager.h"
167d2b
+#include "process-util.h"
167d2b
+#include "rlimit-util.h"
167d2b
+#include "rm-rf.h"
167d2b
+#include "service.h"
167d2b
+#include "tests.h"
167d2b
+#include "unit.h"
167d2b
+#include "virt.h"
167d2b
+
167d2b
+struct Test {
167d2b
+        const char *option_name;
167d2b
+        enum bpf_prog_type prog_type;
167d2b
+        enum bpf_attach_type attach_type;
167d2b
+        const char *bpffs_path;
167d2b
+};
167d2b
+
167d2b
+typedef struct Test Test;
167d2b
+
167d2b
+#define BPFFS_PATH(prog_suffix) ("/sys/fs/bpf/test-bpf-foreing-" # prog_suffix)
167d2b
+static const Test single_prog[] = {
167d2b
+        {
167d2b
+                .option_name = "BPFProgram",
167d2b
+                .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
167d2b
+                .attach_type = BPF_CGROUP_INET_INGRESS,
167d2b
+                .bpffs_path = BPFFS_PATH("trivial-skb"),
167d2b
+        },
167d2b
+};
167d2b
+static const Test path_split_test[] = {
167d2b
+        {
167d2b
+                .option_name = "BPFProgram",
167d2b
+                .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
167d2b
+                .attach_type = BPF_CGROUP_INET_INGRESS,
167d2b
+                .bpffs_path = BPFFS_PATH("path:split:test"),
167d2b
+        },
167d2b
+};
167d2b
+
167d2b
+static const Test same_prog_same_hook[] = {
167d2b
+        {
167d2b
+                .option_name = "BPFProgram",
167d2b
+                .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
167d2b
+                .attach_type = BPF_CGROUP_INET_SOCK_CREATE,
167d2b
+                .bpffs_path = BPFFS_PATH("trivial-sock"),
167d2b
+        },
167d2b
+        {
167d2b
+                .option_name = "BPFProgram",
167d2b
+                .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
167d2b
+                .attach_type = BPF_CGROUP_INET_SOCK_CREATE,
167d2b
+                .bpffs_path = BPFFS_PATH("trivial-sock"),
167d2b
+        }
167d2b
+};
167d2b
+
167d2b
+static const Test multi_prog_same_hook[] = {
167d2b
+        {
167d2b
+                .option_name = "BPFProgram",
167d2b
+                .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
167d2b
+                .attach_type = BPF_CGROUP_INET_SOCK_CREATE,
167d2b
+                .bpffs_path = BPFFS_PATH("trivial-sock-0"),
167d2b
+        },
167d2b
+        {
167d2b
+                .option_name = "BPFProgram",
167d2b
+                .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
167d2b
+                .attach_type = BPF_CGROUP_INET_SOCK_CREATE,
167d2b
+                .bpffs_path = BPFFS_PATH("trivial-sock-1"),
167d2b
+        }
167d2b
+};
167d2b
+
167d2b
+static const Test same_prog_multi_hook[] = {
167d2b
+        {
167d2b
+                .option_name = "BPFProgram",
167d2b
+                .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
167d2b
+                .attach_type = BPF_CGROUP_INET_INGRESS,
167d2b
+                .bpffs_path = BPFFS_PATH("trivial-skb"),
167d2b
+        },
167d2b
+        {
167d2b
+                .option_name = "BPFProgram",
167d2b
+                .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
167d2b
+                .attach_type = BPF_CGROUP_INET_EGRESS,
167d2b
+                .bpffs_path = BPFFS_PATH("trivial-skb"),
167d2b
+        }
167d2b
+};
167d2b
+
167d2b
+static const Test same_prog_multi_option_0[] = {
167d2b
+        {
167d2b
+                .option_name = "BPFProgram",
167d2b
+                .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
167d2b
+                .attach_type = BPF_CGROUP_INET_INGRESS,
167d2b
+                .bpffs_path = BPFFS_PATH("trivial-skb"),
167d2b
+        },
167d2b
+        {
167d2b
+                .option_name = "IPIngressFilterPath",
167d2b
+                .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
167d2b
+                .attach_type = BPF_CGROUP_INET_INGRESS,
167d2b
+                .bpffs_path = BPFFS_PATH("trivial-skb"),
167d2b
+        }
167d2b
+};
167d2b
+
167d2b
+static const Test same_prog_multi_option_1[] = {
167d2b
+        {
167d2b
+                .option_name = "IPEgressFilterPath",
167d2b
+                .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
167d2b
+                .attach_type = BPF_CGROUP_INET_EGRESS,
167d2b
+                .bpffs_path = BPFFS_PATH("trivial-skb"),
167d2b
+        },
167d2b
+        {
167d2b
+                .option_name = "BPFProgram",
167d2b
+                .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
167d2b
+                .attach_type = BPF_CGROUP_INET_EGRESS,
167d2b
+                .bpffs_path = BPFFS_PATH("trivial-skb"),
167d2b
+        }
167d2b
+};
167d2b
+#undef BPFFS_PATH
167d2b
+
167d2b
+static int bpf_foreign_test_to_string(enum bpf_attach_type attach_type, const char *bpffs_path, char **ret_str) {
167d2b
+        const char *s = NULL;
167d2b
+
167d2b
+        assert_se(bpffs_path);
167d2b
+        assert_se(ret_str);
167d2b
+
167d2b
+        assert_se(s = bpf_cgroup_attach_type_to_string(attach_type));
167d2b
+        assert_se(*ret_str = strjoin(s, ":", bpffs_path));
167d2b
+
167d2b
+        return 0;
167d2b
+}
167d2b
+
167d2b
+static char **unlink_paths_and_free(char **paths) {
167d2b
+        char **i;
167d2b
+
167d2b
+        STRV_FOREACH(i, paths)
167d2b
+                (void) unlink(*i);
167d2b
+
167d2b
+        return strv_free(paths);
167d2b
+}
167d2b
+
167d2b
+DEFINE_TRIVIAL_CLEANUP_FUNC(char **, unlink_paths_and_free);
167d2b
+
167d2b
+static int pin_programs(Unit *u, CGroupContext *cc, const Test *test_suite, size_t test_suite_size, char ***paths_ret) {
167d2b
+        _cleanup_(unlink_paths_and_freep) char **bpffs_paths = NULL;
167d2b
+        static const struct bpf_insn trivial[] = {
167d2b
+                BPF_MOV64_IMM(BPF_REG_0, 0),
167d2b
+                BPF_EXIT_INSN()
167d2b
+        };
167d2b
+        char log_buf[0xffff];
167d2b
+        int r;
167d2b
+
167d2b
+        assert_se(paths_ret);
167d2b
+
167d2b
+        for (size_t i = 0; i < test_suite_size; i++) {
167d2b
+                _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
167d2b
+                _cleanup_free_ char *str = NULL;
167d2b
+
167d2b
+                r = bpf_foreign_test_to_string(test_suite[i].attach_type, test_suite[i].bpffs_path, &str);
167d2b
+                if (r < 0)
167d2b
+                        return log_error_errno(r, "Failed to convert program to string");
167d2b
+
167d2b
+                r = bpf_program_new(test_suite[i].prog_type, &prog);
167d2b
+                if (r < 0)
167d2b
+                        return log_error_errno(r, "Failed to create program '%s'", str);
167d2b
+
167d2b
+                r = bpf_program_add_instructions(prog, trivial, ELEMENTSOF(trivial));
167d2b
+                if (r < 0)
167d2b
+                        return log_error_errno(r, "Failed to add trivial instructions for '%s'", str);
167d2b
+
167d2b
+                r = bpf_program_load_kernel(prog, log_buf, ELEMENTSOF(log_buf));
167d2b
+                if (r < 0)
167d2b
+                        return log_error_errno(r, "Failed to load BPF program '%s'", str);
167d2b
+
167d2b
+                if (strv_contains(bpffs_paths, test_suite[i].bpffs_path))
167d2b
+                        continue;
167d2b
+
167d2b
+                r = strv_extend(&bpffs_paths, test_suite[i].bpffs_path);
167d2b
+                if (r < 0)
167d2b
+                        return log_error_errno(r, "Failed to put path into a vector: %m");
167d2b
+
167d2b
+                r = bpf_program_pin(prog->kernel_fd, test_suite[i].bpffs_path);
167d2b
+                if (r < 0)
167d2b
+                        return log_error_errno(r, "Failed to pin BPF program '%s'", str);
167d2b
+        }
167d2b
+
167d2b
+        *paths_ret = TAKE_PTR(bpffs_paths);
167d2b
+        return 0;
167d2b
+}
167d2b
+
167d2b
+static int test_bpf_cgroup_programs(Manager *m, const char *unit_name, const Test *test_suite, size_t test_suite_size) {
167d2b
+        _cleanup_(unlink_paths_and_freep) char **bpffs_paths = NULL;
167d2b
+        _cleanup_(unit_freep) Unit *u = NULL;
167d2b
+        CGroupContext *cc = NULL;
167d2b
+        int cld_code, r;
167d2b
+
167d2b
+        assert_se(u = unit_new(m, sizeof(Service)));
167d2b
+        assert_se(unit_add_name(u, unit_name) == 0);
167d2b
+        assert_se(cc = unit_get_cgroup_context(u));
167d2b
+
167d2b
+        r = pin_programs(u, cc, test_suite, test_suite_size, &bpffs_paths);
167d2b
+        if (r < 0)
167d2b
+                return log_error_errno(r, "Failed to pin programs: %m");
167d2b
+
167d2b
+        for (size_t i = 0; i < test_suite_size; i++) {
167d2b
+                if (streq(test_suite[i].option_name, "BPFProgram")) {
167d2b
+                        _cleanup_free_ char *option = NULL;
167d2b
+                        r = bpf_foreign_test_to_string(test_suite[i].attach_type, test_suite[i].bpffs_path, &option);
167d2b
+                        if (r < 0)
167d2b
+                                return log_error_errno(r, "Failed to compose option string: %m");
167d2b
+                        r = config_parse_bpf_foreign_program(
167d2b
+                                        u->id, "filename", 1, "Service", 1, test_suite[i].option_name, 0, option, cc, u);
167d2b
+
167d2b
+                        if (r < 0)
167d2b
+                                return log_error_errno(r, "Failed to parse option string '%s': %m", option);
167d2b
+                } else if (STR_IN_SET(test_suite[i].option_name, "IPIngressFilterPath", "IPEgressFilterPath")) {
167d2b
+                        const char *option = test_suite[i].bpffs_path;
167d2b
+                        void *paths = NULL;
167d2b
+
167d2b
+                        if (streq(test_suite[i].option_name, "IPIngressFilterPath"))
167d2b
+                                paths = &cc->ip_filters_ingress;
167d2b
+                        else
167d2b
+                                paths = &cc->ip_filters_egress;
167d2b
+
167d2b
+                        r = config_parse_ip_filter_bpf_progs(
167d2b
+                                        u->id, "filename", 1, "Service", 1, test_suite[i].option_name, 0, option, paths, u);
167d2b
+                        if (r < 0)
167d2b
+                                return log_error_errno(r, "Failed to parse option string '%s': %m", option);
167d2b
+                }
167d2b
+        }
167d2b
+
167d2b
+        r = config_parse_exec(
167d2b
+                        u->id,
167d2b
+                        "filename",
167d2b
+                        1,
167d2b
+                        "Service",
167d2b
+                        1,
167d2b
+                        "ExecStart",
167d2b
+                        SERVICE_EXEC_START,
167d2b
+                        "-/bin/ping -c 5 127.0.0.1 -W 1",
167d2b
+                        SERVICE(u)->exec_command,
167d2b
+                        u);
167d2b
+        if (r < 0)
167d2b
+                return log_error_errno(r, "Failed to parse ExecStart");
167d2b
+
167d2b
+        SERVICE(u)->type = SERVICE_ONESHOT;
167d2b
+        u->load_state = UNIT_LOADED;
167d2b
+
167d2b
+        r = unit_start(u);
167d2b
+        if (r < 0)
167d2b
+                return log_error_errno(r, "Unit start failed %m");
167d2b
+
167d2b
+        while (!IN_SET(SERVICE(u)->state, SERVICE_DEAD, SERVICE_FAILED)) {
167d2b
+                r = sd_event_run(m->event, UINT64_MAX);
167d2b
+                if (r < 0)
167d2b
+                        return log_error_errno(errno, "Event run failed %m");
167d2b
+        }
167d2b
+
167d2b
+        cld_code = SERVICE(u)->exec_command[SERVICE_EXEC_START]->exec_status.code;
167d2b
+        if (cld_code != CLD_EXITED)
167d2b
+                return log_error_errno(SYNTHETIC_ERRNO(EBUSY),
167d2b
+                                "ExecStart didn't exited, code='%s'", sigchld_code_to_string(cld_code));
167d2b
+
167d2b
+        if (SERVICE(u)->state != SERVICE_DEAD)
167d2b
+                return log_error_errno(SYNTHETIC_ERRNO(EBUSY), "Service is not dead");
167d2b
+
167d2b
+        return r;
167d2b
+}
167d2b
+
167d2b
+int main(int argc, char *argv[]) {
167d2b
+        _cleanup_(rm_rf_physical_and_freep) char *runtime_dir = NULL;
167d2b
+        _cleanup_(manager_freep) Manager *m = NULL;
167d2b
+        _cleanup_free_ char *unit_dir = NULL;
167d2b
+        struct rlimit rl;
167d2b
+        int r;
167d2b
+
167d2b
+        test_setup_logging(LOG_DEBUG);
167d2b
+
167d2b
+        if (detect_container() > 0)
167d2b
+                return log_tests_skipped("test-bpf fails inside LXC and Docker containers: https://github.com/systemd/systemd/issues/9666");
167d2b
+
167d2b
+        if (getuid() != 0)
167d2b
+                return log_tests_skipped("not running as root");
167d2b
+
167d2b
+        assert_se(getrlimit(RLIMIT_MEMLOCK, &rl) >= 0);
167d2b
+        rl.rlim_cur = rl.rlim_max = MAX(rl.rlim_max, CAN_MEMLOCK_SIZE);
167d2b
+        (void) setrlimit_closest(RLIMIT_MEMLOCK, &rl);
167d2b
+
167d2b
+        if (!can_memlock())
167d2b
+                return log_tests_skipped("Can't use mlock(), skipping.");
167d2b
+
167d2b
+        r = cg_all_unified();
167d2b
+        if (r <= 0)
167d2b
+                return log_tests_skipped_errno(r, "Unified hierarchy is required, skipping.");
167d2b
+
167d2b
+        r = enter_cgroup_subroot(NULL);
167d2b
+        if (r == -ENOMEDIUM)
167d2b
+                return log_tests_skipped("cgroupfs not available");
167d2b
+
167d2b
+        assert_se(get_testdata_dir("units", &unit_dir) >= 0);
167d2b
+        assert_se(set_unit_path(unit_dir) >= 0);
167d2b
+        assert_se(runtime_dir = setup_fake_runtime_dir());
167d2b
+
167d2b
+        assert_se(manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_BASIC, &m) >= 0);
167d2b
+        assert_se(manager_startup(m, NULL, NULL) >= 0);
167d2b
+
167d2b
+        assert_se(test_bpf_cgroup_programs(m,
167d2b
+                                "single_prog.service", single_prog, ELEMENTSOF(single_prog)) >= 0);
167d2b
+        assert_se(test_bpf_cgroup_programs(m,
167d2b
+                                "multi_prog_same_hook.service",
167d2b
+                                multi_prog_same_hook, ELEMENTSOF(multi_prog_same_hook)) >= 0);
167d2b
+        assert_se(test_bpf_cgroup_programs(m,
167d2b
+                                "same_prog_multi_hook.service",
167d2b
+                                same_prog_multi_hook, ELEMENTSOF(same_prog_multi_hook)) >= 0);
167d2b
+        assert_se(test_bpf_cgroup_programs(m,
167d2b
+                                "same_prog_multi_option_0.service",
167d2b
+                                same_prog_multi_option_0, ELEMENTSOF(same_prog_multi_option_0)) >= 0);
167d2b
+        assert_se(test_bpf_cgroup_programs(m,
167d2b
+                                "same_prog_multi_option_1.service",
167d2b
+                                same_prog_multi_option_1, ELEMENTSOF(same_prog_multi_option_1)) >= 0);
167d2b
+        assert_se(test_bpf_cgroup_programs(m,
167d2b
+                                "same_prog_same_hook.service",
167d2b
+                                same_prog_same_hook,
167d2b
+                                ELEMENTSOF(same_prog_same_hook)) >= 0);
167d2b
+        assert_se(test_bpf_cgroup_programs(m,
167d2b
+                                "path_split_test.service",
167d2b
+                                path_split_test,
167d2b
+                                ELEMENTSOF(path_split_test)) >= 0);
167d2b
+        return 0;
167d2b
+}
167d2b
-- 
167d2b
2.30.2
167d2b
167d2b
167d2b
From faebec942b34c62d8dce512839c3a43be6844394 Mon Sep 17 00:00:00 2001
167d2b
From: Julia Kartseva <hex@fb.com>
167d2b
Date: Tue, 8 Dec 2020 22:06:56 -0800
167d2b
Subject: [PATCH 8/9] man: add BPFProgram= documentation
167d2b
167d2b
---
167d2b
 man/systemd.resource-control.xml | 52 ++++++++++++++++++++++++++++++++
167d2b
 1 file changed, 52 insertions(+)
167d2b
167d2b
diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml
167d2b
index 1bc45a9f00..a2d01f7afb 100644
167d2b
--- a/man/systemd.resource-control.xml
167d2b
+++ b/man/systemd.resource-control.xml
167d2b
@@ -696,6 +696,12 @@
167d2b
           <para>If these settings are used multiple times in the same unit all the specified programs are attached. If an
167d2b
           empty string is assigned to these settings the program list is reset and all previous specified programs ignored.</para>
167d2b
 
167d2b
+          <para>If the path <replaceable>BPF_FS_PROGRAM_PATH</replaceable> in <varname>IPIngressFilterPath=</varname> assignment
167d2b
+          is already being handled by <varname>BPFProgram=</varname> ingress hook, e.g.
167d2b
+          <varname>BPFProgram=</varname><constant>ingress</constant>:<replaceable>BPF_FS_PROGRAM_PATH</replaceable>,
167d2b
+          the assignment will still be considered valid and the program will be attached to a cgroup. Same for
167d2b
+          <varname>IPEgressFilterPath=</varname> path and <constant>egress</constant> hook.</para>
167d2b
+
167d2b
           <para>Note that for socket-activated services, the IP filter programs configured on the socket unit apply to
167d2b
           all sockets associated with it directly, but not to any sockets created by the ultimately activated services
167d2b
           for it. Conversely, the IP filter programs configured for the service are not applied to any sockets passed into
167d2b
@@ -710,6 +716,52 @@
167d2b
         </listitem>
167d2b
       </varlistentry>
167d2b
 
167d2b
+      <varlistentry>
167d2b
+        <term><varname>BPFProgram=<replaceable>type</replaceable><constant>:</constant><replaceable>program-path</replaceable></varname></term>
167d2b
+        <listitem>
167d2b
+          <para>Add a custom cgroup BPF program.</para>
167d2b
+
167d2b
+          <para><varname>BPFProgram=</varname> allows attaching BPF hooks to the cgroup of a systemd unit.
167d2b
+          (This generalizes the functionality exposed via <varname>IPEgressFilterPath=</varname> for egress and
167d2b
+          <varname>IPIngressFilterPath=</varname> for ingress.)
167d2b
+          Cgroup-bpf hooks in the form of BPF programs loaded to the BPF filesystem are attached with cgroup-bpf attach
167d2b
+          flags determined by the unit. For details about attachment types and flags see <ulink
167d2b
+          url="https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/include/uapi/linux/bpf.h"/>.
167d2b
+          For general BPF documentation please refer to <ulink url="https://www.kernel.org/doc/html/latest/bpf/index.html"/>.</para>
167d2b
+
167d2b
+          <para>The specification of BPF program consists of a <replaceable>type</replaceable> followed by a
167d2b
+          <replaceable>program-path</replaceable> with <literal>:</literal> as the separator:
167d2b
+          <replaceable>type</replaceable><constant>:</constant><replaceable>program-path</replaceable>.</para>
167d2b
+
167d2b
+          <para><replaceable>type</replaceable> is the string name of BPF attach type also used in
167d2b
+          <command>bpftool</command>. <replaceable>type</replaceable> can be one of <constant>egress</constant>,
167d2b
+          <constant>ingress</constant>, <constant>sock_create</constant>, <constant>sock_ops</constant>,
167d2b
+          <constant>device</constant>, <constant>bind4</constant>, <constant>bind6</constant>,
167d2b
+          <constant>connect4</constant>, <constant>connect6</constant>, <constant>post_bind4</constant>,
167d2b
+          <constant>post_bind6</constant>, <constant>sendmsg4</constant>, <constant>sendmsg6</constant>,
167d2b
+          <constant>sysctl</constant>, <constant>recvmsg4</constant>, <constant>recvmsg6</constant>,
167d2b
+          <constant>getsockopt</constant>, <constant>setsockopt</constant>.</para>
167d2b
+
167d2b
+          <para>Setting <varname>BPFProgram=</varname> to an empty value makes previous assignments ineffective.</para>
167d2b
+          <para>Multiple assignments of the same <replaceable>type</replaceable>:<replaceable>program-path</replaceable>
167d2b
+          value have the same effect as a single assignment: the program with the path <replaceable>program-path</replaceable>
167d2b
+          will be attached to cgroup hook <replaceable>type</replaceable> just once.</para>
167d2b
+          <para>If BPF <constant>egress</constant> pinned to <replaceable>program-path</replaceable> path is already being
167d2b
+          handled by <varname>IPEgressFilterPath=</varname>, <varname>BPFProgram=</varname>
167d2b
+          assignment will be considered valid and <varname>BPFProgram=</varname> will be attached to a cgroup.
167d2b
+          Similarly for <constant>ingress</constant> hook and <varname>IPIngressFilterPath=</varname> assignment.</para>
167d2b
+
167d2b
+          <para>BPF programs passed with <varname>BPFProgram=</varname> are attached to the cgroup of a unit with BPF
167d2b
+          attach flag <constant>multi</constant>, that allows further attachments of the same
167d2b
+          <replaceable>type</replaceable> within cgroup hierarchy topped by the unit cgroup.</para>
167d2b
+
167d2b
+          <para>Examples:<programlisting>
167d2b
+BPFProgram=egress:/sys/fs/bpf/egress-hook
167d2b
+BPFProgram=bind6:/sys/fs/bpf/sock-addr-hook
167d2b
+</programlisting></para>
167d2b
+        </listitem>
167d2b
+      </varlistentry>
167d2b
+
167d2b
       <varlistentry>
167d2b
         <term><varname>DeviceAllow=</varname></term>
167d2b
 
167d2b
-- 
167d2b
2.30.2
167d2b
167d2b
167d2b
From 3e1bfa33f198feb47431d19f4d48383e070dbdca Mon Sep 17 00:00:00 2001
167d2b
From: Julia Kartseva <hex@fb.com>
167d2b
Date: Tue, 8 Dec 2020 22:07:30 -0800
167d2b
Subject: [PATCH 9/9] dbus-cgroup: add BPFProgram= dbus support
167d2b
167d2b
- Handle BPFProgram= property in string format
167d2b
"<bpf_attach_type>:<bpffs_path>", e.g. egress:/sys/fs/bpf/egress-hook.
167d2b
- Add dbus getter to list foreign bpf programs attached to a cgroup.
167d2b
---
167d2b
 man/org.freedesktop.systemd1.xml |  36 +++++++++++
167d2b
 src/core/dbus-cgroup.c           | 108 +++++++++++++++++++++++++++++++
167d2b
 src/shared/bus-unit-util.c       |  20 ++++++
167d2b
 src/systemctl/systemctl-show.c   |  17 +++++
167d2b
 4 files changed, 181 insertions(+)
167d2b
167d2b
diff --git a/man/org.freedesktop.systemd1.xml b/man/org.freedesktop.systemd1.xml
167d2b
index aff43217e1..d233d96d24 100644
167d2b
--- a/man/org.freedesktop.systemd1.xml
167d2b
+++ b/man/org.freedesktop.systemd1.xml
167d2b
@@ -2472,6 +2472,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
167d2b
       readonly u ManagedOOMMemoryPressureLimit = ...;
167d2b
       @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
167d2b
       readonly s ManagedOOMPreference = '...';
167d2b
+      @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
167d2b
+      readonly a(ss) BPFProgram = [...];
167d2b
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
167d2b
       readonly as Environment = ['...', ...];
167d2b
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
167d2b
@@ -3004,6 +3006,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
167d2b
 
167d2b
     
167d2b
 
167d2b
+    
167d2b
+
167d2b
     
167d2b
 
167d2b
     
167d2b
@@ -3560,6 +3564,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
167d2b
 
167d2b
     <variablelist class="dbus-property" generated="True" extra-ref="ManagedOOMPreference"/>
167d2b
 
167d2b
+    <variablelist class="dbus-property" generated="True" extra-ref="BPFProgram"/>
167d2b
+
167d2b
     <variablelist class="dbus-property" generated="True" extra-ref="Environment"/>
167d2b
 
167d2b
     <variablelist class="dbus-property" generated="True" extra-ref="EnvironmentFiles"/>
167d2b
@@ -4245,6 +4251,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
167d2b
       readonly u ManagedOOMMemoryPressureLimit = ...;
167d2b
       @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
167d2b
       readonly s ManagedOOMPreference = '...';
167d2b
+      @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
167d2b
+      readonly a(ss) BPFProgram = [...];
167d2b
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
167d2b
       readonly as Environment = ['...', ...];
167d2b
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
167d2b
@@ -4805,6 +4813,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
167d2b
 
167d2b
     
167d2b
 
167d2b
+    
167d2b
+
167d2b
     
167d2b
 
167d2b
     
167d2b
@@ -5359,6 +5369,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
167d2b
 
167d2b
     <variablelist class="dbus-property" generated="True" extra-ref="ManagedOOMPreference"/>
167d2b
 
167d2b
+    <variablelist class="dbus-property" generated="True" extra-ref="BPFProgram"/>
167d2b
+
167d2b
     <variablelist class="dbus-property" generated="True" extra-ref="Environment"/>
167d2b
 
167d2b
     <variablelist class="dbus-property" generated="True" extra-ref="EnvironmentFiles"/>
167d2b
@@ -5946,6 +5958,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {
167d2b
       readonly u ManagedOOMMemoryPressureLimit = ...;
167d2b
       @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
167d2b
       readonly s ManagedOOMPreference = '...';
167d2b
+      @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
167d2b
+      readonly a(ss) BPFProgram = [...];
167d2b
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
167d2b
       readonly as Environment = ['...', ...];
167d2b
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
167d2b
@@ -6434,6 +6448,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {
167d2b
 
167d2b
     
167d2b
 
167d2b
+    
167d2b
+
167d2b
     
167d2b
 
167d2b
     
167d2b
@@ -6906,6 +6922,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {
167d2b
 
167d2b
     <variablelist class="dbus-property" generated="True" extra-ref="ManagedOOMPreference"/>
167d2b
 
167d2b
+    <variablelist class="dbus-property" generated="True" extra-ref="BPFProgram"/>
167d2b
+
167d2b
     <variablelist class="dbus-property" generated="True" extra-ref="Environment"/>
167d2b
 
167d2b
     <variablelist class="dbus-property" generated="True" extra-ref="EnvironmentFiles"/>
167d2b
@@ -7614,6 +7632,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {
167d2b
       readonly u ManagedOOMMemoryPressureLimit = ...;
167d2b
       @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
167d2b
       readonly s ManagedOOMPreference = '...';
167d2b
+      @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
167d2b
+      readonly a(ss) BPFProgram = [...];
167d2b
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
167d2b
       readonly as Environment = ['...', ...];
167d2b
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
167d2b
@@ -8088,6 +8108,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {
167d2b
 
167d2b
     
167d2b
 
167d2b
+    
167d2b
+
167d2b
     
167d2b
 
167d2b
     
167d2b
@@ -8546,6 +8568,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {
167d2b
 
167d2b
     <variablelist class="dbus-property" generated="True" extra-ref="ManagedOOMPreference"/>
167d2b
 
167d2b
+    <variablelist class="dbus-property" generated="True" extra-ref="BPFProgram"/>
167d2b
+
167d2b
     <variablelist class="dbus-property" generated="True" extra-ref="Environment"/>
167d2b
 
167d2b
     <variablelist class="dbus-property" generated="True" extra-ref="EnvironmentFiles"/>
167d2b
@@ -9107,6 +9131,8 @@ node /org/freedesktop/systemd1/unit/system_2eslice {
167d2b
       readonly u ManagedOOMMemoryPressureLimit = ...;
167d2b
       @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
167d2b
       readonly s ManagedOOMPreference = '...';
167d2b
+      @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
167d2b
+      readonly a(ss) BPFProgram = [...];
167d2b
   };
167d2b
   interface org.freedesktop.DBus.Peer { ... };
167d2b
   interface org.freedesktop.DBus.Introspectable { ... };
167d2b
@@ -9245,6 +9271,8 @@ node /org/freedesktop/systemd1/unit/system_2eslice {
167d2b
 
167d2b
     
167d2b
 
167d2b
+    
167d2b
+
167d2b
     
167d2b
 
167d2b
     <variablelist class="dbus-interface" generated="True" extra-ref="org.freedesktop.systemd1.Unit"/>
167d2b
@@ -9387,6 +9415,8 @@ node /org/freedesktop/systemd1/unit/system_2eslice {
167d2b
 
167d2b
     <variablelist class="dbus-property" generated="True" extra-ref="ManagedOOMPreference"/>
167d2b
 
167d2b
+    <variablelist class="dbus-property" generated="True" extra-ref="BPFProgram"/>
167d2b
+
167d2b
     
167d2b
 
167d2b
     <refsect2>
167d2b
@@ -9548,6 +9578,8 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope {
167d2b
       readonly u ManagedOOMMemoryPressureLimit = ...;
167d2b
       @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
167d2b
       readonly s ManagedOOMPreference = '...';
167d2b
+      @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
167d2b
+      readonly a(ss) BPFProgram = [...];
167d2b
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
167d2b
       readonly s KillMode = '...';
167d2b
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
167d2b
@@ -9702,6 +9734,8 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope {
167d2b
 
167d2b
     
167d2b
 
167d2b
+    
167d2b
+
167d2b
     
167d2b
 
167d2b
     
167d2b
@@ -9870,6 +9904,8 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope {
167d2b
 
167d2b
     <variablelist class="dbus-property" generated="True" extra-ref="ManagedOOMPreference"/>
167d2b
 
167d2b
+    <variablelist class="dbus-property" generated="True" extra-ref="BPFProgram"/>
167d2b
+
167d2b
     <variablelist class="dbus-property" generated="True" extra-ref="KillMode"/>
167d2b
 
167d2b
     <variablelist class="dbus-property" generated="True" extra-ref="KillSignal"/>
167d2b
diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c
167d2b
index 04d2ba34f3..604cecf84c 100644
167d2b
--- a/src/core/dbus-cgroup.c
167d2b
+++ b/src/core/dbus-cgroup.c
167d2b
@@ -5,6 +5,7 @@
167d2b
 #include "af-list.h"
167d2b
 #include "alloc-util.h"
167d2b
 #include "bpf-firewall.h"
167d2b
+#include "bpf-foreign.h"
167d2b
 #include "bus-get-properties.h"
167d2b
 #include "cgroup-util.h"
167d2b
 #include "cgroup.h"
167d2b
@@ -347,6 +348,33 @@ static int property_get_ip_address_access(
167d2b
         return sd_bus_message_close_container(reply);
167d2b
 }
167d2b
 
167d2b
+static int property_get_bpf_foreign_program( /* sd-bus property getter registered for "BPFProgram" in bus_cgroup_vtable */
167d2b
+                sd_bus *bus,
167d2b
+                const char *path,
167d2b
+                const char *interface,
167d2b
+                const char *property,
167d2b
+                sd_bus_message *reply, /* message the property value is appended to */
167d2b
+                void *userdata, /* treated as a CGroupContext pointer below */
167d2b
+                sd_bus_error *error) {
167d2b
+        CGroupContext *c = userdata;
167d2b
+        CGroupBPFForeignProgram *p;
167d2b
+        int r; /* return code of the sd-bus calls; negative values are passed straight back to the caller */
167d2b
+
167d2b
+        r = sd_bus_message_open_container(reply, 'a', "(ss)"); /* reply value is an array of (string, string) structs */
167d2b
+        if (r < 0)
167d2b
+                return r;
167d2b
+
167d2b
+        LIST_FOREACH(programs, p, c->bpf_foreign_programs) { /* one array element per entry in the context's foreign program list */
167d2b
+                const char *attach_type = bpf_cgroup_attach_type_to_string(p->attach_type);
167d2b
+
167d2b
+                r = sd_bus_message_append(reply, "(ss)", attach_type, p->bpffs_path); /* pair: attach type name, bpffs path */
167d2b
+                if (r < 0)
167d2b
+                        return r;
167d2b
+        }
167d2b
+
167d2b
+        return sd_bus_message_close_container(reply); /* result of closing the array is the getter's return value */
167d2b
+}
167d2b
+
167d2b
 const sd_bus_vtable bus_cgroup_vtable[] = {
167d2b
         SD_BUS_VTABLE_START(0),
167d2b
         SD_BUS_PROPERTY("Delegate", "b", bus_property_get_bool, offsetof(CGroupContext, delegate), 0),
167d2b
@@ -398,6 +426,7 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
167d2b
         SD_BUS_PROPERTY("ManagedOOMMemoryPressure", "s", property_get_managed_oom_mode, offsetof(CGroupContext, moom_mem_pressure), 0),
167d2b
         SD_BUS_PROPERTY("ManagedOOMMemoryPressureLimit", "u", NULL, offsetof(CGroupContext, moom_mem_pressure_limit), 0),
167d2b
         SD_BUS_PROPERTY("ManagedOOMPreference", "s", property_get_managed_oom_preference, offsetof(CGroupContext, moom_preference), 0),
167d2b
+        SD_BUS_PROPERTY("BPFProgram", "a(ss)", property_get_bpf_foreign_program, 0, 0),
167d2b
         SD_BUS_VTABLE_END
167d2b
 };
167d2b
 
167d2b
@@ -570,6 +599,85 @@ static int bus_cgroup_set_transient_property(
167d2b
                         }
167d2b
                 }
167d2b
 
167d2b
+                return 1;
167d2b
+        } else if (streq(name, "BPFProgram")) {
167d2b
+                const char *a, *p;
167d2b
+                size_t n = 0;
167d2b
+
167d2b
+                r = sd_bus_message_enter_container(message, 'a', "(ss)");
167d2b
+                if (r < 0)
167d2b
+                        return r;
167d2b
+
167d2b
+                while ((r = sd_bus_message_read(message, "(ss)", &a, &p)) > 0) {
167d2b
+                        int attach_type = bpf_cgroup_attach_type_from_string(a);
167d2b
+                        if (attach_type < 0)
167d2b
+                                return sd_bus_error_setf(
167d2b
+                                                error,
167d2b
+                                                SD_BUS_ERROR_INVALID_ARGS,
167d2b
+                                                "%s expects a valid BPF attach type, got '%s'.",
167d2b
+                                                name, a);
167d2b
+
167d2b
+                        if (!path_is_normalized(p) || !path_is_absolute(p))
167d2b
+                                return sd_bus_error_setf(
167d2b
+                                                error,
167d2b
+                                                SD_BUS_ERROR_INVALID_ARGS,
167d2b
+                                                "%s= expects a normalized absolute path.",
167d2b
+                                                name);
167d2b
+
167d2b
+                        if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
167d2b
+                                r = cgroup_add_bpf_foreign_program(c, attach_type, p);
167d2b
+                                if (r < 0)
167d2b
+                                        return r;
167d2b
+                        }
167d2b
+                        n++;
167d2b
+                }
167d2b
+                if (r < 0)
167d2b
+                        return r;
167d2b
+
167d2b
+                r = sd_bus_message_exit_container(message);
167d2b
+                if (r < 0)
167d2b
+                        return r;
167d2b
+
167d2b
+                if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
167d2b
+                        _cleanup_free_ char *buf = NULL;
167d2b
+                        _cleanup_fclose_ FILE *f = NULL;
167d2b
+                        CGroupBPFForeignProgram *fp;
167d2b
+                        size_t size = 0;
167d2b
+
167d2b
+                        if (n == 0)
167d2b
+                                while (c->bpf_foreign_programs)
167d2b
+                                        cgroup_context_remove_bpf_foreign_program(c, c->bpf_foreign_programs);
167d2b
+
167d2b
+                        f = open_memstream_unlocked(&buf, &size);
167d2b
+                        if (!f)
167d2b
+                                return -ENOMEM;
167d2b
+
167d2b
+                        fputs(name, f);
167d2b
+                        fputs("=\n", f);
167d2b
+
167d2b
+                        LIST_FOREACH(programs, fp, c->bpf_foreign_programs)
167d2b
+                                fprintf(f, "%s=%s:%s\n", name,
167d2b
+                                                bpf_cgroup_attach_type_to_string(fp->attach_type),
167d2b
+                                                fp->bpffs_path);
167d2b
+
167d2b
+                        r = fflush_and_check(f);
167d2b
+                        if (r < 0)
167d2b
+                                return r;
167d2b
+
167d2b
+                        unit_write_setting(u, flags, name, buf);
167d2b
+
167d2b
+                        if (!LIST_IS_EMPTY(c->bpf_foreign_programs)) {
167d2b
+                                r = bpf_foreign_supported();
167d2b
+                                if (r < 0)
167d2b
+                                        return r;
167d2b
+                                if (r == 0)
167d2b
+                                        log_full(LOG_DEBUG,
167d2b
+                                                 "Transient unit %s configures a BPF program pinned to BPF "
167d2b
+                                                 "filesystem, but the local system does not support that.\n"
167d2b
+                                                 "Starting this unit will fail!", u->id);
167d2b
+                        }
167d2b
+                }
167d2b
+
167d2b
                 return 1;
167d2b
         }
167d2b
 
167d2b
diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c
167d2b
index a75178068b..bcb4087e6d 100644
167d2b
--- a/src/shared/bus-unit-util.c
167d2b
+++ b/src/shared/bus-unit-util.c
167d2b
@@ -842,6 +842,26 @@ static int bus_append_cgroup_property(sd_bus_message *m, const char *field, cons
167d2b
                 return 1;
167d2b
         }
167d2b
 
167d2b
+        if (streq(field, "BPFProgram")) {
167d2b
+                if (isempty(eq))
167d2b
+                        r = sd_bus_message_append(m, "(sv)", field, "a(ss)", 0);
167d2b
+                else {
167d2b
+                        _cleanup_free_ char *word = NULL;
167d2b
+
167d2b
+                        r = extract_first_word(&eq, &word, ":", 0);
167d2b
+                        if (r == -ENOMEM)
167d2b
+                                return log_oom();
167d2b
+                        if (r < 0)
167d2b
+                                return log_error_errno(r, "Failed to parse %s: %m", field);
167d2b
+
167d2b
+                        r = sd_bus_message_append(m, "(sv)", field, "a(ss)", 1, word, eq);
167d2b
+                }
167d2b
+                if (r < 0)
167d2b
+                        return bus_log_create_error(r);
167d2b
+
167d2b
+                return 1;
167d2b
+        }
167d2b
+
167d2b
         return 0;
167d2b
 }
167d2b
 
167d2b
diff --git a/src/systemctl/systemctl-show.c b/src/systemctl/systemctl-show.c
167d2b
index 2402a59d31..dc45de4b3d 100644
167d2b
--- a/src/systemctl/systemctl-show.c
167d2b
+++ b/src/systemctl/systemctl-show.c
167d2b
@@ -1696,6 +1696,23 @@ static int print_property(const char *name, const char *expected_value, sd_bus_m
167d2b
 
167d2b
                         return 1;
167d2b
 
167d2b
+                } else if (streq(name, "BPFProgram")) {
167d2b
+                        const char *a, *p;
167d2b
+
167d2b
+                        r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(ss)");
167d2b
+                        if (r < 0)
167d2b
+                                return bus_log_parse_error(r);
167d2b
+
167d2b
+                        while ((r = sd_bus_message_read(m, "(ss)", &a, &p)) > 0)
167d2b
+                                bus_print_property_valuef(name, expected_value, value, "%s:%s", a, p);
167d2b
+                        if (r < 0)
167d2b
+                                return bus_log_parse_error(r);
167d2b
+
167d2b
+                        r = sd_bus_message_exit_container(m);
167d2b
+                        if (r < 0)
167d2b
+                                return bus_log_parse_error(r);
167d2b
+
167d2b
+                        return 1;
167d2b
                 }
167d2b
 
167d2b
                 break;
167d2b
-- 
167d2b
2.30.2
167d2b